Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/docutils/transforms/universal.py: 63%

161 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:06 +0000

1# $Id$ 

2# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde 

3# Maintainer: docutils-develop@lists.sourceforge.net 

4# Copyright: This module has been placed in the public domain. 

5 

6""" 

7Transforms needed by most or all documents: 

8 

9- `Decorations`: Generate a document's header & footer. 

10- `ExposeInternals`: Expose internal attributes. 

11- `Messages`: Placement of system messages generated after parsing. 

12- `FilterMessages`: Remove system messages below verbosity threshold. 

13- `TestMessages`: Like `Messages`, used on test runs. 

14- `StripComments`: Remove comment elements from the document tree. 

15- `StripClassesAndElements`: Remove elements with classes 

16 in `self.document.settings.strip_elements_with_classes` 

17 and class values in `self.document.settings.strip_classes`. 

18- `SmartQuotes`: Replace ASCII quotation marks with typographic form. 

19""" 

20 

21__docformat__ = 'reStructuredText' 

22 

23import re 

24import time 

25from docutils import nodes, utils 

26from docutils.transforms import Transform 

27from docutils.utils import smartquotes 

28 

29 

class Decorations(Transform):

    """
    Populate a document's decoration element (header, footer).
    """

    default_priority = 820

    def apply(self):
        """Attach the generated header and footer nodes to the document.

        The decoration element is only requested from the document when
        there is actually something to put into it.
        """
        header_nodes = self.generate_header()
        if header_nodes:
            decoration = self.document.get_decoration()
            header = decoration.get_header()
            header.extend(header_nodes)
        footer_nodes = self.generate_footer()
        if footer_nodes:
            decoration = self.document.get_decoration()
            footer = decoration.get_footer()
            footer.extend(footer_nodes)

    def generate_header(self):
        """Return the header nodes; this implementation has none (None)."""
        return None

    def generate_footer(self):
        """Return a one-paragraph footer built from the document settings.

        The paragraph may contain a "View document source" link
        (`source_url`, or `source_link` together with a known source path),
        a datestamp (`datestamp` is used as `time.strftime` format, UTC)
        and a "Generated by Docutils" note (`generator`).

        Return None if none of these settings is active or if the active
        settings do not produce any content.
        """
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable
        # for the datestamp?
        # See https://sourceforge.net/p/docutils/patches/132/
        # and https://reproducible-builds.org/specs/source-date-epoch/
        settings = self.document.settings
        if not (settings.generator or settings.datestamp
                or settings.source_link or settings.source_url):
            return None
        text = []
        if (settings.source_link and settings._source
                or settings.source_url):
            # An explicit URL takes precedence over the computed path.
            if settings.source_url:
                source = settings.source_url
            else:
                source = utils.relative_path(settings._destination,
                                             settings._source)
            text.extend([
                nodes.reference('', 'View document source',
                                refuri=source),
                nodes.Text('.\n')])
        if settings.datestamp:
            datestamp = time.strftime(settings.datestamp, time.gmtime())
            text.append(nodes.Text('Generated on: ' + datestamp + '.\n'))
        if settings.generator:
            text.extend([
                nodes.Text('Generated by '),
                nodes.reference('', 'Docutils',
                                refuri='https://docutils.sourceforge.io/'),
                nodes.Text(' from '),
                nodes.reference('', 'reStructuredText',
                                refuri='https://docutils.sourceforge.io/'
                                       'rst.html'),
                nodes.Text(' source.\n')])
        if not text:
            # Only `source_link` is set but the source path is unknown:
            # there is nothing to show, so do not emit an empty paragraph
            # (which would create an empty footer).
            return None
        return [nodes.paragraph('', '', *text)]

91 

92 

class ExposeInternals(Transform):

    """
    Copy selected internal node attributes into regular node attributes
    (active only if the ``expose_internals`` setting is set).
    """

    default_priority = 840

    def not_Text(self, node):
        """Return True for every node that is not a `nodes.Text` instance."""
        return not isinstance(node, nodes.Text)

    def apply(self):
        """Store each listed internal attribute as ``internal:<name>``.

        Attributes that are missing on a node or whose value is None
        are skipped.
        """
        exposed = self.document.settings.expose_internals
        if not exposed:
            return
        for element in self.document.findall(self.not_Text):
            for name in exposed:
                internal_value = getattr(element, name, None)
                if internal_value is None:
                    continue
                element['internal:' + name] = internal_value

111 

112 

class Messages(Transform):

    """
    Collect system messages generated after parsing that are not yet part
    of the document tree and append them in a dedicated section.
    """

    default_priority = 860

    def apply(self):
        """Move parentless transform messages into a "system-messages" section.

        Nothing happens if every message already has a parent.
        """
        pending = self.document.transform_messages
        orphans = [message for message in pending if not message.parent]
        if not orphans:
            return
        container = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        container += nodes.title('', 'Docutils System Messages')
        container += orphans
        # Empty the list in place: other references must see the change.
        del pending[:]
        self.document += container

132 

133 

134# TODO: fix bug #435: 

135 

136# Messages are filtered at a very late stage 

137# This breaks the link from inline error messages to the corresponding 

138# system message at the end of document. 

139 

class FilterMessages(Transform):

    """
    Remove system messages below verbosity threshold.

    Convert <problematic> nodes referencing removed messages to <Text>.
    Remove "System Messages" section if empty.
    """

    default_priority = 870

    def apply(self):
        """Filter system messages and clean up what referred to them.

        All three passes iterate over a snapshot (tuple) of the matching
        nodes, because removing or replacing a node while the `findall()`
        generator is still running can disturb the traversal.
        """
        report_level = self.document.reporter.report_level
        for node in tuple(self.document.findall(nodes.system_message)):
            if node['level'] < report_level:
                node.parent.remove(node)
                try:  # also remove id-entry
                    del self.document.ids[node['ids'][0]]
                except (IndexError, KeyError):
                    # The message has no ID (IndexError) or the ID is not
                    # registered with the document (KeyError): nothing to
                    # clean up.
                    pass
        for node in tuple(self.document.findall(nodes.problematic)):
            if node['refid'] not in self.document.ids:
                node.parent.replace(node, nodes.Text(node.astext()))
        for node in tuple(self.document.findall(nodes.section)):
            # A "system-messages" section of length 1 holds just one child
            # (the title added by `Messages`), i.e. no message is left.
            if "system-messages" in node['classes'] and len(node) == 1:
                node.parent.remove(node)

165 

166 

class TestMessages(Transform):

    """
    Attach every post-parse system message that has no parent yet
    to the end of the document.

    Intended for test runs only.
    """

    # marker for pytest to ignore this class during test discovery
    __test__ = False

    default_priority = 880

    def apply(self):
        """Append all parentless transform messages to the document."""
        # The parent check must happen right before each append (no
        # pre-filtering): a message listed twice is attached only once.
        for message in self.document.transform_messages:
            if message.parent:
                continue
            self.document += message

184 

185 

class StripComments(Transform):

    """
    Drop all comment elements from the document tree
    (active only if the ``strip_comments`` setting is enabled).
    """

    default_priority = 740

    def apply(self):
        """Remove every `nodes.comment` node from its parent."""
        if not self.document.settings.strip_comments:
            return
        # Take a snapshot first: the tree is modified while we loop.
        comments = tuple(self.document.findall(nodes.comment))
        for comment in comments:
            comment.parent.remove(comment)

199 

200 

class StripClassesAndElements(Transform):

    """
    Drop every element that has a class listed in
    `self.document.settings.strip_elements_with_classes` and delete the
    class values listed in `self.document.settings.strip_classes` from
    the "classes" attribute of the remaining elements.
    """

    default_priority = 420

    def apply(self):
        """Run element removal first, then class-value removal."""
        settings = self.document.settings
        if settings.strip_elements_with_classes:
            self.strip_elements = set(settings.strip_elements_with_classes)
            # Work on a snapshot: removing the current node would corrupt
            # the iterator returned by `findall()`.
            doomed = tuple(self.document.findall(self.check_classes))
            for element in doomed:
                element.parent.remove(element)

        if settings.strip_classes:
            unwanted = settings.strip_classes
            for element in self.document.findall(nodes.Element):
                class_values = element['classes']
                for name in unwanted:
                    if name in class_values:
                        class_values.remove(name)

    def check_classes(self, node):
        """Return True if `node` is an element with a class to be stripped.

        Reads `self.strip_elements`, which is set by `apply()`.
        """
        if not isinstance(node, nodes.Element):
            return False
        return any(name in self.strip_elements for name in node['classes'])

237 

238 

class SmartQuotes(Transform):

    """
    Convert ASCII quotation marks to their typographic counterparts.

    Runs of dashes are converted to em-dash/en-dash characters as well.
    """

    default_priority = 855

    nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
    """Do not apply "smartquotes" to instances of these block-level nodes."""

    literal_nodes = (nodes.FixedTextElement, nodes.Special,
                     nodes.image, nodes.literal, nodes.math,
                     nodes.raw, nodes.problematic)
    """Do not apply smartquotes to instances of these inline nodes."""

    smartquotes_action = 'qDe'
    """Setting to select smartquote transformations.

    The default 'qDe' educates normal quote characters: (", '),
    em- and en-dashes (---, --) and ellipses (...).
    """

    def __init__(self, document, startnode):
        super().__init__(document, startnode=startnode)
        # Languages already reported as unsupported (warn once per language).
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        """Yield a ``(texttype, nodetext)`` tuple for each node in `txtnodes`.

        This is the interface to ``smartquotes.educate_tokens()``.
        Text whose parent or grandparent is one of `self.literal_nodes`
        is tagged 'literal', everything else 'plain'.
        """
        literal_types = self.literal_nodes
        for textnode in txtnodes:
            container = textnode.parent
            if (isinstance(container, literal_types)
                    or isinstance(container.parent, literal_types)):
                yield 'literal', str(textnode)
                continue
            # SmartQuotes uses backslash escapes instead of null-escapes:
            # put a backslash in front of escaped "active" characters.
            escaped = re.sub('(?<=\x00)([-\\\'".`])', r'\\\1', str(textnode))
            yield 'plain', escaped

    def apply(self):
        """Replace the text nodes of all text blocks with "educated" text.

        Does nothing unless the ``smart_quotes`` setting is true.
        """
        settings = self.document.settings
        smart_quotes = settings.setdefault('smart_quotes', False)
        if not smart_quotes:
            return
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:  # setting value is not a string
            alternative = False

        document_language = settings.language_code
        lc_smartquotes = settings.smartquotes_locales
        if lc_smartquotes:
            smartquotes.smartchars.quotes.update(dict(lc_smartquotes))

        # "Educate" quotes in normal text. Each block of text (TextElement
        # node) is handled as a unit to keep context around inline nodes:
        for block in self.document.findall(nodes.TextElement):
            # Skip preformatted text blocks and special elements as well as
            # nested TextElements (these are not "block-level" elements):
            if (isinstance(block, self.nodes_to_skip)
                    or isinstance(block.parent, nodes.TextElement)):
                continue

            # the text nodes that make up the "text block":
            txtnodes = [
                textnode for textnode in block.findall(nodes.Text)
                if not isinstance(textnode.parent, nodes.option_string)]

            # language whose typographical quotes are to be used:
            lang = block.get_language_code(document_language)
            # toggle the alternative form if the `smart-quotes` setting
            # starts with "alt":
            if alternative:
                lang = (lang.replace('-x-altquot', '')
                        if '-x-altquot' in lang
                        else lang + '-x-altquot')
            # drop unsupported subtags (first supported tag wins):
            supported = next(
                (tag for tag in utils.normalize_language_tag(lang)
                 if tag in smartquotes.smartchars.quotes),
                None)
            if supported is not None:
                lang = supported
            else:  # language not supported -- keep ASCII quotes
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=block)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text
            # (see "utils/smartquotes.py" for the attribute setting):
            teacher = smartquotes.educate_tokens(
                self.get_tokens(txtnodes),
                attr=self.smartquotes_action, language=lang)

            for old_text, educated in zip(txtnodes, teacher):
                old_text.parent.replace(old_text, nodes.Text(educated))

        self.unsupported_languages.clear()