Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/botocore/docs/bcdoc/docstringparser.py: 30%
175 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:51 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:51 +0000
1# Copyright 2012-2013 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License"). You
4# may not use this file except in compliance with the License. A copy of
5# the License is located at
6#
7# http://aws.amazon.com/apache2.0/
8#
9# or in the "license" file accompanying this file. This file is
10# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11# ANY KIND, either express or implied. See the License for the specific
12# language governing permissions and limitations under the License.
13from html.parser import HTMLParser
14from itertools import zip_longest
# Parent tags that take priority over certain nested tags: a child whose tag
# is in OMIT_NESTED_TAGS writes only its contents when its parent is one of
# these (see ``TagNode.write``).
PRIORITY_PARENT_TAGS = (
    'code',
    'a',
)
# Tags whose own start/end markup is skipped when they sit directly inside a
# PRIORITY_PARENT_TAGS parent.
OMIT_NESTED_TAGS = (
    'span',
    'i',
    'code',
    'a',
)
# Tags whose own start/end markup is skipped whenever they contain another
# tag node.
OMIT_SELF_TAGS = (
    'i',
    'b',
)
# Tags treated as block-display: ``TagNode.collapse_whitespace`` strips their
# leading and trailing whitespace.
HTML_BLOCK_DISPLAY_TAGS = (
    'p',
    'note',
    'ul',
    'li',
)
class DocStringParser(HTMLParser):
    """
    A small HTML parser aimed at the subset of HTML found in the
    documentation strings of the JSON models. Parsed tags and text are
    collected in an ``HTMLTree`` which is written to ``doc`` after every
    ``feed`` and on ``close``.
    """

    def __init__(self, doc):
        """Store ``doc`` (the object the tree is written to) and initialise
        the underlying ``HTMLParser``."""
        self.doc = doc
        # Placeholder only: ``HTMLParser.__init__`` calls ``reset()``, which
        # installs the first real tree.
        self.tree = None
        super().__init__()

    def reset(self):
        """Reset the parser state and start with an empty tree."""
        HTMLParser.reset(self)
        self.tree = HTMLTree(self.doc)

    def feed(self, data):
        """Parse ``data`` and immediately write the resulting tree."""
        super().feed(data)
        self._flush_tree()

    def close(self):
        """Finish parsing and write whatever is still held in the tree."""
        super().close()
        self._flush_tree()

    def _flush_tree(self):
        # Write the current tree out, then continue with a fresh one so that
        # nothing is written twice.
        self.tree.write()
        self.tree = HTMLTree(self.doc)

    def handle_starttag(self, tag, attrs):
        """Forward an opening tag and its attributes to the tree."""
        self.tree.add_tag(tag, attrs=attrs)

    def handle_endtag(self, tag):
        """Forward a closing tag to the tree."""
        self.tree.add_tag(tag, is_start=False)

    def handle_data(self, data):
        """Forward character data to the tree."""
        self.tree.add_data(data)
class HTMLTree:
    """
    A tree which handles HTML nodes. Designed to work with a python HTML parser,
    meaning that the current_node will be the most recently opened tag. When
    a tag is closed, the current_node moves up to the parent node.
    """

    def __init__(self, doc):
        """Create an empty tree.

        :param doc: Object the tree is written to. Its ``style`` attribute is
            inspected for ``start_<tag>`` / ``end_<tag>`` handlers.
        """
        self.doc = doc
        self.head = StemNode()
        self.current_node = self.head
        # Every tag (opening or closing) that had no handler on ``doc.style``.
        self.unhandled_tags = []

    def add_tag(self, tag, attrs=None, is_start=True):
        """Record an opening or closing tag.

        Tags without a matching handler on ``doc.style`` are appended to
        ``unhandled_tags`` and otherwise ignored. An opening tag becomes a new
        ``TagNode`` child of the current node and the new current node. A
        closing tag moves the current node up to its parent.

        :param tag: Tag name.
        :param attrs: Attributes of an opening tag, passed to ``TagNode``.
        :param is_start: ``True`` for an opening tag, ``False`` for a closing
            one.
        """
        if not self._doc_has_handler(tag, is_start):
            self.unhandled_tags.append(tag)
            return

        if is_start:
            node = TagNode(tag, attrs)
            self.current_node.add_child(node)
            self.current_node = node
        elif self.current_node.parent is not None:
            # Never climb above the root: a closing tag with no matching
            # opening tag would otherwise leave ``current_node`` as ``None``
            # and make the next ``add_data``/``add_tag`` call fail.
            self.current_node = self.current_node.parent

    def _doc_has_handler(self, tag, is_start):
        """Return whether ``doc.style`` has a start/end handler for ``tag``."""
        if is_start:
            handler_name = 'start_%s' % tag
        else:
            handler_name = 'end_%s' % tag

        return hasattr(self.doc.style, handler_name)

    def add_data(self, data):
        """Attach ``data`` as a ``DataNode`` child of the current node."""
        self.current_node.add_child(DataNode(data))

    def write(self):
        """Write the whole tree, starting at the root, to ``doc``."""
        self.head.write(self.doc)
class Node:
    """Base class of every node in the tree; it only tracks its parent."""

    def __init__(self, parent=None):
        """Remember ``parent`` (``None`` when the node is not attached)."""
        self.parent = parent

    def write(self, doc):
        """Write this node to ``doc``; concrete node types must override."""
        raise NotImplementedError()
class StemNode(Node):
    """A node that holds an ordered list of child nodes."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.children = []

    def add_child(self, child):
        """Append ``child`` and make this node its parent."""
        child.parent = self
        self.children.append(child)

    def write(self, doc):
        """Collapse whitespace in the subtree, then write all children."""
        self.collapse_whitespace()
        self._write_children(doc)

    def _write_children(self, doc):
        """Write each child in order.

        A ``TagNode`` child that has a following sibling receives that
        sibling as lookahead; every other child is written without it.
        """
        for child, next_child in zip_longest(self.children, self.children[1:]):
            if isinstance(child, TagNode) and next_child is not None:
                child.write(doc, next_child)
            else:
                child.write(doc)

    def is_whitespace(self):
        """Return ``True`` if every child is whitespace (or there are none)."""
        return all(child.is_whitespace() for child in self.children)

    def startswith_whitespace(self):
        """Return ``True`` if the first child starts with whitespace."""
        # ``bool`` keeps the result a real boolean; a bare ``and`` would hand
        # back the empty list when there are no children.
        return bool(
            self.children and self.children[0].startswith_whitespace()
        )

    def endswith_whitespace(self):
        """Return ``True`` if the last child ends with whitespace."""
        return bool(
            self.children and self.children[-1].endswith_whitespace()
        )

    def lstrip(self):
        """Drop leading whitespace-only children, then left-strip the first
        remaining child."""
        skip = 0
        for child in self.children:
            if not child.is_whitespace():
                break
            skip += 1
        if skip:
            # One slice instead of one per removed child.
            self.children = self.children[skip:]
        if self.children:
            self.children[0].lstrip()

    def rstrip(self):
        """Drop trailing whitespace-only children, then right-strip the last
        remaining child."""
        keep = len(self.children)
        while keep and self.children[keep - 1].is_whitespace():
            keep -= 1
        if keep != len(self.children):
            self.children = self.children[:keep]
        if self.children:
            self.children[-1].rstrip()

    def collapse_whitespace(self):
        """Remove collapsible white-space from HTML.

        HTML in docstrings often contains extraneous white-space around tags,
        for readability. Browsers would collapse this white-space before
        rendering. If not removed before conversion to RST where white-space is
        part of the syntax, for example for indentation, it can result in
        incorrect output.
        """
        self.lstrip()
        self.rstrip()
        for child in self.children:
            child.collapse_whitespace()
class TagNode(StemNode):
    """
    Node for a single HTML tag. Start and end handlers are looked up on
    ``doc.style`` and only called when they exist.
    """

    def __init__(self, tag, attrs=None, parent=None):
        super().__init__(parent)
        self.tag = tag
        self.attrs = attrs

    def _has_nested_tags(self):
        """Return True if at least one direct child is a ``TagNode``."""
        for child in self.children:
            if isinstance(child, TagNode):
                return True
        return False

    def write(self, doc, next_child=None):
        """Write this tag and its children to ``doc``.

        The tag's own start/end handlers are skipped, and only the children
        written, when either the tag is in ``OMIT_SELF_TAGS`` and contains
        another tag, or the tag is in ``OMIT_NESTED_TAGS`` and its parent is a
        tag listed in ``PRIORITY_PARENT_TAGS``.

        :param next_child: The sibling that follows this node, if any; it is
            handed on to the end handler logic.
        """
        nested_tags_win = (
            self.tag in OMIT_SELF_TAGS and self._has_nested_tags()
        )
        parent_tag_wins = (
            isinstance(self.parent, TagNode)
            and self.parent.tag in PRIORITY_PARENT_TAGS
            and self.tag in OMIT_NESTED_TAGS
        )
        emit_own_markup = not (nested_tags_win or parent_tag_wins)

        if emit_own_markup:
            self._write_start(doc)
        self._write_children(doc)
        if emit_own_markup:
            self._write_end(doc, next_child)

    def collapse_whitespace(self):
        """Remove collapsible white-space.

        Every tag collapses whitespace between its children; block-display
        HTML tags additionally lose all leading and trailing whitespace.

        Approximately follows the specification used in browsers:
        https://www.w3.org/TR/css-text-3/#white-space-rules
        https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace
        """
        if self.tag in HTML_BLOCK_DISPLAY_TAGS:
            self.lstrip()
            self.rstrip()

        # Adjacent (left, right) siblings. The list of children itself is not
        # modified by the two passes below, so the pairs can be shared.
        sibling_pairs = list(zip(self.children, self.children[1:]))

        # Pass 1: text ending in whitespace followed by something starting
        # with whitespace, e.g. ``</b> <i> foo</i>`` -> ``</b> <i>foo</i>``.
        for left, right in sibling_pairs:
            if not isinstance(left, DataNode):
                continue
            if left.endswith_whitespace() and right.startswith_whitespace():
                right.lstrip()

        # Pass 2: the mirror case, e.g. ``<b>bar </b> <i>``. It must run
        # after pass 1 has finished, because pass 1 changes what the
        # whitespace checks here see.
        for left, right in sibling_pairs:
            if not isinstance(right, DataNode):
                continue
            if left.endswith_whitespace() and right.startswith_whitespace():
                left.rstrip()

        # Finally descend into the children.
        for child in self.children:
            child.collapse_whitespace()

    def _write_start(self, doc):
        """Call ``doc.style.start_<tag>(attrs)`` if that handler exists."""
        handler_name = 'start_%s' % self.tag
        if not hasattr(doc.style, handler_name):
            return
        start_handler = getattr(doc.style, handler_name)
        start_handler(self.attrs)

    def _write_end(self, doc, next_child):
        """Call ``doc.style.end_<tag>`` if that handler exists.

        Only ``end_a`` receives ``next_child``; all other end handlers are
        called without arguments.
        """
        handler_name = 'end_%s' % self.tag
        if not hasattr(doc.style, handler_name):
            return
        end_handler = getattr(doc.style, handler_name)
        if handler_name == 'end_a':
            # Lookahead lets the link handler decide whether a space is
            # needed after the link node.
            end_handler(next_child)
        else:
            end_handler()
class DataNode(Node):
    """
    A Node that contains only string data.

    The text is stored in three parts: leading whitespace, the stripped
    text, and trailing whitespace. Whitespace-only input is stored entirely
    as trailing whitespace.
    """

    def __init__(self, data, parent=None):
        """Split ``data`` into leading whitespace, text and trailing
        whitespace.

        :param data: The text of the node.
        :param parent: Optional parent node.
        :raises ValueError: If ``data`` is not a ``str``.
        """
        super().__init__(parent)
        if not isinstance(data, str):
            raise ValueError("Expecting string type, %s given." % type(data))
        self._leading_whitespace = ''
        self._trailing_whitespace = ''
        self._stripped_data = ''
        if data == '':
            return
        if data.isspace():
            self._trailing_whitespace = data
            return
        first_non_space = next(
            idx for idx, ch in enumerate(data) if not ch.isspace()
        )
        last_non_space = len(data) - next(
            idx for idx, ch in enumerate(reversed(data)) if not ch.isspace()
        )
        self._leading_whitespace = data[:first_non_space]
        self._trailing_whitespace = data[last_non_space:]
        self._stripped_data = data[first_non_space:last_non_space]

    @property
    def data(self):
        """The node's text reassembled from its three parts."""
        return (
            f'{self._leading_whitespace}{self._stripped_data}'
            f'{self._trailing_whitespace}'
        )

    def is_whitespace(self):
        """Return True if the node has whitespace but no other text."""
        return self._stripped_data == '' and (
            self._leading_whitespace != '' or self._trailing_whitespace != ''
        )

    def startswith_whitespace(self):
        """Return True if the node's text begins with whitespace."""
        return self._leading_whitespace != '' or (
            self._stripped_data == '' and self._trailing_whitespace != ''
        )

    def endswith_whitespace(self):
        """Return True if the node's text ends with whitespace."""
        return self._trailing_whitespace != '' or (
            self._stripped_data == '' and self._leading_whitespace != ''
        )

    def lstrip(self):
        """Remove whitespace from the start of the node's text."""
        if self._leading_whitespace != '':
            self._leading_whitespace = ''
        elif self._stripped_data == '':
            # With no text, trailing whitespace is also the start of the
            # node. It is cleared directly instead of via ``rstrip()``:
            # delegating made the two methods call each other without end
            # (RecursionError) once the node was completely empty.
            self._trailing_whitespace = ''

    def rstrip(self):
        """Remove whitespace from the end of the node's text."""
        if self._trailing_whitespace != '':
            self._trailing_whitespace = ''
        elif self._stripped_data == '':
            # Mirror of ``lstrip``: with no text, leading whitespace is also
            # the end of the node.
            self._leading_whitespace = ''

    def collapse_whitespace(self):
        """Noop, ``DataNode.write`` always collapses whitespace"""
        return

    def write(self, doc):
        """Write the node's text to ``doc``.

        The stripped text is split on whitespace, passed through
        ``doc.translate_words`` and re-joined with single spaces; the stored
        leading and trailing whitespace is kept around it. Nothing is written
        when the result is empty.
        """
        words = doc.translate_words(self._stripped_data.split())
        str_data = (
            f'{self._leading_whitespace}{" ".join(words)}'
            f'{self._trailing_whitespace}'
        )
        if str_data != '':
            doc.handle_data(str_data)