1"""Quote strings to be valid DOT identifiers, assemble quoted attribute lists."""
2
3import functools
4import re
5import typing
6import warnings
7
8from . import _tools
9from . import exceptions
10
# Public names of this module (what ``from <module> import *`` exports).
__all__ = ['quote', 'quote_edge',
           'a_list', 'attr_list',
           'escape', 'nohtml']
14
15# https://www.graphviz.org/doc/info/lang.html
16# https://www.graphviz.org/doc/info/attrs.html#k:escString
17
# Matches a string that starts with '<' and ends with '>' (DOT HTML-like string).
# DOTALL lets the content between the delimiters span multiple lines.
HTML_STRING = re.compile(r'<.*>$', re.DOTALL)

# Matches a string that may be emitted as an unquoted DOT ID:
# either a name (letters, digits, underscores, not starting with a digit)
# or a numeral (optional minus sign, digits with optional fraction).
# Anchored with ``\Z`` rather than ``$``: ``$`` also matches just before
# a trailing newline, which would let e.g. 'spam\n' pass as a valid ID
# and be emitted unquoted (losing the newline).
ID = re.compile(r'([a-zA-Z_][a-zA-Z0-9_]*|-?(\.[0-9]+|[0-9]+(\.[0-9]*)?))\Z')

# DOT keywords; compared against the lower-cased identifier by ``quote()``.
KEYWORDS = {'node', 'edge', 'graph', 'digraph', 'subgraph', 'strict'}

# Compass point names for edge ports.
COMPASS = {'n', 'ne', 'e', 'se', 's', 'sw', 'w', 'nw', 'c', '_'}  # TODO

# Finds a trailing run of backslashes of odd length
# (the last backslash would escape the closing quote of a quoted string).
FINAL_ODD_BACKSLASHES = re.compile(r'(?<!\\)(?:\\{2})*\\$')

# Finds a double quote together with the backslashes directly preceding it:
# any complete pairs of backslashes are captured separately from an optional
# single backslash that already escapes the quote.
QUOTE_WITH_OPTIONAL_BACKSLASHES = re.compile(r'''
    (?P<escaped_backslashes>(?:\\{2})*)
    \\?  # treat \" same as "
    (?P<literal_quote>")
    ''', flags=re.VERBOSE)

# Callable ``f(string) -> string`` that rewrites every double quote so it is
# preceded by exactly one escaping backslash (after any backslash pairs).
ESCAPE_UNESCAPED_QUOTES = functools.partial(QUOTE_WITH_OPTIONAL_BACKSLASHES.sub,
                                            r'\g<escaped_backslashes>'
                                            r'\\'
                                            r'\g<literal_quote>')
38
39
@_tools.deprecate_positional_args(supported_number=1)
def quote(identifier: str,
          is_html_string=HTML_STRING.match,
          is_valid_id=ID.match,
          dot_keywords=KEYWORDS,
          endswith_odd_number_of_backslashes=FINAL_ODD_BACKSLASHES.search,
          escape_unescaped_quotes=ESCAPE_UNESCAPED_QUOTES) -> str:
    r"""Return ``identifier`` as DOT identifier, double-quoted when required.

    The string is returned unchanged if it looks like a DOT HTML string
    (``'<...>'``) and is not a ``NoHtml`` instance, or if it is a valid
    unquoted ID that is not a DOT keyword (compared case-insensitively).
    Otherwise it is wrapped in double quotes with contained quotes escaped.

    A ``DotSyntaxWarning`` is issued when the string to be quoted ends
    with an odd number of backslashes.

    The keyword parameters after ``identifier`` default to the
    module-level patterns and keyword set.

    >>> quote('')  # doctest: +NO_EXE
    '""'

    >>> quote('spam')
    'spam'

    >>> quote('spam spam')
    '"spam spam"'

    >>> quote('-4.2')
    '-4.2'

    >>> quote('.42')
    '.42'

    >>> quote('<<b>spam</b>>')
    '<<b>spam</b>>'

    >>> quote(nohtml('<>'))
    '"<>"'

    >>> print(quote('"'))
    "\""

    >>> print(quote('\\"'))
    "\""

    >>> print(quote('\\\\"'))
    "\\\""

    >>> print(quote('\\\\\\"'))
    "\\\""
    """
    # HTML-like strings pass through untouched unless marked via NoHtml.
    if is_html_string(identifier) and not isinstance(identifier, NoHtml):
        return identifier

    # Plain IDs that do not collide with a keyword need no quoting either.
    if is_valid_id(identifier) and identifier.lower() not in dot_keywords:
        return identifier

    if endswith_odd_number_of_backslashes(identifier):
        # The final backslash would escape the closing double quote.
        warnings.warn('expect syntax error scanning invalid quoted string:'
                      f' {identifier!r}',
                      category=exceptions.DotSyntaxWarning)

    escaped = escape_unescaped_quotes(identifier)
    return f'"{escaped}"'
91
92
def quote_edge(identifier: str) -> str:
    """Return DOT edge statement node_id from string, quoting each part if needed.

    The string is split on the first two colons into node, port,
    and compass. Node and port are passed through ``quote()``;
    a non-empty compass part is appended verbatim.

    >>> quote_edge('spam')  # doctest: +NO_EXE
    'spam'

    >>> quote_edge('spam spam:eggs eggs')
    '"spam spam":"eggs eggs"'

    >>> quote_edge('spam:eggs:s')
    'spam:eggs:s'
    """
    node, _, remainder = identifier.partition(':')
    if not remainder:
        # No port given (or nothing after the colon): node only.
        return quote(node)

    port, _, compass = remainder.partition(':')
    node_and_port = f'{quote(node)}:{quote(port)}'
    if not compass:
        return node_and_port
    return f'{node_and_port}:{compass}'
113
114
@_tools.deprecate_positional_args(supported_number=1)
def a_list(label: typing.Optional[str] = None,
           kwargs=None, attributes=None) -> str:
    """Return assembled DOT a_list string (space-separated ``key=value`` items).

    ``label`` (if not ``None``) comes first, followed by the items of
    ``kwargs`` and then ``attributes``. Items whose value is ``None``
    are omitted. Keys and values are passed through ``quote()``.

    ``attributes`` may be a mapping (anything with an ``items`` attribute)
    or an iterable of ``(key, value)`` pairs.

    >>> a_list('spam', kwargs={'spam': None, 'ham': 'ham ham', 'eggs': ''})  # doctest: +NO_EXE
    'label=spam eggs="" ham="ham ham"'
    """
    items = []
    if label is not None:
        items.append(f'label={quote(label)}')

    if kwargs:
        for key, value in _tools.mapping_items(kwargs):
            if value is not None:
                items.append(f'{quote(key)}={quote(value)}')

    if attributes:
        if hasattr(attributes, 'items'):
            pairs = _tools.mapping_items(attributes)
        else:
            pairs = attributes
        for key, value in pairs:
            if value is not None:
                items.append(f'{quote(key)}={quote(value)}')

    return ' '.join(items)
133
134
@_tools.deprecate_positional_args(supported_number=1)
def attr_list(label: typing.Optional[str] = None,
              kwargs=None, attributes=None) -> str:
    """Return assembled DOT attribute list string.

    Delegates to ``a_list()`` and wraps a non-empty result
    in ``' [...]'`` (with a leading space); returns the empty string
    when there is nothing to emit.

    Sorts ``kwargs`` and ``attributes`` if they are plain dicts
    (to avoid unpredictable order from hash randomization in Python < 3.7).

    >>> attr_list()  # doctest: +NO_EXE
    ''

    >>> attr_list('spam spam', kwargs={'eggs': 'eggs', 'ham': 'ham ham'})
    ' [label="spam spam" eggs=eggs ham="ham ham"]'

    >>> attr_list(kwargs={'spam': None, 'eggs': ''})
    ' [eggs=""]'
    """
    inner = a_list(label, kwargs=kwargs, attributes=attributes)
    return f' [{inner}]' if inner else ''
156
157
class Quote:
    """Quote strings to be valid DOT identifiers, assemble quoted attribute lists.

    Exposes the module-level quoting functions as private static methods.
    """

    # Identifier quoting helpers; wrapped in staticmethod so that access
    # via an instance does not bind ``self`` as the first argument.
    _quote = staticmethod(quote)
    _quote_edge = staticmethod(quote_edge)

    # Attribute list assembly helpers, wrapped the same way.
    _a_list = staticmethod(a_list)
    _attr_list = staticmethod(attr_list)
166
167
def escape(s: str) -> str:
    r"""Return string disabling special meaning of backslashes and ``'<...>'``.

    Every backslash in ``s`` is doubled and the result is passed
    through ``nohtml()``.

    Args:
        s: String in which backslashes and ``'<...>'``
            should be treated as literal.

    Returns:
        Escaped string subclass instance.

    Raises:
        TypeError: If ``s`` is not a ``str``.

    Example:
        >>> import graphviz  # doctest: +NO_EXE
        >>> print(graphviz.escape(r'\l'))
        \\l

    See also:
        Upstream documentation:
        https://www.graphviz.org/doc/info/attrs.html#k:escString
    """
    # Enforce the documented contract explicitly: without this check
    # a non-str argument fails with AttributeError from ``s.replace``.
    if not isinstance(s, str):
        raise TypeError('escape() argument must be str,'
                        f' not {type(s).__name__}')
    return nohtml(s.replace('\\', '\\\\'))
191
192
class NoHtml(str):
    """String subclass that does not treat ``'<...>'`` as DOT HTML string.

    Serves as a marker type: ``quote()`` checks ``isinstance(identifier, NoHtml)``
    before passing a ``'<...>'`` string through unquoted.
    """

    # Empty slots: the subclass declares no instance attributes of its own.
    __slots__ = ()
197
198
def nohtml(s: str) -> str:
    """Return string not treating ``'<...>'`` as DOT HTML string in quoting.

    Args:
        s: String in which leading ``'<'`` and trailing ``'>'``
            should be treated as literal.

    Returns:
        String subclass instance.

    Raises:
        TypeError: If ``s`` is not a ``str``.

    Example:
        >>> import graphviz  # doctest: +NO_EXE
        >>> g = graphviz.Graph()
        >>> g.node(graphviz.nohtml('<>-*-<>'))
        >>> print(g.source)  # doctest: +NORMALIZE_WHITESPACE
        graph {
            "<>-*-<>"
        }
    """
    # Enforce the documented contract explicitly: ``NoHtml(s)`` alone would
    # silently convert any object via ``str()`` instead of raising.
    if not isinstance(s, str):
        raise TypeError('nohtml() argument must be str,'
                        f' not {type(s).__name__}')
    return NoHtml(s)