1"""Quote strings to be valid DOT identifiers, assemble quoted attribute lists."""
2
3from collections.abc import Sequence, Set, Mapping
4import functools
5import re
6from typing import Final
7import warnings
8
9from . import _tools
10from . import exceptions
11
# Names exported by a star-import of this module.
__all__ = ['quote', 'quote_edge',
           'a_list', 'attr_list',
           'escape', 'nohtml']
15
# https://www.graphviz.org/doc/info/lang.html
# https://www.graphviz.org/doc/info/attrs.html#k:escString
18
# Matches strings that start with '<' and end with '>' (DOT HTML-like strings).
# DOTALL lets the enclosed content span several lines.
HTML_STRING: Final = re.compile(r'<.*>$', re.DOTALL)

# Matches strings that can be emitted as an unquoted DOT ID: an identifier not
# starting with a digit, or a numeral (optional sign, optional fraction).
# Anchored with \Z instead of $: $ also matches just before a trailing
# newline, which would let e.g. 'spam\n' pass as a valid unquoted ID and drop
# the newline from the emitted identifier.
ID: Final = re.compile(r'([a-zA-Z_][a-zA-Z0-9_]*|-?(\.[0-9]+|[0-9]+(\.[0-9]*)?))\Z')

# DOT keywords; quote() compares against the lower-cased identifier.
KEYWORDS: Final[Set[str]] = {'node', 'edge', 'graph', 'digraph', 'subgraph', 'strict'}

# Compass point names.
# TODO: not referenced by the functions visible in this module;
# presumably intended for validating the compass part of a port.
COMPASS: Final[Set[str]] = {'n', 'ne', 'e', 'se', 's', 'sw', 'w', 'nw', 'c', '_'}

# Finds an odd-length run of backslashes at the very end of a string
# (the last backslash would escape the closing double quote added by quote()).
FINAL_ODD_BACKSLASHES: Final = re.compile(r'(?<!\\)(?:\\{2})*\\$')

# Finds a double quote together with the backslashes directly in front of it:
# complete backslash pairs are captured separately from one optional
# remaining backslash that already escapes the quote.
QUOTE_WITH_OPTIONAL_BACKSLASHES: Final = re.compile(r'''
    (?P<escaped_backslashes>(?:\\{2})*)
    \\?  # treat \" same as "
    (?P<literal_quote>")
    ''', flags=re.VERBOSE)

# Callable that rewrites every double quote so it is preceded by exactly one
# escaping backslash, keeping any preceding backslash pairs as they are.
ESCAPE_UNESCAPED_QUOTES: Final = functools.partial(QUOTE_WITH_OPTIONAL_BACKSLASHES.sub,
                                                   r'\g<escaped_backslashes>'
                                                   r'\\'
                                                   r'\g<literal_quote>')
39
40
@_tools.deprecate_positional_args(supported_number=1)
def quote(identifier: str,
          is_html_string=HTML_STRING.match,
          is_valid_id=ID.match,
          dot_keywords=KEYWORDS,
          endswith_odd_number_of_backslashes=FINAL_ODD_BACKSLASHES.search,
          escape_unescaped_quotes=ESCAPE_UNESCAPED_QUOTES) -> str:
    r"""Return ``identifier`` as a DOT ID, adding double quotes where required.

    The string is returned unchanged when it looks like an HTML-like string
    (``'<...>'``) and is not a ``NoHtml`` instance, or when it is a valid
    unquoted ID that is not a DOT keyword (compared case-insensitively).
    Anything else is wrapped in double quotes, with contained double quotes
    backslash-escaped.

    Args:
        identifier: String to convert.
        is_html_string: Callable returning a truthy value for HTML-like strings.
        is_valid_id: Callable returning a truthy value for valid unquoted IDs.
        dot_keywords: Container of lower-case DOT keywords.
        endswith_odd_number_of_backslashes: Callable returning a truthy value
            if the string ends with an odd number of backslashes.
        escape_unescaped_quotes: Callable returning the string with its
            double quotes escaped.

    Returns:
        The identifier, quoted if needed.

    Warns:
        exceptions.DotSyntaxWarning: If a string that gets quoted ends with
            an odd number of backslashes.

    >>> quote('')  # doctest: +NO_EXE
    '""'

    >>> quote('spam')
    'spam'

    >>> quote('spam spam')
    '"spam spam"'

    >>> quote('-4.2')
    '-4.2'

    >>> quote('.42')
    '.42'

    >>> quote('<<b>spam</b>>')
    '<<b>spam</b>>'

    >>> quote(nohtml('<>'))
    '"<>"'

    >>> print(quote('"'))
    "\""

    >>> print(quote('\\"'))
    "\""

    >>> print(quote('\\\\"'))
    "\\\""

    >>> print(quote('\\\\\\"'))
    "\\\""
    """
    # HTML-like strings pass through verbatim unless marked via NoHtml.
    if is_html_string(identifier) and not isinstance(identifier, NoHtml):
        return identifier

    # Valid IDs that do not collide with a keyword need no quoting.
    if is_valid_id(identifier) and identifier.lower() not in dot_keywords:
        return identifier

    # A trailing unpaired backslash would escape the closing quote added below.
    if endswith_odd_number_of_backslashes(identifier):
        message = ('expect syntax error scanning invalid quoted string:'
                   f' {identifier!r}')
        warnings.warn(message, category=exceptions.DotSyntaxWarning)

    return f'"{escape_unescaped_quotes(identifier)}"'
92
93
def quote_edge(identifier: str) -> str:
    """Return DOT edge statement node_id from string, quote if needed.

    The string is split on its first two colons into
    ``node[:port[:compass]]`` and each non-empty component is passed
    through ``quote()`` before the parts are joined with ``':'`` again.
    Everything after the second colon is treated as the compass component.

    Args:
        identifier: Node name, optionally followed by ``':port'``
            and ``':compass'``.

    Returns:
        The node_id with each component quoted where needed.

    >>> quote_edge('spam')  # doctest: +NO_EXE
    'spam'

    >>> quote_edge('spam spam:eggs eggs')
    '"spam spam":"eggs eggs"'

    >>> quote_edge('spam:eggs:s')
    'spam:eggs:s'

    >>> quote_edge('spam:eggs:spam spam')
    'spam:eggs:"spam spam"'
    """
    node, _, rest = identifier.partition(':')
    parts = [quote(node)]
    if rest:
        port, _, compass = rest.partition(':')
        parts.append(quote(port))
        if compass:
            # Per the DOT language reference the parser accepts any ID in
            # compass position, so quoting keeps the output syntactically
            # valid; regular compass values are valid IDs and stay unquoted.
            parts.append(quote(compass))
    return ':'.join(parts)
114
115
@_tools.deprecate_positional_args(supported_number=1)
def a_list(label: str | None = None,
           kwargs: Mapping[str, str] | None = None,
           attributes: (Mapping[str, str]
                        | Sequence[tuple[str, str]]
                        | None) = None) -> str:
    """Return assembled DOT a_list string.

    Emits ``label=...`` first (if ``label`` is not ``None``), then the
    ``kwargs`` items, then the ``attributes`` items, as space-separated
    ``key=value`` pairs with both sides passed through ``quote()``.
    Pairs whose value is ``None`` are left out.

    Args:
        label: Optional label value.
        kwargs: Optional mapping of attribute names to values; iterated
            via ``_tools.mapping_items()``.
        attributes: Optional mapping (iterated via
            ``_tools.mapping_items()``) or sequence of ``(name, value)``
            pairs (iterated as given).

    Returns:
        The joined pairs, or an empty string if there is nothing to emit.

    >>> a_list('spam', kwargs={'spam': None, 'ham': 'ham ham', 'eggs': ''})  # doctest: +NO_EXE
    'label=spam eggs="" ham="ham ham"'
    """
    def format_pairs(pairs):
        # Render key=value for every pair that actually has a value.
        return [f'{quote(key)}={quote(value)}'
                for key, value in pairs if value is not None]

    chunks = []
    if label is not None:
        chunks.append(f'label={quote(label)}')

    if kwargs:
        chunks.extend(format_pairs(_tools.mapping_items(kwargs)))

    if attributes:
        if isinstance(attributes, Mapping):
            pairs = _tools.mapping_items(attributes)
        else:
            pairs = attributes
        chunks.extend(format_pairs(pairs))

    return ' '.join(chunks)
137
138
@_tools.deprecate_positional_args(supported_number=1)
def attr_list(label: str | None = None,
              kwargs: Mapping[str, str] | None = None,
              attributes: (Mapping[str, str]
                           | Sequence[tuple[str, str]]
                           | None) = None) -> str:
    """Return assembled DOT attribute list string.

    Delegates to ``a_list()`` for the content (including item order and
    the skipping of ``None`` values) and wraps a non-empty result as
    ``' [<content>]'`` with a leading space.

    Args:
        label: Optional label value.
        kwargs: Optional mapping of attribute names to values.
        attributes: Optional mapping or sequence of ``(name, value)`` pairs.

    Returns:
        The bracketed attribute list, or an empty string
        if there are no attributes to emit.

    >>> attr_list()  # doctest: +NO_EXE
    ''

    >>> attr_list('spam spam', kwargs={'eggs': 'eggs', 'ham': 'ham ham'})
    ' [label="spam spam" eggs=eggs ham="ham ham"]'

    >>> attr_list(kwargs={'spam': None, 'eggs': ''})
    ' [eggs=""]'
    """
    inner = a_list(label, kwargs=kwargs, attributes=attributes)
    return f' [{inner}]' if inner else ''
163
164
class Quote:
    """Quote strings to be valid DOT identifiers, assemble quoted attribute lists.

    Exposes the module-level quoting functions as underscore-prefixed
    static methods, so they can be called through the class or an instance
    without receiving an implicit ``self`` argument.
    """

    # Identifier quoting.
    _quote = staticmethod(quote)
    _quote_edge = staticmethod(quote_edge)

    # Attribute list assembly.
    _a_list = staticmethod(a_list)
    _attr_list = staticmethod(attr_list)
173
174
def escape(s: str) -> str:
    r"""Return ``s`` with backslashes doubled and ``'<...>'`` taken literally.

    Every backslash in ``s`` is replaced by two backslashes and the result
    is passed through ``nohtml()``.

    Args:
        s: String in which backslashes and ``'<...>'``
            should be treated as literal.

    Returns:
        The result of ``nohtml()`` applied to the backslash-doubled string.

    Example:
        >>> import graphviz  # doctest: +NO_EXE
        >>> print(graphviz.escape(r'\l'))
        \\l

    See also:
        Upstream documentation:
        https://www.graphviz.org/doc/info/attrs.html#k:escString
    """
    doubled_backslashes = s.replace('\\', '\\\\')
    return nohtml(doubled_backslashes)
198
199
class NoHtml(str):
    """String subclass that does not treat ``'<...>'`` as DOT HTML string.

    Serves as a marker type: ``quote()`` checks for it with ``isinstance()``
    and then does not pass a ``'<...>'`` string through as an HTML string.
    """

    # Empty slots: the subclass declares no instance attributes of its own.
    __slots__ = ()
204
205
def nohtml(s: str) -> str:
    """Return string not treating ``'<...>'`` as DOT HTML string in quoting.

    Wraps ``s`` in the ``NoHtml`` string subclass; no explicit type check
    is performed on ``s`` here.

    Args:
        s: String in which leading ``'<'`` and trailing ``'>'``
            should be treated as literal.

    Returns:
        ``NoHtml`` (string subclass) instance.

    Example:
        >>> import graphviz  # doctest: +NO_EXE
        >>> g = graphviz.Graph()
        >>> g.node(graphviz.nohtml('<>-*-<>'))
        >>> print(g.source)  # doctest: +NORMALIZE_WHITESPACE
        graph {
            "<>-*-<>"
        }
    """
    marked = NoHtml(s)
    return marked