1"""
2Format a pretty string of a `SoupSieve` object for easy debugging.
3
This won't necessarily support all types and such, and it definitely
won't support custom outputs.
6
7It is mainly geared towards our types as the `SelectorList`
8object is a beast to look at without some indentation and newlines.
The format and the various output types are fairly well known (though this
hasn't been tested extensively to make sure we aren't missing corner cases).
11
12Example:
13-------
14```
15>>> import soupsieve as sv
16>>> sv.compile('this > that.class[name=value]').selectors.pretty()
17SelectorList(
18 selectors=(
19 Selector(
20 tag=SelectorTag(
21 name='that',
22 prefix=None),
23 ids=(),
24 classes=(
25 'class',
26 ),
27 attributes=(
28 SelectorAttribute(
29 attribute='name',
30 prefix='',
31 pattern=re.compile(
32 '^value$'),
33 xml_type_pattern=None),
34 ),
35 nth=(),
36 selectors=(),
37 relation=SelectorList(
38 selectors=(
39 Selector(
40 tag=SelectorTag(
41 name='this',
42 prefix=None),
43 ids=(),
44 classes=(),
45 attributes=(),
46 nth=(),
47 selectors=(),
48 relation=SelectorList(
49 selectors=(),
50 is_not=False,
51 is_html=False),
52 rel_type='>',
53 contains=(),
54 lang=(),
55 flags=0),
56 ),
57 is_not=False,
58 is_html=False),
59 rel_type=None,
60 contains=(),
61 lang=(),
62 flags=0),
63 ),
64 is_not=False,
65 is_html=False)
66```
67
68"""
69from __future__ import annotations
70import re
71from typing import Any
72
# Lexer patterns for the `repr`-style text that `pretty` reformats.  Each one
# is applied with `match` at the current scan offset.

# Openers: a dotted name immediately followed by `(`, or a bare bracket.
RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
RE_LSTRT = re.compile(r'\[')
RE_DSTRT = re.compile(r'\{')
RE_TSTRT = re.compile(r'\(')

# Closers.
RE_LEND = re.compile(r'\]')
RE_DEND = re.compile(r'\}')
RE_TEND = re.compile(r'\)')

# An empty container written as a single unit: `()`, `[]` or `{}`.
RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')

# Atoms: `name=` keyword prefixes, digits, bare words and quoted strings
# (a backslash escapes the following character inside a string).
RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
RE_INT = re.compile(r'\d+')
RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')

# Separators; surrounding whitespace is consumed, only the punctuation is
# captured in group 1.
RE_SEP = re.compile(r'\s*(,)\s*')
RE_DSEP = re.compile(r'\s*(:)\s*')

# Token name -> pattern.  Insertion order is significant because `pretty`
# takes the first pattern that matches: 'class' and 'param' must be tried
# before 'kword' (which would match the same leading identifier), and 'empty'
# before the individual bracket openers.
TOKENS = dict(
    (
        ('class', RE_CLASS),
        ('param', RE_PARAM),
        ('empty', RE_EMPTY),
        ('lstrt', RE_LSTRT),
        ('dstrt', RE_DSTRT),
        ('tstrt', RE_TSTRT),
        ('lend', RE_LEND),
        ('dend', RE_DEND),
        ('tend', RE_TEND),
        ('sqstr', RE_SQSTR),
        ('sep', RE_SEP),
        ('dsep', RE_DSEP),
        ('int', RE_INT),
        ('kword', RE_KWORD),
        ('dqstr', RE_DQSTR),
    )
)
106
107
def pretty(obj: Any) -> str:  # pragma: no cover
    """
    Make the object output string pretty.

    `obj` is converted with `str` and scanned from left to right.  At each
    position the patterns in `TOKENS` are tried in dictionary order and the
    first one that matches decides what is emitted:

    - 'class', 'lstrt', 'dstrt', 'tstrt': the text, then a newline and one
      extra level (4 spaces) of indentation.
    - 'lend', 'dend', 'tend': one level of indentation is dropped and the
      text is emitted as is.
    - 'sep': the comma, then a newline at the current indentation.
    - 'dsep': the colon followed by a single space.
    - everything else ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
      the text unchanged.

    A character that no pattern recognizes is copied to the output verbatim
    so that the scan always makes progress.

    Returns the reformatted string.
    """

    sel = str(obj)
    total = len(sel)
    index = 0
    indent = 0
    output = []

    while index < total:
        for name, pattern in TOKENS.items():
            m = pattern.match(sel, index)
            if m:
                break
        else:
            # Nothing matched here (e.g. the `.` of a float, a `-` sign or
            # stray whitespace).  Without this fallback `index` would never
            # advance and the loop would spin forever.
            output.append(sel[index])
            index += 1
            continue

        index = m.end(0)
        if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
            indent += 4
            output.append(f'{m.group(0)}\n{" " * indent}')
        elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
            output.append(m.group(0))
        elif name in ('lend', 'dend', 'tend'):
            # A negative indent (unbalanced closers) is harmless: multiplying
            # a string by a negative count yields the empty string.
            indent -= 4
            output.append(m.group(0))
        elif name == 'sep':
            output.append(f'{m.group(1)}\n{" " * indent}')
        elif name == 'dsep':
            output.append(f'{m.group(1)} ')

    return ''.join(output)