1import functools
2import string
3import sys
4import typing as t
5
# Everything in this branch exists only for static type checkers. The branch
# is skipped at runtime, so typing_extensions is not imported here when the
# module actually runs; annotations that mention ``te`` are written as strings.
if t.TYPE_CHECKING:
    import typing_extensions as te

    class HasHTML(te.Protocol):
        """Structural type for any object that provides ``__html__()``."""

        def __html__(self) -> str:
            pass

    # Parameter specification used in the annotations of
    # ``_simple_escaping_wrapper`` to carry the wrapped callable's signature.
    _P = te.ParamSpec("_P")
14
15
# Version string exposed by this package.
__version__ = "2.1.5"
17
18
def _simple_escaping_wrapper(func: "t.Callable[_P, str]") -> "t.Callable[_P, Markup]":
    """Adapt a ``str`` method for use as a ``Markup`` method.

    The returned function passes its positional and keyword arguments
    through ``_escape_argspec`` (which escapes the ones that are strings
    or expose ``__html__``), calls ``func`` with the processed arguments,
    and wraps the result in the class of the instance it was called on.
    """

    @functools.wraps(func)
    def wrapped(self: "Markup", *args: "_P.args", **kwargs: "_P.kwargs") -> "Markup":
        escape_one = self.escape
        # Positional arguments are copied into a list so they can be
        # replaced by index; keyword arguments are replaced in place.
        positional = _escape_argspec(list(args), enumerate(args), escape_one)
        _escape_argspec(kwargs, kwargs.items(), escape_one)
        result = func(self, *positional, **kwargs)  # type: ignore[arg-type]
        return self.__class__(result)

    return wrapped  # type: ignore[return-value]
27
28
class Markup(str):
    """A string that is ready to be safely inserted into an HTML or XML
    document, either because it was escaped or because it was marked
    safe.

    Passing an object to the constructor converts it to text and wraps
    it to mark it safe without escaping. To escape the text, use the
    :meth:`escape` class method instead.

    >>> Markup("Hello, <em>World</em>!")
    Markup('Hello, <em>World</em>!')
    >>> Markup(42)
    Markup('42')
    >>> Markup.escape("Hello, <em>World</em>!")
    Markup('Hello, &lt;em&gt;World&lt;/em&gt;!')

    This implements the ``__html__()`` interface that some frameworks
    use. Passing an object that implements ``__html__()`` will wrap the
    output of that method, marking it safe.

    >>> class Foo:
    ...     def __html__(self):
    ...         return '<a href="/foo">foo</a>'
    ...
    >>> Markup(Foo())
    Markup('<a href="/foo">foo</a>')

    This is a subclass of :class:`str`. It has the same methods, but
    escapes their arguments and returns a ``Markup`` instance.

    >>> Markup("<em>%s</em>") % ("foo & bar",)
    Markup('<em>foo &amp; bar</em>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup('<em>Hello</em> &lt;foo&gt;')
    """

    __slots__ = ()

    def __new__(
        cls, base: t.Any = "", encoding: t.Optional[str] = None, errors: str = "strict"
    ) -> "te.Self":
        """Create the string without escaping ``base``.

        If ``base`` has an ``__html__`` attribute, the result of calling
        it is used as the text. ``encoding`` and ``errors`` are forwarded
        to :class:`str` only when ``encoding`` is given.
        """
        if hasattr(base, "__html__"):
            base = base.__html__()

        if encoding is None:
            return super().__new__(cls, base)

        return super().__new__(cls, base, encoding, errors)

    def __html__(self) -> "te.Self":
        """Return ``self``; the value is already safe markup."""
        return self

    def __add__(self, other: t.Union[str, "HasHTML"]) -> "te.Self":
        """Concatenate, escaping ``other`` first.

        Returns ``NotImplemented`` when ``other`` is neither a string nor
        an object with ``__html__``.
        """
        if isinstance(other, str) or hasattr(other, "__html__"):
            return self.__class__(super().__add__(self.escape(other)))

        return NotImplemented

    def __radd__(self, other: t.Union[str, "HasHTML"]) -> "te.Self":
        """Reflected concatenation: escape ``other`` and append ``self``."""
        if isinstance(other, str) or hasattr(other, "__html__"):
            return self.escape(other).__add__(self)

        return NotImplemented

    def __mul__(self, num: "te.SupportsIndex") -> "te.Self":
        """Repeat the markup ``num`` times, keeping the result's class.

        Returns ``NotImplemented`` when ``num`` is not an ``int``.
        """
        if isinstance(num, int):
            return self.__class__(super().__mul__(num))

        return NotImplemented

    __rmul__ = __mul__

    def __mod__(self, arg: t.Any) -> "te.Self":
        """Apply ``%`` formatting with every argument escaped.

        Arguments are wrapped in ``_MarkupEscapeHelper`` so that their
        string conversions go through :meth:`escape`.
        """
        if isinstance(arg, tuple):
            # a tuple of arguments, each wrapped
            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
        elif hasattr(type(arg), "__getitem__") and not isinstance(arg, str):
            # a mapping of arguments, wrapped
            arg = _MarkupEscapeHelper(arg, self.escape)
        else:
            # a single argument, wrapped with the helper and a tuple
            arg = (_MarkupEscapeHelper(arg, self.escape),)

        return self.__class__(super().__mod__(arg))

    def __repr__(self) -> str:
        """Return ``ClassName(<str repr>)`` using the instance's class name."""
        return f"{self.__class__.__name__}({super().__repr__()})"

    # The methods below take their docstrings from the matching ``str``
    # methods, assigned right after each definition.

    # Escapes every item of ``seq`` before joining.
    def join(self, seq: t.Iterable[t.Union[str, "HasHTML"]]) -> "te.Self":
        return self.__class__(super().join(map(self.escape, seq)))

    join.__doc__ = str.join.__doc__

    # Each piece of the split is re-wrapped in the instance's class.
    def split(  # type: ignore[override]
        self, sep: t.Optional[str] = None, maxsplit: int = -1
    ) -> t.List["te.Self"]:
        return [self.__class__(v) for v in super().split(sep, maxsplit)]

    split.__doc__ = str.split.__doc__

    def rsplit(  # type: ignore[override]
        self, sep: t.Optional[str] = None, maxsplit: int = -1
    ) -> t.List["te.Self"]:
        return [self.__class__(v) for v in super().rsplit(sep, maxsplit)]

    rsplit.__doc__ = str.rsplit.__doc__

    def splitlines(  # type: ignore[override]
        self, keepends: bool = False
    ) -> t.List["te.Self"]:
        return [self.__class__(v) for v in super().splitlines(keepends)]

    splitlines.__doc__ = str.splitlines.__doc__

    def unescape(self) -> str:
        """Convert escaped markup back into a text string. This replaces
        HTML entities with the characters they represent.

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        'Main » <em>About</em>'
        """
        from html import unescape

        return unescape(str(self))

    def striptags(self) -> str:
        """:meth:`unescape` the markup, remove tags, and normalize
        whitespace to single spaces.

        >>> Markup("Main &raquo;\t<em>About</em>").striptags()
        'Main » About'
        """
        value = str(self)

        # Look for comments then tags separately. Otherwise, a comment that
        # contains a tag would end early, leaving some of the comment behind.

        while True:
            # keep finding comment start marks
            start = value.find("<!--")

            if start == -1:
                break

            # find a comment end mark beyond the start, otherwise stop
            end = value.find("-->", start)

            if end == -1:
                break

            value = f"{value[:start]}{value[end + 3:]}"

        # remove tags using the same method
        while True:
            start = value.find("<")

            if start == -1:
                break

            end = value.find(">", start)

            if end == -1:
                break

            value = f"{value[:start]}{value[end + 1:]}"

        # collapse spaces
        value = " ".join(value.split())
        return self.__class__(value).unescape()

    @classmethod
    def escape(cls, s: t.Any) -> "te.Self":
        """Escape a string. Calls :func:`escape` and ensures that for
        subclasses the correct type is returned.
        """
        rv = escape(s)

        if rv.__class__ is not cls:
            return cls(rv)

        return rv  # type: ignore[return-value]

    # ``str`` methods re-exposed through ``_simple_escaping_wrapper`` so that
    # their arguments are escaped and their result keeps the instance's class.
    __getitem__ = _simple_escaping_wrapper(str.__getitem__)
    capitalize = _simple_escaping_wrapper(str.capitalize)
    title = _simple_escaping_wrapper(str.title)
    lower = _simple_escaping_wrapper(str.lower)
    upper = _simple_escaping_wrapper(str.upper)
    replace = _simple_escaping_wrapper(str.replace)
    ljust = _simple_escaping_wrapper(str.ljust)
    rjust = _simple_escaping_wrapper(str.rjust)
    lstrip = _simple_escaping_wrapper(str.lstrip)
    rstrip = _simple_escaping_wrapper(str.rstrip)
    center = _simple_escaping_wrapper(str.center)
    strip = _simple_escaping_wrapper(str.strip)
    translate = _simple_escaping_wrapper(str.translate)
    expandtabs = _simple_escaping_wrapper(str.expandtabs)
    swapcase = _simple_escaping_wrapper(str.swapcase)
    zfill = _simple_escaping_wrapper(str.zfill)
    casefold = _simple_escaping_wrapper(str.casefold)

    # These two ``str`` methods only exist on Python 3.9 and later.
    if sys.version_info >= (3, 9):
        removeprefix = _simple_escaping_wrapper(str.removeprefix)
        removesuffix = _simple_escaping_wrapper(str.removesuffix)

    def partition(self, sep: str) -> t.Tuple["te.Self", "te.Self", "te.Self"]:
        """Like :meth:`str.partition`, with ``sep`` escaped before the
        search and all three parts wrapped in the instance's class.
        """
        head, found, tail = super().partition(self.escape(sep))
        cls = self.__class__
        return cls(head), cls(found), cls(tail)

    def rpartition(self, sep: str) -> t.Tuple["te.Self", "te.Self", "te.Self"]:
        """Like :meth:`str.rpartition`, with ``sep`` escaped before the
        search and all three parts wrapped in the instance's class.
        """
        head, found, tail = super().rpartition(self.escape(sep))
        cls = self.__class__
        return cls(head), cls(found), cls(tail)

    def format(self, *args: t.Any, **kwargs: t.Any) -> "te.Self":
        """Format using ``EscapeFormatter`` so that each substituted
        field is escaped, and wrap the result in the instance's class.
        """
        formatter = EscapeFormatter(self.escape)
        return self.__class__(formatter.vformat(self, args, kwargs))

    def format_map(  # type: ignore[override]
        self, map: t.Mapping[str, t.Any]
    ) -> "te.Self":
        """Same as :meth:`format`, taking the fields from a single
        mapping and no positional arguments.
        """
        formatter = EscapeFormatter(self.escape)
        return self.__class__(formatter.vformat(self, (), map))

    def __html_format__(self, format_spec: str) -> "te.Self":
        """Return ``self`` when formatted by ``EscapeFormatter``.

        :raises ValueError: if a non-empty ``format_spec`` is given.
        """
        if format_spec:
            raise ValueError("Unsupported format specification for Markup.")

        return self
258
259
class EscapeFormatter(string.Formatter):
    """A :class:`string.Formatter` that passes every formatted field
    through the ``escape`` callable it was constructed with.
    """

    __slots__ = ("escape",)

    def __init__(self, escape: t.Callable[[t.Any], Markup]) -> None:
        self.escape = escape
        super().__init__()

    def format_field(self, value: t.Any, format_spec: str) -> str:
        """Format one field and return its escaped text.

        ``__html_format__`` is preferred when the value has it. A value
        with only ``__html__`` is accepted without a format spec and
        raises ``ValueError`` with one. Any other value is formatted by
        the base ``string.Formatter``.
        """
        if hasattr(value, "__html_format__"):
            formatted = value.__html_format__(format_spec)
        elif not hasattr(value, "__html__"):
            # We need to make sure the format spec is str here as
            # otherwise the wrong callback methods are invoked.
            formatted = string.Formatter.format_field(self, value, str(format_spec))
        elif format_spec:
            raise ValueError(
                f"Format specifier {format_spec} given, but {type(value)} does not"
                " define __html_format__. A class that defines __html__ must define"
                " __html_format__ to work with format specifiers."
            )
        else:
            formatted = value.__html__()

        escaped = self.escape(formatted)
        return str(escaped)
283
284
# Constrained to ``list`` or ``dict`` so that ``_escape_argspec`` is typed as
# returning the same kind of container it receives.
_ListOrDict = t.TypeVar("_ListOrDict", list, dict)


def _escape_argspec(
    obj: _ListOrDict, iterable: t.Iterable[t.Any], escape: t.Callable[[t.Any], Markup]
) -> _ListOrDict:
    """Escape the string-like entries of ``obj`` in place and return it.

    ``iterable`` yields ``(key, value)`` pairs. For each value that is a
    ``str`` or has an ``__html__`` attribute, ``obj[key]`` is replaced
    by ``escape(value)``. Other entries are left alone.
    """
    for slot, candidate in iterable:
        if not isinstance(candidate, str) and not hasattr(candidate, "__html__"):
            continue

        obj[slot] = escape(candidate)

    return obj
297
298
class _MarkupEscapeHelper:
    """Wrapper used by :meth:`Markup.__mod__` around ``%`` arguments.

    The string and repr conversions of the wrapped object go through the
    ``escape`` callable; integer and float conversions are passed to the
    wrapped object unchanged.
    """

    __slots__ = ("obj", "escape")

    def __init__(self, obj: t.Any, escape: t.Callable[[t.Any], Markup]) -> None:
        self.escape = escape
        self.obj = obj

    def __getitem__(self, item: t.Any) -> "te.Self":
        # Looked-up values are wrapped as well, with the same escape callable.
        inner = self.obj[item]
        return self.__class__(inner, self.escape)

    def __str__(self) -> str:
        escaped = self.escape(self.obj)
        return str(escaped)

    def __repr__(self) -> str:
        # The repr text itself is escaped, not the wrapped object.
        escaped = self.escape(repr(self.obj))
        return str(escaped)

    def __int__(self) -> int:
        wrapped = self.obj
        return int(wrapped)

    def __float__(self) -> float:
        wrapped = self.obj
        return float(wrapped)
322
323
324# circular import
325try:
326 from ._speedups import escape as escape
327 from ._speedups import escape_silent as escape_silent
328 from ._speedups import soft_str as soft_str
329except ImportError:
330 from ._native import escape as escape
331 from ._native import escape_silent as escape_silent # noqa: F401
332 from ._native import soft_str as soft_str # noqa: F401