1from __future__ import annotations
2
3import collections.abc as cabc
4import string
5import typing as t
6
7try:
8 from ._speedups import _escape_inner
9except ImportError:
10 from ._native import _escape_inner
11
12if t.TYPE_CHECKING:
13 import typing_extensions as te
14
15
class _HasHTML(t.Protocol):
    """Structural type for any object that provides an ``__html__`` method."""

    def __html__(self, /) -> str:
        """Return a string that callers treat as already safe for HTML."""
18
19
class _TPEscape(t.Protocol):
    """Structural type for an escape callable: takes one positional value
    of any type and returns a :class:`Markup`.
    """

    def __call__(self, s: t.Any, /) -> Markup:
        """Escape ``s`` and return the result as :class:`Markup`."""
22
23
def escape(s: t.Any, /) -> Markup:
    """Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in
    the string with HTML-safe sequences. Use this when text that may
    contain those characters has to be displayed in HTML.

    An object that has an ``__html__`` method is not escaped; the method
    is called and its return value is assumed to already be safe for
    HTML.

    :param s: An object to be converted to a string and escaped.
    :return: A :class:`Markup` string with the escaped text.
    """
    # Exact ``str`` instances are the most common input and go straight to
    # the escaping routine. ``type(s)`` is used rather than ``s.__class__``
    # because a proxy object may report the ``__class__`` of the value it
    # proxies.
    if type(s) is not str:
        if hasattr(s, "__html__"):
            # The object supplies its own markup; wrap it without escaping.
            return Markup(s.__html__())

        # Anything else (including ``str`` subclasses without ``__html__``)
        # is converted to a plain string first.
        s = str(s)

    return Markup(_escape_inner(s))
46
47
def escape_silent(s: t.Any | None, /) -> Markup:
    """Like :func:`escape`, except that ``None`` becomes the empty
    string instead of the text ``'None'``. Handy for optional values.

    >>> escape(None)
    Markup('None')
    >>> escape_silent(None)
    Markup('')

    :param s: An object to escape, or ``None``.
    :return: An empty :class:`Markup` for ``None``, otherwise the result
        of :func:`escape`.
    """
    return Markup() if s is None else escape(s)
62
63
def soft_str(s: t.Any, /) -> str:
    """Convert an object to a string unless it already is one. An
    existing string, including a :class:`Markup` instance, is returned
    unchanged, so markup stays marked as safe and is not escaped a
    second time.

    >>> value = escape("<User 1>")
    >>> value
    Markup('&lt;User 1&gt;')
    >>> escape(str(value))
    Markup('&amp;lt;User 1&amp;gt;')
    >>> escape(soft_str(value))
    Markup('&lt;User 1&gt;')

    :param s: Any object.
    :return: ``s`` itself when it is a :class:`str` (or subclass)
        instance, otherwise ``str(s)``.
    """
    return s if isinstance(s, str) else str(s)
82
83
class Markup(str):
    """A string that is ready to be safely inserted into an HTML or XML
    document, either because it was escaped or because it was marked
    safe.

    Passing an object to the constructor converts it to text and wraps
    it to mark it safe without escaping. To escape the text, use the
    :meth:`escape` class method instead.

    >>> Markup("Hello, <em>World</em>!")
    Markup('Hello, <em>World</em>!')
    >>> Markup(42)
    Markup('42')
    >>> Markup.escape("Hello, <em>World</em>!")
    Markup('Hello, &lt;em&gt;World&lt;/em&gt;!')

    This implements the ``__html__()`` interface that some frameworks
    use. Passing an object that implements ``__html__()`` will wrap the
    output of that method, marking it safe.

    >>> class Foo:
    ...     def __html__(self):
    ...         return '<a href="/foo">foo</a>'
    ...
    >>> Markup(Foo())
    Markup('<a href="/foo">foo</a>')

    This is a subclass of :class:`str`. It has the same methods, but
    escapes their arguments and returns a ``Markup`` instance.

    >>> Markup("<em>%s</em>") % ("foo & bar",)
    Markup('<em>foo &amp; bar</em>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup('<em>Hello</em> &lt;foo&gt;')
    """

    __slots__ = ()

    def __new__(
        cls, object: t.Any = "", encoding: str | None = None, errors: str = "strict"
    ) -> te.Self:
        """Create the string without escaping ``object``.

        :param object: Value to convert to text. If it has an
            ``__html__`` method, that method's return value is used.
        :param encoding: When given, ``object`` is decoded the way
            ``str(object, encoding, errors)`` does.
        :param errors: Decoding error handler; only used together with
            ``encoding``.
        """
        if hasattr(object, "__html__"):
            object = object.__html__()

        # ``str.__new__`` rejects an explicit ``encoding`` for non-bytes
        # input, so the decoding arguments are only forwarded when asked for.
        if encoding is None:
            return super().__new__(cls, object)

        return super().__new__(cls, object, encoding, errors)

    def __html__(self, /) -> te.Self:
        """Return ``self``; the value is already marked safe."""
        return self

    def __add__(self, value: str | _HasHTML, /) -> te.Self:
        """Concatenate with ``value`` on the right after escaping it.

        Returns ``NotImplemented`` for operands that are neither strings
        nor objects with ``__html__``.
        """
        if isinstance(value, str) or hasattr(value, "__html__"):
            return self.__class__(super().__add__(self.escape(value)))

        return NotImplemented

    def __radd__(self, value: str | _HasHTML, /) -> te.Self:
        """Concatenate with ``value`` on the left after escaping it.

        Returns ``NotImplemented`` for operands that are neither strings
        nor objects with ``__html__``.
        """
        if isinstance(value, str) or hasattr(value, "__html__"):
            return self.escape(value).__add__(self)

        return NotImplemented

    def __mul__(self, value: t.SupportsIndex, /) -> te.Self:
        """Repeat the markup ``value`` times, keeping the ``Markup`` type."""
        return self.__class__(super().__mul__(value))

    def __rmul__(self, value: t.SupportsIndex, /) -> te.Self:
        """Reflected repetition (``n * markup``), keeping the ``Markup`` type."""
        return self.__class__(super().__mul__(value))

    def __mod__(self, value: t.Any, /) -> te.Self:
        """Apply ``%`` formatting, escaping each argument as it is rendered.

        Arguments are wrapped in ``_MarkupEscapeHelper`` so that the
        escaping happens when ``str.__mod__`` converts them.
        """
        if isinstance(value, tuple):
            # a tuple of arguments, each wrapped
            value = tuple(_MarkupEscapeHelper(x, self.escape) for x in value)
        elif hasattr(type(value), "__getitem__") and not isinstance(value, str):
            # a mapping of arguments, wrapped
            value = _MarkupEscapeHelper(value, self.escape)
        else:
            # a single argument, wrapped with the helper and a tuple
            value = (_MarkupEscapeHelper(value, self.escape),)

        return self.__class__(super().__mod__(value))

    def __repr__(self, /) -> str:
        """Return ``ClassName('...')`` using the plain string's repr."""
        return f"{self.__class__.__name__}({super().__repr__()})"

    def join(self, iterable: cabc.Iterable[str | _HasHTML], /) -> te.Self:
        """Join the items of ``iterable``, escaping each one first."""
        return self.__class__(super().join(map(self.escape, iterable)))

    def split(  # type: ignore[override]
        self, /, sep: str | None = None, maxsplit: t.SupportsIndex = -1
    ) -> list[te.Self]:
        """Like :meth:`str.split`, with every piece wrapped as ``Markup``."""
        return [self.__class__(v) for v in super().split(sep, maxsplit)]

    def rsplit(  # type: ignore[override]
        self, /, sep: str | None = None, maxsplit: t.SupportsIndex = -1
    ) -> list[te.Self]:
        """Like :meth:`str.rsplit`, with every piece wrapped as ``Markup``."""
        return [self.__class__(v) for v in super().rsplit(sep, maxsplit)]

    def splitlines(  # type: ignore[override]
        self, /, keepends: bool = False
    ) -> list[te.Self]:
        """Like :meth:`str.splitlines`, with every line wrapped as ``Markup``."""
        return [self.__class__(v) for v in super().splitlines(keepends)]

    def unescape(self, /) -> str:
        """Convert escaped markup back into a text string. This replaces
        HTML entities with the characters they represent.

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        'Main » <em>About</em>'
        """
        from html import unescape

        return unescape(str(self))

    def striptags(self, /) -> str:
        """:meth:`unescape` the markup, remove tags, and normalize
        whitespace to single spaces.

        >>> Markup("Main &raquo;\t<em>About</em>").striptags()
        'Main » About'
        """
        value = str(self)

        # Look for comments then tags separately. Otherwise, a comment that
        # contains a tag would end early, leaving some of the comment behind.

        # keep finding comment start marks
        while (start := value.find("<!--")) != -1:
            # find a comment end mark beyond the start, otherwise stop
            if (end := value.find("-->", start)) == -1:
                break

            value = f"{value[:start]}{value[end + 3:]}"

        # remove tags using the same method
        while (start := value.find("<")) != -1:
            if (end := value.find(">", start)) == -1:
                break

            value = f"{value[:start]}{value[end + 1:]}"

        # collapse spaces
        value = " ".join(value.split())
        return self.__class__(value).unescape()

    @classmethod
    def escape(cls, s: t.Any, /) -> te.Self:
        """Escape a string. Calls :func:`escape` and ensures that for
        subclasses the correct type is returned.
        """
        rv = escape(s)

        # The module-level function returns its own Markup type; re-wrap
        # when this method is called on a different class.
        if rv.__class__ is not cls:
            return cls(rv)

        return rv  # type: ignore[return-value]

    def __getitem__(self, key: t.SupportsIndex | slice, /) -> te.Self:
        """Index or slice, keeping the ``Markup`` type."""
        return self.__class__(super().__getitem__(key))

    def capitalize(self, /) -> te.Self:
        """Like :meth:`str.capitalize`, returning ``Markup``."""
        return self.__class__(super().capitalize())

    def title(self, /) -> te.Self:
        """Like :meth:`str.title`, returning ``Markup``."""
        return self.__class__(super().title())

    def lower(self, /) -> te.Self:
        """Like :meth:`str.lower`, returning ``Markup``."""
        return self.__class__(super().lower())

    def upper(self, /) -> te.Self:
        """Like :meth:`str.upper`, returning ``Markup``."""
        return self.__class__(super().upper())

    def replace(self, old: str, new: str, count: t.SupportsIndex = -1, /) -> te.Self:
        """Like :meth:`str.replace`; ``new`` is escaped, ``old`` is not."""
        return self.__class__(super().replace(old, self.escape(new), count))

    def ljust(self, width: t.SupportsIndex, fillchar: str = " ", /) -> te.Self:
        """Like :meth:`str.ljust`; ``fillchar`` is escaped before padding."""
        return self.__class__(super().ljust(width, self.escape(fillchar)))

    def rjust(self, width: t.SupportsIndex, fillchar: str = " ", /) -> te.Self:
        """Like :meth:`str.rjust`; ``fillchar`` is escaped before padding."""
        return self.__class__(super().rjust(width, self.escape(fillchar)))

    def lstrip(self, chars: str | None = None, /) -> te.Self:
        """Like :meth:`str.lstrip`, returning ``Markup``."""
        return self.__class__(super().lstrip(chars))

    def rstrip(self, chars: str | None = None, /) -> te.Self:
        """Like :meth:`str.rstrip`, returning ``Markup``."""
        return self.__class__(super().rstrip(chars))

    def center(self, width: t.SupportsIndex, fillchar: str = " ", /) -> te.Self:
        """Like :meth:`str.center`; ``fillchar`` is escaped before padding."""
        return self.__class__(super().center(width, self.escape(fillchar)))

    def strip(self, chars: str | None = None, /) -> te.Self:
        """Like :meth:`str.strip`, returning ``Markup``."""
        return self.__class__(super().strip(chars))

    def translate(
        self,
        table: cabc.Mapping[int, str | int | None],  # type: ignore[override]
        /,
    ) -> te.Self:
        """Like :meth:`str.translate`, returning ``Markup``.

        The replacement values in ``table`` are not escaped.
        """
        return self.__class__(super().translate(table))

    def expandtabs(self, /, tabsize: t.SupportsIndex = 8) -> te.Self:
        """Like :meth:`str.expandtabs`, returning ``Markup``."""
        return self.__class__(super().expandtabs(tabsize))

    def swapcase(self, /) -> te.Self:
        """Like :meth:`str.swapcase`, returning ``Markup``."""
        return self.__class__(super().swapcase())

    def zfill(self, width: t.SupportsIndex, /) -> te.Self:
        """Like :meth:`str.zfill`, returning ``Markup``."""
        return self.__class__(super().zfill(width))

    def casefold(self, /) -> te.Self:
        """Like :meth:`str.casefold`, returning ``Markup``."""
        return self.__class__(super().casefold())

    def removeprefix(self, prefix: str, /) -> te.Self:
        """Like :meth:`str.removeprefix`, returning ``Markup``."""
        return self.__class__(super().removeprefix(prefix))

    # NOTE(review): unlike ``removeprefix``, ``suffix`` is not positional-only
    # here. Left as is so that existing keyword callers keep working.
    def removesuffix(self, suffix: str) -> te.Self:
        """Like :meth:`str.removesuffix`, returning ``Markup``."""
        return self.__class__(super().removesuffix(suffix))

    def partition(self, sep: str, /) -> tuple[te.Self, te.Self, te.Self]:
        """Like :meth:`str.partition`, with all three parts as ``Markup``."""
        left, sep, right = super().partition(sep)
        cls = self.__class__
        return cls(left), cls(sep), cls(right)

    def rpartition(self, sep: str, /) -> tuple[te.Self, te.Self, te.Self]:
        """Like :meth:`str.rpartition`, with all three parts as ``Markup``."""
        left, sep, right = super().rpartition(sep)
        cls = self.__class__
        return cls(left), cls(sep), cls(right)

    def format(self, *args: t.Any, **kwargs: t.Any) -> te.Self:
        """Like :meth:`str.format`, escaping each formatted field through
        :class:`EscapeFormatter`.
        """
        formatter = EscapeFormatter(self.escape)
        return self.__class__(formatter.vformat(self, args, kwargs))

    def format_map(
        self,
        mapping: cabc.Mapping[str, t.Any],  # type: ignore[override]
        /,
    ) -> te.Self:
        """Like :meth:`str.format_map`, escaping each formatted field
        through :class:`EscapeFormatter`.
        """
        formatter = EscapeFormatter(self.escape)
        return self.__class__(formatter.vformat(self, (), mapping))

    def __html_format__(self, format_spec: str, /) -> te.Self:
        """Return ``self`` when used as a format field.

        :raises ValueError: If a non-empty ``format_spec`` is given.
        """
        if format_spec:
            raise ValueError("Unsupported format specification for Markup.")

        return self
330
331
class EscapeFormatter(string.Formatter):
    """A :class:`string.Formatter` whose formatted fields are all passed
    through an escape callable before being inserted into the result.
    """

    __slots__ = ("escape",)

    def __init__(self, escape: _TPEscape) -> None:
        """Store the callable used to escape each formatted field.

        :param escape: Called with the rendered field value; its result
            is converted with ``str()``.
        """
        super().__init__()
        self.escape: _TPEscape = escape

    def format_field(self, value: t.Any, format_spec: str) -> str:
        """Render one replacement field and escape the result.

        The value's ``__html_format__`` is preferred, then ``__html__``
        (only without a format spec), then the regular formatting of
        :class:`string.Formatter`.

        :raises ValueError: If ``value`` defines ``__html__`` but not
            ``__html_format__`` and a format spec was given.
        """

        def finish(rendered: t.Any) -> str:
            return str(self.escape(rendered))

        if hasattr(value, "__html_format__"):
            return finish(value.__html_format__(format_spec))

        if not hasattr(value, "__html__"):
            # We need to make sure the format spec is str here as
            # otherwise the wrong callback methods are invoked.
            return finish(super().format_field(value, str(format_spec)))

        if format_spec:
            raise ValueError(
                f"Format specifier {format_spec} given, but {type(value)} does not"
                " define __html_format__. A class that defines __html__ must define"
                " __html_format__ to work with format specifiers."
            )

        return finish(value.__html__())
355
356
class _MarkupEscapeHelper:
    """Helper for :meth:`Markup.__mod__`.

    Wraps a ``%``-format argument together with an escape callable so
    that string conversions of the argument come out escaped, while
    numeric conversions see the wrapped object unchanged.
    """

    __slots__ = ("obj", "escape")

    def __init__(self, obj: t.Any, escape: _TPEscape) -> None:
        """Remember the wrapped object and the callable that escapes it."""
        self.escape: _TPEscape = escape
        self.obj: t.Any = obj

    def __getitem__(self, key: t.Any, /) -> te.Self:
        """Look ``key`` up in the wrapped object and wrap the result in a
        new helper that shares the same escape callable.
        """
        item = self.obj[key]
        return self.__class__(item, self.escape)

    def __str__(self, /) -> str:
        """Return the escaped wrapped object as a plain string."""
        escaped = self.escape(self.obj)
        return str(escaped)

    def __repr__(self, /) -> str:
        """Return the escaped ``repr()`` of the wrapped object as a plain
        string.
        """
        escaped = self.escape(repr(self.obj))
        return str(escaped)

    def __int__(self, /) -> int:
        """Return ``int()`` of the wrapped object, without escaping."""
        return int(self.obj)

    def __float__(self, /) -> float:
        """Return ``float()`` of the wrapped object, without escaping."""
        return float(self.obj)
380
381
def __getattr__(name: str) -> t.Any:
    """Module-level attribute fallback that serves the deprecated
    ``__version__`` attribute.

    :param name: The attribute that was not found on the module.
    :return: The installed ``markupsafe`` distribution version when
        ``name`` is ``"__version__"``.
    :raises AttributeError: For every other name.
    """
    if name != "__version__":
        raise AttributeError(name)

    import importlib.metadata
    import warnings

    # NOTE(review): no category is passed, so this is emitted with the
    # default ``UserWarning`` category rather than ``DeprecationWarning``.
    message = (
        "The '__version__' attribute is deprecated and will be removed in"
        " MarkupSafe 3.1. Use feature detection, or"
        ' `importlib.metadata.version("markupsafe")`, instead.'
    )
    warnings.warn(message, stacklevel=2)
    return importlib.metadata.version("markupsafe")