Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/django/utils/text.py: 18%
205 statements
« prev ^ index » next coverage.py v7.0.5, created at 2023-01-17 06:13 +0000
« prev ^ index » next coverage.py v7.0.5, created at 2023-01-17 06:13 +0000
1import gzip
2import re
3import secrets
4import unicodedata
5from gzip import GzipFile
6from gzip import compress as gzip_compress
7from io import BytesIO
9from django.core.exceptions import SuspiciousFileOperation
10from django.utils.functional import SimpleLazyObject, keep_lazy_text, lazy
11from django.utils.regex_helper import _lazy_re_compile
12from django.utils.translation import gettext as _
13from django.utils.translation import gettext_lazy, pgettext
@keep_lazy_text
def capfirst(x):
    """
    Return ``x`` with its first character upper-cased.

    Falsy values are returned untouched; any other non-string value is
    converted with ``str()`` before capitalizing.
    """
    if not x:
        return x
    text = x if isinstance(x, str) else str(x)
    head, tail = text[0], text[1:]
    return head.upper() + tail
# Set up regular expressions
# Either a tag-like "<...>" span, or (captured as group 1) a run of characters
# that are neither whitespace nor angle brackets.
re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
# Either a tag-like "<...>" span, or (captured as group 1) any single character.
re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.S)
# A tag: group 1 is the optional leading "/", group 2 the tag name, and
# group 3 the optional trailing "/" of a self-closing tag.
re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.S)
re_newlines = _lazy_re_compile(r"\r\n|\r")  # Used in normalize_newlines
# An uppercase letter preceded by a lowercase one, or an uppercase letter that
# is not followed by another uppercase letter or the end of the string.
re_camel_case = _lazy_re_compile(r"(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))")
@keep_lazy_text
def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks. Expects that
    existing line breaks are posix newlines.

    Preserve all white space except added line breaks consume the space on
    which they break the line.

    Don't wrap long words, thus the output text may have lines longer than
    ``width``.

    ``text`` is the string to wrap and ``width`` the maximum line length
    aimed for. Return the wrapped text as a single string.
    """

    def _wrapped_pieces():
        # The historical ``min((ends_with_newline and width + 1 or width),
        # width)`` expression always evaluated to ``width``, so ``width`` is
        # used directly as the limit.
        for line in text.splitlines(True):  # True keeps trailing linebreaks
            while len(line) > width:
                # Prefer the last space that still fits within the limit.
                cut = line[: width + 1].rfind(" ") + 1
                if cut == 0:
                    # No space fits: fall back to the first space anywhere in
                    # the line, leaving an over-long word on its own line.
                    cut = line.find(" ") + 1
                    if cut == 0:
                        # No space at all; emit the line unbroken.
                        yield line
                        line = ""
                        break
                # The space the line is broken on is replaced by the newline.
                yield "%s\n" % line[: cut - 1]
                line = line[cut:]
            if line:
                yield line

    return "".join(_wrapped_pieces())
class Truncator(SimpleLazyObject):
    """
    Truncate a piece of text, counting either characters or words.

    The wrapped value is ``str(text)``, evaluated lazily.
    """

    def __init__(self, text):
        super().__init__(lambda: str(text))

    def add_truncation_text(self, text, truncate=None):
        """
        Return ``text`` decorated with the truncation marker ``truncate``.

        When ``truncate`` is None a translatable default is used. A marker
        containing ``%(truncated_text)s`` is used as a template; any other
        marker is appended unless ``text`` already ends with it.
        """
        if truncate is None:
            truncate = pgettext(
                "String to return when truncating text", "%(truncated_text)s…"
            )
        if "%(truncated_text)s" in truncate:
            return truncate % {"truncated_text": text}
        # No placeholder in the marker: append it, unless the text already
        # finishes with that exact marker.
        already_marked = text.endswith(truncate)
        if already_marked:
            return text
        return "%s%s" % (text, truncate)

    def chars(self, num, truncate=None, html=False):
        """
        Return the text shortened so it is at most ``num`` characters long.

        `truncate` is the marker signalling that the text was cut; it defaults
        to a translatable ellipsis string. With ``html`` true, the HTML-aware
        truncation is used.
        """
        self._setup()
        length = int(num)
        text = unicodedata.normalize("NFC", self._wrapped)

        # Reserve room for the marker: one slot per non-combining character
        # of the marker, never going past zero.
        truncate_len = length
        for marker_char in self.add_truncation_text("", truncate):
            if unicodedata.combining(marker_char):
                continue
            truncate_len -= 1
            if truncate_len == 0:
                break

        if html:
            return self._truncate_html(length, truncate, text, truncate_len, False)
        return self._text_chars(length, truncate, text, truncate_len)

    def _text_chars(self, length, truncate, text, truncate_len):
        """Cut plain text once it exceeds ``length`` visible characters."""
        visible = 0
        cut_index = None
        for index, current in enumerate(text):
            # Combining characters do not count towards the length.
            if unicodedata.combining(current):
                continue
            visible += 1
            if cut_index is None and visible > truncate_len:
                # First character that no longer fits beside the marker.
                cut_index = index
            if visible > length:
                # Too long: return the shortened text plus the marker.
                return self.add_truncation_text(text[: cut_index or 0], truncate)

        # Short enough already; hand back the text unchanged.
        return text

    def words(self, num, truncate=None, html=False):
        """
        Return the text cut after ``num`` words.

        `truncate` is the marker signalling that the text was cut, defaulting
        to an ellipsis. With ``html`` true, the HTML-aware truncation is used.
        """
        self._setup()
        length = int(num)
        if html:
            return self._truncate_html(length, truncate, self._wrapped, length, True)
        return self._text_words(length, truncate)

    def _text_words(self, length, truncate):
        """
        Cut plain text after ``length`` words.

        Words are re-joined with single spaces, so newlines are dropped.
        """
        tokens = self._wrapped.split()
        if len(tokens) <= length:
            return " ".join(tokens)
        return self.add_truncation_text(" ".join(tokens[:length]), truncate)

    def _truncate_html(self, length, truncate, text, truncate_len, words):
        """
        Cut HTML after ``length`` characters (tags and comments excluded) or,
        when ``words`` is true, after ``length`` words.

        Tags that are still open at the cut point are closed again, provided
        they were correctly closed in the given HTML. Newlines in the HTML are
        kept.
        """
        if words and length <= 0:
            return ""

        html4_singlets = (
            "br",
            "col",
            "link",
            "base",
            "img",
            "param",
            "area",
            "hr",
            "input",
        )

        # Walk the text, counting words/chars outside tags and tracking which
        # tags are currently open (most recently opened first).
        cursor = 0
        cut_at = 0
        seen = 0
        open_tags = []
        pattern = re_words if words else re_chars

        while seen <= length:
            match = pattern.search(text, cursor)
            if match is None:
                # Reached the end of the text.
                break
            cursor = match.end(0)

            if match[1]:
                # A real word or character rather than markup.
                seen += 1
                if seen == truncate_len:
                    cut_at = cursor
                continue

            tag_match = re_tag.match(match[0])
            if tag_match is None or seen >= truncate_len:
                # Not a tag, or a tag beyond the cut point: irrelevant.
                continue

            is_closing, tag_name, is_self_closing = tag_match.groups()
            # Element names are always case-insensitive
            tag_name = tag_name.lower()
            if is_self_closing or tag_name in html4_singlets:
                continue
            if not is_closing:
                # Newly opened tag goes to the front of the list.
                open_tags.insert(0, tag_name)
            elif tag_name in open_tags:
                # SGML: An end tag closes, back to the matching start tag,
                # all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[open_tags.index(tag_name) + 1 :]

        if seen <= length:
            return text

        pieces = [text[:cut_at]]
        marker = self.add_truncation_text("", truncate)
        if marker:
            pieces.append(marker)
        # Close whatever is still open.
        pieces.extend("</%s>" % name for name in open_tags)
        return "".join(pieces)
@keep_lazy_text
def get_valid_filename(name):
    """
    Turn ``name`` into a string that is safe to use as a clean filename.

    Leading and trailing whitespace is stripped, remaining spaces become
    underscores, and every character that is not alphanumeric, a dash, an
    underscore, or a dot is dropped. Raise SuspiciousFileOperation when the
    result is empty, ".", or "..".
    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    underscored = str(name).strip().replace(" ", "_")
    cleaned = re.sub(r"(?u)[^-\w.]", "", underscored)
    if cleaned in ("", ".", ".."):
        raise SuspiciousFileOperation("Could not derive file name from '%s'" % name)
    return cleaned
@keep_lazy_text
def get_text_list(list_, last_word=gettext_lazy("or")):
    """
    Join the items of ``list_`` into a readable enumeration, placing
    ``last_word`` before the final item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    'a and b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    """
    if not list_:
        return ""
    if len(list_) == 1:
        return str(list_[0])
    leading = (
        # Translators: This string is used as a separator between list elements
        _(", ").join(str(item) for item in list_[:-1])
    )
    connector = str(last_word)
    final = str(list_[-1])
    return "%s %s %s" % (leading, connector, final)
@keep_lazy_text
def normalize_newlines(text):
    """Return ``str(text)`` with every CRLF or lone CR replaced by LF."""
    as_text = str(text)
    return re_newlines.sub("\n", as_text)
@keep_lazy_text
def phone2numeric(phone):
    """
    Translate the letters of a phone number into their keypad digits.

    The input is lower-cased first; characters that are not ASCII letters
    pass through unchanged.
    """
    keypad = {
        "2": "abc",
        "3": "def",
        "4": "ghi",
        "5": "jkl",
        "6": "mno",
        "7": "pqrs",
        "8": "tuv",
        "9": "wxyz",
    }
    digit_for = {
        letter: digit for digit, letters in keypad.items() for letter in letters
    }
    return "".join(digit_for.get(symbol, symbol) for symbol in phone.lower())
319def _get_random_filename(max_random_bytes):
320 return b"a" * secrets.randbelow(max_random_bytes)
def compress_string(s, *, max_random_bytes=None):
    """
    Gzip-compress the bytestring ``s`` (level 6, mtime 0).

    When ``max_random_bytes`` is truthy, the byte at index 3 of the first 10
    bytes is set to ``gzip.FNAME`` and a random-length, NUL-terminated
    filename is spliced in right after those 10 bytes, so the output length
    varies. Otherwise the plain compressed data is returned.
    """
    gzipped = gzip_compress(s, compresslevel=6, mtime=0)
    if max_random_bytes:
        view = memoryview(gzipped)
        head = bytearray(view[:10])
        head[3] = gzip.FNAME
        padding = _get_random_filename(max_random_bytes) + b"\x00"
        return bytes(head) + padding + view[10:]
    return gzipped
class StreamingBuffer(BytesIO):
    """A BytesIO whose ``read()`` drains everything written so far."""

    def read(self):
        """Return the whole buffer contents and leave the buffer empty."""
        drained = self.getvalue()
        # Rewind first so that truncate() cuts at position 0.
        self.seek(0)
        self.truncate()
        return drained
# Like compress_string, but for iterators of strings.
def compress_sequence(sequence, *, max_random_bytes=None):
    """
    Yield gzip-compressed chunks for the items of ``sequence``.

    The first chunk is whatever the gzip writer emits on opening; further
    chunks are yielded whenever writing an item produces output, and a last
    chunk is yielded after the writer is closed. When ``max_random_bytes`` is
    truthy, a random-length filename is passed to the gzip writer.
    """
    stream = StreamingBuffer()
    if max_random_bytes:
        random_name = _get_random_filename(max_random_bytes)
    else:
        random_name = None
    writer = GzipFile(
        filename=random_name, mode="wb", compresslevel=6, fileobj=stream, mtime=0
    )
    with writer as zfile:
        # Output headers...
        yield stream.read()
        for chunk in sequence:
            zfile.write(chunk)
            pending = stream.read()
            if pending:
                yield pending
    # Whatever was written when the writer closed.
    yield stream.read()
# Expression to match some_token and some_token="with spaces" (and similarly
# for single-quoted strings).
# The first alternative is a token containing at least one quoted section
# (backslash escapes allowed inside the quotes); the second is any run of
# non-whitespace characters.
smart_split_re = _lazy_re_compile(
    r"""
    ((?:
        [^\s'"]*
        (?:
            (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
            [^\s'"]*
        )+
    ) | \S+)
""",
    re.VERBOSE,
)
def smart_split(text):
    r"""
    Yield the space-separated pieces of ``text``, keeping quoted phrases whole.

    Single and double quotes are both supported, and quotes may be escaped
    with backslashes. Pieces keep their surrounding quote marks and escaped
    quotes stay escaped (the results can then be further processed with
    unescape_string_literal()).

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    source = str(text)
    for match in smart_split_re.finditer(source):
        yield match.group(0)
@keep_lazy_text
def unescape_string_literal(s):
    r"""
    Strip the surrounding quotes from a quoted string literal and unescape
    its escaped quotes and backslashes::

        >>> unescape_string_literal('"abc"')
        'abc'
        >>> unescape_string_literal("'abc'")
        'abc'
        >>> unescape_string_literal('"a \"bc\""')
        'a "bc"'
        >>> unescape_string_literal("'\'ab\' c'")
        "'ab' c"

    Raise ValueError if ``s`` is empty or does not start and end with the
    same quote character.
    """
    is_literal = bool(s) and s[0] in "\"'" and s[-1] == s[0]
    if not is_literal:
        raise ValueError("Not a string literal: %r" % s)
    quote = s[0]
    inner = s[1:-1]
    # Unescape the quote character first, then doubled backslashes.
    unquoted = inner.replace(r"\%s" % quote, quote)
    return unquoted.replace(r"\\", "\\")
@keep_lazy_text
def slugify(value, allow_unicode=False):
    """
    Build a slug from ``value``.

    Unless 'allow_unicode' is True, the text is reduced to ASCII. The result
    is lower-cased, characters other than alphanumerics, underscores,
    whitespace, and hyphens are removed, runs of whitespace or hyphens become
    a single hyphen, and leading/trailing hyphens and underscores are stripped.
    """
    text = str(value)
    if allow_unicode:
        text = unicodedata.normalize("NFKC", text)
    else:
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")
    kept = re.sub(r"[^\w\s-]", "", text.lower())
    hyphenated = re.sub(r"[-\s]+", "-", kept)
    return hyphenated.strip("-_")
def camel_case_to_spaces(value):
    """
    Insert a space before each CamelCase word boundary, then strip the
    surrounding whitespace and lower-case the result.
    """
    spaced = re_camel_case.sub(r" \1", value)
    trimmed = spaced.strip()
    return trimmed.lower()
447def _format_lazy(format_string, *args, **kwargs):
448 """
449 Apply str.format() on 'format_string' where format_string, args,
450 and/or kwargs might be lazy.
451 """
452 return format_string.format(*args, **kwargs)
# Wrap _format_lazy() with lazy(), declaring str as its result class.
format_lazy = lazy(_format_lazy, str)