Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/django/utils/text.py: 51%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

239 statements  

1import gzip 

2import re 

3import secrets 

4import unicodedata 

5from collections import deque 

6from gzip import GzipFile 

7from gzip import compress as gzip_compress 

8from html import escape 

9from html.parser import HTMLParser 

10from io import BytesIO 

11 

12from django.core.exceptions import SuspiciousFileOperation 

13from django.utils.functional import ( 

14 SimpleLazyObject, 

15 cached_property, 

16 keep_lazy_text, 

17 lazy, 

18) 

19from django.utils.regex_helper import _lazy_re_compile 

20from django.utils.translation import gettext as _ 

21from django.utils.translation import gettext_lazy, pgettext 

22 

23 

@keep_lazy_text
def capfirst(x):
    """
    Return ``x`` with its first character upper-cased.

    Falsy input is handed back untouched. Anything truthy that is not
    already a ``str`` is converted with ``str()`` before capitalizing.
    """
    if x:
        text = x if isinstance(x, str) else str(x)
        return text[0].upper() + text[1:]
    return x

32 

33 

# Set up regular expressions
# Matches a "\r\n" pair or a lone "\r".
re_newlines = _lazy_re_compile(r"\r\n|\r")  # Used in normalize_newlines
# Matches an upper-case ASCII letter that either follows a lower-case letter
# or is not followed by another upper-case letter or the end of the string.
# Used in camel_case_to_spaces.
re_camel_case = _lazy_re_compile(r"(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))")

37 

38 

@keep_lazy_text
def wrap(text, width):
    """
    Word-wrap ``text`` at ``width`` while keeping its existing line breaks.

    Existing line breaks are expected to be posix newlines. All white space
    is preserved, except that the space on which a line break is inserted is
    consumed by that break.

    Long words are never split, so the result may contain lines longer than
    ``width``.
    """

    def _limit(chunk):
        # Same value the original ``cond and a or b`` expression produced.
        candidate = (width + 1 or width) if chunk.endswith("\n") else width
        return min(candidate, width)

    def _pieces():
        # keepends=True: every chunk still carries its trailing line break.
        for chunk in text.splitlines(True):
            limit = _limit(chunk)
            while len(chunk) > limit:
                # Last space inside the window, as a 1-based cut position.
                cut = chunk[: limit + 1].rfind(" ") + 1
                if not cut:
                    # No space in the window: fall back to the first space
                    # anywhere in the chunk.
                    cut = chunk.find(" ") + 1
                    if not cut:
                        # No space at all: emit the chunk unwrapped.
                        yield chunk
                        chunk = ""
                        break
                yield chunk[: cut - 1] + "\n"
                chunk = chunk[cut:]
                limit = _limit(chunk)
            if chunk:
                yield chunk

    return "".join(_pieces())

70 

71 

def add_truncation_text(text, truncate=None):
    """
    Combine ``text`` with the truncation marker ``truncate``.

    When ``truncate`` is None, a translatable default template is looked up
    via ``pgettext``. If the marker contains the ``%(truncated_text)s``
    placeholder, ``text`` is substituted into it. Otherwise the marker is
    appended to ``text``, unless ``text`` already ends with it.
    """
    placeholder = "%(truncated_text)s"
    marker = truncate
    if marker is None:
        marker = pgettext(
            "String to return when truncating text", "%(truncated_text)s…"
        )
    if placeholder in marker:
        return marker % {"truncated_text": text}
    # No placeholder in the marker: it is a plain suffix. Avoid doubling it
    # when the text already ends with that suffix.
    return text if text.endswith(marker) else f"{text}{marker}"

86 

87 

def calculate_truncate_chars_length(length, replacement):
    """
    Return how many characters of ``length`` remain once the truncation
    text built from ``replacement`` is accounted for.

    Each non-combining character of the truncation text uses up one unit.
    Counting stops as soon as the remaining budget reaches zero.
    """
    budget = length
    marker = add_truncation_text("", replacement)
    for ch in marker:
        if unicodedata.combining(ch):
            # Combining characters do not count toward the length.
            continue
        budget -= 1
        if budget == 0:
            break
    return budget

96 

97 

class TruncateHTMLParser(HTMLParser):
    """
    HTML parser that copies markup into ``self.output`` until a budget of
    text units is used up, then adds the truncation text and closes the tags
    that are still open.

    Subclasses supply ``process(data)``, which must return a pair
    ``(units, rendered)``: ``len(units)`` is charged against the budget and
    ``rendered`` is the text added to the output.
    """

    class TruncationCompleted(Exception):
        """Raised to stop parsing once truncation has happened."""

    def __init__(self, *, length, replacement, convert_charrefs=True):
        super().__init__(convert_charrefs=convert_charrefs)
        self.replacement = replacement
        self.remaining = length
        self.output = ""
        # Open (non-void) tags, most recently opened first.
        self.tags = deque()

    @cached_property
    def void_elements(self):
        # Imported inside the method rather than at module level.
        from django.utils.html import VOID_ELEMENTS

        return VOID_ELEMENTS

    def handle_startendtag(self, tag, attrs):
        self.handle_starttag(tag, attrs)
        if tag in self.void_elements:
            return
        self.handle_endtag(tag)

    def handle_starttag(self, tag, attrs):
        # Emit the start tag exactly as it appeared in the input.
        self.output += self.get_starttag_text()
        if tag in self.void_elements:
            return
        self.tags.appendleft(tag)

    def handle_endtag(self, tag):
        if tag in self.void_elements:
            return
        self.output += f"</{tag}>"
        try:
            self.tags.remove(tag)
        except ValueError:
            # Closing tag without a recorded opening tag: nothing to drop.
            pass

    def handle_data(self, data):
        units, rendered = self.process(data)
        consumed = len(units)
        if consumed > self.remaining:
            # Budget exceeded: emit the truncated text and stop parsing.
            self.remaining = 0
            self.output += add_truncation_text(rendered, self.replacement)
            raise self.TruncationCompleted
        self.remaining -= consumed
        self.output += rendered

    def feed(self, data):
        try:
            super().feed(data)
        except self.TruncationCompleted:
            # Close every tag still open, innermost first.
            self.output += "".join(f"</{tag}>" for tag in self.tags)
            self.tags.clear()
        # Reset the parser whether or not truncation happened.
        self.reset()

153 

154 

class TruncateCharsHTMLParser(TruncateHTMLParser):
    """Truncating HTML parser whose budget is counted in characters."""

    def __init__(self, *, length, replacement, convert_charrefs=True):
        # The parser's budget is ``length`` minus the room taken by the
        # truncation text; the raw ``length`` is kept for process().
        budget = calculate_truncate_chars_length(length, replacement)
        self.processed_chars = 0
        self.length = length
        super().__init__(
            length=budget,
            replacement=replacement,
            convert_charrefs=convert_charrefs,
        )

    def process(self, data):
        """
        Return ``(data, rendered)`` where ``rendered`` is the HTML-escaped
        prefix of ``data`` that fits in the remaining budget.

        Raise ``TruncationCompleted`` after appending ``data`` unescaped to
        the output when the characters processed so far equal ``length``
        exactly and the output plus ``data`` is as long as the raw input.
        """
        self.processed_chars += len(data)
        at_exact_length = self.processed_chars == self.length
        if at_exact_length and (
            len(self.output) + len(data) == len(self.rawdata)
        ):
            self.output += data
            raise self.TruncationCompleted
        kept = data[: self.remaining]
        return data, escape("".join(kept))

174 

175 

class TruncateWordsHTMLParser(TruncateHTMLParser):
    """Truncating HTML parser whose budget is counted in words."""

    def process(self, data):
        """
        Split ``data`` on runs of whitespace that sit between two
        non-whitespace characters and return ``(words, rendered)``, where
        ``rendered`` is the HTML-escaped, space-joined words that fit in
        the remaining budget.
        """
        words = re.split(r"(?<=\S)\s+(?=\S)", data)
        kept = words[: self.remaining]
        return words, escape(" ".join(kept))

181 

182 

class Truncator(SimpleLazyObject):
    """
    Lazy wrapper around a piece of text that can be truncated by characters
    or by words.

    HTML truncation (chars or words) is documented as limiting the input to
    at most `MAX_LENGTH_HTML` characters.
    """

    # 5 million characters are approximately 4000 text pages or 3 web pages.
    # NOTE(review): none of the methods in this class read this constant;
    # confirm where (or whether) the limit is enforced.
    MAX_LENGTH_HTML = 5_000_000

    def __init__(self, text):
        # Conversion to str is deferred until the wrapped value is needed.
        super().__init__(lambda: str(text))

    def chars(self, num, truncate=None, html=False):
        """
        Return the text cut down to at most ``num`` characters.

        `truncate` is the text used to signal that truncation happened;
        when None, a translatable ellipsis string is used. With ``html``
        true, the text is run through the HTML-aware character parser.
        A ``num`` of zero or less gives an empty string.
        """
        self._setup()
        length = int(num)
        if length <= 0:
            return ""
        text = unicodedata.normalize("NFC", self._wrapped)

        if not html:
            return self._text_chars(length, truncate, text)
        parser = TruncateCharsHTMLParser(length=length, replacement=truncate)
        parser.feed(text)
        parser.close()
        return parser.output

    def _text_chars(self, length, truncate, text):
        """Truncate plain ``text`` after ``length`` characters."""
        cut_budget = calculate_truncate_chars_length(length, truncate)
        visible = 0
        cut_at = None
        for position, char in enumerate(text):
            # Combining characters do not add to the string length.
            if not unicodedata.combining(char):
                visible += 1
                if cut_at is None and visible > cut_budget:
                    # First character that no longer fits beside the
                    # truncation text.
                    cut_at = position
                if visible > length:
                    # Too long: return the truncated string.
                    return add_truncation_text(text[: cut_at or 0], truncate)

        # Short enough: return the text unchanged.
        return text

    def words(self, num, truncate=None, html=False):
        """
        Return the text cut down to at most ``num`` words.

        `truncate` is the text used to signal that truncation happened;
        when None, the default ellipsis is used. With ``html`` true, the
        text is run through the HTML-aware word parser. A ``num`` of zero
        or less gives an empty string.
        """
        self._setup()
        length = int(num)
        if length <= 0:
            return ""
        if not html:
            return self._text_words(length, truncate)
        parser = TruncateWordsHTMLParser(length=length, replacement=truncate)
        parser.feed(self._wrapped)
        parser.close()
        return parser.output

    def _text_words(self, length, truncate):
        """
        Truncate plain text after ``length`` words.

        Words are re-joined with single spaces, so newlines and repeated
        whitespace in the original are not kept.
        """
        words = self._wrapped.split()
        if len(words) <= length:
            return " ".join(words)
        return add_truncation_text(" ".join(words[:length]), truncate)

266 

267 

@keep_lazy_text
def get_valid_filename(name):
    """
    Turn ``name`` into a string that is safe to use as a clean filename.

    Leading and trailing white space is stripped, remaining spaces become
    underscores, and every character that is not alphanumeric, a dash, an
    underscore, or a dot is removed. Raise SuspiciousFileOperation when the
    result is empty, ".", or "..".
    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    underscored = str(name).strip().replace(" ", "_")
    cleaned = re.sub(r"(?u)[^-\w.]", "", underscored)
    if cleaned not in {"", ".", ".."}:
        return cleaned
    raise SuspiciousFileOperation("Could not derive file name from '%s'" % name)

283 

284 

@keep_lazy_text
def get_text_list(list_, last_word=gettext_lazy("or")):
    """
    Join the items of ``list_`` into a readable enumeration, placing
    ``last_word`` before the final item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    'a and b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    """
    if not list_:
        return ""
    if len(list_) == 1:
        return str(list_[0])
    # Translators: This string is used as a separator between list elements
    separator = _(", ")
    leading = separator.join(str(item) for item in list_[:-1])
    return "%s %s %s" % (leading, str(last_word), str(list_[-1]))

309 

310 

@keep_lazy_text
def normalize_newlines(text):
    """Return ``str(text)`` with every CRLF or lone CR replaced by LF."""
    as_text = str(text)
    return re_newlines.sub("\n", as_text)

315 

316 

@keep_lazy_text
def phone2numeric(phone):
    """
    Replace the letters in a phone number with their keypad digits.

    The input is lower-cased first; characters that are not one of the
    letters a-z are left as they are.
    """
    keypad = {
        "2": "abc",
        "3": "def",
        "4": "ghi",
        "5": "jkl",
        "6": "mno",
        "7": "pqrs",
        "8": "tuv",
        "9": "wxyz",
    }
    # Invert the keypad layout into a letter -> digit lookup.
    digit_for = {
        letter: digit for digit, letters in keypad.items() for letter in letters
    }
    return "".join(digit_for.get(char, char) for char in phone.lower())

349 

350 

def _get_random_filename(max_random_bytes):
    """
    Return a run of ``b"a"`` bytes whose length is drawn with
    ``secrets.randbelow(max_random_bytes)``.
    """
    pad_length = secrets.randbelow(max_random_bytes)
    return b"a" * pad_length


def compress_string(s, *, max_random_bytes=None):
    """
    Gzip-compress ``s`` at compression level 6 with the mtime field set to 0.

    When ``max_random_bytes`` is truthy, the FNAME flag is set in the
    10-byte gzip header and a NUL-terminated filename of random length
    (see ``_get_random_filename``) is inserted right after the header.
    Otherwise the compressed data is returned as produced.
    """
    gzipped = gzip_compress(s, compresslevel=6, mtime=0)

    if max_random_bytes:
        view = memoryview(gzipped)
        fixed_header = bytearray(view[:10])
        # Byte 3 of the header is the flags byte.
        fixed_header[3] = gzip.FNAME
        padding = _get_random_filename(max_random_bytes) + b"\x00"
        return bytes(fixed_header) + padding + view[10:]

    return gzipped

368 

369 

class StreamingBuffer(BytesIO):
    """In-memory byte buffer whose ``read()`` drains everything it holds."""

    def read(self):
        """Return the whole buffer content and leave the buffer empty."""
        pending = self.getvalue()
        # Empty the buffer and rewind so later writes start at offset 0.
        self.truncate(0)
        self.seek(0)
        return pending

376 

377 

# Streaming counterpart of compress_string: gzip an iterable chunk by chunk.
def compress_sequence(sequence, *, max_random_bytes=None):
    """
    Yield gzip output for the items of ``sequence`` as it becomes available.

    The first value yielded is whatever the gzip writer has produced before
    any item is written. After each item, pending output is yielded only if
    it is non-empty. The last value is what is left in the buffer once the
    writer has been closed. When ``max_random_bytes`` is truthy, a filename
    of random length is passed to the gzip writer.
    """
    if max_random_bytes:
        filename = _get_random_filename(max_random_bytes)
    else:
        filename = None
    buf = StreamingBuffer()
    with GzipFile(
        filename=filename, mode="wb", compresslevel=6, fileobj=buf, mtime=0
    ) as zfile:
        # Output headers...
        yield buf.read()
        for item in sequence:
            zfile.write(item)
            if chunk := buf.read():
                yield chunk
    # Closing the writer may have flushed more output into the buffer.
    yield buf.read()

393 

394 

# Matches either a token that contains one or more quoted sections (double-
# or single-quoted, with backslash escapes), such as some_token="with spaces",
# or a plain run of non-whitespace characters.
smart_split_re = _lazy_re_compile(
    r"""
    ((?:
        [^\s'"]*
        (?:
            (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
            [^\s'"]*
        )+
    ) | \S+)
""",
    re.VERBOSE,
)


def smart_split(text):
    r"""
    Yield the space-separated pieces of ``text``, keeping quoted phrases in
    one piece.

    Single and double quotes are both supported, and quotes can be escaped
    with backslashes. Pieces keep their surrounding quote marks and escaped
    quotes stay escaped (the results can then be further processed with
    unescape_string_literal()).

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    for match in smart_split_re.finditer(str(text)):
        yield match.group(0)

428 

429 

@keep_lazy_text
def unescape_string_literal(s):
    r"""
    Strip the surrounding quotes from a quoted string literal and unescape
    the quote character and backslashes inside it::

        >>> unescape_string_literal('"abc"')
        'abc'
        >>> unescape_string_literal("'abc'")
        'abc'
        >>> unescape_string_literal('"a \"bc\""')
        'a "bc"'
        >>> unescape_string_literal("'\'ab\' c'")
        "'ab' c"

    Raise ValueError when ``s`` is empty, does not start with a quote, or
    does not end with the same character it starts with.
    """
    is_literal = bool(s) and s[0] in "\"'" and s[-1] == s[0]
    if not is_literal:
        raise ValueError("Not a string literal: %r" % s)
    quote = s[0]
    inner = s[1:-1]
    # Unescape the quote character first, then doubled backslashes.
    inner = inner.replace(r"\%s" % quote, quote)
    return inner.replace(r"\\", "\\")

449 

450 

@keep_lazy_text
def slugify(value, allow_unicode=False):
    """
    Build a slug from ``value``.

    Unless 'allow_unicode' is true, the text is reduced to ASCII. The result
    is lower-cased, characters other than alphanumerics, underscores,
    hyphens, and whitespace are dropped, runs of whitespace or dashes become
    a single dash, and leading and trailing dashes and underscores are
    stripped.
    """
    text = str(value)
    if not allow_unicode:
        # Decompose, then drop everything that cannot be encoded as ASCII.
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")
    else:
        text = unicodedata.normalize("NFKC", text)
    cleaned = re.sub(r"[^\w\s-]", "", text.lower())
    dashed = re.sub(r"[-\s]+", "-", cleaned)
    return dashed.strip("-_")

470 

471 

def camel_case_to_spaces(value):
    """
    Insert a space before each CamelCase word boundary in ``value``, then
    strip surrounding white space and convert the result to lowercase.
    """
    spaced = re_camel_case.sub(r" \1", value)
    return spaced.strip().lower()

477 

478 

def _format_lazy(format_string, *args, **kwargs):
    """
    Return ``format_string.format(*args, **kwargs)``.

    'format_string', the positional args, and/or the keyword args may be
    lazy objects.
    """
    formatted = format_string.format(*args, **kwargs)
    return formatted

485 

486 

# Public wrapper produced by passing _format_lazy and ``str`` to lazy().
# NOTE(review): presumably ``str`` declares the result type of the wrapped
# call -- confirm against the definition of lazy().
format_lazy = lazy(_format_lazy, str)