Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/django/utils/text.py: 18%

205 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-01-17 06:13 +0000

1import gzip 

2import re 

3import secrets 

4import unicodedata 

5from gzip import GzipFile 

6from gzip import compress as gzip_compress 

7from io import BytesIO 

8 

9from django.core.exceptions import SuspiciousFileOperation 

10from django.utils.functional import SimpleLazyObject, keep_lazy_text, lazy 

11from django.utils.regex_helper import _lazy_re_compile 

12from django.utils.translation import gettext as _ 

13from django.utils.translation import gettext_lazy, pgettext 

14 

15 

@keep_lazy_text
def capfirst(x):
    """
    Capitalize the first letter of a string.

    Falsy input (e.g. ``""`` or ``None``) is returned unchanged. Any other
    non-string value is converted with ``str()`` first. Only the first
    character is upper-cased; the remainder is left untouched.
    """
    if not x:
        return x
    if not isinstance(x, str):
        x = str(x)
    # Slice rather than index so that a truthy object whose str() is empty
    # yields "" instead of raising IndexError.
    return x[:1].upper() + x[1:]

24 

25 

# Set up regular expressions
# Matches either a "<...>" tag-like span (nothing captured) or, in group 1, a
# run of characters that are neither whitespace nor angle brackets.
re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
# Matches either a "<...>" tag-like span (nothing captured) or, in group 1,
# any single character.
re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.S)
# Groups: (1) the "/" of a closing tag, (2) the tag name, (3) the trailing
# "/" of a self-closing tag.
re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.S)
re_newlines = _lazy_re_compile(r"\r\n|\r")  # Used in normalize_newlines
# Matches an uppercase letter preceded by a lowercase letter, or an uppercase
# letter that is not followed by another uppercase letter or the end of the
# string.
re_camel_case = _lazy_re_compile(r"(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))")

32 

33 

@keep_lazy_text
def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks. Expects that
    existing line breaks are posix newlines.

    Preserve all white space except added line breaks consume the space on
    which they break the line.

    Don't wrap long words, thus the output text may have lines longer than
    ``width``.
    """

    def _generator():
        for line in text.splitlines(True):  # True keeps trailing linebreaks
            # The limit is simply ``width``: the former
            # ``min((line.endswith("\n") and width + 1 or width), width)``
            # always evaluated to ``width``.
            while len(line) > width:
                # Break at the last space inside the allowed window ...
                space = line[: width + 1].rfind(" ") + 1
                if space == 0:
                    # ... or, failing that, at the first space in the line.
                    space = line.find(" ") + 1
                    if space == 0:
                        # No space at all: emit the over-long line as is.
                        yield line
                        line = ""
                        break
                # The space the line is broken on is replaced by the newline.
                yield "%s\n" % line[: space - 1]
                line = line[space:]
            if line:
                yield line

    return "".join(_generator())

65 

66 

class Truncator(SimpleLazyObject):
    """
    An object used to truncate text, either by characters or words.

    The wrapped value is ``str(text)``, evaluated lazily.
    """

    def __init__(self, text):
        super().__init__(lambda: str(text))

    def add_truncation_text(self, text, truncate=None):
        """
        Return ``text`` combined with the truncation marker ``truncate``.

        If ``truncate`` is None, a translatable default ending in an ellipsis
        is used. A marker containing ``%(truncated_text)s`` is used as a
        format string; otherwise it is appended unless ``text`` already ends
        with it.
        """
        if truncate is None:
            truncate = pgettext(
                "String to return when truncating text", "%(truncated_text)s…"
            )
        if "%(truncated_text)s" in truncate:
            return truncate % {"truncated_text": text}
        # The truncation text didn't contain the %(truncated_text)s string
        # replacement argument so just append it to the text.
        if text.endswith(truncate):
            # But don't append the truncation text if the current text already
            # ends in this.
            return text
        return "%s%s" % (text, truncate)

    def chars(self, num, truncate=None, html=False):
        """
        Return the text truncated to be no longer than the specified number
        of characters.

        `truncate` specifies what should be used to notify that the string has
        been truncated, defaulting to a translatable string of an ellipsis.

        The text is NFC-normalized first and combining characters are not
        counted. If `html` is true, tags are not counted either and are
        handled by ``_truncate_html()``.
        """
        self._setup()
        length = int(num)
        text = unicodedata.normalize("NFC", self._wrapped)

        # Calculate the length to truncate to (max length - end_text length)
        truncate_len = length
        for char in self.add_truncation_text("", truncate):
            if not unicodedata.combining(char):
                truncate_len -= 1
                if truncate_len == 0:
                    break
        if html:
            return self._truncate_html(length, truncate, text, truncate_len, False)
        return self._text_chars(length, truncate, text, truncate_len)

    def _text_chars(self, length, truncate, text, truncate_len):
        """Truncate a string after a certain number of chars."""
        s_len = 0
        end_index = None
        for i, char in enumerate(text):
            if unicodedata.combining(char):
                # Don't consider combining characters
                # as adding to the string length
                continue
            s_len += 1
            # Remember where to cut so that the marker still fits in `length`.
            if end_index is None and s_len > truncate_len:
                end_index = i
            if s_len > length:
                # Return the truncated string
                return self.add_truncation_text(text[: end_index or 0], truncate)

        # Return the original string since no truncation was necessary
        return text

    def words(self, num, truncate=None, html=False):
        """
        Truncate a string after a certain number of words. `truncate` specifies
        what should be used to notify that the string has been truncated,
        defaulting to ellipsis.
        """
        self._setup()
        length = int(num)
        if html:
            return self._truncate_html(length, truncate, self._wrapped, length, True)
        return self._text_words(length, truncate)

    def _text_words(self, length, truncate):
        """
        Truncate a string after a certain number of words.

        Strip newlines in the string.
        """
        words = self._wrapped.split()
        if len(words) > length:
            words = words[:length]
            return self.add_truncation_text(" ".join(words), truncate)
        return " ".join(words)

    def _truncate_html(self, length, truncate, text, truncate_len, words):
        """
        Truncate HTML to a certain number of chars (not counting tags and
        comments), or, if words is True, then to a certain number of words.
        Close opened tags if they were correctly closed in the given HTML.

        Preserve newlines in the HTML.
        """
        if words and length <= 0:
            return ""

        # Elements that never take a closing tag, so they are not tracked.
        html4_singlets = (
            "br",
            "col",
            "link",
            "base",
            "img",
            "param",
            "area",
            "hr",
            "input",
        )

        # Count non-HTML chars/words and keep note of open tags
        pos = 0
        end_text_pos = 0
        current_len = 0
        open_tags = []

        regex = re_words if words else re_chars

        while current_len <= length:
            m = regex.search(text, pos)
            if not m:
                # Checked through whole string
                break
            pos = m.end(0)
            if m[1]:
                # It's an actual non-HTML word or char
                current_len += 1
                if current_len == truncate_len:
                    end_text_pos = pos
                continue
            # Check for tag
            tag = re_tag.match(m[0])
            if not tag or current_len >= truncate_len:
                # Don't worry about non tags or tags after our truncate point
                continue
            closing_tag, tagname, self_closing = tag.groups()
            # Element names are always case-insensitive
            tagname = tagname.lower()
            if self_closing or tagname in html4_singlets:
                pass
            elif closing_tag:
                # Check for match in open tags list
                try:
                    i = open_tags.index(tagname)
                except ValueError:
                    pass
                else:
                    # SGML: An end tag closes, back to the matching start tag,
                    # all unclosed intervening start tags with omitted end tags
                    open_tags = open_tags[i + 1 :]
            else:
                # Add it to the start of the open tags list
                open_tags.insert(0, tagname)

        if current_len <= length:
            # The loop ran out of text before exceeding the limit.
            return text
        out = text[:end_text_pos]
        truncate_text = self.add_truncation_text("", truncate)
        if truncate_text:
            out += truncate_text
        # Close any tags still open
        for tag in open_tags:
            out += "</%s>" % tag
        # Return string
        return out

234 

235 

@keep_lazy_text
def get_valid_filename(name):
    """
    Return the given string converted to a string that can be used for a clean
    filename. Remove leading and trailing spaces; convert other spaces to
    underscores; and remove anything that is not an alphanumeric, dash,
    underscore, or dot.
    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'

    Raise SuspiciousFileOperation if the result is empty, "." or "..".
    """
    s = str(name).strip().replace(" ", "_")
    s = re.sub(r"(?u)[^-\w.]", "", s)
    if s in {"", ".", ".."}:
        raise SuspiciousFileOperation("Could not derive file name from '%s'" % name)
    return s

251 

252 

@keep_lazy_text
def get_text_list(list_, last_word=gettext_lazy("or")):
    """
    Join the items of ``list_`` with a translated ", " separator, placing
    ``last_word`` between the final two items.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    'a and b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    """
    if not list_:
        return ""
    if len(list_) == 1:
        return str(list_[0])
    return "%s %s %s" % (
        # Translators: This string is used as a separator between list elements
        _(", ").join(str(i) for i in list_[:-1]),
        str(last_word),
        str(list_[-1]),
    )

277 

278 

@keep_lazy_text
def normalize_newlines(text):
    """
    Normalize CRLF and CR newlines to just LF.

    ``text`` is converted with ``str()`` before substitution.
    """
    return re_newlines.sub("\n", str(text))

283 

284 

@keep_lazy_text
def phone2numeric(phone):
    """
    Convert a phone number with letters into its numeric equivalent.

    The input is lower-cased, then each letter a-z is replaced by its
    telephone keypad digit (abc -> 2, def -> 3, ..., wxyz -> 9). Any other
    character is kept as it appears after lower-casing.
    """
    # One translation table instead of a 26-entry dict and a per-character
    # lookup; the letter -> digit mapping is unchanged.
    keypad = str.maketrans(
        "abcdefghijklmnopqrstuvwxyz",
        "22233344455566677778889999",
    )
    return phone.lower().translate(keypad)

317 

318 

def _get_random_filename(max_random_bytes):
    """
    Return a bytestring of ``b"a"`` repeated a random number of times.

    The length is drawn with ``secrets.randbelow()``, so it lies in
    ``[0, max_random_bytes)``.
    """
    return b"a" * secrets.randbelow(max_random_bytes)

321 

322 

def compress_string(s, *, max_random_bytes=None):
    """
    Return ``s`` gzip-compressed (level 6, mtime 0).

    If ``max_random_bytes`` is truthy, a filename field of random length
    (fewer than ``max_random_bytes`` bytes, plus a NUL terminator) is spliced
    into the gzip header, so the output length varies between calls.
    """
    compressed_data = gzip_compress(s, compresslevel=6, mtime=0)

    if not max_random_bytes:
        return compressed_data

    # Use a view so slicing the payload does not copy it.
    compressed_view = memoryview(compressed_data)
    # The first 10 bytes are the fixed gzip header; byte 3 holds the flags.
    header = bytearray(compressed_view[:10])
    header[3] = gzip.FNAME

    # The filename field is NUL-terminated and follows the fixed header.
    filename = _get_random_filename(max_random_bytes) + b"\x00"

    return bytes(header) + filename + compressed_view[10:]

336 

337 

class StreamingBuffer(BytesIO):
    """A BytesIO whose ``read()`` drains the buffer."""

    def read(self):
        """Return everything written so far and empty the buffer."""
        ret = self.getvalue()
        # Rewind and cut the buffer at position 0 so later writes start
        # from an empty buffer.
        self.seek(0)
        self.truncate()
        return ret

344 

345 

def compress_sequence(sequence, *, max_random_bytes=None):
    """
    Like compress_string, but for iterators of strings.

    Yield gzip output incrementally: first whatever the GzipFile has written
    on opening, then any data produced after each item is written, and
    finally what is written when the GzipFile is closed. If
    ``max_random_bytes`` is truthy, a random-length filename is passed to
    GzipFile.
    """
    buf = StreamingBuffer()
    filename = _get_random_filename(max_random_bytes) if max_random_bytes else None
    with GzipFile(
        filename=filename, mode="wb", compresslevel=6, fileobj=buf, mtime=0
    ) as zfile:
        # Output headers...
        yield buf.read()
        for item in sequence:
            zfile.write(item)
            data = buf.read()
            # A write may produce no output yet; skip empty chunks.
            if data:
                yield data
    # Leaving the with-block closes the GzipFile; emit what that wrote.
    yield buf.read()

361 

362 

# Expression to match some_token and some_token="with spaces" (and similarly
# for single-quoted strings).
smart_split_re = _lazy_re_compile(
    r"""
    ((?:
        [^\s'"]*
        (?:
            (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
            [^\s'"]*
        )+
    ) | \S+)
""",
    re.VERBOSE,
)

377 

378 

def smart_split(text):
    r"""
    Generator that splits a string by spaces, leaving quoted phrases together.
    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, strings will keep their initial and trailing
    quote marks and escaped quotes will remain escaped (the results can then
    be further processed with unescape_string_literal()).

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    # Each match of smart_split_re is one token; yield the full match text.
    for bit in smart_split_re.finditer(str(text)):
        yield bit[0]

396 

397 

@keep_lazy_text
def unescape_string_literal(s):
    r"""
    Convert quoted string literals to unquoted strings with escaped quotes and
    backslashes unquoted::

        >>> unescape_string_literal('"abc"')
        'abc'
        >>> unescape_string_literal("'abc'")
        'abc'
        >>> unescape_string_literal('"a \"bc\""')
        'a "bc"'
        >>> unescape_string_literal("'\'ab\' c'")
        "'ab' c"

    Raise ValueError if ``s`` is empty or is not wrapped in a matching pair
    of single or double quotes.
    """
    if not s or s[0] not in "\"'" or s[-1] != s[0]:
        raise ValueError("Not a string literal: %r" % s)
    quote = s[0]
    # Unescape the surrounding quote character first, then backslashes.
    return s[1:-1].replace(r"\%s" % quote, quote).replace(r"\\", "\\")

417 

418 

@keep_lazy_text
def slugify(value, allow_unicode=False):
    """
    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
    dashes to single dashes. Remove characters that aren't alphanumerics,
    underscores, or hyphens. Convert to lowercase. Also strip leading and
    trailing whitespace, dashes, and underscores.
    """
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize("NFKC", value)
    else:
        # Decompose, then drop whatever cannot be encoded as ASCII.
        value = (
            unicodedata.normalize("NFKD", value)
            .encode("ascii", "ignore")
            .decode("ascii")
        )
    value = re.sub(r"[^\w\s-]", "", value.lower())
    return re.sub(r"[-\s]+", "-", value).strip("-_")

438 

439 

def camel_case_to_spaces(value):
    """
    Split CamelCase and convert to lowercase. Strip surrounding whitespace.
    """
    # Put a space before each capital matched by re_camel_case, then tidy up.
    return re_camel_case.sub(r" \1", value).strip().lower()

445 

446 

def _format_lazy(format_string, *args, **kwargs):
    """
    Apply str.format() on 'format_string' where format_string, args,
    and/or kwargs might be lazy.
    """
    return format_string.format(*args, **kwargs)


# Lazy variant of _format_lazy, declared with ``str`` as its result class.
format_lazy = lazy(_format_lazy, str)