Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/django/utils/text.py: 51%

1import gzip

2import re

3import secrets

4import unicodedata

5from collections import deque

6from gzip import GzipFile

7from gzip import compress as gzip_compress

8from html import escape

9from html.parser import HTMLParser

10from io import BytesIO

12from django.core.exceptions import SuspiciousFileOperation

13from django.utils.functional import (

14 SimpleLazyObject,

15 cached_property,

16 keep_lazy_text,

17 lazy,

18)

19from django.utils.regex_helper import _lazy_re_compile

20from django.utils.translation import gettext as _

21from django.utils.translation import gettext_lazy, pgettext

@keep_lazy_text
def capfirst(x):
    """
    Return ``x`` with its first character upper-cased.

    Falsy values (``""``, ``None``, ``0``, ...) are returned unchanged. Any
    other value that is not a ``str`` is converted with ``str()`` first. Only
    the first character is altered; the rest of the text is kept as is.
    """
    if x:
        text = x if isinstance(x, str) else str(x)
        return text[0].upper() + text[1:]
    return x

# Set up regular expressions
# Matches a CRLF pair or a lone CR, i.e. every line ending that is not already
# a bare LF.
re_newlines = _lazy_re_compile(r"\r\n|\r")  # Used in normalize_newlines
# Matches an uppercase ASCII letter that either directly follows a lowercase
# ASCII letter, or is not followed by another uppercase letter or by the end
# of the string. Used in camel_case_to_spaces.
re_camel_case = _lazy_re_compile(r"(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))")

@keep_lazy_text
def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks. Expects that
    existing line breaks are posix newlines.

    Preserve all white space except added line breaks consume the space on
    which they break the line.

    Don't wrap long words, thus the output text may have lines longer than
    ``width``.

    ``text`` is the string to wrap and ``width`` is the maximum line length.
    A trailing newline kept by ``splitlines(True)`` counts towards the length
    of its line. Return the wrapped text as a single string.
    """

    def _generator():
        # The limit is ``width`` for every line. It used to be recomputed as
        # min(<width + 1 or width>, width), which always evaluates to
        # ``width``.
        for line in text.splitlines(True):  # True keeps trailing linebreaks
            while len(line) > width:
                # Prefer the last space at or before the limit.
                space = line[: width + 1].rfind(" ") + 1
                if space == 0:
                    # No space within the limit: break at the first space
                    # anywhere in the line instead.
                    space = line.find(" ") + 1
                    if space == 0:
                        # No space at all: emit the over-long line untouched.
                        yield line
                        line = ""
                        break
                # The space the line is broken on is replaced by the newline.
                yield "%s\n" % line[: space - 1]
                line = line[space:]
            if line:
                yield line

    return "".join(_generator())

def add_truncation_text(text, truncate=None):
    """
    Combine ``text`` with the truncation marker ``truncate``.

    When ``truncate`` is None, a translatable default marker is used. If the
    marker contains the ``%(truncated_text)s`` placeholder, ``text`` is
    interpolated into it. Otherwise the marker is appended to ``text``,
    unless ``text`` already ends with it.
    """
    marker = truncate
    if marker is None:
        marker = pgettext(
            "String to return when truncating text", "%(truncated_text)s…"
        )
    if "%(truncated_text)s" in marker:
        return marker % {"truncated_text": text}
    # No placeholder in the marker: append it, but never twice in a row.
    return text if text.endswith(marker) else f"{text}{marker}"

def calculate_truncate_chars_length(length, replacement):
    """
    Return how many characters of text may be kept so that the text plus the
    truncation marker built from ``replacement`` fits in ``length``.

    One unit is subtracted from ``length`` for each non-combining character
    of the marker; counting stops as soon as the budget reaches zero.
    """
    budget = length
    for ch in add_truncation_text("", replacement):
        if unicodedata.combining(ch):
            # Combining characters don't count towards the length.
            continue
        budget -= 1
        if budget == 0:
            break
    return budget

class TruncateHTMLParser(HTMLParser):
    """
    HTML parser that copies its input into ``self.output`` until a budget of
    ``length`` units has been used up, then adds the truncation text and
    closes the tags that are still open.

    Subclasses provide ``process(data)``, which returns a pair: the sequence
    whose length is charged against the budget, and the text to output.
    """

    class TruncationCompleted(Exception):
        """Raised to stop parsing once the output has been truncated."""

    def __init__(self, *, length, replacement, convert_charrefs=True):
        super().__init__(convert_charrefs=convert_charrefs)
        self.remaining = length
        self.replacement = replacement
        self.output = ""
        # Open non-void tags, most recently opened first.
        self.tags = deque()

    @cached_property
    def void_elements(self):
        # Imported here rather than at module level.
        from django.utils.html import VOID_ELEMENTS

        return VOID_ELEMENTS

    def handle_startendtag(self, tag, attrs):
        self.handle_starttag(tag, attrs)
        if tag in self.void_elements:
            return
        self.handle_endtag(tag)

    def handle_starttag(self, tag, attrs):
        # Copy the start tag verbatim; void elements are never tracked as
        # open.
        self.output += self.get_starttag_text()
        if tag in self.void_elements:
            return
        self.tags.appendleft(tag)

    def handle_endtag(self, tag):
        if tag in self.void_elements:
            return
        self.output += f"</{tag}>"
        try:
            # Drops the most recently opened occurrence of the tag.
            self.tags.remove(tag)
        except ValueError:
            # Closing tag without a tracked opening tag: nothing to remove.
            pass

    def handle_data(self, data):
        units, rendered = self.process(data)
        consumed = len(units)
        if consumed > self.remaining:
            # Budget exceeded: emit the truncated text and stop parsing.
            self.remaining = 0
            self.output += add_truncation_text(rendered, self.replacement)
            raise self.TruncationCompleted
        self.output += rendered
        self.remaining -= consumed

    def feed(self, data):
        try:
            super().feed(data)
        except self.TruncationCompleted:
            # Close every tag that is still open, innermost first.
            self.output += "".join(f"</{tag}>" for tag in self.tags)
            self.tags.clear()
        # The parser is reset whether or not truncation happened.
        self.reset()

153

154

class TruncateCharsHTMLParser(TruncateHTMLParser):
    """Truncating HTML parser whose budget is counted in characters."""

    def __init__(self, *, length, replacement, convert_charrefs=True):
        self.length = length
        self.processed_chars = 0
        # The budget passed to the base class leaves room for the truncation
        # text.
        budget = calculate_truncate_chars_length(length, replacement)
        super().__init__(
            length=budget,
            replacement=replacement,
            convert_charrefs=convert_charrefs,
        )

    def process(self, data):
        """
        Return ``(data, output)`` where ``output`` is the escaped prefix of
        ``data`` that fits in the remaining budget.
        """
        self.processed_chars += len(data)
        at_limit = self.processed_chars == self.length
        if at_limit and len(self.output) + len(data) == len(self.rawdata):
            # Presumably the input fits exactly and needs no truncation text
            # -- confirm. The data is appended as is, without escape().
            self.output += data
            raise self.TruncationCompleted
        kept = "".join(data[: self.remaining])
        return data, escape(kept)

174

175

class TruncateWordsHTMLParser(TruncateHTMLParser):
    """Truncating HTML parser whose budget is counted in words."""

    def process(self, data):
        """
        Split ``data`` on runs of whitespace that sit between two
        non-whitespace characters. Return the resulting pieces together with
        the escaped, space-joined pieces that fit in the remaining budget.
        """
        words = re.split(r"(?<=\S)\s+(?=\S)", data)
        kept = " ".join(words[: self.remaining])
        return words, escape(kept)

181

182

class Truncator(SimpleLazyObject):
    """
    An object used to truncate text, either by characters or words.

    When truncating HTML text (either chars or words), input will be limited to
    at most `MAX_LENGTH_HTML` characters.

    The wrapped value is produced by calling ``str()`` on the object passed to
    the constructor.
    """

    # 5 million characters are approximately 4000 text pages or 3 web pages.
    # NOTE(review): nothing in this class reads MAX_LENGTH_HTML; confirm where
    # (or whether) the limit described in the class docstring is enforced.
    MAX_LENGTH_HTML = 5_000_000

    def __init__(self, text):
        """Register a callable that converts ``text`` with ``str()``."""
        super().__init__(lambda: str(text))

    def chars(self, num, truncate=None, html=False):
        """
        Return the text truncated to be no longer than the specified number
        of characters.

        `truncate` specifies what should be used to notify that the string has
        been truncated, defaulting to a translatable string of an ellipsis.

        `num` is converted with ``int()``; a value of zero or less returns an
        empty string. The text is NFC-normalized before truncation. When
        `html` is true the text is run through ``TruncateCharsHTMLParser``
        instead of being truncated as plain text.
        """
        # Presumably forces evaluation of the lazily wrapped string before
        # self._wrapped is read -- confirm against SimpleLazyObject.
        self._setup()
        length = int(num)
        if length <= 0:
            return ""
        text = unicodedata.normalize("NFC", self._wrapped)

        if html:
            parser = TruncateCharsHTMLParser(length=length, replacement=truncate)
            parser.feed(text)
            parser.close()
            return parser.output
        return self._text_chars(length, truncate, text)

    def _text_chars(self, length, truncate, text):
        """Truncate a string after a certain number of chars."""
        # Number of characters of ``text`` that may be kept so that the
        # truncation text still fits within ``length``.
        truncate_len = calculate_truncate_chars_length(length, truncate)
        s_len = 0
        end_index = None
        for i, char in enumerate(text):
            if unicodedata.combining(char):
                # Don't consider combining characters
                # as adding to the string length
                continue
            s_len += 1
            # Remember where to cut in case truncation turns out to be
            # needed: the index of the first character beyond truncate_len.
            if end_index is None and s_len > truncate_len:
                end_index = i
            if s_len > length:
                # Return the truncated string
                return add_truncation_text(text[: end_index or 0], truncate)

        # Return the original string since no truncation was necessary
        return text

    def words(self, num, truncate=None, html=False):
        """
        Truncate a string after a certain number of words. `truncate` specifies
        what should be used to notify that the string has been truncated,
        defaulting to ellipsis.

        `num` is converted with ``int()``; a value of zero or less returns an
        empty string. When `html` is true the text is run through
        ``TruncateWordsHTMLParser`` instead of being truncated as plain text.
        """
        # Presumably forces evaluation of the lazily wrapped string before
        # self._wrapped is read -- confirm against SimpleLazyObject.
        self._setup()
        length = int(num)
        if length <= 0:
            return ""
        if html:
            parser = TruncateWordsHTMLParser(length=length, replacement=truncate)
            parser.feed(self._wrapped)
            parser.close()
            return parser.output
        return self._text_words(length, truncate)

    def _text_words(self, length, truncate):
        """
        Truncate a string after a certain number of words.

        Strip newlines in the string.
        """
        # split() without arguments splits on any whitespace run, so the words
        # are re-joined with single spaces even when nothing is truncated.
        words = self._wrapped.split()
        if len(words) > length:
            words = words[:length]
            return add_truncation_text(" ".join(words), truncate)
        return " ".join(words)

266

267

@keep_lazy_text
def get_valid_filename(name):
    """
    Turn ``name`` into a string that is safe to use as a file name.

    Leading and trailing whitespace is stripped, remaining spaces become
    underscores, and every character that is not a word character, a dash or
    a dot is dropped. Raise SuspiciousFileOperation if the result is empty,
    ``"."`` or ``".."``.
    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    underscored = str(name).strip().replace(" ", "_")
    cleaned = re.sub(r"(?u)[^-\w.]", "", underscored)
    if cleaned not in {"", ".", ".."}:
        return cleaned
    raise SuspiciousFileOperation("Could not derive file name from '%s'" % name)

283

284

@keep_lazy_text
def get_text_list(list_, last_word=gettext_lazy("or")):
    """
    Join the items of ``list_`` into readable text: all but the last item are
    separated by a translatable ", " and the last item is introduced by
    ``last_word``.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    'a and b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    """
    if not list_:
        return ""
    if len(list_) == 1:
        return str(list_[0])
    # Translators: This string is used as a separator between list elements
    separator = _(", ")
    leading = separator.join(str(item) for item in list_[:-1])
    return "%s %s %s" % (leading, str(last_word), str(list_[-1]))

309

310

@keep_lazy_text
def normalize_newlines(text):
    """Return ``str(text)`` with every CRLF or lone CR replaced by LF."""
    as_text = str(text)
    return re_newlines.sub("\n", as_text)

315

316

@keep_lazy_text
def phone2numeric(phone):
    """
    Replace the letters of a phone number with their keypad digits.

    The input is lower-cased first; characters that are not one of the
    letters a-z are passed through unchanged.
    """
    # Letter -> digit, laid out one keypad key per row.
    keypad = {
        "a": "2", "b": "2", "c": "2",
        "d": "3", "e": "3", "f": "3",
        "g": "4", "h": "4", "i": "4",
        "j": "5", "k": "5", "l": "5",
        "m": "6", "n": "6", "o": "6",
        "p": "7", "q": "7", "r": "7", "s": "7",
        "t": "8", "u": "8", "v": "8",
        "w": "9", "x": "9", "y": "9", "z": "9",
    }
    lowered = phone.lower()
    return "".join([keypad.get(ch, ch) for ch in lowered])

349

350

351def _get_random_filename(max_random_bytes):

352 return b"a" * secrets.randbelow(max_random_bytes)

353

354

def compress_string(s, *, max_random_bytes=None):
    """
    Gzip-compress the bytes ``s`` (level 6, mtime 0) and return the result.

    When ``max_random_bytes`` is truthy, the FNAME flag is set in the 10-byte
    gzip header and a NUL-terminated file name of random length is inserted
    right after it. Presumably this varies the size of the compressed output
    -- confirm the intent with callers.
    """
    compressed_data = gzip_compress(s, compresslevel=6, mtime=0)
    if max_random_bytes:
        view = memoryview(compressed_data)
        # Byte 3 of the fixed 10-byte header holds the flags.
        header = bytearray(view[:10])
        header[3] = gzip.FNAME
        filename = _get_random_filename(max_random_bytes) + b"\x00"
        return b"".join((bytes(header), filename, view[10:]))
    return compressed_data

368

369

class StreamingBuffer(BytesIO):
    """BytesIO whose ``read()`` returns everything written so far and then
    empties the buffer."""

    def read(self):
        """Return the whole buffer content and leave the buffer empty."""
        drained = self.getvalue()
        # Empty the buffer and rewind so the next write starts at offset 0.
        self.truncate(0)
        self.seek(0)
        return drained

376

377

# Like compress_string, but for iterators of strings.
def compress_sequence(sequence, *, max_random_bytes=None):
    """
    Generator that gzip-compresses the items of ``sequence`` (level 6,
    mtime 0) and yields the compressed output piece by piece.

    The gzip header is yielded first. After each item is written, whatever
    compressed data is available is yielded, if any. The data left over once
    the gzip stream is closed is yielded last. When ``max_random_bytes`` is
    truthy, a file name of random length is stored in the header.
    """
    if max_random_bytes:
        filename = _get_random_filename(max_random_bytes)
    else:
        filename = None
    buf = StreamingBuffer()
    with GzipFile(
        filename=filename, mode="wb", compresslevel=6, fileobj=buf, mtime=0
    ) as zfile:
        # Output headers...
        yield buf.read()
        for item in sequence:
            zfile.write(item)
            chunk = buf.read()
            if chunk:
                yield chunk
    # Closing the GzipFile writes the remaining data into the buffer.
    yield buf.read()

393

394

# Expression to match some_token and some_token="with spaces" (and similarly
# for single-quoted strings).
# The first alternative is a run of characters that are neither whitespace nor
# quotes, followed by one or more groups made of a quoted string (backslash
# escapes allowed inside) and another such run. The second alternative is any
# run of non-whitespace characters.
# The pattern is compiled with re.VERBOSE, so the whitespace and line breaks
# inside it are layout only.
smart_split_re = _lazy_re_compile(
    r"""
    ((?:
        [^\s'"]*
        (?:
            (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
            [^\s'"]*
        )+
    ) | \S+)
""",
    re.VERBOSE,
)

409

410

def smart_split(text):
    r"""
    Yield the whitespace-separated pieces of ``str(text)``, keeping quoted
    phrases in one piece.

    Both single and double quotes are recognized, and quotes may be escaped
    with backslashes. Pieces keep their surrounding quote marks and escaped
    quotes stay escaped (the results can then be further processed with
    unescape_string_literal()).

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    for match in smart_split_re.finditer(str(text)):
        yield match.group(0)

428

429

@keep_lazy_text
def unescape_string_literal(s):
    r"""
    Strip the surrounding quotes from a quoted string literal and unescape
    the escaped quotes and backslashes inside it::

        >>> unescape_string_literal('"abc"')
        'abc'
        >>> unescape_string_literal("'abc'")
        'abc'
        >>> unescape_string_literal('"a \"bc\""')
        'a "bc"'
        >>> unescape_string_literal("'\'ab\' c'")
        "'ab' c"

    Raise ValueError if ``s`` is empty, does not start with a quote
    character, or does not end with the same character it starts with.
    """
    is_literal = bool(s) and s[0] in "\"'" and s[-1] == s[0]
    if not is_literal:
        raise ValueError("Not a string literal: %r" % s)
    quote = s[0]
    inner = s[1:-1]
    # Unescape the quote character first, then doubled backslashes.
    inner = inner.replace(r"\%s" % quote, quote)
    return inner.replace(r"\\", "\\")

449

450

@keep_lazy_text
def slugify(value, allow_unicode=False):
    """
    Build a slug from ``value``.

    The text is NFKC-normalized when ``allow_unicode`` is true; otherwise it
    is NFKD-normalized and every non-ASCII character is dropped. It is then
    lower-cased, characters other than word characters, whitespace and
    hyphens are removed, runs of whitespace and hyphens collapse to a single
    hyphen, and leading and trailing hyphens and underscores are stripped.
    """
    text = str(value)
    if allow_unicode:
        text = unicodedata.normalize("NFKC", text)
    else:
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")
    text = re.sub(r"[^\w\s-]", "", text.lower())
    collapsed = re.sub(r"[-\s]+", "-", text)
    return collapsed.strip("-_")

470

471

def camel_case_to_spaces(value):
    """
    Insert a space before each CamelCase word boundary in ``value``, then
    strip surrounding whitespace and lower-case the result.
    """
    spaced = re_camel_case.sub(r" \1", value)
    return spaced.strip().lower()

477

478

479def _format_lazy(format_string, *args, **kwargs):

480 """

481 Apply str.format() on 'format_string' where format_string, args,

482 and/or kwargs might be lazy.

483 """

484 return format_string.format(*args, **kwargs)

485

486

# Public wrapper around _format_lazy() created with lazy(), with str given as
# the result class. Presumably the formatting is deferred until the result is
# used as a string -- confirm against django.utils.functional.lazy.
format_lazy = lazy(_format_lazy, str)