Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/django/utils/text.py: 18%

1import gzip

2import re

3import secrets

4import unicodedata

5from gzip import GzipFile

6from gzip import compress as gzip_compress

7from io import BytesIO

9from django.core.exceptions import SuspiciousFileOperation

10from django.utils.functional import SimpleLazyObject, keep_lazy_text, lazy

11from django.utils.regex_helper import _lazy_re_compile

12from django.utils.translation import gettext as _

13from django.utils.translation import gettext_lazy, pgettext

@keep_lazy_text
def capfirst(x):
    """
    Return ``x`` with its first character upper-cased.

    Falsy input is handed back untouched. Any other non-string value is
    converted with ``str()`` before the first character is capitalized; the
    rest of the text is left as is.
    """
    if x:
        value = x if isinstance(x, str) else str(x)
        head, tail = value[0], value[1:]
        return head.upper() + tail
    return x

# Set up regular expressions
# Each alternation below first tries to match an angle-bracketed tag; only
# the second alternative captures group 1, which Truncator._truncate_html()
# uses to tell counted text apart from markup.
# Group 1: a run of characters that are neither whitespace nor "<" / ">".
re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
# Group 1: any single character (including newlines, because of re.S).
re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.S)
# Groups: (1) "/" of a closing tag, (2) tag name, (3) trailing "/" of a
# self-closing tag.
re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.S)
re_newlines = _lazy_re_compile(r"\r\n|\r")  # Used in normalize_newlines
# Matches an upper-case letter that follows a lower-case one, or an
# upper-case letter not followed by another upper-case letter or the end of
# the string. Used in camel_case_to_spaces.
re_camel_case = _lazy_re_compile(r"(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))")

@keep_lazy_text
def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks. Expects that
    existing line breaks are posix newlines.

    Preserve all white space except added line breaks consume the space on
    which they break the line.

    Don't wrap long words, thus the output text may have lines longer than
    ``width``.

    ``text`` is the string to wrap and ``width`` the target line length.
    Return the wrapped text as a single string.
    """

    def _generator():
        for line in text.splitlines(True):  # True keeps trailing linebreaks
            # The limit is simply ``width``: the former
            # ``min((cond and width + 1 or width), width)`` expression always
            # evaluated to ``width``, so it is no longer recomputed.
            while len(line) > width:
                # Prefer the last space that still lets the line fit.
                space = line[: width + 1].rfind(" ") + 1
                if space == 0:
                    # No space early enough: break at the first space, if any.
                    space = line.find(" ") + 1
                    if space == 0:
                        # A single unbreakable word; emit it as is.
                        yield line
                        line = ""
                        break
                # The space the line is broken on is replaced by the newline.
                yield "%s\n" % line[: space - 1]
                line = line[space:]
            if line:
                yield line

    return "".join(_generator())

class Truncator(SimpleLazyObject):
    """
    Lazily wrap ``str(text)`` and truncate it, either by characters or by
    words.
    """

    def __init__(self, text):
        super().__init__(lambda: str(text))

    def add_truncation_text(self, text, truncate=None):
        """
        Combine ``text`` with the truncation marker ``truncate``.

        When ``truncate`` is None a translatable default (the text followed
        by an ellipsis) is used. A marker containing ``%(truncated_text)s``
        is treated as a template; any other marker is appended unless
        ``text`` already ends with it.
        """
        placeholder = "%(truncated_text)s"
        if truncate is None:
            truncate = pgettext(
                "String to return when truncating text", "%(truncated_text)s…"
            )
        if placeholder in truncate:
            return truncate % {"truncated_text": text}
        # No replacement argument in the marker: append it, but not a second
        # time when the text already ends with it.
        already_marked = text.endswith(truncate)
        return text if already_marked else "%s%s" % (text, truncate)

    def chars(self, num, truncate=None, html=False):
        """
        Return the text truncated to be no longer than ``num`` characters.

        ``truncate`` is the marker signalling that truncation happened; it
        defaults to a translatable ellipsis. With ``html`` set, tags are not
        counted and the HTML-aware code path is used.
        """
        self._setup()
        limit = int(num)
        normalized = unicodedata.normalize("NFC", self._wrapped)

        # Room left for real text once the marker's own (non-combining)
        # characters are subtracted from the limit.
        keep = limit
        for ch in self.add_truncation_text("", truncate):
            if unicodedata.combining(ch):
                continue
            keep -= 1
            if keep == 0:
                break
        if not html:
            return self._text_chars(limit, truncate, normalized, keep)
        return self._truncate_html(limit, truncate, normalized, keep, False)

    def _text_chars(self, length, truncate, text, truncate_len):
        """Truncate plain text after a certain number of characters."""
        visible = 0
        cut_at = None
        for index, ch in enumerate(text):
            # Combining characters do not add to the visible length.
            if unicodedata.combining(ch):
                continue
            visible += 1
            if cut_at is None and visible > truncate_len:
                cut_at = index
            if visible > length:
                # Too long: cut and attach the marker.
                return self.add_truncation_text(text[: cut_at or 0], truncate)

        # Short enough already; nothing to truncate.
        return text

    def words(self, num, truncate=None, html=False):
        """
        Truncate the text after ``num`` words. ``truncate`` is the marker
        signalling that truncation happened, defaulting to an ellipsis.
        """
        self._setup()
        limit = int(num)
        if not html:
            return self._text_words(limit, truncate)
        return self._truncate_html(limit, truncate, self._wrapped, limit, True)

    def _text_words(self, length, truncate):
        """
        Truncate plain text after a certain number of words.

        Whitespace, newlines included, is collapsed to single spaces.
        """
        tokens = self._wrapped.split()
        if len(tokens) <= length:
            return " ".join(tokens)
        return self.add_truncation_text(" ".join(tokens[:length]), truncate)

    def _truncate_html(self, length, truncate, text, truncate_len, words):
        """
        Truncate HTML to a certain number of chars (tags are not counted),
        or, when ``words`` is true, to a certain number of words. Tags still
        open at the cut are closed in the output.

        Newlines in the HTML are preserved.
        """
        if words and length <= 0:
            return ""

        # Elements that never take a closing tag.
        html4_singlets = (
            "br",
            "col",
            "link",
            "base",
            "img",
            "param",
            "area",
            "hr",
            "input",
        )

        pattern = re_words if words else re_chars

        # Walk the text, counting units and tracking which tags are open.
        cursor = 0
        cut_pos = 0
        current_len = 0
        open_tags = []

        while current_len <= length:
            found = pattern.search(text, cursor)
            if not found:
                # Reached the end of the text.
                break
            cursor = found.end(0)
            if found[1]:
                # A real word or character, not markup.
                current_len += 1
                if current_len == truncate_len:
                    cut_pos = cursor
                continue
            tag = re_tag.match(found[0])
            if not tag or current_len >= truncate_len:
                # Not a tag, or a tag beyond the truncation point.
                continue
            closing_tag, tagname, self_closing = tag.groups()
            # Element names are case-insensitive.
            tagname = tagname.lower()
            if self_closing or tagname in html4_singlets:
                continue
            if not closing_tag:
                # Most recently opened tag goes first.
                open_tags.insert(0, tagname)
                continue
            if tagname in open_tags:
                # SGML: an end tag closes, back to the matching start tag,
                # all unclosed intervening start tags with omitted end tags.
                del open_tags[: open_tags.index(tagname) + 1]

        if current_len <= length:
            return text
        pieces = [text[:cut_pos], self.add_truncation_text("", truncate)]
        # Close whatever is still open, innermost first.
        pieces.extend("</%s>" % name for name in open_tags)
        return "".join(pieces)

234

235

@keep_lazy_text
def get_valid_filename(name):
    """
    Turn ``name`` into a string that is safe to use as a file name.

    Surrounding whitespace is stripped, remaining spaces become underscores,
    and every character that is not a word character, dash, or dot is
    dropped. Raise SuspiciousFileOperation when nothing usable is left.
    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    underscored = str(name).strip().replace(" ", "_")
    cleaned = re.sub(r"(?u)[^-\w.]", "", underscored)
    if cleaned not in {"", ".", ".."}:
        return cleaned
    raise SuspiciousFileOperation("Could not derive file name from '%s'" % name)

251

252

@keep_lazy_text
def get_text_list(list_, last_word=gettext_lazy("or")):
    """
    Join the items of ``list_`` into readable text, putting ``last_word``
    before the final item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    'a and b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    """
    if not list_:
        return ""
    if len(list_) == 1:
        return str(list_[0])
    # Translators: This string is used as a separator between list elements
    separator = _(", ")
    leading = separator.join(str(item) for item in list_[:-1])
    return "%s %s %s" % (leading, str(last_word), str(list_[-1]))

277

278

@keep_lazy_text
def normalize_newlines(text):
    """Return ``str(text)`` with every CRLF or lone CR replaced by LF."""
    as_text = str(text)
    return re_newlines.sub("\n", as_text)

283

284

@keep_lazy_text
def phone2numeric(phone):
    """
    Replace the letters of a phone number with their keypad digits.

    The input is lower-cased first; characters without a keypad digit are
    kept unchanged.
    """
    # Letters grouped under the digit they map to.
    keypad = {
        "2": "abc",
        "3": "def",
        "4": "ghi",
        "5": "jkl",
        "6": "mno",
        "7": "pqrs",
        "8": "tuv",
        "9": "wxyz",
    }
    digit_for = {
        letter: digit for digit, letters in keypad.items() for letter in letters
    }
    converted = [digit_for.get(ch, ch) for ch in phone.lower()]
    return "".join(converted)

317

318

def _get_random_filename(max_random_bytes):
    """
    Return a run of ``b"a"`` bytes whose length is drawn with
    ``secrets.randbelow(max_random_bytes)``.
    """
    length = secrets.randbelow(max_random_bytes)
    return b"a" * length

321

322

def compress_string(s, *, max_random_bytes=None):
    """
    Gzip-compress ``s`` (compression level 6, mtime fixed at 0).

    When ``max_random_bytes`` is truthy, a filename field of random length
    (see ``_get_random_filename``) is spliced into the gzip header, so the
    output length varies between calls. Otherwise the plain gzip output is
    returned.
    """
    payload = gzip_compress(s, compresslevel=6, mtime=0)

    if max_random_bytes:
        view = memoryview(payload)
        # Copy the first 10 bytes and set byte 3 to the FNAME flag so that
        # the NUL-terminated name inserted right after them is announced.
        head = bytearray(view[:10])
        head[3] = gzip.FNAME

        padding = _get_random_filename(max_random_bytes) + b"\x00"

        return bytes(head) + padding + view[10:]

    return payload

336

337

class StreamingBuffer(BytesIO):
    """In-memory byte buffer whose ``read()`` drains everything written so far."""

    def read(self):
        """Return the whole buffer content, then leave the buffer empty."""
        drained = self.getvalue()
        # Empty the buffer and move back to its start for the next writes.
        self.truncate(0)
        self.seek(0)
        return drained

344

345

# Streaming counterpart of compress_string: compresses an iterable piece by
# piece instead of a single value.
def compress_sequence(sequence, *, max_random_bytes=None):
    """
    Yield gzip-compressed chunks for the items of ``sequence``.

    Each item is written to a GzipFile (compression level 6, mtime 0) backed
    by a StreamingBuffer; whatever the buffer holds after each write is
    yielded if non-empty. When ``max_random_bytes`` is truthy, a random-length
    filename from ``_get_random_filename`` is passed to the GzipFile.
    """
    if max_random_bytes:
        filename = _get_random_filename(max_random_bytes)
    else:
        filename = None
    out = StreamingBuffer()
    with GzipFile(
        filename=filename, mode="wb", compresslevel=6, fileobj=out, mtime=0
    ) as gz:
        # Whatever was written on opening (the header) goes out first.
        yield out.read()
        for chunk in sequence:
            gz.write(chunk)
            pending = out.read()
            if pending:
                yield pending
    # Closing the GzipFile may have written more; flush that as well.
    yield out.read()

361

362

# Expression to match some_token and some_token="with spaces" (and similarly
# for single-quoted strings).
# First alternative: optional unquoted text, then one or more quoted strings
# (backslash escapes allowed inside), each optionally followed by more
# unquoted text. Second alternative: any run of non-whitespace.
# Compiled with re.VERBOSE, so whitespace inside the pattern is ignored.
smart_split_re = _lazy_re_compile(
    r"""
    ((?:
        [^\s'"]*
        (?:
            (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
            [^\s'"]*
        )+
    ) | \S+)
""",
    re.VERBOSE,
)

377

378

def smart_split(text):
    r"""
    Yield the space-separated pieces of ``str(text)``, keeping quoted phrases
    in one piece.

    Single and double quotes are both recognized, and quotes may be escaped
    with backslashes. Pieces keep their surrounding quote marks and their
    escapes (pass them to unescape_string_literal() to remove those).

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    yield from (match[0] for match in smart_split_re.finditer(str(text)))

396

397

@keep_lazy_text
def unescape_string_literal(s):
    r"""
    Strip the surrounding quotes from a quoted string literal and unescape
    the quotes and backslashes inside it::

        >>> unescape_string_literal('"abc"')
        'abc'
        >>> unescape_string_literal("'abc'")
        'abc'
        >>> unescape_string_literal('"a \"bc\""')
        'a "bc"'
        >>> unescape_string_literal("'\'ab\' c'")
        "'ab' c"

    Raise ValueError when ``s`` is empty or does not start and end with the
    same quote character.
    """
    well_formed = bool(s) and s[0] in "\"'" and s[-1] == s[0]
    if not well_formed:
        raise ValueError("Not a string literal: %r" % s)
    quote = s[0]
    inner = s[1:-1]
    # Escaped quotes are handled first, escaped backslashes second.
    return inner.replace(r"\%s" % quote, quote).replace(r"\\", "\\")

417

418

@keep_lazy_text
def slugify(value, allow_unicode=False):
    """
    Build a slug from ``value``.

    Unless ``allow_unicode`` is true, the text is reduced to ASCII. It is
    then lower-cased, characters other than word characters, whitespace, and
    hyphens are removed, runs of whitespace and hyphens become a single
    hyphen, and leading/trailing hyphens and underscores are stripped.
    """
    text = str(value)
    if not allow_unicode:
        # Decompose, then drop everything that has no ASCII encoding.
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")
    else:
        text = unicodedata.normalize("NFKC", text)
    stripped = re.sub(r"[^\w\s-]", "", text.lower())
    collapsed = re.sub(r"[-\s]+", "-", stripped)
    return collapsed.strip("-_")

438

439

def camel_case_to_spaces(value):
    """
    Insert a space before each CamelCase word boundary, strip surrounding
    whitespace, and return the result in lowercase.
    """
    spaced = re_camel_case.sub(r" \1", value)
    return spaced.strip().lower()

445

446

def _format_lazy(format_string, *args, **kwargs):
    """
    Call ``format_string.format(*args, **kwargs)`` and return the result.

    ``format_string``, ``args``, and/or ``kwargs`` might be lazy.
    """
    formatter = format_string.format
    return formatter(*args, **kwargs)

453

454

# Public wrapper: _format_lazy passed through lazy() with ``str`` as the
# result class. NOTE(review): presumably this defers the formatting until the
# value is actually used as a string -- confirm against
# django.utils.functional.lazy.
format_lazy = lazy(_format_lazy, str)