Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/boltons/strutils.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

384 statements  

1# Copyright (c) 2013, Mahmoud Hashemi 

2# 

3# Redistribution and use in source and binary forms, with or without 

4# modification, are permitted provided that the following conditions are 

5# met: 

6# 

7# * Redistributions of source code must retain the above copyright 

8# notice, this list of conditions and the following disclaimer. 

9# 

10# * Redistributions in binary form must reproduce the above 

11# copyright notice, this list of conditions and the following 

12# disclaimer in the documentation and/or other materials provided 

13# with the distribution. 

14# 

15# * The names of the contributors may not be used to endorse or 

16# promote products derived from this software without specific 

17# prior written permission. 

18# 

19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

30 

31"""So much practical programming involves string manipulation, which 

32Python readily accommodates. Still, there are dozens of basic and 

33common capabilities missing from the standard library, several of them 

34provided by ``strutils``. 

35""" 

36 

37 

38import builtins 

39import re 

40import sys 

41import uuid 

42import zlib 

43import string 

44import unicodedata 

45import collections 

46from collections.abc import Mapping 

47from gzip import GzipFile 

48from html.parser import HTMLParser 

49from html import entities as htmlentitydefs 

50from io import BytesIO as StringIO 

51 

52 

# Explicit public API of the module; anything not listed here is internal.
__all__ = ['camel2under', 'under2camel', 'slugify', 'split_punct_ws',
           'unit_len', 'ordinalize', 'cardinalize', 'pluralize', 'singularize',
           'asciify', 'is_ascii', 'is_uuid', 'html2text', 'strip_ansi',
           'bytes2human', 'find_hashtags', 'a10n', 'gzip_bytes', 'gunzip_bytes',
           'iter_splitlines', 'indent', 'escape_shell_args',
           'args2cmd', 'args2sh', 'parse_int_list', 'format_int_list',
           'complement_int_list', 'int_ranges_from_int_list', 'MultiReplace',
           'multi_replace', 'unwrap_text', 'removeprefix']

61 

62 

# Characters treated as word separators by split_punct_ws()/slugify().
_punct_ws_str = string.punctuation + string.whitespace
# Matches one or more consecutive punctuation/whitespace characters.
_punct_re = re.compile('[' + _punct_ws_str + ']+')
# Finds camelCase boundaries: an uppercase letter after a lowercase
# letter or digit, or a non-leading uppercase letter before a lowercase.
_camel2under_re = re.compile('((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))')

66 

67 

def camel2under(camel_string):
    """Converts a camelcased string to underscores. Useful for turning a
    class name into a function name.

    >>> camel2under('BasicParseTest')
    'basic_parse_test'
    """
    # insert an underscore at each camelCase boundary, then lowercase
    underscored = _camel2under_re.sub(r'_\1', camel_string)
    return underscored.lower()

76 

77 

def under2camel(under_string):
    """Converts an underscored string to camelcased. Useful for turning a
    function name into a class name.

    >>> under2camel('complex_tokenizer')
    'ComplexTokenizer'
    """
    pieces = under_string.split('_')
    # an empty piece (from doubled underscores) keeps a literal '_'
    return ''.join(piece.capitalize() or '_' for piece in pieces)

86 

87 

def slugify(text, delim='_', lower=True, ascii=False):
    """
    A basic function that turns text full of scary characters
    (i.e., punctuation and whitespace), into a relatively safe
    lowercased string separated only by the delimiter specified
    by *delim*, which defaults to ``_``.

    The *ascii* convenience flag will :func:`asciify` the slug if
    you require ascii-only slugs.

    >>> slugify('First post! Hi!!!!~1    ')
    'first_post_hi_1'

    >>> slugify("Kurt Gödel's pretty cool.", ascii=True) == \
        b'kurt_goedel_s_pretty_cool'
    True

    """
    if not text:
        ret = ''
    else:
        # if the text was nothing but separators, fall back to delim itself
        ret = delim.join(split_punct_ws(text)) or delim
    if ascii:
        ret = asciify(ret)
    if lower:
        ret = ret.lower()
    return ret

112 

113 

def split_punct_ws(text):
    """While :meth:`str.split` will split on whitespace,
    :func:`split_punct_ws` will split on punctuation and
    whitespace. This used internally by :func:`slugify`, above.

    >>> split_punct_ws('First post! Hi!!!!~1    ')
    ['First', 'post', 'Hi', '1']
    """
    # drop the empty strings produced by leading/trailing separators
    return list(filter(None, _punct_re.split(text)))

123 

124 

def unit_len(sized_iterable, unit_noun='item'):  # TODO: len_units()/unitize()?
    """Returns a plain-English description of an iterable's
    :func:`len()`, conditionally pluralized with :func:`cardinalize`,
    detailed below.

    >>> print(unit_len(range(10), 'number'))
    10 numbers
    >>> print(unit_len('aeiou', 'vowel'))
    5 vowels
    >>> print(unit_len([], 'worry'))
    No worries
    """
    n = len(sized_iterable)
    noun = cardinalize(unit_noun, n)
    return f'{n} {noun}' if n else f'No {noun}'

142 

143 

_ORDINAL_MAP = {'1': 'st',
                '2': 'nd',
                '3': 'rd'}  # 'th' is the default


def ordinalize(number, ext_only=False):
    """Turns *number* into its cardinal form, i.e., 1st, 2nd,
    3rd, 4th, etc. If the last character isn't a digit, it returns the
    string value unchanged.

    Args:
        number (int or str): Number to be cardinalized.
        ext_only (bool): Whether to return only the suffix. Default ``False``.

    >>> print(ordinalize(1))
    1st
    >>> print(ordinalize(3694839230))
    3694839230th
    >>> print(ordinalize('hi'))
    hi
    >>> print(ordinalize(1515))
    1515th
    """
    numstr = str(number)
    ext = ''
    if numstr and numstr[-1] in string.digits:
        # "teens" (11th, 12th, 13th, ...) always take 'th'; the slice
        # yields '' for single-digit numbers, so no IndexError handling
        # is needed
        if numstr[-2:-1] == '1':
            ext = 'th'
        else:
            ext = _ORDINAL_MAP.get(numstr[-1], 'th')
    return ext if ext_only else numstr + ext

183 

184 

def cardinalize(unit_noun, count):
    """Conditionally pluralizes a singular word *unit_noun* if
    *count* is not one, preserving case when possible.

    >>> vowels = 'aeiou'
    >>> print(len(vowels), cardinalize('vowel', len(vowels)))
    5 vowels
    >>> print(3, cardinalize('Wish', 3))
    3 Wishes
    """
    return unit_noun if count == 1 else pluralize(unit_noun)

198 

199 

def singularize(word):
    """Semi-intelligently converts an English plural *word* to its
    singular form, preserving case pattern.

    >>> singularize('chances')
    'chance'
    >>> singularize('Activities')
    'Activity'
    >>> singularize('Glasses')
    'Glass'
    >>> singularize('FEET')
    'FOOT'

    """
    orig_word, word = word, word.strip().lower()
    # empty, or already a singular (a key of the singular->plural map)
    if not word or word in _IRR_S2P:
        return orig_word

    # irregular plurals come from the reverse lookup table
    irr_singular = _IRR_P2S.get(word)
    if irr_singular:
        singular = irr_singular
    elif not word.endswith('s'):
        # not a recognizable plural form
        return orig_word
    elif len(word) == 2:
        singular = word[:-1]  # or just return word?
    elif word.endswith('ies') and word[-4:-3] not in 'aeiou':
        # consonant + 'ies' -> 'y', e.g. 'activities' -> 'activity'
        singular = word[:-3] + 'y'
    elif word.endswith('es') and word[-3] == 's':
        # 'sses' -> 'ss', e.g. 'glasses' -> 'glass'
        singular = word[:-2]
    else:
        singular = word[:-1]
    # restore the original's casing (lower/UPPER/Title)
    return _match_case(orig_word, singular)

232 

233 

def pluralize(word):
    """Semi-intelligently converts an English *word* from singular form to
    plural, preserving case pattern.

    >>> pluralize('friend')
    'friends'
    >>> pluralize('enemy')
    'enemies'
    >>> pluralize('Sheep')
    'Sheep'
    """
    orig_word, word = word, word.strip().lower()
    # empty, or already a plural (a key of the plural->singular map)
    if not word or word in _IRR_P2S:
        return orig_word
    # irregular singulars come from the forward lookup table
    irr_plural = _IRR_S2P.get(word)
    if irr_plural:
        plural = irr_plural
    elif word.endswith('y') and word[-2:-1] not in 'aeiou':
        # consonant + 'y' -> 'ies', e.g. 'enemy' -> 'enemies'
        plural = word[:-1] + 'ies'
    elif word[-1] == 's' or word.endswith('ch') or word.endswith('sh'):
        # sibilant endings take 'es', e.g. 'wish' -> 'wishes'
        plural = word if word.endswith('es') else word + 'es'
    else:
        plural = word + 's'
    # restore the original's casing (lower/UPPER/Title)
    return _match_case(orig_word, plural)

258 

259 

260def _match_case(master, disciple): 

261 if not master.strip(): 

262 return disciple 

263 if master.lower() == master: 

264 return disciple.lower() 

265 elif master.upper() == master: 

266 return disciple.upper() 

267 elif master.title() == master: 

268 return disciple.title() 

269 return disciple 

270 

271 

272# Singular to plural map of irregular pluralizations 

273_IRR_S2P = {'addendum': 'addenda', 'alga': 'algae', 'alumna': 'alumnae', 

274 'alumnus': 'alumni', 'analysis': 'analyses', 'antenna': 'antennae', 

275 'appendix': 'appendices', 'axis': 'axes', 'bacillus': 'bacilli', 

276 'bacterium': 'bacteria', 'basis': 'bases', 'beau': 'beaux', 

277 'bison': 'bison', 'bureau': 'bureaus', 'cactus': 'cacti', 

278 'calf': 'calves', 'child': 'children', 'corps': 'corps', 

279 'corpus': 'corpora', 'crisis': 'crises', 'criterion': 'criteria', 

280 'curriculum': 'curricula', 'datum': 'data', 'deer': 'deer', 

281 'diagnosis': 'diagnoses', 'die': 'dice', 'dwarf': 'dwarves', 

282 'echo': 'echoes', 'elf': 'elves', 'ellipsis': 'ellipses', 

283 'embargo': 'embargoes', 'emphasis': 'emphases', 'erratum': 'errata', 

284 'fireman': 'firemen', 'fish': 'fish', 'focus': 'foci', 

285 'foot': 'feet', 'formula': 'formulae', 'formula': 'formulas', 

286 'fungus': 'fungi', 'genus': 'genera', 'goose': 'geese', 

287 'half': 'halves', 'hero': 'heroes', 'hippopotamus': 'hippopotami', 

288 'hoof': 'hooves', 'hypothesis': 'hypotheses', 'index': 'indices', 

289 'knife': 'knives', 'leaf': 'leaves', 'life': 'lives', 

290 'loaf': 'loaves', 'louse': 'lice', 'man': 'men', 

291 'matrix': 'matrices', 'means': 'means', 'medium': 'media', 

292 'memorandum': 'memoranda', 'millennium': 'milennia', 'moose': 'moose', 

293 'mosquito': 'mosquitoes', 'mouse': 'mice', 'nebula': 'nebulae', 

294 'neurosis': 'neuroses', 'nucleus': 'nuclei', 'oasis': 'oases', 

295 'octopus': 'octopi', 'offspring': 'offspring', 'ovum': 'ova', 

296 'ox': 'oxen', 'paralysis': 'paralyses', 'parenthesis': 'parentheses', 

297 'person': 'people', 'phenomenon': 'phenomena', 'potato': 'potatoes', 

298 'radius': 'radii', 'scarf': 'scarves', 'scissors': 'scissors', 

299 'self': 'selves', 'sense': 'senses', 'series': 'series', 'sheep': 

300 'sheep', 'shelf': 'shelves', 'species': 'species', 'stimulus': 

301 'stimuli', 'stratum': 'strata', 'syllabus': 'syllabi', 'symposium': 

302 'symposia', 'synopsis': 'synopses', 'synthesis': 'syntheses', 

303 'tableau': 'tableaux', 'that': 'those', 'thesis': 'theses', 

304 'thief': 'thieves', 'this': 'these', 'tomato': 'tomatoes', 'tooth': 

305 'teeth', 'torpedo': 'torpedoes', 'vertebra': 'vertebrae', 'veto': 

306 'vetoes', 'vita': 'vitae', 'watch': 'watches', 'wife': 'wives', 

307 'wolf': 'wolves', 'woman': 'women'} 

308 

309 

# Reverse index of the above: plural -> singular, used by singularize()
_IRR_P2S = {v: k for k, v in _IRR_S2P.items()}

312 

# A hashmark ('#' or its full-width variant) at start-of-string or after
# whitespace, capturing the tag's word characters.
HASHTAG_RE = re.compile(r"(?:^|\s)[##]{1}(\w+)", re.UNICODE)


def find_hashtags(string):
    """Finds and returns all hashtags in a string, with the hashmark
    removed. Supports full-width hashmarks for Asian languages and
    does not false-positive on URL anchors.

    >>> find_hashtags('#atag http://asite/#ananchor')
    ['atag']

    ``find_hashtags`` also works with unicode hashtags.
    """

    # the following works, doctest just struggles with it
    # >>> find_hashtags(u"can't get enough of that dignity chicken #肯德基 woo")
    # [u'\u80af\u5fb7\u57fa']
    tags = HASHTAG_RE.findall(string)
    return tags

331 

332 

def a10n(string):
    """That thing where "internationalization" becomes "i18n", what's it
    called? Abbreviation? Oh wait, no: ``a10n``. (It's actually a form
    of `numeronym`_.)

    >>> a10n('abbreviation')
    'a10n'
    >>> a10n('internationalization')
    'i18n'
    >>> a10n('')
    ''

    .. _numeronym: http://en.wikipedia.org/wiki/Numeronym
    """
    # strings shorter than 3 chars have no interior to abbreviate
    if len(string) < 3:
        return string
    head, middle, tail = string[0], string[1:-1], string[-1]
    return f'{head}{len(middle)}{tail}'

350 

351 

# Based on https://en.wikipedia.org/wiki/ANSI_escape_code#Escape_sequences
ANSI_SEQUENCES = re.compile(r'''
    \x1B            # Sequence starts with ESC, i.e. hex 0x1B
    (?:
        [@-Z\\-_]   # Second byte:
                    #   all 0x40-0x5F range but CSI char, i.e ASCII @A-Z\]^_
    |               # Or
        \[          # CSI sequences, starting with [
        [0-?]*      # Parameter bytes:
                    #   range 0x30-0x3F, ASCII 0-9:;<=>?
        [ -/]*      # Intermediate bytes:
                    #   range 0x20-0x2F, ASCII space and !"#$%&'()*+,-./
        [@-~]       # Final byte
                    #   range 0x40-0x7E, ASCII @A-Z[\]^_`a-z{|}~
    )
''', re.VERBOSE)


def strip_ansi(text):
    """Strips ANSI escape codes from *text*. Useful for the occasional
    time when a log or redirected output accidentally captures console
    color codes and the like.

    >>> strip_ansi('\x1b[0m\x1b[1;36mart\x1b[46;34m')
    'art'

    Supports str, bytes and bytearray content as input. Returns the
    same type as the input.

    There's a lot of ANSI art available for testing on `sixteencolors.net`_.
    This function does not interpret or render ANSI art, but you can do so with
    `ansi2img`_ or `escapes.js`_.

    .. _sixteencolors.net: http://sixteencolors.net
    .. _ansi2img: http://www.bedroomlan.org/projects/ansi2img
    .. _escapes.js: https://github.com/atdt/escapes.js
    """
    # TODO: move to cliutils.py

    # Decode binary input so the str-based regex can match, remembering
    # the concrete input type to convert back afterwards.
    original_type = type(text)
    is_binary = isinstance(text, (bytes, bytearray))
    if is_binary:
        text = text.decode('utf-8')

    cleaned = ANSI_SEQUENCES.sub('', text)

    # Re-encode so callers get back the same type they passed in.
    if is_binary and original_type != type(cleaned):
        cleaned = original_type(cleaned, 'utf-8')

    return cleaned

406 

407 

def asciify(text, ignore=False):
    """Converts a unicode or bytestring, *text*, into a bytestring with
    just ascii characters. Performs basic deaccenting for all you
    Europhiles out there.

    Also, a gentle reminder that this is a **utility**, primarily meant
    for slugification. Whenever possible, make your application work
    **with** unicode, not against it.

    Args:
        text (str): The string to be asciified.
        ignore (bool): Configures final encoding to ignore remaining
            unasciified string instead of replacing it.

    >>> asciify('Beyoncé') == b'Beyonce'
    True
    """
    try:
        try:
            return text.encode('ascii')
        except UnicodeDecodeError:
            # this usually means a non-unicode (byte) string was passed in
            text = text.decode('utf-8')
            return text.encode('ascii')
    except UnicodeEncodeError:
        # deaccent what we can, then drop or replace the remainder
        errors = 'ignore' if ignore else 'replace'
        deaccented = unicodedata.normalize('NFKD', text.translate(DEACCENT_MAP))
        return deaccented.encode('ascii', errors)

439 

440 

def is_ascii(text):
    """Check if a string or bytestring, *text*, is composed of ascii
    characters only. Raises :exc:`ValueError` if argument is not text.

    Args:
        text (str): The string to be checked.

    >>> is_ascii('Beyoncé')
    False
    >>> is_ascii('Beyonce')
    True
    """
    if isinstance(text, (str, bytes)):
        # str.isascii()/bytes.isascii() (3.7+) replace the older
        # encode/decode round-trip; they cannot raise and allocate nothing
        return text.isascii()
    raise ValueError('expected text or bytes, not %r' % type(text))

466 

467 

class DeaccenterDict(dict):
    "A small caching dictionary for deaccenting."
    def __missing__(self, key):
        # *key* is a codepoint ordinal (int), as passed by str.translate()
        ch = self.get(key)
        if ch is not None:
            return ch
        try:
            # e.g. decomposition of 'à' (0xe0) is '0061 0300'
            de = unicodedata.decomposition(chr(key))
            p1, _, p2 = de.rpartition(' ')
            if int(p2, 16) == 0x308:
                # combining diaeresis (umlaut)
                # NOTE(review): self.get(key) is None here (key is missing,
                # that's why __missing__ ran), so the key caches as None,
                # which makes str.translate() drop the character -- confirm
                # this is the intended fallback for unmapped umlauts
                ch = self.get(key)
            else:
                # use the base character of the decomposition
                ch = int(p1, 16)
        except (IndexError, ValueError):
            # no decomposition (empty string) or non-hex parts:
            # map the character to itself
            ch = self.get(key, key)
        # cache for subsequent lookups
        self[key] = ch
        return ch

485 

486 

# http://chmullig.com/2009/12/python-unicode-ascii-ifier/
# For something more complete, investigate the unidecode
# or isounidecode packages, which are capable of performing
# crude transliteration.
# Maps codepoint ordinals (as consumed by str.translate) to ASCII
# replacement strings; seeds the self-extending DEACCENT_MAP below.
_BASE_DEACCENT_MAP = {
    0xc6: "AE", # Æ LATIN CAPITAL LETTER AE
    0xd0: "D",  # Ð LATIN CAPITAL LETTER ETH
    0xd8: "OE", # Ø LATIN CAPITAL LETTER O WITH STROKE
    0xde: "Th", # Þ LATIN CAPITAL LETTER THORN
    0xc4: 'Ae', # Ä LATIN CAPITAL LETTER A WITH DIAERESIS
    0xd6: 'Oe', # Ö LATIN CAPITAL LETTER O WITH DIAERESIS
    0xdc: 'Ue', # Ü LATIN CAPITAL LETTER U WITH DIAERESIS
    0xc0: "A",  # À LATIN CAPITAL LETTER A WITH GRAVE
    0xc1: "A",  # Á LATIN CAPITAL LETTER A WITH ACUTE
    0xc3: "A",  # Ã LATIN CAPITAL LETTER A WITH TILDE
    0xc7: "C",  # Ç LATIN CAPITAL LETTER C WITH CEDILLA
    0xc8: "E",  # È LATIN CAPITAL LETTER E WITH GRAVE
    0xc9: "E",  # É LATIN CAPITAL LETTER E WITH ACUTE
    0xca: "E",  # Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    0xcc: "I",  # Ì LATIN CAPITAL LETTER I WITH GRAVE
    0xcd: "I",  # Í LATIN CAPITAL LETTER I WITH ACUTE
    0xd2: "O",  # Ò LATIN CAPITAL LETTER O WITH GRAVE
    0xd3: "O",  # Ó LATIN CAPITAL LETTER O WITH ACUTE
    0xd5: "O",  # Õ LATIN CAPITAL LETTER O WITH TILDE
    0xd9: "U",  # Ù LATIN CAPITAL LETTER U WITH GRAVE
    0xda: "U",  # Ú LATIN CAPITAL LETTER U WITH ACUTE
    0xdf: "ss", # ß LATIN SMALL LETTER SHARP S
    0xe6: "ae", # æ LATIN SMALL LETTER AE
    0xf0: "d",  # ð LATIN SMALL LETTER ETH
    0xf8: "oe", # ø LATIN SMALL LETTER O WITH STROKE
    0xfe: "th", # þ LATIN SMALL LETTER THORN,
    0xe4: 'ae', # ä LATIN SMALL LETTER A WITH DIAERESIS
    0xf6: 'oe', # ö LATIN SMALL LETTER O WITH DIAERESIS
    0xfc: 'ue', # ü LATIN SMALL LETTER U WITH DIAERESIS
    0xe0: "a",  # à LATIN SMALL LETTER A WITH GRAVE
    0xe1: "a",  # á LATIN SMALL LETTER A WITH ACUTE
    0xe3: "a",  # ã LATIN SMALL LETTER A WITH TILDE
    0xe7: "c",  # ç LATIN SMALL LETTER C WITH CEDILLA
    0xe8: "e",  # è LATIN SMALL LETTER E WITH GRAVE
    0xe9: "e",  # é LATIN SMALL LETTER E WITH ACUTE
    0xea: "e",  # ê LATIN SMALL LETTER E WITH CIRCUMFLEX
    0xec: "i",  # ì LATIN SMALL LETTER I WITH GRAVE
    0xed: "i",  # í LATIN SMALL LETTER I WITH ACUTE
    0xf2: "o",  # ò LATIN SMALL LETTER O WITH GRAVE
    0xf3: "o",  # ó LATIN SMALL LETTER O WITH ACUTE
    0xf5: "o",  # õ LATIN SMALL LETTER O WITH TILDE
    0xf9: "u",  # ù LATIN SMALL LETTER U WITH GRAVE
    0xfa: "u",  # ú LATIN SMALL LETTER U WITH ACUTE
    0x2018: "'", # ‘ LEFT SINGLE QUOTATION MARK
    0x2019: "'", # ’ RIGHT SINGLE QUOTATION MARK
    0x201c: '"', # “ LEFT DOUBLE QUOTATION MARK
    0x201d: '"', # ” RIGHT DOUBLE QUOTATION MARK
    }


# Caching translation table used by asciify(); extends itself on demand
# via DeaccenterDict.__missing__ using unicode decompositions.
DEACCENT_MAP = DeaccenterDict(_BASE_DEACCENT_MAP)

543 

544 

# Unit symbols for successive powers of 1024, and the derived
# (bound, symbol) pairs used by bytes2human() below.
_SIZE_SYMBOLS = ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
_SIZE_BOUNDS = [(1024 ** i, sym) for i, sym in enumerate(_SIZE_SYMBOLS)]
_SIZE_RANGES = list(zip(_SIZE_BOUNDS, _SIZE_BOUNDS[1:]))


def bytes2human(nbytes, ndigits=0):
    """Turns an integer value of *nbytes* into a human readable format. Set
    *ndigits* to control how many digits after the decimal point
    should be shown (default ``0``).

    >>> bytes2human(128991)
    '126K'
    >>> bytes2human(100001221)
    '95M'
    >>> bytes2human(0, 2)
    '0.00B'
    """
    abs_bytes = abs(nbytes)
    # find the first range whose upper bound contains the magnitude;
    # if none does, the loop leaves the largest range selected
    for (size, symbol), (next_size, _next_symbol) in _SIZE_RANGES:
        if abs_bytes <= next_size:
            break
    scaled = float(nbytes) / size
    return f'{scaled:.{ndigits}f}{symbol}'

570 

571 

class HTMLTextExtractor(HTMLParser):
    """Accumulates the text content fed to it (data, character and
    entity references), discarding all markup."""

    def __init__(self):
        # deliberately does not call super().__init__(); reset() performs
        # the parser-state initialization, and the flags are set directly
        self.reset()
        self.strict = False
        self.convert_charrefs = True
        self.result = []

    def handle_data(self, d):
        self.result.append(d)

    def handle_charref(self, number):
        # numeric reference: decimal (&#123;) or hex (&#x7b; / &#X7B;)
        if number[:1] in ('x', 'X'):
            codepoint = int(number[1:], 16)
        else:
            codepoint = int(number)
        self.result.append(chr(codepoint))

    def handle_entityref(self, name):
        codepoint = htmlentitydefs.name2codepoint.get(name)
        if codepoint is None:
            # unknown entity: pass it through verbatim
            self.result.append('&' + name + ';')
        else:
            self.result.append(chr(codepoint))

    def get_text(self):
        return ''.join(self.result)

599 

600 

def html2text(html):
    """Strips tags from HTML text, returning markup-free text. Also, does
    a best effort replacement of entities like "&nbsp;"

    >>> r = html2text(u'<a href="#">Test &amp;<em>(\u0394&#x03b7;&#956;&#x03CE;)</em></a>')
    >>> r == u'Test &(\u0394\u03b7\u03bc\u03ce)'
    True
    """
    # based on answers to http://stackoverflow.com/questions/753052/
    extractor = HTMLTextExtractor()
    extractor.feed(html)
    return extractor.get_text()

613 

614 

# Reference gzip payloads used by the gunzip_bytes doctests below.
_EMPTY_GZIP_BYTES = b'\x1f\x8b\x08\x089\xf3\xb9U\x00\x03empty\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00'
_NON_EMPTY_GZIP_BYTES = b'\x1f\x8b\x08\x08\xbc\xf7\xb9U\x00\x03not_empty\x00K\xaa,I-N\xcc\xc8\xafT\xe4\x02\x00\xf3nb\xbf\x0b\x00\x00\x00'

617 

618 

def gunzip_bytes(bytestring):
    """The :mod:`gzip` module is great if you have a file or file-like
    object, but what if you just have bytes. StringIO is one
    possibility, but it's often faster, easier, and simpler to just
    use this one-liner. Use this tried-and-true utility function to
    decompress gzip from bytes.

    >>> gunzip_bytes(_EMPTY_GZIP_BYTES) == b''
    True
    >>> gunzip_bytes(_NON_EMPTY_GZIP_BYTES).rstrip() == b'bytesahoy!'
    True
    """
    # a wbits value of MAX_WBITS + 16 tells zlib to expect a gzip
    # header and trailer around the deflate stream
    gzip_wbits = zlib.MAX_WBITS | 16
    return zlib.decompress(bytestring, gzip_wbits)

632 

633 

def gzip_bytes(bytestring, level=6):
    """Turn some bytes into some compressed bytes.

    >>> len(gzip_bytes(b'a' * 10000))
    46

    Args:
        bytestring (bytes): Bytes to be compressed
        level (int): An integer, 1-9, controlling the
            speed/compression. 1 is fastest, least compressed, 9 is
            slowest, but most compressed.

    Note that all levels of gzip are pretty fast these days, though
    it's not really a competitor in compression, at any level.
    """
    out = StringIO()
    # the context manager guarantees the GzipFile is closed (flushing the
    # gzip trailer) even if write() raises, instead of leaking the stream
    with GzipFile(fileobj=out, mode='wb', compresslevel=level) as f:
        f.write(bytestring)
    return out.getvalue()

654 

655 

656 

657_line_ending_re = re.compile(r'(\r\n|\n|\x0b|\f|\r|\x85|\x2028|\x2029)', 

658 re.UNICODE) 

659 

660 

def iter_splitlines(text):
    r"""Like :meth:`str.splitlines`, but returns an iterator of lines
    instead of a list. Also similar to :meth:`file.next`, as that also
    lazily reads and yields lines from a file.

    This function works with a variety of line endings, but as always,
    be careful when mixing line endings within a file.

    >>> list(iter_splitlines('\nhi\nbye\n'))
    ['', 'hi', 'bye', '']
    >>> list(iter_splitlines('\r\nhi\rbye\r\n'))
    ['', 'hi', 'bye', '']
    >>> list(iter_splitlines(''))
    []
    """
    prev_end, len_text = 0, len(text)
    # print('last: %r' % last_idx)
    # start, end = None, None
    for match in _line_ending_re.finditer(text):
        start, end = match.start(1), match.end(1)
        # print(start, end)
        if prev_end <= start:
            # yield the text between the previous line ending and this
            # one (empty when two endings are adjacent)
            yield text[prev_end:start]
        if end == len_text:
            # text ends with a line ending: emit a trailing empty line,
            # matching the doctests above
            yield ''
        prev_end = end
    # whatever follows the last line ending is the final line
    tail = text[prev_end:]
    if tail:
        yield tail
    return

691 

692 

def indent(text, margin, newline='\n', key=bool):
    """The missing counterpart to the built-in :func:`textwrap.dedent`.

    Args:
        text (str): The text to indent.
        margin (str): The string to prepend to each line.
        newline (str): The newline used to rejoin the lines (default: ``\\n``)
        key (callable): Called on each line to determine whether to
          indent it. Default: :class:`bool`, to ensure that empty lines do
          not get whitespace added.
    """
    out = []
    for line in iter_splitlines(text):
        out.append(margin + line if key(line) else line)
    return newline.join(out)

707 

708 

def is_uuid(obj, version=4):
    """Check the argument is either a valid UUID object or string.

    Args:
        obj (object): The test target. Strings and UUID objects supported.
        version (int): The target UUID version, set to 0 to skip version check.

    >>> is_uuid('e682ccca-5a4c-4ef2-9711-73f9ad1e15ea')
    True
    >>> is_uuid('0221f0d9-d4b9-11e5-a478-10ddb1c2feb9')
    False
    >>> is_uuid('0221f0d9-d4b9-11e5-a478-10ddb1c2feb9', version=1)
    True
    """
    if not isinstance(obj, uuid.UUID):
        try:
            obj = uuid.UUID(obj)
        except (TypeError, ValueError, AttributeError):
            # not parseable as a UUID at all
            return False
    if not version:
        # version 0 / falsy skips the version check entirely
        return True
    return obj.version == int(version)

731 

732 

def escape_shell_args(args, sep=' ', style=None):
    """Returns an escaped version of each string in *args*, according to
    *style*.

    Args:
        args (list): A list of arguments to escape and join together
        sep (str): The separator used to join the escaped arguments.
        style (str): The style of escaping to use. Can be one of
          ``cmd`` or ``sh``, geared toward Windows and Linux/BSD/etc.,
          respectively. If *style* is ``None``, then it is picked
          according to the system platform.

    See :func:`args2cmd` and :func:`args2sh` for details and example
    output for each style.
    """
    if not style:
        # pick the escaping convention matching the host platform
        style = 'cmd' if sys.platform == 'win32' else 'sh'

    if style == 'sh':
        return args2sh(args, sep=sep)
    if style == 'cmd':
        return args2cmd(args, sep=sep)

    raise ValueError("style expected one of 'cmd' or 'sh', not %r" % style)

757 

758 

# Detects any character outside the sh-safe set; a match means the
# argument needs quoting.
_find_sh_unsafe = re.compile(r'[^a-zA-Z0-9_@%+=:,./-]').search


def args2sh(args, sep=' '):
    """Return a shell-escaped string version of *args*, separated by
    *sep*, based on the rules of sh, bash, and other shells in the
    Linux/BSD/MacOS ecosystem.

    Args:
        args (list): A list of argument strings to escape.
        sep (str): The separator used to join the escaped arguments
            (default: a single space).

    >>> print(args2sh(['aa', '[bb]', "cc'cc", 'dd"dd']))
    aa '[bb]' 'cc'"'"'cc' 'dd"dd'

    As you can see, arguments with no special characters are not
    escaped, arguments with special characters are quoted with single
    quotes, and single quotes themselves are quoted with double
    quotes. Double quotes are handled like any other special
    character.

    Based on code from the :mod:`pipes`/:mod:`shlex` modules. Also
    note that :mod:`shlex` and :mod:`argparse` have functions to split
    and parse strings escaped in this manner.
    """
    ret_list = []

    for arg in args:
        if not arg:
            # empty arguments must still appear, as ''
            ret_list.append("''")
            continue
        if _find_sh_unsafe(arg) is None:
            ret_list.append(arg)
            continue
        # use single quotes, and put single quotes into double quotes
        # the string $'b is then quoted as '$'"'"'b'
        ret_list.append("'" + arg.replace("'", "'\"'\"'") + "'")

    # FIX: *sep* was previously ignored (arguments were always joined
    # with a single space); honor the documented parameter
    return sep.join(ret_list)

794 

795 

def args2cmd(args, sep=' '):
    r"""Return a shell-escaped string version of *args*, separated by
    *sep*, using the same rules as the Microsoft C runtime.

    >>> print(args2cmd(['aa', '[bb]', "cc'cc", 'dd"dd']))
    aa [bb] cc'cc dd\"dd

    As you can see, escaping is through backslashing and not quoting,
    and double quotes are the only special character. See the comment
    in the code for more details. Based on internal code from the
    :mod:`subprocess` module.

    """
    # technique description from subprocess below
    """
    1) Arguments are delimited by white space, which is either a
       space or a tab.

    2) A string surrounded by double quotation marks is
       interpreted as a single argument, regardless of white space
       contained within.  A quoted string can be embedded in an
       argument.

    3) A double quotation mark preceded by a backslash is
       interpreted as a literal double quotation mark.

    4) Backslashes are interpreted literally, unless they
       immediately precede a double quotation mark.

    5) If backslashes immediately precede a double quotation mark,
       every pair of backslashes is interpreted as a literal
       backslash.  If the number of backslashes is odd, the last
       backslash escapes the next double quotation mark as
       described in rule 3.

    See http://msdn.microsoft.com/en-us/library/17w5ykft.aspx
    or search http://msdn.microsoft.com for
    "Parsing C++ Command-Line Arguments"
    """
    # NOTE(review): *sep* appears unused below; arguments are always
    # joined with a single space -- confirm whether sep should apply
    # here as it does in args2sh
    result = []
    needquote = False
    for arg in args:
        # pending run of backslashes whose meaning depends on what follows
        bs_buf = []

        # Add a space to separate this argument from the others
        if result:
            result.append(' ')

        needquote = (" " in arg) or ("\t" in arg) or not arg
        if needquote:
            result.append('"')

        for c in arg:
            if c == '\\':
                # Don't know if we need to double yet.
                bs_buf.append(c)
            elif c == '"':
                # Double backslashes.
                result.append('\\' * len(bs_buf)*2)
                bs_buf = []
                result.append('\\"')
            else:
                # Normal char
                if bs_buf:
                    result.extend(bs_buf)
                    bs_buf = []
                result.append(c)

        # Add remaining backslashes, if any.
        if bs_buf:
            result.extend(bs_buf)

        if needquote:
            # backslashes before the closing quote must be doubled (rule 5)
            result.extend(bs_buf)
            result.append('"')

    return ''.join(result)

873 

874 

def parse_int_list(range_string, delim=',', range_delim='-'):
    """Returns a sorted list of positive integers based on
    *range_string*. Reverse of :func:`format_int_list`.

    Args:
        range_string (str): String of comma separated positive
            integers or ranges (e.g. '1,2,4-6,8'). Typical of a custom
            page range string used in printer dialogs.
        delim (char): Defaults to ','. Separates integers and
            contiguous ranges of integers.
        range_delim (char): Defaults to '-'. Indicates a contiguous
            range of integers.

    >>> parse_int_list('1,3,5-8,10-11,15')
    [1, 3, 5, 6, 7, 8, 10, 11, 15]

    """
    output = []

    for piece in range_string.strip().split(delim):
        if not piece:
            # skip empty tokens, e.g. from doubled or trailing delimiters
            continue
        if range_delim in piece:
            # contiguous range: expand between min and max, inclusive
            bounds = [int(part) for part in piece.split(range_delim)]
            output.extend(range(min(bounds), max(bounds) + 1))
        else:
            output.append(int(piece))

    return sorted(output)

910 

911 

def format_int_list(int_list, delim=',', range_delim='-', delim_space=False):
    """Returns a sorted range string from a list of positive integers
    (*int_list*). Contiguous ranges of integers are collapsed to min
    and max values. Reverse of :func:`parse_int_list`.

    Args:
        int_list (list): List of positive integers to be converted
            into a range string (e.g. [1,2,4,5,6,8]).
        delim (char): Defaults to ','. Separates integers and
            contiguous ranges of integers.
        range_delim (char): Defaults to '-'. Indicates a contiguous
            range of integers.
        delim_space (bool): Defaults to ``False``. If ``True``, adds a
            space after all *delim* characters.

    >>> format_int_list([1,3,5,6,7,8,10,11,15])
    '1,3,5-8,10-11,15'

    """
    # Duplicates collapse and order is normalized, so working over the
    # sorted unique values reproduces the original grouping behavior.
    values = sorted(set(int_list))
    pieces = []

    i = 0
    while i < len(values):
        # Advance j to the end of the contiguous run starting at i.
        j = i
        while j + 1 < len(values) and values[j + 1] == values[j] + 1:
            j += 1
        if j == i:
            # Isolated value: emit it alone.
            pieces.append('%d' % values[i])
        else:
            # Run of two or more: emit as 'min<range_delim>max'.
            pieces.append('%d%s%d' % (values[i], range_delim, values[j]))
        i = j + 1

    joiner = delim + ' ' if delim_space else delim
    return joiner.join(pieces)

1000 

1001 

def complement_int_list(
        range_string, range_start=0, range_end=None,
        delim=',', range_delim='-'):
    """ Returns range string that is the complement of the one provided as
    *range_string* parameter.

    These range strings are of the kind produce by :func:`format_int_list`, and
    parseable by :func:`parse_int_list`.

    Args:
        range_string (str): String of comma separated positive integers or
            ranges (e.g. '1,2,4-6,8'). Typical of a custom page range string
            used in printer dialogs.
        range_start (int): A positive integer from which to start the resulting
            range. Value is inclusive. Defaults to ``0``.
        range_end (int): A positive integer from which the produced range is
            stopped. Value is exclusive. Defaults to the maximum value found in
            the provided ``range_string``.
        delim (char): Defaults to ','. Separates integers and contiguous ranges
            of integers.
        range_delim (char): Defaults to '-'. Indicates a contiguous range of
            integers.

    >>> complement_int_list('1,3,5-8,10-11,15')
    '0,2,4,9,12-14'

    >>> complement_int_list('1,3,5-8,10-11,15', range_start=0)
    '0,2,4,9,12-14'

    >>> complement_int_list('1,3,5-8,10-11,15', range_start=1)
    '2,4,9,12-14'

    >>> complement_int_list('1,3,5-8,10-11,15', range_start=2)
    '2,4,9,12-14'

    >>> complement_int_list('1,3,5-8,10-11,15', range_start=3)
    '4,9,12-14'

    >>> complement_int_list('1,3,5-8,10-11,15', range_end=15)
    '0,2,4,9,12-14'

    >>> complement_int_list('1,3,5-8,10-11,15', range_end=14)
    '0,2,4,9,12-13'

    >>> complement_int_list('1,3,5-8,10-11,15', range_end=13)
    '0,2,4,9,12'

    >>> complement_int_list('1,3,5-8,10-11,15', range_end=20)
    '0,2,4,9,12-14,16-19'

    >>> complement_int_list('1,3,5-8,10-11,15', range_end=0)
    ''

    >>> complement_int_list('1,3,5-8,10-11,15', range_start=-1)
    '0,2,4,9,12-14'

    >>> complement_int_list('1,3,5-8,10-11,15', range_end=-1)
    ''

    >>> complement_int_list('1,3,5-8', range_start=1, range_end=1)
    ''

    >>> complement_int_list('1,3,5-8', range_start=2, range_end=2)
    ''

    >>> complement_int_list('1,3,5-8', range_start=2, range_end=3)
    '2'

    >>> complement_int_list('1,3,5-8', range_start=-10, range_end=-5)
    ''

    >>> complement_int_list('1,3,5-8', range_start=20, range_end=10)
    ''

    >>> complement_int_list('')
    ''
    """
    used = set(parse_int_list(range_string, delim, range_delim))

    # Default the (exclusive) upper bound to just past the largest
    # listed value; an empty input yields an empty complement.
    if range_end is None:
        range_end = max(used) + 1 if used else range_start

    # A negative start behaves like 0 and a start past the end yields
    # an empty range, both courtesy of range() semantics.
    start = max(range_start, 0)
    complement = [i for i in range(start, range_end) if i not in used]
    return format_int_list(complement, delim, range_delim)

1088 

1089 

def int_ranges_from_int_list(range_string, delim=',', range_delim='-'):
    """ Transform a string of ranges (*range_string*) into a tuple of tuples.

    Args:
        range_string (str): String of comma separated positive integers or
            ranges (e.g. '1,2,4-6,8'). Typical of a custom page range string
            used in printer dialogs.
        delim (char): Defaults to ','. Separates integers and contiguous ranges
            of integers.
        range_delim (char): Defaults to '-'. Indicates a contiguous range of
            integers.

    >>> int_ranges_from_int_list('1,3,5-8,10-11,15')
    ((1, 1), (3, 3), (5, 8), (10, 11), (15, 15))

    >>> int_ranges_from_int_list('1')
    ((1, 1),)

    >>> int_ranges_from_int_list('')
    ()
    """
    # Round-trip through parse/format to collapse the input into the
    # canonical ','/'-' form before splitting it into bound pairs.
    normalized = format_int_list(
        parse_int_list(range_string, delim, range_delim))

    pairs = []
    if normalized:
        for chunk in normalized.split(','):
            # A lone integer becomes a degenerate (n, n) range.
            lo, _, hi = chunk.partition('-')
            pairs.append((int(lo), int(hi or lo)))
    return tuple(pairs)

1123 

1124 

class MultiReplace:
    """
    MultiReplace is a tool for doing multiple find/replace actions in one pass.

    Given a mapping of values to be replaced it allows for all of the matching
    values to be replaced in a single pass which can save a lot of performance
    on very large strings. In addition to simple replace, it also allows for
    replacing based on regular expressions.

    Keyword Arguments:

    :type regex: bool
    :param regex: Treat search keys as regular expressions [Default: False]
    :type flags: int
    :param flags: flags to pass to the regex engine during compile

    Dictionary Usage::

        from boltons import strutils
        s = strutils.MultiReplace({
            'foo': 'zoo',
            'cat': 'hat',
            'bat': 'kraken'
        })
        new = s.sub('The foo bar cat ate a bat')
        new == 'The zoo bar hat ate a kraken'

    Iterable Usage::

        from boltons import strutils
        s = strutils.MultiReplace([
            ('foo', 'zoo'),
            ('cat', 'hat'),
            ('bat', 'kraken)'
        ])
        new = s.sub('The foo bar cat ate a bat')
        new == 'The zoo bar hat ate a kraken'


    The constructor can be passed a dictionary or other mapping as well as
    an iterable of tuples. If given an iterable, the substitution will be run
    in the order the replacement values are specified in the iterable. This is
    also true if it is given an OrderedDict. If given a dictionary then the
    order will be non-deterministic::

        >>> 'foo bar baz'.replace('foo', 'baz').replace('baz', 'bar')
        'bar bar bar'
        >>> m = MultiReplace({'foo': 'baz', 'baz': 'bar'})
        >>> m.sub('foo bar baz')
        'baz bar bar'

    This is because the order of replacement can matter if you're inserting
    something that might be replaced by a later substitution. Pay attention and
    if you need to rely on order then consider using a list of tuples instead
    of a dictionary.
    """

    def __init__(self, sub_map, **kwargs):
        """Compile any regular expressions that have been passed."""
        opts = {
            'regex': False,
            'flags': 0,
        }
        opts.update(kwargs)
        self.group_map = {}
        alternatives = []

        # Normalize mappings into an iterable of (pattern, replacement).
        pairs = sub_map.items() if isinstance(sub_map, Mapping) else sub_map

        for idx, vals in enumerate(pairs):
            name = 'group%d' % idx
            pattern = vals[0]
            if not isinstance(pattern, str):
                # Pre-compiled pattern object: reuse its source text.
                exp = pattern.pattern
            elif opts['regex']:
                exp = pattern
            else:
                # Plain string search key; escape regex metacharacters.
                exp = re.escape(pattern)

            # Each key becomes a named alternative so the replacement
            # can be looked up by which group actually matched.
            alternatives.append('(?P<%s>%s)' % (name, exp))
            self.group_map[name] = vals[1]

        self.combined_pattern = re.compile(
            '|'.join(alternatives),
            flags=opts['flags'],
        )

    def _get_value(self, match):
        """Given a match object find replacement value."""
        groups = match.groupdict()
        # Exactly one alternative matched; find its group name.
        key = [name for name, text in groups.items() if text][0]
        return self.group_map[key]

    def sub(self, text):
        """
        Run substitutions on the input text.

        Given an input string, run all substitutions given in the
        constructor.
        """
        pattern = self.combined_pattern
        return pattern.sub(self._get_value, text)

1228 

1229 

def multi_replace(text, sub_map, **kwargs):
    """
    Shortcut function to invoke MultiReplace in a single call.

    Example Usage::

        from boltons.strutils import multi_replace
        new = multi_replace(
            'The foo bar cat ate a bat',
            {'foo': 'zoo', 'cat': 'hat', 'bat': 'kraken'}
        )
        new == 'The zoo bar hat ate a kraken'
    """
    # Build a throwaway MultiReplace and apply it immediately.
    return MultiReplace(sub_map, **kwargs).sub(text)

1245 

1246 

def unwrap_text(text, ending='\n\n'):
    r"""
    Unwrap text, the natural complement to :func:`textwrap.wrap`.

    >>> text = "Short \n lines \nwrapped\nsmall.\n\nAnother\nparagraph."
    >>> unwrap_text(text)
    'Short lines wrapped small.\n\nAnother paragraph.'

    Args:
        text: A string to unwrap.
        ending (str): The string to join all unwrapped paragraphs
            by. Pass ``None`` to get the list. Defaults to '\n\n' for
            compatibility with Markdown and RST.

    """
    paragraphs = []
    current = []
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # Blank line ends the current paragraph.
            paragraphs.append(' '.join(current))
            current = []
        else:
            current.append(stripped)
    # Flush a trailing paragraph not terminated by a blank line.
    if current:
        paragraphs.append(' '.join(current))
    return paragraphs if ending is None else ending.join(paragraphs)

1276 

def removeprefix(text: str, prefix: str) -> str:
    r"""
    Remove `prefix` from start of `text` if present.

    Backport of `str.removeprefix` for Python versions less than 3.9.

    Args:
        text: A string to remove the prefix from.
        prefix: The string to remove from the beginning of `text`.
    """
    # Slice off the prefix only when it actually leads the string.
    return text[len(prefix):] if text.startswith(prefix) else text