Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/IPython/utils/text.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

219 statements  

1""" 

2Utilities for working with strings and text. 

3 

4Inheritance diagram: 

5 

6.. inheritance-diagram:: IPython.utils.text 

7 :parts: 3 

8""" 

9 

10import os 

11import re 

12import string 

13import sys 

14import textwrap 

15import warnings 

16from string import Formatter 

17from pathlib import Path 

18 

19from typing import ( 

20 List, 

21 Dict, 

22 Tuple, 

23 Optional, 

24 cast, 

25 Sequence, 

26 Mapping, 

27 Any, 

28 Union, 

29 Callable, 

30 Iterator, 

31 TypeVar, 

32) 

33 

34if sys.version_info < (3, 12): 

35 from typing_extensions import Self 

36else: 

37 from typing import Self 

38 

39 

class LSString(str):
    """String subclass exposing a few shell-friendly views of its value.

    Instances behave like ordinary strings and additionally provide:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value with every newline replaced by a space.
        .p (or .paths): list of ``pathlib.Path`` objects, one for each line
                        that names an existing filesystem entry.

    Views that need a transformation are computed on first access and then
    cached on the instance.

    Such strings are very useful to efficiently interact with the shell, which
    typically only understands whitespace-separated options for commands."""

    # Per-instance caches; each attribute only exists after its view has been
    # requested once, which is why the getters probe for AttributeError.
    __list: List[str]
    __spstr: str
    __paths: List[Path]

    def get_list(self) -> List[str]:
        """Return the lines of the string (split on ``'\\n'``), cached."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split("\n")
        return lines

    l = list = property(get_list)

    def get_spstr(self) -> str:
        """Return the string with newlines replaced by spaces, cached."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace("\n", " ")
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self) -> Self:
        """Return the string itself (it is already newline-separated)."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self) -> List[Path]:
        """Return a ``Path`` for every line that exists on disk, cached."""
        try:
            found = self.__paths
        except AttributeError:
            found = self.__paths = [
                Path(entry) for entry in self.split("\n") if os.path.exists(entry)
            ]
        return found

    p = paths = property(get_paths)

91 

92# FIXME: We need to reimplement type specific displayhook and then add this 

93# back as a custom printer. This should also be moved outside utils into the 

94# core. 

95 

96# def print_lsstring(arg): 

97# """ Prettier (non-repr-like) and more informative printer for LSString """ 

98# print("LSString (.p, .n, .l, .s available). Value:") 

99# print(arg) 

100# 

101# 

102# print_lsstring = result_display.register(LSString)(print_lsstring) 

103 

104 

105class SList(list): 

106 """List derivative with a special access attributes. 

107 

108 These are normal lists, but with the special attributes: 

109 

110 * .l (or .list) : value as list (the list itself). 

111 * .n (or .nlstr): value as a string, joined on newlines. 

112 * .s (or .spstr): value as a string, joined on spaces. 

113 * .p (or .paths): list of path objects (requires path.py package) 

114 

115 Any values which require transformations are computed only once and 

116 cached.""" 

117 

118 __spstr: str 

119 __nlstr: str 

120 __paths: List[Path] 

121 

122 def get_list(self) -> Self: 

123 return self 

124 

125 l = list = property(get_list) 

126 

127 def get_spstr(self) -> str: 

128 try: 

129 return self.__spstr 

130 except AttributeError: 

131 self.__spstr = ' '.join(self) 

132 return self.__spstr 

133 

134 s = spstr = property(get_spstr) 

135 

136 def get_nlstr(self) -> str: 

137 try: 

138 return self.__nlstr 

139 except AttributeError: 

140 self.__nlstr = '\n'.join(self) 

141 return self.__nlstr 

142 

143 n = nlstr = property(get_nlstr) 

144 

145 def get_paths(self) -> List[Path]: 

146 try: 

147 return self.__paths 

148 except AttributeError: 

149 self.__paths = [Path(p) for p in self if os.path.exists(p)] 

150 return self.__paths 

151 

152 p = paths = property(get_paths) 

153 

154 def grep( 

155 self, 

156 pattern: Union[str, Callable[[Any], re.Match[str] | None]], 

157 prune: bool = False, 

158 field: Optional[int] = None, 

159 ) -> Self: 

160 """Return all strings matching 'pattern' (a regex or callable) 

161 

162 This is case-insensitive. If prune is true, return all items 

163 NOT matching the pattern. 

164 

165 If field is specified, the match must occur in the specified 

166 whitespace-separated field. 

167 

168 Examples:: 

169 

170 a.grep( lambda x: x.startswith('C') ) 

171 a.grep('Cha.*log', prune=1) 

172 a.grep('chm', field=-1) 

173 """ 

174 

175 def match_target(s: str) -> str: 

176 if field is None: 

177 return s 

178 parts = s.split() 

179 try: 

180 tgt = parts[field] 

181 return tgt 

182 except IndexError: 

183 return "" 

184 

185 if isinstance(pattern, str): 

186 pred = lambda x : re.search(pattern, x, re.IGNORECASE) 

187 else: 

188 pred = pattern 

189 if not prune: 

190 return type(self)([el for el in self if pred(match_target(el))]) 

191 else: 

192 return type(self)([el for el in self if not pred(match_target(el))]) 

193 

194 def fields(self, *fields: List[str]) -> List[List[str]]: 

195 """Collect whitespace-separated fields from string list 

196 

197 Allows quick awk-like usage of string lists. 

198 

199 Example data (in var a, created by 'a = !ls -l'):: 

200 

201 -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog 

202 drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython 

203 

204 * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']`` 

205 * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']`` 

206 (note the joining by space). 

207 * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']`` 

208 

209 IndexErrors are ignored. 

210 

211 Without args, fields() just split()'s the strings. 

212 """ 

213 if len(fields) == 0: 

214 return [el.split() for el in self] 

215 

216 res = SList() 

217 for el in [f.split() for f in self]: 

218 lineparts = [] 

219 

220 for fd in fields: 

221 try: 

222 lineparts.append(el[fd]) 

223 except IndexError: 

224 pass 

225 if lineparts: 

226 res.append(" ".join(lineparts)) 

227 

228 return res 

229 

230 def sort( # type:ignore[override] 

231 self, 

232 field: Optional[List[str]] = None, 

233 nums: bool = False, 

234 ) -> Self: 

235 """sort by specified fields (see fields()) 

236 

237 Example:: 

238 

239 a.sort(1, nums = True) 

240 

241 Sorts a by second field, in numerical order (so that 21 > 3) 

242 

243 """ 

244 

245 #decorate, sort, undecorate 

246 if field is not None: 

247 dsu = [[SList([line]).fields(field), line] for line in self] 

248 else: 

249 dsu = [[line, line] for line in self] 

250 if nums: 

251 for i in range(len(dsu)): 

252 numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()]) 

253 try: 

254 n = int(numstr) 

255 except ValueError: 

256 n = 0 

257 dsu[i][0] = n 

258 

259 

260 dsu.sort() 

261 return type(self)([t[1] for t in dsu]) 

262 

263 

def indent(instr: str, nspaces: int = 4, ntabs: int = 0, flatten: bool = False) -> str:
    """Indent a string a given number of spaces or tabstops.

    indent(str, nspaces=4, ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces.

    """
    ind = "\t" * ntabs + " " * nspaces
    if not ind:
        # Nothing to prepend. Without this guard the trailing-newline trim
        # below would evaluate ``outstr[:-0]`` and return an empty string.
        # ``flatten`` still has to strip the existing indentation, though.
        return re.sub(r"^\s*", "", instr, flags=re.MULTILINE) if flatten else instr

    # With MULTILINE, ``^`` matches at the start and after every newline.
    pattern = r"^\s*" if flatten else r"^"
    outstr = re.sub(pattern, ind, instr, flags=re.MULTILINE)

    # A trailing newline makes ``^`` match at the very end, which leaves a
    # dangling indent there. Test for "\n" rather than os.linesep: "\r\n"
    # endings are still caught, and "\n"-terminated text is handled on every
    # platform.
    if outstr.endswith("\n" + ind):
        return outstr[: -len(ind)]
    return outstr

297 

298 

def list_strings(arg: Union[str, List[str]]) -> List[str]:
    """Normalize the argument to a list of strings.

    A single string is wrapped in a new one-element list; anything else is
    handed back untouched (the same object, not a copy).

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg

321 

322 

def marquee(txt: str = "", width: int = 78, mark: str = "*") -> str:
    """Return the input string centered in a 'marquee'.

    Parameters
    ----------
    txt : str
        Text to centre. When empty, the result is a line of marks cut to
        exactly ``width`` characters.
    width : int
        Target width. The result may be shorter (the side padding is a whole
        number of marks) or longer (when ``txt`` does not fit).
    mark : str
        String repeated on either side of the text. An empty mark yields the
        text with only its surrounding spaces.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    if not mark:
        # An empty mark cannot fill any space, and dividing by len(mark)
        # below would raise ZeroDivisionError.
        return f" {txt} "
    # Two columns are reserved for the spaces around the text; the remainder
    # is split evenly, counted in whole marks, and never goes negative.
    nmark = max(0, (width - len(txt) - 2) // len(mark) // 2)
    marks = mark * nmark
    return f"{marks} {txt} {marks}"

346 

347 

def format_screen(strng: str) -> str:
    """Format a string for screen printing.

    This removes some latex-type format codes: currently only the
    paragraph-continuation marker, i.e. a backslash sitting at the end of a
    line."""
    # ``$`` with MULTILINE matches before every newline and at end of text.
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)

356 

357 

def dedent(text: str) -> str:
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    Text that begins with a newline, or that has a single line only, is
    passed to ``textwrap.dedent`` unchanged in approach.

    For use in wrap_paragraphs.
    """
    # A leading blank line means the first line is not special; a text
    # without any newline has nothing after its first line to treat apart.
    if text.startswith('\n') or '\n' not in text:
        return textwrap.dedent(text)

    # Keep the first line verbatim and dedent only what follows it.
    head, _, tail = text.partition('\n')
    return head + '\n' + textwrap.dedent(tail)

383 

384 

def strip_email_quotes(text: str) -> str:
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    The text is split with ``str.splitlines`` and re-joined with ``'\\n'``.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    strippable = string.whitespace + ">"
    rows = text.splitlines()

    # Walk the lines column by column; zip stops at the shortest line.
    prefix_len = 0
    for column in zip(*rows):
        lead = column[0]
        # Stop at the first column that is not a quote/whitespace character
        # or that is not the same character on every line.
        if lead not in strippable or any(ch != lead for ch in column):
            break
        prefix_len += 1

    return "\n".join(row[prefix_len:] for row in rows)

433 

434 

class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Note that this version interprets a `:` as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    Note that on Python 3.14+ this version interprets `[]` as indexing operator
    so you need to use generators instead of list comprehensions, for example:
    `list(i for i in range(10))`.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name: str, args: Any, kwargs: Any) -> Tuple[Any, str]:
        """Evaluate the field text as an expression over the keyword arguments.

        Positional ``args`` are not consulted. Returns the evaluated value
        together with the field text, which serves as the "used key".
        """
        # NOTE(security): eval() executes whatever the template contains, so
        # templates must come from a trusted source.
        value = eval(name, kwargs, kwargs)
        return (value, name)

464 

465#XXX: As of Python 3.4, the format string parsing no longer splits on a colon 

466# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and 

467# above, it should be possible to remove FullEvalFormatter. 

468 

class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as an expression, with the keyword
    arguments as its namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    # Adapted from Formatter._vformat: fields are eval'd, and what the parser
    # takes for a format spec is glued back on so that slices survive.
    def vformat(
        self, format_string: str, args: Sequence[Any], kwargs: Mapping[str, Any]
    ) -> str:
        """Render ``format_string``, evaluating each field against ``kwargs``.

        Positional ``args`` are accepted for interface compatibility but are
        not used.
        """
        pieces = []
        conversion: Optional[str]
        for literal_text, field_name, format_spec, conversion in self.parse(
            format_string
        ):
            # Literal text preceding the field is emitted unchanged.
            if literal_text:
                pieces.append(literal_text)

            # No field in this chunk: nothing to evaluate.
            if field_name is None:
                continue

            # The parser splits on ':'; undo that so "x[1:2]" is evaluated as
            # a whole instead of treating "2]" as a format spec.
            expression = (
                f"{field_name}:{format_spec}" if format_spec else field_name
            )

            # NOTE(security): eval() executes whatever the template contains,
            # so templates must come from a trusted source. A copy of kwargs
            # is used as the namespace.
            value = eval(expression, dict(kwargs))

            # Apply any !r / !s / !a conversion.
            # type issue in typeshed, fixed in https://github.com/python/typeshed/pull/11377
            value = self.convert_field(value, conversion)  # type: ignore[arg-type]

            # Always format with an empty spec (see the class docstring).
            pieces.append(self.format_field(value, ""))

        return "".join(pieces)

527 

528 

class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign (``$$foo``) is emitted as the literal ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """

    # group(1): text before the dollar sign; group(2): the name (word
    # characters and dots), carrying a leading '$' when it was escaped.
    # The lookahead only accepts a match when the text after it holds an even
    # number of single quotes.
    _dollar_pattern_ignore_single_quote = re.compile(
        r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)"
    )

    def parse(self, fmt_string: str) -> Iterator[Tuple[Any, Any, Any, Any]]:  # type: ignore[explicit-override]
        """Yield ``(literal, field, spec, conversion)`` tuples like
        ``Formatter.parse``, with extra entries for each ``$name`` found in
        the literal text."""
        find_dollars = self._dollar_pattern_ignore_single_quote.finditer
        for literal_txt, field_name, format_spec, conversion in Formatter.parse(
            self, fmt_string
        ):
            consumed = 0  # how much of literal_txt has been handled so far
            pending = ""  # literal text waiting for the next yield
            for found in find_dollars(literal_txt):
                leading = found.group(1)
                dollar_name = found.group(2)
                if dollar_name.startswith("$"):
                    # $$foo --> literal $foo
                    pending += leading + dollar_name
                else:
                    yield (pending + leading, dollar_name, "", None)
                    pending = ""
                consumed = found.end()

            # Finally hand on the {foo} style field with the leftover literal.
            yield (pending + literal_txt[consumed:], field_name, format_spec, conversion)

    def __repr__(self) -> str:
        return "<DollarFormatter>"

575 

576#----------------------------------------------------------------------------- 

577# Utils to columnize a list of string 

578#----------------------------------------------------------------------------- 

579 

580 

581def _col_chunks( 

582 l: List[int], max_rows: int, row_first: bool = False 

583) -> Iterator[List[int]]: 

584 """Yield successive max_rows-sized column chunks from l.""" 

585 if row_first: 

586 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0) 

587 for i in range(ncols): 

588 yield [l[j] for j in range(i, len(l), ncols)] 

589 else: 

590 for i in range(0, len(l), max_rows): 

591 yield l[i:(i + max_rows)] 

592 

593 

594def _find_optimal( 

595 rlist: List[int], row_first: bool, separator_size: int, displaywidth: int 

596) -> Dict[str, Any]: 

597 """Calculate optimal info to columnize a list of string""" 

598 for max_rows in range(1, len(rlist) + 1): 

599 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first))) 

600 sumlength = sum(col_widths) 

601 ncols = len(col_widths) 

602 if sumlength + separator_size * (ncols - 1) <= displaywidth: 

603 break 

604 return {'num_columns': ncols, 

605 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0, 

606 'max_rows': max_rows, 

607 'column_widths': col_widths 

608 } 

609 

610 

611T = TypeVar("T") 

612 

613 

614def _get_or_default(mylist: List[T], i: int, default: T) -> T: 

615 """return list item number, or default if don't exist""" 

616 if i >= len(mylist): 

617 return default 

618 else : 

619 return mylist[i] 

620 

621 

def get_text_list(
    list_: List[str], last_sep: str = " and ", sep: str = ", ", wrap_item_with: str = ""
) -> str:
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with ``sep``; ``last_sep`` goes before
    the last one. If ``wrap_item_with`` is non-empty it is put on both sides
    of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if not list_:
        return ''

    items = list_
    if wrap_item_with:
        items = [f"{wrap_item_with!s}{item!s}{wrap_item_with!s}" for item in items]

    # A lone item needs no separator at all.
    if len(items) == 1:
        return items[0]

    *leading, final = items
    return f"{sep.join(leading)}{last_sep!s}{final!s}"

654 last_sep, list_[-1])