Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/IPython/utils/text.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

220 statements  

1""" 

2Utilities for working with strings and text. 

3 

4Inheritance diagram: 

5 

6.. inheritance-diagram:: IPython.utils.text 

7 :parts: 3 

8""" 

9 

10import os 

11import re 

12import string 

13import sys 

14import textwrap 

15import warnings 

16from string import Formatter 

17from pathlib import Path 

18 

19from typing import ( 

20 List, 

21 Dict, 

22 Tuple, 

23 Optional, 

24 cast, 

25 Any, 

26 Union, 

27 TypeVar, 

28) 

29from collections.abc import Sequence, Mapping, Callable, Iterator 

30 

31if sys.version_info < (3, 12): 

32 from typing import Self 

33else: 

34 from typing import Self 

35 

36 

37class LSString(str): 

38 """String derivative with a special access attributes. 

39 

40 These are normal strings, but with the special attributes: 

41 

42 .l (or .list) : value as list (split on newlines). 

43 .n (or .nlstr): original value (the string itself). 

44 .s (or .spstr): value as whitespace-separated string. 

45 .p (or .paths): list of path objects (requires path.py package) 

46 

47 Any values which require transformations are computed only once and 

48 cached. 

49 

50 Such strings are very useful to efficiently interact with the shell, which 

51 typically only understands whitespace-separated options for commands.""" 

52 

53 __list: List[str] 

54 __spstr: str 

55 __paths: List[Path] 

56 

57 def get_list(self) -> List[str]: 

58 try: 

59 return self.__list 

60 except AttributeError: 

61 self.__list = self.split('\n') 

62 return self.__list 

63 

64 l = list = property(get_list) 

65 

66 def get_spstr(self) -> str: 

67 try: 

68 return self.__spstr 

69 except AttributeError: 

70 self.__spstr = self.replace('\n',' ') 

71 return self.__spstr 

72 

73 s = spstr = property(get_spstr) 

74 

75 def get_nlstr(self) -> Self: 

76 return self 

77 

78 n = nlstr = property(get_nlstr) 

79 

80 def get_paths(self) -> List[Path]: 

81 try: 

82 return self.__paths 

83 except AttributeError: 

84 self.__paths = [Path(p) for p in self.split('\n') if os.path.exists(p)] 

85 return self.__paths 

86 

87 p = paths = property(get_paths) 

88 

89# FIXME: We need to reimplement type specific displayhook and then add this 

90# back as a custom printer. This should also be moved outside utils into the 

91# core. 

92 

93# def print_lsstring(arg): 

94# """ Prettier (non-repr-like) and more informative printer for LSString """ 

95# print("LSString (.p, .n, .l, .s available). Value:") 

96# print(arg) 

97# 

98# 

99# print_lsstring = result_display.register(LSString)(print_lsstring) 

100 

101 

102class SList(list[Any]): 

103 """List derivative with a special access attributes. 

104 

105 These are normal lists, but with the special attributes: 

106 

107 * .l (or .list) : value as list (the list itself). 

108 * .n (or .nlstr): value as a string, joined on newlines. 

109 * .s (or .spstr): value as a string, joined on spaces. 

110 * .p (or .paths): list of path objects (requires path.py package) 

111 

112 Any values which require transformations are computed only once and 

113 cached.""" 

114 

115 __spstr: str 

116 __nlstr: str 

117 __paths: List[Path] 

118 

119 def get_list(self) -> Self: 

120 return self 

121 

122 l = list = property(get_list) 

123 

124 def get_spstr(self) -> str: 

125 try: 

126 return self.__spstr 

127 except AttributeError: 

128 self.__spstr = ' '.join(self) 

129 return self.__spstr 

130 

131 s = spstr = property(get_spstr) 

132 

133 def get_nlstr(self) -> str: 

134 try: 

135 return self.__nlstr 

136 except AttributeError: 

137 self.__nlstr = '\n'.join(self) 

138 return self.__nlstr 

139 

140 n = nlstr = property(get_nlstr) 

141 

142 def get_paths(self) -> List[Path]: 

143 try: 

144 return self.__paths 

145 except AttributeError: 

146 self.__paths = [Path(p) for p in self if os.path.exists(p)] 

147 return self.__paths 

148 

149 p = paths = property(get_paths) 

150 

151 def grep( 

152 self, 

153 pattern: Union[str, Callable[[Any], re.Match[str] | None]], 

154 prune: bool = False, 

155 field: Optional[int] = None, 

156 ) -> Self: 

157 """Return all strings matching 'pattern' (a regex or callable) 

158 

159 This is case-insensitive. If prune is true, return all items 

160 NOT matching the pattern. 

161 

162 If field is specified, the match must occur in the specified 

163 whitespace-separated field. 

164 

165 Examples:: 

166 

167 a.grep( lambda x: x.startswith('C') ) 

168 a.grep('Cha.*log', prune=1) 

169 a.grep('chm', field=-1) 

170 """ 

171 

172 def match_target(s: str) -> str: 

173 if field is None: 

174 return s 

175 parts = s.split() 

176 try: 

177 tgt = parts[field] 

178 return tgt 

179 except IndexError: 

180 return "" 

181 

182 if isinstance(pattern, str): 

183 pred = lambda x : re.search(pattern, x, re.IGNORECASE) 

184 else: 

185 pred = pattern 

186 if not prune: 

187 return type(self)([el for el in self if pred(match_target(el))]) # type: ignore [no-untyped-call] 

188 else: 

189 return type(self)([el for el in self if not pred(match_target(el))]) # type: ignore [no-untyped-call] 

190 

191 def fields(self, *fields: List[str]) -> List[List[str]]: 

192 """Collect whitespace-separated fields from string list 

193 

194 Allows quick awk-like usage of string lists. 

195 

196 Example data (in var a, created by 'a = !ls -l'):: 

197 

198 -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog 

199 drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython 

200 

201 * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']`` 

202 * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']`` 

203 (note the joining by space). 

204 * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']`` 

205 

206 IndexErrors are ignored. 

207 

208 Without args, fields() just split()'s the strings. 

209 """ 

210 if len(fields) == 0: 

211 return [el.split() for el in self] 

212 

213 res = SList() 

214 for el in [f.split() for f in self]: 

215 lineparts = [] 

216 

217 for fd in fields: 

218 try: 

219 lineparts.append(el[fd]) 

220 except IndexError: 

221 pass 

222 if lineparts: 

223 res.append(" ".join(lineparts)) 

224 

225 return res 

226 

227 def sort( # type:ignore[override] 

228 self, 

229 field: Optional[List[str]] = None, 

230 nums: bool = False, 

231 ) -> Self: 

232 """sort by specified fields (see fields()) 

233 

234 Example:: 

235 

236 a.sort(1, nums = True) 

237 

238 Sorts a by second field, in numerical order (so that 21 > 3) 

239 

240 """ 

241 

242 #decorate, sort, undecorate 

243 if field is not None: 

244 dsu = [[SList([line]).fields(field), line] for line in self] 

245 else: 

246 dsu = [[line, line] for line in self] 

247 if nums: 

248 for i in range(len(dsu)): 

249 numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()]) 

250 try: 

251 n = int(numstr) 

252 except ValueError: 

253 n = 0 

254 dsu[i][0] = n 

255 

256 

257 dsu.sort() 

258 return type(self)([t[1] for t in dsu]) 

259 

260 

def indent(instr: str, nspaces: int = 4, ntabs: int = 0, flatten: bool = False) -> str:
    """Indent a string a given number of spaces or tabstops.

    indent(str, nspaces=4, ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces.

    """
    ind = "\t" * ntabs + " " * nspaces
    # With flatten the existing leading whitespace is consumed by the match
    # and therefore replaced; otherwise the indent is inserted in front of it.
    pat = re.compile(r'^\s*' if flatten else r'^', re.MULTILINE)
    outstr = pat.sub(ind, instr)
    # '^' also matches after a final newline, which leaves a dangling indent
    # at the very end; drop it.  The check uses "\n" because that is what the
    # regex keys on, and it is skipped for an empty indent because
    # outstr[:-0] would return an empty string.
    if ind and outstr.endswith("\n" + ind):
        return outstr[:-len(ind)]
    return outstr

294 

295 

def list_strings(arg: Union[str, List[str]]) -> List[str]:
    """Normalize the input to a list of strings.

    A single string is wrapped in a one-element list; anything else is
    returned unchanged (the very same object, not a copy).

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg

318 

319 

def marquee(txt: str = "", width: int = 78, mark: str = "*") -> str:
    """Center the given text between two runs of a marker string.

    With empty text the result is just the marker repeated and cut to
    ``width`` characters.  Otherwise the text is surrounded by one space and
    an equal number of marker repetitions on each side (none at all when the
    text does not leave room for them).

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    # repetitions per side: room left after the text and its two spaces,
    # measured in whole markers, split in half and never negative
    per_side = max((width - len(txt) - 2) // len(mark) // 2, 0)
    border = mark * per_side
    return f"{border} {txt} {border}"

343 

344 

def format_screen(strng: str) -> str:
    """Prepare a string for printing on screen.

    Strips the latex-style paragraph-continuation marker, i.e. a backslash
    that is the last character of a line."""
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)

353 

354 

def dedent(text: str) -> str:
    """Like textwrap.dedent, but an unindented first line is left alone.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # Plain textwrap.dedent handles the text when it opens with a blank line
    # (the first line must not be ignored) or consists of a single line.
    if not first or not newline:
        return textwrap.dedent(text)

    # Keep the first line verbatim and dedent only what follows it.
    return first + '\n' + textwrap.dedent(rest)

380 

381 

def strip_email_quotes(text: str) -> str:
    """Strip leading email quotation characters ('>').

    Removes the leading run of '>' and whitespace characters that is shared,
    character for character, by every line of the input text.  The result is
    rebuilt from ``text.splitlines()`` joined with newlines.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    rows = text.splitlines()
    quote_chars = string.whitespace + ">"
    prefix_len = 0

    # Walk the lines column by column; zip stops at the shortest line.
    for column in zip(*rows):
        head = column[0]
        # Stop at the first column that differs between lines or that holds
        # something other than quoting characters.
        if any(ch != head for ch in column) or head not in quote_chars:
            break
        prefix_len += 1

    return "\n".join(row[prefix_len:] for row in rows)

430 

431 

class EvalFormatter(Formatter):
    """A String Formatter that evaluates each replacement field as an expression.

    Note that this version interprets a `:` as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    Note that on Python 3.14+ this version interprets `[]` as indexing operator
    so you need to use generators instead of list comprehensions, for example:
    `list(i for i in range(10))`.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name: str, args: Any, kwargs: Any) -> Tuple[Any, str]:
        """Evaluate the field text with the keyword arguments as namespace.

        Returns the evaluated object together with the field text itself.
        """
        # NOTE: eval() executes whatever expression the template contains, so
        # templates must come from a trusted source.
        return eval(name, kwargs, kwargs), name

461 

462#XXX: As of Python 3.4, the format string parsing no longer splits on a colon 

463# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and 

464# above, it should be possible to remove FullEvalFormatter. 

465 

class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as an expression with the keyword
    arguments as its namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    # Adapted from Formatter._vformat: fields are eval'ed, and whatever the
    # parser split off as a format spec is glued back onto the expression so
    # that slices survive.
    def vformat(
        self, format_string: str, args: Sequence[Any], kwargs: Mapping[str, Any]
    ) -> str:
        """Render ``format_string``, evaluating every field with ``kwargs``.

        ``args`` is accepted for signature compatibility and not used.
        """
        pieces = []
        for literal, expression, spec, conversion in self.parse(format_string):
            # literal text preceding the field (if any) is copied verbatim
            if literal:
                pieces.append(literal)

            # no field attached to this chunk: nothing to evaluate
            if expression is None:
                continue

            # a "format spec" is really the tail of a slice expression
            if spec:
                expression = f"{expression}:{spec}"

            # NOTE: eval() executes whatever the template contains; the
            # namespace is a copy so the caller's mapping is not modified.
            value = eval(expression, dict(kwargs))

            # apply the !r / !s / !a conversion, if one was given
            # (type issue in typeshed, fixed in https://github.com/python/typeshed/pull/11377)
            value = self.convert_field(value, conversion)

            # always format with an empty spec, see the class docstring
            pieces.append(self.format_field(value, ''))

        return ''.join(pieces)

524 

525 

class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """

    # group 1: text before the '$'; group 2: the name (word characters and
    # dots, optionally led by a second '$').  The lookahead requires the rest
    # of the text to contain an even number of single quotes.
    _dollar_pattern_ignore_single_quote = re.compile(
        r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)"
    )

    def parse(self, fmt_string: str) -> Iterator[Tuple[Any, Any, Any, Any]]:
        """Yield ``(literal, field, spec, conversion)`` tuples like
        ``Formatter.parse``, with ``$name`` occurrences inside the literal
        text turned into fields of their own."""
        dollar_re = self._dollar_pattern_ignore_single_quote
        for literal_txt, field_name, format_spec, conversion in Formatter.parse(
            self, fmt_string
        ):
            # Scan the literal text for $foo patterns.
            resume_at = 0
            pending = ""
            for m in dollar_re.finditer(literal_txt):
                before, name = m.group(1, 2)
                resume_at = m.end()
                if name.startswith("$"):
                    # $$foo --> literal $foo, kept for the next yield
                    pending += before + name
                    continue
                yield (pending + before, name, "", None)
                pending = ""

            # Re-yield the {foo} style pattern with whatever literal is left
            yield (pending + literal_txt[resume_at:], field_name, format_spec, conversion)

    def __repr__(self) -> str:
        return "<DollarFormatter>"

572 

573#----------------------------------------------------------------------------- 

574# Utils to columnize a list of string 

575#----------------------------------------------------------------------------- 

576 

577 

578def _col_chunks( 

579 l: List[int], max_rows: int, row_first: bool = False 

580) -> Iterator[List[int]]: 

581 """Yield successive max_rows-sized column chunks from l.""" 

582 if row_first: 

583 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0) 

584 for i in range(ncols): 

585 yield [l[j] for j in range(i, len(l), ncols)] 

586 else: 

587 for i in range(0, len(l), max_rows): 

588 yield l[i:(i + max_rows)] 

589 

590 

def _find_optimal(
    rlist: List[int], row_first: bool, separator_size: int, displaywidth: int
) -> Dict[str, Any]:
    """Calculate optimal info to columnize a list of string

    Parameters
    ----------
    rlist : list of int
        Width of every item, in display order.
    row_first : bool
        Layout direction, forwarded to ``_col_chunks``.
    separator_size : int
        Minimum width reserved between two columns.
    displaywidth : int
        Width available for a full row.

    Returns
    -------
    dict with the keys ``num_columns``, ``optimal_separator_width``,
    ``max_rows`` and ``column_widths``.  The smallest row count whose layout
    fits into ``displaywidth`` is used; if none fits, the layout of the last
    row count tried (``len(rlist)``) is returned.  For an empty ``rlist``
    all counts are 0 and ``column_widths`` is empty.
    """
    if len(rlist) == 0:
        # The search loop below would not run at all and leave its variables
        # unbound, so describe the empty layout explicitly.
        return {'num_columns': 0,
                'optimal_separator_width': 0,
                'max_rows': 0,
                'column_widths': []
                }

    for max_rows in range(1, len(rlist) + 1):
        # each column is as wide as its widest item
        col_widths = [max(chunk) for chunk in _col_chunks(rlist, max_rows, row_first)]
        sumlength = sum(col_widths)
        ncols = len(col_widths)
        if sumlength + separator_size * (ncols - 1) <= displaywidth:
            break

    gaps = ncols - 1
    return {'num_columns': ncols,
            # spread the unused width evenly over the gaps between columns
            'optimal_separator_width': (displaywidth - sumlength) // gaps if gaps else 0,
            'max_rows': max_rows,
            'column_widths': col_widths
            }

606 

607 

608T = TypeVar("T") 

609 

610 

611def _get_or_default(mylist: List[T], i: int, default: T) -> T: 

612 """return list item number, or default if don't exist""" 

613 if i >= len(mylist): 

614 return default 

615 else : 

616 return mylist[i] 

617 

618 

def get_text_list(
    list_: List[str], last_sep: str = " and ", sep: str = ", ", wrap_item_with: str = ""
) -> str:
    """
    Join the items into a natural-language enumeration.

    All items but the last are joined with ``sep``; the last one is attached
    with ``last_sep``.  If ``wrap_item_with`` is given, it is put on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    count = len(list_)
    if count == 0:
        return ''

    items = list_
    if wrap_item_with:
        items = [f"{wrap_item_with!s}{item!s}{wrap_item_with!s}" for item in list_]

    if count == 1:
        return items[0]

    *leading, final = items
    return f"{sep.join(leading)}{last_sep!s}{final!s}"