Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/IPython/utils/text.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

218 statements  

1""" 

2Utilities for working with strings and text. 

3 

4Inheritance diagram: 

5 

6.. inheritance-diagram:: IPython.utils.text 

7 :parts: 3 

8""" 

9 

10import os 

11import re 

12import string 

13import sys 

14import textwrap 

15import warnings 

16from string import Formatter 

17from pathlib import Path 

18 

19from typing import ( 

20 List, 

21 Dict, 

22 Tuple, 

23 Optional, 

24 cast, 

25 Any, 

26 Union, 

27 TypeVar, 

28) 

29from collections.abc import Sequence, Mapping, Callable, Iterator 

30 

31from typing import Self 

32 

33 

34class LSString(str): 

35 """String derivative with a special access attributes. 

36 

37 These are normal strings, but with the special attributes: 

38 

39 .l (or .list) : value as list (split on newlines). 

40 .n (or .nlstr): original value (the string itself). 

41 .s (or .spstr): value as whitespace-separated string. 

42 .p (or .paths): list of path objects (requires path.py package) 

43 

44 Any values which require transformations are computed only once and 

45 cached. 

46 

47 Such strings are very useful to efficiently interact with the shell, which 

48 typically only understands whitespace-separated options for commands.""" 

49 

50 __list: List[str] 

51 __spstr: str 

52 __paths: List[Path] 

53 

54 def get_list(self) -> List[str]: 

55 try: 

56 return self.__list 

57 except AttributeError: 

58 self.__list = self.split('\n') 

59 return self.__list 

60 

61 l = list = property(get_list) 

62 

63 def get_spstr(self) -> str: 

64 try: 

65 return self.__spstr 

66 except AttributeError: 

67 self.__spstr = self.replace('\n',' ') 

68 return self.__spstr 

69 

70 s = spstr = property(get_spstr) 

71 

72 def get_nlstr(self) -> Self: 

73 return self 

74 

75 n = nlstr = property(get_nlstr) 

76 

77 def get_paths(self) -> List[Path]: 

78 try: 

79 return self.__paths 

80 except AttributeError: 

81 self.__paths = [Path(p) for p in self.split('\n') if os.path.exists(p)] 

82 return self.__paths 

83 

84 p = paths = property(get_paths) 

85 

86# FIXME: We need to reimplement type specific displayhook and then add this 

87# back as a custom printer. This should also be moved outside utils into the 

88# core. 

89 

90# def print_lsstring(arg): 

91# """ Prettier (non-repr-like) and more informative printer for LSString """ 

92# print("LSString (.p, .n, .l, .s available). Value:") 

93# print(arg) 

94# 

95# 

96# print_lsstring = result_display.register(LSString)(print_lsstring) 

97 

98 

99class SList(list[Any]): 

100 """List derivative with a special access attributes. 

101 

102 These are normal lists, but with the special attributes: 

103 

104 * .l (or .list) : value as list (the list itself). 

105 * .n (or .nlstr): value as a string, joined on newlines. 

106 * .s (or .spstr): value as a string, joined on spaces. 

107 * .p (or .paths): list of path objects (requires path.py package) 

108 

109 Any values which require transformations are computed only once and 

110 cached.""" 

111 

112 __spstr: str 

113 __nlstr: str 

114 __paths: List[Path] 

115 

116 def get_list(self) -> Self: 

117 return self 

118 

119 l = list = property(get_list) 

120 

121 def get_spstr(self) -> str: 

122 try: 

123 return self.__spstr 

124 except AttributeError: 

125 self.__spstr = ' '.join(self) 

126 return self.__spstr 

127 

128 s = spstr = property(get_spstr) 

129 

130 def get_nlstr(self) -> str: 

131 try: 

132 return self.__nlstr 

133 except AttributeError: 

134 self.__nlstr = '\n'.join(self) 

135 return self.__nlstr 

136 

137 n = nlstr = property(get_nlstr) 

138 

139 def get_paths(self) -> List[Path]: 

140 try: 

141 return self.__paths 

142 except AttributeError: 

143 self.__paths = [Path(p) for p in self if os.path.exists(p)] 

144 return self.__paths 

145 

146 p = paths = property(get_paths) 

147 

148 def grep( 

149 self, 

150 pattern: Union[str, Callable[[Any], re.Match[str] | None]], 

151 prune: bool = False, 

152 field: Optional[int] = None, 

153 ) -> Self: 

154 """Return all strings matching 'pattern' (a regex or callable) 

155 

156 This is case-insensitive. If prune is true, return all items 

157 NOT matching the pattern. 

158 

159 If field is specified, the match must occur in the specified 

160 whitespace-separated field. 

161 

162 Examples:: 

163 

164 a.grep( lambda x: x.startswith('C') ) 

165 a.grep('Cha.*log', prune=1) 

166 a.grep('chm', field=-1) 

167 """ 

168 

169 def match_target(s: str) -> str: 

170 if field is None: 

171 return s 

172 parts = s.split() 

173 try: 

174 tgt = parts[field] 

175 return tgt 

176 except IndexError: 

177 return "" 

178 

179 if isinstance(pattern, str): 

180 pred = lambda x : re.search(pattern, x, re.IGNORECASE) 

181 else: 

182 pred = pattern 

183 if not prune: 

184 return type(self)([el for el in self if pred(match_target(el))]) # type: ignore [no-untyped-call] 

185 else: 

186 return type(self)([el for el in self if not pred(match_target(el))]) # type: ignore [no-untyped-call] 

187 

188 def fields(self, *fields: List[str]) -> List[List[str]]: 

189 """Collect whitespace-separated fields from string list 

190 

191 Allows quick awk-like usage of string lists. 

192 

193 Example data (in var a, created by 'a = !ls -l'):: 

194 

195 -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog 

196 drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython 

197 

198 * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']`` 

199 * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']`` 

200 (note the joining by space). 

201 * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']`` 

202 

203 IndexErrors are ignored. 

204 

205 Without args, fields() just split()'s the strings. 

206 """ 

207 if len(fields) == 0: 

208 return [el.split() for el in self] 

209 

210 res = SList() 

211 for el in [f.split() for f in self]: 

212 lineparts = [] 

213 

214 for fd in fields: 

215 try: 

216 lineparts.append(el[fd]) 

217 except IndexError: 

218 pass 

219 if lineparts: 

220 res.append(" ".join(lineparts)) 

221 

222 return res 

223 

224 def sort( # type:ignore[override] 

225 self, 

226 field: Optional[List[str]] = None, 

227 nums: bool = False, 

228 ) -> Self: 

229 """sort by specified fields (see fields()) 

230 

231 Example:: 

232 

233 a.sort(1, nums = True) 

234 

235 Sorts a by second field, in numerical order (so that 21 > 3) 

236 

237 """ 

238 

239 #decorate, sort, undecorate 

240 if field is not None: 

241 dsu = [[SList([line]).fields(field), line] for line in self] 

242 else: 

243 dsu = [[line, line] for line in self] 

244 if nums: 

245 for i in range(len(dsu)): 

246 numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()]) 

247 try: 

248 n = int(numstr) 

249 except ValueError: 

250 n = 0 

251 dsu[i][0] = n 

252 

253 

254 dsu.sort() 

255 return type(self)([t[1] for t in dsu]) 

256 

257 

def indent(instr: str, nspaces: int = 4, ntabs: int = 0, flatten: bool = False) -> str:
    """Indent a string a given number of spaces or tabstops.

    indent(str, nspaces=4, ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces.

    """
    ind = "\t" * ntabs + " " * nspaces
    pat = re.compile(r"^\s*" if flatten else r"^", re.MULTILINE)
    outstr = pat.sub(ind, instr)
    # ``^`` also matches after a final newline, which leaves a dangling indent
    # at the very end of the text; drop it.  The ``ind`` guard matters: with an
    # empty indent ``outstr[:-0]`` would wipe the whole string.  "\n" is used
    # instead of os.linesep because in-memory text is "\n"-terminated on every
    # platform (and "\r\n" + ind still ends with "\n" + ind).
    if ind and outstr.endswith("\n" + ind):
        return outstr[: -len(ind)]
    return outstr

291 

292 

def list_strings(arg: Union[str, List[str]]) -> List[str]:
    """Normalise the argument to a list of strings.

    A single string is wrapped in a one-element list; anything else is
    returned unchanged (the same object, not a copy).

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg

315 

316 

def marquee(txt: str = "", width: int = 78, mark: str = "*") -> str:
    """Return the input string centered in a 'marquee'.

    With empty ``txt`` the result is ``mark`` repeated and cut to exactly
    ``width`` characters.  Otherwise ``txt`` is surrounded by one space and an
    equal number of whole ``mark`` repetitions on each side; the result may be
    shorter than ``width`` (rounding) or longer (``txt`` too long to fit).
    An empty ``mark`` adds no decoration at all.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    if not mark:
        # Nothing to repeat; also avoids dividing by len(mark) == 0 below.
        return " %s " % txt
    # Room left after txt and its two spaces, in whole marks, split in half.
    nmark = max(0, (width - len(txt) - 2) // len(mark) // 2)
    marks = mark * nmark
    return "%s %s %s" % (marks, txt, marks)

340 

341 

def format_screen(strng: str) -> str:
    """Format a string for screen printing.

    This removes some latex-type format codes: currently only the
    paragraph-continuation marker, i.e. a backslash at the end of a line."""
    # A trailing backslash marks "paragraph continues"; drop it on every line.
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)

350 

351 

def dedent(text: str) -> str:
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # Two cases are handed to textwrap.dedent unchanged: a text with only one
    # line (no newline found), and a text whose first line is blank (it starts
    # with the newline), where the first line must not be ignored.
    if not newline or not first:
        return textwrap.dedent(text)

    # Keep the first line verbatim and dedent everything after it.
    return first + '\n' + textwrap.dedent(rest)

377 

378 

def strip_email_quotes(text: str) -> str:
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.  The result is
    rebuilt by joining the lines with ``'\\n'``.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    rows = text.splitlines()
    prefix_chars = string.whitespace + ">"
    prefix_len = 0

    # zip(*rows) walks the text column by column and stops at the shortest
    # line.  The prefix grows while a column holds one single character and
    # that character is whitespace or '>'.
    for column in zip(*rows):
        head = column[0]
        if head not in prefix_chars or any(ch != head for ch in column):
            break
        prefix_len += 1

    return "\n".join(row[prefix_len:] for row in rows)

427 

428 

class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Note that this version interprets a `:` as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    Note that on Python 3.14+ this version interprets `[]` as indexing operator
    so you need to use generators instead of list comprehensions, for example:
    `list(i for i in range(10))`.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name: str, args: Any, kwargs: Any) -> Tuple[Any, str]:
        """Evaluate the field ``name`` as an expression; ``args`` is unused.

        NOTE: this calls ``eval`` on the field text, so format strings must
        come from a trusted source.
        """
        # The keyword arguments act as both the global and local namespace.
        return eval(name, kwargs, kwargs), name

458 

459#XXX: As of Python 3.4, the format string parsing no longer splits on a colon 

460# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and 

461# above, it should be possible to remove FullEvalFormatter. 

462 

class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as an expression, with the keyword
    arguments as its namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    # Adapted from Formatter._vformat: fields are eval'ed, and whatever the
    # parser split off as a "format spec" is glued back onto the expression.
    def vformat(
        self, format_string: str, args: Sequence[Any], kwargs: Mapping[str, Any]
    ) -> str:
        """Build the formatted string; positional ``args`` are not used.

        NOTE: field contents go through ``eval``, so format strings must come
        from a trusted source.
        """
        pieces = []
        conversion: Optional[str]
        for literal_text, field_name, format_spec, conversion in self.parse(
            format_string
        ):
            # Literal text is copied through untouched.
            if literal_text:
                pieces.append(literal_text)

            # No replacement field follows this literal.
            if field_name is None:
                continue

            # The parser cut the field at ':'; undo that so the colon (e.g. of
            # a slice) is part of the evaluated expression again.
            expression = f"{field_name}:{format_spec}" if format_spec else field_name

            # Evaluate against a fresh copy of the keyword arguments.
            value = eval(expression, dict(kwargs))

            # Apply any !r / !s / !a conversion.
            # type issue in typeshed, fixed in https://github.com/python/typeshed/pull/11377
            value = self.convert_field(value, conversion)

            # Always format with an empty spec.
            pieces.append(self.format_field(value, ""))

        return "".join(pieces)

521 

522 

class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign (``$$foo``) is emitted as the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """

    # group(1): text before the '$'; group(2): the dotted name, with a leading
    # '$' when the input had '$$'.  The lookahead only accepts a match that is
    # followed by an even number of single quotes.
    _dollar_pattern_ignore_single_quote = re.compile(
        r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)"
    )

    def parse(self, fmt_string: str) -> Iterator[Tuple[Any, Any, Any, Any]]:
        """Yield ``Formatter.parse`` tuples, with ``$name`` turned into fields.

        Each literal chunk produced by the standard parser is scanned for
        ``$name`` occurrences; every occurrence becomes a field of its own
        (empty format spec, no conversion) before the original ``{...}``
        field of that chunk is passed on.
        """
        dollar_fields = self._dollar_pattern_ignore_single_quote
        for literal_txt, field_name, format_spec, conversion in Formatter.parse(
            self, fmt_string
        ):
            consumed = 0  # end of the last $-match within literal_txt
            pending = ""  # literal text collected but not yet yielded
            for match in dollar_fields.finditer(literal_txt):
                before, name = match.group(1, 2)
                if name.startswith("$"):
                    # $$foo --> $foo, kept as literal text
                    pending += before + name
                else:
                    yield (pending + before, name, "", None)
                    pending = ""
                consumed = match.end()

            # Re-yield the {foo} style pattern with whatever literal is left.
            yield (pending + literal_txt[consumed:], field_name, format_spec, conversion)

    def __repr__(self) -> str:
        return "<DollarFormatter>"

569 

570#----------------------------------------------------------------------------- 

571# Utils to columnize a list of string 

572#----------------------------------------------------------------------------- 

573 

574 

575def _col_chunks( 

576 l: List[int], max_rows: int, row_first: bool = False 

577) -> Iterator[List[int]]: 

578 """Yield successive max_rows-sized column chunks from l.""" 

579 if row_first: 

580 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0) 

581 for i in range(ncols): 

582 yield [l[j] for j in range(i, len(l), ncols)] 

583 else: 

584 for i in range(0, len(l), max_rows): 

585 yield l[i:(i + max_rows)] 

586 

587 

588def _find_optimal( 

589 rlist: List[int], row_first: bool, separator_size: int, displaywidth: int 

590) -> Dict[str, Any]: 

591 """Calculate optimal info to columnize a list of string""" 

592 for max_rows in range(1, len(rlist) + 1): 

593 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first))) 

594 sumlength = sum(col_widths) 

595 ncols = len(col_widths) 

596 if sumlength + separator_size * (ncols - 1) <= displaywidth: 

597 break 

598 return {'num_columns': ncols, 

599 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0, 

600 'max_rows': max_rows, 

601 'column_widths': col_widths 

602 } 

603 

604 

605T = TypeVar("T") 

606 

607 

608def _get_or_default(mylist: List[T], i: int, default: T) -> T: 

609 """return list item number, or default if don't exist""" 

610 if i >= len(mylist): 

611 return default 

612 else : 

613 return mylist[i] 

614 

615 

def get_text_list(
    list_: List[str], last_sep: str = " and ", sep: str = ", ", wrap_item_with: str = ""
) -> str:
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with ``sep``; the last one is attached
    with ``last_sep``.  If ``wrap_item_with`` is given, it is put before and
    after every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if len(list_) == 0:
        return ''

    items = list_
    if wrap_item_with:
        items = [f"{wrap_item_with}{item!s}{wrap_item_with}" for item in list_]

    if len(items) == 1:
        return items[0]

    leading = sep.join(items[:-1])
    return f"{leading}{last_sep}{items[-1]!s}"
648 last_sep, list_[-1])