Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/IPython/utils/text.py: 30%

1"""

2Utilities for working with strings and text.

4Inheritance diagram:

6.. inheritance-diagram:: IPython.utils.text

7 :parts: 3

8"""

10import os

11import re

12import string

13import sys

14import textwrap

15import warnings

16from string import Formatter

17from pathlib import Path

19from typing import (

20 List,

21 Dict,

22 Tuple,

23 Optional,

24 cast,

25 Sequence,

26 Mapping,

27 Any,

28 Union,

29 Callable,

30 Iterator,

31 TypeVar,

32)

34if sys.version_info < (3, 12):

35 from typing_extensions import Self

36else:

37 from typing import Self

class LSString(str):
    """String subclass that exposes alternative views of its own value.

    Instances behave like ordinary strings and additionally provide these
    read-only attributes:

        .l (or .list) : value as a list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value with every newline replaced by a space.
        .p (or .paths): list of pathlib.Path objects, one for each line
                        that names an existing filesystem entry.

    Values that need a transformation are computed on first access and
    cached on the instance.

    Such strings are convenient when interacting with the shell, which
    typically only understands whitespace-separated options for commands."""

    # Per-instance caches; they are only set once the matching getter runs.
    __list: List[str]
    __spstr: str
    __paths: List[Path]

    def get_list(self) -> List[str]:
        """Return the value split on newlines (computed once, then cached)."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split("\n")
        return lines

    l = list = property(get_list)

    def get_spstr(self) -> str:
        """Return the value with newlines replaced by single spaces (cached)."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace("\n", " ")
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self) -> Self:
        """Return the string itself."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self) -> List[Path]:
        """Return ``Path`` objects for the lines that exist on disk (cached)."""
        try:
            found = self.__paths
        except AttributeError:
            # Lines that do not name an existing path are silently dropped.
            found = self.__paths = [
                Path(p) for p in self.split("\n") if os.path.exists(p)
            ]
        return found

    p = paths = property(get_paths)

92# FIXME: We need to reimplement type specific displayhook and then add this

93# back as a custom printer. This should also be moved outside utils into the

94# core.

96# def print_lsstring(arg):

97# """ Prettier (non-repr-like) and more informative printer for LSString """

98# print("LSString (.p, .n, .l, .s available). Value:")

99# print(arg)

100#

101#

102# print_lsstring = result_display.register(LSString)(print_lsstring)

103

104

class SList(list):
    """List subclass with shell-friendly views and awk-like helpers.

    These are normal lists, but with the special attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of pathlib.Path objects, one for each item that
      names an existing filesystem entry.

    Values that need a transformation are computed on first access and
    cached on the instance; the cache is not refreshed if the list is
    modified afterwards."""

    # Per-instance caches; they are only set once the matching getter runs.
    __spstr: str
    __nlstr: str
    __paths: List[Path]

    def get_list(self) -> Self:
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self) -> str:
        """Return the items joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = " ".join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self) -> str:
        """Return the items joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = "\n".join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self) -> List[Path]:
        """Return ``Path`` objects for the items that exist on disk (cached)."""
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [Path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(
        self,
        pattern: Union[str, Callable[[Any], Optional[re.Match[str]]]],
        prune: bool = False,
        field: Optional[int] = None,
    ) -> Self:
        """Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched case-insensitively; a callable is used
        as the predicate as-is. If prune is true, return all items NOT
        matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field. Items that do not have that field are
        matched against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s: str) -> str:
            # Text the predicate is applied to: whole item or a single field.
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, str):
            pred = re.compile(pattern, re.IGNORECASE).search
        else:
            pred = pattern

        keep_matches = not prune
        return type(self)(
            [el for el in self if bool(pred(match_target(el))) == keep_matches]
        )

    def fields(self, *fields: int) -> Union[List[List[str]], List[str]]:
        """Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored: a field index that a line does not have is
        skipped, and a line that yields no field at all is left out of the
        result.

        Without args, fields() just split()'s the strings.
        """
        if not fields:
            return [el.split() for el in self]

        res = SList()
        for parts in (line.split() for line in self):
            lineparts = []
            for fd in fields:
                try:
                    lineparts.append(parts[fd])
                except IndexError:
                    # Deliberately best-effort: missing fields are skipped.
                    pass
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(  # type:ignore[override]
        self,
        field: Optional[int] = None,
        nums: bool = False,
    ) -> Self:
        """Return a sorted copy, optionally keyed on one field (see fields())

        Unlike ``list.sort`` this does not sort in place; a new instance of
        the same type is returned.

        Parameters
        ----------
        field : int, optional
            Index of the whitespace-separated field to sort on. When omitted
            the whole line is the key. Lines lacking the field sort with an
            empty key.
        nums : bool (default: False)
            Sort numerically: the digits found in the key are concatenated
            and read as one integer (0 when there are none).

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """

        def key_text(line: str) -> str:
            if field is None:
                return line
            # fields() yields zero or one string here; join gives plain text
            # so the digit scan below sees characters, not whole fields.
            return " ".join(SList([line]).fields(field))

        def numeric_value(text: str) -> int:
            digits = "".join(ch for ch in text if ch.isdigit())
            try:
                return int(digits)
            except ValueError:
                # No digits, or digit-like characters int() cannot parse.
                return 0

        # decorate, sort, undecorate; ties on the key fall back to the line.
        if nums:
            decorated = sorted((numeric_value(key_text(line)), line) for line in self)
        else:
            decorated = sorted((key_text(line), line) for line in self)
        return type(self)([line for _, line in decorated])

262

263

def indent(instr: str, nspaces: int = 4, ntabs: int = 0, flatten: bool = False) -> str:
    """Indent a string a given number of spaces or tabstops.

    indent(str, nspaces=4, ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces.

    """
    ind = "\t" * ntabs + " " * nspaces
    line_start = r"^\s*" if flatten else r"^"
    outstr = re.sub(line_start, ind, instr, flags=re.MULTILINE)

    # ``^`` also matches right after a final newline, which leaves a dangling
    # indent at the very end; remove it.  The ``ind`` guard matters because
    # ``outstr[:-0]`` would be the empty string.  In-memory text ends lines
    # with "\n" on every platform (and "\r\n" also ends with "\n"), so the
    # check does not depend on os.linesep.
    if ind and outstr.endswith("\n" + ind):
        return outstr[: -len(ind)]
    return outstr

297

298

def list_strings(arg: Union[str, List[str]]) -> List[str]:
    """Normalize the argument to a list of strings.

    A single string is wrapped in a one-element list; anything else is
    returned unchanged (the same object, not a copy).

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg

321

322

def marquee(txt: str = "", width: int = 78, mark: str = "*") -> str:
    """Return the input string centered in a 'marquee'.

    Parameters
    ----------
    txt : str (default: "")
        Text to center.  When empty, a line made only of marks and exactly
        ``width`` characters long is returned.
    width : int (default: 78)
        Target width of the result.  Text too long to fit is returned with
        a single space on each side and no marks.
    mark : str (default: "*")
        String repeated on both sides of the text.  An empty mark yields the
        text padded with one space on each side.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    if not mark:
        # Nothing to repeat; also avoids dividing by len(mark) == 0 below.
        return f" {txt} "
    # Two characters are reserved for the spaces around the text; the rest
    # is split evenly between the two sides.
    nmark = max(0, (width - len(txt) - 2) // len(mark) // 2)
    marks = mark * nmark
    return f"{marks} {txt} {marks}"

346

347

def format_screen(strng: str) -> str:
    """Prepare a string for screen printing.

    Drops the latex-style paragraph-continuation marker: a backslash that
    sits at the very end of a line."""
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)

356

357

def dedent(text: str) -> str:
    """Variant of textwrap.dedent that tolerates an unindented first line.

    The first line is set aside and only the remaining lines are dedented,
    so strings like this one still lose their common indentation:
    '''foo
    is a bar
    '''

    Text that starts with a newline, or that has only one line, is passed
    to textwrap.dedent unchanged.

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # No newline at all (single line) or a leading blank line: nothing needs
    # to be protected from textwrap.dedent.
    if not newline or not first:
        return textwrap.dedent(text)

    return first + '\n' + textwrap.dedent(rest)

383

384

def strip_email_quotes(text: str) -> str:
    """Strip leading email quotation characters ('>').

    Removes the longest run of leading '>' and whitespace characters that
    appears *identically* at the start of every line of the input text.
    The result is the stripped lines joined with single newlines.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    strippable = string.whitespace + ">"

    # Walk the lines column by column (zip stops at the shortest line) and
    # count the columns that hold one and the same quote/whitespace character.
    strip_len = 0
    for column in zip(*lines):
        lead = column[0]
        if any(ch != lead for ch in column) or lead not in strippable:
            break
        strip_len += 1

    return "\n".join(ln[strip_len:] for ln in lines)

433

434

class EvalFormatter(Formatter):
    """A String Formatter that evaluates simple expressions in its fields.

    Each replacement field is evaluated as a Python expression, with the
    keyword arguments of ``format`` as its namespace.

    Note that this version interprets a `:` as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name: str, args: Any, kwargs: Any) -> Tuple[Any, str]:
        """Evaluate the field ``name`` in ``kwargs``; return ``(value, name)``."""
        # NOTE: the field text is passed to eval(), so templates must come
        # from a trusted source.
        return eval(name, kwargs), name

460

461#XXX: As of Python 3.4, the format string parsing no longer splits on a colon

462# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and

463# above, it should be possible to remove FullEvalFormatter.

464

class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as a Python expression, with the
    keyword arguments of ``format`` as its namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    # Adapted from Formatter._vformat: fields are eval()'d, and whatever the
    # parser took for a format spec is glued back onto the expression so
    # that slices survive.
    def vformat(
        self, format_string: str, args: Sequence[Any], kwargs: Mapping[str, Any]
    ) -> str:
        """Render ``format_string``, evaluating each field in ``kwargs``.

        ``args`` is accepted for interface compatibility and is not used.
        """
        pieces = []
        conversion: Optional[str]
        for literal_text, field_name, format_spec, conversion in self.parse(
            format_string
        ):
            # Literal text preceding the field (if any) is copied verbatim.
            if literal_text:
                pieces.append(literal_text)

            # No field after this literal: nothing more to do for this item.
            if field_name is None:
                continue

            # The parser split the field at its first ':'; put it back so
            # that e.g. "x[1:2]" is evaluated as a whole.
            expression = f"{field_name}:{format_spec}" if format_spec else field_name

            # Evaluate in a copy of kwargs so eval() cannot modify the
            # caller's mapping.
            obj = eval(expression, dict(kwargs))

            # Apply any !r / !s / !a conversion.
            # type issue in typeshed, fixed in https://github.com/python/typeshed/pull/11377
            obj = self.convert_field(obj, conversion)  # type: ignore[arg-type]

            # Format with an empty spec, since the spec became part of the
            # expression above.
            pieces.append(self.format_field(obj, ''))

        return ''.join(pieces)

523

524

class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments. A doubled ``$$foo`` is emitted as the
    literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """

    # group(1): text before the '$'; group(2): the name (dots allowed), with
    # a leading '$' when the input had '$$'. The lookahead requires an even
    # number of single quotes in the rest of the text.
    _dollar_pattern_ignore_single_quote = re.compile(
        r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)"
    )

    def parse(self, fmt_string: str) -> Iterator[Tuple[Any, Any, Any, Any]]:  # type: ignore[explicit-override]
        """Yield ``(literal, field, spec, conversion)`` tuples for the string.

        The tuples from ``Formatter.parse`` are passed through, except that
        every ``$name`` found in a literal part is emitted as its own field.
        """
        for literal_txt, field_name, format_spec, conversion in Formatter.parse(
            self, fmt_string
        ):
            pending = ""  # literal text collected but not yet yielded
            consumed = 0  # end offset of the last $-match in literal_txt
            for m in self._dollar_pattern_ignore_single_quote.finditer(literal_txt):
                before, name = m.group(1), m.group(2)
                consumed = m.end()
                if name.startswith("$"):
                    # $$foo --> literal $foo
                    pending += before + name
                    continue
                yield (pending + before, name, "", None)
                pending = ""

            # Re-yield the {foo} style pattern, preceded by leftover literal.
            yield (pending + literal_txt[consumed:], field_name, format_spec, conversion)

    def __repr__(self) -> str:
        return "<DollarFormatter>"

571

572#-----------------------------------------------------------------------------

573# Utils to columnize a list of string

574#-----------------------------------------------------------------------------

575

576

577def _col_chunks(

578 l: List[int], max_rows: int, row_first: bool = False

579) -> Iterator[List[int]]:

580 """Yield successive max_rows-sized column chunks from l."""

581 if row_first:

582 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)

583 for i in range(ncols):

584 yield [l[j] for j in range(i, len(l), ncols)]

585 else:

586 for i in range(0, len(l), max_rows):

587 yield l[i:(i + max_rows)]

588

589

590def _find_optimal(

591 rlist: List[int], row_first: bool, separator_size: int, displaywidth: int

592) -> Dict[str, Any]:

593 """Calculate optimal info to columnize a list of string"""

594 for max_rows in range(1, len(rlist) + 1):

595 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))

596 sumlength = sum(col_widths)

597 ncols = len(col_widths)

598 if sumlength + separator_size * (ncols - 1) <= displaywidth:

599 break

600 return {'num_columns': ncols,

601 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,

602 'max_rows': max_rows,

603 'column_widths': col_widths

604 }

607T = TypeVar("T")

610def _get_or_default(mylist: List[T], i: int, default: T) -> T:

611 """return list item number, or default if don't exist"""

612 if i >= len(mylist):

613 return default

614 else :

615 return mylist[i]

616

617

def get_text_list(
    list_: List[str], last_sep: str = " and ", sep: str = ", ", wrap_item_with: str = ""
) -> str:
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with ``sep``; the last one is attached
    with ``last_sep``. When ``wrap_item_with`` is given, it is put on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if not list_:
        return ''

    items = list_
    if wrap_item_with:
        items = [f"{wrap_item_with}{item}{wrap_item_with}" for item in list_]

    if len(items) == 1:
        return items[0]

    leading = sep.join(items[:-1])
    return f"{leading}{last_sep}{items[-1]}"