Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/IPython/utils/text.py: 30%

1"""

2Utilities for working with strings and text.

4Inheritance diagram:

6.. inheritance-diagram:: IPython.utils.text

7 :parts: 3

8"""

10import os

11import re

12import string

13import sys

14import textwrap

15import warnings

16from string import Formatter

17from pathlib import Path

19from typing import (

20 List,

21 Dict,

22 Tuple,

23 Optional,

24 cast,

25 Any,

26 Union,

27 TypeVar,

28)

29from collections.abc import Sequence, Mapping, Callable, Iterator

31from typing import Self

34class LSString(str):

35 """String derivative with a special access attributes.

37 These are normal strings, but with the special attributes:

39 .l (or .list) : value as list (split on newlines).

40 .n (or .nlstr): original value (the string itself).

41 .s (or .spstr): value as whitespace-separated string.

42 .p (or .paths): list of path objects (requires path.py package)

44 Any values which require transformations are computed only once and

45 cached.

47 Such strings are very useful to efficiently interact with the shell, which

48 typically only understands whitespace-separated options for commands."""

50 __list: List[str]

51 __spstr: str

52 __paths: List[Path]

54 def get_list(self) -> List[str]:

55 try:

56 return self.__list

57 except AttributeError:

58 self.__list = self.split('\n')

59 return self.__list

61 l = list = property(get_list)

63 def get_spstr(self) -> str:

64 try:

65 return self.__spstr

66 except AttributeError:

67 self.__spstr = self.replace('\n',' ')

68 return self.__spstr

70 s = spstr = property(get_spstr)

72 def get_nlstr(self) -> Self:

73 return self

75 n = nlstr = property(get_nlstr)

77 def get_paths(self) -> List[Path]:

78 try:

79 return self.__paths

80 except AttributeError:

81 self.__paths = [Path(p) for p in self.split('\n') if os.path.exists(p)]

82 return self.__paths

84 p = paths = property(get_paths)

86# FIXME: We need to reimplement type specific displayhook and then add this

87# back as a custom printer. This should also be moved outside utils into the

88# core.

90# def print_lsstring(arg):

91# """ Prettier (non-repr-like) and more informative printer for LSString """

92# print("LSString (.p, .n, .l, .s available). Value:")

93# print(arg)

94#

95#

96# print_lsstring = result_display.register(LSString)(print_lsstring)

99class SList(list[Any]):

100 """List derivative with a special access attributes.

101

102 These are normal lists, but with the special attributes:

103

104 * .l (or .list) : value as list (the list itself).

105 * .n (or .nlstr): value as a string, joined on newlines.

106 * .s (or .spstr): value as a string, joined on spaces.

107 * .p (or .paths): list of path objects (requires path.py package)

108

109 Any values which require transformations are computed only once and

110 cached."""

111

112 __spstr: str

113 __nlstr: str

114 __paths: List[Path]

115

116 def get_list(self) -> Self:

117 return self

118

119 l = list = property(get_list)

120

121 def get_spstr(self) -> str:

122 try:

123 return self.__spstr

124 except AttributeError:

125 self.__spstr = ' '.join(self)

126 return self.__spstr

127

128 s = spstr = property(get_spstr)

129

130 def get_nlstr(self) -> str:

131 try:

132 return self.__nlstr

133 except AttributeError:

134 self.__nlstr = '\n'.join(self)

135 return self.__nlstr

136

137 n = nlstr = property(get_nlstr)

138

139 def get_paths(self) -> List[Path]:

140 try:

141 return self.__paths

142 except AttributeError:

143 self.__paths = [Path(p) for p in self if os.path.exists(p)]

144 return self.__paths

145

146 p = paths = property(get_paths)

147

148 def grep(

149 self,

150 pattern: Union[str, Callable[[Any], re.Match[str] | None]],

151 prune: bool = False,

152 field: Optional[int] = None,

153 ) -> Self:

154 """Return all strings matching 'pattern' (a regex or callable)

155

156 This is case-insensitive. If prune is true, return all items

157 NOT matching the pattern.

158

159 If field is specified, the match must occur in the specified

160 whitespace-separated field.

161

162 Examples::

163

164 a.grep( lambda x: x.startswith('C') )

165 a.grep('Cha.*log', prune=1)

166 a.grep('chm', field=-1)

167 """

168

169 def match_target(s: str) -> str:

170 if field is None:

171 return s

172 parts = s.split()

173 try:

174 tgt = parts[field]

175 return tgt

176 except IndexError:

177 return ""

178

179 if isinstance(pattern, str):

180 pred = lambda x : re.search(pattern, x, re.IGNORECASE)

181 else:

182 pred = pattern

183 if not prune:

184 return type(self)([el for el in self if pred(match_target(el))]) # type: ignore [no-untyped-call]

185 else:

186 return type(self)([el for el in self if not pred(match_target(el))]) # type: ignore [no-untyped-call]

187

188 def fields(self, *fields: List[str]) -> List[List[str]]:

189 """Collect whitespace-separated fields from string list

190

191 Allows quick awk-like usage of string lists.

192

193 Example data (in var a, created by 'a = !ls -l')::

194

195 -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog

196 drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython

197

198 * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``

199 * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``

200 (note the joining by space).

201 * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

202

203 IndexErrors are ignored.

204

205 Without args, fields() just split()'s the strings.

206 """

207 if len(fields) == 0:

208 return [el.split() for el in self]

209

210 res = SList()

211 for el in [f.split() for f in self]:

212 lineparts = []

213

214 for fd in fields:

215 try:

216 lineparts.append(el[fd])

217 except IndexError:

218 pass

219 if lineparts:

220 res.append(" ".join(lineparts))

221

222 return res

223

224 def sort( # type:ignore[override]

225 self,

226 field: Optional[List[str]] = None,

227 nums: bool = False,

228 ) -> Self:

229 """sort by specified fields (see fields())

230

231 Example::

232

233 a.sort(1, nums = True)

234

235 Sorts a by second field, in numerical order (so that 21 > 3)

236

237 """

238

239 #decorate, sort, undecorate

240 if field is not None:

241 dsu = [[SList([line]).fields(field), line] for line in self]

242 else:

243 dsu = [[line, line] for line in self]

244 if nums:

245 for i in range(len(dsu)):

246 numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()])

247 try:

248 n = int(numstr)

249 except ValueError:

250 n = 0

251 dsu[i][0] = n

252

253

254 dsu.sort()

255 return type(self)([t[1] for t in dsu])

256

257

def indent(instr: str, nspaces: int = 4, ntabs: int = 0, flatten: bool = False) -> str:
    """Indent a string a given number of spaces or tabstops.

    indent(str, nspaces=4, ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces.  Tabs come before spaces.

    """
    ind = "\t" * ntabs + " " * nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # In MULTILINE mode '^' also matches right after a final '\n', which
    # leaves a dangling indent at the very end of the text; trim it.  The
    # check uses '\n' (what the regex anchors on) rather than os.linesep, and
    # is skipped for an empty indent because outstr[:-0] would be ''.
    if ind and outstr.endswith("\n" + ind):
        return outstr[:-len(ind)]
    return outstr

291

292

def list_strings(arg: Union[str, List[str]]) -> List[str]:
    """Normalise the argument to a list of strings.

    A single string is wrapped in a new one-element list; anything else is
    returned unchanged (the same object, not a copy).

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg

315

316

def marquee(txt: str = "", width: int = 78, mark: str = "*") -> str:
    """Return the input string centered in a 'marquee'.

    Parameters
    ----------
    txt : str
        Text to centre.  When empty, a full line of marks is returned.
    width : int
        Target width of the result.  Text too long to fit is returned with
        just a space on each side and no marks.
    mark : str
        String repeated on both sides of the text.  An empty mark yields no
        decoration.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    if mark:
        # Two columns are reserved for the spaces around the text; the rest
        # is split evenly between both sides, never going negative.
        nmark = max((width - len(txt) - 2) // len(mark) // 2, 0)
    else:
        # Nothing to repeat; also avoids dividing by len(mark) == 0.
        nmark = 0
    marks = mark * nmark
    return f"{marks} {txt} {marks}"

340

341

def format_screen(strng: str) -> str:
    """Prepare a string for display on screen.

    Strips latex-style paragraph-continuation markers, i.e. a backslash that
    is the last character of a line."""
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)

350

351

def dedent(text: str) -> str:
    """Variant of textwrap.dedent that tolerates an unindented first line.

    The first line is kept as it is and only the remaining lines are
    dedented, so strings such as the following are still handled:
    '''foo
    is a bar
    '''

    Text that starts with a newline, or that consists of a single line, is
    passed to textwrap.dedent unchanged.

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # Either there is no second line at all, or the text opens with a blank
    # line (empty first part): plain textwrap.dedent handles both.
    if not newline or not first:
        return textwrap.dedent(text)

    return first + '\n' + textwrap.dedent(rest)

377

378

def strip_email_quotes(text: str) -> str:
    """Remove the email-quotation prefix ('>') shared by every line.

    The prefix removed is the longest run of '>' and whitespace characters
    that appears *identically* at the start of all lines of the input.  The
    lines are re-joined with a newline character, so a trailing newline in
    the input is not preserved.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    rows = text.splitlines()
    strippable = string.whitespace + ">"

    # Walk the lines column by column; zip() stops at the shortest line.
    prefix_width = 0
    for column in zip(*rows):
        lead = column[0]
        if any(ch != lead for ch in column) or lead not in strippable:
            break
        prefix_width += 1

    return "\n".join(row[prefix_width:] for row in rows)

427

428

class EvalFormatter(Formatter):
    """String Formatter whose replacement fields are evaluated as expressions.

    Note that this version interprets a `:` as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    Note that on Python 3.14+ this version interprets `[]` as indexing operator
    so you need to use generators instead of list comprehensions, for example:
    `list(i for i in range(10))`.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name: str, args: Any, kwargs: Any) -> Tuple[Any, str]:
        """Evaluate the field text with the keyword arguments as namespace.

        Positional ``args`` are not consulted.  Returns the evaluated value
        together with the field text itself.
        """
        # NOTE: eval() executes arbitrary code; templates handed to this
        # formatter must come from a trusted source.
        return eval(name, kwargs, kwargs), name

458

459#XXX: As of Python 3.4, the format string parsing no longer splits on a colon

460# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and

461# above, it should be possible to remove FullEvalFormatter.

462

class FullEvalFormatter(Formatter):
    """String Formatter that evaluates each replacement field as an expression.

    Every field is evaluated with the keyword arguments as its namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    # Adapted from Formatter._vformat: the field is eval()'d, and whatever
    # the parser split off as a format spec is glued back onto the expression.
    def vformat(
        self, format_string: str, args: Sequence[Any], kwargs: Mapping[str, Any]
    ) -> str:
        """Render ``format_string``, evaluating each field against ``kwargs``.

        Positional ``args`` are accepted for interface compatibility but are
        not used.
        """
        pieces = []
        conversion: Optional[str]
        for literal_text, field_name, format_spec, conversion in self.parse(
            format_string
        ):
            # Literal text preceding the field goes out verbatim.
            if literal_text:
                pieces.append(literal_text)

            # No field after this literal: nothing more to do.
            if field_name is None:
                continue

            # A "format spec" here is really the tail of an expression that
            # contained a colon, so restore the full expression.
            expression = f"{field_name}:{format_spec}" if format_spec else field_name

            # NOTE: eval() executes arbitrary code; only use this with
            # trusted templates.  A copy of kwargs serves as the namespace.
            value = eval(expression, dict(kwargs))

            # Apply any !r / !s / !a conversion.
            # type issue in typeshed, fixed in https://github.com/python/typeshed/pull/11377
            value = self.convert_field(value, conversion)

            # Always format with an empty spec.
            pieces.append(self.format_field(value, ''))

        return ''.join(pieces)

521

522

class DollarFormatter(FullEvalFormatter):
    """Formatter that adds Itpl-style ``$foo`` replacement, for names and
    attribute access only.  Standard {foo} replacement also works, and allows
    full evaluation of its arguments.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """

    _dollar_pattern_ignore_single_quote = re.compile(
        r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)"
    )

    def parse(self, fmt_string: str) -> Iterator[Tuple[Any, Any, Any, Any]]:
        """Yield (literal, field, spec, conversion) tuples for ``fmt_string``.

        The stock parser's output is refined: every ``$name`` found in a
        literal chunk becomes a field of its own, and ``$$name`` is emitted
        as the literal text ``$name``.
        """
        dollar_re = self._dollar_pattern_ignore_single_quote
        for literal_txt, field_name, format_spec, conversion in Formatter.parse(
            self, fmt_string
        ):
            consumed = 0  # end of the last $-match within literal_txt
            pending = ""  # literal text gathered since the last yield
            for match in dollar_re.finditer(literal_txt):
                prefix, name = match.group(1, 2)
                if name.startswith("$"):
                    # $$foo --> $foo: keep it as plain text
                    pending += prefix + name
                else:
                    yield (pending + prefix, name, "", None)
                    pending = ""
                consumed = match.end()

            # Hand back the original {foo}-style field with the leftover text.
            yield (pending + literal_txt[consumed:], field_name, format_spec, conversion)

    def __repr__(self) -> str:
        return "<DollarFormatter>"

569

570#-----------------------------------------------------------------------------

571# Utils to columnize a list of strings

572#-----------------------------------------------------------------------------

573

574

575def _col_chunks(

576 l: List[int], max_rows: int, row_first: bool = False

577) -> Iterator[List[int]]:

578 """Yield successive max_rows-sized column chunks from l."""

579 if row_first:

580 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)

581 for i in range(ncols):

582 yield [l[j] for j in range(i, len(l), ncols)]

583 else:

584 for i in range(0, len(l), max_rows):

585 yield l[i:(i + max_rows)]

586

587

def _find_optimal(
    rlist: List[int], row_first: bool, separator_size: int, displaywidth: int
) -> Dict[str, Any]:
    """Find the layout with the fewest rows that fits in ``displaywidth``.

    Row counts are tried in increasing order; the first one whose column
    widths plus separators fit is used.  If none fits, the values from the
    last attempt (one row per item) are reported.
    """
    # NOTE: with an empty rlist the loop body never runs, so the names used
    # after the loop are unbound.
    for max_rows in range(1, len(rlist) + 1):
        # Each column is as wide as its largest entry.
        col_widths = [max(column) for column in _col_chunks(rlist, max_rows, row_first)]
        sumlength = sum(col_widths)
        ncols = len(col_widths)
        needed = sumlength + separator_size * (ncols - 1)
        if needed <= displaywidth:
            break

    gaps = ncols - 1
    # Spread the unused width evenly over the gaps between columns.
    separator_width = (displaywidth - sumlength) // gaps if gaps else 0
    return {
        'num_columns': ncols,
        'optimal_separator_width': separator_width,
        'max_rows': max_rows,
        'column_widths': col_widths,
    }

605T = TypeVar("T")

608def _get_or_default(mylist: List[T], i: int, default: T) -> T:

609 """return list item number, or default if don't exist"""

610 if i >= len(mylist):

611 return default

612 else :

613 return mylist[i]

614

615

def get_text_list(
    list_: List[str], last_sep: str = " and ", sep: str = ", ", wrap_item_with: str = ""
) -> str:
    """
    Join the items into a natural-language enumeration.

    All items but the last are joined with ``sep``; the last one is attached
    with ``last_sep``.  When ``wrap_item_with`` is given, it is placed on
    both sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    count = len(list_)
    if count == 0:
        return ''
    if wrap_item_with:
        list_ = [f"{wrap_item_with}{item}{wrap_item_with}" for item in list_]
    if count == 1:
        return list_[0]
    *leading, final = list_
    return f"{sep.join(leading)}{last_sep}{final}"

648 last_sep, list_[-1])