Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/IPython/utils/text.py: 30%

1"""

2Utilities for working with strings and text.

4Inheritance diagram:

6.. inheritance-diagram:: IPython.utils.text

7 :parts: 3

8"""

10import os

11import re

12import string

13import sys

14import textwrap

15import warnings

16from string import Formatter

17from pathlib import Path

19from typing import (

20 List,

21 Dict,

22 Tuple,

23 Optional,

24 cast,

25 Any,

26 Union,

27 TypeVar,

28)

29from collections.abc import Sequence, Mapping, Callable, Iterator

31if sys.version_info < (3, 12):

32 from typing import Self

33else:

34 from typing import Self

37class LSString(str):

38 """String derivative with a special access attributes.

40 These are normal strings, but with the special attributes:

42 .l (or .list) : value as list (split on newlines).

43 .n (or .nlstr): original value (the string itself).

44 .s (or .spstr): value as whitespace-separated string.

45 .p (or .paths): list of path objects (requires path.py package)

47 Any values which require transformations are computed only once and

48 cached.

50 Such strings are very useful to efficiently interact with the shell, which

51 typically only understands whitespace-separated options for commands."""

53 __list: List[str]

54 __spstr: str

55 __paths: List[Path]

57 def get_list(self) -> List[str]:

58 try:

59 return self.__list

60 except AttributeError:

61 self.__list = self.split('\n')

62 return self.__list

64 l = list = property(get_list)

66 def get_spstr(self) -> str:

67 try:

68 return self.__spstr

69 except AttributeError:

70 self.__spstr = self.replace('\n',' ')

71 return self.__spstr

73 s = spstr = property(get_spstr)

75 def get_nlstr(self) -> Self:

76 return self

78 n = nlstr = property(get_nlstr)

80 def get_paths(self) -> List[Path]:

81 try:

82 return self.__paths

83 except AttributeError:

84 self.__paths = [Path(p) for p in self.split('\n') if os.path.exists(p)]

85 return self.__paths

87 p = paths = property(get_paths)

89# FIXME: We need to reimplement type specific displayhook and then add this

90# back as a custom printer. This should also be moved outside utils into the

91# core.

93# def print_lsstring(arg):

94# """ Prettier (non-repr-like) and more informative printer for LSString """

95# print("LSString (.p, .n, .l, .s available). Value:")

96# print(arg)

97#

98#

99# print_lsstring = result_display.register(LSString)(print_lsstring)

100

101

102class SList(list[Any]):

103 """List derivative with a special access attributes.

104

105 These are normal lists, but with the special attributes:

106

107 * .l (or .list) : value as list (the list itself).

108 * .n (or .nlstr): value as a string, joined on newlines.

109 * .s (or .spstr): value as a string, joined on spaces.

110 * .p (or .paths): list of path objects (requires path.py package)

111

112 Any values which require transformations are computed only once and

113 cached."""

114

115 __spstr: str

116 __nlstr: str

117 __paths: List[Path]

118

119 def get_list(self) -> Self:

120 return self

121

122 l = list = property(get_list)

123

124 def get_spstr(self) -> str:

125 try:

126 return self.__spstr

127 except AttributeError:

128 self.__spstr = ' '.join(self)

129 return self.__spstr

130

131 s = spstr = property(get_spstr)

132

133 def get_nlstr(self) -> str:

134 try:

135 return self.__nlstr

136 except AttributeError:

137 self.__nlstr = '\n'.join(self)

138 return self.__nlstr

139

140 n = nlstr = property(get_nlstr)

141

142 def get_paths(self) -> List[Path]:

143 try:

144 return self.__paths

145 except AttributeError:

146 self.__paths = [Path(p) for p in self if os.path.exists(p)]

147 return self.__paths

148

149 p = paths = property(get_paths)

150

151 def grep(

152 self,

153 pattern: Union[str, Callable[[Any], re.Match[str] | None]],

154 prune: bool = False,

155 field: Optional[int] = None,

156 ) -> Self:

157 """Return all strings matching 'pattern' (a regex or callable)

158

159 This is case-insensitive. If prune is true, return all items

160 NOT matching the pattern.

161

162 If field is specified, the match must occur in the specified

163 whitespace-separated field.

164

165 Examples::

166

167 a.grep( lambda x: x.startswith('C') )

168 a.grep('Cha.*log', prune=1)

169 a.grep('chm', field=-1)

170 """

171

172 def match_target(s: str) -> str:

173 if field is None:

174 return s

175 parts = s.split()

176 try:

177 tgt = parts[field]

178 return tgt

179 except IndexError:

180 return ""

181

182 if isinstance(pattern, str):

183 pred = lambda x : re.search(pattern, x, re.IGNORECASE)

184 else:

185 pred = pattern

186 if not prune:

187 return type(self)([el for el in self if pred(match_target(el))]) # type: ignore [no-untyped-call]

188 else:

189 return type(self)([el for el in self if not pred(match_target(el))]) # type: ignore [no-untyped-call]

190

191 def fields(self, *fields: List[str]) -> List[List[str]]:

192 """Collect whitespace-separated fields from string list

193

194 Allows quick awk-like usage of string lists.

195

196 Example data (in var a, created by 'a = !ls -l')::

197

198 -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog

199 drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython

200

201 * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``

202 * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``

203 (note the joining by space).

204 * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

205

206 IndexErrors are ignored.

207

208 Without args, fields() just split()'s the strings.

209 """

210 if len(fields) == 0:

211 return [el.split() for el in self]

212

213 res = SList()

214 for el in [f.split() for f in self]:

215 lineparts = []

216

217 for fd in fields:

218 try:

219 lineparts.append(el[fd])

220 except IndexError:

221 pass

222 if lineparts:

223 res.append(" ".join(lineparts))

224

225 return res

226

227 def sort( # type:ignore[override]

228 self,

229 field: Optional[List[str]] = None,

230 nums: bool = False,

231 ) -> Self:

232 """sort by specified fields (see fields())

233

234 Example::

235

236 a.sort(1, nums = True)

237

238 Sorts a by second field, in numerical order (so that 21 > 3)

239

240 """

241

242 #decorate, sort, undecorate

243 if field is not None:

244 dsu = [[SList([line]).fields(field), line] for line in self]

245 else:

246 dsu = [[line, line] for line in self]

247 if nums:

248 for i in range(len(dsu)):

249 numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()])

250 try:

251 n = int(numstr)

252 except ValueError:

253 n = 0

254 dsu[i][0] = n

255

256

257 dsu.sort()

258 return type(self)([t[1] for t in dsu])

259

260

def indent(instr: str, nspaces: int = 4, ntabs: int = 0, flatten: bool = False) -> str:
    """Indent a string a given number of spaces or tabstops.

    indent(str, nspaces=4, ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces.  When the input ends with a
        newline, no indentation is left dangling after that final newline.

    """
    ind = "\t" * ntabs + " " * nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # In MULTILINE mode '^' anchors right after '\n' (never after os.linesep as
    # such), so that is the separator to look for.  The check on `ind` avoids
    # slicing with [:-0], which would discard the whole string.
    if ind and outstr.endswith("\n" + ind):
        return outstr[:-len(ind)]
    return outstr

294

295

def list_strings(arg: Union[str, List[str]]) -> List[str]:
    """Normalize a string-or-list argument to a list of strings.

    A single string is wrapped in a new one-element list; anything else is
    handed back unchanged (the very same object, not a copy).

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg

318

319

def marquee(txt: str = "", width: int = 78, mark: str = "*") -> str:
    """Center the given text between two runs of a marker string.

    With empty text the result is just the marker repeated and cut to
    ``width`` characters.  Otherwise the text is surrounded by one space and
    an equal number of markers on each side; if the text does not fit, no
    markers are added at all.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    # Room left after the text and its two padding spaces, split in halves.
    per_side = max((width - len(txt) - 2) // len(mark) // 2, 0)
    border = mark * per_side
    return f"{border} {txt} {border}"

343

344

def format_screen(strng: str) -> str:
    """Prepare a string for display on screen.

    Strips the latex-style paragraph-continuation marker, i.e. a backslash
    sitting at the very end of a line.
    """
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)

353

354

def dedent(text: str) -> str:
    """Variant of textwrap.dedent that tolerates an unindented first line.

    Strings written like this are still dedented:
    '''foo
    is a bar
    '''

    The first line is kept as-is and only the remaining lines are dedented
    together.  Text that starts with a newline, or that has a single line,
    goes straight through textwrap.dedent.

    For use in wrap_paragraphs.
    """
    if text.startswith('\n'):
        # A leading blank line means the first real line is indented normally.
        return textwrap.dedent(text)

    head, newline, tail = text.partition('\n')
    if not newline:
        # Single line: nothing to protect.
        return textwrap.dedent(text)

    return head + '\n' + textwrap.dedent(tail)

380

381

def strip_email_quotes(text: str) -> str:
    """Strip leading email quotation characters ('>').

    Drops the longest run of leading '>' and whitespace characters that is
    *identical* across every line of the input.  Lines are re-joined with
    a plain newline.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    rows = text.splitlines()
    strippable = frozenset(string.whitespace + ">")

    # Walk the lines column by column; stop at the first column that is not
    # the same quote/whitespace character everywhere.
    prefix_len = 0
    for column in zip(*rows):
        lead = column[0]
        if lead not in strippable or any(ch != lead for ch in column):
            break
        prefix_len += 1

    return "\n".join(row[prefix_len:] for row in rows)

430

431

class EvalFormatter(Formatter):
    """String Formatter whose replacement fields are evaluated as expressions.

    A `:` inside a field is still treated as the start of a format spec (as
    in standard string formatting), so if slicing is required, you must
    explicitly create a slice.

    Note that on Python 3.14+ this version interprets `[]` as indexing operator
    so you need to use generators instead of list comprehensions, for example:
    `list(i for i in range(10))`.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name: str, args: Any, kwargs: Any) -> Tuple[Any, str]:
        """Evaluate the field text ``name`` with ``kwargs`` as its namespace.

        Positional ``args`` are not consulted.  Returns the evaluated value
        together with the field text itself.
        """
        # NOTE(review): this eval()s template text; only use with trusted
        # format strings.
        return eval(name, kwargs, kwargs), name

461

462#XXX: As of Python 3.4, the format string parsing no longer splits on a colon

463# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and

464# above, it should be possible to remove FullEvalFormatter.

465

class FullEvalFormatter(Formatter):
    """String Formatter that evaluates each replacement field as an expression.

    Every field is evaluated in a namespace built from the keyword
    arguments.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    # Modelled on Formatter._vformat, with the field lookup replaced by eval
    # and the format_spec folded back into the expression to allow slicing.
    def vformat(
        self, format_string: str, args: Sequence[Any], kwargs: Mapping[str, Any]
    ) -> str:
        """Render ``format_string``, evaluating each field against ``kwargs``.

        Positional ``args`` are not used.  Any conversion (``!r``, ``!s``,
        ``!a``) is applied to the evaluated value, which is then formatted
        with an empty format spec.
        """
        pieces = []
        for literal_text, field_name, format_spec, conversion in self.parse(
            format_string
        ):
            # Literal text always comes first.
            if literal_text:
                pieces.append(literal_text)

            # No field attached to this chunk: nothing more to do.
            if field_name is None:
                continue

            # Whatever the parser split off as a format spec is really the
            # tail of the expression (e.g. the "2]" of "x[1:2]"); glue it back.
            expression = f"{field_name}:{format_spec}" if format_spec else field_name

            # NOTE(review): this eval()s template text; only use with trusted
            # format strings.
            value = eval(expression, dict(kwargs))

            # Apply the requested conversion, then format without a spec.
            value = self.convert_field(value, conversion)
            pieces.append(self.format_field(value, ''))

        return ''.join(pieces)

524

525

class DollarFormatter(FullEvalFormatter):
    """Formatter adding Itpl-style ``$foo`` replacement, for names and
    attribute access only.  Standard {foo} replacement also works, and allows
    full evaluation of its arguments.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """

    _dollar_pattern_ignore_single_quote = re.compile(
        r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)"
    )

    def parse(self, fmt_string: str) -> Iterator[Tuple[Any, Any, Any, Any]]:
        """Yield ``(literal, field, format_spec, conversion)`` tuples.

        Works like ``Formatter.parse`` but additionally splits every literal
        chunk at the ``$name`` occurrences matched by the class pattern,
        emitting each as its own field with an empty format spec and no
        conversion.  ``$$name`` is kept as the literal text ``$name``.
        """
        dollar_re = self._dollar_pattern_ignore_single_quote
        for literal_txt, field_name, format_spec, conversion in Formatter.parse(
            self, fmt_string
        ):
            consumed = 0  # how much of literal_txt the matches have covered
            pending = ""  # literal text waiting to be attached to a field
            for match in dollar_re.finditer(literal_txt):
                before, name = match.group(1, 2)
                consumed = match.end()
                if name.startswith("$"):
                    # $$foo --> $foo, kept as plain text
                    pending += before + name
                    continue
                yield (pending + before, name, "", None)
                pending = ""

            # Hand on the original {foo}-style field with the leftover text.
            yield (pending + literal_txt[consumed:], field_name, format_spec, conversion)

    def __repr__(self) -> str:
        return "<DollarFormatter>"

572

573#-----------------------------------------------------------------------------

574# Utils to columnize a list of string

575#-----------------------------------------------------------------------------

576

577

578def _col_chunks(

579 l: List[int], max_rows: int, row_first: bool = False

580) -> Iterator[List[int]]:

581 """Yield successive max_rows-sized column chunks from l."""

582 if row_first:

583 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)

584 for i in range(ncols):

585 yield [l[j] for j in range(i, len(l), ncols)]

586 else:

587 for i in range(0, len(l), max_rows):

588 yield l[i:(i + max_rows)]

589

590

def _find_optimal(
    rlist: List[int], row_first: bool, separator_size: int, displaywidth: int
) -> Dict[str, Any]:
    """Work out the column layout for items of the given widths.

    Tries 1, 2, ... rows and stops at the first layout whose column widths
    plus separators fit in ``displaywidth``; if none fits, the last layout
    tried is reported.  ``rlist`` must be non-empty, otherwise no layout is
    ever computed and the lookup of the results fails.
    """
    for max_rows in range(1, len(rlist) + 1):
        col_widths = [max(chunk) for chunk in _col_chunks(rlist, max_rows, row_first)]
        ncols = len(col_widths)
        sumlength = sum(col_widths)
        if sumlength + separator_size * (ncols - 1) <= displaywidth:
            break

    # Spread the unused width evenly over the gaps between columns.
    gaps = ncols - 1
    separator_width = (displaywidth - sumlength) // gaps if gaps else 0
    return {
        'num_columns': ncols,
        'optimal_separator_width': separator_width,
        'max_rows': max_rows,
        'column_widths': col_widths,
    }

608T = TypeVar("T")

611def _get_or_default(mylist: List[T], i: int, default: T) -> T:

612 """return list item number, or default if don't exist"""

613 if i >= len(mylist):

614 return default

615 else :

616 return mylist[i]

617

618

def get_text_list(
    list_: List[str], last_sep: str = " and ", sep: str = ", ", wrap_item_with: str = ""
) -> str:
    """
    Join items into a natural-language enumeration.

    All items but the last are joined with ``sep``; the last one is attached
    with ``last_sep``.  If ``wrap_item_with`` is given, it is put on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if len(list_) == 0:
        return ''
    if wrap_item_with:
        list_ = [f"{wrap_item_with!s}{item!s}{wrap_item_with!s}" for item in list_]
    if len(list_) == 1:
        return list_[0]
    leading, final = list_[:-1], list_[-1]
    return f"{sep.join(leading)}{last_sep!s}{final!s}"