Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/IPython/utils/text.py: 26%

1# encoding: utf-8

2"""

3Utilities for working with strings and text.

5Inheritance diagram:

7.. inheritance-diagram:: IPython.utils.text

8 :parts: 3

9"""

11import os

12import re

13import string

14import sys

15import textwrap

16from string import Formatter

17from pathlib import Path

# strftime pattern used by IPython for dates. The 'win32' platform gets the
# "%d" day directive; every other platform gets "%-d".
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"

class LSString(str):
    """A ``str`` subclass exposing alternate views of its own text.

    Instances behave like ordinary strings and additionally provide these
    read-only attributes:

        .l (or .list) : the text split on newlines, as a list.
        .n (or .nlstr): the string itself, unchanged.
        .s (or .spstr): the text with every newline replaced by a space.
        .p (or .paths): ``pathlib.Path`` objects for the lines that name an
                        existing filesystem entry.

    Views that need a transformation are built on first access and then
    cached on the instance.

    This is convenient when handing captured output back to a shell
    command, which usually expects whitespace-separated arguments."""

    def get_list(self):
        """Return the lines of the string (split on newlines), cached."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split('\n')
        return lines

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines turned into spaces, cached."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace('\n',' ')
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the lines that exist on disk, cached."""
        try:
            found = self.__paths
        except AttributeError:
            # Lines that do not name an existing path are silently skipped.
            found = self.__paths = [
                Path(line) for line in self.split('\n') if os.path.exists(line)
            ]
        return found

    p = paths = property(get_paths)

74# FIXME: We need to reimplement type specific displayhook and then add this

75# back as a custom printer. This should also be moved outside utils into the

76# core.

78# def print_lsstring(arg):

79# """ Prettier (non-repr-like) and more informative printer for LSString """

80# print "LSString (.p, .n, .l, .s available). Value:"

81# print arg

82#

83#

84# print_lsstring = result_display.register(LSString)(print_lsstring)

class SList(list):
    """A ``list`` subclass of strings with shell-friendly helpers.

    Instances behave like ordinary lists and additionally provide these
    read-only attributes:

    * .l (or .list) : the list itself.
    * .n (or .nlstr): the items joined with newlines.
    * .s (or .spstr): the items joined with single spaces.
    * .p (or .paths): ``pathlib.Path`` objects for the items that name an
      existing filesystem entry.

    The joined/path views are computed on first access and cached on the
    instance; they are not refreshed if the list is mutated afterwards."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with spaces, cached."""
        try:
            joined = self.__spstr
        except AttributeError:
            joined = self.__spstr = ' '.join(self)
        return joined

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines, cached."""
        try:
            joined = self.__nlstr
        except AttributeError:
            joined = self.__nlstr = '\n'.join(self)
        return joined

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the items that exist on disk, cached."""
        try:
            found = self.__paths
        except AttributeError:
            # Items that do not name an existing path are silently skipped.
            found = self.__paths = [Path(p) for p in self if os.path.exists(p)]
        return found

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """Return the items matching ``pattern`` as a new SList.

        Parameters
        ----------
        pattern : str or callable
            A string is used as a regular expression and searched
            case-insensitively.  Anything else is called with the text to
            test and its truthiness decides the match.
        prune : bool
            If true, return the items that do NOT match instead.
        field : int, optional
            If given, only that whitespace-separated field of each item is
            tested; an item without such a field is tested as ``""``.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """
        if isinstance(pattern, str):
            def pred(candidate):
                return re.search(pattern, candidate, re.IGNORECASE)
        else:
            pred = pattern

        def match_target(item):
            # Pick the text the predicate is applied to.
            if field is None:
                return item
            try:
                return item.split()[field]
            except IndexError:
                return ""

        want_match = not prune
        return SList(
            [el for el in self if bool(pred(match_target(el))) == want_match]
        )

    def fields(self, *fields):
        """Collect whitespace-separated fields from each string.

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog
            drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        Field indexes that a line does not have are ignored, and a line
        that yields no field at all contributes nothing to the result.

        Without args, fields() returns a plain list with each string split
        on whitespace.
        """
        split_lines = [el.split() for el in self]
        if not fields:
            return split_lines

        res = SList()
        for parts in split_lines:
            picked = []
            for fd in fields:
                try:
                    picked.append(parts[fd])
                except IndexError:
                    pass
            if picked:
                res.append(" ".join(picked))
        return res

    def sort(self,field= None, nums = False):
        """Return a new SList sorted by a field (see fields()).

        Unlike ``list.sort`` this does not sort in place; the sorted copy
        is returned.

        Parameters
        ----------
        field : int, optional
            Whitespace-separated field to sort on.  When omitted the whole
            line is the sort key.  A line lacking the field sorts as ``""``.
        nums : bool
            If true, the digits found in the sort key are read as one
            integer and used as the key (0 when there are no digits), so
            that 21 > 3.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order.
        """
        # decorate, sort, undecorate
        decorated = []
        for line in self:
            if field is None:
                key = line
            else:
                # fields() yields at most one string here; join it so the
                # key is text (not a list) and digits can be extracted per
                # character exactly like in the whole-line case.
                key = " ".join(SList([line]).fields(field))
            if nums:
                digits = "".join(ch for ch in key if ch.isdigit())
                try:
                    key = int(digits)
                except ValueError:
                    key = 0
            decorated.append((key, line))

        # Ties on the key fall back to comparing the lines themselves.
        decorated.sort()
        return SList([line for _, line in decorated])

231

232

233# FIXME: We need to reimplement type specific displayhook and then add this

234# back as a custom printer. This should also be moved outside utils into the

235# core.

236

237# def print_slist(arg):

238# """ Prettier (non-repr-like) and more informative printer for SList """

239# print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"

240# if hasattr(arg, 'hideonce') and arg.hideonce:

241# arg.hideonce = False

242# return

243#

244# nlprint(arg) # This was a nested list printer, now removed.

245#

246# print_slist = result_display.register(SList)(print_slist)

247

248

def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str or None
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented (tabs come before the spaces).
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str or None
        The string indented by ntabs and nspaces, or None when `instr` is
        None.
    """
    if instr is None:
        return None

    ind = '\t'*ntabs+' '*nspaces
    # With flatten, the leading whitespace of each line is part of the match
    # and therefore replaced; otherwise only the line start is matched.
    pat = re.compile(r'^\s*' if flatten else r'^', re.MULTILINE)
    outstr = pat.sub(ind, instr)

    # The pattern also matches after a final line separator, which leaves a
    # dangling indent at the very end; drop it.  The ``ind`` guard matters:
    # with an empty indent ``outstr[:-0]`` would throw away the whole string.
    if ind and outstr.endswith(os.linesep+ind):
        return outstr[:-len(ind)]
    return outstr

284

285

def list_strings(arg):
    """Wrap a lone string in a list; pass anything else through unchanged.

    Lets callers accept either a single string or a list of strings and
    always iterate over the result.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg

308

309

def marquee(txt='',width=78,mark='*'):
    """Return `txt` centered between two runs of `mark`.

    With empty `txt` the result is `mark` repeated and cut to exactly
    `width` characters.  Otherwise the text gets one space and the same
    number of marks on each side; when the text is too long for any marks
    only the two spaces remain.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'
    """
    if not txt:
        return (mark*width)[:width]

    # Room left after the text and its two spaces, in whole marks per side.
    per_side = max(0, (width-len(txt)-2)//len(mark)//2)
    border = mark*per_side
    return '%s %s %s' % (border,txt,border)

333

334

# One or more whitespace characters anchored at the start of the string.
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return how many whitespace characters `strng` starts with.

    Any character matched by the regex class ``\\s`` counts, not only the
    space character.  Returns 0 when the string has no leading whitespace.
    """
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0

345

346

def format_screen(strng):
    """Format a string for screen printing.

    Removes a backslash that sits at the end of any line (a latex-style
    "paragraph continue" marker) and returns the resulting string.
    """
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)

355

356

def dedent(text):
    """Like textwrap.dedent, but tolerant of an unindented first line.

    A triple-quoted literal often starts its text right after the quotes
    while the following lines are indented, e.g.:

        '''foo
        is a bar
        '''

    In that case the first line is kept as is and only the remaining
    lines are dedented.  Text that begins with a newline, or that has a
    single line, is passed to textwrap.dedent unchanged.

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # No newline at all (single line), or an empty first line (text starts
    # with a blank line): plain dedent of the whole text is what we want.
    if not newline or not first:
        return textwrap.dedent(text)

    # Dedent everything but the first line, then stitch them back together.
    return first + '\n' + textwrap.dedent(rest)

382

383

def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    Works like textwrap.fill applied paragraph by paragraph, where
    paragraphs are separated by one or more empty (whitespace-only) lines.
    The text is dedented and stripped first.

    Returns
    -------
    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_break = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indented_line = re.compile(r'\n\s+', re.MULTILINE)

    pieces = paragraph_break.split(dedent(text).strip())
    # The pattern has one capture group, so split() alternates paragraph,
    # separator, paragraph, ...; even positions are the paragraphs.
    wrapped = []
    for paragraph in pieces[::2]:
        # Indentation that survived dedent is presumed to be meaningful
        # formatting, so only flush paragraphs are re-filled.
        if indented_line.search(paragraph) is not None:
            wrapped.append(paragraph)
        else:
            wrapped.append(textwrap.fill(paragraph, ncols))
    return wrapped

407

408

def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes the longest run of leading '>' and whitespace characters that
    appears *identically* at the start of every line of the input text.
    The lines are re-joined with a newline character.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    strippable = string.whitespace + ">"

    # Walk the lines column by column (zip stops at the shortest line) and
    # count columns that hold the same quote/whitespace character everywhere.
    cut = 0
    for column in zip(*lines):
        head = column[0]
        if head not in strippable or any(ch != head for ch in column):
            break
        cut += 1

    return "\n".join(ln[cut:] for ln in lines)

457

458

def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Only sequences of the form ESC ``[`` digits/semicolons ``m`` are
    removed.

    Parameters
    ----------
    source : str
        Source to remove the ansi from
    """
    ansi_code = re.compile(r'\033\[(\d|;)+?m')
    return ansi_code.sub('', source)

469

470

class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each replacement field is evaluated as a Python expression with the
    keyword arguments as its namespace.  A `:` still introduces a format
    spec (as in standard string formatting), so slicing has to be spelled
    with an explicit ``slice(...)``.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        # NOTE(review): the field text goes straight to eval(); templates
        # must come from a trusted source.
        return eval(name, kwargs), name

495

496#XXX: As of Python 3.4, the format string parsing no longer splits on a colon

497# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and

498# above, it should be possible to remove FullEvalFormatter.

499

class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    The whole content of a replacement field is evaluated as a Python
    expression with the keyword arguments as its namespace.

    Anything after a `:` is treated as part of the expression (so slicing
    with [1:2] works), which means a format spec cannot be given.  Use
    :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """
    # Modelled on Formatter._vformat: fields are eval()'d, and the format
    # spec is folded back into the expression instead of being applied.
    def vformat(self, format_string:str, args, kwargs)->str:
        pieces = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # Literal text preceding the field (if any) is copied verbatim.
            if literal_text:
                pieces.append(literal_text)

            # A trailing chunk of pure literal text has no field.
            if field_name is None:
                continue

            # The parser may have split the expression at a ':'; glue the
            # halves back together so slices survive.
            expression = field_name
            if format_spec:
                expression = ':'.join([field_name, format_spec])

            # NOTE(review): the expression goes straight to eval();
            # templates must come from a trusted source.
            value = eval(expression, kwargs)

            # Apply any !r / !s / !a conversion, then format with an empty
            # spec.
            value = self.convert_field(value, conversion)
            pieces.append(self.format_field(value, ''))

        return ''.join(pieces)

554

555

class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled ``$$foo`` is emitted as the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """
    # Group 1: text before the '$'; group 2: the name (with a leading '$'
    # when the dollar was doubled).  The lookahead only accepts a match
    # that is followed by an even number of single quotes.
    _dollar_pattern_ignore_single_quote = re.compile(r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)")

    def parse(self, fmt_string):
        """Yield (literal, field, spec, conversion) tuples like
        Formatter.parse, with extra entries for each $name found in the
        literal text."""
        dollar_re = self._dollar_pattern_ignore_single_quote
        for literal_txt, field_name, format_spec, conversion \
                in Formatter.parse(self, fmt_string):

            pending = ""    # literal text waiting to be attached to a field
            consumed = 0    # how much of literal_txt the matches covered
            for match in dollar_re.finditer(literal_txt):
                before, name = match.group(1), match.group(2)
                consumed = match.end()
                if name.startswith("$"):
                    # $$foo --> $foo, kept as literal text
                    pending += before + name
                    continue
                yield (pending + before, name, "", None)
                pending = ""

            # Re-yield the {foo} style pattern with whatever literal is left
            yield (pending + literal_txt[consumed:], field_name, format_spec, conversion)

    def __repr__(self):
        return "<DollarFormatter>"

598

599#-----------------------------------------------------------------------------

600# Utils to columnize a list of string

601#-----------------------------------------------------------------------------

602

603def _col_chunks(l, max_rows, row_first=False):

604 """Yield successive max_rows-sized column chunks from l."""

605 if row_first:

606 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)

607 for i in range(ncols):

608 yield [l[j] for j in range(i, len(l), ncols)]

609 else:

610 for i in range(0, len(l), max_rows):

611 yield l[i:(i + max_rows)]

612

613

614def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):

615 """Calculate optimal info to columnize a list of string"""

616 for max_rows in range(1, len(rlist) + 1):

617 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))

618 sumlength = sum(col_widths)

619 ncols = len(col_widths)

620 if sumlength + separator_size * (ncols - 1) <= displaywidth:

621 break

622 return {'num_columns': ncols,

623 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,

624 'max_rows': max_rows,

625 'column_widths': col_widths

626 }

627

628

629def _get_or_default(mylist, i, default=None):

630 """return list item number, or default if don't exist"""

631 if i >= len(mylist):

632 return default

633 else :

634 return mylist[i]

635

636

def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs) :
    """Returns a nested list, and info to columnize items

    Parameters
    ----------
    items
        list of strings to columnize
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    empty : (default None)
        value used to fill the cells that have no item
    separator_size : int (default=2)
        How many characters will be used as separation between columns.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit.

    Any extra positional or keyword arguments (such as `separator_size`
    and `displaywidth`) are forwarded to `_find_optimal`.

    Returns
    -------
    strings_matrix
        nested list of strings: the outer list has one entry per row, each
        inner list one entry per column.  If the number of `items` does not
        equal rows*columns, the cells left over are filled with `empty`.
    dict_info
        some info to make columnize easier:

        num_columns
          number of columns
        max_rows
          maximum number of rows (final number may be less)
        column_widths
          list of the width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
        In [2]: list, info = compute_item_matrix(l, displaywidth=12)
        In [3]: list
        Out[3]: [['aaa', 'f', 'k'], ['b', 'g', 'l'], ['cc', 'h', None], ['d', 'i', None], ['eeeee', 'j', None]]
        In [4]: ideal = {'num_columns': 3, 'column_widths': [5, 1, 1], 'optimal_separator_width': 2, 'max_rows': 5}
        In [5]: all((info[k] == ideal[k] for k in ideal.keys()))
        Out[5]: True
    """
    item_widths = [len(item) for item in items]
    info = _find_optimal(item_widths, row_first, *args, **kwargs)
    nrow = info['max_rows']
    ncol = info['num_columns']

    def flat_index(r, c):
        # Position in `items` of the cell at row r, column c.
        return r * ncol + c if row_first else c * nrow + r

    matrix = []
    for r in range(nrow):
        matrix.append(
            [_get_or_default(items, flat_index(r, c), default=empty) for c in range(ncol)]
        )
    return (matrix, info)

691

692

def columnize(items, row_first=False, separator=" ", displaywidth=80, spread=False):
    """Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    separator : str, optional
        The string that separates columns.
        NOTE(review): earlier documentation said the default is two
        spaces, while the signature here shows a single space -- confirm
        which is intended.
    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.
    spread : bool, optional [default is False]
        If true, the separator is padded on the right with spaces up to the
        ``optimal_separator_width`` computed for the layout.

    Returns
    -------
    The formatted string: one line per row, each terminated by a newline.
    For empty `items` a single newline is returned.
    """
    if not items:
        return '\n'

    matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))

    widths = info['column_widths']
    lines = []
    for row in matrix:
        # Drop only the None padding of incomplete rows.  Filtering on
        # truthiness would also drop empty-string items and shift the
        # following cells into the wrong column.
        cells = [cell for cell in row if cell is not None]
        lines.append(separator.join(cell.ljust(w, ' ') for cell, w in zip(cells, widths)))
    return '\n'.join(lines)+'\n'

720

721

def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with `sep`; the last one is attached
    with `last_sep`.  If `wrap_item_with` is non-empty it is put on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if not len(list_):
        return ''

    entries = list_
    if wrap_item_with:
        entries = ['%s%s%s' % (wrap_item_with, entry, wrap_item_with)
                   for entry in list_]

    if len(entries) == 1:
        return entries[0]

    leading = sep.join(entries[:-1])
    return '%s%s%s' % (leading, last_sep, entries[-1])

752 last_sep, list_[-1])