Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/IPython/utils/text.py: 30%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Utilities for working with strings and text.
4Inheritance diagram:
6.. inheritance-diagram:: IPython.utils.text
7 :parts: 3
8"""
10import os
11import re
12import string
13import sys
14import textwrap
15import warnings
16from string import Formatter
17from pathlib import Path
19from typing import (
20 List,
21 Dict,
22 Tuple,
23 Optional,
24 cast,
25 Any,
26 Union,
27 TypeVar,
28)
29from collections.abc import Sequence, Mapping, Callable, Iterator
31from typing import Self
34class LSString(str):
35 """String derivative with a special access attributes.
37 These are normal strings, but with the special attributes:
39 .l (or .list) : value as list (split on newlines).
40 .n (or .nlstr): original value (the string itself).
41 .s (or .spstr): value as whitespace-separated string.
42 .p (or .paths): list of path objects (requires path.py package)
44 Any values which require transformations are computed only once and
45 cached.
47 Such strings are very useful to efficiently interact with the shell, which
48 typically only understands whitespace-separated options for commands."""
50 __list: List[str]
51 __spstr: str
52 __paths: List[Path]
54 def get_list(self) -> List[str]:
55 try:
56 return self.__list
57 except AttributeError:
58 self.__list = self.split('\n')
59 return self.__list
61 l = list = property(get_list)
63 def get_spstr(self) -> str:
64 try:
65 return self.__spstr
66 except AttributeError:
67 self.__spstr = self.replace('\n',' ')
68 return self.__spstr
70 s = spstr = property(get_spstr)
72 def get_nlstr(self) -> Self:
73 return self
75 n = nlstr = property(get_nlstr)
77 def get_paths(self) -> List[Path]:
78 try:
79 return self.__paths
80 except AttributeError:
81 self.__paths = [Path(p) for p in self.split('\n') if os.path.exists(p)]
82 return self.__paths
84 p = paths = property(get_paths)
86# FIXME: We need to reimplement type specific displayhook and then add this
87# back as a custom printer. This should also be moved outside utils into the
88# core.
90# def print_lsstring(arg):
91# """ Prettier (non-repr-like) and more informative printer for LSString """
92# print("LSString (.p, .n, .l, .s available). Value:")
93# print(arg)
94#
95#
96# print_lsstring = result_display.register(LSString)(print_lsstring)
99class SList(list[Any]):
100 """List derivative with a special access attributes.
102 These are normal lists, but with the special attributes:
104 * .l (or .list) : value as list (the list itself).
105 * .n (or .nlstr): value as a string, joined on newlines.
106 * .s (or .spstr): value as a string, joined on spaces.
107 * .p (or .paths): list of path objects (requires path.py package)
109 Any values which require transformations are computed only once and
110 cached."""
112 __spstr: str
113 __nlstr: str
114 __paths: List[Path]
116 def get_list(self) -> Self:
117 return self
119 l = list = property(get_list)
121 def get_spstr(self) -> str:
122 try:
123 return self.__spstr
124 except AttributeError:
125 self.__spstr = ' '.join(self)
126 return self.__spstr
128 s = spstr = property(get_spstr)
130 def get_nlstr(self) -> str:
131 try:
132 return self.__nlstr
133 except AttributeError:
134 self.__nlstr = '\n'.join(self)
135 return self.__nlstr
137 n = nlstr = property(get_nlstr)
139 def get_paths(self) -> List[Path]:
140 try:
141 return self.__paths
142 except AttributeError:
143 self.__paths = [Path(p) for p in self if os.path.exists(p)]
144 return self.__paths
146 p = paths = property(get_paths)
148 def grep(
149 self,
150 pattern: Union[str, Callable[[Any], re.Match[str] | None]],
151 prune: bool = False,
152 field: Optional[int] = None,
153 ) -> Self:
154 """Return all strings matching 'pattern' (a regex or callable)
156 This is case-insensitive. If prune is true, return all items
157 NOT matching the pattern.
159 If field is specified, the match must occur in the specified
160 whitespace-separated field.
162 Examples::
164 a.grep( lambda x: x.startswith('C') )
165 a.grep('Cha.*log', prune=1)
166 a.grep('chm', field=-1)
167 """
169 def match_target(s: str) -> str:
170 if field is None:
171 return s
172 parts = s.split()
173 try:
174 tgt = parts[field]
175 return tgt
176 except IndexError:
177 return ""
179 if isinstance(pattern, str):
180 pred = lambda x : re.search(pattern, x, re.IGNORECASE)
181 else:
182 pred = pattern
183 if not prune:
184 return type(self)([el for el in self if pred(match_target(el))]) # type: ignore [no-untyped-call]
185 else:
186 return type(self)([el for el in self if not pred(match_target(el))]) # type: ignore [no-untyped-call]
188 def fields(self, *fields: List[str]) -> List[List[str]]:
189 """Collect whitespace-separated fields from string list
191 Allows quick awk-like usage of string lists.
193 Example data (in var a, created by 'a = !ls -l')::
195 -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog
196 drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython
198 * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
199 * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
200 (note the joining by space).
201 * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``
203 IndexErrors are ignored.
205 Without args, fields() just split()'s the strings.
206 """
207 if len(fields) == 0:
208 return [el.split() for el in self]
210 res = SList()
211 for el in [f.split() for f in self]:
212 lineparts = []
214 for fd in fields:
215 try:
216 lineparts.append(el[fd])
217 except IndexError:
218 pass
219 if lineparts:
220 res.append(" ".join(lineparts))
222 return res
224 def sort( # type:ignore[override]
225 self,
226 field: Optional[List[str]] = None,
227 nums: bool = False,
228 ) -> Self:
229 """sort by specified fields (see fields())
231 Example::
233 a.sort(1, nums = True)
235 Sorts a by second field, in numerical order (so that 21 > 3)
237 """
239 #decorate, sort, undecorate
240 if field is not None:
241 dsu = [[SList([line]).fields(field), line] for line in self]
242 else:
243 dsu = [[line, line] for line in self]
244 if nums:
245 for i in range(len(dsu)):
246 numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()])
247 try:
248 n = int(numstr)
249 except ValueError:
250 n = 0
251 dsu[i][0] = n
254 dsu.sort()
255 return type(self)([t[1] for t in dsu])
def indent(instr: str, nspaces: int = 4, ntabs: int = 0, flatten: bool = False) -> str:
    """Indent a string a given number of spaces or tabstops.

    indent(str, nspaces=4, ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces.

    """
    ind = "\t" * ntabs + " " * nspaces
    if flatten:
        # ``\s*`` also consumes newlines, so blank lines following a line
        # start are absorbed together with the leading whitespace.
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # A trailing newline makes the pattern match once more at the very end,
    # leaving a dangling indent there; strip it.  The check uses "\n" (not
    # os.linesep) because Python text uses "\n" on every platform, and it is
    # skipped for an empty indent, where ``outstr[:-0]`` would be ``''``.
    if ind and outstr.endswith("\n" + ind):
        return outstr[:-len(ind)]
    return outstr
def list_strings(arg: Union[str, List[str]]) -> List[str]:
    """Normalize the input to a list of strings.

    A single string is wrapped in a one-element list; anything else is
    returned unchanged (the same object, not a copy).

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg
def marquee(txt: str = "", width: int = 78, mark: str = "*") -> str:
    """Center the input string between two runs of a marker.

    With empty text the result is just the marker repeated and cut to
    ``width`` characters. Otherwise the text is surrounded by one space and
    an equal number of marker repetitions on each side; if the text is too
    long for the requested width, no markers are added.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    # Room left after the text and its two padding spaces, expressed in
    # whole markers and split evenly between both sides.
    per_side = max((width - len(txt) - 2) // len(mark) // 2, 0)
    border = mark * per_side
    return f"{border!s} {txt!s} {border!s}"
def format_screen(strng: str) -> str:
    """Prepare a string for screen printing.

    Strips the latex-style paragraph-continuation marker: a backslash that
    sits at the very end of a line."""
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
def dedent(text: str) -> str:
    """Like textwrap.dedent, but tolerant of an unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # Two cases go straight to textwrap.dedent: a single-line text, and a
    # text that opens with a blank line (the first line must not be ignored).
    if not newline or not first:
        return textwrap.dedent(text)

    # Leave the first line alone and dedent only what follows it.
    return first + '\n' + textwrap.dedent(rest)
def strip_email_quotes(text: str) -> str:
    """Strip leading email quotation characters ('>').

    Removes the longest run of leading '>' and whitespace characters that
    appears *identically* at the start of every line of the input text.
    The result is the remaining lines joined with newlines.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    rows = text.splitlines()
    strippable = string.whitespace + ">"

    # Walk the lines column by column; zip stops at the shortest line.
    cut = 0
    for column in zip(*rows):
        lead = column[0]
        # Stop as soon as the lines disagree, or agree on a character that
        # is neither whitespace nor a quote mark.
        if any(ch != lead for ch in column) or lead not in strippable:
            break
        cut += 1

    return "\n".join(row[cut:] for row in rows)
class EvalFormatter(Formatter):
    """A String Formatter that evaluates each field as a simple expression.

    Note that this version interprets a `:` as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    Note that on Python 3.14+ this version interprets `[]` as indexing operator
    so you need to use generators instead of list comprehensions, for example:
    `list(i for i in range(10))`.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Field names are handed to ``eval``, so templates must come from a
    trusted source.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name: str, args: Any, kwargs: Any) -> Tuple[Any, str]:
        """Evaluate ``name`` with ``kwargs`` as both globals and locals.

        Positional ``args`` are not consulted. Returns the computed value
        together with the field name.
        """
        return eval(name, kwargs, kwargs), name
459#XXX: As of Python 3.4, the format string parsing no longer splits on a colon
460# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
461# above, it should be possible to remove FullEvalFormatter.
class FullEvalFormatter(Formatter):
    """A String Formatter that evaluates every field as an expression.

    Each field is evaluated as a Python expression in a namespace built
    from the keyword arguments.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Fields are handed to ``eval``, so templates must come from a trusted
    source.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    # Adapted from Formatter._vformat: fields are eval'ed, and whatever the
    # parser split off as a format spec is glued back onto the expression.
    def vformat(
        self, format_string: str, args: Sequence[Any], kwargs: Mapping[str, Any]
    ) -> str:
        """Render ``format_string``; positional ``args`` are not used."""
        pieces: List[str] = []
        for literal, expression, spec, conversion in self.parse(format_string):
            # Literal text preceding the field (if any) is emitted verbatim.
            if literal:
                pieces.append(literal)

            # No field in this chunk: nothing more to do.
            if expression is None:
                continue

            # The parser treats text after ':' as a format spec; re-attach
            # it so that expressions containing a colon (slices) survive.
            if spec:
                expression = f"{expression}:{spec}"

            # Evaluate against a fresh copy of the keyword namespace.
            value = eval(expression, dict(kwargs))

            # Apply any !r / !s / !a conversion, then format without a spec.
            value = self.convert_field(value, conversion)
            pieces.append(self.format_field(value, ''))

        return ''.join(pieces)
class DollarFormatter(FullEvalFormatter):
    """Formatter that also understands Itpl style ``$foo`` replacement.

    ``$name`` handles names and attribute access only. Standard ``{foo}``
    replacement also works, and allows full evaluation of its arguments.
    ``$$name`` produces a literal ``$name``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """

    # group(1): text before the '$'; group(2): the name (with a leading '$'
    # when the input had '$$').  The lookahead only allows a match when the
    # rest of the text contains an even number of single quotes.
    _dollar_pattern_ignore_single_quote = re.compile(
        r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)"
    )

    def parse(self, fmt_string: str) -> Iterator[Tuple[Any, Any, Any, Any]]:
        """Yield ``Formatter.parse`` tuples, with ``$foo`` turned into fields."""
        dollar_re = self._dollar_pattern_ignore_single_quote
        for literal, name, spec, conv in Formatter.parse(self, fmt_string):
            consumed = 0  # end of the last $-match inside ``literal``
            pending = []  # literal text collected since the last yield
            for found in dollar_re.finditer(literal):
                before = found.group(1)
                target = found.group(2)
                if target[0] == "$":
                    # $$foo --> literal $foo, kept as plain text
                    pending.append(before)
                    pending.append(target)
                else:
                    yield ("".join(pending) + before, target, "", None)
                    pending = []
                consumed = found.end()

            # Hand on the original {foo} field together with whatever
            # literal text is left over.
            yield ("".join(pending) + literal[consumed:], name, spec, conv)

    def __repr__(self) -> str:
        return "<DollarFormatter>"
570#-----------------------------------------------------------------------------
571# Utils to columnize a list of string
572#-----------------------------------------------------------------------------
def _col_chunks(
    l: List[int], max_rows: int, row_first: bool = False
) -> Iterator[List[int]]:
    """Yield the columns obtained by laying out ``l`` with ``max_rows`` rows.

    By default each column is a consecutive slice of at most ``max_rows``
    items. With ``row_first`` the items are dealt out row by row, so a
    column holds every ``ncols``-th item.
    """
    total = len(l)
    if not row_first:
        for start in range(0, total, max_rows):
            yield l[start:start + max_rows]
        return

    # Number of columns needed: ceiling of total / max_rows.
    full, leftover = divmod(total, max_rows)
    ncols = full + (leftover > 0)
    for first in range(ncols):
        yield [l[pos] for pos in range(first, total, ncols)]
def _find_optimal(
    rlist: List[int], row_first: bool, separator_size: int, displaywidth: int
) -> Dict[str, Any]:
    """Calculate optimal info to columnize a list of string

    Parameters
    ----------
    rlist : list of int
        The widths of the items to lay out.
    row_first : bool
        Passed through to ``_col_chunks`` to choose the fill order.
    separator_size : int
        Minimum width reserved between two adjacent columns.
    displaywidth : int
        Total width available.

    Returns
    -------
    dict
        ``num_columns``, ``optimal_separator_width``, ``max_rows`` and
        ``column_widths`` of the first layout (fewest rows) that fits in
        ``displaywidth``.  If none fits, the values of the last layout
        tried (``max_rows == len(rlist)``) are returned.  An empty
        ``rlist`` yields an all-zero / empty layout.
    """
    if len(rlist) == 0:
        # Nothing to lay out: without this guard the loop below never runs
        # and the names used in the return statement would be unbound.
        return {'num_columns': 0,
                'optimal_separator_width': 0,
                'max_rows': 0,
                'column_widths': []
               }

    # Try 1 row, 2 rows, ... and stop at the first layout narrow enough.
    for max_rows in range(1, len(rlist) + 1):
        col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
        sumlength = sum(col_widths)
        ncols = len(col_widths)
        if sumlength + separator_size * (ncols - 1) <= displaywidth:
            break
    return {'num_columns': ncols,
            # Spread the unused width over the gaps between columns.
            'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,
            'max_rows': max_rows,
            'column_widths': col_widths
           }
T = TypeVar("T")


def _get_or_default(mylist: List[T], i: int, default: T) -> T:
    """Return ``mylist[i]``, or ``default`` when ``i`` is past the end."""
    return mylist[i] if i < len(mylist) else default
def get_text_list(
    list_: List[str], last_sep: str = " and ", sep: str = ", ", wrap_item_with: str = ""
) -> str:
    """
    Build a natural-language enumeration of the items.

    All items but the last are joined with ``sep``; the last one is attached
    with ``last_sep``. If ``wrap_item_with`` is given, it is placed on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if len(list_) == 0:
        return ''

    entries = list_
    if wrap_item_with:
        entries = [
            f"{wrap_item_with!s}{entry!s}{wrap_item_with!s}" for entry in list_
        ]

    if len(entries) == 1:
        return entries[0]

    *leading, final = entries
    return f"{sep.join(leading)}{last_sep!s}{final!s}"