Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/IPython/utils/text.py: 30%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Utilities for working with strings and text.
4Inheritance diagram:
6.. inheritance-diagram:: IPython.utils.text
7 :parts: 3
8"""
10import os
11import re
12import string
13import sys
14import textwrap
15import warnings
16from string import Formatter
17from pathlib import Path
19from typing import (
20 List,
21 Dict,
22 Tuple,
23 Optional,
24 cast,
25 Any,
26 Union,
27 TypeVar,
28)
29from collections.abc import Sequence, Mapping, Callable, Iterator
31if sys.version_info < (3, 12):
32 from typing import Self
33else:
34 from typing import Self
37class LSString(str):
38 """String derivative with a special access attributes.
40 These are normal strings, but with the special attributes:
42 .l (or .list) : value as list (split on newlines).
43 .n (or .nlstr): original value (the string itself).
44 .s (or .spstr): value as whitespace-separated string.
45 .p (or .paths): list of path objects (requires path.py package)
47 Any values which require transformations are computed only once and
48 cached.
50 Such strings are very useful to efficiently interact with the shell, which
51 typically only understands whitespace-separated options for commands."""
53 __list: List[str]
54 __spstr: str
55 __paths: List[Path]
57 def get_list(self) -> List[str]:
58 try:
59 return self.__list
60 except AttributeError:
61 self.__list = self.split('\n')
62 return self.__list
64 l = list = property(get_list)
66 def get_spstr(self) -> str:
67 try:
68 return self.__spstr
69 except AttributeError:
70 self.__spstr = self.replace('\n',' ')
71 return self.__spstr
73 s = spstr = property(get_spstr)
75 def get_nlstr(self) -> Self:
76 return self
78 n = nlstr = property(get_nlstr)
80 def get_paths(self) -> List[Path]:
81 try:
82 return self.__paths
83 except AttributeError:
84 self.__paths = [Path(p) for p in self.split('\n') if os.path.exists(p)]
85 return self.__paths
87 p = paths = property(get_paths)
89# FIXME: We need to reimplement type specific displayhook and then add this
90# back as a custom printer. This should also be moved outside utils into the
91# core.
93# def print_lsstring(arg):
94# """ Prettier (non-repr-like) and more informative printer for LSString """
95# print("LSString (.p, .n, .l, .s available). Value:")
96# print(arg)
97#
98#
99# print_lsstring = result_display.register(LSString)(print_lsstring)
102class SList(list[Any]):
103 """List derivative with a special access attributes.
105 These are normal lists, but with the special attributes:
107 * .l (or .list) : value as list (the list itself).
108 * .n (or .nlstr): value as a string, joined on newlines.
109 * .s (or .spstr): value as a string, joined on spaces.
110 * .p (or .paths): list of path objects (requires path.py package)
112 Any values which require transformations are computed only once and
113 cached."""
115 __spstr: str
116 __nlstr: str
117 __paths: List[Path]
119 def get_list(self) -> Self:
120 return self
122 l = list = property(get_list)
124 def get_spstr(self) -> str:
125 try:
126 return self.__spstr
127 except AttributeError:
128 self.__spstr = ' '.join(self)
129 return self.__spstr
131 s = spstr = property(get_spstr)
133 def get_nlstr(self) -> str:
134 try:
135 return self.__nlstr
136 except AttributeError:
137 self.__nlstr = '\n'.join(self)
138 return self.__nlstr
140 n = nlstr = property(get_nlstr)
142 def get_paths(self) -> List[Path]:
143 try:
144 return self.__paths
145 except AttributeError:
146 self.__paths = [Path(p) for p in self if os.path.exists(p)]
147 return self.__paths
149 p = paths = property(get_paths)
151 def grep(
152 self,
153 pattern: Union[str, Callable[[Any], re.Match[str] | None]],
154 prune: bool = False,
155 field: Optional[int] = None,
156 ) -> Self:
157 """Return all strings matching 'pattern' (a regex or callable)
159 This is case-insensitive. If prune is true, return all items
160 NOT matching the pattern.
162 If field is specified, the match must occur in the specified
163 whitespace-separated field.
165 Examples::
167 a.grep( lambda x: x.startswith('C') )
168 a.grep('Cha.*log', prune=1)
169 a.grep('chm', field=-1)
170 """
172 def match_target(s: str) -> str:
173 if field is None:
174 return s
175 parts = s.split()
176 try:
177 tgt = parts[field]
178 return tgt
179 except IndexError:
180 return ""
182 if isinstance(pattern, str):
183 pred = lambda x : re.search(pattern, x, re.IGNORECASE)
184 else:
185 pred = pattern
186 if not prune:
187 return type(self)([el for el in self if pred(match_target(el))]) # type: ignore [no-untyped-call]
188 else:
189 return type(self)([el for el in self if not pred(match_target(el))]) # type: ignore [no-untyped-call]
191 def fields(self, *fields: List[str]) -> List[List[str]]:
192 """Collect whitespace-separated fields from string list
194 Allows quick awk-like usage of string lists.
196 Example data (in var a, created by 'a = !ls -l')::
198 -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog
199 drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython
201 * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
202 * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
203 (note the joining by space).
204 * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``
206 IndexErrors are ignored.
208 Without args, fields() just split()'s the strings.
209 """
210 if len(fields) == 0:
211 return [el.split() for el in self]
213 res = SList()
214 for el in [f.split() for f in self]:
215 lineparts = []
217 for fd in fields:
218 try:
219 lineparts.append(el[fd])
220 except IndexError:
221 pass
222 if lineparts:
223 res.append(" ".join(lineparts))
225 return res
227 def sort( # type:ignore[override]
228 self,
229 field: Optional[List[str]] = None,
230 nums: bool = False,
231 ) -> Self:
232 """sort by specified fields (see fields())
234 Example::
236 a.sort(1, nums = True)
238 Sorts a by second field, in numerical order (so that 21 > 3)
240 """
242 #decorate, sort, undecorate
243 if field is not None:
244 dsu = [[SList([line]).fields(field), line] for line in self]
245 else:
246 dsu = [[line, line] for line in self]
247 if nums:
248 for i in range(len(dsu)):
249 numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()])
250 try:
251 n = int(numstr)
252 except ValueError:
253 n = 0
254 dsu[i][0] = n
257 dsu.sort()
258 return type(self)([t[1] for t in dsu])
def indent(instr: str, nspaces: int = 4, ntabs: int = 0, flatten: bool = False) -> str:
    """Indent a string a given number of spaces or tabstops.

    indent(str, nspaces=4, ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces.  Tabs come first, then
        spaces.  If the input ends with a newline, no dangling indentation
        is left after it.

    """
    ind = "\t" * ntabs + " " * nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # A trailing newline makes the pattern match once more at the very end,
    # leaving a dangling indent that must be removed.  Two guards:
    # * ``ind`` may be empty (nspaces == ntabs == 0); slicing with ``[:-0]``
    #   would wipe out the entire string, so skip the trim in that case.
    # * test against "\n" rather than os.linesep: text held in memory
    #   normally uses "\n" on every platform, and a "\r\n" ending still ends
    #   with "\n".
    if ind and outstr.endswith("\n" + ind):
        return outstr[:-len(ind)]
    return outstr
def list_strings(arg: Union[str, List[str]]) -> List[str]:
    """Normalize the argument to a list of strings.

    A single string is wrapped in a new one-element list; anything else is
    returned unchanged (the same object, not a copy).

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg
def marquee(txt: str = "", width: int = 78, mark: str = "*") -> str:
    """Return the input string centered in a 'marquee'.

    With empty text the result is just the mark repeated and cut to ``width``
    characters.  Otherwise the text is flanked by a single space and an equal
    number of marks on each side; text too long for ``width`` gets no marks
    but keeps the surrounding spaces.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]
    # Room left after the text and its two padding spaces, expressed in whole
    # marks, then split evenly between both sides (never negative).
    per_side = max((width - len(txt) - 2) // len(mark) // 2, 0)
    border = mark * per_side
    return f"{border} {txt} {border}"
def format_screen(strng: str) -> str:
    """Format a string for screen printing.

    This removes some latex-type format codes: currently only the
    paragraph-continuation marker, i.e. a backslash at the end of a line."""
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
def dedent(text: str) -> str:
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
        is a bar
    '''

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition("\n")
    # Two cases where the first line gets no special treatment:
    # the text opens with a blank line, or it is a single line.
    if not first or not newline:
        if text.startswith("\n") or not newline:
            return textwrap.dedent(text)
    # Keep the first line verbatim and dedent only what follows it.
    return first + "\n" + textwrap.dedent(rest)
def strip_email_quotes(text: str) -> str:
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.  The lines are
    re-joined with a single newline each.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    rows = text.splitlines()
    prefix_len = 0

    # Walk the lines column by column; zip stops at the shortest line.
    for column in zip(*rows):
        head = column[0]
        same_everywhere = all(ch == head for ch in column)
        is_quote_char = head == ">" or head in string.whitespace
        if not (same_everywhere and is_quote_char):
            break
        prefix_len += 1

    return "\n".join(row[prefix_len:] for row in rows)
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each replacement field is evaluated as a Python expression, with the
    keyword arguments of ``format`` serving as its namespace.

    Note that this version interprets a `:` as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    Note that on Python 3.14+ this version interprets `[]` as indexing operator
    so you need to use generators instead of list comprehensions, for example:
    `list(i for i in range(10))`.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name: str, args: Any, kwargs: Any) -> Tuple[Any, str]:
        """Evaluate the field text and return ``(value, name)``.

        Positional ``args`` are not consulted; only ``kwargs`` is visible to
        the expression, as both its globals and its locals.
        """
        # NOTE(review): eval() executes arbitrary code found in the template;
        # only format strings from a trusted source should reach this point.
        value = eval(name, kwargs, kwargs)
        return (value, name)
462#XXX: As of Python 3.4, the format string parsing no longer splits on a colon
463# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
464# above, it should be possible to remove FullEvalFormatter.
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as a Python expression with the
    keyword arguments as its global namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    # Modelled on Formatter._vformat, with two changes: fields are eval'ed,
    # and whatever the parser took for a format spec is glued back onto the
    # expression so that slices survive.
    def vformat(
        self, format_string: str, args: Sequence[Any], kwargs: Mapping[str, Any]
    ) -> str:
        """Render ``format_string``, evaluating each field against ``kwargs``.

        Positional ``args`` are accepted for interface compatibility but are
        not used.
        """
        pieces: List[str] = []
        conversion: Optional[str]
        for literal, expression, spec, conversion in self.parse(format_string):
            # Literal text preceding the field is copied through unchanged.
            if literal:
                pieces.append(literal)

            # No field follows this literal: nothing more to emit.
            if expression is None:
                continue

            # The parser split the field at a ':'; restore it so that the
            # expression (e.g. a slice) is evaluated in full.
            if spec:
                expression = f"{expression}:{spec}"

            # NOTE(review): eval() runs arbitrary code from the template;
            # templates must come from a trusted source.  A fresh dict is
            # used for every field.
            value = eval(expression, dict(kwargs))

            # Apply any !r / !s / !a conversion, then format with an empty
            # spec, since the spec was consumed as part of the expression.
            value = self.convert_field(value, conversion)
            pieces.append(self.format_field(value, ""))

        return "".join(pieces)
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled ``$$foo`` is emitted as the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """

    # Group 1 is the text before a '$', group 2 the (possibly '$'-prefixed)
    # dotted name.  The trailing lookahead only accepts a match when the rest
    # of the literal holds an even number of single quotes.
    _dollar_pattern_ignore_single_quote = re.compile(
        r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)"
    )

    def parse(self, fmt_string: str) -> Iterator[Tuple[Any, Any, Any, Any]]:
        """Yield ``(literal, field, spec, conversion)`` tuples.

        Works like ``Formatter.parse``, except that each literal chunk is
        additionally scanned for ``$name`` references, which are yielded as
        extra fields with an empty format spec and no conversion.
        """
        for literal, name, spec, conv in Formatter.parse(self, fmt_string):
            consumed = 0  # end offset of the last '$' match in ``literal``
            pending = ""  # literal text accumulated but not yet yielded
            for match in self._dollar_pattern_ignore_single_quote.finditer(literal):
                prefix = match.group(1)
                ident = match.group(2)
                consumed = match.end()
                if ident.startswith("$"):
                    # $$foo --> $foo: keep it as literal text.
                    pending += prefix + ident
                    continue
                yield (pending + prefix, ident, "", None)
                pending = ""

            # Re-yield the {foo} style pattern together with whatever literal
            # text remains after the last '$' match.
            yield (pending + literal[consumed:], name, spec, conv)

    def __repr__(self) -> str:
        return "<DollarFormatter>"
573#-----------------------------------------------------------------------------
574# Utils to columnize a list of string
575#-----------------------------------------------------------------------------
578def _col_chunks(
579 l: List[int], max_rows: int, row_first: bool = False
580) -> Iterator[List[int]]:
581 """Yield successive max_rows-sized column chunks from l."""
582 if row_first:
583 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)
584 for i in range(ncols):
585 yield [l[j] for j in range(i, len(l), ncols)]
586 else:
587 for i in range(0, len(l), max_rows):
588 yield l[i:(i + max_rows)]
591def _find_optimal(
592 rlist: List[int], row_first: bool, separator_size: int, displaywidth: int
593) -> Dict[str, Any]:
594 """Calculate optimal info to columnize a list of string"""
595 for max_rows in range(1, len(rlist) + 1):
596 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
597 sumlength = sum(col_widths)
598 ncols = len(col_widths)
599 if sumlength + separator_size * (ncols - 1) <= displaywidth:
600 break
601 return {'num_columns': ncols,
602 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,
603 'max_rows': max_rows,
604 'column_widths': col_widths
605 }
608T = TypeVar("T")
611def _get_or_default(mylist: List[T], i: int, default: T) -> T:
612 """return list item number, or default if don't exist"""
613 if i >= len(mylist):
614 return default
615 else :
616 return mylist[i]
def get_text_list(
    list_: List[str], last_sep: str = " and ", sep: str = ", ", wrap_item_with: str = ""
) -> str:
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with ``sep``; the last one is attached
    with ``last_sep``.  If ``wrap_item_with`` is non-empty, it is put on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if not list_:
        return ''
    if wrap_item_with:
        list_ = [f"{wrap_item_with}{item!s}{wrap_item_with}" for item in list_]
    if len(list_) == 1:
        return list_[0]
    *leading, final = list_
    return f"{sep.join(leading)}{last_sep}{final!s}"