Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/IPython/utils/text.py: 26%
246 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-20 06:09 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-20 06:09 +0000
1# encoding: utf-8
2"""
3Utilities for working with strings and text.
5Inheritance diagram:
7.. inheritance-diagram:: IPython.utils.text
8 :parts: 3
9"""
11import os
12import re
13import string
14import sys
15import textwrap
16from string import Formatter
17from pathlib import Path
# strftime pattern used by IPython for dates. win32 gets the zero-padded
# "%d" day; every other platform uses "%-d".
# NOTE(review): presumably because "%-d" is unsupported by the Windows C
# runtime -- confirm before changing.
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
class LSString(str):
    """String subclass exposing shell-friendly views of its own value.

    Instances behave like ordinary strings and add these read-only
    attributes:

        .l (or .list) : the value split on newlines, as a list.
        .n (or .nlstr): the original value (the string itself).
        .s (or .spstr): the value with every newline replaced by a space.
        .p (or .paths): ``pathlib.Path`` objects for the lines that name
                        an existing filesystem entry.

    Views that need a transformation are built on first access and then
    cached on the instance.

    This is handy when talking to the shell, which typically expects
    whitespace-separated arguments.
    """

    def get_list(self):
        """Return the lines of the string (split on newlines), cached."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split('\n')
        return lines

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines turned into spaces, cached."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace('\n', ' ')
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself (it is already newline-separated)."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the lines that exist on disk, cached."""
        try:
            found = self.__paths
        except AttributeError:
            found = self.__paths = [
                Path(line) for line in self.split('\n') if os.path.exists(line)
            ]
        return found

    p = paths = property(get_paths)
74# FIXME: We need to reimplement type specific displayhook and then add this
75# back as a custom printer. This should also be moved outside utils into the
76# core.
78# def print_lsstring(arg):
79# """ Prettier (non-repr-like) and more informative printer for LSString """
80# print "LSString (.p, .n, .l, .s available). Value:"
81# print arg
82#
83#
84# print_lsstring = result_display.register(LSString)(print_lsstring)
class SList(list):
    """List subclass exposing shell-friendly views of its own items.

    Instances behave like ordinary lists and add these read-only
    attributes:

    * .l (or .list) : the list itself.
    * .n (or .nlstr): the items joined with newlines.
    * .s (or .spstr): the items joined with single spaces.
    * .p (or .paths): ``pathlib.Path`` objects for the items that name an
      existing filesystem entry.

    Views that need a transformation are built on first access and then
    cached on the instance.
    """

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with spaces, cached."""
        try:
            joined = self.__spstr
        except AttributeError:
            joined = self.__spstr = ' '.join(self)
        return joined

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines, cached."""
        try:
            joined = self.__nlstr
        except AttributeError:
            joined = self.__nlstr = '\n'.join(self)
        return joined

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the items that exist on disk, cached."""
        try:
            found = self.__paths
        except AttributeError:
            found = self.__paths = [
                Path(item) for item in self if os.path.exists(item)
            ]
        return found

    p = paths = property(get_paths)

    def grep(self, pattern, prune=False, field=None):
        """Return the items selected by `pattern`, as a new SList.

        `pattern` is either a regular expression given as a string
        (searched case-insensitively) or a callable used as a predicate.
        With a true `prune` the selection is inverted: only the items that
        do NOT match are returned.

        When `field` is given, only that whitespace-separated field of
        each item is tested; items lacking the field are tested as the
        empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(item):
            # Narrow the item down to the requested field, if any.
            if field is None:
                return item
            try:
                return item.split()[field]
            except IndexError:
                return ""

        if isinstance(pattern, str):
            def pred(candidate):
                return re.search(pattern, candidate, re.IGNORECASE)
        else:
            pred = pattern

        keep_matches = not prune
        return SList(
            [el for el in self if bool(pred(match_target(el))) == keep_matches]
        )

    def fields(self, *fields):
        """Collect whitespace-separated fields from each string.

        Gives quick awk-like access to string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (the selected fields are joined with a space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        Out-of-range indices are skipped, and a line contributing no field
        at all is left out of the result.

        Called without arguments, this returns a plain list holding the
        ``split()`` of every string.
        """
        if not fields:
            return [el.split() for el in self]

        collected = SList()
        for parts in [line.split() for line in self]:
            chosen = []
            for index in fields:
                try:
                    chosen.append(parts[index])
                except IndexError:
                    continue
            if chosen:
                collected.append(" ".join(chosen))
        return collected

    def sort(self, field=None, nums=False):
        """Return a new SList sorted by the given field (see fields()).

        Unlike ``list.sort`` this does not sort in place.

        Example::

            a.sort(1, nums = True)

        sorts ``a`` by its second field in numerical order (so 21 > 3).
        With `nums`, the key is the integer formed by the digits found in
        the key, or 0 when that cannot be parsed as an integer.
        """

        def sort_key(line):
            key = line if field is None else SList([line]).fields(field)
            if not nums:
                return key
            digits = "".join([part for part in key if part.isdigit()])
            try:
                return int(digits)
            except ValueError:
                return 0

        # Decorate / sort / undecorate: sorting (key, line) pairs means that
        # equal keys are ordered by the line text itself.
        decorated = [[sort_key(line), line] for line in self]
        decorated.sort()
        return SList([line for _, line in decorated])
233# FIXME: We need to reimplement type specific displayhook and then add this
234# back as a custom printer. This should also be moved outside utils into the
235# core.
237# def print_slist(arg):
238# """ Prettier (non-repr-like) and more informative printer for SList """
239# print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
240# if hasattr(arg, 'hideonce') and arg.hideonce:
241# arg.hideonce = False
242# return
243#
244# nlprint(arg) # This was a nested list printer, now removed.
245#
246# print_slist = result_display.register(SList)(print_slist)
def indent(instr, nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str or None
        The string to be indented. ``None`` is passed through unchanged.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented (tabs come before the spaces).
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str or None
        The string indented by ntabs and nspaces, or ``None`` when `instr`
        is ``None``.
    """
    if instr is None:
        return

    ind = '\t' * ntabs + ' ' * nspaces
    pat = re.compile(r'^\s*' if flatten else r'^', re.MULTILINE)
    outstr = pat.sub(ind, instr)

    # With MULTILINE, '^' also matches after a final newline, which leaves a
    # dangling indent at the very end; trim it.  The `ind` guard matters:
    # with an empty indent the slice would be `outstr[:-0]`, i.e. '', and
    # the whole text would be thrown away.
    if ind and outstr.endswith(os.linesep + ind):
        return outstr[:-len(ind)]
    return outstr
def list_strings(arg):
    """Wrap a lone string in a list; hand anything else back untouched.

    This lets callers accept either a single string or a list of strings
    and always iterate over the result.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg
def marquee(txt='', width=78, mark='*'):
    """Return `txt` centered in a banner built from `mark`.

    With empty `txt` the result is just `mark` repeated and cut to `width`
    characters.  Otherwise the text is surrounded by one space and an equal
    run of marks on each side, sized so the banner does not exceed `width`
    (unless the text alone is already too long).

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]

    # Room left after the text and its two padding spaces, counted in whole
    # marks and split evenly between both sides; never negative.
    per_side = max((width - len(txt) - 2) // len(mark) // 2, 0)
    border = mark * per_side
    return '%s %s %s' % (border, txt, border)
# Matches the run of whitespace (any `\s` character) opening a string.
ini_spaces_re = re.compile(r'^(\s+)')


def num_ini_spaces(strng):
    """Return how many whitespace characters `strng` starts with."""
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
def format_screen(strng):
    """Format a string for screen printing.

    Strips the latex-style paragraph-continuation marker: a backslash that
    sits at the very end of a line.
    """
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
def dedent(text):
    """Like textwrap.dedent, but tolerant of an unindented first line.

    A first line that sits flush against the opening quotes does not stop
    the remaining lines from being dedented, so strings such as::

        '''foo
        is a bar
        '''

    still lose their common indentation.

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # Either the text opens with a blank line (nothing to set aside) or it
    # is a single line: plain dedent handles both.
    if not first or not newline:
        return textwrap.dedent(text)

    # Keep the first line verbatim and dedent only what follows it.
    return first + '\n' + textwrap.dedent(rest)
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    Works like textwrap.wrap, except that the text is first dedented and
    stripped, then split into paragraphs on empty lines, and each paragraph
    is handled on its own.

    Parameters
    ----------
    text : str
        The text to wrap.
    ncols : int (default: 80)
        Width passed to ``textwrap.fill`` for each paragraph.

    Returns
    -------
    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    separator_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    continuation_indent_re = re.compile(r'\n\s+', re.MULTILINE)

    # The separator pattern has one capture group, so split() alternates
    # paragraph, captured blank run, paragraph, ...; keep the even slots.
    paragraphs = separator_re.split(dedent(text).strip())[::2]

    wrapped = []
    for paragraph in paragraphs:
        # Indentation that survives dedent is presumed to be meaningful
        # formatting, so only paragraphs with flush lines are re-filled.
        if continuation_indent_re.search(paragraph) is not None:
            wrapped.append(paragraph)
        else:
            wrapped.append(textwrap.fill(paragraph, ncols))
    return wrapped
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes the longest prefix made of '>' and whitespace characters that
    appears *identically* at the start of every line of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    str
        The lines of `text`, each without the shared prefix, joined with
        newlines.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    quote_chars = string.whitespace + ">"

    # Walk the lines column by column; the prefix ends at the first column
    # where the lines disagree or where the shared character is not a
    # quote/whitespace character.
    strip_len = 0
    for column in zip(*lines):
        lead = column[0]
        if any(ch != lead for ch in column) or lead not in quote_chars:
            break
        strip_len += 1

    return "\n".join(ln[strip_len:] for ln in lines)
def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Only sequences of the form ``ESC [ <digits and semicolons> m`` are
    removed.

    Parameters
    ----------
    source : str
        Source to remove the ansi from

    Returns
    -------
    str
        `source` without those escape sequences.
    """
    ansi_sgr = re.compile(r'\033\[(\d|;)+?m')
    return ansi_sgr.sub('', source)
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as a Python expression, with the
    keyword arguments of ``format`` serving as its namespace.

    A `:` is still interpreted as introducing a format spec (as in standard
    string formatting), so if slicing is required you must build the slice
    explicitly with ``slice(...)``.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name, args, kwargs):
        """Evaluate `name` against `kwargs`; return ``(value, name)``.

        Positional `args` are not consulted.
        """
        # NOTE: this is a plain eval() -- only use with trusted templates.
        return eval(name, kwargs), name
496#XXX: As of Python 3.4, the format string parsing no longer splits on a colon
497# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
498# above, it should be possible to remove FullEvalFormatter.
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    The content of every replacement field is evaluated as a Python
    expression, with the keyword arguments of ``format`` serving as its
    namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    # Modelled on Formatter._vformat, changed so that fields are eval()'d
    # and the format-spec slot is folded back into the expression.
    def vformat(self, format_string: str, args, kwargs) -> str:
        """Render `format_string`, evaluating each field against `kwargs`.

        Positional `args` are accepted for interface compatibility but are
        not consulted.
        """
        pieces = []
        for literal_text, field_name, format_spec, conversion in self.parse(
            format_string
        ):
            # Literal text is copied through as-is.
            if literal_text:
                pieces.append(literal_text)

            # No field attached to this literal chunk: nothing to evaluate.
            if field_name is None:
                continue

            # Whatever the parser split off as a format spec is really the
            # tail of the expression (e.g. of a slice); glue it back on.
            expression = field_name
            if format_spec:
                expression = ':'.join([field_name, format_spec])

            # NOTE: plain eval() -- only use with trusted templates.
            value = eval(expression, kwargs)

            # Apply any !r / !s / !a conversion, then format with an empty
            # spec, since the spec slot was consumed by the expression.
            value = self.convert_field(value, conversion)
            pieces.append(self.format_field(value, ''))

        return ''.join(pieces)
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    ``$$foo`` is an escape: it is emitted as the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """

    # group(1): literal text before the '$'; group(2): the dotted name, with
    # a leading '$' when the source had '$$'.  The lookahead only accepts a
    # match that is followed by an even number of single quotes.
    _dollar_pattern_ignore_single_quote = re.compile(r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)")

    def parse(self, fmt_string):
        """Yield ``(literal, field, spec, conversion)`` tuples for `fmt_string`.

        The standard ``{...}`` parse is refined: each ``$name`` found in a
        literal chunk becomes a field of its own (empty spec, no
        conversion), emitted ahead of the ``{...}`` field that chunk
        belongs to.
        """
        dollar_re = self._dollar_pattern_ignore_single_quote
        for literal_txt, field_name, format_spec, conversion in Formatter.parse(
            self, fmt_string
        ):
            consumed = 0   # end of the last $-match inside literal_txt
            pending = ""   # literal text gathered but not yet yielded

            # Find $foo patterns in the literal text.
            for match in dollar_re.finditer(literal_txt):
                before = match.group(1)
                name = match.group(2)
                if name.startswith("$"):
                    # $$foo --> $foo, kept as literal text
                    pending += before + name
                else:
                    yield (pending + before, name, "", None)
                    pending = ""
                consumed = match.end()

            # Re-yield the {foo} style pattern with the remaining literal.
            yield (pending + literal_txt[consumed:], field_name, format_spec, conversion)

    def __repr__(self):
        return "<DollarFormatter>"
599#-----------------------------------------------------------------------------
600# Utils to columnize a list of string
601#-----------------------------------------------------------------------------
603def _col_chunks(l, max_rows, row_first=False):
604 """Yield successive max_rows-sized column chunks from l."""
605 if row_first:
606 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)
607 for i in range(ncols):
608 yield [l[j] for j in range(i, len(l), ncols)]
609 else:
610 for i in range(0, len(l), max_rows):
611 yield l[i:(i + max_rows)]
614def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
615 """Calculate optimal info to columnize a list of string"""
616 for max_rows in range(1, len(rlist) + 1):
617 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
618 sumlength = sum(col_widths)
619 ncols = len(col_widths)
620 if sumlength + separator_size * (ncols - 1) <= displaywidth:
621 break
622 return {'num_columns': ncols,
623 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,
624 'max_rows': max_rows,
625 'column_widths': col_widths
626 }
629def _get_or_default(mylist, i, default=None):
630 """return list item number, or default if don't exist"""
631 if i >= len(mylist):
632 return default
633 else :
634 return mylist[i]
def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs):
    """Returns a nested list, and info to columnize items

    Parameters
    ----------
    items
        list of strings to columnize
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between columns.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Returns
    -------
    strings_matrix
        nested list of strings: the outermost list contains as many lists
        as there are rows, and each inner list has as many elements as
        there are columns. If the total number of elements in `items` does
        not equal rows*columns, the trailing cells of some rows are filled
        with `empty`.
    dict_info
        some info to make columnize easier:

        num_columns
          number of columns
        max_rows
          maximum number of rows (final number may be less)
        column_widths
          list of the width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
        In [2]: list, info = compute_item_matrix(l, displaywidth=12)
        In [3]: list
        Out[3]: [['aaa', 'f', 'k'], ['b', 'g', 'l'], ['cc', 'h', None], ['d', 'i', None], ['eeeee', 'j', None]]
        In [4]: ideal = {'num_columns': 3, 'column_widths': [5, 1, 1], 'optimal_separator_width': 2, 'max_rows': 5}
        In [5]: all((info[k] == ideal[k] for k in ideal.keys()))
        Out[5]: True
    """
    widths = [len(item) for item in items]
    info = _find_optimal(widths, row_first, *args, **kwargs)
    nrow = info['max_rows']
    ncol = info['num_columns']

    # Position in `items` of the cell at (row, col) for the chosen layout.
    if row_first:
        def flat_index(row, col):
            return row * ncol + col
    else:
        def flat_index(row, col):
            return col * nrow + row

    matrix = [
        [_get_or_default(items, flat_index(row, col), default=empty)
         for col in range(ncol)]
        for row in range(nrow)
    ]
    return (matrix, info)
def columnize(items, row_first=False, separator="  ", displaywidth=80, spread=False):
    """Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    separator : str, optional [default is two spaces]
        The string that separates columns.
    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.
    spread : bool, optional [default is False]
        If true, the separator is right-padded with spaces up to the
        computed optimal separator width, spreading the columns over the
        display width.

    Returns
    -------
    The formatted string; every row, including the last one, ends with a
    newline. Empty `items` gives a lone newline.
    """
    if not items:
        return '\n'

    matrix, info = compute_item_matrix(
        items,
        row_first=row_first,
        separator_size=len(separator),
        displaywidth=displaywidth,
    )
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))

    widths = info['column_widths']
    rows = [
        # filter(None, ...) drops the filler cells of incomplete rows (and
        # any other falsy cell) before padding each cell to its column width.
        separator.join(
            cell.ljust(width, ' ') for cell, width in zip(filter(None, row), widths)
        )
        for row in matrix
    ]
    return '\n'.join(rows) + '\n'
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with `sep`; the last one is attached
    with `last_sep`.  When `wrap_item_with` is non-empty, it is put on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if len(list_) == 0:
        return ''

    entries = list_
    if wrap_item_with:
        entries = [
            '%s%s%s' % (wrap_item_with, entry, wrap_item_with) for entry in list_
        ]

    # A single entry needs no separator at all.
    if len(entries) == 1:
        return entries[0]

    leading, final = entries[:-1], entries[-1]
    return '%s%s%s' % (sep.join(leading), last_sep, final)