1"""
2Utilities for working with strings and text.
3
4Inheritance diagram:
5
6.. inheritance-diagram:: IPython.utils.text
7 :parts: 3
8"""
9
10import os
11import re
12import string
13import sys
14import textwrap
15import warnings
16from string import Formatter
17from pathlib import Path
18
19from typing import (
20 List,
21 Dict,
22 Tuple,
23 Optional,
24 cast,
25 Sequence,
26 Mapping,
27 Any,
28 Union,
29 Callable,
30 Iterator,
31 TypeVar,
32)
33
34if sys.version_info < (3, 12):
35 from typing_extensions import Self
36else:
37 from typing import Self
38
39
40class LSString(str):
41 """String derivative with a special access attributes.
42
43 These are normal strings, but with the special attributes:
44
45 .l (or .list) : value as list (split on newlines).
46 .n (or .nlstr): original value (the string itself).
47 .s (or .spstr): value as whitespace-separated string.
48 .p (or .paths): list of path objects (requires path.py package)
49
50 Any values which require transformations are computed only once and
51 cached.
52
53 Such strings are very useful to efficiently interact with the shell, which
54 typically only understands whitespace-separated options for commands."""
55
56 __list: List[str]
57 __spstr: str
58 __paths: List[Path]
59
60 def get_list(self) -> List[str]:
61 try:
62 return self.__list
63 except AttributeError:
64 self.__list = self.split('\n')
65 return self.__list
66
67 l = list = property(get_list)
68
69 def get_spstr(self) -> str:
70 try:
71 return self.__spstr
72 except AttributeError:
73 self.__spstr = self.replace('\n',' ')
74 return self.__spstr
75
76 s = spstr = property(get_spstr)
77
78 def get_nlstr(self) -> Self:
79 return self
80
81 n = nlstr = property(get_nlstr)
82
83 def get_paths(self) -> List[Path]:
84 try:
85 return self.__paths
86 except AttributeError:
87 self.__paths = [Path(p) for p in self.split('\n') if os.path.exists(p)]
88 return self.__paths
89
90 p = paths = property(get_paths)
91
92# FIXME: We need to reimplement type specific displayhook and then add this
93# back as a custom printer. This should also be moved outside utils into the
94# core.
95
96# def print_lsstring(arg):
97# """ Prettier (non-repr-like) and more informative printer for LSString """
98# print("LSString (.p, .n, .l, .s available). Value:")
99# print(arg)
100#
101#
102# print_lsstring = result_display.register(LSString)(print_lsstring)
103
104
105class SList(list):
106 """List derivative with a special access attributes.
107
108 These are normal lists, but with the special attributes:
109
110 * .l (or .list) : value as list (the list itself).
111 * .n (or .nlstr): value as a string, joined on newlines.
112 * .s (or .spstr): value as a string, joined on spaces.
113 * .p (or .paths): list of path objects (requires path.py package)
114
115 Any values which require transformations are computed only once and
116 cached."""
117
118 __spstr: str
119 __nlstr: str
120 __paths: List[Path]
121
122 def get_list(self) -> Self:
123 return self
124
125 l = list = property(get_list)
126
127 def get_spstr(self) -> str:
128 try:
129 return self.__spstr
130 except AttributeError:
131 self.__spstr = ' '.join(self)
132 return self.__spstr
133
134 s = spstr = property(get_spstr)
135
136 def get_nlstr(self) -> str:
137 try:
138 return self.__nlstr
139 except AttributeError:
140 self.__nlstr = '\n'.join(self)
141 return self.__nlstr
142
143 n = nlstr = property(get_nlstr)
144
145 def get_paths(self) -> List[Path]:
146 try:
147 return self.__paths
148 except AttributeError:
149 self.__paths = [Path(p) for p in self if os.path.exists(p)]
150 return self.__paths
151
152 p = paths = property(get_paths)
153
154 def grep(
155 self,
156 pattern: Union[str, Callable[[Any], re.Match[str] | None]],
157 prune: bool = False,
158 field: Optional[int] = None,
159 ) -> Self:
160 """Return all strings matching 'pattern' (a regex or callable)
161
162 This is case-insensitive. If prune is true, return all items
163 NOT matching the pattern.
164
165 If field is specified, the match must occur in the specified
166 whitespace-separated field.
167
168 Examples::
169
170 a.grep( lambda x: x.startswith('C') )
171 a.grep('Cha.*log', prune=1)
172 a.grep('chm', field=-1)
173 """
174
175 def match_target(s: str) -> str:
176 if field is None:
177 return s
178 parts = s.split()
179 try:
180 tgt = parts[field]
181 return tgt
182 except IndexError:
183 return ""
184
185 if isinstance(pattern, str):
186 pred = lambda x : re.search(pattern, x, re.IGNORECASE)
187 else:
188 pred = pattern
189 if not prune:
190 return type(self)([el for el in self if pred(match_target(el))])
191 else:
192 return type(self)([el for el in self if not pred(match_target(el))])
193
194 def fields(self, *fields: List[str]) -> List[List[str]]:
195 """Collect whitespace-separated fields from string list
196
197 Allows quick awk-like usage of string lists.
198
199 Example data (in var a, created by 'a = !ls -l')::
200
201 -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog
202 drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython
203
204 * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
205 * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
206 (note the joining by space).
207 * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``
208
209 IndexErrors are ignored.
210
211 Without args, fields() just split()'s the strings.
212 """
213 if len(fields) == 0:
214 return [el.split() for el in self]
215
216 res = SList()
217 for el in [f.split() for f in self]:
218 lineparts = []
219
220 for fd in fields:
221 try:
222 lineparts.append(el[fd])
223 except IndexError:
224 pass
225 if lineparts:
226 res.append(" ".join(lineparts))
227
228 return res
229
230 def sort( # type:ignore[override]
231 self,
232 field: Optional[List[str]] = None,
233 nums: bool = False,
234 ) -> Self:
235 """sort by specified fields (see fields())
236
237 Example::
238
239 a.sort(1, nums = True)
240
241 Sorts a by second field, in numerical order (so that 21 > 3)
242
243 """
244
245 #decorate, sort, undecorate
246 if field is not None:
247 dsu = [[SList([line]).fields(field), line] for line in self]
248 else:
249 dsu = [[line, line] for line in self]
250 if nums:
251 for i in range(len(dsu)):
252 numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()])
253 try:
254 n = int(numstr)
255 except ValueError:
256 n = 0
257 dsu[i][0] = n
258
259
260 dsu.sort()
261 return type(self)([t[1] for t in dsu])
262
263
def indent(instr: str, nspaces: int = 4, ntabs: int = 0, flatten: bool = False) -> str:
    """Indent a string a given number of spaces or tabstops.

    indent(str, nspaces=4, ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces.

    """
    ind = "\t" * ntabs + " " * nspaces
    # With MULTILINE, '^' matches at the start of every line; when flattening,
    # the leading whitespace is consumed as well so it gets replaced by `ind`.
    pat = re.compile(r"^\s*" if flatten else r"^", re.MULTILINE)
    outstr = pat.sub(ind, instr)

    # '^' also matches right after a trailing newline, which leaves a dangling
    # indent at the very end; drop it.  The check uses "\n" rather than
    # os.linesep so that "\n"-terminated text is handled on every platform,
    # and it is skipped for an empty indent, where slicing with [:-0] would
    # wipe out the whole string.
    if ind and outstr.endswith("\n" + ind):
        return outstr[: -len(ind)]
    return outstr
297
298
def list_strings(arg: Union[str, List[str]]) -> List[str]:
    """Normalize a string or a list of strings to a list of strings.

    A single string is wrapped in a new one-element list; anything else is
    returned as-is (the same object, not a copy).

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg
321
322
def marquee(txt: str = "", width: int = 78, mark: str = "*") -> str:
    """Return the input string centered in a 'marquee'.

    The text is surrounded by a space and an equal number of repetitions of
    ``mark`` on each side. With no text, a line of marks exactly ``width``
    characters long is returned.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark * width)[:width]

    # Room left once the text and its two padding spaces are placed, split
    # evenly between both sides and never negative.
    room = width - len(txt) - 2
    repeats = max(0, room // len(mark) // 2)
    border = mark * repeats
    return f"{border} {txt} {border}"
346
347
def format_screen(strng: str) -> str:
    """Format a string for screen printing.

    This removes some latex-type format codes: currently only the
    paragraph-continuation marker, a backslash at the end of a line.
    """
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
356
357
def dedent(text: str) -> str:
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # A leading blank line, or a text made of a single line, has no special
    # first line to protect: dedent the text as a whole.
    if not first or not newline:
        return textwrap.dedent(text)

    # Otherwise keep the first line untouched and dedent only what follows.
    return first + '\n' + textwrap.dedent(rest)
383
384
def strip_email_quotes(text: str) -> str:
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text. The lines are
    re-joined with ``'\\n'``.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    quote_chars = string.whitespace + ">"
    lines = text.splitlines()

    # Walk the lines column by column and count how many leading columns hold
    # the same quote/whitespace character in every line.
    prefix_len = 0
    for column in zip(*lines):
        lead = column[0]
        if lead not in quote_chars or any(ch != lead for ch in column):
            break
        prefix_len += 1

    return "\n".join(line[prefix_len:] for line in lines)
433
434
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each replacement field is evaluated as a Python expression in the
    namespace formed by the keyword arguments.

    Note that this version interprets a `:` as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    Note that on Python 3.14+ this version interprets `[]` as indexing operator
    so you need to use generators instead of list comprehensions, for example:
    `list(i for i in range(10))`.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name: str, args: Any, kwargs: Any) -> Tuple[Any, str]:
        """Evaluate the field ``name`` and return ``(value, name)``.

        ``kwargs`` is used as both the globals and the locals of the
        evaluation; positional ``args`` are not consulted.
        """
        return eval(name, kwargs, kwargs), name
464
465#XXX: As of Python 3.4, the format string parsing no longer splits on a colon
466# inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
467# above, it should be possible to remove FullEvalFormatter.
468
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as a Python expression in the
    namespace formed by the keyword arguments.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    # Adapted from Formatter._vformat: fields are eval'ed, and whatever the
    # parser took for a format spec is treated as part of the expression.
    def vformat(
        self, format_string: str, args: Sequence[Any], kwargs: Mapping[str, Any]
    ) -> str:
        """Render ``format_string``, evaluating each field against ``kwargs``.

        Positional ``args`` are accepted for interface compatibility but are
        not used.
        """
        pieces = []
        conversion: Optional[str]
        for literal, expression, spec, conversion in self.parse(format_string):
            # Literal text goes through unchanged.
            if literal:
                pieces.append(literal)

            # No field after this literal: nothing more to do for this item.
            if expression is None:
                continue

            # The parser splits the field at ':'; glue both halves back
            # together so that the colon stays part of the expression
            # (this is what permits slicing).
            source = f"{expression}:{spec}" if spec else expression

            # Evaluate in a fresh copy of the keyword namespace.
            value = eval(source, dict(kwargs))

            # Apply any !r / !s / !a conversion.
            # type issue in typeshed, fixed in https://github.com/python/typeshed/pull/11377
            value = self.convert_field(value, conversion)  # type: ignore[arg-type]

            # The format spec was consumed above, so format with an empty one.
            pieces.append(self.format_field(value, ''))

        return ''.join(pieces)
527
528
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    ``$$foo`` is an escape that yields the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """

    # group(1): text preceding the '$'; group(2): the dotted name, with a
    # leading '$' when the input had '$$'.  The lookahead only accepts a
    # match followed by an even number of single quotes.
    _dollar_pattern_ignore_single_quote = re.compile(
        r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)"
    )

    def parse(self, fmt_string: str) -> Iterator[Tuple[Any, Any, Any, Any]]:  # type: ignore[explicit-override]
        """Parse like ``Formatter.parse``, also turning ``$name`` into fields.

        Yields ``(literal_text, field_name, format_spec, conversion)`` tuples;
        fields coming from ``$name`` carry an empty format spec and no
        conversion.
        """
        find_dollars = self._dollar_pattern_ignore_single_quote.finditer
        for literal, name, spec, conv in Formatter.parse(self, fmt_string):
            pending = ""  # literal text gathered but not yet yielded
            consumed = 0  # offset in `literal` just past the last match
            for hit in find_dollars(literal):
                before, dollar_name = hit.group(1, 2)
                consumed = hit.end()
                if dollar_name.startswith("$"):
                    # $$foo --> $foo, kept as literal text
                    pending += before + dollar_name
                    continue
                yield (pending + before, dollar_name, "", None)
                pending = ""

            # Re-yield the {foo} style pattern with the remaining literal.
            yield (pending + literal[consumed:], name, spec, conv)

    def __repr__(self) -> str:
        return "<DollarFormatter>"
575
576#-----------------------------------------------------------------------------
577# Utils to columnize a list of string
578#-----------------------------------------------------------------------------
579
580
def _col_chunks(
    l: List[int], max_rows: int, row_first: bool = False
) -> Iterator[List[int]]:
    """Yield the columns of ``l`` laid out with at most ``max_rows`` rows.

    Column-first (the default), each column is a run of ``max_rows``
    consecutive items. Row-first, items are dealt across the columns in
    turn, so column ``c`` holds every ``ncols``-th item starting at ``c``.
    """
    total = len(l)

    if not row_first:
        for start in range(0, total, max_rows):
            yield l[start:start + max_rows]
        return

    # Number of columns needed: ceil(total / max_rows).
    full, leftover = divmod(total, max_rows)
    ncols = full + (leftover > 0)
    for col in range(ncols):
        yield [l[pos] for pos in range(col, total, ncols)]
592
593
def _find_optimal(
    rlist: List[int], row_first: bool, separator_size: int, displaywidth: int
) -> Dict[str, Any]:
    """Calculate optimal info to columnize a list of strings.

    Layouts are tried with an increasing number of rows; the first one whose
    column widths, plus ``separator_size`` between adjacent columns, fit in
    ``displaywidth`` is kept. If none fits, the last layout tried (one with
    ``len(rlist)`` rows) is used.

    Parameters
    ----------
    rlist : list of int
        Width of each item to lay out.
    row_first : bool
        Passed on to ``_col_chunks`` to choose the fill order.
    separator_size : int
        Minimum width reserved between two adjacent columns.
    displaywidth : int
        Total width available.

    Returns
    -------
    dict
        ``num_columns``, ``optimal_separator_width`` (the leftover width
        spread over the gaps between columns, 0 for a single column),
        ``max_rows`` and ``column_widths``. For an empty ``rlist`` every
        count is 0 and ``column_widths`` is empty.
    """
    if not rlist:
        # Nothing to lay out.  Without this guard the loop below never runs
        # and the return statement fails on unbound names.
        return {'num_columns': 0,
                'optimal_separator_width': 0,
                'max_rows': 0,
                'column_widths': []
                }

    for max_rows in range(1, len(rlist) + 1):
        # A column is as wide as its widest item.
        col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
        sumlength = sum(col_widths)
        ncols = len(col_widths)
        if sumlength + separator_size * (ncols - 1) <= displaywidth:
            break

    gaps = ncols - 1
    return {'num_columns': ncols,
            'optimal_separator_width': (displaywidth - sumlength) // gaps if gaps else 0,
            'max_rows': max_rows,
            'column_widths': col_widths
            }
609
610
T = TypeVar("T")


def _get_or_default(mylist: List[T], i: int, default: T) -> T:
    """Return ``mylist[i]``, or ``default`` when ``i`` is at or past the end of the list."""
    return mylist[i] if i < len(mylist) else default
620
621
def get_text_list(
    list_: List[str], last_sep: str = " and ", sep: str = ", ", wrap_item_with: str = ""
) -> str:
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with ``sep``; the last one is attached
    with ``last_sep``. When ``wrap_item_with`` is given, it is put on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if len(list_) == 0:
        return ''

    if wrap_item_with:
        items = [f"{wrap_item_with}{item}{wrap_item_with}" for item in list_]
    else:
        items = list_

    *leading, final = items
    if not leading:
        # A single item is returned as it is.
        return final
    return f"{sep.join(leading)}{last_sep}{final}"