1r"""
2Terminal escape sequence patterns.
3
4This module provides regex patterns for matching terminal escape sequences. All patterns match
5sequences that begin with ESC (``\x1b``). Before calling re.match with these patterns, callers
6should first check that the character at the current position is ESC for optimal performance.
7"""
8
9# std imports
10import re
11
12import typing
13
14# local
15from .sgr_state import _SGR_PATTERN
16
# Text Sizing Protocol (OSC 66), https://sw.kovidgoyal.net/kitty/text-sizing-protocol/
#
# The pattern is written as adjacent raw-string fragments so each capture group can be
# annotated; Python concatenates them into a single pattern at compile time.
TEXT_SIZING_PATTERN = re.compile(
    r'\x1b\]66;'           # introducer: ESC ] 66 ;
    r'([^;\x07\x1b]*);'    # group 1: metadata, up to the first ';'
    r'([^\x07\x1b]*)'      # group 2: inner text, may itself contain ';'
    r'(\x07|\x1b\\)'       # group 3: terminator, BEL or ESC \
)
21
# Zero-width escape sequences (SGR, OSC, CSI, etc.). Like INDETERMINATE_EFFECT_SEQUENCE, this table
# originated from the 'blessed' library.
#
# The alternatives are joined with '|' in the order listed. Regex alternation is ordered, so the
# longer forms (CSI, OSC, APC, DCS, PM, character set designation, nF) are tried before the
# two-byte Fe/Fp/Fs forms, which would otherwise match only their first two bytes.
ZERO_WIDTH_PATTERN = re.compile('|'.join((
    # CSI sequences: ESC [ + parameter bytes + intermediate bytes + one final byte
    r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]',
    # OSC sequences, terminated by BEL or ESC \. Note that the text sizing protocol (OSC 66) is
    # special-cased in width() and clip(): contrary to this variable's name, it has positive width.
    r'\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)',
    # APC sequences
    r'\x1b_[^\x1b\x07]*(?:\x07|\x1b\\)',
    # DCS sequences
    r'\x1bP[^\x1b\x07]*(?:\x07|\x1b\\)',
    # PM sequences
    r'\x1b\^[^\x1b\x07]*(?:\x07|\x1b\\)',
    # Character set designation (subset of nF, handled separately for clarity)
    r'\x1b[()].',
    # nF sequences: ESC + one or more intermediate bytes (0x20-0x2F) + final byte (0x30-0x7E)
    r'\x1b[\x20-\x2f]+[\x30-\x7e]',
    # Fe sequences (C1 controls)
    r'\x1b[\x40-\x5f]',
    # Fp sequences (private use)
    r'\x1b[\x30-\x3f]',
    # Fs sequences (independent functions)
    r'\x1b[\x60-\x7e]',
)))
47
# Horizontal cursor movement sequences. In each of the CSI patterns below, group 1 captures the
# optional numeric parameter [n] (an empty string when omitted), which may be parsed by width().

# Cursor right: CSI [n] C
CURSOR_RIGHT_SEQUENCE = re.compile(r'\x1b\[(\d*)C')

# Cursor left: CSI [n] D
CURSOR_LEFT_SEQUENCE = re.compile(r'\x1b\[(\d*)D')

# Horizontal position absolute: CSI [n] G
CURSOR_HPA_SEQUENCE = re.compile(r'\x1b\[(\d*)G')

# Any of the three sequences above (right, left, hpa) as one regex, for fast-path detection.
# Avoids separate search() calls in the hot-path pre-checks of width() and clip().
CURSOR_MOVEMENT_SEQUENCE = re.compile(r'\x1b\[(\d*)[CDG]')

# BS, CR, or any CSI C/D/G cursor sequence, detected in a single regex pass. Built on top of
# CURSOR_MOVEMENT_SEQUENCE so the CSI part is spelled only once. Used by clip() to decide
# between the simple append path and the painter's algorithm.
_HORIZONTAL_CURSOR_MOVEMENT = re.compile(r'[\x08\r]|' + CURSOR_MOVEMENT_SEQUENCE.pattern)
66
# Combined pattern: a single regex that matches any zero-width escape sequence and classifies it
# via named groups, approximately 2x faster than redundant re.match calls in clip() and width().
#
# Each specific pattern has its first '(' rewritten into a named group. Alternation order matters:
# the specific SGR, cursor, and OSC 66 alternatives must come before the catch-all 'other_seq',
# because the generic CSI and OSC alternatives of ZERO_WIDTH_PATTERN would also match them.
_SEQUENCE_CLASSIFY = re.compile('|'.join((
    # SGR, parameters captured as 'sgr_params'
    _SGR_PATTERN.pattern.replace('(', '(?P<sgr_params>', 1),
    # Horizontal position absolute, parameter captured as 'hpa_n'
    CURSOR_HPA_SEQUENCE.pattern.replace('(', '(?P<hpa_n>', 1),
    # Cursor right, parameter captured as 'cforward_n'
    CURSOR_RIGHT_SEQUENCE.pattern.replace('(', '(?P<cforward_n>', 1),
    # Cursor left, parameter captured as 'cbackward_n'
    CURSOR_LEFT_SEQUENCE.pattern.replace('(', '(?P<cbackward_n>', 1),
    # Text sizing protocol (OSC 66): metadata, inner text, and terminator
    r'\x1b\]66;(?P<ts_meta>[^;\x07\x1b]*);(?P<ts_text>[^\x07\x1b]*)(?P<ts_term>\x07|\x1b\\)',
    # Anything else matched by ZERO_WIDTH_PATTERN
    f'(?P<other_seq>(?:{ZERO_WIDTH_PATTERN.pattern}))',
)))
78
# Indeterminate effect sequences - raise ValueError in 'strict' mode. The effects of these sequences
# are likely to be undesirable, moving the cursor vertically or to any unknown position, and
# otherwise not managed by the 'width' method of this library.
#
# This table was created initially with code generation by extraction of termcap library with
# techniques used at 'blessed' library runtime for 'xterm', 'alacritty', 'kitty', 'ghostty',
# 'screen', 'tmux', and others. Then, these common capabilities were merged into the list below.
#
# Each entry is wrapped in its own non-capturing group and the groups are OR-ed together, giving
# '(?:p1)|(?:p2)|...'; joining on ')|(?:' and adding the outer '(?:' and ')' builds that string.
INDETERMINATE_EFFECT_SEQUENCE = re.compile(
    '(?:' + ')|(?:'.join((
        r'\x1b\[\d+;\d+r',    # change_scroll_region
        r'\x1b\[\d*K',        # erase_in_line (clr_eol, clr_bol)
        r'\x1b\[\d*J',        # erase_in_display (clr_eos, erase_display)
        r'\x1b\[\d+;\d+H',    # cursor_address
        r'\x1b\[\d*H',        # cursor_home
        r'\x1b\[\d*A',        # cursor_up
        r'\x1b\[\d*B',        # cursor_down
        r'\x1b\[\d*P',        # delete_character
        r'\x1b\[\d*M',        # delete_line
        r'\x1b\[\d*L',        # insert_line
        r'\x1b\[\d*@',        # insert_character
        r'\x1b\[\d+X',        # erase_chars
        r'\x1b\[\d*S',        # scroll_up (parm_index)
        r'\x1b\[\d*T',        # scroll_down (parm_rindex)
        r'\x1b\[\d*d',        # row_address
        r'\x1b\[\?1049[hl]',  # alternate screen buffer
        r'\x1b\[\?47[hl]',    # alternate screen (legacy)
        r'\x1b8',             # restore_cursor
        r'\x1bD',             # scroll_forward (index)
        r'\x1bM',             # scroll_reverse (reverse index)
        r'\x1bc',             # full_reset (RIS)
    )) + ')'
)
111
112
def iter_sequences(text: str) -> typing.Iterator[typing.Tuple[str, bool]]:
    r"""
    Split text into alternating runs of plain text and terminal escape sequences.

    Each yielded item is a ``(segment, is_sequence)`` tuple. Segments are yielded in order
    and, concatenated, reproduce ``text`` exactly. ``is_sequence`` is ``True`` for a segment
    matched by ``ZERO_WIDTH_PATTERN``, and also for a single ESC (``\x1b``) character that
    does not begin any such match.

    :param text: String to iterate through.
    :returns: Iterator of (segment, is_sequence) tuples.

    .. versionadded:: 0.3.0

    Example::

        >>> list(iter_sequences('hello'))
        [('hello', False)]
        >>> list(iter_sequences('\x1b[31mred'))
        [('\x1b[31m', True), ('red', False)]
        >>> list(iter_sequences('\x1b[1m\x1b[31m'))
        [('\x1b[1m', True), ('\x1b[31m', True)]
    """
    cursor = 0
    end = len(text)

    while cursor < end:
        # Jump straight to the next ESC; everything before it is plain text.
        esc_at = text.find('\x1b', cursor)
        if esc_at < 0:
            # No ESC remains, the rest of the string is one plain-text segment.
            yield (text[cursor:], False)
            return

        if esc_at > cursor:
            yield (text[cursor:esc_at], False)

        found = ZERO_WIDTH_PATTERN.match(text, esc_at)
        if found is None:
            # Lone ESC or unrecognized sequence: report the ESC itself as a sequence
            # and resume scanning at the character that follows it.
            yield ('\x1b', True)
            cursor = esc_at + 1
        else:
            yield (found.group(), True)
            cursor = found.end()
163
164
def strip_sequences(text: str) -> str:
    r"""
    Remove terminal escape sequences from text.

    Every match of ``ZERO_WIDTH_PATTERN`` is deleted. Text sizing protocol sequences (OSC 66)
    are first replaced by their inner text, so that text is kept in the result. An ESC
    character that does not begin any match of ``ZERO_WIDTH_PATTERN`` is left in place.

    :param text: String that may contain terminal escape sequences.
    :returns: The input text with escape sequences stripped.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.7.0
       Inner text of OSC 66 (Text sizing protocol) is preserved.

    Example::

        >>> strip_sequences('\x1b[31mred\x1b[0m')
        'red'
        >>> strip_sequences('hello')
        'hello'
        >>> strip_sequences('\x1b[1m\x1b[31mbold red\x1b[0m text')
        'bold red text'
        >>> strip_sequences('\x1b]66;s=2;hello\x07')
        'hello'
        >>> strip_sequences('\x1b]8;id=34;https://example.com\x1b\\[view]\x1b]8;;\x1b\\')
        '[view]'
    """
    # A plain substring test skips the OSC 66 substitution when no introducer is present.
    # Group 2 of TEXT_SIZING_PATTERN is the inner text of the sequence.
    unwrapped = TEXT_SIZING_PATTERN.sub(r'\2', text) if '\x1b]66;' in text else text
    return ZERO_WIDTH_PATTERN.sub('', unwrapped)