Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/escape_sequences.py: 42%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

36 statements  

1r""" 

2Terminal escape sequence patterns. 

3 

4This module provides regex patterns for matching terminal escape sequences. All patterns match 

5sequences that begin with ESC (``\x1b``). Before calling re.match with these patterns, callers 

6should first check that the character at the current position is ESC for optimal performance. 

7""" 

8 

9# std imports 

10import re 

11 

12import typing 

13 

14# local 

15from .sgr_state import _SGR_PATTERN 

16 

# Text Sizing Protocol (OSC 66), https://sw.kovidgoyal.net/kitty/text-sizing-protocol/
#
# Capture groups:
#   1. metadata  -- everything up to the first ';' (may not contain ';', BEL or ESC)
#   2. text      -- the inner text (may not contain BEL or ESC)
#   3. terminator -- either BEL (``\x07``) or ST (``ESC \``)
TEXT_SIZING_PATTERN = re.compile(
    r'\x1b\]66;([^;\x07\x1b]*);([^\x07\x1b]*)(\x07|\x1b\\)'
)

21 

# Zero-width escape sequences (SGR, OSC, CSI, etc.). This table, like INDETERMINATE_EFFECT_SEQUENCE,
# originated from the 'blessed' library.
#
# Alternatives are tried left to right, so the longer, structured forms (CSI, OSC, APC, DCS, PM)
# must stay ahead of the two-byte Fe/Fp/Fs fallbacks: the introducers '[', ']', '_', 'P' and '^'
# all lie in the Fe range (0x40-0x5F) and would otherwise match as bare two-byte sequences.
ZERO_WIDTH_PATTERN = re.compile(
    # CSI sequences: parameter bytes, intermediate bytes, then one final byte
    r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]|'
    # OSC sequences, note that the text sizing protocol (OSC 66) is a special case in width() and
    # clip(), and contrary to the variable name, it is positive width.
    r'\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)|'
    # APC sequences
    r'\x1b_[^\x1b\x07]*(?:\x07|\x1b\\)|'
    # DCS sequences
    r'\x1bP[^\x1b\x07]*(?:\x07|\x1b\\)|'
    # PM sequences
    r'\x1b\^[^\x1b\x07]*(?:\x07|\x1b\\)|'
    # Character set designation (subset of nF, handled separately for clarity)
    r'\x1b[()].|'
    # nF sequences: ESC + one or more intermediate bytes (0x20-0x2F) + final byte (0x30-0x7E)
    r'\x1b[\x20-\x2f]+[\x30-\x7e]|'
    # Fe sequences (C1 controls)
    r'\x1b[\x40-\x5f]|'
    # Fp sequences (private use)
    r'\x1b[\x30-\x3f]|'
    # Fs sequences (independent functions)
    r'\x1b[\x60-\x7e]'
)

47 

# Cursor right movement: CSI [n] C, parameter may be parsed by width().
# Group 1 is the (possibly empty) decimal count.
CURSOR_RIGHT_SEQUENCE = re.compile(r'\x1b\[(\d*)C')

# Cursor left movement: CSI [n] D, parameter may be parsed by width().
# Group 1 is the (possibly empty) decimal count.
CURSOR_LEFT_SEQUENCE = re.compile(r'\x1b\[(\d*)D')

# Horizontal position absolute: CSI [n] G, parameter may be parsed by width().
# Group 1 is the (possibly empty) decimal column.
CURSOR_HPA_SEQUENCE = re.compile(r'\x1b\[(\d*)G')

# Combined cursor movement: single regex for fast-path detection of any
# horizontal cursor movement (left, right, hpa). Avoids separate search()
# calls in hot-path width() and clip() pre-checks.
CURSOR_MOVEMENT_SEQUENCE = re.compile(r'\x1b\[(\d*)[CDG]')

# Combined horizontal cursor movement: matches BS, CR, and CSI C/D/G cursor sequences
# in a single regex pass. Used by clip() to decide between the simple append path
# and the painter's algorithm.
_HORIZONTAL_CURSOR_MOVEMENT = re.compile(r'[\x08\r]|\x1b\[(\d*)[CDG]')

66 

# Combined pattern: a single regex that matches any zero-width escape sequence
# and classifies it via named groups, approx 2x faster than redundant re.matches
# in clip() and width().
#
# Each component's first '(' is rewritten into a named group, so the first parenthesis of every
# source pattern must be its parameter capture group. That holds for the CURSOR_* patterns in
# this module; NOTE(review): _SGR_PATTERN is defined in sgr_state -- confirm its first '(' is
# the parameter group.
#
# Order matters: ZERO_WIDTH_PATTERN also matches SGR, cursor and OSC 66 sequences, so the
# catch-all 'other_seq' alternative must remain last for the specific groups to be populated.
_SEQUENCE_CLASSIFY = re.compile(
    _SGR_PATTERN.pattern.replace('(', '(?P<sgr_params>', 1)
    + '|' + CURSOR_HPA_SEQUENCE.pattern.replace('(', '(?P<hpa_n>', 1)
    + '|' + CURSOR_RIGHT_SEQUENCE.pattern.replace('(', '(?P<cforward_n>', 1)
    + '|' + CURSOR_LEFT_SEQUENCE.pattern.replace('(', '(?P<cbackward_n>', 1)
    # Same expression as TEXT_SIZING_PATTERN, with its three groups named.
    + '|' + r'\x1b\]66;(?P<ts_meta>[^;\x07\x1b]*);(?P<ts_text>[^\x07\x1b]*)(?P<ts_term>\x07|\x1b\\)'
    + '|' + r'(?P<other_seq>(?:' + ZERO_WIDTH_PATTERN.pattern + '))'
)

78 

# Indeterminate effect sequences - raise ValueError in 'strict' mode. The effects of these sequences
# are likely to be undesirable, moving the cursor vertically or to any unknown position, and
# otherwise not managed by the 'width' method of this library.
#
# This table was created initially with code generation by extraction of termcap library with
# techniques used at 'blessed' library runtime for 'xterm', 'alacritty', 'kitty', 'ghostty',
# 'screen', 'tmux', and others. Then, these common capabilities were merged into the list below.
#
# Each entry is wrapped in its own non-capturing group before being joined with '|', so the
# alternatives cannot bleed into one another.
INDETERMINATE_EFFECT_SEQUENCE = re.compile(
    '|'.join(f'(?:{_pattern})' for _pattern in (
        r'\x1b\[\d+;\d+r',    # change_scroll_region
        r'\x1b\[\d*K',        # erase_in_line (clr_eol, clr_bol)
        r'\x1b\[\d*J',        # erase_in_display (clr_eos, erase_display)
        r'\x1b\[\d+;\d+H',    # cursor_address
        r'\x1b\[\d*H',        # cursor_home
        r'\x1b\[\d*A',        # cursor_up
        r'\x1b\[\d*B',        # cursor_down
        r'\x1b\[\d*P',        # delete_character
        r'\x1b\[\d*M',        # delete_line
        r'\x1b\[\d*L',        # insert_line
        r'\x1b\[\d*@',        # insert_character
        r'\x1b\[\d+X',        # erase_chars
        r'\x1b\[\d*S',        # scroll_up (parm_index)
        r'\x1b\[\d*T',        # scroll_down (parm_rindex)
        r'\x1b\[\d*d',        # row_address
        r'\x1b\[\?1049[hl]',  # alternate screen buffer
        r'\x1b\[\?47[hl]',    # alternate screen (legacy)
        r'\x1b8',             # restore_cursor
        r'\x1bD',             # scroll_forward (index)
        r'\x1bM',             # scroll_reverse (reverse index)
        r'\x1bc',             # full_reset (RIS)
    ))
)

111 

112 

def iter_sequences(text: str) -> typing.Iterator[typing.Tuple[str, bool]]:
    r"""
    Iterate through text, yielding segments with sequence identification.

    This generator yields tuples of ``(segment, is_sequence)`` for each part
    of the input text, where ``is_sequence`` is ``True`` if the segment is
    a recognized terminal escape sequence.

    An ESC (``\x1b``) that does not begin a sequence matched by
    ``ZERO_WIDTH_PATTERN`` is yielded on its own as ``('\x1b', True)``; the
    characters following it are then treated as ordinary text. Empty input
    yields nothing, and empty segments are never yielded.

    :param text: String to iterate through.
    :returns: Iterator of (segment, is_sequence) tuples.

    .. versionadded:: 0.3.0

    Example::

        >>> list(iter_sequences('hello'))
        [('hello', False)]
        >>> list(iter_sequences('\x1b[31mred'))
        [('\x1b[31m', True), ('red', False)]
        >>> list(iter_sequences('\x1b[1m\x1b[31m'))
        [('\x1b[1m', True), ('\x1b[31m', True)]
    """
    text_len = len(text)
    idx = 0

    while idx < text_len:
        # Jump straight to the next ESC instead of testing every character in Python.
        esc_idx = text.find('\x1b', idx)
        if esc_idx == -1:
            # No further sequences: everything left is plain text.
            yield (text[idx:], False)
            return

        # Yield any plain text accumulated before the ESC.
        if esc_idx > idx:
            yield (text[idx:esc_idx], False)

        # Try to match an escape sequence anchored at the ESC.
        match = ZERO_WIDTH_PATTERN.match(text, esc_idx)
        if match:
            yield (match.group(), True)
            idx = match.end()
        else:
            # Lone ESC or unrecognized - yield as sequence anyway
            yield ('\x1b', True)
            idx = esc_idx + 1

163 

164 

def strip_sequences(text: str) -> str:
    r"""
    Return text with all terminal escape sequences removed.

    Every match of ``ZERO_WIDTH_PATTERN`` is deleted. An ESC that begins no
    match, such as a trailing lone ESC, is preserved. A truncated sequence is
    only partially removed: its leading two-byte form still matches, so an
    unterminated ``ESC [`` loses the introducer and keeps the bytes after it.

    :param text: String that may contain terminal escape sequences.
    :returns: The input text with all escape sequences stripped.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.7.0
       Inner text of OSC 66 (Text sizing protocol) is preserved.

    Example::

        >>> strip_sequences('\x1b[31mred\x1b[0m')
        'red'
        >>> strip_sequences('hello')
        'hello'
        >>> strip_sequences('\x1b[1m\x1b[31mbold red\x1b[0m text')
        'bold red text'
        >>> strip_sequences('\x1b]66;s=2;hello\x07')
        'hello'
        >>> strip_sequences('\x1b]8;id=34;https://example.com\x1b\\[view]\x1b]8;;\x1b\\')
        '[view]'
    """
    # OSC 66 must be unwrapped first: the generic OSC alternative of ZERO_WIDTH_PATTERN would
    # otherwise delete the whole sequence, inner text included. The substring test is a cheap
    # guard that skips the substitution for the common case of no text-sizing sequences.
    if '\x1b]66;' in text:
        text = TEXT_SIZING_PATTERN.sub(r'\2', text)
    return ZERO_WIDTH_PATTERN.sub('', text)