Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/_width.py: 6%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

288 statements  

1"""This is a high-level width() supporting terminal output.""" 

2 

3from __future__ import annotations 

4 

5from typing import Literal 

6 

7__lazy_modules__ = [ 

8 "wcwidth._constants", 

9 "wcwidth._wcswidth", 

10 "wcwidth._wcwidth", 

11 "wcwidth.bisearch", 

12 "wcwidth.control_codes", 

13 "wcwidth.escape_sequences", 

14 "wcwidth.table_grapheme", 

15 "wcwidth.table_vs16", 

16 "wcwidth.text_sizing", 

17] 

18# local 

19from . import table_grapheme_overrides 

20from ._wcwidth import wcwidth 

21from .bisearch import bisearch 

22from ._wcswidth import wcswidth, wcstwidth, _scan_zwj_cluster_end 

23from ._constants import (_EMOJI_ZWJ_SET, 

24 _ISC_VIRAMA_SET, 

25 _CATEGORY_MC_TABLE, 

26 _FITZPATRICK_RANGE, 

27 _REGIONAL_INDICATOR_SET, 

28 resolve_terminal, 

29 get_term_overrides) 

30from .table_vs15 import VS15_WIDE_TO_NARROW 

31from .table_vs16 import VS16_NARROW_TO_WIDE 

32from .text_sizing import TextSizing, TextSizingParams 

33from .control_codes import ILLEGAL_CTRL, VERTICAL_CTRL, HORIZONTAL_CTRL, ZERO_WIDTH_CTRL 

34from .escape_sequences import (_SEQUENCE_CLASSIFY, 

35 TEXT_SIZING_PATTERN, 

36 CURSOR_MOVEMENT_SEQUENCE, 

37 INDETERMINATE_EFFECT_SEQUENCE, 

38 strip_sequences) 

39 

40# In 'parse' mode, strings longer than this are checked for cursor-movement 

41# controls (BS, TAB, CR, cursor sequences); when absent, mode downgrades to 

42# 'ignore' to skip character-by-character parsing. The detection scan cost is 

43# negligible for long strings but wasted on short ones like labels or headings. 

44_WIDTH_FAST_PATH_MIN_LEN = 20 

45 

46# Translation table to strip C0/C1 control characters for fast 'ignore' mode. 

47_CONTROL_CHAR_TABLE = str.maketrans('', '', ( 

48 ''.join(chr(c) for c in range(0x00, 0x20)) + # C0: NUL through US (including tab) 

49 '\x7f' + # DEL 

50 ''.join(chr(c) for c in range(0x80, 0xa0)) # C1: U+0080-U+009F 

51)) 

52 

53 

def _width_ignored_codes(text: str, ambiguous_width: int = 1,
                         term_program: bool | str = False) -> int:
    """
    Measure ``text`` for :func:`width` when ``control_codes='ignore'``.

    Escape sequences are removed with ``strip_sequences`` and every character listed in
    ``_CONTROL_CHAR_TABLE`` is deleted; the remaining text is then measured.

    :param text: String to measure.
    :param ambiguous_width: Forwarded unchanged to the measuring function.
    :param term_program: When exactly ``False``, the text is measured by ``wcswidth``;
        any other value selects ``wcstwidth`` and is forwarded to it.
    :returns: Whatever the selected measuring function returns for the stripped text.
    """
    printable = strip_sequences(text).translate(_CONTROL_CHAR_TABLE)
    if term_program is not False:
        # A terminal was requested: use the terminal-aware measurement.
        return wcstwidth(
            printable,
            ambiguous_width=ambiguous_width,
            term_program=term_program,
        )
    return wcswidth(printable, ambiguous_width=ambiguous_width)

71 

72 

def width(
    text: str,
    *,
    control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
    tabsize: int = 8,
    ambiguous_width: int = 1,
    term_program: bool | str = False,
) -> int:
    r"""
    Return printable width of text containing many kinds of control codes and sequences.

    Unlike :func:`wcswidth`, this function handles most control characters and many popular terminal
    output sequences. Never returns -1.

    :param text: String to measure.
    :param control_codes: How to handle control characters and sequences:

        - ``'parse'`` (default): Track horizontal cursor movement like BS ``\b``, CR ``\r``, TAB
          ``\t``, cursor left and right movement sequences. Vertical movement (LF, VT, FF) and
          indeterminate terminal sequences are zero-width. OSC 66 Kitty Text Sizing protocol, OSC 8
          Hyperlink, and many other kinds of output sequences are parsed for displayed measurements.
        - ``'strict'``: Like parse, but raises :exc:`ValueError` on control characters with
          indeterminate results of the screen or cursor, like clear or vertical movement. Generally,
          these should be handled with a virtual terminal emulator (like 'pyte').
        - ``'ignore'``: All C0 and C1 control characters and escape sequences are measured as
          width 0. This is the fastest measurement for text already filtered or known not to contain
          any kinds of control codes or sequences. TAB ``\t`` is zero-width; to ensure
          tab expansion, pre-process text using :func:`str.expandtabs`.

    :param tabsize: Tab stop width for ``'parse'`` and ``'strict'`` modes. Default is 8. Must be
        positive; with a non-positive value TAB does not advance the cursor. Has no effect when
        ``control_codes='ignore'``.
    :param ambiguous_width: Width to use for East Asian Ambiguous (A) characters. Default is ``1``
        (narrow). Set to ``2`` for CJK contexts.
    :param term_program: Terminal software identifier for table correction.
        ``False`` (default) disables override lookup. ``True`` reads the
        ``TERM_PROGRAM`` or ``TERM`` environment variable for auto-detection.
        Accepts a canonical terminal name matching :func:`list_term_programs`,
        such as from XTVERSION_, ENQ_, or ``TERM_PROGRAM``.

        .. versionadded:: 0.8.0
    :returns: Maximum cursor position reached, "extent", accounting for cursor movement sequences
        present in ``text`` according to given parameters. This represents the rightmost column the
        cursor reaches. Always a non-negative integer.
    :raises ValueError: If ``control_codes='strict'`` and control characters with indeterminate
        effects, such as vertical movement or clear sequences are encountered, or on unexpected
        C0 or C1 control code. In strict mode this also covers carriage return, horizontal
        position absolute, and cursor-left movement past the start of the string. Also raised
        when ``control_codes`` is not one of the valid values.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.7.0
       Expanded strict-mode to raise :exc:`ValueError` when cursor-left movement
       (CSI D) would move beyond the beginning of the string. Previously, cursor-left
       was silently clamped to column 0 in all modes.

       Support horizontal cursor sequences (``cub``, ``cuf``, ``hpa``). Cursor-left (``cub``) or
       backspace (``\b``) now overwrites text. ``column_address`` (``hpa``) and carriage return
       (``\r``) are now parsed, and raise ``ValueError`` when ``control_codes='strict'``.

    Examples::

        >>> width('hello')
        5
        >>> width('コンニチハ')
        10
        >>> width('\x1b[31mred\x1b[0m')
        3
        >>> width('\x1b[31mred\x1b[0m', control_codes='ignore')  # same result (ignored)
        3
        >>> width('123\b4')  # backspace overwrites previous cell (outputs '124')
        3
        >>> width('abc\t')  # tab caused cursor to move to column 8
        8
        >>> width('1\x1b[10C')  # '1' + cursor right 10, cursor ends on column 11
        11
        >>> width('1\x1b[10C', control_codes='ignore')  # faster but wrong in this case
        1
    """
    # pylint: disable=too-complex,too-many-branches,too-many-statements,too-many-locals,redefined-variable-type,too-many-nested-blocks
    # This could be broken into sub-functions (#1, #3, and #6 especially), but for reduced overhead
    # in consideration of this function a likely "hot path", they are inline, breaking many pylint
    # complexity rules.

    # Reject unknown modes up front, as documented. Without this check any unrecognised value
    # silently behaves like 'parse'. A 3-item tuple membership test is cheap enough for the
    # hot path.
    if control_codes not in ('parse', 'strict', 'ignore'):
        raise ValueError(
            "control_codes must be one of 'parse', 'strict', or 'ignore', "
            f"got {control_codes!r}"
        )

    # Fast path for ASCII printable (no tabs, escapes, or control chars)
    if text.isascii() and text.isprintable():
        return len(text)

    # Fast parse: if no horizontal cursor movements are possible, switch to 'ignore' mode.
    # Only check longer strings - the detection overhead hurts short string performance.
    if control_codes == 'parse' and len(text) > _WIDTH_FAST_PATH_MIN_LEN:
        # Check for cursor-affecting control characters
        if '\b' not in text and '\t' not in text and '\r' not in text:
            # Check for escape sequences, if none contain cursor movement or
            # text sizing, downgrade to 'ignore'
            if '\x1b' not in text or (
                not CURSOR_MOVEMENT_SEQUENCE.search(text)
                and not TEXT_SIZING_PATTERN.search(text)
            ):
                control_codes = 'ignore'

    # Fast path for ignore mode, useful if you know the text is already free of control codes
    if control_codes == 'ignore':
        return _width_ignored_codes(text, ambiguous_width, term_program=term_program)

    # Resolve terminal software for override lookup
    term_canonical = resolve_terminal(term_program)

    # Skip override lookup when no terminal detected (avoids lru_cache call overhead).
    # Extract locals for hot-loop performance (NamedTuple attribute access is slow).
    if term_canonical:
        overrides = get_term_overrides(term_canonical)
        _narrower = overrides.narrower
        _vs16_narrower = overrides.vs16_narrower
        _vs15_wider = overrides.vs15_wider
        _zeroer = overrides.zeroer
        _narrow_wider = overrides.narrow_wider
        _narrow_zeroer = overrides.narrow_zeroer
        _grapheme_overrides = table_grapheme_overrides.get(term_canonical)
    else:
        _narrower = ()
        _vs16_narrower = ()
        _vs15_wider = ()
        _zeroer = ()
        _narrow_wider = ()
        _narrow_zeroer = ()
        _grapheme_overrides = {}

    strict = control_codes == 'strict'
    # Track absolute positions: tab stops need modulo on absolute column, CR resets to 0.
    # Initialize max_extent to 0 so backward movement (CR, BS) won't yield negative width.
    current_col = 0
    max_extent = 0
    idx = 0
    text_len = len(text)

    # Select wcwidth call pattern for best lru_cache performance:
    # - ambiguous_width=1 (default): single-arg calls share cache with direct wcwidth() calls
    # - ambiguous_width=2: full positional args needed (results differ, separate cache is correct)
    _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)

    # grapheme-clustering state and local re-binding for performance.
    # Widths accumulate in cluster_width and flush at boundaries (see _wcswidth.py)
    last_measured_idx = -2  # -2 sentinel blocks VS16/VS15 (no base available)
    last_measured_ucs = -1
    last_measured_w = 0
    prev_was_virama = False
    _max_extent_before = 0
    cluster_start = -1
    col_before_cluster = 0
    max_extent_before_cluster = 0
    cluster_width = 0
    vs16_nw_table = VS16_NARROW_TO_WIDE['9.0.0']
    vs15_wn_table = VS15_WIDE_TO_NARROW['9.0.0']
    _bisearch = bisearch

    while idx < text_len:
        char = text[idx]

        # 1. ESC sequences
        if char == '\x1b':
            # Flush pending cluster before processing escape sequence
            if cluster_width:
                current_col += cluster_width
                if current_col > max_extent:
                    max_extent = current_col
                cluster_width = 0
            m = _SEQUENCE_CLASSIFY.match(text, idx)
            if not m:
                # 1a. Errant ESC or unknown sequence: only the first character is zero-width
                idx += 1
            else:
                seq = m.group()
                if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
                    raise ValueError(f"Indeterminate cursor sequence at position {idx}, {seq!r}")

                # 1b. horizontal position absolute (before forward/backward to
                # avoid other_seq match in _SEQUENCE_CLASSIFY)
                if (hpa_n := m.group('hpa_n')) is not None:
                    target_col = int(hpa_n) if hpa_n else 1
                    if strict:
                        raise ValueError(
                            f"Indeterminate horizontal position at position {idx}, "
                            f"{seq!r} (absolute column unknown)"
                        )
                    current_col = target_col - 1  # HPA is 1-indexed, convert to 0-indexed
                # 1c. cursor forward, backward
                elif (cforward_n := m.group('cforward_n')) is not None:
                    current_col += int(cforward_n) if cforward_n else 1
                elif (cbackward_n := m.group('cbackward_n')) is not None:
                    n_backward = int(cbackward_n) if cbackward_n else 1
                    if strict and n_backward > current_col:
                        raise ValueError(
                            f"Cursor left movement at position {idx} would move "
                            f"{n_backward} cells left from column {current_col}, "
                            f"exceeding string start"
                        )
                    # Outside strict mode, movement past column 0 is clamped.
                    current_col -= n_backward
                    if current_col < 0:
                        current_col = 0
                # 1d. OSC 66 Text Sizing — has positive display width
                elif (ts_meta := m.group('ts_meta')) is not None:
                    ts_text = m.group('ts_text')
                    ts_term = m.group('ts_term')
                    assert ts_text is not None and ts_term is not None
                    text_size = TextSizing(
                        TextSizingParams.from_params(ts_meta, control_codes=control_codes),
                        ts_text, ts_term)
                    current_col += text_size.display_width(ambiguous_width)
                # 1e. SGR and other zero-width sequences -- no column advance
                idx = m.end()
            # Escape sequences break VS16 adjacency: reset last-measured state
            last_measured_idx = -2
            last_measured_ucs = -1
            cluster_start = -1
            if current_col > max_extent:
                max_extent = current_col
            continue

        # 2. Vertical or Illegal control characters zero width or error when 'strict'
        if char in ILLEGAL_CTRL:
            if strict:
                raise ValueError(f"Illegal control character {ord(char):#x} at position {idx}")
            if cluster_width:
                current_col += cluster_width
                if current_col > max_extent:
                    max_extent = current_col
                cluster_width = 0
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            cluster_start = -1
            continue

        if char in VERTICAL_CTRL:
            if strict:
                raise ValueError(f"Vertical movement character {ord(char):#x} at position {idx}")
            if cluster_width:
                current_col += cluster_width
                if current_col > max_extent:
                    max_extent = current_col
                cluster_width = 0
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            cluster_start = -1
            continue

        # 3. Horizontal movement characters
        if char in HORIZONTAL_CTRL:
            if cluster_width:
                current_col += cluster_width
                if current_col > max_extent:
                    max_extent = current_col
                cluster_width = 0
            if char == '\t' and tabsize > 0:
                # Advance to the next tab stop, computed on the absolute column.
                current_col += tabsize - (current_col % tabsize)
            elif char == '\b':
                if current_col > 0:
                    current_col -= 1
            elif char == '\r':
                if strict:
                    raise ValueError(
                        f"Horizontal movement character \\r at position {idx}: "
                        "indeterminate starting column"
                    )
                current_col = 0
            if current_col > max_extent:
                max_extent = current_col
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            cluster_start = -1
            continue

        # 4. Zero-width control characters
        if char in ZERO_WIDTH_CTRL:
            if cluster_width:
                current_col += cluster_width
                if current_col > max_extent:
                    max_extent = current_col
                cluster_width = 0
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            cluster_start = -1
            continue

        # 5. Inline grapheme-clustering: ZWJ, Virama, VS16, Regional Indicators,
        # Fitzpatrick, Mc, wcwidth
        ucs = ord(char)

        # ZWJ (U+200D)
        if ucs == 0x200D:
            if prev_was_virama:
                idx += 1
            elif idx + 1 < text_len:
                # Check for terminal grapheme override when base char is ExtPict/RI
                if (_grapheme_overrides
                        and last_measured_idx >= 0
                        and last_measured_ucs in _EMOJI_ZWJ_SET):
                    cluster_end = _scan_zwj_cluster_end(text, last_measured_idx, text_len)
                    cluster = text[last_measured_idx:cluster_end]
                    override_w = _grapheme_overrides.get(cluster)
                    if override_w is not None:
                        current_col += (override_w - last_measured_w)
                        max_extent = max(max_extent, current_col)
                        last_measured_idx = -2
                        last_measured_ucs = -1
                        last_measured_w = 0
                        prev_was_virama = False
                        cluster_start = -1
                        idx = cluster_end
                        continue
                # No override; ZWJ breaks VS adjacency.
                # VS16 already set last_measured_idx = -2, blocking further VS16.
                last_measured_w = 0
                prev_was_virama = False
                # Skip the ZWJ and the character joined to it.
                idx += 2
            else:
                prev_was_virama = False
                idx += 1
            continue

        # 6. VS16 (U+FE0F): converts preceding narrow character to wide.
        if ucs == 0xFE0F and last_measured_idx >= 0:
            if _vs16_narrower and _bisearch(last_measured_ucs, _vs16_narrower):
                # Terminal override: this base stays narrow despite VS16.
                pass
            elif _bisearch(last_measured_ucs, vs16_nw_table):
                cluster_width = 2
            last_measured_idx = -2  # prevent double application
            idx += 1
            continue

        # VS15 (U+FE0E): text variation selector, requests narrow presentation.
        if ucs == 0xFE0E and last_measured_idx >= 0:
            base_ucs = last_measured_ucs
            vs15_narrow = _bisearch(base_ucs, vs15_wn_table)
            if _vs15_wider and _bisearch(base_ucs, _vs15_wider):
                vs15_narrow = False
            if vs15_narrow and last_measured_w == 2:
                current_col -= 1
                max_extent = max(_max_extent_before, current_col)
            idx += 1
            continue

        # 7. Regional Indicator & Fitzpatrick (both above BMP)
        if ucs > 0xFFFF:
            if ucs in _REGIONAL_INDICATOR_SET:
                # Count the Regional Indicators immediately preceding this one; an odd count
                # means this one is the second half of a pair and is not measured separately.
                ri_before = 0
                j = idx - 1
                while j >= 0 and ord(text[j]) in _REGIONAL_INDICATOR_SET:
                    ri_before += 1
                    j -= 1
                if ri_before % 2 == 1:
                    last_measured_ucs = ucs
                    idx += 1
                    continue
            elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
                  and last_measured_ucs in _EMOJI_ZWJ_SET):
                # Skin-tone modifier after an emoji base: no additional width.
                idx += 1
                continue

        # 8. Normal character: measure with wcwidth
        w = _wcwidth(char)
        # Apply single-codepoint terminal overrides (pre-merged tuples)
        if w == 2 and _narrower and _bisearch(ucs, _narrower):
            w = 1
        elif w == 2 and _zeroer and _bisearch(ucs, _zeroer):
            w = 0
        if w == 1 and _narrow_wider and _bisearch(ucs, _narrow_wider):
            w = 2
        elif w == 1 and _narrow_zeroer and _bisearch(ucs, _narrow_zeroer):
            w = 0
        if w > 0:
            # virama+consonant extends current cluster; otherwise start new
            if prev_was_virama:
                cluster_width = 2
            elif cluster_width:
                # flush previous cluster, check for grapheme overrides
                flushed = False
                if _grapheme_overrides and cluster_start >= 0:
                    # Two-phase override lookup (see _wcswidth.py)
                    candidate = text[cluster_start:idx + 1]
                    override_w = _grapheme_overrides.get(candidate)
                    if override_w is not None:
                        # Override covers the previous cluster plus this character.
                        current_col = col_before_cluster + override_w
                        max_extent = max(max_extent_before_cluster, current_col)
                        flushed = True
                        cluster_width = 0
                    else:
                        cluster_text = text[cluster_start:idx]
                        override_w = _grapheme_overrides.get(cluster_text)
                        if override_w is not None:
                            current_col = col_before_cluster + override_w
                            max_extent = max(max_extent_before_cluster, current_col)
                        else:
                            current_col += cluster_width
                else:
                    current_col += cluster_width
                if current_col > max_extent:
                    max_extent = current_col
                if not flushed:
                    cluster_width = w
                    cluster_start = idx
                    col_before_cluster = current_col
                    max_extent_before_cluster = max_extent
            else:
                cluster_width = w
                cluster_start = idx
                col_before_cluster = current_col
                max_extent_before_cluster = max_extent
            last_measured_idx = idx
            last_measured_ucs = ucs
            last_measured_w = w
            _max_extent_before = max_extent
            prev_was_virama = False
        elif ucs in _ISC_VIRAMA_SET:
            prev_was_virama = True
        elif last_measured_idx >= 0 and _bisearch(ucs, _CATEGORY_MC_TABLE):
            # Spacing Combining Mark (Mc) following a base character
            cluster_width = 2
            last_measured_idx = -2
            prev_was_virama = False
        else:
            prev_was_virama = False
        idx += 1

    # Flush the final pending cluster, honouring any terminal grapheme override.
    if cluster_width:
        if _grapheme_overrides and cluster_start >= 0:
            cluster_text = text[cluster_start:text_len]
            override_w = _grapheme_overrides.get(cluster_text)
            if override_w is not None:
                current_col = col_before_cluster + override_w
                max_extent = max(max_extent_before_cluster, current_col)
            else:
                current_col += cluster_width
                if current_col > max_extent:
                    max_extent = current_col
        else:
            current_col += cluster_width
            if current_col > max_extent:
                max_extent = current_col
    return max_extent