Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/_clip.py: 8%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

388 statements  

1"""This is a python implementation of clip().""" 

2from __future__ import annotations 

3 

4# std imports 

5import enum 

6from itertools import islice 

7 

8from typing import Literal, Callable, Optional, NamedTuple 

9 

10# local 

11from ._width import width 

12from .grapheme import iter_graphemes 

13from .hyperlink import Hyperlink, HyperlinkParams 

14from .sgr_state import (_SGR_STATE_DEFAULT, 

15 _SGRState, 

16 _sgr_state_update, 

17 _sgr_state_is_active, 

18 _sgr_state_to_sequence) 

19from .text_sizing import TextSizing, TextSizingParams 

20from .escape_sequences import (_SEQUENCE_CLASSIFY, 

21 _HORIZONTAL_CURSOR_MOVEMENT, 

22 INDETERMINATE_EFFECT_SEQUENCE) 

23 

24 

class _HyperlinkAction(enum.Enum):
    """
    Outcome of processing an OSC 8 hyperlink unit.

    Produced by ``_process_hyperlink`` and carried in ``_HyperlinkResult.action``; the clip
    loops branch on it to decide whether to emit, skip, or rebuild the hyperlink.
    """

    NO_CLOSE = enum.auto()  # open sequence without matching close
    EMPTY = enum.auto()  # hyperlink with no visible inner text
    OUTSIDE = enum.auto()  # hyperlink entirely outside the clip window
    VISIBLE = enum.auto()  # hyperlink overlaps the clip window

32 

33 

class _HyperlinkResult(NamedTuple):
    """
    Result of processing an OSC 8 hyperlink.

    Only the fields relevant to each action are populated; every other field keeps its
    zero/empty default. ``NO_CLOSE`` sets nothing but ``action``, ``EMPTY`` adds ``close_end``,
    ``OUTSIDE`` adds ``close_end`` and ``inner_width``, and ``VISIBLE`` fills every field.
    """

    # Which of the four outcomes applies.
    action: _HyperlinkAction
    # Second value returned by ``Hyperlink.find_close``; callers resume scanning from here.
    close_end: int = 0
    # Display width of the complete (unclipped) inner text.
    inner_width: int = 0
    # Open sequence rebuilt from the parsed parameters (``params.make_open()``).
    open_seq: str = ''
    # Inner text after clipping to the window.
    clipped_inner: str = ''
    # Close sequence rebuilt from the parsed parameters (``params.make_close()``).
    close_seq: str = ''
    # Display width of ``clipped_inner``.
    clipped_width: int = 0
    # Column just past the hyperlink: starting column plus ``inner_width``.
    hl_col_end: int = 0

49 

50 

51def _apply_sgr_wrap(result: str, captured_style: Optional[_SGRState]) -> str: 

52 """ 

53 Apply SGR prefix/suffix around *result*. 

54 

55 If an SGR state was captured at the first visible character, prefix the result with the 

56 corresponding SGR sequence and suffix with a reset if any styles are active. 

57 """ 

58 if captured_style is not None: 

59 if prefix := _sgr_state_to_sequence(captured_style): 

60 result = prefix + result 

61 if _sgr_state_is_active(captured_style): 

62 result += '\x1b[0m' 

63 return result 

64 

65 

def _process_hyperlink(
    text: str,
    start: int,
    end: int,
    fillchar: str,
    tabsize: int,
    ambiguous_width: int,
    term_program: bool | str,
    control_codes: Literal['parse', 'strict', 'ignore'],
    *,
    params: HyperlinkParams,
    match_end: int,
    col: int,
) -> _HyperlinkResult:
    """
    Classify one OSC 8 hyperlink unit and, when visible, clip its inner text.

    The close sequence is located with ``Hyperlink.find_close``, the text between open and
    close is measured, and the hyperlink is reported as one of the ``_HyperlinkAction``
    outcomes.

    :param text: Full string being clipped.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param fillchar: Forwarded to :func:`clip` for the inner text.
    :param tabsize: Forwarded to :func:`width` and :func:`clip`.
    :param ambiguous_width: Forwarded to :func:`width` and :func:`clip`.
    :param term_program: Forwarded to :func:`width` and :func:`clip`.
    :param control_codes: Forwarded to :func:`width` and :func:`clip`.
    :param params: Parsed parameters of the open sequence, used to rebuild open/close.
    :param match_end: Index in *text* just past the open sequence.
    :param col: Display column at which the hyperlink's inner text begins.
    :returns: A :class:`_HyperlinkResult` populated according to the outcome.
    """
    # pylint: disable=too-many-locals,too-many-positional-arguments,too-many-arguments
    close_start, close_end = Hyperlink.find_close(text, match_end)
    if close_start == -1 and close_end == -1:
        return _HyperlinkResult(_HyperlinkAction.NO_CLOSE)

    def measure(segment: str) -> int:
        # Both width measurements below share the same settings.
        return width(
            segment, control_codes=control_codes,
            tabsize=tabsize, ambiguous_width=ambiguous_width,
            term_program=term_program,
        )

    inner_text = text[match_end:close_start]
    inner_width = measure(inner_text)
    if inner_width == 0:
        return _HyperlinkResult(_HyperlinkAction.EMPTY, close_end=close_end)

    hl_col_end = col + inner_width
    overlaps_window = hl_col_end > start and col < end
    if not overlaps_window:
        return _HyperlinkResult(_HyperlinkAction.OUTSIDE, close_end=close_end,
                                inner_width=inner_width)

    # Translate the window into coordinates relative to the start of the inner text.
    # SGR propagation is disabled so styling inside the link passes through verbatim.
    clipped_inner = clip(
        inner_text, max(0, start - col), end - col,
        fillchar=fillchar, tabsize=tabsize,
        ambiguous_width=ambiguous_width,
        term_program=term_program,
        propagate_sgr=False,
        control_codes=control_codes,
    )
    clipped_width = measure(clipped_inner)

    return _HyperlinkResult(
        _HyperlinkAction.VISIBLE,
        close_end=close_end,
        inner_width=inner_width,
        open_seq=params.make_open(),
        clipped_inner=clipped_inner,
        close_seq=params.make_close(),
        clipped_width=clipped_width,
        hl_col_end=hl_col_end,
    )

134 

135 

136def _reconstruct_painter( 

137 cells: dict[int, tuple[str, int]], 

138 sequences: list[tuple[int, int, str]], 

139 start: int, 

140 end: int, 

141 fillchar: str, 

142) -> str: 

143 """ 

144 Reconstruct the output string from painter's algorithm state. 

145 

146 Walks columns left-to-right, interleaving escape sequences and cell content, filling gaps with 

147 *fillchar*. 

148 """ 

149 # pylint: disable=too-many-locals 

150 # Group and sort sequences by column, preserving insertion order within each. 

151 seqs_by_col: dict[int, list[tuple[int, str]]] = {} 

152 for col_pos, order, seq_text in sequences: 

153 seqs_by_col.setdefault(col_pos, []).append((order, seq_text)) 

154 for entries in seqs_by_col.values(): 

155 entries.sort() 

156 

157 max_cell_col = max(cells.keys()) if cells else -1 

158 max_seq_col = max(seqs_by_col.keys()) if seqs_by_col else -1 

159 max_col = max(max_cell_col, max_seq_col) 

160 

161 parts: list[str] = [] 

162 walk_col = 0 

163 col_limit = min(max_col, end) 

164 while walk_col <= col_limit: 

165 # Emit any sequences anchored at this column. 

166 for _, seq_text in seqs_by_col.get(walk_col, ()): 

167 parts.append(seq_text) 

168 

169 if walk_col >= end: 

170 walk_col += 1 

171 continue 

172 

173 if walk_col in cells: 

174 cell_text, cell_w = cells[walk_col] 

175 parts.append(cell_text) 

176 walk_col += cell_w 

177 else: 

178 if start <= walk_col <= max_cell_col: 

179 parts.append(fillchar) 

180 walk_col += 1 

181 

182 # Emit sequences anchored beyond the visible region. 

183 for c in sorted(seqs_by_col.keys()): 

184 if c > col_limit: 

185 for _, seq_text in seqs_by_col[c]: 

186 parts.append(seq_text) 

187 

188 return ''.join(parts) 

189 

190 

def _clip_simple(
    text: str,
    start: int,
    end: int,
    *,
    propagate_sgr: bool,
    ambiguous_width: int,
    term_program: bool | str,
    fillchar: str,
    tabsize: int,
    strict: bool,
    control_codes: Literal['parse', 'strict', 'ignore'],
) -> tuple[str, Optional[_SGRState]]:
    """
    Clip text without cursor movement (simple append-to-output path).

    The text is scanned once, left to right, while a running display column is maintained.
    Content that falls inside the window is appended to the output; content outside only
    advances the column.

    :param text: String to clip.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param propagate_sgr: When true, SGR sequences update a tracked style instead of being
        emitted, and the style in effect at the first visible emission is captured.
    :param ambiguous_width: Forwarded to the width calculations.
    :param term_program: Forwarded to the width calculations.
    :param fillchar: Emitted once per visible column of a grapheme that straddles a window edge.
    :param tabsize: Tab stop interval; when not positive, tabs are emitted literally and do not
        advance the column.
    :param strict: When true, a sequence matching ``INDETERMINATE_EFFECT_SEQUENCE`` raises.
    :param control_codes: Forwarded to hyperlink and text-sizing handling.
    :raises ValueError: In strict mode, on an indeterminate-effect sequence.

    Returns ``(result, captured_style)``. The caller applies SGR wrapping.
    """
    # pylint: disable=too-complex,too-many-locals,too-many-branches,too-many-statements
    # pylint: disable=too-many-nested-blocks
    # code length and complexity traded for performance, to allow this to be used as a "hot path"

    output: list[str] = []
    col = 0
    idx = 0
    # captured_style is a frozen snapshot of current_style taken at the first
    # visible character emitted within the clip window (start, end). It stays
    # None until that point. current_style, by contrast, is continuously
    # updated by SGR sequences throughout the scan. The snapshot is what the
    # caller uses to wrap the result in the correct SGR state.
    #
    # When propagate_sgr is False, current_style (and therefore captured_style)
    # remain None, and SGR sequences pass through as literal text.
    captured_style: Optional[_SGRState] = None
    current_style = _SGR_STATE_DEFAULT if propagate_sgr else None

    while idx < len(text):
        char = text[idx]

        # Early exit: past visible region.
        if col >= end and char not in '\r\x08\t\x1b':
            if captured_style is not None:
                break
            # propagate_sgr is always False here: with propagate_sgr=True,
            # captured_style is set on the first visible emission in the
            # clip window and we would have broken above. The skip-ahead
            # optimization is only needed (and safe) when SGR tracking is off.
            next_esc = text.find('\x1b', idx + 1)
            if next_esc == -1:
                break
            idx = next_esc
            continue

        if char == '\x1b':
            m = _SEQUENCE_CLASSIFY.match(text, idx)
            if not m:
                # Unrecognized escape: emit the lone ESC and rescan from the next character.
                output.append(char)
                idx += 1
                continue

            # SGR: update current_style, do not emit.
            if m.group('sgr_params') is not None and propagate_sgr and current_style is not None:
                current_style = _sgr_state_update(current_style, m.group())
                idx = m.end()
                continue

            # OSC 8 hyperlink.
            if hl_state := HyperlinkParams.parse(m.group()):
                r = _process_hyperlink(
                    text, start, end, fillchar, tabsize, ambiguous_width,
                    term_program,
                    control_codes,
                    params=hl_state, match_end=m.end(), col=col,
                )
                if r.action is _HyperlinkAction.NO_CLOSE:
                    # Unterminated hyperlink: keep the open sequence, scan what follows normally.
                    output.append(m.group())
                    idx = m.end()
                elif r.action is _HyperlinkAction.EMPTY:
                    # No visible inner text: drop the whole open..close unit.
                    idx = r.close_end
                elif r.action is _HyperlinkAction.OUTSIDE:
                    # Not in the window: account for its width but emit nothing.
                    col += r.inner_width
                    idx = r.close_end
                else:
                    # Visible: rebuild the hyperlink around the clipped inner text.
                    output.append(r.open_seq)
                    output.append(r.clipped_inner)
                    output.append(r.close_seq)
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    col += r.inner_width
                    idx = r.close_end
                continue

            # OSC 66 Text Sizing.
            if (ts_meta := m.group('ts_meta')) is not None:
                ts_text = m.group('ts_text')
                ts_term = m.group('ts_term')
                assert ts_text is not None and ts_term is not None
                ts = TextSizing(
                    TextSizingParams.from_params(ts_meta, control_codes=control_codes),
                    ts_text, ts_term)
                ts_width = ts.display_width(ambiguous_width)

                if col >= start and col + ts_width <= end:
                    # Entirely inside the window: emit the sequence whole.
                    output.append(ts.make_sequence())
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    col += ts_width
                elif col < end and col + ts_width > start:
                    # Straddles a window edge: let _text_sizing_clip decompose it.
                    ts_parts: list[str] = []

                    def _ts_write(s: str, _w: int, _col: int) -> None:
                        # Only the text matters on this append-only path; width and
                        # column reported by _text_sizing_clip are ignored.
                        ts_parts.append(s)
                    col = _text_sizing_clip(
                        ts, col, start, end, fillchar, ambiguous_width,
                        term_program,
                        _ts_write)
                    output.extend(ts_parts)
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                else:
                    # Entirely outside the window: only advance the column.
                    col += ts_width
                idx = m.end()
                continue

            # Indeterminate-effect sequences: raise in strict mode.
            seq = m.group()
            if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
                raise ValueError(
                    f"Indeterminate cursor sequence at position {idx}, "
                    f"{seq!r}"
                )

            # Any other recognized sequence: preserve as-is.
            output.append(seq)
            idx = m.end()
            continue

        if char == '\t':
            # Expand tab, filling clip window with spaces.
            if tabsize > 0:
                next_tab = col + (tabsize - (col % tabsize))
                while col < next_tab:
                    if start <= col < end:
                        output.append(' ')
                        if propagate_sgr and captured_style is None:
                            captured_style = current_style
                    col += 1
            else:
                # Tab expansion disabled: emit the tab itself without advancing the column.
                output.append('\t')
            idx += 1
            continue

        grapheme = next(iter_graphemes(text, start=idx))
        grapheme_w = width(grapheme, ambiguous_width=ambiguous_width,
                           term_program=term_program)

        # Emit grapheme or fillchar depending on visibility within clip window.
        if grapheme_w == 0:
            # Zero-width grapheme: kept only while the column is inside the window;
            # it does not trigger the style capture.
            if start <= col < end:
                output.append(grapheme)
        elif col >= start and col + grapheme_w <= end:
            output.append(grapheme)
            if propagate_sgr and captured_style is None:
                captured_style = current_style
        elif col < end and col + grapheme_w > start:
            # Grapheme straddles a window edge: one fillchar per column that is inside.
            output.append(fillchar * (min(end, col + grapheme_w) - max(start, col)))
            if propagate_sgr and captured_style is None:
                captured_style = current_style

        col += grapheme_w
        idx += len(grapheme)

    return ''.join(output), captured_style

364 

365 

def _text_sizing_clip(
    ts: TextSizing,
    col: int,
    start: int,
    end: int,
    fillchar: str,
    ambiguous_width: int,
    term_program: bool | str,
    write_cells: Callable[[str, int, int], None],
) -> int:
    """
    Clip one text-sizing (OSC 66) sequence to the window (start, end).

    Output is delivered through *write_cells(text, width, col)*, called once per emitted piece.
    A sequence entirely inside the window is written whole and one entirely outside writes
    nothing. A sequence straddling a window edge is split into units: consecutive fully
    visible units are re-wrapped as a new text-sizing sequence, and each visible column of a
    partially visible unit is written as *fillchar*.

    :param ts: The parsed text-sizing sequence.
    :param col: Column at which the sequence begins.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param fillchar: Replacement for each visible column of a unit cut by a window edge.
    :param ambiguous_width: Forwarded to the width calculations.
    :param term_program: Forwarded to the width calculations.
    :param write_cells: Sink receiving ``(text, width, col)`` for every emitted piece.
    :returns: The column just past the sequence, ``col + ts.display_width(...)``.
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-positional-arguments,too-complex
    ts_width = ts.display_width(ambiguous_width)
    next_col = col + ts_width

    # Entirely inside the window: pass the sequence through untouched.
    if start <= col and next_col <= end:
        write_cells(ts.make_sequence(), ts_width, col)
        return next_col
    # Entirely outside the window: nothing to emit.
    if end <= col or next_col <= start:
        return next_col

    # Straddling an edge: express the window relative to the start of the sequence.
    rel_start = max(0, start - col)
    rel_end = min(end, next_col) - col
    scale = ts.params.scale
    fixed_width = ts.params.width

    units: list[tuple[str, int]]
    if fixed_width > 0:
        # Explicit width: that many units of `scale` columns each, padded with empty units
        # when the text has fewer graphemes.
        units = [(g, scale) for g in islice(iter_graphemes(ts.text), fixed_width)]
        units.extend([('', scale)] * (fixed_width - len(units)))
    else:
        # No explicit width: every grapheme occupies its own width multiplied by `scale`.
        units = [
            (g, width(g, ambiguous_width=ambiguous_width,
                      term_program=term_program) * scale)
            for g in iter_graphemes(ts.text)
        ]

    run: list[tuple[str, int]] = []

    def emit_run(at_col: int) -> None:
        # Re-wrap the accumulated fully visible units as one text-sizing sequence.
        if not run:
            return
        run_text = ''.join(piece for piece, _ in run)
        run_width = sum(piece_w for _, piece_w in run)
        run_params = TextSizingParams(
            scale,
            len(run) if fixed_width > 0 else 0,
            ts.params.numerator, ts.params.denominator,
            ts.params.vertical_align, ts.params.horizontal_align)
        write_cells(
            TextSizing(run_params, run_text, ts.terminator).make_sequence(),
            run_width,
            at_col)
        run.clear()

    run_col = col + rel_start
    cursor = 0
    for unit_text, unit_w in units:
        unit_lo = cursor
        unit_hi = cursor + unit_w
        cursor = unit_hi

        if unit_hi <= rel_start:
            # Unit ends before the window begins.
            continue
        if unit_lo >= rel_end:
            # Unit begins after the window ends; so does everything after it.
            break

        visible_lo = max(unit_lo, rel_start)
        visible_cols = min(unit_hi, rel_end) - visible_lo
        if unit_w > 0 and visible_cols == unit_w:
            # Fully visible: extend the current run, noting where a new run starts.
            if not run:
                run_col = col + visible_lo
            run.append((unit_text, unit_w))
            continue

        # Partially visible: close the run, then substitute fillchar per visible column.
        emit_run(run_col)
        first_fill_col = col + visible_lo
        for offset in range(visible_cols):
            write_cells(fillchar, 1, first_fill_col + offset)

    emit_run(run_col)
    return next_col

452 

453 

def _clip_painter(
    text: str,
    start: int,
    end: int,
    *,
    propagate_sgr: bool,
    ambiguous_width: int,
    term_program: bool | str,
    fillchar: str,
    tabsize: int,
    strict: bool,
    control_codes: Literal['parse', 'strict', 'ignore'],
) -> tuple[str, Optional[_SGRState]]:
    """
    Clip text with cursor movement (painter's algorithm path).

    Visible content is painted into a column-indexed cell map, so that carriage return,
    backspace and horizontal cursor sequences can overwrite earlier cells. Zero-width
    sequences are recorded with the column they were seen at. The final string is produced
    by ``_reconstruct_painter``.

    :param text: String to clip.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param propagate_sgr: When true, SGR sequences update a tracked style instead of being
        recorded, and the style in effect at the first recorded output is captured.
    :param ambiguous_width: Forwarded to the width calculations.
    :param term_program: Forwarded to the width calculations.
    :param fillchar: Painted for cursor-forward gaps, expanded tabs, and each visible column
        of a grapheme or wide cell that is cut.
    :param tabsize: Tab stop interval; when not positive, tabs are recorded as zero-width.
    :param strict: When true, indeterminate-effect sequences and cursor-left movement past
        column 0 raise.
    :param control_codes: Forwarded to hyperlink and text-sizing handling.
    :raises ValueError: In strict mode, as described for *strict*.

    Returns ``(result, captured_style)``. The caller applies SGR wrapping.
    """
    # pylint: disable=too-complex,too-many-locals,too-many-branches
    # pylint: disable=too-many-statements,too-many-nested-blocks
    # code length and complexity traded for performance, to allow this to be used as a "hot path"

    cells: dict[int, tuple[str, int]] = {}
    hyperlink_cells: set[int] = set()
    sequences: list[tuple[int, int, str]] = []
    seq_order = 0

    col = 0
    idx = 0
    # captured_style is a frozen snapshot of current_style taken at the first
    # visible character emitted within the clip window (start, end). It stays
    # None until that point. current_style, by contrast, is continuously
    # updated by SGR sequences throughout the scan.
    #
    # When propagate_sgr is False, current_style (and therefore captured_style)
    # remain None, and SGR sequences pass through as literal text.
    captured_style: Optional[_SGRState] = None
    current_style = _SGR_STATE_DEFAULT if propagate_sgr else None

    def _write_cells(s: str, w: int, write_col: int,
                     is_hyperlink: bool = False) -> None:
        """Write *w* cells of text *s* at *write_col*, handling wide-char splitting."""
        nonlocal captured_style
        for offset in range(w):
            src_col = write_col + offset
            # Overwriting the right half of a wide cell: its left half becomes fillchar.
            if src_col > 0 and cells.get(src_col - 1, ('', 0))[1] == 2:
                cells[src_col - 1] = (fillchar, 1)
                hyperlink_cells.discard(src_col - 1)
            # Overwriting the left half of a wide cell: its right half becomes fillchar.
            if cells.get(src_col, ('', 0))[1] == 2:
                cells[src_col + 1] = (fillchar, 1)
                hyperlink_cells.discard(src_col + 1)
            cells.pop(src_col, None)
            hyperlink_cells.discard(src_col)
        cells[write_col] = (s, w)
        if is_hyperlink:
            for offset in range(w):
                hyperlink_cells.add(write_col + offset)
        if propagate_sgr and captured_style is None:
            captured_style = current_style

    while idx < len(text):
        char = text[idx]

        # Early exit: past visible region, SGR captured, no escape ahead.
        # NOTE(review): this break fires on any non-ESC character, including '\r' and
        # '\x08', so a later cursor move back into the window is not processed once a
        # style has been captured -- confirm this is intended.
        if col >= end and captured_style is not None and char != '\x1b':
            break

        if char == '\x1b':
            m = _SEQUENCE_CLASSIFY.match(text, idx)
            if not m:
                # Record lone ESC as a zero-width sequence at current column.
                sequences.append((col, seq_order, char))
                seq_order += 1
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
                idx += 1
                continue

            # SGR: update current_style, do not emit.
            if m.group('sgr_params') is not None and propagate_sgr and current_style is not None:
                current_style = _sgr_state_update(current_style, m.group())
                idx = m.end()
                continue

            # OSC 8 hyperlink.
            if hl_state := HyperlinkParams.parse(m.group()):
                r = _process_hyperlink(
                    text, start, end, fillchar, tabsize, ambiguous_width,
                    term_program,
                    control_codes,
                    params=hl_state, match_end=m.end(), col=col,
                )
                if r.action is _HyperlinkAction.NO_CLOSE:
                    sequences.append((col, seq_order, m.group()))
                    seq_order += 1
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    idx = m.end()
                elif r.action is _HyperlinkAction.EMPTY:
                    idx = r.close_end
                elif r.action is _HyperlinkAction.OUTSIDE:
                    col += r.inner_width
                    idx = r.close_end
                else:
                    # The clipped inner text begins at the first in-window column, which is
                    # ``start`` when the hyperlink began left of the window. Painting it at
                    # the hyperlink's own origin would leave unpainted columns between the
                    # clipped text and whatever follows, and reconstruction would pad them
                    # with fillchar.
                    hl_col = max(col, start)
                    sequences.append((hl_col, seq_order, r.open_seq))
                    seq_order += 1
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    _write_cells(r.clipped_inner, r.clipped_width, hl_col,
                                 is_hyperlink=True)
                    sequences.append((hl_col + r.clipped_width, seq_order, r.close_seq))
                    seq_order += 1
                    # The cursor ends past the whole hyperlink, clipped or not.
                    col = r.hl_col_end
                    idx = r.close_end
                continue

            # OSC 66 Text Sizing.
            if (ts_meta := m.group('ts_meta')) is not None:
                ts_text = m.group('ts_text')
                ts_term = m.group('ts_term')
                assert ts_text is not None and ts_term is not None
                ts = TextSizing(
                    TextSizingParams.from_params(ts_meta, control_codes=control_codes),
                    ts_text, ts_term)
                col = _text_sizing_clip(
                    ts, col, start, end, fillchar, ambiguous_width,
                    term_program,
                    _write_cells)
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
                idx = m.end()
                continue

            # Indeterminate-effect sequences: raise in strict mode.
            seq = m.group()
            if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
                raise ValueError(
                    f"Indeterminate cursor sequence at position {idx}, "
                    f"{seq!r}"
                )

            # Horizontal Position Absolute (CSI n G).
            if (hpa_n := m.group('hpa_n')) is not None:
                # The parameter is 1-based; an empty parameter means column 0.
                col = int(hpa_n) - 1 if hpa_n else 0
                idx = m.end()
                continue

            # Cursor Forward (CSI n C).
            if (cforward_n := m.group('cforward_n')) is not None:
                n_forward = int(cforward_n) if cforward_n else 1
                move_end = col + n_forward
                # Paint fillchar over the part of the skipped span that is inside the window.
                if col < end and move_end > start:
                    for i in range(max(col, start), min(move_end, end)):
                        _write_cells(fillchar, 1, i)
                col = move_end
                idx = m.end()
                continue

            # Cursor Backward (CSI n D).
            if (cbackward_n := m.group('cbackward_n')) is not None:
                n_backward = int(cbackward_n) if cbackward_n else 1
                if strict and n_backward > col:
                    raise ValueError(
                        f"Cursor left movement at position {idx} would move "
                        f"{n_backward} cells left from column {col}, "
                        f"exceeding string start"
                    )
                # Outside strict mode the cursor is clamped at column 0.
                col -= n_backward
                if col < 0:
                    col = 0
                idx = m.end()
                continue

            # Any other recognized sequence: preserve as-is.
            sequences.append((col, seq_order, m.group()))
            seq_order += 1
            if propagate_sgr and captured_style is None:
                captured_style = current_style
            idx = m.end()
            continue

        # Carriage return.
        if char == '\r':
            col = 0
            idx += 1
            continue

        # Backspace.
        if char == '\x08':
            if col > 0:
                col -= 1
            idx += 1
            continue

        # Tab expansion.
        if char == '\t':
            if tabsize > 0:
                next_tab = col + (tabsize - (col % tabsize))
                while col < next_tab:
                    if start <= col < end:
                        _write_cells(fillchar, 1, col)
                    col += 1
            else:
                # Tab expansion disabled: record the tab as zero-width at this column.
                sequences.append((col, seq_order, '\t'))
                seq_order += 1
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
            idx += 1
            continue

        # Grapheme cluster.
        grapheme = next(iter_graphemes(text, start=idx))
        grapheme_w = width(grapheme, ambiguous_width=ambiguous_width,
                           term_program=term_program)

        # Emit grapheme or fillchar depending on visibility within clip window.
        if grapheme_w == 0:
            # Zero-width grapheme: recorded like a sequence, only while inside the window.
            if start <= col < end:
                sequences.append((col, seq_order, grapheme))
                seq_order += 1
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
        elif col >= start and col + grapheme_w <= end:
            _write_cells(grapheme, grapheme_w, col)
        elif col < end and col + grapheme_w > start:
            # Grapheme straddles a window edge: one fillchar per column that is inside.
            clip_start = max(start, col)
            for offset in range(min(end, col + grapheme_w) - clip_start):
                _write_cells(fillchar, 1, clip_start + offset)

        col += grapheme_w
        idx += len(grapheme)

    return _reconstruct_painter(cells, sequences, start, end, fillchar), captured_style

688 

689 

def clip(
    text: str,
    start: int,
    end: int,
    *,
    fillchar: str = ' ',
    tabsize: int = 8,
    ambiguous_width: int = 1,
    propagate_sgr: bool = True,
    control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
    overtyping: Optional[bool] = None,
    term_program: bool | str = False,
) -> str:
    r"""
    Clip text to display columns (start, end) while preserving all terminal sequences.

    The substring is selected by visible column position, not by character index.
    Terminal escape sequences have zero display width and are kept in the output. A wide
    character (width 2) that is cut by either boundary is replaced with ``fillchar``.

    TAB characters (``\t``) are expanded to spaces up to the next tab stop, as set by
    ``tabsize``. When cursor movement is detected a "painter's algorithm" is used: cursor
    movements change the write position, so cursor-left and carriage return overwrite
    previously written cells. ``text`` is assumed to begin at column 0.

    **OSC 8 hyperlinks** are handled specially: the visible text inside a hyperlink is
    clipped to the requested column range and the hyperlink is rebuilt around the clipped
    text. Empty hyperlinks (those with no remaining visible text after clipping) are
    removed::

        >>> clip('\x1b]8;;http://example.com\x07Click This link\x1b]8;;\x07', 6, 10)
        '\x1b]8;;http://example.com\x07This\x1b]8;;\x07'

    :param text: String to clip, may contain terminal escape sequences.
    :param start: Absolute starting column (inclusive, 0-indexed). Negative values are
        treated as 0.
    :param end: Absolute ending column (exclusive). An empty string is returned when this
        is not greater than ``start``.
    :param fillchar: Character to use when a wide character must be split at
        a boundary (default space). Must have display width of 1.
    :param tabsize: Tab stop width (default 8). Set to 0 to pass tabs through
        as zero-width (preserved in output but don't advance column position).
    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :param propagate_sgr: If True (default), SGR (terminal styling) sequences
        are propagated. The result begins with any active style at the start
        position and ends with a reset sequence if styles are active.
    :param control_codes: How to handle control characters and sequences:

        - ``'parse'`` (default): Track horizontal cursor movement and clip
          hyperlink text. Cursor overwrite is always allowed, with best effort
          results; indeterminate sequences (home, clear, reset, etc.) are
          preserved as zero-width.
        - ``'strict'``: Like ``parse``, but raises :exc:`ValueError` on
          sequences with indeterminate effects (cursor home, clear screen,
          reset, vertical movement, etc.) matching :func:`width` behavior.
          Also raises on out-of-bounds horizontal cursor movement.
        - ``'ignore'``: All control characters are treated as zero-width.
          Cursor movement is not tracked (fastest path).

    :param overtyping: Whether to use the painter's algorithm for cursor
        movement (``\b`` backspace, ``\r`` carriage return, and CSI cursor
        left/right/position sequences). When ``None`` (default), auto-detects
        by scanning for these characters in *text*. Set to ``False`` for improved
        performance when the caller knows *text* contains no cursor movement
        characters. Set to ``True`` to force the painter's algorithm (useful
        for testing). Has no effect when ``control_codes='ignore'``.
    :param term_program: Terminal software identifier for table correction.
        ``False`` (default) disables override lookup. ``True`` reads the
        ``TERM_PROGRAM`` or ``TERM`` environment variable for auto-detection.
        Accepts a canonical terminal name matching :func:`list_term_programs`,
        such as from XTVERSION_, ENQ_, or ``TERM_PROGRAM``.

        .. versionadded:: 0.8.0

    :returns: Substring of ``text`` spanning display columns (start, end),
        with all terminal sequences preserved and wide characters at boundaries
        replaced with ``fillchar``.

    :raises ValueError: If ``control_codes='strict'`` and an indeterminate-effect
        sequence or out-of-bounds cursor movement is encountered.

    SGR (terminal styling) sequences are propagated by default. The result
    begins with any active style and ends with a reset::

        >>> clip('\x1b[1;34mHello world\x1b[0m', 6, 11)
        '\x1b[1;34mworld\x1b[0m'

    Set ``propagate_sgr=False`` to disable this behavior.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.5.0
       Added ``propagate_sgr`` parameter (default True).

    .. versionchanged:: 0.7.0
       Added ``control_codes`` parameter (default 'parse').
       OSC 8 hyperlink-aware clipping. OSC 66 text sizing protocol support.
       Added ``overtyping`` parameter (default None, auto-detect).

    Example::

        >>> clip('hello world', 0, 5)
        'hello'
        >>> clip('中文字', 0, 3)  # Wide char split at column 3
        '中 '
        >>> clip('a\tb', 0, 10)  # Tab expanded to spaces
        'a       b'
    """
    start = max(start, 0)
    if end <= start:
        return ''

    # Printable ASCII has one column per character, so plain slicing is exact.
    if text.isascii() and text.isprintable():
        return text[start:end]

    # Without any ESC there can be no SGR sequence, so style tracking is pointless.
    has_esc = '\x1b' in text
    track_sgr = propagate_sgr if has_esc else False

    # Choose the implementation: 'ignore' always takes the simple path, an explicit
    # `overtyping` is honoured otherwise, and None means "scan for cursor movement".
    if control_codes == 'ignore':
        use_painter = False
    elif overtyping is None:
        use_painter = (
            '\x08' in text
            or '\r' in text
            or (has_esc and _HORIZONTAL_CURSOR_MOVEMENT.search(text) is not None)
        )
    else:
        use_painter = bool(overtyping)
    clip_impl = _clip_painter if use_painter else _clip_simple

    clipped, captured_style = clip_impl(
        text=text,
        start=start,
        end=end,
        propagate_sgr=track_sgr,
        ambiguous_width=ambiguous_width,
        term_program=term_program,
        fillchar=fillchar,
        tabsize=tabsize,
        strict=(control_codes == 'strict'),
        control_codes=control_codes,
    )
    return _apply_sgr_wrap(clipped, captured_style)