Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/_clip.py: 8%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

386 statements  

1"""This is a python implementation of clip().""" 

2from __future__ import annotations 

3 

4# std imports 

5import enum 

6from itertools import islice 

7 

8from typing import Literal, Callable, Optional, NamedTuple 

9 

10# local 

11from ._width import width 

12from .grapheme import iter_graphemes 

13from .hyperlink import Hyperlink, HyperlinkParams 

14from .sgr_state import (_SGR_STATE_DEFAULT, 

15 _SGRState, 

16 _sgr_state_update, 

17 _sgr_state_is_active, 

18 _sgr_state_to_sequence) 

19from .text_sizing import TextSizing, TextSizingParams 

20from .escape_sequences import (_SEQUENCE_CLASSIFY, 

21 _HORIZONTAL_CURSOR_MOVEMENT, 

22 INDETERMINATE_EFFECT_SEQUENCE) 

23 

24 

class _HyperlinkAction(enum.Enum):
    """
    Outcome of processing an OSC 8 hyperlink unit.

    Returned (inside :class:`_HyperlinkResult`) by ``_process_hyperlink`` so that the clipping
    loops can decide whether to emit, skip, or pass through a hyperlink.
    """

    NO_CLOSE = enum.auto()  # open sequence without matching close
    EMPTY = enum.auto()  # hyperlink with no visible inner text
    OUTSIDE = enum.auto()  # hyperlink entirely outside the clip window
    VISIBLE = enum.auto()  # hyperlink overlaps the clip window

32 

33 

class _HyperlinkResult(NamedTuple):
    """
    Result of processing an OSC 8 hyperlink.

    Only the fields relevant to each action are populated; the rest keep their defaults:

    - ``NO_CLOSE``: no additional fields.
    - ``EMPTY``: ``close_end``.
    - ``OUTSIDE``: ``close_end`` and ``inner_width``.
    - ``VISIBLE``: every field.
    """

    # Which of the four outcomes applies.
    action: _HyperlinkAction
    # Second value returned by ``Hyperlink.find_close``; callers resume scanning at this index.
    close_end: int = 0
    # Display width of the text between the open and close sequences.
    inner_width: int = 0
    # Open sequence rebuilt from the parsed parameters (``params.make_open()``).
    open_seq: str = ''
    # Inner text after clipping to the part of the window it overlaps.
    clipped_inner: str = ''
    # Close sequence rebuilt from the parsed parameters (``params.make_close()``).
    close_seq: str = ''
    # Display width of ``clipped_inner``.
    clipped_width: int = 0
    # Column just past the unclipped hyperlink text (starting column + ``inner_width``).
    hl_col_end: int = 0

49 

50 

def _apply_sgr_wrap(result: str, captured_style: Optional[_SGRState]) -> str:
    """
    Wrap *result* in the SGR state captured at its first visible character.

    :param result: Clipped text produced by one of the clipping loops.
    :param captured_style: SGR state snapshot, or ``None`` when no state was captured (in
        which case *result* is returned untouched).
    :returns: *result*, prefixed with the sequence that re-creates *captured_style* when that
        sequence is non-empty, and suffixed with ``ESC[0m`` when the state is active.
    """
    if captured_style is None:
        return result

    leading = _sgr_state_to_sequence(captured_style)
    if leading:
        result = leading + result
    if _sgr_state_is_active(captured_style):
        # Close whatever styling the prefix switched on.
        result = result + '\x1b[0m'
    return result

64 

65 

def _process_hyperlink(
    text: str,
    start: int,
    end: int,
    fillchar: str,
    tabsize: int,
    ambiguous_width: int,
    control_codes: Literal['parse', 'strict', 'ignore'],
    *,
    params: HyperlinkParams,
    match_end: int,
    col: int,
) -> _HyperlinkResult:
    """
    Classify one OSC 8 hyperlink and, when it is visible, clip its inner text.

    :param text: Full string being clipped.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param fillchar: Forwarded to :func:`clip` for the inner text.
    :param tabsize: Forwarded to the width measurement and to :func:`clip`.
    :param ambiguous_width: Forwarded to the width measurement and to :func:`clip`.
    :param control_codes: Forwarded to the width measurement and to :func:`clip`.
    :param params: Parsed parameters of the opening sequence, used to rebuild the open and
        close sequences.
    :param match_end: Index in *text* immediately after the opening sequence.
    :param col: Column at which the hyperlink's inner text begins.
    :returns: A :class:`_HyperlinkResult` whose ``action`` tells the caller what to do.
    """
    # pylint: disable=too-many-locals,too-many-positional-arguments
    close_start, close_end = Hyperlink.find_close(text, match_end)
    if close_start == -1 and close_end == -1:
        # Unterminated hyperlink: the caller passes the open sequence through.
        return _HyperlinkResult(_HyperlinkAction.NO_CLOSE)

    measure_opts = {
        'control_codes': control_codes,
        'tabsize': tabsize,
        'ambiguous_width': ambiguous_width,
    }
    inner_text = text[match_end:close_start]
    inner_width = width(inner_text, **measure_opts)
    if inner_width == 0:
        return _HyperlinkResult(_HyperlinkAction.EMPTY, close_end=close_end)

    hl_col_end = col + inner_width
    overlaps_window = hl_col_end > start and col < end
    if not overlaps_window:
        return _HyperlinkResult(
            _HyperlinkAction.OUTSIDE,
            close_end=close_end,
            inner_width=inner_width,
        )

    # Translate the window into columns relative to the start of the inner text.
    clipped_inner = clip(
        inner_text,
        max(0, start - col),
        end - col,
        fillchar=fillchar,
        tabsize=tabsize,
        ambiguous_width=ambiguous_width,
        propagate_sgr=False,
        control_codes=control_codes,
    )
    clipped_width = width(clipped_inner, **measure_opts)

    return _HyperlinkResult(
        action=_HyperlinkAction.VISIBLE,
        close_end=close_end,
        inner_width=inner_width,
        open_seq=params.make_open(),
        clipped_inner=clipped_inner,
        close_seq=params.make_close(),
        clipped_width=clipped_width,
        hl_col_end=hl_col_end,
    )

130 

131 

def _reconstruct_painter(
    cells: dict[int, tuple[str, int]],
    sequences: list[tuple[int, int, str]],
    start: int,
    end: int,
    fillchar: str,
) -> str:
    """
    Reconstruct the output string from painter's algorithm state.

    Walks columns left-to-right from column 0, interleaving escape sequences and cell content.
    Unwritten columns from *start* up to the right-most written cell are filled with
    *fillchar*.

    Every recorded sequence anchored at column 0 or later is emitted exactly once. A
    sequence anchored to a column that lies inside a multi-column cell is emitted directly
    after that cell's text instead of being dropped. A cell recorded with a width below 1
    still advances the walk by one column, so the loop always terminates.

    :param cells: Mapping of column to ``(text, width)`` for each written cell.
    :param sequences: ``(column, order, text)`` triples; within one column they are emitted
        in ascending ``order``.
    :param start: First column of the clip window (inclusive); no fill is emitted before it.
    :param end: Last column of the clip window (exclusive); no cell or fill is emitted at or
        beyond it.
    :param fillchar: Text used for unwritten columns.
    :returns: The reconstructed string.
    """
    # Group sequences by column; each group is sorted by insertion order when emitted.
    pending: dict[int, list[tuple[int, str]]] = {}
    for col_pos, order, seq_text in sequences:
        pending.setdefault(col_pos, []).append((order, seq_text))

    max_cell_col = max(cells) if cells else -1
    max_seq_col = max(pending) if pending else -1
    col_limit = min(max(max_cell_col, max_seq_col), end)

    parts: list[str] = []

    def emit_sequences(column: int) -> None:
        """Append (and forget) every sequence anchored at *column*."""
        parts.extend(seq_text for _, seq_text in sorted(pending.pop(column, ())))

    walk_col = 0
    while walk_col <= col_limit:
        emit_sequences(walk_col)

        if walk_col >= end:
            # Only sequences may be emitted at the window's right edge.
            walk_col += 1
            continue

        cell = cells.get(walk_col)
        if cell is None:
            if start <= walk_col <= max_cell_col:
                parts.append(fillchar)
            walk_col += 1
            continue

        cell_text, cell_w = cell
        parts.append(cell_text)
        # Never advance by less than one column, otherwise a zero-width cell would stall
        # the walk forever.
        span = max(cell_w, 1)
        # Columns hidden under a wide cell are not visited by the walk; flush their
        # sequences here so they are preserved.
        for covered in range(walk_col + 1, walk_col + span):
            emit_sequences(covered)
        walk_col += span

    # Emit sequences anchored beyond the visible region.
    for column in sorted(pending):
        if column > col_limit:
            emit_sequences(column)

    return ''.join(parts)

185 

186 

def _clip_simple(
    text: str,
    start: int,
    end: int,
    *,
    propagate_sgr: bool,
    ambiguous_width: int,
    fillchar: str,
    tabsize: int,
    strict: bool,
    control_codes: Literal['parse', 'strict', 'ignore'],
) -> tuple[str, Optional[_SGRState]]:
    """
    Clip text without cursor movement (simple append-to-output path).

    Scans *text* once, left to right, tracking the current column and appending whatever falls
    inside the clip window to an output list.

    :param text: String to clip.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param propagate_sgr: When true, SGR sequences update a tracked style instead of being
        emitted, and the style in effect at the first visible emission is captured.
    :param ambiguous_width: Forwarded to every width measurement.
    :param fillchar: Emitted once per visible column of a grapheme that straddles a window
        boundary.
    :param tabsize: Tab stop interval; when not positive, tabs are appended unchanged.
    :param strict: When true, raise on sequences matching ``INDETERMINATE_EFFECT_SEQUENCE``.
    :param control_codes: Forwarded to hyperlink processing and text-sizing parameter parsing.
    :returns: ``(result, captured_style)``. The caller applies SGR wrapping.
    :raises ValueError: If *strict* is true and an indeterminate-effect sequence is found.
    """
    # pylint: disable=too-complex,too-many-locals,too-many-branches,too-many-statements
    # pylint: disable=too-many-nested-blocks
    # code length and complexity traded for performance, to allow this to be used as a "hot path"

    output: list[str] = []
    col = 0
    idx = 0
    # captured_style is a frozen snapshot of current_style taken at the first
    # visible character emitted within the clip window (start, end). It stays
    # None until that point. current_style, by contrast, is continuously
    # updated by SGR sequences throughout the scan. The snapshot is what the
    # caller uses to wrap the result in the correct SGR state.
    #
    # When propagate_sgr is False, current_style (and therefore captured_style)
    # remain None, and SGR sequences pass through as literal text.
    captured_style: Optional[_SGRState] = None
    current_style = _SGR_STATE_DEFAULT if propagate_sgr else None

    while idx < len(text):
        char = text[idx]

        # Early exit: past visible region.
        if col >= end and char not in '\r\x08\t\x1b':
            if captured_style is not None:
                # Everything after this point, including later escape sequences, is
                # discarded.
                break
            # propagate_sgr is always False here: with propagate_sgr=True,
            # captured_style is set on the first visible emission in the
            # clip window and we would have broken above. The skip-ahead
            # optimization is only needed (and safe) when SGR tracking is off.
            # NOTE(review): the jump goes straight to the next ESC, so a tab lying in
            # between is skipped even when tabsize <= 0 would otherwise append it.
            next_esc = text.find('\x1b', idx + 1)
            if next_esc == -1:
                break
            idx = next_esc
            continue

        if char == '\x1b':
            m = _SEQUENCE_CLASSIFY.match(text, idx)
            if not m:
                # Unrecognized escape: emit the lone ESC and resume at the next character.
                output.append(char)
                idx += 1
                continue

            # SGR: update current_style, do not emit.
            if m.group('sgr_params') is not None and propagate_sgr and current_style is not None:
                current_style = _sgr_state_update(current_style, m.group())
                idx = m.end()
                continue

            # OSC 8 hyperlink.
            if hl_state := HyperlinkParams.parse(m.group()):
                r = _process_hyperlink(
                    text, start, end, fillchar, tabsize, ambiguous_width,
                    control_codes,
                    params=hl_state, match_end=m.end(), col=col,
                )
                if r.action is _HyperlinkAction.NO_CLOSE:
                    # Unterminated: pass the open sequence through and keep scanning after it.
                    output.append(m.group())
                    idx = m.end()
                elif r.action is _HyperlinkAction.EMPTY:
                    # Nothing visible inside: drop the whole open/close pair.
                    idx = r.close_end
                elif r.action is _HyperlinkAction.OUTSIDE:
                    # Not in the window: drop it but still account for its width.
                    col += r.inner_width
                    idx = r.close_end
                else:
                    output.append(r.open_seq)
                    output.append(r.clipped_inner)
                    output.append(r.close_seq)
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    # Advance by the unclipped width so later columns stay aligned.
                    col += r.inner_width
                    idx = r.close_end
                continue

            # OSC 66 Text Sizing.
            if (ts_meta := m.group('ts_meta')) is not None:
                ts_text = m.group('ts_text')
                ts_term = m.group('ts_term')
                assert ts_text is not None and ts_term is not None
                ts = TextSizing(
                    TextSizingParams.from_params(ts_meta, control_codes=control_codes),
                    ts_text, ts_term)
                ts_width = ts.display_width(ambiguous_width)

                if col >= start and col + ts_width <= end:
                    # Entirely inside the window: emit the sequence whole.
                    output.append(ts.make_sequence())
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    col += ts_width
                elif col < end and col + ts_width > start:
                    # Straddles a boundary: collect the pieces the helper writes.
                    ts_parts: list[str] = []

                    def _ts_write(s: str, _w: int, _col: int) -> None:
                        ts_parts.append(s)
                    col = _text_sizing_clip(
                        ts, col, start, end, fillchar, ambiguous_width,
                        _ts_write)
                    output.extend(ts_parts)
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                else:
                    # Entirely outside the window: only advance the column.
                    col += ts_width
                idx = m.end()
                continue

            # Indeterminate-effect sequences: raise in strict mode.
            seq = m.group()
            if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
                raise ValueError(
                    f"Indeterminate cursor sequence at position {idx}, "
                    f"{seq!r}"
                )

            # Any other recognized sequence: preserve as-is.
            output.append(seq)
            idx = m.end()
            continue

        if char == '\t':
            # Expand tab, filling clip window with spaces.
            # NOTE(review): a literal space is emitted here rather than fillchar.
            if tabsize > 0:
                next_tab = col + (tabsize - (col % tabsize))
                while col < next_tab:
                    if start <= col < end:
                        output.append(' ')
                        if propagate_sgr and captured_style is None:
                            captured_style = current_style
                    col += 1
            else:
                # No tab stops: the tab is appended unchanged and the column does not move.
                output.append('\t')
            idx += 1
            continue

        grapheme = next(iter_graphemes(text, start=idx))
        grapheme_w = width(grapheme, ambiguous_width=ambiguous_width)

        # Emit grapheme or fillchar depending on visibility within clip window.
        if grapheme_w == 0:
            # Zero-width graphemes are kept while the column is inside the window; they do
            # not trigger a style capture.
            if start <= col < end:
                output.append(grapheme)
        elif col >= start and col + grapheme_w <= end:
            output.append(grapheme)
            if propagate_sgr and captured_style is None:
                captured_style = current_style
        elif col < end and col + grapheme_w > start:
            # Straddles a boundary: one fillchar per column that is still visible.
            output.append(fillchar * (min(end, col + grapheme_w) - max(start, col)))
            if propagate_sgr and captured_style is None:
                captured_style = current_style

        col += grapheme_w
        idx += len(grapheme)

    return ''.join(output), captured_style

356 

357 

def _text_sizing_clip(
    ts: TextSizing,
    col: int,
    start: int,
    end: int,
    fillchar: str,
    ambiguous_width: int,
    write_cells: Callable[[str, int, int], None],
) -> int:
    """
    Clip one text-sizing (OSC 66) sequence to the window (start, end).

    A sequence that fits entirely is written whole; one that lies entirely outside writes
    nothing. Otherwise its text is split into units: runs of fully visible units are re-wrapped
    in a new sequence, and each visible column of a partially visible unit becomes *fillchar*.

    :param ts: The parsed text-sizing sequence.
    :param col: Column at which the sequence begins.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param fillchar: Replacement for each visible column of a unit cut by a window boundary.
    :param ambiguous_width: Forwarded to width measurements.
    :param write_cells: Called as ``write_cells(text, width, col)`` for every emitted piece.
    :returns: The column following the whole (unclipped) sequence.
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-positional-arguments,too-complex
    total_width = ts.display_width(ambiguous_width)
    next_col = col + total_width

    if start <= col and next_col <= end:
        # Fits completely: pass the sequence through untouched.
        write_cells(ts.make_sequence(), total_width, col)
        return next_col
    if col >= end or next_col <= start:
        # No overlap with the window at all.
        return next_col

    # Window bounds expressed relative to the sequence's first column.
    window_lo = max(0, start - col)
    window_hi = min(end, next_col) - col
    scale = ts.params.scale
    explicit_width = ts.params.width > 0

    units: list[tuple[str, int]]
    if explicit_width:
        # Explicit width: up to ``width`` graphemes of ``scale`` columns each, padded with
        # empty units when the text has fewer graphemes.
        units = [(g, scale) for g in islice(iter_graphemes(ts.text), ts.params.width)]
        units.extend([('', scale)] * (ts.params.width - len(units)))
    else:
        units = [
            (g, width(g, ambiguous_width=ambiguous_width) * scale)
            for g in iter_graphemes(ts.text)
        ]

    batch: list[tuple[str, int]] = []

    def emit_batch(at_col: int) -> None:
        """Write the accumulated fully visible units as one new sequence."""
        if not batch:
            return
        sub_params = TextSizingParams(
            scale,
            len(batch) if explicit_width else 0,
            ts.params.numerator, ts.params.denominator,
            ts.params.vertical_align, ts.params.horizontal_align)
        sub_text = ''.join(unit_text for unit_text, _ in batch)
        sub_width = sum(unit_w for _, unit_w in batch)
        write_cells(
            TextSizing(sub_params, sub_text, ts.terminator).make_sequence(),
            sub_width,
            at_col)
        batch.clear()

    batch_col = col + window_lo
    cursor = 0
    for unit_text, unit_w in units:
        unit_lo = cursor
        unit_hi = cursor + unit_w
        cursor = unit_hi
        if unit_hi <= window_lo:
            # Entirely left of the window.
            continue
        if unit_lo >= window_hi:
            # Everything from here on is right of the window.
            break

        visible_lo = max(unit_lo, window_lo)
        overlap = min(unit_hi, window_hi) - visible_lo
        if unit_w > 0 and overlap == unit_w:
            if not batch:
                batch_col = col + visible_lo
            batch.append((unit_text, unit_w))
            continue

        # Partially visible (or zero-width) unit: close the current run, then fill.
        emit_batch(batch_col)
        first_fill = col + visible_lo
        for fill_col in range(first_fill, first_fill + overlap):
            write_cells(fillchar, 1, fill_col)

    emit_batch(batch_col)
    return next_col

441 

442 

def _clip_painter(
    text: str,
    start: int,
    end: int,
    *,
    propagate_sgr: bool,
    ambiguous_width: int,
    fillchar: str,
    tabsize: int,
    strict: bool,
    control_codes: Literal['parse', 'strict', 'ignore'],
) -> tuple[str, Optional[_SGRState]]:
    """
    Clip text with cursor movement (painter's algorithm path).

    Instead of appending to an output list, visible content is written into a column-indexed
    cell map so that later writes (after carriage return, backspace, or CSI cursor movement)
    overwrite earlier ones. Zero-width items are recorded separately with the column they
    were seen at, and the final string is assembled by ``_reconstruct_painter``.

    :param text: String to clip.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param propagate_sgr: When true, SGR sequences update a tracked style instead of being
        recorded, and the style in effect at the first recorded item is captured.
    :param ambiguous_width: Forwarded to every width measurement.
    :param fillchar: Written for split wide characters, skipped-over columns, and tab
        expansion.
    :param tabsize: Tab stop interval; when not positive, tabs are recorded as zero-width.
    :param strict: When true, raise on indeterminate-effect sequences and on cursor-left
        movement past column 0.
    :param control_codes: Forwarded to hyperlink processing and text-sizing parameter parsing.
    :returns: ``(result, captured_style)``. The caller applies SGR wrapping.
    :raises ValueError: In strict mode, as described for *strict*.
    """
    # pylint: disable=too-complex,too-many-locals,too-many-branches
    # pylint: disable=too-many-statements,too-many-nested-blocks
    # code length and complexity traded for performance, to allow this to be used as a "hot path"

    # column -> (text, width) for every written cell.
    cells: dict[int, tuple[str, int]] = {}
    # NOTE(review): hyperlink_cells is only added to and discarded from in this function;
    # nothing here reads it. Confirm whether it is still needed.
    hyperlink_cells: set[int] = set()
    # (column, insertion order, text) for zero-width items.
    sequences: list[tuple[int, int, str]] = []
    seq_order = 0

    col = 0
    idx = 0
    # captured_style is a frozen snapshot of current_style taken at the first
    # visible character emitted within the clip window (start, end). It stays
    # None until that point. current_style, by contrast, is continuously
    # updated by SGR sequences throughout the scan.
    #
    # When propagate_sgr is False, current_style (and therefore captured_style)
    # remain None, and SGR sequences pass through as literal text.
    captured_style: Optional[_SGRState] = None
    current_style = _SGR_STATE_DEFAULT if propagate_sgr else None

    def _write_cells(s: str, w: int, write_col: int,
                     is_hyperlink: bool = False) -> None:
        """Write *w* cells of text *s* at *write_col*, handling wide-char splitting."""
        nonlocal captured_style
        for offset in range(w):
            src_col = write_col + offset
            # Overwriting the right half of a width-2 cell: its left half becomes fillchar.
            if src_col > 0 and cells.get(src_col - 1, ('', 0))[1] == 2:
                cells[src_col - 1] = (fillchar, 1)
                hyperlink_cells.discard(src_col - 1)
            # Overwriting the left half of a width-2 cell: its right half becomes fillchar.
            if cells.get(src_col, ('', 0))[1] == 2:
                cells[src_col + 1] = (fillchar, 1)
                hyperlink_cells.discard(src_col + 1)
            cells.pop(src_col, None)
            hyperlink_cells.discard(src_col)
        # The whole text is stored once, keyed by its first column.
        cells[write_col] = (s, w)
        if is_hyperlink:
            for offset in range(w):
                hyperlink_cells.add(write_col + offset)
        if propagate_sgr and captured_style is None:
            captured_style = current_style

    while idx < len(text):
        char = text[idx]

        # Early exit: past visible region, SGR captured, no escape ahead.
        # NOTE(review): only the current character is tested. Once this breaks, a later
        # '\r', '\x08' or cursor sequence that would move back into the window is never
        # processed. Because captured_style stays None when propagate_sgr is False, the
        # break can only happen with propagate_sgr=True. Confirm this is intended.
        if col >= end and captured_style is not None and char != '\x1b':
            break

        if char == '\x1b':
            m = _SEQUENCE_CLASSIFY.match(text, idx)
            if not m:
                # Record lone ESC as a zero-width sequence at current column.
                sequences.append((col, seq_order, char))
                seq_order += 1
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
                idx += 1
                continue

            # SGR: update current_style, do not emit.
            if m.group('sgr_params') is not None and propagate_sgr and current_style is not None:
                current_style = _sgr_state_update(current_style, m.group())
                idx = m.end()
                continue

            # OSC 8 hyperlink.
            if hl_state := HyperlinkParams.parse(m.group()):
                r = _process_hyperlink(
                    text, start, end, fillchar, tabsize, ambiguous_width,
                    control_codes,
                    params=hl_state, match_end=m.end(), col=col,
                )
                if r.action is _HyperlinkAction.NO_CLOSE:
                    # Unterminated: record the open sequence and keep scanning after it.
                    sequences.append((col, seq_order, m.group()))
                    seq_order += 1
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    idx = m.end()
                elif r.action is _HyperlinkAction.EMPTY:
                    # Nothing visible inside: drop the whole open/close pair.
                    idx = r.close_end
                elif r.action is _HyperlinkAction.OUTSIDE:
                    # Not in the window: drop it but still account for its width.
                    col += r.inner_width
                    idx = r.close_end
                else:
                    sequences.append((col, seq_order, r.open_seq))
                    seq_order += 1
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    # The clipped text is stored as a single cell at the hyperlink's
                    # starting column, with the close sequence anchored right after it.
                    _write_cells(r.clipped_inner, r.clipped_width, col,
                                 is_hyperlink=True)
                    col += r.clipped_width
                    sequences.append((col, seq_order, r.close_seq))
                    seq_order += 1
                    # Continue from the column after the unclipped hyperlink text.
                    col = r.hl_col_end
                    idx = r.close_end
                continue

            # OSC 66 Text Sizing.
            if (ts_meta := m.group('ts_meta')) is not None:
                ts_text = m.group('ts_text')
                ts_term = m.group('ts_term')
                assert ts_text is not None and ts_term is not None
                ts = TextSizing(
                    TextSizingParams.from_params(ts_meta, control_codes=control_codes),
                    ts_text, ts_term)
                col = _text_sizing_clip(
                    ts, col, start, end, fillchar, ambiguous_width,
                    _write_cells)
                # The style is captured here whether or not anything was written.
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
                idx = m.end()
                continue

            # Indeterminate-effect sequences: raise in strict mode.
            seq = m.group()
            if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
                raise ValueError(
                    f"Indeterminate cursor sequence at position {idx}, "
                    f"{seq!r}"
                )

            # Horizontal Position Absolute (CSI n G).
            if (hpa_n := m.group('hpa_n')) is not None:
                # The parameter is 1-based; an empty parameter means column 0.
                col = int(hpa_n) - 1 if hpa_n else 0
                idx = m.end()
                continue

            # Cursor Forward (CSI n C).
            if (cforward_n := m.group('cforward_n')) is not None:
                n_forward = int(cforward_n) if cforward_n else 1
                move_end = col + n_forward
                # Skipped columns that fall inside the window are painted with fillchar.
                if col < end and move_end > start:
                    for i in range(max(col, start), min(move_end, end)):
                        _write_cells(fillchar, 1, i)
                col = move_end
                idx = m.end()
                continue

            # Cursor Backward (CSI n D).
            if (cbackward_n := m.group('cbackward_n')) is not None:
                n_backward = int(cbackward_n) if cbackward_n else 1
                if strict and n_backward > col:
                    raise ValueError(
                        f"Cursor left movement at position {idx} would move "
                        f"{n_backward} cells left from column {col}, "
                        f"exceeding string start"
                    )
                # Outside strict mode the move is clamped at column 0.
                col = max(0, col - n_backward)
                idx = m.end()
                continue

            # Any other recognized sequence: preserve as-is.
            sequences.append((col, seq_order, m.group()))
            seq_order += 1
            if propagate_sgr and captured_style is None:
                captured_style = current_style
            idx = m.end()
            continue

        # Carriage return.
        if char == '\r':
            col = 0
            idx += 1
            continue

        # Backspace.
        if char == '\x08':
            if col > 0:
                col -= 1
            idx += 1
            continue

        # Tab expansion.
        # NOTE(review): columns are painted with fillchar here, whereas _clip_simple emits a
        # literal space for tabs. Confirm which is intended.
        if char == '\t':
            if tabsize > 0:
                next_tab = col + (tabsize - (col % tabsize))
                while col < next_tab:
                    if start <= col < end:
                        _write_cells(fillchar, 1, col)
                    col += 1
            else:
                # No tab stops: record the tab as zero-width at the current column.
                sequences.append((col, seq_order, '\t'))
                seq_order += 1
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
            idx += 1
            continue

        # Grapheme cluster.
        grapheme = next(iter_graphemes(text, start=idx))
        grapheme_w = width(grapheme, ambiguous_width=ambiguous_width)

        # Emit grapheme or fillchar depending on visibility within clip window.
        if grapheme_w == 0:
            # Zero-width graphemes are recorded like sequences, anchored at this column.
            if start <= col < end:
                sequences.append((col, seq_order, grapheme))
                seq_order += 1
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
        elif col >= start and col + grapheme_w <= end:
            _write_cells(grapheme, grapheme_w, col)
        elif col < end and col + grapheme_w > start:
            # Straddles a boundary: one fillchar cell per column that is still visible.
            clip_start = max(start, col)
            for offset in range(min(end, col + grapheme_w) - clip_start):
                _write_cells(fillchar, 1, clip_start + offset)

        col += grapheme_w
        idx += len(grapheme)

    return _reconstruct_painter(cells, sequences, start, end, fillchar), captured_style

671 

672 

def clip(
    text: str,
    start: int,
    end: int,
    *,
    fillchar: str = ' ',
    tabsize: int = 8,
    ambiguous_width: int = 1,
    propagate_sgr: bool = True,
    control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
    overtyping: Optional[bool] = None,
) -> str:
    r"""
    Clip text to display columns (start, end) while preserving all terminal sequences.

    The substring is selected by visible column position, not by character index. Terminal
    escape sequences have zero display width and are kept in the output. A wide character
    (width 2) that is cut by either boundary is replaced with ``fillchar``.

    TAB characters (``\t``) are expanded to spaces up to the next tab stop, as set by
    ``tabsize``. When cursor movement is detected, a "painter's algorithm" is used: cursor
    movements change the write position, so cursor-left and carriage return can overwrite
    previously written cells. ``text`` is assumed to begin at column 0.

    **OSC 8 hyperlinks** are handled specially: the visible text inside a hyperlink is
    clipped to the requested column range and the hyperlink is rebuilt around the clipped
    text. Empty hyperlinks (those with no remaining visible text after clipping) are
    removed::

        >>> clip('\x1b]8;;http://example.com\x07Click This link\x1b]8;;\x07', 6, 10)
        '\x1b]8;;http://example.com\x07This\x1b]8;;\x07'

    :param text: String to clip, may contain terminal escape sequences.
    :param start: Absolute starting column (inclusive, 0-indexed).
    :param end: Absolute ending column (exclusive).
    :param fillchar: Character to use when a wide character must be split at
        a boundary (default space). Must have display width of 1.
    :param tabsize: Tab stop width (default 8). Set to 0 to pass tabs through
        as zero-width (preserved in output but don't advance column position).
    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :param propagate_sgr: If True (default), SGR (terminal styling) sequences
        are propagated. The result begins with any active style at the start
        position and ends with a reset sequence if styles are active.
    :param control_codes: How to handle control characters and sequences:

        - ``'parse'`` (default): Track horizontal cursor movement and clip
          hyperlink text. Cursor overwrite is always allowed, with best effort
          results; indeterminate sequences (home, clear, reset, etc.) are
          preserved as zero-width.
        - ``'strict'``: Like ``parse``, but raises :exc:`ValueError` on
          sequences with indeterminate effects (cursor home, clear screen,
          reset, vertical movement, etc.) matching :func:`width` behavior.
          Also raises on out-of-bounds horizontal cursor movement.
        - ``'ignore'``: All control characters are treated as zero-width.
          Cursor movement is not tracked (fastest path).

    :param overtyping: Whether to use the painter's algorithm for cursor
        movement (``\b`` backspace, ``\r`` carriage return, and CSI cursor
        left/right/position sequences). When ``None`` (default), auto-detects
        by scanning for these characters in *text*. Set to ``False`` for improved
        performance when the caller knows *text* contains no cursor movement
        characters. Set to ``True`` to force the painter's algorithm (useful
        for testing). Has no effect when ``control_codes='ignore'``.

    :returns: Substring of ``text`` spanning display columns (start, end),
        with all terminal sequences preserved and wide characters at boundaries
        replaced with ``fillchar``.

    :raises ValueError: If ``control_codes='strict'`` and an indeterminate-effect
        sequence or out-of-bounds cursor movement is encountered.

    SGR (terminal styling) sequences are propagated by default. The result
    begins with any active style and ends with a reset::

        >>> clip('\x1b[1;34mHello world\x1b[0m', 6, 11)
        '\x1b[1;34mworld\x1b[0m'

    Set ``propagate_sgr=False`` to disable this behavior.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.5.0
       Added ``propagate_sgr`` parameter (default True).

    .. versionchanged:: 0.7.0
       Added ``control_codes`` parameter (default 'parse').
       OSC 8 hyperlink-aware clipping. OSC 66 text sizing protocol support.
       Added ``overtyping`` parameter (default None, auto-detect).

    Example::

        >>> clip('hello world', 0, 5)
        'hello'
        >>> clip('中文字', 0, 3)  # Wide char split at column 3
        '中 '
        >>> clip('a\tb', 0, 10)  # Tab expanded to spaces
        'a       b'
    """
    start = max(start, 0)
    if end <= start:
        return ''

    # Printable ASCII has one column per character, so plain slicing is exact.
    if text.isascii() and text.isprintable():
        return text[start:end]

    # Without any escape there is no SGR state to track.
    has_esc = '\x1b' in text
    track_sgr = False if (propagate_sgr and not has_esc) else propagate_sgr

    # Choose between the painter's algorithm and the simple append-only scan.
    if control_codes == 'ignore':
        # 'ignore' never tracks cursor movement, whatever the caller asked for.
        use_painter = False
    elif overtyping is None:
        # Auto-detect: look for anything that can move the cursor horizontally.
        use_painter = (
            '\x08' in text
            or '\r' in text
            or (has_esc and _HORIZONTAL_CURSOR_MOVEMENT.search(text) is not None)
        )
    else:
        use_painter = overtyping

    clipper = _clip_painter if use_painter else _clip_simple
    clipped, captured_style = clipper(
        text=text,
        start=start,
        end=end,
        propagate_sgr=track_sgr,
        ambiguous_width=ambiguous_width,
        fillchar=fillchar,
        tabsize=tabsize,
        strict=(control_codes == 'strict'),
        control_codes=control_codes,
    )
    return _apply_sgr_wrap(clipped, captured_style)