Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/_clip.py: 8%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""This is a python implementation of clip()."""
2from __future__ import annotations
4# std imports
5import enum
6from itertools import islice
8from typing import Literal, Callable, Optional, NamedTuple
10# local
11from ._width import width
12from .grapheme import iter_graphemes
13from .hyperlink import Hyperlink, HyperlinkParams
14from .sgr_state import (_SGR_STATE_DEFAULT,
15 _SGRState,
16 _sgr_state_update,
17 _sgr_state_is_active,
18 _sgr_state_to_sequence)
19from .text_sizing import TextSizing, TextSizingParams
20from .escape_sequences import (_SEQUENCE_CLASSIFY,
21 _HORIZONTAL_CURSOR_MOVEMENT,
22 INDETERMINATE_EFFECT_SEQUENCE)
25class _HyperlinkAction(enum.Enum):
26 """Outcome of processing an OSC 8 hyperlink unit."""
28 NO_CLOSE = enum.auto() # open sequence without matching close
29 EMPTY = enum.auto() # hyperlink with no visible inner text
30 OUTSIDE = enum.auto() # hyperlink entirely outside the clip window
31 VISIBLE = enum.auto() # hyperlink overlaps the clip window
34class _HyperlinkResult(NamedTuple):
35 """
36 Result of processing an OSC 8 hyperlink.
38 Only the fields relevant to each action are populated.
39 """
41 action: _HyperlinkAction
42 close_end: int = 0
43 inner_width: int = 0
44 open_seq: str = ''
45 clipped_inner: str = ''
46 close_seq: str = ''
47 clipped_width: int = 0
48 hl_col_end: int = 0
51def _apply_sgr_wrap(result: str, captured_style: Optional[_SGRState]) -> str:
52 """
53 Apply SGR prefix/suffix around *result*.
55 If an SGR state was captured at the first visible character, prefix the result with the
56 corresponding SGR sequence and suffix with a reset if any styles are active.
57 """
58 if captured_style is not None:
59 if prefix := _sgr_state_to_sequence(captured_style):
60 result = prefix + result
61 if _sgr_state_is_active(captured_style):
62 result += '\x1b[0m'
63 return result
def _process_hyperlink(
    text: str,
    start: int,
    end: int,
    fillchar: str,
    tabsize: int,
    ambiguous_width: int,
    control_codes: Literal['parse', 'strict', 'ignore'],
    *,
    params: HyperlinkParams,
    match_end: int,
    col: int,
) -> _HyperlinkResult:
    """
    Process an OSC 8 hyperlink unit.

    Finds the matching close sequence, measures the inner text width, and determines whether the
    hyperlink is empty, outside the clip window, or visible (requiring inner-text clipping).

    :param text: Full string being clipped.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param fillchar: Fill character forwarded to the nested :func:`clip` call.
    :param tabsize: Tab stop width forwarded to width measurement and clipping.
    :param ambiguous_width: Width of East Asian Ambiguous characters.
    :param control_codes: Control-code handling mode forwarded to :func:`width` and :func:`clip`.
    :param params: Parsed parameters of the opening sequence, used to rebuild open/close.
    :param match_end: Index in *text* just past the opening sequence.
    :param col: Column at which the hyperlink's inner text begins.
    :returns: A :class:`_HyperlinkResult` whose ``action`` tells the caller how to proceed.
    """
    # pylint: disable=too-many-locals,too-many-positional-arguments
    close_start, close_end = Hyperlink.find_close(text, match_end)
    if (close_start, close_end) == (-1, -1):
        return _HyperlinkResult(_HyperlinkAction.NO_CLOSE)

    inner_text = text[match_end:close_start]
    inner_width = width(
        inner_text, control_codes=control_codes,
        tabsize=tabsize, ambiguous_width=ambiguous_width,
    )

    if inner_width == 0:
        return _HyperlinkResult(_HyperlinkAction.EMPTY, close_end=close_end)

    hl_col_end = col + inner_width

    # No overlap between the columns [col, hl_col_end) and the window [start, end).
    if hl_col_end <= start or col >= end:
        return _HyperlinkResult(_HyperlinkAction.OUTSIDE, close_end=close_end,
                                inner_width=inner_width)

    # Translate the window into coordinates relative to the start of the inner text.
    inner_clip_start = max(0, start - col)
    inner_clip_end = end - col

    # SGR propagation is disabled for the nested clip call.
    clipped_inner = clip(
        inner_text, inner_clip_start, inner_clip_end,
        fillchar=fillchar, tabsize=tabsize,
        ambiguous_width=ambiguous_width,
        propagate_sgr=False,
        control_codes=control_codes,
    )

    clipped_width = width(
        clipped_inner, control_codes=control_codes,
        tabsize=tabsize, ambiguous_width=ambiguous_width,
    )

    return _HyperlinkResult(
        _HyperlinkAction.VISIBLE,
        close_end=close_end,
        inner_width=inner_width,
        open_seq=params.make_open(),
        clipped_inner=clipped_inner,
        close_seq=params.make_close(),
        clipped_width=clipped_width,
        hl_col_end=hl_col_end,
    )
132def _reconstruct_painter(
133 cells: dict[int, tuple[str, int]],
134 sequences: list[tuple[int, int, str]],
135 start: int,
136 end: int,
137 fillchar: str,
138) -> str:
139 """
140 Reconstruct the output string from painter's algorithm state.
142 Walks columns left-to-right, interleaving escape sequences and cell content, filling gaps with
143 *fillchar*.
144 """
145 # pylint: disable=too-many-locals
146 # Group and sort sequences by column, preserving insertion order within each.
147 seqs_by_col: dict[int, list[tuple[int, str]]] = {}
148 for col_pos, order, seq_text in sequences:
149 seqs_by_col.setdefault(col_pos, []).append((order, seq_text))
150 for entries in seqs_by_col.values():
151 entries.sort()
153 max_cell_col = max(cells.keys()) if cells else -1
154 max_seq_col = max(seqs_by_col.keys()) if seqs_by_col else -1
155 max_col = max(max_cell_col, max_seq_col)
157 parts: list[str] = []
158 walk_col = 0
159 col_limit = min(max_col, end)
160 while walk_col <= col_limit:
161 # Emit any sequences anchored at this column.
162 for _, seq_text in seqs_by_col.get(walk_col, ()):
163 parts.append(seq_text)
165 if walk_col >= end:
166 walk_col += 1
167 continue
169 if walk_col in cells:
170 cell_text, cell_w = cells[walk_col]
171 parts.append(cell_text)
172 walk_col += cell_w
173 else:
174 if start <= walk_col <= max_cell_col:
175 parts.append(fillchar)
176 walk_col += 1
178 # Emit sequences anchored beyond the visible region.
179 for c in sorted(seqs_by_col.keys()):
180 if c > col_limit:
181 for _, seq_text in seqs_by_col[c]:
182 parts.append(seq_text)
184 return ''.join(parts)
def _clip_simple(
    text: str,
    start: int,
    end: int,
    *,
    propagate_sgr: bool,
    ambiguous_width: int,
    fillchar: str,
    tabsize: int,
    strict: bool,
    control_codes: Literal['parse', 'strict', 'ignore'],
) -> tuple[str, Optional[_SGRState]]:
    """
    Clip text without cursor movement (simple append-to-output path).

    Returns ``(result, captured_style)``. The caller applies SGR wrapping.

    :param text: String to clip.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param propagate_sgr: When true, SGR sequences update a tracked style instead of being
        emitted, and the style active at the first visible emission is returned.
    :param ambiguous_width: Width of East Asian Ambiguous characters.
    :param fillchar: Replacement for the visible part of a cell split by a window boundary.
    :param tabsize: Tab stop width; when not positive, tabs are appended to the output verbatim.
    :param strict: Raise :exc:`ValueError` on indeterminate-effect sequences.
    :param control_codes: Mode forwarded to hyperlink and text-sizing helpers.
    :raises ValueError: If *strict* is true and an indeterminate-effect sequence is found.
    """
    # pylint: disable=too-complex,too-many-locals,too-many-branches,too-many-statements
    # pylint: disable=too-many-nested-blocks
    # code length and complexity traded for performance, to allow this to be used as a "hot path"

    output: list[str] = []
    col = 0
    idx = 0
    # captured_style is a frozen snapshot of current_style taken at the first
    # visible character emitted within the clip window (start, end). It stays
    # None until that point. current_style, by contrast, is continuously
    # updated by SGR sequences throughout the scan. The snapshot is what the
    # caller uses to wrap the result in the correct SGR state.
    #
    # When propagate_sgr is False, current_style (and therefore captured_style)
    # remain None, and SGR sequences pass through as literal text.
    captured_style: Optional[_SGRState] = None
    current_style = _SGR_STATE_DEFAULT if propagate_sgr else None

    def _emit_text_sizing(s: str, _w: int, _col: int) -> None:
        # Writer handed to _text_sizing_clip: this path only appends, so the
        # width and column arguments are not needed.
        output.append(s)

    while idx < len(text):
        char = text[idx]

        # Early exit: past visible region.
        if col >= end and char not in '\r\x08\t\x1b':
            if captured_style is not None:
                break
            # propagate_sgr is always False here: with propagate_sgr=True,
            # captured_style is set on the first visible emission in the
            # clip window and we would have broken above. The skip-ahead
            # optimization is only needed (and safe) when SGR tracking is off.
            next_esc = text.find('\x1b', idx + 1)
            if next_esc == -1:
                break
            idx = next_esc
            continue

        if char == '\x1b':
            m = _SEQUENCE_CLASSIFY.match(text, idx)
            if not m:
                # Lone ESC that starts no recognized sequence: pass through.
                output.append(char)
                idx += 1
                continue

            # SGR: update current_style, do not emit.
            if m.group('sgr_params') is not None and propagate_sgr and current_style is not None:
                current_style = _sgr_state_update(current_style, m.group())
                idx = m.end()
                continue

            # OSC 8 hyperlink.
            if hl_state := HyperlinkParams.parse(m.group()):
                r = _process_hyperlink(
                    text, start, end, fillchar, tabsize, ambiguous_width,
                    control_codes,
                    params=hl_state, match_end=m.end(), col=col,
                )
                if r.action is _HyperlinkAction.NO_CLOSE:
                    output.append(m.group())
                    idx = m.end()
                elif r.action is _HyperlinkAction.EMPTY:
                    idx = r.close_end
                elif r.action is _HyperlinkAction.OUTSIDE:
                    col += r.inner_width
                    idx = r.close_end
                else:
                    output.append(r.open_seq)
                    output.append(r.clipped_inner)
                    output.append(r.close_seq)
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    # The column advances by the full inner width, not the clipped width.
                    col += r.inner_width
                    idx = r.close_end
                continue

            # OSC 66 Text Sizing.
            if (ts_meta := m.group('ts_meta')) is not None:
                ts_text = m.group('ts_text')
                ts_term = m.group('ts_term')
                assert ts_text is not None and ts_term is not None
                ts = TextSizing(
                    TextSizingParams.from_params(ts_meta, control_codes=control_codes),
                    ts_text, ts_term)
                ts_width = ts.display_width(ambiguous_width)

                if col >= start and col + ts_width <= end:
                    # Entirely inside the window: emit unchanged.
                    output.append(ts.make_sequence())
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    col += ts_width
                elif col < end and col + ts_width > start:
                    # Straddles a window boundary: emit the clipped decomposition.
                    col = _text_sizing_clip(
                        ts, col, start, end, fillchar, ambiguous_width,
                        _emit_text_sizing)
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                else:
                    # Entirely outside the window: only the column advances.
                    col += ts_width
                idx = m.end()
                continue

            # Indeterminate-effect sequences: raise in strict mode.
            seq = m.group()
            if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
                raise ValueError(
                    f"Indeterminate cursor sequence at position {idx}, "
                    f"{seq!r}"
                )

            # Any other recognized sequence: preserve as-is.
            output.append(seq)
            idx = m.end()
            continue

        if char == '\t':
            # Expand tab, filling clip window with spaces.
            if tabsize > 0:
                next_tab = col + (tabsize - (col % tabsize))
                while col < next_tab:
                    if start <= col < end:
                        output.append(' ')
                        if propagate_sgr and captured_style is None:
                            captured_style = current_style
                    col += 1
            else:
                output.append('\t')
            idx += 1
            continue

        grapheme = next(iter_graphemes(text, start=idx))
        grapheme_w = width(grapheme, ambiguous_width=ambiguous_width)

        # Emit grapheme or fillchar depending on visibility within clip window.
        if grapheme_w == 0:
            if start <= col < end:
                output.append(grapheme)
        elif col >= start and col + grapheme_w <= end:
            output.append(grapheme)
            if propagate_sgr and captured_style is None:
                captured_style = current_style
        elif col < end and col + grapheme_w > start:
            # Split by a window boundary: one fillchar per visible column.
            output.append(fillchar * (min(end, col + grapheme_w) - max(start, col)))
            if propagate_sgr and captured_style is None:
                captured_style = current_style

        col += grapheme_w
        idx += len(grapheme)

    return ''.join(output), captured_style
def _text_sizing_clip(
    ts: TextSizing,
    col: int,
    start: int,
    end: int,
    fillchar: str,
    ambiguous_width: int,
    write_cells: Callable[[str, int, int], None],
) -> int:
    """
    Emit tokens for a text-sizing (OSC 66) sequence, clipped to (start, end).

    Calls *write_cells(text, width, col)* for each emitted cell or sequence. Returns new column
    position.

    :param ts: The parsed text-sizing sequence.
    :param col: Column at which the sequence begins.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param fillchar: Written once per visible column of a unit that is only partly visible.
    :param ambiguous_width: Width of East Asian Ambiguous characters.
    :param write_cells: Sink receiving ``(text, width, column)`` for each emission.
    :returns: ``col`` plus the full display width of *ts*, whatever was emitted.
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-positional-arguments,too-complex
    ts_width = ts.display_width(ambiguous_width)

    # Fully visible: emit entire sequence
    if col >= start and col + ts_width <= end:
        write_cells(ts.make_sequence(), ts_width, col)
        return col + ts_width
    # Fully outside: just advance column
    if col >= end or col + ts_width <= start:
        return col + ts_width

    # Partial overlap: decompose
    # Window bounds relative to the first column of the sequence.
    rel_start = max(0, start - col)
    rel_end = min(end, col + ts_width) - col
    scale = ts.params.scale

    units: list[tuple[str, int]]
    if ts.params.width > 0:
        # Explicit width: at most `width` graphemes, each `scale` columns wide,
        # padded with empty units when the text has fewer graphemes.
        units = [(g, scale) for g in islice(iter_graphemes(ts.text), ts.params.width)]
        units.extend([('', scale)] * (ts.params.width - len(units)))
    else:
        # No explicit width: each grapheme occupies its measured width times `scale`.
        units = [(g, width(g, ambiguous_width=ambiguous_width) * scale)
                 for g in iter_graphemes(ts.text)]

    pending_units: list[tuple[str, int]] = []

    def flush(flush_col: int) -> None:
        # Emit the run of fully visible units collected so far as one rebuilt sequence.
        if not pending_units:
            return
        texts = [u[0] for u in pending_units]
        total_w = sum(u[1] for u in pending_units)
        params = TextSizingParams(
            scale,
            len(texts) if ts.params.width > 0 else 0,
            ts.params.numerator, ts.params.denominator,
            ts.params.vertical_align, ts.params.horizontal_align)
        write_cells(
            TextSizing(params, ''.join(texts), ts.terminator).make_sequence(),
            total_w,
            flush_col)
        pending_units.clear()

    flush_col_pos = col + rel_start
    unit_pos = 0
    for unit_text, unit_w in units:
        unit_end = unit_pos + unit_w
        if unit_end <= rel_start:
            # Entirely left of the window.
            unit_pos = unit_end
            continue
        if unit_pos >= rel_end:
            # Entirely right of the window; so is everything after it.
            break

        overlap = min(unit_end, rel_end) - max(unit_pos, rel_start)
        if overlap == unit_w and unit_w > 0:
            # Fully visible unit: batch it, remembering where the batch begins.
            if not pending_units:
                flush_col_pos = col + max(unit_pos, rel_start)
            pending_units.append((unit_text, unit_w))
        else:
            # Partly visible unit: close the current batch, then fill the visible columns.
            flush(flush_col_pos)
            abs_start = col + max(unit_pos, rel_start)
            for i in range(overlap):
                write_cells(fillchar, 1, abs_start + i)
        unit_pos = unit_end

    flush(flush_col_pos)
    return col + ts_width
def _clip_painter(
    text: str,
    start: int,
    end: int,
    *,
    propagate_sgr: bool,
    ambiguous_width: int,
    fillchar: str,
    tabsize: int,
    strict: bool,
    control_codes: Literal['parse', 'strict', 'ignore'],
) -> tuple[str, Optional[_SGRState]]:
    """
    Clip text with cursor movement (painter's algorithm path).

    Returns ``(result, captured_style)``. The caller applies SGR wrapping.

    :param text: String to clip.
    :param start: First column of the clip window (inclusive).
    :param end: Last column of the clip window (exclusive).
    :param propagate_sgr: When true, SGR sequences update a tracked style instead of being
        recorded, and the style active at the first emission is returned.
    :param ambiguous_width: Width of East Asian Ambiguous characters.
    :param fillchar: Replacement for split cells, skipped columns and expanded tabs.
    :param tabsize: Tab stop width; when not positive, tabs are recorded as zero-width items.
    :param strict: Raise :exc:`ValueError` on indeterminate-effect sequences and on cursor-left
        movement past column 0.
    :param control_codes: Mode forwarded to hyperlink and text-sizing helpers.
    :raises ValueError: In strict mode, as described for *strict*.

    The scan only stops early once no carriage return, backspace or horizontal cursor movement
    sequence remains in *text*: any of those may move the cursor back into the clip window and
    overwrite cells that were already painted.
    """
    # pylint: disable=too-complex,too-many-locals,too-many-branches
    # pylint: disable=too-many-statements,too-many-nested-blocks
    # code length and complexity traded for performance, to allow this to be used as a "hot path"

    cells: dict[int, tuple[str, int]] = {}
    hyperlink_cells: set[int] = set()
    sequences: list[tuple[int, int, str]] = []
    seq_order = 0

    col = 0
    idx = 0
    # captured_style is a frozen snapshot of current_style taken at the first
    # visible character emitted within the clip window (start, end). It stays
    # None until that point. current_style, by contrast, is continuously
    # updated by SGR sequences throughout the scan.
    #
    # When propagate_sgr is False, current_style (and therefore captured_style)
    # remain None, and SGR sequences pass through as literal text.
    captured_style: Optional[_SGRState] = None
    current_style = _SGR_STATE_DEFAULT if propagate_sgr else None

    # Index of the last construct able to reposition the cursor.  Before this
    # index the early exit below must not fire, or later overwrites are lost.
    # NOTE(review): assumes _HORIZONTAL_CURSOR_MOVEMENT is a compiled regular
    # expression (it is already used via ``.search()`` in clip()).
    last_cursor_move = max(text.rfind('\r'), text.rfind('\x08'))
    for move in _HORIZONTAL_CURSOR_MOVEMENT.finditer(text):
        last_cursor_move = max(last_cursor_move, move.start())

    def _write_cells(s: str, w: int, write_col: int,
                     is_hyperlink: bool = False) -> None:
        """Write *w* cells of text *s* at *write_col*, handling wide-char splitting."""
        nonlocal captured_style
        for offset in range(w):
            src_col = write_col + offset
            # Overwriting the right half of a width-2 cell: its left half becomes fillchar.
            if src_col > 0 and cells.get(src_col - 1, ('', 0))[1] == 2:
                cells[src_col - 1] = (fillchar, 1)
                hyperlink_cells.discard(src_col - 1)
            # Overwriting the left half of a width-2 cell: its right half becomes fillchar.
            if cells.get(src_col, ('', 0))[1] == 2:
                cells[src_col + 1] = (fillchar, 1)
                hyperlink_cells.discard(src_col + 1)
            cells.pop(src_col, None)
            hyperlink_cells.discard(src_col)
        cells[write_col] = (s, w)
        if is_hyperlink:
            for offset in range(w):
                hyperlink_cells.add(write_col + offset)
        if propagate_sgr and captured_style is None:
            captured_style = current_style

    while idx < len(text):
        char = text[idx]

        # Early exit: past visible region, SGR captured, not at an escape, and
        # nothing left in the text that could move the cursor back.
        if (col >= end and captured_style is not None and char != '\x1b'
                and idx > last_cursor_move):
            break

        if char == '\x1b':
            m = _SEQUENCE_CLASSIFY.match(text, idx)
            if not m:
                # Record lone ESC as a zero-width sequence at current column.
                sequences.append((col, seq_order, char))
                seq_order += 1
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
                idx += 1
                continue

            # SGR: update current_style, do not emit.
            if m.group('sgr_params') is not None and propagate_sgr and current_style is not None:
                current_style = _sgr_state_update(current_style, m.group())
                idx = m.end()
                continue

            # OSC 8 hyperlink.
            if hl_state := HyperlinkParams.parse(m.group()):
                r = _process_hyperlink(
                    text, start, end, fillchar, tabsize, ambiguous_width,
                    control_codes,
                    params=hl_state, match_end=m.end(), col=col,
                )
                if r.action is _HyperlinkAction.NO_CLOSE:
                    sequences.append((col, seq_order, m.group()))
                    seq_order += 1
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    idx = m.end()
                elif r.action is _HyperlinkAction.EMPTY:
                    idx = r.close_end
                elif r.action is _HyperlinkAction.OUTSIDE:
                    col += r.inner_width
                    idx = r.close_end
                else:
                    sequences.append((col, seq_order, r.open_seq))
                    seq_order += 1
                    if propagate_sgr and captured_style is None:
                        captured_style = current_style
                    _write_cells(r.clipped_inner, r.clipped_width, col,
                                 is_hyperlink=True)
                    # Anchor the close sequence right after the clipped text ...
                    col += r.clipped_width
                    sequences.append((col, seq_order, r.close_seq))
                    seq_order += 1
                    # ... then continue from the end of the unclipped hyperlink.
                    col = r.hl_col_end
                    idx = r.close_end
                continue

            # OSC 66 Text Sizing.
            if (ts_meta := m.group('ts_meta')) is not None:
                ts_text = m.group('ts_text')
                ts_term = m.group('ts_term')
                assert ts_text is not None and ts_term is not None
                ts = TextSizing(
                    TextSizingParams.from_params(ts_meta, control_codes=control_codes),
                    ts_text, ts_term)
                col = _text_sizing_clip(
                    ts, col, start, end, fillchar, ambiguous_width,
                    _write_cells)
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
                idx = m.end()
                continue

            # Indeterminate-effect sequences: raise in strict mode.
            seq = m.group()
            if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
                raise ValueError(
                    f"Indeterminate cursor sequence at position {idx}, "
                    f"{seq!r}"
                )

            # Horizontal Position Absolute (CSI n G).
            if (hpa_n := m.group('hpa_n')) is not None:
                # 1-based parameter; an omitted or zero parameter selects the
                # first column, so never let the column go negative.
                col = max(0, int(hpa_n) - 1) if hpa_n else 0
                idx = m.end()
                continue

            # Cursor Forward (CSI n C).
            if (cforward_n := m.group('cforward_n')) is not None:
                n_forward = int(cforward_n) if cforward_n else 1
                move_end = col + n_forward
                # Columns skipped inside the window are painted with fillchar.
                if col < end and move_end > start:
                    for i in range(max(col, start), min(move_end, end)):
                        _write_cells(fillchar, 1, i)
                col = move_end
                idx = m.end()
                continue

            # Cursor Backward (CSI n D).
            if (cbackward_n := m.group('cbackward_n')) is not None:
                n_backward = int(cbackward_n) if cbackward_n else 1
                if strict and n_backward > col:
                    raise ValueError(
                        f"Cursor left movement at position {idx} would move "
                        f"{n_backward} cells left from column {col}, "
                        f"exceeding string start"
                    )
                col = max(0, col - n_backward)
                idx = m.end()
                continue

            # Any other recognized sequence: preserve as-is.
            sequences.append((col, seq_order, m.group()))
            seq_order += 1
            if propagate_sgr and captured_style is None:
                captured_style = current_style
            idx = m.end()
            continue

        # Carriage return.
        if char == '\r':
            col = 0
            idx += 1
            continue

        # Backspace.
        if char == '\x08':
            if col > 0:
                col -= 1
            idx += 1
            continue

        # Tab expansion.
        if char == '\t':
            if tabsize > 0:
                next_tab = col + (tabsize - (col % tabsize))
                while col < next_tab:
                    if start <= col < end:
                        _write_cells(fillchar, 1, col)
                    col += 1
            else:
                sequences.append((col, seq_order, '\t'))
                seq_order += 1
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
            idx += 1
            continue

        # Grapheme cluster.
        grapheme = next(iter_graphemes(text, start=idx))
        grapheme_w = width(grapheme, ambiguous_width=ambiguous_width)

        # Emit grapheme or fillchar depending on visibility within clip window.
        if grapheme_w == 0:
            if start <= col < end:
                # Zero-width grapheme: recorded like a sequence at the current column.
                sequences.append((col, seq_order, grapheme))
                seq_order += 1
                if propagate_sgr and captured_style is None:
                    captured_style = current_style
        elif col >= start and col + grapheme_w <= end:
            _write_cells(grapheme, grapheme_w, col)
        elif col < end and col + grapheme_w > start:
            # Split by a window boundary: one fillchar per visible column.
            clip_start = max(start, col)
            for offset in range(min(end, col + grapheme_w) - clip_start):
                _write_cells(fillchar, 1, clip_start + offset)

        col += grapheme_w
        idx += len(grapheme)

    return _reconstruct_painter(cells, sequences, start, end, fillchar), captured_style
def clip(
    text: str,
    start: int,
    end: int,
    *,
    fillchar: str = ' ',
    tabsize: int = 8,
    ambiguous_width: int = 1,
    propagate_sgr: bool = True,
    control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
    overtyping: Optional[bool] = None,
) -> str:
    r"""
    Clip text to display columns (start, end) while preserving all terminal sequences.

    This function extracts a substring based on visible column positions rather than
    character indices. Terminal escape sequences are preserved in the output since
    they have zero display width. If a wide character (width 2) is split at
    either boundary, it is replaced with ``fillchar``.

    TAB characters (``\t``) are expanded to spaces up to the next tab stop,
    controlled by the ``tabsize`` parameter. When cursor movement is detected,
    a "painter's algorithm" is used, cursor movements actively change the write
    position, allowing cursor-left and carriage return to overwrite previously
    written cells. It is assumed that ``text`` begins at column 0.

    **OSC 8 hyperlinks** are handled specially: the visible text inside a hyperlink
    is clipped to the requested column range, and the hyperlink is rebuilt around
    the clipped text. Empty hyperlinks (those with no remaining visible text after
    clipping) are removed::

        >>> clip('\x1b]8;;http://example.com\x07Click This link\x1b]8;;\x07', 6, 10)
        '\x1b]8;;http://example.com\x07This\x1b]8;;\x07'

    :param text: String to clip, may contain terminal escape sequences.
    :param start: Absolute starting column (inclusive, 0-indexed).
    :param end: Absolute ending column (exclusive).
    :param fillchar: Character to use when a wide character must be split at
        a boundary (default space). Must have display width of 1.
    :param tabsize: Tab stop width (default 8). Set to 0 to pass tabs through
        as zero-width (preserved in output but don't advance column position).
    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :param propagate_sgr: If True (default), SGR (terminal styling) sequences
        are propagated. The result begins with any active style at the start
        position and ends with a reset sequence if styles are active.
    :param control_codes: How to handle control characters and sequences:

        - ``'parse'`` (default): Track horizontal cursor movement and clip
          hyperlink text. Cursor overwrite is always allowed, with best effort
          results; indeterminate sequences (home, clear, reset, etc.) are
          preserved as zero-width.
        - ``'strict'``: Like ``parse``, but raises :exc:`ValueError` on
          sequences with indeterminate effects (cursor home, clear screen,
          reset, vertical movement, etc.) matching :func:`width` behavior.
          Also raises on out-of-bounds horizontal cursor movement.
        - ``'ignore'``: All control characters are treated as zero-width.
          Cursor movement is not tracked (fastest path).

    :param overtyping: Whether to use the painter's algorithm for cursor
        movement (``\b`` backspace, ``\r`` carriage return, and CSI cursor
        left/right/position sequences). When ``None`` (default), auto-detects
        by scanning for these characters in *text*. Set to ``False`` for improved
        performance when the caller knows *text* contains no cursor movement
        characters. Set to ``True`` to force the painter's algorithm (useful
        for testing). Has no effect when ``control_codes='ignore'``.

    :returns: Substring of ``text`` spanning display columns (start, end),
        with all terminal sequences preserved and wide characters at boundaries
        replaced with ``fillchar``.

    :raises ValueError: If ``control_codes='strict'`` and an indeterminate-effect
        sequence or out-of-bounds cursor movement is encountered.

    SGR (terminal styling) sequences are propagated by default. The result
    begins with any active style and ends with a reset::

        >>> clip('\x1b[1;34mHello world\x1b[0m', 6, 11)
        '\x1b[1;34mworld\x1b[0m'

    Set ``propagate_sgr=False`` to disable this behavior.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.5.0
       Added ``propagate_sgr`` parameter (default True).

    .. versionchanged:: 0.7.0
       Added ``control_codes`` parameter (default 'parse').
       OSC 8 hyperlink-aware clipping. OSC 66 text sizing protocol support.
       Added ``overtyping`` parameter (default None, auto-detect).

    Example::

        >>> clip('hello world', 0, 5)
        'hello'
        >>> clip('中文字', 0, 3)  # Wide char split at column 3
        '中 '
        >>> clip('a\tb', 0, 10)  # Tab expanded to spaces
        'a       b'
    """
    # Negative start columns are treated as column 0; an empty window yields ''.
    start = max(start, 0)
    if end <= start:
        return ''

    # Fast path: printable ASCII only.
    if text.isascii() and text.isprintable():
        return text[start:end]

    # No escape sequences => no SGR tracking needed.
    has_esc = '\x1b' in text
    if propagate_sgr and not has_esc:
        propagate_sgr = False

    # Determine whether painter's algorithm is needed.
    if overtyping is None:
        # Auto-detect: scan for cursor movement characters.
        overtyping = (
            control_codes != 'ignore' and
            ('\x08' in text or '\r' in text or
             (has_esc and bool(_HORIZONTAL_CURSOR_MOVEMENT.search(text))))
        )
    elif overtyping and control_codes == 'ignore':
        overtyping = False  # control_codes='ignore' overrides
    fn_clip = _clip_painter if overtyping else _clip_simple

    # Both back-ends return (result, captured_style); the wrapper adds the SGR prefix/reset.
    return _apply_sgr_wrap(*fn_clip(
        text=text,
        start=start,
        end=end,
        propagate_sgr=propagate_sgr,
        ambiguous_width=ambiguous_width,
        fillchar=fillchar,
        tabsize=tabsize,
        strict=(control_codes == 'strict'),
        control_codes=control_codes,
    ))