Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/textwrap.py: 24%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
Sequence-aware text wrapping functions.

This module provides functions for wrapping text that may contain terminal escape sequences, with
proper handling of Unicode grapheme clusters and character display widths.
"""
8from __future__ import annotations
10# std imports
11import secrets
12import textwrap
14from typing import TYPE_CHECKING, Optional
16# local
17from ._width import width as wcwidth_width
18from .grapheme import iter_graphemes
19from .hyperlink import HyperlinkParams
20from .sgr_state import propagate_sgr as _propagate_sgr
21from .escape_sequences import ZERO_WIDTH_PATTERN, iter_sequences
23if TYPE_CHECKING: # pragma: no cover
24 from typing import Any, Literal
class SequenceTextWrapper(textwrap.TextWrapper):
    """
    Sequence-aware text wrapper extending :class:`textwrap.TextWrapper`.

    This wrapper properly handles terminal escape sequences and Unicode grapheme clusters when
    calculating text width for wrapping.

    This implementation is based on the SequenceTextWrapper from the 'blessed' library, with
    contributions from Avram Lubkin and grayjk.

    The key difference from the blessed implementation is the addition of grapheme cluster support
    via :func:`~.iter_graphemes`, providing width calculation for ZWJ emoji sequences, VS-16 emojis
    and variations, regional indicator flags, and combining characters.

    OSC 8 hyperlinks are handled specially: when a hyperlink must span multiple lines, each line
    receives complete open/close sequences with a shared ``id`` parameter, ensuring terminals
    treat the fragments as a single hyperlink for hover underlining. If the original hyperlink
    already has an ``id`` parameter, it is preserved; otherwise, one is generated.
    """

    # Both spellings of the OSC 8 "close hyperlink" sequence: ST- and BEL-terminated.
    _HYPERLINK_CLOSE = ('\x1b]8;;\x1b\\', '\x1b]8;;\x07')

    def __init__(self, width: int = 70, *,
                 control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
                 tabsize: int = 8,
                 ambiguous_width: int = 1,
                 **kwargs: Any) -> None:
        """
        Initialize the wrapper.

        :param width: Maximum line width in display cells.
        :param control_codes: How to handle control sequences (see :func:`~.width`).
        :param tabsize: Tab stop width for tab expansion.
        :param ambiguous_width: Width to use for East Asian Ambiguous (A) characters.
        :param kwargs: Additional arguments passed to :class:`textwrap.TextWrapper`.
        """
        super().__init__(width=width, **kwargs)
        self.control_codes = control_codes
        self.tabsize = tabsize
        self.ambiguous_width = ambiguous_width

    @staticmethod
    def _next_hyperlink_id() -> str:
        """Generate unique hyperlink id as 8-character hex string."""
        return secrets.token_hex(4)

    @staticmethod
    def _has_hyperlink_id(params: str) -> bool:
        """
        Return whether OSC 8 ``params`` carries an ``id`` key.

        ``params`` is a ``:``-separated list of ``key=value`` pairs, so each pair is examined
        individually; a plain substring test would mistake keys such as ``uid=`` for ``id=``.
        """
        return any(pair.startswith('id=') for pair in params.split(':'))

    def _width(self, text: str) -> int:
        """Measure text width accounting for sequences."""
        return wcwidth_width(text, control_codes=self.control_codes, tabsize=self.tabsize,
                             ambiguous_width=self.ambiguous_width)

    def _strip_sequences(self, text: str) -> str:
        """Strip all terminal sequences from text."""
        return ''.join(segment for segment, is_seq in iter_sequences(text) if not is_seq)

    def _extract_sequences(self, text: str) -> str:
        """Extract only terminal sequences from text."""
        return ''.join(segment for segment, is_seq in iter_sequences(text) if is_seq)

    def _split(self, text: str) -> list[str]:  # pylint: disable=too-many-locals
        r"""
        Sequence-aware variant of :meth:`textwrap.TextWrapper._split`.

        This method ensures that terminal escape sequences don't interfere with the text splitting
        logic, particularly for hyphen-based word breaking. It builds a position mapping from
        stripped text to original text, calls the parent's _split on stripped text, then maps chunks
        back.

        OSC hyperlink sequences are treated as word boundaries::

            >>> wrap('foo \x1b]8;;https://example.com\x07link\x1b]8;;\x07 bar', 6)
            ['foo', '\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 'bar']

        Both BEL (``\x07``) and ST (``\x1b\\``) terminators are supported.

        :param text: Text to split, may contain terminal sequences.
        :returns: Chunks of the original text, sequences included, whose concatenation
            reproduces ``text``.
        """
        # pylint: disable=too-many-locals,too-many-branches
        # Build a mapping from stripped text positions to original text positions.
        #
        # Track where each character ENDS so that sequences between characters
        # attach to the following text (not preceding text). This ensures sequences
        # aren't lost when whitespace is dropped.
        #
        # char_end[i] = position in original text right after the i-th stripped char
        char_end: list[int] = []
        # Collected as a list and joined once, rather than growing a string per character.
        stripped_chars: list[str] = []
        original_pos = 0
        prev_was_hyperlink_close = False

        for segment, is_seq in iter_sequences(text):
            if not is_seq:
                # Conditionally insert space after hyperlink close to force word boundary.
                # The virtual space maps to a zero-length span of the original text.
                if prev_was_hyperlink_close and segment and not segment[0].isspace():
                    stripped_chars.append(' ')
                    char_end.append(original_pos)
                stripped_chars.extend(segment)
                char_end.extend(range(original_pos + 1, original_pos + len(segment) + 1))
                original_pos += len(segment)
                prev_was_hyperlink_close = False
            else:
                is_hyperlink_close = segment.startswith(self._HYPERLINK_CLOSE)

                # Conditionally insert space before OSC sequences to artificially create word
                # boundary, but *not* before hyperlink close sequences, to ensure hyperlink is
                # terminated on the same line.
                if (segment.startswith('\x1b]') and stripped_chars
                        and not stripped_chars[-1].isspace()
                        and not is_hyperlink_close):
                    stripped_chars.append(' ')
                    char_end.append(original_pos)

                # Escape sequences advance position but don't add to stripped text
                original_pos += len(segment)
                prev_was_hyperlink_close = is_hyperlink_close

        # Add sentinel for final position
        char_end.append(original_pos)
        stripped_text = ''.join(stripped_chars)

        # Use parent's _split on the stripped text
        # pylint: disable-next=protected-access
        stripped_chunks = textwrap.TextWrapper._split(self, stripped_text)

        # Handle text that contains only sequences (no visible characters).
        # Return the sequences as a single chunk to preserve them.
        if not stripped_chunks and text:
            return [text]

        # Map the chunks back to the original text with sequences
        result: list[str] = []
        stripped_pos = 0
        num_chunks = len(stripped_chunks)

        for idx, chunk in enumerate(stripped_chunks):
            chunk_len = len(chunk)

            # Start is where previous character ended (or 0 for first chunk)
            start_orig = 0 if stripped_pos == 0 else char_end[stripped_pos - 1]

            # End is where next character starts. For last chunk, use sentinel
            # to include any trailing sequences.
            if idx == num_chunks - 1:
                end_orig = char_end[-1]  # sentinel includes trailing sequences
            else:
                end_orig = char_end[stripped_pos + chunk_len - 1]

            # Extract the corresponding portion from the original text
            # Skip empty chunks (from virtual spaces inserted at OSC boundaries)
            if start_orig != end_orig:
                result.append(text[start_orig:end_orig])
            stripped_pos += chunk_len

        return result

    def _close_open_hyperlink(self, line_content: str,
                              link: HyperlinkParams) -> tuple[str, HyperlinkParams]:
        """
        Terminate a hyperlink that is still open at the end of a wrapped line.

        :param line_content: Line text (without indent) that ends inside ``link``.
        :param link: The hyperlink left open at the end of ``line_content``.
        :returns: Tuple of the line with a close sequence appended, and the hyperlink
            parameters, always carrying an ``id``, to re-open at the start of the next line.

        When ``link`` has no ``id`` parameter, one is generated and written into the open
        sequence on this line so that every fragment of the hyperlink shares the same id.
        """
        if self._has_hyperlink_id(link.params):
            # Either the author supplied an id, or this is a continuation line whose
            # re-opened sequence already carries the id chosen on an earlier line.
            params = link.params
        else:
            new_id = f'id={self._next_hyperlink_id()}'
            # Per OSC 8 spec, params can have multiple key=value pairs separated by ':'.
            params = f'{new_id}:{link.params}' if link.params else new_id
            old_open = HyperlinkParams(
                url=link.url,
                params=link.params,
                terminator=link.terminator,
            ).make_open()
            new_open = HyperlinkParams(
                url=link.url,
                params=params,
                terminator=link.terminator,
            ).make_open()
            # Rewrite the LAST matching open sequence: that is the one left open. An earlier,
            # already closed hyperlink with the same url and params must stay untouched.
            head, found, tail = line_content.rpartition(old_open)
            if found:
                line_content = head + new_open + tail

        line_content += HyperlinkParams(terminator=link.terminator, url='').make_close()
        return line_content, HyperlinkParams(link.url, params, link.terminator)

    def _wrap_chunks(self, chunks: list[str]) -> list[str]:  # pylint: disable=too-many-branches
        """
        Wrap chunks into lines using sequence-aware width.

        Override TextWrapper._wrap_chunks to use _width instead of len. Follows stdlib's algorithm:
        greedily fill lines, handle long words. Also handle OSC hyperlink processing. When
        hyperlinks span multiple lines, each line gets complete open/close sequences with matching
        id parameters for hover underlining continuity per OSC 8 spec.

        :param chunks: Chunks as produced by :meth:`_split`.
        :returns: List of wrapped lines, each prefixed by the applicable indent.
        :raises ValueError: If ``max_lines`` is set and the placeholder cannot fit within
            ``width`` next to the indent.
        """
        # pylint: disable=too-many-branches,too-many-statements,too-complex,too-many-locals
        # pylint: disable=too-many-nested-blocks
        if not chunks:
            return []

        if self.max_lines is not None:
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if (self._width(indent)
                    + self._width(self.placeholder.lstrip())
                    > self.width):
                raise ValueError("placeholder too large for max width")

        lines: list[str] = []
        is_first_line = True

        # Hyperlink to re-open at the start of the next line, or None when the previous line
        # ended outside any hyperlink. Its params always include the id shared by all fragments.
        hyperlink_state: Optional[HyperlinkParams] = None

        # Arrange in reverse order so items can be efficiently popped
        chunks = list(reversed(chunks))

        while chunks:
            current_line: list[str] = []
            current_width = 0

            # Get the indent and available width for current line
            indent = self.initial_indent if is_first_line else self.subsequent_indent
            line_width = self.width - self._width(indent)

            # If continuing a hyperlink from previous line, prepend open sequence
            if hyperlink_state is not None:
                open_seq = HyperlinkParams(
                    url=hyperlink_state.url,
                    params=hyperlink_state.params,
                    terminator=hyperlink_state.terminator,
                ).make_open()
                chunks[-1] = open_seq + chunks[-1]

            # Drop leading whitespace (except at very start)
            # When dropping, transfer any sequences to the next chunk.
            # Only drop if there's actual whitespace text, not if it's only sequences.
            stripped = self._strip_sequences(chunks[-1])
            if self.drop_whitespace and lines and stripped and not stripped.strip():
                sequences = self._extract_sequences(chunks[-1])
                del chunks[-1]
                if sequences and chunks:
                    chunks[-1] = sequences + chunks[-1]

            # Greedily add chunks that fit
            while chunks:
                chunk = chunks[-1]
                chunk_width = self._width(chunk)

                if current_width + chunk_width <= line_width:
                    current_line.append(chunks.pop())
                    current_width += chunk_width
                else:
                    break

            # Handle chunk that's too long for any line
            if chunks and self._width(chunks[-1]) > line_width:
                self._handle_long_word(
                    chunks, current_line, current_width, line_width
                )
                current_width = self._width(''.join(current_line))
                # Remove any empty chunks left by _handle_long_word
                while chunks and not chunks[-1]:
                    del chunks[-1]

            # Drop trailing whitespace
            # When dropping, transfer any sequences to the previous chunk.
            # Only drop if there's actual whitespace text, not if it's only sequences.
            stripped_last = self._strip_sequences(current_line[-1]) if current_line else ''
            if (self.drop_whitespace and current_line and
                    stripped_last and not stripped_last.strip()):
                sequences = self._extract_sequences(current_line[-1])
                current_width -= self._width(current_line[-1])
                del current_line[-1]
                if sequences and current_line:
                    current_line[-1] = current_line[-1] + sequences

            if current_line:
                # Check whether this is a normal append or max_lines
                # truncation. Matches stdlib textwrap precedence:
                # normal if max_lines not set, not yet reached, or no
                # remaining visible content that would need truncation.
                no_more_content = (
                    not chunks or
                    self.drop_whitespace and
                    len(chunks) == 1 and
                    not self._strip_sequences(chunks[0]).strip()
                )
                if (self.max_lines is None or
                        len(lines) + 1 < self.max_lines or
                        no_more_content
                        and current_width <= line_width):
                    line_content = ''.join(current_line)

                    # Track hyperlink state through this line's content
                    new_state = self._track_hyperlink_state(line_content, hyperlink_state)

                    if new_state is None:
                        hyperlink_state = None
                    else:
                        # Line ends inside a hyperlink: close it here and remember what to
                        # re-open on the next line. The id is decided per hyperlink, so a link
                        # opened after an earlier one closed never inherits the earlier id.
                        line_content, hyperlink_state = self._close_open_hyperlink(
                            line_content, new_state)

                    # Strip trailing whitespace when drop_whitespace is enabled
                    # (matches CPython #140627 fix behavior)
                    if self.drop_whitespace:
                        line_content = line_content.rstrip()
                    lines.append(indent + line_content)
                    is_first_line = False
                else:
                    # max_lines reached with remaining content.
                    # pop chunks until placeholder fits, then break.
                    placeholder_w = self._width(self.placeholder)
                    while current_line:
                        last_text = self._strip_sequences(current_line[-1])
                        if (last_text.strip()
                                and current_width + placeholder_w <= line_width):
                            line_content = ''.join(current_line)
                            new_state = self._track_hyperlink_state(
                                line_content, hyperlink_state)
                            # This is the final line, so an open hyperlink only needs closing;
                            # no id is required because nothing continues it.
                            if new_state is not None:
                                line_content += HyperlinkParams(
                                    terminator=new_state.terminator, url='').make_close()
                            lines.append(indent + line_content + self.placeholder)
                            break
                        current_width -= self._width(current_line[-1])
                        del current_line[-1]
                    else:
                        # Nothing on this line leaves room for the placeholder: try to attach
                        # it to the previous line, else emit it on a line of its own.
                        if lines:
                            prev_line = self._rstrip_visible(lines[-1])
                            if (self._width(prev_line) + placeholder_w
                                    <= self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _track_hyperlink_state(
            self, text: str,
            state: Optional[HyperlinkParams]) -> Optional[HyperlinkParams]:
        """
        Track hyperlink state through text.

        :param text: Text to scan for hyperlink sequences.
        :param state: Current state or None if outside hyperlink.
        :returns: Updated state after processing text.
        """
        for segment, is_seq in iter_sequences(text):
            if is_seq:
                parsed_link = HyperlinkParams.parse(segment)
                if parsed_link is not None and parsed_link.url:  # has URL = open
                    state = parsed_link
                elif segment.startswith(self._HYPERLINK_CLOSE):  # close
                    state = None
        return state

    def _handle_long_word(self, reversed_chunks: list[str],
                          cur_line: list[str], cur_len: int,
                          width: int) -> None:
        """
        Sequence-aware :meth:`textwrap.TextWrapper._handle_long_word`.

        This method ensures that word boundaries are not broken mid-sequence, and respects grapheme
        cluster boundaries when breaking long words.

        :param reversed_chunks: Remaining chunks in reverse order; the last item is the chunk
            that does not fit. It is replaced in place by whatever is left after breaking.
        :param cur_line: Chunks already placed on the current line; the broken-off piece is
            appended to it.
        :param cur_len: Display width of ``cur_line``.
        :param width: Display width available for the current line.
        """
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        chunk = reversed_chunks[-1]

        if self.break_long_words:
            # String index of the widest prefix that fits in space_left display cells,
            # always on a grapheme boundary and never inside an escape sequence.
            actual_end = self._find_break_position(chunk, space_left)

            # Handle break_on_hyphens: prefer breaking after the last hyphen that fits.
            # The search is limited to the prefix that fits by display width, not by code
            # point count, so wide characters before the hyphen cannot overflow the line.
            if self.break_on_hyphens and actual_end < len(chunk):
                visible_fit = self._strip_sequences(chunk[:actual_end])
                hyphen_pos = visible_fit.rfind('-')
                if hyphen_pos > 0 and any(c != '-' for c in visible_fit[:hyphen_pos]):
                    # Map back to original position including sequences
                    actual_end = self._map_stripped_pos_to_original(chunk, hyphen_pos + 1)

            # If no progress possible (e.g., wide char exceeds line width),
            # force at least one grapheme to avoid infinite loop.
            # Only force when cur_line is empty; if line has content,
            # appending nothing is safe and the line will be committed.
            if actual_end == 0 and not cur_line:
                actual_end = self._find_first_grapheme_end(chunk)
            cur_line.append(chunk[:actual_end])
            reversed_chunks[-1] = chunk[actual_end:]

        elif not cur_line:
            # Breaking is not allowed: an over-long word gets a line of its own.
            cur_line.append(reversed_chunks.pop())

    def _map_stripped_pos_to_original(self, text: str, stripped_pos: int) -> int:
        """Map a position in stripped text back to original text position."""
        stripped_idx = 0
        original_idx = 0

        for segment, is_seq in iter_sequences(text):
            if is_seq:
                original_idx += len(segment)
            elif stripped_idx + len(segment) > stripped_pos:
                # Position is within this segment
                return original_idx + (stripped_pos - stripped_idx)
            else:
                stripped_idx += len(segment)
                original_idx += len(segment)

        # Caller guarantees stripped_pos < total stripped chars, so we always
        # return from within the loop. This line satisfies the type checker.
        return original_idx  # pragma: no cover

    def _find_break_position(self, text: str, max_width: int) -> int:
        """Find string index in text that fits within max_width cells."""
        idx = 0
        width_so_far = 0

        while idx < len(text):
            char = text[idx]

            # Skip escape sequences (they don't add width)
            if char == '\x1b':
                match = ZERO_WIDTH_PATTERN.match(text, idx)
                if match:
                    idx = match.end()
                    continue

            # Get grapheme (use start= to avoid slice allocation)
            grapheme = next(iter_graphemes(text, start=idx))

            grapheme_width = self._width(grapheme)
            if width_so_far + grapheme_width > max_width:
                return idx  # Found break point

            width_so_far += grapheme_width
            idx += len(grapheme)

        # Reached only when all of text fits within max_width.
        return idx  # pragma: no cover

    def _find_first_grapheme_end(self, text: str) -> int:
        """Find the end position of the first grapheme."""
        return len(next(iter_graphemes(text)))

    def _rstrip_visible(self, text: str) -> str:
        """
        Strip trailing visible whitespace, preserving trailing sequences.

        Whitespace after the last segment containing non-whitespace text is removed, while
        sequences that follow it are kept. When ``text`` holds no non-whitespace text at all,
        an empty string is returned.
        """
        segments = list(iter_sequences(text))
        last_vis = -1
        for i, (segment, is_seq) in enumerate(segments):
            if not is_seq and segment.rstrip():
                last_vis = i
        if last_vis == -1:
            return ''
        kept = [segment for segment, _ in segments[:last_vis]]
        kept.append(segments[last_vis][0].rstrip())
        # After the last visible text only sequences survive; whitespace is dropped.
        kept.extend(segment for segment, is_seq in segments[last_vis + 1:] if is_seq)
        return ''.join(kept)
def wrap(text: str, width: int = 70, *,
         control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
         tabsize: int = 8,
         expand_tabs: bool = True,
         replace_whitespace: bool = True,
         ambiguous_width: int = 1,
         initial_indent: str = '',
         subsequent_indent: str = '',
         fix_sentence_endings: bool = False,
         break_long_words: bool = True,
         break_on_hyphens: bool = True,
         drop_whitespace: bool = True,
         max_lines: Optional[int] = None,
         placeholder: str = ' [...]',
         propagate_sgr: bool = True) -> list[str]:
    r"""
    Wrap ``text`` into lines no wider than ``width`` display cells.

    Works like :func:`textwrap.wrap`, except that line width is measured in terminal
    display cells instead of characters, so wide characters, combining marks, and
    terminal escape sequences are accounted for correctly.

    :param text: Text to wrap, may contain terminal sequences.
    :param width: Maximum line width in display cells.
    :param control_codes: How to handle terminal sequences (see :func:`~.width`).
    :param tabsize: Tab stop width for tab expansion.
    :param expand_tabs: If True (default), tab characters are expanded
        to spaces using ``tabsize``.
    :param replace_whitespace: If True (default), each whitespace character
        is replaced with a single space after tab expansion. When False,
        control whitespace like ``\n`` has zero display width (unlike
        :func:`textwrap.wrap` which counts ``len()``), so wrap points
        may differ from stdlib for non-space whitespace characters.
    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :param initial_indent: String prepended to first line.
    :param subsequent_indent: String prepended to subsequent lines.
    :param fix_sentence_endings: If True, ensure sentences are always
        separated by exactly two spaces.
    :param break_long_words: If True, break words longer than width.
    :param break_on_hyphens: If True, allow breaking at hyphens.
    :param drop_whitespace: If True (default), whitespace at the beginning
        and end of each line (after wrapping but before indenting) is dropped.
        Set to False to preserve whitespace.
    :param max_lines: If set, output contains at most this many lines, with
        ``placeholder`` appended to the last line if the text was truncated.
    :param placeholder: String appended to the last line when text is
        truncated by ``max_lines``. Default is ``' [...]'``.
    :param propagate_sgr: If True (default), SGR (terminal styling) sequences
        are propagated across wrapped lines. Each line ends with a reset
        sequence and the next line begins with the active style restored.
    :returns: List of wrapped lines without trailing newlines.

    With ``propagate_sgr`` left enabled, styling that is active at a line break
    is reset at the end of that line and restored at the start of the next::

        >>> wrap('\x1b[1;34mHello world\x1b[0m', width=6)
        ['\x1b[1;34mHello\x1b[0m', '\x1b[1;34mworld\x1b[0m']

    Pass ``propagate_sgr=False`` to get the wrapped lines without this step.

    As with :func:`textwrap.wrap`, newlines in the input count as whitespace
    and are collapsed. Wrap paragraphs one at a time to keep paragraph breaks::

        >>> text = 'First line.\nSecond line.'
        >>> wrap(text, 40)  # newline collapsed to space
        ['First line. Second line.']
        >>> [line for para in text.split('\n')
        ...  for line in (wrap(para, 40) if para else [''])]
        ['First line.', 'Second line.']

    .. seealso::

       :func:`textwrap.wrap`, :class:`textwrap.TextWrapper`
           Standard library text wrapping (character-based).

       :class:`.SequenceTextWrapper`
           Class interface for advanced wrapping options.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.5.0
       Added ``propagate_sgr`` parameter (default True).

    .. versionchanged:: 0.6.0
       Added ``expand_tabs``, ``replace_whitespace``, ``fix_sentence_endings``,
       ``drop_whitespace``, ``max_lines``, and ``placeholder`` parameters.

    Example::

        >>> from wcwidth import wrap
        >>> wrap('hello world', 5)
        ['hello', 'world']
        >>> wrap('中文字符', 4)  # CJK characters (2 cells each)
        ['中文', '字符']
    """
    # pylint: disable=too-many-arguments,too-many-locals
    # Width measurement options first, then the options inherited from textwrap.TextWrapper.
    wrapped = SequenceTextWrapper(
        width=width, control_codes=control_codes, tabsize=tabsize,
        ambiguous_width=ambiguous_width,
        expand_tabs=expand_tabs, replace_whitespace=replace_whitespace,
        initial_indent=initial_indent, subsequent_indent=subsequent_indent,
        fix_sentence_endings=fix_sentence_endings,
        break_long_words=break_long_words, break_on_hyphens=break_on_hyphens,
        drop_whitespace=drop_whitespace,
        max_lines=max_lines, placeholder=placeholder,
    ).wrap(text)

    # Styling is carried across line breaks only on request.
    return _propagate_sgr(wrapped) if propagate_sgr else wrapped