Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/textwrap.py: 24%

"""
Sequence-aware text wrapping functions.

This module provides functions for wrapping text that may contain terminal escape sequences, with
proper handling of Unicode grapheme clusters and character display widths.
"""

8from __future__ import annotations

10# std imports

11import secrets

12import textwrap

14from typing import TYPE_CHECKING, Optional

16# local

17from ._width import width as wcwidth_width

18from .grapheme import iter_graphemes

19from .hyperlink import HyperlinkParams

20from .sgr_state import propagate_sgr as _propagate_sgr

21from .escape_sequences import ZERO_WIDTH_PATTERN, iter_sequences

23if TYPE_CHECKING: # pragma: no cover

24 from typing import Any, Literal

class SequenceTextWrapper(textwrap.TextWrapper):
    """
    Sequence-aware text wrapper extending :class:`textwrap.TextWrapper`.

    This wrapper properly handles terminal escape sequences and Unicode grapheme clusters when
    calculating text width for wrapping.

    This implementation is based on the SequenceTextWrapper from the 'blessed' library, with
    contributions from Avram Lubkin and grayjk.

    The key difference from the blessed implementation is the addition of grapheme cluster support
    via :func:`~.iter_graphemes`, providing width calculation for ZWJ emoji sequences, VS-16 emojis
    and variations, regional indicator flags, and combining characters.

    OSC 8 hyperlinks are handled specially: when a hyperlink must span multiple lines, each line
    receives complete open/close sequences with a shared ``id`` parameter, ensuring terminals
    treat the fragments as a single hyperlink for hover underlining. If the original hyperlink
    already has an ``id`` parameter, it is preserved; otherwise, one is generated. The ``id`` is
    derived separately for every hyperlink, so two different hyperlinks never share one.
    """

    # Prefixes of an OSC 8 hyperlink *close* sequence (empty params and URL), for the
    # ST (``ESC \``) and BEL terminators respectively.
    _HYPERLINK_CLOSE = ('\x1b]8;;\x1b\\', '\x1b]8;;\x07')

    def __init__(self, width: int = 70, *,
                 control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
                 tabsize: int = 8,
                 ambiguous_width: int = 1,
                 **kwargs: Any) -> None:
        """
        Initialize the wrapper.

        :param width: Maximum line width in display cells.
        :param control_codes: How to handle control sequences (see :func:`~.width`).
        :param tabsize: Tab stop width for tab expansion.
        :param ambiguous_width: Width to use for East Asian Ambiguous (A) characters.
        :param kwargs: Additional arguments passed to :class:`textwrap.TextWrapper`.
        """
        super().__init__(width=width, **kwargs)
        self.control_codes = control_codes
        self.tabsize = tabsize
        self.ambiguous_width = ambiguous_width

    @staticmethod
    def _next_hyperlink_id() -> str:
        """Generate unique hyperlink id as 8-character hex string."""
        return secrets.token_hex(4)

    def _width(self, text: str) -> int:
        """Measure the display width of ``text`` using this wrapper's width settings."""
        return wcwidth_width(text, control_codes=self.control_codes, tabsize=self.tabsize,
                             ambiguous_width=self.ambiguous_width)

    def _strip_sequences(self, text: str) -> str:
        """Return ``text`` with every terminal sequence removed."""
        return ''.join(segment for segment, is_seq in iter_sequences(text) if not is_seq)

    def _extract_sequences(self, text: str) -> str:
        """Return only the terminal sequences of ``text``, concatenated in order."""
        return ''.join(segment for segment, is_seq in iter_sequences(text) if is_seq)

    def _split(self, text: str) -> list[str]:  # pylint: disable=too-many-locals
        r"""
        Sequence-aware variant of :meth:`textwrap.TextWrapper._split`.

        This method ensures that terminal escape sequences don't interfere with the text splitting
        logic, particularly for hyphen-based word breaking. It builds a position mapping from
        stripped text to original text, calls the parent's _split on stripped text, then maps chunks
        back.

        OSC hyperlink sequences are treated as word boundaries::

            >>> wrap('foo \x1b]8;;https://example.com\x07link\x1b]8;;\x07 bar', 6)
            ['foo', '\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 'bar']

        Both BEL (``\x07``) and ST (``\x1b\\``) terminators are supported.

        :param text: Text to split, may contain terminal sequences.
        :returns: Chunks of the original text (sequences included).
        """
        # pylint: disable=too-many-locals,too-many-branches
        # Build a mapping from stripped text positions to original text positions.
        #
        # Track where each character ENDS so that sequences between characters
        # attach to the following text (not preceding text). This ensures sequences
        # aren't lost when whitespace is dropped.
        #
        # char_end[i] = position in original text right after the i-th stripped char
        char_end: list[int] = []
        # Collected as a list and joined once, instead of repeated string concatenation.
        stripped_chars: list[str] = []
        original_pos = 0
        prev_was_hyperlink_close = False

        for segment, is_seq in iter_sequences(text):
            if not is_seq:
                # Conditionally insert space after hyperlink close to force word boundary.
                # The virtual space maps to a zero-length span of the original text.
                if prev_was_hyperlink_close and segment and not segment[0].isspace():
                    stripped_chars.append(' ')
                    char_end.append(original_pos)
                stripped_chars.extend(segment)
                char_end.extend(range(original_pos + 1, original_pos + len(segment) + 1))
                original_pos += len(segment)
                prev_was_hyperlink_close = False
            else:
                is_hyperlink_close = segment.startswith(self._HYPERLINK_CLOSE)

                # Conditionally insert space before OSC sequences to artificially create word
                # boundary, but *not* before hyperlink close sequences, to ensure hyperlink is
                # terminated on the same line.
                if (segment.startswith('\x1b]') and stripped_chars
                        and not stripped_chars[-1].isspace()
                        and not is_hyperlink_close):
                    stripped_chars.append(' ')
                    char_end.append(original_pos)

                # Escape sequences advance position but don't add to stripped text
                original_pos += len(segment)
                prev_was_hyperlink_close = is_hyperlink_close

        # Add sentinel for final position
        char_end.append(original_pos)

        # Use parent's _split on the stripped text
        # pylint: disable-next=protected-access
        stripped_chunks = textwrap.TextWrapper._split(self, ''.join(stripped_chars))

        # Handle text that contains only sequences (no visible characters).
        # Return the sequences as a single chunk to preserve them.
        if not stripped_chunks and text:
            return [text]

        # Map the chunks back to the original text with sequences
        result: list[str] = []
        stripped_pos = 0
        last_idx = len(stripped_chunks) - 1

        for idx, chunk in enumerate(stripped_chunks):
            chunk_len = len(chunk)

            # Start is where previous character ended (or 0 for first chunk)
            start_orig = 0 if stripped_pos == 0 else char_end[stripped_pos - 1]

            # End is where this chunk's last character ends. For last chunk, use
            # sentinel to include any trailing sequences.
            if idx == last_idx:
                end_orig = char_end[-1]
            else:
                end_orig = char_end[stripped_pos + chunk_len - 1]

            # Extract the corresponding portion from the original text
            # Skip empty chunks (from virtual spaces inserted at OSC boundaries)
            if start_orig != end_orig:
                result.append(text[start_orig:end_orig])
            stripped_pos += chunk_len

        return result

    def _wrap_chunks(self, chunks: list[str]) -> list[str]:  # pylint: disable=too-many-branches
        """
        Wrap chunks into lines using sequence-aware width.

        Override TextWrapper._wrap_chunks to use _width instead of len. Follows stdlib's algorithm:
        greedily fill lines, handle long words. Also handle OSC hyperlink processing. When
        hyperlinks span multiple lines, each line gets complete open/close sequences with matching
        id parameters for hover underlining continuity per OSC 8 spec.

        :param chunks: Chunks as produced by :meth:`_split`.
        :returns: Wrapped lines, each prefixed with the applicable indent.
        :raises ValueError: If ``max_lines`` is set and the indent plus the left-stripped
            placeholder is wider than ``width``.
        """
        # pylint: disable=too-many-branches,too-many-statements,too-complex,too-many-locals
        # pylint: disable=too-many-nested-blocks
        # the hyperlink code in particular really pushes the complexity rating of this method.
        # preferring to keep it "all in one method" because of so much local state and manipulation.
        if not chunks:
            return []

        if self.max_lines is not None:
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if (self._width(indent)
                    + self._width(self.placeholder.lstrip())
                    > self.width):
                raise ValueError("placeholder too large for max width")

        lines: list[str] = []
        is_first_line = True

        # Hyperlink still open at the end of the previous line (None when outside a hyperlink).
        # Its ``params`` always carry the ``id`` shared by all fragments of that hyperlink.
        hyperlink_state: Optional[HyperlinkParams] = None

        # Arrange in reverse order so items can be efficiently popped
        chunks = list(reversed(chunks))

        while chunks:
            current_line: list[str] = []
            current_width = 0

            # Get the indent and available width for current line
            indent = self.initial_indent if is_first_line else self.subsequent_indent
            line_width = self.width - self._width(indent)

            # If continuing a hyperlink from previous line, prepend open sequence
            if hyperlink_state is not None:
                open_seq = HyperlinkParams(
                    url=hyperlink_state.url,
                    params=hyperlink_state.params,
                    terminator=hyperlink_state.terminator,
                ).make_open()
                chunks[-1] = open_seq + chunks[-1]

            # Drop leading whitespace (except at very start)
            # When dropping, transfer any sequences to the next chunk.
            # Only drop if there's actual whitespace text, not if it's only sequences.
            stripped = self._strip_sequences(chunks[-1])
            if self.drop_whitespace and lines and stripped and not stripped.strip():
                sequences = self._extract_sequences(chunks[-1])
                del chunks[-1]
                if sequences and chunks:
                    chunks[-1] = sequences + chunks[-1]

            # Greedily add chunks that fit
            while chunks:
                chunk_width = self._width(chunks[-1])
                if current_width + chunk_width > line_width:
                    break
                current_line.append(chunks.pop())
                current_width += chunk_width

            # Handle chunk that's too long for any line
            if chunks and self._width(chunks[-1]) > line_width:
                self._handle_long_word(
                    chunks, current_line, current_width, line_width
                )
                current_width = self._width(''.join(current_line))
                # Remove any empty chunks left by _handle_long_word
                while chunks and not chunks[-1]:
                    del chunks[-1]

            # Drop trailing whitespace
            # When dropping, transfer any sequences to the previous chunk.
            # Only drop if there's actual whitespace text, not if it's only sequences.
            stripped_last = self._strip_sequences(current_line[-1]) if current_line else ''
            if (self.drop_whitespace and current_line and
                    stripped_last and not stripped_last.strip()):
                sequences = self._extract_sequences(current_line[-1])
                current_width -= self._width(current_line[-1])
                del current_line[-1]
                if sequences and current_line:
                    current_line[-1] = current_line[-1] + sequences

            if not current_line:
                continue

            # Check whether this is a normal append or max_lines
            # truncation. Matches stdlib textwrap precedence:
            # normal if max_lines not set, not yet reached, or no
            # remaining visible content that would need truncation.
            no_more_content = (
                not chunks or
                self.drop_whitespace and
                len(chunks) == 1 and
                not self._strip_sequences(chunks[0]).strip()
            )
            if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    no_more_content
                    and current_width <= line_width):
                line_content = ''.join(current_line)

                # Track hyperlink state through this line's content
                new_state = self._track_hyperlink_state(line_content, hyperlink_state)

                if new_state is None:
                    hyperlink_state = None
                else:
                    # The line ends inside a hyperlink. Derive the params (with id) from the
                    # hyperlink that is open *now*, never from an earlier one: a continued
                    # hyperlink already carries its id in its params, while a hyperlink that
                    # first opened on this line keeps its own id or receives a fresh one.
                    if 'id=' in new_state.params:
                        link_params = new_state.params
                    elif new_state.params:
                        # Prepend id to existing params. Per OSC 8 spec, params can have
                        # multiple key=value pairs separated by ':'.
                        link_params = f'id={self._next_hyperlink_id()}:{new_state.params}'
                    else:
                        link_params = f'id={self._next_hyperlink_id()}'

                    # Inject the id into the opening sequence if it didn't have one.
                    # The hyperlink left open is the one opened last on this line, so
                    # rewrite the last occurrence of its open sequence.
                    if 'id=' not in new_state.params:
                        old_open = HyperlinkParams(
                            url=new_state.url,
                            params=new_state.params,
                            terminator=new_state.terminator,
                        ).make_open()
                        new_open = HyperlinkParams(
                            url=new_state.url,
                            params=link_params,
                            terminator=new_state.terminator,
                        ).make_open()
                        head, found, tail = line_content.rpartition(old_open)
                        if found:
                            line_content = head + new_open + tail

                    # Close the hyperlink at the end of this line
                    line_content += HyperlinkParams(
                        terminator=new_state.terminator, url='').make_close()

                    # Update state for next line, using computed id
                    hyperlink_state = HyperlinkParams(
                        new_state.url, link_params, new_state.terminator)

                # Strip trailing whitespace when drop_whitespace is enabled
                # (matches CPython #140627 fix behavior)
                if self.drop_whitespace:
                    line_content = line_content.rstrip()
                lines.append(indent + line_content)
                is_first_line = False
                continue

            # max_lines reached with remaining content.
            # pop chunks until placeholder fits, then stop wrapping.
            placeholder_w = self._width(self.placeholder)
            while current_line:
                last_text = self._strip_sequences(current_line[-1])
                if (last_text.strip()
                        and current_width + placeholder_w <= line_width):
                    line_content = ''.join(current_line)
                    new_state = self._track_hyperlink_state(
                        line_content, hyperlink_state)
                    if new_state is not None:
                        line_content += HyperlinkParams(
                            terminator=new_state.terminator, url='').make_close()
                    lines.append(indent + line_content + self.placeholder)
                    break
                current_width -= self._width(current_line[-1])
                del current_line[-1]
            else:
                # Nothing on this line leaves room for the placeholder: attach it to the
                # previous line when it fits there, otherwise emit it on a line of its own.
                prev_line = self._rstrip_visible(lines[-1]) if lines else None
                if (prev_line is not None
                        and self._width(prev_line) + placeholder_w <= self.width):
                    lines[-1] = prev_line + self.placeholder
                else:
                    lines.append(indent + self.placeholder.lstrip())
            break

        return lines

    def _track_hyperlink_state(
            self, text: str,
            state: Optional[HyperlinkParams]) -> Optional[HyperlinkParams]:
        """
        Track hyperlink state through text.

        :param text: Text to scan for hyperlink sequences.
        :param state: Current state or None if outside hyperlink.
        :returns: Updated state after processing text.
        """
        for segment, is_seq in iter_sequences(text):
            if not is_seq:
                continue
            parsed_link = HyperlinkParams.parse(segment)
            if parsed_link is not None and parsed_link.url:  # has URL = open
                state = parsed_link
            elif segment.startswith(self._HYPERLINK_CLOSE):  # close
                state = None
        return state

    def _find_hyphen_break(self, chunk: str, space_left: int) -> int:
        """
        Find where to break ``chunk`` just after a hyphen so the piece fits ``space_left`` cells.

        Candidates are tried from the right-most hyphen within the first ``space_left``
        characters of the sequence-stripped text towards the left. A candidate is accepted only
        when the resulting piece (sequences included) measures at most ``space_left`` display
        cells, so wide characters before the hyphen cannot push the line past its width.

        :param chunk: The over-long chunk, may contain terminal sequences.
        :param space_left: Display cells remaining on the current line.
        :returns: Index into ``chunk`` right after the chosen hyphen, or 0 if there is none.
        """
        stripped = self._strip_sequences(chunk)
        if len(stripped) <= space_left:
            return 0

        search_end = space_left
        while True:
            hyphen_pos = stripped.rfind('-', 0, search_end)
            # As in stdlib: need a hyphen preceded by at least one non-hyphen character.
            # If this prefix is all hyphens, every shorter prefix is as well.
            if hyphen_pos <= 0 or not any(c != '-' for c in stripped[:hyphen_pos]):
                return 0
            # Map back to original position including sequences
            hyphen_end = self._map_stripped_pos_to_original(chunk, hyphen_pos + 1)
            if self._width(chunk[:hyphen_end]) <= space_left:
                return hyphen_end
            # Too wide in display cells: try the next hyphen to the left.
            search_end = hyphen_pos

    def _handle_long_word(self, reversed_chunks: list[str],
                          cur_line: list[str], cur_len: int,
                          width: int) -> None:
        """
        Sequence-aware :meth:`textwrap.TextWrapper._handle_long_word`.

        This method ensures that word boundaries are not broken mid-sequence, and respects grapheme
        cluster boundaries when breaking long words.

        :param reversed_chunks: Remaining chunks in reverse order; the last item is the long
            chunk. It is replaced by its unconsumed remainder, or popped entirely.
        :param cur_line: Chunks of the line being built; the consumed piece is appended to it.
        :param cur_len: Display width already used by ``cur_line``.
        :param width: Display width available for the line.
        """
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        chunk = reversed_chunks[-1]

        if self.break_long_words:
            # Prefer breaking after a hyphen that fits within the remaining cells.
            actual_end = 0
            if self.break_on_hyphens:
                actual_end = self._find_hyphen_break(chunk, space_left)

            if not actual_end:
                # Break at grapheme boundaries to avoid splitting multi-codepoint characters
                actual_end = self._find_break_position(chunk, space_left)
                # If no progress possible (e.g., wide char exceeds line width),
                # force at least one grapheme to avoid infinite loop.
                # Only force when cur_line is empty; if line has content,
                # appending nothing is safe and the line will be committed.
                if actual_end == 0 and not cur_line:
                    actual_end = self._find_first_grapheme_end(chunk)
            cur_line.append(chunk[:actual_end])
            reversed_chunks[-1] = chunk[actual_end:]

        elif not cur_line:
            # Not allowed to break the word: an empty line takes it whole, overflowing.
            cur_line.append(reversed_chunks.pop())

    def _map_stripped_pos_to_original(self, text: str, stripped_pos: int) -> int:
        """
        Map a position in stripped text back to original text position.

        A position that falls exactly on the end of a text segment maps to the start of the
        next text segment, i.e. after any sequences in between.
        """
        stripped_idx = 0
        original_idx = 0

        for segment, is_seq in iter_sequences(text):
            if is_seq:
                original_idx += len(segment)
            elif stripped_idx + len(segment) > stripped_pos:
                # Position is within this segment
                return original_idx + (stripped_pos - stripped_idx)
            else:
                stripped_idx += len(segment)
                original_idx += len(segment)

        # Caller guarantees stripped_pos < total stripped chars, so we always
        # return from within the loop. This line satisfies the type checker.
        return original_idx  # pragma: no cover

    def _find_break_position(self, text: str, max_width: int) -> int:
        """Find string index in text that fits within max_width cells."""
        idx = 0
        width_so_far = 0

        while idx < len(text):
            # Skip escape sequences (they don't add width)
            if text[idx] == '\x1b':
                match = ZERO_WIDTH_PATTERN.match(text, idx)
                if match:
                    idx = match.end()
                    continue

            # Get grapheme (use start= to avoid slice allocation)
            grapheme = next(iter_graphemes(text, start=idx))

            grapheme_width = self._width(grapheme)
            if width_so_far + grapheme_width > max_width:
                return idx  # Found break point

            width_so_far += grapheme_width
            idx += len(grapheme)

        # Caller guarantees chunk_width > max_width, so a grapheme always
        # exceeds and we return from within the loop. Type checker requires this.
        return idx  # pragma: no cover

    def _find_first_grapheme_end(self, text: str) -> int:
        """Find the end position of the first grapheme (0 for empty text)."""
        return len(next(iter_graphemes(text), ''))

    def _rstrip_visible(self, text: str) -> str:
        """Strip trailing visible whitespace, preserving trailing sequences."""
        segments = list(iter_sequences(text))

        # Index of the last text segment holding anything besides trailing whitespace.
        last_vis = -1
        for i, (segment, is_seq) in enumerate(segments):
            if not is_seq and segment.rstrip():
                last_vis = i
        if last_vis == -1:
            return ''

        # Keep everything before it untouched, right-strip it, then keep only the
        # sequences (not the whitespace-only text) that follow it.
        kept = [segment for segment, _ in segments[:last_vis]]
        kept.append(segments[last_vis][0].rstrip())
        kept.extend(segment for segment, is_seq in segments[last_vis + 1:] if is_seq)
        return ''.join(kept)

515

516

def wrap(text: str, width: int = 70, *,
         control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
         tabsize: int = 8,
         expand_tabs: bool = True,
         replace_whitespace: bool = True,
         ambiguous_width: int = 1,
         initial_indent: str = '',
         subsequent_indent: str = '',
         fix_sentence_endings: bool = False,
         break_long_words: bool = True,
         break_on_hyphens: bool = True,
         drop_whitespace: bool = True,
         max_lines: Optional[int] = None,
         placeholder: str = ' [...]',
         propagate_sgr: bool = True) -> list[str]:
    r"""
    Wrap text to fit within given width, returning a list of wrapped lines.

    Works like :func:`textwrap.wrap`, except that width is measured in display cells
    instead of characters, so wide characters, combining marks, and terminal escape
    sequences are accounted for.

    :param text: Text to wrap, may contain terminal sequences.
    :param width: Maximum line width in display cells.
    :param control_codes: How to handle terminal sequences (see :func:`~.width`).
    :param tabsize: Tab stop width for tab expansion.
    :param expand_tabs: If True (default), tab characters are expanded
        to spaces using ``tabsize``.
    :param replace_whitespace: If True (default), each whitespace character
        is replaced with a single space after tab expansion. When False,
        control whitespace like ``\n`` has zero display width (unlike
        :func:`textwrap.wrap` which counts ``len()``), so wrap points
        may differ from stdlib for non-space whitespace characters.
    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :param initial_indent: String prepended to first line.
    :param subsequent_indent: String prepended to subsequent lines.
    :param fix_sentence_endings: If True, ensure sentences are always
        separated by exactly two spaces.
    :param break_long_words: If True, break words longer than width.
    :param break_on_hyphens: If True, allow breaking at hyphens.
    :param drop_whitespace: If True (default), whitespace at the beginning
        and end of each line (after wrapping but before indenting) is dropped.
        Set to False to preserve whitespace.
    :param max_lines: If set, output contains at most this many lines, with
        ``placeholder`` appended to the last line if the text was truncated.
    :param placeholder: String appended to the last line when text is
        truncated by ``max_lines``. Default is ``' [...]'``.
    :param propagate_sgr: If True (default), SGR (terminal styling) sequences
        are propagated across wrapped lines. Each line ends with a reset
        sequence and the next line begins with the active style restored.
    :returns: List of wrapped lines without trailing newlines.

    SGR (terminal styling) sequences are propagated across wrapped lines
    by default. Each line ends with a reset sequence and the next line
    begins with the active style restored::

        >>> wrap('\x1b[1;34mHello world\x1b[0m', width=6)
        ['\x1b[1;34mHello\x1b[0m', '\x1b[1;34mworld\x1b[0m']

    Set ``propagate_sgr=False`` to disable this behavior.

    Like :func:`textwrap.wrap`, newlines in the input text are treated as
    whitespace and collapsed. To preserve paragraph breaks, wrap each
    paragraph separately::

        >>> text = 'First line.\nSecond line.'
        >>> wrap(text, 40)  # newline collapsed to space
        ['First line. Second line.']
        >>> [line for para in text.split('\n')
        ...  for line in (wrap(para, 40) if para else [''])]
        ['First line.', 'Second line.']

    .. seealso::

       :func:`textwrap.wrap`, :class:`textwrap.TextWrapper`
           Standard library text wrapping (character-based).

       :class:`.SequenceTextWrapper`
           Class interface for advanced wrapping options.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.5.0
       Added ``propagate_sgr`` parameter (default True).

    .. versionchanged:: 0.6.0
       Added ``expand_tabs``, ``replace_whitespace``, ``fix_sentence_endings``,
       ``drop_whitespace``, ``max_lines``, and ``placeholder`` parameters.

    Example::

        >>> from wcwidth import wrap
        >>> wrap('hello world', 5)
        ['hello', 'world']
        >>> wrap('中文字符', 4)  # CJK characters (2 cells each)
        ['中文', '字符']
    """
    # pylint: disable=too-many-arguments,too-many-locals
    # Every option except ``propagate_sgr`` is forwarded to the wrapper class as-is.
    wrapped = SequenceTextWrapper(
        width=width,
        control_codes=control_codes,
        tabsize=tabsize,
        ambiguous_width=ambiguous_width,
        expand_tabs=expand_tabs,
        replace_whitespace=replace_whitespace,
        fix_sentence_endings=fix_sentence_endings,
        initial_indent=initial_indent,
        subsequent_indent=subsequent_indent,
        break_long_words=break_long_words,
        break_on_hyphens=break_on_hyphens,
        drop_whitespace=drop_whitespace,
        max_lines=max_lines,
        placeholder=placeholder,
    ).wrap(text)

    # SGR propagation is a post-processing pass over the finished lines.
    return _propagate_sgr(wrapped) if propagate_sgr else wrapped