Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/_wcswidth.py: 6%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""This is a python implementation of wcswidth()."""
3from __future__ import annotations
5from typing import Optional
7__lazy_modules__ = [
8 "wcwidth._constants",
9 "wcwidth._wcwidth",
10 "wcwidth.bisearch",
11 "wcwidth.table_grapheme",
12 "wcwidth.table_vs16",
13]
14# local
15from . import table_grapheme_overrides
16from ._wcwidth import wcwidth
17from .bisearch import bisearch
18from ._constants import (_EMOJI_ZWJ_SET,
19 _ISC_VIRAMA_SET,
20 _CATEGORY_MC_TABLE,
21 _FITZPATRICK_RANGE,
22 _REGIONAL_INDICATOR_SET,
23 resolve_terminal,
24 get_term_overrides)
25from .table_vs15 import VS15_WIDE_TO_NARROW
26from .table_vs16 import VS16_NARROW_TO_WIDE
27from .table_grapheme import GRAPHEME_EXTEND
def _scan_zwj_cluster_end(text: str, start: int, end: int) -> int:
    """
    Find where the ZWJ grapheme cluster whose base character is at *start* stops.

    The scan implements the UAX #29 GB11 shape ``ExtPict Extend* ZWJ x ExtPict``, applied
    repeatedly for as long as another ZWJ join follows.

    :param text: String being scanned.
    :param start: Index of the cluster's base character.
    :param end: Exclusive upper bound; no index at or beyond it is read.
    :returns: Index one past the last codepoint consumed by the scan.
    """
    def _skip_extend(pos: int) -> int:
        # Advance over a run of Extend codepoints (Fitzpatrick modifiers, VS16, etc.)
        while pos < end and bisearch(ord(text[pos]), GRAPHEME_EXTEND):
            pos += 1
        return pos

    # Extend characters directly following the base, ahead of any first ZWJ
    cursor = _skip_extend(start + 1)

    # Walk the chain: each link is a ZWJ, then an ExtPict, then that ExtPict's Extend run
    while cursor < end and ord(text[cursor]) == 0x200D:
        # The ZWJ itself is consumed even when nothing joinable follows it
        cursor += 1
        if cursor >= end or ord(text[cursor]) not in _EMOJI_ZWJ_SET:
            break
        # GB11: the codepoint after ZWJ is the ExtPict itself; its Extend modifiers trail it
        cursor = _skip_extend(cursor + 1)
    return cursor
def wcswidth(
    pwcs: str,
    n: Optional[int] = None,
    unicode_version: str = 'auto',
    ambiguous_width: int = 1,
) -> int:
    """
    Given a unicode string, return its printable length on a terminal.

    See :ref:`Specification` for details of cell measurement.

    This implementation differs from Markus Kuhn's original POSIX C implementation, in that this
    ``wcswidth()`` processes grapheme strings yielded by :func:`wcwidth.iter_graphemes` defined by
    `Unicode Standard Annex #29`_. POSIX wcswidth(3) is not grapheme-aware and does not measure many
    kinds of Emojis or complex scripts correctly.

    :param pwcs: Measure width of given unicode string.
    :param n: When ``n`` is None (default), return the length of the entire
        string, otherwise only the first ``n`` characters are measured. A value
        of ``n`` larger than ``len(pwcs)`` measures the entire string.
    :param unicode_version: Ignored. Retained for backwards compatibility.

        .. deprecated:: 0.3.0
           Only the latest Unicode version is now shipped.

    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :returns: The width, in cells, needed to display the first ``n`` characters
        of the unicode string ``pwcs``. Returns ``-1`` for C0 and C1 control
        characters!

    .. _`Unicode Standard Annex #29`: https://www.unicode.org/reports/tr29/
    """
    # pylint: disable=unused-argument,too-many-locals,too-many-statements,redefined-variable-type
    # pylint: disable=too-complex,too-many-branches,duplicate-code,too-many-nested-blocks

    # Fast path: pure ASCII printable strings are always width == length
    if n is None and pwcs.isascii() and pwcs.isprintable():
        return len(pwcs)

    _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)

    # Clamp to the string length: an ``n`` beyond the end must measure the whole string rather
    # than index past it (which raised IndexError).
    end = len(pwcs) if n is None else min(n, len(pwcs))
    total_width = 0
    idx = 0

    # Grapheme-clustering state. The width of the current cluster is held in cluster_width and
    # only added to total_width when the next cluster begins (or at end of input).
    last_measured_idx = -2  # -2 sentinel blocks VS16/VS15 (no base available)
    last_measured_ucs = -1
    last_measured_w = 0
    prev_was_virama = False
    cluster_width = 0
    vs16_nw_table = VS16_NARROW_TO_WIDE['9.0.0']
    vs15_wn_table = VS15_WIDE_TO_NARROW['9.0.0']
    _bisearch = bisearch

    while idx < end:
        char = pwcs[idx]
        ucs = ord(char)

        # 5. ZWJ (U+200D): consumed without contributing width.
        # Virama codepoints are treated as zero-width combining marks (Mn). When a
        # virama+consonant sequence forms a conjunct, its width is capped at 2 cells.

        # ZWJ (U+200D)
        if ucs == 0x200D:
            if prev_was_virama:
                # keep the virama state so the following consonant joins the conjunct
                idx += 1
            elif idx + 1 < end:
                # skip the ZWJ together with the codepoint it joins
                last_measured_w = 0
                prev_was_virama = False
                idx += 2
            else:
                # trailing ZWJ with nothing after it
                prev_was_virama = False
                idx += 1
            continue

        # 6. VS16 (U+FE0F): converts preceding narrow character to wide.
        if ucs == 0xFE0F and last_measured_idx >= 0:
            if _bisearch(last_measured_ucs, vs16_nw_table):
                cluster_width = 2
            # reset the base so a second selector cannot apply again
            last_measured_idx = -2
            idx += 1
            continue

        # VS15 (U+FE0E): text variation selector, requests narrow presentation.
        if ucs == 0xFE0E and last_measured_idx >= 0:
            if _bisearch(last_measured_ucs, vs15_wn_table) and last_measured_w == 2:
                # the base's width is still pending in cluster_width; subtracting from the
                # running total yields the same sum once the cluster is flushed
                total_width -= 1
            idx += 1
            continue

        # 7. Regional Indicator & Fitzpatrick (both above BMP)
        if ucs > 0xFFFF:
            if ucs in _REGIONAL_INDICATOR_SET:
                # count the run of Regional Indicators immediately before this one
                ri_before = 0
                j = idx - 1
                while j >= 0 and ord(pwcs[j]) in _REGIONAL_INDICATOR_SET:
                    ri_before += 1
                    j -= 1
                if ri_before % 2 == 1:
                    # second half of a pair: adds no width of its own
                    last_measured_ucs = ucs
                    idx += 1
                    continue
            elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
                  and last_measured_ucs in _EMOJI_ZWJ_SET):
                # skin-tone modifier following an emoji base: adds no width
                idx += 1
                continue

        # 8. Normal character: measure with wcwidth
        w = _wcwidth(char)
        if w < 0:
            return -1
        if w > 0:
            if prev_was_virama:
                # virama+consonant extends the current cluster, capped at 2 cells
                cluster_width = 2
            elif cluster_width:
                # a new base begins: flush the previous cluster
                total_width += cluster_width
                cluster_width = w
            else:
                cluster_width = w

            last_measured_idx = idx
            last_measured_ucs = ucs
            last_measured_w = w
            prev_was_virama = False
        elif ucs in _ISC_VIRAMA_SET:
            prev_was_virama = True
        elif last_measured_idx >= 0 and _bisearch(ucs, _CATEGORY_MC_TABLE):
            # zero-width codepoint found in the Mc table, following a base: cluster is 2 cells
            cluster_width = 2
            last_measured_idx = -2
            prev_was_virama = False
        else:
            prev_was_virama = False
        idx += 1

    # flush the final pending cluster
    if cluster_width:
        total_width += cluster_width
    return total_width
199def wcstwidth(
200 pwcs: str,
201 n: Optional[int] = None,
202 unicode_version: str = 'auto',
203 ambiguous_width: int = 1,
204 term_program: bool | str = True,
205) -> int:
206 """
207 Given a unicode string, return its printable length on a terminal given by ``term_program``.
209 See :ref:`Specification` for details of cell measurement.
211 Unlike :func:`wcswidth`, this function applies per-terminal correction tables for
212 emoji presentation and grapheme clusters.
214 :param pwcs: Measure width of given unicode string.
215 :param n: When ``n`` is None (default), return the length of the entire
216 string, otherwise only the first ``n`` characters are measured.
217 :param unicode_version: Ignored. Retained for backwards compatibility.
218 :param ambiguous_width: Width to use for East Asian Ambiguous (A)
219 characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
220 :param term_program: Terminal software identifier for table correction.
221 ``True`` (default) reads the ``TERM_PROGRAM`` or ``TERM`` environment
222 variable for auto-detection. ``False`` disables override lookup.
223 Accepts a canonical terminal name matching :func:`list_term_programs`,
224 such as from XTVERSION_, ENQ_, or ``TERM_PROGRAM``.
226 .. versionadded:: 0.8.0
227 :returns: The width, in cells, needed to display the first ``n`` characters
228 of the unicode string ``pwcs``. Returns ``-1`` for C0 and C1 control
229 characters!
230 """
231 # pylint: disable=unused-argument,too-many-locals,too-many-statements,redefined-variable-type
232 # pylint: disable=too-complex,too-many-branches,duplicate-code,too-many-nested-blocks
233 # This function intentionally keeps all logic inline for performance.
235 # Fast path: pure ASCII printable strings are always width == length
236 if n is None and pwcs.isascii() and pwcs.isprintable():
237 return len(pwcs)
239 # Resolve terminal software for override lookup
240 term_canonical = resolve_terminal(term_program)
242 # Skip override lookup when no terminal detected (avoids lru_cache call overhead).
243 # Extract locals for hot-loop performance (NamedTuple attribute access is slow).
244 if term_canonical:
245 overrides = get_term_overrides(term_canonical)
246 _narrower = overrides.narrower
247 _vs16_narrower = overrides.vs16_narrower
248 _vs15_wider = overrides.vs15_wider
249 _zeroer = overrides.zeroer
250 _narrow_wider = overrides.narrow_wider
251 _narrow_zeroer = overrides.narrow_zeroer
252 _grapheme_overrides = table_grapheme_overrides.get(term_canonical)
253 else:
254 _narrower = ()
255 _vs16_narrower = ()
256 _vs15_wider = ()
257 _zeroer = ()
258 _narrow_wider = ()
259 _narrow_zeroer = ()
260 _grapheme_overrides = {}
262 # Select wcwidth call pattern for best lru_cache performance
263 _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)
265 end = len(pwcs) if n is None else n
266 total_width = 0
267 idx = 0
269 # grapheme-clustering state and local re-binding for performance.
270 # Widths accumulate in cluster_width and flush at boundaries. A cluster is a base character
271 # plus combining marks, deferring the flush lets grapheme overrides replace the measured width
272 # retrospectively.
273 last_measured_idx = -2 # -2 sentinel blocks VS16/VS15 (no base available)
274 last_measured_ucs = -1
275 last_measured_w = 0
276 prev_was_virama = False
277 cluster_start = -1
278 total_before_cluster = 0
279 cluster_width = 0
280 vs16_nw_table = VS16_NARROW_TO_WIDE['9.0.0']
281 vs15_wn_table = VS15_WIDE_TO_NARROW['9.0.0']
282 _bisearch = bisearch
284 while idx < end:
285 char = pwcs[idx]
286 ucs = ord(char)
288 #
289 # Much of the logic below matches the logic in width(), but is repeated for improved
290 # performance, they are given matching index reference numbers (starting at #5).
291 #
292 # 5. ZWJ (U+200D): consumed without contributing width.
293 # Virama codepoints are treated as zero-width combining marks (Mn). When a
294 # virama+consonant sequence forms a conjunct, its width is capped at 2 cells
295 # matching behavior of popular terminals (PR #224)
297 # ZWJ (U+200D)
298 if ucs == 0x200D:
299 if prev_was_virama:
300 idx += 1
301 elif idx + 1 < end:
302 # Check for terminal grapheme override when base char is ExtPict/RI
303 if (_grapheme_overrides
304 and last_measured_idx >= 0
305 and last_measured_ucs in _EMOJI_ZWJ_SET):
306 cluster_end = _scan_zwj_cluster_end(pwcs, last_measured_idx, end)
307 cluster = pwcs[last_measured_idx:cluster_end]
308 override_w = _grapheme_overrides.get(cluster)
309 if override_w is not None:
310 total_width += (override_w - last_measured_w)
311 last_measured_idx = -2
312 last_measured_ucs = -1
313 last_measured_w = 0
314 prev_was_virama = False
315 cluster_start = -1
316 idx = cluster_end
317 continue
318 # No override; ZWJ breaks VS adjacency.
319 # VS16 already set last_measured_idx = -2, blocking further VS16.
320 last_measured_w = 0
321 prev_was_virama = False
322 idx += 2
323 else:
324 prev_was_virama = False
325 idx += 1
326 continue
328 # 6. VS16 (U+FE0F): converts preceding narrow character to wide.
329 if ucs == 0xFE0F and last_measured_idx >= 0:
330 if _vs16_narrower and _bisearch(last_measured_ucs, _vs16_narrower):
331 pass
332 elif _bisearch(last_measured_ucs, vs16_nw_table):
333 cluster_width = 2
334 last_measured_idx = -2 # prevent double application
335 idx += 1
336 continue
338 # VS15 (U+FE0E): text variation selector, requests narrow presentation.
339 if ucs == 0xFE0E and last_measured_idx >= 0:
340 base_ucs = last_measured_ucs
341 vs15_narrow = bisearch(base_ucs, vs15_wn_table)
342 if _vs15_wider and bisearch(base_ucs, _vs15_wider):
343 vs15_narrow = False
344 if vs15_narrow and last_measured_w == 2:
345 total_width -= 1
346 idx += 1
347 continue
349 # 7. Regional Indicator & Fitzpatrick (both above BMP)
350 if ucs > 0xFFFF:
351 if ucs in _REGIONAL_INDICATOR_SET:
352 ri_before = 0
353 j = idx - 1
354 while j >= 0 and ord(pwcs[j]) in _REGIONAL_INDICATOR_SET:
355 ri_before += 1
356 j -= 1
357 if ri_before % 2 == 1:
358 last_measured_ucs = ucs
359 idx += 1
360 continue
361 elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
362 and last_measured_ucs in _EMOJI_ZWJ_SET):
363 idx += 1
364 continue
366 # 8. Normal character: measure with wcwidth
367 w = _wcwidth(char)
368 if w < 0:
369 # C0/C1 control character
370 return -1
371 # Apply single-codepoint terminal overrides (pre-merged tuples)
372 if w == 2 and _narrower and bisearch(ucs, _narrower):
373 w = 1
374 elif w == 2 and _zeroer and bisearch(ucs, _zeroer):
375 w = 0
376 if w == 1 and _narrow_wider and bisearch(ucs, _narrow_wider):
377 w = 2
378 elif w == 1 and _narrow_zeroer and bisearch(ucs, _narrow_zeroer):
379 w = 0
380 if w > 0:
381 # virama+consonant extends current cluster; otherwise start new
382 if prev_was_virama:
383 cluster_width = 2
384 elif cluster_width:
385 # flush previous cluster, check for grapheme overrides
386 flushed = False
387 if _grapheme_overrides and cluster_start >= 0:
388 # Two-phase override lookup: candidate (cluster+current) catches Lo+Lo pairs
389 # where both chars bear width (Thai KO KAI + SARA AM). cluster_text (cluster
390 # alone) catches C+Mc clusters where the override key is shorter.
391 candidate = pwcs[cluster_start:idx + 1]
392 override_w = _grapheme_overrides.get(candidate)
393 if override_w is not None:
394 total_width = total_before_cluster + override_w
395 flushed = True
396 cluster_width = 0
397 else:
398 cluster_text = pwcs[cluster_start:idx]
399 override_w = _grapheme_overrides.get(cluster_text)
400 if override_w is not None:
401 total_width = total_before_cluster + override_w
402 else:
403 total_width += cluster_width
404 else:
405 total_width += cluster_width
406 if not flushed:
407 cluster_width = w
408 cluster_start = idx
409 total_before_cluster = total_width
410 else:
411 cluster_width = w
412 cluster_start = idx
413 total_before_cluster = total_width
414 last_measured_idx = idx
415 last_measured_ucs = ucs
416 last_measured_w = w
417 prev_was_virama = False
418 elif ucs in _ISC_VIRAMA_SET:
419 prev_was_virama = True
420 elif last_measured_idx >= 0 and _bisearch(ucs, _CATEGORY_MC_TABLE):
421 # Spacing Combining Mark (Mc) following a base character
422 cluster_width = 2
423 last_measured_idx = -2
424 prev_was_virama = False
425 else:
426 prev_was_virama = False
427 idx += 1
429 if cluster_width:
430 if _grapheme_overrides and cluster_start >= 0:
431 cluster_text = pwcs[cluster_start:end]
432 override_w = _grapheme_overrides.get(cluster_text)
433 if override_w is not None:
434 total_width = total_before_cluster + override_w
435 else:
436 total_width += cluster_width
437 else:
438 total_width += cluster_width
439 return total_width