Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/_wcswidth.py: 12%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""This is a python implementation of wcswidth()."""
3from __future__ import annotations
5from typing import Optional
7# local
8from ._wcwidth import wcwidth
9from .bisearch import bisearch
10from ._constants import (_EMOJI_ZWJ_SET,
11 _ISC_VIRAMA_SET,
12 _CATEGORY_MC_TABLE,
13 _FITZPATRICK_RANGE,
14 _REGIONAL_INDICATOR_SET)
15from .table_vs16 import VS16_NARROW_TO_WIDE
16from .table_grapheme import ISC_CONSONANT
def wcswidth(
    pwcs: str,
    n: Optional[int] = None,
    unicode_version: str = 'auto',
    ambiguous_width: int = 1,
) -> int:
    """
    Given a unicode string, return its printable length on a terminal.

    See :ref:`Specification` for details of cell measurement.

    This implementation differs from Markus Kuhn's original POSIX C implementation, in that this
    ``wcswidth()`` processes grapheme strings as yielded by :func:`wcwidth.iter_graphemes`, defined
    by `Unicode Standard Annex #29`_. POSIX wcswidth(3) is not grapheme-aware and does not measure
    many kinds of Emojis or complex scripts correctly.

    :param pwcs: Measure width of given unicode string.
    :param n: When ``n`` is None (default), return the length of the entire
        string, otherwise only the first ``n`` characters are measured. A value
        larger than ``len(pwcs)`` measures the entire string; a value of zero
        or less measures nothing and yields ``0``.

    :param unicode_version: Ignored. Retained for backwards compatibility.

        .. deprecated:: 0.3.0
           Only the latest Unicode version is now shipped.

    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :returns: The width, in cells, needed to display the first ``n`` characters
        of the unicode string ``pwcs``. Returns ``-1`` for C0 and C1 control
        characters!

    .. _`Unicode Standard Annex #29`: https://www.unicode.org/reports/tr29/
    """
    # pylint: disable=unused-argument,too-many-locals,too-many-statements
    # pylint: disable=too-complex,too-many-branches,duplicate-code
    # This function intentionally keeps all logic inline for performance.

    # Fast path: pure ASCII printable strings are always width == length
    if n is None and pwcs.isascii() and pwcs.isprintable():
        return len(pwcs)

    # Select wcwidth call pattern for best lru_cache performance
    _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)

    # Clamp ``n`` to the string length: without this, any ``n`` greater than
    # ``len(pwcs)`` walks the index past the end and raises IndexError.
    end = len(pwcs) if n is None else min(n, len(pwcs))
    total_width = 0
    idx = 0

    # grapheme-clustering state
    last_measured_idx = -2      # index of last width-bearing char; negative means "none"
    last_measured_ucs = -1      # codepoint of last width-bearing char
    last_was_virama = False     # previous zero-width char was a virama
    conjunct_pending = False    # a virama+consonant conjunct still owes one cell

    while idx < end:
        char = pwcs[idx]
        ucs = ord(char)

        # ZWJ (U+200D)
        if ucs == 0x200D:
            if last_was_virama:
                # Keep the virama state so a following consonant can still
                # join the conjunct; only the ZWJ itself is consumed.
                idx += 1
            elif idx + 1 < end:
                # Consume the ZWJ and the character joined to it, unmeasured.
                last_was_virama = False
                idx += 2
            else:
                # Trailing ZWJ with nothing after it.
                last_was_virama = False
                idx += 1
            continue

        # VS16 (U+FE0F): converts preceding narrow character to wide.
        if ucs == 0xFE0F and last_measured_idx >= 0:
            total_width += bisearch(
                ord(pwcs[last_measured_idx]),
                VS16_NARROW_TO_WIDE['9.0.0'],
            )
            last_measured_idx = -2  # prevent double application
            idx += 1
            continue

        # Regional Indicator & Fitzpatrick (both above BMP)
        if ucs > 0xFFFF:
            if ucs in _REGIONAL_INDICATOR_SET:
                # Count the run of regional indicators immediately before
                # this one; an odd count means this one completes a pair
                # whose first half has already been measured.
                ri_before = 0
                j = idx - 1
                while j >= 0 and ord(pwcs[j]) in _REGIONAL_INDICATOR_SET:
                    ri_before += 1
                    j -= 1
                if ri_before % 2 == 1:
                    last_measured_ucs = ucs
                    idx += 1
                    continue
            elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
                  and last_measured_ucs in _EMOJI_ZWJ_SET):
                # Skin-tone modifier following an emoji base adds no width.
                idx += 1
                continue

        # Virama conjunct formation
        if last_was_virama and bisearch(ucs, ISC_CONSONANT):
            last_measured_idx = idx
            last_measured_ucs = ucs
            last_was_virama = False
            conjunct_pending = True
            idx += 1
            continue

        # Normal character: measure with wcwidth
        w = _wcwidth(char)
        if w < 0:
            # C0/C1 control character
            return -1
        if w > 0:
            if conjunct_pending:
                # Settle the cell owed by the preceding conjunct before
                # counting this character.
                total_width += 1
                conjunct_pending = False
            total_width += w
            last_measured_idx = idx
            last_measured_ucs = ucs
            last_was_virama = False
        elif last_measured_idx >= 0 and bisearch(ucs, _CATEGORY_MC_TABLE):
            # Spacing Combining Mark (Mc) following a base character adds 1
            total_width += 1
            last_measured_idx = -2
            last_was_virama = False
            conjunct_pending = False
        else:
            # Any other zero-width character: remember whether it was a virama.
            last_was_virama = ucs in _ISC_VIRAMA_SET
        idx += 1

    # A conjunct left open at the end of the string still occupies a cell.
    if conjunct_pending:
        total_width += 1
    return total_width