Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/_wcswidth.py: 12%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

78 statements  

1"""This is a python implementation of wcswidth().""" 

2 

3from __future__ import annotations 

4 

5from typing import Optional 

6 

7# local 

8from ._wcwidth import wcwidth 

9from .bisearch import bisearch 

10from ._constants import (_EMOJI_ZWJ_SET, 

11 _ISC_VIRAMA_SET, 

12 _CATEGORY_MC_TABLE, 

13 _FITZPATRICK_RANGE, 

14 _REGIONAL_INDICATOR_SET) 

15from .table_vs16 import VS16_NARROW_TO_WIDE 

16from .table_grapheme import ISC_CONSONANT 

17 

18 

def wcswidth(
    pwcs: str,
    n: Optional[int] = None,
    unicode_version: str = 'auto',
    ambiguous_width: int = 1,
) -> int:
    """
    Given a unicode string, return its printable length on a terminal.

    See :ref:`Specification` for details of cell measurement.

    This implementation differs from Markus Kuhn's original POSIX C implementation, in that this
    ``wcswidth()`` processes grapheme strings yielded by :func:`wcwidth.iter_graphemes` defined by
    `Unicode Standard Annex #29`_. POSIX wcswidth(3) is not grapheme-aware and does not measure many
    kinds of Emojis or complex scripts correctly.

    :param pwcs: Measure width of given unicode string.
    :param n: When ``n`` is None (default), return the length of the entire
        string, otherwise only the first ``n`` characters are measured. Values
        larger than ``len(pwcs)`` are clamped to the string length, and
        negative values measure nothing (width ``0``).

    :param unicode_version: Ignored. Retained for backwards compatibility.

        .. deprecated:: 0.3.0
           Only the latest Unicode version is now shipped.

    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :returns: The width, in cells, needed to display the first ``n`` characters
        of the unicode string ``pwcs``. Returns ``-1`` for C0 and C1 control
        characters!

    .. _`Unicode Standard Annex #29`: https://www.unicode.org/reports/tr29/
    """
    # pylint: disable=unused-argument,too-many-locals,too-many-statements
    # pylint: disable=too-complex,too-many-branches,duplicate-code
    # This function intentionally keeps all logic inline for performance.

    # Clamp the measured span to the string: an ``n`` beyond the end must not
    # index past the last character, and a negative ``n`` measures nothing.
    length = len(pwcs)
    end = length if n is None else max(0, min(n, length))

    # Fast path: in a pure ASCII printable string every character is exactly
    # one cell wide, so the width equals the number of characters measured.
    if pwcs.isascii() and pwcs.isprintable():
        return end

    # Select wcwidth call pattern for best lru_cache performance
    _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)

    total_width = 0
    idx = 0

    # grapheme-clustering state
    # Index of the last character that VS16 may still widen; negative when
    # there is none or it has already been consumed.
    last_measured_idx = -2
    # Codepoint of the last measured character, consulted for Fitzpatrick modifiers.
    last_measured_ucs = -1
    # True when the previous zero-width character was in _ISC_VIRAMA_SET.
    last_was_virama = False
    # True once a consonant has been absorbed after a virama; one cell is
    # added for it when the next measured character arrives or at the end.
    conjunct_pending = False

    while idx < end:
        char = pwcs[idx]
        ucs = ord(char)

        # ZWJ (U+200D)
        if ucs == 0x200D:
            if last_was_virama:
                # Skip only the joiner; the virama state is left set so that
                # the following character can still form a conjunct.
                idx += 1
            else:
                # Skip the joiner and the character it joins. When the joiner
                # is the last character in range this simply ends the loop.
                idx += 2
            continue

        # VS16 (U+FE0F): converts preceding narrow character to wide.
        if ucs == 0xFE0F and last_measured_idx >= 0:
            total_width += bisearch(
                ord(pwcs[last_measured_idx]),
                VS16_NARROW_TO_WIDE['9.0.0'],
            )
            last_measured_idx = -2  # prevent double application
            idx += 1
            continue

        # Regional Indicator & Fitzpatrick (both above BMP)
        if ucs > 0xFFFF:
            if ucs in _REGIONAL_INDICATOR_SET:
                # Count the regional indicators immediately preceding this one;
                # an odd count means this is the second half of a pair, which
                # adds no width of its own.
                ri_before = 0
                j = idx - 1
                while j >= 0 and ord(pwcs[j]) in _REGIONAL_INDICATOR_SET:
                    ri_before += 1
                    j -= 1
                if ri_before % 2 == 1:
                    last_measured_ucs = ucs
                    idx += 1
                    continue
            elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
                  and last_measured_ucs in _EMOJI_ZWJ_SET):
                # Modifier following a member of _EMOJI_ZWJ_SET adds no width.
                idx += 1
                continue

        # Virama conjunct formation
        if last_was_virama and bisearch(ucs, ISC_CONSONANT):
            last_measured_idx = idx
            last_measured_ucs = ucs
            last_was_virama = False
            conjunct_pending = True
            idx += 1
            continue

        # Normal character: measure with wcwidth
        w = _wcwidth(char)
        if w < 0:
            # C0/C1 control character
            return -1
        if w > 0:
            if conjunct_pending:
                total_width += 1
                conjunct_pending = False
            total_width += w
            last_measured_idx = idx
            last_measured_ucs = ucs
            last_was_virama = False
        elif last_measured_idx >= 0 and bisearch(ucs, _CATEGORY_MC_TABLE):
            # Spacing Combining Mark (Mc) following a base character adds 1
            total_width += 1
            last_measured_idx = -2
            last_was_virama = False
            conjunct_pending = False
        else:
            last_was_virama = ucs in _ISC_VIRAMA_SET
        idx += 1

    if conjunct_pending:
        total_width += 1
    return total_width