Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/

1"""

2This is a python implementation of wcwidth() and wcswidth().

4https://github.com/jquast/wcwidth

6Derived from Markus Kuhn's C code,

8This is an implementation of wcwidth() and wcswidth() (defined in

9IEEE Std 1003.1-2001) for Unicode.

11http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html

12http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html

14In fixed-width output devices, Latin characters all occupy a single

15"cell" position of equal width, whereas ideographic CJK characters

16occupy two such cells. Interoperability between terminal-line

17applications and (teletype-style) character terminals using the

18UTF-8 encoding requires agreement on which character should advance

19the cursor by how many cell positions. No established formal

20standards exist at present on which Unicode character shall occupy

21how many cell positions on character terminals. These routines are

22a first attempt at defining such behavior based on simple rules

23applied to data provided by the Unicode Consortium.

25For some graphical characters, the Unicode standard explicitly

26defines a character-cell width via the definition of the East Asian

27FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.

28In all these cases, there is no ambiguity about which width a

29terminal shall use. For characters in the East Asian Ambiguous (A)

30class, the width choice depends purely on a preference of backward

31compatibility with either historic CJK or Western practice.

32Choosing single-width for these characters is easy to justify as

33the appropriate long-term solution, as the CJK practice of

34displaying these characters as double-width comes from historic

35implementation simplicity (8-bit encoded characters were displayed

36single-width and 16-bit ones double-width, even for Greek,

37Cyrillic, etc.) and not any typographic considerations.

39Much less clear is the choice of width for the Not East Asian

40(Neutral) class. Existing practice does not dictate a width for any

41of these characters. It would nevertheless make sense

42typographically to allocate two character cells to characters such

43as for instance EM SPACE or VOLUME INTEGRAL, which cannot be

44represented adequately with a single-width glyph. The following

45routines at present merely assign a single-cell width to all

46neutral characters, in the interest of simplicity. This is not

47entirely satisfactory and should be reconsidered before

48establishing a formal standard in this area. At the moment, the

49decision which Not East Asian (Neutral) characters should be

50represented by double-width glyphs cannot yet be answered by

51applying a simple rule from the Unicode database content. Setting

52up a proper standard for the behavior of UTF-8 character terminals

53will require a careful analysis not only of each Unicode character,

54but also of each presentation form, something the author of these

55routines has so far avoided doing.

57http://www.unicode.org/unicode/reports/tr11/

59Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c

60"""

62from __future__ import annotations

64# std imports

65from functools import lru_cache

67# local

68from .bisearch import bisearch

69from ._constants import _LATEST_VERSION, _AMBIGUOUS_TABLE, _ZERO_WIDTH_TABLE, _WIDE_EASTASIAN_TABLE

72@lru_cache(maxsize=128)

73def _wcversion_value(ver_string: str) -> tuple[int, ...]: # pragma: no cover

74 """

75 Integer-mapped value of given dotted version string.

77 .. deprecated:: 0.3.0

79 This function is no longer used internally by wcwidth but is retained

80 for API compatibility with external tools.

82 :param ver_string: Unicode version string, of form ``n.n.n``.

83 :returns: tuple of digit tuples, ``tuple(int, [...])``.

84 """

85 retval = tuple(map(int, (ver_string.split('.'))))

86 return retval

@lru_cache(maxsize=8)
def _wcmatch_version(given_version: str) -> str:  # pylint: disable=unused-argument
    """
    Return the supported Unicode version level.

    .. deprecated:: 0.3.0
        This function now always returns the latest version.

        This function is no longer used internally by wcwidth but is retained
        for API compatibility with external tools.

    :param given_version: Ignored. Any value is accepted for compatibility.
    :returns: The latest unicode version string.
    """
    # The argument is never inspected; every call yields the same constant.
    return _LATEST_VERSION

104

105

106# maxsize=1024: western scripts need ~64 unique codepoints per session, but

107# CJK sessions may use ~2000 of ~3500 common hanzi/kanji. 1024 accommodates

108# heavy CJK use. Performance floor at 32; bisearch is ~100ns per miss.

109

@lru_cache(maxsize=1024)
def wcwidth(wc: str, unicode_version: str = 'auto', ambiguous_width: int = 1) -> int:  # pylint: disable=unused-argument
    r"""
    Given one Unicode codepoint, return its printable length on a terminal.

    :param wc: A single Unicode character. An empty string is measured as
        codepoint 0.
    :param unicode_version: Ignored. Retained for backwards compatibility.

        .. deprecated:: 0.3.0
           Only the latest Unicode version is now shipped.

    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts
        where ambiguous characters display as double-width. See
        :ref:`ambiguous_width` for details.
    :returns: The width, in cells, necessary to display the character of
        Unicode string character, ``wc``.  Returns 0 if the ``wc`` argument has
        no printable effect on a terminal (such as NUL '\0'), -1 if ``wc`` is
        not printable, or has an indeterminate effect on the terminal, such as
        a control character.  Otherwise, the number of column positions the
        character occupies on a graphic terminal (1 or 2) is returned.
    :raises TypeError: if ``wc`` is a string of more than one character
        (raised by :func:`ord`).

    See :ref:`Specification` for details of cell measurement.
    """
    ucs = ord(wc) if wc else 0

    # small optimization: early return of 1 for printable ASCII, this provides
    # approximately 40% performance improvement for mostly-ascii documents, with
    # less than 1% impact to others.
    if 32 <= ucs < 0x7f:
        return 1

    # C0/C1 control characters are -1 for compatibility with POSIX-like calls.
    # Codepoint 0 (NUL) is not matched here and falls through to the table
    # lookups below.
    if 0 < ucs < 32 or 0x07F <= ucs < 0x0A0:
        return -1

    # Zero width
    if bisearch(ucs, _ZERO_WIDTH_TABLE):
        return 0

    # Wide (F/W categories)
    if bisearch(ucs, _WIDE_EASTASIAN_TABLE):
        return 2

    # Ambiguous width (A category) - only when ambiguous_width=2; any other
    # value leaves ambiguous characters at the default width of 1.
    if ambiguous_width == 2 and bisearch(ucs, _AMBIGUOUS_TABLE):
        return 2

    return 1

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/_wcwidth.py: 43%

23 statements