Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/

1"""

2This is a python implementation of wcwidth() and wcswidth().

4https://github.com/jquast/wcwidth

6Derived from Markus Kuhn's C code,

8This is an implementation of wcwidth() and wcswidth() (defined in

9IEEE Std 1003.1-2001) for Unicode.

11http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html

12http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html

14In fixed-width output devices, Latin characters all occupy a single

15"cell" position of equal width, whereas ideographic CJK characters

16occupy two such cells. Interoperability between terminal-line

17applications and (teletype-style) character terminals using the

18UTF-8 encoding requires agreement on which character should advance

19the cursor by how many cell positions. No established formal

20standards exist at present on which Unicode character shall occupy

21how many cell positions on character terminals. These routines are

22a first attempt at defining such behavior based on simple rules

23applied to data provided by the Unicode Consortium.

25For some graphical characters, the Unicode standard explicitly

26defines a character-cell width via the definition of the East Asian

27FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.

28In all these cases, there is no ambiguity about which width a

29terminal shall use. For characters in the East Asian Ambiguous (A)

30class, the width choice depends purely on a preference of backward

31compatibility with either historic CJK or Western practice.

32Choosing single-width for these characters is easy to justify as

33the appropriate long-term solution, as the CJK practice of

34displaying these characters as double-width comes from historic

35implementation simplicity (8-bit encoded characters were displayed

36single-width and 16-bit ones double-width, even for Greek,

37Cyrillic, etc.) and not any typographic considerations.

39Much less clear is the choice of width for the Not East Asian

40(Neutral) class. Existing practice does not dictate a width for any

41of these characters. It would nevertheless make sense

42typographically to allocate two character cells to characters such

43as for instance EM SPACE or VOLUME INTEGRAL, which cannot be

44represented adequately with a single-width glyph. The following

45routines at present merely assign a single-cell width to all

46neutral characters, in the interest of simplicity. This is not

47entirely satisfactory and should be reconsidered before

48establishing a formal standard in this area. At the moment, the

49decision which Not East Asian (Neutral) characters should be

50represented by double-width glyphs cannot yet be answered by

51applying a simple rule from the Unicode database content. Setting

52up a proper standard for the behavior of UTF-8 character terminals

53will require a careful analysis not only of each Unicode character,

54but also of each presentation form, something the author of these

55routines has avoided doing so far.

57http://www.unicode.org/unicode/reports/tr11/

59Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c

60"""

62from __future__ import annotations

64# std imports

65from functools import lru_cache

67__lazy_modules__ = [

68 "wcwidth.bisearch",

69 "wcwidth._constants",

70]

71# local

72from .bisearch import bisearch

73from ._constants import _LATEST_VERSION, _AMBIGUOUS_TABLE, _ZERO_WIDTH_TABLE, _WIDE_EASTASIAN_TABLE

76@lru_cache(maxsize=128)

77def _wcversion_value(ver_string: str) -> tuple[int, ...]: # pragma: no cover

78 """

79 Integer-mapped value of given dotted version string.

81 .. deprecated:: 0.3.0

83 This function is no longer used internally by wcwidth but is retained

84 for API compatibility with external tools.

86 :param ver_string: Unicode version string, of form ``n.n.n``.

87 :returns: tuple of digit tuples, ``tuple(int, [...])``.

88 """

89 retval = tuple(map(int, (ver_string.split('.'))))

90 return retval

@lru_cache(maxsize=8)
def _wcmatch_version(given_version: str) -> str:  # pylint: disable=unused-argument
    """
    Return the supported Unicode version level.

    .. deprecated:: 0.3.0
        This function now always returns the latest version.

        This function is no longer used internally by wcwidth but is retained
        for API compatibility with external tools.

    :param given_version: Ignored. Any hashable value is accepted for
        compatibility; hashability is required only because the call is
        wrapped by ``lru_cache``.
    :returns: The latest unicode version string, ``_LATEST_VERSION``.
    """
    return _LATEST_VERSION

108

109

# maxsize=1024: western scripts need ~64 unique codepoints per session, but
# CJK sessions may use ~2000 of ~3500 common hanzi/kanji. 1024 accommodates
# heavy CJK use. Performance floor at 32; bisearch is ~100ns per miss.
@lru_cache(maxsize=1024)
def wcwidth(wc: str, unicode_version: str = 'auto', ambiguous_width: int = 1) -> int:  # pylint: disable=unused-argument
    r"""
    Given one Unicode codepoint, return its printable length on a terminal.

    :param wc: A single Unicode character. An empty string is measured as
        codepoint 0 (NUL).
    :param unicode_version: Ignored. Retained for backwards compatibility.

        .. deprecated:: 0.3.0
            Only the latest Unicode version is now shipped.

    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts
        where ambiguous characters display as double-width. Only the value
        ``2`` changes the result; any other value measures ambiguous
        characters as ``1``. See :ref:`ambiguous_width` for details.
    :returns: The width, in cells, necessary to display the character of
        Unicode string character, ``wc``.  Returns 0 if the ``wc`` argument has
        no printable effect on a terminal (such as NUL '\0'), -1 if ``wc`` is
        not printable, or has an indeterminate effect on the terminal, such as
        a control character.  Otherwise, the number of column positions the
        character occupies on a graphic terminal (1 or 2) is returned.
    :raises TypeError: if ``wc`` is a string of more than one character, as
        raised by :func:`ord`.

    See :ref:`Specification` for details of cell measurement.
    """
    ucs = ord(wc) if wc else 0

    # small optimization: early return of 1 for printable ASCII, this provides
    # approximately 40% performance improvement for mostly-ascii documents, with
    # less than 1% impact to others.
    if 32 <= ucs < 0x7f:
        return 1

    # C0/C1 control characters are -1 for compatibility with POSIX-like calls.
    # NUL (0) is deliberately excluded here so that it falls through to the
    # table lookups below instead of being reported as unprintable.
    if 0 < ucs < 32 or 0x7F <= ucs < 0xA0:
        return -1

    # Zero width
    if bisearch(ucs, _ZERO_WIDTH_TABLE):
        return 0

    # Wide (F/W categories)
    if bisearch(ucs, _WIDE_EASTASIAN_TABLE):
        return 2

    # Ambiguous width (A category) - only when ambiguous_width=2
    if ambiguous_width == 2 and bisearch(ucs, _AMBIGUOUS_TABLE):
        return 2

    return 1

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/wcwidth/_wcwidth.py: 46%

24 statements