Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/ftfy/formatting.py: 84%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

37 statements  

1""" 

2This module provides functions for justifying Unicode text in a monospaced 

3display such as a terminal. 

4 

5We used to have our own implementation here, but now we mostly rely on 

6the 'wcwidth' library. 

7""" 

8 

9from unicodedata import normalize 

10 

11from wcwidth import wcswidth, wcwidth 

12 

13from ftfy.fixes import remove_terminal_escapes 

14 

15 

def character_width(char: str) -> int:
    r"""
    Report how many cells a single character is expected to occupy when
    shown in a monospaced terminal. Printable characters always come out
    as 0, 1, or 2.

    Following the convention of the wcwidth library, nonprintable and
    control characters yield -1.

    >>> character_width('車')
    2
    >>> character_width('A')
    1
    >>> character_width('\N{ZERO WIDTH JOINER}')
    0
    >>> character_width('\n')
    -1
    """
    # Delegate the lookup to wcwidth and coerce its answer to a plain int.
    cells = wcwidth(char)
    return int(cells)

35 

36 

def monospaced_width(text: str) -> int:
    r"""
    Compute the "display width" of a string: the number of character cells
    it should occupy in a modern, Unicode-aware, monospaced terminal
    emulator.

    This helps when laying out text that may include non-spacing characters,
    or CJK characters that are two cells wide.

    If the string contains a non-printable or control character, the result
    is -1.

    >>> monospaced_width('ちゃぶ台返し')
    12
    >>> len('ちゃぶ台返し')
    6
    >>> monospaced_width('owl\N{SOFT HYPHEN}flavored')
    11
    >>> monospaced_width('example\x80')
    -1

    A more complex example: The Korean word 'ibnida' can be written with 3
    pre-composed characters or 7 jamo. Either way, it *looks* the same and
    takes up 6 character cells.

    >>> monospaced_width('입니다')
    6
    >>> monospaced_width('\u110b\u1175\u11b8\u1102\u1175\u1103\u1161')
    6

    The word "blue" with terminal escapes to make it blue still takes up only
    4 characters, when shown as intended.
    >>> monospaced_width('\x1b[34mblue\x1b[m')
    4
    """
    # Compose to NFC up front so Hangul jamo sequences need no special
    # handling when they are measured.
    composed = normalize("NFC", text)

    # Terminal escapes occupy no cells when rendered as intended, so strip
    # them before measuring.
    visible = remove_terminal_escapes(composed)

    return int(wcswidth(visible))

77 

78 

def display_ljust(text: str, width: int, fillchar: str = " ") -> str:
    """
    Left-justify `text` within a Unicode string that should span at least
    `width` character cells on a monospaced terminal. Any remaining cells
    are filled with `fillchar`, which has to be a width-1 character.

    "Left" refers to the start of the string, so in an RTL context the text
    may visually end up on the right -- the same sense of "left" as in
    "left parenthesis".

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_ljust(line, 20, '▒'))
    Table flip▒▒▒▒▒▒▒▒▒▒
    (╯°□°)╯︵ ┻━┻▒▒▒▒▒▒▒
    ちゃぶ台返し▒▒▒▒▒▒▒▒

    This example, and the similar ones that follow, should come out justified
    correctly when viewed in a monospaced terminal. It will probably not look
    correct if you're viewing this code or documentation in a Web browser.
    """
    fill_cells = character_width(fillchar)
    if fill_cells != 1:
        raise ValueError("The padding character must have display width 1")

    occupied = monospaced_width(text)
    if occupied == -1:
        # A control character makes the width unknowable; hand the text
        # back without any padding.
        return text

    # Never pad by a negative amount when the text is already wide enough.
    filler = fillchar * max(0, width - occupied)
    return text + filler

112 

113 

def display_rjust(text: str, width: int, fillchar: str = " ") -> str:
    """
    Right-justify `text` within a Unicode string that should span at least
    `width` character cells on a monospaced terminal. Any remaining cells
    are filled with `fillchar`, which has to be a width-1 character.

    "Right" refers to the end of the string, so in an RTL context the text
    may visually end up on the left -- the same sense of "right" as in
    "right parenthesis".

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_rjust(line, 20, '▒'))
    ▒▒▒▒▒▒▒▒▒▒Table flip
    ▒▒▒▒▒▒▒(╯°□°)╯︵ ┻━┻
    ▒▒▒▒▒▒▒▒ちゃぶ台返し
    """
    fill_cells = character_width(fillchar)
    if fill_cells != 1:
        raise ValueError("The padding character must have display width 1")

    occupied = monospaced_width(text)
    if occupied == -1:
        # A control character makes the width unknowable; hand the text
        # back without any padding.
        return text

    # Never pad by a negative amount when the text is already wide enough.
    filler = fillchar * max(0, width - occupied)
    return filler + text

142 

143 

def display_center(text: str, width: int, fillchar: str = " ") -> str:
    """
    Center `text` within a Unicode string that should span at least `width`
    character cells on a monospaced terminal. Any remaining cells are filled
    with `fillchar`, which has to be a width-1 character. When the padding
    cannot be split evenly, the extra cell goes after the text.

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_center(line, 20, '▒'))
    ▒▒▒▒▒Table flip▒▒▒▒▒
    ▒▒▒(╯°□°)╯︵ ┻━┻▒▒▒▒
    ▒▒▒▒ちゃぶ台返し▒▒▒▒
    """
    fill_cells = character_width(fillchar)
    if fill_cells != 1:
        raise ValueError("The padding character must have display width 1")

    occupied = monospaced_width(text)
    if occupied == -1:
        # A control character makes the width unknowable; hand the text
        # back without any padding.
        return text

    # Never pad by a negative amount when the text is already wide enough.
    total_fill = max(0, width - occupied)

    # Split the padding in half; an odd leftover cell lands on the trailing
    # side.
    leading, leftover = divmod(total_fill, 2)
    trailing = leading + leftover
    return fillchar * leading + text + fillchar * trailing