Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/ftfy/formatting.py: 91%

33 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:33 +0000

1""" 

2This module provides functions for justifying Unicode text in a monospaced 

3display such as a terminal. 

4 

5We used to have our own implementation here, but now we mostly rely on 

6the 'wcwidth' library. 

7""" 

8from unicodedata import normalize 

9 

10from wcwidth import wcswidth, wcwidth 

11from ftfy.fixes import remove_terminal_escapes 

12 

13 

def character_width(char: str) -> int:
    r"""
    Return the number of terminal cells that a single character is expected
    to occupy in a monospaced display.

    Printable characters always have a width of 0, 1, or 2. Following the
    convention used by wcwidth, nonprintable and control characters are
    reported as -1.

    >>> character_width('車')
    2
    >>> character_width('A')
    1
    >>> character_width('\N{ZERO WIDTH JOINER}')
    0
    >>> character_width('\n')
    -1
    """
    # Delegate the lookup to wcwidth, coercing its answer to a plain int.
    cells = wcwidth(char)
    return int(cells)

33 

34 

def monospaced_width(text: str) -> int:
    r"""
    Compute the "display width" of a string: the number of character cells
    it is likely to occupy in a monospaced, modern, Unicode-aware terminal
    emulator.

    This is handy when formatting text that may contain non-spacing
    characters, or CJK characters that are two cells wide.

    The result is -1 if the string contains a non-printable or control
    character.

    >>> monospaced_width('ちゃぶ台返し')
    12
    >>> len('ちゃぶ台返し')
    6
    >>> monospaced_width('owl\N{SOFT HYPHEN}flavored')
    11
    >>> monospaced_width('example\x80')
    -1

    A more complex example: The Korean word 'ibnida' can be written with 3
    pre-composed characters or 7 jamo. Either way, it *looks* the same and
    takes up 6 character cells.

    >>> monospaced_width('입니다')
    6
    >>> monospaced_width('\u110b\u1175\u11b8\u1102\u1175\u1103\u1161')
    6

    The word "blue" with terminal escapes to make it blue still takes up only
    4 characters, when shown as intended.

    >>> monospaced_width('\x1b[34mblue\x1b[m')
    4
    """
    # Compose to NFC up front so that Hangul jamo need no special handling.
    composed = normalize("NFC", text)

    # Terminal escapes take up no cells when a terminal interprets them as
    # intended, so drop them before measuring.
    visible = remove_terminal_escapes(composed)

    return int(wcswidth(visible))

75 

76 

def display_ljust(text: str, width: int, fillchar: str = " ") -> str:
    """
    Return `text` left-justified in a Unicode string whose display width,
    in a monospaced terminal, should be at least `width` character cells.
    The rest of the string will be padded with `fillchar`, which must be
    a width-1 character; a ValueError is raised otherwise.

    "Left" here means toward the beginning of the string, which may actually
    appear on the right in an RTL context. This is similar to the use of the
    word "left" in "left parenthesis".

    If the display width of `text` cannot be determined (it is reported
    as -1), or `text` is already at least `width` cells wide, `text` is
    returned with no padding added.

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_ljust(line, 20, '▒'))
    Table flip▒▒▒▒▒▒▒▒▒▒
    (╯°□°)╯︵ ┻━┻▒▒▒▒▒▒▒
    ちゃぶ台返し▒▒▒▒▒▒▒▒

    This example, and the similar ones that follow, should come out justified
    correctly when viewed in a monospaced terminal. It will probably not look
    correct if you're viewing this code or documentation in a Web browser.
    """
    # Padding arithmetic assumes each fill character occupies exactly one cell.
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    text_width = monospaced_width(text)
    if text_width == -1:
        # There's a control character here, so just don't add padding
        return text

    # Never pad by a negative amount when the text is already wide enough.
    padding = max(0, width - text_width)
    return text + fillchar * padding

109 

110 

def display_rjust(text: str, width: int, fillchar: str = " ") -> str:
    """
    Return `text` right-justified in a Unicode string whose display width,
    in a monospaced terminal, should be at least `width` character cells.
    The rest of the string will be padded with `fillchar`, which must be
    a width-1 character; a ValueError is raised otherwise.

    "Right" here means toward the end of the string, which may actually be on
    the left in an RTL context. This is similar to the use of the word "right"
    in "right parenthesis".

    If the display width of `text` cannot be determined (it is reported
    as -1), or `text` is already at least `width` cells wide, `text` is
    returned with no padding added.

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_rjust(line, 20, '▒'))
    ▒▒▒▒▒▒▒▒▒▒Table flip
    ▒▒▒▒▒▒▒(╯°□°)╯︵ ┻━┻
    ▒▒▒▒▒▒▒▒ちゃぶ台返し
    """
    # Padding arithmetic assumes each fill character occupies exactly one cell.
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    text_width = monospaced_width(text)
    if text_width == -1:
        # A width of -1 means the text has no well-defined display width
        # (for example, it contains a control character), so don't pad it.
        return text

    # Never pad by a negative amount when the text is already wide enough.
    padding = max(0, width - text_width)
    return fillchar * padding + text

138 

139 

def display_center(text: str, width: int, fillchar: str = " ") -> str:
    """
    Return `text` centered in a Unicode string whose display width, in a
    monospaced terminal, should be at least `width` character cells. The rest
    of the string will be padded with `fillchar`, which must be a width-1
    character; a ValueError is raised otherwise.

    When the padding cannot be split evenly, the extra fill character goes
    after the text (toward the end of the string).

    If the display width of `text` cannot be determined (it is reported
    as -1), or `text` is already at least `width` cells wide, `text` is
    returned with no padding added.

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_center(line, 20, '▒'))
    ▒▒▒▒▒Table flip▒▒▒▒▒
    ▒▒▒(╯°□°)╯︵ ┻━┻▒▒▒▒
    ▒▒▒▒ちゃぶ台返し▒▒▒▒
    """
    # Padding arithmetic assumes each fill character occupies exactly one cell.
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    text_width = monospaced_width(text)
    if text_width == -1:
        # A width of -1 means the text has no well-defined display width
        # (for example, it contains a control character), so don't pad it.
        return text

    # Never pad by a negative amount when the text is already wide enough.
    padding = max(0, width - text_width)
    # Floor division puts the odd leftover cell, if any, on the trailing side.
    left_padding = padding // 2
    right_padding = padding - left_padding
    return fillchar * left_padding + text + fillchar * right_padding