Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/rich/cells.py: 16%

1from __future__ import annotations

3from functools import lru_cache

4from operator import itemgetter

5from typing import Callable, NamedTuple, Sequence, Tuple

7from rich._unicode_data import load as load_cell_table

# A grapheme span: (start index, end index, cell length)
CellSpan = Tuple[int, int, int]

# Fetches the cell length (third item) of a span
_span_get_cell_len = itemgetter(2)

# Inclusive (first, last) ranges of unicode ordinals that render as a
# 1-cell wide character. The list is deliberately non-exhaustive; it only
# needs to cover the most common Western characters.
_SINGLE_CELL_UNICODE_RANGES: list[tuple[int, int]] = [
    (0x20, 0x7E),  # Latin (excluding non-printable)
    (0xA0, 0xAC),  # Stops just short of U+00AD
    (0xAE, 0x002FF),
    (0x00370, 0x00482),  # Greek / Cyrillic
    (0x02500, 0x025FC),  # Box drawing, box elements, geometric shapes
    (0x02800, 0x028FF),  # Braille
]

# Every character covered by the ranges above, as an immutable set
_SINGLE_CELLS = frozenset(
    chr(codepoint)
    for first, last in _SINGLE_CELL_UNICODE_RANGES
    for codepoint in range(first, last + 1)
)

# Call with a string: True when every character in it is known to be
# single-cell (which includes the empty string), False otherwise
_is_single_cell_widths: Callable[[str], bool] = _SINGLE_CELLS.issuperset

class CellTable(NamedTuple):
    """Unicode data needed to measure how many terminal cells glyphs occupy.

    Attributes:
        unicode_version: The unicode version this table describes.
        widths: ``(start, end, width)`` entries giving the cell width of
            every codepoint from ``start`` to ``end`` inclusive.
        narrow_to_wide: Characters whose width grows by one cell when they
            are followed by variation selector 16 (``"\\ufe0f"``).
    """

    unicode_version: str
    widths: Sequence[tuple[int, int, int]]
    narrow_to_wide: frozenset[str]

@lru_cache(maxsize=4096)
def get_character_cell_size(character: str, unicode_version: str = "auto") -> int:
    """Look up how many terminal cells a single character occupies.

    Args:
        character (str): A single character.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
    codepoint = ord(character)

    # Codepoints 1-31 and 0x7F-0x9F take up no cells. NUL (0) is not
    # short-circuited here and goes through the table lookup below.
    if 0 < codepoint < 0x20 or 0x7F <= codepoint < 0xA0:
        return 0

    ranges = load_cell_table(unicode_version).widths

    # Anything past the end of the final range is a single cell.
    if codepoint > ranges[-1][1]:
        return 1

    # Binary search for the range containing the codepoint; this relies on
    # the table being ordered by codepoint.
    low, high = 0, len(ranges) - 1
    while low <= high:
        middle = (low + high) // 2
        first, last, width = ranges[middle]
        if first <= codepoint <= last:
            return width
        if codepoint < first:
            high = middle - 1
        else:
            low = middle + 1

    # Codepoints that fall in a gap between ranges are a single cell.
    return 1

@lru_cache(maxsize=4096)
def cached_cell_len(text: str, unicode_version: str = "auto") -> int:
    """Measure the number of cells required to display text, caching the result.

    Every call is cached, which may use up a lot of memory. Prefer `cell_len`,
    which only goes through this cache for short strings.

    Args:
        text (str): Text to display.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        int: Get the number of cells required to display text.
    """
    cell_count = _cell_len(text, unicode_version)
    return cell_count

def cell_len(text: str, unicode_version: str = "auto") -> int:
    """Measure a string in terminal cells (its length as displayed).

    Args:
        text: String to measure.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        Length of string in terminal cells.
    """
    # Only strings shorter than 512 characters go through the LRU cache;
    # longer ones are measured directly every time.
    measure = cached_cell_len if len(text) < 512 else _cell_len
    return measure(text, unicode_version)

111

112

def _cell_len(text: str, unicode_version: str) -> int:
    """Get the cell length of a string (length as it appears in the terminal).

    Three strategies are tried, cheapest first:

    1. If every character is known to be single-cell, the answer is `len(text)`.
    2. If the text contains neither a zero width joiner nor variation
       selector 16, the answer is the sum of the individual character widths.
    3. Otherwise the text is walked character by character, applying the
       same joiner / selector rules as `split_graphemes` so that both
       functions report the same total width.

    Args:
        text: String to measure.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        Length of string in terminal cells.
    """

    if _is_single_cell_widths(text):
        return len(text)

    # "\u200d" is zero width joiner
    # "\ufe0f" is variation selector 16
    if "\u200d" not in text and "\ufe0f" not in text:
        # Simplest case with no unicode stuff that changes the size
        return sum(
            get_character_cell_size(character, unicode_version) for character in text
        )

    cell_table = load_cell_table(unicode_version)
    total_width = 0
    # The most recent character with a non-zero width; variation selector 16
    # may widen it. Reset once a selector has been applied to it.
    last_measured_character: str | None = None

    index = 0
    character_count = len(text)

    while index < character_count:
        character = text[index]
        if character == "\u200d":
            # A joiner absorbs the character that follows it, so that
            # character adds no width of its own. A joiner at the very start
            # of the string has nothing to join onto and consumes only
            # itself, which is how split_graphemes treats it.
            index += 2 if index else 1
            continue
        if character == "\ufe0f":
            if last_measured_character:
                # The selector adds one cell when the preceding character
                # is listed as narrow-to-wide.
                if last_measured_character in cell_table.narrow_to_wide:
                    total_width += 1
                last_measured_character = None
        elif character_width := get_character_cell_size(character, unicode_version):
            last_measured_character = character
            total_width += character_width
        index += 1

    return total_width

159

160

def split_graphemes(
    text: str, unicode_version: str = "auto"
) -> "tuple[list[CellSpan], int]":
    """Divide text into spans that each define a single grapheme, and also return the cell length of the whole string.

    The returned spans cover every index in the string, with no gaps. Some graphemes may have a cell length of zero.
    This can occur for nonsense strings like two zero width joiners, or for control codes that don't contribute to the grapheme size.

    Args:
        text: String to split.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        A tuple of a list of *spans* and the cell length of the entire string. Each span is a tuple
        of three values (<START>, <END>, <CELL LENGTH>), where START and END are string indices,
        and CELL LENGTH is the cell length of the single grapheme.
    """
    zero_width_joiner = "\u200d"
    variation_selector_16 = "\ufe0f"

    cell_table = load_cell_table(unicode_version)
    length = len(text)
    spans: list[tuple[int, int, int]] = []
    total_width = 0
    # Most recent character with a non-zero width; cleared once a variation
    # selector has widened it.
    widenable: str | None = None
    position = 0

    while position < length:
        character = text[position]

        if character == zero_width_joiner or character == variation_selector_16:
            if not spans:
                # ZWJ or variation selector at the beginning of the string
                # doesn't really make sense, but it still needs a span.
                spans.append((position, position + 1, 0))
                position += 1
                continue

            span_start, _, span_width = spans[-1]
            if character == zero_width_joiner:
                # The joiner pulls the following character into the previous
                # span. At the end of the string there is nothing to join,
                # so only the joiner itself is consumed.
                position += 2 if position < length - 1 else 1
            else:
                position += 1
                if widenable and widenable in cell_table.narrow_to_wide:
                    # Variation selector 16 widens the previous character.
                    widenable = None
                    span_width += 1
                    total_width += 1
            # Either way, the previous span grows to cover what was consumed.
            spans[-1] = (span_start, position, span_width)
            continue

        width = get_character_cell_size(character, unicode_version)
        if width:
            widenable = character
            spans.append((position, position + 1, width))
            total_width += width
        elif spans:
            # Zero width characters are associated with the previous character
            span_start, _, span_width = spans[-1]
            spans[-1] = (span_start, position + 1, span_width)
        else:
            # A zero width character with no prior spans
            spans.append((position, position + 1, 0))
        position += 1

    return (spans, total_width)

233

234

def _split_text(
    text: str, cell_position: int, unicode_version: str = "auto"
) -> tuple[str, str]:
    """Split text by cell position.

    If the cell position falls within a double width character, it is converted to two spaces.

    Args:
        text: Text to split.
        cell_position: Offset in cells.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        Tuple of two split strings. A position at or before the start gives
        `("", text)`; a position at or beyond the end gives `(text, "")`.
    """
    if cell_position <= 0:
        return "", text

    spans, cell_length = split_graphemes(text, unicode_version)

    # Splitting at or past the end leaves everything on the left. Handling
    # it here avoids dividing by zero for text with no width at all, and
    # avoids indexing past the last span when the position exceeds the
    # width of the text.
    if cell_position >= cell_length:
        return text, ""

    # Guess initial offset, assuming graphemes are of roughly uniform width
    offset = int((cell_position / cell_length) * len(spans))
    left_size = sum(map(_span_get_cell_len, spans[:offset]))

    # Walk from the guess towards the requested position, one span at a time
    while True:
        if left_size == cell_position:
            if offset >= len(spans):
                return text, ""
            split_index = spans[offset][0]
            return text[:split_index], text[split_index:]
        if left_size < cell_position:
            start, end, cell_size = spans[offset]
            if left_size + cell_size > cell_position:
                # The position is inside this grapheme: replace it with a
                # space on each side of the split
                return text[:start] + " ", " " + text[end:]
            offset += 1
            left_size += cell_size
        else:  # left_size > cell_position
            start, end, cell_size = spans[offset - 1]
            if left_size - cell_size < cell_position:
                return text[:start] + " ", " " + text[end:]
            offset -= 1
            left_size -= cell_size

277

278

def split_text(
    text: str, cell_position: int, unicode_version: str = "auto"
) -> tuple[str, str]:
    """Split text by cell position.

    If the cell position falls within a double width character, it is converted to two spaces.

    Args:
        text: Text to split.
        cell_position: Offset in cells. Zero or a negative value splits
            before the first character.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        Tuple of two split strings.
    """
    # Handle this before the fast path: a negative value used as a slice
    # index would count from the end of the string, whereas the grapheme
    # based path treats it as "before the start".
    if cell_position <= 0:
        return "", text
    if _is_single_cell_widths(text):
        # One cell per character, so the cell offset is the string index
        return text[:cell_position], text[cell_position:]
    return _split_text(text, cell_position, unicode_version)

297

298

def set_cell_size(text: str, total: int, unicode_version: str = "auto") -> str:
    """Adjust a string by cropping or padding with spaces such that it fits within the given number of cells.

    Args:
        text: String to adjust.
        total: Desired size in cells. Zero or a negative value gives an
            empty string.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        A string with cell size equal to total.
    """
    # Checked first so that a negative total can never reach the slice in
    # the fast path below, where it would count from the end of the string.
    if total <= 0:
        return ""

    if _is_single_cell_widths(text):
        # One cell per character: pad or crop by string length
        size = len(text)
        if size < total:
            return text + " " * (total - size)
        return text[:total]

    # Measure with the same unicode version that is used for cropping
    cell_size = cell_len(text, unicode_version)
    if cell_size == total:
        return text
    if cell_size < total:
        return text + " " * (total - cell_size)
    cropped, _ = _split_text(text, total, unicode_version)
    return cropped

324

325

def chop_cells(text: str, width: int, unicode_version: str = "auto") -> list[str]:
    """Break text into lines so that each line fits within the available (cell) width.

    Args:
        text: The text to fold such that it fits in the given width.
        width: The width available (number of cells).
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        A list of strings such that each string in the list has cell width
        less than or equal to the available width.
    """
    if _is_single_cell_widths(text):
        # One cell per character, so fixed-size slices are enough
        line_starts = range(0, len(text), width)
        return [text[begin : begin + width] for begin in line_starts]

    graphemes, _total_width = split_graphemes(text, unicode_version)
    chopped: list[str] = []
    line_start = 0  # Offset (in codepoints) where the current line begins
    used_cells = 0  # Cells taken up by the current line so far
    for grapheme_start, _grapheme_end, grapheme_cells in graphemes:
        if used_cells + grapheme_cells > width:
            # This grapheme would overflow: end the line just before it and
            # start a new line with it
            chopped.append(text[line_start:grapheme_start])
            line_start = grapheme_start
            used_cells = grapheme_cells
        else:
            used_cells += grapheme_cells
    if used_cells:
        chopped.append(text[line_start:])

    return chopped