Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/textwrap.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

161 statements  

"""Text wrapping and filling.
"""

# Copyright (C) 1999-2001 Gregory P. Ward.
# Copyright (C) 2002, 2003 Python Software Foundation.
# Written by Greg Ward <gward@python.net>

import re

# Public names exported by ``from <module> import *``.
__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']

# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
_whitespace = '\t\n\x0b\x0c\r '

16 

class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 0 .. 'tabsize' spaces, depending on its position
        in its line.  If false, each tab is treated as a single character.
      tabsize (default: 8)
        Expand tabs in input text to 0 .. 'tabsize' spaces, unless
        'expand_tabs' is false.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words. If true, wrapping will occur
        preferably on whitespaces and right after hyphens part of
        compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
      max_lines (default: None)
        Truncate wrapped lines.
      placeholder (default: ' [...]')
        Append to the last line of truncated text.
    """

    # Translation table for str.translate(): maps the code point of every
    # recognized whitespace character to the code point of a plain space.
    unicode_whitespace_trans = {}
    uspace = ord(' ')
    for x in _whitespace:
        unicode_whitespace_trans[ord(x)] = uspace

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    word_punct = r'[\w!"\'&.,?]'
    letter = r'[^\d\W]'
    whitespace = r'[%s]' % re.escape(_whitespace)
    # Same character class as 'whitespace', negated.
    nowhitespace = '[^' + whitespace[1:]
    wordsep_re = re.compile(r'''
        ( # any whitespace
          %(ws)s+
        | # em-dash between words
          (?<=%(wp)s) -{2,} (?=\w)
        | # word, possibly hyphenated
          %(nws)s+? (?:
            # hyphenated word
              -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
              (?= %(lt)s -? %(lt)s)
            | # end of word
              (?=%(ws)s|\Z)
            | # em-dash
              (?<=%(wp)s) (?=-{2,}\w)
            )
        )''' % {'wp': word_punct, 'lt': letter,
                'ws': whitespace, 'nws': nowhitespace},
        re.VERBOSE)
    del word_punct, letter, nowhitespace

    # This less funky little regex just split on recognized spaces. E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
    del whitespace

    # XXX this is not locale- or charset-aware -- string.lowercase
    # is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[a-z]'             # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z')               # end of chunk

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True,
                 tabsize=8,
                 *,
                 max_lines=None,
                 placeholder=' [...]'):
        """Store the wrapping options as same-named instance attributes.

        No validation happens here; 'width', 'max_lines' and
        'placeholder' are checked when _wrap_chunks() runs.  See the
        class docstring for the meaning of each option.
        """
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens
        self.tabsize = tabsize
        self.max_lines = max_lines
        self.placeholder = placeholder

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".

        Each step is applied only when the matching option
        ('expand_tabs', 'replace_whitespace') is true.
        """
        if self.expand_tabs:
            text = text.expandtabs(self.tabsize)
        if self.replace_whitespace:
            text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        # NOTE: this is an identity test, so only the literal True selects
        # the hyphen-aware regex; any other value uses the simple splitter.
        if self.break_on_hyphens is True:
            chunks = self.wordsep_re.split(text)
        else:
            chunks = self.wordsep_simple_re.split(text)
        # re.split() with a capturing group yields empty strings between
        # adjacent matches; drop them.
        return [c for c in chunks if c]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.

        'chunks' is modified in place; nothing is returned.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks)-1:
            if chunks[i+1] == " " and patsearch(chunks[i]):
                # Widen the single separating space to two spaces.
                chunks[i+1] = "  "
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.

        Both 'reversed_chunks' (a stack whose top is its last item) and
        'cur_line' are modified in place; nothing is returned.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(reversed_chunks[-1][:space_left])
            reversed_chunks[-1] = reversed_chunks[-1][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        The 'chunks' list is consumed: it is reversed in place and
        popped from as lines are built.

        Raises ValueError if 'self.width' is not positive, or if
        'max_lines' is set and the placeholder (leading whitespace
        stripped) plus the relevant indent is wider than 'self.width'.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            # The placeholder lands on the last permitted line, which is
            # the first line only when at most one line is allowed.
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                # cur_line may have grown; recompute its length.
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                # Emit the line as-is when no line limit applies, when we
                # are still below the limit, or when this is the final
                # line and it already holds all remaining text (at most
                # one droppable whitespace chunk is left) within 'width'.
                if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    (not chunks or
                     self.drop_whitespace and
                     len(chunks) == 1 and
                     not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(indent + ''.join(cur_line))
                else:
                    # Truncation: strip chunks off the end of the line
                    # until the placeholder fits after a non-blank chunk.
                    while cur_line:
                        if (cur_line[-1].strip() and
                            cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(indent + ''.join(cur_line))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        # Nothing on this line could be kept.  Try to tack
                        # the placeholder onto the previous line; failing
                        # that, emit a line holding only the placeholder.
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _split_chunks(self, text):
        """Normalize whitespace in 'text', then split it into chunks.

        Applies _munge_whitespace() followed by _split() and returns
        the resulting list of chunk strings.
        """
        text = self._munge_whitespace(text)
        return self._split(text)

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        chunks = self._split_chunks(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))

364 

365 

# -- Convenience interface ---------------------------------------------

367 

def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    # A fresh TextWrapper is built on every call; 'kwargs' is passed
    # straight through to its constructor.
    wrapper = TextWrapper(width=width, **kwargs)
    return wrapper.wrap(text)

380 

def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    # A fresh TextWrapper is built on every call; 'kwargs' is passed
    # straight through to its constructor.
    wrapper = TextWrapper(width=width, **kwargs)
    return wrapper.fill(text)

392 

def shorten(text, width, **kwargs):
    """Collapse and truncate the given text to fit in the given width.

    The text first has its whitespace collapsed.  If it then fits in
    the *width*, it is returned as is.  Otherwise, as many words
    as possible are joined and then the placeholder is appended::

        >>> textwrap.shorten("Hello  world!", width=12)
        'Hello world!'
        >>> textwrap.shorten("Hello  world!", width=11)
        'Hello [...]'
    """
    # max_lines=1 makes the wrapper truncate to one line with the placeholder.
    wrapper = TextWrapper(width=width, max_lines=1, **kwargs)
    # str.split() with no argument collapses every whitespace run, so the
    # wrapper only ever sees single spaces between words.
    collapsed = ' '.join(text.strip().split())
    return wrapper.fill(collapsed)

407 

408 

# -- Loosely related functionality -------------------------------------

410 

# Matches a line consisting solely of spaces and/or tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the leading spaces/tabs of every line that has other content.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.

    Entirely blank lines are normalized to a newline character.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    # Blank out whitespace-only lines first so they neither contribute an
    # indent nor keep stray spaces.
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for line_indent in indents:
        if margin is None:
            margin = line_indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif line_indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(line_indent):
            margin = line_indent

        # Find the largest common whitespace between current line and previous
        # winner.
        else:
            for i, (x, y) in enumerate(zip(margin, line_indent)):
                if x != y:
                    margin = margin[:i]
                    break

    if margin:
        # margin holds only spaces and tabs, so it is safe to embed in the
        # pattern without escaping.
        text = re.sub(r'(?m)^' + margin, '', text)
    return text

463 

464 

def indent(text, prefix, predicate=None):
    """Adds 'prefix' to the beginning of selected lines in 'text'.

    If 'predicate' is provided, 'prefix' will only be added to the lines
    where 'predicate(line)' is True.  If 'predicate' is not provided,
    it will default to adding 'prefix' to all non-empty lines that do not
    consist solely of whitespace characters.
    """
    if predicate is None:
        def predicate(line):
            # Truthy only when the line has non-whitespace content.
            return line.strip()

    # splitlines(True) keeps each line's terminator, so joining the pieces
    # reproduces the original line endings exactly.
    return ''.join(
        prefix + line if predicate(line) else line
        for line in text.splitlines(True)
    )

481 

482 

if __name__ == "__main__":
    # Small demo when the module is run as a script: print the result of
    # dedent() on a two-line string whose second line is indented.
    print(dedent("Hello there.\n  This is indented."))