#!/usr/bin/env python

# Copyright (C) 2010-2011 Hideo Hattori
# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
# Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
# Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Automatically formats Python code to conform to the PEP 8 style guide.

Fixes that only need to be done once can be added as a function of the form
"fix_<code>(source)" in this module. They should return the fixed source
code. These fixes are picked up by apply_global_fixes().

Fixes that depend on pycodestyle should be added as methods to FixPEP8. See
the class documentation for more information.

"""


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import codecs
import collections
import copy
import difflib
import fnmatch
import inspect
import io
import itertools
import keyword
import locale
import os
import re
import signal
import sys
import textwrap
import token
import tokenize
import warnings
import ast
from configparser import ConfigParser as SafeConfigParser, Error

import pycodestyle
from pycodestyle import STARTSWITH_INDENT_STATEMENT_REGEX


__version__ = '2.0.2'


CR = '\r'
LF = '\n'
CRLF = '\r\n'


PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
LAMBDA_REGEX = re.compile(r'([\w.]+)\s=\slambda\s*([)(=\w,\s.]*):')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+([^][)(}{]+?)\s+(in|is)\s')
COMPARE_NEGATIVE_REGEX_THROUGH = re.compile(r'\b(not\s+in|is\s+not)\s')
BARE_EXCEPT_REGEX = re.compile(r'except\s*:')
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\s.*\):')
DOCSTRING_START_REGEX = re.compile(r'^u?r?(?P<kind>["\']{3})')
ENABLE_REGEX = re.compile(r'# *(fmt|autopep8): *on')
DISABLE_REGEX = re.compile(r'# *(fmt|autopep8): *off')

EXIT_CODE_OK = 0
EXIT_CODE_ERROR = 1
EXIT_CODE_EXISTS_DIFF = 2
EXIT_CODE_ARGPARSE_ERROR = 99

# For generating line shortening candidates.
SHORTEN_OPERATOR_GROUPS = frozenset([
    frozenset([',']),
    frozenset(['%']),
    frozenset([',', '(', '[', '{']),
    frozenset(['%', '(', '[', '{']),
    frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
    frozenset(['%', '+', '-', '*', '/', '//']),
])


DEFAULT_IGNORE = 'E226,E24,W50,W690'  # TODO: use pycodestyle.DEFAULT_IGNORE
DEFAULT_INDENT_SIZE = 4
# These fixes conflict with each other; if the `--ignore` setting causes
# both to be enabled, disable both of them.
CONFLICTING_CODES = ('W503', 'W504')

# W602 is handled separately due to the need to avoid "with_traceback".
CODE_TO_2TO3 = {
    'E231': ['ws_comma'],
    'E721': ['idioms'],
    'W690': ['apply',
             'except',
             'exitfunc',
             'numliterals',
             'operator',
             'paren',
             'reduce',
             'renames',
             'standarderror',
             'sys_exc',
             'throw',
             'tuple_params',
             'xreadlines']}


if sys.platform == 'win32':  # pragma: no cover
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pycodestyle')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'),
                                  'pycodestyle')
# Fall back to a .pep8 config if no pycodestyle config exists.
if not os.path.exists(DEFAULT_CONFIG):  # pragma: no cover
    if sys.platform == 'win32':
        DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
    else:
        DEFAULT_CONFIG = os.path.join(os.path.expanduser('~/.config'), 'pep8')
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8', '.flake8')


MAX_PYTHON_FILE_DETECTION_BYTES = 1024


def open_with_encoding(filename, mode='r', encoding=None, limit_byte_check=-1):
    """Return opened file with a specific encoding."""
    if not encoding:
        encoding = detect_encoding(filename, limit_byte_check=limit_byte_check)

    return io.open(filename, mode=mode, encoding=encoding,
                   newline='')  # Preserve line endings


def detect_encoding(filename, limit_byte_check=-1):
    """Return file encoding."""
    try:
        with open(filename, 'rb') as input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

        with open_with_encoding(filename, encoding=encoding) as test_file:
            test_file.read(limit_byte_check)

        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'


def readlines_from_file(filename):
    """Return contents of file."""
    with open_with_encoding(filename) as input_file:
        return input_file.readlines()


def extended_blank_lines(logical_line,
                         blank_lines,
                         blank_before,
                         indent_level,
                         previous_logical):
    """Check for missing blank lines after class declaration."""
    if previous_logical.startswith('def '):
        if blank_lines and pycodestyle.DOCSTRING_REGEX.match(logical_line):
            yield (0, 'E303 too many blank lines ({})'.format(blank_lines))
    elif pycodestyle.DOCSTRING_REGEX.match(previous_logical):
        # Missing blank line between class docstring and method declaration.
        if (
            indent_level and
            not blank_lines and
            not blank_before and
            logical_line.startswith(('def ')) and
            '(self' in logical_line
        ):
            yield (0, 'E301 expected 1 blank line, found 0')


pycodestyle.register_check(extended_blank_lines)
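
# For example, a class docstring immediately followed by a method definition
# triggers the E301 branch above:
#
#   class Foo(object):
#       """Docstring."""
#       def method(self):    # E301 expected 1 blank line, found 0
#           pass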



def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa):
    """Override pycodestyle's function to provide indentation information."""
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented. Assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line. In turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    valid_hangs = (
        (DEFAULT_INDENT_SIZE,)
        if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
                                     2 * DEFAULT_INDENT_SIZE)
    )

    # Remember how many brackets were opened on each line.
    parens = [0] * nrows

    # Relative indents of physical lines.
    rel_indent = [0] * nrows

    # For each depth, collect a list of opening rows.
    open_rows = [[0]]
    # For each depth, memorize the hanging indentation.
    hangs = [None]

    # Visual indents.
    indent_chances = {}
    last_indent = tokens[0][2]
    indent = [last_indent[1]]

    last_token_multiline = None
    line = None
    last_line = ''
    last_line_begins_with_multiline = False
    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))
            last_line_begins_with_multiline = last_token_multiline

        if newline:
            # This is the beginning of a continuation line.
            last_indent = start

            # Record the initial indent.
            rel_indent[row] = pycodestyle.expand_indent(line) - indent_level

            # Identify closing bracket.
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # Is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])

            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # Closing bracket for visual indent.
                if start[1] != indent[depth]:
                    yield (start, 'E124 {}'.format(indent[depth]))
            elif close_bracket and not hang:
                # Closing bracket matches indentation of opening bracket's
                # line.
                if hang_closing:
                    yield (start, 'E133 {}'.format(indent[depth]))
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # Visual indent is broken.
                    yield (start, 'E128 {}'.format(indent[depth]))
            elif (hanging_indent or
                  (indent_next and
                   rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
                # Hanging indent is verified.
                if close_bracket and not hang_closing:
                    yield (start, 'E123 {}'.format(indent_level +
                                                   rel_indent[open_row]))
                hangs[depth] = hang
            elif visual_indent is True:
                # Visual indent is verified.
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # Ignore token lined up with matching one from a previous
                # line.
                pass
            else:
                one_indented = (indent_level + rel_indent[open_row] +
                                DEFAULT_INDENT_SIZE)
                # Indent is broken.
                if hang <= 0:
                    error = ('E122', one_indented)
                elif indent[depth]:
                    error = ('E127', indent[depth])
                elif not close_bracket and hangs[depth]:
                    error = ('E131', one_indented)
                elif hang > DEFAULT_INDENT_SIZE:
                    error = ('E126', one_indented)
                else:
                    hangs[depth] = hang
                    error = ('E121', one_indented)

                yield (start, '{} {}'.format(*error))

        # Look for visual indenting.
        if (
            parens[row] and
            token_type not in (tokenize.NL, tokenize.COMMENT) and
            not indent[depth]
        ):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
        # Deal with implicit string concatenation.
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # Special case for the "if" statement because len("if (") is equal to
        # 4.
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # Keep track of bracket depth.
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
            elif text in ')]}' and depth > 0:
                # Parent indents should not be more than this one.
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if (
                start[1] not in indent_chances and
                # This is for purposes of speeding up E121 (GitHub #90).
                not last_line.rstrip().endswith(',')
            ):
                # Allow lining up tokens.
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

        last_line = line

    if (
        indent_next and
        not last_line_begins_with_multiline and
        pycodestyle.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
    ):
        pos = (start[0], indent[0] + 4)
        desired_indent = indent_level + 2 * DEFAULT_INDENT_SIZE
        if visual_indent:
            yield (pos, 'E129 {}'.format(desired_indent))
        else:
            yield (pos, 'E125 {}'.format(desired_indent))


del pycodestyle._checks['logical_line'][pycodestyle.continued_indentation]
pycodestyle.register_check(continued_indentation)
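
# Note: unlike the stock pycodestyle check it replaces, the override above
# embeds the desired indent column in each message (e.g. 'E127 12'); fixer
# methods below recover it with int(result['info'].split()[1]).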



class FixPEP8(object):

    """Fix invalid code.

    Fixer methods are prefixed "fix_". The _fix_source() method looks for
    these automatically.

    The fixer method can take either one or two arguments (in addition to
    self). The first argument is "result", which is the error information
    from pycodestyle. The second argument, "logical", is required only for
    logical-line fixes.

    The fixer method can return the list of modified lines or None. An
    empty list would mean that no changes were made. None would mean that
    only the line reported in the pycodestyle error was modified. Note that
    the modified line numbers that are returned are indexed at 1. This
    typically would correspond with the line number reported in the
    pycodestyle error information.

    [fixed method list]
        - e111,e114,e115,e116
        - e121,e122,e123,e124,e125,e126,e127,e128,e129
        - e201,e202,e203
        - e211
        - e221,e222,e223,e224,e225
        - e231
        - e251,e252
        - e261,e262
        - e271,e272,e273,e274,e275
        - e301,e302,e303,e304,e305,e306
        - e401,e402
        - e502
        - e701,e702,e703,e704
        - e711,e712,e713,e714
        - e722
        - e731
        - w291
        - w503,w504

    """

    def __init__(self, filename,
                 options,
                 contents=None,
                 long_line_ignore_cache=None):
        self.filename = filename
        if contents is None:
            self.source = readlines_from_file(filename)
        else:
            sio = io.StringIO(contents)
            self.source = sio.readlines()
        self.options = options
        self.indent_word = _get_indentword(''.join(self.source))

        # Collect import lines.
        self.imports = {}
        for i, line in enumerate(self.source):
            if (line.find("import ") == 0 or line.find("from ") == 0) and \
                    line not in self.imports:
                # Collect only the first appearance of each import statement.
                self.imports[line] = i

        self.long_line_ignore_cache = (
            set() if long_line_ignore_cache is None
            else long_line_ignore_cache)

        # Many fixers are the same even though pycodestyle categorizes them
        # differently.
        self.fix_e115 = self.fix_e112
        self.fix_e121 = self._fix_reindent
        self.fix_e122 = self._fix_reindent
        self.fix_e123 = self._fix_reindent
        self.fix_e124 = self._fix_reindent
        self.fix_e126 = self._fix_reindent
        self.fix_e127 = self._fix_reindent
        self.fix_e128 = self._fix_reindent
        self.fix_e129 = self._fix_reindent
        self.fix_e133 = self.fix_e131
        self.fix_e202 = self.fix_e201
        self.fix_e203 = self.fix_e201
        self.fix_e211 = self.fix_e201
        self.fix_e221 = self.fix_e271
        self.fix_e222 = self.fix_e271
        self.fix_e223 = self.fix_e271
        self.fix_e226 = self.fix_e225
        self.fix_e227 = self.fix_e225
        self.fix_e228 = self.fix_e225
        self.fix_e241 = self.fix_e271
        self.fix_e242 = self.fix_e224
        self.fix_e252 = self.fix_e225
        self.fix_e261 = self.fix_e262
        self.fix_e272 = self.fix_e271
        self.fix_e273 = self.fix_e271
        self.fix_e274 = self.fix_e271
        self.fix_e275 = self.fix_e271
        self.fix_e306 = self.fix_e301
        self.fix_e501 = (
            self.fix_long_line_logically if
            options and (options.aggressive >= 2 or options.experimental) else
            self.fix_long_line_physically)
        self.fix_e703 = self.fix_e702
        self.fix_w292 = self.fix_w291
        self.fix_w293 = self.fix_w291

    def _fix_source(self, results):
        try:
            (logical_start, logical_end) = _find_logical(self.source)
            logical_support = True
        except (SyntaxError, tokenize.TokenError):  # pragma: no cover
            logical_support = False

        completed_lines = set()
        for result in sorted(results, key=_priority_key):
            if result['line'] in completed_lines:
                continue

            fixed_methodname = 'fix_' + result['id'].lower()
            if hasattr(self, fixed_methodname):
                fix = getattr(self, fixed_methodname)

                line_index = result['line'] - 1
                original_line = self.source[line_index]

                is_logical_fix = len(_get_parameters(fix)) > 2
                if is_logical_fix:
                    logical = None
                    if logical_support:
                        logical = _get_logical(self.source,
                                               result,
                                               logical_start,
                                               logical_end)
                        if logical and set(range(
                                logical[0][0] + 1,
                                logical[1][0] + 1)).intersection(
                                    completed_lines):
                            continue

                    modified_lines = fix(result, logical)
                else:
                    modified_lines = fix(result)

                if modified_lines is None:
                    # Force logical fixes to report what they modified.
                    assert not is_logical_fix

                    if self.source[line_index] == original_line:
                        modified_lines = []

                if modified_lines:
                    completed_lines.update(modified_lines)
                elif modified_lines == []:  # Empty list means no fix
                    if self.options.verbose >= 2:
                        print(
                            '---> Not fixing {error} on line {line}'.format(
                                error=result['id'], line=result['line']),
                            file=sys.stderr)
                else:  # We assume one-line fix when None.
                    completed_lines.add(result['line'])
            else:
                if self.options.verbose >= 3:
                    print(
                        "---> '{}' is not defined.".format(fixed_methodname),
                        file=sys.stderr)

                    info = result['info'].strip()
                    print('---> {}:{}:{}:{}'.format(self.filename,
                                                    result['line'],
                                                    result['column'],
                                                    info),
                          file=sys.stderr)

    def fix(self):
        """Return a version of the source code with PEP 8 violations fixed."""
        pep8_options = {
            'ignore': self.options.ignore,
            'select': self.options.select,
            'max_line_length': self.options.max_line_length,
            'hang_closing': self.options.hang_closing,
        }
        results = _execute_pep8(pep8_options, self.source)

        if self.options.verbose:
            progress = {}
            for r in results:
                if r['id'] not in progress:
                    progress[r['id']] = set()
                progress[r['id']].add(r['line'])
            print('---> {n} issue(s) to fix {progress}'.format(
                n=len(results), progress=progress), file=sys.stderr)

        if self.options.line_range:
            start, end = self.options.line_range
            results = [r for r in results
                       if start <= r['line'] <= end]

        self._fix_source(filter_results(source=''.join(self.source),
                                        results=results,
                                        aggressive=self.options.aggressive))

        if self.options.line_range:
            # If number of lines has changed then change line_range.
            count = sum(sline.count('\n')
                        for sline in self.source[start - 1:end])
            self.options.line_range[1] = start + count - 1

        return ''.join(self.source)
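
    # Sketch of how this class is typically driven (based on the fix_code()
    # and fix_lines() helpers defined later in this module, outside this
    # section): the source is re-checked and re-fixed in repeated passes
    # until it stops changing, since one fix can expose another.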


    def _fix_reindent(self, result):
        """Fix a badly indented line.

        This is done by adding or removing from its initial indent only.

        """
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()

    def fix_e112(self, result):
        """Fix under-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        if not target.lstrip().startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = self.indent_word + target

    def fix_e113(self, result):
        """Fix unexpected indentation."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        indent = _get_indentation(target)
        stripped = target.lstrip()
        self.source[line_index] = indent[1:] + stripped

    def fix_e116(self, result):
        """Fix over-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent = _get_indentation(target)
        stripped = target.lstrip()

        if not stripped.startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = indent[1:] + stripped

    def fix_e117(self, result):
        """Fix over-indentation."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent = _get_indentation(target)
        if indent == '\t':
            return []

        stripped = target.lstrip()

        self.source[line_index] = indent[1:] + stripped

    def fix_e125(self, result):
        """Fix indentation indistinguishable from the next logical line."""
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        spaces_to_add = num_indent_spaces - len(_get_indentation(target))
        indent = len(_get_indentation(target))
        modified_lines = []

        while len(_get_indentation(self.source[line_index])) >= indent:
            self.source[line_index] = (' ' * spaces_to_add +
                                       self.source[line_index])
            modified_lines.append(1 + line_index)  # Line indexed at 1.
            line_index -= 1

        return modified_lines

    def fix_e131(self, result):
        """Fix indentation indistinguishable from the next logical line."""
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent_length = len(_get_indentation(target))
        spaces_to_add = num_indent_spaces - indent_length
        if num_indent_spaces == 0 and indent_length == 0:
            spaces_to_add = 4

        if spaces_to_add >= 0:
            self.source[line_index] = (' ' * spaces_to_add +
                                       self.source[line_index])
        else:
            offset = abs(spaces_to_add)
            self.source[line_index] = self.source[line_index][offset:]

    def fix_e201(self, result):
        """Remove extraneous whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement='')

        self.source[line_index] = fixed

    def fix_e224(self, result):
        """Remove extraneous whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + target[offset:].replace('\t', ' ')
        self.source[result['line'] - 1] = fixed

    def fix_e225(self, result):
        """Fix missing whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + ' ' + target[offset:]

        # Only proceed if non-whitespace characters match.
        # And make sure we don't break the indentation.
        if (
            fixed.replace(' ', '') == target.replace(' ', '') and
            _get_indentation(fixed) == _get_indentation(target)
        ):
            self.source[result['line'] - 1] = fixed
            error_code = result.get('id', 0)
            try:
                ts = generate_tokens(fixed)
            except (SyntaxError, tokenize.TokenError):
                return
            if not check_syntax(fixed.lstrip()):
                return
            errors = list(
                pycodestyle.missing_whitespace_around_operator(fixed, ts))
            for e in reversed(errors):
                if error_code != e[1].split()[0]:
                    continue
                offset = e[0][1]
                fixed = fixed[:offset] + ' ' + fixed[offset:]
            self.source[result['line'] - 1] = fixed
        else:
            return []
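
    # For example, fix_e225() turns "i=i+1" into "i = i + 1": it inserts a
    # space at the reported column, then re-runs pycodestyle's
    # missing_whitespace_around_operator() on the candidate to place any
    # remaining spaces.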


    def fix_e231(self, result):
        """Add missing whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column']
        fixed = target[:offset].rstrip() + ' ' + target[offset:].lstrip()
        self.source[line_index] = fixed

    def fix_e251(self, result):
        """Remove whitespace around parameter '=' sign."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        # This is necessary since pycodestyle sometimes reports columns that
        # go past the end of the physical line. This happens in cases like:
        # foo(bar\n=None)
        c = min(result['column'] - 1,
                len(target) - 1)

        if target[c].strip():
            fixed = target
        else:
            fixed = target[:c].rstrip() + target[c:].lstrip()

        # There could be an escaped newline
        #
        #     def foo(a=\
        #             1)
        if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
            self.source[line_index] = fixed.rstrip('\n\r \t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]  # Line indexed at 1

        self.source[result['line'] - 1] = fixed

    def fix_e262(self, result):
        """Fix spacing after inline comment hash."""
        target = self.source[result['line'] - 1]
        offset = result['column']

        code = target[:offset].rstrip(' \t#')
        comment = target[offset:].lstrip(' \t#')

        fixed = code + ('  # ' + comment if comment.strip() else '\n')

        self.source[result['line'] - 1] = fixed

    def fix_e265(self, result):
        """Fix spacing after block comment hash."""
        target = self.source[result['line'] - 1]

        indent = _get_indentation(target)
        line = target.lstrip(' \t')
        pos = next((index for index, c in enumerate(line) if c != '#'))
        hashes = line[:pos]
        comment = line[pos:].lstrip(' \t')

        # Ignore special comments, even in the middle of the file.
        if comment.startswith('!'):
            return

        fixed = indent + hashes + (' ' + comment if comment.strip() else '\n')

        self.source[result['line'] - 1] = fixed

    def fix_e266(self, result):
        """Fix too many block comment hashes."""
        target = self.source[result['line'] - 1]

        # Leave stylistic outlined blocks alone.
        if target.strip().endswith('#'):
            return

        indentation = _get_indentation(target)
        fixed = indentation + '# ' + target.lstrip('# \t')

        self.source[result['line'] - 1] = fixed
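
    # Examples of the comment fixers above: fix_e262() rewrites
    # "x = 1  ## foo" as "x = 1  # foo", fix_e265() rewrites "#foo" as
    # "# foo", and fix_e266() rewrites "## foo" as "# foo".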


    def fix_e271(self, result):
        """Fix extraneous whitespace around keywords."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement=' ')

        if fixed == target:
            return []
        else:
            self.source[line_index] = fixed

    def fix_e301(self, result):
        """Add missing blank line."""
        cr = '\n'
        self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]

    def fix_e302(self, result):
        """Add missing 2 blank lines."""
        add_linenum = 2 - int(result['info'].split()[-1])
        offset = 1
        if self.source[result['line'] - 2].strip() == "\\":
            offset = 2
        cr = '\n' * add_linenum
        self.source[result['line'] - offset] = (
            cr + self.source[result['line'] - offset]
        )

    def fix_e303(self, result):
        """Remove extra blank lines."""
        delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
        delete_linenum = max(1, delete_linenum)

        # We need to count because pycodestyle reports an offset line number
        # if there are comments.
        cnt = 0
        line = result['line'] - 2
        modified_lines = []
        while cnt < delete_linenum and line >= 0:
            if not self.source[line].strip():
                self.source[line] = ''
                modified_lines.append(1 + line)  # Line indexed at 1
                cnt += 1
            line -= 1

        return modified_lines

    def fix_e304(self, result):
        """Remove blank line following function decorator."""
        line = result['line'] - 2
        if not self.source[line].strip():
            self.source[line] = ''

    def fix_e305(self, result):
        """Add missing 2 blank lines after end of function or class."""
        add_delete_linenum = 2 - int(result['info'].split()[-1])
        cnt = 0
        offset = result['line'] - 2
        modified_lines = []
        if add_delete_linenum < 0:
            # Delete the surplus blank lines.
            add_delete_linenum = abs(add_delete_linenum)
            while cnt < add_delete_linenum and offset >= 0:
                if not self.source[offset].strip():
                    self.source[offset] = ''
                    modified_lines.append(1 + offset)  # Line indexed at 1
                    cnt += 1
                offset -= 1
        else:
            # Add the missing blank lines, skipping back over any comment
            # lines directly above the definition.
            cr = '\n'
            while True:
                if offset < 0:
                    break
                line = self.source[offset].lstrip()
                if not line:
                    break
                if line[0] != '#':
                    break
                offset -= 1
            offset += 1
            self.source[offset] = cr + self.source[offset]
            modified_lines.append(1 + offset)  # Line indexed at 1.
        return modified_lines

    def fix_e401(self, result):
        """Put imports on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        if not target.lstrip().startswith('import'):
            return []

        indentation = re.split(pattern=r'\bimport\b',
                               string=target, maxsplit=1)[0]
        fixed = (target[:offset].rstrip('\t ,') + '\n' +
                 indentation + 'import ' + target[offset:].lstrip('\t ,'))
        self.source[line_index] = fixed

    def fix_e402(self, result):
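        """Fix module level import not at top of file."""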

        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)
        for i in range(1, 100):
            line = "".join(self.source[line_index:line_index+i])
            try:
                generate_tokens("".join(line))
            except (SyntaxError, tokenize.TokenError):
                continue
            break
        if not (target in self.imports and self.imports[target] != line_index):
            mod_offset = get_module_imports_on_top_of_file(self.source,
                                                           line_index)
            self.source[mod_offset] = line + self.source[mod_offset]
        for offset in range(i):
            self.source[line_index+offset] = ''

    def fix_long_line_logically(self, result, logical):
        """Try to make lines fit within --max-line-length characters."""
        if (
            not logical or
            len(logical[2]) == 1 or
            self.source[result['line'] - 1].lstrip().startswith('#')
        ):
            return self.fix_long_line_physically(result)

        start_line_index = logical[0][0]
        end_line_index = logical[1][0]
        logical_lines = logical[2]

        previous_line = get_item(self.source, start_line_index - 1, default='')
        next_line = get_item(self.source, end_line_index + 1, default='')

        single_line = join_logical_line(''.join(logical_lines))

        try:
            fixed = self.fix_long_line(
                target=single_line,
                previous_line=previous_line,
                next_line=next_line,
                original=''.join(logical_lines))
        except (SyntaxError, tokenize.TokenError):
            return self.fix_long_line_physically(result)

        if fixed:
            for line_index in range(start_line_index, end_line_index + 1):
                self.source[line_index] = ''
            self.source[start_line_index] = fixed
            return range(start_line_index + 1, end_line_index + 1)

        return []

    def fix_long_line_physically(self, result):
        """Try to make lines fit within --max-line-length characters."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        previous_line = get_item(self.source, line_index - 1, default='')
        next_line = get_item(self.source, line_index + 1, default='')

        try:
            fixed = self.fix_long_line(
                target=target,
                previous_line=previous_line,
                next_line=next_line,
                original=target)
        except (SyntaxError, tokenize.TokenError):
            return []

        if fixed:
            self.source[line_index] = fixed
            return [line_index + 1]

        return []

    def fix_long_line(self, target, previous_line,
                      next_line, original):
        cache_entry = (target, previous_line, next_line)
        if cache_entry in self.long_line_ignore_cache:
            return []

        if target.lstrip().startswith('#'):
            if self.options.aggressive:
                # Wrap commented lines.
                return shorten_comment(
                    line=target,
                    max_line_length=self.options.max_line_length,
                    last_comment=not next_line.lstrip().startswith('#'))
            return []

        fixed = get_fixed_long_line(
            target=target,
            previous_line=previous_line,
            original=original,
            indent_word=self.indent_word,
            max_line_length=self.options.max_line_length,
            aggressive=self.options.aggressive,
            experimental=self.options.experimental,
            verbose=self.options.verbose)

        if fixed and not code_almost_equal(original, fixed):
            return fixed

        self.long_line_ignore_cache.add(cache_entry)
        return None

    def fix_e502(self, result):
        """Remove extraneous escape of newline."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'

    def fix_e701(self, result):
        """Put colon-separated compound statement on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        c = result['column']

        fixed_source = (target[:c] + '\n' +
                        _get_indentation(target) + self.indent_word +
                        target[c:].lstrip('\n\r \t\\'))
        self.source[result['line'] - 1] = fixed_source
        return [result['line'], result['line'] + 1]
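
    # For example, fix_e701() splits "if x: y = 1" into an "if x:" line
    # followed by an indented "y = 1" line.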


    def fix_e702(self, result, logical):
        """Put semicolon-separated compound statement on separate lines."""
        if not logical:
            return []  # pragma: no cover
        logical_lines = logical[2]

        # Avoid applying this when indented.
        # https://docs.python.org/reference/compound_stmts.html
        for line in logical_lines:
            if (result['id'] == 'E702' and ':' in line
                    and STARTSWITH_INDENT_STATEMENT_REGEX.match(line)):
                if self.options.verbose:
                    print(
                        '---> avoid fixing {error} with '
                        'other compound statements'.format(error=result['id']),
                        file=sys.stderr
                    )
                return []

        line_index = result['line'] - 1
        target = self.source[line_index]

        if target.rstrip().endswith('\\'):
            # Normalize '1; \\\n2' into '1; 2'.
            self.source[line_index] = target.rstrip('\n \r\t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]

        if target.rstrip().endswith(';'):
            self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
            return [line_index + 1]

        offset = result['column'] - 1
        first = target[:offset].rstrip(';').rstrip()
        second = (_get_indentation(logical_lines[0]) +
                  target[offset:].lstrip(';').lstrip())

        # Find inline comment.
        inline_comment = None
        if target[offset:].lstrip(';').lstrip()[:2] == '# ':
            inline_comment = target[offset:].lstrip(';')

        if inline_comment:
            self.source[line_index] = first + inline_comment
        else:
            self.source[line_index] = first + '\n' + second
        return [line_index + 1]

    def fix_e704(self, result):
        """Fix multiple statements on one line def."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = STARTSWITH_DEF_REGEX.match(target)
        if match:
            self.source[line_index] = '{}\n{}{}'.format(
                match.group(0),
                _get_indentation(target) + self.indent_word,
                target[match.end(0):].lstrip())

    def fix_e711(self, result):
        """Fix comparison with None."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        right_offset = offset + 2
        if right_offset >= len(target):
            return []

        left = target[:offset].rstrip()
        center = target[offset:right_offset]
        right = target[right_offset:].lstrip()

        if center.strip() == '==':
            new_center = 'is'
        elif center.strip() == '!=':
            new_center = 'is not'
        else:
            return []

        self.source[line_index] = ' '.join([left, new_center, right])
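
    # For example, fix_e711() rewrites "foo == None" as "foo is None" and
    # "foo != None" as "foo is not None".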


    def fix_e712(self, result):
        """Fix (trivial case of) comparison with boolean."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # Handle very easy "not" special cases.
        if re.match(r'^\s*if [\w."\'\[\]]+ == False:$', target):
            self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) == False:',
                                             r'if not \1:', target, count=1)
        elif re.match(r'^\s*if [\w."\'\[\]]+ != True:$', target):
            self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) != True:',
                                             r'if not \1:', target, count=1)
        else:
            right_offset = offset + 2
            if right_offset >= len(target):
                return []

            left = target[:offset].rstrip()
            center = target[offset:right_offset]
            right = target[right_offset:].lstrip()

            # Handle simple cases only.
            new_right = None
            if center.strip() == '==':
                if re.match(r'\bTrue\b', right):
                    new_right = re.sub(r'\bTrue\b *', '', right, count=1)
            elif center.strip() == '!=':
                if re.match(r'\bFalse\b', right):
                    new_right = re.sub(r'\bFalse\b *', '', right, count=1)

            if new_right is None:
                return []

            if new_right[0].isalnum():
                new_right = ' ' + new_right

            self.source[line_index] = left + new_right

    def fix_e713(self, result):
        """Fix (trivial case of) non-membership check."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # Convert any existing 'not in' to 'in' first.
        before_target = target[:offset]
        target = target[offset:]
        match_notin = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
        notin_pos_start, notin_pos_end = 0, 0
        if match_notin:
            notin_pos_start = match_notin.start(1)
            notin_pos_end = match_notin.end()
            target = '{}{} {}'.format(
                target[:notin_pos_start], 'in', target[notin_pos_end:])

        # Fix 'not in'.
        match = COMPARE_NEGATIVE_REGEX.search(target)
        if match:
            if match.group(3) == 'in':
                pos_start = match.start(1)
                new_target = '{5}{0}{1} {2} {3} {4}'.format(
                    target[:pos_start], match.group(2), match.group(1),
                    match.group(3), target[match.end():], before_target)
                if match_notin:
                    # Revert 'in' -> 'not in'.
                    pos_start = notin_pos_start + offset
                    pos_end = notin_pos_end + offset - 4  # len('not ')
                    new_target = '{}{} {}'.format(
                        new_target[:pos_start], 'not in', new_target[pos_end:])
                self.source[line_index] = new_target
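
    # For example, fix_e713() rewrites "not x in y" as "x not in y", first
    # normalizing any surrounding "not in"/"is not" so the regexes only match
    # the intended comparison.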


    def fix_e714(self, result):
        """Fix object identity check that should use 'is not'."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # Convert any existing 'is not' to 'is' first.
        before_target = target[:offset]
        target = target[offset:]
        match_isnot = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
        isnot_pos_start, isnot_pos_end = 0, 0
        if match_isnot:
            isnot_pos_start = match_isnot.start(1)
            isnot_pos_end = match_isnot.end()
            target = '{}{} {}'.format(
                target[:isnot_pos_start], 'in', target[isnot_pos_end:])

        match = COMPARE_NEGATIVE_REGEX.search(target)
        if match:
            if match.group(3).startswith('is'):
                pos_start = match.start(1)
                new_target = '{5}{0}{1} {2} {3} {4}'.format(
                    target[:pos_start], match.group(2), match.group(3),
                    match.group(1), target[match.end():], before_target)
                if match_isnot:
                    # Revert 'is' -> 'is not'.
                    pos_start = isnot_pos_start + offset
                    pos_end = isnot_pos_end + offset - 4  # len('not ')
                    new_target = '{}{} {}'.format(
                        new_target[:pos_start], 'is not', new_target[pos_end:])
                self.source[line_index] = new_target

    def fix_e722(self, result):
        """Fix bare except."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = BARE_EXCEPT_REGEX.search(target)
        if match:
            self.source[line_index] = '{}{}{}'.format(
                target[:result['column'] - 1], "except BaseException:",
                target[match.end():])

    def fix_e731(self, result):
        """Fix 'do not assign a lambda expression' check."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = LAMBDA_REGEX.search(target)
        if match:
            end = match.end()
            self.source[line_index] = '{}def {}({}): return {}'.format(
                target[:match.start(0)], match.group(1), match.group(2),
                target[end:].lstrip())
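
    # For example, fix_e731() rewrites "f = lambda x: 2 * x" as
    # "def f(x): return 2 * x" (which fix_e704() may then split further).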


    def fix_w291(self, result):
        """Remove trailing whitespace."""
        fixed_line = self.source[result['line'] - 1].rstrip()
        self.source[result['line'] - 1] = fixed_line + '\n'

    def fix_w391(self, _):
        """Remove trailing blank lines."""
        blank_count = 0
        for line in reversed(self.source):
            line = line.rstrip()
            if line:
                break
            else:
                blank_count += 1

        original_length = len(self.source)
        self.source = self.source[:original_length - blank_count]
        return range(1, 1 + original_length)

    def fix_w503(self, result):
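        """Fix line break before binary operator."""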

        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        one_string_token = target.split()[0]
        try:
            ts = generate_tokens(one_string_token)
        except (SyntaxError, tokenize.TokenError):
            return
        if not _is_binary_operator(ts[0][0], one_string_token):
            return
        # Find the comment, if any.
        comment_index = 0
        found_not_comment_only_line = False
        comment_only_linenum = 0
        for i in range(5):
            # NOTE: try to parse the code up to five times
            if (line_index - i) < 0:
                break
            from_index = line_index - i - 1
            if from_index < 0 or len(self.source) <= from_index:
                break
            to_index = line_index + 1
            strip_line = self.source[from_index].lstrip()
            if (
                not found_not_comment_only_line and
                strip_line and strip_line[0] == '#'
            ):
                comment_only_linenum += 1
                continue
            found_not_comment_only_line = True
            try:
                ts = generate_tokens("".join(self.source[from_index:to_index]))
            except (SyntaxError, tokenize.TokenError):
                continue
            newline_count = 0
            newline_index = []
            for index, t in enumerate(ts):
                if t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_index.append(index)
                    newline_count += 1
            if newline_count > 2:
                tts = ts[newline_index[-3]:]
            else:
                tts = ts
            old = []
            for t in tts:
                if t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_count -= 1
                    if newline_count <= 1:
                        break
                if tokenize.COMMENT == t[0] and old and old[0] != tokenize.NL:
                    comment_index = old[3][1]
                    break
                old = t
            break
        i = target.index(one_string_token)
        fix_target_line = line_index - 1 - comment_only_linenum
        self.source[line_index] = '{}{}'.format(
            target[:i], target[i + len(one_string_token):].lstrip())
        nl = find_newline(self.source[fix_target_line:line_index])
        before_line = self.source[fix_target_line]
        bl = before_line.index(nl)
        if comment_index:
            self.source[fix_target_line] = '{} {} {}'.format(
                before_line[:comment_index], one_string_token,
                before_line[comment_index + 1:])
        else:
            if before_line[:bl].endswith("#"):
                # Special case; see:
                # https://github.com/hhatto/autopep8/issues/503
                self.source[fix_target_line] = '{}{} {}'.format(
                    before_line[:bl-2], one_string_token, before_line[bl-2:])
            else:
                self.source[fix_target_line] = '{} {}{}'.format(
                    before_line[:bl], one_string_token, before_line[bl:])

    def fix_w504(self, result):
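        """Fix line break after binary operator."""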

        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        # NOTE: this case is not reported by pycodestyle==2.4.0
        comment_index = 0
        operator_position = None  # (start_position, end_position)
        for i in range(1, 6):
            to_index = line_index + i
            try:
                ts = generate_tokens("".join(self.source[line_index:to_index]))
            except (SyntaxError, tokenize.TokenError):
                continue
            newline_count = 0
            newline_index = []
            for index, t in enumerate(ts):
                if _is_binary_operator(t[0], t[1]):
                    if t[2][0] == 1 and t[3][0] == 1:
                        operator_position = (t[2][1], t[3][1])
                elif t[0] == tokenize.NAME and t[1] in ("and", "or"):
                    if t[2][0] == 1 and t[3][0] == 1:
                        operator_position = (t[2][1], t[3][1])
                elif t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_index.append(index)
                    newline_count += 1
            if newline_count > 2:
                tts = ts[:newline_index[-3]]
            else:
                tts = ts
            old = []
            for t in tts:
                if tokenize.COMMENT == t[0] and old:
                    comment_row, comment_index = old[3]
                    break
                old = t
            break
        if not operator_position:
            return
        target_operator = target[operator_position[0]:operator_position[1]]

        if comment_index and comment_row == 1:
            self.source[line_index] = '{}{}'.format(
                target[:operator_position[0]].rstrip(),
                target[comment_index:])
        else:
            self.source[line_index] = '{}{}{}'.format(
                target[:operator_position[0]].rstrip(),
                target[operator_position[1]:].lstrip(),
                target[operator_position[1]:])

        next_line = self.source[line_index + 1]
        next_line_indent = 0
        m = re.match(r'\s*', next_line)
        if m:
            next_line_indent = m.span()[1]
        self.source[line_index + 1] = '{}{} {}'.format(
            next_line[:next_line_indent], target_operator,
            next_line[next_line_indent:])

    def fix_w605(self, result):
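        """Fix invalid escape sequence by escaping the backslash."""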

        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)
        self.source[line_index] = '{}\\{}'.format(
            target[:offset + 1], target[offset + 1:])


def get_module_imports_on_top_of_file(source, import_line_index):
    """Return import or from keyword position.

    example:
    > 0: import sys
      1: import os
      2:
      3: def function():
    """
    def is_string_literal(line):
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    def is_future_import(line):
        nodes = ast.parse(line)
        for n in nodes.body:
            if isinstance(n, ast.ImportFrom) and n.module == '__future__':
                return True
        return False

    def has_future_import(source):
        offset = 0
        line = ''
        for _, next_line in source:
            for line_part in next_line.strip().splitlines(True):
                line = line + line_part
                try:
                    return is_future_import(line), offset
                except SyntaxError:
                    continue
            offset += 1
        return False, offset

    allowed_try_keywords = ('try', 'except', 'else', 'finally')
    in_docstring = False
    docstring_kind = '"""'
    source_stream = iter(enumerate(source))
    for cnt, line in source_stream:
        if not in_docstring:
            m = DOCSTRING_START_REGEX.match(line.lstrip())
            if m is not None:
                in_docstring = True
                docstring_kind = m.group('kind')
                remain = line[m.end(): m.endpos].rstrip()
                if remain[-3:] == docstring_kind:  # one line doc
                    in_docstring = False
                continue
        if in_docstring:
            if line.rstrip()[-3:] == docstring_kind:
                in_docstring = False
            continue

        if not line.rstrip():
            continue
        elif line.startswith('#'):
            continue

        if line.startswith('import '):
            if cnt == import_line_index:
                continue
            return cnt
        elif line.startswith('from '):
            if cnt == import_line_index:
                continue
            hit, offset = has_future_import(
                itertools.chain([(cnt, line)], source_stream)
            )
            if hit:
                # move to the back
                return cnt + offset + 1
            return cnt
        elif pycodestyle.DUNDER_REGEX.match(line):
            return cnt
        elif any(line.startswith(kw) for kw in allowed_try_keywords):
            continue
        elif is_string_literal(line):
            return cnt
        else:
            return cnt
    return 0


def get_index_offset_contents(result, source):
    """Return (line_index, column_offset, line_contents)."""
    line_index = result['line'] - 1
    return (line_index,
            result['column'] - 1,
            source[line_index])


def get_fixed_long_line(target, previous_line, original,
                        indent_word='    ', max_line_length=79,
                        aggressive=False, experimental=False, verbose=False):
    """Break up long line and return result.

    Do this by generating multiple reformatted candidates and then
    ranking the candidates to heuristically select the best option.

    """
    indent = _get_indentation(target)
    source = target[len(indent):]
    assert source.lstrip() == source
    assert not target.lstrip().startswith('#')

    # Check for partial multiline.
    tokens = list(generate_tokens(source))

    candidates = shorten_line(
        tokens, source, indent,
        indent_word,
        max_line_length,
        aggressive=aggressive,
        experimental=experimental,
        previous_line=previous_line)

    # Also sort alphabetically as a tie breaker (for determinism).
    candidates = sorted(
        sorted(set(candidates).union([target, original])),
        key=lambda x: line_shortening_rank(
            x,
            indent_word,
            max_line_length,
            experimental=experimental))

    if verbose >= 4:
        print(('-' * 79 + '\n').join([''] + candidates + ['']),
              file=wrap_output(sys.stderr, 'utf-8'))

    if candidates:
        best_candidate = candidates[0]

        # Don't allow things to get longer.
        if longest_line_length(best_candidate) > longest_line_length(original):
            return None

        return best_candidate


def longest_line_length(code):
    """Return length of longest line."""
    if len(code) == 0:
        return 0
    return max(len(line) for line in code.splitlines())


def join_logical_line(logical_line):
    """Return single line based on logical line input."""
    indentation = _get_indentation(logical_line)

    return indentation + untokenize_without_newlines(
        generate_tokens(logical_line.lstrip())) + '\n'


def untokenize_without_newlines(tokens):
    """Return source code based on tokens."""
    text = ''
    last_row = 0
    last_column = -1

    for t in tokens:
        token_string = t[1]
        (start_row, start_column) = t[2]
        (end_row, end_column) = t[3]

        if start_row > last_row:
            last_column = 0
        if (
            (start_column > last_column or token_string == '\n') and
            not text.endswith(' ')
        ):
            text += ' '

        if token_string != '\n':
            text += token_string

        last_row = end_row
        last_column = end_column

    return text.rstrip()


def _find_logical(source_lines):
    # Build indexes of where each logical line starts and ends.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end)


def _get_logical(source_lines, result, logical_start, logical_end):
    """Return the logical line corresponding to the result.

    Assumes input is already E702-clean.

    """
    row = result['line'] - 1
    col = result['column'] - 1
    ls = None
    le = None
    for i in range(0, len(logical_start), 1):
        assert logical_end
        x = logical_end[i]
        if x[0] > row or (x[0] == row and x[1] > col):
            le = x
            ls = logical_start[i]
            break
    if ls is None:
        return None
    original = source_lines[ls[0]:le[0] + 1]
    return ls, le, original


def get_item(items, index, default=None):
    if 0 <= index < len(items):
        return items[index]

    return default
1679 

1680def reindent(source, indent_size, leave_tabs=False): 

1681 """Reindent all lines.""" 

1682 reindenter = Reindenter(source, leave_tabs) 

1683 return reindenter.run(indent_size) 

1684 
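# A minimal usage sketch (doctest-style, illustrative): two-space indentation
# is rewritten to the requested four-space indentation.
#
#     >>> reindent('if True:\n  x = 1\n', indent_size=4)
#     'if True:\n    x = 1\n'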

1685 

1686def code_almost_equal(a, b): 

1687 """Return True if code is similar. 

1688 

1689 Ignore whitespace when comparing specific line. 

1690 

1691 """ 

1692 split_a = split_and_strip_non_empty_lines(a) 

1693 split_b = split_and_strip_non_empty_lines(b) 

1694 

1695 if len(split_a) != len(split_b): 

1696 return False 

1697 

1698 for (index, _) in enumerate(split_a): 

1699 if ''.join(split_a[index].split()) != ''.join(split_b[index].split()): 

1700 return False 

1701 

1702 return True 

1703 

1704 

1705def split_and_strip_non_empty_lines(text): 

1706 """Return lines split by newline. 

1707 

1708 Ignore empty lines. 

1709 

1710 """ 

1711 return [line.strip() for line in text.splitlines() if line.strip()] 

1712 
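# A minimal usage sketch (doctest-style, illustrative): blank lines and
# intra-line whitespace are ignored, but differing content is not.
#
#     >>> code_almost_equal('x=1\n\ny = 2\n', 'x = 1\ny=2\n')
#     True
#     >>> code_almost_equal('x = 1\n', 'x = 2\n')
#     False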

1713 

1714def refactor(source, fixer_names, ignore=None, filename=''): 

1715 """Return refactored code using lib2to3. 

1716 

1717 Skip the change if the ignore string appears in the refactored code but not in the original.

1718 

1719 """ 

1720 not_found_end_of_file_newline = source and source.rstrip("\r\n") == source 

1721 if not_found_end_of_file_newline: 

1722 input_source = source + "\n" 

1723 else: 

1724 input_source = source 

1725 

1726 from lib2to3 import pgen2 

1727 try: 

1728 new_text = refactor_with_2to3(input_source, 

1729 fixer_names=fixer_names, 

1730 filename=filename) 

1731 except (pgen2.parse.ParseError, 

1732 SyntaxError, 

1733 UnicodeDecodeError, 

1734 UnicodeEncodeError): 

1735 return source 

1736 

1737 if ignore: 

1738 if ignore in new_text and ignore not in source: 

1739 return source 

1740 

1741 if not_found_end_of_file_newline: 

1742 return new_text.rstrip("\r\n") 

1743 

1744 return new_text 

1745 

1746 

1747def code_to_2to3(select, ignore, where='', verbose=False): 

1748 fixes = set() 

1749 for code, fix in CODE_TO_2TO3.items(): 

1750 if code_match(code, select=select, ignore=ignore): 

1751 if verbose: 

1752 print('---> Applying {} fix for {}'.format(where, 

1753 code.upper()), 

1754 file=sys.stderr) 

1755 fixes |= set(fix) 

1756 return fixes 

1757 

1758 

1759def fix_2to3(source, 

1760 aggressive=True, select=None, ignore=None, filename='', 

1761 where='global', verbose=False): 

1762 """Fix various deprecated code (via lib2to3).""" 

1763 if not aggressive: 

1764 return source 

1765 

1766 select = select or [] 

1767 ignore = ignore or [] 

1768 

1769 return refactor(source, 

1770 code_to_2to3(select=select, 

1771 ignore=ignore, 

1772 where=where, 

1773 verbose=verbose), 

1774 filename=filename) 

1775 

1776 

1777def find_newline(source): 

1778 """Return type of newline used in source. 

1779 

1780 Input is a list of lines. 

1781 

1782 """ 

1783 assert not isinstance(source, str) 

1784 

1785 counter = collections.defaultdict(int) 

1786 for line in source: 

1787 if line.endswith(CRLF): 

1788 counter[CRLF] += 1 

1789 elif line.endswith(CR): 

1790 counter[CR] += 1 

1791 elif line.endswith(LF): 

1792 counter[LF] += 1 

1793 

1794 return (sorted(counter, key=counter.get, reverse=True) or [LF])[0] 

1795 
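# A minimal usage sketch (doctest-style, illustrative): the most common line
# ending wins; here CRLF appears twice and LF once.
#
#     >>> find_newline(['a\r\n', 'b\r\n', 'c\n'])
#     '\r\n'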

1796 

1797def _get_indentword(source): 

1798 """Return indentation type.""" 

1799 indent_word = ' ' # Default in case source has no indentation 

1800 try: 

1801 for t in generate_tokens(source): 

1802 if t[0] == token.INDENT: 

1803 indent_word = t[1] 

1804 break 

1805 except (SyntaxError, tokenize.TokenError): 

1806 pass 

1807 return indent_word 

1808 

1809 

1810def _get_indentation(line): 

1811 """Return leading whitespace.""" 

1812 if line.strip(): 

1813 non_whitespace_index = len(line) - len(line.lstrip()) 

1814 return line[:non_whitespace_index] 

1815 

1816 return '' 

1817 

1818 

1819def get_diff_text(old, new, filename): 

1820 """Return text of unified diff between old and new.""" 

1821 newline = '\n' 

1822 diff = difflib.unified_diff( 

1823 old, new, 

1824 'original/' + filename, 

1825 'fixed/' + filename, 

1826 lineterm=newline) 

1827 

1828 text = '' 

1829 for line in diff: 

1830 text += line 

1831 

1832 # Work around missing newline (http://bugs.python.org/issue2142). 

1833 if text and not line.endswith(newline): 

1834 text += newline + r'\ No newline at end of file' + newline 

1835 

1836 return text 

1837 
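# A minimal usage sketch (doctest-style, illustrative):
#
#     >>> print(get_diff_text(['x=1\n'], ['x = 1\n'], 'foo.py'), end='')
#     --- original/foo.py
#     +++ fixed/foo.py
#     @@ -1 +1 @@
#     -x=1
#     +x = 1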

1838 

1839def _priority_key(pep8_result): 

1840 """Key for sorting PEP8 results. 

1841 

1842 Global fixes should be done first. This is important for things like 

1843 indentation. 

1844 

1845 """ 

1846 priority = [ 

1847 # Fix multiline colon-based statements before semicolon-based ones. 

1848 'e701', 

1849 # Break multiline statements early. 

1850 'e702', 

1851 # Things that make lines longer. 

1852 'e225', 'e231', 

1853 # Remove extraneous whitespace before breaking lines. 

1854 'e201', 

1855 # Shorten whitespace in comment before resorting to wrapping. 

1856 'e262' 

1857 ] 

1858 middle_index = 10000 

1859 lowest_priority = [ 

1860 # We need to shorten lines last since the logical fixer can get in a 

1861 # loop, which causes us to exit early. 

1862 'e501', 

1863 ] 

1864 key = pep8_result['id'].lower() 

1865 try: 

1866 return priority.index(key) 

1867 except ValueError: 

1868 try: 

1869 return middle_index + lowest_priority.index(key) + 1 

1870 except ValueError: 

1871 return middle_index 

1872 
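# Illustrative ordering: for results with ids E701, E225, E999, and E501 the
# sort keys are 0, 2, 10000, and 10001 respectively, so multiline statements
# are split first and line shortening (E501) runs last.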

1873 

1874def shorten_line(tokens, source, indentation, indent_word, max_line_length, 

1875 aggressive=False, experimental=False, previous_line=''): 

1876 """Separate line at OPERATOR. 

1877 

1878 Multiple candidates will be yielded. 

1879 

1880 """ 

1881 for candidate in _shorten_line(tokens=tokens, 

1882 source=source, 

1883 indentation=indentation, 

1884 indent_word=indent_word, 

1885 aggressive=aggressive, 

1886 previous_line=previous_line): 

1887 yield candidate 

1888 

1889 if aggressive: 

1890 for key_token_strings in SHORTEN_OPERATOR_GROUPS: 

1891 shortened = _shorten_line_at_tokens( 

1892 tokens=tokens, 

1893 source=source, 

1894 indentation=indentation, 

1895 indent_word=indent_word, 

1896 key_token_strings=key_token_strings, 

1897 aggressive=aggressive) 

1898 

1899 if shortened is not None and shortened != source: 

1900 yield shortened 

1901 

1902 if experimental: 

1903 for shortened in _shorten_line_at_tokens_new( 

1904 tokens=tokens, 

1905 source=source, 

1906 indentation=indentation, 

1907 max_line_length=max_line_length): 

1908 

1909 yield shortened 

1910 

1911 

1912def _shorten_line(tokens, source, indentation, indent_word, 

1913 aggressive=False, previous_line=''): 

1914 """Separate line at OPERATOR. 

1915 

1916 The input is expected to be free of newlines except for inside multiline 

1917 strings and at the end. 

1918 

1919 Multiple candidates will be yielded. 

1920 

1921 """ 

1922 for (token_type, 

1923 token_string, 

1924 start_offset, 

1925 end_offset) in token_offsets(tokens): 

1926 

1927 if ( 

1928 token_type == tokenize.COMMENT and 

1929 not is_probably_part_of_multiline(previous_line) and 

1930 not is_probably_part_of_multiline(source) and 

1931 not source[start_offset + 1:].strip().lower().startswith( 

1932 ('noqa', 'pragma:', 'pylint:')) 

1933 ): 

1934 # Move inline comments to previous line. 

1935 first = source[:start_offset] 

1936 second = source[start_offset:] 

1937 yield (indentation + second.strip() + '\n' + 

1938 indentation + first.strip() + '\n') 

1939 elif token_type == token.OP and token_string != '=': 

1940 # Don't break on '=' after keyword as this violates PEP 8. 

1941 

1942 assert token_type != token.INDENT 

1943 

1944 first = source[:end_offset] 

1945 

1946 second_indent = indentation 

1947 if (first.rstrip().endswith('(') and 

1948 source[end_offset:].lstrip().startswith(')')): 

1949 pass 

1950 elif first.rstrip().endswith('('): 

1951 second_indent += indent_word 

1952 elif '(' in first: 

1953 second_indent += ' ' * (1 + first.find('(')) 

1954 else: 

1955 second_indent += indent_word 

1956 

1957 second = (second_indent + source[end_offset:].lstrip()) 

1958 if ( 

1959 not second.strip() or 

1960 second.lstrip().startswith('#') 

1961 ): 

1962 continue 

1963 

1964 # Do not begin a line with a comma 

1965 if second.lstrip().startswith(','): 

1966 continue 

1967 # Do not end a line with a dot. 

1968 if first.rstrip().endswith('.'): 

1969 continue 

1970 if token_string in '+-*/': 

1971 fixed = first + ' \\' + '\n' + second 

1972 else: 

1973 fixed = first + '\n' + second 

1974 

1975 # Only fix if syntax is okay. 

1976 if check_syntax(normalize_multiline(fixed) 

1977 if aggressive else fixed): 

1978 yield indentation + fixed 

1979 

1980 

1981def _is_binary_operator(token_type, text): 

1982 return ((token_type == tokenize.OP or text in ['and', 'or']) and 

1983 text not in '()[]{},:.;@=%~') 

1984 

1985 

1986# A convenient way to handle tokens. 

1987Token = collections.namedtuple('Token', ['token_type', 'token_string', 

1988 'spos', 'epos', 'line']) 

1989 

1990 

1991class ReformattedLines(object): 

1992 

1993 """The reflowed lines of atoms. 

1994 

1995 Each part of the line is represented as an "atom." They can be moved 

1996 around when need be to get the optimal formatting. 

1997 

1998 """ 

1999 

2000 ########################################################################### 

2001 # Private Classes 

2002 

2003 class _Indent(object): 

2004 

2005 """Represent an indentation in the atom stream.""" 

2006 

2007 def __init__(self, indent_amt): 

2008 self._indent_amt = indent_amt 

2009 

2010 def emit(self): 

2011 return ' ' * self._indent_amt 

2012 

2013 @property 

2014 def size(self): 

2015 return self._indent_amt 

2016 

2017 class _Space(object): 

2018 

2019 """Represent a space in the atom stream.""" 

2020 

2021 def emit(self): 

2022 return ' ' 

2023 

2024 @property 

2025 def size(self): 

2026 return 1 

2027 

2028 class _LineBreak(object): 

2029 

2030 """Represent a line break in the atom stream.""" 

2031 

2032 def emit(self): 

2033 return '\n' 

2034 

2035 @property 

2036 def size(self): 

2037 return 0 

2038 

2039 def __init__(self, max_line_length): 

2040 self._max_line_length = max_line_length 

2041 self._lines = [] 

2042 self._bracket_depth = 0 

2043 self._prev_item = None 

2044 self._prev_prev_item = None 

2045 

2046 def __repr__(self): 

2047 return self.emit() 

2048 

2049 ########################################################################### 

2050 # Public Methods 

2051 

2052 def add(self, obj, indent_amt, break_after_open_bracket): 

2053 if isinstance(obj, Atom): 

2054 self._add_item(obj, indent_amt) 

2055 return 

2056 

2057 self._add_container(obj, indent_amt, break_after_open_bracket) 

2058 

2059 def add_comment(self, item): 

2060 num_spaces = 2 

2061 if len(self._lines) > 1: 

2062 if isinstance(self._lines[-1], self._Space): 

2063 num_spaces -= 1 

2064 if len(self._lines) > 2: 

2065 if isinstance(self._lines[-2], self._Space): 

2066 num_spaces -= 1 

2067 

2068 while num_spaces > 0: 

2069 self._lines.append(self._Space()) 

2070 num_spaces -= 1 

2071 self._lines.append(item) 

2072 

2073 def add_indent(self, indent_amt): 

2074 self._lines.append(self._Indent(indent_amt)) 

2075 

2076 def add_line_break(self, indent): 

2077 self._lines.append(self._LineBreak()) 

2078 self.add_indent(len(indent)) 

2079 

2080 def add_line_break_at(self, index, indent_amt): 

2081 self._lines.insert(index, self._LineBreak()) 

2082 self._lines.insert(index + 1, self._Indent(indent_amt)) 

2083 

2084 def add_space_if_needed(self, curr_text, equal=False): 

2085 if ( 

2086 not self._lines or isinstance( 

2087 self._lines[-1], (self._LineBreak, self._Indent, self._Space)) 

2088 ): 

2089 return 

2090 

2091 prev_text = str(self._prev_item) 

2092 prev_prev_text = ( 

2093 str(self._prev_prev_item) if self._prev_prev_item else '') 

2094 

2095 if ( 

2096 # The previous item was a keyword or identifier and the current 

2097 # item isn't an operator that doesn't require a space. 

2098 ((self._prev_item.is_keyword or self._prev_item.is_string or 

2099 self._prev_item.is_name or self._prev_item.is_number) and 

2100 (curr_text[0] not in '([{.,:}])' or 

2101 (curr_text[0] == '=' and equal))) or 

2102 

2103 # Don't place spaces around a '.', unless it's in an 'import' 

2104 # statement. 

2105 ((prev_prev_text != 'from' and prev_text[-1] != '.' and 

2106 curr_text != 'import') and 

2107 

2108 # Don't place a space before a colon. 

2109 curr_text[0] != ':' and 

2110 

2111 # Don't split up ending brackets by spaces. 

2112 ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or 

2113 

2114 # Put a space after a colon or comma. 

2115 prev_text[-1] in ':,' or 

2116 

2117 # Put space around '=' if asked to. 

2118 (equal and prev_text == '=') or 

2119 

2120 # Put spaces around non-unary arithmetic operators. 

2121 ((self._prev_prev_item and 

2122 (prev_text not in '+-' and 

2123 (self._prev_prev_item.is_name or 

2124 self._prev_prev_item.is_number or 

2125 self._prev_prev_item.is_string)) and 

2126 prev_text in ('+', '-', '%', '*', '/', '//', '**', 'in'))))) 

2127 ): 

2128 self._lines.append(self._Space()) 

2129 

2130 def previous_item(self): 

2131 """Return the previous non-whitespace item.""" 

2132 return self._prev_item 

2133 

2134 def fits_on_current_line(self, item_extent): 

2135 return self.current_size() + item_extent <= self._max_line_length 

2136 

2137 def current_size(self): 

2138 """The size of the current line minus the indentation.""" 

2139 size = 0 

2140 for item in reversed(self._lines): 

2141 size += item.size 

2142 if isinstance(item, self._LineBreak): 

2143 break 

2144 

2145 return size 

2146 

2147 def line_empty(self): 

2148 return (self._lines and 

2149 isinstance(self._lines[-1], 

2150 (self._LineBreak, self._Indent))) 

2151 

2152 def emit(self): 

2153 string = '' 

2154 for item in self._lines: 

2155 if isinstance(item, self._LineBreak): 

2156 string = string.rstrip() 

2157 string += item.emit() 

2158 

2159 return string.rstrip() + '\n' 

2160 

2161 ########################################################################### 

2162 # Private Methods 

2163 

2164 def _add_item(self, item, indent_amt): 

2165 """Add an item to the line. 

2166 

2167 Reflow the line to get the best formatting after the item is 

2168 inserted. The bracket depth indicates if the item is being 

2169 inserted inside of a container or not. 

2170 

2171 """ 

2172 if self._prev_item and self._prev_item.is_string and item.is_string: 

2173 # Place consecutive string literals on separate lines. 

2174 self._lines.append(self._LineBreak()) 

2175 self._lines.append(self._Indent(indent_amt)) 

2176 

2177 item_text = str(item) 

2178 if self._lines and self._bracket_depth: 

2179 # Adding the item into a container. 

2180 self._prevent_default_initializer_splitting(item, indent_amt) 

2181 

2182 if item_text in '.,)]}': 

2183 self._split_after_delimiter(item, indent_amt) 

2184 

2185 elif self._lines and not self.line_empty(): 

2186 # Adding the item outside of a container. 

2187 if self.fits_on_current_line(len(item_text)): 

2188 self._enforce_space(item) 

2189 

2190 else: 

2191 # Line break for the new item. 

2192 self._lines.append(self._LineBreak()) 

2193 self._lines.append(self._Indent(indent_amt)) 

2194 

2195 self._lines.append(item) 

2196 self._prev_item, self._prev_prev_item = item, self._prev_item 

2197 

2198 if item_text in '([{': 

2199 self._bracket_depth += 1 

2200 

2201 elif item_text in '}])': 

2202 self._bracket_depth -= 1 

2203 assert self._bracket_depth >= 0 

2204 

2205 def _add_container(self, container, indent_amt, break_after_open_bracket): 

2206 actual_indent = indent_amt + 1 

2207 

2208 if ( 

2209 str(self._prev_item) != '=' and 

2210 not self.line_empty() and 

2211 not self.fits_on_current_line( 

2212 container.size + self._bracket_depth + 2) 

2213 ): 

2214 

2215 if str(container)[0] == '(' and self._prev_item.is_name: 

2216 # Don't split before the opening bracket of a call. 

2217 break_after_open_bracket = True 

2218 actual_indent = indent_amt + 4 

2219 elif ( 

2220 break_after_open_bracket or 

2221 str(self._prev_item) not in '([{' 

2222 ): 

2223 # If the container doesn't fit on the current line and the 

2224 # current line isn't empty, place the container on the next 

2225 # line. 

2226 self._lines.append(self._LineBreak()) 

2227 self._lines.append(self._Indent(indent_amt)) 

2228 break_after_open_bracket = False 

2229 else: 

2230 actual_indent = self.current_size() + 1 

2231 break_after_open_bracket = False 

2232 

2233 if isinstance(container, (ListComprehension, IfExpression)): 

2234 actual_indent = indent_amt 

2235 

2236 # Increase the continued indentation only if recursing on a 

2237 # container. 

2238 container.reflow(self, ' ' * actual_indent, 

2239 break_after_open_bracket=break_after_open_bracket) 

2240 

2241 def _prevent_default_initializer_splitting(self, item, indent_amt): 

2242 """Prevent splitting between a default initializer. 

2243 

2244 When there is a default initializer, it's best to keep it all on 

2245 the same line. It's nicer and more readable, even if it goes 

2246 over the maximum allowable line length. This goes back along the 

2247 current line to determine if we have a default initializer, and, 

2248 if so, to remove extraneous whitespace and add a line 

2249 break/indent before it if needed. 

2250 

2251 """ 

2252 if str(item) == '=': 

2253 # This is the assignment in the initializer. Just remove spaces for 

2254 # now. 

2255 self._delete_whitespace() 

2256 return 

2257 

2258 if (not self._prev_item or not self._prev_prev_item or 

2259 str(self._prev_item) != '='): 

2260 return 

2261 

2262 self._delete_whitespace() 

2263 prev_prev_index = self._lines.index(self._prev_prev_item) 

2264 

2265 if ( 

2266 isinstance(self._lines[prev_prev_index - 1], self._Indent) or 

2267 self.fits_on_current_line(item.size + 1) 

2268 ): 

2269 # The default initializer is already the only item on this line. 

2270 # Don't insert a newline here. 

2271 return 

2272 

2273 # Replace the space with a newline/indent combo. 

2274 if isinstance(self._lines[prev_prev_index - 1], self._Space): 

2275 del self._lines[prev_prev_index - 1] 

2276 

2277 self.add_line_break_at(self._lines.index(self._prev_prev_item), 

2278 indent_amt) 

2279 

2280 def _split_after_delimiter(self, item, indent_amt): 

2281 """Split the line only after a delimiter.""" 

2282 self._delete_whitespace() 

2283 

2284 if self.fits_on_current_line(item.size): 

2285 return 

2286 

2287 last_space = None 

2288 for current_item in reversed(self._lines): 

2289 if ( 

2290 last_space and 

2291 (not isinstance(current_item, Atom) or 

2292 not current_item.is_colon) 

2293 ): 

2294 break 

2295 else: 

2296 last_space = None 

2297 if isinstance(current_item, self._Space): 

2298 last_space = current_item 

2299 if isinstance(current_item, (self._LineBreak, self._Indent)): 

2300 return 

2301 

2302 if not last_space: 

2303 return 

2304 

2305 self.add_line_break_at(self._lines.index(last_space), indent_amt) 

2306 

2307 def _enforce_space(self, item): 

2308 """Enforce a space in certain situations. 

2309 

2310 There are cases where we will want a space where normally we 

2311 wouldn't put one. This just enforces the addition of a space. 

2312 

2313 """ 

2314 if isinstance(self._lines[-1], 

2315 (self._Space, self._LineBreak, self._Indent)): 

2316 return 

2317 

2318 if not self._prev_item: 

2319 return 

2320 

2321 item_text = str(item) 

2322 prev_text = str(self._prev_item) 

2323 

2324 # Prefer a space around a '.' in an import statement, and between the 

2325 # 'import' and '('. 

2326 if ( 

2327 (item_text == '.' and prev_text == 'from') or 

2328 (item_text == 'import' and prev_text == '.') or 

2329 (item_text == '(' and prev_text == 'import') 

2330 ): 

2331 self._lines.append(self._Space()) 

2332 

2333 def _delete_whitespace(self): 

2334 """Delete all whitespace from the end of the line.""" 

2335 while isinstance(self._lines[-1], (self._Space, self._LineBreak, 

2336 self._Indent)): 

2337 del self._lines[-1] 

2338 

2339 

2340class Atom(object): 

2341 

2342 """The smallest unbreakable unit that can be reflowed.""" 

2343 

2344 def __init__(self, atom): 

2345 self._atom = atom 

2346 

2347 def __repr__(self): 

2348 return self._atom.token_string 

2349 

2350 def __len__(self): 

2351 return self.size 

2352 

2353 def reflow( 

2354 self, reflowed_lines, continued_indent, extent, 

2355 break_after_open_bracket=False, 

2356 is_list_comp_or_if_expr=False, 

2357 next_is_dot=False 

2358 ): 

2359 if self._atom.token_type == tokenize.COMMENT: 

2360 reflowed_lines.add_comment(self) 

2361 return 

2362 

2363 total_size = extent if extent else self.size 

2364 

2365 if self._atom.token_string not in ',:([{}])': 

2366 # Some atoms will need an extra 1-sized space token after them. 

2367 total_size += 1 

2368 

2369 prev_item = reflowed_lines.previous_item() 

2370 if ( 

2371 not is_list_comp_or_if_expr and 

2372 not reflowed_lines.fits_on_current_line(total_size) and 

2373 not (next_is_dot and 

2374 reflowed_lines.fits_on_current_line(self.size + 1)) and 

2375 not reflowed_lines.line_empty() and 

2376 not self.is_colon and 

2377 not (prev_item and prev_item.is_name and 

2378 str(self) == '(') 

2379 ): 

2380 # Start a new line if there is already something on the line and 

2381 # adding this atom would make it go over the max line length. 

2382 reflowed_lines.add_line_break(continued_indent) 

2383 else: 

2384 reflowed_lines.add_space_if_needed(str(self)) 

2385 

2386 reflowed_lines.add(self, len(continued_indent), 

2387 break_after_open_bracket) 

2388 

2389 def emit(self): 

2390 return self.__repr__() 

2391 

2392 @property 

2393 def is_keyword(self): 

2394 return keyword.iskeyword(self._atom.token_string) 

2395 

2396 @property 

2397 def is_string(self): 

2398 return self._atom.token_type == tokenize.STRING 

2399 

2400 @property 

2401 def is_name(self): 

2402 return self._atom.token_type == tokenize.NAME 

2403 

2404 @property 

2405 def is_number(self): 

2406 return self._atom.token_type == tokenize.NUMBER 

2407 

2408 @property 

2409 def is_comma(self): 

2410 return self._atom.token_string == ',' 

2411 

2412 @property 

2413 def is_colon(self): 

2414 return self._atom.token_string == ':' 

2415 

2416 @property 

2417 def size(self): 

2418 return len(self._atom.token_string) 

2419 

2420 

2421class Container(object): 

2422 

2423 """Base class for all container types.""" 

2424 

2425 def __init__(self, items): 

2426 self._items = items 

2427 

2428 def __repr__(self): 

2429 string = '' 

2430 last_was_keyword = False 

2431 

2432 for item in self._items: 

2433 if item.is_comma: 

2434 string += ', ' 

2435 elif item.is_colon: 

2436 string += ': ' 

2437 else: 

2438 item_string = str(item) 

2439 if ( 

2440 string and 

2441 (last_was_keyword or 

2442 (not string.endswith(tuple('([{,.:}]) ')) and 

2443 not item_string.startswith(tuple('([{,.:}])')))) 

2444 ): 

2445 string += ' ' 

2446 string += item_string 

2447 

2448 last_was_keyword = item.is_keyword 

2449 return string 

2450 

2451 def __iter__(self): 

2452 for element in self._items: 

2453 yield element 

2454 

2455 def __getitem__(self, idx): 

2456 return self._items[idx] 

2457 

2458 def reflow(self, reflowed_lines, continued_indent, 

2459 break_after_open_bracket=False): 

2460 last_was_container = False 

2461 for (index, item) in enumerate(self._items): 

2462 next_item = get_item(self._items, index + 1) 

2463 

2464 if isinstance(item, Atom): 

2465 is_list_comp_or_if_expr = ( 

2466 isinstance(self, (ListComprehension, IfExpression))) 

2467 item.reflow(reflowed_lines, continued_indent, 

2468 self._get_extent(index), 

2469 is_list_comp_or_if_expr=is_list_comp_or_if_expr, 

2470 next_is_dot=(next_item and 

2471 str(next_item) == '.')) 

2472 if last_was_container and item.is_comma: 

2473 reflowed_lines.add_line_break(continued_indent) 

2474 last_was_container = False 

2475 else: # isinstance(item, Container) 

2476 reflowed_lines.add(item, len(continued_indent), 

2477 break_after_open_bracket) 

2478 last_was_container = not isinstance(item, (ListComprehension, 

2479 IfExpression)) 

2480 

2481 if ( 

2482 break_after_open_bracket and index == 0 and 

2483 # Prefer to keep empty containers together instead of 

2484 # separating them. 

2485 str(item) == self.open_bracket and 

2486 (not next_item or str(next_item) != self.close_bracket) and 

2487 (len(self._items) != 3 or not isinstance(next_item, Atom)) 

2488 ): 

2489 reflowed_lines.add_line_break(continued_indent) 

2490 break_after_open_bracket = False 

2491 else: 

2492 next_next_item = get_item(self._items, index + 2) 

2493 if ( 

2494 str(item) not in ['.', '%', 'in'] and 

2495 next_item and not isinstance(next_item, Container) and 

2496 str(next_item) != ':' and 

2497 next_next_item and (not isinstance(next_next_item, Atom) or 

2498 str(next_item) == 'not') and 

2499 not reflowed_lines.line_empty() and 

2500 not reflowed_lines.fits_on_current_line( 

2501 self._get_extent(index + 1) + 2) 

2502 ): 

2503 reflowed_lines.add_line_break(continued_indent) 

2504 

2505 def _get_extent(self, index): 

2506 """The extent of the full element. 

2507 

2508 E.g., the length of a function call or keyword. 

2509 

2510 """ 

2511 extent = 0 

2512 prev_item = get_item(self._items, index - 1) 

2513 seen_dot = prev_item and str(prev_item) == '.' 

2514 while index < len(self._items): 

2515 item = get_item(self._items, index) 

2516 index += 1 

2517 

2518 if isinstance(item, (ListComprehension, IfExpression)): 

2519 break 

2520 

2521 if isinstance(item, Container): 

2522 if prev_item and prev_item.is_name: 

2523 if seen_dot: 

2524 extent += 1 

2525 else: 

2526 extent += item.size 

2527 

2528 prev_item = item 

2529 continue 

2530 elif (str(item) not in ['.', '=', ':', 'not'] and 

2531 not item.is_name and not item.is_string): 

2532 break 

2533 

2534 if str(item) == '.': 

2535 seen_dot = True 

2536 

2537 extent += item.size 

2538 prev_item = item 

2539 

2540 return extent 

2541 

2542 @property 

2543 def is_string(self): 

2544 return False 

2545 

2546 @property 

2547 def size(self): 

2548 return len(self.__repr__()) 

2549 

2550 @property 

2551 def is_keyword(self): 

2552 return False 

2553 

2554 @property 

2555 def is_name(self): 

2556 return False 

2557 

2558 @property 

2559 def is_comma(self): 

2560 return False 

2561 

2562 @property 

2563 def is_colon(self): 

2564 return False 

2565 

2566 @property 

2567 def open_bracket(self): 

2568 return None 

2569 

2570 @property 

2571 def close_bracket(self): 

2572 return None 

2573 

2574 

2575class Tuple(Container): 

2576 

2577 """A high-level representation of a tuple.""" 

2578 

2579 @property 

2580 def open_bracket(self): 

2581 return '(' 

2582 

2583 @property 

2584 def close_bracket(self): 

2585 return ')' 

2586 

2587 

2588class List(Container): 

2589 

2590 """A high-level representation of a list.""" 

2591 

2592 @property 

2593 def open_bracket(self): 

2594 return '[' 

2595 

2596 @property 

2597 def close_bracket(self): 

2598 return ']' 

2599 

2600 

2601class DictOrSet(Container): 

2602 

2603 """A high-level representation of a dictionary or set.""" 

2604 

2605 @property 

2606 def open_bracket(self): 

2607 return '{' 

2608 

2609 @property 

2610 def close_bracket(self): 

2611 return '}' 

2612 

2613 

2614class ListComprehension(Container): 

2615 

2616 """A high-level representation of a list comprehension.""" 

2617 

2618 @property 

2619 def size(self): 

2620 length = 0 

2621 for item in self._items: 

2622 if isinstance(item, IfExpression): 

2623 break 

2624 length += item.size 

2625 return length 

2626 

2627 

2628class IfExpression(Container): 

2629 

2630 """A high-level representation of an if-expression.""" 

2631 

2632 

2633def _parse_container(tokens, index, for_or_if=None): 

2634 """Parse a high-level container, such as a list, tuple, etc.""" 

2635 

2636 # Store the opening bracket. 

2637 items = [Atom(Token(*tokens[index]))] 

2638 index += 1 

2639 

2640 num_tokens = len(tokens) 

2641 while index < num_tokens: 

2642 tok = Token(*tokens[index]) 

2643 

2644 if tok.token_string in ',)]}': 

2645 # First check if we're at the end of a list comprehension or 

2646 # if-expression. Don't add the ending token as part of the list 

2647 # comprehension or if-expression, because they aren't part of those 

2648 # constructs. 

2649 if for_or_if == 'for': 

2650 return (ListComprehension(items), index - 1) 

2651 

2652 elif for_or_if == 'if': 

2653 return (IfExpression(items), index - 1) 

2654 

2655 # We've reached the end of a container. 

2656 items.append(Atom(tok)) 

2657 

2658 # Otherwise, determine which kind of container just ended. 

2659 if tok.token_string == ')': 

2660 # The end of a tuple. 

2661 return (Tuple(items), index) 

2662 

2663 elif tok.token_string == ']': 

2664 # The end of a list. 

2665 return (List(items), index) 

2666 

2667 elif tok.token_string == '}': 

2668 # The end of a dictionary or set. 

2669 return (DictOrSet(items), index) 

2670 

2671 elif tok.token_string in '([{': 

2672 # A sub-container is being defined. 

2673 (container, index) = _parse_container(tokens, index) 

2674 items.append(container) 

2675 

2676 elif tok.token_string == 'for': 

2677 (container, index) = _parse_container(tokens, index, 'for') 

2678 items.append(container) 

2679 

2680 elif tok.token_string == 'if': 

2681 (container, index) = _parse_container(tokens, index, 'if') 

2682 items.append(container) 

2683 

2684 else: 

2685 items.append(Atom(tok)) 

2686 

2687 index += 1 

2688 

2689 return (None, None) 

2690 

2691 

2692def _parse_tokens(tokens): 

2693 """Parse the tokens. 

2694 

2695 This converts the tokens into a form where we can manipulate them 

2696 more easily. 

2697 

2698 """ 

2699 

2700 index = 0 

2701 parsed_tokens = [] 

2702 

2703 num_tokens = len(tokens) 

2704 while index < num_tokens: 

2705 tok = Token(*tokens[index]) 

2706 

2707 assert tok.token_type != token.INDENT 

2708 if tok.token_type == tokenize.NEWLINE: 

2709 # There's only one newline and it's at the end. 

2710 break 

2711 

2712 if tok.token_string in '([{': 

2713 (container, index) = _parse_container(tokens, index) 

2714 if not container: 

2715 return None 

2716 parsed_tokens.append(container) 

2717 else: 

2718 parsed_tokens.append(Atom(tok)) 

2719 

2720 index += 1 

2721 

2722 return parsed_tokens 

2723 

2724 

2725def _reflow_lines(parsed_tokens, indentation, max_line_length, 

2726 start_on_prefix_line): 

2727 """Reflow the lines so that it looks nice.""" 

2728 

2729 if str(parsed_tokens[0]) == 'def': 

2730 # A function definition gets indented a bit more. 

2731 continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE 

2732 else: 

2733 continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE 

2734 

2735 break_after_open_bracket = not start_on_prefix_line 

2736 

2737 lines = ReformattedLines(max_line_length) 

2738 lines.add_indent(len(indentation.lstrip('\r\n'))) 

2739 

2740 if not start_on_prefix_line: 

2741 # If splitting after the opening bracket will cause the first element 

2742 # to be aligned weirdly, don't try it. 

2743 first_token = get_item(parsed_tokens, 0) 

2744 second_token = get_item(parsed_tokens, 1) 

2745 

2746 if ( 

2747 first_token and second_token and 

2748 str(second_token)[0] == '(' and 

2749 len(indentation) + len(first_token) + 1 == len(continued_indent) 

2750 ): 

2751 return None 

2752 

2753 for item in parsed_tokens: 

2754 lines.add_space_if_needed(str(item), equal=True) 

2755 

2756 save_continued_indent = continued_indent 

2757 if start_on_prefix_line and isinstance(item, Container): 

2758 start_on_prefix_line = False 

2759 continued_indent = ' ' * (lines.current_size() + 1) 

2760 

2761 item.reflow(lines, continued_indent, break_after_open_bracket) 

2762 continued_indent = save_continued_indent 

2763 

2764 return lines.emit() 

2765 

2766 

2767def _shorten_line_at_tokens_new(tokens, source, indentation, 

2768 max_line_length): 

2769 """Shorten the line taking its length into account. 

2770 

2771 The input is expected to be free of newlines except for inside 

2772 multiline strings and at the end. 

2773 

2774 """ 

2775 # Yield the original source so we can see if it's a better choice than the 

2776 # shortened candidate lines we generate here. 

2777 yield indentation + source 

2778 

2779 parsed_tokens = _parse_tokens(tokens) 

2780 

2781 if parsed_tokens: 

2782 # Perform two reflows. The first one starts on the same line as the 

2783 # prefix. The second starts on the line after the prefix. 

2784 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length, 

2785 start_on_prefix_line=True) 

2786 if fixed and check_syntax(normalize_multiline(fixed.lstrip())): 

2787 yield fixed 

2788 

2789 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length, 

2790 start_on_prefix_line=False) 

2791 if fixed and check_syntax(normalize_multiline(fixed.lstrip())): 

2792 yield fixed 

2793 

2794 

2795def _shorten_line_at_tokens(tokens, source, indentation, indent_word, 

2796 key_token_strings, aggressive): 

2797 """Separate line by breaking at tokens in key_token_strings. 

2798 

2799 The input is expected to be free of newlines except for inside 

2800 multiline strings and at the end. 

2801 

2802 """ 

2803 offsets = [] 

2804 for (index, _t) in enumerate(token_offsets(tokens)): 

2805 (token_type, 

2806 token_string, 

2807 start_offset, 

2808 end_offset) = _t 

2809 

2810 assert token_type != token.INDENT 

2811 

2812 if token_string in key_token_strings: 

2813 # Do not break in containers with zero or one items. 

2814 unwanted_next_token = { 

2815 '(': ')', 

2816 '[': ']', 

2817 '{': '}'}.get(token_string) 

2818 if unwanted_next_token: 

2819 if ( 

2820 get_item(tokens, 

2821 index + 1, 

2822 default=[None, None])[1] == unwanted_next_token or 

2823 get_item(tokens, 

2824 index + 2, 

2825 default=[None, None])[1] == unwanted_next_token 

2826 ): 

2827 continue 

2828 

2829 if ( 

2830 index > 2 and token_string == '(' and 

2831 tokens[index - 1][1] in ',(%[' 

2832 ): 

2833 # Don't split after a tuple start, or before a tuple start if 

2834 # the tuple is in a list. 

2835 continue 

2836 

2837 if end_offset < len(source) - 1: 

2838 # Don't split right before newline. 

2839 offsets.append(end_offset) 

2840 else: 

2841 # Break at adjacent strings. These were probably meant to be on 

2842 # separate lines in the first place. 

2843 previous_token = get_item(tokens, index - 1) 

2844 if ( 

2845 token_type == tokenize.STRING and 

2846 previous_token and previous_token[0] == tokenize.STRING 

2847 ): 

2848 offsets.append(start_offset) 

2849 

2850 current_indent = None 

2851 fixed = None 

2852 for line in split_at_offsets(source, offsets): 

2853 if fixed: 

2854 fixed += '\n' + current_indent + line 

2855 

2856 for symbol in '([{': 

2857 if line.endswith(symbol): 

2858 current_indent += indent_word 

2859 else: 

2860 # First line. 

2861 fixed = line 

2862 assert not current_indent 

2863 current_indent = indent_word 

2864 

2865 assert fixed is not None 

2866 

2867 if check_syntax(normalize_multiline(fixed) 

2868 if aggressive > 1 else fixed): 

2869 return indentation + fixed 

2870 

2871 return None 

2872 

2873 

2874def token_offsets(tokens): 

2875 """Yield tokens and offsets.""" 

2876 end_offset = 0 

2877 previous_end_row = 0 

2878 previous_end_column = 0 

2879 for t in tokens: 

2880 token_type = t[0] 

2881 token_string = t[1] 

2882 (start_row, start_column) = t[2] 

2883 (end_row, end_column) = t[3] 

2884 

2885 # Account for the whitespace between tokens. 

2886 end_offset += start_column 

2887 if previous_end_row == start_row: 

2888 end_offset -= previous_end_column 

2889 

2890 # Record the start offset of the token. 

2891 start_offset = end_offset 

2892 

2893 # Account for the length of the token itself. 

2894 end_offset += len(token_string) 

2895 

2896 yield (token_type, 

2897 token_string, 

2898 start_offset, 

2899 end_offset) 

2900 

2901 previous_end_row = end_row 

2902 previous_end_column = end_column 

2903 
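# A minimal usage sketch (doctest-style, illustrative): each token's string
# offsets within a single-line source (the fifth, final token is ENDMARKER).
#
#     >>> [t[1:] for t in token_offsets(generate_tokens('x = 1\n'))][:4]
#     [('x', 0, 1), ('=', 2, 3), ('1', 4, 5), ('\n', 5, 6)]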

2904 

2905def normalize_multiline(line): 

2906 """Normalize multiline-related code that will cause syntax error. 

2907 

2908 This is for purposes of checking syntax. 

2909 

2910 """ 

2911 if line.startswith('def ') and line.rstrip().endswith(':'): 

2912 return line + ' pass' 

2913 elif line.startswith('return '): 

2914 return 'def _(): ' + line 

2915 elif line.startswith('@'): 

2916 return line + 'def _(): pass' 

2917 elif line.startswith('class '): 

2918 return line + ' pass' 

2919 elif line.startswith(('if ', 'elif ', 'for ', 'while ')): 

2920 return line + ' pass' 

2921 

2922 return line 

2923 
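# A minimal usage sketch (doctest-style, illustrative): fragments are padded
# just enough to compile on their own; anything else passes through.
#
#     >>> normalize_multiline('return x')
#     'def _(): return x'
#     >>> normalize_multiline('def foo():')
#     'def foo(): pass'
#     >>> normalize_multiline('x = 1')
#     'x = 1'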

2924 

2925def fix_whitespace(line, offset, replacement): 

2926 """Replace whitespace at offset and return fixed line.""" 

2927 # Replace escaped newlines too 

2928 left = line[:offset].rstrip('\n\r \t\\') 

2929 right = line[offset:].lstrip('\n\r \t\\') 

2930 if right.startswith('#'): 

2931 return line 

2932 

2933 return left + replacement + right 

2934 
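# A minimal usage sketch (doctest-style, illustrative): the whitespace run
# around the offset collapses to the replacement string.
#
#     >>> fix_whitespace('x      = 1', offset=1, replacement=' ')
#     'x = 1'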

2935 

2936def _execute_pep8(pep8_options, source): 

2937 """Execute pycodestyle via python method calls.""" 

2938 class QuietReport(pycodestyle.BaseReport): 

2939 

2940 """Version of checker that does not print.""" 

2941 

2942 def __init__(self, options): 

2943 super(QuietReport, self).__init__(options) 

2944 self.__full_error_results = [] 

2945 

2946 def error(self, line_number, offset, text, check): 

2947 """Collect errors.""" 

2948 code = super(QuietReport, self).error(line_number, 

2949 offset, 

2950 text, 

2951 check) 

2952 if code: 

2953 self.__full_error_results.append( 

2954 {'id': code, 

2955 'line': line_number, 

2956 'column': offset + 1, 

2957 'info': text}) 

2958 

2959 def full_error_results(self): 

2960 """Return error results in detail. 

2961 

2962 Results are in the form of a list of dictionaries. Each 

2963 dictionary contains 'id', 'line', 'column', and 'info'. 

2964 

2965 """ 

2966 return self.__full_error_results 

2967 

2968 checker = pycodestyle.Checker('', lines=source, reporter=QuietReport, 

2969 **pep8_options) 

2970 checker.check_all() 

2971 return checker.report.full_error_results() 

2972 

2973 

2974def _remove_leading_and_normalize(line, with_rstrip=True): 

2975 # Note: form feed ('\f') is deliberately left out of the lstrip set. 

2976 if with_rstrip: 

2977 return line.lstrip(' \t\v').rstrip(CR + LF) + '\n' 

2978 return line.lstrip(' \t\v') 

2979 

2980 

2981class Reindenter(object): 

2982 

2983 """Reindents badly-indented code to uniformly use four-space indentation. 

2984 

2985 Released to the public domain, by Tim Peters, 03 October 2000. 

2986 

2987 """ 

2988 

2989 def __init__(self, input_text, leave_tabs=False): 

2990 sio = io.StringIO(input_text) 

2991 source_lines = sio.readlines() 

2992 

2993 self.string_content_line_numbers = multiline_string_lines(input_text) 

2994 

2995 # File lines, rstripped & tab-expanded. Dummy at start is so 

2996 # that we can use tokenize's 1-based line numbering easily. 

2997 # Note that a line is all-blank iff it is a newline. 

2998 self.lines = [] 

2999 for line_number, line in enumerate(source_lines, start=1): 

3000 # Do not modify if inside a multiline string. 

3001 if line_number in self.string_content_line_numbers: 

3002 self.lines.append(line) 

3003 else: 

3004 # Only expand leading tabs. 

3005 with_rstrip = line_number != len(source_lines) 

3006 if leave_tabs: 

3007 self.lines.append( 

3008 _get_indentation(line) + 

3009 _remove_leading_and_normalize(line, with_rstrip) 

3010 ) 

3011 else: 

3012 self.lines.append( 

3013 _get_indentation(line).expandtabs() + 

3014 _remove_leading_and_normalize(line, with_rstrip) 

3015 ) 

3016 

3017 self.lines.insert(0, None) 

3018 self.index = 1 # index into self.lines of next line 

3019 self.input_text = input_text 

3020 

3021 def run(self, indent_size=DEFAULT_INDENT_SIZE): 

3022 """Fix indentation and return modified line numbers. 

3023 

3024 Line numbers are indexed at 1. 

3025 

3026 """ 

3027 if indent_size < 1: 

3028 return self.input_text 

3029 

3030 try: 

3031 stats = _reindent_stats(tokenize.generate_tokens(self.getline)) 

3032 except (SyntaxError, tokenize.TokenError): 

3033 return self.input_text 

3034 # Work on the normalized lines built in __init__. 

3035 lines = self.lines 

3036 # Sentinel. 

3037 stats.append((len(lines), 0)) 

3038 # Map observed count of leading spaces to the count we want. 

3039 have2want = {} 

3040 # Program after transformation. 

3041 after = [] 

3042 # Copy over initial empty lines -- there's nothing to do until 

3043 # we see a line with *something* on it. 

3044 i = stats[0][0] 

3045 after.extend(lines[1:i]) 

3046 for i in range(len(stats) - 1): 

3047 thisstmt, thislevel = stats[i] 

3048 nextstmt = stats[i + 1][0] 

3049 have = _leading_space_count(lines[thisstmt]) 

3050 want = thislevel * indent_size 

3051 if want < 0: 

3052 # A comment line. 

3053 if have: 

3054 # An indented comment line. If we saw the same 

3055 # indentation before, reuse what it most recently 

3056 # mapped to. 

3057 want = have2want.get(have, -1) 

3058 if want < 0: 

3059 # Then it probably belongs to the next real stmt. 

3060 for j in range(i + 1, len(stats) - 1): 

3061 jline, jlevel = stats[j] 

3062 if jlevel >= 0: 

3063 if have == _leading_space_count(lines[jline]): 

3064 want = jlevel * indent_size 

3065 break 

3066 # Maybe it's a hanging comment like this one, 

3067 if want < 0: 

3068 # in which case we should shift it like its base 

3069 # line got shifted. 

3070 for j in range(i - 1, -1, -1): 

3071 jline, jlevel = stats[j] 

3072 if jlevel >= 0: 

3073 want = (have + _leading_space_count( 

3074 after[jline - 1]) - 

3075 _leading_space_count(lines[jline])) 

3076 break 

3077 if want < 0: 

3078 # Still no luck -- leave it alone. 

3079 want = have 

3080 else: 

3081 want = 0 

3082 assert want >= 0 

3083 have2want[have] = want 

3084 diff = want - have 

3085 if diff == 0 or have == 0: 

3086 after.extend(lines[thisstmt:nextstmt]) 

3087 else: 

3088 for line_number, line in enumerate(lines[thisstmt:nextstmt], 

3089 start=thisstmt): 

3090 if line_number in self.string_content_line_numbers: 

3091 after.append(line) 

3092 elif diff > 0: 

3093 if line == '\n': 

3094 after.append(line) 

3095 else: 

3096 after.append(' ' * diff + line) 

3097 else: 

3098 remove = min(_leading_space_count(line), -diff) 

3099 after.append(line[remove:]) 

3100 

3101 return ''.join(after) 

3102 

3103 def getline(self): 

3104 """Line-getter for tokenize.""" 

3105 if self.index >= len(self.lines): 

3106 line = '' 

3107 else: 

3108 line = self.lines[self.index] 

3109 self.index += 1 

3110 return line 

3111 

3112 

3113def _reindent_stats(tokens): 

3114 """Return list of (lineno, indentlevel) pairs. 

3115 

3116 One for each stmt and comment line. indentlevel is -1 for comment 

3117 lines, as a signal that tokenize doesn't know what to do about them; 

3118 indeed, they're our headache! 

3119 

3120 """ 

3121 find_stmt = 1 # Next token begins a fresh stmt? 

3122 level = 0 # Current indent level. 

3123 stats = [] 

3124 

3125 for t in tokens: 

3126 token_type = t[0] 

3127 sline = t[2][0] 

3128 line = t[4] 

3129 

3130 if token_type == tokenize.NEWLINE: 

3131 # A program statement, or ENDMARKER, will eventually follow, 

3132 # after some (possibly empty) run of tokens of the form 

3133 # (NL | COMMENT)* (INDENT | DEDENT+)? 

3134 find_stmt = 1 

3135 

3136 elif token_type == tokenize.INDENT: 

3137 find_stmt = 1 

3138 level += 1 

3139 

3140 elif token_type == tokenize.DEDENT: 

3141 find_stmt = 1 

3142 level -= 1 

3143 

3144 elif token_type == tokenize.COMMENT: 

3145 if find_stmt: 

3146 stats.append((sline, -1)) 

3147 # But we're still looking for a new stmt, so leave 

3148 # find_stmt alone. 

3149 

3150 elif token_type == tokenize.NL: 

3151 pass 

3152 

3153 elif find_stmt: 

3154 # This is the first "real token" following a NEWLINE, so it 

3155 # must be the first token of the next program statement, or an 

3156 # ENDMARKER. 

3157 find_stmt = 0 

3158 if line: # Not endmarker. 

3159 stats.append((sline, level)) 

3160 

3161 return stats 

3162 

3163 

3164def _leading_space_count(line): 

3165 """Return number of leading spaces in line.""" 

3166 i = 0 

3167 while i < len(line) and line[i] == ' ': 

3168 i += 1 

3169 return i 

3170 

3171 

3172def refactor_with_2to3(source_text, fixer_names, filename=''): 

3173 """Use lib2to3 to refactor the source. 

3174 

3175 Return the refactored source code. 

3176 

3177 """ 

3178 from lib2to3.refactor import RefactoringTool 

3179 fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names] 

3180 tool = RefactoringTool(fixer_names=fixers, explicit=fixers) 

3181 

3182 from lib2to3.pgen2 import tokenize as lib2to3_tokenize 

3183 try: 

3184 # The name parameter is necessary particularly for the "import" fixer. 

3185 return str(tool.refactor_string(source_text, name=filename)) 

3186 except lib2to3_tokenize.TokenError: 

3187 return source_text 

3188 

3189 

3190def check_syntax(code): 

3191 """Return True if syntax is okay.""" 

3192 try: 

3193 return compile(code, '<string>', 'exec', dont_inherit=True) 

3194 except (SyntaxError, TypeError, ValueError): 

3195 return False 

3196 
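# A minimal usage sketch (doctest-style, illustrative): note the truthy
# result is the compiled code object rather than the literal True.
#
#     >>> bool(check_syntax('x = 1\n'))
#     True
#     >>> check_syntax('x =\n')
#     False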

3197 

3198def find_with_line_numbers(pattern, contents): 

3199 """A wrapper around 're.finditer' to find line numbers. 

3200 

3201 Returns a list of line numbers where pattern was found in contents. 

3202 """ 

3203 matches = list(re.finditer(pattern, contents)) 

3204 if not matches: 

3205 return [] 

3206 

3207 end = matches[-1].start() 

3208 

3209 # -1 so a failed `rfind` maps to the first line. 

3210 newline_offsets = { 

3211 -1: 0 

3212 } 

3213 for line_num, m in enumerate(re.finditer(r'\n', contents), 1): 

3214 offset = m.start() 

3215 if offset > end: 

3216 break 

3217 newline_offsets[offset] = line_num 

3218 

3219 def get_line_num(match, contents): 

3220 """Get the line number of string in a files contents. 

3221 

3222 Failing to find the newline is OK; -1 maps to 0. 

3223 

3224 """ 

3225 newline_offset = contents.rfind('\n', 0, match.start()) 

3226 return newline_offsets[newline_offset] 

3227 

3228 return [get_line_num(match, contents) + 1 for match in matches] 

3229 
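# A minimal usage sketch (doctest-style, illustrative): line numbers are
# 1-indexed.
#
#     >>> find_with_line_numbers(r'def ', 'x = 1\ndef foo():\n    pass\n')
#     [2]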

3230 

3231def get_disabled_ranges(source): 

3232 """Returns a list of tuples representing the disabled ranges. 

3233 

3234 If disabled and no re-enable will disable for rest of file. 

3235 

3236 """ 

3237 enable_line_nums = find_with_line_numbers(ENABLE_REGEX, source) 

3238 disable_line_nums = find_with_line_numbers(DISABLE_REGEX, source) 

3239 total_lines = len(re.findall("\n", source)) + 1 

3240 

3241 enable_commands = {} 

3242 for num in enable_line_nums: 

3243 enable_commands[num] = True 

3244 for num in disable_line_nums: 

3245 enable_commands[num] = False 

3246 

3247 disabled_ranges = [] 

3248 currently_enabled = True 

3249 disabled_start = None 

3250 

3251 for line, commanded_enabled in sorted(enable_commands.items()): 

3252 if commanded_enabled is False and currently_enabled is True: 

3253 disabled_start = line 

3254 currently_enabled = False 

3255 elif commanded_enabled is True and currently_enabled is False: 

3256 disabled_ranges.append((disabled_start, line)) 

3257 currently_enabled = True 

3258 

3259 if currently_enabled is False: 

3260 disabled_ranges.append((disabled_start, total_lines)) 

3261 

3262 return disabled_ranges 

3263 
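# A minimal usage sketch (doctest-style, illustrative; assumes ENABLE_REGEX
# and DISABLE_REGEX match the usual '# autopep8: on' / '# autopep8: off'
# markers):
#
#     >>> src = 'a = 1\n# autopep8: off\nb=2\n# autopep8: on\nc = 3\n'
#     >>> get_disabled_ranges(src)
#     [(2, 4)]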

3264 

3265def filter_disabled_results(result, disabled_ranges): 

3266 """Filter out reports based on tuple of disabled ranges. 

3267 

3268 """ 

3269 line = result['line'] 

3270 for disabled_range in disabled_ranges: 

3271 if disabled_range[0] <= line <= disabled_range[1]: 

3272 return False 

3273 return True 

3274 

3275 

3276def filter_results(source, results, aggressive): 

3277 """Filter out spurious reports from pycodestyle. 

3278 

3279 If aggressive is True, we allow possibly unsafe fixes (E711, E712). 

3280 

3281 """ 

3282 non_docstring_string_line_numbers = multiline_string_lines( 

3283 source, include_docstrings=False) 

3284 all_string_line_numbers = multiline_string_lines( 

3285 source, include_docstrings=True) 

3286 

3287 commented_out_code_line_numbers = commented_out_code_lines(source) 

3288 

3289 # Filter out the disabled ranges 

3290 disabled_ranges = get_disabled_ranges(source) 

3291 if disabled_ranges: 

3292 results = [ 

3293 result for result in results if filter_disabled_results( 

3294 result, 

3295 disabled_ranges, 

3296 ) 

3297 ] 

3298 

3299 has_e901 = any(result['id'].lower() == 'e901' for result in results) 

3300 

3301 for r in results: 

3302 issue_id = r['id'].lower() 

3303 

3304 if r['line'] in non_docstring_string_line_numbers: 

3305 if issue_id.startswith(('e1', 'e501', 'w191')): 

3306 continue 

3307 

3308 if r['line'] in all_string_line_numbers: 

3309 if issue_id in ['e501']: 

3310 continue 

3311 

3312 # We must offset by 1 for lines that contain the trailing contents of 

3313 # multiline strings. 

3314 if not aggressive and (r['line'] + 1) in all_string_line_numbers: 

3315 # Do not modify multiline strings in non-aggressive mode. Removing 

3316 # trailing whitespace could break doctests. 

3317 if issue_id.startswith(('w29', 'w39')): 

3318 continue 

3319 

3320 if aggressive <= 0: 

3321 if issue_id.startswith(('e711', 'e72', 'w6')): 

3322 continue 

3323 

3324 if aggressive <= 1: 

3325 if issue_id.startswith(('e712', 'e713', 'e714')): 

3326 continue 

3327 

3328 if aggressive <= 2: 

3329 if issue_id.startswith('e704'): 

3330 continue 

3331 

3332 if r['line'] in commented_out_code_line_numbers: 

3333 if issue_id.startswith(('e261', 'e262', 'e501')): 

3334 continue 

3335 

3336 # Do not touch indentation if there is a token error caused by 

3337 # incomplete multi-line statement. Otherwise, we risk screwing up the 

3338 # indentation. 

3339 if has_e901: 

3340 if issue_id.startswith(('e1', 'e7')): 

3341 continue 

3342 

3343 yield r 

3344 

3345 

3346def multiline_string_lines(source, include_docstrings=False): 

3347 """Return line numbers that are within multiline strings. 

3348 

3349 The line numbers are indexed at 1. 

3350 

3351 Docstrings are ignored unless include_docstrings is True. 

3352 

3353 """ 

3354 line_numbers = set() 

3355 previous_token_type = '' 

3356 try: 

3357 for t in generate_tokens(source): 

3358 token_type = t[0] 

3359 start_row = t[2][0] 

3360 end_row = t[3][0] 

3361 

3362 if token_type == tokenize.STRING and start_row != end_row: 

3363 if ( 

3364 include_docstrings or 

3365 previous_token_type != tokenize.INDENT 

3366 ): 

3367 # We increment by one since we want the contents of the 

3368 # string. 

3369 line_numbers |= set(range(1 + start_row, 1 + end_row)) 

3370 

3371 previous_token_type = token_type 

3372 except (SyntaxError, tokenize.TokenError): 

3373 pass 

3374 

3375 return line_numbers 

3376 
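# A minimal usage sketch (doctest-style, illustrative): the reported numbers
# are the lines inside the triple-quoted string, not the opening line.
#
#     >>> multiline_string_lines('x = """\nhello\nworld\n"""\n')
#     {2, 3, 4}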

3377 

3378def commented_out_code_lines(source): 

3379 """Return line numbers of comments that are likely code. 

3380 

3381 Commented-out code is bad practice, but modifying it just adds even 

3382 more clutter. 

3383 

3384 """ 

3385 line_numbers = [] 

3386 try: 

3387 for t in generate_tokens(source): 

3388 token_type = t[0] 

3389 token_string = t[1] 

3390 start_row = t[2][0] 

3391 line = t[4] 

3392 

3393 # Ignore inline comments. 

3394 if not line.lstrip().startswith('#'): 

3395 continue 

3396 

3397 if token_type == tokenize.COMMENT: 

3398 stripped_line = token_string.lstrip('#').strip() 

3399 with warnings.catch_warnings(): 

3400 # ignore SyntaxWarning in Python3.8+ 

3401 # refs: 

3402 # https://bugs.python.org/issue15248 

3403 # https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes 

3404 warnings.filterwarnings("ignore", category=SyntaxWarning) 

3405 if ( 

3406 ' ' in stripped_line and 

3407 '#' not in stripped_line and 

3408 check_syntax(stripped_line) 

3409 ): 

3410 line_numbers.append(start_row) 

3411 except (SyntaxError, tokenize.TokenError): 

3412 pass 

3413 

3414 return line_numbers 

3415 
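# A minimal usage sketch (doctest-style, illustrative): line 1 parses as
# code, line 2 does not.
#
#     >>> commented_out_code_lines('# x = 1\n# hello world\n')
#     [1]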

3416 

3417def shorten_comment(line, max_line_length, last_comment=False): 

3418 """Return trimmed or split long comment line. 

3419 

3420 If there are no comments immediately following it, do a text wrap. 

3421 Doing this wrapping on all comments in general would lead to jagged 

3422 comment text. 

3423 

3424 """ 

3425 assert len(line) > max_line_length 

3426 line = line.rstrip() 

3427 

3428 # PEP 8 recommends 72 characters for comment text. 

3429 indentation = _get_indentation(line) + '# ' 

3430 max_line_length = min(max_line_length, 

3431 len(indentation) + 72) 

3432 

3433 MIN_CHARACTER_REPEAT = 5 

3434 if ( 

3435 len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and 

3436 not line[-1].isalnum() 

3437 ): 

3438 # Trim comments that end with things like --------- 

3439 return line[:max_line_length] + '\n' 

3440 elif last_comment and re.match(r'\s*#+\s*\w+', line): 

3441 split_lines = textwrap.wrap(line.lstrip(' \t#'), 

3442 initial_indent=indentation, 

3443 subsequent_indent=indentation, 

3444 width=max_line_length, 

3445 break_long_words=False, 

3446 break_on_hyphens=False) 

3447 return '\n'.join(split_lines) + '\n' 

3448 

3449 return line + '\n' 

3450 
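# A minimal usage sketch (doctest-style, illustrative): a 102-character
# divider comment is trimmed to len('# ') + 72 == 74 characters plus '\n'.
#
#     >>> len(shorten_comment('# ' + '-' * 100, max_line_length=79))
#     75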

3451 

3452def normalize_line_endings(lines, newline): 

3453 """Return fixed line endings. 

3454 

3455 All lines will be modified to use the given line ending. 

3456 """ 

3457 fixed = [line.rstrip('\n\r') + newline for line in lines] 

3458 if fixed and lines[-1] == lines[-1].rstrip('\n\r'): 

3459 fixed[-1] = fixed[-1].rstrip('\n\r') 

3460 return fixed 

3461 
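# A minimal usage sketch (doctest-style, illustrative): a final line without
# a newline keeps that property.
#
#     >>> normalize_line_endings(['a\r\n', 'b'], '\n')
#     ['a\n', 'b']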

3462 

3463def mutual_startswith(a, b): 

3464 return b.startswith(a) or a.startswith(b) 

3465 

3466 

3467def code_match(code, select, ignore): 

3468 if ignore: 

3469 assert not isinstance(ignore, str) 

3470 for ignored_code in [c.strip() for c in ignore]: 

3471 if mutual_startswith(code.lower(), ignored_code.lower()): 

3472 return False 

3473 

3474 if select: 

3475 assert not isinstance(select, str) 

3476 for selected_code in [c.strip() for c in select]: 

3477 if mutual_startswith(code.lower(), selected_code.lower()): 

3478 return True 

3479 return False 

3480 

3481 return True 

3482 
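# A minimal usage sketch (doctest-style, illustrative): matching is
# prefix-based and case-insensitive, and ignore is checked before select.
#
#     >>> code_match('E501', select=['E5'], ignore=[])
#     True
#     >>> code_match('E501', select=['E5'], ignore=['E50'])
#     False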

3483 

3484def fix_code(source, options=None, encoding=None, apply_config=False): 

3485 """Return fixed source code. 

3486 

3487 "encoding" will be used to decode "source" if it is a byte string. 

3488 

3489 """ 

3490 options = _get_options(options, apply_config) 

3491 # normalize 

3492 options.ignore = [opt.upper() for opt in options.ignore] 

3493 options.select = [opt.upper() for opt in options.select] 

3494 

3495 # check ignore args 

3496 # NOTE: If no W50x code is ignored, add W50x, because fixing both 

3497 # W503 and W504 makes the correction result unstable. 

3498 ignore_opt = options.ignore 

3499 if not {"W50", "W503", "W504"} & set(ignore_opt): 

3500 options.ignore.append("W50") 

3501 

3502 if not isinstance(source, str): 

3503 source = source.decode(encoding or get_encoding()) 

3504 

3505 sio = io.StringIO(source) 

3506 return fix_lines(sio.readlines(), options=options) 

3507 
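# A minimal usage sketch (doctest-style, illustrative) of the public entry
# point; options may be passed as a dict of parse_args() attribute names.
#
#     >>> fix_code('x=1\n')
#     'x = 1\n'
#     >>> fix_code('x=1\n', options={'aggressive': 1})
#     'x = 1\n'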

3508 

3509def _get_options(raw_options, apply_config): 

3510 """Return parsed options.""" 

3511 if not raw_options: 

3512 return parse_args([''], apply_config=apply_config) 

3513 

3514 if isinstance(raw_options, dict): 

3515 options = parse_args([''], apply_config=apply_config) 

3516 for name, value in raw_options.items(): 

3517 if not hasattr(options, name): 

3518 raise ValueError("No such option '{}'".format(name)) 

3519 

3520 # Check for very basic type errors. 

3521 expected_type = type(getattr(options, name)) 

3522 if not isinstance(expected_type, (str, )): 

3523 if isinstance(value, (str, )): 

3524 raise ValueError( 

3525 "Option '{}' should not be a string".format(name)) 

3526 setattr(options, name, value) 

3527 else: 

3528 options = raw_options 

3529 

3530 return options 

3531 

3532 

3533def fix_lines(source_lines, options, filename=''): 

3534 """Return fixed source code.""" 

3535 # Transform everything to line feed. Then change them back to original 

3536 # before returning fixed source code. 

3537 original_newline = find_newline(source_lines) 

3538 tmp_source = ''.join(normalize_line_endings(source_lines, '\n')) 

3539 

3540 # Keep a history to break out of cycles. 

3541 previous_hashes = set() 

3542 

3543 if options.line_range: 

3544 # Disable "apply_local_fixes()" for now due to issue #175. 

3545 fixed_source = tmp_source 

3546 else: 

3547 # Apply global fixes only once (for efficiency). 

3548 fixed_source = apply_global_fixes(tmp_source, 

3549 options, 

3550 filename=filename) 

3551 

3552 passes = 0 

3553 long_line_ignore_cache = set() 

3554 while hash(fixed_source) not in previous_hashes: 

3555 if options.pep8_passes >= 0 and passes > options.pep8_passes: 

3556 break 

3557 passes += 1 

3558 

3559 previous_hashes.add(hash(fixed_source)) 

3560 

3561 tmp_source = copy.copy(fixed_source) 

3562 

3563 fix = FixPEP8( 

3564 filename, 

3565 options, 

3566 contents=tmp_source, 

3567 long_line_ignore_cache=long_line_ignore_cache) 

3568 

3569 fixed_source = fix.fix() 

3570 

3571 sio = io.StringIO(fixed_source) 

3572 return ''.join(normalize_line_endings(sio.readlines(), original_newline)) 

3573 

3574 
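
fix_lines() keeps re-running FixPEP8 until the source stops changing, recording hashes of earlier results to break out of cycles. A toy illustration of the same fixed-point pattern:

    def toy_fix(text):
        # collapses runs of spaces one step at a time
        return text.replace('  ', ' ')

    seen = set()
    text = 'a    b'
    while hash(text) not in seen:
        seen.add(hash(text))
        text = toy_fix(text)
    print(text)    # 'a b'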

3575def fix_file(filename, options=None, output=None, apply_config=False): 

3576 if not options: 

3577 options = parse_args([filename], apply_config=apply_config) 

3578 

3579 original_source = readlines_from_file(filename) 

3580 

3581 fixed_source = original_source 

3582 

3583 if options.in_place or options.diff or output: 

3584 encoding = detect_encoding(filename) 

3585 

3586 if output: 

3587 output = LineEndingWrapper(wrap_output(output, encoding=encoding)) 

3588 

3589 fixed_source = fix_lines(fixed_source, options, filename=filename) 

3590 

3591 if options.diff: 

3592 new = io.StringIO(fixed_source) 

3593 new = new.readlines() 

3594 diff = get_diff_text(original_source, new, filename) 

3595 if output: 

3596 output.write(diff) 

3597 output.flush() 

3598 elif options.jobs > 1: 

3599 diff = diff.encode(encoding) 

3600 return diff 

3601 elif options.in_place: 

3602 original = "".join(original_source).splitlines() 

3603 fixed = fixed_source.splitlines() 

3604 original_source_last_line = ( 

3605 original_source[-1].split("\n")[-1] if original_source else "" 

3606 ) 

3607 fixed_source_last_line = fixed_source.split("\n")[-1] 

3608 if original != fixed or ( 

3609 original_source_last_line != fixed_source_last_line 

3610 ): 

3611 with open_with_encoding(filename, 'w', encoding=encoding) as fp: 

3612 fp.write(fixed_source) 

3613 return fixed_source 

3614 return None 

3615 else: 

3616 if output: 

3617 output.write(fixed_source) 

3618 output.flush() 

3619 return fixed_source 

3620 

3621 
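
A hedged sketch of calling fix_file() directly; 'example.py' is a hypothetical path, and the options namespace is built by parse_args() exactly as the CLI would build it:

    import autopep8

    opts = autopep8.parse_args(['--in-place', 'example.py'])
    autopep8.fix_file('example.py', options=opts)    # rewrites the file if it changed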

3622def global_fixes(): 

3623 """Yield multiple (code, function) tuples.""" 

3624 for function in list(globals().values()): 

3625 if inspect.isfunction(function): 

3626 arguments = _get_parameters(function) 

3627 if arguments[:1] != ['source']: 

3628 continue 

3629 

3630 code = extract_code_from_function(function) 

3631 if code: 

3632 yield (code, function) 

3633 

3634 

3635def _get_parameters(function): 

3636 # pylint: disable=deprecated-method 

3637 if sys.version_info.major >= 3: 

3638 # We need to match "getargspec()", which includes "self" as the first 

3639 # value for methods. 

3640 # https://bugs.python.org/issue17481#msg209469 

3641 if inspect.ismethod(function): 

3642 function = function.__func__ 

3643 

3644 return list(inspect.signature(function).parameters) 

3645 else: 

3646 return inspect.getargspec(function)[0] 

3647 

3648 

3649def apply_global_fixes(source, options, where='global', filename='', 

3650 codes=None): 

3651 """Run global fixes on source code. 

3652 

3653 These are fixes that only need be done once (unlike those in 

3654 FixPEP8, which are dependent on pycodestyle). 

3655 

3656 """ 

3657 if codes is None: 

3658 codes = [] 

3659 if any(code_match(code, select=options.select, ignore=options.ignore) 

3660 for code in ['E101', 'E111']): 

3661 source = reindent( 

3662 source, 

3663 indent_size=options.indent_size, 

3664 leave_tabs=not ( 

3665 code_match( 

3666 'W191', 

3667 select=options.select, 

3668 ignore=options.ignore 

3669 ) 

3670 ) 

3671 ) 

3672 

3673 for (code, function) in global_fixes(): 

3674 if code_match(code, select=options.select, ignore=options.ignore): 

3675 if options.verbose: 

3676 print('---> Applying {} fix for {}'.format(where, 

3677 code.upper()), 

3678 file=sys.stderr) 

3679 source = function(source, 

3680 aggressive=options.aggressive) 

3681 

3682 source = fix_2to3(source, 

3683 aggressive=options.aggressive, 

3684 select=options.select, 

3685 ignore=options.ignore, 

3686 filename=filename, 

3687 where=where, 

3688 verbose=options.verbose) 

3689 

3690 return source 

3691 

3692 

3693def extract_code_from_function(function): 

3694 """Return code handled by function.""" 

3695 if not function.__name__.startswith('fix_'): 

3696 return None 

3697 

3698 code = re.sub('^fix_', '', function.__name__) 

3699 if not code: 

3700 return None 

3701 

3702 try: 

3703 int(code[1:]) 

3704 except ValueError: 

3705 return None 

3706 

3707 return code 

3708 

3709 
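
global_fixes() discovers fixers by introspection: any module-level function named fix_<code> whose first parameter is 'source'. A small demonstration of the name-to-code extraction, using a hypothetical fixer:

    import autopep8

    def fix_w602(source, aggressive=False):    # hypothetical global fixer
        return source

    print(autopep8.extract_code_from_function(fix_w602))    # 'w602'
    print(autopep8.extract_code_from_function(print))       # None (no fix_ prefix)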

3710def _get_package_version(): 

3711 packages = ["pycodestyle: {}".format(pycodestyle.__version__)] 

3712 return ", ".join(packages) 

3713 

3714 

3715def create_parser(): 

3716 """Return command-line parser.""" 

3717 parser = argparse.ArgumentParser(description=docstring_summary(__doc__), 

3718 prog='autopep8') 

3719 parser.add_argument('--version', action='version', 

3720 version='%(prog)s {} ({})'.format( 

3721 __version__, _get_package_version())) 

3722 parser.add_argument('-v', '--verbose', action='count', 

3723 default=0, 

3724 help='print verbose messages; ' 

3725                        'multiple -v flags result in more verbose messages')

3726 parser.add_argument('-d', '--diff', action='store_true', 

3727 help='print the diff for the fixed source') 

3728 parser.add_argument('-i', '--in-place', action='store_true', 

3729 help='make changes to files in place') 

3730 parser.add_argument('--global-config', metavar='filename', 

3731 default=DEFAULT_CONFIG, 

3732 help='path to a global pep8 config file; if this file ' 

3733 'does not exist then this is ignored ' 

3734 '(default: {})'.format(DEFAULT_CONFIG)) 

3735 parser.add_argument('--ignore-local-config', action='store_true', 

3736 help="don't look for and apply local config files; " 

3737 'if not passed, defaults are updated with any ' 

3738 "config files in the project's root directory") 

3739 parser.add_argument('-r', '--recursive', action='store_true', 

3740 help='run recursively over directories; ' 

3741 'must be used with --in-place or --diff') 

3742 parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1, 

3743 help='number of parallel jobs; ' 

3744 'match CPU count if value is less than 1') 

3745 parser.add_argument('-p', '--pep8-passes', metavar='n', 

3746 default=-1, type=int, 

3747 help='maximum number of additional pep8 passes ' 

3748 '(default: infinite)') 

3749 parser.add_argument('-a', '--aggressive', action='count', default=0, 

3750 help='enable non-whitespace changes; ' 

3751                        'multiple -a flags result in more aggressive changes')

3752 parser.add_argument('--experimental', action='store_true', 

3753 help='enable experimental fixes') 

3754 parser.add_argument('--exclude', metavar='globs', 

3755 help='exclude file/directory names that match these ' 

3756 'comma-separated globs') 

3757 parser.add_argument('--list-fixes', action='store_true', 

3758 help='list codes for fixes; ' 

3759 'used by --ignore and --select') 

3760 parser.add_argument('--ignore', metavar='errors', default='', 

3761 help='do not fix these errors/warnings ' 

3762 '(default: {})'.format(DEFAULT_IGNORE)) 

3763 parser.add_argument('--select', metavar='errors', default='', 

3764 help='fix only these errors/warnings (e.g. E4,W)') 

3765 parser.add_argument('--max-line-length', metavar='n', default=79, type=int, 

3766 help='set maximum allowed line length ' 

3767 '(default: %(default)s)') 

3768 parser.add_argument('--line-range', '--range', metavar='line', 

3769 default=None, type=int, nargs=2, 

3770 help='only fix errors found within this inclusive ' 

3771 'range of line numbers (e.g. 1 99); ' 

3772 'line numbers are indexed at 1') 

3773 parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE, 

3774 type=int, help=argparse.SUPPRESS) 

3775 parser.add_argument('--hang-closing', action='store_true', 

3776 help='hang-closing option passed to pycodestyle') 

3777 parser.add_argument('--exit-code', action='store_true', 

3778                        help='change the behavior of the exit code: '

3779                             'by default, 0 means no differences and 1 '

3780                             'means an error occurred; with this option, '

3781                             '2 is returned when differences exist')

3782 parser.add_argument('files', nargs='*', 

3783 help="files to format or '-' for standard in") 

3784 

3785 return parser 

3786 

3787 

3788def _expand_codes(codes, ignore_codes): 

3789 """expand to individual E/W codes""" 

3790 ret = set() 

3791 

3792 is_conflict = False 

3793 if all( 

3794 any( 

3795 conflicting_code.startswith(code) 

3796 for code in codes 

3797 ) 

3798 for conflicting_code in CONFLICTING_CODES 

3799 ): 

3800 is_conflict = True 

3801 

3802 is_ignore_w503 = "W503" in ignore_codes 

3803 is_ignore_w504 = "W504" in ignore_codes 

3804 

3805 for code in codes: 

3806 if code == "W": 

3807 if is_ignore_w503 and is_ignore_w504: 

3808 ret.update({"W1", "W2", "W3", "W505", "W6"}) 

3809 elif is_ignore_w503: 

3810 ret.update({"W1", "W2", "W3", "W504", "W505", "W6"}) 

3811 else: 

3812 ret.update({"W1", "W2", "W3", "W503", "W505", "W6"}) 

3813 elif code in ("W5", "W50"): 

3814 if is_ignore_w503 and is_ignore_w504: 

3815 ret.update({"W505"}) 

3816 elif is_ignore_w503: 

3817 ret.update({"W504", "W505"}) 

3818 else: 

3819 ret.update({"W503", "W505"}) 

3820 elif not (code in ("W503", "W504") and is_conflict): 

3821 ret.add(code) 

3822 

3823 return ret 

3824 

3825 
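
A sketch of the expansion performed by this internal helper: a bare 'W' fans out to the W subgroups, and the conflicting W503/W504 pair is narrowed using the ignore list:

    import autopep8

    print(sorted(autopep8._expand_codes({'W'}, ignore_codes=set())))
    # ['W1', 'W2', 'W3', 'W503', 'W505', 'W6']
    print(sorted(autopep8._expand_codes({'W5'}, ignore_codes={'W503'})))
    # ['W504', 'W505']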

3826def parse_args(arguments, apply_config=False): 

3827 """Parse command-line options.""" 

3828 parser = create_parser() 

3829 args = parser.parse_args(arguments) 

3830 

3831 if not args.files and not args.list_fixes: 

3832 parser.exit(EXIT_CODE_ARGPARSE_ERROR, 'incorrect number of arguments') 

3833 

3834 args.files = [decode_filename(name) for name in args.files] 

3835 

3836 if apply_config: 

3837 parser = read_config(args, parser) 

3838        # prefer settings from pyproject.toml's tool.autopep8 section when it exists

3839 try: 

3840 parser_with_pyproject_toml = read_pyproject_toml(args, parser) 

3841 except Exception: 

3842 parser_with_pyproject_toml = None 

3843 if parser_with_pyproject_toml: 

3844 parser = parser_with_pyproject_toml 

3845 args = parser.parse_args(arguments) 

3846 args.files = [decode_filename(name) for name in args.files] 

3847 

3848 if '-' in args.files: 

3849 if len(args.files) > 1: 

3850 parser.exit( 

3851 EXIT_CODE_ARGPARSE_ERROR, 

3852 'cannot mix stdin and regular files', 

3853 ) 

3854 

3855 if args.diff: 

3856 parser.exit( 

3857 EXIT_CODE_ARGPARSE_ERROR, 

3858 '--diff cannot be used with standard input', 

3859 ) 

3860 

3861 if args.in_place: 

3862 parser.exit( 

3863 EXIT_CODE_ARGPARSE_ERROR, 

3864 '--in-place cannot be used with standard input', 

3865 ) 

3866 

3867 if args.recursive: 

3868 parser.exit( 

3869 EXIT_CODE_ARGPARSE_ERROR, 

3870 '--recursive cannot be used with standard input', 

3871 ) 

3872 

3873 if len(args.files) > 1 and not (args.in_place or args.diff): 

3874 parser.exit( 

3875 EXIT_CODE_ARGPARSE_ERROR, 

3876 'autopep8 only takes one filename as argument ' 

3877 'unless the "--in-place" or "--diff" args are used', 

3878 ) 

3879 

3880 if args.recursive and not (args.in_place or args.diff): 

3881 parser.exit( 

3882 EXIT_CODE_ARGPARSE_ERROR, 

3883 '--recursive must be used with --in-place or --diff', 

3884 ) 

3885 

3886 if args.in_place and args.diff: 

3887 parser.exit( 

3888 EXIT_CODE_ARGPARSE_ERROR, 

3889 '--in-place and --diff are mutually exclusive', 

3890 ) 

3891 

3892 if args.max_line_length <= 0: 

3893 parser.exit( 

3894 EXIT_CODE_ARGPARSE_ERROR, 

3895 '--max-line-length must be greater than 0', 

3896 ) 

3897 

3898 if args.indent_size <= 0: 

3899 parser.exit( 

3900 EXIT_CODE_ARGPARSE_ERROR, 

3901 '--indent-size must be greater than 0', 

3902 ) 

3903 

3904 if args.select: 

3905 args.select = _expand_codes( 

3906 _split_comma_separated(args.select), 

3907 (_split_comma_separated(args.ignore) if args.ignore else []) 

3908 ) 

3909 

3910 if args.ignore: 

3911 args.ignore = _split_comma_separated(args.ignore) 

3912 if all( 

3913 not any( 

3914 conflicting_code.startswith(ignore_code) 

3915 for ignore_code in args.ignore 

3916 ) 

3917 for conflicting_code in CONFLICTING_CODES 

3918 ): 

3919 args.ignore.update(CONFLICTING_CODES) 

3920 elif not args.select: 

3921 if args.aggressive: 

3922 # Enable everything by default if aggressive. 

3923 args.select = {'E', 'W1', 'W2', 'W3', 'W6'} 

3924 else: 

3925 args.ignore = _split_comma_separated(DEFAULT_IGNORE) 

3926 

3927 if args.exclude: 

3928 args.exclude = _split_comma_separated(args.exclude) 

3929 else: 

3930        args.exclude = set()

3931 

3932 if args.jobs < 1: 

3933 # Do not import multiprocessing globally in case it is not supported 

3934 # on the platform. 

3935 import multiprocessing 

3936 args.jobs = multiprocessing.cpu_count() 

3937 

3938 if args.jobs > 1 and not (args.in_place or args.diff): 

3939 parser.exit( 

3940 EXIT_CODE_ARGPARSE_ERROR, 

3941            'parallel jobs require --in-place or --diff',

3942 ) 

3943 

3944 if args.line_range: 

3945 if args.line_range[0] <= 0: 

3946 parser.exit( 

3947 EXIT_CODE_ARGPARSE_ERROR, 

3948                '--range values must be positive',

3949 ) 

3950 if args.line_range[0] > args.line_range[1]: 

3951 parser.exit( 

3952 EXIT_CODE_ARGPARSE_ERROR, 

3953 'First value of --range should be less than or equal ' 

3954 'to the second', 

3955 ) 

3956 

3957 return args 

3958 

3959 
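
parse_args() is usable programmatically as well; note how a bare '--select=W5' is expanded by _expand_codes() above ('x.py' is a hypothetical filename, which parse_args() does not open):

    import autopep8

    args = autopep8.parse_args(['--select=W5', '--max-line-length=100', 'x.py'])
    print(args.max_line_length)    # 100
    print(sorted(args.select))     # ['W503', 'W505']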

3960def _get_normalize_options(args, config, section, option_list): 

3961 for (k, v) in config.items(section): 

3962 norm_opt = k.lstrip('-').replace('-', '_') 

3963 if not option_list.get(norm_opt): 

3964 continue 

3965 opt_type = option_list[norm_opt] 

3966 if opt_type is int: 

3967 if v.strip() == "auto": 

3968                # skip the special value "auto"

3969 if args.verbose: 

3970 print(f"ignore config: {k}={v}") 

3971 continue 

3972 value = config.getint(section, k) 

3973 elif opt_type is bool: 

3974 value = config.getboolean(section, k) 

3975 else: 

3976 value = config.get(section, k) 

3977 yield norm_opt, k, value 

3978 

3979 

3980def read_config(args, parser): 

3981 """Read both user configuration and local configuration.""" 

3982 config = SafeConfigParser() 

3983 

3984 try: 

3985 if args.verbose and os.path.exists(args.global_config): 

3986 print("read config path: {}".format(args.global_config)) 

3987 config.read(args.global_config) 

3988 

3989 if not args.ignore_local_config: 

3990 parent = tail = args.files and os.path.abspath( 

3991 os.path.commonprefix(args.files)) 

3992 while tail: 

3993 if config.read([os.path.join(parent, fn) 

3994 for fn in PROJECT_CONFIG]): 

3995 if args.verbose: 

3996 for fn in PROJECT_CONFIG: 

3997 config_file = os.path.join(parent, fn) 

3998 if not os.path.exists(config_file): 

3999 continue 

4000 print( 

4001 "read config path: {}".format( 

4002 os.path.join(parent, fn) 

4003 ) 

4004 ) 

4005 break 

4006 (parent, tail) = os.path.split(parent) 

4007 

4008 defaults = {} 

4009 option_list = {o.dest: o.type or type(o.default) 

4010 for o in parser._actions} 

4011 

4012 for section in ['pep8', 'pycodestyle', 'flake8']: 

4013 if not config.has_section(section): 

4014 continue 

4015 for norm_opt, k, value in _get_normalize_options( 

4016 args, config, section, option_list 

4017 ): 

4018 if args.verbose: 

4019 print("enable config: section={}, key={}, value={}".format( 

4020 section, k, value)) 

4021 defaults[norm_opt] = value 

4022 

4023 parser.set_defaults(**defaults) 

4024 except Error: 

4025 # Ignore for now. 

4026 pass 

4027 

4028 return parser 

4029 

4030 
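
read_config() accepts ini-style files with [pep8], [pycodestyle], or [flake8] sections. A minimal sketch of the parsing step using the standard configparser module (SafeConfigParser above is assumed to alias it on Python 3); the string values are later coerced by the argparse option types:

    import configparser

    cfg = configparser.ConfigParser()
    cfg.read_string('[pycodestyle]\nmax-line-length = 100\n')
    print(dict(cfg.items('pycodestyle')))    # {'max-line-length': '100'}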

4031def read_pyproject_toml(args, parser): 

4032 """Read pyproject.toml and load configuration.""" 

4033 if sys.version_info >= (3, 11): 

4034 import tomllib 

4035 else: 

4036 import tomli as tomllib 

4037 

4038 config = None 

4039 

4040 if os.path.exists(args.global_config): 

4041 with open(args.global_config, "rb") as fp: 

4042 config = tomllib.load(fp) 

4043 

4044 if not args.ignore_local_config: 

4045 parent = tail = args.files and os.path.abspath( 

4046 os.path.commonprefix(args.files)) 

4047 while tail: 

4048 pyproject_toml = os.path.join(parent, "pyproject.toml") 

4049 if os.path.exists(pyproject_toml): 

4050 with open(pyproject_toml, "rb") as fp: 

4051 config = tomllib.load(fp) 

4052 break 

4053 (parent, tail) = os.path.split(parent) 

4054 

4055 if not config: 

4056 return None 

4057 

4058 if config.get("tool", {}).get("autopep8") is None: 

4059 return None 

4060 

4061 config = config.get("tool").get("autopep8") 

4062 

4063 defaults = {} 

4064 option_list = {o.dest: o.type or type(o.default) 

4065 for o in parser._actions} 

4066 

4067 TUPLED_OPTIONS = ("ignore", "select") 

4068 for (k, v) in config.items(): 

4069 norm_opt = k.lstrip('-').replace('-', '_') 

4070 if not option_list.get(norm_opt): 

4071 continue 

4072 if type(v) in (list, tuple) and norm_opt in TUPLED_OPTIONS: 

4073 value = ",".join(v) 

4074 else: 

4075 value = v 

4076 if args.verbose: 

4077 print("enable pyproject.toml config: " 

4078 "key={}, value={}".format(k, value)) 

4079 defaults[norm_opt] = value 

4080 

4081 if defaults: 

4082        # apply the collected key-value pairs as parser defaults

4083 parser.set_defaults(**defaults) 

4084 

4085 return parser 

4086 

4087 
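
The [tool.autopep8] table this reader consumes looks like the sketch below; list values for ignore/select are joined with commas before being handed to the parser defaults. tomllib ships with Python 3.11+, and earlier interpreters need the tomli backport, matching the import above:

    import sys
    if sys.version_info >= (3, 11):
        import tomllib
    else:
        import tomli as tomllib

    doc = tomllib.loads(
        '[tool.autopep8]\n'
        'max_line_length = 100\n'
        'ignore = ["E226", "E24"]\n'
    )
    print(doc['tool']['autopep8'])
    # {'max_line_length': 100, 'ignore': ['E226', 'E24']}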

4088def _split_comma_separated(string): 

4089 """Return a set of strings.""" 

4090 return {text.strip() for text in string.split(',') if text.strip()} 

4091 

4092 

4093def decode_filename(filename): 

4094 """Return Unicode filename.""" 

4095 if isinstance(filename, str): 

4096 return filename 

4097 

4098 return filename.decode(sys.getfilesystemencoding()) 

4099 

4100 

4101def supported_fixes(): 

4102 """Yield pep8 error codes that autopep8 fixes. 

4103 

4104 Each item we yield is a tuple of the code followed by its 

4105 description. 

4106 

4107 """ 

4108 yield ('E101', docstring_summary(reindent.__doc__)) 

4109 

4110 instance = FixPEP8(filename=None, options=None, contents='') 

4111 for attribute in dir(instance): 

4112 code = re.match('fix_([ew][0-9][0-9][0-9])', attribute) 

4113 if code: 

4114 yield ( 

4115 code.group(1).upper(), 

4116 re.sub(r'\s+', ' ', 

4117 docstring_summary(getattr(instance, attribute).__doc__)) 

4118 ) 

4119 

4120 for (code, function) in sorted(global_fixes()): 

4121 yield (code.upper() + (4 - len(code)) * ' ', 

4122 re.sub(r'\s+', ' ', docstring_summary(function.__doc__))) 

4123 

4124 for code in sorted(CODE_TO_2TO3): 

4125 yield (code.upper() + (4 - len(code)) * ' ', 

4126 re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__))) 

4127 

4128 
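
A quick way to reproduce the --list-fixes output from Python, mirroring what main() does below:

    import autopep8

    for code, description in sorted(autopep8.supported_fixes()):
        print('{} - {}'.format(code, description))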

4129def docstring_summary(docstring): 

4130 """Return summary of docstring.""" 

4131 return docstring.split('\n')[0] if docstring else '' 

4132 

4133 

4134def line_shortening_rank(candidate, indent_word, max_line_length, 

4135 experimental=False): 

4136 """Return rank of candidate. 

4137 

4138 This is for sorting candidates. 

4139 

4140 """ 

4141 if not candidate.strip(): 

4142 return 0 

4143 

4144 rank = 0 

4145 lines = candidate.rstrip().split('\n') 

4146 

4147 offset = 0 

4148 if ( 

4149 not lines[0].lstrip().startswith('#') and 

4150 lines[0].rstrip()[-1] not in '([{' 

4151 ): 

4152 for (opening, closing) in ('()', '[]', '{}'): 

4153            # Don't penalize empty containers that aren't split up;

4154            # results like "foo(\n    )" aren't particularly good.

4155 opening_loc = lines[0].find(opening) 

4156 closing_loc = lines[0].find(closing) 

4157 if opening_loc >= 0: 

4158 if closing_loc < 0 or closing_loc != opening_loc + 1: 

4159 offset = max(offset, 1 + opening_loc) 

4160 

4161 current_longest = max(offset + len(x.strip()) for x in lines) 

4162 

4163 rank += 4 * max(0, current_longest - max_line_length) 

4164 

4165 rank += len(lines) 

4166 

4167 # Too much variation in line length is ugly. 

4168 rank += 2 * standard_deviation(len(line) for line in lines) 

4169 

4170    bad_starting_symbol = {

4171 '(': ')', 

4172 '[': ']', 

4173 '{': '}'}.get(lines[0][-1]) 

4174 

4175 if len(lines) > 1: 

4176 if ( 

4177            bad_starting_symbol and

4178            lines[1].lstrip().startswith(bad_starting_symbol)

4179 ): 

4180 rank += 20 

4181 

4182 for lineno, current_line in enumerate(lines): 

4183 current_line = current_line.strip() 

4184 

4185 if current_line.startswith('#'): 

4186 continue 

4187 

4188 for bad_start in ['.', '%', '+', '-', '/']: 

4189 if current_line.startswith(bad_start): 

4190 rank += 100 

4191 

4192 # Do not tolerate operators on their own line. 

4193 if current_line == bad_start: 

4194 rank += 1000 

4195 

4196 if ( 

4197 current_line.endswith(('.', '%', '+', '-', '/')) and 

4198 "': " in current_line 

4199 ): 

4200 rank += 1000 

4201 

4202 if current_line.endswith(('(', '[', '{', '.')): 

4203 # Avoid lonely opening. They result in longer lines. 

4204 if len(current_line) <= len(indent_word): 

4205 rank += 100 

4206 

4207 # Avoid the ugliness of ", (\n". 

4208 if ( 

4209 current_line.endswith('(') and 

4210 current_line[:-1].rstrip().endswith(',') 

4211 ): 

4212 rank += 100 

4213 

4214 # Avoid the ugliness of "something[\n" and something[index][\n. 

4215 if ( 

4216 current_line.endswith('[') and 

4217 len(current_line) > 1 and 

4218 (current_line[-2].isalnum() or current_line[-2] in ']') 

4219 ): 

4220 rank += 300 

4221 

4222 # Also avoid the ugliness of "foo.\nbar" 

4223 if current_line.endswith('.'): 

4224 rank += 100 

4225 

4226 if has_arithmetic_operator(current_line): 

4227 rank += 100 

4228 

4229 # Avoid breaking at unary operators. 

4230 if re.match(r'.*[(\[{]\s*[\-\+~]$', current_line.rstrip('\\ ')): 

4231 rank += 1000 

4232 

4233 if re.match(r'.*lambda\s*\*$', current_line.rstrip('\\ ')): 

4234 rank += 1000 

4235 

4236 if current_line.endswith(('%', '(', '[', '{')): 

4237 rank -= 20 

4238 

4239 # Try to break list comprehensions at the "for". 

4240 if current_line.startswith('for '): 

4241 rank -= 50 

4242 

4243 if current_line.endswith('\\'): 

4244 # If a line ends in \-newline, it may be part of a 

4245 # multiline string. In that case, we would like to know 

4246 # how long that line is without the \-newline. If it's 

4247 # longer than the maximum, or has comments, then we assume 

4248 # that the \-newline is an okay candidate and only 

4249 # penalize it a bit. 

4250 total_len = len(current_line) 

4251 lineno += 1 

4252 while lineno < len(lines): 

4253 total_len += len(lines[lineno]) 

4254 

4255 if lines[lineno].lstrip().startswith('#'): 

4256 total_len = max_line_length 

4257 break 

4258 

4259 if not lines[lineno].endswith('\\'): 

4260 break 

4261 

4262 lineno += 1 

4263 

4264 if total_len < max_line_length: 

4265 rank += 10 

4266 else: 

4267 rank += 100 if experimental else 1 

4268 

4269        # Prefer breaking at commas rather than at colons.

4270 if ',' in current_line and current_line.endswith(':'): 

4271 rank += 10 

4272 

4273 # Avoid splitting dictionaries between key and value. 

4274 if current_line.endswith(':'): 

4275 rank += 100 

4276 

4277 rank += 10 * count_unbalanced_brackets(current_line) 

4278 

4279 return max(0, rank) 

4280 

4281 
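
A hedged sketch of comparing two reflow candidates with this heuristic; lower rank marks the more attractive candidate, and the exact scores are an implementation detail, not stable API:

    import autopep8

    a = 'result = compute(\n    alpha, beta, gamma)\n'
    b = 'result = compute(alpha,\n                 beta, gamma,\n)\n'
    for candidate in (a, b):
        print(autopep8.line_shortening_rank(
            candidate, indent_word='    ', max_line_length=79))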

4282def standard_deviation(numbers): 

4283 """Return standard deviation.""" 

4284 numbers = list(numbers) 

4285 if not numbers: 

4286 return 0 

4287 mean = sum(numbers) / len(numbers) 

4288 return (sum((n - mean) ** 2 for n in numbers) / 

4289 len(numbers)) ** .5 

4290 

4291 
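
This is the population standard deviation (dividing by n, not n - 1); a quick check:

    import autopep8

    print(autopep8.standard_deviation([2, 4, 4, 4, 5, 5, 7, 9]))    # 2.0
    print(autopep8.standard_deviation([]))                          # 0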

4292def has_arithmetic_operator(line): 

4293 """Return True if line contains any arithmetic operators.""" 

4294 for operator in pycodestyle.ARITHMETIC_OP: 

4295 if operator in line: 

4296 return True 

4297 

4298 return False 

4299 

4300 

4301def count_unbalanced_brackets(line): 

4302 """Return number of unmatched open/close brackets.""" 

4303 count = 0 

4304 for opening, closing in ['()', '[]', '{}']: 

4305 count += abs(line.count(opening) - line.count(closing)) 

4306 

4307 return count 

4308 

4309 
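
Each bracket type is counted independently, so several kinds of unmatched brackets accumulate; for example:

    import autopep8

    print(autopep8.count_unbalanced_brackets('foo(a, b[0]'))     # 1
    print(autopep8.count_unbalanced_brackets('d = {"k": [1,'))   # 2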

4310def split_at_offsets(line, offsets): 

4311 """Split line at offsets. 

4312 

4313 Return list of strings. 

4314 

4315 """ 

4316 result = [] 

4317 

4318 previous_offset = 0 

4319 current_offset = 0 

4320 for current_offset in sorted(offsets): 

4321 if current_offset < len(line) and previous_offset != current_offset: 

4322 result.append(line[previous_offset:current_offset].strip()) 

4323 previous_offset = current_offset 

4324 

4325 result.append(line[current_offset:]) 

4326 

4327 return result 

4328 

4329 
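
A short demonstration; intermediate pieces are stripped, while the tail keeps its original form:

    import autopep8

    print(autopep8.split_at_offsets('abc def ghi', [4, 8]))
    # ['abc', 'def', 'ghi']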

4330class LineEndingWrapper(object): 

4331 

4332 r"""Replace line endings to work with sys.stdout. 

4333 

4334 It seems that sys.stdout expects only '\n' as the line ending, no matter 

4335 the platform. Otherwise, we get repeated line endings. 

4336 

4337 """ 

4338 

4339 def __init__(self, output): 

4340 self.__output = output 

4341 

4342 def write(self, s): 

4343 self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n')) 

4344 

4345 def flush(self): 

4346 self.__output.flush() 

4347 

4348 
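
A quick check of the wrapper; any object with write() and flush() works as the target:

    import io
    import autopep8

    buf = io.StringIO()
    wrapper = autopep8.LineEndingWrapper(buf)
    wrapper.write('a\r\nb\r')
    wrapper.flush()
    print(repr(buf.getvalue()))    # 'a\nb\n'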

4349def match_file(filename, exclude): 

4350 """Return True if file is okay for modifying/recursing.""" 

4351 base_name = os.path.basename(filename) 

4352 

4353 if base_name.startswith('.'): 

4354 return False 

4355 

4356 for pattern in exclude: 

4357 if fnmatch.fnmatch(base_name, pattern): 

4358 return False 

4359 if fnmatch.fnmatch(filename, pattern): 

4360 return False 

4361 

4362 if not os.path.isdir(filename) and not is_python_file(filename): 

4363 return False 

4364 

4365 return True 

4366 

4367 

4368def find_files(filenames, recursive, exclude): 

4369 """Yield filenames.""" 

4370 while filenames: 

4371 name = filenames.pop(0) 

4372 if recursive and os.path.isdir(name): 

4373 for root, directories, children in os.walk(name): 

4374 filenames += [os.path.join(root, f) for f in children 

4375 if match_file(os.path.join(root, f), 

4376 exclude)] 

4377 directories[:] = [d for d in directories 

4378 if match_file(os.path.join(root, d), 

4379 exclude)] 

4380 else: 

4381 is_exclude_match = False 

4382 for pattern in exclude: 

4383 if fnmatch.fnmatch(name, pattern): 

4384 is_exclude_match = True 

4385 break 

4386 if not is_exclude_match: 

4387 yield name 

4388 

4389 
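
A hedged usage sketch; 'src' is a hypothetical directory, and exclude globs are matched by match_file() against both base names and full paths:

    import autopep8

    for path in autopep8.find_files(['src'], recursive=True,
                                    exclude={'build', '*_pb2.py'}):
        print(path)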

4390def _fix_file(parameters): 

4391 """Helper function for optionally running fix_file() in parallel.""" 

4392 if parameters[1].verbose: 

4393 print('[file:{}]'.format(parameters[0]), file=sys.stderr) 

4394 try: 

4395 return fix_file(*parameters) 

4396 except IOError as error: 

4397 print(str(error), file=sys.stderr) 

4398 raise error 

4399 

4400 

4401def fix_multiple_files(filenames, options, output=None): 

4402 """Fix list of files. 

4403 

4404 Optionally fix files recursively. 

4405 

4406 """ 

4407 results = [] 

4408 filenames = find_files(filenames, options.recursive, options.exclude) 

4409 if options.jobs > 1: 

4410 import multiprocessing 

4411 pool = multiprocessing.Pool(options.jobs) 

4412 rets = [] 

4413 for name in filenames: 

4414 ret = pool.apply_async(_fix_file, ((name, options),)) 

4415 rets.append(ret) 

4416 pool.close() 

4417 pool.join() 

4418 if options.diff: 

4419 for r in rets: 

4420 sys.stdout.write(r.get().decode()) 

4421 sys.stdout.flush() 

4422        results.extend([x.get() for x in rets if x.get() is not None])

4423 else: 

4424 for name in filenames: 

4425 ret = _fix_file((name, options, output)) 

4426 if ret is None: 

4427 continue 

4428 if options.diff: 

4429 if ret != '': 

4430 results.append(ret) 

4431 elif options.in_place: 

4432 results.append(ret) 

4433 else: 

4434 original_source = readlines_from_file(name) 

4435 if "".join(original_source).splitlines() != ret.splitlines(): 

4436 results.append(ret) 

4437 return results 

4438 

4439 

4440def is_python_file(filename): 

4441 """Return True if filename is Python file.""" 

4442 if filename.endswith('.py'): 

4443 return True 

4444 

4445 try: 

4446 with open_with_encoding( 

4447 filename, 

4448 limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES) as f: 

4449 text = f.read(MAX_PYTHON_FILE_DETECTION_BYTES) 

4450 if not text: 

4451 return False 

4452 first_line = text.splitlines()[0] 

4453 except (IOError, IndexError): 

4454 return False 

4455 

4456 if not PYTHON_SHEBANG_REGEX.match(first_line): 

4457 return False 

4458 

4459 return True 

4460 

4461 

4462def is_probably_part_of_multiline(line): 

4463 """Return True if line is likely part of a multiline string. 

4464 

4465 When multiline strings are involved, pep8 reports the error as being 

4466 at the start of the multiline string, which doesn't work for us. 

4467 

4468 """ 

4469 return ( 

4470 '"""' in line or 

4471 "'''" in line or 

4472 line.rstrip().endswith('\\') 

4473 ) 

4474 

4475 

4476def wrap_output(output, encoding): 

4477 """Return output with specified encoding.""" 

4478 return codecs.getwriter(encoding)(output.buffer 

4479 if hasattr(output, 'buffer') 

4480 else output) 

4481 

4482 

4483def get_encoding(): 

4484 """Return preferred encoding.""" 

4485 return locale.getpreferredencoding() or sys.getdefaultencoding() 

4486 

4487 

4488def main(argv=None, apply_config=True): 

4489 """Command-line entry.""" 

4490 if argv is None: 

4491 argv = sys.argv 

4492 

4493 try: 

4494 # Exit on broken pipe. 

4495 signal.signal(signal.SIGPIPE, signal.SIG_DFL) 

4496 except AttributeError: # pragma: no cover 

4497 # SIGPIPE is not available on Windows. 

4498 pass 

4499 

4500 try: 

4501 args = parse_args(argv[1:], apply_config=apply_config) 

4502 

4503 if args.list_fixes: 

4504 for code, description in sorted(supported_fixes()): 

4505 print('{code} - {description}'.format( 

4506 code=code, description=description)) 

4507 return EXIT_CODE_OK 

4508 

4509 if args.files == ['-']: 

4510 assert not args.in_place 

4511 

4512 encoding = sys.stdin.encoding or get_encoding() 

4513 read_stdin = sys.stdin.read() 

4514 fixed_stdin = fix_code(read_stdin, args, encoding=encoding) 

4515 

4516 # LineEndingWrapper is unnecessary here due to the symmetry between 

4517 # standard in and standard out. 

4518 wrap_output(sys.stdout, encoding=encoding).write(fixed_stdin) 

4519 

4520 if hash(read_stdin) != hash(fixed_stdin): 

4521 if args.exit_code: 

4522 return EXIT_CODE_EXISTS_DIFF 

4523 else: 

4524 if args.in_place or args.diff: 

4525 args.files = list(set(args.files)) 

4526 else: 

4527 assert len(args.files) == 1 

4528 assert not args.recursive 

4529 

4530 results = fix_multiple_files(args.files, args, sys.stdout) 

4531 if args.diff: 

4532            ret = any(len(r) != 0 for r in results)

4533 else: 

4534            # with the --in-place option

4535            ret = any(r is not None for r in results)

4536 if args.exit_code and ret: 

4537 return EXIT_CODE_EXISTS_DIFF 

4538 except IOError: 

4539 return EXIT_CODE_ERROR 

4540 except KeyboardInterrupt: 

4541 return EXIT_CODE_ERROR # pragma: no cover 

4542 

4543 

4544class CachedTokenizer(object): 

4545 

4546 """A one-element cache around tokenize.generate_tokens(). 

4547 

4548 Original code written by Ned Batchelder, in coverage.py. 

4549 

4550 """ 

4551 

4552 def __init__(self): 

4553 self.last_text = None 

4554 self.last_tokens = None 

4555 

4556 def generate_tokens(self, text): 

4557 """A stand-in for tokenize.generate_tokens().""" 

4558 if text != self.last_text: 

4559 string_io = io.StringIO(text) 

4560 self.last_tokens = list( 

4561 tokenize.generate_tokens(string_io.readline) 

4562 ) 

4563 self.last_text = text 

4564 return self.last_tokens 

4565 

4566 

4567_cached_tokenizer = CachedTokenizer() 

4568generate_tokens = _cached_tokenizer.generate_tokens 

4569 

4570 
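
A small check of the one-element cache: identical text returns the very same token list, so repeated fixer passes avoid re-tokenizing unchanged source:

    import autopep8

    t1 = autopep8.generate_tokens('x = 1\n')
    t2 = autopep8.generate_tokens('x = 1\n')
    print(t1 is t2)    # True: served from the cache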

4571if __name__ == '__main__': 

4572 sys.exit(main())