Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/autopep8.py: 56%
2398 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:34 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:34 +0000
1#!/usr/bin/env python
3# Copyright (C) 2010-2011 Hideo Hattori
4# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
5# Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
6#
7# Permission is hereby granted, free of charge, to any person obtaining
8# a copy of this software and associated documentation files (the
9# "Software"), to deal in the Software without restriction, including
10# without limitation the rights to use, copy, modify, merge, publish,
11# distribute, sublicense, and/or sell copies of the Software, and to
12# permit persons to whom the Software is furnished to do so, subject to
13# the following conditions:
14#
15# The above copyright notice and this permission notice shall be
16# included in all copies or substantial portions of the Software.
17#
18# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
22# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
23# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25# SOFTWARE.
27# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
28# Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
29#
30# Permission is hereby granted, free of charge, to any person
31# obtaining a copy of this software and associated documentation files
32# (the "Software"), to deal in the Software without restriction,
33# including without limitation the rights to use, copy, modify, merge,
34# publish, distribute, sublicense, and/or sell copies of the Software,
35# and to permit persons to whom the Software is furnished to do so,
36# subject to the following conditions:
37#
38# The above copyright notice and this permission notice shall be
39# included in all copies or substantial portions of the Software.
40#
41# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
42# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
43# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
44# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
45# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
46# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
47# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
48# SOFTWARE.
50"""Automatically formats Python code to conform to the PEP 8 style guide.
52Fixes that only need be done once can be added by adding a function of the form
53"fix_<code>(source)" to this module. They should return the fixed source code.
54These fixes are picked up by apply_global_fixes().
56Fixes that depend on pycodestyle should be added as methods to FixPEP8. See the
57class documentation for more information.
59"""
61from __future__ import absolute_import
62from __future__ import division
63from __future__ import print_function
64from __future__ import unicode_literals
66import argparse
67import codecs
68import collections
69import copy
70import difflib
71import fnmatch
72import inspect
73import io
74import itertools
75import keyword
76import locale
77import os
78import re
79import signal
80import sys
81import textwrap
82import token
83import tokenize
84import warnings
85import ast
86from configparser import ConfigParser as SafeConfigParser, Error
88import pycodestyle
89from pycodestyle import STARTSWITH_INDENT_STATEMENT_REGEX
92__version__ = '2.0.2'
95CR = '\r'
96LF = '\n'
97CRLF = '\r\n'
100PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
101LAMBDA_REGEX = re.compile(r'([\w.]+)\s=\slambda\s*([)(=\w,\s.]*):')
102COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+([^][)(}{]+?)\s+(in|is)\s')
103COMPARE_NEGATIVE_REGEX_THROUGH = re.compile(r'\b(not\s+in|is\s+not)\s')
104BARE_EXCEPT_REGEX = re.compile(r'except\s*:')
105STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\s.*\):')
106DOCSTRING_START_REGEX = re.compile(r'^u?r?(?P<kind>["\']{3})')
107ENABLE_REGEX = re.compile(r'# *(fmt|autopep8): *on')
108DISABLE_REGEX = re.compile(r'# *(fmt|autopep8): *off')
110EXIT_CODE_OK = 0
111EXIT_CODE_ERROR = 1
112EXIT_CODE_EXISTS_DIFF = 2
113EXIT_CODE_ARGPARSE_ERROR = 99
115# For generating line shortening candidates.
116SHORTEN_OPERATOR_GROUPS = frozenset([
117 frozenset([',']),
118 frozenset(['%']),
119 frozenset([',', '(', '[', '{']),
120 frozenset(['%', '(', '[', '{']),
121 frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
122 frozenset(['%', '+', '-', '*', '/', '//']),
123])
126DEFAULT_IGNORE = 'E226,E24,W50,W690' # TODO: use pycodestyle.DEFAULT_IGNORE
127DEFAULT_INDENT_SIZE = 4
128# these fixes conflict with each other, if the `--ignore` setting causes both
129# to be enabled, disable both of them
130CONFLICTING_CODES = ('W503', 'W504')
132# W602 is handled separately due to the need to avoid "with_traceback".
133CODE_TO_2TO3 = {
134 'E231': ['ws_comma'],
135 'E721': ['idioms'],
136 'W690': ['apply',
137 'except',
138 'exitfunc',
139 'numliterals',
140 'operator',
141 'paren',
142 'reduce',
143 'renames',
144 'standarderror',
145 'sys_exc',
146 'throw',
147 'tuple_params',
148 'xreadlines']}
151if sys.platform == 'win32': # pragma: no cover
152 DEFAULT_CONFIG = os.path.expanduser(r'~\.pycodestyle')
153else:
154 DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
155 os.path.expanduser('~/.config'),
156 'pycodestyle')
157# fallback, use .pep8
158if not os.path.exists(DEFAULT_CONFIG): # pragma: no cover
159 if sys.platform == 'win32':
160 DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
161 else:
162 DEFAULT_CONFIG = os.path.join(os.path.expanduser('~/.config'), 'pep8')
163PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8', '.flake8')
166MAX_PYTHON_FILE_DETECTION_BYTES = 1024
169def open_with_encoding(filename, mode='r', encoding=None, limit_byte_check=-1):
170 """Return opened file with a specific encoding."""
171 if not encoding:
172 encoding = detect_encoding(filename, limit_byte_check=limit_byte_check)
174 return io.open(filename, mode=mode, encoding=encoding,
175 newline='') # Preserve line endings
178def detect_encoding(filename, limit_byte_check=-1):
179 """Return file encoding."""
180 try:
181 with open(filename, 'rb') as input_file:
182 from lib2to3.pgen2 import tokenize as lib2to3_tokenize
183 encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
185 with open_with_encoding(filename, encoding=encoding) as test_file:
186 test_file.read(limit_byte_check)
188 return encoding
189 except (LookupError, SyntaxError, UnicodeDecodeError):
190 return 'latin-1'
193def readlines_from_file(filename):
194 """Return contents of file."""
195 with open_with_encoding(filename) as input_file:
196 return input_file.readlines()
199def extended_blank_lines(logical_line,
200 blank_lines,
201 blank_before,
202 indent_level,
203 previous_logical):
204 """Check for missing blank lines after class declaration."""
205 if previous_logical.startswith('def '):
206 if blank_lines and pycodestyle.DOCSTRING_REGEX.match(logical_line):
207 yield (0, 'E303 too many blank lines ({})'.format(blank_lines))
208 elif pycodestyle.DOCSTRING_REGEX.match(previous_logical):
209 # Missing blank line between class docstring and method declaration.
210 if (
211 indent_level and
212 not blank_lines and
213 not blank_before and
214 logical_line.startswith(('def ')) and
215 '(self' in logical_line
216 ):
217 yield (0, 'E301 expected 1 blank line, found 0')
220pycodestyle.register_check(extended_blank_lines)
223def continued_indentation(logical_line, tokens, indent_level, hang_closing,
224 indent_char, noqa):
225 """Override pycodestyle's function to provide indentation information."""
226 first_row = tokens[0][2][0]
227 nrows = 1 + tokens[-1][2][0] - first_row
228 if noqa or nrows == 1:
229 return
231 # indent_next tells us whether the next block is indented. Assuming
232 # that it is indented by 4 spaces, then we should not allow 4-space
233 # indents on the final continuation line. In turn, some other
234 # indents are allowed to have an extra 4 spaces.
235 indent_next = logical_line.endswith(':')
237 row = depth = 0
238 valid_hangs = (
239 (DEFAULT_INDENT_SIZE,)
240 if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
241 2 * DEFAULT_INDENT_SIZE)
242 )
244 # Remember how many brackets were opened on each line.
245 parens = [0] * nrows
247 # Relative indents of physical lines.
248 rel_indent = [0] * nrows
250 # For each depth, collect a list of opening rows.
251 open_rows = [[0]]
252 # For each depth, memorize the hanging indentation.
253 hangs = [None]
255 # Visual indents.
256 indent_chances = {}
257 last_indent = tokens[0][2]
258 indent = [last_indent[1]]
260 last_token_multiline = None
261 line = None
262 last_line = ''
263 last_line_begins_with_multiline = False
264 for token_type, text, start, end, line in tokens:
266 newline = row < start[0] - first_row
267 if newline:
268 row = start[0] - first_row
269 newline = (not last_token_multiline and
270 token_type not in (tokenize.NL, tokenize.NEWLINE))
271 last_line_begins_with_multiline = last_token_multiline
273 if newline:
274 # This is the beginning of a continuation line.
275 last_indent = start
277 # Record the initial indent.
278 rel_indent[row] = pycodestyle.expand_indent(line) - indent_level
280 # Identify closing bracket.
281 close_bracket = (token_type == tokenize.OP and text in ']})')
283 # Is the indent relative to an opening bracket line?
284 for open_row in reversed(open_rows[depth]):
285 hang = rel_indent[row] - rel_indent[open_row]
286 hanging_indent = hang in valid_hangs
287 if hanging_indent:
288 break
289 if hangs[depth]:
290 hanging_indent = (hang == hangs[depth])
292 visual_indent = (not close_bracket and hang > 0 and
293 indent_chances.get(start[1]))
295 if close_bracket and indent[depth]:
296 # Closing bracket for visual indent.
297 if start[1] != indent[depth]:
298 yield (start, 'E124 {}'.format(indent[depth]))
299 elif close_bracket and not hang:
300 # closing bracket matches indentation of opening bracket's line
301 if hang_closing:
302 yield (start, 'E133 {}'.format(indent[depth]))
303 elif indent[depth] and start[1] < indent[depth]:
304 if visual_indent is not True:
305 # Visual indent is broken.
306 yield (start, 'E128 {}'.format(indent[depth]))
307 elif (hanging_indent or
308 (indent_next and
309 rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
310 # Hanging indent is verified.
311 if close_bracket and not hang_closing:
312 yield (start, 'E123 {}'.format(indent_level +
313 rel_indent[open_row]))
314 hangs[depth] = hang
315 elif visual_indent is True:
316 # Visual indent is verified.
317 indent[depth] = start[1]
318 elif visual_indent in (text, str):
319 # Ignore token lined up with matching one from a previous line.
320 pass
321 else:
322 one_indented = (indent_level + rel_indent[open_row] +
323 DEFAULT_INDENT_SIZE)
324 # Indent is broken.
325 if hang <= 0:
326 error = ('E122', one_indented)
327 elif indent[depth]:
328 error = ('E127', indent[depth])
329 elif not close_bracket and hangs[depth]:
330 error = ('E131', one_indented)
331 elif hang > DEFAULT_INDENT_SIZE:
332 error = ('E126', one_indented)
333 else:
334 hangs[depth] = hang
335 error = ('E121', one_indented)
337 yield (start, '{} {}'.format(*error))
339 # Look for visual indenting.
340 if (
341 parens[row] and
342 token_type not in (tokenize.NL, tokenize.COMMENT) and
343 not indent[depth]
344 ):
345 indent[depth] = start[1]
346 indent_chances[start[1]] = True
347 # Deal with implicit string concatenation.
348 elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
349 text in ('u', 'ur', 'b', 'br')):
350 indent_chances[start[1]] = str
351 # Special case for the "if" statement because len("if (") is equal to
352 # 4.
353 elif not indent_chances and not row and not depth and text == 'if':
354 indent_chances[end[1] + 1] = True
355 elif text == ':' and line[end[1]:].isspace():
356 open_rows[depth].append(row)
358 # Keep track of bracket depth.
359 if token_type == tokenize.OP:
360 if text in '([{':
361 depth += 1
362 indent.append(0)
363 hangs.append(None)
364 if len(open_rows) == depth:
365 open_rows.append([])
366 open_rows[depth].append(row)
367 parens[row] += 1
368 elif text in ')]}' and depth > 0:
369 # Parent indents should not be more than this one.
370 prev_indent = indent.pop() or last_indent[1]
371 hangs.pop()
372 for d in range(depth):
373 if indent[d] > prev_indent:
374 indent[d] = 0
375 for ind in list(indent_chances):
376 if ind >= prev_indent:
377 del indent_chances[ind]
378 del open_rows[depth + 1:]
379 depth -= 1
380 if depth:
381 indent_chances[indent[depth]] = True
382 for idx in range(row, -1, -1):
383 if parens[idx]:
384 parens[idx] -= 1
385 break
386 assert len(indent) == depth + 1
387 if (
388 start[1] not in indent_chances and
389 # This is for purposes of speeding up E121 (GitHub #90).
390 not last_line.rstrip().endswith(',')
391 ):
392 # Allow to line up tokens.
393 indent_chances[start[1]] = text
395 last_token_multiline = (start[0] != end[0])
396 if last_token_multiline:
397 rel_indent[end[0] - first_row] = rel_indent[row]
399 last_line = line
401 if (
402 indent_next and
403 not last_line_begins_with_multiline and
404 pycodestyle.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
405 ):
406 pos = (start[0], indent[0] + 4)
407 desired_indent = indent_level + 2 * DEFAULT_INDENT_SIZE
408 if visual_indent:
409 yield (pos, 'E129 {}'.format(desired_indent))
410 else:
411 yield (pos, 'E125 {}'.format(desired_indent))
414del pycodestyle._checks['logical_line'][pycodestyle.continued_indentation]
415pycodestyle.register_check(continued_indentation)
418class FixPEP8(object):
420 """Fix invalid code.
422 Fixer methods are prefixed "fix_". The _fix_source() method looks for these
423 automatically.
425 The fixer method can take either one or two arguments (in addition to
426 self). The first argument is "result", which is the error information from
427 pycodestyle. The second argument, "logical", is required only for
428 logical-line fixes.
430 The fixer method can return the list of modified lines or None. An empty
431 list would mean that no changes were made. None would mean that only the
432 line reported in the pycodestyle error was modified. Note that the modified
433 line numbers that are returned are indexed at 1. This typically would
434 correspond with the line number reported in the pycodestyle error
435 information.
437 [fixed method list]
438 - e111,e114,e115,e116
439 - e121,e122,e123,e124,e125,e126,e127,e128,e129
440 - e201,e202,e203
441 - e211
442 - e221,e222,e223,e224,e225
443 - e231
444 - e251,e252
445 - e261,e262
446 - e271,e272,e273,e274,e275
447 - e301,e302,e303,e304,e305,e306
448 - e401,e402
449 - e502
450 - e701,e702,e703,e704
451 - e711,e712,e713,e714
452 - e722
453 - e731
454 - w291
455 - w503,504
457 """
459 def __init__(self, filename,
460 options,
461 contents=None,
462 long_line_ignore_cache=None):
463 self.filename = filename
464 if contents is None:
465 self.source = readlines_from_file(filename)
466 else:
467 sio = io.StringIO(contents)
468 self.source = sio.readlines()
469 self.options = options
470 self.indent_word = _get_indentword(''.join(self.source))
472 # collect imports line
473 self.imports = {}
474 for i, line in enumerate(self.source):
475 if (line.find("import ") == 0 or line.find("from ") == 0) and \
476 line not in self.imports:
477 # collect only import statements that first appeared
478 self.imports[line] = i
480 self.long_line_ignore_cache = (
481 set() if long_line_ignore_cache is None
482 else long_line_ignore_cache)
484 # Many fixers are the same even though pycodestyle categorizes them
485 # differently.
486 self.fix_e115 = self.fix_e112
487 self.fix_e121 = self._fix_reindent
488 self.fix_e122 = self._fix_reindent
489 self.fix_e123 = self._fix_reindent
490 self.fix_e124 = self._fix_reindent
491 self.fix_e126 = self._fix_reindent
492 self.fix_e127 = self._fix_reindent
493 self.fix_e128 = self._fix_reindent
494 self.fix_e129 = self._fix_reindent
495 self.fix_e133 = self.fix_e131
496 self.fix_e202 = self.fix_e201
497 self.fix_e203 = self.fix_e201
498 self.fix_e211 = self.fix_e201
499 self.fix_e221 = self.fix_e271
500 self.fix_e222 = self.fix_e271
501 self.fix_e223 = self.fix_e271
502 self.fix_e226 = self.fix_e225
503 self.fix_e227 = self.fix_e225
504 self.fix_e228 = self.fix_e225
505 self.fix_e241 = self.fix_e271
506 self.fix_e242 = self.fix_e224
507 self.fix_e252 = self.fix_e225
508 self.fix_e261 = self.fix_e262
509 self.fix_e272 = self.fix_e271
510 self.fix_e273 = self.fix_e271
511 self.fix_e274 = self.fix_e271
512 self.fix_e275 = self.fix_e271
513 self.fix_e306 = self.fix_e301
514 self.fix_e501 = (
515 self.fix_long_line_logically if
516 options and (options.aggressive >= 2 or options.experimental) else
517 self.fix_long_line_physically)
518 self.fix_e703 = self.fix_e702
519 self.fix_w292 = self.fix_w291
520 self.fix_w293 = self.fix_w291
522 def _fix_source(self, results):
523 try:
524 (logical_start, logical_end) = _find_logical(self.source)
525 logical_support = True
526 except (SyntaxError, tokenize.TokenError): # pragma: no cover
527 logical_support = False
529 completed_lines = set()
530 for result in sorted(results, key=_priority_key):
531 if result['line'] in completed_lines:
532 continue
534 fixed_methodname = 'fix_' + result['id'].lower()
535 if hasattr(self, fixed_methodname):
536 fix = getattr(self, fixed_methodname)
538 line_index = result['line'] - 1
539 original_line = self.source[line_index]
541 is_logical_fix = len(_get_parameters(fix)) > 2
542 if is_logical_fix:
543 logical = None
544 if logical_support:
545 logical = _get_logical(self.source,
546 result,
547 logical_start,
548 logical_end)
549 if logical and set(range(
550 logical[0][0] + 1,
551 logical[1][0] + 1)).intersection(
552 completed_lines):
553 continue
555 modified_lines = fix(result, logical)
556 else:
557 modified_lines = fix(result)
559 if modified_lines is None:
560 # Force logical fixes to report what they modified.
561 assert not is_logical_fix
563 if self.source[line_index] == original_line:
564 modified_lines = []
566 if modified_lines:
567 completed_lines.update(modified_lines)
568 elif modified_lines == []: # Empty list means no fix
569 if self.options.verbose >= 2:
570 print(
571 '---> Not fixing {error} on line {line}'.format(
572 error=result['id'], line=result['line']),
573 file=sys.stderr)
574 else: # We assume one-line fix when None.
575 completed_lines.add(result['line'])
576 else:
577 if self.options.verbose >= 3:
578 print(
579 "---> '{}' is not defined.".format(fixed_methodname),
580 file=sys.stderr)
582 info = result['info'].strip()
583 print('---> {}:{}:{}:{}'.format(self.filename,
584 result['line'],
585 result['column'],
586 info),
587 file=sys.stderr)
589 def fix(self):
590 """Return a version of the source code with PEP 8 violations fixed."""
591 pep8_options = {
592 'ignore': self.options.ignore,
593 'select': self.options.select,
594 'max_line_length': self.options.max_line_length,
595 'hang_closing': self.options.hang_closing,
596 }
597 results = _execute_pep8(pep8_options, self.source)
599 if self.options.verbose:
600 progress = {}
601 for r in results:
602 if r['id'] not in progress:
603 progress[r['id']] = set()
604 progress[r['id']].add(r['line'])
605 print('---> {n} issue(s) to fix {progress}'.format(
606 n=len(results), progress=progress), file=sys.stderr)
608 if self.options.line_range:
609 start, end = self.options.line_range
610 results = [r for r in results
611 if start <= r['line'] <= end]
613 self._fix_source(filter_results(source=''.join(self.source),
614 results=results,
615 aggressive=self.options.aggressive))
617 if self.options.line_range:
618 # If number of lines has changed then change line_range.
619 count = sum(sline.count('\n')
620 for sline in self.source[start - 1:end])
621 self.options.line_range[1] = start + count - 1
623 return ''.join(self.source)
625 def _fix_reindent(self, result):
626 """Fix a badly indented line.
628 This is done by adding or removing from its initial indent only.
630 """
631 num_indent_spaces = int(result['info'].split()[1])
632 line_index = result['line'] - 1
633 target = self.source[line_index]
635 self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
637 def fix_e112(self, result):
638 """Fix under-indented comments."""
639 line_index = result['line'] - 1
640 target = self.source[line_index]
642 if not target.lstrip().startswith('#'):
643 # Don't screw with invalid syntax.
644 return []
646 self.source[line_index] = self.indent_word + target
648 def fix_e113(self, result):
649 """Fix unexpected indentation."""
650 line_index = result['line'] - 1
651 target = self.source[line_index]
652 indent = _get_indentation(target)
653 stripped = target.lstrip()
654 self.source[line_index] = indent[1:] + stripped
656 def fix_e116(self, result):
657 """Fix over-indented comments."""
658 line_index = result['line'] - 1
659 target = self.source[line_index]
661 indent = _get_indentation(target)
662 stripped = target.lstrip()
664 if not stripped.startswith('#'):
665 # Don't screw with invalid syntax.
666 return []
668 self.source[line_index] = indent[1:] + stripped
670 def fix_e117(self, result):
671 """Fix over-indented."""
672 line_index = result['line'] - 1
673 target = self.source[line_index]
675 indent = _get_indentation(target)
676 if indent == '\t':
677 return []
679 stripped = target.lstrip()
681 self.source[line_index] = indent[1:] + stripped
683 def fix_e125(self, result):
684 """Fix indentation undistinguish from the next logical line."""
685 num_indent_spaces = int(result['info'].split()[1])
686 line_index = result['line'] - 1
687 target = self.source[line_index]
689 spaces_to_add = num_indent_spaces - len(_get_indentation(target))
690 indent = len(_get_indentation(target))
691 modified_lines = []
693 while len(_get_indentation(self.source[line_index])) >= indent:
694 self.source[line_index] = (' ' * spaces_to_add +
695 self.source[line_index])
696 modified_lines.append(1 + line_index) # Line indexed at 1.
697 line_index -= 1
699 return modified_lines
701 def fix_e131(self, result):
702 """Fix indentation undistinguish from the next logical line."""
703 num_indent_spaces = int(result['info'].split()[1])
704 line_index = result['line'] - 1
705 target = self.source[line_index]
707 spaces_to_add = num_indent_spaces - len(_get_indentation(target))
709 indent_length = len(_get_indentation(target))
710 spaces_to_add = num_indent_spaces - indent_length
711 if num_indent_spaces == 0 and indent_length == 0:
712 spaces_to_add = 4
714 if spaces_to_add >= 0:
715 self.source[line_index] = (' ' * spaces_to_add +
716 self.source[line_index])
717 else:
718 offset = abs(spaces_to_add)
719 self.source[line_index] = self.source[line_index][offset:]
721 def fix_e201(self, result):
722 """Remove extraneous whitespace."""
723 line_index = result['line'] - 1
724 target = self.source[line_index]
725 offset = result['column'] - 1
727 fixed = fix_whitespace(target,
728 offset=offset,
729 replacement='')
731 self.source[line_index] = fixed
733 def fix_e224(self, result):
734 """Remove extraneous whitespace around operator."""
735 target = self.source[result['line'] - 1]
736 offset = result['column'] - 1
737 fixed = target[:offset] + target[offset:].replace('\t', ' ')
738 self.source[result['line'] - 1] = fixed
740 def fix_e225(self, result):
741 """Fix missing whitespace around operator."""
742 target = self.source[result['line'] - 1]
743 offset = result['column'] - 1
744 fixed = target[:offset] + ' ' + target[offset:]
746 # Only proceed if non-whitespace characters match.
747 # And make sure we don't break the indentation.
748 if (
749 fixed.replace(' ', '') == target.replace(' ', '') and
750 _get_indentation(fixed) == _get_indentation(target)
751 ):
752 self.source[result['line'] - 1] = fixed
753 error_code = result.get('id', 0)
754 try:
755 ts = generate_tokens(fixed)
756 except (SyntaxError, tokenize.TokenError):
757 return
758 if not check_syntax(fixed.lstrip()):
759 return
760 errors = list(
761 pycodestyle.missing_whitespace_around_operator(fixed, ts))
762 for e in reversed(errors):
763 if error_code != e[1].split()[0]:
764 continue
765 offset = e[0][1]
766 fixed = fixed[:offset] + ' ' + fixed[offset:]
767 self.source[result['line'] - 1] = fixed
768 else:
769 return []
771 def fix_e231(self, result):
772 """Add missing whitespace."""
773 line_index = result['line'] - 1
774 target = self.source[line_index]
775 offset = result['column']
776 fixed = target[:offset].rstrip() + ' ' + target[offset:].lstrip()
777 self.source[line_index] = fixed
779 def fix_e251(self, result):
780 """Remove whitespace around parameter '=' sign."""
781 line_index = result['line'] - 1
782 target = self.source[line_index]
784 # This is necessary since pycodestyle sometimes reports columns that
785 # goes past the end of the physical line. This happens in cases like,
786 # foo(bar\n=None)
787 c = min(result['column'] - 1,
788 len(target) - 1)
790 if target[c].strip():
791 fixed = target
792 else:
793 fixed = target[:c].rstrip() + target[c:].lstrip()
795 # There could be an escaped newline
796 #
797 # def foo(a=\
798 # 1)
799 if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
800 self.source[line_index] = fixed.rstrip('\n\r \t\\')
801 self.source[line_index + 1] = self.source[line_index + 1].lstrip()
802 return [line_index + 1, line_index + 2] # Line indexed at 1
804 self.source[result['line'] - 1] = fixed
806 def fix_e262(self, result):
807 """Fix spacing after inline comment hash."""
808 target = self.source[result['line'] - 1]
809 offset = result['column']
811 code = target[:offset].rstrip(' \t#')
812 comment = target[offset:].lstrip(' \t#')
814 fixed = code + (' # ' + comment if comment.strip() else '\n')
816 self.source[result['line'] - 1] = fixed
818 def fix_e265(self, result):
819 """Fix spacing after block comment hash."""
820 target = self.source[result['line'] - 1]
822 indent = _get_indentation(target)
823 line = target.lstrip(' \t')
824 pos = next((index for index, c in enumerate(line) if c != '#'))
825 hashes = line[:pos]
826 comment = line[pos:].lstrip(' \t')
828 # Ignore special comments, even in the middle of the file.
829 if comment.startswith('!'):
830 return
832 fixed = indent + hashes + (' ' + comment if comment.strip() else '\n')
834 self.source[result['line'] - 1] = fixed
836 def fix_e266(self, result):
837 """Fix too many block comment hashes."""
838 target = self.source[result['line'] - 1]
840 # Leave stylistic outlined blocks alone.
841 if target.strip().endswith('#'):
842 return
844 indentation = _get_indentation(target)
845 fixed = indentation + '# ' + target.lstrip('# \t')
847 self.source[result['line'] - 1] = fixed
849 def fix_e271(self, result):
850 """Fix extraneous whitespace around keywords."""
851 line_index = result['line'] - 1
852 target = self.source[line_index]
853 offset = result['column'] - 1
855 fixed = fix_whitespace(target,
856 offset=offset,
857 replacement=' ')
859 if fixed == target:
860 return []
861 else:
862 self.source[line_index] = fixed
864 def fix_e301(self, result):
865 """Add missing blank line."""
866 cr = '\n'
867 self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
869 def fix_e302(self, result):
870 """Add missing 2 blank lines."""
871 add_linenum = 2 - int(result['info'].split()[-1])
872 offset = 1
873 if self.source[result['line'] - 2].strip() == "\\":
874 offset = 2
875 cr = '\n' * add_linenum
876 self.source[result['line'] - offset] = (
877 cr + self.source[result['line'] - offset]
878 )
880 def fix_e303(self, result):
881 """Remove extra blank lines."""
882 delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
883 delete_linenum = max(1, delete_linenum)
885 # We need to count because pycodestyle reports an offset line number if
886 # there are comments.
887 cnt = 0
888 line = result['line'] - 2
889 modified_lines = []
890 while cnt < delete_linenum and line >= 0:
891 if not self.source[line].strip():
892 self.source[line] = ''
893 modified_lines.append(1 + line) # Line indexed at 1
894 cnt += 1
895 line -= 1
897 return modified_lines
899 def fix_e304(self, result):
900 """Remove blank line following function decorator."""
901 line = result['line'] - 2
902 if not self.source[line].strip():
903 self.source[line] = ''
905 def fix_e305(self, result):
906 """Add missing 2 blank lines after end of function or class."""
907 add_delete_linenum = 2 - int(result['info'].split()[-1])
908 cnt = 0
909 offset = result['line'] - 2
910 modified_lines = []
911 if add_delete_linenum < 0:
912 # delete cr
913 add_delete_linenum = abs(add_delete_linenum)
914 while cnt < add_delete_linenum and offset >= 0:
915 if not self.source[offset].strip():
916 self.source[offset] = ''
917 modified_lines.append(1 + offset) # Line indexed at 1
918 cnt += 1
919 offset -= 1
920 else:
921 # add cr
922 cr = '\n'
923 # check comment line
924 while True:
925 if offset < 0:
926 break
927 line = self.source[offset].lstrip()
928 if not line:
929 break
930 if line[0] != '#':
931 break
932 offset -= 1
933 offset += 1
934 self.source[offset] = cr + self.source[offset]
935 modified_lines.append(1 + offset) # Line indexed at 1.
936 return modified_lines
938 def fix_e401(self, result):
939 """Put imports on separate lines."""
940 line_index = result['line'] - 1
941 target = self.source[line_index]
942 offset = result['column'] - 1
944 if not target.lstrip().startswith('import'):
945 return []
947 indentation = re.split(pattern=r'\bimport\b',
948 string=target, maxsplit=1)[0]
949 fixed = (target[:offset].rstrip('\t ,') + '\n' +
950 indentation + 'import ' + target[offset:].lstrip('\t ,'))
951 self.source[line_index] = fixed
953 def fix_e402(self, result):
954 (line_index, offset, target) = get_index_offset_contents(result,
955 self.source)
956 for i in range(1, 100):
957 line = "".join(self.source[line_index:line_index+i])
958 try:
959 generate_tokens("".join(line))
960 except (SyntaxError, tokenize.TokenError):
961 continue
962 break
963 if not (target in self.imports and self.imports[target] != line_index):
964 mod_offset = get_module_imports_on_top_of_file(self.source,
965 line_index)
966 self.source[mod_offset] = line + self.source[mod_offset]
967 for offset in range(i):
968 self.source[line_index+offset] = ''
970 def fix_long_line_logically(self, result, logical):
971 """Try to make lines fit within --max-line-length characters."""
972 if (
973 not logical or
974 len(logical[2]) == 1 or
975 self.source[result['line'] - 1].lstrip().startswith('#')
976 ):
977 return self.fix_long_line_physically(result)
979 start_line_index = logical[0][0]
980 end_line_index = logical[1][0]
981 logical_lines = logical[2]
983 previous_line = get_item(self.source, start_line_index - 1, default='')
984 next_line = get_item(self.source, end_line_index + 1, default='')
986 single_line = join_logical_line(''.join(logical_lines))
988 try:
989 fixed = self.fix_long_line(
990 target=single_line,
991 previous_line=previous_line,
992 next_line=next_line,
993 original=''.join(logical_lines))
994 except (SyntaxError, tokenize.TokenError):
995 return self.fix_long_line_physically(result)
997 if fixed:
998 for line_index in range(start_line_index, end_line_index + 1):
999 self.source[line_index] = ''
1000 self.source[start_line_index] = fixed
1001 return range(start_line_index + 1, end_line_index + 1)
1003 return []
1005 def fix_long_line_physically(self, result):
1006 """Try to make lines fit within --max-line-length characters."""
1007 line_index = result['line'] - 1
1008 target = self.source[line_index]
1010 previous_line = get_item(self.source, line_index - 1, default='')
1011 next_line = get_item(self.source, line_index + 1, default='')
1013 try:
1014 fixed = self.fix_long_line(
1015 target=target,
1016 previous_line=previous_line,
1017 next_line=next_line,
1018 original=target)
1019 except (SyntaxError, tokenize.TokenError):
1020 return []
1022 if fixed:
1023 self.source[line_index] = fixed
1024 return [line_index + 1]
1026 return []
1028 def fix_long_line(self, target, previous_line,
1029 next_line, original):
1030 cache_entry = (target, previous_line, next_line)
1031 if cache_entry in self.long_line_ignore_cache:
1032 return []
1034 if target.lstrip().startswith('#'):
1035 if self.options.aggressive:
1036 # Wrap commented lines.
1037 return shorten_comment(
1038 line=target,
1039 max_line_length=self.options.max_line_length,
1040 last_comment=not next_line.lstrip().startswith('#'))
1041 return []
1043 fixed = get_fixed_long_line(
1044 target=target,
1045 previous_line=previous_line,
1046 original=original,
1047 indent_word=self.indent_word,
1048 max_line_length=self.options.max_line_length,
1049 aggressive=self.options.aggressive,
1050 experimental=self.options.experimental,
1051 verbose=self.options.verbose)
1053 if fixed and not code_almost_equal(original, fixed):
1054 return fixed
1056 self.long_line_ignore_cache.add(cache_entry)
1057 return None
1059 def fix_e502(self, result):
1060 """Remove extraneous escape of newline."""
1061 (line_index, _, target) = get_index_offset_contents(result,
1062 self.source)
1063 self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'
1065 def fix_e701(self, result):
1066 """Put colon-separated compound statement on separate lines."""
1067 line_index = result['line'] - 1
1068 target = self.source[line_index]
1069 c = result['column']
1071 fixed_source = (target[:c] + '\n' +
1072 _get_indentation(target) + self.indent_word +
1073 target[c:].lstrip('\n\r \t\\'))
1074 self.source[result['line'] - 1] = fixed_source
1075 return [result['line'], result['line'] + 1]
1077 def fix_e702(self, result, logical):
1078 """Put semicolon-separated compound statement on separate lines."""
1079 if not logical:
1080 return [] # pragma: no cover
1081 logical_lines = logical[2]
1083 # Avoid applying this when indented.
1084 # https://docs.python.org/reference/compound_stmts.html
1085 for line in logical_lines:
1086 if (result['id'] == 'E702' and ':' in line
1087 and STARTSWITH_INDENT_STATEMENT_REGEX.match(line)):
1088 if self.options.verbose:
1089 print(
1090 '---> avoid fixing {error} with '
1091 'other compound statements'.format(error=result['id']),
1092 file=sys.stderr
1093 )
1094 return []
1096 line_index = result['line'] - 1
1097 target = self.source[line_index]
1099 if target.rstrip().endswith('\\'):
1100 # Normalize '1; \\\n2' into '1; 2'.
1101 self.source[line_index] = target.rstrip('\n \r\t\\')
1102 self.source[line_index + 1] = self.source[line_index + 1].lstrip()
1103 return [line_index + 1, line_index + 2]
1105 if target.rstrip().endswith(';'):
1106 self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
1107 return [line_index + 1]
1109 offset = result['column'] - 1
1110 first = target[:offset].rstrip(';').rstrip()
1111 second = (_get_indentation(logical_lines[0]) +
1112 target[offset:].lstrip(';').lstrip())
1114 # Find inline comment.
1115 inline_comment = None
1116 if target[offset:].lstrip(';').lstrip()[:2] == '# ':
1117 inline_comment = target[offset:].lstrip(';')
1119 if inline_comment:
1120 self.source[line_index] = first + inline_comment
1121 else:
1122 self.source[line_index] = first + '\n' + second
1123 return [line_index + 1]
1125 def fix_e704(self, result):
1126 """Fix multiple statements on one line def"""
1127 (line_index, _, target) = get_index_offset_contents(result,
1128 self.source)
1129 match = STARTSWITH_DEF_REGEX.match(target)
1130 if match:
1131 self.source[line_index] = '{}\n{}{}'.format(
1132 match.group(0),
1133 _get_indentation(target) + self.indent_word,
1134 target[match.end(0):].lstrip())
1136 def fix_e711(self, result):
1137 """Fix comparison with None."""
1138 (line_index, offset, target) = get_index_offset_contents(result,
1139 self.source)
1141 right_offset = offset + 2
1142 if right_offset >= len(target):
1143 return []
1145 left = target[:offset].rstrip()
1146 center = target[offset:right_offset]
1147 right = target[right_offset:].lstrip()
1149 if center.strip() == '==':
1150 new_center = 'is'
1151 elif center.strip() == '!=':
1152 new_center = 'is not'
1153 else:
1154 return []
1156 self.source[line_index] = ' '.join([left, new_center, right])
1158 def fix_e712(self, result):
1159 """Fix (trivial case of) comparison with boolean."""
1160 (line_index, offset, target) = get_index_offset_contents(result,
1161 self.source)
1163 # Handle very easy "not" special cases.
1164 if re.match(r'^\s*if [\w."\'\[\]]+ == False:$', target):
1165 self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) == False:',
1166 r'if not \1:', target, count=1)
1167 elif re.match(r'^\s*if [\w."\'\[\]]+ != True:$', target):
1168 self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) != True:',
1169 r'if not \1:', target, count=1)
1170 else:
1171 right_offset = offset + 2
1172 if right_offset >= len(target):
1173 return []
1175 left = target[:offset].rstrip()
1176 center = target[offset:right_offset]
1177 right = target[right_offset:].lstrip()
1179 # Handle simple cases only.
1180 new_right = None
1181 if center.strip() == '==':
1182 if re.match(r'\bTrue\b', right):
1183 new_right = re.sub(r'\bTrue\b *', '', right, count=1)
1184 elif center.strip() == '!=':
1185 if re.match(r'\bFalse\b', right):
1186 new_right = re.sub(r'\bFalse\b *', '', right, count=1)
1188 if new_right is None:
1189 return []
1191 if new_right[0].isalnum():
1192 new_right = ' ' + new_right
1194 self.source[line_index] = left + new_right
1196 def fix_e713(self, result):
1197 """Fix (trivial case of) non-membership check."""
1198 (line_index, offset, target) = get_index_offset_contents(result,
1199 self.source)
1201 # to convert once 'not in' -> 'in'
1202 before_target = target[:offset]
1203 target = target[offset:]
1204 match_notin = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
1205 notin_pos_start, notin_pos_end = 0, 0
1206 if match_notin:
1207 notin_pos_start = match_notin.start(1)
1208 notin_pos_end = match_notin.end()
1209 target = '{}{} {}'.format(
1210 target[:notin_pos_start], 'in', target[notin_pos_end:])
1212 # fix 'not in'
1213 match = COMPARE_NEGATIVE_REGEX.search(target)
1214 if match:
1215 if match.group(3) == 'in':
1216 pos_start = match.start(1)
1217 new_target = '{5}{0}{1} {2} {3} {4}'.format(
1218 target[:pos_start], match.group(2), match.group(1),
1219 match.group(3), target[match.end():], before_target)
1220 if match_notin:
1221 # revert 'in' -> 'not in'
1222 pos_start = notin_pos_start + offset
1223 pos_end = notin_pos_end + offset - 4 # len('not ')
1224 new_target = '{}{} {}'.format(
1225 new_target[:pos_start], 'not in', new_target[pos_end:])
1226 self.source[line_index] = new_target
1228 def fix_e714(self, result):
1229 """Fix object identity should be 'is not' case."""
1230 (line_index, offset, target) = get_index_offset_contents(result,
1231 self.source)
1233 # to convert once 'is not' -> 'is'
1234 before_target = target[:offset]
1235 target = target[offset:]
1236 match_isnot = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
1237 isnot_pos_start, isnot_pos_end = 0, 0
1238 if match_isnot:
1239 isnot_pos_start = match_isnot.start(1)
1240 isnot_pos_end = match_isnot.end()
1241 target = '{}{} {}'.format(
1242 target[:isnot_pos_start], 'in', target[isnot_pos_end:])
1244 match = COMPARE_NEGATIVE_REGEX.search(target)
1245 if match:
1246 if match.group(3).startswith('is'):
1247 pos_start = match.start(1)
1248 new_target = '{5}{0}{1} {2} {3} {4}'.format(
1249 target[:pos_start], match.group(2), match.group(3),
1250 match.group(1), target[match.end():], before_target)
1251 if match_isnot:
1252 # revert 'is' -> 'is not'
1253 pos_start = isnot_pos_start + offset
1254 pos_end = isnot_pos_end + offset - 4 # len('not ')
1255 new_target = '{}{} {}'.format(
1256 new_target[:pos_start], 'is not', new_target[pos_end:])
1257 self.source[line_index] = new_target
1259 def fix_e722(self, result):
1260 """fix bare except"""
1261 (line_index, _, target) = get_index_offset_contents(result,
1262 self.source)
1263 match = BARE_EXCEPT_REGEX.search(target)
1264 if match:
1265 self.source[line_index] = '{}{}{}'.format(
1266 target[:result['column'] - 1], "except BaseException:",
1267 target[match.end():])
1269 def fix_e731(self, result):
1270 """Fix do not assign a lambda expression check."""
1271 (line_index, _, target) = get_index_offset_contents(result,
1272 self.source)
1273 match = LAMBDA_REGEX.search(target)
1274 if match:
1275 end = match.end()
1276 self.source[line_index] = '{}def {}({}): return {}'.format(
1277 target[:match.start(0)], match.group(1), match.group(2),
1278 target[end:].lstrip())
1280 def fix_w291(self, result):
1281 """Remove trailing whitespace."""
1282 fixed_line = self.source[result['line'] - 1].rstrip()
1283 self.source[result['line'] - 1] = fixed_line + '\n'
1285 def fix_w391(self, _):
1286 """Remove trailing blank lines."""
1287 blank_count = 0
1288 for line in reversed(self.source):
1289 line = line.rstrip()
1290 if line:
1291 break
1292 else:
1293 blank_count += 1
1295 original_length = len(self.source)
1296 self.source = self.source[:original_length - blank_count]
1297 return range(1, 1 + original_length)
1299 def fix_w503(self, result):
1300 (line_index, _, target) = get_index_offset_contents(result,
1301 self.source)
1302 one_string_token = target.split()[0]
1303 try:
1304 ts = generate_tokens(one_string_token)
1305 except (SyntaxError, tokenize.TokenError):
1306 return
1307 if not _is_binary_operator(ts[0][0], one_string_token):
1308 return
1309 # find comment
1310 comment_index = 0
1311 found_not_comment_only_line = False
1312 comment_only_linenum = 0
1313 for i in range(5):
1314 # NOTE: try to parse code in 5 times
1315 if (line_index - i) < 0:
1316 break
1317 from_index = line_index - i - 1
1318 if from_index < 0 or len(self.source) <= from_index:
1319 break
1320 to_index = line_index + 1
1321 strip_line = self.source[from_index].lstrip()
1322 if (
1323 not found_not_comment_only_line and
1324 strip_line and strip_line[0] == '#'
1325 ):
1326 comment_only_linenum += 1
1327 continue
1328 found_not_comment_only_line = True
1329 try:
1330 ts = generate_tokens("".join(self.source[from_index:to_index]))
1331 except (SyntaxError, tokenize.TokenError):
1332 continue
1333 newline_count = 0
1334 newline_index = []
1335 for index, t in enumerate(ts):
1336 if t[0] in (tokenize.NEWLINE, tokenize.NL):
1337 newline_index.append(index)
1338 newline_count += 1
1339 if newline_count > 2:
1340 tts = ts[newline_index[-3]:]
1341 else:
1342 tts = ts
1343 old = []
1344 for t in tts:
1345 if t[0] in (tokenize.NEWLINE, tokenize.NL):
1346 newline_count -= 1
1347 if newline_count <= 1:
1348 break
1349 if tokenize.COMMENT == t[0] and old and old[0] != tokenize.NL:
1350 comment_index = old[3][1]
1351 break
1352 old = t
1353 break
1354 i = target.index(one_string_token)
1355 fix_target_line = line_index - 1 - comment_only_linenum
1356 self.source[line_index] = '{}{}'.format(
1357 target[:i], target[i + len(one_string_token):].lstrip())
1358 nl = find_newline(self.source[fix_target_line:line_index])
1359 before_line = self.source[fix_target_line]
1360 bl = before_line.index(nl)
1361 if comment_index:
1362 self.source[fix_target_line] = '{} {} {}'.format(
1363 before_line[:comment_index], one_string_token,
1364 before_line[comment_index + 1:])
1365 else:
1366 if before_line[:bl].endswith("#"):
1367 # special case
1368 # see: https://github.com/hhatto/autopep8/issues/503
1369 self.source[fix_target_line] = '{}{} {}'.format(
1370 before_line[:bl-2], one_string_token, before_line[bl-2:])
1371 else:
1372 self.source[fix_target_line] = '{} {}{}'.format(
1373 before_line[:bl], one_string_token, before_line[bl:])
1375 def fix_w504(self, result):
1376 (line_index, _, target) = get_index_offset_contents(result,
1377 self.source)
1378 # NOTE: is not collect pointed out in pycodestyle==2.4.0
1379 comment_index = 0
1380 operator_position = None # (start_position, end_position)
1381 for i in range(1, 6):
1382 to_index = line_index + i
1383 try:
1384 ts = generate_tokens("".join(self.source[line_index:to_index]))
1385 except (SyntaxError, tokenize.TokenError):
1386 continue
1387 newline_count = 0
1388 newline_index = []
1389 for index, t in enumerate(ts):
1390 if _is_binary_operator(t[0], t[1]):
1391 if t[2][0] == 1 and t[3][0] == 1:
1392 operator_position = (t[2][1], t[3][1])
1393 elif t[0] == tokenize.NAME and t[1] in ("and", "or"):
1394 if t[2][0] == 1 and t[3][0] == 1:
1395 operator_position = (t[2][1], t[3][1])
1396 elif t[0] in (tokenize.NEWLINE, tokenize.NL):
1397 newline_index.append(index)
1398 newline_count += 1
1399 if newline_count > 2:
1400 tts = ts[:newline_index[-3]]
1401 else:
1402 tts = ts
1403 old = []
1404 for t in tts:
1405 if tokenize.COMMENT == t[0] and old:
1406 comment_row, comment_index = old[3]
1407 break
1408 old = t
1409 break
1410 if not operator_position:
1411 return
1412 target_operator = target[operator_position[0]:operator_position[1]]
1414 if comment_index and comment_row == 1:
1415 self.source[line_index] = '{}{}'.format(
1416 target[:operator_position[0]].rstrip(),
1417 target[comment_index:])
1418 else:
1419 self.source[line_index] = '{}{}{}'.format(
1420 target[:operator_position[0]].rstrip(),
1421 target[operator_position[1]:].lstrip(),
1422 target[operator_position[1]:])
1424 next_line = self.source[line_index + 1]
1425 next_line_indent = 0
1426 m = re.match(r'\s*', next_line)
1427 if m:
1428 next_line_indent = m.span()[1]
1429 self.source[line_index + 1] = '{}{} {}'.format(
1430 next_line[:next_line_indent], target_operator,
1431 next_line[next_line_indent:])
1433 def fix_w605(self, result):
1434 (line_index, offset, target) = get_index_offset_contents(result,
1435 self.source)
1436 self.source[line_index] = '{}\\{}'.format(
1437 target[:offset + 1], target[offset + 1:])
1440def get_module_imports_on_top_of_file(source, import_line_index):
1441 """return import or from keyword position
1443 example:
1444 > 0: import sys
1445 1: import os
1446 2:
1447 3: def function():
1448 """
1449 def is_string_literal(line):
1450 if line[0] in 'uUbB':
1451 line = line[1:]
1452 if line and line[0] in 'rR':
1453 line = line[1:]
1454 return line and (line[0] == '"' or line[0] == "'")
1456 def is_future_import(line):
1457 nodes = ast.parse(line)
1458 for n in nodes.body:
1459 if isinstance(n, ast.ImportFrom) and n.module == '__future__':
1460 return True
1461 return False
1463 def has_future_import(source):
1464 offset = 0
1465 line = ''
1466 for _, next_line in source:
1467 for line_part in next_line.strip().splitlines(True):
1468 line = line + line_part
1469 try:
1470 return is_future_import(line), offset
1471 except SyntaxError:
1472 continue
1473 offset += 1
1474 return False, offset
1476 allowed_try_keywords = ('try', 'except', 'else', 'finally')
1477 in_docstring = False
1478 docstring_kind = '"""'
1479 source_stream = iter(enumerate(source))
1480 for cnt, line in source_stream:
1481 if not in_docstring:
1482 m = DOCSTRING_START_REGEX.match(line.lstrip())
1483 if m is not None:
1484 in_docstring = True
1485 docstring_kind = m.group('kind')
1486 remain = line[m.end(): m.endpos].rstrip()
1487 if remain[-3:] == docstring_kind: # one line doc
1488 in_docstring = False
1489 continue
1490 if in_docstring:
1491 if line.rstrip()[-3:] == docstring_kind:
1492 in_docstring = False
1493 continue
1495 if not line.rstrip():
1496 continue
1497 elif line.startswith('#'):
1498 continue
1500 if line.startswith('import '):
1501 if cnt == import_line_index:
1502 continue
1503 return cnt
1504 elif line.startswith('from '):
1505 if cnt == import_line_index:
1506 continue
1507 hit, offset = has_future_import(
1508 itertools.chain([(cnt, line)], source_stream)
1509 )
1510 if hit:
1511 # move to the back
1512 return cnt + offset + 1
1513 return cnt
1514 elif pycodestyle.DUNDER_REGEX.match(line):
1515 return cnt
1516 elif any(line.startswith(kw) for kw in allowed_try_keywords):
1517 continue
1518 elif is_string_literal(line):
1519 return cnt
1520 else:
1521 return cnt
1522 return 0
1525def get_index_offset_contents(result, source):
1526 """Return (line_index, column_offset, line_contents)."""
1527 line_index = result['line'] - 1
1528 return (line_index,
1529 result['column'] - 1,
1530 source[line_index])
1533def get_fixed_long_line(target, previous_line, original,
1534 indent_word=' ', max_line_length=79,
1535 aggressive=False, experimental=False, verbose=False):
1536 """Break up long line and return result.
1538 Do this by generating multiple reformatted candidates and then
1539 ranking the candidates to heuristically select the best option.
1541 """
1542 indent = _get_indentation(target)
1543 source = target[len(indent):]
1544 assert source.lstrip() == source
1545 assert not target.lstrip().startswith('#')
1547 # Check for partial multiline.
1548 tokens = list(generate_tokens(source))
1550 candidates = shorten_line(
1551 tokens, source, indent,
1552 indent_word,
1553 max_line_length,
1554 aggressive=aggressive,
1555 experimental=experimental,
1556 previous_line=previous_line)
1558 # Also sort alphabetically as a tie breaker (for determinism).
1559 candidates = sorted(
1560 sorted(set(candidates).union([target, original])),
1561 key=lambda x: line_shortening_rank(
1562 x,
1563 indent_word,
1564 max_line_length,
1565 experimental=experimental))
1567 if verbose >= 4:
1568 print(('-' * 79 + '\n').join([''] + candidates + ['']),
1569 file=wrap_output(sys.stderr, 'utf-8'))
1571 if candidates:
1572 best_candidate = candidates[0]
1574 # Don't allow things to get longer.
1575 if longest_line_length(best_candidate) > longest_line_length(original):
1576 return None
1578 return best_candidate
1581def longest_line_length(code):
1582 """Return length of longest line."""
1583 if len(code) == 0:
1584 return 0
1585 return max(len(line) for line in code.splitlines())
1588def join_logical_line(logical_line):
1589 """Return single line based on logical line input."""
1590 indentation = _get_indentation(logical_line)
1592 return indentation + untokenize_without_newlines(
1593 generate_tokens(logical_line.lstrip())) + '\n'
1596def untokenize_without_newlines(tokens):
1597 """Return source code based on tokens."""
1598 text = ''
1599 last_row = 0
1600 last_column = -1
1602 for t in tokens:
1603 token_string = t[1]
1604 (start_row, start_column) = t[2]
1605 (end_row, end_column) = t[3]
1607 if start_row > last_row:
1608 last_column = 0
1609 if (
1610 (start_column > last_column or token_string == '\n') and
1611 not text.endswith(' ')
1612 ):
1613 text += ' '
1615 if token_string != '\n':
1616 text += token_string
1618 last_row = end_row
1619 last_column = end_column
1621 return text.rstrip()
1624def _find_logical(source_lines):
1625 # Make a variable which is the index of all the starts of lines.
1626 logical_start = []
1627 logical_end = []
1628 last_newline = True
1629 parens = 0
1630 for t in generate_tokens(''.join(source_lines)):
1631 if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
1632 tokenize.INDENT, tokenize.NL,
1633 tokenize.ENDMARKER]:
1634 continue
1635 if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
1636 last_newline = True
1637 logical_end.append((t[3][0] - 1, t[2][1]))
1638 continue
1639 if last_newline and not parens:
1640 logical_start.append((t[2][0] - 1, t[2][1]))
1641 last_newline = False
1642 if t[0] == tokenize.OP:
1643 if t[1] in '([{':
1644 parens += 1
1645 elif t[1] in '}])':
1646 parens -= 1
1647 return (logical_start, logical_end)
1650def _get_logical(source_lines, result, logical_start, logical_end):
1651 """Return the logical line corresponding to the result.
1653 Assumes input is already E702-clean.
1655 """
1656 row = result['line'] - 1
1657 col = result['column'] - 1
1658 ls = None
1659 le = None
1660 for i in range(0, len(logical_start), 1):
1661 assert logical_end
1662 x = logical_end[i]
1663 if x[0] > row or (x[0] == row and x[1] > col):
1664 le = x
1665 ls = logical_start[i]
1666 break
1667 if ls is None:
1668 return None
1669 original = source_lines[ls[0]:le[0] + 1]
1670 return ls, le, original
1673def get_item(items, index, default=None):
1674 if 0 <= index < len(items):
1675 return items[index]
1677 return default
1680def reindent(source, indent_size, leave_tabs=False):
1681 """Reindent all lines."""
1682 reindenter = Reindenter(source, leave_tabs)
1683 return reindenter.run(indent_size)
1686def code_almost_equal(a, b):
1687 """Return True if code is similar.
1689 Ignore whitespace when comparing specific line.
1691 """
1692 split_a = split_and_strip_non_empty_lines(a)
1693 split_b = split_and_strip_non_empty_lines(b)
1695 if len(split_a) != len(split_b):
1696 return False
1698 for (index, _) in enumerate(split_a):
1699 if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
1700 return False
1702 return True
1705def split_and_strip_non_empty_lines(text):
1706 """Return lines split by newline.
1708 Ignore empty lines.
1710 """
1711 return [line.strip() for line in text.splitlines() if line.strip()]
1714def refactor(source, fixer_names, ignore=None, filename=''):
1715 """Return refactored code using lib2to3.
1717 Skip if ignore string is produced in the refactored code.
1719 """
1720 not_found_end_of_file_newline = source and source.rstrip("\r\n") == source
1721 if not_found_end_of_file_newline:
1722 input_source = source + "\n"
1723 else:
1724 input_source = source
1726 from lib2to3 import pgen2
1727 try:
1728 new_text = refactor_with_2to3(input_source,
1729 fixer_names=fixer_names,
1730 filename=filename)
1731 except (pgen2.parse.ParseError,
1732 SyntaxError,
1733 UnicodeDecodeError,
1734 UnicodeEncodeError):
1735 return source
1737 if ignore:
1738 if ignore in new_text and ignore not in source:
1739 return source
1741 if not_found_end_of_file_newline:
1742 return new_text.rstrip("\r\n")
1744 return new_text
1747def code_to_2to3(select, ignore, where='', verbose=False):
1748 fixes = set()
1749 for code, fix in CODE_TO_2TO3.items():
1750 if code_match(code, select=select, ignore=ignore):
1751 if verbose:
1752 print('---> Applying {} fix for {}'.format(where,
1753 code.upper()),
1754 file=sys.stderr)
1755 fixes |= set(fix)
1756 return fixes
1759def fix_2to3(source,
1760 aggressive=True, select=None, ignore=None, filename='',
1761 where='global', verbose=False):
1762 """Fix various deprecated code (via lib2to3)."""
1763 if not aggressive:
1764 return source
1766 select = select or []
1767 ignore = ignore or []
1769 return refactor(source,
1770 code_to_2to3(select=select,
1771 ignore=ignore,
1772 where=where,
1773 verbose=verbose),
1774 filename=filename)
1777def find_newline(source):
1778 """Return type of newline used in source.
1780 Input is a list of lines.
1782 """
1783 assert not isinstance(source, str)
1785 counter = collections.defaultdict(int)
1786 for line in source:
1787 if line.endswith(CRLF):
1788 counter[CRLF] += 1
1789 elif line.endswith(CR):
1790 counter[CR] += 1
1791 elif line.endswith(LF):
1792 counter[LF] += 1
1794 return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
1797def _get_indentword(source):
1798 """Return indentation type."""
1799 indent_word = ' ' # Default in case source has no indentation
1800 try:
1801 for t in generate_tokens(source):
1802 if t[0] == token.INDENT:
1803 indent_word = t[1]
1804 break
1805 except (SyntaxError, tokenize.TokenError):
1806 pass
1807 return indent_word
1810def _get_indentation(line):
1811 """Return leading whitespace."""
1812 if line.strip():
1813 non_whitespace_index = len(line) - len(line.lstrip())
1814 return line[:non_whitespace_index]
1816 return ''
1819def get_diff_text(old, new, filename):
1820 """Return text of unified diff between old and new."""
1821 newline = '\n'
1822 diff = difflib.unified_diff(
1823 old, new,
1824 'original/' + filename,
1825 'fixed/' + filename,
1826 lineterm=newline)
1828 text = ''
1829 for line in diff:
1830 text += line
1832 # Work around missing newline (http://bugs.python.org/issue2142).
1833 if text and not line.endswith(newline):
1834 text += newline + r'\ No newline at end of file' + newline
1836 return text
1839def _priority_key(pep8_result):
1840 """Key for sorting PEP8 results.
1842 Global fixes should be done first. This is important for things like
1843 indentation.
1845 """
1846 priority = [
1847 # Fix multiline colon-based before semicolon based.
1848 'e701',
1849 # Break multiline statements early.
1850 'e702',
1851 # Things that make lines longer.
1852 'e225', 'e231',
1853 # Remove extraneous whitespace before breaking lines.
1854 'e201',
1855 # Shorten whitespace in comment before resorting to wrapping.
1856 'e262'
1857 ]
1858 middle_index = 10000
1859 lowest_priority = [
1860 # We need to shorten lines last since the logical fixer can get in a
1861 # loop, which causes us to exit early.
1862 'e501',
1863 ]
1864 key = pep8_result['id'].lower()
1865 try:
1866 return priority.index(key)
1867 except ValueError:
1868 try:
1869 return middle_index + lowest_priority.index(key) + 1
1870 except ValueError:
1871 return middle_index
1874def shorten_line(tokens, source, indentation, indent_word, max_line_length,
1875 aggressive=False, experimental=False, previous_line=''):
1876 """Separate line at OPERATOR.
1878 Multiple candidates will be yielded.
1880 """
1881 for candidate in _shorten_line(tokens=tokens,
1882 source=source,
1883 indentation=indentation,
1884 indent_word=indent_word,
1885 aggressive=aggressive,
1886 previous_line=previous_line):
1887 yield candidate
1889 if aggressive:
1890 for key_token_strings in SHORTEN_OPERATOR_GROUPS:
1891 shortened = _shorten_line_at_tokens(
1892 tokens=tokens,
1893 source=source,
1894 indentation=indentation,
1895 indent_word=indent_word,
1896 key_token_strings=key_token_strings,
1897 aggressive=aggressive)
1899 if shortened is not None and shortened != source:
1900 yield shortened
1902 if experimental:
1903 for shortened in _shorten_line_at_tokens_new(
1904 tokens=tokens,
1905 source=source,
1906 indentation=indentation,
1907 max_line_length=max_line_length):
1909 yield shortened
1912def _shorten_line(tokens, source, indentation, indent_word,
1913 aggressive=False, previous_line=''):
1914 """Separate line at OPERATOR.
1916 The input is expected to be free of newlines except for inside multiline
1917 strings and at the end.
1919 Multiple candidates will be yielded.
1921 """
1922 for (token_type,
1923 token_string,
1924 start_offset,
1925 end_offset) in token_offsets(tokens):
1927 if (
1928 token_type == tokenize.COMMENT and
1929 not is_probably_part_of_multiline(previous_line) and
1930 not is_probably_part_of_multiline(source) and
1931 not source[start_offset + 1:].strip().lower().startswith(
1932 ('noqa', 'pragma:', 'pylint:'))
1933 ):
1934 # Move inline comments to previous line.
1935 first = source[:start_offset]
1936 second = source[start_offset:]
1937 yield (indentation + second.strip() + '\n' +
1938 indentation + first.strip() + '\n')
1939 elif token_type == token.OP and token_string != '=':
1940 # Don't break on '=' after keyword as this violates PEP 8.
1942 assert token_type != token.INDENT
1944 first = source[:end_offset]
1946 second_indent = indentation
1947 if (first.rstrip().endswith('(') and
1948 source[end_offset:].lstrip().startswith(')')):
1949 pass
1950 elif first.rstrip().endswith('('):
1951 second_indent += indent_word
1952 elif '(' in first:
1953 second_indent += ' ' * (1 + first.find('('))
1954 else:
1955 second_indent += indent_word
1957 second = (second_indent + source[end_offset:].lstrip())
1958 if (
1959 not second.strip() or
1960 second.lstrip().startswith('#')
1961 ):
1962 continue
1964 # Do not begin a line with a comma
1965 if second.lstrip().startswith(','):
1966 continue
1967 # Do end a line with a dot
1968 if first.rstrip().endswith('.'):
1969 continue
1970 if token_string in '+-*/':
1971 fixed = first + ' \\' + '\n' + second
1972 else:
1973 fixed = first + '\n' + second
1975 # Only fix if syntax is okay.
1976 if check_syntax(normalize_multiline(fixed)
1977 if aggressive else fixed):
1978 yield indentation + fixed
1981def _is_binary_operator(token_type, text):
1982 return ((token_type == tokenize.OP or text in ['and', 'or']) and
1983 text not in '()[]{},:.;@=%~')
1986# A convenient way to handle tokens.
1987Token = collections.namedtuple('Token', ['token_type', 'token_string',
1988 'spos', 'epos', 'line'])
1991class ReformattedLines(object):
1993 """The reflowed lines of atoms.
1995 Each part of the line is represented as an "atom." They can be moved
1996 around when need be to get the optimal formatting.
1998 """
2000 ###########################################################################
2001 # Private Classes
2003 class _Indent(object):
2005 """Represent an indentation in the atom stream."""
2007 def __init__(self, indent_amt):
2008 self._indent_amt = indent_amt
2010 def emit(self):
2011 return ' ' * self._indent_amt
2013 @property
2014 def size(self):
2015 return self._indent_amt
2017 class _Space(object):
2019 """Represent a space in the atom stream."""
2021 def emit(self):
2022 return ' '
2024 @property
2025 def size(self):
2026 return 1
2028 class _LineBreak(object):
2030 """Represent a line break in the atom stream."""
2032 def emit(self):
2033 return '\n'
2035 @property
2036 def size(self):
2037 return 0
2039 def __init__(self, max_line_length):
2040 self._max_line_length = max_line_length
2041 self._lines = []
2042 self._bracket_depth = 0
2043 self._prev_item = None
2044 self._prev_prev_item = None
2046 def __repr__(self):
2047 return self.emit()
2049 ###########################################################################
2050 # Public Methods
2052 def add(self, obj, indent_amt, break_after_open_bracket):
2053 if isinstance(obj, Atom):
2054 self._add_item(obj, indent_amt)
2055 return
2057 self._add_container(obj, indent_amt, break_after_open_bracket)
2059 def add_comment(self, item):
2060 num_spaces = 2
2061 if len(self._lines) > 1:
2062 if isinstance(self._lines[-1], self._Space):
2063 num_spaces -= 1
2064 if len(self._lines) > 2:
2065 if isinstance(self._lines[-2], self._Space):
2066 num_spaces -= 1
2068 while num_spaces > 0:
2069 self._lines.append(self._Space())
2070 num_spaces -= 1
2071 self._lines.append(item)
2073 def add_indent(self, indent_amt):
2074 self._lines.append(self._Indent(indent_amt))
2076 def add_line_break(self, indent):
2077 self._lines.append(self._LineBreak())
2078 self.add_indent(len(indent))
2080 def add_line_break_at(self, index, indent_amt):
2081 self._lines.insert(index, self._LineBreak())
2082 self._lines.insert(index + 1, self._Indent(indent_amt))
2084 def add_space_if_needed(self, curr_text, equal=False):
2085 if (
2086 not self._lines or isinstance(
2087 self._lines[-1], (self._LineBreak, self._Indent, self._Space))
2088 ):
2089 return
2091 prev_text = str(self._prev_item)
2092 prev_prev_text = (
2093 str(self._prev_prev_item) if self._prev_prev_item else '')
2095 if (
2096 # The previous item was a keyword or identifier and the current
2097 # item isn't an operator that doesn't require a space.
2098 ((self._prev_item.is_keyword or self._prev_item.is_string or
2099 self._prev_item.is_name or self._prev_item.is_number) and
2100 (curr_text[0] not in '([{.,:}])' or
2101 (curr_text[0] == '=' and equal))) or
2103 # Don't place spaces around a '.', unless it's in an 'import'
2104 # statement.
2105 ((prev_prev_text != 'from' and prev_text[-1] != '.' and
2106 curr_text != 'import') and
2108 # Don't place a space before a colon.
2109 curr_text[0] != ':' and
2111 # Don't split up ending brackets by spaces.
2112 ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or
2114 # Put a space after a colon or comma.
2115 prev_text[-1] in ':,' or
2117 # Put space around '=' if asked to.
2118 (equal and prev_text == '=') or
2120 # Put spaces around non-unary arithmetic operators.
2121 ((self._prev_prev_item and
2122 (prev_text not in '+-' and
2123 (self._prev_prev_item.is_name or
2124 self._prev_prev_item.is_number or
2125 self._prev_prev_item.is_string)) and
2126 prev_text in ('+', '-', '%', '*', '/', '//', '**', 'in')))))
2127 ):
2128 self._lines.append(self._Space())
2130 def previous_item(self):
2131 """Return the previous non-whitespace item."""
2132 return self._prev_item
2134 def fits_on_current_line(self, item_extent):
2135 return self.current_size() + item_extent <= self._max_line_length
2137 def current_size(self):
2138 """The size of the current line minus the indentation."""
2139 size = 0
2140 for item in reversed(self._lines):
2141 size += item.size
2142 if isinstance(item, self._LineBreak):
2143 break
2145 return size
2147 def line_empty(self):
2148 return (self._lines and
2149 isinstance(self._lines[-1],
2150 (self._LineBreak, self._Indent)))
2152 def emit(self):
2153 string = ''
2154 for item in self._lines:
2155 if isinstance(item, self._LineBreak):
2156 string = string.rstrip()
2157 string += item.emit()
2159 return string.rstrip() + '\n'
2161 ###########################################################################
2162 # Private Methods
2164 def _add_item(self, item, indent_amt):
2165 """Add an item to the line.
2167 Reflow the line to get the best formatting after the item is
2168 inserted. The bracket depth indicates if the item is being
2169 inserted inside of a container or not.
2171 """
2172 if self._prev_item and self._prev_item.is_string and item.is_string:
2173 # Place consecutive string literals on separate lines.
2174 self._lines.append(self._LineBreak())
2175 self._lines.append(self._Indent(indent_amt))
2177 item_text = str(item)
2178 if self._lines and self._bracket_depth:
2179 # Adding the item into a container.
2180 self._prevent_default_initializer_splitting(item, indent_amt)
2182 if item_text in '.,)]}':
2183 self._split_after_delimiter(item, indent_amt)
2185 elif self._lines and not self.line_empty():
2186 # Adding the item outside of a container.
2187 if self.fits_on_current_line(len(item_text)):
2188 self._enforce_space(item)
2190 else:
2191 # Line break for the new item.
2192 self._lines.append(self._LineBreak())
2193 self._lines.append(self._Indent(indent_amt))
2195 self._lines.append(item)
2196 self._prev_item, self._prev_prev_item = item, self._prev_item
2198 if item_text in '([{':
2199 self._bracket_depth += 1
2201 elif item_text in '}])':
2202 self._bracket_depth -= 1
2203 assert self._bracket_depth >= 0
2205 def _add_container(self, container, indent_amt, break_after_open_bracket):
2206 actual_indent = indent_amt + 1
2208 if (
2209 str(self._prev_item) != '=' and
2210 not self.line_empty() and
2211 not self.fits_on_current_line(
2212 container.size + self._bracket_depth + 2)
2213 ):
2215 if str(container)[0] == '(' and self._prev_item.is_name:
2216 # Don't split before the opening bracket of a call.
2217 break_after_open_bracket = True
2218 actual_indent = indent_amt + 4
2219 elif (
2220 break_after_open_bracket or
2221 str(self._prev_item) not in '([{'
2222 ):
2223 # If the container doesn't fit on the current line and the
2224 # current line isn't empty, place the container on the next
2225 # line.
2226 self._lines.append(self._LineBreak())
2227 self._lines.append(self._Indent(indent_amt))
2228 break_after_open_bracket = False
2229 else:
2230 actual_indent = self.current_size() + 1
2231 break_after_open_bracket = False
2233 if isinstance(container, (ListComprehension, IfExpression)):
2234 actual_indent = indent_amt
2236 # Increase the continued indentation only if recursing on a
2237 # container.
2238 container.reflow(self, ' ' * actual_indent,
2239 break_after_open_bracket=break_after_open_bracket)
2241 def _prevent_default_initializer_splitting(self, item, indent_amt):
2242 """Prevent splitting between a default initializer.
2244 When there is a default initializer, it's best to keep it all on
2245 the same line. It's nicer and more readable, even if it goes
2246 over the maximum allowable line length. This goes back along the
2247 current line to determine if we have a default initializer, and,
2248 if so, to remove extraneous whitespaces and add a line
2249 break/indent before it if needed.
2251 """
2252 if str(item) == '=':
2253 # This is the assignment in the initializer. Just remove spaces for
2254 # now.
2255 self._delete_whitespace()
2256 return
2258 if (not self._prev_item or not self._prev_prev_item or
2259 str(self._prev_item) != '='):
2260 return
2262 self._delete_whitespace()
2263 prev_prev_index = self._lines.index(self._prev_prev_item)
2265 if (
2266 isinstance(self._lines[prev_prev_index - 1], self._Indent) or
2267 self.fits_on_current_line(item.size + 1)
2268 ):
2269 # The default initializer is already the only item on this line.
2270 # Don't insert a newline here.
2271 return
2273 # Replace the space with a newline/indent combo.
2274 if isinstance(self._lines[prev_prev_index - 1], self._Space):
2275 del self._lines[prev_prev_index - 1]
2277 self.add_line_break_at(self._lines.index(self._prev_prev_item),
2278 indent_amt)
2280 def _split_after_delimiter(self, item, indent_amt):
2281 """Split the line only after a delimiter."""
2282 self._delete_whitespace()
2284 if self.fits_on_current_line(item.size):
2285 return
2287 last_space = None
2288 for current_item in reversed(self._lines):
2289 if (
2290 last_space and
2291 (not isinstance(current_item, Atom) or
2292 not current_item.is_colon)
2293 ):
2294 break
2295 else:
2296 last_space = None
2297 if isinstance(current_item, self._Space):
2298 last_space = current_item
2299 if isinstance(current_item, (self._LineBreak, self._Indent)):
2300 return
2302 if not last_space:
2303 return
2305 self.add_line_break_at(self._lines.index(last_space), indent_amt)
2307 def _enforce_space(self, item):
2308 """Enforce a space in certain situations.
2310 There are cases where we will want a space where normally we
2311 wouldn't put one. This just enforces the addition of a space.
2313 """
2314 if isinstance(self._lines[-1],
2315 (self._Space, self._LineBreak, self._Indent)):
2316 return
2318 if not self._prev_item:
2319 return
2321 item_text = str(item)
2322 prev_text = str(self._prev_item)
2324 # Prefer a space around a '.' in an import statement, and between the
2325 # 'import' and '('.
2326 if (
2327 (item_text == '.' and prev_text == 'from') or
2328 (item_text == 'import' and prev_text == '.') or
2329 (item_text == '(' and prev_text == 'import')
2330 ):
2331 self._lines.append(self._Space())
2333 def _delete_whitespace(self):
2334 """Delete all whitespace from the end of the line."""
2335 while isinstance(self._lines[-1], (self._Space, self._LineBreak,
2336 self._Indent)):
2337 del self._lines[-1]
2340class Atom(object):
2342 """The smallest unbreakable unit that can be reflowed."""
2344 def __init__(self, atom):
2345 self._atom = atom
2347 def __repr__(self):
2348 return self._atom.token_string
2350 def __len__(self):
2351 return self.size
2353 def reflow(
2354 self, reflowed_lines, continued_indent, extent,
2355 break_after_open_bracket=False,
2356 is_list_comp_or_if_expr=False,
2357 next_is_dot=False
2358 ):
2359 if self._atom.token_type == tokenize.COMMENT:
2360 reflowed_lines.add_comment(self)
2361 return
2363 total_size = extent if extent else self.size
2365 if self._atom.token_string not in ',:([{}])':
2366 # Some atoms will need an extra 1-sized space token after them.
2367 total_size += 1
2369 prev_item = reflowed_lines.previous_item()
2370 if (
2371 not is_list_comp_or_if_expr and
2372 not reflowed_lines.fits_on_current_line(total_size) and
2373 not (next_is_dot and
2374 reflowed_lines.fits_on_current_line(self.size + 1)) and
2375 not reflowed_lines.line_empty() and
2376 not self.is_colon and
2377 not (prev_item and prev_item.is_name and
2378 str(self) == '(')
2379 ):
2380 # Start a new line if there is already something on the line and
2381 # adding this atom would make it go over the max line length.
2382 reflowed_lines.add_line_break(continued_indent)
2383 else:
2384 reflowed_lines.add_space_if_needed(str(self))
2386 reflowed_lines.add(self, len(continued_indent),
2387 break_after_open_bracket)
2389 def emit(self):
2390 return self.__repr__()
2392 @property
2393 def is_keyword(self):
2394 return keyword.iskeyword(self._atom.token_string)
2396 @property
2397 def is_string(self):
2398 return self._atom.token_type == tokenize.STRING
2400 @property
2401 def is_name(self):
2402 return self._atom.token_type == tokenize.NAME
2404 @property
2405 def is_number(self):
2406 return self._atom.token_type == tokenize.NUMBER
2408 @property
2409 def is_comma(self):
2410 return self._atom.token_string == ','
2412 @property
2413 def is_colon(self):
2414 return self._atom.token_string == ':'
2416 @property
2417 def size(self):
2418 return len(self._atom.token_string)
2421class Container(object):
2423 """Base class for all container types."""
2425 def __init__(self, items):
2426 self._items = items
2428 def __repr__(self):
2429 string = ''
2430 last_was_keyword = False
2432 for item in self._items:
2433 if item.is_comma:
2434 string += ', '
2435 elif item.is_colon:
2436 string += ': '
2437 else:
2438 item_string = str(item)
2439 if (
2440 string and
2441 (last_was_keyword or
2442 (not string.endswith(tuple('([{,.:}]) ')) and
2443 not item_string.startswith(tuple('([{,.:}])'))))
2444 ):
2445 string += ' '
2446 string += item_string
2448 last_was_keyword = item.is_keyword
2449 return string
2451 def __iter__(self):
2452 for element in self._items:
2453 yield element
2455 def __getitem__(self, idx):
2456 return self._items[idx]
2458 def reflow(self, reflowed_lines, continued_indent,
2459 break_after_open_bracket=False):
2460 last_was_container = False
2461 for (index, item) in enumerate(self._items):
2462 next_item = get_item(self._items, index + 1)
2464 if isinstance(item, Atom):
2465 is_list_comp_or_if_expr = (
2466 isinstance(self, (ListComprehension, IfExpression)))
2467 item.reflow(reflowed_lines, continued_indent,
2468 self._get_extent(index),
2469 is_list_comp_or_if_expr=is_list_comp_or_if_expr,
2470 next_is_dot=(next_item and
2471 str(next_item) == '.'))
2472 if last_was_container and item.is_comma:
2473 reflowed_lines.add_line_break(continued_indent)
2474 last_was_container = False
2475 else: # isinstance(item, Container)
2476 reflowed_lines.add(item, len(continued_indent),
2477 break_after_open_bracket)
2478 last_was_container = not isinstance(item, (ListComprehension,
2479 IfExpression))
2481 if (
2482 break_after_open_bracket and index == 0 and
2483 # Prefer to keep empty containers together instead of
2484 # separating them.
2485 str(item) == self.open_bracket and
2486 (not next_item or str(next_item) != self.close_bracket) and
2487 (len(self._items) != 3 or not isinstance(next_item, Atom))
2488 ):
2489 reflowed_lines.add_line_break(continued_indent)
2490 break_after_open_bracket = False
2491 else:
2492 next_next_item = get_item(self._items, index + 2)
2493 if (
2494 str(item) not in ['.', '%', 'in'] and
2495 next_item and not isinstance(next_item, Container) and
2496 str(next_item) != ':' and
2497 next_next_item and (not isinstance(next_next_item, Atom) or
2498 str(next_item) == 'not') and
2499 not reflowed_lines.line_empty() and
2500 not reflowed_lines.fits_on_current_line(
2501 self._get_extent(index + 1) + 2)
2502 ):
2503 reflowed_lines.add_line_break(continued_indent)
2505 def _get_extent(self, index):
2506 """The extent of the full element.
2508 E.g., the length of a function call or keyword.
2510 """
2511 extent = 0
2512 prev_item = get_item(self._items, index - 1)
2513 seen_dot = prev_item and str(prev_item) == '.'
2514 while index < len(self._items):
2515 item = get_item(self._items, index)
2516 index += 1
2518 if isinstance(item, (ListComprehension, IfExpression)):
2519 break
2521 if isinstance(item, Container):
2522 if prev_item and prev_item.is_name:
2523 if seen_dot:
2524 extent += 1
2525 else:
2526 extent += item.size
2528 prev_item = item
2529 continue
2530 elif (str(item) not in ['.', '=', ':', 'not'] and
2531 not item.is_name and not item.is_string):
2532 break
2534 if str(item) == '.':
2535 seen_dot = True
2537 extent += item.size
2538 prev_item = item
2540 return extent
2542 @property
2543 def is_string(self):
2544 return False
2546 @property
2547 def size(self):
2548 return len(self.__repr__())
2550 @property
2551 def is_keyword(self):
2552 return False
2554 @property
2555 def is_name(self):
2556 return False
2558 @property
2559 def is_comma(self):
2560 return False
2562 @property
2563 def is_colon(self):
2564 return False
2566 @property
2567 def open_bracket(self):
2568 return None
2570 @property
2571 def close_bracket(self):
2572 return None
2575class Tuple(Container):
2577 """A high-level representation of a tuple."""
2579 @property
2580 def open_bracket(self):
2581 return '('
2583 @property
2584 def close_bracket(self):
2585 return ')'
2588class List(Container):
2590 """A high-level representation of a list."""
2592 @property
2593 def open_bracket(self):
2594 return '['
2596 @property
2597 def close_bracket(self):
2598 return ']'
2601class DictOrSet(Container):
2603 """A high-level representation of a dictionary or set."""
2605 @property
2606 def open_bracket(self):
2607 return '{'
2609 @property
2610 def close_bracket(self):
2611 return '}'
2614class ListComprehension(Container):
2616 """A high-level representation of a list comprehension."""
2618 @property
2619 def size(self):
2620 length = 0
2621 for item in self._items:
2622 if isinstance(item, IfExpression):
2623 break
2624 length += item.size
2625 return length
2628class IfExpression(Container):
2630 """A high-level representation of an if-expression."""
2633def _parse_container(tokens, index, for_or_if=None):
2634 """Parse a high-level container, such as a list, tuple, etc."""
2636 # Store the opening bracket.
2637 items = [Atom(Token(*tokens[index]))]
2638 index += 1
2640 num_tokens = len(tokens)
2641 while index < num_tokens:
2642 tok = Token(*tokens[index])
2644 if tok.token_string in ',)]}':
2645 # First check if we're at the end of a list comprehension or
2646 # if-expression. Don't add the ending token as part of the list
2647 # comprehension or if-expression, because they aren't part of those
2648 # constructs.
2649 if for_or_if == 'for':
2650 return (ListComprehension(items), index - 1)
2652 elif for_or_if == 'if':
2653 return (IfExpression(items), index - 1)
2655 # We've reached the end of a container.
2656 items.append(Atom(tok))
2658 # If not, then we are at the end of a container.
2659 if tok.token_string == ')':
2660 # The end of a tuple.
2661 return (Tuple(items), index)
2663 elif tok.token_string == ']':
2664 # The end of a list.
2665 return (List(items), index)
2667 elif tok.token_string == '}':
2668 # The end of a dictionary or set.
2669 return (DictOrSet(items), index)
2671 elif tok.token_string in '([{':
2672 # A sub-container is being defined.
2673 (container, index) = _parse_container(tokens, index)
2674 items.append(container)
2676 elif tok.token_string == 'for':
2677 (container, index) = _parse_container(tokens, index, 'for')
2678 items.append(container)
2680 elif tok.token_string == 'if':
2681 (container, index) = _parse_container(tokens, index, 'if')
2682 items.append(container)
2684 else:
2685 items.append(Atom(tok))
2687 index += 1
2689 return (None, None)
2692def _parse_tokens(tokens):
2693 """Parse the tokens.
2695 This converts the tokens into a form where we can manipulate them
2696 more easily.
2698 """
2700 index = 0
2701 parsed_tokens = []
2703 num_tokens = len(tokens)
2704 while index < num_tokens:
2705 tok = Token(*tokens[index])
2707 assert tok.token_type != token.INDENT
2708 if tok.token_type == tokenize.NEWLINE:
2709 # There's only one newline and it's at the end.
2710 break
2712 if tok.token_string in '([{':
2713 (container, index) = _parse_container(tokens, index)
2714 if not container:
2715 return None
2716 parsed_tokens.append(container)
2717 else:
2718 parsed_tokens.append(Atom(tok))
2720 index += 1
2722 return parsed_tokens
2725def _reflow_lines(parsed_tokens, indentation, max_line_length,
2726 start_on_prefix_line):
2727 """Reflow the lines so that it looks nice."""
2729 if str(parsed_tokens[0]) == 'def':
2730 # A function definition gets indented a bit more.
2731 continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
2732 else:
2733 continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
2735 break_after_open_bracket = not start_on_prefix_line
2737 lines = ReformattedLines(max_line_length)
2738 lines.add_indent(len(indentation.lstrip('\r\n')))
2740 if not start_on_prefix_line:
2741 # If splitting after the opening bracket will cause the first element
2742 # to be aligned weirdly, don't try it.
2743 first_token = get_item(parsed_tokens, 0)
2744 second_token = get_item(parsed_tokens, 1)
2746 if (
2747 first_token and second_token and
2748 str(second_token)[0] == '(' and
2749 len(indentation) + len(first_token) + 1 == len(continued_indent)
2750 ):
2751 return None
2753 for item in parsed_tokens:
2754 lines.add_space_if_needed(str(item), equal=True)
2756 save_continued_indent = continued_indent
2757 if start_on_prefix_line and isinstance(item, Container):
2758 start_on_prefix_line = False
2759 continued_indent = ' ' * (lines.current_size() + 1)
2761 item.reflow(lines, continued_indent, break_after_open_bracket)
2762 continued_indent = save_continued_indent
2764 return lines.emit()
2767def _shorten_line_at_tokens_new(tokens, source, indentation,
2768 max_line_length):
2769 """Shorten the line taking its length into account.
2771 The input is expected to be free of newlines except for inside
2772 multiline strings and at the end.
2774 """
2775 # Yield the original source so to see if it's a better choice than the
2776 # shortened candidate lines we generate here.
2777 yield indentation + source
2779 parsed_tokens = _parse_tokens(tokens)
2781 if parsed_tokens:
2782 # Perform two reflows. The first one starts on the same line as the
2783 # prefix. The second starts on the line after the prefix.
2784 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2785 start_on_prefix_line=True)
2786 if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2787 yield fixed
2789 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2790 start_on_prefix_line=False)
2791 if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2792 yield fixed
2795def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
2796 key_token_strings, aggressive):
2797 """Separate line by breaking at tokens in key_token_strings.
2799 The input is expected to be free of newlines except for inside
2800 multiline strings and at the end.
2802 """
2803 offsets = []
2804 for (index, _t) in enumerate(token_offsets(tokens)):
2805 (token_type,
2806 token_string,
2807 start_offset,
2808 end_offset) = _t
2810 assert token_type != token.INDENT
2812 if token_string in key_token_strings:
2813 # Do not break in containers with zero or one items.
2814 unwanted_next_token = {
2815 '(': ')',
2816 '[': ']',
2817 '{': '}'}.get(token_string)
2818 if unwanted_next_token:
2819 if (
2820 get_item(tokens,
2821 index + 1,
2822 default=[None, None])[1] == unwanted_next_token or
2823 get_item(tokens,
2824 index + 2,
2825 default=[None, None])[1] == unwanted_next_token
2826 ):
2827 continue
2829 if (
2830 index > 2 and token_string == '(' and
2831 tokens[index - 1][1] in ',(%['
2832 ):
2833 # Don't split after a tuple start, or before a tuple start if
2834 # the tuple is in a list.
2835 continue
2837 if end_offset < len(source) - 1:
2838 # Don't split right before newline.
2839 offsets.append(end_offset)
2840 else:
2841 # Break at adjacent strings. These were probably meant to be on
2842 # separate lines in the first place.
2843 previous_token = get_item(tokens, index - 1)
2844 if (
2845 token_type == tokenize.STRING and
2846 previous_token and previous_token[0] == tokenize.STRING
2847 ):
2848 offsets.append(start_offset)
2850 current_indent = None
2851 fixed = None
2852 for line in split_at_offsets(source, offsets):
2853 if fixed:
2854 fixed += '\n' + current_indent + line
2856 for symbol in '([{':
2857 if line.endswith(symbol):
2858 current_indent += indent_word
2859 else:
2860 # First line.
2861 fixed = line
2862 assert not current_indent
2863 current_indent = indent_word
2865 assert fixed is not None
2867 if check_syntax(normalize_multiline(fixed)
2868 if aggressive > 1 else fixed):
2869 return indentation + fixed
2871 return None
2874def token_offsets(tokens):
2875 """Yield tokens and offsets."""
2876 end_offset = 0
2877 previous_end_row = 0
2878 previous_end_column = 0
2879 for t in tokens:
2880 token_type = t[0]
2881 token_string = t[1]
2882 (start_row, start_column) = t[2]
2883 (end_row, end_column) = t[3]
2885 # Account for the whitespace between tokens.
2886 end_offset += start_column
2887 if previous_end_row == start_row:
2888 end_offset -= previous_end_column
2890 # Record the start offset of the token.
2891 start_offset = end_offset
2893 # Account for the length of the token itself.
2894 end_offset += len(token_string)
2896 yield (token_type,
2897 token_string,
2898 start_offset,
2899 end_offset)
2901 previous_end_row = end_row
2902 previous_end_column = end_column
2905def normalize_multiline(line):
2906 """Normalize multiline-related code that will cause syntax error.
2908 This is for purposes of checking syntax.
2910 """
2911 if line.startswith('def ') and line.rstrip().endswith(':'):
2912 return line + ' pass'
2913 elif line.startswith('return '):
2914 return 'def _(): ' + line
2915 elif line.startswith('@'):
2916 return line + 'def _(): pass'
2917 elif line.startswith('class '):
2918 return line + ' pass'
2919 elif line.startswith(('if ', 'elif ', 'for ', 'while ')):
2920 return line + ' pass'
2922 return line
2925def fix_whitespace(line, offset, replacement):
2926 """Replace whitespace at offset and return fixed line."""
2927 # Replace escaped newlines too
2928 left = line[:offset].rstrip('\n\r \t\\')
2929 right = line[offset:].lstrip('\n\r \t\\')
2930 if right.startswith('#'):
2931 return line
2933 return left + replacement + right
2936def _execute_pep8(pep8_options, source):
2937 """Execute pycodestyle via python method calls."""
2938 class QuietReport(pycodestyle.BaseReport):
2940 """Version of checker that does not print."""
2942 def __init__(self, options):
2943 super(QuietReport, self).__init__(options)
2944 self.__full_error_results = []
2946 def error(self, line_number, offset, text, check):
2947 """Collect errors."""
2948 code = super(QuietReport, self).error(line_number,
2949 offset,
2950 text,
2951 check)
2952 if code:
2953 self.__full_error_results.append(
2954 {'id': code,
2955 'line': line_number,
2956 'column': offset + 1,
2957 'info': text})
2959 def full_error_results(self):
2960 """Return error results in detail.
2962 Results are in the form of a list of dictionaries. Each
2963 dictionary contains 'id', 'line', 'column', and 'info'.
2965 """
2966 return self.__full_error_results
2968 checker = pycodestyle.Checker('', lines=source, reporter=QuietReport,
2969 **pep8_options)
2970 checker.check_all()
2971 return checker.report.full_error_results()
2974def _remove_leading_and_normalize(line, with_rstrip=True):
2975 # ignore FF in first lstrip()
2976 if with_rstrip:
2977 return line.lstrip(' \t\v').rstrip(CR + LF) + '\n'
2978 return line.lstrip(' \t\v')
2981class Reindenter(object):
2983 """Reindents badly-indented code to uniformly use four-space indentation.
2985 Released to the public domain, by Tim Peters, 03 October 2000.
2987 """
2989 def __init__(self, input_text, leave_tabs=False):
2990 sio = io.StringIO(input_text)
2991 source_lines = sio.readlines()
2993 self.string_content_line_numbers = multiline_string_lines(input_text)
2995 # File lines, rstripped & tab-expanded. Dummy at start is so
2996 # that we can use tokenize's 1-based line numbering easily.
2997 # Note that a line is all-blank iff it is a newline.
2998 self.lines = []
2999 for line_number, line in enumerate(source_lines, start=1):
3000 # Do not modify if inside a multiline string.
3001 if line_number in self.string_content_line_numbers:
3002 self.lines.append(line)
3003 else:
3004 # Only expand leading tabs.
3005 with_rstrip = line_number != len(source_lines)
3006 if leave_tabs:
3007 self.lines.append(
3008 _get_indentation(line) +
3009 _remove_leading_and_normalize(line, with_rstrip)
3010 )
3011 else:
3012 self.lines.append(
3013 _get_indentation(line).expandtabs() +
3014 _remove_leading_and_normalize(line, with_rstrip)
3015 )
3017 self.lines.insert(0, None)
3018 self.index = 1 # index into self.lines of next line
3019 self.input_text = input_text
3021 def run(self, indent_size=DEFAULT_INDENT_SIZE):
3022 """Fix indentation and return modified line numbers.
3024 Line numbers are indexed at 1.
3026 """
3027 if indent_size < 1:
3028 return self.input_text
3030 try:
3031 stats = _reindent_stats(tokenize.generate_tokens(self.getline))
3032 except (SyntaxError, tokenize.TokenError):
3033 return self.input_text
3034 # Remove trailing empty lines.
3035 lines = self.lines
3036 # Sentinel.
3037 stats.append((len(lines), 0))
3038 # Map count of leading spaces to # we want.
3039 have2want = {}
3040 # Program after transformation.
3041 after = []
3042 # Copy over initial empty lines -- there's nothing to do until
3043 # we see a line with *something* on it.
3044 i = stats[0][0]
3045 after.extend(lines[1:i])
3046 for i in range(len(stats) - 1):
3047 thisstmt, thislevel = stats[i]
3048 nextstmt = stats[i + 1][0]
3049 have = _leading_space_count(lines[thisstmt])
3050 want = thislevel * indent_size
3051 if want < 0:
3052 # A comment line.
3053 if have:
3054 # An indented comment line. If we saw the same
3055 # indentation before, reuse what it most recently
3056 # mapped to.
3057 want = have2want.get(have, -1)
3058 if want < 0:
3059 # Then it probably belongs to the next real stmt.
3060 for j in range(i + 1, len(stats) - 1):
3061 jline, jlevel = stats[j]
3062 if jlevel >= 0:
3063 if have == _leading_space_count(lines[jline]):
3064 want = jlevel * indent_size
3065 break
3066 # Maybe it's a hanging comment like this one,
3067 if want < 0:
3068 # in which case we should shift it like its base
3069 # line got shifted.
3070 for j in range(i - 1, -1, -1):
3071 jline, jlevel = stats[j]
3072 if jlevel >= 0:
3073 want = (have + _leading_space_count(
3074 after[jline - 1]) -
3075 _leading_space_count(lines[jline]))
3076 break
3077 if want < 0:
3078 # Still no luck -- leave it alone.
3079 want = have
3080 else:
3081 want = 0
3082 assert want >= 0
3083 have2want[have] = want
3084 diff = want - have
3085 if diff == 0 or have == 0:
3086 after.extend(lines[thisstmt:nextstmt])
3087 else:
3088 for line_number, line in enumerate(lines[thisstmt:nextstmt],
3089 start=thisstmt):
3090 if line_number in self.string_content_line_numbers:
3091 after.append(line)
3092 elif diff > 0:
3093 if line == '\n':
3094 after.append(line)
3095 else:
3096 after.append(' ' * diff + line)
3097 else:
3098 remove = min(_leading_space_count(line), -diff)
3099 after.append(line[remove:])
3101 return ''.join(after)
3103 def getline(self):
3104 """Line-getter for tokenize."""
3105 if self.index >= len(self.lines):
3106 line = ''
3107 else:
3108 line = self.lines[self.index]
3109 self.index += 1
3110 return line
3113def _reindent_stats(tokens):
3114 """Return list of (lineno, indentlevel) pairs.
3116 One for each stmt and comment line. indentlevel is -1 for comment
3117 lines, as a signal that tokenize doesn't know what to do about them;
3118 indeed, they're our headache!
3120 """
3121 find_stmt = 1 # Next token begins a fresh stmt?
3122 level = 0 # Current indent level.
3123 stats = []
3125 for t in tokens:
3126 token_type = t[0]
3127 sline = t[2][0]
3128 line = t[4]
3130 if token_type == tokenize.NEWLINE:
3131 # A program statement, or ENDMARKER, will eventually follow,
3132 # after some (possibly empty) run of tokens of the form
3133 # (NL | COMMENT)* (INDENT | DEDENT+)?
3134 find_stmt = 1
3136 elif token_type == tokenize.INDENT:
3137 find_stmt = 1
3138 level += 1
3140 elif token_type == tokenize.DEDENT:
3141 find_stmt = 1
3142 level -= 1
3144 elif token_type == tokenize.COMMENT:
3145 if find_stmt:
3146 stats.append((sline, -1))
3147 # But we're still looking for a new stmt, so leave
3148 # find_stmt alone.
3150 elif token_type == tokenize.NL:
3151 pass
3153 elif find_stmt:
3154 # This is the first "real token" following a NEWLINE, so it
3155 # must be the first token of the next program statement, or an
3156 # ENDMARKER.
3157 find_stmt = 0
3158 if line: # Not endmarker.
3159 stats.append((sline, level))
3161 return stats
3164def _leading_space_count(line):
3165 """Return number of leading spaces in line."""
3166 i = 0
3167 while i < len(line) and line[i] == ' ':
3168 i += 1
3169 return i
3172def refactor_with_2to3(source_text, fixer_names, filename=''):
3173 """Use lib2to3 to refactor the source.
3175 Return the refactored source code.
3177 """
3178 from lib2to3.refactor import RefactoringTool
3179 fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
3180 tool = RefactoringTool(fixer_names=fixers, explicit=fixers)
3182 from lib2to3.pgen2 import tokenize as lib2to3_tokenize
3183 try:
3184 # The name parameter is necessary particularly for the "import" fixer.
3185 return str(tool.refactor_string(source_text, name=filename))
3186 except lib2to3_tokenize.TokenError:
3187 return source_text
3190def check_syntax(code):
3191 """Return True if syntax is okay."""
3192 try:
3193 return compile(code, '<string>', 'exec', dont_inherit=True)
3194 except (SyntaxError, TypeError, ValueError):
3195 return False
3198def find_with_line_numbers(pattern, contents):
3199 """A wrapper around 're.finditer' to find line numbers.
3201 Returns a list of line numbers where pattern was found in contents.
3202 """
3203 matches = list(re.finditer(pattern, contents))
3204 if not matches:
3205 return []
3207 end = matches[-1].start()
3209 # -1 so a failed `rfind` maps to the first line.
3210 newline_offsets = {
3211 -1: 0
3212 }
3213 for line_num, m in enumerate(re.finditer(r'\n', contents), 1):
3214 offset = m.start()
3215 if offset > end:
3216 break
3217 newline_offsets[offset] = line_num
3219 def get_line_num(match, contents):
3220 """Get the line number of string in a files contents.
3222 Failing to find the newline is OK, -1 maps to 0
3224 """
3225 newline_offset = contents.rfind('\n', 0, match.start())
3226 return newline_offsets[newline_offset]
3228 return [get_line_num(match, contents) + 1 for match in matches]
3231def get_disabled_ranges(source):
3232 """Returns a list of tuples representing the disabled ranges.
3234 If disabled and no re-enable will disable for rest of file.
3236 """
3237 enable_line_nums = find_with_line_numbers(ENABLE_REGEX, source)
3238 disable_line_nums = find_with_line_numbers(DISABLE_REGEX, source)
3239 total_lines = len(re.findall("\n", source)) + 1
3241 enable_commands = {}
3242 for num in enable_line_nums:
3243 enable_commands[num] = True
3244 for num in disable_line_nums:
3245 enable_commands[num] = False
3247 disabled_ranges = []
3248 currently_enabled = True
3249 disabled_start = None
3251 for line, commanded_enabled in sorted(enable_commands.items()):
3252 if commanded_enabled is False and currently_enabled is True:
3253 disabled_start = line
3254 currently_enabled = False
3255 elif commanded_enabled is True and currently_enabled is False:
3256 disabled_ranges.append((disabled_start, line))
3257 currently_enabled = True
3259 if currently_enabled is False:
3260 disabled_ranges.append((disabled_start, total_lines))
3262 return disabled_ranges
3265def filter_disabled_results(result, disabled_ranges):
3266 """Filter out reports based on tuple of disabled ranges.
3268 """
3269 line = result['line']
3270 for disabled_range in disabled_ranges:
3271 if disabled_range[0] <= line <= disabled_range[1]:
3272 return False
3273 return True
3276def filter_results(source, results, aggressive):
3277 """Filter out spurious reports from pycodestyle.
3279 If aggressive is True, we allow possibly unsafe fixes (E711, E712).
3281 """
3282 non_docstring_string_line_numbers = multiline_string_lines(
3283 source, include_docstrings=False)
3284 all_string_line_numbers = multiline_string_lines(
3285 source, include_docstrings=True)
3287 commented_out_code_line_numbers = commented_out_code_lines(source)
3289 # Filter out the disabled ranges
3290 disabled_ranges = get_disabled_ranges(source)
3291 if disabled_ranges:
3292 results = [
3293 result for result in results if filter_disabled_results(
3294 result,
3295 disabled_ranges,
3296 )
3297 ]
3299 has_e901 = any(result['id'].lower() == 'e901' for result in results)
3301 for r in results:
3302 issue_id = r['id'].lower()
3304 if r['line'] in non_docstring_string_line_numbers:
3305 if issue_id.startswith(('e1', 'e501', 'w191')):
3306 continue
3308 if r['line'] in all_string_line_numbers:
3309 if issue_id in ['e501']:
3310 continue
3312 # We must offset by 1 for lines that contain the trailing contents of
3313 # multiline strings.
3314 if not aggressive and (r['line'] + 1) in all_string_line_numbers:
3315 # Do not modify multiline strings in non-aggressive mode. Remove
3316 # trailing whitespace could break doctests.
3317 if issue_id.startswith(('w29', 'w39')):
3318 continue
3320 if aggressive <= 0:
3321 if issue_id.startswith(('e711', 'e72', 'w6')):
3322 continue
3324 if aggressive <= 1:
3325 if issue_id.startswith(('e712', 'e713', 'e714')):
3326 continue
3328 if aggressive <= 2:
3329 if issue_id.startswith(('e704')):
3330 continue
3332 if r['line'] in commented_out_code_line_numbers:
3333 if issue_id.startswith(('e261', 'e262', 'e501')):
3334 continue
3336 # Do not touch indentation if there is a token error caused by
3337 # incomplete multi-line statement. Otherwise, we risk screwing up the
3338 # indentation.
3339 if has_e901:
3340 if issue_id.startswith(('e1', 'e7')):
3341 continue
3343 yield r
3346def multiline_string_lines(source, include_docstrings=False):
3347 """Return line numbers that are within multiline strings.
3349 The line numbers are indexed at 1.
3351 Docstrings are ignored.
3353 """
3354 line_numbers = set()
3355 previous_token_type = ''
3356 try:
3357 for t in generate_tokens(source):
3358 token_type = t[0]
3359 start_row = t[2][0]
3360 end_row = t[3][0]
3362 if token_type == tokenize.STRING and start_row != end_row:
3363 if (
3364 include_docstrings or
3365 previous_token_type != tokenize.INDENT
3366 ):
3367 # We increment by one since we want the contents of the
3368 # string.
3369 line_numbers |= set(range(1 + start_row, 1 + end_row))
3371 previous_token_type = token_type
3372 except (SyntaxError, tokenize.TokenError):
3373 pass
3375 return line_numbers
3378def commented_out_code_lines(source):
3379 """Return line numbers of comments that are likely code.
3381 Commented-out code is bad practice, but modifying it just adds even
3382 more clutter.
3384 """
3385 line_numbers = []
3386 try:
3387 for t in generate_tokens(source):
3388 token_type = t[0]
3389 token_string = t[1]
3390 start_row = t[2][0]
3391 line = t[4]
3393 # Ignore inline comments.
3394 if not line.lstrip().startswith('#'):
3395 continue
3397 if token_type == tokenize.COMMENT:
3398 stripped_line = token_string.lstrip('#').strip()
3399 with warnings.catch_warnings():
3400 # ignore SyntaxWarning in Python3.8+
3401 # refs:
3402 # https://bugs.python.org/issue15248
3403 # https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes
3404 warnings.filterwarnings("ignore", category=SyntaxWarning)
3405 if (
3406 ' ' in stripped_line and
3407 '#' not in stripped_line and
3408 check_syntax(stripped_line)
3409 ):
3410 line_numbers.append(start_row)
3411 except (SyntaxError, tokenize.TokenError):
3412 pass
3414 return line_numbers
3417def shorten_comment(line, max_line_length, last_comment=False):
3418 """Return trimmed or split long comment line.
3420 If there are no comments immediately following it, do a text wrap.
3421 Doing this wrapping on all comments in general would lead to jagged
3422 comment text.
3424 """
3425 assert len(line) > max_line_length
3426 line = line.rstrip()
3428 # PEP 8 recommends 72 characters for comment text.
3429 indentation = _get_indentation(line) + '# '
3430 max_line_length = min(max_line_length,
3431 len(indentation) + 72)
3433 MIN_CHARACTER_REPEAT = 5
3434 if (
3435 len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
3436 not line[-1].isalnum()
3437 ):
3438 # Trim comments that end with things like ---------
3439 return line[:max_line_length] + '\n'
3440 elif last_comment and re.match(r'\s*#+\s*\w+', line):
3441 split_lines = textwrap.wrap(line.lstrip(' \t#'),
3442 initial_indent=indentation,
3443 subsequent_indent=indentation,
3444 width=max_line_length,
3445 break_long_words=False,
3446 break_on_hyphens=False)
3447 return '\n'.join(split_lines) + '\n'
3449 return line + '\n'
3452def normalize_line_endings(lines, newline):
3453 """Return fixed line endings.
3455 All lines will be modified to use the most common line ending.
3456 """
3457 line = [line.rstrip('\n\r') + newline for line in lines]
3458 if line and lines[-1] == lines[-1].rstrip('\n\r'):
3459 line[-1] = line[-1].rstrip('\n\r')
3460 return line
3463def mutual_startswith(a, b):
3464 return b.startswith(a) or a.startswith(b)
3467def code_match(code, select, ignore):
3468 if ignore:
3469 assert not isinstance(ignore, str)
3470 for ignored_code in [c.strip() for c in ignore]:
3471 if mutual_startswith(code.lower(), ignored_code.lower()):
3472 return False
3474 if select:
3475 assert not isinstance(select, str)
3476 for selected_code in [c.strip() for c in select]:
3477 if mutual_startswith(code.lower(), selected_code.lower()):
3478 return True
3479 return False
3481 return True
3484def fix_code(source, options=None, encoding=None, apply_config=False):
3485 """Return fixed source code.
3487 "encoding" will be used to decode "source" if it is a byte string.
3489 """
3490 options = _get_options(options, apply_config)
3491 # normalize
3492 options.ignore = [opt.upper() for opt in options.ignore]
3493 options.select = [opt.upper() for opt in options.select]
3495 # check ignore args
3496 # NOTE: If W50x is not included, add W50x because the code
3497 # correction result is indefinite.
3498 ignore_opt = options.ignore
3499 if not {"W50", "W503", "W504"} & set(ignore_opt):
3500 options.ignore.append("W50")
3502 if not isinstance(source, str):
3503 source = source.decode(encoding or get_encoding())
3505 sio = io.StringIO(source)
3506 return fix_lines(sio.readlines(), options=options)
3509def _get_options(raw_options, apply_config):
3510 """Return parsed options."""
3511 if not raw_options:
3512 return parse_args([''], apply_config=apply_config)
3514 if isinstance(raw_options, dict):
3515 options = parse_args([''], apply_config=apply_config)
3516 for name, value in raw_options.items():
3517 if not hasattr(options, name):
3518 raise ValueError("No such option '{}'".format(name))
3520 # Check for very basic type errors.
3521 expected_type = type(getattr(options, name))
3522 if not isinstance(expected_type, (str, )):
3523 if isinstance(value, (str, )):
3524 raise ValueError(
3525 "Option '{}' should not be a string".format(name))
3526 setattr(options, name, value)
3527 else:
3528 options = raw_options
3530 return options
3533def fix_lines(source_lines, options, filename=''):
3534 """Return fixed source code."""
3535 # Transform everything to line feed. Then change them back to original
3536 # before returning fixed source code.
3537 original_newline = find_newline(source_lines)
3538 tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))
3540 # Keep a history to break out of cycles.
3541 previous_hashes = set()
3543 if options.line_range:
3544 # Disable "apply_local_fixes()" for now due to issue #175.
3545 fixed_source = tmp_source
3546 else:
3547 # Apply global fixes only once (for efficiency).
3548 fixed_source = apply_global_fixes(tmp_source,
3549 options,
3550 filename=filename)
3552 passes = 0
3553 long_line_ignore_cache = set()
3554 while hash(fixed_source) not in previous_hashes:
3555 if options.pep8_passes >= 0 and passes > options.pep8_passes:
3556 break
3557 passes += 1
3559 previous_hashes.add(hash(fixed_source))
3561 tmp_source = copy.copy(fixed_source)
3563 fix = FixPEP8(
3564 filename,
3565 options,
3566 contents=tmp_source,
3567 long_line_ignore_cache=long_line_ignore_cache)
3569 fixed_source = fix.fix()
3571 sio = io.StringIO(fixed_source)
3572 return ''.join(normalize_line_endings(sio.readlines(), original_newline))
3575def fix_file(filename, options=None, output=None, apply_config=False):
3576 if not options:
3577 options = parse_args([filename], apply_config=apply_config)
3579 original_source = readlines_from_file(filename)
3581 fixed_source = original_source
3583 if options.in_place or options.diff or output:
3584 encoding = detect_encoding(filename)
3586 if output:
3587 output = LineEndingWrapper(wrap_output(output, encoding=encoding))
3589 fixed_source = fix_lines(fixed_source, options, filename=filename)
3591 if options.diff:
3592 new = io.StringIO(fixed_source)
3593 new = new.readlines()
3594 diff = get_diff_text(original_source, new, filename)
3595 if output:
3596 output.write(diff)
3597 output.flush()
3598 elif options.jobs > 1:
3599 diff = diff.encode(encoding)
3600 return diff
3601 elif options.in_place:
3602 original = "".join(original_source).splitlines()
3603 fixed = fixed_source.splitlines()
3604 original_source_last_line = (
3605 original_source[-1].split("\n")[-1] if original_source else ""
3606 )
3607 fixed_source_last_line = fixed_source.split("\n")[-1]
3608 if original != fixed or (
3609 original_source_last_line != fixed_source_last_line
3610 ):
3611 with open_with_encoding(filename, 'w', encoding=encoding) as fp:
3612 fp.write(fixed_source)
3613 return fixed_source
3614 return None
3615 else:
3616 if output:
3617 output.write(fixed_source)
3618 output.flush()
3619 return fixed_source
3622def global_fixes():
3623 """Yield multiple (code, function) tuples."""
3624 for function in list(globals().values()):
3625 if inspect.isfunction(function):
3626 arguments = _get_parameters(function)
3627 if arguments[:1] != ['source']:
3628 continue
3630 code = extract_code_from_function(function)
3631 if code:
3632 yield (code, function)
3635def _get_parameters(function):
3636 # pylint: disable=deprecated-method
3637 if sys.version_info.major >= 3:
3638 # We need to match "getargspec()", which includes "self" as the first
3639 # value for methods.
3640 # https://bugs.python.org/issue17481#msg209469
3641 if inspect.ismethod(function):
3642 function = function.__func__
3644 return list(inspect.signature(function).parameters)
3645 else:
3646 return inspect.getargspec(function)[0]
3649def apply_global_fixes(source, options, where='global', filename='',
3650 codes=None):
3651 """Run global fixes on source code.
3653 These are fixes that only need be done once (unlike those in
3654 FixPEP8, which are dependent on pycodestyle).
3656 """
3657 if codes is None:
3658 codes = []
3659 if any(code_match(code, select=options.select, ignore=options.ignore)
3660 for code in ['E101', 'E111']):
3661 source = reindent(
3662 source,
3663 indent_size=options.indent_size,
3664 leave_tabs=not (
3665 code_match(
3666 'W191',
3667 select=options.select,
3668 ignore=options.ignore
3669 )
3670 )
3671 )
3673 for (code, function) in global_fixes():
3674 if code_match(code, select=options.select, ignore=options.ignore):
3675 if options.verbose:
3676 print('---> Applying {} fix for {}'.format(where,
3677 code.upper()),
3678 file=sys.stderr)
3679 source = function(source,
3680 aggressive=options.aggressive)
3682 source = fix_2to3(source,
3683 aggressive=options.aggressive,
3684 select=options.select,
3685 ignore=options.ignore,
3686 filename=filename,
3687 where=where,
3688 verbose=options.verbose)
3690 return source
3693def extract_code_from_function(function):
3694 """Return code handled by function."""
3695 if not function.__name__.startswith('fix_'):
3696 return None
3698 code = re.sub('^fix_', '', function.__name__)
3699 if not code:
3700 return None
3702 try:
3703 int(code[1:])
3704 except ValueError:
3705 return None
3707 return code
3710def _get_package_version():
3711 packages = ["pycodestyle: {}".format(pycodestyle.__version__)]
3712 return ", ".join(packages)
3715def create_parser():
3716 """Return command-line parser."""
3717 parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
3718 prog='autopep8')
3719 parser.add_argument('--version', action='version',
3720 version='%(prog)s {} ({})'.format(
3721 __version__, _get_package_version()))
3722 parser.add_argument('-v', '--verbose', action='count',
3723 default=0,
3724 help='print verbose messages; '
3725 'multiple -v result in more verbose messages')
3726 parser.add_argument('-d', '--diff', action='store_true',
3727 help='print the diff for the fixed source')
3728 parser.add_argument('-i', '--in-place', action='store_true',
3729 help='make changes to files in place')
3730 parser.add_argument('--global-config', metavar='filename',
3731 default=DEFAULT_CONFIG,
3732 help='path to a global pep8 config file; if this file '
3733 'does not exist then this is ignored '
3734 '(default: {})'.format(DEFAULT_CONFIG))
3735 parser.add_argument('--ignore-local-config', action='store_true',
3736 help="don't look for and apply local config files; "
3737 'if not passed, defaults are updated with any '
3738 "config files in the project's root directory")
3739 parser.add_argument('-r', '--recursive', action='store_true',
3740 help='run recursively over directories; '
3741 'must be used with --in-place or --diff')
3742 parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
3743 help='number of parallel jobs; '
3744 'match CPU count if value is less than 1')
3745 parser.add_argument('-p', '--pep8-passes', metavar='n',
3746 default=-1, type=int,
3747 help='maximum number of additional pep8 passes '
3748 '(default: infinite)')
3749 parser.add_argument('-a', '--aggressive', action='count', default=0,
3750 help='enable non-whitespace changes; '
3751 'multiple -a result in more aggressive changes')
3752 parser.add_argument('--experimental', action='store_true',
3753 help='enable experimental fixes')
3754 parser.add_argument('--exclude', metavar='globs',
3755 help='exclude file/directory names that match these '
3756 'comma-separated globs')
3757 parser.add_argument('--list-fixes', action='store_true',
3758 help='list codes for fixes; '
3759 'used by --ignore and --select')
3760 parser.add_argument('--ignore', metavar='errors', default='',
3761 help='do not fix these errors/warnings '
3762 '(default: {})'.format(DEFAULT_IGNORE))
3763 parser.add_argument('--select', metavar='errors', default='',
3764 help='fix only these errors/warnings (e.g. E4,W)')
3765 parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
3766 help='set maximum allowed line length '
3767 '(default: %(default)s)')
3768 parser.add_argument('--line-range', '--range', metavar='line',
3769 default=None, type=int, nargs=2,
3770 help='only fix errors found within this inclusive '
3771 'range of line numbers (e.g. 1 99); '
3772 'line numbers are indexed at 1')
3773 parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
3774 type=int, help=argparse.SUPPRESS)
3775 parser.add_argument('--hang-closing', action='store_true',
3776 help='hang-closing option passed to pycodestyle')
3777 parser.add_argument('--exit-code', action='store_true',
3778 help='change to behavior of exit code.'
3779 ' default behavior of return value, 0 is no '
3780 'differences, 1 is error exit. return 2 when'
3781 ' add this option. 2 is exists differences.')
3782 parser.add_argument('files', nargs='*',
3783 help="files to format or '-' for standard in")
3785 return parser
3788def _expand_codes(codes, ignore_codes):
3789 """expand to individual E/W codes"""
3790 ret = set()
3792 is_conflict = False
3793 if all(
3794 any(
3795 conflicting_code.startswith(code)
3796 for code in codes
3797 )
3798 for conflicting_code in CONFLICTING_CODES
3799 ):
3800 is_conflict = True
3802 is_ignore_w503 = "W503" in ignore_codes
3803 is_ignore_w504 = "W504" in ignore_codes
3805 for code in codes:
3806 if code == "W":
3807 if is_ignore_w503 and is_ignore_w504:
3808 ret.update({"W1", "W2", "W3", "W505", "W6"})
3809 elif is_ignore_w503:
3810 ret.update({"W1", "W2", "W3", "W504", "W505", "W6"})
3811 else:
3812 ret.update({"W1", "W2", "W3", "W503", "W505", "W6"})
3813 elif code in ("W5", "W50"):
3814 if is_ignore_w503 and is_ignore_w504:
3815 ret.update({"W505"})
3816 elif is_ignore_w503:
3817 ret.update({"W504", "W505"})
3818 else:
3819 ret.update({"W503", "W505"})
3820 elif not (code in ("W503", "W504") and is_conflict):
3821 ret.add(code)
3823 return ret
3826def parse_args(arguments, apply_config=False):
3827 """Parse command-line options."""
3828 parser = create_parser()
3829 args = parser.parse_args(arguments)
3831 if not args.files and not args.list_fixes:
3832 parser.exit(EXIT_CODE_ARGPARSE_ERROR, 'incorrect number of arguments')
3834 args.files = [decode_filename(name) for name in args.files]
3836 if apply_config:
3837 parser = read_config(args, parser)
3838 # prioritize settings when exist pyproject.toml's tool.autopep8 section
3839 try:
3840 parser_with_pyproject_toml = read_pyproject_toml(args, parser)
3841 except Exception:
3842 parser_with_pyproject_toml = None
3843 if parser_with_pyproject_toml:
3844 parser = parser_with_pyproject_toml
3845 args = parser.parse_args(arguments)
3846 args.files = [decode_filename(name) for name in args.files]
3848 if '-' in args.files:
3849 if len(args.files) > 1:
3850 parser.exit(
3851 EXIT_CODE_ARGPARSE_ERROR,
3852 'cannot mix stdin and regular files',
3853 )
3855 if args.diff:
3856 parser.exit(
3857 EXIT_CODE_ARGPARSE_ERROR,
3858 '--diff cannot be used with standard input',
3859 )
3861 if args.in_place:
3862 parser.exit(
3863 EXIT_CODE_ARGPARSE_ERROR,
3864 '--in-place cannot be used with standard input',
3865 )
3867 if args.recursive:
3868 parser.exit(
3869 EXIT_CODE_ARGPARSE_ERROR,
3870 '--recursive cannot be used with standard input',
3871 )
3873 if len(args.files) > 1 and not (args.in_place or args.diff):
3874 parser.exit(
3875 EXIT_CODE_ARGPARSE_ERROR,
3876 'autopep8 only takes one filename as argument '
3877 'unless the "--in-place" or "--diff" args are used',
3878 )
3880 if args.recursive and not (args.in_place or args.diff):
3881 parser.exit(
3882 EXIT_CODE_ARGPARSE_ERROR,
3883 '--recursive must be used with --in-place or --diff',
3884 )
3886 if args.in_place and args.diff:
3887 parser.exit(
3888 EXIT_CODE_ARGPARSE_ERROR,
3889 '--in-place and --diff are mutually exclusive',
3890 )
3892 if args.max_line_length <= 0:
3893 parser.exit(
3894 EXIT_CODE_ARGPARSE_ERROR,
3895 '--max-line-length must be greater than 0',
3896 )
3898 if args.indent_size <= 0:
3899 parser.exit(
3900 EXIT_CODE_ARGPARSE_ERROR,
3901 '--indent-size must be greater than 0',
3902 )
3904 if args.select:
3905 args.select = _expand_codes(
3906 _split_comma_separated(args.select),
3907 (_split_comma_separated(args.ignore) if args.ignore else [])
3908 )
3910 if args.ignore:
3911 args.ignore = _split_comma_separated(args.ignore)
3912 if all(
3913 not any(
3914 conflicting_code.startswith(ignore_code)
3915 for ignore_code in args.ignore
3916 )
3917 for conflicting_code in CONFLICTING_CODES
3918 ):
3919 args.ignore.update(CONFLICTING_CODES)
3920 elif not args.select:
3921 if args.aggressive:
3922 # Enable everything by default if aggressive.
3923 args.select = {'E', 'W1', 'W2', 'W3', 'W6'}
3924 else:
3925 args.ignore = _split_comma_separated(DEFAULT_IGNORE)
3927 if args.exclude:
3928 args.exclude = _split_comma_separated(args.exclude)
3929 else:
3930 args.exclude = {}
3932 if args.jobs < 1:
3933 # Do not import multiprocessing globally in case it is not supported
3934 # on the platform.
3935 import multiprocessing
3936 args.jobs = multiprocessing.cpu_count()
3938 if args.jobs > 1 and not (args.in_place or args.diff):
3939 parser.exit(
3940 EXIT_CODE_ARGPARSE_ERROR,
3941 'parallel jobs requires --in-place',
3942 )
3944 if args.line_range:
3945 if args.line_range[0] <= 0:
3946 parser.exit(
3947 EXIT_CODE_ARGPARSE_ERROR,
3948 '--range must be positive numbers',
3949 )
3950 if args.line_range[0] > args.line_range[1]:
3951 parser.exit(
3952 EXIT_CODE_ARGPARSE_ERROR,
3953 'First value of --range should be less than or equal '
3954 'to the second',
3955 )
3957 return args
3960def _get_normalize_options(args, config, section, option_list):
3961 for (k, v) in config.items(section):
3962 norm_opt = k.lstrip('-').replace('-', '_')
3963 if not option_list.get(norm_opt):
3964 continue
3965 opt_type = option_list[norm_opt]
3966 if opt_type is int:
3967 if v.strip() == "auto":
3968 # skip to special case
3969 if args.verbose:
3970 print(f"ignore config: {k}={v}")
3971 continue
3972 value = config.getint(section, k)
3973 elif opt_type is bool:
3974 value = config.getboolean(section, k)
3975 else:
3976 value = config.get(section, k)
3977 yield norm_opt, k, value
3980def read_config(args, parser):
3981 """Read both user configuration and local configuration."""
3982 config = SafeConfigParser()
3984 try:
3985 if args.verbose and os.path.exists(args.global_config):
3986 print("read config path: {}".format(args.global_config))
3987 config.read(args.global_config)
3989 if not args.ignore_local_config:
3990 parent = tail = args.files and os.path.abspath(
3991 os.path.commonprefix(args.files))
3992 while tail:
3993 if config.read([os.path.join(parent, fn)
3994 for fn in PROJECT_CONFIG]):
3995 if args.verbose:
3996 for fn in PROJECT_CONFIG:
3997 config_file = os.path.join(parent, fn)
3998 if not os.path.exists(config_file):
3999 continue
4000 print(
4001 "read config path: {}".format(
4002 os.path.join(parent, fn)
4003 )
4004 )
4005 break
4006 (parent, tail) = os.path.split(parent)
4008 defaults = {}
4009 option_list = {o.dest: o.type or type(o.default)
4010 for o in parser._actions}
4012 for section in ['pep8', 'pycodestyle', 'flake8']:
4013 if not config.has_section(section):
4014 continue
4015 for norm_opt, k, value in _get_normalize_options(
4016 args, config, section, option_list
4017 ):
4018 if args.verbose:
4019 print("enable config: section={}, key={}, value={}".format(
4020 section, k, value))
4021 defaults[norm_opt] = value
4023 parser.set_defaults(**defaults)
4024 except Error:
4025 # Ignore for now.
4026 pass
4028 return parser
4031def read_pyproject_toml(args, parser):
4032 """Read pyproject.toml and load configuration."""
4033 if sys.version_info >= (3, 11):
4034 import tomllib
4035 else:
4036 import tomli as tomllib
4038 config = None
4040 if os.path.exists(args.global_config):
4041 with open(args.global_config, "rb") as fp:
4042 config = tomllib.load(fp)
4044 if not args.ignore_local_config:
4045 parent = tail = args.files and os.path.abspath(
4046 os.path.commonprefix(args.files))
4047 while tail:
4048 pyproject_toml = os.path.join(parent, "pyproject.toml")
4049 if os.path.exists(pyproject_toml):
4050 with open(pyproject_toml, "rb") as fp:
4051 config = tomllib.load(fp)
4052 break
4053 (parent, tail) = os.path.split(parent)
4055 if not config:
4056 return None
4058 if config.get("tool", {}).get("autopep8") is None:
4059 return None
4061 config = config.get("tool").get("autopep8")
4063 defaults = {}
4064 option_list = {o.dest: o.type or type(o.default)
4065 for o in parser._actions}
4067 TUPLED_OPTIONS = ("ignore", "select")
4068 for (k, v) in config.items():
4069 norm_opt = k.lstrip('-').replace('-', '_')
4070 if not option_list.get(norm_opt):
4071 continue
4072 if type(v) in (list, tuple) and norm_opt in TUPLED_OPTIONS:
4073 value = ",".join(v)
4074 else:
4075 value = v
4076 if args.verbose:
4077 print("enable pyproject.toml config: "
4078 "key={}, value={}".format(k, value))
4079 defaults[norm_opt] = value
4081 if defaults:
4082 # set value when exists key-value in defaults dict
4083 parser.set_defaults(**defaults)
4085 return parser
4088def _split_comma_separated(string):
4089 """Return a set of strings."""
4090 return {text.strip() for text in string.split(',') if text.strip()}
4093def decode_filename(filename):
4094 """Return Unicode filename."""
4095 if isinstance(filename, str):
4096 return filename
4098 return filename.decode(sys.getfilesystemencoding())
4101def supported_fixes():
4102 """Yield pep8 error codes that autopep8 fixes.
4104 Each item we yield is a tuple of the code followed by its
4105 description.
4107 """
4108 yield ('E101', docstring_summary(reindent.__doc__))
4110 instance = FixPEP8(filename=None, options=None, contents='')
4111 for attribute in dir(instance):
4112 code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
4113 if code:
4114 yield (
4115 code.group(1).upper(),
4116 re.sub(r'\s+', ' ',
4117 docstring_summary(getattr(instance, attribute).__doc__))
4118 )
4120 for (code, function) in sorted(global_fixes()):
4121 yield (code.upper() + (4 - len(code)) * ' ',
4122 re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))
4124 for code in sorted(CODE_TO_2TO3):
4125 yield (code.upper() + (4 - len(code)) * ' ',
4126 re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))
4129def docstring_summary(docstring):
4130 """Return summary of docstring."""
4131 return docstring.split('\n')[0] if docstring else ''
4134def line_shortening_rank(candidate, indent_word, max_line_length,
4135 experimental=False):
4136 """Return rank of candidate.
4138 This is for sorting candidates.
4140 """
4141 if not candidate.strip():
4142 return 0
4144 rank = 0
4145 lines = candidate.rstrip().split('\n')
4147 offset = 0
4148 if (
4149 not lines[0].lstrip().startswith('#') and
4150 lines[0].rstrip()[-1] not in '([{'
4151 ):
4152 for (opening, closing) in ('()', '[]', '{}'):
4153 # Don't penalize empty containers that aren't split up. Things like
4154 # this "foo(\n )" aren't particularly good.
4155 opening_loc = lines[0].find(opening)
4156 closing_loc = lines[0].find(closing)
4157 if opening_loc >= 0:
4158 if closing_loc < 0 or closing_loc != opening_loc + 1:
4159 offset = max(offset, 1 + opening_loc)
4161 current_longest = max(offset + len(x.strip()) for x in lines)
4163 rank += 4 * max(0, current_longest - max_line_length)
4165 rank += len(lines)
4167 # Too much variation in line length is ugly.
4168 rank += 2 * standard_deviation(len(line) for line in lines)
4170 bad_staring_symbol = {
4171 '(': ')',
4172 '[': ']',
4173 '{': '}'}.get(lines[0][-1])
4175 if len(lines) > 1:
4176 if (
4177 bad_staring_symbol and
4178 lines[1].lstrip().startswith(bad_staring_symbol)
4179 ):
4180 rank += 20
4182 for lineno, current_line in enumerate(lines):
4183 current_line = current_line.strip()
4185 if current_line.startswith('#'):
4186 continue
4188 for bad_start in ['.', '%', '+', '-', '/']:
4189 if current_line.startswith(bad_start):
4190 rank += 100
4192 # Do not tolerate operators on their own line.
4193 if current_line == bad_start:
4194 rank += 1000
4196 if (
4197 current_line.endswith(('.', '%', '+', '-', '/')) and
4198 "': " in current_line
4199 ):
4200 rank += 1000
4202 if current_line.endswith(('(', '[', '{', '.')):
4203 # Avoid lonely opening. They result in longer lines.
4204 if len(current_line) <= len(indent_word):
4205 rank += 100
4207 # Avoid the ugliness of ", (\n".
4208 if (
4209 current_line.endswith('(') and
4210 current_line[:-1].rstrip().endswith(',')
4211 ):
4212 rank += 100
4214 # Avoid the ugliness of "something[\n" and something[index][\n.
4215 if (
4216 current_line.endswith('[') and
4217 len(current_line) > 1 and
4218 (current_line[-2].isalnum() or current_line[-2] in ']')
4219 ):
4220 rank += 300
4222 # Also avoid the ugliness of "foo.\nbar"
4223 if current_line.endswith('.'):
4224 rank += 100
4226 if has_arithmetic_operator(current_line):
4227 rank += 100
4229 # Avoid breaking at unary operators.
4230 if re.match(r'.*[(\[{]\s*[\-\+~]$', current_line.rstrip('\\ ')):
4231 rank += 1000
4233 if re.match(r'.*lambda\s*\*$', current_line.rstrip('\\ ')):
4234 rank += 1000
4236 if current_line.endswith(('%', '(', '[', '{')):
4237 rank -= 20
4239 # Try to break list comprehensions at the "for".
4240 if current_line.startswith('for '):
4241 rank -= 50
4243 if current_line.endswith('\\'):
4244 # If a line ends in \-newline, it may be part of a
4245 # multiline string. In that case, we would like to know
4246 # how long that line is without the \-newline. If it's
4247 # longer than the maximum, or has comments, then we assume
4248 # that the \-newline is an okay candidate and only
4249 # penalize it a bit.
4250 total_len = len(current_line)
4251 lineno += 1
4252 while lineno < len(lines):
4253 total_len += len(lines[lineno])
4255 if lines[lineno].lstrip().startswith('#'):
4256 total_len = max_line_length
4257 break
4259 if not lines[lineno].endswith('\\'):
4260 break
4262 lineno += 1
4264 if total_len < max_line_length:
4265 rank += 10
4266 else:
4267 rank += 100 if experimental else 1
4269 # Prefer breaking at commas rather than colon.
4270 if ',' in current_line and current_line.endswith(':'):
4271 rank += 10
4273 # Avoid splitting dictionaries between key and value.
4274 if current_line.endswith(':'):
4275 rank += 100
4277 rank += 10 * count_unbalanced_brackets(current_line)
4279 return max(0, rank)
4282def standard_deviation(numbers):
4283 """Return standard deviation."""
4284 numbers = list(numbers)
4285 if not numbers:
4286 return 0
4287 mean = sum(numbers) / len(numbers)
4288 return (sum((n - mean) ** 2 for n in numbers) /
4289 len(numbers)) ** .5
4292def has_arithmetic_operator(line):
4293 """Return True if line contains any arithmetic operators."""
4294 for operator in pycodestyle.ARITHMETIC_OP:
4295 if operator in line:
4296 return True
4298 return False
4301def count_unbalanced_brackets(line):
4302 """Return number of unmatched open/close brackets."""
4303 count = 0
4304 for opening, closing in ['()', '[]', '{}']:
4305 count += abs(line.count(opening) - line.count(closing))
4307 return count
4310def split_at_offsets(line, offsets):
4311 """Split line at offsets.
4313 Return list of strings.
4315 """
4316 result = []
4318 previous_offset = 0
4319 current_offset = 0
4320 for current_offset in sorted(offsets):
4321 if current_offset < len(line) and previous_offset != current_offset:
4322 result.append(line[previous_offset:current_offset].strip())
4323 previous_offset = current_offset
4325 result.append(line[current_offset:])
4327 return result
4330class LineEndingWrapper(object):
4332 r"""Replace line endings to work with sys.stdout.
4334 It seems that sys.stdout expects only '\n' as the line ending, no matter
4335 the platform. Otherwise, we get repeated line endings.
4337 """
4339 def __init__(self, output):
4340 self.__output = output
4342 def write(self, s):
4343 self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))
4345 def flush(self):
4346 self.__output.flush()
4349def match_file(filename, exclude):
4350 """Return True if file is okay for modifying/recursing."""
4351 base_name = os.path.basename(filename)
4353 if base_name.startswith('.'):
4354 return False
4356 for pattern in exclude:
4357 if fnmatch.fnmatch(base_name, pattern):
4358 return False
4359 if fnmatch.fnmatch(filename, pattern):
4360 return False
4362 if not os.path.isdir(filename) and not is_python_file(filename):
4363 return False
4365 return True
4368def find_files(filenames, recursive, exclude):
4369 """Yield filenames."""
4370 while filenames:
4371 name = filenames.pop(0)
4372 if recursive and os.path.isdir(name):
4373 for root, directories, children in os.walk(name):
4374 filenames += [os.path.join(root, f) for f in children
4375 if match_file(os.path.join(root, f),
4376 exclude)]
4377 directories[:] = [d for d in directories
4378 if match_file(os.path.join(root, d),
4379 exclude)]
4380 else:
4381 is_exclude_match = False
4382 for pattern in exclude:
4383 if fnmatch.fnmatch(name, pattern):
4384 is_exclude_match = True
4385 break
4386 if not is_exclude_match:
4387 yield name
4390def _fix_file(parameters):
4391 """Helper function for optionally running fix_file() in parallel."""
4392 if parameters[1].verbose:
4393 print('[file:{}]'.format(parameters[0]), file=sys.stderr)
4394 try:
4395 return fix_file(*parameters)
4396 except IOError as error:
4397 print(str(error), file=sys.stderr)
4398 raise error
4401def fix_multiple_files(filenames, options, output=None):
4402 """Fix list of files.
4404 Optionally fix files recursively.
4406 """
4407 results = []
4408 filenames = find_files(filenames, options.recursive, options.exclude)
4409 if options.jobs > 1:
4410 import multiprocessing
4411 pool = multiprocessing.Pool(options.jobs)
4412 rets = []
4413 for name in filenames:
4414 ret = pool.apply_async(_fix_file, ((name, options),))
4415 rets.append(ret)
4416 pool.close()
4417 pool.join()
4418 if options.diff:
4419 for r in rets:
4420 sys.stdout.write(r.get().decode())
4421 sys.stdout.flush()
4422 results.extend([x.get() for x in rets if x is not None])
4423 else:
4424 for name in filenames:
4425 ret = _fix_file((name, options, output))
4426 if ret is None:
4427 continue
4428 if options.diff:
4429 if ret != '':
4430 results.append(ret)
4431 elif options.in_place:
4432 results.append(ret)
4433 else:
4434 original_source = readlines_from_file(name)
4435 if "".join(original_source).splitlines() != ret.splitlines():
4436 results.append(ret)
4437 return results
4440def is_python_file(filename):
4441 """Return True if filename is Python file."""
4442 if filename.endswith('.py'):
4443 return True
4445 try:
4446 with open_with_encoding(
4447 filename,
4448 limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES) as f:
4449 text = f.read(MAX_PYTHON_FILE_DETECTION_BYTES)
4450 if not text:
4451 return False
4452 first_line = text.splitlines()[0]
4453 except (IOError, IndexError):
4454 return False
4456 if not PYTHON_SHEBANG_REGEX.match(first_line):
4457 return False
4459 return True
4462def is_probably_part_of_multiline(line):
4463 """Return True if line is likely part of a multiline string.
4465 When multiline strings are involved, pep8 reports the error as being
4466 at the start of the multiline string, which doesn't work for us.
4468 """
4469 return (
4470 '"""' in line or
4471 "'''" in line or
4472 line.rstrip().endswith('\\')
4473 )
4476def wrap_output(output, encoding):
4477 """Return output with specified encoding."""
4478 return codecs.getwriter(encoding)(output.buffer
4479 if hasattr(output, 'buffer')
4480 else output)
4483def get_encoding():
4484 """Return preferred encoding."""
4485 return locale.getpreferredencoding() or sys.getdefaultencoding()
4488def main(argv=None, apply_config=True):
4489 """Command-line entry."""
4490 if argv is None:
4491 argv = sys.argv
4493 try:
4494 # Exit on broken pipe.
4495 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
4496 except AttributeError: # pragma: no cover
4497 # SIGPIPE is not available on Windows.
4498 pass
4500 try:
4501 args = parse_args(argv[1:], apply_config=apply_config)
4503 if args.list_fixes:
4504 for code, description in sorted(supported_fixes()):
4505 print('{code} - {description}'.format(
4506 code=code, description=description))
4507 return EXIT_CODE_OK
4509 if args.files == ['-']:
4510 assert not args.in_place
4512 encoding = sys.stdin.encoding or get_encoding()
4513 read_stdin = sys.stdin.read()
4514 fixed_stdin = fix_code(read_stdin, args, encoding=encoding)
4516 # LineEndingWrapper is unnecessary here due to the symmetry between
4517 # standard in and standard out.
4518 wrap_output(sys.stdout, encoding=encoding).write(fixed_stdin)
4520 if hash(read_stdin) != hash(fixed_stdin):
4521 if args.exit_code:
4522 return EXIT_CODE_EXISTS_DIFF
4523 else:
4524 if args.in_place or args.diff:
4525 args.files = list(set(args.files))
4526 else:
4527 assert len(args.files) == 1
4528 assert not args.recursive
4530 results = fix_multiple_files(args.files, args, sys.stdout)
4531 if args.diff:
4532 ret = any([len(ret) != 0 for ret in results])
4533 else:
4534 # with in-place option
4535 ret = any([ret is not None for ret in results])
4536 if args.exit_code and ret:
4537 return EXIT_CODE_EXISTS_DIFF
4538 except IOError:
4539 return EXIT_CODE_ERROR
4540 except KeyboardInterrupt:
4541 return EXIT_CODE_ERROR # pragma: no cover
4544class CachedTokenizer(object):
4546 """A one-element cache around tokenize.generate_tokens().
4548 Original code written by Ned Batchelder, in coverage.py.
4550 """
4552 def __init__(self):
4553 self.last_text = None
4554 self.last_tokens = None
4556 def generate_tokens(self, text):
4557 """A stand-in for tokenize.generate_tokens()."""
4558 if text != self.last_text:
4559 string_io = io.StringIO(text)
4560 self.last_tokens = list(
4561 tokenize.generate_tokens(string_io.readline)
4562 )
4563 self.last_text = text
4564 return self.last_tokens
4567_cached_tokenizer = CachedTokenizer()
4568generate_tokens = _cached_tokenizer.generate_tokens
4571if __name__ == '__main__':
4572 sys.exit(main())