#!/usr/bin/env python

# Copyright (C) 2010-2011 Hideo Hattori
# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
# Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
# Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Automatically formats Python code to conform to the PEP 8 style guide.

Fixes that only need be done once can be added by adding a function of the form
"fix_<code>(source)" to this module. They should return the fixed source code.
These fixes are picked up by apply_global_fixes().

Fixes that depend on pycodestyle should be added as methods to FixPEP8. See the
class documentation for more information.

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import codecs
import collections
import copy
import difflib
import fnmatch
import importlib
import inspect
import io
import itertools
import keyword
import locale
import os
import re
import signal
import sys
import textwrap
import token
import tokenize
import warnings
import ast
from configparser import ConfigParser as SafeConfigParser, Error

import pycodestyle


__version__ = '2.3.1'


CR = '\r'
LF = '\n'
CRLF = '\r\n'


PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
LAMBDA_REGEX = re.compile(r'([\w.]+)\s=\slambda\s*([)(=\w,\s.]*):')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+([^][)(}{]+?)\s+(in|is)\s')
COMPARE_NEGATIVE_REGEX_THROUGH = re.compile(r'\b(not\s+in|is\s+not)\s')
BARE_EXCEPT_REGEX = re.compile(r'except\s*:')
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\s.*\):')
DOCSTRING_START_REGEX = re.compile(r'^u?r?(?P<kind>["\']{3})')
ENABLE_REGEX = re.compile(r'# *(fmt|autopep8): *on')
DISABLE_REGEX = re.compile(r'# *(fmt|autopep8): *off')
ENCODING_MAGIC_COMMENT = re.compile(
    r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)'
)
COMPARE_TYPE_REGEX = re.compile(
    r'([=!]=)\s+type(?:\s*\(\s*([^)]*[^ )])\s*\))'
    r'|\btype(?:\s*\(\s*([^)]*[^ )])\s*\))\s+([=!]=)'
)
TYPE_REGEX = re.compile(r'(type\s*\(\s*[^)]*?[^\s)]\s*\))')

EXIT_CODE_OK = 0
EXIT_CODE_ERROR = 1
EXIT_CODE_EXISTS_DIFF = 2
EXIT_CODE_ARGPARSE_ERROR = 99

# For generating line shortening candidates.
SHORTEN_OPERATOR_GROUPS = frozenset([
    frozenset([',']),
    frozenset(['%']),
    frozenset([',', '(', '[', '{']),
    frozenset(['%', '(', '[', '{']),
    frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
    frozenset(['%', '+', '-', '*', '/', '//']),
])


DEFAULT_IGNORE = 'E226,E24,W50,W690'  # TODO: use pycodestyle.DEFAULT_IGNORE
DEFAULT_INDENT_SIZE = 4
# These fixes conflict with each other; if the `--ignore` setting causes both
# to be enabled, disable both of them.
CONFLICTING_CODES = ('W503', 'W504')

if sys.platform == 'win32':  # pragma: no cover
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pycodestyle')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'),
                                  'pycodestyle')
# fallback, use .pep8
if not os.path.exists(DEFAULT_CONFIG):  # pragma: no cover
    if sys.platform == 'win32':
        DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
    else:
        DEFAULT_CONFIG = os.path.join(os.path.expanduser('~/.config'), 'pep8')
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8', '.flake8')


MAX_PYTHON_FILE_DETECTION_BYTES = 1024

IS_SUPPORT_TOKEN_FSTRING = False
if sys.version_info >= (3, 12):  # pragma: no cover
    IS_SUPPORT_TOKEN_FSTRING = True


def _custom_formatwarning(message, category, _, __, line=None):
    return f"{category.__name__}: {message}\n"


def open_with_encoding(filename, mode='r', encoding=None, limit_byte_check=-1):
    """Return opened file with a specific encoding."""
    if not encoding:
        encoding = detect_encoding(filename, limit_byte_check=limit_byte_check)

    return io.open(filename, mode=mode, encoding=encoding,
                   newline='')  # Preserve line endings


def _detect_encoding_from_file(filename: str):
    try:
        with open(filename) as input_file:
            for idx, line in enumerate(input_file):
                if idx == 0 and line[0] == '\ufeff':
                    return "utf-8-sig"
                if idx >= 2:
                    break
                match = ENCODING_MAGIC_COMMENT.search(line)
                if match:
                    return match.groups()[0]
    except Exception:
        pass
    # Python3's default encoding
    return 'utf-8'
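
# For example, a UTF-8 BOM at the start of the first line yields 'utf-8-sig',
# and a magic comment such as "# -*- coding: latin-1 -*-" within the first two
# lines yields 'latin-1'; anything else falls back to 'utf-8'.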


def detect_encoding(filename, limit_byte_check=-1):
    """Return file encoding."""
    encoding = _detect_encoding_from_file(filename)
    if encoding == "utf-8-sig":
        return encoding
    try:
        with open_with_encoding(filename, encoding=encoding) as test_file:
            test_file.read(limit_byte_check)
        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'
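
# latin-1 is a safe fallback: it maps every possible byte, so decoding with it
# cannot fail even when the guessed encoding was wrong.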


def readlines_from_file(filename):
    """Return contents of file."""
    with open_with_encoding(filename) as input_file:
        return input_file.readlines()


def extended_blank_lines(logical_line,
                         blank_lines,
                         blank_before,
                         indent_level,
                         previous_logical):
    """Check for missing blank lines after class declaration."""
    if previous_logical.startswith(('def ', 'async def ')):
        if blank_lines and pycodestyle.DOCSTRING_REGEX.match(logical_line):
            yield (0, 'E303 too many blank lines ({})'.format(blank_lines))
    elif pycodestyle.DOCSTRING_REGEX.match(previous_logical):
        # Missing blank line between class docstring and method declaration.
        if (
            indent_level and
            not blank_lines and
            not blank_before and
            logical_line.startswith(('def ', 'async def ')) and
            '(self' in logical_line
        ):
            yield (0, 'E301 expected 1 blank line, found 0')


def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa):
    """Override pycodestyle's function to provide indentation information."""
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented. Assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line. In turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    valid_hangs = (
        (DEFAULT_INDENT_SIZE,)
        if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
                                     2 * DEFAULT_INDENT_SIZE)
    )

    # Remember how many brackets were opened on each line.
    parens = [0] * nrows

    # Relative indents of physical lines.
    rel_indent = [0] * nrows

    # For each depth, collect a list of opening rows.
    open_rows = [[0]]
    # For each depth, memorize the hanging indentation.
    hangs = [None]

    # Visual indents.
    indent_chances = {}
    last_indent = tokens[0][2]
    indent = [last_indent[1]]

    last_token_multiline = None
    line = None
    last_line = ''
    last_line_begins_with_multiline = False
    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))
            last_line_begins_with_multiline = last_token_multiline

        if newline:
            # This is the beginning of a continuation line.
            last_indent = start

            # Record the initial indent.
            rel_indent[row] = pycodestyle.expand_indent(line) - indent_level

            # Identify closing bracket.
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # Is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])

            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # Closing bracket for visual indent.
                if start[1] != indent[depth]:
                    yield (start, 'E124 {}'.format(indent[depth]))
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening bracket's line
                if hang_closing:
                    yield (start, 'E133 {}'.format(indent[depth]))
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # Visual indent is broken.
                    yield (start, 'E128 {}'.format(indent[depth]))
            elif (hanging_indent or
                  (indent_next and
                   rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
                # Hanging indent is verified.
                if close_bracket and not hang_closing:
                    yield (start, 'E123 {}'.format(indent_level +
                                                   rel_indent[open_row]))
                hangs[depth] = hang
            elif visual_indent is True:
                # Visual indent is verified.
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # Ignore token lined up with matching one from a previous line.
                pass
            else:
                one_indented = (indent_level + rel_indent[open_row] +
                                DEFAULT_INDENT_SIZE)
                # Indent is broken.
                if hang <= 0:
                    error = ('E122', one_indented)
                elif indent[depth]:
                    error = ('E127', indent[depth])
                elif not close_bracket and hangs[depth]:
                    error = ('E131', one_indented)
                elif hang > DEFAULT_INDENT_SIZE:
                    error = ('E126', one_indented)
                else:
                    hangs[depth] = hang
                    error = ('E121', one_indented)

                yield (start, '{} {}'.format(*error))

        # Look for visual indenting.
        if (
            parens[row] and
            token_type not in (tokenize.NL, tokenize.COMMENT) and
            not indent[depth]
        ):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
        # Deal with implicit string concatenation.
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # Special case for the "if" statement because len("if (") is equal to
        # 4.
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # Keep track of bracket depth.
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
            elif text in ')]}' and depth > 0:
                # Parent indents should not be more than this one.
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if (
                start[1] not in indent_chances and
                # This is for purposes of speeding up E121 (GitHub #90).
                not last_line.rstrip().endswith(',')
            ):
                # Allow to line up tokens.
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

        last_line = line

    if (
        indent_next and
        not last_line_begins_with_multiline and
        pycodestyle.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
    ):
        pos = (start[0], indent[0] + 4)
        desired_indent = indent_level + 2 * DEFAULT_INDENT_SIZE
        if visual_indent:
            yield (pos, 'E129 {}'.format(desired_indent))
        else:
            yield (pos, 'E125 {}'.format(desired_indent))


# NOTE: need reload with runpy and call twice
# see: https://github.com/hhatto/autopep8/issues/625
importlib.reload(pycodestyle)
del pycodestyle._checks['logical_line'][pycodestyle.continued_indentation]
pycodestyle.register_check(extended_blank_lines)
pycodestyle.register_check(continued_indentation)
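
# The overriding continued_indentation() embeds the desired indent column in
# each message (e.g. "E127 8"); FixPEP8._fix_reindent() later parses that
# column back out of result['info'] to reindent the offending line.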


class FixPEP8(object):

    """Fix invalid code.

    Fixer methods are prefixed "fix_". The _fix_source() method looks for these
    automatically.

    The fixer method can take either one or two arguments (in addition to
    self). The first argument is "result", which is the error information from
    pycodestyle. The second argument, "logical", is required only for
    logical-line fixes.

    The fixer method can return the list of modified lines or None. An empty
    list would mean that no changes were made. None would mean that only the
    line reported in the pycodestyle error was modified. Note that the modified
    line numbers that are returned are indexed at 1. This typically would
    correspond with the line number reported in the pycodestyle error
    information.

    [fixed method list]
    - e111,e114,e115,e116
    - e121,e122,e123,e124,e125,e126,e127,e128,e129
    - e201,e202,e203
    - e211
    - e221,e222,e223,e224,e225
    - e231
    - e251,e252
    - e261,e262
    - e271,e272,e273,e274,e275
    - e301,e302,e303,e304,e305,e306
    - e401,e402
    - e502
    - e701,e702,e703,e704
    - e711,e712,e713,e714
    - e721,e722
    - e731
    - w291
    - w503,w504
470 """

    def __init__(self, filename,
                 options,
                 contents=None,
                 long_line_ignore_cache=None):
        self.filename = filename
        if contents is None:
            self.source = readlines_from_file(filename)
        else:
            sio = io.StringIO(contents)
            self.source = sio.readlines()
        self.options = options
        self.indent_word = _get_indentword(''.join(self.source))
        self.original_source = copy.copy(self.source)

        # collect imports line
        self.imports = {}
        for i, line in enumerate(self.source):
            if (line.find("import ") == 0 or line.find("from ") == 0) and \
                    line not in self.imports:
                # collect only import statements that first appeared
                self.imports[line] = i

        self.long_line_ignore_cache = (
            set() if long_line_ignore_cache is None
            else long_line_ignore_cache)

        # Many fixers are the same even though pycodestyle categorizes them
        # differently.
        self.fix_e115 = self.fix_e112
        self.fix_e121 = self._fix_reindent
        self.fix_e122 = self._fix_reindent
        self.fix_e123 = self._fix_reindent
        self.fix_e124 = self._fix_reindent
        self.fix_e126 = self._fix_reindent
        self.fix_e127 = self._fix_reindent
        self.fix_e128 = self._fix_reindent
        self.fix_e129 = self._fix_reindent
        self.fix_e133 = self.fix_e131
        self.fix_e202 = self.fix_e201
        self.fix_e203 = self.fix_e201
        self.fix_e204 = self.fix_e201
        self.fix_e211 = self.fix_e201
        self.fix_e221 = self.fix_e271
        self.fix_e222 = self.fix_e271
        self.fix_e223 = self.fix_e271
        self.fix_e226 = self.fix_e225
        self.fix_e227 = self.fix_e225
        self.fix_e228 = self.fix_e225
        self.fix_e241 = self.fix_e271
        self.fix_e242 = self.fix_e224
        self.fix_e252 = self.fix_e225
        self.fix_e261 = self.fix_e262
        self.fix_e272 = self.fix_e271
        self.fix_e273 = self.fix_e271
        self.fix_e274 = self.fix_e271
        self.fix_e275 = self.fix_e271
        self.fix_e306 = self.fix_e301
        self.fix_e501 = (
            self.fix_long_line_logically if
            options and (options.aggressive >= 2 or options.experimental) else
            self.fix_long_line_physically)
        self.fix_e703 = self.fix_e702
        self.fix_w292 = self.fix_w291
        self.fix_w293 = self.fix_w291

    def _check_affected_anothers(self, result) -> bool:
        """Check whether another fix already changed this result's line."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        original_target = self.original_source[line_index]
        return target != original_target

    def _fix_source(self, results):
        try:
            (logical_start, logical_end) = _find_logical(self.source)
            logical_support = True
        except (SyntaxError, tokenize.TokenError):  # pragma: no cover
            logical_support = False

        completed_lines = set()
        for result in sorted(results, key=_priority_key):
            if result['line'] in completed_lines:
                continue

            fixed_methodname = 'fix_' + result['id'].lower()
            if hasattr(self, fixed_methodname):
                fix = getattr(self, fixed_methodname)

                line_index = result['line'] - 1
                original_line = self.source[line_index]

                is_logical_fix = len(_get_parameters(fix)) > 2
                if is_logical_fix:
                    logical = None
                    if logical_support:
                        logical = _get_logical(self.source,
                                               result,
                                               logical_start,
                                               logical_end)
                        if logical and set(range(
                                logical[0][0] + 1,
                                logical[1][0] + 1)).intersection(
                                    completed_lines):
                            continue

                    if self._check_affected_anothers(result):
                        continue
                    modified_lines = fix(result, logical)
                else:
                    if self._check_affected_anothers(result):
                        continue
                    modified_lines = fix(result)

                if modified_lines is None:
                    # Force logical fixes to report what they modified.
                    assert not is_logical_fix

                    if self.source[line_index] == original_line:
                        modified_lines = []

                if modified_lines:
                    completed_lines.update(modified_lines)
                elif modified_lines == []:  # Empty list means no fix
                    if self.options.verbose >= 2:
                        print(
                            '---> Not fixing {error} on line {line}'.format(
                                error=result['id'], line=result['line']),
                            file=sys.stderr)
                else:  # We assume one-line fix when None.
                    completed_lines.add(result['line'])
            else:
                if self.options.verbose >= 3:
                    print(
                        "---> '{}' is not defined.".format(fixed_methodname),
                        file=sys.stderr)

                    info = result['info'].strip()
                    print('---> {}:{}:{}:{}'.format(self.filename,
                                                    result['line'],
                                                    result['column'],
                                                    info),
                          file=sys.stderr)

    def fix(self):
        """Return a version of the source code with PEP 8 violations fixed."""
        pep8_options = {
            'ignore': self.options.ignore,
            'select': self.options.select,
            'max_line_length': self.options.max_line_length,
            'hang_closing': self.options.hang_closing,
        }
        results = _execute_pep8(pep8_options, self.source)

        if self.options.verbose:
            progress = {}
            for r in results:
                if r['id'] not in progress:
                    progress[r['id']] = set()
                progress[r['id']].add(r['line'])
            print('---> {n} issue(s) to fix {progress}'.format(
                n=len(results), progress=progress), file=sys.stderr)

        if self.options.line_range:
            start, end = self.options.line_range
            results = [r for r in results
                       if start <= r['line'] <= end]

        self._fix_source(filter_results(source=''.join(self.source),
                                        results=results,
                                        aggressive=self.options.aggressive))

        if self.options.line_range:
            # If number of lines has changed then change line_range.
            count = sum(sline.count('\n')
                        for sline in self.source[start - 1:end])
            self.options.line_range[1] = start + count - 1

        return ''.join(self.source)

    def _fix_reindent(self, result):
        """Fix a badly indented line.

        This is done by adding or removing from its initial indent only.

        """
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()

    def fix_e112(self, result):
        """Fix under-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        if not target.lstrip().startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = self.indent_word + target

    def fix_e113(self, result):
        """Fix unexpected indentation."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        indent = _get_indentation(target)
        stripped = target.lstrip()
        self.source[line_index] = indent[1:] + stripped

    def fix_e116(self, result):
        """Fix over-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent = _get_indentation(target)
        stripped = target.lstrip()

        if not stripped.startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = indent[1:] + stripped

    def fix_e117(self, result):
        """Fix over-indented."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent = _get_indentation(target)
        if indent == '\t':
            return []

        stripped = target.lstrip()

        self.source[line_index] = indent[1:] + stripped

    def fix_e125(self, result):
        """Fix indentation indistinguishable from the next logical line."""
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        spaces_to_add = num_indent_spaces - len(_get_indentation(target))
        indent = len(_get_indentation(target))
        modified_lines = []

        while len(_get_indentation(self.source[line_index])) >= indent:
            self.source[line_index] = (' ' * spaces_to_add +
                                       self.source[line_index])
            modified_lines.append(1 + line_index)  # Line indexed at 1.
            line_index -= 1

        return modified_lines

    def fix_e131(self, result):
        """Fix indentation indistinguishable from the next logical line."""
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent_length = len(_get_indentation(target))
        spaces_to_add = num_indent_spaces - indent_length
        if num_indent_spaces == 0 and indent_length == 0:
            spaces_to_add = 4

        if spaces_to_add >= 0:
            self.source[line_index] = (' ' * spaces_to_add +
                                       self.source[line_index])
        else:
            offset = abs(spaces_to_add)
            self.source[line_index] = self.source[line_index][offset:]

    def fix_e201(self, result):
        """Remove extraneous whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement='')

        self.source[line_index] = fixed

    def fix_e224(self, result):
        """Remove extraneous whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + target[offset:].replace('\t', ' ')
        self.source[result['line'] - 1] = fixed

    def fix_e225(self, result):
        """Fix missing whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + ' ' + target[offset:]

        # Only proceed if non-whitespace characters match.
        # And make sure we don't break the indentation.
        if (
            fixed.replace(' ', '') == target.replace(' ', '') and
            _get_indentation(fixed) == _get_indentation(target)
        ):
            self.source[result['line'] - 1] = fixed
            error_code = result.get('id', 0)
            try:
                ts = generate_tokens(fixed)
            except (SyntaxError, tokenize.TokenError):
                return
            if not check_syntax(fixed.lstrip()):
                return
            try:
                _missing_whitespace = (
                    pycodestyle.missing_whitespace_around_operator
                )
            except AttributeError:
                # pycodestyle >= 2.11.0
                _missing_whitespace = pycodestyle.missing_whitespace
            errors = list(_missing_whitespace(fixed, ts))
            for e in reversed(errors):
                if error_code != e[1].split()[0]:
                    continue
                offset = e[0][1]
                fixed = fixed[:offset] + ' ' + fixed[offset:]
            self.source[result['line'] - 1] = fixed
        else:
            return []

    def fix_e231(self, result):
        """Add missing whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column']
        fixed = target[:offset].rstrip() + ' ' + target[offset:].lstrip()
        self.source[line_index] = fixed

    def fix_e251(self, result):
        """Remove whitespace around parameter '=' sign."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        # This is necessary since pycodestyle sometimes reports columns that
        # go past the end of the physical line. This happens in cases like
        # foo(bar\n=None).
        c = min(result['column'] - 1,
                len(target) - 1)

        if target[c].strip():
            fixed = target
        else:
            fixed = target[:c].rstrip() + target[c:].lstrip()

        # There could be an escaped newline
        #
        #     def foo(a=\
        #             1)
        if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
            self.source[line_index] = fixed.rstrip('\n\r \t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]  # Line indexed at 1

        self.source[result['line'] - 1] = fixed

    def fix_e262(self, result):
        """Fix spacing after inline comment hash."""
        target = self.source[result['line'] - 1]
        offset = result['column']

        code = target[:offset].rstrip(' \t#')
        comment = target[offset:].lstrip(' \t#')

        fixed = code + ('  # ' + comment if comment.strip() else '\n')

        self.source[result['line'] - 1] = fixed

    def fix_e265(self, result):
        """Fix spacing after block comment hash."""
        target = self.source[result['line'] - 1]

        indent = _get_indentation(target)
        line = target.lstrip(' \t')
        pos = next((index for index, c in enumerate(line) if c != '#'))
        hashes = line[:pos]
        comment = line[pos:].lstrip(' \t')

        # Ignore special comments, even in the middle of the file.
        if comment.startswith('!'):
            return

        fixed = indent + hashes + (' ' + comment if comment.strip() else '\n')

        self.source[result['line'] - 1] = fixed

    def fix_e266(self, result):
        """Fix too many block comment hashes."""
        target = self.source[result['line'] - 1]

        # Leave stylistic outlined blocks alone.
        if target.strip().endswith('#'):
            return

        indentation = _get_indentation(target)
        fixed = indentation + '# ' + target.lstrip('# \t')

        self.source[result['line'] - 1] = fixed

    def fix_e271(self, result):
        """Fix extraneous whitespace around keywords."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement=' ')

        if fixed == target:
            return []
        else:
            self.source[line_index] = fixed

    def fix_e301(self, result):
        """Add missing blank line."""
        cr = '\n'
        self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]

    def fix_e302(self, result):
        """Add missing 2 blank lines."""
        add_linenum = 2 - int(result['info'].split()[-1])
        offset = 1
        if self.source[result['line'] - 2].strip() == "\\":
            offset = 2
        cr = '\n' * add_linenum
        self.source[result['line'] - offset] = (
            cr + self.source[result['line'] - offset]
        )

    def fix_e303(self, result):
        """Remove extra blank lines."""
        delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
        delete_linenum = max(1, delete_linenum)

        # We need to count because pycodestyle reports an offset line number if
        # there are comments.
        cnt = 0
        line = result['line'] - 2
        modified_lines = []
        while cnt < delete_linenum and line >= 0:
            if not self.source[line].strip():
                self.source[line] = ''
                modified_lines.append(1 + line)  # Line indexed at 1
                cnt += 1
            line -= 1

        return modified_lines

    def fix_e304(self, result):
        """Remove blank line following function decorator."""
        line = result['line'] - 2
        if not self.source[line].strip():
            self.source[line] = ''

    def fix_e305(self, result):
        """Add missing 2 blank lines after end of function or class."""
        add_delete_linenum = 2 - int(result['info'].split()[-1])
        cnt = 0
        offset = result['line'] - 2
        modified_lines = []
        if add_delete_linenum < 0:
            # delete cr
            add_delete_linenum = abs(add_delete_linenum)
            while cnt < add_delete_linenum and offset >= 0:
                if not self.source[offset].strip():
                    self.source[offset] = ''
                    modified_lines.append(1 + offset)  # Line indexed at 1
                    cnt += 1
                offset -= 1
        else:
            # add cr
            cr = '\n'
            # check comment line
            while True:
                if offset < 0:
                    break
                line = self.source[offset].lstrip()
                if not line:
                    break
                if line[0] != '#':
                    break
                offset -= 1
            offset += 1
            self.source[offset] = cr + self.source[offset]
            modified_lines.append(1 + offset)  # Line indexed at 1.
        return modified_lines

    def fix_e401(self, result):
        """Put imports on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        if not target.lstrip().startswith('import'):
            return []

        indentation = re.split(pattern=r'\bimport\b',
                               string=target, maxsplit=1)[0]
        fixed = (target[:offset].rstrip('\t ,') + '\n' +
                 indentation + 'import ' + target[offset:].lstrip('\t ,'))
        self.source[line_index] = fixed

    def fix_e402(self, result):
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)
        for i in range(1, 100):
            line = "".join(self.source[line_index:line_index+i])
            try:
                generate_tokens("".join(line))
            except (SyntaxError, tokenize.TokenError):
                continue
            break
        if not (target in self.imports and self.imports[target] != line_index):
            mod_offset = get_module_imports_on_top_of_file(self.source,
                                                           line_index)
            self.source[mod_offset] = line + self.source[mod_offset]
        for offset in range(i):
            self.source[line_index+offset] = ''

    def fix_long_line_logically(self, result, logical):
        """Try to make lines fit within --max-line-length characters."""
        if (
            not logical or
            len(logical[2]) == 1 or
            self.source[result['line'] - 1].lstrip().startswith('#')
        ):
            return self.fix_long_line_physically(result)

        start_line_index = logical[0][0]
        end_line_index = logical[1][0]
        logical_lines = logical[2]

        previous_line = get_item(self.source, start_line_index - 1, default='')
        next_line = get_item(self.source, end_line_index + 1, default='')

        single_line = join_logical_line(''.join(logical_lines))

        try:
            fixed = self.fix_long_line(
                target=single_line,
                previous_line=previous_line,
                next_line=next_line,
                original=''.join(logical_lines))
        except (SyntaxError, tokenize.TokenError):
            return self.fix_long_line_physically(result)

        if fixed:
            for line_index in range(start_line_index, end_line_index + 1):
                self.source[line_index] = ''
            self.source[start_line_index] = fixed
            return range(start_line_index + 1, end_line_index + 1)

        return []

    def fix_long_line_physically(self, result):
        """Try to make lines fit within --max-line-length characters."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        previous_line = get_item(self.source, line_index - 1, default='')
        next_line = get_item(self.source, line_index + 1, default='')

        try:
            fixed = self.fix_long_line(
                target=target,
                previous_line=previous_line,
                next_line=next_line,
                original=target)
        except (SyntaxError, tokenize.TokenError):
            return []

        if fixed:
            self.source[line_index] = fixed
            return [line_index + 1]

        return []

    def fix_long_line(self, target, previous_line,
                      next_line, original):
        cache_entry = (target, previous_line, next_line)
        if cache_entry in self.long_line_ignore_cache:
            return []

        if target.lstrip().startswith('#'):
            if self.options.aggressive:
                # Wrap commented lines.
                return shorten_comment(
                    line=target,
                    max_line_length=self.options.max_line_length,
                    last_comment=not next_line.lstrip().startswith('#'))
            return []

        fixed = get_fixed_long_line(
            target=target,
            previous_line=previous_line,
            original=original,
            indent_word=self.indent_word,
            max_line_length=self.options.max_line_length,
            aggressive=self.options.aggressive,
            experimental=self.options.experimental,
            verbose=self.options.verbose)

        if fixed and not code_almost_equal(original, fixed):
            return fixed

        self.long_line_ignore_cache.add(cache_entry)
        return None

    def fix_e502(self, result):
        """Remove extraneous escape of newline."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'

    def fix_e701(self, result):
        """Put colon-separated compound statement on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        c = result['column']

        fixed_source = (target[:c] + '\n' +
                        _get_indentation(target) + self.indent_word +
                        target[c:].lstrip('\n\r \t\\'))
        self.source[result['line'] - 1] = fixed_source
        return [result['line'], result['line'] + 1]

    def fix_e702(self, result, logical):
        """Put semicolon-separated compound statement on separate lines."""
        if not logical:
            return []  # pragma: no cover
        logical_lines = logical[2]

        # Avoid applying this when indented.
        # https://docs.python.org/reference/compound_stmts.html
        for line in logical_lines:
            if (
                result['id'] == 'E702'
                and ':' in line
                and pycodestyle.STARTSWITH_INDENT_STATEMENT_REGEX.match(line)
            ):
                if self.options.verbose:
                    print(
                        '---> avoid fixing {error} with '
                        'other compound statements'.format(error=result['id']),
                        file=sys.stderr
                    )
                return []

        line_index = result['line'] - 1
        target = self.source[line_index]

        if target.rstrip().endswith('\\'):
            # Normalize '1; \\\n2' into '1; 2'.
            self.source[line_index] = target.rstrip('\n \r\t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]

        if target.rstrip().endswith(';'):
            self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
            return [line_index + 1]

        offset = result['column'] - 1
        first = target[:offset].rstrip(';').rstrip()
        second = (_get_indentation(logical_lines[0]) +
                  target[offset:].lstrip(';').lstrip())

        # Find inline comment.
        inline_comment = None
        if target[offset:].lstrip(';').lstrip()[:2] == '# ':
            inline_comment = target[offset:].lstrip(';')

        if inline_comment:
            self.source[line_index] = first + inline_comment
        else:
            self.source[line_index] = first + '\n' + second
        return [line_index + 1]

    def fix_e704(self, result):
        """Fix multiple statements on one line def"""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = STARTSWITH_DEF_REGEX.match(target)
        if match:
            self.source[line_index] = '{}\n{}{}'.format(
                match.group(0),
                _get_indentation(target) + self.indent_word,
                target[match.end(0):].lstrip())

    def fix_e711(self, result):
        """Fix comparison with None."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        right_offset = offset + 2
        if right_offset >= len(target):
            return []

        left = target[:offset].rstrip()
        center = target[offset:right_offset]
        right = target[right_offset:].lstrip()

        if center.strip() == '==':
            new_center = 'is'
        elif center.strip() == '!=':
            new_center = 'is not'
        else:
            return []

        self.source[line_index] = ' '.join([left, new_center, right])

    def fix_e712(self, result):
        """Fix (trivial case of) comparison with boolean."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # Handle very easy "not" special cases.
        if re.match(r'^\s*if [\w."\'\[\]]+ == False:$', target):
            self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) == False:',
                                             r'if not \1:', target, count=1)
        elif re.match(r'^\s*if [\w."\'\[\]]+ != True:$', target):
            self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) != True:',
                                             r'if not \1:', target, count=1)
        else:
            right_offset = offset + 2
            if right_offset >= len(target):
                return []

            left = target[:offset].rstrip()
            center = target[offset:right_offset]
            right = target[right_offset:].lstrip()

            # Handle simple cases only.
            new_right = None
            if center.strip() == '==':
                if re.match(r'\bTrue\b', right):
                    new_right = re.sub(r'\bTrue\b *', '', right, count=1)
            elif center.strip() == '!=':
                if re.match(r'\bFalse\b', right):
                    new_right = re.sub(r'\bFalse\b *', '', right, count=1)

            if new_right is None:
                return []

            if new_right[0].isalnum():
                new_right = ' ' + new_right

            self.source[line_index] = left + new_right

    def fix_e713(self, result):
        """Fix (trivial case of) non-membership check."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # to convert once 'not in' -> 'in'
        before_target = target[:offset]
        target = target[offset:]
        match_notin = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
        notin_pos_start, notin_pos_end = 0, 0
        if match_notin:
            notin_pos_start = match_notin.start(1)
            notin_pos_end = match_notin.end()
            target = '{}{} {}'.format(
                target[:notin_pos_start], 'in', target[notin_pos_end:])

        # fix 'not in'
        match = COMPARE_NEGATIVE_REGEX.search(target)
        if match:
            if match.group(3) == 'in':
                pos_start = match.start(1)
                new_target = '{5}{0}{1} {2} {3} {4}'.format(
                    target[:pos_start], match.group(2), match.group(1),
                    match.group(3), target[match.end():], before_target)
                if match_notin:
                    # revert 'in' -> 'not in'
                    pos_start = notin_pos_start + offset
                    pos_end = notin_pos_end + offset - 4  # len('not ')
                    new_target = '{}{} {}'.format(
                        new_target[:pos_start], 'not in', new_target[pos_end:])
                self.source[line_index] = new_target

    def fix_e714(self, result):
        """Fix object identity should be 'is not' case."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # to convert once 'is not' -> 'is'
        before_target = target[:offset]
        target = target[offset:]
        match_isnot = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
        isnot_pos_start, isnot_pos_end = 0, 0
        if match_isnot:
            isnot_pos_start = match_isnot.start(1)
            isnot_pos_end = match_isnot.end()
            target = '{}{} {}'.format(
                target[:isnot_pos_start], 'in', target[isnot_pos_end:])

        match = COMPARE_NEGATIVE_REGEX.search(target)
        if match:
            if match.group(3).startswith('is'):
                pos_start = match.start(1)
                new_target = '{5}{0}{1} {2} {3} {4}'.format(
                    target[:pos_start], match.group(2), match.group(3),
                    match.group(1), target[match.end():], before_target)
                if match_isnot:
                    # revert 'is' -> 'is not'
                    pos_start = isnot_pos_start + offset
                    pos_end = isnot_pos_end + offset - 4  # len('not ')
                    new_target = '{}{} {}'.format(
                        new_target[:pos_start], 'is not', new_target[pos_end:])
                self.source[line_index] = new_target

    def fix_e721(self, result):
        """fix comparison type"""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = COMPARE_TYPE_REGEX.search(target)
        if match:
            # NOTE: match objects
            # * type(a) == type(b) -> (None, None, 'a', '==')
            # * str == type(b) -> ('==', 'b', None, None)
            # * type("") != type(b) -> (None, None, '""', '!=')
            start = match.start()
            end = match.end()
            _prefix = ""
            _suffix = ""
            first_match_type_obj = match.groups()[1]
            if first_match_type_obj is None:
                _target_obj = match.groups()[2]
            else:
                _target_obj = match.groups()[1]
                _suffix = target[end:]

            isinstance_stmt = " isinstance"
            is_not_condition = (
                match.groups()[0] == "!=" or match.groups()[3] == "!="
            )
            if is_not_condition:
                isinstance_stmt = " not isinstance"

            _type_comp = f"{_target_obj}, {target[:start]}"

            _prefix_tmp = target[:start].split()
            if len(_prefix_tmp) >= 1:
                _type_comp = f"{_target_obj}, {target[:start]}"
                if first_match_type_obj is not None:
                    _prefix = " ".join(_prefix_tmp[:-1])
                    _type_comp = f"{_target_obj}, {_prefix_tmp[-1]}"
                else:
                    _prefix = " ".join(_prefix_tmp)

            _suffix_tmp = target[end:]
            _suffix_type_match = TYPE_REGEX.search(_suffix_tmp)
            if len(_suffix_tmp.split()) >= 1 and _suffix_type_match:
                if _suffix_type_match:
                    type_match_end = _suffix_type_match.end()
                    _suffix = _suffix_tmp[type_match_end:]
            if _suffix_type_match:
                cmp_b = _suffix_type_match.groups()[0]
                _type_comp = f"{_target_obj}, {cmp_b}"

            fix_line = f"{_prefix}{isinstance_stmt}({_type_comp}){_suffix}"
            self.source[line_index] = fix_line

    def fix_e722(self, result):
        """fix bare except"""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = BARE_EXCEPT_REGEX.search(target)
        if match:
            self.source[line_index] = '{}{}{}'.format(
                target[:result['column'] - 1], "except BaseException:",
                target[match.end():])

    def fix_e731(self, result):
        """Fix do not assign a lambda expression check."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = LAMBDA_REGEX.search(target)
        if match:
            end = match.end()
            self.source[line_index] = '{}def {}({}): return {}'.format(
                target[:match.start(0)], match.group(1), match.group(2),
                target[end:].lstrip())

    def fix_w291(self, result):
        """Remove trailing whitespace."""
        fixed_line = self.source[result['line'] - 1].rstrip()
        self.source[result['line'] - 1] = fixed_line + '\n'

    def fix_w391(self, _):
        """Remove trailing blank lines."""
        blank_count = 0
        for line in reversed(self.source):
            line = line.rstrip()
            if line:
                break
            else:
                blank_count += 1

        original_length = len(self.source)
        self.source = self.source[:original_length - blank_count]
        return range(1, 1 + original_length)

    def fix_w503(self, result):
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        one_string_token = target.split()[0]
        try:
            ts = generate_tokens(one_string_token)
        except (SyntaxError, tokenize.TokenError):
            return
        if not _is_binary_operator(ts[0][0], one_string_token):
            return
        # find comment
        comment_index = 0
        found_not_comment_only_line = False
        comment_only_linenum = 0
        for i in range(5):
            # NOTE: try to parse the code up to 5 times
            if (line_index - i) < 0:
                break
            from_index = line_index - i - 1
            if from_index < 0 or len(self.source) <= from_index:
                break
            to_index = line_index + 1
            strip_line = self.source[from_index].lstrip()
            if (
                not found_not_comment_only_line and
                strip_line and strip_line[0] == '#'
            ):
                comment_only_linenum += 1
                continue
            found_not_comment_only_line = True
            try:
                ts = generate_tokens("".join(self.source[from_index:to_index]))
            except (SyntaxError, tokenize.TokenError):
                continue
            newline_count = 0
            newline_index = []
            for index, t in enumerate(ts):
                if t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_index.append(index)
                    newline_count += 1
            if newline_count > 2:
                tts = ts[newline_index[-3]:]
            else:
                tts = ts
            old = []
            for t in tts:
                if t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_count -= 1
                    if newline_count <= 1:
                        break
                if tokenize.COMMENT == t[0] and old and old[0] != tokenize.NL:
                    comment_index = old[3][1]
                    break
                old = t
            break
        i = target.index(one_string_token)
        fix_target_line = line_index - 1 - comment_only_linenum
        self.source[line_index] = '{}{}'.format(
            target[:i], target[i + len(one_string_token):].lstrip())
        nl = find_newline(self.source[fix_target_line:line_index])
        before_line = self.source[fix_target_line]
        bl = before_line.index(nl)
        if comment_index:
            self.source[fix_target_line] = '{} {} {}'.format(
                before_line[:comment_index], one_string_token,
                before_line[comment_index + 1:])
        else:
            if before_line[:bl].endswith("#"):
                # special case
                # see: https://github.com/hhatto/autopep8/issues/503
                self.source[fix_target_line] = '{}{} {}'.format(
                    before_line[:bl-2], one_string_token, before_line[bl-2:])
            else:
                self.source[fix_target_line] = '{} {}{}'.format(
                    before_line[:bl], one_string_token, before_line[bl:])

    def fix_w504(self, result):
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        # NOTE: W504 is not correctly reported in pycodestyle==2.4.0
        comment_index = 0
        operator_position = None  # (start_position, end_position)
        for i in range(1, 6):
            to_index = line_index + i
            try:
                ts = generate_tokens("".join(self.source[line_index:to_index]))
            except (SyntaxError, tokenize.TokenError):
                continue
            newline_count = 0
            newline_index = []
            for index, t in enumerate(ts):
                if _is_binary_operator(t[0], t[1]):
                    if t[2][0] == 1 and t[3][0] == 1:
                        operator_position = (t[2][1], t[3][1])
                elif t[0] == tokenize.NAME and t[1] in ("and", "or"):
                    if t[2][0] == 1 and t[3][0] == 1:
                        operator_position = (t[2][1], t[3][1])
                elif t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_index.append(index)
                    newline_count += 1
            if newline_count > 2:
                tts = ts[:newline_index[-3]]
            else:
                tts = ts
            old = []
            for t in tts:
                if tokenize.COMMENT == t[0] and old:
                    comment_row, comment_index = old[3]
                    break
                old = t
            break
        if not operator_position:
            return
        target_operator = target[operator_position[0]:operator_position[1]]

        if comment_index and comment_row == 1:
            self.source[line_index] = '{}{}'.format(
                target[:operator_position[0]].rstrip(),
                target[comment_index:])
        else:
            self.source[line_index] = '{}{}{}'.format(
                target[:operator_position[0]].rstrip(),
                target[operator_position[1]:].lstrip(),
                target[operator_position[1]:])

        next_line = self.source[line_index + 1]
        next_line_indent = 0
        m = re.match(r'\s*', next_line)
        if m:
            next_line_indent = m.span()[1]
        self.source[line_index + 1] = '{}{} {}'.format(
            next_line[:next_line_indent], target_operator,
            next_line[next_line_indent:])

    def fix_w605(self, result):
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)
        self.source[line_index] = '{}\\{}'.format(
            target[:offset + 1], target[offset + 1:])


def get_module_imports_on_top_of_file(source, import_line_index):
    """Return import or from keyword position.

    example:
    > 0: import sys
      1: import os
      2:
      3: def function():
    """
    def is_string_literal(line):
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    def is_future_import(line):
        nodes = ast.parse(line)
        for n in nodes.body:
            if isinstance(n, ast.ImportFrom) and n.module == '__future__':
                return True
        return False

    def has_future_import(source):
        offset = 0
        line = ''
        for _, next_line in source:
            for line_part in next_line.strip().splitlines(True):
                line = line + line_part
                try:
                    return is_future_import(line), offset
                except SyntaxError:
                    continue
            offset += 1
        return False, offset

    allowed_try_keywords = ('try', 'except', 'else', 'finally')
    in_docstring = False
    docstring_kind = '"""'
    source_stream = iter(enumerate(source))
    for cnt, line in source_stream:
        if not in_docstring:
            m = DOCSTRING_START_REGEX.match(line.lstrip())
            if m is not None:
                in_docstring = True
                docstring_kind = m.group('kind')
                remain = line[m.end(): m.endpos].rstrip()
                if remain[-3:] == docstring_kind:  # one line doc
                    in_docstring = False
                continue
        if in_docstring:
            if line.rstrip()[-3:] == docstring_kind:
                in_docstring = False
            continue

        if not line.rstrip():
            continue
        elif line.startswith('#'):
            continue

        if line.startswith('import '):
            if cnt == import_line_index:
                continue
            return cnt
        elif line.startswith('from '):
            if cnt == import_line_index:
                continue
            hit, offset = has_future_import(
                itertools.chain([(cnt, line)], source_stream)
            )
            if hit:
                # move to the back
                return cnt + offset + 1
            return cnt
        elif pycodestyle.DUNDER_REGEX.match(line):
            return cnt
        elif any(line.startswith(kw) for kw in allowed_try_keywords):
            continue
        elif is_string_literal(line):
            return cnt
        else:
            return cnt
    return 0


def get_index_offset_contents(result, source):
    """Return (line_index, column_offset, line_contents)."""
    line_index = result['line'] - 1
    return (line_index,
            result['column'] - 1,
            source[line_index])


def get_fixed_long_line(target, previous_line, original,
                        indent_word='    ', max_line_length=79,
                        aggressive=0, experimental=False, verbose=False):
    """Break up long line and return result.

    Do this by generating multiple reformatted candidates and then
    ranking the candidates to heuristically select the best option.

    """
    indent = _get_indentation(target)
    source = target[len(indent):]
    assert source.lstrip() == source
    assert not target.lstrip().startswith('#')

    # Check for partial multiline.
    tokens = list(generate_tokens(source))

    candidates = shorten_line(
        tokens, source, indent,
        indent_word,
        max_line_length,
        aggressive=aggressive,
        experimental=experimental,
        previous_line=previous_line)

    # Also sort alphabetically as a tie breaker (for determinism).
    candidates = sorted(
        sorted(set(candidates).union([target, original])),
        key=lambda x: line_shortening_rank(
            x,
            indent_word,
            max_line_length,
            experimental=experimental))

    if verbose >= 4:
        print(('-' * 79 + '\n').join([''] + candidates + ['']),
              file=wrap_output(sys.stderr, 'utf-8'))

    if candidates:
        best_candidate = candidates[0]

        # Don't allow things to get longer.
        if longest_line_length(best_candidate) > longest_line_length(original):
            return None

        return best_candidate


def longest_line_length(code):
    """Return length of longest line."""
    if len(code) == 0:
        return 0
    return max(len(line) for line in code.splitlines())


def join_logical_line(logical_line):
    """Return single line based on logical line input."""
    indentation = _get_indentation(logical_line)

    return indentation + untokenize_without_newlines(
        generate_tokens(logical_line.lstrip())) + '\n'


def untokenize_without_newlines(tokens):
    """Return source code based on tokens."""
    text = ''
    last_row = 0
    last_column = -1

    for t in tokens:
        token_string = t[1]
        (start_row, start_column) = t[2]
        (end_row, end_column) = t[3]

        if start_row > last_row:
            last_column = 0
        if (
            (start_column > last_column or token_string == '\n') and
            not text.endswith(' ')
        ):
            text += ' '

        if token_string != '\n':
            text += token_string

        last_row = end_row
        last_column = end_column

    return text.rstrip()


def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end)


def _get_logical(source_lines, result, logical_start, logical_end):
    """Return the logical line corresponding to the result.

    Assumes input is already E702-clean.

    """
    row = result['line'] - 1
    col = result['column'] - 1
    ls = None
    le = None
    for i in range(0, len(logical_start), 1):
        assert logical_end
        x = logical_end[i]
        if x[0] > row or (x[0] == row and x[1] > col):
            le = x
            ls = logical_start[i]
            break
    if ls is None:
        return None
    original = source_lines[ls[0]:le[0] + 1]
    return ls, le, original


def get_item(items, index, default=None):
    if 0 <= index < len(items):
        return items[index]

    return default


def reindent(source, indent_size, leave_tabs=False):
    """Reindent all lines."""
    reindenter = Reindenter(source, leave_tabs)
    return reindenter.run(indent_size)


def code_almost_equal(a, b):
    """Return True if code is similar.

    Ignore whitespace when comparing specific line.

    """
    split_a = split_and_strip_non_empty_lines(a)
    split_b = split_and_strip_non_empty_lines(b)

    if len(split_a) != len(split_b):
        return False

    for (index, _) in enumerate(split_a):
        if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
            return False

    return True
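
# For example, "print( 1,2 )" and "print(1, 2)" compare as almost equal, while
# a candidate that re-wraps the code onto a different number of non-empty
# lines does not.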
1792def split_and_strip_non_empty_lines(text):
1793 """Return lines split by newline.
1795 Ignore empty lines.
1797 """
1798 return [line.strip() for line in text.splitlines() if line.strip()]
1801def find_newline(source):
1802 """Return type of newline used in source.
1804 Input is a list of lines.
1806 """
1807 assert not isinstance(source, str)
1809 counter = collections.defaultdict(int)
1810 for line in source:
1811 if line.endswith(CRLF):
1812 counter[CRLF] += 1
1813 elif line.endswith(CR):
1814 counter[CR] += 1
1815 elif line.endswith(LF):
1816 counter[LF] += 1
1818 return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
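# Illustrative sketch, assuming the module constants CRLF='\r\n',
# CR='\r' and LF='\n': the most frequent ending wins, with LF as the
# fallback for input without line endings:
#
#     >>> find_newline(['a\r\n', 'b\r\n', 'c\n'])
#     '\r\n'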
1821def _get_indentword(source):
1822 """Return indentation type."""
1823 indent_word = '    '  # Default in case source has no indentation
1824 try:
1825 for t in generate_tokens(source):
1826 if t[0] == token.INDENT:
1827 indent_word = t[1]
1828 break
1829 except (SyntaxError, tokenize.TokenError):
1830 pass
1831 return indent_word
1834def _get_indentation(line):
1835 """Return leading whitespace."""
1836 if line.strip():
1837 non_whitespace_index = len(line) - len(line.lstrip())
1838 return line[:non_whitespace_index]
1840 return ''
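# Illustrative sketch: only non-blank lines have indentation; blank
# lines yield the empty string:
#
#     >>> _get_indentation('    x = 1\n')
#     '    '
#     >>> _get_indentation('\n')
#     ''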
1843def get_diff_text(old, new, filename):
1844 """Return text of unified diff between old and new."""
1845 newline = '\n'
1846 diff = difflib.unified_diff(
1847 old, new,
1848 'original/' + filename,
1849 'fixed/' + filename,
1850 lineterm=newline)
1852 text = ''
1853 for line in diff:
1854 text += line
1856 # Work around missing newline (http://bugs.python.org/issue2142).
1857 if text and not line.endswith(newline):
1858 text += newline + r'\ No newline at end of file' + newline
1860 return text
1863def _priority_key(pep8_result):
1864 """Key for sorting PEP8 results.
1866 Global fixes should be done first. This is important for things like
1867 indentation.
1869 """
1870 priority = [
1871 # Fix multiline colon-based statements before semicolon-based ones.
1872 'e701',
1873 # Break multiline statements early.
1874 'e702',
1875 # Things that make lines longer.
1876 'e225', 'e231',
1877 # Remove extraneous whitespace before breaking lines.
1878 'e201',
1879 # Shorten whitespace in comment before resorting to wrapping.
1880 'e262'
1881 ]
1882 middle_index = 10000
1883 lowest_priority = [
1884 # We need to shorten lines last since the logical fixer can get in a
1885 # loop, which causes us to exit early.
1886 'e501',
1887 ]
1888 key = pep8_result['id'].lower()
1889 try:
1890 return priority.index(key)
1891 except ValueError:
1892 try:
1893 return middle_index + lowest_priority.index(key) + 1
1894 except ValueError:
1895 return middle_index
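# A hedged sorting sketch: with _priority_key(), E701 sorts first,
# unknown codes land in the middle, and E501 sorts last:
#
#     >>> results = [{'id': 'E501'}, {'id': 'E101'}, {'id': 'E701'}]
#     >>> [r['id'] for r in sorted(results, key=_priority_key)]
#     ['E701', 'E101', 'E501']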
1898def shorten_line(tokens, source, indentation, indent_word, max_line_length,
1899 aggressive=0, experimental=False, previous_line=''):
1900 """Separate line at OPERATOR.
1902 Multiple candidates will be yielded.
1904 """
1905 for candidate in _shorten_line(tokens=tokens,
1906 source=source,
1907 indentation=indentation,
1908 indent_word=indent_word,
1909 aggressive=aggressive,
1910 previous_line=previous_line):
1911 yield candidate
1913 if aggressive:
1914 for key_token_strings in SHORTEN_OPERATOR_GROUPS:
1915 shortened = _shorten_line_at_tokens(
1916 tokens=tokens,
1917 source=source,
1918 indentation=indentation,
1919 indent_word=indent_word,
1920 key_token_strings=key_token_strings,
1921 aggressive=aggressive)
1923 if shortened is not None and shortened != source:
1924 yield shortened
1926 if experimental:
1927 for shortened in _shorten_line_at_tokens_new(
1928 tokens=tokens,
1929 source=source,
1930 indentation=indentation,
1931 max_line_length=max_line_length):
1933 yield shortened
1936def _shorten_line(tokens, source, indentation, indent_word,
1937 aggressive=0, previous_line=''):
1938 """Separate line at OPERATOR.
1940 The input is expected to be free of newlines except for inside multiline
1941 strings and at the end.
1943 Multiple candidates will be yielded.
1945 """
1946 in_string = False
1947 for (token_type,
1948 token_string,
1949 start_offset,
1950 end_offset) in token_offsets(tokens):
1952 if IS_SUPPORT_TOKEN_FSTRING:
1953 if token_type == tokenize.FSTRING_START:
1954 in_string = True
1955 elif token_type == tokenize.FSTRING_END:
1956 in_string = False
1957 if in_string:
1958 continue
1960 if (
1961 token_type == tokenize.COMMENT and
1962 not is_probably_part_of_multiline(previous_line) and
1963 not is_probably_part_of_multiline(source) and
1964 not source[start_offset + 1:].strip().lower().startswith(
1965 ('noqa', 'pragma:', 'pylint:'))
1966 ):
1967 # Move inline comments to previous line.
1968 first = source[:start_offset]
1969 second = source[start_offset:]
1970 yield (indentation + second.strip() + '\n' +
1971 indentation + first.strip() + '\n')
1972 elif token_type == token.OP and token_string != '=':
1973 # Don't break on '=' after keyword as this violates PEP 8.
1975 assert token_type != token.INDENT
1977 first = source[:end_offset]
1979 second_indent = indentation
1980 if (first.rstrip().endswith('(') and
1981 source[end_offset:].lstrip().startswith(')')):
1982 pass
1983 elif first.rstrip().endswith('('):
1984 second_indent += indent_word
1985 elif '(' in first:
1986 second_indent += ' ' * (1 + first.find('('))
1987 else:
1988 second_indent += indent_word
1990 second = (second_indent + source[end_offset:].lstrip())
1991 if (
1992 not second.strip() or
1993 second.lstrip().startswith('#')
1994 ):
1995 continue
1997 # Do not begin a line with a comma
1998 if second.lstrip().startswith(','):
1999 continue
2000 # Do not end a line with a dot
2001 if first.rstrip().endswith('.'):
2002 continue
2003 if token_string in '+-*/':
2004 fixed = first + ' \\' + '\n' + second
2005 else:
2006 fixed = first + '\n' + second
2008 # Only fix if syntax is okay.
2009 if check_syntax(normalize_multiline(fixed)
2010 if aggressive else fixed):
2011 yield indentation + fixed
2014def _is_binary_operator(token_type, text):
2015 return ((token_type == tokenize.OP or text in ['and', 'or']) and
2016 text not in '()[]{},:.;@=%~')
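# Illustrative sketch: arithmetic and boolean operators qualify as
# binary operators, while punctuation such as '=' does not:
#
#     >>> _is_binary_operator(tokenize.OP, '+')
#     True
#     >>> _is_binary_operator(tokenize.NAME, 'and')
#     True
#     >>> _is_binary_operator(tokenize.OP, '=')
#     False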
2019# A convenient way to handle tokens.
2020Token = collections.namedtuple('Token', ['token_type', 'token_string',
2021 'spos', 'epos', 'line'])
2024class ReformattedLines(object):
2026 """The reflowed lines of atoms.
2028 Each part of the line is represented as an "atom." They can be moved
2029 around when need be to get the optimal formatting.
2031 """
2033 ###########################################################################
2034 # Private Classes
2036 class _Indent(object):
2038 """Represent an indentation in the atom stream."""
2040 def __init__(self, indent_amt):
2041 self._indent_amt = indent_amt
2043 def emit(self):
2044 return ' ' * self._indent_amt
2046 @property
2047 def size(self):
2048 return self._indent_amt
2050 class _Space(object):
2052 """Represent a space in the atom stream."""
2054 def emit(self):
2055 return ' '
2057 @property
2058 def size(self):
2059 return 1
2061 class _LineBreak(object):
2063 """Represent a line break in the atom stream."""
2065 def emit(self):
2066 return '\n'
2068 @property
2069 def size(self):
2070 return 0
2072 def __init__(self, max_line_length):
2073 self._max_line_length = max_line_length
2074 self._lines = []
2075 self._bracket_depth = 0
2076 self._prev_item = None
2077 self._prev_prev_item = None
2078 self._in_fstring = False
2080 def __repr__(self):
2081 return self.emit()
2083 ###########################################################################
2084 # Public Methods
2086 def add(self, obj, indent_amt, break_after_open_bracket):
2087 if isinstance(obj, Atom):
2088 self._add_item(obj, indent_amt)
2089 return
2091 self._add_container(obj, indent_amt, break_after_open_bracket)
2093 def add_comment(self, item):
2094 num_spaces = 2
2095 if len(self._lines) > 1:
2096 if isinstance(self._lines[-1], self._Space):
2097 num_spaces -= 1
2098 if len(self._lines) > 2:
2099 if isinstance(self._lines[-2], self._Space):
2100 num_spaces -= 1
2102 while num_spaces > 0:
2103 self._lines.append(self._Space())
2104 num_spaces -= 1
2105 self._lines.append(item)
2107 def add_indent(self, indent_amt):
2108 self._lines.append(self._Indent(indent_amt))
2110 def add_line_break(self, indent):
2111 self._lines.append(self._LineBreak())
2112 self.add_indent(len(indent))
2114 def add_line_break_at(self, index, indent_amt):
2115 self._lines.insert(index, self._LineBreak())
2116 self._lines.insert(index + 1, self._Indent(indent_amt))
2118 def add_space_if_needed(self, curr_text, equal=False):
2119 if (
2120 not self._lines or isinstance(
2121 self._lines[-1], (self._LineBreak, self._Indent, self._Space))
2122 ):
2123 return
2125 prev_text = str(self._prev_item)
2126 prev_prev_text = (
2127 str(self._prev_prev_item) if self._prev_prev_item else '')
2129 if (
2130 # The previous item was a keyword or identifier and the current
2131 # item isn't an operator that doesn't require a space.
2132 ((self._prev_item.is_keyword or self._prev_item.is_string or
2133 self._prev_item.is_name or self._prev_item.is_number) and
2134 (curr_text[0] not in '([{.,:}])' or
2135 (curr_text[0] == '=' and equal))) or
2137 # Don't place spaces around a '.', unless it's in an 'import'
2138 # statement.
2139 ((prev_prev_text != 'from' and prev_text[-1] != '.' and
2140 curr_text != 'import') and
2142 # Don't place a space before a colon.
2143 curr_text[0] != ':' and
2145 # Don't split up ending brackets by spaces.
2146 ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or
2148 # Put a space after a colon or comma.
2149 prev_text[-1] in ':,' or
2151 # Put space around '=' if asked to.
2152 (equal and prev_text == '=') or
2154 # Put spaces around non-unary arithmetic operators.
2155 ((self._prev_prev_item and
2156 (prev_text not in '+-' and
2157 (self._prev_prev_item.is_name or
2158 self._prev_prev_item.is_number or
2159 self._prev_prev_item.is_string)) and
2160 prev_text in ('+', '-', '%', '*', '/', '//', '**', 'in')))))
2161 ):
2162 self._lines.append(self._Space())
2164 def previous_item(self):
2165 """Return the previous non-whitespace item."""
2166 return self._prev_item
2168 def fits_on_current_line(self, item_extent):
2169 return self.current_size() + item_extent <= self._max_line_length
2171 def current_size(self):
2172 """The size of the current line minus the indentation."""
2173 size = 0
2174 for item in reversed(self._lines):
2175 size += item.size
2176 if isinstance(item, self._LineBreak):
2177 break
2179 return size
2181 def line_empty(self):
2182 return (self._lines and
2183 isinstance(self._lines[-1],
2184 (self._LineBreak, self._Indent)))
2186 def emit(self):
2187 string = ''
2188 for item in self._lines:
2189 if isinstance(item, self._LineBreak):
2190 string = string.rstrip()
2191 string += item.emit()
2193 return string.rstrip() + '\n'
2195 ###########################################################################
2196 # Private Methods
2198 def _add_item(self, item, indent_amt):
2199 """Add an item to the line.
2201 Reflow the line to get the best formatting after the item is
2202 inserted. The bracket depth indicates if the item is being
2203 inserted inside of a container or not.
2205 """
2206 if item.is_fstring_start:
2207 self._in_fstring = True
2208 elif self._prev_item and self._prev_item.is_fstring_end:
2209 self._in_fstring = False
2211 if self._prev_item and self._prev_item.is_string and item.is_string:
2212 # Place consecutive string literals on separate lines.
2213 self._lines.append(self._LineBreak())
2214 self._lines.append(self._Indent(indent_amt))
2216 item_text = str(item)
2217 if self._lines and self._bracket_depth:
2218 # Adding the item into a container.
2219 self._prevent_default_initializer_splitting(item, indent_amt)
2221 if item_text in '.,)]}':
2222 self._split_after_delimiter(item, indent_amt)
2224 elif self._lines and not self.line_empty():
2225 # Adding the item outside of a container.
2226 if self.fits_on_current_line(len(item_text)):
2227 self._enforce_space(item)
2229 else:
2230 # Line break for the new item.
2231 self._lines.append(self._LineBreak())
2232 self._lines.append(self._Indent(indent_amt))
2234 self._lines.append(item)
2235 self._prev_item, self._prev_prev_item = item, self._prev_item
2237 if item_text in '([{' and not self._in_fstring:
2238 self._bracket_depth += 1
2240 elif item_text in '}])' and not self._in_fstring:
2241 self._bracket_depth -= 1
2242 assert self._bracket_depth >= 0
2244 def _add_container(self, container, indent_amt, break_after_open_bracket):
2245 actual_indent = indent_amt + 1
2247 if (
2248 str(self._prev_item) != '=' and
2249 not self.line_empty() and
2250 not self.fits_on_current_line(
2251 container.size + self._bracket_depth + 2)
2252 ):
2254 if str(container)[0] == '(' and self._prev_item.is_name:
2255 # Don't split before the opening bracket of a call.
2256 break_after_open_bracket = True
2257 actual_indent = indent_amt + 4
2258 elif (
2259 break_after_open_bracket or
2260 str(self._prev_item) not in '([{'
2261 ):
2262 # If the container doesn't fit on the current line and the
2263 # current line isn't empty, place the container on the next
2264 # line.
2265 self._lines.append(self._LineBreak())
2266 self._lines.append(self._Indent(indent_amt))
2267 break_after_open_bracket = False
2268 else:
2269 actual_indent = self.current_size() + 1
2270 break_after_open_bracket = False
2272 if isinstance(container, (ListComprehension, IfExpression)):
2273 actual_indent = indent_amt
2275 # Increase the continued indentation only if recursing on a
2276 # container.
2277 container.reflow(self, ' ' * actual_indent,
2278 break_after_open_bracket=break_after_open_bracket)
2280 def _prevent_default_initializer_splitting(self, item, indent_amt):
2281 """Prevent splitting between a default initializer.
2283 When there is a default initializer, it's best to keep it all on
2284 the same line. It's nicer and more readable, even if it goes
2285 over the maximum allowable line length. This goes back along the
2286 current line to determine if we have a default initializer, and,
2287 if so, to remove extraneous whitespace and add a line
2288 break/indent before it if needed.
2290 """
2291 if str(item) == '=':
2292 # This is the assignment in the initializer. Just remove spaces for
2293 # now.
2294 self._delete_whitespace()
2295 return
2297 if (not self._prev_item or not self._prev_prev_item or
2298 str(self._prev_item) != '='):
2299 return
2301 self._delete_whitespace()
2302 prev_prev_index = self._lines.index(self._prev_prev_item)
2304 if (
2305 isinstance(self._lines[prev_prev_index - 1], self._Indent) or
2306 self.fits_on_current_line(item.size + 1)
2307 ):
2308 # The default initializer is already the only item on this line.
2309 # Don't insert a newline here.
2310 return
2312 # Replace the space with a newline/indent combo.
2313 if isinstance(self._lines[prev_prev_index - 1], self._Space):
2314 del self._lines[prev_prev_index - 1]
2316 self.add_line_break_at(self._lines.index(self._prev_prev_item),
2317 indent_amt)
2319 def _split_after_delimiter(self, item, indent_amt):
2320 """Split the line only after a delimiter."""
2321 self._delete_whitespace()
2323 if self.fits_on_current_line(item.size):
2324 return
2326 last_space = None
2327 for current_item in reversed(self._lines):
2328 if (
2329 last_space and
2330 (not isinstance(current_item, Atom) or
2331 not current_item.is_colon)
2332 ):
2333 break
2334 else:
2335 last_space = None
2336 if isinstance(current_item, self._Space):
2337 last_space = current_item
2338 if isinstance(current_item, (self._LineBreak, self._Indent)):
2339 return
2341 if not last_space:
2342 return
2344 self.add_line_break_at(self._lines.index(last_space), indent_amt)
2346 def _enforce_space(self, item):
2347 """Enforce a space in certain situations.
2349 There are cases where we will want a space where normally we
2350 wouldn't put one. This just enforces the addition of a space.
2352 """
2353 if isinstance(self._lines[-1],
2354 (self._Space, self._LineBreak, self._Indent)):
2355 return
2357 if not self._prev_item:
2358 return
2360 item_text = str(item)
2361 prev_text = str(self._prev_item)
2363 # Prefer a space around a '.' in an import statement, and between the
2364 # 'import' and '('.
2365 if (
2366 (item_text == '.' and prev_text == 'from') or
2367 (item_text == 'import' and prev_text == '.') or
2368 (item_text == '(' and prev_text == 'import')
2369 ):
2370 self._lines.append(self._Space())
2372 def _delete_whitespace(self):
2373 """Delete all whitespace from the end of the line."""
2374 while isinstance(self._lines[-1], (self._Space, self._LineBreak,
2375 self._Indent)):
2376 del self._lines[-1]
2379class Atom(object):
2381 """The smallest unbreakable unit that can be reflowed."""
2383 def __init__(self, atom):
2384 self._atom = atom
2386 def __repr__(self):
2387 return self._atom.token_string
2389 def __len__(self):
2390 return self.size
2392 def reflow(
2393 self, reflowed_lines, continued_indent, extent,
2394 break_after_open_bracket=False,
2395 is_list_comp_or_if_expr=False,
2396 next_is_dot=False
2397 ):
2398 if self._atom.token_type == tokenize.COMMENT:
2399 reflowed_lines.add_comment(self)
2400 return
2402 total_size = extent if extent else self.size
2404 if self._atom.token_string not in ',:([{}])':
2405 # Some atoms will need an extra 1-sized space token after them.
2406 total_size += 1
2408 prev_item = reflowed_lines.previous_item()
2409 if (
2410 not is_list_comp_or_if_expr and
2411 not reflowed_lines.fits_on_current_line(total_size) and
2412 not (next_is_dot and
2413 reflowed_lines.fits_on_current_line(self.size + 1)) and
2414 not reflowed_lines.line_empty() and
2415 not self.is_colon and
2416 not (prev_item and prev_item.is_name and
2417 str(self) == '(')
2418 ):
2419 # Start a new line if there is already something on the line and
2420 # adding this atom would make it go over the max line length.
2421 reflowed_lines.add_line_break(continued_indent)
2422 else:
2423 reflowed_lines.add_space_if_needed(str(self))
2425 reflowed_lines.add(self, len(continued_indent),
2426 break_after_open_bracket)
2428 def emit(self):
2429 return self.__repr__()
2431 @property
2432 def is_keyword(self):
2433 return keyword.iskeyword(self._atom.token_string)
2435 @property
2436 def is_string(self):
2437 return self._atom.token_type == tokenize.STRING
2439 @property
2440 def is_fstring_start(self):
2441 if not IS_SUPPORT_TOKEN_FSTRING:
2442 return False
2443 return self._atom.token_type == tokenize.FSTRING_START
2445 @property
2446 def is_fstring_end(self):
2447 if not IS_SUPPORT_TOKEN_FSTRING:
2448 return False
2449 return self._atom.token_type == tokenize.FSTRING_END
2451 @property
2452 def is_name(self):
2453 return self._atom.token_type == tokenize.NAME
2455 @property
2456 def is_number(self):
2457 return self._atom.token_type == tokenize.NUMBER
2459 @property
2460 def is_comma(self):
2461 return self._atom.token_string == ','
2463 @property
2464 def is_colon(self):
2465 return self._atom.token_string == ':'
2467 @property
2468 def size(self):
2469 return len(self._atom.token_string)
2472class Container(object):
2474 """Base class for all container types."""
2476 def __init__(self, items):
2477 self._items = items
2479 def __repr__(self):
2480 string = ''
2481 last_was_keyword = False
2483 for item in self._items:
2484 if item.is_comma:
2485 string += ', '
2486 elif item.is_colon:
2487 string += ': '
2488 else:
2489 item_string = str(item)
2490 if (
2491 string and
2492 (last_was_keyword or
2493 (not string.endswith(tuple('([{,.:}]) ')) and
2494 not item_string.startswith(tuple('([{,.:}])'))))
2495 ):
2496 string += ' '
2497 string += item_string
2499 last_was_keyword = item.is_keyword
2500 return string
2502 def __iter__(self):
2503 for element in self._items:
2504 yield element
2506 def __getitem__(self, idx):
2507 return self._items[idx]
2509 def reflow(self, reflowed_lines, continued_indent,
2510 break_after_open_bracket=False):
2511 last_was_container = False
2512 for (index, item) in enumerate(self._items):
2513 next_item = get_item(self._items, index + 1)
2515 if isinstance(item, Atom):
2516 is_list_comp_or_if_expr = (
2517 isinstance(self, (ListComprehension, IfExpression)))
2518 item.reflow(reflowed_lines, continued_indent,
2519 self._get_extent(index),
2520 is_list_comp_or_if_expr=is_list_comp_or_if_expr,
2521 next_is_dot=(next_item and
2522 str(next_item) == '.'))
2523 if last_was_container and item.is_comma:
2524 reflowed_lines.add_line_break(continued_indent)
2525 last_was_container = False
2526 else: # isinstance(item, Container)
2527 reflowed_lines.add(item, len(continued_indent),
2528 break_after_open_bracket)
2529 last_was_container = not isinstance(item, (ListComprehension,
2530 IfExpression))
2532 if (
2533 break_after_open_bracket and index == 0 and
2534 # Prefer to keep empty containers together instead of
2535 # separating them.
2536 str(item) == self.open_bracket and
2537 (not next_item or str(next_item) != self.close_bracket) and
2538 (len(self._items) != 3 or not isinstance(next_item, Atom))
2539 ):
2540 reflowed_lines.add_line_break(continued_indent)
2541 break_after_open_bracket = False
2542 else:
2543 next_next_item = get_item(self._items, index + 2)
2544 if (
2545 str(item) not in ['.', '%', 'in'] and
2546 next_item and not isinstance(next_item, Container) and
2547 str(next_item) != ':' and
2548 next_next_item and (not isinstance(next_next_item, Atom) or
2549 str(next_item) == 'not') and
2550 not reflowed_lines.line_empty() and
2551 not reflowed_lines.fits_on_current_line(
2552 self._get_extent(index + 1) + 2)
2553 ):
2554 reflowed_lines.add_line_break(continued_indent)
2556 def _get_extent(self, index):
2557 """The extent of the full element.
2559 E.g., the length of a function call or keyword.
2561 """
2562 extent = 0
2563 prev_item = get_item(self._items, index - 1)
2564 seen_dot = prev_item and str(prev_item) == '.'
2565 while index < len(self._items):
2566 item = get_item(self._items, index)
2567 index += 1
2569 if isinstance(item, (ListComprehension, IfExpression)):
2570 break
2572 if isinstance(item, Container):
2573 if prev_item and prev_item.is_name:
2574 if seen_dot:
2575 extent += 1
2576 else:
2577 extent += item.size
2579 prev_item = item
2580 continue
2581 elif (str(item) not in ['.', '=', ':', 'not'] and
2582 not item.is_name and not item.is_string):
2583 break
2585 if str(item) == '.':
2586 seen_dot = True
2588 extent += item.size
2589 prev_item = item
2591 return extent
2593 @property
2594 def is_string(self):
2595 return False
2597 @property
2598 def size(self):
2599 return len(self.__repr__())
2601 @property
2602 def is_keyword(self):
2603 return False
2605 @property
2606 def is_name(self):
2607 return False
2609 @property
2610 def is_comma(self):
2611 return False
2613 @property
2614 def is_colon(self):
2615 return False
2617 @property
2618 def open_bracket(self):
2619 return None
2621 @property
2622 def close_bracket(self):
2623 return None
2626class Tuple(Container):
2628 """A high-level representation of a tuple."""
2630 @property
2631 def open_bracket(self):
2632 return '('
2634 @property
2635 def close_bracket(self):
2636 return ')'
2639class List(Container):
2641 """A high-level representation of a list."""
2643 @property
2644 def open_bracket(self):
2645 return '['
2647 @property
2648 def close_bracket(self):
2649 return ']'
2652class DictOrSet(Container):
2654 """A high-level representation of a dictionary or set."""
2656 @property
2657 def open_bracket(self):
2658 return '{'
2660 @property
2661 def close_bracket(self):
2662 return '}'
2665class ListComprehension(Container):
2667 """A high-level representation of a list comprehension."""
2669 @property
2670 def size(self):
2671 length = 0
2672 for item in self._items:
2673 if isinstance(item, IfExpression):
2674 break
2675 length += item.size
2676 return length
2679class IfExpression(Container):
2681 """A high-level representation of an if-expression."""
2684def _parse_container(tokens, index, for_or_if=None):
2685 """Parse a high-level container, such as a list, tuple, etc."""
2687 # Store the opening bracket.
2688 items = [Atom(Token(*tokens[index]))]
2689 index += 1
2691 num_tokens = len(tokens)
2692 while index < num_tokens:
2693 tok = Token(*tokens[index])
2695 if tok.token_string in ',)]}':
2696 # First check if we're at the end of a list comprehension or
2697 # if-expression. Don't add the ending token as part of the list
2698 # comprehension or if-expression, because they aren't part of those
2699 # constructs.
2700 if for_or_if == 'for':
2701 return (ListComprehension(items), index - 1)
2703 elif for_or_if == 'if':
2704 return (IfExpression(items), index - 1)
2706 # We've reached the end of a container.
2707 items.append(Atom(tok))
2709 # If not, then we are at the end of a container.
2710 if tok.token_string == ')':
2711 # The end of a tuple.
2712 return (Tuple(items), index)
2714 elif tok.token_string == ']':
2715 # The end of a list.
2716 return (List(items), index)
2718 elif tok.token_string == '}':
2719 # The end of a dictionary or set.
2720 return (DictOrSet(items), index)
2722 elif tok.token_string in '([{':
2723 # A sub-container is being defined.
2724 (container, index) = _parse_container(tokens, index)
2725 items.append(container)
2727 elif tok.token_string == 'for':
2728 (container, index) = _parse_container(tokens, index, 'for')
2729 items.append(container)
2731 elif tok.token_string == 'if':
2732 (container, index) = _parse_container(tokens, index, 'if')
2733 items.append(container)
2735 else:
2736 items.append(Atom(tok))
2738 index += 1
2740 return (None, None)
2743def _parse_tokens(tokens):
2744 """Parse the tokens.
2746 This converts the tokens into a form where we can manipulate them
2747 more easily.
2749 """
2751 index = 0
2752 parsed_tokens = []
2754 num_tokens = len(tokens)
2755 while index < num_tokens:
2756 tok = Token(*tokens[index])
2758 assert tok.token_type != token.INDENT
2759 if tok.token_type == tokenize.NEWLINE:
2760 # There's only one newline and it's at the end.
2761 break
2763 if tok.token_string in '([{':
2764 (container, index) = _parse_container(tokens, index)
2765 if not container:
2766 return None
2767 parsed_tokens.append(container)
2768 else:
2769 parsed_tokens.append(Atom(tok))
2771 index += 1
2773 return parsed_tokens
2776def _reflow_lines(parsed_tokens, indentation, max_line_length,
2777 start_on_prefix_line):
2778 """Reflow the lines so that it looks nice."""
2780 if str(parsed_tokens[0]) == 'def':
2781 # A function definition gets indented a bit more.
2782 continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
2783 else:
2784 continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
2786 break_after_open_bracket = not start_on_prefix_line
2788 lines = ReformattedLines(max_line_length)
2789 lines.add_indent(len(indentation.lstrip('\r\n')))
2791 if not start_on_prefix_line:
2792 # If splitting after the opening bracket will cause the first element
2793 # to be aligned weirdly, don't try it.
2794 first_token = get_item(parsed_tokens, 0)
2795 second_token = get_item(parsed_tokens, 1)
2797 if (
2798 first_token and second_token and
2799 str(second_token)[0] == '(' and
2800 len(indentation) + len(first_token) + 1 == len(continued_indent)
2801 ):
2802 return None
2804 for item in parsed_tokens:
2805 lines.add_space_if_needed(str(item), equal=True)
2807 save_continued_indent = continued_indent
2808 if start_on_prefix_line and isinstance(item, Container):
2809 start_on_prefix_line = False
2810 continued_indent = ' ' * (lines.current_size() + 1)
2812 item.reflow(lines, continued_indent, break_after_open_bracket)
2813 continued_indent = save_continued_indent
2815 return lines.emit()
2818def _shorten_line_at_tokens_new(tokens, source, indentation,
2819 max_line_length):
2820 """Shorten the line taking its length into account.
2822 The input is expected to be free of newlines except for inside
2823 multiline strings and at the end.
2825 """
2826 # Yield the original source so we can see if it's a better choice
2827 # than the shortened candidate lines we generate here.
2828 yield indentation + source
2830 parsed_tokens = _parse_tokens(tokens)
2832 if parsed_tokens:
2833 # Perform two reflows. The first one starts on the same line as the
2834 # prefix. The second starts on the line after the prefix.
2835 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2836 start_on_prefix_line=True)
2837 if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2838 yield fixed
2840 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2841 start_on_prefix_line=False)
2842 if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2843 yield fixed
2846def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
2847 key_token_strings, aggressive):
2848 """Separate line by breaking at tokens in key_token_strings.
2850 The input is expected to be free of newlines except for inside
2851 multiline strings and at the end.
2853 """
2854 offsets = []
2855 for (index, _t) in enumerate(token_offsets(tokens)):
2856 (token_type,
2857 token_string,
2858 start_offset,
2859 end_offset) = _t
2861 assert token_type != token.INDENT
2863 if token_string in key_token_strings:
2864 # Do not break in containers with zero or one item.
2865 unwanted_next_token = {
2866 '(': ')',
2867 '[': ']',
2868 '{': '}'}.get(token_string)
2869 if unwanted_next_token:
2870 if (
2871 get_item(tokens,
2872 index + 1,
2873 default=[None, None])[1] == unwanted_next_token or
2874 get_item(tokens,
2875 index + 2,
2876 default=[None, None])[1] == unwanted_next_token
2877 ):
2878 continue
2880 if (
2881 index > 2 and token_string == '(' and
2882 tokens[index - 1][1] in ',(%['
2883 ):
2884 # Don't split after a tuple start, or before a tuple start if
2885 # the tuple is in a list.
2886 continue
2888 if end_offset < len(source) - 1:
2889 # Don't split right before newline.
2890 offsets.append(end_offset)
2891 else:
2892 # Break at adjacent strings. These were probably meant to be on
2893 # separate lines in the first place.
2894 previous_token = get_item(tokens, index - 1)
2895 if (
2896 token_type == tokenize.STRING and
2897 previous_token and previous_token[0] == tokenize.STRING
2898 ):
2899 offsets.append(start_offset)
2901 current_indent = None
2902 fixed = None
2903 for line in split_at_offsets(source, offsets):
2904 if fixed:
2905 fixed += '\n' + current_indent + line
2907 for symbol in '([{':
2908 if line.endswith(symbol):
2909 current_indent += indent_word
2910 else:
2911 # First line.
2912 fixed = line
2913 assert not current_indent
2914 current_indent = indent_word
2916 assert fixed is not None
2918 if check_syntax(normalize_multiline(fixed)
2919 if aggressive > 1 else fixed):
2920 return indentation + fixed
2922 return None
2925def token_offsets(tokens):
2926 """Yield tokens and offsets."""
2927 end_offset = 0
2928 previous_end_row = 0
2929 previous_end_column = 0
2930 for t in tokens:
2931 token_type = t[0]
2932 token_string = t[1]
2933 (start_row, start_column) = t[2]
2934 (end_row, end_column) = t[3]
2936 # Account for the whitespace between tokens.
2937 end_offset += start_column
2938 if previous_end_row == start_row:
2939 end_offset -= previous_end_column
2941 # Record the start offset of the token.
2942 start_offset = end_offset
2944 # Account for the length of the token itself.
2945 end_offset += len(token_string)
2947 yield (token_type,
2948 token_string,
2949 start_offset,
2950 end_offset)
2952 previous_end_row = end_row
2953 previous_end_column = end_column
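# A hedged walk-through: for the single-line source 'x = 1',
# token_offsets() yields character offsets into that line, so
# source[start_offset:end_offset] recovers each token:
#
#     NAME   'x' -> (0, 1)
#     OP     '=' -> (2, 3)
#     NUMBER '1' -> (4, 5)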
2956def normalize_multiline(line):
2957 """Normalize multiline-related code that will cause syntax error.
2959 This is for purposes of checking syntax.
2961 """
2962 if line.startswith(('def ', 'async def ')) and line.rstrip().endswith(':'):
2963 return line + ' pass'
2964 elif line.startswith('return '):
2965 return 'def _(): ' + line
2966 elif line.startswith('@'):
2967 return line + 'def _(): pass'
2968 elif line.startswith('class '):
2969 return line + ' pass'
2970 elif line.startswith(('if ', 'elif ', 'for ', 'while ')):
2971 return line + ' pass'
2973 return line
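# Illustrative sketch: fragments are padded into compilable statements
# purely so check_syntax() can be run against them:
#
#     >>> normalize_multiline('def foo():')
#     'def foo(): pass'
#     >>> normalize_multiline('return x')
#     'def _(): return x'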
2976def fix_whitespace(line, offset, replacement):
2977 """Replace whitespace at offset and return fixed line."""
2978 # Replace escaped newlines too
2979 left = line[:offset].rstrip('\n\r \t\\')
2980 right = line[offset:].lstrip('\n\r \t\\')
2981 if right.startswith('#'):
2982 return line
2984 return left + replacement + right
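# Illustrative sketch: the whitespace run around `offset` (including
# escaped newlines) is replaced, unless a comment follows:
#
#     >>> fix_whitespace('x  = 1', offset=1, replacement=' ')
#     'x = 1'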
2987def _execute_pep8(pep8_options, source):
2988 """Execute pycodestyle via python method calls."""
2989 class QuietReport(pycodestyle.BaseReport):
2991 """Version of checker that does not print."""
2993 def __init__(self, options):
2994 super(QuietReport, self).__init__(options)
2995 self.__full_error_results = []
2997 def error(self, line_number, offset, text, check):
2998 """Collect errors."""
2999 code = super(QuietReport, self).error(line_number,
3000 offset,
3001 text,
3002 check)
3003 if code:
3004 self.__full_error_results.append(
3005 {'id': code,
3006 'line': line_number,
3007 'column': offset + 1,
3008 'info': text})
3010 def full_error_results(self):
3011 """Return error results in detail.
3013 Results are in the form of a list of dictionaries. Each
3014 dictionary contains 'id', 'line', 'column', and 'info'.
3016 """
3017 return self.__full_error_results
3019 checker = pycodestyle.Checker('', lines=source, reporter=QuietReport,
3020 **pep8_options)
3021 checker.check_all()
3022 return checker.report.full_error_results()
3025def _remove_leading_and_normalize(line, with_rstrip=True):
3026 # Note: form feed ('\f') is deliberately not stripped by the first lstrip()
3027 if with_rstrip:
3028 return line.lstrip(' \t\v').rstrip(CR + LF) + '\n'
3029 return line.lstrip(' \t\v')
3032class Reindenter(object):
3034 """Reindents badly-indented code to uniformly use four-space indentation.
3036 Released to the public domain, by Tim Peters, 03 October 2000.
3038 """
3040 def __init__(self, input_text, leave_tabs=False):
3041 sio = io.StringIO(input_text)
3042 source_lines = sio.readlines()
3044 self.string_content_line_numbers = multiline_string_lines(input_text)
3046 # File lines, rstripped & tab-expanded. Dummy at start is so
3047 # that we can use tokenize's 1-based line numbering easily.
3048 # Note that a line is all-blank iff it is a newline.
3049 self.lines = []
3050 for line_number, line in enumerate(source_lines, start=1):
3051 # Do not modify if inside a multiline string.
3052 if line_number in self.string_content_line_numbers:
3053 self.lines.append(line)
3054 else:
3055 # Only expand leading tabs.
3056 with_rstrip = line_number != len(source_lines)
3057 if leave_tabs:
3058 self.lines.append(
3059 _get_indentation(line) +
3060 _remove_leading_and_normalize(line, with_rstrip)
3061 )
3062 else:
3063 self.lines.append(
3064 _get_indentation(line).expandtabs() +
3065 _remove_leading_and_normalize(line, with_rstrip)
3066 )
3068 self.lines.insert(0, None)
3069 self.index = 1 # index into self.lines of next line
3070 self.input_text = input_text
3072 def run(self, indent_size=DEFAULT_INDENT_SIZE):
3073 """Fix indentation and return modified line numbers.
3075 Line numbers are indexed at 1.
3077 """
3078 if indent_size < 1:
3079 return self.input_text
3081 try:
3082 stats = _reindent_stats(tokenize.generate_tokens(self.getline))
3083 except (SyntaxError, tokenize.TokenError):
3084 return self.input_text
3085 # Remove trailing empty lines.
3086 lines = self.lines
3087 # Sentinel.
3088 stats.append((len(lines), 0))
3089 # Map each count of leading spaces to the count we want.
3090 have2want = {}
3091 # Program after transformation.
3092 after = []
3093 # Copy over initial empty lines -- there's nothing to do until
3094 # we see a line with *something* on it.
3095 i = stats[0][0]
3096 after.extend(lines[1:i])
3097 for i in range(len(stats) - 1):
3098 thisstmt, thislevel = stats[i]
3099 nextstmt = stats[i + 1][0]
3100 have = _leading_space_count(lines[thisstmt])
3101 want = thislevel * indent_size
3102 if want < 0:
3103 # A comment line.
3104 if have:
3105 # An indented comment line. If we saw the same
3106 # indentation before, reuse what it most recently
3107 # mapped to.
3108 want = have2want.get(have, -1)
3109 if want < 0:
3110 # Then it probably belongs to the next real stmt.
3111 for j in range(i + 1, len(stats) - 1):
3112 jline, jlevel = stats[j]
3113 if jlevel >= 0:
3114 if have == _leading_space_count(lines[jline]):
3115 want = jlevel * indent_size
3116 break
3117 # Maybe it's a hanging comment like this one,
3118 if want < 0:
3119 # in which case we should shift it like its base
3120 # line got shifted.
3121 for j in range(i - 1, -1, -1):
3122 jline, jlevel = stats[j]
3123 if jlevel >= 0:
3124 want = (have + _leading_space_count(
3125 after[jline - 1]) -
3126 _leading_space_count(lines[jline]))
3127 break
3128 if want < 0:
3129 # Still no luck -- leave it alone.
3130 want = have
3131 else:
3132 want = 0
3133 assert want >= 0
3134 have2want[have] = want
3135 diff = want - have
3136 if diff == 0 or have == 0:
3137 after.extend(lines[thisstmt:nextstmt])
3138 else:
3139 for line_number, line in enumerate(lines[thisstmt:nextstmt],
3140 start=thisstmt):
3141 if line_number in self.string_content_line_numbers:
3142 after.append(line)
3143 elif diff > 0:
3144 if line == '\n':
3145 after.append(line)
3146 else:
3147 after.append(' ' * diff + line)
3148 else:
3149 remove = min(_leading_space_count(line), -diff)
3150 after.append(line[remove:])
3152 return ''.join(after)
3154 def getline(self):
3155 """Line-getter for tokenize."""
3156 if self.index >= len(self.lines):
3157 line = ''
3158 else:
3159 line = self.lines[self.index]
3160 self.index += 1
3161 return line
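# A hedged usage sketch: Reindenter rewrites inconsistent indentation
# to the requested size (DEFAULT_INDENT_SIZE, i.e. 4, by default):
#
#     >>> Reindenter('if True:\n  x = 1\n').run()
#     'if True:\n    x = 1\n'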
3164def _reindent_stats(tokens):
3165 """Return list of (lineno, indentlevel) pairs.
3167 One for each stmt and comment line. indentlevel is -1 for comment
3168 lines, as a signal that tokenize doesn't know what to do about them;
3169 indeed, they're our headache!
3171 """
3172 find_stmt = 1 # Next token begins a fresh stmt?
3173 level = 0 # Current indent level.
3174 stats = []
3176 for t in tokens:
3177 token_type = t[0]
3178 sline = t[2][0]
3179 line = t[4]
3181 if token_type == tokenize.NEWLINE:
3182 # A program statement, or ENDMARKER, will eventually follow,
3183 # after some (possibly empty) run of tokens of the form
3184 # (NL | COMMENT)* (INDENT | DEDENT+)?
3185 find_stmt = 1
3187 elif token_type == tokenize.INDENT:
3188 find_stmt = 1
3189 level += 1
3191 elif token_type == tokenize.DEDENT:
3192 find_stmt = 1
3193 level -= 1
3195 elif token_type == tokenize.COMMENT:
3196 if find_stmt:
3197 stats.append((sline, -1))
3198 # But we're still looking for a new stmt, so leave
3199 # find_stmt alone.
3201 elif token_type == tokenize.NL:
3202 pass
3204 elif find_stmt:
3205 # This is the first "real token" following a NEWLINE, so it
3206 # must be the first token of the next program statement, or an
3207 # ENDMARKER.
3208 find_stmt = 0
3209 if line: # Not endmarker.
3210 stats.append((sline, level))
3212 return stats
3215def _leading_space_count(line):
3216 """Return number of leading spaces in line."""
3217 i = 0
3218 while i < len(line) and line[i] == ' ':
3219 i += 1
3220 return i
3223def check_syntax(code):
3224 """Return True if syntax is okay."""
3225 try:
3226 return compile(code, '<string>', 'exec', dont_inherit=True)
3227 except (SyntaxError, TypeError, ValueError):
3228 return False
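# Illustrative sketch: a truthy code object signals success, so callers
# typically use the result in a boolean context:
#
#     >>> bool(check_syntax('x = 1'))
#     True
#     >>> check_syntax('x = ')
#     False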
3231def find_with_line_numbers(pattern, contents):
3232 """A wrapper around 're.finditer' to find line numbers.
3234 Returns a list of line numbers where pattern was found in contents.
3235 """
3236 matches = list(re.finditer(pattern, contents))
3237 if not matches:
3238 return []
3240 end = matches[-1].start()
3242 # -1 so a failed `rfind` maps to the first line.
3243 newline_offsets = {
3244 -1: 0
3245 }
3246 for line_num, m in enumerate(re.finditer(r'\n', contents), 1):
3247 offset = m.start()
3248 if offset > end:
3249 break
3250 newline_offsets[offset] = line_num
3252 def get_line_num(match, contents):
3253 """Get the line number of string in a files contents.
3255 Failing to find the newline is OK, -1 maps to 0
3257 """
3258 newline_offset = contents.rfind('\n', 0, match.start())
3259 return newline_offsets[newline_offset]
3261 return [get_line_num(match, contents) + 1 for match in matches]
3264def get_disabled_ranges(source):
3265 """Returns a list of tuples representing the disabled ranges.
3267 If disabled and no re-enable will disable for rest of file.
3269 """
3270 enable_line_nums = find_with_line_numbers(ENABLE_REGEX, source)
3271 disable_line_nums = find_with_line_numbers(DISABLE_REGEX, source)
3272 total_lines = len(re.findall("\n", source)) + 1
3274 enable_commands = {}
3275 for num in enable_line_nums:
3276 enable_commands[num] = True
3277 for num in disable_line_nums:
3278 enable_commands[num] = False
3280 disabled_ranges = []
3281 currently_enabled = True
3282 disabled_start = None
3284 for line, commanded_enabled in sorted(enable_commands.items()):
3285 if commanded_enabled is False and currently_enabled is True:
3286 disabled_start = line
3287 currently_enabled = False
3288 elif commanded_enabled is True and currently_enabled is False:
3289 disabled_ranges.append((disabled_start, line))
3290 currently_enabled = True
3292 if currently_enabled is False:
3293 disabled_ranges.append((disabled_start, total_lines))
3295 return disabled_ranges
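# A hedged example, assuming DISABLE_REGEX/ENABLE_REGEX match the
# '# autopep8: off' / '# autopep8: on' marker comments:
#
#     >>> get_disabled_ranges(
#     ...     'a=1\n# autopep8: off\nb=2\n# autopep8: on\nc=3\n')
#     [(2, 4)]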
3298def filter_disabled_results(result, disabled_ranges):
3299 """Filter out reports based on tuple of disabled ranges.
3301 """
3302 line = result['line']
3303 for disabled_range in disabled_ranges:
3304 if disabled_range[0] <= line <= disabled_range[1]:
3305 return False
3306 return True
3309def filter_results(source, results, aggressive):
3310 """Filter out spurious reports from pycodestyle.
3312 If aggressive is True, we allow possibly unsafe fixes (E711, E712).
3314 """
3315 non_docstring_string_line_numbers = multiline_string_lines(
3316 source, include_docstrings=False)
3317 all_string_line_numbers = multiline_string_lines(
3318 source, include_docstrings=True)
3320 commented_out_code_line_numbers = commented_out_code_lines(source)
3322 # Filter out the disabled ranges
3323 disabled_ranges = get_disabled_ranges(source)
3324 if disabled_ranges:
3325 results = [
3326 result for result in results if filter_disabled_results(
3327 result,
3328 disabled_ranges,
3329 )
3330 ]
3332 has_e901 = any(result['id'].lower() == 'e901' for result in results)
3334 for r in results:
3335 issue_id = r['id'].lower()
3337 if r['line'] in non_docstring_string_line_numbers:
3338 if issue_id.startswith(('e1', 'e501', 'w191')):
3339 continue
3341 if r['line'] in all_string_line_numbers:
3342 if issue_id in ['e501']:
3343 continue
3345 # We must offset by 1 for lines that contain the trailing contents of
3346 # multiline strings.
3347 if not aggressive and (r['line'] + 1) in all_string_line_numbers:
3348 # Do not modify multiline strings in non-aggressive mode. Removing
3349 # trailing whitespace could break doctests.
3350 if issue_id.startswith(('w29', 'w39')):
3351 continue
3353 if aggressive <= 0:
3354 if issue_id.startswith(('e711', 'e72', 'w6')):
3355 continue
3357 if aggressive <= 1:
3358 if issue_id.startswith(('e712', 'e713', 'e714')):
3359 continue
3361 if aggressive <= 2:
3362 if issue_id.startswith('e704'):
3363 continue
3365 if r['line'] in commented_out_code_line_numbers:
3366 if issue_id.startswith(('e261', 'e262', 'e501')):
3367 continue
3369 # Do not touch indentation if there is a token error caused by
3370 # incomplete multi-line statement. Otherwise, we risk screwing up the
3371 # indentation.
3372 if has_e901:
3373 if issue_id.startswith(('e1', 'e7')):
3374 continue
3376 yield r
3379def multiline_string_lines(source, include_docstrings=False):
3380 """Return line numbers that are within multiline strings.
3382 The line numbers are indexed at 1.
3384 Docstrings are ignored unless include_docstrings is True.
3386 """
3387 line_numbers = set()
3388 previous_token_type = ''
3389 _check_target_tokens = [tokenize.STRING]
3390 if IS_SUPPORT_TOKEN_FSTRING:
3391 _check_target_tokens.extend([
3392 tokenize.FSTRING_START,
3393 tokenize.FSTRING_MIDDLE,
3394 tokenize.FSTRING_END,
3395 ])
3396 try:
3397 for t in generate_tokens(source):
3398 token_type = t[0]
3399 start_row = t[2][0]
3400 end_row = t[3][0]
3402 if token_type in _check_target_tokens and start_row != end_row:
3403 if (
3404 include_docstrings or
3405 previous_token_type != tokenize.INDENT
3406 ):
3407 # We increment by one since we want the contents of the
3408 # string.
3409 line_numbers |= set(range(1 + start_row, 1 + end_row))
3411 previous_token_type = token_type
3412 except (SyntaxError, tokenize.TokenError):
3413 pass
3415 return line_numbers
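# Illustrative sketch: the returned numbers cover the continuation
# lines of a multiline string, not the line where it starts:
#
#     >>> sorted(multiline_string_lines('x = """\nhello\n"""\n'))
#     [2, 3]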
3418def commented_out_code_lines(source):
3419 """Return line numbers of comments that are likely code.
3421 Commented-out code is bad practice, but modifying it just adds even
3422 more clutter.
3424 """
3425 line_numbers = []
3426 try:
3427 for t in generate_tokens(source):
3428 token_type = t[0]
3429 token_string = t[1]
3430 start_row = t[2][0]
3431 line = t[4]
3433 # Ignore inline comments.
3434 if not line.lstrip().startswith('#'):
3435 continue
3437 if token_type == tokenize.COMMENT:
3438 stripped_line = token_string.lstrip('#').strip()
3439 with warnings.catch_warnings():
3440 # ignore SyntaxWarning in Python3.8+
3441 # refs:
3442 # https://bugs.python.org/issue15248
3443 # https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes
3444 warnings.filterwarnings("ignore", category=SyntaxWarning)
3445 if (
3446 ' ' in stripped_line and
3447 '#' not in stripped_line and
3448 check_syntax(stripped_line)
3449 ):
3450 line_numbers.append(start_row)
3451 except (SyntaxError, tokenize.TokenError):
3452 pass
3454 return line_numbers
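# Illustrative sketch: comments that compile as code are reported,
# while plain prose comments are not:
#
#     >>> commented_out_code_lines('# x = 1\n# just a note\ny = 2\n')
#     [1]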
3457def shorten_comment(line, max_line_length, last_comment=False):
3458 """Return trimmed or split long comment line.
3460 If there are no comments immediately following it, do a text wrap.
3461 Doing this wrapping on all comments in general would lead to jagged
3462 comment text.
3464 """
3465 assert len(line) > max_line_length
3466 line = line.rstrip()
3468 # PEP 8 recommends 72 characters for comment text.
3469 indentation = _get_indentation(line) + '# '
3470 max_line_length = min(max_line_length,
3471 len(indentation) + 72)
3473 MIN_CHARACTER_REPEAT = 5
3474 if (
3475 len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
3476 not line[-1].isalnum()
3477 ):
3478 # Trim comments that end with things like ---------
3479 return line[:max_line_length] + '\n'
3480 elif last_comment and re.match(r'\s*#+\s*\w+', line):
3481 split_lines = textwrap.wrap(line.lstrip(' \t#'),
3482 initial_indent=indentation,
3483 subsequent_indent=indentation,
3484 width=max_line_length,
3485 break_long_words=False,
3486 break_on_hyphens=False)
3487 return '\n'.join(split_lines) + '\n'
3489 return line + '\n'
3492def normalize_line_endings(lines, newline):
3493 """Return fixed line endings.
3495 All lines will be modified to use the most common line ending.
3496 """
3497 line = [line.rstrip('\n\r') + newline for line in lines]
3498 if line and lines[-1] == lines[-1].rstrip('\n\r'):
3499 line[-1] = line[-1].rstrip('\n\r')
3500 return line
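# Illustrative sketch: every ending becomes `newline`, and a missing
# final newline is preserved:
#
#     >>> normalize_line_endings(['a\r\n', 'b'], '\n')
#     ['a\n', 'b']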
3503def mutual_startswith(a, b):
3504 return b.startswith(a) or a.startswith(b)
3507def code_match(code, select, ignore):
3508 if ignore:
3509 assert not isinstance(ignore, str)
3510 for ignored_code in [c.strip() for c in ignore]:
3511 if mutual_startswith(code.lower(), ignored_code.lower()):
3512 return False
3514 if select:
3515 assert not isinstance(select, str)
3516 for selected_code in [c.strip() for c in select]:
3517 if mutual_startswith(code.lower(), selected_code.lower()):
3518 return True
3519 return False
3521 return True
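# Illustrative sketch: codes match by mutual prefix, with `ignore`
# taking precedence over `select`:
#
#     >>> code_match('E501', select=['E5'], ignore=[])
#     True
#     >>> code_match('E501', select=[], ignore=['E5'])
#     False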
3524def fix_code(source, options=None, encoding=None, apply_config=False):
3525 """Return fixed source code.
3527 "encoding" will be used to decode "source" if it is a byte string.
3529 """
3530 options = _get_options(options, apply_config)
3531 # Normalize select/ignore codes to upper case.
3532 options.ignore = [opt.upper() for opt in options.ignore]
3533 options.select = [opt.upper() for opt in options.select]
3535 # Check the ignore arguments.
3536 # NOTE: If no W50x code is already ignored, also ignore W50;
3537 # otherwise the result of the fixes is nondeterministic.
3538 ignore_opt = options.ignore
3539 if not {"W50", "W503", "W504"} & set(ignore_opt):
3540 options.ignore.append("W50")
3542 if not isinstance(source, str):
3543 source = source.decode(encoding or get_encoding())
3545 sio = io.StringIO(source)
3546 return fix_lines(sio.readlines(), options=options)
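# A hedged sketch of the public API with default options (the exact
# output depends on the pycodestyle checks enabled):
#
#     >>> fix_code('x=1\n')
#     'x = 1\n'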
3549def _get_options(raw_options, apply_config):
3550 """Return parsed options."""
3551 if not raw_options:
3552 return parse_args([''], apply_config=apply_config)
3554 if isinstance(raw_options, dict):
3555 options = parse_args([''], apply_config=apply_config)
3556 for name, value in raw_options.items():
3557 if not hasattr(options, name):
3558 raise ValueError("No such option '{}'".format(name))
3560 # Check for very basic type errors.
3561 expected_type = type(getattr(options, name))
3562 if expected_type is not str:
3563 if isinstance(value, (str, )):
3564 raise ValueError(
3565 "Option '{}' should not be a string".format(name))
3566 setattr(options, name, value)
3567 else:
3568 options = raw_options
3570 return options
3573def fix_lines(source_lines, options, filename=''):
3574 """Return fixed source code."""
3575 # Transform everything to line feeds. Then change them back to the
3576 # original newline before returning the fixed source code.
3577 original_newline = find_newline(source_lines)
3578 tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))
3580 # Keep a history to break out of cycles.
3581 previous_hashes = set()
3583 if options.line_range:
3584 # Disable "apply_local_fixes()" for now due to issue #175.
3585 fixed_source = tmp_source
3586 else:
3587 # Apply global fixes only once (for efficiency).
3588 fixed_source = apply_global_fixes(tmp_source,
3589 options,
3590 filename=filename)
3592 passes = 0
3593 long_line_ignore_cache = set()
3594 while hash(fixed_source) not in previous_hashes:
3595 if options.pep8_passes >= 0 and passes > options.pep8_passes:
3596 break
3597 passes += 1
3599 previous_hashes.add(hash(fixed_source))
3601 tmp_source = copy.copy(fixed_source)
3603 fix = FixPEP8(
3604 filename,
3605 options,
3606 contents=tmp_source,
3607 long_line_ignore_cache=long_line_ignore_cache)
3609 fixed_source = fix.fix()
3611 sio = io.StringIO(fixed_source)
3612 return ''.join(normalize_line_endings(sio.readlines(), original_newline))
3615def fix_file(filename, options=None, output=None, apply_config=False):
3616 if not options:
3617 options = parse_args([filename], apply_config=apply_config)
3619 original_source = readlines_from_file(filename)
3621 fixed_source = original_source
3623 if options.in_place or options.diff or output:
3624 encoding = detect_encoding(filename)
3626 if output:
3627 output = LineEndingWrapper(wrap_output(output, encoding=encoding))
3629 fixed_source = fix_lines(fixed_source, options, filename=filename)
3631 if options.diff:
3632 new = io.StringIO(fixed_source)
3633 new = new.readlines()
3634 diff = get_diff_text(original_source, new, filename)
3635 if output:
3636 output.write(diff)
3637 output.flush()
3638 elif options.jobs > 1:
3639 diff = diff.encode(encoding)
3640 return diff
3641 elif options.in_place:
3642 original = "".join(original_source).splitlines()
3643 fixed = fixed_source.splitlines()
3644 original_source_last_line = (