Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pycodestyle.py: 69%
1377 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:34 +0000
1#!/usr/bin/env python
2# pycodestyle.py - Check Python source code formatting, according to
3# PEP 8
4#
5# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
6# Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
7# Copyright (C) 2014-2016 Ian Lee <ianlee1521@gmail.com>
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation files
11# (the "Software"), to deal in the Software without restriction,
12# including without limitation the rights to use, copy, modify, merge,
13# publish, distribute, sublicense, and/or sell copies of the Software,
14# and to permit persons to whom the Software is furnished to do so,
15# subject to the following conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
24# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27# SOFTWARE.
28r"""
29Check Python source code formatting, according to PEP 8.
31For usage and a list of options, try this:
32$ python pycodestyle.py -h
34This program and its regression test suite live here:
35https://github.com/pycqa/pycodestyle
37Groups of errors and warnings:
38E errors
39W warnings
40100 indentation
41200 whitespace
42300 blank lines
43400 imports
44500 line length
45600 deprecation
46700 statements
47900 syntax error
48"""
49import bisect
50import configparser
51import inspect
52import io
53import keyword
54import os
55import re
56import sys
57import time
58import tokenize
59import warnings
60from fnmatch import fnmatch
61from functools import lru_cache
62from optparse import OptionParser
# this is a performance hack.  see https://bugs.python.org/issue43014
if (
        sys.version_info < (3, 10) and
        callable(getattr(tokenize, '_compile', None))
):  # pragma: no cover (<py310)
    tokenize._compile = lru_cache()(tokenize._compile)  # type: ignore

__version__ = '2.10.0'

# Comma-separated glob patterns excluded from checking by default.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
# Error/warning codes ignored unless the user overrides the selection.
DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704,W503,W504'
try:
    if sys.platform == 'win32':
        USER_CONFIG = os.path.expanduser(r'~\.pycodestyle')
    else:
        USER_CONFIG = os.path.join(
            os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config'),
            'pycodestyle'
        )
except ImportError:
    USER_CONFIG = None

PROJECT_CONFIG = ('setup.cfg', 'tox.ini')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
# Number of blank lines between various code parts.
BLANK_LINES_CONFIG = {
    # Top level class and function.
    'top_level': 2,
    # Methods and nested class and function.
    'method': 1,
}
MAX_DOC_LENGTH = 72
INDENT_SIZE = 4
# Output templates keyed by the --format option value.
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
# Keywords treated as such by the checks; singletons are handled separately.
KEYWORDS = frozenset(keyword.kwlist + ['print', 'async']) - SINGLETONS
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-', '@'])
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
ASSIGNMENT_EXPRESSION_OP = [':='] if sys.version_info >= (3, 8) else []
# Operators that must always be surrounded by whitespace (E225).
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '=',
    'and', 'in', 'is', 'or', '->'] +
    ASSIGNMENT_EXPRESSION_OP)
WHITESPACE = frozenset(' \t\xa0')
NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
# ERRORTOKEN is triggered by backticks in Python 3
SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

INDENT_REGEX = re.compile(r'([ \t]*)')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[\[({][ \t]|[ \t][\]}),;:](?!=)')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'(\bNone|\bFalse|\bTrue)?\s*([=!]=)'
                                     r'\s*(?(1)|(None|False|True))\b')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(?<!is\s)(not)\s+[^][)(}{ ]+\s+'
                                    r'(in|is)\s')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s+type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
STARTSWITH_TOP_LEVEL_REGEX = re.compile(r'^(async\s+def\s+|def\s+|class\s+|@)')
STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
    r'^\s*({})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
        'def', 'async def',
        'for', 'async for',
        'if', 'elif', 'else',
        'try', 'except', 'finally',
        'with', 'async with',
        'class',
        'while',
    )))
)
DUNDER_REGEX = re.compile(r"^__([^\s]+)__(?::\s*[a-zA-Z.0-9_\[\]\"]+)? = ")
BLANK_EXCEPT_REGEX = re.compile(r"except\s*:")

# Registry of check callables, keyed by the kind of their first argument
# ('physical_line', 'logical_line') or 'tree' for AST-based classes.
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
156def _get_parameters(function):
157 return [parameter.name
158 for parameter
159 in inspect.signature(function).parameters.values()
160 if parameter.kind == parameter.POSITIONAL_OR_KEYWORD]
def register_check(check, codes=None):
    """Register a new check object.

    Functions whose first parameter is ``physical_line`` or
    ``logical_line`` are registered under that kind; classes whose
    constructor takes ``(self, tree)`` are registered as tree checks.
    Anything else is returned unregistered.
    """
    def _store(kind, found_codes, args):
        registry = _checks[kind]
        if check in registry:
            # Same callable registered again: merge the code lists.
            registry[check][0].extend(found_codes or [])
        else:
            registry[check] = (found_codes or [''], args)

    if inspect.isclass(check):
        if _get_parameters(check.__init__)[:2] == ['self', 'tree']:
            _store('tree', codes, None)
    elif inspect.isfunction(check):
        args = _get_parameters(check)
        if args and args[0] in ('physical_line', 'logical_line'):
            found = codes
            if found is None:
                # Harvest codes (e.g. E101, W191) from the docstring.
                found = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _store(args[0], found, args)
    return check
182########################################################################
183# Plugins (check functions) for physical lines
184########################################################################
@register_check
def tabs_or_spaces(physical_line, indent_char):
    r"""Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a
    mixture of tabs and spaces should be converted to using spaces
    exclusively.  When invoking the Python command line interpreter with
    the -t option, it issues warnings about code that illegally mixes
    tabs and spaces.  When using -tt these warnings become errors.
    These options are highly recommended!

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    # Report the first character that disagrees with the file's
    # dominant indent character.
    offset = next(
        (i for i, char in enumerate(indent) if char != indent_char),
        None,
    )
    if offset is not None:
        return offset, "E101 indentation contains mixed spaces and tabs"
@register_check
def tabs_obsolete(physical_line):
    r"""On new projects, spaces-only are strongly recommended over tabs.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    position = indent.find('\t')
    if position != -1:
        return position, "W191 indentation contains tabs"
@register_check
def trailing_whitespace(physical_line):
    r"""Trailing whitespace is superfluous.

    The warning returned varies on whether the line itself is blank,
    for easier filtering for those who want to indent their blank lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    # Strip the line terminator characters one class at a time:
    # newline (chr 10), carriage return (chr 13), form feed (chr 12).
    for terminator in ('\n', '\r', '\x0c'):
        physical_line = physical_line.rstrip(terminator)
    stripped = physical_line.rstrip(' \t\v')
    if stripped == physical_line:
        return
    if stripped:
        return len(stripped), "W291 trailing whitespace"
    return 0, "W293 blank line contains whitespace"
@register_check
def trailing_blank_lines(physical_line, lines, line_number, total_lines):
    r"""Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n

    However the last line should end with a new line (warning W292).
    """
    # Only the very last physical line of the file is of interest.
    if line_number != total_lines:
        return
    stripped_last_line = physical_line.rstrip('\r\n')
    if physical_line and not stripped_last_line:
        return 0, "W391 blank line at end of file"
    if stripped_last_line == physical_line:
        # Nothing was stripped, so the line carries no newline at all.
        return len(lines[-1]), "W292 no newline at end of file"
@register_check
def maximum_line_length(physical_line, max_line_length, multiline,
                        line_number, noqa):
    r"""Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to
    have several windows side-by-side.  The default wrapping on such
    devices looks ugly.  Therefore, please limit all lines to a maximum
    of 79 characters. For flowing long blocks of text (docstrings or
    comments), limiting the length to 72 characters is recommended.

    Reports error E501.
    """
    line = physical_line.rstrip()
    length = len(line)
    # Guard clause: short lines and noqa-suppressed lines are fine.
    if length <= max_line_length or noqa:
        return
    # Special case: ignore long shebang lines.
    if line_number == 1 and line.startswith('#!'):
        return
    # Special case for long URLs in multi-line docstrings or
    # comments, but still report the error when the 72 first chars
    # are whitespaces.
    chunks = line.split()
    if ((len(chunks) == 1 and multiline) or
        (len(chunks) == 2 and chunks[0] == '#')) and \
            len(line) - len(chunks[-1]) < max_line_length - 7:
        return
    # NOTE: the original re-tested ``length > max_line_length`` here,
    # which the guard above already established; the dead check is gone.
    return (max_line_length, "E501 line too long "
            "(%d > %d characters)" % (length, max_line_length))
291########################################################################
292# Plugins (check functions) for logical lines
293########################################################################
def _is_one_liner(logical_line, indent_level, lines, line_number):
    # Return True when *logical_line* starts a def/class whose body ends
    # before the next line indented at or below *indent_level* -- i.e. a
    # "one-liner" definition, used by blank_lines() to allow groups of
    # one-liners without blank lines between them.
    if not STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
        return False

    line_idx = line_number - 1

    if line_idx < 1:
        prev_indent = 0
    else:
        prev_indent = expand_indent(lines[line_idx - 1])

    if prev_indent > indent_level:
        return False

    # Skip any decorator lines to find the actual def/class line.
    while line_idx < len(lines):
        line = lines[line_idx].strip()
        if not line.startswith('@') and STARTSWITH_TOP_LEVEL_REGEX.match(line):
            break
        else:
            line_idx += 1
    else:
        return False  # invalid syntax: EOF while searching for def/class

    # Find the next non-blank physical line after the definition.
    next_idx = line_idx + 1
    while next_idx < len(lines):
        if lines[next_idx].strip():
            break
        else:
            next_idx += 1
    else:
        return True  # line is last in the file

    # One-liner iff the following code dedents back out of the body.
    return expand_indent(lines[next_idx]) <= indent_level
@register_check
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                blank_before, previous_logical,
                previous_unindented_logical_line, previous_indent_level,
                lines):
    r"""Separate top-level function and class definitions with two blank
    lines.

    Method definitions inside a class are separated by a single blank
    line.

    Extra blank lines may be used (sparingly) to separate groups of
    related functions.  Blank lines may be omitted between a bunch of
    related one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical
    sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\nasync def b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
    Okay: default = 1\nfoo = 1
    Okay: classify = 1\nfoo = 1

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E302: def a():\n    pass\n\nasync def b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    E305: def a():\n    pass\na()
    E306: def a():\n    def b():\n        pass\n    def c():\n        pass
    """  # noqa
    top_level_lines = BLANK_LINES_CONFIG['top_level']
    method_lines = BLANK_LINES_CONFIG['method']

    if not previous_logical and blank_before < top_level_lines:
        return  # Don't expect blank lines before the first line
    if previous_logical.startswith('@'):
        # A decorator must be immediately followed by its definition.
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
    elif (blank_lines > top_level_lines or
            (indent_level and blank_lines == method_lines + 1)
          ):
        yield 0, "E303 too many blank lines (%d)" % blank_lines
    elif STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
        # allow a group of one-liners
        if (
            _is_one_liner(logical_line, indent_level, lines, line_number) and
            blank_before == 0
        ):
            return
        if indent_level:
            if not (blank_before == method_lines or
                    previous_indent_level < indent_level or
                    DOCSTRING_REGEX.match(previous_logical)
                    ):
                ancestor_level = indent_level
                nested = False
                # Search backwards for a def ancestor or tree root
                # (top level).
                for line in lines[line_number - top_level_lines::-1]:
                    if line.strip() and expand_indent(line) < ancestor_level:
                        ancestor_level = expand_indent(line)
                        nested = STARTSWITH_DEF_REGEX.match(line.lstrip())
                        if nested or ancestor_level == 0:
                            break
                if nested:
                    # Definition nested directly inside a function body.
                    yield 0, "E306 expected %s blank line before a " \
                        "nested definition, found 0" % (method_lines,)
                else:
                    yield 0, "E301 expected {} blank line, found 0".format(
                        method_lines)
        elif blank_before != top_level_lines:
            yield 0, "E302 expected %s blank lines, found %d" % (
                top_level_lines, blank_before)
    elif (logical_line and
            not indent_level and
            blank_before != top_level_lines and
            previous_unindented_logical_line.startswith(('def ', 'class '))
          ):
        # Plain code resumes after a top-level def/class body.
        yield 0, "E305 expected %s blank lines after " \
            "class or function definition, found %d" % (
                top_level_lines, blank_before)
@register_check
def extraneous_whitespace(logical_line):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in these situations:
    - Immediately inside parentheses, brackets or braces.
    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        text = match.group()
        symbol = text.strip()
        position = match.start()
        if text[-1].isspace():
            # Matched "([{ " -- whitespace right after an opener.
            yield position + 1, "E201 whitespace after '%s'" % symbol
        elif logical_line[position - 1] != ',':
            # Matched " x" where x closes a bracket or is ,;: --
            # ignore when the space follows a comma (e.g. "(3, )").
            code = 'E202' if symbol in '}])' else 'E203'
            yield position, f"{code} whitespace before '{symbol}'"
@register_check
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords.

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    # (regex group, message for a tab, message for multiple spaces)
    side_checks = (
        (1, "E274 tab before keyword",
         "E272 multiple spaces before keyword"),
        (2, "E273 tab after keyword",
         "E271 multiple spaces after keyword"),
    )
    for match in KEYWORD_REGEX.finditer(logical_line):
        for group, tab_msg, multi_msg in side_checks:
            gap = match.group(group)
            if '\t' in gap:
                yield match.start(group), tab_msg
            elif len(gap) > 1:
                yield match.start(group), multi_msg
@register_check
def missing_whitespace_after_keyword(logical_line, tokens):
    r"""Keywords should be followed by whitespace.

    Okay: from foo import (bar, baz)
    E275: from foo import(bar, baz)
    E275: from importable.module import(bar, baz)
    E275: if(foo): bar
    """
    for tok0, tok1 in zip(tokens, tokens[1:]):
        # Only adjacent token pairs where the first is a keyword matter.
        if tok0.end != tok1.start or not keyword.iskeyword(tok0.string):
            continue
        # Exclude the True/False/None singletons, which can appear
        # e.g. as "if x is None:".
        if tok0.string in SINGLETONS:
            continue
        # async/await were valid identifier names in old Python versions.
        if tok0.string in ('async', 'await'):
            continue
        if tok0.string == 'except' and tok1.string == '*':
            continue
        if tok0.string == 'yield' and tok1.string == ')':
            continue
        if tok1.string in ':\n':
            continue
        yield tok0.end, "E275 missing whitespace after keyword"
@register_check
def missing_whitespace(logical_line):
    r"""Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[3,] = 1
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    line = logical_line
    for index, (char, next_char) in enumerate(zip(line, line[1:])):
        if char not in ',;:' or next_char in WHITESPACE:
            continue
        if char == ':':
            before = line[:index]
            # Heuristic: inside an open '[' that is more recent than any
            # '{', the colon is slice syntax and needs no space.
            if (before.count('[') > before.count(']') and
                    before.rfind('{') < before.rfind('[')):
                continue
            if next_char == '=' and sys.version_info >= (3, 8):
                continue  # Allow assignment expression
        elif char == ',' and next_char in ')]':
            continue  # Allow tuple with only one element: (3,)
        yield index, "E231 missing whitespace after '%s'" % char
@register_check
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level,
                indent_size):
    r"""Use indent_size (PEP8 says 4) spaces per indentation level.

    For really old code that you don't want to mess up, you can continue
    to use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1
    E114:   # a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass
    E115: for item in items:\n# Hi\n    pass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    E116: a = 1\n    # b = 2
    """
    # Comment-only lines use the E114/E115/E116 variants of E111/E112/E113.
    if logical_line:
        code_offset, tmpl = 0, "E11%d %s"
    else:
        code_offset, tmpl = 3, "E11%d %s (comment)"
    if indent_level % indent_size:
        message = "indentation is not a multiple of " + str(indent_size)
        yield 0, tmpl % (1 + code_offset, message)
    indent_expect = previous_logical.endswith(':')
    if indent_expect and indent_level <= previous_indent_level:
        yield 0, tmpl % (2 + code_offset, "expected an indented block")
    elif not indent_expect and indent_level > previous_indent_level:
        yield 0, tmpl % (3 + code_offset, "unexpected indentation")

    if indent_expect:
        # One level is 8 columns for tab files, 4 otherwise.
        expected_indent_amount = 8 if indent_char == '\t' else 4
        if indent_level > previous_indent_level + expected_indent_amount:
            yield 0, tmpl % (7, 'over-indented')
@register_check
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, indent_size, noqa, verbose):
    r"""Continuation lines indentation.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself
      as a continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    valid_hangs = (indent_size,) if indent_char != '\t' \
        else (indent_size, indent_size * 2)
    # remember how many brackets were opened on each line
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # for each depth, collect a list of opening rows
    open_rows = [[0]]
    # for each depth, memorize the hanging indentation
    hangs = [None]
    # visual indents
    indent_chances = {}
    last_indent = tokens[0][2]
    visual_indent = None
    last_token_multiline = False
    # for each depth, memorize the visual indent column
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        # Does this token start a later physical row than the last one?
        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = not last_token_multiline and token_type not in NEWLINE

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            # identify closing bracket
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])
            # is there any chance of visual indent?
            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # closing bracket for visual indent
                if start[1] != indent[depth]:
                    yield (start, "E124 closing bracket does not match "
                           "visual indentation")
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening
                # bracket's line
                if hang_closing:
                    yield start, "E133 closing bracket is missing indentation"
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
            elif hanging_indent or (indent_next and
                                    rel_indent[row] == 2 * indent_size):
                # hanging indent is verified
                if close_bracket and not hang_closing:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
                hangs[depth] = hang
            elif visual_indent is True:
                # visual indent is verified
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # ignore token lined up with matching one from a
                # previous line
                pass
            else:
                # indent is broken
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif not close_bracket and hangs[depth]:
                    error = "E131", "unaligned for hanging indent"
                else:
                    hangs[depth] = hang
                    if hang > indent_size:
                        error = "E126", "over-indented for hanging indent"
                    else:
                        error = "E121", "under-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and
                token_type not in (tokenize.NL, tokenize.COMMENT) and
                not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print(f"bracket depth {depth} indent to {start[1]}")
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
                text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # visual indent after assert/raise/with
        elif not row and not depth and text in ["assert", "raise", "with"]:
            indent_chances[end[1] + 1] = True
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                # indent chances at or past the closed bracket's column
                # no longer apply.
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow lining up tokens
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

    if indent_next and expand_indent(line) == indent_level + indent_size:
        pos = (start[0], indent[0] + indent_size)
        if visual_indent:
            code = "E129 visually indented line"
        else:
            code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code
@register_check
def whitespace_before_parameters(logical_line, tokens):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in the following situations:
    - before the open parenthesis that starts the argument list of a
      function call.
    - before the open parenthesis that starts an indexing or slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list [index]
    E211: dict['key'] = list [index]
    """
    prev_type, prev_text, _, prev_end, _ = tokens[0]
    for index, token in enumerate(tokens[1:], start=1):
        token_type, text, start, end, _ = token
        opens_call = token_type == tokenize.OP and text in '(['
        if (
            opens_call and
            start != prev_end and
            (prev_type == tokenize.NAME or prev_text in '}])') and
            # Syntax "class A (B):" is allowed, but avoid it
            (index < 2 or tokens[index - 2][1] != 'class') and
            # Allow "return (a.foo for a in range(5))"
            not keyword.iskeyword(prev_text) and
            # 'match' and 'case' are only soft keywords
            (
                sys.version_info < (3, 9) or
                not keyword.issoftkeyword(prev_text)
            )
        ):
            yield prev_end, "E211 whitespace before '%s'" % text
        prev_type, prev_text, prev_end = token_type, text, end
@register_check
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    # (regex group, message for a tab, message for multiple spaces)
    side_checks = (
        (1, "E223 tab before operator",
         "E221 multiple spaces before operator"),
        (2, "E224 tab after operator",
         "E222 multiple spaces after operator"),
    )
    for match in OPERATOR_REGEX.finditer(logical_line):
        for group, tab_msg, multi_msg in side_checks:
            gap = match.group(group)
            if '\t' in gap:
                yield match.start(group), tab_msg
            elif len(gap) > 1:
                yield match.start(group), multi_msg
@register_check
def missing_whitespace_around_operator(logical_line, tokens):
    r"""Surround operators with a single space on either side.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E225: z = 1and 1
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    parens = 0
    # need_space is a tri-state: False (no operator pending), True (a
    # space is mandatory), or a (position, had_leading_space) pair for
    # operators where surrounding space is optional but must match.
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    operator_types = (tokenize.OP, tokenize.NAME)
    for token_type, text, start, end, line in tokens:
        if token_type in SKIP_COMMENTS:
            continue
        # Track call/lambda nesting so '=' in "foo(bar=None)" is exempt.
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            if start != prev_end:
                # Found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # Tolerate the "<>" operator, even if running Python 3
                # Deal with Python 3's annotated return value "->"
                pass
            elif (
                    # def f(a, /, b):
                    #           ^
                    # def f(a, b, /):
                    #             ^
                    # f = lambda a, /:
                    #               ^
                    prev_text == '/' and text in {',', ')', ':'} or
                    # def f(a, b, /):
                    #               ^
                    prev_text == ')' and text == ':'
            ):
                # Tolerate the "/" operator in function definition
                # For more info see PEP570
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                elif prev_text != '**':
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type in operator_types and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check if the operator is used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if prev_type == tokenize.OP and prev_text in '}])' or (
                    prev_type != tokenize.OP and
                    prev_text not in KEYWORDS and (
                        sys.version_info < (3, 9) or
                        not keyword.issoftkeyword(prev_text)
                    )
                ):
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
@register_check
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma or a colon.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1, 2)
    E242: a = (1,\t2)
    """
    for match in WHITESPACE_AFTER_COMMA_REGEX.finditer(logical_line):
        separator = match.group()[0]
        offset = match.start() + 1
        if '\t' in match.group():
            yield offset, "E242 tab after '%s'" % separator
        else:
            yield offset, "E241 multiple spaces after '%s'" % separator
@register_check
def whitespace_around_named_parameter_equals(logical_line, tokens):
    r"""Don't use spaces around the '=' sign in function arguments.

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value, except when
    using a type annotation.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)
    Okay: def foo(arg: int = 42):
    Okay: async def foo(arg: int = 42):

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    E252: def complex(real, image: float=0.0):
    """
    # Bracket nesting depth; '=' only matters inside brackets.
    parens = 0
    # Flags set when an '=' was just seen: the *next* token's position
    # decides whether to report the E251/E252 trailing-side message.
    no_space = False
    require_space = False
    prev_end = None
    # True while scanning a type-annotated parameter of a 'def', where
    # spaces around '=' are required rather than forbidden (PEP 8).
    annotated_func_arg = False
    in_def = bool(STARTSWITH_DEF_REGEX.match(logical_line))
    message = "E251 unexpected spaces around keyword / parameter equals"
    missing_message = "E252 missing whitespace around parameter equals"
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if no_space:
            # Previous token was a plain '='; any gap before this token
            # means there was whitespace after the '='.
            no_space = False
            if start != prev_end:
                yield (prev_end, message)
        if require_space:
            # Previous token was an annotated-default '='; no gap means
            # the required trailing space is missing.
            require_space = False
            if start == prev_end:
                yield (prev_end, missing_message)
        if token_type == tokenize.OP:
            if text in '([':
                parens += 1
            elif text in ')]':
                parens -= 1
            elif in_def and text == ':' and parens == 1:
                # ':' at depth 1 of a def starts a type annotation.
                annotated_func_arg = True
            elif parens == 1 and text == ',':
                # Next parameter: forget the previous annotation flag.
                annotated_func_arg = False
            elif parens and text == '=':
                if annotated_func_arg and parens == 1:
                    # Annotated default value: spaces are required.
                    require_space = True
                    if start == prev_end:
                        yield (prev_end, missing_message)
                else:
                    # Keyword argument or plain default: no spaces.
                    no_space = True
                    if start != prev_end:
                        yield (prev_end, message)
            if not parens:
                annotated_func_arg = False
        prev_end = end
@register_check
def whitespace_before_comment(logical_line, tokens):
    """Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.
    Inline comments should be separated by at least two spaces from the
    statement. They should start with a # and a single space.

    Each line of a block comment starts with a # and one or multiple
    spaces as there can be indented text inside the comment.

    Okay: x = x + 1 # Increment x
    Okay: x = x + 1 # Increment x
    Okay: # Block comments:
    Okay: # - Block comment list
    Okay: # \xa0- Block comment list
    E261: x = x + 1 # Increment x
    E262: x = x + 1 #Increment x
    E262: x = x + 1 # Increment x
    E262: x = x + 1 # \xa0Increment x
    E265: #Block comment
    E266: ### Block comment
    """
    # End position of the last code token, used to measure the gap
    # before an inline comment (E261).
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            # Non-blank text before the comment on the same physical
            # line makes this an *inline* comment.
            inline_comment = line[:start[1]].strip()
            if inline_comment:
                if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                    yield (prev_end,
                           "E261 at least two spaces before inline comment")
            # Split the leading symbol run ('#', '##', '#!', ...) from
            # the comment body at the first space.
            symbol, sp, comment = text.partition(' ')
            bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
            if inline_comment:
                if bad_prefix or comment[:1] in WHITESPACE:
                    yield start, "E262 inline comment should start with '# '"
            elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
                # A '#!' shebang is only tolerated on the first line.
                if bad_prefix != '#':
                    yield start, "E265 block comment should start with '# '"
                elif comment:
                    yield start, "E266 too many leading '#' for block comment"
        elif token_type != tokenize.NL:
            prev_end = end
@register_check
def imports_on_separate_lines(logical_line):
    r"""Place imports on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os
    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    if not logical_line.startswith('import '):
        return
    comma = logical_line.find(',')
    # A ';' before the comma means the comma belongs to a later
    # statement, not to this import.
    if comma > -1 and ';' not in logical_line[:comma]:
        yield comma, "E401 multiple imports on one line"
@register_check
def module_imports_on_top_of_file(
        logical_line, indent_level, checker_state, noqa):
    r"""Place imports at the top of the file.

    Always put imports at the top of the file, just after any module
    comments and docstrings, and before module globals and constants.

    Okay: import os
    Okay: # this is a comment\nimport os
    Okay: '''this is a module docstring'''\nimport os
    Okay: r'''this is a module docstring'''\nimport os
    Okay:
    try:\n\timport x\nexcept ImportError:\n\tpass\nelse:\n\tpass\nimport y
    Okay:
    try:\n\timport x\nexcept ImportError:\n\tpass\nfinally:\n\tpass\nimport y
    E402: a=1\nimport os
    E402: 'One string'\n"Two string"\nimport os
    E402: a=1\nfrom sys import x

    Okay: if x:\n import os
    """  # noqa
    def is_string_literal(line):
        # Strip an optional u/b prefix, then an optional r prefix,
        # before looking for the opening quote.
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    allowed_keywords = (
        'try', 'except', 'else', 'finally', 'with', 'if', 'elif')

    if indent_level:  # Allow imports in conditional statement/function
        return
    if not logical_line:  # Allow empty lines or comments
        return
    if noqa:
        return
    line = logical_line
    if line.startswith(('import ', 'from ')):
        if checker_state.get('seen_non_imports', False):
            yield 0, "E402 module level import not at top of file"
    elif re.match(DUNDER_REGEX, line):
        # Allow dunder assignments such as __version__.
        return
    elif any(line.startswith(keyword) for keyword in allowed_keywords):
        # Allow certain keywords intermixed with imports in order to
        # support conditional or filtered importing
        return
    elif is_string_literal(line):
        # The first literal is a docstring, allow it. Otherwise, report
        # error.
        if checker_state.get('seen_docstring', False):
            checker_state['seen_non_imports'] = True
        else:
            checker_state['seen_docstring'] = True
    else:
        checker_state['seen_non_imports'] = True
@register_check
def compound_statements(logical_line):
    r"""Compound statements (on the same line) are generally
    discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.
    Also avoid folding such long lines!

    Always use a def statement instead of an assignment statement that
    binds a lambda expression directly to a name.

    Okay: if foo == 'blah':\n do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()
    E702: do_one(); do_two(); do_three()
    E703: do_four(); # useless semicolon
    E704: def f(x): return 2*x
    E731: f = lambda x: 2*x
    """
    line = logical_line
    last_char = len(line) - 1
    found = line.find(':')
    prev_found = 0
    # Running bracket counts so colons inside dict literals, slices and
    # annotations can be recognized and skipped.
    counts = {char: 0 for char in '{}[]()'}
    while -1 < found < last_char:
        update_counts(line[prev_found:found], counts)
        if ((counts['{'] <= counts['}'] and  # {'a': 1} (dict)
             counts['['] <= counts[']'] and  # [1:2] (slice)
             counts['('] <= counts[')']) and  # (annotation)
                not (sys.version_info >= (3, 8) and
                     line[found + 1] == '=')):  # assignment expression
            lambda_kw = LAMBDA_REGEX.search(line, 0, found)
            if lambda_kw:
                # E731 only if the lambda is bound directly to a plain
                # name with '=' (not, e.g., used as a default value).
                before = line[:lambda_kw.start()].rstrip()
                if before[-1:] == '=' and before[:-1].strip().isidentifier():
                    yield 0, ("E731 do not assign a lambda expression, use a "
                              "def")
                break
            if STARTSWITH_DEF_REGEX.match(line):
                yield 0, "E704 multiple statements on one line (def)"
            elif STARTSWITH_INDENT_STATEMENT_REGEX.match(line):
                yield found, "E701 multiple statements on one line (colon)"
        prev_found = found
        found = line.find(':', found + 1)
    # Second pass: semicolons. A trailing ';' is useless (E703), any
    # other ';' separates multiple statements (E702).
    found = line.find(';')
    while -1 < found:
        if found < last_char:
            yield found, "E702 multiple statements on one line (semicolon)"
        else:
            yield found, "E703 statement ends with a semicolon"
        found = line.find(';', found + 1)
@register_check
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's
    implied line continuation inside parentheses, brackets and braces.
    Long lines can be broken over multiple lines by wrapping expressions
    in parentheses. These should be used in preference to using a
    backslash for line continuation.

    E502: aaa = [123, \\n 123]
    E502: aaa = ("bbb " \\n "ccc")

    Okay: aaa = [123,\n 123]
    Okay: aaa = ("bbb "\n "ccc")
    Okay: aaa = "bbb " \\n "ccc"
    Okay: aaa = 123 # \\
    """
    prev_start = prev_end = parens = 0
    comment = False
    # Position of a trailing backslash on the previous physical line,
    # or None when the line did not end with one.
    backslash = None
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            comment = True
        if start[0] != prev_start and parens and backslash and not comment:
            # A new row began while inside brackets and the previous
            # row ended with a backslash: the backslash was redundant.
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            # Token ends on a new physical line; record whether that
            # line ends with an explicit continuation backslash.
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1
1270# The % character is strictly speaking a binary operator, but the
1271# common usage seems to be to put it next to the format parameters,
1272# after a line break.
1273_SYMBOLIC_OPS = frozenset("()[]{},:.;@=%~") | frozenset(("...",))
1276def _is_binary_operator(token_type, text):
1277 return (
1278 token_type == tokenize.OP or
1279 text in {'and', 'or'}
1280 ) and (
1281 text not in _SYMBOLIC_OPS
1282 )
1285def _break_around_binary_operators(tokens):
1286 """Private function to reduce duplication.
1288 This factors out the shared details between
1289 :func:`break_before_binary_operator` and
1290 :func:`break_after_binary_operator`.
1291 """
1292 line_break = False
1293 unary_context = True
1294 # Previous non-newline token types and text
1295 previous_token_type = None
1296 previous_text = None
1297 for token_type, text, start, end, line in tokens:
1298 if token_type == tokenize.COMMENT:
1299 continue
1300 if ('\n' in text or '\r' in text) and token_type != tokenize.STRING:
1301 line_break = True
1302 else:
1303 yield (token_type, text, previous_token_type, previous_text,
1304 line_break, unary_context, start)
1305 unary_context = text in '([{,;'
1306 line_break = False
1307 previous_token_type = token_type
1308 previous_text = text
@register_check
def break_before_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks before binary operators.

    The preferred place to break around a binary operator is after the
    operator, not before it.

    W503: (width == 0\n + height == 0)
    W503: (width == 0\n and height == 0)
    W503: var = (1\n & ~2)
    W503: var = (1\n / -2)
    W503: var = (1\n + -1\n + -2)

    Okay: foo(\n -x)
    Okay: foo(x\n [])
    Okay: x = '''\n''' + ''
    Okay: foo(x,\n -y)
    Okay: foo(x, # comment\n -y)
    """
    for (token_type, text, prev_type, prev_text,
            line_break, unary_context, start) in \
            _break_around_binary_operators(tokens):
        if not line_break or unary_context:
            continue
        if (_is_binary_operator(token_type, text) and
                not _is_binary_operator(prev_type, prev_text)):
            yield start, "W503 line break before binary operator"
@register_check
def break_after_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks after binary operators.

    The preferred place to break around a binary operator is before the
    operator, not after it.

    W504: (width == 0 +\n height == 0)
    W504: (width == 0 and\n height == 0)
    W504: var = (1 &\n ~2)

    Okay: foo(\n -x)
    Okay: foo(x\n [])
    Okay: x = '''\n''' + ''
    Okay: x = '' + '''\n'''
    Okay: foo(x,\n -y)
    Okay: foo(x, # comment\n -y)

    The following should be W504 but unary_context is tricky with these
    Okay: var = (1 /\n -2)
    Okay: var = (1 +\n -1 +\n -2)
    """
    prev_start = None
    for (token_type, text, prev_type, prev_text,
            line_break, unary_context, start) in \
            _break_around_binary_operators(tokens):
        if (line_break and not unary_context and
                _is_binary_operator(prev_type, prev_text) and
                not _is_binary_operator(token_type, text)):
            yield prev_start, "W504 line break after binary operator"
        prev_start = start
@register_check
def comparison_to_singleton(logical_line, noqa):
    r"""Comparison to singletons should use "is" or "is not".

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E711: if None == arg:
    E712: if arg == True:
    E712: if False == arg:

    Also, beware of writing if x when you really mean if x is not None
    -- e.g. when testing whether a variable or argument that defaults to
    None was set to some other value. The other value might have a type
    (such as a container) that could be false in a boolean context!
    """
    if noqa:
        return

    for match in COMPARE_SINGLETON_REGEX.finditer(logical_line):
        # The singleton may appear on either side of the comparison.
        singleton = match.group(1) or match.group(3)
        is_equality = match.group(2) == '=='
        hint = "'if cond is %s:'" % (
            ('' if is_equality else 'not ') + singleton)
        if singleton == 'None':
            code = 'E711'
        else:
            code = 'E712'
            # The comparison is truthy iff it is '== True' or '!= False'.
            truthy = (singleton == 'True') == is_equality
            hint += " or 'if %scond:'" % ('' if truthy else 'not ')
        yield match.start(2), ("%s comparison to %s should be %s" %
                               (code, singleton, hint))
@register_check
def comparison_negative(logical_line):
    r"""Negative comparison should be done using "not in" and "is not".

    Okay: if x not in y:\n pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n pass
    E714: if not X is Y:\n pass
    E714: Z = not X.B is Y
    """
    match = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if match is None:
        return
    pos = match.start(1)
    if match.group(2) == 'in':
        # "not X in Y" should be written "X not in Y".
        yield pos, "E713 test for membership should be 'not in'"
    else:
        # "not X is Y" should be written "X is not Y".
        yield pos, "E714 test for object identity should be 'is not'"
@register_check
def comparison_type(logical_line, noqa):
    r"""Object type comparisons should always use isinstance().

    Do not compare types directly.

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might
    be a unicode string too! In Python 2.3, str and unicode have a
    common base class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    match = COMPARE_TYPE_REGEX.search(logical_line)
    if match is None or noqa:
        return
    inst = match.group(1)
    if inst and inst.isidentifier() and inst not in SINGLETONS:
        return  # Allow comparison for types which are not obvious
    yield match.start(), "E721 do not compare types, use 'isinstance()'"
@register_check
def bare_except(logical_line, noqa):
    r"""When catching exceptions, mention specific exceptions when
    possible.

    Okay: except Exception:
    Okay: except BaseException:
    E722: except:
    """
    if noqa:
        return
    match = BLANK_EXCEPT_REGEX.match(logical_line)
    if match is not None:
        yield match.start(), "E722 do not use bare 'except'"
@register_check
def ambiguous_identifier(logical_line, tokens):
    r"""Never use the characters 'l', 'O', or 'I' as variable names.

    In some fonts, these characters are indistinguishable from the
    numerals one and zero. When tempted to use 'l', use 'L' instead.

    Okay: L = 0
    Okay: o = 123
    Okay: i = 42
    E741: l = 0
    E741: O = 123
    E741: I = 42

    Variables can be bound in several other contexts, including class
    and function definitions, lambda functions, 'global' and 'nonlocal'
    statements, exception handlers, and 'with' and 'for' statements.
    In addition, we have a special handling for function parameters.

    Okay: except AttributeError as o:
    Okay: with lock as L:
    Okay: foo(l=12)
    Okay: foo(l=I)
    Okay: for a in foo(l=12):
    Okay: lambda arg: arg * l
    Okay: lambda a=l[I:5]: None
    Okay: lambda x=a.I: None
    Okay: if l >= 12:
    E741: except AttributeError as O:
    E741: with lock as l:
    E741: global I
    E741: nonlocal l
    E741: def foo(l):
    E741: def foo(l=12):
    E741: l = foo(l=12)
    E741: for l in range(10):
    E741: [l for l in lines if l]
    E741: lambda l: None
    E741: lambda a=x[1:5], l: None
    E741: lambda **l:
    E741: def f(**l):
    E742: class I(object):
    E743: def l(x):
    """
    func_depth = None  # set to brace depth if 'def' or 'lambda' is found
    seen_colon = False  # set to true if we're done with function parameters
    brace_depth = 0
    idents_to_avoid = ('l', 'O', 'I')
    prev_type, prev_text, prev_start, prev_end, __ = tokens[0]
    for index in range(1, len(tokens)):
        token_type, text, start, end, line = tokens[index]
        # 'ident'/'pos' are set when this token binds an ambiguous name.
        ident = pos = None
        # find function definitions
        if prev_text in {'def', 'lambda'}:
            func_depth = brace_depth
            seen_colon = False
        elif (
                func_depth is not None and
                text == ':' and
                brace_depth == func_depth
        ):
            # ':' back at the def/lambda's own depth ends the parameter
            # list (or starts the lambda body).
            seen_colon = True
        # update parameter parentheses level
        if text in '([{':
            brace_depth += 1
        elif text in ')]}':
            brace_depth -= 1
        # identifiers on the lhs of an assignment operator
        if text == ':=' or (text == '=' and brace_depth == 0):
            if prev_text in idents_to_avoid:
                ident = prev_text
                pos = prev_start
        # identifiers bound to values with 'as', 'for',
        # 'global', or 'nonlocal'
        if prev_text in ('as', 'for', 'global', 'nonlocal'):
            if text in idents_to_avoid:
                ident = text
                pos = start
        # function / lambda parameter definitions
        if (
                func_depth is not None and
                not seen_colon and
                index < len(tokens) - 1 and tokens[index + 1][1] in ':,=)' and
                prev_text in {'lambda', ',', '*', '**', '('} and
                text in idents_to_avoid
        ):
            ident = text
            pos = start
        if prev_text == 'class':
            if text in idents_to_avoid:
                yield start, "E742 ambiguous class definition '%s'" % text
        if prev_text == 'def':
            if text in idents_to_avoid:
                yield start, "E743 ambiguous function definition '%s'" % text
        if ident:
            yield pos, "E741 ambiguous variable name '%s'" % ident
        prev_text = text
        prev_start = start
@register_check
def python_3000_invalid_escape_sequence(logical_line, tokens, noqa):
    r"""Invalid escape sequences are deprecated in Python 3.6.

    Okay: regex = r'\.png$'
    W605: regex = '\.png$'
    """
    if noqa:
        return
    # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
    valid = [
        '\n',
        '\\',
        '\'',
        '"',
        'a',
        'b',
        'f',
        'n',
        'r',
        't',
        'v',
        '0', '1', '2', '3', '4', '5', '6', '7',
        'x',
        # Escape sequences only recognized in string literals
        'N',
        'u',
        'U',
    ]
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.STRING:
            start_line, start_col = start
            quote = text[-3:] if text[-3:] in ('"""', "'''") else text[-1]
            # Extract string modifiers (e.g. u or r)
            quote_pos = text.index(quote)
            prefix = text[:quote_pos].lower()
            # NOTE: 'start' is rebound here from a (row, col) tuple to
            # an int offset of the literal body within the token text.
            start = quote_pos + len(quote)
            string = text[start:-len(quote)]
            # Raw strings have no escape processing, so skip them.
            if 'r' not in prefix:
                pos = string.find('\\')
                while pos >= 0:
                    pos += 1
                    if string[pos] not in valid:
                        # Translate the offset inside the literal back
                        # to a (row, col) position in the source.
                        line = start_line + string.count('\n', 0, pos)
                        if line == start_line:
                            col = start_col + len(prefix) + len(quote) + pos
                        else:
                            col = pos - string.rfind('\n', 0, pos) - 1
                        yield (
                            (line, col - 1),
                            "W605 invalid escape sequence '\\%s'" %
                            string[pos],
                        )
                    pos = string.find('\\', pos + 1)
@register_check
def python_3000_async_await_keywords(logical_line, tokens):
    """'async' and 'await' are reserved keywords starting at Python 3.7.

    W606: async = 42
    W606: await = 42
    Okay: async def read(db):\n data = await db.fetch('SELECT ...')
    """
    # The Python tokenize library before Python 3.5 recognizes
    # async/await as a NAME token. Therefore, use a state machine to
    # look for the possible async/await constructs as defined by the
    # Python grammar:
    # https://docs.python.org/3/reference/grammar.html
    # 'state' is None or a (state_name, start_position) pair.
    state = None
    for token_type, text, start, end, line in tokens:
        error = False
        if token_type == tokenize.NL:
            continue
        if state is None:
            if token_type == tokenize.NAME:
                if text == 'async':
                    state = ('async_stmt', start)
                elif text == 'await':
                    state = ('await', start)
                elif (token_type == tokenize.NAME and
                        text in ('def', 'for')):
                    state = ('define', start)
        elif state[0] == 'async_stmt':
            if token_type == tokenize.NAME and text in ('def', 'with', 'for'):
                # One of funcdef, with_stmt, or for_stmt. Return to
                # looking for async/await names.
                state = None
            else:
                error = True
        elif state[0] == 'await':
            if token_type == tokenize.NAME:
                # An await expression. Return to looking for async/await
                # names.
                state = None
            elif token_type == tokenize.OP and text == '(':
                state = None
            else:
                error = True
        elif state[0] == 'define':
            if token_type == tokenize.NAME and text in ('async', 'await'):
                error = True
            else:
                state = None
        if error:
            yield (
                state[1],
                "W606 'async' and 'await' are reserved keywords starting with "
                "Python 3.7",
            )
            state = None
    # Last token
    if state is not None:
        yield (
            state[1],
            "W606 'async' and 'await' are reserved keywords starting with "
            "Python 3.7",
        )
1706########################################################################
1707@register_check
1708def maximum_doc_length(logical_line, max_doc_length, noqa, tokens):
1709 r"""Limit all doc lines to a maximum of 72 characters.
1711 For flowing long blocks of text (docstrings or comments), limiting
1712 the length to 72 characters is recommended.
1714 Reports warning W505
1715 """
1716 if max_doc_length is None or noqa:
1717 return
1719 prev_token = None
1720 skip_lines = set()
1721 # Skip lines that
1722 for token_type, text, start, end, line in tokens:
1723 if token_type not in SKIP_COMMENTS.union([tokenize.STRING]):
1724 skip_lines.add(line)
1726 for token_type, text, start, end, line in tokens:
1727 # Skip lines that aren't pure strings
1728 if token_type == tokenize.STRING and skip_lines:
1729 continue
1730 if token_type in (tokenize.STRING, tokenize.COMMENT):
1731 # Only check comment-only lines
1732 if prev_token is None or prev_token in SKIP_TOKENS:
1733 lines = line.splitlines()
1734 for line_num, physical_line in enumerate(lines):
1735 if start[0] + line_num == 1 and line.startswith('#!'):
1736 return
1737 length = len(physical_line)
1738 chunks = physical_line.split()
1739 if token_type == tokenize.COMMENT:
1740 if (len(chunks) == 2 and
1741 length - len(chunks[-1]) < MAX_DOC_LENGTH):
1742 continue
1743 if len(chunks) == 1 and line_num + 1 < len(lines):
1744 if (len(chunks) == 1 and
1745 length - len(chunks[-1]) < MAX_DOC_LENGTH):
1746 continue
1747 if length > max_doc_length:
1748 doc_error = (start[0] + line_num, max_doc_length)
1749 yield (doc_error, "W505 doc line too long "
1750 "(%d > %d characters)"
1751 % (length, max_doc_length))
1752 prev_token = token_type
1755########################################################################
1756# Helper functions
1757########################################################################
def readlines(filename):
    """Read the source code."""
    try:
        # tokenize.open honours the file's PEP 263 coding cookie / BOM.
        with tokenize.open(filename) as handle:
            return handle.readlines()
    except (LookupError, SyntaxError, UnicodeError):
        # Fall back if file encoding is improperly declared
        with open(filename, encoding='latin-1') as handle:
            return handle.readlines()
def stdin_get_value():
    """Read the value from stdin."""
    # Decode stdin leniently: undecodable bytes are dropped.
    wrapper = io.TextIOWrapper(sys.stdin.buffer, errors='ignore')
    return wrapper.read()
# Cached (per physical line) search for "# noqa" / "# nopep8"
# suppression comments, case-insensitive.
noqa = lru_cache(512)(re.compile(r'# no(?:qa|pep8)\b', re.I).search)
def expand_indent(line):
    r"""Return the amount of indentation.

    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('    \t')
    8
    >>> expand_indent('        \t')
    16
    """
    line = line.rstrip('\n\r')
    if '\t' not in line:
        # Fast path: indentation is spaces only.
        return len(line) - len(line.lstrip())
    width = 0
    for char in line:
        if char == '\t':
            # Advance to the next multiple of 8.
            width += 8 - width % 8
        elif char == ' ':
            width += 1
        else:
            break
    return width
def mute_string(text):
    """Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # Skip string modifiers (e.g. u or r) by locating the first quote.
    body_start = text.index(text[-1]) + 1
    body_end = len(text) - 1
    # Triple-quoted strings: skip two more quote characters on each side.
    if text.endswith(('"""', "'''")):
        body_start += 2
        body_end -= 2
    return text[:body_start] + 'x' * (body_end - body_start) + text[body_end:]
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines."""
    # For each file named in the diff, collect the set of row numbers
    # (in the new file) that the patch touches.
    result = {}
    target = remaining = None
    for raw in diff.splitlines():
        if remaining:
            # Inside a hunk: '-' lines belong only to the old file and
            # therefore do not consume a row of the new file.
            if not raw.startswith('-'):
                remaining -= 1
            continue
        if raw.startswith('@@ '):
            row, remaining = (
                int(group or '1')
                for group in HUNK_REGEX.match(raw).groups()
            )
            result[target].update(range(row, row + remaining))
        elif raw.startswith('+++'):
            target = raw[4:].split('\t', 1)[0]
            # Git diff will use (i)ndex, (w)ork tree, (c)ommit and
            # (o)bject instead of a/b/c/d as prefixes for patches
            if target[:2] in ('b/', 'w/', 'i/'):
                target = target[2:]
            result[target] = set()
    return {
        os.path.join(parent, filepath): rows
        for filepath, rows in result.items()
        if rows and filename_match(filepath, patterns)
    }
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    Return a list of absolute paths.
    """
    if not value:
        return []
    if isinstance(value, list):
        # Already parsed; return unchanged.
        return value
    result = []
    for raw in value.split(','):
        candidate = raw.strip()
        if '/' in candidate:
            # Only multi-component paths are made absolute.
            candidate = os.path.abspath(os.path.join(parent, candidate))
        result.append(candidate.rstrip('/'))
    return result
def filename_match(filename, patterns, default=True):
    """Check if patterns contains a pattern that matches filename.

    If patterns is unspecified, this always returns True.
    """
    if not patterns:
        return default
    for pattern in patterns:
        if fnmatch(filename, pattern):
            return True
    return False
def update_counts(s, counts):
    r"""Adds one to the counts of each appearance of characters in s,
    for characters in counts"""
    # Characters of s that are not keys of counts are ignored.
    for char in filter(counts.__contains__, s):
        counts[char] += 1
def _is_eol_token(token):
    """Return True if the token ends a physical line."""
    if token[0] in NEWLINE:
        return True
    # Also treat an explicit backslash continuation at end of line
    # (only whitespace between the token and the trailing '\') as EOL.
    return token[4][token[3][1]:].lstrip() == '\\\n'
1896########################################################################
1897# Framework to run all checks
1898########################################################################
1901class Checker:
1902 """Load a Python source file, tokenize it, check coding style."""
1904 def __init__(self, filename=None, lines=None,
1905 options=None, report=None, **kwargs):
1906 if options is None:
1907 options = StyleGuide(kwargs).options
1908 else:
1909 assert not kwargs
1910 self._io_error = None
1911 self._physical_checks = options.physical_checks
1912 self._logical_checks = options.logical_checks
1913 self._ast_checks = options.ast_checks
1914 self.max_line_length = options.max_line_length
1915 self.max_doc_length = options.max_doc_length
1916 self.indent_size = options.indent_size
1917 self.multiline = False # in a multiline string?
1918 self.hang_closing = options.hang_closing
1919 self.indent_size = options.indent_size
1920 self.verbose = options.verbose
1921 self.filename = filename
1922 # Dictionary where a checker can store its custom state.
1923 self._checker_states = {}
1924 if filename is None:
1925 self.filename = 'stdin'
1926 self.lines = lines or []
1927 elif filename == '-':
1928 self.filename = 'stdin'
1929 self.lines = stdin_get_value().splitlines(True)
1930 elif lines is None:
1931 try:
1932 self.lines = readlines(filename)
1933 except OSError:
1934 (exc_type, exc) = sys.exc_info()[:2]
1935 self._io_error = f'{exc_type.__name__}: {exc}'
1936 self.lines = []
1937 else:
1938 self.lines = lines
1939 if self.lines:
1940 ord0 = ord(self.lines[0][0])
1941 if ord0 in (0xef, 0xfeff): # Strip the UTF-8 BOM
1942 if ord0 == 0xfeff:
1943 self.lines[0] = self.lines[0][1:]
1944 elif self.lines[0][:3] == '\xef\xbb\xbf':
1945 self.lines[0] = self.lines[0][3:]
1946 self.report = report or options.report
1947 self.report_error = self.report.error
1948 self.noqa = False
    def report_invalid_syntax(self):
        """Check if the syntax is valid."""
        # Must be called from an 'except' block: inspects the exception
        # currently being handled.
        (exc_type, exc) = sys.exc_info()[:2]
        if len(exc.args) > 1:
            offset = exc.args[1]
            if len(offset) > 2:
                # Keep only (row, col) from a longer location tuple
                # (e.g. a SyntaxError's (filename, lineno, offset, text)).
                offset = offset[1:3]
        else:
            offset = (1, 0)
        self.report_error(offset[0], offset[1] or 0,
                          f'E901 {exc_type.__name__}: {exc.args[0]}',
                          self.report_invalid_syntax)
1963 def readline(self):
1964 """Get the next line from the input buffer."""
1965 if self.line_number >= self.total_lines:
1966 return ''
1967 line = self.lines[self.line_number]
1968 self.line_number += 1
1969 if self.indent_char is None and line[:1] in WHITESPACE:
1970 self.indent_char = line[0]
1971 return line
1973 def run_check(self, check, argument_names):
1974 """Run a check plugin."""
1975 arguments = []
1976 for name in argument_names:
1977 arguments.append(getattr(self, name))
1978 return check(*arguments)
1980 def init_checker_state(self, name, argument_names):
1981 """Prepare custom state for the specific checker plugin."""
1982 if 'checker_state' in argument_names:
1983 self.checker_state = self._checker_states.setdefault(name, {})
1985 def check_physical(self, line):
1986 """Run all physical checks on a raw input line."""
1987 self.physical_line = line
1988 for name, check, argument_names in self._physical_checks:
1989 self.init_checker_state(name, argument_names)
1990 result = self.run_check(check, argument_names)
1991 if result is not None:
1992 (offset, text) = result
1993 self.report_error(self.line_number, offset, text, check)
1994 if text[:4] == 'E101':
1995 self.indent_char = line[0]
    def build_tokens_line(self):
        """Build a logical line from tokens.

        Joins the buffered tokens into ``self.logical_line`` (with string
        literals muted) and returns a list of (logical_offset, (row, col))
        pairs mapping positions in the logical line back to the physical
        source.  Returns None/empty when only skippable tokens were seen.
        """
        logical = []
        comments = []
        length = 0
        prev_row = prev_col = mapping = None
        for token_type, text, start, end, line in self.tokens:
            if token_type in SKIP_TOKENS:
                continue
            if not mapping:
                # First significant token: anchor logical offset 0 here.
                mapping = [(0, start)]
            if token_type == tokenize.COMMENT:
                # Comments are collected separately for the noqa check.
                comments.append(text)
                continue
            if token_type == tokenize.STRING:
                # Replace string contents with 'xxx' so logical checks
                # don't trip on characters inside literals.
                text = mute_string(text)
            if prev_row:
                (start_row, start_col) = start
                if prev_row != start_row:    # different row
                    prev_text = self.lines[prev_row - 1][prev_col - 1]
                    # Tokens separated by a line continuation get a single
                    # space, except directly inside/after brackets.
                    if prev_text == ',' or (prev_text not in '{[(' and
                                            text not in '}])'):
                        text = ' ' + text
                elif prev_col != start_col:  # different column
                    # Same row: preserve the exact inter-token whitespace.
                    text = line[prev_col:start_col] + text
            logical.append(text)
            length += len(text)
            mapping.append((length, end))
            (prev_row, prev_col) = end
        self.logical_line = ''.join(logical)
        # noqa applies if any comment on the logical line carries it.
        self.noqa = comments and noqa(''.join(comments))
        return mapping
    def check_logical(self):
        """Build a line from tokens and run all logical checks on it."""
        self.report.increment_logical_line()
        mapping = self.build_tokens_line()
        if not mapping:
            # Nothing but comments/whitespace: no logical line to check.
            return

        # Parallel list of logical offsets, used for bisecting below.
        mapping_offsets = [offset for offset, _ in mapping]
        (start_row, start_col) = mapping[0][1]
        start_line = self.lines[start_row - 1]
        self.indent_level = expand_indent(start_line[:start_col])
        if self.blank_before < self.blank_lines:
            self.blank_before = self.blank_lines
        if self.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self._logical_checks:
            if self.verbose >= 4:
                print(' ' + name)
            self.init_checker_state(name, argument_names)
            for offset, text in self.run_check(check, argument_names) or ():
                if not isinstance(offset, tuple):
                    # As mappings are ordered, bisecting is a fast way
                    # to find a given offset in them.
                    token_offset, pos = mapping[bisect.bisect_left(
                        mapping_offsets, offset)]
                    offset = (pos[0], pos[1] + offset - token_offset)
                self.report_error(offset[0], offset[1], text, check)
        if self.logical_line:
            # Remember state so the next logical line's checks can compare
            # against this one.
            self.previous_indent_level = self.indent_level
            self.previous_logical = self.logical_line
            if not self.indent_level:
                self.previous_unindented_logical_line = self.logical_line
        self.blank_lines = 0
        self.tokens = []
2065 def check_ast(self):
2066 """Build the file's AST and run all AST checks."""
2067 try:
2068 tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
2069 except (ValueError, SyntaxError, TypeError):
2070 return self.report_invalid_syntax()
2071 for name, cls, __ in self._ast_checks:
2072 checker = cls(tree, self.filename)
2073 for lineno, offset, text, check in checker.run():
2074 if not self.lines or not noqa(self.lines[lineno - 1]):
2075 self.report_error(lineno, offset, text, check)
    def generate_tokens(self):
        """Tokenize file, run physical line checks and yield tokens."""
        if self._io_error:
            # The file could not be read at construction time; report the
            # deferred E902 before tokenizing the (empty) buffer.
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline)
        try:
            prev_physical = ''
            for token in tokengen:
                # token[2][0] is the start row; stop if the tokenizer runs
                # past the end of the buffer.
                if token[2][0] > self.total_lines:
                    return
                # token[4] is the physical line the token came from.
                self.noqa = token[4] and noqa(token[4])
                self.maybe_check_physical(token, prev_physical)
                yield token
                prev_physical = token[4]
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax()
    def maybe_check_physical(self, token, prev_physical):
        """If appropriate for token, check current physical line(s)."""
        # Called after every token, but act only on end of line.

        # a newline token ends a single physical line.
        if _is_eol_token(token):
            # if the file does not end with a newline, the NEWLINE
            # token is inserted by the parser, but it does not contain
            # the previous physical line in `token[4]`
            if token[4] == '':
                self.check_physical(prev_physical)
            else:
                self.check_physical(token[4])
        elif token[0] == tokenize.STRING and '\n' in token[1]:
            # Less obviously, a string that contains newlines is a
            # multiline string, either triple-quoted or with internal
            # newlines backslash-escaped. Check every physical line in
            # the string *except* for the last one: its newline is
            # outside of the multiline string, so we consider it a
            # regular physical line, and will check it like any other
            # physical line.
            #
            # Subtleties:
            # - we don't *completely* ignore the last line; if it
            #   contains the magical "# noqa" comment, we disable all
            #   physical checks for the entire multiline string
            # - have to wind self.line_number back because initially it
            #   points to the last line of the string, and we want
            #   check_physical() to give accurate feedback
            if noqa(token[4]):
                return
            self.multiline = True
            # token[2] is the (row, col) start position of the string.
            self.line_number = token[2][0]
            _, src, (_, offset), _, _ = token
            # Prepend any code preceding the string on its first line so
            # column positions stay accurate.
            src = self.lines[self.line_number - 1][:offset] + src
            for line in src.split('\n')[:-1]:
                self.check_physical(line + '\n')
                self.line_number += 1
            self.multiline = False
    def check_all(self, expected=None, line_offset=0):
        """Run all checks on the input file.

        Returns the per-file result from the report (error count).
        """
        self.report.init_file(self.filename, self.lines, expected, line_offset)
        self.total_lines = len(self.lines)
        if self._ast_checks:
            self.check_ast()
        # Reset per-file tokenizer/checker state.
        self.line_number = 0
        self.indent_char = None
        self.indent_level = self.previous_indent_level = 0
        self.previous_logical = ''
        self.previous_unindented_logical_line = ''
        self.tokens = []
        self.blank_lines = self.blank_before = 0
        # Depth of open brackets; logical lines only end at depth 0.
        parens = 0
        for token in self.generate_tokens():
            self.tokens.append(token)
            token_type, text = token[0:2]
            if self.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[{}:{}]'.format(token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]], text))
            if token_type == tokenize.OP:
                if text in '([{':
                    parens += 1
                elif text in '}])':
                    parens -= 1
            elif not parens:
                if token_type in NEWLINE:
                    if token_type == tokenize.NEWLINE:
                        # End of a logical line.
                        self.check_logical()
                        self.blank_before = 0
                    elif len(self.tokens) == 1:
                        # The physical line contains only this token.
                        self.blank_lines += 1
                        del self.tokens[0]
                    else:
                        # NL after e.g. a comment line: still a logical
                        # line boundary.
                        self.check_logical()
        if self.tokens:
            # Flush any trailing tokens (file without final NEWLINE).
            self.check_physical(self.lines[-1])
            self.check_logical()
        return self.report.get_file_results()
class BaseReport:
    """Collect the results of the checks."""

    print_filename = False

    def __init__(self, options):
        self._benchmark_keys = options.benchmark_keys
        self._ignore_code = options.ignore_code
        # Accumulated results across all checked files.
        self.elapsed = 0
        self.total_errors = 0
        self.counters = dict.fromkeys(self._benchmark_keys, 0)
        self.messages = {}

    def start(self):
        """Start the timer."""
        self._start_time = time.time()

    def stop(self):
        """Stop the timer."""
        self.elapsed = time.time() - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        self.counters['files'] += 1
        self.counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Signal a new logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = text[:4]
        if self._ignore_code(code):
            return
        try:
            self.counters[code] += 1
        except KeyError:
            # First occurrence of this code: start its counter and
            # remember the message text (without the code prefix).
            self.counters[code] = 1
            self.messages[code] = text[5:]
        # Don't care about expected errors or warnings
        if code in self.expected:
            return
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the count of errors and warnings for this file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total count of errors and warnings."""
        return sum(self.counters[code]
                   for code in self.messages if code.startswith(prefix))

    def get_statistics(self, prefix=''):
        """Get statistics for message codes that start with the prefix.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        return ['%-7s %s %s' % (self.counters[code], code, self.messages[code])
                for code in sorted(self.messages) if code.startswith(prefix)]

    def print_statistics(self, prefix=''):
        """Print overall statistics (number of errors and warnings)."""
        for stat_line in self.get_statistics(prefix):
            print(stat_line)

    def print_benchmark(self):
        """Print benchmark numbers."""
        print('{:<7.2f} {}'.format(self.elapsed, 'seconds elapsed'))
        if self.elapsed:
            for key in self._benchmark_keys:
                print('%-7d %s per second (%d total)' %
                      (self.counters[key] / self.elapsed, key,
                       self.counters[key]))
class FileReport(BaseReport):
    """Collect the results of the checks and print the filenames."""

    # Enabling the filename banner (printed once, before a file's first
    # error) is the only behavior this subclass adds to BaseReport.
    print_filename = True
class StandardReport(BaseReport):
    """Collect and print the results of the checks."""

    def __init__(self, options):
        super().__init__(options)
        self._fmt = REPORT_FORMAT.get(options.format.lower(),
                                      options.format)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        # Errors are queued here and printed, sorted, when the file is done.
        self._deferred_print = []
        return super().init_file(
            filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = super().error(line_number, offset, text, check)
        if not code:
            return code
        # Queue for printing; repeated codes only shown with --repeat.
        if self._repeat or self.counters[code] == 1:
            self._deferred_print.append(
                (line_number, offset, code, text[5:], check.__doc__))
        return code

    def get_file_results(self):
        """Print results and return the overall count for this file."""
        self._deferred_print.sort()
        for line_number, offset, code, text, doc in self._deferred_print:
            print(self._fmt % {
                'path': self.filename,
                'row': self.line_offset + line_number, 'col': offset + 1,
                'code': code, 'text': text,
            })
            if self._show_source:
                # Echo the offending line with a caret under the offset.
                if line_number > len(self.lines):
                    line = ''
                else:
                    line = self.lines[line_number - 1]
                print(line.rstrip())
                print(re.sub(r'\S', ' ', line[:offset]) + '^')
            if self._show_pep8 and doc:
                print(' ' + doc.strip())

            # stdout is block buffered when not stdout.isatty().
            # line can be broken where buffer boundary since other
            # processes write to same file.
            # flush() after print() to avoid buffer boundary.
            # Typical buffer size is 8192. line written safely when
            # len(line) < 8192.
            sys.stdout.flush()
        return self.file_errors
class DiffReport(StandardReport):
    """Collect and print the results for the changed lines only."""

    def __init__(self, options):
        super().__init__(options)
        # Mapping of filename -> line numbers present in the unified diff.
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        # Only report errors that fall on lines touched by the diff.
        if line_number in self._selected[self.filename]:
            return super().error(line_number, offset, text, check)
        return None
class StyleGuide:
    """Initialize a PEP-8 instance with few options."""

    def __init__(self, *args, **kwargs):
        # build options from the command line
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', False)
        parser = kwargs.pop('parser', None)
        # build options from dict
        options_dict = dict(*args, **kwargs)
        arglist = None if parse_argv else options_dict.get('paths', None)
        verbose = options_dict.get('verbose', None)
        options, self.paths = process_options(
            arglist, parse_argv, config_file, parser, verbose)
        if options_dict:
            # Keyword arguments override whatever came from CLI/config.
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            options.reporter = BaseReport if options.quiet else StandardReport

        options.select = tuple(options.select or ())
        if not (options.select or options.ignore or
                options.testsuite or options.doctest) and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        else:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',) if options.select else tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Initialize the report instance."""
        self.options.report = (reporter or self.options.reporter)(self.options)
        return self.options.report

    def check_files(self, paths=None):
        """Run all checks on the paths."""
        if paths is None:
            paths = self.paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in paths:
                if os.path.isdir(path):
                    self.input_dir(path)
                elif not self.excluded(path):
                    runner(path)
        except KeyboardInterrupt:
            # Partial results are still returned after Ctrl-C.
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Run all checks on a Python source file."""
        if self.options.verbose:
            print('checking %s' % filename)
        fchecker = self.checker_class(
            filename, lines=lines, options=self.options)
        return fchecker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Check all files in this directory and all subdirectories."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            for subdir in sorted(dirs):
                if self.excluded(subdir, root):
                    # Prune in place so os.walk does not descend into
                    # excluded directories.
                    dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if (
                    filename_match(filename, filepatterns) and
                    not self.excluded(filename, root)
                ):
                    runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """Check if the file should be excluded.

        Check if 'options.exclude' contains a pattern matching filename.
        """
        if not self.options.exclude:
            return False
        basename = os.path.basename(filename)
        if filename_match(basename, self.options.exclude):
            return True
        if parent:
            filename = os.path.join(parent, filename)
        filename = os.path.abspath(filename)
        return filename_match(filename, self.options.exclude)

    def ignore_code(self, code):
        """Check if the error code should be ignored.

        If 'options.select' contains a prefix of the error code,
        return False. Else, if 'options.ignore' contains a prefix of
        the error code, return True.
        """
        if len(code) < 4 and any(s.startswith(code)
                                 for s in self.options.select):
            return False
        return (code.startswith(self.options.ignore) and
                not code.startswith(self.options.select))

    def get_checks(self, argument_name):
        """Get all the checks for this category.

        Find all globally visible functions where the first argument
        name starts with argument_name and which contain selected tests.
        """
        checks = []
        for check, attrs in _checks[argument_name].items():
            (codes, args) = attrs
            # Keep the check if at least one of its codes is selected.
            if any(not (code and self.ignore_code(code)) for code in codes):
                checks.append((check.__name__, check, args))
        return sorted(checks)
def get_parser(prog='pycodestyle', version=__version__):
    """Create the parser for the program."""
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    # Option names that may also be set from a config-file section.
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'max-doc-length', 'indent-size', 'hang-closing', 'count', 'format',
        'quiet', 'show-pep8', 'show-source', 'statistics', 'verbose']
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help="print status messages, or debug with -vv")
    parser.add_option('-q', '--quiet', default=0, action='count',
                      help="report only file names, or nothing with -qq")
    parser.add_option('-r', '--repeat', default=True, action='store_true',
                      help="(obsolete) show all occurrences of the same error")
    parser.add_option('--first', action='store_false', dest='repeat',
                      help="show first occurrence of each error")
    parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
                      help="exclude files or directories which match these "
                           "comma separated patterns (default: %default)")
    parser.add_option('--filename', metavar='patterns', default='*.py',
                      help="when parsing directories, only check filenames "
                           "matching these comma separated patterns "
                           "(default: %default)")
    parser.add_option('--select', metavar='errors', default='',
                      help="select errors and warnings (e.g. E,W6)")
    parser.add_option('--ignore', metavar='errors', default='',
                      help="skip errors and warnings (e.g. E4,W) "
                           "(default: %s)" % DEFAULT_IGNORE)
    parser.add_option('--show-source', action='store_true',
                      help="show source code for each error")
    parser.add_option('--show-pep8', action='store_true',
                      help="show text of PEP 8 for each error "
                           "(implies --first)")
    parser.add_option('--statistics', action='store_true',
                      help="count errors and warnings")
    parser.add_option('--count', action='store_true',
                      help="print total number of errors and warnings "
                           "to standard error and set exit code to 1 if "
                           "total is not null")
    parser.add_option('--max-line-length', type='int', metavar='n',
                      default=MAX_LINE_LENGTH,
                      help="set maximum allowed line length "
                           "(default: %default)")
    parser.add_option('--max-doc-length', type='int', metavar='n',
                      default=None,
                      help="set maximum allowed doc line length and perform "
                           "these checks (unchecked if not set)")
    parser.add_option('--indent-size', type='int', metavar='n',
                      default=INDENT_SIZE,
                      help="set how many spaces make up an indent "
                           "(default: %default)")
    parser.add_option('--hang-closing', action='store_true',
                      help="hang closing bracket instead of matching "
                           "indentation of opening bracket's line")
    parser.add_option('--format', metavar='format', default='default',
                      help="set the error format [default|pylint|<custom>]")
    parser.add_option('--diff', action='store_true',
                      help="report changes only within line number ranges in "
                           "the unified diff received on STDIN")
    group = parser.add_option_group("Testing Options")
    # The testsuite options only exist in a development checkout.
    if os.path.exists(TESTSUITE_PATH):
        group.add_option('--testsuite', metavar='dir',
                         help="run regression tests from dir")
        group.add_option('--doctest', action='store_true',
                         help="run doctest on myself")
    group.add_option('--benchmark', action='store_true',
                     help="measure processing speed")
    return parser
def read_config(options, args, arglist, parser):
    """Read and parse configurations.

    If a config file is specified on the command line with the
    "--config" option, then only it is used for configuration.

    Otherwise, the user configuration (~/.config/pycodestyle) and any
    local configurations in the current directory or above will be
    merged together (in that order) using the read method of
    ConfigParser.
    """
    config = configparser.RawConfigParser()

    cli_conf = options.config

    local_dir = os.curdir

    if USER_CONFIG and os.path.isfile(USER_CONFIG):
        if options.verbose:
            print('user configuration: %s' % USER_CONFIG)
        config.read(USER_CONFIG)

    # Walk up from the common prefix of the checked paths, looking for a
    # project-level configuration file (e.g. tox.ini / setup.cfg).
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        if config.read(os.path.join(parent, fn) for fn in PROJECT_CONFIG):
            local_dir = parent
            if options.verbose:
                print('local configuration: in %s' % parent)
            break
        (parent, tail) = os.path.split(parent)

    if cli_conf and os.path.isfile(cli_conf):
        if options.verbose:
            print('cli configuration: %s' % cli_conf)
        config.read(cli_conf)

    pycodestyle_section = None
    if config.has_section(parser.prog):
        pycodestyle_section = parser.prog
    elif config.has_section('pep8'):
        pycodestyle_section = 'pep8'  # Deprecated
        warnings.warn('[pep8] section is deprecated. Use [pycodestyle].')

    if pycodestyle_section:
        # Map each option's dest to its type/action so config values can
        # be parsed with the right getter below.
        option_list = {o.dest: o.type or o.action for o in parser.option_list}

        # First, read the default values
        (new_options, __) = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(pycodestyle_section):
            if opt.replace('_', '-') not in parser.config_options:
                print(" unknown option '%s' ignored" % opt)
                continue
            if options.verbose > 1:
                print(" {} = {}".format(opt,
                                        config.get(pycodestyle_section, opt)))
            normalized_opt = opt.replace('-', '_')
            opt_type = option_list[normalized_opt]
            if opt_type in ('int', 'count'):
                value = config.getint(pycodestyle_section, opt)
            elif opt_type in ('store_true', 'store_false'):
                value = config.getboolean(pycodestyle_section, opt)
            else:
                value = config.get(pycodestyle_section, opt)
            if normalized_opt == 'exclude':
                # Exclude patterns are resolved relative to the directory
                # the configuration file was found in.
                value = normalize_paths(value, local_dir)
            setattr(new_options, normalized_opt, value)

        # Third, overwrite with the command-line options
        (options, __) = parser.parse_args(arglist, values=new_options)
    options.doctest = options.testsuite = False
    return options
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None, verbose=None):
    """Process options passed either via arglist or command line args.

    Passing in the ``config_file`` parameter allows other tools, such as
    flake8 to specify their own options to be processed in pycodestyle.
    """
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        group = parser.add_option_group("Configuration", description=(
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed. Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options))))
        group.add_option('--config', metavar='path', default=config_file,
                         help="user config file location")
    # Don't read the command line if the module is used as a library.
    if not arglist and not parse_argv:
        arglist = []
    # If parse_argv is True and arglist is None, arguments are
    # parsed from the command line (sys.argv)
    (options, args) = parser.parse_args(arglist)
    options.reporter = None

    # If explicitly specified verbosity, override any `-v` CLI flag
    if verbose is not None:
        options.verbose = verbose

    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            # No paths given on the command line: default to '.' when a
            # diff or a project config file is present.
            if options.diff or any(os.path.exists(name)
                                   for name in PROJECT_CONFIG):
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
    options.reporter = parse_argv and options.quiet == 1 and FileReport

    options.filename = _parse_multi_options(options.filename)
    options.exclude = normalize_paths(options.exclude)
    options.select = _parse_multi_options(options.select)
    options.ignore = _parse_multi_options(options.ignore)

    if options.diff:
        options.reporter = DiffReport
        stdin = stdin_get_value()
        options.selected_lines = parse_udiff(stdin, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
2681def _parse_multi_options(options, split_token=','):
2682 r"""Split and strip and discard empties.
2684 Turns the following:
2686 A,
2687 B,
2689 into ["A", "B"]
2690 """
2691 if options:
2692 return [o.strip() for o in options.split(split_token) if o.strip()]
2693 else:
2694 return options
def _main():
    """Parse options and run checks on Python source."""
    import signal

    # Handle "Broken pipe" gracefully
    try:
        signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1))
    except AttributeError:
        pass    # not supported on Windows

    style_guide = StyleGuide(parse_argv=True)
    options = style_guide.options

    if options.doctest or options.testsuite:
        # Development-only regression/doctest modes.
        from testsuite.support import run_tests
        report = run_tests(style_guide)
    else:
        report = style_guide.check_files()

    if options.statistics:
        report.print_statistics()

    if options.benchmark:
        report.print_benchmark()

    if options.testsuite and not options.quiet:
        report.print_results()

    if report.total_errors:
        # Exit non-zero when any error was found; --count also prints the
        # total to stderr.
        if options.count:
            sys.stderr.write(str(report.total_errors) + '\n')
        sys.exit(1)
# Allow running this module directly as a script.
if __name__ == '__main__':
    _main()