Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h11/_abnf.py: 100%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# Regular-expression fragments for the HTTP/1.1 grammar (RFC 7230 ABNF).
#
# We use native strings for all the re patterns, to take advantage of string
# formatting, and then convert to bytestrings when compiling the final re
# objects.
#
# Composite patterns are assembled with ``.format(**globals())``: every
# ``{name}`` placeholder is replaced by the module-level fragment of the same
# name, so each fragment must be defined before the first pattern that uses it.

# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#whitespace
#  OWS            = *( SP / HTAB )
#                 ; optional whitespace
OWS = r"[ \t]*"

# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.token.separators
#   token          = 1*tchar
#
#   tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                  / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                  / DIGIT / ALPHA
#                  ; any VCHAR, except delimiters
token = r"[-!#$%&'*+.^_`|~0-9a-zA-Z]+"

# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#header.fields
#  field-name     = token
field_name = token

# The standard says:
#
#  field-value    = *( field-content / obs-fold )
#  field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
#  field-vchar    = VCHAR / obs-text
#  obs-fold       = CRLF 1*( SP / HTAB )
#                 ; obsolete line folding
#                 ; see Section 3.2.4
#
# https://tools.ietf.org/html/rfc5234#appendix-B.1
#
#   VCHAR          =  %x21-7E
#                  ; visible (printing) characters
#
# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.quoted-string
#   obs-text       = %x80-FF
#
# However, the standard definition of field-content is WRONG! It disallows
# fields containing a single visible character surrounded by whitespace,
# e.g. "foo a bar".
#
# See: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
#
# So our definition of field_content attempts to fix it up...
#
# Also, we allow lots of control characters, because apparently people assume
# that they're legal in practice (e.g., google analytics makes cookies with
# \x01 in them!):
#   https://github.com/python-hyper/h11/issues/57
# We still don't allow NUL or whitespace, because those are often treated as
# meta-characters and letting them through can lead to nasty issues like SSRF.
vchar = r"[\x21-\x7e]"
vchar_or_obs_text = r"[^\x00\s]"
field_vchar = vchar_or_obs_text
field_content = r"{field_vchar}+(?:[ \t]+{field_vchar}+)*".format(**globals())

# We handle obs-fold at a different level, and our fixed-up field_content
# already grows to swallow the whole value, so ? instead of *
field_value = r"({field_content})?".format(**globals())

#  header-field   = field-name ":" OWS field-value OWS
header_field = (
    r"(?P<field_name>{field_name})"
    r":"
    r"{OWS}"
    r"(?P<field_value>{field_value})"
    r"{OWS}".format(**globals())
)

# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#request.line
#
#   request-line   = method SP request-target SP HTTP-version CRLF
#   method         = token
#   HTTP-version   = HTTP-name "/" DIGIT "." DIGIT
#   HTTP-name      = %x48.54.54.50 ; "HTTP", case-sensitive
#
# request-target is complicated (see RFC 7230 sec 5.3) -- could be path, full
# URL, host+port (for connect), or even "*", but in any case we are guaranteed
# that it consists of the visible printing characters.
method = token
request_target = r"{vchar}+".format(**globals())
http_version = r"HTTP/(?P<http_version>[0-9]\.[0-9])"
request_line = (
    r"(?P<method>{method})"
    r" "
    r"(?P<target>{request_target})"
    r" "
    r"{http_version}".format(**globals())
)

# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#status.line
#
#   status-line = HTTP-version SP status-code SP reason-phrase CRLF
#   status-code    = 3DIGIT
#   reason-phrase  = *( HTAB / SP / VCHAR / obs-text )
status_code = r"[0-9]{3}"
reason_phrase = r"([ \t]|{vchar_or_obs_text})*".format(**globals())
status_line = (
    r"{http_version}"
    r" "
    r"(?P<status_code>{status_code})"
    # However, there are apparently a few too many servers out there that just
    # leave out the reason phrase:
    #   https://github.com/scrapy/scrapy/issues/345#issuecomment-281756036
    #   https://github.com/seanmonstar/httparse/issues/29
    # so make it optional. ?: is a non-capturing group.
    r"(?: (?P<reason>{reason_phrase}))?".format(**globals())
)

HEXDIG = r"[0-9A-Fa-f]"
# Actually
#
#      chunk-size     = 1*HEXDIG
#
# but we impose an upper-limit to avoid ridiculosity. len(str(2**64)) == 20
# (the doubled braces survive .format() as the literal quantifier {1,20})
chunk_size = r"({HEXDIG}){{1,20}}".format(**globals())
# Actually
#
#     chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
#
# but we aren't parsing the things so we don't really care.
chunk_ext = r";.*"
# Even though the specification does not allow for extra whitespace, we are
# lenient with trailing whitespace (the {OWS} before CRLF) because some
# servers in the wild send it.
chunk_header = (
    r"(?P<chunk_size>{chunk_size})"
    r"(?P<chunk_ext>{chunk_ext})?"
    r"{OWS}\r\n".format(**globals())
)