Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h11/_abnf.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

20 statements  

1# We use native strings for all the re patterns, to take advantage of string 

2# formatting, and then convert to bytestrings when compiling the final re 

3# objects. 

4 

5# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#whitespace 

6# OWS = *( SP / HTAB ) 

7# ; optional whitespace 

8OWS = r"[ \t]*" 

9 

10# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.token.separators 

11# token = 1*tchar 

12# 

13# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" 

14# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" 

15# / DIGIT / ALPHA 

16# ; any VCHAR, except delimiters 

17token = r"[-!#$%&'*+.^_`|~0-9a-zA-Z]+" 

18 

19# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#header.fields 

20# field-name = token 

21field_name = token 

22 

23# The standard says: 

24# 

25# field-value = *( field-content / obs-fold ) 

26# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] 

27# field-vchar = VCHAR / obs-text 

28# obs-fold = CRLF 1*( SP / HTAB ) 

29# ; obsolete line folding 

30# ; see Section 3.2.4 

31# 

32# https://tools.ietf.org/html/rfc5234#appendix-B.1 

33# 

34# VCHAR = %x21-7E 

35# ; visible (printing) characters 

36# 

37# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.quoted-string 

38# obs-text = %x80-FF 

39# 

40# However, the standard definition of field-content is WRONG! It disallows 

41# fields containing a single visible character surrounded by whitespace, 

42# e.g. "foo a bar". 

43# 

44# See: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189 

45# 

46# So our definition of field_content attempts to fix it up... 

47# 

48# Also, we allow lots of control characters, because apparently people assume 

49# that they're legal in practice (e.g., google analytics makes cookies with 

50# \x01 in them!): 

51# https://github.com/python-hyper/h11/issues/57 

52# We still don't allow NUL or whitespace, because those are often treated as 

53# meta-characters and letting them through can lead to nasty issues like SSRF. 

54vchar = r"[\x21-\x7e]" 

55vchar_or_obs_text = r"[^\x00\s]" 

56field_vchar = vchar_or_obs_text 

57field_content = r"{field_vchar}+(?:[ \t]+{field_vchar}+)*".format(**globals()) 

58 

59# We handle obs-fold at a different level, and our fixed-up field_content 

60# already grows to swallow the whole value, so ? instead of * 

61field_value = r"({field_content})?".format(**globals()) 

62 

63# header-field = field-name ":" OWS field-value OWS 

64header_field = ( 

65 r"(?P<field_name>{field_name})" 

66 r":" 

67 r"{OWS}" 

68 r"(?P<field_value>{field_value})" 

69 r"{OWS}".format(**globals()) 

70) 

71 

72# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#request.line 

73# 

74# request-line = method SP request-target SP HTTP-version CRLF 

75# method = token 

76# HTTP-version = HTTP-name "/" DIGIT "." DIGIT 

77# HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive 

78# 

79# request-target is complicated (see RFC 7230 sec 5.3) -- could be path, full 

80# URL, host+port (for connect), or even "*", but in any case we are guaranteed 

81# that it consists of the visible printing characters. 

82method = token 

83request_target = r"{vchar}+".format(**globals()) 

84http_version = r"HTTP/(?P<http_version>[0-9]\.[0-9])" 

85request_line = ( 

86 r"(?P<method>{method})" 

87 r" " 

88 r"(?P<target>{request_target})" 

89 r" " 

90 r"{http_version}".format(**globals()) 

91) 

92 

93# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#status.line 

94# 

95# status-line = HTTP-version SP status-code SP reason-phrase CRLF 

96# status-code = 3DIGIT 

97# reason-phrase = *( HTAB / SP / VCHAR / obs-text ) 

98status_code = r"[0-9]{3}" 

99reason_phrase = r"([ \t]|{vchar_or_obs_text})*".format(**globals()) 

100status_line = ( 

101 r"{http_version}" 

102 r" " 

103 r"(?P<status_code>{status_code})" 

104 # However, there are apparently a few too many servers out there that just 

105 # leave out the reason phrase: 

106 # https://github.com/scrapy/scrapy/issues/345#issuecomment-281756036 

107 # https://github.com/seanmonstar/httparse/issues/29 

108 # so make it optional. ?: is a non-capturing group. 

109 r"(?: (?P<reason>{reason_phrase}))?".format(**globals()) 

110) 

111 

112HEXDIG = r"[0-9A-Fa-f]" 

113# Actually 

114# 

115# chunk-size = 1*HEXDIG 

116# 

117# but we impose an upper-limit to avoid ridiculosity. len(str(2**64)) == 20 

118chunk_size = r"({HEXDIG}){{1,20}}".format(**globals()) 

119# Actually 

120# 

121# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) 

122# 

123# but we aren't parsing the things so we don't really care. 

124chunk_ext = r";.*" 

125chunk_header = ( 

126 r"(?P<chunk_size>{chunk_size})" 

127 r"(?P<chunk_ext>{chunk_ext})?" 

128 r"{OWS}\r\n".format( 

129 **globals() 

130 ) # Even though the specification does not allow for extra whitespace, 

131 # we are lenient with trailing whitespace because some servers in the wild send it. 

132)