Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/rfc3986/abnf_regexp.py: 100%

63 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 06:12 +0000

1# -*- coding: utf-8 -*- 

2# Licensed under the Apache License, Version 2.0 (the "License"); 

3# you may not use this file except in compliance with the License. 

4# You may obtain a copy of the License at 

5# 

6# http://www.apache.org/licenses/LICENSE-2.0 

7# 

8# Unless required by applicable law or agreed to in writing, software 

9# distributed under the License is distributed on an "AS IS" BASIS, 

10# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 

11# implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14"""Module for the regular expressions crafted from ABNF.""" 

15 

16import sys 

17 

# Character classes from RFC 3986. Names ending in ``_RE`` are fragments meant
# to be placed inside a regular-expression character class ("[...]"); names
# ending in ``_SET`` are plain Python sets of single characters.

# https://tools.ietf.org/html/rfc3986#page-13
GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@"
GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS)
# https://tools.ietf.org/html/rfc3986#page-13
SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;="
SUB_DELIMITERS_SET = set(SUB_DELIMITERS)
# Escape the '*' for use in regular expressions
SUB_DELIMITERS_RE = r"!$&'()\*+,;="
RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET)
ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
DIGIT = "0123456789"
# https://tools.ietf.org/html/rfc3986#section-2.3
#   unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
# "!" is a sub-delim, not an unreserved character, so it must not appear
# here; this keeps the literal list in agreement with UNRESERVED_RE below.
UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + "._-~"
UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS)
# Every character that may appear in a URI without being percent-encoded.
NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET)
# We need to escape the '-' in this case:
UNRESERVED_RE = r"A-Za-z0-9._~\-"

# Percent encoded character values
PERCENT_ENCODED = PCT_ENCODED = "%[A-Fa-f0-9]{2}"
# pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
# The "%" operator binds tighter than "+", so only the trailing literal is
# formatted with PCT_ENCODED before the pieces are concatenated.
PCHAR = "([" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":@]|%s)" % PCT_ENCODED

39 

# NOTE(sigmavirus24): We're going to use more strict regular expressions
# than appear in Appendix B for scheme. This will prevent over-eager
# consuming of items that aren't schemes.
SCHEME_RE = "[a-zA-Z][a-zA-Z0-9+.-]*"
# Authority: anything up to the first backslash, "/", "?" or "#".
_AUTHORITY_RE = "[^\\\\/?#]*"
# Path: anything up to the first "?" or "#".
_PATH_RE = "[^?#]*"
# Query: anything up to the first "#".
_QUERY_RE = "[^#]*"
# Fragment: whatever remains.
_FRAGMENT_RE = ".*"

# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B
# Maps each component name to the loose pattern used to split a reference.
COMPONENT_PATTERN_DICT = {
    "scheme": SCHEME_RE,
    "authority": _AUTHORITY_RE,
    "path": _PATH_RE,
    "query": _QUERY_RE,
    "fragment": _FRAGMENT_RE,
}

# See http://tools.ietf.org/html/rfc3986#appendix-B
# In this case, we name each of the important matches so we can use
# SRE_Match#groupdict to parse the values out if we so choose. This is also
# modified to ignore other matches that are not important to the parsing of
# the reference so we can also simply use SRE_Match#groups.
URL_PARSING_RE = (
    r"(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?"
    r"(?P<path>{path})(?:\?(?P<query>{query}))?"
    r"(?:#(?P<fragment>{fragment}))?"
).format(**COMPONENT_PATTERN_DICT)

68 

69 

# #########################
# Authority Matcher Section
# #########################

# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2
# The pattern for a regular name, e.g., www.google.com, api.github.com:
# zero or more percent-encoded triplets, sub-delims or unreserved
# characters, captured as a single group.
REGULAR_NAME_RE = REG_NAME = "((?:{0}|[{1}])*)".format(
    "%[0-9A-Fa-f]{2}", SUB_DELIMITERS_RE + UNRESERVED_RE
)

# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1.
# Only the dotted-quad shape is checked; octet values are not range-checked.
IPv4_RE = r"([0-9]{1,3}\.){3}[0-9]{1,3}"
# One piece (h16) of an IPv6 address: one to four hexadecimal digits
HEXDIG_RE = "[0-9A-Fa-f]{1,4}"
# Least-significant 32 bits of an IPv6 address: two h16 pieces or an IPv4
LS32_RE = "({hex}:{hex}|{ipv4})".format(hex=HEXDIG_RE, ipv4=IPv4_RE)
# Substitutions into the following patterns for IPv6 patterns defined
# http://tools.ietf.org/html/rfc3986#page-20
_subs = {"hex": HEXDIG_RE, "ls32": LS32_RE}

# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details
# about ABNF (Augmented Backus-Naur Form) use in the comments
variations = [
    #                            6( h16 ":" ) ls32
    "(%(hex)s:){6}%(ls32)s" % _subs,
    #                       "::" 5( h16 ":" ) ls32
    "::(%(hex)s:){5}%(ls32)s" % _subs,
    # [               h16 ] "::" 4( h16 ":" ) ls32
    "(%(hex)s)?::(%(hex)s:){4}%(ls32)s" % _subs,
    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    "((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s" % _subs,
    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    "((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s" % _subs,
    # [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    "((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s" % _subs,
    # [ *4( h16 ":" ) h16 ] "::"              ls32
    "((%(hex)s:){0,4}%(hex)s)?::%(ls32)s" % _subs,
    # [ *5( h16 ":" ) h16 ] "::"              h16
    "((%(hex)s:){0,5}%(hex)s)?::%(hex)s" % _subs,
    # [ *6( h16 ":" ) h16 ] "::"
    "((%(hex)s:){0,6}%(hex)s)?::" % _subs,
]

# Alternation of the nine forms above, each wrapped in its own group.
IPv6_RE = "(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7})|({8}))".format(
    *variations
)

115 

# IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
IPv_FUTURE_RE = r"v[0-9A-Fa-f]+\.[%s]+" % (
    UNRESERVED_RE + SUB_DELIMITERS_RE + ":"
)

# RFC 6874 Zone ID ABNF
ZONE_ID = "(?:[" + UNRESERVED_RE + "]|" + PCT_ENCODED + ")+"

# IPv6 address with an optional zone identifier. The RFC4007 variant accepts
# a bare "%" as well as the percent-encoded "%25" before the zone ID; the
# plain variant accepts only "%25".
IPv6_ADDRZ_RFC4007_RE = IPv6_RE + "(?:(?:%25|%)" + ZONE_ID + ")?"
IPv6_ADDRZ_RE = IPv6_RE + "(?:%25" + ZONE_ID + ")?"

# A bracketed IP literal: an IPv6 address (zone ID optional) or IPvFuture.
IP_LITERAL_RE = r"\[({0}|{1})\]".format(
    IPv6_ADDRZ_RFC4007_RE,
    IPv_FUTURE_RE,
)

# Pattern for matching the host piece of the authority
HOST_RE = HOST_PATTERN = "({0}|{1}|{2})".format(
    REG_NAME,
    IPv4_RE,
    IP_LITERAL_RE,
)
# Userinfo: one or more unreserved, sub-delim, ":" or percent-encoded
# characters. Note the pattern carries its own leading "^" anchor.
USERINFO_RE = (
    "^([" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":]|%s)+" % (PCT_ENCODED)
)
# Port: one to five decimal digits (the numeric range is not checked here).
PORT_RE = "[0-9]{1,5}"

141 

# ####################
# Path Matcher Section
# ####################

# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information
# about the path patterns defined below.
segments = {
    # Possibly empty segment
    "segment": PCHAR + "*",
    # Non-zero length segment
    "segment-nz": PCHAR + "+",
    # Non-zero length segment without ":" (the colon is dropped from the
    # PCHAR character class by plain string replacement)
    "segment-nz-nc": PCHAR.replace(":", "") + "+",
}

# Path types taken from Section 3.3 (linked above)
PATH_EMPTY = "^$"
PATH_ROOTLESS = "%(segment-nz)s(/%(segment)s)*" % segments
PATH_NOSCHEME = "%(segment-nz-nc)s(/%(segment)s)*" % segments
PATH_ABSOLUTE = "/(%s)?" % PATH_ROOTLESS
PATH_ABEMPTY = "(/%(segment)s)*" % segments
# Any of the five path forms, anchored at both ends.
PATH_RE = "^(%s|%s|%s|%s|%s)$" % (
    PATH_ABEMPTY,
    PATH_ABSOLUTE,
    PATH_NOSCHEME,
    PATH_ROOTLESS,
    PATH_EMPTY,
)

# Query and fragment share one pattern: zero or more of "/", "?", ":", "@",
# unreserved, sub-delims or percent-encoded characters, anchored at both ends.
FRAGMENT_RE = QUERY_RE = (
    "^([/?:@" + UNRESERVED_RE + SUB_DELIMITERS_RE + "]|%s)*$" % PCT_ENCODED
)

173 

# ##########################
# Relative reference matcher
# ##########################

# See http://tools.ietf.org/html/rfc3986#section-4.2 for details
# relative-part = "//" authority path-abempty
#               / path-absolute / path-noscheme / path-empty
RELATIVE_PART_RE = "(//%s%s|%s|%s|%s)" % (
    COMPONENT_PATTERN_DICT["authority"],
    PATH_ABEMPTY,
    PATH_ABSOLUTE,
    PATH_NOSCHEME,
    PATH_EMPTY,
)

# See http://tools.ietf.org/html/rfc3986#section-3 for definition
# hier-part = "//" authority path-abempty
#           / path-absolute / path-rootless / path-empty
HIER_PART_RE = "(//%s%s|%s|%s|%s)" % (
    COMPONENT_PATTERN_DICT["authority"],
    PATH_ABEMPTY,
    PATH_ABSOLUTE,
    PATH_ROOTLESS,
    PATH_EMPTY,
)

195 

# ###############
# IRIs / RFC 3987
# ###############

# IPRIVATE and UCSCHAR_RE are character-class fragments (ranges meant to be
# placed inside "[...]") for the RFC 3987 ``iprivate`` and ``ucschar`` rules.
# Only wide-unicode gets the high-ranges of UCSCHAR
if sys.maxunicode > 0xFFFF:  # pragma: no cover
    IPRIVATE = u"\uE000-\uF8FF\U000F0000-\U000FFFFD\U00100000-\U0010FFFD"
    UCSCHAR_RE = (
        u"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"
        u"\U00010000-\U0001FFFD\U00020000-\U0002FFFD"
        u"\U00030000-\U0003FFFD\U00040000-\U0004FFFD"
        u"\U00050000-\U0005FFFD\U00060000-\U0006FFFD"
        u"\U00070000-\U0007FFFD\U00080000-\U0008FFFD"
        u"\U00090000-\U0009FFFD\U000A0000-\U000AFFFD"
        u"\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD"
        u"\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD"
    )
else:  # pragma: no cover
    # Narrow builds cannot express code points above U+FFFF in a class,
    # so only the Basic Multilingual Plane ranges are kept.
    IPRIVATE = u"\uE000-\uF8FF"
    UCSCHAR_RE = u"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"

216 

# iunreserved = unreserved characters plus the ucschar ranges
IUNRESERVED_RE = u"A-Za-z0-9\\._~\\-" + UCSCHAR_RE
# ipchar = iunreserved / pct-encoded / sub-delims / ":" / "@"
# The "%" operator binds tighter than "+", so only the trailing literal is
# formatted with PCT_ENCODED before the pieces are concatenated.
IPCHAR = u"([" + IUNRESERVED_RE + SUB_DELIMITERS_RE + u":@]|%s)" % PCT_ENCODED

isegments = {
    # Possibly empty segment
    "isegment": IPCHAR + u"*",
    # Non-zero length segment
    "isegment-nz": IPCHAR + u"+",
    # Non-zero length segment without ":" (the colon is dropped from the
    # IPCHAR character class by plain string replacement)
    "isegment-nz-nc": IPCHAR.replace(":", "") + u"+",
}

# IRI counterparts of the RFC 3986 path forms; the wrapping groups here are
# non-capturing.
IPATH_ROOTLESS = u"%(isegment-nz)s(/%(isegment)s)*" % isegments
IPATH_NOSCHEME = u"%(isegment-nz-nc)s(/%(isegment)s)*" % isegments
IPATH_ABSOLUTE = u"/(?:%s)?" % IPATH_ROOTLESS
IPATH_ABEMPTY = u"(?:/%(isegment)s)*" % isegments
# Any of the five path forms, anchored at both ends.
IPATH_RE = u"^(?:%s|%s|%s|%s|%s)$" % (
    IPATH_ABEMPTY,
    IPATH_ABSOLUTE,
    IPATH_NOSCHEME,
    IPATH_ROOTLESS,
    PATH_EMPTY,
)

239 

# Regular name for IRIs: zero or more percent-encoded triplets, sub-delims
# or iunreserved characters (non-capturing).
IREGULAR_NAME_RE = IREG_NAME = u"(?:{0}|[{1}])*".format(
    u"%[0-9A-Fa-f]{2}", SUB_DELIMITERS_RE + IUNRESERVED_RE
)

# Host piece of an IRI authority: ireg-name, IPv4 address or IP literal.
IHOST_RE = IHOST_PATTERN = u"({0}|{1}|{2})".format(
    IREG_NAME,
    IPv4_RE,
    IP_LITERAL_RE,
)

# Userinfo for IRIs. Note the pattern carries its own leading "^" anchor.
IUSERINFO_RE = (
    u"^(?:[" + IUNRESERVED_RE + SUB_DELIMITERS_RE + u":]|%s)+" % (PCT_ENCODED)
)

# ifragment: "/", "?", ":", "@", iunreserved, sub-delims or percent-encoded
# characters, anchored at both ends.
IFRAGMENT_RE = (
    u"^(?:[/?:@"
    + IUNRESERVED_RE
    + SUB_DELIMITERS_RE
    + u"]|%s)*$" % PCT_ENCODED
)
# iquery: same as ifragment, plus the IPRIVATE ranges.
IQUERY_RE = (
    u"^(?:[/?:@"
    + IUNRESERVED_RE
    + SUB_DELIMITERS_RE
    + IPRIVATE
    + u"]|%s)*$" % PCT_ENCODED
)

267 

# IRI relative part: "//" authority ipath-abempty, or an absolute,
# no-scheme or empty path.
IRELATIVE_PART_RE = u"(//%s%s|%s|%s|%s)" % (
    COMPONENT_PATTERN_DICT["authority"],
    IPATH_ABEMPTY,
    IPATH_ABSOLUTE,
    IPATH_NOSCHEME,
    PATH_EMPTY,
)

# IRI hierarchical part: "//" authority ipath-abempty, or an absolute,
# rootless or empty path.
IHIER_PART_RE = u"(//%s%s|%s|%s|%s)" % (
    COMPONENT_PATTERN_DICT["authority"],
    IPATH_ABEMPTY,
    IPATH_ABSOLUTE,
    IPATH_ROOTLESS,
    PATH_EMPTY,
)