Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/rfc3986/abnf_regexp.py: 98%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

64 statements  

1# Licensed under the Apache License, Version 2.0 (the "License"); 

2# you may not use this file except in compliance with the License. 

3# You may obtain a copy of the License at 

4# 

5# http://www.apache.org/licenses/LICENSE-2.0 

6# 

7# Unless required by applicable law or agreed to in writing, software 

8# distributed under the License is distributed on an "AS IS" BASIS, 

9# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 

10# implied. 

11# See the License for the specific language governing permissions and 

12# limitations under the License. 

13"""Module for the regular expressions crafted from ABNF.""" 

14import sys 

15 

# Character classes from RFC 3986, section 2 ("Characters").
#
# Naming convention used below:
#   * plain strings list the literal characters that make up a class,
#   * ``*_SET`` holds the same characters as a ``set`` for membership tests,
#   * ``*_RE`` is a fragment meant to be spliced *inside* a regex character
#     class (``[...]``), so characters that are special there are escaped.

# gen-delims: https://tools.ietf.org/html/rfc3986#page-13
GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@"
GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS)
# sub-delims: https://tools.ietf.org/html/rfc3986#page-13
SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;="
SUB_DELIMITERS_SET = set(SUB_DELIMITERS)
# Escape the '*' for use in regular expressions
SUB_DELIMITERS_RE = r"!$&'()\*+,;="
# reserved = gen-delims / sub-delims
RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET)
ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
DIGIT = "0123456789"
# https://tools.ietf.org/html/rfc3986#section-2.3
# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
# "!" is a sub-delim, not an unreserved character, so it must not be listed
# here; it still reaches NON_PCT_ENCODED_SET through RESERVED_CHARS_SET.
# This also keeps UNRESERVED_CHARS in agreement with UNRESERVED_RE below.
UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + "._-~"
UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS)
# Every character that may appear in a URI without being percent-encoded.
NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET)
# We need to escape the '-' in this case (it would otherwise form a range
# with its neighbours once embedded in a character class):
UNRESERVED_RE = r"A-Za-z0-9._~\-"

# Percent encoded character values: "%" followed by two hex digits
PERCENT_ENCODED = PCT_ENCODED = "%[A-Fa-f0-9]{2}"
# pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
PCHAR = "([{}{}:@]|{})".format(UNRESERVED_RE, SUB_DELIMITERS_RE, PCT_ENCODED)

37 

# NOTE(sigmavirus24): The scheme pattern is deliberately stricter than the
# one in Appendix B so that it does not greedily swallow text that is not
# actually a scheme.
SCHEME_RE = "[a-zA-Z][a-zA-Z0-9+.-]*"
# Authority: anything up to the first backslash, "/", "?" or "#".
_AUTHORITY_RE = r"[^\\/?#]*"
# Path: anything up to the first "?" or "#".
_PATH_RE = "[^?#]*"
# Query: anything up to the first "#".
_QUERY_RE = "[^#]*"
# Fragment: the remainder of the string, newlines included.
_FRAGMENT_RE = "(?s:.*)"

# Component patterns, adapted from
# http://tools.ietf.org/html/rfc3986#appendix-B
COMPONENT_PATTERN_DICT = dict(
    scheme=SCHEME_RE,
    authority=_AUTHORITY_RE,
    path=_PATH_RE,
    query=_QUERY_RE,
    fragment=_FRAGMENT_RE,
)

# See http://tools.ietf.org/html/rfc3986#appendix-B
# Each component of interest is a named group, so the parsed values can be
# read from SRE_Match#groupdict. All other groups are non-capturing, which
# means SRE_Match#groups yields exactly the five components, in order.
URL_PARSING_RE = "".join(
    [
        "(?:(?P<scheme>" + SCHEME_RE + "):)?",
        "(?://(?P<authority>" + _AUTHORITY_RE + "))?",
        "(?P<path>" + _PATH_RE + ")",
        r"(?:\?(?P<query>" + _QUERY_RE + "))?",
        "(?:#(?P<fragment>" + _FRAGMENT_RE + "))?",
    ]
)

66 

67 

# #########################
# Authority Matcher Section
# #########################

# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2
# A registered name such as www.google.com or api.github.com: any run of
# percent-encoded octets, sub-delims and unreserved characters.
REGULAR_NAME_RE = REG_NAME = (
    "((?:%[0-9A-Fa-f]{2}|[" + SUB_DELIMITERS_RE + UNRESERVED_RE + "])*)"
)
# Dotted-quad IPv4 address, e.g., 192.168.255.255, 127.0.0.1
IPv4_RE = r"([0-9]{1,3}\.){3}[0-9]{1,3}"
# One piece (h16) of an IPv6 address: one to four hexadecimal digits
HEXDIG_RE = "[0-9A-Fa-f]{1,4}"
# Least-significant 32 bits of an IPv6 address: two h16 pieces or an IPv4
LS32_RE = "(" + HEXDIG_RE + ":" + HEXDIG_RE + "|" + IPv4_RE + ")"
# Values substituted into the IPv6 templates below, which follow
# http://tools.ietf.org/html/rfc3986#page-20
_subs = dict(hex=HEXDIG_RE, ls32=LS32_RE)

# In the comments below h16 = hexdig; the notation is ABNF (Augmented
# Backus-Naur Form), see https://tools.ietf.org/html/rfc5234
variations = [
    template % _subs
    for template in (
        #                            6( h16 ":" ) ls32
        "(%(hex)s:){6}%(ls32)s",
        #                       "::" 5( h16 ":" ) ls32
        "::(%(hex)s:){5}%(ls32)s",
        # [               h16 ] "::" 4( h16 ":" ) ls32
        "(%(hex)s)?::(%(hex)s:){4}%(ls32)s",
        # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
        "((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s",
        # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
        "((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s",
        # [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
        "((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
        # [ *4( h16 ":" ) h16 ] "::"              ls32
        "((%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
        # [ *5( h16 ":" ) h16 ] "::"              h16
        "((%(hex)s:){0,5}%(hex)s)?::%(hex)s",
        # [ *6( h16 ":" ) h16 ] "::"
        "((%(hex)s:){0,6}%(hex)s)?::",
    )
]

# Alternation of all nine forms, each wrapped in its own group.
IPv6_RE = "(" + "|".join("(" + form + ")" for form in variations) + ")"

# IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
IPv_FUTURE_RE = (
    r"v[0-9A-Fa-f]+\.[" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":]+"
)

# RFC 6874 Zone ID ABNF
ZONE_ID = "(?:[{}]|{})+".format(UNRESERVED_RE, PCT_ENCODED)

# IPv6 address with an optional zone id. The RFC 4007 flavour accepts a
# bare "%" separator as well as the percent-encoded "%25".
IPv6_ADDRZ_RFC4007_RE = "{}(?:(?:%25|%){})?".format(IPv6_RE, ZONE_ID)
IPv6_ADDRZ_RE = "{}(?:%25{})?".format(IPv6_RE, ZONE_ID)

# A bracketed literal: either an IPv6 address (with optional zone id) or
# an IPvFuture address.
IP_LITERAL_RE = r"\[(" + IPv6_ADDRZ_RFC4007_RE + "|" + IPv_FUTURE_RE + r")\]"

# Pattern for matching the host piece of the authority
HOST_RE = HOST_PATTERN = (
    "(" + "|".join((REG_NAME, IPv4_RE, IP_LITERAL_RE)) + ")"
)
# Anchored at the start only; there is no trailing "$" in this pattern.
USERINFO_RE = "^([{}{}:]|{})+".format(
    UNRESERVED_RE, SUB_DELIMITERS_RE, PCT_ENCODED
)
# One to five decimal digits
PORT_RE = "[0-9]{1,5}"

137 

# ####################
# Path Matcher Section
# ####################

# The path patterns below are described in
# http://tools.ietf.org/html/rfc3986#section-3.3
segments = {
    # Possibly empty segment
    "segment": "{}*".format(PCHAR),
    # Non-zero length segment
    "segment-nz": "{}+".format(PCHAR),
    # Non-zero length segment without ":" (every ":" is dropped from PCHAR)
    "segment-nz-nc": "".join(PCHAR.split(":")) + "+",
}

# Path types taken from Section 3.3 (linked above)
PATH_EMPTY = "(?:)"
PATH_ROOTLESS = segments["segment-nz"] + "(/" + segments["segment"] + ")*"
PATH_NOSCHEME = segments["segment-nz-nc"] + "(/" + segments["segment"] + ")*"
PATH_ABSOLUTE = "/(" + PATH_ROOTLESS + ")?"
PATH_ABEMPTY = "(/" + segments["segment"] + ")*"
# Whole-string match against any of the five path forms
PATH_RE = (
    "^("
    + "|".join(
        [
            PATH_ABEMPTY,
            PATH_ABSOLUTE,
            PATH_NOSCHEME,
            PATH_ROOTLESS,
            PATH_EMPTY,
        ]
    )
    + ")$"
)

# Query and fragment share one pattern: "/", "?", ":", "@", unreserved
# characters, sub-delims and percent-encoded octets, repeated.
FRAGMENT_RE = QUERY_RE = "^([/?:@{}{}]|{})*$".format(
    UNRESERVED_RE, SUB_DELIMITERS_RE, PCT_ENCODED
)

169 

# ##########################
# Relative reference matcher
# ##########################

# relative-part, see http://tools.ietf.org/html/rfc3986#section-4.2
# Either "//" authority path-abempty, or one of the authority-less paths.
RELATIVE_PART_RE = (
    "(//"
    + COMPONENT_PATTERN_DICT["authority"]
    + PATH_ABEMPTY
    + "|"
    + "|".join((PATH_ABSOLUTE, PATH_NOSCHEME, PATH_EMPTY))
    + ")"
)

# hier-part, see http://tools.ietf.org/html/rfc3986#section-3
# Same shape as relative-part, with path-rootless in place of path-noscheme.
HIER_PART_RE = (
    "(//"
    + COMPONENT_PATTERN_DICT["authority"]
    + PATH_ABEMPTY
    + "|"
    + "|".join((PATH_ABSOLUTE, PATH_ROOTLESS, PATH_EMPTY))
    + ")"
)

191 

# ###############
# IRIs / RFC 3987
# ###############

# Ranges below U+10000 are used unconditionally.
IPRIVATE = "\uE000-\uF8FF"
UCSCHAR_RE = "\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"
# Only wide-unicode gets the high-ranges of UCSCHAR
if sys.maxunicode > 0xFFFF:  # pragma: no cover
    IPRIVATE += "\U000F0000-\U000FFFFD\U00100000-\U0010FFFD"
    UCSCHAR_RE += (
        "\U00010000-\U0001FFFD\U00020000-\U0002FFFD"
        "\U00030000-\U0003FFFD\U00040000-\U0004FFFD"
        "\U00050000-\U0005FFFD\U00060000-\U0006FFFD"
        "\U00070000-\U0007FFFD\U00080000-\U0008FFFD"
        "\U00090000-\U0009FFFD\U000A0000-\U000AFFFD"
        "\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD"
        "\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD"
    )

# Character-class fragment: ASCII unreserved characters plus the ucschar
# ranges selected above.
IUNRESERVED_RE = r"A-Za-z0-9\._~\-" + UCSCHAR_RE
# Same construction as PCHAR, built on the wider IUNRESERVED_RE class.
IPCHAR = "([{}{}:@]|{})".format(
    IUNRESERVED_RE, SUB_DELIMITERS_RE, PCT_ENCODED
)

isegments = {
    # Possibly empty segment
    "isegment": "{}*".format(IPCHAR),
    # Non-zero length segment
    "isegment-nz": "{}+".format(IPCHAR),
    # Non-zero length segment without ":" (every ":" is dropped from IPCHAR)
    "isegment-nz-nc": "".join(IPCHAR.split(":")) + "+",
}

IPATH_ROOTLESS = (
    isegments["isegment-nz"] + "(/" + isegments["isegment"] + ")*"
)
IPATH_NOSCHEME = (
    isegments["isegment-nz-nc"] + "(/" + isegments["isegment"] + ")*"
)
IPATH_ABSOLUTE = "/(?:" + IPATH_ROOTLESS + ")?"
IPATH_ABEMPTY = "(?:/" + isegments["isegment"] + ")*"
# Whole-string match against any of the five path forms
IPATH_RE = (
    "^(?:"
    + "|".join(
        [
            IPATH_ABEMPTY,
            IPATH_ABSOLUTE,
            IPATH_NOSCHEME,
            IPATH_ROOTLESS,
            PATH_EMPTY,
        ]
    )
    + ")$"
)

# Registered name built on the wider IUNRESERVED_RE class
IREGULAR_NAME_RE = IREG_NAME = (
    "(?:%[0-9A-Fa-f]{2}|[" + SUB_DELIMITERS_RE + IUNRESERVED_RE + "])*"
)

# Host alternation: the name pattern above, an IPv4 address or an IP literal
IHOST_RE = IHOST_PATTERN = (
    "(" + "|".join((IREG_NAME, IPv4_RE, IP_LITERAL_RE)) + ")"
)

# Anchored at the start only; there is no trailing "$" in this pattern.
IUSERINFO_RE = "^(?:[{}{}:]|{})+".format(
    IUNRESERVED_RE, SUB_DELIMITERS_RE, PCT_ENCODED
)

IFRAGMENT_RE = "^(?:[/?:@{}{}]|{})*$".format(
    IUNRESERVED_RE, SUB_DELIMITERS_RE, PCT_ENCODED
)
# Unlike IFRAGMENT_RE, the query class also includes the IPRIVATE ranges.
IQUERY_RE = "^(?:[/?:@{}{}{}]|{})*$".format(
    IUNRESERVED_RE, SUB_DELIMITERS_RE, IPRIVATE, PCT_ENCODED
)

# Either "//" authority ipath-abempty, or one of the authority-less paths.
IRELATIVE_PART_RE = (
    "(//"
    + COMPONENT_PATTERN_DICT["authority"]
    + IPATH_ABEMPTY
    + "|"
    + "|".join((IPATH_ABSOLUTE, IPATH_NOSCHEME, PATH_EMPTY))
    + ")"
)

# Same shape, with ipath-rootless in place of ipath-noscheme.
IHIER_PART_RE = (
    "(//"
    + COMPONENT_PATTERN_DICT["authority"]
    + IPATH_ABEMPTY
    + "|"
    + "|".join((IPATH_ABSOLUTE, IPATH_ROOTLESS, PATH_EMPTY))
    + ")"
)

275)