1"""
2Regex for URIs
3
These regexes are derived directly from the collected ABNF in RFC 3986
(except for DIGIT, ALPHA and HEXDIG, which are defined by RFC 2234).
6
7They should be processed with re.VERBOSE.
8
9Thanks Mark Nottingham for this code - https://gist.github.com/138549
10"""
11import re
12
# basics
#
# Every name below holds a fragment of regex source meant for re.VERBOSE:
# whitespace inside a pattern is ignored and a literal "#" must be escaped.
# Fragments are spliced together with "%(name)s" % locals(), so a fragment
# has to be defined before anything that embeds it.

# DIGIT = %x30-39 ; 0-9
DIGIT = r"[\x30-\x39]"

# ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
ALPHA = r"[\x41-\x5A\x61-\x7A]"

# HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
# ABNF string literals are case-insensitive, so a-f is accepted as well.
HEXDIG = r"[\x30-\x39A-Fa-f]"

# pct-encoded = "%" HEXDIG HEXDIG
# NOTE: this fragment is a bare sequence (no enclosing group); embed it only
# as one alternative of a group, never directly under a quantifier.
pct_encoded = r" %% %(HEXDIG)s %(HEXDIG)s" % locals()

# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
unreserved = r"(?: %(ALPHA)s | %(DIGIT)s | \- | \. | _ | ~ )" % locals()

# gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
gen_delims = r"(?: : | / | \? | \# | \[ | \] | @ )"

# sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
#            / "*" / "+" / "," / ";" / "="
sub_delims = r"(?: ! | \$ | & | ' | \( | \) | \* | \+ | , | ; | = )"

# pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
pchar = (
    r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s | : | @ )" % locals()
)

# reserved = gen-delims / sub-delims
reserved = r"(?: %(gen_delims)s | %(sub_delims)s )" % locals()


# scheme

# scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
scheme = r"%(ALPHA)s (?: %(ALPHA)s | %(DIGIT)s | \+ | \- | \. )*" % locals()


# authority

# dec-octet = DIGIT                 ; 0-9
#           / %x31-39 DIGIT         ; 10-99
#           / "1" 2DIGIT            ; 100-199
#           / "2" %x30-34 DIGIT     ; 200-249
#           / "25" %x30-35          ; 250-255
# The alternatives are listed longest-first: regex alternation takes the
# first branch that succeeds, so this order makes the fragment consume the
# whole octet even when it is used without a following anchor.
dec_octet = r"""(?: 25 [\x30-\x35] |
                    2 [\x30-\x34] %(DIGIT)s |
                    1 %(DIGIT)s{2} |
                    [\x31-\x39] %(DIGIT)s |
                    %(DIGIT)s
                )
""" % locals()

# IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
IPv4address = (
    r"%(dec_octet)s \. %(dec_octet)s \. %(dec_octet)s \. %(dec_octet)s" % locals()
)

# h16 = 1*4HEXDIG
h16 = r"(?: %(HEXDIG)s{1,4} )" % locals()

# ls32 = ( h16 ":" h16 ) / IPv4address
ls32 = r"(?: %(h16)s : %(h16)s | %(IPv4address)s )" % locals()

# IPv6address =                            6( h16 ":" ) ls32
#             /                       "::" 5( h16 ":" ) ls32
#             / [               h16 ] "::" 4( h16 ":" ) ls32
#             / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
#             / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
#             / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
#             / [ *4( h16 ":" ) h16 ] "::"              ls32
#             / [ *5( h16 ":" ) h16 ] "::"              h16
#             / [ *6( h16 ":" ) h16 ] "::"
# One regex alternative per ABNF alternative, in the same order; all groups
# are non-capturing so embedding this does not shift group numbers.
IPv6address = r"""(?:
                                           (?: %(h16)s : ){6} %(ls32)s
    |                                   :: (?: %(h16)s : ){5} %(ls32)s
    | (?:                      %(h16)s )? :: (?: %(h16)s : ){4} %(ls32)s
    | (?: (?: %(h16)s : ){0,1} %(h16)s )? :: (?: %(h16)s : ){3} %(ls32)s
    | (?: (?: %(h16)s : ){0,2} %(h16)s )? :: (?: %(h16)s : ){2} %(ls32)s
    | (?: (?: %(h16)s : ){0,3} %(h16)s )? ::     %(h16)s :      %(ls32)s
    | (?: (?: %(h16)s : ){0,4} %(h16)s )? ::                    %(ls32)s
    | (?: (?: %(h16)s : ){0,5} %(h16)s )? ::                    %(h16)s
    | (?: (?: %(h16)s : ){0,6} %(h16)s )? ::
)
""" % locals()

# IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
# The ABNF literal "v" is case-insensitive, hence [vV].
IPvFuture = (
    r"[vV] %(HEXDIG)s+ \. (?: %(unreserved)s | %(sub_delims)s | : )+" % locals()
)

# IP-literal = "[" ( IPv6address / IPvFuture ) "]"
IP_literal = r"\[ (?: %(IPv6address)s | %(IPvFuture)s ) \]" % locals()

# reg-name = *( unreserved / pct-encoded / sub-delims )
reg_name = r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s )*" % locals()

# userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
# The trailing "*" matters: without it only one-character userinfo matches.
userinfo = (
    r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s | : )*" % locals()
)

# host = IP-literal / IPv4address / reg-name
host = r"(?: %(IP_literal)s | %(IPv4address)s | %(reg_name)s )" % locals()

# port = *DIGIT
port = r"(?: %(DIGIT)s )*" % locals()

# authority = [ userinfo "@" ] host [ ":" port ]
authority = r"(?: %(userinfo)s @)? %(host)s (?: : %(port)s)?" % locals()

# Path

# segment = *pchar
segment = r"%(pchar)s*" % locals()

# segment-nz = 1*pchar
segment_nz = r"%(pchar)s+" % locals()

# segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
#               ; non-zero-length segment without any colon ":"
segment_nz_nc = (
    r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s | @ )+" % locals()
)

# path-abempty = *( "/" segment )
path_abempty = r"(?: / %(segment)s )*" % locals()

# path-absolute = "/" [ segment-nz *( "/" segment ) ]
path_absolute = r"/ (?: %(segment_nz)s (?: / %(segment)s )* )?" % locals()

# path-noscheme = segment-nz-nc *( "/" segment )
path_noscheme = r"%(segment_nz_nc)s (?: / %(segment)s )*" % locals()

# path-rootless = segment-nz *( "/" segment )
path_rootless = r"%(segment_nz)s (?: / %(segment)s )*" % locals()

# path-empty = 0<pchar>
# Zero characters: the empty pattern is intentional.  Wherever it is spliced
# in it forms an empty alternative, which matches the empty string.
path_empty = r""

# path = path-abempty    ; begins with "/" or is empty
#      / path-absolute   ; begins with "/" but not "//"
#      / path-noscheme   ; begins with a non-colon segment
#      / path-rootless   ; begins with a segment
#      / path-empty      ; zero characters
path = r"""(?: %(path_abempty)s |
               %(path_absolute)s |
               %(path_noscheme)s |
               %(path_rootless)s |
               %(path_empty)s
           )
""" % locals()

### Query and Fragment

# query = *( pchar / "/" / "?" )
query = r"(?: %(pchar)s | / | \? )*" % locals()

# fragment = *( pchar / "/" / "?" )
fragment = r"(?: %(pchar)s | / | \? )*" % locals()

# URIs

# hier-part = "//" authority path-abempty
#           / path-absolute
#           / path-rootless
#           / path-empty
hier_part = r"""(?: (?: // %(authority)s %(path_abempty)s ) |
                    %(path_absolute)s |
                    %(path_rootless)s |
                    %(path_empty)s
                )
""" % locals()

# relative-part = "//" authority path-abempty
#               / path-absolute
#               / path-noscheme
#               / path-empty
relative_part = r"""(?: (?: // %(authority)s %(path_abempty)s ) |
                        %(path_absolute)s |
                        %(path_noscheme)s |
                        %(path_empty)s
                    )
""" % locals()

# relative-ref = relative-part [ "?" query ] [ "#" fragment ]
relative_ref = (
    r"%(relative_part)s (?: \? %(query)s)? (?: \# %(fragment)s)?" % locals()
)

# The three top-level patterns are anchored with "^ ... \Z".  "\Z" is used
# instead of "$" because "$" also matches just before a trailing newline,
# which would let "http://example.com/\n" pass as a valid URI.

# URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
URI = (
    r"^(?: %(scheme)s : %(hier_part)s (?: \? %(query)s )? (?: \# %(fragment)s )? )\Z"
    % locals()
)

# URI-reference = URI / relative-ref
# URI is embedded with its own anchors; they are redundant here but harmless.
URI_reference = r"^(?: %(URI)s | %(relative_ref)s )\Z" % locals()

# absolute-URI = scheme ":" hier-part [ "?" query ]
absolute_URI = r"^(?: %(scheme)s : %(hier_part)s (?: \? %(query)s )? )\Z" % locals()  # noqa: N816
178
179
def is_uri(uri):
    """Match *uri* against the RFC 3986 ``URI`` production.

    A URI requires a scheme and may carry a query and a fragment.

    Args:
        uri: The candidate string.

    Returns:
        The match object when the entire string is a URI, otherwise ``None``.
        Use the result in a boolean context for a yes/no answer.
    """
    # fullmatch guarantees the entire string is consumed; a "$"-anchored
    # re.match would still accept a string ending in one extra newline.
    return re.fullmatch(URI, uri, re.VERBOSE)
182
183
def is_uri_reference(uri):
    """Match *uri* against the RFC 3986 ``URI-reference`` production.

    A URI-reference is either a full URI or a relative reference.

    Args:
        uri: The candidate string.

    Returns:
        The match object when the entire string is a URI-reference,
        otherwise ``None``.
    """
    # fullmatch guarantees the entire string is consumed; a "$"-anchored
    # re.match would still accept a string ending in one extra newline.
    return re.fullmatch(URI_reference, uri, re.VERBOSE)
186
187
def is_absolute_uri(uri):
    """Match *uri* against the RFC 3986 ``absolute-URI`` production.

    An absolute-URI requires a scheme, may carry a query, and has no
    fragment part.

    Args:
        uri: The candidate string.

    Returns:
        The match object when the entire string is an absolute-URI,
        otherwise ``None``.
    """
    # fullmatch guarantees the entire string is consumed; a "$"-anchored
    # re.match would still accept a string ending in one extra newline.
    return re.fullmatch(absolute_URI, uri, re.VERBOSE)