1import re
2
3__version__ = '0.1.1'
4__author__ = 'Nicolas Aimetti <naimetti@onapsis.com>'
5__all__ = ['validate_rfc3986']
6
# The regex rules below follow the ABNF terminology of
# [RFC3986](https://tools.ietf.org/html/rfc3986#appendix-A)
9
10
# IPv6 validation rule
def _build_ipv6_re():
    """Assemble the RFC 3986 ``IPv6address`` pattern from its ABNF parts.

    The nine alternatives of the ABNF rule are generated instead of being
    spelled out by hand. ``dec-octet`` is strict: values 0-255 with no
    leading zeros, so ``::ffff:01.2.3.4`` is rejected.

    :return: A regex source string wrapped in a single non-capturing group.
    """
    h16 = r"[0-9A-Fa-f]{1,4}"
    # dec-octet = 0-9 / 10-99 / 100-199 / 200-249 / 250-255
    dec_octet = r"(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])"
    ipv4 = r"(?:%s\.){3}%s" % (dec_octet, dec_octet)
    # ls32 = ( h16 ":" h16 ) / IPv4address
    ls32 = r"(?:%s:%s|%s)" % (h16, h16, ipv4)
    group = r"(?:%s:)" % h16

    def leading(max_groups):
        # [ *(max_groups - 1)( h16 ":" ) h16 ] -- the optional part before "::"
        if max_groups == 0:
            return ""
        return r"(?:%s{0,%d}%s)?" % (group, max_groups - 1, h16)

    def repeated(count):
        # count( h16 ":" ) -- the mandatory groups between "::" and ls32
        if count == 0:
            return ""
        return "%s{%d}" % (group, count)

    # 6( h16 ":" ) ls32 -- the form without any "::" compression
    alternatives = [repeated(6) + ls32]
    # [ *N( h16 ":" ) h16 ] "::" (5-N)( h16 ":" ) ls32, one per "::" position
    for before in range(6):
        alternatives.append(leading(before) + "::" + repeated(5 - before) + ls32)
    # [ *5( h16 ":" ) h16 ] "::" h16
    alternatives.append(leading(6) + "::" + h16)
    # [ *6( h16 ":" ) h16 ] "::"
    alternatives.append(leading(7) + "::")
    return "(?:" + "|".join(alternatives) + ")"


IPv6_RE = _build_ipv6_re()
28
29
# authority = [ userinfo "@" ] host [ ":" port ]
# host      = IP-literal / IPv4address / reg-name
# Literal braces are doubled in the template because the IPv6 rule is spliced
# in with str.format().
_AUTHORITY_TEMPLATE = r"""
    (?:(?:[a-zA-Z0-9_.~\-!$&'()*+,;=:]|%[0-9A-Fa-f]{{2}})*@)? # user info
    (?:
        \[(?:{ip_v6}|v[0-9A-Fa-f]+\.[a-zA-Z0-9_.~\-!$&'()*+,;=:]+)\] # IP-literal
        | (?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){{3}}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?) # IPv4
        | (?:[a-zA-Z0-9_.~\-!$&'()*+,;=]|%[0-9A-Fa-f]{{2}})* # reg-name
    ) # host
    (?::[0-9]*)? # port
"""
AUTHORITY_RE = _AUTHORITY_TEMPLATE.format(ip_v6=IPv6_RE)
# Character-class fragments shared by the path, query and fragment rules
_UNRESERVED_CHARS = r"a-zA-Z0-9_.~\-"
_SUB_DELIM_CHARS = r"!$&'()*+,;="
_PCT_ENCODED = r"%[0-9A-Fa-f]{2}"
# pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
PCHAR_RE = "(?:[" + _UNRESERVED_CHARS + _SUB_DELIM_CHARS + ":@]|" + _PCT_ENCODED + ")"
# query and fragment share one rule: *( pchar / "/" / "?" )
QUERY_RE = "(?:[" + _UNRESERVED_CHARS + _SUB_DELIM_CHARS + ":@/?]|" + _PCT_ENCODED + ")*"
# URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
# The fragment slot reuses QUERY_RE: both rules accept the same characters.
_URI_TEMPLATE = r"""
    [a-zA-Z][a-zA-Z0-9+.-]* #scheme
    :
    (?:
        //
        {authority}
        (?:/{pchar}*)* # path-abempty
        | /(?:{pchar}+ (?:/{pchar}*)*)? # path-absolute
        | {pchar}+ (?:/{pchar}*)* # path-rootless
        | # or nothing
    ) # hier-part
    (?:\?{query})? # Query
    (?:\#{fragment})? # Fragment
"""
URI_RE = _URI_TEMPLATE.format(
    pchar=PCHAR_RE,
    authority=AUTHORITY_RE,
    query=QUERY_RE,
    fragment=QUERY_RE,
)
65
# relative-ref = relative-part [ "?" query ] [ "#" fragment ]
# Unlike a URI, the first segment of a scheme-less path may not contain ":"
# (path-noscheme); the fragment slot reuses QUERY_RE.
_RELATIVE_REF_TEMPLATE = r"""
    (?:
        //
        {authority}
        (?:/{pchar}*)* # path-abempty
        | /(?:{pchar}+ (?:/{pchar}*)*)? # path-absolute
        | (?:[a-zA-Z0-9_.~\-!$&'()*+,;=@]|%[0-9A-Fa-f]{{2}})+ (?:/{pchar}*)* # path-noscheme
        | # or nothing
    ) # relative-part
    (?:\?{query})? # Query
    (?:\#{fragment})? # Fragment
"""
RELATIVE_REF_RE = _RELATIVE_REF_TEMPLATE.format(
    pchar=PCHAR_RE,
    authority=AUTHORITY_RE,
    query=QUERY_RE,
    fragment=QUERY_RE,
)
# Compiled URI regex rule.
# The end is anchored with "\Z" rather than "$": "$" also matches just before
# a trailing newline, which would let "http://example.com\n" pass validation.
URI_RE_COMP = re.compile(r"^{uri_re}\Z".format(uri_re=URI_RE), re.VERBOSE)
# Compiled URI-reference regex rule. URI-reference is defined as: URI / relative-ref
URI_REF_RE_COMP = re.compile(
    r"^(?:{uri_re}|{relative_ref})\Z".format(
        uri_re=URI_RE,
        relative_ref=RELATIVE_REF_RE,
    ),
    re.VERBOSE,
)
91
92
def validate_rfc3986(url, rule='URI'):
    """
    Check a string against the RFC3986 grammar.

    :param url: String containing the URI to validate
    :param rule: Grammar rule to apply: 'URI' (default) or 'URI_reference'.
    :return: A match object (truthy) when the string is valid, otherwise None
    :raises ValueError: If ``rule`` is not one of the two supported names
    """
    # Pick the compiled pattern first so an unknown rule fails before matching.
    if rule == 'URI':
        pattern = URI_RE_COMP
    elif rule == 'URI_reference':
        pattern = URI_REF_RE_COMP
    else:
        raise ValueError('Invalid rule')
    return pattern.match(url)