Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/rfc3986_validator.py: 72%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

18 statements  

1import re 

2 

3__version__ = '0.1.1' 

4__author__ = 'Nicolas Aimetti <naimetti@onapsis.com>' 

5__all__ = ['validate_rfc3986'] 

6 

# The regex rules below follow the ABNF terminology of
# [RFC3986](https://tools.ietf.org/html/rfc3986#appendix-A)


# IPv6 validation rule: one alternative per form of the RFC 3986 `IPv6address`
# production (full form, then every allowed position of the "::" elision),
# each optionally ending in an embedded dotted-quad IPv4 address.
# The pattern contains no whitespace or "#", so it can be embedded safely in
# the re.VERBOSE patterns below.
IPv6_RE = (
    r"(?:(?:[0-9A-Fa-f]{1,4}:){6}(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]["
    r"0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|::(?:[0-9A-Fa-f]{1,4}:){5}(?:[0-9A-Fa-f]{1,"
    r"4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]["
    r"0-9]?))|(?:[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){4}(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2["
    r"0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:(?:[0-9A-Fa-f]{1,"
    r"4}:)?[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){3}(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4]["
    r"0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:(?:[0-9A-Fa-f]{1,4}:){,"
    r"2}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){2}(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4]["
    r"0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:(?:[0-9A-Fa-f]{1,4}:){,"
    r"3}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:)(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4][0-9]|["
    r"01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:(?:[0-9A-Fa-f]{1,4}:){,4}[0-9A-Fa-f]{1,"
    r"4})?::(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2["
    r"0-4][0-9]|[01]?[0-9][0-9]?))|(?:(?:[0-9A-Fa-f]{1,4}:){,5}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}|(?:(?:["
    r"0-9A-Fa-f]{1,4}:){,6}[0-9A-Fa-f]{1,4})?::)"
)


# An authority is defined as: [ userinfo "@" ] host [ ":" port ]
# Written for re.VERBOSE. Quantifier braces are doubled ({{2}}, {{3}}) because
# the template goes through str.format(); the substituted IPv6_RE value is
# inserted verbatim and is not re-interpreted by format().
AUTHORITY_RE = r"""
    (?:(?:[a-zA-Z0-9_.~\-!$&'()*+,;=:]|%[0-9A-Fa-f]{{2}})*@)? # user info
    (?:
        \[(?:{ip_v6}|v[0-9A-Fa-f]+\.[a-zA-Z0-9_.~\-!$&'()*+,;=:]+)\] # IP-literal
        | (?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){{3}}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?) # IPv4
        | (?:[a-zA-Z0-9_.~\-!$&'()*+,;=]|%[0-9A-Fa-f]{{2}})* # reg-name
    ) # host
    (?::[0-9]*)? # port
""".format(ip_v6=IPv6_RE,)
# Path char regex rule
PCHAR_RE = r"(?:[a-zA-Z0-9_.~\-!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})"
# Query and Fragment rules are exactly the same
QUERY_RE = r"(?:[a-zA-Z0-9_.~\-!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})*"
# A URI is defined as: scheme ":" hier-part [ "?" query ] [ "#" fragment ]
URI_RE = r"""
    [a-zA-Z][a-zA-Z0-9+.-]* #scheme
    :
    (?:
        //
        {authority}
        (?:/{pchar}*)* # path-abempty
        | /(?:{pchar}+ (?:/{pchar}*)*)? # path-absolute
        | {pchar}+ (?:/{pchar}*)* # path-rootless
        | # or nothing
    ) # hier-part
    (?:\?{query})? # Query
    (?:\#{fragment})? # Fragment
""".format(
    authority=AUTHORITY_RE,
    query=QUERY_RE,
    fragment=QUERY_RE,
    pchar=PCHAR_RE
)

# A relative-ref is defined as: relative-part [ "?" query ] [ "#" fragment ]
RELATIVE_REF_RE = r"""
    (?:
        //
        {authority}
        (?:/{pchar}*)* # path-abempty
        | /(?:{pchar}+ (?:/{pchar}*)*)? # path-absolute
        | (?:[a-zA-Z0-9_.~\-!$&'()*+,;=@]|%[0-9A-Fa-f]{{2}})+ (?:/{pchar}*)* # path-noscheme
        | # or nothing
    ) # relative-part
    (?:\?{query})? # Query
    (?:\#{fragment})? # Fragment
""".format(
    authority=AUTHORITY_RE,
    query=QUERY_RE,
    fragment=QUERY_RE,
    pchar=PCHAR_RE
)
# Compiled URI regex rule.
# Anchored with \Z rather than $: in Python, $ also matches just before a
# trailing newline, which would let "http://example.com\n" pass as a valid URI.
URI_RE_COMP = re.compile(r"^{uri_re}\Z".format(uri_re=URI_RE), re.VERBOSE)
# Compiled URI-reference regex rule. URI-reference is defined as: URI / relative-ref
# (same \Z anchoring as above so a trailing newline is rejected).
URI_REF_RE_COMP = re.compile(r"^(?:{uri_re}|{relative_ref})\Z".format(
    uri_re=URI_RE,
    relative_ref=RELATIVE_REF_RE,
), re.VERBOSE)

91 

92 

def validate_rfc3986(url, rule='URI'):
    """
    Check a string against the RFC3986 grammar.

    :param url: String containing the URI (or URI reference) to validate
    :param rule: Grammar rule to apply: 'URI' (default) or 'URI_reference'.
    :return: The match object produced by the selected compiled pattern when
        the string conforms, otherwise None. Callers normally rely on its
        truthiness only.
    :raises ValueError: if ``rule`` is neither 'URI' nor 'URI_reference'.
    """
    # Pick the compiled pattern first; an unknown rule is rejected before the
    # input string is examined at all.
    if rule == 'URI':
        pattern = URI_RE_COMP
    elif rule == 'URI_reference':
        pattern = URI_REF_RE_COMP
    else:
        raise ValueError('Invalid rule')
    return pattern.match(url)