Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/validators/url.py: 100%
52 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:08 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:08 +0000
1"""URL."""
2# -*- coding: utf-8 -*-
4# standard
5from urllib.parse import urlsplit, unquote
6from functools import lru_cache
7import re
9# local
10from .hostname import hostname
11from .utils import validator
14@lru_cache
15def _username_regex():
16 return re.compile(
17 # dot-atom
18 r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*$"
19 # non-quoted-string
20 + r"|^([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*$)",
21 re.IGNORECASE,
22 )
25@lru_cache
26def _path_regex():
27 return re.compile(
28 # allowed symbols
29 r"^[\/a-zA-Z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=\:\@\%"
30 # emoticons / emoji
31 + r"\U0001F600-\U0001F64F"
32 # multilingual unicode ranges
33 + r"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+$",
34 re.IGNORECASE,
35 )
38@lru_cache
39def _query_regex():
40 return re.compile(r"&?(\w+=?[^\s&]*)", re.IGNORECASE)
43def _validate_scheme(value: str):
44 """Validate scheme."""
45 # More schemes will be considered later.
46 return (
47 value in {"ftp", "ftps", "git", "http", "https", "rtsp", "sftp", "ssh", "telnet"}
48 if value
49 else False
50 )
53def _confirm_ipv6_skip(value: str, skip_ipv6_addr: bool):
54 """Confirm skip IPv6 check."""
55 return skip_ipv6_addr or value.count(":") < 2 or not value.startswith("[")
def _validate_auth_segment(value: str):
    """Validate the authentication segment (the part before ``@``).

    An empty segment is accepted. With no colon the whole segment is
    matched as a username. With exactly one colon the segment is split
    into username and password: the username must match and the
    password must not contain ``/``, ``?``, ``#`` or ``@``. With more
    than one colon the percent-decoded segment is matched as a username.

    The result is truthy on success and falsy on failure; it may be a
    match object, ``None`` or a bool depending on the branch taken.
    """
    if not value:
        return True

    username_pattern = _username_regex()
    colons = value.count(":")

    if colons > 1:
        # everything before @ is then considered as a username
        # this is a bad practice, but syntactically valid URL
        return username_pattern.match(unquote(value))

    if colons < 1:
        return username_pattern.match(value)

    user, secret = value.rsplit(":", 1)
    return username_pattern.match(user) and not any(
        forbidden in secret for forbidden in ("/", "?", "#", "@")
    )
def _validate_netloc(
    value: str,
    skip_ipv6_addr: bool,
    skip_ipv4_addr: bool,
    may_have_port: bool,
    simple_host: bool,
    rfc_1034: bool,
    rfc_2782: bool,
):
    """Validate the network location (``[userinfo@]host[:port]``) of a URL.

    Args:
        value:
            The netloc component as produced by ``urlsplit``.
        skip_ipv6_addr:
            Caller's request to skip the IPv6 check; combined with the
            shape of the host via `_confirm_ipv6_skip`.
        skip_ipv4_addr:
            Forwarded unchanged to `hostname`.
        may_have_port:
            Forwarded unchanged to `hostname`.
        simple_host:
            Forwarded to `hostname` as `maybe_simple`.
        rfc_1034:
            Forwarded unchanged to `hostname`.
        rfc_2782:
            Forwarded unchanged to `hostname`.

    Returns:
        `False` when `value` is empty or contains more than one `@`.
        Without an `@`, the result of `hostname` for the host part.
        With an `@`, that result combined (`and`) with the result of
        `_validate_auth_segment` for the part before the last `@`.
    """
    if not value or value.count("@") > 1:
        return False

    if "@" in value:
        basic_auth, host = value.rsplit("@", 1)
    else:
        basic_auth, host = None, value

    skip_ipv6 = _confirm_ipv6_skip(host, skip_ipv6_addr)
    # Brackets are stripped only for a bracketed host with no "]:" in it
    # (presumably a bracketed IPv6 literal without a port - confirm against
    # `hostname`). The "]:" test deliberately looks at the host alone: a "]:"
    # inside the credentials must not change how the host is treated.
    if skip_ipv6 or "]:" in host:
        host_candidate = host
    else:
        host_candidate = host.lstrip("[").replace("]", "", 1)

    host_result = hostname(
        host_candidate,
        skip_ipv6_addr=skip_ipv6,
        skip_ipv4_addr=skip_ipv4_addr,
        may_have_port=may_have_port,
        maybe_simple=simple_host,
        rfc_1034=rfc_1034,
        rfc_2782=rfc_2782,
    )
    if basic_auth is None:
        return host_result
    # The credentials are only examined once the host has been accepted.
    return host_result and _validate_auth_segment(basic_auth)
def _validate_optionals(path: str, query: str, fragment: str):
    """Validate the optional path, query and fragment components.

    Each component is only checked when it is non-empty: the path must
    match the path pattern, the query must match the query pattern at
    its start, and the fragment must not contain ``/`` or ``?``.
    Returns ``True`` only if every present component passes.
    """
    path_ok = not path or bool(_path_regex().match(path))
    query_ok = not query or bool(_query_regex().match(query))
    fragment_ok = not fragment or not any(
        forbidden in fragment for forbidden in ("/", "?")
    )
    return path_ok and query_ok and fragment_ok
@validator
def url(
    value: str,
    /,
    *,
    skip_ipv6_addr: bool = False,
    skip_ipv4_addr: bool = False,
    may_have_port: bool = True,
    simple_host: bool = False,
    rfc_1034: bool = False,
    rfc_2782: bool = False,
):
    r"""Return whether or not given value is a valid URL.

    The value is rejected if it is empty, contains any whitespace, or
    cannot be split by `urlsplit`. Otherwise the scheme, the network
    location and the optional path/query/fragment are validated in
    that order.

    This validator was inspired from [URL validator of dperini][1].
    The following diagram is from [urlly][2].

        foo://admin:hunter1@example.com:8042/over/there?name=ferret#nose
        \_/   \___/ \_____/ \_________/ \__/\_________/ \_________/ \__/
         |      |       |       |        |       |          |         |
      scheme username password hostname port   path      query    fragment

    [1]: https://gist.github.com/dperini/729294
    [2]: https://github.com/treeform/urlly

    Examples:
        >>> url('http://duck.com')
        # Output: True
        >>> url('ftp://foobar.dk')
        # Output: True
        >>> url('http://10.0.0.1')
        # Output: True
        >>> url('http://example.com/">user@example.com')
        # Output: ValidationFailure(func=url, ...)

    Args:
        value:
            URL string to validate.
        skip_ipv6_addr:
            When URL string cannot contain an IPv6 address.
        skip_ipv4_addr:
            When URL string cannot contain an IPv4 address.
        may_have_port:
            URL string may contain port number.
        simple_host:
            URL string maybe only hyphens and alpha-numerals.
        rfc_1034:
            Allow trailing dot in domain/host name.
            Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
        rfc_2782:
            Domain/Host name is of type service record.
            Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).

    Returns:
        (Literal[True]):
            If `value` is a valid URL.
        (ValidationFailure):
            If `value` is an invalid URL.

    Note:
        - *In version 0.11.3*:
            - Added support for URLs containing localhost.
        - *In version 0.11.0*:
            - Made the regular expression case insensitive.
        - *In version 0.10.3*:
            - Added a `public` parameter.
        - *In version 0.10.2*:
            - Added support for various exotic URLs.
            - Fixed various false positives.

    > *New in version 0.2.0*.
    """
    if not value:
        return False
    if re.search(r"\s", value):
        # url must not contain any white
        # spaces, they must be encoded
        return False

    try:
        parts = urlsplit(value)
    except ValueError:
        return False

    if not _validate_scheme(parts.scheme):
        return False

    return _validate_netloc(
        parts.netloc,
        skip_ipv6_addr,
        skip_ipv4_addr,
        may_have_port,
        simple_host,
        rfc_1034,
        rfc_2782,
    ) and _validate_optionals(parts.path, parts.query, parts.fragment)