Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/validators/domain.py: 68%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Domain."""
3# standard
4from os import environ
5from pathlib import Path
6import re
7from typing import Optional, Set
9# local
10from .utils import validator
class _IanaTLD:
    """Answer whether a TLD appears in the bundled IANA list.

    A small built-in set of TLDs is consulted first. Anything else is looked
    up in the `_tld.txt` file that sits next to this module; the full list is
    kept in memory only when the `PYVLD_CACHE_TLD` environment variable is
    exactly `"True"`.
    """

    # Full TLD set; remains None until a caching lookup fills it in.
    _full_cache: Optional[Set[str]] = None
    _popular_cache = {
        # source: https://www.statista.com/statistics/265677
        "COM",
        "ORG",
        "RU",
        "DE",
        "NET",
        "BR",
        "UK",
        "JP",
        "FR",
        "IT",
        # added separately from the list above
        "ONION",
    }

    @classmethod
    def _retrieve(cls):
        """Yield every line of `_tld.txt` after the first, stripped of whitespace."""
        tld_path = Path(__file__).parent.joinpath("_tld.txt")
        with tld_path.open() as tld_file:
            next(tld_file)  # the first line is not an entry; discard it
            yield from map(str.strip, tld_file)

    @classmethod
    def check(cls, tld: str):
        """Return True when `tld` is a known TLD, otherwise False.

        The comparison is exact, so callers are expected to pass `tld` in the
        same case as the stored entries (the built-in set is upper-case).

        Args:
            tld:
                Top-level-domain label to look up.
        """
        if tld in cls._popular_cache:
            return True
        if cls._full_cache is not None:
            return tld in cls._full_cache
        if environ.get("PYVLD_CACHE_TLD") != "True":
            # Caching disabled: scan the file lazily and stop at the first hit.
            return tld in cls._retrieve()
        cls._full_cache = set(cls._retrieve())
        return tld in cls._full_cache
@validator
def domain(
    value: str, /, *, consider_tld: bool = False, rfc_1034: bool = False, rfc_2782: bool = False
):
    """Check that the given value is a syntactically valid domain name.

    Examples:
        >>> domain('example.com')
        True
        >>> domain('example.com/')
        ValidationError(func=domain, args={'value': 'example.com/'})
        >>> # Supports IDN domains as well::
        >>> domain('xn----gtbspbbmkef.xn--p1ai')
        True

    Args:
        value:
            Domain string to validate.
        consider_tld:
            When set, the last label must be a TLD known to IANA.
        rfc_1034:
            Accept an optional trailing dot after the domain name.
            Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
        rfc_2782:
            Treat the name as a service record, which permits
            underscores within its labels.
            Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).

    Returns:
        (Literal[True]): If `value` is a valid domain name.
        (ValidationError): If `value` is an invalid domain name.

    Raises:
        (UnicodeError): If `value` cannot be encoded into `idna` or decoded into `utf-8`.
    """
    if not value:
        return False

    if consider_tld:
        # The TLD is whatever follows the last dot, ignoring trailing dots.
        top_level = value.rstrip(".").rsplit(".", 1)[-1].upper()
        if not _IanaTLD.check(top_level):
            return False

    service_record = r"_" if rfc_2782 else ""
    trailing_dot = r"\.?$" if rfc_1034 else r"$"
    # NOTE(review): the final label's character class admits "_" regardless of
    # `rfc_2782` -- confirm that this is intended.
    pattern = (
        # Opening character of a label
        rf"^(?:[a-z0-9{service_record}]"
        # Optional label interior, at most 61 characters
        rf"(?:[a-z0-9-{service_record}]{{0,61}}"
        # Closing character of the label, followed by its dot
        rf"[a-z0-9{service_record}])?\.)"
        # One or more such labels, then the start of the final label
        r"+[a-z0-9][a-z0-9-_]{0,61}"
        # The final label must end in a letter
        rf"[a-z]{trailing_dot}"
    )

    try:
        # Whitespace or consecutive underscores anywhere rule the value out.
        if re.search(r"\s|__+", value):
            return False
        ascii_form = value.encode("idna").decode("utf-8")
        return re.match(pattern, ascii_form, re.IGNORECASE)
    except UnicodeError as err:
        raise UnicodeError(f"Unable to encode/decode {value}") from err