Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/validators/domain.py: 68%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

37 statements  

1"""Domain.""" 

2 

3# standard 

4from os import environ 

5from pathlib import Path 

6import re 

7from typing import Optional, Set 

8 

9# local 

10from .utils import validator 

11 

12 

class _IanaTLD:
    """Read IANA TLDs, and optionally cache them.

    TLD names are read from the ``_tld.txt`` file located next to this module.
    Lookups are case-sensitive; the built-in popular set holds upper-case
    names, so callers are expected to pass an upper-cased TLD.

    Setting the environment variable ``PYVLD_CACHE_TLD`` to the exact string
    ``"True"`` makes the first lookup that misses the popular set load the
    whole file into an in-memory set which is reused by later lookups.
    Otherwise the file is scanned on every such lookup.
    """

    # Full TLD set; stays ``None`` until it is populated by ``check``.
    _full_cache: Optional[Set[str]] = None
    # source: https://www.statista.com/statistics/265677
    _popular_cache = {"COM", "ORG", "RU", "DE", "NET", "BR", "UK", "JP", "FR", "IT"}
    _popular_cache.add("ONION")

    @classmethod
    def _retrieve(cls):
        """Yield each non-empty, whitespace-stripped entry of ``_tld.txt``.

        The first line of the file is always skipped (presumably a header
        line -- confirm against the bundled data file).
        """
        with Path(__file__).parent.joinpath("_tld.txt").open(encoding="utf-8") as tld_f:
            # A default is required here: a bare ``next()`` on an empty file
            # raises StopIteration inside the generator, which Python turns
            # into RuntimeError (PEP 479).
            next(tld_f, None)
            for line in tld_f:
                entry = line.strip()
                if entry:  # blank lines must not become an "" TLD
                    yield entry

    @classmethod
    def check(cls, tld: str) -> bool:
        """Return whether ``tld`` is a known top-level domain.

        Args:
            tld:
                TLD to look up, without dots (e.g. ``"COM"``).

        Returns:
            ``True`` if ``tld`` is in the popular set or listed in
            ``_tld.txt``, ``False`` otherwise.
        """
        if tld in cls._popular_cache:
            return True
        if cls._full_cache is None:
            if environ.get("PYVLD_CACHE_TLD") != "True":
                # Uncached lookup: scan the file lazily and stop at the first
                # match. Close the generator explicitly so the file handle is
                # released right away instead of whenever it is collected.
                entries = cls._retrieve()
                try:
                    return tld in entries
                finally:
                    entries.close()
            cls._full_cache = set(cls._retrieve())
        return tld in cls._full_cache

38 

39 

@validator
def domain(
    value: str, /, *, consider_tld: bool = False, rfc_1034: bool = False, rfc_2782: bool = False
):
    """Check whether the given value is a syntactically valid domain name.

    Examples:
        >>> domain('example.com')
        True
        >>> domain('example.com/')
        ValidationError(func=domain, args={'value': 'example.com/'})
        >>> # Supports IDN domains as well::
        >>> domain('xn----gtbspbbmkef.xn--p1ai')
        True

    Args:
        value:
            Domain string to validate.
        consider_tld:
            Restrict domain to TLDs allowed by IANA.
        rfc_1034:
            Allows optional trailing dot in the domain name.
            Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
        rfc_2782:
            Domain name is of type service record.
            Allows optional underscores in the domain name.
            Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).

    Returns:
        (Literal[True]): If `value` is a valid domain name.
        (ValidationError): If `value` is an invalid domain name.

    Raises:
        (UnicodeError): If `value` cannot be encoded into `idna` or decoded into `utf-8`.
    """
    if not value:
        return False

    if consider_tld:
        # The TLD is the last label, ignoring any trailing dots; it is
        # upper-cased before the lookup.
        last_label = value.rstrip(".").rsplit(".", 1)[-1].upper()
        if not _IanaTLD.check(last_label):
            return False

    # Whitespace or a run of two or more underscores is rejected outright,
    # before any encoding is attempted.
    if re.search(r"\s|__+", value):
        return False

    underscore = r"_" if rfc_2782 else ""
    ending = r"\.?$" if rfc_1034 else r"$"
    pattern = (
        # First character of the domain
        rf"^(?:[a-z0-9{underscore}]"
        # Sub-domain
        + rf"(?:[a-z0-9-{underscore}]{{0,61}}"
        # Hostname
        + rf"[a-z0-9{underscore}])?\.)"
        # First 61 characters of the gTLD
        + r"+[a-z0-9][a-z0-9-_]{0,61}"
        # Last character of the gTLD
        + rf"[a-z]{ending}"
    )

    # Only the IDNA round-trip can raise UnicodeError, so only it is guarded.
    try:
        ascii_form = value.encode("idna").decode("utf-8")
    except UnicodeError as err:
        raise UnicodeError(f"Unable to encode/decode {value}") from err

    return re.match(pattern, ascii_form, re.IGNORECASE)