Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/email_validator/validate_email.py: 64%

85 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:32 +0000

1from typing import Optional, Union 

2 

3from .exceptions_types import EmailSyntaxError, ValidatedEmail 

4from .syntax import validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, get_length_reason 

5from .rfc_constants import EMAIL_MAX_LENGTH, QUOTED_LOCAL_PART_ADDR, CASE_INSENSITIVE_MAILBOX_NAMES 

6 

7 

def validate_email(
    email: Union[str, bytes],
    # /, # not supported in Python 3.6, 3.7
    *,
    allow_smtputf8: Optional[bool] = None,
    allow_empty_local: bool = False,
    allow_quoted_local: Optional[bool] = None,
    allow_domain_literal: Optional[bool] = None,
    check_deliverability: Optional[bool] = None,
    test_environment: Optional[bool] = None,
    globally_deliverable: Optional[bool] = None,
    timeout: Optional[int] = None,
    dns_resolver: Optional[object] = None
) -> ValidatedEmail:
    """
    Validate an email address and return its normalized form.

    This is the main method of this library. The address is split into a
    local part and a domain part, each part is validated and normalized by
    the syntax helpers, overall length limits are enforced, and (optionally)
    the domain's deliverability is checked.

    Args:
        email: The address to validate, as a str or a bytes instance. If
            bytes, it must be ASCII-only.
        allow_smtputf8: Forwarded to the local-part validator.
        allow_empty_local: Forwarded to the local-part validator.
        allow_quoted_local: If false, an address whose local part is in
            quoted-string form is rejected (after the local part itself
            has been validated).
        allow_domain_literal: Forwarded to the domain-literal validator,
            which is used when the domain part is wrapped in "[...]".
        check_deliverability: If true (and test_environment is false, and
            the domain is not a domain literal), run the deliverability
            check and copy its results onto the returned object.
        test_environment: Forwarded to the domain-name validator; when true
            it also disables the deliverability check.
        globally_deliverable: Forwarded to the domain-name validator.
        timeout: Forwarded to the deliverability check. Defaults to the
            package-level DEFAULT_TIMEOUT only when dns_resolver is also
            None.
        dns_resolver: Forwarded to the deliverability check.

        Any Optional argument left as None (other than dns_resolver) is
        filled in from the corresponding package-level constant.

    Returns:
        A ValidatedEmail instance with original, local_part,
        ascii_local_part, smtputf8, domain, ascii_domain, normalized and
        ascii_email set (plus domain_address for domain literals, and any
        keys returned by the deliverability check).

    Raises:
        EmailSyntaxError: If bytes input is not ASCII, an unquoted address
            does not contain exactly one @-sign, a quoted local part is
            present but not allowed, the domain part is empty, or the
            address is too long. The helper functions called here may
            raise additional errors that are not visible in this function.
    """

    # Fill in default values of arguments.
    from . import ALLOW_SMTPUTF8, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, \
        GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT
    if allow_smtputf8 is None:
        allow_smtputf8 = ALLOW_SMTPUTF8
    if allow_quoted_local is None:
        allow_quoted_local = ALLOW_QUOTED_LOCAL
    if allow_domain_literal is None:
        allow_domain_literal = ALLOW_DOMAIN_LITERAL
    if check_deliverability is None:
        check_deliverability = CHECK_DELIVERABILITY
    if test_environment is None:
        test_environment = TEST_ENVIRONMENT
    if globally_deliverable is None:
        globally_deliverable = GLOBALLY_DELIVERABLE
    # A caller-supplied resolver is left to carry its own timeout, so the
    # default is only applied when no resolver was given either.
    if timeout is None and dns_resolver is None:
        timeout = DEFAULT_TIMEOUT

    # Allow email to be a str or bytes instance. If bytes,
    # it must be ASCII because that's how the bytes work
    # on the wire with SMTP.
    if not isinstance(email, str):
        try:
            email = email.decode("ascii")
        except ValueError as e:
            # Chain explicitly so the underlying decode error is kept as
            # the cause of the syntax error.
            raise EmailSyntaxError("The email address is not valid ASCII.") from e

    # Typical email addresses have a single @-sign, but the
    # awkward "quoted string" local part form (RFC 5321 4.1.2)
    # allows @-signs (and escaped quotes) to appear in the local
    # part if the local part is quoted. If the address is quoted,
    # split it at a non-escaped @-sign and unescape the escaping.
    quoted_local_part = False
    m = QUOTED_LOCAL_PART_ADDR.match(email)
    if m:
        quoted_local_part = True
        local_part, domain_part = m.groups()

        # Remove backslashes.
        import re
        local_part = re.sub(r"\\(.)", "\\1", local_part)

    else:
        # Split at the one and only at-sign.
        parts = email.split('@')
        if len(parts) != 2:
            raise EmailSyntaxError("The email address is not valid. It must have exactly one @-sign.")
        local_part, domain_part = parts

    # Collect return values in this instance.
    ret = ValidatedEmail()
    ret.original = email

    # Validate the email address's local part syntax and get a normalized form.
    # If the original address was quoted and the decoded local part is a valid
    # unquoted local part, then we'll get back a normalized (unescaped) local
    # part.
    local_part_info = validate_email_local_part(local_part,
                                                allow_smtputf8=allow_smtputf8,
                                                allow_empty_local=allow_empty_local,
                                                quoted_local_part=quoted_local_part)
    # Checked only after the local part has been validated, so any error from
    # the validator above is raised in preference to this one.
    if quoted_local_part and not allow_quoted_local:
        raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")
    ret.local_part = local_part_info["local_part"]
    ret.ascii_local_part = local_part_info["ascii_local_part"]
    ret.smtputf8 = local_part_info["smtputf8"]

    # Some local parts are required to be case-insensitive, so we should normalize
    # to lowercase.
    # RFC 2142
    if ret.ascii_local_part is not None \
            and ret.ascii_local_part.lower() in CASE_INSENSITIVE_MAILBOX_NAMES \
            and ret.local_part is not None:
        ret.ascii_local_part = ret.ascii_local_part.lower()
        ret.local_part = ret.local_part.lower()

    # Validate the email address's domain part syntax and get a normalized form.
    is_domain_literal = False
    if not domain_part:
        raise EmailSyntaxError("There must be something after the @-sign.")

    elif domain_part.startswith("[") and domain_part.endswith("]"):
        # Parse the address in the domain literal and get back a normalized domain.
        domain_part_info = validate_email_domain_literal(domain_part[1:-1], allow_domain_literal=allow_domain_literal)
        ret.domain = domain_part_info["domain"]
        ret.ascii_domain = domain_part_info["domain"]  # Domain literals are always ASCII.
        ret.domain_address = domain_part_info["domain_address"]
        is_domain_literal = True  # Prevent deliverability checks.

    else:
        # Check the syntax of the domain and get back a normalized
        # internationalized and ASCII form.
        domain_part_info = validate_email_domain_name(domain_part, test_environment=test_environment, globally_deliverable=globally_deliverable)
        ret.domain = domain_part_info["domain"]
        ret.ascii_domain = domain_part_info["ascii_domain"]

    # Construct the complete normalized form.
    ret.normalized = ret.local_part + "@" + ret.domain

    # If the email address has an ASCII form, add it.
    if not ret.smtputf8:
        if not ret.ascii_domain:
            # Internal invariant: both domain branches above set ascii_domain.
            raise Exception("Missing ASCII domain.")
        ret.ascii_email = (ret.ascii_local_part or "") + "@" + ret.ascii_domain
    else:
        ret.ascii_email = None

    # If the email address has an ASCII representation, then we assume it may be
    # transmitted in ASCII (we can't assume SMTPUTF8 will be used on all hops to
    # the destination) and the length limit applies to ASCII characters (which is
    # the same as octets). The number of characters in the internationalized form
    # may be many fewer (because IDNA ASCII is verbose) and could be less than 254
    # Unicode characters, and of course the number of octets over the limit may
    # not be the number of characters over the limit, so if the email address is
    # internationalized, we can't give any simple information about why the address
    # is too long.
    #
    # In addition, check that the UTF-8 encoding (i.e. not IDNA ASCII and not
    # Unicode characters) is at most 254 octets. If the address is transmitted using
    # SMTPUTF8, then the length limit probably applies to the UTF-8 encoded octets.
    # If the email address has an ASCII form that differs from its internationalized
    # form, I don't think the internationalized form can be longer, and so the ASCII
    # form length check would be sufficient. If there is no ASCII form, then we have
    # to check the UTF-8 encoding. The UTF-8 encoding could be up to about four times
    # longer than the number of characters.
    #
    # See the length checks on the local part and the domain.
    if ret.ascii_email and len(ret.ascii_email) > EMAIL_MAX_LENGTH:
        if ret.ascii_email == ret.normalized:
            reason = get_length_reason(ret.ascii_email)
        elif len(ret.normalized) > EMAIL_MAX_LENGTH:
            # If there are more than 254 characters, then the ASCII
            # form is definitely going to be too long.
            reason = get_length_reason(ret.normalized, utf8=True)
        else:
            reason = "(when converted to IDNA ASCII)"
        raise EmailSyntaxError(f"The email address is too long {reason}.")
    if len(ret.normalized.encode("utf8")) > EMAIL_MAX_LENGTH:
        if len(ret.normalized) > EMAIL_MAX_LENGTH:
            # If there are more than 254 characters, then the UTF-8
            # encoding is definitely going to be too long.
            reason = get_length_reason(ret.normalized, utf8=True)
        else:
            reason = "(when encoded in bytes)"
        raise EmailSyntaxError(f"The email address is too long {reason}.")

    # Validate the email address's deliverability using DNS and update the
    # return value with metadata. For a domain literal there is nothing to
    # check, so the deliverability check is skipped.
    if check_deliverability and not test_environment and not is_domain_literal:
        # Lazy load `deliverability` as it is slow to import (due to dns.resolver)
        from .deliverability import validate_email_deliverability
        deliverability_info = validate_email_deliverability(
            ret.ascii_domain, ret.domain, timeout, dns_resolver
        )
        for key, value in deliverability_info.items():
            setattr(ret, key, value)

    return ret

189 

190 return ret