Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/email_validator/validate_email.py: 75%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

95 statements  

1from typing import Optional, Union, TYPE_CHECKING 

2import unicodedata 

3 

4from .exceptions import EmailSyntaxError 

5from .types import ValidatedEmail 

6from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length 

7from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES 

8 

9if TYPE_CHECKING: 

10 import dns.resolver 

11 _Resolver = dns.resolver.Resolver 

12else: 

13 _Resolver = object 

14 

15 

def validate_email(
    email: Union[str, bytes],
    /,  # prior arguments are positional-only
    *,  # subsequent arguments are keyword-only
    allow_smtputf8: Optional[bool] = None,
    allow_empty_local: Optional[bool] = None,
    allow_quoted_local: Optional[bool] = None,
    allow_domain_literal: Optional[bool] = None,
    allow_display_name: Optional[bool] = None,
    strict: Optional[bool] = None,
    check_deliverability: Optional[bool] = None,
    test_environment: Optional[bool] = None,
    globally_deliverable: Optional[bool] = None,
    timeout: Optional[int] = None,
    dns_resolver: Optional[_Resolver] = None
) -> ValidatedEmail:
    """
    Validate an email address and return a ValidatedEmail describing it.

    This is the main entry point of the module. ``email`` may be a str or
    an ASCII-only bytes object. Every keyword option that is left as None
    falls back to the corresponding package-level default; ``timeout``
    only falls back to DEFAULT_TIMEOUT when no ``dns_resolver`` is given.

    The address is split into display name, local part and domain. Each
    piece is syntax-checked and normalized, the overall length is checked,
    and, when requested, the domain's deliverability is looked up.

    Raises TypeError if ``email`` is neither str nor bytes, and
    EmailSyntaxError when one of the syntax checks performed here fails.
    The helper functions called along the way may raise on their own.
    """

    # Resolve unset options against the package-level defaults. The import
    # is executed on each call, so the values used are the ones the package
    # exposes at call time.
    from . import (
        ALLOW_SMTPUTF8, ALLOW_EMPTY_LOCAL, ALLOW_QUOTED_LOCAL,
        ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME, STRICT,
        GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT,
        DEFAULT_TIMEOUT,
    )
    allow_smtputf8 = ALLOW_SMTPUTF8 if allow_smtputf8 is None else allow_smtputf8
    allow_empty_local = ALLOW_EMPTY_LOCAL if allow_empty_local is None else allow_empty_local
    allow_quoted_local = ALLOW_QUOTED_LOCAL if allow_quoted_local is None else allow_quoted_local
    allow_domain_literal = ALLOW_DOMAIN_LITERAL if allow_domain_literal is None else allow_domain_literal
    allow_display_name = ALLOW_DISPLAY_NAME if allow_display_name is None else allow_display_name
    strict = STRICT if strict is None else strict
    check_deliverability = CHECK_DELIVERABILITY if check_deliverability is None else check_deliverability
    test_environment = TEST_ENVIRONMENT if test_environment is None else test_environment
    globally_deliverable = GLOBALLY_DELIVERABLE if globally_deliverable is None else globally_deliverable
    if dns_resolver is None and timeout is None:
        timeout = DEFAULT_TIMEOUT

    # Only str and bytes are accepted. Bytes are treated as the on-the-wire
    # form with SMTPUTF8 assumed unavailable, so they must decode as ASCII.
    if not isinstance(email, (str, bytes)):
        raise TypeError("email must be str or bytes")
    if isinstance(email, bytes):
        try:
            email = email.decode("ascii")
        except ValueError as err:
            raise EmailSyntaxError("The email address is not valid ASCII.") from err

    # Break the address into an optional display name, the local part
    # (left of the @-sign) and the domain part (right of it). A quoted
    # local part (RFC 5321 4.1.2) may itself contain @-signs, which is why
    # the splitting is delegated to split_email.
    display_name, local_part, domain_part, is_quoted_local_part = split_email(email)

    if display_name:
        # UTS #39 3.3 asks for NFC-normalized display names. The display
        # name is not what is being validated, so the input form is not
        # checked; only the returned value is normalized.
        display_name = unicodedata.normalize("NFC", display_name)

    # Result object that accumulates everything learned about the address.
    ret = ValidatedEmail()
    # The display name is left out of `original` so that the length checks
    # further down only see the address itself; quotes are restored when
    # the local part was quoted.
    original_local = local_part
    if is_quoted_local_part:
        original_local = '"' + local_part + '"'
    ret.original = original_local + "@" + domain_part
    ret.display_name = display_name

    # Check the local part's syntax and pick up its normalized form. For a
    # quoted local part whose content is valid unquoted, the helper hands
    # back the unescaped form.
    local_part_checks = {
        "allow_smtputf8": allow_smtputf8,
        "allow_empty_local": allow_empty_local,
        "quoted_local_part": is_quoted_local_part,
        "strict": strict,
    }
    local_part_info = validate_email_local_part(local_part, **local_part_checks)
    ret.local_part = local_part_info["local_part"]
    ret.ascii_local_part = local_part_info["ascii_local_part"]
    ret.smtputf8 = local_part_info["smtputf8"]

    # RFC 6532 section 3.1 calls for NFC normalization, so the NFC form of
    # the local part is what gets returned. Callers may use it in place of
    # the input, so it is validated again whenever normalization changed it.
    #
    # UTS #39 3.3 would prefer NFKC, which discards information for
    # decomposable characters. Switching would break lookups for callers
    # that stored addresses normalized by earlier versions, so NFC stays.
    nfc_local_part = unicodedata.normalize("NFC", ret.local_part)
    if nfc_local_part != ret.local_part:
        try:
            validate_email_local_part(nfc_local_part, **local_part_checks)
        except EmailSyntaxError as err:
            raise EmailSyntaxError("After Unicode normalization: " + str(err)) from err
        ret.local_part = nfc_local_part

    # Optional-feature rejections come after the mandatory syntax checks so
    # that errors from validate_email_local_part take precedence.
    if is_quoted_local_part and not allow_quoted_local:
        raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")

    # RFC 2142: certain mailbox names are case-insensitive, so they are
    # returned in lowercase.
    ascii_local = ret.ascii_local_part
    if ascii_local is not None and ret.local_part is not None:
        lowered_ascii_local = ascii_local.lower()
        if lowered_ascii_local in CASE_INSENSITIVE_MAILBOX_NAMES:
            ret.ascii_local_part = lowered_ascii_local
            ret.local_part = ret.local_part.lower()

    # Check the domain part's syntax and pick up its normalized form.
    if not domain_part:
        raise EmailSyntaxError("There must be something after the @-sign.")

    # A bracketed domain is a domain literal; it is never checked for
    # deliverability further down.
    is_domain_literal = domain_part.startswith("[") and domain_part.endswith("]")
    if is_domain_literal:
        # Parse what is between the brackets first, so that its syntax
        # errors are reported before the "not allowed" rejection.
        domain_literal_info = validate_email_domain_literal(domain_part[1:-1])
        if not allow_domain_literal:
            raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.")
        ret.domain = domain_literal_info["domain"]
        ret.ascii_domain = domain_literal_info["domain"]  # Domain literals are always ASCII.
        ret.domain_address = domain_literal_info["domain_address"]
    else:
        # Ordinary domain name: the helper returns both the
        # internationalized and the ASCII normalized forms.
        domain_name_info = validate_email_domain_name(
            domain_part,
            test_environment=test_environment,
            globally_deliverable=globally_deliverable,
        )
        ret.domain = domain_name_info["domain"]
        ret.ascii_domain = domain_name_info["ascii_domain"]

    # Assemble the full normalized address.
    ret.normalized = ret.local_part + "@" + ret.domain

    # An ASCII rendering exists only when SMTPUTF8 is not required.
    if ret.smtputf8:
        ret.ascii_email = None
    elif not ret.ascii_domain:
        raise Exception("Missing ASCII domain.")
    else:
        ret.ascii_email = (ret.ascii_local_part or "") + "@" + ret.ascii_domain

    # Length limits are checked on the collected result.
    validate_email_length(ret)

    # Whether a display name is permitted is the final syntax check,
    # because optional parsing features are always checked last.
    if display_name is not None and not allow_display_name:
        raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")

    # Deliverability is skipped when it was not requested, when running in
    # the test environment, or when the domain is a literal (nothing to
    # look up).
    if not check_deliverability or test_environment or is_domain_literal:
        return ret

    # Imported lazily because the module is slow to import (due to dns.resolver).
    from .deliverability import validate_email_deliverability
    deliverability_info = validate_email_deliverability(
        ret.ascii_domain, ret.domain, timeout, dns_resolver
    )

    # Copy the DNS metadata onto the result; `mx` is set only when present.
    mx_records = deliverability_info.get("mx")
    if mx_records is not None:
        ret.mx = mx_records
    ret.mx_fallback_type = deliverability_info.get("mx_fallback_type")

    return ret