1from typing import Optional, Union, TYPE_CHECKING
2import unicodedata
3
4from .exceptions import EmailSyntaxError
5from .types import ValidatedEmail
6from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length
7from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES
8
9if TYPE_CHECKING:
10 import dns.resolver
11 _Resolver = dns.resolver.Resolver
12else:
13 _Resolver = object
14
15
def validate_email(
    email: Union[str, bytes],
    /,  # everything before this marker is positional-only
    *,  # everything after this marker is keyword-only
    allow_smtputf8: Optional[bool] = None,
    allow_empty_local: Optional[bool] = None,
    allow_quoted_local: Optional[bool] = None,
    allow_domain_literal: Optional[bool] = None,
    allow_display_name: Optional[bool] = None,
    strict: Optional[bool] = None,
    check_deliverability: Optional[bool] = None,
    test_environment: Optional[bool] = None,
    globally_deliverable: Optional[bool] = None,
    timeout: Optional[int] = None,
    dns_resolver: Optional[_Resolver] = None
) -> ValidatedEmail:
    """
    Validate an email address and return a ValidatedEmail describing it.

    This is the main function of the module. The address is split into an
    optional display name, a local part and a domain part; each piece is
    syntax-checked and normalized, the overall length is checked, and,
    when requested, the domain's deliverability is looked up.

    Every option left as None falls back to the package-level default of
    the same upper-case name (ALLOW_SMTPUTF8, STRICT, ...). `timeout`
    only falls back to DEFAULT_TIMEOUT when no `dns_resolver` is supplied.

    `email` may be a str, or bytes that must decode as ASCII.

    If the address is not valid an EmailNotValidError is raised; the
    checks made directly in this function raise EmailSyntaxError. A
    TypeError is raised when `email` is neither str nor bytes.
    """

    # Resolve unspecified options against the package-level defaults. The
    # import is done here, inside the function, as in the original code.
    from . import (
        ALLOW_SMTPUTF8, ALLOW_EMPTY_LOCAL, ALLOW_QUOTED_LOCAL,
        ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME, STRICT,
        GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT,
        DEFAULT_TIMEOUT,
    )
    allow_smtputf8 = ALLOW_SMTPUTF8 if allow_smtputf8 is None else allow_smtputf8
    allow_empty_local = ALLOW_EMPTY_LOCAL if allow_empty_local is None else allow_empty_local
    allow_quoted_local = ALLOW_QUOTED_LOCAL if allow_quoted_local is None else allow_quoted_local
    allow_domain_literal = ALLOW_DOMAIN_LITERAL if allow_domain_literal is None else allow_domain_literal
    allow_display_name = ALLOW_DISPLAY_NAME if allow_display_name is None else allow_display_name
    strict = STRICT if strict is None else strict
    check_deliverability = CHECK_DELIVERABILITY if check_deliverability is None else check_deliverability
    test_environment = TEST_ENVIRONMENT if test_environment is None else test_environment
    globally_deliverable = GLOBALLY_DELIVERABLE if globally_deliverable is None else globally_deliverable
    if timeout is None and dns_resolver is None:
        # A caller-supplied resolver keeps whatever timeout it was given.
        timeout = DEFAULT_TIMEOUT

    if isinstance(email, bytes):
        # Bytes are treated as what would be transmitted on the wire.
        # SMTPUTF8 is assumed to be unavailable there, so only ASCII
        # is acceptable.
        try:
            email = email.decode("ascii")
        except ValueError as err:
            raise EmailSyntaxError("The email address is not valid ASCII.") from err
    elif not isinstance(email, str):
        raise TypeError("email must be str or bytes")

    # Break the address into the display name (or None), the local part
    # (before the @-sign) and the domain part (after the @-sign). There is
    # normally a single @-sign, but the awkward "quoted string" local part
    # form (RFC 5321 4.1.2) permits @-signs inside a quoted local part.
    display_name, local_part, domain_part, quoted = split_email(email)

    if display_name:
        # UTS #39 3.3 Email Security Profiles for Identifiers requires
        # display names (incorrectly called "quoted-string-part" there)
        # to be NFC normalized. The display name is not really part of
        # what is being validated, so the input is not required to be
        # normalized already; only the output is normalized.
        display_name = unicodedata.normalize("NFC", display_name)

    # All return values are collected on this instance.
    result = ValidatedEmail()
    if quoted:
        original_local = '"' + local_part + '"'
    else:
        original_local = local_part
    # The display name, if any, is dropped here so that the length tests
    # at the end only see the address itself.
    result.original = original_local + "@" + domain_part
    result.display_name = display_name

    # Check the local part's syntax and obtain its normalized form. When
    # the original was quoted but its decoded content is a valid unquoted
    # local part, the normalized (unescaped) local part is returned.
    local_part_options = {
        "allow_smtputf8": allow_smtputf8,
        "allow_empty_local": allow_empty_local,
        "quoted_local_part": quoted,
        "strict": strict,
    }
    local_info = validate_email_local_part(local_part, **local_part_options)
    result.local_part = local_info["local_part"]
    result.ascii_local_part = local_info["ascii_local_part"]
    result.smtputf8 = local_info["smtputf8"]

    # RFC 6532 section 3.1 says Unicode NFC normalization should be
    # applied, so the NFC form of the local part is what gets returned.
    # Because callers may use it in place of the original string, it is
    # validated again whenever normalization changed it.
    #
    # UTS #39 3.3 Email Security Profiles for Identifiers asks for NFKC
    # normalization of local parts instead, which loses information in
    # characters that can be decomposed. Switching to NFKC is not easy:
    # it would break database lookups for callers that stored a normalized
    # address produced by an earlier version of this library. (UTS #39
    # also seems to require the *input* to be NFKC normalized and has
    # other requirements that are hard to check without additional Unicode
    # data, and it is unclear whether those rules apply in the wild.)
    nfc_local_part = unicodedata.normalize("NFC", result.local_part)
    if nfc_local_part != result.local_part:
        try:
            validate_email_local_part(nfc_local_part, **local_part_options)
        except EmailSyntaxError as err:
            raise EmailSyntaxError(f"After Unicode normalization: {err!s}") from err
        result.local_part = nfc_local_part

    # A quoted local part that is present but not permitted is rejected
    # only now, after validate_email_local_part has had its chance to
    # raise, so that the mandatory checks take precedence.
    if quoted and not allow_quoted_local:
        raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")

    # RFC 2142: certain mailbox names must be treated case-insensitively,
    # so those are normalized to lowercase.
    ascii_local = result.ascii_local_part
    if ascii_local is not None:
        lowered_ascii_local = ascii_local.lower()
        if lowered_ascii_local in CASE_INSENSITIVE_MAILBOX_NAMES \
           and result.local_part is not None:
            result.ascii_local_part = lowered_ascii_local
            result.local_part = result.local_part.lower()

    # Check the domain part's syntax and obtain its normalized form.
    if not domain_part:
        raise EmailSyntaxError("There must be something after the @-sign.")

    # A domain literal also suppresses the deliverability checks below.
    is_domain_literal = domain_part.startswith("[") and domain_part.endswith("]")
    if is_domain_literal:
        # Parse the address between the brackets first; only afterwards
        # reject the literal if the option does not allow it.
        literal_info = validate_email_domain_literal(domain_part[1:-1])
        if not allow_domain_literal:
            raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.")
        result.domain = literal_info["domain"]
        result.ascii_domain = literal_info["domain"]  # Domain literals are always ASCII.
        result.domain_address = literal_info["domain_address"]
    else:
        # Ordinary domain name: obtain the normalized internationalized
        # and ASCII forms.
        name_info = validate_email_domain_name(
            domain_part,
            test_environment=test_environment,
            globally_deliverable=globally_deliverable,
        )
        result.domain = name_info["domain"]
        result.ascii_domain = name_info["ascii_domain"]

    # Assemble the complete normalized address.
    result.normalized = result.local_part + "@" + result.domain

    # Provide the ASCII form of the address when one exists.
    if result.smtputf8:
        result.ascii_email = None
    else:
        if not result.ascii_domain:
            raise Exception("Missing ASCII domain.")
        result.ascii_email = (result.ascii_local_part or "") + "@" + result.ascii_domain

    # Check the length of the address.
    validate_email_length(result)

    # Optional parsing features are always checked last, which makes the
    # display-name permission the final syntax check.
    if display_name is not None and not allow_display_name:
        raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")

    # Deliverability is skipped when not requested, in a test environment,
    # or for a domain literal (there is nothing to check in that case).
    if not check_deliverability or test_environment or is_domain_literal:
        return result

    # Validate deliverability using DNS and record the metadata on the
    # result. `deliverability` is imported lazily because it is slow to
    # import (due to dns.resolver).
    from .deliverability import validate_email_deliverability
    dns_info = validate_email_deliverability(
        result.ascii_domain, result.domain, timeout, dns_resolver
    )
    mx = dns_info.get("mx")
    if mx is not None:
        result.mx = mx
    result.mx_fallback_type = dns_info.get("mx_fallback_type")

    return result