1from typing import Optional, Union, TYPE_CHECKING
2import unicodedata
3
4from .exceptions import EmailSyntaxError
5from .types import ValidatedEmail
6from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length
7from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES
8
# `dns.resolver` is only imported while type checking, so merely importing
# this module does not import it at runtime. `_Resolver` is the alias used to
# annotate the `dns_resolver` argument of `validate_email`.
if TYPE_CHECKING:
    import dns.resolver
    _Resolver = dns.resolver.Resolver
else:
    # Runtime placeholder so the annotation below still evaluates.
    _Resolver = object
14
15
def validate_email(
    email: Union[str, bytes],
    /,  # prior arguments are positional-only
    *,  # subsequent arguments are keyword-only
    allow_smtputf8: Optional[bool] = None,
    allow_empty_local: Optional[bool] = None,
    allow_quoted_local: Optional[bool] = None,
    allow_domain_literal: Optional[bool] = None,
    allow_display_name: Optional[bool] = None,
    check_deliverability: Optional[bool] = None,
    test_environment: Optional[bool] = None,
    globally_deliverable: Optional[bool] = None,
    timeout: Optional[int] = None,
    dns_resolver: Optional[_Resolver] = None
) -> ValidatedEmail:
    """
    Validate an email address and return a ValidatedEmail describing it.

    This is the main entry point of the module. The address is split into
    display name, local part and domain; each piece is syntax-checked and
    normalized; the overall length is checked; and, when requested, the
    domain's deliverability is checked.

    Every keyword option defaults to None, meaning "use the package-level
    default of the same (upper-case) name".

    Parameters:
        email: The address as str, or as bytes that must decode as ASCII.
        allow_smtputf8: Forwarded to the local-part validator.
        allow_empty_local: Forwarded to the local-part validator.
        allow_quoted_local: If false, a quoted local part is rejected.
        allow_domain_literal: If false, a bracketed domain literal is
            rejected.
        allow_display_name: If false, an address carrying a display name
            is rejected.
        check_deliverability: If true (and test_environment is false), run
            the deliverability check for non-literal domains.
        test_environment: Forwarded to the domain-name validator; also
            disables the deliverability check.
        globally_deliverable: Forwarded to the domain-name validator.
        timeout: Forwarded to the deliverability check. Only replaced by
            the package default when dns_resolver is also None.
        dns_resolver: Forwarded to the deliverability check.

    Returns:
        A ValidatedEmail populated with the original and normalized forms
        of the address and, if the deliverability check ran, its results.

    Raises:
        EmailSyntaxError: for the syntax problems detected directly in this
            function. The helper validators and the deliverability check
            may raise their own errors; the package documents invalid
            addresses collectively as raising an EmailNotValidError.
    """

    # Look up the package-level defaults at call time and substitute them
    # for every option the caller left as None.
    from . import ALLOW_SMTPUTF8, ALLOW_EMPTY_LOCAL, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME, \
        GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT
    allow_smtputf8 = ALLOW_SMTPUTF8 if allow_smtputf8 is None else allow_smtputf8
    allow_empty_local = ALLOW_EMPTY_LOCAL if allow_empty_local is None else allow_empty_local
    allow_quoted_local = ALLOW_QUOTED_LOCAL if allow_quoted_local is None else allow_quoted_local
    allow_domain_literal = ALLOW_DOMAIN_LITERAL if allow_domain_literal is None else allow_domain_literal
    allow_display_name = ALLOW_DISPLAY_NAME if allow_display_name is None else allow_display_name
    check_deliverability = CHECK_DELIVERABILITY if check_deliverability is None else check_deliverability
    test_environment = TEST_ENVIRONMENT if test_environment is None else test_environment
    globally_deliverable = GLOBALLY_DELIVERABLE if globally_deliverable is None else globally_deliverable
    # The default timeout only applies when no resolver was supplied.
    if dns_resolver is None and timeout is None:
        timeout = DEFAULT_TIMEOUT

    # Accept bytes as well as str. Bytes must be pure ASCII, since that is
    # how an address travels on the wire with SMTP.
    if not isinstance(email, str):
        try:
            email = email.decode("ascii")
        except ValueError as exc:
            raise EmailSyntaxError("The email address is not valid ASCII.") from exc

    # Break the input into display name (or None), local part (before the
    # @-sign) and domain part (after it). There is normally a single
    # @-sign, but the "quoted string" local part form (RFC 5321 4.1.2)
    # permits @-signs inside the quotes.
    shown_name, raw_local, raw_domain, local_is_quoted = split_email(email)

    if shown_name:
        # UTS #39 3.3 (Email Security Profiles for Identifiers) wants
        # display names NFC normalized. The display name is not really part
        # of what is being validated, so the input is not checked for
        # normalization; only the output is normalized.
        shown_name = unicodedata.normalize("NFC", shown_name)

    # The address without its display name, re-quoting the local part if
    # it was quoted. This is what the length check at the end works from.
    if local_is_quoted:
        address_local = '"' + raw_local + '"'
    else:
        address_local = raw_local

    # All results are accumulated on this instance.
    result = ValidatedEmail()
    result.original = address_local + "@" + raw_domain
    result.display_name = shown_name

    def check_local_part(candidate):
        # One place for the local-part validator call, used for both the
        # raw and the NFC-normalized local part with identical options.
        return validate_email_local_part(candidate,
                                         allow_smtputf8=allow_smtputf8,
                                         allow_empty_local=allow_empty_local,
                                         quoted_local_part=local_is_quoted)

    # Syntax-check the local part and pick up its normalized form. For a
    # quoted original whose decoded content is valid unquoted, the
    # validator hands back the unescaped local part.
    local_info = check_local_part(raw_local)
    result.local_part = local_info["local_part"]
    result.ascii_local_part = local_info["ascii_local_part"]
    result.smtputf8 = local_info["smtputf8"]

    # RFC 6532 section 3.1 calls for NFC normalization, so the returned
    # local part is NFC normalized. Callers may use it in place of their
    # input, hence it is re-validated whenever normalization changed it.
    #
    # UTS #39 3.3 actually asks for NFKC, which discards information for
    # decomposable characters. Switching would break database lookups for
    # callers who stored addresses normalized by earlier versions of this
    # library, so NFC is retained. (UTS #39 also appears to require the
    # *input* to be NFKC normalized and has further requirements that need
    # extra Unicode data to check.)
    nfc_local = unicodedata.normalize("NFC", result.local_part)
    if nfc_local != result.local_part:
        try:
            check_local_part(nfc_local)
        except EmailSyntaxError as exc:
            raise EmailSyntaxError("After Unicode normalization: " + str(exc)) from exc
        result.local_part = nfc_local

    # Reject a disallowed quoted local part only now, so that the mandatory
    # checks performed by the local-part validator take precedence.
    if local_is_quoted and not allow_quoted_local:
        raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")

    # RFC 2142: certain mailbox names are case-insensitive, so both forms
    # of the local part are lower-cased for those names.
    ascii_local = result.ascii_local_part
    if ascii_local is not None and result.local_part is not None:
        lowered_ascii_local = ascii_local.lower()
        if lowered_ascii_local in CASE_INSENSITIVE_MAILBOX_NAMES:
            result.ascii_local_part = lowered_ascii_local
            result.local_part = result.local_part.lower()

    # Syntax-check the domain part and pick up its normalized form.
    if not raw_domain:
        raise EmailSyntaxError("There must be something after the @-sign.")

    # A bracketed domain is a domain literal; this flag later suppresses
    # the deliverability check.
    is_domain_literal = raw_domain.startswith("[") and raw_domain.endswith("]")
    if is_domain_literal:
        # Parse the bracketed address first; the "not allowed" error is
        # raised only after the literal itself parsed successfully.
        literal_info = validate_email_domain_literal(raw_domain[1:-1])
        if not allow_domain_literal:
            raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.")
        result.domain = literal_info["domain"]
        result.ascii_domain = literal_info["domain"]  # Domain literals are always ASCII.
        result.domain_address = literal_info["domain_address"]
    else:
        # Ordinary domain name: obtain the normalized internationalized
        # and ASCII forms.
        name_info = validate_email_domain_name(
            raw_domain,
            test_environment=test_environment,
            globally_deliverable=globally_deliverable,
        )
        result.domain = name_info["domain"]
        result.ascii_domain = name_info["ascii_domain"]

    # Assemble the complete normalized address.
    result.normalized = result.local_part + "@" + result.domain

    # An all-ASCII form exists only when SMTPUTF8 is not required.
    if result.smtputf8:
        result.ascii_email = None
    else:
        if not result.ascii_domain:
            raise Exception("Missing ASCII domain.")
        result.ascii_email = (result.ascii_local_part or "") + "@" + result.ascii_domain

    # Enforce the length limits on the address.
    validate_email_length(result)

    # Optional parsing features are always checked last, so the display
    # name restriction is the final syntax check.
    if shown_name is not None and not allow_display_name:
        raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")

    # Deliverability is skipped when it was not requested, in a test
    # environment, and for domain literals (nothing to look up).
    if not check_deliverability or test_environment or is_domain_literal:
        return result

    # Lazy load `deliverability` as it is slow to import (due to dns.resolver)
    from .deliverability import validate_email_deliverability
    dns_info = validate_email_deliverability(
        result.ascii_domain, result.domain, timeout, dns_resolver
    )

    # Copy the DNS findings onto the result; `mx` is only set when present.
    mx_records = dns_info.get("mx")
    if mx_records is not None:
        result.mx = mx_records
    result.mx_fallback_type = dns_info.get("mx_fallback_type")

    return result