1from typing import Optional, Union, TYPE_CHECKING
2import unicodedata
3
4from .exceptions_types import EmailSyntaxError, ValidatedEmail
5from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length
6from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES
7
# dns.resolver is imported here only while static type checking
# (TYPE_CHECKING is true), so this block does not import it at runtime.
# At runtime _Resolver is just `object`, a placeholder that keeps the
# `dns_resolver` annotation on validate_email() resolvable.
if TYPE_CHECKING:
    import dns.resolver
    _Resolver = dns.resolver.Resolver
else:
    _Resolver = object
13
14
def validate_email(
    email: Union[str, bytes],
    /,  # prior arguments are positional-only
    *,  # subsequent arguments are keyword-only
    allow_smtputf8: Optional[bool] = None,
    allow_empty_local: bool = False,
    allow_quoted_local: Optional[bool] = None,
    allow_domain_literal: Optional[bool] = None,
    allow_display_name: Optional[bool] = None,
    check_deliverability: Optional[bool] = None,
    test_environment: Optional[bool] = None,
    globally_deliverable: Optional[bool] = None,
    timeout: Optional[int] = None,
    dns_resolver: Optional[_Resolver] = None
) -> ValidatedEmail:
    """
    Validate an email address and return a ValidatedEmail describing it.

    This is the main entry point of the module. The address may be given as
    a str or as ASCII-encoded bytes. Every option left as None falls back to
    the corresponding package-level default (ALLOW_SMTPUTF8,
    ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME,
    CHECK_DELIVERABILITY, TEST_ENVIRONMENT, GLOBALLY_DELIVERABLE); `timeout`
    falls back to DEFAULT_TIMEOUT only when no `dns_resolver` is supplied.

    The checks run in a fixed order: local part syntax, NFC normalization
    of the local part, the quoted-local-part option, domain syntax (or
    domain literal), overall length, the display-name option and finally,
    if enabled and not in a test environment, the deliverability lookup.
    Domain literals are never checked for deliverability.

    Raises EmailSyntaxError for the syntax problems detected here; the
    helper functions that are called may raise their own errors.
    """

    # Resolve every unspecified option to its package-level default.
    from . import (
        ALLOW_SMTPUTF8, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME,
        GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT,
    )
    allow_smtputf8 = ALLOW_SMTPUTF8 if allow_smtputf8 is None else allow_smtputf8
    allow_quoted_local = ALLOW_QUOTED_LOCAL if allow_quoted_local is None else allow_quoted_local
    allow_domain_literal = ALLOW_DOMAIN_LITERAL if allow_domain_literal is None else allow_domain_literal
    allow_display_name = ALLOW_DISPLAY_NAME if allow_display_name is None else allow_display_name
    check_deliverability = CHECK_DELIVERABILITY if check_deliverability is None else check_deliverability
    test_environment = TEST_ENVIRONMENT if test_environment is None else test_environment
    globally_deliverable = GLOBALLY_DELIVERABLE if globally_deliverable is None else globally_deliverable
    # A caller-supplied resolver carries its own timeout configuration, so
    # the default timeout is applied only when neither was given.
    if timeout is None and dns_resolver is None:
        timeout = DEFAULT_TIMEOUT

    # Accept bytes as well as str. Bytes have to be pure ASCII, since
    # that is how an address travels on the wire with SMTP.
    if not isinstance(email, str):
        try:
            email = email.decode("ascii")
        except ValueError as decode_error:
            raise EmailSyntaxError("The email address is not valid ASCII.") from decode_error

    # Break the address into display name (possibly None), local part
    # (left of the @-sign) and domain part (right of the @-sign). A quoted
    # local part (RFC 5321 4.1.2) may itself contain @-signs, which is why
    # the splitting is delegated to split_email.
    name, local, domain, quoted = split_email(email)

    # All results are accumulated on this object.
    result = ValidatedEmail()
    # The display name is left out so that the length tests further down
    # only measure the address itself.
    if quoted:
        original_local = '"' + local + '"'
    else:
        original_local = local
    result.original = original_local + "@" + domain
    result.display_name = name

    # Check the local part's syntax and obtain its normalized form. When a
    # quoted local part decodes to something valid without quotes, the
    # normalized (unescaped) form is what comes back.
    local_options = dict(
        allow_smtputf8=allow_smtputf8,
        allow_empty_local=allow_empty_local,
        quoted_local_part=quoted,
    )
    local_info = validate_email_local_part(local, **local_options)
    result.local_part = local_info["local_part"]
    result.ascii_local_part = local_info["ascii_local_part"]
    result.smtputf8 = local_info["smtputf8"]

    # RFC 6532 section 3.1 calls for Unicode NFC normalization. Callers may
    # use the normalized string instead of their input, so it is validated
    # again whenever normalization actually changed it.
    nfc_local = unicodedata.normalize("NFC", result.local_part)
    if nfc_local != result.local_part:
        try:
            validate_email_local_part(nfc_local, **local_options)
        except EmailSyntaxError as syntax_error:
            raise EmailSyntaxError("After Unicode normalization: " + str(syntax_error)) from syntax_error
        result.local_part = nfc_local

    # Reject a quoted local part only now, after validate_email_local_part
    # had its chance to raise, so mandatory checks take precedence over
    # this optional one.
    if quoted and not allow_quoted_local:
        raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")

    # RFC 2142: certain mailbox names must be treated case-insensitively,
    # so those are normalized to lowercase.
    ascii_local = result.ascii_local_part
    if ascii_local is not None and result.local_part is not None:
        lowered_ascii_local = ascii_local.lower()
        if lowered_ascii_local in CASE_INSENSITIVE_MAILBOX_NAMES:
            result.ascii_local_part = lowered_ascii_local
            result.local_part = result.local_part.lower()

    # Check the domain part's syntax and obtain its normalized form.
    if not domain:
        raise EmailSyntaxError("There must be something after the @-sign.")

    # A bracketed domain is a domain literal; this flag later suppresses
    # the deliverability checks.
    is_domain_literal = domain.startswith("[") and domain.endswith("]")
    if is_domain_literal:
        # Parse the address between the brackets first, then apply the
        # optional "is this allowed" check.
        literal_info = validate_email_domain_literal(domain[1:-1])
        if not allow_domain_literal:
            raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.")
        result.domain = literal_info["domain"]
        result.ascii_domain = literal_info["domain"]  # Domain literals are always ASCII.
        result.domain_address = literal_info["domain_address"]
    else:
        # Validate the domain name, yielding both the internationalized
        # and the ASCII normalized forms.
        name_info = validate_email_domain_name(
            domain,
            test_environment=test_environment,
            globally_deliverable=globally_deliverable,
        )
        result.domain = name_info["domain"]
        result.ascii_domain = name_info["ascii_domain"]

    # Assemble the full normalized address.
    result.normalized = result.local_part + "@" + result.domain

    # Provide the ASCII form of the address when one exists.
    if result.smtputf8:
        result.ascii_email = None
    else:
        if not result.ascii_domain:
            raise Exception("Missing ASCII domain.")
        result.ascii_email = (result.ascii_local_part or "") + "@" + result.ascii_domain

    # Enforce the length limits on the address.
    validate_email_length(result)

    # Optional parsing features are always checked last among the syntax
    # checks, and the display name is the final one.
    if name is not None and not allow_display_name:
        raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")

    # Deliverability is skipped when it is disabled, when running in a test
    # environment, and for domain literals (there is nothing to look up).
    if not check_deliverability or test_environment or is_domain_literal:
        return result

    # Query DNS and copy the resulting metadata onto the result object.
    # `deliverability` is imported lazily because it is slow to import
    # (due to dns.resolver).
    from .deliverability import validate_email_deliverability
    dns_info = validate_email_deliverability(
        result.ascii_domain, result.domain, timeout, dns_resolver
    )
    mx_records = dns_info.get("mx")
    if mx_records is not None:
        result.mx = mx_records
        result.mx_fallback_type = dns_info.get("mx_fallback_type")

    return result