Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/email_validator/validate_email.py: 64%
85 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:32 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:32 +0000
1from typing import Optional, Union
3from .exceptions_types import EmailSyntaxError, ValidatedEmail
4from .syntax import validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, get_length_reason
5from .rfc_constants import EMAIL_MAX_LENGTH, QUOTED_LOCAL_PART_ADDR, CASE_INSENSITIVE_MAILBOX_NAMES
def validate_email(
    email: Union[str, bytes],
    # /, # not supported in Python 3.6, 3.7
    *,
    allow_smtputf8: Optional[bool] = None,
    allow_empty_local: bool = False,
    allow_quoted_local: Optional[bool] = None,
    allow_domain_literal: Optional[bool] = None,
    check_deliverability: Optional[bool] = None,
    test_environment: Optional[bool] = None,
    globally_deliverable: Optional[bool] = None,
    timeout: Optional[int] = None,
    dns_resolver: Optional[object] = None
) -> ValidatedEmail:
    """
    Validate an email address and return its normalized form.

    This is the main entry point of the library. The address is split into a
    local part and a domain part, each part is syntax-checked and normalized,
    overall length limits are enforced and, optionally, deliverability of the
    domain is checked.

    Args:
        email: The address to validate, as a str or a bytes instance. A bytes
            value must be ASCII-only; it is decoded before validation.
        allow_smtputf8: Forwarded to the local-part validator. Defaults to the
            package-level ALLOW_SMTPUTF8 when None.
        allow_empty_local: Forwarded to the local-part validator.
        allow_quoted_local: Whether a quoted local part is accepted. Defaults
            to the package-level ALLOW_QUOTED_LOCAL when None.
        allow_domain_literal: Forwarded to the domain-literal validator for
            "[...]" domains. Defaults to ALLOW_DOMAIN_LITERAL when None.
        check_deliverability: Whether to run the deliverability check.
            Defaults to CHECK_DELIVERABILITY when None.
        test_environment: Forwarded to the domain-name validator; when true
            the deliverability check is skipped. Defaults to TEST_ENVIRONMENT
            when None.
        globally_deliverable: Forwarded to the domain-name validator. Defaults
            to GLOBALLY_DELIVERABLE when None.
        timeout: Passed to the deliverability check. Defaults to
            DEFAULT_TIMEOUT only when neither timeout nor dns_resolver is
            given.
        dns_resolver: Passed to the deliverability check.

    Returns:
        A ValidatedEmail instance (not a dict) holding the original address,
        the normalized local part and domain, their ASCII forms where they
        exist, and any attributes reported by the deliverability check.

    Raises:
        EmailSyntaxError: If the address is not valid ASCII bytes, does not
            have exactly one unquoted @-sign, uses a quoted local part when
            that is not allowed, has an empty domain, or is too long.
            Exceptions raised by the part validators and by the
            deliverability check propagate unchanged.
    """

    # Fill in default values of arguments. The defaults live on the package
    # and are imported at call time rather than at module import time.
    from . import (
        ALLOW_SMTPUTF8, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL,
        GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT,
    )
    if allow_smtputf8 is None:
        allow_smtputf8 = ALLOW_SMTPUTF8
    if allow_quoted_local is None:
        allow_quoted_local = ALLOW_QUOTED_LOCAL
    if allow_domain_literal is None:
        allow_domain_literal = ALLOW_DOMAIN_LITERAL
    if check_deliverability is None:
        check_deliverability = CHECK_DELIVERABILITY
    if test_environment is None:
        test_environment = TEST_ENVIRONMENT
    if globally_deliverable is None:
        globally_deliverable = GLOBALLY_DELIVERABLE
    # Only apply the default timeout when the caller did not supply a
    # resolver of their own.
    if timeout is None and dns_resolver is None:
        timeout = DEFAULT_TIMEOUT

    # Allow email to be a str or bytes instance. If bytes,
    # it must be ASCII because that's how the bytes work
    # on the wire with SMTP.
    if not isinstance(email, str):
        try:
            email = email.decode("ascii")
        except ValueError as err:
            # Chain explicitly so the underlying decode error stays attached
            # as the cause of the syntax error.
            raise EmailSyntaxError("The email address is not valid ASCII.") from err

    # Typical email addresses have a single @-sign, but the
    # awkward "quoted string" local part form (RFC 5321 4.1.2)
    # allows @-signs (and escaped quotes) to appear in the local
    # part if the local part is quoted. If the address is quoted,
    # split it at a non-escaped @-sign and unescape the escaping.
    m = QUOTED_LOCAL_PART_ADDR.match(email)
    quoted_local_part = bool(m)
    if m:
        local_part, domain_part = m.groups()

        # Remove backslashes: each backslash-escaped character is replaced
        # by the character itself.
        import re
        local_part = re.sub(r"\\(.)", "\\1", local_part)

    else:
        # Split at the one and only at-sign.
        parts = email.split('@')
        if len(parts) != 2:
            raise EmailSyntaxError("The email address is not valid. It must have exactly one @-sign.")
        local_part, domain_part = parts

    # Collect return values in this instance.
    ret = ValidatedEmail()
    ret.original = email

    # Validate the email address's local part syntax and get a normalized form.
    # If the original address was quoted and the decoded local part is a valid
    # unquoted local part, then we'll get back a normalized (unescaped) local
    # part.
    local_part_info = validate_email_local_part(local_part,
                                                allow_smtputf8=allow_smtputf8,
                                                allow_empty_local=allow_empty_local,
                                                quoted_local_part=quoted_local_part)
    # The quoting policy is enforced only after the local part itself has
    # passed validation, so errors from the validator are raised first.
    if quoted_local_part and not allow_quoted_local:
        raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")
    ret.local_part = local_part_info["local_part"]
    ret.ascii_local_part = local_part_info["ascii_local_part"]
    ret.smtputf8 = local_part_info["smtputf8"]

    # Some local parts are required to be case-insensitive, so we should normalize
    # to lowercase.
    # RFC 2142
    if ret.ascii_local_part is not None \
            and ret.ascii_local_part.lower() in CASE_INSENSITIVE_MAILBOX_NAMES \
            and ret.local_part is not None:
        ret.ascii_local_part = ret.ascii_local_part.lower()
        ret.local_part = ret.local_part.lower()

    # Validate the email address's domain part syntax and get a normalized form.
    is_domain_literal = False
    if not domain_part:
        raise EmailSyntaxError("There must be something after the @-sign.")

    elif domain_part.startswith("[") and domain_part.endswith("]"):
        # Parse the address in the domain literal and get back a normalized domain.
        domain_part_info = validate_email_domain_literal(domain_part[1:-1], allow_domain_literal=allow_domain_literal)
        ret.domain = domain_part_info["domain"]
        ret.ascii_domain = domain_part_info["domain"]  # Domain literals are always ASCII.
        ret.domain_address = domain_part_info["domain_address"]
        is_domain_literal = True  # Prevent deliverability checks.

    else:
        # Check the syntax of the domain and get back a normalized
        # internationalized and ASCII form.
        domain_part_info = validate_email_domain_name(domain_part, test_environment=test_environment, globally_deliverable=globally_deliverable)
        ret.domain = domain_part_info["domain"]
        ret.ascii_domain = domain_part_info["ascii_domain"]

    # Construct the complete normalized form.
    ret.normalized = ret.local_part + "@" + ret.domain

    # If the email address has an ASCII form, add it.
    if not ret.smtputf8:
        if not ret.ascii_domain:
            # Defensive check: both domain branches above are expected to
            # have set an ASCII domain by this point.
            raise Exception("Missing ASCII domain.")
        ret.ascii_email = (ret.ascii_local_part or "") + "@" + ret.ascii_domain
    else:
        ret.ascii_email = None

    # If the email address has an ASCII representation, then we assume it may be
    # transmitted in ASCII (we can't assume SMTPUTF8 will be used on all hops to
    # the destination) and the length limit applies to ASCII characters (which is
    # the same as octets). The number of characters in the internationalized form
    # may be many fewer (because IDNA ASCII is verbose) and could be less than 254
    # Unicode characters, and of course the number of octets over the limit may
    # not be the number of characters over the limit, so if the email address is
    # internationalized, we can't give any simple information about why the address
    # is too long.
    #
    # In addition, check that the UTF-8 encoding (i.e. not IDNA ASCII and not
    # Unicode characters) is at most 254 octets. If the address is transmitted using
    # SMTPUTF8, then the length limit probably applies to the UTF-8 encoded octets.
    # If the email address has an ASCII form that differs from its internationalized
    # form, I don't think the internationalized form can be longer, and so the ASCII
    # form length check would be sufficient. If there is no ASCII form, then we have
    # to check the UTF-8 encoding. The UTF-8 encoding could be up to about four times
    # longer than the number of characters.
    #
    # See the length checks on the local part and the domain.
    if ret.ascii_email and len(ret.ascii_email) > EMAIL_MAX_LENGTH:
        if ret.ascii_email == ret.normalized:
            reason = get_length_reason(ret.ascii_email)
        elif len(ret.normalized) > EMAIL_MAX_LENGTH:
            # If there are more than 254 characters, then the ASCII
            # form is definitely going to be too long.
            reason = get_length_reason(ret.normalized, utf8=True)
        else:
            reason = "(when converted to IDNA ASCII)"
        raise EmailSyntaxError(f"The email address is too long {reason}.")
    if len(ret.normalized.encode("utf8")) > EMAIL_MAX_LENGTH:
        if len(ret.normalized) > EMAIL_MAX_LENGTH:
            # If there are more than 254 characters, then the UTF-8
            # encoding is definitely going to be too long.
            reason = get_length_reason(ret.normalized, utf8=True)
        else:
            reason = "(when encoded in bytes)"
        raise EmailSyntaxError(f"The email address is too long {reason}.")

    # Validate the email address's deliverability using DNS and copy the
    # resulting metadata onto the returned object. For a domain literal
    # there is nothing to check, so the deliverability step is skipped.
    if check_deliverability and not test_environment and not is_domain_literal:
        # Lazy load `deliverability` as it is slow to import (due to dns.resolver)
        from .deliverability import validate_email_deliverability
        deliverability_info = validate_email_deliverability(
            ret.ascii_domain, ret.domain, timeout, dns_resolver
        )
        for key, value in deliverability_info.items():
            setattr(ret, key, value)

    return ret