1from typing import Optional, Union, TYPE_CHECKING 
    2import unicodedata 
    3 
    4from .exceptions import EmailSyntaxError 
    5from .types import ValidatedEmail 
    6from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length 
    7from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES 
    8 
# dns.resolver is needed in this module only to annotate the `dns_resolver`
# argument of validate_email(), so it is imported for static type checkers
# alone.
if TYPE_CHECKING:
    import dns.resolver
    _Resolver = dns.resolver.Resolver
else:
    # At runtime `_Resolver` is a plain placeholder, which keeps the
    # annotation evaluable without importing dns.resolver here.
    _Resolver = object
    14 
    15 
    16def validate_email( 
    17    email: Union[str, bytes], 
    18    /,  # prior arguments are positional-only 
    19    *,  # subsequent arguments are keyword-only 
    20    allow_smtputf8: Optional[bool] = None, 
    21    allow_empty_local: Optional[bool] = None, 
    22    allow_quoted_local: Optional[bool] = None, 
    23    allow_domain_literal: Optional[bool] = None, 
    24    allow_display_name: Optional[bool] = None, 
    25    strict: Optional[bool] = None, 
    26    check_deliverability: Optional[bool] = None, 
    27    test_environment: Optional[bool] = None, 
    28    globally_deliverable: Optional[bool] = None, 
    29    timeout: Optional[int] = None, 
    30    dns_resolver: Optional[_Resolver] = None 
    31) -> ValidatedEmail: 
    32    """ 
    33    Given an email address, and some options, returns a ValidatedEmail instance 
    34    with information about the address if it is valid or, if the address is not 
    35    valid, raises an EmailNotValidError. This is the main function of the module. 
    36    """ 
    37 
    38    # Fill in default values of arguments. 
    39    from . import ALLOW_SMTPUTF8, ALLOW_EMPTY_LOCAL, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME, \ 
    40        STRICT, GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT 
    41    if allow_smtputf8 is None: 
    42        allow_smtputf8 = ALLOW_SMTPUTF8 
    43    if allow_empty_local is None: 
    44        allow_empty_local = ALLOW_EMPTY_LOCAL 
    45    if allow_quoted_local is None: 
    46        allow_quoted_local = ALLOW_QUOTED_LOCAL 
    47    if allow_domain_literal is None: 
    48        allow_domain_literal = ALLOW_DOMAIN_LITERAL 
    49    if allow_display_name is None: 
    50        allow_display_name = ALLOW_DISPLAY_NAME 
    51    if strict is None: 
    52        strict = STRICT 
    53    if check_deliverability is None: 
    54        check_deliverability = CHECK_DELIVERABILITY 
    55    if test_environment is None: 
    56        test_environment = TEST_ENVIRONMENT 
    57    if globally_deliverable is None: 
    58        globally_deliverable = GLOBALLY_DELIVERABLE 
    59    if timeout is None and dns_resolver is None: 
    60        timeout = DEFAULT_TIMEOUT 
    61 
    62    if isinstance(email, str): 
    63        pass 
    64    elif isinstance(email, bytes): 
    65        # Allow email to be a bytes instance as if it is what 
    66        # will be transmitted on the wire. But assume SMTPUTF8 
    67        # is unavailable, so it must be ASCII. 
    68        try: 
    69            email = email.decode("ascii") 
    70        except ValueError as e: 
    71            raise EmailSyntaxError("The email address is not valid ASCII.") from e 
    72    else: 
    73        raise TypeError("email must be str or bytes") 
    74 
    75    # Split the address into the display name (or None), the local part 
    76    # (before the @-sign), and the domain part (after the @-sign). 
    77    # Normally, there is only one @-sign. But the awkward "quoted string" 
    78    # local part form (RFC 5321 4.1.2) allows @-signs in the local 
    79    # part if the local part is quoted. 
    80    display_name, local_part, domain_part, is_quoted_local_part \ 
    81        = split_email(email) 
    82 
    83    if display_name: 
    84        # UTS #39 3.3 Email Security Profiles for Identifiers requires 
    85        # display names (incorrectly called "quoted-string-part" there) 
    86        # to be NFC normalized. Since these are not a part of what we 
    87        # are really validating, we won't check that the input was NFC 
    88        # normalized, but we'll normalize in output. 
    89        display_name = unicodedata.normalize("NFC", display_name) 
    90 
    91    # Collect return values in this instance. 
    92    ret = ValidatedEmail() 
    93    ret.original = ((local_part if not is_quoted_local_part 
    94                    else ('"' + local_part + '"')) 
    95                    + "@" + domain_part)  # drop the display name, if any, for email length tests at the end 
    96    ret.display_name = display_name 
    97 
    98    # Validate the email address's local part syntax and get a normalized form. 
    99    # If the original address was quoted and the decoded local part is a valid 
    100    # unquoted local part, then we'll get back a normalized (unescaped) local 
    101    # part. 
    102    local_part_info = validate_email_local_part(local_part, 
    103                                                allow_smtputf8=allow_smtputf8, 
    104                                                allow_empty_local=allow_empty_local, 
    105                                                quoted_local_part=is_quoted_local_part, 
    106                                                strict=strict) 
    107    ret.local_part = local_part_info["local_part"] 
    108    ret.ascii_local_part = local_part_info["ascii_local_part"] 
    109    ret.smtputf8 = local_part_info["smtputf8"] 
    110 
    111    # RFC 6532 section 3.1 says that Unicode NFC normalization should be applied, 
    112    # so we'll return the NFC-normalized local part. Since the caller may use that 
    113    # string in place of the original string, ensure it is also valid. 
    114    # 
    115    # UTS #39 3.3 Email Security Profiles for Identifiers requires local parts 
    116    # to be NFKC normalized, which loses some information in characters that can 
    117    # be decomposed. We might want to consider applying NFKC normalization, but 
    118    # we can't make the change easily because it would break database lookups 
    119    # for any caller that put a normalized address from a previous version of 
    120    # this library. (UTS #39 seems to require that the *input* be NKFC normalized 
    121    # and has other requirements that are hard to check without additional Unicode 
    122    # data, and I don't know whether the rules really apply in the wild.) 
    123    normalized_local_part = unicodedata.normalize("NFC", ret.local_part) 
    124    if normalized_local_part != ret.local_part: 
    125        try: 
    126            validate_email_local_part(normalized_local_part, 
    127                                      allow_smtputf8=allow_smtputf8, 
    128                                      allow_empty_local=allow_empty_local, 
    129                                      quoted_local_part=is_quoted_local_part, 
    130                                      strict=strict) 
    131        except EmailSyntaxError as e: 
    132            raise EmailSyntaxError("After Unicode normalization: " + str(e)) from e 
    133        ret.local_part = normalized_local_part 
    134 
    135    # If a quoted local part isn't allowed but is present, now raise an exception. 
    136    # This is done after any exceptions raised by validate_email_local_part so 
    137    # that mandatory checks have highest precedence. 
    138    if is_quoted_local_part and not allow_quoted_local: 
    139        raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.") 
    140 
    141    # Some local parts are required to be case-insensitive, so we should normalize 
    142    # to lowercase. 
    143    # RFC 2142 
    144    if ret.ascii_local_part is not None \ 
    145       and ret.ascii_local_part.lower() in CASE_INSENSITIVE_MAILBOX_NAMES \ 
    146       and ret.local_part is not None: 
    147        ret.ascii_local_part = ret.ascii_local_part.lower() 
    148        ret.local_part = ret.local_part.lower() 
    149 
    150    # Validate the email address's domain part syntax and get a normalized form. 
    151    is_domain_literal = False 
    152    if len(domain_part) == 0: 
    153        raise EmailSyntaxError("There must be something after the @-sign.") 
    154 
    155    elif domain_part.startswith("[") and domain_part.endswith("]"): 
    156        # Parse the address in the domain literal and get back a normalized domain. 
    157        domain_literal_info = validate_email_domain_literal(domain_part[1:-1]) 
    158        if not allow_domain_literal: 
    159            raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.") 
    160        ret.domain = domain_literal_info["domain"] 
    161        ret.ascii_domain = domain_literal_info["domain"]  # Domain literals are always ASCII. 
    162        ret.domain_address = domain_literal_info["domain_address"] 
    163        is_domain_literal = True  # Prevent deliverability checks. 
    164 
    165    else: 
    166        # Check the syntax of the domain and get back a normalized 
    167        # internationalized and ASCII form. 
    168        domain_name_info = validate_email_domain_name(domain_part, test_environment=test_environment, globally_deliverable=globally_deliverable) 
    169        ret.domain = domain_name_info["domain"] 
    170        ret.ascii_domain = domain_name_info["ascii_domain"] 
    171 
    172    # Construct the complete normalized form. 
    173    ret.normalized = ret.local_part + "@" + ret.domain 
    174 
    175    # If the email address has an ASCII form, add it. 
    176    if not ret.smtputf8: 
    177        if not ret.ascii_domain: 
    178            raise Exception("Missing ASCII domain.") 
    179        ret.ascii_email = (ret.ascii_local_part or "") + "@" + ret.ascii_domain 
    180    else: 
    181        ret.ascii_email = None 
    182 
    183    # Check the length of the address. 
    184    validate_email_length(ret) 
    185 
    186    # Check that a display name is permitted. It's the last syntax check 
    187    # because we always check against optional parsing features last. 
    188    if display_name is not None and not allow_display_name: 
    189        raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.") 
    190 
    191    if check_deliverability and not test_environment: 
    192        # Validate the email address's deliverability using DNS 
    193        # and update the returned ValidatedEmail object with metadata. 
    194 
    195        if is_domain_literal: 
    196            # There is nothing to check --- skip deliverability checks. 
    197            return ret 
    198 
    199        # Lazy load `deliverability` as it is slow to import (due to dns.resolver) 
    200        from .deliverability import validate_email_deliverability 
    201        deliverability_info = validate_email_deliverability( 
    202            ret.ascii_domain, ret.domain, timeout, dns_resolver 
    203        ) 
    204        mx = deliverability_info.get("mx") 
    205        if mx is not None: 
    206            ret.mx = mx 
    207        ret.mx_fallback_type = deliverability_info.get("mx_fallback_type") 
    208 
    209    return ret