Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/email_validator/rfc_constants.py: 95%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

19 statements  

1# These constants are defined by the email specifications. 

2 

3import re 

4 

# RFC 5322 section 3.2.3 lists the characters ("atext") that may appear in the
# dot-separated atoms of an address, before internationalization is taken
# into account. ATEXT holds them as the *inside* of a regex character class,
# so it can be embedded between "[" and "]" below.
ATEXT = r'a-zA-Z0-9_!#\$%&\'\*\+\-/=\?\^`\{\|\}~'

# Matches a single character that is either atext or a dot.
ATEXT_RE = re.compile(f'[.{ATEXT}]')

# Matches a complete dot-atom: runs of atext joined by single dots, with no
# leading, trailing, or doubled dot. \Z anchors the end of the string.
DOT_ATOM_TEXT = re.compile(rf'[{ATEXT}]+(?:\.[{ATEXT}]+)*\Z')

# RFC 6531 section 3.3 widens the permitted set for internationalized
# addresses with three UTF-8 ranges from RFC 3629 section 4, which appear to
# correspond to the Unicode code points U+0080 through U+10FFFF.
ATEXT_INTL = ATEXT + "\u0080-\U0010FFFF"

# Internationalized counterparts of ATEXT_RE and DOT_ATOM_TEXT.
ATEXT_INTL_DOT_RE = re.compile(f'[.{ATEXT_INTL}]')
DOT_ATOM_TEXT_INTL = re.compile(rf'[{ATEXT_INTL}]+(?:\.[{ATEXT_INTL}]+)*\Z')

18 

# Once the domain part has been IDNA-encoded to ASCII it must also meet the
# hostname requirements of RFC 952 / RFC 1123 section 2.1, which permit a
# narrower set of characters than the address grammar.

# A single character from the hostname alphabet: ASCII letters, digits,
# hyphen and dot, plus any code point from U+0080 through U+10FFFF.
ATEXT_HOSTNAME_INTL = re.compile(
    "["
    r"a-zA-Z0-9\-\."
    "\u0080-\U0010FFFF"
    "]"
)

# One hostname label (pattern source, not compiled): letters, digits and
# hyphens, where the first and last characters are never a hyphen.
HOSTNAME_LABEL = r'(?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]*)?[a-zA-Z0-9])'

# A full ASCII hostname: labels separated by single dots, anchored at the
# end of the string with \Z.
DOT_ATOM_TEXT_HOSTNAME = re.compile(rf'{HOSTNAME_LABEL}(?:\.{HOSTNAME_LABEL})*\Z')

# All TLDs currently end with a letter, so this looks for a letter as the
# final character of the string.
DOMAIN_NAME_REGEX = re.compile(r"[A-Za-z]\Z")

26 

# Domain literal (RFC 5322 3.4.1)
# Matches one "dtext" character: printable US-ASCII %d33-90 and %d94-126,
# i.e. everything from "!" to "~" except "[", "\" and "]". The first range
# must stop at U+005A ("Z"); ending it at U+00FA would swallow the three
# excluded characters, DEL, and a slice of Latin-1, and would make the second
# range redundant.
DOMAIN_LITERAL_CHARS = re.compile(r"[\u0021-\u005A\u005E-\u007E]")

29 

# Quoted-string local part (RFC 5321 4.1.2, internationalized by RFC 6531 3.3)
# Inside a quoted string the permitted characters are code points 32-126,
# with the restriction that a quote or a literal backslash may only appear
# when preceded by a backslash. For internationalized addresses, UTF-8 text
# is permitted as well; ASCII characters outside 32-126 remain disallowed.
#
# Reference pattern for the whole quoted form (kept for documentation only,
# never compiled):
# QUOTED_LOCAL_PART_ADDR = re.compile(r"^\"((?:[\u0020-\u0021\u0023-\u005B\u005D-\u007E]|\\[\u0020-\u007E])*)\"@(.*)")
#
# QTEXT_INTL matches a single permitted character. The class itself includes
# the quote and the backslash; it does not check that they are escaped.
QTEXT_INTL = re.compile(
    r"[\u0020-\u007E\u0080-\U0010FFFF]"
)

37 

# Length constants

# RFC 5321 4.5.3.1.1 gives a maximum length for the local part. However,
# RFC 5321 4.5.3.1 says that this limit (like the others in that section) is
# in a sense a suggestion, and longer local parts have been seen in the wild.
# Consequently, the local part length is only checked in "strict" mode.
LOCAL_PART_MAX_LENGTH = 64

# RFC 3696 + errata 1003 + errata 1690
# (https://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690) explain
# that the maximum length of an email address is 254 octets, based on
# RFC 5321 4.5.3.1.3. This maximum is also somewhat of a suggestion, but it
# is always checked so that some length check is always performed.
EMAIL_MAX_LENGTH = 254

51 

# Maximum length of a single DNS label, in octets (RFC 1035 2.3.1).
DNS_LABEL_LENGTH_LIMIT = 63

# Maximum length of a whole domain name, in octets as transmitted.
# RFC 5321 4.5.3.1.2 gives a (suggested, like the other RFC 5321 limits)
# maximum of 255 octets, and RFC 1035 2.3.4 imposes a 255-octet limit too.
# But per
# https://stackoverflow.com/questions/32290167/what-is-the-maximum-length-of-a-dns-name,
# two of those octets are taken up by the optional final dot and the null
# root label, which leaves 253.
DOMAIN_MAX_LENGTH = 253

57 

# Mailbox names taken from RFC 2142, grouped by the section of that RFC
# in which each one is listed.
CASE_INSENSITIVE_MAILBOX_NAMES = [
    # RFC 2142, section 3
    'info',
    'marketing',
    'sales',
    'support',
    # RFC 2142, section 4
    'abuse',
    'noc',
    'security',
    # RFC 2142, section 5
    'postmaster',
    'hostmaster',
    'usenet',
    'news',
    'webmaster',
    'www',
    'uucp',
    'ftp',
]