1# Copyright (C) 2002-2007 Python Software Foundation 
    2# Contact: email-sig@python.org 
    3 
    4"""Email address parsing code. 
    5 
    6Lifted directly from rfc822.py.  This should eventually be rewritten. 
    7""" 
    8 
    9from __future__ import unicode_literals 
    10from __future__ import print_function 
    11from __future__ import division 
    12from __future__ import absolute_import 
    13from future.builtins import int 
    14 
# Names exported by a star-import of this module.  Only the date helpers and
# quote() are listed; the address-parsing classes defined further down are
# deliberately left out and must be imported by name.
__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]
    21 
    22import time, calendar 
    23 
# Separator strings used with str.join() when assembling parsed pieces.
# SPACE joins comments and phrase words; EMPTYSTRING concatenates fragments
# (atoms, whitespace, delimited text) without any separator.
SPACE = ' '
EMPTYSTRING = ''
# NOTE(review): COMMASPACE is not referenced by the code visible in this
# file -- presumably kept for importers; confirm before removing.
COMMASPACE = ', '
    27 
# Parse a date field

# Lower-case month names: the twelve three-letter abbreviations followed by
# the twelve full names.  _parsedate_tz() turns a name into a month number
# with index() + 1 and subtracts 12 when the match falls in the second half,
# so the order of the entries matters.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

# Lower-case day-of-week abbreviations; _parsedate_tz() uses them to
# recognize (and skip) a leading day name that has no trailing comma.
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
    35 
# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
#
# Keys are upper-case zone names (lookups upper-case the input first).
# Values are signed offsets written in +/-HHMM form, i.e. -500 means five
# hours behind UTC; _parsedate_tz() converts them to seconds.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }
    49 
    50 
    51def parsedate_tz(data): 
    52    """Convert a date string to a time tuple. 
    53 
    54    Accounts for military timezones. 
    55    """ 
    56    res = _parsedate_tz(data) 
    57    if not res: 
    58        return 
    59    if res[9] is None: 
    60        res[9] = 0 
    61    return tuple(res) 
    62 
    63def _parsedate_tz(data): 
    64    """Convert date to extended time tuple. 
    65 
    66    The last (additional) element is the time zone offset in seconds, except if 
    67    the timezone was specified as -0000.  In that case the last element is 
    68    None.  This indicates a UTC timestamp that explicitly declaims knowledge of 
    69    the source timezone, as opposed to a +0000 timestamp that indicates the 
    70    source timezone really was UTC. 
    71 
    72    """ 
    73    if not data: 
    74        return 
    75    data = data.split() 
    76    # The FWS after the comma after the day-of-week is optional, so search and 
    77    # adjust for this. 
    78    if data[0].endswith(',') or data[0].lower() in _daynames: 
    79        # There's a dayname here. Skip it 
    80        del data[0] 
    81    else: 
    82        i = data[0].rfind(',') 
    83        if i >= 0: 
    84            data[0] = data[0][i+1:] 
    85    if len(data) == 3: # RFC 850 date, deprecated 
    86        stuff = data[0].split('-') 
    87        if len(stuff) == 3: 
    88            data = stuff + data[1:] 
    89    if len(data) == 4: 
    90        s = data[3] 
    91        i = s.find('+') 
    92        if i == -1: 
    93            i = s.find('-') 
    94        if i > 0: 
    95            data[3:] = [s[:i], s[i:]] 
    96        else: 
    97            data.append('') # Dummy tz 
    98    if len(data) < 5: 
    99        return None 
    100    data = data[:5] 
    101    [dd, mm, yy, tm, tz] = data 
    102    mm = mm.lower() 
    103    if mm not in _monthnames: 
    104        dd, mm = mm, dd.lower() 
    105        if mm not in _monthnames: 
    106            return None 
    107    mm = _monthnames.index(mm) + 1 
    108    if mm > 12: 
    109        mm -= 12 
    110    if dd[-1] == ',': 
    111        dd = dd[:-1] 
    112    i = yy.find(':') 
    113    if i > 0: 
    114        yy, tm = tm, yy 
    115    if yy[-1] == ',': 
    116        yy = yy[:-1] 
    117    if not yy[0].isdigit(): 
    118        yy, tz = tz, yy 
    119    if tm[-1] == ',': 
    120        tm = tm[:-1] 
    121    tm = tm.split(':') 
    122    if len(tm) == 2: 
    123        [thh, tmm] = tm 
    124        tss = '0' 
    125    elif len(tm) == 3: 
    126        [thh, tmm, tss] = tm 
    127    elif len(tm) == 1 and '.' in tm[0]: 
    128        # Some non-compliant MUAs use '.' to separate time elements. 
    129        tm = tm[0].split('.') 
    130        if len(tm) == 2: 
    131            [thh, tmm] = tm 
    132            tss = 0 
    133        elif len(tm) == 3: 
    134            [thh, tmm, tss] = tm 
    135    else: 
    136        return None 
    137    try: 
    138        yy = int(yy) 
    139        dd = int(dd) 
    140        thh = int(thh) 
    141        tmm = int(tmm) 
    142        tss = int(tss) 
    143    except ValueError: 
    144        return None 
    145    # Check for a yy specified in two-digit format, then convert it to the 
    146    # appropriate four-digit format, according to the POSIX standard. RFC 822 
    147    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) 
    148    # mandates a 4-digit yy. For more information, see the documentation for 
    149    # the time module. 
    150    if yy < 100: 
    151        # The year is between 1969 and 1999 (inclusive). 
    152        if yy > 68: 
    153            yy += 1900 
    154        # The year is between 2000 and 2068 (inclusive). 
    155        else: 
    156            yy += 2000 
    157    tzoffset = None 
    158    tz = tz.upper() 
    159    if tz in _timezones: 
    160        tzoffset = _timezones[tz] 
    161    else: 
    162        try: 
    163            tzoffset = int(tz) 
    164        except ValueError: 
    165            pass 
    166        if tzoffset==0 and tz.startswith('-'): 
    167            tzoffset = None 
    168    # Convert a timezone offset into seconds ; -0500 -> -18000 
    169    if tzoffset: 
    170        if tzoffset < 0: 
    171            tzsign = -1 
    172            tzoffset = -tzoffset 
    173        else: 
    174            tzsign = 1 
    175        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) 
    176    # Daylight Saving Time flag is set to -1, since DST is unknown. 
    177    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset] 
    178 
    179 
    180def parsedate(data): 
    181    """Convert a time string to a time tuple.""" 
    182    t = parsedate_tz(data) 
    183    if isinstance(t, tuple): 
    184        return t[:9] 
    185    else: 
    186        return t 
    187 
    188 
    189def mktime_tz(data): 
    190    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.""" 
    191    if data[9] is None: 
    192        # No zone info, so localtime is better assumption than GMT 
    193        return time.mktime(data[:8] + (-1,)) 
    194    else: 
    195        t = calendar.timegm(data) 
    196        return t - data[9] 
    197 
    198 
    199def quote(str): 
    200    """Prepare string to be used in a quoted string. 
    201 
    202    Turns backslash and double quote characters into quoted pairs.  These 
    203    are the only characters that need to be quoted inside a quoted string. 
    204    Does not add the surrounding double quotes. 
    205    """ 
    206    return str.replace('\\', '\\\\').replace('"', '\\"') 
    207 
    208 
    209class AddrlistClass(object): 
    210    """Address parser class by Ben Escoto. 
    211 
    212    To understand what this class does, it helps to have a copy of RFC 2822 in 
    213    front of you. 
    214 
    215    Note: this class interface is deprecated and may be removed in the future. 
    216    Use email.utils.AddressList instead. 
    217    """ 
    218 
    219    def __init__(self, field): 
    220        """Initialize a new instance. 
    221 
    222        `field' is an unparsed address header field, containing 
    223        one or more addresses. 
    224        """ 
    225        self.specials = '()<>@,:;.\"[]' 
    226        self.pos = 0 
    227        self.LWS = ' \t' 
    228        self.CR = '\r\n' 
    229        self.FWS = self.LWS + self.CR 
    230        self.atomends = self.specials + self.LWS + self.CR 
    231        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it 
    232        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete 
    233        # syntax, so allow dots in phrases. 
    234        self.phraseends = self.atomends.replace('.', '') 
    235        self.field = field 
    236        self.commentlist = [] 
    237 
    238    def gotonext(self): 
    239        """Skip white space and extract comments.""" 
    240        wslist = [] 
    241        while self.pos < len(self.field): 
    242            if self.field[self.pos] in self.LWS + '\n\r': 
    243                if self.field[self.pos] not in '\n\r': 
    244                    wslist.append(self.field[self.pos]) 
    245                self.pos += 1 
    246            elif self.field[self.pos] == '(': 
    247                self.commentlist.append(self.getcomment()) 
    248            else: 
    249                break 
    250        return EMPTYSTRING.join(wslist) 
    251 
    252    def getaddrlist(self): 
    253        """Parse all addresses. 
    254 
    255        Returns a list containing all of the addresses. 
    256        """ 
    257        result = [] 
    258        while self.pos < len(self.field): 
    259            ad = self.getaddress() 
    260            if ad: 
    261                result += ad 
    262            else: 
    263                result.append(('', '')) 
    264        return result 
    265 
    266    def getaddress(self): 
    267        """Parse the next address.""" 
    268        self.commentlist = [] 
    269        self.gotonext() 
    270 
    271        oldpos = self.pos 
    272        oldcl = self.commentlist 
    273        plist = self.getphraselist() 
    274 
    275        self.gotonext() 
    276        returnlist = [] 
    277 
    278        if self.pos >= len(self.field): 
    279            # Bad email address technically, no domain. 
    280            if plist: 
    281                returnlist = [(SPACE.join(self.commentlist), plist[0])] 
    282 
    283        elif self.field[self.pos] in '.@': 
    284            # email address is just an addrspec 
    285            # this isn't very efficient since we start over 
    286            self.pos = oldpos 
    287            self.commentlist = oldcl 
    288            addrspec = self.getaddrspec() 
    289            returnlist = [(SPACE.join(self.commentlist), addrspec)] 
    290 
    291        elif self.field[self.pos] == ':': 
    292            # address is a group 
    293            returnlist = [] 
    294 
    295            fieldlen = len(self.field) 
    296            self.pos += 1 
    297            while self.pos < len(self.field): 
    298                self.gotonext() 
    299                if self.pos < fieldlen and self.field[self.pos] == ';': 
    300                    self.pos += 1 
    301                    break 
    302                returnlist = returnlist + self.getaddress() 
    303 
    304        elif self.field[self.pos] == '<': 
    305            # Address is a phrase then a route addr 
    306            routeaddr = self.getrouteaddr() 
    307 
    308            if self.commentlist: 
    309                returnlist = [(SPACE.join(plist) + ' (' + 
    310                               ' '.join(self.commentlist) + ')', routeaddr)] 
    311            else: 
    312                returnlist = [(SPACE.join(plist), routeaddr)] 
    313 
    314        else: 
    315            if plist: 
    316                returnlist = [(SPACE.join(self.commentlist), plist[0])] 
    317            elif self.field[self.pos] in self.specials: 
    318                self.pos += 1 
    319 
    320        self.gotonext() 
    321        if self.pos < len(self.field) and self.field[self.pos] == ',': 
    322            self.pos += 1 
    323        return returnlist 
    324 
    325    def getrouteaddr(self): 
    326        """Parse a route address (Return-path value). 
    327 
    328        This method just skips all the route stuff and returns the addrspec. 
    329        """ 
    330        if self.field[self.pos] != '<': 
    331            return 
    332 
    333        expectroute = False 
    334        self.pos += 1 
    335        self.gotonext() 
    336        adlist = '' 
    337        while self.pos < len(self.field): 
    338            if expectroute: 
    339                self.getdomain() 
    340                expectroute = False 
    341            elif self.field[self.pos] == '>': 
    342                self.pos += 1 
    343                break 
    344            elif self.field[self.pos] == '@': 
    345                self.pos += 1 
    346                expectroute = True 
    347            elif self.field[self.pos] == ':': 
    348                self.pos += 1 
    349            else: 
    350                adlist = self.getaddrspec() 
    351                self.pos += 1 
    352                break 
    353            self.gotonext() 
    354 
    355        return adlist 
    356 
    357    def getaddrspec(self): 
    358        """Parse an RFC 2822 addr-spec.""" 
    359        aslist = [] 
    360 
    361        self.gotonext() 
    362        while self.pos < len(self.field): 
    363            preserve_ws = True 
    364            if self.field[self.pos] == '.': 
    365                if aslist and not aslist[-1].strip(): 
    366                    aslist.pop() 
    367                aslist.append('.') 
    368                self.pos += 1 
    369                preserve_ws = False 
    370            elif self.field[self.pos] == '"': 
    371                aslist.append('"%s"' % quote(self.getquote())) 
    372            elif self.field[self.pos] in self.atomends: 
    373                if aslist and not aslist[-1].strip(): 
    374                    aslist.pop() 
    375                break 
    376            else: 
    377                aslist.append(self.getatom()) 
    378            ws = self.gotonext() 
    379            if preserve_ws and ws: 
    380                aslist.append(ws) 
    381 
    382        if self.pos >= len(self.field) or self.field[self.pos] != '@': 
    383            return EMPTYSTRING.join(aslist) 
    384 
    385        aslist.append('@') 
    386        self.pos += 1 
    387        self.gotonext() 
    388        return EMPTYSTRING.join(aslist) + self.getdomain() 
    389 
    390    def getdomain(self): 
    391        """Get the complete domain name from an address.""" 
    392        sdlist = [] 
    393        while self.pos < len(self.field): 
    394            if self.field[self.pos] in self.LWS: 
    395                self.pos += 1 
    396            elif self.field[self.pos] == '(': 
    397                self.commentlist.append(self.getcomment()) 
    398            elif self.field[self.pos] == '[': 
    399                sdlist.append(self.getdomainliteral()) 
    400            elif self.field[self.pos] == '.': 
    401                self.pos += 1 
    402                sdlist.append('.') 
    403            elif self.field[self.pos] in self.atomends: 
    404                break 
    405            else: 
    406                sdlist.append(self.getatom()) 
    407        return EMPTYSTRING.join(sdlist) 
    408 
    409    def getdelimited(self, beginchar, endchars, allowcomments=True): 
    410        """Parse a header fragment delimited by special characters. 
    411 
    412        `beginchar' is the start character for the fragment. 
    413        If self is not looking at an instance of `beginchar' then 
    414        getdelimited returns the empty string. 
    415 
    416        `endchars' is a sequence of allowable end-delimiting characters. 
    417        Parsing stops when one of these is encountered. 
    418 
    419        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed 
    420        within the parsed fragment. 
    421        """ 
    422        if self.field[self.pos] != beginchar: 
    423            return '' 
    424 
    425        slist = [''] 
    426        quote = False 
    427        self.pos += 1 
    428        while self.pos < len(self.field): 
    429            if quote: 
    430                slist.append(self.field[self.pos]) 
    431                quote = False 
    432            elif self.field[self.pos] in endchars: 
    433                self.pos += 1 
    434                break 
    435            elif allowcomments and self.field[self.pos] == '(': 
    436                slist.append(self.getcomment()) 
    437                continue        # have already advanced pos from getcomment 
    438            elif self.field[self.pos] == '\\': 
    439                quote = True 
    440            else: 
    441                slist.append(self.field[self.pos]) 
    442            self.pos += 1 
    443 
    444        return EMPTYSTRING.join(slist) 
    445 
    446    def getquote(self): 
    447        """Get a quote-delimited fragment from self's field.""" 
    448        return self.getdelimited('"', '"\r', False) 
    449 
    450    def getcomment(self): 
    451        """Get a parenthesis-delimited fragment from self's field.""" 
    452        return self.getdelimited('(', ')\r', True) 
    453 
    454    def getdomainliteral(self): 
    455        """Parse an RFC 2822 domain-literal.""" 
    456        return '[%s]' % self.getdelimited('[', ']\r', False) 
    457 
    458    def getatom(self, atomends=None): 
    459        """Parse an RFC 2822 atom. 
    460 
    461        Optional atomends specifies a different set of end token delimiters 
    462        (the default is to use self.atomends).  This is used e.g. in 
    463        getphraselist() since phrase endings must not include the `.' (which 
    464        is legal in phrases).""" 
    465        atomlist = [''] 
    466        if atomends is None: 
    467            atomends = self.atomends 
    468 
    469        while self.pos < len(self.field): 
    470            if self.field[self.pos] in atomends: 
    471                break 
    472            else: 
    473                atomlist.append(self.field[self.pos]) 
    474            self.pos += 1 
    475 
    476        return EMPTYSTRING.join(atomlist) 
    477 
    478    def getphraselist(self): 
    479        """Parse a sequence of RFC 2822 phrases. 
    480 
    481        A phrase is a sequence of words, which are in turn either RFC 2822 
    482        atoms or quoted-strings.  Phrases are canonicalized by squeezing all 
    483        runs of continuous whitespace into one space. 
    484        """ 
    485        plist = [] 
    486 
    487        while self.pos < len(self.field): 
    488            if self.field[self.pos] in self.FWS: 
    489                self.pos += 1 
    490            elif self.field[self.pos] == '"': 
    491                plist.append(self.getquote()) 
    492            elif self.field[self.pos] == '(': 
    493                self.commentlist.append(self.getcomment()) 
    494            elif self.field[self.pos] in self.phraseends: 
    495                break 
    496            else: 
    497                plist.append(self.getatom(self.phraseends)) 
    498 
    499        return plist 
    500 
    501class AddressList(AddrlistClass): 
    502    """An AddressList encapsulates a list of parsed RFC 2822 addresses.""" 
    503    def __init__(self, field): 
    504        AddrlistClass.__init__(self, field) 
    505        if field: 
    506            self.addresslist = self.getaddrlist() 
    507        else: 
    508            self.addresslist = [] 
    509 
    510    def __len__(self): 
    511        return len(self.addresslist) 
    512 
    513    def __add__(self, other): 
    514        # Set union 
    515        newaddr = AddressList(None) 
    516        newaddr.addresslist = self.addresslist[:] 
    517        for x in other.addresslist: 
    518            if not x in self.addresslist: 
    519                newaddr.addresslist.append(x) 
    520        return newaddr 
    521 
    522    def __iadd__(self, other): 
    523        # Set union, in-place 
    524        for x in other.addresslist: 
    525            if not x in self.addresslist: 
    526                self.addresslist.append(x) 
    527        return self 
    528 
    529    def __sub__(self, other): 
    530        # Set difference 
    531        newaddr = AddressList(None) 
    532        for x in self.addresslist: 
    533            if not x in other.addresslist: 
    534                newaddr.addresslist.append(x) 
    535        return newaddr 
    536 
    537    def __isub__(self, other): 
    538        # Set difference, in-place 
    539        for x in other.addresslist: 
    540            if x in self.addresslist: 
    541                self.addresslist.remove(x) 
    542        return self 
    543 
    544    def __getitem__(self, index): 
    545        # Make indexing, slices, and 'in' work 
    546        return self.addresslist[index]