1# Copyright (C) 2001-2010 Python Software Foundation 
    2# Author: Barry Warsaw 
    3# Contact: email-sig@python.org 
    4 
    5"""Miscellaneous utilities.""" 
    6 
    7from __future__ import unicode_literals 
    8from __future__ import division 
    9from __future__ import absolute_import 
    10from future import utils 
    11from future.builtins import bytes, int, str 
    12 
# Names exported by a star-import of this module.  Note that the public
# localtime() function defined further down in this file is not listed here,
# so it has to be imported explicitly by name.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'format_datetime',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'parsedate_to_datetime',
    'unquote',
    ]
    30 
    31import os 
    32import re 
    33if utils.PY2: 
    34    re.ASCII = 0 
    35import time 
    36import base64 
    37import random 
    38import socket 
    39from future.backports import datetime 
    40from future.backports.urllib.parse import quote as url_quote, unquote as url_unquote 
    41import warnings 
    42from io import StringIO 
    43 
    44from future.backports.email._parseaddr import quote 
    45from future.backports.email._parseaddr import AddressList as _AddressList 
    46from future.backports.email._parseaddr import mktime_tz 
    47 
    48from future.backports.email._parseaddr import parsedate, parsedate_tz, _parsedate_tz 
    49 
    50from quopri import decodestring as _qdecode 
    51 
    52# Intrapackage imports 
    53from future.backports.email.encoders import _bencode, _qencode 
    54from future.backports.email.charset import Charset 
    55 
# Shared string constants.  COMMASPACE joins the field values in
# getaddresses(), EMPTYSTRING joins the continuation segments in
# decode_params() and TICK is the separator decode_rfc2231() splits on.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = ''
CRLF = '\r\n'
TICK = "'"

# specialsre matches any character whose presence makes formataddr() wrap the
# real name in double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# escapesre matches the backslash and the double quote, which formataddr()
# prefixes with a backslash.
escapesre = re.compile(r'[\\"]')
    64 
    65# How to figure out if we are processing strings that come from a byte 
    66# source with undecodable characters. 
    67_has_surrogates = re.compile( 
    68    '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search 
    69 
    70# How to deal with a string containing bytes before handing it to the 
    71# application through the 'normal' interface. 
    72def _sanitize(string): 
    73    # Turn any escaped bytes into unicode 'unknown' char. 
    74    original_bytes = string.encode('ascii', 'surrogateescape') 
    75    return original_bytes.decode('ascii', 'replace') 
    76 
    77 
    78# Helpers 
    79 
    80def formataddr(pair, charset='utf-8'): 
    81    """The inverse of parseaddr(), this takes a 2-tuple of the form 
    82    (realname, email_address) and returns the string value suitable 
    83    for an RFC 2822 From, To or Cc header. 
    84 
    85    If the first element of pair is false, then the second element is 
    86    returned unmodified. 
    87 
    88    Optional charset if given is the character set that is used to encode 
    89    realname in case realname is not ASCII safe.  Can be an instance of str or 
    90    a Charset-like object which has a header_encode method.  Default is 
    91    'utf-8'. 
    92    """ 
    93    name, address = pair 
    94    # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't. 
    95    address.encode('ascii') 
    96    if name: 
    97        try: 
    98            name.encode('ascii') 
    99        except UnicodeEncodeError: 
    100            if isinstance(charset, str): 
    101                charset = Charset(charset) 
    102            encoded_name = charset.header_encode(name) 
    103            return "%s <%s>" % (encoded_name, address) 
    104        else: 
    105            quotes = '' 
    106            if specialsre.search(name): 
    107                quotes = '"' 
    108            name = escapesre.sub(r'\\\g<0>', name) 
    109            return '%s%s%s <%s>' % (quotes, name, quotes, address) 
    110    return address 
    111 
    112 
    113 
    114def getaddresses(fieldvalues): 
    115    """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" 
    116    all = COMMASPACE.join(fieldvalues) 
    117    a = _AddressList(all) 
    118    return a.addresslist 
    119 
    120 
    121 
# Matches an encoded word of the form "=?charset?encoding?atom?=" (the
# RFC 2047 header syntax).  The named groups are 'charset', 'encoding' (a
# single "q" or "b", matched case-insensitively) and 'atom' (the payload).
ecre = re.compile(r''' 
  =\?                   # literal =? 
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset 
  \?                    # literal ? 
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive 
  \?                    # literal ? 
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom 
  \?=                   # literal ?= 
  ''', re.VERBOSE | re.IGNORECASE)
    131 
    132 
    133def _format_timetuple_and_zone(timetuple, zone): 
    134    return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( 
    135        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]], 
    136        timetuple[2], 
    137        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
    138         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1], 
    139        timetuple[0], timetuple[3], timetuple[4], timetuple[5], 
    140        zone) 
    141 
    142def formatdate(timeval=None, localtime=False, usegmt=False): 
    143    """Returns a date string as specified by RFC 2822, e.g.: 
    144 
    145    Fri, 09 Nov 2001 01:08:47 -0000 
    146 
    147    Optional timeval if given is a floating point time value as accepted by 
    148    gmtime() and localtime(), otherwise the current time is used. 
    149 
    150    Optional localtime is a flag that when True, interprets timeval, and 
    151    returns a date relative to the local timezone instead of UTC, properly 
    152    taking daylight savings time into account. 
    153 
    154    Optional argument usegmt means that the timezone is written out as 
    155    an ascii string, not numeric one (so "GMT" instead of "+0000"). This 
    156    is needed for HTTP, and is only used when localtime==False. 
    157    """ 
    158    # Note: we cannot use strftime() because that honors the locale and RFC 
    159    # 2822 requires that day and month names be the English abbreviations. 
    160    if timeval is None: 
    161        timeval = time.time() 
    162    if localtime: 
    163        now = time.localtime(timeval) 
    164        # Calculate timezone offset, based on whether the local zone has 
    165        # daylight savings time, and whether DST is in effect. 
    166        if time.daylight and now[-1]: 
    167            offset = time.altzone 
    168        else: 
    169            offset = time.timezone 
    170        hours, minutes = divmod(abs(offset), 3600) 
    171        # Remember offset is in seconds west of UTC, but the timezone is in 
    172        # minutes east of UTC, so the signs differ. 
    173        if offset > 0: 
    174            sign = '-' 
    175        else: 
    176            sign = '+' 
    177        zone = '%s%02d%02d' % (sign, hours, minutes // 60) 
    178    else: 
    179        now = time.gmtime(timeval) 
    180        # Timezone offset is always -0000 
    181        if usegmt: 
    182            zone = 'GMT' 
    183        else: 
    184            zone = '-0000' 
    185    return _format_timetuple_and_zone(now, zone) 
    186 
    187def format_datetime(dt, usegmt=False): 
    188    """Turn a datetime into a date string as specified in RFC 2822. 
    189 
    190    If usegmt is True, dt must be an aware datetime with an offset of zero.  In 
    191    this case 'GMT' will be rendered instead of the normal +0000 required by 
    192    RFC2822.  This is to support HTTP headers involving date stamps. 
    193    """ 
    194    now = dt.timetuple() 
    195    if usegmt: 
    196        if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc: 
    197            raise ValueError("usegmt option requires a UTC datetime") 
    198        zone = 'GMT' 
    199    elif dt.tzinfo is None: 
    200        zone = '-0000' 
    201    else: 
    202        zone = dt.strftime("%z") 
    203    return _format_timetuple_and_zone(now, zone) 
    204 
    205 
    206def make_msgid(idstring=None, domain=None): 
    207    """Returns a string suitable for RFC 2822 compliant Message-ID, e.g: 
    208 
    209    <20020201195627.33539.96671@nightshade.la.mastaler.com> 
    210 
    211    Optional idstring if given is a string used to strengthen the 
    212    uniqueness of the message id.  Optional domain if given provides the 
    213    portion of the message id after the '@'.  It defaults to the locally 
    214    defined hostname. 
    215    """ 
    216    timeval = time.time() 
    217    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval)) 
    218    pid = os.getpid() 
    219    randint = random.randrange(100000) 
    220    if idstring is None: 
    221        idstring = '' 
    222    else: 
    223        idstring = '.' + idstring 
    224    if domain is None: 
    225        domain = socket.getfqdn() 
    226    msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain) 
    227    return msgid 
    228 
    229 
    230def parsedate_to_datetime(data): 
    231    _3to2list = list(_parsedate_tz(data)) 
    232    dtuple, tz, = [_3to2list[:-1]] + _3to2list[-1:] 
    233    if tz is None: 
    234        return datetime.datetime(*dtuple[:6]) 
    235    return datetime.datetime(*dtuple[:6], 
    236            tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) 
    237 
    238 
    239def parseaddr(addr): 
    240    addrs = _AddressList(addr).addresslist 
    241    if not addrs: 
    242        return '', '' 
    243    return addrs[0] 
    244 
    245 
    246# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. 
    247def unquote(str): 
    248    """Remove quotes from a string.""" 
    249    if len(str) > 1: 
    250        if str.startswith('"') and str.endswith('"'): 
    251            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"') 
    252        if str.startswith('<') and str.endswith('>'): 
    253            return str[1:-1] 
    254    return str 
    255 
    256 
    257 
    258# RFC2231-related functions - parameter encoding and decoding 
    259def decode_rfc2231(s): 
    260    """Decode string according to RFC 2231""" 
    261    parts = s.split(TICK, 2) 
    262    if len(parts) <= 2: 
    263        return None, None, s 
    264    return parts 
    265 
    266 
    267def encode_rfc2231(s, charset=None, language=None): 
    268    """Encode string according to RFC 2231. 
    269 
    270    If neither charset nor language is given, then s is returned as-is.  If 
    271    charset is given but not language, the string is encoded using the empty 
    272    string for language. 
    273    """ 
    274    s = url_quote(s, safe='', encoding=charset or 'ascii') 
    275    if charset is None and language is None: 
    276        return s 
    277    if language is None: 
    278        language = '' 
    279    return "%s'%s'%s" % (charset, language, s) 
    280 
    281 
# Matches a parameter name that carries an RFC 2231 marker: "name*",
# "name*N" or "name*N*", where N is a decimal continuation number.  The base
# name is captured as group 'name' and the number (or None) as group 'num'.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
    re.ASCII)
    284 
    285def decode_params(params): 
    286    """Decode parameters list according to RFC 2231. 
    287 
    288    params is a sequence of 2-tuples containing (param name, string value). 
    289    """ 
    290    # Copy params so we don't mess with the original 
    291    params = params[:] 
    292    new_params = [] 
    293    # Map parameter's name to a list of continuations.  The values are a 
    294    # 3-tuple of the continuation number, the string value, and a flag 
    295    # specifying whether a particular segment is %-encoded. 
    296    rfc2231_params = {} 
    297    name, value = params.pop(0) 
    298    new_params.append((name, value)) 
    299    while params: 
    300        name, value = params.pop(0) 
    301        if name.endswith('*'): 
    302            encoded = True 
    303        else: 
    304            encoded = False 
    305        value = unquote(value) 
    306        mo = rfc2231_continuation.match(name) 
    307        if mo: 
    308            name, num = mo.group('name', 'num') 
    309            if num is not None: 
    310                num = int(num) 
    311            rfc2231_params.setdefault(name, []).append((num, value, encoded)) 
    312        else: 
    313            new_params.append((name, '"%s"' % quote(value))) 
    314    if rfc2231_params: 
    315        for name, continuations in rfc2231_params.items(): 
    316            value = [] 
    317            extended = False 
    318            # Sort by number 
    319            continuations.sort() 
    320            # And now append all values in numerical order, converting 
    321            # %-encodings for the encoded segments.  If any of the 
    322            # continuation names ends in a *, then the entire string, after 
    323            # decoding segments and concatenating, must have the charset and 
    324            # language specifiers at the beginning of the string. 
    325            for num, s, encoded in continuations: 
    326                if encoded: 
    327                    # Decode as "latin-1", so the characters in s directly 
    328                    # represent the percent-encoded octet values. 
    329                    # collapse_rfc2231_value treats this as an octet sequence. 
    330                    s = url_unquote(s, encoding="latin-1") 
    331                    extended = True 
    332                value.append(s) 
    333            value = quote(EMPTYSTRING.join(value)) 
    334            if extended: 
    335                charset, language, value = decode_rfc2231(value) 
    336                new_params.append((name, (charset, language, '"%s"' % value))) 
    337            else: 
    338                new_params.append((name, '"%s"' % value)) 
    339    return new_params 
    340 
    341def collapse_rfc2231_value(value, errors='replace', 
    342                           fallback_charset='us-ascii'): 
    343    if not isinstance(value, tuple) or len(value) != 3: 
    344        return unquote(value) 
    345    # While value comes to us as a unicode string, we need it to be a bytes 
    346    # object.  We do not want bytes() normal utf-8 decoder, we want a straight 
    347    # interpretation of the string as character bytes. 
    348    charset, language, text = value 
    349    rawbytes = bytes(text, 'raw-unicode-escape') 
    350    try: 
    351        return str(rawbytes, charset, errors) 
    352    except LookupError: 
    353        # charset is not a known codec. 
    354        return unquote(text) 
    355 
    356 
    357# 
    358# datetime doesn't provide a localtime function yet, so provide one.  Code 
    359# adapted from the patch in issue 9527.  This may not be perfect, but it is 
    360# better than not having it. 
    361# 
    362 
    363def localtime(dt=None, isdst=-1): 
    364    """Return local time as an aware datetime object. 
    365 
    366    If called without arguments, return current time.  Otherwise *dt* 
    367    argument should be a datetime instance, and it is converted to the 
    368    local time zone according to the system time zone database.  If *dt* is 
    369    naive (that is, dt.tzinfo is None), it is assumed to be in local time. 
    370    In this case, a positive or zero value for *isdst* causes localtime to 
    371    presume initially that summer time (for example, Daylight Saving Time) 
    372    is or is not (respectively) in effect for the specified time.  A 
    373    negative value for *isdst* causes the localtime() function to attempt 
    374    to divine whether summer time is in effect for the specified time. 
    375 
    376    """ 
    377    if dt is None: 
    378        return datetime.datetime.now(datetime.timezone.utc).astimezone() 
    379    if dt.tzinfo is not None: 
    380        return dt.astimezone() 
    381    # We have a naive datetime.  Convert to a (localtime) timetuple and pass to 
    382    # system mktime together with the isdst hint.  System mktime will return 
    383    # seconds since epoch. 
    384    tm = dt.timetuple()[:-1] + (isdst,) 
    385    seconds = time.mktime(tm) 
    386    localtm = time.localtime(seconds) 
    387    try: 
    388        delta = datetime.timedelta(seconds=localtm.tm_gmtoff) 
    389        tz = datetime.timezone(delta, localtm.tm_zone) 
    390    except AttributeError: 
    391        # Compute UTC offset and compare with the value implied by tm_isdst. 
    392        # If the values match, use the zone name implied by tm_isdst. 
    393        delta = dt - datetime.datetime(*time.gmtime(seconds)[:6]) 
    394        dst = time.daylight and localtm.tm_isdst > 0 
    395        gmtoff = -(time.altzone if dst else time.timezone) 
    396        if delta == datetime.timedelta(seconds=gmtoff): 
    397            tz = datetime.timezone(delta, time.tzname[dst]) 
    398        else: 
    399            tz = datetime.timezone(delta) 
    400    return dt.replace(tzinfo=tz)