Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/future/backports/email/utils.py: 35%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

196 statements  

1# Copyright (C) 2001-2010 Python Software Foundation 

2# Author: Barry Warsaw 

3# Contact: email-sig@python.org 

4 

5"""Miscellaneous utilities.""" 

6 

7from __future__ import unicode_literals 

8from __future__ import division 

9from __future__ import absolute_import 

10from future import utils 

11from future.builtins import bytes, int, str 

12 

13__all__ = [ 

14 'collapse_rfc2231_value', 

15 'decode_params', 

16 'decode_rfc2231', 

17 'encode_rfc2231', 

18 'formataddr', 

19 'formatdate', 

20 'format_datetime', 

21 'getaddresses', 

22 'make_msgid', 

23 'mktime_tz', 

24 'parseaddr', 

25 'parsedate', 

26 'parsedate_tz', 

27 'parsedate_to_datetime', 

28 'unquote', 

29 ] 

30 

31import os 

32import re 

33if utils.PY2: 

34 re.ASCII = 0 

35import time 

36import base64 

37import random 

38import socket 

39from future.backports import datetime 

40from future.backports.urllib.parse import quote as url_quote, unquote as url_unquote 

41import warnings 

42from io import StringIO 

43 

44from future.backports.email._parseaddr import quote 

45from future.backports.email._parseaddr import AddressList as _AddressList 

46from future.backports.email._parseaddr import mktime_tz 

47 

48from future.backports.email._parseaddr import parsedate, parsedate_tz, _parsedate_tz 

49 

50from quopri import decodestring as _qdecode 

51 

52# Intrapackage imports 

53from future.backports.email.encoders import _bencode, _qencode 

54from future.backports.email.charset import Charset 

55 

# Small string constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = ''
CRLF = '\r\n'
TICK = "'"

# specialsre finds characters that force a display name to be quoted;
# escapesre finds the characters that need a backslash inside the quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[\\"]')

64 

65# How to figure out if we are processing strings that come from a byte 

66# source with undecodable characters. 

# Search callable that finds a lone low surrogate (U+DC00-U+DFFF), i.e. one
# that is not preceded by a high surrogate and not followed by another low
# surrogate.  The regex anchors are written as '\\A' and '\\Z' because the
# literal is deliberately not a raw string (the \uXXXX escapes must be
# interpreted), and a bare '\A' / '\Z' there is an invalid string escape.
_has_surrogates = re.compile(
    '([^\ud800-\udbff]|\\A)[\udc00-\udfff]([^\udc00-\udfff]|\\Z)').search

69 

70# How to deal with a string containing bytes before handing it to the 

71# application through the 'normal' interface. 

def _sanitize(string):
    """Return *string* with surrogate-escaped bytes replaced by U+FFFD.

    The string is first turned back into bytes with the 'surrogateescape'
    handler, then decoded as ASCII with the 'replace' handler, so every
    byte that is not ASCII becomes the Unicode replacement character.
    """
    return string.encode('ascii', 'surrogateescape').decode('ascii', 'replace')

76 

77 

78# Helpers 

79 

def formataddr(pair, charset='utf-8'):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From, To or Cc header.  If the real name is false, the address
    is returned on its own, unmodified.

    charset is the character set used to encode realname when it is not
    ASCII safe.  It may be a str or a Charset-like object that has a
    header_encode method.  Default is 'utf-8'.

    Raises UnicodeError if the address is not ASCII.
    """
    realname, address = pair
    # The address MUST (per RFC) be ascii; this encode is only a check and
    # raises a UnicodeError when it is not.
    address.encode('ascii')
    if not realname:
        return address
    try:
        realname.encode('ascii')
    except UnicodeEncodeError:
        # Non-ASCII display name: let the charset produce the encoded form.
        codec = Charset(charset) if isinstance(charset, str) else charset
        return "%s <%s>" % (codec.header_encode(realname), address)
    # ASCII display name: wrap it in double quotes only when it contains a
    # special character, and backslash-escape backslashes and quotes.
    wrapper = '"' if specialsre.search(realname) else ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrapper, escaped, wrapper, address)

111 

112 

113 

def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # All field values are joined into one comma-separated string and
    # parsed in a single pass.
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist

119 

120 

121 

# Matches text of the form =?charset?encoding?atom?= where encoding is a
# single "q" or "b" (either case).
ecre = re.compile(
    r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''',
    re.VERBOSE | re.IGNORECASE)

131 

132 

def _format_timetuple_and_zone(timetuple, zone):
    """Format a time tuple and a zone string as an RFC 2822 style date.

    timetuple is indexed like a time.struct_time: year, month, day, hour,
    minute, second and weekday (0 is Monday) at positions 0-6.  zone is
    appended verbatim.  English day and month abbreviations are always used.
    """
    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    weekday = day_names[timetuple[6]]
    day = timetuple[2]
    month = month_names[timetuple[1] - 1]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        weekday, day, month, timetuple[0],
        timetuple[3], timetuple[4], timetuple[5],
        zone)

141 

def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, if given, is a floating point time value as accepted by
    gmtime() and localtime(); otherwise the current time is used.

    localtime is a flag: when True, timeval is interpreted in, and the
    result is relative to, the local timezone instead of UTC, taking
    daylight savings time into account.

    usegmt means the timezone is written as the ascii string "GMT" rather
    than a numeric offset.  This is needed for HTTP, and is only used when
    localtime is False.
    """
    # strftime() cannot be used here: it honors the locale, while RFC 2822
    # requires the English day and month abbreviations.
    if timeval is None:
        timeval = time.time()
    if not localtime:
        # UTC: the zone is either the literal 'GMT' or '-0000'.
        stamp = time.gmtime(timeval)
        zone = 'GMT' if usegmt else '-0000'
        return _format_timetuple_and_zone(stamp, zone)
    stamp = time.localtime(timeval)
    # Pick the offset depending on whether the local zone has daylight
    # savings time and whether DST is in effect for this timestamp.
    offset = time.altzone if (time.daylight and stamp[-1]) else time.timezone
    hours, leftover = divmod(abs(offset), 3600)
    # offset is in seconds west of UTC, but the zone is written as an
    # offset east of UTC, so the signs differ.
    sign = '-' if offset > 0 else '+'
    zone = '%s%02d%02d' % (sign, hours, leftover // 60)
    return _format_timetuple_and_zone(stamp, zone)

186 

def format_datetime(dt, usegmt=False):
    """Turn a datetime into a date string as specified in RFC 2822.

    If usegmt is True, dt must be an aware datetime whose tzinfo equals
    datetime.timezone.utc; 'GMT' is then rendered instead of a numeric
    offset.  This is to support HTTP headers involving date stamps.

    A naive datetime is rendered with the zone '-0000'; any other aware
    datetime uses its strftime("%z") value.

    Raises ValueError if usegmt is True and dt is not a UTC datetime.
    """
    fields = dt.timetuple()
    is_naive = dt.tzinfo is None
    if usegmt:
        if is_naive or dt.tzinfo != datetime.timezone.utc:
            raise ValueError("usegmt option requires a UTC datetime")
        zone = 'GMT'
    else:
        zone = '-0000' if is_naive else dt.strftime("%z")
    return _format_timetuple_and_zone(fields, zone)

204 

205 

def make_msgid(idstring=None, domain=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    idstring, if given, is a string appended (after a dot) to strengthen
    the uniqueness of the message id.  domain, if given, provides the
    portion of the message id after the '@'; it defaults to the value of
    socket.getfqdn().
    """
    # Left-hand side: UTC timestamp, process id and a random number.
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    nonce = random.randrange(100000)
    suffix = '' if idstring is None else '.' + idstring
    host = socket.getfqdn() if domain is None else domain
    return '<%s.%s.%s%s@%s>' % (stamp, pid, nonce, suffix, host)

228 

229 

def parsedate_to_datetime(data):
    """Convert a date string into a datetime using _parsedate_tz().

    The last item of the parsed result is taken as the timezone offset in
    seconds.  When it is None a naive datetime is returned; otherwise the
    datetime is aware, with a fixed-offset timezone built from that value.
    """
    parsed = list(_parsedate_tz(data))
    # Split into "everything but the last item" and "the last item"
    # (the Python 2 compatible spelling of ``*dtuple, tz = parsed``).
    dtuple, tz = [parsed[:-1]] + parsed[-1:]
    date_args = dtuple[:6]
    if tz is None:
        return datetime.datetime(*date_args)
    offset = datetime.timedelta(seconds=tz)
    return datetime.datetime(*date_args, tzinfo=datetime.timezone(offset))

237 

238 

def parseaddr(addr):
    """Return the first address parsed from *addr*.

    The result is the first entry of the parsed address list, or the pair
    ('', '') when nothing could be parsed.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')

244 

245 

246# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. 

def unquote(str):
    """Remove quotes from a string.

    A string wrapped in double quotes loses them and has backslash-escaped
    backslashes and double quotes unescaped.  A string wrapped in angle
    brackets loses the brackets.  Anything else, including strings shorter
    than two characters, is returned unchanged.
    """
    if len(str) < 2:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str

255 

256 

257 

258# RFC2231-related functions - parameter encoding and decoding 

def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on the first two single quotes.  When that yields three
    parts they are returned as a list [charset, language, text]; otherwise
    the tuple (None, None, s) is returned.
    """
    pieces = s.split(TICK, 2)
    return pieces if len(pieces) > 2 else (None, None, s)

265 

266 

def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-quoted (with no safe characters), using charset or
    'ascii' when no charset is given.  If neither charset nor language is
    given, the quoted string is returned on its own.  Otherwise the result
    is "charset'language'quoted", with the empty string standing in for a
    missing language.
    """
    quoted = url_quote(s, safe='', encoding=charset or 'ascii')
    if charset is None and language is None:
        return quoted
    lang = '' if language is None else language
    return "%s'%s'%s" % (charset, lang, quoted)

280 

281 

# Matches parameter names of the form "name*", "name*N" or "name*N*",
# capturing the bare name and the optional number N.
rfc2231_continuation = re.compile(
    r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
    re.ASCII)

284 

def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    It is not modified.

    Returns a new list.  The first parameter is passed through unchanged.
    Ordinary parameters become (name, '"quoted value"').  Parameters whose
    name matches the continuation pattern are merged per name and appended
    after the ordinary ones, either as (name, '"value"') or, when any
    segment was %-encoded, as (name, (charset, language, '"value"')).

    Raises IndexError if params is empty.
    """
    # The first parameter is never decoded.
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # Iterate over a slice instead of repeatedly popping from the front of
    # a copy: this is linear rather than quadratic and works for any
    # sequence, not only lists.
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        segments = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                # Decode as "latin-1", so the characters in s directly
                # represent the percent-encoded octet values.
                # collapse_rfc2231_value treats this as an octet sequence.
                s = url_unquote(s, encoding="latin-1")
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params

340 

def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a decoded RFC 2231 parameter value into a single string.

    value is either a plain string or a 3-tuple (charset, language, text).
    Anything that is not a 3-tuple is returned through unquote().

    For a 3-tuple, text is reinterpreted as raw bytes and decoded with
    charset using the given errors handler.  If charset is None,
    fallback_charset is used instead.  If the charset is not a known
    codec, the unquoted text is returned.
    """
    if not isinstance(value, tuple) or len(value) != 3:
        return unquote(value)
    charset, language, text = value
    if charset is None:
        # decode_rfc2231() yields a None charset when it could not split
        # the value; decoding with None would raise TypeError, so use the
        # fallback charset instead.
        charset = fallback_charset
    # While value comes to us as a unicode string, we need it to be a bytes
    # object.  We do not want bytes() normal utf-8 decoder, we want a
    # straight interpretation of the string as character bytes.
    rawbytes = bytes(text, 'raw-unicode-escape')
    try:
        return str(rawbytes, charset, errors)
    except LookupError:
        # charset is not a known codec.
        return unquote(text)

355 

356 

357# 

358# datetime doesn't provide a localtime function yet, so provide one. Code 

359# adapted from the patch in issue 9527. This may not be perfect, but it is 

360# better than not having it. 

361# 

362 

def localtime(dt=None, isdst=-1):
    """Return local time as an aware datetime object.

    Called without arguments, return the current time.  Otherwise *dt*
    should be a datetime instance.  An aware *dt* is converted with
    astimezone().  A naive *dt* (dt.tzinfo is None) is assumed to be in
    local time and gets a fixed-offset tzinfo attached.

    *isdst* is only used for a naive *dt* and is passed to the system
    mktime as the DST hint: a positive or zero value presumes initially
    that summer time is or is not (respectively) in effect, a negative
    value lets mktime work it out.
    """
    if dt is None:
        utc_now = datetime.datetime.now(datetime.timezone.utc)
        return utc_now.astimezone()
    if dt.tzinfo is not None:
        return dt.astimezone()
    # Naive datetime: build a (localtime) timetuple carrying the isdst
    # hint and let the system mktime turn it into seconds since epoch.
    hinted = dt.timetuple()[:-1] + (isdst,)
    epoch_seconds = time.mktime(hinted)
    local_struct = time.localtime(epoch_seconds)
    try:
        offset = datetime.timedelta(seconds=local_struct.tm_gmtoff)
        zone = datetime.timezone(offset, local_struct.tm_zone)
    except AttributeError:
        # No tm_gmtoff/tm_zone available.  Compute the UTC offset and
        # compare it with the value implied by tm_isdst; if they match,
        # use the zone name implied by tm_isdst.
        offset = dt - datetime.datetime(*time.gmtime(epoch_seconds)[:6])
        dst = time.daylight and local_struct.tm_isdst > 0
        gmtoff = -(time.altzone if dst else time.timezone)
        if offset == datetime.timedelta(seconds=gmtoff):
            zone = datetime.timezone(offset, time.tzname[dst])
        else:
            zone = datetime.timezone(offset)
    return dt.replace(tzinfo=zone)