Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/future/backports/email/_parseaddr.py: 12%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

340 statements  

1# Copyright (C) 2002-2007 Python Software Foundation 

2# Contact: email-sig@python.org 

3 

4"""Email address parsing code. 

5 

6Lifted directly from rfc822.py. This should eventually be rewritten. 

7""" 

8 

9from __future__ import unicode_literals 

10from __future__ import print_function 

11from __future__ import division 

12from __future__ import absolute_import 

13from future.builtins import int 

14 

15__all__ = [ 

16 'mktime_tz', 

17 'parsedate', 

18 'parsedate_tz', 

19 'quote', 

20 ] 

21 

22import time, calendar 

23 

# Separator strings shared by the parsing helpers in this module.
EMPTYSTRING = ''
SPACE = ' '
COMMASPACE = ', '

# Month names accepted in a date field: the twelve abbreviations first, then
# the twelve full names.  A name's position in the list, taken modulo 12,
# gives its month number ('may' appears in both halves; lookups find the
# first occurrence).
_monthnames = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december',
]

# Day-of-week abbreviations that may precede the date proper.
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# Named time zones and their offsets written as signed HHMM integers.
#
# The table does not include the military time zones defined in RFC822,
# other than Z.  According to RFC1123, the description in RFC822 gets the
# signs wrong, so we can't rely on any such time zones.  RFC1123 recommends
# that numeric timezone indicators be used instead of timezone names.
_timezones = {
    # Universal time
    'UT': 0,
    'UTC': 0,
    'GMT': 0,
    'Z': 0,
    # Atlantic (used in Canada)
    'AST': -400,
    'ADT': -300,
    # Eastern
    'EST': -500,
    'EDT': -400,
    # Central
    'CST': -600,
    'CDT': -500,
    # Mountain
    'MST': -700,
    'MDT': -600,
    # Pacific
    'PST': -800,
    'PDT': -700,
}

49 

50 

def parsedate_tz(data):
    """Parse a date string into a 10-item time tuple.

    Delegates the parsing to _parsedate_tz().  Returns None when that helper
    yields nothing.  Otherwise the parsed fields are returned as a tuple
    whose last item is the timezone offset; an offset reported as None by
    the helper is replaced by 0.
    """
    fields = _parsedate_tz(data)
    if fields:
        if fields[9] is None:
            fields[9] = 0
        return tuple(fields)
    return None

62 

def _parsedate_tz(data):
    """Convert date to extended time tuple.

    Returns None when `data` is empty, consists only of whitespace, or cannot
    be parsed as a date.  Otherwise returns the 10-item list
    [year, month, day, hour, minute, second, 0, 1, -1, tzoffset].

    The last (additional) element is the time zone offset in seconds, except if
    the timezone was specified as -0000.  In that case the last element is
    None.  This indicates a UTC timestamp that explicitly declaims knowledge of
    the source timezone, as opposed to a +0000 timestamp that indicates the
    source timezone really was UTC.  The last element is also None when the
    timezone token is neither a known zone name nor an integer.

    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input yields no tokens; indexing data[0] below
        # would otherwise raise IndexError.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The timezone may be glued onto the time ("10:00+0100"); split it off.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if not (dd and mm and yy and tm):
        # Splitting a malformed RFC 850 date such as "jan--99" leaves an
        # empty token; none of day, month, year or time may be empty (only
        # the timezone may).
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Accept "Month Day" order as well as "Day Month".
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time appear in swapped positions.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
        if not yy:
            # The year token was a lone comma.
            return None
    if not yy[0].isdigit():
        # Year and timezone appear in swapped positions.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = '0'
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            # Neither HH.MM nor HH.MM.SS; without this the hour/minute names
            # would be unbound below.
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
        if tzoffset == 0 and tz.startswith('-'):
            # "-0000" explicitly means "timezone unknown".
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]

178 

179 

def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    Runs parsedate_tz() on `data` and drops the trailing timezone item from a
    tuple result.  Any non-tuple result is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if not isinstance(parsed, tuple):
        return parsed
    return parsed[:9]

187 

188 

def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.

    `data` may be any 10-item sequence (tuple or list).  Item 9 is the
    timezone offset in seconds, or None when the zone is unknown.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT.
        # Normalise to a tuple first so that a list argument works here just
        # as it does in the branch below; the DST flag is forced to -1.
        return time.mktime(tuple(data[:8]) + (-1,))
    else:
        # Interpret the fields as UTC, then remove the zone offset.
        t = calendar.timegm(data)
        return t - data[9]

197 

198 

def quote(str):
    """Escape a string for use inside a quoted string.

    Each backslash and each double quote is preceded by a backslash, making
    it a quoted pair.  These are the only characters that need to be quoted
    inside a quoted string.  The surrounding double quotes are not added.
    """
    # Backslashes must be doubled first, otherwise the backslashes inserted
    # in front of the double quotes would themselves be doubled.
    backslashes_escaped = str.replace('\\', '\\\\')
    return backslashes_escaped.replace('"', '\\"')

207 

208 

class AddrlistClass(object):
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (self.pos) into the header text (self.field);
    every get*() method consumes text starting at the cursor and advances it.
    Comments met along the way are collected in self.commentlist.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Returns the skipped blanks and tabs joined into one string; CR and LF
        characters are skipped but not included.  Each comment encountered is
        appended to self.commentlist.
        """
        wslist = []
        skippable = self.LWS + '\n\r'
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if ch in skippable:
                if ch not in '\n\r':
                    wslist.append(ch)
                self.pos += 1
            elif ch == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return EMPTYSTRING.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each as a
        (name, address) tuple.  A position where no address could be parsed
        contributes ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) tuples: empty when nothing usable
        was found, one entry for a single address, or one entry per member
        for a group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a snapshot rather than an alias: when we rewind below, the
        # comments collected while scanning the phrase list must be dropped,
        # otherwise they are recorded a second time when the same text is
        # re-parsed as an addr-spec.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the caller's loop makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None when the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # The route domain is parsed only to be discarded.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the address as a string.  When no `@' follows the local part,
        only the local part is returned.  When an `@' is present but no valid
        domain follows it, the empty string is returned.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # Drop whitespace recorded just before the dot.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                # Drop whitespace recorded just before the terminator.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain, return an empty address instead of returning a
            # local part to denote failed parsing.
            return EMPTYSTRING
        return EMPTYSTRING.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string when a second `@' is met inside the domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Don't parse domains with two `@' like
                # `a@malicious.org@important.com'; accepting the first part
                # would let a crafted header pass for a different domain.
                return EMPTYSTRING
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        The delimiters themselves are not part of the returned string, and a
        backslash is dropped while the character it escapes is kept.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True right after a backslash: the next character is taken literally.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist instead.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist

500 

class AddressList(AddrlistClass):
    """A parsed RFC 2822 address list supporting set-like operators.

    The parsed entries are kept in self.addresslist.
    """

    def __init__(self, field):
        """Parse `field'; a false value gives an empty address list."""
        AddrlistClass.__init__(self, field)
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        """Return the number of entries."""
        return len(self.addresslist)

    def __add__(self, other):
        """Return a new list: this one's entries, then those only in `other'."""
        union = AddressList(None)
        only_in_other = [entry for entry in other.addresslist
                         if entry not in self.addresslist]
        union.addresslist = self.addresslist[:] + only_in_other
        return union

    def __iadd__(self, other):
        """Append to this list the entries of `other' it does not yet hold."""
        for entry in other.addresslist:
            if entry not in self.addresslist:
                self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        """Return a new list holding the entries absent from `other'."""
        difference = AddressList(None)
        difference.addresslist = [entry for entry in self.addresslist
                                  if entry not in other.addresslist]
        return difference

    def __isub__(self, other):
        """Remove from this list, in place, the entries found in `other'."""
        for entry in other.addresslist:
            if entry in self.addresslist:
                self.addresslist.remove(entry)
        return self

    def __getitem__(self, index):
        """Delegate to the underlying list so indexing, slices, and 'in' work."""
        return self.addresslist[index]