Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/email/message.py: 22%

571 statements  

« prev     ^ index     » next       coverage.py v7.0.1, created at 2022-12-25 06:11 +0000

1# Copyright (C) 2001-2007 Python Software Foundation 

2# Author: Barry Warsaw 

3# Contact: email-sig@python.org 

4 

5"""Basic message object for the email package object model.""" 

6 

7__all__ = ['Message', 'EmailMessage'] 

8 

9import re 

10import uu 

11import quopri 

12from io import BytesIO, StringIO 

13 

14# Intrapackage imports 

15from email import utils 

16from email import errors 

17from email._policybase import Policy, compat32 

18from email import charset as _charset 

19from email._encoded_words import decode_b 

20Charset = _charset.Charset 

21 

22SEMISPACE = '; ' 

23 

24# Regular expression that matches `special' characters in parameters, the 

25# existence of which force quoting of the parameter value. 

26tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') 

27 

28 

def _splitparam(param):
    """Split a header value at its first semicolon.

    Returns a 2-tuple ``(main_value, rest)`` with surrounding whitespace
    removed from both items.  ``rest`` is None when the value contains no
    semicolon at all.

    BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    compliant, but it catches most headers found in the wild.
    """
    # RDM: the argument might be a Header object, so stringify it first.
    head, semicolon, tail = str(param).partition(';')
    remainder = tail.strip() if semicolon else None
    return head.strip(), remainder

38 

def _formatparam(param, value=None, quote=True):
    """Format a header parameter as ``key=value`` text.

    With no value (None or empty) only the bare parameter name is returned.

    A tuple value is taken to be ``(charset, language, value)`` and is
    encoded per RFC 2231; charset is a string, not a Charset instance.  A
    string containing non-ASCII characters is likewise RFC 2231 encoded,
    using the utf-8 charset and an empty language.  RFC 2231 encoded values
    are never quoted.

    Any other value is double-quoted when `quote` is true or when it
    contains one of the `tspecials` characters.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # Encode as per RFC 2231; the parameter name gains a '*' suffix.
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)

72 

def _parseparam(s):
    """Split a header value into its semicolon-separated pieces.

    Semicolons that sit inside a double-quoted string do not split.  For a
    piece containing '=', the text before the first '=' is stripped and
    lower-cased and the text after it is stripped; every piece is stripped
    before being added to the returned list.
    """
    # RDM: this might be a Header, so for now stringify it.
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # An odd number of unescaped quotes before the semicolon means it
        # lies inside a quoted string, so look for the next one.
        while cut > 0 and (remaining.count('"', 0, cut)
                           - remaining.count('\\"', 0, cut)) % 2:
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        piece = remaining[:cut]
        key, equals, val = piece.partition('=')
        if equals:
            piece = key.strip().lower() + '=' + val.strip()
        pieces.append(piece.strip())
        remaining = remaining[cut:]
    return pieces

91 

92 

def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples.

    This is different than utils.collapse_rfc2231_value() because it doesn't
    try to convert the value to a unicode.  Message.get_param() and
    Message.get_params() are both currently defined to return the tuple in
    the face of RFC 2231 parameters.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    # Only the third item (the value text) is unquoted.
    return value[0], value[1], utils.unquote(value[2])

102 

103 

104 

105class Message: 

106 """Basic message object. 

107 

108 A message object is defined as something that has a bunch of RFC 2822 

109 headers and a payload. It may optionally have an envelope header 

110 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a 

111 multipart or a message/rfc822), then the payload is a list of Message 

112 objects, otherwise it is a string. 

113 

114 Message objects implement part of the `mapping' interface, which assumes 

115 there is exactly one occurrence of the header per message. Some headers 

116 do in fact appear multiple times (e.g. Received) and for those headers, 

117 you must use the explicit API to set or get all the headers. Not all of 

118 the mapping methods are implemented. 

119 """ 

def __init__(self, policy=compat32):
    """Create an empty message that uses `policy` (default: compat32)."""
    self.policy = policy
    # No headers, envelope line, payload or charset until they are set.
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Content type reported when there is no Content-Type header.
    self._default_type = 'text/plain'

131 

def __str__(self):
    """Return the entire formatted message as a string.

    This is as_string() called with its default arguments.
    """
    rendered = self.as_string()
    return rendered

136 

def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Return the entire formatted message as a string.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  For backward compatibility reasons, if maxheaderlen is
    not specified it defaults to 0, so you must override it explicitly
    if you want a different maxheaderlen.  'policy' is passed to the
    Generator instance used to serialize the message; if it is not
    specified the policy associated with the message instance is used.

    If the message object contains binary data that is not encoded
    according to RFC standards, the non-compliant data will be replaced by
    unicode "unknown character" code points.
    """
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer,
                          mangle_from_=False,
                          maxheaderlen=maxheaderlen,
                          policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()

160 

def __bytes__(self):
    """Return the entire formatted message as a bytes object.

    This is as_bytes() called with its default arguments.
    """
    rendered = self.as_bytes()
    return rendered

165 

def as_bytes(self, unixfrom=False, policy=None):
    """Return the entire formatted message as a bytes object.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  'policy' is passed to the BytesGenerator instance used to
    serialize the message; if not specified the policy associated with
    the message instance is used.
    """
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    buffer = BytesIO()
    generator = BytesGenerator(buffer, mangle_from_=False, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()

180 

def is_multipart(self):
    """Return True if the message consists of multiple parts.

    That is the case exactly when the payload is a list.
    """
    payload = self._payload
    return isinstance(payload, list)

184 

185 # 

186 # Unix From_ line 

187 # 

def set_unixfrom(self, unixfrom):
    """Store `unixfrom` as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom

190 

def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None until one is set)."""
    return self._unixfrom

193 

194 # 

195 # Payload manipulation. 

196 # 

def attach(self, payload):
    """Add the given payload to the current payload.

    The current payload will always be a list of objects after this method
    is called.  If you want to set the payload to a scalar object, use
    set_payload() instead.

    Raises TypeError when the existing payload cannot be appended to.
    """
    current = self._payload
    if current is None:
        # First attachment: start the list.
        self._payload = [payload]
        return
    try:
        current.append(payload)
    except AttributeError:
        raise TypeError("Attach is not valid on a message with a"
                        " non-multipart payload")

212 

def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64' or one
    of the uuencode spellings (`x-uuencode', `uuencode', `uue', `x-uue').
    If some other encoding is used, or the header is missing, or if the
    payload has bogus uuencoded data, the payload is returned as bytes
    without further decoding.  Defects found while decoding base64 are
    passed to the policy's handle_defect().

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError if i is given and the payload is not a list.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    # Note that Barry planned to factor out the 'decode' case, but that
    # isn't so easy now that we handle the 8 bit data, which needs to be
    # converted in both the decode and non-decode path.
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        else:
            return self._payload[i]
    # For backward compatibility, Use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    # payload may be bytes here.
    if isinstance(payload, str):
        if utils._has_surrogates(payload):
            bpayload = payload.encode('ascii', 'surrogateescape')
            if not decode:
                charset = self.get_param('charset', 'ascii')
                if isinstance(charset, tuple):
                    # An RFC 2231 encoded parameter comes back as
                    # (CHARSET, LANGUAGE, VALUE); only VALUE names the
                    # payload's charset.  Passing the tuple itself to
                    # bytes.decode() would raise TypeError.
                    charset = charset[2]
                try:
                    payload = bpayload.decode(charset, 'replace')
                except LookupError:
                    # Unknown charset name: fall back to ascii.
                    payload = bpayload.decode('ascii', 'replace')
        elif decode:
            try:
                bpayload = payload.encode('ascii')
            except UnicodeError:
                # This won't happen for RFC compliant messages (messages
                # containing only ASCII code points in the unicode input).
                # If it does happen, turn the string into bytes in a way
                # guaranteed not to fail.
                bpayload = payload.encode('raw-unicode-escape')
    if not decode:
        return payload
    if cte == 'quoted-printable':
        return quopri.decodestring(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    if isinstance(payload, str):
        return bpayload
    return payload

302 

def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    Optional charset sets the message's default character set.  See
    set_charset() for details.

    A payload with an encode() method is stored unchanged when no charset
    is given; with a charset it is first encoded to that charset's output
    charset.  A payload with a decode() method is stored decoded as ascii
    with the surrogateescape error handler.
    """
    is_text = hasattr(payload, 'encode')
    if is_text and charset is None:
        self._payload = payload
        return
    if is_text:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    if hasattr(payload, 'decode'):
        stored = payload.decode('ascii', 'surrogateescape')
    else:
        stored = payload
    self._payload = stored
    if charset is not None:
        self.set_charset(charset)

322 

def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        return
    encoding = charset.get_body_encoding()
    try:
        # The body encoding may be a callable that encodes the message in
        # place; when it is not callable this raises TypeError.
        encoding(self)
    except TypeError:
        # This 'if' is for backward compatibility, it allows unicode
        # through even though that won't work correctly if the
        # message is serialized.
        body = self._payload
        if body:
            try:
                body = body.encode('ascii', 'surrogateescape')
            except UnicodeError:
                body = body.encode(charset.output_charset)
        self._payload = charset.body_encode(body)
        self.add_header('Content-Transfer-Encoding', encoding)

369 

def get_charset(self):
    """Return the Charset instance associated with the message's payload.

    This is whatever set_charset() last stored, or None.
    """
    return self._charset

374 

375 # 

376 # MAPPING INTERFACE (partial) 

377 # 

def __len__(self):
    """Return the total number of headers, including duplicates."""
    headers = self._headers
    return len(headers)

381 

def __getitem__(self, name):
    """Get a header value.

    Return None if the header is missing instead of raising an exception.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value

392 

def __setitem__(self, name, val):
    """Set the value of a header.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.

    Raises ValueError when the policy limits how many headers of this
    name a message may have and that limit has already been reached.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        matches = 0
        for field, _ in self._headers:
            if field.lower() != wanted:
                continue
            matches += 1
            if matches >= limit:
                raise ValueError("There may be at most {} {} headers "
                                 "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))

410 

def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Does not raise an exception if the header is missing.  Field names
    are compared case-insensitively.
    """
    unwanted = name.lower()
    self._headers = [(field, value)
                     for field, value in self._headers
                     if field.lower() != unwanted]

422 

def __contains__(self, name):
    """Return True if a header named `name` exists (case-insensitive)."""
    wanted = name.lower()
    present = [field.lower() for field, _ in self._headers]
    return wanted in present

425 

def __iter__(self):
    """Yield each header field name in stored order, duplicates included."""
    yield from (field for field, _ in self._headers)

429 

def keys(self):
    """Return a list of all the message's header field names.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.
    """
    names = []
    for field, _ in self._headers:
        names.append(field)
    return names

439 

def values(self):
    """Return a list of all the message's header values.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.

    Each stored value is passed through the policy's header_fetch_parse().
    """
    fetched = []
    for field, raw in self._headers:
        fetched.append(self.policy.header_fetch_parse(field, raw))
    return fetched

450 

def items(self):
    """Get all the message's header fields and values.

    These will be sorted in the order they appeared in the original
    message, or were added to the message, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.

    The result is a list of (name, value) tuples; each stored value is
    passed through the policy's header_fetch_parse().
    """
    pairs = []
    for field, raw in self._headers:
        pairs.append((field, self.policy.header_fetch_parse(field, raw)))
    return pairs

461 

def get(self, name, failobj=None):
    """Get a header value.

    Like __getitem__() but return failobj instead of None when the field
    is missing.  The first header whose name matches case-insensitively
    is used, and its value is passed through the policy's
    header_fetch_parse().
    """
    wanted = name.lower()
    first_match = next(
        ((field, raw) for field, raw in self._headers
         if field.lower() == wanted),
        None)
    if first_match is None:
        return failobj
    return self.policy.header_fetch_parse(*first_match)

473 

474 # 

475 # "Internal" methods (public API, but only intended for use by a parser 

476 # or generator, not normal application code. 

477 # 

478 

def set_raw(self, name, value):
    """Store name and value in the model without modification.

    This is an "internal" API, intended only for use by a parser.
    """
    entry = (name, value)
    self._headers.append(entry)

485 

def raw_items(self):
    """Return the (name, value) header pairs without modification.

    This is an "internal" API, intended only for use by a generator.

    The iterator walks a copy of the header list.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)

492 

493 # 

494 # Additional useful stuff 

495 # 

496 

def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    These will be sorted in the order they appeared in the original
    message, and may contain duplicates.  Any fields deleted and
    re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    found = [self.policy.header_fetch_parse(field, raw)
             for field, raw in self._headers
             if field.lower() == wanted]
    return found if found else failobj

514 

def add_header(self, _name, _value, **_params):
    """Extended header setting.

    name is the header field to add.  keyword arguments can be used to set
    additional parameters for the header field, with underscores converted
    to dashes.  Normally the parameter will be added as key="value" unless
    value is None, in which case only the key will be added.  If a
    parameter value contains non-ASCII characters it can be specified as a
    three-tuple of (charset, language, value), in which case it will be
    encoded according to RFC2231 rules.  Otherwise it will be encoded using
    the utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    # The main value, when there is one, always comes first.
    pieces = [] if _value is None else [_value]
    for key, val in _params.items():
        dashed = key.replace('_', '-')
        if val is None:
            pieces.append(dashed)
        else:
            pieces.append(_formatparam(dashed, val))
    self[_name] = SEMISPACE.join(pieces)

544 

def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case.  If no matching header was found, a KeyError is
    raised.
    """
    wanted = _name.lower()
    for index, (field, _) in enumerate(self._headers):
        if field.lower() == wanted:
            # Re-use the stored field name so its original case survives.
            self._headers[index] = self.policy.header_store_parse(field, _value)
            return
    raise KeyError(wanted)

559 

560 # 

561 # Use these three methods instead of the three above. 

562 # 

563 

def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  Since according to RFC 2045, messages always have a default
    type this will always return a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    raw = self.get('content-type', sentinel)
    if raw is sentinel:
        # This should have no parameters
        return self.get_default_type()
    main_value = _splitparam(raw)[0].lower()
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return main_value if main_value.count('/') == 1 else 'text/plain'

587 

def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[0]

596 

def get_content_subtype(self):
    """Returns the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]

605 

def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default

614 

def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The default content type is not stored in the
    Content-Type header.
    """
    # Stored as given; no validation is performed.
    self._default_type = ctype

623 

def _get_params_preserve(self, failobj, header):
    """Return the parameters of `header` as a list of (name, value) pairs.

    Like get_params() but preserves the quoting of values.  `failobj` is
    returned when the header is absent.  A bare attribute (no '=') gets
    the empty string as its value.  The pairs are passed through
    utils.decode_params() before being returned.

    BAW: should this be part of the public interface?
    """
    sentinel = object()
    raw = self.get(header, sentinel)
    if raw is sentinel:
        return failobj
    pairs = []
    for chunk in _parseparam(raw):
        key, equals, val = chunk.partition('=')
        if equals:
            pairs.append((key.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((chunk.strip(), ''))
    return utils.decode_params(pairs)

644 

def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = object()
    preserved = self._get_params_preserve(sentinel, header)
    if preserved is sentinel:
        return failobj
    if not unquote:
        return preserved
    return [(key, _unquotevalue(val)) for key, val in preserved]

666 

def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
    The parameter value (either the returned string, or the VALUE item in
    the 3-tuple) is always unquoted, unless unquote is set to False.

    If your application doesn't care whether the parameter was RFC 2231
    encoded, it can turn the return value into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    wanted = param.lower()
    for key, raw in self._get_params_preserve(failobj, header):
        if key.lower() != wanted:
            continue
        return _unquotevalue(raw) if unquote else raw
    return failobj

700 

def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language='', replace=False):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.

    When the header text changes it is rewritten: in place through
    replace_header() if replace is true, otherwise by deleting the header
    and adding it again.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header.lower() == 'content-type' and header not in self:
        text = 'text/plain'
    else:
        text = self.get(header)

    if self.get_param(param, header=header):
        # The parameter is already there: rebuild the whole header value,
        # substituting the new value where the names match.
        text = ''
        for existing_name, existing_value in self.get_params(
                header=header, unquote=requote):
            if existing_name.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(existing_name, existing_value, requote)
            text = SEMISPACE.join([text, piece]) if text else piece
    else:
        # Not present yet: append it to whatever the header holds.
        formatted = _formatparam(param, value, requote)
        text = SEMISPACE.join([text, formatted]) if text else formatted

    if text != self.get(header):
        if replace:
            self.replace_header(header, text)
        else:
            del self[header]
            self[header] = text

751 

def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header will be re-written in place without the parameter or its
    value.  All values will be quoted as necessary unless requote is
    False.  Optional header specifies an alternative to the Content-Type
    header.

    Nothing happens when the header is absent.
    """
    if header not in self:
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        piece = _formatparam(name, val, requote)
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt

773 

def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, we'll always also add a MIME-Version
    header.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        # Name the offending value so the caller can see what was rejected.
        raise ValueError(
            'type must be of the form "maintype/subtype", got %r' % (type,))
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header not in self:
        self[header] = type
        return
    params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for p, v in params[1:]:
        self.set_param(p, v, header, requote)

805 

def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The filename is extracted from the Content-Disposition header's
    `filename' parameter, and it is unquoted.  If that header is missing
    the `filename' parameter, this method falls back to looking for the
    `name' parameter on the Content-Type header.  failobj is returned when
    neither is found.
    """
    sentinel = object()
    found = self.get_param('filename', sentinel, 'content-disposition')
    if found is sentinel:
        found = self.get_param('name', sentinel, 'content-type')
        if found is sentinel:
            return failobj
    return utils.collapse_rfc2231_value(found).strip()

821 

def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload if present.

    The boundary is extracted from the Content-Type header's `boundary'
    parameter, and it is unquoted.  failobj is returned when there is no
    such parameter.
    """
    sentinel = object()
    found = self.get_param('boundary', sentinel)
    if found is sentinel:
        return failobj
    collapsed = utils.collapse_rfc2231_value(found)
    # RFC 2046 says that boundaries may begin but not end in w/s
    return collapsed.rstrip()

834 

def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    This is subtly different than deleting the Content-Type header and
    adding a new one with a new boundary parameter via add_header().  The
    main difference is that using the set_boundary() method preserves the
    order of the Content-Type header in the original message.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    current = self._get_params_preserve(sentinel, 'content-type')
    if current is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    updated = []
    replaced = False
    for name, val in current:
        if name.lower() == 'boundary':
            updated.append(('boundary', quoted))
            replaced = True
        else:
            updated.append((name, val))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))
    # Render the new header text; a parameter with an empty value is
    # written as its bare name.
    rendered = SEMISPACE.join(
        [name if val == '' else '%s=%s' % (name, val)
         for name, val in updated])
    # Replace the existing Content-Type header with the new value
    rebuilt = []
    for field, raw in self._headers:
        if field.lower() == 'content-type':
            rebuilt.append(self.policy.header_store_parse(field, rendered))
        else:
            rebuilt.append((field, raw))
    self._headers = rebuilt

880 

def get_content_charset(self, failobj=None):
    """Return the lower-cased `charset' parameter of Content-Type.

    failobj is returned when get_param() finds no charset parameter, and
    also when the charset name contains characters outside the us-ascii
    range.
    """
    sentinel = object()
    raw = self.get_param('charset', sentinel)
    if raw is sentinel:
        return failobj

    name = raw
    if isinstance(raw, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        codec = raw[0] or 'us-ascii'
        try:
            # LookupError is raised if Python does not know the codec;
            # UnicodeError if the text has a character not in that codec.
            name = str(raw[2].encode('raw-unicode-escape'), codec)
        except (LookupError, UnicodeError):
            name = raw[2]

    # Charset names must consist solely of us-ascii characters.
    try:
        name.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive.
    return name.lower()

910 

def get_charsets(self, failobj=None):
    """Return the charsets of this message and every part under it.

    One entry is produced for each part visited by walk(), which includes
    the message itself, so a non-multipart message gives a list of length
    1.  Each entry is whatever get_content_charset(failobj) returns for
    that part: the charset name, or failobj (None by default) when the
    part has no usable charset.
    """
    found = []
    for part in self.walk():
        found.append(part.get_content_charset(failobj))
    return found

928 

def get_content_disposition(self):
    """Return the lower-cased content-disposition value, or None.

    None is returned when the message has no Content-Disposition header.
    Otherwise the part of the header before the first ';' is returned in
    lower case.  Per rfc2183 that is expected to be 'inline' or
    'attachment', but the value is not validated here.
    """
    header = self.get('content-disposition')
    if header is not None:
        disposition, _ = _splitparam(header)
        return disposition.lower()
    return None

940 

941 # I.e. def walk(self): ... 

942 from email.iterators import walk 

943 

944 

class MIMEPart(Message):
    """Message subclass with helpers for inspecting and building MIME content.

    On top of Message it adds body discovery (get_body), attachment and
    subpart iteration (iter_attachments, iter_parts), content-manager based
    get_content/set_content, and the make_*/add_* helpers that convert the
    part into a multipart container and attach new subparts to it.
    """

    def __init__(self, policy=None):
        """Initialise the part, using email.policy.default if policy is None."""
        if policy is None:
            # Imported at call time rather than at module import time.
            from email.policy import default
            policy = default
        Message.__init__(self, policy)

    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        # unixfrom has to be forwarded explicitly; otherwise the base class
        # falls back to its own default and the From_ line is never written.
        return super().as_string(unixfrom=unixfrom,
                                 maxheaderlen=maxheaderlen,
                                 policy=policy)

    def __str__(self):
        """Return as_string() rendered with a utf8=True clone of the policy."""
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if Content-Disposition marks this part an attachment.

        Returns False when there is no Content-Disposition header.  The
        header value is expected to expose a content_disposition attribute.
        """
        c_d = self.get('content-disposition')
        return False if c_d is None else c_d.content_disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) candidates for the body, searching 'part'.

        priority is the index within preferencelist of the entry the
        candidate matched, so lower numbers are more preferred.  Parts for
        which is_attachment() is true are skipped, together with everything
        beneath them.
        """
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        if maintype != 'multipart':
            return
        if subtype != 'related':
            # Any other multipart: every subpart is searched in turn.
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        # multipart/related: the container itself is a candidate when the
        # caller asked for 'related'; in any case its root part is searched.
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        candidate = None
        start = part.get_param('start')
        if start:
            # The root is the subpart whose Content-ID equals 'start'.
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            # No 'start', or no subpart matched it: use the first subpart.
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the most preferred entry.
                    break
        return body

    # (maintype, subtype) pairs that iter_attachments() treats as body
    # candidates rather than attachments.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        # Certain malformed messages can have content type set to `multipart/*`
        # but still have single part body, in which case payload.copy() can
        # fail with AttributeError.
        try:
            parts = payload.copy()
        except AttributeError:
            # payload is not a list, it is most probably a string.
            return

        if maintype == 'multipart' and subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            parts.pop(0)
            yield from parts
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart.
        """
        # Test the payload itself rather than the declared content type: a
        # malformed message may claim multipart/* while carrying a string
        # payload, and iterating that would yield single characters.
        if self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return the result of content_manager.get_content(self, ...).

        The policy's content_manager is used when none is supplied; extra
        positional and keyword arguments are passed through unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Call content_manager.set_content(self, ...) to set the content.

        The policy's content_manager is used when none is supplied; extra
        positional and keyword arguments are passed through unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a multipart/<subtype> container.

        Existing 'Content-*' headers and the current payload are moved into
        a new first subpart; all other headers stay on this part.  If
        boundary is not None it is set as the boundary parameter.

        Raises ValueError when this part is already a multipart whose
        subtype is 'subtype' itself or is listed in disallowed_subtypes.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related (not from alternative or mixed)."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative (not from mixed)."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart, first converting self to multipart/_subtype.

        The conversion is skipped when self already has that exact type.
        The new part's content is set with set_content(*args, **kw).  When
        _disp is given and the new part has no Content-Disposition header,
        that header is set to _disp.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a subpart with 'inline' disposition to a multipart/related."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a subpart to a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a subpart with 'attachment' disposition to a multipart/mixed."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and all 'Content-*' headers; keep the rest."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None

1166 

1167 

class EmailMessage(MIMEPart):
    """MIMEPart whose set_content() also ensures a MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content via MIMEPart.set_content, then add MIME-Version.

        A 'MIME-Version: 1.0' header is added only when the message does
        not already have a MIME-Version header.
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'