Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/email/message.py: 22%

2# Author: Barry Warsaw

3# Contact: email-sig@python.org

5"""Basic message object for the email package object model."""

7__all__ = ['Message', 'EmailMessage']

9import re

10import uu

11import quopri

12from io import BytesIO, StringIO

14# Intrapackage imports

15from email import utils

16from email import errors

17from email._policybase import Policy, compat32

18from email import charset as _charset

19from email._encoded_words import decode_b

# Module-level alias for the Charset class of email.charset.
Charset = _charset.Charset

# Separator placed between a header value and each of its parameters.
SEMISPACE = '; '

# Matches the "special" characters whose presence in a parameter value
# forces that value to be quoted.
tspecials = re.compile(r'[ <>@,;:\\"/\[\]\?=]')

def _splitparam(param):
    """Split a header value at its first ';' into (value, parameters).

    The argument is stringified first because it may be a Header object
    rather than a plain string.  Both halves are stripped of surrounding
    whitespace; the second element is None when there is no ';' at all.
    This is intentionally simpler than a strict RFC 2045 (section 5.1)
    parser, but it handles most headers found in the wild.
    """
    head, found, tail = str(param).partition(';')
    rest = tail.strip() if found else None
    return head.strip(), rest

def _formatparam(param, value=None, quote=True):
    """Format a header parameter as ``key=value`` (or just ``key``).

    A missing or empty value yields the bare parameter name.  A three-tuple
    value of (charset, language, value) is encoded according to RFC 2231; a
    string holding non-ASCII characters is likewise RFC 2231 encoded using
    utf-8 and an empty language.  RFC 2231 encoded values are never quoted.
    Any other value is quoted when *quote* is true or when it contains one
    of the ``tspecials`` characters.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # (charset, language, value); charset is a string, not a Charset.
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if not quote and not tspecials.search(value):
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, utils.quote(value))

def _parseparam(s):
    """Split a header value into its ';'-separated pieces.

    The argument is stringified first because it may be a Header object.
    A ';' that sits inside an unterminated double-quoted string does not
    end a piece.  For pieces of the form ``key=value`` the key is
    lower-cased and both sides are stripped; every piece is stripped before
    being added to the returned list.
    """
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # An odd number of unescaped quotes before the ';' means we are
        # still inside a quoted string, so look for the next ';'.
        while cut > 0 and (remaining.count('"', 0, cut)
                           - remaining.count('\\"', 0, cut)) % 2:
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        field = remaining[:cut]
        key, equals, val = field.partition('=')
        if equals:
            field = key.strip().lower() + '=' + val.strip()
        pieces.append(field.strip())
        remaining = remaining[cut:]
    return pieces

def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples.

    Unlike utils.collapse_rfc2231_value() this does not try to convert the
    value to a unicode string: for a (charset, language, value) tuple only
    the third item is unquoted and a tuple is returned, because
    Message.get_param() and Message.get_params() are defined to return the
    tuple for RFC 2231 parameters.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    charset, language, text = value[0], value[1], value[2]
    return charset, language, utils.unquote(text)

102

103

104

105class Message:

106 """Basic message object.

107

108 A message object is defined as something that has a bunch of RFC 2822

109 headers and a payload. It may optionally have an envelope header

110 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a

111 multipart or a message/rfc822), then the payload is a list of Message

112 objects, otherwise it is a string.

113

114 Message objects implement part of the `mapping' interface, which assumes

115 there is exactly one occurrence of the header per message. Some headers

116 do in fact appear multiple times (e.g. Received) and for those headers,

117 you must use the explicit API to set or get all the headers. Not all of

118 the mapping methods are implemented.

119 """

def __init__(self, policy=compat32):
    """Create an empty message that uses *policy* (compat32 by default)."""
    self.policy = policy
    # No headers, no envelope line, no payload, no charset yet.
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Text surrounding the parts of a multipart message.
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Content type reported when there is no Content-Type header.
    self._default_type = 'text/plain'

131

def __str__(self):
    """Return the whole formatted message as text (see as_string())."""
    rendered = self.as_string()
    return rendered

136

def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Serialize the entire message to a string.

    When *unixfrom* is true the Unix From_ envelope header is included.
    For backward compatibility *maxheaderlen* defaults to 0; pass it
    explicitly to get a different maximum header length.  *policy* is
    handed to the Generator that serializes the message; when it is None
    the policy associated with this message instance is used.

    If the message contains binary data that is not encoded according to
    RFC standards, the non-compliant data is replaced by unicode "unknown
    character" code points.
    """
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    writer = Generator(buffer,
                       mangle_from_=False,
                       maxheaderlen=maxheaderlen,
                       policy=policy)
    writer.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()

160

def __bytes__(self):
    """Return the whole formatted message as bytes (see as_bytes())."""
    rendered = self.as_bytes()
    return rendered

165

def as_bytes(self, unixfrom=False, policy=None):
    """Serialize the entire message to a bytes object.

    When *unixfrom* is true the Unix From_ envelope header is included.
    *policy* is handed to the BytesGenerator that serializes the message;
    when it is None the policy associated with this message instance is
    used.
    """
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    sink = BytesIO()
    writer = BytesGenerator(sink, mangle_from_=False, policy=policy)
    writer.flatten(self, unixfrom=unixfrom)
    return sink.getvalue()

180

def is_multipart(self):
    """Tell whether the payload is a list (i.e. the message has parts)."""
    payload = self._payload
    return isinstance(payload, list)

184

185 #

186 # Unix From_ line

187 #

def set_unixfrom(self, unixfrom):
    """Store *unixfrom* as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom

190

def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    envelope = self._unixfrom
    return envelope

193

194 #

195 # Payload manipulation.

196 #

def attach(self, payload):
    """Append *payload* to the message's list of parts.

    After this call the payload is always a list.  To set the payload to a
    scalar object use set_payload() instead.  TypeError is raised when the
    existing payload has no append() method (a non-multipart payload).
    """
    current = self._payload
    if current is None:
        self._payload = [payload]
        return
    try:
        current.append(payload)
    except AttributeError:
        raise TypeError("Attach is not valid on a message with a"
                        " non-multipart payload")

212

def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload is either a list object or a string.  If you mutate the
    list object, you modify the message's payload in place.  Optional i
    returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded according to the Content-Transfer-Encoding header (default is
    False).  When True and the message is not a multipart, the payload is
    decoded if the header's value is `quoted-printable', `base64' or one of
    the uuencode spellings, and bytes are returned.  If some other encoding
    is used, the header is missing, or uuencoded data is bogus, the payload
    bytes are returned undecoded.  A payload that is neither str nor
    bytes-like (for example None) is returned as-is.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError when i is given for a non-multipart message.
    """
    # Logic table, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        return self._payload[i]
    # For backward compatibility, use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    if not decode:
        if isinstance(payload, str) and utils._has_surrogates(payload):
            try:
                raw = payload.encode('ascii', 'surrogateescape')
            except UnicodeEncodeError:
                # Surrogates mixed with real non-ASCII text cannot be
                # mapped back to bytes; hand the string back untouched
                # rather than failing.
                return payload
            try:
                # get_content_charset() always yields a string, even when
                # the charset parameter is RFC 2231 encoded (a tuple).
                payload = raw.decode(self.get_content_charset('ascii'),
                                     'replace')
            except LookupError:
                payload = raw.decode('ascii', 'replace')
        return payload
    if isinstance(payload, str):
        try:
            bpayload = payload.encode('ascii', 'surrogateescape')
        except UnicodeEncodeError:
            # This won't happen for RFC compliant messages (messages
            # containing only ASCII code points in the unicode input).
            # If it does happen, turn the string into bytes in a way
            # guaranteed not to fail.
            bpayload = payload.encode('raw-unicode-escape')
    elif isinstance(payload, (bytes, bytearray)):
        bpayload = payload
    else:
        # Nothing decodable (e.g. no payload was ever set).
        return payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    if cte == 'quoted-printable':
        return quopri.decodestring(bpayload)
    if cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    return bpayload

302

def set_payload(self, payload, charset=None):
    """Replace the payload with *payload*.

    A payload with an encode() method is stored unchanged when no charset
    is given; otherwise it is first encoded to the charset's output
    charset.  A payload with a decode() method is stored decoded as ASCII
    with the surrogateescape error handler.  When *charset* is not None it
    is then applied through set_charset(); see that method for details.
    """
    text_like = hasattr(payload, 'encode')
    if text_like and charset is None:
        self._payload = payload
        return
    if text_like:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    if hasattr(payload, 'decode'):
        stored = payload.decode('ascii', 'surrogateescape')
    else:
        stored = payload
    self._payload = stored
    if charset is not None:
        self.set_charset(charset)

322

def set_charset(self, charset):
    """Set the character set of the payload.

    *charset* may be a Charset instance, a string naming a character set
    (converted to a Charset instance), or None.  With None the charset
    parameter is removed from the Content-Type header and the stored
    charset is cleared.

    Otherwise MIME-Version and Content-Type headers are added when absent
    (or the charset parameter of an existing Content-Type is set), and,
    when there is no Content-Transfer-Encoding header yet, the charset's
    body encoding is applied and that header is added.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    output_name = charset.get_output_charset()
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', output_name)
    else:
        self.add_header('Content-Type', 'text/plain', charset=output_name)
    if charset != output_name:
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        return
    encoder = charset.get_body_encoding()
    try:
        # The body encoding is either a callable that encodes the message
        # in place or a string naming the transfer encoding.
        encoder(self)
    except TypeError:
        # This 'if' is for backward compatibility, it allows unicode
        # through even though that won't work correctly if the
        # message is serialized.
        body = self._payload
        if body:
            try:
                body = body.encode('ascii', 'surrogateescape')
            except UnicodeError:
                body = body.encode(charset.output_charset)
        self._payload = charset.body_encode(body)
        self.add_header('Content-Transfer-Encoding', encoder)

369

def get_charset(self):
    """Return the Charset instance stored for the payload (or None)."""
    stored = self._charset
    return stored

374

375 #

376 # MAPPING INTERFACE (partial)

377 #

def __len__(self):
    """Count the stored headers; duplicates are counted individually."""
    headers = self._headers
    return len(headers)

381

def __getitem__(self, name):
    """Look up a header value by field name.

    A missing header yields None rather than an exception.  When the
    header occurs several times, which occurrence is returned is
    undefined; use get_all() to obtain every value for a field name.
    """
    value = self.get(name)
    return value

392

def __setitem__(self, name, val):
    """Append a header with the given name and value.

    An existing header with the same field name is not overwritten; use
    __delitem__() first to delete existing headers.  ValueError is raised
    when the policy limits how many headers of this name may exist and
    that limit has already been reached.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        present = sum(1 for existing, _ in self._headers
                      if existing.lower() == wanted)
        if present >= limit:
            raise ValueError("There may be at most {} {} headers "
                             "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))

410

def __delitem__(self, name):
    """Remove every header whose name matches *name*, ignoring case.

    Nothing happens (and no exception is raised) if the header is missing.
    """
    target = name.lower()
    self._headers = [(field, value)
                     for field, value in self._headers
                     if field.lower() != target]

422

def __contains__(self, name):
    """Return True if a header named *name* exists, ignoring case."""
    wanted = name.lower()
    # Stop at the first match instead of lower-casing every header name
    # into a temporary list.
    return any(field.lower() == wanted for field, _ in self._headers)

425

def __iter__(self):
    """Yield each stored header field name, in stored order."""
    for header_name, _value in self._headers:
        yield header_name

429

def keys(self):
    """Return a list of the message's header field names.

    The names appear in the order the headers were stored and may contain
    duplicates.  Headers that were deleted and re-inserted appear at the
    end of the list.
    """
    names = []
    for field, _value in self._headers:
        names.append(field)
    return names

439

def values(self):
    """Return a list of the message's header values.

    Each stored value is passed through the policy's header_fetch_parse().
    The values appear in the order the headers were stored and may contain
    duplicates.  Headers that were deleted and re-inserted appear at the
    end of the list.
    """
    fetch = self.policy.header_fetch_parse
    fetched = []
    for field, stored in self._headers:
        fetched.append(fetch(field, stored))
    return fetched

450

def items(self):
    """Return a list of (field name, value) pairs for all headers.

    Each stored value is passed through the policy's header_fetch_parse().
    The pairs appear in the order the headers were stored and may contain
    duplicates.  Headers that were deleted and re-inserted appear at the
    end of the list.
    """
    fetch = self.policy.header_fetch_parse
    pairs = []
    for field, stored in self._headers:
        pairs.append((field, fetch(field, stored)))
    return pairs

461

def get(self, name, failobj=None):
    """Return the value of the first header matching *name*.

    Matching ignores case.  Works like __getitem__() except that *failobj*
    (instead of None) is returned when no such header exists.
    """
    wanted = name.lower()
    for field, stored in self._headers:
        if field.lower() != wanted:
            continue
        return self.policy.header_fetch_parse(field, stored)
    return failobj

473

474 #

475 # "Internal" methods (public API, but only intended for use by a parser

476 # or generator, not normal application code.

477 #

478

def set_raw(self, name, value):
    """Append the (name, value) pair to the headers exactly as given.

    This is an "internal" API, intended only for use by a parser.
    """
    entry = (name, value)
    self._headers.append(entry)

485

def raw_items(self):
    """Iterate over the stored (name, value) header pairs, unmodified.

    The iterator walks a copy of the header list taken at call time.

    This is an "internal" API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)

492

493 #

494 # Additional useful stuff

495 #

496

def get_all(self, name, failobj=None):
    """Return every value stored for the header *name*, as a list.

    Matching ignores case.  The values appear in the order the headers
    were stored and may contain duplicates; headers that were deleted and
    re-inserted appear last.

    When no header matches, *failobj* is returned (None by default).
    """
    wanted = name.lower()
    matches = [self.policy.header_fetch_parse(field, stored)
               for field, stored in self._headers
               if field.lower() == wanted]
    return matches if matches else failobj

514

def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its main value.  Keyword
    arguments set additional parameters for the header field, with
    underscores in their names converted to dashes.  A parameter is
    normally added as key="value"; when its value is None only the key is
    added.  A parameter value containing non-ASCII characters may be given
    as a three-tuple of (charset, language, value), in which case it is
    encoded according to RFC 2231 rules; otherwise it is encoded using the
    utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    formatted = [
        key.replace('_', '-') if val is None
        else _formatparam(key.replace('_', '-'), val)
        for key, val in _params.items()
    ]
    if _value is not None:
        formatted.insert(0, _value)
    self[_name] = SEMISPACE.join(formatted)

544

def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and the stored header's case.  Matching ignores case.
    Raises KeyError (with the lower-cased name) if no matching header is
    found.
    """
    _name = _name.lower()
    for index, (field, _old) in enumerate(self._headers):
        if field.lower() == _name:
            self._headers[index] = self.policy.header_store_parse(field, _value)
            break
    else:
        raise KeyError(_name)

559

560 #

561 # Use these three methods instead of the three above.

562 #

563

def get_content_type(self):
    """Return the message's content type as `maintype/subtype'.

    The result is lower-cased and carries no parameters.  Without a
    Content-Type header the default type given by get_default_type() is
    returned, so a value is always produced.  A header value that does not
    contain exactly one '/' is treated as invalid and `text/plain' is
    returned instead (RFC 2045, section 5.2).

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    raw = self.get('content-type', sentinel)
    if raw is sentinel:
        # This should have no parameters
        return self.get_default_type()
    main = _splitparam(raw)[0].lower()
    return main if main.count('/') == 1 else 'text/plain'

587

def get_content_maintype(self):
    """Return the `maintype' half of get_content_type()'s result."""
    pieces = self.get_content_type().split('/')
    return pieces[0]

596

def get_content_subtype(self):
    """Return the `subtype' half of get_content_type()'s result."""
    pieces = self.get_content_type().split('/')
    return pieces[1]

605

def get_default_type(self):
    """Return the `default' content type stored on this message.

    Most messages have a default content type of text/plain; subparts of
    multipart/digest containers have a default of message/rfc822.
    """
    default = self._default_type
    return default

614

def set_default_type(self, ctype):
    """Store *ctype* as the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", but this is
    not enforced.  The default content type is kept on the instance; it is
    not stored in the Content-Type header.
    """
    self._default_type = ctype

623

def _get_params_preserve(self, failobj, header):
    """Return the parameters of *header* as (name, value) pairs.

    Like get_params() but the values keep their quoting.  *failobj* is
    returned when the header is absent.  A parameter without an `=' sign
    gets the empty string as its value.  The collected pairs are passed
    through utils.decode_params() before being returned.
    """
    # BAW: should this be part of the public interface?
    sentinel = object()
    raw = self.get(header, sentinel)
    if raw is sentinel:
        return failobj
    pairs = []
    for piece in _parseparam(raw):
        key, equals, val = piece.partition('=')
        if equals:
            pairs.append((key.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((piece.strip(), ''))
    return utils.decode_params(pairs)

644

def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the parameters of a header as a list of 2-tuples.

    Each element is a (key, value) pair split on the `=' sign; a parameter
    without an `=' sign has the empty string as its value.  Values have
    the form described for get_param().

    *failobj* is returned when the header does not exist.  *header* names
    the header to examine (Content-Type by default).  Values are unquoted
    unless *unquote* is false.
    """
    sentinel = object()
    preserved = self._get_params_preserve(sentinel, header)
    if preserved is sentinel:
        return failobj
    if not unquote:
        return preserved
    return [(key, _unquotevalue(val)) for key, val in preserved]

666

def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the value of one parameter of a header.

    *failobj* is returned when the header does not exist or has no such
    parameter.  *header* names the header to search (Content-Type by
    default).

    Parameter keys are compared case insensitively.  The result is either
    a string, or a 3-tuple (CHARSET, LANGUAGE, VALUE) when the parameter
    was RFC 2231 encoded.  Both CHARSET and LANGUAGE can be None, in which
    case VALUE should be considered encoded in the us-ascii charset; the
    LANGUAGE can usually be ignored.  The value (the string, or the VALUE
    item of the tuple) is unquoted unless *unquote* is false.

    An application that doesn't care whether the parameter was RFC 2231
    encoded can turn the result into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    wanted = param.lower()
    for key, val in self._get_params_preserve(failobj, header):
        if key.lower() != wanted:
            continue
        return _unquotevalue(val) if unquote else val
    return failobj

700

def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language='', replace=False):
    """Set a parameter in a header (Content-Type by default).

    If the parameter already exists in the header, its value is replaced
    with the new value; otherwise the parameter is appended.

    If the header is Content-Type and is not yet present in the message,
    the header value starts out as "text/plain" and the new parameter is
    appended to it, as per RFC 2045.

    Parameters are quoted as necessary unless *requote* is false.

    If *charset* is given (and *value* is not already a tuple), the
    parameter is encoded according to RFC 2231, with *language* as the RFC
    2231 language (the empty string by default).  Both should be strings.

    When the resulting header value differs from the current one, the
    header is replaced in place if *replace* is true; otherwise it is
    deleted and re-added at the end of the headers.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header not in self and header.lower() == 'content-type':
        rebuilt = 'text/plain'
    else:
        rebuilt = self.get(header)
    if self.get_param(param, header=header):
        # The parameter exists: rebuild the value piece by piece,
        # substituting the new value for the old one.
        rebuilt = ''
        for name, current in self.get_params(header=header,
                                             unquote=requote):
            if name.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(name, current, requote)
            rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    else:
        addition = _formatparam(param, value, requote)
        rebuilt = SEMISPACE.join([rebuilt, addition]) if rebuilt else addition
    if rebuilt == self.get(header):
        return
    if replace:
        self.replace_header(header, rebuilt)
    else:
        del self[header]
        self[header] = rebuilt

751

752 def del_param(self, param, header='content-type', requote=True):

753 """Remove the given parameter completely from the Content-Type header.

754

755 The header will be re-written in place without the parameter or its

756 value. All values will be quoted as necessary unless requote is

757 False. Optional header specifies an alternative to the Content-Type

758 header.

759 """

760 if header not in self:

761 return

762 new_ctype = ''

763 for p, v in self.get_params(header=header, unquote=requote):

764 if p.lower() != param.lower():

765 if not new_ctype:

766 new_ctype = _formatparam(p, v, requote)

767 else:

768 new_ctype = SEMISPACE.join([new_ctype,

769 _formatparam(p, v, requote)])

770 if new_ctype != self.get(header):

771 del self[header]

772 self[header] = new_ctype

773

def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype" (exactly one
    '/'), otherwise a ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, a MIME-Version header is always
    (re)written as well.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError(
            'type must be of the form "maintype/subtype", got %r' % (type,))
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header not in self:
        self[header] = type
        return
    params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for p, v in params[1:]:
        self.set_param(p, v, header, requote)

805

def get_filename(self, failobj=None):
    """Return the filename associated with the payload, if present.

    The name is taken from the `filename' parameter of the
    Content-Disposition header; if that is not available, the `name'
    parameter of the Content-Type header is used instead.  The value is
    unquoted, collapsed to a string and stripped.  *failobj* is returned
    when neither parameter is found.
    """
    sentinel = object()
    sources = (('filename', 'content-disposition'),
               ('name', 'content-type'))
    for param, header in sources:
        found = self.get_param(param, sentinel, header)
        if found is not sentinel:
            return utils.collapse_rfc2231_value(found).strip()
    return failobj

821

def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload, if present.

    The value comes from the `boundary' parameter of the Content-Type
    header; it is unquoted and trailing whitespace is removed.  *failobj*
    is returned when there is no such parameter.
    """
    sentinel = object()
    found = self.get_param('boundary', sentinel)
    if found is sentinel:
        return failobj
    # RFC 2046 says that boundaries may begin but not end in w/s
    collapsed = utils.collapse_rfc2231_value(found)
    return collapsed.rstrip()

834

def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    This is subtly different than deleting the Content-Type header and
    adding a new one with a new boundary parameter via add_header().  The
    main difference is that set_boundary() preserves the position of the
    Content-Type header among the message's headers.  An existing boundary
    parameter is replaced; otherwise one is appended after the other
    parameters.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, 'content-type')
    if params is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    updated = []
    replaced = False
    for key, val in params:
        if key.lower() == 'boundary':
            updated.append(('boundary', quoted))
            replaced = True
        else:
            updated.append((key, val))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))
    # Bare attributes (empty value) are written without an '=' sign.
    new_value = SEMISPACE.join(
        key if val == '' else '%s=%s' % (key, val)
        for key, val in updated)
    # Replace the existing Content-Type header with the new value
    self._headers = [
        self.policy.header_store_parse(field, new_value)
        if field.lower() == 'content-type' else (field, stored)
        for field, stored in self._headers
    ]

880

def get_content_charset(self, failobj=None):
    """Return the charset parameter of the Content-Type header.

    The result is always lower-cased.  *failobj* is returned when there is
    no Content-Type header, when the header has no charset parameter, or
    when the charset name contains characters outside us-ascii.
    """
    sentinel = object()
    found = self.get_param('charset', sentinel)
    if found is sentinel:
        return failobj
    if isinstance(found, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        declared = found[0] or 'us-ascii'
        try:
            # LookupError will be raised if the charset isn't known to
            # Python.  UnicodeError will be raised if the encoded text
            # contains a character not in the charset.
            found = str(found[2].encode('raw-unicode-escape'), declared)
        except (LookupError, UnicodeError):
            found = found[2]
    # charset characters must be in us-ascii range
    try:
        found.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return found.lower()

910

def get_charsets(self, failobj=None):
    """Return a list containing the charset(s) used in this message.

    The list holds one entry per part visited by walk(): the container
    message itself first, followed by every subpart in its payload, so a
    non-multipart message yields a list of length 1.

    Each entry is whatever get_content_charset(failobj) returns for that
    part: the charset parameter of its Content-Type header, or 'failobj'
    (None by default) when no charset is defined.
    """
    found = []
    for part in self.walk():
        found.append(part.get_content_charset(failobj))
    return found

928

def get_content_disposition(self):
    """Return the message's content-disposition if it exists, or None.

    The disposition is the part of the Content-Disposition header that
    precedes the first ';', stripped and lower-cased.  Per rfc2183 this
    is expected to be 'inline' or 'attachment'; None is returned when the
    header is absent.
    """
    header = self.get('content-disposition')
    if header is None:
        return None
    disposition, _ = _splitparam(header)
    return disposition.lower()

940

941 # I.e. def walk(self): ...

942 from email.iterators import walk

943

944

class MIMEPart(Message):
    """Message subclass adding body lookup, attachment iteration and the
    content-manager based get_content()/set_content() API.

    When no policy is supplied, email.policy.default is used.
    """

    def __init__(self, policy=None):
        """Initialise the part, defaulting 'policy' to email.policy.default."""
        if policy is None:
            from email.policy import default
            policy = default
        Message.__init__(self, policy)

    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        # Forward unixfrom as well; dropping it would silently ignore the
        # caller's request for the envelope line.
        return super().as_string(unixfrom=unixfrom,
                                 maxheaderlen=maxheaderlen,
                                 policy=policy)

    def __str__(self):
        """Return as_string() using a clone of the policy with utf8=True."""
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if Content-Disposition is 'attachment', else False.

        A missing Content-Disposition header counts as "not an attachment".
        """
        c_d = self.get('content-disposition')
        if c_d is None:
            return False
        try:
            disposition = c_d.content_disposition
        except AttributeError:
            # Policies such as compat32 return the header as a plain string
            # with no parsed attribute; derive the disposition from the text.
            disposition = self.get_content_disposition()
        return disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) pairs for body candidates under 'part'.

        The priority is the index of the matching entry in preferencelist
        (lower is better).  Attachments are never candidates.
        """
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        # A malformed message may declare multipart/* while carrying a
        # string payload; there are no subparts to search in that case.
        if maintype != 'multipart' or not part.is_multipart():
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # The root of a multipart/related is the subpart whose Content-ID
        # equals the 'start' parameter, falling back to the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the most preferred type.
                    break
        return body

    # Content types that iter_attachments() treats as body candidates.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        # Certain malformed messages can have content type set to `multipart/*`
        # but still have single part body, in which case payload.copy() can
        # fail with AttributeError.
        try:
            parts = payload.copy()
        except AttributeError:
            # payload is not a list, it is most probably a string.
            return

        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slice rather than pop(0) so that a multipart/related with no
            # subparts yields nothing instead of raising IndexError.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart, including a message
        that declares a multipart/* type but whose payload is not a list
        of subparts.
        """
        if self.get_content_maintype() == 'multipart' and self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return the result of content_manager.get_content() for this part.

        The policy's content_manager is used when none is given; remaining
        arguments are passed through to the content manager.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Call content_manager.set_content() on this part.

        The policy's content_manager is used when none is given; remaining
        arguments are passed through to the content manager.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Convert this part in place into a multipart/<subtype> container.

        Existing Content-* headers and the payload move into a new first
        subpart; all other headers stay on the container.  ValueError is
        raised if the part is already a multipart whose subtype is 'subtype'
        itself or one of disallowed_subtypes.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related (not allowed from alternative/mixed)."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative (not allowed from mixed)."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart, converting to multipart/<_subtype> if needed.

        The subpart's content is set from *args/**kw via set_content().  If
        _disp is given and the subpart has no Content-Disposition header,
        that header is set to _disp.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a subpart to a multipart/related, defaulting to 'inline'."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a subpart to a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a subpart to a multipart/mixed, defaulting to 'attachment'."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts with
        'content-' (case-insensitive); other headers are kept."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None

1166

1167

class EmailMessage(MIMEPart):
    """MIMEPart variant whose set_content() also ensures MIME-Version."""

    def set_content(self, *args, **kw):
        """Set the content via MIMEPart.set_content(), then add a
        'MIME-Version: 1.0' header unless one is already present."""
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'