Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/future/backports/email/quoprimime.py: 30%

2# Author: Ben Gertzfield

3# Contact: email-sig@python.org

5"""Quoted-printable content transfer encoding per RFCs 2045-2047.

7This module handles the content transfer encoding method defined in RFC 2045

8to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to

9safely encode text that is in a character set similar to the 7-bit US ASCII

10character set, but that includes some 8-bit characters that are normally not

11allowed in email bodies or headers.

13Quoted-printable is very space-inefficient for encoding binary files; use the

14email.base64mime module for that instead.

16This module provides an interface to encode and decode both headers and bodies

17with quoted-printable encoding.

19RFC 2045 defines a method for including character set information in an

20`encoded-word' in a header. This method is commonly used for 8-bit real names

21in To:/From:/Cc: etc. fields, as well as Subject: lines.

23This module does not do the line wrapping or end-of-line character

24conversion necessary for proper internationalized headers; it only

25does dumb encoding and decoding. To deal with the various line

26wrapping issues, use the email.header module.

27"""

28from __future__ import unicode_literals

29from __future__ import division

30from __future__ import absolute_import

31from future.builtins import bytes, chr, dict, int, range, super

# Public names exported by ``from <module> import *``.
__all__ = [
    'body_decode', 'body_encode', 'body_length',
    'decode', 'decodestring',
    'header_decode', 'header_encode', 'header_length',
    'quote', 'unquote',
]

46import re

47import io

49from string import ascii_letters, digits, hexdigits

# Line terminators and the empty string, named for readability.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# Lookup tables from an octet (0-255) to the text it becomes when encoded.
# Headers and bodies have different sets of safe characters, so each gets its
# own table.  Both start out fully escaped ("=XX" for every octet); the safe
# octets are then overridden with their literal one-character form.
_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
_QUOPRI_BODY_MAP = dict(_QUOPRI_HEADER_MAP)

# Header-safe octets: ASCII letters, digits and the punctuation "-!*+/".
for c in bytes((ascii_letters + digits + '-!*+/').encode('ascii')):
    _QUOPRI_HEADER_MAP[c] = chr(c)
# Headers have one other special encoding: a space becomes an underscore.
_QUOPRI_HEADER_MAP[ord(' ')] = '_'

# Body-safe octets: every printable ASCII character from space to tilde except
# "=" (which introduces an escape), plus the horizontal tab.
for c in list(range(ord(' '), ord('~') + 1)) + [ord('\t')]:
    if c != ord('='):
        _QUOPRI_BODY_MAP[c] = chr(c)

77# Helpers

def header_check(octet):
    """Return True if the octet should be escaped with header quopri.

    An octet needs escaping when its entry in the header map is anything
    other than the bare character itself.
    """
    literal = chr(octet)
    return literal != _QUOPRI_HEADER_MAP[octet]

def body_check(octet):
    """Return True if the octet should be escaped with body quopri.

    An octet needs escaping when its entry in the body map is anything other
    than the bare character itself.
    """
    literal = chr(octet)
    return literal != _QUOPRI_BODY_MAP[octet]

def header_length(bytearray):
    """Return a header quoted-printable encoding length.

    Note that this does not include any RFC 2047 chrome added by
    `header_encode()`.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The length in bytes of the byte array when it is encoded with
        quoted-printable for headers.
    """
    total = 0
    for octet in bytearray:
        # Each octet contributes the length of its header-map expansion.
        total += len(_QUOPRI_HEADER_MAP[octet])
    return total

100

def body_length(bytearray):
    """Return a body quoted-printable encoding length.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The length in bytes of the byte array when it is encoded with
        quoted-printable for bodies.
    """
    total = 0
    for octet in bytearray:
        # Each octet contributes the length of its body-map expansion.
        total += len(_QUOPRI_BODY_MAP[octet])
    return total

109

110

def _max_append(L, s, maxlen, extra=''):
    """Append s to the last entry of L, or start a new entry if it won't fit.

    :param L: List of strings, modified in place.
    :param s: Text to add; a non-string value is converted with chr().
    :param maxlen: Limit compared against len(L[-1]) + len(s).  The length of
        `extra` is not counted in that comparison.
    :param extra: Separator inserted before s when it joins the last entry.

    When a new entry is started, leading whitespace is stripped from s.
    """
    text = s if isinstance(s, str) else chr(s)
    fits = bool(L) and len(L[-1]) + len(text) <= maxlen
    if fits:
        L[-1] += extra + text
    else:
        L.append(text.lstrip())

120

121

def unquote(s):
    """Turn a string in the form =AB to the character with value 0xAB.

    Only the two characters following the first one are read; they are
    parsed as a hexadecimal number (either letter case is accepted by int()).
    """
    hex_pair = s[1:3]
    return chr(int(hex_pair, 16))

125

126

def quote(c):
    """Return the escape for the single character c: "=" followed by its
    code point as at least two upper-case hexadecimal digits."""
    return '={:02X}'.format(ord(c))

129

130

131

def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with quoted-printable (like) encoding.

    This is the `Q' encoding of RFC 2047.  It is similar to quoted-printable
    but is used specifically for email header fields, so that charsets with
    mostly 7 bit characters (and some 8 bit) remain more or less readable in
    mail clients that do not understand encoded words.

    :param header_bytes: Iterable of octets to encode.
    :param charset: Character set name placed in the encoded word.  It
        defaults to iso-8859-1 and is inserted as given.
    :return: '' for empty input, otherwise '=?<charset>?q?<encoded>?='.
    """
    # Return empty headers as an empty string.
    if not header_bytes:
        return ''
    # Expand every octet through the header map and glue the pieces together.
    payload = EMPTYSTRING.join(
        _QUOPRI_HEADER_MAP[octet] for octet in header_bytes)
    # Wrap the payload in the encoded-word chrome.
    return '=?%s?q?%s?=' % (charset, payload)

153

154

class _body_accumulator(io.StringIO):
    """Text buffer that tracks how much room is left on the current line.

    `room` starts at `maxlinelen` and is reduced by every write_str() call;
    newline() resets it.  Writes made directly with write() do not touch it.
    """

    def __init__(self, maxlinelen, eol, *args, **kw):
        super().__init__(*args, **kw)
        self.eol = eol
        self.maxlinelen = maxlinelen
        self.room = maxlinelen

    def write_str(self, s):
        """Add string s to the accumulated body."""
        self.write(s)
        self.room = self.room - len(s)

    def newline(self):
        """Write eol, then start new line."""
        self.write_str(self.eol)
        # A fresh line has the full width available again.
        self.room = self.maxlinelen

    def write_soft_break(self):
        """Write a soft break, then start a new line."""
        self.write_str('=')
        self.newline()

    def write_wrapped(self, s, extra_room=0):
        """Add a soft line break if needed, then write s."""
        needed = len(s) + extra_room
        if self.room < needed:
            self.write_soft_break()
        self.write_str(s)

    def write_char(self, c, is_last_char):
        """Write one (possibly already quoted) character of a line.

        :param c: The text to write; a literal character or an "=XX" escape.
        :param is_last_char: True when nothing else follows c on its line.
        """
        if not is_last_char:
            # Another character follows on this line, so we must leave
            # extra room, either for it or a soft break, and whitespace
            # need not be quoted.
            self.write_wrapped(c, extra_room=1)
            return

        # From here on no more characters follow, so there is no need to
        # reserve extra room (a hard break will immediately follow).
        if c not in ' \t':
            self.write_wrapped(c)
            return

        # c is whitespace at end-of-line.  These branches use write()
        # directly, so `room` is not reduced by what they emit.
        if self.room >= 3:
            # There is room for the three-character quoted encoding.
            self.write(quote(c))
        elif self.room == 2:
            # There's room for the whitespace character and a soft break.
            self.write(c)
            self.write_soft_break()
        else:
            # There's room only for a soft break.  The quoted whitespace
            # will be the only content on the subsequent line.
            self.write_soft_break()
            self.write(quote(c))

207

208

def body_encode(body, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    Each line will be wrapped at, at most, maxlinelen characters before the
    eol string (maxlinelen defaults to 76 characters, the maximum value
    permitted by RFC 2045).  Long lines will have the 'soft line break'
    quoted-printable character "=" appended to them, so the decoded text will
    be identical to the original text.

    The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
    followed by a soft line break.  Smaller values will generate a
    ValueError.

    Only "\\r\\n", "\\r" and "\\n" are treated as line terminators in body;
    each becomes eol in the output.  Every other character that is not body
    safe, including other control characters, is written as an "=XX" escape.

    :param body: The text to encode.  An empty body is returned unchanged.
    :param maxlinelen: Maximum encoded line length, at least 4.
    :param eol: Line terminator written to the output.
    :return: The encoded text.
    :raises ValueError: If maxlinelen is smaller than 4.
    """
    if maxlinelen < 4:
        raise ValueError("maxlinelen must be at least 4")
    if not body:
        return body

    # The last line may or may not end in eol, but all other lines do.
    last_has_eol = body[-1] in '\r\n'

    # Split on CRLF, CR and LF only.  str.splitlines() would also break on
    # characters such as \x0b, \x0c, \x1c-\x1e and \x85 and drop them from
    # the lines, so they would come out as eol instead of being quoted.
    lines = body.replace('\r\n', '\n').replace('\r', '\n').split('\n')
    if last_has_eol:
        # split() leaves an empty string after a trailing terminator; it is
        # not a line of its own.
        lines.pop()

    # This accumulator will make it easier to build the encoded body.
    encoded_body = _body_accumulator(maxlinelen, eol)

    last_line_no = len(lines) - 1
    for line_no, line in enumerate(lines):
        last_char_index = len(line) - 1
        for i, c in enumerate(line):
            if body_check(ord(c)):
                c = quote(c)
            encoded_body.write_char(c, i == last_char_index)
        # Add an eol if input line had eol.  All input lines have eol except
        # possibly the last one.
        if line_no < last_line_no or last_has_eol:
            encoded_body.newline()

    return encoded_body.getvalue()

253

254

255

256# BAW: I'm not sure if the intent was for the signature of this function to be

257# the same as base64MIME.decode() or not...

def decode(encoded, eol=NL):
    """Decode a quoted-printable string.

    Lines are separated with eol, which defaults to \\n.

    Trailing whitespace is stripped from every input line before decoding.
    An "=" at the very end of a line is a soft line break: the line is joined
    to the next one without an eol.  An "=" followed by two hexadecimal
    digits is replaced by the character with that value; any other "=" is
    passed through literally.

    :param encoded: The quoted-printable text.  An empty value is returned
        unchanged.
    :param eol: Line terminator written after every decoded line.
    :return: The decoded text.  It ends with eol only if the input ended with
        a "\\r" or "\\n".
    """
    if not encoded:
        return encoded

    # Collect the pieces and join once at the end rather than growing a
    # string one character at a time.
    pieces = []
    # True when the most recently processed line was closed with eol.
    hard_break = False

    for line in encoded.splitlines():
        line = line.rstrip()
        n = len(line)
        i = 0
        hard_break = True
        while i < n:
            c = line[i]
            if c != '=':
                pieces.append(c)
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so,
            # it is a soft line break and no eol is emitted for this line.
            elif i + 1 == n:
                hard_break = False
                break
            # Decode if in form =AB
            elif (i + 2 < n and line[i + 1] in hexdigits
                    and line[i + 2] in hexdigits):
                pieces.append(unquote(line[i:i + 3]))
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                pieces.append(c)
                i += 1
        if hard_break:
            pieces.append(eol)

    # Special case if original string did not end with eol: drop the eol that
    # was added for the last line.  Removing that exact piece (instead of
    # slicing off one character) is correct for any eol length and never
    # removes decoded content that merely happens to end with eol.
    if hard_break and encoded[-1] not in '\r\n':
        pieces.pop()
    return ''.join(pieces)


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode

308

309

310

def _unquote_match(match):
    """Turn a match in the form =AB to the character with value 0xAB.

    :param match: A match object whose whole matched text is passed to
        unquote().
    """
    return unquote(match.group(0))

315

316

317# Header decoding is done a bit differently

# Header decoding is done a bit differently
def header_decode(s):
    """Decode a string encoded with RFC 2047 MIME header `Q' encoding.

    Underscores are turned into spaces, then every "=XX" escape (two
    hexadecimal digits) is replaced by the character with that value.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.header class for that functionality.

    :param s: The encoded text, without the surrounding encoded-word chrome.
    :return: The decoded text.
    """
    s = s.replace('_', ' ')
    # re.ASCII must be passed by keyword: the fourth positional parameter of
    # re.sub() is `count`, so passing the flag positionally limited the number
    # of replacements to its integer value and never applied the flag.
    return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, flags=re.ASCII)

326 return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, re.ASCII)