Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/future/backports/email/quoprimime.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

132 statements  

1# Copyright (C) 2001-2006 Python Software Foundation 

2# Author: Ben Gertzfield 

3# Contact: email-sig@python.org 

4 

5"""Quoted-printable content transfer encoding per RFCs 2045-2047. 

6 

7This module handles the content transfer encoding method defined in RFC 2045 

8to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to 

9safely encode text that is in a character set similar to the 7-bit US ASCII 

10character set, but that includes some 8-bit characters that are normally not 

11allowed in email bodies or headers. 

12 

13Quoted-printable is very space-inefficient for encoding binary files; use the 

14email.base64mime module for that instead. 

15 

16This module provides an interface to encode and decode both headers and bodies 

17with quoted-printable encoding. 

18 

19RFC 2047 defines a method for including character set information in an 

20`encoded-word' in a header. This method is commonly used for 8-bit real names 

21in To:/From:/Cc: etc. fields, as well as Subject: lines. 

22 

23This module does not do the line wrapping or end-of-line character 

24conversion necessary for proper internationalized headers; it only 

25does dumb encoding and decoding. To deal with the various line 

26wrapping issues, use the email.header module. 

27""" 

28from __future__ import unicode_literals 

29from __future__ import division 

30from __future__ import absolute_import 

31from future.builtins import bytes, chr, dict, int, range, super 

32 

33__all__ = [ 

34 'body_decode', 

35 'body_encode', 

36 'body_length', 

37 'decode', 

38 'decodestring', 

39 'header_decode', 

40 'header_encode', 

41 'header_length', 

42 'quote', 

43 'unquote', 

44 ] 

45 

46import re 

47import io 

48 

49from string import ascii_letters, digits, hexdigits 

50 

CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# Octet -> quoted-printable text, one table for headers and one for bodies.
# All 256 octets start out mapped to their full "=XX" escape; the octets that
# are safe in each context are then overridden with the literal character.
# Headers and bodies have different sets of safe characters, hence two maps.
_QUOPRI_HEADER_MAP = dict((octet, '=%02X' % octet) for octet in range(256))
_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()

# Safe header bytes which need no encoding.  Feeding update() a generator
# (instead of a module-level for loop) keeps the iteration variable out of
# the module namespace.
_QUOPRI_HEADER_MAP.update(
    (octet, chr(octet))
    for octet in bytes(b'-!*+/' + ascii_letters.encode('ascii') +
                       digits.encode('ascii')))
# Headers have one other special encoding; spaces become underscores.
_QUOPRI_HEADER_MAP[ord(' ')] = '_'

# Safe body bytes which need no encoding.
_QUOPRI_BODY_MAP.update(
    (octet, chr(octet))
    for octet in bytes(b' !"#$%&\'()*+,-./0123456789:;<>'
                       b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
                       b'abcdefghijklmnopqrstuvwxyz{|}~\t'))

74 

75 

76 

77# Helpers 

def header_check(octet):
    """Return True if the octet should be escaped with header quopri.

    An octet needs escaping exactly when its entry in the header map is
    something other than the octet's own character.
    """
    literal = chr(octet)
    return _QUOPRI_HEADER_MAP[octet] != literal

81 

82 

def body_check(octet):
    """Return True if the octet should be escaped with body quopri.

    An octet needs escaping exactly when its entry in the body map is
    something other than the octet's own character.
    """
    literal = chr(octet)
    return _QUOPRI_BODY_MAP[octet] != literal

86 

87 

def header_length(bytearray):
    """Return the length of a byte sequence once header-quopri encoded.

    The RFC 2047 chrome that `header_encode()` wraps around the encoded
    text is not counted.

    :param bytearray: An iterable of octets (integers usable as keys of the
        header map).
    :return: The summed length of the header encodings of every octet.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_HEADER_MAP[octet])
    return total

99 

100 

def body_length(bytearray):
    """Return the length of a byte sequence once body-quopri encoded.

    :param bytearray: An iterable of octets (integers usable as keys of the
        body map).
    :return: The summed length of the body encodings of every octet.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_BODY_MAP[octet])
    return total

109 

110 

def _max_append(L, s, maxlen, extra=''):
    """Append s to the list of chunks L, growing the last chunk when it fits.

    A non-string s is first converted with chr().  When L already has a last
    chunk and that chunk plus s is at most maxlen characters long, s is
    joined onto it (preceded by extra; extra itself is not counted against
    maxlen).  Otherwise s, stripped of leading whitespace, starts a new chunk.
    L is modified in place.
    """
    if not isinstance(s, str):
        s = chr(s)
    if L and len(L[-1]) + len(s) <= maxlen:
        L[-1] += extra + s
    else:
        # Either the list is empty or the last chunk is full.
        L.append(s.lstrip())

120 

121 

def unquote(s):
    """Turn a string in the form =AB to the character with value 0xAB.

    Only s[1:3] is examined: the first character is skipped and anything
    after the two hex digits is ignored.
    """
    hex_pair = s[1:3]
    code_point = int(hex_pair, 16)
    return chr(code_point)

125 

126 

def quote(c):
    """Return the escape "=XX" for character c (XX = two uppercase hex digits)."""
    code_point = ord(c)
    return '=%02X' % code_point

129 

130 

131 

def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with quoted-printable (like) encoding.

    This is the `Q' encoding used for encoded-words in email header fields:
    it is similar to quoted-printable and keeps text in mostly 7-bit
    charsets more or less readable in mail clients that do not decode it.

    header_bytes is an iterable of octets; each one is replaced by its entry
    in the header map.  charset names the character set placed in the
    encoded-word and defaults to iso-8859-1.

    Returns the string '=?<charset>?q?<encoded text>?=', or an empty string
    when header_bytes is empty.
    """
    # Return empty headers as an empty string.
    if not header_bytes:
        return ''
    # Encode every octet and glue the pieces together, then add the chrome.
    payload = EMPTYSTRING.join(
        _QUOPRI_HEADER_MAP[octet] for octet in header_bytes)
    return '=?%s?q?%s?=' % (charset, payload)

153 

154 

class _body_accumulator(io.StringIO):
    """StringIO that tracks how much room is left on the current output line.

    `room` starts at `maxlinelen`, shrinks by the length of everything
    written through `write_str()`, and is reset by `newline()`.
    """

    def __init__(self, maxlinelen, eol, *args, **kw):
        super().__init__(*args, **kw)
        self.eol = eol
        self.maxlinelen = maxlinelen
        self.room = maxlinelen

    def write_str(self, s):
        """Add string s to the accumulated body and charge it against room."""
        self.write(s)
        self.room -= len(s)

    def newline(self):
        """Write eol, then start a new line with full room."""
        self.write_str(self.eol)
        self.room = self.maxlinelen

    def write_soft_break(self):
        """Write a soft break ("=" followed by eol), then start a new line."""
        self.write_str('=')
        self.newline()

    def write_wrapped(self, s, extra_room=0):
        """Write s, preceded by a soft line break if s plus extra_room
        does not fit in the remaining room."""
        needed = len(s) + extra_room
        if needed > self.room:
            self.write_soft_break()
        self.write_str(s)

    def write_char(self, c, is_last_char):
        """Write one already-encoded character c of an input line.

        is_last_char tells whether c is the final character of its line;
        trailing whitespace there has to be quoted.
        """
        if not is_last_char:
            # Another character follows on this line, so we must leave
            # extra room, either for it or a soft break, and whitespace
            # need not be quoted.
            self.write_wrapped(c, extra_room=1)
            return

        # From here on no more characters follow on this line, so there is
        # no need to reserve extra room (a hard break comes next).
        if c not in ' \t':
            self.write_wrapped(c)
            return

        # Whitespace at end-of-line.  These branches use write() directly,
        # so room is not charged for the text they emit.
        if self.room >= 3:
            # Room for the three-character quoted encoding.
            self.write(quote(c))
        elif self.room == 2:
            # Room for the whitespace character and a soft break.
            self.write(c)
            self.write_soft_break()
        else:
            # Room only for a soft break.  The quoted whitespace will be
            # the only content on the subsequent line.
            self.write_soft_break()
            self.write(quote(c))

207 

208 

def body_encode(body, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.

    Every encoded line ends with eol, which defaults to "\\n".  Pass
    "\\r\\n" if the result is going to be used directly in an email.

    Lines are wrapped so that at most maxlinelen characters precede eol
    (maxlinelen defaults to 76).  A wrapped line gets the quoted-printable
    soft line break character "=" appended, so decoding gives back the
    original text.

    maxlinelen must be at least 4, enough for one quoted character ("=XX")
    followed by a soft line break; smaller values raise ValueError.  An
    empty body is returned unchanged.
    """
    if maxlinelen < 4:
        raise ValueError("maxlinelen must be at least 4")
    if not body:
        return body

    # Every input line ends in a line break except possibly the last one.
    ends_with_break = body[-1] in '\r\n'

    out = _body_accumulator(maxlinelen, eol)

    source_lines = body.splitlines()
    final_line = len(source_lines) - 1
    for number, text in enumerate(source_lines):
        final_char = len(text) - 1
        for pos, char in enumerate(text):
            piece = quote(char) if body_check(ord(char)) else char
            out.write_char(piece, pos == final_char)
        # Emit an eol only where the input line had one.
        if ends_with_break or number < final_line:
            out.newline()

    return out.getvalue()

253 

254 

255 

256# BAW: I'm not sure if the intent was for the signature of this function to be 

257# the same as base64MIME.decode() or not... 

def decode(encoded, eol='\n'):
    """Decode a quoted-printable string.

    Trailing whitespace on each input line is dropped.  "=XX" with two hex
    digits becomes the character with that code, a "=" at the very end of a
    line is a soft line break (the line is joined to the next one), and any
    other "=" is passed through literally.

    :param encoded: The quoted-printable text.  A false value (empty string,
        None) is returned unchanged.
    :param eol: Separator written after each decoded line.  Defaults to
        "\\n", the same value as the module constant NL.
    :return: The decoded string.  It ends with eol only if `encoded` itself
        ended with a line break.
    """
    if not encoded:
        return encoded

    # Collect pieces and join once instead of repeated string concatenation.
    pieces = []
    # True while the last piece is an eol written by this function (as
    # opposed to decoded data), so the final trim never removes real data.
    eol_appended = False

    for line in encoded.splitlines():
        line = line.rstrip()
        if not line:
            pieces.append(eol)
            eol_appended = True
            continue

        i = 0
        n = len(line)
        soft_break = False
        while i < n:
            c = line[i]
            if c != '=':
                pieces.append(c)
                i += 1
            elif i + 1 == n:
                # "=" is the last character: soft line break, emit nothing.
                soft_break = True
                i += 1
            elif (i + 2 < n and line[i + 1] in hexdigits
                    and line[i + 2] in hexdigits):
                # Decode the form =AB.
                pieces.append(chr(int(line[i + 1:i + 3], 16)))
                i += 3
            else:
                # Not in the form =AB; pass the "=" through literally.
                pieces.append(c)
                i += 1

        if soft_break:
            eol_appended = False
        else:
            pieces.append(eol)
            eol_appended = True

    # The input did not end with a line break, so drop the eol that was
    # added after its last line.  Popping the piece removes exactly that eol
    # whatever its length (including multi-character or empty eol).
    if eol_appended and encoded[-1] not in '\r\n':
        pieces.pop()
    return ''.join(pieces)


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode

308 

309 

310 

def _unquote_match(match):
    """Turn a match in the form =AB into the character with value 0xAB."""
    return chr(int(match.group(0)[1:3], 16))


# Header decoding is done a bit differently
def header_decode(s):
    """Decode a string encoded with RFC 2047 MIME header `Q' encoding.

    Underscores become spaces and every "=XX" escape (two hex digits) becomes
    the character with that code; anything else is left as it is.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.header class for that functionality.
    """
    s = s.replace('_', ' ')
    # flags must be passed by keyword: the fourth positional argument of
    # re.sub() is `count`, so a positional re.ASCII would cap the number of
    # replacements at 256 instead of setting the flag.
    return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, flags=re.ASCII)