Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/base64.py: 5%

308 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-01-17 06:13 +0000

1#! /usr/bin/env python3 

2 

3"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings""" 

4 

5# Modified 04-Oct-1995 by Jack Jansen to use binascii module 

6# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support 

7# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere 

8 

9import re 

10import struct 

11import binascii 

12 

13 

14__all__ = [ 

15 # Legacy interface exports traditional RFC 2045 Base64 encodings 

16 'encode', 'decode', 'encodebytes', 'decodebytes', 

17 # Generalized interface for other encodings 

18 'b64encode', 'b64decode', 'b32encode', 'b32decode', 

19 'b16encode', 'b16decode', 

20 # Base85 and Ascii85 encodings 

21 'b85encode', 'b85decode', 'a85encode', 'a85decode', 

22 # Standard Base64 encoding 

23 'standard_b64encode', 'standard_b64decode', 

24 # Some common Base64 alternatives. As referenced by RFC 3548, see thread 

25 # starting at: 

26 # 

27 # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html 

28 'urlsafe_b64encode', 'urlsafe_b64decode', 

29 ] 

30 

31 

# Concrete types that are accepted as binary data without any conversion.
bytes_types = (bytes, bytearray)


def _bytes_from_decode_data(s):
    """Normalise decoder input to a bytes-like value.

    bytes and bytearray objects are returned unchanged.  A str is encoded
    as ASCII.  Anything else is read through memoryview() and copied into
    a new bytes object.

    Raises ValueError for a str containing non-ASCII characters and
    TypeError for an object that memoryview() rejects.
    """
    if isinstance(s, bytes_types):
        return s
    if not isinstance(s, str):
        try:
            return memoryview(s).tobytes()
        except TypeError:
            # "from None" hides the memoryview error; the message below
            # already names the offending type.
            raise TypeError("argument should be a bytes-like object or ASCII "
                            "string, not %r" % s.__class__.__name__) from None
    try:
        return s.encode('ascii')
    except UnicodeEncodeError:
        raise ValueError('string argument should contain only ASCII characters')

47 

48 

49# Base64 encoding/decoding uses binascii 

50 

def b64encode(s, altchars=None):
    """Return the Base64 encoding of the bytes-like object s as bytes.

    When altchars is given it must be a byte string of length 2; its two
    bytes replace '+' and '/' in the output, which lets a caller produce
    e.g. URL- or filesystem-safe Base64 text.
    """
    encoded = binascii.b2a_base64(s, newline=False)
    if altchars is None:
        return encoded
    assert len(altchars) == 2, repr(altchars)
    substitution = bytes.maketrans(b'+/', altchars)
    return encoded.translate(substitution)

63 

64 

def b64decode(s, altchars=None, validate=False):
    """Decode the Base64 encoded bytes-like object or ASCII string s.

    altchars, when given, is a bytes-like object or ASCII string of
    length 2 naming the characters that were used in place of '+' and
    '/'; they are mapped back before decoding.

    With validate false (the default) characters outside the alphabet
    are discarded before the padding check.  With validate true the
    input must consist solely of alphabet characters followed by at most
    two '=' signs, otherwise binascii.Error is raised.

    The decoded data is returned as a bytes object.  binascii.Error is
    raised if s is incorrectly padded.
    """
    data = _bytes_from_decode_data(s)
    if altchars is not None:
        alt = _bytes_from_decode_data(altchars)
        assert len(alt) == 2, repr(alt)
        data = data.translate(bytes.maketrans(alt, b'+/'))
    if validate:
        if re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', data) is None:
            raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(data)

88 

89 

def standard_b64encode(s):
    """Return the standard-alphabet Base64 encoding of bytes-like s.

    This is b64encode() with no alternative characters; the result is a
    bytes object.
    """
    encoded = b64encode(s)
    return encoded

96 

def standard_b64decode(s):
    """Decode s, which was encoded with the standard Base64 alphabet.

    s is a bytes-like object or ASCII string.  This is b64decode() with
    its default arguments: characters outside the standard alphabet are
    discarded before the padding check, and binascii.Error is raised for
    incorrectly padded input.  The result is a bytes object.
    """
    decoded = b64decode(s)
    return decoded

106 

107 

# Byte translation tables between the standard alphabet ('+', '/') and the
# URL-safe alphabet ('-', '_').
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')


def urlsafe_b64encode(s):
    """Encode bytes-like s with the URL- and filesystem-safe alphabet.

    The data is Base64 encoded and then '+' is replaced by '-' and '/'
    by '_'.  The result is returned as a bytes object.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)

119 

def urlsafe_b64decode(s):
    """Decode s, which uses the URL- and filesystem-safe Base64 alphabet.

    s is a bytes-like object or ASCII string in which '-' stands for '+'
    and '_' stands for '/'.  Those two characters are mapped back and
    the result is passed to b64decode() with its default arguments, so
    characters that belong to neither alphabet are discarded before the
    padding check and binascii.Error is raised for incorrectly padded
    input.  The decoded data is returned as a bytes object.
    """
    raw = _bytes_from_decode_data(s)
    return b64decode(raw.translate(_urlsafe_decode_translation))

134 

135 

136 

# Base32 encoding/decoding must be done in Python
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32tab2 = None   # built on first b32encode() call: all 1024 two-char pairs
_b32rev = None    # built on first b32decode() call: byte value -> 5-bit digit


def b32encode(s):
    """Return the Base32 encoding of the bytes-like object s as bytes."""
    global _b32tab2
    # The pair table is created lazily so that it costs no memory when
    # the function is never called.
    if _b32tab2 is None:
        singles = [bytes((code,)) for code in _b32alphabet]
        _b32tab2 = [first + second for first in singles for second in singles]

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    leftover = len(s) % 5
    if leftover:
        # Zero-fill the last 5-byte quantum.  A new object is built on
        # purpose: s may be the caller's own bytearray, and += would
        # extend it in place.
        s = s + b'\0' * (5 - leftover)

    pairs = _b32tab2
    out = bytearray()
    for start in range(0, len(s), 5):
        quantum = int.from_bytes(s[start:start + 5], 'big')
        # 40 bits -> four 10-bit indexes, each yielding two characters.
        for shift in (30, 20, 10, 0):
            out += pairs[(quantum >> shift) & 0x3ff]

    if leftover:
        # Characters produced purely by the zero fill become '='.
        pad_len = {1: 6, 2: 4, 3: 3, 4: 1}[leftover]
        out[-pad_len:] = b'=' * pad_len
    return bytes(out)

179 

def b32decode(s, casefold=False, map01=None):
    """Decode the Base32 encoded bytes-like object or ASCII string s.

    casefold is a flag specifying whether a lowercase alphabet is
    acceptable as input.  For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and of the digit 1 (one) to either the letter I (eye)
    or the letter L (el).  When map01 is not None it names the letter the
    digit 1 is mapped to, and the digit 0 is always mapped to O.  For
    security purposes the default is None, so that 0 and 1 are not
    allowed in the input.

    The result is returned as a bytes object.  binascii.Error is raised
    if the input is incorrectly padded or contains non-alphabet
    characters.
    """
    global _b32rev
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _b32rev is None:
        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  map01 is either None or
    # the character to map the digit 1 (one) to; it should be either
    # L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip the pad characters from the right, counting them: the count
    # tells how many bytes of the final quantum are real data.
    padded_len = len(s)
    s = s.rstrip(b'=')
    padchars = padded_len - len(s)
    # Decode every quantum, including a trailing short one.
    decoded = bytearray()
    b32rev = _b32rev
    for i in range(0, len(s), 8):
        acc = 0
        try:
            for c in s[i:i + 8]:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # The total length was already verified to be a multiple of 8 above
    # (translate() and upper() do not change it), so only the pad count
    # needs checking here.
    if padchars not in {0, 1, 3, 4, 6}:
        raise binascii.Error('Incorrect padding')
    if padchars and decoded:
        # Re-align the short final quantum to 40 bits and keep only the
        # bytes that carry data.
        acc <<= 5 * padchars
        last = acc.to_bytes(5, 'big')
        leftover = (43 - 5 * padchars) // 8  # 1: 4, 3: 3, 4: 2, 6: 1
        decoded[-5:] = last[:leftover]
    return bytes(decoded)

242 

243 

# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase.  The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
    """Return the Base16 encoding of the bytes-like object s as bytes.

    The hexadecimal digits in the result are uppercase.
    """
    lowercase_hex = binascii.hexlify(s)
    return lowercase_hex.upper()

251 

252 

def b16decode(s, casefold=False):
    """Decode the Base16 encoded bytes-like object or ASCII string s.

    casefold is a flag specifying whether lowercase hexadecimal digits
    are acceptable as input.  For security purposes, the default is
    False.

    The result is returned as a bytes object.  binascii.Error is raised
    if s is incorrectly padded or contains characters other than 0-9
    and A-F.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    if re.search(b'[^0-9A-F]', data) is not None:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)

269 

270# 

271# Ascii85 encoding/decoding 

272# 

273 

_a85chars = None    # built on first a85encode() call: the 85 digits
_a85chars2 = None   # built on first a85encode() call: all 7225 digit pairs
_A85START = b"<~"
_A85END = b"~>"


def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared encoder behind a85encode() and b85encode().

    chars is the table of single output digits and chars2 the table of
    all two-digit combinations.  The input is zero-filled to a multiple
    of 4 bytes and every 32-bit big-endian word becomes 5 digits.  With
    foldnuls an all-zero word is written as b'z'; with foldspaces a word
    of four spaces is written as b'y'.  Unless pad is true, the digits
    that correspond to the zero fill are removed from the last group.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    word_count = len(b) // 4
    words = struct.unpack('!%dI' % word_count, b)

    chunks = []
    for word in words:
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # Split the word into pair, pair, single: 85 * 7225 == 614125.
            upper, last_digit = divmod(word, 85)
            first_pair, second_pair = divmod(upper, 7225)
            chunks.append(chars2[first_pair] + chars2[second_pair] +
                          chars[last_digit])

    if padding and not pad:
        # A folded 'z' must be expanded again before it can be truncated.
        if chunks[-1] == b'z':
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)

302 

def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode bytes-like object b using Ascii85 and return a bytes object.

    foldspaces is an optional flag that uses the special short sequence 'y'
    instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
    feature is not supported by the "standard" Adobe encoding.

    wrapcol controls whether the output should have newline (b'\\n') characters
    added to it. If this is non-zero, each output line will be at most this
    many characters long.

    pad controls whether the input is padded to a multiple of 4 before
    encoding. Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and ~>,
    which is used by the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # Delay the initialization of tables to not waste memory
    # if the function is never called.  Both tables are built in locals
    # and the guard tests the one published last, so an interrupted or
    # concurrent first call can never leave _a85chars set while
    # _a85chars2 is still None.
    if _a85chars2 is None:
        singles = [bytes((i,)) for i in range(33, 118)]
        pairs = [(first + second) for first in singles for second in singles]
        _a85chars = singles
        _a85chars2 = pairs

    # All-zero words are always folded to 'z' for Ascii85.
    result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

    if adobe:
        result = _A85START + result
    if wrapcol:
        # A line must be able to hold at least the 2-byte Adobe marker.
        wrapcol = max(2 if adobe else 1, wrapcol)
        chunks = [result[i: i + wrapcol]
                  for i in range(0, len(result), wrapcol)]
        if adobe:
            # Start a new line when the end marker would not fit on the
            # last one.
            if len(chunks[-1]) + 2 > wrapcol:
                chunks.append(b'')
        result = b'\n'.join(chunks)
    if adobe:
        result += _A85END

    return result

343 

def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces says whether the short sequence 'y' is accepted as
    shorthand for 4 consecutive spaces (ASCII 0x20).  The "standard"
    Adobe encoding does not support it.

    adobe says whether the input is in Adobe Ascii85 format: it must
    then end with ~> and may start with <~.

    ignorechars is a byte string of characters to skip in the input.  It
    should contain only whitespace characters; the default is the set of
    ASCII whitespace characters.

    The result is returned as a bytes object.  ValueError is raised for
    a missing Adobe end marker, a digit outside the alphabet, a 'z' or
    'y' inside a 5-digit group, or a group whose value overflows 32 bits.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
        if not b.endswith(_A85END):
            raise ValueError(
                "Ascii85 encoded byte sequences must end "
                "with {!r}".format(_A85END)
                )
        # Drop the end marker, and the start marker when it is present.
        b = b[2:-2] if b.startswith(_A85START) else b[:-2]

    # The input is walked byte by byte so that ignorable characters can
    # be skipped and the short sequences handled.
    pack_word = struct.Struct('!I').pack
    first_digit, last_digit = b'!'[0], b'u'[0]
    zero_marker, space_marker = b'z'[0], b'y'[0]
    pieces = []
    group = []
    # Four extra 'u' digits complete a trailing partial group; the bytes
    # they contribute are cut off again below.
    for x in b + b'u' * 4:
        if first_digit <= x <= last_digit:
            group.append(x)
            if len(group) < 5:
                continue
            acc = 0
            for digit in group:
                acc = 85 * acc + (digit - 33)
            try:
                pieces.append(pack_word(acc))
            except struct.error:
                raise ValueError('Ascii85 overflow') from None
            group.clear()
        elif x == zero_marker:
            if group:
                raise ValueError('z inside Ascii85 5-tuple')
            pieces.append(b'\0\0\0\0')
        elif foldspaces and x == space_marker:
            if group:
                raise ValueError('y inside Ascii85 5-tuple')
            pieces.append(b'\x20\x20\x20\x20')
        elif x in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % x)

    result = b''.join(pieces)
    padding = 4 - len(group)
    if padding:
        # Throw away the bytes that came from the extra padding digits
        result = result[:-padding]
    return result

413 

# The following code is originally taken (with permission) from Mercurial

_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
_b85chars = None    # built on first b85encode() call: the 85 digits
_b85chars2 = None   # built on first b85encode() call: all 7225 digit pairs
_b85dec = None      # built on first b85decode() call: byte value -> digit


def b85encode(b, pad=False):
    """Encode bytes-like object b in base85 format and return a bytes object.

    If pad is true, the input is padded with b'\\0' so its length is a multiple of
    4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # Delay the initialization of tables to not waste memory
    # if the function is never called.  Both tables are built in locals
    # and the guard tests the one published last, so an interrupted or
    # concurrent first call can never leave _b85chars set while
    # _b85chars2 is still None.
    if _b85chars2 is None:
        singles = [bytes((i,)) for i in _b85alphabet]
        pairs = [(first + second) for first in singles for second in singles]
        _b85chars = singles
        _b85chars2 = pairs
    return _85encode(b, _b85chars, _b85chars2, pad)

435 

def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b

    The result is returned as a bytes object.  ValueError is raised for
    a character outside the base85 alphabet or for a 5-character group
    whose value does not fit in 32 bits.
    """
    global _b85dec
    # Delay the initialization of tables to not waste memory
    # if the function is never called.  The table is filled in a local
    # and published only when complete, so an interrupted or concurrent
    # first call never exposes a half-filled table.
    if _b85dec is None:
        table = [None] * 256
        for i, c in enumerate(_b85alphabet):
            table[c] = i
        _b85dec = table
    dec = _b85dec

    b = _bytes_from_decode_data(b)
    # Complete the last group with the highest digit; the bytes it
    # contributes are cut off again below.
    padding = (-len(b)) % 5
    b = b + b'~' * padding
    out = []
    packI = struct.Struct('!I').pack
    for i in range(0, len(b), 5):
        chunk = b[i:i + 5]
        acc = 0
        try:
            for c in chunk:
                acc = acc * 85 + dec[c]
        except TypeError:
            # A None table entry caused the failure: find the offending
            # character to report its position.
            for j, c in enumerate(chunk):
                if dec[c] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (i + j)) from None
            raise
        try:
            out.append(packI(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % i) from None

    result = b''.join(out)
    if padding:
        result = result[:-padding]
    return result

476 

477# Legacy interface. This code could be cleaned up since I don't believe 

478# binascii has any line length limitations. It just doesn't seem worth it 

479# though. The files should be opened in binary mode. 

480 

MAXLINESIZE = 76  # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3  # input bytes that encode to one full line


def encode(input, output):
    """Encode a file; input and output are binary files.

    The input is consumed in pieces of MAXBINSIZE bytes and each piece
    is written to output as one newline-terminated Base64 line.
    """
    read = input.read
    write = output.write
    chunk = read(MAXBINSIZE)
    while chunk:
        # A read may return less than requested; keep reading until the
        # piece is full or the input is exhausted.
        missing = MAXBINSIZE - len(chunk)
        while missing > 0:
            extra = read(missing)
            if not extra:
                break
            chunk += extra
            missing = MAXBINSIZE - len(chunk)
        write(binascii.b2a_base64(chunk))
        chunk = read(MAXBINSIZE)

497 

498 

def decode(input, output):
    """Decode a file; input and output are binary files.

    Each line read from input is Base64 decoded on its own and the
    resulting bytes are written to output.
    """
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()

507 

def _input_type_check(s):
    """Raise TypeError unless s is a 1-D buffer of single-byte elements.

    s must be accepted by memoryview(), the view's format must be 'c',
    'b' or 'B', and it must have exactly one dimension.  Returns None
    when all checks pass.
    """
    try:
        view = memoryview(s)
    except TypeError as err:
        raise TypeError("expected bytes-like object, not %s"
                        % s.__class__.__name__) from err
    if view.format not in ('c', 'b', 'B'):
        raise TypeError("expected single byte elements, not %r from %s" %
                        (view.format, s.__class__.__name__))
    if view.ndim != 1:
        raise TypeError("expected 1-D data, not %d-D data from %s" %
                        (view.ndim, s.__class__.__name__))

522 

523 

def encodebytes(s):
    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data.

    s is cut into pieces of MAXBINSIZE bytes and every piece becomes one
    newline-terminated Base64 line.
    """
    _input_type_check(s)
    return b"".join(
        binascii.b2a_base64(s[start:start + MAXBINSIZE])
        for start in range(0, len(s), MAXBINSIZE)
    )

533 

def encodestring(s):
    """Legacy alias of encodebytes().

    Emits a DeprecationWarning attributed to the caller and then
    delegates to encodebytes().
    """
    import warnings
    message = ("encodestring() is a deprecated alias since 3.1, "
               "use encodebytes()")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return encodebytes(s)

541 

542 

def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytes object.

    s must pass _input_type_check(); a TypeError is raised otherwise.
    """
    _input_type_check(s)
    decoded = binascii.a2b_base64(s)
    return decoded

547 

def decodestring(s):
    """Legacy alias of decodebytes().

    Emits a DeprecationWarning attributed to the caller and then
    delegates to decodebytes().
    """
    import warnings
    message = ("decodestring() is a deprecated alias since Python 3.1, "
               "use decodebytes()")
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return decodebytes(s)

555 

556 

557# Usable as a script... 

def main():
    """Small main program

    Parses the options -d/-u (decode), -e (encode, the default) and -t
    (run test() and return).  The data is read from the file named by the
    first argument, or from standard input when there is no argument or
    it is '-', and the result is written to standard output.  On an
    option error the message and a usage text go to standard error and
    the process exits with status 2.
    """
    import getopt
    import sys

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write to stderr explicitly instead of rebinding sys.stdout, so
        # the global stream is untouched for anyone handling SystemExit.
        print(msg, file=sys.stderr)
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'""" % sys.argv[0],
              file=sys.stderr)
        sys.exit(2)
    func = encode
    # Options are applied in order; the last of -e/-d/-u wins and -t
    # short-circuits everything else.
    for opt, _ in opts:
        if opt == '-t':
            test()
            return
        if opt == '-e':
            func = encode
        elif opt in ('-d', '-u'):
            func = decode
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)

582 

583 

def test():
    """Round-trip a sample string through encodebytes() and decodebytes().

    Prints the repr of the original, the encoded and the decoded value
    and asserts that the decoded value equals the original.
    """
    original = b"Aladdin:open sesame"
    print(repr(original))
    encoded = encodebytes(original)
    print(repr(encoded))
    round_tripped = decodebytes(encoded)
    print(repr(round_tripped))
    assert original == round_tripped

592 

593 

594if __name__ == '__main__': 

595 main()