Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyzstd/_zstdfile.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

240 statements  

1import io 

2import warnings 

3try: 

4 from os import PathLike 

5except ImportError: 

6 # For Python 3.5 

7 class PathLike: 

8 pass 

9 

10from pyzstd import ZstdCompressor, _ZstdFileReader, \ 

11 _ZstdFileWriter, _ZSTD_DStreamSizes 

12 

13__all__ = ('ZstdFile', 'open') 

14 

class _ZstdDecompressReader(io.RawIOBase):
    """Raw, read-only stream that exposes a _ZstdFileReader through the
    io.RawIOBase interface, so it can be wrapped by io.BufferedReader."""

    def __init__(self, fp, zstd_dict, option, read_size):
        # Keep the underlying file object: .seekable() and rewinding in
        # .seek() are delegated to it.
        self._fp = fp
        self._decomp = _ZstdFileReader(fp, zstd_dict, option, read_size)

    def close(self):
        # Drop the reference to the decompressor before closing the stream.
        self._decomp = None
        return super().close()

    def readable(self):
        return True

    # Not every file-like object implements .seekable(), so the underlying
    # object is only queried when somebody actually asks.
    def seekable(self):
        return self._fp.seekable()

    def tell(self):
        """Return the position tracked by the decompressor."""
        return self._decomp.pos

    def readinto(self, b):
        return self._decomp.readinto(b)

    def readall(self):
        return self._decomp.readall()

    # io.BufferedReader may satisfy a seek from its own buffer, in which
    # case this method is not invoked at all.
    def seek(self, offset, whence=0):
        """Move to a new position and return it.

        whence follows the usual io convention (0: from start, 1: from the
        current position, 2: from the end). Any other value raises
        ValueError. A target before the current position restarts from the
        beginning of the underlying file and skips forward again.
        """
        decomp = self._decomp

        # Step 1: translate (offset, whence) into an absolute target.
        if whence == io.SEEK_SET:
            target = offset
        elif whence == io.SEEK_CUR:
            target = decomp.pos + offset
        elif whence == io.SEEK_END:
            if decomp.size < 0:
                # Size not known yet: .forward(None) is used to obtain it.
                decomp.forward(None)
            target = decomp.size + offset
        else:
            raise ValueError("Invalid whence value: {}".format(whence))

        # Step 2: work out how many bytes have to be skipped forward.
        if target < decomp.pos:
            # Going backwards: rewind everything, then skip from offset 0.
            decomp.eof = False
            decomp.pos = 0
            decomp.reset_session()
            self._fp.seek(0)
            distance = target
        else:
            distance = target - decomp.pos

        # .forward() does nothing when distance <= 0.
        decomp.forward(distance)

        return decomp.pos

71 

# Buffer size used by ZstdFile for its io.BufferedReader and as the default
# chunk size of ZstdFile.read1().
# NOTE(review): presumably index 1 of _ZSTD_DStreamSizes is zstd's
# recommended decompression output size - confirm against pyzstd.
_ZSTD_DStreamOutSize = _ZSTD_DStreamSizes[1]

73 

# State codes stored in ZstdFile._mode: closed, open for reading, open for
# writing.
_MODE_CLOSED, _MODE_READ, _MODE_WRITE = 0, 1, 2

77 

class _DeprecatedPlaceholder:
    """Type of the sentinel used as default for deprecated parameters.

    Its repr is what shows up as the default value in rendered signatures.
    """

    def __repr__(self):
        return '<DEPRECATED>'


# Single shared sentinel: a parameter still holding this value was not
# passed by the caller.
_DEPRECATED_PLACEHOLDER = _DeprecatedPlaceholder()

82 

class ZstdFile(io.BufferedIOBase):
    """A file object providing transparent zstd (de)compression.

    A ZstdFile can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that ZstdFile provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be an object that
    supports the Buffer Protocol.
    """
    FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK
    FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME

    # Raw reader class wrapped by io.BufferedReader in read mode.
    _READER_CLASS = _ZstdDecompressReader

    def __init__(self, filename, mode="r", *,
                 level_or_option=None, zstd_dict=None,
                 read_size=_DEPRECATED_PLACEHOLDER,
                 write_size=_DEPRECATED_PLACEHOLDER):
        """Open a zstd compressed file in binary mode.

        filename can be either an actual file name (given as a str, bytes, or
        PathLike object), in which case the named file is opened, or it can be
        an existing file object to read from or write to.

        mode can be "r" for reading (default), "w" for (over)writing, "x" for
        creating exclusively, or "a" for appending. These can equivalently be
        given as "rb", "wb", "xb" and "ab" respectively.

        Parameters
        level_or_option: When it's an int object, it represents compression
            level. When it's a dict object, it contains advanced compression
            parameters. Note, in read mode (decompression), it can only be a
            dict object, that represents decompression option. It doesn't
            support int type compression level in this case.
        zstd_dict: A ZstdDict object, pre-trained dictionary for compression /
            decompression.
        read_size, write_size: Deprecated. Passing either one emits a
            DeprecationWarning.

        Raises
        TypeError: level_or_option has an unsupported type for the mode, or
            filename is neither a path nor a file object.
        ValueError: mode is invalid, or a size argument that belongs to the
            other mode was changed from its default.
        """
        # The sentinel is compared by identity, so an argument with a custom
        # __eq__ can never be mistaken for "not passed".
        if read_size is _DEPRECATED_PLACEHOLDER:
            read_size = 131075
        else:
            warnings.warn("pyzstd.ZstdFile()'s read_size parameter is deprecated", DeprecationWarning, stacklevel=2)
        if write_size is _DEPRECATED_PLACEHOLDER:
            write_size = 131591
        else:
            warnings.warn("pyzstd.ZstdFile()'s write_size parameter is deprecated", DeprecationWarning, stacklevel=2)

        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        # Read or write mode
        if mode in ("r", "rb"):
            if not isinstance(level_or_option, (type(None), dict)):
                raise TypeError(
                    ("In read mode (decompression), level_or_option argument "
                     "should be a dict object, that represents decompression "
                     "option. It doesn't support int type compression level "
                     "in this case."))
            if write_size != 131591:
                raise ValueError(
                    "write_size argument is only valid in write modes.")
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "a", "ab", "x", "xb"):
            if not isinstance(level_or_option, (type(None), int, dict)):
                raise TypeError(("level_or_option argument "
                                 "should be int or dict object."))
            if read_size != 131075:
                raise ValueError(
                    "read_size argument is only valid in read mode.")
            mode_code = _MODE_WRITE
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        # File object
        if isinstance(filename, (str, bytes, PathLike)):
            if "b" not in mode:
                mode += "b"
            self._fp = io.open(filename, mode)
            # Only a file opened here is closed by .close().
            self._closefp = True
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
        else:
            raise TypeError(("filename must be a str, bytes, "
                             "file or PathLike object"))

        # Set ._mode here for ._closefp in .close(). If the following code
        # fails, IOBase's cleanup code will call .close(), so that ._fp can
        # be closed.
        self._mode = mode_code

        # Reader or writer
        if mode_code == _MODE_READ:
            raw = self._READER_CLASS(
                self._fp,
                zstd_dict=zstd_dict,
                option=level_or_option,
                read_size=read_size)
            self._buffer = io.BufferedReader(raw, _ZSTD_DStreamOutSize)
        elif mode_code == _MODE_WRITE:
            # Number of uncompressed bytes written so far, see .tell().
            self._pos = 0
            self._writer = _ZstdFileWriter(
                self._fp,
                level_or_option=level_or_option,
                zstd_dict=zstd_dict,
                write_size=write_size)

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return

        try:
            # In .__init__ method, if fails after setting ._mode attribute,
            # these attributes don't exist.
            if hasattr(self, "_buffer"):
                try:
                    self._buffer.close()
                finally:
                    # Set to None for ._check_mode()
                    self._buffer = None
            elif hasattr(self, "_writer"):
                try:
                    self.flush(self.FLUSH_FRAME)
                finally:
                    # Set to None for ._check_mode()
                    self._writer = None
        finally:
            # The underlying file is released even if flushing failed.
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    # None argument means the file should be closed
    def _check_mode(self, expected_mode=None):
        """Raise the exception that explains why an operation failed.

        Raises ValueError if the file is closed, or io.UnsupportedOperation
        if expected_mode does not match the current mode.

        If the mode turns out to be fine, the trailing bare `raise`
        re-raises the exception currently being handled. Callers must
        therefore either be inside an `except` block, or already know that
        the mode is wrong.
        """
        # If closed, raise ValueError.
        if self._mode == _MODE_CLOSED:
            raise ValueError("I/O operation on closed file")

        # Check _MODE_READ/_MODE_WRITE mode
        if expected_mode == _MODE_READ:
            if self._mode != _MODE_READ:
                raise io.UnsupportedOperation("File not open for reading")
        elif expected_mode == _MODE_WRITE:
            if self._mode != _MODE_WRITE:
                raise io.UnsupportedOperation("File not open for writing")

        # Re-raise other AttributeError exception
        raise

    # If modify this method, also modify SeekableZstdFile.write() method.
    def write(self, data):
        """Write a bytes-like object to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until .flush()
        or .close() is called.
        """
        # Compress & write
        try:
            input_size, _ = self._writer.write(data)
        except AttributeError:
            # No usable writer: closed or opened for reading.
            self._check_mode(_MODE_WRITE)

        self._pos += input_size
        return input_size

    # If modify this method, also modify SeekableZstdFile.flush() method.
    def flush(self, mode=FLUSH_BLOCK):
        """Flush remaining data to the underlying stream.

        The mode argument can be ZstdFile.FLUSH_BLOCK, ZstdFile.FLUSH_FRAME.
        Abuse of this method will reduce compression ratio, use it only when
        necessary.

        If the program is interrupted afterwards, all data can be recovered.
        To ensure saving to disk, also need to use os.fsync(fd).

        This method does nothing in reading mode.
        """
        if self._mode != _MODE_WRITE:
            # Like IOBase.flush(), do nothing in reading mode.
            # TextIOWrapper.close() relies on this behavior.
            if self._mode == _MODE_READ:
                return
            # Closed, raise ValueError.
            self._check_mode()

        # Flush zstd block/frame, and write.
        self._writer.flush(mode)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        if size is None:
            size = -1
        try:
            return self._buffer.read(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative or None.

        Returns b"" if the file is at EOF.
        """
        # None is accepted like in .read() and .readline().
        if size is None or size < 0:
            size = _ZSTD_DStreamOutSize

        try:
            return self._buffer.read1(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        try:
            return self._buffer.readinto(b)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readinto1(self, b):
        """Read bytes into b, while trying to avoid making multiple reads
        from the underlying stream.

        Returns the number of bytes read (0 for EOF).
        """
        try:
            return self._buffer.readinto1(b)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        if size is None:
            size = -1
        try:
            return self._buffer.readline(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the arguments,
        this operation may be extremely slow.
        """
        try:
            # BufferedReader.seek() checks seekable
            return self._buffer.seek(offset, whence)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        try:
            return self._buffer.peek(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def __iter__(self):
        """Return self, after checking the file is open for reading.

        Raises ValueError if the file is closed, io.UnsupportedOperation if
        it is open for writing.
        """
        # The mode is tested directly: .close() sets ._buffer to None
        # instead of deleting it, so probing the attribute would not detect
        # a closed file.
        if self._mode != _MODE_READ:
            # Always raises here, the mode is known to be wrong.
            self._check_mode(_MODE_READ)
        return self

    def __next__(self):
        """Return the next line, raise StopIteration at EOF.

        Raises ValueError if the file is closed, io.UnsupportedOperation if
        it is open for writing.
        """
        try:
            ret = self._buffer.readline()
        except AttributeError:
            # ._buffer is None (closed) or missing (write mode).
            self._check_mode(_MODE_READ)
        else:
            if ret:
                return ret
            raise StopIteration

    def tell(self):
        """Return the current file position."""
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        elif self._mode == _MODE_WRITE:
            return self._pos

        # Closed, raise ValueError.
        self._check_mode()

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        try:
            return self._fp.fileno()
        except AttributeError:
            # Closed, raise ValueError.
            self._check_mode()

    @property
    def name(self):
        """Return the file name for the underlying file."""
        try:
            return self._fp.name
        except AttributeError:
            self._check_mode()

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def writable(self):
        """Return whether the file was opened for writing."""
        if self._mode == _MODE_WRITE:
            return True
        elif self._mode == _MODE_READ:
            return False

        # Closed, raise ValueError.
        self._check_mode()

    def readable(self):
        """Return whether the file was opened for reading."""
        if self._mode == _MODE_READ:
            return True
        elif self._mode == _MODE_WRITE:
            return False

        # Closed, raise ValueError.
        self._check_mode()

    def seekable(self):
        """Return whether the file supports seeking."""
        if self._mode == _MODE_READ:
            return self._buffer.seekable()
        elif self._mode == _MODE_WRITE:
            return False

        # Closed, raise ValueError.
        self._check_mode()

451 

452 

453# Copied from lzma module 

def open(filename, mode="rb", *, level_or_option=None, zstd_dict=None,
         encoding=None, errors=None, newline=None):
    """Open a zstd compressed file in binary or text mode.

    filename can be either an actual file name (given as a str, bytes, or
    PathLike object), in which case the named file is opened, or it can be an
    existing file object to read from or write to.

    The mode parameter can be "r", "rb" (default), "w", "wb", "x", "xb", "a",
    "ab" for binary mode, or "rt", "wt", "xt", "at" for text mode.

    The level_or_option and zstd_dict parameters specify the settings, as for
    ZstdCompressor, ZstdDecompressor and ZstdFile.

    When using read mode (decompression), the level_or_option parameter can
    only be a dict object, that represents decompression option. It doesn't
    support int type compression level in this case.

    For binary mode, this function is equivalent to the ZstdFile constructor:
    ZstdFile(filename, mode, ...). In this case, the encoding, errors and
    newline parameters must not be provided.

    For text mode, an ZstdFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).

    Raises ValueError if mode mixes "t" and "b", or if a text-only argument
    is given in binary mode.
    """

    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    zstd_mode = mode.replace("t", "")
    binary_file = ZstdFile(filename, zstd_mode,
                           level_or_option=level_or_option, zstd_dict=zstd_dict)

    if "t" not in mode:
        return binary_file

    try:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    except BaseException:
        # The wrapper could not be created (e.g. invalid newline value):
        # close the file opened above instead of leaving it to the garbage
        # collector, then let the original error propagate.
        binary_file.close()
        raise