Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyzstd/_zstdfile.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

229 statements  

1import io 

2try: 

3 from os import PathLike 

4except ImportError: 

5 # For Python 3.5 

6 class PathLike: 

7 pass 

8 

9from pyzstd import ZstdCompressor, _ZstdFileReader, \ 

10 _ZstdFileWriter, _ZSTD_DStreamSizes 

11 

12__all__ = ('ZstdFile', 'open') 

13 

class _ZstdDecompressReader(io.RawIOBase):
    """Raw read-only stream built on top of a _ZstdFileReader.

    Exposes the reader object through the io.RawIOBase interface, so that
    it can be wrapped by io.BufferedReader.
    """

    def __init__(self, fp, zstd_dict, option, read_size):
        # Keep the underlying file object, .seekable() and rewinding need it.
        self._fp = fp
        self._decomp = _ZstdFileReader(fp, zstd_dict, option, read_size)

    def close(self):
        """Drop the reader object, then close this raw stream."""
        self._decomp = None
        return super().close()

    def readable(self):
        """This stream is always readable."""
        return True

    def seekable(self):
        """Delegate to the underlying file object.

        Not every file-like object provides .seekable(), so it is only
        invoked when this method is actually called.
        """
        return self._fp.seekable()

    def tell(self):
        """Return the position reported by the reader object."""
        return self._decomp.pos

    def readinto(self, b):
        """Let the reader object fill buffer b, return its result."""
        return self._decomp.readinto(b)

    def readall(self):
        """Return whatever the reader object's .readall() returns."""
        return self._decomp.readall()

    def seek(self, offset, whence=0):
        """Move to a new position, return the reader's resulting position.

        whence: 0 (from start), 1 (from current position), 2 (from end).
        Any other value raises ValueError.

        io.BufferedReader may not call this method when the new position
        is within its own buffer.
        """
        reader = self._decomp

        # Step 1: turn (offset, whence) into an absolute target position.
        if whence == 0:    # SEEK_SET
            target = offset
        elif whence == 1:  # SEEK_CUR
            target = reader.pos + offset
        elif whence == 2:  # SEEK_END
            if reader.size < 0:
                # Size is not known yet, .forward(None) is used to get it.
                reader.forward(None)
            target = reader.size + offset
        else:
            raise ValueError("Invalid whence value: {}".format(whence))

        # Step 2: work out how many bytes to skip forward.
        if target < reader.pos:
            # Going backwards: restart from the beginning of the file,
            # the whole target distance is skipped from there.
            reader.eof = False
            reader.pos = 0
            reader.reset_session()
            self._fp.seek(0)
            skip = target
        else:
            skip = target - reader.pos

        # If skip <= 0, .forward() method does nothing.
        reader.forward(skip)
        return reader.pos

70 

# Second item of the sizes tuple imported from pyzstd. ZstdFile uses it as
# the io.BufferedReader buffer size and as the default size for .read1().
_ZSTD_DStreamOutSize = _ZSTD_DStreamSizes[1]

# Values stored in ZstdFile._mode.
_MODE_CLOSED, _MODE_READ, _MODE_WRITE = 0, 1, 2

76 

class ZstdFile(io.BufferedIOBase):
    """A file object providing transparent zstd (de)compression.

    A ZstdFile can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that ZstdFile provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be an object that
    supports the Buffer Protocol.
    """
    FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK
    FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME

    _READER_CLASS = _ZstdDecompressReader

    def __init__(self, filename, mode="r", *,
                 level_or_option=None, zstd_dict=None,
                 read_size=131075, write_size=131591):
        """Open a zstd compressed file in binary mode.

        filename can be either an actual file name (given as a str, bytes, or
        PathLike object), in which case the named file is opened, or it can be
        an existing file object to read from or write to.

        mode can be "r" for reading (default), "w" for (over)writing, "x" for
        creating exclusively, or "a" for appending. These can equivalently be
        given as "rb", "wb", "xb" and "ab" respectively.

        Parameters
        level_or_option: When it's an int object, it represents compression
            level. When it's a dict object, it contains advanced compression
            parameters. Note, in read mode (decompression), it can only be a
            dict object, that represents decompression option. It doesn't
            support int type compression level in this case.
        zstd_dict: A ZstdDict object, pre-trained dictionary for compression /
            decompression.
        read_size: In reading mode, this is bytes number that read from the
            underlying file object each time, default value is zstd's
            recommended value. If use with Network File System, increasing
            it may get better performance.
        write_size: In writing modes, this is output buffer's size, default
            value is zstd's recommended value. If use with Network File
            System, increasing it may get better performance.

        Raises
        TypeError: level_or_option or filename has an unsupported type.
        ValueError: mode is invalid, or read_size / write_size is given
            in a mode where it doesn't apply.
        """
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        # Read or write mode
        if mode in ("r", "rb"):
            if not isinstance(level_or_option, (type(None), dict)):
                raise TypeError(
                    ("In read mode (decompression), level_or_option argument "
                     "should be a dict object, that represents decompression "
                     "option. It doesn't support int type compression level "
                     "in this case."))
            if write_size != 131591:
                raise ValueError(
                    "write_size argument is only valid in write modes.")
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "a", "ab", "x", "xb"):
            if not isinstance(level_or_option, (type(None), int, dict)):
                raise TypeError(("level_or_option argument "
                                 "should be int or dict object."))
            if read_size != 131075:
                raise ValueError(
                    "read_size argument is only valid in read mode.")
            mode_code = _MODE_WRITE
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        # File object
        if isinstance(filename, (str, bytes, PathLike)):
            if "b" not in mode:
                mode += "b"
            self._fp = io.open(filename, mode)
            # This object opened the file, so it is responsible to close it.
            self._closefp = True
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
        else:
            raise TypeError(("filename must be a str, bytes, "
                             "file or PathLike object"))

        # Set ._mode here for ._closefp in .close(). If the following code
        # fails, IOBase's cleanup code will call .close(), so that ._fp can
        # be closed.
        self._mode = mode_code

        # Reader or writer
        if mode_code == _MODE_READ:
            raw = self._READER_CLASS(
                self._fp,
                zstd_dict=zstd_dict,
                option=level_or_option,
                read_size=read_size)
            self._buffer = io.BufferedReader(raw, _ZSTD_DStreamOutSize)
        elif mode_code == _MODE_WRITE:
            # Number of uncompressed bytes written so far, see .tell()
            self._pos = 0
            self._writer = _ZstdFileWriter(
                self._fp,
                level_or_option=level_or_option,
                zstd_dict=zstd_dict,
                write_size=write_size)

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return

        try:
            # In .__init__ method, if fails after setting ._mode attribute,
            # these attributes don't exist.
            if hasattr(self, "_buffer"):
                try:
                    self._buffer.close()
                finally:
                    # Set to None for ._check_mode()
                    self._buffer = None
            elif hasattr(self, "_writer"):
                try:
                    self.flush(self.FLUSH_FRAME)
                finally:
                    # Set to None for ._check_mode()
                    self._writer = None
        finally:
            # Always release the file object and mark this object as
            # closed, even if the code above raised an exception.
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    # None argument means the file should be closed
    def _check_mode(self, expected_mode=None):
        """Raise the proper exception for the current state.

        Raises ValueError if the file is closed, io.UnsupportedOperation
        if the file is not in expected_mode. If neither applies, the
        exception currently being handled is re-raised, so in that case
        this method must be called inside an `except` block.
        """
        # If closed, raise ValueError.
        if self._mode == _MODE_CLOSED:
            raise ValueError("I/O operation on closed file")

        # Check _MODE_READ/_MODE_WRITE mode
        if expected_mode == _MODE_READ:
            if self._mode != _MODE_READ:
                raise io.UnsupportedOperation("File not open for reading")
        elif expected_mode == _MODE_WRITE:
            if self._mode != _MODE_WRITE:
                raise io.UnsupportedOperation("File not open for writing")

        # Re-raise other AttributeError exception
        raise

    # If modify this method, also modify SeekableZstdFile.write() method.
    def write(self, data):
        """Write a bytes-like object to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until .flush()
        or .close() is called.
        """
        # Compress & write
        try:
            input_size, _ = self._writer.write(data)
        except AttributeError:
            # Always raises, input_size is bound when execution continues.
            self._check_mode(_MODE_WRITE)

        self._pos += input_size
        return input_size

    # If modify this method, also modify SeekableZstdFile.flush() method.
    def flush(self, mode=FLUSH_BLOCK):
        """Flush remaining data to the underlying stream.

        The mode argument can be ZstdFile.FLUSH_BLOCK, ZstdFile.FLUSH_FRAME.
        Abuse of this method will reduce compression ratio, use it only when
        necessary.

        If the program is interrupted afterwards, all data can be recovered.
        To ensure saving to disk, also need to use os.fsync(fd).

        This method does nothing in reading mode.
        """
        if self._mode != _MODE_WRITE:
            # Like IOBase.flush(), do nothing in reading mode.
            # TextIOWrapper.close() relies on this behavior.
            if self._mode == _MODE_READ:
                return
            # Closed, raise ValueError.
            self._check_mode()

        # Flush zstd block/frame, and write.
        self._writer.flush(mode)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        if size is None:
            size = -1
        try:
            return self._buffer.read(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative or None.

        Returns b"" if the file is at EOF.
        """
        # Accept None like .read() and .readline() do, rather than failing
        # on the `None < 0` comparison.
        if size is None or size < 0:
            size = _ZSTD_DStreamOutSize

        try:
            return self._buffer.read1(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        try:
            return self._buffer.readinto(b)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readinto1(self, b):
        """Read bytes into b, while trying to avoid making multiple reads
        from the underlying stream.

        Returns the number of bytes read (0 for EOF).
        """
        try:
            return self._buffer.readinto1(b)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        if size is None:
            size = -1
        try:
            return self._buffer.readline(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the arguments,
        this operation may be extremely slow.
        """
        try:
            # BufferedReader.seek() checks seekable
            return self._buffer.seek(offset, whence)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        try:
            return self._buffer.peek(size)
        except AttributeError:
            self._check_mode(_MODE_READ)

    def __iter__(self):
        """Return self, after checking the file is open for reading."""
        try:
            self._buffer
        except AttributeError:
            self._check_mode(_MODE_READ)
        return self

    def __next__(self):
        """Return the next line, raise StopIteration at EOF.

        Like the other read methods, raises ValueError if the file is
        closed, io.UnsupportedOperation if it is not open for reading.
        """
        # The file may have been closed after .__iter__() was called, or
        # next() may be called directly on a file in write mode. Translate
        # the resulting AttributeError the same way as the other read
        # methods do.
        try:
            ret = self._buffer.readline()
        except AttributeError:
            self._check_mode(_MODE_READ)

        if ret:
            return ret
        raise StopIteration

    def tell(self):
        """Return the current file position."""
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        elif self._mode == _MODE_WRITE:
            return self._pos

        # Closed, raise ValueError.
        self._check_mode()

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        try:
            return self._fp.fileno()
        except AttributeError:
            # Closed, raise ValueError.
            self._check_mode()

    @property
    def name(self):
        """Return the file name for the underlying file."""
        try:
            return self._fp.name
        except AttributeError:
            # Closed: raise ValueError. Otherwise the AttributeError of the
            # underlying file object is re-raised.
            self._check_mode()

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def writable(self):
        """Return whether the file was opened for writing."""
        if self._mode == _MODE_WRITE:
            return True
        elif self._mode == _MODE_READ:
            return False

        # Closed, raise ValueError.
        self._check_mode()

    def readable(self):
        """Return whether the file was opened for reading."""
        if self._mode == _MODE_READ:
            return True
        elif self._mode == _MODE_WRITE:
            return False

        # Closed, raise ValueError.
        self._check_mode()

    def seekable(self):
        """Return whether the file supports seeking."""
        if self._mode == _MODE_READ:
            return self._buffer.seekable()
        elif self._mode == _MODE_WRITE:
            return False

        # Closed, raise ValueError.
        self._check_mode()

443 

444 

445# Copied from lzma module 

def open(filename, mode="rb", *, level_or_option=None, zstd_dict=None,
         encoding=None, errors=None, newline=None):
    """Open a zstd compressed file in binary or text mode.

    filename is either a file name (str, bytes or PathLike object), in
    which case the named file is opened, or an existing file object to
    read from or write to.

    mode is one of "r", "rb" (default), "w", "wb", "x", "xb", "a", "ab" for
    binary mode, or "rt", "wt", "xt", "at" for text mode.

    level_or_option and zstd_dict are passed on to ZstdFile. In read mode
    (decompression), level_or_option can only be a dict object that
    represents decompression option, an int compression level is not
    supported in this case.

    Binary mode returns ZstdFile(filename, mode, ...). The encoding, errors
    and newline arguments must not be given, otherwise ValueError is raised.

    Text mode returns an io.TextIOWrapper around a ZstdFile object, set up
    with the given encoding, error handling behavior and line ending(s).
    A mode containing both "t" and "b" raises ValueError.
    """
    text_mode = "t" in mode

    # Validate the mode / text arguments combination first.
    if text_mode and "b" in mode:
        raise ValueError("Invalid mode: %r" % (mode,))
    if not text_mode:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # ZstdFile only knows binary modes, strip the "t" flag.
    raw_file = ZstdFile(filename, mode.replace("t", ""),
                        level_or_option=level_or_option,
                        zstd_dict=zstd_dict)

    if not text_mode:
        return raw_file
    return io.TextIOWrapper(raw_file, encoding, errors, newline)