Coverage Report

Created: 2026-02-14 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/port/cpl_vsil_gzip.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  Implement VSI large file api for gz/zip files (.gz and .zip).
5
 * Author:   Even Rouault, even.rouault at spatialys.com
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2008-2014, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
//! @cond Doxygen_Suppress
14
15
/* gzio.c -- IO on .gz files
16
  Copyright (C) 1995-2005 Jean-loup Gailly.
17
18
  This software is provided 'as-is', without any express or implied
19
  warranty.  In no event will the authors be held liable for any damages
20
  arising from the use of this software.
21
22
  Permission is granted to anyone to use this software for any purpose,
23
  including commercial applications, and to alter it and redistribute it
24
  freely, subject to the following restrictions:
25
26
  1. The origin of this software must not be misrepresented; you must not
27
     claim that you wrote the original software. If you use this software
28
     in a product, an acknowledgment in the product documentation would be
29
     appreciated but is not required.
30
  2. Altered source versions must be plainly marked as such, and must not be
31
     misrepresented as being the original software.
32
  3. This notice may not be removed or altered from any source distribution.
33
34
  Jean-loup Gailly        Mark Adler
35
  jloup@gzip.org          madler@alumni.caltech.edu
36
37
  The data format used by the zlib library is described by RFCs (Request for
38
  Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
39
  (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
40
*/
41
42
/* This file contains a refactoring of gzio.c from zlib project.
43
44
   It replaces classical calls operating on FILE* by calls to the VSI large file
45
   API. It also adds the capability to seek at the end of the file, which is not
46
   implemented in original gzSeek. It also implements a concept of in-memory
47
   "snapshots", that are a way of improving efficiency while seeking GZip
48
   files. At regular intervals while decompressing the data, we record a snapshot
49
   of the gzip state.  Later we can seek directly in the compressed data to the
50
   closest snapshot in order to reduce the amount of data to uncompress again.
51
52
   For .gz files, an effort is made to cache the size of the uncompressed data
53
   in a .gz.properties file, so that we don't need to seek at the end of the
54
   file each time a Stat() is done.
55
56
   For .zip and .gz, both reading and writing are supported, but just one mode
57
   at a time (read-only or write-only).
58
*/
59
60
#include "cpl_port.h"
61
#include "cpl_conv.h"
62
#include "cpl_vsi.h"
63
64
#include <cerrno>
65
#include <cinttypes>
66
#include <climits>
67
#include <cstddef>
68
#include <cstdio>
69
#include <cstdlib>
70
#include <cstring>
71
#include <ctime>
72
73
#include <fcntl.h>
74
75
#include "cpl_zlib_header.h"  // to avoid warnings when including zlib.h
76
77
#ifdef HAVE_LIBDEFLATE
78
#include "libdeflate.h"
79
#endif
80
81
#include <algorithm>
82
#include <iterator>
83
#include <limits>
84
#include <list>
85
#include <map>
86
#include <memory>
87
#include <mutex>
88
#include <string>
89
#include <utility>
90
#include <vector>
91
92
#include "cpl_error.h"
93
#include "cpl_minizip_ioapi.h"
94
#include "cpl_minizip_unzip.h"
95
#include "cpl_multiproc.h"
96
#include "cpl_string.h"
97
#include "cpl_time.h"
98
#include "cpl_vsi_virtual.h"
99
#include "cpl_worker_thread_pool.h"
100
#include "../gcore/gdal_thread_pool.h"
101
102
// Size in bytes of the input/output work buffers (zlib's gzio.c used 16384).
constexpr int Z_BUFSIZE = 64 * 1024;
// The two magic bytes that open every gzip member.
constexpr int gz_magic[2] = {0x1f, 0x8b};

// Bits of the gzip header flag byte.
#define ASCII_FLAG (1 << 0)   // bit 0 set: file probably ascii text
#define HEAD_CRC (1 << 1)     // bit 1 set: header CRC present
#define EXTRA_FIELD (1 << 2)  // bit 2 set: extra field present
#define ORIG_NAME (1 << 3)    // bit 3 set: original file name present
#define COMMENT (1 << 4)      // bit 4 set: file comment present
#define RESERVED 0xE0         // bits 5..7: reserved

// Raw allocation helpers inherited from gzio.c. TRYFREE() releases the
// pointer only when it is non-null.
#define ALLOC(size) malloc(size)
#define TRYFREE(p)                                                             \
    {                                                                          \
        if (p)                                                                 \
            free(p);                                                           \
    }

// Emits a CE_Failure error recording the source location and the value that
// the caller is about to return. It does not return by itself.
#define CPL_VSIL_GZ_RETURN(ret)                                                \
    CPLError(CE_Failure, CPLE_AppDefined, "In file %s, at line %d, return %d", \
             __FILE__, __LINE__, ret)
123
124
// To avoid aliasing to CopyFile to CopyFileA on Windows
125
#ifdef CopyFile
126
#undef CopyFile
127
#endif
128
129
// #define ENABLE_DEBUG 1
130
131
/************************************************************************/
132
/* ==================================================================== */
133
/*                       VSIGZipHandle                                  */
134
/* ==================================================================== */
135
/************************************************************************/
136
137
typedef struct
138
{
139
    vsi_l_offset posInBaseHandle;
140
    z_stream stream;
141
    uLong crc;
142
    int transparent;
143
    vsi_l_offset in;
144
    vsi_l_offset out;
145
} GZipSnapshot;
146
147
class VSIGZipHandle final : public VSIVirtualHandle
148
{
149
    VSIVirtualHandleUniquePtr m_poBaseHandle{};
150
#ifdef DEBUG
151
    vsi_l_offset m_offset = 0;
152
#endif
153
    vsi_l_offset m_compressed_size = 0;
154
    vsi_l_offset m_uncompressed_size = 0;
155
    vsi_l_offset offsetEndCompressedData = 0;
156
    uLong m_expected_crc = 0;
157
    char *m_pszBaseFileName = nullptr; /* optional */
158
    bool m_bWriteProperties = false;
159
    bool m_bCanSaveInfo = false;
160
161
    /* Fields from gz_stream structure */
162
    z_stream stream;
163
    int z_err = Z_OK;    /* error code for last stream operation */
164
    int z_eof = 0;       /* set if end of input file (but not necessarily of the
165
                         uncompressed stream !) */
166
    bool m_bEOF = false; /* EOF flag for uncompressed stream */
167
    Byte *inbuf = nullptr;  /* input buffer */
168
    Byte *outbuf = nullptr; /* output buffer */
169
    uLong crc = 0;          /* crc32 of uncompressed data */
170
    int m_transparent = 0;  /* 1 if input file is not a .gz file */
171
    vsi_l_offset startOff =
172
        0; /* startOff of compressed data in file (header skipped) */
173
    vsi_l_offset in = 0;  /* bytes into deflate or inflate */
174
    vsi_l_offset out = 0; /* bytes out of deflate or inflate */
175
    vsi_l_offset m_nLastReadOffset = 0;
176
177
    GZipSnapshot *snapshots = nullptr;
178
    vsi_l_offset snapshot_byte_interval =
179
        0; /* number of compressed bytes at which we create a "snapshot" */
180
181
    void check_header();
182
    int get_byte();
183
    bool gzseek(vsi_l_offset nOffset, int nWhence);
184
    int gzrewind();
185
    uLong getLong();
186
187
    CPL_DISALLOW_COPY_ASSIGN(VSIGZipHandle)
188
189
  public:
190
    VSIGZipHandle(VSIVirtualHandleUniquePtr poBaseHandleIn,
191
                  const char *pszBaseFileName, vsi_l_offset offset = 0,
192
                  vsi_l_offset compressed_size = 0,
193
                  vsi_l_offset uncompressed_size = 0, uLong expected_crc = 0,
194
                  int transparent = 0);
195
    ~VSIGZipHandle() override;
196
197
    bool IsInitOK() const
198
0
    {
199
0
        return inbuf != nullptr;
200
0
    }
201
202
    int Seek(vsi_l_offset nOffset, int nWhence) override;
203
    vsi_l_offset Tell() override;
204
    size_t Read(void *pBuffer, size_t nBytes) override;
205
    size_t Write(const void *pBuffer, size_t nBytes) override;
206
    void ClearErr() override;
207
    int Eof() override;
208
    int Error() override;
209
    int Flush() override;
210
    int Close() override;
211
212
    VSIGZipHandle *Duplicate();
213
    bool CloseBaseHandle();
214
215
    vsi_l_offset GetLastReadOffset()
216
0
    {
217
0
        return m_nLastReadOffset;
218
0
    }
219
220
    const char *GetBaseFileName()
221
0
    {
222
0
        return m_pszBaseFileName;
223
0
    }
224
225
    void SetUncompressedSize(vsi_l_offset nUncompressedSize)
226
0
    {
227
0
        m_uncompressed_size = nUncompressedSize;
228
0
    }
229
230
    vsi_l_offset GetUncompressedSize()
231
0
    {
232
0
        return m_uncompressed_size;
233
0
    }
234
235
    void SaveInfo_unlocked();
236
237
    void UnsetCanSaveInfo()
238
0
    {
239
0
        m_bCanSaveInfo = false;
240
0
    }
241
};
242
243
#ifdef ENABLE_DEFLATE64
244
245
/************************************************************************/
246
/* ==================================================================== */
247
/*                           VSIDeflate64Handle                         */
248
/* ==================================================================== */
249
/************************************************************************/
250
251
struct VSIDeflate64Snapshot
252
{
253
    vsi_l_offset posInBaseHandle = 0;
254
    z_stream stream{};
255
    uLong crc = 0;
256
    vsi_l_offset in = 0;
257
    vsi_l_offset out = 0;
258
    std::vector<GByte> extraOutput{};
259
    bool m_bStreamEndReached = false;
260
};
261
262
class VSIDeflate64Handle final : public VSIVirtualHandle
263
{
264
    VSIVirtualHandleUniquePtr m_poBaseHandle{};
265
#ifdef DEBUG
266
    vsi_l_offset m_offset = 0;
267
#endif
268
    vsi_l_offset m_compressed_size = 0;
269
    vsi_l_offset m_uncompressed_size = 0;
270
    vsi_l_offset offsetEndCompressedData = 0;
271
    uLong m_expected_crc = 0;
272
    char *m_pszBaseFileName = nullptr; /* optional */
273
274
    /* Fields from gz_stream structure */
275
    z_stream stream;
276
    int z_err = Z_OK;    /* error code for last stream operation */
277
    int z_eof = 0;       /* set if end of input file (but not necessarily of the
278
                         uncompressed stream ! ) */
279
    bool m_bEOF = false; /* EOF flag for uncompressed stream */
280
    Byte *inbuf = nullptr;  /* input buffer */
281
    Byte *outbuf = nullptr; /* output buffer */
282
    std::vector<GByte> extraOutput{};
283
    bool m_bStreamEndReached = false;
284
    uLong crc = 0; /* crc32 of uncompressed data */
285
    vsi_l_offset startOff =
286
        0; /* startOff of compressed data in file (header skipped) */
287
    vsi_l_offset in = 0;  /* bytes into deflate or inflate */
288
    vsi_l_offset out = 0; /* bytes out of deflate or inflate */
289
290
    std::vector<VSIDeflate64Snapshot> snapshots{};
291
    vsi_l_offset snapshot_byte_interval =
292
        0; /* number of compressed bytes at which we create a "snapshot" */
293
294
    bool gzseek(vsi_l_offset nOffset, int nWhence);
295
    int gzrewind();
296
297
    CPL_DISALLOW_COPY_ASSIGN(VSIDeflate64Handle)
298
299
  public:
300
    VSIDeflate64Handle(VSIVirtualHandleUniquePtr poBaseHandleIn,
301
                       const char *pszBaseFileName, vsi_l_offset offset = 0,
302
                       vsi_l_offset compressed_size = 0,
303
                       vsi_l_offset uncompressed_size = 0,
304
                       uLong expected_crc = 0);
305
    ~VSIDeflate64Handle() override;
306
307
    bool IsInitOK() const
308
0
    {
309
0
        return inbuf != nullptr;
310
0
    }
311
312
    int Seek(vsi_l_offset nOffset, int nWhence) override;
313
    vsi_l_offset Tell() override;
314
    size_t Read(void *pBuffer, size_t nBytes) override;
315
    size_t Write(const void *pBuffer, size_t nBytes) override;
316
    void ClearErr() override;
317
    int Eof() override;
318
    int Error() override;
319
    int Flush() override;
320
    int Close() override;
321
322
    VSIDeflate64Handle *Duplicate();
323
    bool CloseBaseHandle();
324
325
    const char *GetBaseFileName()
326
0
    {
327
0
        return m_pszBaseFileName;
328
0
    }
329
330
    void SetUncompressedSize(vsi_l_offset nUncompressedSize)
331
0
    {
332
0
        m_uncompressed_size = nUncompressedSize;
333
0
    }
334
335
    vsi_l_offset GetUncompressedSize()
336
0
    {
337
0
        return m_uncompressed_size;
338
0
    }
339
};
340
#endif
341
342
class VSIGZipFilesystemHandler final : public VSIFilesystemHandler
343
{
344
    CPL_DISALLOW_COPY_ASSIGN(VSIGZipFilesystemHandler)
345
346
    std::recursive_mutex oMutex{};
347
    std::unique_ptr<VSIGZipHandle> poHandleLastGZipFile{};
348
    bool m_bInSaveInfo = false;
349
350
  public:
351
3
    VSIGZipFilesystemHandler() = default;
352
    ~VSIGZipFilesystemHandler() override;
353
354
    VSIVirtualHandleUniquePtr Open(const char *pszFilename,
355
                                   const char *pszAccess, bool bSetError,
356
                                   CSLConstList /* papszOptions */) override;
357
    VSIGZipHandle *OpenGZipReadOnly(const char *pszFilename,
358
                                    const char *pszAccess);
359
    int Stat(const char *pszFilename, VSIStatBufL *pStatBuf,
360
             int nFlags) override;
361
    char **ReadDirEx(const char *pszDirname, int nMaxFiles) override;
362
363
    const char *GetOptions() override;
364
365
    virtual bool SupportsSequentialWrite(const char *pszPath,
366
                                         bool bAllowLocalTempFile) override;
367
368
    virtual bool SupportsRandomWrite(const char * /* pszPath */,
369
                                     bool /* bAllowLocalTempFile */) override
370
0
    {
371
0
        return false;
372
0
    }
373
374
    void SaveInfo(VSIGZipHandle *poHandle);
375
    void SaveInfo_unlocked(VSIGZipHandle *poHandle);
376
};
377
378
/************************************************************************/
379
/*                             Duplicate()                              */
380
/************************************************************************/
381
382
/** Creates an independent handle on the same base file.
 *
 * The base file is reopened in "rb" mode and wrapped in a new VSIGZipHandle
 * built with the same compressed and uncompressed sizes. The last read offset
 * and every populated snapshot are then copied, so the duplicate can seek as
 * efficiently as this handle.
 *
 * @return a new handle owned by the caller, or nullptr if the base file
 *         cannot be reopened or the new handle fails to initialize.
 */
VSIGZipHandle *VSIGZipHandle::Duplicate()
{
    CPLAssert(m_offset == 0);
    CPLAssert(m_compressed_size != 0);
    CPLAssert(m_pszBaseFileName != nullptr);

    VSIFilesystemHandler *poFSHandler =
        VSIFileManager::GetHandler(m_pszBaseFileName);

    auto poNewBaseHandle = poFSHandler->Open(m_pszBaseFileName, "rb");

    if (poNewBaseHandle == nullptr)
        return nullptr;

    auto poHandle = std::make_unique<VSIGZipHandle>(
        std::move(poNewBaseHandle), m_pszBaseFileName, 0, m_compressed_size,
        m_uncompressed_size);
    if (!(poHandle->IsInitOK()))
    {
        return nullptr;
    }

    poHandle->m_nLastReadOffset = m_nLastReadOffset;

    // Most important: duplicate the snapshots!

    // Without a snapshot table on both sides (or with tables laid out
    // differently) there is nothing that can be copied safely. This also
    // avoids dividing by a zero snapshot_byte_interval.
    if (snapshots == nullptr || poHandle->snapshots == nullptr ||
        snapshot_byte_interval == 0 ||
        poHandle->snapshot_byte_interval != snapshot_byte_interval)
    {
        return poHandle.release();
    }

    const size_t nSnapshotCount =
        static_cast<size_t>(m_compressed_size / snapshot_byte_interval + 1);
    for (size_t i = 0; i < nSnapshotCount; i++)
    {
        const GZipSnapshot &sSrc = snapshots[i];
        // The table is filled in order: the first unused slot ends it.
        if (sSrc.posInBaseHandle == 0)
            break;

        GZipSnapshot &sDst = poHandle->snapshots[i];
        // Only mark the destination slot as populated once its inflate state
        // has really been copied. If the copy fails, stop here: the
        // duplicate just ends up with fewer snapshots.
        if (inflateCopy(&sDst.stream, const_cast<z_stream *>(&sSrc.stream)) !=
            Z_OK)
        {
            break;
        }

        sDst.posInBaseHandle = sSrc.posInBaseHandle;
        sDst.crc = sSrc.crc;
        sDst.transparent = sSrc.transparent;
        sDst.in = sSrc.in;
        sDst.out = sSrc.out;
    }

    return poHandle.release();
}
424
425
/************************************************************************/
426
/*                          CloseBaseHandle()                           */
427
/************************************************************************/
428
429
bool VSIGZipHandle::CloseBaseHandle()
430
0
{
431
0
    bool bRet = true;
432
0
    if (m_poBaseHandle)
433
0
    {
434
0
        bRet = m_poBaseHandle->Close() == 0;
435
0
        m_poBaseHandle.reset();
436
0
    }
437
0
    return bRet;
438
0
}
439
440
/************************************************************************/
441
/*                           VSIGZipHandle()                            */
442
/************************************************************************/
443
444
/** Builds a decompressing handle on top of poBaseHandleIn.
 *
 * @param poBaseHandleIn    handle on the compressed data; ownership is taken.
 * @param pszBaseFileName   name of the base file (copied), or nullptr.
 * @param offset            offset of the stream in the base handle. When it
 *                          is 0, check_header() is run to skip a .gz header.
 * @param compressed_size   size of the compressed data. When 0 and
 *                          transparent is 0, it is computed as the size of
 *                          the base file minus offset.
 * @param uncompressed_size size of the uncompressed data, or 0.
 * @param expected_crc      stored in m_expected_crc.
 * @param transparent       initial value of m_transparent. When non-zero, no
 *                          snapshot table is allocated.
 *
 * On failure an error is emitted and inbuf is left null, which IsInitOK()
 * reports.
 */
VSIGZipHandle::VSIGZipHandle(VSIVirtualHandleUniquePtr poBaseHandleIn,
                             const char *pszBaseFileName, vsi_l_offset offset,
                             vsi_l_offset compressed_size,
                             vsi_l_offset uncompressed_size, uLong expected_crc,
                             int transparent)
    : m_poBaseHandle(std::move(poBaseHandleIn)),
#ifdef DEBUG
      m_offset(offset),
#endif
      m_uncompressed_size(uncompressed_size), m_expected_crc(expected_crc),
      m_pszBaseFileName(pszBaseFileName ? CPLStrdup(pszBaseFileName) : nullptr),
      m_bWriteProperties(CPLTestBool(
          CPLGetConfigOption("CPL_VSIL_GZIP_WRITE_PROPERTIES", "YES"))),
      m_bCanSaveInfo(
          CPLTestBool(CPLGetConfigOption("CPL_VSIL_GZIP_SAVE_INFO", "YES"))),
      stream(), crc(0), m_transparent(transparent)
{
    if (compressed_size == 0 && !transparent)
    {
        // Size not provided: everything from offset to the end of the base
        // file is considered compressed data.
        if (m_poBaseHandle->Seek(0, SEEK_END) != 0)
        {
            CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
            return;
        }
        const auto nFileSize = m_poBaseHandle->Tell();
        if (offset > nFileSize)
        {
            CPLError(CE_Failure, CPLE_FileIO, "/vsizip/: invalid file offset");
            return;
        }
        compressed_size = nFileSize - offset;
    }
    m_compressed_size = compressed_size;
    offsetEndCompressedData = offset + compressed_size;

    if (m_poBaseHandle->Seek(offset, SEEK_SET) != 0)
        CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");

    // Start from an empty stream using zlib's default allocators.
    stream.zalloc = nullptr;
    stream.zfree = nullptr;
    stream.opaque = nullptr;
    inbuf = nullptr;
    stream.next_in = nullptr;
    outbuf = nullptr;
    stream.next_out = nullptr;
    stream.avail_out = 0;
    stream.avail_in = 0;

    inbuf = static_cast<Byte *>(ALLOC(Z_BUFSIZE));
    stream.next_in = inbuf;

    // windowBits is passed < 0 to tell that there is no zlib header.
    // Note that in this case inflate *requires* an extra "dummy" byte
    // after the compressed stream in order to complete decompression and
    // return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are
    // present after the compressed stream.
    const int nInitRet = inflateInit2(&(stream), -MAX_WBITS);
    if (inbuf == nullptr || nInitRet != Z_OK)
    {
        CPLError(CE_Failure, CPLE_NotSupported, "inflateInit2 init failed");
        TRYFREE(inbuf);
        inbuf = nullptr;
        return;
    }
    stream.avail_out = static_cast<uInt>(Z_BUFSIZE);

    if (offset == 0)
        check_header();  // Skip the .gz header.
    // Bytes still buffered in inbuf have not been consumed yet.
    startOff = m_poBaseHandle->Tell() - stream.avail_in;

    if (transparent != 0)
        return;

    // One snapshot slot every snapshot_byte_interval compressed bytes, the
    // interval being at least Z_BUFSIZE and otherwise 1% of the stream.
    snapshot_byte_interval =
        std::max(static_cast<vsi_l_offset>(Z_BUFSIZE), compressed_size / 100);
    const size_t nSnapshotCount =
        static_cast<size_t>(compressed_size / snapshot_byte_interval + 1);
    snapshots = static_cast<GZipSnapshot *>(
        CPLCalloc(sizeof(GZipSnapshot), nSnapshotCount));
}
524
525
/************************************************************************/
526
/*                         SaveInfo_unlocked()                          */
527
/************************************************************************/
528
529
void VSIGZipHandle::SaveInfo_unlocked()
530
0
{
531
0
    if (m_pszBaseFileName && m_bCanSaveInfo)
532
0
    {
533
0
        VSIFilesystemHandler *poFSHandler =
534
0
            VSIFileManager::GetHandler("/vsigzip/");
535
0
        cpl::down_cast<VSIGZipFilesystemHandler *>(poFSHandler)
536
0
            ->SaveInfo_unlocked(this);
537
0
        m_bCanSaveInfo = false;
538
0
    }
539
0
}
540
541
/************************************************************************/
542
/*                           ~VSIGZipHandle()                           */
543
/************************************************************************/
544
545
/** Destructor.
 *
 * Hands the handle to the /vsigzip/ filesystem handler's SaveInfo() first
 * (when allowed). It then releases the inflate state, the work buffers, the
 * snapshot table and the file name, and finally closes the base handle.
 */
VSIGZipHandle::~VSIGZipHandle()
{
    if (m_pszBaseFileName && m_bCanSaveInfo)
    {
        auto poGZipFSHandler = cpl::down_cast<VSIGZipFilesystemHandler *>(
            VSIFileManager::GetHandler("/vsigzip/"));
        poGZipFSHandler->SaveInfo(this);
    }

    if (stream.state != nullptr)
        inflateEnd(&(stream));

    TRYFREE(inbuf);
    TRYFREE(outbuf);

    if (snapshots != nullptr)
    {
        // Only slots that were populated hold an inflate state to release.
        const vsi_l_offset nSlotCount =
            m_compressed_size / snapshot_byte_interval + 1;
        for (size_t iSlot = 0; iSlot < nSlotCount; ++iSlot)
        {
            if (snapshots[iSlot].posInBaseHandle != 0)
                inflateEnd(&(snapshots[iSlot].stream));
        }
        CPLFree(snapshots);
    }
    CPLFree(m_pszBaseFileName);

    CloseBaseHandle();
}
578
579
/************************************************************************/
580
/*                            check_header()                            */
581
/************************************************************************/
582
583
void VSIGZipHandle::check_header()
584
0
{
585
    // Assure two bytes in the buffer so we can peek ahead -- handle case
586
    // where first byte of header is at the end of the buffer after the last
587
    // gzip segment.
588
0
    uInt len = stream.avail_in;
589
0
    if (len < 2)
590
0
    {
591
0
        if (len)
592
0
            inbuf[0] = stream.next_in[0];
593
0
        errno = 0;
594
0
        size_t nToRead = static_cast<size_t>(Z_BUFSIZE - len);
595
0
        CPLAssert(m_poBaseHandle->Tell() <= offsetEndCompressedData);
596
0
        if (m_poBaseHandle->Tell() + nToRead > offsetEndCompressedData)
597
0
            nToRead = static_cast<size_t>(offsetEndCompressedData -
598
0
                                          m_poBaseHandle->Tell());
599
600
0
        len = static_cast<uInt>(m_poBaseHandle->Read(inbuf + len, nToRead));
601
#ifdef ENABLE_DEBUG
602
        CPLDebug("GZIP", CPL_FRMT_GUIB " " CPL_FRMT_GUIB,
603
                 m_poBaseHandle->Tell(), offsetEndCompressedData);
604
#endif
605
0
        if (len == 0)  // && ferror(file)
606
0
        {
607
0
            if (m_poBaseHandle->Tell() != offsetEndCompressedData)
608
0
                z_err = Z_ERRNO;
609
0
        }
610
0
        stream.avail_in += len;
611
0
        stream.next_in = inbuf;
612
0
        if (stream.avail_in < 2)
613
0
        {
614
0
            m_transparent = stream.avail_in;
615
0
            return;
616
0
        }
617
0
    }
618
619
    // Peek ahead to check the gzip magic header.
620
0
    if (stream.next_in[0] != gz_magic[0] || stream.next_in[1] != gz_magic[1])
621
0
    {
622
0
        m_transparent = 1;
623
0
        return;
624
0
    }
625
0
    stream.avail_in -= 2;
626
0
    stream.next_in += 2;
627
628
    // Check the rest of the gzip header.
629
0
    const int method = get_byte();
630
0
    const int flags = get_byte();
631
0
    if (method != Z_DEFLATED || (flags & RESERVED) != 0)
632
0
    {
633
0
        z_err = Z_DATA_ERROR;
634
0
        return;
635
0
    }
636
637
    // Discard time, xflags and OS code:
638
0
    for (len = 0; len < 6; len++)
639
0
        CPL_IGNORE_RET_VAL(get_byte());
640
641
0
    if ((flags & EXTRA_FIELD) != 0)
642
0
    {
643
        // Skip the extra field.
644
0
        len = static_cast<uInt>(get_byte()) & 0xFF;
645
0
        len += (static_cast<uInt>(get_byte()) & 0xFF) << 8;
646
        // len is garbage if EOF but the loop below will quit anyway.
647
0
        while (len != 0 && get_byte() != EOF)
648
0
        {
649
0
            --len;
650
0
        }
651
0
    }
652
653
0
    if ((flags & ORIG_NAME) != 0)
654
0
    {
655
        // Skip the original file name.
656
0
        int c;
657
0
        while ((c = get_byte()) != 0 && c != EOF)
658
0
        {
659
0
        }
660
0
    }
661
0
    if ((flags & COMMENT) != 0)
662
0
    {
663
        // skip the .gz file comment.
664
0
        int c;
665
0
        while ((c = get_byte()) != 0 && c != EOF)
666
0
        {
667
0
        }
668
0
    }
669
0
    if ((flags & HEAD_CRC) != 0)
670
0
    {
671
        // Skip the header crc.
672
0
        for (len = 0; len < 2; len++)
673
0
            CPL_IGNORE_RET_VAL(get_byte());
674
0
    }
675
0
    z_err = z_eof ? Z_DATA_ERROR : Z_OK;
676
0
}
677
678
/************************************************************************/
679
/*                              get_byte()                              */
680
/************************************************************************/
681
682
int VSIGZipHandle::get_byte()
683
0
{
684
0
    if (z_eof)
685
0
        return EOF;
686
0
    if (stream.avail_in == 0)
687
0
    {
688
0
        errno = 0;
689
0
        size_t nToRead = static_cast<size_t>(Z_BUFSIZE);
690
0
        CPLAssert(m_poBaseHandle->Tell() <= offsetEndCompressedData);
691
0
        if (m_poBaseHandle->Tell() + nToRead > offsetEndCompressedData)
692
0
            nToRead = static_cast<size_t>(offsetEndCompressedData -
693
0
                                          m_poBaseHandle->Tell());
694
0
        stream.avail_in =
695
0
            static_cast<uInt>(m_poBaseHandle->Read(inbuf, nToRead));
696
#ifdef ENABLE_DEBUG
697
        CPLDebug("GZIP", CPL_FRMT_GUIB " " CPL_FRMT_GUIB,
698
                 m_poBaseHandle->Tell(), offsetEndCompressedData);
699
#endif
700
0
        if (stream.avail_in == 0)
701
0
        {
702
0
            z_eof = 1;
703
0
            if (m_poBaseHandle->Tell() != offsetEndCompressedData)
704
0
                z_err = Z_ERRNO;
705
            // if( ferror(file) ) z_err = Z_ERRNO;
706
0
            return EOF;
707
0
        }
708
0
        stream.next_in = inbuf;
709
0
    }
710
0
    stream.avail_in--;
711
0
    return *(stream.next_in)++;
712
0
}
713
714
/************************************************************************/
715
/*                              gzrewind()                              */
716
/************************************************************************/
717
718
int VSIGZipHandle::gzrewind()
719
0
{
720
0
    z_err = Z_OK;
721
0
    z_eof = 0;
722
0
    m_bEOF = false;
723
0
    stream.avail_in = 0;
724
0
    stream.next_in = inbuf;
725
0
    crc = 0;
726
0
    if (!m_transparent)
727
0
        CPL_IGNORE_RET_VAL(inflateReset(&stream));
728
0
    in = 0;
729
0
    out = 0;
730
0
    return m_poBaseHandle->Seek(startOff, SEEK_SET);
731
0
}
732
733
/************************************************************************/
734
/*                                Seek()                                */
735
/************************************************************************/
736
737
/** Moves the read position in the uncompressed stream.
 *
 * Clears the end-of-file flag, then delegates to gzseek().
 *
 * @param nOffset offset, interpreted according to nWhence.
 * @param nWhence SEEK_SET, SEEK_CUR or SEEK_END.
 * @return 0 on success, -1 on failure.
 */
int VSIGZipHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    m_bEOF = false;

    if (!gzseek(nOffset, nWhence))
        return -1;
    return 0;
}
743
744
/************************************************************************/
745
/*                               gzseek()                               */
746
/************************************************************************/
747
748
bool VSIGZipHandle::gzseek(vsi_l_offset offset, int whence)
749
0
{
750
0
    const vsi_l_offset original_offset = offset;
751
0
    const int original_nWhence = whence;
752
753
0
    z_eof = 0;
754
#ifdef ENABLE_DEBUG
755
    CPLDebug("GZIP", "Seek(" CPL_FRMT_GUIB ",%d)", offset, whence);
756
#endif
757
758
0
    if (m_transparent)
759
0
    {
760
0
        stream.avail_in = 0;
761
0
        stream.next_in = inbuf;
762
0
        if (whence == SEEK_CUR)
763
0
        {
764
0
            if (out + offset > m_compressed_size)
765
0
            {
766
0
                CPL_VSIL_GZ_RETURN(FALSE);
767
0
                return false;
768
0
            }
769
770
0
            offset = startOff + out + offset;
771
0
        }
772
0
        else if (whence == SEEK_SET)
773
0
        {
774
0
            if (offset > m_compressed_size)
775
0
            {
776
0
                CPL_VSIL_GZ_RETURN(FALSE);
777
0
                return false;
778
0
            }
779
780
0
            offset = startOff + offset;
781
0
        }
782
0
        else if (whence == SEEK_END)
783
0
        {
784
            // Commented test: because vsi_l_offset is unsigned (for the moment)
785
            // so no way to seek backward. See #1590 */
786
0
            if (offset > 0)  // || -offset > compressed_size
787
0
            {
788
0
                CPL_VSIL_GZ_RETURN(FALSE);
789
0
                return false;
790
0
            }
791
792
0
            offset = startOff + m_compressed_size - offset;
793
0
        }
794
0
        else
795
0
        {
796
0
            CPL_VSIL_GZ_RETURN(FALSE);
797
0
            return false;
798
0
        }
799
800
0
        if (m_poBaseHandle->Seek(offset, SEEK_SET) < 0)
801
0
        {
802
0
            CPL_VSIL_GZ_RETURN(FALSE);
803
0
            return false;
804
0
        }
805
806
0
        out = offset - startOff;
807
0
        in = out;
808
0
        return true;
809
0
    }
810
811
    // whence == SEEK_END is unsuppored in original gzseek.
812
0
    if (whence == SEEK_END)
813
0
    {
814
        // If we known the uncompressed size, we can fake a jump to
815
        // the end of the stream.
816
0
        if (offset == 0 && m_uncompressed_size != 0)
817
0
        {
818
0
            out = m_uncompressed_size;
819
0
            return true;
820
0
        }
821
822
        // We don't know the uncompressed size. This is unfortunate.
823
        // Do the slow version.
824
0
        static int firstWarning = 1;
825
0
        if (m_compressed_size > 10 * 1024 * 1024 && firstWarning)
826
0
        {
827
0
            CPLError(CE_Warning, CPLE_AppDefined,
828
0
                     "VSIFSeekL(xxx, SEEK_END) may be really slow "
829
0
                     "on GZip streams.");
830
0
            firstWarning = 0;
831
0
        }
832
833
0
        whence = SEEK_CUR;
834
0
        offset = 1024 * 1024 * 1024;
835
0
        offset *= 1024 * 1024;
836
0
    }
837
838
    // Rest of function is for reading only.
839
840
    // Compute absolute position.
841
0
    if (whence == SEEK_CUR)
842
0
    {
843
0
        offset += out;
844
0
    }
845
846
    // For a negative seek, rewind and use positive seek.
847
0
    if (offset >= out)
848
0
    {
849
0
        offset -= out;
850
0
    }
851
0
    else if (gzrewind() < 0)
852
0
    {
853
0
        CPL_VSIL_GZ_RETURN(FALSE);
854
0
        return false;
855
0
    }
856
857
0
    if (z_err != Z_OK && z_err != Z_STREAM_END)
858
0
    {
859
0
        CPL_VSIL_GZ_RETURN(FALSE);
860
0
        return false;
861
0
    }
862
863
0
    for (unsigned int i = 0; i < m_compressed_size / snapshot_byte_interval + 1;
864
0
         i++)
865
0
    {
866
0
        if (snapshots[i].posInBaseHandle == 0)
867
0
            break;
868
0
        if (snapshots[i].out <= out + offset &&
869
0
            (i == m_compressed_size / snapshot_byte_interval ||
870
0
             snapshots[i + 1].out == 0 || snapshots[i + 1].out > out + offset))
871
0
        {
872
0
            if (out >= snapshots[i].out)
873
0
                break;
874
875
#ifdef ENABLE_DEBUG
876
            CPLDebug("SNAPSHOT",
877
                     "using snapshot %d : "
878
                     "posInBaseHandle(snapshot)=" CPL_FRMT_GUIB
879
                     " in(snapshot)=" CPL_FRMT_GUIB
880
                     " out(snapshot)=" CPL_FRMT_GUIB " out=" CPL_FRMT_GUIB
881
                     " offset=" CPL_FRMT_GUIB,
882
                     i, snapshots[i].posInBaseHandle, snapshots[i].in,
883
                     snapshots[i].out, out, offset);
884
#endif
885
0
            offset = out + offset - snapshots[i].out;
886
0
            if (m_poBaseHandle->Seek(snapshots[i].posInBaseHandle, SEEK_SET) !=
887
0
                0)
888
0
                CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
889
890
0
            inflateEnd(&stream);
891
0
            inflateCopy(&stream, &snapshots[i].stream);
892
0
            crc = snapshots[i].crc;
893
0
            m_transparent = snapshots[i].transparent;
894
0
            in = snapshots[i].in;
895
0
            out = snapshots[i].out;
896
0
            break;
897
0
        }
898
0
    }
899
900
    // Offset is now the number of bytes to skip.
901
902
0
    if (offset != 0 && outbuf == nullptr)
903
0
    {
904
0
        outbuf = static_cast<Byte *>(ALLOC(Z_BUFSIZE));
905
0
        if (outbuf == nullptr)
906
0
        {
907
0
            CPL_VSIL_GZ_RETURN(FALSE);
908
0
            return false;
909
0
        }
910
0
    }
911
912
0
    if (original_nWhence == SEEK_END && z_err == Z_STREAM_END)
913
0
    {
914
0
        return true;
915
0
    }
916
917
0
    while (offset > 0)
918
0
    {
919
0
        int size = Z_BUFSIZE;
920
0
        if (offset < static_cast<vsi_l_offset>(Z_BUFSIZE))
921
0
            size = static_cast<int>(offset);
922
923
0
        const int read_size =
924
0
            static_cast<int>(Read(outbuf, static_cast<uInt>(size)));
925
0
        if (original_nWhence == SEEK_END)
926
0
        {
927
0
            if (size != read_size)
928
0
            {
929
0
                z_err = Z_STREAM_END;
930
0
                break;
931
0
            }
932
0
        }
933
0
        else if (read_size == 0)
934
0
        {
935
            // CPL_VSIL_GZ_RETURN(FALSE);
936
0
            return false;
937
0
        }
938
0
        offset -= read_size;
939
0
    }
940
#ifdef ENABLE_DEBUG
941
    CPLDebug("GZIP", "gzseek at offset " CPL_FRMT_GUIB, out);
942
#endif
943
944
0
    if (original_offset == 0 && original_nWhence == SEEK_END)
945
0
    {
946
0
        m_uncompressed_size = out;
947
948
0
        if (m_pszBaseFileName && !STARTS_WITH(m_pszBaseFileName, "/vsicurl/") &&
949
0
            !STARTS_WITH(m_pszBaseFileName, "/vsitar/") &&
950
0
            !STARTS_WITH(m_pszBaseFileName, "/vsizip/") && m_bWriteProperties)
951
0
        {
952
0
            CPLErrorStateBackuper oErrorStateBackuper(CPLQuietErrorHandler);
953
954
0
            CPLString osCacheFilename(m_pszBaseFileName);
955
0
            osCacheFilename += ".properties";
956
957
            // Write a .properties file to avoid seeking next time.
958
0
            VSILFILE *fpCacheLength = VSIFOpenL(osCacheFilename.c_str(), "wb");
959
0
            if (fpCacheLength)
960
0
            {
961
0
                char szBuffer[32] = {};
962
963
0
                CPLPrintUIntBig(szBuffer, m_compressed_size, 31);
964
0
                char *pszFirstNonSpace = szBuffer;
965
0
                while (*pszFirstNonSpace == ' ')
966
0
                    pszFirstNonSpace++;
967
0
                CPL_IGNORE_RET_VAL(VSIFPrintfL(
968
0
                    fpCacheLength, "compressed_size=%s\n", pszFirstNonSpace));
969
970
0
                CPLPrintUIntBig(szBuffer, m_uncompressed_size, 31);
971
0
                pszFirstNonSpace = szBuffer;
972
0
                while (*pszFirstNonSpace == ' ')
973
0
                    pszFirstNonSpace++;
974
0
                CPL_IGNORE_RET_VAL(VSIFPrintfL(
975
0
                    fpCacheLength, "uncompressed_size=%s\n", pszFirstNonSpace));
976
977
0
                CPL_IGNORE_RET_VAL(VSIFCloseL(fpCacheLength));
978
0
            }
979
0
        }
980
0
    }
981
982
0
    return true;
983
0
}
984
985
/************************************************************************/
986
/*                                Tell()                                */
987
/************************************************************************/
988
989
// Report the current value of the `out` counter, i.e. the position that
// Read() and gzseek() maintain for the uncompressed stream.
vsi_l_offset VSIGZipHandle::Tell()
{
    const vsi_l_offset nCurPos = out;
#ifdef ENABLE_DEBUG
    CPLDebug("GZIP", "Tell() = " CPL_FRMT_GUIB, nCurPos);
#endif
    return nCurPos;
}
996
997
/************************************************************************/
998
/*                                Read()                                */
999
/************************************************************************/
1000
1001
size_t VSIGZipHandle::Read(void *const buf, size_t const nBytes)
1002
0
{
1003
#ifdef ENABLE_DEBUG
1004
    CPLDebug("GZIP", "Read(%p, %d)", buf, static_cast<int>(nBytes));
1005
#endif
1006
1007
0
    if (m_bEOF || z_err != Z_OK)
1008
0
    {
1009
0
        if (z_err == Z_STREAM_END && nBytes > 0)
1010
0
            m_bEOF = true;
1011
0
        return 0;
1012
0
    }
1013
1014
0
    if (nBytes > UINT32_MAX)
1015
0
    {
1016
0
        CPLError(CE_Failure, CPLE_FileIO, "Too many bytes to read at once");
1017
0
        return 0;
1018
0
    }
1019
1020
0
    const unsigned len = static_cast<unsigned int>(nBytes);
1021
0
    Bytef *pStart =
1022
0
        static_cast<Bytef *>(buf);  // Start off point for crc computation.
1023
    // == stream.next_out but not forced far (for MSDOS).
1024
0
    Byte *next_out = static_cast<Byte *>(buf);
1025
0
    stream.next_out = static_cast<Bytef *>(buf);
1026
0
    stream.avail_out = len;
1027
1028
0
    while (stream.avail_out != 0)
1029
0
    {
1030
0
        if (m_transparent)
1031
0
        {
1032
            // Copy first the lookahead bytes:
1033
0
            uInt nRead = 0;
1034
0
            uInt n = stream.avail_in;
1035
0
            if (n > stream.avail_out)
1036
0
                n = stream.avail_out;
1037
0
            if (n > 0)
1038
0
            {
1039
0
                memcpy(stream.next_out, stream.next_in, n);
1040
0
                next_out += n;
1041
0
                stream.next_out = next_out;
1042
0
                stream.next_in += n;
1043
0
                stream.avail_out -= n;
1044
0
                stream.avail_in -= n;
1045
0
                nRead += n;
1046
0
            }
1047
0
            if (stream.avail_out > 0)
1048
0
            {
1049
0
                const uInt nToRead = static_cast<uInt>(
1050
0
                    std::min(m_compressed_size - (in + nRead),
1051
0
                             static_cast<vsi_l_offset>(stream.avail_out)));
1052
0
                const uInt nReadFromFile =
1053
0
                    static_cast<uInt>(m_poBaseHandle->Read(next_out, nToRead));
1054
0
                if (nReadFromFile < nToRead && m_poBaseHandle->Error())
1055
0
                    z_err = Z_ERRNO;
1056
0
                stream.avail_out -= nReadFromFile;
1057
0
                nRead += nReadFromFile;
1058
0
            }
1059
0
            in += nRead;
1060
0
            out += nRead;
1061
0
            if (nRead < len)
1062
0
            {
1063
0
                m_bEOF = true;
1064
0
                z_eof = 1;
1065
0
            }
1066
#ifdef ENABLE_DEBUG
1067
            CPLDebug("GZIP", "Read return %u", nRead);
1068
#endif
1069
0
            return nRead;
1070
0
        }
1071
0
        if (stream.avail_in == 0 && !z_eof)
1072
0
        {
1073
0
            vsi_l_offset posInBaseHandle = m_poBaseHandle->Tell();
1074
0
            if (posInBaseHandle - startOff > m_compressed_size)
1075
0
            {
1076
                // If we reach here, file size has changed (because at
1077
                // construction time startOff + m_compressed_size marked the
1078
                // end of file).
1079
                // We should probably have a better fix than that, by detecting
1080
                // at open time that the saved snapshot is not valid and
1081
                // discarding it.
1082
0
                CPLError(CE_Failure, CPLE_AppDefined,
1083
0
                         "File size of underlying /vsigzip/ file has changed");
1084
0
                z_err = Z_ERRNO;
1085
0
                CPL_VSIL_GZ_RETURN(0);
1086
0
                return 0;
1087
0
            }
1088
0
            GZipSnapshot *snapshot = &snapshots[(posInBaseHandle - startOff) /
1089
0
                                                snapshot_byte_interval];
1090
0
            if (snapshot->posInBaseHandle == 0)
1091
0
            {
1092
0
                snapshot->crc = crc32(
1093
0
                    crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1094
#ifdef ENABLE_DEBUG
1095
                CPLDebug("SNAPSHOT",
1096
                         "creating snapshot %d : "
1097
                         "posInBaseHandle=" CPL_FRMT_GUIB " in=" CPL_FRMT_GUIB
1098
                         " out=" CPL_FRMT_GUIB " crc=%X",
1099
                         static_cast<int>((posInBaseHandle - startOff) /
1100
                                          snapshot_byte_interval),
1101
                         posInBaseHandle, in, out,
1102
                         static_cast<unsigned int>(snapshot->crc));
1103
#endif
1104
0
                snapshot->posInBaseHandle = posInBaseHandle;
1105
0
                inflateCopy(&snapshot->stream, &stream);
1106
0
                snapshot->transparent = m_transparent;
1107
0
                snapshot->in = in;
1108
0
                snapshot->out = out;
1109
1110
0
                if (out > m_nLastReadOffset)
1111
0
                    m_nLastReadOffset = out;
1112
0
            }
1113
1114
0
            errno = 0;
1115
0
            stream.avail_in =
1116
0
                static_cast<uInt>(m_poBaseHandle->Read(inbuf, Z_BUFSIZE));
1117
#ifdef ENABLE_DEBUG
1118
            CPLDebug("GZIP", CPL_FRMT_GUIB " " CPL_FRMT_GUIB,
1119
                     m_poBaseHandle->Tell(), offsetEndCompressedData);
1120
#endif
1121
0
            if (m_poBaseHandle->Tell() > offsetEndCompressedData)
1122
0
            {
1123
#ifdef ENABLE_DEBUG
1124
                CPLDebug("GZIP", "avail_in before = %d", stream.avail_in);
1125
#endif
1126
0
                stream.avail_in = stream.avail_in -
1127
0
                                  static_cast<uInt>(m_poBaseHandle->Tell() -
1128
0
                                                    offsetEndCompressedData);
1129
0
                if (m_poBaseHandle->Seek(offsetEndCompressedData, SEEK_SET) !=
1130
0
                    0)
1131
0
                    CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
1132
#ifdef ENABLE_DEBUG
1133
                CPLDebug("GZIP", "avail_in after = %d", stream.avail_in);
1134
#endif
1135
0
            }
1136
0
            if (stream.avail_in == 0)
1137
0
            {
1138
0
                z_eof = 1;
1139
0
                if (m_poBaseHandle->Error() ||
1140
0
                    m_poBaseHandle->Tell() != offsetEndCompressedData)
1141
0
                {
1142
0
                    z_err = Z_ERRNO;
1143
0
                    break;
1144
0
                }
1145
0
            }
1146
0
            stream.next_in = inbuf;
1147
0
        }
1148
0
        in += stream.avail_in;
1149
0
        out += stream.avail_out;
1150
0
        z_err = inflate(&(stream), Z_NO_FLUSH);
1151
0
        in -= stream.avail_in;
1152
0
        out -= stream.avail_out;
1153
1154
0
        if (z_err == Z_STREAM_END && m_compressed_size != 2)
1155
0
        {
1156
            // Check CRC and original size.
1157
0
            crc =
1158
0
                crc32(crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1159
0
            pStart = stream.next_out;
1160
0
            if (m_expected_crc)
1161
0
            {
1162
#ifdef ENABLE_DEBUG
1163
                CPLDebug("GZIP", "Computed CRC = %X. Expected CRC = %X",
1164
                         static_cast<unsigned int>(crc),
1165
                         static_cast<unsigned int>(m_expected_crc));
1166
#endif
1167
0
            }
1168
0
            if (m_expected_crc != 0 && m_expected_crc != crc)
1169
0
            {
1170
0
                CPLError(CE_Failure, CPLE_FileIO,
1171
0
                         "CRC error. Got %X instead of %X",
1172
0
                         static_cast<unsigned int>(crc),
1173
0
                         static_cast<unsigned int>(m_expected_crc));
1174
0
                z_err = Z_DATA_ERROR;
1175
0
            }
1176
0
            else if (m_expected_crc == 0)
1177
0
            {
1178
0
                const uLong read_crc = static_cast<unsigned long>(getLong());
1179
0
                if (read_crc != crc)
1180
0
                {
1181
0
                    CPLError(CE_Failure, CPLE_FileIO,
1182
0
                             "CRC error. Got %X instead of %X",
1183
0
                             static_cast<unsigned int>(crc),
1184
0
                             static_cast<unsigned int>(read_crc));
1185
0
                    z_err = Z_DATA_ERROR;
1186
0
                }
1187
0
                else
1188
0
                {
1189
0
                    CPL_IGNORE_RET_VAL(getLong());
1190
                    // The uncompressed length returned by above getlong() may
1191
                    // be different from out in case of concatenated .gz files.
1192
                    // Check for such files:
1193
0
                    check_header();
1194
0
                    if (z_err == Z_OK)
1195
0
                    {
1196
0
                        inflateReset(&(stream));
1197
0
                        crc = 0;
1198
0
                    }
1199
0
                }
1200
0
            }
1201
0
        }
1202
0
        if (z_err != Z_OK || z_eof)
1203
0
            break;
1204
0
    }
1205
0
    crc = crc32(crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1206
1207
0
    unsigned ret = len - stream.avail_out;
1208
0
    if (z_err != Z_OK && z_err != Z_STREAM_END)
1209
0
    {
1210
0
        CPLError(CE_Failure, CPLE_AppDefined,
1211
0
                 "In file %s, at line %d, decompression failed with "
1212
0
                 "z_err = %d, return = %u",
1213
0
                 __FILE__, __LINE__, z_err, ret);
1214
0
    }
1215
0
    else if (ret < nBytes)
1216
0
    {
1217
0
        m_bEOF = true;
1218
0
    }
1219
1220
#ifdef ENABLE_DEBUG
1221
    CPLDebug("GZIP", "Read return %u (z_err=%d, z_eof=%d)", ret, z_err, z_eof);
1222
#endif
1223
0
    return ret;
1224
0
}
1225
1226
/************************************************************************/
1227
/*                              getLong()                               */
1228
/************************************************************************/
1229
1230
// Assemble a 32-bit value from the next four bytes returned by get_byte(),
// least significant byte first. If the fourth byte is EOF, z_err is set to
// Z_DATA_ERROR and 0 is returned.
uLong VSIGZipHandle::getLong()
{
    uLong nValue = 0;
    // The three low-order bytes are masked to 8 bits before being shifted in.
    for (int nShift = 0; nShift < 24; nShift += 8)
    {
        nValue += (static_cast<uLong>(get_byte()) & 0xFF) << nShift;
    }

    const int nHighByte = get_byte();
    if (nHighByte == EOF)
    {
        z_err = Z_DATA_ERROR;
        return 0;
    }
    nValue += static_cast<uLong>(nHighByte) << 24;
    // coverity[overflow_sink]
    return nValue;
}
1246
1247
/************************************************************************/
1248
/*                               Write()                                */
1249
/************************************************************************/
1250
1251
size_t VSIGZipHandle::Write(const void * /* pBuffer */, size_t /* nBytes */)
1252
0
{
1253
0
    CPLError(CE_Failure, CPLE_NotSupported,
1254
0
             "VSIFWriteL is not supported on GZip streams");
1255
0
    return 0;
1256
0
}
1257
1258
/************************************************************************/
1259
/*                                Eof()                                 */
1260
/************************************************************************/
1261
1262
int VSIGZipHandle::Eof()
1263
0
{
1264
#ifdef ENABLE_DEBUG
1265
    CPLDebug("GZIP", "Eof()");
1266
#endif
1267
0
    return m_bEOF;
1268
0
}
1269
1270
/************************************************************************/
1271
/*                               Error()                                */
1272
/************************************************************************/
1273
1274
int VSIGZipHandle::Error()
1275
0
{
1276
#ifdef ENABLE_DEBUG
1277
    CPLDebug("GZIP", "Error()");
1278
#endif
1279
0
    return z_err != Z_OK && z_err != Z_STREAM_END;
1280
0
}
1281
1282
/************************************************************************/
1283
/*                              ClearErr()                              */
1284
/************************************************************************/
1285
1286
void VSIGZipHandle::ClearErr()
1287
0
{
1288
0
    m_poBaseHandle->ClearErr();
1289
0
    z_eof = 0;
1290
0
    m_bEOF = false;
1291
0
    z_err = Z_OK;
1292
0
}
1293
1294
/************************************************************************/
1295
/*                               Flush()                                */
1296
/************************************************************************/
1297
1298
// No-op: always returns 0.
int VSIGZipHandle::Flush()
{
    constexpr int nSuccess = 0;
    return nSuccess;
}
1302
1303
/************************************************************************/
1304
/*                               Close()                                */
1305
/************************************************************************/
1306
1307
// No-op: always returns 0.
int VSIGZipHandle::Close()
{
    constexpr int nSuccess = 0;
    return nSuccess;
}
1311
1312
#ifdef ENABLE_DEFLATE64
1313
1314
/************************************************************************/
1315
/*                             Duplicate()                              */
1316
/************************************************************************/
1317
1318
// Create a new handle on the same base file, carrying over the snapshots
// already recorded by this one.
//
// Returns a new handle owned by the caller, or nullptr when the base file
// cannot be reopened or the new handle fails IsInitOK().
VSIDeflate64Handle *VSIDeflate64Handle::Duplicate()
{
    CPLAssert(m_offset == 0);
    CPLAssert(m_compressed_size != 0);
    CPLAssert(m_pszBaseFileName != nullptr);

    VSIFilesystemHandler *poFSHandler =
        VSIFileManager::GetHandler(m_pszBaseFileName);

    VSIVirtualHandleUniquePtr poNewBaseHandle(
        poFSHandler->Open(m_pszBaseFileName, "rb"));

    if (poNewBaseHandle == nullptr)
        return nullptr;

    auto poHandle = std::make_unique<VSIDeflate64Handle>(
        std::move(poNewBaseHandle), m_pszBaseFileName, 0, m_compressed_size,
        m_uncompressed_size);
    if (!(poHandle->IsInitOK()))
    {
        return nullptr;
    }

    // Most important: duplicate the snapshots!
    // Slots are filled in order; the first one with posInBaseHandle == 0
    // marks the end of the recorded snapshots.
    const vsi_l_offset nSnapshotSlots =
        m_compressed_size / snapshot_byte_interval + 1;
    for (unsigned int i = 0; i < nSnapshotSlots; i++)
    {
        auto &oSrc = snapshots[i];
        if (oSrc.posInBaseHandle == 0)
            break;

        auto &oDst = poHandle->snapshots[i];

        // Copy the decoder state first. A non-zero posInBaseHandle is what
        // marks a slot as usable, so it must only be set once the stream copy
        // succeeded. On failure, leave this slot (and the following ones)
        // empty.
        if (inflateBack9Copy(&oDst.stream, &oSrc.stream) != Z_OK)
        {
            CPLError(CE_Failure, CPLE_AppDefined, "inflateBack9Copy() failed");
            break;
        }

        oDst.posInBaseHandle = oSrc.posInBaseHandle;
        oDst.crc = oSrc.crc;
        oDst.in = oSrc.in;
        oDst.out = oSrc.out;
        oDst.extraOutput = oSrc.extraOutput;
        oDst.m_bStreamEndReached = oSrc.m_bStreamEndReached;
    }

    return poHandle.release();
}
1363
1364
/************************************************************************/
1365
/*                          CloseBaseHandle()                           */
1366
/************************************************************************/
1367
1368
bool VSIDeflate64Handle::CloseBaseHandle()
1369
0
{
1370
0
    bool bRet = true;
1371
0
    if (m_poBaseHandle)
1372
0
    {
1373
0
        bRet = m_poBaseHandle->Close() == 0;
1374
0
        m_poBaseHandle.reset();
1375
0
    }
1376
0
    return bRet;
1377
0
}
1378
1379
/************************************************************************/
1380
/*                         VSIDeflate64Handle()                         */
1381
/************************************************************************/
1382
1383
// Set up a Deflate64 reader over poBaseHandleIn.
//
// offset is the position in the base handle at which the compressed data
// starts. When compressed_size is 0, it is derived from the size of the base
// handle (end position minus offset). On initialization failure an error is
// emitted and inbuf is left null.
VSIDeflate64Handle::VSIDeflate64Handle(VSIVirtualHandleUniquePtr poBaseHandleIn,
                                       const char *pszBaseFileName,
                                       vsi_l_offset offset,
                                       vsi_l_offset compressed_size,
                                       vsi_l_offset uncompressed_size,
                                       uLong expected_crc)
    : m_poBaseHandle(std::move(poBaseHandleIn)),
#ifdef DEBUG
      m_offset(offset),
#endif
      m_uncompressed_size(uncompressed_size), m_expected_crc(expected_crc),
      m_pszBaseFileName(pszBaseFileName ? CPLStrdup(pszBaseFileName) : nullptr),
      stream(), crc(0)
{
    // Unknown compressed size: measure it from the end of the base handle.
    if (compressed_size == 0)
    {
        if (m_poBaseHandle->Seek(0, SEEK_END) != 0)
            CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
        compressed_size = m_poBaseHandle->Tell() - offset;
    }
    m_compressed_size = compressed_size;
    offsetEndCompressedData = offset + compressed_size;

    if (m_poBaseHandle->Seek(offset, SEEK_SET) != 0)
        CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");

    // Start from a blank stream description; the output buffer is not
    // allocated here.
    stream.zalloc = nullptr;
    stream.zfree = nullptr;
    stream.opaque = nullptr;
    outbuf = nullptr;
    stream.next_out = nullptr;
    stream.avail_out = 0;
    stream.avail_in = 0;

    inbuf = static_cast<Byte *>(ALLOC(Z_BUFSIZE));
    stream.next_in = inbuf;

    const int nInitErr = inflateBack9Init(&(stream), nullptr);
    // Note (carried over from the original code): in this case inflate
    // *requires* an extra "dummy" byte after the compressed stream in order
    // to complete decompression and return Z_STREAM_END. Here the gzip CRC32
    // ensures that 4 bytes are present after the compressed stream.
    const bool bInitOK = (nInitErr == Z_OK && inbuf != nullptr);
    if (!bInitOK)
    {
        CPLError(CE_Failure, CPLE_NotSupported, "inflateBack9Init init failed");
        TRYFREE(inbuf);
        inbuf = nullptr;
        return;
    }
    startOff = m_poBaseHandle->Tell() - stream.avail_in;

    // One snapshot slot per snapshot_byte_interval bytes of compressed data;
    // the interval is 1/100 of the compressed size, but at least Z_BUFSIZE.
    snapshot_byte_interval =
        std::max(static_cast<vsi_l_offset>(Z_BUFSIZE), compressed_size / 100);
    const vsi_l_offset nSnapshotSlots =
        compressed_size / snapshot_byte_interval + 1;
    snapshots.resize(static_cast<size_t>(nSnapshotSlots));
}
1442
1443
/************************************************************************/
1444
/*                        ~VSIDeflate64Handle()                         */
1445
/************************************************************************/
1446
1447
// Release the decoder state, the I/O buffers, every recorded snapshot, the
// file name copy and finally the base handle.
VSIDeflate64Handle::~VSIDeflate64Handle()
{
    // Live decoder state.
    if (stream.state != nullptr)
    {
        inflateBack9End(&(stream));
    }

    TRYFREE(inbuf);
    TRYFREE(outbuf);

    // Only slots with a non-zero posInBaseHandle hold a decoder state.
    for (size_t iSnap = 0; iSnap < snapshots.size(); ++iSnap)
    {
        if (snapshots[iSnap].posInBaseHandle)
        {
            inflateBack9End(&(snapshots[iSnap].stream));
        }
    }

    CPLFree(m_pszBaseFileName);

    CloseBaseHandle();
}
1468
1469
/************************************************************************/
1470
/*                              gzrewind()                              */
1471
/************************************************************************/
1472
1473
int VSIDeflate64Handle::gzrewind()
1474
0
{
1475
0
    m_bStreamEndReached = false;
1476
0
    extraOutput.clear();
1477
0
    z_err = Z_OK;
1478
0
    z_eof = 0;
1479
0
    stream.avail_in = 0;
1480
0
    stream.next_in = inbuf;
1481
0
    crc = 0;
1482
0
    CPL_IGNORE_RET_VAL(inflateBack9End(&stream));
1483
0
    CPL_IGNORE_RET_VAL(inflateBack9Init(&stream, nullptr));
1484
0
    in = 0;
1485
0
    out = 0;
1486
0
    return m_poBaseHandle->Seek(startOff, SEEK_SET);
1487
0
}
1488
1489
/************************************************************************/
1490
/*                                Seek()                                */
1491
/************************************************************************/
1492
1493
// Clear the EOF flag and delegate to gzseek().
//
// Returns 0 when gzseek() succeeds, -1 otherwise.
int VSIDeflate64Handle::Seek(vsi_l_offset nOffset, int nWhence)
{
    m_bEOF = false;
    if (!gzseek(nOffset, nWhence))
        return -1;
    return 0;
}
1498
1499
/************************************************************************/
1500
/*                               gzseek()                               */
1501
/************************************************************************/
1502
1503
// Position the uncompressed stream according to offset / whence.
//
// Forward moves are done by decoding and discarding data; backward moves
// rewind first. A recorded snapshot is restored when it lies between the
// current position and the target. Returns true on success, false otherwise.
bool VSIDeflate64Handle::gzseek(vsi_l_offset offset, int whence)
{
    const vsi_l_offset nRequestedOffset = offset;
    const bool bSeekFromEnd = (whence == SEEK_END);

    z_eof = 0;
#ifdef ENABLE_DEBUG
    CPLDebug("GZIP", "Seek(" CPL_FRMT_GUIB ",%d)", offset, whence);
#endif

    // whence == SEEK_END is unsupported in the original gzseek.
    if (bSeekFromEnd)
    {
        // If we know the uncompressed size, we can fake a jump to
        // the end of the stream.
        if (offset == 0 && m_uncompressed_size != 0)
        {
            out = m_uncompressed_size;
            return true;
        }

        // We don't know the uncompressed size. This is unfortunate.
        // Do the slow version.
        static int bWarnOnce = 1;
        if (m_compressed_size > 10 * 1024 * 1024 && bWarnOnce)
        {
            CPLError(CE_Warning, CPLE_AppDefined,
                     "VSIFSeekL(xxx, SEEK_END) may be really slow "
                     "on GZip streams.");
            bWarnOnce = 0;
        }

        // Turn the request into a very large (2^50 bytes) forward skip; the
        // skip loop below stops at the first short read.
        whence = SEEK_CUR;
        offset = static_cast<vsi_l_offset>(1024) * 1024 * 1024;
        offset *= 1024 * 1024;
    }

    // Rest of function is for reading only.

    // Compute absolute position.
    if (whence == SEEK_CUR)
        offset += out;

    // For a negative seek, rewind and use positive seek.
    if (offset < out)
    {
        if (gzrewind() < 0)
        {
            CPL_VSIL_GZ_RETURN(FALSE);
            return false;
        }
    }
    else
    {
        offset -= out;
    }

    if (!(z_err == Z_OK || z_err == Z_STREAM_END))
    {
        CPL_VSIL_GZ_RETURN(FALSE);
        return false;
    }

    // Look for the snapshot covering the target position (out + offset).
    const vsi_l_offset nLastSlot = m_compressed_size / snapshot_byte_interval;
    for (unsigned int i = 0; i <= nLastSlot; i++)
    {
        auto &oSnap = snapshots[i];
        if (oSnap.posInBaseHandle == 0)
            break;

        const vsi_l_offset nTarget = out + offset;
        const bool bCoversTarget =
            oSnap.out <= nTarget &&
            (i == nLastSlot || snapshots[i + 1].out == 0 ||
             snapshots[i + 1].out > nTarget);
        if (!bCoversTarget)
            continue;

        // Already at or past that snapshot: restoring it would not help.
        if (out >= oSnap.out)
            break;

#ifdef ENABLE_DEBUG
        CPLDebug("SNAPSHOT",
                 "using snapshot %d : "
                 "posInBaseHandle(snapshot)=" CPL_FRMT_GUIB
                 " in(snapshot)=" CPL_FRMT_GUIB
                 " out(snapshot)=" CPL_FRMT_GUIB " out=" CPL_FRMT_GUIB
                 " offset=" CPL_FRMT_GUIB,
                 i, oSnap.posInBaseHandle, oSnap.in, oSnap.out, out, offset);
#endif
        offset = nTarget - oSnap.out;
        if (m_poBaseHandle->Seek(oSnap.posInBaseHandle, SEEK_SET) != 0)
            CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");

        // NOTE(review): the live stream is ended before the copy is
        // attempted and execution continues after a failed copy - confirm
        // whether a failure here should abort the seek.
        inflateBack9End(&stream);
        if (inflateBack9Copy(&stream, &oSnap.stream) != Z_OK)
            CPLError(CE_Failure, CPLE_AppDefined,
                     "inflateBack9Copy() failed");
        crc = oSnap.crc;
        in = oSnap.in;
        out = oSnap.out;
        extraOutput = oSnap.extraOutput;
        m_bStreamEndReached = oSnap.m_bStreamEndReached;
        break;
    }

    // Offset is now the number of bytes to skip.

    if (offset != 0 && outbuf == nullptr)
    {
        outbuf = static_cast<Byte *>(ALLOC(Z_BUFSIZE));
        if (outbuf == nullptr)
        {
            CPL_VSIL_GZ_RETURN(FALSE);
            return false;
        }
    }

    if (bSeekFromEnd && z_err == Z_STREAM_END)
        return true;

    // Decode and discard, at most Z_BUFSIZE bytes at a time.
    while (offset > 0)
    {
        const int nChunk = (offset < static_cast<vsi_l_offset>(Z_BUFSIZE))
                               ? static_cast<int>(offset)
                               : Z_BUFSIZE;

        const int nGot =
            static_cast<int>(Read(outbuf, static_cast<uInt>(nChunk)));
        if (bSeekFromEnd)
        {
            // A short read means the end of the stream has been reached.
            if (nGot != nChunk)
            {
                z_err = Z_STREAM_END;
                break;
            }
        }
        else if (nGot == 0)
        {
            // CPL_VSIL_GZ_RETURN(FALSE);
            return false;
        }
        offset -= nGot;
    }
#ifdef ENABLE_DEBUG
    CPLDebug("GZIP", "gzseek at offset " CPL_FRMT_GUIB, out);
#endif

    // Remember the size found by a Seek(0, SEEK_END).
    if (bSeekFromEnd && nRequestedOffset == 0)
        m_uncompressed_size = out;

    return true;
}
1656
1657
/************************************************************************/
1658
/*                                Tell()                                */
1659
/************************************************************************/
1660
1661
// Report the current value of the `out` counter, i.e. the position that
// Read() and gzseek() maintain for the uncompressed stream.
vsi_l_offset VSIDeflate64Handle::Tell()
{
    const vsi_l_offset nCurPos = out;
#ifdef ENABLE_DEBUG
    CPLDebug("GZIP", "Tell() = " CPL_FRMT_GUIB, nCurPos);
#endif
    return nCurPos;
}
1668
1669
/************************************************************************/
1670
/*                                Read()                                */
1671
/************************************************************************/
1672
1673
size_t VSIDeflate64Handle::Read(void *const buf, size_t const nBytes)
1674
0
{
1675
#ifdef ENABLE_DEBUG
1676
    CPLDebug("GZIP", "Read(%p, %d)", buf, static_cast<int>(nBytes));
1677
#endif
1678
1679
0
    if (m_bEOF || z_err != Z_OK)
1680
0
    {
1681
0
        if (z_err == Z_STREAM_END && nBytes > 0)
1682
0
            m_bEOF = true;
1683
0
        return 0;
1684
0
    }
1685
1686
0
    if (nBytes > UINT32_MAX)
1687
0
    {
1688
0
        CPLError(CE_Failure, CPLE_FileIO, "Too many bytes to read at once");
1689
0
        return 0;
1690
0
    }
1691
1692
0
    const unsigned len = static_cast<unsigned int>(nBytes);
1693
0
    Bytef *pStart =
1694
0
        static_cast<Bytef *>(buf);  // Start off point for crc computation.
1695
    // == stream.next_out but not forced far (for MSDOS).
1696
0
    stream.next_out = static_cast<Bytef *>(buf);
1697
0
    stream.avail_out = len;
1698
1699
0
    while (stream.avail_out != 0)
1700
0
    {
1701
0
        if (!extraOutput.empty())
1702
0
        {
1703
0
            if (extraOutput.size() >= stream.avail_out)
1704
0
            {
1705
0
                memcpy(stream.next_out, extraOutput.data(), stream.avail_out);
1706
0
                extraOutput.erase(extraOutput.begin(),
1707
0
                                  extraOutput.begin() + stream.avail_out);
1708
0
                out += stream.avail_out;
1709
0
                stream.next_out += stream.avail_out;
1710
0
                stream.avail_out = 0;
1711
0
            }
1712
0
            else
1713
0
            {
1714
0
                memcpy(stream.next_out, extraOutput.data(), extraOutput.size());
1715
0
                stream.next_out += extraOutput.size();
1716
0
                out += static_cast<uInt>(extraOutput.size());
1717
0
                stream.avail_out -= static_cast<uInt>(extraOutput.size());
1718
0
                CPLAssert(stream.avail_out > 0);
1719
0
                extraOutput.clear();
1720
0
            }
1721
0
            z_err = Z_OK;
1722
0
        }
1723
1724
0
        if (stream.avail_in == 0 && !z_eof)
1725
0
        {
1726
0
            vsi_l_offset posInBaseHandle = m_poBaseHandle->Tell();
1727
0
            if (posInBaseHandle - startOff > m_compressed_size)
1728
0
            {
1729
                // If we reach here, file size has changed (because at
1730
                // construction time startOff + m_compressed_size marked the
1731
                // end of file).
1732
                // We should probably have a better fix than that, by detecting
1733
                // at open time that the saved snapshot is not valid and
1734
                // discarding it.
1735
0
                CPLError(CE_Failure, CPLE_AppDefined,
1736
0
                         "File size of underlying /vsigzip/ file has changed");
1737
0
                z_err = Z_ERRNO;
1738
0
                CPL_VSIL_GZ_RETURN(0);
1739
0
                return 0;
1740
0
            }
1741
0
            auto snapshot = &snapshots[static_cast<size_t>(
1742
0
                (posInBaseHandle - startOff) / snapshot_byte_interval)];
1743
0
            if (snapshot->posInBaseHandle == 0)
1744
0
            {
1745
0
                snapshot->crc = crc32(
1746
0
                    crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1747
#ifdef ENABLE_DEBUG
1748
                CPLDebug("SNAPSHOT",
1749
                         "creating snapshot %d : "
1750
                         "posInBaseHandle=" CPL_FRMT_GUIB " in=" CPL_FRMT_GUIB
1751
                         " out=" CPL_FRMT_GUIB " crc=%X",
1752
                         static_cast<int>((posInBaseHandle - startOff) /
1753
                                          snapshot_byte_interval),
1754
                         posInBaseHandle, in, out,
1755
                         static_cast<unsigned int>(snapshot->crc));
1756
#endif
1757
0
                snapshot->posInBaseHandle = posInBaseHandle;
1758
0
                if (inflateBack9Copy(&snapshot->stream, &stream) != Z_OK)
1759
0
                    CPLError(CE_Failure, CPLE_AppDefined,
1760
0
                             "inflateBack9Copy() failed");
1761
0
                snapshot->in = in;
1762
0
                snapshot->out = out;
1763
0
                snapshot->extraOutput = extraOutput;
1764
0
                snapshot->m_bStreamEndReached = m_bStreamEndReached;
1765
0
            }
1766
1767
0
            errno = 0;
1768
0
            stream.avail_in =
1769
0
                static_cast<uInt>(m_poBaseHandle->Read(inbuf, Z_BUFSIZE));
1770
#ifdef ENABLE_DEBUG
1771
            CPLDebug("GZIP", CPL_FRMT_GUIB " " CPL_FRMT_GUIB,
1772
                     m_poBaseHandle->Tell(), offsetEndCompressedData);
1773
#endif
1774
0
            if (m_poBaseHandle->Tell() > offsetEndCompressedData)
1775
0
            {
1776
#ifdef ENABLE_DEBUG
1777
                CPLDebug("GZIP", "avail_in before = %d", stream.avail_in);
1778
#endif
1779
0
                stream.avail_in = stream.avail_in -
1780
0
                                  static_cast<uInt>(m_poBaseHandle->Tell() -
1781
0
                                                    offsetEndCompressedData);
1782
0
                if (m_poBaseHandle->Seek(offsetEndCompressedData, SEEK_SET) !=
1783
0
                    0)
1784
0
                    CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
1785
#ifdef ENABLE_DEBUG
1786
                CPLDebug("GZIP", "avail_in after = %d", stream.avail_in);
1787
#endif
1788
0
            }
1789
0
            if (stream.avail_in == 0)
1790
0
            {
1791
0
                z_eof = 1;
1792
0
                if (m_poBaseHandle->Error() ||
1793
0
                    m_poBaseHandle->Tell() != offsetEndCompressedData)
1794
0
                {
1795
0
                    z_err = Z_ERRNO;
1796
0
                    break;
1797
0
                }
1798
0
            }
1799
0
            stream.next_in = inbuf;
1800
0
        }
1801
1802
0
        struct InOutCallback
1803
0
        {
1804
0
            vsi_l_offset *pOut = nullptr;
1805
0
            std::vector<GByte> *pExtraOutput = nullptr;
1806
0
            z_stream *pStream = nullptr;
1807
1808
0
            static unsigned inCbk(void FAR *, z_const unsigned char FAR * FAR *)
1809
0
            {
1810
0
                return 0;
1811
0
            }
1812
1813
0
            static int outCbk(void FAR *user_data, unsigned char FAR *data,
1814
0
                              unsigned len)
1815
0
            {
1816
0
                auto self = static_cast<InOutCallback *>(user_data);
1817
0
                if (self->pStream->avail_out >= len)
1818
0
                {
1819
0
                    memcpy(self->pStream->next_out, data, len);
1820
0
                    *(self->pOut) += len;
1821
0
                    self->pStream->next_out += len;
1822
0
                    self->pStream->avail_out -= len;
1823
0
                }
1824
0
                else
1825
0
                {
1826
0
                    if (self->pStream->avail_out != 0)
1827
0
                    {
1828
0
                        memcpy(self->pStream->next_out, data,
1829
0
                               self->pStream->avail_out);
1830
0
                        *(self->pOut) += self->pStream->avail_out;
1831
0
                        data += self->pStream->avail_out;
1832
0
                        len -= self->pStream->avail_out;
1833
0
                        self->pStream->next_out += self->pStream->avail_out;
1834
0
                        self->pStream->avail_out = 0;
1835
0
                    }
1836
0
                    if (len > 0)
1837
0
                    {
1838
0
                        self->pExtraOutput->insert(self->pExtraOutput->end(),
1839
0
                                                   data, data + len);
1840
0
                    }
1841
0
                }
1842
0
                return 0;
1843
0
            }
1844
0
        };
1845
1846
0
        InOutCallback cbkData;
1847
0
        cbkData.pOut = &out;
1848
0
        cbkData.pExtraOutput = &extraOutput;
1849
0
        cbkData.pStream = &stream;
1850
1851
0
        if (stream.avail_out)
1852
0
        {
1853
0
            if (m_bStreamEndReached)
1854
0
                z_err = Z_STREAM_END;
1855
0
            else
1856
0
            {
1857
0
                in += stream.avail_in;
1858
0
                z_err = inflateBack9(&(stream), InOutCallback::inCbk, &cbkData,
1859
0
                                     InOutCallback::outCbk, &cbkData);
1860
0
                in -= stream.avail_in;
1861
0
            }
1862
0
        }
1863
0
        if (z_err == Z_BUF_ERROR && stream.next_in == Z_NULL)
1864
0
            z_err = Z_OK;
1865
0
        else if (!extraOutput.empty() && z_err == Z_STREAM_END)
1866
0
        {
1867
0
            m_bStreamEndReached = true;
1868
0
            z_err = Z_OK;
1869
0
        }
1870
1871
0
        if (z_err == Z_STREAM_END /*&& m_compressed_size != 2*/)
1872
0
        {
1873
            // Check CRC and original size.
1874
0
            crc =
1875
0
                crc32(crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1876
0
            pStart = stream.next_out;
1877
0
            if (m_expected_crc)
1878
0
            {
1879
#ifdef ENABLE_DEBUG
1880
                CPLDebug("GZIP", "Computed CRC = %X. Expected CRC = %X",
1881
                         static_cast<unsigned int>(crc),
1882
                         static_cast<unsigned int>(m_expected_crc));
1883
#endif
1884
0
            }
1885
0
            if (m_expected_crc != 0 && m_expected_crc != crc)
1886
0
            {
1887
0
                CPLError(CE_Failure, CPLE_FileIO,
1888
0
                         "CRC error. Got %X instead of %X",
1889
0
                         static_cast<unsigned int>(crc),
1890
0
                         static_cast<unsigned int>(m_expected_crc));
1891
0
                z_err = Z_DATA_ERROR;
1892
0
            }
1893
0
        }
1894
0
        if (z_err != Z_OK || z_eof)
1895
0
            break;
1896
0
    }
1897
0
    crc = crc32(crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1898
1899
0
    unsigned ret = (len - stream.avail_out);
1900
0
    if (z_err != Z_OK && z_err != Z_STREAM_END)
1901
0
    {
1902
0
        CPLError(CE_Failure, CPLE_AppDefined,
1903
0
                 "In file %s, at line %d, decompression failed with "
1904
0
                 "z_err = %d, return = %u",
1905
0
                 __FILE__, __LINE__, z_err, ret);
1906
0
    }
1907
0
    else if (ret < nBytes)
1908
0
    {
1909
0
        m_bEOF = true;
1910
0
    }
1911
1912
#ifdef ENABLE_DEBUG
1913
    CPLDebug("GZIP", "Read return %u (z_err=%d, z_eof=%d)", ret, z_err, z_eof);
1914
#endif
1915
0
    return ret;
1916
0
}
1917
1918
/************************************************************************/
1919
/*                               Write()                                */
1920
/************************************************************************/
1921
1922
size_t VSIDeflate64Handle::Write(const void * /* pBuffer */,
1923
                                 size_t /* nBytes */)
1924
0
{
1925
0
    CPLError(CE_Failure, CPLE_NotSupported,
1926
0
             "VSIFWriteL is not supported on GZip streams");
1927
0
    return 0;
1928
0
}
1929
1930
/************************************************************************/
1931
/*                                Eof()                                 */
1932
/************************************************************************/
1933
1934
int VSIDeflate64Handle::Eof()
1935
0
{
1936
#ifdef ENABLE_DEBUG
1937
    CPLDebug("GZIP", "Eof()");
1938
#endif
1939
0
    return m_bEOF;
1940
0
}
1941
1942
/************************************************************************/
1943
/*                               Error()                                */
1944
/************************************************************************/
1945
1946
int VSIDeflate64Handle::Error()
1947
0
{
1948
#ifdef ENABLE_DEBUG
1949
    CPLDebug("GZIP", "Error()");
1950
#endif
1951
0
    return z_err != Z_OK && z_err != Z_STREAM_END;
1952
0
}
1953
1954
/************************************************************************/
1955
/*                              ClearErr()                              */
1956
/************************************************************************/
1957
1958
void VSIDeflate64Handle::ClearErr()
1959
0
{
1960
0
    m_poBaseHandle->ClearErr();
1961
0
    z_eof = 0;
1962
0
    m_bEOF = false;
1963
0
    z_err = Z_OK;
1964
0
}
1965
1966
/************************************************************************/
1967
/*                               Flush()                                */
1968
/************************************************************************/
1969
1970
/** No-op: this implementation has nothing to flush.
 *
 * @return always 0.
 */
int VSIDeflate64Handle::Flush()
{
    constexpr int nSuccess = 0;
    return nSuccess;
}
1974
1975
/************************************************************************/
1976
/*                               Close()                                */
1977
/************************************************************************/
1978
1979
/** No-op: this method performs no action.
 *
 * @return always 0.
 */
int VSIDeflate64Handle::Close()
{
    constexpr int nSuccess = 0;
    return nSuccess;
}
1983
#endif
1984
1985
/************************************************************************/
1986
/* ==================================================================== */
1987
/*                       VSIGZipWriteHandleMT                           */
1988
/* ==================================================================== */
1989
/************************************************************************/
1990
1991
class VSIGZipWriteHandleMT final : public VSIVirtualHandle
1992
{
1993
    CPL_DISALLOW_COPY_ASSIGN(VSIGZipWriteHandleMT)
1994
1995
    VSIVirtualHandle *poBaseHandle_ = nullptr;
1996
    vsi_l_offset nCurOffset_ = 0;
1997
    uLong nCRC_ = 0;
1998
    int nDeflateType_ = CPL_DEFLATE_TYPE_GZIP;
1999
    bool bAutoCloseBaseHandle_ = false;
2000
    int nThreads_ = 0;
2001
    std::unique_ptr<CPLWorkerThreadPool> poPool_{};
2002
    std::list<std::string *> aposBuffers_{};
2003
    std::string *pCurBuffer_ = nullptr;
2004
    std::mutex sMutex_{};
2005
    int nSeqNumberGenerated_ = 0;
2006
    int nSeqNumberExpected_ = 0;
2007
    int nSeqNumberExpectedCRC_ = 0;
2008
    size_t nChunkSize_ = 0;
2009
    bool bHasErrored_ = false;
2010
2011
    struct Job
2012
    {
2013
        VSIGZipWriteHandleMT *pParent_ = nullptr;
2014
        std::string *pBuffer_ = nullptr;
2015
        int nSeqNumber_ = 0;
2016
        bool bFinish_ = false;
2017
        bool bInCRCComputation_ = false;
2018
2019
        std::string sCompressedData_{};
2020
        uLong nCRC_ = 0;
2021
    };
2022
2023
    std::list<Job *> apoFinishedJobs_{};
2024
    std::list<Job *> apoCRCFinishedJobs_{};
2025
    std::list<Job *> apoFreeJobs_{};
2026
    vsi_l_offset nStartOffset_ = 0;
2027
    size_t nSOZIPIndexEltSize_ = 0;
2028
    std::vector<uint8_t> *panSOZIPIndex_ = nullptr;
2029
2030
    static void DeflateCompress(void *inData);
2031
    static void CRCCompute(void *inData);
2032
    bool ProcessCompletedJobs();
2033
    Job *GetJobObject();
2034
#ifdef DEBUG_VERBOSE
2035
    void DumpState();
2036
#endif
2037
2038
  public:
2039
    VSIGZipWriteHandleMT(VSIVirtualHandle *poBaseHandle, int nDeflateType,
2040
                         bool bAutoCloseBaseHandleIn, int nThreads,
2041
                         size_t nChunkSize, size_t nSOZIPIndexEltSize,
2042
                         std::vector<uint8_t> *panSOZIPIndex);
2043
2044
    ~VSIGZipWriteHandleMT() override;
2045
2046
    int Seek(vsi_l_offset nOffset, int nWhence) override;
2047
    vsi_l_offset Tell() override;
2048
    size_t Read(void *pBuffer, size_t nBytes) override;
2049
    size_t Write(const void *pBuffer, size_t nBytes) override;
2050
2051
    int Eof() override
2052
0
    {
2053
0
        return 0;
2054
0
    }
2055
2056
    int Error() override
2057
0
    {
2058
0
        return 0;
2059
0
    }
2060
2061
    void ClearErr() override
2062
0
    {
2063
0
    }
2064
2065
    int Flush() override;
2066
    int Close() override;
2067
};
2068
2069
/************************************************************************/
2070
/*                        VSIGZipWriteHandleMT()                        */
2071
/************************************************************************/
2072
2073
/** Set up a multi-threaded deflate writer on top of poBaseHandle.
 *
 * @param poBaseHandle           handle receiving the compressed stream.
 * @param nDeflateType           one of the CPL_DEFLATE_TYPE_xxx values.
 * @param bAutoCloseBaseHandleIn whether Close() closes and deletes
 *                               poBaseHandle.
 * @param nThreads               thread count; 1 + nThreads buffers are
 *                               allocated.
 * @param nChunkSize             chunk size in bytes, or 0 to take it from
 *                               the CPL_VSIL_DEFLATE_CHUNK_SIZE configuration
 *                               option (default "1024K").
 * @param nSOZIPIndexEltSize     size in bytes of a SOZip index element.
 * @param panSOZIPIndex          SOZip index storage, or nullptr.
 */
VSIGZipWriteHandleMT::VSIGZipWriteHandleMT(VSIVirtualHandle *poBaseHandle,
                                           int nDeflateType,
                                           bool bAutoCloseBaseHandleIn,
                                           int nThreads, size_t nChunkSize,
                                           size_t nSOZIPIndexEltSize,
                                           std::vector<uint8_t> *panSOZIPIndex)
    : poBaseHandle_(poBaseHandle), nDeflateType_(nDeflateType),
      bAutoCloseBaseHandle_(bAutoCloseBaseHandleIn), nThreads_(nThreads),
      nChunkSize_(nChunkSize), nSOZIPIndexEltSize_(nSOZIPIndexEltSize),
      panSOZIPIndex_(panSOZIPIndex)
{
    if (nChunkSize_ == 0)
    {
        // No explicit chunk size: use the configuration option, which
        // accepts a K or M suffix, then clamp to [4 KB, UINT_MAX].
        const char *const pszValue =
            CPLGetConfigOption("CPL_VSIL_DEFLATE_CHUNK_SIZE", "1024K");
        size_t nRequested = static_cast<size_t>(atoi(pszValue));
        if (strchr(pszValue, 'K') != nullptr)
            nRequested *= 1024;
        else if (strchr(pszValue, 'M') != nullptr)
            nRequested *= 1024 * 1024;

        const size_t nCapped =
            std::min(static_cast<size_t>(UINT_MAX), nRequested);
        nChunkSize_ = std::max(static_cast<size_t>(4 * 1024), nCapped);
    }

    const int nBuffers = 1 + nThreads_;
    for (int iBuf = 0; iBuf < nBuffers; ++iBuf)
        aposBuffers_.emplace_back(new std::string());

    nStartOffset_ = poBaseHandle_->Tell();
    if (nDeflateType_ == CPL_DEFLATE_TYPE_GZIP)
    {
        // Write a very simple .gz header: magic, method, then flags, time
        // and xflags all set to zero, and a final 0x03 byte.
        char header[11] = {};
        snprintf(header, sizeof(header), "%c%c%c%c%c%c%c%c%c%c", gz_magic[0],
                 gz_magic[1], Z_DEFLATED, 0 /*flags*/, 0, 0, 0, 0 /*time*/,
                 0 /*xflags*/, 0x03);
        poBaseHandle_->Write(header, 10);
    }
}
2113
2114
/************************************************************************/
2115
/*                       ~VSIGZipWriteHandleMT()                        */
2116
/************************************************************************/
2117
2118
/** Close the stream if still open, then release every owned job and buffer. */
VSIGZipWriteHandleMT::~VSIGZipWriteHandleMT()
{
    VSIGZipWriteHandleMT::Close();

    // Each job owns its input buffer.
    const auto DestroyJobs = [](const std::list<Job *> &apoJobs)
    {
        for (Job *psJob : apoJobs)
        {
            delete psJob->pBuffer_;
            delete psJob;
        }
    };
    DestroyJobs(apoFinishedJobs_);
    DestroyJobs(apoCRCFinishedJobs_);
    DestroyJobs(apoFreeJobs_);

    for (std::string *poBuffer : aposBuffers_)
        delete poBuffer;

    delete pCurBuffer_;
}
2143
2144
/************************************************************************/
2145
/*                               Close()                                */
2146
/************************************************************************/
2147
2148
int VSIGZipWriteHandleMT::Close()
2149
2150
0
{
2151
0
    if (!poBaseHandle_)
2152
0
        return 0;
2153
2154
0
    int nRet = 0;
2155
2156
0
    if (!pCurBuffer_)
2157
0
        pCurBuffer_ = new std::string();
2158
2159
0
    {
2160
0
        auto psJob = GetJobObject();
2161
0
        psJob->bFinish_ = true;
2162
0
        psJob->pParent_ = this;
2163
0
        psJob->pBuffer_ = pCurBuffer_;
2164
0
        pCurBuffer_ = nullptr;
2165
0
        psJob->nSeqNumber_ = nSeqNumberGenerated_;
2166
0
        VSIGZipWriteHandleMT::DeflateCompress(psJob);
2167
0
    }
2168
2169
0
    if (poPool_)
2170
0
    {
2171
0
        poPool_->WaitCompletion(0);
2172
0
    }
2173
0
    if (!ProcessCompletedJobs())
2174
0
    {
2175
0
        nRet = -1;
2176
0
    }
2177
0
    else
2178
0
    {
2179
0
        CPLAssert(apoFinishedJobs_.empty());
2180
0
        if (nDeflateType_ == CPL_DEFLATE_TYPE_GZIP)
2181
0
        {
2182
0
            if (poPool_)
2183
0
            {
2184
0
                poPool_->WaitCompletion(0);
2185
0
            }
2186
0
            ProcessCompletedJobs();
2187
0
        }
2188
0
        CPLAssert(apoCRCFinishedJobs_.empty());
2189
0
    }
2190
2191
0
    if (nDeflateType_ == CPL_DEFLATE_TYPE_GZIP)
2192
0
    {
2193
0
        const GUInt32 anTrailer[2] = {
2194
0
            CPL_LSBWORD32(static_cast<GUInt32>(nCRC_)),
2195
0
            CPL_LSBWORD32(static_cast<GUInt32>(nCurOffset_))};
2196
2197
0
        if (poBaseHandle_->Write(anTrailer, 8) < 8)
2198
0
        {
2199
0
            nRet = -1;
2200
0
        }
2201
0
    }
2202
2203
0
    if (bAutoCloseBaseHandle_)
2204
0
    {
2205
0
        int nRetClose = poBaseHandle_->Close();
2206
0
        if (nRet == 0)
2207
0
            nRet = nRetClose;
2208
2209
0
        delete poBaseHandle_;
2210
0
    }
2211
0
    poBaseHandle_ = nullptr;
2212
2213
0
    return nRet;
2214
0
}
2215
2216
/************************************************************************/
2217
/*                                Read()                                */
2218
/************************************************************************/
2219
2220
size_t VSIGZipWriteHandleMT::Read(void * /* pBuffer */, size_t /* nBytes*/)
2221
0
{
2222
0
    CPLError(CE_Failure, CPLE_NotSupported,
2223
0
             "VSIFReadL is not supported on GZip write streams");
2224
0
    return 0;
2225
0
}
2226
2227
/************************************************************************/
2228
/*                          DeflateCompress()                           */
2229
/************************************************************************/
2230
2231
void VSIGZipWriteHandleMT::DeflateCompress(void *inData)
2232
0
{
2233
0
    Job *psJob = static_cast<Job *>(inData);
2234
2235
0
    CPLAssert(psJob->pBuffer_);
2236
2237
0
    z_stream sStream;
2238
0
    memset(&sStream, 0, sizeof(sStream));
2239
0
    sStream.zalloc = nullptr;
2240
0
    sStream.zfree = nullptr;
2241
0
    sStream.opaque = nullptr;
2242
2243
0
    sStream.avail_in = static_cast<uInt>(psJob->pBuffer_->size());
2244
0
    sStream.next_in = reinterpret_cast<Bytef *>(&(*psJob->pBuffer_)[0]);
2245
2246
0
    int ret = deflateInit2(
2247
0
        &sStream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
2248
0
        (psJob->pParent_->nDeflateType_ == CPL_DEFLATE_TYPE_ZLIB) ? MAX_WBITS
2249
0
                                                                  : -MAX_WBITS,
2250
0
        8, Z_DEFAULT_STRATEGY);
2251
0
    CPLAssertAlwaysEval(ret == Z_OK);
2252
2253
0
    size_t nRealSize = 0;
2254
2255
0
    while (sStream.avail_in > 0)
2256
0
    {
2257
0
        psJob->sCompressedData_.resize(nRealSize + Z_BUFSIZE);
2258
0
        sStream.avail_out = static_cast<uInt>(Z_BUFSIZE);
2259
0
        sStream.next_out =
2260
0
            reinterpret_cast<Bytef *>(&psJob->sCompressedData_[0]) + nRealSize;
2261
2262
0
        const int zlibRet = deflate(&sStream, Z_NO_FLUSH);
2263
0
        CPLAssertAlwaysEval(zlibRet == Z_OK);
2264
2265
0
        nRealSize += static_cast<uInt>(Z_BUFSIZE) - sStream.avail_out;
2266
0
    }
2267
2268
0
    psJob->sCompressedData_.resize(nRealSize + Z_BUFSIZE);
2269
0
    sStream.avail_out = static_cast<uInt>(Z_BUFSIZE);
2270
0
    sStream.next_out =
2271
0
        reinterpret_cast<Bytef *>(&psJob->sCompressedData_[0]) + nRealSize;
2272
2273
0
    if (psJob->bFinish_)
2274
0
    {
2275
0
        const int zlibRet = deflate(&sStream, Z_FINISH);
2276
0
        CPLAssertAlwaysEval(zlibRet == Z_STREAM_END);
2277
0
    }
2278
0
    else
2279
0
    {
2280
        // Do a Z_SYNC_FLUSH and Z_FULL_FLUSH, so as to have two markers when
2281
        // independent as pigz 2.3.4 or later. The following 9 byte sequence
2282
        // will be found: 0x00 0x00 0xff 0xff 0x00 0x00 0x00 0xff 0xff
2283
        // Z_FULL_FLUSH only is sufficient, but it is not obvious if a
2284
        // 0x00 0x00 0xff 0xff marker in the codestream is just a SYNC_FLUSH (
2285
        // without dictionary reset) or a FULL_FLUSH (with dictionary reset)
2286
0
        {
2287
0
            const int zlibRet = deflate(&sStream, Z_SYNC_FLUSH);
2288
0
            CPLAssertAlwaysEval(zlibRet == Z_OK);
2289
0
        }
2290
2291
0
        {
2292
0
            const int zlibRet = deflate(&sStream, Z_FULL_FLUSH);
2293
0
            CPLAssertAlwaysEval(zlibRet == Z_OK);
2294
0
        }
2295
0
    }
2296
2297
0
    nRealSize += static_cast<uInt>(Z_BUFSIZE) - sStream.avail_out;
2298
0
    psJob->sCompressedData_.resize(nRealSize);
2299
2300
0
    deflateEnd(&sStream);
2301
2302
0
    {
2303
0
        std::lock_guard<std::mutex> oLock(psJob->pParent_->sMutex_);
2304
0
        psJob->pParent_->apoFinishedJobs_.push_back(psJob);
2305
0
    }
2306
0
}
2307
2308
/************************************************************************/
2309
/*                             CRCCompute()                             */
2310
/************************************************************************/
2311
2312
void VSIGZipWriteHandleMT::CRCCompute(void *inData)
2313
0
{
2314
0
    Job *psJob = static_cast<Job *>(inData);
2315
0
    psJob->bInCRCComputation_ = true;
2316
0
    psJob->nCRC_ =
2317
0
        crc32(0U, reinterpret_cast<const Bytef *>(psJob->pBuffer_->data()),
2318
0
              static_cast<uInt>(psJob->pBuffer_->size()));
2319
2320
0
    {
2321
0
        std::lock_guard<std::mutex> oLock(psJob->pParent_->sMutex_);
2322
0
        psJob->pParent_->apoCRCFinishedJobs_.push_back(psJob);
2323
0
    }
2324
0
}
2325
2326
/************************************************************************/
2327
/*                             DumpState()                              */
2328
/************************************************************************/
2329
2330
#ifdef DEBUG_VERBOSE
2331
void VSIGZipWriteHandleMT::DumpState()
2332
{
2333
    fprintf(stderr, "Finished jobs (expected = %d):\n",  // ok
2334
            nSeqNumberExpected_);
2335
    for (const auto *psJob : apoFinishedJobs_)
2336
    {
2337
        fprintf(stderr, "seq number=%d, bInCRCComputation = %d\n",  // ok
2338
                psJob->nSeqNumber_, psJob->bInCRCComputation_ ? 1 : 0);
2339
    }
2340
    fprintf(stderr, "Finished CRC jobs (expected = %d):\n",  // ok
2341
            nSeqNumberExpectedCRC_);
2342
    for (const auto *psJob : apoFinishedJobs_)
2343
    {
2344
        fprintf(stderr, "seq number=%d\n",  // ok
2345
                psJob->nSeqNumber_);
2346
    }
2347
    fprintf(stderr, "apoFreeJobs_.size() = %d\n",  // ok
2348
            static_cast<int>(apoFreeJobs_.size()));
2349
    fprintf(stderr, "aposBuffers_.size() = %d\n",  // ok
2350
            static_cast<int>(aposBuffers_.size()));
2351
}
2352
#endif
2353
2354
/************************************************************************/
2355
/*                        ProcessCompletedJobs()                        */
2356
/************************************************************************/
2357
2358
bool VSIGZipWriteHandleMT::ProcessCompletedJobs()
2359
0
{
2360
0
    std::lock_guard<std::mutex> oLock(sMutex_);
2361
0
    bool do_it_again = true;
2362
0
    while (do_it_again)
2363
0
    {
2364
0
        do_it_again = false;
2365
0
        if (nDeflateType_ == CPL_DEFLATE_TYPE_GZIP)
2366
0
        {
2367
0
            for (auto iter = apoFinishedJobs_.begin();
2368
0
                 iter != apoFinishedJobs_.end(); ++iter)
2369
0
            {
2370
0
                auto psJob = *iter;
2371
2372
0
                if (!psJob->bInCRCComputation_)
2373
0
                {
2374
0
                    psJob->bInCRCComputation_ = true;
2375
0
                    sMutex_.unlock();
2376
0
                    if (poPool_)
2377
0
                    {
2378
0
                        poPool_->SubmitJob(VSIGZipWriteHandleMT::CRCCompute,
2379
0
                                           psJob);
2380
0
                    }
2381
0
                    else
2382
0
                    {
2383
0
                        CRCCompute(psJob);
2384
0
                    }
2385
0
                    sMutex_.lock();
2386
0
                }
2387
0
            }
2388
0
        }
2389
2390
0
        for (auto iter = apoFinishedJobs_.begin();
2391
0
             iter != apoFinishedJobs_.end(); ++iter)
2392
0
        {
2393
0
            auto psJob = *iter;
2394
0
            if (psJob->nSeqNumber_ == nSeqNumberExpected_)
2395
0
            {
2396
0
                apoFinishedJobs_.erase(iter);
2397
2398
0
                const bool bIsSeqNumberExpectedZero =
2399
0
                    (nSeqNumberExpected_ == 0);
2400
0
                sMutex_.unlock();
2401
2402
0
                const size_t nToWrite = psJob->sCompressedData_.size();
2403
0
                if (panSOZIPIndex_ && !bIsSeqNumberExpectedZero &&
2404
0
                    !psJob->pBuffer_->empty())
2405
0
                {
2406
0
                    uint64_t nOffset = poBaseHandle_->Tell() - nStartOffset_;
2407
0
                    if (nSOZIPIndexEltSize_ == 8)
2408
0
                    {
2409
0
                        CPL_LSBPTR64(&nOffset);
2410
0
                        std::copy(reinterpret_cast<const uint8_t *>(&nOffset),
2411
0
                                  reinterpret_cast<const uint8_t *>(&nOffset) +
2412
0
                                      sizeof(nOffset),
2413
0
                                  std::back_inserter(*panSOZIPIndex_));
2414
0
                    }
2415
0
                    else
2416
0
                    {
2417
0
                        if (nOffset > std::numeric_limits<uint32_t>::max())
2418
0
                        {
2419
                            // shouldn't happen normally...
2420
0
                            CPLError(
2421
0
                                CE_Failure, CPLE_AppDefined,
2422
0
                                "Too big offset for SOZIP_OFFSET_SIZE = 4");
2423
0
                            panSOZIPIndex_->clear();
2424
0
                            panSOZIPIndex_ = nullptr;
2425
0
                        }
2426
0
                        else
2427
0
                        {
2428
0
                            uint32_t nOffset32 = static_cast<uint32_t>(nOffset);
2429
0
                            CPL_LSBPTR32(&nOffset32);
2430
0
                            std::copy(
2431
0
                                reinterpret_cast<const uint8_t *>(&nOffset32),
2432
0
                                reinterpret_cast<const uint8_t *>(&nOffset32) +
2433
0
                                    sizeof(nOffset32),
2434
0
                                std::back_inserter(*panSOZIPIndex_));
2435
0
                        }
2436
0
                    }
2437
0
                }
2438
0
                bool bError =
2439
0
                    poBaseHandle_->Write(psJob->sCompressedData_.data(),
2440
0
                                         nToWrite) < nToWrite;
2441
0
                sMutex_.lock();
2442
0
                nSeqNumberExpected_++;
2443
2444
0
                if (nDeflateType_ != CPL_DEFLATE_TYPE_GZIP)
2445
0
                {
2446
0
                    aposBuffers_.push_back(psJob->pBuffer_);
2447
0
                    psJob->pBuffer_ = nullptr;
2448
2449
0
                    apoFreeJobs_.push_back(psJob);
2450
0
                }
2451
2452
0
                if (bError)
2453
0
                {
2454
0
                    return false;
2455
0
                }
2456
2457
0
                do_it_again = true;
2458
0
                break;
2459
0
            }
2460
0
        }
2461
2462
0
        if (nDeflateType_ == CPL_DEFLATE_TYPE_GZIP)
2463
0
        {
2464
0
            for (auto iter = apoCRCFinishedJobs_.begin();
2465
0
                 iter != apoCRCFinishedJobs_.end(); ++iter)
2466
0
            {
2467
0
                auto psJob = *iter;
2468
0
                if (psJob->nSeqNumber_ == nSeqNumberExpectedCRC_)
2469
0
                {
2470
0
                    apoCRCFinishedJobs_.erase(iter);
2471
2472
0
                    nCRC_ = crc32_combine(
2473
0
                        nCRC_, psJob->nCRC_,
2474
0
                        static_cast<uLong>(psJob->pBuffer_->size()));
2475
2476
0
                    nSeqNumberExpectedCRC_++;
2477
2478
0
                    aposBuffers_.push_back(psJob->pBuffer_);
2479
0
                    psJob->pBuffer_ = nullptr;
2480
2481
0
                    apoFreeJobs_.push_back(psJob);
2482
0
                    do_it_again = true;
2483
0
                    break;
2484
0
                }
2485
0
            }
2486
0
        }
2487
0
    }
2488
0
    return true;
2489
0
}
2490
2491
/************************************************************************/
2492
/*                            GetJobObject()                            */
2493
/************************************************************************/
2494
2495
// Returns a Job ready to be filled in: a recycled one from apoFreeJobs_ when
// available (with its compressed data emptied and its CRC flag reset),
// otherwise a newly allocated one.
VSIGZipWriteHandleMT::Job *VSIGZipWriteHandleMT::GetJobObject()
{
    Job *poRecycled = nullptr;
    {
        std::lock_guard<std::mutex> oGuard(sMutex_);
        if (!apoFreeJobs_.empty())
        {
            poRecycled = apoFreeJobs_.back();
            apoFreeJobs_.pop_back();
            poRecycled->sCompressedData_.clear();
            poRecycled->bInCRCComputation_ = false;
        }
    }

    // The allocation is done once the mutex has been released.
    if (poRecycled != nullptr)
        return poRecycled;
    return new Job();
}
2510
2511
/************************************************************************/
2512
/*                               Write()                                */
2513
/************************************************************************/
2514
2515
size_t VSIGZipWriteHandleMT::Write(const void *const pBuffer,
2516
                                   size_t const nBytes)
2517
2518
0
{
2519
0
    if (bHasErrored_)
2520
0
        return 0;
2521
2522
0
    const char *pszBuffer = static_cast<const char *>(pBuffer);
2523
0
    size_t nBytesToWrite = nBytes;
2524
0
    while (nBytesToWrite > 0)
2525
0
    {
2526
0
        if (pCurBuffer_ == nullptr)
2527
0
        {
2528
0
            while (true)
2529
0
            {
2530
                // We store in a local variable instead of pCurBuffer_ directly
2531
                // to avoid Coverity Scan to be confused by the fact that we
2532
                // have used above pCurBuffer_ outside of the mutex. But what
2533
                // is protected by the mutex is aposBuffers_, not pCurBuffer_.
2534
0
                std::string *l_pCurBuffer = nullptr;
2535
0
                {
2536
0
                    std::lock_guard<std::mutex> oLock(sMutex_);
2537
0
                    if (!aposBuffers_.empty())
2538
0
                    {
2539
0
                        l_pCurBuffer = aposBuffers_.back();
2540
0
                        aposBuffers_.pop_back();
2541
0
                    }
2542
0
                }
2543
0
                pCurBuffer_ = l_pCurBuffer;
2544
0
                if (pCurBuffer_)
2545
0
                    break;
2546
2547
0
                if (poPool_)
2548
0
                {
2549
0
                    poPool_->WaitEvent();
2550
0
                }
2551
0
                if (!ProcessCompletedJobs())
2552
0
                {
2553
0
                    bHasErrored_ = true;
2554
0
                    return 0;
2555
0
                }
2556
0
            }
2557
0
            pCurBuffer_->clear();
2558
0
        }
2559
0
        size_t nConsumed =
2560
0
            std::min(nBytesToWrite, nChunkSize_ - pCurBuffer_->size());
2561
0
        pCurBuffer_->append(pszBuffer, nConsumed);
2562
0
        nCurOffset_ += nConsumed;
2563
0
        pszBuffer += nConsumed;
2564
0
        nBytesToWrite -= nConsumed;
2565
0
        if (pCurBuffer_->size() == nChunkSize_)
2566
0
        {
2567
0
            if (poPool_ == nullptr)
2568
0
            {
2569
0
                poPool_.reset(new CPLWorkerThreadPool());
2570
0
                if (!poPool_->Setup(nThreads_, nullptr, nullptr, false))
2571
0
                {
2572
0
                    bHasErrored_ = true;
2573
0
                    poPool_.reset();
2574
0
                    return 0;
2575
0
                }
2576
0
            }
2577
2578
0
            auto psJob = GetJobObject();
2579
0
            psJob->pParent_ = this;
2580
0
            psJob->pBuffer_ = pCurBuffer_;
2581
0
            psJob->nSeqNumber_ = nSeqNumberGenerated_;
2582
0
            nSeqNumberGenerated_++;
2583
0
            pCurBuffer_ = nullptr;
2584
0
            poPool_->SubmitJob(VSIGZipWriteHandleMT::DeflateCompress, psJob);
2585
0
        }
2586
0
    }
2587
2588
0
    return nBytes;
2589
0
}
2590
2591
/************************************************************************/
2592
/*                               Flush()                                */
2593
/************************************************************************/
2594
2595
// Flush request: deliberately does nothing and reports success.
int VSIGZipWriteHandleMT::Flush()
{
    // we *could* do something for this but for now we choose not to.
    constexpr int nSuccess = 0;
    return nSuccess;
}
2602
2603
/************************************************************************/
2604
/*                                Seek()                                */
2605
/************************************************************************/
2606
2607
// Only seeks that do not move the write position are accepted: a zero offset
// relative to the current position or to the end, or an absolute seek to the
// current offset. Any other request emits a CPLE_NotSupported error and
// returns -1.
int VSIGZipWriteHandleMT::Seek(vsi_l_offset nOffset, int nWhence)
{
    const bool bZeroRelative =
        nOffset == 0 && (nWhence == SEEK_END || nWhence == SEEK_CUR);
    const bool bAbsoluteToCurrent =
        nWhence == SEEK_SET && nOffset == nCurOffset_;
    if (bZeroRelative || bAbsoluteToCurrent)
        return 0;

    CPLError(CE_Failure, CPLE_NotSupported,
             "Seeking on writable compressed data streams not supported.");
    return -1;
}
2622
2623
/************************************************************************/
2624
/*                                Tell()                                */
2625
/************************************************************************/
2626
2627
// Current position, i.e. the number of uncompressed bytes accepted by Write()
// so far (nCurOffset_ is advanced there).
vsi_l_offset VSIGZipWriteHandleMT::Tell()
{
    const vsi_l_offset nPos = nCurOffset_;
    return nPos;
}
2632
2633
/************************************************************************/
2634
/* ==================================================================== */
2635
/*                       VSIGZipWriteHandle                             */
2636
/* ==================================================================== */
2637
/************************************************************************/
2638
2639
class VSIGZipWriteHandle final : public VSIVirtualHandle
2640
{
2641
    CPL_DISALLOW_COPY_ASSIGN(VSIGZipWriteHandle)
2642
2643
    VSIVirtualHandle *m_poBaseHandle = nullptr;
2644
    z_stream sStream;
2645
    Byte *pabyInBuf = nullptr;
2646
    Byte *pabyOutBuf = nullptr;
2647
    bool bCompressActive = false;
2648
    vsi_l_offset nCurOffset = 0;
2649
    uLong nCRC = 0;
2650
    int nDeflateType = CPL_DEFLATE_TYPE_GZIP;
2651
    bool bAutoCloseBaseHandle = false;
2652
2653
  public:
2654
    VSIGZipWriteHandle(VSIVirtualHandle *poBaseHandle, int nDeflateType,
2655
                       bool bAutoCloseBaseHandleIn);
2656
2657
    ~VSIGZipWriteHandle() override;
2658
2659
    int Seek(vsi_l_offset nOffset, int nWhence) override;
2660
    vsi_l_offset Tell() override;
2661
    size_t Read(void *pBuffer, size_t nBytes) override;
2662
    size_t Write(const void *pBuffer, size_t nBytes) override;
2663
2664
    int Eof() override
2665
0
    {
2666
0
        return 0;
2667
0
    }
2668
2669
    int Error() override
2670
0
    {
2671
0
        return 0;
2672
0
    }
2673
2674
    void ClearErr() override
2675
0
    {
2676
0
    }
2677
2678
    int Flush() override;
2679
    int Close() override;
2680
};
2681
2682
/************************************************************************/
2683
/*                         VSIGZipWriteHandle()                         */
2684
/************************************************************************/
2685
2686
// Allocates the two Z_BUFSIZE staging buffers, initializes the deflate
// stream (zlib wrapper for CPL_DEFLATE_TYPE_ZLIB, raw deflate otherwise) and,
// for CPL_DEFLATE_TYPE_GZIP, writes a minimal 10-byte .gz header to the base
// handle. bCompressActive is left false when deflateInit2() fails.
VSIGZipWriteHandle::VSIGZipWriteHandle(VSIVirtualHandle *poBaseHandle,
                                       int nDeflateTypeIn,
                                       bool bAutoCloseBaseHandleIn)
    : m_poBaseHandle(poBaseHandle), sStream(),
      pabyInBuf(static_cast<Byte *>(CPLMalloc(Z_BUFSIZE))),
      pabyOutBuf(static_cast<Byte *>(CPLMalloc(Z_BUFSIZE))),
      nCRC(crc32(0L, nullptr, 0)), nDeflateType(nDeflateTypeIn),
      bAutoCloseBaseHandle(bAutoCloseBaseHandleIn)
{
    // Default allocators, no output buffer yet, input read from pabyInBuf.
    sStream.zalloc = nullptr;
    sStream.zfree = nullptr;
    sStream.opaque = nullptr;
    sStream.next_out = nullptr;
    sStream.avail_in = 0;
    sStream.avail_out = 0;
    sStream.next_in = pabyInBuf;

    // Negative window bits request a raw deflate stream from zlib; the gzip
    // framing, when wanted, is produced by this class.
    const int nWindowBits =
        (nDeflateType == CPL_DEFLATE_TYPE_ZLIB) ? MAX_WBITS : -MAX_WBITS;
    const int nInitRet =
        deflateInit2(&sStream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, nWindowBits,
                     8, Z_DEFAULT_STRATEGY);
    if (nInitRet != Z_OK)
    {
        bCompressActive = false;
        return;
    }

    if (nDeflateType == CPL_DEFLATE_TYPE_GZIP)
    {
        // Write a very simple .gz header:
        // magic (2 bytes), method, flags, time (4 bytes), xflags, OS code.
        const unsigned char abyHeader[10] = {
            static_cast<unsigned char>(gz_magic[0]),
            static_cast<unsigned char>(gz_magic[1]),
            static_cast<unsigned char>(Z_DEFLATED),
            0 /*flags*/,
            0,
            0,
            0,
            0 /*time*/,
            0 /*xflags*/,
            0x03};
        m_poBaseHandle->Write(abyHeader, 10);
    }

    bCompressActive = true;
}
2727
2728
/************************************************************************/
2729
/*                       VSICreateGZipWritable()                        */
2730
/************************************************************************/
2731
2732
// Convenience overload: forwards to the extended overload with zero for the
// thread count, chunk size and SOZip index element size, and no SOZip index.
VSIVirtualHandle *VSICreateGZipWritable(VSIVirtualHandle *poBaseHandle,
                                        int nDeflateTypeIn,
                                        int bAutoCloseBaseHandle)
{
    const bool bAutoClose = CPL_TO_BOOL(bAutoCloseBaseHandle);
    return VSICreateGZipWritable(poBaseHandle, nDeflateTypeIn, bAutoClose,
                                 /* nThreads = */ 0,
                                 /* nChunkSize = */ 0,
                                 /* nSOZIPIndexEltSize = */ 0,
                                 /* panSOZIPIndex = */ nullptr);
}
2740
2741
// Creates a compressing write handle on top of poBaseHandle.
//
// When nThreads is not strictly positive, the thread count comes from
// GDALGetNumThreads(). The multi-threaded implementation is selected when
// more than one thread is to be used or when a chunk size is given; otherwise
// the single-stream VSIGZipWriteHandle is returned.
VSIVirtualHandle *VSICreateGZipWritable(VSIVirtualHandle *poBaseHandle,
                                        int nDeflateTypeIn,
                                        bool bAutoCloseBaseHandle, int nThreads,
                                        size_t nChunkSize,
                                        size_t nSOZIPIndexEltSize,
                                        std::vector<uint8_t> *panSOZIPIndex)
{
    if (nThreads <= 0)
    {
        nThreads = GDALGetNumThreads(/* nMaxVal = */ 128,
                                     /* bDefaultToAllCPUs = */ false);
    }

    const bool bUseMultiThreaded = nThreads > 1 || nChunkSize > 0;
    if (!bUseMultiThreaded)
    {
        return new VSIGZipWriteHandle(poBaseHandle, nDeflateTypeIn,
                                      bAutoCloseBaseHandle);
    }

    // coverity[tainted_data]
    return new VSIGZipWriteHandleMT(
        poBaseHandle, nDeflateTypeIn, bAutoCloseBaseHandle, nThreads,
        nChunkSize, nSOZIPIndexEltSize, panSOZIPIndex);
}
2762
2763
/************************************************************************/
2764
/*                        ~VSIGZipWriteHandle()                         */
2765
/************************************************************************/
2766
2767
// Finishes the stream if Close() has not been called yet, then releases the
// two staging buffers.
VSIGZipWriteHandle::~VSIGZipWriteHandle()
{
    const bool bStillOpen = bCompressActive;
    if (bStillOpen)
    {
        // Qualified call: no virtual dispatch from the destructor.
        VSIGZipWriteHandle::Close();
    }

    CPLFree(pabyInBuf);
    CPLFree(pabyOutBuf);
}
2776
2777
/************************************************************************/
2778
/*                               Close()                                */
2779
/************************************************************************/
2780
2781
int VSIGZipWriteHandle::Close()
2782
2783
0
{
2784
0
    int nRet = 0;
2785
0
    if (bCompressActive)
2786
0
    {
2787
0
        sStream.next_out = pabyOutBuf;
2788
0
        sStream.avail_out = static_cast<uInt>(Z_BUFSIZE);
2789
2790
0
        const int zlibRet = deflate(&sStream, Z_FINISH);
2791
0
        CPLAssertAlwaysEval(zlibRet == Z_STREAM_END);
2792
2793
0
        const size_t nOutBytes =
2794
0
            static_cast<uInt>(Z_BUFSIZE) - sStream.avail_out;
2795
2796
0
        deflateEnd(&sStream);
2797
2798
0
        if (m_poBaseHandle->Write(pabyOutBuf, nOutBytes) < nOutBytes)
2799
0
        {
2800
0
            nRet = -1;
2801
0
        }
2802
2803
0
        if (nRet == 0 && nDeflateType == CPL_DEFLATE_TYPE_GZIP)
2804
0
        {
2805
0
            const GUInt32 anTrailer[2] = {
2806
0
                CPL_LSBWORD32(static_cast<GUInt32>(nCRC)),
2807
0
                CPL_LSBWORD32(static_cast<GUInt32>(nCurOffset))};
2808
2809
0
            if (m_poBaseHandle->Write(anTrailer, 8) < 8)
2810
0
            {
2811
0
                nRet = -1;
2812
0
            }
2813
0
        }
2814
2815
0
        if (bAutoCloseBaseHandle)
2816
0
        {
2817
0
            if (nRet == 0)
2818
0
                nRet = m_poBaseHandle->Close();
2819
2820
0
            delete m_poBaseHandle;
2821
0
        }
2822
2823
0
        bCompressActive = false;
2824
0
    }
2825
2826
0
    return nRet;
2827
0
}
2828
2829
/************************************************************************/
2830
/*                                Read()                                */
2831
/************************************************************************/
2832
2833
size_t VSIGZipWriteHandle::Read(void * /* pBuffer */, size_t /* nBytes */)
2834
0
{
2835
0
    CPLError(CE_Failure, CPLE_NotSupported,
2836
0
             "VSIFReadL is not supported on GZip write streams");
2837
0
    return 0;
2838
0
}
2839
2840
/************************************************************************/
2841
/*                               Write()                                */
2842
/************************************************************************/
2843
2844
size_t VSIGZipWriteHandle::Write(const void *const pBuffer,
2845
                                 size_t const nBytesToWrite)
2846
2847
0
{
2848
0
    {
2849
0
        size_t nOffset = 0;
2850
0
        while (nOffset < nBytesToWrite)
2851
0
        {
2852
0
            uInt nChunk = static_cast<uInt>(std::min(
2853
0
                static_cast<size_t>(UINT_MAX), nBytesToWrite - nOffset));
2854
0
            nCRC =
2855
0
                crc32(nCRC, reinterpret_cast<const Bytef *>(pBuffer) + nOffset,
2856
0
                      nChunk);
2857
0
            nOffset += nChunk;
2858
0
        }
2859
0
    }
2860
2861
0
    if (!bCompressActive)
2862
0
        return 0;
2863
2864
0
    size_t nNextByte = 0;
2865
0
    while (nNextByte < nBytesToWrite)
2866
0
    {
2867
0
        sStream.next_out = pabyOutBuf;
2868
0
        sStream.avail_out = static_cast<uInt>(Z_BUFSIZE);
2869
2870
0
        if (sStream.avail_in > 0)
2871
0
            memmove(pabyInBuf, sStream.next_in, sStream.avail_in);
2872
2873
0
        const uInt nNewBytesToWrite = static_cast<uInt>(
2874
0
            std::min(static_cast<size_t>(Z_BUFSIZE - sStream.avail_in),
2875
0
                     nBytesToWrite - nNextByte));
2876
0
        memcpy(pabyInBuf + sStream.avail_in,
2877
0
               reinterpret_cast<const Byte *>(pBuffer) + nNextByte,
2878
0
               nNewBytesToWrite);
2879
2880
0
        sStream.next_in = pabyInBuf;
2881
0
        sStream.avail_in += nNewBytesToWrite;
2882
2883
0
        const int zlibRet = deflate(&sStream, Z_NO_FLUSH);
2884
0
        CPLAssertAlwaysEval(zlibRet == Z_OK);
2885
2886
0
        const size_t nOutBytes =
2887
0
            static_cast<uInt>(Z_BUFSIZE) - sStream.avail_out;
2888
2889
0
        if (nOutBytes > 0)
2890
0
        {
2891
0
            if (m_poBaseHandle->Write(pabyOutBuf, nOutBytes) < nOutBytes)
2892
0
                return 0;
2893
0
        }
2894
2895
0
        nNextByte += nNewBytesToWrite;
2896
0
        nCurOffset += nNewBytesToWrite;
2897
0
    }
2898
2899
0
    return nBytesToWrite;
2900
0
}
2901
2902
/************************************************************************/
2903
/*                               Flush()                                */
2904
/************************************************************************/
2905
2906
// Flush request: deliberately does nothing and reports success.
int VSIGZipWriteHandle::Flush()
{
    // we *could* do something for this but for now we choose not to.
    constexpr int nSuccess = 0;
    return nSuccess;
}
2913
2914
/************************************************************************/
2915
/*                                Seek()                                */
2916
/************************************************************************/
2917
2918
// Only seeks that do not move the write position are accepted: a zero offset
// relative to the current position or to the end, or an absolute seek to the
// current offset. Any other request emits a CPLE_NotSupported error and
// returns -1.
int VSIGZipWriteHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    const bool bZeroRelative =
        nOffset == 0 && (nWhence == SEEK_END || nWhence == SEEK_CUR);
    const bool bAbsoluteToCurrent =
        nWhence == SEEK_SET && nOffset == nCurOffset;
    if (bZeroRelative || bAbsoluteToCurrent)
        return 0;

    CPLError(CE_Failure, CPLE_NotSupported,
             "Seeking on writable compressed data streams not supported.");
    return -1;
}
2933
2934
/************************************************************************/
2935
/*                                Tell()                                */
2936
/************************************************************************/
2937
2938
// Current position, i.e. the number of uncompressed bytes consumed by Write()
// so far (nCurOffset is advanced there).
vsi_l_offset VSIGZipWriteHandle::Tell()
{
    const vsi_l_offset nPos = nCurOffset;
    return nPos;
}
2943
2944
/************************************************************************/
2945
/* ==================================================================== */
2946
/*                       VSIGZipFilesystemHandler                       */
2947
/* ==================================================================== */
2948
/************************************************************************/
2949
2950
/************************************************************************/
2951
/*                     ~VSIGZipFilesystemHandler()                      */
2952
/************************************************************************/
2953
2954
// Drops the cached handle, if any, after telling it that it may no longer
// save its state back into this handler.
VSIGZipFilesystemHandler::~VSIGZipFilesystemHandler()
{
    if (!poHandleLastGZipFile)
        return;

    poHandleLastGZipFile->UnsetCanSaveInfo();
    poHandleLastGZipFile.reset();
}
2962
2963
/************************************************************************/
2964
/*                              SaveInfo()                              */
2965
/************************************************************************/
2966
2967
// Locking wrapper around SaveInfo_unlocked(): holds oMutex for the duration
// of the call.
void VSIGZipFilesystemHandler::SaveInfo(VSIGZipHandle *poHandle)
{
    std::unique_lock oGuard(oMutex);
    SaveInfo_unlocked(poHandle);
}
2972
2973
// Replaces the cached handle by a duplicate of poHandle when there is no
// cached handle yet, when the cached one refers to another base file, or when
// poHandle has read further than the cached one.
//
// As the name indicates, no locking is done here. Nested calls made while
// this function is already running are ignored (m_bInSaveInfo).
void VSIGZipFilesystemHandler::SaveInfo_unlocked(VSIGZipHandle *poHandle)
{
    if (m_bInSaveInfo)
        return;
    m_bInSaveInfo = true;

    CPLAssert(poHandle != poHandleLastGZipFile.get());
    CPLAssert(poHandle->GetBaseFileName() != nullptr);

    const bool bReplaceCached =
        poHandleLastGZipFile == nullptr ||
        strcmp(poHandleLastGZipFile->GetBaseFileName(),
               poHandle->GetBaseFileName()) != 0 ||
        poHandle->GetLastReadOffset() >
            poHandleLastGZipFile->GetLastReadOffset();
    if (bReplaceCached)
    {
        // Detach the previous cached handle from the member before
        // destroying it.
        std::unique_ptr<VSIGZipHandle> poPrevious(
            std::move(poHandleLastGZipFile));
        if (poPrevious)
        {
            poPrevious->UnsetCanSaveInfo();
            poPrevious.reset();
        }

        poHandleLastGZipFile.reset(poHandle->Duplicate());
        if (poHandleLastGZipFile)
            poHandleLastGZipFile->CloseBaseHandle();
    }

    m_bInSaveInfo = false;
}
3001
3002
/************************************************************************/
3003
/*                                Open()                                */
3004
/************************************************************************/
3005
3006
// Opens a /vsigzip/ file.
//
// Returns nullptr when pszFilename does not start with "/vsigzip/". With a
// 'w' in pszAccess, the underlying file is opened in "wb" mode and wrapped in
// a compressing handle ("w+" is rejected with an error). Otherwise the file
// is opened for reading and wrapped in a buffered reader.
VSIVirtualHandleUniquePtr
VSIGZipFilesystemHandler::Open(const char *pszFilename, const char *pszAccess,
                               bool /* bSetError */,
                               CSLConstList /* papszOptions */)
{
    if (!STARTS_WITH_CI(pszFilename, "/vsigzip/"))
        return nullptr;

    const char *const pszBaseFilename = pszFilename + strlen("/vsigzip/");
    VSIFilesystemHandler *poFSHandler =
        VSIFileManager::GetHandler(pszBaseFilename);

    const bool bWriteRequested = strchr(pszAccess, 'w') != nullptr;
    if (!bWriteRequested)
    {
        /* ---------------------------------------------------------------- */
        /*      Read access case.                                           */
        /* ---------------------------------------------------------------- */
        VSIGZipHandle *poGZIPHandle = OpenGZipReadOnly(pszFilename, pszAccess);
        if (poGZIPHandle == nullptr)
            return nullptr;

        // Wrap the VSIGZipHandle inside a buffered reader that will
        // improve dramatically performance when doing small backward
        // seeks.
        return VSIVirtualHandleUniquePtr(
            VSICreateBufferedReaderHandle(poGZIPHandle));
    }

    /* -------------------------------------------------------------------- */
    /*      Write access: only plain write is supported, not write+update   */
    /*      (w+). Create a writable handle for the underlying filename.     */
    /* -------------------------------------------------------------------- */
    if (strchr(pszAccess, '+') != nullptr)
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Write+update (w+) not supported for /vsigzip, "
                 "only read-only or write-only.");
        return nullptr;
    }

    auto poVirtualHandle = poFSHandler->Open(pszBaseFilename, "wb");
    if (poVirtualHandle == nullptr)
        return nullptr;

    // The presence of 'z' in the access string is passed on as the deflate
    // type argument; the writable handle takes over the base handle.
    return VSIVirtualHandleUniquePtr(
        VSICreateGZipWritable(poVirtualHandle.release(),
                              strchr(pszAccess, 'z') != nullptr, TRUE));
}
3057
3058
/************************************************************************/
3059
/*                      SupportsSequentialWrite()                       */
3060
/************************************************************************/
3061
3062
bool VSIGZipFilesystemHandler::SupportsSequentialWrite(const char *pszPath,
3063
                                                       bool bAllowLocalTempFile)
3064
0
{
3065
0
    if (!STARTS_WITH_CI(pszPath, "/vsigzip/"))
3066
0
        return false;
3067
0
    const char *pszBaseFileName = pszPath + strlen("/vsigzip/");
3068
0
    VSIFilesystemHandler *poFSHandler =
3069
0
        VSIFileManager::GetHandler(pszBaseFileName);
3070
0
    return poFSHandler->SupportsSequentialWrite(pszPath, bAllowLocalTempFile);
3071
0
}
3072
3073
/************************************************************************/
3074
/*                          OpenGZipReadOnly()                          */
3075
/************************************************************************/
3076
3077
// Opens the file underlying a /vsigzip/ path for reading and returns a new
// VSIGZipHandle on it, or nullptr if the file cannot be opened, does not
// start with the gzip magic bytes, or the handle fails to initialize.
//
// Outside of fuzzing builds, a duplicate of the cached handle is returned
// instead when it refers to the same base file and the access mode is "rb".
// The handler mutex is held for the whole operation. The caller is expected
// to pass a filename starting with "/vsigzip/": the prefix is skipped without
// being checked here.
VSIGZipHandle *
VSIGZipFilesystemHandler::OpenGZipReadOnly(const char *pszFilename,
                                           const char *pszAccess)
{
    const char *const pszBaseFilename = pszFilename + strlen("/vsigzip/");
    VSIFilesystemHandler *poFSHandler =
        VSIFileManager::GetHandler(pszBaseFilename);

    std::unique_lock oLock(oMutex);

#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    // Disable caching in fuzzing mode as the /vsigzip/ file is likely to
    // change very often
    // TODO: filename-based logic isn't enough. We should probably check
    // timestamp and/or file size.
    const bool bCachedHandleMatches =
        poHandleLastGZipFile != nullptr &&
        strcmp(pszBaseFilename, poHandleLastGZipFile->GetBaseFileName()) ==
            0 &&
        EQUAL(pszAccess, "rb");
    if (bCachedHandleMatches)
    {
        // Fall through to a regular open when duplication fails.
        if (VSIGZipHandle *poDuplicate = poHandleLastGZipFile->Duplicate())
            return poDuplicate;
    }
#else
    CPL_IGNORE_RET_VAL(pszAccess);
#endif

    VSIVirtualHandleUniquePtr poVirtualHandle(
        poFSHandler->Open(pszBaseFilename, "rb"));
    if (poVirtualHandle == nullptr)
        return nullptr;

    // Check the two gzip magic bytes before going any further.
    unsigned char signature[2] = {'\0', '\0'};
    const bool bHasGZipMagic = poVirtualHandle->Read(signature, 2) == 2 &&
                               signature[0] == gz_magic[0] &&
                               signature[1] == gz_magic[1];
    if (!bHasGZipMagic)
        return nullptr;

    // A new file is being opened: discard the cached handle.
    if (poHandleLastGZipFile)
    {
        poHandleLastGZipFile->UnsetCanSaveInfo();
        poHandleLastGZipFile.reset();
    }

    auto poHandle = std::make_unique<VSIGZipHandle>(std::move(poVirtualHandle),
                                                    pszBaseFilename);
    if (poHandle->IsInitOK())
        return poHandle.release();
    return nullptr;
}
3131
3132
/************************************************************************/
3133
/*                                Stat()                                */
3134
/************************************************************************/
3135
3136
int VSIGZipFilesystemHandler::Stat(const char *pszFilename,
3137
                                   VSIStatBufL *pStatBuf, int nFlags)
3138
3.50k
{
3139
3.50k
    if (!STARTS_WITH_CI(pszFilename, "/vsigzip/"))
3140
41
        return -1;
3141
3142
3.46k
    std::unique_lock oLock(oMutex);
3143
3144
3.46k
    memset(pStatBuf, 0, sizeof(VSIStatBufL));
3145
3146
3.46k
    if (poHandleLastGZipFile != nullptr &&
3147
0
        strcmp(pszFilename + strlen("/vsigzip/"),
3148
0
               poHandleLastGZipFile->GetBaseFileName()) == 0)
3149
0
    {
3150
0
        if (poHandleLastGZipFile->GetUncompressedSize() != 0)
3151
0
        {
3152
0
            pStatBuf->st_mode = S_IFREG;
3153
0
            pStatBuf->st_size = poHandleLastGZipFile->GetUncompressedSize();
3154
0
            return 0;
3155
0
        }
3156
0
    }
3157
3158
    // Begin by doing a stat on the real file.
3159
3.46k
    int ret = VSIStatExL(pszFilename + strlen("/vsigzip/"), pStatBuf, nFlags);
3160
3161
3.46k
    if (ret == 0 && (nFlags & VSI_STAT_SIZE_FLAG))
3162
182
    {
3163
182
        CPLString osCacheFilename(pszFilename + strlen("/vsigzip/"));
3164
182
        osCacheFilename += ".properties";
3165
3166
        // Can we save a bit of seeking by using a .properties file?
3167
182
        VSILFILE *fpCacheLength = VSIFOpenL(osCacheFilename.c_str(), "rb");
3168
182
        if (fpCacheLength)
3169
94
        {
3170
94
            const char *pszLine;
3171
94
            GUIntBig nCompressedSize = 0;
3172
94
            GUIntBig nUncompressedSize = 0;
3173
94
            while ((pszLine = CPLReadLineL(fpCacheLength)) != nullptr)
3174
0
            {
3175
0
                if (STARTS_WITH_CI(pszLine, "compressed_size="))
3176
0
                {
3177
0
                    const char *pszBuffer =
3178
0
                        pszLine + strlen("compressed_size=");
3179
0
                    nCompressedSize = CPLScanUIntBig(
3180
0
                        pszBuffer, static_cast<int>(strlen(pszBuffer)));
3181
0
                }
3182
0
                else if (STARTS_WITH_CI(pszLine, "uncompressed_size="))
3183
0
                {
3184
0
                    const char *pszBuffer =
3185
0
                        pszLine + strlen("uncompressed_size=");
3186
0
                    nUncompressedSize = CPLScanUIntBig(
3187
0
                        pszBuffer, static_cast<int>(strlen(pszBuffer)));
3188
0
                }
3189
0
            }
3190
3191
94
            CPL_IGNORE_RET_VAL(VSIFCloseL(fpCacheLength));
3192
3193
94
            if (nCompressedSize == static_cast<GUIntBig>(pStatBuf->st_size))
3194
94
            {
3195
                // Patch with the uncompressed size.
3196
94
                pStatBuf->st_size = nUncompressedSize;
3197
3198
94
                VSIGZipHandle *poHandle =
3199
94
                    VSIGZipFilesystemHandler::OpenGZipReadOnly(pszFilename,
3200
94
                                                               "rb");
3201
94
                if (poHandle)
3202
0
                {
3203
0
                    poHandle->SetUncompressedSize(nUncompressedSize);
3204
0
                    SaveInfo_unlocked(poHandle);
3205
0
                    delete poHandle;
3206
0
                }
3207
3208
94
                return ret;
3209
94
            }
3210
94
        }
3211
3212
        // No, then seek at the end of the data (slow).
3213
88
        VSIGZipHandle *poHandle =
3214
88
            VSIGZipFilesystemHandler::OpenGZipReadOnly(pszFilename, "rb");
3215
88
        if (poHandle)
3216
0
        {
3217
0
            poHandle->Seek(0, SEEK_END);
3218
0
            const GUIntBig uncompressed_size =
3219
0
                static_cast<GUIntBig>(poHandle->Tell());
3220
0
            poHandle->Seek(0, SEEK_SET);
3221
3222
            // Patch with the uncompressed size.
3223
0
            pStatBuf->st_size = uncompressed_size;
3224
3225
0
            delete poHandle;
3226
0
        }
3227
88
        else
3228
88
        {
3229
88
            ret = -1;
3230
88
        }
3231
88
    }
3232
3233
3.37k
    return ret;
3234
3.46k
}
3235
3236
/************************************************************************/
3237
/*                             ReadDirEx()                              */
3238
/************************************************************************/
3239
3240
/**
 * Directory listing for /vsigzip/: both arguments are ignored and no
 * listing is ever produced.
 *
 * @return Always nullptr.
 */
char **VSIGZipFilesystemHandler::ReadDirEx(const char * /*pszDirname*/,
                                           int /* nMaxFiles */)
{
    char **papszNoEntries = nullptr;
    return papszNoEntries;
}
3245
3246
/************************************************************************/
3247
/*                             GetOptions()                             */
3248
/************************************************************************/
3249
3250
/**
 * Describe the options understood by this handler.
 *
 * @return A constant XML string with an <Options> root listing
 *         GDAL_NUM_THREADS and CPL_VSIL_DEFLATE_CHUNK_SIZE.
 */
const char *VSIGZipFilesystemHandler::GetOptions()
{
    // Static storage: the returned pointer stays valid after the call.
    static const char szOptionsXML[] =
        "<Options>"
        "  <Option name='GDAL_NUM_THREADS' type='string' "
        "description='Number of threads for compression. Either a integer "
        "or ALL_CPUS'/>"
        "  <Option name='CPL_VSIL_DEFLATE_CHUNK_SIZE' type='string' "
        "description='Chunk of uncompressed data for parallelization. "
        "Use K(ilobytes) or M(egabytes) suffix' default='1M'/>"
        "</Options>";
    return szOptionsXML;
}
3261
3262
//! @endcond
3263
/************************************************************************/
3264
/*                     VSIInstallGZipFileHandler()                      */
3265
/************************************************************************/
3266
3267
/*!
3268
 \brief Install GZip file system handler.
3269
3270
 A special file handler is installed that allows reading on-the-fly and
3271
 writing in GZip (.gz) files.
3272
3273
 All portions of the file system underneath the base
3274
 path "/vsigzip/" will be handled by this driver.
3275
3276
 \verbatim embed:rst
3277
 See :ref:`/vsigzip/ documentation <vsigzip>`
3278
 \endverbatim
3279
3280
 */
3281
3282
void VSIInstallGZipFileHandler()
3283
3
{
3284
3
    VSIFileManager::InstallHandler(
3285
3
        "/vsigzip/", std::make_shared<VSIGZipFilesystemHandler>());
3286
3
}
3287
3288
//! @cond Doxygen_Suppress
3289
3290
/************************************************************************/
3291
/* ==================================================================== */
3292
/*                         VSIZipEntryFileOffset                        */
3293
/* ==================================================================== */
3294
/************************************************************************/
3295
3296
class VSIZipEntryFileOffset final : public VSIArchiveEntryFileOffset
3297
{
3298
  public:
3299
    unz_file_pos m_file_pos;
3300
3301
0
    explicit VSIZipEntryFileOffset(unz_file_pos file_pos) : m_file_pos()
3302
0
    {
3303
0
        m_file_pos.pos_in_zip_directory = file_pos.pos_in_zip_directory;
3304
0
        m_file_pos.num_of_file = file_pos.num_of_file;
3305
0
    }
3306
3307
    ~VSIZipEntryFileOffset() override;
3308
};
3309
3310
0
VSIZipEntryFileOffset::~VSIZipEntryFileOffset() = default;
3311
3312
/************************************************************************/
3313
/* ==================================================================== */
3314
/*                             VSIZipReader                             */
3315
/* ==================================================================== */
3316
/************************************************************************/
3317
3318
class VSIZipReader final : public VSIArchiveReader
3319
{
3320
    CPL_DISALLOW_COPY_ASSIGN(VSIZipReader)
3321
3322
  private:
3323
    unzFile unzF = nullptr;
3324
    unz_file_pos file_pos;
3325
    GUIntBig nNextFileSize = 0;
3326
    CPLString osNextFileName{};
3327
    GIntBig nModifiedTime = 0;
3328
3329
    bool SetInfo();
3330
3331
  public:
3332
    explicit VSIZipReader(const char *pszZipFileName);
3333
    ~VSIZipReader() override;
3334
3335
    int IsValid()
3336
1.33k
    {
3337
1.33k
        return unzF != nullptr;
3338
1.33k
    }
3339
3340
    unzFile GetUnzFileHandle()
3341
0
    {
3342
0
        return unzF;
3343
0
    }
3344
3345
    int GotoFirstFile() override;
3346
    int GotoNextFile() override;
3347
3348
    VSIArchiveEntryFileOffset *GetFileOffset() override
3349
0
    {
3350
0
        return new VSIZipEntryFileOffset(file_pos);
3351
0
    }
3352
3353
    GUIntBig GetFileSize() override
3354
0
    {
3355
0
        return nNextFileSize;
3356
0
    }
3357
3358
    CPLString GetFileName() override
3359
0
    {
3360
0
        return osNextFileName;
3361
0
    }
3362
3363
    GIntBig GetModifiedTime() override
3364
0
    {
3365
0
        return nModifiedTime;
3366
0
    }
3367
3368
    int GotoFileOffset(VSIArchiveEntryFileOffset *pOffset) override;
3369
};
3370
3371
/************************************************************************/
3372
/*                            VSIZipReader()                            */
3373
/************************************************************************/
3374
3375
/**
 * Open the given ZIP file with cpl_unzOpen() and reset the entry position.
 * Callers check IsValid() to learn whether the open succeeded.
 */
VSIZipReader::VSIZipReader(const char *pszZipFileName)
    : unzF(cpl_unzOpen(pszZipFileName)), file_pos()
{
    // Explicitly start from a zeroed position.
    file_pos.num_of_file = 0;
    file_pos.pos_in_zip_directory = 0;
}
3381
3382
/************************************************************************/
3383
/*                           ~VSIZipReader()                            */
3384
/************************************************************************/
3385
3386
/** Close the archive handle if one was successfully opened. */
VSIZipReader::~VSIZipReader()
{
    if (unzF == nullptr)
        return;

    cpl_unzClose(unzF);
}
3391
3392
/************************************************************************/
3393
/*                              SetInfo()                               */
3394
/************************************************************************/
3395
3396
bool VSIZipReader::SetInfo()
3397
0
{
3398
0
    char fileName[8193] = {};
3399
0
    unz_file_info file_info;
3400
0
    if (UNZ_OK != cpl_unzGetCurrentFileInfo(unzF, &file_info, fileName,
3401
0
                                            sizeof(fileName) - 1, nullptr, 0,
3402
0
                                            nullptr, 0))
3403
0
    {
3404
0
        CPLError(CE_Failure, CPLE_FileIO, "cpl_unzGetCurrentFileInfo failed");
3405
0
        cpl_unzGetFilePos(unzF, &file_pos);
3406
0
        return false;
3407
0
    }
3408
0
    fileName[sizeof(fileName) - 1] = '\0';
3409
0
    osNextFileName = fileName;
3410
0
    nNextFileSize = file_info.uncompressed_size;
3411
0
    struct tm brokendowntime;
3412
0
    brokendowntime.tm_sec = file_info.tmu_date.tm_sec;
3413
0
    brokendowntime.tm_min = file_info.tmu_date.tm_min;
3414
0
    brokendowntime.tm_hour = file_info.tmu_date.tm_hour;
3415
0
    brokendowntime.tm_mday = file_info.tmu_date.tm_mday;
3416
0
    brokendowntime.tm_mon = file_info.tmu_date.tm_mon;
3417
    // The minizip conventions differs from the Unix one.
3418
0
    brokendowntime.tm_year = file_info.tmu_date.tm_year - 1900;
3419
0
    nModifiedTime = CPLYMDHMSToUnixTime(&brokendowntime);
3420
3421
0
    cpl_unzGetFilePos(unzF, &file_pos);
3422
0
    return true;
3423
0
}
3424
3425
/************************************************************************/
3426
/*                            GotoNextFile()                            */
3427
/************************************************************************/
3428
3429
int VSIZipReader::GotoNextFile()
3430
0
{
3431
0
    if (cpl_unzGoToNextFile(unzF) != UNZ_OK)
3432
0
        return FALSE;
3433
3434
0
    if (!SetInfo())
3435
0
        return FALSE;
3436
3437
0
    return TRUE;
3438
0
}
3439
3440
/************************************************************************/
3441
/*                           GotoFirstFile()                            */
3442
/************************************************************************/
3443
3444
int VSIZipReader::GotoFirstFile()
3445
0
{
3446
0
    if (cpl_unzGoToFirstFile(unzF) != UNZ_OK)
3447
0
        return FALSE;
3448
3449
0
    if (!SetInfo())
3450
0
        return FALSE;
3451
3452
0
    return TRUE;
3453
0
}
3454
3455
/************************************************************************/
3456
/*                           GotoFileOffset()                           */
3457
/************************************************************************/
3458
3459
/**
 * Position the reader on the entry designated by pOffset and refresh the
 * cached info.
 *
 * @param pOffset Offset previously obtained from GetFileOffset(); it must
 *                actually be a VSIZipEntryFileOffset.
 * @return TRUE on success, FALSE (with a CPLError) if pOffset is null or
 *         the position cannot be restored, FALSE if the entry information
 *         cannot be read.
 */
int VSIZipReader::GotoFileOffset(VSIArchiveEntryFileOffset *pOffset)
{
    // static_cast is the proper cast for a base-to-derived conversion
    // inside a class hierarchy; reinterpret_cast performs no pointer
    // adjustment and is not meant for that purpose.
    auto *pZipEntryOffset = static_cast<VSIZipEntryFileOffset *>(pOffset);
    if (pZipEntryOffset == nullptr ||
        cpl_unzGoToFilePos(unzF, &(pZipEntryOffset->m_file_pos)) != UNZ_OK)
    {
        CPLError(CE_Failure, CPLE_AppDefined, "GotoFileOffset failed");
        return FALSE;
    }

    if (!SetInfo())
        return FALSE;

    return TRUE;
}
3474
3475
/************************************************************************/
3476
/* ==================================================================== */
3477
/*                       VSIZipFilesystemHandler                        */
3478
/* ==================================================================== */
3479
/************************************************************************/
3480
3481
class VSIZipWriteHandle;
3482
3483
class VSIZipFilesystemHandler final : public VSIArchiveFilesystemHandler
3484
{
3485
    CPL_DISALLOW_COPY_ASSIGN(VSIZipFilesystemHandler)
3486
3487
    std::map<CPLString, VSIZipWriteHandle *> oMapZipWriteHandles{};
3488
    VSIVirtualHandleUniquePtr OpenForWrite_unlocked(const char *pszFilename,
3489
                                                    const char *pszAccess);
3490
3491
    struct VSIFileInZipInfo
3492
    {
3493
        VSIVirtualHandleUniquePtr poVirtualHandle{};
3494
        std::map<std::string, std::string> oMapProperties{};
3495
        int nCompressionMethod = 0;
3496
        uint64_t nUncompressedSize = 0;
3497
        uint64_t nCompressedSize = 0;
3498
        uint64_t nStartDataStream = 0;
3499
        uLong nCRC = 0;
3500
        bool bSOZipIndexFound = false;
3501
        bool bSOZipIndexValid = false;
3502
        uint32_t nSOZIPVersion = 0;
3503
        uint32_t nSOZIPToSkip = 0;
3504
        uint32_t nSOZIPChunkSize = 0;
3505
        uint32_t nSOZIPOffsetSize = 0;
3506
        uint64_t nSOZIPStartData = 0;
3507
    };
3508
3509
    bool GetFileInfo(const char *pszFilename, VSIFileInZipInfo &info,
3510
                     bool bSetError);
3511
3512
  public:
3513
3
    VSIZipFilesystemHandler() = default;
3514
    ~VSIZipFilesystemHandler() override;
3515
3516
    const char *GetPrefix() const override
3517
13.8k
    {
3518
13.8k
        return "/vsizip";
3519
13.8k
    }
3520
3521
    std::vector<CPLString> GetExtensions() const override;
3522
    std::unique_ptr<VSIArchiveReader>
3523
    CreateReader(const char *pszZipFileName) override;
3524
3525
    VSIVirtualHandleUniquePtr Open(const char *pszFilename,
3526
                                   const char *pszAccess, bool bSetError,
3527
                                   CSLConstList /* papszOptions */) override;
3528
3529
    char **GetFileMetadata(const char *pszFilename, const char *pszDomain,
3530
                           CSLConstList papszOptions) override;
3531
3532
    VSIVirtualHandleUniquePtr OpenForWrite(const char *pszFilename,
3533
                                           const char *pszAccess);
3534
3535
    int CopyFile(const char *pszSource, const char *pszTarget,
3536
                 VSILFILE *fpSource, vsi_l_offset nSourceSize,
3537
                 const char *const *papszOptions,
3538
                 GDALProgressFunc pProgressFunc, void *pProgressData) override;
3539
3540
    int Mkdir(const char *pszDirname, long nMode) override;
3541
    char **ReadDirEx(const char *pszDirname, int nMaxFiles) override;
3542
    int Stat(const char *pszFilename, VSIStatBufL *pStatBuf,
3543
             int nFlags) override;
3544
3545
    const char *GetOptions() override;
3546
3547
    void RemoveFromMap(VSIZipWriteHandle *poHandle);
3548
};
3549
3550
/************************************************************************/
3551
/* ==================================================================== */
3552
/*                       VSIZipWriteHandle                              */
3553
/* ==================================================================== */
3554
/************************************************************************/
3555
3556
class VSIZipWriteHandle final : public VSIVirtualHandle
3557
{
3558
    CPL_DISALLOW_COPY_ASSIGN(VSIZipWriteHandle)
3559
3560
    VSIZipFilesystemHandler *m_poFS = nullptr;
3561
    void *m_hZIP = nullptr;
3562
    VSIZipWriteHandle *poChildInWriting = nullptr;
3563
    VSIZipWriteHandle *m_poParent = nullptr;
3564
    bool bAutoDeleteParent = false;
3565
    vsi_l_offset nCurOffset = 0;
3566
3567
  public:
3568
    VSIZipWriteHandle(VSIZipFilesystemHandler *poFS, void *hZIP,
3569
                      VSIZipWriteHandle *poParent);
3570
3571
    ~VSIZipWriteHandle() override;
3572
3573
    int Seek(vsi_l_offset nOffset, int nWhence) override;
3574
    vsi_l_offset Tell() override;
3575
    size_t Read(void *pBuffer, size_t nBytes) override;
3576
    size_t Write(const void *pBuffer, size_t nBytes) override;
3577
3578
    int Eof() override
3579
0
    {
3580
0
        return 0;
3581
0
    }
3582
3583
    int Error() override
3584
0
    {
3585
0
        return 0;
3586
0
    }
3587
3588
    void ClearErr() override
3589
0
    {
3590
0
    }
3591
3592
    int Flush() override;
3593
    int Close() override;
3594
3595
    void StartNewFile(VSIZipWriteHandle *poSubFile);
3596
    void StopCurrentFile();
3597
3598
    void *GetHandle()
3599
0
    {
3600
0
        return m_hZIP;
3601
0
    }
3602
3603
    VSIZipWriteHandle *GetChildInWriting()
3604
0
    {
3605
0
        return poChildInWriting;
3606
0
    }
3607
3608
    void SetAutoDeleteParent()
3609
0
    {
3610
0
        bAutoDeleteParent = true;
3611
0
    }
3612
};
3613
3614
/************************************************************************/
3615
/*                      ~VSIZipFilesystemHandler()                      */
3616
/************************************************************************/
3617
3618
/**
 * Emit an error for every write handle still registered in the map when
 * the handler is destroyed. The handles themselves are not touched.
 */
VSIZipFilesystemHandler::~VSIZipFilesystemHandler()
{
    for (const auto &oNameAndHandle : oMapZipWriteHandles)
    {
        CPLError(CE_Failure, CPLE_AppDefined, "%s has not been closed",
                 oNameAndHandle.first.c_str());
    }
}
3628
3629
/************************************************************************/
3630
/*                           GetExtensions()                            */
3631
/************************************************************************/
3632
3633
std::vector<CPLString> VSIZipFilesystemHandler::GetExtensions() const
3634
946
{
3635
946
    std::vector<CPLString> oList;
3636
946
    oList.push_back(".zip");
3637
946
    oList.push_back(".kmz");
3638
946
    oList.push_back(".dwf");
3639
946
    oList.push_back(".ods");
3640
946
    oList.push_back(".xlsx");
3641
946
    oList.push_back(".xlsm");
3642
3643
    // Add to zip FS handler extensions array additional extensions
3644
    // listed in CPL_VSIL_ZIP_ALLOWED_EXTENSIONS config option.
3645
    // The extensions are divided by commas.
3646
946
    const char *pszAllowedExtensions =
3647
946
        CPLGetConfigOption("CPL_VSIL_ZIP_ALLOWED_EXTENSIONS", nullptr);
3648
946
    if (pszAllowedExtensions)
3649
0
    {
3650
0
        char **papszExtensions =
3651
0
            CSLTokenizeString2(pszAllowedExtensions, ", ", 0);
3652
0
        for (int i = 0; papszExtensions[i] != nullptr; i++)
3653
0
        {
3654
0
            oList.push_back(papszExtensions[i]);
3655
0
        }
3656
0
        CSLDestroy(papszExtensions);
3657
0
    }
3658
3659
946
    return oList;
3660
946
}
3661
3662
/************************************************************************/
3663
/*                            CreateReader()                            */
3664
/************************************************************************/
3665
3666
std::unique_ptr<VSIArchiveReader>
3667
VSIZipFilesystemHandler::CreateReader(const char *pszZipFileName)
3668
1.33k
{
3669
1.33k
    auto poReader = std::make_unique<VSIZipReader>(pszZipFileName);
3670
3671
1.33k
    if (!poReader->IsValid() || !poReader->GotoFirstFile())
3672
1.33k
    {
3673
1.33k
        return nullptr;
3674
1.33k
    }
3675
3676
0
    return poReader;
3677
1.33k
}
3678
3679
/************************************************************************/
3680
/*                            VSISOZipHandle                            */
3681
/************************************************************************/
3682
3683
class VSISOZipHandle final : public VSIVirtualHandle
3684
{
3685
    VSIVirtualHandleUniquePtr poBaseHandle_{};
3686
    vsi_l_offset nPosCompressedStream_;
3687
    uint64_t compressed_size_;
3688
    uint64_t uncompressed_size_;
3689
    vsi_l_offset indexPos_;
3690
    uint32_t nToSkip_;
3691
    uint32_t nChunkSize_;
3692
    bool bEOF_ = false;
3693
    bool bError_ = false;
3694
    vsi_l_offset nCurPos_ = 0;
3695
    bool bOK_ = true;
3696
#ifdef HAVE_LIBDEFLATE
3697
    struct libdeflate_decompressor *pDecompressor_ = nullptr;
3698
#else
3699
    z_stream sStream_{};
3700
#endif
3701
3702
    VSISOZipHandle(const VSISOZipHandle &) = delete;
3703
    VSISOZipHandle &operator=(const VSISOZipHandle &) = delete;
3704
3705
  public:
3706
    VSISOZipHandle(VSIVirtualHandleUniquePtr poVirtualHandleIn,
3707
                   vsi_l_offset nPosCompressedStream, uint64_t compressed_size,
3708
                   uint64_t uncompressed_size, vsi_l_offset indexPos,
3709
                   uint32_t nToSkip, uint32_t nChunkSize);
3710
    ~VSISOZipHandle() override;
3711
3712
    int Seek(vsi_l_offset nOffset, int nWhence) override;
3713
3714
    vsi_l_offset Tell() override
3715
0
    {
3716
0
        return nCurPos_;
3717
0
    }
3718
3719
    size_t Read(void *pBuffer, size_t nBytes) override;
3720
3721
    size_t Write(const void *, size_t) override
3722
0
    {
3723
0
        return 0;
3724
0
    }
3725
3726
    int Eof() override
3727
0
    {
3728
0
        return bEOF_;
3729
0
    }
3730
3731
    int Error() override
3732
0
    {
3733
0
        return bError_;
3734
0
    }
3735
3736
    void ClearErr() override
3737
0
    {
3738
0
        bEOF_ = false;
3739
0
        bError_ = false;
3740
0
    }
3741
3742
    int Close() override;
3743
3744
    bool IsOK() const
3745
0
    {
3746
0
        return bOK_;
3747
0
    }
3748
};
3749
3750
/************************************************************************/
3751
/*                           VSISOZipHandle()                           */
3752
/************************************************************************/
3753
3754
/**
 * Take ownership of the base handle, record the stream layout and set up
 * the decompressor. IsOK() reports whether that set-up succeeded.
 */
VSISOZipHandle::VSISOZipHandle(VSIVirtualHandleUniquePtr poVirtualHandleIn,
                               vsi_l_offset nPosCompressedStream,
                               uint64_t compressed_size,
                               uint64_t uncompressed_size,
                               vsi_l_offset indexPos, uint32_t nToSkip,
                               uint32_t nChunkSize)
    : poBaseHandle_(std::move(poVirtualHandleIn)),
      nPosCompressedStream_(nPosCompressedStream),
      compressed_size_(compressed_size), uncompressed_size_(uncompressed_size),
      indexPos_(indexPos), nToSkip_(nToSkip), nChunkSize_(nChunkSize)
{
#ifdef HAVE_LIBDEFLATE
    pDecompressor_ = libdeflate_alloc_decompressor();
    bOK_ = (pDecompressor_ != nullptr);
#else
    memset(&sStream_, 0, sizeof(sStream_));
    // Negative window bits: raw deflate data, without zlib/gzip header.
    const int nInitRet = inflateInit2(&sStream_, -MAX_WBITS);
    bOK_ = (nInitRet == Z_OK);
#endif
}
3776
3777
/************************************************************************/
3778
/*                          ~VSISOZipHandle()                           */
3779
/************************************************************************/
3780
3781
/**
 * Close the base handle, then release the decompressor if it had been
 * successfully initialized.
 */
VSISOZipHandle::~VSISOZipHandle()
{
    // Qualified call: no virtual dispatch from the destructor.
    VSISOZipHandle::Close();

    if (!bOK_)
        return;

#ifdef HAVE_LIBDEFLATE
    libdeflate_free_decompressor(pDecompressor_);
#else
    inflateEnd(&sStream_);
#endif
}
3793
3794
/************************************************************************/
3795
/*                               Close()                                */
3796
/************************************************************************/
3797
3798
int VSISOZipHandle::Close()
3799
0
{
3800
0
    int ret = 0;
3801
0
    if (poBaseHandle_)
3802
0
    {
3803
0
        ret = poBaseHandle_->Close();
3804
0
        poBaseHandle_.reset();
3805
0
    }
3806
0
    return ret;
3807
0
}
3808
3809
/************************************************************************/
3810
/*                                Seek()                                */
3811
/************************************************************************/
3812
3813
/**
 * Move the current position in the uncompressed stream and clear the
 * end-of-file flag. No I/O is done here.
 *
 * @param nOffset Offset. It is ignored when nWhence is SEEK_END.
 * @param nWhence SEEK_SET for an absolute position, SEEK_END for the end of
 *                the uncompressed stream; any other value is handled as a
 *                move relative to the current position.
 * @return Always 0.
 */
int VSISOZipHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    bEOF_ = false;

    switch (nWhence)
    {
        case SEEK_SET:
            nCurPos_ = nOffset;
            break;

        case SEEK_END:
            nCurPos_ = uncompressed_size_;
            break;

        default:
            nCurPos_ += nOffset;
            break;
    }

    return 0;
}
3824
3825
/************************************************************************/
3826
/*                                Read()                                */
3827
/************************************************************************/
3828
3829
size_t VSISOZipHandle::Read(void *pBuffer, size_t nBytes)
3830
0
{
3831
0
    size_t nRet = nBytes;
3832
0
    size_t nToRead = nBytes;
3833
0
    if (nCurPos_ >= uncompressed_size_ && nToRead > 0)
3834
0
    {
3835
0
        bEOF_ = true;
3836
0
        return 0;
3837
0
    }
3838
3839
0
    if ((nCurPos_ % nChunkSize_) != 0)
3840
0
    {
3841
0
        bError_ = true;
3842
0
        CPLError(CE_Failure, CPLE_NotSupported,
3843
0
                 "nCurPos is not a multiple of nChunkSize");
3844
0
        return 0;
3845
0
    }
3846
0
    if (nCurPos_ + nToRead > uncompressed_size_)
3847
0
    {
3848
0
        nToRead = static_cast<size_t>(uncompressed_size_ - nCurPos_);
3849
0
        nRet = nToRead;
3850
0
    }
3851
0
    else if ((nToRead % nChunkSize_) != 0)
3852
0
    {
3853
0
        bError_ = true;
3854
0
        CPLError(CE_Failure, CPLE_NotSupported,
3855
0
                 "nToRead is not a multiple of nChunkSize");
3856
0
        return 0;
3857
0
    }
3858
3859
0
    const auto ReadOffsetInCompressedStream =
3860
0
        [this](uint64_t nChunkIdx) -> uint64_t
3861
0
    {
3862
0
        if (nChunkIdx == 0)
3863
0
            return 0;
3864
0
        if (nChunkIdx == 1 + (uncompressed_size_ - 1) / nChunkSize_)
3865
0
            return compressed_size_;
3866
0
        constexpr size_t nOffsetSize = 8;
3867
0
        if (poBaseHandle_->Seek(indexPos_ + 32 + nToSkip_ +
3868
0
                                    (nChunkIdx - 1) * nOffsetSize,
3869
0
                                SEEK_SET) != 0)
3870
0
            return static_cast<uint64_t>(-1);
3871
3872
0
        uint64_t nOffset;
3873
0
        if (poBaseHandle_->Read(&nOffset, sizeof(nOffset)) != sizeof(nOffset))
3874
0
            return static_cast<uint64_t>(-1);
3875
0
        CPL_LSBPTR64(&nOffset);
3876
0
        return nOffset;
3877
0
    };
3878
3879
0
    size_t nOffsetInOutputBuffer = 0;
3880
0
    while (true)
3881
0
    {
3882
0
        uint64_t nOffsetInCompressedStream =
3883
0
            ReadOffsetInCompressedStream(nCurPos_ / nChunkSize_);
3884
0
        if (nOffsetInCompressedStream == static_cast<uint64_t>(-1))
3885
0
        {
3886
0
            bError_ = true;
3887
0
            CPLError(CE_Failure, CPLE_AppDefined,
3888
0
                     "Cannot read nOffsetInCompressedStream");
3889
0
            return 0;
3890
0
        }
3891
0
        uint64_t nNextOffsetInCompressedStream =
3892
0
            ReadOffsetInCompressedStream(1 + nCurPos_ / nChunkSize_);
3893
0
        if (nNextOffsetInCompressedStream == static_cast<uint64_t>(-1))
3894
0
        {
3895
0
            bError_ = true;
3896
0
            CPLError(CE_Failure, CPLE_AppDefined,
3897
0
                     "Cannot read nNextOffsetInCompressedStream");
3898
0
            return 0;
3899
0
        }
3900
3901
0
        if (nNextOffsetInCompressedStream <= nOffsetInCompressedStream ||
3902
0
            nNextOffsetInCompressedStream - nOffsetInCompressedStream >
3903
0
                13 + 2 * nChunkSize_ ||
3904
0
            nNextOffsetInCompressedStream > compressed_size_)
3905
0
        {
3906
0
            bError_ = true;
3907
0
            CPLError(
3908
0
                CE_Failure, CPLE_AppDefined,
3909
0
                "Invalid values for nOffsetInCompressedStream (" CPL_FRMT_GUIB
3910
0
                ") / "
3911
0
                "nNextOffsetInCompressedStream(" CPL_FRMT_GUIB ")",
3912
0
                static_cast<GUIntBig>(nOffsetInCompressedStream),
3913
0
                static_cast<GUIntBig>(nNextOffsetInCompressedStream));
3914
0
            return 0;
3915
0
        }
3916
3917
        // CPLDebug("VSIZIP", "Seek to compressed data at offset "
3918
        // CPL_FRMT_GUIB, static_cast<GUIntBig>(nPosCompressedStream_ +
3919
        // nOffsetInCompressedStream));
3920
0
        if (poBaseHandle_->Seek(
3921
0
                nPosCompressedStream_ + nOffsetInCompressedStream, SEEK_SET) !=
3922
0
            0)
3923
0
        {
3924
0
            bError_ = true;
3925
0
            return 0;
3926
0
        }
3927
3928
0
        const size_t nCompressedToRead = static_cast<size_t>(
3929
0
            nNextOffsetInCompressedStream - nOffsetInCompressedStream);
3930
        // CPLDebug("VSIZIP", "nCompressedToRead = %d", nCompressedToRead);
3931
0
        std::vector<GByte> abyCompressedData(nCompressedToRead);
3932
0
        if (poBaseHandle_->Read(&abyCompressedData[0], nCompressedToRead) !=
3933
0
            nCompressedToRead)
3934
0
        {
3935
0
            bError_ = true;
3936
0
            return 0;
3937
0
        }
3938
3939
0
        size_t nToReadThisIter =
3940
0
            std::min(nToRead, static_cast<size_t>(nChunkSize_));
3941
3942
0
        if (nCompressedToRead >= 5 &&
3943
0
            abyCompressedData[nCompressedToRead - 5] == 0x00 &&
3944
0
            memcmp(&abyCompressedData[nCompressedToRead - 4],
3945
0
                   "\x00\x00\xFF\xFF", 4) == 0)
3946
0
        {
3947
            // Tag this flush block as the last one.
3948
0
            abyCompressedData[nCompressedToRead - 5] = 0x01;
3949
0
        }
3950
3951
#ifdef HAVE_LIBDEFLATE
3952
        size_t nOut = 0;
3953
        if (libdeflate_deflate_decompress(
3954
                pDecompressor_, &abyCompressedData[0], nCompressedToRead,
3955
                static_cast<Bytef *>(pBuffer) + nOffsetInOutputBuffer,
3956
                nToReadThisIter, &nOut) != LIBDEFLATE_SUCCESS)
3957
        {
3958
            bError_ = true;
3959
            CPLError(
3960
                CE_Failure, CPLE_AppDefined,
3961
                "libdeflate_deflate_decompress() failed at pos " CPL_FRMT_GUIB,
3962
                static_cast<GUIntBig>(nCurPos_));
3963
            return 0;
3964
        }
3965
        if (nOut != nToReadThisIter)
3966
        {
3967
            bError_ = true;
3968
            CPLError(CE_Failure, CPLE_AppDefined,
3969
                     "Only %u bytes decompressed at pos " CPL_FRMT_GUIB
3970
                     " whereas %u where expected",
3971
                     static_cast<unsigned>(nOut),
3972
                     static_cast<GUIntBig>(nCurPos_),
3973
                     static_cast<unsigned>(nToReadThisIter));
3974
            return 0;
3975
        }
3976
#else
3977
        if constexpr (sizeof(size_t) > sizeof(uInt))
3978
0
        {
3979
0
            if (nCompressedToRead > UINT32_MAX)
3980
0
            {
3981
0
                CPLError(CE_Failure, CPLE_AppDefined,
3982
0
                         "nCompressedToRead > UINT32_MAX");
3983
0
                return 0;
3984
0
            }
3985
0
        }
3986
0
        sStream_.avail_in = static_cast<uInt>(nCompressedToRead);
3987
0
        sStream_.next_in = &abyCompressedData[0];
3988
0
        sStream_.avail_out = static_cast<int>(nToReadThisIter);
3989
0
        sStream_.next_out =
3990
0
            static_cast<Bytef *>(pBuffer) + nOffsetInOutputBuffer;
3991
3992
0
        int err = inflate(&sStream_, Z_FINISH);
3993
0
        if ((err != Z_OK && err != Z_STREAM_END))
3994
0
        {
3995
0
            bError_ = true;
3996
0
            CPLError(CE_Failure, CPLE_AppDefined,
3997
0
                     "inflate() failed at pos " CPL_FRMT_GUIB,
3998
0
                     static_cast<GUIntBig>(nCurPos_));
3999
0
            inflateReset(&sStream_);
4000
0
            return 0;
4001
0
        }
4002
0
        if (sStream_.avail_in != 0)
4003
0
            CPLDebug("VSIZIP", "avail_in = %d", sStream_.avail_in);
4004
0
        if (sStream_.avail_out != 0)
4005
0
        {
4006
0
            bError_ = true;
4007
0
            CPLError(
4008
0
                CE_Failure, CPLE_AppDefined,
4009
0
                "Only %u bytes decompressed at pos " CPL_FRMT_GUIB
4010
0
                " whereas %u where expected",
4011
0
                static_cast<unsigned>(nToReadThisIter - sStream_.avail_out),
4012
0
                static_cast<GUIntBig>(nCurPos_),
4013
0
                static_cast<unsigned>(nToReadThisIter));
4014
0
            inflateReset(&sStream_);
4015
0
            return 0;
4016
0
        }
4017
0
        inflateReset(&sStream_);
4018
0
#endif
4019
0
        nOffsetInOutputBuffer += nToReadThisIter;
4020
0
        nCurPos_ += nToReadThisIter;
4021
0
        nToRead -= nToReadThisIter;
4022
0
        if (nToRead == 0)
4023
0
            break;
4024
0
    }
4025
4026
0
    return nRet;
4027
0
}
4028
4029
/************************************************************************/
4030
/*                            GetFileInfo()                             */
4031
/************************************************************************/
4032
4033
bool VSIZipFilesystemHandler::GetFileInfo(const char *pszFilename,
4034
                                          VSIFileInZipInfo &info,
4035
                                          bool bSetError)
4036
1.49k
{
4037
4038
1.49k
    CPLString osZipInFileName;
4039
1.49k
    auto zipFilename =
4040
1.49k
        SplitFilename(pszFilename, osZipInFileName, true, bSetError);
4041
1.49k
    if (zipFilename == nullptr)
4042
916
        return false;
4043
4044
582
    {
4045
582
        std::unique_lock oLock(oMutex);
4046
582
        if (oMapZipWriteHandles.find(zipFilename.get()) !=
4047
582
            oMapZipWriteHandles.end())
4048
0
        {
4049
0
            CPLError(CE_Failure, CPLE_AppDefined,
4050
0
                     "Cannot read a zip file being written");
4051
0
            return false;
4052
0
        }
4053
582
    }
4054
4055
582
    auto poReader = OpenArchiveFile(zipFilename.get(), osZipInFileName);
4056
582
    if (poReader == nullptr)
4057
582
    {
4058
582
        return false;
4059
582
    }
4060
4061
0
    VSIFilesystemHandler *poFSHandler =
4062
0
        VSIFileManager::GetHandler(zipFilename.get());
4063
4064
0
    VSIVirtualHandleUniquePtr poVirtualHandle(
4065
0
        poFSHandler->Open(zipFilename.get(), "rb"));
4066
4067
0
    if (poVirtualHandle == nullptr)
4068
0
    {
4069
0
        return false;
4070
0
    }
4071
4072
0
    unzFile unzF =
4073
0
        cpl::down_cast<VSIZipReader *>(poReader.get())->GetUnzFileHandle();
4074
4075
0
    if (cpl_unzOpenCurrentFile(unzF) != UNZ_OK)
4076
0
    {
4077
0
        CPLError(CE_Failure, CPLE_AppDefined,
4078
0
                 "cpl_unzOpenCurrentFile() failed");
4079
0
        return false;
4080
0
    }
4081
4082
0
    info.nStartDataStream = cpl_unzGetCurrentFileZStreamPos(unzF);
4083
4084
0
    unz_file_info file_info;
4085
0
    if (cpl_unzGetCurrentFileInfo(unzF, &file_info, nullptr, 0, nullptr, 0,
4086
0
                                  nullptr, 0) != UNZ_OK)
4087
0
    {
4088
0
        CPLError(CE_Failure, CPLE_AppDefined,
4089
0
                 "cpl_unzGetCurrentFileInfo() failed");
4090
0
        cpl_unzCloseCurrentFile(unzF);
4091
0
        return false;
4092
0
    }
4093
4094
0
    if (file_info.size_file_extra)
4095
0
    {
4096
0
        std::vector<GByte> abyExtra(file_info.size_file_extra);
4097
0
        poVirtualHandle->Seek(file_info.file_extra_abs_offset, SEEK_SET);
4098
0
        if (poVirtualHandle->Read(&abyExtra[0], abyExtra.size()) ==
4099
0
            abyExtra.size())
4100
0
        {
4101
0
            size_t nPos = 0;
4102
0
            while (nPos + 2 * sizeof(uint16_t) <= abyExtra.size())
4103
0
            {
4104
0
                uint16_t nId;
4105
0
                memcpy(&nId, &abyExtra[nPos], sizeof(uint16_t));
4106
0
                nPos += sizeof(uint16_t);
4107
0
                CPL_LSBPTR16(&nId);
4108
0
                uint16_t nSize;
4109
0
                memcpy(&nSize, &abyExtra[nPos], sizeof(uint16_t));
4110
0
                nPos += sizeof(uint16_t);
4111
0
                CPL_LSBPTR16(&nSize);
4112
0
                if (nId == 0x564b && nPos + nSize <= abyExtra.size())  // "KV"
4113
0
                {
4114
0
                    if (nSize >= strlen("KeyValuePairs") + 1 &&
4115
0
                        memcmp(&abyExtra[nPos], "KeyValuePairs",
4116
0
                               strlen("KeyValuePairs")) == 0)
4117
0
                    {
4118
0
                        int nPos2 = static_cast<int>(strlen("KeyValuePairs"));
4119
0
                        int nKVPairs = abyExtra[nPos + nPos2];
4120
0
                        nPos2++;
4121
0
                        for (int iKV = 0; iKV < nKVPairs; ++iKV)
4122
0
                        {
4123
0
                            if (nPos2 + sizeof(uint16_t) > nSize)
4124
0
                                break;
4125
0
                            uint16_t nKeyLen;
4126
0
                            memcpy(&nKeyLen, &abyExtra[nPos + nPos2],
4127
0
                                   sizeof(uint16_t));
4128
0
                            nPos2 += sizeof(uint16_t);
4129
0
                            CPL_LSBPTR16(&nKeyLen);
4130
0
                            if (nPos2 + nKeyLen > nSize)
4131
0
                                break;
4132
0
                            std::string osKey;
4133
0
                            osKey.resize(nKeyLen);
4134
0
                            memcpy(&osKey[0], &abyExtra[nPos + nPos2], nKeyLen);
4135
0
                            nPos2 += nKeyLen;
4136
4137
0
                            if (nPos2 + sizeof(uint16_t) > nSize)
4138
0
                                break;
4139
0
                            uint16_t nValLen;
4140
0
                            memcpy(&nValLen, &abyExtra[nPos + nPos2],
4141
0
                                   sizeof(uint16_t));
4142
0
                            nPos2 += sizeof(uint16_t);
4143
0
                            CPL_LSBPTR16(&nValLen);
4144
0
                            if (nPos2 + nValLen > nSize)
4145
0
                                break;
4146
0
                            std::string osVal;
4147
0
                            osVal.resize(nValLen);
4148
0
                            memcpy(&osVal[0], &abyExtra[nPos + nPos2], nValLen);
4149
0
                            nPos2 += nValLen;
4150
4151
0
                            info.oMapProperties[osKey] = std::move(osVal);
4152
0
                        }
4153
0
                    }
4154
0
                }
4155
0
                nPos += nSize;
4156
0
            }
4157
0
        }
4158
0
    }
4159
4160
0
    info.nCRC = file_info.crc;
4161
0
    info.nCompressionMethod = static_cast<int>(file_info.compression_method);
4162
0
    info.nUncompressedSize = static_cast<uint64_t>(file_info.uncompressed_size);
4163
0
    info.nCompressedSize = static_cast<uint64_t>(file_info.compressed_size);
4164
4165
    // Sanity checks
4166
0
    if (info.nCompressedSize >
4167
0
        std::numeric_limits<uint64_t>::max() - info.nStartDataStream)
4168
0
    {
4169
0
        CPLError(CE_Failure, CPLE_AppDefined,
4170
0
                 "Invalid compressed size for file %s", pszFilename);
4171
0
        return false;
4172
0
    }
4173
0
    const uLong64 afterFileOffset =
4174
0
        info.nStartDataStream + info.nCompressedSize;
4175
4176
    // Cf https://stackoverflow.com/questions/16792189/gzip-compression-ratio-for-zeros/16794960
4177
0
    constexpr unsigned MAX_DEFLATE_COMPRESSION_RATIO = 1032;
4178
0
    if (info.nCompressedSize == 0 && info.nUncompressedSize != 0)
4179
0
    {
4180
0
        CPLError(CE_Failure, CPLE_AppDefined,
4181
0
                 "Invalid compressed size (=0) vs uncompressed size (!=0) for "
4182
0
                 "file %s",
4183
0
                 pszFilename);
4184
0
        return false;
4185
0
    }
4186
0
    else if (info.nCompressedSize != 0 &&
4187
0
             info.nUncompressedSize / info.nCompressedSize >
4188
0
                 MAX_DEFLATE_COMPRESSION_RATIO)
4189
0
    {
4190
0
        CPLError(CE_Failure, CPLE_AppDefined,
4191
0
                 "Invalid compression ratio for file %s: %" PRIu64, pszFilename,
4192
0
                 info.nUncompressedSize / info.nCompressedSize);
4193
0
        return false;
4194
0
    }
4195
4196
    // A bit arbitrary
4197
0
    constexpr unsigned THRESHOLD_FOR_BIG_ALLOCS = 1024 * 1024 * 1024;
4198
0
    if (info.nUncompressedSize > THRESHOLD_FOR_BIG_ALLOCS)
4199
0
    {
4200
        // Check that the compressed file size is consistent with the ZIP file size
4201
0
        poVirtualHandle->Seek(0, SEEK_END);
4202
0
        if (afterFileOffset > poVirtualHandle->Tell())
4203
0
        {
4204
0
            CPLError(CE_Failure, CPLE_AppDefined,
4205
0
                     "Invalid compressed size for file %s: %" PRIu64,
4206
0
                     pszFilename, info.nCompressedSize);
4207
0
            return false;
4208
0
        }
4209
0
    }
4210
4211
    // Try to locate .sozip.idx file
4212
0
    unz_file_info file_info2;
4213
0
    std::string osAuxName;
4214
0
    osAuxName.resize(1024);
4215
0
    uLong64 indexPos;
4216
0
    if (file_info.compression_method == 8 &&
4217
0
        cpl_unzCurrentFileInfoFromLocalHeader(
4218
0
            unzF, afterFileOffset, &file_info2, &osAuxName[0], osAuxName.size(),
4219
0
            &indexPos) == UNZ_OK)
4220
0
    {
4221
0
        osAuxName.resize(strlen(osAuxName.c_str()));
4222
0
        if (osAuxName.find(".sozip.idx") != std::string::npos)
4223
0
        {
4224
0
            info.bSOZipIndexFound = true;
4225
0
            info.nSOZIPStartData = indexPos;
4226
0
            poVirtualHandle->Seek(indexPos, SEEK_SET);
4227
0
            uint32_t nVersion = 0;
4228
0
            poVirtualHandle->Read(&nVersion, sizeof(nVersion));
4229
0
            CPL_LSBPTR32(&nVersion);
4230
0
            uint32_t nToSkip = 0;
4231
0
            poVirtualHandle->Read(&nToSkip, sizeof(nToSkip));
4232
0
            CPL_LSBPTR32(&nToSkip);
4233
0
            uint32_t nChunkSize = 0;
4234
0
            poVirtualHandle->Read(&nChunkSize, sizeof(nChunkSize));
4235
0
            CPL_LSBPTR32(&nChunkSize);
4236
0
            uint32_t nOffsetSize = 0;
4237
0
            poVirtualHandle->Read(&nOffsetSize, sizeof(nOffsetSize));
4238
0
            CPL_LSBPTR32(&nOffsetSize);
4239
0
            uint64_t nUncompressedSize = 0;
4240
0
            poVirtualHandle->Read(&nUncompressedSize,
4241
0
                                  sizeof(nUncompressedSize));
4242
0
            CPL_LSBPTR64(&nUncompressedSize);
4243
0
            uint64_t nCompressedSize = 0;
4244
0
            poVirtualHandle->Read(&nCompressedSize, sizeof(nCompressedSize));
4245
0
            CPL_LSBPTR64(&nCompressedSize);
4246
4247
0
            info.nSOZIPVersion = nVersion;
4248
0
            info.nSOZIPToSkip = nToSkip;
4249
0
            info.nSOZIPChunkSize = nChunkSize;
4250
0
            info.nSOZIPOffsetSize = nOffsetSize;
4251
4252
0
            bool bValid = true;
4253
0
            if (nVersion != 1)
4254
0
            {
4255
0
                CPLDebug("SOZIP", "version = %u, expected 1", nVersion);
4256
0
                bValid = false;
4257
0
            }
4258
0
            if (nCompressedSize != file_info.compressed_size)
4259
0
            {
4260
0
                CPLDebug("SOZIP",
4261
0
                         "compressedSize field inconsistent with file");
4262
0
                bValid = false;
4263
0
            }
4264
0
            if (nUncompressedSize != file_info.uncompressed_size)
4265
0
            {
4266
0
                CPLDebug("SOZIP",
4267
0
                         "uncompressedSize field inconsistent with file");
4268
0
                bValid = false;
4269
0
            }
4270
0
            if (!(nChunkSize > 0 && nChunkSize < 100 * 1024 * 1024))
4271
0
            {
4272
0
                CPLDebug("SOZIP", "invalid chunkSize = %u", nChunkSize);
4273
0
                bValid = false;
4274
0
            }
4275
0
            if (nOffsetSize != 8)
4276
0
            {
4277
0
                CPLDebug("SOZIP", "invalid offsetSize = %u", nOffsetSize);
4278
0
                bValid = false;
4279
0
            }
4280
0
            if (file_info2.compression_method != 0)
4281
0
            {
4282
0
                CPLDebug("SOZIP", "unexpected compression_method = %u",
4283
0
                         static_cast<unsigned>(file_info2.compression_method));
4284
0
                bValid = false;
4285
0
            }
4286
0
            if (bValid)
4287
0
            {
4288
0
                const auto nExpectedIndexSize =
4289
0
                    32 + static_cast<uint64_t>(nToSkip) +
4290
0
                    ((nUncompressedSize - 1) / nChunkSize) * nOffsetSize;
4291
0
                if (nExpectedIndexSize != file_info2.uncompressed_size)
4292
0
                {
4293
0
                    CPLDebug("SOZIP", "invalid file size for index");
4294
0
                    bValid = false;
4295
0
                }
4296
0
            }
4297
0
            if (bValid)
4298
0
            {
4299
0
                info.bSOZipIndexValid = true;
4300
0
                CPLDebug("SOZIP", "Found valid SOZIP index: %s",
4301
0
                         osAuxName.c_str());
4302
0
            }
4303
0
            else
4304
0
            {
4305
0
                CPLDebug("SOZIP", "Found *invalid* SOZIP index: %s",
4306
0
                         osAuxName.c_str());
4307
0
            }
4308
0
        }
4309
0
    }
4310
4311
0
    cpl_unzCloseCurrentFile(unzF);
4312
4313
0
    info.poVirtualHandle = std::move(poVirtualHandle);
4314
4315
0
    return true;
4316
0
}
4317
4318
/************************************************************************/
4319
/*                                Open()                                */
4320
/************************************************************************/
4321
4322
// Open a /vsizip/ path.
//
// An access string containing 'w' is forwarded to OpenForWrite(). One
// containing '+' (without 'w') is rejected. Otherwise the member is read
// through the handle matching its compression method and the presence of a
// valid SOZip index. Returns nullptr on failure.
VSIVirtualHandleUniquePtr
VSIZipFilesystemHandler::Open(const char *pszFilename, const char *pszAccess,
                              bool bSetError, CSLConstList /* papszOptions */)
{
    const bool bWriteRequested = strchr(pszAccess, 'w') != nullptr;
    if (bWriteRequested)
        return OpenForWrite(pszFilename, pszAccess);

    const bool bUpdateRequested = strchr(pszAccess, '+') != nullptr;
    if (bUpdateRequested)
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Read-write random access not supported for /vsizip");
        return nullptr;
    }

    VSIFileInZipInfo info;
    if (!GetFileInfo(pszFilename, info, bSetError))
        return nullptr;

#ifdef ENABLE_DEFLATE64
    // Compression method 9 is handled by the Deflate64 reader.
    if (info.nCompressionMethod == 9)
    {
        auto poDeflate64Handle = std::make_unique<VSIDeflate64Handle>(
            std::move(info.poVirtualHandle), nullptr, info.nStartDataStream,
            info.nCompressedSize, info.nUncompressedSize, info.nCRC);
        if (!poDeflate64Handle->IsInitOK())
            return nullptr;

        // A buffered reader on top of the decompressing handle makes small
        // backward seeks much cheaper.
        return VSIVirtualHandleUniquePtr(
            VSICreateBufferedReaderHandle(poDeflate64Handle.release()));
    }
#endif

    // A valid SOZip index gets the chunk-aware handle, cached with the
    // index chunk size.
    if (info.bSOZipIndexValid)
    {
        auto poSOZIPHandle = std::make_unique<VSISOZipHandle>(
            std::move(info.poVirtualHandle), info.nStartDataStream,
            info.nCompressedSize, info.nUncompressedSize, info.nSOZIPStartData,
            info.nSOZIPToSkip, info.nSOZIPChunkSize);
        if (!poSOZIPHandle->IsOK())
            return nullptr;

        return VSIVirtualHandleUniquePtr(VSICreateCachedFile(
            poSOZIPHandle.release(), info.nSOZIPChunkSize, 0));
    }

    // Generic path; the last argument is true when the compression method
    // is 0.
    const bool bIsStored = info.nCompressionMethod == 0;
    auto poGZIPHandle = std::make_unique<VSIGZipHandle>(
        std::move(info.poVirtualHandle), nullptr, info.nStartDataStream,
        info.nCompressedSize, info.nUncompressedSize, info.nCRC, bIsStored);
    if (!poGZIPHandle->IsInitOK())
        return nullptr;

    // Same buffered-reader wrapping as above, for cheap small backward
    // seeks.
    return VSIVirtualHandleUniquePtr(
        VSICreateBufferedReaderHandle(poGZIPHandle.release()));
}
4393
4394
/************************************************************************/
4395
/*                          GetFileMetadata()                           */
4396
/************************************************************************/
4397
4398
char **VSIZipFilesystemHandler::GetFileMetadata(const char *pszFilename,
4399
                                                const char *pszDomain,
4400
                                                CSLConstList /*papszOptions*/)
4401
0
{
4402
0
    VSIFileInZipInfo info;
4403
0
    if (!GetFileInfo(pszFilename, info, true))
4404
0
        return nullptr;
4405
4406
0
    if (!pszDomain)
4407
0
    {
4408
0
        CPLStringList aosMetadata;
4409
0
        for (const auto &kv : info.oMapProperties)
4410
0
        {
4411
0
            aosMetadata.AddNameValue(kv.first.c_str(), kv.second.c_str());
4412
0
        }
4413
0
        return aosMetadata.StealList();
4414
0
    }
4415
0
    else if (EQUAL(pszDomain, "ZIP"))
4416
0
    {
4417
0
        CPLStringList aosMetadata;
4418
0
        aosMetadata.SetNameValue(
4419
0
            "START_DATA_OFFSET",
4420
0
            CPLSPrintf(CPL_FRMT_GUIB,
4421
0
                       static_cast<GUIntBig>(info.nStartDataStream)));
4422
4423
0
        if (info.nCompressionMethod == 0)
4424
0
            aosMetadata.SetNameValue("COMPRESSION_METHOD", "0 (STORED)");
4425
0
        else if (info.nCompressionMethod == 8)
4426
0
            aosMetadata.SetNameValue("COMPRESSION_METHOD", "8 (DEFLATE)");
4427
0
        else
4428
0
        {
4429
0
            aosMetadata.SetNameValue("COMPRESSION_METHOD",
4430
0
                                     CPLSPrintf("%d", info.nCompressionMethod));
4431
0
        }
4432
0
        aosMetadata.SetNameValue(
4433
0
            "COMPRESSED_SIZE",
4434
0
            CPLSPrintf(CPL_FRMT_GUIB,
4435
0
                       static_cast<GUIntBig>(info.nCompressedSize)));
4436
0
        aosMetadata.SetNameValue(
4437
0
            "UNCOMPRESSED_SIZE",
4438
0
            CPLSPrintf(CPL_FRMT_GUIB,
4439
0
                       static_cast<GUIntBig>(info.nUncompressedSize)));
4440
4441
0
        if (info.bSOZipIndexFound)
4442
0
        {
4443
0
            aosMetadata.SetNameValue("SOZIP_FOUND", "YES");
4444
4445
0
            aosMetadata.SetNameValue("SOZIP_VERSION",
4446
0
                                     CPLSPrintf("%u", info.nSOZIPVersion));
4447
4448
0
            aosMetadata.SetNameValue("SOZIP_OFFSET_SIZE",
4449
0
                                     CPLSPrintf("%u", info.nSOZIPOffsetSize));
4450
4451
0
            aosMetadata.SetNameValue("SOZIP_CHUNK_SIZE",
4452
0
                                     CPLSPrintf("%u", info.nSOZIPChunkSize));
4453
4454
0
            aosMetadata.SetNameValue(
4455
0
                "SOZIP_START_DATA_OFFSET",
4456
0
                CPLSPrintf(CPL_FRMT_GUIB,
4457
0
                           static_cast<GUIntBig>(info.nSOZIPStartData)));
4458
4459
0
            if (info.bSOZipIndexValid)
4460
0
            {
4461
0
                aosMetadata.SetNameValue("SOZIP_VALID", "YES");
4462
0
            }
4463
0
        }
4464
4465
0
        return aosMetadata.StealList();
4466
0
    }
4467
0
    return nullptr;
4468
0
}
4469
4470
/************************************************************************/
4471
/*                               Mkdir()                                */
4472
/************************************************************************/
4473
4474
int VSIZipFilesystemHandler::Mkdir(const char *pszDirname, long /* nMode */)
4475
0
{
4476
0
    CPLString osDirname = pszDirname;
4477
0
    if (!osDirname.empty() && osDirname.back() != '/')
4478
0
        osDirname += "/";
4479
0
    return OpenForWrite(osDirname, "wb") != nullptr ? 0 : -1;
4480
0
}
4481
4482
/************************************************************************/
4483
/*                             ReadDirEx()                              */
4484
/************************************************************************/
4485
4486
char **VSIZipFilesystemHandler::ReadDirEx(const char *pszDirname, int nMaxFiles)
4487
0
{
4488
0
    CPLString osInArchiveSubDir;
4489
0
    auto zipFilename = SplitFilename(pszDirname, osInArchiveSubDir, true, true);
4490
0
    if (zipFilename == nullptr)
4491
0
        return nullptr;
4492
4493
0
    {
4494
0
        std::unique_lock oLock(oMutex);
4495
4496
0
        if (oMapZipWriteHandles.find(zipFilename.get()) !=
4497
0
            oMapZipWriteHandles.end())
4498
0
        {
4499
0
            CPLError(CE_Failure, CPLE_AppDefined,
4500
0
                     "Cannot read a zip file being written");
4501
0
            return nullptr;
4502
0
        }
4503
0
    }
4504
4505
0
    return VSIArchiveFilesystemHandler::ReadDirEx(pszDirname, nMaxFiles);
4506
0
}
4507
4508
/************************************************************************/
4509
/*                                Stat()                                */
4510
/************************************************************************/
4511
4512
int VSIZipFilesystemHandler::Stat(const char *pszFilename,
4513
                                  VSIStatBufL *pStatBuf, int nFlags)
4514
1.95k
{
4515
1.95k
    CPLString osInArchiveSubDir;
4516
4517
1.95k
    memset(pStatBuf, 0, sizeof(VSIStatBufL));
4518
4519
1.95k
    auto zipFilename = SplitFilename(pszFilename, osInArchiveSubDir, true,
4520
1.95k
                                     (nFlags & VSI_STAT_SET_ERROR_FLAG) != 0);
4521
1.95k
    if (zipFilename == nullptr)
4522
1.20k
        return -1;
4523
4524
754
    {
4525
754
        std::unique_lock oLock(oMutex);
4526
4527
754
        if (oMapZipWriteHandles.find(zipFilename.get()) !=
4528
754
            oMapZipWriteHandles.end())
4529
0
        {
4530
0
            CPLError(CE_Failure, CPLE_AppDefined,
4531
0
                     "Cannot read a zip file being written");
4532
0
            return -1;
4533
0
        }
4534
754
    }
4535
4536
754
    return VSIArchiveFilesystemHandler::Stat(pszFilename, pStatBuf, nFlags);
4537
754
}
4538
4539
/************************************************************************/
4540
/*                           RemoveFromMap()                            */
4541
/************************************************************************/
4542
4543
// Drop the first entry of the write-handle map whose value is poHandle, if
// there is one. The map is modified under oMutex.
void VSIZipFilesystemHandler::RemoveFromMap(VSIZipWriteHandle *poHandle)
{
    std::unique_lock oLock(oMutex);

    const auto oMatch =
        std::find_if(oMapZipWriteHandles.begin(), oMapZipWriteHandles.end(),
                     [poHandle](const auto &oEntry)
                     { return oEntry.second == poHandle; });
    if (oMatch != oMapZipWriteHandles.end())
        oMapZipWriteHandles.erase(oMatch);
}
4558
4559
/************************************************************************/
4560
/*                            OpenForWrite()                            */
4561
/************************************************************************/
4562
4563
// Locking front end of OpenForWrite_unlocked(): takes oMutex for the whole
// call and returns whatever the unlocked variant returns.
VSIVirtualHandleUniquePtr
VSIZipFilesystemHandler::OpenForWrite(const char *pszFilename,
                                      const char *pszAccess)
{
    std::unique_lock oLock(oMutex);
    auto poWriteHandle = OpenForWrite_unlocked(pszFilename, pszAccess);
    return poWriteHandle;
}
4570
4571
// Open an archive, or a member of an archive, for writing. It does not take
// oMutex itself; OpenForWrite() calls it with the mutex held.
//
// If the archive has no registered write handle yet, it is created (or
// appended to), registered in oMapZipWriteHandles, and, when a member name
// was given, the function calls itself again to start that member. If the
// archive is already registered, a new member is started on the existing
// handle. Returns nullptr on failure.
VSIVirtualHandleUniquePtr
VSIZipFilesystemHandler::OpenForWrite_unlocked(const char *pszFilename,
                                               const char *pszAccess)
{
    CPLString osZipInFileName;

    auto zipFilename =
        SplitFilename(pszFilename, osZipInFileName, false, false);
    if (zipFilename == nullptr)
        return nullptr;
    const CPLString osZipFilename = zipFilename.get();

    // Invalidate cached file list.
    if (auto oCachedList = oFileList.find(osZipFilename);
        oCachedList != oFileList.end())
    {
        oFileList.erase(oCachedList);
    }

    const bool bPlusInAccess = strchr(pszAccess, '+') != nullptr;

    const auto oIter = oMapZipWriteHandles.find(osZipFilename);
    if (oIter == oMapZipWriteHandles.end())
    {
        // No write handle on this archive yet: create or append to it.
        // Append is considered when a member name was given, or when '+'
        // was requested, and only if the archive already exists.
        char **papszOptions = nullptr;
        if (bPlusInAccess || !osZipInFileName.empty())
        {
            VSIStatBufL sBuf;
            if (VSIStatExL(osZipFilename, &sBuf, VSI_STAT_EXISTS_FLAG) == 0)
                papszOptions = CSLAddNameValue(papszOptions, "APPEND", "TRUE");
        }

        void *hZIP = CPLCreateZip(osZipFilename, papszOptions);
        CSLDestroy(papszOptions);

        if (hZIP == nullptr)
            return nullptr;

        auto poHandle = new VSIZipWriteHandle(this, hZIP, nullptr);
        oMapZipWriteHandles[osZipFilename] = poHandle;

        // Archive-level open: the archive handle itself is the result.
        if (osZipInFileName.empty())
            return VSIVirtualHandleUniquePtr(poHandle);

        // Member-level open: the archive is now registered, so this call
        // takes the other path below and starts the member.
        auto poRes = std::unique_ptr<VSIZipWriteHandle>(
            cpl::down_cast<VSIZipWriteHandle *>(
                OpenForWrite_unlocked(pszFilename, pszAccess).release()));
        if (poRes == nullptr)
        {
            delete poHandle;
            oMapZipWriteHandles.erase(osZipFilename);
            return nullptr;
        }

        poRes->SetAutoDeleteParent();

        return VSIVirtualHandleUniquePtr(poRes.release());
    }

    // The archive already has a write handle: start a new member on it.
    if (bPlusInAccess)
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Random access not supported for writable file in /vsizip");
        return nullptr;
    }

    VSIZipWriteHandle *poZIPHandle = oIter->second;

    if (poZIPHandle->GetChildInWriting() != nullptr)
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Cannot create %s while another file is being "
                 "written in the .zip",
                 osZipInFileName.c_str());
        return nullptr;
    }

    poZIPHandle->StopCurrentFile();

    // Re-add path separator when creating directories.
    const char chLastChar = pszFilename[strlen(pszFilename) - 1];
    if (chLastChar == '/' || chLastChar == '\\')
        osZipInFileName += chLastChar;

    if (CPLCreateFileInZip(poZIPHandle->GetHandle(), osZipInFileName,
                           nullptr) != CE_None)
    {
        return nullptr;
    }

    auto poChildHandle =
        std::make_unique<VSIZipWriteHandle>(this, nullptr, poZIPHandle);

    poZIPHandle->StartNewFile(poChildHandle.get());

    return VSIVirtualHandleUniquePtr(poChildHandle.release());
}
4670
4671
/************************************************************************/
4672
/*                             GetOptions()                             */
4673
/************************************************************************/
4674
4675
// Return the XML description of the options understood by this handler:
// GDAL_NUM_THREADS and CPL_VSIL_DEFLATE_CHUNK_SIZE.
const char *VSIZipFilesystemHandler::GetOptions()
{
    static constexpr const char *pszOptionsXML =
        "<Options>"
        "  <Option name='GDAL_NUM_THREADS' "
        "type='string' description='Number of threads for compression. "
        "Either a integer or ALL_CPUS'/>"
        "  <Option name='CPL_VSIL_DEFLATE_CHUNK_SIZE' "
        "type='string' description='Chunk of uncompressed data for "
        "parallelization. Use K(ilobytes) or M(egabytes) suffix' "
        "default='1M'/>"
        "</Options>";
    return pszOptionsXML;
}
4686
4687
/************************************************************************/
4688
/*                              CopyFile()                              */
4689
/************************************************************************/
4690
4691
int VSIZipFilesystemHandler::CopyFile(const char *pszSource,
4692
                                      const char *pszTarget, VSILFILE *fpSource,
4693
                                      vsi_l_offset /* nSourceSize */,
4694
                                      CSLConstList papszOptions,
4695
                                      GDALProgressFunc pProgressFunc,
4696
                                      void *pProgressData)
4697
0
{
4698
0
    CPLString osZipInFileName;
4699
4700
0
    auto zipFilename = SplitFilename(pszTarget, osZipInFileName, false, false);
4701
0
    if (zipFilename == nullptr)
4702
0
        return -1;
4703
0
    const CPLString osZipFilename = zipFilename.get();
4704
0
    if (osZipInFileName.empty())
4705
0
    {
4706
0
        CPLError(CE_Failure, CPLE_AppDefined,
4707
0
                 "Target filename should be of the form "
4708
0
                 "/vsizip/path_to.zip/filename_within_zip");
4709
0
        return -1;
4710
0
    }
4711
4712
    // Invalidate cached file list.
4713
0
    auto oIterFileList = oFileList.find(osZipFilename);
4714
0
    if (oIterFileList != oFileList.end())
4715
0
    {
4716
0
        oFileList.erase(oIterFileList);
4717
0
    }
4718
4719
0
    const auto oIter = oMapZipWriteHandles.find(osZipFilename);
4720
0
    if (oIter != oMapZipWriteHandles.end())
4721
0
    {
4722
0
        VSIZipWriteHandle *poZIPHandle = oIter->second;
4723
4724
0
        if (poZIPHandle->GetChildInWriting() != nullptr)
4725
0
        {
4726
0
            CPLError(CE_Failure, CPLE_AppDefined,
4727
0
                     "Cannot create %s while another file is being "
4728
0
                     "written in the .zip",
4729
0
                     osZipInFileName.c_str());
4730
0
            return -1;
4731
0
        }
4732
4733
0
        if (CPLAddFileInZip(poZIPHandle->GetHandle(), osZipInFileName.c_str(),
4734
0
                            pszSource, fpSource, papszOptions, pProgressFunc,
4735
0
                            pProgressData) != CE_None)
4736
0
        {
4737
0
            return -1;
4738
0
        }
4739
0
        return 0;
4740
0
    }
4741
0
    else
4742
0
    {
4743
0
        CPLStringList aosOptionsCreateZip;
4744
0
        VSIStatBufL sBuf;
4745
0
        if (VSIStatExL(osZipFilename, &sBuf, VSI_STAT_EXISTS_FLAG) == 0)
4746
0
            aosOptionsCreateZip.SetNameValue("APPEND", "TRUE");
4747
4748
0
        void *hZIP = CPLCreateZip(osZipFilename, aosOptionsCreateZip.List());
4749
4750
0
        if (hZIP == nullptr)
4751
0
            return -1;
4752
4753
0
        if (CPLAddFileInZip(hZIP, osZipInFileName.c_str(), pszSource, fpSource,
4754
0
                            papszOptions, pProgressFunc,
4755
0
                            pProgressData) != CE_None)
4756
0
        {
4757
0
            CPLCloseZip(hZIP);
4758
0
            return -1;
4759
0
        }
4760
0
        CPLCloseZip(hZIP);
4761
0
        return 0;
4762
0
    }
4763
0
}
4764
4765
/************************************************************************/
4766
/*                         VSIZipWriteHandle()                          */
4767
/************************************************************************/
4768
4769
VSIZipWriteHandle::VSIZipWriteHandle(VSIZipFilesystemHandler *poFS, void *hZIP,
4770
                                     VSIZipWriteHandle *poParent)
4771
0
    : m_poFS(poFS), m_hZIP(hZIP), m_poParent(poParent)
4772
0
{
4773
0
}
4774
4775
/************************************************************************/
4776
/*                         ~VSIZipWriteHandle()                         */
4777
/************************************************************************/
4778
4779
// Closes the handle on destruction. The call is explicitly qualified so that
// this class's own Close() is the one invoked.
VSIZipWriteHandle::~VSIZipWriteHandle()
{
    this->VSIZipWriteHandle::Close();
}
4783
4784
/************************************************************************/
4785
/*                                Seek()                                */
4786
/************************************************************************/
4787
4788
// Only seeks that do not move the write position are accepted:
//  - a zero offset relative to the current position or to the end,
//  - an absolute offset equal to the current offset.
// Any other request emits CPLE_NotSupported and returns -1.
int VSIZipWriteHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    const bool bRelativeNoOp =
        (nWhence == SEEK_END || nWhence == SEEK_CUR) && nOffset == 0;
    const bool bAbsoluteNoOp = nWhence == SEEK_SET && nOffset == nCurOffset;

    if (!bRelativeNoOp && !bAbsoluteNoOp)
    {
        CPLError(CE_Failure, CPLE_NotSupported,
                 "VSIFSeekL() is not supported on writable Zip files");
        return -1;
    }

    return 0;
}
4799
4800
/************************************************************************/
4801
/*                                Tell()                                */
4802
/************************************************************************/
4803
4804
// Reports the current offset, i.e. the number of bytes accepted by Write()
// so far on this handle.
vsi_l_offset VSIZipWriteHandle::Tell()
{
    const vsi_l_offset nOffset = nCurOffset;
    return nOffset;
}
4808
4809
/************************************************************************/
4810
/*                                Read()                                */
4811
/************************************************************************/
4812
4813
size_t VSIZipWriteHandle::Read(void * /* pBuffer */, size_t /* nBytes */)
4814
0
{
4815
0
    CPLError(CE_Failure, CPLE_NotSupported,
4816
0
             "VSIFReadL() is not supported on writable Zip files");
4817
0
    return 0;
4818
0
}
4819
4820
/************************************************************************/
4821
/*                               Write()                                */
4822
/************************************************************************/
4823
4824
size_t VSIZipWriteHandle::Write(const void *pBuffer, size_t const nBytesToWrite)
4825
0
{
4826
0
    if (m_poParent == nullptr)
4827
0
    {
4828
0
        CPLError(CE_Failure, CPLE_NotSupported,
4829
0
                 "VSIFWriteL() is not supported on "
4830
0
                 "main Zip file or closed subfiles");
4831
0
        return 0;
4832
0
    }
4833
4834
0
    const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
4835
0
    size_t nWritten = 0;
4836
0
    while (nWritten < nBytesToWrite)
4837
0
    {
4838
0
        int nToWrite = static_cast<int>(
4839
0
            std::min(static_cast<size_t>(INT_MAX), nBytesToWrite));
4840
0
        if (CPLWriteFileInZip(m_poParent->m_hZIP, pabyBuffer, nToWrite) !=
4841
0
            CE_None)
4842
0
            return 0;
4843
0
        nWritten += nToWrite;
4844
0
        pabyBuffer += nToWrite;
4845
0
    }
4846
4847
0
    nCurOffset += nBytesToWrite;
4848
4849
0
    return nBytesToWrite;
4850
0
}
4851
4852
/************************************************************************/
4853
/*                               Flush()                                */
4854
/************************************************************************/
4855
4856
// Flushing is a no-op that always reports success. Emitting a
// CPLE_NotSupported error here is intentionally left disabled:
//   CPLError(CE_Failure, CPLE_NotSupported,
//            "VSIFFlushL() is not supported on writable Zip files");
int VSIZipWriteHandle::Flush()
{
    constexpr int nSuccess = 0;
    return nSuccess;
}
4862
4863
/************************************************************************/
4864
/*                               Close()                                */
4865
/************************************************************************/
4866
4867
int VSIZipWriteHandle::Close()
4868
0
{
4869
0
    int nRet = 0;
4870
0
    if (m_poParent)
4871
0
    {
4872
0
        CPLCloseFileInZip(m_poParent->m_hZIP);
4873
0
        m_poParent->poChildInWriting = nullptr;
4874
0
        if (bAutoDeleteParent)
4875
0
        {
4876
0
            if (m_poParent->Close() != 0)
4877
0
                nRet = -1;
4878
0
            delete m_poParent;
4879
0
        }
4880
0
        m_poParent = nullptr;
4881
0
    }
4882
0
    if (poChildInWriting)
4883
0
    {
4884
0
        if (poChildInWriting->Close() != 0)
4885
0
            nRet = -1;
4886
0
        poChildInWriting = nullptr;
4887
0
    }
4888
0
    if (m_hZIP)
4889
0
    {
4890
0
        if (CPLCloseZip(m_hZIP) != CE_None)
4891
0
            nRet = -1;
4892
0
        m_hZIP = nullptr;
4893
4894
0
        m_poFS->RemoveFromMap(this);
4895
0
    }
4896
4897
0
    return nRet;
4898
0
}
4899
4900
/************************************************************************/
4901
/*                          StopCurrentFile()                           */
4902
/************************************************************************/
4903
4904
void VSIZipWriteHandle::StopCurrentFile()
4905
0
{
4906
0
    if (poChildInWriting)
4907
0
        poChildInWriting->Close();
4908
0
    poChildInWriting = nullptr;
4909
0
}
4910
4911
/************************************************************************/
4912
/*                            StartNewFile()                            */
4913
/************************************************************************/
4914
4915
// Records poSubFile as the sub-file currently being written in this archive.
void VSIZipWriteHandle::StartNewFile(VSIZipWriteHandle *poSubFile)
{
    this->poChildInWriting = poSubFile;
}
4919
4920
//! @endcond
4921
4922
/************************************************************************/
4923
/*                      VSIInstallZipFileHandler()                      */
4924
/************************************************************************/
4925
4926
/*!
4927
 \brief Install ZIP file system handler.
4928
4929
 A special file handler is installed that allows reading on-the-fly in ZIP
4930
 (.zip) archives.
4931
4932
 All portions of the file system underneath the base path "/vsizip/" will be
4933
 handled by this driver.
4934
4935
 \verbatim embed:rst
4936
 See :ref:`/vsizip/ documentation <vsizip>`
4937
 \endverbatim
4938
4939
 */
4940
4941
void VSIInstallZipFileHandler()
4942
3
{
4943
3
    VSIFileManager::InstallHandler("/vsizip/",
4944
3
                                   std::make_shared<VSIZipFilesystemHandler>());
4945
3
}
4946
4947
/************************************************************************/
4948
/*                           CPLZLibDeflate()                           */
4949
/************************************************************************/
4950
4951
/**
4952
 * \brief Compress a buffer with ZLib compression.
4953
 *
4954
 * @param ptr input buffer.
4955
 * @param nBytes size of input buffer in bytes.
4956
 * @param nLevel ZLib compression level (-1 for default).
4957
 * @param outptr output buffer, or NULL to let the function allocate it.
4958
 * @param nOutAvailableBytes size of output buffer if provided, or ignored.
4959
 * @param pnOutBytes pointer to a size_t, where to store the size of the
4960
 *                   output buffer.
4961
 *
4962
 * @return the output buffer (to be freed with VSIFree() if not provided)
4963
 *         or NULL in case of error.
4964
 *
4965
 */
4966
4967
void *CPLZLibDeflate(const void *ptr, size_t nBytes, int nLevel, void *outptr,
4968
                     size_t nOutAvailableBytes, size_t *pnOutBytes)
4969
0
{
4970
0
    if (pnOutBytes != nullptr)
4971
0
        *pnOutBytes = 0;
4972
4973
0
    size_t nTmpSize = 0;
4974
0
    void *pTmp;
4975
#ifdef HAVE_LIBDEFLATE
4976
    struct libdeflate_compressor *enc =
4977
        libdeflate_alloc_compressor(nLevel < 0 ? 7 : nLevel);
4978
    if (enc == nullptr)
4979
    {
4980
        return nullptr;
4981
    }
4982
#endif
4983
0
    if (outptr == nullptr)
4984
0
    {
4985
#ifdef HAVE_LIBDEFLATE
4986
        nTmpSize = libdeflate_zlib_compress_bound(enc, nBytes);
4987
#else
4988
0
        nTmpSize = 32 + nBytes * 2;
4989
0
#endif
4990
0
        pTmp = VSIMalloc(nTmpSize);
4991
0
        if (pTmp == nullptr)
4992
0
        {
4993
#ifdef HAVE_LIBDEFLATE
4994
            libdeflate_free_compressor(enc);
4995
#endif
4996
0
            return nullptr;
4997
0
        }
4998
0
    }
4999
0
    else
5000
0
    {
5001
0
        pTmp = outptr;
5002
0
        nTmpSize = nOutAvailableBytes;
5003
0
    }
5004
5005
#ifdef HAVE_LIBDEFLATE
5006
    size_t nCompressedBytes =
5007
        libdeflate_zlib_compress(enc, ptr, nBytes, pTmp, nTmpSize);
5008
    libdeflate_free_compressor(enc);
5009
    if (nCompressedBytes == 0)
5010
    {
5011
        if (pTmp != outptr)
5012
            VSIFree(pTmp);
5013
        return nullptr;
5014
    }
5015
    if (pnOutBytes != nullptr)
5016
        *pnOutBytes = nCompressedBytes;
5017
#else
5018
0
    z_stream strm;
5019
0
    strm.zalloc = nullptr;
5020
0
    strm.zfree = nullptr;
5021
0
    strm.opaque = nullptr;
5022
0
    int ret = deflateInit(&strm, nLevel < 0 ? Z_DEFAULT_COMPRESSION : nLevel);
5023
0
    if (ret != Z_OK)
5024
0
    {
5025
0
        if (pTmp != outptr)
5026
0
            VSIFree(pTmp);
5027
0
        return nullptr;
5028
0
    }
5029
5030
0
    strm.avail_in = static_cast<uInt>(nBytes);
5031
0
    strm.next_in = reinterpret_cast<Bytef *>(const_cast<void *>(ptr));
5032
0
    strm.avail_out = static_cast<uInt>(nTmpSize);
5033
0
    strm.next_out = reinterpret_cast<Bytef *>(pTmp);
5034
0
    ret = deflate(&strm, Z_FINISH);
5035
0
    if (ret != Z_STREAM_END)
5036
0
    {
5037
0
        if (pTmp != outptr)
5038
0
            VSIFree(pTmp);
5039
0
        return nullptr;
5040
0
    }
5041
0
    if (pnOutBytes != nullptr)
5042
0
        *pnOutBytes = nTmpSize - strm.avail_out;
5043
0
    deflateEnd(&strm);
5044
0
#endif
5045
5046
0
    return pTmp;
5047
0
}
5048
5049
/************************************************************************/
5050
/*                           CPLZLibInflate()                           */
5051
/************************************************************************/
5052
5053
/**
5054
 * \brief Uncompress a buffer compressed with ZLib compression.
5055
 *
5056
 * @param ptr input buffer.
5057
 * @param nBytes size of input buffer in bytes.
5058
 * @param outptr output buffer, or NULL to let the function allocate it.
5059
 * @param nOutAvailableBytes size of output buffer if provided, or ignored.
5060
 * @param pnOutBytes pointer to a size_t, where to store the size of the
5061
 *                   output buffer.
5062
 *
5063
 * @return the output buffer (to be freed with VSIFree() if not provided)
5064
 *         or NULL in case of error.
5065
 *
5066
 */
5067
5068
void *CPLZLibInflate(const void *ptr, size_t nBytes, void *outptr,
5069
                     size_t nOutAvailableBytes, size_t *pnOutBytes)
5070
0
{
5071
0
    return CPLZLibInflateEx(ptr, nBytes, outptr, nOutAvailableBytes, false,
5072
0
                            pnOutBytes);
5073
0
}
5074
5075
/************************************************************************/
5076
/*                          CPLZLibInflateEx()                          */
5077
/************************************************************************/
5078
5079
/**
5080
 * \brief Uncompress a buffer compressed with ZLib compression.
5081
 *
5082
 * @param ptr input buffer.
5083
 * @param nBytes size of input buffer in bytes.
5084
 * @param outptr output buffer, or NULL to let the function allocate it.
5085
 * @param nOutAvailableBytes size of output buffer if provided, or ignored.
5086
 * @param bAllowResizeOutptr whether the function is allowed to grow outptr
5087
 *                           (using VSIRealloc) if its initial capacity
5088
 *                           provided by nOutAvailableBytes is not
5089
 *                           large enough. Ignored if outptr is NULL.
5090
 * @param pnOutBytes pointer to a size_t, where to store the size of the
5091
 *                   output buffer.
5092
 *
5093
 * @return the output buffer (to be freed with VSIFree() if not provided)
5094
 *         or NULL in case of error. If bAllowResizeOutptr is set to true,
5095
 *         only the returned pointer should be freed by the caller, as outptr
5096
 *         might have been reallocated or freed.
5097
 *
5098
 * @since GDAL 3.9.0
5099
 */
5100
5101
void *CPLZLibInflateEx(const void *ptr, size_t nBytes, void *outptr,
5102
                       size_t nOutAvailableBytes, bool bAllowResizeOutptr,
5103
                       size_t *pnOutBytes)
5104
0
{
5105
0
    if (pnOutBytes != nullptr)
5106
0
        *pnOutBytes = 0;
5107
0
    char *pszReallocatableBuf = nullptr;
5108
5109
#ifdef HAVE_LIBDEFLATE
5110
    if (outptr)
5111
    {
5112
        struct libdeflate_decompressor *dec = libdeflate_alloc_decompressor();
5113
        if (dec == nullptr)
5114
        {
5115
            if (bAllowResizeOutptr)
5116
                VSIFree(outptr);
5117
            return nullptr;
5118
        }
5119
        enum libdeflate_result res;
5120
        size_t nOutBytes = 0;
5121
        if (nBytes > 2 && static_cast<const GByte *>(ptr)[0] == 0x1F &&
5122
            static_cast<const GByte *>(ptr)[1] == 0x8B)
5123
        {
5124
            res = libdeflate_gzip_decompress(dec, ptr, nBytes, outptr,
5125
                                             nOutAvailableBytes, &nOutBytes);
5126
        }
5127
        else
5128
        {
5129
            res = libdeflate_zlib_decompress(dec, ptr, nBytes, outptr,
5130
                                             nOutAvailableBytes, &nOutBytes);
5131
        }
5132
        if (pnOutBytes)
5133
            *pnOutBytes = nOutBytes;
5134
        libdeflate_free_decompressor(dec);
5135
        if (res == LIBDEFLATE_INSUFFICIENT_SPACE && bAllowResizeOutptr)
5136
        {
5137
            if (nOutAvailableBytes >
5138
                (std::numeric_limits<size_t>::max() - 1) / 2)
5139
            {
5140
                VSIFree(outptr);
5141
                return nullptr;
5142
            }
5143
            size_t nOutBufSize = nOutAvailableBytes * 2;
5144
            pszReallocatableBuf = static_cast<char *>(
5145
                VSI_REALLOC_VERBOSE(outptr, nOutBufSize + 1));
5146
            if (!pszReallocatableBuf)
5147
            {
5148
                VSIFree(outptr);
5149
                return nullptr;
5150
            }
5151
            outptr = nullptr;
5152
            nOutAvailableBytes = nOutBufSize;
5153
        }
5154
        else if (res != LIBDEFLATE_SUCCESS)
5155
        {
5156
            if (bAllowResizeOutptr)
5157
                VSIFree(outptr);
5158
            return nullptr;
5159
        }
5160
        else
5161
        {
5162
            // Nul-terminate if possible.
5163
            if (nOutBytes < nOutAvailableBytes)
5164
            {
5165
                static_cast<char *>(outptr)[nOutBytes] = '\0';
5166
            }
5167
            return outptr;
5168
        }
5169
    }
5170
#endif
5171
5172
0
    z_stream strm;
5173
0
    memset(&strm, 0, sizeof(strm));
5174
0
    strm.zalloc = nullptr;
5175
0
    strm.zfree = nullptr;
5176
0
    strm.opaque = nullptr;
5177
0
    int ret;
5178
    // MAX_WBITS + 32 mode which detects automatically gzip vs zlib
5179
    // encapsulation seems to be broken with
5180
    // /opt/intel/oneapi/intelpython/latest/lib/libz.so.1 from
5181
    // intel/oneapi-basekit Docker image
5182
0
    if (nBytes > 2 && static_cast<const GByte *>(ptr)[0] == 0x1F &&
5183
0
        static_cast<const GByte *>(ptr)[1] == 0x8B)
5184
0
    {
5185
0
        ret = inflateInit2(&strm, MAX_WBITS + 16);  // gzip
5186
0
    }
5187
0
    else
5188
0
    {
5189
0
        ret = inflateInit2(&strm, MAX_WBITS);  // zlib
5190
0
    }
5191
0
    if (ret != Z_OK)
5192
0
    {
5193
0
        if (bAllowResizeOutptr)
5194
0
            VSIFree(outptr);
5195
0
        VSIFree(pszReallocatableBuf);
5196
0
        return nullptr;
5197
0
    }
5198
5199
0
    size_t nOutBufSize = 0;
5200
0
    char *pszOutBuf = nullptr;
5201
5202
#ifdef HAVE_LIBDEFLATE
5203
    if (pszReallocatableBuf)
5204
    {
5205
        pszOutBuf = pszReallocatableBuf;
5206
        nOutBufSize = nOutAvailableBytes;
5207
    }
5208
    else
5209
#endif
5210
0
        if (!outptr)
5211
0
    {
5212
0
        if (nBytes > (std::numeric_limits<size_t>::max() - 1) / 2)
5213
0
        {
5214
0
            inflateEnd(&strm);
5215
0
            return nullptr;
5216
0
        }
5217
0
        nOutBufSize = 2 * nBytes + 1;
5218
0
        pszOutBuf = static_cast<char *>(VSI_MALLOC_VERBOSE(nOutBufSize));
5219
0
        if (pszOutBuf == nullptr)
5220
0
        {
5221
0
            inflateEnd(&strm);
5222
0
            return nullptr;
5223
0
        }
5224
0
        pszReallocatableBuf = pszOutBuf;
5225
0
        bAllowResizeOutptr = true;
5226
0
    }
5227
0
#ifndef HAVE_LIBDEFLATE
5228
0
    else
5229
0
    {
5230
0
        pszOutBuf = static_cast<char *>(outptr);
5231
0
        nOutBufSize = nOutAvailableBytes;
5232
0
        if (bAllowResizeOutptr)
5233
0
            pszReallocatableBuf = pszOutBuf;
5234
0
    }
5235
0
#endif
5236
5237
0
    strm.next_in = static_cast<Bytef *>(const_cast<void *>(ptr));
5238
0
    strm.next_out = reinterpret_cast<Bytef *>(pszOutBuf);
5239
0
    size_t nInBytesRemaining = nBytes;
5240
0
    size_t nOutBytesRemaining = nOutBufSize;
5241
5242
0
    while (true)
5243
0
    {
5244
0
        strm.avail_in = static_cast<uInt>(std::min<size_t>(
5245
0
            nInBytesRemaining, std::numeric_limits<uInt>::max()));
5246
0
        const auto avail_in_before = strm.avail_in;
5247
0
        strm.avail_out = static_cast<uInt>(std::min<size_t>(
5248
0
            nOutBytesRemaining, std::numeric_limits<uInt>::max()));
5249
0
        const auto avail_out_before = strm.avail_out;
5250
0
        ret = inflate(&strm, Z_FINISH);
5251
0
        nInBytesRemaining -= (avail_in_before - strm.avail_in);
5252
0
        nOutBytesRemaining -= (avail_out_before - strm.avail_out);
5253
5254
0
        if (ret == Z_BUF_ERROR && strm.avail_out == 0)
5255
0
        {
5256
#ifdef HAVE_LIBDEFLATE
5257
            CPLAssert(bAllowResizeOutptr);
5258
#else
5259
0
            if (!bAllowResizeOutptr)
5260
0
            {
5261
0
                VSIFree(pszReallocatableBuf);
5262
0
                inflateEnd(&strm);
5263
0
                return nullptr;
5264
0
            }
5265
0
#endif
5266
5267
0
            const size_t nAlreadyWritten = nOutBufSize - nOutBytesRemaining;
5268
0
            if (nOutBufSize > (std::numeric_limits<size_t>::max() - 1) / 2)
5269
0
            {
5270
0
                VSIFree(pszReallocatableBuf);
5271
0
                inflateEnd(&strm);
5272
0
                return nullptr;
5273
0
            }
5274
0
            nOutBufSize = nOutBufSize * 2 + 1;
5275
0
            char *pszNew = static_cast<char *>(
5276
0
                VSI_REALLOC_VERBOSE(pszReallocatableBuf, nOutBufSize));
5277
0
            if (!pszNew)
5278
0
            {
5279
0
                VSIFree(pszReallocatableBuf);
5280
0
                inflateEnd(&strm);
5281
0
                return nullptr;
5282
0
            }
5283
0
            pszOutBuf = pszNew;
5284
0
            pszReallocatableBuf = pszOutBuf;
5285
0
            nOutBytesRemaining = nOutBufSize - nAlreadyWritten;
5286
0
            strm.next_out =
5287
0
                reinterpret_cast<Bytef *>(pszOutBuf + nAlreadyWritten);
5288
0
        }
5289
0
        else if (ret != Z_OK || nInBytesRemaining == 0)
5290
0
            break;
5291
0
    }
5292
5293
0
    if (ret == Z_OK || ret == Z_STREAM_END)
5294
0
    {
5295
0
        size_t nOutBytes = nOutBufSize - nOutBytesRemaining;
5296
        // Nul-terminate if possible.
5297
0
        if (nOutBytes < nOutBufSize)
5298
0
        {
5299
0
            pszOutBuf[nOutBytes] = '\0';
5300
0
        }
5301
0
        inflateEnd(&strm);
5302
0
        if (pnOutBytes != nullptr)
5303
0
            *pnOutBytes = nOutBytes;
5304
0
        return pszOutBuf;
5305
0
    }
5306
0
    else
5307
0
    {
5308
0
        VSIFree(pszReallocatableBuf);
5309
0
        inflateEnd(&strm);
5310
0
        return nullptr;
5311
0
    }
5312
0
}