Coverage Report

Created: 2026-04-01 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/port/cpl_vsil_gzip.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  Implement VSI large file api for gz/zip files (.gz and .zip).
5
 * Author:   Even Rouault, even.rouault at spatialys.com
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2008-2014, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
//! @cond Doxygen_Suppress
14
15
/* gzio.c -- IO on .gz files
16
  Copyright (C) 1995-2005 Jean-loup Gailly.
17
18
  This software is provided 'as-is', without any express or implied
19
  warranty.  In no event will the authors be held liable for any damages
20
  arising from the use of this software.
21
22
  Permission is granted to anyone to use this software for any purpose,
23
  including commercial applications, and to alter it and redistribute it
24
  freely, subject to the following restrictions:
25
26
  1. The origin of this software must not be misrepresented; you must not
27
     claim that you wrote the original software. If you use this software
28
     in a product, an acknowledgment in the product documentation would be
29
     appreciated but is not required.
30
  2. Altered source versions must be plainly marked as such, and must not be
31
     misrepresented as being the original software.
32
  3. This notice may not be removed or altered from any source distribution.
33
34
  Jean-loup Gailly        Mark Adler
35
  jloup@gzip.org          madler@alumni.caltech.edu
36
37
  The data format used by the zlib library is described by RFCs (Request for
38
  Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
39
  (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
40
*/
41
42
/* This file contains a refactoring of gzio.c from zlib project.
43
44
   It replaces classical calls operating on FILE* by calls to the VSI large file
45
   API. It also adds the capability to seek at the end of the file, which is not
46
   implemented in original gzSeek. It also implements a concept of in-memory
47
   "snapshots", that are a way of improving efficiency while seeking GZip
48
   files. While decompressing the data, snapshots of the gzip state are created
49
   regularly.  Later we can seek directly in the compressed data to the
50
   closest snapshot in order to reduce the amount of data to uncompress again.
51
52
   For .gz files, an effort is made to cache the size of the uncompressed data
53
   in a .gz.properties file, so that we don't need to seek at the end of the
54
   file each time a Stat() is done.
55
56
   For .zip and .gz, both reading and writing are supported, but just one mode
57
   at a time (read-only or write-only).
58
*/
59
60
#include "cpl_port.h"
61
#include "cpl_conv.h"
62
#include "cpl_vsi.h"
63
64
#include <cerrno>
65
#include <cinttypes>
66
#include <climits>
67
#include <cstddef>
68
#include <cstdio>
69
#include <cstdlib>
70
#include <cstring>
71
#include <ctime>
72
73
#include <fcntl.h>
74
75
#include "cpl_zlib_header.h"  // to avoid warnings when including zlib.h
76
77
#ifdef HAVE_LIBDEFLATE
78
#include "libdeflate.h"
79
#endif
80
81
#include <algorithm>
82
#include <iterator>
83
#include <limits>
84
#include <list>
85
#include <map>
86
#include <memory>
87
#include <mutex>
88
#include <string>
89
#include <utility>
90
#include <vector>
91
92
#include "cpl_error.h"
93
#include "cpl_minizip_ioapi.h"
94
#include "cpl_minizip_unzip.h"
95
#include "cpl_multiproc.h"
96
#include "cpl_string.h"
97
#include "cpl_time.h"
98
#include "cpl_vsi_virtual.h"
99
#include "cpl_worker_thread_pool.h"
100
#include "../gcore/gdal_thread_pool.h"
101
102
// Size in bytes of the compressed-input and decompressed-output work buffers.
// Original size is 16384.
constexpr int Z_BUFSIZE = 64 * 1024;

// The two magic bytes that open a gzip header.
constexpr int gz_magic[2] = {0x1f, 0x8b};
104
105
// Bit masks for the flag byte of a gzip header.
#define ASCII_FLAG 0x01   // bit 0: file is probably ASCII text
#define HEAD_CRC 0x02     // bit 1: a header CRC is present
#define EXTRA_FIELD 0x04  // bit 2: an extra field is present
#define ORIG_NAME 0x08    // bit 3: the original file name is present
#define COMMENT 0x10      // bit 4: a file comment is present
#define RESERVED 0xE0     // bits 5..7: reserved

// Raw allocation helpers inherited from gzio.c.
#define ALLOC(size) malloc(size)
#define TRYFREE(p)                                                             \
    {                                                                          \
        if (p)                                                                 \
            free(p);                                                           \
    }

// Emits a CPLError recording the source location and the value about to be
// returned. The macro only reports; the caller still has to return.
#define CPL_VSIL_GZ_RETURN(ret)                                                \
    CPLError(CE_Failure, CPLE_AppDefined, "In file %s, at line %d, return %d", \
             __FILE__, __LINE__, ret)
123
124
// To avoid aliasing to CopyFile to CopyFileA on Windows
125
#ifdef CopyFile
126
#undef CopyFile
127
#endif
128
129
// #define ENABLE_DEBUG 1
130
131
/************************************************************************/
132
/* ==================================================================== */
133
/*                       VSIGZipHandle                                  */
134
/* ==================================================================== */
135
/************************************************************************/
136
137
typedef struct
138
{
139
    vsi_l_offset posInBaseHandle;
140
    z_stream stream;
141
    uLong crc;
142
    int transparent;
143
    vsi_l_offset in;
144
    vsi_l_offset out;
145
} GZipSnapshot;
146
147
class VSIGZipHandle final : public VSIVirtualHandle
148
{
149
    VSIVirtualHandleUniquePtr m_poBaseHandle{};
150
#ifdef DEBUG
151
    vsi_l_offset m_offset = 0;
152
#endif
153
    vsi_l_offset m_compressed_size = 0;
154
    vsi_l_offset m_uncompressed_size = 0;
155
    vsi_l_offset offsetEndCompressedData = 0;
156
    uLong m_expected_crc = 0;
157
    char *m_pszBaseFileName = nullptr; /* optional */
158
    bool m_bWriteProperties = false;
159
    bool m_bCanSaveInfo = false;
160
161
    /* Fields from gz_stream structure */
162
    z_stream stream;
163
    int z_err = Z_OK;    /* error code for last stream operation */
164
    int z_eof = 0;       /* set if end of input file (but not necessarily of the
165
                         uncompressed stream !) */
166
    bool m_bEOF = false; /* EOF flag for uncompressed stream */
167
    Byte *inbuf = nullptr;  /* input buffer */
168
    Byte *outbuf = nullptr; /* output buffer */
169
    uLong crc = 0;          /* crc32 of uncompressed data */
170
    int m_transparent = 0;  /* 1 if input file is not a .gz file */
171
    vsi_l_offset startOff =
172
        0; /* startOff of compressed data in file (header skipped) */
173
    vsi_l_offset in = 0;  /* bytes into deflate or inflate */
174
    vsi_l_offset out = 0; /* bytes out of deflate or inflate */
175
    vsi_l_offset m_nLastReadOffset = 0;
176
177
    GZipSnapshot *snapshots = nullptr;
178
    vsi_l_offset snapshot_byte_interval =
179
        0; /* number of compressed bytes at which we create a "snapshot" */
180
181
    void check_header();
182
    int get_byte();
183
    bool gzseek(vsi_l_offset nOffset, int nWhence);
184
    int gzrewind();
185
    uLong getLong();
186
187
    CPL_DISALLOW_COPY_ASSIGN(VSIGZipHandle)
188
189
  public:
190
    VSIGZipHandle(VSIVirtualHandleUniquePtr poBaseHandleIn,
191
                  const char *pszBaseFileName, vsi_l_offset offset = 0,
192
                  vsi_l_offset compressed_size = 0,
193
                  vsi_l_offset uncompressed_size = 0, uLong expected_crc = 0,
194
                  int transparent = 0);
195
    ~VSIGZipHandle() override;
196
197
    bool IsInitOK() const
198
0
    {
199
0
        return inbuf != nullptr;
200
0
    }
201
202
    int Seek(vsi_l_offset nOffset, int nWhence) override;
203
    vsi_l_offset Tell() override;
204
    size_t Read(void *pBuffer, size_t nBytes) override;
205
    size_t Write(const void *pBuffer, size_t nBytes) override;
206
    void ClearErr() override;
207
    int Eof() override;
208
    int Error() override;
209
    int Flush() override;
210
    int Close() override;
211
212
    VSIGZipHandle *Duplicate();
213
    bool CloseBaseHandle();
214
215
    vsi_l_offset GetLastReadOffset()
216
0
    {
217
0
        return m_nLastReadOffset;
218
0
    }
219
220
    const char *GetBaseFileName()
221
0
    {
222
0
        return m_pszBaseFileName;
223
0
    }
224
225
    void SetUncompressedSize(vsi_l_offset nUncompressedSize)
226
0
    {
227
0
        m_uncompressed_size = nUncompressedSize;
228
0
    }
229
230
    vsi_l_offset GetUncompressedSize()
231
0
    {
232
0
        return m_uncompressed_size;
233
0
    }
234
235
    void SaveInfo_unlocked();
236
237
    void UnsetCanSaveInfo()
238
0
    {
239
0
        m_bCanSaveInfo = false;
240
0
    }
241
};
242
243
#ifdef ENABLE_DEFLATE64
244
245
/************************************************************************/
246
/* ==================================================================== */
247
/*                           VSIDeflate64Handle                         */
248
/* ==================================================================== */
249
/************************************************************************/
250
251
struct VSIDeflate64Snapshot
252
{
253
    vsi_l_offset posInBaseHandle = 0;
254
    z_stream stream{};
255
    uLong crc = 0;
256
    vsi_l_offset in = 0;
257
    vsi_l_offset out = 0;
258
    std::vector<GByte> extraOutput{};
259
    bool m_bStreamEndReached = false;
260
};
261
262
class VSIDeflate64Handle final : public VSIVirtualHandle
263
{
264
    VSIVirtualHandleUniquePtr m_poBaseHandle{};
265
#ifdef DEBUG
266
    vsi_l_offset m_offset = 0;
267
#endif
268
    vsi_l_offset m_compressed_size = 0;
269
    vsi_l_offset m_uncompressed_size = 0;
270
    vsi_l_offset offsetEndCompressedData = 0;
271
    uLong m_expected_crc = 0;
272
    char *m_pszBaseFileName = nullptr; /* optional */
273
274
    /* Fields from gz_stream structure */
275
    z_stream stream;
276
    int z_err = Z_OK;    /* error code for last stream operation */
277
    int z_eof = 0;       /* set if end of input file (but not necessarily of the
278
                         uncompressed stream ! ) */
279
    bool m_bEOF = false; /* EOF flag for uncompressed stream */
280
    Byte *inbuf = nullptr;  /* input buffer */
281
    Byte *outbuf = nullptr; /* output buffer */
282
    std::vector<GByte> extraOutput{};
283
    bool m_bStreamEndReached = false;
284
    uLong crc = 0; /* crc32 of uncompressed data */
285
    vsi_l_offset startOff =
286
        0; /* startOff of compressed data in file (header skipped) */
287
    vsi_l_offset in = 0;  /* bytes into deflate or inflate */
288
    vsi_l_offset out = 0; /* bytes out of deflate or inflate */
289
290
    std::vector<VSIDeflate64Snapshot> snapshots{};
291
    vsi_l_offset snapshot_byte_interval =
292
        0; /* number of compressed bytes at which we create a "snapshot" */
293
294
    bool gzseek(vsi_l_offset nOffset, int nWhence);
295
    int gzrewind();
296
297
    CPL_DISALLOW_COPY_ASSIGN(VSIDeflate64Handle)
298
299
  public:
300
    VSIDeflate64Handle(VSIVirtualHandleUniquePtr poBaseHandleIn,
301
                       const char *pszBaseFileName, vsi_l_offset offset = 0,
302
                       vsi_l_offset compressed_size = 0,
303
                       vsi_l_offset uncompressed_size = 0,
304
                       uLong expected_crc = 0);
305
    ~VSIDeflate64Handle() override;
306
307
    bool IsInitOK() const
308
0
    {
309
0
        return inbuf != nullptr;
310
0
    }
311
312
    int Seek(vsi_l_offset nOffset, int nWhence) override;
313
    vsi_l_offset Tell() override;
314
    size_t Read(void *pBuffer, size_t nBytes) override;
315
    size_t Write(const void *pBuffer, size_t nBytes) override;
316
    void ClearErr() override;
317
    int Eof() override;
318
    int Error() override;
319
    int Flush() override;
320
    int Close() override;
321
322
    VSIDeflate64Handle *Duplicate();
323
    bool CloseBaseHandle();
324
325
    const char *GetBaseFileName()
326
0
    {
327
0
        return m_pszBaseFileName;
328
0
    }
329
330
    void SetUncompressedSize(vsi_l_offset nUncompressedSize)
331
0
    {
332
0
        m_uncompressed_size = nUncompressedSize;
333
0
    }
334
335
    vsi_l_offset GetUncompressedSize()
336
0
    {
337
0
        return m_uncompressed_size;
338
0
    }
339
};
340
#endif
341
342
class VSIGZipFilesystemHandler final : public VSIFilesystemHandler
343
{
344
    CPL_DISALLOW_COPY_ASSIGN(VSIGZipFilesystemHandler)
345
346
    std::recursive_mutex oMutex{};
347
    std::unique_ptr<VSIGZipHandle> poHandleLastGZipFile{};
348
    bool m_bInSaveInfo = false;
349
350
  public:
351
3
    VSIGZipFilesystemHandler() = default;
352
    ~VSIGZipFilesystemHandler() override;
353
354
    VSIVirtualHandleUniquePtr Open(const char *pszFilename,
355
                                   const char *pszAccess, bool bSetError,
356
                                   CSLConstList /* papszOptions */) override;
357
    VSIGZipHandle *OpenGZipReadOnly(const char *pszFilename,
358
                                    const char *pszAccess);
359
    int Stat(const char *pszFilename, VSIStatBufL *pStatBuf,
360
             int nFlags) override;
361
    char **ReadDirEx(const char *pszDirname, int nMaxFiles) override;
362
363
    const char *GetOptions() override;
364
365
    virtual bool SupportsSequentialWrite(const char *pszPath,
366
                                         bool bAllowLocalTempFile) override;
367
368
    virtual bool SupportsRandomWrite(const char * /* pszPath */,
369
                                     bool /* bAllowLocalTempFile */) override
370
0
    {
371
0
        return false;
372
0
    }
373
374
    void SaveInfo(VSIGZipHandle *poHandle);
375
    void SaveInfo_unlocked(VSIGZipHandle *poHandle);
376
};
377
378
/************************************************************************/
379
/*                             Duplicate()                              */
380
/************************************************************************/
381
382
// Opens the base file a second time and returns a new VSIGZipHandle on it
// that starts with copies of the snapshots accumulated by this handle.
//
// Returns nullptr if the base file cannot be reopened or if the new handle
// fails to initialize. The caller owns the returned handle.
//
// If copying the inflate state of a snapshot fails, that snapshot and all
// following ones are left unused in the new handle. The handle is still
// returned, since unused entries only mean fewer positions to restart from.
VSIGZipHandle *VSIGZipHandle::Duplicate()
{
    CPLAssert(m_offset == 0);
    CPLAssert(m_compressed_size != 0);
    CPLAssert(m_pszBaseFileName != nullptr);

    VSIFilesystemHandler *poFSHandler =
        VSIFileManager::GetHandler(m_pszBaseFileName);

    auto poNewBaseHandle = poFSHandler->Open(m_pszBaseFileName, "rb");
    if (poNewBaseHandle == nullptr)
        return nullptr;

    auto poHandle = std::make_unique<VSIGZipHandle>(
        std::move(poNewBaseHandle), m_pszBaseFileName, 0, m_compressed_size,
        m_uncompressed_size);
    if (!poHandle->IsInitOK())
        return nullptr;

    poHandle->m_nLastReadOffset = m_nLastReadOffset;

    // Most important: duplicate the snapshots!
    // Nothing to copy when either side has no snapshot array (and a zero
    // interval would make the count below a division by zero).
    if (snapshots == nullptr || poHandle->snapshots == nullptr ||
        snapshot_byte_interval == 0)
    {
        return poHandle.release();
    }

    const vsi_l_offset nSnapshots =
        m_compressed_size / snapshot_byte_interval + 1;
    for (vsi_l_offset i = 0; i < nSnapshots; ++i)
    {
        GZipSnapshot &oSrc = snapshots[i];
        // Entries are filled in order: the first unused one ends the list.
        if (oSrc.posInBaseHandle == 0)
            break;

        GZipSnapshot &oDst = poHandle->snapshots[i];
        // Copy the inflate state first, and only mark the destination entry
        // as used (non-zero posInBaseHandle) once that has succeeded, so
        // that a failed copy never leaves an entry that looks valid.
        if (inflateCopy(&oDst.stream, &oSrc.stream) != Z_OK)
        {
            CPLDebug("GZIP", "inflateCopy() failed while duplicating "
                             "snapshots");
            break;
        }
        oDst.posInBaseHandle = oSrc.posInBaseHandle;
        oDst.crc = oSrc.crc;
        oDst.transparent = oSrc.transparent;
        oDst.in = oSrc.in;
        oDst.out = oSrc.out;
    }

    return poHandle.release();
}
424
425
/************************************************************************/
426
/*                          CloseBaseHandle()                           */
427
/************************************************************************/
428
429
bool VSIGZipHandle::CloseBaseHandle()
430
0
{
431
0
    bool bRet = true;
432
0
    if (m_poBaseHandle)
433
0
    {
434
0
        bRet = m_poBaseHandle->Close() == 0;
435
0
        m_poBaseHandle.reset();
436
0
    }
437
0
    return bRet;
438
0
}
439
440
/************************************************************************/
441
/*                           VSIGZipHandle()                            */
442
/************************************************************************/
443
444
// Builds a handle on the compressed data found at `offset` in the base handle.
//
// poBaseHandleIn     handle on the compressed data; ownership is taken.
// pszBaseFileName    optional name of the base file; copied when non-null.
// offset             position of the compressed data in the base handle.
// compressed_size    size of the compressed data. When it is 0 and
//                    `transparent` is 0 too, it is computed as the base file
//                    size minus `offset`.
// uncompressed_size  stored as-is in m_uncompressed_size.
// expected_crc       stored as-is in m_expected_crc.
// transparent        initial value of m_transparent; the snapshot array is
//                    only allocated when it is 0.
//
// On failure an error is emitted and the constructor returns with inbuf left
// null, which callers detect through IsInitOK().
VSIGZipHandle::VSIGZipHandle(VSIVirtualHandleUniquePtr poBaseHandleIn,
                             const char *pszBaseFileName, vsi_l_offset offset,
                             vsi_l_offset compressed_size,
                             vsi_l_offset uncompressed_size, uLong expected_crc,
                             int transparent)
    : m_poBaseHandle(std::move(poBaseHandleIn)),
#ifdef DEBUG
      m_offset(offset),
#endif
      m_uncompressed_size(uncompressed_size), m_expected_crc(expected_crc),
      m_pszBaseFileName(pszBaseFileName ? CPLStrdup(pszBaseFileName) : nullptr),
      m_bWriteProperties(CPLTestBool(
          CPLGetConfigOption("CPL_VSIL_GZIP_WRITE_PROPERTIES", "YES"))),
      m_bCanSaveInfo(
          CPLTestBool(CPLGetConfigOption("CPL_VSIL_GZIP_SAVE_INFO", "YES"))),
      stream(), crc(0), m_transparent(transparent)
{
    const bool bMustComputeSize = (compressed_size == 0 && !transparent);
    if (bMustComputeSize)
    {
        // The size was not provided: use everything from offset to the end
        // of the base file.
        if (m_poBaseHandle->Seek(0, SEEK_END) != 0)
        {
            CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
            return;
        }
        const auto nFileSize = m_poBaseHandle->Tell();
        if (nFileSize < offset)
        {
            CPLError(CE_Failure, CPLE_FileIO, "/vsizip/: invalid file offset");
            return;
        }
        compressed_size = nFileSize - offset;
    }
    m_compressed_size = compressed_size;
    offsetEndCompressedData = offset + compressed_size;

    // A failure here is reported but does not abort the construction.
    if (m_poBaseHandle->Seek(offset, SEEK_SET) != 0)
        CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");

    // Use zlib's default allocator and start with empty buffers.
    stream.zalloc = nullptr;
    stream.zfree = nullptr;
    stream.opaque = nullptr;
    outbuf = nullptr;
    stream.next_out = nullptr;
    stream.avail_in = 0;
    stream.avail_out = 0;

    inbuf = static_cast<Byte *>(ALLOC(Z_BUFSIZE));
    stream.next_in = inbuf;

    // windowBits is passed < 0 to tell that there is no zlib header.
    // Note that in this case inflate *requires* an extra "dummy" byte
    // after the compressed stream in order to complete decompression and
    // return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are
    // present after the compressed stream.
    const int nInitRet = inflateInit2(&stream, -MAX_WBITS);
    if (nInitRet != Z_OK || inbuf == nullptr)
    {
        CPLError(CE_Failure, CPLE_NotSupported, "inflateInit2 init failed");
        TRYFREE(inbuf);
        inbuf = nullptr;
        return;
    }
    stream.avail_out = static_cast<uInt>(Z_BUFSIZE);

    if (offset == 0)
        check_header();  // Skip the .gz header.
    // Bytes already buffered by check_header() belong to the payload.
    startOff = m_poBaseHandle->Tell() - stream.avail_in;

    if (transparent == 0)
    {
        // One snapshot slot per interval of compressed bytes; the interval
        // is 1% of the compressed size, but never less than Z_BUFSIZE.
        snapshot_byte_interval = std::max(static_cast<vsi_l_offset>(Z_BUFSIZE),
                                          compressed_size / 100);
        const size_t nSnapshotCount =
            static_cast<size_t>(compressed_size / snapshot_byte_interval + 1);
        snapshots = static_cast<GZipSnapshot *>(
            CPLCalloc(sizeof(GZipSnapshot), nSnapshotCount));
    }
}
524
525
/************************************************************************/
526
/*                         SaveInfo_unlocked()                          */
527
/************************************************************************/
528
529
void VSIGZipHandle::SaveInfo_unlocked()
530
0
{
531
0
    if (m_pszBaseFileName && m_bCanSaveInfo)
532
0
    {
533
0
        VSIFilesystemHandler *poFSHandler =
534
0
            VSIFileManager::GetHandler("/vsigzip/");
535
0
        cpl::down_cast<VSIGZipFilesystemHandler *>(poFSHandler)
536
0
            ->SaveInfo_unlocked(this);
537
0
        m_bCanSaveInfo = false;
538
0
    }
539
0
}
540
541
/************************************************************************/
542
/*                           ~VSIGZipHandle()                           */
543
/************************************************************************/
544
545
// Releases everything owned by the handle: the inflate state, the work
// buffers, the snapshots, the base file name and the base handle.
VSIGZipHandle::~VSIGZipHandle()
{
    // Give the /vsigzip/ filesystem handler a last chance to record
    // information about this handle.
    if (m_pszBaseFileName && m_bCanSaveInfo)
    {
        auto poGZipFSHandler = cpl::down_cast<VSIGZipFilesystemHandler *>(
            VSIFileManager::GetHandler("/vsigzip/"));
        poGZipFSHandler->SaveInfo(this);
    }

    // state is null when inflateInit2() was never reached or failed.
    if (stream.state != nullptr)
        inflateEnd(&stream);

    TRYFREE(inbuf);
    TRYFREE(outbuf);

    if (snapshots != nullptr)
    {
        // Same slot count as computed when the array was allocated.
        const size_t nSnapshotCount =
            static_cast<size_t>(m_compressed_size / snapshot_byte_interval + 1);
        for (size_t iSnapshot = 0; iSnapshot < nSnapshotCount; ++iSnapshot)
        {
            // Only slots that were filled in own an inflate state.
            if (snapshots[iSnapshot].posInBaseHandle != 0)
                inflateEnd(&snapshots[iSnapshot].stream);
        }
        CPLFree(snapshots);
    }
    CPLFree(m_pszBaseFileName);

    CloseBaseHandle();
}
578
579
/************************************************************************/
580
/*                            check_header()                            */
581
/************************************************************************/
582
583
void VSIGZipHandle::check_header()
584
0
{
585
    // Assure two bytes in the buffer so we can peek ahead -- handle case
586
    // where first byte of header is at the end of the buffer after the last
587
    // gzip segment.
588
0
    uInt len = stream.avail_in;
589
0
    if (len < 2)
590
0
    {
591
0
        if (len)
592
0
            inbuf[0] = stream.next_in[0];
593
0
        errno = 0;
594
0
        size_t nToRead = static_cast<size_t>(Z_BUFSIZE - len);
595
0
        CPLAssert(m_poBaseHandle->Tell() <= offsetEndCompressedData);
596
0
        if (m_poBaseHandle->Tell() + nToRead > offsetEndCompressedData)
597
0
            nToRead = static_cast<size_t>(offsetEndCompressedData -
598
0
                                          m_poBaseHandle->Tell());
599
600
0
        len = static_cast<uInt>(m_poBaseHandle->Read(inbuf + len, nToRead));
601
#ifdef ENABLE_DEBUG
602
        CPLDebug("GZIP", CPL_FRMT_GUIB " " CPL_FRMT_GUIB,
603
                 m_poBaseHandle->Tell(), offsetEndCompressedData);
604
#endif
605
0
        if (len == 0)  // && ferror(file)
606
0
        {
607
0
            if (m_poBaseHandle->Tell() != offsetEndCompressedData)
608
0
                z_err = Z_ERRNO;
609
0
        }
610
0
        stream.avail_in += len;
611
0
        stream.next_in = inbuf;
612
0
        if (stream.avail_in < 2)
613
0
        {
614
0
            m_transparent = stream.avail_in;
615
0
            return;
616
0
        }
617
0
    }
618
619
    // Peek ahead to check the gzip magic header.
620
0
    if (stream.next_in[0] != gz_magic[0] || stream.next_in[1] != gz_magic[1])
621
0
    {
622
0
        m_transparent = 1;
623
0
        return;
624
0
    }
625
0
    stream.avail_in -= 2;
626
0
    stream.next_in += 2;
627
628
    // Check the rest of the gzip header.
629
0
    const int method = get_byte();
630
0
    const int flags = get_byte();
631
0
    if (method != Z_DEFLATED || (flags & RESERVED) != 0)
632
0
    {
633
0
        z_err = Z_DATA_ERROR;
634
0
        return;
635
0
    }
636
637
    // Discard time, xflags and OS code:
638
0
    for (len = 0; len < 6; len++)
639
0
        CPL_IGNORE_RET_VAL(get_byte());
640
641
0
    if ((flags & EXTRA_FIELD) != 0)
642
0
    {
643
        // Skip the extra field.
644
0
        len = static_cast<uInt>(get_byte()) & 0xFF;
645
0
        len += (static_cast<uInt>(get_byte()) & 0xFF) << 8;
646
        // len is garbage if EOF but the loop below will quit anyway.
647
0
        while (len != 0 && get_byte() != EOF)
648
0
        {
649
0
            --len;
650
0
        }
651
0
    }
652
653
0
    if ((flags & ORIG_NAME) != 0)
654
0
    {
655
        // Skip the original file name.
656
0
        int c;
657
0
        while ((c = get_byte()) != 0 && c != EOF)
658
0
        {
659
0
        }
660
0
    }
661
0
    if ((flags & COMMENT) != 0)
662
0
    {
663
        // skip the .gz file comment.
664
0
        int c;
665
0
        while ((c = get_byte()) != 0 && c != EOF)
666
0
        {
667
0
        }
668
0
    }
669
0
    if ((flags & HEAD_CRC) != 0)
670
0
    {
671
        // Skip the header crc.
672
0
        for (len = 0; len < 2; len++)
673
0
            CPL_IGNORE_RET_VAL(get_byte());
674
0
    }
675
0
    z_err = z_eof ? Z_DATA_ERROR : Z_OK;
676
0
}
677
678
/************************************************************************/
679
/*                              get_byte()                              */
680
/************************************************************************/
681
682
int VSIGZipHandle::get_byte()
683
0
{
684
0
    if (z_eof)
685
0
        return EOF;
686
0
    if (stream.avail_in == 0)
687
0
    {
688
0
        errno = 0;
689
0
        size_t nToRead = static_cast<size_t>(Z_BUFSIZE);
690
0
        CPLAssert(m_poBaseHandle->Tell() <= offsetEndCompressedData);
691
0
        if (m_poBaseHandle->Tell() + nToRead > offsetEndCompressedData)
692
0
            nToRead = static_cast<size_t>(offsetEndCompressedData -
693
0
                                          m_poBaseHandle->Tell());
694
0
        stream.avail_in =
695
0
            static_cast<uInt>(m_poBaseHandle->Read(inbuf, nToRead));
696
#ifdef ENABLE_DEBUG
697
        CPLDebug("GZIP", CPL_FRMT_GUIB " " CPL_FRMT_GUIB,
698
                 m_poBaseHandle->Tell(), offsetEndCompressedData);
699
#endif
700
0
        if (stream.avail_in == 0)
701
0
        {
702
0
            z_eof = 1;
703
0
            if (m_poBaseHandle->Tell() != offsetEndCompressedData)
704
0
                z_err = Z_ERRNO;
705
            // if( ferror(file) ) z_err = Z_ERRNO;
706
0
            return EOF;
707
0
        }
708
0
        stream.next_in = inbuf;
709
0
    }
710
0
    stream.avail_in--;
711
0
    return *(stream.next_in)++;
712
0
}
713
714
/************************************************************************/
715
/*                              gzrewind()                              */
716
/************************************************************************/
717
718
int VSIGZipHandle::gzrewind()
719
0
{
720
0
    z_err = Z_OK;
721
0
    z_eof = 0;
722
0
    m_bEOF = false;
723
0
    stream.avail_in = 0;
724
0
    stream.next_in = inbuf;
725
0
    crc = 0;
726
0
    if (!m_transparent)
727
0
        CPL_IGNORE_RET_VAL(inflateReset(&stream));
728
0
    in = 0;
729
0
    out = 0;
730
0
    return m_poBaseHandle->Seek(startOff, SEEK_SET);
731
0
}
732
733
/************************************************************************/
734
/*                                Seek()                                */
735
/************************************************************************/
736
737
// Clears the uncompressed-stream EOF flag, then delegates to gzseek().
// Returns 0 when gzseek() succeeds and -1 otherwise.
int VSIGZipHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    m_bEOF = false;

    if (!gzseek(nOffset, nWhence))
        return -1;
    return 0;
}
743
744
/************************************************************************/
745
/*                               gzseek()                               */
746
/************************************************************************/
747
748
bool VSIGZipHandle::gzseek(vsi_l_offset offset, int whence)
749
0
{
750
0
    const vsi_l_offset original_offset = offset;
751
0
    const int original_nWhence = whence;
752
753
0
    z_eof = 0;
754
#ifdef ENABLE_DEBUG
755
    CPLDebug("GZIP", "Seek(" CPL_FRMT_GUIB ",%d)", offset, whence);
756
#endif
757
758
0
    if (m_transparent)
759
0
    {
760
0
        stream.avail_in = 0;
761
0
        stream.next_in = inbuf;
762
0
        if (whence == SEEK_CUR)
763
0
        {
764
0
            if (out + offset > m_compressed_size)
765
0
            {
766
0
                CPL_VSIL_GZ_RETURN(FALSE);
767
0
                return false;
768
0
            }
769
770
0
            offset = startOff + out + offset;
771
0
        }
772
0
        else if (whence == SEEK_SET)
773
0
        {
774
0
            if (offset > m_compressed_size)
775
0
            {
776
0
                CPL_VSIL_GZ_RETURN(FALSE);
777
0
                return false;
778
0
            }
779
780
0
            offset = startOff + offset;
781
0
        }
782
0
        else if (whence == SEEK_END)
783
0
        {
784
            // Commented test: because vsi_l_offset is unsigned (for the moment)
785
            // so no way to seek backward. See #1590 */
786
0
            if (offset > 0)  // || -offset > compressed_size
787
0
            {
788
0
                CPL_VSIL_GZ_RETURN(FALSE);
789
0
                return false;
790
0
            }
791
792
0
            offset = startOff + m_compressed_size - offset;
793
0
        }
794
0
        else
795
0
        {
796
0
            CPL_VSIL_GZ_RETURN(FALSE);
797
0
            return false;
798
0
        }
799
800
0
        if (m_poBaseHandle->Seek(offset, SEEK_SET) < 0)
801
0
        {
802
0
            CPL_VSIL_GZ_RETURN(FALSE);
803
0
            return false;
804
0
        }
805
806
0
        out = offset - startOff;
807
0
        in = out;
808
0
        return true;
809
0
    }
810
811
    // whence == SEEK_END is unsuppored in original gzseek.
812
0
    if (whence == SEEK_END)
813
0
    {
814
        // If we known the uncompressed size, we can fake a jump to
815
        // the end of the stream.
816
0
        if (offset == 0 && m_uncompressed_size != 0)
817
0
        {
818
0
            out = m_uncompressed_size;
819
0
            return true;
820
0
        }
821
822
        // We don't know the uncompressed size. This is unfortunate.
823
        // Do the slow version.
824
0
        static int firstWarning = 1;
825
0
        if (m_compressed_size > 10 * 1024 * 1024 && firstWarning)
826
0
        {
827
0
            CPLError(CE_Warning, CPLE_AppDefined,
828
0
                     "VSIFSeekL(xxx, SEEK_END) may be really slow "
829
0
                     "on GZip streams.");
830
0
            firstWarning = 0;
831
0
        }
832
833
0
        whence = SEEK_CUR;
834
0
        offset = 1024 * 1024 * 1024;
835
0
        offset *= 1024 * 1024;
836
0
    }
837
838
    // Rest of function is for reading only.
839
840
    // Compute absolute position.
841
0
    if (whence == SEEK_CUR)
842
0
    {
843
0
        offset += out;
844
0
    }
845
846
    // For a negative seek, rewind and use positive seek.
847
0
    if (offset >= out)
848
0
    {
849
0
        offset -= out;
850
0
    }
851
0
    else if (gzrewind() < 0)
852
0
    {
853
0
        CPL_VSIL_GZ_RETURN(FALSE);
854
0
        return false;
855
0
    }
856
857
0
    if (z_err != Z_OK && z_err != Z_STREAM_END)
858
0
    {
859
0
        CPL_VSIL_GZ_RETURN(FALSE);
860
0
        return false;
861
0
    }
862
863
0
    for (unsigned int i = 0; i < m_compressed_size / snapshot_byte_interval + 1;
864
0
         i++)
865
0
    {
866
0
        if (snapshots[i].posInBaseHandle == 0)
867
0
            break;
868
0
        if (snapshots[i].out <= out + offset &&
869
0
            (i == m_compressed_size / snapshot_byte_interval ||
870
0
             snapshots[i + 1].out == 0 || snapshots[i + 1].out > out + offset))
871
0
        {
872
0
            if (out >= snapshots[i].out)
873
0
                break;
874
875
#ifdef ENABLE_DEBUG
876
            CPLDebug("SNAPSHOT",
877
                     "using snapshot %d : "
878
                     "posInBaseHandle(snapshot)=" CPL_FRMT_GUIB
879
                     " in(snapshot)=" CPL_FRMT_GUIB
880
                     " out(snapshot)=" CPL_FRMT_GUIB " out=" CPL_FRMT_GUIB
881
                     " offset=" CPL_FRMT_GUIB,
882
                     i, snapshots[i].posInBaseHandle, snapshots[i].in,
883
                     snapshots[i].out, out, offset);
884
#endif
885
0
            offset = out + offset - snapshots[i].out;
886
0
            if (m_poBaseHandle->Seek(snapshots[i].posInBaseHandle, SEEK_SET) !=
887
0
                0)
888
0
                CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
889
890
0
            inflateEnd(&stream);
891
0
            inflateCopy(&stream, &snapshots[i].stream);
892
0
            crc = snapshots[i].crc;
893
0
            m_transparent = snapshots[i].transparent;
894
0
            in = snapshots[i].in;
895
0
            out = snapshots[i].out;
896
0
            break;
897
0
        }
898
0
    }
899
900
    // Offset is now the number of bytes to skip.
901
902
0
    if (offset != 0 && outbuf == nullptr)
903
0
    {
904
0
        outbuf = static_cast<Byte *>(ALLOC(Z_BUFSIZE));
905
0
        if (outbuf == nullptr)
906
0
        {
907
0
            CPL_VSIL_GZ_RETURN(FALSE);
908
0
            return false;
909
0
        }
910
0
    }
911
912
0
    if (original_nWhence == SEEK_END && z_err == Z_STREAM_END)
913
0
    {
914
0
        return true;
915
0
    }
916
917
0
    while (offset > 0)
918
0
    {
919
0
        int size = Z_BUFSIZE;
920
0
        if (offset < static_cast<vsi_l_offset>(Z_BUFSIZE))
921
0
            size = static_cast<int>(offset);
922
923
0
        const int read_size =
924
0
            static_cast<int>(Read(outbuf, static_cast<uInt>(size)));
925
0
        if (original_nWhence == SEEK_END)
926
0
        {
927
0
            if (size != read_size)
928
0
            {
929
0
                z_err = Z_STREAM_END;
930
0
                break;
931
0
            }
932
0
        }
933
0
        else if (read_size == 0)
934
0
        {
935
            // CPL_VSIL_GZ_RETURN(FALSE);
936
0
            return false;
937
0
        }
938
0
        offset -= read_size;
939
0
    }
940
#ifdef ENABLE_DEBUG
941
    CPLDebug("GZIP", "gzseek at offset " CPL_FRMT_GUIB, out);
942
#endif
943
944
0
    if (original_offset == 0 && original_nWhence == SEEK_END)
945
0
    {
946
0
        m_uncompressed_size = out;
947
948
0
        if (m_pszBaseFileName && !STARTS_WITH(m_pszBaseFileName, "/vsicurl/") &&
949
0
            !STARTS_WITH(m_pszBaseFileName, "/vsitar/") &&
950
0
            !STARTS_WITH(m_pszBaseFileName, "/vsizip/") && m_bWriteProperties)
951
0
        {
952
0
            CPLErrorStateBackuper oErrorStateBackuper(CPLQuietErrorHandler);
953
954
0
            CPLString osCacheFilename(m_pszBaseFileName);
955
0
            osCacheFilename += ".properties";
956
957
            // Write a .properties file to avoid seeking next time.
958
0
            VSILFILE *fpCacheLength = VSIFOpenL(osCacheFilename.c_str(), "wb");
959
0
            if (fpCacheLength)
960
0
            {
961
0
                char szBuffer[32] = {};
962
963
0
                CPLPrintUIntBig(szBuffer, m_compressed_size, 31);
964
0
                char *pszFirstNonSpace = szBuffer;
965
0
                while (*pszFirstNonSpace == ' ')
966
0
                    pszFirstNonSpace++;
967
0
                CPL_IGNORE_RET_VAL(VSIFPrintfL(
968
0
                    fpCacheLength, "compressed_size=%s\n", pszFirstNonSpace));
969
970
0
                CPLPrintUIntBig(szBuffer, m_uncompressed_size, 31);
971
0
                pszFirstNonSpace = szBuffer;
972
0
                while (*pszFirstNonSpace == ' ')
973
0
                    pszFirstNonSpace++;
974
0
                CPL_IGNORE_RET_VAL(VSIFPrintfL(
975
0
                    fpCacheLength, "uncompressed_size=%s\n", pszFirstNonSpace));
976
977
0
                CPL_IGNORE_RET_VAL(VSIFCloseL(fpCacheLength));
978
0
            }
979
0
        }
980
0
    }
981
982
0
    return true;
983
0
}
984
985
/************************************************************************/
986
/*                                Tell()                                */
987
/************************************************************************/
988
989
/** Return the current position, expressed in uncompressed bytes. */
vsi_l_offset VSIGZipHandle::Tell()
{
    const vsi_l_offset nCurPos = out;
#ifdef ENABLE_DEBUG
    CPLDebug("GZIP", "Tell() = " CPL_FRMT_GUIB, nCurPos);
#endif
    return nCurPos;
}
996
997
/************************************************************************/
998
/*                                Read()                                */
999
/************************************************************************/
1000
1001
size_t VSIGZipHandle::Read(void *const buf, size_t const nBytes)
1002
0
{
1003
#ifdef ENABLE_DEBUG
1004
    CPLDebug("GZIP", "Read(%p, %d)", buf, static_cast<int>(nBytes));
1005
#endif
1006
1007
0
    if (m_bEOF || z_err != Z_OK)
1008
0
    {
1009
0
        if (z_err == Z_STREAM_END && nBytes > 0)
1010
0
            m_bEOF = true;
1011
0
        return 0;
1012
0
    }
1013
1014
0
    if (nBytes > UINT32_MAX)
1015
0
    {
1016
0
        CPLError(CE_Failure, CPLE_FileIO, "Too many bytes to read at once");
1017
0
        return 0;
1018
0
    }
1019
1020
0
    const unsigned len = static_cast<unsigned int>(nBytes);
1021
0
    Bytef *pStart =
1022
0
        static_cast<Bytef *>(buf);  // Start off point for crc computation.
1023
    // == stream.next_out but not forced far (for MSDOS).
1024
0
    Byte *next_out = static_cast<Byte *>(buf);
1025
0
    stream.next_out = static_cast<Bytef *>(buf);
1026
0
    stream.avail_out = len;
1027
1028
0
    while (stream.avail_out != 0)
1029
0
    {
1030
0
        if (m_transparent)
1031
0
        {
1032
            // Copy first the lookahead bytes:
1033
0
            uInt nRead = 0;
1034
0
            uInt n = stream.avail_in;
1035
0
            if (n > stream.avail_out)
1036
0
                n = stream.avail_out;
1037
0
            if (n > 0)
1038
0
            {
1039
0
                memcpy(stream.next_out, stream.next_in, n);
1040
0
                next_out += n;
1041
0
                stream.next_out = next_out;
1042
0
                stream.next_in += n;
1043
0
                stream.avail_out -= n;
1044
0
                stream.avail_in -= n;
1045
0
                nRead += n;
1046
0
            }
1047
0
            if (stream.avail_out > 0)
1048
0
            {
1049
0
                const uInt nToRead = static_cast<uInt>(
1050
0
                    std::min(m_compressed_size - (in + nRead),
1051
0
                             static_cast<vsi_l_offset>(stream.avail_out)));
1052
0
                const uInt nReadFromFile =
1053
0
                    static_cast<uInt>(m_poBaseHandle->Read(next_out, nToRead));
1054
0
                if (nReadFromFile < nToRead && m_poBaseHandle->Error())
1055
0
                    z_err = Z_ERRNO;
1056
0
                stream.avail_out -= nReadFromFile;
1057
0
                nRead += nReadFromFile;
1058
0
            }
1059
0
            in += nRead;
1060
0
            out += nRead;
1061
0
            if (nRead < len)
1062
0
            {
1063
0
                m_bEOF = true;
1064
0
                z_eof = 1;
1065
0
            }
1066
#ifdef ENABLE_DEBUG
1067
            CPLDebug("GZIP", "Read return %u", nRead);
1068
#endif
1069
0
            return nRead;
1070
0
        }
1071
0
        if (stream.avail_in == 0 && !z_eof)
1072
0
        {
1073
0
            vsi_l_offset posInBaseHandle = m_poBaseHandle->Tell();
1074
0
            if (posInBaseHandle - startOff > m_compressed_size)
1075
0
            {
1076
                // If we reach here, file size has changed (because at
1077
                // construction time startOff + m_compressed_size marked the
1078
                // end of file).
1079
                // We should probably have a better fix than that, by detecting
1080
                // at open time that the saved snapshot is not valid and
1081
                // discarding it.
1082
0
                CPLError(CE_Failure, CPLE_AppDefined,
1083
0
                         "File size of underlying /vsigzip/ file has changed");
1084
0
                z_err = Z_ERRNO;
1085
0
                CPL_VSIL_GZ_RETURN(0);
1086
0
                return 0;
1087
0
            }
1088
0
            GZipSnapshot *snapshot = &snapshots[(posInBaseHandle - startOff) /
1089
0
                                                snapshot_byte_interval];
1090
0
            if (snapshot->posInBaseHandle == 0)
1091
0
            {
1092
0
                snapshot->crc = crc32(
1093
0
                    crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1094
#ifdef ENABLE_DEBUG
1095
                CPLDebug("SNAPSHOT",
1096
                         "creating snapshot %d : "
1097
                         "posInBaseHandle=" CPL_FRMT_GUIB " in=" CPL_FRMT_GUIB
1098
                         " out=" CPL_FRMT_GUIB " crc=%X",
1099
                         static_cast<int>((posInBaseHandle - startOff) /
1100
                                          snapshot_byte_interval),
1101
                         posInBaseHandle, in, out,
1102
                         static_cast<unsigned int>(snapshot->crc));
1103
#endif
1104
0
                snapshot->posInBaseHandle = posInBaseHandle;
1105
0
                inflateCopy(&snapshot->stream, &stream);
1106
0
                snapshot->transparent = m_transparent;
1107
0
                snapshot->in = in;
1108
0
                snapshot->out = out;
1109
1110
0
                if (out > m_nLastReadOffset)
1111
0
                    m_nLastReadOffset = out;
1112
0
            }
1113
1114
0
            errno = 0;
1115
0
            stream.avail_in =
1116
0
                static_cast<uInt>(m_poBaseHandle->Read(inbuf, Z_BUFSIZE));
1117
#ifdef ENABLE_DEBUG
1118
            CPLDebug("GZIP", CPL_FRMT_GUIB " " CPL_FRMT_GUIB,
1119
                     m_poBaseHandle->Tell(), offsetEndCompressedData);
1120
#endif
1121
0
            if (m_poBaseHandle->Tell() > offsetEndCompressedData)
1122
0
            {
1123
#ifdef ENABLE_DEBUG
1124
                CPLDebug("GZIP", "avail_in before = %d", stream.avail_in);
1125
#endif
1126
0
                stream.avail_in = stream.avail_in -
1127
0
                                  static_cast<uInt>(m_poBaseHandle->Tell() -
1128
0
                                                    offsetEndCompressedData);
1129
0
                if (m_poBaseHandle->Seek(offsetEndCompressedData, SEEK_SET) !=
1130
0
                    0)
1131
0
                    CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
1132
#ifdef ENABLE_DEBUG
1133
                CPLDebug("GZIP", "avail_in after = %d", stream.avail_in);
1134
#endif
1135
0
            }
1136
0
            if (stream.avail_in == 0)
1137
0
            {
1138
0
                z_eof = 1;
1139
0
                if (m_poBaseHandle->Error() ||
1140
0
                    m_poBaseHandle->Tell() != offsetEndCompressedData)
1141
0
                {
1142
0
                    z_err = Z_ERRNO;
1143
0
                    break;
1144
0
                }
1145
0
            }
1146
0
            stream.next_in = inbuf;
1147
0
        }
1148
0
        in += stream.avail_in;
1149
0
        out += stream.avail_out;
1150
0
        z_err = inflate(&(stream), Z_NO_FLUSH);
1151
0
        in -= stream.avail_in;
1152
0
        out -= stream.avail_out;
1153
1154
0
        if (z_err == Z_STREAM_END && m_compressed_size != 2)
1155
0
        {
1156
            // Check CRC and original size.
1157
0
            crc =
1158
0
                crc32(crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1159
0
            pStart = stream.next_out;
1160
0
            if (m_expected_crc)
1161
0
            {
1162
#ifdef ENABLE_DEBUG
1163
                CPLDebug("GZIP", "Computed CRC = %X. Expected CRC = %X",
1164
                         static_cast<unsigned int>(crc),
1165
                         static_cast<unsigned int>(m_expected_crc));
1166
#endif
1167
0
            }
1168
0
            if (m_expected_crc != 0 && m_expected_crc != crc)
1169
0
            {
1170
0
                CPLError(CE_Failure, CPLE_FileIO,
1171
0
                         "CRC error. Got %X instead of %X",
1172
0
                         static_cast<unsigned int>(crc),
1173
0
                         static_cast<unsigned int>(m_expected_crc));
1174
0
                z_err = Z_DATA_ERROR;
1175
0
            }
1176
0
            else if (m_expected_crc == 0)
1177
0
            {
1178
0
                const uLong read_crc = static_cast<unsigned long>(getLong());
1179
0
                if (read_crc != crc)
1180
0
                {
1181
0
                    CPLError(CE_Failure, CPLE_FileIO,
1182
0
                             "CRC error. Got %X instead of %X",
1183
0
                             static_cast<unsigned int>(crc),
1184
0
                             static_cast<unsigned int>(read_crc));
1185
0
                    z_err = Z_DATA_ERROR;
1186
0
                }
1187
0
                else
1188
0
                {
1189
0
                    CPL_IGNORE_RET_VAL(getLong());
1190
                    // The uncompressed length returned by above getlong() may
1191
                    // be different from out in case of concatenated .gz files.
1192
                    // Check for such files:
1193
0
                    check_header();
1194
0
                    if (z_err == Z_OK)
1195
0
                    {
1196
0
                        inflateReset(&(stream));
1197
0
                        crc = 0;
1198
0
                    }
1199
0
                }
1200
0
            }
1201
0
        }
1202
0
        if (z_err != Z_OK || z_eof)
1203
0
            break;
1204
0
    }
1205
0
    crc = crc32(crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1206
1207
0
    unsigned ret = len - stream.avail_out;
1208
0
    if (z_err != Z_OK && z_err != Z_STREAM_END)
1209
0
    {
1210
0
        CPLError(CE_Failure, CPLE_AppDefined,
1211
0
                 "In file %s, at line %d, decompression failed with "
1212
0
                 "z_err = %d, return = %u",
1213
0
                 __FILE__, __LINE__, z_err, ret);
1214
0
    }
1215
0
    else if (ret < nBytes)
1216
0
    {
1217
0
        m_bEOF = true;
1218
0
    }
1219
1220
#ifdef ENABLE_DEBUG
1221
    CPLDebug("GZIP", "Read return %u (z_err=%d, z_eof=%d)", ret, z_err, z_eof);
1222
#endif
1223
0
    return ret;
1224
0
}
1225
1226
/************************************************************************/
1227
/*                              getLong()                               */
1228
/************************************************************************/
1229
1230
/**
 * Read a 32-bit little-endian value, one get_byte() call per byte.
 *
 * @return the decoded value, or 0 with z_err set to Z_DATA_ERROR when the
 *         fourth byte comes back as EOF.
 */
uLong VSIGZipHandle::getLong()
{
    uLong nValue = 0;

    // Three low-order bytes, least significant first.
    for (int nShift = 0; nShift <= 16; nShift += 8)
    {
        nValue += (static_cast<uLong>(get_byte()) & 0xFF) << nShift;
    }

    // Only the last byte is tested for EOF.
    const int nHighByte = get_byte();
    if (nHighByte == EOF)
    {
        z_err = Z_DATA_ERROR;
        return 0;
    }
    nValue += static_cast<uLong>(nHighByte) << 24;
    // coverity[overflow_sink]
    return nValue;
}
1246
1247
/************************************************************************/
1248
/*                               Write()                                */
1249
/************************************************************************/
1250
1251
size_t VSIGZipHandle::Write(const void * /* pBuffer */, size_t /* nBytes */)
1252
0
{
1253
0
    CPLError(CE_Failure, CPLE_NotSupported,
1254
0
             "VSIFWriteL is not supported on GZip streams");
1255
0
    return 0;
1256
0
}
1257
1258
/************************************************************************/
1259
/*                                Eof()                                 */
1260
/************************************************************************/
1261
1262
int VSIGZipHandle::Eof()
1263
0
{
1264
#ifdef ENABLE_DEBUG
1265
    CPLDebug("GZIP", "Eof()");
1266
#endif
1267
0
    return m_bEOF;
1268
0
}
1269
1270
/************************************************************************/
1271
/*                               Error()                                */
1272
/************************************************************************/
1273
1274
int VSIGZipHandle::Error()
1275
0
{
1276
#ifdef ENABLE_DEBUG
1277
    CPLDebug("GZIP", "Error()");
1278
#endif
1279
0
    return z_err != Z_OK && z_err != Z_STREAM_END;
1280
0
}
1281
1282
/************************************************************************/
1283
/*                              ClearErr()                              */
1284
/************************************************************************/
1285
1286
void VSIGZipHandle::ClearErr()
1287
0
{
1288
0
    m_poBaseHandle->ClearErr();
1289
0
    z_eof = 0;
1290
0
    m_bEOF = false;
1291
0
    z_err = Z_OK;
1292
0
}
1293
1294
/************************************************************************/
1295
/*                               Flush()                                */
1296
/************************************************************************/
1297
1298
/** No-op: always returns 0. */
int VSIGZipHandle::Flush()
{
    constexpr int nSuccess = 0;
    return nSuccess;
}
1302
1303
/************************************************************************/
1304
/*                               Close()                                */
1305
/************************************************************************/
1306
1307
/** No-op: always returns 0. */
int VSIGZipHandle::Close()
{
    constexpr int nSuccess = 0;
    return nSuccess;
}
1311
1312
#ifdef ENABLE_DEFLATE64
1313
1314
/************************************************************************/
1315
/*                             Duplicate()                              */
1316
/************************************************************************/
1317
1318
/**
 * Create an independent handle on the same base file, carrying over the
 * snapshots recorded so far.
 *
 * The base file is reopened in "rb" mode, so the new handle has its own file
 * position. Snapshots are copied in order up to the first unused slot.
 *
 * @return a new handle owned by the caller, or nullptr if the base file
 *         cannot be reopened or the new handle fails to initialize.
 */
VSIDeflate64Handle *VSIDeflate64Handle::Duplicate()
{
    CPLAssert(m_offset == 0);
    CPLAssert(m_compressed_size != 0);
    CPLAssert(m_pszBaseFileName != nullptr);

    VSIFilesystemHandler *poFSHandler =
        VSIFileManager::GetHandler(m_pszBaseFileName);

    VSIVirtualHandleUniquePtr poNewBaseHandle(
        poFSHandler->Open(m_pszBaseFileName, "rb"));

    if (poNewBaseHandle == nullptr)
        return nullptr;

    auto poHandle = std::make_unique<VSIDeflate64Handle>(
        std::move(poNewBaseHandle), m_pszBaseFileName, 0, m_compressed_size,
        m_uncompressed_size);
    if (!(poHandle->IsInitOK()))
    {
        return nullptr;
    }

    // Most important: duplicate the snapshots!

    for (unsigned int i = 0; i < m_compressed_size / snapshot_byte_interval + 1;
         i++)
    {
        const auto &oSrc = snapshots[i];
        if (oSrc.posInBaseHandle == 0)
            break;

        auto &oDst = poHandle->snapshots[i];

        // Copy the inflate state first. A slot is only marked as used
        // (posInBaseHandle != 0) once its state was copied successfully;
        // gzseek() restores the stream from any marked slot, so a marked slot
        // without a valid state must not be left behind.
        if (inflateBack9Copy(&oDst.stream, &oSrc.stream) != Z_OK)
        {
            CPLError(CE_Failure, CPLE_AppDefined, "inflateBack9Copy() failed");
            // Snapshot lookup stops at the first unused slot, so anything
            // copied past this point would be unreachable anyway.
            break;
        }

        oDst.posInBaseHandle = oSrc.posInBaseHandle;
        oDst.crc = oSrc.crc;
        oDst.in = oSrc.in;
        oDst.out = oSrc.out;
        oDst.extraOutput = oSrc.extraOutput;
        oDst.m_bStreamEndReached = oSrc.m_bStreamEndReached;
    }

    return poHandle.release();
}
1363
1364
/************************************************************************/
1365
/*                          CloseBaseHandle()                           */
1366
/************************************************************************/
1367
1368
bool VSIDeflate64Handle::CloseBaseHandle()
1369
0
{
1370
0
    bool bRet = true;
1371
0
    if (m_poBaseHandle)
1372
0
    {
1373
0
        bRet = m_poBaseHandle->Close() == 0;
1374
0
        m_poBaseHandle.reset();
1375
0
    }
1376
0
    return bRet;
1377
0
}
1378
1379
/************************************************************************/
1380
/*                         VSIDeflate64Handle()                         */
1381
/************************************************************************/
1382
1383
/**
 * Build a Deflate64 reading handle on top of poBaseHandleIn.
 *
 * @param poBaseHandleIn    handle on the file holding the compressed data
 *                          (ownership is taken).
 * @param pszBaseFileName   name of that file, duplicated internally; may be
 *                          nullptr.
 * @param offset            offset of the compressed data in the base file.
 * @param compressed_size   size of the compressed data; 0 means "up to the
 *                          end of the base file".
 * @param uncompressed_size uncompressed size, stored as given.
 * @param expected_crc      expected CRC, stored as given.
 *
 * If the input buffer cannot be allocated or inflateBack9Init() fails, an
 * error is emitted and inbuf is left null.
 */
VSIDeflate64Handle::VSIDeflate64Handle(VSIVirtualHandleUniquePtr poBaseHandleIn,
                                       const char *pszBaseFileName,
                                       vsi_l_offset offset,
                                       vsi_l_offset compressed_size,
                                       vsi_l_offset uncompressed_size,
                                       uLong expected_crc)
    : m_poBaseHandle(std::move(poBaseHandleIn)),
#ifdef DEBUG
      m_offset(offset),
#endif
      m_uncompressed_size(uncompressed_size), m_expected_crc(expected_crc),
      m_pszBaseFileName(pszBaseFileName ? CPLStrdup(pszBaseFileName) : nullptr),
      stream(), crc(0)
{
    // Unknown compressed size: take everything from offset to the file end.
    if (compressed_size == 0)
    {
        if (m_poBaseHandle->Seek(0, SEEK_END) != 0)
            CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
        compressed_size = m_poBaseHandle->Tell() - offset;
    }
    m_compressed_size = compressed_size;
    offsetEndCompressedData = offset + compressed_size;

    if (m_poBaseHandle->Seek(offset, SEEK_SET) != 0)
        CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");

    // Start from a blank stream: no custom allocator, no data on either side.
    stream.zalloc = nullptr;
    stream.zfree = nullptr;
    stream.opaque = nullptr;
    outbuf = nullptr;
    stream.next_out = nullptr;
    stream.avail_out = 0;
    stream.avail_in = 0;

    inbuf = static_cast<Byte *>(ALLOC(Z_BUFSIZE));
    stream.next_in = inbuf;

    const int nInitStatus = inflateBack9Init(&(stream), nullptr);
    // Note that in this case inflate *requires* an extra "dummy" byte
    // after the compressed stream in order to complete decompression and
    // return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are
    // present after the compressed stream.
    if (inbuf == nullptr || nInitStatus != Z_OK)
    {
        CPLError(CE_Failure, CPLE_NotSupported, "inflateBack9Init init failed");
        TRYFREE(inbuf);
        inbuf = nullptr;
        return;
    }
    startOff = m_poBaseHandle->Tell() - stream.avail_in;

    // One snapshot slot per 1/100th of the compressed data, with an interval
    // of at least Z_BUFSIZE bytes.
    snapshot_byte_interval =
        std::max(static_cast<vsi_l_offset>(Z_BUFSIZE), compressed_size / 100);
    const auto nSnapshotSlots = compressed_size / snapshot_byte_interval + 1;
    snapshots.resize(static_cast<size_t>(nSnapshotSlots));
}
1442
1443
/************************************************************************/
1444
/*                        ~VSIDeflate64Handle()                         */
1445
/************************************************************************/
1446
1447
/**
 * Release the inflate state, the I/O buffers, the state of every recorded
 * snapshot and the file name, then close the base handle.
 */
VSIDeflate64Handle::~VSIDeflate64Handle()
{
    if (stream.state != nullptr)
        inflateBack9End(&(stream));

    TRYFREE(inbuf);
    TRYFREE(outbuf);

    // Only slots that were actually recorded own an inflate state.
    for (auto &oSnapshot : snapshots)
    {
        if (!oSnapshot.posInBaseHandle)
            continue;
        inflateBack9End(&(oSnapshot.stream));
    }

    CPLFree(m_pszBaseFileName);

    CloseBaseHandle();
}
1468
1469
/************************************************************************/
1470
/*                              gzrewind()                              */
1471
/************************************************************************/
1472
1473
int VSIDeflate64Handle::gzrewind()
1474
0
{
1475
0
    m_bStreamEndReached = false;
1476
0
    extraOutput.clear();
1477
0
    z_err = Z_OK;
1478
0
    z_eof = 0;
1479
0
    stream.avail_in = 0;
1480
0
    stream.next_in = inbuf;
1481
0
    crc = 0;
1482
0
    CPL_IGNORE_RET_VAL(inflateBack9End(&stream));
1483
0
    CPL_IGNORE_RET_VAL(inflateBack9Init(&stream, nullptr));
1484
0
    in = 0;
1485
0
    out = 0;
1486
0
    return m_poBaseHandle->Seek(startOff, SEEK_SET);
1487
0
}
1488
1489
/************************************************************************/
1490
/*                                Seek()                                */
1491
/************************************************************************/
1492
1493
/**
 * Move to a position expressed in uncompressed bytes.
 *
 * The EOF flag is cleared before seeking.
 *
 * @return 0 when gzseek() succeeds, -1 otherwise.
 */
int VSIDeflate64Handle::Seek(vsi_l_offset nOffset, int nWhence)
{
    m_bEOF = false;
    if (!gzseek(nOffset, nWhence))
        return -1;
    return 0;
}
1498
1499
/************************************************************************/
1500
/*                               gzseek()                               */
1501
/************************************************************************/
1502
1503
/**
 * Position the stream at an offset expressed in uncompressed bytes.
 *
 * Forward moves decompress and discard data. Backward moves rewind first.
 * In both cases a recorded snapshot is restored when one lies between the
 * current position and the target. SEEK_END is emulated, either from the
 * known uncompressed size or by reading until the stream ends.
 *
 * @param offset target offset, interpreted according to whence.
 * @param whence SEEK_SET, SEEK_CUR or SEEK_END.
 * @return true on success, false on rewind, allocation or read failure.
 */
bool VSIDeflate64Handle::gzseek(vsi_l_offset offset, int whence)
{
    // Keep the caller's request: offset and whence are rewritten below.
    const vsi_l_offset nRequestedOffset = offset;
    const bool bSeekFromEnd = (whence == SEEK_END);

    z_eof = 0;
#ifdef ENABLE_DEBUG
    CPLDebug("GZIP", "Seek(" CPL_FRMT_GUIB ",%d)", offset, whence);
#endif

    // whence == SEEK_END is unsupported in original gzseek.
    if (bSeekFromEnd)
    {
        // If we know the uncompressed size, we can fake a jump to
        // the end of the stream.
        if (offset == 0 && m_uncompressed_size != 0)
        {
            out = m_uncompressed_size;
            return true;
        }

        // We don't know the uncompressed size. This is unfortunate.
        // Do the slow version.
        static int firstWarning = 1;
        if (firstWarning && m_compressed_size > 10 * 1024 * 1024)
        {
            CPLError(CE_Warning, CPLE_AppDefined,
                     "VSIFSeekL(xxx, SEEK_END) may be really slow "
                     "on GZip streams.");
            firstWarning = 0;
        }

        // Turn the request into a huge forward relative seek (2^50 bytes).
        whence = SEEK_CUR;
        offset = static_cast<vsi_l_offset>(1024 * 1024 * 1024) * (1024 * 1024);
    }

    // Rest of function is for reading only.

    // Compute absolute position.
    if (whence == SEEK_CUR)
        offset += out;

    // For a negative seek, rewind and use positive seek.
    if (offset < out)
    {
        if (gzrewind() < 0)
        {
            CPL_VSIL_GZ_RETURN(FALSE);
            return false;
        }
    }
    else
    {
        offset -= out;
    }

    if (!(z_err == Z_OK || z_err == Z_STREAM_END))
    {
        CPL_VSIL_GZ_RETURN(FALSE);
        return false;
    }

    // Look for the last recorded snapshot that does not lie beyond the
    // target, and restore it if it is ahead of the current position.
    const auto nLastSnapshotIdx = m_compressed_size / snapshot_byte_interval;
    const vsi_l_offset nTargetPos = out + offset;
    for (unsigned int i = 0; i < nLastSnapshotIdx + 1; ++i)
    {
        const auto &oCandidate = snapshots[i];
        if (oCandidate.posInBaseHandle == 0)
            break;

        if (oCandidate.out > nTargetPos)
            continue;

        const bool bIsClosest = i == nLastSnapshotIdx ||
                                snapshots[i + 1].out == 0 ||
                                snapshots[i + 1].out > nTargetPos;
        if (!bIsClosest)
            continue;

        // Already at or past that snapshot: reading on from here is cheaper.
        if (out >= oCandidate.out)
            break;

#ifdef ENABLE_DEBUG
        CPLDebug("SNAPSHOT",
                 "using snapshot %d : "
                 "posInBaseHandle(snapshot)=" CPL_FRMT_GUIB
                 " in(snapshot)=" CPL_FRMT_GUIB
                 " out(snapshot)=" CPL_FRMT_GUIB " out=" CPL_FRMT_GUIB
                 " offset=" CPL_FRMT_GUIB,
                 i, snapshots[i].posInBaseHandle, snapshots[i].in,
                 snapshots[i].out, out, offset);
#endif
        offset = nTargetPos - oCandidate.out;
        if (m_poBaseHandle->Seek(oCandidate.posInBaseHandle, SEEK_SET) != 0)
        {
            CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
        }

        inflateBack9End(&stream);
        if (inflateBack9Copy(&stream, &snapshots[i].stream) != Z_OK)
        {
            CPLError(CE_Failure, CPLE_AppDefined,
                     "inflateBack9Copy() failed");
        }
        crc = oCandidate.crc;
        in = oCandidate.in;
        out = oCandidate.out;
        extraOutput = oCandidate.extraOutput;
        m_bStreamEndReached = oCandidate.m_bStreamEndReached;
        break;
    }

    // Offset is now the number of bytes to skip.

    // Scratch buffer receiving the skipped bytes, allocated on first use.
    if (offset != 0 && outbuf == nullptr)
    {
        outbuf = static_cast<Byte *>(ALLOC(Z_BUFSIZE));
        if (outbuf == nullptr)
        {
            CPL_VSIL_GZ_RETURN(FALSE);
            return false;
        }
    }

    if (bSeekFromEnd && z_err == Z_STREAM_END)
    {
        return true;
    }

    while (offset > 0)
    {
        const int nChunk = offset < static_cast<vsi_l_offset>(Z_BUFSIZE)
                               ? static_cast<int>(offset)
                               : static_cast<int>(Z_BUFSIZE);

        const int nGot =
            static_cast<int>(Read(outbuf, static_cast<uInt>(nChunk)));

        // When looking for the end, a short read is the expected outcome.
        if (bSeekFromEnd && nGot != nChunk)
        {
            z_err = Z_STREAM_END;
            break;
        }
        // Otherwise, getting nothing at all means the target is unreachable.
        if (!bSeekFromEnd && nGot == 0)
        {
            // CPL_VSIL_GZ_RETURN(FALSE);
            return false;
        }
        offset -= nGot;
    }
#ifdef ENABLE_DEBUG
    CPLDebug("GZIP", "gzseek at offset " CPL_FRMT_GUIB, out);
#endif

    // A completed Seek(0, SEEK_END) reveals the uncompressed size.
    if (bSeekFromEnd && nRequestedOffset == 0)
    {
        m_uncompressed_size = out;
    }

    return true;
}
1656
1657
/************************************************************************/
1658
/*                                Tell()                                */
1659
/************************************************************************/
1660
1661
/** Return the current position, expressed in uncompressed bytes. */
vsi_l_offset VSIDeflate64Handle::Tell()
{
    const vsi_l_offset nCurPos = out;
#ifdef ENABLE_DEBUG
    CPLDebug("GZIP", "Tell() = " CPL_FRMT_GUIB, nCurPos);
#endif
    return nCurPos;
}
1668
1669
/************************************************************************/
1670
/*                                Read()                                */
1671
/************************************************************************/
1672
1673
size_t VSIDeflate64Handle::Read(void *const buf, size_t const nBytes)
1674
0
{
1675
#ifdef ENABLE_DEBUG
1676
    CPLDebug("GZIP", "Read(%p, %d)", buf, static_cast<int>(nBytes));
1677
#endif
1678
1679
0
    if (m_bEOF || z_err != Z_OK)
1680
0
    {
1681
0
        if (z_err == Z_STREAM_END && nBytes > 0)
1682
0
            m_bEOF = true;
1683
0
        return 0;
1684
0
    }
1685
1686
0
    if (nBytes > UINT32_MAX)
1687
0
    {
1688
0
        CPLError(CE_Failure, CPLE_FileIO, "Too many bytes to read at once");
1689
0
        return 0;
1690
0
    }
1691
1692
0
    const unsigned len = static_cast<unsigned int>(nBytes);
1693
0
    Bytef *pStart =
1694
0
        static_cast<Bytef *>(buf);  // Start off point for crc computation.
1695
    // == stream.next_out but not forced far (for MSDOS).
1696
0
    stream.next_out = static_cast<Bytef *>(buf);
1697
0
    stream.avail_out = len;
1698
1699
0
    while (stream.avail_out != 0)
1700
0
    {
1701
0
        if (!extraOutput.empty())
1702
0
        {
1703
0
            if (extraOutput.size() >= stream.avail_out)
1704
0
            {
1705
0
                memcpy(stream.next_out, extraOutput.data(), stream.avail_out);
1706
0
                extraOutput.erase(extraOutput.begin(),
1707
0
                                  extraOutput.begin() + stream.avail_out);
1708
0
                out += stream.avail_out;
1709
0
                stream.next_out += stream.avail_out;
1710
0
                stream.avail_out = 0;
1711
0
            }
1712
0
            else
1713
0
            {
1714
0
                memcpy(stream.next_out, extraOutput.data(), extraOutput.size());
1715
0
                stream.next_out += extraOutput.size();
1716
0
                out += static_cast<uInt>(extraOutput.size());
1717
0
                stream.avail_out -= static_cast<uInt>(extraOutput.size());
1718
0
                CPLAssert(stream.avail_out > 0);
1719
0
                extraOutput.clear();
1720
0
            }
1721
0
            z_err = Z_OK;
1722
0
        }
1723
1724
0
        if (stream.avail_in == 0 && !z_eof)
1725
0
        {
1726
0
            vsi_l_offset posInBaseHandle = m_poBaseHandle->Tell();
1727
0
            if (posInBaseHandle - startOff > m_compressed_size)
1728
0
            {
1729
                // If we reach here, file size has changed (because at
1730
                // construction time startOff + m_compressed_size marked the
1731
                // end of file).
1732
                // We should probably have a better fix than that, by detecting
1733
                // at open time that the saved snapshot is not valid and
1734
                // discarding it.
1735
0
                CPLError(CE_Failure, CPLE_AppDefined,
1736
0
                         "File size of underlying /vsigzip/ file has changed");
1737
0
                z_err = Z_ERRNO;
1738
0
                CPL_VSIL_GZ_RETURN(0);
1739
0
                return 0;
1740
0
            }
1741
0
            auto snapshot = &snapshots[static_cast<size_t>(
1742
0
                (posInBaseHandle - startOff) / snapshot_byte_interval)];
1743
0
            if (snapshot->posInBaseHandle == 0)
1744
0
            {
1745
0
                snapshot->crc = crc32(
1746
0
                    crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1747
#ifdef ENABLE_DEBUG
1748
                CPLDebug("SNAPSHOT",
1749
                         "creating snapshot %d : "
1750
                         "posInBaseHandle=" CPL_FRMT_GUIB " in=" CPL_FRMT_GUIB
1751
                         " out=" CPL_FRMT_GUIB " crc=%X",
1752
                         static_cast<int>((posInBaseHandle - startOff) /
1753
                                          snapshot_byte_interval),
1754
                         posInBaseHandle, in, out,
1755
                         static_cast<unsigned int>(snapshot->crc));
1756
#endif
1757
0
                snapshot->posInBaseHandle = posInBaseHandle;
1758
0
                if (inflateBack9Copy(&snapshot->stream, &stream) != Z_OK)
1759
0
                    CPLError(CE_Failure, CPLE_AppDefined,
1760
0
                             "inflateBack9Copy() failed");
1761
0
                snapshot->in = in;
1762
0
                snapshot->out = out;
1763
0
                snapshot->extraOutput = extraOutput;
1764
0
                snapshot->m_bStreamEndReached = m_bStreamEndReached;
1765
0
            }
1766
1767
0
            errno = 0;
1768
0
            stream.avail_in =
1769
0
                static_cast<uInt>(m_poBaseHandle->Read(inbuf, Z_BUFSIZE));
1770
#ifdef ENABLE_DEBUG
1771
            CPLDebug("GZIP", CPL_FRMT_GUIB " " CPL_FRMT_GUIB,
1772
                     m_poBaseHandle->Tell(), offsetEndCompressedData);
1773
#endif
1774
0
            if (m_poBaseHandle->Tell() > offsetEndCompressedData)
1775
0
            {
1776
#ifdef ENABLE_DEBUG
1777
                CPLDebug("GZIP", "avail_in before = %d", stream.avail_in);
1778
#endif
1779
0
                stream.avail_in = stream.avail_in -
1780
0
                                  static_cast<uInt>(m_poBaseHandle->Tell() -
1781
0
                                                    offsetEndCompressedData);
1782
0
                if (m_poBaseHandle->Seek(offsetEndCompressedData, SEEK_SET) !=
1783
0
                    0)
1784
0
                    CPLError(CE_Failure, CPLE_FileIO, "Seek() failed");
1785
#ifdef ENABLE_DEBUG
1786
                CPLDebug("GZIP", "avail_in after = %d", stream.avail_in);
1787
#endif
1788
0
            }
1789
0
            if (stream.avail_in == 0)
1790
0
            {
1791
0
                z_eof = 1;
1792
0
                if (m_poBaseHandle->Error() ||
1793
0
                    m_poBaseHandle->Tell() != offsetEndCompressedData)
1794
0
                {
1795
0
                    z_err = Z_ERRNO;
1796
0
                    break;
1797
0
                }
1798
0
            }
1799
0
            stream.next_in = inbuf;
1800
0
        }
1801
1802
0
        struct InOutCallback
1803
0
        {
1804
0
            vsi_l_offset *pOut = nullptr;
1805
0
            std::vector<GByte> *pExtraOutput = nullptr;
1806
0
            z_stream *pStream = nullptr;
1807
1808
0
            static unsigned inCbk(void FAR *, z_const unsigned char FAR * FAR *)
1809
0
            {
1810
0
                return 0;
1811
0
            }
1812
1813
0
            static int outCbk(void FAR *user_data, unsigned char FAR *data,
1814
0
                              unsigned len)
1815
0
            {
1816
0
                auto self = static_cast<InOutCallback *>(user_data);
1817
0
                if (self->pStream->avail_out >= len)
1818
0
                {
1819
0
                    memcpy(self->pStream->next_out, data, len);
1820
0
                    *(self->pOut) += len;
1821
0
                    self->pStream->next_out += len;
1822
0
                    self->pStream->avail_out -= len;
1823
0
                }
1824
0
                else
1825
0
                {
1826
0
                    if (self->pStream->avail_out != 0)
1827
0
                    {
1828
0
                        memcpy(self->pStream->next_out, data,
1829
0
                               self->pStream->avail_out);
1830
0
                        *(self->pOut) += self->pStream->avail_out;
1831
0
                        data += self->pStream->avail_out;
1832
0
                        len -= self->pStream->avail_out;
1833
0
                        self->pStream->next_out += self->pStream->avail_out;
1834
0
                        self->pStream->avail_out = 0;
1835
0
                    }
1836
0
                    if (len > 0)
1837
0
                    {
1838
0
                        self->pExtraOutput->insert(self->pExtraOutput->end(),
1839
0
                                                   data, data + len);
1840
0
                    }
1841
0
                }
1842
0
                return 0;
1843
0
            }
1844
0
        };
1845
1846
0
        InOutCallback cbkData;
1847
0
        cbkData.pOut = &out;
1848
0
        cbkData.pExtraOutput = &extraOutput;
1849
0
        cbkData.pStream = &stream;
1850
1851
0
        if (stream.avail_out)
1852
0
        {
1853
0
            if (m_bStreamEndReached)
1854
0
                z_err = Z_STREAM_END;
1855
0
            else
1856
0
            {
1857
0
                in += stream.avail_in;
1858
0
                z_err = inflateBack9(&(stream), InOutCallback::inCbk, &cbkData,
1859
0
                                     InOutCallback::outCbk, &cbkData);
1860
0
                in -= stream.avail_in;
1861
0
            }
1862
0
        }
1863
0
        if (z_err == Z_BUF_ERROR && stream.next_in == Z_NULL)
1864
0
            z_err = Z_OK;
1865
0
        else if (!extraOutput.empty() && z_err == Z_STREAM_END)
1866
0
        {
1867
0
            m_bStreamEndReached = true;
1868
0
            z_err = Z_OK;
1869
0
        }
1870
1871
0
        if (z_err == Z_STREAM_END /*&& m_compressed_size != 2*/)
1872
0
        {
1873
            // Check CRC and original size.
1874
0
            crc =
1875
0
                crc32(crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1876
0
            pStart = stream.next_out;
1877
0
            if (m_expected_crc)
1878
0
            {
1879
#ifdef ENABLE_DEBUG
1880
                CPLDebug("GZIP", "Computed CRC = %X. Expected CRC = %X",
1881
                         static_cast<unsigned int>(crc),
1882
                         static_cast<unsigned int>(m_expected_crc));
1883
#endif
1884
0
            }
1885
0
            if (m_expected_crc != 0 && m_expected_crc != crc)
1886
0
            {
1887
0
                CPLError(CE_Failure, CPLE_FileIO,
1888
0
                         "CRC error. Got %X instead of %X",
1889
0
                         static_cast<unsigned int>(crc),
1890
0
                         static_cast<unsigned int>(m_expected_crc));
1891
0
                z_err = Z_DATA_ERROR;
1892
0
            }
1893
0
        }
1894
0
        if (z_err != Z_OK || z_eof)
1895
0
            break;
1896
0
    }
1897
0
    crc = crc32(crc, pStart, static_cast<uInt>(stream.next_out - pStart));
1898
1899
0
    unsigned ret = (len - stream.avail_out);
1900
0
    if (z_err != Z_OK && z_err != Z_STREAM_END)
1901
0
    {
1902
0
        CPLError(CE_Failure, CPLE_AppDefined,
1903
0
                 "In file %s, at line %d, decompression failed with "
1904
0
                 "z_err = %d, return = %u",
1905
0
                 __FILE__, __LINE__, z_err, ret);
1906
0
    }
1907
0
    else if (ret < nBytes)
1908
0
    {
1909
0
        m_bEOF = true;
1910
0
    }
1911
1912
#ifdef ENABLE_DEBUG
1913
    CPLDebug("GZIP", "Read return %u (z_err=%d, z_eof=%d)", ret, z_err, z_eof);
1914
#endif
1915
0
    return ret;
1916
0
}
1917
1918
/************************************************************************/
1919
/*                               Write()                                */
1920
/************************************************************************/
1921
1922
size_t VSIDeflate64Handle::Write(const void * /* pBuffer */,
1923
                                 size_t /* nBytes */)
1924
0
{
1925
0
    CPLError(CE_Failure, CPLE_NotSupported,
1926
0
             "VSIFWriteL is not supported on GZip streams");
1927
0
    return 0;
1928
0
}
1929
1930
/************************************************************************/
1931
/*                                Eof()                                 */
1932
/************************************************************************/
1933
1934
int VSIDeflate64Handle::Eof()
1935
0
{
1936
#ifdef ENABLE_DEBUG
1937
    CPLDebug("GZIP", "Eof()");
1938
#endif
1939
0
    return m_bEOF;
1940
0
}
1941
1942
/************************************************************************/
1943
/*                               Error()                                */
1944
/************************************************************************/
1945
1946
int VSIDeflate64Handle::Error()
1947
0
{
1948
#ifdef ENABLE_DEBUG
1949
    CPLDebug("GZIP", "Error()");
1950
#endif
1951
0
    return z_err != Z_OK && z_err != Z_STREAM_END;
1952
0
}
1953
1954
/************************************************************************/
1955
/*                              ClearErr()                              */
1956
/************************************************************************/
1957
1958
void VSIDeflate64Handle::ClearErr()
1959
0
{
1960
0
    m_poBaseHandle->ClearErr();
1961
0
    z_eof = 0;
1962
0
    m_bEOF = false;
1963
0
    z_err = Z_OK;
1964
0
}
1965
1966
/************************************************************************/
1967
/*                               Flush()                                */
1968
/************************************************************************/
1969
1970
int VSIDeflate64Handle::Flush()
{
    // No-op: always returns 0.
    constexpr int nSuccess = 0;
    return nSuccess;
}
1974
1975
/************************************************************************/
1976
/*                               Close()                                */
1977
/************************************************************************/
1978
1979
int VSIDeflate64Handle::Close()
{
    // No-op: always returns 0 and performs no work here.
    constexpr int nSuccess = 0;
    return nSuccess;
}
1983
#endif
1984
1985
/************************************************************************/
1986
/* ==================================================================== */
1987
/*                       VSIGZipWriteHandleMT                           */
1988
/* ==================================================================== */
1989
/************************************************************************/
1990
1991
class VSIGZipWriteHandleMT final : public VSIVirtualHandle
1992
{
1993
    CPL_DISALLOW_COPY_ASSIGN(VSIGZipWriteHandleMT)
1994
1995
    VSIVirtualHandle *poBaseHandle_ = nullptr;
1996
    vsi_l_offset nCurOffset_ = 0;
1997
    uLong nCRC_ = 0;
1998
    int nDeflateType_ = CPL_DEFLATE_TYPE_GZIP;
1999
    bool bAutoCloseBaseHandle_ = false;
2000
    int nThreads_ = 0;
2001
    std::unique_ptr<CPLWorkerThreadPool> poPool_{};
2002
    std::list<std::string *> aposBuffers_{};
2003
    std::string *pCurBuffer_ = nullptr;
2004
    std::mutex sMutex_{};
2005
    int nSeqNumberGenerated_ = 0;
2006
    int nSeqNumberExpected_ = 0;
2007
    int nSeqNumberExpectedCRC_ = 0;
2008
    size_t nChunkSize_ = 0;
2009
    bool bHasErrored_ = false;
2010
2011
    struct Job
2012
    {
2013
        VSIGZipWriteHandleMT *pParent_ = nullptr;
2014
        std::string *pBuffer_ = nullptr;
2015
        int nSeqNumber_ = 0;
2016
        bool bFinish_ = false;
2017
        bool bInCRCComputation_ = false;
2018
2019
        std::string sCompressedData_{};
2020
        uLong nCRC_ = 0;
2021
    };
2022
2023
    std::list<Job *> apoFinishedJobs_{};
2024
    std::list<Job *> apoCRCFinishedJobs_{};
2025
    std::list<Job *> apoFreeJobs_{};
2026
    vsi_l_offset nStartOffset_ = 0;
2027
    size_t nSOZIPIndexEltSize_ = 0;
2028
    std::vector<uint8_t> *panSOZIPIndex_ = nullptr;
2029
2030
    static void DeflateCompress(void *inData);
2031
    static void CRCCompute(void *inData);
2032
    bool ProcessCompletedJobs();
2033
    Job *GetJobObject();
2034
#ifdef DEBUG_VERBOSE
2035
    void DumpState();
2036
#endif
2037
2038
  public:
2039
    VSIGZipWriteHandleMT(VSIVirtualHandle *poBaseHandle, int nDeflateType,
2040
                         bool bAutoCloseBaseHandleIn, int nThreads,
2041
                         size_t nChunkSize, size_t nSOZIPIndexEltSize,
2042
                         std::vector<uint8_t> *panSOZIPIndex);
2043
2044
    ~VSIGZipWriteHandleMT() override;
2045
2046
    int Seek(vsi_l_offset nOffset, int nWhence) override;
2047
    vsi_l_offset Tell() override;
2048
    size_t Read(void *pBuffer, size_t nBytes) override;
2049
    size_t Write(const void *pBuffer, size_t nBytes) override;
2050
2051
    int Eof() override
2052
0
    {
2053
0
        return 0;
2054
0
    }
2055
2056
    int Error() override
2057
0
    {
2058
0
        return 0;
2059
0
    }
2060
2061
    void ClearErr() override
2062
0
    {
2063
0
    }
2064
2065
    int Flush() override;
2066
    int Close() override;
2067
};
2068
2069
/************************************************************************/
2070
/*                        VSIGZipWriteHandleMT()                        */
2071
/************************************************************************/
2072
2073
VSIGZipWriteHandleMT::VSIGZipWriteHandleMT(VSIVirtualHandle *poBaseHandle,
                                           int nDeflateType,
                                           bool bAutoCloseBaseHandleIn,
                                           int nThreads, size_t nChunkSize,
                                           size_t nSOZIPIndexEltSize,
                                           std::vector<uint8_t> *panSOZIPIndex)
    : poBaseHandle_(poBaseHandle), nDeflateType_(nDeflateType),
      bAutoCloseBaseHandle_(bAutoCloseBaseHandleIn), nThreads_(nThreads),
      nChunkSize_(nChunkSize), nSOZIPIndexEltSize_(nSOZIPIndexEltSize),
      panSOZIPIndex_(panSOZIPIndex)
{
    // A chunk size of 0 means "take it from the configuration": the
    // CPL_VSIL_DEFLATE_CHUNK_SIZE option is a number optionally followed by
    // K or M, and the result is clamped to the [4 KB, UINT_MAX] range.
    if (nChunkSize_ == 0)
    {
        const char *const pszCfgValue =
            CPLGetConfigOption("CPL_VSIL_DEFLATE_CHUNK_SIZE", "1024K");
        size_t nParsed = static_cast<size_t>(atoi(pszCfgValue));
        if (strchr(pszCfgValue, 'K') != nullptr)
            nParsed *= 1024;
        else if (strchr(pszCfgValue, 'M') != nullptr)
            nParsed *= 1024 * 1024;

        const size_t nLowerBound = static_cast<size_t>(4 * 1024);
        const size_t nUpperBound = static_cast<size_t>(UINT_MAX);
        if (nParsed > nUpperBound)
            nParsed = nUpperBound;
        if (nParsed < nLowerBound)
            nParsed = nLowerBound;
        nChunkSize_ = nParsed;
    }

    // Pre-allocate one buffer more than the thread count.
    const int nBufferCount = 1 + nThreads_;
    for (int iBuffer = 0; iBuffer < nBufferCount; ++iBuffer)
        aposBuffers_.push_back(new std::string());

    // Remember where the compressed stream starts in the base handle.
    nStartOffset_ = poBaseHandle_->Tell();

    if (nDeflateType == CPL_DEFLATE_TYPE_GZIP)
    {
        // Minimal 10-byte .gz header: magic, method, flags, 4 time bytes,
        // xflags and a final 0x03 byte.
        char szGzHeader[11] = {};
        snprintf(szGzHeader, sizeof(szGzHeader), "%c%c%c%c%c%c%c%c%c%c",
                 gz_magic[0], gz_magic[1], Z_DEFLATED, 0 /*flags*/, 0, 0, 0,
                 0 /*time*/, 0 /*xflags*/, 0x03);
        poBaseHandle_->Write(szGzHeader, 10);
    }
}
2113
2114
/************************************************************************/
2115
/*                       ~VSIGZipWriteHandleMT()                        */
2116
/************************************************************************/
2117
2118
VSIGZipWriteHandleMT::~VSIGZipWriteHandleMT()
{
    // Finish the stream first (Close() returns early when already closed).
    VSIGZipWriteHandleMT::Close();

    // Every job still sitting in one of the lists owns its input buffer.
    const auto destroyJobs = [](const std::list<Job *> &apoJobs)
    {
        for (Job *psPending : apoJobs)
        {
            delete psPending->pBuffer_;
            delete psPending;
        }
    };
    destroyJobs(apoFinishedJobs_);
    destroyJobs(apoCRCFinishedJobs_);
    destroyJobs(apoFreeJobs_);

    // Spare buffers and the buffer being filled, if any.
    for (std::string *posSpare : aposBuffers_)
        delete posSpare;
    delete pCurBuffer_;
}
2143
2144
/************************************************************************/
2145
/*                               Close()                                */
2146
/************************************************************************/
2147
2148
int VSIGZipWriteHandleMT::Close()
2149
2150
0
{
2151
0
    if (!poBaseHandle_)
2152
0
        return 0;
2153
2154
0
    int nRet = 0;
2155
2156
0
    if (!pCurBuffer_)
2157
0
        pCurBuffer_ = new std::string();
2158
2159
0
    {
2160
0
        auto psJob = GetJobObject();
2161
0
        psJob->bFinish_ = true;
2162
0
        psJob->pParent_ = this;
2163
0
        psJob->pBuffer_ = pCurBuffer_;
2164
0
        pCurBuffer_ = nullptr;
2165
0
        psJob->nSeqNumber_ = nSeqNumberGenerated_;
2166
0
        VSIGZipWriteHandleMT::DeflateCompress(psJob);
2167
0
    }
2168
2169
0
    if (poPool_)
2170
0
    {
2171
0
        poPool_->WaitCompletion(0);
2172
0
    }
2173
0
    if (!ProcessCompletedJobs())
2174
0
    {
2175
0
        nRet = -1;
2176
0
    }
2177
0
    else
2178
0
    {
2179
0
        CPLAssert(apoFinishedJobs_.empty());
2180
0
        if (nDeflateType_ == CPL_DEFLATE_TYPE_GZIP)
2181
0
        {
2182
0
            if (poPool_)
2183
0
            {
2184
0
                poPool_->WaitCompletion(0);
2185
0
            }
2186
0
            ProcessCompletedJobs();
2187
0
        }
2188
0
        CPLAssert(apoCRCFinishedJobs_.empty());
2189
0
    }
2190
2191
0
    if (nDeflateType_ == CPL_DEFLATE_TYPE_GZIP)
2192
0
    {
2193
0
        const GUInt32 anTrailer[2] = {
2194
0
            CPL_LSBWORD32(static_cast<GUInt32>(nCRC_)),
2195
0
            CPL_LSBWORD32(static_cast<GUInt32>(nCurOffset_))};
2196
2197
0
        if (poBaseHandle_->Write(anTrailer, 8) < 8)
2198
0
        {
2199
0
            nRet = -1;
2200
0
        }
2201
0
    }
2202
2203
0
    if (bAutoCloseBaseHandle_)
2204
0
    {
2205
0
        int nRetClose = poBaseHandle_->Close();
2206
0
        if (nRet == 0)
2207
0
            nRet = nRetClose;
2208
2209
0
        delete poBaseHandle_;
2210
0
    }
2211
0
    poBaseHandle_ = nullptr;
2212
2213
0
    return nRet;
2214
0
}
2215
2216
/************************************************************************/
2217
/*                                Read()                                */
2218
/************************************************************************/
2219
2220
size_t VSIGZipWriteHandleMT::Read(void * /* pBuffer */, size_t /* nBytes*/)
2221
0
{
2222
0
    CPLError(CE_Failure, CPLE_NotSupported,
2223
0
             "VSIFReadL is not supported on GZip write streams");
2224
0
    return 0;
2225
0
}
2226
2227
/************************************************************************/
2228
/*                          DeflateCompress()                           */
2229
/************************************************************************/
2230
2231
void VSIGZipWriteHandleMT::DeflateCompress(void *inData)
2232
0
{
2233
0
    Job *psJob = static_cast<Job *>(inData);
2234
2235
0
    CPLAssert(psJob->pBuffer_);
2236
2237
0
    z_stream sStream;
2238
0
    memset(&sStream, 0, sizeof(sStream));
2239
0
    sStream.zalloc = nullptr;
2240
0
    sStream.zfree = nullptr;
2241
0
    sStream.opaque = nullptr;
2242
2243
0
    sStream.avail_in = static_cast<uInt>(psJob->pBuffer_->size());
2244
0
    sStream.next_in = reinterpret_cast<Bytef *>(&(*psJob->pBuffer_)[0]);
2245
2246
0
    int ret = deflateInit2(
2247
0
        &sStream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
2248
0
        (psJob->pParent_->nDeflateType_ == CPL_DEFLATE_TYPE_ZLIB) ? MAX_WBITS
2249
0
                                                                  : -MAX_WBITS,
2250
0
        8, Z_DEFAULT_STRATEGY);
2251
0
    CPLAssertAlwaysEval(ret == Z_OK);
2252
2253
0
    size_t nRealSize = 0;
2254
2255
0
    while (sStream.avail_in > 0)
2256
0
    {
2257
0
        psJob->sCompressedData_.resize(nRealSize + Z_BUFSIZE);
2258
0
        sStream.avail_out = static_cast<uInt>(Z_BUFSIZE);
2259
0
        sStream.next_out =
2260
0
            reinterpret_cast<Bytef *>(&psJob->sCompressedData_[0]) + nRealSize;
2261
2262
0
        const int zlibRet = deflate(&sStream, Z_NO_FLUSH);
2263
0
        CPLAssertAlwaysEval(zlibRet == Z_OK);
2264
2265
0
        nRealSize += static_cast<uInt>(Z_BUFSIZE) - sStream.avail_out;
2266
0
    }
2267
2268
0
    psJob->sCompressedData_.resize(nRealSize + Z_BUFSIZE);
2269
0
    sStream.avail_out = static_cast<uInt>(Z_BUFSIZE);
2270
0
    sStream.next_out =
2271
0
        reinterpret_cast<Bytef *>(&psJob->sCompressedData_[0]) + nRealSize;
2272
2273
0
    if (psJob->bFinish_)
2274
0
    {
2275
0
        const int zlibRet = deflate(&sStream, Z_FINISH);
2276
0
        CPLAssertAlwaysEval(zlibRet == Z_STREAM_END);
2277
0
    }
2278
0
    else
2279
0
    {
2280
        // Do a Z_SYNC_FLUSH and Z_FULL_FLUSH, so as to have two markers when
2281
        // independent as pigz 2.3.4 or later. The following 9 byte sequence
2282
        // will be found: 0x00 0x00 0xff 0xff 0x00 0x00 0x00 0xff 0xff
2283
        // Z_FULL_FLUSH only is sufficient, but it is not obvious if a
2284
        // 0x00 0x00 0xff 0xff marker in the codestream is just a SYNC_FLUSH (
2285
        // without dictionary reset) or a FULL_FLUSH (with dictionary reset)
2286
0
        {
2287
0
            const int zlibRet = deflate(&sStream, Z_SYNC_FLUSH);
2288
0
            CPLAssertAlwaysEval(zlibRet == Z_OK);
2289
0
        }
2290
2291
0
        {
2292
0
            const int zlibRet = deflate(&sStream, Z_FULL_FLUSH);
2293
0
            CPLAssertAlwaysEval(zlibRet == Z_OK);
2294
0
        }
2295
0
    }
2296
2297
0
    nRealSize += static_cast<uInt>(Z_BUFSIZE) - sStream.avail_out;
2298
0
    psJob->sCompressedData_.resize(nRealSize);
2299
2300
0
    deflateEnd(&sStream);
2301
2302
0
    {
2303
0
        std::lock_guard<std::mutex> oLock(psJob->pParent_->sMutex_);
2304
0
        psJob->pParent_->apoFinishedJobs_.push_back(psJob);
2305
0
    }
2306
0
}
2307
2308
/************************************************************************/
2309
/*                             CRCCompute()                             */
2310
/************************************************************************/
2311
2312
void VSIGZipWriteHandleMT::CRCCompute(void *inData)
2313
0
{
2314
0
    Job *psJob = static_cast<Job *>(inData);
2315
0
    psJob->bInCRCComputation_ = true;
2316
0
    psJob->nCRC_ =
2317
0
        crc32(0U, reinterpret_cast<const Bytef *>(psJob->pBuffer_->data()),
2318
0
              static_cast<uInt>(psJob->pBuffer_->size()));
2319
2320
0
    {
2321
0
        std::lock_guard<std::mutex> oLock(psJob->pParent_->sMutex_);
2322
0
        psJob->pParent_->apoCRCFinishedJobs_.push_back(psJob);
2323
0
    }
2324
0
}
2325
2326
/************************************************************************/
2327
/*                             DumpState()                              */
2328
/************************************************************************/
2329
2330
#ifdef DEBUG_VERBOSE
2331
void VSIGZipWriteHandleMT::DumpState()
2332
{
2333
    fprintf(stderr, "Finished jobs (expected = %d):\n",  // ok
2334
            nSeqNumberExpected_);
2335
    for (const auto *psJob : apoFinishedJobs_)
2336
    {
2337
        fprintf(stderr, "seq number=%d, bInCRCComputation = %d\n",  // ok
2338
                psJob->nSeqNumber_, psJob->bInCRCComputation_ ? 1 : 0);
2339
    }
2340
    fprintf(stderr, "Finished CRC jobs (expected = %d):\n",  // ok
2341
            nSeqNumberExpectedCRC_);
2342
    for (const auto *psJob : apoFinishedJobs_)
2343
    {
2344
        fprintf(stderr, "seq number=%d\n",  // ok
2345
                psJob->nSeqNumber_);
2346
    }
2347
    fprintf(stderr, "apoFreeJobs_.size() = %d\n",  // ok
2348
            static_cast<int>(apoFreeJobs_.size()));
2349
    fprintf(stderr, "aposBuffers_.size() = %d\n",  // ok
2350
            static_cast<int>(aposBuffers_.size()));
2351
}
2352
#endif
2353
2354
/************************************************************************/
2355
/*                        ProcessCompletedJobs()                        */
2356
/************************************************************************/
2357
2358
bool VSIGZipWriteHandleMT::ProcessCompletedJobs()
2359
0
{
2360
0
    std::lock_guard<std::mutex> oLock(sMutex_);
2361
0
    bool do_it_again = true;
2362
0
    while (do_it_again)
2363
0
    {
2364
0
        do_it_again = false;
2365
0
        if (nDeflateType_ == CPL_DEFLATE_TYPE_GZIP)
2366
0
        {
2367
0
            for (auto iter = apoFinishedJobs_.begin();
2368
0
                 iter != apoFinishedJobs_.end(); ++iter)
2369
0
            {
2370
0
                auto psJob = *iter;
2371
2372
0
                if (!psJob->bInCRCComputation_)
2373
0
                {
2374
0
                    psJob->bInCRCComputation_ = true;
2375
0
                    sMutex_.unlock();
2376
0
                    if (poPool_)
2377
0
                    {
2378
0
                        poPool_->SubmitJob(VSIGZipWriteHandleMT::CRCCompute,
2379
0
                                           psJob);
2380
0
                    }
2381
0
                    else
2382
0
                    {
2383
0
                        CRCCompute(psJob);
2384
0
                    }
2385
0
                    sMutex_.lock();
2386
0
                }
2387
0
            }
2388
0
        }
2389
2390
0
        for (auto iter = apoFinishedJobs_.begin();
2391
0
             iter != apoFinishedJobs_.end(); ++iter)
2392
0
        {
2393
0
            auto psJob = *iter;
2394
0
            if (psJob->nSeqNumber_ == nSeqNumberExpected_)
2395
0
            {
2396
0
                apoFinishedJobs_.erase(iter);
2397
2398
0
                const bool bIsSeqNumberExpectedZero =
2399
0
                    (nSeqNumberExpected_ == 0);
2400
0
                sMutex_.unlock();
2401
2402
0
                const size_t nToWrite = psJob->sCompressedData_.size();
2403
0
                if (panSOZIPIndex_ && !bIsSeqNumberExpectedZero &&
2404
0
                    !psJob->pBuffer_->empty())
2405
0
                {
2406
0
                    uint64_t nOffset = poBaseHandle_->Tell() - nStartOffset_;
2407
0
                    if (nSOZIPIndexEltSize_ == 8)
2408
0
                    {
2409
0
                        const uint64_t nLSBOffset = CPL_AS_LSB(nOffset);
2410
0
                        std::copy(
2411
0
                            reinterpret_cast<const uint8_t *>(&nLSBOffset),
2412
0
                            reinterpret_cast<const uint8_t *>(&nLSBOffset) +
2413
0
                                sizeof(nLSBOffset),
2414
0
                            std::back_inserter(*panSOZIPIndex_));
2415
0
                    }
2416
0
                    else
2417
0
                    {
2418
0
                        if (nOffset > std::numeric_limits<uint32_t>::max())
2419
0
                        {
2420
                            // shouldn't happen normally...
2421
0
                            CPLError(
2422
0
                                CE_Failure, CPLE_AppDefined,
2423
0
                                "Too big offset for SOZIP_OFFSET_SIZE = 4");
2424
0
                            panSOZIPIndex_->clear();
2425
0
                            panSOZIPIndex_ = nullptr;
2426
0
                        }
2427
0
                        else
2428
0
                        {
2429
0
                            const uint32_t nLSBOffset32 =
2430
0
                                CPL_AS_LSB(static_cast<uint32_t>(nOffset));
2431
0
                            std::copy(reinterpret_cast<const uint8_t *>(
2432
0
                                          &nLSBOffset32),
2433
0
                                      reinterpret_cast<const uint8_t *>(
2434
0
                                          &nLSBOffset32) +
2435
0
                                          sizeof(nLSBOffset32),
2436
0
                                      std::back_inserter(*panSOZIPIndex_));
2437
0
                        }
2438
0
                    }
2439
0
                }
2440
0
                bool bError =
2441
0
                    poBaseHandle_->Write(psJob->sCompressedData_.data(),
2442
0
                                         nToWrite) < nToWrite;
2443
0
                sMutex_.lock();
2444
0
                nSeqNumberExpected_++;
2445
2446
0
                if (nDeflateType_ != CPL_DEFLATE_TYPE_GZIP)
2447
0
                {
2448
0
                    aposBuffers_.push_back(psJob->pBuffer_);
2449
0
                    psJob->pBuffer_ = nullptr;
2450
2451
0
                    apoFreeJobs_.push_back(psJob);
2452
0
                }
2453
2454
0
                if (bError)
2455
0
                {
2456
0
                    return false;
2457
0
                }
2458
2459
0
                do_it_again = true;
2460
0
                break;
2461
0
            }
2462
0
        }
2463
2464
0
        if (nDeflateType_ == CPL_DEFLATE_TYPE_GZIP)
2465
0
        {
2466
0
            for (auto iter = apoCRCFinishedJobs_.begin();
2467
0
                 iter != apoCRCFinishedJobs_.end(); ++iter)
2468
0
            {
2469
0
                auto psJob = *iter;
2470
0
                if (psJob->nSeqNumber_ == nSeqNumberExpectedCRC_)
2471
0
                {
2472
0
                    apoCRCFinishedJobs_.erase(iter);
2473
2474
0
                    nCRC_ = crc32_combine(
2475
0
                        nCRC_, psJob->nCRC_,
2476
0
                        static_cast<uLong>(psJob->pBuffer_->size()));
2477
2478
0
                    nSeqNumberExpectedCRC_++;
2479
2480
0
                    aposBuffers_.push_back(psJob->pBuffer_);
2481
0
                    psJob->pBuffer_ = nullptr;
2482
2483
0
                    apoFreeJobs_.push_back(psJob);
2484
0
                    do_it_again = true;
2485
0
                    break;
2486
0
                }
2487
0
            }
2488
0
        }
2489
0
    }
2490
0
    return true;
2491
0
}
2492
2493
/************************************************************************/
2494
/*                            GetJobObject()                            */
2495
/************************************************************************/
2496
2497
/** Return a job object ready to be filled in.
 *
 * A job is taken from the pool of recyclable jobs when one is available
 * (its compressed data is emptied and its CRC flag reset); otherwise a new
 * one is allocated.
 *
 * @return a Job pointer, never null.
 */
VSIGZipWriteHandleMT::Job *VSIGZipWriteHandleMT::GetJobObject()
{
    Job *poRecycled = nullptr;
    {
        std::lock_guard<std::mutex> oGuard(sMutex_);
        if (!apoFreeJobs_.empty())
        {
            poRecycled = apoFreeJobs_.back();
            apoFreeJobs_.pop_back();
            poRecycled->sCompressedData_.clear();
            poRecycled->bInCRCComputation_ = false;
        }
    }
    // The allocation of a fresh job is done without holding the mutex.
    return poRecycled ? poRecycled : new Job();
}
2512
2513
/************************************************************************/
2514
/*                               Write()                                */
2515
/************************************************************************/
2516
2517
size_t VSIGZipWriteHandleMT::Write(const void *const pBuffer,
2518
                                   size_t const nBytes)
2519
2520
0
{
2521
0
    if (bHasErrored_)
2522
0
        return 0;
2523
2524
0
    const char *pszBuffer = static_cast<const char *>(pBuffer);
2525
0
    size_t nBytesToWrite = nBytes;
2526
0
    while (nBytesToWrite > 0)
2527
0
    {
2528
0
        if (pCurBuffer_ == nullptr)
2529
0
        {
2530
0
            while (true)
2531
0
            {
2532
                // We store in a local variable instead of pCurBuffer_ directly
2533
                // to avoid Coverity Scan to be confused by the fact that we
2534
                // have used above pCurBuffer_ outside of the mutex. But what
2535
                // is protected by the mutex is aposBuffers_, not pCurBuffer_.
2536
0
                std::string *l_pCurBuffer = nullptr;
2537
0
                {
2538
0
                    std::lock_guard<std::mutex> oLock(sMutex_);
2539
0
                    if (!aposBuffers_.empty())
2540
0
                    {
2541
0
                        l_pCurBuffer = aposBuffers_.back();
2542
0
                        aposBuffers_.pop_back();
2543
0
                    }
2544
0
                }
2545
0
                pCurBuffer_ = l_pCurBuffer;
2546
0
                if (pCurBuffer_)
2547
0
                    break;
2548
2549
0
                if (poPool_)
2550
0
                {
2551
0
                    poPool_->WaitEvent();
2552
0
                }
2553
0
                if (!ProcessCompletedJobs())
2554
0
                {
2555
0
                    bHasErrored_ = true;
2556
0
                    return 0;
2557
0
                }
2558
0
            }
2559
0
            pCurBuffer_->clear();
2560
0
        }
2561
0
        size_t nConsumed =
2562
0
            std::min(nBytesToWrite, nChunkSize_ - pCurBuffer_->size());
2563
0
        pCurBuffer_->append(pszBuffer, nConsumed);
2564
0
        nCurOffset_ += nConsumed;
2565
0
        pszBuffer += nConsumed;
2566
0
        nBytesToWrite -= nConsumed;
2567
0
        if (pCurBuffer_->size() == nChunkSize_)
2568
0
        {
2569
0
            if (poPool_ == nullptr)
2570
0
            {
2571
0
                poPool_.reset(new CPLWorkerThreadPool());
2572
0
                if (!poPool_->Setup(nThreads_, nullptr, nullptr, false))
2573
0
                {
2574
0
                    bHasErrored_ = true;
2575
0
                    poPool_.reset();
2576
0
                    return 0;
2577
0
                }
2578
0
            }
2579
2580
0
            auto psJob = GetJobObject();
2581
0
            psJob->pParent_ = this;
2582
0
            psJob->pBuffer_ = pCurBuffer_;
2583
0
            psJob->nSeqNumber_ = nSeqNumberGenerated_;
2584
0
            nSeqNumberGenerated_++;
2585
0
            pCurBuffer_ = nullptr;
2586
0
            poPool_->SubmitJob(VSIGZipWriteHandleMT::DeflateCompress, psJob);
2587
0
        }
2588
0
    }
2589
2590
0
    return nBytes;
2591
0
}
2592
2593
/************************************************************************/
2594
/*                               Flush()                                */
2595
/************************************************************************/
2596
2597
/** Flush request: deliberately a no-op.
 *
 * @return always 0.
 */
int VSIGZipWriteHandleMT::Flush()
{
    // Something *could* be done here, but for now nothing is.
    return 0;
}
2604
2605
/************************************************************************/
2606
/*                                Seek()                                */
2607
/************************************************************************/
2608
2609
/** Seek in the write stream.
 *
 * Only requests that do not move the position are accepted: a zero offset
 * relative to the end or to the current position, or an absolute seek to the
 * current offset.
 *
 * @return 0 for an accepted request, -1 (with an error emitted) otherwise.
 */
int VSIGZipWriteHandleMT::Seek(vsi_l_offset nOffset, int nWhence)
{
    const bool bZeroRelativeSeek =
        nOffset == 0 && (nWhence == SEEK_END || nWhence == SEEK_CUR);
    const bool bSeekToCurrentOffset =
        nWhence == SEEK_SET && nOffset == nCurOffset_;
    if (bZeroRelativeSeek || bSeekToCurrentOffset)
        return 0;

    CPLError(CE_Failure, CPLE_NotSupported,
             "Seeking on writable compressed data streams not supported.");
    return -1;
}
2624
2625
/************************************************************************/
2626
/*                                Tell()                                */
2627
/************************************************************************/
2628
2629
/** Return the current offset, i.e. the number of uncompressed bytes
 * accepted by Write() so far.
 */
vsi_l_offset VSIGZipWriteHandleMT::Tell()
{
    return nCurOffset_;
}
2634
2635
/************************************************************************/
2636
/* ==================================================================== */
2637
/*                       VSIGZipWriteHandle                             */
2638
/* ==================================================================== */
2639
/************************************************************************/
2640
2641
class VSIGZipWriteHandle final : public VSIVirtualHandle
2642
{
2643
    CPL_DISALLOW_COPY_ASSIGN(VSIGZipWriteHandle)
2644
2645
    VSIVirtualHandle *m_poBaseHandle = nullptr;
2646
    z_stream sStream;
2647
    Byte *pabyInBuf = nullptr;
2648
    Byte *pabyOutBuf = nullptr;
2649
    bool bCompressActive = false;
2650
    vsi_l_offset nCurOffset = 0;
2651
    uLong nCRC = 0;
2652
    int nDeflateType = CPL_DEFLATE_TYPE_GZIP;
2653
    bool bAutoCloseBaseHandle = false;
2654
2655
  public:
2656
    VSIGZipWriteHandle(VSIVirtualHandle *poBaseHandle, int nDeflateType,
2657
                       bool bAutoCloseBaseHandleIn);
2658
2659
    ~VSIGZipWriteHandle() override;
2660
2661
    int Seek(vsi_l_offset nOffset, int nWhence) override;
2662
    vsi_l_offset Tell() override;
2663
    size_t Read(void *pBuffer, size_t nBytes) override;
2664
    size_t Write(const void *pBuffer, size_t nBytes) override;
2665
2666
    int Eof() override
2667
0
    {
2668
0
        return 0;
2669
0
    }
2670
2671
    int Error() override
2672
0
    {
2673
0
        return 0;
2674
0
    }
2675
2676
    void ClearErr() override
2677
0
    {
2678
0
    }
2679
2680
    int Flush() override;
2681
    int Close() override;
2682
};
2683
2684
/************************************************************************/
2685
/*                         VSIGZipWriteHandle()                         */
2686
/************************************************************************/
2687
2688
/** Set up the deflate stream and, for gzip output, emit the file header.
 *
 * If deflateInit2() fails, the handle is left with compression inactive and
 * Write() will return 0.
 *
 * @param poBaseHandle handle receiving the compressed stream.
 * @param nDeflateTypeIn one of the CPL_DEFLATE_TYPE_xxx values.
 * @param bAutoCloseBaseHandleIn whether Close() also closes and deletes
 *                               poBaseHandle.
 */
VSIGZipWriteHandle::VSIGZipWriteHandle(VSIVirtualHandle *poBaseHandle,
                                       int nDeflateTypeIn,
                                       bool bAutoCloseBaseHandleIn)
    : m_poBaseHandle(poBaseHandle), sStream(),
      pabyInBuf(static_cast<Byte *>(CPLMalloc(Z_BUFSIZE))),
      pabyOutBuf(static_cast<Byte *>(CPLMalloc(Z_BUFSIZE))),
      nCRC(crc32(0L, nullptr, 0)), nDeflateType(nDeflateTypeIn),
      bAutoCloseBaseHandle(bAutoCloseBaseHandleIn)
{
    sStream.zalloc = nullptr;
    sStream.zfree = nullptr;
    sStream.opaque = nullptr;
    sStream.next_out = nullptr;
    sStream.avail_in = 0;
    sStream.avail_out = 0;
    sStream.next_in = pabyInBuf;

    // Positive window bits for zlib framing; negative ones otherwise (the
    // gzip header and trailer are written by this class itself).
    const int nWindowBits =
        (nDeflateType == CPL_DEFLATE_TYPE_ZLIB) ? MAX_WBITS : -MAX_WBITS;
    const int nInitRet =
        deflateInit2(&sStream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, nWindowBits,
                     8, Z_DEFAULT_STRATEGY);
    if (nInitRet != Z_OK)
    {
        bCompressActive = false;
        return;
    }

    if (nDeflateType == CPL_DEFLATE_TYPE_GZIP)
    {
        char header[11] = {};

        // Write a very simple .gz header:
        snprintf(header, sizeof(header), "%c%c%c%c%c%c%c%c%c%c", gz_magic[0],
                 gz_magic[1], Z_DEFLATED, 0 /*flags*/, 0, 0, 0, 0 /*time*/,
                 0 /*xflags*/, 0x03);
        m_poBaseHandle->Write(header, 10);
    }

    bCompressActive = true;
}
2729
2730
/************************************************************************/
2731
/*                       VSICreateGZipWritable()                        */
2732
/************************************************************************/
2733
2734
/** Create a compressing write handle on top of an existing handle.
 *
 * Convenience overload that forwards to the extended overload with default
 * threading settings, default chunk size and no SOZip index.
 *
 * @param poBaseHandle handle receiving the compressed stream.
 * @param nDeflateTypeIn one of the CPL_DEFLATE_TYPE_xxx values.
 * @param bAutoCloseBaseHandle non-zero if closing the returned handle must
 *                             also close poBaseHandle.
 * @return the new handle.
 */
VSIVirtualHandle *VSICreateGZipWritable(VSIVirtualHandle *poBaseHandle,
                                        int nDeflateTypeIn,
                                        int bAutoCloseBaseHandle)
{
    const bool bAutoClose = CPL_TO_BOOL(bAutoCloseBaseHandle);
    return VSICreateGZipWritable(poBaseHandle, nDeflateTypeIn, bAutoClose,
                                 0, 0, 0, nullptr);
}
2742
2743
/** Create a compressing write handle on top of an existing handle.
 *
 * The multi-threaded implementation is selected when more than one thread
 * is to be used or when an explicit chunk size is given; otherwise the
 * single-stream implementation is returned.
 *
 * @param poBaseHandle handle receiving the compressed stream.
 * @param nDeflateTypeIn one of the CPL_DEFLATE_TYPE_xxx values.
 * @param bAutoCloseBaseHandle whether closing the returned handle must also
 *                             close poBaseHandle.
 * @param nThreads number of threads; a value <= 0 means "use the value
 *                 returned by GDALGetNumThreads()".
 * @param nChunkSize chunk size, or 0 when unspecified.
 * @param nSOZIPIndexEltSize size of an element of the SOZip index.
 * @param panSOZIPIndex SOZip index to fill, or nullptr.
 * @return the new handle.
 */
VSIVirtualHandle *VSICreateGZipWritable(VSIVirtualHandle *poBaseHandle,
                                        int nDeflateTypeIn,
                                        bool bAutoCloseBaseHandle, int nThreads,
                                        size_t nChunkSize,
                                        size_t nSOZIPIndexEltSize,
                                        std::vector<uint8_t> *panSOZIPIndex)
{
    if (nThreads <= 0)
    {
        nThreads = GDALGetNumThreads(/* nMaxVal = */ 128,
                                     /* bDefaultToAllCPUs = */ false);
    }

    const bool bUseMultiThreaded = nThreads > 1 || nChunkSize > 0;
    if (!bUseMultiThreaded)
    {
        return new VSIGZipWriteHandle(poBaseHandle, nDeflateTypeIn,
                                      bAutoCloseBaseHandle);
    }

    // coverity[tainted_data]
    return new VSIGZipWriteHandleMT(poBaseHandle, nDeflateTypeIn,
                                    bAutoCloseBaseHandle, nThreads, nChunkSize,
                                    nSOZIPIndexEltSize, panSOZIPIndex);
}
2764
2765
/************************************************************************/
2766
/*                        ~VSIGZipWriteHandle()                         */
2767
/************************************************************************/
2768
2769
/** Finalize the stream if it has not been closed yet, then release the
 * staging buffers.
 */
VSIGZipWriteHandle::~VSIGZipWriteHandle()
{
    // Explicitly qualified call, as this runs from the destructor.
    if (bCompressActive)
        VSIGZipWriteHandle::Close();

    CPLFree(pabyOutBuf);
    CPLFree(pabyInBuf);
}
2778
2779
/************************************************************************/
2780
/*                               Close()                                */
2781
/************************************************************************/
2782
2783
int VSIGZipWriteHandle::Close()
2784
2785
0
{
2786
0
    int nRet = 0;
2787
0
    if (bCompressActive)
2788
0
    {
2789
0
        sStream.next_out = pabyOutBuf;
2790
0
        sStream.avail_out = static_cast<uInt>(Z_BUFSIZE);
2791
2792
0
        const int zlibRet = deflate(&sStream, Z_FINISH);
2793
0
        CPLAssertAlwaysEval(zlibRet == Z_STREAM_END);
2794
2795
0
        const size_t nOutBytes =
2796
0
            static_cast<uInt>(Z_BUFSIZE) - sStream.avail_out;
2797
2798
0
        deflateEnd(&sStream);
2799
2800
0
        if (m_poBaseHandle->Write(pabyOutBuf, nOutBytes) < nOutBytes)
2801
0
        {
2802
0
            nRet = -1;
2803
0
        }
2804
2805
0
        if (nRet == 0 && nDeflateType == CPL_DEFLATE_TYPE_GZIP)
2806
0
        {
2807
0
            const GUInt32 anTrailer[2] = {
2808
0
                CPL_LSBWORD32(static_cast<GUInt32>(nCRC)),
2809
0
                CPL_LSBWORD32(static_cast<GUInt32>(nCurOffset))};
2810
2811
0
            if (m_poBaseHandle->Write(anTrailer, 8) < 8)
2812
0
            {
2813
0
                nRet = -1;
2814
0
            }
2815
0
        }
2816
2817
0
        if (bAutoCloseBaseHandle)
2818
0
        {
2819
0
            if (nRet == 0)
2820
0
                nRet = m_poBaseHandle->Close();
2821
2822
0
            delete m_poBaseHandle;
2823
0
        }
2824
2825
0
        bCompressActive = false;
2826
0
    }
2827
2828
0
    return nRet;
2829
0
}
2830
2831
/************************************************************************/
2832
/*                                Read()                                */
2833
/************************************************************************/
2834
2835
size_t VSIGZipWriteHandle::Read(void * /* pBuffer */, size_t /* nBytes */)
2836
0
{
2837
0
    CPLError(CE_Failure, CPLE_NotSupported,
2838
0
             "VSIFReadL is not supported on GZip write streams");
2839
0
    return 0;
2840
0
}
2841
2842
/************************************************************************/
2843
/*                               Write()                                */
2844
/************************************************************************/
2845
2846
size_t VSIGZipWriteHandle::Write(const void *const pBuffer,
2847
                                 size_t const nBytesToWrite)
2848
2849
0
{
2850
0
    {
2851
0
        size_t nOffset = 0;
2852
0
        while (nOffset < nBytesToWrite)
2853
0
        {
2854
0
            uInt nChunk = static_cast<uInt>(std::min(
2855
0
                static_cast<size_t>(UINT_MAX), nBytesToWrite - nOffset));
2856
0
            nCRC =
2857
0
                crc32(nCRC, reinterpret_cast<const Bytef *>(pBuffer) + nOffset,
2858
0
                      nChunk);
2859
0
            nOffset += nChunk;
2860
0
        }
2861
0
    }
2862
2863
0
    if (!bCompressActive)
2864
0
        return 0;
2865
2866
0
    size_t nNextByte = 0;
2867
0
    while (nNextByte < nBytesToWrite)
2868
0
    {
2869
0
        sStream.next_out = pabyOutBuf;
2870
0
        sStream.avail_out = static_cast<uInt>(Z_BUFSIZE);
2871
2872
0
        if (sStream.avail_in > 0)
2873
0
            memmove(pabyInBuf, sStream.next_in, sStream.avail_in);
2874
2875
0
        const uInt nNewBytesToWrite = static_cast<uInt>(
2876
0
            std::min(static_cast<size_t>(Z_BUFSIZE - sStream.avail_in),
2877
0
                     nBytesToWrite - nNextByte));
2878
0
        memcpy(pabyInBuf + sStream.avail_in,
2879
0
               reinterpret_cast<const Byte *>(pBuffer) + nNextByte,
2880
0
               nNewBytesToWrite);
2881
2882
0
        sStream.next_in = pabyInBuf;
2883
0
        sStream.avail_in += nNewBytesToWrite;
2884
2885
0
        const int zlibRet = deflate(&sStream, Z_NO_FLUSH);
2886
0
        CPLAssertAlwaysEval(zlibRet == Z_OK);
2887
2888
0
        const size_t nOutBytes =
2889
0
            static_cast<uInt>(Z_BUFSIZE) - sStream.avail_out;
2890
2891
0
        if (nOutBytes > 0)
2892
0
        {
2893
0
            if (m_poBaseHandle->Write(pabyOutBuf, nOutBytes) < nOutBytes)
2894
0
                return 0;
2895
0
        }
2896
2897
0
        nNextByte += nNewBytesToWrite;
2898
0
        nCurOffset += nNewBytesToWrite;
2899
0
    }
2900
2901
0
    return nBytesToWrite;
2902
0
}
2903
2904
/************************************************************************/
2905
/*                               Flush()                                */
2906
/************************************************************************/
2907
2908
/** Flush request: deliberately a no-op.
 *
 * @return always 0.
 */
int VSIGZipWriteHandle::Flush()
{
    // Something *could* be done here, but for now nothing is.
    return 0;
}
2915
2916
/************************************************************************/
2917
/*                                Seek()                                */
2918
/************************************************************************/
2919
2920
/** Seek in the write stream.
 *
 * Only requests that do not move the position are accepted: a zero offset
 * relative to the end or to the current position, or an absolute seek to the
 * current offset.
 *
 * @return 0 for an accepted request, -1 (with an error emitted) otherwise.
 */
int VSIGZipWriteHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    const bool bZeroRelativeSeek =
        nOffset == 0 && (nWhence == SEEK_END || nWhence == SEEK_CUR);
    const bool bSeekToCurrentOffset =
        nWhence == SEEK_SET && nOffset == nCurOffset;
    if (bZeroRelativeSeek || bSeekToCurrentOffset)
        return 0;

    CPLError(CE_Failure, CPLE_NotSupported,
             "Seeking on writable compressed data streams not supported.");
    return -1;
}
2935
2936
/************************************************************************/
2937
/*                                Tell()                                */
2938
/************************************************************************/
2939
2940
/** Return the current offset, i.e. the number of uncompressed bytes
 * consumed by Write() so far.
 */
vsi_l_offset VSIGZipWriteHandle::Tell()
{
    return nCurOffset;
}
2945
2946
/************************************************************************/
2947
/* ==================================================================== */
2948
/*                       VSIGZipFilesystemHandler                       */
2949
/* ==================================================================== */
2950
/************************************************************************/
2951
2952
/************************************************************************/
2953
/*                     ~VSIGZipFilesystemHandler()                      */
2954
/************************************************************************/
2955
2956
/** Release the cached handle, if any, after telling it not to save its
 * state back into this handler.
 */
VSIGZipFilesystemHandler::~VSIGZipFilesystemHandler()
{
    if (!poHandleLastGZipFile)
        return;

    poHandleLastGZipFile->UnsetCanSaveInfo();
    poHandleLastGZipFile.reset();
}
2964
2965
/************************************************************************/
2966
/*                              SaveInfo()                              */
2967
/************************************************************************/
2968
2969
/** Locking wrapper around SaveInfo_unlocked(): takes the handler mutex for
 * the duration of the call.
 *
 * @param poHandle handle whose state is a candidate for caching.
 */
void VSIGZipFilesystemHandler::SaveInfo(VSIGZipHandle *poHandle)
{
    std::unique_lock oHandlerLock(oMutex);
    SaveInfo_unlocked(poHandle);
}
2974
2975
/** Possibly replace the cached handle with a duplicate of poHandle.
 *
 * The cache is replaced when it is empty, when it refers to another base
 * file, or when poHandle has read further than the cached handle. The
 * duplicate stored in the cache has its base handle closed.
 *
 * Re-entrant calls (detected through m_bInSaveInfo) return immediately.
 *
 * @param poHandle handle whose state is a candidate for caching.
 */
void VSIGZipFilesystemHandler::SaveInfo_unlocked(VSIGZipHandle *poHandle)
{
    if (m_bInSaveInfo)
        return;
    m_bInSaveInfo = true;

    CPLAssert(poHandle != poHandleLastGZipFile.get());
    CPLAssert(poHandle->GetBaseFileName() != nullptr);

    bool bReplaceCachedHandle = true;
    if (poHandleLastGZipFile != nullptr &&
        strcmp(poHandleLastGZipFile->GetBaseFileName(),
               poHandle->GetBaseFileName()) == 0)
    {
        // Same file: only replace if the candidate has progressed further.
        bReplaceCachedHandle = poHandle->GetLastReadOffset() >
                               poHandleLastGZipFile->GetLastReadOffset();
    }

    if (bReplaceCachedHandle)
    {
        // Detach the previous handle from the member before destroying it,
        // so that the member is already null while it is being destroyed.
        std::unique_ptr<VSIGZipHandle> poPrevious(
            std::move(poHandleLastGZipFile));
        if (poPrevious)
        {
            poPrevious->UnsetCanSaveInfo();
            poPrevious.reset();
        }

        poHandleLastGZipFile.reset(poHandle->Duplicate());
        if (poHandleLastGZipFile)
            poHandleLastGZipFile->CloseBaseHandle();
    }

    m_bInSaveInfo = false;
}
3003
3004
/************************************************************************/
3005
/*                                Open()                                */
3006
/************************************************************************/
3007
3008
/** Open a /vsigzip/ file.
 *
 * With a 'w' in the access string, the underlying file is created and
 * wrapped in a compressing write handle ("w+" is rejected). Otherwise the
 * file is opened for reading, and the gzip reader is wrapped in a buffered
 * reader.
 *
 * @param pszFilename file name, expected to start with "/vsigzip/".
 * @param pszAccess access string.
 * @return the handle, or nullptr on failure.
 */
VSIVirtualHandleUniquePtr
VSIGZipFilesystemHandler::Open(const char *pszFilename, const char *pszAccess,
                               bool /* bSetError */,
                               CSLConstList /* papszOptions */)
{
    if (!STARTS_WITH_CI(pszFilename, "/vsigzip/"))
        return nullptr;

    const char *pszBaseFileName = pszFilename + strlen("/vsigzip/");
    VSIFilesystemHandler *poFSHandler =
        VSIFileManager::GetHandler(pszBaseFileName);

    /* -------------------------------------------------------------------- */
    /*      Write access: create a writable handle on the underlying        */
    /*      file name. Update mode (w+) is not supported.                   */
    /* -------------------------------------------------------------------- */
    const bool bWriteAccess = strchr(pszAccess, 'w') != nullptr;
    if (bWriteAccess)
    {
        if (strchr(pszAccess, '+') != nullptr)
        {
            CPLError(CE_Failure, CPLE_AppDefined,
                     "Write+update (w+) not supported for /vsigzip, "
                     "only read-only or write-only.");
            return nullptr;
        }

        auto poUnderlying = poFSHandler->Open(pszBaseFileName, "wb");
        if (poUnderlying == nullptr)
            return nullptr;

        // The presence of 'z' in the access string is passed as the deflate
        // type argument.
        const bool bHasZInAccess = strchr(pszAccess, 'z') != nullptr;
        return VSIVirtualHandleUniquePtr(VSICreateGZipWritable(
            poUnderlying.release(), bHasZInAccess, TRUE));
    }

    /* -------------------------------------------------------------------- */
    /*      Read access.                                                    */
    /* -------------------------------------------------------------------- */
    VSIGZipHandle *poGZIPHandle = OpenGZipReadOnly(pszFilename, pszAccess);
    if (poGZIPHandle == nullptr)
        return nullptr;

    // Wrap the VSIGZipHandle inside a buffered reader, which dramatically
    // improves performance when doing small backward seeks.
    return VSIVirtualHandleUniquePtr(
        VSICreateBufferedReaderHandle(poGZIPHandle));
}
3059
3060
/************************************************************************/
3061
/*                      SupportsSequentialWrite()                       */
3062
/************************************************************************/
3063
3064
bool VSIGZipFilesystemHandler::SupportsSequentialWrite(const char *pszPath,
3065
                                                       bool bAllowLocalTempFile)
3066
0
{
3067
0
    if (!STARTS_WITH_CI(pszPath, "/vsigzip/"))
3068
0
        return false;
3069
0
    const char *pszBaseFileName = pszPath + strlen("/vsigzip/");
3070
0
    VSIFilesystemHandler *poFSHandler =
3071
0
        VSIFileManager::GetHandler(pszBaseFileName);
3072
0
    return poFSHandler->SupportsSequentialWrite(pszPath, bAllowLocalTempFile);
3073
0
}
3074
3075
/************************************************************************/
3076
/*                          OpenGZipReadOnly()                          */
3077
/************************************************************************/
3078
3079
/** Open a /vsigzip/ file for reading.
 *
 * Outside of fuzzing builds, when the cached handle refers to the same base
 * file and the access string is "rb", a duplicate of the cached handle is
 * returned if it can be created. Otherwise the underlying file is opened,
 * its two first bytes are checked against the gzip magic number, the cached
 * handle is dropped and a new reader is created.
 *
 * @param pszFilename file name starting with "/vsigzip/".
 * @param pszAccess access string.
 * @return a new handle owned by the caller, or nullptr on failure.
 */
VSIGZipHandle *
VSIGZipFilesystemHandler::OpenGZipReadOnly(const char *pszFilename,
                                           const char *pszAccess)
{
    const char *pszBaseFileName = pszFilename + strlen("/vsigzip/");
    VSIFilesystemHandler *poFSHandler =
        VSIFileManager::GetHandler(pszBaseFileName);

    std::unique_lock oLock(oMutex);

#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    // Disable caching in fuzzing mode as the /vsigzip/ file is likely to
    // change very often
    // TODO: filename-based logic isn't enough. We should probably check
    // timestamp and/or file size.
    const bool bCachedHandleMatches =
        poHandleLastGZipFile != nullptr &&
        strcmp(pszBaseFileName, poHandleLastGZipFile->GetBaseFileName()) ==
            0 &&
        EQUAL(pszAccess, "rb");
    if (bCachedHandleMatches)
    {
        // If the duplication fails, fall through to a regular open.
        if (VSIGZipHandle *poDuplicate = poHandleLastGZipFile->Duplicate())
            return poDuplicate;
    }
#else
    CPL_IGNORE_RET_VAL(pszAccess);
#endif

    VSIVirtualHandleUniquePtr poVirtualHandle(
        poFSHandler->Open(pszBaseFileName, "rb"));
    if (poVirtualHandle == nullptr)
        return nullptr;

    // Reject right away anything that does not start with the gzip magic
    // number.
    unsigned char signature[2] = {'\0', '\0'};
    const bool bHasGZipSignature = poVirtualHandle->Read(signature, 2) == 2 &&
                                   signature[0] == gz_magic[0] &&
                                   signature[1] == gz_magic[1];
    if (!bHasGZipSignature)
        return nullptr;

    // Drop the cached handle before creating the new reader.
    if (poHandleLastGZipFile)
    {
        poHandleLastGZipFile->UnsetCanSaveInfo();
        poHandleLastGZipFile.reset();
    }

    auto poHandle = std::make_unique<VSIGZipHandle>(std::move(poVirtualHandle),
                                                    pszBaseFileName);
    return poHandle->IsInitOK() ? poHandle.release() : nullptr;
}
3133
3134
/************************************************************************/
3135
/*                                Stat()                                */
3136
/************************************************************************/
3137
3138
int VSIGZipFilesystemHandler::Stat(const char *pszFilename,
3139
                                   VSIStatBufL *pStatBuf, int nFlags)
3140
3.97k
{
3141
3.97k
    if (!STARTS_WITH_CI(pszFilename, "/vsigzip/"))
3142
105
        return -1;
3143
3144
3.87k
    std::unique_lock oLock(oMutex);
3145
3146
3.87k
    memset(pStatBuf, 0, sizeof(VSIStatBufL));
3147
3148
3.87k
    if (poHandleLastGZipFile != nullptr &&
3149
0
        strcmp(pszFilename + strlen("/vsigzip/"),
3150
0
               poHandleLastGZipFile->GetBaseFileName()) == 0)
3151
0
    {
3152
0
        if (poHandleLastGZipFile->GetUncompressedSize() != 0)
3153
0
        {
3154
0
            pStatBuf->st_mode = S_IFREG;
3155
0
            pStatBuf->st_size = poHandleLastGZipFile->GetUncompressedSize();
3156
0
            return 0;
3157
0
        }
3158
0
    }
3159
3160
    // Begin by doing a stat on the real file.
3161
3.87k
    int ret = VSIStatExL(pszFilename + strlen("/vsigzip/"), pStatBuf, nFlags);
3162
3163
3.87k
    if (ret == 0 && (nFlags & VSI_STAT_SIZE_FLAG))
3164
182
    {
3165
182
        CPLString osCacheFilename(pszFilename + strlen("/vsigzip/"));
3166
182
        osCacheFilename += ".properties";
3167
3168
        // Can we save a bit of seeking by using a .properties file?
3169
182
        VSILFILE *fpCacheLength = VSIFOpenL(osCacheFilename.c_str(), "rb");
3170
182
        if (fpCacheLength)
3171
94
        {
3172
94
            const char *pszLine;
3173
94
            GUIntBig nCompressedSize = 0;
3174
94
            GUIntBig nUncompressedSize = 0;
3175
94
            while ((pszLine = CPLReadLineL(fpCacheLength)) != nullptr)
3176
0
            {
3177
0
                if (STARTS_WITH_CI(pszLine, "compressed_size="))
3178
0
                {
3179
0
                    const char *pszBuffer =
3180
0
                        pszLine + strlen("compressed_size=");
3181
0
                    nCompressedSize = CPLScanUIntBig(
3182
0
                        pszBuffer, static_cast<int>(strlen(pszBuffer)));
3183
0
                }
3184
0
                else if (STARTS_WITH_CI(pszLine, "uncompressed_size="))
3185
0
                {
3186
0
                    const char *pszBuffer =
3187
0
                        pszLine + strlen("uncompressed_size=");
3188
0
                    nUncompressedSize = CPLScanUIntBig(
3189
0
                        pszBuffer, static_cast<int>(strlen(pszBuffer)));
3190
0
                }
3191
0
            }
3192
3193
94
            CPL_IGNORE_RET_VAL(VSIFCloseL(fpCacheLength));
3194
3195
94
            if (nCompressedSize == static_cast<GUIntBig>(pStatBuf->st_size))
3196
94
            {
3197
                // Patch with the uncompressed size.
3198
94
                pStatBuf->st_size = nUncompressedSize;
3199
3200
94
                VSIGZipHandle *poHandle =
3201
94
                    VSIGZipFilesystemHandler::OpenGZipReadOnly(pszFilename,
3202
94
                                                               "rb");
3203
94
                if (poHandle)
3204
0
                {
3205
0
                    poHandle->SetUncompressedSize(nUncompressedSize);
3206
0
                    SaveInfo_unlocked(poHandle);
3207
0
                    delete poHandle;
3208
0
                }
3209
3210
94
                return ret;
3211
94
            }
3212
94
        }
3213
3214
        // No, then seek at the end of the data (slow).
3215
88
        VSIGZipHandle *poHandle =
3216
88
            VSIGZipFilesystemHandler::OpenGZipReadOnly(pszFilename, "rb");
3217
88
        if (poHandle)
3218
0
        {
3219
0
            poHandle->Seek(0, SEEK_END);
3220
0
            const GUIntBig uncompressed_size =
3221
0
                static_cast<GUIntBig>(poHandle->Tell());
3222
0
            poHandle->Seek(0, SEEK_SET);
3223
3224
            // Patch with the uncompressed size.
3225
0
            pStatBuf->st_size = uncompressed_size;
3226
3227
0
            delete poHandle;
3228
0
        }
3229
88
        else
3230
88
        {
3231
88
            ret = -1;
3232
88
        }
3233
88
    }
3234
3235
3.77k
    return ret;
3236
3.87k
}
3237
3238
/************************************************************************/
3239
/*                             ReadDirEx()                              */
3240
/************************************************************************/
3241
3242
/**
 * Directory listing for this handler.
 *
 * Both arguments are ignored.
 *
 * @return always nullptr.
 */
char **VSIGZipFilesystemHandler::ReadDirEx(const char * /* pszDirname */,
                                           int /* nMaxFiles */)
{
    char **papszNoEntries = nullptr;
    return papszNoEntries;
}
3247
3248
/************************************************************************/
3249
/*                             GetOptions()                             */
3250
/************************************************************************/
3251
3252
/**
 * Return the XML description of the options of this handler.
 *
 * @return a string literal listing the GDAL_NUM_THREADS and
 *         CPL_VSIL_DEFLATE_CHUNK_SIZE options.
 */
const char *VSIGZipFilesystemHandler::GetOptions()
{
    static const char *const pszOptionsXML =
        "<Options>"
        "  <Option name='GDAL_NUM_THREADS' type='string' description='Number "
        "of threads for compression. Either a integer or ALL_CPUS'/>"
        "  <Option name='CPL_VSIL_DEFLATE_CHUNK_SIZE' type='string' "
        "description="
        "'Chunk of uncompressed data for parallelization. Use K(ilobytes) or "
        "M(egabytes) suffix' default='1M'/>"
        "</Options>";
    return pszOptionsXML;
}
3263
3264
//! @endcond
3265
/************************************************************************/
3266
/*                     VSIInstallGZipFileHandler()                      */
3267
/************************************************************************/
3268
3269
/*!
3270
 \brief Install GZip file system handler.
3271
3272
 A special file handler is installed that allows reading on-the-fly and
3273
 writing in GZip (.gz) files.
3274
3275
 All portions of the file system underneath the base
3276
 path "/vsigzip/" will be handled by this driver.
3277
3278
 \verbatim embed:rst
3279
 See :ref:`/vsigzip/ documentation <vsigzip>`
3280
 \endverbatim
3281
3282
 */
3283
3284
void VSIInstallGZipFileHandler()
3285
3
{
3286
3
    VSIFileManager::InstallHandler(
3287
3
        "/vsigzip/", std::make_shared<VSIGZipFilesystemHandler>());
3288
3
}
3289
3290
//! @cond Doxygen_Suppress
3291
3292
/************************************************************************/
3293
/* ==================================================================== */
3294
/*                         VSIZipEntryFileOffset                        */
3295
/* ==================================================================== */
3296
/************************************************************************/
3297
3298
class VSIZipEntryFileOffset final : public VSIArchiveEntryFileOffset
3299
{
3300
  public:
3301
    unz_file_pos m_file_pos;
3302
3303
0
    explicit VSIZipEntryFileOffset(unz_file_pos file_pos) : m_file_pos()
3304
0
    {
3305
0
        m_file_pos.pos_in_zip_directory = file_pos.pos_in_zip_directory;
3306
0
        m_file_pos.num_of_file = file_pos.num_of_file;
3307
0
    }
3308
3309
    ~VSIZipEntryFileOffset() override;
3310
};
3311
3312
0
VSIZipEntryFileOffset::~VSIZipEntryFileOffset() = default;
3313
3314
/************************************************************************/
3315
/* ==================================================================== */
3316
/*                             VSIZipReader                             */
3317
/* ==================================================================== */
3318
/************************************************************************/
3319
3320
class VSIZipReader final : public VSIArchiveReader
3321
{
3322
    CPL_DISALLOW_COPY_ASSIGN(VSIZipReader)
3323
3324
  private:
3325
    unzFile unzF = nullptr;
3326
    unz_file_pos file_pos;
3327
    GUIntBig nNextFileSize = 0;
3328
    CPLString osNextFileName{};
3329
    GIntBig nModifiedTime = 0;
3330
3331
    bool SetInfo();
3332
3333
  public:
3334
    explicit VSIZipReader(const char *pszZipFileName);
3335
    ~VSIZipReader() override;
3336
3337
    int IsValid()
3338
1.37k
    {
3339
1.37k
        return unzF != nullptr;
3340
1.37k
    }
3341
3342
    unzFile GetUnzFileHandle()
3343
0
    {
3344
0
        return unzF;
3345
0
    }
3346
3347
    int GotoFirstFile() override;
3348
    int GotoNextFile() override;
3349
3350
    VSIArchiveEntryFileOffset *GetFileOffset() override
3351
0
    {
3352
0
        return new VSIZipEntryFileOffset(file_pos);
3353
0
    }
3354
3355
    GUIntBig GetFileSize() override
3356
0
    {
3357
0
        return nNextFileSize;
3358
0
    }
3359
3360
    CPLString GetFileName() override
3361
0
    {
3362
0
        return osNextFileName;
3363
0
    }
3364
3365
    GIntBig GetModifiedTime() override
3366
0
    {
3367
0
        return nModifiedTime;
3368
0
    }
3369
3370
    int GotoFileOffset(VSIArchiveEntryFileOffset *pOffset) override;
3371
};
3372
3373
/************************************************************************/
3374
/*                            VSIZipReader()                            */
3375
/************************************************************************/
3376
3377
/**
 * Open the archive with cpl_unzOpen() and reset the current position.
 *
 * Failure to open is not reported here: unzF is then whatever
 * cpl_unzOpen() returned, and IsValid() tests it against nullptr.
 */
VSIZipReader::VSIZipReader(const char *pszZipFileName)
    : unzF(cpl_unzOpen(pszZipFileName)), file_pos()
{
    file_pos.num_of_file = 0;
    file_pos.pos_in_zip_directory = 0;
}
3383
3384
/************************************************************************/
3385
/*                           ~VSIZipReader()                            */
3386
/************************************************************************/
3387
3388
/** Close the archive handle if one was opened. */
VSIZipReader::~VSIZipReader()
{
    if (unzF != nullptr)
    {
        cpl_unzClose(unzF);
    }
}
3393
3394
/************************************************************************/
3395
/*                              SetInfo()                               */
3396
/************************************************************************/
3397
3398
bool VSIZipReader::SetInfo()
3399
0
{
3400
0
    char fileName[8193] = {};
3401
0
    unz_file_info file_info;
3402
0
    if (UNZ_OK != cpl_unzGetCurrentFileInfo(unzF, &file_info, fileName,
3403
0
                                            sizeof(fileName) - 1, nullptr, 0,
3404
0
                                            nullptr, 0))
3405
0
    {
3406
0
        CPLError(CE_Failure, CPLE_FileIO, "cpl_unzGetCurrentFileInfo failed");
3407
0
        cpl_unzGetFilePos(unzF, &file_pos);
3408
0
        return false;
3409
0
    }
3410
0
    fileName[sizeof(fileName) - 1] = '\0';
3411
0
    osNextFileName = fileName;
3412
0
    nNextFileSize = file_info.uncompressed_size;
3413
0
    struct tm brokendowntime;
3414
0
    brokendowntime.tm_sec = file_info.tmu_date.tm_sec;
3415
0
    brokendowntime.tm_min = file_info.tmu_date.tm_min;
3416
0
    brokendowntime.tm_hour = file_info.tmu_date.tm_hour;
3417
0
    brokendowntime.tm_mday = file_info.tmu_date.tm_mday;
3418
0
    brokendowntime.tm_mon = file_info.tmu_date.tm_mon;
3419
    // The minizip conventions differs from the Unix one.
3420
0
    brokendowntime.tm_year = file_info.tmu_date.tm_year - 1900;
3421
0
    nModifiedTime = CPLYMDHMSToUnixTime(&brokendowntime);
3422
3423
0
    cpl_unzGetFilePos(unzF, &file_pos);
3424
0
    return true;
3425
0
}
3426
3427
/************************************************************************/
3428
/*                            GotoNextFile()                            */
3429
/************************************************************************/
3430
3431
int VSIZipReader::GotoNextFile()
3432
0
{
3433
0
    if (cpl_unzGoToNextFile(unzF) != UNZ_OK)
3434
0
        return FALSE;
3435
3436
0
    if (!SetInfo())
3437
0
        return FALSE;
3438
3439
0
    return TRUE;
3440
0
}
3441
3442
/************************************************************************/
3443
/*                           GotoFirstFile()                            */
3444
/************************************************************************/
3445
3446
int VSIZipReader::GotoFirstFile()
3447
0
{
3448
0
    if (cpl_unzGoToFirstFile(unzF) != UNZ_OK)
3449
0
        return FALSE;
3450
3451
0
    if (!SetInfo())
3452
0
        return FALSE;
3453
3454
0
    return TRUE;
3455
0
}
3456
3457
/************************************************************************/
3458
/*                           GotoFileOffset()                           */
3459
/************************************************************************/
3460
3461
/**
 * Move to the entry identified by a previously saved offset and refresh
 * the cached entry information.
 *
 * @param pOffset  Offset object; expected to be a VSIZipEntryFileOffset
 *                 (such as the one returned by GetFileOffset()).
 * @return TRUE on success, FALSE if pOffset is null, if the position could
 *         not be restored, or if the entry information could not be read.
 */
int VSIZipReader::GotoFileOffset(VSIArchiveEntryFileOffset *pOffset)
{
    if (pOffset == nullptr)
    {
        CPLError(CE_Failure, CPLE_AppDefined, "GotoFileOffset failed");
        return FALSE;
    }

    // Downcast within the class hierarchy: static_cast is the appropriate
    // named cast here (reinterpret_cast performs no base/derived adjustment).
    VSIZipEntryFileOffset *pZipEntryOffset =
        static_cast<VSIZipEntryFileOffset *>(pOffset);
    if (cpl_unzGoToFilePos(unzF, &(pZipEntryOffset->m_file_pos)) != UNZ_OK)
    {
        CPLError(CE_Failure, CPLE_AppDefined, "GotoFileOffset failed");
        return FALSE;
    }

    if (!SetInfo())
        return FALSE;

    return TRUE;
}
3476
3477
/************************************************************************/
3478
/* ==================================================================== */
3479
/*                       VSIZipFilesystemHandler                        */
3480
/* ==================================================================== */
3481
/************************************************************************/
3482
3483
class VSIZipWriteHandle;
3484
3485
class VSIZipFilesystemHandler final : public VSIArchiveFilesystemHandler
3486
{
3487
    CPL_DISALLOW_COPY_ASSIGN(VSIZipFilesystemHandler)
3488
3489
    std::map<CPLString, VSIZipWriteHandle *> oMapZipWriteHandles{};
3490
    VSIVirtualHandleUniquePtr OpenForWrite_unlocked(const char *pszFilename,
3491
                                                    const char *pszAccess);
3492
3493
    struct VSIFileInZipInfo
3494
    {
3495
        VSIVirtualHandleUniquePtr poVirtualHandle{};
3496
        std::map<std::string, std::string> oMapProperties{};
3497
        int nCompressionMethod = 0;
3498
        uint64_t nUncompressedSize = 0;
3499
        uint64_t nCompressedSize = 0;
3500
        uint64_t nStartDataStream = 0;
3501
        uLong nCRC = 0;
3502
        bool bSOZipIndexFound = false;
3503
        bool bSOZipIndexValid = false;
3504
        uint32_t nSOZIPVersion = 0;
3505
        uint32_t nSOZIPToSkip = 0;
3506
        uint32_t nSOZIPChunkSize = 0;
3507
        uint32_t nSOZIPOffsetSize = 0;
3508
        uint64_t nSOZIPStartData = 0;
3509
    };
3510
3511
    bool GetFileInfo(const char *pszFilename, VSIFileInZipInfo &info,
3512
                     bool bSetError);
3513
3514
  public:
3515
3
    VSIZipFilesystemHandler() = default;
3516
    ~VSIZipFilesystemHandler() override;
3517
3518
    const char *GetPrefix() const override
3519
15.3k
    {
3520
15.3k
        return "/vsizip";
3521
15.3k
    }
3522
3523
    std::vector<CPLString> GetExtensions() const override;
3524
    std::unique_ptr<VSIArchiveReader>
3525
    CreateReader(const char *pszZipFileName) override;
3526
3527
    VSIVirtualHandleUniquePtr Open(const char *pszFilename,
3528
                                   const char *pszAccess, bool bSetError,
3529
                                   CSLConstList /* papszOptions */) override;
3530
3531
    char **GetFileMetadata(const char *pszFilename, const char *pszDomain,
3532
                           CSLConstList papszOptions) override;
3533
3534
    VSIVirtualHandleUniquePtr OpenForWrite(const char *pszFilename,
3535
                                           const char *pszAccess);
3536
3537
    int CopyFile(const char *pszSource, const char *pszTarget,
3538
                 VSILFILE *fpSource, vsi_l_offset nSourceSize,
3539
                 const char *const *papszOptions,
3540
                 GDALProgressFunc pProgressFunc, void *pProgressData) override;
3541
3542
    int Mkdir(const char *pszDirname, long nMode) override;
3543
    char **ReadDirEx(const char *pszDirname, int nMaxFiles) override;
3544
    int Stat(const char *pszFilename, VSIStatBufL *pStatBuf,
3545
             int nFlags) override;
3546
3547
    const char *GetOptions() override;
3548
3549
    void RemoveFromMap(VSIZipWriteHandle *poHandle);
3550
};
3551
3552
/************************************************************************/
3553
/* ==================================================================== */
3554
/*                       VSIZipWriteHandle                              */
3555
/* ==================================================================== */
3556
/************************************************************************/
3557
3558
class VSIZipWriteHandle final : public VSIVirtualHandle
3559
{
3560
    CPL_DISALLOW_COPY_ASSIGN(VSIZipWriteHandle)
3561
3562
    VSIZipFilesystemHandler *m_poFS = nullptr;
3563
    void *m_hZIP = nullptr;
3564
    VSIZipWriteHandle *poChildInWriting = nullptr;
3565
    VSIZipWriteHandle *m_poParent = nullptr;
3566
    bool bAutoDeleteParent = false;
3567
    vsi_l_offset nCurOffset = 0;
3568
3569
  public:
3570
    VSIZipWriteHandle(VSIZipFilesystemHandler *poFS, void *hZIP,
3571
                      VSIZipWriteHandle *poParent);
3572
3573
    ~VSIZipWriteHandle() override;
3574
3575
    int Seek(vsi_l_offset nOffset, int nWhence) override;
3576
    vsi_l_offset Tell() override;
3577
    size_t Read(void *pBuffer, size_t nBytes) override;
3578
    size_t Write(const void *pBuffer, size_t nBytes) override;
3579
3580
    int Eof() override
3581
0
    {
3582
0
        return 0;
3583
0
    }
3584
3585
    int Error() override
3586
0
    {
3587
0
        return 0;
3588
0
    }
3589
3590
    void ClearErr() override
3591
0
    {
3592
0
    }
3593
3594
    int Flush() override;
3595
    int Close() override;
3596
3597
    void StartNewFile(VSIZipWriteHandle *poSubFile);
3598
    void StopCurrentFile();
3599
3600
    void *GetHandle()
3601
0
    {
3602
0
        return m_hZIP;
3603
0
    }
3604
3605
    VSIZipWriteHandle *GetChildInWriting()
3606
0
    {
3607
0
        return poChildInWriting;
3608
0
    }
3609
3610
    void SetAutoDeleteParent()
3611
0
    {
3612
0
        bAutoDeleteParent = true;
3613
0
    }
3614
};
3615
3616
/************************************************************************/
3617
/*                      ~VSIZipFilesystemHandler()                      */
3618
/************************************************************************/
3619
3620
/**
 * Emit an error for every write handle still registered in
 * oMapZipWriteHandles at destruction time. The handles themselves are not
 * touched.
 */
VSIZipFilesystemHandler::~VSIZipFilesystemHandler()
{
    for (const auto &oEntry : oMapZipWriteHandles)
    {
        const char *pszName = oEntry.first.c_str();
        CPLError(CE_Failure, CPLE_AppDefined, "%s has not been closed",
                 pszName);
    }
}
3630
3631
/************************************************************************/
3632
/*                           GetExtensions()                            */
3633
/************************************************************************/
3634
3635
std::vector<CPLString> VSIZipFilesystemHandler::GetExtensions() const
3636
1.06k
{
3637
1.06k
    std::vector<CPLString> oList;
3638
1.06k
    oList.push_back(".zip");
3639
1.06k
    oList.push_back(".kmz");
3640
1.06k
    oList.push_back(".dwf");
3641
1.06k
    oList.push_back(".ods");
3642
1.06k
    oList.push_back(".xlsx");
3643
1.06k
    oList.push_back(".xlsm");
3644
3645
    // Add to zip FS handler extensions array additional extensions
3646
    // listed in CPL_VSIL_ZIP_ALLOWED_EXTENSIONS config option.
3647
    // The extensions are divided by commas.
3648
1.06k
    const char *pszAllowedExtensions =
3649
1.06k
        CPLGetConfigOption("CPL_VSIL_ZIP_ALLOWED_EXTENSIONS", nullptr);
3650
1.06k
    if (pszAllowedExtensions)
3651
0
    {
3652
0
        char **papszExtensions =
3653
0
            CSLTokenizeString2(pszAllowedExtensions, ", ", 0);
3654
0
        for (int i = 0; papszExtensions[i] != nullptr; i++)
3655
0
        {
3656
0
            oList.push_back(papszExtensions[i]);
3657
0
        }
3658
0
        CSLDestroy(papszExtensions);
3659
0
    }
3660
3661
1.06k
    return oList;
3662
1.06k
}
3663
3664
/************************************************************************/
3665
/*                            CreateReader()                            */
3666
/************************************************************************/
3667
3668
std::unique_ptr<VSIArchiveReader>
3669
VSIZipFilesystemHandler::CreateReader(const char *pszZipFileName)
3670
1.37k
{
3671
1.37k
    auto poReader = std::make_unique<VSIZipReader>(pszZipFileName);
3672
3673
1.37k
    if (!poReader->IsValid() || !poReader->GotoFirstFile())
3674
1.37k
    {
3675
1.37k
        return nullptr;
3676
1.37k
    }
3677
3678
0
    return poReader;
3679
1.37k
}
3680
3681
/************************************************************************/
3682
/*                            VSISOZipHandle                            */
3683
/************************************************************************/
3684
3685
class VSISOZipHandle final : public VSIVirtualHandle
3686
{
3687
    VSIVirtualHandleUniquePtr poBaseHandle_{};
3688
    vsi_l_offset nPosCompressedStream_;
3689
    uint64_t compressed_size_;
3690
    uint64_t uncompressed_size_;
3691
    vsi_l_offset indexPos_;
3692
    uint32_t nToSkip_;
3693
    uint32_t nChunkSize_;
3694
    bool bEOF_ = false;
3695
    bool bError_ = false;
3696
    vsi_l_offset nCurPos_ = 0;
3697
    bool bOK_ = true;
3698
#ifdef HAVE_LIBDEFLATE
3699
    struct libdeflate_decompressor *pDecompressor_ = nullptr;
3700
#else
3701
    z_stream sStream_{};
3702
#endif
3703
3704
    VSISOZipHandle(const VSISOZipHandle &) = delete;
3705
    VSISOZipHandle &operator=(const VSISOZipHandle &) = delete;
3706
3707
  public:
3708
    VSISOZipHandle(VSIVirtualHandleUniquePtr poVirtualHandleIn,
3709
                   vsi_l_offset nPosCompressedStream, uint64_t compressed_size,
3710
                   uint64_t uncompressed_size, vsi_l_offset indexPos,
3711
                   uint32_t nToSkip, uint32_t nChunkSize);
3712
    ~VSISOZipHandle() override;
3713
3714
    int Seek(vsi_l_offset nOffset, int nWhence) override;
3715
3716
    vsi_l_offset Tell() override
3717
0
    {
3718
0
        return nCurPos_;
3719
0
    }
3720
3721
    size_t Read(void *pBuffer, size_t nBytes) override;
3722
3723
    size_t Write(const void *, size_t) override
3724
0
    {
3725
0
        return 0;
3726
0
    }
3727
3728
    int Eof() override
3729
0
    {
3730
0
        return bEOF_;
3731
0
    }
3732
3733
    int Error() override
3734
0
    {
3735
0
        return bError_;
3736
0
    }
3737
3738
    void ClearErr() override
3739
0
    {
3740
0
        bEOF_ = false;
3741
0
        bError_ = false;
3742
0
    }
3743
3744
    int Close() override;
3745
3746
    bool IsOK() const
3747
0
    {
3748
0
        return bOK_;
3749
0
    }
3750
};
3751
3752
/************************************************************************/
3753
/*                           VSISOZipHandle()                           */
3754
/************************************************************************/
3755
3756
/**
 * Take ownership of the underlying handle, store the stream geometry and
 * set up the decompressor. IsOK() reports whether that set-up succeeded.
 */
VSISOZipHandle::VSISOZipHandle(VSIVirtualHandleUniquePtr poVirtualHandleIn,
                               vsi_l_offset nPosCompressedStream,
                               uint64_t compressed_size,
                               uint64_t uncompressed_size,
                               vsi_l_offset indexPos, uint32_t nToSkip,
                               uint32_t nChunkSize)
    : poBaseHandle_(std::move(poVirtualHandleIn)),
      nPosCompressedStream_(nPosCompressedStream),
      compressed_size_(compressed_size), uncompressed_size_(uncompressed_size),
      indexPos_(indexPos), nToSkip_(nToSkip), nChunkSize_(nChunkSize)
{
#ifdef HAVE_LIBDEFLATE
    pDecompressor_ = libdeflate_alloc_decompressor();
    bOK_ = (pDecompressor_ != nullptr);
#else
    memset(&sStream_, 0, sizeof(sStream_));
    // Negative window bits: raw deflate data, without zlib/gzip header.
    bOK_ = (inflateInit2(&sStream_, -MAX_WBITS) == Z_OK);
#endif
}
3778
3779
/************************************************************************/
3780
/*                          ~VSISOZipHandle()                           */
3781
/************************************************************************/
3782
3783
/**
 * Close the underlying handle, then release the decompressor if it was
 * successfully set up by the constructor.
 */
VSISOZipHandle::~VSISOZipHandle()
{
    VSISOZipHandle::Close();

    if (!bOK_)
        return;

#ifdef HAVE_LIBDEFLATE
    libdeflate_free_decompressor(pDecompressor_);
#else
    inflateEnd(&sStream_);
#endif
}
3795
3796
/************************************************************************/
3797
/*                               Close()                                */
3798
/************************************************************************/
3799
3800
int VSISOZipHandle::Close()
3801
0
{
3802
0
    int ret = 0;
3803
0
    if (poBaseHandle_)
3804
0
    {
3805
0
        ret = poBaseHandle_->Close();
3806
0
        poBaseHandle_.reset();
3807
0
    }
3808
0
    return ret;
3809
0
}
3810
3811
/************************************************************************/
3812
/*                                Seek()                                */
3813
/************************************************************************/
3814
3815
/**
 * Change the current position in the uncompressed data.
 *
 * Only the position is updated; no I/O is done. The end-of-file flag is
 * cleared.
 *
 * @param nOffset  Offset; ignored for SEEK_END.
 * @param nWhence  SEEK_SET: absolute position. SEEK_END: position set to
 *                 the uncompressed size. Any other value: nOffset is added
 *                 to the current position.
 * @return always 0.
 */
int VSISOZipHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    bEOF_ = false;

    switch (nWhence)
    {
        case SEEK_SET:
            nCurPos_ = nOffset;
            break;

        case SEEK_END:
            nCurPos_ = uncompressed_size_;
            break;

        default:
            nCurPos_ += nOffset;
            break;
    }

    return 0;
}
3826
3827
/************************************************************************/
3828
/*                                Read()                                */
3829
/************************************************************************/
3830
3831
size_t VSISOZipHandle::Read(void *pBuffer, size_t nBytes)
3832
0
{
3833
0
    size_t nRet = nBytes;
3834
0
    size_t nToRead = nBytes;
3835
0
    if (nCurPos_ >= uncompressed_size_ && nToRead > 0)
3836
0
    {
3837
0
        bEOF_ = true;
3838
0
        return 0;
3839
0
    }
3840
3841
0
    if ((nCurPos_ % nChunkSize_) != 0)
3842
0
    {
3843
0
        bError_ = true;
3844
0
        CPLError(CE_Failure, CPLE_NotSupported,
3845
0
                 "nCurPos is not a multiple of nChunkSize");
3846
0
        return 0;
3847
0
    }
3848
0
    if (nCurPos_ + nToRead > uncompressed_size_)
3849
0
    {
3850
0
        nToRead = static_cast<size_t>(uncompressed_size_ - nCurPos_);
3851
0
        nRet = nToRead;
3852
0
    }
3853
0
    else if ((nToRead % nChunkSize_) != 0)
3854
0
    {
3855
0
        bError_ = true;
3856
0
        CPLError(CE_Failure, CPLE_NotSupported,
3857
0
                 "nToRead is not a multiple of nChunkSize");
3858
0
        return 0;
3859
0
    }
3860
3861
0
    const auto ReadOffsetInCompressedStream =
3862
0
        [this](uint64_t nChunkIdx) -> uint64_t
3863
0
    {
3864
0
        if (nChunkIdx == 0)
3865
0
            return 0;
3866
0
        if (nChunkIdx == 1 + (uncompressed_size_ - 1) / nChunkSize_)
3867
0
            return compressed_size_;
3868
0
        constexpr size_t nOffsetSize = 8;
3869
0
        if (poBaseHandle_->Seek(indexPos_ + 32 + nToSkip_ +
3870
0
                                    (nChunkIdx - 1) * nOffsetSize,
3871
0
                                SEEK_SET) != 0)
3872
0
            return static_cast<uint64_t>(-1);
3873
3874
0
        uint64_t nOffset;
3875
0
        if (!poBaseHandle_->ReadLSB(nOffset))
3876
0
            return static_cast<uint64_t>(-1);
3877
0
        return nOffset;
3878
0
    };
3879
3880
0
    size_t nOffsetInOutputBuffer = 0;
3881
0
    while (true)
3882
0
    {
3883
0
        uint64_t nOffsetInCompressedStream =
3884
0
            ReadOffsetInCompressedStream(nCurPos_ / nChunkSize_);
3885
0
        if (nOffsetInCompressedStream == static_cast<uint64_t>(-1))
3886
0
        {
3887
0
            bError_ = true;
3888
0
            CPLError(CE_Failure, CPLE_AppDefined,
3889
0
                     "Cannot read nOffsetInCompressedStream");
3890
0
            return 0;
3891
0
        }
3892
0
        uint64_t nNextOffsetInCompressedStream =
3893
0
            ReadOffsetInCompressedStream(1 + nCurPos_ / nChunkSize_);
3894
0
        if (nNextOffsetInCompressedStream == static_cast<uint64_t>(-1))
3895
0
        {
3896
0
            bError_ = true;
3897
0
            CPLError(CE_Failure, CPLE_AppDefined,
3898
0
                     "Cannot read nNextOffsetInCompressedStream");
3899
0
            return 0;
3900
0
        }
3901
3902
0
        if (nNextOffsetInCompressedStream <= nOffsetInCompressedStream ||
3903
0
            nNextOffsetInCompressedStream - nOffsetInCompressedStream >
3904
0
                13 + 2 * nChunkSize_ ||
3905
0
            nNextOffsetInCompressedStream > compressed_size_)
3906
0
        {
3907
0
            bError_ = true;
3908
0
            CPLError(
3909
0
                CE_Failure, CPLE_AppDefined,
3910
0
                "Invalid values for nOffsetInCompressedStream (" CPL_FRMT_GUIB
3911
0
                ") / "
3912
0
                "nNextOffsetInCompressedStream(" CPL_FRMT_GUIB ")",
3913
0
                static_cast<GUIntBig>(nOffsetInCompressedStream),
3914
0
                static_cast<GUIntBig>(nNextOffsetInCompressedStream));
3915
0
            return 0;
3916
0
        }
3917
3918
        // CPLDebug("VSIZIP", "Seek to compressed data at offset "
3919
        // CPL_FRMT_GUIB, static_cast<GUIntBig>(nPosCompressedStream_ +
3920
        // nOffsetInCompressedStream));
3921
0
        if (poBaseHandle_->Seek(
3922
0
                nPosCompressedStream_ + nOffsetInCompressedStream, SEEK_SET) !=
3923
0
            0)
3924
0
        {
3925
0
            bError_ = true;
3926
0
            return 0;
3927
0
        }
3928
3929
0
        const size_t nCompressedToRead = static_cast<size_t>(
3930
0
            nNextOffsetInCompressedStream - nOffsetInCompressedStream);
3931
        // CPLDebug("VSIZIP", "nCompressedToRead = %d", nCompressedToRead);
3932
0
        std::vector<GByte> abyCompressedData(nCompressedToRead);
3933
0
        if (poBaseHandle_->Read(&abyCompressedData[0], nCompressedToRead) !=
3934
0
            nCompressedToRead)
3935
0
        {
3936
0
            bError_ = true;
3937
0
            return 0;
3938
0
        }
3939
3940
0
        size_t nToReadThisIter =
3941
0
            std::min(nToRead, static_cast<size_t>(nChunkSize_));
3942
3943
0
        if (nCompressedToRead >= 5 &&
3944
0
            abyCompressedData[nCompressedToRead - 5] == 0x00 &&
3945
0
            memcmp(&abyCompressedData[nCompressedToRead - 4],
3946
0
                   "\x00\x00\xFF\xFF", 4) == 0)
3947
0
        {
3948
            // Tag this flush block as the last one.
3949
0
            abyCompressedData[nCompressedToRead - 5] = 0x01;
3950
0
        }
3951
3952
#ifdef HAVE_LIBDEFLATE
3953
        size_t nOut = 0;
3954
        if (libdeflate_deflate_decompress(
3955
                pDecompressor_, &abyCompressedData[0], nCompressedToRead,
3956
                static_cast<Bytef *>(pBuffer) + nOffsetInOutputBuffer,
3957
                nToReadThisIter, &nOut) != LIBDEFLATE_SUCCESS)
3958
        {
3959
            bError_ = true;
3960
            CPLError(
3961
                CE_Failure, CPLE_AppDefined,
3962
                "libdeflate_deflate_decompress() failed at pos " CPL_FRMT_GUIB,
3963
                static_cast<GUIntBig>(nCurPos_));
3964
            return 0;
3965
        }
3966
        if (nOut != nToReadThisIter)
3967
        {
3968
            bError_ = true;
3969
            CPLError(CE_Failure, CPLE_AppDefined,
3970
                     "Only %u bytes decompressed at pos " CPL_FRMT_GUIB
3971
                     " whereas %u where expected",
3972
                     static_cast<unsigned>(nOut),
3973
                     static_cast<GUIntBig>(nCurPos_),
3974
                     static_cast<unsigned>(nToReadThisIter));
3975
            return 0;
3976
        }
3977
#else
3978
        if constexpr (sizeof(size_t) > sizeof(uInt))
3979
0
        {
3980
0
            if (nCompressedToRead > UINT32_MAX)
3981
0
            {
3982
0
                CPLError(CE_Failure, CPLE_AppDefined,
3983
0
                         "nCompressedToRead > UINT32_MAX");
3984
0
                return 0;
3985
0
            }
3986
0
        }
3987
0
        sStream_.avail_in = static_cast<uInt>(nCompressedToRead);
3988
0
        sStream_.next_in = &abyCompressedData[0];
3989
0
        sStream_.avail_out = static_cast<int>(nToReadThisIter);
3990
0
        sStream_.next_out =
3991
0
            static_cast<Bytef *>(pBuffer) + nOffsetInOutputBuffer;
3992
3993
0
        int err = inflate(&sStream_, Z_FINISH);
3994
0
        if ((err != Z_OK && err != Z_STREAM_END))
3995
0
        {
3996
0
            bError_ = true;
3997
0
            CPLError(CE_Failure, CPLE_AppDefined,
3998
0
                     "inflate() failed at pos " CPL_FRMT_GUIB,
3999
0
                     static_cast<GUIntBig>(nCurPos_));
4000
0
            inflateReset(&sStream_);
4001
0
            return 0;
4002
0
        }
4003
0
        if (sStream_.avail_in != 0)
4004
0
            CPLDebug("VSIZIP", "avail_in = %d", sStream_.avail_in);
4005
0
        if (sStream_.avail_out != 0)
4006
0
        {
4007
0
            bError_ = true;
4008
0
            CPLError(
4009
0
                CE_Failure, CPLE_AppDefined,
4010
0
                "Only %u bytes decompressed at pos " CPL_FRMT_GUIB
4011
0
                " whereas %u where expected",
4012
0
                static_cast<unsigned>(nToReadThisIter - sStream_.avail_out),
4013
0
                static_cast<GUIntBig>(nCurPos_),
4014
0
                static_cast<unsigned>(nToReadThisIter));
4015
0
            inflateReset(&sStream_);
4016
0
            return 0;
4017
0
        }
4018
0
        inflateReset(&sStream_);
4019
0
#endif
4020
0
        nOffsetInOutputBuffer += nToReadThisIter;
4021
0
        nCurPos_ += nToReadThisIter;
4022
0
        nToRead -= nToReadThisIter;
4023
0
        if (nToRead == 0)
4024
0
            break;
4025
0
    }
4026
4027
0
    return nRet;
4028
0
}
4029
4030
/************************************************************************/
4031
/*                            GetFileInfo()                             */
4032
/************************************************************************/
4033
4034
bool VSIZipFilesystemHandler::GetFileInfo(const char *pszFilename,
4035
                                          VSIFileInZipInfo &info,
4036
                                          bool bSetError)
4037
1.58k
{
4038
4039
1.58k
    CPLString osZipInFileName;
4040
1.58k
    auto zipFilename =
4041
1.58k
        SplitFilename(pszFilename, osZipInFileName, true, bSetError);
4042
1.58k
    if (zipFilename == nullptr)
4043
1.04k
        return false;
4044
4045
535
    {
4046
535
        std::unique_lock oLock(oMutex);
4047
535
        if (oMapZipWriteHandles.find(zipFilename.get()) !=
4048
535
            oMapZipWriteHandles.end())
4049
0
        {
4050
0
            CPLError(CE_Failure, CPLE_AppDefined,
4051
0
                     "Cannot read a zip file being written");
4052
0
            return false;
4053
0
        }
4054
535
    }
4055
4056
535
    auto poReader = OpenArchiveFile(zipFilename.get(), osZipInFileName);
4057
535
    if (poReader == nullptr)
4058
535
    {
4059
535
        return false;
4060
535
    }
4061
4062
0
    VSIFilesystemHandler *poFSHandler =
4063
0
        VSIFileManager::GetHandler(zipFilename.get());
4064
4065
0
    VSIVirtualHandleUniquePtr poVirtualHandle(
4066
0
        poFSHandler->Open(zipFilename.get(), "rb"));
4067
4068
0
    if (poVirtualHandle == nullptr)
4069
0
    {
4070
0
        return false;
4071
0
    }
4072
4073
0
    unzFile unzF =
4074
0
        cpl::down_cast<VSIZipReader *>(poReader.get())->GetUnzFileHandle();
4075
4076
0
    if (cpl_unzOpenCurrentFile(unzF) != UNZ_OK)
4077
0
    {
4078
0
        CPLError(CE_Failure, CPLE_AppDefined,
4079
0
                 "cpl_unzOpenCurrentFile() failed");
4080
0
        return false;
4081
0
    }
4082
4083
0
    info.nStartDataStream = cpl_unzGetCurrentFileZStreamPos(unzF);
4084
4085
0
    unz_file_info file_info;
4086
0
    if (cpl_unzGetCurrentFileInfo(unzF, &file_info, nullptr, 0, nullptr, 0,
4087
0
                                  nullptr, 0) != UNZ_OK)
4088
0
    {
4089
0
        CPLError(CE_Failure, CPLE_AppDefined,
4090
0
                 "cpl_unzGetCurrentFileInfo() failed");
4091
0
        cpl_unzCloseCurrentFile(unzF);
4092
0
        return false;
4093
0
    }
4094
4095
0
    if (file_info.size_file_extra)
4096
0
    {
4097
0
        std::vector<GByte> abyExtra(file_info.size_file_extra);
4098
0
        poVirtualHandle->Seek(file_info.file_extra_abs_offset, SEEK_SET);
4099
0
        if (poVirtualHandle->Read(&abyExtra[0], abyExtra.size()) ==
4100
0
            abyExtra.size())
4101
0
        {
4102
0
            size_t nPos = 0;
4103
0
            while (nPos + 2 * sizeof(uint16_t) <= abyExtra.size())
4104
0
            {
4105
0
                const uint16_t nId =
4106
0
                    CPL_FROM_LSB<uint16_t>(abyExtra.data() + nPos);
4107
0
                nPos += sizeof(uint16_t);
4108
4109
0
                const uint16_t nSize =
4110
0
                    CPL_FROM_LSB<uint16_t>(abyExtra.data() + nPos);
4111
0
                nPos += sizeof(uint16_t);
4112
4113
0
                if (nId == 0x564b && nPos + nSize <= abyExtra.size())  // "KV"
4114
0
                {
4115
0
                    if (nSize >= strlen("KeyValuePairs") + 1 &&
4116
0
                        memcmp(&abyExtra[nPos], "KeyValuePairs",
4117
0
                               strlen("KeyValuePairs")) == 0)
4118
0
                    {
4119
0
                        int nPos2 = static_cast<int>(strlen("KeyValuePairs"));
4120
0
                        const int nKVPairs = abyExtra[nPos + nPos2];
4121
0
                        nPos2++;
4122
0
                        for (int iKV = 0; iKV < nKVPairs; ++iKV)
4123
0
                        {
4124
0
                            if (nPos2 + sizeof(uint16_t) > nSize)
4125
0
                                break;
4126
0
                            const uint16_t nKeyLen = CPL_FROM_LSB<uint16_t>(
4127
0
                                abyExtra.data() + nPos + nPos2);
4128
0
                            nPos2 += sizeof(uint16_t);
4129
0
                            if (nPos2 + nKeyLen > nSize)
4130
0
                                break;
4131
0
                            std::string osKey;
4132
0
                            osKey.resize(nKeyLen);
4133
0
                            memcpy(&osKey[0], &abyExtra[nPos + nPos2], nKeyLen);
4134
0
                            nPos2 += nKeyLen;
4135
4136
0
                            if (nPos2 + sizeof(uint16_t) > nSize)
4137
0
                                break;
4138
0
                            const uint16_t nValLen = CPL_FROM_LSB<uint16_t>(
4139
0
                                abyExtra.data() + nPos + nPos2);
4140
0
                            nPos2 += sizeof(uint16_t);
4141
0
                            if (nPos2 + nValLen > nSize)
4142
0
                                break;
4143
0
                            std::string osVal;
4144
0
                            osVal.resize(nValLen);
4145
0
                            memcpy(&osVal[0], &abyExtra[nPos + nPos2], nValLen);
4146
0
                            nPos2 += nValLen;
4147
4148
0
                            info.oMapProperties[osKey] = std::move(osVal);
4149
0
                        }
4150
0
                    }
4151
0
                }
4152
0
                nPos += nSize;
4153
0
            }
4154
0
        }
4155
0
    }
4156
4157
0
    info.nCRC = file_info.crc;
4158
0
    info.nCompressionMethod = static_cast<int>(file_info.compression_method);
4159
0
    info.nUncompressedSize = static_cast<uint64_t>(file_info.uncompressed_size);
4160
0
    info.nCompressedSize = static_cast<uint64_t>(file_info.compressed_size);
4161
4162
    // Sanity checks
4163
0
    if (info.nCompressedSize >
4164
0
        std::numeric_limits<uint64_t>::max() - info.nStartDataStream)
4165
0
    {
4166
0
        CPLError(CE_Failure, CPLE_AppDefined,
4167
0
                 "Invalid compressed size for file %s", pszFilename);
4168
0
        return false;
4169
0
    }
4170
0
    const uLong64 afterFileOffset =
4171
0
        info.nStartDataStream + info.nCompressedSize;
4172
4173
    // Cf https://stackoverflow.com/questions/16792189/gzip-compression-ratio-for-zeros/16794960
4174
0
    constexpr unsigned MAX_DEFLATE_COMPRESSION_RATIO = 1032;
4175
0
    if (info.nCompressedSize == 0 && info.nUncompressedSize != 0)
4176
0
    {
4177
0
        CPLError(CE_Failure, CPLE_AppDefined,
4178
0
                 "Invalid compressed size (=0) vs uncompressed size (!=0) for "
4179
0
                 "file %s",
4180
0
                 pszFilename);
4181
0
        return false;
4182
0
    }
4183
0
    else if (info.nCompressedSize != 0 &&
4184
0
             info.nUncompressedSize / info.nCompressedSize >
4185
0
                 MAX_DEFLATE_COMPRESSION_RATIO)
4186
0
    {
4187
0
        CPLError(CE_Failure, CPLE_AppDefined,
4188
0
                 "Invalid compression ratio for file %s: %" PRIu64, pszFilename,
4189
0
                 info.nUncompressedSize / info.nCompressedSize);
4190
0
        return false;
4191
0
    }
4192
4193
    // A bit arbitrary
4194
0
    constexpr unsigned THRESHOLD_FOR_BIG_ALLOCS = 1024 * 1024 * 1024;
4195
0
    if (info.nUncompressedSize > THRESHOLD_FOR_BIG_ALLOCS)
4196
0
    {
4197
        // Check that the compressed file size is consistent with the ZIP file size
4198
0
        poVirtualHandle->Seek(0, SEEK_END);
4199
0
        if (afterFileOffset > poVirtualHandle->Tell())
4200
0
        {
4201
0
            CPLError(CE_Failure, CPLE_AppDefined,
4202
0
                     "Invalid compressed size for file %s: %" PRIu64,
4203
0
                     pszFilename, info.nCompressedSize);
4204
0
            return false;
4205
0
        }
4206
0
    }
4207
4208
    // Try to locate .sozip.idx file
4209
0
    unz_file_info file_info2;
4210
0
    std::string osAuxName;
4211
0
    osAuxName.resize(1024);
4212
0
    uLong64 indexPos;
4213
0
    if (file_info.compression_method == 8 &&
4214
0
        cpl_unzCurrentFileInfoFromLocalHeader(
4215
0
            unzF, afterFileOffset, &file_info2, &osAuxName[0], osAuxName.size(),
4216
0
            &indexPos) == UNZ_OK)
4217
0
    {
4218
0
        osAuxName.resize(strlen(osAuxName.c_str()));
4219
0
        if (osAuxName.find(".sozip.idx") != std::string::npos)
4220
0
        {
4221
0
            info.bSOZipIndexFound = true;
4222
0
            info.nSOZIPStartData = indexPos;
4223
0
            poVirtualHandle->Seek(indexPos, SEEK_SET);
4224
0
            const uint32_t nVersion = poVirtualHandle->ReadLSB<uint32_t>();
4225
0
            const uint32_t nToSkip = poVirtualHandle->ReadLSB<uint32_t>();
4226
0
            const uint32_t nChunkSize = poVirtualHandle->ReadLSB<uint32_t>();
4227
0
            const uint32_t nOffsetSize = poVirtualHandle->ReadLSB<uint32_t>();
4228
0
            const uint64_t nUncompressedSize =
4229
0
                poVirtualHandle->ReadLSB<uint64_t>();
4230
0
            const uint64_t nCompressedSize =
4231
0
                poVirtualHandle->ReadLSB<uint64_t>();
4232
4233
0
            info.nSOZIPVersion = nVersion;
4234
0
            info.nSOZIPToSkip = nToSkip;
4235
0
            info.nSOZIPChunkSize = nChunkSize;
4236
0
            info.nSOZIPOffsetSize = nOffsetSize;
4237
4238
0
            bool bValid = true;
4239
0
            if (nVersion != 1)
4240
0
            {
4241
0
                CPLDebug("SOZIP", "version = %u, expected 1", nVersion);
4242
0
                bValid = false;
4243
0
            }
4244
0
            if (nCompressedSize != file_info.compressed_size)
4245
0
            {
4246
0
                CPLDebug("SOZIP",
4247
0
                         "compressedSize field inconsistent with file");
4248
0
                bValid = false;
4249
0
            }
4250
0
            if (nUncompressedSize != file_info.uncompressed_size)
4251
0
            {
4252
0
                CPLDebug("SOZIP",
4253
0
                         "uncompressedSize field inconsistent with file");
4254
0
                bValid = false;
4255
0
            }
4256
0
            if (!(nChunkSize > 0 && nChunkSize < 100 * 1024 * 1024))
4257
0
            {
4258
0
                CPLDebug("SOZIP", "invalid chunkSize = %u", nChunkSize);
4259
0
                bValid = false;
4260
0
            }
4261
0
            if (nOffsetSize != 8)
4262
0
            {
4263
0
                CPLDebug("SOZIP", "invalid offsetSize = %u", nOffsetSize);
4264
0
                bValid = false;
4265
0
            }
4266
0
            if (file_info2.compression_method != 0)
4267
0
            {
4268
0
                CPLDebug("SOZIP", "unexpected compression_method = %u",
4269
0
                         static_cast<unsigned>(file_info2.compression_method));
4270
0
                bValid = false;
4271
0
            }
4272
0
            if (bValid)
4273
0
            {
4274
0
                const auto nExpectedIndexSize =
4275
0
                    32 + static_cast<uint64_t>(nToSkip) +
4276
0
                    ((nUncompressedSize - 1) / nChunkSize) * nOffsetSize;
4277
0
                if (nExpectedIndexSize != file_info2.uncompressed_size)
4278
0
                {
4279
0
                    CPLDebug("SOZIP", "invalid file size for index");
4280
0
                    bValid = false;
4281
0
                }
4282
0
            }
4283
0
            if (bValid)
4284
0
            {
4285
0
                info.bSOZipIndexValid = true;
4286
0
                CPLDebug("SOZIP", "Found valid SOZIP index: %s",
4287
0
                         osAuxName.c_str());
4288
0
            }
4289
0
            else
4290
0
            {
4291
0
                CPLDebug("SOZIP", "Found *invalid* SOZIP index: %s",
4292
0
                         osAuxName.c_str());
4293
0
            }
4294
0
        }
4295
0
    }
4296
4297
0
    cpl_unzCloseCurrentFile(unzF);
4298
4299
0
    info.poVirtualHandle = std::move(poVirtualHandle);
4300
4301
0
    return true;
4302
0
}
4303
4304
/************************************************************************/
4305
/*                                Open()                                */
4306
/************************************************************************/
4307
4308
// Opens a member of a ZIP archive.
//
// Access modes containing 'w' are forwarded to OpenForWrite(); modes
// containing '+' are rejected. Otherwise the member is opened for reading
// through the handle class matching its compression method and the presence
// of a valid SOZip index.
VSIVirtualHandleUniquePtr
VSIZipFilesystemHandler::Open(const char *pszFilename, const char *pszAccess,
                              bool bSetError, CSLConstList /* papszOptions */)
{
    const bool bWriteRequested = strchr(pszAccess, 'w') != nullptr;
    if (bWriteRequested)
        return OpenForWrite(pszFilename, pszAccess);

    const bool bUpdateRequested = strchr(pszAccess, '+') != nullptr;
    if (bUpdateRequested)
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Read-write random access not supported for /vsizip");
        return nullptr;
    }

    VSIFileInZipInfo info;
    const bool bGotInfo = GetFileInfo(pszFilename, info, bSetError);
    if (!bGotInfo)
        return nullptr;

#ifdef ENABLE_DEFLATE64
    // Compression method 9 is served by the Deflate64 handle.
    if (info.nCompressionMethod == 9)
    {
        auto poDeflate64Handle = std::make_unique<VSIDeflate64Handle>(
            std::move(info.poVirtualHandle), nullptr, info.nStartDataStream,
            info.nCompressedSize, info.nUncompressedSize, info.nCRC);
        if (!poDeflate64Handle->IsInitOK())
            return nullptr;

        // The buffered reader wrapper makes small backward seeks on the
        // decompressing handle dramatically cheaper.
        return VSIVirtualHandleUniquePtr(
            VSICreateBufferedReaderHandle(poDeflate64Handle.release()));
    }
#endif

    // A valid SOZip index gives chunk-level access, exposed through a cache
    // whose page size is the SOZip chunk size.
    if (info.bSOZipIndexValid)
    {
        auto poChunkedHandle = std::make_unique<VSISOZipHandle>(
            std::move(info.poVirtualHandle), info.nStartDataStream,
            info.nCompressedSize, info.nUncompressedSize, info.nSOZIPStartData,
            info.nSOZIPToSkip, info.nSOZIPChunkSize);
        if (!poChunkedHandle->IsOK())
            return nullptr;

        return VSIVirtualHandleUniquePtr(VSICreateCachedFile(
            poChunkedHandle.release(), info.nSOZIPChunkSize, 0));
    }

    // Generic case. The last constructor argument tells whether the member
    // uses compression method 0.
    const bool bMethodIsZero = info.nCompressionMethod == 0;
    auto poInflateHandle = std::make_unique<VSIGZipHandle>(
        std::move(info.poVirtualHandle), nullptr, info.nStartDataStream,
        info.nCompressedSize, info.nUncompressedSize, info.nCRC,
        bMethodIsZero);
    if (!poInflateHandle->IsInitOK())
        return nullptr;

    // The buffered reader wrapper makes small backward seeks on the
    // decompressing handle dramatically cheaper.
    return VSIVirtualHandleUniquePtr(
        VSICreateBufferedReaderHandle(poInflateHandle.release()));
}
4379
4380
/************************************************************************/
4381
/*                          GetFileMetadata()                           */
4382
/************************************************************************/
4383
4384
char **VSIZipFilesystemHandler::GetFileMetadata(const char *pszFilename,
4385
                                                const char *pszDomain,
4386
                                                CSLConstList /*papszOptions*/)
4387
0
{
4388
0
    VSIFileInZipInfo info;
4389
0
    if (!GetFileInfo(pszFilename, info, true))
4390
0
        return nullptr;
4391
4392
0
    if (!pszDomain)
4393
0
    {
4394
0
        CPLStringList aosMetadata;
4395
0
        for (const auto &kv : info.oMapProperties)
4396
0
        {
4397
0
            aosMetadata.AddNameValue(kv.first.c_str(), kv.second.c_str());
4398
0
        }
4399
0
        return aosMetadata.StealList();
4400
0
    }
4401
0
    else if (EQUAL(pszDomain, "ZIP"))
4402
0
    {
4403
0
        CPLStringList aosMetadata;
4404
0
        aosMetadata.SetNameValue(
4405
0
            "START_DATA_OFFSET",
4406
0
            CPLSPrintf(CPL_FRMT_GUIB,
4407
0
                       static_cast<GUIntBig>(info.nStartDataStream)));
4408
4409
0
        if (info.nCompressionMethod == 0)
4410
0
            aosMetadata.SetNameValue("COMPRESSION_METHOD", "0 (STORED)");
4411
0
        else if (info.nCompressionMethod == 8)
4412
0
            aosMetadata.SetNameValue("COMPRESSION_METHOD", "8 (DEFLATE)");
4413
0
        else
4414
0
        {
4415
0
            aosMetadata.SetNameValue("COMPRESSION_METHOD",
4416
0
                                     CPLSPrintf("%d", info.nCompressionMethod));
4417
0
        }
4418
0
        aosMetadata.SetNameValue(
4419
0
            "COMPRESSED_SIZE",
4420
0
            CPLSPrintf(CPL_FRMT_GUIB,
4421
0
                       static_cast<GUIntBig>(info.nCompressedSize)));
4422
0
        aosMetadata.SetNameValue(
4423
0
            "UNCOMPRESSED_SIZE",
4424
0
            CPLSPrintf(CPL_FRMT_GUIB,
4425
0
                       static_cast<GUIntBig>(info.nUncompressedSize)));
4426
4427
0
        if (info.bSOZipIndexFound)
4428
0
        {
4429
0
            aosMetadata.SetNameValue("SOZIP_FOUND", "YES");
4430
4431
0
            aosMetadata.SetNameValue("SOZIP_VERSION",
4432
0
                                     CPLSPrintf("%u", info.nSOZIPVersion));
4433
4434
0
            aosMetadata.SetNameValue("SOZIP_OFFSET_SIZE",
4435
0
                                     CPLSPrintf("%u", info.nSOZIPOffsetSize));
4436
4437
0
            aosMetadata.SetNameValue("SOZIP_CHUNK_SIZE",
4438
0
                                     CPLSPrintf("%u", info.nSOZIPChunkSize));
4439
4440
0
            aosMetadata.SetNameValue(
4441
0
                "SOZIP_START_DATA_OFFSET",
4442
0
                CPLSPrintf(CPL_FRMT_GUIB,
4443
0
                           static_cast<GUIntBig>(info.nSOZIPStartData)));
4444
4445
0
            if (info.bSOZipIndexValid)
4446
0
            {
4447
0
                aosMetadata.SetNameValue("SOZIP_VALID", "YES");
4448
0
            }
4449
0
        }
4450
4451
0
        return aosMetadata.StealList();
4452
0
    }
4453
0
    return nullptr;
4454
0
}
4455
4456
/************************************************************************/
4457
/*                               Mkdir()                                */
4458
/************************************************************************/
4459
4460
int VSIZipFilesystemHandler::Mkdir(const char *pszDirname, long /* nMode */)
4461
0
{
4462
0
    CPLString osDirname = pszDirname;
4463
0
    if (!osDirname.empty() && osDirname.back() != '/')
4464
0
        osDirname += "/";
4465
0
    return OpenForWrite(osDirname, "wb") != nullptr ? 0 : -1;
4466
0
}
4467
4468
/************************************************************************/
4469
/*                             ReadDirEx()                              */
4470
/************************************************************************/
4471
4472
char **VSIZipFilesystemHandler::ReadDirEx(const char *pszDirname, int nMaxFiles)
4473
0
{
4474
0
    CPLString osInArchiveSubDir;
4475
0
    auto zipFilename = SplitFilename(pszDirname, osInArchiveSubDir, true, true);
4476
0
    if (zipFilename == nullptr)
4477
0
        return nullptr;
4478
4479
0
    {
4480
0
        std::unique_lock oLock(oMutex);
4481
4482
0
        if (oMapZipWriteHandles.find(zipFilename.get()) !=
4483
0
            oMapZipWriteHandles.end())
4484
0
        {
4485
0
            CPLError(CE_Failure, CPLE_AppDefined,
4486
0
                     "Cannot read a zip file being written");
4487
0
            return nullptr;
4488
0
        }
4489
0
    }
4490
4491
0
    return VSIArchiveFilesystemHandler::ReadDirEx(pszDirname, nMaxFiles);
4492
0
}
4493
4494
/************************************************************************/
4495
/*                                Stat()                                */
4496
/************************************************************************/
4497
4498
int VSIZipFilesystemHandler::Stat(const char *pszFilename,
4499
                                  VSIStatBufL *pStatBuf, int nFlags)
4500
2.27k
{
4501
2.27k
    CPLString osInArchiveSubDir;
4502
4503
2.27k
    memset(pStatBuf, 0, sizeof(VSIStatBufL));
4504
4505
2.27k
    auto zipFilename = SplitFilename(pszFilename, osInArchiveSubDir, true,
4506
2.27k
                                     (nFlags & VSI_STAT_SET_ERROR_FLAG) != 0);
4507
2.27k
    if (zipFilename == nullptr)
4508
1.43k
        return -1;
4509
4510
836
    {
4511
836
        std::unique_lock oLock(oMutex);
4512
4513
836
        if (oMapZipWriteHandles.find(zipFilename.get()) !=
4514
836
            oMapZipWriteHandles.end())
4515
0
        {
4516
0
            CPLError(CE_Failure, CPLE_AppDefined,
4517
0
                     "Cannot read a zip file being written");
4518
0
            return -1;
4519
0
        }
4520
836
    }
4521
4522
836
    return VSIArchiveFilesystemHandler::Stat(pszFilename, pStatBuf, nFlags);
4523
836
}
4524
4525
/************************************************************************/
4526
/*                           RemoveFromMap()                            */
4527
/************************************************************************/
4528
4529
// Unregisters a write handle: removes, under the mutex, the first entry of
// the write-handle map whose value is poHandle. Does nothing when the handle
// is not registered.
void VSIZipFilesystemHandler::RemoveFromMap(VSIZipWriteHandle *poHandle)
{
    std::unique_lock oLock(oMutex);

    const auto oMatch = std::find_if(
        oMapZipWriteHandles.begin(), oMapZipWriteHandles.end(),
        [poHandle](const auto &oEntry) { return oEntry.second == poHandle; });
    if (oMatch != oMapZipWriteHandles.end())
        oMapZipWriteHandles.erase(oMatch);
}
4544
4545
/************************************************************************/
4546
/*                            OpenForWrite()                            */
4547
/************************************************************************/
4548
4549
// Locking front-end of OpenForWrite_unlocked(): the mutex is held for the
// whole duration of the call.
VSIVirtualHandleUniquePtr
VSIZipFilesystemHandler::OpenForWrite(const char *pszFilename,
                                      const char *pszAccess)
{
    std::unique_lock oGuard(oMutex);
    auto poWriteHandle = OpenForWrite_unlocked(pszFilename, pszAccess);
    return poWriteHandle;
}
4556
4557
// Opens an archive, or a member of an archive, for writing. This function
// does not take the mutex itself; OpenForWrite() calls it with the mutex
// held.
//
// Two situations are handled:
// - the archive already has a registered write handle: a new member is
//   started in it and a child handle on that member is returned;
// - otherwise the archive is created (or opened in append mode when it
//   already exists and either '+' access or a member name was given) and
//   registered; if a member name was given, the function calls itself again
//   to create that member, which then goes through the first case.
//
// Returns nullptr on failure.
VSIVirtualHandleUniquePtr
VSIZipFilesystemHandler::OpenForWrite_unlocked(const char *pszFilename,
                                               const char *pszAccess)
{
    CPLString osZipInFileName;

    auto zipFilename =
        SplitFilename(pszFilename, osZipInFileName, false, false);
    if (zipFilename == nullptr)
        return nullptr;
    const CPLString osZipFilename = zipFilename.get();

    // Invalidate cached file list.
    auto iter = oFileList.find(osZipFilename);
    if (iter != oFileList.end())
    {
        oFileList.erase(iter);
    }

    auto oIter = oMapZipWriteHandles.find(osZipFilename);
    if (oIter != oMapZipWriteHandles.end())
    {
        if (strchr(pszAccess, '+') != nullptr)
        {
            CPLError(
                CE_Failure, CPLE_AppDefined,
                "Random access not supported for writable file in /vsizip");
            return nullptr;
        }

        VSIZipWriteHandle *poZIPHandle = oIter->second;

        // Only one member can be written at a time.
        if (poZIPHandle->GetChildInWriting() != nullptr)
        {
            CPLError(CE_Failure, CPLE_AppDefined,
                     "Cannot create %s while another file is being "
                     "written in the .zip",
                     osZipInFileName.c_str());
            return nullptr;
        }

        poZIPHandle->StopCurrentFile();

        // Re-add path separator when creating directories. The length is
        // checked first so that an empty name is never indexed at -1.
        const size_t nFilenameLen = strlen(pszFilename);
        if (nFilenameLen > 0)
        {
            const char chLastChar = pszFilename[nFilenameLen - 1];
            if (chLastChar == '/' || chLastChar == '\\')
                osZipInFileName += chLastChar;
        }

        if (CPLCreateFileInZip(poZIPHandle->GetHandle(), osZipInFileName,
                               nullptr) != CE_None)
            return nullptr;

        auto poChildHandle =
            std::make_unique<VSIZipWriteHandle>(this, nullptr, poZIPHandle);

        poZIPHandle->StartNewFile(poChildHandle.get());

        return VSIVirtualHandleUniquePtr(poChildHandle.release());
    }

    // No write handle registered yet for this archive: create or append.
    CPLStringList aosCreateOptions;
    if (strchr(pszAccess, '+') != nullptr || !osZipInFileName.empty())
    {
        VSIStatBufL sBuf;
        if (VSIStatExL(osZipFilename, &sBuf, VSI_STAT_EXISTS_FLAG) == 0)
            aosCreateOptions.SetNameValue("APPEND", "TRUE");
    }

    void *hZIP = CPLCreateZip(osZipFilename, aosCreateOptions.List());
    if (hZIP == nullptr)
        return nullptr;

    // The map stores a raw pointer to the archive-level handle; it is handed
    // to the caller either directly or through the member handle below.
    auto poHandle = new VSIZipWriteHandle(this, hZIP, nullptr);
    oMapZipWriteHandles[osZipFilename] = poHandle;

    if (!osZipInFileName.empty())
    {
        // The archive is now registered, so this nested call creates the
        // member through the branch above.
        auto poRes = std::unique_ptr<VSIZipWriteHandle>(
            cpl::down_cast<VSIZipWriteHandle *>(
                OpenForWrite_unlocked(pszFilename, pszAccess).release()));
        if (poRes == nullptr)
        {
            delete poHandle;
            oMapZipWriteHandles.erase(osZipFilename);
            return nullptr;
        }

        poRes->SetAutoDeleteParent();

        return VSIVirtualHandleUniquePtr(poRes.release());
    }

    return VSIVirtualHandleUniquePtr(poHandle);
}
4656
4657
/************************************************************************/
4658
/*                             GetOptions()                             */
4659
/************************************************************************/
4660
4661
// Returns the XML description of the options declared by this handler:
// GDAL_NUM_THREADS and CPL_VSIL_DEFLATE_CHUNK_SIZE.
const char *VSIZipFilesystemHandler::GetOptions()
{
    static const char *const pszOptionsXML =
        "<Options>"
        "  <Option name='GDAL_NUM_THREADS' type='string' "
        "description='Number of threads for compression. Either a integer "
        "or ALL_CPUS'/>"
        "  <Option name='CPL_VSIL_DEFLATE_CHUNK_SIZE' type='string' "
        "description='Chunk of uncompressed data for parallelization. "
        "Use K(ilobytes) or M(egabytes) suffix' default='1M'/>"
        "</Options>";
    return pszOptionsXML;
}
4672
4673
/************************************************************************/
4674
/*                              CopyFile()                              */
4675
/************************************************************************/
4676
4677
int VSIZipFilesystemHandler::CopyFile(const char *pszSource,
4678
                                      const char *pszTarget, VSILFILE *fpSource,
4679
                                      vsi_l_offset /* nSourceSize */,
4680
                                      CSLConstList papszOptions,
4681
                                      GDALProgressFunc pProgressFunc,
4682
                                      void *pProgressData)
4683
0
{
4684
0
    CPLString osZipInFileName;
4685
4686
0
    auto zipFilename = SplitFilename(pszTarget, osZipInFileName, false, false);
4687
0
    if (zipFilename == nullptr)
4688
0
        return -1;
4689
0
    const CPLString osZipFilename = zipFilename.get();
4690
0
    if (osZipInFileName.empty())
4691
0
    {
4692
0
        CPLError(CE_Failure, CPLE_AppDefined,
4693
0
                 "Target filename should be of the form "
4694
0
                 "/vsizip/path_to.zip/filename_within_zip");
4695
0
        return -1;
4696
0
    }
4697
4698
    // Invalidate cached file list.
4699
0
    auto oIterFileList = oFileList.find(osZipFilename);
4700
0
    if (oIterFileList != oFileList.end())
4701
0
    {
4702
0
        oFileList.erase(oIterFileList);
4703
0
    }
4704
4705
0
    const auto oIter = oMapZipWriteHandles.find(osZipFilename);
4706
0
    if (oIter != oMapZipWriteHandles.end())
4707
0
    {
4708
0
        VSIZipWriteHandle *poZIPHandle = oIter->second;
4709
4710
0
        if (poZIPHandle->GetChildInWriting() != nullptr)
4711
0
        {
4712
0
            CPLError(CE_Failure, CPLE_AppDefined,
4713
0
                     "Cannot create %s while another file is being "
4714
0
                     "written in the .zip",
4715
0
                     osZipInFileName.c_str());
4716
0
            return -1;
4717
0
        }
4718
4719
0
        if (CPLAddFileInZip(poZIPHandle->GetHandle(), osZipInFileName.c_str(),
4720
0
                            pszSource, fpSource, papszOptions, pProgressFunc,
4721
0
                            pProgressData) != CE_None)
4722
0
        {
4723
0
            return -1;
4724
0
        }
4725
0
        return 0;
4726
0
    }
4727
0
    else
4728
0
    {
4729
0
        CPLStringList aosOptionsCreateZip;
4730
0
        VSIStatBufL sBuf;
4731
0
        if (VSIStatExL(osZipFilename, &sBuf, VSI_STAT_EXISTS_FLAG) == 0)
4732
0
            aosOptionsCreateZip.SetNameValue("APPEND", "TRUE");
4733
4734
0
        void *hZIP = CPLCreateZip(osZipFilename, aosOptionsCreateZip.List());
4735
4736
0
        if (hZIP == nullptr)
4737
0
            return -1;
4738
4739
0
        if (CPLAddFileInZip(hZIP, osZipInFileName.c_str(), pszSource, fpSource,
4740
0
                            papszOptions, pProgressFunc,
4741
0
                            pProgressData) != CE_None)
4742
0
        {
4743
0
            CPLCloseZip(hZIP);
4744
0
            return -1;
4745
0
        }
4746
0
        CPLCloseZip(hZIP);
4747
0
        return 0;
4748
0
    }
4749
0
}
4750
4751
/************************************************************************/
4752
/*                         VSIZipWriteHandle()                          */
4753
/************************************************************************/
4754
4755
VSIZipWriteHandle::VSIZipWriteHandle(VSIZipFilesystemHandler *poFS, void *hZIP,
4756
                                     VSIZipWriteHandle *poParent)
4757
0
    : m_poFS(poFS), m_hZIP(hZIP), m_poParent(poParent)
4758
0
{
4759
0
}
4760
4761
/************************************************************************/
4762
/*                         ~VSIZipWriteHandle()                         */
4763
/************************************************************************/
4764
4765
// Closes the handle on destruction. The call is class-qualified so that it
// always resolves to this class's own Close().
VSIZipWriteHandle::~VSIZipWriteHandle()
{
    this->VSIZipWriteHandle::Close();
}
4769
4770
/************************************************************************/
4771
/*                                Seek()                                */
4772
/************************************************************************/
4773
4774
// Only seeks that leave the write position unchanged are accepted:
//  - a zero offset relative to the current position or to the end;
//  - an absolute seek to the current offset.
// Anything else emits a CPLE_NotSupported error and returns -1.
int VSIZipWriteHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    const bool bRelativeNoOp =
        nOffset == 0 && (nWhence == SEEK_END || nWhence == SEEK_CUR);
    const bool bAbsoluteNoOp = nOffset == nCurOffset && nWhence == SEEK_SET;

    if (!bRelativeNoOp && !bAbsoluteNoOp)
    {
        CPLError(CE_Failure, CPLE_NotSupported,
                 "VSIFSeekL() is not supported on writable Zip files");
        return -1;
    }

    return 0;
}
4785
4786
/************************************************************************/
4787
/*                                Tell()                                */
4788
/************************************************************************/
4789
4790
// Reports the running offset, i.e. the total advanced by successful Write()
// calls on this handle.
vsi_l_offset VSIZipWriteHandle::Tell()
{
    const vsi_l_offset nPosition = nCurOffset;
    return nPosition;
}
4794
4795
/************************************************************************/
4796
/*                                Read()                                */
4797
/************************************************************************/
4798
4799
size_t VSIZipWriteHandle::Read(void * /* pBuffer */, size_t /* nBytes */)
4800
0
{
4801
0
    CPLError(CE_Failure, CPLE_NotSupported,
4802
0
             "VSIFReadL() is not supported on writable Zip files");
4803
0
    return 0;
4804
0
}
4805
4806
/************************************************************************/
4807
/*                               Write()                                */
4808
/************************************************************************/
4809
4810
size_t VSIZipWriteHandle::Write(const void *pBuffer, size_t const nBytesToWrite)
4811
0
{
4812
0
    if (m_poParent == nullptr)
4813
0
    {
4814
0
        CPLError(CE_Failure, CPLE_NotSupported,
4815
0
                 "VSIFWriteL() is not supported on "
4816
0
                 "main Zip file or closed subfiles");
4817
0
        return 0;
4818
0
    }
4819
4820
0
    const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
4821
0
    size_t nWritten = 0;
4822
0
    while (nWritten < nBytesToWrite)
4823
0
    {
4824
0
        int nToWrite = static_cast<int>(
4825
0
            std::min(static_cast<size_t>(INT_MAX), nBytesToWrite));
4826
0
        if (CPLWriteFileInZip(m_poParent->m_hZIP, pabyBuffer, nToWrite) !=
4827
0
            CE_None)
4828
0
            return 0;
4829
0
        nWritten += nToWrite;
4830
0
        pabyBuffer += nToWrite;
4831
0
    }
4832
4833
0
    nCurOffset += nBytesToWrite;
4834
4835
0
    return nBytesToWrite;
4836
0
}
4837
4838
/************************************************************************/
4839
/*                               Flush()                                */
4840
/************************************************************************/
4841
4842
// Flushing is accepted but does nothing: success is always reported.
// Raising a CPLE_NotSupported error ("VSIFFlushL() is not supported on
// writable Zip files") was considered and is intentionally left disabled.
int VSIZipWriteHandle::Flush()
{
    constexpr int nSuccess = 0;
    return nSuccess;
}
4848
4849
/************************************************************************/
4850
/*                               Close()                                */
4851
/************************************************************************/
4852
4853
int VSIZipWriteHandle::Close()
4854
0
{
4855
0
    int nRet = 0;
4856
0
    if (m_poParent)
4857
0
    {
4858
0
        CPLCloseFileInZip(m_poParent->m_hZIP);
4859
0
        m_poParent->poChildInWriting = nullptr;
4860
0
        if (bAutoDeleteParent)
4861
0
        {
4862
0
            if (m_poParent->Close() != 0)
4863
0
                nRet = -1;
4864
0
            delete m_poParent;
4865
0
        }
4866
0
        m_poParent = nullptr;
4867
0
    }
4868
0
    if (poChildInWriting)
4869
0
    {
4870
0
        if (poChildInWriting->Close() != 0)
4871
0
            nRet = -1;
4872
0
        poChildInWriting = nullptr;
4873
0
    }
4874
0
    if (m_hZIP)
4875
0
    {
4876
0
        if (CPLCloseZip(m_hZIP) != CE_None)
4877
0
            nRet = -1;
4878
0
        m_hZIP = nullptr;
4879
4880
0
        m_poFS->RemoveFromMap(this);
4881
0
    }
4882
4883
0
    return nRet;
4884
0
}
4885
4886
/************************************************************************/
4887
/*                          StopCurrentFile()                           */
4888
/************************************************************************/
4889
4890
void VSIZipWriteHandle::StopCurrentFile()
4891
0
{
4892
0
    if (poChildInWriting)
4893
0
        poChildInWriting->Close();
4894
0
    poChildInWriting = nullptr;
4895
0
}
4896
4897
/************************************************************************/
4898
/*                            StartNewFile()                            */
4899
/************************************************************************/
4900
4901
// Records poSubFile as the sub-file currently being written through this
// handle. Any previously recorded sub-file is simply overwritten, not closed.
void VSIZipWriteHandle::StartNewFile(VSIZipWriteHandle *poSubFile)
{
    this->poChildInWriting = poSubFile;
}
4905
4906
//! @endcond
4907
4908
/************************************************************************/
4909
/*                      VSIInstallZipFileHandler()                      */
4910
/************************************************************************/
4911
4912
/*!
4913
 \brief Install ZIP file system handler.
4914
4915
 A special file handler is installed that allows reading on-the-fly in ZIP
4916
 (.zip) archives.
4917
4918
 All portions of the file system underneath the base path "/vsizip/" will be
4919
 handled by this driver.
4920
4921
 \verbatim embed:rst
4922
 See :ref:`/vsizip/ documentation <vsizip>`
4923
 \endverbatim
4924
4925
 */
4926
4927
void VSIInstallZipFileHandler()
4928
3
{
4929
3
    VSIFileManager::InstallHandler("/vsizip/",
4930
3
                                   std::make_shared<VSIZipFilesystemHandler>());
4931
3
}
4932
4933
/************************************************************************/
4934
/*                           CPLZLibDeflate()                           */
4935
/************************************************************************/
4936
4937
/**
4938
 * \brief Compress a buffer with ZLib compression.
4939
 *
4940
 * @param ptr input buffer.
4941
 * @param nBytes size of input buffer in bytes.
4942
 * @param nLevel ZLib compression level (-1 for default).
4943
 * @param outptr output buffer, or NULL to let the function allocate it.
4944
 * @param nOutAvailableBytes size of output buffer if provided, or ignored.
4945
 * @param pnOutBytes pointer to a size_t, where to store the size of the
4946
 *                   output buffer.
4947
 *
4948
 * @return the output buffer (to be freed with VSIFree() if not provided)
4949
 *         or NULL in case of error.
4950
 *
4951
 */
4952
4953
void *CPLZLibDeflate(const void *ptr, size_t nBytes, int nLevel, void *outptr,
4954
                     size_t nOutAvailableBytes, size_t *pnOutBytes)
4955
0
{
4956
0
    if (pnOutBytes != nullptr)
4957
0
        *pnOutBytes = 0;
4958
4959
0
    size_t nTmpSize = 0;
4960
0
    void *pTmp;
4961
#ifdef HAVE_LIBDEFLATE
4962
    struct libdeflate_compressor *enc =
4963
        libdeflate_alloc_compressor(nLevel < 0 ? 7 : nLevel);
4964
    if (enc == nullptr)
4965
    {
4966
        return nullptr;
4967
    }
4968
#endif
4969
0
    if (outptr == nullptr)
4970
0
    {
4971
#ifdef HAVE_LIBDEFLATE
4972
        nTmpSize = libdeflate_zlib_compress_bound(enc, nBytes);
4973
#else
4974
0
        nTmpSize = 32 + nBytes * 2;
4975
0
#endif
4976
0
        pTmp = VSIMalloc(nTmpSize);
4977
0
        if (pTmp == nullptr)
4978
0
        {
4979
#ifdef HAVE_LIBDEFLATE
4980
            libdeflate_free_compressor(enc);
4981
#endif
4982
0
            return nullptr;
4983
0
        }
4984
0
    }
4985
0
    else
4986
0
    {
4987
0
        pTmp = outptr;
4988
0
        nTmpSize = nOutAvailableBytes;
4989
0
    }
4990
4991
#ifdef HAVE_LIBDEFLATE
4992
    size_t nCompressedBytes =
4993
        libdeflate_zlib_compress(enc, ptr, nBytes, pTmp, nTmpSize);
4994
    libdeflate_free_compressor(enc);
4995
    if (nCompressedBytes == 0)
4996
    {
4997
        if (pTmp != outptr)
4998
            VSIFree(pTmp);
4999
        return nullptr;
5000
    }
5001
    if (pnOutBytes != nullptr)
5002
        *pnOutBytes = nCompressedBytes;
5003
#else
5004
0
    z_stream strm;
5005
0
    strm.zalloc = nullptr;
5006
0
    strm.zfree = nullptr;
5007
0
    strm.opaque = nullptr;
5008
0
    int ret = deflateInit(&strm, nLevel < 0 ? Z_DEFAULT_COMPRESSION : nLevel);
5009
0
    if (ret != Z_OK)
5010
0
    {
5011
0
        if (pTmp != outptr)
5012
0
            VSIFree(pTmp);
5013
0
        return nullptr;
5014
0
    }
5015
5016
0
    strm.avail_in = static_cast<uInt>(nBytes);
5017
0
    strm.next_in = reinterpret_cast<Bytef *>(const_cast<void *>(ptr));
5018
0
    strm.avail_out = static_cast<uInt>(nTmpSize);
5019
0
    strm.next_out = reinterpret_cast<Bytef *>(pTmp);
5020
0
    ret = deflate(&strm, Z_FINISH);
5021
0
    if (ret != Z_STREAM_END)
5022
0
    {
5023
0
        if (pTmp != outptr)
5024
0
            VSIFree(pTmp);
5025
0
        return nullptr;
5026
0
    }
5027
0
    if (pnOutBytes != nullptr)
5028
0
        *pnOutBytes = nTmpSize - strm.avail_out;
5029
0
    deflateEnd(&strm);
5030
0
#endif
5031
5032
0
    return pTmp;
5033
0
}
5034
5035
/************************************************************************/
5036
/*                           CPLZLibInflate()                           */
5037
/************************************************************************/
5038
5039
/**
5040
 * \brief Uncompress a buffer compressed with ZLib compression.
5041
 *
5042
 * @param ptr input buffer.
5043
 * @param nBytes size of input buffer in bytes.
5044
 * @param outptr output buffer, or NULL to let the function allocate it.
5045
 * @param nOutAvailableBytes size of output buffer if provided, or ignored.
5046
 * @param pnOutBytes pointer to a size_t, where to store the size of the
5047
 *                   output buffer.
5048
 *
5049
 * @return the output buffer (to be freed with VSIFree() if not provided)
5050
 *         or NULL in case of error.
5051
 *
5052
 */
5053
5054
void *CPLZLibInflate(const void *ptr, size_t nBytes, void *outptr,
5055
                     size_t nOutAvailableBytes, size_t *pnOutBytes)
5056
0
{
5057
0
    return CPLZLibInflateEx(ptr, nBytes, outptr, nOutAvailableBytes, false,
5058
0
                            pnOutBytes);
5059
0
}
5060
5061
/************************************************************************/
5062
/*                          CPLZLibInflateEx()                          */
5063
/************************************************************************/
5064
5065
/**
5066
 * \brief Uncompress a buffer compressed with ZLib compression.
5067
 *
5068
 * @param ptr input buffer.
5069
 * @param nBytes size of input buffer in bytes.
5070
 * @param outptr output buffer, or NULL to let the function allocate it.
5071
 * @param nOutAvailableBytes size of output buffer if provided, or ignored.
5072
 * @param bAllowResizeOutptr whether the function is allowed to grow outptr
5073
 *                           (using VSIRealloc) if its initial capacity
5074
 *                           provided by nOutAvailableBytes is not
5075
 *                           large enough. Ignored if outptr is NULL.
5076
 * @param pnOutBytes pointer to a size_t, where to store the size of the
5077
 *                   output buffer.
5078
 *
5079
 * @return the output buffer (to be freed with VSIFree() if not provided)
5080
 *         or NULL in case of error. If bAllowResizeOutptr is set to true,
5081
 *         only the returned pointer should be freed by the caller, as outptr
5082
 *         might have been reallocated or freed.
5083
 *
5084
 * @since GDAL 3.9.0
5085
 */
5086
5087
void *CPLZLibInflateEx(const void *ptr, size_t nBytes, void *outptr,
5088
                       size_t nOutAvailableBytes, bool bAllowResizeOutptr,
5089
                       size_t *pnOutBytes)
5090
0
{
5091
0
    if (pnOutBytes != nullptr)
5092
0
        *pnOutBytes = 0;
5093
0
    char *pszReallocatableBuf = nullptr;
5094
5095
#ifdef HAVE_LIBDEFLATE
5096
    if (outptr)
5097
    {
5098
        struct libdeflate_decompressor *dec = libdeflate_alloc_decompressor();
5099
        if (dec == nullptr)
5100
        {
5101
            if (bAllowResizeOutptr)
5102
                VSIFree(outptr);
5103
            return nullptr;
5104
        }
5105
        enum libdeflate_result res;
5106
        size_t nOutBytes = 0;
5107
        if (nBytes > 2 && static_cast<const GByte *>(ptr)[0] == 0x1F &&
5108
            static_cast<const GByte *>(ptr)[1] == 0x8B)
5109
        {
5110
            res = libdeflate_gzip_decompress(dec, ptr, nBytes, outptr,
5111
                                             nOutAvailableBytes, &nOutBytes);
5112
        }
5113
        else
5114
        {
5115
            res = libdeflate_zlib_decompress(dec, ptr, nBytes, outptr,
5116
                                             nOutAvailableBytes, &nOutBytes);
5117
        }
5118
        if (pnOutBytes)
5119
            *pnOutBytes = nOutBytes;
5120
        libdeflate_free_decompressor(dec);
5121
        if (res == LIBDEFLATE_INSUFFICIENT_SPACE && bAllowResizeOutptr)
5122
        {
5123
            if (nOutAvailableBytes >
5124
                (std::numeric_limits<size_t>::max() - 1) / 2)
5125
            {
5126
                VSIFree(outptr);
5127
                return nullptr;
5128
            }
5129
            size_t nOutBufSize = nOutAvailableBytes * 2;
5130
            pszReallocatableBuf = static_cast<char *>(
5131
                VSI_REALLOC_VERBOSE(outptr, nOutBufSize + 1));
5132
            if (!pszReallocatableBuf)
5133
            {
5134
                VSIFree(outptr);
5135
                return nullptr;
5136
            }
5137
            outptr = nullptr;
5138
            nOutAvailableBytes = nOutBufSize;
5139
        }
5140
        else if (res != LIBDEFLATE_SUCCESS)
5141
        {
5142
            if (bAllowResizeOutptr)
5143
                VSIFree(outptr);
5144
            return nullptr;
5145
        }
5146
        else
5147
        {
5148
            // Nul-terminate if possible.
5149
            if (nOutBytes < nOutAvailableBytes)
5150
            {
5151
                static_cast<char *>(outptr)[nOutBytes] = '\0';
5152
            }
5153
            return outptr;
5154
        }
5155
    }
5156
#endif
5157
5158
0
    z_stream strm;
5159
0
    memset(&strm, 0, sizeof(strm));
5160
0
    strm.zalloc = nullptr;
5161
0
    strm.zfree = nullptr;
5162
0
    strm.opaque = nullptr;
5163
0
    int ret;
5164
    // MAX_WBITS + 32 mode which detects automatically gzip vs zlib
5165
    // encapsulation seems to be broken with
5166
    // /opt/intel/oneapi/intelpython/latest/lib/libz.so.1 from
5167
    // intel/oneapi-basekit Docker image
5168
0
    if (nBytes > 2 && static_cast<const GByte *>(ptr)[0] == 0x1F &&
5169
0
        static_cast<const GByte *>(ptr)[1] == 0x8B)
5170
0
    {
5171
0
        ret = inflateInit2(&strm, MAX_WBITS + 16);  // gzip
5172
0
    }
5173
0
    else
5174
0
    {
5175
0
        ret = inflateInit2(&strm, MAX_WBITS);  // zlib
5176
0
    }
5177
0
    if (ret != Z_OK)
5178
0
    {
5179
0
        if (bAllowResizeOutptr)
5180
0
            VSIFree(outptr);
5181
0
        VSIFree(pszReallocatableBuf);
5182
0
        return nullptr;
5183
0
    }
5184
5185
0
    size_t nOutBufSize = 0;
5186
0
    char *pszOutBuf = nullptr;
5187
5188
#ifdef HAVE_LIBDEFLATE
5189
    if (pszReallocatableBuf)
5190
    {
5191
        pszOutBuf = pszReallocatableBuf;
5192
        nOutBufSize = nOutAvailableBytes;
5193
    }
5194
    else
5195
#endif
5196
0
        if (!outptr)
5197
0
    {
5198
0
        if (nBytes > (std::numeric_limits<size_t>::max() - 1) / 2)
5199
0
        {
5200
0
            inflateEnd(&strm);
5201
0
            return nullptr;
5202
0
        }
5203
0
        nOutBufSize = 2 * nBytes + 1;
5204
0
        pszOutBuf = static_cast<char *>(VSI_MALLOC_VERBOSE(nOutBufSize));
5205
0
        if (pszOutBuf == nullptr)
5206
0
        {
5207
0
            inflateEnd(&strm);
5208
0
            return nullptr;
5209
0
        }
5210
0
        pszReallocatableBuf = pszOutBuf;
5211
0
        bAllowResizeOutptr = true;
5212
0
    }
5213
0
#ifndef HAVE_LIBDEFLATE
5214
0
    else
5215
0
    {
5216
0
        pszOutBuf = static_cast<char *>(outptr);
5217
0
        nOutBufSize = nOutAvailableBytes;
5218
0
        if (bAllowResizeOutptr)
5219
0
            pszReallocatableBuf = pszOutBuf;
5220
0
    }
5221
0
#endif
5222
5223
0
    strm.next_in = static_cast<Bytef *>(const_cast<void *>(ptr));
5224
0
    strm.next_out = reinterpret_cast<Bytef *>(pszOutBuf);
5225
0
    size_t nInBytesRemaining = nBytes;
5226
0
    size_t nOutBytesRemaining = nOutBufSize;
5227
5228
0
    while (true)
5229
0
    {
5230
0
        strm.avail_in = static_cast<uInt>(std::min<size_t>(
5231
0
            nInBytesRemaining, std::numeric_limits<uInt>::max()));
5232
0
        const auto avail_in_before = strm.avail_in;
5233
0
        strm.avail_out = static_cast<uInt>(std::min<size_t>(
5234
0
            nOutBytesRemaining, std::numeric_limits<uInt>::max()));
5235
0
        const auto avail_out_before = strm.avail_out;
5236
0
        ret = inflate(&strm, Z_FINISH);
5237
0
        nInBytesRemaining -= (avail_in_before - strm.avail_in);
5238
0
        nOutBytesRemaining -= (avail_out_before - strm.avail_out);
5239
5240
0
        if (ret == Z_BUF_ERROR && strm.avail_out == 0)
5241
0
        {
5242
#ifdef HAVE_LIBDEFLATE
5243
            CPLAssert(bAllowResizeOutptr);
5244
#else
5245
0
            if (!bAllowResizeOutptr)
5246
0
            {
5247
0
                VSIFree(pszReallocatableBuf);
5248
0
                inflateEnd(&strm);
5249
0
                return nullptr;
5250
0
            }
5251
0
#endif
5252
5253
0
            const size_t nAlreadyWritten = nOutBufSize - nOutBytesRemaining;
5254
0
            if (nOutBufSize > (std::numeric_limits<size_t>::max() - 1) / 2)
5255
0
            {
5256
0
                VSIFree(pszReallocatableBuf);
5257
0
                inflateEnd(&strm);
5258
0
                return nullptr;
5259
0
            }
5260
0
            nOutBufSize = nOutBufSize * 2 + 1;
5261
0
            char *pszNew = static_cast<char *>(
5262
0
                VSI_REALLOC_VERBOSE(pszReallocatableBuf, nOutBufSize));
5263
0
            if (!pszNew)
5264
0
            {
5265
0
                VSIFree(pszReallocatableBuf);
5266
0
                inflateEnd(&strm);
5267
0
                return nullptr;
5268
0
            }
5269
0
            pszOutBuf = pszNew;
5270
0
            pszReallocatableBuf = pszOutBuf;
5271
0
            nOutBytesRemaining = nOutBufSize - nAlreadyWritten;
5272
0
            strm.next_out =
5273
0
                reinterpret_cast<Bytef *>(pszOutBuf + nAlreadyWritten);
5274
0
        }
5275
0
        else if (ret != Z_OK || nInBytesRemaining == 0)
5276
0
            break;
5277
0
    }
5278
5279
0
    if (ret == Z_OK || ret == Z_STREAM_END)
5280
0
    {
5281
0
        size_t nOutBytes = nOutBufSize - nOutBytesRemaining;
5282
        // Nul-terminate if possible.
5283
0
        if (nOutBytes < nOutBufSize)
5284
0
        {
5285
0
            pszOutBuf[nOutBytes] = '\0';
5286
0
        }
5287
0
        inflateEnd(&strm);
5288
0
        if (pnOutBytes != nullptr)
5289
0
            *pnOutBytes = nOutBytes;
5290
0
        return pszOutBuf;
5291
0
    }
5292
0
    else
5293
0
    {
5294
0
        VSIFree(pszReallocatableBuf);
5295
0
        inflateEnd(&strm);
5296
0
        return nullptr;
5297
0
    }
5298
0
}