/src/gdal/apps/gdalalg_vsi_sozip.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: GDAL |
4 | | * Purpose: gdal "sozip" subcommand |
5 | | * Author: Even Rouault <even dot rouault at spatialys.com> |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2025, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #include "gdalalg_vsi_sozip.h" |
14 | | |
15 | | #include "cpl_conv.h" |
16 | | #include "cpl_string.h" |
17 | | #include "cpl_time.h" |
18 | | |
19 | | #include <cstdlib> |
20 | | #include <limits> |
21 | | |
22 | | //! @cond Doxygen_Suppress |
23 | | |
24 | | #ifndef _ |
25 | 0 | #define _(x) (x) |
26 | | #endif |
27 | | |
28 | | /************************************************************************/ |
29 | | /* GDALVSISOZIPCreateBaseAlgorithm */ |
30 | | /************************************************************************/ |
31 | | |
32 | | class GDALVSISOZIPCreateBaseAlgorithm /* non final */ : public GDALAlgorithm |
33 | | { |
34 | | protected: |
35 | | GDALVSISOZIPCreateBaseAlgorithm(const std::string &name, |
36 | | const std::string &description, |
37 | | const std::string &helpURL, |
38 | | bool optimizeFrom) |
39 | 0 | : GDALAlgorithm(name, description, helpURL), |
40 | 0 | m_optimizeFrom(optimizeFrom) |
41 | 0 | { |
42 | 0 | AddProgressArg(); |
43 | 0 | if (optimizeFrom) |
44 | 0 | AddArg("input", 'i', _("Input ZIP filename"), &m_inputFilenames) |
45 | 0 | .SetRequired() |
46 | 0 | .SetPositional() |
47 | 0 | .SetMaxCount(1); |
48 | 0 | else |
49 | 0 | AddArg("input", 'i', _("Input filenames"), &m_inputFilenames) |
50 | 0 | .SetRequired() |
51 | 0 | .SetPositional(); |
52 | 0 | AddArg("output", 'o', _("Output ZIP filename"), &m_zipFilename) |
53 | 0 | .SetRequired() |
54 | 0 | .SetPositional() |
55 | 0 | .AddValidationAction( |
56 | 0 | [this]() |
57 | 0 | { |
58 | 0 | if (!EQUAL( |
59 | 0 | CPLGetExtensionSafe(m_zipFilename.c_str()).c_str(), |
60 | 0 | "zip")) |
61 | 0 | { |
62 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
63 | 0 | "Extension of zip filename should be .zip"); |
64 | 0 | return false; |
65 | 0 | } |
66 | 0 | return true; |
67 | 0 | }); |
68 | 0 | AddOverwriteArg(&m_overwrite); |
69 | 0 | if (!optimizeFrom) |
70 | 0 | { |
71 | 0 | AddArg("recursive", 'r', |
72 | 0 | _("Travels the directory structure of the specified " |
73 | 0 | "directories recursively"), |
74 | 0 | &m_recursive) |
75 | 0 | .AddHiddenAlias("recurse"); |
76 | 0 | } |
77 | 0 | if (!optimizeFrom) |
78 | 0 | { |
79 | 0 | AddArg("no-paths", 'j', |
80 | 0 | _("Store just the name of a saved file, and do not store " |
81 | 0 | "directory names"), |
82 | 0 | &m_noDirName) |
83 | 0 | .AddAlias("junk-paths"); |
84 | 0 | } |
85 | 0 | AddArg("enable-sozip", 0, |
86 | 0 | _("Whether to automatically/systematically/never apply the " |
87 | 0 | "SOZIP optimization"), |
88 | 0 | &m_mode) |
89 | 0 | .SetDefault(m_mode) |
90 | 0 | .SetChoices("auto", "yes", "no"); |
91 | 0 | AddArg("sozip-chunk-size", 0, _("Chunk size for a seek-optimized file"), |
92 | 0 | &m_chunkSize) |
93 | 0 | .SetMetaVar("<value in bytes or with K/M suffix>") |
94 | 0 | .SetDefault(m_chunkSize) |
95 | 0 | .SetMinCharCount(1); |
96 | 0 | AddArg( |
97 | 0 | "sozip-min-file-size", 0, |
98 | 0 | _("Minimum file size to decide if a file should be seek-optimized"), |
99 | 0 | &m_minFileSize) |
100 | 0 | .SetMetaVar("<value in bytes or with K/M/G suffix>") |
101 | 0 | .SetDefault(m_minFileSize) |
102 | 0 | .SetMinCharCount(1); |
103 | 0 | if (!optimizeFrom) |
104 | 0 | AddArg("content-type", 0, |
105 | 0 | _("Store the Content-Type of the file being added."), |
106 | 0 | &m_contentType) |
107 | 0 | .SetMinCharCount(1); |
108 | |
|
109 | 0 | AddOutputStringArg(&m_output); |
110 | 0 | AddStdoutArg(&m_stdout); |
111 | 0 | } |
112 | | |
113 | | private: |
114 | | const bool m_optimizeFrom; |
115 | | std::vector<std::string> m_inputFilenames{}; |
116 | | std::string m_zipFilename{}; |
117 | | bool m_overwrite = false; |
118 | | bool m_recursive = false; |
119 | | bool m_noDirName = false; |
120 | | std::string m_mode = "auto"; |
121 | | std::string m_chunkSize = "32768"; |
122 | | std::string m_minFileSize = "1 MB"; |
123 | | std::string m_contentType{}; |
124 | | std::string m_output{}; |
125 | | bool m_stdout = false; |
126 | | |
127 | | bool RunImpl(GDALProgressFunc, void *) override; |
128 | | |
129 | | void Output(const std::string &s) |
130 | 0 | { |
131 | 0 | if (!m_quiet) |
132 | 0 | { |
133 | 0 | if (m_stdout) |
134 | 0 | printf("%s", s.c_str()); |
135 | 0 | else |
136 | 0 | m_output += s; |
137 | 0 | } |
138 | 0 | } |
139 | | }; |
140 | | |
141 | | /************************************************************************/ |
142 | | /* GDALVSISOZIPCreateBaseAlgorithm::RunImpl() */ |
143 | | /************************************************************************/ |
144 | | |
145 | | bool GDALVSISOZIPCreateBaseAlgorithm::RunImpl(GDALProgressFunc pfnProgress, |
146 | | void *pProgressData) |
147 | 0 | { |
148 | 0 | CPLStringList aosOptions; |
149 | 0 | aosOptions.SetNameValue("SOZIP_ENABLED", m_mode.c_str()); |
150 | 0 | aosOptions.SetNameValue("SOZIP_CHUNK_SIZE", m_chunkSize.c_str()); |
151 | 0 | aosOptions.SetNameValue("SOZIP_MIN_FILE_SIZE", m_minFileSize.c_str()); |
152 | 0 | if (!m_contentType.empty()) |
153 | 0 | aosOptions.SetNameValue("CONTENT_TYPE", m_contentType.c_str()); |
154 | |
|
155 | 0 | VSIStatBufL sBuf; |
156 | 0 | CPLStringList aosOptionsCreateZip; |
157 | 0 | if (m_overwrite) |
158 | 0 | { |
159 | 0 | VSIUnlink(m_zipFilename.c_str()); |
160 | 0 | } |
161 | 0 | else |
162 | 0 | { |
163 | 0 | if (VSIStatExL(m_zipFilename.c_str(), &sBuf, VSI_STAT_EXISTS_FLAG) == 0) |
164 | 0 | { |
165 | 0 | if (m_optimizeFrom) |
166 | 0 | { |
167 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
168 | 0 | "%s already exists. Use --overwrite", |
169 | 0 | m_zipFilename.c_str()); |
170 | 0 | return false; |
171 | 0 | } |
172 | 0 | aosOptionsCreateZip.SetNameValue("APPEND", "TRUE"); |
173 | 0 | } |
174 | 0 | } |
175 | | |
176 | 0 | std::vector<std::string> aosFiles = m_inputFilenames; |
177 | 0 | std::string osRemovePrefix; |
178 | 0 | if (m_optimizeFrom) |
179 | 0 | { |
180 | 0 | std::unique_ptr<VSIDIR, decltype(&VSICloseDir)> psDir( |
181 | 0 | VSIOpenDir( |
182 | 0 | std::string("/vsizip/").append(m_inputFilenames[0]).c_str(), -1, |
183 | 0 | nullptr), |
184 | 0 | VSICloseDir); |
185 | 0 | if (!psDir) |
186 | 0 | { |
187 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
188 | 0 | "%s is not a valid .zip file", |
189 | 0 | m_inputFilenames[0].c_str()); |
190 | 0 | return false; |
191 | 0 | } |
192 | | |
193 | 0 | osRemovePrefix = |
194 | 0 | std::string("/vsizip/{").append(m_inputFilenames[0]).append("}/"); |
195 | 0 | while (const auto psEntry = VSIGetNextDirEntry(psDir.get())) |
196 | 0 | { |
197 | 0 | if (!VSI_ISDIR(psEntry->nMode)) |
198 | 0 | { |
199 | 0 | aosFiles.push_back(osRemovePrefix + psEntry->pszName); |
200 | 0 | } |
201 | 0 | } |
202 | 0 | } |
203 | 0 | else if (m_recursive) |
204 | 0 | { |
205 | 0 | std::vector<std::string> aosNewFiles; |
206 | 0 | for (const std::string &osFile : m_inputFilenames) |
207 | 0 | { |
208 | 0 | if (VSIStatL(osFile.c_str(), &sBuf) == 0 && VSI_ISDIR(sBuf.st_mode)) |
209 | 0 | { |
210 | 0 | std::unique_ptr<VSIDIR, decltype(&VSICloseDir)> psDir( |
211 | 0 | VSIOpenDir(osFile.c_str(), -1, nullptr), VSICloseDir); |
212 | 0 | if (!psDir) |
213 | 0 | return false; |
214 | 0 | while (const auto psEntry = VSIGetNextDirEntry(psDir.get())) |
215 | 0 | { |
216 | 0 | if (!VSI_ISDIR(psEntry->nMode)) |
217 | 0 | { |
218 | 0 | std::string osName(osFile); |
219 | 0 | if (osName.back() != '/') |
220 | 0 | osName += '/'; |
221 | 0 | osName += psEntry->pszName; |
222 | 0 | aosNewFiles.push_back(std::move(osName)); |
223 | 0 | if (aosNewFiles.size() > 10 * 1000 * 1000) |
224 | 0 | { |
225 | 0 | ReportError(CE_Failure, CPLE_NotSupported, |
226 | 0 | "Too many source files"); |
227 | 0 | return false; |
228 | 0 | } |
229 | 0 | } |
230 | 0 | } |
231 | 0 | } |
232 | 0 | } |
233 | 0 | aosFiles = std::move(aosNewFiles); |
234 | 0 | } |
235 | | |
236 | 0 | uint64_t nTotalSize = 0; |
237 | 0 | std::vector<uint64_t> anFileSizes; |
238 | |
|
239 | 0 | if (pfnProgress) |
240 | 0 | { |
241 | 0 | #if defined(__GNUC__) |
242 | 0 | #pragma GCC diagnostic push |
243 | 0 | #pragma GCC diagnostic ignored "-Wnull-dereference" |
244 | 0 | #endif |
245 | 0 | anFileSizes.resize(aosFiles.size()); |
246 | 0 | #if defined(__GNUC__) |
247 | 0 | #pragma GCC diagnostic pop |
248 | 0 | #endif |
249 | 0 | for (size_t i = 0; i < aosFiles.size(); ++i) |
250 | 0 | { |
251 | 0 | if (VSIStatL(aosFiles[i].c_str(), &sBuf) == 0) |
252 | 0 | { |
253 | 0 | anFileSizes[i] = sBuf.st_size; |
254 | 0 | nTotalSize += sBuf.st_size; |
255 | 0 | } |
256 | 0 | else |
257 | 0 | { |
258 | 0 | ReportError(CE_Failure, CPLE_AppDefined, "%s does not exist", |
259 | 0 | aosFiles[i].c_str()); |
260 | 0 | return false; |
261 | 0 | } |
262 | 0 | } |
263 | 0 | } |
264 | | |
265 | 0 | std::unique_ptr<void, decltype(&CPLCloseZip)> hZIP( |
266 | 0 | CPLCreateZip(m_zipFilename.c_str(), aosOptionsCreateZip.List()), |
267 | 0 | CPLCloseZip); |
268 | 0 | if (!hZIP) |
269 | 0 | return false; |
270 | | |
271 | 0 | uint64_t nCurSize = 0; |
272 | 0 | for (size_t i = 0; i < aosFiles.size(); ++i) |
273 | 0 | { |
274 | 0 | if (!m_quiet) |
275 | 0 | { |
276 | 0 | Output(CPLSPrintf("Adding %s... (%d/%d)\n", aosFiles[i].c_str(), |
277 | 0 | int(i + 1), static_cast<int>(aosFiles.size()))); |
278 | 0 | } |
279 | |
|
280 | 0 | if (VSIStatL(aosFiles[i].c_str(), &sBuf) != 0) |
281 | 0 | { |
282 | 0 | ReportError(CE_Failure, CPLE_AppDefined, "%s does not exist", |
283 | 0 | aosFiles[i].c_str()); |
284 | 0 | return false; |
285 | 0 | } |
286 | 0 | else if (VSI_ISDIR(sBuf.st_mode)) |
287 | 0 | { |
288 | 0 | ReportError(CE_Failure, CPLE_AppDefined, "%s is a directory", |
289 | 0 | aosFiles[i].c_str()); |
290 | 0 | return false; |
291 | 0 | } |
292 | | |
293 | 0 | std::string osArchiveFilename(aosFiles[i]); |
294 | 0 | if (m_noDirName) |
295 | 0 | { |
296 | 0 | osArchiveFilename = CPLGetFilename(aosFiles[i].c_str()); |
297 | 0 | } |
298 | 0 | else if (!osRemovePrefix.empty() && |
299 | 0 | STARTS_WITH(osArchiveFilename.c_str(), osRemovePrefix.c_str())) |
300 | 0 | { |
301 | 0 | osArchiveFilename = osArchiveFilename.substr(osRemovePrefix.size()); |
302 | 0 | } |
303 | 0 | else if (osArchiveFilename[0] == '/') |
304 | 0 | { |
305 | 0 | osArchiveFilename = osArchiveFilename.substr(1); |
306 | 0 | } |
307 | 0 | else if (osArchiveFilename.size() > 3 && osArchiveFilename[1] == ':' && |
308 | 0 | (osArchiveFilename[2] == '/' || osArchiveFilename[2] == '\\')) |
309 | 0 | { |
310 | 0 | osArchiveFilename = osArchiveFilename.substr(3); |
311 | 0 | } |
312 | |
|
313 | 0 | std::unique_ptr<void, decltype(&GDALDestroyScaledProgress)> |
314 | 0 | pScaledProgress(nullptr, GDALDestroyScaledProgress); |
315 | 0 | if (nTotalSize != 0) |
316 | 0 | { |
317 | 0 | pScaledProgress.reset(GDALCreateScaledProgress( |
318 | 0 | double(nCurSize) / nTotalSize, |
319 | 0 | double(nCurSize + anFileSizes[i]) / nTotalSize, pfnProgress, |
320 | 0 | pProgressData)); |
321 | 0 | nCurSize += anFileSizes[i]; |
322 | 0 | } |
323 | |
|
324 | 0 | const CPLErr eErr = CPLAddFileInZip( |
325 | 0 | hZIP.get(), osArchiveFilename.c_str(), aosFiles[i].c_str(), nullptr, |
326 | 0 | aosOptions.List(), pScaledProgress ? GDALScaledProgress : nullptr, |
327 | 0 | pScaledProgress.get()); |
328 | 0 | if (eErr != CE_None) |
329 | 0 | { |
330 | 0 | ReportError(CE_Failure, CPLE_AppDefined, "Failed adding %s", |
331 | 0 | aosFiles[i].c_str()); |
332 | 0 | return false; |
333 | 0 | } |
334 | 0 | } |
335 | | |
336 | 0 | return true; |
337 | 0 | } |
338 | | |
339 | | /************************************************************************/ |
340 | | /* GDALVSISOZIPCreateAlgorithm */ |
341 | | /************************************************************************/ |
342 | | |
343 | | class GDALVSISOZIPCreateAlgorithm final : public GDALVSISOZIPCreateBaseAlgorithm |
344 | | { |
345 | | public: |
346 | | static constexpr const char *NAME = "create"; |
347 | | static constexpr const char *DESCRIPTION = |
348 | | "Create a Seek-optimized ZIP (SOZIP) file."; |
349 | | static constexpr const char *HELP_URL = "/programs/gdal_vsi_sozip.html"; |
350 | | |
351 | | GDALVSISOZIPCreateAlgorithm() |
352 | 0 | : GDALVSISOZIPCreateBaseAlgorithm(NAME, DESCRIPTION, HELP_URL, false) |
353 | 0 | { |
354 | 0 | } |
355 | | |
356 | | ~GDALVSISOZIPCreateAlgorithm() override; |
357 | | }; |
358 | | |
359 | 0 | GDALVSISOZIPCreateAlgorithm::~GDALVSISOZIPCreateAlgorithm() = default; |
360 | | |
361 | | /************************************************************************/ |
362 | | /* GDALVSISOZIPOptimizeAlgorithm */ |
363 | | /************************************************************************/ |
364 | | |
365 | | class GDALVSISOZIPOptimizeAlgorithm final |
366 | | : public GDALVSISOZIPCreateBaseAlgorithm |
367 | | { |
368 | | public: |
369 | | static constexpr const char *NAME = "optimize"; |
370 | | static constexpr const char *DESCRIPTION = |
371 | | "Create a Seek-optimized ZIP (SOZIP) file from a regular ZIP file."; |
372 | | static constexpr const char *HELP_URL = "/programs/gdal_vsi_sozip.html"; |
373 | | |
374 | | GDALVSISOZIPOptimizeAlgorithm() |
375 | 0 | : GDALVSISOZIPCreateBaseAlgorithm(NAME, DESCRIPTION, HELP_URL, true) |
376 | 0 | { |
377 | 0 | } |
378 | | |
379 | | ~GDALVSISOZIPOptimizeAlgorithm() override; |
380 | | }; |
381 | | |
382 | 0 | GDALVSISOZIPOptimizeAlgorithm::~GDALVSISOZIPOptimizeAlgorithm() = default; |
383 | | |
384 | | /************************************************************************/ |
385 | | /* GDALVSISOZIPListAlgorithm */ |
386 | | /************************************************************************/ |
387 | | |
388 | | class GDALVSISOZIPListAlgorithm final : public GDALAlgorithm |
389 | | { |
390 | | public: |
391 | | static constexpr const char *NAME = "list"; |
392 | | static constexpr const char *DESCRIPTION = |
393 | | "List content of a ZIP file, with SOZIP related information."; |
394 | | static constexpr const char *HELP_URL = "/programs/gdal_vsi_sozip.html"; |
395 | | |
396 | 0 | GDALVSISOZIPListAlgorithm() : GDALAlgorithm(NAME, DESCRIPTION, HELP_URL) |
397 | 0 | { |
398 | 0 | AddArg("input", 'i', _("Input ZIP filename"), &m_zipFilename) |
399 | 0 | .SetRequired() |
400 | 0 | .SetPositional(); |
401 | 0 | AddOutputStringArg(&m_output); |
402 | 0 | } |
403 | | |
404 | | private: |
405 | | std::string m_zipFilename{}; |
406 | | std::string m_output{}; |
407 | | |
408 | | bool RunImpl(GDALProgressFunc, void *) override; |
409 | | }; |
410 | | |
411 | | /************************************************************************/ |
412 | | /* GDALVSISOZIPListAlgorithm::RunImpl() */ |
413 | | /************************************************************************/ |
414 | | |
415 | | bool GDALVSISOZIPListAlgorithm::RunImpl(GDALProgressFunc, void *) |
416 | 0 | { |
417 | 0 | std::unique_ptr<VSIDIR, decltype(&VSICloseDir)> psDir( |
418 | 0 | VSIOpenDir(std::string("/vsizip/").append(m_zipFilename).c_str(), -1, |
419 | 0 | nullptr), |
420 | 0 | VSICloseDir); |
421 | 0 | if (!psDir) |
422 | 0 | { |
423 | 0 | ReportError(CE_Failure, CPLE_AppDefined, "%s is not a valid .zip file", |
424 | 0 | m_zipFilename.c_str()); |
425 | 0 | return false; |
426 | 0 | } |
427 | | |
428 | 0 | m_output = " Length DateTime Seek-optimized / chunk size " |
429 | 0 | "Name Properties\n"; |
430 | | /* clang-format off */ |
431 | 0 | m_output += "----------- ------------------- --------------------------- ----------------- --------------\n"; |
432 | | /* clang-format on */ |
433 | |
|
434 | 0 | while (const auto psEntry = VSIGetNextDirEntry(psDir.get())) |
435 | 0 | { |
436 | 0 | if (!VSI_ISDIR(psEntry->nMode)) |
437 | 0 | { |
438 | 0 | struct tm brokenDown; |
439 | 0 | CPLUnixTimeToYMDHMS(psEntry->nMTime, &brokenDown); |
440 | 0 | const std::string osFilename = std::string("/vsizip/{") |
441 | 0 | .append(m_zipFilename) |
442 | 0 | .append("}/") |
443 | 0 | .append(psEntry->pszName); |
444 | 0 | std::string osProperties; |
445 | 0 | const CPLStringList aosMDGeneric( |
446 | 0 | VSIGetFileMetadata(osFilename.c_str(), nullptr, nullptr)); |
447 | 0 | for (const char *pszMDGeneric : aosMDGeneric) |
448 | 0 | { |
449 | 0 | if (!osProperties.empty()) |
450 | 0 | osProperties += ','; |
451 | 0 | osProperties += pszMDGeneric; |
452 | 0 | } |
453 | |
|
454 | 0 | const CPLStringList aosMD( |
455 | 0 | VSIGetFileMetadata(osFilename.c_str(), "ZIP", nullptr)); |
456 | 0 | const bool bSeekOptimized = |
457 | 0 | aosMD.FetchNameValue("SOZIP_VALID") != nullptr; |
458 | 0 | const char *pszChunkSize = aosMD.FetchNameValue("SOZIP_CHUNK_SIZE"); |
459 | 0 | m_output += CPLSPrintf( |
460 | 0 | "%11" CPL_FRMT_GB_WITHOUT_PREFIX |
461 | 0 | "u %04d-%02d-%02d %02d:%02d:%02d %s %s " |
462 | 0 | "%s\n", |
463 | 0 | static_cast<GUIntBig>(psEntry->nSize), |
464 | 0 | brokenDown.tm_year + 1900, brokenDown.tm_mon + 1, |
465 | 0 | brokenDown.tm_mday, brokenDown.tm_hour, brokenDown.tm_min, |
466 | 0 | brokenDown.tm_sec, |
467 | 0 | bSeekOptimized |
468 | 0 | ? CPLSPrintf(" yes (%9s bytes) ", pszChunkSize) |
469 | 0 | : " ", |
470 | 0 | psEntry->pszName, osProperties.c_str()); |
471 | 0 | } |
472 | 0 | } |
473 | 0 | return true; |
474 | 0 | } |
475 | | |
476 | | /************************************************************************/ |
477 | | /* GDALVSISOZIPValidateAlgorithm */ |
478 | | /************************************************************************/ |
479 | | |
480 | | class GDALVSISOZIPValidateAlgorithm final : public GDALAlgorithm |
481 | | { |
482 | | public: |
483 | | static constexpr const char *NAME = "validate"; |
484 | | static constexpr const char *DESCRIPTION = |
485 | | "Validate a ZIP file, possibly using SOZIP optimization."; |
486 | | static constexpr const char *HELP_URL = "/programs/gdal_vsi_sozip.html"; |
487 | | |
488 | 0 | GDALVSISOZIPValidateAlgorithm() : GDALAlgorithm(NAME, DESCRIPTION, HELP_URL) |
489 | 0 | { |
490 | 0 | AddArg("input", 'i', _("Input ZIP filename"), &m_zipFilename) |
491 | 0 | .SetRequired() |
492 | 0 | .SetPositional(); |
493 | 0 | AddOutputStringArg(&m_output); |
494 | 0 | AddArg("verbose", 'v', _("Turn on verbose mode"), &m_verbose) |
495 | 0 | .SetHiddenForAPI(); |
496 | 0 | AddStdoutArg(&m_stdout); |
497 | 0 | } |
498 | | |
499 | | private: |
500 | | std::string m_zipFilename{}; |
501 | | std::string m_output{}; |
502 | | bool m_stdout = false; |
503 | | bool m_verbose = false; |
504 | | |
505 | | bool RunImpl(GDALProgressFunc, void *) override; |
506 | | |
507 | | void Output(const std::string &s) |
508 | 0 | { |
509 | 0 | if (!m_quiet) |
510 | 0 | { |
511 | 0 | if (m_stdout) |
512 | 0 | printf("%s", s.c_str()); |
513 | 0 | else |
514 | 0 | m_output += s; |
515 | 0 | } |
516 | 0 | } |
517 | | }; |
518 | | |
519 | | /************************************************************************/ |
520 | | /* GDALVSISOZIPValidateAlgorithm::RunImpl() */ |
521 | | /************************************************************************/ |
522 | | |
523 | | bool GDALVSISOZIPValidateAlgorithm::RunImpl(GDALProgressFunc, void *) |
524 | 0 | { |
525 | 0 | std::unique_ptr<VSIDIR, decltype(&VSICloseDir)> psDir( |
526 | 0 | VSIOpenDir(std::string("/vsizip/").append(m_zipFilename).c_str(), -1, |
527 | 0 | nullptr), |
528 | 0 | VSICloseDir); |
529 | 0 | if (!psDir) |
530 | 0 | { |
531 | 0 | ReportError(CE_Failure, CPLE_AppDefined, "%s is not a valid .zip file", |
532 | 0 | m_zipFilename.c_str()); |
533 | 0 | return false; |
534 | 0 | } |
535 | | |
536 | 0 | int nCountValidSOZIP = 0; |
537 | 0 | bool ret = true; |
538 | 0 | const bool bVerbose = m_verbose; |
539 | 0 | while (const auto psEntry = VSIGetNextDirEntry(psDir.get())) |
540 | 0 | { |
541 | 0 | if (!VSI_ISDIR(psEntry->nMode)) |
542 | 0 | { |
543 | 0 | const std::string osFilenameInZip = std::string("/vsizip/{") |
544 | 0 | .append(m_zipFilename) |
545 | 0 | .append("}/") |
546 | 0 | .append(psEntry->pszName); |
547 | 0 | if (bVerbose) |
548 | 0 | Output(CPLSPrintf("Testing %s...\n", psEntry->pszName)); |
549 | |
|
550 | 0 | const CPLStringList aosMD( |
551 | 0 | VSIGetFileMetadata(osFilenameInZip.c_str(), "ZIP", nullptr)); |
552 | 0 | bool bSeekOptimizedFound = |
553 | 0 | aosMD.FetchNameValue("SOZIP_FOUND") != nullptr; |
554 | 0 | bool bSeekOptimizedValid = |
555 | 0 | aosMD.FetchNameValue("SOZIP_VALID") != nullptr; |
556 | 0 | const char *pszChunkSize = aosMD.FetchNameValue("SOZIP_CHUNK_SIZE"); |
557 | 0 | if (bSeekOptimizedValid) |
558 | 0 | { |
559 | 0 | if (bVerbose) |
560 | 0 | { |
561 | 0 | Output( |
562 | 0 | CPLSPrintf(" %s has an associated .sozip.idx file\n", |
563 | 0 | psEntry->pszName)); |
564 | 0 | } |
565 | |
|
566 | 0 | const char *pszStartIdxDataOffset = |
567 | 0 | aosMD.FetchNameValue("SOZIP_START_DATA_OFFSET"); |
568 | 0 | const vsi_l_offset nStartIdxOffset = |
569 | 0 | std::strtoull(pszStartIdxDataOffset, nullptr, 10); |
570 | 0 | VSILFILE *fpRaw = VSIFOpenL(m_zipFilename.c_str(), "rb"); |
571 | 0 | CPLAssert(fpRaw); |
572 | | |
573 | 0 | if (VSIFSeekL(fpRaw, nStartIdxOffset + 4, SEEK_SET) != 0) |
574 | 0 | { |
575 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
576 | 0 | "VSIFSeekL() failed."); |
577 | 0 | ret = false; |
578 | 0 | } |
579 | 0 | uint32_t nToSkip = 0; |
580 | 0 | if (VSIFReadL(&nToSkip, sizeof(nToSkip), 1, fpRaw) != 1) |
581 | 0 | { |
582 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
583 | 0 | "VSIFReadL() failed."); |
584 | 0 | ret = false; |
585 | 0 | } |
586 | 0 | CPL_LSBPTR32(&nToSkip); |
587 | |
|
588 | 0 | if (VSIFSeekL(fpRaw, nStartIdxOffset + 32 + nToSkip, |
589 | 0 | SEEK_SET) != 0) |
590 | 0 | { |
591 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
592 | 0 | "VSIFSeekL() failed."); |
593 | 0 | ret = false; |
594 | 0 | } |
595 | 0 | const int nChunkSize = atoi(pszChunkSize); |
596 | 0 | const uint64_t nCompressedSize = std::strtoull( |
597 | 0 | aosMD.FetchNameValue("COMPRESSED_SIZE"), nullptr, 10); |
598 | 0 | const uint64_t nUncompressedSize = std::strtoull( |
599 | 0 | aosMD.FetchNameValue("UNCOMPRESSED_SIZE"), nullptr, 10); |
600 | 0 | if (nChunkSize == 0 || // cannot happen |
601 | 0 | (nUncompressedSize - 1) / nChunkSize > |
602 | 0 | static_cast<uint64_t>(std::numeric_limits<int>::max())) |
603 | 0 | { |
604 | 0 | ReportError( |
605 | 0 | CE_Failure, CPLE_AppDefined, |
606 | 0 | "* File %s has a SOZip index, but (nUncompressedSize - " |
607 | 0 | "1) / nChunkSize > INT_MAX !", |
608 | 0 | psEntry->pszName); |
609 | 0 | ret = false; |
610 | 0 | continue; |
611 | 0 | } |
612 | | |
613 | 0 | int nChunksItems = |
614 | 0 | static_cast<int>((nUncompressedSize - 1) / nChunkSize); |
615 | |
|
616 | 0 | if (bVerbose) |
617 | 0 | { |
618 | 0 | Output(CPLSPrintf(" %s: checking index offset values...\n", |
619 | 0 | psEntry->pszName)); |
620 | 0 | } |
621 | |
|
622 | 0 | std::vector<uint64_t> anOffsets; |
623 | 0 | try |
624 | 0 | { |
625 | 0 | anOffsets.reserve(nChunksItems); |
626 | 0 | } |
627 | 0 | catch (const std::exception &) |
628 | 0 | { |
629 | 0 | nChunksItems = 0; |
630 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
631 | 0 | "Cannot allocate memory for chunk offsets."); |
632 | 0 | ret = false; |
633 | 0 | } |
634 | |
|
635 | 0 | for (int i = 0; i < nChunksItems; ++i) |
636 | 0 | { |
637 | 0 | uint64_t nOffset64 = 0; |
638 | 0 | if (VSIFReadL(&nOffset64, sizeof(nOffset64), 1, fpRaw) != 1) |
639 | 0 | { |
640 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
641 | 0 | "VSIFReadL() failed."); |
642 | 0 | ret = false; |
643 | 0 | } |
644 | 0 | CPL_LSBPTR64(&nOffset64); |
645 | 0 | if (nOffset64 >= nCompressedSize) |
646 | 0 | { |
647 | 0 | bSeekOptimizedValid = false; |
648 | 0 | ReportError( |
649 | 0 | CE_Failure, CPLE_AppDefined, |
650 | 0 | "Error: file %s, offset[%d] (= " CPL_FRMT_GUIB |
651 | 0 | ") >= compressed_size is invalid.", |
652 | 0 | psEntry->pszName, i, |
653 | 0 | static_cast<GUIntBig>(nOffset64)); |
654 | 0 | } |
655 | 0 | if (!anOffsets.empty()) |
656 | 0 | { |
657 | 0 | const auto nPrevOffset = anOffsets.back(); |
658 | 0 | if (nOffset64 <= nPrevOffset) |
659 | 0 | { |
660 | 0 | bSeekOptimizedValid = false; |
661 | 0 | ReportError( |
662 | 0 | CE_Failure, CPLE_AppDefined, |
663 | 0 | "Error: file %s, offset[%d] (= " CPL_FRMT_GUIB |
664 | 0 | ") <= offset[%d] (= " CPL_FRMT_GUIB ")", |
665 | 0 | psEntry->pszName, i + 1, |
666 | 0 | static_cast<GUIntBig>(nOffset64), i, |
667 | 0 | static_cast<GUIntBig>(nPrevOffset)); |
668 | 0 | } |
669 | 0 | } |
670 | 0 | else if (nOffset64 < 9) |
671 | 0 | { |
672 | 0 | bSeekOptimizedValid = false; |
673 | 0 | ReportError( |
674 | 0 | CE_Failure, CPLE_AppDefined, |
675 | 0 | "Error: file %s, offset[0] (= " CPL_FRMT_GUIB |
676 | 0 | ") is invalid.", |
677 | 0 | psEntry->pszName, static_cast<GUIntBig>(nOffset64)); |
678 | 0 | } |
679 | 0 | anOffsets.push_back(nOffset64); |
680 | 0 | } |
681 | |
|
682 | 0 | if (bVerbose) |
683 | 0 | { |
684 | 0 | Output(CPLSPrintf(" %s: checking if chunks can be " |
685 | 0 | "independently decompressed...\n", |
686 | 0 | psEntry->pszName)); |
687 | 0 | } |
688 | |
|
689 | 0 | const char *pszStartDataOffset = |
690 | 0 | aosMD.FetchNameValue("START_DATA_OFFSET"); |
691 | 0 | const vsi_l_offset nStartOffset = |
692 | 0 | std::strtoull(pszStartDataOffset, nullptr, 10); |
693 | 0 | VSILFILE *fp = VSIFOpenL(osFilenameInZip.c_str(), "rb"); |
694 | 0 | if (!fp) |
695 | 0 | { |
696 | 0 | bSeekOptimizedValid = false; |
697 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
698 | 0 | "Error: cannot open %s", |
699 | 0 | osFilenameInZip.c_str()); |
700 | 0 | } |
701 | 0 | std::vector<GByte> abyData; |
702 | 0 | try |
703 | 0 | { |
704 | 0 | abyData.resize(nChunkSize); |
705 | 0 | } |
706 | 0 | catch (const std::exception &) |
707 | 0 | { |
708 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
709 | 0 | "Cannot allocate memory for chunk data."); |
710 | 0 | ret = false; |
711 | 0 | } |
712 | 0 | for (int i = 0; fp != nullptr && i < nChunksItems; ++i) |
713 | 0 | { |
714 | 0 | if (VSIFSeekL(fpRaw, nStartOffset + anOffsets[i] - 9, |
715 | 0 | SEEK_SET) != 0) |
716 | 0 | { |
717 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
718 | 0 | "VSIFSeekL() failed."); |
719 | 0 | ret = false; |
720 | 0 | } |
721 | 0 | GByte abyEnd[9] = {0}; |
722 | 0 | if (VSIFReadL(abyEnd, 9, 1, fpRaw) != 1) |
723 | 0 | { |
724 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
725 | 0 | "VSIFReadL() failed."); |
726 | 0 | ret = false; |
727 | 0 | } |
728 | 0 | if (memcmp(abyEnd, "\x00\x00\xFF\xFF\x00\x00\x00\xFF\xFF", |
729 | 0 | 9) != 0) |
730 | 0 | { |
731 | 0 | bSeekOptimizedValid = false; |
732 | 0 | ReportError( |
733 | 0 | CE_Failure, CPLE_AppDefined, |
734 | 0 | "Error: file %s, chunk[%d] is not terminated by " |
735 | 0 | "\\x00\\x00\\xFF\\xFF\\x00\\x00\\x00\\xFF\\xFF.", |
736 | 0 | psEntry->pszName, i); |
737 | 0 | } |
738 | 0 | if (!abyData.empty()) |
739 | 0 | { |
740 | 0 | if (VSIFSeekL(fp, |
741 | 0 | static_cast<vsi_l_offset>(i) * nChunkSize, |
742 | 0 | SEEK_SET) != 0) |
743 | 0 | { |
744 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
745 | 0 | "VSIFSeekL() failed."); |
746 | 0 | ret = false; |
747 | 0 | } |
748 | 0 | const size_t nRead = |
749 | 0 | VSIFReadL(&abyData[0], 1, nChunkSize, fp); |
750 | 0 | if (nRead != static_cast<size_t>(nChunkSize)) |
751 | 0 | { |
752 | 0 | bSeekOptimizedValid = false; |
753 | 0 | ReportError( |
754 | 0 | CE_Failure, CPLE_AppDefined, |
755 | 0 | "Error: file %s, chunk[%d] cannot be fully " |
756 | 0 | "read.", |
757 | 0 | psEntry->pszName, i); |
758 | 0 | } |
759 | 0 | } |
760 | 0 | } |
761 | |
|
762 | 0 | if (fp) |
763 | 0 | { |
764 | 0 | if (VSIFSeekL(fp, |
765 | 0 | static_cast<vsi_l_offset>(nChunksItems) * |
766 | 0 | nChunkSize, |
767 | 0 | SEEK_SET) != 0) |
768 | 0 | { |
769 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
770 | 0 | "VSIFSeekL() failed."); |
771 | 0 | ret = false; |
772 | 0 | } |
773 | 0 | const size_t nRead = |
774 | 0 | VSIFReadL(&abyData[0], 1, nChunkSize, fp); |
775 | 0 | if (nRead != static_cast<size_t>( |
776 | 0 | nUncompressedSize - |
777 | 0 | static_cast<vsi_l_offset>(nChunksItems) * |
778 | 0 | nChunkSize)) |
779 | 0 | { |
780 | 0 | bSeekOptimizedValid = false; |
781 | 0 | ReportError( |
782 | 0 | CE_Failure, CPLE_AppDefined, |
783 | 0 | "Error: file %s, chunk[%d] cannot be fully read.", |
784 | 0 | psEntry->pszName, nChunksItems); |
785 | 0 | } |
786 | |
|
787 | 0 | VSIFCloseL(fp); |
788 | 0 | } |
789 | |
|
790 | 0 | VSIFCloseL(fpRaw); |
791 | 0 | } |
792 | | |
793 | 0 | if (bSeekOptimizedValid) |
794 | 0 | { |
795 | 0 | Output(CPLSPrintf( |
796 | 0 | "* File %s has a valid SOZip index, using chunk_size = " |
797 | 0 | "%s.\n", |
798 | 0 | psEntry->pszName, pszChunkSize)); |
799 | 0 | nCountValidSOZIP++; |
800 | 0 | } |
801 | 0 | else if (bSeekOptimizedFound) |
802 | 0 | { |
803 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
804 | 0 | "* File %s has a SOZip index, but is is invalid!", |
805 | 0 | psEntry->pszName); |
806 | 0 | ret = false; |
807 | 0 | } |
808 | 0 | } |
809 | 0 | } |
810 | | |
811 | 0 | if (ret) |
812 | 0 | { |
813 | 0 | if (nCountValidSOZIP > 0) |
814 | 0 | { |
815 | 0 | Output("-----\n"); |
816 | 0 | Output(CPLSPrintf( |
817 | 0 | "%s is a valid .zip file, and contains %d SOZip-enabled " |
818 | 0 | "file(s).\n", |
819 | 0 | m_zipFilename.c_str(), nCountValidSOZIP)); |
820 | 0 | } |
821 | 0 | else |
822 | 0 | Output( |
823 | 0 | CPLSPrintf("%s is a valid .zip file, but does not contain any " |
824 | 0 | "SOZip-enabled files.\n", |
825 | 0 | m_zipFilename.c_str())); |
826 | 0 | } |
827 | 0 | else |
828 | 0 | { |
829 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
830 | 0 | "%s is not a valid SOZip file!", m_zipFilename.c_str()); |
831 | 0 | } |
832 | 0 | return ret; |
833 | 0 | } |
834 | | |
835 | | /************************************************************************/ |
836 | | /* GDALVSISOZIPAlgorithm::GDALVSISOZIPAlgorithm() */ |
837 | | /************************************************************************/ |
838 | | |
839 | | GDALVSISOZIPAlgorithm::GDALVSISOZIPAlgorithm() |
840 | 0 | : GDALAlgorithm(NAME, DESCRIPTION, HELP_URL) |
841 | 0 | { |
842 | 0 | RegisterSubAlgorithm<GDALVSISOZIPCreateAlgorithm>(); |
843 | 0 | RegisterSubAlgorithm<GDALVSISOZIPOptimizeAlgorithm>(); |
844 | 0 | RegisterSubAlgorithm<GDALVSISOZIPListAlgorithm>(); |
845 | 0 | RegisterSubAlgorithm<GDALVSISOZIPValidateAlgorithm>(); |
846 | 0 | } |
847 | | |
848 | | /************************************************************************/ |
849 | | /* GDALVSISOZIPAlgorithm::RunImpl() */ |
850 | | /************************************************************************/ |
851 | | |
852 | | bool GDALVSISOZIPAlgorithm::RunImpl(GDALProgressFunc, void *) |
853 | 0 | { |
854 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
855 | 0 | "The Run() method should not be called directly on the \"gdal " |
856 | 0 | "sozip\" program."); |
857 | 0 | return false; |
858 | 0 | } |
859 | | |
860 | | //! @endcond |