/src/gdal/apps/gdalalg_dataset_identify.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: GDAL |
4 | | * Purpose: gdal "dataset identify" subcommand |
5 | | * Author: Even Rouault <even dot rouault at spatialys.com> |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2025, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | //! @cond Doxygen_Suppress |
14 | | |
15 | | #include "gdalalg_dataset_identify.h" |
16 | | |
17 | | #include "cpl_string.h" |
18 | | #include "gdal_dataset.h" |
19 | | #include "gdal_driver.h" |
20 | | #include "gdal_drivermanager.h" |
21 | | #include "gdal_rasterband.h" |
22 | | #include "ogrsf_frmts.h" |
23 | | |
24 | | #ifndef _ |
25 | 0 | #define _(x) (x) |
26 | | #endif |
27 | | |
28 | | /************************************************************************/ |
29 | | /* GDALDatasetIdentifyAlgorithm() */ |
30 | | /************************************************************************/ |
31 | | |
32 | | GDALDatasetIdentifyAlgorithm::GDALDatasetIdentifyAlgorithm() |
33 | 0 | : GDALAlgorithm(NAME, DESCRIPTION, HELP_URL), m_oWriter(JSONPrint, this) |
34 | 0 | { |
35 | 0 | AddProgressArg(); |
36 | |
|
37 | 0 | auto &arg = AddArg("filename", 0, _("File or directory name"), &m_filename) |
38 | 0 | .AddAlias(GDAL_ARG_NAME_INPUT) |
39 | 0 | .SetPositional() |
40 | 0 | .SetRequired(); |
41 | 0 | SetAutoCompleteFunctionForFilename(arg, 0); |
42 | |
|
43 | 0 | AddOutputDatasetArg(&m_outputDataset, GDAL_OF_VECTOR, |
44 | 0 | /* positionalAndRequired = */ false) |
45 | 0 | .SetDatasetInputFlags(GADV_NAME); |
46 | |
|
47 | 0 | AddOutputFormatArg(&m_format) |
48 | 0 | .AddMetadataItem(GAAMDI_REQUIRED_CAPABILITIES, |
49 | 0 | {GDAL_DCAP_VECTOR, GDAL_DCAP_CREATE}) |
50 | 0 | .AddMetadataItem(GAAMDI_EXTRA_FORMATS, {"json", "text"}); |
51 | |
|
52 | 0 | AddCreationOptionsArg(&m_creationOptions); |
53 | 0 | AddLayerCreationOptionsArg(&m_layerCreationOptions); |
54 | 0 | AddArg(GDAL_ARG_NAME_OUTPUT_LAYER, 'l', _("Output layer name"), |
55 | 0 | &m_outputLayerName); |
56 | 0 | AddOverwriteArg(&m_overwrite); |
57 | |
|
58 | 0 | AddArg("recursive", 'r', _("Recursively scan files/folders for datasets"), |
59 | 0 | &m_recursive); |
60 | |
|
61 | 0 | AddArg("force-recursive", 0, |
62 | 0 | _("Recursively scan folders for datasets, forcing " |
63 | 0 | "recursion in folders recognized as valid formats"), |
64 | 0 | &m_forceRecursive); |
65 | |
|
66 | 0 | AddArg("detailed", 0, |
67 | 0 | _("Most detailed output. Reports the presence of georeferencing, " |
68 | 0 | "if a GeoTIFF file is cloud optimized, etc."), |
69 | 0 | &m_detailed); |
70 | |
|
71 | 0 | AddArg("report-failures", 0, |
72 | 0 | _("Report failures if file type is unidentified"), |
73 | 0 | &m_reportFailures); |
74 | |
|
75 | 0 | AddOutputStringArg(&m_output); |
76 | 0 | AddStdoutArg(&m_stdout); |
77 | 0 | } |
78 | | |
79 | | /************************************************************************/ |
80 | | /* ~GDALDatasetIdentifyAlgorithm() */ |
81 | | /************************************************************************/ |
82 | | |
83 | 0 | GDALDatasetIdentifyAlgorithm::~GDALDatasetIdentifyAlgorithm() = default; |
84 | | |
85 | | /************************************************************************/ |
86 | | /* GDALDatasetIdentifyAlgorithm::Print() */ |
87 | | /************************************************************************/ |
88 | | |
89 | | void GDALDatasetIdentifyAlgorithm::Print(const char *str) |
90 | 0 | { |
91 | 0 | if (m_fpOut) |
92 | 0 | m_fpOut->Write(str, 1, strlen(str)); |
93 | 0 | else if (m_stdout) |
94 | 0 | fwrite(str, 1, strlen(str), stdout); |
95 | 0 | else |
96 | 0 | m_output += str; |
97 | 0 | } |
98 | | |
99 | | /************************************************************************/ |
100 | | /* GDALDatasetIdentifyAlgorithm::JSONPrint() */ |
101 | | /************************************************************************/ |
102 | | |
103 | | /* static */ void GDALDatasetIdentifyAlgorithm::JSONPrint(const char *pszTxt, |
104 | | void *pUserData) |
105 | 0 | { |
106 | 0 | static_cast<GDALDatasetIdentifyAlgorithm *>(pUserData)->Print(pszTxt); |
107 | 0 | } |
108 | | |
109 | | /************************************************************************/ |
110 | | /* Process() */ |
111 | | /************************************************************************/ |
112 | | |
113 | | bool GDALDatasetIdentifyAlgorithm::Process(const char *pszTarget, |
114 | | CSLConstList papszSiblingList, |
115 | | GDALProgressFunc pfnProgress, |
116 | | void *pProgressData) |
117 | | |
118 | 0 | { |
119 | 0 | if (IsCalledFromCommandLine()) |
120 | 0 | pfnProgress = nullptr; |
121 | |
|
122 | 0 | if (m_format.empty()) |
123 | 0 | m_format = IsCalledFromCommandLine() ? "text" : "json"; |
124 | |
|
125 | 0 | GDALDriverH hDriver = nullptr; |
126 | 0 | { |
127 | 0 | CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler); |
128 | 0 | hDriver = GDALIdentifyDriver(pszTarget, papszSiblingList); |
129 | 0 | } |
130 | |
|
131 | 0 | const char *pszDriverName = hDriver ? GDALGetDriverShortName(hDriver) : ""; |
132 | |
|
133 | 0 | CPLStringList aosFileList; |
134 | 0 | std::string osLayout; |
135 | 0 | bool bHasCRS = false; |
136 | 0 | bool bHasGeoTransform = false; |
137 | 0 | bool bHasOverview = false; |
138 | 0 | if (hDriver && m_detailed) |
139 | 0 | { |
140 | 0 | CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler); |
141 | 0 | const char *const apszDriver[] = {pszDriverName, nullptr}; |
142 | 0 | auto poDS = std::unique_ptr<GDALDataset>(GDALDataset::Open( |
143 | 0 | pszTarget, 0, apszDriver, nullptr, papszSiblingList)); |
144 | 0 | if (poDS) |
145 | 0 | { |
146 | 0 | if (EQUAL(pszDriverName, "GTiff")) |
147 | 0 | { |
148 | 0 | if (const char *pszLayout = |
149 | 0 | poDS->GetMetadataItem("LAYOUT", "IMAGE_STRUCTURE")) |
150 | 0 | { |
151 | 0 | osLayout = pszLayout; |
152 | 0 | } |
153 | 0 | } |
154 | |
|
155 | 0 | aosFileList.Assign(poDS->GetFileList(), |
156 | 0 | /* bTakeOwnership = */ true); |
157 | 0 | bHasCRS = poDS->GetSpatialRef() != nullptr; |
158 | 0 | GDALGeoTransform gt; |
159 | 0 | bHasGeoTransform = poDS->GetGeoTransform(gt) == CE_None; |
160 | 0 | bHasOverview = (poDS->GetRasterCount() && |
161 | 0 | poDS->GetRasterBand(1)->GetOverviewCount() > 0); |
162 | 0 | } |
163 | 0 | } |
164 | |
|
165 | 0 | if (m_poLayer) |
166 | 0 | { |
167 | 0 | OGRFeature oFeature(m_poLayer->GetLayerDefn()); |
168 | 0 | oFeature.SetField("filename", pszTarget); |
169 | 0 | if (hDriver) |
170 | 0 | { |
171 | 0 | oFeature.SetField("driver", pszDriverName); |
172 | |
|
173 | 0 | if (m_detailed) |
174 | 0 | { |
175 | 0 | if (!osLayout.empty()) |
176 | 0 | oFeature.SetField("layout", osLayout.c_str()); |
177 | |
|
178 | 0 | if (!aosFileList.empty()) |
179 | 0 | { |
180 | 0 | oFeature.SetField("file_list", aosFileList.List()); |
181 | 0 | } |
182 | |
|
183 | 0 | oFeature.SetField("has_crs", bHasCRS); |
184 | 0 | oFeature.SetField("has_geotransform", bHasGeoTransform); |
185 | 0 | oFeature.SetField("has_overview", bHasOverview); |
186 | 0 | } |
187 | |
|
188 | 0 | if (m_poLayer->CreateFeature(&oFeature) != OGRERR_NONE) |
189 | 0 | return false; |
190 | 0 | } |
191 | 0 | else if (m_reportFailures) |
192 | 0 | { |
193 | 0 | if (m_poLayer->CreateFeature(&oFeature) != OGRERR_NONE) |
194 | 0 | return false; |
195 | 0 | } |
196 | 0 | } |
197 | 0 | else if (m_format == "json") |
198 | 0 | { |
199 | 0 | if (hDriver) |
200 | 0 | { |
201 | 0 | m_oWriter.StartObj(); |
202 | 0 | m_oWriter.AddObjKey("name"); |
203 | 0 | m_oWriter.Add(pszTarget); |
204 | 0 | m_oWriter.AddObjKey("driver"); |
205 | 0 | m_oWriter.Add(GDALGetDriverShortName(hDriver)); |
206 | 0 | if (m_detailed) |
207 | 0 | { |
208 | 0 | if (!osLayout.empty()) |
209 | 0 | { |
210 | 0 | m_oWriter.AddObjKey("layout"); |
211 | 0 | m_oWriter.Add(osLayout); |
212 | 0 | } |
213 | |
|
214 | 0 | if (!aosFileList.empty()) |
215 | 0 | { |
216 | 0 | m_oWriter.AddObjKey("file_list"); |
217 | 0 | m_oWriter.StartArray(); |
218 | 0 | for (const char *pszFilename : aosFileList) |
219 | 0 | { |
220 | 0 | m_oWriter.Add(pszFilename); |
221 | 0 | } |
222 | 0 | m_oWriter.EndArray(); |
223 | 0 | } |
224 | |
|
225 | 0 | if (bHasCRS) |
226 | 0 | { |
227 | 0 | m_oWriter.AddObjKey("has_crs"); |
228 | 0 | m_oWriter.Add(true); |
229 | 0 | } |
230 | |
|
231 | 0 | if (bHasGeoTransform) |
232 | 0 | { |
233 | 0 | m_oWriter.AddObjKey("has_geotransform"); |
234 | 0 | m_oWriter.Add(true); |
235 | 0 | } |
236 | |
|
237 | 0 | if (bHasOverview) |
238 | 0 | { |
239 | 0 | m_oWriter.AddObjKey("has_overview"); |
240 | 0 | m_oWriter.Add(true); |
241 | 0 | } |
242 | 0 | } |
243 | 0 | m_oWriter.EndObj(); |
244 | 0 | } |
245 | 0 | else if (m_reportFailures) |
246 | 0 | { |
247 | 0 | m_oWriter.StartObj(); |
248 | 0 | m_oWriter.AddObjKey("name"); |
249 | 0 | m_oWriter.Add(pszTarget); |
250 | 0 | m_oWriter.AddObjKey("driver"); |
251 | 0 | m_oWriter.AddNull(); |
252 | 0 | m_oWriter.EndObj(); |
253 | 0 | } |
254 | 0 | } |
255 | 0 | else |
256 | 0 | { |
257 | 0 | if (hDriver) |
258 | 0 | { |
259 | 0 | Print(pszTarget); |
260 | 0 | Print(": "); |
261 | 0 | Print(pszDriverName); |
262 | 0 | if (m_detailed) |
263 | 0 | { |
264 | 0 | if (!osLayout.empty()) |
265 | 0 | { |
266 | 0 | Print(", layout="); |
267 | 0 | Print(osLayout.c_str()); |
268 | 0 | } |
269 | 0 | if (aosFileList.size() > 1) |
270 | 0 | { |
271 | 0 | Print(", has side-car files"); |
272 | 0 | } |
273 | 0 | if (bHasCRS) |
274 | 0 | { |
275 | 0 | Print(", has CRS"); |
276 | 0 | } |
277 | 0 | if (bHasGeoTransform) |
278 | 0 | { |
279 | 0 | Print(", has geotransform"); |
280 | 0 | } |
281 | 0 | if (bHasOverview) |
282 | 0 | { |
283 | 0 | Print(", has overview(s)"); |
284 | 0 | } |
285 | 0 | } |
286 | 0 | Print("\n"); |
287 | 0 | } |
288 | 0 | else if (m_reportFailures) |
289 | 0 | { |
290 | 0 | Print(pszTarget); |
291 | 0 | Print(": unrecognized\n"); |
292 | 0 | } |
293 | 0 | } |
294 | | |
295 | 0 | bool ret = true; |
296 | 0 | VSIStatBufL sStatBuf; |
297 | 0 | if ((m_forceRecursive || (m_recursive && hDriver == nullptr)) && |
298 | 0 | VSIStatL(pszTarget, &sStatBuf) == 0 && VSI_ISDIR(sStatBuf.st_mode)) |
299 | 0 | { |
300 | 0 | const CPLStringList aosSiblingList(VSIReadDir(pszTarget)); |
301 | 0 | const int nListSize = aosSiblingList.size(); |
302 | 0 | for (int i = 0; i < nListSize; ++i) |
303 | 0 | { |
304 | 0 | const char *pszSubTarget = aosSiblingList[i]; |
305 | 0 | if (!(EQUAL(pszSubTarget, "..") || EQUAL(pszSubTarget, "."))) |
306 | 0 | { |
307 | 0 | const std::string osSubTarget = |
308 | 0 | CPLFormFilenameSafe(pszTarget, pszSubTarget, nullptr); |
309 | |
|
310 | 0 | std::unique_ptr<void, decltype(&GDALDestroyScaledProgress)> |
311 | 0 | pScaledProgress(GDALCreateScaledProgress( |
312 | 0 | static_cast<double>(i) / nListSize, |
313 | 0 | static_cast<double>(i + 1) / nListSize, |
314 | 0 | pfnProgress, pProgressData), |
315 | 0 | GDALDestroyScaledProgress); |
316 | 0 | ret = ret && |
317 | 0 | Process(osSubTarget.c_str(), aosSiblingList.List(), |
318 | 0 | pScaledProgress ? GDALScaledProgress : nullptr, |
319 | 0 | pScaledProgress.get()); |
320 | 0 | } |
321 | 0 | } |
322 | 0 | } |
323 | |
|
324 | 0 | return ret && (!pfnProgress || pfnProgress(1.0, "", pProgressData)); |
325 | 0 | } |
326 | | |
327 | | /************************************************************************/ |
328 | | /* GDALDatasetIdentifyAlgorithm::RunImpl() */ |
329 | | /************************************************************************/ |
330 | | |
331 | | bool GDALDatasetIdentifyAlgorithm::RunImpl(GDALProgressFunc pfnProgress, |
332 | | void *pProgressData) |
333 | 0 | { |
334 | 0 | if (m_format.empty() && m_outputDataset.GetName().empty()) |
335 | 0 | m_format = IsCalledFromCommandLine() ? "text" : "json"; |
336 | |
|
337 | 0 | if (m_format == "text" || m_format == "json") |
338 | 0 | { |
339 | 0 | if (!m_outputDataset.GetName().empty()) |
340 | 0 | { |
341 | 0 | m_fpOut = VSIFilesystemHandler::OpenStatic( |
342 | 0 | m_outputDataset.GetName().c_str(), "wb"); |
343 | 0 | if (!m_fpOut) |
344 | 0 | { |
345 | 0 | ReportError(CE_Failure, CPLE_FileIO, "Cannot create '%s'", |
346 | 0 | m_outputDataset.GetName().c_str()); |
347 | 0 | return false; |
348 | 0 | } |
349 | 0 | } |
350 | 0 | } |
351 | 0 | else |
352 | 0 | { |
353 | 0 | if (m_outputDataset.GetName().empty() && m_format != "MEM") |
354 | 0 | { |
355 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
356 | 0 | "'output' argument must be specified for non-text or " |
357 | 0 | "non-json output"); |
358 | 0 | return false; |
359 | 0 | } |
360 | | |
361 | 0 | if (m_format.empty()) |
362 | 0 | { |
363 | 0 | const CPLStringList aosFormats(GDALGetOutputDriversForDatasetName( |
364 | 0 | m_outputDataset.GetName().c_str(), GDAL_OF_VECTOR, |
365 | 0 | /* bSingleMatch = */ true, |
366 | 0 | /* bEmitWarning = */ true)); |
367 | 0 | if (aosFormats.size() != 1) |
368 | 0 | { |
369 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
370 | 0 | "Cannot guess driver for %s", |
371 | 0 | m_outputDataset.GetName().c_str()); |
372 | 0 | return false; |
373 | 0 | } |
374 | 0 | m_format = aosFormats[0]; |
375 | 0 | } |
376 | | |
377 | 0 | auto poOutDrv = |
378 | 0 | GetGDALDriverManager()->GetDriverByName(m_format.c_str()); |
379 | 0 | if (!poOutDrv) |
380 | 0 | { |
381 | | // shouldn't happen given checks done in GDALAlgorithm unless |
382 | | // someone deregister the driver between ParseCommandLineArgs() and |
383 | | // Run() |
384 | 0 | ReportError(CE_Failure, CPLE_AppDefined, "Driver %s does not exist", |
385 | 0 | m_format.c_str()); |
386 | 0 | return false; |
387 | 0 | } |
388 | | |
389 | 0 | m_poOutDS.reset(poOutDrv->Create( |
390 | 0 | m_outputDataset.GetName().c_str(), 0, 0, 0, GDT_Unknown, |
391 | 0 | CPLStringList(m_creationOptions).List())); |
392 | 0 | if (!m_poOutDS) |
393 | 0 | return false; |
394 | | |
395 | 0 | if (m_outputLayerName.empty()) |
396 | 0 | { |
397 | 0 | if (EQUAL(poOutDrv->GetDescription(), "ESRI Shapefile")) |
398 | 0 | m_outputLayerName = |
399 | 0 | CPLGetBasenameSafe(m_outputDataset.GetName().c_str()); |
400 | 0 | else |
401 | 0 | m_outputLayerName = "output"; |
402 | 0 | } |
403 | |
|
404 | 0 | m_poLayer = m_poOutDS->CreateLayer( |
405 | 0 | m_outputLayerName.c_str(), nullptr, |
406 | 0 | CPLStringList(m_layerCreationOptions).List()); |
407 | 0 | if (!m_poLayer) |
408 | 0 | return false; |
409 | | |
410 | 0 | bool ret = true; |
411 | 0 | { |
412 | 0 | OGRFieldDefn oFieldDefn("filename", OFTString); |
413 | 0 | ret = m_poLayer->CreateField(&oFieldDefn) == OGRERR_NONE; |
414 | 0 | } |
415 | |
|
416 | 0 | { |
417 | 0 | OGRFieldDefn oFieldDefn("driver", OFTString); |
418 | 0 | ret = ret && m_poLayer->CreateField(&oFieldDefn) == OGRERR_NONE; |
419 | 0 | } |
420 | |
|
421 | 0 | if (m_detailed) |
422 | 0 | { |
423 | 0 | { |
424 | 0 | OGRFieldDefn oFieldDefn("layout", OFTString); |
425 | 0 | ret = ret && m_poLayer->CreateField(&oFieldDefn) == OGRERR_NONE; |
426 | 0 | } |
427 | 0 | { |
428 | 0 | const char *pszSupportedFieldTypes = |
429 | 0 | poOutDrv->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES); |
430 | 0 | OGRFieldDefn oFieldDefn( |
431 | 0 | "file_list", (pszSupportedFieldTypes && |
432 | 0 | strstr(pszSupportedFieldTypes, "StringList")) |
433 | 0 | ? OFTStringList |
434 | 0 | : OFTString); |
435 | 0 | ret = ret && m_poLayer->CreateField(&oFieldDefn) == OGRERR_NONE; |
436 | 0 | } |
437 | 0 | { |
438 | 0 | OGRFieldDefn oFieldDefn("has_crs", OFTInteger); |
439 | 0 | oFieldDefn.SetSubType(OFSTBoolean); |
440 | 0 | ret = ret && m_poLayer->CreateField(&oFieldDefn) == OGRERR_NONE; |
441 | 0 | } |
442 | 0 | { |
443 | 0 | OGRFieldDefn oFieldDefn("has_geotransform", OFTInteger); |
444 | 0 | oFieldDefn.SetSubType(OFSTBoolean); |
445 | 0 | ret = ret && m_poLayer->CreateField(&oFieldDefn) == OGRERR_NONE; |
446 | 0 | } |
447 | 0 | { |
448 | 0 | OGRFieldDefn oFieldDefn("has_overview", OFTInteger); |
449 | 0 | oFieldDefn.SetSubType(OFSTBoolean); |
450 | 0 | ret = ret && m_poLayer->CreateField(&oFieldDefn) == OGRERR_NONE; |
451 | 0 | } |
452 | 0 | } |
453 | |
|
454 | 0 | if (!ret) |
455 | 0 | return false; |
456 | 0 | } |
457 | | |
458 | 0 | if (m_format == "json") |
459 | 0 | m_oWriter.StartArray(); |
460 | 0 | int i = 0; |
461 | 0 | bool ret = true; |
462 | 0 | for (const std::string &osPath : m_filename) |
463 | 0 | { |
464 | 0 | std::unique_ptr<void, decltype(&GDALDestroyScaledProgress)> |
465 | 0 | pScaledProgress(GDALCreateScaledProgress( |
466 | 0 | static_cast<double>(i) / |
467 | 0 | static_cast<int>(m_filename.size()), |
468 | 0 | static_cast<double>(i + 1) / |
469 | 0 | static_cast<int>(m_filename.size()), |
470 | 0 | pfnProgress, pProgressData), |
471 | 0 | GDALDestroyScaledProgress); |
472 | 0 | ret = ret && Process(osPath.c_str(), nullptr, |
473 | 0 | pScaledProgress ? GDALScaledProgress : nullptr, |
474 | 0 | pScaledProgress.get()); |
475 | 0 | ++i; |
476 | 0 | } |
477 | 0 | if (m_format == "json") |
478 | 0 | m_oWriter.EndArray(); |
479 | |
|
480 | 0 | if (!m_output.empty()) |
481 | 0 | { |
482 | 0 | GetArg(GDAL_ARG_NAME_OUTPUT_STRING)->Set(m_output); |
483 | 0 | } |
484 | 0 | else |
485 | 0 | { |
486 | 0 | m_outputDataset.Set(std::move(m_poOutDS)); |
487 | 0 | } |
488 | |
|
489 | 0 | return ret; |
490 | 0 | } |
491 | | |
492 | | //! @endcond |