/src/gdal/ogr/ogrsf_frmts/xlsx/ogrxlsxdriver.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: XLSX Translator |
4 | | * Purpose: Implements OGRXLSXDriver. |
5 | | * Author: Even Rouault, even dot rouault at spatialys.com |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2012, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #include "ogr_xlsx.h" |
14 | | #include "cpl_conv.h" |
15 | | |
16 | | extern "C" void RegisterOGRXLSX(); |
17 | | |
18 | | using namespace OGRXLSX; |
19 | | |
20 | | // g++ -DHAVE_EXPAT -g -Wall -fPIC ogr/ogrsf_frmts/xlsx/*.cpp -shared -o |
21 | | // ogr_XLSX.so -Iport -Igcore -Iogr -Iogr/ogrsf_frmts -Iogr/ogrsf_frmts/mem |
22 | | // -Iogr/ogrsf_frmts/xlsx -L. -lgdal |
23 | | |
// Content type string that OGRXLSXDriverOpen() searches for in the beginning
// of the archive's [Content_Types].xml before accepting a file.
static constexpr char XLSX_MIMETYPE[] =
    "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml";
26 | | |
27 | | /************************************************************************/ |
28 | | /* Identify() */ |
29 | | /************************************************************************/ |
30 | | |
31 | | static int OGRXLSXDriverIdentify(GDALOpenInfo *poOpenInfo) |
32 | 113k | { |
33 | 113k | if (poOpenInfo->fpL == nullptr && |
34 | 113k | STARTS_WITH_CI(poOpenInfo->pszFilename, "XLSX:")) |
35 | 404 | { |
36 | 404 | return TRUE; |
37 | 404 | } |
38 | | |
39 | 113k | if (STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") || |
40 | 113k | STARTS_WITH(poOpenInfo->pszFilename, "/vsitar/")) |
41 | 41.0k | { |
42 | 41.0k | const char *pszExt = poOpenInfo->osExtension.c_str(); |
43 | 41.0k | return EQUAL(pszExt, "XLSX") || EQUAL(pszExt, "XLSM") || |
44 | 41.0k | EQUAL(pszExt, "XLSX}") || EQUAL(pszExt, "XLSM}"); |
45 | 41.0k | } |
46 | | |
47 | 72.4k | if (poOpenInfo->nHeaderBytes > 30 && |
48 | 72.4k | memcmp(poOpenInfo->pabyHeader, "PK\x03\x04", 4) == 0) |
49 | 246 | { |
50 | | // Fetch the first filename in the zip |
51 | 246 | const int nFilenameLength = |
52 | 246 | CPL_LSBUINT16PTR(poOpenInfo->pabyHeader + 26); |
53 | 246 | if (30 + nFilenameLength > poOpenInfo->nHeaderBytes) |
54 | 65 | return FALSE; |
55 | 181 | const std::string osFilename( |
56 | 181 | reinterpret_cast<const char *>(poOpenInfo->pabyHeader) + 30, |
57 | 181 | nFilenameLength); |
58 | 181 | if (STARTS_WITH(osFilename.c_str(), "xl/") || |
59 | 181 | STARTS_WITH(osFilename.c_str(), "_rels/") || |
60 | 181 | STARTS_WITH(osFilename.c_str(), "docProps/") || |
61 | 181 | osFilename == "[Content_Types].xml") |
62 | 162 | { |
63 | 162 | return TRUE; |
64 | 162 | } |
65 | 19 | const char *pszExt = poOpenInfo->osExtension.c_str(); |
66 | 19 | if (EQUAL(pszExt, "XLSX") || EQUAL(pszExt, "XLSM")) |
67 | 0 | { |
68 | 0 | CPLDebug( |
69 | 0 | "XLSX", |
70 | 0 | "Identify() failed to recognize first filename in zip (%s), " |
71 | 0 | "but fallback to extension matching", |
72 | 0 | osFilename.c_str()); |
73 | 0 | return TRUE; |
74 | 0 | } |
75 | 19 | } |
76 | 72.1k | return FALSE; |
77 | 72.4k | } |
78 | | |
79 | | /************************************************************************/ |
80 | | /* Open() */ |
81 | | /************************************************************************/ |
82 | | |
83 | | static GDALDataset *OGRXLSXDriverOpen(GDALOpenInfo *poOpenInfo) |
84 | | |
85 | 6.03k | { |
86 | 6.03k | if (!OGRXLSXDriverIdentify(poOpenInfo)) |
87 | 0 | return nullptr; |
88 | | |
89 | 6.03k | const char *pszFilename = poOpenInfo->pszFilename; |
90 | 6.03k | if (poOpenInfo->fpL == nullptr && STARTS_WITH_CI(pszFilename, "XLSX:")) |
91 | 202 | { |
92 | 202 | pszFilename += strlen("XLSX:"); |
93 | 202 | } |
94 | 6.03k | const bool bIsVsiZipOrTarPrefixed = STARTS_WITH(pszFilename, "/vsizip/") || |
95 | 6.03k | STARTS_WITH(pszFilename, "/vsitar/"); |
96 | 6.03k | if (bIsVsiZipOrTarPrefixed) |
97 | 5.74k | { |
98 | 5.74k | if (poOpenInfo->eAccess != GA_ReadOnly) |
99 | 0 | return nullptr; |
100 | 5.74k | } |
101 | | |
102 | 6.03k | std::string osPrefixedFilename; |
103 | 6.03k | if (!bIsVsiZipOrTarPrefixed) |
104 | 283 | { |
105 | 283 | osPrefixedFilename = "/vsizip/{"; |
106 | 283 | osPrefixedFilename += pszFilename; |
107 | 283 | osPrefixedFilename += "}"; |
108 | 283 | } |
109 | 5.74k | else |
110 | 5.74k | { |
111 | 5.74k | osPrefixedFilename = pszFilename; |
112 | 5.74k | } |
113 | | |
114 | 6.03k | CPLString osTmpFilename; |
115 | 6.03k | osTmpFilename = |
116 | 6.03k | CPLSPrintf("%s/[Content_Types].xml", osPrefixedFilename.c_str()); |
117 | 6.03k | VSILFILE *fpContent = VSIFOpenL(osTmpFilename, "rb"); |
118 | 6.03k | if (fpContent == nullptr) |
119 | 1.40k | return nullptr; |
120 | | |
121 | 4.62k | char szBuffer[2048]; |
122 | 4.62k | int nRead = (int)VSIFReadL(szBuffer, 1, sizeof(szBuffer) - 1, fpContent); |
123 | 4.62k | szBuffer[nRead] = 0; |
124 | | |
125 | 4.62k | VSIFCloseL(fpContent); |
126 | | |
127 | 4.62k | if (strstr(szBuffer, XLSX_MIMETYPE) == nullptr) |
128 | 179 | return nullptr; |
129 | | |
130 | 4.44k | osTmpFilename = |
131 | 4.44k | CPLSPrintf("%s/xl/workbook.xml", osPrefixedFilename.c_str()); |
132 | 4.44k | VSILFILE *fpWorkbook = VSIFOpenL(osTmpFilename, "rb"); |
133 | 4.44k | if (fpWorkbook == nullptr) |
134 | 30 | return nullptr; |
135 | | |
136 | 4.41k | osTmpFilename = |
137 | 4.41k | CPLSPrintf("%s/xl/_rels/workbook.xml.rels", osPrefixedFilename.c_str()); |
138 | 4.41k | VSILFILE *fpWorkbookRels = VSIFOpenL(osTmpFilename, "rb"); |
139 | 4.41k | if (fpWorkbookRels == nullptr) |
140 | 22 | { |
141 | 22 | VSIFCloseL(fpWorkbook); |
142 | 22 | return nullptr; |
143 | 22 | } |
144 | | |
145 | 4.39k | osTmpFilename = |
146 | 4.39k | CPLSPrintf("%s/xl/sharedStrings.xml", osPrefixedFilename.c_str()); |
147 | 4.39k | VSILFILE *fpSharedStrings = VSIFOpenL(osTmpFilename, "rb"); |
148 | 4.39k | osTmpFilename = CPLSPrintf("%s/xl/styles.xml", osPrefixedFilename.c_str()); |
149 | 4.39k | VSILFILE *fpStyles = VSIFOpenL(osTmpFilename, "rb"); |
150 | | |
151 | 4.39k | OGRXLSXDataSource *poDS = |
152 | 4.39k | new OGRXLSXDataSource(poOpenInfo->papszOpenOptions); |
153 | | |
154 | 4.39k | if (!poDS->Open(pszFilename, osPrefixedFilename.c_str(), fpWorkbook, |
155 | 4.39k | fpWorkbookRels, fpSharedStrings, fpStyles, |
156 | 4.39k | poOpenInfo->eAccess == GA_Update)) |
157 | 0 | { |
158 | 0 | delete poDS; |
159 | 0 | poDS = nullptr; |
160 | 0 | } |
161 | 4.39k | else |
162 | 4.39k | { |
163 | 4.39k | poDS->SetDescription(poOpenInfo->pszFilename); |
164 | 4.39k | } |
165 | | |
166 | 4.39k | return poDS; |
167 | 4.41k | } |
168 | | |
169 | | /************************************************************************/ |
170 | | /* OGRXLSXDriverCreate() */ |
171 | | /************************************************************************/ |
172 | | |
173 | | static GDALDataset *OGRXLSXDriverCreate(const char *pszName, int /* nXSize */, |
174 | | int /* nYSize */, int /* nBands */, |
175 | | GDALDataType /* eDT */, |
176 | | char **papszOptions) |
177 | | |
178 | 25 | { |
179 | 25 | if (!EQUAL(CPLGetExtensionSafe(pszName).c_str(), "XLSX")) |
180 | 0 | { |
181 | 0 | CPLError(CE_Failure, CPLE_AppDefined, "File extension should be XLSX"); |
182 | 0 | return nullptr; |
183 | 0 | } |
184 | | |
185 | | /* -------------------------------------------------------------------- */ |
186 | | /* First, ensure there isn't any such file yet. */ |
187 | | /* -------------------------------------------------------------------- */ |
188 | 25 | VSIStatBufL sStatBuf; |
189 | | |
190 | 25 | if (VSIStatL(pszName, &sStatBuf) == 0) |
191 | 0 | { |
192 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
193 | 0 | "It seems a file system object called '%s' already exists.", |
194 | 0 | pszName); |
195 | |
|
196 | 0 | return nullptr; |
197 | 0 | } |
198 | | |
199 | | /* -------------------------------------------------------------------- */ |
200 | | /* Try to create datasource. */ |
201 | | /* -------------------------------------------------------------------- */ |
202 | 25 | OGRXLSXDataSource *poDS = new OGRXLSXDataSource(nullptr); |
203 | | |
204 | 25 | if (!poDS->Create(pszName, papszOptions)) |
205 | 0 | { |
206 | 0 | delete poDS; |
207 | 0 | return nullptr; |
208 | 0 | } |
209 | 25 | else |
210 | 25 | return poDS; |
211 | 25 | } |
212 | | |
213 | | /************************************************************************/ |
214 | | /* RegisterOGRXLSX() */ |
215 | | /************************************************************************/ |
216 | | |
217 | | void RegisterOGRXLSX() |
218 | | |
219 | 26 | { |
220 | 26 | if (GDALGetDriverByName("XLSX") != nullptr) |
221 | 0 | return; |
222 | | |
223 | 26 | GDALDriver *poDriver = new GDALDriver(); |
224 | | |
225 | 26 | poDriver->SetDescription("XLSX"); |
226 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES"); |
227 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_CREATE_LAYER, "YES"); |
228 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_DELETE_LAYER, "YES"); |
229 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_CREATE_FIELD, "YES"); |
230 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_DELETE_FIELD, "YES"); |
231 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_REORDER_FIELDS, "YES"); |
232 | 26 | poDriver->SetMetadataItem(GDAL_DMD_ALTER_FIELD_DEFN_FLAGS, "Name Type"); |
233 | | |
234 | 26 | poDriver->SetMetadataItem(GDAL_DMD_LONGNAME, |
235 | 26 | "MS Office Open XML spreadsheet"); |
236 | 26 | poDriver->SetMetadataItem(GDAL_DMD_EXTENSIONS, "xlsx xlsm"); |
237 | 26 | poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/xlsx.html"); |
238 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES"); |
239 | 26 | poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES, |
240 | 26 | "Integer Integer64 Real String Date DateTime " |
241 | 26 | "Time"); |
242 | 26 | poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATASUBTYPES, "Boolean"); |
243 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_NONSPATIAL, "YES"); |
244 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_MULTIPLE_VECTOR_LAYERS, "YES"); |
245 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_MEASURED_GEOMETRIES, "YES"); |
246 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_CURVE_GEOMETRIES, "YES"); |
247 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_Z_GEOMETRIES, "YES"); |
248 | 26 | poDriver->SetMetadataItem(GDAL_DMD_SUPPORTED_SQL_DIALECTS, "OGRSQL SQLITE"); |
249 | | |
250 | 26 | poDriver->SetMetadataItem(GDAL_DCAP_UPDATE, "YES"); |
251 | 26 | poDriver->SetMetadataItem(GDAL_DMD_UPDATE_ITEMS, "Features"); |
252 | | |
253 | 26 | poDriver->SetMetadataItem( |
254 | 26 | GDAL_DMD_OPENOPTIONLIST, |
255 | 26 | "<OpenOptionList>" |
256 | 26 | " <Option name='FIELD_TYPES' type='string-select' " |
257 | 26 | "description='If set to STRING, all fields will be of type String. " |
258 | 26 | "Otherwise the driver autodetects the field type from field content.' " |
259 | 26 | "default='AUTO'>" |
260 | 26 | " <Value>AUTO</Value>" |
261 | 26 | " <Value>STRING</Value>" |
262 | 26 | " </Option>" |
263 | 26 | " <Option name='HEADERS' type='string-select' " |
264 | 26 | "description='Defines if the first line should be considered as " |
265 | 26 | "containing the name of the fields.' " |
266 | 26 | "default='AUTO'>" |
267 | 26 | " <Value>AUTO</Value>" |
268 | 26 | " <Value>FORCE</Value>" |
269 | 26 | " <Value>DISABLE</Value>" |
270 | 26 | " </Option>" |
271 | 26 | "</OpenOptionList>"); |
272 | | |
273 | 26 | poDriver->pfnIdentify = OGRXLSXDriverIdentify; |
274 | 26 | poDriver->pfnOpen = OGRXLSXDriverOpen; |
275 | 26 | poDriver->pfnCreate = OGRXLSXDriverCreate; |
276 | | |
277 | 26 | GetGDALDriverManager()->RegisterDriver(poDriver); |
278 | 26 | } |