/src/gdal/ogr/ogrsf_frmts/xlsx/ogrxlsxdriver.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: XLSX Translator |
4 | | * Purpose: Implements OGRXLSXDriver. |
5 | | * Author: Even Rouault, even dot rouault at spatialys.com |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2012, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #include "ogr_xlsx.h" |
14 | | #include "cpl_conv.h" |
15 | | |
16 | | extern "C" void RegisterOGRXLSX(); |
17 | | |
18 | | using namespace OGRXLSX; |
19 | | |
20 | | // g++ -DHAVE_EXPAT -g -Wall -fPIC ogr/ogrsf_frmts/xlsx/*.cpp -shared -o |
21 | | // ogr_XLSX.so -Iport -Igcore -Iogr -Iogr/ogrsf_frmts -Iogr/ogrsf_frmts/mem |
22 | | // -Iogr/ogrsf_frmts/xlsx -L. -lgdal |
23 | | |
// Content type string that OGRXLSXDriverOpen() searches for in the beginning
// of [Content_Types].xml before accepting an archive as an XLSX workbook.
static constexpr char XLSX_MIMETYPE[] =
    "application/"
    "vnd.openxmlformats-officedocument."
    "spreadsheetml.worksheet+xml";
26 | | |
27 | | /************************************************************************/ |
28 | | /* Identify() */ |
29 | | /************************************************************************/ |
30 | | |
31 | | static int OGRXLSXDriverIdentify(GDALOpenInfo *poOpenInfo) |
32 | 95.0k | { |
33 | 95.0k | if (poOpenInfo->fpL == nullptr && |
34 | 47.8k | STARTS_WITH_CI(poOpenInfo->pszFilename, "XLSX:")) |
35 | 406 | { |
36 | 406 | return TRUE; |
37 | 406 | } |
38 | | |
39 | 94.6k | if (STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") || |
40 | 94.1k | STARTS_WITH(poOpenInfo->pszFilename, "/vsitar/")) |
41 | 29.8k | { |
42 | 29.8k | const char *pszExt = poOpenInfo->osExtension.c_str(); |
43 | 29.8k | return EQUAL(pszExt, "XLSX") || EQUAL(pszExt, "XLSM") || |
44 | 29.8k | EQUAL(pszExt, "XLSX}") || EQUAL(pszExt, "XLSM}"); |
45 | 29.8k | } |
46 | | |
47 | 64.8k | if (poOpenInfo->nHeaderBytes > 30 && |
48 | 27.8k | memcmp(poOpenInfo->pabyHeader, "PK\x03\x04", 4) == 0) |
49 | 853 | { |
50 | | // Fetch the first filename in the zip |
51 | 853 | const int nFilenameLength = |
52 | 853 | CPL_LSBUINT16PTR(poOpenInfo->pabyHeader + 26); |
53 | 853 | if (30 + nFilenameLength > poOpenInfo->nHeaderBytes) |
54 | 55 | return FALSE; |
55 | 798 | const std::string osFilename( |
56 | 798 | reinterpret_cast<const char *>(poOpenInfo->pabyHeader) + 30, |
57 | 798 | nFilenameLength); |
58 | 798 | if (STARTS_WITH(osFilename.c_str(), "xl/") || |
59 | 710 | STARTS_WITH(osFilename.c_str(), "_rels/") || |
60 | 664 | STARTS_WITH(osFilename.c_str(), "docProps/") || |
61 | 654 | osFilename == "[Content_Types].xml") |
62 | 764 | { |
63 | 764 | return TRUE; |
64 | 764 | } |
65 | 34 | const char *pszExt = poOpenInfo->osExtension.c_str(); |
66 | 34 | if (EQUAL(pszExt, "XLSX") || EQUAL(pszExt, "XLSM")) |
67 | 0 | { |
68 | 0 | CPLDebug( |
69 | 0 | "XLSX", |
70 | 0 | "Identify() failed to recognize first filename in zip (%s), " |
71 | 0 | "but fallback to extension matching", |
72 | 0 | osFilename.c_str()); |
73 | 0 | return TRUE; |
74 | 0 | } |
75 | 34 | } |
76 | 63.9k | return FALSE; |
77 | 64.8k | } |
78 | | |
79 | | /************************************************************************/ |
80 | | /* Open() */ |
81 | | /************************************************************************/ |
82 | | |
83 | | static GDALDataset *OGRXLSXDriverOpen(GDALOpenInfo *poOpenInfo) |
84 | | |
85 | 6.05k | { |
86 | 6.05k | if (!OGRXLSXDriverIdentify(poOpenInfo)) |
87 | 0 | return nullptr; |
88 | | |
89 | 6.05k | const char *pszFilename = poOpenInfo->pszFilename; |
90 | 6.05k | if (poOpenInfo->fpL == nullptr && STARTS_WITH_CI(pszFilename, "XLSX:")) |
91 | 203 | { |
92 | 203 | pszFilename += strlen("XLSX:"); |
93 | 203 | } |
94 | 6.05k | const bool bIsVsiZipOrTarPrefixed = STARTS_WITH(pszFilename, "/vsizip/") || |
95 | 6.05k | STARTS_WITH(pszFilename, "/vsitar/"); |
96 | 6.05k | if (bIsVsiZipOrTarPrefixed) |
97 | 5.46k | { |
98 | 5.46k | if (poOpenInfo->eAccess != GA_ReadOnly) |
99 | 0 | return nullptr; |
100 | 5.46k | } |
101 | | |
102 | 6.05k | std::string osPrefixedFilename; |
103 | 6.05k | if (!bIsVsiZipOrTarPrefixed) |
104 | 585 | { |
105 | 585 | osPrefixedFilename = "/vsizip/{"; |
106 | 585 | osPrefixedFilename += pszFilename; |
107 | 585 | osPrefixedFilename += "}"; |
108 | 585 | } |
109 | 5.46k | else |
110 | 5.46k | { |
111 | 5.46k | osPrefixedFilename = pszFilename; |
112 | 5.46k | } |
113 | | |
114 | 6.05k | CPLString osTmpFilename; |
115 | 6.05k | osTmpFilename = |
116 | 6.05k | CPLSPrintf("%s/[Content_Types].xml", osPrefixedFilename.c_str()); |
117 | 6.05k | VSILFILE *fpContent = VSIFOpenL(osTmpFilename, "rb"); |
118 | 6.05k | if (fpContent == nullptr) |
119 | 1.36k | return nullptr; |
120 | | |
121 | 4.68k | char szBuffer[2048]; |
122 | 4.68k | int nRead = (int)VSIFReadL(szBuffer, 1, sizeof(szBuffer) - 1, fpContent); |
123 | 4.68k | szBuffer[nRead] = 0; |
124 | | |
125 | 4.68k | VSIFCloseL(fpContent); |
126 | | |
127 | 4.68k | if (strstr(szBuffer, XLSX_MIMETYPE) == nullptr) |
128 | 49 | return nullptr; |
129 | | |
130 | 4.63k | osTmpFilename = |
131 | 4.63k | CPLSPrintf("%s/xl/workbook.xml", osPrefixedFilename.c_str()); |
132 | 4.63k | VSILFILE *fpWorkbook = VSIFOpenL(osTmpFilename, "rb"); |
133 | 4.63k | if (fpWorkbook == nullptr) |
134 | 59 | return nullptr; |
135 | | |
136 | 4.57k | osTmpFilename = |
137 | 4.57k | CPLSPrintf("%s/xl/_rels/workbook.xml.rels", osPrefixedFilename.c_str()); |
138 | 4.57k | VSILFILE *fpWorkbookRels = VSIFOpenL(osTmpFilename, "rb"); |
139 | 4.57k | if (fpWorkbookRels == nullptr) |
140 | 27 | { |
141 | 27 | VSIFCloseL(fpWorkbook); |
142 | 27 | return nullptr; |
143 | 27 | } |
144 | | |
145 | 4.55k | osTmpFilename = |
146 | 4.55k | CPLSPrintf("%s/xl/sharedStrings.xml", osPrefixedFilename.c_str()); |
147 | 4.55k | VSILFILE *fpSharedStrings = VSIFOpenL(osTmpFilename, "rb"); |
148 | 4.55k | osTmpFilename = CPLSPrintf("%s/xl/styles.xml", osPrefixedFilename.c_str()); |
149 | 4.55k | VSILFILE *fpStyles = VSIFOpenL(osTmpFilename, "rb"); |
150 | | |
151 | 4.55k | OGRXLSXDataSource *poDS = |
152 | 4.55k | new OGRXLSXDataSource(poOpenInfo->papszOpenOptions); |
153 | | |
154 | 4.55k | if (!poDS->Open(pszFilename, osPrefixedFilename.c_str(), fpWorkbook, |
155 | 4.55k | fpWorkbookRels, fpSharedStrings, fpStyles, |
156 | 4.55k | poOpenInfo->eAccess == GA_Update)) |
157 | 0 | { |
158 | 0 | delete poDS; |
159 | 0 | poDS = nullptr; |
160 | 0 | } |
161 | 4.55k | else |
162 | 4.55k | { |
163 | 4.55k | poDS->SetDescription(poOpenInfo->pszFilename); |
164 | 4.55k | } |
165 | | |
166 | 4.55k | return poDS; |
167 | 4.57k | } |
168 | | |
169 | | /************************************************************************/ |
170 | | /* OGRXLSXDriverCreate() */ |
171 | | /************************************************************************/ |
172 | | |
173 | | static GDALDataset *OGRXLSXDriverCreate(const char *pszName, int /* nXSize */, |
174 | | int /* nYSize */, int /* nBands */, |
175 | | GDALDataType /* eDT */, |
176 | | char **papszOptions) |
177 | | |
178 | 174 | { |
179 | 174 | if (!EQUAL(CPLGetExtensionSafe(pszName).c_str(), "XLSX")) |
180 | 0 | { |
181 | 0 | CPLError(CE_Failure, CPLE_AppDefined, "File extension should be XLSX"); |
182 | 0 | return nullptr; |
183 | 0 | } |
184 | | |
185 | | /* -------------------------------------------------------------------- */ |
186 | | /* First, ensure there isn't any such file yet. */ |
187 | | /* -------------------------------------------------------------------- */ |
188 | 174 | VSIStatBufL sStatBuf; |
189 | | |
190 | 174 | if (VSIStatL(pszName, &sStatBuf) == 0) |
191 | 0 | { |
192 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
193 | 0 | "It seems a file system object called '%s' already exists.", |
194 | 0 | pszName); |
195 | |
|
196 | 0 | return nullptr; |
197 | 0 | } |
198 | | |
199 | | /* -------------------------------------------------------------------- */ |
200 | | /* Try to create datasource. */ |
201 | | /* -------------------------------------------------------------------- */ |
202 | 174 | OGRXLSXDataSource *poDS = new OGRXLSXDataSource(nullptr); |
203 | | |
204 | 174 | if (!poDS->Create(pszName, papszOptions)) |
205 | 0 | { |
206 | 0 | delete poDS; |
207 | 0 | return nullptr; |
208 | 0 | } |
209 | 174 | else |
210 | 174 | return poDS; |
211 | 174 | } |
212 | | |
213 | | /************************************************************************/ |
214 | | /* RegisterOGRXLSX() */ |
215 | | /************************************************************************/ |
216 | | |
217 | | void RegisterOGRXLSX() |
218 | | |
219 | 24 | { |
220 | 24 | if (GDALGetDriverByName("XLSX") != nullptr) |
221 | 0 | return; |
222 | | |
223 | 24 | GDALDriver *poDriver = new GDALDriver(); |
224 | | |
225 | 24 | poDriver->SetDescription("XLSX"); |
226 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES"); |
227 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_CREATE_LAYER, "YES"); |
228 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_DELETE_LAYER, "YES"); |
229 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_CREATE_FIELD, "YES"); |
230 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_DELETE_FIELD, "YES"); |
231 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_REORDER_FIELDS, "YES"); |
232 | 24 | poDriver->SetMetadataItem(GDAL_DMD_ALTER_FIELD_DEFN_FLAGS, "Name Type"); |
233 | | |
234 | 24 | poDriver->SetMetadataItem(GDAL_DMD_LONGNAME, |
235 | 24 | "MS Office Open XML spreadsheet"); |
236 | 24 | poDriver->SetMetadataItem(GDAL_DMD_EXTENSIONS, "xlsx xlsm"); |
237 | 24 | poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/xlsx.html"); |
238 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES"); |
239 | 24 | poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES, |
240 | 24 | "Integer Integer64 Real String Date DateTime " |
241 | 24 | "Time"); |
242 | 24 | poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATASUBTYPES, "Boolean"); |
243 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_NONSPATIAL, "YES"); |
244 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_MULTIPLE_VECTOR_LAYERS, "YES"); |
245 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_MEASURED_GEOMETRIES, "YES"); |
246 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_CURVE_GEOMETRIES, "YES"); |
247 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_Z_GEOMETRIES, "YES"); |
248 | 24 | poDriver->SetMetadataItem(GDAL_DMD_SUPPORTED_SQL_DIALECTS, "OGRSQL SQLITE"); |
249 | | |
250 | 24 | poDriver->SetMetadataItem(GDAL_DCAP_UPDATE, "YES"); |
251 | 24 | poDriver->SetMetadataItem(GDAL_DMD_UPDATE_ITEMS, "Features"); |
252 | | |
253 | 24 | poDriver->SetMetadataItem( |
254 | 24 | GDAL_DMD_OPENOPTIONLIST, |
255 | 24 | "<OpenOptionList>" |
256 | 24 | " <Option name='FIELD_TYPES' type='string-select' " |
257 | 24 | "description='If set to STRING, all fields will be of type String. " |
258 | 24 | "Otherwise the driver autodetects the field type from field content.' " |
259 | 24 | "default='AUTO'>" |
260 | 24 | " <Value>AUTO</Value>" |
261 | 24 | " <Value>STRING</Value>" |
262 | 24 | " </Option>" |
263 | 24 | " <Option name='HEADERS' type='string-select' " |
264 | 24 | "description='Defines if the first line should be considered as " |
265 | 24 | "containing the name of the fields.' " |
266 | 24 | "default='AUTO'>" |
267 | 24 | " <Value>AUTO</Value>" |
268 | 24 | " <Value>FORCE</Value>" |
269 | 24 | " <Value>DISABLE</Value>" |
270 | 24 | " </Option>" |
271 | 24 | "</OpenOptionList>"); |
272 | | |
273 | 24 | poDriver->pfnIdentify = OGRXLSXDriverIdentify; |
274 | 24 | poDriver->pfnOpen = OGRXLSXDriverOpen; |
275 | 24 | poDriver->pfnCreate = OGRXLSXDriverCreate; |
276 | | |
277 | 24 | GetGDALDriverManager()->RegisterDriver(poDriver); |
278 | 24 | } |