/src/gdal/ogr/ogrsf_frmts/xlsx/ogr_xlsx.h
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: XLSX Translator |
4 | | * Purpose: Definition of classes for OGR Office Open XML Spreadsheet .xlsx driver. |
5 | | * Author: Even Rouault, even dot rouault at spatialys.com |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2012, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #ifndef OGR_XLSX_H_INCLUDED |
14 | | #define OGR_XLSX_H_INCLUDED |
15 | | |
16 | | #include "ogrsf_frmts.h" |
17 | | |
18 | | #include "ogr_expat.h" |
19 | | #include "memdataset.h" |
20 | | |
21 | | #include <vector> |
22 | | #include <set> |
23 | | #include <string> |
24 | | #include <map> |
25 | | |
26 | | namespace OGRXLSX |
27 | | { |
28 | | |
29 | | /************************************************************************/ |
30 | | /* OGRXLSXLayer */ |
31 | | /************************************************************************/ |
32 | | |
33 | | class OGRXLSXDataSource; |
34 | | |
35 | | class OGRXLSXLayer final : public OGRMemLayer /* Layer type of the XLSX driver, layered on OGRMemLayer. Most overrides defined inline below call Init() before delegating to the base class. */ |
36 | | { |
37 | | friend class OGRXLSXDataSource; |
38 | | /* Init() is only declared here; NOTE(review): presumably it loads the sheet content on first use, guarded by bInit - confirm in the .cpp. */ |
39 | | bool bInit; |
40 | | OGRXLSXDataSource *poDS; /* presumably the poDSIn handed to the constructor - confirm */ |
41 | | CPLString osFilename; /* exposed through GetFilename() */ |
42 | | void Init(); |
43 | | bool bUpdated; /* exposed through HasBeenUpdated() */ |
44 | | bool bHasHeaderLine; /* read and written by GetHasHeaderLine() / SetHasHeaderLine() */ |
45 | | std::string m_osCols{}; /* exposed through GetCols() */ |
46 | | std::set<int> oSetFieldsOfUnknownType{}; |
47 | | /* FID conversion helpers between this layer and the underlying OGRMemLayer; defined elsewhere. */ |
48 | | GIntBig TranslateFIDFromMemLayer(GIntBig nFID) const; |
49 | | GIntBig TranslateFIDToMemLayer(GIntBig nFID) const; |
50 | | |
51 | | public: |
52 | | OGRXLSXLayer(OGRXLSXDataSource *poDSIn, const char *pszFilename, |
53 | | const char *pszName, int bUpdateIn = FALSE); |
54 | | /* Returns the bUpdated flag. */ |
55 | | bool HasBeenUpdated() const |
56 | 0 | { |
57 | 0 | return bUpdated; |
58 | 0 | } |
59 | | /* Defined elsewhere; bUpdatedIn defaults to true. */ |
60 | | void SetUpdated(bool bUpdatedIn = true); |
61 | | /* Returns the bHasHeaderLine flag. */ |
62 | | bool GetHasHeaderLine() const |
63 | 0 | { |
64 | 0 | return bHasHeaderLine; |
65 | 0 | } |
66 | | /* Stores bIn into bHasHeaderLine. */ |
67 | | void SetHasHeaderLine(bool bIn) |
68 | 8.71k | { |
69 | 8.71k | bHasHeaderLine = bIn; |
70 | 8.71k | } |
71 | | /* Reads the name through the qualified OGRMemLayer::GetLayerDefn(), so it bypasses this class's GetLayerDefn() override and does not call Init(). */ |
72 | | const char *GetName() const override |
73 | 32.0k | { |
74 | 32.0k | return OGRMemLayer::GetLayerDefn()->GetName(); |
75 | 32.0k | } |
76 | | /* Always reports wkbNone. */ |
77 | | OGRwkbGeometryType GetGeomType() const override |
78 | 8.49k | { |
79 | 8.49k | return wkbNone; |
80 | 8.49k | } |
81 | | /* Always returns nullptr. */ |
82 | | const OGRSpatialReference *GetSpatialRef() const override |
83 | 8.49k | { |
84 | 8.49k | return nullptr; |
85 | 8.49k | } |
86 | | /* Calls Init(), then OGRMemLayer::ResetReading(). */ |
87 | | void ResetReading() override |
88 | 294 | { |
89 | 294 | Init(); |
90 | 294 | OGRMemLayer::ResetReading(); |
91 | 294 | } |
92 | | |
93 | | int TestCapability(const char *pszCap) const override; |
94 | | /* Returns osFilename by const reference. */ |
95 | | const CPLString &GetFilename() const |
96 | 10.1k | { |
97 | 10.1k | return osFilename; |
98 | 10.1k | } |
99 | | |
100 | | /* For external usage: the methods below adjust FIDs. NOTE(review): presumably via TranslateFIDFromMemLayer() / TranslateFIDToMemLayer() - confirm in the .cpp. */ |
101 | | OGRFeature *GetNextFeature() override; |
102 | | OGRFeature *GetFeature(GIntBig nFeatureId) override; |
103 | | OGRErr ISetFeature(OGRFeature *poFeature) override; |
104 | | OGRErr ISetFeatureUniqPtr(std::unique_ptr<OGRFeature> poFeature) override; |
105 | | OGRErr IUpdateFeature(OGRFeature *poFeature, int nUpdatedFieldsCount, |
106 | | const int *panUpdatedFieldsIdx, |
107 | | int nUpdatedGeomFieldsCount, |
108 | | const int *panUpdatedGeomFieldsIdx, |
109 | | bool bUpdateStyleString) override; |
110 | | OGRErr DeleteFeature(GIntBig nFID) override; |
111 | | /* Calls Init(), then forwards nIndex to OGRMemLayer::SetNextByIndex(). */ |
112 | | OGRErr SetNextByIndex(GIntBig nIndex) override |
113 | 0 | { |
114 | 0 | Init(); |
115 | 0 | return OGRMemLayer::SetNextByIndex(nIndex); |
116 | 0 | } |
117 | | |
118 | | OGRErr ICreateFeature(OGRFeature *poFeature) override; |
119 | | OGRErr ICreateFeatureUniqPtr(std::unique_ptr<OGRFeature> poFeature, |
120 | | GIntBig *pnFID) override; |
121 | | /* Calls Init() before returning the base-class definition; the const_cast is needed because Init() is non-const while this override is const. */ |
122 | | const OGRFeatureDefn *GetLayerDefn() const override |
123 | 2.65M | { |
124 | 2.65M | const_cast<OGRXLSXLayer *>(this)->Init(); |
125 | 2.65M | return OGRMemLayer::GetLayerDefn(); |
126 | 2.65M | } |
127 | | /* Calls Init(), then forwards bForce to OGRMemLayer::GetFeatureCount(). */ |
128 | | GIntBig GetFeatureCount(int bForce) override |
129 | 7.65k | { |
130 | 7.65k | Init(); |
131 | 7.65k | return OGRMemLayer::GetFeatureCount(bForce); |
132 | 7.65k | } |
133 | | |
134 | | virtual OGRErr CreateField(const OGRFieldDefn *poField, |
135 | | int bApproxOK = TRUE) override; |
136 | | /* Calls Init() and SetUpdated() before delegating, so SetUpdated() runs whatever OGRErr the base class returns. */ |
137 | | OGRErr DeleteField(int iField) override |
138 | 0 | { |
139 | 0 | Init(); |
140 | 0 | SetUpdated(); |
141 | 0 | return OGRMemLayer::DeleteField(iField); |
142 | 0 | } |
143 | | /* Same sequence as DeleteField(): Init(), SetUpdated(), then the base-class call. */ |
144 | | OGRErr ReorderFields(int *panMap) override |
145 | 0 | { |
146 | 0 | Init(); |
147 | 0 | SetUpdated(); |
148 | 0 | return OGRMemLayer::ReorderFields(panMap); |
149 | 0 | } |
150 | | /* Same sequence as DeleteField(): Init(), SetUpdated(), then the base-class call with all three arguments forwarded. */ |
151 | | virtual OGRErr AlterFieldDefn(int iField, OGRFieldDefn *poNewFieldDefn, |
152 | | int nFlagsIn) override |
153 | 13.1k | { |
154 | 13.1k | Init(); |
155 | 13.1k | SetUpdated(); |
156 | 13.1k | return OGRMemLayer::AlterFieldDefn(iField, poNewFieldDefn, nFlagsIn); |
157 | 13.1k | } |
158 | | /* Returns m_osCols by const reference. */ |
159 | | const std::string &GetCols() const |
160 | 9 | { |
161 | 9 | return m_osCols; |
162 | 9 | } |
163 | | |
164 | | OGRErr SyncToDisk() override; |
165 | | |
166 | | GDALDataset *GetDataset() override; |
167 | | }; |
168 | | |
169 | | /************************************************************************/ |
170 | | /* OGRXLSXDataSource */ |
171 | | /************************************************************************/ |
172 | 20.4M | #define STACK_SIZE 5 /* number of entries in OGRXLSXDataSource::stateStack */ |
173 | | |
174 | | typedef enum /* States of the XML handlers; stored in HandlerState::eVal. */ |
175 | | { |
176 | | STATE_DEFAULT, |
177 | | |
178 | | /* for sharedStrings.xml */ |
179 | | STATE_SI, |
180 | | STATE_T, |
181 | | |
182 | | /* for sheet?.xml */ |
183 | | STATE_COLS, |
184 | | STATE_SHEETDATA, |
185 | | STATE_ROW, |
186 | | STATE_CELL, |
187 | | STATE_TEXTV, |
188 | | } HandlerStateEnum; |
189 | | |
190 | | typedef struct /* Element type of OGRXLSXDataSource::stateStack. */ |
191 | | { |
192 | | HandlerStateEnum eVal; |
193 | | int nBeginDepth; /* NOTE(review): presumably the XML depth at which this state was pushed - confirm */ |
194 | | } HandlerState; |
195 | | |
196 | | class XLSXFieldTypeExtended /* Pairs an OGRFieldType with a bHasMS flag. */ |
197 | | { |
198 | | public: |
199 | | OGRFieldType eType; |
200 | | bool bHasMS; /* NOTE(review): presumably "has milliseconds" - confirm */ |
201 | | /* Default construction: eType = OFTMaxType, bHasMS = false. */ |
202 | 1.54k | XLSXFieldTypeExtended() : eType(OFTMaxType), bHasMS(false) |
203 | 1.54k | { |
204 | 1.54k | } |
205 | | /* Explicit construction from a field type; bHasMSIn defaults to false. */ |
206 | | explicit XLSXFieldTypeExtended(OGRFieldType eTypeIn, bool bHasMSIn = false) |
207 | 180k | : eType(eTypeIn), bHasMS(bHasMSIn) |
208 | 180k | { |
209 | 180k | } |
210 | | }; |
211 | | |
212 | | class OGRXLSXDataSource final : public GDALDataset /* Dataset class of the XLSX driver. Besides the GDALDataset overrides it declares the XML handler callbacks and the state members they work on. */ |
213 | | { |
214 | | char *pszName; |
215 | | CPLString osPrefixedFilename; |
216 | | bool bUpdatable; /* returned by GetUpdatable() */ |
217 | | bool bUpdated; /* set to true by SetUpdated() */ |
218 | | /* NOTE(review): nLayers presumably counts the entries of papoLayers - confirm. */ |
219 | | int nLayers; |
220 | | OGRXLSXLayer **papoLayers; |
221 | | std::map<CPLString, CPLString> oMapRelsIdToTarget; |
222 | | std::set<std::string> m_oSetSheetId; |
223 | | /* One analysis routine per file handle accepted by Open() (shared strings, workbook, workbook rels, styles). */ |
224 | | void AnalyseSharedStrings(VSILFILE *fpSharedStrings); |
225 | | void AnalyseWorkbook(VSILFILE *fpWorkbook); |
226 | | void AnalyseWorkbookRels(VSILFILE *fpWorkbookRels); |
227 | | void AnalyseStyles(VSILFILE *fpStyles); |
228 | | |
229 | | std::vector<std::string> apoSharedStrings; |
230 | | std::string osCurrentString; |
231 | | |
232 | | bool bFirstLineIsHeaders; |
233 | | int bAutodetectTypes; |
234 | | /* XML parser handle and parsing bookkeeping. */ |
235 | | XML_Parser oParser; |
236 | | bool bStopParsing; |
237 | | int nWithoutEventCounter; |
238 | | int nDataHandlerCounter; |
239 | | int nCurLine; |
240 | | int nCurCol; |
241 | | |
242 | | OGRXLSXLayer *poCurLayer; |
243 | | std::string m_osCols{}; |
244 | | /* Handler state stack with STACK_SIZE entries. NOTE(review): nStackDepth is presumably the index of the current entry - confirm. */ |
245 | | int nStackDepth; |
246 | | int nDepth; |
247 | | HandlerState stateStack[STACK_SIZE]; |
248 | | |
249 | | CPLString osValueType; |
250 | | CPLString osValue; |
251 | | |
252 | | std::vector<std::string> apoFirstLineValues; |
253 | | std::vector<std::string> apoFirstLineTypes; |
254 | | std::vector<std::string> apoCurLineValues; |
255 | | std::vector<std::string> apoCurLineTypes; |
256 | | |
257 | | bool bInCellXFS; |
258 | | std::map<int, XLSXFieldTypeExtended> apoMapStyleFormats; |
259 | | std::vector<XLSXFieldTypeExtended> apoStyles; |
260 | | /* Private per-state handlers. NOTE(review): presumably dispatched from startElementCbk / endElementCbk / dataHandlerCbk according to the current state - confirm. */ |
261 | | void PushState(HandlerStateEnum eVal); |
262 | | void startElementDefault(const char *pszName, const char **ppszAttr); |
263 | | void startElementTable(const char *pszName, const char **ppszAttr); |
264 | | void endElementTable(const char *pszName); |
265 | | void startElementCols(const char *pszName, const char **ppszAttr); |
266 | | void endElementCols(const char *pszName); |
267 | | void startElementRow(const char *pszName, const char **ppszAttr); |
268 | | void endElementRow(const char *pszName); |
269 | | void startElementCell(const char *pszName, const char **ppszAttr); |
270 | | void endElementCell(const char *pszName); |
271 | | void dataHandlerTextV(const char *data, int nLen); |
272 | | |
273 | | void DetectHeaderLine(); |
274 | | |
275 | | OGRFieldType GetOGRFieldType(const char *pszValue, const char *pszValueType, |
276 | | OGRFieldSubType &eSubType); |
277 | | /* Private overload taking a layer name, in addition to the public DeleteLayer(int) override. */ |
278 | | void DeleteLayer(const char *pszLayerName); |
279 | | |
280 | | public: |
281 | | explicit OGRXLSXDataSource(CSLConstList papszOpenOptionsIn); |
282 | | ~OGRXLSXDataSource() override; |
283 | | CPLErr Close(GDALProgressFunc = nullptr, void * = nullptr) override; |
284 | | |
285 | | int Open(const char *pszFilename, const char *pszPrefixedFilename, |
286 | | VSILFILE *fpWorkbook, VSILFILE *fpWorkbookRels, |
287 | | VSILFILE *fpSharedStrings, VSILFILE *fpStyles, int bUpdate); |
288 | | int Create(const char *pszName, char **papszOptions); |
289 | | |
290 | | int GetLayerCount() const override; |
291 | | const OGRLayer *GetLayer(int) const override; |
292 | | |
293 | | int TestCapability(const char *) const override; |
294 | | |
295 | | OGRLayer *ICreateLayer(const char *pszName, |
296 | | const OGRGeomFieldDefn *poGeomFieldDefn, |
297 | | CSLConstList papszOptions) override; |
298 | | |
299 | | OGRErr DeleteLayer(int iLayer) override; |
300 | | |
301 | | CPLErr FlushCache(bool bAtClosing) override; |
302 | | /* Public handler entry points. NOTE(review): presumably invoked by static XML parser callbacks defined in the .cpp - confirm. */ |
303 | | void startElementCbk(const char *pszName, const char **ppszAttr); |
304 | | void endElementCbk(const char *pszName); |
305 | | void dataHandlerCbk(const char *data, int nLen); |
306 | | /* NOTE(review): the SS, WBRels, WB and Styles infixes below presumably match the AnalyseSharedStrings / AnalyseWorkbookRels / AnalyseWorkbook / AnalyseStyles routines - confirm. */ |
307 | | void startElementSSCbk(const char *pszName, const char **ppszAttr); |
308 | | void endElementSSCbk(const char *pszName); |
309 | | void dataHandlerSSCbk(const char *data, int nLen); |
310 | | |
311 | | void startElementWBRelsCbk(const char *pszName, const char **ppszAttr); |
312 | | |
313 | | void startElementWBCbk(const char *pszName, const char **ppszAttr); |
314 | | |
315 | | void startElementStylesCbk(const char *pszName, const char **ppszAttr); |
316 | | void endElementStylesCbk(const char *pszName); |
317 | | |
318 | | void BuildLayer(OGRXLSXLayer *poLayer); |
319 | | /* Returns bUpdatable; note that this accessor is not declared const. */ |
320 | | bool GetUpdatable() |
321 | 1.51M | { |
322 | 1.51M | return bUpdatable; |
323 | 1.51M | } |
324 | | /* Sets bUpdated to true. */ |
325 | | void SetUpdated() |
326 | 0 | { |
327 | 0 | bUpdated = true; |
328 | 0 | } |
329 | | }; |
330 | | |
331 | | } // namespace OGRXLSX |
332 | | |
333 | | #endif /* ndef OGR_XLSX_H_INCLUDED */ |