/src/gdal/ogr/ogrsf_frmts/dxf/ogrdxfreader.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: DXF Translator |
4 | | * Purpose: Implements low level DXF reading with caching and parsing of |
5 | | * the code/value pairs. |
6 | | * Author: Frank Warmerdam, warmerdam@pobox.com |
7 | | * |
8 | | ****************************************************************************** |
9 | | * Copyright (c) 2009, Frank Warmerdam <warmerdam@pobox.com> |
10 | | * |
11 | | * SPDX-License-Identifier: MIT |
12 | | ****************************************************************************/ |
13 | | |
14 | | #include "ogr_dxf.h" |
15 | | #include "cpl_conv.h" |
16 | | #include "cpl_string.h" |
17 | | #include "cpl_csv.h" |
18 | | |
19 | | #include <cinttypes> |
20 | | |
/************************************************************************/
/*                         ~OGRDXFReaderBase()                          */
/*                                                                      */
/*      Defined out of line with the compiler-generated body: no        */
/*      explicit cleanup is performed here.                             */
/************************************************************************/

OGRDXFReaderBase::~OGRDXFReaderBase() = default;
26 | | |
27 | | /************************************************************************/ |
28 | | /* Initialize() */ |
29 | | /************************************************************************/ |
30 | | |
31 | | void OGRDXFReaderBase::Initialize(VSILFILE *fpIn) |
32 | | |
33 | 28.6k | { |
34 | 28.6k | fp = fpIn; |
35 | 28.6k | } |
36 | | |
37 | | /************************************************************************/ |
38 | | /* ResetReadPointer() */ |
39 | | /************************************************************************/ |
40 | | |
41 | | void OGRDXFReaderASCII::ResetReadPointer(uint64_t iNewOffset, |
42 | | int nNewLineNumber /* = 0 */) |
43 | | |
44 | 21.0k | { |
45 | 21.0k | nSrcBufferBytes = 0; |
46 | 21.0k | iSrcBufferOffset = 0; |
47 | 21.0k | iSrcBufferFileOffset = iNewOffset; |
48 | 21.0k | nLastValueSize = 0; |
49 | 21.0k | nLineNumber = nNewLineNumber; |
50 | | |
51 | 21.0k | VSIFSeekL(fp, iNewOffset, SEEK_SET); |
52 | 21.0k | } |
53 | | |
54 | | /************************************************************************/ |
55 | | /* LoadDiskChunk() */ |
56 | | /* */ |
57 | | /* Load another block (512 bytes) of input from the source */ |
58 | | /* file. */ |
59 | | /************************************************************************/ |
60 | | |
61 | | void OGRDXFReaderASCII::LoadDiskChunk() |
62 | | |
63 | 2.16M | { |
64 | 2.16M | if (nSrcBufferBytes - iSrcBufferOffset > 511) |
65 | 718 | return; |
66 | | |
67 | 2.16M | if (iSrcBufferOffset > 0) |
68 | 2.04M | { |
69 | 2.04M | CPLAssert(nSrcBufferBytes <= 1024); |
70 | 2.04M | CPLAssert(iSrcBufferOffset <= nSrcBufferBytes); |
71 | | |
72 | 2.04M | memmove(achSrcBuffer.data(), achSrcBuffer.data() + iSrcBufferOffset, |
73 | 2.04M | nSrcBufferBytes - iSrcBufferOffset); |
74 | 2.04M | iSrcBufferFileOffset += iSrcBufferOffset; |
75 | 2.04M | nSrcBufferBytes -= iSrcBufferOffset; |
76 | 2.04M | iSrcBufferOffset = 0; |
77 | 2.04M | } |
78 | | |
79 | 2.16M | nSrcBufferBytes += static_cast<int>( |
80 | 2.16M | VSIFReadL(achSrcBuffer.data() + nSrcBufferBytes, 1, 512, fp)); |
81 | 2.16M | achSrcBuffer[nSrcBufferBytes] = '\0'; |
82 | | |
83 | 2.16M | CPLAssert(nSrcBufferBytes <= 1024); |
84 | 2.16M | CPLAssert(iSrcBufferOffset <= nSrcBufferBytes); |
85 | 2.16M | } |
86 | | |
87 | | /************************************************************************/ |
88 | | /* ReadValue() */ |
89 | | /* */ |
90 | | /* Read one type code and value line pair from the DXF file. */ |
91 | | /************************************************************************/ |
92 | | |
93 | | int OGRDXFReaderASCII::ReadValueRaw(char *pszValueBuf, int nValueBufSize) |
94 | | |
95 | 46.9M | { |
96 | | /* -------------------------------------------------------------------- */ |
97 | | /* Make sure we have lots of data in our buffer for one value. */ |
98 | | /* -------------------------------------------------------------------- */ |
99 | 46.9M | if (nSrcBufferBytes - iSrcBufferOffset < 512) |
100 | 2.13M | LoadDiskChunk(); |
101 | | |
102 | | /* -------------------------------------------------------------------- */ |
103 | | /* Capture the value code, and skip past it. */ |
104 | | /* -------------------------------------------------------------------- */ |
105 | 46.9M | unsigned int iStartSrcBufferOffset = iSrcBufferOffset; |
106 | 46.9M | int nValueCode = atoi(achSrcBuffer.data() + iSrcBufferOffset); |
107 | | |
108 | 46.9M | nLineNumber++; |
109 | | |
110 | | // proceed to newline. |
111 | 293M | while (achSrcBuffer[iSrcBufferOffset] != '\n' && |
112 | 266M | achSrcBuffer[iSrcBufferOffset] != '\r' && |
113 | 246M | achSrcBuffer[iSrcBufferOffset] != '\0') |
114 | 246M | iSrcBufferOffset++; |
115 | | |
116 | 46.9M | if (achSrcBuffer[iSrcBufferOffset] == '\0') |
117 | 24.8k | return -1; |
118 | | |
119 | | // skip past newline. CR, CRLF, or LFCR |
120 | 46.9M | if ((achSrcBuffer[iSrcBufferOffset] == '\r' && |
121 | 19.9M | achSrcBuffer[iSrcBufferOffset + 1] == '\n') || |
122 | 40.6M | (achSrcBuffer[iSrcBufferOffset] == '\n' && |
123 | 26.9M | achSrcBuffer[iSrcBufferOffset + 1] == '\r')) |
124 | 9.15M | iSrcBufferOffset += 2; |
125 | 37.7M | else |
126 | 37.7M | iSrcBufferOffset += 1; |
127 | | |
128 | 46.9M | if (achSrcBuffer[iSrcBufferOffset] == '\0') |
129 | 2.70k | return -1; |
130 | | |
131 | | /* -------------------------------------------------------------------- */ |
132 | | /* Capture the value string. */ |
133 | | /* -------------------------------------------------------------------- */ |
134 | 46.9M | unsigned int iEOL = iSrcBufferOffset; |
135 | 46.9M | CPLString osValue; |
136 | | |
137 | 46.9M | nLineNumber++; |
138 | | |
139 | | // proceed to newline. |
140 | 338M | while (achSrcBuffer[iEOL] != '\n' && achSrcBuffer[iEOL] != '\r' && |
141 | 291M | achSrcBuffer[iEOL] != '\0') |
142 | 291M | iEOL++; |
143 | | |
144 | 46.9M | bool bLongLine = false; |
145 | 46.9M | while (achSrcBuffer[iEOL] == '\0' || |
146 | 46.9M | (achSrcBuffer[iEOL] == '\r' && achSrcBuffer[iEOL + 1] == '\0')) |
147 | 23.6k | { |
148 | | // The line is longer than the buffer (or the line ending is split at |
149 | | // end of buffer). Let's copy what we have so far into our string, and |
150 | | // read more |
151 | 23.6k | const auto nValueLength = osValue.length(); |
152 | | |
153 | 23.6k | if (nValueLength + iEOL - iSrcBufferOffset > 1048576) |
154 | 0 | { |
155 | 0 | CPLError(CE_Failure, CPLE_AppDefined, "Line %d is too long", |
156 | 0 | nLineNumber); |
157 | 0 | return -1; |
158 | 0 | } |
159 | | |
160 | 23.6k | osValue.resize(nValueLength + iEOL - iSrcBufferOffset, '\0'); |
161 | 23.6k | std::copy(achSrcBuffer.data() + iSrcBufferOffset, |
162 | 23.6k | achSrcBuffer.data() + iEOL, osValue.begin() + nValueLength); |
163 | | |
164 | 23.6k | iSrcBufferOffset = iEOL; |
165 | 23.6k | LoadDiskChunk(); |
166 | 23.6k | iEOL = iSrcBufferOffset; |
167 | 23.6k | bLongLine = true; |
168 | | |
169 | | // Have we prematurely reached the end of the file? |
170 | 23.6k | if (achSrcBuffer[iEOL] == '\0') |
171 | 9.23k | return -1; |
172 | | |
173 | | // Proceed to newline again |
174 | 4.32M | while (achSrcBuffer[iEOL] != '\n' && achSrcBuffer[iEOL] != '\r' && |
175 | 4.31M | achSrcBuffer[iEOL] != '\0') |
176 | 4.31M | iEOL++; |
177 | 14.4k | } |
178 | | |
179 | 46.9M | size_t nValueBufLen = 0; |
180 | | |
181 | | // If this was an extremely long line, copy from osValue into the buffer |
182 | 46.9M | if (!osValue.empty()) |
183 | 9.53k | { |
184 | 9.53k | strncpy(pszValueBuf, osValue.c_str(), nValueBufSize - 1); |
185 | 9.53k | pszValueBuf[nValueBufSize - 1] = '\0'; |
186 | | |
187 | 9.53k | nValueBufLen = strlen(pszValueBuf); |
188 | | |
189 | 9.53k | if (static_cast<int>(osValue.length()) > nValueBufSize - 1) |
190 | 5.10k | { |
191 | 5.10k | CPLDebug("DXF", "Long line truncated to %d characters.\n%s...", |
192 | 5.10k | nValueBufSize - 1, pszValueBuf); |
193 | 5.10k | } |
194 | 9.53k | } |
195 | | |
196 | | // Copy the last (normally, the only) section of this line into the buffer |
197 | 46.9M | if (static_cast<int>(iEOL - iSrcBufferOffset) > |
198 | 46.9M | nValueBufSize - static_cast<int>(nValueBufLen) - 1) |
199 | 39.7k | { |
200 | 39.7k | strncpy(pszValueBuf + nValueBufLen, |
201 | 39.7k | achSrcBuffer.data() + iSrcBufferOffset, |
202 | 39.7k | nValueBufSize - static_cast<int>(nValueBufLen) - 1); |
203 | 39.7k | pszValueBuf[nValueBufSize - 1] = '\0'; |
204 | | |
205 | 39.7k | CPLDebug("DXF", "Long line truncated to %d characters.\n%s...", |
206 | 39.7k | nValueBufSize - 1, pszValueBuf); |
207 | 39.7k | } |
208 | 46.8M | else |
209 | 46.8M | { |
210 | 46.8M | strncpy(pszValueBuf + nValueBufLen, |
211 | 46.8M | achSrcBuffer.data() + iSrcBufferOffset, |
212 | 46.8M | iEOL - iSrcBufferOffset); |
213 | 46.8M | pszValueBuf[nValueBufLen + iEOL - iSrcBufferOffset] = '\0'; |
214 | 46.8M | } |
215 | | |
216 | 46.9M | iSrcBufferOffset = iEOL; |
217 | | |
218 | | // skip past newline. CR, CRLF, or LFCR |
219 | 46.9M | if ((achSrcBuffer[iSrcBufferOffset] == '\r' && |
220 | 19.8M | achSrcBuffer[iSrcBufferOffset + 1] == '\n') || |
221 | 40.7M | (achSrcBuffer[iSrcBufferOffset] == '\n' && |
222 | 27.0M | achSrcBuffer[iSrcBufferOffset + 1] == '\r')) |
223 | 9.17M | iSrcBufferOffset += 2; |
224 | 37.7M | else |
225 | 37.7M | iSrcBufferOffset += 1; |
226 | | |
227 | | /* -------------------------------------------------------------------- */ |
228 | | /* Record how big this value was, so it can be unread safely. */ |
229 | | /* -------------------------------------------------------------------- */ |
230 | 46.9M | if (bLongLine) |
231 | 9.54k | nLastValueSize = 0; |
232 | 46.9M | else |
233 | 46.9M | { |
234 | 46.9M | nLastValueSize = iSrcBufferOffset - iStartSrcBufferOffset; |
235 | 46.9M | CPLAssert(nLastValueSize > 0); |
236 | 46.9M | } |
237 | | |
238 | 46.9M | return nValueCode; |
239 | 46.9M | } |
240 | | |
241 | | int OGRDXFReaderASCII::ReadValue(char *pszValueBuf, int nValueBufSize) |
242 | 46.9M | { |
243 | 46.9M | int nValueCode; |
244 | 46.9M | while (true) |
245 | 46.9M | { |
246 | 46.9M | nValueCode = ReadValueRaw(pszValueBuf, nValueBufSize); |
247 | 46.9M | if (nValueCode == 999) |
248 | 17.8k | { |
249 | | // Skip comments |
250 | 17.8k | continue; |
251 | 17.8k | } |
252 | 46.9M | break; |
253 | 46.9M | } |
254 | 46.9M | return nValueCode; |
255 | 46.9M | } |
256 | | |
257 | | /************************************************************************/ |
258 | | /* UnreadValue() */ |
259 | | /* */ |
260 | | /* Unread the last value read, accomplished by resetting the */ |
261 | | /* read pointer. */ |
262 | | /************************************************************************/ |
263 | | |
264 | | void OGRDXFReaderASCII::UnreadValue() |
265 | | |
266 | 1.31M | { |
267 | 1.31M | if (nLastValueSize == 0) |
268 | 447 | { |
269 | 447 | CPLError(CE_Failure, CPLE_AppDefined, |
270 | 447 | "Cannot UnreadValue(), likely due to a previous long line"); |
271 | 447 | return; |
272 | 447 | } |
273 | 1.31M | CPLAssert(iSrcBufferOffset >= nLastValueSize); |
274 | 1.31M | CPLAssert(nLastValueSize > 0); |
275 | | |
276 | 1.31M | iSrcBufferOffset -= nLastValueSize; |
277 | 1.31M | nLineNumber -= 2; |
278 | 1.31M | nLastValueSize = 0; |
279 | 1.31M | } |
280 | | |
281 | | int OGRDXFReaderBinary::ReadValue(char *pszValueBuffer, int nValueBufferSize) |
282 | 142 | { |
283 | 142 | if (VSIFTellL(fp) == 0) |
284 | 0 | { |
285 | 0 | VSIFSeekL( |
286 | 0 | fp, static_cast<vsi_l_offset>(AUTOCAD_BINARY_DXF_SIGNATURE.size()), |
287 | 0 | SEEK_SET); |
288 | 0 | } |
289 | 142 | if (VSIFTellL(fp) == AUTOCAD_BINARY_DXF_SIGNATURE.size()) |
290 | 129 | { |
291 | | // Detect if the file is AutoCAD Binary r12 |
292 | 129 | GByte abyZeroSection[8] = {0}; |
293 | 129 | if (VSIFReadL(abyZeroSection, 1, sizeof(abyZeroSection), fp) != |
294 | 129 | sizeof(abyZeroSection)) |
295 | 2 | { |
296 | 2 | CPLError(CE_Failure, CPLE_FileIO, "File too short"); |
297 | 2 | return -1; |
298 | 2 | } |
299 | 127 | m_bIsR12 = memcmp(abyZeroSection, "\x00SECTION", 8) == 0; |
300 | 127 | VSIFSeekL( |
301 | 127 | fp, static_cast<vsi_l_offset>(AUTOCAD_BINARY_DXF_SIGNATURE.size()), |
302 | 127 | SEEK_SET); |
303 | 127 | } |
304 | | |
305 | 140 | m_nPrevPos = VSIFTellL(fp); |
306 | | |
307 | 140 | uint16_t nCode = 0; |
308 | 140 | bool bReadCodeUINT16 = true; |
309 | 140 | if (m_bIsR12) |
310 | 22 | { |
311 | 22 | GByte nCodeByte = 0; |
312 | 22 | if (VSIFReadL(&nCodeByte, 1, 1, fp) != 1) |
313 | 0 | { |
314 | 0 | CPLError(CE_Failure, CPLE_FileIO, "File too short"); |
315 | 0 | return -1; |
316 | 0 | } |
317 | 22 | bReadCodeUINT16 = (nCodeByte == 255); |
318 | 22 | if (!bReadCodeUINT16) |
319 | 21 | nCode = nCodeByte; |
320 | 22 | } |
321 | 140 | if (bReadCodeUINT16) |
322 | 119 | { |
323 | 119 | if (VSIFReadL(&nCode, 1, sizeof(uint16_t), fp) != sizeof(uint16_t)) |
324 | 3 | { |
325 | 3 | CPLError(CE_Failure, CPLE_FileIO, "File too short"); |
326 | 3 | return -1; |
327 | 3 | } |
328 | 116 | CPL_LSBPTR16(&nCode); |
329 | 116 | } |
330 | | |
331 | | // Credits to ezdxf for the ranges |
332 | 137 | bool bRet = true; |
333 | 137 | if (nCode >= 290 && nCode < 300) |
334 | 1 | { |
335 | 1 | GByte nVal = 0; |
336 | 1 | bRet = VSIFReadL(&nVal, 1, sizeof(nVal), fp) == 1; |
337 | 1 | CPLsnprintf(pszValueBuffer, nValueBufferSize, "%d", nVal); |
338 | | // CPLDebug("DXF", "Read %d: %d", nCode, nVal); |
339 | 1 | } |
340 | 136 | else if ((nCode >= 60 && nCode < 80) || (nCode >= 170 && nCode < 180) || |
341 | 132 | (nCode >= 270 && nCode < 290) || (nCode >= 370 && nCode < 390) || |
342 | 130 | (nCode >= 400 && nCode < 410) || (nCode >= 1060 && nCode < 1071)) |
343 | 8 | { |
344 | 8 | int16_t nVal = 0; |
345 | 8 | bRet = VSIFReadL(&nVal, 1, sizeof(nVal), fp) == sizeof(nVal); |
346 | 8 | CPL_LSBPTR16(&nVal); |
347 | 8 | CPLsnprintf(pszValueBuffer, nValueBufferSize, "%d", nVal); |
348 | | // CPLDebug("DXF", "Read %d: %d", nCode, nVal); |
349 | 8 | } |
350 | 128 | else if ((nCode >= 90 && nCode < 100) || (nCode >= 420 && nCode < 430) || |
351 | 125 | (nCode >= 440 && nCode < 460) || (nCode == 1071)) |
352 | 5 | { |
353 | 5 | int32_t nVal = 0; |
354 | 5 | bRet = VSIFReadL(&nVal, 1, sizeof(nVal), fp) == sizeof(nVal); |
355 | 5 | CPL_LSBPTR32(&nVal); |
356 | 5 | CPLsnprintf(pszValueBuffer, nValueBufferSize, "%d", nVal); |
357 | | // CPLDebug("DXF", "Read %d: %d", nCode, nVal); |
358 | 5 | } |
359 | 123 | else if (nCode >= 160 && nCode < 170) |
360 | 2 | { |
361 | 2 | int64_t nVal = 0; |
362 | 2 | bRet = VSIFReadL(&nVal, 1, sizeof(nVal), fp) == sizeof(nVal); |
363 | 2 | CPL_LSBPTR64(&nVal); |
364 | 2 | CPLsnprintf(pszValueBuffer, nValueBufferSize, "%" PRId64, nVal); |
365 | | // CPLDebug("DXF", "Read %d: %" PRId64, nCode, nVal); |
366 | 2 | } |
367 | 121 | else if ((nCode >= 10 && nCode < 60) || (nCode >= 110 && nCode < 150) || |
368 | 112 | (nCode >= 210 && nCode < 240) || (nCode >= 460 && nCode < 470) || |
369 | 110 | (nCode >= 1010 && nCode < 1060)) |
370 | 12 | { |
371 | 12 | double dfVal = 0; |
372 | 12 | bRet = VSIFReadL(&dfVal, 1, sizeof(dfVal), fp) == sizeof(dfVal); |
373 | 12 | CPL_LSBPTR64(&dfVal); |
374 | 12 | CPLsnprintf(pszValueBuffer, nValueBufferSize, "%.17g", dfVal); |
375 | | // CPLDebug("DXF", "Read %d: %g", nCode, dfVal); |
376 | 12 | } |
377 | 109 | else if ((nCode >= 310 && nCode < 320) || nCode == 1004) |
378 | 14 | { |
379 | | // Binary |
380 | 14 | GByte nChunkLength = 0; |
381 | 14 | bRet = VSIFReadL(&nChunkLength, 1, sizeof(nChunkLength), fp) == |
382 | 14 | sizeof(nChunkLength); |
383 | 14 | std::vector<GByte> abyData(nChunkLength); |
384 | 14 | bRet &= VSIFReadL(abyData.data(), 1, nChunkLength, fp) == nChunkLength; |
385 | 14 | if (2 * nChunkLength + 1 > nValueBufferSize) |
386 | 6 | { |
387 | 6 | CPLError(CE_Failure, CPLE_AppDefined, |
388 | 6 | "Provided buffer too small to store string"); |
389 | 6 | return -1; |
390 | 6 | } |
391 | 93 | for (int i = 0; i < nChunkLength; ++i) |
392 | 85 | { |
393 | 85 | snprintf(pszValueBuffer + 2 * i, nValueBufferSize - 2 * i, "%02X", |
394 | 85 | abyData[i]); |
395 | 85 | } |
396 | 8 | pszValueBuffer[2 * nChunkLength] = 0; |
397 | | // CPLDebug("DXF", "Read %d: '%s'", nCode, pszValueBuffer); |
398 | 8 | } |
399 | 95 | else |
400 | 95 | { |
401 | | // Zero terminated string |
402 | 95 | bool bEOS = false; |
403 | 2.76k | for (int i = 0; bRet && i < nValueBufferSize; ++i) |
404 | 2.73k | { |
405 | 2.73k | char ch = 0; |
406 | 2.73k | bRet = VSIFReadL(&ch, 1, 1, fp) == 1; |
407 | 2.73k | pszValueBuffer[i] = ch; |
408 | 2.73k | if (ch == 0) |
409 | 72 | { |
410 | | // CPLDebug("DXF", "Read %d: '%s'", nCode, pszValueBuffer); |
411 | 72 | bEOS = true; |
412 | 72 | break; |
413 | 72 | } |
414 | 2.73k | } |
415 | 95 | if (!bEOS) |
416 | 23 | { |
417 | 23 | CPLError(CE_Failure, CPLE_AppDefined, |
418 | 23 | "Provided buffer too small to store string"); |
419 | 13.6k | while (bRet) |
420 | 13.6k | { |
421 | 13.6k | char ch = 0; |
422 | 13.6k | bRet = VSIFReadL(&ch, 1, 1, fp) == 1; |
423 | 13.6k | if (ch == 0) |
424 | 23 | { |
425 | 23 | break; |
426 | 23 | } |
427 | 13.6k | } |
428 | 23 | return -1; |
429 | 23 | } |
430 | 95 | } |
431 | | |
432 | 108 | if (!bRet) |
433 | 43 | { |
434 | 43 | CPLError(CE_Failure, CPLE_FileIO, "File too short"); |
435 | 43 | return -1; |
436 | 43 | } |
437 | 65 | return nCode; |
438 | 108 | } |
439 | | |
440 | | void OGRDXFReaderBinary::UnreadValue() |
441 | 0 | { |
442 | 0 | if (m_nPrevPos == static_cast<uint64_t>(-1)) |
443 | 0 | { |
444 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
445 | 0 | "UnreadValue() can be called just once after ReadValue()"); |
446 | 0 | } |
447 | 0 | else |
448 | 0 | { |
449 | 0 | VSIFSeekL(fp, m_nPrevPos, SEEK_SET); |
450 | 0 | m_nPrevPos = static_cast<uint64_t>(-1); |
451 | 0 | } |
452 | 0 | } |
453 | | |
454 | | void OGRDXFReaderBinary::ResetReadPointer(uint64_t nPos, int nNewLineNumber) |
455 | 0 | { |
456 | | VSIFSeekL(fp, nPos, SEEK_SET); |
457 | 0 | nLineNumber = nNewLineNumber; |
458 | 0 | } |