/src/gdal/ogr/ogr_expat.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: OGR |
4 | | * Purpose: Convenience function for parsing with Expat library |
5 | | * Author: Even Rouault, even dot rouault at spatialys.com |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #ifdef HAVE_EXPAT |
14 | | |
15 | | #include "cpl_port.h" |
16 | | #include "cpl_conv.h" |
17 | | #include "cpl_string.h" |
18 | | #include "ogr_expat.h" |
19 | | |
20 | | #include <cstddef> |
21 | | #include <cstdlib> |
22 | | |
23 | | #include "cpl_error.h" |
24 | | |
25 | | constexpr size_t OGR_EXPAT_MAX_ALLOWED_ALLOC = 10000000; |
26 | | |
27 | | static void *OGRExpatMalloc(size_t size) CPL_WARN_UNUSED_RESULT; |
28 | | static void *OGRExpatRealloc(void *ptr, size_t size) CPL_WARN_UNUSED_RESULT; |
29 | | |
30 | | /************************************************************************/ |
31 | | /* CanAlloc() */ |
32 | | /************************************************************************/ |
33 | | |
34 | | static bool CanAlloc(size_t size) |
35 | 8.41M | { |
36 | 8.41M | if (size < OGR_EXPAT_MAX_ALLOWED_ALLOC) |
37 | 8.41M | return true; |
38 | | |
39 | 0 | if (CPLTestBool(CPLGetConfigOption("OGR_EXPAT_UNLIMITED_MEM_ALLOC", "NO"))) |
40 | 0 | return true; |
41 | | |
42 | 0 | CPLError( |
43 | 0 | CE_Failure, CPLE_OutOfMemory, |
44 | 0 | "Expat tried to malloc %d bytes. File probably corrupted. " |
45 | 0 | "This may also happen in case of a very big XML comment, in which case " |
46 | 0 | "you may define the OGR_EXPAT_UNLIMITED_MEM_ALLOC configuration " |
47 | 0 | "option to YES to remove that protection.", |
48 | 0 | static_cast<int>(size)); |
49 | 0 | return false; |
50 | 0 | } |
51 | | |
52 | | /************************************************************************/ |
53 | | /* OGRExpatMalloc() */ |
54 | | /************************************************************************/ |
55 | | |
56 | | static void *OGRExpatMalloc(size_t size) |
57 | 8.19M | { |
58 | 8.19M | if (CanAlloc(size)) |
59 | 8.19M | return malloc(size); |
60 | | |
61 | 0 | return nullptr; |
62 | 8.19M | } |
63 | | |
64 | | /************************************************************************/ |
65 | | /* OGRExpatRealloc() */ |
66 | | /************************************************************************/ |
67 | | |
68 | | // Caller must replace the pointer with the returned pointer. |
69 | | static void *OGRExpatRealloc(void *ptr, size_t size) |
70 | 218k | { |
71 | 218k | if (CanAlloc(size)) |
72 | 218k | return realloc(ptr, size); |
73 | | |
74 | 0 | return nullptr; |
75 | 218k | } |
76 | | |
77 | | /************************************************************************/ |
78 | | /* FillWINDOWS1252() */ |
79 | | /************************************************************************/ |
80 | | |
81 | | static void FillWINDOWS1252(XML_Encoding *info) |
82 | 18 | { |
83 | | // Map CP1252 bytes to Unicode values. |
84 | 2.32k | for (int i = 0; i < 0x80; ++i) |
85 | 2.30k | info->map[i] = i; |
86 | | |
87 | 18 | info->map[0x80] = 0x20AC; |
88 | 18 | info->map[0x81] = -1; |
89 | 18 | info->map[0x82] = 0x201A; |
90 | 18 | info->map[0x83] = 0x0192; |
91 | 18 | info->map[0x84] = 0x201E; |
92 | 18 | info->map[0x85] = 0x2026; |
93 | 18 | info->map[0x86] = 0x2020; |
94 | 18 | info->map[0x87] = 0x2021; |
95 | 18 | info->map[0x88] = 0x02C6; |
96 | 18 | info->map[0x89] = 0x2030; |
97 | 18 | info->map[0x8A] = 0x0160; |
98 | 18 | info->map[0x8B] = 0x2039; |
99 | 18 | info->map[0x8C] = 0x0152; |
100 | 18 | info->map[0x8D] = -1; |
101 | 18 | info->map[0x8E] = 0x017D; |
102 | 18 | info->map[0x8F] = -1; |
103 | 18 | info->map[0x90] = -1; |
104 | 18 | info->map[0x91] = 0x2018; |
105 | 18 | info->map[0x92] = 0x2019; |
106 | 18 | info->map[0x93] = 0x201C; |
107 | 18 | info->map[0x94] = 0x201D; |
108 | 18 | info->map[0x95] = 0x2022; |
109 | 18 | info->map[0x96] = 0x2013; |
110 | 18 | info->map[0x97] = 0x2014; |
111 | 18 | info->map[0x98] = 0x02DC; |
112 | 18 | info->map[0x99] = 0x2122; |
113 | 18 | info->map[0x9A] = 0x0161; |
114 | 18 | info->map[0x9B] = 0x203A; |
115 | 18 | info->map[0x9C] = 0x0153; |
116 | 18 | info->map[0x9D] = -1; |
117 | 18 | info->map[0x9E] = 0x017E; |
118 | 18 | info->map[0x9F] = 0x0178; |
119 | | |
120 | 1.74k | for (int i = 0xA0; i <= 0xFF; ++i) |
121 | 1.72k | info->map[i] = i; |
122 | 18 | } |
123 | | |
124 | | /************************************************************************/ |
125 | | /* FillISO885915() */ |
126 | | /************************************************************************/ |
127 | | |
128 | | static void FillISO885915(XML_Encoding *info) |
129 | 22.9k | { |
130 | | // Map ISO-8859-15 bytes to Unicode values. |
131 | | // Generated by generate_encoding_table.c. |
132 | 3.78M | for (int i = 0x00; i < 0xA4; ++i) |
133 | 3.75M | info->map[i] = i; |
134 | 22.9k | info->map[0xA4] = 0x20AC; |
135 | 22.9k | info->map[0xA5] = 0xA5; |
136 | 22.9k | info->map[0xA6] = 0x0160; |
137 | 22.9k | info->map[0xA7] = 0xA7; |
138 | 22.9k | info->map[0xA8] = 0x0161; |
139 | 275k | for (int i = 0xA9; i < 0xB4; ++i) |
140 | 252k | info->map[i] = i; |
141 | 22.9k | info->map[0xB4] = 0x017D; |
142 | 91.6k | for (int i = 0xB5; i < 0xB8; ++i) |
143 | 68.7k | info->map[i] = i; |
144 | 22.9k | info->map[0xB8] = 0x017E; |
145 | 91.6k | for (int i = 0xB9; i < 0xBC; ++i) |
146 | 68.7k | info->map[i] = i; |
147 | 22.9k | info->map[0xBC] = 0x0152; |
148 | 22.9k | info->map[0xBD] = 0x0153; |
149 | 22.9k | info->map[0xBE] = 0x0178; |
150 | 1.51M | for (int i = 0xBF; i < 0x100; ++i) |
151 | 1.48M | info->map[i] = i; |
152 | 22.9k | } |
153 | | |
154 | | /************************************************************************/ |
155 | | /* OGRExpatUnknownEncodingHandler() */ |
156 | | /************************************************************************/ |
157 | | |
158 | | static int |
159 | | OGRExpatUnknownEncodingHandler(void * /* unused_encodingHandlerData */, |
160 | | const XML_Char *name, XML_Encoding *info) |
161 | 23.3k | { |
162 | 23.3k | if (EQUAL(name, "WINDOWS-1252")) |
163 | 18 | FillWINDOWS1252(info); |
164 | 23.3k | else if (EQUAL(name, "ISO-8859-15")) |
165 | 22.9k | FillISO885915(info); |
166 | 391 | else |
167 | 391 | { |
168 | 391 | CPLDebug("OGR", "Unhandled encoding %s", name); |
169 | 391 | return XML_STATUS_ERROR; |
170 | 391 | } |
171 | | |
172 | 22.9k | info->data = nullptr; |
173 | 22.9k | info->convert = nullptr; |
174 | 22.9k | info->release = nullptr; |
175 | | |
176 | 22.9k | return XML_STATUS_OK; |
177 | 23.3k | } |
178 | | |
179 | | /************************************************************************/ |
180 | | /* OGRCreateExpatXMLParser() */ |
181 | | /************************************************************************/ |
182 | | |
183 | | XML_Parser OGRCreateExpatXMLParser() |
184 | 79.9k | { |
185 | 79.9k | XML_Memory_Handling_Suite memsuite; |
186 | 79.9k | memsuite.malloc_fcn = OGRExpatMalloc; |
187 | 79.9k | memsuite.realloc_fcn = OGRExpatRealloc; |
188 | 79.9k | memsuite.free_fcn = free; |
189 | 79.9k | XML_Parser hParser = XML_ParserCreate_MM(nullptr, &memsuite, nullptr); |
190 | | |
191 | 79.9k | XML_SetUnknownEncodingHandler(hParser, OGRExpatUnknownEncodingHandler, |
192 | 79.9k | nullptr); |
193 | | |
194 | 79.9k | return hParser; |
195 | 79.9k | } |
196 | | |
197 | | #endif // HAVE_EXPAT |