/src/exiv2/xmpsdk/src/XMPMeta-Parse.cpp
Line | Count | Source |
1 | | // ================================================================================================= |
2 | | // Copyright 2002-2008 Adobe Systems Incorporated |
3 | | // All Rights Reserved. |
4 | | // |
5 | | // NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms |
6 | | // of the Adobe license agreement accompanying it. |
7 | | // |
8 | | // Adobe patent application tracking #P435, entitled 'Unique markers to simplify embedding data of |
9 | | // one format in a file with a different format', inventors: Sean Parent, Greg Gilley. |
10 | | // ================================================================================================= |
11 | | |
12 | | #include "XMP_Environment.h" // ! This must be the first include! |
13 | | #include "XMPCore_Impl.hpp" |
14 | | |
15 | | #include "XMPMeta.hpp" |
16 | | #include "XMPUtils.hpp" |
17 | | |
18 | | #include "UnicodeInlines.incl_cpp" |
19 | | #include "UnicodeConversions.hpp" |
20 | | #include "ExpatAdapter.hpp" |
21 | | |
22 | | #if XMP_DebugBuild |
23 | | #include <iostream> |
24 | | #endif |
25 | | |
26 | | using namespace std; |
27 | | |
28 | | #if XMP_WinBuild |
29 | | #ifdef _MSC_VER |
30 | | #pragma warning ( disable : 4533 ) // initialization of '...' is skipped by 'goto ...' |
31 | | #pragma warning ( disable : 4702 ) // unreachable code |
32 | | #pragma warning ( disable : 4800 ) // forcing value to bool 'true' or 'false' (performance warning) |
33 | | #pragma warning ( disable : 4996 ) // '...' was declared deprecated |
34 | | #endif |
35 | | #endif |
36 | | |
37 | | |
38 | | // *** Use the XMP_PropIsXyz (Schema, Simple, Struct, Array, ...) macros |
39 | | // *** Add debug codegen checks, e.g. that typical masking operations really work |
40 | | // *** Change all uses of strcmp and strncmp to XMP_LitMatch and XMP_LitNMatch |
41 | | |
42 | | |
43 | | // ================================================================================================= |
44 | | // Local Types and Constants |
45 | | // ========================= |
46 | | |
47 | | |
48 | | // ================================================================================================= |
49 | | // Static Variables |
50 | | // ================ |
51 | | |
52 | | #ifndef Trace_ParsingHackery |
53 | | #define Trace_ParsingHackery 0 |
54 | | #endif |
55 | | |
// UTF-8 replacement strings for the high half of the single-byte range. Entry i holds the
// replacement for byte value 0x80 + i, so the table covers 0x80..0xFF.
static const char * kReplaceLatin1[128] =
{

	// 0x80..0x9F: not defined by Latin-1 itself, so the Windows code page 1252 assignments are
	// used. Bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no formal CP-1252 meaning; the Windows
	// conversion API maps them to U+0081 etc., which are in XML's RestrictedChar set, so they
	// are replaced by a plain space here.

	"\xE2\x82\xAC", " ",            "\xE2\x80\x9A", "\xC6\x92",     "\xE2\x80\x9E", "\xE2\x80\xA6", "\xE2\x80\xA0", "\xE2\x80\xA1",	// 0x80 .. 0x87
	"\xCB\x86",     "\xE2\x80\xB0", "\xC5\xA0",     "\xE2\x80\xB9", "\xC5\x92",     " ",            "\xC5\xBD",     " ",           	// 0x88 .. 0x8F
	" ",            "\xE2\x80\x98", "\xE2\x80\x99", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\xA2", "\xE2\x80\x93", "\xE2\x80\x94",	// 0x90 .. 0x97
	"\xCB\x9C",     "\xE2\x84\xA2", "\xC5\xA1",     "\xE2\x80\xBA", "\xC5\x93",     " ",            "\xC5\xBE",     "\xC5\xB8",    	// 0x98 .. 0x9F

	// 0xA0..0xFF: the official Latin-1 characters, given as the UTF-8 encodings of U+00A0..U+00FF
	// (the Unicode Latin-1 Supplement range).

	"\xC2\xA0", "\xC2\xA1", "\xC2\xA2", "\xC2\xA3",	// 0xA0 .. 0xA3
	"\xC2\xA4", "\xC2\xA5", "\xC2\xA6", "\xC2\xA7",	// 0xA4 .. 0xA7
	"\xC2\xA8", "\xC2\xA9", "\xC2\xAA", "\xC2\xAB",	// 0xA8 .. 0xAB
	"\xC2\xAC", "\xC2\xAD", "\xC2\xAE", "\xC2\xAF",	// 0xAC .. 0xAF

	"\xC2\xB0", "\xC2\xB1", "\xC2\xB2", "\xC2\xB3",	// 0xB0 .. 0xB3
	"\xC2\xB4", "\xC2\xB5", "\xC2\xB6", "\xC2\xB7",	// 0xB4 .. 0xB7
	"\xC2\xB8", "\xC2\xB9", "\xC2\xBA", "\xC2\xBB",	// 0xB8 .. 0xBB
	"\xC2\xBC", "\xC2\xBD", "\xC2\xBE", "\xC2\xBF",	// 0xBC .. 0xBF

	"\xC3\x80", "\xC3\x81", "\xC3\x82", "\xC3\x83",	// 0xC0 .. 0xC3
	"\xC3\x84", "\xC3\x85", "\xC3\x86", "\xC3\x87",	// 0xC4 .. 0xC7
	"\xC3\x88", "\xC3\x89", "\xC3\x8A", "\xC3\x8B",	// 0xC8 .. 0xCB
	"\xC3\x8C", "\xC3\x8D", "\xC3\x8E", "\xC3\x8F",	// 0xCC .. 0xCF

	"\xC3\x90", "\xC3\x91", "\xC3\x92", "\xC3\x93",	// 0xD0 .. 0xD3
	"\xC3\x94", "\xC3\x95", "\xC3\x96", "\xC3\x97",	// 0xD4 .. 0xD7
	"\xC3\x98", "\xC3\x99", "\xC3\x9A", "\xC3\x9B",	// 0xD8 .. 0xDB
	"\xC3\x9C", "\xC3\x9D", "\xC3\x9E", "\xC3\x9F",	// 0xDC .. 0xDF

	"\xC3\xA0", "\xC3\xA1", "\xC3\xA2", "\xC3\xA3",	// 0xE0 .. 0xE3
	"\xC3\xA4", "\xC3\xA5", "\xC3\xA6", "\xC3\xA7",	// 0xE4 .. 0xE7
	"\xC3\xA8", "\xC3\xA9", "\xC3\xAA", "\xC3\xAB",	// 0xE8 .. 0xEB
	"\xC3\xAC", "\xC3\xAD", "\xC3\xAE", "\xC3\xAF",	// 0xEC .. 0xEF

	"\xC3\xB0", "\xC3\xB1", "\xC3\xB2", "\xC3\xB3",	// 0xF0 .. 0xF3
	"\xC3\xB4", "\xC3\xB5", "\xC3\xB6", "\xC3\xB7",	// 0xF4 .. 0xF7
	"\xC3\xB8", "\xC3\xB9", "\xC3\xBA", "\xC3\xBB",	// 0xF8 .. 0xFB
	"\xC3\xBC", "\xC3\xBD", "\xC3\xBE", "\xC3\xBF"	// 0xFC .. 0xFF

};
96 | | |
97 | | |
98 | | // ================================================================================================= |
99 | | // Local Utilities |
100 | | // =============== |
101 | | |
102 | | |
// Hex digit helpers. Only '0'..'9' and upper case 'A'..'F' are recognized; lower case letters are
// rejected by IsHexDigit. HexDigitValue is only meaningful for characters accepted by IsHexDigit.
// ! Both macros evaluate their argument more than once, do not pass expressions with side effects.
#define IsHexDigit(ch)		( (((ch) >= '0') && ('9' >= (ch))) || (((ch) >= 'A') && ('F' >= (ch))) )
#define HexDigitValue(ch)	( (((ch) - '0') >= 10) ? ((ch) - 'A' + 10) : ((ch) - '0') )
105 | | |
106 | | |
107 | | // ------------------------------------------------------------------------------------------------- |
108 | | // PickBestRoot |
109 | | // ------------ |
110 | | static const XML_Node * PickBestRoot ( const XML_Node & xmlParent, XMP_OptionBits options ) |
111 | 2.05k | { |
112 | | |
113 | | // Look among this parent's content for x:xmpmeta. The recursion for x:xmpmeta is broader than |
114 | | // the strictly defined choice, but gives us smaller code. |
115 | 10.2k | for ( size_t childNum = 0, childLim = xmlParent.content.size(); childNum < childLim; ++childNum ) { |
116 | 8.36k | const XML_Node * childNode = xmlParent.content[childNum]; |
117 | 8.36k | if ( childNode->kind != kElemNode ) continue; |
118 | 529 | if ( (childNode->name == "x:xmpmeta") || (childNode->name == "x:xapmeta") ) return PickBestRoot ( *childNode, 0 ); |
119 | 529 | } |
120 | | // Look among this parent's content for a bare rdf:RDF if that is allowed. |
121 | 1.90k | if ( ! (options & kXMP_RequireXMPMeta) ) { |
122 | 8.98k | for ( size_t childNum = 0, childLim = xmlParent.content.size(); childNum < childLim; ++childNum ) { |
123 | 7.17k | const XML_Node * childNode = xmlParent.content[childNum]; |
124 | 7.17k | if ( childNode->kind != kElemNode ) continue; |
125 | 305 | if ( childNode->name == "rdf:RDF" ) return childNode; |
126 | 305 | } |
127 | 1.90k | } |
128 | | |
129 | | // Recurse into the content. |
130 | 3.58k | for ( size_t childNum = 0, childLim = xmlParent.content.size(); childNum < childLim; ++childNum ) { |
131 | 1.79k | const XML_Node * foundRoot = PickBestRoot ( *xmlParent.content[childNum], options ); |
132 | 1.79k | if ( foundRoot != 0 ) return foundRoot; |
133 | 1.79k | } |
134 | | |
135 | 1.78k | return 0; |
136 | | |
137 | 1.80k | } // PickBestRoot |
138 | | |
139 | | // ------------------------------------------------------------------------------------------------- |
140 | | // FindRootNode |
141 | | // ------------ |
142 | | // |
143 | | // Find the XML node that is the root of the XMP data tree. Generally this will be an outer node, |
144 | | // but it could be anywhere if a general XML document is parsed (e.g. SVG). The XML parser counted |
145 | | // all possible root nodes, and kept a pointer to the last one. If there is more than one possible |
146 | | // root use PickBestRoot to choose among them. |
147 | | // |
148 | | // If there is a root node, try to extract the version of the previous XMP toolkit. |
149 | | |
150 | | static const XML_Node * FindRootNode ( XMPMeta * thiz, const XMLParserAdapter & xmlParser, XMP_OptionBits options ) |
151 | 5.88k | { |
152 | 5.88k | const XML_Node * rootNode = xmlParser.rootNode; |
153 | | |
154 | 5.88k | if ( xmlParser.rootCount > 1 ) rootNode = PickBestRoot ( xmlParser.tree, options ); |
155 | 5.88k | if ( rootNode == 0 ) return 0; |
156 | | |
157 | | // We have a root node. Try to extract previous toolkit version number. |
158 | | |
159 | 5.44k | XMP_StringPtr verStr = ""; |
160 | | |
161 | 5.44k | XMP_Assert ( rootNode->name == "rdf:RDF" ); |
162 | | |
163 | 5.44k | if ( (options & kXMP_RequireXMPMeta) && |
164 | 0 | ((rootNode->parent == 0) || |
165 | 0 | ((rootNode->parent->name != "x:xmpmeta") && (rootNode->parent->name != "x:xapmeta"))) ) return 0; |
166 | | |
167 | 6.29k | for ( size_t attrNum = 0, attrLim = rootNode->parent->attrs.size(); attrNum < attrLim; ++attrNum ) { |
168 | 1.89k | const XML_Node * currAttr =rootNode->parent->attrs[attrNum]; |
169 | 1.89k | if ( (currAttr->name == "x:xmptk") || (currAttr->name == "x:xaptk") ) { |
170 | 1.05k | verStr = currAttr->value.c_str(); |
171 | 1.05k | break; |
172 | 1.05k | } |
173 | 1.89k | } |
174 | | |
175 | | // Decode the version number into MMmmuubbb digits. If any part is too big, peg it at 99 or 999. |
176 | | |
177 | 5.44k | unsigned long part; |
178 | 33.1k | while ( (*verStr != 0) && ((*verStr < '0') || (*verStr > '9')) ) ++verStr; |
179 | | |
180 | 5.44k | part = 0; |
181 | 8.69k | while ( (*verStr != 0) && ('0' <= *verStr) && (*verStr <= '9') ) { |
182 | 3.24k | part = (part * 10) + (*verStr - '0'); |
183 | 3.24k | ++verStr; |
184 | 3.24k | } |
185 | 5.44k | if ( part > 99 ) part = 99; |
186 | 5.44k | thiz->prevTkVer = part * 100*100*1000; |
187 | | |
188 | 5.44k | part = 0; |
189 | 5.44k | if ( *verStr == '.' ) ++verStr; |
190 | 7.53k | while ( (*verStr != 0) && ('0' <= *verStr) && (*verStr <= '9') ) { |
191 | 2.08k | part = (part * 10) + (*verStr - '0'); |
192 | 2.08k | ++verStr; |
193 | 2.08k | } |
194 | 5.44k | if ( part > 99 ) part = 99; |
195 | 5.44k | thiz->prevTkVer += part * 100*1000; |
196 | | |
197 | 5.44k | part = 0; |
198 | 5.44k | if ( *verStr == '.' ) ++verStr; |
199 | 7.73k | while ( (*verStr != 0) && ('0' <= *verStr) && (*verStr <= '9') ) { |
200 | 2.28k | part = (part * 10) + (*verStr - '0'); |
201 | 2.28k | ++verStr; |
202 | 2.28k | } |
203 | 5.44k | if ( part > 99 ) part = 99; |
204 | 5.44k | thiz->prevTkVer += part * 1000; |
205 | | |
206 | 5.44k | part = 0; |
207 | 5.44k | if ( *verStr == '-' ) ++verStr; |
208 | 8.20k | while ( (*verStr != 0) && ('0' <= *verStr) && (*verStr <= '9') ) { |
209 | 2.75k | part = (part * 10) + (*verStr - '0'); |
210 | 2.75k | ++verStr; |
211 | 2.75k | } |
212 | 5.44k | if ( part > 999 ) part = 999; |
213 | 5.44k | thiz->prevTkVer += part; |
214 | | |
215 | 5.44k | return rootNode; |
216 | | |
217 | 5.44k | } // FindRootNode |
218 | | |
219 | | // ------------------------------------------------------------------------------------------------- |
220 | | // NormalizeDCArrays |
221 | | // ----------------- |
222 | | // |
223 | | // Undo the denormalization performed by the XMP used in Acrobat 5. If a Dublin Core array had only |
224 | | // one item, it was serialized as a simple property. The xml:lang attribute was dropped from an |
225 | | // alt-text item if the language was x-default. |
226 | | |
227 | | // *** This depends on the dc: namespace prefix. |
228 | | |
229 | | static void |
230 | | NormalizeDCArrays ( XMP_Node * xmpTree ) |
231 | 4.04k | { |
232 | 4.04k | XMP_Node * dcSchema = FindSchemaNode ( xmpTree, kXMP_NS_DC, kXMP_ExistingOnly ); |
233 | 4.04k | if ( dcSchema == 0 ) return; |
234 | | |
235 | 605 | for ( size_t propNum = 0, propLimit = dcSchema->children.size(); propNum < propLimit; ++propNum ) { |
236 | 437 | XMP_Node * currProp = dcSchema->children[propNum]; |
237 | 437 | XMP_OptionBits arrayForm = 0; |
238 | | |
239 | 437 | if ( ! XMP_PropIsSimple ( currProp->options ) ) continue; // Nothing to do if not simple. |
240 | | |
241 | 332 | if ( (currProp->name == "dc:creator" ) || // See if it is supposed to be an array. |
242 | 308 | (currProp->name == "dc:date" ) ) { // *** Think about an array of char* and a loop. |
243 | 24 | arrayForm = kXMP_PropArrayIsOrdered; |
244 | 308 | } else if ( |
245 | 308 | (currProp->name == "dc:description" ) || |
246 | 305 | (currProp->name == "dc:rights" ) || |
247 | 305 | (currProp->name == "dc:title" ) ) { |
248 | 3 | arrayForm = kXMP_PropArrayIsAltText; |
249 | 305 | } else if ( |
250 | 305 | (currProp->name == "dc:contributor" ) || |
251 | 303 | (currProp->name == "dc:language" ) || |
252 | 299 | (currProp->name == "dc:publisher" ) || |
253 | 286 | (currProp->name == "dc:relation" ) || |
254 | 286 | (currProp->name == "dc:subject" ) || |
255 | 283 | (currProp->name == "dc:type" ) ) { |
256 | 22 | arrayForm = kXMP_PropValueIsArray; |
257 | 22 | } |
258 | 332 | if ( arrayForm == 0 ) continue; // Nothing to do if it isn't supposed to be an array. |
259 | | |
260 | 49 | arrayForm = VerifySetOptions ( arrayForm, 0 ); // Set the implicit array bits. |
261 | 49 | XMP_Node * newArray = new XMP_Node ( dcSchema, currProp->name.c_str(), arrayForm ); |
262 | 49 | dcSchema->children[propNum] = newArray; |
263 | 49 | newArray->children.push_back ( currProp ); |
264 | 49 | currProp->parent = newArray; |
265 | 49 | currProp->name = kXMP_ArrayItemName; |
266 | | |
267 | 49 | if ( XMP_ArrayIsAltText ( arrayForm ) && (! (currProp->options & kXMP_PropHasLang)) ) { |
268 | 3 | XMP_Node * newLang = new XMP_Node ( currProp, "xml:lang", "x-default", kXMP_PropIsQualifier ); |
269 | 3 | currProp->options |= (kXMP_PropHasQualifiers | kXMP_PropHasLang); |
270 | 3 | if ( currProp->qualifiers.empty() ) { // *** Need a util? |
271 | 3 | currProp->qualifiers.push_back ( newLang ); |
272 | 3 | } else { |
273 | 0 | currProp->qualifiers.insert ( currProp->qualifiers.begin(), newLang ); |
274 | 0 | } |
275 | 3 | } |
276 | | |
277 | 49 | } |
278 | | |
279 | 168 | } // NormalizeDCArrays |
280 | | |
281 | | |
282 | | // ------------------------------------------------------------------------------------------------- |
283 | | // CompareAliasedSubtrees |
284 | | // ---------------------- |
285 | | |
286 | | // *** Change to do some alias-specific setup, then use CompareSubtrees. One special case for |
287 | | // *** aliases is a simple to x-default alias, the options and qualifiers obviously differ. |
288 | | |
289 | | static void |
290 | | CompareAliasedSubtrees ( XMP_Node * aliasNode, XMP_Node * baseNode, bool outerCall = true ) |
291 | 0 | { |
292 | | // ! The outermost call is special. The names almost certainly differ. The qualifiers (and |
293 | | // ! hence options) will differ for an alias to the x-default item of a langAlt array. |
294 | 0 | if ( (aliasNode->value != baseNode->value) || |
295 | 0 | (aliasNode->children.size() != baseNode->children.size()) ) { |
296 | 0 | XMP_Throw ( "Mismatch between alias and base nodes", kXMPErr_BadXMP ); |
297 | 0 | } |
298 | 0 | if ( ! outerCall ) { |
299 | 0 | if ( (aliasNode->name != baseNode->name) || |
300 | 0 | (aliasNode->options != baseNode->options) || |
301 | 0 | (aliasNode->qualifiers.size() != baseNode->qualifiers.size()) ) { |
302 | 0 | XMP_Throw ( "Mismatch between alias and base nodes", kXMPErr_BadXMP ); |
303 | 0 | } |
304 | 0 | } |
305 | | |
306 | 0 | for ( size_t childNum = 0, childLim = aliasNode->children.size(); childNum < childLim; ++childNum ) { |
307 | 0 | XMP_Node * aliasChild = aliasNode->children[childNum]; |
308 | 0 | XMP_Node * baseChild = baseNode->children[childNum]; |
309 | 0 | CompareAliasedSubtrees ( aliasChild, baseChild, false ); |
310 | 0 | } |
311 | | |
312 | 0 | for ( size_t qualNum = 0, qualLim = aliasNode->qualifiers.size(); qualNum < qualLim; ++qualNum ) { |
313 | 0 | XMP_Node * aliasQual = aliasNode->qualifiers[qualNum]; |
314 | 0 | XMP_Node * baseQual = baseNode->qualifiers[qualNum]; |
315 | 0 | CompareAliasedSubtrees ( aliasQual, baseQual, false ); |
316 | 0 | } |
317 | | |
318 | 0 | } // CompareAliasedSubtrees |
319 | | |
320 | | |
321 | | // ------------------------------------------------------------------------------------------------- |
322 | | // TransplantArrayItemAlias |
323 | | // ------------------------ |
324 | | |
325 | | static void |
326 | | TransplantArrayItemAlias ( XMP_Node * oldParent, size_t oldNum, XMP_Node * newParent ) |
327 | 0 | { |
328 | 0 | XMP_Node * childNode = oldParent->children[oldNum]; |
329 | |
|
330 | 0 | if ( newParent->options & kXMP_PropArrayIsAltText ) { |
331 | 0 | if ( childNode->options & kXMP_PropHasLang ) { |
332 | 0 | XMP_Throw ( "Alias to x-default already has a language qualifier", kXMPErr_BadXMP ); // *** Allow x-default. |
333 | 0 | } |
334 | 0 | childNode->options |= (kXMP_PropHasQualifiers | kXMP_PropHasLang); |
335 | 0 | XMP_Node * langQual = new XMP_Node ( childNode, "xml:lang", "x-default", kXMP_PropIsQualifier ); // *** AddLangQual util? |
336 | 0 | if ( childNode->qualifiers.empty() ) { |
337 | 0 | childNode->qualifiers.push_back ( langQual ); |
338 | 0 | } else { |
339 | 0 | childNode->qualifiers.insert ( childNode->qualifiers.begin(), langQual ); |
340 | 0 | } |
341 | 0 | } |
342 | | |
343 | 0 | oldParent->children.erase ( oldParent->children.begin() + oldNum ); |
344 | 0 | childNode->name = kXMP_ArrayItemName; |
345 | 0 | childNode->parent = newParent; |
346 | 0 | if ( newParent->children.empty() ) { |
347 | 0 | newParent->children.push_back ( childNode ); |
348 | 0 | } else { |
349 | 0 | newParent->children.insert ( newParent->children.begin(), childNode ); |
350 | 0 | } |
351 | |
|
352 | 0 | } // TransplantArrayItemAlias |
353 | | |
354 | | |
355 | | // ------------------------------------------------------------------------------------------------- |
356 | | // TransplantNamedAlias |
357 | | // -------------------- |
358 | | |
359 | | static void |
360 | | TransplantNamedAlias ( XMP_Node * oldParent, size_t oldNum, XMP_Node * newParent, XMP_VarString & newName ) |
361 | 0 | { |
362 | 0 | XMP_Node * childNode = oldParent->children[oldNum]; |
363 | |
|
364 | 0 | oldParent->children.erase ( oldParent->children.begin() + oldNum ); |
365 | 0 | childNode->name = newName; |
366 | 0 | childNode->parent = newParent; |
367 | 0 | newParent->children.push_back ( childNode ); |
368 | |
|
369 | 0 | } // TransplantNamedAlias |
370 | | |
371 | | |
372 | | // ------------------------------------------------------------------------------------------------- |
373 | | // MoveExplicitAliases |
374 | | // ------------------- |
375 | | |
376 | | static void |
377 | | MoveExplicitAliases ( XMP_Node * tree, XMP_OptionBits parseOptions ) |
378 | 0 | { |
379 | 0 | tree->options ^= kXMP_PropHasAliases; |
380 | 0 | const bool strictAliasing = ((parseOptions & kXMP_StrictAliasing) != 0); |
381 | | |
382 | | // Visit all of the top level nodes looking for aliases. If there is no base, transplant the |
383 | | // alias subtree. If there is a base and strict aliasing is on, make sure the alias and base |
384 | | // subtrees match. |
385 | | |
386 | | // ! Use "while" loops not "for" loops since both the schema and property loops can remove the |
387 | | // ! current item from the vector being traversed. And don't increment the counter for a delete. |
388 | | |
389 | 0 | size_t schemaNum = 0; |
390 | 0 | while ( schemaNum < tree->children.size() ) { |
391 | 0 | XMP_Node * currSchema = tree->children[schemaNum]; |
392 | | |
393 | 0 | size_t propNum = 0; |
394 | 0 | while ( propNum < currSchema->children.size() ) { |
395 | 0 | XMP_Node * currProp = currSchema->children[propNum]; |
396 | 0 | if ( ! (currProp->options & kXMP_PropIsAlias) ) { |
397 | 0 | ++propNum; |
398 | 0 | continue; |
399 | 0 | } |
400 | 0 | currProp->options ^= kXMP_PropIsAlias; |
401 | | |
402 | | // Find the base path, look for the base schema and root node. |
403 | |
|
404 | 0 | XMP_AliasMapPos aliasPos = sRegisteredAliasMap->find ( currProp->name ); |
405 | 0 | XMP_Assert ( aliasPos != sRegisteredAliasMap->end() ); |
406 | 0 | XMP_ExpandedXPath & basePath = aliasPos->second; |
407 | 0 | XMP_OptionBits arrayOptions = (basePath[kRootPropStep].options & kXMP_PropArrayFormMask); |
408 | |
|
409 | 0 | XMP_Node * baseSchema = FindSchemaNode ( tree, basePath[kSchemaStep].step.c_str(), kXMP_CreateNodes ); |
410 | 0 | if ( baseSchema->options & kXMP_NewImplicitNode ) baseSchema->options ^= kXMP_NewImplicitNode; |
411 | 0 | XMP_Node * baseNode = FindChildNode ( baseSchema, basePath[kRootPropStep].step.c_str(), kXMP_ExistingOnly ); |
412 | |
|
413 | 0 | if ( baseNode == 0 ) { |
414 | | |
415 | 0 | if ( basePath.size() == 2 ) { |
416 | | // A top-to-top alias, transplant the property. |
417 | 0 | TransplantNamedAlias ( currSchema, propNum, baseSchema, basePath[kRootPropStep].step ); |
418 | 0 | } else { |
419 | | // An alias to an array item, create the array and transplant the property. |
420 | 0 | baseNode = new XMP_Node ( baseSchema, basePath[kRootPropStep].step.c_str(), arrayOptions ); |
421 | 0 | baseSchema->children.push_back ( baseNode ); |
422 | 0 | TransplantArrayItemAlias ( currSchema, propNum, baseNode ); |
423 | 0 | } |
424 | | |
425 | 0 | } else if ( basePath.size() == 2 ) { |
426 | | |
427 | | // The base node does exist and this is a top-to-top alias. Check for conflicts if |
428 | | // strict aliasing is on. Remove and delete the alias subtree. |
429 | 0 | if ( strictAliasing ) CompareAliasedSubtrees ( currProp, baseNode ); |
430 | 0 | currSchema->children.erase ( currSchema->children.begin() + propNum ); |
431 | 0 | delete currProp; |
432 | | |
433 | 0 | } else { |
434 | | |
435 | | // This is an alias to an array item and the array exists. Look for the aliased item. |
436 | | // Then transplant or check & delete as appropriate. |
437 | | |
438 | 0 | XMP_Node * itemNode = 0; |
439 | 0 | if ( arrayOptions & kXMP_PropArrayIsAltText ) { |
440 | 0 | XMP_Index xdIndex = LookupLangItem ( baseNode, *xdefaultName ); |
441 | 0 | if ( xdIndex != -1 ) itemNode = baseNode->children[xdIndex]; |
442 | 0 | } else if ( ! baseNode->children.empty() ) { |
443 | 0 | itemNode = baseNode->children[0]; |
444 | 0 | } |
445 | | |
446 | 0 | if ( itemNode == 0 ) { |
447 | 0 | TransplantArrayItemAlias ( currSchema, propNum, baseNode ); |
448 | 0 | } else { |
449 | 0 | if ( strictAliasing ) CompareAliasedSubtrees ( currProp, itemNode ); |
450 | 0 | currSchema->children.erase ( currSchema->children.begin() + propNum ); |
451 | 0 | delete currProp; |
452 | 0 | } |
453 | |
|
454 | 0 | } |
455 | |
|
456 | 0 | } // Property loop |
457 | | |
458 | | // Increment the counter or remove an empty schema node. |
459 | 0 | if ( currSchema->children.size() > 0 ) { |
460 | 0 | ++schemaNum; |
461 | 0 | } else { |
462 | 0 | delete tree->children[schemaNum]; // ! Delete the schema node itself. |
463 | 0 | tree->children.erase ( tree->children.begin() + schemaNum ); |
464 | 0 | } |
465 | | |
466 | 0 | } // Schema loop |
467 | | |
468 | 0 | } // MoveExplicitAliases |
469 | | |
470 | | |
471 | | // ------------------------------------------------------------------------------------------------- |
472 | | // FixGPSTimeStamp |
473 | | // --------------- |
474 | | |
475 | | static void |
476 | | FixGPSTimeStamp ( XMP_Node * exifSchema, XMP_Node * gpsDateTime ) |
477 | 0 | { |
478 | 0 | XMP_DateTime binGPSStamp; |
479 | 0 | try { |
480 | 0 | XMPUtils::ConvertToDate ( gpsDateTime->value.c_str(), &binGPSStamp ); |
481 | 0 | } catch ( ... ) { |
482 | 0 | return; // Don't let a bad date stop other things. |
483 | 0 | } |
484 | 0 | if ( (binGPSStamp.year != 0) || (binGPSStamp.month != 0) || (binGPSStamp.day != 0) ) return; |
485 | | |
486 | 0 | XMP_Node * otherDate = FindChildNode ( exifSchema, "exif:DateTimeOriginal", kXMP_ExistingOnly ); |
487 | 0 | if ( otherDate == 0 ) otherDate = FindChildNode ( exifSchema, "exif:DateTimeDigitized", kXMP_ExistingOnly ); |
488 | 0 | if ( otherDate == 0 ) return; |
489 | | |
490 | 0 | XMP_DateTime binOtherDate; |
491 | 0 | try { |
492 | 0 | XMPUtils::ConvertToDate ( otherDate->value.c_str(), &binOtherDate ); |
493 | 0 | } catch ( ... ) { |
494 | 0 | return; // Don't let a bad date stop other things. |
495 | 0 | } |
496 | | |
497 | 0 | binGPSStamp.year = binOtherDate.year; |
498 | 0 | binGPSStamp.month = binOtherDate.month; |
499 | 0 | binGPSStamp.day = binOtherDate.day; |
500 | |
|
501 | 0 | XMP_StringPtr goodStr; |
502 | 0 | XMP_StringLen goodLen; |
503 | 0 | XMPUtils::ConvertFromDate ( binGPSStamp, &goodStr, &goodLen ); |
504 | | |
505 | 0 | gpsDateTime->value.assign ( goodStr, goodLen ); |
506 | |
|
507 | 0 | } // FixGPSTimeStamp |
508 | | |
509 | | |
510 | | // ------------------------------------------------------------------------------------------------- |
511 | | // MigrateAudioCopyright |
512 | | // --------------------- |
513 | | // |
514 | | // The initial support for WAV files mapped a legacy ID3 audio copyright into a new xmpDM:copyright |
515 | | // property. This is special case code to migrate that into dc:rights['x-default']. The rules: |
516 | | // |
517 | | // 1. If there is no dc:rights array, or an empty array - |
518 | | // Create one with dc:rights['x-default'] set from double linefeed and xmpDM:copyright. |
519 | | // |
520 | | // 2. If there is a dc:rights array but it has no x-default item - |
521 | | // Create an x-default item as a copy of the first item then apply rule #3. |
522 | | // |
523 | | // 3. If there is a dc:rights array with an x-default item, look for a double linefeed in the value. |
524 | | // A. If no double linefeed, compare the x-default value to the xmpDM:copyright value. |
525 | | // A1. If they match then leave the x-default value alone. |
526 | | // A2. Otherwise, append a double linefeed and the xmpDM:copyright value to the x-default value. |
527 | | // B. If there is a double linefeed, compare the trailing text to the xmpDM:copyright value. |
528 | | // B1. If they match then leave the x-default value alone. |
529 | | // B2. Otherwise, replace the trailing x-default text with the xmpDM:copyright value. |
530 | | // |
531 | | // 4. In all cases, delete the xmpDM:copyright property. |
532 | | |
533 | | static void |
534 | | MigrateAudioCopyright ( XMPMeta * xmp, XMP_Node * dmCopyright ) |
535 | 0 | { |
536 | |
|
537 | 0 | try { |
538 | | |
539 | 0 | std::string & dmValue = dmCopyright->value; |
540 | 0 | static const char * kDoubleLF = "\xA\xA"; |
541 | | |
542 | 0 | XMP_Node * dcSchema = FindSchemaNode ( &xmp->tree, kXMP_NS_DC, kXMP_CreateNodes ); |
543 | 0 | XMP_Node * dcRightsArray = FindChildNode ( dcSchema, "dc:rights", kXMP_ExistingOnly ); |
544 | | |
545 | 0 | if ( (dcRightsArray == 0) || dcRightsArray->children.empty() ) { |
546 | | |
547 | | // 1. No dc:rights array, create from double linefeed and xmpDM:copyright. |
548 | 0 | dmValue.insert ( 0, kDoubleLF ); |
549 | 0 | xmp->SetLocalizedText ( kXMP_NS_DC, "rights", "", "x-default", dmValue.c_str(), 0 ); |
550 | | |
551 | 0 | } else { |
552 | |
|
553 | 0 | std::string xdefaultStr ( "x-default" ); |
554 | | |
555 | 0 | XMP_Index xdIndex = LookupLangItem ( dcRightsArray, xdefaultStr ); |
556 | | |
557 | 0 | if ( xdIndex < 0 ) { |
558 | | // 2. No x-default item, create from the first item. |
559 | 0 | XMP_StringPtr firstValue = dcRightsArray->children[0]->value.c_str(); |
560 | 0 | xmp->SetLocalizedText ( kXMP_NS_DC, "rights", "", "x-default", firstValue, 0 ); |
561 | 0 | xdIndex = LookupLangItem ( dcRightsArray, xdefaultStr ); |
562 | 0 | } |
563 | | |
564 | | // 3. Look for a double linefeed in the x-default value. |
565 | 0 | XMP_Assert ( xdIndex == 0 ); |
566 | 0 | std::string & defaultValue = dcRightsArray->children[xdIndex]->value; |
567 | 0 | XMP_Index lfPos = defaultValue.find ( kDoubleLF ); |
568 | | |
569 | 0 | if ( lfPos < 0 ) { |
570 | | |
571 | | // 3A. No double LF, compare whole values. |
572 | 0 | if ( dmValue != defaultValue ) { |
573 | | // 3A2. Append the xmpDM:copyright to the x-default item. |
574 | 0 | defaultValue += kDoubleLF; |
575 | 0 | defaultValue += dmValue; |
576 | 0 | } |
577 | | |
578 | 0 | } else { |
579 | | |
580 | | // 3B. Has double LF, compare the tail. |
581 | 0 | if ( defaultValue.compare ( lfPos+2, std::string::npos, dmValue ) != 0 ) { |
582 | | // 3B2. Replace the x-default tail. |
583 | 0 | defaultValue.replace ( lfPos+2, std::string::npos, dmValue ); |
584 | 0 | } |
585 | | |
586 | 0 | } |
587 | |
|
588 | 0 | } |
589 | | |
590 | | // 4. Get rid of the xmpDM:copyright. |
591 | 0 | xmp->DeleteProperty ( kXMP_NS_DM, "copyright" ); |
592 | | |
593 | 0 | } catch ( ... ) { |
594 | | // Don't let failures (like a bad dc:rights form) stop other cleanup. |
595 | 0 | } |
596 | |
|
597 | 0 | } // MigrateAudioCopyright |
598 | | |
599 | | |
600 | | // ------------------------------------------------------------------------------------------------- |
601 | | // RepairAltText |
602 | | // ------------- |
603 | | // |
604 | | // Make sure that the array is well-formed AltText. Each item must be simple and have an xml:lang |
605 | | // qualifier. If repairs are needed, keep simple non-empty items by adding the xml:lang. |
606 | | |
607 | | static void |
608 | | RepairAltText ( XMP_Node & tree, XMP_StringPtr schemaNS, XMP_StringPtr arrayName ) |
609 | 20.2k | { |
610 | 20.2k | XMP_Node * schemaNode = FindSchemaNode ( &tree, schemaNS, kXMP_ExistingOnly ); |
611 | 20.2k | if ( schemaNode == 0 ) return; |
612 | | |
613 | 727 | XMP_Node * arrayNode = FindChildNode ( schemaNode, arrayName, kXMP_ExistingOnly ); |
614 | 727 | if ( (arrayNode == 0) || XMP_ArrayIsAltText ( arrayNode->options ) ) return; // Already OK. |
615 | | |
616 | 90 | if ( ! XMP_PropIsArray ( arrayNode->options ) ) return; // ! Not even an array, leave it alone. |
617 | | // *** Should probably change simple values to LangAlt with 'x-default' item. |
618 | | |
619 | 85 | arrayNode->options |= (kXMP_PropArrayIsOrdered | kXMP_PropArrayIsAlternate | kXMP_PropArrayIsAltText); |
620 | | |
621 | 464 | for ( int i = arrayNode->children.size()-1; i >= 0; --i ) { // ! Need a signed index type. |
622 | | |
623 | 379 | XMP_Node * currChild = arrayNode->children[i]; |
624 | | |
625 | 379 | if ( ! XMP_PropIsSimple ( currChild->options ) ) { |
626 | | |
627 | | // Delete non-simple children. |
628 | 13 | delete ( currChild ); |
629 | 13 | arrayNode->children.erase ( arrayNode->children.begin() + i ); |
630 | | |
631 | 366 | } else if ( ! XMP_PropHasLang ( currChild->options ) ) { |
632 | | |
633 | 280 | if ( currChild->value.empty() ) { |
634 | | |
635 | | // Delete empty valued children that have no xml:lang. |
636 | 11 | delete ( currChild ); |
637 | 11 | arrayNode->children.erase ( arrayNode->children.begin() + i ); |
638 | | |
639 | 269 | } else { |
640 | | |
641 | | // Add an xml:lang qualifier with the value "x-repair". |
642 | 269 | XMP_Node * repairLang = new XMP_Node ( currChild, "xml:lang", "x-repair", kXMP_PropIsQualifier ); |
643 | 269 | if ( currChild->qualifiers.empty() ) { |
644 | 269 | currChild->qualifiers.push_back ( repairLang ); |
645 | 269 | } else { |
646 | 0 | currChild->qualifiers.insert ( currChild->qualifiers.begin(), repairLang ); |
647 | 0 | } |
648 | 269 | currChild->options |= (kXMP_PropHasQualifiers | kXMP_PropHasLang); |
649 | | |
650 | 269 | } |
651 | | |
652 | 280 | } |
653 | | |
654 | 379 | } |
655 | | |
656 | 85 | } // RepairAltText |
657 | | |
658 | | |
659 | | // ------------------------------------------------------------------------------------------------- |
660 | | // TouchUpDataModel |
661 | | // ---------------- |
662 | | |
663 | | static void |
664 | | TouchUpDataModel ( XMPMeta * xmp ) |
665 | 4.04k | { |
666 | 4.04k | XMP_Node & tree = xmp->tree; |
667 | | |
668 | | // Do special case touch ups for certain schemas. Each schema is looked up with kXMP_ExistingOnly and skipped when the lookup returns null. |
669 | | |
670 | 4.04k | XMP_Node * currSchema = 0; |
671 | | |
672 | 4.04k | currSchema = FindSchemaNode ( &tree, kXMP_NS_EXIF, kXMP_ExistingOnly ); |
673 | 4.04k | if ( currSchema != 0 ) { |
674 | | |
675 | | // Do a special case fix for exif:GPSTimeStamp, only when that property is present. |
676 | 25 | XMP_Node * gpsDateTime = FindChildNode ( currSchema, "exif:GPSTimeStamp", kXMP_ExistingOnly ); |
677 | 25 | if ( gpsDateTime != 0 ) FixGPSTimeStamp ( currSchema, gpsDateTime ); |
678 | | |
679 | | // *** Should probably have RepairAltText change simple values to LangAlt with 'x-default' item. |
680 | | // *** For now just do this for exif:UserComment, the one case we know about: a simple value is moved into a new array item (with its qualifiers, plus xml:lang "x-default" if it has no language) and the property becomes the array. |
681 | 25 | XMP_Node * userComment = FindChildNode ( currSchema, "exif:UserComment", kXMP_ExistingOnly ); |
682 | 25 | if ( (userComment != 0) && XMP_PropIsSimple ( userComment->options ) ) { |
683 | 0 | XMP_Node * newChild = new XMP_Node ( userComment, kXMP_ArrayItemName, |
684 | 0 | userComment->value.c_str(), userComment->options ); |
685 | 0 | newChild->qualifiers.swap ( userComment->qualifiers ); |
686 | 0 | if ( ! XMP_PropHasLang ( newChild->options ) ) { |
687 | 0 | XMP_Node * langQual = new XMP_Node ( newChild, "xml:lang", "x-default", kXMP_PropIsQualifier ); |
688 | 0 | newChild->qualifiers.insert ( newChild->qualifiers.begin(), langQual ); |
689 | 0 | newChild->options |= (kXMP_PropHasQualifiers | kXMP_PropHasLang); |
690 | 0 | } |
691 | 0 | userComment->value.erase(); |
692 | 0 | userComment->options = kXMP_PropArrayFormMask; // ! The array form mask happens to have all the right bits for the converted property. |
693 | 0 | userComment->children.push_back ( newChild ); |
694 | 0 | } |
695 | | |
696 | 25 | } |
697 | | |
698 | 4.04k | currSchema = FindSchemaNode ( &tree, kXMP_NS_DM, kXMP_ExistingOnly ); |
699 | 4.04k | if ( currSchema != 0 ) { |
700 | | // Do a special case migration of xmpDM:copyright to dc:rights['x-default']. Do this before |
701 | | // the dc: touch up since it can affect the dc: schema. The migration itself is done by MigrateAudioCopyright. |
702 | 0 | XMP_Node * dmCopyright = FindChildNode ( currSchema, "xmpDM:copyright", kXMP_ExistingOnly ); |
703 | 0 | if ( dmCopyright != 0 ) MigrateAudioCopyright ( xmp, dmCopyright ); |
704 | 0 | } |
705 | | |
706 | 4.04k | currSchema = FindSchemaNode ( &tree, kXMP_NS_DC, kXMP_ExistingOnly ); |
707 | 4.04k | if ( currSchema != 0 ) { |
708 | | // Do a special case fix for dc:subject, make sure it is an unordered array by clearing the ordered, alternate, and alt-text bits. |
709 | 168 | XMP_Node * dcSubject = FindChildNode ( currSchema, "dc:subject", kXMP_ExistingOnly ); |
710 | 168 | if ( dcSubject != 0 ) { |
711 | 24 | XMP_OptionBits keepMask = static_cast<XMP_OptionBits>(~(kXMP_PropArrayIsOrdered | kXMP_PropArrayIsAlternate | kXMP_PropArrayIsAltText)); |
712 | 24 | dcSubject->options &= keepMask; // Make sure any ordered array bits are clear, all other option bits are kept. |
713 | 24 | } |
714 | 168 | } |
715 | | |
716 | | // Fix any broken AltText arrays that we know about. This runs after the exif:UserComment conversion above. |
717 | | |
718 | 4.04k | RepairAltText ( tree, kXMP_NS_DC, "dc:description" ); // ! Note inclusion of prefixes for direct node lookup! |
719 | 4.04k | RepairAltText ( tree, kXMP_NS_DC, "dc:rights" ); |
720 | 4.04k | RepairAltText ( tree, kXMP_NS_DC, "dc:title" ); |
721 | 4.04k | RepairAltText ( tree, kXMP_NS_XMP_Rights, "xmpRights:UsageTerms" ); |
722 | 4.04k | RepairAltText ( tree, kXMP_NS_EXIF, "exif:UserComment" ); |
723 | | |
724 | | // Tweak old XMP: Move an instance ID from rdf:about to the xmpMM:InstanceID property. An old |
725 | | // instance ID usually looks like "uuid:bac965c4-9d87-11d9-9a30-000d936b79c4", plus InDesign |
726 | | // 3.0 wrote them like "bac965c4-9d87-11d9-9a30-000d936b79c4". If the name looks like a UUID |
727 | | // simply move it to xmpMM:InstanceID, don't worry about any existing xmpMM:InstanceID. Both |
728 | | // will only be present when a newer file with the xmpMM:InstanceID property is updated by an |
729 | | // old app that uses rdf:about. The rdf:about value is held in tree.name, which is erased after the move. |
730 | | |
731 | 4.04k | if ( ! tree.name.empty() ) { |
732 | | |
733 | 338 | bool nameIsUUID = false; |
734 | 338 | XMP_StringPtr nameStr = tree.name.c_str(); |
735 | | |
736 | 338 | if ( XMP_LitNMatch ( nameStr, "uuid:", 5 ) ) { |
737 | | |
738 | 32 | nameIsUUID = true; |
739 | | |
740 | 306 | } else if ( tree.name.size() == 36 ) { |
741 | | |
742 | 180 | nameIsUUID = true; // ! Assume true, we'll set it to false below if not. The check is loose: '-' only at offsets 8, 13, 18, 23, every other character a digit or any lower case a..z (not just hex digits). |
743 | 4.18k | for ( int i = 0; i < 36; ++i ) { |
744 | 4.13k | char ch = nameStr[i]; |
745 | 4.13k | if ( ch == '-' ) { |
746 | 162 | if ( (i == 8) || (i == 13) || (i == 18) || (i == 23) ) continue; |
747 | 42 | nameIsUUID = false; |
748 | 42 | break; |
749 | 3.97k | } else { |
750 | 3.97k | if ( (('0' <= ch) && (ch <= '9')) || (('a' <= ch) && (ch <= 'z')) ) continue; |
751 | 91 | nameIsUUID = false; |
752 | 91 | break; |
753 | 3.97k | } |
754 | 4.13k | } |
755 | | |
756 | 180 | } |
757 | | |
758 | 338 | if ( nameIsUUID ) { |
759 | | |
760 | 79 | XMP_ExpandedXPath expPath; |
761 | 79 | ExpandXPath ( kXMP_NS_XMP_MM, "InstanceID", &expPath ); |
762 | 79 | XMP_Node * idNode = FindNode ( &tree, expPath, kXMP_CreateNodes, 0 ); |
763 | 79 | if ( idNode == 0 ) XMP_Throw ( "Failure creating xmpMM:InstanceID", kXMPErr_InternalFailure ); |
764 | | |
765 | 79 | idNode->options = 0; // Clobber any existing xmpMM:InstanceID: reset options, overwrite the value, drop children and qualifiers. |
766 | 79 | idNode->value = tree.name; |
767 | 79 | idNode->RemoveChildren(); |
768 | 79 | idNode->RemoveQualifiers(); |
769 | | |
770 | 79 | tree.name.erase(); |
771 | | |
772 | 79 | } |
773 | | |
774 | 338 | } |
775 | | |
776 | 4.04k | } // TouchUpDataModel |
777 | | |
778 | | |
779 | | // ------------------------------------------------------------------------------------------------- |
780 | | // DetermineInputEncoding |
781 | | // ---------------------- |
782 | | // |
783 | | // Try to determine the character encoding, making a guess if the input is too short. We make some |
784 | | // simplifying assumptions: the first character must be U+FEFF or ASCII, U+0000 is not allowed. The |
785 | | // XML 1.1 spec is even more strict, UTF-16 XML documents must begin with U+FEFF, and the first |
786 | | // "real" character must be '<'. Ignoring the XML declaration, the first XML character could be '<', |
787 | | // space, tab, CR, or LF. |
788 | | // |
789 | | // The possible input sequences are: |
790 | | // |
791 | | // Cases with U+FEFF |
792 | | // EF BB BF -- - UTF-8 |
793 | | // FE FF -- -- - Big endian UTF-16 |
794 | | // 00 00 FE FF - Big endian UTF 32 |
795 | | // FF FE 00 00 - Little endian UTF-32 |
796 | | // FF FE -- -- - Little endian UTF-16 |
797 | | // |
798 | | // Cases with ASCII |
799 | | // nn mm -- -- - UTF-8 |
800 | | // 00 00 00 nn - Big endian UTF-32 |
801 | | // 00 nn -- -- - Big endian UTF-16 |
802 | | // nn 00 00 00 - Little endian UTF-32 |
803 | | // nn 00 -- -- - Little endian UTF-16 |
804 | | // |
805 | | // ! We don't check for full patterns, or for errors. We just check enough to determine what the |
806 | | // ! only possible (or reasonable) case would be. |
807 | | |
808 | | static XMP_OptionBits |
809 | | DetermineInputEncoding ( const XMP_Uns8 * buffer, size_t length ) |
810 | 5.90k | { |
811 | 5.90k | if ( length < 2 ) return kXMP_EncodeUTF8; |
812 | | |
813 | 5.90k | XMP_Uns8 * uniChar = (XMP_Uns8*)buffer; // ! Make sure comparisons are unsigned. Only bytes 0..2 are ever examined. |
814 | | |
815 | 5.90k | if ( uniChar[0] == 0 ) { |
816 | | |
817 | | // First byte is zero. These cases are (UTF-32 is chosen only with at least 4 bytes and a zero second byte): |
818 | | // 00 nn -- -- - Big endian UTF-16 |
819 | | // 00 00 00 nn - Big endian UTF-32 |
820 | | // 00 00 FE FF - Big endian UTF 32 |
821 | |

822 | 0 | if ( (length < 4) || (uniChar[1] != 0) ) return kXMP_EncodeUTF16Big; |
823 | 0 | return kXMP_EncodeUTF32Big; |
824 | | |
825 | 5.90k | } else if ( uniChar[0] < 0x80 ) { |
826 | | |
827 | | // First byte is non-zero ASCII. These cases are: |
828 | | // nn mm -- -- - UTF-8 (the EF BB BF case has a first byte of 0x80 or above, it is handled in the final branch) |
829 | | // nn 00 00 00 - Little endian UTF-32 |
830 | | // nn 00 -- -- - Little endian UTF-16 |
831 | | |
832 | 5.90k | if ( uniChar[1] != 0 ) return kXMP_EncodeUTF8; |
833 | 0 | if ( (length < 4) || (uniChar[2] != 0) ) return kXMP_EncodeUTF16Little; |
834 | 0 | return kXMP_EncodeUTF32Little; |
835 | |

836 | 0 | } else { |
837 | | |
838 | | // First byte is 0x80 or above. These cases are (any first byte other than EF or FE is handled like FF): |
839 | | // EF BB BF -- - UTF-8 |
840 | | // FE FF -- -- - Big endian UTF-16 |
841 | | // FF FE 00 00 - Little endian UTF-32 |
842 | | // FF FE -- -- - Little endian UTF-16 |
843 | |

844 | 0 | if ( uniChar[0] == 0xEF ) return kXMP_EncodeUTF8; |
845 | 0 | if ( uniChar[0] == 0xFE ) return kXMP_EncodeUTF16Big; |
846 | 0 | if ( (length < 4) || (uniChar[2] != 0) ) return kXMP_EncodeUTF16Little; |
847 | 0 | return kXMP_EncodeUTF32Little; |
848 | |

849 | 0 | } |
850 | | |
851 | 5.90k | } // DetermineInputEncoding |
852 | | |
853 | | |
854 | | // ------------------------------------------------------------------------------------------------- |
855 | | // CountUTF8 |
856 | | // --------- |
857 | | // |
858 | | // Look for a valid multi-byte UTF-8 sequence and return its length. Returns 0 for an invalid UTF-8 |
859 | | // sequence. Returns a negative value for a partial valid sequence at the end of the buffer. |
860 | | // |
861 | | // The checking is not strict. We simply count the number of high order 1 bits in the first byte, |
862 | | // then look for n-1 following bytes whose high order 2 bits are 1 and 0. We do not check for a |
863 | | // minimal length representation of the codepoint, or that the codepoint is defined by Unicode. |
864 | | |
865 | | static int |
866 | | CountUTF8 ( const XMP_Uns8 * charStart, const XMP_Uns8 * bufEnd ) |
867 | 63.8k | { |
868 | 63.8k | XMP_Assert ( charStart < bufEnd ); // Catch an empty range in debug builds. |
869 | 63.8k | if ( charStart >= bufEnd ) return 0; // Don't run-on in release builds, report "invalid" instead. |
870 | 63.8k | if ( (*charStart & 0xC0) != 0xC0 ) return 0; // Must have at least 2 high bits set to be the first byte of a multi-byte sequence. |
871 | | |
872 | 63.8k | int byteCount = 2; |
873 | 63.8k | XMP_Uns8 firstByte = *charStart; |
874 | 72.7k | for ( firstByte = firstByte << 2; (firstByte & 0x80) != 0; firstByte = firstByte << 1 ) ++byteCount; |
875 | | |
876 | 63.8k | if ( (charStart + byteCount) > bufEnd ) return -byteCount; |
877 | | |
878 | 136k | for ( int i = 1; i < byteCount; ++i ) { |
879 | 72.7k | if ( (charStart[i] & 0xC0) != 0x80 ) return 0; |
880 | 72.7k | } |
881 | | |
882 | 63.8k | return byteCount; |
883 | | |
884 | 63.8k | } // CountUTF8 |
885 | | |
886 | | |
887 | | // ------------------------------------------------------------------------------------------------- |
888 | | // CountControlEscape |
889 | | // ------------------ |
890 | | // |
891 | | // Look for a numeric escape sequence for a "prohibited" ASCII control character. These are 0x7F, |
892 | | // and the range 0x00..0x1F except for tab/LF/CR. Return 0 if this is definitely not a numeric |
893 | | // escape, the length of the escape if found, or a negative value for a partial escape. |
894 | | |
895 | | static int |
896 | | CountControlEscape ( const XMP_Uns8 * escStart, const XMP_Uns8 * bufEnd ) |
897 | 171k | { |
898 | 171k | XMP_Assert ( escStart < bufEnd ); // Catch this in debug builds. |
899 | 171k | if ( escStart >= bufEnd ) return 0; // Don't run-on in release builds. |
900 | 171k | XMP_Assert ( *escStart == '&' ); |
901 | | // The shortest escape of interest ("&#xN;") is 5 bytes, anything shorter is reported as partial. |
902 | 171k | size_t tailLen = bufEnd - escStart; |
903 | 171k | if ( tailLen < 5 ) return -1; // Don't need a more thorough check, we'll catch it on the next pass. |
904 | | |
905 | 171k | if ( strncmp ( (char*)escStart, "&#x", 3 ) != 0 ) return 0; |
906 | | // Accumulate up to 2 hex digits, upper or lower case. With tailLen >= 5 both digit positions are inside the buffer. |
907 | 107k | XMP_Uns8 escValue = 0; |
908 | 107k | const XMP_Uns8 * escPos = escStart + 3; |
909 | | |
910 | 107k | if ( ('0' <= *escPos) && (*escPos <= '9') ) { |
911 | 58.8k | escValue = *escPos - '0'; |
912 | 58.8k | ++escPos; |
913 | 58.8k | } else if ( ('A' <= *escPos) && (*escPos <= 'F') ) { |
914 | 6.79k | escValue = *escPos - 'A' + 10; |
915 | 6.79k | ++escPos; |
916 | 41.9k | } else if ( ('a' <= *escPos) && (*escPos <= 'f') ) { |
917 | 15.4k | escValue = *escPos - 'a' + 10; |
918 | 15.4k | ++escPos; |
919 | 15.4k | } |
920 | | |
921 | 107k | if ( ('0' <= *escPos) && (*escPos <= '9') ) { |
922 | 31.2k | escValue = (escValue << 4) + (*escPos - '0'); |
923 | 31.2k | ++escPos; |
924 | 76.2k | } else if ( ('A' <= *escPos) && (*escPos <= 'F') ) { |
925 | 2.97k | escValue = (escValue << 4) + (*escPos - 'A' + 10); |
926 | 2.97k | ++escPos; |
927 | 73.3k | } else if ( ('a' <= *escPos) && (*escPos <= 'f') ) { |
928 | 5.80k | escValue = (escValue << 4) + (*escPos - 'a' + 10); |
929 | 5.80k | ++escPos; |
930 | 5.80k | } |
931 | | |
932 | 107k | if ( escPos == bufEnd ) return -1; // Partial escape, the terminating ';' might be in the next buffer. |
933 | 107k | if ( *escPos != ';' ) return 0; |
934 | | |
935 | 15.3k | size_t escLen = escPos - escStart + 1; |
936 | 15.3k | if ( escLen < 5 ) return 0; // ! Catch "&#x;". |
937 | | // Only escapes for prohibited ASCII controls (0x00..0x1F except tab/LF/CR, plus 0x7F) are reported. Any other value is left in place for the XML parser. |
938 | 11.1k | if ( (escValue == kTab) || (escValue == kLF) || (escValue == kCR) || ((escValue >= 0x20) && (escValue != 0x7F)) ) return 0; // An allowed escape: tab/LF/CR, or a value that is not an ASCII control (e.g. "&#x41;" or "&#xE9;"). |
939 | | |
940 | 8.93k | return escLen; // Found a full "prohibited" numeric escape. |
941 | | |
942 | 11.1k | } // CountControlEscape |
943 | | |
944 | | |
945 | | // ------------------------------------------------------------------------------------------------- |
946 | | // ProcessUTF8Portion |
947 | | // ------------------ |
948 | | // |
949 | | // Early versions of the XMP spec mentioned allowing ISO Latin-1 input. There are also problems with |
950 | | // some clients placing ASCII control characters within XMP values. This is an XML problem, the XML |
951 | | // spec only allows tab (0x09), LF (0x0A), and CR (0x0D) from the 0x00..0x1F range. As a concession |
952 | | // to this we scan 8-bit input for byte sequences that are not valid UTF-8 or in the 0x00..0x1F |
953 | | // range and replace each byte as follows: |
954 | | // 0x00..0x1F - Replace with a space, except for tab, CR, and LF. |
955 | | // 0x7F - Replace with a space. This is ASCII Delete, not allowed by ISO Latin-1. |
956 | | // 0x80..0x9F - Replace with the UTF-8 for a corresponding Unicode character. |
957 | | // 0xA0..0XFF - Replace with the UTF-8 for a corresponding Unicode character. |
958 | | // |
959 | | // The 0x80..0x9F range is not defined by Latin-1. But the Windows 1252 code page defines these and |
960 | | // is otherwise the same as Latin-1. |
961 | | // |
962 | | // For at least historical compatibility reasons we also find and replace singly escaped ASCII |
963 | | // control characters. The Expat parser we're using does not allow numeric escapes like "&#x0B;". |
964 | | // The XML spec is clear that raw controls are not allowed (in the RestrictedChar set), but it isn't |
965 | | // as clear about numeric escapes for them. At any rate, Expat complains, so we treat the numeric |
966 | | // escapes like raw characters and replace them with a space. |
967 | | // |
968 | | // We check for 1 or 2 hex digits ("&#x9;" or "&#x09;") and upper or lower case ("&#xA;" or "&#xa;"). |
969 | | // The full escape sequence is 5 or 6 bytes. |
970 | | |
971 | | static size_t |
972 | | ProcessUTF8Portion ( XMLParserAdapter * xmlParser, |
973 | | const XMP_Uns8 * buffer, |
974 | | size_t length, |
975 | | bool last ) |
976 | 5.91k | { |
977 | 5.91k | const XMP_Uns8 * bufEnd = buffer + length; |
978 | | // Returns the number of input bytes consumed. A smaller value than length means a partial UTF-8 character or escape was left at the end for the caller to resubmit. |
979 | 5.91k | const XMP_Uns8 * spanEnd; |
980 | | |
981 | | // `buffer` is copied into this std::string. If `buffer` only |
982 | | // contains valid UTF-8 and no escape characters, then the copy |
983 | | // will be identical to the original, but invalid characters are |
984 | | // replaced - usually with a space character. This std::string was |
985 | | // added as a performance fix for: |
986 | | // https://github.com/Exiv2/exiv2/security/advisories/GHSA-w8mv-g8qq-36mj |
987 | | // Previously, the code was repeatedly calling |
988 | | // `xmlParser->ParseBuffer()`, which turned out to have quadratic |
989 | | // complexity, because expat kept reparsing the entire string from |
990 | | // the beginning. |
991 | 5.91k | std::string copy; |
992 | | |
993 | 71.1M | for ( spanEnd = buffer; spanEnd < bufEnd; ++spanEnd ) { |
994 | | |
995 | 71.1M | if ( (0x20 <= *spanEnd) && (*spanEnd <= 0x7E) && (*spanEnd != '&') ) { |
996 | 69.5M | copy.push_back(*spanEnd); |
997 | 69.5M | continue; // A regular ASCII character. |
998 | 69.5M | } |
999 | | |
1000 | 1.62M | if ( *spanEnd >= 0x80 ) { |
1001 | | |
1002 | | // See if this is a multi-byte UTF-8 sequence, or a Latin-1 character to replace. |
1003 | | |
1004 | 63.8k | int uniLen = CountUTF8 ( spanEnd, bufEnd ); |
1005 | | |
1006 | 63.8k | if ( uniLen > 0 ) { |
1007 | | |
1008 | | // A valid UTF-8 character, keep it as-is. |
1009 | 63.8k | copy.append((const char*)spanEnd, uniLen); |
1010 | 63.8k | spanEnd += uniLen - 1; // ! The loop increment will put back the +1. |
1011 | | |
1012 | 63.8k | } else if ( (uniLen < 0) && (! last) ) { |
1013 | | |
1014 | | // Have a partial UTF-8 character at the end of the buffer and more input coming. |
1015 | 0 | xmlParser->ParseBuffer ( copy.c_str(), copy.size(), false ); |
1016 | 0 | return (spanEnd - buffer); |
1017 | |

1018 | 0 | } else { |
1019 | | |
1020 | | // Not a valid UTF-8 sequence. Replace the first byte with the Latin-1 equivalent. |
1021 | 0 | const char * replacement = kReplaceLatin1 [ *spanEnd - 0x80 ]; |
1022 | 0 | copy.append ( replacement ); |
1023 | |

1024 | 0 | } |
1025 | | |
1026 | 1.55M | } else if ( (*spanEnd < 0x20) || (*spanEnd == 0x7F) ) { |
1027 | | |
1028 | | // Replace ASCII controls other than tab, LF, and CR with a space. |
1029 | | |
1030 | 1.38M | if ( (*spanEnd == kTab) || (*spanEnd == kLF) || (*spanEnd == kCR) ) { |
1031 | 1.36M | copy.push_back(*spanEnd); |
1032 | 1.36M | continue; |
1033 | 1.36M | } |
1034 | | |
1035 | 19.3k | copy.push_back(' '); |
1036 | | |
1037 | 171k | } else { |
1038 | | |
1039 | | // See if this is a numeric escape sequence for a prohibited ASCII control. |
1040 | | |
1041 | 171k | XMP_Assert ( *spanEnd == '&' ); |
1042 | 171k | int escLen = CountControlEscape ( spanEnd, bufEnd ); |
1043 | | |
1044 | 171k | if ( escLen < 0 ) { |
1045 | | |
1046 | | // Have a partial numeric escape in this buffer, wait for more input. |
1047 | 17 | if ( last ) { |
1048 | 17 | copy.push_back('&'); |
1049 | 17 | continue; // No more buffers, not an escape, absorb as normal input. |
1050 | 17 | } |
1051 | 0 | xmlParser->ParseBuffer ( copy.c_str(), copy.size(), false ); |
1052 | 0 | return (spanEnd - buffer); |
1053 | | |
1054 | 171k | } else if ( escLen > 0 ) { |
1055 | | |
1056 | | // Have a complete numeric escape to replace. |
1057 | 8.93k | copy.push_back(' '); |
1058 | 8.93k | spanEnd = spanEnd + escLen - 1; // ! The loop continuation will increment spanEnd! |
1059 | | |
1060 | 162k | } else { |
1061 | 162k | copy.push_back('&'); // Not a prohibited escape, keep the '&' and scan the following bytes normally. |
1062 | 162k | } |
1063 | | |
1064 | 171k | } |
1065 | | |
1066 | 1.62M | } |
1067 | | // The whole buffer was consumed. Only the last buffer gets the trailing space and ends the parse, earlier buffers must leave the parser open for more input. |
1068 | 5.91k | XMP_Assert ( spanEnd == bufEnd ); |
1069 | 5.91k | if ( last ) copy.push_back(' '); |
1070 | 5.91k | xmlParser->ParseBuffer ( copy.c_str(), copy.size(), last ); |
1071 | 5.91k | return length; |
1072 | | |
1073 | 5.91k | } // ProcessUTF8Portion |
1074 | | |
1075 | | |
1076 | | // ------------------------------------------------------------------------------------------------- |
1077 | | // ParseFromBuffer |
1078 | | // --------------- |
1079 | | // |
1080 | | // Although most clients will probably parse everything in one call, we have a buffered API model |
1081 | | // and need to support even the extreme case of 1 byte at a time parsing. This is considerably |
1082 | | // complicated by some special cases for 8-bit input. Because of this, the first thing we do is |
1083 | | // determine whether the input is 8-bit, UTF-16, or UTF-32. |
1084 | | // |
1085 | | // Both the 8-bit special cases and the encoding determination are easier to do with 8 bytes or more |
1086 | | // of input. The XMLParserAdapter class has a pending-input buffer for this. At the start of parsing |
1087 | | // we (might) try to fill this buffer before determining the input character encoding. After that, |
1088 | | // we (might) use this buffer with the current input to simplify the logic in Process8BitInput. The |
1089 | | // "(might)" part means that we don't actually use the pending-input buffer unless we have to. In |
1090 | | // particular, the common case of single-buffer parsing won't use it. |
1091 | | |
1092 | | void |
1093 | | XMPMeta::ParseFromBuffer ( XMP_StringPtr buffer, |
1094 | | XMP_StringLen xmpSize, |
1095 | | XMP_OptionBits options ) |
1096 | 5.90k | { |
1097 | 5.90k | if ( (buffer == 0) && (xmpSize != 0) ) XMP_Throw ( "Null parse buffer", kXMPErr_BadParam ); |
1098 | 5.90k | if ( xmpSize == kXMP_UseNullTermination ) xmpSize = strlen ( buffer ); |
1099 | | |
1100 | 5.90k | const bool lastClientCall = ((options & kXMP_ParseMoreBuffers) == 0); // *** Could use FlagIsSet & FlagIsClear macros. |
1101 | | |
1102 | 5.90k | this->tree.ClearNode(); // Make sure the target XMP object is totally empty. This happens on every call, the tree is only built after the last buffer. |
1103 | | |
1104 | 5.90k | if ( this->xmlParser == 0 ) { |
1105 | 5.90k | if ( (xmpSize == 0) && lastClientCall ) return; // Tolerate empty parse. Expat complains if there are no XML elements. |
1106 | 5.90k | this->xmlParser = XMP_NewExpatAdapter(); |
1107 | 5.90k | } |
1108 | | |
1109 | 5.90k | XMLParserAdapter& parser = *this->xmlParser; |
1110 | | |
1111 | | #if 0 // XMP_DebugBuild |
1112 | | if ( parser.parseLog != 0 ) { |
1113 | | char message [200]; // AUDIT: Using sizeof(message) below for snprintf length is safe. |
1114 | | snprintf ( message, sizeof(message), "<!-- ParseFromBuffer, length = %d, options = %X%s -->", // AUDIT: See above. |
1115 | | xmpSize, options, (lastClientCall ? " (last)" : "") ); |
1116 | | fwrite ( message, 1, strlen(message), parser.parseLog ); |
1117 | | fflush ( parser.parseLog ); |
1118 | | } |
1119 | | #endif |
1120 | | |
1121 | 5.90k | try { // Cleanup if anything fails: the catch block at the end deletes xmlParser, clears the tree, and rethrows. |
1122 | | |
1123 | | // Determine the character encoding before doing any real parsing. This is needed to do the |
1124 | | // 8-bit special processing. A charEncoding of XMP_OptionBits(-1) means "not yet determined". |
1125 | | |
1126 | 5.90k | if ( parser.charEncoding == XMP_OptionBits(-1) ) { |
1127 | | |
1128 | 5.90k | if ( (parser.pendingCount == 0) && (xmpSize >= kXMLPendingInputMax) ) { |
1129 | | |
1130 | | // This ought to be the common case, the first buffer is big enough. |
1131 | 5.88k | parser.charEncoding = DetermineInputEncoding ( (XMP_Uns8*)buffer, xmpSize ); |
1132 | | |
1133 | 5.88k | } else { |
1134 | | |
1135 | | // Try to fill the pendingInput buffer before calling DetermineInputEncoding. If it is still not full and more buffers are coming, return and wait for them. |
1136 | | |
1137 | 12 | size_t pendingOverlap = kXMLPendingInputMax - parser.pendingCount; |
1138 | 12 | if ( pendingOverlap > xmpSize ) pendingOverlap = xmpSize; |
1139 | | |
1140 | 12 | memcpy ( &parser.pendingInput[parser.pendingCount], buffer, pendingOverlap ); // AUDIT: Count is safe. |
1141 | 12 | buffer += pendingOverlap; |
1142 | 12 | xmpSize -= pendingOverlap; |
1143 | 12 | parser.pendingCount += pendingOverlap; |
1144 | | |
1145 | 12 | if ( (! lastClientCall) && (parser.pendingCount < kXMLPendingInputMax) ) return; |
1146 | 12 | parser.charEncoding = DetermineInputEncoding ( parser.pendingInput, parser.pendingCount ); |
1147 | | |
1148 | | #if Trace_ParsingHackery |
1149 | | fprintf ( stderr, "XMP Character encoding is %d\n", parser.charEncoding ); |
1150 | | #endif |
1151 | | |
1152 | 12 | } |
1153 | | |
1154 | 5.90k | } |
1155 | | |
1156 | | // We have the character encoding. Process UTF-16 and UTF-32 as is. UTF-8 needs special |
1157 | | // handling to take care of things like ISO Latin-1 or unescaped ASCII controls. |
1158 | | |
1159 | 5.90k | XMP_Assert ( parser.charEncoding != XMP_OptionBits(-1) ); |
1160 | | |
1161 | 5.90k | if ( parser.charEncoding != kXMP_EncodeUTF8 ) { |
1162 | | |
1163 | 0 | if ( parser.pendingCount > 0 ) { |
1164 | | // Might have pendingInput from the above portion to determine the character encoding. It is passed through first, as non-final input. |
1165 | 0 | parser.ParseBuffer ( parser.pendingInput, parser.pendingCount, false ); |
1166 | 0 | } |
1167 | 0 | parser.ParseBuffer ( buffer, xmpSize, lastClientCall ); |
1168 | | |
1169 | 5.90k | } else { |
1170 | | |
1171 | | #if Trace_ParsingHackery |
1172 | | fprintf ( stderr, "Parsing %d bytes @ %.8X, %s, %d pending, context: %.8s\n", |
1173 | | xmpSize, buffer, (lastClientCall ? "last" : "not last"), parser.pendingCount, buffer ); |
1174 | | #endif |
1175 | | |
1176 | | // The UTF-8 processing is a bit complex due to the need to tolerate ISO Latin-1 input. |
1177 | | // This is done by scanning the input for byte sequences that are not valid UTF-8, |
1178 | | // assuming they are Latin-1 characters in the range 0x80..0xFF. This requires saving a |
1179 | | // pending input buffer to handle partial UTF-8 sequences at the end of a buffer. |
1180 | | |
1181 | 5.91k | while ( parser.pendingCount > 0 ) { |
1182 | | |
1183 | | // We've got some leftover input, process it first then continue with the current |
1184 | | // buffer. Try to fill the pendingInput buffer before parsing further. We use a loop |
1185 | | // for weird edge cases like a 2 byte input buffer, using 1 byte for pendingInput, |
1186 | | // then having a partial UTF-8 end and need to absorb more. |
1187 | | |
1188 | 12 | size_t pendingOverlap = kXMLPendingInputMax - parser.pendingCount; |
1189 | 12 | if ( pendingOverlap > xmpSize ) pendingOverlap = xmpSize; |
1190 | | |
1191 | 12 | memcpy ( &parser.pendingInput[parser.pendingCount], buffer, pendingOverlap ); // AUDIT: Count is safe. |
1192 | 12 | parser.pendingCount += pendingOverlap; |
1193 | 12 | buffer += pendingOverlap; |
1194 | 12 | xmpSize -= pendingOverlap; |
1195 | | |
1196 | 12 | if ( (! lastClientCall) && (parser.pendingCount < kXMLPendingInputMax) ) return; |
1197 | 12 | size_t bytesDone = ProcessUTF8Portion ( &parser, parser.pendingInput, parser.pendingCount, lastClientCall ); |
1198 | 12 | size_t bytesLeft = parser.pendingCount - bytesDone; |
1199 | | |
1200 | | #if Trace_ParsingHackery |
1201 | | fprintf ( stderr, " ProcessUTF8Portion handled %d pending bytes\n", bytesDone ); |
1202 | | #endif |
1203 | | |
1204 | 12 | if ( bytesDone == parser.pendingCount ) { |
1205 | | |
1206 | | // Done with all of the pending input, move on to the current buffer. |
1207 | 12 | parser.pendingCount = 0; |
1208 | | |
1209 | 12 | } else if ( bytesLeft <= pendingOverlap ) { |
1210 | | |
1211 | | // The leftover pending input all came from the current buffer. Exit this loop, after giving those bytes back to the current buffer. |
1212 | 0 | buffer -= bytesLeft; |
1213 | 0 | xmpSize += bytesLeft; |
1214 | 0 | parser.pendingCount = 0; |
1215 | |

1216 | 0 | } else if ( xmpSize > 0 ) { |
1217 | | |
1218 | | // Pull more of the current buffer into the pending input and try again. |
1219 | | // Backup by this pass's overlap so the loop entry code runs OK. |
1220 | 0 | parser.pendingCount -= pendingOverlap; |
1221 | 0 | buffer -= pendingOverlap; |
1222 | 0 | xmpSize += pendingOverlap; |
1223 | |

1224 | 0 | } else { |
1225 | | |
1226 | | // There is no more of the current buffer. Wait for more. Partial sequences at |
1227 | | // the end of the last buffer should be treated as Latin-1 by ProcessUTF8Portion. |
1228 | 0 | XMP_Assert ( ! lastClientCall ); |
1229 | 0 | parser.pendingCount = bytesLeft; |
1230 | 0 | memcpy ( &parser.pendingInput[0], &parser.pendingInput[bytesDone], bytesLeft ); // AUDIT: Count is safe. |
1231 | 0 | return; |
1232 | |

1233 | 0 | } |
1234 | | |
1235 | 12 | } |
1236 | | |
1237 | | // Done with the pending input, process the current buffer. |
1238 | | |
1239 | 5.90k | size_t bytesDone = ProcessUTF8Portion ( &parser, (XMP_Uns8*)buffer, xmpSize, lastClientCall ); |
1240 | | |
1241 | | #if Trace_ParsingHackery |
1242 | | fprintf ( stderr, " ProcessUTF8Portion handled %d additional bytes\n", bytesDone ); |
1243 | | #endif |
1244 | | |
1245 | 5.90k | if ( bytesDone < xmpSize ) { |
1246 | |

1247 | 0 | XMP_Assert ( ! lastClientCall ); |
1248 | 0 | size_t bytesLeft = xmpSize - bytesDone; |
1249 | 0 | if ( bytesLeft > kXMLPendingInputMax ) XMP_Throw ( "Parser bytesLeft too large", kXMPErr_InternalFailure ); |
1250 | |

1251 | 0 | memcpy ( parser.pendingInput, &buffer[bytesDone], bytesLeft ); // AUDIT: Count is safe. |
1252 | 0 | parser.pendingCount = bytesLeft; |
1253 | 0 | return; // Wait for the next buffer, the unprocessed tail is saved in pendingInput. |
1254 | |

1255 | 0 | } |
1256 | | |
1257 | 5.90k | } |
1258 | | |
1259 | 5.90k | if ( lastClientCall ) { |
1260 | | |
1261 | | #if XMP_DebugBuild && DumpXMLParseTree |
1262 | | if ( parser.parseLog == 0 ) parser.parseLog = stdout; |
1263 | | DumpXMLTree ( parser.parseLog, parser.tree, 0 ); |
1264 | | #endif |
1265 | | |
1266 | 5.88k | const XML_Node * xmlRoot = FindRootNode ( this, *this->xmlParser, options ); |
1267 | | |
1268 | 5.88k | if ( xmlRoot != 0 ) { |
1269 | | |
1270 | 5.44k | ProcessRDF ( &this->tree, *xmlRoot, options ); |
1271 | 5.44k | NormalizeDCArrays ( &this->tree ); |
1272 | 5.44k | if ( this->tree.options & kXMP_PropHasAliases ) MoveExplicitAliases ( &this->tree, options ); |
1273 | 5.44k | TouchUpDataModel ( this ); |
1274 | | |
1275 | | // Delete empty schema nodes. Do this last, other cleanup can make empty schema. The index only advances when a schema is kept. |
1276 | 5.44k | size_t schemaNum = 0; |
1277 | 16.3k | while ( schemaNum < this->tree.children.size() ) { |
1278 | 10.8k | XMP_Node * currSchema = this->tree.children[schemaNum]; |
1279 | 10.8k | if ( currSchema->children.size() > 0 ) { |
1280 | 10.8k | ++schemaNum; |
1281 | 10.8k | } else { |
1282 | 0 | delete this->tree.children[schemaNum]; // ! Delete the schema node itself. |
1283 | 0 | this->tree.children.erase ( this->tree.children.begin() + schemaNum ); |
1284 | 0 | } |
1285 | 10.8k | } |
1286 | | |
1287 | 5.44k | } |
1288 | | |
1289 | 5.88k | delete this->xmlParser; |
1290 | 5.88k | this->xmlParser = 0; |
1291 | | |
1292 | 5.88k | } |
1293 | | |
1294 | 5.90k | } catch ( ... ) { |
1295 | | |
1296 | 1.41k | delete this->xmlParser; |
1297 | 1.41k | this->xmlParser = 0; |
1298 | 1.41k | prevTkVer = 0; |
1299 | 1.41k | this->tree.ClearNode(); |
1300 | 1.41k | throw; |
1301 | | |
1302 | 1.41k | } |
1303 | | |
1304 | 5.90k | } // ParseFromBuffer |
1305 | | |
1306 | | // ================================================================================================= |