Coverage Report

Created: 2025-12-31 07:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/exiv2/xmpsdk/src/ExpatAdapter.cpp
Line
Count
Source
1
// =================================================================================================
2
// Copyright 2005-2008 Adobe Systems Incorporated
3
// All Rights Reserved.
4
//
5
// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
6
// of the Adobe license agreement accompanying it.
7
// =================================================================================================
8
9
#include "XMP_Environment.h"  // ! Must be the first #include!
10
#include "XMPCore_Impl.hpp"
11
12
#include "ExpatAdapter.hpp"
13
#include "XMPMeta.hpp"
14
15
#include "expat.h"
16
17
#include <string.h>
18
19
using namespace std;
20
21
#if XMP_WinBuild
22
#   ifdef _MSC_VER
23
        #pragma warning ( disable : 4996 )  // '...' was declared deprecated
24
#   endif
25
#endif
26
27
// *** Set memory handlers.
28
29
#ifndef DumpXMLParseEvents
30
  #define DumpXMLParseEvents  0
31
#endif
32
33
628k
#define FullNameSeparator '@'
34
35
// =================================================================================================
36
37
static void StartNamespaceDeclHandler    ( void * userData, XMP_StringPtr prefix, XMP_StringPtr uri );
38
static void EndNamespaceDeclHandler      ( void * userData, XMP_StringPtr prefix );
39
40
static void StartElementHandler          ( void * userData, XMP_StringPtr name, XMP_StringPtr* attrs );
41
static void EndElementHandler            ( void * userData, XMP_StringPtr name );
42
43
static void CharacterDataHandler         ( void * userData, XMP_StringPtr cData, int len );
44
static void StartCdataSectionHandler     ( void * userData );
45
static void EndCdataSectionHandler       ( void * userData );
46
47
static void ProcessingInstructionHandler ( void * userData, XMP_StringPtr target, XMP_StringPtr data );
48
static void CommentHandler               ( void * userData, XMP_StringPtr comment );
49
50
#if BanAllEntityUsage
51
52
  // For now we do this by banning DOCTYPE entirely. This is easy and consistent with what is
53
  // available in recent Java XML parsers. Another, somewhat less drastic, approach would be to 
54
  // ban all entity declarations. We can't allow declarations and ban references, Expat does not
55
  // call the SkippedEntityHandler for references in attribute values.
56
  
57
  // ! Standard entities (&amp;, &lt;, &gt;, &quot;, &apos;, and numeric character references) are
58
  // ! not banned. Expat handles them transparently no matter what.
59
60
  static void StartDoctypeDeclHandler ( void * userData, XMP_StringPtr doctypeName,
61
                      XMP_StringPtr sysid, XMP_StringPtr pubid, int has_internal_subset );
62
63
#endif
64
65
// =================================================================================================
66
67
// C-linkage factory: heap-allocates a fresh ExpatAdapter and hands the raw pointer to the caller.
extern "C" ExpatAdapter * XMP_NewExpatAdapter()
{
  ExpatAdapter * adapter = new ExpatAdapter;
  return adapter;
}  // XMP_NewExpatAdapter
71
72
// =================================================================================================
73
74
4.40k
// Creates the underlying Expat parser (using FullNameSeparator between namespace URI and local
// name), wires every callback defined in this file to it, and seeds the parse stack with the tree
// root. Throws kXMPErr_ExternalFailure if Expat cannot create the parser.
ExpatAdapter::ExpatAdapter() : parser(0)
{

  #if XMP_DebugBuild
    this->elemNesting = 0;
    #if DumpXMLParseEvents
      if ( this->parseLog == 0 ) this->parseLog = stdout;
    #endif
  #endif

  XML_Parser expat = XML_ParserCreateNS ( 0, FullNameSeparator );
  this->parser = expat;
  if ( expat == 0 ) XMP_Throw ( "Failure creating Expat parser", kXMPErr_ExternalFailure );

  // The adapter itself is the user data, so the static handlers can get back to this object.
  XML_SetUserData ( expat, this );

  // Namespace and element structure.
  XML_SetNamespaceDeclHandler ( expat, StartNamespaceDeclHandler, EndNamespaceDeclHandler );
  XML_SetElementHandler ( expat, StartElementHandler, EndElementHandler );

  // Text content.
  XML_SetCharacterDataHandler ( expat, CharacterDataHandler );
  XML_SetCdataSectionHandler ( expat, StartCdataSectionHandler, EndCdataSectionHandler );

  // Processing instructions and comments.
  XML_SetProcessingInstructionHandler ( expat, ProcessingInstructionHandler );
  XML_SetCommentHandler ( expat, CommentHandler );

  #if BanAllEntityUsage
    XML_SetStartDoctypeDeclHandler ( expat, StartDoctypeDeclHandler );
    this->isAborted = false;
  #endif

  this->parseStack.push_back ( &this->tree ); // The XML root node is the bottom of the stack.

}  // ExpatAdapter::ExpatAdapter
106
107
// =================================================================================================
108
109
// Releases the Expat parser, if one was created, and clears the member handle.
ExpatAdapter::~ExpatAdapter()
{

  XML_Parser doomed = this->parser;
  this->parser = 0;

  if ( doomed != 0 ) XML_ParserFree ( doomed );

}  // ExpatAdapter::~ExpatAdapter
116
117
// =================================================================================================
118
119
#if XMP_DebugBuild
120
  static XMP_VarString sExpatMessage;
121
#endif
122
123
static const char * kOneSpace = " ";
124
125
void ExpatAdapter::ParseBuffer ( const void * buffer, size_t length, bool last /* = true */ )
126
4.40k
{
127
4.40k
  enum XML_Status status;
128
  
129
4.40k
  if ( length == 0 ) { // Expat does not like empty buffers.
130
0
    if ( ! last ) return;
131
0
    buffer = kOneSpace;
132
0
    length = 1;
133
0
  }
134
  
135
4.40k
  status = XML_Parse ( this->parser, (const char *)buffer, length, last );
136
  
137
4.40k
  #if BanAllEntityUsage
138
4.40k
    if ( this->isAborted ) XMP_Throw ( "DOCTYPE is not allowed", kXMPErr_BadXML );
139
4.40k
  #endif
140
141
4.40k
  if ( status != XML_STATUS_OK ) {
142
  
143
3
    XMP_StringPtr errMsg = "XML parsing failure";
144
145
    #if 0 // XMP_DebugBuild // Disable for now to make test output uniform. Restore later with thread safety.
146
    
147
      // *** This is a good candidate for a callback error notification mechanism.
148
      // *** This code is not thread safe, the sExpatMessage isn't locked. But that's OK for debug usage.
149
150
      enum XML_Error expatErr = XML_GetErrorCode ( this->parser );
151
      const char *   expatMsg = XML_ErrorString ( expatErr );
152
      int errLine = XML_GetCurrentLineNumber ( this->parser );
153
    
154
      char msgBuffer[1000];
155
      // AUDIT: Use of sizeof(msgBuffer) for snprintf length is safe.
156
      snprintf ( msgBuffer, sizeof(msgBuffer), "# Expat error %d at line %d, \"%s\"", expatErr, errLine, expatMsg );
157
      sExpatMessage = msgBuffer;
158
      errMsg = sExpatMessage.c_str();
159
160
      #if  DumpXMLParseEvents
161
        if ( this->parseLog != 0 ) fprintf ( this->parseLog, "%s\n", errMsg, expatErr, errLine, expatMsg );
162
      #endif
163
164
    #endif
165
166
3
    XMP_Throw ( errMsg, kXMPErr_BadXML );
167
168
0
  }
169
  
170
4.40k
}  // ExpatAdapter::ParseBuffer
171
172
// =================================================================================================
173
// =================================================================================================
174
175
#if XMP_DebugBuild & DumpXMLParseEvents
176
177
  // Writes `count` indentation steps, two spaces each, to `file`.
  static inline void PrintIndent ( FILE * file, size_t count )
  {
    size_t remaining = count;
    while ( remaining != 0 ) {
      fprintf ( file, "  " );
      --remaining;
    }
  }
181
182
#endif
183
184
// =================================================================================================
185
186
// Fills in node->ns, node->name, and node->nsPrefixLen from the full name Expat delivers.
//
//   fullName - the name as delivered by Expat: namespace URI, FullNameSeparator, local name; or
//              just a local name when the name is not in a namespace.
//   node     - the element or attribute node to update; its parent must already be set.
//
// Throws kXMPErr_ExternalFailure when the namespace URI has no registered prefix.
// NOTE(review): this runs inside Expat callbacks, and StartDoctypeDeclHandler in this file states
// that exceptions cannot be thrown across the plain C Expat frames. Confirm this throw is safe.
static void SetQualName ( XMP_StringPtr fullName, XML_Node * node )
{
  // Expat delivers the full name as a concatenation of namespace URI, separator, and local name.

  // As a compatibility hack, an "about" or "ID" attribute of an rdf:Description element is
  // changed to "rdf:about" or "rdf:ID". Easier done here than in the RDF recognizer.

  // As a bug fix hack, change a URI of "http://purl.org/dc/1.1/" to "http://purl.org/dc/elements/1.1/".
  // Early versions of Flash that put XMP in SWF used a bad URI for the dc: namespace.

  // ! This code presumes the RDF namespace prefix is "rdf".

  // Locate the last separator. strrchr also copes with an empty name, where the previous
  // hand-written backward scan decremented a zero size_t and indexed far outside the string.
  XMP_StringPtr sep = strrchr ( fullName, FullNameSeparator );

  if ( sep != 0 ) {

    const size_t  nsLen     = static_cast<size_t> ( sep - fullName );
    XMP_StringPtr localPart = sep + 1;
    XMP_StringPtr prefix;
    XMP_StringLen prefixLen;

    node->ns.assign ( fullName, nsLen );
    if ( node->ns == "http://purl.org/dc/1.1/" ) node->ns = "http://purl.org/dc/elements/1.1/";

    bool found = XMPMeta::GetNamespacePrefix ( node->ns.c_str(), &prefix, &prefixLen );
    if ( ! found ) XMP_Throw ( "Unknown URI in Expat full name", kXMPErr_ExternalFailure );
    node->nsPrefixLen = prefixLen;  // ! Includes the ':'.

    node->name = prefix;
    node->name += localPart;

  } else {

    node->name = fullName;  // The name is not in a namespace.

    if ( node->parent->name == "rdf:Description" ) {
      if ( node->name == "about" ) {
        node->ns   = kXMP_NS_RDF;
        node->name = "rdf:about";
        node->nsPrefixLen = 4;  // ! Include the ':'.
      } else if ( node->name == "ID" ) {
        node->ns   = kXMP_NS_RDF;
        node->name = "rdf:ID";
        node->nsPrefixLen = 4;  // ! Include the ':'.
      }
    }

  }

}  // SetQualName
238
239
// =================================================================================================
240
241
static void StartNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix, XMP_StringPtr uri )
242
15.9k
{
243
15.9k
  IgnoreParam(userData);
244
  
245
  // As a bug fix hack, change a URI of "http://purl.org/dc/1.1/" to ""http://purl.org/dc/elements/1.1/.
246
  // Early versions of Flash that put XMP in SWF used a bad URI for the dc: namespace.
247
  
248
  #if XMP_DebugBuild & DumpXMLParseEvents   // Avoid unused variable warning.
249
    ExpatAdapter * thiz = (ExpatAdapter*)userData;
250
  #endif
251
252
15.9k
  if ( prefix == 0 ) prefix = "_dflt_"; // Have default namespace.
253
15.9k
  if ( uri == 0 ) return;  // Ignore, have xmlns:pre="", no URI to register.
254
  
255
  #if XMP_DebugBuild & DumpXMLParseEvents
256
    if ( thiz->parseLog != 0 ) {
257
      PrintIndent ( thiz->parseLog, thiz->elemNesting );
258
      fprintf ( thiz->parseLog, "StartNamespace: %s - \"%s\"\n", prefix, uri );
259
    }
260
  #endif
261
  
262
15.9k
  if ( XMP_LitMatch ( uri, "http://purl.org/dc/1.1/" ) ) uri = "http://purl.org/dc/elements/1.1/";
263
15.9k
  XMPMeta::RegisterNamespace ( uri, prefix );
264
265
15.9k
}  // StartNamespaceDeclHandler
266
267
// =================================================================================================
268
269
static void EndNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix )
270
15.9k
{
271
15.9k
  IgnoreParam(userData);
272
273
  #if XMP_DebugBuild & DumpXMLParseEvents   // Avoid unused variable warning.
274
    ExpatAdapter * thiz = (ExpatAdapter*)userData;
275
  #endif
276
277
15.9k
  if ( prefix == 0 ) prefix = "_dflt_"; // Have default namespace.
278
  
279
  #if XMP_DebugBuild & DumpXMLParseEvents
280
    if ( thiz->parseLog != 0 ) {
281
      PrintIndent ( thiz->parseLog, thiz->elemNesting );
282
      fprintf ( thiz->parseLog, "EndNamespace: %s\n", prefix );
283
    }
284
  #endif
285
  
286
  // ! Nothing to do, Expat has done all of the XML processing.
287
288
15.9k
}  // EndNamespaceDeclHandler
289
290
// =================================================================================================
291
292
static void StartElementHandler ( void * userData, XMP_StringPtr name, XMP_StringPtr* attrs )
293
27.3k
{
294
27.3k
  XMP_Assert ( attrs != 0 );
295
27.3k
  ExpatAdapter * thiz = (ExpatAdapter*)userData;
296
  
297
27.3k
  size_t attrCount = 0;
298
72.9k
  for ( XMP_StringPtr* a = attrs; *a != 0; ++a ) ++attrCount;
299
27.3k
  if ( (attrCount & 1) != 0 )  XMP_Throw ( "Expat attribute info has odd length", kXMPErr_ExternalFailure );
300
27.3k
  attrCount = attrCount/2;  // They are name/value pairs.
301
  
302
  #if XMP_DebugBuild & DumpXMLParseEvents
303
    if ( thiz->parseLog != 0 ) {
304
      PrintIndent ( thiz->parseLog, thiz->elemNesting );
305
      fprintf ( thiz->parseLog, "StartElement: %s, %d attrs", name, attrCount );
306
      for ( XMP_StringPtr* attr = attrs; *attr != 0; attr += 2 ) {
307
        XMP_StringPtr attrName = *attr;
308
        XMP_StringPtr attrValue = *(attr+1);
309
        fprintf ( thiz->parseLog, ", %s = \"%s\"", attrName, attrValue );
310
      }
311
      fprintf ( thiz->parseLog, "\n" );
312
    }
313
  #endif
314
315
27.3k
  XML_Node * parentNode = thiz->parseStack.back();
316
27.3k
  XML_Node * elemNode   = new XML_Node ( parentNode, "", kElemNode );
317
  
318
27.3k
  SetQualName ( name, elemNode );
319
  
320
50.1k
  for ( XMP_StringPtr* attr = attrs; *attr != 0; attr += 2 ) {
321
322
22.7k
    XMP_StringPtr attrName = *attr;
323
22.7k
    XMP_StringPtr attrValue = *(attr+1);
324
22.7k
    XML_Node * attrNode = new XML_Node ( elemNode, "", kAttrNode );
325
326
22.7k
    SetQualName ( attrName, attrNode );
327
22.7k
    attrNode->value = attrValue;
328
22.7k
    if ( attrNode->name == "xml:lang" ) NormalizeLangValue ( &attrNode->value );
329
22.7k
    elemNode->attrs.push_back ( attrNode );
330
331
22.7k
  }
332
  
333
27.3k
  parentNode->content.push_back ( elemNode );
334
27.3k
  thiz->parseStack.push_back ( elemNode );
335
  
336
27.3k
  if ( elemNode->name == "rdf:RDF" ) {
337
4.22k
    thiz->rootNode = elemNode;
338
4.22k
    ++thiz->rootCount;
339
4.22k
  }
340
  #if XMP_DebugBuild
341
    ++thiz->elemNesting;
342
  #endif
343
344
27.3k
}  // StartElementHandler
345
346
// =================================================================================================
347
348
static void EndElementHandler ( void * userData, XMP_StringPtr name )
349
27.3k
{
350
27.3k
  IgnoreParam(name);
351
  
352
27.3k
  ExpatAdapter * thiz = (ExpatAdapter*)userData;
353
354
  #if XMP_DebugBuild
355
    --thiz->elemNesting;
356
  #endif
357
27.3k
  (void) thiz->parseStack.pop_back();
358
  
359
  #if XMP_DebugBuild & DumpXMLParseEvents
360
    if ( thiz->parseLog != 0 ) {
361
      PrintIndent ( thiz->parseLog, thiz->elemNesting );
362
      fprintf ( thiz->parseLog, "EndElement: %s\n", name );
363
    }
364
  #endif
365
366
27.3k
}  // EndElementHandler
367
368
// =================================================================================================
369
370
static void CharacterDataHandler ( void * userData, XMP_StringPtr cData, int len )
371
62.1k
{
372
62.1k
  ExpatAdapter * thiz = (ExpatAdapter*)userData;
373
  
374
62.1k
  if ( (cData == 0) || (len == 0) ) { cData = ""; len = 0; }
375
  
376
  #if XMP_DebugBuild & DumpXMLParseEvents
377
    if ( thiz->parseLog != 0 ) {
378
      PrintIndent ( thiz->parseLog, thiz->elemNesting );
379
      fprintf ( thiz->parseLog, "CharContent: \"" );
380
      for ( int i = 0; i < len; ++i ) fprintf ( thiz->parseLog, "%c", cData[i] );
381
      fprintf ( thiz->parseLog, "\"\n" );
382
    }
383
  #endif
384
  
385
62.1k
  XML_Node * parentNode = thiz->parseStack.back();
386
62.1k
  XML_Node * cDataNode  = new XML_Node ( parentNode, "", kCDataNode );
387
  
388
62.1k
  cDataNode->value.assign ( cData, len );
389
62.1k
  parentNode->content.push_back ( cDataNode );
390
  
391
62.1k
}  // CharacterDataHandler
392
393
// =================================================================================================
394
395
static void StartCdataSectionHandler ( void * userData )
396
0
{
397
0
  IgnoreParam(userData);
398
399
  #if XMP_DebugBuild & DumpXMLParseEvents   // Avoid unused variable warning.
400
    ExpatAdapter * thiz = (ExpatAdapter*)userData;
401
  #endif
402
  
403
  #if XMP_DebugBuild & DumpXMLParseEvents
404
    if ( thiz->parseLog != 0 ) {
405
      PrintIndent ( thiz->parseLog, thiz->elemNesting );
406
      fprintf ( thiz->parseLog, "StartCDATA\n" );
407
    }
408
  #endif
409
  
410
  // *** Since markup isn't recognized inside CDATA, this affects XMP's double escaping.
411
  
412
0
}  // StartCdataSectionHandler
413
414
// =================================================================================================
415
416
static void EndCdataSectionHandler ( void * userData )
417
0
{
418
0
  IgnoreParam(userData);
419
420
  #if XMP_DebugBuild & DumpXMLParseEvents   // Avoid unused variable warning.
421
    ExpatAdapter * thiz = (ExpatAdapter*)userData;
422
  #endif
423
  
424
  #if XMP_DebugBuild & DumpXMLParseEvents
425
    if ( thiz->parseLog != 0 ) {
426
      PrintIndent ( thiz->parseLog, thiz->elemNesting );
427
      fprintf ( thiz->parseLog, "EndCDATA\n" );
428
    }
429
  #endif  
430
431
0
}  // EndCdataSectionHandler
432
433
// =================================================================================================
434
435
static void ProcessingInstructionHandler ( void * userData, XMP_StringPtr target, XMP_StringPtr data )
436
2.01k
{
437
2.01k
  XMP_Assert ( target != 0 );
438
2.01k
  ExpatAdapter * thiz = (ExpatAdapter*)userData;
439
440
2.01k
  if ( ! XMP_LitMatch ( target, "xpacket" ) ) return;  // Ignore all PIs except the XMP packet wrapper.
441
914
  if ( data == 0 ) data = "";
442
  
443
  #if XMP_DebugBuild & DumpXMLParseEvents
444
    if ( thiz->parseLog != 0 ) {
445
      PrintIndent ( thiz->parseLog, thiz->elemNesting );
446
      fprintf ( thiz->parseLog, "PI: %s - \"%s\"\n", target, data );
447
    }
448
  #endif
449
  
450
914
  XML_Node * parentNode = thiz->parseStack.back();
451
914
  XML_Node * piNode  = new XML_Node ( parentNode, target, kPINode );
452
  
453
914
  piNode->value.assign ( data );
454
914
  parentNode->content.push_back ( piNode );
455
  
456
914
}  // ProcessingInstructionHandler
457
458
// =================================================================================================
459
460
static void CommentHandler ( void * userData, XMP_StringPtr comment )
461
0
{
462
0
  IgnoreParam(userData);
463
464
  #if XMP_DebugBuild & DumpXMLParseEvents   // Avoid unused variable warning.
465
    ExpatAdapter * thiz = (ExpatAdapter*)userData;
466
  #endif
467
468
0
  if ( comment == 0 ) comment = "";
469
  
470
  #if XMP_DebugBuild & DumpXMLParseEvents
471
    if ( thiz->parseLog != 0 ) {
472
      PrintIndent ( thiz->parseLog, thiz->elemNesting );
473
      fprintf ( thiz->parseLog, "Comment: \"%s\"\n", comment );
474
    }
475
  #endif
476
  
477
  // ! Comments are ignored.
478
  
479
0
}  // CommentHandler
480
481
// =================================================================================================
482
483
#if BanAllEntityUsage
484
static void StartDoctypeDeclHandler ( void * userData, XMP_StringPtr doctypeName,
485
                    XMP_StringPtr sysid, XMP_StringPtr pubid, int has_internal_subset )
486
0
{
487
0
  IgnoreParam(doctypeName);
488
0
  IgnoreParam(sysid);
489
0
  IgnoreParam(pubid);
490
0
  IgnoreParam(has_internal_subset);
491
492
0
  ExpatAdapter * thiz = (ExpatAdapter*)userData;
493
494
  #if XMP_DebugBuild & DumpXMLParseEvents   // Avoid unused variable warning.
495
    if ( thiz->parseLog != 0 ) {
496
      PrintIndent ( thiz->parseLog, thiz->elemNesting );
497
      fprintf ( thiz->parseLog, "DocType: \"%s\"\n", doctypeName );
498
    }
499
  #endif
500
  
501
0
  thiz->isAborted = true; // ! Can't throw an exception across the plain C Expat frames.
502
0
  (void) XML_StopParser ( thiz->parser, XML_FALSE /* not resumable */ );
503
504
0
}  // StartDoctypeDeclHandler
505
#endif
506
507
// =================================================================================================