Coverage Report

Created: 2024-07-23 06:39

/src/resiprocate/rutil/XMLCursor.cxx
Line
Count
Source (jump to first uncovered line)
1
#if defined(HAVE_CONFIG_H)
2
#include "config.h"
3
#endif
4
5
#include "rutil/XMLCursor.hxx"
6
#include "rutil/Logger.hxx"
7
#include "rutil/WinLeakCheck.hxx"
8
9
using namespace resip;
10
using namespace std;
11
12
#define RESIPROCATE_SUBSYSTEM Subsystem::CONTENTS
13
14
/**
15
Whitespace handling:
16
Are the following XML fragments equivalent?
17
18
Strictly interpreted, the root of the first XML document has one 
19
child while the root of the second XML document has three children.
20
The line breaks and spaces after the <root> and before </root> are 
21
tagless children.
22
23
--->
24
  <root><child>child content</child></root>
25
<--
26
  vs.
27
--->
28
  <root>
29
     <child>child content</child>
30
  </root>
31
<--
32
33
Treating whitespace as children is consistent with the spec but not usually
34
convenient. <!ATTLIST poem   xml:space (default|preserve) 'preserve'> is used to
35
control whitespace handling. Supporting this switch is painful. For now, treat
36
whitespace as non-significant.
37
*/
38
39
static char BANG[] = "!";
40
static char HYPHEN[] = "-";
41
static char LA_QUOTE[] = "<";
42
static char RA_QUOTE[] = ">";
43
static char SLASH[] = "/";
44
static char EQUALS[] = "=";
45
static char DOUBLE_QUOTE[] = "\"";
46
static char SINGLE_QUOTE[] = "\'";
47
//http://www.w3.org/TR/1998/REC-xml-19980210
48
static const Data COMMENT_START("<!--");
49
static const Data COMMENT_END("-->");
50
static const Data QUESTION_RA_QUOTE("?>");
51
52
// An alternative to stripping comments out in preparse
53
// is to deal with them in the parse; ignore when after non-leaf element
54
// put a leaf after a comment after a leaf in the first leaf's children
55
// getValue() needs to copy first leaf and all 'child' leaves to mValue
56
//
57
// has the advantage of allowing
58
// 1. lazier parsing
59
// 2. embedded weirdnesses like <! > and <? >
60
/**
   Builds a cursor positioned on the root element of the document in pb.

   Steps, in order:
    1. skip the prolog (everything up to and including the first "?>"),
    2. if the document contains a comment, copy it into mData with the
       comments removed and parse from that copy; otherwise parse pb's
       memory in place,
    3. extract the root tag; an empty-element root is reported through
       ParseBuffer::fail(),
    4. if the root start tag is immediately followed by its own end tag,
       mark the root as exhausted so no child is ever parsed.

   @param pb buffer holding the XML text; when no comment is present the
             root node refers to pb's memory directly rather than a copy.
*/
XMLCursor::XMLCursor(const ParseBuffer& pb)
   : mCursor(nullptr),
     mAttributesSet(false)
{
   ParseBuffer lPb(pb);

   skipProlog(lPb);
   const char* start = lPb.position();

   lPb.skipToChars(COMMENT_START);
   if (!lPb.eof())
   {
      StackLog(<< "removing comments");
      lPb.reset(start);
      mData.reserve(lPb.end() - lPb.start());

      const char* anchor = start;
      {
         // scoped so the stream is destroyed before mData is read below
         DataStream str(mData);
         Data temp;
         while (true)
         {
            lPb.skipToChars(COMMENT_START);
            // copy the text between the previous comment and this point
            lPb.data(temp, anchor);
            str << temp;
            if (lPb.eof())
            {
               break;
            }
            anchor = Node::skipComments(lPb);
         }
      }
      mRoot.reset(new Node(ParseBuffer(mData.data(), mData.size())));
   }
   else
   {
      mRoot.reset(new Node(ParseBuffer(start, pb.end() - start)));
   }
   mCursor = mRoot.get();

   if (mRoot->extractTag())
   {
      InfoLog(<< "XML: empty element no a legal root");
      mRoot->mPb.fail(__FILE__, __LINE__);
   }

   mTag = mRoot->mTag;
   decodeName(mRoot->mTag);

   // check for # & and note -- make decode, decodeName do stuff if set

   //<top></top> // no children
   ParseBuffer pbtemp(mRoot->mPb);
   pbtemp.skipToChar(RA_QUOTE[0]);
   pbtemp.skipChar();
   if (!WhitespaceSignificant)
   {
      pbtemp.skipWhitespace();
   }

   // The second character is read through a raw pointer, so make sure it is
   // still inside the buffer before looking at it.
   if (*pbtemp.position() == LA_QUOTE[0] &&
       pbtemp.end() - pbtemp.position() >= 2 &&
       *(pbtemp.position()+1) == SLASH[0])
   {
      pbtemp.skipChar();
      pbtemp.skipChar();

      // strncmp must not run past the end of the buffer on truncated input
      const Data::size_type tagLength = mRoot->mTag.size();
      const Data::size_type remaining =
         static_cast<Data::size_type>(pbtemp.end() - pbtemp.position());
      if (remaining >= tagLength &&
          strncmp(mRoot->mTag.data(), pbtemp.position(), tagLength) == 0)
      {
         // no children ever
         mRoot->mPb.reset(mRoot->mPb.end());
         return;
      }
   }
}
137
138
// Nothing to release by hand: the destructor body is empty and members
// clean up after themselves.
XMLCursor::~XMLCursor() = default;
141
142
void
143
XMLCursor::skipProlog(ParseBuffer& pb)
144
0
{
145
   //'<?xml' VersionInfo '<xml?' EncodingDecl '?>'? '<?xml' SDDecl '?>'? S? '?>
146
147
   // !dlb! much more complicated than this.. can contain comments
148
0
   const char* start = pb.position();
149
0
   pb.skipToChars(QUESTION_RA_QUOTE);
150
0
   if(pb.eof()) 
151
0
   {
152
      // No Prolog
153
0
      pb.reset(start);
154
0
      return;
155
0
   }
156
0
   pb.skipN(2);
157
0
   pb.skipWhitespace();
158
0
}
159
160
void
161
XMLCursor::decode(Data& text)
162
0
{
163
0
}
164
165
void
166
XMLCursor::decodeName(Data& name)
167
0
{
168
0
}
169
170
void
171
XMLCursor::parseNextRootChild()
172
0
{
173
   // no next child to parse?
174
0
   if (mRoot->mPb.eof())
175
0
   {
176
0
      return;
177
0
   }
178
179
   // next child already parsed?
180
0
   if (mRoot->mNext != mRoot->mChildren.end())
181
0
   {
182
0
      return;
183
0
   }
184
185
   // skip self tag
186
0
   if (mRoot->mPb.position() == mRoot->mPb.start())
187
0
   {
188
0
      mRoot->mPb.skipToChar(RA_QUOTE[0]);
189
0
      mRoot->mPb.skipChar();
190
0
   }
191
192
0
   if (!WhitespaceSignificant)
193
0
   {
194
0
      mRoot->mPb.skipWhitespace();
195
0
   }
196
197
   // root end tag?
198
0
   if (*mRoot->mPb.position() == LA_QUOTE[0])
199
0
   {
200
0
      ParseBuffer pb(mRoot->mPb.position(), 
201
0
                     mRoot->mPb.end() - mRoot->mPb.position());
202
0
      pb.skipChar();
203
0
      if (!pb.eof() && *pb.position() == SLASH[0])
204
0
      {
205
0
         pb.skipChar();
206
         // CodeWarrior isn't helpful enough to pick the "obvious" operator definition
207
         // so we add volatile here so CW is completely unconfused what to do.
208
         // second note - MSVC 7.0 won't compile the volatile - tried the following to fix
209
0
         const char* end = pb.position();
210
0
         if ( (const char*)pb.end() < end + mTag.size() )
211
0
         {
212
0
            InfoLog(<< "XML: unexpected end");
213
0
            pb.fail(__FILE__, __LINE__);
214
0
         }
215
         
216
0
         if (strncmp(mTag.data(), pb.position(), mRoot->mTag.size()) == 0)
217
0
         {
218
0
            mRoot->mPb.skipToEnd();
219
0
            return;
220
0
         }
221
0
      }
222
0
   }
223
224
   // leaf?
225
0
   if (*mRoot->mPb.position() != LA_QUOTE[0])
226
0
   {
227
0
      const char* anchor = mRoot->mPb.position();
228
0
      mRoot->mPb.skipToChar(LA_QUOTE[0]);
229
0
      Node* leaf = new Node(ParseBuffer(anchor, mRoot->mPb.position() - anchor));
230
0
      leaf->mIsLeaf = true;
231
0
      mRoot->addChild(leaf);
232
0
   }
233
0
   else
234
0
   {
235
0
      Node* child = new Node(mRoot->mPb);
236
0
      child->skipToEndTag();
237
238
      // leave the parse buffer after the child
239
0
      mRoot->mPb.reset(child->mPb.end());
240
241
0
      mRoot->addChild(child);
242
0
   }
243
244
   // mNext always points at cursored child
245
0
   mRoot->mNext = mRoot->mChildren.end();
246
0
   mRoot->mNext--;
247
0
}
248
249
bool
250
XMLCursor::nextSibling()
251
0
{
252
0
   if (atRoot())
253
0
   {
254
0
      StackLog(<< "XMLCursor::nextSibling" << *this->mCursor << " <<root>>");
255
0
      return false;
256
0
   }
257
258
0
   StackLog(<< "XMLCursor::nextSibling" << *this->mCursor << " " << *this->mCursor->mParent);
259
0
   if (mCursor->mParent == mRoot.get())
260
0
   {
261
0
      parseNextRootChild();
262
0
   }
263
264
0
   if (mCursor->mParent->mNext != mCursor->mParent->mChildren.end())
265
0
   {
266
0
      mCursor = *((mCursor->mParent->mNext)++);
267
0
      mAttributesSet = false;
268
0
      return true;
269
0
   }
270
0
   else
271
0
   {
272
0
      return false;
273
0
   }
274
0
}
275
276
bool
277
XMLCursor::firstChild()
278
0
{
279
0
   if (atRoot() &&
280
0
       mRoot->mChildren.empty())
281
0
   {
282
0
      parseNextRootChild();
283
0
   }
284
285
0
   if (mCursor->mChildren.empty())
286
0
   {
287
0
      return false;
288
0
   }
289
0
   else
290
0
   {
291
      // mNext always points after cursored child
292
0
      mCursor->mNext = mCursor->mChildren.begin();
293
0
      mCursor->mNext++;
294
0
      mCursor = mCursor->mChildren.front();
295
0
      mAttributesSet = false;
296
0
      return true;
297
0
   }
298
0
}
299
300
bool
301
XMLCursor::parent()
302
0
{
303
0
   if (atRoot())
304
0
   {
305
0
      return false;
306
0
   }
307
308
0
   mCursor = mCursor->mParent;
309
0
   mAttributesSet = false;
310
0
   return true;
311
0
}
312
313
void
314
XMLCursor::reset()
315
0
{
316
0
   mCursor = mRoot.get();
317
0
   mAttributesSet = false;
318
0
}
319
320
bool
321
XMLCursor::atRoot() const
322
0
{
323
0
   return mCursor == mRoot.get();
324
0
}
325
326
bool
327
XMLCursor::atLeaf() const
328
0
{
329
0
   return mCursor->mIsLeaf;
330
0
}
331
332
// Returns the tag stored on the node under the cursor. The reference points
// into that node.
const Data&
XMLCursor::getTag() const
{
   const Node& current = *mCursor;
   return current.mTag;
}
337
338
//<foo >
339
//<foo>
340
//<foo/>
341
//<foo attr = 'value'   attr="value">
342
//<foo attr = 'value'   attr="value" >
343
//
344
//<foo attr = 'value'   attr="value" />
345
static const Data RA_QUOTE_SLASH(">/");
346
const XMLCursor::AttributeMap&
347
XMLCursor::getAttributes() const
348
0
{
349
0
   if (!atLeaf() &&
350
0
       !mAttributesSet)
351
0
   {
352
0
      mAttributes.clear();
353
0
      mAttributesSet = true;
354
   
355
0
      ParseBuffer pb(mCursor->mPb);
356
0
      pb.reset(mCursor->mPb.start());
357
358
0
      Data attribute;
359
0
      Data value;
360
361
0
      pb.skipToOneOf(ParseBuffer::Whitespace, RA_QUOTE_SLASH);
362
0
      pb.skipWhitespace();
363
364
0
      while (!pb.eof() && 
365
0
             *pb.position() != RA_QUOTE[0] &&
366
0
             *pb.position() != SLASH[0])
367
0
      {
368
0
         attribute.clear();
369
0
         value.clear();
370
371
0
         const char* anchor = pb.skipWhitespace();
372
0
         pb.skipToOneOf(ParseBuffer::Whitespace, EQUALS);
373
0
         pb.data(attribute, anchor);
374
0
         XMLCursor::decodeName(attribute);
375
376
0
         StackLog(<< "attribute: " << attribute);
377
378
0
         pb.skipWhitespace();
379
0
         pb.skipToChar(EQUALS[0]);
380
0
         pb.skipChar();
381
0
         pb.skipWhitespace();
382
0
         if (!pb.eof())
383
0
         {
384
0
            const char quote = *pb.position();
385
386
0
            StackLog(<< "quote is <" << quote << ">");
387
388
0
            if(quote != DOUBLE_QUOTE[0] &&
389
0
               quote != SINGLE_QUOTE[0])
390
0
            {
391
0
               InfoLog(<< "XML: badly quoted attribute value");
392
0
               pb.fail(__FILE__, __LINE__);
393
0
            }
394
0
            anchor = pb.skipChar();
395
0
            pb.skipToChar(quote);
396
0
            pb.data(value, anchor);
397
0
            XMLCursor::decode(value);
398
0
            pb.skipChar();
399
0
            mAttributes[attribute] = value;
400
0
         }
401
0
         pb.skipWhitespace();
402
0
      }
403
0
   }
404
405
0
   return mAttributes;
406
0
}
407
408
/**
   Returns the text of the node under the cursor.

   For a leaf this is the node's whole buffer, passed through decode().
   For any other node the result is empty. The returned reference is to
   mValue, which is overwritten by the next call.
*/
const Data&
XMLCursor::getValue() const
{
   if (!atLeaf())
   {
      mValue.clear();
      return mValue;
   }

   // capture everything from the start to the end of the leaf's buffer
   ParseBuffer leafPb(mCursor->mPb);
   leafPb.skipToEnd();
   leafPb.data(mValue, leafPb.start());
   XMLCursor::decode(mValue);
   return mValue;
}
424
425
/**
   Writes attrs to str as name="value" pairs separated by single spaces.

   Names and values are written verbatim.

   @return str
*/
EncodeStream&
XMLCursor::encode(EncodeStream& str, const AttributeMap& attrs)
{
   bool first = true;
   for (AttributeMap::const_iterator entry = attrs.begin();
        entry != attrs.end(); ++entry)
   {
      if (!first)
      {
         str << " ";
      }
      first = false;

      // !dlb! some sort of character encoding required here
      str << entry->first << "=\"" << entry->second << "\"";
   }

   return str;
}
441
442
/**
   Creates a node covering the unread part of pb, from pb's current
   position to its end. The node starts with no parent and no children and
   is not a leaf. An exhausted buffer is rejected by assertNotEof().
*/
XMLCursor::Node::Node(const ParseBuffer& pb)
   : mPb(pb.position(), pb.end() - pb.position()),
     mParent(nullptr),
     mChildren(),
     mNext(mChildren.begin()),
     mIsLeaf(false)
{
   mPb.assertNotEof();
   StackLog(<< "XMLCursor::Node::Node" << *this);
}
452
453
// A node owns its children: destroying it deletes every child, and each
// child's destructor does the same for its own children.
XMLCursor::Node::~Node()
{
   for (Node* child : mChildren)
   {
      delete child;
   }
}
461
462
// start:
463
//<foo >
464
//^
465
// end:
466
//<foo >
467
//      ^
468
static Data SLASH_RA_QUOTE("/>");
469
bool
470
XMLCursor::Node::extractTag()
471
0
{
472
0
   ParseBuffer pb(mPb);
473
0
   if (!WhitespaceSignificant)
474
0
   {
475
0
       pb.skipWhitespace();
476
0
   }
477
0
   const char* anchor = pb.skipChar(LA_QUOTE[0]);
478
0
   pb.skipToOneOf(ParseBuffer::Whitespace, SLASH_RA_QUOTE);
479
0
   pb.assertNotEof();
480
0
   pb.data(mTag, anchor);
481
482
0
   return !pb.eof() && *pb.position() == SLASH[0];
483
0
}
484
485
void
486
XMLCursor::Node::addChild(Node* child)
487
0
{
488
0
   mChildren.push_back(child);
489
0
   child->mParent = this;
490
0
}
491
492
//<foo> <bar> </bar> <baz> </baz> </foo>
493
//^start
494
//      ^child      
495
//                   ^child
496
//                                ^end
497
//
498
//<foo> sdfsf sadfsf <bar> asdfdf </bar> sadfsdf </foo>
499
//^start
500
//      ^child
501
//                   ^child sub 
502
//                                      ^child
503
void
504
XMLCursor::Node::skipToEndTag()
505
0
{
506
0
   extractTag();
507
0
   StackLog(<< "XMLCursor::Node::skipToEndTag(" <<  mTag << ")");
508
   //StackLog(<< "XMLCursor::Node::skipToEndTag(" << Data(mPb.position(), mPb.end() - mPb.position()) << ")");
509
510
   //<foo />
511
0
   mPb.skipToChar(RA_QUOTE[0]);
512
0
   if (*(mPb.position()-1) == SLASH[0])
513
0
   {
514
0
      mPb.skipChar();
515
0
      mPb = ParseBuffer(mPb.start(), mPb.position() - mPb.start());
516
0
      return;
517
0
   }
518
519
   //<foo> ...<child> ... </child> </foo>
520
   //    ^
521
0
   mPb.skipChar();
522
   //<foo> ...<child> ... </child> </foo>
523
   //     ^
524
0
   while (true)
525
0
   {
526
0
      if (!WhitespaceSignificant)
527
0
      {
528
0
         mPb.skipWhitespace();
529
0
      }
530
531
      // Some text contents ...<
532
      // ^                     ^
533
0
      if (*mPb.position() != LA_QUOTE[0])
534
0
      {
535
0
         const char* anchor = mPb.position();
536
0
         mPb.skipToChar(LA_QUOTE[0]);
537
0
         Node* leaf = new Node(ParseBuffer(anchor, mPb.position() - anchor));
538
0
         leaf->mIsLeaf = true;
539
0
         addChild(leaf);
540
0
      }
541
542
      //<...
543
      //^
544
0
      mPb.skipChar();
545
      //<...
546
      // ^
547
548
      // exit condition
549
      //</foo>
550
0
      if (*mPb.position() == SLASH[0])
551
0
      {
552
0
         mPb.skipChar();
553
         // CodeWarrior isn't helpful enough to pick the "obvious" operator definition
554
         // so we add volatile here so CW is completely unconfused what to do.
555
         // second note - MSVC 7.0 won't compile the volatile - tried the following to fix
556
0
         const char* end = mPb.position();
557
0
         if ( (const char*)mPb.end() < end + mTag.size() )
558
0
         {
559
0
            InfoLog(<< "XML: unexpected end");
560
0
            mPb.fail(__FILE__, __LINE__);
561
0
         }
562
563
0
         if (strncmp(mTag.data(), mPb.position(), mTag.size()) == 0)
564
0
         {
565
0
            mPb.skipToChar(RA_QUOTE[0]);
566
0
            mPb.skipChar();
567
0
            mPb = ParseBuffer(mPb.start(), mPb.position() - mPb.start());
568
0
            return;
569
0
         }
570
0
         else
571
0
         {
572
0
            InfoLog(<< "Badly formed XML: unexpected endtag");
573
0
            mPb.fail(__FILE__, __LINE__);
574
0
         }
575
0
      }
576
577
      //<child>...
578
      // ^
579
0
      if (mPb.position() == mPb.start())
580
0
      {
581
0
         InfoLog(<< "XML: badly formed element");
582
0
         mPb.fail(__FILE__, __LINE__);
583
0
      }
584
585
0
      mPb.reset(mPb.position()-1);
586
      //<child>...
587
      //^
588
0
      Node* child = new Node(mPb);
589
0
      addChild(child);
590
0
      child->skipToEndTag();
591
0
      mPb.reset(child->mPb.end());
592
0
      XMLCursor::decodeName(child->mTag);
593
0
      StackLog(<< mTag << "(" << child->mTag << ")");
594
0
    }
595
0
}
596
597
//<!-- declarations for <head> & <body> -->
598
const char*
599
XMLCursor::Node::skipComments(ParseBuffer& pb)
600
0
{
601
0
   while (*pb.position() == LA_QUOTE[0] &&
602
0
          *(pb.position()+1) == BANG[0] &&
603
0
          *(pb.position()+2) == HYPHEN[0] &&
604
0
          *(pb.position()+3) == HYPHEN[0])
605
0
   {
606
0
      pb.skipToChars(COMMENT_END);
607
0
      pb.skipChars(COMMENT_END);
608
0
      pb.skipWhitespace();
609
0
      if(pb.eof())
610
0
      {
611
0
         return pb.end();
612
0
      }
613
0
   }
614
615
0
   return pb.position();
616
0
}
617
618
/**
   Writes a short diagnostic form of node: its address followed by at most
   the first 35 characters of its buffer in square brackets. "..." is
   appended when the buffer is longer than the preview shown.

   @return str
*/
EncodeStream&
resip::operator<<(EncodeStream& str, const XMLCursor::Node& node)
{
   const Data::size_type size = node.mPb.end() - node.mPb.start();

   static const Data::size_type showSize(35);

   // The ellipsis marks a truncated preview. It used to be keyed on
   // size == 0, so it was shown only for empty nodes.
   str << &node << "[" 
       << Data(node.mPb.start(), 
               min(showSize, size))
        << "]" << (size > showSize ? "..." : "");

   return str;
}
632
633
// Writes "XMLCursor " followed by the diagnostic form of the node the
// cursor currently refers to.
EncodeStream&
resip::operator<<(EncodeStream& str, const XMLCursor& cursor)
{
   const XMLCursor::Node& current = *cursor.mCursor;
   str << "XMLCursor " << current;
   return str;
}
639
640
/* ====================================================================
641
 * The Vovida Software License, Version 1.0 
642
 * 
643
 * Copyright (c) 2000 Vovida Networks, Inc.  All rights reserved.
644
 * 
645
 * Redistribution and use in source and binary forms, with or without
646
 * modification, are permitted provided that the following conditions
647
 * are met:
648
 * 
649
 * 1. Redistributions of source code must retain the above copyright
650
 *    notice, this list of conditions and the following disclaimer.
651
 * 
652
 * 2. Redistributions in binary form must reproduce the above copyright
653
 *    notice, this list of conditions and the following disclaimer in
654
 *    the documentation and/or other materials provided with the
655
 *    distribution.
656
 * 
657
 * 3. The names "VOCAL", "Vovida Open Communication Application Library",
658
 *    and "Vovida Open Communication Application Library (VOCAL)" must
659
 *    not be used to endorse or promote products derived from this
660
 *    software without prior written permission. For written
661
 *    permission, please contact vocal@vovida.org.
662
 *
663
 * 4. Products derived from this software may not be called "VOCAL", nor
664
 *    may "VOCAL" appear in their name, without prior written
665
 *    permission of Vovida Networks, Inc.
666
 * 
667
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
668
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
669
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
670
 * NON-INFRINGEMENT ARE DISCLAIMED.  IN NO EVENT SHALL VOVIDA
671
 * NETWORKS, INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT DAMAGES
672
 * IN EXCESS OF $1,000, NOR FOR ANY INDIRECT, INCIDENTAL, SPECIAL,
673
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
674
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
675
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
676
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
677
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
678
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
679
 * DAMAGE.
680
 * 
681
 * ====================================================================
682
 * 
683
 * This software consists of voluntary contributions made by Vovida
684
 * Networks, Inc. and many individuals on behalf of Vovida Networks,
685
 * Inc.  For more information on Vovida Networks, Inc., please see
686
 * <http://www.vovida.org/>.
687
 *
688
 */