Coverage Report

Created: 2026-02-14 06:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/xerces-c/src/xercesc/internal/SGXMLScanner.cpp
Line
Count
Source
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
18
/*
19
 * $Id: SGXMLScanner.cpp 1517488 2013-08-26 10:33:26Z amassari $
20
 */
21
22
23
// ---------------------------------------------------------------------------
24
//  Includes
25
// ---------------------------------------------------------------------------
26
#include <xercesc/internal/SGXMLScanner.hpp>
27
#include <xercesc/util/RuntimeException.hpp>
28
#include <xercesc/util/UnexpectedEOFException.hpp>
29
#include <xercesc/util/XMLUri.hpp>
30
#include <xercesc/framework/LocalFileInputSource.hpp>
31
#include <xercesc/framework/URLInputSource.hpp>
32
#include <xercesc/framework/XMLDocumentHandler.hpp>
33
#include <xercesc/framework/XMLEntityHandler.hpp>
34
#include <xercesc/framework/XMLPScanToken.hpp>
35
#include <xercesc/framework/MemoryManager.hpp>
36
#include <xercesc/framework/XMLGrammarPool.hpp>
37
#include <xercesc/framework/psvi/PSVIElement.hpp>
38
#include <xercesc/framework/psvi/PSVIHandler.hpp>
39
#include <xercesc/framework/psvi/PSVIAttributeList.hpp>
40
#include <xercesc/framework/psvi/XSAnnotation.hpp>
41
#include <xercesc/internal/EndOfEntityException.hpp>
42
#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
43
#include <xercesc/validators/schema/SchemaValidator.hpp>
44
#include <xercesc/validators/schema/TraverseSchema.hpp>
45
#include <xercesc/validators/schema/XSDDOMParser.hpp>
46
#include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
47
#include <xercesc/validators/schema/XMLSchemaDescriptionImpl.hpp>
48
#include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
49
#include <xercesc/validators/schema/identity/IC_Selector.hpp>
50
#include <xercesc/validators/schema/identity/ValueStore.hpp>
51
#include <xercesc/util/OutOfMemoryException.hpp>
52
#include <xercesc/util/XMLStringTokenizer.hpp>
53
54
XERCES_CPP_NAMESPACE_BEGIN
55
56
// Forward declaration only; the definition is not part of this portion of
// the listing.
inline XMLAttDefList& getAttDefList(ComplexTypeInfo* currType, XMLElementDecl* elemDecl);
57
58
59
// Guard type used by the constructors below: it is built from `this` and
// &SGXMLScanner::cleanUp, and the constructors call release() on it once
// cleanup is no longer wanted.
typedef JanitorMemFunCall<SGXMLScanner> CleanupType;
60
// Guard type used by scanDocument()/scanNext(): it is built from the
// ReaderMgr and &ReaderMgr::reset, and release() is called on it when the
// reader manager must not be reset.
typedef JanitorMemFunCall<ReaderMgr>    ReaderMgrResetType;
61
62
63
// ---------------------------------------------------------------------------
64
//  SGXMLScanner: Constructors and Destructor
65
// ---------------------------------------------------------------------------
66
// Constructs a scanner without any handlers.
//
// valToAdopt, grammarResolver and manager are forwarded unchanged to the
// XMLScanner base constructor; manager is additionally used to construct
// fContent. All pointer members start out as 0 and the real set-up is
// delegated to commonInit().
//
// A CleanupType guard bound to cleanUp() is created before commonInit() is
// called. It is released on normal completion and also when an
// OutOfMemoryException is caught (which is then rethrown).
// NOTE(review): for any other exception the guard is not released, so it
// presumably invokes cleanUp() during unwinding -- confirm against
// JanitorMemFunCall.
SGXMLScanner::SGXMLScanner( XMLValidator* const valToAdopt
67
                          , GrammarResolver* const grammarResolver
68
                          , MemoryManager* const manager) :
69
70
0
    XMLScanner(valToAdopt, grammarResolver, manager)
71
0
    , fSeeXsi(false)
72
0
    , fGrammarType(Grammar::UnKnown)
73
0
    , fElemStateSize(16)
74
0
    , fElemState(0)
75
0
    , fElemLoopState(0)
76
0
    , fContent(1023, manager)
77
0
    , fEntityTable(0)
78
0
    , fRawAttrList(0)
79
0
    , fRawAttrColonListSize(32)
80
0
    , fRawAttrColonList(0)
81
0
    , fSchemaGrammar(0)
82
0
    , fSchemaValidator(0)
83
0
    , fICHandler(0)
84
0
    , fElemNonDeclPool(0)
85
0
    , fElemCount(0)
86
0
    , fAttDefRegistry(0)
87
0
    , fUndeclaredAttrRegistry(0)
88
0
    , fPSVIAttrList(0)
89
0
    , fModel(0)
90
0
    , fPSVIElement(0)
91
0
    , fErrorStack(0)
92
0
    , fSchemaInfoList(0)
93
0
    , fCachedSchemaInfoList(0)
94
0
{
95
0
    // Guard bound to cleanUp(); released below once it is not wanted.
    CleanupType cleanup(this, &SGXMLScanner::cleanUp);
96
97
0
    try
98
0
    {
99
0
         commonInit();
100
0
    }
101
0
    catch(const OutOfMemoryException&)
102
0
    {
103
        // Don't cleanup when out of memory, since executing the
104
        // code can cause problems.
105
0
        cleanup.release();
106
107
0
        throw;
108
0
    }
109
110
0
    // Initialisation succeeded, so the guard must not run cleanUp().
    cleanup.release();
111
0
}
112
113
// Constructs a scanner with an explicit set of handlers.
//
// docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt,
// grammarResolver and manager are all forwarded unchanged to the XMLScanner
// base constructor; manager is additionally used to construct fContent.
// The member initialisers and the commonInit()/guard sequence are the same
// as in the three-argument constructor.
//
// The CleanupType guard is released on normal completion and when an
// OutOfMemoryException is caught (which is then rethrown).
// NOTE(review): for any other exception the guard is not released, so it
// presumably invokes cleanUp() during unwinding -- confirm against
// JanitorMemFunCall.
SGXMLScanner::SGXMLScanner( XMLDocumentHandler* const docHandler
114
                          , DocTypeHandler* const     docTypeHandler
115
                          , XMLEntityHandler* const   entityHandler
116
                          , XMLErrorReporter* const   errHandler
117
                          , XMLValidator* const       valToAdopt
118
                          , GrammarResolver* const    grammarResolver
119
                          , MemoryManager* const      manager) :
120
121
0
    XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
122
0
    , fSeeXsi(false)
123
0
    , fGrammarType(Grammar::UnKnown)
124
0
    , fElemStateSize(16)
125
0
    , fElemState(0)
126
0
    , fElemLoopState(0)
127
0
    , fContent(1023, manager)
128
0
    , fEntityTable(0)
129
0
    , fRawAttrList(0)
130
0
    , fRawAttrColonListSize(32)
131
0
    , fRawAttrColonList(0)
132
0
    , fSchemaGrammar(0)
133
0
    , fSchemaValidator(0)
134
0
    , fICHandler(0)
135
0
    , fElemNonDeclPool(0)
136
0
    , fElemCount(0)
137
0
    , fAttDefRegistry(0)
138
0
    , fUndeclaredAttrRegistry(0)
139
0
    , fPSVIAttrList(0)
140
0
    , fModel(0)
141
0
    , fPSVIElement(0)
142
0
    , fErrorStack(0)
143
0
    , fSchemaInfoList(0)
144
0
    , fCachedSchemaInfoList(0)
145
0
{
146
0
    // Guard bound to cleanUp(); released below once it is not wanted.
    CleanupType cleanup(this, &SGXMLScanner::cleanUp);
147
148
0
    try
149
0
    {
150
0
        commonInit();
151
0
    }
152
0
    catch(const OutOfMemoryException&)
153
0
    {
154
        // Don't cleanup when out of memory, since executing the
155
        // code can cause problems.
156
0
        cleanup.release();
157
158
0
        throw;
159
0
    }
160
161
0
    // Initialisation succeeded, so the guard must not run cleanUp().
    cleanup.release();
162
0
}
163
164
// Destructor: delegates all teardown to cleanUp(), the same routine the
// constructors bind to their CleanupType guard.
SGXMLScanner::~SGXMLScanner()
165
0
{
166
0
    cleanUp();
167
0
}
168
169
// ---------------------------------------------------------------------------
170
//  XMLScanner: Getter methods
171
// ---------------------------------------------------------------------------
172
// Non-const accessor for the DTD entity declaration pool. This scanner
// always returns a null pointer (0), so callers must check the result
// before dereferencing it.
NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool()
173
0
{
174
0
    return 0;
175
0
}
176
177
// Const accessor for the DTD entity declaration pool. Like the non-const
// overload, it always returns a null pointer (0).
const NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool() const
178
0
{
179
0
    return 0;
180
0
}
181
182
// ---------------------------------------------------------------------------
183
//  SGXMLScanner: Main entry point to scan a document
184
// ---------------------------------------------------------------------------
185
// Scans a complete document from src in one call.
//
// Sequence: bump fSequenceId, scanReset(src), startDocument() on the
// document handler (if any), scanProlog(), then either report
// EmptyMainEntity (input already at EOF) or run scanContent() followed by
// checkIDRefs() (only when fValidate is set) and scanMiscellaneous() (only
// when not at EOF). endDocument() is sent to the document handler at the
// end of the try block.
//
// Error handling:
//   - thrown XMLErrs::Codes / XMLValid::Codes values are swallowed;
//   - an XMLException is converted into an emitError() call whose severity
//     follows excToCatch.getErrorType(), with fInException set to true;
//   - an OutOfMemoryException (from scanning or from emitError()) releases
//     the ReaderMgr reset guard and is rethrown.
// In every path other than out-of-memory the guard is left armed.
void SGXMLScanner::scanDocument(const InputSource& src)
186
0
{
187
    //  Bump up the sequence id for this parser instance. This will invalidate
188
    //  any previous progressive scan tokens.
189
0
    fSequenceId++;
190
191
0
    // Guard bound to ReaderMgr::reset; only the out-of-memory handlers
    // below release it.
    ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
192
193
0
    try
194
0
    {
195
        //  Reset the scanner and its plugged in stuff for a new run. This
196
        //  resets all the data structures, creates the initial reader and
197
        //  pushes it on the stack, and sets up the base document path.
198
0
        scanReset(src);
199
200
        // If we have a document handler, then call the start document
201
0
        if (fDocHandler)
202
0
            fDocHandler->startDocument();
203
204
        //  Scan the prolog part, which is everything before the root element
205
        //  including the DTD subsets.
206
0
        scanProlog();
207
208
        //  If we got to the end of input, then its not a valid XML file.
209
        //  Else, go on to scan the content.
210
0
        if (fReaderMgr.atEOF())
211
0
        {
212
0
            emitError(XMLErrs::EmptyMainEntity);
213
0
        }
214
0
        else
215
0
        {
216
            // Scan content, and tell it its not an external entity
217
0
            if (scanContent())
218
0
            {
219
                // Do post-parse validation if required
220
0
                if (fValidate)
221
0
                {
222
                    //  We handle ID reference semantics at this level since
223
                    //  its required by XML 1.0.
224
0
                    checkIDRefs();
225
226
                    // Then allow the validator to do any extra stuff it wants
227
//                    fValidator->postParseValidation();
228
0
                }
229
230
                // That went ok, so scan for any miscellaneous stuff
231
0
                if (!fReaderMgr.atEOF())
232
0
                    scanMiscellaneous();
233
0
            }
234
0
        }
235
236
        // If we have a document handler, then call the end document
237
0
        if (fDocHandler)
238
0
            fDocHandler->endDocument();
239
0
    }
240
    //  NOTE:
241
    //
242
    //  In all of the error processing below, the emitError() call MUST come
243
    //  before the flush of the reader mgr, or it will fail because it tries
244
    //  to find out the position in the XML source of the error.
245
0
    catch(const XMLErrs::Codes)
246
0
    {
247
        // This is a 'first failure' exception, so fall through
248
0
    }
249
0
    catch(const XMLValid::Codes)
250
0
    {
251
        // This is a 'first fatal error' type exit, so fall through
252
0
    }
253
0
    catch(const XMLException& excToCatch)
254
0
    {
255
        //  Emit the error and catch any user exception thrown from here. Make
256
        //  sure in all cases we flush the reader manager.
257
0
        fInException = true;
258
0
        try
259
0
        {
260
0
            // Map the exception's error type onto the matching
            // XMLException_* error code: warning, fatal (or above), or
            // plain error for everything else.
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
261
0
                emitError
262
0
                (
263
0
                    XMLErrs::XMLException_Warning
264
0
                    , excToCatch.getCode()
265
0
                    , excToCatch.getMessage()
266
0
                );
267
0
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
268
0
                emitError
269
0
                (
270
0
                    XMLErrs::XMLException_Fatal
271
0
                    , excToCatch.getCode()
272
0
                    , excToCatch.getMessage()
273
0
                );
274
0
            else
275
0
                emitError
276
0
                (
277
0
                    XMLErrs::XMLException_Error
278
0
                    , excToCatch.getCode()
279
0
                    , excToCatch.getMessage()
280
0
                );
281
0
        }
282
0
        catch(const OutOfMemoryException&)
283
0
        {
284
            // This is a special case for out-of-memory
285
            // conditions, because resetting the ReaderMgr
286
            // can be problematic.
287
0
            resetReaderMgr.release();
288
289
0
            throw;
290
0
        }
291
0
    }
292
0
    catch(const OutOfMemoryException&)
293
0
    {
294
        // This is a special case for out-of-memory
295
        // conditions, because resetting the ReaderMgr
296
        // can be problematic.
297
0
        resetReaderMgr.release();
298
299
0
        throw;
300
0
    }
301
0
}
302
303
304
// Performs one step of a progressive scan.
//
// token must pass isLegalToken(); otherwise a RuntimeException with code
// XMLExcepts::Scan_BadPScanToken is thrown before anything is scanned.
//
// One token is sensed and dispatched: character data goes to
// scanCharData(), EOF ends the scan (reporting EndedWithTagsOnStack if the
// element stack is not empty), and markup goes to the matching scan*()
// routine. When the markup scan clears gotData, the trailing work is done
// here as well: checkIDRefs() (only when fValidate is set),
// scanMiscellaneous(), and endDocument() on fICHandler (when
// toCheckIdentityConstraint() is true) and on the document handler.
//
// Returns false at EOF and whenever an XMLErrs::Codes, XMLValid::Codes or
// XMLException is caught; returns true otherwise. An OutOfMemoryException
// releases the ReaderMgr reset guard and is rethrown. The guard is also
// released when the result is true, so it is left armed only when the
// function returns false.
bool SGXMLScanner::scanNext(XMLPScanToken& token)
305
0
{
306
    // Make sure this token is still legal
307
0
    if (!isLegalToken(token))
308
0
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
309
310
    // Find the next token and remember the reader id
311
0
    // Both are assigned by the senseNextToken() call in the loop below,
    // which is the only way out of that loop other than an exception.
    XMLSize_t orgReader;
312
0
    XMLTokens curToken;
313
314
0
    ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
315
316
0
    bool retVal = true;
317
318
0
    try
319
0
    {
320
0
        while (true)
321
0
        {
322
            //  We have to handle any end of entity exceptions that happen here.
323
            //  We could be at the end of X nested entities, each of which will
324
            //  generate an end of entity exception as we try to move forward.
325
0
            try
326
0
            {
327
0
                curToken = senseNextToken(orgReader);
328
0
                break;
329
0
            }
330
0
            catch(const EndOfEntityException& toCatch)
331
0
            {
332
                // Send an end of entity reference event
333
0
                if (fDocHandler)
334
0
                    fDocHandler->endEntityReference(toCatch.getEntity());
335
0
            }
336
0
        }
337
338
0
        if (curToken == Token_CharData)
339
0
        {
340
0
            scanCharData(fCDataBuf);
341
0
        }
342
0
        else if (curToken == Token_EOF)
343
0
        {
344
0
            // Reaching EOF with elements still open is reported against
            // the element popped from the top of the stack.
            if (!fElemStack.isEmpty())
345
0
            {
346
0
                const ElemStack::StackElem* topElem = fElemStack.popTop();
347
0
                emitError
348
0
                (
349
0
                    XMLErrs::EndedWithTagsOnStack
350
0
                    , topElem->fThisElement->getFullName()
351
0
                );
352
0
            }
353
354
0
            retVal = false;
355
0
        }
356
0
        else
357
0
        {
358
            // Its some sort of markup
359
0
            bool gotData = true;
360
0
            switch(curToken)
361
0
            {
362
0
                case Token_CData :
363
                    // Make sure we are within content
364
0
                    if (fElemStack.isEmpty())
365
0
                        emitError(XMLErrs::CDATAOutsideOfContent);
366
0
                    scanCDSection();
367
0
                    break;
368
369
0
                case Token_Comment :
370
0
                    scanComment();
371
0
                    break;
372
373
0
                case Token_EndTag :
374
0
                    scanEndTag(gotData);
375
0
                    break;
376
377
0
                case Token_PI :
378
0
                    scanPI();
379
0
                    break;
380
381
0
                case Token_StartTag :
382
0
                    scanStartTag(gotData);
383
0
                    break;
384
385
0
                default :
386
0
                    // Any other token: resynchronise on the next '<'.
                    fReaderMgr.skipToChar(chOpenAngle);
387
0
                    break;
388
0
            }
389
390
0
            // The markup must end on the same reader it started on.
            if (orgReader != fReaderMgr.getCurrentReaderNum())
391
0
                emitError(XMLErrs::PartialMarkupInEntity);
392
393
            // If we hit the end, then do the miscellaneous part
394
0
            if (!gotData)
395
0
            {
396
                // Do post-parse validation if required
397
0
                if (fValidate)
398
0
                {
399
                    //  We handle ID reference semantics at this level since
400
                    //  its required by XML 1.0.
401
0
                    checkIDRefs();
402
403
                    // Then allow the validator to do any extra stuff it wants
404
//                    fValidator->postParseValidation();
405
0
                }
406
407
                // That went ok, so scan for any miscellaneous stuff
408
0
                scanMiscellaneous();
409
410
0
                if (toCheckIdentityConstraint())
411
0
                    fICHandler->endDocument();
412
413
0
                if (fDocHandler)
414
0
                    fDocHandler->endDocument();
415
0
            }
416
0
        }
417
0
    }
418
    //  NOTE:
419
    //
420
    //  In all of the error processing below, the emitError() call MUST come
421
    //  before the flush of the reader mgr, or it will fail because it tries
422
    //  to find out the position in the XML source of the error.
423
0
    catch(const XMLErrs::Codes)
424
0
    {
425
        // This is a 'first failure' exception, so return failure
426
0
        retVal = false;
427
0
    }
428
0
    catch(const XMLValid::Codes)
429
0
    {
430
        // This is a 'first fatal error' type exit, so return failure
431
0
        retVal = false;
432
0
    }
433
0
    catch(const XMLException& excToCatch)
434
0
    {
435
        //  Emit the error and catch any user exception thrown from here. Make
436
        //  sure in all cases we flush the reader manager.
437
0
        fInException = true;
438
0
        try
439
0
        {
440
0
            // Map the exception's error type onto the matching
            // XMLException_* error code: warning, fatal (or above), or
            // plain error for everything else.
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
441
0
                emitError
442
0
                (
443
0
                    XMLErrs::XMLException_Warning
444
0
                    , excToCatch.getCode()
445
0
                    , excToCatch.getMessage()
446
0
                );
447
0
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
448
0
                emitError
449
0
                (
450
0
                    XMLErrs::XMLException_Fatal
451
0
                    , excToCatch.getCode()
452
0
                    , excToCatch.getMessage()
453
0
                );
454
0
            else
455
0
                emitError
456
0
                (
457
0
                    XMLErrs::XMLException_Error
458
0
                    , excToCatch.getCode()
459
0
                    , excToCatch.getMessage()
460
0
                );
461
0
        }
462
0
        catch(const OutOfMemoryException&)
463
0
        {
464
            // This is a special case for out-of-memory
465
            // conditions, because resetting the ReaderMgr
466
            // can be problematic.
467
0
            resetReaderMgr.release();
468
469
0
            throw;
470
0
        }
471
472
0
        retVal = false;
473
0
    }
474
0
    catch(const OutOfMemoryException&)
475
0
    {
476
        // This is a special case for out-of-memory
477
        // conditions, because resetting the ReaderMgr
478
        // can be problematic.
479
0
        resetReaderMgr.release();
480
481
0
        throw;
482
0
    }
483
484
    // If we are not at the end, release the object that will
485
    // reset the ReaderMgr.
486
0
    if (retVal)
487
0
        resetReaderMgr.release();
488
489
0
    return retVal;
490
0
}
491
492
// ---------------------------------------------------------------------------
493
//  SGXMLScanner: Private scanning methods
494
// ---------------------------------------------------------------------------
495
496
//  This method is called from scanStartTag() to handle the very raw initial
497
//  scan of the attributes. It just fills in the passed collection with
498
//  key/value pairs for each attribute. No processing is done on them at all.
499
// Parameters:
//   elemName - name of the element being scanned; in this function it is
//              only passed to emitError() for UnterminatedStartTag.
//   toFill   - receives one KVStringPair per attribute. Entries that
//              already exist are overwritten in place with set(); new
//              entries are allocated from fMemoryManager and appended.
//   isEmpty  - set to false on entry and to true only when a '/' is
//              consumed at the end of the tag.
//
// Returns the number of attributes stored. Entries of toFill at or beyond
// that index are not touched by this call.
//
// Side effects: fRawAttrColonList[i] is set to the colon position reported
// by getQName() for attribute i, after resizeRawAttrColonList() is called
// whenever i has reached fRawAttrColonListSize. fAttNameBuf and
// fAttValueBuf are used as scratch buffers.
//
// Throws UnexpectedEOFException (Gen_UnexpectedEOF) when the next character
// is 0. The early-return recovery paths hand back the count gathered so far.
XMLSize_t
500
SGXMLScanner::rawAttrScan(const   XMLCh* const                elemName
501
                          ,       RefVectorOf<KVStringPair>&  toFill
502
                          ,       bool&                       isEmpty)
503
0
{
504
    //  Keep up with how many attributes we've seen so far, and how many
505
    //  elements are available in the vector. This way we can reuse old
506
    //  elements until we run out and then expand it.
507
0
    XMLSize_t attCount = 0;
508
0
    XMLSize_t curVecSize = toFill.size();
509
510
    // Assume it is not empty
511
0
    isEmpty = false;
512
513
    //  We loop until we either see a /> or >, handling key/value pairs until
514
    //  we get there. We place them in the passed vector, which we will expand
515
    //  as required to hold them.
516
0
    while (true)
517
0
    {
518
        // Get the next character, which should be non-space
519
0
        XMLCh nextCh = fReaderMgr.peekNextChar();
520
521
        //  If the next character is not a slash or closed angle bracket,
522
        //  then it must be whitespace, since whitespace is required
523
        //  between the end of the last attribute and the name of the next
524
        //  one.
525
        //
526
0
        // Only enforced once at least one attribute has been stored.
        if (attCount)
527
0
        {
528
0
            if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
529
0
            {
530
0
                if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
531
0
                {
532
                    // Ok, skip by them and get another char
533
0
                    fReaderMgr.getNextChar();
534
0
                    fReaderMgr.skipPastSpaces();
535
0
                    nextCh = fReaderMgr.peekNextChar();
536
0
                }
537
0
                 else
538
0
                {
539
                    // Emit the error but keep on going
540
0
                    emitError(XMLErrs::ExpectedWhitespace);
541
0
                }
542
0
            }
543
0
        }
544
545
        //  Ok, here we first check for any of the special case characters.
546
        //  If its not one, then we do the normal case processing, which
547
        //  assumes that we've hit an attribute value, Otherwise, we do all
548
        //  the special case checks.
549
0
        if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
550
0
        {
551
            //  Assume its going to be an attribute, so get a name from
552
            //  the input.
553
0
            int colonPosition;
554
0
            if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
555
0
            {
556
0
                if (fAttNameBuf.isEmpty())
557
0
                    emitError(XMLErrs::ExpectedAttrName);
558
0
                else
559
0
                    emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
560
0
                // Give up on this tag: skip past the next '>' and return
                // what has been collected so far.
                fReaderMgr.skipPastChar(chCloseAngle);
561
0
                return attCount;
562
0
            }
563
564
0
            const XMLCh* curAttNameBuf = fAttNameBuf.getRawBuffer();
565
566
            // And next must be an equal sign
567
0
            if (!scanEq())
568
0
            {
569
0
                static const XMLCh tmpList[] =
570
0
                {
571
0
                    chSingleQuote, chDoubleQuote, chCloseAngle
572
0
                    , chOpenAngle, chForwardSlash, chNull
573
0
                };
574
575
0
                emitError(XMLErrs::ExpectedEqSign);
576
577
                //  Try to sync back up by skipping forward until we either
578
                //  hit something meaningful.
579
0
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
580
581
0
                if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
582
0
                {
583
                    // Jump back to top for normal processing of these
584
0
                    continue;
585
0
                }
586
0
                else if ((chFound == chSingleQuote)
587
0
                      ||  (chFound == chDoubleQuote)
588
0
                      ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
589
0
                {
590
                    // Just fall through assuming that the value is to follow
591
0
                }
592
0
                else if (chFound == chOpenAngle)
593
0
                {
594
                    // Assume a malformed tag and that new one is starting
595
0
                    emitError(XMLErrs::UnterminatedStartTag, elemName);
596
0
                    return attCount;
597
0
                }
598
0
                else
599
0
                {
600
                    // Something went really wrong
601
0
                    return attCount;
602
0
                }
603
0
            }
604
605
            //  Next should be the quoted attribute value. We just do a simple
606
            //  and stupid scan of this value. The only thing we do here
607
            //  is to expand entity references.
608
0
            if (!basicAttrValueScan(curAttNameBuf, fAttValueBuf))
609
0
            {
610
0
                static const XMLCh tmpList[] =
611
0
                {
612
0
                    chCloseAngle, chOpenAngle, chForwardSlash, chNull
613
0
                };
614
615
0
                emitError(XMLErrs::ExpectedAttrValue);
616
617
                //  It failed, so lets try to get synced back up. We skip
618
                //  forward until we find some whitespace or one of the
619
                //  chars in our list.
620
0
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
621
622
0
                if ((chFound == chCloseAngle)
623
0
                ||  (chFound == chForwardSlash)
624
0
                ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
625
0
                {
626
                    //  Just fall through and process this attribute, though
627
                    //  the value will be "".
628
0
                }
629
0
                else if (chFound == chOpenAngle)
630
0
                {
631
                    // Assume a malformed tag and that new one is starting
632
0
                    emitError(XMLErrs::UnterminatedStartTag, elemName);
633
0
                    return attCount;
634
0
                }
635
0
                else
636
0
                {
637
                    // Something went really wrong
638
0
                    return attCount;
639
0
                }
640
0
            }
641
642
            //  And now lets add it to the passed collection. If we have not
643
            //  filled it up yet, then we use the next element. Else we add
644
            //  a new one.
645
0
            KVStringPair* curPair = 0;
646
0
            // curVecSize was sampled once on entry and is not refreshed
            // after addElement(); every index from that size upwards
            // therefore takes the allocate-and-append branch.
            if (attCount >= curVecSize)
647
0
            {
648
0
                curPair = new (fMemoryManager) KVStringPair
649
0
                (
650
0
                    curAttNameBuf
651
0
                    , fAttNameBuf.getLen()
652
0
                    , fAttValueBuf.getRawBuffer()
653
0
                    , fAttValueBuf.getLen()
654
0
                    , fMemoryManager
655
0
                );
656
0
                toFill.addElement(curPair);
657
0
            }
658
0
             else
659
0
            {
660
0
                curPair = toFill.elementAt(attCount);
661
0
                curPair->set
662
0
                (
663
0
                    curAttNameBuf
664
0
                    , fAttNameBuf.getLen()
665
0
                    , fAttValueBuf.getRawBuffer()
666
0
                    , fAttValueBuf.getLen()
667
0
                );
668
0
            }
669
0
            // Grow the colon-position array before writing slot attCount.
            if (attCount >= fRawAttrColonListSize) {
670
0
                resizeRawAttrColonList();
671
0
            }
672
0
            fRawAttrColonList[attCount] = colonPosition;
673
674
            // And bump the count of attributes we've gotten
675
0
            attCount++;
676
677
            // And go to the top again for another attribute
678
0
            continue;
679
0
        }
680
681
        //  It was some special case character so do all of the checks and
682
        //  deal with it.
683
0
        if (!nextCh)
684
0
            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
685
686
0
        if (nextCh == chForwardSlash)
687
0
        {
688
0
            fReaderMgr.getNextChar();
689
0
            isEmpty = true;
690
0
            if (!fReaderMgr.skippedChar(chCloseAngle))
691
0
                emitError(XMLErrs::UnterminatedStartTag, elemName);
692
0
            break;
693
0
        }
694
0
        else if (nextCh == chCloseAngle)
695
0
        {
696
0
            fReaderMgr.getNextChar();
697
0
            break;
698
0
        }
699
0
        else if (nextCh == chOpenAngle)
700
0
        {
701
            //  Check for this one specially, since its going to be common
702
            //  and it is kind of auto-recovering since we've already hit the
703
            //  next open bracket, which is what we would have seeked to (and
704
            //  skipped this whole tag.)
705
0
            emitError(XMLErrs::UnterminatedStartTag, elemName);
706
0
            break;
707
0
        }
708
0
        else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
709
0
        {
710
            //  Check for this one specially, which is probably a missing
711
            //  attribute name, e.g. ="value". Just issue expected name
712
            //  error and eat the quoted string, then jump back to the
713
            //  top again.
714
0
            emitError(XMLErrs::ExpectedAttrName);
715
0
            fReaderMgr.getNextChar();
716
0
            fReaderMgr.skipQuotedString(nextCh);
717
0
            fReaderMgr.skipPastSpaces();
718
0
            continue;
719
0
        }
720
0
        // NOTE(review): a special start-tag character matching none of the
        // branches above would loop again without consuming input --
        // confirm isSpecialStartTagChar() covers only the cases handled.
    }
721
722
0
    return attCount;
723
0
}
724
725
726
//  This method will kick off the scanning of the primary content of the
727
//  document, i.e. the elements.
728
// Returns true whenever it returns normally; there is no false return path
// in this function, so failures surface only as exceptions.
//
// The loop runs until gotData is cleared, which happens at Token_EOF or
// when scanEndTag()/scanStartTag() clear it through their bool& argument.
// EndOfEntityException is handled here: it is reported as
// PartialMarkupInEntity if it arrived while markup was being scanned, an
// endEntityReference() event is sent to the document handler (if any), and
// the outer loop then resumes scanning. Any other exception propagates to
// the caller.
bool SGXMLScanner::scanContent()
729
0
{
730
    //  Go into a loop until we hit the end of the root element, or we fall
731
    //  out because there is no root element.
732
    //
733
    //  We have to do kind of a deeply nested double loop here in order to
734
    //  avoid doing the setup/teardown of the exception handler on each
735
    //  round. Doing it this way we only do it when an exception actually
736
    //  occurs.
737
0
    bool gotData = true;
738
0
    // True only while one of the markup scan routines below is running;
    // read by the EndOfEntityException handler.
    bool inMarkup = false;
739
0
    while (gotData)
740
0
    {
741
0
        try
742
0
        {
743
0
            while (gotData)
744
0
            {
745
                //  Sense what the next top level token is. According to what
746
                //  this tells us, we will call something to handle that kind
747
                //  of thing.
748
0
                XMLSize_t orgReader;
749
0
                const XMLTokens curToken = senseNextToken(orgReader);
750
751
                //  Handle character data and end of file specially. Char data
752
                //  is not markup so we don't want to handle it in the loop
753
                //  below.
754
0
                if (curToken == Token_CharData)
755
0
                {
756
                    //  Scan the character data and call appropriate events. Let
757
                    //  him use our local character data buffer for efficiency.
758
0
                    scanCharData(fCDataBuf);
759
0
                    continue;
760
0
                }
761
0
                else if (curToken == Token_EOF)
762
0
                {
763
                    //  The element stack better be empty at this point or we
764
                    //  ended prematurely before all elements were closed.
765
0
                    if (!fElemStack.isEmpty())
766
0
                    {
767
0
                        const ElemStack::StackElem* topElem = fElemStack.popTop();
768
0
                        emitError
769
0
                        (
770
0
                            XMLErrs::EndedWithTagsOnStack
771
0
                            , topElem->fThisElement->getFullName()
772
0
                        );
773
0
                    }
774
775
                    // Its the end of file, so clear the got data flag
776
0
                    gotData = false;
777
0
                    continue;
778
0
                }
779
780
                // We are in some sort of markup now
781
0
                inMarkup = true;
782
783
                //  According to the token we got, call the appropriate
784
                //  scanning method.
785
0
                switch(curToken)
786
0
                {
787
0
                    case Token_CData :
788
                        // Make sure we are within content
789
0
                        if (fElemStack.isEmpty())
790
0
                            emitError(XMLErrs::CDATAOutsideOfContent);
791
0
                        scanCDSection();
792
0
                        break;
793
794
0
                    case Token_Comment :
795
0
                        scanComment();
796
0
                        break;
797
798
0
                    case Token_EndTag :
799
0
                        scanEndTag(gotData);
800
0
                        break;
801
802
0
                    case Token_PI :
803
0
                        scanPI();
804
0
                        break;
805
806
0
                    case Token_StartTag :
807
0
                        scanStartTag(gotData);
808
0
                        break;
809
810
0
                    default :
811
0
                        // Any other token: resynchronise on the next '<'.
                        fReaderMgr.skipToChar(chOpenAngle);
812
0
                        break;
813
0
                }
814
815
0
                // The markup must end on the same reader it started on.
                if (orgReader != fReaderMgr.getCurrentReaderNum())
816
0
                    emitError(XMLErrs::PartialMarkupInEntity);
817
818
                // And we are back out of markup again
819
0
                inMarkup = false;
820
0
            }
821
0
        }
822
0
        catch(const EndOfEntityException& toCatch)
823
0
        {
824
            //  If we were in some markup when this happened, then its a
825
            //  partial markup error.
826
0
            if (inMarkup)
827
0
                emitError(XMLErrs::PartialMarkupInEntity);
828
829
            // Send an end of entity reference event
830
0
            if (fDocHandler)
831
0
                fDocHandler->endEntityReference(toCatch.getEntity());
832
833
0
            inMarkup = false;
834
0
        }
835
0
    }
836
837
    // It went ok, so return success
838
0
    return true;
839
0
}
840
841
842
void SGXMLScanner::scanEndTag(bool& gotData)
843
0
{
844
    //  Assume we will still have data until proven otherwise. It will only
845
    //  ever be false if this is the end of the root element.
846
0
    gotData = true;
847
848
    //  Check if the element stack is empty. If so, then this is an unbalanced
849
    //  element (i.e. more ends than starts, perhaps because of bad text
850
    //  causing one to be skipped.)
851
0
    if (fElemStack.isEmpty())
852
0
    {
853
0
        emitError(XMLErrs::MoreEndThanStartTags);
854
0
        fReaderMgr.skipPastChar(chCloseAngle);
855
0
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
856
0
    }
857
858
    //  Pop the stack of the element we are supposed to be ending. Remember
859
    //  that we don't own this. The stack just keeps them and reuses them.
860
0
    unsigned int uriId = (fDoNamespaces)
861
0
        ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
862
863
    // Make sure that its the end of the element that we expect
864
0
    const XMLCh *elemName = fElemStack.getCurrentSchemaElemName();
865
0
    const ElemStack::StackElem* topElem = fElemStack.topElement();
866
0
    if (!fReaderMgr.skippedStringLong(elemName))
867
0
    {
868
0
        emitError
869
0
        (
870
0
            XMLErrs::ExpectedEndOfTagX
871
0
            , elemName
872
0
        );
873
0
        fReaderMgr.skipPastChar(chCloseAngle);
874
0
        fElemStack.popTop();
875
0
        return;
876
0
    }
877
878
0
    fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
879
880
    // Make sure we are back on the same reader as where we started
881
0
    if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
882
0
        emitError(XMLErrs::PartialTagMarkupError);
883
884
    // Skip optional whitespace
885
0
    fReaderMgr.skipPastSpaces();
886
887
    // Make sure we find the closing bracket
888
0
    if (!fReaderMgr.skippedChar(chCloseAngle))
889
0
    {
890
0
        emitError
891
0
        (
892
0
            XMLErrs::UnterminatedEndTag
893
0
            , topElem->fThisElement->getFullName()
894
0
        );
895
0
    }
896
897
0
    if (fValidate && topElem->fThisElement->isDeclared())
898
0
    {
899
0
        fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
900
0
        if(!fPSVIElemContext.fCurrentTypeInfo)
901
0
            fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
902
0
        else
903
0
            fPSVIElemContext.fCurrentDV = 0;
904
0
        if (fPSVIHandler)
905
0
        {
906
0
            fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
907
908
0
            if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
909
0
                fPSVIElemContext.fNormalizedValue = 0;
910
911
0
        }
912
0
    }
913
0
    else
914
0
    {
915
0
        fPSVIElemContext.fCurrentDV = 0;
916
0
        fPSVIElemContext.fCurrentTypeInfo = 0;
917
0
        fPSVIElemContext.fNormalizedValue = 0;
918
0
    }
919
920
    //  If validation is enabled, then lets pass him the list of children and
921
    //  this element and let him validate it.
922
0
    DatatypeValidator* psviMemberType = 0;
923
0
    if (fValidate)
924
0
    {
925
0
        XMLSize_t failure;
926
0
        bool res = fValidator->checkContent
927
0
        (
928
0
            topElem->fThisElement
929
0
            , topElem->fChildren
930
0
            , topElem->fChildCount
931
0
            , &failure
932
0
        );
933
934
0
        if (!res)
935
0
        {
936
            //  One of the elements is not valid for the content. NOTE that
937
            //  if no children were provided but the content model requires
938
            //  them, it comes back with a zero value. But we cannot use that
939
            //  to index the child array in this case, and have to put out a
940
            //  special message.
941
0
            if (!topElem->fChildCount)
942
0
            {
943
0
                fValidator->emitError
944
0
                (
945
0
                    XMLValid::EmptyNotValidForContent
946
0
                    , topElem->fThisElement->getFormattedContentModel()
947
0
                );
948
0
            }
949
0
            else if (failure >= topElem->fChildCount)
950
0
            {
951
0
                fValidator->emitError
952
0
                (
953
0
                    XMLValid::NotEnoughElemsForCM
954
0
                    , topElem->fThisElement->getFormattedContentModel()
955
0
                );
956
0
            }
957
0
            else
958
0
            {
959
0
                fValidator->emitError
960
0
                (
961
0
                    XMLValid::ElementNotValidForContent
962
0
                    , topElem->fChildren[failure]->getRawName()
963
0
                    , topElem->fThisElement->getFormattedContentModel()
964
0
                );
965
0
            }
966
967
0
        }
968
969
        // update PSVI info
970
0
        if (((SchemaValidator*) fValidator)->getErrorOccurred())
971
0
            fPSVIElemContext.fErrorOccurred = true;
972
0
        else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
973
0
            psviMemberType = fValidationContext->getValidatingMemberType();
974
0
        if (fPSVIHandler)
975
0
        {
976
0
            fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
977
0
            if(fPSVIElemContext.fIsSpecified)
978
0
                fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue();
979
0
        }
980
981
        // call matchers and de-activate context
982
0
        if (toCheckIdentityConstraint())
983
0
        {
984
0
            fICHandler->deactivateContext
985
0
                        (
986
0
                         (SchemaElementDecl *) topElem->fThisElement
987
0
                       , fContent.getRawBuffer()
988
0
                       , fValidationContext
989
0
                       , fPSVIElemContext.fCurrentDV
990
0
                        );
991
0
        }
992
993
0
    }
994
995
    // QName dv needed topElem to resolve URIs on the checkContent
996
0
    fElemStack.popTop();
997
998
    // See if it was the root element, to avoid multiple calls below
999
0
    const bool isRoot = fElemStack.isEmpty();
1000
1001
0
    if (fPSVIHandler)
1002
0
    {
1003
0
        endElementPSVI
1004
0
        (
1005
0
            (SchemaElementDecl*)topElem->fThisElement, psviMemberType
1006
0
        );
1007
0
    }
1008
    // now we can reset the datatype buffer, since the
1009
    // application has had a chance to copy the characters somewhere else
1010
0
    ((SchemaValidator *)fValidator)->clearDatatypeBuffer();
1011
1012
    // If we have a doc handler, tell it about the end tag
1013
0
    if (fDocHandler)
1014
0
    {
1015
0
        if (fGrammarType == Grammar::SchemaGrammarType) {
1016
0
            if (topElem->fPrefixColonPos != -1)
1017
0
                fPrefixBuf.set(elemName, topElem->fPrefixColonPos);
1018
0
            else
1019
0
                fPrefixBuf.reset();
1020
0
        }
1021
0
        else {
1022
0
            fPrefixBuf.set(topElem->fThisElement->getElementName()->getPrefix());
1023
0
        }
1024
0
        fDocHandler->endElement
1025
0
        (
1026
0
            *topElem->fThisElement
1027
0
            , uriId
1028
0
            , isRoot
1029
0
            , fPrefixBuf.getRawBuffer()
1030
0
        );
1031
0
    }
1032
1033
0
    if (!isRoot)
1034
0
    {
1035
        // update error information
1036
0
        fErrorStack->push((fErrorStack->size() && fErrorStack->pop()) || fPSVIElemContext.fErrorOccurred);
1037
0
    }
1038
1039
    // If this was the root, then done with content
1040
0
    gotData = !isRoot;
1041
1042
0
    if (gotData) {
1043
1044
        // Restore the grammar
1045
0
        fGrammar = fElemStack.getCurrentGrammar();
1046
0
        fGrammarType = fGrammar->getGrammarType();
1047
0
        fValidator->setGrammar(fGrammar);
1048
1049
        // Restore the validation flag
1050
0
        fValidate = fElemStack.getValidationFlag();
1051
0
    }
1052
0
}
1053
1054
1055
//  This method handles the high level logic of scanning the DOCType
1056
//  declaration. This calls the DTDScanner and kicks off both the scanning of
1057
//  the internal subset and the scanning of the external subset, if any.
1058
//
1059
//  When we get here the '<!DOCTYPE' part has already been scanned, which is
1060
//  what told us that we had a doc type decl to parse.
1061
void SGXMLScanner::scanDocTypeDecl()
1062
0
{
1063
    // Just skips over it
1064
    // REVISIT: Should we issue a warning
1065
0
    static const XMLCh doctypeIE[] =
1066
0
    {
1067
0
            chOpenSquare, chCloseAngle, chNull
1068
0
    };
1069
0
    XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE);
1070
1071
0
    if (nextCh == chOpenSquare)
1072
0
        fReaderMgr.skipPastChar(chCloseSquare);
1073
1074
0
    fReaderMgr.skipPastChar(chCloseAngle);
1075
0
}
1076
1077
//  This method is called to scan a start tag when we are processing
1078
//  namespaces. This method is called after we've scanned the < of a
1079
//  start tag. So we have to get the element name, then scan the attributes,
1080
//  after which we are either going to see >, />, or attributes followed
1081
//  by one of those sequences.
1082
bool SGXMLScanner::scanStartTag(bool& gotData)
1083
0
{
1084
    //  Assume we will still have data until proven otherwise. It will only
1085
    //  ever be false if this is the root and its empty.
1086
0
    gotData = true;
1087
1088
    // Reset element content
1089
0
    fContent.reset();
1090
1091
    //  The current position is after the open bracket, so we need to read in
1092
    //  in the element name.
1093
0
    int prefixColonPos;
1094
0
    if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos))
1095
0
    {
1096
0
        if (fQNameBuf.isEmpty())
1097
0
            emitError(XMLErrs::ExpectedElementName);
1098
0
        else
1099
0
            emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
1100
0
        fReaderMgr.skipToChar(chOpenAngle);
1101
0
        return false;
1102
0
    }
1103
1104
    // See if its the root element
1105
0
    const bool isRoot = fElemStack.isEmpty();
1106
1107
    // Skip any whitespace after the name
1108
0
    fReaderMgr.skipPastSpaces();
1109
1110
    //  First we have to do the rawest attribute scan. We don't do any
1111
    //  normalization of them at all, since we don't know yet what type they
1112
    //  might be (since we need the element decl in order to do that.)
1113
0
    const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
1114
0
    bool isEmpty;
1115
0
    XMLSize_t attCount = rawAttrScan
1116
0
    (
1117
0
        qnameRawBuf
1118
0
        , *fRawAttrList
1119
0
        , isEmpty
1120
0
    );
1121
1122
    // save the contentleafname and currentscope before addlevel, for later use
1123
0
    ContentLeafNameTypeVector* cv = 0;
1124
0
    XMLContentModel* cm = 0;
1125
0
    unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE;
1126
0
    bool laxThisOne = false;
1127
0
    if (!isRoot)
1128
0
    {
1129
        // schema validator will have correct type if validating
1130
0
        SchemaElementDecl* tempElement = (SchemaElementDecl*)
1131
0
            fElemStack.topElement()->fThisElement;
1132
0
        SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
1133
0
        ComplexTypeInfo *currType = 0;
1134
1135
0
        if (fValidate)
1136
0
        {
1137
0
            currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
1138
0
            if (currType)
1139
0
                modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();
1140
0
            else // something must have gone wrong
1141
0
                modelType = SchemaElementDecl::Any;
1142
0
        }
1143
0
        else
1144
0
        {
1145
0
            currType = tempElement->getComplexTypeInfo();
1146
0
        }
1147
1148
0
        if ((modelType == SchemaElementDecl::Mixed_Simple)
1149
0
          ||  (modelType == SchemaElementDecl::Mixed_Complex)
1150
0
          ||  (modelType == SchemaElementDecl::Children))
1151
0
        {
1152
0
            cm = currType->getContentModel();
1153
0
            cv = cm->getContentLeafNameTypeVector();
1154
0
            currentScope = fElemStack.getCurrentScope();
1155
0
        }
1156
0
        else if (modelType == SchemaElementDecl::Any) {
1157
0
            laxThisOne = true;
1158
0
        }
1159
0
    }
1160
1161
    //  Now, since we might have to update the namespace map for this element,
1162
    //  but we don't have the element decl yet, we just tell the element stack
1163
    //  to expand up to get ready.
1164
0
    XMLSize_t elemDepth = fElemStack.addLevel();
1165
0
    fElemStack.setValidationFlag(fValidate);
1166
0
    fElemStack.setPrefixColonPos(prefixColonPos);
1167
1168
    //  Check if there is any external schema location specified, and if we are at root,
1169
    //  go through them first before scanning those specified in the instance document
1170
0
    if (isRoot
1171
0
        && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
1172
1173
0
        if (fExternalSchemaLocation)
1174
0
            parseSchemaLocation(fExternalSchemaLocation, true);
1175
0
        if (fExternalNoNamespaceSchemaLocation)
1176
0
            resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString, true);
1177
0
    }
1178
1179
    //  Make an initial pass through the list and find any xmlns attributes or
1180
    //  schema attributes.
1181
0
    if (attCount)
1182
0
        scanRawAttrListforNameSpaces(attCount);
1183
1184
    //  Resolve the qualified name to a URI and name so that we can look up
1185
    //  the element decl for this element. We have now update the prefix to
1186
    //  namespace map so we should get the correct element now.
1187
0
    unsigned int uriId = resolveQNameWithColon
1188
0
    (
1189
0
        qnameRawBuf
1190
0
        , fPrefixBuf
1191
0
        , ElemStack::Mode_Element
1192
0
        , prefixColonPos
1193
0
    );
1194
1195
    //if schema, check if we should lax or skip the validation of this element
1196
0
    bool parentValidation = fValidate;
1197
0
    if (cv) {
1198
0
        QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
1199
        // elementDepth will be > 0, as cv is only constructed if element is not
1200
        // root.
1201
0
        laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
1202
0
    }
1203
1204
    //  Look up the element now in the grammar. This will get us back a
1205
    //  generic element decl object. We tell him to fault one in if he does
1206
    //  not find it.
1207
0
    XMLElementDecl* elemDecl = 0;
1208
0
    bool wasAdded = false;
1209
0
    const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
1210
0
    const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
1211
1212
0
    if (uriId != fEmptyNamespaceId) {
1213
1214
        // Check in current grammar before switching if necessary
1215
0
        elemDecl = fGrammar->getElemDecl
1216
0
        (
1217
0
          uriId
1218
0
          , nameRawBuf
1219
0
          , qnameRawBuf
1220
0
          , currentScope
1221
0
        );
1222
0
        if(!elemDecl)
1223
0
        {
1224
            // look in the list of undeclared elements, as would have been done
1225
            // before we made grammars stateless:
1226
0
            elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
1227
0
        }
1228
        // this is initialized correctly only if there is
1229
        // no element decl.  The other uses in this scope will only
1230
        // be encountered if there continues to be no element decl--which
1231
        // implies that this will have been initialized correctly.
1232
0
        unsigned int orgGrammarUri = uriId;
1233
0
        if (!elemDecl && ( orgGrammarUri = fURIStringPool->getId(original_uriStr)) != uriId) {
1234
            // not found, switch to the specified grammar
1235
0
            const XMLCh* uriStr = getURIText(uriId);
1236
0
            bool errorCondition = !switchGrammar(uriStr) && fValidate;
1237
0
            if (errorCondition && !laxThisOne)
1238
0
            {
1239
0
                fValidator->emitError
1240
0
                (
1241
0
                    XMLValid::GrammarNotFound
1242
0
                    ,uriStr
1243
0
                );
1244
0
            }
1245
1246
0
            elemDecl = fGrammar->getElemDecl
1247
0
            (
1248
0
              uriId
1249
0
              , nameRawBuf
1250
0
              , qnameRawBuf
1251
0
              , currentScope
1252
0
            );
1253
0
        }
1254
1255
0
        if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
1256
            // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
1257
0
            elemDecl = fGrammar->getElemDecl
1258
0
                       (
1259
0
                           uriId
1260
0
                           , nameRawBuf
1261
0
                           , qnameRawBuf
1262
0
                           , Grammar::TOP_LEVEL_SCOPE
1263
0
                       );
1264
0
            if(!elemDecl)
1265
0
            {
1266
                // look in the list of undeclared elements, as would have been done
1267
                // before we made grammars stateless:
1268
0
                elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE);
1269
0
            }
1270
0
            if(!elemDecl) {
1271
                // still not found in specified uri
1272
                // try emptyNamespace see if element should be un-qualified.
1273
                // Use a temp variable until we decide this is the case
1274
0
                XMLElementDecl* tempElemDecl = fGrammar->getElemDecl
1275
0
                           (
1276
0
                               fEmptyNamespaceId
1277
0
                               , nameRawBuf
1278
0
                               , qnameRawBuf
1279
0
                               , currentScope
1280
0
                           );
1281
0
                if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
1282
0
                    fValidator->emitError
1283
0
                    (
1284
0
                        XMLValid::ElementNotUnQualified
1285
0
                        , qnameRawBuf
1286
0
                    );
1287
0
                    elemDecl = tempElemDecl;
1288
0
                }
1289
0
            }
1290
0
        }
1291
1292
0
        if (!elemDecl) {
1293
            // still not found, fault this in and issue error later
1294
            // switch back to original grammar first (if necessary)
1295
0
            if(orgGrammarUri != uriId)
1296
0
            {
1297
0
                switchGrammar(original_uriStr);
1298
0
            }
1299
0
            elemDecl = new (fMemoryManager) SchemaElementDecl
1300
0
            (
1301
0
                fPrefixBuf.getRawBuffer()
1302
0
                , nameRawBuf
1303
0
                , uriId
1304
0
                , SchemaElementDecl::Any
1305
0
                , Grammar::TOP_LEVEL_SCOPE
1306
0
                , fMemoryManager
1307
0
            );
1308
0
            elemDecl->setId(fElemNonDeclPool->put((void*)elemDecl->getBaseName(), uriId, currentScope, (SchemaElementDecl*)elemDecl));
1309
0
            wasAdded = true;
1310
0
        }
1311
0
    }
1312
0
    else if (!elemDecl)
1313
0
    {
1314
        //the element has no prefix,
1315
        //thus it is either a non-qualified element defined in current targetNS
1316
        //or an element that is defined in the globalNS
1317
1318
        //try unqualifed first
1319
0
        elemDecl = fGrammar->getElemDecl
1320
0
                   (
1321
0
                      uriId
1322
0
                    , nameRawBuf
1323
0
                    , qnameRawBuf
1324
0
                    , currentScope
1325
0
                    );
1326
0
        if(!elemDecl)
1327
0
        {
1328
            // look in the list of undeclared elements, as would have been done
1329
            // before we made grammars stateless:
1330
0
            elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
1331
0
        }
1332
        // this is initialized correctly only if there is
1333
        // no element decl.  The other uses in this scope will only
1334
        // be encountered if there continues to be no element decl--which
1335
        // implies that this will have been initialized correctly.
1336
0
        unsigned int orgGrammarUri = fEmptyNamespaceId;
1337
0
        if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) {
1338
            //not found, switch grammar and try globalNS
1339
0
            bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
1340
0
            if (errorCondition && !laxThisOne)
1341
0
            {
1342
0
                fValidator->emitError
1343
0
                (
1344
0
                    XMLValid::GrammarNotFound
1345
0
                  , XMLUni::fgZeroLenString
1346
0
                );
1347
0
            }
1348
1349
0
            elemDecl = fGrammar->getElemDecl
1350
0
            (
1351
0
              uriId
1352
0
              , nameRawBuf
1353
0
              , qnameRawBuf
1354
0
              , currentScope
1355
0
            );
1356
0
        }
1357
1358
0
        if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
1359
            // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
1360
0
            elemDecl = fGrammar->getElemDecl
1361
0
                       (
1362
0
                           uriId
1363
0
                           , nameRawBuf
1364
0
                           , qnameRawBuf
1365
0
                           , Grammar::TOP_LEVEL_SCOPE
1366
0
                       );
1367
0
            if(!elemDecl)
1368
0
            {
1369
                // look in the list of undeclared elements, as would have been done
1370
                // before we made grammars stateless:
1371
0
                elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE);
1372
0
            }
1373
0
            if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
1374
                // still Not found in specified uri
1375
                // go to original Grammar again to see if element needs to be fully qualified.
1376
0
                bool errorCondition = !switchGrammar(original_uriStr) && fValidate;
1377
0
                if (errorCondition && !laxThisOne)
1378
0
                {
1379
0
                    fValidator->emitError
1380
0
                    (
1381
0
                        XMLValid::GrammarNotFound
1382
0
                        ,original_uriStr
1383
0
                    );
1384
0
                }
1385
1386
                // Use a temp variable until we decide this is the case
1387
0
                XMLElementDecl* tempElemDecl = fGrammar->getElemDecl
1388
0
                           (
1389
0
                               orgGrammarUri
1390
0
                               , nameRawBuf
1391
0
                               , qnameRawBuf
1392
0
                               , currentScope
1393
0
                           );
1394
0
                if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
1395
0
                    fValidator->emitError
1396
0
                    (
1397
0
                        XMLValid::ElementNotQualified
1398
0
                        , qnameRawBuf
1399
0
                    );
1400
0
                    elemDecl=tempElemDecl;
1401
0
                }
1402
0
            }
1403
0
        }
1404
1405
0
        if (!elemDecl) {
1406
            // still not found, fault this in and issue error later
1407
            // switch back to original grammar first (if necessary)
1408
0
            if(orgGrammarUri != fEmptyNamespaceId)
1409
0
            {
1410
0
                switchGrammar(original_uriStr);
1411
0
            }
1412
0
            elemDecl = new (fMemoryManager) SchemaElementDecl
1413
0
            (
1414
0
                fPrefixBuf.getRawBuffer()
1415
0
                , nameRawBuf
1416
0
                , uriId
1417
0
                , SchemaElementDecl::Any
1418
0
                , Grammar::TOP_LEVEL_SCOPE
1419
0
                , fMemoryManager
1420
0
            );
1421
0
            elemDecl->setId(fElemNonDeclPool->put((void*)elemDecl->getBaseName(), uriId, currentScope, (SchemaElementDecl*)elemDecl));
1422
0
            wasAdded = true;
1423
0
        }
1424
0
    }
1425
1426
    // this info needed for DOMTypeInfo
1427
0
    fPSVIElemContext.fErrorOccurred = false;
1428
1429
    //  We do something different here according to whether we found the
1430
    //  element or not.
1431
0
    bool bXsiTypeSet= (fValidator)?((SchemaValidator*)fValidator)->getIsXsiTypeSet():false;
1432
0
    if (wasAdded)
1433
0
    {
1434
0
        if (laxThisOne && !bXsiTypeSet) {
1435
0
            fValidate = false;
1436
0
            fElemStack.setValidationFlag(fValidate);
1437
0
        }
1438
1439
        // If validating then emit an error
1440
0
        if (fValidate)
1441
0
        {
1442
            // This is to tell the reuse Validator that this element was
1443
            // faulted-in, was not an element in the grammar pool originally
1444
0
            elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
1445
1446
0
            if(!bXsiTypeSet)
1447
0
            {
1448
0
                fValidator->emitError
1449
0
                (
1450
0
                    XMLValid::ElementNotDefined
1451
0
                    , elemDecl->getFullName()
1452
0
                );
1453
0
                fPSVIElemContext.fErrorOccurred = true;
1454
0
            }
1455
0
        }
1456
0
    }
1457
0
    else
1458
0
    {
1459
        // If its not marked declared and validating, then emit an error
1460
0
        if (!elemDecl->isDeclared()) {
1461
0
            if(elemDecl->getCreateReason() == XMLElementDecl::NoReason) {
1462
0
                if(!bXsiTypeSet)
1463
0
                    fPSVIElemContext.fErrorOccurred = true;
1464
0
            }
1465
0
            if (laxThisOne) {
1466
0
                fValidate = false;
1467
0
                fElemStack.setValidationFlag(fValidate);
1468
0
            }
1469
1470
0
            if (fValidate && !bXsiTypeSet)
1471
0
            {
1472
0
                fValidator->emitError
1473
0
                (
1474
0
                    XMLValid::ElementNotDefined
1475
0
                    , elemDecl->getFullName()
1476
0
                );
1477
0
            }
1478
0
        }
1479
0
    }
1480
1481
1482
    //  Now we can update the element stack to set the current element
1483
    //  decl. We expanded the stack above, but couldn't store the element
1484
    //  decl because we didn't know it yet.
1485
0
    fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
1486
0
    fElemStack.setCurrentURI(uriId);
1487
1488
0
    if (isRoot)
1489
0
    {
1490
0
        fRootGrammar = fGrammar;
1491
0
        fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager);
1492
0
    }
1493
1494
0
    if (fPSVIHandler)
1495
0
    {
1496
0
        fPSVIElemContext.fElemDepth++;
1497
1498
0
        if (elemDecl->isDeclared())
1499
0
        {
1500
0
            fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth;
1501
0
        }
1502
0
        else
1503
0
        {
1504
0
            fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fElemDepth;
1505
1506
            /******
1507
             * While we report an error for historical reasons, this should
1508
             * actually result in lax assessment - NG.
1509
            if (isRoot && fValidate)
1510
                fPSVIElemContext.fErrorOccurred = true;
1511
            ******/
1512
0
        }
1513
0
    }
1514
1515
    //  Validate the element
1516
0
    if (fValidate)
1517
0
    {
1518
0
        fValidator->validateElement(elemDecl);
1519
0
        if (((SchemaValidator*) fValidator)->getErrorOccurred())
1520
0
            fPSVIElemContext.fErrorOccurred = true;
1521
0
    }
1522
1523
    // squirrel away the element's QName, so that we can do an efficient
1524
    // end-tag match
1525
0
    fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
1526
1527
0
    ComplexTypeInfo* typeinfo = (fValidate)
1528
0
        ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
1529
0
        : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
1530
1531
0
    if (typeinfo) {
1532
0
        currentScope = typeinfo->getScopeDefined();
1533
1534
        // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
1535
0
        XMLCh* typeName = typeinfo->getTypeName();
1536
0
        const int comma = XMLString::indexOf(typeName, chComma);
1537
0
        if (comma > 0) {
1538
0
            XMLBuffer prefixBuf(comma+1, fMemoryManager);
1539
0
            prefixBuf.append(typeName, comma);
1540
0
            const XMLCh* uriStr = prefixBuf.getRawBuffer();
1541
1542
0
            bool errorCondition = !switchGrammar(uriStr) && fValidate;
1543
0
            if (errorCondition && !laxThisOne)
1544
0
            {
1545
0
                fValidator->emitError
1546
0
                (
1547
0
                    XMLValid::GrammarNotFound
1548
0
                    , prefixBuf.getRawBuffer()
1549
0
                );
1550
0
            }
1551
0
        }
1552
0
        else if (comma == 0) {
1553
0
            bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
1554
0
            if (errorCondition && !laxThisOne)
1555
0
            {
1556
0
                fValidator->emitError
1557
0
                (
1558
0
                    XMLValid::GrammarNotFound
1559
0
                    , XMLUni::fgZeroLenString
1560
0
                );
1561
0
            }
1562
0
        }
1563
0
    }
1564
0
    fElemStack.setCurrentScope(currentScope);
1565
1566
    // Set element next state
1567
0
    if (elemDepth >= fElemStateSize) {
1568
0
        resizeElemState();
1569
0
    }
1570
1571
0
    fElemState[elemDepth] = 0;
1572
0
    fElemLoopState[elemDepth] = 0;
1573
0
    fElemStack.setCurrentGrammar(fGrammar);
1574
1575
    //  If this is the first element and we are validating, check the root
1576
    //  element.
1577
0
    if (!isRoot && parentValidation)
1578
0
    {
1579
        //  If the element stack is not empty, then add this element as a
1580
        //  child of the previous top element. If its empty, this is the root
1581
        //  elem and is not the child of anything.
1582
0
        fElemStack.addChild(elemDecl->getElementName(), true);
1583
0
    }
1584
1585
    // PSVI handling:  must reset this, even if no attributes...
1586
0
    if(getPSVIHandler())
1587
0
        fPSVIAttrList->reset();
1588
1589
    //  Now lets get the fAttrList filled in. This involves faulting in any
1590
    //  defaulted and fixed attributes and normalizing the values of any that
1591
    //  we got explicitly.
1592
    //
1593
    //  We update the attCount value with the total number of attributes, but
1594
    //  it goes in with the number of values we got during the raw scan of
1595
    //  explictly provided attrs above.
1596
0
    attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
1597
1598
0
    if(attCount)
1599
0
    {
1600
        // clean up after ourselves:
1601
        // clear the map used to detect duplicate attributes
1602
0
        fUndeclaredAttrRegistry->removeAll();
1603
0
    }
1604
1605
    // activate identity constraints
1606
0
    if (toCheckIdentityConstraint())
1607
0
    {
1608
0
        fICHandler->activateIdentityConstraint
1609
0
                        (
1610
0
                          (SchemaElementDecl*) elemDecl
1611
0
                        , (int) elemDepth
1612
0
                        , uriId
1613
0
                        , fPrefixBuf.getRawBuffer()
1614
0
                        , *fAttrList
1615
0
                        , attCount
1616
0
                        , fValidationContext
1617
0
                        );
1618
1619
0
    }
1620
1621
    // Since the element may have default values, call start tag now regardless if it is empty or not
1622
    // If we have a document handler, then tell it about this start tag
1623
0
    if (fDocHandler)
1624
0
    {
1625
0
        fDocHandler->startElement
1626
0
        (
1627
0
            *elemDecl
1628
0
            , uriId
1629
0
            , fPrefixBuf.getRawBuffer()
1630
0
            , *fAttrList
1631
0
            , attCount
1632
0
            , false
1633
0
            , isRoot
1634
0
        );
1635
0
    } // may be where we output something...
1636
1637
    // if we have a PSVIHandler, now's the time to call
1638
    // its handleAttributesPSVI method:
1639
0
    if(fPSVIHandler)
1640
0
    {
1641
0
        QName *eName = elemDecl->getElementName();
1642
0
        fPSVIHandler->handleAttributesPSVI
1643
0
        (
1644
0
            eName->getLocalPart()
1645
0
            , fURIStringPool->getValueForId(eName->getURI())
1646
0
            , fPSVIAttrList
1647
0
        );
1648
0
    }
1649
1650
    //  If empty, validate content right now if we are validating and then
1651
    //  pop the element stack top. Else, we have to update the current stack
1652
    //  top's namespace mapping elements.
1653
0
    if (isEmpty)
1654
0
    {
1655
        // Pop the element stack back off since it'll never be used now
1656
0
        fElemStack.popTop();
1657
1658
        // reset current type info
1659
0
        DatatypeValidator* psviMemberType = 0;
1660
0
        if (fGrammarType == Grammar::SchemaGrammarType)
1661
0
        {
1662
0
            if (fValidate && elemDecl->isDeclared())
1663
0
            {
1664
0
                fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
1665
0
                if(!fPSVIElemContext.fCurrentTypeInfo)
1666
0
                    fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
1667
0
                else
1668
0
                    fPSVIElemContext.fCurrentDV = 0;
1669
0
                if(fPSVIHandler)
1670
0
                {
1671
0
                    fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
1672
1673
0
                    if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
1674
0
                        fPSVIElemContext.fNormalizedValue = 0;
1675
0
                }
1676
0
            }
1677
0
            else
1678
0
            {
1679
0
                fPSVIElemContext.fCurrentDV = 0;
1680
0
                fPSVIElemContext.fCurrentTypeInfo = 0;
1681
0
                fPSVIElemContext.fNormalizedValue = 0;
1682
0
            }
1683
0
        }
1684
1685
        // If validating, then insure that its legal to have no content
1686
0
        if (fValidate)
1687
0
        {
1688
0
            XMLSize_t failure;
1689
0
            bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
1690
0
            if (!res)
1691
0
            {
1692
                // REVISIT:  in the case of xsi:type, this may
1693
                // return the wrong string...
1694
0
                fValidator->emitError
1695
0
                (
1696
0
                    XMLValid::ElementNotValidForContent
1697
0
                    , elemDecl->getFullName()
1698
0
                    , elemDecl->getFormattedContentModel()
1699
0
                );
1700
0
            }
1701
1702
0
            if (((SchemaValidator*) fValidator)->getErrorOccurred())
1703
0
                fPSVIElemContext.fErrorOccurred = true;
1704
            // note that if we're empty, won't be a current DV
1705
0
            else
1706
0
            {
1707
0
                if (fPSVIHandler)
1708
0
                {
1709
0
                    fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
1710
0
                    if(fPSVIElemContext.fIsSpecified)
1711
0
                        fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)elemDecl)->getDefaultValue();
1712
0
                }
1713
0
                if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
1714
0
                    psviMemberType = fValidationContext->getValidatingMemberType();
1715
0
            }
1716
1717
            // call matchers and de-activate context
1718
0
            if (toCheckIdentityConstraint())
1719
0
            {
1720
0
                fICHandler->deactivateContext
1721
0
                       (
1722
0
                        (SchemaElementDecl *) elemDecl
1723
0
                      , fContent.getRawBuffer()
1724
0
                      , fValidationContext
1725
0
                      , fPSVIElemContext.fCurrentDV
1726
0
                       );
1727
0
            }
1728
1729
0
        }
1730
0
        else if (fGrammarType == Grammar::SchemaGrammarType) {
1731
0
            ((SchemaValidator*)fValidator)->resetNillable();
1732
0
        }
1733
1734
0
        if (fPSVIHandler)
1735
0
        {
1736
0
            endElementPSVI
1737
0
            (
1738
0
                (SchemaElementDecl*)elemDecl, psviMemberType
1739
0
            );
1740
0
        }
1741
1742
        // If we have a doc handler, tell it about the end tag
1743
0
        if (fDocHandler)
1744
0
        {
1745
0
            fDocHandler->endElement
1746
0
            (
1747
0
                *elemDecl
1748
0
                , uriId
1749
0
                , isRoot
1750
0
                , fPrefixBuf.getRawBuffer()
1751
0
            );
1752
0
        }
1753
1754
        // If the elem stack is empty, then it was an empty root
1755
0
        if (isRoot)
1756
0
            gotData = false;
1757
0
        else
1758
0
        {
1759
            // Restore the grammar
1760
0
            fGrammar = fElemStack.getCurrentGrammar();
1761
0
            fGrammarType = fGrammar->getGrammarType();
1762
0
            fValidator->setGrammar(fGrammar);
1763
1764
            // Restore the validation flag
1765
0
            fValidate = fElemStack.getValidationFlag();
1766
0
        }
1767
0
    }
1768
0
    else    // not empty
1769
0
    {
1770
1771
        // send a partial element psvi
1772
0
        if (fPSVIHandler)
1773
0
        {
1774
1775
0
            ComplexTypeInfo*   curTypeInfo = 0;
1776
0
            DatatypeValidator* curDV = 0;
1777
0
            XSTypeDefinition*  typeDef = 0;
1778
1779
0
            if (fValidate && elemDecl->isDeclared())
1780
0
            {
1781
0
                curTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
1782
1783
0
                if (curTypeInfo)
1784
0
                {
1785
0
                    typeDef = (XSTypeDefinition*) fModel->getXSObject(curTypeInfo);
1786
0
                }
1787
0
                else
1788
0
                {
1789
0
                    curDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
1790
1791
0
                    if (curDV)
1792
0
                    {
1793
0
                        typeDef = (XSTypeDefinition*) fModel->getXSObject(curDV);
1794
0
                    }
1795
0
                }
1796
0
            }
1797
1798
0
            fPSVIElement->reset
1799
0
                (
1800
0
                  PSVIElement::VALIDITY_NOTKNOWN
1801
0
                , PSVIElement::VALIDATION_NONE
1802
0
                , fRootElemName
1803
0
                , ((SchemaValidator*) fValidator)->getIsElemSpecified()
1804
0
                , (elemDecl->isDeclared()) ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
1805
0
                , typeDef
1806
0
                , 0 //memberType
1807
0
                , fModel
1808
0
                , ((SchemaElementDecl*)elemDecl)->getDefaultValue()
1809
0
                , 0
1810
0
                , 0
1811
0
                , 0
1812
0
                );
1813
1814
1815
0
            fPSVIHandler->handlePartialElementPSVI
1816
0
                (
1817
0
                  elemDecl->getBaseName()
1818
0
                , fURIStringPool->getValueForId(elemDecl->getURI())
1819
0
                , fPSVIElement
1820
0
                );
1821
1822
0
        }
1823
1824
0
        fErrorStack->push(fPSVIElemContext.fErrorOccurred);
1825
0
    }
1826
1827
0
    return true;
1828
0
}
1829
1830
1831
// ---------------------------------------------------------------------------
1832
//  SGXMLScanner: Grammar preparsing
1833
// ---------------------------------------------------------------------------
1834
//  Pre-parses a grammar from the given input source.
//
//  src          The input source handed on to loadXMLSchemaGrammar().
//  grammarType  Only Grammar::SchemaGrammarType is acted upon here; for any
//               other value nothing is loaded and 0 is returned.
//  toCache      Forwarded to loadXMLSchemaGrammar() and also used to decide
//               whether already cached grammars are consulted during the
//               parse.
//
//  Returns the grammar produced by loadXMLSchemaGrammar(), or 0 if nothing
//  was loaded (unsupported grammarType, or the load was cut short by one of
//  the exceptions handled below).
//
//  XMLErrs::Codes and XMLValid::Codes exceptions are swallowed. An
//  XMLException is reported through emitError(); anything emitError()
//  itself throws propagates to the caller. OutOfMemoryException is always
//  rethrown.
Grammar* SGXMLScanner::loadGrammar(const   InputSource& src
1835
                                   , const short        grammarType
1836
                                   , const bool         toCache)
1837
0
{
1838
0
    Grammar* loadedGrammar = 0;
1839
1840
0
    // Guard object bound to fReaderMgr and ReaderMgr::reset; presumably it
    // invokes the reset when this scope is left unless release() has been
    // called - confirm against the ReaderMgrResetType declaration.
    ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
1841
1842
0
    try
1843
0
    {
1844
0
        fGrammarResolver->cacheGrammarFromParse(false);
1845
    // if the new grammar has to be cached, better use the already cached
1846
    // grammars, or else an exception will be thrown when caching an already
1847
    // cached grammar
1848
0
        fGrammarResolver->useCachedGrammarInParse(toCache);
1849
0
        fRootGrammar = 0;
1850
1851
0
        // With the 'auto' validation scheme, validation is switched on for
        // the grammar load.
        if (fValScheme == Val_Auto) {
1852
0
            fValidate = true;
1853
0
        }
1854
1855
        // Reset some status flags
1856
0
        fInException = false;
1857
0
        fStandalone = false;
1858
0
        fErrorCount = 0;
1859
0
        fHasNoDTD = true;
1860
0
        fSeeXsi = false;
1861
1862
0
        // Schema grammars are the only kind handled here; any other
        // grammarType leaves loadedGrammar at 0.
        if (grammarType == Grammar::SchemaGrammarType) {
1863
0
            loadedGrammar = loadXMLSchemaGrammar(src, toCache);
1864
0
        }
1865
0
    }
1866
    //  NOTE:
1867
    //
1868
    //  In all of the error processing below, the emitError() call MUST come
1869
    //  before the flush of the reader mgr, or it will fail because it tries
1870
    //  to find out the position in the XML source of the error.
1871
0
    catch(const XMLErrs::Codes)
1872
0
    {
1873
        // This is a 'first failure' exception, so fall through
1874
0
    }
1875
0
    catch(const XMLValid::Codes)
1876
0
    {
1877
        // This is a 'first fatal error' type exit, so fall through
1878
0
    }
1879
0
    catch(const XMLException& excToCatch)
1880
0
    {
1881
        //  Emit the error and catch any user exception thrown from here. Make
1882
        //  sure in all cases we flush the reader manager.
1883
0
        fInException = true;
1884
0
        try
1885
0
        {
1886
0
            // Map the exception's error type onto the matching scanner
            // error code: warning, fatal (or anything above), else error.
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
1887
0
                emitError
1888
0
                (
1889
0
                    XMLErrs::XMLException_Warning
1890
0
                    , excToCatch.getCode()
1891
0
                    , excToCatch.getMessage()
1892
0
                );
1893
0
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
1894
0
                emitError
1895
0
                (
1896
0
                    XMLErrs::XMLException_Fatal
1897
0
                    , excToCatch.getCode()
1898
0
                    , excToCatch.getMessage()
1899
0
                );
1900
0
            else
1901
0
                emitError
1902
0
                (
1903
0
                    XMLErrs::XMLException_Error
1904
0
                    , excToCatch.getCode()
1905
0
                    , excToCatch.getMessage()
1906
0
                );
1907
0
        }
1908
0
        catch(const OutOfMemoryException&)
1909
0
        {
1910
            // This is a special case for out-of-memory
1911
            // conditions, because resetting the ReaderMgr
1912
            // can be problematic.
1913
0
            resetReaderMgr.release();
1914
1915
0
            throw;
1916
0
        }
1917
0
    }
1918
0
    catch(const OutOfMemoryException&)
1919
0
    {
1920
        // This is a special case for out-of-memory
1921
        // conditions, because resetting the ReaderMgr
1922
        // can be problematic.
1923
0
        resetReaderMgr.release();
1924
1925
0
        throw;
1926
0
    }
1927
1928
0
    // Still 0 here if the load was skipped or interrupted by an exception.
    return loadedGrammar;
1929
0
}
1930
1931
void SGXMLScanner::resetCachedGrammar ()
1932
0
{
1933
0
  fCachedSchemaInfoList->removeAll ();
1934
0
}
1935
1936
// ---------------------------------------------------------------------------
1937
//  SGXMLScanner: Private helper methods
1938
// ---------------------------------------------------------------------------
1939
//  This method handles the common initialization, to avoid having to do
1940
//  it redundantly in multiple constructors.
1941
void SGXMLScanner::commonInit()
1942
0
{
1943
    //  Create the element state array
1944
0
    fElemState = (unsigned int*) fMemoryManager->allocate
1945
0
    (
1946
0
        fElemStateSize * sizeof(unsigned int)
1947
0
    ); //new unsigned int[fElemStateSize];
1948
0
    fElemLoopState = (unsigned int*) fMemoryManager->allocate
1949
0
    (
1950
0
        fElemStateSize * sizeof(unsigned int)
1951
0
    ); //new unsigned int[fElemStateSize];
1952
1953
    //  And we need one for the raw attribute scan. This just stores key/
1954
    //  value string pairs (prior to any processing.)
1955
0
    fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);
1956
0
    fRawAttrColonList = (int*) fMemoryManager->allocate
1957
0
    (
1958
0
        fRawAttrColonListSize * sizeof(int)
1959
0
    );
1960
1961
    //  Create the Validator and init them
1962
0
    fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
1963
0
    initValidator(fSchemaValidator);
1964
1965
    // Create IdentityConstraint info
1966
0
    fICHandler = new (fMemoryManager) IdentityConstraintHandler(this, fMemoryManager);
1967
1968
    //  Add the default entity entries for the character refs that must always
1969
    //  be present.
1970
0
    fEntityTable = new (fMemoryManager) ValueHashTableOf<XMLCh>(11, fMemoryManager);
1971
0
    fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand);
1972
0
    fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle);
1973
0
    fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle);
1974
0
    fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);
1975
0
    fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);
1976
0
    fElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
1977
0
    fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher>
1978
0
    (
1979
0
        131, false, fMemoryManager
1980
0
    );
1981
0
    fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager);
1982
0
    fPSVIAttrList = new (fMemoryManager) PSVIAttributeList(fMemoryManager);
1983
1984
0
    fSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
1985
0
    fCachedSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
1986
1987
0
    if (fValidator)
1988
0
    {
1989
0
        if (!fValidator->handlesSchema())
1990
0
            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
1991
0
    }
1992
0
    else
1993
0
    {
1994
0
        fValidator = fSchemaValidator;
1995
0
    }
1996
0
}
1997
1998
//  Releases the storage held by this scanner. Raw arrays go back to
//  fMemoryManager via deallocate(); objects are destroyed with delete.
//  The statements are deliberately left in their existing order.
//  NOTE(review): fValidator is not deleted here; commonInit() may have
//  pointed it at fSchemaValidator, which is deleted below.
void SGXMLScanner::cleanUp()
1999
0
{
2000
0
    // Element state arrays (raw memory from fMemoryManager)
    fMemoryManager->deallocate(fElemState); //delete [] fElemState;
2001
0
    fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
2002
0
    delete fSchemaGrammar;
2003
0
    delete fEntityTable;
2004
0
    delete fRawAttrList;
2005
0
    // Raw array that accompanies fRawAttrList (also from fMemoryManager)
    fMemoryManager->deallocate(fRawAttrColonList);
2006
0
    delete fSchemaValidator;
2007
0
    delete fICHandler;
2008
0
    delete fElemNonDeclPool;
2009
0
    delete fAttDefRegistry;
2010
0
    delete fUndeclaredAttrRegistry;
2011
0
    delete fPSVIAttrList;
2012
0
    // fPSVIElement and fErrorStack are not created by commonInit(), so they
    // are checked for null before being deleted.
    if (fPSVIElement)
2013
0
        delete fPSVIElement;
2014
2015
0
    if (fErrorStack)
2016
0
        delete fErrorStack;
2017
2018
0
    delete fSchemaInfoList;
2019
0
    delete fCachedSchemaInfoList;
2020
0
}
2021
2022
0
void SGXMLScanner::resizeElemState() {
2023
2024
0
    unsigned int newSize = fElemStateSize * 2;
2025
0
    unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate
2026
0
    (
2027
0
        newSize * sizeof(unsigned int)
2028
0
    ); //new unsigned int[newSize];
2029
0
    unsigned int* newElemLoopState = (unsigned int*) fMemoryManager->allocate
2030
0
    (
2031
0
        newSize * sizeof(unsigned int)
2032
0
    ); //new unsigned int[newSize];
2033
2034
    // Copy the existing values
2035
0
    unsigned int index = 0;
2036
0
    for (; index < fElemStateSize; index++)
2037
0
    {
2038
0
        newElemState[index] = fElemState[index];
2039
0
        newElemLoopState[index] = fElemLoopState[index];
2040
0
    }
2041
2042
0
    for (; index < newSize; index++)
2043
0
        newElemLoopState[index] = newElemState[index] = 0;
2044
2045
    // Delete the old array and udpate our members
2046
0
    fMemoryManager->deallocate(fElemState); //delete [] fElemState;
2047
0
    fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
2048
0
    fElemState = newElemState;
2049
0
    fElemLoopState = newElemLoopState;
2050
0
    fElemStateSize = newSize;
2051
0
}
2052
2053
0
void SGXMLScanner::resizeRawAttrColonList() {
2054
2055
0
    unsigned int newSize = fRawAttrColonListSize * 2;
2056
0
    int* newRawAttrColonList = (int*) fMemoryManager->allocate
2057
0
    (
2058
0
        newSize * sizeof(int)
2059
0
    ); //new int[newSize];
2060
2061
    // Copy the existing values
2062
0
    unsigned int index = 0;
2063
0
    for (; index < fRawAttrColonListSize; index++)
2064
0
        newRawAttrColonList[index] = fRawAttrColonList[index];
2065
2066
    // Delete the old array and udpate our members
2067
0
    fMemoryManager->deallocate(fRawAttrColonList); //delete [] fRawAttrColonList;
2068
0
    fRawAttrColonList = newRawAttrColonList;
2069
0
    fRawAttrColonListSize = newSize;
2070
0
}
2071
2072
//  This method is called from scanStartTag() to build up the list of
2073
//  XMLAttr objects that will be passed out in the start tag callout. We
2074
//  get the key/value pairs from the raw scan of explicitly provided attrs,
2075
//  which have not been normalized. And we get the element declaration from
2076
//  which we will get any defaulted or fixed attribute defs and add those
2077
//  in as well.
2078
XMLSize_t
2079
SGXMLScanner::buildAttList(const  RefVectorOf<KVStringPair>&  providedAttrs
2080
                          , const XMLSize_t                   attCount
2081
                          ,       XMLElementDecl*             elemDecl
2082
                          ,       RefVectorOf<XMLAttr>&       toFill)
2083
0
{
2084
    //  Ask the element to clear the 'provided' flag on all of the att defs
2085
    //  that it owns, and to return us a boolean indicating whether it has
2086
    //  any defs.
2087
0
    DatatypeValidator *currDV = 0;
2088
0
    ComplexTypeInfo *currType = 0;
2089
2090
0
    if (fValidate)
2091
0
    {
2092
0
        currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
2093
0
        if (!currType) {
2094
0
            currDV = ((SchemaValidator*)fValidator)->getCurrentDatatypeValidator();
2095
0
        }
2096
0
    }
2097
2098
0
    const bool hasDefs = (currType && fValidate)
2099
0
            ? currType->hasAttDefs()
2100
0
            : elemDecl->hasAttDefs();
2101
2102
0
    fElemCount++;
2103
2104
    //  If there are no explicitly provided attributes and there are no
2105
    //  defined attributes for the element, then we don't have anything to do.
2106
    //  So just return zero in this case.
2107
0
    if (!hasDefs && !attCount)
2108
0
        return 0;
2109
2110
    // Keep up with how many attrs we end up with total
2111
0
    XMLSize_t retCount = 0;
2112
2113
    //  And get the current size of the output vector. This lets us use
2114
    //  existing elements until we fill it, then start adding new ones.
2115
0
    const XMLSize_t curAttListSize = toFill.size();
2116
2117
    //  We need a buffer into which raw scanned attribute values will be
2118
    //  normalized.
2119
0
    XMLBufBid bbNormal(&fBufMgr);
2120
0
    XMLBuffer& normBuf = bbNormal.getBuffer();
2121
2122
0
    XMLBufBid bbPrefix(&fBufMgr);
2123
0
    XMLBuffer& prefixBuf = bbPrefix.getBuffer();
2124
2125
    //  Loop through our explicitly provided attributes, which are in the raw
2126
    //  scanned form, and build up XMLAttr objects.
2127
0
    XMLSize_t index;
2128
0
    const XMLCh* prefPtr, *suffPtr;
2129
0
    for (index = 0; index < attCount; index++)
2130
0
    {
2131
0
        PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID;
2132
0
        PSVIItem::ASSESSMENT_TYPE attrAssessed = PSVIItem::VALIDATION_FULL;
2133
0
        const KVStringPair* curPair = providedAttrs.elementAt(index);
2134
2135
        //  We have to split the name into its prefix and name parts. Then
2136
        //  we map the prefix to its URI.
2137
0
        const XMLCh* const namePtr = curPair->getKey();
2138
2139
0
        const int colonInd = fRawAttrColonList[index];
2140
0
        unsigned int uriId;
2141
0
        if (colonInd != -1)
2142
0
        {
2143
0
            prefixBuf.set(namePtr, colonInd);
2144
0
            prefPtr = prefixBuf.getRawBuffer();
2145
0
            suffPtr = namePtr + colonInd + 1;
2146
            //  Map the prefix to a URI id
2147
0
            uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute);
2148
0
        }
2149
0
        else
2150
0
        {
2151
            // No colon, so we just have a name with no prefix
2152
0
            prefPtr = XMLUni::fgZeroLenString;
2153
0
            suffPtr = namePtr;
2154
            // an empty prefix is always the empty namespace, when dealing with attributes
2155
0
            uriId = fEmptyNamespaceId;
2156
0
        }
2157
2158
        //  If the uri comes back as the xmlns or xml URI or its just a name
2159
        //  and that name is 'xmlns', then we handle it specially. So set a
2160
        //  boolean flag that lets us quickly below know which we are dealing
2161
        //  with.
2162
0
        const bool isNSAttr = (uriId == fEmptyNamespaceId)?
2163
0
                                XMLString::equals(suffPtr, XMLUni::fgXMLNSString) :
2164
0
                                (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI));
2165
2166
        //  If its not a special case namespace attr of some sort, then we
2167
        //  do normal checking and processing.
2168
0
        XMLAttDef::AttTypes attType = XMLAttDef::CData;
2169
0
        DatatypeValidator *attrValidator = 0;
2170
0
        PSVIAttribute *psviAttr = 0;
2171
0
        bool otherXSI = false;
2172
2173
0
        if (isNSAttr)
2174
0
        {
2175
0
            if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
2176
0
            {
2177
0
                emitError
2178
0
                (
2179
0
                    XMLErrs::AttrAlreadyUsedInSTag
2180
0
                    , namePtr
2181
0
                    , elemDecl->getFullName()
2182
0
                );
2183
0
                fPSVIElemContext.fErrorOccurred = true;
2184
0
            }
2185
0
            else
2186
0
            {
2187
0
                bool ValueValidate = false;
2188
0
                bool tokenizeBuffer = false;
2189
2190
0
                if (uriId == fXMLNSNamespaceId)
2191
0
                {
2192
0
                    attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
2193
0
                }
2194
0
                else if (XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI))
2195
0
                {
2196
0
                    if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
2197
0
                    {
2198
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
2199
2200
0
                        ValueValidate = true;
2201
0
                    }
2202
0
                    else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
2203
0
                    {
2204
                        // use anyURI as the validator
2205
                        // tokenize the data and use the anyURI data for each piece
2206
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
2207
                        //We should validate each value in the schema location however
2208
                        //this lead to a performance degradation of around 4%.  Since
2209
                        //the first value of each pair needs to match what is in the
2210
                        //schema document and the second value needs to be valid in
2211
                        //order to open the document we won't validate it.  Need to
2212
                        //do performance analysis of the anyuri datatype.
2213
                        //ValueValidate = true;
2214
0
                        ValueValidate = false;
2215
0
                        tokenizeBuffer = true;
2216
0
                    }
2217
0
                    else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
2218
0
                    {
2219
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
2220
                        //We should validate this value however
2221
                        //this lead to a performance degradation of around 4%.  Since
2222
                        //the value needs to be valid in
2223
                        //order to open the document we won't validate it.  Need to
2224
                        //do performance analysis of the anyuri datatype.
2225
                        //ValueValidate = true;
2226
0
                        ValueValidate = false;
2227
0
                    }
2228
0
                    else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
2229
0
                    {
2230
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
2231
2232
0
                        ValueValidate = true;
2233
0
                    }
2234
0
                    else {
2235
0
                        otherXSI = true;
2236
0
                    }
2237
0
                }
2238
2239
0
                if (!otherXSI) {
2240
0
                    normalizeAttRawValue
2241
0
                    (
2242
0
                        namePtr
2243
0
                        , curPair->getValue()
2244
0
                        , normBuf
2245
0
                    );
2246
2247
0
                    if (fValidate && attrValidator && ValueValidate)
2248
0
                    {
2249
0
                        ((SchemaValidator*) fValidator)->normalizeWhiteSpace(attrValidator, normBuf.getRawBuffer(), normBuf, true);
2250
2251
0
                        ValidationContext* const    theContext =
2252
0
                            getValidationContext();
2253
2254
0
                        if (theContext)
2255
0
                        {
2256
0
                            try
2257
0
                            {
2258
0
                                if (tokenizeBuffer) {
2259
0
                                    XMLStringTokenizer tokenizer(normBuf.getRawBuffer(), fMemoryManager);
2260
0
                                    while (tokenizer.hasMoreTokens()) {
2261
0
                                        attrValidator->validate(
2262
0
                                            tokenizer.nextToken(),
2263
0
                                            theContext,
2264
0
                                            fMemoryManager);
2265
0
                                    }
2266
0
                                }
2267
0
                                else {
2268
0
                                    attrValidator->validate(
2269
0
                                        normBuf.getRawBuffer(),
2270
0
                                        theContext,
2271
0
                                        fMemoryManager);
2272
0
                                }
2273
0
                            }
2274
0
                            catch (const XMLException& idve)
2275
0
                            {
2276
0
                                fValidator->emitError (XMLValid::DatatypeError, idve.getCode(), idve.getMessage());
2277
0
                            }
2278
0
                        }
2279
0
                    }
2280
2281
0
                    if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType)
2282
0
                    {
2283
0
                      psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
2284
0
                      XSSimpleTypeDefinition *validatingType = (attrValidator)
2285
0
                            ? (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator)
2286
0
                            : 0;
2287
                        // no attribute declarations for these...
2288
0
                      psviAttr->reset(
2289
0
                          fRootElemName
2290
0
                          , PSVIItem::VALIDITY_NOTKNOWN
2291
0
                          , PSVIItem::VALIDATION_NONE
2292
0
                          , validatingType
2293
0
                          , 0
2294
0
                          , 0
2295
0
                            , false
2296
0
                          , 0
2297
0
                            , attrValidator
2298
0
                        );
2299
0
                    }
2300
0
                }
2301
0
            }
2302
0
        }
2303
2304
0
        if (!isNSAttr || otherXSI)
2305
0
        {
2306
            // Some checking for attribute wild card first (for schema)
2307
0
            bool laxThisOne = false;
2308
0
            bool skipThisOne = false;
2309
2310
0
            XMLAttDef* attDefForWildCard = 0;
2311
0
            XMLAttDef*  attDef = 0;
2312
2313
0
            if (fGrammarType == Grammar::SchemaGrammarType) {
2314
2315
                //retrieve the att def
2316
0
                SchemaAttDef* attWildCard = 0;
2317
0
                if (currType) {
2318
0
                    attDef = currType->getAttDef(suffPtr, uriId);
2319
0
                    attWildCard = currType->getAttWildCard();
2320
0
                }
2321
0
                else if (!currDV) { // check explicitly-set wildcard
2322
0
                    attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard();
2323
0
                }
2324
2325
                // if not found or faulted in - check for a matching wildcard attribute
2326
                // if no matching wildcard attribute, check (un)qualifed cases and flag
2327
                // appropriate errors
2328
0
                if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) {
2329
2330
0
                    if (attWildCard) {
2331
                        //if schema, see if we should lax or skip the validation of this attribute
2332
0
                        if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) {
2333
2334
0
                            if(!skipThisOne)
2335
0
                            {
2336
0
                                SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId));
2337
0
                                if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) {
2338
0
                                    RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry();
2339
0
                                    if (attRegistry) {
2340
0
                                        attDefForWildCard = attRegistry->get(suffPtr);
2341
0
                                    }
2342
0
                                }
2343
0
                            }
2344
0
                        }
2345
0
                    }
2346
0
                    else if (currType) {
2347
                        // not found, see if the attDef should be qualified or not
2348
0
                        if (uriId == fEmptyNamespaceId) {
2349
0
                            attDef = currType->getAttDef(suffPtr, fURIStringPool->getId(fGrammar->getTargetNamespace()));
2350
0
                            if (fValidate
2351
0
                                && attDef
2352
0
                                && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
2353
                                // the attribute should be qualified
2354
0
                                fValidator->emitError
2355
0
                                (
2356
0
                                    XMLValid::AttributeNotQualified
2357
0
                                    , attDef->getFullName()
2358
0
                                );
2359
0
                                fPSVIElemContext.fErrorOccurred = true;
2360
0
                                if (getPSVIHandler())
2361
0
                                {
2362
0
                                    attrValid = PSVIItem::VALIDITY_INVALID;
2363
0
                                }
2364
0
                            }
2365
0
                        }
2366
0
                        else {
2367
0
                            attDef = currType->getAttDef(suffPtr, fEmptyNamespaceId);
2368
0
                            if (fValidate
2369
0
                                && attDef
2370
0
                                && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
2371
                                // the attribute should be unqualified
2372
0
                                fValidator->emitError
2373
0
                                (
2374
0
                                    XMLValid::AttributeNotUnQualified
2375
0
                                    , attDef->getFullName()
2376
0
                                );
2377
0
                                fPSVIElemContext.fErrorOccurred = true;
2378
0
                                if (getPSVIHandler())
2379
0
                                {
2380
0
                                    attrValid = PSVIItem::VALIDITY_INVALID;
2381
0
                                }
2382
0
                            }
2383
0
                        }
2384
0
                    }
2385
0
                }
2386
0
            }
2387
2388
            // now need to prepare for duplicate detection
2389
0
            if(attDef)
2390
0
            {
2391
0
                unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
2392
0
                if(!curCountPtr)
2393
0
                {
2394
0
                    curCountPtr = getNewUIntPtr();
2395
0
                    *curCountPtr = fElemCount;
2396
0
                    fAttDefRegistry->put(attDef, curCountPtr);
2397
0
                }
2398
0
                else if(*curCountPtr < fElemCount)
2399
0
                    *curCountPtr = fElemCount;
2400
0
                else
2401
0
                {
2402
0
                    emitError
2403
0
                    (
2404
0
                        XMLErrs::AttrAlreadyUsedInSTag
2405
0
                        , attDef->getFullName()
2406
0
                        , elemDecl->getFullName()
2407
0
                    );
2408
0
                    fPSVIElemContext.fErrorOccurred = true;
2409
0
                }
2410
0
            }
2411
0
            else
2412
0
            {
2413
0
                if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
2414
0
                {
2415
0
                    emitError
2416
0
                    (
2417
0
                        XMLErrs::AttrAlreadyUsedInSTag
2418
0
                        , namePtr
2419
0
                        , elemDecl->getFullName()
2420
0
                    );
2421
0
                    fPSVIElemContext.fErrorOccurred = true;
2422
0
                }
2423
0
            }
2424
2425
            // if we've found either an attDef or an attDefForWildCard,
2426
            // then we're doing full validation and it may still be valid.
2427
0
            if(!attDef && !attDefForWildCard)
2428
0
            {
2429
0
                if(!laxThisOne && !skipThisOne)
2430
0
                {
2431
0
                    fPSVIElemContext.fErrorOccurred = true;
2432
0
                }
2433
0
                if(getPSVIHandler())
2434
0
                {
2435
0
                    if(!laxThisOne && !skipThisOne)
2436
0
                    {
2437
0
                        attrValid = PSVIItem::VALIDITY_INVALID;
2438
0
                    }
2439
0
                    else if(laxThisOne)
2440
0
                    {
2441
0
                        attrValid = PSVIItem::VALIDITY_NOTKNOWN;
2442
0
                        attrAssessed = PSVIItem::VALIDATION_PARTIAL;
2443
0
                    }
2444
0
                    else
2445
0
                    {
2446
0
                        attrValid = PSVIItem::VALIDITY_NOTKNOWN;
2447
0
                        attrAssessed = PSVIItem::VALIDATION_NONE;
2448
0
                    }
2449
0
                }
2450
0
            }
2451
2452
0
            bool errorCondition = fValidate && !attDefForWildCard && !attDef;
2453
0
            if (errorCondition && !skipThisOne && !laxThisOne)
2454
0
            {
2455
                //
2456
                //  Its not valid for this element, so issue an error if we are
2457
                //  validating.
2458
                //
2459
0
                XMLBufBid bbMsg(&fBufMgr);
2460
0
                XMLBuffer& bufMsg = bbMsg.getBuffer();
2461
0
                if (uriId != fEmptyNamespaceId) {
2462
0
                    XMLBufBid bbURI(&fBufMgr);
2463
0
                    XMLBuffer& bufURI = bbURI.getBuffer();
2464
2465
0
                    getURIText(uriId, bufURI);
2466
2467
0
                    bufMsg.append(chOpenCurly);
2468
0
                    bufMsg.append(bufURI.getRawBuffer());
2469
0
                    bufMsg.append(chCloseCurly);
2470
0
                }
2471
0
                bufMsg.append(suffPtr);
2472
0
                fValidator->emitError
2473
0
                (
2474
0
                    XMLValid::AttNotDefinedForElement
2475
0
                    , bufMsg.getRawBuffer()
2476
0
                    , elemDecl->getFullName()
2477
0
                );
2478
0
            }
2479
2480
            //  Now normalize the raw value since we have the attribute type. We
2481
            //  don't care about the return status here. If it failed, an error
2482
            //  was issued, which is all we care about.
2483
0
            if (attDefForWildCard) {
2484
0
                normalizeAttValue(
2485
0
                    attDefForWildCard, namePtr, curPair->getValue(), normBuf
2486
0
                );
2487
2488
                //  If we found an attdef for this one, then lets validate it.
2489
0
                const XMLCh* xsNormalized = normBuf.getRawBuffer();
2490
0
                DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
2491
0
                if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
2492
0
                {
2493
                    // normalize the attribute according to schema whitespace facet
2494
0
                    ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
2495
0
                    xsNormalized = fWSNormalizeBuf.getRawBuffer();
2496
0
                    if (fNormalizeData && fValidate) {
2497
0
                        normBuf.set(xsNormalized);
2498
0
                    }
2499
0
                }
2500
2501
0
                if (fValidate ) {
2502
0
                    fValidator->validateAttrValue(
2503
0
                        attDefForWildCard, xsNormalized, false, elemDecl
2504
0
                    );
2505
0
                    attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
2506
0
                    if(((SchemaValidator *)fValidator)->getErrorOccurred())
2507
0
                    {
2508
0
                        fPSVIElemContext.fErrorOccurred = true;
2509
0
                        if(getPSVIHandler())
2510
0
                            attrValid = PSVIItem::VALIDITY_INVALID;
2511
0
                    }
2512
0
                }
2513
0
                else { // no decl; default DOMTypeInfo to anySimpleType
2514
0
                    attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
2515
0
                }
2516
2517
                // Save the type for later use
2518
0
                attType = attDefForWildCard->getType();
2519
0
            }
2520
0
            else {
2521
0
                normalizeAttValue(
2522
0
                    attDef, namePtr, curPair->getValue(), normBuf
2523
0
                );
2524
2525
                //  If we found an attdef for this one, then lets validate it.
2526
0
                if (attDef)
2527
0
                {
2528
0
                    const XMLCh* xsNormalized = normBuf.getRawBuffer();
2529
0
                    if (fGrammarType == Grammar::SchemaGrammarType)
2530
0
                    {
2531
0
                        DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator();
2532
0
                        if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
2533
0
                        {
2534
                            // normalize the attribute according to schema whitespace facet
2535
0
                            ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
2536
0
                            xsNormalized = fWSNormalizeBuf.getRawBuffer();
2537
0
                            if (fNormalizeData && fValidate && !skipThisOne) {
2538
0
                                normBuf.set(xsNormalized);
2539
0
                            }
2540
0
                        }
2541
0
                    }
2542
2543
0
                    if (fValidate && !skipThisOne)
2544
0
                    {
2545
0
                        fValidator->validateAttrValue(
2546
0
                            attDef, xsNormalized, false, elemDecl
2547
0
                        );
2548
0
                        attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
2549
0
                        if(((SchemaValidator *)fValidator)->getErrorOccurred())
2550
0
                        {
2551
0
                            fPSVIElemContext.fErrorOccurred = true;
2552
0
                            if(getPSVIHandler())
2553
0
                                attrValid = PSVIItem::VALIDITY_INVALID;
2554
0
                        }
2555
0
                    }
2556
0
                    else {
2557
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
2558
0
                    }
2559
0
                }
2560
0
                else {
2561
0
                    attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
2562
0
                }
2563
2564
                // Save the type for later use
2565
0
                if (attDef)
2566
0
                {
2567
0
                    attType = attDef->getType();
2568
0
                }
2569
0
            }
2570
2571
            // now fill in the PSVIAttributes entry for this attribute:
2572
0
          if(getPSVIHandler())
2573
0
          {
2574
0
              psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
2575
0
              SchemaAttDef *actualAttDef = 0;
2576
0
              if(attDef)
2577
0
                  actualAttDef = (SchemaAttDef *)attDef;
2578
0
              else if (attDefForWildCard)
2579
0
                  actualAttDef = (SchemaAttDef *)attDefForWildCard;
2580
0
                if(actualAttDef)
2581
0
                {
2582
0
                  XSAttributeDeclaration *attrDecl = (XSAttributeDeclaration *)fModel->getXSObject(actualAttDef);
2583
0
                    DatatypeValidator * attrDataType = actualAttDef->getDatatypeValidator();
2584
0
                  XSSimpleTypeDefinition *validatingType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrDataType);
2585
0
                  if(attrValid != PSVIItem::VALIDITY_VALID)
2586
0
                  {
2587
0
                      psviAttr->reset
2588
0
                        (
2589
0
                          fRootElemName
2590
0
                          , attrValid
2591
0
                          , attrAssessed
2592
0
                          , validatingType
2593
0
                          , 0
2594
0
                          , actualAttDef->getValue()
2595
0
                          , false
2596
0
                          , attrDecl
2597
0
                            , 0
2598
0
                      );
2599
0
                  }
2600
0
                  else
2601
0
                  {
2602
0
                      XSSimpleTypeDefinition *memberType = 0;
2603
0
                      if(validatingType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
2604
0
                          memberType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator);
2605
0
                      psviAttr->reset
2606
0
                        (
2607
0
                          fRootElemName
2608
0
                          , attrValid
2609
0
                          , attrAssessed
2610
0
                          , validatingType
2611
0
                          , memberType
2612
0
                          , actualAttDef->getValue()
2613
0
                          , false
2614
0
                          , attrDecl
2615
0
                            , (memberType)?attrValidator:attrDataType
2616
0
                      );
2617
0
                  }
2618
0
                }
2619
0
                else
2620
0
                {
2621
0
                  psviAttr->reset
2622
0
                    (
2623
0
                      fRootElemName
2624
0
                      , attrValid
2625
0
                      , attrAssessed
2626
0
                      , 0
2627
0
                      , 0
2628
0
                        , 0
2629
0
                      , false
2630
0
                      , 0
2631
0
                        , 0
2632
0
                  );
2633
0
                }
2634
0
          }
2635
0
        }
2636
2637
        //  Add this attribute to the attribute list that we use to pass them
2638
        //  to the handler. We reuse its existing elements but expand it as
2639
        //  required.
2640
0
        XMLAttr* curAttr;
2641
0
        if (retCount >= curAttListSize)
2642
0
        {
2643
0
            curAttr = new (fMemoryManager) XMLAttr
2644
0
            (
2645
0
                uriId
2646
0
                , suffPtr
2647
0
                , prefPtr
2648
0
                , normBuf.getRawBuffer()
2649
0
                , attType
2650
0
                , true
2651
0
                , fMemoryManager
2652
0
            );
2653
0
            toFill.addElement(curAttr);
2654
0
        }
2655
0
        else
2656
0
        {
2657
0
            curAttr = toFill.elementAt(retCount);
2658
0
            curAttr->set
2659
0
            (
2660
0
                uriId
2661
0
                , suffPtr
2662
0
                , prefPtr
2663
0
                , normBuf.getRawBuffer()
2664
0
                , attType
2665
0
            );
2666
0
            curAttr->setSpecified(true);
2667
0
        }
2668
0
        if(psviAttr)
2669
0
            psviAttr->setValue(curAttr->getValue());
2670
2671
        // Bump the count of attrs in the list
2672
0
        retCount++;
2673
0
    }
2674
2675
    //  Now, if there are any attributes declared by this element, let's
2676
    //  go through them and make sure that any required ones are provided,
2677
    //  and fault in any fixed ones and defaulted ones that are not provided
2678
    //  literally.
2679
0
    if (hasDefs)
2680
0
    {
2681
        // Check after all specified attrs are scanned
2682
        // (1) report error for REQUIRED attrs that are missing (V_TAGc)
2683
        // (2) add default attrs if missing (FIXED and NOT_FIXED)
2684
2685
0
        XMLAttDefList& attDefList = getAttDefList(currType, elemDecl);
2686
2687
0
        for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
2688
0
        {
2689
            // Get the current att def, for convenience and its def type
2690
0
            XMLAttDef *curDef = &attDefList.getAttDef(i);
2691
0
            const XMLAttDef::DefAttTypes defType = curDef->getDefaultType();
2692
2693
0
            unsigned int *attCountPtr = fAttDefRegistry->get(curDef);
2694
0
            if (!attCountPtr || *attCountPtr < fElemCount)
2695
0
            { // did not occur
2696
                // note that since there is no attribute information
2697
                // item present, there is no PSVI infoset to augment here *except*
2698
                // that the element is invalid
2699
2700
                //the attribute is not provided
2701
0
                if (fValidate)
2702
0
                {
2703
                    // If we are validating and its required, then an error
2704
0
                    if ((defType == XMLAttDef::Required) ||
2705
0
                        (defType == XMLAttDef::Required_And_Fixed)  )
2706
2707
0
                    {
2708
0
                        fValidator->emitError
2709
0
                        (
2710
0
                            XMLValid::RequiredAttrNotProvided
2711
0
                            , curDef->getFullName()
2712
0
                        );
2713
0
                        fPSVIElemContext.fErrorOccurred = true;
2714
0
                    }
2715
0
                    else if ((defType == XMLAttDef::Default) ||
2716
0
                             (defType == XMLAttDef::Fixed)  )
2717
0
                    {
2718
0
                        if (fStandalone && curDef->isExternal())
2719
0
                        {
2720
                            // XML 1.0 Section 2.9
2721
                            // Document is standalone, so attributes must not be defaulted.
2722
0
                            fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName());
2723
0
                        }
2724
0
                    }
2725
0
                }
2726
2727
                //  Fault in the value if needed, and bump the att count.
2728
0
                if ((defType == XMLAttDef::Default)
2729
0
                    ||  (defType == XMLAttDef::Fixed))
2730
0
                {
2731
                    // Let the validator pass judgement on the attribute value
2732
0
                    if (fValidate)
2733
0
                    {
2734
0
                        fValidator->validateAttrValue
2735
0
                        (
2736
0
                            curDef
2737
0
                            , curDef->getValue()
2738
0
                            , false
2739
0
                            , elemDecl
2740
0
                        );
2741
0
                    }
2742
2743
0
                    XMLAttr* curAtt;
2744
0
                    if (retCount >= curAttListSize)
2745
0
                    {
2746
0
                        curAtt = new (fMemoryManager) XMLAttr(fMemoryManager);
2747
0
                        fValidator->faultInAttr(*curAtt, *curDef);
2748
0
                        fAttrList->addElement(curAtt);
2749
0
                    }
2750
0
                    else
2751
0
                    {
2752
0
                        curAtt = fAttrList->elementAt(retCount);
2753
0
                        fValidator->faultInAttr(*curAtt, *curDef);
2754
0
                    }
2755
2756
                    // Indicate it was not explicitly specified and bump count
2757
0
                    curAtt->setSpecified(false);
2758
0
                    retCount++;
2759
0
                    if(getPSVIHandler())
2760
0
                    {
2761
0
                        QName *attName = ((SchemaAttDef *)curDef)->getAttName();
2762
0
                        PSVIAttribute *defAttrToFill = fPSVIAttrList->getPSVIAttributeToFill
2763
0
                        (
2764
0
                            attName->getLocalPart(), fURIStringPool->getValueForId( attName->getURI())
2765
0
                        );
2766
0
                        XSAttributeDeclaration *defAttrDecl = (XSAttributeDeclaration *)fModel->getXSObject((void *)curDef);
2767
0
                        DatatypeValidator * attrDataType = ((SchemaAttDef *)curDef)->getDatatypeValidator();
2768
0
                        XSSimpleTypeDefinition *defAttrType =
2769
0
                            (XSSimpleTypeDefinition*)fModel->getXSObject(attrDataType);
2770
                        // would have occurred during validation of default value
2771
0
                        if(((SchemaValidator *)fValidator)->getErrorOccurred())
2772
0
                        {
2773
0
                            defAttrToFill->reset(
2774
0
                                fRootElemName
2775
0
                                , PSVIItem::VALIDITY_INVALID
2776
0
                                , PSVIItem::VALIDATION_FULL
2777
0
                                , defAttrType
2778
0
                                , 0
2779
0
                                , curDef->getValue()
2780
0
                                , true
2781
0
                                , defAttrDecl
2782
0
                                , 0
2783
0
                            );
2784
0
                        }
2785
0
                        else
2786
0
                        {
2787
0
                            XSSimpleTypeDefinition *defAttrMemberType = 0;
2788
0
                            if(defAttrType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
2789
0
                            {
2790
0
                                defAttrMemberType = (XSSimpleTypeDefinition *)fModel->getXSObject
2791
0
                                (
2792
0
                                    ((SchemaValidator*)fValidator)->getMostRecentAttrValidator()
2793
0
                                );
2794
0
                            }
2795
0
                            defAttrToFill->reset
2796
0
                            (
2797
0
                                fRootElemName
2798
0
                                , PSVIItem::VALIDITY_VALID
2799
0
                                , PSVIItem::VALIDATION_FULL
2800
0
                                , defAttrType
2801
0
                                , defAttrMemberType
2802
0
                                , curDef->getValue()
2803
0
                                , true
2804
0
                                , defAttrDecl
2805
0
                                , (defAttrMemberType)?((SchemaValidator *)fValidator)->getMostRecentAttrValidator():attrDataType
2806
0
                            );
2807
0
                        }
2808
0
                        defAttrToFill->setValue(curDef->getValue());
2809
0
                    }
2810
0
                }
2811
0
            }
2812
0
            else if (attCountPtr)
2813
0
            {
2814
                //attribute is provided
2815
                // (schema) report error for PROHIBITED attrs that are present (V_TAGc)
2816
0
                if (defType == XMLAttDef::Prohibited && fValidate)
2817
0
                {
2818
0
                    fValidator->emitError
2819
0
                    (
2820
0
                        XMLValid::ProhibitedAttributePresent
2821
0
                        , curDef->getFullName()
2822
0
                    );
2823
0
                    fPSVIElemContext.fErrorOccurred = true;
2824
0
                    if (getPSVIHandler())
2825
0
                    {
2826
0
                        QName *attQName = ((SchemaAttDef *)curDef)->getAttName();
2827
                        // bad luck...
2828
0
                        PSVIAttribute *prohibitedAttr = fPSVIAttrList->getAttributePSVIByName
2829
0
                        (
2830
0
                            attQName->getLocalPart(),
2831
0
                            fURIStringPool->getValueForId(attQName->getURI())
2832
0
                        );
2833
0
                        prohibitedAttr->updateValidity(PSVIItem::VALIDITY_INVALID);
2834
0
                    }
2835
0
                }
2836
0
            }
2837
0
        }
2838
0
    }
2839
2840
0
    return retCount;
2841
0
}
2842
2843
2844
//  This method will take a raw attribute value and normalize it according to
2845
//  the rules of the attribute type. It will put the resulting value into the
2846
//  passed buffer.
2847
//
2848
//  This code assumes that escaped characters in the original value (via char
2849
//  refs) are prefixed by a 0xFFFF character. This is because some characters
2850
//  are legal if escaped only. And some escape chars are not subject to
2851
//  normalization rules.
//
//  attDef  - declaration of the attribute; may be null, in which case the
//            value is handled as CDATA and no standalone check is raised.
//  attName - attribute name, used only as the argument of emitted errors.
//  value   - null-terminated raw value to normalize.
//  toFill  - buffer that is reset and then receives the normalized value.
//
//  Returns false if an unescaped '<' was seen in the value (the character is
//  still copied into toFill), true otherwise.
2852
bool SGXMLScanner::normalizeAttValue( const   XMLAttDef* const    attDef
2853
                                      , const XMLCh* const        attName
2854
                                      , const XMLCh* const        value
2855
                                      ,       XMLBuffer&          toFill)
2856
0
{
2857
    // States of the whitespace-collapsing state machine used for tokenized types
2858
0
    enum States
2859
0
    {
2860
0
        InWhitespace
2861
0
        , InContent
2862
0
    };
2863
2864
    // Get the attribute type; with no declaration the value is treated as CDATA
2865
0
    const XMLAttDef::AttTypes type = (attDef)
2866
0
                            ?attDef->getType()
2867
0
                            :XMLAttDef::CData;
2868
2869
    // Assume it's going to go fine, and empty the target buffer in preparation
2870
0
    bool retVal = true;
2871
0
    toFill.reset();
2872
2873
    // Check to see if it's a tokenized type that is declared externally; the
    // standalone validity errors below are only raised when this is true
2874
0
    bool  isAttTokenizedExternal = (attDef)
2875
0
                                   ?attDef->isExternal() && (type == XMLAttDef::ID || 
2876
0
                                                             type == XMLAttDef::IDRef || 
2877
0
                                                             type == XMLAttDef::IDRefs || 
2878
0
                                                             type == XMLAttDef::Entity || 
2879
0
                                                             type == XMLAttDef::Entities || 
2880
0
                                                             type == XMLAttDef::NmToken || 
2881
0
                                                             type == XMLAttDef::NmTokens)
2882
0
                                   :false;
2883
2884
    //  Loop through the chars of the source value and normalize it according
2885
    //  to the type. curState and firstNonWS are only used by the tokenized
    //  branch; firstNonWS records whether any non-whitespace character has
    //  been seen yet.
2886
0
    States curState = InContent;
2887
0
    bool firstNonWS = false;
2888
0
    XMLCh nextCh;
2889
0
    const XMLCh* srcPtr = value;
2890
    // CDATA, and any type ordered after Notation: unescaped tab/LF/CR are
    // replaced by a space; whitespace is neither collapsed nor trimmed.
2891
0
    if (type == XMLAttDef::CData || type > XMLAttDef::Notation) {
2892
0
        while (*srcPtr) {
2893
            //  Get the next character from the source. We have to watch for
2894
            //  escaped characters (which are indicated by a 0xFFFF value followed
2895
            //  by the char that was escaped.)
2896
0
            nextCh = *srcPtr;
2897
2898
            // Do we have an escaped character ? If so, skip the marker and take
            // the following char as-is (no whitespace replacement, no '<' check).
            // NOTE(review): assumes a character always follows the 0xFFFF marker;
            // if the marker were last, the terminator would be appended and the
            // srcPtr++ below would step past the end of the value - confirm.
2899
0
            if (nextCh == 0xFFFF)
2900
0
            {
2901
0
                nextCh = *++srcPtr;
2902
0
            }
2903
0
            else if ( (nextCh <= 0x0D) && (nextCh == 0x09 || nextCh == 0x0A || nextCh == 0x0D) ) {
2904
                // Check Validity Constraint for Standalone document declaration
2905
                // XML 1.0, Section 2.9
                // NOTE(review): isAttTokenizedExternal is only set for the tokenized
                // types listed above; whether it can be true in this branch depends
                // on the ordering of XMLAttDef::AttTypes - confirm.
2906
0
                if (fStandalone && fValidate && isAttTokenizedExternal)
2907
0
                {
2908
                     // Can't have a standalone document declaration of "yes" if  attribute
2909
                     // values are subject to normalisation
2910
0
                     fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
2911
0
                }
2912
0
                nextCh = chSpace;
2913
0
            }
2914
0
            else if (nextCh == chOpenAngle) {
2915
                //  An unescaped < character is not allowed in attribute values:
2916
                //  report it and remember the failure. The char is still appended.
2917
0
                emitError(XMLErrs::BracketInAttrValue, attName);
2918
0
                retVal = false;
2919
0
            }
2920
2921
            // Add this char to the target buffer
2922
0
            toFill.append(nextCh);
2923
2924
            // And move up to the next character in the source
2925
0
            srcPtr++;
2926
0
        }
2927
0
    }
    // All other (tokenized) types: leading and trailing whitespace is dropped
    // and each interior run of whitespace becomes a single space.
2928
0
    else {
2929
0
        while (*srcPtr)
2930
0
        {
2931
            //  Get the next character from the source. We have to watch for
2932
            //  escaped characters (which are indicated by a 0xFFFF value followed
2933
            //  by the char that was escaped.)
2934
0
            nextCh = *srcPtr;
2935
2936
            // Do we have an escaped character ? The escaped char skips the '<'
            // check but still goes through the whitespace state machine below.
            // NOTE(review): same assumption as above that a character always
            // follows the 0xFFFF marker - confirm.
2937
0
            if (nextCh == 0xFFFF)
2938
0
            {
2939
0
                nextCh = *++srcPtr;
2940
0
            }
2941
0
            else if (nextCh == chOpenAngle) {
2942
                //  An unescaped < character is not allowed in attribute values:
2943
                //  report it and remember the failure. The char is still processed.
2944
0
                emitError(XMLErrs::BracketInAttrValue, attName);
2945
0
                retVal = false;
2946
0
            }
2947
            // Inside a whitespace run: skip further whitespace; on the first
            // non-whitespace char emit one separating space, unless the run was
            // leading (no content seen yet), then fall through to append it.
2948
0
            if (curState == InWhitespace)
2949
0
            {
2950
0
                if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
2951
0
                {
2952
0
                    if (firstNonWS)
2953
0
                        toFill.append(chSpace);
2954
0
                    curState = InContent;
2955
0
                    firstNonWS = true;
2956
0
                }
2957
0
                else
2958
0
                {
2959
0
                    srcPtr++;
2960
0
                    continue;
2961
0
                }
2962
0
            }
            // Inside content: a whitespace char starts a run and is not copied.
2963
0
            else if (curState == InContent)
2964
0
            {
2965
0
                if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
2966
0
                {
2967
0
                    curState = InWhitespace;
2968
0
                    srcPtr++;
2969
2970
                    // Check Validity Constraint for Standalone document declaration
2971
                    // XML 1.0, Section 2.9
2972
0
                    if (fStandalone && fValidate && isAttTokenizedExternal)
2973
0
                    {
                        // Error if this whitespace is leading, or if it is not a
                        // plain space and the next source char is also whitespace
                        // (srcPtr already points at that next char here).
2974
0
                        if (!firstNonWS || (nextCh != chSpace && *srcPtr && fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr)))
2975
0
                        {
2976
                            // Can't have a standalone document declaration of "yes" if  attribute
2977
                            // values are subject to normalisation
2978
0
                            fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
2979
0
                        }
2980
0
                    }
2981
0
                    continue;
2982
0
                }
2983
0
                firstNonWS = true;
2984
0
            }
2985
2986
            // Add this char to the target buffer
2987
0
            toFill.append(nextCh);
2988
2989
            // And move up to the next character in the source
2990
0
            srcPtr++;
2991
0
        }
2992
0
    }
2993
2994
0
    return retVal;
2995
0
}
2996
2997
//  This method will just normalize the input value as CDATA without
2998
//  any standalone checking.
2999
bool SGXMLScanner::normalizeAttRawValue( const   XMLCh* const        attrName
3000
                                      , const XMLCh* const        value
3001
                                      ,       XMLBuffer&          toFill)
3002
0
{
3003
    // Assume its going to go fine, and empty the target buffer in preperation
3004
0
    bool retVal = true;
3005
0
    toFill.reset();
3006
3007
    //  Loop through the chars of the source value and normalize it according
3008
    //  to the type.
3009
0
    bool escaped;
3010
0
    XMLCh nextCh;
3011
0
    const XMLCh* srcPtr = value;
3012
0
    while (*srcPtr)
3013
0
    {
3014
        //  Get the next character from the source. We have to watch for
3015
        //  escaped characters (which are indicated by a 0xFFFF value followed
3016
        //  by the char that was escaped.)
3017
0
        nextCh = *srcPtr;
3018
0
        escaped = (nextCh == 0xFFFF);
3019
0
        if (escaped)
3020
0
            nextCh = *++srcPtr;
3021
3022
        //  If its not escaped, then make sure its not a < character, which is
3023
        //  not allowed in attribute values.
3024
0
        if (!escaped && (*srcPtr == chOpenAngle))
3025
0
        {
3026
0
            emitError(XMLErrs::BracketInAttrValue, attrName);
3027
0
            retVal = false;
3028
0
        }
3029
3030
0
        if (!escaped)
3031
0
        {
3032
            //  NOTE: Yes this is a little redundant in that a 0x20 is
3033
            //  replaced with an 0x20. But its faster to do this (I think)
3034
            //  than checking for 9, A, and D separately.
3035
0
            if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
3036
0
                nextCh = chSpace;
3037
0
        }
3038
3039
        // Add this char to the target buffer
3040
0
        toFill.append(nextCh);
3041
3042
        // And move up to the next character in the source
3043
0
        srcPtr++;
3044
0
    }
3045
0
    return retVal;
3046
0
}
3047
3048
//  This method will reset the scanner data structures, and related plugged
3049
//  in stuff, for a new scan session. We get the input source for the primary
3050
//  XML entity, create the reader for it, and push it on the stack so that
3051
//  upon successful return from here we are ready to go.
3052
void SGXMLScanner::scanReset(const InputSource& src)
3053
0
{
3054
3055
    //  This call implicitly tells us that we are going to reuse the scanner
3056
    //  if it was previously used. So tell the validator to reset itself.
3057
    //
3058
    //  But, if the fUseCacheGrammar flag is set, then don't reset it.
3059
    //
3060
    //  NOTE:   The ReaderMgr is flushed on the way out, because that is
3061
    //          required to insure that files are closed.
3062
0
    fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar);
3063
0
    fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar);
3064
3065
    // Clear transient schema info list.
3066
    //
3067
0
    fSchemaInfoList->removeAll ();
3068
3069
    // fModel may need updating, as fGrammarResolver could have cleaned it
3070
0
    if(fModel && getPSVIHandler())
3071
0
        fModel = fGrammarResolver->getXSModel();
3072
3073
    // Create dummy schema grammar
3074
0
    if (!fSchemaGrammar) {
3075
0
        fSchemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
3076
0
    }
3077
3078
0
    fGrammar = fSchemaGrammar;
3079
0
    fGrammarType = Grammar::DTDGrammarType;
3080
0
    fRootGrammar = 0;
3081
3082
0
    fValidator->setGrammar(fGrammar);
3083
0
    if (fValidatorFromUser) {
3084
3085
0
        ((SchemaValidator*) fValidator)->setErrorReporter(fErrorReporter);
3086
0
        ((SchemaValidator*) fValidator)->setGrammarResolver(fGrammarResolver);
3087
0
        ((SchemaValidator*) fValidator)->setExitOnFirstFatal(fExitOnFirstFatal);
3088
0
    }
3089
3090
    // Reset validation
3091
0
    fValidate = (fValScheme == Val_Always) ? true : false;
3092
3093
    //  And for all installed handlers, send reset events. This gives them
3094
    //  a chance to flush any cached data.
3095
0
    if (fDocHandler)
3096
0
        fDocHandler->resetDocument();
3097
0
    if (fEntityHandler)
3098
0
        fEntityHandler->resetEntities();
3099
0
    if (fErrorReporter)
3100
0
        fErrorReporter->resetErrors();
3101
3102
    // Clear out the id reference list
3103
0
    resetValidationContext();
3104
3105
    // Reset the Root Element Name
3106
0
    fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
3107
0
    fRootElemName = 0;
3108
3109
    // Reset IdentityConstraints
3110
0
    if (fICHandler)
3111
0
        fICHandler->reset();
3112
3113
    //  Reset the element stack, and give it the latest ids for the special
3114
    //  URIs it has to know about.
3115
0
    fElemStack.reset
3116
0
    (
3117
0
        fEmptyNamespaceId
3118
0
        , fUnknownNamespaceId
3119
0
        , fXMLNamespaceId
3120
0
        , fXMLNSNamespaceId
3121
0
    );
3122
3123
0
    if (!fSchemaNamespaceId)
3124
0
        fSchemaNamespaceId  = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI);
3125
3126
    // Reset some status flags
3127
0
    fInException = false;
3128
0
    fStandalone = false;
3129
0
    fErrorCount = 0;
3130
0
    fHasNoDTD = true;
3131
0
    fSeeXsi = false;
3132
0
    fDoNamespaces = true;
3133
0
    fDoSchema = true;
3134
3135
    // Reset PSVI context
3136
    // Note that we always need this around for DOMTypeInfo
3137
0
    if (!fPSVIElement)
3138
0
        fPSVIElement = new (fMemoryManager) PSVIElement(fMemoryManager);
3139
3140
0
    if (!fErrorStack)
3141
0
    {
3142
0
        fErrorStack = new (fMemoryManager) ValueStackOf<bool>(8, fMemoryManager);
3143
0
    }
3144
0
    else
3145
0
    {
3146
0
        fErrorStack->removeAllElements();
3147
0
    }
3148
3149
0
    resetPSVIElemContext();
3150
3151
    // Reset the validators
3152
0
    fSchemaValidator->reset();
3153
0
    fSchemaValidator->setErrorReporter(fErrorReporter);
3154
0
    fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
3155
0
    fSchemaValidator->setGrammarResolver(fGrammarResolver);
3156
0
    if (fValidatorFromUser)
3157
0
        fValidator->reset();
3158
3159
    //  Handle the creation of the XML reader object for this input source.
3160
    //  This will provide us with transcoding and basic lexing services.
3161
0
    XMLReader* newReader = fReaderMgr.createReader
3162
0
    (
3163
0
        src
3164
0
        , true
3165
0
        , XMLReader::RefFrom_NonLiteral
3166
0
        , XMLReader::Type_General
3167
0
        , XMLReader::Source_External
3168
0
        , fCalculateSrcOfs
3169
0
        , fLowWaterMark
3170
0
    );
3171
3172
0
    if (!newReader) {
3173
0
        if (src.getIssueFatalErrorIfNotFound())
3174
0
            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
3175
0
        else
3176
0
            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
3177
0
    }
3178
3179
    // Push this read onto the reader manager
3180
0
    fReaderMgr.pushReader(newReader, 0);
3181
3182
    // and reset security-related things if necessary:
3183
0
    if(fSecurityManager != 0)
3184
0
    {
3185
0
        fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
3186
0
        fEntityExpansionCount = 0;
3187
0
    }
3188
0
    fElemCount = 0;
3189
0
    if(fUIntPoolRowTotal >= 32)
3190
0
    { // 8 KB tied up with validating attributes...
3191
0
        fAttDefRegistry->removeAll();
3192
0
        recreateUIntPool();
3193
0
    }
3194
0
    else
3195
0
    {
3196
        // note that this will implicitly reset the values of the hashtables,
3197
        // though their buckets will still be tied up
3198
0
        resetUIntPool();
3199
0
    }
3200
0
    fUndeclaredAttrRegistry->removeAll();
3201
0
}
3202
3203
3204
//  This method is called between markup in content. It scans for character
3205
//  data that is sent to the document handler. It watches for any markup
3206
//  characters that would indicate that the character data has ended. It also
3207
//  handles expansion of general and character entities.
3208
//
3209
//  sendData() is a local static helper for this method which handles some
3210
//  code that must be done in three different places here.
3211
void SGXMLScanner::sendCharData(XMLBuffer& toSend)
3212
0
{
3213
    // If no data in the buffer, then nothing to do
3214
0
    if (toSend.isEmpty())
3215
0
        return;
3216
3217
    //  We do different things according to whether we are validating or
3218
    //  not. If not, its always just characters; else, it depends on the
3219
    //  current element's content model.
3220
0
    if (fValidate)
3221
0
    {
3222
        // Get the raw data we need for the callback
3223
0
        const XMLCh* rawBuf = toSend.getRawBuffer();
3224
0
        const XMLSize_t len = toSend.getLen();
3225
3226
        // Get the character data opts for the current element
3227
0
        XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
3228
        // And see if the current element is a 'Children' style content model
3229
0
        ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
3230
0
        if(currType)
3231
0
        {
3232
0
            SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
3233
0
            if(modelType == SchemaElementDecl::Children ||
3234
0
               modelType == SchemaElementDecl::ElementOnlyEmpty)
3235
0
                charOpts = XMLElementDecl::SpacesOk;
3236
0
            else if(modelType == SchemaElementDecl::Empty)
3237
0
                charOpts = XMLElementDecl::NoCharData;
3238
0
        }
3239
3240
        // should not be necessary once PSVI method on element decls
3241
        // are removed
3242
0
        if (charOpts == XMLElementDecl::NoCharData)
3243
0
        {
3244
            // They definitely cannot handle any type of char data
3245
0
            fValidator->emitError(XMLValid::NoCharDataInCM);
3246
0
            if (getPSVIHandler())
3247
0
            {
3248
                // REVISIT:
3249
                // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
3250
0
            }
3251
0
        }
3252
0
        else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len))
3253
0
        {
3254
            //  Its all spaces. So, if they can take spaces, then send it
3255
            //  as ignorable whitespace. If they can handle any char data
3256
            //  send it as characters.
3257
0
            if (charOpts == XMLElementDecl::SpacesOk) {
3258
0
                if (fDocHandler)
3259
0
                    fDocHandler->ignorableWhitespace(rawBuf, len, false);
3260
0
            }
3261
0
            else if (charOpts == XMLElementDecl::AllCharData)
3262
0
            {
3263
0
                XMLSize_t xsLen;
3264
0
                const XMLCh* xsNormalized;
3265
0
                DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
3266
0
                if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
3267
0
                {
3268
                    // normalize the character according to schema whitespace facet
3269
0
                    ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
3270
0
                    xsNormalized = fWSNormalizeBuf.getRawBuffer();
3271
0
                    xsLen = fWSNormalizeBuf.getLen();
3272
0
                }
3273
0
                else {
3274
0
                    xsNormalized = rawBuf;
3275
0
                    xsLen = len;
3276
0
                }
3277
3278
                // tell the schema validation about the character data for checkContent later
3279
0
                ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
3280
3281
                // call all active identity constraints
3282
0
                if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
3283
0
                    fContent.append(xsNormalized, xsLen);
3284
0
                }
3285
3286
0
                if (fDocHandler) {
3287
0
                    if (fNormalizeData) {
3288
0
                        fDocHandler->docCharacters(xsNormalized, xsLen, false);
3289
0
                    }
3290
0
                    else {
3291
0
                        fDocHandler->docCharacters(rawBuf, len, false);
3292
0
                    }
3293
0
                }
3294
0
            }
3295
0
        }
3296
0
        else
3297
0
        {
3298
            //  If they can take any char data, then send it. Otherwise, they
3299
            //  can only handle whitespace and can't handle this stuff so
3300
            //  issue an error.
3301
0
            if (charOpts == XMLElementDecl::AllCharData)
3302
0
            {
3303
0
                XMLSize_t xsLen;
3304
0
                const XMLCh *xsNormalized;
3305
0
                DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
3306
0
                if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
3307
0
                {
3308
0
                    ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
3309
0
                    xsNormalized = fWSNormalizeBuf.getRawBuffer();
3310
0
                    xsLen = fWSNormalizeBuf.getLen();
3311
0
                }
3312
0
                else {
3313
0
                    xsNormalized = rawBuf;
3314
0
                    xsLen = len;
3315
0
                }
3316
3317
                // tell the schema validation about the character data for checkContent later
3318
0
                ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
3319
3320
                // call all active identity constraints
3321
0
                if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
3322
0
                    fContent.append(xsNormalized, xsLen);
3323
0
                }
3324
3325
0
                if (fDocHandler) {
3326
0
                    if (fNormalizeData) {
3327
0
                        fDocHandler->docCharacters(xsNormalized, xsLen, false);
3328
0
                    }
3329
0
                    else {
3330
0
                        fDocHandler->docCharacters(rawBuf, len, false);
3331
0
                    }
3332
0
                }
3333
0
            }
3334
0
            else
3335
0
            {
3336
0
                fValidator->emitError(XMLValid::NoCharDataInCM);
3337
0
                if (getPSVIHandler())
3338
0
                {
3339
                    // REVISIT:
3340
                    // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
3341
0
                }
3342
0
            }
3343
0
        }
3344
0
    }
3345
0
    else
3346
0
    {
3347
        // call all active identity constraints
3348
0
        if (toCheckIdentityConstraint() && fICHandler->getMatcherCount())
3349
0
            fContent.append(toSend.getRawBuffer(), toSend.getLen());
3350
3351
        // Always assume its just char data if not validating
3352
0
        if (fDocHandler)
3353
0
            fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
3354
0
    }
3355
3356
    // Reset buffer
3357
0
    toSend.reset();
3358
0
}
3359
3360
3361
3362
//  This method is called with a key/value string pair that represents an
3363
//  xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the
3364
//  current top of the element stack based on this data. We know that when
3365
//  we get here, that it is one of these forms, so we don't bother confirming
3366
//  it.
3367
//
3368
//  But we have to ensure
3369
//      1. xxx is not xmlns
3370
//      2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
3371
//      3. yyy is not XMLUni::fgXMLNSURIName
3372
//      4. if xxx is not null, then yyy cannot be an empty string.
3373
void SGXMLScanner::updateNSMap(const  XMLCh* const    attrName
3374
                              , const XMLCh* const    attrValue)
3375
0
{
3376
0
    updateNSMap(attrName, attrValue, XMLString::indexOf(attrName, chColon));
3377
0
}
3378
3379
void SGXMLScanner::updateNSMap(const  XMLCh* const    attrName
3380
                              , const XMLCh* const    attrValue
3381
                              , const int colonOfs)
3382
0
{
3383
    // We need a buffer to normalize the attribute value into
3384
0
    XMLBufBid bbNormal(&fBufMgr);
3385
0
    XMLBuffer& normalBuf = bbNormal.getBuffer();
3386
3387
    //  Normalize the value into the passed buffer. In this case, we don't
3388
    //  care about the return value. An error was issued for the error, which
3389
    //  is all we care about here.
3390
0
    normalizeAttRawValue(attrName, attrValue, normalBuf);
3391
0
    XMLCh* namespaceURI = normalBuf.getRawBuffer();
3392
3393
    //  We either have the default prefix (""), or we point it into the attr
3394
    //  name parameter. Note that the xmlns is not the prefix we care about
3395
    //  here. To us, the 'prefix' is really the local part of the attrName
3396
    //  parameter.
3397
    //
3398
    //  Check 1. xxx is not xmlns
3399
    //        2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
3400
    //        3. yyy is not XMLUni::fgXMLNSURIName
3401
    //        4. if xxx is not null, then yyy cannot be an empty string.
3402
0
    const XMLCh* prefPtr = XMLUni::fgZeroLenString;
3403
0
    if (colonOfs != -1) {
3404
0
        prefPtr = &attrName[colonOfs + 1];
3405
3406
0
        if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString))
3407
0
            emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
3408
0
        else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) {
3409
0
            if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
3410
0
                emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
3411
0
        }
3412
3413
0
        if (!namespaceURI)
3414
0
            emitError(XMLErrs::NoEmptyStrNamespace, attrName);
3415
0
        else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
3416
0
            emitError(XMLErrs::NoEmptyStrNamespace, attrName);
3417
0
    }
3418
3419
0
    if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName))
3420
0
        emitError(XMLErrs::NoUseOfxmlnsURI);
3421
0
    else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) {
3422
0
        if (!XMLString::equals(prefPtr, XMLUni::fgXMLString))
3423
0
            emitError(XMLErrs::XMLURINotMatchXMLPrefix);
3424
0
    }
3425
3426
    //  Ok, we have to get the unique id for the attribute value, which is the
3427
    //  URI that this value should be mapped to. The validator has the
3428
    //  namespace string pool, so we ask him to find or add this new one. Then
3429
    //  we ask the element stack to add this prefix to URI Id mapping.
3430
0
    fElemStack.addPrefix
3431
0
    (
3432
0
        prefPtr
3433
0
        , fURIStringPool->addOrFind(namespaceURI)
3434
0
    );
3435
0
}
3436
3437
//  Processes the namespace and xsi related attributes among the first
//  'attCount' entries of the raw attribute list, in up to three passes:
//
//    1. every xmlns / xmlns:xxx attribute updates the element stack's
//       namespace mappings; fSeeXsi is set once a declaration whose value
//       is the schema instance URI is seen;
//    2. if fSeeXsi is set, xsi:schemaLocation and
//       xsi:noNamespaceSchemaLocation are used to resolve schema grammars;
//    3. if the validator handles schemas, xsi:type and xsi:nil are handed
//       to the schema validator. This is a separate pass because they only
//       work once the schema grammar has been loaded (JIRA XERCESC-1937).
void SGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount)
{
    // Pass 1: namespace declarations
    for (XMLSize_t attIdx = 0; attIdx < attCount; ++attIdx)
    {
        // each attribute has the prefix:suffix="value"
        const KVStringPair* const rawAttr = fRawAttrList->elementAt(attIdx);
        const XMLCh* const attQName = rawAttr->getKey();

        //  Only keys that begin with "xmlns:" or are plain "xmlns" update
        //  the map
        const bool isNSDecl =
               !XMLString::compareNString(attQName, XMLUni::fgXMLNSColonString, 6)
            || XMLString::equals(attQName, XMLUni::fgXMLNSString);
        if (!isNSDecl)
            continue;

        const XMLCh* const attValue = rawAttr->getValue();
        updateNSMap(attQName, attValue, fRawAttrColonList[attIdx]);

        // remember that the schema instance URI has been declared
        if (XMLString::equals(attValue, SchemaSymbols::fgURI_XSI))
            fSeeXsi = true;
    }

    // Nothing more to do unless the xsi namespace has been seen
    if (!fSeeXsi)
        return;

    // Pass 2: schema location hints
    for (XMLSize_t attIdx = 0; attIdx < attCount; ++attIdx)
    {
        const KVStringPair* const rawAttr = fRawAttrList->elementAt(attIdx);
        const XMLCh* const attQName = rawAttr->getKey();
        const int colonInd = fRawAttrColonList[attIdx];

        // Isolate the prefix; no colon means the empty prefix
        const XMLCh* prefPtr = XMLUni::fgZeroLenString;
        if (colonInd != -1)
        {
            fURIBuf.set(attQName, colonInd);
            prefPtr = fURIBuf.getRawBuffer();
        }

        // Only attributes in the schema instance namespace matter here
        if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) != fSchemaNamespaceId)
            continue;

        const XMLCh* const attValue = rawAttr->getValue();
        const XMLCh* const localName = &attQName[colonInd + 1];

        if (XMLString::equals(localName, SchemaSymbols::fgXSI_SCHEMALOCATION))
            parseSchemaLocation(attValue);
        else if (XMLString::equals(localName, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
            resolveSchemaGrammar(attValue, XMLUni::fgZeroLenString);
    }

    // Pass 3 needs a validator that handles schemas
    if (!fValidator || !fValidator->handlesSchema())
        return;

    SchemaValidator* const schemaVal = static_cast<SchemaValidator*>(fValidator);

    // Pass 3: xsi:type and xsi:nil
    for (XMLSize_t attIdx = 0; attIdx < attCount; ++attIdx)
    {
        const KVStringPair* const rawAttr = fRawAttrList->elementAt(attIdx);
        const XMLCh* const attQName = rawAttr->getKey();
        const int colonInd = fRawAttrColonList[attIdx];

        const XMLCh* prefPtr = XMLUni::fgZeroLenString;
        if (colonInd != -1)
        {
            fURIBuf.set(attQName, colonInd);
            prefPtr = fURIBuf.getRawBuffer();
        }

        if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) != fSchemaNamespaceId)
            continue;

        const XMLCh* const attValue = rawAttr->getValue();
        const XMLCh* const localName = &attQName[colonInd + 1];

        if (XMLString::equals(localName, SchemaSymbols::fgXSI_TYPE))
        {
            XMLBufBid bbXsi(&fBufMgr);
            XMLBuffer& xsiTypeBuf = bbXsi.getBuffer();

            // normalize the attribute according to schema whitespace facet
            DatatypeValidator* qnameDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
            normalizeAttRawValue(SchemaSymbols::fgXSI_TYPE, attValue, xsiTypeBuf);
            schemaVal->normalizeWhiteSpace(qnameDV, xsiTypeBuf.getRawBuffer(), xsiTypeBuf, true);

            if (!xsiTypeBuf.isEmpty())
            {
                //  Split the QName: the prefix goes to fPrefixBuf, the
                //  local part starts right after colonPos
                int colonPos = -1;
                unsigned int uriId = resolveQName
                (
                    xsiTypeBuf.getRawBuffer()
                    , fPrefixBuf
                    , ElemStack::Mode_Element
                    , colonPos
                );
                schemaVal->setXsiType
                (
                    fPrefixBuf.getRawBuffer()
                    , xsiTypeBuf.getRawBuffer() + colonPos + 1
                    , uriId
                );
            }
        }
        else if (XMLString::equals(localName, SchemaSymbols::fgATT_NILL))
        {
            XMLBufBid bbXsi(&fBufMgr);
            XMLBuffer& xsiNilBuf = bbXsi.getBuffer();

            // normalize the attribute according to schema whitespace facet
            DatatypeValidator* boolDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
            normalizeAttRawValue(SchemaSymbols::fgATT_NILL, attValue, xsiNilBuf);
            schemaVal->normalizeWhiteSpace(boolDV, xsiNilBuf.getRawBuffer(), xsiNilBuf, true);

            // Only the two boolean literals compared here are accepted
            const XMLCh* const nilText = xsiNilBuf.getRawBuffer();
            if (XMLString::equals(nilText, SchemaSymbols::fgATTVAL_TRUE))
                schemaVal->setNillable(true);
            else if (XMLString::equals(nilText, SchemaSymbols::fgATTVAL_FALSE))
                schemaVal->setNillable(false);
            else
                emitError(XMLErrs::InvalidAttValue, nilText, attValue);
        }
    }
}
3567
3568
void SGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr, bool ignoreLoadSchema)
3569
0
{
3570
0
    BaseRefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr, fMemoryManager);
3571
0
    Janitor<BaseRefVectorOf<XMLCh> > janLoc(schemaLocation);
3572
3573
0
    XMLSize_t size = schemaLocation->size();
3574
0
    if (size % 2 != 0 ) {
3575
0
        emitError(XMLErrs::BadSchemaLocation);
3576
0
    } else {
3577
        // We need a buffer to normalize the attribute value into
3578
0
        XMLBuffer normalBuf(1023, fMemoryManager);
3579
0
        for(XMLSize_t i=0; i<size; i=i+2) {
3580
0
            normalizeAttRawValue(SchemaSymbols::fgXSI_SCHEMALOCATION, schemaLocation->elementAt(i), normalBuf);
3581
0
            resolveSchemaGrammar(schemaLocation->elementAt(i+1), normalBuf.getRawBuffer(), ignoreLoadSchema);
3582
0
        }
3583
0
    }
3584
0
}
3585
3586
0
void SGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri, bool ignoreLoadSchema) {
3587
3588
0
    Grammar* grammar = 0;
3589
3590
0
    {
3591
0
        XMLSchemaDescriptionImpl    theSchemaDescription(uri, fMemoryManager);
3592
0
        theSchemaDescription.setLocationHints(loc);
3593
0
        grammar = fGrammarResolver->getGrammar(&theSchemaDescription);
3594
0
    }
3595
3596
    // If multi-import is enabled, make sure the existing grammar came
3597
    // from the import directive. Otherwise we may end up reloading
3598
    // the same schema that came from the external grammar pool. Ideally,
3599
    // we would move fSchemaInfoList to XMLGrammarPool so that it survives
3600
    // the destruction of the scanner in which case we could rely on the
3601
    // same logic we use to weed out duplicate schemas below.
3602
    //
3603
0
    if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType ||
3604
0
        (getHandleMultipleImports() &&
3605
0
         ((XMLSchemaDescription*)grammar->getGrammarDescription())->
3606
0
         getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
3607
0
    {
3608
0
      if (fLoadSchema || ignoreLoadSchema)
3609
0
      {
3610
0
        XSDDOMParser parser(0, fMemoryManager, 0);
3611
3612
0
        parser.setValidationScheme(XercesDOMParser::Val_Never);
3613
0
        parser.setDoNamespaces(true);
3614
0
        parser.setUserEntityHandler(fEntityHandler);
3615
0
        parser.setUserErrorReporter(fErrorReporter);
3616
3617
        //Normalize sysId
3618
0
        XMLBufBid nnSys(&fBufMgr);
3619
0
        XMLBuffer& normalizedSysId = nnSys.getBuffer();
3620
0
        XMLString::removeChar(loc, 0xFFFF, normalizedSysId);
3621
0
        const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
3622
3623
        // Create a buffer for expanding the system id
3624
0
        XMLBufBid bbSys(&fBufMgr);
3625
0
        XMLBuffer& expSysId = bbSys.getBuffer();
3626
3627
        //  Allow the entity handler to expand the system id if they choose
3628
        //  to do so.
3629
0
        InputSource* srcToFill = 0;
3630
0
        if (fEntityHandler)
3631
0
        {
3632
0
            if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
3633
0
                expSysId.set(normalizedURI);
3634
3635
0
            ReaderMgr::LastExtEntityInfo lastInfo;
3636
0
            fReaderMgr.getLastExtEntityInfo(lastInfo);
3637
0
            XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::SchemaGrammar,
3638
0
                            expSysId.getRawBuffer(), uri, XMLUni::fgZeroLenString, lastInfo.systemId,
3639
0
                            &fReaderMgr);
3640
0
            srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
3641
0
        }
3642
0
        else
3643
0
        {
3644
0
            expSysId.set(normalizedURI);
3645
0
        }
3646
3647
        //  If they didn't create a source via the entity handler, then we
3648
        //  have to create one on our own.
3649
0
        if (!srcToFill)
3650
0
        {
3651
0
            if (fDisableDefaultEntityResolution)
3652
0
                return;
3653
3654
0
            ReaderMgr::LastExtEntityInfo lastInfo;
3655
0
            fReaderMgr.getLastExtEntityInfo(lastInfo);
3656
3657
0
            XMLURL urlTmp(fMemoryManager);
3658
0
            if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
3659
0
                (urlTmp.isRelative()))
3660
0
            {
3661
0
                if (!fStandardUriConformant)
3662
0
                {
3663
0
                    XMLBufBid  ddSys(&fBufMgr);
3664
0
                    XMLBuffer& resolvedSysId = ddSys.getBuffer();
3665
0
                    XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
3666
3667
0
                    srcToFill = new (fMemoryManager) LocalFileInputSource
3668
0
                    (
3669
0
                        lastInfo.systemId
3670
0
                        , resolvedSysId.getRawBuffer()
3671
0
                        , fMemoryManager
3672
0
                    );
3673
0
                }
3674
0
                else
3675
0
                    ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
3676
0
            }
3677
0
            else
3678
0
            {
3679
0
                if (fStandardUriConformant && urlTmp.hasInvalidChar())
3680
0
                    ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
3681
3682
0
                srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
3683
0
            }
3684
0
        }
3685
3686
        // Put a janitor on the input source
3687
0
        Janitor<InputSource> janSrc(srcToFill);
3688
3689
        // Check if this exact schema has already been seen.
3690
        //
3691
0
        const XMLCh* sysId = srcToFill->getSystemId();
3692
0
        unsigned int uriId = (uri && *uri) ? fURIStringPool->addOrFind(uri) : fEmptyNamespaceId;
3693
0
        SchemaInfo* importSchemaInfo = 0;
3694
3695
0
        if (fUseCachedGrammar)
3696
0
          importSchemaInfo = fCachedSchemaInfoList->get(sysId, uriId);
3697
3698
0
        if (!importSchemaInfo && !fToCacheGrammar)
3699
0
          importSchemaInfo = fSchemaInfoList->get(sysId, uriId);
3700
3701
0
        if (importSchemaInfo)
3702
0
        {
3703
          // We haven't added any new grammars so it is safe to just
3704
          // return.
3705
          //
3706
0
          return;
3707
0
        }
3708
3709
        // Should just issue warning if the schema is not found
3710
0
        bool flag = srcToFill->getIssueFatalErrorIfNotFound();
3711
0
        srcToFill->setIssueFatalErrorIfNotFound(false);
3712
3713
0
        parser.parse(*srcToFill);
3714
3715
        // Reset the InputSource
3716
0
        srcToFill->setIssueFatalErrorIfNotFound(flag);
3717
3718
0
        if (parser.getSawFatal() && fExitOnFirstFatal)
3719
0
            emitError(XMLErrs::SchemaScanFatalError);
3720
3721
0
        DOMDocument* document = parser.getDocument(); //Our Grammar
3722
3723
0
        if (document != 0) {
3724
3725
0
            DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
3726
0
            if (root != 0)
3727
0
            {
3728
0
                const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
3729
0
                bool newGrammar = false;
3730
0
                if (!XMLString::equals(newUri, uri)) {
3731
0
                    if (fValidate || fValScheme == Val_Auto) {
3732
0
                        fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
3733
0
                    }
3734
3735
0
                    grammar = fGrammarResolver->getGrammar(newUri);
3736
0
                    newGrammar = true;
3737
0
                }
3738
3739
0
                if (!grammar ||
3740
0
                    grammar->getGrammarType() == Grammar::DTDGrammarType ||
3741
0
                    (getHandleMultipleImports() &&
3742
0
                     ((XMLSchemaDescription*) grammar->getGrammarDescription())->
3743
0
                     getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
3744
0
                {
3745
                    // If we switched namespace URI, recheck the schema info.
3746
                    //
3747
0
                    if (newGrammar)
3748
0
                    {
3749
0
                      unsigned int newUriId = (newUri && *newUri) ? fURIStringPool->addOrFind(newUri) : fEmptyNamespaceId;
3750
3751
0
                      if (fUseCachedGrammar)
3752
0
                        importSchemaInfo = fCachedSchemaInfoList->get(sysId, newUriId);
3753
3754
0
                      if (!importSchemaInfo && !fToCacheGrammar)
3755
0
                        importSchemaInfo = fSchemaInfoList->get(sysId, newUriId);
3756
3757
0
                      if (importSchemaInfo)
3758
0
                        return;
3759
0
                    }
3760
3761
                    //  Since we have seen a grammar, set our validation flag
3762
                    //  at this point if the validation scheme is auto
3763
0
                    if (fValScheme == Val_Auto && !fValidate) {
3764
0
                        fValidate = true;
3765
0
                        fElemStack.setValidationFlag(fValidate);
3766
0
                    }
3767
3768
0
                    bool grammarFound = grammar &&
3769
0
                      grammar->getGrammarType() == Grammar::SchemaGrammarType;
3770
3771
0
                    SchemaGrammar* schemaGrammar;
3772
3773
0
                    if (grammarFound) {
3774
0
                      schemaGrammar = (SchemaGrammar*) grammar;
3775
0
                    }
3776
0
                    else {
3777
0
                      schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
3778
0
                    }
3779
3780
0
                    XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
3781
3782
0
                    gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
3783
0
                    gramDesc->setLocationHints(sysId);
3784
3785
0
                    TraverseSchema traverseSchema
3786
0
                    (
3787
0
                        root
3788
0
                        , fURIStringPool
3789
0
                        , schemaGrammar
3790
0
                        , fGrammarResolver
3791
0
                        , fUseCachedGrammar ? fCachedSchemaInfoList : fSchemaInfoList
3792
0
                        , fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList
3793
0
                        , this
3794
0
                        , sysId
3795
0
                        , fEntityHandler
3796
0
                        , fErrorReporter
3797
0
                        , fMemoryManager
3798
0
                        , grammarFound
3799
0
                    );
3800
3801
                    // Reset the now invalid schema roots in the collected
3802
                    // schema info entries.
3803
                    //
3804
0
                    {
3805
0
                      RefHash2KeysTableOfEnumerator<SchemaInfo> i (
3806
0
                        fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList);
3807
3808
0
                      while (i.hasMoreElements ())
3809
0
                        i.nextElement().resetRoot ();
3810
0
                    }
3811
3812
0
                    if (fGrammarType == Grammar::DTDGrammarType) {
3813
0
                        fGrammar = schemaGrammar;
3814
0
                        fGrammarType = Grammar::SchemaGrammarType;
3815
0
                        fValidator->setGrammar(fGrammar);
3816
0
                    }
3817
3818
0
                    if (fValidate) {
3819
                        //  validate the Schema scan so far
3820
0
                        fValidator->preContentValidation(false);
3821
0
                    }
3822
0
                }
3823
0
            }
3824
0
        }
3825
0
      }
3826
0
    }
3827
0
    else
3828
0
    {
3829
        //  Since we have seen a grammar, set our validation flag
3830
        //  at this point if the validation scheme is auto
3831
0
        if (fValScheme == Val_Auto && !fValidate) {
3832
0
            fValidate = true;
3833
0
            fElemStack.setValidationFlag(fValidate);
3834
0
        }
3835
3836
        // we have seen a schema, so set up the fValidator as fSchemaValidator
3837
0
        if (fGrammarType == Grammar::DTDGrammarType) {
3838
0
            fGrammar = grammar;
3839
0
            fGrammarType = Grammar::SchemaGrammarType;
3840
0
            fValidator->setGrammar(fGrammar);
3841
0
        }
3842
0
    }
3843
    // update fModel; rely on the grammar resolver to do this
3844
    // efficiently
3845
0
    if(getPSVIHandler())
3846
0
        fModel = fGrammarResolver->getXSModel();
3847
0
}
3848
3849
// Builds an input source for the external entity identified by 'sysId'
// and 'pubId'.
//
// The installed entity handler, if any, is first asked to expand the system
// id and to resolve the entity. If it supplies no source, one is created
// here: a URLInputSource when the id forms a non-relative URL against the
// last external entity's system id, a LocalFileInputSource otherwise.
//
// Returns the new input source, or 0 when the handler supplied none and
// fDisableDefaultEntityResolution is set. Throws MalformedURLException when
// fStandardUriConformant is set and the id is not an acceptable URL.
InputSource* SGXMLScanner::resolveSystemId(const XMLCh* const sysId
                                          ,const XMLCh* const pubId)
{
    // Remove every 0xFFFF character from the system id
    XMLBufBid cleanIdBid(&fBufMgr);
    XMLBuffer& cleanIdBuf = cleanIdBid.getBuffer();
    XMLString::removeChar(sysId, 0xFFFF, cleanIdBuf);
    const XMLCh* const cleanId = cleanIdBuf.getRawBuffer();

    // Buffer that receives the (possibly expanded) system id
    XMLBufBid expandedBid(&fBufMgr);
    XMLBuffer& expandedId = expandedBid.getBuffer();

    //  Give the entity handler, if there is one, the chance to expand the
    //  system id and to supply the input source itself.
    InputSource* entitySrc = 0;
    if (!fEntityHandler)
    {
        expandedId.set(cleanId);
    }
    else
    {
        if (!fEntityHandler->expandSystemId(cleanId, expandedId))
            expandedId.set(cleanId);

        ReaderMgr::LastExtEntityInfo lastInfo;
        fReaderMgr.getLastExtEntityInfo(lastInfo);
        XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity,
                            expandedId.getRawBuffer(), 0, pubId, lastInfo.systemId,
                            &fReaderMgr);
        entitySrc = fEntityHandler->resolveEntity(&resourceIdentifier);
    }

    // The handler produced a source: nothing more to do
    if (entitySrc)
        return entitySrc;

    //  Otherwise we have to create one on our own, unless that has been
    //  switched off.
    if (fDisableDefaultEntityResolution)
        return 0;

    ReaderMgr::LastExtEntityInfo lastInfo;
    fReaderMgr.getLastExtEntityInfo(lastInfo);

    XMLURL urlTmp(fMemoryManager);
    const bool absoluteUrl =
        urlTmp.setURL(lastInfo.systemId, expandedId.getRawBuffer(), urlTmp)
        && !urlTmp.isRelative();

    if (absoluteUrl)
    {
        if (fStandardUriConformant && urlTmp.hasInvalidChar())
        {
            ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
        }
        entitySrc = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
    }
    else
    {
        if (fStandardUriConformant)
        {
            ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
        }
        else
        {
            // Fall back to treating the id as a local file
            XMLBufBid  fileIdBid(&fBufMgr);
            XMLBuffer& fileId = fileIdBid.getBuffer();
            XMLUri::normalizeURI(expandedId.getRawBuffer(), fileId);

            entitySrc = new (fMemoryManager) LocalFileInputSource
            (
                lastInfo.systemId
                , fileId.getRawBuffer()
                , fMemoryManager
            );
        }
    }

    return entitySrc;
}
3922
3923
3924
// ---------------------------------------------------------------------------
3925
//  SGXMLScanner: Private grammar preparsing methods
3926
// ---------------------------------------------------------------------------
3927
// Pre-parses the schema document supplied by 'src' and returns the grammar
// for its target namespace.
//
//  src     - input source of the schema document. Its
//            "issue fatal error if not found" flag is cleared while the
//            document is parsed and put back afterwards, including when
//            the parse throws.
//  toCache - when true, schema info is collected in the cached schema info
//            list and the grammar resolver is asked to cache its grammars.
//
// Returns the grammar found or built for the document's target namespace,
// or 0 when the parse produced no document or no root element. Exceptions
// thrown by the parser propagate to the caller.
Grammar* SGXMLScanner::loadXMLSchemaGrammar(const InputSource& src,
                                          const bool toCache)
{
   // Reset the validators
    fSchemaValidator->reset();
    fSchemaValidator->setErrorReporter(fErrorReporter);
    fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
    fSchemaValidator->setGrammarResolver(fGrammarResolver);

    if (fValidatorFromUser)
        fValidator->reset();

    XSDDOMParser parser(0, fMemoryManager, 0);

    parser.setValidationScheme(XercesDOMParser::Val_Never);
    parser.setDoNamespaces(true);
    parser.setUserEntityHandler(fEntityHandler);
    parser.setUserErrorReporter(fErrorReporter);

    // Should just issue warning if the schema is not found. The source
    // belongs to the caller, so its flag must be restored on every exit
    // path, including when the parser throws.
    const bool savedFatalFlag = src.getIssueFatalErrorIfNotFound();
    InputSource& mutableSrc = const_cast<InputSource&>(src);
    mutableSrc.setIssueFatalErrorIfNotFound(false);

    try
    {
        parser.parse(src);
    }
    catch (...)
    {
        // Do not leave the caller's input source modified
        mutableSrc.setIssueFatalErrorIfNotFound(savedFatalFlag);
        throw;
    }

    // Reset the InputSource
    mutableSrc.setIssueFatalErrorIfNotFound(savedFatalFlag);

    if (parser.getSawFatal() && fExitOnFirstFatal)
        emitError(XMLErrs::SchemaScanFatalError);

    DOMDocument* document = parser.getDocument(); //Our Grammar
    if (document == 0)
        return 0;

    DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
    if (root == 0)
        return 0;

    const XMLCh* nsUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
    Grammar* grammar = fGrammarResolver->getGrammar(nsUri);

    // Check if this exact schema has already been seen.
    //
    const XMLCh* sysId = src.getSystemId();
    SchemaInfo* importSchemaInfo = 0;

    if (grammar)
    {
        if (nsUri && *nsUri)
            importSchemaInfo = fCachedSchemaInfoList->get(sysId, fURIStringPool->addOrFind(nsUri));
        else
            importSchemaInfo = fCachedSchemaInfoList->get(sysId, fEmptyNamespaceId);
    }

    if (!importSchemaInfo)
    {
        // An existing schema grammar is extended only when multiple
        // imports are handled; otherwise a new grammar is built.
        bool grammarFound = grammar &&
            grammar->getGrammarType() == Grammar::SchemaGrammarType &&
            getHandleMultipleImports();

        SchemaGrammar* schemaGrammar;

        if (grammarFound)
            schemaGrammar = (SchemaGrammar*) grammar;
        else
            schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);

        XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
        gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
        gramDesc->setLocationHints(sysId);

        TraverseSchema traverseSchema
        (
            root
            , fURIStringPool
            , schemaGrammar
            , fGrammarResolver
            , fCachedSchemaInfoList
            , toCache ? fCachedSchemaInfoList : fSchemaInfoList
            , this
            , sysId
            , fEntityHandler
            , fErrorReporter
            , fMemoryManager
            , grammarFound
        );

        grammar = schemaGrammar;

        // Reset the now invalid schema roots in the collected
        // schema info entries.
        //
        {
            RefHash2KeysTableOfEnumerator<SchemaInfo> i (
                toCache ? fCachedSchemaInfoList : fSchemaInfoList);

            while (i.hasMoreElements ())
                i.nextElement().resetRoot ();
        }
    }

    if (fValidate) {
        //  validate the Schema scan so far
        fValidator->setGrammar(grammar);
        fValidator->preContentValidation(false);
    }

    if (toCache) {
        fGrammarResolver->cacheGrammars();
    }

    // Refresh the XSModel only when a PSVI handler is installed
    if(getPSVIHandler())
        fModel = fGrammarResolver->getXSModel();

    return grammar;
}
4047
4048
4049
4050
// ---------------------------------------------------------------------------
4051
//  SGXMLScanner: Private parsing methods
4052
// ---------------------------------------------------------------------------
4053
4054
//  This method is called to do a raw scan of an attribute value. It does not
4055
//  do normalization (since we don't know their types yet.) It just scans the
4056
//  value and does entity expansion.
4057
//
4058
//  End of entity's must be dealt with here. During DTD scan, they can come
4059
//  from external entities. During content, they can come from any entity.
4060
//  We just eat the end of entity and continue with our scan until we come
4061
//  to the closing quote. If an unterminated value causes us to go through
4062
//  subsequent entities, that will cause errors back in the calling code,
4063
//  but there's little we can do about it here.
4064
bool SGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill)
4065
0
{
4066
    // Reset the target buffer
4067
0
    toFill.reset();
4068
4069
    // Get the next char which must be a single or double quote
4070
0
    XMLCh quoteCh;
4071
0
    if (!fReaderMgr.skipIfQuote(quoteCh))
4072
0
        return false;
4073
4074
    //  We have to get the current reader because we have to ignore closing
4075
    //  quotes until we hit the same reader again.
4076
0
    const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
4077
4078
    //  Loop until we get the attribute value. Note that we use a double
4079
    //  loop here to avoid the setup/teardown overhead of the exception
4080
    //  handler on every round.
4081
0
    while (true)
4082
0
    {
4083
0
        try
4084
0
        {
4085
0
            while(true)
4086
0
            {
4087
0
                XMLCh nextCh = fReaderMgr.getNextChar();
4088
4089
0
                if (nextCh != quoteCh)
4090
0
                {
4091
0
                    if (nextCh != chAmpersand)
4092
0
                    {
4093
0
                        if ((nextCh < 0xD800) || (nextCh > 0xDFFF))
4094
0
                        {
4095
                            // Its got to at least be a valid XML character
4096
0
                            if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
4097
0
                            {
4098
0
                                if (nextCh == 0)
4099
0
                                    ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
4100
4101
0
                                XMLCh tmpBuf[9];
4102
0
                                XMLString::binToText
4103
0
                                (
4104
0
                                    nextCh
4105
0
                                    , tmpBuf
4106
0
                                    , 8
4107
0
                                    , 16
4108
0
                                    , fMemoryManager
4109
0
                                );
4110
0
                                emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
4111
0
                            }
4112
0
                        } else // its a surrogate
4113
0
                        {
4114
                            // Deal with surrogate pairs
4115
4116
                            //  we expect a a leading surrogate.
4117
0
                            if (nextCh <= 0xDBFF)
4118
0
                            {
4119
0
                                toFill.append(nextCh);
4120
4121
                                //  process the trailing surrogate
4122
0
                                nextCh = fReaderMgr.getNextChar();
4123
4124
                                //  it should be a trailing surrogate.
4125
0
                                if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
4126
0
                                {
4127
0
                                    emitError(XMLErrs::Expected2ndSurrogateChar);
4128
0
                                }
4129
0
                            } else
4130
0
                            {
4131
                                //  Its a trailing surrogate, but we are not expecting it
4132
0
                                emitError(XMLErrs::Unexpected2ndSurrogateChar);
4133
0
                            }
4134
0
                        }
4135
0
                    } else // its a chAmpersand
4136
0
                    {
4137
                        //  Check for an entity ref . We ignore the empty flag in
4138
                        //  this one.
4139
4140
0
                        bool    escaped;
4141
0
                        XMLCh   firstCh;
4142
0
                        XMLCh   secondCh
4143
0
                            ;
4144
                        // If it was not returned directly, then jump back up
4145
0
                        if (scanEntityRef(true, firstCh, secondCh, escaped) == EntityExp_Returned)
4146
0
                        {
4147
                            //  If it was escaped, then put in a 0xFFFF value. This will
4148
                            //  be used later during validation and normalization of the
4149
                            //  value to know that the following character was via an
4150
                            //  escape char.
4151
0
                            if (escaped)
4152
0
                                toFill.append(0xFFFF);
4153
4154
0
                            toFill.append(firstCh);
4155
0
                            if (secondCh)
4156
0
                                toFill.append(secondCh);
4157
0
                        }
4158
0
                        continue;
4159
0
                    }
4160
0
                } else // its a quoteCh
4161
0
                {
4162
                    //  Check for our ending quote. It has to be in the same entity
4163
                    //  as where we started. Quotes in nested entities are ignored.
4164
4165
0
                    if (curReader == fReaderMgr.getCurrentReaderNum())
4166
0
                    {
4167
0
                        return true;
4168
0
                    }
4169
4170
                    // Watch for spillover into a previous entity
4171
0
                    if (curReader > fReaderMgr.getCurrentReaderNum())
4172
0
                    {
4173
0
                        emitError(XMLErrs::PartialMarkupInEntity);
4174
0
                        return false;
4175
0
                    }
4176
0
                }
4177
4178
                // add it to the buffer
4179
0
                toFill.append(nextCh);
4180
4181
0
            }
4182
0
        }
4183
0
        catch(const EndOfEntityException&)
4184
0
        {
4185
            // Just eat it and continue.
4186
0
        }
4187
0
    }
4188
0
    return true;
4189
0
}
4190
4191
4192
//  This method scans a CDATA section. It collects the character into one
4193
//  of the temp buffers and calls the document handler, if any, with the
4194
//  characters. It assumes that the <![CDATA string has been scanned before
4195
//  this call.
4196
void SGXMLScanner::scanCDSection()
4197
0
{
4198
0
    static const XMLCh CDataClose[] =
4199
0
    {
4200
0
            chCloseSquare, chCloseAngle, chNull
4201
0
    };
4202
4203
    //  The next character should be the opening square bracket. If not
4204
    //  issue an error, but then try to recover by skipping any whitespace
4205
    //  and checking again.
4206
0
    if (!fReaderMgr.skippedChar(chOpenSquare))
4207
0
    {
4208
0
        emitError(XMLErrs::ExpectedOpenSquareBracket);
4209
0
        fReaderMgr.skipPastSpaces();
4210
4211
        // If we still don't find it, then give up, else keep going
4212
0
        if (!fReaderMgr.skippedChar(chOpenSquare))
4213
0
            return;
4214
0
    }
4215
4216
    // Get a buffer for this
4217
0
    XMLBufBid bbCData(&fBufMgr);
4218
4219
    //  We just scan forward until we hit the end of CDATA section sequence.
4220
    //  CDATA is effectively a big escape mechanism so we don't treat markup
4221
    //  characters specially here.
4222
0
    bool            emittedError = false;
4223
0
    bool    gotLeadingSurrogate = false;
4224
4225
    // Get the character data opts for the current element
4226
0
    XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
4227
    // And see if the current element is a 'Children' style content model
4228
0
    ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
4229
0
    if(currType)
4230
0
    {
4231
0
        SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
4232
0
        if(modelType == SchemaElementDecl::Children ||
4233
0
           modelType == SchemaElementDecl::ElementOnlyEmpty)
4234
0
            charOpts = XMLElementDecl::SpacesOk;
4235
0
        else if(modelType == SchemaElementDecl::Empty)
4236
0
            charOpts = XMLElementDecl::NoCharData;
4237
0
    }
4238
4239
    // should not be necessary when PSVI on element decl removed
4240
0
    const ElemStack::StackElem* topElem = fElemStack.topElement();
4241
4242
0
    while (true)
4243
0
    {
4244
0
        const XMLCh nextCh = fReaderMgr.getNextChar();
4245
4246
        // Watch for unexpected end of file
4247
0
        if (!nextCh)
4248
0
        {
4249
0
            emitError(XMLErrs::UnterminatedCDATASection);
4250
0
            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
4251
0
        }
4252
4253
0
        if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
4254
0
        {
4255
            // This document is standalone; this ignorable CDATA whitespace is forbidden.
4256
            // XML 1.0, Section 2.9
4257
            // And see if the current element is a 'Children' style content model
4258
0
            if (topElem->fThisElement->isExternal()) {
4259
4260
0
                if (charOpts == XMLElementDecl::SpacesOk) // Element Content
4261
0
                {
4262
                    // Error - standalone should have a value of "no" as whitespace detected in an
4263
                    // element type with element content whose element declaration was external
4264
0
                    fValidator->emitError(XMLValid::NoWSForStandalone);
4265
0
                    if (getPSVIHandler())
4266
0
                    {
4267
                        // REVISIT:
4268
                        // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
4269
0
                    }
4270
0
                }
4271
0
            }
4272
0
        }
4273
4274
        //  If this is a close square bracket it could be our closing
4275
        //  sequence.
4276
0
        if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
4277
0
        {
4278
            //  make sure we were not expecting a trailing surrogate.
4279
0
            if (gotLeadingSurrogate) {
4280
0
                emitError(XMLErrs::Expected2ndSurrogateChar);
4281
0
            }
4282
4283
0
            XMLSize_t xsLen = bbCData.getLen();
4284
0
            const XMLCh* xsNormalized = bbCData.getRawBuffer();
4285
0
            if (fValidate) {
4286
4287
0
                DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
4288
0
                if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
4289
0
                {
4290
                    // normalize the character according to schema whitespace facet
4291
0
                    ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf);
4292
0
                    xsNormalized = fWSNormalizeBuf.getRawBuffer();
4293
0
                    xsLen = fWSNormalizeBuf.getLen();
4294
0
                }
4295
4296
                // tell the schema validation about the character data for checkContent later
4297
0
                ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
4298
4299
0
                if (charOpts != XMLElementDecl::AllCharData)
4300
0
                {
4301
                    // They definitely cannot handle any type of char data
4302
0
                    fValidator->emitError(XMLValid::NoCharDataInCM);
4303
0
                    if (getPSVIHandler())
4304
0
                    {
4305
                        // REVISIT:
4306
                        // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
4307
0
                    }
4308
0
                }
4309
0
            }
4310
4311
            // call all active identity constraints
4312
0
            if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
4313
0
                fContent.append(xsNormalized, xsLen);
4314
0
            }
4315
4316
            // If we have a doc handler, call it
4317
0
            if (fDocHandler)
4318
0
            {
4319
0
                if (fNormalizeData) {
4320
0
                    fDocHandler->docCharacters(xsNormalized, xsLen, true);
4321
0
                }
4322
0
                else {
4323
0
                    fDocHandler->docCharacters(
4324
0
                        bbCData.getRawBuffer(), bbCData.getLen(), true
4325
0
                    );
4326
0
                }
4327
0
            }
4328
4329
            // And we are done
4330
0
            break;
4331
0
        }
4332
4333
        //  Make sure its a valid character. But if we've emitted an error
4334
        //  already, don't bother with the overhead since we've already told
4335
        //  them about it.
4336
0
        if (!emittedError)
4337
0
        {
4338
            // Deal with surrogate pairs
4339
0
            if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
4340
0
            {
4341
                //  Its a leading surrogate. If we already got one, then
4342
                //  issue an error, else set leading flag to make sure that
4343
                //  we look for a trailing next time.
4344
0
                if (gotLeadingSurrogate)
4345
0
                    emitError(XMLErrs::Expected2ndSurrogateChar);
4346
0
                else
4347
0
                    gotLeadingSurrogate = true;
4348
0
            }
4349
0
            else
4350
0
            {
4351
                //  If its a trailing surrogate, make sure that we are
4352
                //  prepared for that. Else, its just a regular char so make
4353
                //  sure that we were not expected a trailing surrogate.
4354
0
                if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
4355
0
                {
4356
                    // Its trailing, so make sure we were expecting it
4357
0
                    if (!gotLeadingSurrogate)
4358
0
                        emitError(XMLErrs::Unexpected2ndSurrogateChar);
4359
0
                }
4360
0
                else
4361
0
                {
4362
                    //  Its just a char, so make sure we were not expecting a
4363
                    //  trailing surrogate.
4364
0
                    if (gotLeadingSurrogate)
4365
0
                        emitError(XMLErrs::Expected2ndSurrogateChar);
4366
4367
                    // Its got to at least be a valid XML character
4368
0
                    else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
4369
0
                    {
4370
0
                        XMLCh tmpBuf[9];
4371
0
                        XMLString::binToText
4372
0
                        (
4373
0
                            nextCh
4374
0
                            , tmpBuf
4375
0
                            , 8
4376
0
                            , 16
4377
0
                            , fMemoryManager
4378
0
                        );
4379
0
                        emitError(XMLErrs::InvalidCharacter, tmpBuf);
4380
0
                        emittedError = true;
4381
0
                    }
4382
0
                }
4383
0
                gotLeadingSurrogate = false;
4384
0
            }
4385
0
        }
4386
4387
        // Add it to the buffer
4388
0
        bbCData.append(nextCh);
4389
0
    }
4390
0
}
4391
4392
4393
//  Scans a run of character data into 'toUse' and hands it on through
//  sendCharData(). The per-character loop ends when
//  fReaderMgr.getNextCharIfNot(chOpenAngle, ...) does not return a character.
//
//  While scanning, this method
//    - checks surrogate pairing and XML character validity,
//    - reports the "]]>" sequence, which is illegal in character data,
//    - lets scanEntityRef() resolve anything introduced by '&',
//    - flushes the buffer and reports endEntityReference() to the document
//      handler whenever the reader manager signals the end of an entity.
//
//  After the loop, when validating a standalone document, whitespace found in
//  an externally declared element whose content type is element-only is
//  reported as XMLValid::NoWSForStandalone (XML 1.0 Section 2.9).
void SGXMLScanner::scanCharData(XMLBuffer& toUse)
{
    //  Number of consecutive unescaped ']' characters seen immediately before
    //  the current position (saturating at two).
    enum BracketRun
    {
        Run_None
        , Run_One
        , Run_Two
    };

    // Begin with an empty target buffer
    toUse.reset();

    // Ask the reader manager to throw when an entity ends while we scan
    ThrowEOEJanitor throwAtEntityEnd(&fReaderMgr, true);

    //  The try block wraps the whole per-character loop rather than sitting
    //  inside it, so the exception machinery is set up once per entity
    //  boundary and not once per character.
    XMLCh       curCh;
    XMLCh       lowSurrogate = 0;
    BracketRun  brackets = Run_None;
    bool        fromCharRef = false;
    bool        needTrailing = false;

    for (bool keepScanning = true; keepScanning; )
    {
        try
        {
            for (;;)
            {
                //  Fast path: when we are neither in the middle of a "]]>"
                //  candidate nor waiting for a trailing surrogate, let the
                //  reader move all plain content characters in one call.
                if (!needTrailing && brackets == Run_None)
                    fReaderMgr.movePlainContentChars(toUse);

                // Fetch the next character unless it is an open angle
                if (!fReaderMgr.getNextCharIfNot(chOpenAngle, curCh))
                {
                    // Ending while a trailing surrogate is pending is an error
                    if (needTrailing)
                        emitError(XMLErrs::Expected2ndSurrogateChar);

                    keepScanning = false;
                    break;
                }

                fromCharRef = false;
                if (curCh == chAmpersand)
                {
                    // Deliver what we have before dealing with the reference
                    sendCharData(toUse);

                    //  No end-of-entity throwing while the reference itself
                    //  is scanned; the janitor restores the setting when this
                    //  block is left (including via the 'continue' below).
                    ThrowEOEJanitor noThrowInRef(&fReaderMgr, false);

                    const EntityExpRes refResult =
                        scanEntityRef(false, curCh, lowSurrogate, fromCharRef);
                    if (refResult != EntityExp_Returned)
                    {
                        needTrailing = false;
                        continue;
                    }
                }
                else if ((curCh >= 0xD800) && (curCh <= 0xDBFF))
                {
                    //  Leading surrogate: two in a row is an error, otherwise
                    //  remember that a trailing one must follow.
                    if (needTrailing)
                        emitError(XMLErrs::Expected2ndSurrogateChar);
                    else
                        needTrailing = true;
                }
                else if ((curCh >= 0xDC00) && (curCh <= 0xDFFF))
                {
                    // Trailing surrogate: only legal right after a leading one
                    if (!needTrailing)
                        emitError(XMLErrs::Unexpected2ndSurrogateChar);
                    needTrailing = false;
                }
                else
                {
                    // Ordinary character where a trailing surrogate was due
                    if (needTrailing)
                        emitError(XMLErrs::Expected2ndSurrogateChar);

                    // It must be a legal XML character for the current reader
                    if (!fReaderMgr.getCurrentReader()->isXMLChar(curCh))
                    {
                        // Report the offending code unit in hexadecimal
                        XMLCh hexText[9];
                        XMLString::binToText
                        (
                            curCh
                            , hexText
                            , 8
                            , 16
                            , fMemoryManager
                        );
                        emitError(XMLErrs::InvalidCharacter, hexText);
                    }
                    needTrailing = false;
                }

                //  Update the "]]>" tracker. Only unescaped ']' characters
                //  extend the run; everything else resets it, and an
                //  unescaped '>' after two brackets is reported first.
                if (!fromCharRef && curCh == chCloseSquare)
                {
                    if (brackets == Run_None)
                        brackets = Run_One;
                    else if (brackets == Run_One)
                        brackets = Run_Two;
                }
                else
                {
                    if (!fromCharRef && curCh == chCloseAngle && brackets == Run_Two)
                        emitError(XMLErrs::BadSequenceInCharData);
                    brackets = Run_None;
                }

                // Store the character, plus the second one if any was returned
                toUse.append(curCh);

                if (lowSurrogate)
                {
                    toUse.append(lowSurrogate);
                    lowSurrogate = 0;
                }
            }
        }
        catch (const EndOfEntityException& entityEnded)
        {
            //  An entity ran out: flush the accumulated characters and tell
            //  the document handler that the entity reference has ended.
            sendCharData(toUse);
            needTrailing = false;

            if (fDocHandler)
                fDocHandler->endEntityReference(entityEnded.getEntity());
        }
    }

    // Validity constraint check as per XML 1.0 Section 2.9
    if (fValidate && fStandalone)
    {
        const XMLCh* const scanned = toUse.getRawBuffer();
        const XMLSize_t scannedLen = toUse.getLen();
        const bool hasWhitespace =
            fReaderMgr.getCurrentReader()->containsWhiteSpace(scanned, scannedLen);

        if (hasWhitespace && fElemStack.topElement()->fThisElement->isExternal())
        {
            //  Whitespace only matters when the current type allows element
            //  children but no character data.
            bool elementContentOnly = false;
            ComplexTypeInfo* const typeInfo =
                static_cast<SchemaValidator*>(fValidator)->getCurrentTypeInfo();
            if (typeInfo)
            {
                const SchemaElementDecl::ModelTypes contentModel =
                    static_cast<SchemaElementDecl::ModelTypes>(typeInfo->getContentType());
                elementContentOnly = (contentModel == SchemaElementDecl::Children)
                                  || (contentModel == SchemaElementDecl::ElementOnlyEmpty);
            }

            if (elementContentOnly)
            {
                //  standalone should have been "no": whitespace appeared in
                //  element content of an externally declared element.
                fValidator->emitError(XMLValid::NoWSForStandalone);
                if (getPSVIHandler())
                {
                    // REVISIT:
                    // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
                }
            }
        }
    }

    // Deliver whatever is still sitting in the buffer
    sendCharData(toUse);
}
4606
4607
4608
//  This method will scan a general/character entity ref. It will either
4609
//  expand a char ref and return it directly, or push a reader for a general
4610
//  entity.
4611
//
4612
//  The return value indicates whether the char parameters hold the value
4613
//  or whether the value was pushed as a reader, or that it failed.
4614
//
4615
//  The escaped flag tells the caller whether the returned parameter resulted
4616
//  from a character reference, which escapes the character in some cases. It
4617
//  only makes any difference if the return value indicates the value was
4618
//  returned directly.
4619
SGXMLScanner::EntityExpRes
4620
SGXMLScanner::scanEntityRef(  const   bool
4621
                            ,       XMLCh&  firstCh
4622
                            ,       XMLCh&  secondCh
4623
                            ,       bool&   escaped)
4624
0
{
4625
    // Assume no escape
4626
0
    secondCh = 0;
4627
0
    escaped = false;
4628
4629
    // We have to insure that its all in one entity
4630
0
    const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
4631
4632
    //  If the next char is a pound, then its a character reference and we
4633
    //  need to expand it always.
4634
0
    if (fReaderMgr.skippedChar(chPound))
4635
0
    {
4636
        //  Its a character reference, so scan it and get back the numeric
4637
        //  value it represents.
4638
0
        if (!scanCharRef(firstCh, secondCh))
4639
0
            return EntityExp_Failed;
4640
4641
0
        escaped = true;
4642
4643
0
        if (curReader != fReaderMgr.getCurrentReaderNum())
4644
0
            emitError(XMLErrs::PartialMarkupInEntity);
4645
4646
0
        return EntityExp_Returned;
4647
0
    }
4648
4649
    // Expand it since its a normal entity ref
4650
0
    XMLBufBid bbName(&fBufMgr);
4651
0
    int colonPosition;
4652
0
    if (!fReaderMgr.getQName(bbName.getBuffer(), &colonPosition))
4653
0
    {
4654
0
        if (bbName.isEmpty())
4655
0
            emitError(XMLErrs::ExpectedEntityRefName);
4656
0
        else
4657
0
            emitError(XMLErrs::InvalidEntityRefName, bbName.getRawBuffer());
4658
0
        return EntityExp_Failed;
4659
0
    }
4660
4661
    //  Next char must be a semi-colon. But if its not, just emit
4662
    //  an error and try to continue.
4663
0
    if (!fReaderMgr.skippedChar(chSemiColon))
4664
0
        emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
4665
4666
    // Make sure we ended up on the same entity reader as the & char
4667
0
    if (curReader != fReaderMgr.getCurrentReaderNum())
4668
0
        emitError(XMLErrs::PartialMarkupInEntity);
4669
4670
    // Look up the name in the general entity pool
4671
    // If it does not exist, then obviously an error
4672
0
    if (!fEntityTable->containsKey(bbName.getRawBuffer()))
4673
0
    {
4674
        // XML 1.0 Section 4.1
4675
        // Well-formedness Constraint for entity not found:
4676
        //   In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references,
4677
        //      or a document with "standalone='yes'", for an entity reference that does not occur within the external subset
4678
        //      or a parameter entity
4679
0
        if (fStandalone || fHasNoDTD)
4680
0
            emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
4681
4682
0
        return EntityExp_Failed;
4683
0
    }
4684
4685
    // here's where we need to check if there's a SecurityManager,
4686
    // how many entity references we've had
4687
0
    if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
4688
0
        XMLCh expLimStr[32];
4689
0
        XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager);
4690
0
        emitError
4691
0
        (
4692
0
            XMLErrs::EntityExpansionLimitExceeded
4693
0
            , expLimStr
4694
0
        );
4695
        // there seems nothing better to be done than to reset the entity expansion limit
4696
0
        fEntityExpansionCount = 0;
4697
0
    }
4698
4699
0
    firstCh = fEntityTable->get(bbName.getRawBuffer());
4700
0
    escaped = true;
4701
0
    return EntityExp_Returned;
4702
0
}
4703
4704
4705
bool SGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace)
4706
0
{
4707
0
    Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace);
4708
4709
0
    if (!tempGrammar) {
4710
0
        tempGrammar = fSchemaGrammar;
4711
0
    }
4712
4713
0
    if (!tempGrammar)
4714
0
        return false;
4715
0
    else {
4716
0
        fGrammar = tempGrammar;
4717
0
        fGrammarType = fGrammar->getGrammarType();
4718
0
        if (fGrammarType == Grammar::DTDGrammarType) {
4719
0
            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
4720
0
        }
4721
4722
0
        fValidator->setGrammar(fGrammar);
4723
0
        return true;
4724
0
    }
4725
0
}
4726
4727
// check if we should skip or lax the validation of the element
4728
// if skip - no validation
4729
// if lax - validate only if the element if found
4730
bool SGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
4731
                                        const XMLContentModel* const cm,
4732
                                        const XMLSize_t parentElemDepth)
4733
0
{
4734
0
    bool skipThisOne = false;
4735
0
    bool laxThisOne = false;
4736
0
    unsigned int elementURI = element->getURI();
4737
0
    unsigned int currState = fElemState[parentElemDepth];
4738
0
    unsigned int currLoop = fElemLoopState[parentElemDepth];
4739
4740
0
    if (currState == XMLContentModel::gInvalidTrans) {
4741
0
        return laxThisOne;
4742
0
    }
4743
4744
0
    SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool);
4745
4746
0
    if (cv) {
4747
0
        XMLSize_t i = 0;
4748
0
        XMLSize_t leafCount = cv->getLeafCount();
4749
0
        unsigned int nextState = 0;
4750
4751
0
        for (; i < leafCount; i++) {
4752
4753
0
            QName* fElemMap = cv->getLeafNameAt(i);
4754
0
            unsigned int uri = fElemMap->getURI();
4755
0
            ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
4756
4757
0
            if (type == ContentSpecNode::Leaf) {
4758
0
                if (((uri == elementURI)
4759
0
                      && XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart()))
4760
0
                    || comparator.isEquivalentTo(element, fElemMap)) {
4761
4762
0
                    nextState = cm->getNextState(currState, i);
4763
4764
0
                    if (nextState != XMLContentModel::gInvalidTrans)
4765
0
                        break;
4766
0
                }
4767
0
            } else if ((type & 0x0f) == ContentSpecNode::Any) {
4768
0
                nextState = cm->getNextState(currState, i);
4769
0
                if (nextState != XMLContentModel::gInvalidTrans)
4770
0
                    break;
4771
0
            }
4772
0
            else if ((type & 0x0f) == ContentSpecNode::Any_Other) {
4773
0
                if (uri != elementURI && elementURI != fEmptyNamespaceId) {
4774
0
                    nextState = cm->getNextState(currState, i);
4775
0
                    if (nextState != XMLContentModel::gInvalidTrans)
4776
0
                        break;
4777
0
                }
4778
0
            }
4779
0
            else if ((type & 0x0f) == ContentSpecNode::Any_NS) {
4780
0
                if (uri == elementURI) {
4781
0
                    nextState = cm->getNextState(currState, i);
4782
0
                    if (nextState != XMLContentModel::gInvalidTrans)
4783
0
                        break;
4784
0
                }
4785
0
            }
4786
4787
0
        } // for
4788
4789
0
        if (i == leafCount) { // no match
4790
0
            fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans;
4791
0
            fElemLoopState[parentElemDepth] = 0;
4792
0
            return laxThisOne;
4793
0
        }
4794
4795
0
        ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
4796
0
        if ((type & 0x0f) == ContentSpecNode::Any ||
4797
0
            (type & 0x0f) == ContentSpecNode::Any_Other ||
4798
0
            (type & 0x0f) == ContentSpecNode::Any_NS)
4799
0
        {
4800
0
            if (type == ContentSpecNode::Any_Skip ||
4801
0
                type == ContentSpecNode::Any_NS_Skip ||
4802
0
                type == ContentSpecNode::Any_Other_Skip) {
4803
0
                skipThisOne = true;
4804
0
            }
4805
0
            else if (type == ContentSpecNode::Any_Lax ||
4806
0
                     type == ContentSpecNode::Any_NS_Lax ||
4807
0
                     type == ContentSpecNode::Any_Other_Lax) {
4808
0
                laxThisOne = true;
4809
0
            }
4810
0
        }
4811
0
        fElemState[parentElemDepth] = nextState;
4812
0
        fElemLoopState[parentElemDepth] = currLoop;
4813
0
    } // if
4814
4815
0
    if (skipThisOne) {
4816
0
        fValidate = false;
4817
0
        fElemStack.setValidationFlag(fValidate);
4818
0
    }
4819
4820
0
    return laxThisOne;
4821
0
}
4822
4823
4824
// check if there is an AnyAttribute, and if so, see if we should lax or skip
4825
// if skip - no validation
4826
// if lax - validate only if the attribute if found
4827
bool SGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne)
4828
0
{
4829
0
    XMLAttDef::AttTypes wildCardType = attWildCard->getType();
4830
0
    bool anyEncountered = false;
4831
0
    skipThisOne = false;
4832
0
    laxThisOne = false;
4833
0
    if (wildCardType == XMLAttDef::Any_Any)
4834
0
        anyEncountered = true;
4835
0
    else if (wildCardType == XMLAttDef::Any_Other) {
4836
0
        if (attWildCard->getAttName()->getURI() != uriId
4837
0
            && uriId != fEmptyNamespaceId)
4838
0
            anyEncountered = true;
4839
0
    }
4840
0
    else if (wildCardType == XMLAttDef::Any_List) {
4841
0
        ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList();
4842
0
        XMLSize_t listSize = (nameURIList) ? nameURIList->size() : 0;
4843
4844
0
        if (listSize) {
4845
0
            for (XMLSize_t i=0; i < listSize; i++) {
4846
0
                if (nameURIList->elementAt(i) == uriId)
4847
0
                    anyEncountered = true;
4848
0
            }
4849
0
        }
4850
0
    }
4851
4852
0
    if (anyEncountered) {
4853
0
        XMLAttDef::DefAttTypes   defType   = attWildCard->getDefaultType();
4854
0
        if (defType == XMLAttDef::ProcessContents_Skip) {
4855
            // attribute should just be bypassed,
4856
0
            skipThisOne = true;
4857
0
            if (getPSVIHandler())
4858
0
            {
4859
                // REVISIT:
4860
                // PSVIAttribute->setValidationAttempted(PSVIItem::VALIDATION_NONE);
4861
0
            }
4862
0
        }
4863
0
        else if (defType == XMLAttDef::ProcessContents_Lax) {
4864
0
            laxThisOne = true;
4865
0
        }
4866
0
    }
4867
4868
0
    return anyEncountered;
4869
0
}
4870
4871
//  Returns the attribute definition list of 'currType' when one is supplied,
//  and that of 'elemDecl' otherwise.
inline XMLAttDefList& getAttDefList(ComplexTypeInfo* currType, XMLElementDecl* elemDecl)
{
    // No complex type: use the element declaration's own list
    if (!currType)
        return elemDecl->getAttDefList();

    return currType->getAttDefList();
}
4878
4879
void SGXMLScanner::endElementPSVI(SchemaElementDecl* const elemDecl,
4880
                                  DatatypeValidator* const memberDV)
4881
0
{
4882
0
    PSVIElement::ASSESSMENT_TYPE validationAttempted;
4883
0
    PSVIElement::VALIDITY_STATE validity = PSVIElement::VALIDITY_NOTKNOWN;
4884
4885
0
    if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fFullValidationDepth)
4886
0
        validationAttempted = PSVIElement::VALIDATION_FULL;
4887
0
    else if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fNoneValidationDepth)
4888
0
        validationAttempted = PSVIElement::VALIDATION_NONE;
4889
0
    else
4890
0
    {
4891
0
        validationAttempted  = PSVIElement::VALIDATION_PARTIAL;
4892
0
    fPSVIElemContext.fFullValidationDepth =
4893
0
            fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth - 1;
4894
0
    }
4895
4896
0
    if (fValidate && elemDecl->isDeclared())
4897
0
    {
4898
0
        validity = (fPSVIElemContext.fErrorOccurred)
4899
0
            ? PSVIElement::VALIDITY_INVALID : PSVIElement::VALIDITY_VALID;
4900
0
    }
4901
4902
0
    XSTypeDefinition* typeDef = 0;
4903
0
    bool isMixed = false;
4904
0
    if (fPSVIElemContext.fCurrentTypeInfo)
4905
0
    {
4906
0
        typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentTypeInfo);
4907
0
        SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes)fPSVIElemContext.fCurrentTypeInfo->getContentType();
4908
0
        isMixed = (modelType == SchemaElementDecl::Mixed_Simple
4909
0
                || modelType == SchemaElementDecl::Mixed_Complex);
4910
0
    }
4911
0
    else if (fPSVIElemContext.fCurrentDV)
4912
0
        typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentDV);
4913
4914
0
    XMLCh* canonicalValue = 0;
4915
0
    if (fPSVIElemContext.fNormalizedValue && !isMixed &&
4916
0
            validity == PSVIElement::VALIDITY_VALID)
4917
0
    {
4918
0
        if (memberDV)
4919
0
            canonicalValue = (XMLCh*) memberDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
4920
0
        else if (fPSVIElemContext.fCurrentDV)
4921
0
            canonicalValue = (XMLCh*) fPSVIElemContext.fCurrentDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
4922
0
    }
4923
4924
0
    fPSVIElement->reset
4925
0
    (
4926
0
        validity
4927
0
        , validationAttempted
4928
0
        , fRootElemName
4929
0
        , fPSVIElemContext.fIsSpecified
4930
0
        , (elemDecl->isDeclared())
4931
0
            ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
4932
0
        , typeDef
4933
0
        , (memberDV) ? (XSSimpleTypeDefinition*) fModel->getXSObject(memberDV) : 0
4934
0
        , fModel
4935
0
        , elemDecl->getDefaultValue()
4936
0
        , fPSVIElemContext.fNormalizedValue
4937
0
        , canonicalValue
4938
0
    );
4939
4940
0
    fPSVIHandler->handleElementPSVI
4941
0
    (
4942
0
        elemDecl->getBaseName()
4943
0
        , fURIStringPool->getValueForId(elemDecl->getURI())
4944
0
        , fPSVIElement
4945
0
    );
4946
4947
    // decrease element depth
4948
0
    fPSVIElemContext.fElemDepth--;
4949
4950
0
}
4951
4952
void SGXMLScanner::resetPSVIElemContext()
4953
0
{
4954
0
    fPSVIElemContext.fIsSpecified = false;
4955
0
    fPSVIElemContext.fErrorOccurred = false;
4956
0
    fPSVIElemContext.fElemDepth = -1;
4957
0
    fPSVIElemContext.fFullValidationDepth = -1;
4958
0
    fPSVIElemContext.fNoneValidationDepth = -1;
4959
0
    fPSVIElemContext.fCurrentDV = 0;
4960
0
    fPSVIElemContext.fCurrentTypeInfo = 0;
4961
0
    fPSVIElemContext.fNormalizedValue = 0;
4962
0
}
4963
4964
XERCES_CPP_NAMESPACE_END