Coverage Report

Created: 2026-06-13 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/xerces-c/src/xercesc/internal/IGXMLScanner2.cpp
Line
Count
Source
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
18
/*
19
 * $Id: IGXMLScanner2.cpp 1799520 2017-06-21 21:34:26Z scantor $
20
 */
21
22
// ---------------------------------------------------------------------------
23
//  This file holds some of the grunt work methods of IGXMLScanner.cpp to keep
24
//  it a little more readable.
25
// ---------------------------------------------------------------------------
26
27
// ---------------------------------------------------------------------------
28
//  Includes
29
// ---------------------------------------------------------------------------
30
#include <xercesc/internal/IGXMLScanner.hpp>
31
#include <xercesc/internal/EndOfEntityException.hpp>
32
#include <xercesc/util/UnexpectedEOFException.hpp>
33
#include <xercesc/util/XMLUri.hpp>
34
#include <xercesc/framework/LocalFileInputSource.hpp>
35
#include <xercesc/framework/URLInputSource.hpp>
36
#include <xercesc/framework/XMLDocumentHandler.hpp>
37
#include <xercesc/framework/XMLEntityHandler.hpp>
38
#include <xercesc/framework/XMLPScanToken.hpp>
39
#include <xercesc/framework/XMLRefInfo.hpp>
40
#include <xercesc/framework/XMLGrammarPool.hpp>
41
#include <xercesc/framework/psvi/PSVIAttributeList.hpp>
42
#include <xercesc/framework/psvi/PSVIElement.hpp>
43
#include <xercesc/framework/psvi/XSAnnotation.hpp>
44
#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
45
#include <xercesc/validators/DTD/DTDGrammar.hpp>
46
#include <xercesc/validators/DTD/DTDValidator.hpp>
47
#include <xercesc/validators/DTD/XMLDTDDescriptionImpl.hpp>
48
#include <xercesc/validators/datatype/DatatypeValidator.hpp>
49
#include <xercesc/validators/schema/XMLSchemaDescriptionImpl.hpp>
50
#include <xercesc/validators/schema/SchemaGrammar.hpp>
51
#include <xercesc/validators/schema/SchemaValidator.hpp>
52
#include <xercesc/validators/schema/TraverseSchema.hpp>
53
#include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
54
#include <xercesc/validators/schema/XSDDOMParser.hpp>
55
#include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
56
#include <xercesc/validators/schema/identity/ValueStore.hpp>
57
#include <xercesc/util/XMLStringTokenizer.hpp>
58
59
XERCES_CPP_NAMESPACE_BEGIN
60
61
// Forward declaration of a helper whose definition is not part of this
// excerpt. Per the signature it yields a reference to an XMLAttDefList given
// a schema-grammar flag, the current complex type (may be null) and the
// element declaration.
// NOTE(review): presumably selects the attribute-definition list from
// currType when validating against a schema and from elemDecl otherwise --
// confirm against the definition.
inline XMLAttDefList& getAttDefList(bool              isSchemaGrammar
62
                                  , ComplexTypeInfo*  currType
63
                                  , XMLElementDecl*   elemDecl);
64
65
// ---------------------------------------------------------------------------
66
//  IGXMLScanner: Private helper methods
67
// ---------------------------------------------------------------------------
68
69
//  This method is called from scanStartTagNS() to build up the list of
70
//  XMLAttr objects that will be passed out in the start tag callout. We
71
//  get the key/value pairs from the raw scan of explicitly provided attrs,
72
//  which have not been normalized. And we get the element declaration from
73
//  which we will get any defaulted or fixed attribute defs and add those
74
//  in as well.
75
XMLSize_t
76
IGXMLScanner::buildAttList(const  RefVectorOf<KVStringPair>&  providedAttrs
77
                          , const XMLSize_t                   attCount
78
                          ,       XMLElementDecl*             elemDecl
79
                          ,       RefVectorOf<XMLAttr>&       toFill)
80
0
{
81
    //  If doing DTD's, Ask the element to clear the 'provided' flag on all of the att defs
82
    //  that it owns, and to return us a boolean indicating whether it has
83
    //  any defs.  If schemas are being validated, the complexType
84
    // at the top of the SchemaValidator's stack will
85
    // know what's best.  REVISIT:  don't modify grammar at all; eliminate
86
    // this step...
87
0
    ComplexTypeInfo *currType = 0;
88
0
    DatatypeValidator *currDV = 0;
89
0
    if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType && fValidate)
90
0
    {
91
0
        currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
92
0
        if (!currType) {
93
0
            currDV = ((SchemaValidator*)fValidator)->getCurrentDatatypeValidator();
94
0
        }
95
0
    }
96
97
0
    const bool hasDefs = (currType && fValidate)
98
0
            ? currType->hasAttDefs()
99
0
            : elemDecl->hasAttDefs();
100
101
    // another set of attributes; increment element counter
102
0
    fElemCount++;
103
104
    //  If there are no explicitly provided attributes and there are no
105
    //  defined attributes for the element, then we don't have anything to do.
106
    //  So just return zero in this case.
107
0
    if (!hasDefs && !attCount)
108
0
        return 0;
109
110
    // Keep up with how many attrs we end up with total
111
0
    XMLSize_t retCount = 0;
112
113
    //  And get the current size of the output vector. This lets us use
114
    //  existing elements until we fill it, then start adding new ones.
115
0
    const XMLSize_t curAttListSize = toFill.size();
116
117
    //  We need a buffer into which raw scanned attribute values will be
118
    //  normalized.
119
0
    XMLBufBid bbNormal(&fBufMgr);
120
0
    XMLBuffer& normBuf = bbNormal.getBuffer();
121
122
    //
123
    // Decide whether to use a hash table for duplicate checking
124
    //
125
0
    bool toUseHashTable = false;
126
0
    if (fGrammarType == Grammar::DTDGrammarType)
127
0
    {
128
0
        setAttrDupChkRegistry(attCount, toUseHashTable);
129
0
    }
130
131
0
    XMLBufBid bbPrefix(&fBufMgr);
132
0
    XMLBuffer& prefixBuf = bbPrefix.getBuffer();
133
134
    //  Loop through our explicitly provided attributes, which are in the raw
135
    //  scanned form, and build up XMLAttr objects.
136
0
    XMLSize_t index;
137
0
    const XMLCh* prefPtr, *suffPtr;
138
0
    for (index = 0; index < attCount; index++)
139
0
    {
140
0
        PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID;
141
0
        PSVIItem::ASSESSMENT_TYPE attrAssessed = PSVIItem::VALIDATION_FULL;
142
0
        const KVStringPair* curPair = providedAttrs.elementAt(index);
143
144
        //  We have to split the name into its prefix and name parts. Then
145
        //  we map the prefix to its URI.
146
0
        const XMLCh* const namePtr = curPair->getKey();
147
148
0
        const int colonInd = fRawAttrColonList[index];
149
0
        unsigned int uriId;
150
0
        if (colonInd != -1)
151
0
        {
152
0
            prefixBuf.set(namePtr, colonInd);
153
0
            prefPtr = prefixBuf.getRawBuffer();
154
0
            suffPtr = namePtr + colonInd + 1;
155
            //  Map the prefix to a URI id
156
0
            uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute);
157
0
        }
158
0
        else
159
0
        {
160
            // No colon, so we just have a name with no prefix
161
0
            prefPtr = XMLUni::fgZeroLenString;
162
0
            suffPtr = namePtr;
163
            // an empty prefix is always the empty namespace, when dealing with attributes
164
0
            uriId = fEmptyNamespaceId;
165
0
        }
166
167
        //  If the uri comes back as the xmlns or XSI URI, or it's just a name
168
        //  and that name is 'xmlns', then we handle it specially. So set a
169
        //  boolean flag that lets us quickly below know which we are dealing
170
        //  with.
171
0
        const bool isNSAttr = (uriId == fEmptyNamespaceId)?
172
0
                                XMLString::equals(suffPtr, XMLUni::fgXMLNSString) :
173
0
                                (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI));
174
175
176
        //  If it's not a special case namespace attr of some sort, then we
177
        //  do normal checking and processing.
178
0
        XMLAttDef::AttTypes attType = XMLAttDef::CData;
179
0
        DatatypeValidator *attrValidator = 0;
180
0
        PSVIAttribute *psviAttr = 0;
181
0
        bool otherXSI = false;
182
183
0
        if (isNSAttr && fGrammarType == Grammar::SchemaGrammarType)
184
0
        {
185
0
            if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
186
0
            {
187
0
                emitError
188
0
                (
189
0
                    XMLErrs::AttrAlreadyUsedInSTag
190
0
                    , namePtr
191
0
                    , elemDecl->getFullName()
192
0
                );
193
0
                fPSVIElemContext.fErrorOccurred = true;
194
0
            }
195
0
            else
196
0
            {
197
0
                bool ValueValidate = false;
198
0
                bool tokenizeBuffer = false;
199
200
0
                if (uriId == fXMLNSNamespaceId)
201
0
                {
202
0
                    attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
203
0
                }
204
0
                else if (XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI))
205
0
                {
206
0
                    if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
207
0
                    {
208
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
209
210
0
                        ValueValidate = true;
211
0
                    }
212
0
                    else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
213
0
                    {
214
                        // use anyURI as the validator
215
                        // tokenize the data and use the anyURI data for each piece
216
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
217
                        //We should validate each value in the schema location however
218
                        //this led to a performance degradation of around 4%.  Since
219
                        //the first value of each pair needs to match what is in the
220
                        //schema document and the second value needs to be valid in
221
                        //order to open the document we won't validate it.  Need to
222
                        //do performance analysis of the anyuri datatype.
223
                        //ValueValidate = true;
224
0
                        ValueValidate = false;
225
0
                        tokenizeBuffer = true;
226
0
                    }
227
0
                    else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
228
0
                    {
229
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
230
                        //We should validate this value however
231
                        //this led to a performance degradation of around 4%.  Since
232
                        //the value needs to be valid in
233
                        //order to open the document we won't validate it.  Need to
234
                        //do performance analysis of the anyuri datatype.
235
                        //ValueValidate = true;
236
0
                        ValueValidate = false;
237
0
                    }
238
0
                    else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
239
0
                    {
240
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
241
242
0
                        ValueValidate = true;
243
0
                    }
244
0
                    else {
245
0
                        otherXSI = true;
246
0
                    }
247
0
                }
248
249
0
                if (!otherXSI) {
250
0
                    normalizeAttRawValue
251
0
                    (
252
0
                        namePtr
253
0
                        , curPair->getValue()
254
0
                        , normBuf
255
0
                    );
256
257
0
                    if (fValidate && attrValidator && ValueValidate)
258
0
                    {
259
0
                        ((SchemaValidator*) fValidator)->normalizeWhiteSpace(attrValidator, normBuf.getRawBuffer(), normBuf, true);
260
261
0
                        ValidationContext* const    theContext =
262
0
                            getValidationContext();
263
264
0
                        if (theContext)
265
0
                        {
266
0
                            try
267
0
                            {
268
0
                                if (tokenizeBuffer) {
269
0
                                    XMLStringTokenizer tokenizer(normBuf.getRawBuffer(), fMemoryManager);
270
0
                                    while (tokenizer.hasMoreTokens()) {
271
0
                                        attrValidator->validate(
272
0
                                            tokenizer.nextToken(),
273
0
                                            theContext,
274
0
                                            fMemoryManager);
275
0
                                    }
276
0
                                }
277
0
                                else {
278
0
                                    attrValidator->validate(
279
0
                                        normBuf.getRawBuffer(),
280
0
                                        theContext,
281
0
                                        fMemoryManager);
282
0
                                }
283
0
                            }
284
0
                            catch (const XMLException& idve)
285
0
                            {
286
0
                                fValidator->emitError (XMLValid::DatatypeError, idve.getCode(), idve.getMessage());
287
0
                            }
288
0
                        }
289
0
                    }
290
291
0
                    if(getPSVIHandler())
292
0
                    {
293
0
                        psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
294
0
                        XSSimpleTypeDefinition *validatingType = (attrValidator)
295
0
                            ? (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator)
296
0
                            : 0;
297
                        // no attribute declarations for these...
298
0
                        psviAttr->reset(
299
0
                            fRootElemName
300
0
                            , PSVIItem::VALIDITY_NOTKNOWN
301
0
                            , PSVIItem::VALIDATION_NONE
302
0
                            , validatingType
303
0
                            , 0
304
0
                            , 0
305
0
                            , false
306
0
                            , 0
307
0
                            , attrValidator
308
0
                            );
309
0
                    }
310
0
                }
311
0
            }
312
0
        }
313
314
0
        if (!isNSAttr || fGrammarType == Grammar::DTDGrammarType || otherXSI)
315
0
        {
316
            // Some checking for attribute wild card first (for schema)
317
0
            bool laxThisOne = false;
318
0
            bool skipThisOne = false;
319
320
0
            XMLAttDef* attDefForWildCard = 0;
321
0
            XMLAttDef*  attDef = 0;
322
323
0
            if (fGrammarType == Grammar::SchemaGrammarType) {
324
325
                //retrieve the att def
326
0
                SchemaAttDef* attWildCard = 0;
327
0
                if (currType) {
328
0
                    attDef = currType->getAttDef(suffPtr, uriId);
329
0
                    attWildCard = currType->getAttWildCard();
330
0
                }
331
0
                else if (!currDV) { // check explicitly-set wildcard
332
0
                    attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, uriId);
333
0
                    attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard();
334
0
                }
335
336
                // if not found or faulted in - check for a matching wildcard attribute
337
                // if no matching wildcard attribute, check (un)qualified cases and flag
338
                // appropriate errors
339
0
                if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) {
340
341
0
                    if (attWildCard) {
342
                        //if schema, see if we should lax or skip the validation of this attribute
343
0
                        if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) {
344
345
0
                            if(!skipThisOne)
346
0
                            {
347
0
                                SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId));
348
0
                                if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) {
349
0
                                    RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry();
350
0
                                    if (attRegistry) {
351
0
                                        attDefForWildCard = attRegistry->get(suffPtr);
352
0
                                    }
353
0
                                }
354
0
                            }
355
0
                        }
356
0
                    }
357
0
                    else if (currType) {
358
                        // not found, see if the attDef should be qualified or not
359
0
                        if (uriId == fEmptyNamespaceId) {
360
0
                            attDef = currType->getAttDef(suffPtr
361
0
                                            , fURIStringPool->getId(fGrammar->getTargetNamespace()));
362
0
                            if (fValidate
363
0
                                && attDef
364
0
                                && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
365
                                // the attribute should be qualified
366
0
                                fValidator->emitError
367
0
                                (
368
0
                                    XMLValid::AttributeNotQualified
369
0
                                    , attDef->getFullName()
370
0
                                );
371
0
                                if(fGrammarType == Grammar::SchemaGrammarType) {
372
0
                                    fPSVIElemContext.fErrorOccurred = true;
373
0
                                    if (getPSVIHandler())
374
0
                                    {
375
0
                                        attrValid = PSVIItem::VALIDITY_INVALID;
376
0
                                    }
377
0
                                }
378
0
                            }
379
0
                        }
380
0
                        else {
381
0
                            attDef = currType->getAttDef(suffPtr
382
0
                                            , fEmptyNamespaceId);
383
0
                            if (fValidate
384
0
                                && attDef
385
0
                                && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
386
                                // the attribute should be unqualified
387
0
                                fValidator->emitError
388
0
                                (
389
0
                                    XMLValid::AttributeNotUnQualified
390
0
                                    , attDef->getFullName()
391
0
                                );
392
0
                                if(fGrammarType == Grammar::SchemaGrammarType) {
393
0
                                    fPSVIElemContext.fErrorOccurred = true;
394
0
                                    if (getPSVIHandler())
395
0
                                    {
396
0
                                        attrValid = PSVIItem::VALIDITY_INVALID;
397
0
                                    }
398
0
                                }
399
0
                            }
400
0
                        }
401
0
                    }
402
0
                }
403
0
            }
404
405
            //  Find this attribute within the parent element. We pass both
406
            //  the uriID/name and the raw QName buffer, since we don't know
407
            //  how the derived validator and its elements store attributes.
408
0
            else
409
0
            {
410
0
                if(fGrammarType == Grammar::DTDGrammarType)
411
0
                    attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( namePtr);
412
0
            }
413
414
            // now need to prepare for duplicate detection
415
0
            if(attDef)
416
0
            {
417
0
                unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
418
0
                if(!curCountPtr)
419
0
                {
420
0
                    curCountPtr = getNewUIntPtr();
421
0
                    *curCountPtr = fElemCount;
422
0
                    fAttDefRegistry->put(attDef, curCountPtr);
423
0
                }
424
0
                else if(*curCountPtr < fElemCount)
425
0
                    *curCountPtr = fElemCount;
426
0
                else
427
0
                {
428
0
                    emitError
429
0
                    (
430
0
                        XMLErrs::AttrAlreadyUsedInSTag
431
0
                        , attDef->getFullName()
432
0
                        , elemDecl->getFullName()
433
0
                    );
434
0
                    fPSVIElemContext.fErrorOccurred = true;
435
0
                }
436
0
            }
437
0
            else
438
0
            {
439
0
                if(fGrammarType == Grammar::DTDGrammarType)
440
0
                {
441
0
                    if(!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0))
442
0
                    {
443
0
                        emitError
444
0
                        (
445
0
                            XMLErrs::AttrAlreadyUsedInSTag
446
0
                            , namePtr
447
0
                            , elemDecl->getFullName()
448
0
                        );
449
0
                    }
450
0
                }
451
0
                else // schema grammar
452
0
                {
453
0
                    if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
454
0
                    {
455
0
                        emitError
456
0
                        (
457
0
                            XMLErrs::AttrAlreadyUsedInSTag
458
0
                            , namePtr
459
0
                            , elemDecl->getFullName()
460
0
                        );
461
0
                        fPSVIElemContext.fErrorOccurred = true;
462
0
                    }
463
0
                }
464
0
            }
465
466
0
            if(fGrammarType == Grammar::SchemaGrammarType )
467
0
            {
468
                // if we've found either an attDef or an attDefForWildCard,
469
                // then we're doing full validation and it may still be valid.
470
0
                if(!attDef && !attDefForWildCard)
471
0
                {
472
0
                    if(!laxThisOne && !skipThisOne)
473
0
                    {
474
0
                        fPSVIElemContext.fErrorOccurred = true;
475
0
                    }
476
0
                    if(getPSVIHandler())
477
0
                    {
478
0
                        if(!laxThisOne && !skipThisOne)
479
0
                        {
480
0
                            attrValid = PSVIItem::VALIDITY_INVALID;
481
0
                        }
482
0
                        else if(laxThisOne)
483
0
                        {
484
0
                            attrValid = PSVIItem::VALIDITY_NOTKNOWN;
485
0
                            attrAssessed = PSVIItem::VALIDATION_PARTIAL;
486
0
                        }
487
0
                        else
488
0
                        {
489
0
                            attrValid = PSVIItem::VALIDITY_NOTKNOWN;
490
0
                            attrAssessed = PSVIItem::VALIDATION_NONE;
491
0
                        }
492
0
                    }
493
0
                }
494
0
            }
495
496
0
            bool errorCondition = fValidate && !attDefForWildCard && !attDef;
497
0
            if (errorCondition && !skipThisOne && !laxThisOne)
498
0
            {
499
                //
500
                //  It's not valid for this element, so issue an error if we are
501
                //  validating.
502
                //
503
0
                XMLBufBid bbMsg(&fBufMgr);
504
0
                XMLBuffer& bufMsg = bbMsg.getBuffer();
505
0
                if (uriId != fEmptyNamespaceId) {
506
0
                    XMLBufBid bbURI(&fBufMgr);
507
0
                    XMLBuffer& bufURI = bbURI.getBuffer();
508
509
0
                    getURIText(uriId, bufURI);
510
511
0
                    bufMsg.append(chOpenCurly);
512
0
                    bufMsg.append(bufURI.getRawBuffer());
513
0
                    bufMsg.append(chCloseCurly);
514
0
                }
515
0
                bufMsg.append(suffPtr);
516
0
                fValidator->emitError
517
0
                (
518
0
                    XMLValid::AttNotDefinedForElement
519
0
                    , bufMsg.getRawBuffer()
520
0
                    , elemDecl->getFullName()
521
0
                );
522
0
            }
523
524
            //  Now normalize the raw value since we have the attribute type. We
525
            //  don't care about the return status here. If it failed, an error
526
            //  was issued, which is all we care about.
527
0
            if (attDefForWildCard) {
528
0
                normalizeAttValue(
529
0
                    attDefForWildCard, namePtr, curPair->getValue(), normBuf
530
0
                );
531
532
                //  If we found an attdef for this one, then lets validate it.
533
0
                const XMLCh* xsNormalized = normBuf.getRawBuffer();
534
0
                DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
535
0
                if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
536
0
                {
537
                    // normalize the attribute according to schema whitespace facet
538
0
                    ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
539
0
                    xsNormalized = fWSNormalizeBuf.getRawBuffer();
540
541
0
                    if (fNormalizeData && fValidate) {
542
0
                        normBuf.set(xsNormalized);
543
0
                    }
544
0
                }
545
546
0
                if (fValidate ) {
547
0
                    fValidator->validateAttrValue(
548
0
                        attDefForWildCard, xsNormalized, false, elemDecl
549
0
                    );
550
0
                    attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator();
551
0
                    if(((SchemaValidator *)fValidator)->getErrorOccurred())
552
0
                    {
553
0
                        fPSVIElemContext.fErrorOccurred = true;
554
0
                        if(getPSVIHandler())
555
0
                            attrValid = PSVIItem::VALIDITY_INVALID;
556
0
                    }
557
0
                }
558
0
                else { // no decl; default DOMTypeInfo to anySimpleType
559
0
                    attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
560
0
                }
561
562
                // Save the type for later use
563
0
                attType = attDefForWildCard->getType();
564
0
            }
565
0
            else {
566
0
                normalizeAttValue(
567
0
                    attDef, namePtr, curPair->getValue(), normBuf
568
0
                );
569
570
                //  If we found an attdef for this one, then lets validate it.
571
0
                if (attDef)
572
0
                {
573
0
                    const XMLCh* xsNormalized = normBuf.getRawBuffer();
574
0
                    if (fGrammarType == Grammar::SchemaGrammarType)
575
0
                    {
576
0
                        DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator();
577
0
                        if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
578
0
                        {
579
                            // normalize the attribute according to schema whitespace facet
580
0
                            ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
581
0
                            xsNormalized = fWSNormalizeBuf.getRawBuffer();
582
0
                            if (fNormalizeData && fValidate && !skipThisOne) {
583
0
                                normBuf.set(xsNormalized);
584
0
                            }
585
0
                        }
586
0
                    }
587
588
0
                    if (fValidate && !skipThisOne)
589
0
                    {
590
0
                        fValidator->validateAttrValue(
591
0
                            attDef, xsNormalized, false, elemDecl
592
0
                        );
593
594
0
                        if(fGrammarType == Grammar::SchemaGrammarType)
595
0
                        {
596
0
                            attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator();
597
0
                            if(((SchemaValidator *)fValidator)->getErrorOccurred())
598
0
                            {
599
0
                                fPSVIElemContext.fErrorOccurred = true;
600
0
                                if (getPSVIHandler())
601
0
                                    attrValid = PSVIItem::VALIDITY_INVALID;
602
0
                            }
603
0
                        }
604
0
                    }
605
0
                    else if(fGrammarType == Grammar::SchemaGrammarType) {
606
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
607
0
                    }
608
0
                }
609
0
                else // no attDef at all; default to anySimpleType
610
0
                {
611
0
                    if(fGrammarType == Grammar::SchemaGrammarType) {
612
0
                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
613
0
                    }
614
0
                }
615
616
                // Save the type for later use
617
0
                if (attDef)
618
0
                {
619
0
                    attType = attDef->getType();
620
0
                }
621
0
            }
622
623
            // now fill in the PSVIAttributes entry for this attribute:
624
0
            if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType)
625
0
            {
626
0
                psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
627
0
                SchemaAttDef *actualAttDef = 0;
628
0
                if(attDef)
629
0
                    actualAttDef = (SchemaAttDef *)attDef;
630
0
                else if (attDefForWildCard)
631
0
                    actualAttDef = (SchemaAttDef *)attDefForWildCard;
632
0
                if(actualAttDef)
633
0
                {
634
0
                    XSAttributeDeclaration *attrDecl = (XSAttributeDeclaration *)fModel->getXSObject(actualAttDef);
635
0
                    DatatypeValidator * attrDataType = actualAttDef->getDatatypeValidator();
636
0
                    XSSimpleTypeDefinition *validatingType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrDataType);
637
0
                    if(attrValid != PSVIItem::VALIDITY_VALID)
638
0
                    {
639
0
                        psviAttr->reset
640
0
                        (
641
0
                            fRootElemName
642
0
                            , attrValid
643
0
                            , attrAssessed
644
0
                            , validatingType
645
0
                            , 0
646
0
                            , actualAttDef->getValue()
647
0
                            , false
648
0
                            , attrDecl
649
0
                            , 0
650
0
                        );
651
0
                    }
652
0
                    else
653
0
                    {
654
0
                        XSSimpleTypeDefinition *memberType = 0;
655
0
                        if(validatingType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
656
0
                            memberType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator);
657
0
                        psviAttr->reset
658
0
                        (
659
0
                            fRootElemName
660
0
                            , attrValid
661
0
                            , attrAssessed
662
0
                            , validatingType
663
0
                            , memberType
664
0
                            , actualAttDef->getValue()
665
0
                            , false
666
0
                            , attrDecl
667
0
                            , (memberType)?attrValidator:attrDataType
668
0
                        );
669
0
                    }
670
0
                }
671
0
                else
672
0
                {
673
0
                    psviAttr->reset
674
0
                    (
675
0
                        fRootElemName
676
0
                        , attrValid
677
0
                        , attrAssessed
678
0
                        , 0
679
0
                        , 0
680
0
                        , 0
681
0
                        , false
682
0
                        , 0
683
0
                        , 0
684
0
                    );
685
0
                }
686
0
            }
687
0
        }
688
689
        //  Add this attribute to the attribute list that we use to pass them
690
        //  to the handler. We reuse its existing elements but expand it as
691
        //  required.
692
0
        XMLAttr* curAttr;
693
694
        // check for duplicate namespace attributes:
695
        // by checking for qualified names with the same local part and with prefixes
696
        // which have been bound to namespace names that are identical.
697
0
        if (fGrammarType == Grammar::DTDGrammarType) {
698
0
            if (!toUseHashTable)
699
0
            {
700
0
                for (XMLSize_t attrIndex=0; attrIndex < retCount; attrIndex++) {
701
0
                    curAttr = toFill.elementAt(attrIndex);
702
0
                    if (uriId == curAttr->getURIId() &&
703
0
                        XMLString::equals(suffPtr, curAttr->getName())) {
704
0
                        emitError
705
0
                        (
706
707
0
                         XMLErrs::AttrAlreadyUsedInSTag
708
0
                        , curAttr->getName()
709
0
                        , elemDecl->getFullName()
710
0
                        );
711
0
                    }
712
0
                }
713
0
            }
714
0
            else
715
0
            {
716
0
                if (fAttrDupChkRegistry->containsKey((void*)suffPtr, uriId))
717
0
                {
718
0
                    emitError
719
0
                        (
720
0
                        XMLErrs::AttrAlreadyUsedInSTag
721
0
                        , suffPtr
722
0
                        , elemDecl->getFullName()
723
0
                        );
724
0
                }
725
0
            }
726
0
        }
727
728
0
        if (retCount >= curAttListSize)
729
0
        {
730
0
            curAttr = new (fMemoryManager) XMLAttr
731
0
            (
732
0
                uriId
733
0
                , suffPtr
734
0
                , prefPtr
735
0
                , normBuf.getRawBuffer()
736
0
                , attType
737
0
                , true
738
0
                , fMemoryManager
739
0
            );
740
0
            toFill.addElement(curAttr);
741
0
        }
742
0
        else
743
0
        {
744
0
            curAttr = toFill.elementAt(retCount);
745
0
            curAttr->set
746
0
            (
747
0
                uriId
748
0
                , suffPtr
749
0
                , prefPtr
750
0
                , normBuf.getRawBuffer()
751
0
                , attType
752
0
            );
753
0
            curAttr->setSpecified(true);
754
0
        }
755
756
0
        if (toUseHashTable)
757
0
        {
758
0
            fAttrDupChkRegistry->put((void*)suffPtr, uriId, curAttr);
759
0
        }
760
761
0
        if(psviAttr)
762
0
            psviAttr->setValue(curAttr->getValue());
763
764
        // Bump the count of attrs in the list
765
0
        retCount++;
766
0
    }
767
768
    //  Now, if there are any attributes declared by this element, let's
769
    //  go through them and make sure that any required ones are provided,
770
    //  and fault in any fixed ones and defaulted ones that are not provided
771
    //  literally.
772
0
    if (hasDefs)
773
0
    {
774
        // Check after all specified attrs are scanned
775
        // (1) report error for REQUIRED attrs that are missing (V_TAGc)
776
        // (2) add default attrs if missing (FIXED and NOT_FIXED)
777
778
779
0
        XMLAttDefList &attDefList = getAttDefList(fGrammarType == Grammar::SchemaGrammarType, currType, elemDecl);
780
781
0
        for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
782
0
        {
783
            // Get the current att def, for convenience and its def type
784
0
            const XMLAttDef *curDef = &attDefList.getAttDef(i);
785
0
            const XMLAttDef::DefAttTypes defType = curDef->getDefaultType();
786
0
            unsigned int *attCountPtr = fAttDefRegistry->get((void *)curDef);
787
0
            if (!attCountPtr || *attCountPtr < fElemCount)
788
0
            { // did not occur
789
                // note that since there is no attribute information
790
                // item present, there is no PSVI infoset to augment here *except*
791
                // that the element is invalid
792
793
                //the attribute is not provided
794
0
                if (fValidate)
795
0
                {
796
                    // If we are validating and its required, then an error
797
0
                    if ((defType == XMLAttDef::Required) ||
798
0
                        (defType == XMLAttDef::Required_And_Fixed)  )
799
800
0
                    {
801
0
                        fValidator->emitError
802
0
                        (
803
0
                            XMLValid::RequiredAttrNotProvided
804
0
                            , curDef->getFullName()
805
0
                        );
806
0
                        if(fGrammarType == Grammar::SchemaGrammarType)
807
0
                        {
808
0
                            fPSVIElemContext.fErrorOccurred = true;
809
0
                        }
810
0
                    }
811
0
                    else if ((defType == XMLAttDef::Default) ||
812
0
                            (defType == XMLAttDef::Fixed)  )
813
0
                    {
814
0
                        if (fStandalone && curDef->isExternal())
815
0
                        {
816
                            // XML 1.0 Section 2.9
817
                            // Document is standalone, so attributes must not be defaulted.
818
0
                            fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName());
819
0
                            if(fGrammarType == Grammar::SchemaGrammarType)
820
0
                            {
821
0
                                fPSVIElemContext.fErrorOccurred = true;
822
0
                            }
823
0
                        }
824
0
                    }
825
0
                }
826
827
                //  Fault in the value if needed, and bump the att count.
828
0
                if ((defType == XMLAttDef::Default)
829
0
                    ||  (defType == XMLAttDef::Fixed))
830
0
                {
831
                    // Let the validator pass judgement on the attribute value
832
0
                    if (fValidate)
833
0
                    {
834
0
                        fValidator->validateAttrValue
835
0
                        (
836
0
                            curDef
837
0
                            , curDef->getValue()
838
0
                            , false
839
0
                            , elemDecl
840
0
                        );
841
0
                    }
842
843
0
                    XMLAttr* curAtt;
844
0
                    if (retCount >= curAttListSize)
845
0
                    {
846
0
                        curAtt = new (fMemoryManager) XMLAttr(fMemoryManager);
847
0
                        fValidator->faultInAttr(*curAtt, *curDef);
848
0
                        fAttrList->addElement(curAtt);
849
0
                    }
850
0
                    else
851
0
                    {
852
0
                        curAtt = fAttrList->elementAt(retCount);
853
0
                        fValidator->faultInAttr(*curAtt, *curDef);
854
0
                    }
855
856
0
                    if (fGrammarType == Grammar::DTDGrammarType)
857
0
                    {
858
                        //  Map the new attribute's prefix to a URI id and store
859
                        //  that in the attribute object.
860
0
                        curAtt->setURIId
861
0
                        (
862
0
                            resolvePrefix(curAtt->getPrefix(), ElemStack::Mode_Attribute)
863
0
                        );
864
0
                    }
865
866
                    // Indicate it was not explicitly specified and bump count
867
0
                    curAtt->setSpecified(false);
868
0
                    retCount++;
869
0
                    if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType)
870
0
                    {
871
0
                        QName *attName = ((SchemaAttDef *)curDef)->getAttName();
872
0
                        PSVIAttribute *defAttrToFill = fPSVIAttrList->getPSVIAttributeToFill
873
0
                        (
874
0
                            attName->getLocalPart(), fURIStringPool->getValueForId( attName->getURI())
875
0
                        );
876
0
                        XSAttributeDeclaration *defAttrDecl = (XSAttributeDeclaration *)fModel->getXSObject((void *)curDef);
877
0
                        DatatypeValidator * attrDataType = ((SchemaAttDef *)curDef)->getDatatypeValidator();
878
0
                        XSSimpleTypeDefinition *defAttrType =
879
0
                            (XSSimpleTypeDefinition*)fModel->getXSObject(attrDataType);
880
                        // would have occurred during validation of default value
881
0
                        if(((SchemaValidator *)fValidator)->getErrorOccurred())
882
0
                        {
883
0
                            defAttrToFill->reset(
884
0
                                fRootElemName
885
0
                                , PSVIItem::VALIDITY_INVALID
886
0
                                , PSVIItem::VALIDATION_FULL
887
0
                                , defAttrType
888
0
                                , 0
889
0
                                , curDef->getValue()
890
0
                                , true
891
0
                                , defAttrDecl
892
0
                                , 0
893
0
                            );
894
0
                        }
895
0
                        else
896
0
                        {
897
0
                            XSSimpleTypeDefinition *defAttrMemberType = 0;
898
0
                            if(defAttrType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
899
0
                            {
900
0
                                defAttrMemberType = (XSSimpleTypeDefinition *)fModel->getXSObject
901
0
                                (
902
0
                                    ((SchemaValidator*)fValidator)->getMostRecentAttrValidator()
903
0
                                );
904
0
                            }
905
0
                            defAttrToFill->reset(
906
0
                                fRootElemName
907
0
                                , PSVIItem::VALIDITY_VALID
908
0
                                , PSVIItem::VALIDATION_FULL
909
0
                                , defAttrType
910
0
                                , defAttrMemberType
911
0
                                , curDef->getValue()
912
0
                                , true
913
0
                                , defAttrDecl
914
0
                                , (defAttrMemberType)?((SchemaValidator *)fValidator)->getMostRecentAttrValidator():attrDataType
915
0
                            );
916
0
                        }
917
0
                        defAttrToFill->setValue(curDef->getValue());
918
0
                    }
919
0
                }
920
0
            }
921
0
            else if(attCountPtr)
922
0
            {
923
                //attribute is provided
924
                // (schema) report error for PROHIBITED attrs that are present (V_TAGc)
925
0
                if (defType == XMLAttDef::Prohibited && fValidate)
926
0
                {
927
0
                    fValidator->emitError
928
0
                    (
929
0
                        XMLValid::ProhibitedAttributePresent
930
0
                        , curDef->getFullName()
931
0
                    );
932
0
                    if(fGrammarType == Grammar::SchemaGrammarType)
933
0
                    {
934
0
                        fPSVIElemContext.fErrorOccurred = true;
935
0
                        if (getPSVIHandler())
936
0
                        {
937
0
                            QName *attQName = ((SchemaAttDef *)curDef)->getAttName();
938
                            // bad luck...
939
0
                            PSVIAttribute *prohibitedAttr = fPSVIAttrList->getAttributePSVIByName
940
0
                            (
941
0
                                attQName->getLocalPart(),
942
0
                                fURIStringPool->getValueForId(attQName->getURI())
943
0
                            );
944
0
                            prohibitedAttr->updateValidity(PSVIItem::VALIDITY_INVALID);
945
0
                        }
946
0
                    }
947
0
                }
948
0
            }
949
0
        }
950
0
    }
951
0
    return retCount;
952
0
}
953
954
955
//  This method will take a raw attribute value and normalize it according to
956
//  the rules of the attribute type. It will put the resulting value into the
957
//  passed buffer.
958
//
959
//  This code assumes that escaped characters in the original value (via char
960
//  refs) are prefixed by a 0xFFFF character. This is because some characters
961
//  are legal if escaped only. And some escape chars are not subject to
962
//  normalization rules.
963
bool IGXMLScanner::normalizeAttValue( const   XMLAttDef* const    attDef
964
                                      , const XMLCh* const        attName
965
                                      , const XMLCh* const        value
966
                                      ,       XMLBuffer&          toFill)
967
0
{
968
    // A simple state value for a whitespace processing state machine
969
0
    enum States
970
0
    {
971
0
        InWhitespace
972
0
        , InContent
973
0
    };
974
975
    // Get the type and name
976
0
    const XMLAttDef::AttTypes type = (attDef)?attDef->getType():XMLAttDef::CData;
977
978
    // check to see if it's a tokenized type that is declared externally 
979
0
    bool  isAttTokenizedExternal = (attDef)
980
0
                                   ?attDef->isExternal() && (type == XMLAttDef::ID || 
981
0
                                                             type == XMLAttDef::IDRef || 
982
0
                                                             type == XMLAttDef::IDRefs || 
983
0
                                                             type == XMLAttDef::Entity || 
984
0
                                                             type == XMLAttDef::Entities || 
985
0
                                                             type == XMLAttDef::NmToken || 
986
0
                                                             type == XMLAttDef::NmTokens)
987
0
                                   :false;
988
989
    // Assume its going to go fine, and empty the target buffer in preperation
990
0
    bool retVal = true;
991
0
    toFill.reset();
992
993
    //  Loop through the chars of the source value and normalize it according
994
    //  to the type.
995
0
    XMLCh nextCh;
996
0
    const XMLCh* srcPtr = value;
997
998
0
    if (type == XMLAttDef::CData || type > XMLAttDef::Notation) {
999
        //  Get the next character from the source. We have to watch for
1000
        //  escaped characters (which are indicated by a 0xFFFF value followed
1001
        //  by the char that was escaped.)
1002
0
        while ((nextCh = *srcPtr++)!=0)
1003
0
        {
1004
0
            switch(nextCh)
1005
0
            {
1006
            // Do we have an escaped character ?
1007
0
            case 0xFFFF:
1008
0
                nextCh = *srcPtr++;
1009
0
                break;
1010
0
            case 0x09:
1011
0
            case 0x0A:
1012
0
            case 0x0D:
1013
                // Check Validity Constraint for Standalone document declaration
1014
                // XML 1.0, Section 2.9
1015
0
                if (fStandalone && fValidate && isAttTokenizedExternal)
1016
0
                {
1017
                     // Can't have a standalone document declaration of "yes" if  attribute
1018
                     // values are subject to normalisation
1019
0
                     fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
1020
0
                }
1021
0
                nextCh = chSpace;
1022
0
                break;
1023
0
            case chOpenAngle:
1024
                //  If its not escaped, then make sure its not a < character, which is
1025
                //  not allowed in attribute values.
1026
0
                emitError(XMLErrs::BracketInAttrValue, attName);
1027
0
                retVal = false;
1028
0
                break;
1029
0
            }
1030
1031
            // Add this char to the target buffer
1032
0
            toFill.append(nextCh);
1033
0
        }
1034
0
    }
1035
0
    else {
1036
0
        States curState = InContent;
1037
0
        bool firstNonWS = false;
1038
        //  Get the next character from the source. We have to watch for
1039
        //  escaped characters (which are indicated by a 0xFFFF value followed
1040
        //  by the char that was escaped.)
1041
0
        while ((nextCh = *srcPtr)!=0)
1042
0
        {
1043
            // Do we have an escaped character ?
1044
0
            if (nextCh == 0xFFFF)
1045
0
            {
1046
0
                nextCh = *++srcPtr;
1047
0
            }
1048
0
            else if (nextCh == chOpenAngle) {
1049
                //  If its not escaped, then make sure its not a < character, which is
1050
                //  not allowed in attribute values.
1051
0
                emitError(XMLErrs::BracketInAttrValue, attName);
1052
0
                retVal = false;
1053
0
            }
1054
1055
0
            if (curState == InWhitespace)
1056
0
            {
1057
0
                if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
1058
0
                {
1059
0
                    if (firstNonWS)
1060
0
                        toFill.append(chSpace);
1061
0
                    curState = InContent;
1062
0
                    firstNonWS = true;
1063
0
                }
1064
0
                else
1065
0
                {
1066
0
                    srcPtr++;
1067
0
                    continue;
1068
0
                }
1069
0
            }
1070
0
            else if (curState == InContent)
1071
0
            {
1072
0
                if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
1073
0
                {
1074
0
                    curState = InWhitespace;
1075
0
                    srcPtr++;
1076
1077
                    // Check Validity Constraint for Standalone document declaration
1078
                    // XML 1.0, Section 2.9
1079
0
                    if (fStandalone && fValidate && isAttTokenizedExternal)
1080
0
                    {
1081
0
                        if (!firstNonWS || (nextCh != chSpace && *srcPtr && fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr)))
1082
0
                        {
1083
                            // Can't have a standalone document declaration of "yes" if  attribute
1084
                            // values are subject to normalisation
1085
0
                            fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
1086
0
                        }
1087
0
                    }
1088
0
                    continue;
1089
0
                }
1090
0
                firstNonWS = true;
1091
0
            }
1092
1093
            // Add this char to the target buffer
1094
0
            toFill.append(nextCh);
1095
1096
            // And move up to the next character in the source
1097
0
            srcPtr++;
1098
0
        }
1099
0
    }
1100
1101
0
    return retVal;
1102
0
}
1103
1104
//  This method will just normalize the input value as CDATA without
1105
//  any standalone checking.
1106
bool IGXMLScanner::normalizeAttRawValue( const   XMLCh* const        attrName
1107
                                      , const XMLCh* const        value
1108
                                      ,       XMLBuffer&          toFill)
1109
0
{
1110
    // Assume its going to go fine, and empty the target buffer in preperation
1111
0
    bool retVal = true;
1112
0
    toFill.reset();
1113
1114
    //  Loop through the chars of the source value and normalize it according
1115
    //  to the type.
1116
0
    bool escaped;
1117
0
    XMLCh nextCh;
1118
0
    const XMLCh* srcPtr = value;
1119
0
    while (*srcPtr)
1120
0
    {
1121
        //  Get the next character from the source. We have to watch for
1122
        //  escaped characters (which are indicated by a 0xFFFF value followed
1123
        //  by the char that was escaped.)
1124
0
        nextCh = *srcPtr;
1125
0
        escaped = (nextCh == 0xFFFF);
1126
0
        if (escaped)
1127
0
            nextCh = *++srcPtr;
1128
1129
        //  If its not escaped, then make sure its not a < character, which is
1130
        //  not allowed in attribute values.
1131
0
        if (!escaped && (*srcPtr == chOpenAngle))
1132
0
        {
1133
0
            emitError(XMLErrs::BracketInAttrValue, attrName);
1134
0
            retVal = false;
1135
0
        }
1136
1137
0
        if (!escaped)
1138
0
        {
1139
            //  NOTE: Yes this is a little redundant in that a 0x20 is
1140
            //  replaced with an 0x20. But its faster to do this (I think)
1141
            //  than checking for 9, A, and D separately.
1142
0
            if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
1143
0
                nextCh = chSpace;
1144
0
        }
1145
1146
        // Add this char to the target buffer
1147
0
        toFill.append(nextCh);
1148
1149
        // And move up to the next character in the source
1150
0
        srcPtr++;
1151
0
    }
1152
0
    return retVal;
1153
0
}
1154
1155
//  This method will reset the scanner data structures, and related plugged
1156
//  in stuff, for a new scan session. We get the input source for the primary
1157
//  XML entity, create the reader for it, and push it on the stack so that
1158
//  upon successful return from here we are ready to go.
1159
void IGXMLScanner::scanReset(const InputSource& src)
1160
17.6k
{
1161
    //  This call implicitly tells us that we are going to reuse the scanner
1162
    //  if it was previously used. So tell the validator to reset itself.
1163
    //
1164
    //  But, if the fUseCacheGrammar flag is set, then don't reset it.
1165
    //
1166
    //  NOTE:   The ReaderMgr is flushed on the way out, because that is
1167
    //          required to insure that files are closed.
1168
17.6k
    fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar);
1169
17.6k
    fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar);
1170
1171
    // Clear transient schema info list.
1172
    //
1173
17.6k
    fSchemaInfoList->removeAll ();
1174
1175
    // fModel may need updating, as fGrammarResolver could have cleaned it
1176
17.6k
    if(getPSVIHandler())
1177
0
        fModel = fGrammarResolver->getXSModel();
1178
1179
17.6k
    {
1180
17.6k
        XMLDTDDescriptionImpl   theDTDDescription(XMLUni::fgDTDEntityString, fMemoryManager);
1181
17.6k
        fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(&theDTDDescription);
1182
17.6k
    }
1183
1184
17.6k
    if (!fDTDGrammar) {
1185
1186
17.6k
        fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
1187
17.6k
        fGrammarResolver->putGrammar(fDTDGrammar);
1188
17.6k
    }
1189
0
    else
1190
0
        fDTDGrammar->reset();
1191
1192
17.6k
    fGrammar = fDTDGrammar;
1193
17.6k
    fGrammarType = fGrammar->getGrammarType();
1194
17.6k
    fRootGrammar = 0;
1195
1196
17.6k
    if (fValidatorFromUser) {
1197
0
        if (fValidator->handlesDTD())
1198
0
            fValidator->setGrammar(fGrammar);
1199
0
        else if (fValidator->handlesSchema()) {
1200
1201
0
            ((SchemaValidator*) fValidator)->setErrorReporter(fErrorReporter);
1202
0
            ((SchemaValidator*) fValidator)->setGrammarResolver(fGrammarResolver);
1203
0
            ((SchemaValidator*) fValidator)->setExitOnFirstFatal(fExitOnFirstFatal);
1204
0
        }
1205
0
    }
1206
17.6k
    else {
1207
        // set fValidator as fDTDValidator
1208
17.6k
        fValidator = fDTDValidator;
1209
17.6k
        fValidator->setGrammar(fGrammar);
1210
17.6k
    }
1211
1212
    // Reset validation
1213
17.6k
    fValidate = (fValScheme == Val_Always) ? true : false;
1214
1215
    // Ignore skipDTDValidation flag if no schema processing is taking place */
1216
17.6k
    fSkipDTDValidation = fSkipDTDValidation && fDoSchema;
1217
1218
    //  And for all installed handlers, send reset events. This gives them
1219
    //  a chance to flush any cached data.
1220
17.6k
    if (fDocHandler)
1221
0
        fDocHandler->resetDocument();
1222
17.6k
    if (fEntityHandler)
1223
0
        fEntityHandler->resetEntities();
1224
17.6k
    if (fErrorReporter)
1225
0
        fErrorReporter->resetErrors();
1226
1227
    // Clear out the id reference list
1228
17.6k
    resetValidationContext();
1229
1230
    // Reset the Root Element Name
1231
17.6k
    fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
1232
17.6k
    fRootElemName = 0;
1233
1234
    // Reset IdentityConstraints
1235
17.6k
    if (fICHandler)
1236
17.6k
        fICHandler->reset();
1237
1238
    //  Reset the element stack, and give it the latest ids for the special
1239
    //  URIs it has to know about.
1240
17.6k
    fElemStack.reset
1241
17.6k
    (
1242
17.6k
        fEmptyNamespaceId
1243
17.6k
        , fUnknownNamespaceId
1244
17.6k
        , fXMLNamespaceId
1245
17.6k
        , fXMLNSNamespaceId
1246
17.6k
    );
1247
1248
17.6k
    if (!fSchemaNamespaceId)
1249
17.6k
        fSchemaNamespaceId  = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI);
1250
1251
    // Reset some status flags
1252
17.6k
    fInException = false;
1253
17.6k
    fStandalone = false;
1254
17.6k
    fErrorCount = 0;
1255
17.6k
    fHasNoDTD = true;
1256
17.6k
    fSeeXsi = false;
1257
1258
    // Reset PSVI context
1259
    // note that we always need this around for DOMTypeInfo
1260
17.6k
    if (!fPSVIElement)
1261
17.6k
        fPSVIElement = new (fMemoryManager) PSVIElement(fMemoryManager);
1262
1263
17.6k
    if (!fErrorStack)
1264
17.6k
    {
1265
17.6k
        fErrorStack = new (fMemoryManager) ValueStackOf<bool>(8, fMemoryManager);
1266
17.6k
    }
1267
0
    else
1268
0
    {
1269
0
        fErrorStack->removeAllElements();
1270
0
    }
1271
1272
17.6k
    resetPSVIElemContext();
1273
1274
    // Reset the validators
1275
17.6k
    fDTDValidator->reset();
1276
17.6k
    fDTDValidator->setErrorReporter(fErrorReporter);
1277
17.6k
    fSchemaValidator->reset();
1278
17.6k
    fSchemaValidator->setErrorReporter(fErrorReporter);
1279
17.6k
    fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
1280
17.6k
    fSchemaValidator->setGrammarResolver(fGrammarResolver);
1281
17.6k
    if (fValidatorFromUser)
1282
0
        fValidator->reset();
1283
1284
    //  Handle the creation of the XML reader object for this input source.
1285
    //  This will provide us with transcoding and basic lexing services.
1286
17.6k
    XMLReader* newReader = fReaderMgr.createReader
1287
17.6k
    (
1288
17.6k
        src
1289
17.6k
        , true
1290
17.6k
        , XMLReader::RefFrom_NonLiteral
1291
17.6k
        , XMLReader::Type_General
1292
17.6k
        , XMLReader::Source_External
1293
17.6k
        , fCalculateSrcOfs
1294
17.6k
        , fLowWaterMark
1295
17.6k
    );
1296
1297
17.6k
    if (!newReader) {
1298
0
        if (src.getIssueFatalErrorIfNotFound())
1299
0
            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
1300
0
        else
1301
0
            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
1302
0
    }
1303
1304
    // Push this read onto the reader manager
1305
17.6k
    fReaderMgr.pushReader(newReader, 0);
1306
1307
    // and reset security-related things if necessary:
1308
17.6k
    if(fSecurityManager != 0)
1309
0
    {
1310
0
        fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
1311
0
        fEntityExpansionCount = 0;
1312
0
    }
1313
17.6k
    fElemCount = 0;
1314
17.6k
    if(fUIntPoolRowTotal >= 32)
1315
0
    { // 8 KB tied up with validating attributes...
1316
0
        fAttDefRegistry->removeAll();
1317
0
        recreateUIntPool();
1318
0
    }
1319
17.6k
    else
1320
17.6k
    {
1321
        // note that this will implicitly reset the values of the hashtables,
1322
        // though their buckets will still be tied up
1323
17.6k
        resetUIntPool();
1324
17.6k
    }
1325
17.6k
    fUndeclaredAttrRegistry->removeAll();
1326
17.6k
    fDTDElemNonDeclPool->removeAll();
1327
17.6k
}
1328
1329
1330
//  This method is called between markup in content. It scans for character
1331
//  data that is sent to the document handler. It watches for any markup
1332
//  characters that would indicate that the character data has ended. It also
1333
//  handles expansion of general and character entities.
1334
//
1335
//  sendData() is a local static helper for this method which handles some
1336
//  code that must be done in three different places here.
1337
void IGXMLScanner::sendCharData(XMLBuffer& toSend)
1338
3.13M
{
1339
    // If no data in the buffer, then nothing to do
1340
3.13M
    if (toSend.isEmpty())
1341
219k
        return;
1342
1343
    //  We do different things according to whether we are validating or
1344
    //  not. If not, its always just characters; else, it depends on the
1345
    //  current element's content model.
1346
2.91M
    if (fValidate)
1347
2.90M
    {
1348
        // Get the raw data we need for the callback
1349
2.90M
        const XMLCh* rawBuf = toSend.getRawBuffer();
1350
2.90M
        XMLSize_t len = toSend.getLen();
1351
1352
        // And see if the current element is a 'Children' style content model
1353
2.90M
        const ElemStack::StackElem* topElem = fElemStack.topElement();
1354
1355
        // Get the character data opts for the current element
1356
2.90M
        XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
1357
2.90M
        if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
1358
0
        {
1359
            // And see if the current element is a 'Children' style content model
1360
0
            ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
1361
0
            if(currType)
1362
0
            {
1363
0
                SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
1364
0
                if(modelType == SchemaElementDecl::Children ||
1365
0
                   modelType == SchemaElementDecl::ElementOnlyEmpty)
1366
0
                    charOpts = XMLElementDecl::SpacesOk;
1367
0
                else if(modelType == SchemaElementDecl::Empty)
1368
0
                    charOpts = XMLElementDecl::NoCharData;
1369
0
            }
1370
0
        } else // DTD grammar
1371
2.90M
            charOpts = topElem->fThisElement->getCharDataOpts();
1372
1373
2.90M
        if (charOpts == XMLElementDecl::NoCharData)
1374
1.63k
        {
1375
            // They definitely cannot handle any type of char data
1376
1.63k
            fValidator->emitError(XMLValid::NoCharDataInCM);
1377
            //if(fGrammarType == Grammar::SchemaGrammarType)
1378
            //{
1379
              //  if (getPSVIHandler())
1380
              //  {
1381
                    // REVISIT:
1382
                    // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
1383
              //  }
1384
           // }
1385
1.63k
        }
1386
2.90M
        else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len))
1387
361k
        {
1388
            //  Its all spaces. So, if they can take spaces, then send it
1389
            //  as ignorable whitespace. If they can handle any char data
1390
            //  send it as characters.
1391
361k
            if (charOpts == XMLElementDecl::SpacesOk) {
1392
4.78k
                if (fDocHandler)
1393
0
                    fDocHandler->ignorableWhitespace(rawBuf, len, false);
1394
4.78k
            }
1395
356k
            else if (charOpts == XMLElementDecl::AllCharData)
1396
356k
            {
1397
356k
                if (fGrammarType != Grammar::SchemaGrammarType)
1398
356k
                {
1399
356k
                    if (fDocHandler)
1400
0
                        fDocHandler->docCharacters(rawBuf, len, false);
1401
356k
                }
1402
0
                else
1403
0
                {
1404
0
                    XMLSize_t xsLen;
1405
0
                    const XMLCh* xsNormalized;
1406
0
                    SchemaValidator *schemaValidator = (SchemaValidator *)fValidator;
1407
0
                    DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
1408
0
                    if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
1409
0
                    {
1410
                        // normalize the character according to schema whitespace facet
1411
0
                        ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
1412
0
                        xsNormalized = fWSNormalizeBuf.getRawBuffer();
1413
0
                        xsLen = fWSNormalizeBuf.getLen();
1414
0
                    }
1415
0
                    else {
1416
0
                        xsNormalized = rawBuf;
1417
0
                        xsLen = len ;
1418
0
                    }
1419
1420
                    // tell the schema validation about the character data for checkContent later
1421
0
                    schemaValidator->setDatatypeBuffer(xsNormalized);
1422
1423
                    // call all active identity constraints
1424
0
                    if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
1425
0
                        fContent.append(xsNormalized, xsLen);
1426
0
                    }
1427
1428
0
                    if (fDocHandler) {
1429
0
                        if (fNormalizeData) {
1430
0
                           fDocHandler->docCharacters(xsNormalized, xsLen, false);
1431
0
                        }
1432
0
                        else {
1433
0
                            fDocHandler->docCharacters(rawBuf, len, false);
1434
0
                        }
1435
0
                    }
1436
0
                }
1437
356k
            }
1438
361k
        }
1439
2.54M
        else
1440
2.54M
        {
1441
            //  If they can take any char data, then send it. Otherwise, they
1442
            //  can only handle whitespace and can't handle this stuff so
1443
            //  issue an error.
1444
2.54M
            if (charOpts == XMLElementDecl::AllCharData)
1445
2.50M
            {
1446
2.50M
                if (fGrammarType != Grammar::SchemaGrammarType)
1447
2.50M
                {
1448
2.50M
                    if (fDocHandler)
1449
0
                        fDocHandler->docCharacters(rawBuf, len, false);
1450
2.50M
                }
1451
0
                else
1452
0
                {
1453
0
                    XMLSize_t xsLen;
1454
0
                    const XMLCh* xsNormalized;
1455
0
                    SchemaValidator *schemaValidator = (SchemaValidator*)fValidator;
1456
0
                    DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
1457
0
                    if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
1458
0
                    {
1459
                        // normalize the character according to schema whitespace facet
1460
0
                        ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
1461
0
                        xsNormalized = fWSNormalizeBuf.getRawBuffer();
1462
0
                        xsLen = fWSNormalizeBuf.getLen();
1463
0
                    }
1464
0
                    else {
1465
0
                        xsNormalized = rawBuf;
1466
0
                        xsLen = len;
1467
0
                    }
1468
1469
                    // tell the schema validation about the character data for checkContent later
1470
0
                    schemaValidator->setDatatypeBuffer(xsNormalized);
1471
1472
                    // call all active identity constraints
1473
0
                    if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
1474
0
                        fContent.append(xsNormalized, xsLen);
1475
0
                    }
1476
1477
0
                    if (fDocHandler) {
1478
0
                        if (fNormalizeData) {
1479
0
                            fDocHandler->docCharacters(xsNormalized, xsLen, false);
1480
0
                        }
1481
0
                        else {
1482
0
                            fDocHandler->docCharacters(rawBuf, len, false);
1483
0
                        }
1484
0
                    }
1485
0
                }
1486
2.50M
            }
1487
40.3k
            else
1488
40.3k
            {
1489
40.3k
                fValidator->emitError(XMLValid::NoCharDataInCM);
1490
40.3k
                if(fGrammarType == Grammar::SchemaGrammarType)
1491
0
                {
1492
0
                    if (getPSVIHandler())
1493
0
                    {
1494
                        // REVISIT:
1495
                        // PSVIAttribute->setValidity(PSVIItem::VALIDITY_INVALID);
1496
0
                    }
1497
0
                }
1498
40.3k
            }
1499
2.54M
        }
1500
2.90M
    }
1501
4.17k
    else
1502
4.17k
    {
1503
        // call all active identity constraints
1504
4.17k
        if (fGrammarType == Grammar::SchemaGrammarType) {
1505
1506
0
            if (toCheckIdentityConstraint() && fICHandler->getMatcherCount())
1507
0
                fContent.append(toSend.getRawBuffer(), toSend.getLen());
1508
0
        }
1509
1510
        // Always assume its just char data if not validating
1511
4.17k
        if (fDocHandler)
1512
0
            fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
1513
4.17k
    }
1514
1515
    // Reset buffer
1516
2.91M
    toSend.reset();
1517
2.91M
}
1518
1519
1520
1521
//  This method is called with a key/value string pair that represents an
1522
//  xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the
1523
//  current top of the element stack based on this data. We know that when
1524
//  we get here, that it is one of these forms, so we don't bother confirming
1525
//  it.
1526
//
1527
//  But we have to ensure
1528
//      1. xxx is not xmlns
1529
//      2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
1530
//      3. yyy is not XMLUni::fgXMLNSURIName
1531
//      4. if xxx is not null, then yyy cannot be an empty string.
1532
void IGXMLScanner::updateNSMap(const  XMLCh* const    attrName
1533
                            , const XMLCh* const    attrValue)
1534
0
{
1535
0
    updateNSMap(attrName, attrValue, XMLString::indexOf(attrName, chColon));
1536
0
}
1537
1538
void IGXMLScanner::updateNSMap(const  XMLCh* const    attrName
1539
                            , const XMLCh* const    attrValue
1540
                            , const int colonOfs)
1541
0
{
1542
    // We need a buffer to normalize the attribute value into
1543
0
    XMLBufBid bbNormal(&fBufMgr);
1544
0
    XMLBuffer& normalBuf = bbNormal.getBuffer();
1545
1546
    //  Normalize the value into the passed buffer. In this case, we don't
1547
    //  care about the return value. An error was issued for the error, which
1548
    //  is all we care about here.
1549
0
    normalizeAttRawValue(attrName, attrValue, normalBuf);
1550
0
    XMLCh* namespaceURI = normalBuf.getRawBuffer();
1551
1552
    //  We either have the default prefix (""), or we point it into the attr
1553
    //  name parameter. Note that the xmlns is not the prefix we care about
1554
    //  here. To us, the 'prefix' is really the local part of the attrName
1555
    //  parameter.
1556
    //
1557
    //  Check 1. xxx is not xmlns
1558
    //        2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
1559
    //        3. yyy is not XMLUni::fgXMLNSURIName
1560
    //        4. if xxx is not null, then yyy cannot be an empty string.
1561
0
    const XMLCh* prefPtr = XMLUni::fgZeroLenString;
1562
0
    if (colonOfs != -1) {
1563
0
        prefPtr = &attrName[colonOfs + 1];
1564
1565
0
        if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString))
1566
0
            emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
1567
0
        else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) {
1568
0
            if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
1569
0
                emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
1570
0
        }
1571
1572
0
        if (!namespaceURI)
1573
0
            emitError(XMLErrs::NoEmptyStrNamespace, attrName);
1574
0
        else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
1575
0
            emitError(XMLErrs::NoEmptyStrNamespace, attrName);
1576
0
    }
1577
1578
0
    if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName))
1579
0
        emitError(XMLErrs::NoUseOfxmlnsURI);
1580
0
    else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) {
1581
0
        if (!XMLString::equals(prefPtr, XMLUni::fgXMLString))
1582
0
            emitError(XMLErrs::XMLURINotMatchXMLPrefix);
1583
0
    }
1584
1585
    //  Ok, we have to get the unique id for the attribute value, which is the
1586
    //  URI that this value should be mapped to. The validator has the
1587
    //  namespace string pool, so we ask him to find or add this new one. Then
1588
    //  we ask the element stack to add this prefix to URI Id mapping.
1589
0
    fElemStack.addPrefix
1590
0
    (
1591
0
        prefPtr
1592
0
        , fURIStringPool->addOrFind(namespaceURI)
1593
0
    );
1594
0
}
1595
1596
//  Walks the first 'attCount' entries of the raw attribute list looking for
//  namespace declarations and, when schema processing is enabled and the XSI
//  namespace URI has been seen, for the xsi:* attributes.
//
//  Three passes are made:
//      1. xmlns / xmlns:xxx attributes update the element stack's namespace
//         mappings.
//      2. xsi:schemaLocation / xsi:noNamespaceSchemaLocation are used to
//         resolve schema grammars.
//      3. xsi:type / xsi:nil are handed to the schema validator.
void IGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount)
{
    // Pass 1: namespace declarations
    for (XMLSize_t attIdx = 0; attIdx < attCount; ++attIdx)
    {
        // each attribute has the prefix:suffix="value"
        const KVStringPair* const rawAttr = fRawAttrList->elementAt(attIdx);
        const XMLCh* const attName = rawAttr->getKey();

        //  Only keys that begin with "xmlns:" or are just plain "xmlns" are
        //  of interest here.
        const bool declaresNS =
               XMLString::compareNString(attName, XMLUni::fgXMLNSColonString, 6) == 0
            || XMLString::equals(attName, XMLUni::fgXMLNSString);
        if (!declaresNS)
            continue;

        const XMLCh* const attValue = rawAttr->getValue();
        updateNSMap(attName, attValue, fRawAttrColonList[attIdx]);

        // remember that the schema instance URI has been seen
        if (XMLString::equals(attValue, SchemaSymbols::fgURI_XSI))
            fSeeXsi = true;
    }

    // The "xsi:...." attributes only matter when doing schema processing
    // and the XSI namespace has been seen.
    if (!fDoSchema || !fSeeXsi)
        return;

    // Pass 2: schema location hints
    for (XMLSize_t attIdx = 0; attIdx < attCount; ++attIdx)
    {
        // each attribute has the prefix:suffix="value"
        const KVStringPair* const rawAttr = fRawAttrList->elementAt(attIdx);
        const XMLCh* const attName = rawAttr->getKey();
        const int colonInd = fRawAttrColonList[attIdx];

        const XMLCh* prefPtr = XMLUni::fgZeroLenString;
        if (colonInd != -1)
        {
            fURIBuf.set(attName, colonInd);
            prefPtr = fURIBuf.getRawBuffer();
        }

        // skip anything whose prefix does not resolve to the schema namespace id
        if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) != fSchemaNamespaceId)
            continue;

        const XMLCh* const attValue = rawAttr->getValue();
        const XMLCh* const suffPtr = &attName[colonInd + 1];

        // scan for the schema location and uri and resolve the schema grammar
        if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
            parseSchemaLocation(attValue);
        else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
            resolveSchemaGrammar(attValue, XMLUni::fgZeroLenString);
    }

    // Pass 3: done separately, as xsi:type and xsi:nil only work if the
    // schema grammar has been already loaded (JIRA XERCESC-1937)
    for (XMLSize_t attIdx = 0; attIdx < attCount; ++attIdx)
    {
        const KVStringPair* const rawAttr = fRawAttrList->elementAt(attIdx);
        const XMLCh* const attName = rawAttr->getKey();
        const int colonInd = fRawAttrColonList[attIdx];

        const XMLCh* prefPtr = XMLUni::fgZeroLenString;
        if (colonInd != -1)
        {
            fURIBuf.set(attName, colonInd);
            prefPtr = fURIBuf.getRawBuffer();
        }

        if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) != fSchemaNamespaceId)
            continue;

        const XMLCh* const attValue = rawAttr->getValue();
        const XMLCh* const suffPtr = &attName[colonInd + 1];

        // scan for schema type / nil
        const bool isXsiType = XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE);
        const bool isXsiNil  = XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL);
        if (!isXsiType && !isXsiNil)
            continue;

        if (!fValidator || !fValidator->handlesSchema())
        {
            // If we are in the DTD mode, try to switch to the Schema
            // mode. For that we need to find any XML Schema grammar
            // that we can switch to. Such a grammar can only come
            // from the cache (if it came from the schemaLocation
            // attribute, we would be in the Schema mode already).
            //
            XMLGrammarPool* pool = fGrammarResolver->getGrammarPool ();
            RefHashTableOfEnumerator<Grammar> grammars = pool->getGrammarEnumerator ();

            while (grammars.hasMoreElements ())
            {
                Grammar& candidate (grammars.nextElement ());

                if (candidate.getGrammarType () == Grammar::SchemaGrammarType)
                {
                    switchGrammar (candidate.getTargetNamespace ());
                    break;
                }
            }
        }

        // Without a schema capable validator there is nothing more to do
        if (!(fValidator && fValidator->handlesSchema()))
            continue;

        if (isXsiType)
        {
            XMLBufBid bbXsi(&fBufMgr);
            XMLBuffer& xsiTypeBuf = bbXsi.getBuffer();

            // normalize the attribute according to schema whitespace facet
            DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
            normalizeAttRawValue(SchemaSymbols::fgXSI_TYPE, attValue, xsiTypeBuf);
            ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsiTypeBuf.getRawBuffer(), xsiTypeBuf, true);

            if (!xsiTypeBuf.isEmpty())
            {
                // Resolve the QName; the validator is given the prefix, the
                // part of the value following colonPos, and the URI id.
                int colonPos = -1;
                unsigned int uriId = resolveQName (
                      xsiTypeBuf.getRawBuffer()
                    , fPrefixBuf
                    , ElemStack::Mode_Element
                    , colonPos
                );
                ((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), xsiTypeBuf.getRawBuffer() + colonPos + 1, uriId);
            }
        }
        else
        {
            // Not xsi:type, so this is the xsi:nil attribute.
            // normalize the attribute according to schema whitespace facet
            XMLBufBid bbXsi(&fBufMgr);
            XMLBuffer& xsiNilBuf = bbXsi.getBuffer();

            DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
            normalizeAttRawValue(SchemaSymbols::fgATT_NILL, attValue, xsiNilBuf);
            ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsiNilBuf.getRawBuffer(), xsiNilBuf, true);

            if (XMLString::equals(xsiNilBuf.getRawBuffer(), SchemaSymbols::fgATTVAL_TRUE))
                ((SchemaValidator*)fValidator)->setNillable(true);
            else if (XMLString::equals(xsiNilBuf.getRawBuffer(), SchemaSymbols::fgATTVAL_FALSE))
                ((SchemaValidator*)fValidator)->setNillable(false);
            else
                emitError(XMLErrs::InvalidAttValue, xsiNilBuf.getRawBuffer(), attValue);
        }
    }
}
1747
1748
void IGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr, bool ignoreLoadSchema)
1749
0
{
1750
0
    XMLCh* locStr = XMLString::replicate(schemaLocationStr, fMemoryManager);
1751
0
    ArrayJanitor<XMLCh> janLoc(locStr, fMemoryManager);
1752
1753
0
    processSchemaLocation(locStr);
1754
0
    XMLSize_t size = fLocationPairs->size();
1755
1756
0
    if (size % 2 != 0 ) {
1757
0
        emitError(XMLErrs::BadSchemaLocation);
1758
0
    } else {
1759
        // We need a buffer to normalize the attribute value into
1760
0
        XMLBuffer normalBuf(1023, fMemoryManager);
1761
0
        for(XMLSize_t i=0; i<size; i=i+2) {
1762
0
            normalizeAttRawValue(SchemaSymbols::fgXSI_SCHEMALOCATION, fLocationPairs->elementAt(i), normalBuf);
1763
0
            resolveSchemaGrammar(fLocationPairs->elementAt(i+1), normalBuf.getRawBuffer(), ignoreLoadSchema);
1764
0
        }
1765
0
    }
1766
0
}
1767
1768
0
void IGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri, bool ignoreLoadSchema) {
1769
1770
0
    Grammar* grammar = 0;
1771
1772
0
    {
1773
0
        XMLSchemaDescriptionImpl    theSchemaDescription(uri, fMemoryManager);
1774
0
        theSchemaDescription.setLocationHints(loc);
1775
0
        grammar = fGrammarResolver->getGrammar(&theSchemaDescription);
1776
0
    }
1777
1778
    // If multi-import is enabled, make sure the existing grammar came
1779
    // from the import directive. Otherwise we may end up reloading
1780
    // the same schema that came from the external grammar pool. Ideally,
1781
    // we would move fSchemaInfoList to XMLGrammarPool so that it survives
1782
    // the destruction of the scanner in which case we could rely on the
1783
    // same logic we use to weed out duplicate schemas below.
1784
    //
1785
0
    if (!grammar ||
1786
0
        grammar->getGrammarType() == Grammar::DTDGrammarType ||
1787
0
        (getHandleMultipleImports() &&
1788
0
         ((XMLSchemaDescription*)grammar->getGrammarDescription())->
1789
0
         getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
1790
0
    {
1791
0
      if (fLoadSchema || ignoreLoadSchema)
1792
0
      {
1793
0
        XSDDOMParser parser(0, fMemoryManager, 0);
1794
1795
0
        parser.setValidationScheme(XercesDOMParser::Val_Never);
1796
0
        parser.setDoNamespaces(true);
1797
0
        parser.setUserEntityHandler(fEntityHandler);
1798
0
        parser.setUserErrorReporter(fErrorReporter);
1799
1800
        //Normalize loc
1801
0
        XMLBufBid nnSys(&fBufMgr);
1802
0
        XMLBuffer& normalizedSysId = nnSys.getBuffer();
1803
0
        XMLString::removeChar(loc, 0xFFFF, normalizedSysId);
1804
0
        const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
1805
1806
        // Create a buffer for expanding the system id
1807
0
        XMLBufBid bbSys(&fBufMgr);
1808
0
        XMLBuffer& expSysId = bbSys.getBuffer();
1809
1810
        //  Allow the entity handler to expand the system id if they choose
1811
        //  to do so.
1812
0
        InputSource* srcToFill = 0;
1813
0
        if (fEntityHandler)
1814
0
        {
1815
0
            if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
1816
0
                expSysId.set(normalizedURI);
1817
1818
0
            ReaderMgr::LastExtEntityInfo lastInfo;
1819
0
            fReaderMgr.getLastExtEntityInfo(lastInfo);
1820
0
            XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::SchemaGrammar,
1821
0
                            expSysId.getRawBuffer(), uri, XMLUni::fgZeroLenString, lastInfo.systemId,
1822
0
                            &fReaderMgr);
1823
0
            srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
1824
0
        }
1825
0
        else
1826
0
        {
1827
0
            expSysId.set(normalizedURI);
1828
0
        }
1829
1830
        //  If they didn't create a source via the entity handler, then we
1831
        //  have to create one on our own.
1832
0
        if (!srcToFill)
1833
0
        {
1834
0
            if (fDisableDefaultEntityResolution)
1835
0
                return;
1836
1837
0
            ReaderMgr::LastExtEntityInfo lastInfo;
1838
0
            fReaderMgr.getLastExtEntityInfo(lastInfo);
1839
1840
0
            XMLURL urlTmp(fMemoryManager);
1841
0
            if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
1842
0
                (urlTmp.isRelative()))
1843
0
            {
1844
0
                if (!fStandardUriConformant)
1845
0
                {
1846
0
                    XMLBufBid  ddSys(&fBufMgr);
1847
0
                    XMLBuffer& resolvedSysId = ddSys.getBuffer();
1848
0
                    XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
1849
1850
0
                    srcToFill = new (fMemoryManager) LocalFileInputSource
1851
0
                    (
1852
0
                        lastInfo.systemId
1853
0
                        , resolvedSysId.getRawBuffer()
1854
0
                        , fMemoryManager
1855
0
                    );
1856
0
                }
1857
0
                else
1858
0
                    ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
1859
0
            }
1860
0
            else
1861
0
            {
1862
0
                if (fStandardUriConformant && urlTmp.hasInvalidChar())
1863
0
                    ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
1864
0
                srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
1865
0
            }
1866
0
        }
1867
1868
        // Put a janitor on the input source
1869
0
        Janitor<InputSource> janSrc(srcToFill);
1870
1871
        // Check if this exact schema has already been seen.
1872
        //
1873
0
        const XMLCh* sysId = srcToFill->getSystemId();
1874
0
        unsigned int uriId = (uri && *uri) ? fURIStringPool->addOrFind(uri) : fEmptyNamespaceId;
1875
0
        SchemaInfo* importSchemaInfo = 0;
1876
1877
0
        if (fUseCachedGrammar)
1878
0
          importSchemaInfo = fCachedSchemaInfoList->get(sysId, uriId);
1879
1880
0
        if (!importSchemaInfo && !fToCacheGrammar)
1881
0
          importSchemaInfo = fSchemaInfoList->get(sysId, uriId);
1882
1883
0
        if (importSchemaInfo)
1884
0
        {
1885
          // We haven't added any new grammars so it is safe to just
1886
          // return.
1887
          //
1888
0
          return;
1889
0
        }
1890
1891
        // Should just issue warning if the schema is not found
1892
0
        bool flag = srcToFill->getIssueFatalErrorIfNotFound();
1893
0
        srcToFill->setIssueFatalErrorIfNotFound(false);
1894
1895
0
        parser.parse(*srcToFill);
1896
1897
        // Reset the InputSource
1898
0
        srcToFill->setIssueFatalErrorIfNotFound(flag);
1899
1900
0
        if (parser.getSawFatal() && fExitOnFirstFatal)
1901
0
            emitError(XMLErrs::SchemaScanFatalError);
1902
1903
0
        DOMDocument* document = parser.getDocument(); //Our Grammar
1904
1905
0
        if (document != 0) {
1906
1907
0
            DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
1908
0
            if (root != 0)
1909
0
            {
1910
0
                const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
1911
0
                bool newGrammar = false;
1912
0
                if (!XMLString::equals(newUri, uri)) {
1913
0
                    if (fValidate || fValScheme == Val_Auto) {
1914
0
                        fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
1915
0
                    }
1916
1917
0
                    grammar = fGrammarResolver->getGrammar(newUri);
1918
0
                    newGrammar = true;
1919
0
                }
1920
1921
0
                if (!grammar ||
1922
0
                    grammar->getGrammarType() == Grammar::DTDGrammarType ||
1923
0
                    (getHandleMultipleImports() &&
1924
0
                     ((XMLSchemaDescription*)grammar->getGrammarDescription())->
1925
0
                     getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
1926
0
                {
1927
                    // If we switched namespace URI, recheck the schema info.
1928
                    //
1929
0
                    if (newGrammar)
1930
0
                    {
1931
0
                      unsigned int newUriId = (newUri && *newUri) ? fURIStringPool->addOrFind(newUri) : fEmptyNamespaceId;
1932
1933
0
                      if (fUseCachedGrammar)
1934
0
                        importSchemaInfo = fCachedSchemaInfoList->get(sysId, newUriId);
1935
1936
0
                      if (!importSchemaInfo && !fToCacheGrammar)
1937
0
                        importSchemaInfo = fSchemaInfoList->get(sysId, newUriId);
1938
1939
0
                      if (importSchemaInfo)
1940
0
                        return;
1941
0
                    }
1942
1943
                    //  Since we have seen a grammar, set our validation flag
1944
                    //  at this point if the validation scheme is auto
1945
0
                    if (fValScheme == Val_Auto && !fValidate) {
1946
0
                        fValidate = true;
1947
0
                        fElemStack.setValidationFlag(fValidate);
1948
0
                    }
1949
1950
                    // we have seen a schema, so set up the fValidator as fSchemaValidator
1951
0
                    if (!fValidator->handlesSchema())
1952
0
                    {
1953
0
                        if (fValidatorFromUser) {
1954
                            // the fValidator is from user
1955
0
                            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
1956
0
                        }
1957
0
                        else {
1958
0
                            fValidator = fSchemaValidator;
1959
0
                        }
1960
0
                    }
1961
1962
0
                    bool grammarFound = grammar &&
1963
0
                      grammar->getGrammarType() == Grammar::SchemaGrammarType;
1964
1965
0
                    SchemaGrammar* schemaGrammar;
1966
1967
0
                    if (grammarFound) {
1968
0
                      schemaGrammar = (SchemaGrammar*) grammar;
1969
0
                    }
1970
0
                    else {
1971
0
                      schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
1972
0
                    }
1973
1974
0
                    XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
1975
0
                    gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
1976
0
                    gramDesc->setLocationHints(sysId);
1977
1978
0
                    TraverseSchema traverseSchema
1979
0
                    (
1980
0
                        root
1981
0
                        , fURIStringPool
1982
0
                        , schemaGrammar
1983
0
                        , fGrammarResolver
1984
0
                        , fUseCachedGrammar ? fCachedSchemaInfoList : fSchemaInfoList
1985
0
                        , fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList
1986
0
                        , this
1987
0
                        , sysId
1988
0
                        , fEntityHandler
1989
0
                        , fErrorReporter
1990
0
                        , fMemoryManager
1991
0
                        , grammarFound
1992
0
                    );
1993
1994
                    // Reset the now invalid schema roots in the collected
1995
                    // schema info entries.
1996
                    //
1997
0
                    {
1998
0
                      RefHash2KeysTableOfEnumerator<SchemaInfo> i (
1999
0
                        fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList);
2000
2001
0
                      while (i.hasMoreElements ())
2002
0
                        i.nextElement().resetRoot ();
2003
0
                    }
2004
2005
0
                    if (fGrammarType == Grammar::DTDGrammarType) {
2006
0
                        fGrammar = schemaGrammar;
2007
0
                        fGrammarType = Grammar::SchemaGrammarType;
2008
0
                        fValidator->setGrammar(fGrammar);
2009
0
                    }
2010
2011
0
                    if (fValidate) {
2012
                        //  validate the Schema scan so far
2013
0
                        fValidator->preContentValidation(false);
2014
0
                    }
2015
0
                }
2016
0
            }
2017
0
        }
2018
0
      }
2019
0
    }
2020
0
    else
2021
0
    {
2022
        //  Since we have seen a grammar, set our validation flag
2023
        //  at this point if the validation scheme is auto
2024
0
        if (fValScheme == Val_Auto && !fValidate) {
2025
0
            fValidate = true;
2026
0
            fElemStack.setValidationFlag(fValidate);
2027
0
        }
2028
2029
        // we have seen a schema, so set up the fValidator as fSchemaValidator
2030
0
        if (!fValidator->handlesSchema())
2031
0
        {
2032
0
            if (fValidatorFromUser) {
2033
                // the fValidator is from user
2034
0
                ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
2035
0
            }
2036
0
            else {
2037
0
                fValidator = fSchemaValidator;
2038
0
            }
2039
0
        }
2040
2041
0
        if (fGrammarType == Grammar::DTDGrammarType) {
2042
0
            fGrammar = grammar;
2043
0
            fGrammarType = Grammar::SchemaGrammarType;
2044
0
            fValidator->setGrammar(fGrammar);
2045
0
        }
2046
0
    }
2047
2048
    // fModel may need updating:
2049
0
    if(getPSVIHandler())
2050
0
        fModel = fGrammarResolver->getXSModel();
2051
0
}
2052
2053
// ---------------------------------------------------------------------------
//  Builds an input source for the entity identified by sysId / pubId.
//
//  The system id is first stripped of 0xFFFF marker characters. An installed
//  entity handler gets the chance to expand the id and to supply the source
//  itself. If no source comes back, and default entity resolution has not
//  been disabled, a URL based source or a local file based source is
//  created here.
//
//  Returns the source created by the handler or by this method, or 0 when
//  the handler supplied nothing and default resolution is disabled. Throws
//  MalformedURLException when standard URI conformance is requested and the
//  id does not form an acceptable URL.
// ---------------------------------------------------------------------------
InputSource* IGXMLScanner::resolveSystemId(const XMLCh* const sysId
                                          ,const XMLCh* const pubId)
{
    // Strip the 0xFFFF markers out of the incoming system id
    XMLBufBid strippedBid(&fBufMgr);
    XMLBuffer& strippedId = strippedBid.getBuffer();
    XMLString::removeChar(sysId, 0xFFFF, strippedId);
    const XMLCh* const strippedURI = strippedId.getRawBuffer();

    // Buffer that receives the (possibly handler-expanded) system id
    XMLBufBid expandedBid(&fBufMgr);
    XMLBuffer& expandedId = expandedBid.getBuffer();

    if (!fEntityHandler)
    {
        expandedId.set(strippedURI);
    }
    else
    {
        // Let the handler expand the id; fall back to the stripped id
        if (!fEntityHandler->expandSystemId(strippedURI, expandedId))
            expandedId.set(strippedURI);

        ReaderMgr::LastExtEntityInfo handlerCtx;
        fReaderMgr.getLastExtEntityInfo(handlerCtx);
        XMLResourceIdentifier resId
        (
            XMLResourceIdentifier::ExternalEntity
            , expandedId.getRawBuffer()
            , 0
            , pubId
            , handlerCtx.systemId
            , &fReaderMgr
        );

        // A source supplied by the handler wins outright
        InputSource* const fromHandler = fEntityHandler->resolveEntity(&resId);
        if (fromHandler)
            return fromHandler;
    }

    // Nothing from the handler: only continue if we may resolve ourselves
    if (fDisableDefaultEntityResolution)
        return 0;

    ReaderMgr::LastExtEntityInfo baseInfo;
    fReaderMgr.getLastExtEntityInfo(baseInfo);

    XMLURL candidate(fMemoryManager);
    const bool haveAbsoluteUrl =
        candidate.setURL(baseInfo.systemId, expandedId.getRawBuffer(), candidate)
        && !candidate.isRelative();

    if (haveAbsoluteUrl)
    {
        if (fStandardUriConformant && candidate.hasInvalidChar())
            ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);

        return new (fMemoryManager) URLInputSource(candidate, fMemoryManager);
    }

    // Not a usable URL. That is an error under strict URI conformance,
    // otherwise the id is treated as a local file.
    if (fStandardUriConformant)
        ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);

    XMLBufBid normalizedBid(&fBufMgr);
    XMLBuffer& normalizedPath = normalizedBid.getBuffer();
    XMLUri::normalizeURI(expandedId.getRawBuffer(), normalizedPath);

    return new (fMemoryManager) LocalFileInputSource
    (
        baseInfo.systemId
        , normalizedPath.getRawBuffer()
        , fMemoryManager
    );
}
2126
2127
2128
// ---------------------------------------------------------------------------
2129
//  IGXMLScanner: Private grammar preparsing methods
2130
// ---------------------------------------------------------------------------
2131
// ---------------------------------------------------------------------------
//  Pre-parses the XML Schema document supplied by src and returns the
//  resulting grammar.
//
//  src      The schema document. Its "issue fatal error if not found" flag
//           is switched off for the duration of the parse and restored
//           afterwards, including when the parse throws.
//  toCache  If true the schema info collected during traversal goes into
//           the cached schema info list, and the grammar resolver is asked
//           to cache its grammars.
//
//  Returns the grammar for the schema's target namespace (either one already
//  known to the grammar resolver or a newly traversed one), or 0 when the
//  parse produced no document or no document element.
//
//  Throws RuntimeException (Gen_NoSchemaValidator) when a user supplied
//  validator that cannot handle schema is installed and validation is on.
// ---------------------------------------------------------------------------
Grammar* IGXMLScanner::loadXMLSchemaGrammar(const InputSource& src,
                                            const bool toCache)
{
    // Reset the validators
    fSchemaValidator->reset();
    fSchemaValidator->setErrorReporter(fErrorReporter);
    fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
    fSchemaValidator->setGrammarResolver(fGrammarResolver);

    if (fValidatorFromUser)
        fValidator->reset();

    // Make sure the active validator is one that can deal with schema
    if (!fValidator->handlesSchema()) {
        if (fValidatorFromUser && fValidate)
            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
        else {
            fValidator = fSchemaValidator;
        }
    }

    XSDDOMParser parser(0, fMemoryManager, 0);

    parser.setValidationScheme(XercesDOMParser::Val_Never);
    parser.setDoNamespaces(true);
    parser.setUserEntityHandler(fEntityHandler);
    parser.setUserErrorReporter(fErrorReporter);

    // Should just issue warning if the schema is not found. The source
    // belongs to the caller, so remember its setting and put it back on
    // every exit path from the parse.
    const bool savedFatalIfNotFound = src.getIssueFatalErrorIfNotFound();
    InputSource& mutableSrc = const_cast<InputSource&>(src);
    mutableSrc.setIssueFatalErrorIfNotFound(false);

    try
    {
        parser.parse(src);
    }
    catch (...)
    {
        // Do not leave the caller's InputSource modified if parsing fails
        mutableSrc.setIssueFatalErrorIfNotFound(savedFatalIfNotFound);
        throw;
    }

    // Reset the InputSource
    mutableSrc.setIssueFatalErrorIfNotFound(savedFatalIfNotFound);

    if (parser.getSawFatal() && fExitOnFirstFatal)
        emitError(XMLErrs::SchemaScanFatalError);

    DOMDocument* document = parser.getDocument(); //Our Grammar

    if (document != 0) {

        DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
        if (root != 0)
        {
            const XMLCh* nsUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
            Grammar* grammar = fGrammarResolver->getGrammar(nsUri);

            // Check if this exact schema has already been seen. The lookup
            // key is the system id plus the id of the target namespace (or
            // the empty namespace id when there is no target namespace).
            //
            const XMLCh* sysId = src.getSystemId();
            SchemaInfo* importSchemaInfo = 0;

            if (grammar)
            {
              if (nsUri && *nsUri)
                importSchemaInfo = fCachedSchemaInfoList->get(sysId, fURIStringPool->addOrFind(nsUri));
              else
                importSchemaInfo = fCachedSchemaInfoList->get(sysId, fEmptyNamespaceId);
            }

            if (!importSchemaInfo)
            {
              // An existing schema grammar is only extended when multiple
              // imports are enabled; otherwise a fresh grammar is built.
              bool grammarFound = grammar &&
                grammar->getGrammarType() == Grammar::SchemaGrammarType &&
                getHandleMultipleImports();

              SchemaGrammar* schemaGrammar;

              if (grammarFound)
                schemaGrammar = (SchemaGrammar*) grammar;
              else
                schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);

              XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
              gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
              gramDesc->setLocationHints(sysId);

              // Constructing the traverser performs the schema traversal
              TraverseSchema traverseSchema
                (
                  root
                  , fURIStringPool
                  , schemaGrammar
                  , fGrammarResolver
                  , fCachedSchemaInfoList
                  , toCache ? fCachedSchemaInfoList : fSchemaInfoList
                  , this
                  , sysId
                  , fEntityHandler
                  , fErrorReporter
                  , fMemoryManager
                  , grammarFound
                );

              grammar = schemaGrammar;

              // Reset the now invalid schema roots in the collected
              // schema info entries.
              //
              {
                RefHash2KeysTableOfEnumerator<SchemaInfo> i (
                  toCache ? fCachedSchemaInfoList : fSchemaInfoList);

                while (i.hasMoreElements ())
                  i.nextElement().resetRoot ();
              }
            }

            if (fValidate) {
              //  validate the Schema scan so far
              fValidator->setGrammar(grammar);
              fValidator->preContentValidation(false);
            }

            if (toCache) {
              fGrammarResolver->cacheGrammars();
            }

            // fModel may need updating when a PSVI handler is installed
            if(getPSVIHandler())
              fModel = fGrammarResolver->getXSModel();

            return grammar;
        }
    }

    return 0;
}
2259
2260
2261
2262
// ---------------------------------------------------------------------------
2263
//  IGXMLScanner: Private parsing methods
2264
// ---------------------------------------------------------------------------
2265
2266
//  This method is called to do a raw scan of an attribute value. It does not
2267
//  do normalization (since we don't know their types yet.) It just scans the
2268
//  value and does entity expansion.
2269
//
2270
//  End-of-entity events must be dealt with here. During DTD scan, they can come
2271
//  from external entities. During content, they can come from any entity.
2272
//  We just eat the end of entity and continue with our scan until we come
2273
//  to the closing quote. If an unterminated value causes us to go through
2274
//  subsequent entities, that will cause errors back in the calling code,
2275
//  but there's little we can do about it here.
2276
bool IGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill)
2277
0
{
2278
    // Reset the target buffer
2279
0
    toFill.reset();
2280
2281
    // Get the next char which must be a single or double quote
2282
0
    XMLCh quoteCh;
2283
0
    if (!fReaderMgr.skipIfQuote(quoteCh))
2284
0
        return false;
2285
2286
    //  We have to get the current reader because we have to ignore closing
2287
    //  quotes until we hit the same reader again.
2288
0
    const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
2289
2290
    //  Loop until we get the attribute value. Note that we use a double
2291
    //  loop here to avoid the setup/teardown overhead of the exception
2292
    //  handler on every round.
2293
0
    while (true)
2294
0
    {
2295
0
        try
2296
0
        {
2297
0
            while(true)
2298
0
            {
2299
0
                XMLCh nextCh = fReaderMgr.getNextChar();
2300
2301
0
                if (nextCh != quoteCh)
2302
0
                {
2303
0
                    if (nextCh != chAmpersand)
2304
0
                    {
2305
0
                        if ((nextCh < 0xD800) || (nextCh > 0xDFFF))
2306
0
                        {
2307
                            // Its got to at least be a valid XML character
2308
0
                            if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
2309
0
                            {
2310
0
                                if (nextCh == 0)
2311
0
                                    ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
2312
2313
0
                                XMLCh tmpBuf[9];
2314
0
                                XMLString::binToText
2315
0
                                (
2316
0
                                    nextCh
2317
0
                                    , tmpBuf
2318
0
                                    , 8
2319
0
                                    , 16
2320
0
                                    , fMemoryManager
2321
0
                                );
2322
0
                                emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
2323
0
                            }
2324
0
                        } else // its a surrogate
2325
0
                        {
2326
                            // Deal with surrogate pairs
2327
2328
                            //  we expect a a leading surrogate.
2329
0
                            if (nextCh <= 0xDBFF)
2330
0
                            {
2331
0
                                toFill.append(nextCh);
2332
2333
                                //  process the trailing surrogate
2334
0
                                nextCh = fReaderMgr.getNextChar();
2335
2336
                                //  it should be a trailing surrogate.
2337
0
                                if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
2338
0
                                {
2339
0
                                    emitError(XMLErrs::Expected2ndSurrogateChar);
2340
0
                                }
2341
0
                            } else
2342
0
                            {
2343
                                //  Its a trailing surrogate, but we are not expecting it
2344
0
                                emitError(XMLErrs::Unexpected2ndSurrogateChar);
2345
0
                            }
2346
0
                        }
2347
0
                    } else // its a chAmpersand
2348
0
                    {
2349
                        //  Check for an entity ref . We ignore the empty flag in
2350
                        //  this one.
2351
2352
0
                        bool    escaped;
2353
0
                        XMLCh   firstCh;
2354
0
                        XMLCh   secondCh
2355
0
                            ;
2356
                        // If it was not returned directly, then jump back up
2357
0
                        if (scanEntityRef(true, firstCh, secondCh, escaped) == EntityExp_Returned)
2358
0
                        {
2359
                            //  If it was escaped, then put in a 0xFFFF value. This will
2360
                            //  be used later during validation and normalization of the
2361
                            //  value to know that the following character was via an
2362
                            //  escape char.
2363
0
                            if (escaped)
2364
0
                                toFill.append(0xFFFF);
2365
2366
0
                            toFill.append(firstCh);
2367
0
                            if (secondCh)
2368
0
                                toFill.append(secondCh);
2369
0
                        }
2370
0
                        continue;
2371
0
                    }
2372
0
                } else // its a quoteCh
2373
0
                {
2374
                    //  Check for our ending quote. It has to be in the same entity
2375
                    //  as where we started. Quotes in nested entities are ignored.
2376
2377
0
                    if (curReader == fReaderMgr.getCurrentReaderNum())
2378
0
                    {
2379
0
                        return true;
2380
0
                    }
2381
2382
                    // Watch for spillover into a previous entity
2383
0
                    if (curReader > fReaderMgr.getCurrentReaderNum())
2384
0
                    {
2385
0
                        emitError(XMLErrs::PartialMarkupInEntity);
2386
0
                        return false;
2387
0
                    }
2388
0
                }
2389
2390
                // add it to the buffer
2391
0
                toFill.append(nextCh);
2392
2393
0
            }
2394
0
        }
2395
0
        catch(const EndOfEntityException&)
2396
0
        {
2397
            // Just eat it and continue.
2398
0
        }
2399
0
    }
2400
0
    return true;
2401
0
}
2402
2403
2404
bool IGXMLScanner::scanAttValue(  const   XMLAttDef* const    attDef
2405
                                  , const XMLCh* const        attrName
2406
                                  ,       XMLBuffer&          toFill)
2407
1.43M
{
2408
1.43M
    enum States
2409
1.43M
    {
2410
1.43M
        InWhitespace
2411
1.43M
        , InContent
2412
1.43M
    };
2413
2414
    // Get the type and name
2415
1.43M
    const XMLAttDef::AttTypes type = (attDef)
2416
1.43M
                ?attDef->getType()
2417
1.43M
                :XMLAttDef::CData;
2418
2419
    // Reset the target buffer
2420
1.43M
    toFill.reset();
2421
2422
    // Get the next char which must be a single or double quote
2423
1.43M
    XMLCh quoteCh;
2424
1.43M
    if (!fReaderMgr.skipIfQuote(quoteCh))
2425
125
        return false;
2426
2427
    //  We have to get the current reader because we have to ignore closing
2428
    //  quotes until we hit the same reader again.
2429
1.43M
    const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
2430
2431
    // check to see if it's a tokenized type that is declared externally 
2432
1.43M
    bool  isAttTokenizedExternal = (attDef)
2433
1.43M
                                   ?attDef->isExternal() && (type == XMLAttDef::ID || 
2434
503k
                                                             type == XMLAttDef::IDRef || 
2435
502k
                                                             type == XMLAttDef::IDRefs || 
2436
202
                                                             type == XMLAttDef::Entity || 
2437
202
                                                             type == XMLAttDef::Entities || 
2438
202
                                                             type == XMLAttDef::NmToken || 
2439
202
                                                             type == XMLAttDef::NmTokens)
2440
1.43M
                                   :false;
2441
2442
    //  Loop until we get the attribute value. Note that we use a double
2443
    //  loop here to avoid the setup/teardown overhead of the exception
2444
    //  handler on every round.
2445
1.43M
    XMLCh   nextCh;
2446
1.43M
    XMLCh   secondCh = 0;
2447
1.43M
    States  curState = InContent;
2448
1.43M
    bool    firstNonWS = false;
2449
1.43M
    bool    gotLeadingSurrogate = false;
2450
1.43M
    bool    escaped;
2451
1.43M
    while (true)
2452
1.43M
    {
2453
1.43M
        try
2454
1.43M
        {
2455
99.1M
            while(true)
2456
99.1M
            {
2457
99.1M
                nextCh = fReaderMgr.getNextChar();
2458
2459
99.1M
                if (!nextCh)
2460
267
                    ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
2461
2462
                // Check for our ending quote in the same entity
2463
99.1M
                if (nextCh == quoteCh)
2464
1.47M
                {
2465
1.47M
                    if (curReader == fReaderMgr.getCurrentReaderNum())
2466
1.43M
                        return true;
2467
2468
                    // Watch for spillover into a previous entity
2469
37.5k
                    if (curReader > fReaderMgr.getCurrentReaderNum())
2470
2
                    {
2471
2
                        emitError(XMLErrs::PartialMarkupInEntity);
2472
2
                        return false;
2473
2
                    }
2474
37.5k
                }
2475
2476
                //  Check for an entity ref now, before we let it affect our
2477
                //  whitespace normalization logic below. We ignore the empty flag
2478
                //  in this one.
2479
97.6M
                escaped = false;
2480
97.6M
                if (nextCh == chAmpersand)
2481
120k
                {
2482
120k
                    if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
2483
98.4k
                    {
2484
98.4k
                        gotLeadingSurrogate = false;
2485
98.4k
                        continue;
2486
98.4k
                    }
2487
120k
                }
2488
97.5M
                else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
2489
12.5k
                {
2490
                    // Deal with surrogate pairs
2491
                    //  Its a leading surrogate. If we already got one, then
2492
                    //  issue an error, else set leading flag to make sure that
2493
                    //  we look for a trailing next time.
2494
12.5k
                    if (gotLeadingSurrogate)
2495
4
                        emitError(XMLErrs::Expected2ndSurrogateChar);
2496
12.5k
                     else
2497
12.5k
                        gotLeadingSurrogate = true;
2498
12.5k
                }
2499
97.5M
                else
2500
97.5M
                {
2501
                    //  If its a trailing surrogate, make sure that we are
2502
                    //  prepared for that. Else, its just a regular char so make
2503
                    //  sure that we were not expected a trailing surrogate.
2504
97.5M
                    if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
2505
12.5k
                    {
2506
                        // Its trailing, so make sure we were expecting it
2507
12.5k
                        if (!gotLeadingSurrogate)
2508
4
                            emitError(XMLErrs::Unexpected2ndSurrogateChar);
2509
12.5k
                    }
2510
97.5M
                    else
2511
97.5M
                    {
2512
                        //  Its just a char, so make sure we were not expecting a
2513
                        //  trailing surrogate.
2514
97.5M
                        if (gotLeadingSurrogate)
2515
2
                            emitError(XMLErrs::Expected2ndSurrogateChar);
2516
2517
                        // Its got to at least be a valid XML character
2518
97.5M
                        if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
2519
26
                        {
2520
26
                            XMLCh tmpBuf[9];
2521
26
                            XMLString::binToText
2522
26
                            (
2523
26
                                nextCh
2524
26
                                , tmpBuf
2525
26
                                , 8
2526
26
                                , 16
2527
26
                                , fMemoryManager
2528
26
                            );
2529
26
                            emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
2530
26
                        }
2531
97.5M
                    }
2532
97.5M
                    gotLeadingSurrogate = false;
2533
97.5M
                }
2534
2535
                //  If its not escaped, then make sure its not a < character, which
2536
                //  is not allowed in attribute values.
2537
97.5M
                if (!escaped && (nextCh == chOpenAngle))
2538
36
                    emitError(XMLErrs::BracketInAttrValue, attrName);
2539
2540
                //  If the attribute is a CDATA type we do simple replacement of
2541
                //  tabs and new lines with spaces, if the character is not escaped
2542
                //  by way of a char ref.
2543
                //
2544
                //  Otherwise, we do the standard non-CDATA normalization of
2545
                //  compressing whitespace to single spaces and getting rid of leading
2546
                //  and trailing whitespace.
2547
97.5M
                if (type == XMLAttDef::CData)
2548
44.8M
                {
2549
44.8M
                    if (!escaped)
2550
44.8M
                    {
2551
44.8M
                        if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D))
2552
1.11M
                        {
2553
                            // Check Validity Constraint for Standalone document declaration
2554
                            // XML 1.0, Section 2.9
2555
1.11M
                            if (fStandalone && fValidate && isAttTokenizedExternal)
2556
0
                            {
2557
                                // Can't have a standalone document declaration of "yes" if  attribute
2558
                                // values are subject to normalisation
2559
0
                                fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
2560
0
                            }
2561
1.11M
                            nextCh = chSpace;
2562
1.11M
                        }
2563
44.8M
                    }
2564
44.8M
                }
2565
52.7M
                else
2566
52.7M
                {
2567
52.7M
                    if (curState == InWhitespace)
2568
539k
                    {
2569
539k
                        if ((escaped && nextCh != chSpace) || !fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
2570
462k
                        {
2571
462k
                            if (firstNonWS)
2572
456k
                                toFill.append(chSpace);
2573
462k
                            curState = InContent;
2574
462k
                            firstNonWS = true;
2575
462k
                        }
2576
76.7k
                        else
2577
76.7k
                        {
2578
76.7k
                            continue;
2579
76.7k
                        }
2580
539k
                    }
2581
52.2M
                    else if (curState == InContent)
2582
52.2M
                    {
2583
52.2M
                        if ((nextCh == chSpace) ||
2584
52.0M
                            (fReaderMgr.getCurrentReader()->isWhitespace(nextCh) && !escaped))
2585
468k
                        {
2586
468k
                            curState = InWhitespace;
2587
2588
                            // Check Validity Constraint for Standalone document declaration
2589
                            // XML 1.0, Section 2.9
2590
468k
                            if (fStandalone && fValidate && isAttTokenizedExternal)
2591
6.06k
                            {
2592
6.06k
                                if (!firstNonWS || (nextCh != chSpace && fReaderMgr.lookingAtSpace()))
2593
2.66k
                                {
2594
                                     // Can't have a standalone document declaration of "yes" if  attribute
2595
                                     // values are subject to normalisation
2596
2.66k
                                     fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
2597
2.66k
                                }
2598
6.06k
                            }
2599
468k
                            continue;
2600
468k
                        }
2601
51.7M
                        firstNonWS = true;
2602
51.7M
                    }
2603
52.7M
                }
2604
2605
                // Else add it to the buffer
2606
97.0M
                toFill.append(nextCh);
2607
2608
97.0M
                if (secondCh)
2609
4.93k
                {
2610
4.93k
                    toFill.append(secondCh);
2611
4.93k
                    secondCh=0;
2612
4.93k
                }
2613
97.0M
            }
2614
1.43M
        }
2615
1.43M
        catch(const EndOfEntityException&)
2616
1.43M
        {
2617
            // Just eat it and continue.
2618
0
            gotLeadingSurrogate = false;
2619
0
            escaped = false;
2620
0
        }
2621
1.43M
    }
2622
0
    return true;
2623
1.43M
}
2624
2625
2626
//  This method scans a CDATA section. It collects the character into one
2627
//  of the temp buffers and calls the document handler, if any, with the
2628
//  characters. It assumes that the <![CDATA string has been scanned before
2629
//  this call.
2630
void IGXMLScanner::scanCDSection()
2631
65.5k
{
2632
65.5k
    static const XMLCh CDataClose[] =
2633
65.5k
    {
2634
65.5k
            chCloseSquare, chCloseAngle, chNull
2635
65.5k
    };
2636
2637
    //  The next character should be the opening square bracket. If not
2638
    //  issue an error, but then try to recover by skipping any whitespace
2639
    //  and checking again.
2640
65.5k
    if (!fReaderMgr.skippedChar(chOpenSquare))
2641
1
    {
2642
1
        emitError(XMLErrs::ExpectedOpenSquareBracket);
2643
1
        fReaderMgr.skipPastSpaces();
2644
2645
        // If we still don't find it, then give up, else keep going
2646
1
        if (!fReaderMgr.skippedChar(chOpenSquare))
2647
0
            return;
2648
1
    }
2649
2650
    // Get a buffer for this
2651
65.5k
    XMLBufBid bbCData(&fBufMgr);
2652
2653
    //  We just scan forward until we hit the end of CDATA section sequence.
2654
    //  CDATA is effectively a big escape mechanism so we don't treat markup
2655
    //  characters specially here.
2656
65.5k
    bool            emittedError = false;
2657
65.5k
    bool    gotLeadingSurrogate = false;
2658
65.5k
    const ElemStack::StackElem* topElem = fElemStack.topElement();
2659
2660
    // Get the character data opts for the current element
2661
65.5k
    XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
2662
65.5k
    if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
2663
0
    {
2664
        // And see if the current element is a 'Children' style content model
2665
0
        ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
2666
0
        if(currType)
2667
0
        {
2668
0
            SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
2669
0
            if(modelType == SchemaElementDecl::Children ||
2670
0
               modelType == SchemaElementDecl::ElementOnlyEmpty)
2671
0
                charOpts = XMLElementDecl::SpacesOk;
2672
0
            else if(modelType == SchemaElementDecl::Empty)
2673
0
                charOpts = XMLElementDecl::NoCharData;
2674
0
        }
2675
0
    } else // DTD grammar
2676
65.5k
        charOpts = topElem->fThisElement->getCharDataOpts();
2677
2678
3.52M
    while (true)
2679
3.52M
    {
2680
3.52M
        const XMLCh nextCh = fReaderMgr.getNextChar();
2681
2682
        // Watch for unexpected end of file
2683
3.52M
        if (!nextCh)
2684
92
        {
2685
92
            emitError(XMLErrs::UnterminatedCDATASection);
2686
92
            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
2687
92
        }
2688
2689
3.52M
        if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
2690
522
        {
2691
            // This document is standalone; this ignorable CDATA whitespace is forbidden.
2692
            // XML 1.0, Section 2.9
2693
            // And see if the current element is a 'Children' style content model
2694
522
            if (topElem->fThisElement->isExternal()) {
2695
2696
363
                if (charOpts == XMLElementDecl::SpacesOk) // Element Content
2697
0
                {
2698
                    // Error - standalone should have a value of "no" as whitespace detected in an
2699
                    // element type with element content whose element declaration was external
2700
0
                    fValidator->emitError(XMLValid::NoWSForStandalone);
2701
0
                    if(fGrammarType == Grammar::SchemaGrammarType)
2702
0
                    {
2703
0
                        if (getPSVIHandler())
2704
0
                        {
2705
                            // REVISIT:
2706
                            // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
2707
0
                        }
2708
0
                    }
2709
0
                }
2710
363
            }
2711
522
        }
2712
2713
        //  If this is a close square bracket it could be our closing
2714
        //  sequence.
2715
3.52M
        if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
2716
65.4k
        {
2717
            //  make sure we were not expecting a trailing surrogate.
2718
65.4k
            if (gotLeadingSurrogate)
2719
1
                emitError(XMLErrs::Expected2ndSurrogateChar);
2720
2721
65.4k
            if (fGrammarType == Grammar::SchemaGrammarType) {
2722
2723
0
                XMLSize_t xsLen = bbCData.getLen();
2724
0
                const XMLCh* xsNormalized = bbCData.getRawBuffer();
2725
0
                DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
2726
0
                if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
2727
0
                {
2728
                    // normalize the character according to schema whitespace facet
2729
0
                    ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf);
2730
0
                    xsNormalized = fWSNormalizeBuf.getRawBuffer();
2731
0
                    xsLen = fWSNormalizeBuf.getLen();
2732
0
                    if (fNormalizeData && fValidate) {
2733
0
                        bbCData.set(xsNormalized);
2734
0
                    }
2735
0
                }
2736
2737
0
                if (fValidate) {
2738
2739
                    // tell the schema validation about the character data for checkContent later
2740
0
                    ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
2741
2742
0
                    if (charOpts != XMLElementDecl::AllCharData)
2743
0
                    {
2744
                        // They definitely cannot handle any type of char data
2745
0
                        fValidator->emitError(XMLValid::NoCharDataInCM);
2746
0
                        if (getPSVIHandler())
2747
0
                        {
2748
                            // REVISIT:
2749
                            // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
2750
0
                        }
2751
0
                    }
2752
0
                }
2753
2754
                // call all active identity constraints
2755
0
                if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
2756
0
                    fContent.append(xsNormalized, xsLen);
2757
0
                }
2758
0
            }
2759
65.4k
            else {
2760
65.4k
                if (fValidate) {
2761
2762
65.2k
                    if (charOpts != XMLElementDecl::AllCharData)
2763
595
                    {
2764
                        // They definitely cannot handle any type of char data
2765
595
                        fValidator->emitError(XMLValid::NoCharDataInCM);
2766
595
                    }
2767
65.2k
                }
2768
65.4k
            }
2769
2770
            // If we have a doc handler, call it
2771
65.4k
            if (fDocHandler)
2772
0
            {
2773
0
                fDocHandler->docCharacters(
2774
0
                    bbCData.getRawBuffer(), bbCData.getLen(), true
2775
0
                );
2776
0
            }
2777
2778
            // And we are done
2779
65.4k
            break;
2780
65.4k
        }
2781
2782
        //  Make sure its a valid character. But if we've emitted an error
2783
        //  already, don't bother with the overhead since we've already told
2784
        //  them about it.
2785
3.45M
        if (!emittedError)
2786
3.45M
        {
2787
            // Deal with surrogate pairs
2788
3.45M
            if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
2789
61.9k
            {
2790
                //  Its a leading surrogate. If we already got one, then
2791
                //  issue an error, else set leading flag to make sure that
2792
                //  we look for a trailing next time.
2793
61.9k
                if (gotLeadingSurrogate)
2794
1
                    emitError(XMLErrs::Expected2ndSurrogateChar);
2795
61.9k
                else
2796
61.9k
                    gotLeadingSurrogate = true;
2797
61.9k
            }
2798
3.39M
            else
2799
3.39M
            {
2800
                //  If its a trailing surrogate, make sure that we are
2801
                //  prepared for that. Else, its just a regular char so make
2802
                //  sure that we were not expected a trailing surrogate.
2803
3.39M
                if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
2804
61.8k
                {
2805
                    // Its trailing, so make sure we were expecting it
2806
61.8k
                    if (!gotLeadingSurrogate)
2807
4
                        emitError(XMLErrs::Unexpected2ndSurrogateChar);
2808
61.8k
                }
2809
3.33M
                else
2810
3.33M
                {
2811
                    //  Its just a char, so make sure we were not expecting a
2812
                    //  trailing surrogate.
2813
3.33M
                    if (gotLeadingSurrogate)
2814
4
                        emitError(XMLErrs::Expected2ndSurrogateChar);
2815
2816
                    // Its got to at least be a valid XML character
2817
3.33M
                    else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
2818
19
                    {
2819
19
                        XMLCh tmpBuf[9];
2820
19
                        XMLString::binToText
2821
19
                        (
2822
19
                            nextCh
2823
19
                            , tmpBuf
2824
19
                            , 8
2825
19
                            , 16
2826
19
                            , fMemoryManager
2827
19
                        );
2828
19
                        emitError(XMLErrs::InvalidCharacter, tmpBuf);
2829
19
                        emittedError = true;
2830
19
                    }
2831
3.33M
                }
2832
3.39M
                gotLeadingSurrogate = false;
2833
3.39M
            }
2834
3.45M
        }
2835
2836
        // Add it to the buffer
2837
3.45M
        bbCData.append(nextCh);
2838
3.45M
    }
2839
65.5k
}
2840
2841
2842
//  Scans character data up to the next '<'. Text is accumulated in toUse
//  (which is reset first) and passed on through sendCharData(). Entity and
//  character references are expanded on the way, surrogate pairing and
//  character validity are checked, and the "]]>" sequence, which is illegal
//  in character data, is detected and reported.
void IGXMLScanner::scanCharData(XMLBuffer& toUse)
{
    //  Tracks how much of an unescaped "]]>" has been seen so far.
    enum CloseSeq
    {
        Seq_None
        , Seq_OneBracket
        , Seq_TwoBrackets
    };

    // Start from an empty buffer
    toUse.reset();

    // Ask the reader manager to throw when an entity ends
    ThrowEOEJanitor eoeGuard(&fReaderMgr, true);

    //  The try block sits outside the per-character loop so that exception
    //  setup/teardown is not paid for every character. When an end of entity
    //  exception interrupts the inner loop, the outer loop simply re-enters
    //  it.
    XMLCh    nextCh;
    XMLCh    secondCh = 0;
    CloseSeq seqState = Seq_None;
    bool     escaped = false;
    bool     pendingLeadSurrogate = false;
    bool     finished = false;
    while (!finished)
    {
        try
        {
            for (;;)
            {
                //  Speed optimization: move a run of plain content characters
                //  in one call instead of looping once per character. Only
                //  done when no partial "]]>" or surrogate pair is pending.
                if (seqState == Seq_None  &&  !pendingLeadSurrogate)
                {
                     fReaderMgr.movePlainContentChars(toUse);
                }

                //  Fetch the next character unless it is '<', which ends
                //  the character data.
                if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
                {
                    // Stopping while a trailing surrogate is due is an error
                    if (pendingLeadSurrogate)
                        emitError(XMLErrs::Expected2ndSurrogateChar);

                    finished = true;
                    break;
                }

                escaped = false;
                if (nextCh == chAmpersand)
                {
                    //  A reference. Flush what we have so far, then expand it
                    //  with end-of-entity throwing switched off for the
                    //  duration of this block.
                    sendCharData(toUse);

                    ThrowEOEJanitor refGuard(&fReaderMgr, false);

                    if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
                    {
                        // Nothing was returned as a character; go around again
                        pendingLeadSurrogate = false;
                        continue;
                    }

                    if (escaped && !fElemStack.isEmpty())
                        fElemStack.setReferenceEscaped();
                }
                else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                {
                    //  Leading surrogate: two in a row is an error, otherwise
                    //  remember that a trailing one has to follow.
                    if (pendingLeadSurrogate)
                        emitError(XMLErrs::Expected2ndSurrogateChar);
                    pendingLeadSurrogate = true;
                }
                else
                {
                    if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
                    {
                        // Trailing surrogate: only legal right after a leading one
                        if (!pendingLeadSurrogate)
                            emitError(XMLErrs::Unexpected2ndSurrogateChar);
                    }
                    else
                    {
                        // Ordinary character where a trailing surrogate was due
                        if (pendingLeadSurrogate)
                            emitError(XMLErrs::Expected2ndSurrogateChar);

                        // It must at least be a legal XML character
                        if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                        {
                            XMLCh tmpBuf[9];
                            XMLString::binToText
                            (
                                nextCh
                                , tmpBuf
                                , 8
                                , 16
                                , fMemoryManager
                            );
                            emitError(XMLErrs::InvalidCharacter, tmpBuf);
                        }
                    }
                    pendingLeadSurrogate = false;
                }

                //  Advance the "]]>" tracker. Only an unescaped ']' can
                //  extend the sequence; everything else resets it, and an
                //  unescaped '>' arriving after two brackets is the illegal
                //  sequence itself.
                if (!escaped && nextCh == chCloseSquare)
                {
                    if (seqState == Seq_None)
                        seqState = Seq_OneBracket;
                    else if (seqState == Seq_OneBracket)
                        seqState = Seq_TwoBrackets;
                }
                else
                {
                    if (!escaped && nextCh == chCloseAngle && seqState == Seq_TwoBrackets)
                        emitError(XMLErrs::BadSequenceInCharData);
                    seqState = Seq_None;
                }

                // Store the character, plus the second one a reference may have produced
                toUse.append(nextCh);

                if (secondCh)
                {
                    toUse.append(secondCh);
                    secondCh=0;
                }
            }
        }
        catch(const EndOfEntityException& toCatch)
        {
            //  An entity ended: flush the accumulated characters and then
            //  report the end of the entity reference.
            sendCharData(toUse);
            pendingLeadSurrogate = false;

            if (fDocHandler)
                fDocHandler->endEntityReference(toCatch.getEntity());
        }
    }

    //  XML 1.0, Section 2.9: in a standalone document, whitespace inside
    //  element content of an externally declared element is a validity error.
    if (fValidate && fStandalone)
    {
        const XMLCh* const text = toUse.getRawBuffer();
        const XMLSize_t textLen = toUse.getLen();

        if (fReaderMgr.getCurrentReader()->containsWhiteSpace(text, textLen))
        {
            const ElemStack::StackElem* const top = fElemStack.topElement();

            if (top->fThisElement->isExternal())
            {
                //  Work out which kind of character data the current
                //  element accepts.
                XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
                if (fGrammar->getGrammarType() != Grammar::SchemaGrammarType)
                {
                    // DTD grammar: the element declaration answers directly
                    charOpts = top->fThisElement->getCharDataOpts();
                }
                else
                {
                    // Schema grammar: derive it from the current type's content type
                    ComplexTypeInfo* const typeInfo =
                        static_cast<SchemaValidator*>(fValidator)->getCurrentTypeInfo();
                    if (typeInfo)
                    {
                        const SchemaElementDecl::ModelTypes model =
                            static_cast<SchemaElementDecl::ModelTypes>(typeInfo->getContentType());

                        if (model == SchemaElementDecl::Children
                        ||  model == SchemaElementDecl::ElementOnlyEmpty)
                        {
                            charOpts = XMLElementDecl::SpacesOk;
                        }
                        else if (model == SchemaElementDecl::Empty)
                        {
                            charOpts = XMLElementDecl::NoCharData;
                        }
                    }
                }

                if (charOpts == XMLElementDecl::SpacesOk)  // => Element Content
                {
                    //  standalone should have been "no": whitespace was found
                    //  in element content of an externally declared element.
                    fValidator->emitError(XMLValid::NoWSForStandalone);
                    if (fGrammarType == Grammar::SchemaGrammarType && getPSVIHandler())
                    {
                        // REVISIT:
                        // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
                    }
                }
            }
        }
    }

    // Flush whatever is left in the buffer
    sendCharData(toUse);
}
3069
3070
3071
//  This method will scan a general/character entity ref. It will either
3072
//  expand a char ref and return it directly, or push a reader for a general
3073
//  entity.
3074
//
3075
//  The return value indicates whether the char parameters hold the value
3076
//  or whether the value was pushed as a reader, or that it failed.
3077
//
3078
//  The escaped flag tells the caller whether the returned parameter resulted
3079
//  from a character reference, which escapes the character in some cases. It
3080
//  only makes any difference if the return value indicates the value was
3081
//  returned directly.
3082
IGXMLScanner::EntityExpRes
3083
IGXMLScanner::scanEntityRef(  const   bool    inAttVal
3084
                            ,       XMLCh&  firstCh
3085
                            ,       XMLCh&  secondCh
3086
                            ,       bool&   escaped)
3087
639k
{
3088
    // Assume no escape
3089
639k
    secondCh = 0;
3090
639k
    escaped = false;
3091
3092
    // We have to insure that its all in one entity
3093
639k
    const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
3094
3095
    //  If the next char is a pound, then its a character reference and we
3096
    //  need to expand it always.
3097
639k
    if (fReaderMgr.skippedChar(chPound))
3098
25.1k
    {
3099
        //  Its a character reference, so scan it and get back the numeric
3100
        //  value it represents.
3101
25.1k
        if (!scanCharRef(firstCh, secondCh))
3102
0
            return EntityExp_Failed;
3103
3104
25.1k
        escaped = true;
3105
3106
25.1k
        if (curReader != fReaderMgr.getCurrentReaderNum())
3107
0
            emitError(XMLErrs::PartialMarkupInEntity);
3108
3109
25.1k
        return EntityExp_Returned;
3110
25.1k
    }
3111
3112
    // Expand it since its a normal entity ref
3113
614k
    XMLBufBid bbName(&fBufMgr);
3114
614k
    int  colonPosition;
3115
614k
    bool validName = fDoNamespaces ? fReaderMgr.getQName(bbName.getBuffer(), &colonPosition) :
3116
614k
                                     fReaderMgr.getName(bbName.getBuffer());
3117
614k
    if (!validName)
3118
149
    {
3119
149
        if (bbName.isEmpty())
3120
149
            emitError(XMLErrs::ExpectedEntityRefName);
3121
0
        else
3122
0
            emitError(XMLErrs::InvalidEntityRefName, bbName.getRawBuffer());
3123
149
        return EntityExp_Failed;
3124
149
    }
3125
3126
    //  Next char must be a semi-colon. But if its not, just emit
3127
    //  an error and try to continue.
3128
614k
    if (!fReaderMgr.skippedChar(chSemiColon))
3129
128
        emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
3130
3131
    // Make sure we ended up on the same entity reader as the & char
3132
614k
    if (curReader != fReaderMgr.getCurrentReaderNum())
3133
0
        emitError(XMLErrs::PartialMarkupInEntity);
3134
3135
    // Look up the name in the general entity pool
3136
614k
    XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(bbName.getRawBuffer());
3137
3138
    // If it does not exist, then obviously an error
3139
614k
    if (!decl)
3140
314k
    {
3141
        // XML 1.0 Section 4.1
3142
        // Well-formedness Constraint for entity not found:
3143
        //   In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references,
3144
        //      or a document with "standalone='yes'", for an entity reference that does not occur within the external subset
3145
        //      or a parameter entity
3146
        //
3147
        // Else it's Validity Constraint
3148
314k
        if (fStandalone || fHasNoDTD)
3149
118
            emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
3150
314k
        else {
3151
314k
            if (fValidate)
3152
314k
                fValidator->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer());
3153
314k
        }
3154
3155
314k
        return EntityExp_Failed;
3156
314k
    }
3157
3158
    // XML 1.0 Section 4.1
3159
    //  If we are a standalone document, then it has to have been declared
3160
    //  in the internal subset.
3161
299k
    if (fStandalone && !decl->getDeclaredInIntSubset())
3162
0
        emitError(XMLErrs::IllegalRefInStandalone, bbName.getRawBuffer());
3163
3164
299k
    if (decl->isExternal())
3165
5.04k
    {
3166
        // If its unparsed, then its not valid here
3167
5.04k
        if (decl->isUnparsed())
3168
2
        {
3169
2
            emitError(XMLErrs::NoUnparsedEntityRefs, bbName.getRawBuffer());
3170
2
            return EntityExp_Failed;
3171
2
        }
3172
3173
        // If we are in an attribute value, then not valid but keep going
3174
5.03k
        if (inAttVal)
3175
1
            emitError(XMLErrs::NoExtRefsInAttValue);
3176
3177
        // And now create a reader to read this entity
3178
5.03k
        InputSource* srcUsed;
3179
5.03k
        XMLReader* reader = fReaderMgr.createReader
3180
5.03k
        (
3181
5.03k
            decl->getBaseURI()
3182
5.03k
            , decl->getSystemId()
3183
5.03k
            , decl->getPublicId()
3184
5.03k
            , false
3185
5.03k
            , XMLReader::RefFrom_NonLiteral
3186
5.03k
            , XMLReader::Type_General
3187
5.03k
            , XMLReader::Source_External
3188
5.03k
            , srcUsed
3189
5.03k
            , fCalculateSrcOfs
3190
5.03k
            , fLowWaterMark
3191
5.03k
            , fDisableDefaultEntityResolution
3192
5.03k
        );
3193
3194
        // Put a janitor on the source so it gets cleaned up on exit
3195
5.03k
        Janitor<InputSource> janSrc(srcUsed);
3196
3197
        //  If the creation failed, and its not because the source was empty,
3198
        //  then emit an error and return.
3199
5.03k
        if (!reader)
3200
215
            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed ? srcUsed->getSystemId() : decl->getSystemId(), fMemoryManager);
3201
3202
        //  Push the reader. If its a recursive expansion, then emit an error
3203
        //  and return an failure.
3204
4.82k
        if (!fReaderMgr.pushReader(reader, decl))
3205
0
        {
3206
0
            emitError(XMLErrs::RecursiveEntity, decl->getName());
3207
0
            return EntityExp_Failed;
3208
0
        }
3209
3210
        // here's where we need to check if there's a SecurityManager,
3211
        // how many entity references we've had
3212
4.82k
        if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
3213
0
            XMLCh expLimStr[32];
3214
0
            XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager);
3215
0
            emitError
3216
0
            (
3217
0
                XMLErrs::EntityExpansionLimitExceeded
3218
0
                , expLimStr
3219
0
            );
3220
            // there seems nothing  better to be done than to reset the entity expansion counter
3221
0
            fEntityExpansionCount = 0;
3222
0
        }
3223
3224
        //  Do a start entity reference event.
3225
        //
3226
        //  <TBD> For now, we supress them in att values. Later, when
3227
        //  the stuff is in place to correctly allow DOM to handle them
3228
        //  we'll turn this back on.
3229
4.82k
        if (fDocHandler && !inAttVal)
3230
0
            fDocHandler->startEntityReference(*decl);
3231
3232
        // If it starts with the XML string, then parse a text decl
3233
4.82k
        if (checkXMLDecl(true))
3234
0
            scanXMLDecl(Decl_Text);
3235
4.82k
    }
3236
294k
    else
3237
294k
    {
3238
        //  If its one of the special char references, then we can return
3239
        //  it as a character, and its considered escaped.
3240
294k
        if (decl->getIsSpecialChar())
3241
113k
        {
3242
113k
            firstCh = decl->getValue()[0];
3243
113k
            escaped = true;
3244
113k
            return EntityExp_Returned;
3245
113k
        }
3246
3247
        //  Create a reader over a memory stream over the entity value
3248
        //  We force it to assume UTF-16 by passing in an encoding
3249
        //  string. This way it won't both trying to predecode the
3250
        //  first line, looking for an XML/TextDecl.
3251
181k
        XMLReader* valueReader = fReaderMgr.createIntEntReader
3252
181k
        (
3253
181k
            decl->getName()
3254
181k
            , XMLReader::RefFrom_NonLiteral
3255
181k
            , XMLReader::Type_General
3256
181k
            , decl->getValue()
3257
181k
            , decl->getValueLen()
3258
181k
            , false
3259
181k
        );
3260
3261
        //  Try to push the entity reader onto the reader manager stack,
3262
        //  where it will become the subsequent input. If it fails, that
3263
        //  means the entity is recursive, so issue an error. The reader
3264
        //  will have just been discarded, but we just keep going.
3265
181k
        if (!fReaderMgr.pushReader(valueReader, decl))
3266
23
            emitError(XMLErrs::RecursiveEntity, decl->getName());
3267
3268
        // here's where we need to check if there's a SecurityManager,
3269
        // how many entity references we've had
3270
181k
        if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
3271
0
            XMLCh expLimStr[32];
3272
0
            XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager);
3273
0
            emitError
3274
0
            (
3275
0
                XMLErrs::EntityExpansionLimitExceeded
3276
0
                , expLimStr
3277
0
            );
3278
0
        }
3279
3280
        //  Do a start entity reference event.
3281
        //
3282
        //  <TBD> For now, we supress them in att values. Later, when
3283
        //  the stuff is in place to correctly allow DOM to handle them
3284
        //  we'll turn this back on.
3285
181k
        if (fDocHandler && !inAttVal)
3286
0
            fDocHandler->startEntityReference(*decl);
3287
3288
        // If it starts with the XML string, then it's an error
3289
181k
        if (checkXMLDecl(true)) {
3290
2
            emitError(XMLErrs::TextDeclNotLegalHere);
3291
2
            fReaderMgr.skipPastChar(chCloseAngle);
3292
2
        }
3293
181k
    }
3294
186k
    return EntityExp_Pushed;
3295
299k
}
3296
3297
3298
bool IGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace)
3299
14.3k
{
3300
14.3k
    Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace);
3301
3302
14.3k
    if (!tempGrammar && !fSkipDTDValidation) {
3303
        // This is a case where namespaces is on with a DTD grammar.
3304
0
        tempGrammar = fDTDGrammar;
3305
0
    }
3306
14.3k
    if (!tempGrammar) {
3307
0
        return false;
3308
0
    }
3309
14.3k
    else {
3310
3311
14.3k
        Grammar::GrammarType tempGrammarType = tempGrammar->getGrammarType();
3312
14.3k
        if (tempGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
3313
0
            if (fValidatorFromUser)
3314
0
                ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
3315
0
            else {
3316
0
                fValidator = fSchemaValidator;
3317
0
            }
3318
0
        }
3319
14.3k
        else if (tempGrammarType == Grammar::DTDGrammarType) {
3320
14.3k
            if (fSkipDTDValidation) {
3321
0
                return false;
3322
0
            }
3323
3324
14.3k
            if (!fValidator->handlesDTD()) {
3325
0
                if (fValidatorFromUser)
3326
0
                    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
3327
0
                else {
3328
0
                    fValidator = fDTDValidator;
3329
0
                }
3330
0
            }
3331
14.3k
        }
3332
3333
14.3k
        fGrammarType = tempGrammarType;
3334
14.3k
        fGrammar = tempGrammar;
3335
14.3k
        fValidator->setGrammar(fGrammar);
3336
14.3k
        return true;
3337
14.3k
    }
3338
14.3k
}
3339
3340
// check if we should skip or lax the validation of the element
3341
// if skip - no validation
3342
// if lax - validate only if the element if found
3343
bool IGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
3344
                                        const XMLContentModel* const cm,
3345
                                        const XMLSize_t parentElemDepth)
3346
0
{
3347
0
    bool skipThisOne = false;
3348
0
    bool laxThisOne = false;
3349
0
    unsigned int elementURI = element->getURI();
3350
0
    unsigned int currState = fElemState[parentElemDepth];
3351
0
    unsigned int currLoop = fElemLoopState[parentElemDepth];
3352
3353
0
    if (currState == XMLContentModel::gInvalidTrans) {
3354
0
        return laxThisOne;
3355
0
    }
3356
3357
0
    SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool);
3358
3359
0
    if (cv) {
3360
0
        XMLSize_t i = 0;
3361
0
        XMLSize_t leafCount = cv->getLeafCount();
3362
0
        unsigned int nextState = 0;
3363
3364
0
        for (; i < leafCount; i++) {
3365
3366
0
            QName* fElemMap = cv->getLeafNameAt(i);
3367
0
            unsigned int uri = fElemMap->getURI();
3368
0
            ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
3369
3370
0
            if (type == ContentSpecNode::Leaf) {
3371
0
                if (((uri == elementURI)
3372
0
                      && XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart()))
3373
0
                    || comparator.isEquivalentTo(element, fElemMap)) {
3374
3375
0
                    nextState = cm->getNextState(currState, i);
3376
3377
0
                    if (nextState != XMLContentModel::gInvalidTrans)
3378
0
                        break;
3379
0
                }
3380
0
            } else if ((type & 0x0f) == ContentSpecNode::Any) {
3381
0
                nextState = cm->getNextState(currState, i);
3382
0
                if (nextState != XMLContentModel::gInvalidTrans)
3383
0
                    break;
3384
0
            }
3385
0
            else if ((type & 0x0f) == ContentSpecNode::Any_Other) {
3386
0
                if (uri != elementURI && elementURI != fEmptyNamespaceId) {
3387
0
                    nextState = cm->getNextState(currState, i);
3388
0
                    if (nextState != XMLContentModel::gInvalidTrans)
3389
0
                        break;
3390
0
                }
3391
0
            }
3392
0
            else if ((type & 0x0f) == ContentSpecNode::Any_NS) {
3393
0
                if (uri == elementURI) {
3394
0
                    nextState = cm->getNextState(currState, i);
3395
0
                    if (nextState != XMLContentModel::gInvalidTrans)
3396
0
                        break;
3397
0
                }
3398
0
            }
3399
3400
0
        } // for
3401
3402
0
        if (i == leafCount) { // no match
3403
0
            fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans;
3404
0
            fElemLoopState[parentElemDepth] = 0;
3405
0
            return laxThisOne;
3406
0
        }
3407
3408
0
        unsigned int nextLoop = 0;
3409
0
        if(!cm->handleRepetitions(element, currState, currLoop, nextState, nextLoop, i, &comparator)) {
3410
0
            fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans;
3411
0
            fElemLoopState[parentElemDepth] = 0;
3412
0
            return laxThisOne;
3413
0
        }
3414
3415
0
        ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
3416
0
        if ((type & 0x0f) == ContentSpecNode::Any ||
3417
0
            (type & 0x0f) == ContentSpecNode::Any_Other ||
3418
0
            (type & 0x0f) == ContentSpecNode::Any_NS)
3419
0
        {
3420
0
            if (type == ContentSpecNode::Any_Skip ||
3421
0
                type == ContentSpecNode::Any_NS_Skip ||
3422
0
                type == ContentSpecNode::Any_Other_Skip) {
3423
0
                skipThisOne = true;
3424
0
            }
3425
0
            else if (type == ContentSpecNode::Any_Lax ||
3426
0
                     type == ContentSpecNode::Any_NS_Lax ||
3427
0
                     type == ContentSpecNode::Any_Other_Lax) {
3428
0
                laxThisOne = true;
3429
0
            }
3430
0
        }
3431
0
        fElemState[parentElemDepth] = nextState;
3432
0
        fElemLoopState[parentElemDepth] = nextLoop;
3433
0
    } // if
3434
3435
0
    if (skipThisOne) {
3436
0
        fValidate = false;
3437
0
        fElemStack.setValidationFlag(fValidate);
3438
0
    }
3439
3440
0
    return laxThisOne;
3441
0
}
3442
3443
3444
// check if there is an AnyAttribute, and if so, see if we should lax or skip
3445
// if skip - no validation
3446
// if lax - validate only if the attribute if found
3447
bool IGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne)
3448
0
{
3449
0
    XMLAttDef::AttTypes wildCardType = attWildCard->getType();
3450
0
    bool anyEncountered = false;
3451
0
    skipThisOne = false;
3452
0
    laxThisOne = false;
3453
0
    if (wildCardType == XMLAttDef::Any_Any)
3454
0
        anyEncountered = true;
3455
0
    else if (wildCardType == XMLAttDef::Any_Other) {
3456
0
        if (attWildCard->getAttName()->getURI() != uriId
3457
0
            && uriId != fEmptyNamespaceId)
3458
0
            anyEncountered = true;
3459
0
    }
3460
0
    else if (wildCardType == XMLAttDef::Any_List) {
3461
0
        ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList();
3462
0
        XMLSize_t listSize = (nameURIList) ? nameURIList->size() : 0;
3463
3464
0
        if (listSize) {
3465
0
            for (XMLSize_t i=0; i < listSize; i++) {
3466
0
                if (nameURIList->elementAt(i) == uriId)
3467
0
                    anyEncountered = true;
3468
0
            }
3469
0
        }
3470
0
    }
3471
3472
0
    if (anyEncountered) {
3473
0
        XMLAttDef::DefAttTypes   defType   = attWildCard->getDefaultType();
3474
0
        if (defType == XMLAttDef::ProcessContents_Skip) {
3475
            // attribute should just be bypassed,
3476
0
            skipThisOne = true;
3477
0
        }
3478
0
        else if (defType == XMLAttDef::ProcessContents_Lax) {
3479
0
            laxThisOne = true;
3480
0
        }
3481
0
    }
3482
3483
0
    return anyEncountered;
3484
0
}
3485
3486
// ---------------------------------------------------------------------------
//  Pick the attribute definition list to use: the complex type's list when
//  a schema grammar is active and a current type is available, otherwise
//  the element declaration's list.
// ---------------------------------------------------------------------------
inline XMLAttDefList& getAttDefList(bool              isSchemaGrammar
                                  , ComplexTypeInfo*  currType
                                  , XMLElementDecl*   elemDecl)
{
    if (!isSchemaGrammar || !currType)
        return elemDecl->getAttDefList();

    return currType->getAttDefList();
}
3495
3496
XERCES_CPP_NAMESPACE_END