/src/xerces-c/src/xercesc/internal/IGXMLScanner2.cpp
Line | Count | Source |
1 | | /* |
2 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
3 | | * contributor license agreements. See the NOTICE file distributed with |
4 | | * this work for additional information regarding copyright ownership. |
5 | | * The ASF licenses this file to You under the Apache License, Version 2.0 |
6 | | * (the "License"); you may not use this file except in compliance with |
7 | | * the License. You may obtain a copy of the License at |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | /* |
19 | | * $Id: IGXMLScanner2.cpp 1799520 2017-06-21 21:34:26Z scantor $ |
20 | | */ |
21 | | |
22 | | // --------------------------------------------------------------------------- |
23 | | // This file holds some of the grunt work methods of IGXMLScanner.cpp to keep |
24 | | // it a little more readable. |
25 | | // --------------------------------------------------------------------------- |
26 | | |
27 | | // --------------------------------------------------------------------------- |
28 | | // Includes |
29 | | // --------------------------------------------------------------------------- |
30 | | #include <xercesc/internal/IGXMLScanner.hpp> |
31 | | #include <xercesc/internal/EndOfEntityException.hpp> |
32 | | #include <xercesc/util/UnexpectedEOFException.hpp> |
33 | | #include <xercesc/util/XMLUri.hpp> |
34 | | #include <xercesc/framework/LocalFileInputSource.hpp> |
35 | | #include <xercesc/framework/URLInputSource.hpp> |
36 | | #include <xercesc/framework/XMLDocumentHandler.hpp> |
37 | | #include <xercesc/framework/XMLEntityHandler.hpp> |
38 | | #include <xercesc/framework/XMLPScanToken.hpp> |
39 | | #include <xercesc/framework/XMLRefInfo.hpp> |
40 | | #include <xercesc/framework/XMLGrammarPool.hpp> |
41 | | #include <xercesc/framework/psvi/PSVIAttributeList.hpp> |
42 | | #include <xercesc/framework/psvi/PSVIElement.hpp> |
43 | | #include <xercesc/framework/psvi/XSAnnotation.hpp> |
44 | | #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp> |
45 | | #include <xercesc/validators/DTD/DTDGrammar.hpp> |
46 | | #include <xercesc/validators/DTD/DTDValidator.hpp> |
47 | | #include <xercesc/validators/DTD/XMLDTDDescriptionImpl.hpp> |
48 | | #include <xercesc/validators/datatype/DatatypeValidator.hpp> |
49 | | #include <xercesc/validators/schema/XMLSchemaDescriptionImpl.hpp> |
50 | | #include <xercesc/validators/schema/SchemaGrammar.hpp> |
51 | | #include <xercesc/validators/schema/SchemaValidator.hpp> |
52 | | #include <xercesc/validators/schema/TraverseSchema.hpp> |
53 | | #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp> |
54 | | #include <xercesc/validators/schema/XSDDOMParser.hpp> |
55 | | #include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp> |
56 | | #include <xercesc/validators/schema/identity/ValueStore.hpp> |
57 | | #include <xercesc/util/XMLStringTokenizer.hpp> |
58 | | |
59 | | XERCES_CPP_NAMESPACE_BEGIN |
60 | | |
61 | | inline XMLAttDefList& getAttDefList(bool isSchemaGrammar |
62 | | , ComplexTypeInfo* currType |
63 | | , XMLElementDecl* elemDecl); |
64 | | |
65 | | // --------------------------------------------------------------------------- |
66 | | // IGXMLScanner: Private helper methods |
67 | | // --------------------------------------------------------------------------- |
68 | | |
69 | | // This method is called from scanStartTagNS() to build up the list of |
70 | | // XMLAttr objects that will be passed out in the start tag callout. We |
71 | | // get the key/value pairs from the raw scan of explicitly provided attrs, |
72 | | // which have not been normalized. And we get the element declaration from |
73 | | // which we will get any defaulted or fixed attribute defs and add those |
74 | | // in as well. |
75 | | XMLSize_t |
76 | | IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs |
77 | | , const XMLSize_t attCount |
78 | | , XMLElementDecl* elemDecl |
79 | | , RefVectorOf<XMLAttr>& toFill) |
80 | 0 | { |
81 | | // If doing DTD's, Ask the element to clear the 'provided' flag on all of the att defs |
82 | | // that it owns, and to return us a boolean indicating whether it has |
83 | | // any defs. If schemas are being validated, the complexType |
84 | | // at the top of the SchemaValidator's stack will |
85 | | // know what's best. REVISIT: don't modify grammar at all; eliminate |
86 | | // this step... |
87 | 0 | ComplexTypeInfo *currType = 0; |
88 | 0 | DatatypeValidator *currDV = 0; |
89 | 0 | if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType && fValidate) |
90 | 0 | { |
91 | 0 | currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); |
92 | 0 | if (!currType) { |
93 | 0 | currDV = ((SchemaValidator*)fValidator)->getCurrentDatatypeValidator(); |
94 | 0 | } |
95 | 0 | } |
96 | |
|
97 | 0 | const bool hasDefs = (currType && fValidate) |
98 | 0 | ? currType->hasAttDefs() |
99 | 0 | : elemDecl->hasAttDefs(); |
100 | | |
101 | | // another set of attributes; increment element counter |
102 | 0 | fElemCount++; |
103 | | |
104 | | // If there are no explicitly provided attributes and there are no |
105 | | // defined attributes for the element, then we don't have anything to do. |
106 | | // So just return zero in this case. |
107 | 0 | if (!hasDefs && !attCount) |
108 | 0 | return 0; |
109 | | |
110 | | // Keep up with how many attrs we end up with total |
111 | 0 | XMLSize_t retCount = 0; |
112 | | |
113 | | // And get the current size of the output vector. This lets us use |
114 | | // existing elements until we fill it, then start adding new ones. |
115 | 0 | const XMLSize_t curAttListSize = toFill.size(); |
116 | | |
117 | | // We need a buffer into which raw scanned attribute values will be |
118 | | // normalized. |
119 | 0 | XMLBufBid bbNormal(&fBufMgr); |
120 | 0 | XMLBuffer& normBuf = bbNormal.getBuffer(); |
121 | | |
122 | | // |
123 | | // Decide whether to use a hash table for duplicate checking |
124 | | // |
125 | 0 | bool toUseHashTable = false; |
126 | 0 | if (fGrammarType == Grammar::DTDGrammarType) |
127 | 0 | { |
128 | 0 | setAttrDupChkRegistry(attCount, toUseHashTable); |
129 | 0 | } |
130 | |
|
131 | 0 | XMLBufBid bbPrefix(&fBufMgr); |
132 | 0 | XMLBuffer& prefixBuf = bbPrefix.getBuffer(); |
133 | | |
134 | | // Loop through our explicitly provided attributes, which are in the raw |
135 | | // scanned form, and build up XMLAttr objects. |
136 | 0 | XMLSize_t index; |
137 | 0 | const XMLCh* prefPtr, *suffPtr; |
138 | 0 | for (index = 0; index < attCount; index++) |
139 | 0 | { |
140 | 0 | PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID; |
141 | 0 | PSVIItem::ASSESSMENT_TYPE attrAssessed = PSVIItem::VALIDATION_FULL; |
142 | 0 | const KVStringPair* curPair = providedAttrs.elementAt(index); |
143 | | |
144 | | // We have to split the name into its prefix and name parts. Then |
145 | | // we map the prefix to its URI. |
146 | 0 | const XMLCh* const namePtr = curPair->getKey(); |
147 | |
|
148 | 0 | const int colonInd = fRawAttrColonList[index]; |
149 | 0 | unsigned int uriId; |
150 | 0 | if (colonInd != -1) |
151 | 0 | { |
152 | 0 | prefixBuf.set(namePtr, colonInd); |
153 | 0 | prefPtr = prefixBuf.getRawBuffer(); |
154 | 0 | suffPtr = namePtr + colonInd + 1; |
155 | | // Map the prefix to a URI id |
156 | 0 | uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute); |
157 | 0 | } |
158 | 0 | else |
159 | 0 | { |
160 | | // No colon, so we just have a name with no prefix |
161 | 0 | prefPtr = XMLUni::fgZeroLenString; |
162 | 0 | suffPtr = namePtr; |
163 | | // an empty prefix is always the empty namespace, when dealing with attributes |
164 | 0 | uriId = fEmptyNamespaceId; |
165 | 0 | } |
166 | | |
167 | | // If the uri comes back as the xmlns or XSI URI, or it's just a name |
168 | | // and that name is 'xmlns', then we handle it specially. So set a |
169 | | // boolean flag that lets us quickly know below which we are dealing |
170 | | // with. |
171 | 0 | const bool isNSAttr = (uriId == fEmptyNamespaceId)? |
172 | 0 | XMLString::equals(suffPtr, XMLUni::fgXMLNSString) : |
173 | 0 | (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI)); |
174 | | |
175 | | |
176 | | // If it's not a special case namespace attr of some sort, then we |
177 | | // do normal checking and processing. |
178 | 0 | XMLAttDef::AttTypes attType = XMLAttDef::CData; |
179 | 0 | DatatypeValidator *attrValidator = 0; |
180 | 0 | PSVIAttribute *psviAttr = 0; |
181 | 0 | bool otherXSI = false; |
182 | |
|
183 | 0 | if (isNSAttr && fGrammarType == Grammar::SchemaGrammarType) |
184 | 0 | { |
185 | 0 | if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId)) |
186 | 0 | { |
187 | 0 | emitError |
188 | 0 | ( |
189 | 0 | XMLErrs::AttrAlreadyUsedInSTag |
190 | 0 | , namePtr |
191 | 0 | , elemDecl->getFullName() |
192 | 0 | ); |
193 | 0 | fPSVIElemContext.fErrorOccurred = true; |
194 | 0 | } |
195 | 0 | else |
196 | 0 | { |
197 | 0 | bool ValueValidate = false; |
198 | 0 | bool tokenizeBuffer = false; |
199 | |
|
200 | 0 | if (uriId == fXMLNSNamespaceId) |
201 | 0 | { |
202 | 0 | attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); |
203 | 0 | } |
204 | 0 | else if (XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI)) |
205 | 0 | { |
206 | 0 | if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL)) |
207 | 0 | { |
208 | 0 | attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN); |
209 | |
|
210 | 0 | ValueValidate = true; |
211 | 0 | } |
212 | 0 | else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION)) |
213 | 0 | { |
214 | | // use anyURI as the validator |
215 | | // tokenize the data and use the anyURI data for each piece |
216 | 0 | attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); |
217 | | //We should validate each value in the schema location however |
218 | | //this led to a performance degradation of around 4%. Since |
219 | | //the first value of each pair needs to match what is in the |
220 | | //schema document and the second value needs to be valid in |
221 | | //order to open the document we won't validate it. Need to |
222 | | //do performance analysis of the anyuri datatype. |
223 | | //ValueValidate = true; |
224 | 0 | ValueValidate = false; |
225 | 0 | tokenizeBuffer = true; |
226 | 0 | } |
227 | 0 | else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION)) |
228 | 0 | { |
229 | 0 | attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); |
230 | | //We should validate this value however |
231 | | //this led to a performance degradation of around 4%. Since |
232 | | //the value needs to be valid in |
233 | | //order to open the document we won't validate it. Need to |
234 | | //do performance analysis of the anyuri datatype. |
235 | | //ValueValidate = true; |
236 | 0 | ValueValidate = false; |
237 | 0 | } |
238 | 0 | else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE)) |
239 | 0 | { |
240 | 0 | attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME); |
241 | |
|
242 | 0 | ValueValidate = true; |
243 | 0 | } |
244 | 0 | else { |
245 | 0 | otherXSI = true; |
246 | 0 | } |
247 | 0 | } |
248 | |
|
249 | 0 | if (!otherXSI) { |
250 | 0 | normalizeAttRawValue |
251 | 0 | ( |
252 | 0 | namePtr |
253 | 0 | , curPair->getValue() |
254 | 0 | , normBuf |
255 | 0 | ); |
256 | |
|
257 | 0 | if (fValidate && attrValidator && ValueValidate) |
258 | 0 | { |
259 | 0 | ((SchemaValidator*) fValidator)->normalizeWhiteSpace(attrValidator, normBuf.getRawBuffer(), normBuf, true); |
260 | |
|
261 | 0 | ValidationContext* const theContext = |
262 | 0 | getValidationContext(); |
263 | |
|
264 | 0 | if (theContext) |
265 | 0 | { |
266 | 0 | try |
267 | 0 | { |
268 | 0 | if (tokenizeBuffer) { |
269 | 0 | XMLStringTokenizer tokenizer(normBuf.getRawBuffer(), fMemoryManager); |
270 | 0 | while (tokenizer.hasMoreTokens()) { |
271 | 0 | attrValidator->validate( |
272 | 0 | tokenizer.nextToken(), |
273 | 0 | theContext, |
274 | 0 | fMemoryManager); |
275 | 0 | } |
276 | 0 | } |
277 | 0 | else { |
278 | 0 | attrValidator->validate( |
279 | 0 | normBuf.getRawBuffer(), |
280 | 0 | theContext, |
281 | 0 | fMemoryManager); |
282 | 0 | } |
283 | 0 | } |
284 | 0 | catch (const XMLException& idve) |
285 | 0 | { |
286 | 0 | fValidator->emitError (XMLValid::DatatypeError, idve.getCode(), idve.getMessage()); |
287 | 0 | } |
288 | 0 | } |
289 | 0 | } |
290 | |
|
291 | 0 | if(getPSVIHandler()) |
292 | 0 | { |
293 | 0 | psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId)); |
294 | 0 | XSSimpleTypeDefinition *validatingType = (attrValidator) |
295 | 0 | ? (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator) |
296 | 0 | : 0; |
297 | | // no attribute declarations for these... |
298 | 0 | psviAttr->reset( |
299 | 0 | fRootElemName |
300 | 0 | , PSVIItem::VALIDITY_NOTKNOWN |
301 | 0 | , PSVIItem::VALIDATION_NONE |
302 | 0 | , validatingType |
303 | 0 | , 0 |
304 | 0 | , 0 |
305 | 0 | , false |
306 | 0 | , 0 |
307 | 0 | , attrValidator |
308 | 0 | ); |
309 | 0 | } |
310 | 0 | } |
311 | 0 | } |
312 | 0 | } |
313 | |
|
314 | 0 | if (!isNSAttr || fGrammarType == Grammar::DTDGrammarType || otherXSI) |
315 | 0 | { |
316 | | // Some checking for attribute wild card first (for schema) |
317 | 0 | bool laxThisOne = false; |
318 | 0 | bool skipThisOne = false; |
319 | |
|
320 | 0 | XMLAttDef* attDefForWildCard = 0; |
321 | 0 | XMLAttDef* attDef = 0; |
322 | |
|
323 | 0 | if (fGrammarType == Grammar::SchemaGrammarType) { |
324 | | |
325 | | //retrieve the att def |
326 | 0 | SchemaAttDef* attWildCard = 0; |
327 | 0 | if (currType) { |
328 | 0 | attDef = currType->getAttDef(suffPtr, uriId); |
329 | 0 | attWildCard = currType->getAttWildCard(); |
330 | 0 | } |
331 | 0 | else if (!currDV) { // check explicitly-set wildcard |
332 | 0 | attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, uriId); |
333 | 0 | attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard(); |
334 | 0 | } |
335 | | |
336 | | // if not found or faulted in - check for a matching wildcard attribute |
337 | | // if no matching wildcard attribute, check (un)qualified cases and flag |
338 | | // appropriate errors |
339 | 0 | if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) { |
340 | |
|
341 | 0 | if (attWildCard) { |
342 | | //if schema, see if we should lax or skip the validation of this attribute |
343 | 0 | if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) { |
344 | |
|
345 | 0 | if(!skipThisOne) |
346 | 0 | { |
347 | 0 | SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId)); |
348 | 0 | if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) { |
349 | 0 | RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry(); |
350 | 0 | if (attRegistry) { |
351 | 0 | attDefForWildCard = attRegistry->get(suffPtr); |
352 | 0 | } |
353 | 0 | } |
354 | 0 | } |
355 | 0 | } |
356 | 0 | } |
357 | 0 | else if (currType) { |
358 | | // not found, see if the attDef should be qualified or not |
359 | 0 | if (uriId == fEmptyNamespaceId) { |
360 | 0 | attDef = currType->getAttDef(suffPtr |
361 | 0 | , fURIStringPool->getId(fGrammar->getTargetNamespace())); |
362 | 0 | if (fValidate |
363 | 0 | && attDef |
364 | 0 | && attDef->getCreateReason() != XMLAttDef::JustFaultIn) { |
365 | | // the attribute should be qualified |
366 | 0 | fValidator->emitError |
367 | 0 | ( |
368 | 0 | XMLValid::AttributeNotQualified |
369 | 0 | , attDef->getFullName() |
370 | 0 | ); |
371 | 0 | if(fGrammarType == Grammar::SchemaGrammarType) { |
372 | 0 | fPSVIElemContext.fErrorOccurred = true; |
373 | 0 | if (getPSVIHandler()) |
374 | 0 | { |
375 | 0 | attrValid = PSVIItem::VALIDITY_INVALID; |
376 | 0 | } |
377 | 0 | } |
378 | 0 | } |
379 | 0 | } |
380 | 0 | else { |
381 | 0 | attDef = currType->getAttDef(suffPtr |
382 | 0 | , fEmptyNamespaceId); |
383 | 0 | if (fValidate |
384 | 0 | && attDef |
385 | 0 | && attDef->getCreateReason() != XMLAttDef::JustFaultIn) { |
386 | | // the attribute should be unqualified |
387 | 0 | fValidator->emitError |
388 | 0 | ( |
389 | 0 | XMLValid::AttributeNotUnQualified |
390 | 0 | , attDef->getFullName() |
391 | 0 | ); |
392 | 0 | if(fGrammarType == Grammar::SchemaGrammarType) { |
393 | 0 | fPSVIElemContext.fErrorOccurred = true; |
394 | 0 | if (getPSVIHandler()) |
395 | 0 | { |
396 | 0 | attrValid = PSVIItem::VALIDITY_INVALID; |
397 | 0 | } |
398 | 0 | } |
399 | 0 | } |
400 | 0 | } |
401 | 0 | } |
402 | 0 | } |
403 | 0 | } |
404 | | |
405 | | // Find this attribute within the parent element. We pass both |
406 | | // the uriID/name and the raw QName buffer, since we don't know |
407 | | // how the derived validator and its elements store attributes. |
408 | 0 | else |
409 | 0 | { |
410 | 0 | if(fGrammarType == Grammar::DTDGrammarType) |
411 | 0 | attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( namePtr); |
412 | 0 | } |
413 | | |
414 | | // now need to prepare for duplicate detection |
415 | 0 | if(attDef) |
416 | 0 | { |
417 | 0 | unsigned int *curCountPtr = fAttDefRegistry->get(attDef); |
418 | 0 | if(!curCountPtr) |
419 | 0 | { |
420 | 0 | curCountPtr = getNewUIntPtr(); |
421 | 0 | *curCountPtr = fElemCount; |
422 | 0 | fAttDefRegistry->put(attDef, curCountPtr); |
423 | 0 | } |
424 | 0 | else if(*curCountPtr < fElemCount) |
425 | 0 | *curCountPtr = fElemCount; |
426 | 0 | else |
427 | 0 | { |
428 | 0 | emitError |
429 | 0 | ( |
430 | 0 | XMLErrs::AttrAlreadyUsedInSTag |
431 | 0 | , attDef->getFullName() |
432 | 0 | , elemDecl->getFullName() |
433 | 0 | ); |
434 | 0 | fPSVIElemContext.fErrorOccurred = true; |
435 | 0 | } |
436 | 0 | } |
437 | 0 | else |
438 | 0 | { |
439 | 0 | if(fGrammarType == Grammar::DTDGrammarType) |
440 | 0 | { |
441 | 0 | if(!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0)) |
442 | 0 | { |
443 | 0 | emitError |
444 | 0 | ( |
445 | 0 | XMLErrs::AttrAlreadyUsedInSTag |
446 | 0 | , namePtr |
447 | 0 | , elemDecl->getFullName() |
448 | 0 | ); |
449 | 0 | } |
450 | 0 | } |
451 | 0 | else // schema grammar |
452 | 0 | { |
453 | 0 | if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId)) |
454 | 0 | { |
455 | 0 | emitError |
456 | 0 | ( |
457 | 0 | XMLErrs::AttrAlreadyUsedInSTag |
458 | 0 | , namePtr |
459 | 0 | , elemDecl->getFullName() |
460 | 0 | ); |
461 | 0 | fPSVIElemContext.fErrorOccurred = true; |
462 | 0 | } |
463 | 0 | } |
464 | 0 | } |
465 | |
|
466 | 0 | if(fGrammarType == Grammar::SchemaGrammarType ) |
467 | 0 | { |
468 | | // if we've found either an attDef or an attDefForWildCard, |
469 | | // then we're doing full validation and it may still be valid. |
470 | 0 | if(!attDef && !attDefForWildCard) |
471 | 0 | { |
472 | 0 | if(!laxThisOne && !skipThisOne) |
473 | 0 | { |
474 | 0 | fPSVIElemContext.fErrorOccurred = true; |
475 | 0 | } |
476 | 0 | if(getPSVIHandler()) |
477 | 0 | { |
478 | 0 | if(!laxThisOne && !skipThisOne) |
479 | 0 | { |
480 | 0 | attrValid = PSVIItem::VALIDITY_INVALID; |
481 | 0 | } |
482 | 0 | else if(laxThisOne) |
483 | 0 | { |
484 | 0 | attrValid = PSVIItem::VALIDITY_NOTKNOWN; |
485 | 0 | attrAssessed = PSVIItem::VALIDATION_PARTIAL; |
486 | 0 | } |
487 | 0 | else |
488 | 0 | { |
489 | 0 | attrValid = PSVIItem::VALIDITY_NOTKNOWN; |
490 | 0 | attrAssessed = PSVIItem::VALIDATION_NONE; |
491 | 0 | } |
492 | 0 | } |
493 | 0 | } |
494 | 0 | } |
495 | |
|
496 | 0 | bool errorCondition = fValidate && !attDefForWildCard && !attDef; |
497 | 0 | if (errorCondition && !skipThisOne && !laxThisOne) |
498 | 0 | { |
499 | | // |
500 | | // It's not valid for this element, so issue an error if we are |
501 | | // validating. |
502 | | // |
503 | 0 | XMLBufBid bbMsg(&fBufMgr); |
504 | 0 | XMLBuffer& bufMsg = bbMsg.getBuffer(); |
505 | 0 | if (uriId != fEmptyNamespaceId) { |
506 | 0 | XMLBufBid bbURI(&fBufMgr); |
507 | 0 | XMLBuffer& bufURI = bbURI.getBuffer(); |
508 | |
|
509 | 0 | getURIText(uriId, bufURI); |
510 | |
|
511 | 0 | bufMsg.append(chOpenCurly); |
512 | 0 | bufMsg.append(bufURI.getRawBuffer()); |
513 | 0 | bufMsg.append(chCloseCurly); |
514 | 0 | } |
515 | 0 | bufMsg.append(suffPtr); |
516 | 0 | fValidator->emitError |
517 | 0 | ( |
518 | 0 | XMLValid::AttNotDefinedForElement |
519 | 0 | , bufMsg.getRawBuffer() |
520 | 0 | , elemDecl->getFullName() |
521 | 0 | ); |
522 | 0 | } |
523 | | |
524 | | // Now normalize the raw value since we have the attribute type. We |
525 | | // don't care about the return status here. If it failed, an error |
526 | | // was issued, which is all we care about. |
527 | 0 | if (attDefForWildCard) { |
528 | 0 | normalizeAttValue( |
529 | 0 | attDefForWildCard, namePtr, curPair->getValue(), normBuf |
530 | 0 | ); |
531 | | |
532 | | // If we found an attdef for this one, then lets validate it. |
533 | 0 | const XMLCh* xsNormalized = normBuf.getRawBuffer(); |
534 | 0 | DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator(); |
535 | 0 | if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) |
536 | 0 | { |
537 | | // normalize the attribute according to schema whitespace facet |
538 | 0 | ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true); |
539 | 0 | xsNormalized = fWSNormalizeBuf.getRawBuffer(); |
540 | |
|
541 | 0 | if (fNormalizeData && fValidate) { |
542 | 0 | normBuf.set(xsNormalized); |
543 | 0 | } |
544 | 0 | } |
545 | |
|
546 | 0 | if (fValidate ) { |
547 | 0 | fValidator->validateAttrValue( |
548 | 0 | attDefForWildCard, xsNormalized, false, elemDecl |
549 | 0 | ); |
550 | 0 | attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator(); |
551 | 0 | if(((SchemaValidator *)fValidator)->getErrorOccurred()) |
552 | 0 | { |
553 | 0 | fPSVIElemContext.fErrorOccurred = true; |
554 | 0 | if(getPSVIHandler()) |
555 | 0 | attrValid = PSVIItem::VALIDITY_INVALID; |
556 | 0 | } |
557 | 0 | } |
558 | 0 | else { // no decl; default DOMTypeInfo to anySimpleType |
559 | 0 | attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); |
560 | 0 | } |
561 | | |
562 | | // Save the type for later use |
563 | 0 | attType = attDefForWildCard->getType(); |
564 | 0 | } |
565 | 0 | else { |
566 | 0 | normalizeAttValue( |
567 | 0 | attDef, namePtr, curPair->getValue(), normBuf |
568 | 0 | ); |
569 | | |
570 | | // If we found an attdef for this one, then lets validate it. |
571 | 0 | if (attDef) |
572 | 0 | { |
573 | 0 | const XMLCh* xsNormalized = normBuf.getRawBuffer(); |
574 | 0 | if (fGrammarType == Grammar::SchemaGrammarType) |
575 | 0 | { |
576 | 0 | DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator(); |
577 | 0 | if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) |
578 | 0 | { |
579 | | // normalize the attribute according to schema whitespace facet |
580 | 0 | ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true); |
581 | 0 | xsNormalized = fWSNormalizeBuf.getRawBuffer(); |
582 | 0 | if (fNormalizeData && fValidate && !skipThisOne) { |
583 | 0 | normBuf.set(xsNormalized); |
584 | 0 | } |
585 | 0 | } |
586 | 0 | } |
587 | |
|
588 | 0 | if (fValidate && !skipThisOne) |
589 | 0 | { |
590 | 0 | fValidator->validateAttrValue( |
591 | 0 | attDef, xsNormalized, false, elemDecl |
592 | 0 | ); |
593 | |
|
594 | 0 | if(fGrammarType == Grammar::SchemaGrammarType) |
595 | 0 | { |
596 | 0 | attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator(); |
597 | 0 | if(((SchemaValidator *)fValidator)->getErrorOccurred()) |
598 | 0 | { |
599 | 0 | fPSVIElemContext.fErrorOccurred = true; |
600 | 0 | if (getPSVIHandler()) |
601 | 0 | attrValid = PSVIItem::VALIDITY_INVALID; |
602 | 0 | } |
603 | 0 | } |
604 | 0 | } |
605 | 0 | else if(fGrammarType == Grammar::SchemaGrammarType) { |
606 | 0 | attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); |
607 | 0 | } |
608 | 0 | } |
609 | 0 | else // no attDef at all; default to anySimpleType |
610 | 0 | { |
611 | 0 | if(fGrammarType == Grammar::SchemaGrammarType) { |
612 | 0 | attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); |
613 | 0 | } |
614 | 0 | } |
615 | | |
616 | | // Save the type for later use |
617 | 0 | if (attDef) |
618 | 0 | { |
619 | 0 | attType = attDef->getType(); |
620 | 0 | } |
621 | 0 | } |
622 | | |
623 | | // now fill in the PSVIAttributes entry for this attribute: |
624 | 0 | if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType) |
625 | 0 | { |
626 | 0 | psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId)); |
627 | 0 | SchemaAttDef *actualAttDef = 0; |
628 | 0 | if(attDef) |
629 | 0 | actualAttDef = (SchemaAttDef *)attDef; |
630 | 0 | else if (attDefForWildCard) |
631 | 0 | actualAttDef = (SchemaAttDef *)attDefForWildCard; |
632 | 0 | if(actualAttDef) |
633 | 0 | { |
634 | 0 | XSAttributeDeclaration *attrDecl = (XSAttributeDeclaration *)fModel->getXSObject(actualAttDef); |
635 | 0 | DatatypeValidator * attrDataType = actualAttDef->getDatatypeValidator(); |
636 | 0 | XSSimpleTypeDefinition *validatingType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrDataType); |
637 | 0 | if(attrValid != PSVIItem::VALIDITY_VALID) |
638 | 0 | { |
639 | 0 | psviAttr->reset |
640 | 0 | ( |
641 | 0 | fRootElemName |
642 | 0 | , attrValid |
643 | 0 | , attrAssessed |
644 | 0 | , validatingType |
645 | 0 | , 0 |
646 | 0 | , actualAttDef->getValue() |
647 | 0 | , false |
648 | 0 | , attrDecl |
649 | 0 | , 0 |
650 | 0 | ); |
651 | 0 | } |
652 | 0 | else |
653 | 0 | { |
654 | 0 | XSSimpleTypeDefinition *memberType = 0; |
655 | 0 | if(validatingType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION) |
656 | 0 | memberType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator); |
657 | 0 | psviAttr->reset |
658 | 0 | ( |
659 | 0 | fRootElemName |
660 | 0 | , attrValid |
661 | 0 | , attrAssessed |
662 | 0 | , validatingType |
663 | 0 | , memberType |
664 | 0 | , actualAttDef->getValue() |
665 | 0 | , false |
666 | 0 | , attrDecl |
667 | 0 | , (memberType)?attrValidator:attrDataType |
668 | 0 | ); |
669 | 0 | } |
670 | 0 | } |
671 | 0 | else |
672 | 0 | { |
673 | 0 | psviAttr->reset |
674 | 0 | ( |
675 | 0 | fRootElemName |
676 | 0 | , attrValid |
677 | 0 | , attrAssessed |
678 | 0 | , 0 |
679 | 0 | , 0 |
680 | 0 | , 0 |
681 | 0 | , false |
682 | 0 | , 0 |
683 | 0 | , 0 |
684 | 0 | ); |
685 | 0 | } |
686 | 0 | } |
687 | 0 | } |
688 | | |
689 | | // Add this attribute to the attribute list that we use to pass them |
690 | | // to the handler. We reuse its existing elements but expand it as |
691 | | // required. |
692 | 0 | XMLAttr* curAttr; |
693 | | |
694 | | // check for duplicate namespace attributes: |
695 | | // by checking for qualified names with the same local part and with prefixes |
696 | | // which have been bound to namespace names that are identical. |
697 | 0 | if (fGrammarType == Grammar::DTDGrammarType) { |
698 | 0 | if (!toUseHashTable) |
699 | 0 | { |
700 | 0 | for (XMLSize_t attrIndex=0; attrIndex < retCount; attrIndex++) { |
701 | 0 | curAttr = toFill.elementAt(attrIndex); |
702 | 0 | if (uriId == curAttr->getURIId() && |
703 | 0 | XMLString::equals(suffPtr, curAttr->getName())) { |
704 | 0 | emitError |
705 | 0 | ( |
706 | |
|
707 | 0 | XMLErrs::AttrAlreadyUsedInSTag |
708 | 0 | , curAttr->getName() |
709 | 0 | , elemDecl->getFullName() |
710 | 0 | ); |
711 | 0 | } |
712 | 0 | } |
713 | 0 | } |
714 | 0 | else |
715 | 0 | { |
716 | 0 | if (fAttrDupChkRegistry->containsKey((void*)suffPtr, uriId)) |
717 | 0 | { |
718 | 0 | emitError |
719 | 0 | ( |
720 | 0 | XMLErrs::AttrAlreadyUsedInSTag |
721 | 0 | , suffPtr |
722 | 0 | , elemDecl->getFullName() |
723 | 0 | ); |
724 | 0 | } |
725 | 0 | } |
726 | 0 | } |
727 | |
|
728 | 0 | if (retCount >= curAttListSize) |
729 | 0 | { |
730 | 0 | curAttr = new (fMemoryManager) XMLAttr |
731 | 0 | ( |
732 | 0 | uriId |
733 | 0 | , suffPtr |
734 | 0 | , prefPtr |
735 | 0 | , normBuf.getRawBuffer() |
736 | 0 | , attType |
737 | 0 | , true |
738 | 0 | , fMemoryManager |
739 | 0 | ); |
740 | 0 | toFill.addElement(curAttr); |
741 | 0 | } |
742 | 0 | else |
743 | 0 | { |
744 | 0 | curAttr = toFill.elementAt(retCount); |
745 | 0 | curAttr->set |
746 | 0 | ( |
747 | 0 | uriId |
748 | 0 | , suffPtr |
749 | 0 | , prefPtr |
750 | 0 | , normBuf.getRawBuffer() |
751 | 0 | , attType |
752 | 0 | ); |
753 | 0 | curAttr->setSpecified(true); |
754 | 0 | } |
755 | |
|
756 | 0 | if (toUseHashTable) |
757 | 0 | { |
758 | 0 | fAttrDupChkRegistry->put((void*)suffPtr, uriId, curAttr); |
759 | 0 | } |
760 | |
|
761 | 0 | if(psviAttr) |
762 | 0 | psviAttr->setValue(curAttr->getValue()); |
763 | | |
764 | | // Bump the count of attrs in the list |
765 | 0 | retCount++; |
766 | 0 | } |
767 | | |
768 | | // Now, if there are any attributes declared by this element, let's |
769 | | // go through them and make sure that any required ones are provided, |
770 | | // and fault in any fixed ones and defaulted ones that are not provided |
771 | | // literally. |
772 | 0 | if (hasDefs) |
773 | 0 | { |
774 | | // Check after all specified attrs are scanned |
775 | | // (1) report error for REQUIRED attrs that are missing (V_TAGc) |
776 | | // (2) add default attrs if missing (FIXED and NOT_FIXED) |
777 | | |
778 | |
|
779 | 0 | XMLAttDefList &attDefList = getAttDefList(fGrammarType == Grammar::SchemaGrammarType, currType, elemDecl); |
780 | |
|
781 | 0 | for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++) |
782 | 0 | { |
783 | | // Get the current att def, for convenience and its def type |
784 | 0 | const XMLAttDef *curDef = &attDefList.getAttDef(i); |
785 | 0 | const XMLAttDef::DefAttTypes defType = curDef->getDefaultType(); |
786 | 0 | unsigned int *attCountPtr = fAttDefRegistry->get((void *)curDef); |
787 | 0 | if (!attCountPtr || *attCountPtr < fElemCount) |
788 | 0 | { // did not occur |
789 | | // note that since there is no attribute information |
790 | | // item present, there is no PSVI infoset to augment here *except* |
791 | | // that the element is invalid |
792 | | |
793 | | //the attribute is not provided |
794 | 0 | if (fValidate) |
795 | 0 | { |
796 | | // If we are validating and it's required, then it's an error |
797 | 0 | if ((defType == XMLAttDef::Required) || |
798 | 0 | (defType == XMLAttDef::Required_And_Fixed) ) |
799 | | |
800 | 0 | { |
801 | 0 | fValidator->emitError |
802 | 0 | ( |
803 | 0 | XMLValid::RequiredAttrNotProvided |
804 | 0 | , curDef->getFullName() |
805 | 0 | ); |
806 | 0 | if(fGrammarType == Grammar::SchemaGrammarType) |
807 | 0 | { |
808 | 0 | fPSVIElemContext.fErrorOccurred = true; |
809 | 0 | } |
810 | 0 | } |
811 | 0 | else if ((defType == XMLAttDef::Default) || |
812 | 0 | (defType == XMLAttDef::Fixed) ) |
813 | 0 | { |
814 | 0 | if (fStandalone && curDef->isExternal()) |
815 | 0 | { |
816 | | // XML 1.0 Section 2.9 |
817 | | // Document is standalone, so attributes must not be defaulted. |
818 | 0 | fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName()); |
819 | 0 | if(fGrammarType == Grammar::SchemaGrammarType) |
820 | 0 | { |
821 | 0 | fPSVIElemContext.fErrorOccurred = true; |
822 | 0 | } |
823 | 0 | } |
824 | 0 | } |
825 | 0 | } |
826 | | |
827 | | // Fault in the value if needed, and bump the att count. |
828 | 0 | if ((defType == XMLAttDef::Default) |
829 | 0 | || (defType == XMLAttDef::Fixed)) |
830 | 0 | { |
831 | | // Let the validator pass judgement on the attribute value |
832 | 0 | if (fValidate) |
833 | 0 | { |
834 | 0 | fValidator->validateAttrValue |
835 | 0 | ( |
836 | 0 | curDef |
837 | 0 | , curDef->getValue() |
838 | 0 | , false |
839 | 0 | , elemDecl |
840 | 0 | ); |
841 | 0 | } |
842 | |
|
843 | 0 | XMLAttr* curAtt; |
844 | 0 | if (retCount >= curAttListSize) |
845 | 0 | { |
846 | 0 | curAtt = new (fMemoryManager) XMLAttr(fMemoryManager); |
847 | 0 | fValidator->faultInAttr(*curAtt, *curDef); |
848 | 0 | fAttrList->addElement(curAtt); |
849 | 0 | } |
850 | 0 | else |
851 | 0 | { |
852 | 0 | curAtt = fAttrList->elementAt(retCount); |
853 | 0 | fValidator->faultInAttr(*curAtt, *curDef); |
854 | 0 | } |
855 | |
|
856 | 0 | if (fGrammarType == Grammar::DTDGrammarType) |
857 | 0 | { |
858 | | // Map the new attribute's prefix to a URI id and store |
859 | | // that in the attribute object. |
860 | 0 | curAtt->setURIId |
861 | 0 | ( |
862 | 0 | resolvePrefix(curAtt->getPrefix(), ElemStack::Mode_Attribute) |
863 | 0 | ); |
864 | 0 | } |
865 | | |
866 | | // Indicate it was not explicitly specified and bump count |
867 | 0 | curAtt->setSpecified(false); |
868 | 0 | retCount++; |
869 | 0 | if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType) |
870 | 0 | { |
871 | 0 | QName *attName = ((SchemaAttDef *)curDef)->getAttName(); |
872 | 0 | PSVIAttribute *defAttrToFill = fPSVIAttrList->getPSVIAttributeToFill |
873 | 0 | ( |
874 | 0 | attName->getLocalPart(), fURIStringPool->getValueForId( attName->getURI()) |
875 | 0 | ); |
876 | 0 | XSAttributeDeclaration *defAttrDecl = (XSAttributeDeclaration *)fModel->getXSObject((void *)curDef); |
877 | 0 | DatatypeValidator * attrDataType = ((SchemaAttDef *)curDef)->getDatatypeValidator(); |
878 | 0 | XSSimpleTypeDefinition *defAttrType = |
879 | 0 | (XSSimpleTypeDefinition*)fModel->getXSObject(attrDataType); |
880 | | // would have occurred during validation of default value |
881 | 0 | if(((SchemaValidator *)fValidator)->getErrorOccurred()) |
882 | 0 | { |
883 | 0 | defAttrToFill->reset( |
884 | 0 | fRootElemName |
885 | 0 | , PSVIItem::VALIDITY_INVALID |
886 | 0 | , PSVIItem::VALIDATION_FULL |
887 | 0 | , defAttrType |
888 | 0 | , 0 |
889 | 0 | , curDef->getValue() |
890 | 0 | , true |
891 | 0 | , defAttrDecl |
892 | 0 | , 0 |
893 | 0 | ); |
894 | 0 | } |
895 | 0 | else |
896 | 0 | { |
897 | 0 | XSSimpleTypeDefinition *defAttrMemberType = 0; |
898 | 0 | if(defAttrType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION) |
899 | 0 | { |
900 | 0 | defAttrMemberType = (XSSimpleTypeDefinition *)fModel->getXSObject |
901 | 0 | ( |
902 | 0 | ((SchemaValidator*)fValidator)->getMostRecentAttrValidator() |
903 | 0 | ); |
904 | 0 | } |
905 | 0 | defAttrToFill->reset( |
906 | 0 | fRootElemName |
907 | 0 | , PSVIItem::VALIDITY_VALID |
908 | 0 | , PSVIItem::VALIDATION_FULL |
909 | 0 | , defAttrType |
910 | 0 | , defAttrMemberType |
911 | 0 | , curDef->getValue() |
912 | 0 | , true |
913 | 0 | , defAttrDecl |
914 | 0 | , (defAttrMemberType)?((SchemaValidator *)fValidator)->getMostRecentAttrValidator():attrDataType |
915 | 0 | ); |
916 | 0 | } |
917 | 0 | defAttrToFill->setValue(curDef->getValue()); |
918 | 0 | } |
919 | 0 | } |
920 | 0 | } |
921 | 0 | else if(attCountPtr) |
922 | 0 | { |
923 | | //attribute is provided |
924 | | // (schema) report error for PROHIBITED attrs that are present (V_TAGc) |
925 | 0 | if (defType == XMLAttDef::Prohibited && fValidate) |
926 | 0 | { |
927 | 0 | fValidator->emitError |
928 | 0 | ( |
929 | 0 | XMLValid::ProhibitedAttributePresent |
930 | 0 | , curDef->getFullName() |
931 | 0 | ); |
932 | 0 | if(fGrammarType == Grammar::SchemaGrammarType) |
933 | 0 | { |
934 | 0 | fPSVIElemContext.fErrorOccurred = true; |
935 | 0 | if (getPSVIHandler()) |
936 | 0 | { |
937 | 0 | QName *attQName = ((SchemaAttDef *)curDef)->getAttName(); |
938 | | // bad luck... |
939 | 0 | PSVIAttribute *prohibitedAttr = fPSVIAttrList->getAttributePSVIByName |
940 | 0 | ( |
941 | 0 | attQName->getLocalPart(), |
942 | 0 | fURIStringPool->getValueForId(attQName->getURI()) |
943 | 0 | ); |
944 | 0 | prohibitedAttr->updateValidity(PSVIItem::VALIDITY_INVALID); |
945 | 0 | } |
946 | 0 | } |
947 | 0 | } |
948 | 0 | } |
949 | 0 | } |
950 | 0 | } |
951 | 0 | return retCount; |
952 | 0 | } |
953 | | |
954 | | |
955 | | // This method will take a raw attribute value and normalize it according to |
956 | | // the rules of the attribute type. It will put the resulting value into the |
957 | | // passed buffer. |
958 | | // |
959 | | // This code assumes that escaped characters in the original value (via char |
960 | | // refs) are prefixed by a 0xFFFF character. This is because some characters |
961 | | // are legal if escaped only. And some escape chars are not subject to |
962 | | // normalization rules. |
963 | | bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef |
964 | | , const XMLCh* const attName |
965 | | , const XMLCh* const value |
966 | | , XMLBuffer& toFill) |
967 | 0 | { |
968 | | // A simple state value for a whitespace processing state machine |
969 | 0 | enum States |
970 | 0 | { |
971 | 0 | InWhitespace |
972 | 0 | , InContent |
973 | 0 | }; |
974 | | |
975 | | // Get the type and name |
976 | 0 | const XMLAttDef::AttTypes type = (attDef)?attDef->getType():XMLAttDef::CData; |
977 | | |
978 | | // check to see if it's a tokenized type that is declared externally |
979 | 0 | bool isAttTokenizedExternal = (attDef) |
980 | 0 | ?attDef->isExternal() && (type == XMLAttDef::ID || |
981 | 0 | type == XMLAttDef::IDRef || |
982 | 0 | type == XMLAttDef::IDRefs || |
983 | 0 | type == XMLAttDef::Entity || |
984 | 0 | type == XMLAttDef::Entities || |
985 | 0 | type == XMLAttDef::NmToken || |
986 | 0 | type == XMLAttDef::NmTokens) |
987 | 0 | :false; |
988 | | |
989 | | // Assume its going to go fine, and empty the target buffer in preperation |
990 | 0 | bool retVal = true; |
991 | 0 | toFill.reset(); |
992 | | |
993 | | // Loop through the chars of the source value and normalize it according |
994 | | // to the type. |
995 | 0 | XMLCh nextCh; |
996 | 0 | const XMLCh* srcPtr = value; |
997 | |
|
998 | 0 | if (type == XMLAttDef::CData || type > XMLAttDef::Notation) { |
999 | | // Get the next character from the source. We have to watch for |
1000 | | // escaped characters (which are indicated by a 0xFFFF value followed |
1001 | | // by the char that was escaped.) |
1002 | 0 | while ((nextCh = *srcPtr++)!=0) |
1003 | 0 | { |
1004 | 0 | switch(nextCh) |
1005 | 0 | { |
1006 | | // Do we have an escaped character ? |
1007 | 0 | case 0xFFFF: |
1008 | 0 | nextCh = *srcPtr++; |
1009 | 0 | break; |
1010 | 0 | case 0x09: |
1011 | 0 | case 0x0A: |
1012 | 0 | case 0x0D: |
1013 | | // Check Validity Constraint for Standalone document declaration |
1014 | | // XML 1.0, Section 2.9 |
1015 | 0 | if (fStandalone && fValidate && isAttTokenizedExternal) |
1016 | 0 | { |
1017 | | // Can't have a standalone document declaration of "yes" if attribute |
1018 | | // values are subject to normalisation |
1019 | 0 | fValidator->emitError(XMLValid::NoAttNormForStandalone, attName); |
1020 | 0 | } |
1021 | 0 | nextCh = chSpace; |
1022 | 0 | break; |
1023 | 0 | case chOpenAngle: |
1024 | | // If its not escaped, then make sure its not a < character, which is |
1025 | | // not allowed in attribute values. |
1026 | 0 | emitError(XMLErrs::BracketInAttrValue, attName); |
1027 | 0 | retVal = false; |
1028 | 0 | break; |
1029 | 0 | } |
1030 | | |
1031 | | // Add this char to the target buffer |
1032 | 0 | toFill.append(nextCh); |
1033 | 0 | } |
1034 | 0 | } |
1035 | 0 | else { |
1036 | 0 | States curState = InContent; |
1037 | 0 | bool firstNonWS = false; |
1038 | | // Get the next character from the source. We have to watch for |
1039 | | // escaped characters (which are indicated by a 0xFFFF value followed |
1040 | | // by the char that was escaped.) |
1041 | 0 | while ((nextCh = *srcPtr)!=0) |
1042 | 0 | { |
1043 | | // Do we have an escaped character ? |
1044 | 0 | if (nextCh == 0xFFFF) |
1045 | 0 | { |
1046 | 0 | nextCh = *++srcPtr; |
1047 | 0 | } |
1048 | 0 | else if (nextCh == chOpenAngle) { |
1049 | | // If its not escaped, then make sure its not a < character, which is |
1050 | | // not allowed in attribute values. |
1051 | 0 | emitError(XMLErrs::BracketInAttrValue, attName); |
1052 | 0 | retVal = false; |
1053 | 0 | } |
1054 | |
|
1055 | 0 | if (curState == InWhitespace) |
1056 | 0 | { |
1057 | 0 | if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) |
1058 | 0 | { |
1059 | 0 | if (firstNonWS) |
1060 | 0 | toFill.append(chSpace); |
1061 | 0 | curState = InContent; |
1062 | 0 | firstNonWS = true; |
1063 | 0 | } |
1064 | 0 | else |
1065 | 0 | { |
1066 | 0 | srcPtr++; |
1067 | 0 | continue; |
1068 | 0 | } |
1069 | 0 | } |
1070 | 0 | else if (curState == InContent) |
1071 | 0 | { |
1072 | 0 | if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) |
1073 | 0 | { |
1074 | 0 | curState = InWhitespace; |
1075 | 0 | srcPtr++; |
1076 | | |
1077 | | // Check Validity Constraint for Standalone document declaration |
1078 | | // XML 1.0, Section 2.9 |
1079 | 0 | if (fStandalone && fValidate && isAttTokenizedExternal) |
1080 | 0 | { |
1081 | 0 | if (!firstNonWS || (nextCh != chSpace && *srcPtr && fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr))) |
1082 | 0 | { |
1083 | | // Can't have a standalone document declaration of "yes" if attribute |
1084 | | // values are subject to normalisation |
1085 | 0 | fValidator->emitError(XMLValid::NoAttNormForStandalone, attName); |
1086 | 0 | } |
1087 | 0 | } |
1088 | 0 | continue; |
1089 | 0 | } |
1090 | 0 | firstNonWS = true; |
1091 | 0 | } |
1092 | | |
1093 | | // Add this char to the target buffer |
1094 | 0 | toFill.append(nextCh); |
1095 | | |
1096 | | // And move up to the next character in the source |
1097 | 0 | srcPtr++; |
1098 | 0 | } |
1099 | 0 | } |
1100 | | |
1101 | 0 | return retVal; |
1102 | 0 | } |
1103 | | |
1104 | | // This method will just normalize the input value as CDATA without |
1105 | | // any standalone checking. |
1106 | | bool IGXMLScanner::normalizeAttRawValue( const XMLCh* const attrName |
1107 | | , const XMLCh* const value |
1108 | | , XMLBuffer& toFill) |
1109 | 0 | { |
1110 | | // Assume its going to go fine, and empty the target buffer in preperation |
1111 | 0 | bool retVal = true; |
1112 | 0 | toFill.reset(); |
1113 | | |
1114 | | // Loop through the chars of the source value and normalize it according |
1115 | | // to the type. |
1116 | 0 | bool escaped; |
1117 | 0 | XMLCh nextCh; |
1118 | 0 | const XMLCh* srcPtr = value; |
1119 | 0 | while (*srcPtr) |
1120 | 0 | { |
1121 | | // Get the next character from the source. We have to watch for |
1122 | | // escaped characters (which are indicated by a 0xFFFF value followed |
1123 | | // by the char that was escaped.) |
1124 | 0 | nextCh = *srcPtr; |
1125 | 0 | escaped = (nextCh == 0xFFFF); |
1126 | 0 | if (escaped) |
1127 | 0 | nextCh = *++srcPtr; |
1128 | | |
1129 | | // If its not escaped, then make sure its not a < character, which is |
1130 | | // not allowed in attribute values. |
1131 | 0 | if (!escaped && (*srcPtr == chOpenAngle)) |
1132 | 0 | { |
1133 | 0 | emitError(XMLErrs::BracketInAttrValue, attrName); |
1134 | 0 | retVal = false; |
1135 | 0 | } |
1136 | |
|
1137 | 0 | if (!escaped) |
1138 | 0 | { |
1139 | | // NOTE: Yes this is a little redundant in that a 0x20 is |
1140 | | // replaced with an 0x20. But its faster to do this (I think) |
1141 | | // than checking for 9, A, and D separately. |
1142 | 0 | if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) |
1143 | 0 | nextCh = chSpace; |
1144 | 0 | } |
1145 | | |
1146 | | // Add this char to the target buffer |
1147 | 0 | toFill.append(nextCh); |
1148 | | |
1149 | | // And move up to the next character in the source |
1150 | 0 | srcPtr++; |
1151 | 0 | } |
1152 | 0 | return retVal; |
1153 | 0 | } |
1154 | | |
1155 | | // This method will reset the scanner data structures, and related plugged |
1156 | | // in stuff, for a new scan session. We get the input source for the primary |
1157 | | // XML entity, create the reader for it, and push it on the stack so that |
1158 | | // upon successful return from here we are ready to go. |
1159 | | void IGXMLScanner::scanReset(const InputSource& src) |
1160 | 17.6k | { |
1161 | | // This call implicitly tells us that we are going to reuse the scanner |
1162 | | // if it was previously used. So tell the validator to reset itself. |
1163 | | // |
1164 | | // But, if the fUseCacheGrammar flag is set, then don't reset it. |
1165 | | // |
1166 | | // NOTE: The ReaderMgr is flushed on the way out, because that is |
1167 | | // required to insure that files are closed. |
1168 | 17.6k | fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar); |
1169 | 17.6k | fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar); |
1170 | | |
1171 | | // Clear transient schema info list. |
1172 | | // |
1173 | 17.6k | fSchemaInfoList->removeAll (); |
1174 | | |
1175 | | // fModel may need updating, as fGrammarResolver could have cleaned it |
1176 | 17.6k | if(getPSVIHandler()) |
1177 | 0 | fModel = fGrammarResolver->getXSModel(); |
1178 | | |
1179 | 17.6k | { |
1180 | 17.6k | XMLDTDDescriptionImpl theDTDDescription(XMLUni::fgDTDEntityString, fMemoryManager); |
1181 | 17.6k | fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(&theDTDDescription); |
1182 | 17.6k | } |
1183 | | |
1184 | 17.6k | if (!fDTDGrammar) { |
1185 | | |
1186 | 17.6k | fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager); |
1187 | 17.6k | fGrammarResolver->putGrammar(fDTDGrammar); |
1188 | 17.6k | } |
1189 | 0 | else |
1190 | 0 | fDTDGrammar->reset(); |
1191 | | |
1192 | 17.6k | fGrammar = fDTDGrammar; |
1193 | 17.6k | fGrammarType = fGrammar->getGrammarType(); |
1194 | 17.6k | fRootGrammar = 0; |
1195 | | |
1196 | 17.6k | if (fValidatorFromUser) { |
1197 | 0 | if (fValidator->handlesDTD()) |
1198 | 0 | fValidator->setGrammar(fGrammar); |
1199 | 0 | else if (fValidator->handlesSchema()) { |
1200 | |
|
1201 | 0 | ((SchemaValidator*) fValidator)->setErrorReporter(fErrorReporter); |
1202 | 0 | ((SchemaValidator*) fValidator)->setGrammarResolver(fGrammarResolver); |
1203 | 0 | ((SchemaValidator*) fValidator)->setExitOnFirstFatal(fExitOnFirstFatal); |
1204 | 0 | } |
1205 | 0 | } |
1206 | 17.6k | else { |
1207 | | // set fValidator as fDTDValidator |
1208 | 17.6k | fValidator = fDTDValidator; |
1209 | 17.6k | fValidator->setGrammar(fGrammar); |
1210 | 17.6k | } |
1211 | | |
1212 | | // Reset validation |
1213 | 17.6k | fValidate = (fValScheme == Val_Always) ? true : false; |
1214 | | |
1215 | | // Ignore skipDTDValidation flag if no schema processing is taking place */ |
1216 | 17.6k | fSkipDTDValidation = fSkipDTDValidation && fDoSchema; |
1217 | | |
1218 | | // And for all installed handlers, send reset events. This gives them |
1219 | | // a chance to flush any cached data. |
1220 | 17.6k | if (fDocHandler) |
1221 | 0 | fDocHandler->resetDocument(); |
1222 | 17.6k | if (fEntityHandler) |
1223 | 0 | fEntityHandler->resetEntities(); |
1224 | 17.6k | if (fErrorReporter) |
1225 | 0 | fErrorReporter->resetErrors(); |
1226 | | |
1227 | | // Clear out the id reference list |
1228 | 17.6k | resetValidationContext(); |
1229 | | |
1230 | | // Reset the Root Element Name |
1231 | 17.6k | fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName; |
1232 | 17.6k | fRootElemName = 0; |
1233 | | |
1234 | | // Reset IdentityConstraints |
1235 | 17.6k | if (fICHandler) |
1236 | 17.6k | fICHandler->reset(); |
1237 | | |
1238 | | // Reset the element stack, and give it the latest ids for the special |
1239 | | // URIs it has to know about. |
1240 | 17.6k | fElemStack.reset |
1241 | 17.6k | ( |
1242 | 17.6k | fEmptyNamespaceId |
1243 | 17.6k | , fUnknownNamespaceId |
1244 | 17.6k | , fXMLNamespaceId |
1245 | 17.6k | , fXMLNSNamespaceId |
1246 | 17.6k | ); |
1247 | | |
1248 | 17.6k | if (!fSchemaNamespaceId) |
1249 | 17.6k | fSchemaNamespaceId = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI); |
1250 | | |
1251 | | // Reset some status flags |
1252 | 17.6k | fInException = false; |
1253 | 17.6k | fStandalone = false; |
1254 | 17.6k | fErrorCount = 0; |
1255 | 17.6k | fHasNoDTD = true; |
1256 | 17.6k | fSeeXsi = false; |
1257 | | |
1258 | | // Reset PSVI context |
1259 | | // note that we always need this around for DOMTypeInfo |
1260 | 17.6k | if (!fPSVIElement) |
1261 | 17.6k | fPSVIElement = new (fMemoryManager) PSVIElement(fMemoryManager); |
1262 | | |
1263 | 17.6k | if (!fErrorStack) |
1264 | 17.6k | { |
1265 | 17.6k | fErrorStack = new (fMemoryManager) ValueStackOf<bool>(8, fMemoryManager); |
1266 | 17.6k | } |
1267 | 0 | else |
1268 | 0 | { |
1269 | 0 | fErrorStack->removeAllElements(); |
1270 | 0 | } |
1271 | | |
1272 | 17.6k | resetPSVIElemContext(); |
1273 | | |
1274 | | // Reset the validators |
1275 | 17.6k | fDTDValidator->reset(); |
1276 | 17.6k | fDTDValidator->setErrorReporter(fErrorReporter); |
1277 | 17.6k | fSchemaValidator->reset(); |
1278 | 17.6k | fSchemaValidator->setErrorReporter(fErrorReporter); |
1279 | 17.6k | fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal); |
1280 | 17.6k | fSchemaValidator->setGrammarResolver(fGrammarResolver); |
1281 | 17.6k | if (fValidatorFromUser) |
1282 | 0 | fValidator->reset(); |
1283 | | |
1284 | | // Handle the creation of the XML reader object for this input source. |
1285 | | // This will provide us with transcoding and basic lexing services. |
1286 | 17.6k | XMLReader* newReader = fReaderMgr.createReader |
1287 | 17.6k | ( |
1288 | 17.6k | src |
1289 | 17.6k | , true |
1290 | 17.6k | , XMLReader::RefFrom_NonLiteral |
1291 | 17.6k | , XMLReader::Type_General |
1292 | 17.6k | , XMLReader::Source_External |
1293 | 17.6k | , fCalculateSrcOfs |
1294 | 17.6k | , fLowWaterMark |
1295 | 17.6k | ); |
1296 | | |
1297 | 17.6k | if (!newReader) { |
1298 | 0 | if (src.getIssueFatalErrorIfNotFound()) |
1299 | 0 | ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager); |
1300 | 0 | else |
1301 | 0 | ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager); |
1302 | 0 | } |
1303 | | |
1304 | | // Push this read onto the reader manager |
1305 | 17.6k | fReaderMgr.pushReader(newReader, 0); |
1306 | | |
1307 | | // and reset security-related things if necessary: |
1308 | 17.6k | if(fSecurityManager != 0) |
1309 | 0 | { |
1310 | 0 | fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit(); |
1311 | 0 | fEntityExpansionCount = 0; |
1312 | 0 | } |
1313 | 17.6k | fElemCount = 0; |
1314 | 17.6k | if(fUIntPoolRowTotal >= 32) |
1315 | 0 | { // 8 KB tied up with validating attributes... |
1316 | 0 | fAttDefRegistry->removeAll(); |
1317 | 0 | recreateUIntPool(); |
1318 | 0 | } |
1319 | 17.6k | else |
1320 | 17.6k | { |
1321 | | // note that this will implicitly reset the values of the hashtables, |
1322 | | // though their buckets will still be tied up |
1323 | 17.6k | resetUIntPool(); |
1324 | 17.6k | } |
1325 | 17.6k | fUndeclaredAttrRegistry->removeAll(); |
1326 | 17.6k | fDTDElemNonDeclPool->removeAll(); |
1327 | 17.6k | } |
1328 | | |
1329 | | |
1330 | | // This method is called between markup in content. It scans for character |
1331 | | // data that is sent to the document handler. It watches for any markup |
1332 | | // characters that would indicate that the character data has ended. It also |
1333 | | // handles expansion of general and character entities. |
1334 | | // |
1335 | | // sendData() is a local static helper for this method which handles some |
1336 | | // code that must be done in three different places here. |
1337 | | void IGXMLScanner::sendCharData(XMLBuffer& toSend) |
1338 | 3.13M | { |
1339 | | // If no data in the buffer, then nothing to do |
1340 | 3.13M | if (toSend.isEmpty()) |
1341 | 219k | return; |
1342 | | |
1343 | | // We do different things according to whether we are validating or |
1344 | | // not. If not, its always just characters; else, it depends on the |
1345 | | // current element's content model. |
1346 | 2.91M | if (fValidate) |
1347 | 2.90M | { |
1348 | | // Get the raw data we need for the callback |
1349 | 2.90M | const XMLCh* rawBuf = toSend.getRawBuffer(); |
1350 | 2.90M | XMLSize_t len = toSend.getLen(); |
1351 | | |
1352 | | // And see if the current element is a 'Children' style content model |
1353 | 2.90M | const ElemStack::StackElem* topElem = fElemStack.topElement(); |
1354 | | |
1355 | | // Get the character data opts for the current element |
1356 | 2.90M | XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData; |
1357 | 2.90M | if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType) |
1358 | 0 | { |
1359 | | // And see if the current element is a 'Children' style content model |
1360 | 0 | ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); |
1361 | 0 | if(currType) |
1362 | 0 | { |
1363 | 0 | SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType(); |
1364 | 0 | if(modelType == SchemaElementDecl::Children || |
1365 | 0 | modelType == SchemaElementDecl::ElementOnlyEmpty) |
1366 | 0 | charOpts = XMLElementDecl::SpacesOk; |
1367 | 0 | else if(modelType == SchemaElementDecl::Empty) |
1368 | 0 | charOpts = XMLElementDecl::NoCharData; |
1369 | 0 | } |
1370 | 0 | } else // DTD grammar |
1371 | 2.90M | charOpts = topElem->fThisElement->getCharDataOpts(); |
1372 | | |
1373 | 2.90M | if (charOpts == XMLElementDecl::NoCharData) |
1374 | 1.63k | { |
1375 | | // They definitely cannot handle any type of char data |
1376 | 1.63k | fValidator->emitError(XMLValid::NoCharDataInCM); |
1377 | | //if(fGrammarType == Grammar::SchemaGrammarType) |
1378 | | //{ |
1379 | | // if (getPSVIHandler()) |
1380 | | // { |
1381 | | // REVISIT: |
1382 | | // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); |
1383 | | // } |
1384 | | // } |
1385 | 1.63k | } |
1386 | 2.90M | else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len)) |
1387 | 361k | { |
1388 | | // Its all spaces. So, if they can take spaces, then send it |
1389 | | // as ignorable whitespace. If they can handle any char data |
1390 | | // send it as characters. |
1391 | 361k | if (charOpts == XMLElementDecl::SpacesOk) { |
1392 | 4.78k | if (fDocHandler) |
1393 | 0 | fDocHandler->ignorableWhitespace(rawBuf, len, false); |
1394 | 4.78k | } |
1395 | 356k | else if (charOpts == XMLElementDecl::AllCharData) |
1396 | 356k | { |
1397 | 356k | if (fGrammarType != Grammar::SchemaGrammarType) |
1398 | 356k | { |
1399 | 356k | if (fDocHandler) |
1400 | 0 | fDocHandler->docCharacters(rawBuf, len, false); |
1401 | 356k | } |
1402 | 0 | else |
1403 | 0 | { |
1404 | 0 | XMLSize_t xsLen; |
1405 | 0 | const XMLCh* xsNormalized; |
1406 | 0 | SchemaValidator *schemaValidator = (SchemaValidator *)fValidator; |
1407 | 0 | DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); |
1408 | 0 | if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) |
1409 | 0 | { |
1410 | | // normalize the character according to schema whitespace facet |
1411 | 0 | ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf); |
1412 | 0 | xsNormalized = fWSNormalizeBuf.getRawBuffer(); |
1413 | 0 | xsLen = fWSNormalizeBuf.getLen(); |
1414 | 0 | } |
1415 | 0 | else { |
1416 | 0 | xsNormalized = rawBuf; |
1417 | 0 | xsLen = len ; |
1418 | 0 | } |
1419 | | |
1420 | | // tell the schema validation about the character data for checkContent later |
1421 | 0 | schemaValidator->setDatatypeBuffer(xsNormalized); |
1422 | | |
1423 | | // call all active identity constraints |
1424 | 0 | if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) { |
1425 | 0 | fContent.append(xsNormalized, xsLen); |
1426 | 0 | } |
1427 | |
|
1428 | 0 | if (fDocHandler) { |
1429 | 0 | if (fNormalizeData) { |
1430 | 0 | fDocHandler->docCharacters(xsNormalized, xsLen, false); |
1431 | 0 | } |
1432 | 0 | else { |
1433 | 0 | fDocHandler->docCharacters(rawBuf, len, false); |
1434 | 0 | } |
1435 | 0 | } |
1436 | 0 | } |
1437 | 356k | } |
1438 | 361k | } |
1439 | 2.54M | else |
1440 | 2.54M | { |
1441 | | // If they can take any char data, then send it. Otherwise, they |
1442 | | // can only handle whitespace and can't handle this stuff so |
1443 | | // issue an error. |
1444 | 2.54M | if (charOpts == XMLElementDecl::AllCharData) |
1445 | 2.50M | { |
1446 | 2.50M | if (fGrammarType != Grammar::SchemaGrammarType) |
1447 | 2.50M | { |
1448 | 2.50M | if (fDocHandler) |
1449 | 0 | fDocHandler->docCharacters(rawBuf, len, false); |
1450 | 2.50M | } |
1451 | 0 | else |
1452 | 0 | { |
1453 | 0 | XMLSize_t xsLen; |
1454 | 0 | const XMLCh* xsNormalized; |
1455 | 0 | SchemaValidator *schemaValidator = (SchemaValidator*)fValidator; |
1456 | 0 | DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); |
1457 | 0 | if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) |
1458 | 0 | { |
1459 | | // normalize the character according to schema whitespace facet |
1460 | 0 | ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf); |
1461 | 0 | xsNormalized = fWSNormalizeBuf.getRawBuffer(); |
1462 | 0 | xsLen = fWSNormalizeBuf.getLen(); |
1463 | 0 | } |
1464 | 0 | else { |
1465 | 0 | xsNormalized = rawBuf; |
1466 | 0 | xsLen = len; |
1467 | 0 | } |
1468 | | |
1469 | | // tell the schema validation about the character data for checkContent later |
1470 | 0 | schemaValidator->setDatatypeBuffer(xsNormalized); |
1471 | | |
1472 | | // call all active identity constraints |
1473 | 0 | if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) { |
1474 | 0 | fContent.append(xsNormalized, xsLen); |
1475 | 0 | } |
1476 | |
|
1477 | 0 | if (fDocHandler) { |
1478 | 0 | if (fNormalizeData) { |
1479 | 0 | fDocHandler->docCharacters(xsNormalized, xsLen, false); |
1480 | 0 | } |
1481 | 0 | else { |
1482 | 0 | fDocHandler->docCharacters(rawBuf, len, false); |
1483 | 0 | } |
1484 | 0 | } |
1485 | 0 | } |
1486 | 2.50M | } |
1487 | 40.3k | else |
1488 | 40.3k | { |
1489 | 40.3k | fValidator->emitError(XMLValid::NoCharDataInCM); |
1490 | 40.3k | if(fGrammarType == Grammar::SchemaGrammarType) |
1491 | 0 | { |
1492 | 0 | if (getPSVIHandler()) |
1493 | 0 | { |
1494 | | // REVISIT: |
1495 | | // PSVIAttribute->setValidity(PSVIItem::VALIDITY_INVALID); |
1496 | 0 | } |
1497 | 0 | } |
1498 | 40.3k | } |
1499 | 2.54M | } |
1500 | 2.90M | } |
1501 | 4.17k | else |
1502 | 4.17k | { |
1503 | | // call all active identity constraints |
1504 | 4.17k | if (fGrammarType == Grammar::SchemaGrammarType) { |
1505 | |
|
1506 | 0 | if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) |
1507 | 0 | fContent.append(toSend.getRawBuffer(), toSend.getLen()); |
1508 | 0 | } |
1509 | | |
1510 | | // Always assume its just char data if not validating |
1511 | 4.17k | if (fDocHandler) |
1512 | 0 | fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false); |
1513 | 4.17k | } |
1514 | | |
1515 | | // Reset buffer |
1516 | 2.91M | toSend.reset(); |
1517 | 2.91M | } |
1518 | | |
1519 | | |
1520 | | |
1521 | | // This method is called with a key/value string pair that represents an |
1522 | | // xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the |
1523 | | // current top of the element stack based on this data. We know that when |
1524 | | // we get here, that it is one of these forms, so we don't bother confirming |
1525 | | // it. |
1526 | | // |
1527 | | // But we have to ensure |
1528 | | // 1. xxx is not xmlns |
1529 | | // 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa |
1530 | | // 3. yyy is not XMLUni::fgXMLNSURIName |
1531 | | // 4. if xxx is not null, then yyy cannot be an empty string. |
1532 | | void IGXMLScanner::updateNSMap(const XMLCh* const attrName |
1533 | | , const XMLCh* const attrValue) |
1534 | 0 | { |
1535 | 0 | updateNSMap(attrName, attrValue, XMLString::indexOf(attrName, chColon)); |
1536 | 0 | } |
1537 | | |
1538 | | void IGXMLScanner::updateNSMap(const XMLCh* const attrName |
1539 | | , const XMLCh* const attrValue |
1540 | | , const int colonOfs) |
1541 | 0 | { |
1542 | | // We need a buffer to normalize the attribute value into |
1543 | 0 | XMLBufBid bbNormal(&fBufMgr); |
1544 | 0 | XMLBuffer& normalBuf = bbNormal.getBuffer(); |
1545 | | |
1546 | | // Normalize the value into the passed buffer. In this case, we don't |
1547 | | // care about the return value. An error was issued for the error, which |
1548 | | // is all we care about here. |
1549 | 0 | normalizeAttRawValue(attrName, attrValue, normalBuf); |
1550 | 0 | XMLCh* namespaceURI = normalBuf.getRawBuffer(); |
1551 | | |
1552 | | // We either have the default prefix (""), or we point it into the attr |
1553 | | // name parameter. Note that the xmlns is not the prefix we care about |
1554 | | // here. To us, the 'prefix' is really the local part of the attrName |
1555 | | // parameter. |
1556 | | // |
1557 | | // Check 1. xxx is not xmlns |
1558 | | // 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa |
1559 | | // 3. yyy is not XMLUni::fgXMLNSURIName |
1560 | | // 4. if xxx is not null, then yyy cannot be an empty string. |
1561 | 0 | const XMLCh* prefPtr = XMLUni::fgZeroLenString; |
1562 | 0 | if (colonOfs != -1) { |
1563 | 0 | prefPtr = &attrName[colonOfs + 1]; |
1564 | |
|
1565 | 0 | if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString)) |
1566 | 0 | emitError(XMLErrs::NoUseOfxmlnsAsPrefix); |
1567 | 0 | else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) { |
1568 | 0 | if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) |
1569 | 0 | emitError(XMLErrs::PrefixXMLNotMatchXMLURI); |
1570 | 0 | } |
1571 | |
|
1572 | 0 | if (!namespaceURI) |
1573 | 0 | emitError(XMLErrs::NoEmptyStrNamespace, attrName); |
1574 | 0 | else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0) |
1575 | 0 | emitError(XMLErrs::NoEmptyStrNamespace, attrName); |
1576 | 0 | } |
1577 | |
|
1578 | 0 | if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName)) |
1579 | 0 | emitError(XMLErrs::NoUseOfxmlnsURI); |
1580 | 0 | else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) { |
1581 | 0 | if (!XMLString::equals(prefPtr, XMLUni::fgXMLString)) |
1582 | 0 | emitError(XMLErrs::XMLURINotMatchXMLPrefix); |
1583 | 0 | } |
1584 | | |
1585 | | // Ok, we have to get the unique id for the attribute value, which is the |
1586 | | // URI that this value should be mapped to. The validator has the |
1587 | | // namespace string pool, so we ask him to find or add this new one. Then |
1588 | | // we ask the element stack to add this prefix to URI Id mapping. |
1589 | 0 | fElemStack.addPrefix |
1590 | 0 | ( |
1591 | 0 | prefPtr |
1592 | 0 | , fURIStringPool->addOrFind(namespaceURI) |
1593 | 0 | ); |
1594 | 0 | } |
1595 | | |
// Scans the raw attribute list of the current start tag for namespace
// declarations and, when schema processing is on, for the xsi:* attributes
// that drive grammar loading and validation.
//
// attCount is the number of leading entries of fRawAttrList (and of the
// parallel fRawAttrColonList) that are examined.
//
// The work is done in up to three passes over the same list, and the order of
// the passes matters:
//   1. Every "xmlns" / "xmlns:*" attribute is handed to updateNSMap(). If the
//      value of such an attribute equals SchemaSymbols::fgURI_XSI, fSeeXsi is
//      set (it is only ever set here, never cleared).
//   2. Only if fDoSchema && fSeeXsi: attributes whose prefix resolves to
//      fSchemaNamespaceId are checked for schemaLocation and
//      noNamespaceSchemaLocation, which trigger grammar resolution.
//   3. Same condition: xsi:type and xsi:nil are processed, after pass 2 has
//      had the chance to load the grammars they depend on.
void IGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount)
{
    //  Make an initial pass through the list and find any xmlns attributes or
    //  schema attributes.
    //  When we find one, send it off to be used to update the element stack's
    //  namespace mappings.
    for (XMLSize_t index = 0; index < attCount; index++)
    {
        // each attribute has the prefix:suffix="value"
        const KVStringPair* curPair = fRawAttrList->elementAt(index);
        const XMLCh* rawPtr = curPair->getKey();

        //  If either the key begins with "xmlns:" or it is just plain
        //  "xmlns", then use it to update the map.
        if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
        ||  XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
        {
            const XMLCh* valuePtr = curPair->getValue();

            // The third argument is the colon position recorded for this
            // attribute name in fRawAttrColonList.
            updateNSMap(rawPtr, valuePtr, fRawAttrColonList[index]);

            // if the XSI namespace URI is the declared value, remember that
            // we have seen it by setting fSeeXsi
            if (XMLString::equals(valuePtr, SchemaSymbols::fgURI_XSI)) {
                fSeeXsi = true;
            }
        }
    }

    // walk through the list again to deal with "xsi:...."
    if (fDoSchema && fSeeXsi)
    {
        for (XMLSize_t index = 0; index < attCount; index++)
        {
            // each attribute has the prefix:suffix="value"
            const KVStringPair* curPair = fRawAttrList->elementAt(index);
            const XMLCh* rawPtr = curPair->getKey();
            const XMLCh* prefPtr = XMLUni::fgZeroLenString;
            int   colonInd = fRawAttrColonList[index];

            // A colon index of -1 means "no prefix": prefPtr then stays the
            // empty string. Otherwise the prefix is copied into fURIBuf.
            if (colonInd != -1) {

                fURIBuf.set(rawPtr, colonInd);
                prefPtr = fURIBuf.getRawBuffer();
            }

            // if schema URI has been seen, scan for the schema location and uri
            // and resolve the schema grammar
            if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {

                const XMLCh* valuePtr = curPair->getValue();
                // Local part of the name; with colonInd == -1 this is rawPtr itself.
                const XMLCh* suffPtr = &rawPtr[colonInd + 1];

                if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
                    parseSchemaLocation(valuePtr);
                else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
                    resolveSchemaGrammar(valuePtr, XMLUni::fgZeroLenString);
            }
        }

        // do it another time, as xsi:type and xsi:nil only work if the schema grammar has been already
        // loaded (JIRA XERCESC-1937)
        for (XMLSize_t index = 0; index < attCount; index++)
        {
            const KVStringPair* curPair = fRawAttrList->elementAt(index);
            const XMLCh* rawPtr = curPair->getKey();
            const XMLCh* prefPtr = XMLUni::fgZeroLenString;
            int   colonInd = fRawAttrColonList[index];

            // Same prefix extraction as in the previous pass.
            if (colonInd != -1) {

                fURIBuf.set(rawPtr, colonInd);
                prefPtr = fURIBuf.getRawBuffer();
            }

            // scan for schema type
            if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {

                const XMLCh* valuePtr = curPair->getValue();
                const XMLCh* suffPtr = &rawPtr[colonInd + 1];

                if(XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE) ||
                   XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
                {
                    if (!fValidator || !fValidator->handlesSchema())
                    {
                        // If we are in the DTD mode, try to switch to the Schema
                        // mode. For that we need to find any XML Schema grammar
                        // that we can switch to. Such a grammar can only come
                        // from the cache (if it came from the schemaLocation
                        // attribute, we would be in the Schema mode already).
                        //
                        XMLGrammarPool* pool = fGrammarResolver->getGrammarPool ();
                        RefHashTableOfEnumerator<Grammar> i = pool->getGrammarEnumerator ();

                        // Switch to the first schema grammar the enumerator yields.
                        while (i.hasMoreElements ())
                        {
                            Grammar& gr (i.nextElement ());

                            if (gr.getGrammarType () == Grammar::SchemaGrammarType)
                            {
                                switchGrammar (gr.getTargetNamespace ());
                                break;
                            }
                        }
                    }

                    // Re-checked on purpose: the switch attempt above may or
                    // may not have left us with a schema-capable validator.
                    if( fValidator && fValidator->handlesSchema() )
                    {
                        if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
                        {
                            XMLBufBid bbXsi(&fBufMgr);
                            XMLBuffer& fXsiType = bbXsi.getBuffer();

                            // normalize the attribute according to schema whitespace facet
                            DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
                            normalizeAttRawValue(SchemaSymbols::fgXSI_TYPE, valuePtr, fXsiType);
                            ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, fXsiType.getRawBuffer(), fXsiType, true);
                            if (!fXsiType.isEmpty()) {
                                // Split the QName: the prefix ends up in fPrefixBuf,
                                // the local part starts right after colonPos.
                                int colonPos = -1;
                                unsigned int uriId = resolveQName (
                                      fXsiType.getRawBuffer()
                                    , fPrefixBuf
                                    , ElemStack::Mode_Element
                                    , colonPos
                                );
                                ((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fXsiType.getRawBuffer() + colonPos + 1, uriId);
                            }
                        }
                        else if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
                        {
                            // normalize the attribute according to schema whitespace facet
                            XMLBufBid bbXsi(&fBufMgr);
                            XMLBuffer& fXsiNil = bbXsi.getBuffer();

                            DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
                            normalizeAttRawValue(SchemaSymbols::fgATT_NILL, valuePtr, fXsiNil);
                            ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, fXsiNil.getRawBuffer(), fXsiNil, true);
                            // Only the literals fgATTVAL_TRUE / fgATTVAL_FALSE are
                            // accepted here; anything else is reported as invalid.
                            if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_TRUE))
                                ((SchemaValidator*)fValidator)->setNillable(true);
                            else if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_FALSE))
                                ((SchemaValidator*)fValidator)->setNillable(false);
                            else
                                emitError(XMLErrs::InvalidAttValue, fXsiNil.getRawBuffer(), valuePtr);
                        }
                    }
                }
            }
        }
    }

}
1747 | | |
1748 | | void IGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr, bool ignoreLoadSchema) |
1749 | 0 | { |
1750 | 0 | XMLCh* locStr = XMLString::replicate(schemaLocationStr, fMemoryManager); |
1751 | 0 | ArrayJanitor<XMLCh> janLoc(locStr, fMemoryManager); |
1752 | |
|
1753 | 0 | processSchemaLocation(locStr); |
1754 | 0 | XMLSize_t size = fLocationPairs->size(); |
1755 | |
|
1756 | 0 | if (size % 2 != 0 ) { |
1757 | 0 | emitError(XMLErrs::BadSchemaLocation); |
1758 | 0 | } else { |
1759 | | // We need a buffer to normalize the attribute value into |
1760 | 0 | XMLBuffer normalBuf(1023, fMemoryManager); |
1761 | 0 | for(XMLSize_t i=0; i<size; i=i+2) { |
1762 | 0 | normalizeAttRawValue(SchemaSymbols::fgXSI_SCHEMALOCATION, fLocationPairs->elementAt(i), normalBuf); |
1763 | 0 | resolveSchemaGrammar(fLocationPairs->elementAt(i+1), normalBuf.getRawBuffer(), ignoreLoadSchema); |
1764 | 0 | } |
1765 | 0 | } |
1766 | 0 | } |
1767 | | |
// Makes sure a schema grammar for the namespace 'uri' is available, loading
// it from the location hint 'loc' if necessary, and switches the scanner to
// schema validation when a grammar is in place.
//
//   loc              - location hint (system id) of the schema document
//   uri              - namespace URI the schema is expected to target
//   ignoreLoadSchema - when true, the schema is loaded even if fLoadSchema
//                      is false
//
// Outline:
//   * The grammar resolver is asked for an existing grammar for (uri, loc).
//   * If there is none, or it is a DTD grammar, or multi-import handling is
//     on and the existing grammar came from an import, the schema document is
//     parsed with an XSDDOMParser and traversed with TraverseSchema (only if
//     fLoadSchema || ignoreLoadSchema).
//   * Otherwise the existing grammar is used as is.
//   * Finally fModel is refreshed if a PSVI handler is installed.
//
// May throw MalformedURLException (bad location under strict URI
// conformance) and RuntimeException (user validator cannot handle schema).
// Note that several early returns below leave the function without reaching
// the fModel refresh at the end.
void IGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri, bool ignoreLoadSchema) {

    Grammar* grammar = 0;

    // The description object is only needed for the lookup, hence the
    // nested scope.
    {
        XMLSchemaDescriptionImpl    theSchemaDescription(uri, fMemoryManager);
        theSchemaDescription.setLocationHints(loc);
        grammar = fGrammarResolver->getGrammar(&theSchemaDescription);
    }

    // If multi-import is enabled, make sure the existing grammar came
    // from the import directive. Otherwise we may end up reloading
    // the same schema that came from the external grammar pool. Ideally,
    // we would move fSchemaInfoList to XMLGrammarPool so that it survives
    // the destruction of the scanner in which case we could rely on the
    // same logic we use to weed out duplicate schemas below.
    //
    if (!grammar ||
        grammar->getGrammarType() == Grammar::DTDGrammarType ||
        (getHandleMultipleImports() &&
         ((XMLSchemaDescription*)grammar->getGrammarDescription())->
         getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
    {
      if (fLoadSchema || ignoreLoadSchema)
      {
        // The schema document itself is parsed without validation.
        XSDDOMParser parser(0, fMemoryManager, 0);

        parser.setValidationScheme(XercesDOMParser::Val_Never);
        parser.setDoNamespaces(true);
        parser.setUserEntityHandler(fEntityHandler);
        parser.setUserErrorReporter(fErrorReporter);

        //Normalize loc: strip the 0xFFFF marker characters
        XMLBufBid nnSys(&fBufMgr);
        XMLBuffer& normalizedSysId = nnSys.getBuffer();
        XMLString::removeChar(loc, 0xFFFF, normalizedSysId);
        const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();

        // Create a buffer for expanding the system id
        XMLBufBid bbSys(&fBufMgr);
        XMLBuffer& expSysId = bbSys.getBuffer();

        //  Allow the entity handler to expand the system id if they choose
        //  to do so.
        InputSource* srcToFill = 0;
        if (fEntityHandler)
        {
            if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
                expSysId.set(normalizedURI);

            ReaderMgr::LastExtEntityInfo lastInfo;
            fReaderMgr.getLastExtEntityInfo(lastInfo);
            XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::SchemaGrammar,
                            expSysId.getRawBuffer(), uri, XMLUni::fgZeroLenString, lastInfo.systemId,
                            &fReaderMgr);
            srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
        }
        else
        {
            expSysId.set(normalizedURI);
        }

        //  If they didn't create a source via the entity handler, then we
        //  have to create one on our own.
        if (!srcToFill)
        {
            // NOTE: this return skips the fModel refresh at the end.
            if (fDisableDefaultEntityResolution)
                return;

            ReaderMgr::LastExtEntityInfo lastInfo;
            fReaderMgr.getLastExtEntityInfo(lastInfo);

            // Try the location as a URL relative to the last external
            // entity's system id; fall back to a local file unless strict
            // URI conformance is requested.
            XMLURL urlTmp(fMemoryManager);
            if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
                (urlTmp.isRelative()))
            {
                if (!fStandardUriConformant)
                {
                    XMLBufBid  ddSys(&fBufMgr);
                    XMLBuffer& resolvedSysId = ddSys.getBuffer();
                    XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);

                    srcToFill = new (fMemoryManager) LocalFileInputSource
                    (
                        lastInfo.systemId
                        , resolvedSysId.getRawBuffer()
                        , fMemoryManager
                    );
                }
                else
                    ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
            }
            else
            {
                if (fStandardUriConformant && urlTmp.hasInvalidChar())
                    ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
                srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
            }
        }

        // Put a janitor on the input source
        Janitor<InputSource> janSrc(srcToFill);

        // Check if this exact schema has already been seen.
        //
        const XMLCh* sysId = srcToFill->getSystemId();
        unsigned int uriId = (uri && *uri) ? fURIStringPool->addOrFind(uri) : fEmptyNamespaceId;
        SchemaInfo* importSchemaInfo = 0;

        if (fUseCachedGrammar)
          importSchemaInfo = fCachedSchemaInfoList->get(sysId, uriId);

        if (!importSchemaInfo && !fToCacheGrammar)
          importSchemaInfo = fSchemaInfoList->get(sysId, uriId);

        if (importSchemaInfo)
        {
          // We haven't added any new grammars so it is safe to just
          // return.
          //
          return;
        }

        // Should just issue warning if the schema is not found
        bool flag = srcToFill->getIssueFatalErrorIfNotFound();
        srcToFill->setIssueFatalErrorIfNotFound(false);

        parser.parse(*srcToFill);

        // Reset the InputSource
        srcToFill->setIssueFatalErrorIfNotFound(flag);

        if (parser.getSawFatal() && fExitOnFirstFatal)
            emitError(XMLErrs::SchemaScanFatalError);

        DOMDocument* document = parser.getDocument(); //Our Grammar

        if (document != 0) {

            DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
            if (root != 0)
            {
                // The namespace the schema document actually declares may
                // differ from the one requested; if so, continue with the
                // grammar registered for the declared namespace.
                const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
                bool newGrammar = false;
                if (!XMLString::equals(newUri, uri)) {
                    if (fValidate || fValScheme == Val_Auto) {
                        fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
                    }

                    grammar = fGrammarResolver->getGrammar(newUri);
                    newGrammar = true;
                }

                // Same "do we need to (re)load" test as at the top of the
                // function, now applied to the possibly replaced grammar.
                if (!grammar ||
                    grammar->getGrammarType() == Grammar::DTDGrammarType ||
                    (getHandleMultipleImports() &&
                     ((XMLSchemaDescription*)grammar->getGrammarDescription())->
                     getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
                {
                    // If we switched namespace URI, recheck the schema info.
                    //
                    if (newGrammar)
                    {
                      unsigned int newUriId = (newUri && *newUri) ? fURIStringPool->addOrFind(newUri) : fEmptyNamespaceId;

                      if (fUseCachedGrammar)
                        importSchemaInfo = fCachedSchemaInfoList->get(sysId, newUriId);

                      if (!importSchemaInfo && !fToCacheGrammar)
                        importSchemaInfo = fSchemaInfoList->get(sysId, newUriId);

                      // NOTE: this return skips the fModel refresh at the end.
                      if (importSchemaInfo)
                        return;
                    }

                    //  Since we have seen a grammar, set our validation flag
                    //  at this point if the validation scheme is auto
                    if (fValScheme == Val_Auto && !fValidate) {
                        fValidate = true;
                        fElemStack.setValidationFlag(fValidate);
                    }

                    // we have seen a schema, so set up the fValidator as fSchemaValidator
                    if (!fValidator->handlesSchema())
                    {
                        if (fValidatorFromUser) {
                            // the fValidator is from user
                            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
                        }
                        else {
                            fValidator = fSchemaValidator;
                        }
                    }

                    // Reuse an existing schema grammar for this namespace if
                    // there is one, otherwise start a fresh one.
                    bool grammarFound = grammar &&
                      grammar->getGrammarType() == Grammar::SchemaGrammarType;

                    SchemaGrammar* schemaGrammar;

                    if (grammarFound) {
                      schemaGrammar = (SchemaGrammar*) grammar;
                    }
                    else {
                      schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
                    }

                    XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
                    gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
                    gramDesc->setLocationHints(sysId);

                    // Constructing the traverser is what processes the
                    // schema DOM; the object itself is not used afterwards.
                    TraverseSchema traverseSchema
                    (
                        root
                        , fURIStringPool
                        , schemaGrammar
                        , fGrammarResolver
                        , fUseCachedGrammar ? fCachedSchemaInfoList : fSchemaInfoList
                        , fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList
                        , this
                        , sysId
                        , fEntityHandler
                        , fErrorReporter
                        , fMemoryManager
                        , grammarFound
                    );

                    // Reset the now invalid schema roots in the collected
                    // schema info entries.
                    //
                    {
                      RefHash2KeysTableOfEnumerator<SchemaInfo> i (
                        fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList);

                      while (i.hasMoreElements ())
                        i.nextElement().resetRoot ();
                    }

                    // Leave DTD mode if we were still in it.
                    if (fGrammarType == Grammar::DTDGrammarType) {
                        fGrammar = schemaGrammar;
                        fGrammarType = Grammar::SchemaGrammarType;
                        fValidator->setGrammar(fGrammar);
                    }

                    if (fValidate) {
                        //  validate the Schema scan so far
                        fValidator->preContentValidation(false);
                    }
                }
            }
        }
      }
    }
    else
    {
        // A usable schema grammar already exists: no loading, only make sure
        // the scanner is set up to validate against it.

        //  Since we have seen a grammar, set our validation flag
        //  at this point if the validation scheme is auto
        if (fValScheme == Val_Auto && !fValidate) {
            fValidate = true;
            fElemStack.setValidationFlag(fValidate);
        }

        // we have seen a schema, so set up the fValidator as fSchemaValidator
        if (!fValidator->handlesSchema())
        {
            if (fValidatorFromUser) {
                // the fValidator is from user
                ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
            }
            else {
                fValidator = fSchemaValidator;
            }
        }

        if (fGrammarType == Grammar::DTDGrammarType) {
            fGrammar = grammar;
            fGrammarType = Grammar::SchemaGrammarType;
            fValidator->setGrammar(fGrammar);
        }
    }

    // fModel may need updating:
    if(getPSVIHandler())
        fModel = fGrammarResolver->getXSModel();
}
2052 | | |
2053 | | InputSource* IGXMLScanner::resolveSystemId(const XMLCh* const sysId |
2054 | | ,const XMLCh* const pubId) |
2055 | 0 | { |
2056 | | //Normalize sysId |
2057 | 0 | XMLBufBid nnSys(&fBufMgr); |
2058 | 0 | XMLBuffer& normalizedSysId = nnSys.getBuffer(); |
2059 | 0 | XMLString::removeChar(sysId, 0xFFFF, normalizedSysId); |
2060 | 0 | const XMLCh* normalizedURI = normalizedSysId.getRawBuffer(); |
2061 | | |
2062 | | // Create a buffer for expanding the system id |
2063 | 0 | XMLBufBid bbSys(&fBufMgr); |
2064 | 0 | XMLBuffer& expSysId = bbSys.getBuffer(); |
2065 | | |
2066 | | // Allow the entity handler to expand the system id if they choose |
2067 | | // to do so. |
2068 | 0 | InputSource* srcToFill = 0; |
2069 | 0 | if (fEntityHandler) |
2070 | 0 | { |
2071 | 0 | if (!fEntityHandler->expandSystemId(normalizedURI, expSysId)) |
2072 | 0 | expSysId.set(normalizedURI); |
2073 | |
|
2074 | 0 | ReaderMgr::LastExtEntityInfo lastInfo; |
2075 | 0 | fReaderMgr.getLastExtEntityInfo(lastInfo); |
2076 | 0 | XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity, |
2077 | 0 | expSysId.getRawBuffer(), 0, pubId, lastInfo.systemId, |
2078 | 0 | &fReaderMgr); |
2079 | 0 | srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier); |
2080 | 0 | } |
2081 | 0 | else |
2082 | 0 | { |
2083 | 0 | expSysId.set(normalizedURI); |
2084 | 0 | } |
2085 | | |
2086 | | // If they didn't create a source via the entity handler, then we |
2087 | | // have to create one on our own. |
2088 | 0 | if (!srcToFill) |
2089 | 0 | { |
2090 | 0 | if (fDisableDefaultEntityResolution) |
2091 | 0 | return srcToFill; |
2092 | | |
2093 | 0 | ReaderMgr::LastExtEntityInfo lastInfo; |
2094 | 0 | fReaderMgr.getLastExtEntityInfo(lastInfo); |
2095 | |
|
2096 | 0 | XMLURL urlTmp(fMemoryManager); |
2097 | 0 | if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) || |
2098 | 0 | (urlTmp.isRelative())) |
2099 | 0 | { |
2100 | 0 | if (!fStandardUriConformant) |
2101 | 0 | { |
2102 | 0 | XMLBufBid ddSys(&fBufMgr); |
2103 | 0 | XMLBuffer& resolvedSysId = ddSys.getBuffer(); |
2104 | 0 | XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId); |
2105 | |
|
2106 | 0 | srcToFill = new (fMemoryManager) LocalFileInputSource |
2107 | 0 | ( |
2108 | 0 | lastInfo.systemId |
2109 | 0 | , resolvedSysId.getRawBuffer() |
2110 | 0 | , fMemoryManager |
2111 | 0 | ); |
2112 | 0 | } |
2113 | 0 | else |
2114 | 0 | ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); |
2115 | 0 | } |
2116 | 0 | else |
2117 | 0 | { |
2118 | 0 | if (fStandardUriConformant && urlTmp.hasInvalidChar()) |
2119 | 0 | ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); |
2120 | 0 | srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager); |
2121 | 0 | } |
2122 | 0 | } |
2123 | | |
2124 | 0 | return srcToFill; |
2125 | 0 | } |
2126 | | |
2127 | | |
2128 | | // --------------------------------------------------------------------------- |
2129 | | // IGXMLScanner: Private grammar preparsing methods |
2130 | | // --------------------------------------------------------------------------- |
2131 | | Grammar* IGXMLScanner::loadXMLSchemaGrammar(const InputSource& src, |
2132 | | const bool toCache) |
2133 | 0 | { |
2134 | | // Reset the validators |
2135 | 0 | fSchemaValidator->reset(); |
2136 | 0 | fSchemaValidator->setErrorReporter(fErrorReporter); |
2137 | 0 | fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal); |
2138 | 0 | fSchemaValidator->setGrammarResolver(fGrammarResolver); |
2139 | |
|
2140 | 0 | if (fValidatorFromUser) |
2141 | 0 | fValidator->reset(); |
2142 | |
|
2143 | 0 | if (!fValidator->handlesSchema()) { |
2144 | 0 | if (fValidatorFromUser && fValidate) |
2145 | 0 | ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager); |
2146 | 0 | else { |
2147 | 0 | fValidator = fSchemaValidator; |
2148 | 0 | } |
2149 | 0 | } |
2150 | | |
2151 | 0 | XSDDOMParser parser(0, fMemoryManager, 0); |
2152 | |
|
2153 | 0 | parser.setValidationScheme(XercesDOMParser::Val_Never); |
2154 | 0 | parser.setDoNamespaces(true); |
2155 | 0 | parser.setUserEntityHandler(fEntityHandler); |
2156 | 0 | parser.setUserErrorReporter(fErrorReporter); |
2157 | | |
2158 | | // Should just issue warning if the schema is not found |
2159 | 0 | bool flag = src.getIssueFatalErrorIfNotFound(); |
2160 | 0 | ((InputSource&) src).setIssueFatalErrorIfNotFound(false); |
2161 | |
|
2162 | 0 | parser.parse(src); |
2163 | | |
2164 | | // Reset the InputSource |
2165 | 0 | ((InputSource&) src).setIssueFatalErrorIfNotFound(flag); |
2166 | |
|
2167 | 0 | if (parser.getSawFatal() && fExitOnFirstFatal) |
2168 | 0 | emitError(XMLErrs::SchemaScanFatalError); |
2169 | |
|
2170 | 0 | DOMDocument* document = parser.getDocument(); //Our Grammar |
2171 | |
|
2172 | 0 | if (document != 0) { |
2173 | |
|
2174 | 0 | DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema |
2175 | 0 | if (root != 0) |
2176 | 0 | { |
2177 | 0 | const XMLCh* nsUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE); |
2178 | 0 | Grammar* grammar = fGrammarResolver->getGrammar(nsUri); |
2179 | | |
2180 | | // Check if this exact schema has already been seen. |
2181 | | // |
2182 | 0 | const XMLCh* sysId = src.getSystemId(); |
2183 | 0 | SchemaInfo* importSchemaInfo = 0; |
2184 | |
|
2185 | 0 | if (grammar) |
2186 | 0 | { |
2187 | 0 | if (nsUri && *nsUri) |
2188 | 0 | importSchemaInfo = fCachedSchemaInfoList->get(sysId, fURIStringPool->addOrFind(nsUri)); |
2189 | 0 | else |
2190 | 0 | importSchemaInfo = fCachedSchemaInfoList->get(sysId, fEmptyNamespaceId); |
2191 | 0 | } |
2192 | |
|
2193 | 0 | if (!importSchemaInfo) |
2194 | 0 | { |
2195 | 0 | bool grammarFound = grammar && |
2196 | 0 | grammar->getGrammarType() == Grammar::SchemaGrammarType && |
2197 | 0 | getHandleMultipleImports(); |
2198 | |
|
2199 | 0 | SchemaGrammar* schemaGrammar; |
2200 | |
|
2201 | 0 | if (grammarFound) |
2202 | 0 | schemaGrammar = (SchemaGrammar*) grammar; |
2203 | 0 | else |
2204 | 0 | schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager); |
2205 | |
|
2206 | 0 | XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription(); |
2207 | 0 | gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE); |
2208 | 0 | gramDesc->setLocationHints(sysId); |
2209 | |
|
2210 | 0 | TraverseSchema traverseSchema |
2211 | 0 | ( |
2212 | 0 | root |
2213 | 0 | , fURIStringPool |
2214 | 0 | , schemaGrammar |
2215 | 0 | , fGrammarResolver |
2216 | 0 | , fCachedSchemaInfoList |
2217 | 0 | , toCache ? fCachedSchemaInfoList : fSchemaInfoList |
2218 | 0 | , this |
2219 | 0 | , sysId |
2220 | 0 | , fEntityHandler |
2221 | 0 | , fErrorReporter |
2222 | 0 | , fMemoryManager |
2223 | 0 | , grammarFound |
2224 | 0 | ); |
2225 | |
|
2226 | 0 | grammar = schemaGrammar; |
2227 | | |
2228 | | // Reset the now invalid schema roots in the collected |
2229 | | // schema info entries. |
2230 | | // |
2231 | 0 | { |
2232 | 0 | RefHash2KeysTableOfEnumerator<SchemaInfo> i ( |
2233 | 0 | toCache ? fCachedSchemaInfoList : fSchemaInfoList); |
2234 | |
|
2235 | 0 | while (i.hasMoreElements ()) |
2236 | 0 | i.nextElement().resetRoot (); |
2237 | 0 | } |
2238 | 0 | } |
2239 | |
|
2240 | 0 | if (fValidate) { |
2241 | | // validate the Schema scan so far |
2242 | 0 | fValidator->setGrammar(grammar); |
2243 | 0 | fValidator->preContentValidation(false); |
2244 | 0 | } |
2245 | |
|
2246 | 0 | if (toCache) { |
2247 | 0 | fGrammarResolver->cacheGrammars(); |
2248 | 0 | } |
2249 | |
|
2250 | 0 | if(getPSVIHandler()) |
2251 | 0 | fModel = fGrammarResolver->getXSModel(); |
2252 | |
|
2253 | 0 | return grammar; |
2254 | 0 | } |
2255 | 0 | } |
2256 | | |
2257 | 0 | return 0; |
2258 | 0 | } |
2259 | | |
2260 | | |
2261 | | |
2262 | | // --------------------------------------------------------------------------- |
2263 | | // IGXMLScanner: Private parsing methods |
2264 | | // --------------------------------------------------------------------------- |
2265 | | |
2266 | | // This method is called to do a raw scan of an attribute value. It does not |
2267 | | // do normalization (since we don't know their types yet.) It just scans the |
2268 | | // value and does entity expansion. |
2269 | | // |
2270 | | // End of entity's must be dealt with here. During DTD scan, they can come |
2271 | | // from external entities. During content, they can come from any entity. |
2272 | | // We just eat the end of entity and continue with our scan until we come |
2273 | | // to the closing quote. If an unterminated value causes us to go through |
2274 | | // subsequent entities, that will cause errors back in the calling code, |
2275 | | // but there's little we can do about it here. |
2276 | | bool IGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill) |
2277 | 0 | { |
2278 | | // Reset the target buffer |
2279 | 0 | toFill.reset(); |
2280 | | |
2281 | | // Get the next char which must be a single or double quote |
2282 | 0 | XMLCh quoteCh; |
2283 | 0 | if (!fReaderMgr.skipIfQuote(quoteCh)) |
2284 | 0 | return false; |
2285 | | |
2286 | | // We have to get the current reader because we have to ignore closing |
2287 | | // quotes until we hit the same reader again. |
2288 | 0 | const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum(); |
2289 | | |
2290 | | // Loop until we get the attribute value. Note that we use a double |
2291 | | // loop here to avoid the setup/teardown overhead of the exception |
2292 | | // handler on every round. |
2293 | 0 | while (true) |
2294 | 0 | { |
2295 | 0 | try |
2296 | 0 | { |
2297 | 0 | while(true) |
2298 | 0 | { |
2299 | 0 | XMLCh nextCh = fReaderMgr.getNextChar(); |
2300 | |
|
2301 | 0 | if (nextCh != quoteCh) |
2302 | 0 | { |
2303 | 0 | if (nextCh != chAmpersand) |
2304 | 0 | { |
2305 | 0 | if ((nextCh < 0xD800) || (nextCh > 0xDFFF)) |
2306 | 0 | { |
2307 | | // Its got to at least be a valid XML character |
2308 | 0 | if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) |
2309 | 0 | { |
2310 | 0 | if (nextCh == 0) |
2311 | 0 | ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); |
2312 | | |
2313 | 0 | XMLCh tmpBuf[9]; |
2314 | 0 | XMLString::binToText |
2315 | 0 | ( |
2316 | 0 | nextCh |
2317 | 0 | , tmpBuf |
2318 | 0 | , 8 |
2319 | 0 | , 16 |
2320 | 0 | , fMemoryManager |
2321 | 0 | ); |
2322 | 0 | emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); |
2323 | 0 | } |
2324 | 0 | } else // its a surrogate |
2325 | 0 | { |
2326 | | // Deal with surrogate pairs |
2327 | | |
2328 | | // we expect a a leading surrogate. |
2329 | 0 | if (nextCh <= 0xDBFF) |
2330 | 0 | { |
2331 | 0 | toFill.append(nextCh); |
2332 | | |
2333 | | // process the trailing surrogate |
2334 | 0 | nextCh = fReaderMgr.getNextChar(); |
2335 | | |
2336 | | // it should be a trailing surrogate. |
2337 | 0 | if ((nextCh < 0xDC00) || (nextCh > 0xDFFF)) |
2338 | 0 | { |
2339 | 0 | emitError(XMLErrs::Expected2ndSurrogateChar); |
2340 | 0 | } |
2341 | 0 | } else |
2342 | 0 | { |
2343 | | // Its a trailing surrogate, but we are not expecting it |
2344 | 0 | emitError(XMLErrs::Unexpected2ndSurrogateChar); |
2345 | 0 | } |
2346 | 0 | } |
2347 | 0 | } else // its a chAmpersand |
2348 | 0 | { |
2349 | | // Check for an entity ref . We ignore the empty flag in |
2350 | | // this one. |
2351 | |
|
2352 | 0 | bool escaped; |
2353 | 0 | XMLCh firstCh; |
2354 | 0 | XMLCh secondCh |
2355 | 0 | ; |
2356 | | // If it was not returned directly, then jump back up |
2357 | 0 | if (scanEntityRef(true, firstCh, secondCh, escaped) == EntityExp_Returned) |
2358 | 0 | { |
2359 | | // If it was escaped, then put in a 0xFFFF value. This will |
2360 | | // be used later during validation and normalization of the |
2361 | | // value to know that the following character was via an |
2362 | | // escape char. |
2363 | 0 | if (escaped) |
2364 | 0 | toFill.append(0xFFFF); |
2365 | |
|
2366 | 0 | toFill.append(firstCh); |
2367 | 0 | if (secondCh) |
2368 | 0 | toFill.append(secondCh); |
2369 | 0 | } |
2370 | 0 | continue; |
2371 | 0 | } |
2372 | 0 | } else // its a quoteCh |
2373 | 0 | { |
2374 | | // Check for our ending quote. It has to be in the same entity |
2375 | | // as where we started. Quotes in nested entities are ignored. |
2376 | |
|
2377 | 0 | if (curReader == fReaderMgr.getCurrentReaderNum()) |
2378 | 0 | { |
2379 | 0 | return true; |
2380 | 0 | } |
2381 | | |
2382 | | // Watch for spillover into a previous entity |
2383 | 0 | if (curReader > fReaderMgr.getCurrentReaderNum()) |
2384 | 0 | { |
2385 | 0 | emitError(XMLErrs::PartialMarkupInEntity); |
2386 | 0 | return false; |
2387 | 0 | } |
2388 | 0 | } |
2389 | | |
2390 | | // add it to the buffer |
2391 | 0 | toFill.append(nextCh); |
2392 | |
|
2393 | 0 | } |
2394 | 0 | } |
2395 | 0 | catch(const EndOfEntityException&) |
2396 | 0 | { |
2397 | | // Just eat it and continue. |
2398 | 0 | } |
2399 | 0 | } |
2400 | 0 | return true; |
2401 | 0 | } |
2402 | | |
2403 | | |
2404 | | bool IGXMLScanner::scanAttValue( const XMLAttDef* const attDef |
2405 | | , const XMLCh* const attrName |
2406 | | , XMLBuffer& toFill) |
2407 | 1.43M | { |
2408 | 1.43M | enum States |
2409 | 1.43M | { |
2410 | 1.43M | InWhitespace |
2411 | 1.43M | , InContent |
2412 | 1.43M | }; |
2413 | | |
2414 | | // Get the type and name |
2415 | 1.43M | const XMLAttDef::AttTypes type = (attDef) |
2416 | 1.43M | ?attDef->getType() |
2417 | 1.43M | :XMLAttDef::CData; |
2418 | | |
2419 | | // Reset the target buffer |
2420 | 1.43M | toFill.reset(); |
2421 | | |
2422 | | // Get the next char which must be a single or double quote |
2423 | 1.43M | XMLCh quoteCh; |
2424 | 1.43M | if (!fReaderMgr.skipIfQuote(quoteCh)) |
2425 | 125 | return false; |
2426 | | |
2427 | | // We have to get the current reader because we have to ignore closing |
2428 | | // quotes until we hit the same reader again. |
2429 | 1.43M | const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum(); |
2430 | | |
2431 | | // check to see if it's a tokenized type that is declared externally |
2432 | 1.43M | bool isAttTokenizedExternal = (attDef) |
2433 | 1.43M | ?attDef->isExternal() && (type == XMLAttDef::ID || |
2434 | 503k | type == XMLAttDef::IDRef || |
2435 | 502k | type == XMLAttDef::IDRefs || |
2436 | 202 | type == XMLAttDef::Entity || |
2437 | 202 | type == XMLAttDef::Entities || |
2438 | 202 | type == XMLAttDef::NmToken || |
2439 | 202 | type == XMLAttDef::NmTokens) |
2440 | 1.43M | :false; |
2441 | | |
2442 | | // Loop until we get the attribute value. Note that we use a double |
2443 | | // loop here to avoid the setup/teardown overhead of the exception |
2444 | | // handler on every round. |
2445 | 1.43M | XMLCh nextCh; |
2446 | 1.43M | XMLCh secondCh = 0; |
2447 | 1.43M | States curState = InContent; |
2448 | 1.43M | bool firstNonWS = false; |
2449 | 1.43M | bool gotLeadingSurrogate = false; |
2450 | 1.43M | bool escaped; |
2451 | 1.43M | while (true) |
2452 | 1.43M | { |
2453 | 1.43M | try |
2454 | 1.43M | { |
2455 | 99.1M | while(true) |
2456 | 99.1M | { |
2457 | 99.1M | nextCh = fReaderMgr.getNextChar(); |
2458 | | |
2459 | 99.1M | if (!nextCh) |
2460 | 267 | ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); |
2461 | | |
2462 | | // Check for our ending quote in the same entity |
2463 | 99.1M | if (nextCh == quoteCh) |
2464 | 1.47M | { |
2465 | 1.47M | if (curReader == fReaderMgr.getCurrentReaderNum()) |
2466 | 1.43M | return true; |
2467 | | |
2468 | | // Watch for spillover into a previous entity |
2469 | 37.5k | if (curReader > fReaderMgr.getCurrentReaderNum()) |
2470 | 2 | { |
2471 | 2 | emitError(XMLErrs::PartialMarkupInEntity); |
2472 | 2 | return false; |
2473 | 2 | } |
2474 | 37.5k | } |
2475 | | |
2476 | | // Check for an entity ref now, before we let it affect our |
2477 | | // whitespace normalization logic below. We ignore the empty flag |
2478 | | // in this one. |
2479 | 97.6M | escaped = false; |
2480 | 97.6M | if (nextCh == chAmpersand) |
2481 | 120k | { |
2482 | 120k | if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned) |
2483 | 98.4k | { |
2484 | 98.4k | gotLeadingSurrogate = false; |
2485 | 98.4k | continue; |
2486 | 98.4k | } |
2487 | 120k | } |
2488 | 97.5M | else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) |
2489 | 12.5k | { |
2490 | | // Deal with surrogate pairs |
2491 | | // Its a leading surrogate. If we already got one, then |
2492 | | // issue an error, else set leading flag to make sure that |
2493 | | // we look for a trailing next time. |
2494 | 12.5k | if (gotLeadingSurrogate) |
2495 | 4 | emitError(XMLErrs::Expected2ndSurrogateChar); |
2496 | 12.5k | else |
2497 | 12.5k | gotLeadingSurrogate = true; |
2498 | 12.5k | } |
2499 | 97.5M | else |
2500 | 97.5M | { |
2501 | | // If it's a trailing surrogate, make sure that we are |
2502 | | // prepared for that. Else, it's just a regular char, so make |
2503 | | // sure that we were not expecting a trailing surrogate. |
2504 | 97.5M | if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) |
2505 | 12.5k | { |
2506 | | // Its trailing, so make sure we were expecting it |
2507 | 12.5k | if (!gotLeadingSurrogate) |
2508 | 4 | emitError(XMLErrs::Unexpected2ndSurrogateChar); |
2509 | 12.5k | } |
2510 | 97.5M | else |
2511 | 97.5M | { |
2512 | | // Its just a char, so make sure we were not expecting a |
2513 | | // trailing surrogate. |
2514 | 97.5M | if (gotLeadingSurrogate) |
2515 | 2 | emitError(XMLErrs::Expected2ndSurrogateChar); |
2516 | | |
2517 | | // Its got to at least be a valid XML character |
2518 | 97.5M | if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) |
2519 | 26 | { |
2520 | 26 | XMLCh tmpBuf[9]; |
2521 | 26 | XMLString::binToText |
2522 | 26 | ( |
2523 | 26 | nextCh |
2524 | 26 | , tmpBuf |
2525 | 26 | , 8 |
2526 | 26 | , 16 |
2527 | 26 | , fMemoryManager |
2528 | 26 | ); |
2529 | 26 | emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); |
2530 | 26 | } |
2531 | 97.5M | } |
2532 | 97.5M | gotLeadingSurrogate = false; |
2533 | 97.5M | } |
2534 | | |
2535 | | // If its not escaped, then make sure its not a < character, which |
2536 | | // is not allowed in attribute values. |
2537 | 97.5M | if (!escaped && (nextCh == chOpenAngle)) |
2538 | 36 | emitError(XMLErrs::BracketInAttrValue, attrName); |
2539 | | |
2540 | | // If the attribute is a CDATA type we do simple replacement of |
2541 | | // tabs and new lines with spaces, if the character is not escaped |
2542 | | // by way of a char ref. |
2543 | | // |
2544 | | // Otherwise, we do the standard non-CDATA normalization of |
2545 | | // compressing whitespace to single spaces and getting rid of leading |
2546 | | // and trailing whitespace. |
2547 | 97.5M | if (type == XMLAttDef::CData) |
2548 | 44.8M | { |
2549 | 44.8M | if (!escaped) |
2550 | 44.8M | { |
2551 | 44.8M | if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D)) |
2552 | 1.11M | { |
2553 | | // Check Validity Constraint for Standalone document declaration |
2554 | | // XML 1.0, Section 2.9 |
2555 | 1.11M | if (fStandalone && fValidate && isAttTokenizedExternal) |
2556 | 0 | { |
2557 | | // Can't have a standalone document declaration of "yes" if attribute |
2558 | | // values are subject to normalisation |
2559 | 0 | fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); |
2560 | 0 | } |
2561 | 1.11M | nextCh = chSpace; |
2562 | 1.11M | } |
2563 | 44.8M | } |
2564 | 44.8M | } |
2565 | 52.7M | else |
2566 | 52.7M | { |
2567 | 52.7M | if (curState == InWhitespace) |
2568 | 539k | { |
2569 | 539k | if ((escaped && nextCh != chSpace) || !fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) |
2570 | 462k | { |
2571 | 462k | if (firstNonWS) |
2572 | 456k | toFill.append(chSpace); |
2573 | 462k | curState = InContent; |
2574 | 462k | firstNonWS = true; |
2575 | 462k | } |
2576 | 76.7k | else |
2577 | 76.7k | { |
2578 | 76.7k | continue; |
2579 | 76.7k | } |
2580 | 539k | } |
2581 | 52.2M | else if (curState == InContent) |
2582 | 52.2M | { |
2583 | 52.2M | if ((nextCh == chSpace) || |
2584 | 52.0M | (fReaderMgr.getCurrentReader()->isWhitespace(nextCh) && !escaped)) |
2585 | 468k | { |
2586 | 468k | curState = InWhitespace; |
2587 | | |
2588 | | // Check Validity Constraint for Standalone document declaration |
2589 | | // XML 1.0, Section 2.9 |
2590 | 468k | if (fStandalone && fValidate && isAttTokenizedExternal) |
2591 | 6.06k | { |
2592 | 6.06k | if (!firstNonWS || (nextCh != chSpace && fReaderMgr.lookingAtSpace())) |
2593 | 2.66k | { |
2594 | | // Can't have a standalone document declaration of "yes" if attribute |
2595 | | // values are subject to normalisation |
2596 | 2.66k | fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); |
2597 | 2.66k | } |
2598 | 6.06k | } |
2599 | 468k | continue; |
2600 | 468k | } |
2601 | 51.7M | firstNonWS = true; |
2602 | 51.7M | } |
2603 | 52.7M | } |
2604 | | |
2605 | | // Else add it to the buffer |
2606 | 97.0M | toFill.append(nextCh); |
2607 | | |
2608 | 97.0M | if (secondCh) |
2609 | 4.93k | { |
2610 | 4.93k | toFill.append(secondCh); |
2611 | 4.93k | secondCh=0; |
2612 | 4.93k | } |
2613 | 97.0M | } |
2614 | 1.43M | } |
2615 | 1.43M | catch(const EndOfEntityException&) |
2616 | 1.43M | { |
2617 | | // Just eat it and continue. |
2618 | 0 | gotLeadingSurrogate = false; |
2619 | 0 | escaped = false; |
2620 | 0 | } |
2621 | 1.43M | } |
2622 | 0 | return true; |
2623 | 1.43M | } |
2624 | | |
2625 | | |
2626 | | // This method scans a CDATA section. It collects the characters into one |
2627 | | // of the temp buffers and calls the document handler, if any, with the |
2628 | | // characters. It assumes that the <![CDATA string has been scanned before |
2629 | | // this call. |
2630 | | void IGXMLScanner::scanCDSection() |
2631 | 65.5k | { |
2632 | 65.5k | static const XMLCh CDataClose[] = |
2633 | 65.5k | { |
2634 | 65.5k | chCloseSquare, chCloseAngle, chNull |
2635 | 65.5k | }; |
2636 | | |
2637 | | // The next character should be the opening square bracket. If not |
2638 | | // issue an error, but then try to recover by skipping any whitespace |
2639 | | // and checking again. |
2640 | 65.5k | if (!fReaderMgr.skippedChar(chOpenSquare)) |
2641 | 1 | { |
2642 | 1 | emitError(XMLErrs::ExpectedOpenSquareBracket); |
2643 | 1 | fReaderMgr.skipPastSpaces(); |
2644 | | |
2645 | | // If we still don't find it, then give up, else keep going |
2646 | 1 | if (!fReaderMgr.skippedChar(chOpenSquare)) |
2647 | 0 | return; |
2648 | 1 | } |
2649 | | |
2650 | | // Get a buffer for this |
2651 | 65.5k | XMLBufBid bbCData(&fBufMgr); |
2652 | | |
2653 | | // We just scan forward until we hit the end of CDATA section sequence. |
2654 | | // CDATA is effectively a big escape mechanism so we don't treat markup |
2655 | | // characters specially here. |
2656 | 65.5k | bool emittedError = false; |
2657 | 65.5k | bool gotLeadingSurrogate = false; |
2658 | 65.5k | const ElemStack::StackElem* topElem = fElemStack.topElement(); |
2659 | | |
2660 | | // Get the character data opts for the current element |
2661 | 65.5k | XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData; |
2662 | 65.5k | if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType) |
2663 | 0 | { |
2664 | | // And see if the current element is a 'Children' style content model |
2665 | 0 | ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); |
2666 | 0 | if(currType) |
2667 | 0 | { |
2668 | 0 | SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType(); |
2669 | 0 | if(modelType == SchemaElementDecl::Children || |
2670 | 0 | modelType == SchemaElementDecl::ElementOnlyEmpty) |
2671 | 0 | charOpts = XMLElementDecl::SpacesOk; |
2672 | 0 | else if(modelType == SchemaElementDecl::Empty) |
2673 | 0 | charOpts = XMLElementDecl::NoCharData; |
2674 | 0 | } |
2675 | 0 | } else // DTD grammar |
2676 | 65.5k | charOpts = topElem->fThisElement->getCharDataOpts(); |
2677 | | |
2678 | 3.52M | while (true) |
2679 | 3.52M | { |
2680 | 3.52M | const XMLCh nextCh = fReaderMgr.getNextChar(); |
2681 | | |
2682 | | // Watch for unexpected end of file |
2683 | 3.52M | if (!nextCh) |
2684 | 92 | { |
2685 | 92 | emitError(XMLErrs::UnterminatedCDATASection); |
2686 | 92 | ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); |
2687 | 92 | } |
2688 | | |
2689 | 3.52M | if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))) |
2690 | 522 | { |
2691 | | // This document is standalone; this ignorable CDATA whitespace is forbidden. |
2692 | | // XML 1.0, Section 2.9 |
2693 | | // And see if the current element is a 'Children' style content model |
2694 | 522 | if (topElem->fThisElement->isExternal()) { |
2695 | | |
2696 | 363 | if (charOpts == XMLElementDecl::SpacesOk) // Element Content |
2697 | 0 | { |
2698 | | // Error - standalone should have a value of "no" as whitespace detected in an |
2699 | | // element type with element content whose element declaration was external |
2700 | 0 | fValidator->emitError(XMLValid::NoWSForStandalone); |
2701 | 0 | if(fGrammarType == Grammar::SchemaGrammarType) |
2702 | 0 | { |
2703 | 0 | if (getPSVIHandler()) |
2704 | 0 | { |
2705 | | // REVISIT: |
2706 | | // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); |
2707 | 0 | } |
2708 | 0 | } |
2709 | 0 | } |
2710 | 363 | } |
2711 | 522 | } |
2712 | | |
2713 | | // If this is a close square bracket it could be our closing |
2714 | | // sequence. |
2715 | 3.52M | if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose)) |
2716 | 65.4k | { |
2717 | | // make sure we were not expecting a trailing surrogate. |
2718 | 65.4k | if (gotLeadingSurrogate) |
2719 | 1 | emitError(XMLErrs::Expected2ndSurrogateChar); |
2720 | | |
2721 | 65.4k | if (fGrammarType == Grammar::SchemaGrammarType) { |
2722 | |
|
2723 | 0 | XMLSize_t xsLen = bbCData.getLen(); |
2724 | 0 | const XMLCh* xsNormalized = bbCData.getRawBuffer(); |
2725 | 0 | DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); |
2726 | 0 | if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) |
2727 | 0 | { |
2728 | | // normalize the character according to schema whitespace facet |
2729 | 0 | ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf); |
2730 | 0 | xsNormalized = fWSNormalizeBuf.getRawBuffer(); |
2731 | 0 | xsLen = fWSNormalizeBuf.getLen(); |
2732 | 0 | if (fNormalizeData && fValidate) { |
2733 | 0 | bbCData.set(xsNormalized); |
2734 | 0 | } |
2735 | 0 | } |
2736 | |
|
2737 | 0 | if (fValidate) { |
2738 | | |
2739 | | // tell the schema validation about the character data for checkContent later |
2740 | 0 | ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized); |
2741 | |
|
2742 | 0 | if (charOpts != XMLElementDecl::AllCharData) |
2743 | 0 | { |
2744 | | // They definitely cannot handle any type of char data |
2745 | 0 | fValidator->emitError(XMLValid::NoCharDataInCM); |
2746 | 0 | if (getPSVIHandler()) |
2747 | 0 | { |
2748 | | // REVISIT: |
2749 | | // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); |
2750 | 0 | } |
2751 | 0 | } |
2752 | 0 | } |
2753 | | |
2754 | | // call all active identity constraints |
2755 | 0 | if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) { |
2756 | 0 | fContent.append(xsNormalized, xsLen); |
2757 | 0 | } |
2758 | 0 | } |
2759 | 65.4k | else { |
2760 | 65.4k | if (fValidate) { |
2761 | | |
2762 | 65.2k | if (charOpts != XMLElementDecl::AllCharData) |
2763 | 595 | { |
2764 | | // They definitely cannot handle any type of char data |
2765 | 595 | fValidator->emitError(XMLValid::NoCharDataInCM); |
2766 | 595 | } |
2767 | 65.2k | } |
2768 | 65.4k | } |
2769 | | |
2770 | | // If we have a doc handler, call it |
2771 | 65.4k | if (fDocHandler) |
2772 | 0 | { |
2773 | 0 | fDocHandler->docCharacters( |
2774 | 0 | bbCData.getRawBuffer(), bbCData.getLen(), true |
2775 | 0 | ); |
2776 | 0 | } |
2777 | | |
2778 | | // And we are done |
2779 | 65.4k | break; |
2780 | 65.4k | } |
2781 | | |
2782 | | // Make sure its a valid character. But if we've emitted an error |
2783 | | // already, don't bother with the overhead since we've already told |
2784 | | // them about it. |
2785 | 3.45M | if (!emittedError) |
2786 | 3.45M | { |
2787 | | // Deal with surrogate pairs |
2788 | 3.45M | if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) |
2789 | 61.9k | { |
2790 | | // Its a leading surrogate. If we already got one, then |
2791 | | // issue an error, else set leading flag to make sure that |
2792 | | // we look for a trailing next time. |
2793 | 61.9k | if (gotLeadingSurrogate) |
2794 | 1 | emitError(XMLErrs::Expected2ndSurrogateChar); |
2795 | 61.9k | else |
2796 | 61.9k | gotLeadingSurrogate = true; |
2797 | 61.9k | } |
2798 | 3.39M | else |
2799 | 3.39M | { |
2800 | | // If it's a trailing surrogate, make sure that we are |
2801 | | // prepared for that. Else, it's just a regular char, so make |
2802 | | // sure that we were not expecting a trailing surrogate. |
2803 | 3.39M | if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) |
2804 | 61.8k | { |
2805 | | // Its trailing, so make sure we were expecting it |
2806 | 61.8k | if (!gotLeadingSurrogate) |
2807 | 4 | emitError(XMLErrs::Unexpected2ndSurrogateChar); |
2808 | 61.8k | } |
2809 | 3.33M | else |
2810 | 3.33M | { |
2811 | | // Its just a char, so make sure we were not expecting a |
2812 | | // trailing surrogate. |
2813 | 3.33M | if (gotLeadingSurrogate) |
2814 | 4 | emitError(XMLErrs::Expected2ndSurrogateChar); |
2815 | | |
2816 | | // Its got to at least be a valid XML character |
2817 | 3.33M | else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) |
2818 | 19 | { |
2819 | 19 | XMLCh tmpBuf[9]; |
2820 | 19 | XMLString::binToText |
2821 | 19 | ( |
2822 | 19 | nextCh |
2823 | 19 | , tmpBuf |
2824 | 19 | , 8 |
2825 | 19 | , 16 |
2826 | 19 | , fMemoryManager |
2827 | 19 | ); |
2828 | 19 | emitError(XMLErrs::InvalidCharacter, tmpBuf); |
2829 | 19 | emittedError = true; |
2830 | 19 | } |
2831 | 3.33M | } |
2832 | 3.39M | gotLeadingSurrogate = false; |
2833 | 3.39M | } |
2834 | 3.45M | } |
2835 | | |
2836 | | // Add it to the buffer |
2837 | 3.45M | bbCData.append(nextCh); |
2838 | 3.45M | } |
2839 | 65.5k | } |
2840 | | |
2841 | | |
2842 | | void IGXMLScanner::scanCharData(XMLBuffer& toUse) |
2843 | 2.61M | { |
2844 | | // We have to watch for the stupid ]]> sequence, which is illegal in |
2845 | | // character data. So this is a little state machine that handles that. |
2846 | 2.61M | enum States |
2847 | 2.61M | { |
2848 | 2.61M | State_Waiting |
2849 | 2.61M | , State_GotOne |
2850 | 2.61M | , State_GotTwo |
2851 | 2.61M | }; |
2852 | | |
2853 | | // Reset the buffer before we start |
2854 | 2.61M | toUse.reset(); |
2855 | | |
2856 | | // Turn on the 'throw at end' flag of the reader manager |
2857 | 2.61M | ThrowEOEJanitor jan(&fReaderMgr, true); |
2858 | | |
2859 | | // In order to be more efficient we have to use kind of a deeply nested |
2860 | | // set of blocks here. The outer block puts on a try and catches end of |
2861 | | // entity exceptions. The inner loop is the per-character loop. If we |
2862 | | // put the try inside the inner loop, it would work but would require |
2863 | | // the exception handling code setup/teardown code to be invoked for |
2864 | | // each character. |
2865 | 2.61M | XMLCh nextCh; |
2866 | 2.61M | XMLCh secondCh = 0; |
2867 | 2.61M | States curState = State_Waiting; |
2868 | 2.61M | bool escaped = false; |
2869 | 2.61M | bool gotLeadingSurrogate = false; |
2870 | 2.61M | bool notDone = true; |
2871 | 5.22M | while (notDone) |
2872 | 2.61M | { |
2873 | 2.61M | try |
2874 | 2.61M | { |
2875 | 7.11M | while (true) |
2876 | 7.11M | { |
2877 | | // Eat through as many plain content characters as possible without |
2878 | | // needing special handling. Moving most content characters here, |
2879 | | // in this one call, rather than running the overall loop once |
2880 | | // per content character, is a speed optimization. |
2881 | 7.11M | if (curState == State_Waiting && !gotLeadingSurrogate) |
2882 | 7.00M | { |
2883 | 7.00M | fReaderMgr.movePlainContentChars(toUse); |
2884 | 7.00M | } |
2885 | | |
2886 | | // Try to get another char from the source |
2887 | | // The code from here on down covers all contingencies. |
2888 | 7.11M | if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) |
2889 | 2.61M | { |
2890 | | // If we were waiting for a trailing surrogate, its an error |
2891 | 2.61M | if (gotLeadingSurrogate) |
2892 | 2 | emitError(XMLErrs::Expected2ndSurrogateChar); |
2893 | | |
2894 | 2.61M | notDone = false; |
2895 | 2.61M | break; |
2896 | 2.61M | } |
2897 | | |
2898 | | // Watch for a reference. Note that the escapement mechanism |
2899 | | // is ignored in this content. |
2900 | 4.50M | escaped = false; |
2901 | 4.50M | if (nextCh == chAmpersand) |
2902 | 518k | { |
2903 | 518k | sendCharData(toUse); |
2904 | | |
2905 | | // Turn off the throwing at the end of entity during this |
2906 | 518k | ThrowEOEJanitor jan(&fReaderMgr, false); |
2907 | | |
2908 | 518k | if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned) |
2909 | 401k | { |
2910 | 401k | gotLeadingSurrogate = false; |
2911 | 401k | continue; |
2912 | 401k | } |
2913 | 117k | else |
2914 | 117k | { |
2915 | 117k | if (escaped && !fElemStack.isEmpty()) |
2916 | 116k | fElemStack.setReferenceEscaped(); |
2917 | 117k | } |
2918 | 518k | } |
2919 | 3.98M | else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) |
2920 | 8.97k | { |
2921 | | // Deal with surrogate pairs |
2922 | | // Its a leading surrogate. If we already got one, then |
2923 | | // issue an error, else set leading flag to make sure that |
2924 | | // we look for a trailing next time. |
2925 | 8.97k | if (gotLeadingSurrogate) |
2926 | 1 | emitError(XMLErrs::Expected2ndSurrogateChar); |
2927 | 8.97k | else |
2928 | 8.97k | gotLeadingSurrogate = true; |
2929 | 8.97k | } |
2930 | 3.97M | else |
2931 | 3.97M | { |
2932 | | // If it's a trailing surrogate, make sure that we are |
2933 | | // prepared for that. Else, it's just a regular char, so make |
2934 | | // sure that we were not expecting a trailing surrogate. |
2935 | 3.97M | if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) |
2936 | 8.97k | { |
2937 | | // Its trailing, so make sure we were expecting it |
2938 | 8.97k | if (!gotLeadingSurrogate) |
2939 | 9 | emitError(XMLErrs::Unexpected2ndSurrogateChar); |
2940 | 8.97k | } |
2941 | 3.96M | else |
2942 | 3.96M | { |
2943 | | // Its just a char, so make sure we were not expecting a |
2944 | | // trailing surrogate. |
2945 | 3.96M | if (gotLeadingSurrogate) |
2946 | 9 | emitError(XMLErrs::Expected2ndSurrogateChar); |
2947 | | |
2948 | | // Make sure the returned char is a valid XML char |
2949 | 3.96M | if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) |
2950 | 96 | { |
2951 | 96 | XMLCh tmpBuf[9]; |
2952 | 96 | XMLString::binToText |
2953 | 96 | ( |
2954 | 96 | nextCh |
2955 | 96 | , tmpBuf |
2956 | 96 | , 8 |
2957 | 96 | , 16 |
2958 | 96 | , fMemoryManager |
2959 | 96 | ); |
2960 | 96 | emitError(XMLErrs::InvalidCharacter, tmpBuf); |
2961 | 96 | } |
2962 | 3.96M | } |
2963 | 3.97M | gotLeadingSurrogate = false; |
2964 | 3.97M | } |
2965 | | |
2966 | | // Keep the state machine up to date |
2967 | 4.09M | if (!escaped) |
2968 | 3.98M | { |
2969 | 3.98M | if (nextCh == chCloseSquare) |
2970 | 101k | { |
2971 | 101k | if (curState == State_Waiting) |
2972 | 84.5k | curState = State_GotOne; |
2973 | 17.0k | else if (curState == State_GotOne) |
2974 | 1.53k | curState = State_GotTwo; |
2975 | 101k | } |
2976 | 3.88M | else if (nextCh == chCloseAngle) |
2977 | 8.99k | { |
2978 | 8.99k | if (curState == State_GotTwo) |
2979 | 3 | emitError(XMLErrs::BadSequenceInCharData); |
2980 | 8.99k | curState = State_Waiting; |
2981 | 8.99k | } |
2982 | 3.87M | else |
2983 | 3.87M | { |
2984 | 3.87M | curState = State_Waiting; |
2985 | 3.87M | } |
2986 | 3.98M | } |
2987 | 117k | else |
2988 | 117k | { |
2989 | 117k | curState = State_Waiting; |
2990 | 117k | } |
2991 | | |
2992 | | // Add this char to the buffer |
2993 | 4.09M | toUse.append(nextCh); |
2994 | | |
2995 | 4.09M | if (secondCh) |
2996 | 948 | { |
2997 | 948 | toUse.append(secondCh); |
2998 | 948 | secondCh=0; |
2999 | 948 | } |
3000 | 4.09M | } |
3001 | 2.61M | } |
3002 | 2.61M | catch(const EndOfEntityException& toCatch) |
3003 | 2.61M | { |
3004 | | // Some entity ended, so we have to send any accumulated |
3005 | | // chars and send an end of entity event. |
3006 | 0 | sendCharData(toUse); |
3007 | 0 | gotLeadingSurrogate = false; |
3008 | |
|
3009 | 0 | if (fDocHandler) |
3010 | 0 | fDocHandler->endEntityReference(toCatch.getEntity()); |
3011 | 0 | } |
3012 | 2.61M | } |
3013 | | |
3014 | | // Check the validity constraints as per XML 1.0 Section 2.9 |
3015 | 2.61M | if (fValidate && fStandalone) |
3016 | 178k | { |
3017 | | // See if the text contains whitespace |
3018 | | // Get the raw data we need for the callback |
3019 | 178k | const XMLCh* rawBuf = toUse.getRawBuffer(); |
3020 | 178k | const XMLSize_t len = toUse.getLen(); |
3021 | 178k | const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len); |
3022 | | |
3023 | 178k | if (isSpaces) |
3024 | 126k | { |
3025 | | // And see if the current element is a 'Children' style content model |
3026 | 126k | const ElemStack::StackElem* topElem = fElemStack.topElement(); |
3027 | | |
3028 | 126k | if (topElem->fThisElement->isExternal()) { |
3029 | | |
3030 | | // Get the character data opts for the current element |
3031 | 66.3k | XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData; |
3032 | 66.3k | if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType) |
3033 | 0 | { |
3034 | | // And see if the current element is a 'Children' style content model |
3035 | 0 | ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); |
3036 | 0 | if(currType) |
3037 | 0 | { |
3038 | 0 | SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType(); |
3039 | 0 | if(modelType == SchemaElementDecl::Children || |
3040 | 0 | modelType == SchemaElementDecl::ElementOnlyEmpty) |
3041 | 0 | charOpts = XMLElementDecl::SpacesOk; |
3042 | 0 | else if(modelType == SchemaElementDecl::Empty) |
3043 | 0 | charOpts = XMLElementDecl::NoCharData; |
3044 | 0 | } |
3045 | 0 | } else // DTD grammar |
3046 | 66.3k | charOpts = topElem->fThisElement->getCharDataOpts(); |
3047 | | |
3048 | 66.3k | if (charOpts == XMLElementDecl::SpacesOk) // => Element Content |
3049 | 0 | { |
3050 | | // Error - standalone should have a value of "no" as whitespace detected in an |
3051 | | // element type with element content whose element declaration was external |
3052 | | // |
3053 | 0 | fValidator->emitError(XMLValid::NoWSForStandalone); |
3054 | 0 | if(fGrammarType == Grammar::SchemaGrammarType) |
3055 | 0 | { |
3056 | 0 | if (getPSVIHandler()) |
3057 | 0 | { |
3058 | | // REVISIT: |
3059 | | // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); |
3060 | 0 | } |
3061 | 0 | } |
3062 | 0 | } |
3063 | 66.3k | } |
3064 | 126k | } |
3065 | 178k | } |
3066 | | // Send any char data that we accumulated into the buffer |
3067 | 2.61M | sendCharData(toUse); |
3068 | 2.61M | } |
3069 | | |
3070 | | |
3071 | | // This method will scan a general/character entity ref. It will either |
3072 | | // expand a char ref and return it directly, or push a reader for a general |
3073 | | // entity. |
3074 | | // |
3075 | | // The return value indicates whether the char parameters hold the value |
3076 | | // or whether the value was pushed as a reader, or that it failed. |
3077 | | // |
3078 | | // The escaped flag tells the caller whether the returned parameter resulted |
3079 | | // from a character reference, which escapes the character in some cases. It |
3080 | | // only makes any difference if the return value indicates the value was |
3081 | | // returned directly. |
3082 | | IGXMLScanner::EntityExpRes |
3083 | | IGXMLScanner::scanEntityRef( const bool inAttVal |
3084 | | , XMLCh& firstCh |
3085 | | , XMLCh& secondCh |
3086 | | , bool& escaped) |
3087 | 639k | { |
3088 | | // Assume no escape |
3089 | 639k | secondCh = 0; |
3090 | 639k | escaped = false; |
3091 | | |
3092 | | // We have to ensure that it's all in one entity |
3093 | 639k | const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum(); |
3094 | | |
3095 | | // If the next char is a pound, then its a character reference and we |
3096 | | // need to expand it always. |
3097 | 639k | if (fReaderMgr.skippedChar(chPound)) |
3098 | 25.1k | { |
3099 | | // Its a character reference, so scan it and get back the numeric |
3100 | | // value it represents. |
3101 | 25.1k | if (!scanCharRef(firstCh, secondCh)) |
3102 | 0 | return EntityExp_Failed; |
3103 | | |
3104 | 25.1k | escaped = true; |
3105 | | |
3106 | 25.1k | if (curReader != fReaderMgr.getCurrentReaderNum()) |
3107 | 0 | emitError(XMLErrs::PartialMarkupInEntity); |
3108 | | |
3109 | 25.1k | return EntityExp_Returned; |
3110 | 25.1k | } |
3111 | | |
3112 | | // Expand it since its a normal entity ref |
3113 | 614k | XMLBufBid bbName(&fBufMgr); |
3114 | 614k | int colonPosition; |
3115 | 614k | bool validName = fDoNamespaces ? fReaderMgr.getQName(bbName.getBuffer(), &colonPosition) : |
3116 | 614k | fReaderMgr.getName(bbName.getBuffer()); |
3117 | 614k | if (!validName) |
3118 | 149 | { |
3119 | 149 | if (bbName.isEmpty()) |
3120 | 149 | emitError(XMLErrs::ExpectedEntityRefName); |
3121 | 0 | else |
3122 | 0 | emitError(XMLErrs::InvalidEntityRefName, bbName.getRawBuffer()); |
3123 | 149 | return EntityExp_Failed; |
3124 | 149 | } |
3125 | | |
3126 | | // Next char must be a semi-colon. But if its not, just emit |
3127 | | // an error and try to continue. |
3128 | 614k | if (!fReaderMgr.skippedChar(chSemiColon)) |
3129 | 128 | emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer()); |
3130 | | |
3131 | | // Make sure we ended up on the same entity reader as the & char |
3132 | 614k | if (curReader != fReaderMgr.getCurrentReaderNum()) |
3133 | 0 | emitError(XMLErrs::PartialMarkupInEntity); |
3134 | | |
3135 | | // Look up the name in the general entity pool |
3136 | 614k | XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(bbName.getRawBuffer()); |
3137 | | |
3138 | | // If it does not exist, then obviously an error |
3139 | 614k | if (!decl) |
3140 | 314k | { |
3141 | | // XML 1.0 Section 4.1 |
3142 | | // Well-formedness Constraint for entity not found: |
3143 | | // In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references, |
3144 | | // or a document with "standalone='yes'", for an entity reference that does not occur within the external subset |
3145 | | // or a parameter entity |
3146 | | // |
3147 | | // Else it's Validity Constraint |
3148 | 314k | if (fStandalone || fHasNoDTD) |
3149 | 118 | emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer()); |
3150 | 314k | else { |
3151 | 314k | if (fValidate) |
3152 | 314k | fValidator->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer()); |
3153 | 314k | } |
3154 | | |
3155 | 314k | return EntityExp_Failed; |
3156 | 314k | } |
3157 | | |
3158 | | // XML 1.0 Section 4.1 |
3159 | | // If we are a standalone document, then it has to have been declared |
3160 | | // in the internal subset. |
3161 | 299k | if (fStandalone && !decl->getDeclaredInIntSubset()) |
3162 | 0 | emitError(XMLErrs::IllegalRefInStandalone, bbName.getRawBuffer()); |
3163 | | |
3164 | 299k | if (decl->isExternal()) |
3165 | 5.04k | { |
3166 | | // If its unparsed, then its not valid here |
3167 | 5.04k | if (decl->isUnparsed()) |
3168 | 2 | { |
3169 | 2 | emitError(XMLErrs::NoUnparsedEntityRefs, bbName.getRawBuffer()); |
3170 | 2 | return EntityExp_Failed; |
3171 | 2 | } |
3172 | | |
3173 | | // If we are in an attribute value, then not valid but keep going |
3174 | 5.03k | if (inAttVal) |
3175 | 1 | emitError(XMLErrs::NoExtRefsInAttValue); |
3176 | | |
3177 | | // And now create a reader to read this entity |
3178 | 5.03k | InputSource* srcUsed; |
3179 | 5.03k | XMLReader* reader = fReaderMgr.createReader |
3180 | 5.03k | ( |
3181 | 5.03k | decl->getBaseURI() |
3182 | 5.03k | , decl->getSystemId() |
3183 | 5.03k | , decl->getPublicId() |
3184 | 5.03k | , false |
3185 | 5.03k | , XMLReader::RefFrom_NonLiteral |
3186 | 5.03k | , XMLReader::Type_General |
3187 | 5.03k | , XMLReader::Source_External |
3188 | 5.03k | , srcUsed |
3189 | 5.03k | , fCalculateSrcOfs |
3190 | 5.03k | , fLowWaterMark |
3191 | 5.03k | , fDisableDefaultEntityResolution |
3192 | 5.03k | ); |
3193 | | |
3194 | | // Put a janitor on the source so it gets cleaned up on exit |
3195 | 5.03k | Janitor<InputSource> janSrc(srcUsed); |
3196 | | |
3197 | | // If the creation failed, and its not because the source was empty, |
3198 | | // then emit an error and return. |
3199 | 5.03k | if (!reader) |
3200 | 215 | ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed ? srcUsed->getSystemId() : decl->getSystemId(), fMemoryManager); |
3201 | | |
3202 | | // Push the reader. If its a recursive expansion, then emit an error |
3203 | | // and return an failure. |
3204 | 4.82k | if (!fReaderMgr.pushReader(reader, decl)) |
3205 | 0 | { |
3206 | 0 | emitError(XMLErrs::RecursiveEntity, decl->getName()); |
3207 | 0 | return EntityExp_Failed; |
3208 | 0 | } |
3209 | | |
3210 | | // here's where we need to check if there's a SecurityManager, |
3211 | | // how many entity references we've had |
3212 | 4.82k | if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) { |
3213 | 0 | XMLCh expLimStr[32]; |
3214 | 0 | XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager); |
3215 | 0 | emitError |
3216 | 0 | ( |
3217 | 0 | XMLErrs::EntityExpansionLimitExceeded |
3218 | 0 | , expLimStr |
3219 | 0 | ); |
3220 | | // there seems nothing better to be done than to reset the entity expansion counter |
3221 | 0 | fEntityExpansionCount = 0; |
3222 | 0 | } |
3223 | | |
3224 | | // Do a start entity reference event. |
3225 | | // |
3226 | | // <TBD> For now, we suppress them in att values. Later, when |
3227 | | // the stuff is in place to correctly allow DOM to handle them |
3228 | | // we'll turn this back on. |
3229 | 4.82k | if (fDocHandler && !inAttVal) |
3230 | 0 | fDocHandler->startEntityReference(*decl); |
3231 | | |
3232 | | // If it starts with the XML string, then parse a text decl |
3233 | 4.82k | if (checkXMLDecl(true)) |
3234 | 0 | scanXMLDecl(Decl_Text); |
3235 | 4.82k | } |
3236 | 294k | else |
3237 | 294k | { |
3238 | | // If its one of the special char references, then we can return |
3239 | | // it as a character, and its considered escaped. |
3240 | 294k | if (decl->getIsSpecialChar()) |
3241 | 113k | { |
3242 | 113k | firstCh = decl->getValue()[0]; |
3243 | 113k | escaped = true; |
3244 | 113k | return EntityExp_Returned; |
3245 | 113k | } |
3246 | | |
3247 | | // Create a reader over a memory stream over the entity value |
3248 | | // We force it to assume UTF-16 by passing in an encoding |
3249 | | // string. This way it won't bother trying to predecode the |
3250 | | // first line, looking for an XML/TextDecl. |
3251 | 181k | XMLReader* valueReader = fReaderMgr.createIntEntReader |
3252 | 181k | ( |
3253 | 181k | decl->getName() |
3254 | 181k | , XMLReader::RefFrom_NonLiteral |
3255 | 181k | , XMLReader::Type_General |
3256 | 181k | , decl->getValue() |
3257 | 181k | , decl->getValueLen() |
3258 | 181k | , false |
3259 | 181k | ); |
3260 | | |
3261 | | // Try to push the entity reader onto the reader manager stack, |
3262 | | // where it will become the subsequent input. If it fails, that |
3263 | | // means the entity is recursive, so issue an error. The reader |
3264 | | // will have just been discarded, but we just keep going. |
3265 | 181k | if (!fReaderMgr.pushReader(valueReader, decl)) |
3266 | 23 | emitError(XMLErrs::RecursiveEntity, decl->getName()); |
3267 | | |
3268 | | // here's where we need to check if there's a SecurityManager, |
3269 | | // how many entity references we've had |
3270 | 181k | if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) { |
3271 | 0 | XMLCh expLimStr[32]; |
3272 | 0 | XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager); |
3273 | 0 | emitError |
3274 | 0 | ( |
3275 | 0 | XMLErrs::EntityExpansionLimitExceeded |
3276 | 0 | , expLimStr |
3277 | 0 | ); |
3278 | 0 | } |
3279 | | |
3280 | | // Do a start entity reference event. |
3281 | | // |
3282 | | // <TBD> For now, we suppress them in att values. Later, when |
3283 | | // the stuff is in place to correctly allow DOM to handle them |
3284 | | // we'll turn this back on. |
3285 | 181k | if (fDocHandler && !inAttVal) |
3286 | 0 | fDocHandler->startEntityReference(*decl); |
3287 | | |
3288 | | // If it starts with the XML string, then it's an error |
3289 | 181k | if (checkXMLDecl(true)) { |
3290 | 2 | emitError(XMLErrs::TextDeclNotLegalHere); |
3291 | 2 | fReaderMgr.skipPastChar(chCloseAngle); |
3292 | 2 | } |
3293 | 181k | } |
3294 | 186k | return EntityExp_Pushed; |
3295 | 299k | } |
3296 | | |
3297 | | |
3298 | | bool IGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace) |
3299 | 14.3k | { |
3300 | 14.3k | Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace); |
3301 | | |
3302 | 14.3k | if (!tempGrammar && !fSkipDTDValidation) { |
3303 | | // This is a case where namespaces is on with a DTD grammar. |
3304 | 0 | tempGrammar = fDTDGrammar; |
3305 | 0 | } |
3306 | 14.3k | if (!tempGrammar) { |
3307 | 0 | return false; |
3308 | 0 | } |
3309 | 14.3k | else { |
3310 | | |
3311 | 14.3k | Grammar::GrammarType tempGrammarType = tempGrammar->getGrammarType(); |
3312 | 14.3k | if (tempGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) { |
3313 | 0 | if (fValidatorFromUser) |
3314 | 0 | ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager); |
3315 | 0 | else { |
3316 | 0 | fValidator = fSchemaValidator; |
3317 | 0 | } |
3318 | 0 | } |
3319 | 14.3k | else if (tempGrammarType == Grammar::DTDGrammarType) { |
3320 | 14.3k | if (fSkipDTDValidation) { |
3321 | 0 | return false; |
3322 | 0 | } |
3323 | | |
3324 | 14.3k | if (!fValidator->handlesDTD()) { |
3325 | 0 | if (fValidatorFromUser) |
3326 | 0 | ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager); |
3327 | 0 | else { |
3328 | 0 | fValidator = fDTDValidator; |
3329 | 0 | } |
3330 | 0 | } |
3331 | 14.3k | } |
3332 | | |
3333 | 14.3k | fGrammarType = tempGrammarType; |
3334 | 14.3k | fGrammar = tempGrammar; |
3335 | 14.3k | fValidator->setGrammar(fGrammar); |
3336 | 14.3k | return true; |
3337 | 14.3k | } |
3338 | 14.3k | } |
3339 | | |
3340 | | // check if we should skip or lax the validation of the element |
3341 | | // if skip - no validation |
3342 | | // if lax - validate only if the element if found |
3343 | | bool IGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv, |
3344 | | const XMLContentModel* const cm, |
3345 | | const XMLSize_t parentElemDepth) |
3346 | 0 | { |
3347 | 0 | bool skipThisOne = false; |
3348 | 0 | bool laxThisOne = false; |
3349 | 0 | unsigned int elementURI = element->getURI(); |
3350 | 0 | unsigned int currState = fElemState[parentElemDepth]; |
3351 | 0 | unsigned int currLoop = fElemLoopState[parentElemDepth]; |
3352 | |
|
3353 | 0 | if (currState == XMLContentModel::gInvalidTrans) { |
3354 | 0 | return laxThisOne; |
3355 | 0 | } |
3356 | | |
3357 | 0 | SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool); |
3358 | |
|
3359 | 0 | if (cv) { |
3360 | 0 | XMLSize_t i = 0; |
3361 | 0 | XMLSize_t leafCount = cv->getLeafCount(); |
3362 | 0 | unsigned int nextState = 0; |
3363 | |
|
3364 | 0 | for (; i < leafCount; i++) { |
3365 | |
|
3366 | 0 | QName* fElemMap = cv->getLeafNameAt(i); |
3367 | 0 | unsigned int uri = fElemMap->getURI(); |
3368 | 0 | ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i); |
3369 | |
|
3370 | 0 | if (type == ContentSpecNode::Leaf) { |
3371 | 0 | if (((uri == elementURI) |
3372 | 0 | && XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart())) |
3373 | 0 | || comparator.isEquivalentTo(element, fElemMap)) { |
3374 | |
|
3375 | 0 | nextState = cm->getNextState(currState, i); |
3376 | |
|
3377 | 0 | if (nextState != XMLContentModel::gInvalidTrans) |
3378 | 0 | break; |
3379 | 0 | } |
3380 | 0 | } else if ((type & 0x0f) == ContentSpecNode::Any) { |
3381 | 0 | nextState = cm->getNextState(currState, i); |
3382 | 0 | if (nextState != XMLContentModel::gInvalidTrans) |
3383 | 0 | break; |
3384 | 0 | } |
3385 | 0 | else if ((type & 0x0f) == ContentSpecNode::Any_Other) { |
3386 | 0 | if (uri != elementURI && elementURI != fEmptyNamespaceId) { |
3387 | 0 | nextState = cm->getNextState(currState, i); |
3388 | 0 | if (nextState != XMLContentModel::gInvalidTrans) |
3389 | 0 | break; |
3390 | 0 | } |
3391 | 0 | } |
3392 | 0 | else if ((type & 0x0f) == ContentSpecNode::Any_NS) { |
3393 | 0 | if (uri == elementURI) { |
3394 | 0 | nextState = cm->getNextState(currState, i); |
3395 | 0 | if (nextState != XMLContentModel::gInvalidTrans) |
3396 | 0 | break; |
3397 | 0 | } |
3398 | 0 | } |
3399 | |
|
3400 | 0 | } // for |
3401 | |
|
3402 | 0 | if (i == leafCount) { // no match |
3403 | 0 | fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans; |
3404 | 0 | fElemLoopState[parentElemDepth] = 0; |
3405 | 0 | return laxThisOne; |
3406 | 0 | } |
3407 | | |
3408 | 0 | unsigned int nextLoop = 0; |
3409 | 0 | if(!cm->handleRepetitions(element, currState, currLoop, nextState, nextLoop, i, &comparator)) { |
3410 | 0 | fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans; |
3411 | 0 | fElemLoopState[parentElemDepth] = 0; |
3412 | 0 | return laxThisOne; |
3413 | 0 | } |
3414 | | |
3415 | 0 | ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i); |
3416 | 0 | if ((type & 0x0f) == ContentSpecNode::Any || |
3417 | 0 | (type & 0x0f) == ContentSpecNode::Any_Other || |
3418 | 0 | (type & 0x0f) == ContentSpecNode::Any_NS) |
3419 | 0 | { |
3420 | 0 | if (type == ContentSpecNode::Any_Skip || |
3421 | 0 | type == ContentSpecNode::Any_NS_Skip || |
3422 | 0 | type == ContentSpecNode::Any_Other_Skip) { |
3423 | 0 | skipThisOne = true; |
3424 | 0 | } |
3425 | 0 | else if (type == ContentSpecNode::Any_Lax || |
3426 | 0 | type == ContentSpecNode::Any_NS_Lax || |
3427 | 0 | type == ContentSpecNode::Any_Other_Lax) { |
3428 | 0 | laxThisOne = true; |
3429 | 0 | } |
3430 | 0 | } |
3431 | 0 | fElemState[parentElemDepth] = nextState; |
3432 | 0 | fElemLoopState[parentElemDepth] = nextLoop; |
3433 | 0 | } // if |
3434 | | |
3435 | 0 | if (skipThisOne) { |
3436 | 0 | fValidate = false; |
3437 | 0 | fElemStack.setValidationFlag(fValidate); |
3438 | 0 | } |
3439 | |
|
3440 | 0 | return laxThisOne; |
3441 | 0 | } |
3442 | | |
3443 | | |
3444 | | // check if there is an AnyAttribute, and if so, see if we should lax or skip |
3445 | | // if skip - no validation |
3446 | | // if lax - validate only if the attribute if found |
3447 | | bool IGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne) |
3448 | 0 | { |
3449 | 0 | XMLAttDef::AttTypes wildCardType = attWildCard->getType(); |
3450 | 0 | bool anyEncountered = false; |
3451 | 0 | skipThisOne = false; |
3452 | 0 | laxThisOne = false; |
3453 | 0 | if (wildCardType == XMLAttDef::Any_Any) |
3454 | 0 | anyEncountered = true; |
3455 | 0 | else if (wildCardType == XMLAttDef::Any_Other) { |
3456 | 0 | if (attWildCard->getAttName()->getURI() != uriId |
3457 | 0 | && uriId != fEmptyNamespaceId) |
3458 | 0 | anyEncountered = true; |
3459 | 0 | } |
3460 | 0 | else if (wildCardType == XMLAttDef::Any_List) { |
3461 | 0 | ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList(); |
3462 | 0 | XMLSize_t listSize = (nameURIList) ? nameURIList->size() : 0; |
3463 | |
|
3464 | 0 | if (listSize) { |
3465 | 0 | for (XMLSize_t i=0; i < listSize; i++) { |
3466 | 0 | if (nameURIList->elementAt(i) == uriId) |
3467 | 0 | anyEncountered = true; |
3468 | 0 | } |
3469 | 0 | } |
3470 | 0 | } |
3471 | |
|
3472 | 0 | if (anyEncountered) { |
3473 | 0 | XMLAttDef::DefAttTypes defType = attWildCard->getDefaultType(); |
3474 | 0 | if (defType == XMLAttDef::ProcessContents_Skip) { |
3475 | | // attribute should just be bypassed, |
3476 | 0 | skipThisOne = true; |
3477 | 0 | } |
3478 | 0 | else if (defType == XMLAttDef::ProcessContents_Lax) { |
3479 | 0 | laxThisOne = true; |
3480 | 0 | } |
3481 | 0 | } |
3482 | |
|
3483 | 0 | return anyEncountered; |
3484 | 0 | } |
3485 | | |
3486 | | inline XMLAttDefList& getAttDefList(bool isSchemaGrammar |
3487 | | , ComplexTypeInfo* currType |
3488 | | , XMLElementDecl* elemDecl) |
3489 | 0 | { |
3490 | 0 | if (isSchemaGrammar && currType) |
3491 | 0 | return currType->getAttDefList(); |
3492 | 0 | else |
3493 | 0 | return elemDecl->getAttDefList(); |
3494 | 0 | } |
3495 | | |
3496 | | XERCES_CPP_NAMESPACE_END |