Coverage Report

Created: 2025-11-13 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/poco/XML/src/ParserEngine.h
Line
Count
Source
1
//
2
// ParserEngine.h
3
//
4
// Library: XML
5
// Package: XML
6
// Module:  ParserEngine
7
//
8
// Definition of the ParserEngine class.
9
//
10
// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
11
// and Contributors.
12
//
13
// SPDX-License-Identifier: BSL-1.0
14
15
16
#ifndef XML_ParserEngine_INCLUDED
17
#define XML_ParserEngine_INCLUDED
18
19
20
#include "Poco/XML/XML.h"
21
#include "Poco/XML/XMLString.h"
22
#include "Poco/XML/XMLStream.h"
23
#include "Poco/SAX/Locator.h"
24
#include "Poco/TextEncoding.h"
25
#include <expat.h>
26
#include <map>
27
#include <vector>
28
29
30
namespace Poco {
31
namespace XML {
32
33
34
// Forward declarations: in this header these types are used only through pointers, so their full definitions are not required here.
class InputSource;
35
class EntityResolver;
36
class DTDHandler;
37
class DeclHandler;
38
class ContentHandler;
39
class LexicalHandler;
40
class ErrorHandler;
41
class NamespaceStrategy;
42
class ContextLocator;
43
44
45
class XML_API ParserEngine: public Locator
46
  /// This class provides an object-oriented, stream-based,
47
  /// low-level interface to the XML Parser Toolkit (expat).
48
  /// It is strongly recommended that you use the
49
  /// SAX parser classes (which are based on this
50
  /// class) instead of this class, since they provide
51
  /// a standardized, higher-level interface to the parser.
52
{
53
public:
54
  ParserEngine();
55
    /// Creates the parser engine.
56
57
  ParserEngine(const XMLString& encoding);
58
    /// Creates the parser engine and passes the encoding
59
    /// to the underlying parser.
60
61
  ~ParserEngine();
62
    /// Destroys the parser.
63
64
  void setEncoding(const XMLString& encoding);
65
    /// Sets the encoding used by expat. The encoding must be
66
    /// set before parsing begins, otherwise it will be ignored.
67
68
  const XMLString& getEncoding() const;
69
    /// Returns the encoding used by expat.
70
71
  void addEncoding(const XMLString& name, Poco::TextEncoding* pEncoding);
72
    /// Adds an encoding to the parser under the given name.
73
74
  void setNamespaceStrategy(NamespaceStrategy* pStrategy);
75
    /// Sets the NamespaceStrategy used by the parser.
76
    /// The parser takes ownership of the strategy object
77
    /// and deletes it when it's no longer needed.
78
    /// The default is NoNamespacesStrategy.
79
80
  NamespaceStrategy* getNamespaceStrategy() const;
81
    /// Returns the NamespaceStrategy currently in use.
82
83
  void setExpandInternalEntities(bool flag = true);
84
    /// Enables/disables expansion of internal entities (enabled by
85
    /// default). If entity expansion is disabled, internal entities
86
    /// are reported via the default handler.
87
    /// Must be set before parsing begins, otherwise it will be
88
    /// ignored.
89
90
  bool getExpandInternalEntities() const;
91
    /// Returns true if internal entities will be expanded automatically,
92
    /// which is the default.
93
94
  void setExternalGeneralEntities(bool flag = true);
95
    /// Enable or disable processing of external general entities.
96
97
  bool getExternalGeneralEntities() const;
98
    /// Returns true if external general entities will be processed; false otherwise.
99
100
  void setExternalParameterEntities(bool flag = true);
101
    /// Enable or disable processing of external parameter entities.
102
103
  bool getExternalParameterEntities() const;
104
    /// Returns true if external parameter entities will be processed; false otherwise.
105
106
  void setEntityResolver(EntityResolver* pResolver);
107
    /// Allow an application to register an entity resolver.
108
109
  EntityResolver* getEntityResolver() const;
110
    /// Return the current entity resolver.
111
112
  void setDTDHandler(DTDHandler* pDTDHandler);
113
    /// Allow an application to register a DTD event handler.
114
115
  DTDHandler* getDTDHandler() const;
116
    /// Return the current DTD handler.
117
118
  void setDeclHandler(DeclHandler* pDeclHandler);
119
    /// Allow an application to register a DTD declarations event handler.
120
121
  DeclHandler* getDeclHandler() const;
122
    /// Return the current DTD declarations handler.
123
124
  void setContentHandler(ContentHandler* pContentHandler);
125
    /// Allow an application to register a content event handler.
126
127
  ContentHandler* getContentHandler() const;
128
    /// Return the current content handler.
129
130
  void setLexicalHandler(LexicalHandler* pLexicalHandler);
131
    /// Allow an application to register a lexical event handler.
132
133
  LexicalHandler* getLexicalHandler() const;
134
    /// Return the current lexical handler.
135
136
  void setErrorHandler(ErrorHandler* pErrorHandler);
137
    /// Allow an application to register an error event handler.
138
139
  ErrorHandler* getErrorHandler() const;
140
    /// Return the current error handler.
141
142
  void setEnablePartialReads(bool flag = true);
143
    /// Enable or disable partial reads from the input source.
144
    ///
145
    /// This is useful for parsing XML from a socket stream for
146
    /// a protocol like XMPP, where basically single elements
147
    /// are read one at a time from the input source's stream, and
148
    /// following elements depend upon responses sent back to
149
    /// the peer.
150
    ///
151
    /// Normally, the parser always reads blocks of PARSE_BUFFER_SIZE
152
    /// at a time, and blocks until a complete block has been read (or
153
    /// the end of the stream has been reached).
154
    /// This allows for efficient parsing of "complete" XML documents,
155
    /// but fails in a case such as XMPP, where only XML fragments
156
    /// are sent at a time.
157
158
  bool getEnablePartialReads() const;
159
    /// Returns true if partial reads are enabled (see
160
    /// setEnablePartialReads()), false otherwise.
161
162
  void setBillionLaughsAttackProtectionMaximumAmplification(float maximumAmplificationFactor);
163
    /// Sets the maximum tolerated amplification factor
164
    /// for protection against Billion Laughs Attacks.
165
    ///
166
    /// The amplification factor is calculated as:
167
    ///     amplification := (direct + indirect) / direct
168
    /// while parsing, whereas:
169
    ///   - direct is the number of bytes read from the primary document in parsing and
170
    ///   - indirect is the number of bytes added by expanding entities and reading of
171
    ///     external DTD files, combined.
172
    ///
173
    /// maximumAmplificationFactor must be non-NaN and greater than or equal to 1.0.
174
    ///
175
    /// Requires an underlying Expat version >= 2.4.0.
176
177
  void setBillionLaughsAttackProtectionActivationThreshold(Poco::UInt64 activationThresholdBytes);
178
    /// Sets the number of output bytes (including amplification from entity expansion and reading DTD files)
179
    /// needed to activate protection against Billion Laughs Attacks.
180
    ///
181
    /// Defaults to 8 MiB.
182
    ///
183
    /// Requires an underlying Expat version >= 2.4.0.
184
185
  void parse(InputSource* pInputSource);
186
    /// Parses an XML document from the given InputSource.
187
188
  void parse(const char* pBuffer, std::size_t size);
189
    /// Parses an XML document from the given buffer of size bytes.
190
191
  // Locator
192
  XMLString getPublicId() const;
193
    /// Return the public identifier for the current document event.
194
195
  XMLString getSystemId() const;
196
    /// Return the system identifier for the current document event.
197
198
  int getLineNumber() const;
199
    /// Return the line number where the current document event ends.
200
201
  int getColumnNumber() const;
202
    /// Return the column number where the current document event ends.
203
204
protected:
205
  void init();
206
    /// Initializes expat.
207
208
  void parseByteInputStream(XMLByteInputStream& istr);
209
    /// Parses an entity from the given byte stream.
210
211
  void parseCharInputStream(XMLCharInputStream& istr);
212
    /// Parses an entity from the given character stream.
213
214
  std::streamsize readBytes(XMLByteInputStream& istr, char* pBuffer, std::streamsize bufferSize);
215
    /// Reads at most bufferSize bytes from the given stream into the given buffer.
216
217
  std::streamsize readChars(XMLCharInputStream& istr, XMLChar* pBuffer, std::streamsize bufferSize);
218
    /// Reads at most bufferSize chars from the given stream into the given buffer.
219
220
  void handleError(int errorNo);
221
    /// Throws an XMLException with a message corresponding
222
    /// to the given Expat error code.
223
224
  void parseExternal(XML_Parser extParser, InputSource* pInputSource);
225
    /// Parses the given InputSource with the supplied parser (extParser); presumably an external entity, as with the parseExternal*InputStream() methods - TODO confirm.
226
227
  void parseExternalByteInputStream(XML_Parser extParser, XMLByteInputStream& istr);
228
    /// Parses an external entity from the given byte stream, with a separate parser.
229
230
  void parseExternalCharInputStream(XML_Parser extParser, XMLCharInputStream& istr);
231
    /// Parses an external entity from the given character stream, with a separate parser.
232
233
  void pushContext(XML_Parser parser, InputSource* pInputSource);
234
    /// Pushes a new entry to the context stack.
235
236
  void popContext();
237
    /// Pops the top-most entry from the context stack.
238
239
  void resetContext();
240
    /// Resets and clears the context stack.
241
242
  const Locator& locator() const;
243
    /// Returns a locator denoting the current parse location.
244
245
  // expat handler procedures (static callbacks; NOTE(review): userData presumably points to the ParserEngine instance - confirm in the implementation)
246
  static void handleStartElement(void* userData, const XML_Char* name, const XML_Char** atts);
247
  static void handleEndElement(void* userData, const XML_Char* name);
248
  static void handleCharacterData(void* userData, const XML_Char* s, int len);
249
  static void handleProcessingInstruction(void* userData, const XML_Char* target, const XML_Char* data);
250
  static void handleDefault(void* userData, const XML_Char* s, int len);
251
  static void handleUnparsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* notationName);
252
  static void handleNotationDecl(void* userData, const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId);
253
  static int handleExternalEntityRef(XML_Parser parser, const XML_Char* openEntityNames, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId);
254
  static int handleUnknownEncoding(void* encodingHandlerData, const XML_Char* name, XML_Encoding* info);
255
  static void handleComment(void* userData, const XML_Char* data);
256
  static void handleStartCdataSection(void* userData);
257
  static void handleEndCdataSection(void* userData);
258
  static void handleStartNamespaceDecl(void* userData, const XML_Char* prefix, const XML_Char* uri);
259
  static void handleEndNamespaceDecl(void* userData, const XML_Char* prefix);
260
  static void handleStartDoctypeDecl(void* userData, const XML_Char* doctypeName, const XML_Char *systemId, const XML_Char* publicId, int hasInternalSubset);
261
  static void handleEndDoctypeDecl(void* userData);
262
  static void handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength,
263
                               const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName);
264
  static void handleExternalParsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId);
265
  static void handleInternalParsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* replacementText, int replacementTextLength);
266
  static void handleSkippedEntity(void* userData, const XML_Char* entityName, int isParameterEntity);
267
268
  // encoding support
269
  static int convert(void *data, const char *s);
270
271
private:
272
  typedef std::map<XMLString, Poco::TextEncoding*> EncodingMap; // encoding name -> encoding object (see addEncoding())
273
  typedef std::vector<ContextLocator*> ContextStack; // see pushContext() / popContext() / resetContext()
274
275
  XML_Parser _parser;
276
  char*      _pBuffer; // NOTE(review): raw pointer member and a user-declared destructor, but no copy operations are declared - confirm the class is never copied
277
  bool       _encodingSpecified;
278
  XMLString  _encoding;
279
  bool       _expandInternalEntities;
280
  bool       _externalGeneralEntities;
281
  bool       _externalParameterEntities;
282
  bool       _enablePartialReads;
283
  NamespaceStrategy* _pNamespaceStrategy; // owned by the engine (see setNamespaceStrategy())
284
  EncodingMap        _encodings;
285
  ContextStack       _context;
286
287
  EntityResolver* _pEntityResolver;
288
  DTDHandler*     _pDTDHandler;
289
  DeclHandler*    _pDeclHandler;
290
  ContentHandler* _pContentHandler;
291
  LexicalHandler* _pLexicalHandler;
292
  ErrorHandler*   _pErrorHandler;
293
294
  float _maximumAmplificationFactor;
295
  Poco::UInt64 _activationThresholdBytes;
296
297
  static const int PARSE_BUFFER_SIZE; // block size read from the input at a time (see setEnablePartialReads())
298
  static const XMLString EMPTY_STRING;
299
};
300
301
302
//
303
// inlines
304
//
305
// Accessor: hands back a reference to the stored encoding name (_encoding).
inline const XMLString& ParserEngine::getEncoding() const
306
0
{
307
0
  return this->_encoding;
308
0
}
309
310
311
// Accessor: hands back the stored NamespaceStrategy pointer (_pNamespaceStrategy) unchanged.
inline NamespaceStrategy* ParserEngine::getNamespaceStrategy() const
312
0
{
313
0
  return this->_pNamespaceStrategy;
314
0
}
315
316
317
// Accessor: reports the current value of the _expandInternalEntities flag.
inline bool ParserEngine::getExpandInternalEntities() const
318
0
{
319
0
  return this->_expandInternalEntities;
320
0
}
321
322
323
// Accessor: reports the current value of the _externalGeneralEntities flag.
inline bool ParserEngine::getExternalGeneralEntities() const
324
0
{
325
0
  return this->_externalGeneralEntities;
326
0
}
327
328
329
// Accessor: reports the current value of the _externalParameterEntities flag.
inline bool ParserEngine::getExternalParameterEntities() const
330
0
{
331
0
  return this->_externalParameterEntities;
332
0
}
333
334
335
// Accessor: hands back the stored EntityResolver pointer (_pEntityResolver) unchanged.
inline EntityResolver* ParserEngine::getEntityResolver() const
336
0
{
337
0
  return this->_pEntityResolver;
338
0
}
339
340
341
// Accessor: hands back the stored DTDHandler pointer (_pDTDHandler) unchanged.
inline DTDHandler* ParserEngine::getDTDHandler() const
342
0
{
343
0
  return this->_pDTDHandler;
344
0
}
345
346
347
// Accessor: hands back the stored DeclHandler pointer (_pDeclHandler) unchanged.
inline DeclHandler* ParserEngine::getDeclHandler() const
348
0
{
349
0
  return this->_pDeclHandler;
350
0
}
351
352
353
// Accessor: hands back the stored ContentHandler pointer (_pContentHandler) unchanged.
inline ContentHandler* ParserEngine::getContentHandler() const
354
0
{
355
0
  return this->_pContentHandler;
356
0
}
357
358
359
// Accessor: hands back the stored LexicalHandler pointer (_pLexicalHandler) unchanged.
inline LexicalHandler* ParserEngine::getLexicalHandler() const
360
0
{
361
0
  return this->_pLexicalHandler;
362
0
}
363
364
365
// Accessor: hands back the stored ErrorHandler pointer (_pErrorHandler) unchanged.
inline ErrorHandler* ParserEngine::getErrorHandler() const
366
0
{
367
0
  return this->_pErrorHandler;
368
0
}
369
370
371
// Accessor: reports the current value of the _enablePartialReads flag.
inline bool ParserEngine::getEnablePartialReads() const
372
0
{
373
0
  return this->_enablePartialReads;
374
0
}
375
376
377
} } // namespace Poco::XML
378
379
380
#endif // XML_ParserEngine_INCLUDED