Coverage Report

Created: 2026-02-11 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/xerces-c/src/xercesc/framework/XMLFormatter.hpp
Line
Count
Source
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *      http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
18
/*
19
 * $Id: XMLFormatter.hpp 932889 2010-04-11 13:10:10Z borisk $
20
 */
21
22
#if !defined(XERCESC_INCLUDE_GUARD_XMLFORMATTER_HPP)
23
#define XERCESC_INCLUDE_GUARD_XMLFORMATTER_HPP
24
25
#include <xercesc/util/PlatformUtils.hpp>
26
27
XERCES_CPP_NAMESPACE_BEGIN
28
29
class XMLFormatTarget;
30
class XMLTranscoder;
31
32
/**
33
 *  This class provides the basic formatting capabilities that are required
34
 *  to turn the Unicode based XML data from the parsers into a form that can
35
 *  be used on non-Unicode based systems, that is, into local or generic text
36
 *  encodings.
37
 *
38
 *  A number of flags are provided to control whether various optional
39
 *  formatting operations are performed.
40
 */
41
class XMLPARSER_EXPORT XMLFormatter : public XMemory
42
{
43
public:
44
    // -----------------------------------------------------------------------
45
    //  Class types
46
    // -----------------------------------------------------------------------
47
    /** @name Public Constants */
48
    //@{
49
    /**
50
     * EscapeFlags - Different styles of escape flags to control various formatting.
51
     *
52
     * <p><code>NoEscapes:</code>
53
     * No character needs to be escaped.   Just write them out as is.</p>
54
     * <p><code>StdEscapes:</code>
55
     * The following characters need to be escaped:</p>
56
     * <table border='1'>
57
     * <tr>
58
     * <td>character</td>
59
     * <td>should be escaped and written as</td>
60
     * </tr>
61
     * <tr>
62
     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
63
     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
64
     * </tr>
65
     * <tr>
66
     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
67
     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
68
     * </tr>
69
     * <tr>
70
     * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
71
     * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
72
     * </tr>
73
     * <tr>
74
     * <td valign='top' rowspan='1' colspan='1'>&lt;</td>
75
     * <td valign='top' rowspan='1' colspan='1'>&amp;lt;</td>
76
     * </tr>
77
     * <tr>
78
     * <td valign='top' rowspan='1' colspan='1'>&apos;</td>
79
     * <td valign='top' rowspan='1' colspan='1'>&amp;apos;</td>
80
     * </tr>
81
     * </table>
82
     * <p><code>AttrEscapes:</code>
83
     * The following characters need to be escaped:</p>
84
     * <table border='1'>
85
     * <tr>
86
     * <td>character</td>
87
     * <td>should be escaped and written as</td>
88
     * </tr>
89
     * <tr>
90
     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
91
     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
92
     * </tr>
93
     * <tr>
94
     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
95
     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
96
     * </tr>
97
     * <tr>
98
     * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
99
     * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
100
     * </tr>
101
     * </table>
102
     * <p><code>CharEscapes:</code>
103
     * The following characters need to be escaped:</p>
104
     * <table border='1'>
105
     * <tr>
106
     * <td>character</td>
107
     * <td>should be escaped and written as</td>
108
     * </tr>
109
     * <tr>
110
     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
111
     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
112
     * </tr>
113
     * <tr>
114
     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
115
     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
116
     * </tr>
117
     * </table>
118
     * <p><code>EscapeFlags_Count:</code>
119
     * Special value, do not use directly.</p>
120
     * <p><code>DefaultEscape:</code>
121
     * Special value, do not use directly.</p>
122
     *
123
     */
124
    enum EscapeFlags
125
    {
126
        NoEscapes
127
        , StdEscapes
128
        , AttrEscapes
129
        , CharEscapes
130
131
        // Special values, don't use directly
132
        , EscapeFlags_Count
133
        , DefaultEscape     = 999
134
    };
135
136
    /**
137
     * UnRepFlags
138
     *
139
     * The unrepresentable flags that indicate how to react when a
140
     * character cannot be represented in the target encoding.
141
     *
142
     * <p><code>UnRep_Fail:</code>
143
     * Fail the operation.</p>
144
     * <p><code>UnRep_CharRef:</code>
145
     * Display the unrepresented character as reference.</p>
146
     * <p><code>UnRep_Replace:</code>
147
     * Replace the unrepresented character with the replacement character.</p>
148
     * <p><code>DefaultUnRep:</code>
149
     * Special value, do not use directly.</p>
150
     *
151
     */
152
    enum UnRepFlags
153
    {
154
        UnRep_Fail
155
        , UnRep_CharRef
156
        , UnRep_Replace
157
158
        , DefaultUnRep      = 999
159
    };
160
    //@}
161
162
163
    // -----------------------------------------------------------------------
164
    //  Constructors and Destructor
165
    // -----------------------------------------------------------------------
166
    /** @name Constructor and Destructor */
167
    //@{
168
    /**
169
     * @param outEncoding the encoding for the formatted content.
170
     * @param docVersion  the document version.
171
     * @param target      the formatTarget where the formatted content is written to.
172
     * @param escapeFlags the escape style for certain characters.
173
     * @param unrepFlags  the reaction to an unrepresentable character.
174
     * @param manager     Pointer to the memory manager to be used to
175
     *                    allocate objects.
176
     */
177
    XMLFormatter
178
    (
179
        const   XMLCh* const            outEncoding
180
        , const XMLCh* const            docVersion
181
        ,       XMLFormatTarget* const  target
182
        , const EscapeFlags             escapeFlags = NoEscapes
183
        , const UnRepFlags              unrepFlags = UnRep_Fail
184
        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
185
    );
186
187
    XMLFormatter
188
    (
189
        const   char* const             outEncoding
190
        , const char* const             docVersion
191
        ,       XMLFormatTarget* const  target
192
        , const EscapeFlags             escapeFlags = NoEscapes
193
        , const UnRepFlags              unrepFlags = UnRep_Fail
194
        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
195
    );
196
197
    XMLFormatter
198
    (
199
        const   XMLCh* const            outEncoding
200
        ,       XMLFormatTarget* const  target
201
        , const EscapeFlags             escapeFlags = NoEscapes
202
        , const UnRepFlags              unrepFlags = UnRep_Fail
203
        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
204
    );
205
206
    XMLFormatter
207
    (
208
        const   char* const             outEncoding
209
        ,       XMLFormatTarget* const  target
210
        , const EscapeFlags             escapeFlags = NoEscapes
211
        , const UnRepFlags              unrepFlags = UnRep_Fail
212
        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
213
    );
214
215
    ~XMLFormatter();
216
    //@}
217
218
219
    // -----------------------------------------------------------------------
220
    //  Formatting methods
221
    // -----------------------------------------------------------------------
222
    /** @name Formatting methods */
223
    //@{
224
    /**
225
     * @param toFormat the string to be formatted
226
     * @param count    length of the string
227
     * @param escapeFlags the escape style for formatting toFormat
228
     * @param unrepFlags the reaction for any unrepresentable character in toFormat
229
     *
230
     */
231
    void formatBuf
232
    (
233
        const   XMLCh* const    toFormat
234
        , const XMLSize_t       count
235
        , const EscapeFlags     escapeFlags = DefaultEscape
236
        , const UnRepFlags      unrepFlags = DefaultUnRep
237
    );
238
239
    /**
240
     * @see formatBuf
241
     */
242
    XMLFormatter& operator<<
243
    (
244
        const   XMLCh* const    toFormat
245
    );
246
247
    XMLFormatter& operator<<
248
    (
249
        const   XMLCh           toFormat
250
    );
251
252
    void writeBOM(const XMLByte* const toFormat
253
                , const XMLSize_t      count);
254
255
    //@}
256
257
    // -----------------------------------------------------------------------
258
    //  Getter methods
259
    // -----------------------------------------------------------------------
260
    /** @name Getter methods */
261
    //@{
262
    /**
263
     * @return return the encoding set for the formatted content
264
     */
265
266
    const XMLCh* getEncodingName() const;
267
268
    /**
269
     * @return return constant transcoder used internally for transcoding the formatter content
270
     */
271
    inline const XMLTranscoder*   getTranscoder() const;
272
273
    /**
274
     * @return return the transcoder used internally for transcoding the formatter content
275
     */
276
    inline XMLTranscoder*   getTranscoder();
277
278
   //@}
279
280
    // -----------------------------------------------------------------------
281
    //  Setter methods
282
    // -----------------------------------------------------------------------
283
    /** @name Setter methods */
284
    //@{
285
    /**
286
     * @param newFlags set the escape style for the follow-on formatted content
287
     */
288
    void setEscapeFlags
289
    (
290
        const   EscapeFlags     newFlags
291
    );
292
293
    /**
294
     * @param newFlags set the reaction for unrepresentable character
295
     */
296
    void setUnRepFlags
297
    (
298
        const   UnRepFlags      newFlags
299
    );
300
301
    /**
302
     * @param newFlags set the escape style for the follow-on formatted content
303
     * @see setEscapeFlags
304
     */
305
    XMLFormatter& operator<<
306
    (
307
        const   EscapeFlags     newFlags
308
    );
309
310
    /**
311
     * @param newFlags set the reaction for unrepresentable character
312
     * @see setUnRepFlags
313
     */
314
    XMLFormatter& operator<<
315
    (
316
        const   UnRepFlags      newFlags
317
    );
318
    //@}
319
320
    // -----------------------------------------------------------------------
321
    //  Getter methods
322
    // -----------------------------------------------------------------------
323
    /** @name Getter methods */
324
    //@{
325
    /**
326
     * @return return the escape style for the formatted content
327
     */
328
    EscapeFlags getEscapeFlags() const;
329
330
    /**
331
     * @return return the reaction for unrepresentable character
332
     */
333
    UnRepFlags getUnRepFlags() const;
334
    //@}
335
336
private :
337
    // -----------------------------------------------------------------------
338
    //  Unimplemented constructors and operators
339
    // -----------------------------------------------------------------------
340
    XMLFormatter();
341
    XMLFormatter(const XMLFormatter&);
342
    XMLFormatter& operator=(const XMLFormatter&);
343
344
345
    // -----------------------------------------------------------------------
346
    //  Private class constants
347
    // -----------------------------------------------------------------------
348
    enum Constants
349
    {
350
        kTmpBufSize     = 16 * 1024
351
    };
352
353
354
    // -----------------------------------------------------------------------
355
    //  Private helper methods
356
    // -----------------------------------------------------------------------
357
    const XMLByte* getCharRef(XMLSize_t     &count,
358
                              XMLByte*      &ref,
359
                              const XMLCh *  stdRef);
360
361
    void writeCharRef(const XMLCh &toWrite);
362
    void writeCharRef(XMLSize_t toWrite);
363
364
    bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
365
                    , const XMLCh                     toCheck);
366
367
368
    XMLSize_t handleUnEscapedChars(const XMLCh *      srcPtr,
369
                                   const XMLSize_t    count,
370
                                   const UnRepFlags   unrepFlags);
371
372
    void specialFormat
373
    (
374
        const   XMLCh* const    toFormat
375
        , const XMLSize_t       count
376
        , const EscapeFlags     escapeFlags
377
    );
378
379
380
    // -----------------------------------------------------------------------
381
    //  Private data members
382
    //
383
    //  fEscapeFlags
384
    //      The escape flags we were told to use in formatting. These are
385
    //      defaults set in the ctor, which can be overridden on a particular
386
    //      call.
387
    //
388
    //  fOutEncoding
389
    //      This is the name of the output encoding. Saved mainly for meaningful
390
    //      error messages.
391
    //
392
    //  fTarget
393
    //      This is the target object for the formatting operation.
394
    //
395
    //  fUnRepFlags
396
    //      The unrepresentable flags that indicate how to react when a
397
    //      character cannot be represented in the target encoding.
398
    //
399
    //  fXCoder
400
    //      This is the transcoder that we will use. It is created using the
401
    //      encoding name we were told to use.
402
    //
403
    //  fTmpBuf
404
    //      An output buffer that we use to transcode chars into before we
405
    //      send them off to be output.
406
    //
407
    //  fAposRef
408
    //  fAmpRef
409
    //  fGTRef
410
    //  fLTRef
411
    //  fQuoteRef
412
    //      These are character refs for the standard char refs, in the
413
    //      output encoding. They are faulted in as required, by transcoding
414
    //      them from fixed Unicode versions.
415
    //
416
    //  fIsXML11
417
    //      For performance reasons, we do not store the actual version string
418
    //      and do the string comparison again and again.
419
    //
420
    // -----------------------------------------------------------------------
421
    EscapeFlags                 fEscapeFlags;
422
    XMLCh*                      fOutEncoding;
423
    XMLFormatTarget*            fTarget;
424
    UnRepFlags                  fUnRepFlags;
425
    XMLTranscoder*              fXCoder;
426
    XMLByte                     fTmpBuf[kTmpBufSize + 4];
427
    XMLByte*                    fAposRef;
428
    XMLSize_t                   fAposLen;
429
    XMLByte*                    fAmpRef;
430
    XMLSize_t                   fAmpLen;
431
    XMLByte*                    fGTRef;
432
    XMLSize_t                   fGTLen;
433
    XMLByte*                    fLTRef;
434
    XMLSize_t                   fLTLen;
435
    XMLByte*                    fQuoteRef;
436
    XMLSize_t                   fQuoteLen;
437
    bool                        fIsXML11;
438
    MemoryManager*              fMemoryManager;
439
};
440
441
442
class XMLPARSER_EXPORT XMLFormatTarget : public XMemory
443
{
444
public:
445
    // -----------------------------------------------------------------------
446
    //  Constructors and Destructor
447
    // -----------------------------------------------------------------------
448
0
    virtual ~XMLFormatTarget() {}
449
450
451
    // -----------------------------------------------------------------------
452
    //  Virtual interface
453
    // -----------------------------------------------------------------------
454
    virtual void writeChars
455
    (
456
          const XMLByte* const      toWrite
457
        , const XMLSize_t           count
458
        ,       XMLFormatter* const formatter
459
    ) = 0;
460
461
0
    virtual void flush() {};
462
463
464
protected :
465
    // -----------------------------------------------------------------------
466
    //  Hidden constructors and operators
467
    // -----------------------------------------------------------------------
468
0
    XMLFormatTarget() {};
469
470
private:
471
    // -----------------------------------------------------------------------
472
    //  Unimplemented constructors and operators
473
    // -----------------------------------------------------------------------
474
    XMLFormatTarget(const XMLFormatTarget&);
475
    XMLFormatTarget& operator=(const XMLFormatTarget&);
476
};
477
478
479
// ---------------------------------------------------------------------------
480
//  XMLFormatter: Getter methods
481
// ---------------------------------------------------------------------------
482
inline const XMLCh* XMLFormatter::getEncodingName() const
483
0
{
484
0
    return fOutEncoding;
485
0
}
486
487
inline const XMLTranscoder* XMLFormatter::getTranscoder() const
488
0
{
489
0
    return fXCoder;
490
0
}
491
492
inline XMLTranscoder* XMLFormatter::getTranscoder()
493
0
{
494
0
    return fXCoder;
495
0
}
496
497
// ---------------------------------------------------------------------------
498
//  XMLFormatter: Setter methods
499
// ---------------------------------------------------------------------------
500
inline void XMLFormatter::setEscapeFlags(const EscapeFlags newFlags)
501
0
{
502
0
    fEscapeFlags = newFlags;
503
0
}
504
505
inline void XMLFormatter::setUnRepFlags(const UnRepFlags newFlags)
506
0
{
507
0
    fUnRepFlags = newFlags;
508
0
}
509
510
511
inline XMLFormatter& XMLFormatter::operator<<(const EscapeFlags newFlags)
512
0
{
513
0
    fEscapeFlags = newFlags;
514
0
    return *this;
515
0
}
516
517
inline XMLFormatter& XMLFormatter::operator<<(const UnRepFlags newFlags)
518
0
{
519
0
    fUnRepFlags = newFlags;
520
0
    return *this;
521
0
}
522
523
// ---------------------------------------------------------------------------
524
//  XMLFormatter: Getter methods
525
// ---------------------------------------------------------------------------
526
inline XMLFormatter::EscapeFlags XMLFormatter::getEscapeFlags() const
527
0
{
528
0
    return fEscapeFlags;
529
0
}
530
531
inline XMLFormatter::UnRepFlags XMLFormatter::getUnRepFlags() const
532
0
{
533
0
    return fUnRepFlags;
534
0
}
535
536
XERCES_CPP_NAMESPACE_END
537
538
#endif