/src/xerces-c/src/xercesc/framework/XMLFormatter.hpp
Line | Count | Source |
1 | | /* |
2 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
3 | | * contributor license agreements. See the NOTICE file distributed with |
4 | | * this work for additional information regarding copyright ownership. |
5 | | * The ASF licenses this file to You under the Apache License, Version 2.0 |
6 | | * (the "License"); you may not use this file except in compliance with |
7 | | * the License. You may obtain a copy of the License at |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | /* |
19 | | * $Id: XMLFormatter.hpp 932889 2010-04-11 13:10:10Z borisk $ |
20 | | */ |
21 | | |
22 | | #if !defined(XERCESC_INCLUDE_GUARD_XMLFORMATTER_HPP) |
23 | | #define XERCESC_INCLUDE_GUARD_XMLFORMATTER_HPP |
24 | | |
25 | | #include <xercesc/util/PlatformUtils.hpp> |
26 | | |
27 | | XERCES_CPP_NAMESPACE_BEGIN |
28 | | |
29 | | class XMLFormatTarget; |
30 | | class XMLTranscoder; |
31 | | |
32 | | /** |
33 | | * This class provides the basic formatting capabilities that are required |
34 | | * to turn the Unicode based XML data from the parsers into a form that can |
35 | | * be used on non-Unicode based systems, that is, into local or generic text |
36 | | * encodings. |
37 | | * |
38 | | * A number of flags are provided to control whether various optional |
39 | | * formatting operations are performed. |
40 | | */ |
41 | | class XMLPARSER_EXPORT XMLFormatter : public XMemory |
42 | | { |
43 | | public: |
44 | | // ----------------------------------------------------------------------- |
45 | | // Class types |
46 | | // ----------------------------------------------------------------------- |
47 | | /** @name Public Constants */ |
48 | | //@{ |
49 | | /** |
50 | | * EscapeFlags - Different styles of escape flags to control various formatting. |
51 | | * |
52 | | * <p><code>NoEscapes:</code> |
53 | | * No character needs to be escaped. Just write them out as is.</p> |
54 | | * <p><code>StdEscapes:</code> |
55 | | * The following characters need to be escaped:</p> |
56 | | * <table border='1'> |
57 | | * <tr> |
58 | | * <td>character</td> |
59 | | * <td>should be escaped and written as</td> |
60 | | * </tr> |
61 | | * <tr> |
62 | | * <td valign='top' rowspan='1' colspan='1'>&</td> |
63 | | * <td valign='top' rowspan='1' colspan='1'>&amp;</td> |
64 | | * </tr> |
65 | | * <tr> |
66 | | * <td valign='top' rowspan='1' colspan='1'>></td> |
67 | | * <td valign='top' rowspan='1' colspan='1'>&gt;</td> |
68 | | * </tr> |
69 | | * <tr> |
70 | | * <td valign='top' rowspan='1' colspan='1'>"</td> |
71 | | * <td valign='top' rowspan='1' colspan='1'>&quot;</td> |
72 | | * </tr> |
73 | | * <tr> |
74 | | * <td valign='top' rowspan='1' colspan='1'><</td> |
75 | | * <td valign='top' rowspan='1' colspan='1'>&lt;</td> |
76 | | * </tr> |
77 | | * <tr> |
78 | | * <td valign='top' rowspan='1' colspan='1'>'</td> |
79 | | * <td valign='top' rowspan='1' colspan='1'>&apos;</td> |
80 | | * </tr> |
81 | | * </table> |
82 | | * <p><code>AttrEscapes:</code> |
83 | | * The following characters need to be escaped:</p> |
84 | | * <table border='1'> |
85 | | * <tr> |
86 | | * <td>character</td> |
87 | | * <td>should be escaped and written as</td> |
88 | | * </tr> |
89 | | * <tr> |
90 | | * <td valign='top' rowspan='1' colspan='1'>&</td> |
91 | | * <td valign='top' rowspan='1' colspan='1'>&amp;</td> |
92 | | * </tr> |
93 | | * <tr> |
94 | | * <td valign='top' rowspan='1' colspan='1'>></td> |
95 | | * <td valign='top' rowspan='1' colspan='1'>&gt;</td> |
96 | | * </tr> |
97 | | * <tr> |
98 | | * <td valign='top' rowspan='1' colspan='1'>"</td> |
99 | | * <td valign='top' rowspan='1' colspan='1'>&quot;</td> |
100 | | * </tr> |
101 | | * </table> |
102 | | * <p><code>CharEscapes:</code> |
103 | | * The following characters need to be escaped:</p> |
104 | | * <table border='1'> |
105 | | * <tr> |
106 | | * <td>character</td> |
107 | | * <td>should be escaped and written as</td> |
108 | | * </tr> |
109 | | * <tr> |
110 | | * <td valign='top' rowspan='1' colspan='1'>&</td> |
111 | | * <td valign='top' rowspan='1' colspan='1'>&amp;</td> |
112 | | * </tr> |
113 | | * <tr> |
114 | | * <td valign='top' rowspan='1' colspan='1'>></td> |
115 | | * <td valign='top' rowspan='1' colspan='1'>&gt;</td> |
116 | | * </tr> |
117 | | * </table> |
118 | | * <p><code>EscapeFlags_Count:</code> |
119 | | * Special value (the number of escape styles listed above), do not use directly.</p> |
120 | | * <p><code>DefaultEscape:</code> |
121 | | * Special value (999), do not use directly; it is the default argument of formatBuf().</p> |
122 | | * |
123 | | */ |
124 | | enum EscapeFlags |
125 | | { |
126 | | NoEscapes |
127 | | , StdEscapes |
128 | | , AttrEscapes |
129 | | , CharEscapes |
130 | | |
131 | | // Special values, don't use directly |
132 | | , EscapeFlags_Count |
133 | | , DefaultEscape = 999 |
134 | | }; |
135 | | |
136 | | /** |
137 | | * UnRepFlags |
138 | | * |
139 | | * The unrepresentable flags that indicate how to react when a |
140 | | * character cannot be represented in the target encoding. |
141 | | * |
142 | | * <p><code>UnRep_Fail:</code> |
143 | | * Fail the operation.</p> |
144 | | * <p><code>UnRep_CharRef:</code> |
145 | | * Output the unrepresentable character as a character reference.</p> |
146 | | * <p><code>UnRep_Replace:</code> |
147 | | * Replace the unrepresentable character with the replacement character.</p> |
148 | | * <p><code>DefaultUnRep:</code> |
149 | | * Special value (999), do not use directly; it is the default argument of formatBuf().</p> |
150 | | * |
151 | | */ |
152 | | enum UnRepFlags |
153 | | { |
154 | | UnRep_Fail |
155 | | , UnRep_CharRef |
156 | | , UnRep_Replace |
157 | | |
158 | | , DefaultUnRep = 999 |
159 | | }; |
160 | | //@} |
161 | | |
162 | | |
163 | | // ----------------------------------------------------------------------- |
164 | | // Constructors and Destructor |
165 | | // ----------------------------------------------------------------------- |
166 | | /** @name Constructor and Destructor */ |
167 | | //@{ |
168 | | /** |
169 | | * @param outEncoding the encoding for the formatted content. |
170 | | * @param docVersion the document version. |
171 | | * @param target the formatTarget where the formatted content is written to. |
172 | | * @param escapeFlags the escape style for certain characters (defaults to NoEscapes). |
173 | | * @param unrepFlags the reaction to unrepresentable characters (defaults to UnRep_Fail). |
174 | | * @param manager Pointer to the memory manager to be used to |
175 | | * allocate objects. |
176 | | */ |
177 | | XMLFormatter |
178 | | ( |
179 | | const XMLCh* const outEncoding |
180 | | , const XMLCh* const docVersion |
181 | | , XMLFormatTarget* const target |
182 | | , const EscapeFlags escapeFlags = NoEscapes |
183 | | , const UnRepFlags unrepFlags = UnRep_Fail |
184 | | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
185 | | ); |
186 | | |
187 | | XMLFormatter |
188 | | ( |
189 | | const char* const outEncoding |
190 | | , const char* const docVersion |
191 | | , XMLFormatTarget* const target |
192 | | , const EscapeFlags escapeFlags = NoEscapes |
193 | | , const UnRepFlags unrepFlags = UnRep_Fail |
194 | | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
195 | | ); |
196 | | |
197 | | XMLFormatter |
198 | | ( |
199 | | const XMLCh* const outEncoding |
200 | | , XMLFormatTarget* const target |
201 | | , const EscapeFlags escapeFlags = NoEscapes |
202 | | , const UnRepFlags unrepFlags = UnRep_Fail |
203 | | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
204 | | ); |
205 | | |
206 | | XMLFormatter |
207 | | ( |
208 | | const char* const outEncoding |
209 | | , XMLFormatTarget* const target |
210 | | , const EscapeFlags escapeFlags = NoEscapes |
211 | | , const UnRepFlags unrepFlags = UnRep_Fail |
212 | | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
213 | | ); |
214 | | |
215 | | ~XMLFormatter(); |
216 | | //@} |
217 | | |
218 | | |
219 | | // ----------------------------------------------------------------------- |
220 | | // Formatting methods |
221 | | // ----------------------------------------------------------------------- |
222 | | /** @name Formatting methods */ |
223 | | //@{ |
224 | | /** |
225 | | * @param toFormat the string to be formatted |
226 | | * @param count length of the string |
227 | | * @param escapeFlags the escape style for formatting toFormat (defaults to DefaultEscape) |
228 | | * @param unrepFlags the reaction for any unrepresentable character in toFormat (defaults to DefaultUnRep) |
229 | | * |
230 | | */ |
231 | | void formatBuf |
232 | | ( |
233 | | const XMLCh* const toFormat |
234 | | , const XMLSize_t count |
235 | | , const EscapeFlags escapeFlags = DefaultEscape |
236 | | , const UnRepFlags unrepFlags = DefaultUnRep |
237 | | ); |
238 | | |
239 | | /** |
240 | | * @see formatBuf |
241 | | */ |
242 | | XMLFormatter& operator<< |
243 | | ( |
244 | | const XMLCh* const toFormat |
245 | | ); |
246 | | |
247 | | XMLFormatter& operator<< |
248 | | ( |
249 | | const XMLCh toFormat |
250 | | ); |
251 | | |
252 | | void writeBOM(const XMLByte* const toFormat |
253 | | , const XMLSize_t count); |
254 | | |
255 | | //@} |
256 | | |
257 | | // ----------------------------------------------------------------------- |
258 | | // Getter methods |
259 | | // ----------------------------------------------------------------------- |
260 | | /** @name Getter methods */ |
261 | | //@{ |
262 | | /** |
263 | | * @return the encoding name set for the formatted content |
264 | | */ |
265 | | |
266 | | const XMLCh* getEncodingName() const; |
267 | | |
268 | | /** |
269 | | * @return the constant transcoder used internally for transcoding the formatter content |
270 | | */ |
271 | | inline const XMLTranscoder* getTranscoder() const; |
272 | | |
273 | | /** |
274 | | * @return the transcoder used internally for transcoding the formatter content |
275 | | */ |
276 | | inline XMLTranscoder* getTranscoder(); |
277 | | |
278 | | //@} |
279 | | |
280 | | // ----------------------------------------------------------------------- |
281 | | // Setter methods |
282 | | // ----------------------------------------------------------------------- |
283 | | /** @name Setter methods */ |
284 | | //@{ |
285 | | /** |
286 | | * @param newFlags the escape style to use for the follow-on formatted content |
287 | | */ |
288 | | void setEscapeFlags |
289 | | ( |
290 | | const EscapeFlags newFlags |
291 | | ); |
292 | | |
293 | | /** |
294 | | * @param newFlags the reaction to use for unrepresentable characters |
295 | | */ |
296 | | void setUnRepFlags |
297 | | ( |
298 | | const UnRepFlags newFlags |
299 | | ); |
300 | | |
301 | | /** |
302 | | * @param newFlags the escape style to use for the follow-on formatted content |
303 | | * @see setEscapeFlags |
304 | | */ |
305 | | XMLFormatter& operator<< |
306 | | ( |
307 | | const EscapeFlags newFlags |
308 | | ); |
309 | | |
310 | | /** |
311 | | * @param newFlags the reaction to use for unrepresentable characters |
312 | | * @see setUnRepFlags |
313 | | */ |
314 | | XMLFormatter& operator<< |
315 | | ( |
316 | | const UnRepFlags newFlags |
317 | | ); |
318 | | //@} |
319 | | |
320 | | // ----------------------------------------------------------------------- |
321 | | // Getter methods (formatting flags) |
322 | | // ----------------------------------------------------------------------- |
323 | | /** @name Getter methods */ |
324 | | //@{ |
325 | | /** |
326 | | * @return the escape style for the formatted content |
327 | | */ |
328 | | EscapeFlags getEscapeFlags() const; |
329 | | |
330 | | /** |
331 | | * @return the reaction for unrepresentable characters |
332 | | */ |
333 | | UnRepFlags getUnRepFlags() const; |
334 | | //@} |
335 | | |
336 | | private : |
337 | | // ----------------------------------------------------------------------- |
338 | | // Unimplemented constructors and operators (declared private: no default construction, copy or assignment) |
339 | | // ----------------------------------------------------------------------- |
340 | | XMLFormatter(); |
341 | | XMLFormatter(const XMLFormatter&); |
342 | | XMLFormatter& operator=(const XMLFormatter&); |
343 | | |
344 | | |
345 | | // ----------------------------------------------------------------------- |
346 | | // Private class constants (kTmpBufSize is used to size fTmpBuf) |
347 | | // ----------------------------------------------------------------------- |
348 | | enum Constants |
349 | | { |
350 | | kTmpBufSize = 16 * 1024 |
351 | | }; |
352 | | |
353 | | |
354 | | // ----------------------------------------------------------------------- |
355 | | // Private helper methods |
356 | | // ----------------------------------------------------------------------- |
357 | | const XMLByte* getCharRef(XMLSize_t &count, |
358 | | XMLByte* &ref, |
359 | | const XMLCh * stdRef); |
360 | | |
361 | | void writeCharRef(const XMLCh &toWrite); |
362 | | void writeCharRef(XMLSize_t toWrite); |
363 | | |
364 | | bool inEscapeList(const XMLFormatter::EscapeFlags escStyle |
365 | | , const XMLCh toCheck); |
366 | | |
367 | | |
368 | | XMLSize_t handleUnEscapedChars(const XMLCh * srcPtr, |
369 | | const XMLSize_t count, |
370 | | const UnRepFlags unrepFlags); |
371 | | |
372 | | void specialFormat |
373 | | ( |
374 | | const XMLCh* const toFormat |
375 | | , const XMLSize_t count |
376 | | , const EscapeFlags escapeFlags |
377 | | ); |
378 | | |
379 | | |
380 | | // ----------------------------------------------------------------------- |
381 | | // Private data members |
382 | | // |
383 | | // fEscapeFlags |
384 | | // The escape flags we were told to use in formatting. These are |
385 | | // defaults set in the ctor, which can be overridden on a particular |
386 | | // call. |
387 | | // |
388 | | // fOutEncoding |
389 | | // This is the name of the output encoding. Saved mainly for meaningful |
390 | | // error messages. |
391 | | // |
392 | | // fTarget |
393 | | // This is the target object for the formatting operation. |
394 | | // |
395 | | // fUnRepFlags |
396 | | // The unrepresentable flags that indicate how to react when a |
397 | | // character cannot be represented in the target encoding. |
398 | | // |
399 | | // fXCoder |
400 | | // This is the transcoder that we will use. It is created using the |
401 | | // encoding name we were told to use. |
402 | | // |
403 | | // fTmpBuf |
404 | | // An output buffer that we use to transcode chars into before we |
405 | | // send them off to be output. |
406 | | // |
407 | | // fAposRef |
408 | | // fAmpRef |
409 | | // fGTRef |
410 | | // fLTRef |
411 | | // fQuoteRef |
412 | | // These are character refs for the standard char refs, in the |
413 | | // output encoding. They are faulted in as required, by transcoding |
414 | | // them from fixed Unicode versions. |
415 | | // |
416 | | // fIsXML11 |
417 | | // For performance reasons, we keep this flag rather than storing the |
418 | | // actual version string and doing the string comparison again and again. |
419 | | // |
420 | | // ----------------------------------------------------------------------- |
421 | | EscapeFlags fEscapeFlags; |
422 | | XMLCh* fOutEncoding; |
423 | | XMLFormatTarget* fTarget; |
424 | | UnRepFlags fUnRepFlags; |
425 | | XMLTranscoder* fXCoder; |
426 | | XMLByte fTmpBuf[kTmpBufSize + 4]; |
427 | | XMLByte* fAposRef; |
428 | | XMLSize_t fAposLen; |
429 | | XMLByte* fAmpRef; |
430 | | XMLSize_t fAmpLen; |
431 | | XMLByte* fGTRef; |
432 | | XMLSize_t fGTLen; |
433 | | XMLByte* fLTRef; |
434 | | XMLSize_t fLTLen; |
435 | | XMLByte* fQuoteRef; |
436 | | XMLSize_t fQuoteLen; |
437 | | bool fIsXML11; |
438 | | MemoryManager* fMemoryManager; |
439 | | }; |
440 | | |
441 | | |
442 | | class XMLPARSER_EXPORT XMLFormatTarget : public XMemory |
443 | | { |
444 | | public: |
445 | | // ----------------------------------------------------------------------- |
446 | | // Destructor (virtual, with an empty body) |
447 | | // ----------------------------------------------------------------------- |
448 | 0 | virtual ~XMLFormatTarget() {} |
449 | | |
450 | | |
451 | | // ----------------------------------------------------------------------- |
452 | | // Virtual interface: writeChars() is pure virtual; flush() has an empty default body |
453 | | // ----------------------------------------------------------------------- |
454 | | virtual void writeChars |
455 | | ( |
456 | | const XMLByte* const toWrite |
457 | | , const XMLSize_t count |
458 | | , XMLFormatter* const formatter |
459 | | ) = 0; |
460 | | |
461 | 0 | virtual void flush() {}; |
462 | | |
463 | | |
464 | | protected : |
465 | | // ----------------------------------------------------------------------- |
466 | | // Hidden constructor (protected default constructor with an empty body) |
467 | | // ----------------------------------------------------------------------- |
468 | 0 | XMLFormatTarget() {}; |
469 | | |
470 | | private: |
471 | | // ----------------------------------------------------------------------- |
472 | | // Unimplemented constructors and operators (copy constructor and copy assignment are private and only declared here) |
473 | | // ----------------------------------------------------------------------- |
474 | | XMLFormatTarget(const XMLFormatTarget&); |
475 | | XMLFormatTarget& operator=(const XMLFormatTarget&); |
476 | | }; |
477 | | |
478 | | |
479 | | // --------------------------------------------------------------------------- |
480 | | // XMLFormatter: Getter methods (inline; return the stored fOutEncoding / fXCoder members unchanged) |
481 | | // --------------------------------------------------------------------------- |
482 | | inline const XMLCh* XMLFormatter::getEncodingName() const |
483 | 0 | { |
484 | 0 | return fOutEncoding; |
485 | 0 | } |
486 | | |
487 | | inline const XMLTranscoder* XMLFormatter::getTranscoder() const |
488 | 0 | { |
489 | 0 | return fXCoder; |
490 | 0 | } |
491 | | |
492 | | inline XMLTranscoder* XMLFormatter::getTranscoder() |
493 | 0 | { |
494 | 0 | return fXCoder; |
495 | 0 | } |
496 | | |
497 | | // --------------------------------------------------------------------------- |
498 | | // XMLFormatter: Setter methods (inline; the setters and the operator<< flag overloads assign fEscapeFlags / fUnRepFlags, and operator<< returns *this) |
499 | | // --------------------------------------------------------------------------- |
500 | | inline void XMLFormatter::setEscapeFlags(const EscapeFlags newFlags) |
501 | 0 | { |
502 | 0 | fEscapeFlags = newFlags; |
503 | 0 | } |
504 | | |
505 | | inline void XMLFormatter::setUnRepFlags(const UnRepFlags newFlags) |
506 | 0 | { |
507 | 0 | fUnRepFlags = newFlags; |
508 | 0 | } |
509 | | |
510 | | |
511 | | inline XMLFormatter& XMLFormatter::operator<<(const EscapeFlags newFlags) |
512 | 0 | { |
513 | 0 | fEscapeFlags = newFlags; |
514 | 0 | return *this; |
515 | 0 | } |
516 | | |
517 | | inline XMLFormatter& XMLFormatter::operator<<(const UnRepFlags newFlags) |
518 | 0 | { |
519 | 0 | fUnRepFlags = newFlags; |
520 | 0 | return *this; |
521 | 0 | } |
522 | | |
523 | | // --------------------------------------------------------------------------- |
524 | | // XMLFormatter: Getter methods for the formatting flags (inline; return fEscapeFlags / fUnRepFlags) |
525 | | // --------------------------------------------------------------------------- |
526 | | inline XMLFormatter::EscapeFlags XMLFormatter::getEscapeFlags() const |
527 | 0 | { |
528 | 0 | return fEscapeFlags; |
529 | 0 | } |
530 | | |
531 | | inline XMLFormatter::UnRepFlags XMLFormatter::getUnRepFlags() const |
532 | 0 | { |
533 | 0 | return fUnRepFlags; |
534 | 0 | } |
535 | | |
536 | | XERCES_CPP_NAMESPACE_END |
537 | | |
538 | | #endif |