/src/libreoffice/sax/source/tools/fastserializer.hxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #pragma once |
21 | | |
22 | | #include <com/sun/star/xml/sax/XFastTokenHandler.hpp> |
23 | | #include <com/sun/star/io/XOutputStream.hpp> |
24 | | |
25 | | #include <sax/fastattribs.hxx> |
26 | | #include <sax/fshelper.hxx> |
27 | | #include "CachedOutputStream.hxx" |
28 | | |
29 | | #include <stack> |
30 | | #include <string_view> |
31 | | #include <map> |
32 | | #include <memory> |
33 | | |
34 | | namespace sax_fastparser { |
35 | | |
36 | | struct TokenValue /* a token id paired with a view of its value text; collected in a TokenValueList */ |
37 | | { |
38 | | sal_Int32 nToken; /**< token id, copied from the constructor argument _nToken */ |
39 | | std::string_view pValue; /**< non-owning view: the characters it refers to must outlive this object */ |
40 | 0 | TokenValue(sal_Int32 _nToken, std::string_view _pValue) : nToken(_nToken), pValue(_pValue) {} |
41 | | }; |
42 | | typedef std::vector<TokenValue> TokenValueList; /* list of TokenValue entries; FastSaxSerializer keeps one in maTokenValues */ |
43 | | |
44 | | /// Receives notifications of SAX document events and writes them to an XOutputStream. |
45 | | class FastSaxSerializer |
46 | | { |
47 | | typedef css::uno::Sequence< ::sal_Int32 > Int32Sequence; /* type of the rOrder argument of mark() */ |
48 | | |
49 | | public: |
50 | | explicit FastSaxSerializer(const css::uno::Reference< css::io::XOutputStream >& xOutputStream); |
51 | | ~FastSaxSerializer(); |
52 | | |
53 | | css::uno::Reference< css::io::XOutputStream > const & getOutputStream() const; |
54 | | /// Called by FSHelper to put data in for writeTokenValueList(); hands out maTokenValues by mutable reference. |
55 | 0 | TokenValueList& getTokenValueList() { return maTokenValues; } |
56 | | |
57 | | /** Called by the parser when parsing of an XML stream is started. |
58 | | */ |
59 | | void startDocument(); |
60 | | |
61 | | /** Called by the parser after the last XML element of a stream is processed. |
62 | | */ |
63 | | void endDocument(); |
64 | | |
65 | | /** Receives notification of the beginning of an element. |
66 | | |
67 | | @param Element |
68 | | contains the integer token from the <type>XFastTokenHandler</type> |
69 | | registered at the <type>XFastParser</type>.<br> |
70 | | |
71 | | If the element has a namespace that was registered with the |
72 | | <type>XFastParser</type>, <param>Element</param> contains the integer |
73 | | token of the local name of the element from the <type>XFastTokenHandler</type> |
74 | | and the integer token of the namespace combined with an arithmetic |
75 | | <b>or</b> operation. |
76 | | |
77 | | @param pAttrList |
78 | | Contains a <type>FastAttributeList</type> to access the attributes |
79 | | of the element. |
80 | | May be omitted; the argument then defaults to nullptr. |
81 | | */ |
82 | | void startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList = nullptr ); |
83 | | |
84 | | /** Receives notification of the end of a known element. |
85 | | @see startFastElement |
86 | | */ |
87 | | void endFastElement( ::sal_Int32 Element ); |
88 | | |
89 | | /** Receives notification of the beginning of a single element. |
90 | | |
91 | | @param Element |
92 | | contains the integer token from the <type>XFastTokenHandler</type> |
93 | | registered at the <type>XFastParser</type>.<br> |
94 | | |
95 | | If the element has a namespace that was registered with the |
96 | | <type>XFastParser</type>, <param>Element</param> contains the integer |
97 | | token of the local name of the element from the <type>XFastTokenHandler</type> |
98 | | and the integer token of the namespace combined with an arithmetic |
99 | | <b>or</b> operation. |
100 | | |
101 | | @param pAttrList |
102 | | Contains a <type>FastAttributeList</type> to access the attributes |
103 | | of the element. |
104 | | May be omitted; the argument then defaults to nullptr. |
105 | | */ |
106 | | void singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList = nullptr ); |
107 | | |
108 | | // C++ helpers (take an element token; how it is resolved is defined in the implementation file) |
109 | | void writeId( ::sal_Int32 Element ); |
110 | | OString getId( ::sal_Int32 Element ); |
111 | | |
112 | | void write( double value ); |
113 | | void write( std::u16string_view s, bool bEscape = false ); |
114 | | void write( std::string_view s, bool bEscape = false ); |
115 | | void write( const char* pStr, sal_Int32 nLen, bool bEscape = false ); |
116 | | |
117 | | // Strings containing _xHHHH_ are escaped with _x005F unless this is disabled via setAllowXEscape(false). |
118 | 0 | void setAllowXEscape(bool bSet) { mbXescape = bSet; } |
119 | | |
120 | | public: |
121 | | /** From now on, don't write directly to the stream, but to the top of a stack. |
122 | | |
123 | | This is to be able to change the order of the data being written. |
124 | | If you need to write e.g. |
125 | | p, r, rPr, [something], /rPr, t, [text], /t, /r, /p, |
126 | | but get it in order |
127 | | p, r, t, [text], /t, rPr, [something], /rPr, /r, /p, |
128 | | simply do |
129 | | p, r, mark(), t, [text], /t, mark(), rPr, [something], /rPr, |
130 | | mergeTopMarks( MergeMarks::PREPEND ), mergeTopMarks( MergeMarks::APPEND ), /r, /p |
131 | | and you are done. |
132 | | @param rOrder presumably the element order that a sorting mark (ForSort) arranges its data in - TODO confirm in the implementation |
133 | | @param nTag debugging aid to ensure mark and merge match in LIFO order |
134 | | */ |
135 | | void mark(sal_Int32 nTag, const Int32Sequence& rOrder); |
136 | | |
137 | | /** Merge the 2 topmost marks. |
138 | | |
139 | | The possibilities: prepend the top before the second top-most |
140 | | mark, append it, append it later or ignore; prepending brings the possibility |
141 | | to switch parts of the output, appending later allows to write some |
142 | | output in advance. |
143 | | |
144 | | Writes the result to the output stream if the mark stack becomes empty |
145 | | by the operation. |
146 | | |
147 | | When the MergeMarks::POSTPONE is specified, the merge happens just |
148 | | before the next merge. |
149 | | |
150 | | @param nTag debugging aid to ensure mark and merge match in LIFO order |
151 | | @param eMergeType which of the possibilities listed above to apply |
152 | | @see mark() |
153 | | */ |
154 | | void mergeTopMarks(sal_Int32 nTag, |
155 | | sax_fastparser::MergeMarks eMergeType); |
156 | | |
157 | | private: |
158 | | /** Helper class to cache data and write in chunks to XOutputStream or ForMerge::append. |
159 | | * Its flush method needs to be called before touching maMarkStack |
160 | | * to ensure correct order of ForSort methods. |
161 | | */ |
162 | | CachedOutputStream maCachedOutputStream; |
163 | | css::uno::Reference< css::xml::sax::XFastTokenHandler > mxFastTokenHandler; |
164 | | |
165 | | class ForMerge : public ForMergeBase /* entry type of maMarkStack; ForSort derives from it */ |
166 | | { |
167 | | Int8Sequence maData; |
168 | | std::vector<sal_Int8> maPostponed; /* presumably filled by postpone() - TODO confirm in the implementation */ |
169 | | |
170 | | public: |
171 | | sal_Int32 const m_Tag; /* set once from the constructor argument nTag */ |
172 | | #ifdef DBG_UTIL |
173 | | // pending close tags, followed by pending open tags |
174 | | std::deque<sal_Int32> m_DebugEndedElements; |
175 | | std::deque<sal_Int32> m_DebugStartedElements; |
176 | | // ... and another buffer for maPostponed ... |
177 | | std::deque<sal_Int32> m_DebugPostponedEndedElements; |
178 | | std::deque<sal_Int32> m_DebugPostponedStartedElements; |
179 | | #endif |
180 | | |
181 | 0 | explicit ForMerge(sal_Int32 const nTag) : m_Tag(nTag) {} |
182 | | |
183 | 0 | virtual void setCurrentElement( ::sal_Int32 /*nToken*/ ) {} /* does nothing here; ForSort overrides it */ |
184 | | virtual Int8Sequence& getData(); |
185 | | #if OSL_DEBUG_LEVEL > 0 |
186 | | virtual void print(); |
187 | | #endif |
188 | | |
189 | | virtual void prepend( const Int8Sequence &rWhat ); |
190 | | virtual void append( const Int8Sequence &rWhat ) override; |
191 | | void postpone( const Int8Sequence &rWhat ); |
192 | | |
193 | | protected: |
194 | | void resetData( ); |
195 | | static void merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend ); |
196 | | private: |
197 | | static void merge( Int8Sequence &rTop, const sal_Int8* pMerge, sal_Int32 nMergeLen, bool bAppend ); |
198 | | }; |
199 | | |
200 | | class ForSort : public ForMerge /* ForMerge variant that also carries an element order (maOrder) and a private sort() step */ |
201 | | { |
202 | | std::map< ::sal_Int32, Int8Sequence > maData; /* one byte sequence per sal_Int32 key - presumably the element token, TODO confirm */ |
203 | | sal_Int32 mnCurrentElement; /* starts at 0; presumably updated by setCurrentElement() - TODO confirm */ |
204 | | |
205 | | Int32Sequence maOrder; /* copy of the rOrder constructor argument */ |
206 | | |
207 | | public: |
208 | | ForSort(sal_Int32 const nTag, const Int32Sequence& rOrder) |
209 | 0 | : ForMerge(nTag) |
210 | 0 | , mnCurrentElement( 0 ) |
211 | 0 | , maOrder( rOrder ) |
212 | 0 | {} |
213 | | |
214 | | void setCurrentElement( ::sal_Int32 nToken ) override; |
215 | | |
216 | | virtual Int8Sequence& getData() override; |
217 | | |
218 | | #if OSL_DEBUG_LEVEL > 0 |
219 | | virtual void print() override; |
220 | | #endif |
221 | | |
222 | | virtual void prepend( const Int8Sequence &rWhat ) override; |
223 | | virtual void append( const Int8Sequence &rWhat ) override; |
224 | | private: |
225 | | void sort(); |
226 | | }; |
227 | | |
228 | | std::stack< std::shared_ptr< ForMerge > > maMarkStack; /* the stack of marks that the mark() and mergeTopMarks() documentation refers to */ |
229 | | bool mbMarkStackEmpty; /* presumably mirrors maMarkStack.empty() - TODO confirm in the implementation */ |
230 | | // Would be better to use OStringBuffer instead of these two |
231 | | // but then we couldn't get the rtl_String* member :-( |
232 | | rtl_String *mpDoubleStr; |
233 | | sal_Int32 mnDoubleStrCapacity; |
234 | | TokenValueList maTokenValues; /* handed out by getTokenValueList() */ |
235 | | bool mbXescape; ///< whether to escape invalid XML characters as _xHHHH_ in write(const char*,sal_Int32,true) |
236 | | |
237 | | |
238 | | #ifdef DBG_UTIL |
239 | | std::stack<sal_Int32> m_DebugStartedElements; |
240 | | #endif |
241 | | |
242 | | void writeTokenValueList(); |
243 | | void writeFastAttributeList(FastAttributeList const & rAttrList); |
244 | | |
245 | | /** Forward the call to the output stream, or write to the stack. |
246 | | |
247 | | The latter in the case that we are inside a mark(). |
248 | | */ |
249 | | void writeBytes( const css::uno::Sequence< ::sal_Int8 >& aData ); |
250 | | void writeBytes( const char* pStr, size_t nLen ); |
251 | | }; |
252 | | |
253 | | } // namespace sax_fastparser |
254 | | |
255 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |