/src/libreoffice/svtools/source/svrtf/svparser.cxx
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <svtools/svparser.hxx> |
21 | | #include <svtools/htmltokn.h> |
22 | | #include <tools/stream.hxx> |
23 | | #include <tools/debug.hxx> |
24 | | #include <rtl/textcvt.h> |
25 | | #include <rtl/tencinfo.h> |
26 | | #include <rtl/character.hxx> |
27 | | #include <sal/log.hxx> |
28 | | #include <unicode/ucsdet.h> |
29 | | #include <comphelper/configuration.hxx> |
30 | | |
31 | | #include <vector> |
32 | | |
33 | | // structure to store the actual data |
34 | | template<typename T> |
35 | | struct SvParser_Impl |
36 | | { |
37 | | OUString aToken; // parsed token |
38 | | sal_uInt64 nFilePos; // actual position in stream |
39 | | sal_uInt32 nlLineNr; // actual line number |
40 | | sal_uInt32 nlLinePos; // actual column number |
41 | | tools::Long nTokenValue; // extra value (RTF) |
42 | | bool bTokenHasValue; // indicates whether nTokenValue is valid |
43 | | T nToken; // actual Token |
44 | | sal_uInt32 nNextCh; // actual character |
45 | | T nSaveToken; // the token from Continue |
46 | | |
47 | | rtl_TextToUnicodeConverter hConv; |
48 | | rtl_TextToUnicodeContext hContext; |
49 | | |
50 | | SvParser_Impl() |
51 | 52.9k | : nFilePos(0) |
52 | 52.9k | , nlLineNr(0) |
53 | 52.9k | , nlLinePos(0) |
54 | 52.9k | , nTokenValue(0) |
55 | 52.9k | , bTokenHasValue(false) |
56 | 52.9k | , nToken(static_cast<T>(0)) |
57 | 52.9k | , nNextCh(0) |
58 | 52.9k | , nSaveToken(static_cast<T>(0)) |
59 | 52.9k | , hConv( nullptr ) |
60 | 52.9k | , hContext( reinterpret_cast<rtl_TextToUnicodeContext>(1) ) |
61 | 52.9k | { |
62 | 52.9k | } SvParser_Impl<int>::SvParser_Impl() Line | Count | Source | 51 | 10.4k | : nFilePos(0) | 52 | 10.4k | , nlLineNr(0) | 53 | 10.4k | , nlLinePos(0) | 54 | 10.4k | , nTokenValue(0) | 55 | 10.4k | , bTokenHasValue(false) | 56 | 10.4k | , nToken(static_cast<T>(0)) | 57 | 10.4k | , nNextCh(0) | 58 | 10.4k | , nSaveToken(static_cast<T>(0)) | 59 | 10.4k | , hConv( nullptr ) | 60 | 10.4k | , hContext( reinterpret_cast<rtl_TextToUnicodeContext>(1) ) | 61 | 10.4k | { | 62 | 10.4k | } |
SvParser_Impl<HtmlTokenId>::SvParser_Impl() Line | Count | Source | 51 | 42.5k | : nFilePos(0) | 52 | 42.5k | , nlLineNr(0) | 53 | 42.5k | , nlLinePos(0) | 54 | 42.5k | , nTokenValue(0) | 55 | 42.5k | , bTokenHasValue(false) | 56 | 42.5k | , nToken(static_cast<T>(0)) | 57 | 42.5k | , nNextCh(0) | 58 | 42.5k | , nSaveToken(static_cast<T>(0)) | 59 | 42.5k | , hConv( nullptr ) | 60 | 42.5k | , hContext( reinterpret_cast<rtl_TextToUnicodeContext>(1) ) | 61 | 42.5k | { | 62 | 42.5k | } |
|
63 | | |
64 | | }; |
65 | | |
66 | | |
67 | | template<typename T> |
68 | | SvParser<T>::TokenStackType::TokenStackType() |
69 | 179k | : nTokenValue(0) |
70 | 179k | , bTokenHasValue(false) |
71 | 179k | , nTokenId(static_cast<T>(0)) |
72 | 179k | { |
73 | 179k | } SvParser<int>::TokenStackType::TokenStackType() Line | Count | Source | 69 | 52.2k | : nTokenValue(0) | 70 | 52.2k | , bTokenHasValue(false) | 71 | 52.2k | , nTokenId(static_cast<T>(0)) | 72 | 52.2k | { | 73 | 52.2k | } |
SvParser<HtmlTokenId>::TokenStackType::TokenStackType() Line | Count | Source | 69 | 127k | : nTokenValue(0) | 70 | 127k | , bTokenHasValue(false) | 71 | 127k | , nTokenId(static_cast<T>(0)) | 72 | 127k | { | 73 | 127k | } |
|
74 | | |
// Creates a parser that reads from rIn. The token stack gets nStackSize
// entries, but never fewer than 3. Line and column counting start at 1 and
// the source encoding is unknown until SetSrcEncoding() is called.
template<typename T>
SvParser<T>::SvParser( SvStream& rIn, sal_uInt8 nStackSize )
    : rInput( rIn )
    , nlLineNr( 1 )
    , nlLinePos( 1 )
    , nConversionErrors( 0 )
    , pImplData( nullptr )
    , m_nTokenIndex(0)
    , nTokenValue( 0 )
    , bTokenHasValue( false )
    , bFuzzing(comphelper::IsFuzzing())
    , eState( SvParserState::NotStarted )
    , eSrcEnc( RTL_TEXTENCODING_DONTKNOW )
    , nNextChPos(0)
    , nNextCh(0)
    , bSwitchToUCS2(false)
    , bRTF_InTextRead(false)
    , nTokenStackSize( nStackSize < 3 ? sal_uInt8(3) : nStackSize )
    , nTokenStackPos( 0 )
{
    // Allocate the stack and point the cursor at its first entry.
    pTokenStack.reset(new TokenStackType[ nTokenStackSize ]);
    pTokenStackPos = pTokenStack.get();
}
|
102 | | |
// Destroys the text converter (context first, then the converter itself) if
// one was created, and frees the token stack.
template<typename T>
SvParser<T>::~SvParser()
{
    const bool bHasConverter = pImplData && pImplData->hConv;
    if( bHasConverter )
    {
        rtl_destroyTextToUnicodeContext( pImplData->hConv, pImplData->hContext );
        rtl_destroyTextToUnicodeConverter( pImplData->hConv );
    }

    pTokenStack.reset();
}
|
115 | | |
116 | 1.06M | template<typename T> SvParserState SvParser<T>::GetStatus() const { return eState; } SvParser<int>::GetStatus() const Line | Count | Source | 116 | 9.84k | template<typename T> SvParserState SvParser<T>::GetStatus() const { return eState; } |
SvParser<HtmlTokenId>::GetStatus() const Line | Count | Source | 116 | 1.06M | template<typename T> SvParserState SvParser<T>::GetStatus() const { return eState; } |
|
117 | 4.48M | template<typename T> sal_uInt32 SvParser<T>::GetLineNr() const { return nlLineNr; } Unexecuted instantiation: SvParser<int>::GetLineNr() const SvParser<HtmlTokenId>::GetLineNr() const Line | Count | Source | 117 | 4.48M | template<typename T> sal_uInt32 SvParser<T>::GetLineNr() const { return nlLineNr; } |
|
118 | 4.74M | template<typename T> sal_uInt32 SvParser<T>::GetLinePos() const { return nlLinePos; } Unexecuted instantiation: SvParser<int>::GetLinePos() const SvParser<HtmlTokenId>::GetLinePos() const Line | Count | Source | 118 | 4.74M | template<typename T> sal_uInt32 SvParser<T>::GetLinePos() const { return nlLinePos; } |
|
119 | 2.37M | template<typename T> void SvParser<T>::IncLineNr() { ++nlLineNr; } SvParser<int>::IncLineNr() Line | Count | Source | 119 | 588k | template<typename T> void SvParser<T>::IncLineNr() { ++nlLineNr; } |
SvParser<HtmlTokenId>::IncLineNr() Line | Count | Source | 119 | 1.78M | template<typename T> void SvParser<T>::IncLineNr() { ++nlLineNr; } |
|
120 | 119M | template<typename T> sal_uInt32 SvParser<T>::IncLinePos() { return ++nlLinePos; } SvParser<int>::IncLinePos() Line | Count | Source | 120 | 10.9M | template<typename T> sal_uInt32 SvParser<T>::IncLinePos() { return ++nlLinePos; } |
SvParser<HtmlTokenId>::IncLinePos() Line | Count | Source | 120 | 108M | template<typename T> sal_uInt32 SvParser<T>::IncLinePos() { return ++nlLinePos; } |
|
121 | 47.3k | template<typename T> void SvParser<T>::SetLineNr( sal_uInt32 nlNum ) { nlLineNr = nlNum; } Unexecuted instantiation: SvParser<int>::SetLineNr(unsigned int) SvParser<HtmlTokenId>::SetLineNr(unsigned int) Line | Count | Source | 121 | 47.3k | template<typename T> void SvParser<T>::SetLineNr( sal_uInt32 nlNum ) { nlLineNr = nlNum; } |
|
122 | 2.41M | template<typename T> void SvParser<T>::SetLinePos( sal_uInt32 nlPos ) { nlLinePos = nlPos; } SvParser<int>::SetLinePos(unsigned int) Line | Count | Source | 122 | 588k | template<typename T> void SvParser<T>::SetLinePos( sal_uInt32 nlPos ) { nlLinePos = nlPos; } |
SvParser<HtmlTokenId>::SetLinePos(unsigned int) Line | Count | Source | 122 | 1.82M | template<typename T> void SvParser<T>::SetLinePos( sal_uInt32 nlPos ) { nlLinePos = nlPos; } |
|
123 | 163M | template<typename T> bool SvParser<T>::IsParserWorking() const { return SvParserState::Working == eState; } SvParser<int>::IsParserWorking() const Line | Count | Source | 123 | 7.99M | template<typename T> bool SvParser<T>::IsParserWorking() const { return SvParserState::Working == eState; } |
SvParser<HtmlTokenId>::IsParserWorking() const Line | Count | Source | 123 | 155M | template<typename T> bool SvParser<T>::IsParserWorking() const { return SvParserState::Working == eState; } |
|
124 | 194k | template<typename T> rtl_TextEncoding SvParser<T>::GetSrcEncoding() const { return eSrcEnc; } SvParser<int>::GetSrcEncoding() const Line | Count | Source | 124 | 188k | template<typename T> rtl_TextEncoding SvParser<T>::GetSrcEncoding() const { return eSrcEnc; } |
SvParser<HtmlTokenId>::GetSrcEncoding() const Line | Count | Source | 124 | 6.16k | template<typename T> rtl_TextEncoding SvParser<T>::GetSrcEncoding() const { return eSrcEnc; } |
|
125 | 42.5k | template<typename T> void SvParser<T>::SetSwitchToUCS2( bool bSet ) { bSwitchToUCS2 = bSet; } Unexecuted instantiation: SvParser<int>::SetSwitchToUCS2(bool) SvParser<HtmlTokenId>::SetSwitchToUCS2(bool) Line | Count | Source | 125 | 42.5k | template<typename T> void SvParser<T>::SetSwitchToUCS2( bool bSet ) { bSwitchToUCS2 = bSet; } |
|
126 | 0 | template<typename T> bool SvParser<T>::IsSwitchToUCS2() const { return bSwitchToUCS2; } Unexecuted instantiation: SvParser<int>::IsSwitchToUCS2() const Unexecuted instantiation: SvParser<HtmlTokenId>::IsSwitchToUCS2() const |
127 | 1.38k | template<typename T> sal_uInt16 SvParser<T>::GetCharSize() const { return (RTL_TEXTENCODING_UCS2 == eSrcEnc) ? 2 : 1; } Unexecuted instantiation: SvParser<int>::GetCharSize() const SvParser<HtmlTokenId>::GetCharSize() const Line | Count | Source | 127 | 1.38k | template<typename T> sal_uInt16 SvParser<T>::GetCharSize() const { return (RTL_TEXTENCODING_UCS2 == eSrcEnc) ? 2 : 1; } |
|
128 | | template<typename T> Link<LinkParamNone*,void> SvParser<T>::GetAsynchCallLink() const |
129 | 0 | { |
130 | 0 | return LINK( const_cast<SvParser*>(this), SvParser, NewDataRead ); |
131 | 0 | } Unexecuted instantiation: SvParser<int>::GetAsynchCallLink() const Unexecuted instantiation: SvParser<HtmlTokenId>::GetAsynchCallLink() const |
132 | | |
133 | | template<typename T> |
134 | | void SvParser<T>::ClearTxtConvContext() |
135 | 94.9k | { |
136 | 94.9k | if( pImplData && pImplData->hConv ) |
137 | 86.4k | rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext ); |
138 | 94.9k | } Unexecuted instantiation: SvParser<int>::ClearTxtConvContext() SvParser<HtmlTokenId>::ClearTxtConvContext() Line | Count | Source | 135 | 94.9k | { | 136 | 94.9k | if( pImplData && pImplData->hConv ) | 137 | 86.4k | rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext ); | 138 | 94.9k | } |
|
139 | | |
140 | | template<typename T> |
141 | | void SvParser<T>::SetSrcEncoding( rtl_TextEncoding eEnc ) |
142 | 257k | { |
143 | 257k | if( eEnc == eSrcEnc ) |
144 | 136k | return; |
145 | | |
146 | 120k | if( pImplData && pImplData->hConv ) |
147 | 66.0k | { |
148 | 66.0k | rtl_destroyTextToUnicodeContext( pImplData->hConv, |
149 | 66.0k | pImplData->hContext ); |
150 | 66.0k | rtl_destroyTextToUnicodeConverter( pImplData->hConv ); |
151 | 66.0k | pImplData->hConv = nullptr; |
152 | 66.0k | pImplData->hContext = reinterpret_cast<rtl_TextToUnicodeContext>(1); |
153 | 66.0k | } |
154 | | |
155 | 120k | if( rtl_isOctetTextEncoding(eEnc) || |
156 | 120k | RTL_TEXTENCODING_UCS2 == eEnc ) |
157 | 119k | { |
158 | 119k | eSrcEnc = eEnc; |
159 | 119k | if( !pImplData ) |
160 | 52.9k | pImplData.reset(new SvParser_Impl<T>); |
161 | 119k | pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc ); |
162 | 119k | DBG_ASSERT( pImplData->hConv, |
163 | 119k | "SvParser::SetSrcEncoding: no converter for source encoding" ); |
164 | 119k | if( !pImplData->hConv ) |
165 | 1.19k | eSrcEnc = RTL_TEXTENCODING_DONTKNOW; |
166 | 118k | else |
167 | 118k | pImplData->hContext = |
168 | 118k | rtl_createTextToUnicodeContext( pImplData->hConv ); |
169 | 119k | } |
170 | 1.18k | else |
171 | 1.18k | { |
172 | 1.18k | SAL_WARN( "svtools", |
173 | 1.18k | "SvParser::SetSrcEncoding: invalid source encoding" ); |
174 | 1.18k | eSrcEnc = RTL_TEXTENCODING_DONTKNOW; |
175 | 1.18k | } |
176 | 120k | } SvParser<int>::SetSrcEncoding(unsigned short) Line | Count | Source | 142 | 152k | { | 143 | 152k | if( eEnc == eSrcEnc ) | 144 | 128k | return; | 145 | | | 146 | 24.4k | if( pImplData && pImplData->hConv ) | 147 | 12.6k | { | 148 | 12.6k | rtl_destroyTextToUnicodeContext( pImplData->hConv, | 149 | 12.6k | pImplData->hContext ); | 150 | 12.6k | rtl_destroyTextToUnicodeConverter( pImplData->hConv ); | 151 | 12.6k | pImplData->hConv = nullptr; | 152 | 12.6k | pImplData->hContext = reinterpret_cast<rtl_TextToUnicodeContext>(1); | 153 | 12.6k | } | 154 | | | 155 | 24.4k | if( rtl_isOctetTextEncoding(eEnc) || | 156 | 24.4k | RTL_TEXTENCODING_UCS2 == eEnc ) | 157 | 23.2k | { | 158 | 23.2k | eSrcEnc = eEnc; | 159 | 23.2k | if( !pImplData ) | 160 | 10.4k | pImplData.reset(new SvParser_Impl<T>); | 161 | 23.2k | pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc ); | 162 | 23.2k | DBG_ASSERT( pImplData->hConv, | 163 | 23.2k | "SvParser::SetSrcEncoding: no converter for source encoding" ); | 164 | 23.2k | if( !pImplData->hConv ) | 165 | 386 | eSrcEnc = RTL_TEXTENCODING_DONTKNOW; | 166 | 22.8k | else | 167 | 22.8k | pImplData->hContext = | 168 | 22.8k | rtl_createTextToUnicodeContext( pImplData->hConv ); | 169 | 23.2k | } | 170 | 1.18k | else | 171 | 1.18k | { | 172 | 1.18k | SAL_WARN( "svtools", | 173 | 1.18k | "SvParser::SetSrcEncoding: invalid source encoding" ); | 174 | 1.18k | eSrcEnc = RTL_TEXTENCODING_DONTKNOW; | 175 | 1.18k | } | 176 | 24.4k | } |
SvParser<HtmlTokenId>::SetSrcEncoding(unsigned short) Line | Count | Source | 142 | 104k | { | 143 | 104k | if( eEnc == eSrcEnc ) | 144 | 8.19k | return; | 145 | | | 146 | 96.2k | if( pImplData && pImplData->hConv ) | 147 | 53.3k | { | 148 | 53.3k | rtl_destroyTextToUnicodeContext( pImplData->hConv, | 149 | 53.3k | pImplData->hContext ); | 150 | 53.3k | rtl_destroyTextToUnicodeConverter( pImplData->hConv ); | 151 | 53.3k | pImplData->hConv = nullptr; | 152 | 53.3k | pImplData->hContext = reinterpret_cast<rtl_TextToUnicodeContext>(1); | 153 | 53.3k | } | 154 | | | 155 | 96.2k | if( rtl_isOctetTextEncoding(eEnc) || | 156 | 96.2k | RTL_TEXTENCODING_UCS2 == eEnc ) | 157 | 96.2k | { | 158 | 96.2k | eSrcEnc = eEnc; | 159 | 96.2k | if( !pImplData ) | 160 | 42.5k | pImplData.reset(new SvParser_Impl<T>); | 161 | 96.2k | pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc ); | 162 | 96.2k | DBG_ASSERT( pImplData->hConv, | 163 | 96.2k | "SvParser::SetSrcEncoding: no converter for source encoding" ); | 164 | 96.2k | if( !pImplData->hConv ) | 165 | 808 | eSrcEnc = RTL_TEXTENCODING_DONTKNOW; | 166 | 95.4k | else | 167 | 95.4k | pImplData->hContext = | 168 | 95.4k | rtl_createTextToUnicodeContext( pImplData->hConv ); | 169 | 96.2k | } | 170 | 0 | else | 171 | 0 | { | 172 | 0 | SAL_WARN( "svtools", | 173 | 0 | "SvParser::SetSrcEncoding: invalid source encoding" ); | 174 | 0 | eSrcEnc = RTL_TEXTENCODING_DONTKNOW; | 175 | 0 | } | 176 | 96.2k | } |
|
177 | | |
178 | | template<typename T> |
179 | | void SvParser<T>::RereadLookahead() |
180 | 38.7k | { |
181 | 38.7k | rInput.Seek(nNextChPos); |
182 | 38.7k | nNextCh = GetNextChar(); |
183 | 38.7k | } SvParser<int>::RereadLookahead() Line | Count | Source | 180 | 38.7k | { | 181 | 38.7k | rInput.Seek(nNextChPos); | 182 | 38.7k | nNextCh = GetNextChar(); | 183 | 38.7k | } |
Unexecuted instantiation: SvParser<HtmlTokenId>::RereadLookahead() |
184 | | |
// Reads the next character from rInput and returns it as a code point.
//
// Encoding selection: while bSwitchToUCS2 is set and the stream is at
// position 0, the source encoding is determined first. After
// StartReadingUnicodeText() a stream position of 2 selects UCS2 and 3 selects
// UTF-8 (presumably the length of a consumed byte order mark - TODO confirm).
// Otherwise the ICU charset detector is run over the beginning of the stream.
//
// Reading: with UCS2 a UTF-16 unit is read and a following low surrogate is
// combined with it. With DONTKNOW / SYMBOL the byte is taken over unchanged.
// For every other encoding the bytes are fed to the text converter until it
// produces a character or reports an error.
//
// Side effects: nNextChPos receives the stream position before the read,
// nConversionErrors counts failed conversions, and the line / column counters
// are advanced for every character returned on the success path.
//
// Returns sal_Unicode(EOF) when reading failed (or, while fuzzing, after more
// than 128 conversion errors). If the stream error is ERRCODE_IO_PENDING,
// eState is set to Pending instead and the current value of c is returned.
// Values that are not Unicode scalar values are replaced by '?'.
template<typename T>
sal_uInt32 SvParser<T>::GetNextChar()
{
    sal_uInt32 c = 0U;

    // When reading multiple bytes, we don't have to care about the file
    // position when we run into the pending state. The file position is
    // maintained by SaveState/RestoreState.
    if( bSwitchToUCS2 && 0 == rInput.Tell() )
    {
        rInput.StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW);
        if (rInput.good())
        {
            // The stream position after the call tells which case applies.
            sal_uInt64 nPos = rInput.Tell();
            if (nPos == 2)
                eSrcEnc = RTL_TEXTENCODING_UCS2;
            else if (nPos == 3)
                SetSrcEncoding(RTL_TEXTENCODING_UTF8);
            else // Try to detect encoding without BOM
            {
                std::vector<char> buf(65535); // Arbitrarily chosen buffer size (just under 64 KiB)
                const size_t nSize = rInput.ReadBytes(buf.data(), buf.size());
                // Rewind so that the sampled bytes are parsed normally afterwards.
                rInput.Seek(0);
                if (nSize > 0)
                {
                    UErrorCode uerr = U_ZERO_ERROR;
                    UCharsetDetector* ucd = ucsdet_open(&uerr);
                    ucsdet_setText(ucd, buf.data(), nSize, &uerr);
                    if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr))
                    {
                        const char* pEncodingName = ucsdet_getName(match, &uerr);

                        // Only UTF-8 and UTF-16 results are acted upon; any
                        // other detected name leaves the encoding untouched.
                        if (U_SUCCESS(uerr))
                        {
                            if (strcmp("UTF-8", pEncodingName) == 0)
                            {
                                SetSrcEncoding(RTL_TEXTENCODING_UTF8);
                            }
                            else if (strcmp("UTF-16LE", pEncodingName) == 0)
                            {
                                eSrcEnc = RTL_TEXTENCODING_UCS2;
                                rInput.SetEndian(SvStreamEndian::LITTLE);
                            }
                            else if (strcmp("UTF-16BE", pEncodingName) == 0)
                            {
                                eSrcEnc = RTL_TEXTENCODING_UCS2;
                                rInput.SetEndian(SvStreamEndian::BIG);
                            }
                        }
                    }

                    ucsdet_close(ucd);
                }
            }
        }
        // The check is done only once, whatever its outcome.
        bSwitchToUCS2 = false;
    }

    bool bErr;
    // Remember where this character starts, so the lookahead can be re-read.
    nNextChPos = rInput.Tell();

    if( RTL_TEXTENCODING_UCS2 == eSrcEnc )
    {
        sal_Unicode cUC;
        rInput.ReadUtf16(cUC);
        bErr = !rInput.good();
        if( !bErr )
        {
            c = cUC;
            if (rtl::isHighSurrogate(cUC))
            {
                // Peek at the next unit; keep it only if it completes the pair.
                const sal_uInt64 nPos = rInput.Tell();
                rInput.ReadUtf16(cUC);
                if (rtl::isLowSurrogate(cUC)) // can only be true when ReadUtf16 succeeded
                    c = rtl::combineSurrogates(c, cUC);
                else
                    rInput.Seek(nPos); // process lone high surrogate
            }
        }
    }
    else
    {
        // Number of characters produced so far; the loop repeats while the
        // converter has consumed a byte without producing output.
        sal_Size nChars = 0;
        do
        {
            char c1;    // a plain char, because its address is handed to the text converter
            rInput.ReadChar( c1 );
            bErr = !rInput.good();
            if( !bErr )
            {
                if (
                     RTL_TEXTENCODING_DONTKNOW == eSrcEnc ||
                     RTL_TEXTENCODING_SYMBOL == eSrcEnc
                   )
                {
                    // no conversion shall take place
                    c = reinterpret_cast<unsigned char&>( c1 );
                    nChars = 1;
                }
                else
                {
                    assert(pImplData && pImplData->hConv && "no text converter!");

                    sal_Unicode cUC;
                    sal_uInt32 nInfo = 0;
                    sal_Size nCvtBytes;
                    nChars = rtl_convertTextToUnicode(
                                pImplData->hConv, pImplData->hContext,
                                &c1, 1, &cUC, 1,
                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
                                RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
                                &nInfo, &nCvtBytes);
                    if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 )
                    {
                        // The conversion wasn't successful because we haven't
                        // read enough characters.
                        if( pImplData->hContext != reinterpret_cast<rtl_TextToUnicodeContext>(1) )
                        {
                            // A real context exists: keep feeding single bytes
                            // to the converter. Two output units are provided
                            // so that a surrogate pair fits.
                            sal_Unicode sCh[2];
                            while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 )
                            {
                                rInput.ReadChar( c1 );
                                bErr = !rInput.good();
                                if( bErr )
                                    break;

                                nChars = rtl_convertTextToUnicode(
                                            pImplData->hConv, pImplData->hContext,
                                            &c1, 1, sCh , 2,
                                            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
                                            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
                                            RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
                                            &nInfo, &nCvtBytes);
                            }
                            if( !bErr )
                            {
                                if( 1 == nChars && 0 == nInfo )
                                {
                                    c = sal_uInt32( sCh[0] );
                                }
                                else if( 2 == nChars && 0 == nInfo )
                                {
                                    c = rtl::combineSurrogates( sCh[0], sCh[1] );
                                }
                                else if( 0 != nChars || 0 != nInfo )
                                {
                                    DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0,
                                        "source buffer is too small" );
                                    DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0,
                                         "there is a conversion error" );
                                    DBG_ASSERT( 0 == nChars,
                                       "there is a converted character, but an error" );
                                    // There are still errors, but nothing we can
                                    // do
                                    c = '?';
                                    nChars = 1;
                                    ++nConversionErrors;
                                }
                            }
                        }
                        else
                        {
                            // Only the placeholder context is available:
                            // collect the bytes in a local buffer (at most 10)
                            // and convert the whole buffer without a context.
                            char sBuffer[10];
                            sBuffer[0] = c1;
                            sal_uInt16 nLen = 1;
                            while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 &&
                                    nLen < 10 )
                            {
                                rInput.ReadChar( c1 );
                                bErr = !rInput.good();
                                if( bErr )
                                    break;

                                sBuffer[nLen++] = c1;
                                nChars = rtl_convertTextToUnicode(
                                            pImplData->hConv, nullptr, sBuffer, nLen, &cUC, 1,
                                            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
                                            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
                                            RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
                                            &nInfo, &nCvtBytes);
                            }
                            if( !bErr )
                            {
                                if( 1 == nChars && 0 == nInfo )
                                {
                                    DBG_ASSERT( nCvtBytes == nLen,
                                                "no all bytes have been converted!" );
                                    c = cUC;
                                }
                                else
                                {
                                    DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0,
                                        "source buffer is too small" );
                                    DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0,
                                         "there is a conversion error" );
                                    DBG_ASSERT( 0 == nChars,
                                       "there is a converted character, but an error" );

                                    // There are still errors, so we use the first
                                    // character and restart after that.
                                    c = reinterpret_cast<unsigned char&>( sBuffer[0] );
                                    // Step back over every byte except the first one.
                                    rInput.SeekRel( -(nLen-1) );
                                    nChars = 1;
                                    ++nConversionErrors;
                                }
                            }
                        }
                    }
                    else if( 1 == nChars && 0 == nInfo )
                    {
                        // The conversion was successful
                        DBG_ASSERT( nCvtBytes == 1,
                                    "no all bytes have been converted!" );
                        c = cUC;
                    }
                    else if( 0 != nChars || 0 != nInfo )
                    {
                        DBG_ASSERT( 0 == nChars,
                                "there is a converted character, but an error" );
                        DBG_ASSERT( 0 != nInfo,
                                "there is no converted character and no error" );
                        // #73398#: If the character could not be converted,
                        // because a conversion is not available, do no conversion at all.
                        c = reinterpret_cast<unsigned char&>( c1 );
                        nChars = 1;
                        ++nConversionErrors;
                    }
                }
            }
        }
        while( 0 == nChars && !bErr );
    }

    // Anything that is not a Unicode scalar value (e.g. a lone surrogate) is
    // replaced.
    if ( ! rtl::isUnicodeScalarValue( c ) )
        c = '?' ;

    // While fuzzing, give up once more than 128 conversion errors occurred.
    if (bFuzzing && nConversionErrors > 128)
    {
        SAL_WARN("svtools", "SvParser::GetNextChar too many conversion errors while fuzzing, abandoning for performance");
        bErr = true;
    }

    if( bErr )
    {
        if( ERRCODE_IO_PENDING == rInput.GetError() )
        {
            // More data may arrive later: report Pending and hand back c.
            eState = SvParserState::Pending;
            return c;
        }
        else
            return sal_Unicode(EOF);
    }

    // Keep the line / column counters in step with the character returned.
    if( c == '\n' )
    {
        IncLineNr();
        SetLinePos( 1 );
    }
    else
        IncLinePos();

    return c;
}
| 217 | 0 | if (U_SUCCESS(uerr)) | 218 | 0 | { | 219 | 0 | if (strcmp("UTF-8", pEncodingName) == 0) | 220 | 0 | { | 221 | 0 | SetSrcEncoding(RTL_TEXTENCODING_UTF8); | 222 | 0 | } | 223 | 0 | else if (strcmp("UTF-16LE", pEncodingName) == 0) | 224 | 0 | { | 225 | 0 | eSrcEnc = RTL_TEXTENCODING_UCS2; | 226 | 0 | rInput.SetEndian(SvStreamEndian::LITTLE); | 227 | 0 | } | 228 | 0 | else if (strcmp("UTF-16BE", pEncodingName) == 0) | 229 | 0 | { | 230 | 0 | eSrcEnc = RTL_TEXTENCODING_UCS2; | 231 | 0 | rInput.SetEndian(SvStreamEndian::BIG); | 232 | 0 | } | 233 | 0 | } | 234 | 0 | } | 235 | |
| 236 | 0 | ucsdet_close(ucd); | 237 | 0 | } | 238 | 0 | } | 239 | 0 | } | 240 | 0 | bSwitchToUCS2 = false; | 241 | 0 | } | 242 | | | 243 | 11.5M | bool bErr; | 244 | 11.5M | nNextChPos = rInput.Tell(); | 245 | | | 246 | 11.5M | if( RTL_TEXTENCODING_UCS2 == eSrcEnc ) | 247 | 0 | { | 248 | 0 | sal_Unicode cUC; | 249 | 0 | rInput.ReadUtf16(cUC); | 250 | 0 | bErr = !rInput.good(); | 251 | 0 | if( !bErr ) | 252 | 0 | { | 253 | 0 | c = cUC; | 254 | 0 | if (rtl::isHighSurrogate(cUC)) | 255 | 0 | { | 256 | 0 | const sal_uInt64 nPos = rInput.Tell(); | 257 | 0 | rInput.ReadUtf16(cUC); | 258 | 0 | if (rtl::isLowSurrogate(cUC)) // can only be true when ReadUtf16 succeeded | 259 | 0 | c = rtl::combineSurrogates(c, cUC); | 260 | 0 | else | 261 | 0 | rInput.Seek(nPos); // process lone high surrogate | 262 | 0 | } | 263 | 0 | } | 264 | 0 | } | 265 | 11.5M | else | 266 | 11.5M | { | 267 | 11.5M | sal_Size nChars = 0; | 268 | 11.5M | do | 269 | 11.5M | { | 270 | 11.5M | char c1; // signed, that's the text converter expects | 271 | 11.5M | rInput.ReadChar( c1 ); | 272 | 11.5M | bErr = !rInput.good(); | 273 | 11.5M | if( !bErr ) | 274 | 11.5M | { | 275 | 11.5M | if ( | 276 | 11.5M | RTL_TEXTENCODING_DONTKNOW == eSrcEnc || | 277 | 11.5M | RTL_TEXTENCODING_SYMBOL == eSrcEnc | 278 | 11.5M | ) | 279 | 227k | { | 280 | | // no conversion shall take place | 281 | 227k | c = reinterpret_cast<unsigned char&>( c1 ); | 282 | 227k | nChars = 1; | 283 | 227k | } | 284 | 11.3M | else | 285 | 11.3M | { | 286 | 11.3M | assert(pImplData && pImplData->hConv && "no text converter!"); | 287 | | | 288 | 11.3M | sal_Unicode cUC; | 289 | 11.3M | sal_uInt32 nInfo = 0; | 290 | 11.3M | sal_Size nCvtBytes; | 291 | 11.3M | nChars = rtl_convertTextToUnicode( | 292 | 11.3M | pImplData->hConv, pImplData->hContext, | 293 | 11.3M | &c1, 1, &cUC, 1, | 294 | 11.3M | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| | 295 | 11.3M | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| | 296 | 11.3M | 
RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, | 297 | 11.3M | &nInfo, &nCvtBytes); | 298 | 11.3M | if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 ) | 299 | 11.0k | { | 300 | | // The conversion wasn't successful because we haven't | 301 | | // read enough characters. | 302 | 11.0k | if( pImplData->hContext != reinterpret_cast<rtl_TextToUnicodeContext>(1) ) | 303 | 5.96k | { | 304 | 5.96k | sal_Unicode sCh[2]; | 305 | 15.9k | while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 ) | 306 | 10.0k | { | 307 | 10.0k | rInput.ReadChar( c1 ); | 308 | 10.0k | bErr = !rInput.good(); | 309 | 10.0k | if( bErr ) | 310 | 38 | break; | 311 | | | 312 | 10.0k | nChars = rtl_convertTextToUnicode( | 313 | 10.0k | pImplData->hConv, pImplData->hContext, | 314 | 10.0k | &c1, 1, sCh , 2, | 315 | 10.0k | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| | 316 | 10.0k | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| | 317 | 10.0k | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, | 318 | 10.0k | &nInfo, &nCvtBytes); | 319 | 10.0k | } | 320 | 5.96k | if( !bErr ) | 321 | 5.92k | { | 322 | 5.92k | if( 1 == nChars && 0 == nInfo ) | 323 | 1.72k | { | 324 | 1.72k | c = sal_uInt32( sCh[0] ); | 325 | 1.72k | } | 326 | 4.19k | else if( 2 == nChars && 0 == nInfo ) | 327 | 1.22k | { | 328 | 1.22k | c = rtl::combineSurrogates( sCh[0], sCh[1] ); | 329 | 1.22k | } | 330 | 2.97k | else if( 0 != nChars || 0 != nInfo ) | 331 | 2.97k | { | 332 | 2.97k | DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0, | 333 | 2.97k | "source buffer is too small" ); | 334 | 2.97k | DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0, | 335 | 2.97k | "there is a conversion error" ); | 336 | 2.97k | DBG_ASSERT( 0 == nChars, | 337 | 2.97k | "there is a converted character, but an error" ); | 338 | | // There are still errors, but nothing we can | 339 | | // do | 340 | 2.97k | c = '?'; | 341 | 2.97k | nChars = 1; | 342 | 2.97k | ++nConversionErrors; | 343 | 2.97k | } | 344 | 5.92k | } | 345 | 5.96k | } | 
346 | 5.09k | else | 347 | 5.09k | { | 348 | 5.09k | char sBuffer[10]; | 349 | 5.09k | sBuffer[0] = c1; | 350 | 5.09k | sal_uInt16 nLen = 1; | 351 | 10.1k | while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 && | 352 | 10.1k | nLen < 10 ) | 353 | 5.09k | { | 354 | 5.09k | rInput.ReadChar( c1 ); | 355 | 5.09k | bErr = !rInput.good(); | 356 | 5.09k | if( bErr ) | 357 | 18 | break; | 358 | | | 359 | 5.07k | sBuffer[nLen++] = c1; | 360 | 5.07k | nChars = rtl_convertTextToUnicode( | 361 | 5.07k | pImplData->hConv, nullptr, sBuffer, nLen, &cUC, 1, | 362 | 5.07k | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| | 363 | 5.07k | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| | 364 | 5.07k | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, | 365 | 5.07k | &nInfo, &nCvtBytes); | 366 | 5.07k | } | 367 | 5.09k | if( !bErr ) | 368 | 5.07k | { | 369 | 5.07k | if( 1 == nChars && 0 == nInfo ) | 370 | 3.10k | { | 371 | 3.10k | DBG_ASSERT( nCvtBytes == nLen, | 372 | 3.10k | "no all bytes have been converted!" ); | 373 | 3.10k | c = cUC; | 374 | 3.10k | } | 375 | 1.97k | else | 376 | 1.97k | { | 377 | 1.97k | DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0, | 378 | 1.97k | "source buffer is too small" ); | 379 | 1.97k | DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0, | 380 | 1.97k | "there is a conversion error" ); | 381 | 1.97k | DBG_ASSERT( 0 == nChars, | 382 | 1.97k | "there is a converted character, but an error" ); | 383 | | | 384 | | // There are still errors, so we use the first | 385 | | // character and restart after that. 
| 386 | 1.97k | c = reinterpret_cast<unsigned char&>( sBuffer[0] ); | 387 | 1.97k | rInput.SeekRel( -(nLen-1) ); | 388 | 1.97k | nChars = 1; | 389 | 1.97k | ++nConversionErrors; | 390 | 1.97k | } | 391 | 5.07k | } | 392 | 5.09k | } | 393 | 11.0k | } | 394 | 11.3M | else if( 1 == nChars && 0 == nInfo ) | 395 | 11.2M | { | 396 | | // The conversion was successful | 397 | 11.2M | DBG_ASSERT( nCvtBytes == 1, | 398 | 11.2M | "no all bytes have been converted!" ); | 399 | 11.2M | c = cUC; | 400 | 11.2M | } | 401 | 30.9k | else if( 0 != nChars || 0 != nInfo ) | 402 | 30.9k | { | 403 | 30.9k | DBG_ASSERT( 0 == nChars, | 404 | 30.9k | "there is a converted character, but an error" ); | 405 | 30.9k | DBG_ASSERT( 0 != nInfo, | 406 | 30.9k | "there is no converted character and no error" ); | 407 | | // #73398#: If the character could not be converted, | 408 | | // because a conversion is not available, do no conversion at all. | 409 | 30.9k | c = reinterpret_cast<unsigned char&>( c1 ); | 410 | 30.9k | nChars = 1; | 411 | 30.9k | ++nConversionErrors; | 412 | 30.9k | } | 413 | 11.3M | } | 414 | 11.5M | } | 415 | 11.5M | } | 416 | 11.5M | while( 0 == nChars && !bErr ); | 417 | 11.5M | } | 418 | | | 419 | 11.5M | if ( ! rtl::isUnicodeScalarValue( c ) ) | 420 | 0 | c = '?' 
; | 421 | | | 422 | 11.5M | if (bFuzzing && nConversionErrors > 128) | 423 | 227 | { | 424 | 227 | SAL_WARN("svtools", "SvParser::GetNextChar too many conversion errors while fuzzing, abandoning for performance"); | 425 | 227 | bErr = true; | 426 | 227 | } | 427 | | | 428 | 11.5M | if( bErr ) | 429 | 25.9k | { | 430 | 25.9k | if( ERRCODE_IO_PENDING == rInput.GetError() ) | 431 | 0 | { | 432 | 0 | eState = SvParserState::Pending; | 433 | 0 | return c; | 434 | 0 | } | 435 | 25.9k | else | 436 | 25.9k | return sal_Unicode(EOF); | 437 | 25.9k | } | 438 | | | 439 | 11.5M | if( c == '\n' ) | 440 | 588k | { | 441 | 588k | IncLineNr(); | 442 | 588k | SetLinePos( 1 ); | 443 | 588k | } | 444 | 10.9M | else | 445 | 10.9M | IncLinePos(); | 446 | | | 447 | 11.5M | return c; | 448 | 11.5M | } |
SvParser<HtmlTokenId>::GetNextChar() Line | Count | Source | 187 | 112M | { | 188 | 112M | sal_uInt32 c = 0U; | 189 | | | 190 | | // When reading multiple bytes, we don't have to care about the file | 191 | | // position when we run into the pending state. The file position is | 192 | | // maintained by SaveState/RestoreState. | 193 | 112M | if( bSwitchToUCS2 && 0 == rInput.Tell() ) | 194 | 42.5k | { | 195 | 42.5k | rInput.StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW); | 196 | 42.5k | if (rInput.good()) | 197 | 42.5k | { | 198 | 42.5k | sal_uInt64 nPos = rInput.Tell(); | 199 | 42.5k | if (nPos == 2) | 200 | 89 | eSrcEnc = RTL_TEXTENCODING_UCS2; | 201 | 42.4k | else if (nPos == 3) | 202 | 86 | SetSrcEncoding(RTL_TEXTENCODING_UTF8); | 203 | 42.3k | else // Try to detect encoding without BOM | 204 | 42.3k | { | 205 | 42.3k | std::vector<char> buf(65535); // Arbitrarily chosen 64KiB buffer | 206 | 42.3k | const size_t nSize = rInput.ReadBytes(buf.data(), buf.size()); | 207 | 42.3k | rInput.Seek(0); | 208 | 42.3k | if (nSize > 0) | 209 | 42.3k | { | 210 | 42.3k | UErrorCode uerr = U_ZERO_ERROR; | 211 | 42.3k | UCharsetDetector* ucd = ucsdet_open(&uerr); | 212 | 42.3k | ucsdet_setText(ucd, buf.data(), nSize, &uerr); | 213 | 42.3k | if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr)) | 214 | 42.1k | { | 215 | 42.1k | const char* pEncodingName = ucsdet_getName(match, &uerr); | 216 | | | 217 | 42.1k | if (U_SUCCESS(uerr)) | 218 | 42.1k | { | 219 | 42.1k | if (strcmp("UTF-8", pEncodingName) == 0) | 220 | 3.85k | { | 221 | 3.85k | SetSrcEncoding(RTL_TEXTENCODING_UTF8); | 222 | 3.85k | } | 223 | 38.3k | else if (strcmp("UTF-16LE", pEncodingName) == 0) | 224 | 614 | { | 225 | 614 | eSrcEnc = RTL_TEXTENCODING_UCS2; | 226 | 614 | rInput.SetEndian(SvStreamEndian::LITTLE); | 227 | 614 | } | 228 | 37.7k | else if (strcmp("UTF-16BE", pEncodingName) == 0) | 229 | 532 | { | 230 | 532 | eSrcEnc = RTL_TEXTENCODING_UCS2; | 231 | 532 | rInput.SetEndian(SvStreamEndian::BIG); | 
232 | 532 | } | 233 | 42.1k | } | 234 | 42.1k | } | 235 | | | 236 | 42.3k | ucsdet_close(ucd); | 237 | 42.3k | } | 238 | 42.3k | } | 239 | 42.5k | } | 240 | 42.5k | bSwitchToUCS2 = false; | 241 | 42.5k | } | 242 | | | 243 | 112M | bool bErr; | 244 | 112M | nNextChPos = rInput.Tell(); | 245 | | | 246 | 112M | if( RTL_TEXTENCODING_UCS2 == eSrcEnc ) | 247 | 1.14M | { | 248 | 1.14M | sal_Unicode cUC; | 249 | 1.14M | rInput.ReadUtf16(cUC); | 250 | 1.14M | bErr = !rInput.good(); | 251 | 1.14M | if( !bErr ) | 252 | 1.14M | { | 253 | 1.14M | c = cUC; | 254 | 1.14M | if (rtl::isHighSurrogate(cUC)) | 255 | 12.9k | { | 256 | 12.9k | const sal_uInt64 nPos = rInput.Tell(); | 257 | 12.9k | rInput.ReadUtf16(cUC); | 258 | 12.9k | if (rtl::isLowSurrogate(cUC)) // can only be true when ReadUtf16 succeeded | 259 | 525 | c = rtl::combineSurrogates(c, cUC); | 260 | 12.4k | else | 261 | 12.4k | rInput.Seek(nPos); // process lone high surrogate | 262 | 12.9k | } | 263 | 1.14M | } | 264 | 1.14M | } | 265 | 111M | else | 266 | 111M | { | 267 | 111M | sal_Size nChars = 0; | 268 | 111M | do | 269 | 111M | { | 270 | 111M | char c1; // signed, that's the text converter expects | 271 | 111M | rInput.ReadChar( c1 ); | 272 | 111M | bErr = !rInput.good(); | 273 | 111M | if( !bErr ) | 274 | 111M | { | 275 | 111M | if ( | 276 | 111M | RTL_TEXTENCODING_DONTKNOW == eSrcEnc || | 277 | 111M | RTL_TEXTENCODING_SYMBOL == eSrcEnc | 278 | 111M | ) | 279 | 3.55M | { | 280 | | // no conversion shall take place | 281 | 3.55M | c = reinterpret_cast<unsigned char&>( c1 ); | 282 | 3.55M | nChars = 1; | 283 | 3.55M | } | 284 | 108M | else | 285 | 108M | { | 286 | 108M | assert(pImplData && pImplData->hConv && "no text converter!"); | 287 | | | 288 | 108M | sal_Unicode cUC; | 289 | 108M | sal_uInt32 nInfo = 0; | 290 | 108M | sal_Size nCvtBytes; | 291 | 108M | nChars = rtl_convertTextToUnicode( | 292 | 108M | pImplData->hConv, pImplData->hContext, | 293 | 108M | &c1, 1, &cUC, 1, | 294 | 108M | 
RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| | 295 | 108M | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| | 296 | 108M | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, | 297 | 108M | &nInfo, &nCvtBytes); | 298 | 108M | if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 ) | 299 | 657k | { | 300 | | // The conversion wasn't successful because we haven't | 301 | | // read enough characters. | 302 | 657k | if( pImplData->hContext != reinterpret_cast<rtl_TextToUnicodeContext>(1) ) | 303 | 657k | { | 304 | 657k | sal_Unicode sCh[2]; | 305 | 1.54M | while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 ) | 306 | 888k | { | 307 | 888k | rInput.ReadChar( c1 ); | 308 | 888k | bErr = !rInput.good(); | 309 | 888k | if( bErr ) | 310 | 1.43k | break; | 311 | | | 312 | 887k | nChars = rtl_convertTextToUnicode( | 313 | 887k | pImplData->hConv, pImplData->hContext, | 314 | 887k | &c1, 1, sCh , 2, | 315 | 887k | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| | 316 | 887k | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| | 317 | 887k | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, | 318 | 887k | &nInfo, &nCvtBytes); | 319 | 887k | } | 320 | 657k | if( !bErr ) | 321 | 655k | { | 322 | 655k | if( 1 == nChars && 0 == nInfo ) | 323 | 273k | { | 324 | 273k | c = sal_uInt32( sCh[0] ); | 325 | 273k | } | 326 | 382k | else if( 2 == nChars && 0 == nInfo ) | 327 | 58.2k | { | 328 | 58.2k | c = rtl::combineSurrogates( sCh[0], sCh[1] ); | 329 | 58.2k | } | 330 | 324k | else if( 0 != nChars || 0 != nInfo ) | 331 | 324k | { | 332 | 324k | DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0, | 333 | 324k | "source buffer is too small" ); | 334 | 324k | DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0, | 335 | 324k | "there is a conversion error" ); | 336 | 324k | DBG_ASSERT( 0 == nChars, | 337 | 324k | "there is a converted character, but an error" ); | 338 | | // There are still errors, but nothing we can | 339 | | // do | 340 | 324k | c = '?'; | 341 | 324k | nChars = 1; | 342 | 
324k | ++nConversionErrors; | 343 | 324k | } | 344 | 655k | } | 345 | 657k | } | 346 | 0 | else | 347 | 0 | { | 348 | 0 | char sBuffer[10]; | 349 | 0 | sBuffer[0] = c1; | 350 | 0 | sal_uInt16 nLen = 1; | 351 | 0 | while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 && | 352 | 0 | nLen < 10 ) | 353 | 0 | { | 354 | 0 | rInput.ReadChar( c1 ); | 355 | 0 | bErr = !rInput.good(); | 356 | 0 | if( bErr ) | 357 | 0 | break; | 358 | | | 359 | 0 | sBuffer[nLen++] = c1; | 360 | 0 | nChars = rtl_convertTextToUnicode( | 361 | 0 | pImplData->hConv, nullptr, sBuffer, nLen, &cUC, 1, | 362 | 0 | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| | 363 | 0 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| | 364 | 0 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, | 365 | 0 | &nInfo, &nCvtBytes); | 366 | 0 | } | 367 | 0 | if( !bErr ) | 368 | 0 | { | 369 | 0 | if( 1 == nChars && 0 == nInfo ) | 370 | 0 | { | 371 | 0 | DBG_ASSERT( nCvtBytes == nLen, | 372 | 0 | "no all bytes have been converted!" ); | 373 | 0 | c = cUC; | 374 | 0 | } | 375 | 0 | else | 376 | 0 | { | 377 | 0 | DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0, | 378 | 0 | "source buffer is too small" ); | 379 | 0 | DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0, | 380 | 0 | "there is a conversion error" ); | 381 | 0 | DBG_ASSERT( 0 == nChars, | 382 | 0 | "there is a converted character, but an error" ); | 383 | | | 384 | | // There are still errors, so we use the first | 385 | | // character and restart after that. | 386 | 0 | c = reinterpret_cast<unsigned char&>( sBuffer[0] ); | 387 | 0 | rInput.SeekRel( -(nLen-1) ); | 388 | 0 | nChars = 1; | 389 | 0 | ++nConversionErrors; | 390 | 0 | } | 391 | 0 | } | 392 | 0 | } | 393 | 657k | } | 394 | 107M | else if( 1 == nChars && 0 == nInfo ) | 395 | 106M | { | 396 | | // The conversion was successful | 397 | 106M | DBG_ASSERT( nCvtBytes == 1, | 398 | 106M | "no all bytes have been converted!" 
); | 399 | 106M | c = cUC; | 400 | 106M | } | 401 | 884k | else if( 0 != nChars || 0 != nInfo ) | 402 | 884k | { | 403 | 884k | DBG_ASSERT( 0 == nChars, | 404 | 884k | "there is a converted character, but an error" ); | 405 | 884k | DBG_ASSERT( 0 != nInfo, | 406 | 884k | "there is no converted character and no error" ); | 407 | | // #73398#: If the character could not be converted, | 408 | | // because a conversion is not available, do no conversion at all. | 409 | 884k | c = reinterpret_cast<unsigned char&>( c1 ); | 410 | 884k | nChars = 1; | 411 | 884k | ++nConversionErrors; | 412 | 884k | } | 413 | 108M | } | 414 | 111M | } | 415 | 111M | } | 416 | 111M | while( 0 == nChars && !bErr ); | 417 | 111M | } | 418 | | | 419 | 112M | if ( ! rtl::isUnicodeScalarValue( c ) ) | 420 | 18.0k | c = '?' ; | 421 | | | 422 | 112M | if (bFuzzing && nConversionErrors > 128) | 423 | 2.38M | { | 424 | 2.38M | SAL_WARN("svtools", "SvParser::GetNextChar too many conversion errors while fuzzing, abandoning for performance"); | 425 | 2.38M | bErr = true; | 426 | 2.38M | } | 427 | | | 428 | 112M | if( bErr ) | 429 | 2.46M | { | 430 | 2.46M | if( ERRCODE_IO_PENDING == rInput.GetError() ) | 431 | 0 | { | 432 | 0 | eState = SvParserState::Pending; | 433 | 0 | return c; | 434 | 0 | } | 435 | 2.46M | else | 436 | 2.46M | return sal_Unicode(EOF); | 437 | 2.46M | } | 438 | | | 439 | 110M | if( c == '\n' ) | 440 | 1.78M | { | 441 | 1.78M | IncLineNr(); | 442 | 1.78M | SetLinePos( 1 ); | 443 | 1.78M | } | 444 | 108M | else | 445 | 108M | IncLinePos(); | 446 | | | 447 | 110M | return c; | 448 | 112M | } |
|
449 | | |
450 | | template<typename T> |
451 | | T SvParser<T>::GetNextToken() |
452 | 11.2M | { |
453 | 11.2M | T nRet = static_cast<T>(0); |
454 | | |
455 | 11.2M | if( !nTokenStackPos ) |
456 | 10.6M | { |
457 | 10.6M | aToken.setLength( 0 ); // empty token buffer |
458 | 10.6M | nTokenValue = -1; // marker for no value read |
459 | 10.6M | bTokenHasValue = false; |
460 | | |
461 | 10.6M | nRet = GetNextToken_(); |
462 | 10.6M | if( SvParserState::Pending == eState ) |
463 | 0 | return nRet; |
464 | 10.6M | } |
465 | | |
466 | 11.2M | ++pTokenStackPos; |
467 | 11.2M | if( pTokenStackPos == pTokenStack.get() + nTokenStackSize ) |
468 | 3.39M | pTokenStackPos = pTokenStack.get(); |
469 | | |
470 | | // pop from stack ?? |
471 | 11.2M | if( nTokenStackPos ) |
472 | 569k | { |
473 | 569k | --nTokenStackPos; |
474 | 569k | nTokenValue = pTokenStackPos->nTokenValue; |
475 | 569k | bTokenHasValue = pTokenStackPos->bTokenHasValue; |
476 | 569k | aToken = pTokenStackPos->sToken; |
477 | 569k | nRet = pTokenStackPos->nTokenId; |
478 | 569k | ++m_nTokenIndex; |
479 | 569k | } |
480 | | // no, now push actual value on stack |
481 | 10.6M | else if( SvParserState::Working == eState ) |
482 | 10.4M | { |
483 | 10.4M | pTokenStackPos->sToken = aToken; |
484 | 10.4M | pTokenStackPos->nTokenValue = nTokenValue; |
485 | 10.4M | pTokenStackPos->bTokenHasValue = bTokenHasValue; |
486 | 10.4M | pTokenStackPos->nTokenId = nRet; |
487 | 10.4M | ++m_nTokenIndex; |
488 | 10.4M | } |
489 | 254k | else if( SvParserState::Accepted != eState && SvParserState::Pending != eState ) |
490 | 7.59k | eState = SvParserState::Error; // an error occurred |
491 | | |
492 | 11.2M | return nRet; |
493 | 11.2M | } SvParser<int>::GetNextToken() Line | Count | Source | 452 | 2.51M | { | 453 | 2.51M | T nRet = static_cast<T>(0); | 454 | | | 455 | 2.51M | if( !nTokenStackPos ) | 456 | 2.22M | { | 457 | 2.22M | aToken.setLength( 0 ); // empty token buffer | 458 | 2.22M | nTokenValue = -1; // marker for no value read | 459 | 2.22M | bTokenHasValue = false; | 460 | | | 461 | 2.22M | nRet = GetNextToken_(); | 462 | 2.22M | if( SvParserState::Pending == eState ) | 463 | 0 | return nRet; | 464 | 2.22M | } | 465 | | | 466 | 2.51M | ++pTokenStackPos; | 467 | 2.51M | if( pTokenStackPos == pTokenStack.get() + nTokenStackSize ) | 468 | 499k | pTokenStackPos = pTokenStack.get(); | 469 | | | 470 | | // pop from stack ?? | 471 | 2.51M | if( nTokenStackPos ) | 472 | 289k | { | 473 | 289k | --nTokenStackPos; | 474 | 289k | nTokenValue = pTokenStackPos->nTokenValue; | 475 | 289k | bTokenHasValue = pTokenStackPos->bTokenHasValue; | 476 | 289k | aToken = pTokenStackPos->sToken; | 477 | 289k | nRet = pTokenStackPos->nTokenId; | 478 | 289k | ++m_nTokenIndex; | 479 | 289k | } | 480 | | // no, now push actual value on stack | 481 | 2.22M | else if( SvParserState::Working == eState ) | 482 | 2.21M | { | 483 | 2.21M | pTokenStackPos->sToken = aToken; | 484 | 2.21M | pTokenStackPos->nTokenValue = nTokenValue; | 485 | 2.21M | pTokenStackPos->bTokenHasValue = bTokenHasValue; | 486 | 2.21M | pTokenStackPos->nTokenId = nRet; | 487 | 2.21M | ++m_nTokenIndex; | 488 | 2.21M | } | 489 | 10.3k | else if( SvParserState::Accepted != eState && SvParserState::Pending != eState ) | 490 | 745 | eState = SvParserState::Error; // an error occurred | 491 | | | 492 | 2.51M | return nRet; | 493 | 2.51M | } |
SvParser<HtmlTokenId>::GetNextToken() Line | Count | Source | 452 | 8.72M | { | 453 | 8.72M | T nRet = static_cast<T>(0); | 454 | | | 455 | 8.72M | if( !nTokenStackPos ) | 456 | 8.44M | { | 457 | 8.44M | aToken.setLength( 0 ); // empty token buffer | 458 | 8.44M | nTokenValue = -1; // marker for no value read | 459 | 8.44M | bTokenHasValue = false; | 460 | | | 461 | 8.44M | nRet = GetNextToken_(); | 462 | 8.44M | if( SvParserState::Pending == eState ) | 463 | 0 | return nRet; | 464 | 8.44M | } | 465 | | | 466 | 8.72M | ++pTokenStackPos; | 467 | 8.72M | if( pTokenStackPos == pTokenStack.get() + nTokenStackSize ) | 468 | 2.89M | pTokenStackPos = pTokenStack.get(); | 469 | | | 470 | | // pop from stack ?? | 471 | 8.72M | if( nTokenStackPos ) | 472 | 280k | { | 473 | 280k | --nTokenStackPos; | 474 | 280k | nTokenValue = pTokenStackPos->nTokenValue; | 475 | 280k | bTokenHasValue = pTokenStackPos->bTokenHasValue; | 476 | 280k | aToken = pTokenStackPos->sToken; | 477 | 280k | nRet = pTokenStackPos->nTokenId; | 478 | 280k | ++m_nTokenIndex; | 479 | 280k | } | 480 | | // no, now push actual value on stack | 481 | 8.44M | else if( SvParserState::Working == eState ) | 482 | 8.19M | { | 483 | 8.19M | pTokenStackPos->sToken = aToken; | 484 | 8.19M | pTokenStackPos->nTokenValue = nTokenValue; | 485 | 8.19M | pTokenStackPos->bTokenHasValue = bTokenHasValue; | 486 | 8.19M | pTokenStackPos->nTokenId = nRet; | 487 | 8.19M | ++m_nTokenIndex; | 488 | 8.19M | } | 489 | 244k | else if( SvParserState::Accepted != eState && SvParserState::Pending != eState ) | 490 | 6.85k | eState = SvParserState::Error; // an error occurred | 491 | | | 492 | 8.72M | return nRet; | 493 | 8.72M | } |
|
494 | | |
495 | | template<typename T> |
496 | | T SvParser<T>::SkipToken( short nCnt ) // "skip" n Tokens backward |
497 | 554k | { |
498 | 554k | pTokenStackPos = GetStackPtr( nCnt ); |
499 | 554k | short nTmp = nTokenStackPos - nCnt; |
500 | 554k | if( nTmp < 0 ) |
501 | 1 | nTmp = 0; |
502 | 554k | else if( nTmp > nTokenStackSize ) |
503 | 0 | nTmp = nTokenStackSize; |
504 | 554k | nTokenStackPos = sal_uInt8(nTmp); |
505 | | |
506 | 554k | m_nTokenIndex -= nTmp; |
507 | | |
508 | | // restore values |
509 | 554k | aToken = pTokenStackPos->sToken; |
510 | 554k | nTokenValue = pTokenStackPos->nTokenValue; |
511 | 554k | bTokenHasValue = pTokenStackPos->bTokenHasValue; |
512 | | |
513 | 554k | return pTokenStackPos->nTokenId; |
514 | 554k | } SvParser<int>::SkipToken(short) Line | Count | Source | 497 | 273k | { | 498 | 273k | pTokenStackPos = GetStackPtr( nCnt ); | 499 | 273k | short nTmp = nTokenStackPos - nCnt; | 500 | 273k | if( nTmp < 0 ) | 501 | 1 | nTmp = 0; | 502 | 273k | else if( nTmp > nTokenStackSize ) | 503 | 0 | nTmp = nTokenStackSize; | 504 | 273k | nTokenStackPos = sal_uInt8(nTmp); | 505 | | | 506 | 273k | m_nTokenIndex -= nTmp; | 507 | | | 508 | | // restore values | 509 | 273k | aToken = pTokenStackPos->sToken; | 510 | 273k | nTokenValue = pTokenStackPos->nTokenValue; | 511 | 273k | bTokenHasValue = pTokenStackPos->bTokenHasValue; | 512 | | | 513 | 273k | return pTokenStackPos->nTokenId; | 514 | 273k | } |
SvParser<HtmlTokenId>::SkipToken(short) Line | Count | Source | 497 | 280k | { | 498 | 280k | pTokenStackPos = GetStackPtr( nCnt ); | 499 | 280k | short nTmp = nTokenStackPos - nCnt; | 500 | 280k | if( nTmp < 0 ) | 501 | 0 | nTmp = 0; | 502 | 280k | else if( nTmp > nTokenStackSize ) | 503 | 0 | nTmp = nTokenStackSize; | 504 | 280k | nTokenStackPos = sal_uInt8(nTmp); | 505 | | | 506 | 280k | m_nTokenIndex -= nTmp; | 507 | | | 508 | | // restore values | 509 | 280k | aToken = pTokenStackPos->sToken; | 510 | 280k | nTokenValue = pTokenStackPos->nTokenValue; | 511 | 280k | bTokenHasValue = pTokenStackPos->bTokenHasValue; | 512 | | | 513 | 280k | return pTokenStackPos->nTokenId; | 514 | 280k | } |
|
515 | | |
516 | | template<typename T> |
517 | | typename SvParser<T>::TokenStackType* SvParser<T>::GetStackPtr( short nCnt ) |
518 | 719k | { |
519 | 719k | sal_uInt8 nCurrentPos = sal_uInt8(pTokenStackPos - pTokenStack.get()); |
520 | 719k | if( nCnt > 0 ) |
521 | 209 | { |
522 | 209 | if( nCnt >= nTokenStackSize ) |
523 | 0 | nCnt = (nTokenStackSize-1); |
524 | 209 | if( nCurrentPos + nCnt < nTokenStackSize ) |
525 | 146 | nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt); |
526 | 63 | else |
527 | 63 | nCurrentPos = sal::static_int_cast< sal_uInt8 >( |
528 | 63 | nCurrentPos + (nCnt - nTokenStackSize)); |
529 | 209 | } |
530 | 719k | else if( nCnt < 0 ) |
531 | 719k | { |
532 | 719k | if( -nCnt >= nTokenStackSize ) |
533 | 0 | nCnt = -nTokenStackSize+1; |
534 | 719k | if( -nCnt <= nCurrentPos ) |
535 | 532k | nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt); |
536 | 186k | else |
537 | 186k | nCurrentPos = sal::static_int_cast< sal_uInt8 >( |
538 | 186k | nCurrentPos + (nCnt + nTokenStackSize)); |
539 | 719k | } |
540 | 719k | return pTokenStack.get() + nCurrentPos; |
541 | 719k | } SvParser<int>::GetStackPtr(short) Line | Count | Source | 518 | 439k | { | 519 | 439k | sal_uInt8 nCurrentPos = sal_uInt8(pTokenStackPos - pTokenStack.get()); | 520 | 439k | if( nCnt > 0 ) | 521 | 209 | { | 522 | 209 | if( nCnt >= nTokenStackSize ) | 523 | 0 | nCnt = (nTokenStackSize-1); | 524 | 209 | if( nCurrentPos + nCnt < nTokenStackSize ) | 525 | 146 | nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt); | 526 | 63 | else | 527 | 63 | nCurrentPos = sal::static_int_cast< sal_uInt8 >( | 528 | 63 | nCurrentPos + (nCnt - nTokenStackSize)); | 529 | 209 | } | 530 | 438k | else if( nCnt < 0 ) | 531 | 438k | { | 532 | 438k | if( -nCnt >= nTokenStackSize ) | 533 | 0 | nCnt = -nTokenStackSize+1; | 534 | 438k | if( -nCnt <= nCurrentPos ) | 535 | 346k | nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt); | 536 | 92.6k | else | 537 | 92.6k | nCurrentPos = sal::static_int_cast< sal_uInt8 >( | 538 | 92.6k | nCurrentPos + (nCnt + nTokenStackSize)); | 539 | 438k | } | 540 | 439k | return pTokenStack.get() + nCurrentPos; | 541 | 439k | } |
SvParser<HtmlTokenId>::GetStackPtr(short) Line | Count | Source | 518 | 280k | { | 519 | 280k | sal_uInt8 nCurrentPos = sal_uInt8(pTokenStackPos - pTokenStack.get()); | 520 | 280k | if( nCnt > 0 ) | 521 | 0 | { | 522 | 0 | if( nCnt >= nTokenStackSize ) | 523 | 0 | nCnt = (nTokenStackSize-1); | 524 | 0 | if( nCurrentPos + nCnt < nTokenStackSize ) | 525 | 0 | nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt); | 526 | 0 | else | 527 | 0 | nCurrentPos = sal::static_int_cast< sal_uInt8 >( | 528 | 0 | nCurrentPos + (nCnt - nTokenStackSize)); | 529 | 0 | } | 530 | 280k | else if( nCnt < 0 ) | 531 | 280k | { | 532 | 280k | if( -nCnt >= nTokenStackSize ) | 533 | 0 | nCnt = -nTokenStackSize+1; | 534 | 280k | if( -nCnt <= nCurrentPos ) | 535 | 186k | nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt); | 536 | 93.8k | else | 537 | 93.8k | nCurrentPos = sal::static_int_cast< sal_uInt8 >( | 538 | 93.8k | nCurrentPos + (nCnt + nTokenStackSize)); | 539 | 280k | } | 540 | 280k | return pTokenStack.get() + nCurrentPos; | 541 | 280k | } |
|
542 | | |
543 | | // to read asynchronously from SvStream |
544 | | |
545 | | template<typename T> |
546 | | T SvParser<T>::GetSaveToken() const |
547 | 0 | { |
548 | 0 | return pImplData ? pImplData->nSaveToken : static_cast<T>(0); |
549 | 0 | } Unexecuted instantiation: SvParser<int>::GetSaveToken() const Unexecuted instantiation: SvParser<HtmlTokenId>::GetSaveToken() const |
550 | | |
551 | | template<typename T> |
552 | | void SvParser<T>::SaveState( T nToken ) |
553 | 21.3M | { |
554 | | // save actual status |
555 | 21.3M | if( !pImplData ) |
556 | 0 | { |
557 | 0 | pImplData.reset(new SvParser_Impl<T>); |
558 | 0 | pImplData->nSaveToken = static_cast<T>(0); |
559 | 0 | } |
560 | | |
561 | 21.3M | pImplData->nFilePos = rInput.Tell(); |
562 | 21.3M | pImplData->nToken = nToken; |
563 | | |
564 | 21.3M | pImplData->aToken = aToken; |
565 | 21.3M | pImplData->nlLineNr = nlLineNr; |
566 | 21.3M | pImplData->nlLinePos = nlLinePos; |
567 | 21.3M | pImplData->nTokenValue= nTokenValue; |
568 | 21.3M | pImplData->bTokenHasValue = bTokenHasValue; |
569 | 21.3M | pImplData->nNextCh = nNextCh; |
570 | 21.3M | } SvParser<int>::SaveState(int) Line | Count | Source | 553 | 3.62M | { | 554 | | // save actual status | 555 | 3.62M | if( !pImplData ) | 556 | 0 | { | 557 | 0 | pImplData.reset(new SvParser_Impl<T>); | 558 | 0 | pImplData->nSaveToken = static_cast<T>(0); | 559 | 0 | } | 560 | | | 561 | 3.62M | pImplData->nFilePos = rInput.Tell(); | 562 | 3.62M | pImplData->nToken = nToken; | 563 | | | 564 | 3.62M | pImplData->aToken = aToken; | 565 | 3.62M | pImplData->nlLineNr = nlLineNr; | 566 | 3.62M | pImplData->nlLinePos = nlLinePos; | 567 | 3.62M | pImplData->nTokenValue= nTokenValue; | 568 | 3.62M | pImplData->bTokenHasValue = bTokenHasValue; | 569 | 3.62M | pImplData->nNextCh = nNextCh; | 570 | 3.62M | } |
SvParser<HtmlTokenId>::SaveState(HtmlTokenId) Line | Count | Source | 553 | 17.7M | { | 554 | | // save actual status | 555 | 17.7M | if( !pImplData ) | 556 | 0 | { | 557 | 0 | pImplData.reset(new SvParser_Impl<T>); | 558 | 0 | pImplData->nSaveToken = static_cast<T>(0); | 559 | 0 | } | 560 | | | 561 | 17.7M | pImplData->nFilePos = rInput.Tell(); | 562 | 17.7M | pImplData->nToken = nToken; | 563 | | | 564 | 17.7M | pImplData->aToken = aToken; | 565 | 17.7M | pImplData->nlLineNr = nlLineNr; | 566 | 17.7M | pImplData->nlLinePos = nlLinePos; | 567 | 17.7M | pImplData->nTokenValue= nTokenValue; | 568 | 17.7M | pImplData->bTokenHasValue = bTokenHasValue; | 569 | 17.7M | pImplData->nNextCh = nNextCh; | 570 | 17.7M | } |
|
571 | | |
572 | | template<typename T> |
573 | | void SvParser<T>::RestoreState() |
574 | 0 | { |
575 | | // restore old status |
576 | 0 | if( !pImplData ) |
577 | 0 | return; |
578 | | |
579 | 0 | if( ERRCODE_IO_PENDING == rInput.GetError() ) |
580 | 0 | rInput.ResetError(); |
581 | 0 | aToken = pImplData->aToken; |
582 | 0 | nlLineNr = pImplData->nlLineNr; |
583 | 0 | nlLinePos = pImplData->nlLinePos; |
584 | 0 | nTokenValue= pImplData->nTokenValue; |
585 | 0 | bTokenHasValue=pImplData->bTokenHasValue; |
586 | 0 | nNextCh = pImplData->nNextCh; |
587 | |
|
588 | 0 | pImplData->nSaveToken = pImplData->nToken; |
589 | |
|
590 | 0 | rInput.Seek( pImplData->nFilePos ); |
591 | 0 | } Unexecuted instantiation: SvParser<int>::RestoreState() Unexecuted instantiation: SvParser<HtmlTokenId>::RestoreState() |
592 | | |
593 | | template<typename T> |
594 | | void SvParser<T>::Continue( T ) |
595 | 0 | { |
596 | 0 | } Unexecuted instantiation: SvParser<int>::Continue(int) Unexecuted instantiation: SvParser<HtmlTokenId>::Continue(HtmlTokenId) |
597 | | |
598 | | |
599 | | // expanded out version of |
600 | | // IMPL_LINK_NOARG( SvParser, NewDataRead, LinkParamNone*, void ) |
601 | | // since it can't cope with template methods |
602 | | template<typename T> |
603 | 0 | void SvParser<T>::LinkStubNewDataRead(void * instance, LinkParamNone* data) { |
604 | 0 | return static_cast<SvParser<T> *>(instance)->NewDataRead(data); |
605 | 0 | } Unexecuted instantiation: SvParser<int>::LinkStubNewDataRead(void*, LinkParamNone*) Unexecuted instantiation: SvParser<HtmlTokenId>::LinkStubNewDataRead(void*, LinkParamNone*) |
606 | | template<typename T> |
607 | | void SvParser<T>::NewDataRead(SAL_UNUSED_PARAMETER LinkParamNone*) |
608 | 0 | { |
609 | 0 | switch( eState ) |
610 | 0 | { |
611 | 0 | case SvParserState::Pending: |
612 | 0 | eState = SvParserState::Working; |
613 | 0 | RestoreState(); |
614 | |
|
615 | 0 | Continue( pImplData->nToken ); |
616 | |
|
617 | 0 | if( ERRCODE_IO_PENDING == rInput.GetError() ) |
618 | 0 | rInput.ResetError(); |
619 | |
|
620 | 0 | if( SvParserState::Pending != eState ) |
621 | 0 | ReleaseRef(); // ready otherwise! |
622 | 0 | break; |
623 | | |
624 | 0 | case SvParserState::NotStarted: |
625 | 0 | case SvParserState::Working: |
626 | 0 | break; |
627 | | |
628 | 0 | default: |
629 | 0 | ReleaseRef(); // ready otherwise! |
630 | 0 | break; |
631 | 0 | } |
632 | 0 | } Unexecuted instantiation: SvParser<int>::NewDataRead(LinkParamNone*) Unexecuted instantiation: SvParser<HtmlTokenId>::NewDataRead(LinkParamNone*) |
633 | | |
// Explicit instantiations of SvParser for the two token types int and
// HtmlTokenId. The member templates above are defined in this file, so their
// code for these two types is emitted here.
634 | | template class SVT_DLLPUBLIC SvParser<int>; |
635 | | template class SVT_DLLPUBLIC SvParser<HtmlTokenId>; |
636 | | |
637 | | /*======================================================================== |
638 | | * |
639 | | * SvKeyValueIterator. |
640 | | * |
641 | | *======================================================================*/ |
642 | | |
643 | | typedef std::vector<SvKeyValue> SvKeyValueList_Impl; |
644 | | |
645 | | struct SvKeyValueIterator::Impl |
646 | | { |
647 | | SvKeyValueList_Impl maList; |
648 | | sal_uInt16 mnPos; |
649 | | |
650 | 60.0k | Impl() : mnPos(0) {} |
651 | | }; |
652 | | |
653 | 60.0k | SvKeyValueIterator::SvKeyValueIterator() : mpImpl(new Impl) {} |
654 | | |
// Defaulted destructor, defined after Impl has been fully defined in this file.
// NOTE(review): presumably kept out of line because destroying mpImpl needs
// the complete Impl type - confirm against mpImpl's declaration in the header.
655 | 60.0k | SvKeyValueIterator::~SvKeyValueIterator() = default; |
656 | | |
657 | | bool SvKeyValueIterator::GetFirst (SvKeyValue &rKeyVal) |
658 | 77.2k | { |
659 | 77.2k | mpImpl->mnPos = mpImpl->maList.size(); |
660 | 77.2k | return GetNext (rKeyVal); |
661 | 77.2k | } |
662 | | |
663 | | bool SvKeyValueIterator::GetNext (SvKeyValue &rKeyVal) |
664 | 131k | { |
665 | 131k | if (mpImpl->mnPos > 0) |
666 | 54.0k | { |
667 | 54.0k | rKeyVal = mpImpl->maList[--mpImpl->mnPos]; |
668 | 54.0k | return true; |
669 | 54.0k | } |
670 | 77.2k | else |
671 | 77.2k | { |
672 | | // Nothing to do. |
673 | 77.2k | return false; |
674 | 77.2k | } |
675 | 131k | } |
676 | | |
677 | | void SvKeyValueIterator::Append (const SvKeyValue &rKeyVal) |
678 | 34.1k | { |
679 | 34.1k | mpImpl->maList.push_back(rKeyVal); |
680 | 34.1k | } |
681 | | |
682 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |