Coverage Report

Created: 2025-07-07 10:01

/src/libreoffice/svtools/source/svrtf/svparser.cxx
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <svtools/svparser.hxx>
21
#include <svtools/htmltokn.h>
22
#include <tools/stream.hxx>
23
#include <tools/debug.hxx>
24
#include <rtl/textcvt.h>
25
#include <rtl/tencinfo.h>
26
#include <rtl/character.hxx>
27
#include <sal/log.hxx>
28
#include <unicode/ucsdet.h>
29
#include <comphelper/configuration.hxx>
30
31
#include <vector>
32
33
// structure to store the actual data
34
template<typename T>
35
struct SvParser_Impl
36
{
37
    OUString        aToken;             // parsed token
38
    sal_uInt64      nFilePos;           // actual position in stream
39
    sal_uInt32      nlLineNr;           // actual line number
40
    sal_uInt32      nlLinePos;          // actual column number
41
    tools::Long            nTokenValue;        // extra value (RTF)
42
    bool            bTokenHasValue;     // indicates whether nTokenValue is valid
43
    T               nToken;             // actual Token
44
    sal_uInt32      nNextCh;            // actual character
45
    T               nSaveToken;         // the token from Continue
46
47
    rtl_TextToUnicodeConverter hConv;
48
    rtl_TextToUnicodeContext   hContext;
49
50
    SvParser_Impl()
51
52.9k
        : nFilePos(0)
52
52.9k
        , nlLineNr(0)
53
52.9k
        , nlLinePos(0)
54
52.9k
        , nTokenValue(0)
55
52.9k
        , bTokenHasValue(false)
56
52.9k
        , nToken(static_cast<T>(0))
57
52.9k
        , nNextCh(0)
58
52.9k
        , nSaveToken(static_cast<T>(0))
59
52.9k
        , hConv( nullptr )
60
52.9k
        , hContext( reinterpret_cast<rtl_TextToUnicodeContext>(1) )
61
52.9k
    {
62
52.9k
    }
SvParser_Impl<int>::SvParser_Impl()
Line
Count
Source
51
10.4k
        : nFilePos(0)
52
10.4k
        , nlLineNr(0)
53
10.4k
        , nlLinePos(0)
54
10.4k
        , nTokenValue(0)
55
10.4k
        , bTokenHasValue(false)
56
10.4k
        , nToken(static_cast<T>(0))
57
10.4k
        , nNextCh(0)
58
10.4k
        , nSaveToken(static_cast<T>(0))
59
10.4k
        , hConv( nullptr )
60
10.4k
        , hContext( reinterpret_cast<rtl_TextToUnicodeContext>(1) )
61
10.4k
    {
62
10.4k
    }
SvParser_Impl<HtmlTokenId>::SvParser_Impl()
Line
Count
Source
51
42.5k
        : nFilePos(0)
52
42.5k
        , nlLineNr(0)
53
42.5k
        , nlLinePos(0)
54
42.5k
        , nTokenValue(0)
55
42.5k
        , bTokenHasValue(false)
56
42.5k
        , nToken(static_cast<T>(0))
57
42.5k
        , nNextCh(0)
58
42.5k
        , nSaveToken(static_cast<T>(0))
59
42.5k
        , hConv( nullptr )
60
42.5k
        , hContext( reinterpret_cast<rtl_TextToUnicodeContext>(1) )
61
42.5k
    {
62
42.5k
    }
63
64
};
65
66
67
template<typename T>
68
SvParser<T>::TokenStackType::TokenStackType()
69
179k
  : nTokenValue(0)
70
179k
    , bTokenHasValue(false)
71
179k
    , nTokenId(static_cast<T>(0))
72
179k
{
73
179k
}
SvParser<int>::TokenStackType::TokenStackType()
Line
Count
Source
69
52.2k
  : nTokenValue(0)
70
52.2k
    , bTokenHasValue(false)
71
52.2k
    , nTokenId(static_cast<T>(0))
72
52.2k
{
73
52.2k
}
SvParser<HtmlTokenId>::TokenStackType::TokenStackType()
Line
Count
Source
69
127k
  : nTokenValue(0)
70
127k
    , bTokenHasValue(false)
71
127k
    , nTokenId(static_cast<T>(0))
72
127k
{
73
127k
}
74
75
// Constructor
76
template<typename T>
77
SvParser<T>::SvParser( SvStream& rIn, sal_uInt8 nStackSize )
78
52.9k
    : rInput( rIn )
79
52.9k
    , nlLineNr( 1 )
80
52.9k
    , nlLinePos( 1 )
81
52.9k
    , nConversionErrors( 0 )
82
52.9k
    , pImplData( nullptr )
83
52.9k
    , m_nTokenIndex(0)
84
52.9k
    , nTokenValue( 0 )
85
52.9k
    , bTokenHasValue( false )
86
52.9k
    , bFuzzing(comphelper::IsFuzzing())
87
52.9k
    , eState( SvParserState::NotStarted )
88
52.9k
    , eSrcEnc( RTL_TEXTENCODING_DONTKNOW )
89
52.9k
    , nNextChPos(0)
90
52.9k
    , nNextCh(0)
91
52.9k
    , bSwitchToUCS2(false)
92
52.9k
    , bRTF_InTextRead(false)
93
52.9k
    , nTokenStackSize( nStackSize )
94
52.9k
    , nTokenStackPos( 0 )
95
52.9k
{
96
52.9k
    eState = SvParserState::NotStarted;
97
52.9k
    if( nTokenStackSize < 3 )
98
0
        nTokenStackSize = 3;
99
52.9k
    pTokenStack.reset(new TokenStackType[ nTokenStackSize ]);
100
52.9k
    pTokenStackPos = pTokenStack.get();
101
52.9k
}
SvParser<int>::SvParser(SvStream&, unsigned char)
Line
Count
Source
78
10.4k
    : rInput( rIn )
79
10.4k
    , nlLineNr( 1 )
80
10.4k
    , nlLinePos( 1 )
81
10.4k
    , nConversionErrors( 0 )
82
10.4k
    , pImplData( nullptr )
83
10.4k
    , m_nTokenIndex(0)
84
10.4k
    , nTokenValue( 0 )
85
10.4k
    , bTokenHasValue( false )
86
10.4k
    , bFuzzing(comphelper::IsFuzzing())
87
10.4k
    , eState( SvParserState::NotStarted )
88
10.4k
    , eSrcEnc( RTL_TEXTENCODING_DONTKNOW )
89
10.4k
    , nNextChPos(0)
90
10.4k
    , nNextCh(0)
91
10.4k
    , bSwitchToUCS2(false)
92
10.4k
    , bRTF_InTextRead(false)
93
10.4k
    , nTokenStackSize( nStackSize )
94
10.4k
    , nTokenStackPos( 0 )
95
10.4k
{
96
10.4k
    eState = SvParserState::NotStarted;
97
10.4k
    if( nTokenStackSize < 3 )
98
0
        nTokenStackSize = 3;
99
10.4k
    pTokenStack.reset(new TokenStackType[ nTokenStackSize ]);
100
10.4k
    pTokenStackPos = pTokenStack.get();
101
10.4k
}
SvParser<HtmlTokenId>::SvParser(SvStream&, unsigned char)
Line
Count
Source
78
42.5k
    : rInput( rIn )
79
42.5k
    , nlLineNr( 1 )
80
42.5k
    , nlLinePos( 1 )
81
42.5k
    , nConversionErrors( 0 )
82
42.5k
    , pImplData( nullptr )
83
42.5k
    , m_nTokenIndex(0)
84
42.5k
    , nTokenValue( 0 )
85
42.5k
    , bTokenHasValue( false )
86
42.5k
    , bFuzzing(comphelper::IsFuzzing())
87
42.5k
    , eState( SvParserState::NotStarted )
88
42.5k
    , eSrcEnc( RTL_TEXTENCODING_DONTKNOW )
89
42.5k
    , nNextChPos(0)
90
42.5k
    , nNextCh(0)
91
42.5k
    , bSwitchToUCS2(false)
92
42.5k
    , bRTF_InTextRead(false)
93
42.5k
    , nTokenStackSize( nStackSize )
94
42.5k
    , nTokenStackPos( 0 )
95
42.5k
{
96
42.5k
    eState = SvParserState::NotStarted;
97
42.5k
    if( nTokenStackSize < 3 )
98
0
        nTokenStackSize = 3;
99
42.5k
    pTokenStack.reset(new TokenStackType[ nTokenStackSize ]);
100
42.5k
    pTokenStackPos = pTokenStack.get();
101
42.5k
}
102
103
template<typename T>
104
SvParser<T>::~SvParser()
105
52.9k
{
106
52.9k
    if( pImplData && pImplData->hConv )
107
52.2k
    {
108
52.2k
        rtl_destroyTextToUnicodeContext( pImplData->hConv,
109
52.2k
                                         pImplData->hContext );
110
52.2k
        rtl_destroyTextToUnicodeConverter( pImplData->hConv );
111
52.2k
    }
112
113
52.9k
    pTokenStack.reset();
114
52.9k
}
SvParser<int>::~SvParser()
Line
Count
Source
105
10.4k
{
106
10.4k
    if( pImplData && pImplData->hConv )
107
10.1k
    {
108
10.1k
        rtl_destroyTextToUnicodeContext( pImplData->hConv,
109
10.1k
                                         pImplData->hContext );
110
10.1k
        rtl_destroyTextToUnicodeConverter( pImplData->hConv );
111
10.1k
    }
112
113
10.4k
    pTokenStack.reset();
114
10.4k
}
SvParser<HtmlTokenId>::~SvParser()
Line
Count
Source
105
42.5k
{
106
42.5k
    if( pImplData && pImplData->hConv )
107
42.1k
    {
108
42.1k
        rtl_destroyTextToUnicodeContext( pImplData->hConv,
109
42.1k
                                         pImplData->hContext );
110
42.1k
        rtl_destroyTextToUnicodeConverter( pImplData->hConv );
111
42.1k
    }
112
113
42.5k
    pTokenStack.reset();
114
42.5k
}
115
116
1.06M
template<typename T> SvParserState SvParser<T>::GetStatus() const { return eState; }
SvParser<int>::GetStatus() const
Line
Count
Source
116
9.84k
template<typename T> SvParserState SvParser<T>::GetStatus() const { return eState; }
SvParser<HtmlTokenId>::GetStatus() const
Line
Count
Source
116
1.06M
template<typename T> SvParserState SvParser<T>::GetStatus() const { return eState; }
117
4.48M
template<typename T> sal_uInt32 SvParser<T>::GetLineNr() const       { return nlLineNr; }
Unexecuted instantiation: SvParser<int>::GetLineNr() const
SvParser<HtmlTokenId>::GetLineNr() const
Line
Count
Source
117
4.48M
template<typename T> sal_uInt32 SvParser<T>::GetLineNr() const       { return nlLineNr; }
118
4.74M
template<typename T> sal_uInt32 SvParser<T>::GetLinePos() const      { return nlLinePos; }
Unexecuted instantiation: SvParser<int>::GetLinePos() const
SvParser<HtmlTokenId>::GetLinePos() const
Line
Count
Source
118
4.74M
template<typename T> sal_uInt32 SvParser<T>::GetLinePos() const      { return nlLinePos; }
119
2.37M
template<typename T> void       SvParser<T>::IncLineNr()             { ++nlLineNr; }
SvParser<int>::IncLineNr()
Line
Count
Source
119
588k
template<typename T> void       SvParser<T>::IncLineNr()             { ++nlLineNr; }
SvParser<HtmlTokenId>::IncLineNr()
Line
Count
Source
119
1.78M
template<typename T> void       SvParser<T>::IncLineNr()             { ++nlLineNr; }
120
119M
template<typename T> sal_uInt32 SvParser<T>::IncLinePos()            { return ++nlLinePos; }
SvParser<int>::IncLinePos()
Line
Count
Source
120
10.9M
template<typename T> sal_uInt32 SvParser<T>::IncLinePos()            { return ++nlLinePos; }
SvParser<HtmlTokenId>::IncLinePos()
Line
Count
Source
120
108M
template<typename T> sal_uInt32 SvParser<T>::IncLinePos()            { return ++nlLinePos; }
121
47.3k
template<typename T> void       SvParser<T>::SetLineNr( sal_uInt32 nlNum ) { nlLineNr = nlNum; }
Unexecuted instantiation: SvParser<int>::SetLineNr(unsigned int)
SvParser<HtmlTokenId>::SetLineNr(unsigned int)
Line
Count
Source
121
47.3k
template<typename T> void       SvParser<T>::SetLineNr( sal_uInt32 nlNum ) { nlLineNr = nlNum; }
122
2.41M
template<typename T> void       SvParser<T>::SetLinePos( sal_uInt32 nlPos ) {   nlLinePos = nlPos; }
SvParser<int>::SetLinePos(unsigned int)
Line
Count
Source
122
588k
template<typename T> void       SvParser<T>::SetLinePos( sal_uInt32 nlPos ) {   nlLinePos = nlPos; }
SvParser<HtmlTokenId>::SetLinePos(unsigned int)
Line
Count
Source
122
1.82M
template<typename T> void       SvParser<T>::SetLinePos( sal_uInt32 nlPos ) {   nlLinePos = nlPos; }
123
163M
template<typename T> bool       SvParser<T>::IsParserWorking() const { return SvParserState::Working == eState; }
SvParser<int>::IsParserWorking() const
Line
Count
Source
123
7.99M
template<typename T> bool       SvParser<T>::IsParserWorking() const { return SvParserState::Working == eState; }
SvParser<HtmlTokenId>::IsParserWorking() const
Line
Count
Source
123
155M
template<typename T> bool       SvParser<T>::IsParserWorking() const { return SvParserState::Working == eState; }
124
194k
template<typename T> rtl_TextEncoding SvParser<T>::GetSrcEncoding() const { return eSrcEnc; }
SvParser<int>::GetSrcEncoding() const
Line
Count
Source
124
188k
template<typename T> rtl_TextEncoding SvParser<T>::GetSrcEncoding() const { return eSrcEnc; }
SvParser<HtmlTokenId>::GetSrcEncoding() const
Line
Count
Source
124
6.16k
template<typename T> rtl_TextEncoding SvParser<T>::GetSrcEncoding() const { return eSrcEnc; }
125
42.5k
template<typename T> void       SvParser<T>::SetSwitchToUCS2( bool bSet ) { bSwitchToUCS2 = bSet; }
Unexecuted instantiation: SvParser<int>::SetSwitchToUCS2(bool)
SvParser<HtmlTokenId>::SetSwitchToUCS2(bool)
Line
Count
Source
125
42.5k
template<typename T> void       SvParser<T>::SetSwitchToUCS2( bool bSet ) { bSwitchToUCS2 = bSet; }
126
0
template<typename T> bool       SvParser<T>::IsSwitchToUCS2() const { return bSwitchToUCS2; }
Unexecuted instantiation: SvParser<int>::IsSwitchToUCS2() const
Unexecuted instantiation: SvParser<HtmlTokenId>::IsSwitchToUCS2() const
127
1.38k
template<typename T> sal_uInt16 SvParser<T>::GetCharSize() const { return (RTL_TEXTENCODING_UCS2 == eSrcEnc) ? 2 : 1; }
Unexecuted instantiation: SvParser<int>::GetCharSize() const
SvParser<HtmlTokenId>::GetCharSize() const
Line
Count
Source
127
1.38k
template<typename T> sal_uInt16 SvParser<T>::GetCharSize() const { return (RTL_TEXTENCODING_UCS2 == eSrcEnc) ? 2 : 1; }
128
template<typename T> Link<LinkParamNone*,void> SvParser<T>::GetAsynchCallLink() const
129
0
{
130
0
    return LINK( const_cast<SvParser*>(this), SvParser, NewDataRead );
131
0
}
Unexecuted instantiation: SvParser<int>::GetAsynchCallLink() const
Unexecuted instantiation: SvParser<HtmlTokenId>::GetAsynchCallLink() const
132
133
template<typename T>
134
void SvParser<T>::ClearTxtConvContext()
135
94.9k
{
136
94.9k
    if( pImplData && pImplData->hConv )
137
86.4k
        rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext );
138
94.9k
}
Unexecuted instantiation: SvParser<int>::ClearTxtConvContext()
SvParser<HtmlTokenId>::ClearTxtConvContext()
Line
Count
Source
135
94.9k
{
136
94.9k
    if( pImplData && pImplData->hConv )
137
86.4k
        rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext );
138
94.9k
}
139
140
template<typename T>
141
void SvParser<T>::SetSrcEncoding( rtl_TextEncoding eEnc )
142
257k
{
143
257k
    if( eEnc == eSrcEnc )
144
136k
        return;
145
146
120k
    if( pImplData && pImplData->hConv )
147
66.0k
    {
148
66.0k
        rtl_destroyTextToUnicodeContext( pImplData->hConv,
149
66.0k
                                         pImplData->hContext );
150
66.0k
        rtl_destroyTextToUnicodeConverter( pImplData->hConv );
151
66.0k
        pImplData->hConv = nullptr;
152
66.0k
        pImplData->hContext = reinterpret_cast<rtl_TextToUnicodeContext>(1);
153
66.0k
    }
154
155
120k
    if( rtl_isOctetTextEncoding(eEnc) ||
156
120k
        RTL_TEXTENCODING_UCS2 == eEnc  )
157
119k
    {
158
119k
        eSrcEnc = eEnc;
159
119k
        if( !pImplData )
160
52.9k
            pImplData.reset(new SvParser_Impl<T>);
161
119k
        pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc );
162
119k
        DBG_ASSERT( pImplData->hConv,
163
119k
                    "SvParser::SetSrcEncoding: no converter for source encoding" );
164
119k
        if( !pImplData->hConv )
165
1.19k
            eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
166
118k
        else
167
118k
            pImplData->hContext =
168
118k
                rtl_createTextToUnicodeContext( pImplData->hConv );
169
119k
    }
170
1.18k
    else
171
1.18k
    {
172
1.18k
        SAL_WARN( "svtools",
173
1.18k
                    "SvParser::SetSrcEncoding: invalid source encoding" );
174
1.18k
        eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
175
1.18k
    }
176
120k
}
SvParser<int>::SetSrcEncoding(unsigned short)
Line
Count
Source
142
152k
{
143
152k
    if( eEnc == eSrcEnc )
144
128k
        return;
145
146
24.4k
    if( pImplData && pImplData->hConv )
147
12.6k
    {
148
12.6k
        rtl_destroyTextToUnicodeContext( pImplData->hConv,
149
12.6k
                                         pImplData->hContext );
150
12.6k
        rtl_destroyTextToUnicodeConverter( pImplData->hConv );
151
12.6k
        pImplData->hConv = nullptr;
152
12.6k
        pImplData->hContext = reinterpret_cast<rtl_TextToUnicodeContext>(1);
153
12.6k
    }
154
155
24.4k
    if( rtl_isOctetTextEncoding(eEnc) ||
156
24.4k
        RTL_TEXTENCODING_UCS2 == eEnc  )
157
23.2k
    {
158
23.2k
        eSrcEnc = eEnc;
159
23.2k
        if( !pImplData )
160
10.4k
            pImplData.reset(new SvParser_Impl<T>);
161
23.2k
        pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc );
162
23.2k
        DBG_ASSERT( pImplData->hConv,
163
23.2k
                    "SvParser::SetSrcEncoding: no converter for source encoding" );
164
23.2k
        if( !pImplData->hConv )
165
386
            eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
166
22.8k
        else
167
22.8k
            pImplData->hContext =
168
22.8k
                rtl_createTextToUnicodeContext( pImplData->hConv );
169
23.2k
    }
170
1.18k
    else
171
1.18k
    {
172
1.18k
        SAL_WARN( "svtools",
173
1.18k
                    "SvParser::SetSrcEncoding: invalid source encoding" );
174
1.18k
        eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
175
1.18k
    }
176
24.4k
}
SvParser<HtmlTokenId>::SetSrcEncoding(unsigned short)
Line
Count
Source
142
104k
{
143
104k
    if( eEnc == eSrcEnc )
144
8.19k
        return;
145
146
96.2k
    if( pImplData && pImplData->hConv )
147
53.3k
    {
148
53.3k
        rtl_destroyTextToUnicodeContext( pImplData->hConv,
149
53.3k
                                         pImplData->hContext );
150
53.3k
        rtl_destroyTextToUnicodeConverter( pImplData->hConv );
151
53.3k
        pImplData->hConv = nullptr;
152
53.3k
        pImplData->hContext = reinterpret_cast<rtl_TextToUnicodeContext>(1);
153
53.3k
    }
154
155
96.2k
    if( rtl_isOctetTextEncoding(eEnc) ||
156
96.2k
        RTL_TEXTENCODING_UCS2 == eEnc  )
157
96.2k
    {
158
96.2k
        eSrcEnc = eEnc;
159
96.2k
        if( !pImplData )
160
42.5k
            pImplData.reset(new SvParser_Impl<T>);
161
96.2k
        pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc );
162
96.2k
        DBG_ASSERT( pImplData->hConv,
163
96.2k
                    "SvParser::SetSrcEncoding: no converter for source encoding" );
164
96.2k
        if( !pImplData->hConv )
165
808
            eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
166
95.4k
        else
167
95.4k
            pImplData->hContext =
168
95.4k
                rtl_createTextToUnicodeContext( pImplData->hConv );
169
96.2k
    }
170
0
    else
171
0
    {
172
0
        SAL_WARN( "svtools",
173
0
                    "SvParser::SetSrcEncoding: invalid source encoding" );
174
0
        eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
175
0
    }
176
96.2k
}
177
178
template<typename T>
179
void SvParser<T>::RereadLookahead()
180
38.7k
{
181
38.7k
    rInput.Seek(nNextChPos);
182
38.7k
    nNextCh = GetNextChar();
183
38.7k
}
SvParser<int>::RereadLookahead()
Line
Count
Source
180
38.7k
{
181
38.7k
    rInput.Seek(nNextChPos);
182
38.7k
    nNextCh = GetNextChar();
183
38.7k
}
Unexecuted instantiation: SvParser<HtmlTokenId>::RereadLookahead()
184
185
template<typename T>
186
sal_uInt32 SvParser<T>::GetNextChar()
187
124M
{
188
124M
    sal_uInt32 c = 0U;
189
190
    // When reading multiple bytes, we don't have to care about the file
191
    // position when we run into the pending state. The file position is
192
    // maintained by SaveState/RestoreState.
193
124M
    if( bSwitchToUCS2 && 0 == rInput.Tell() )
194
42.5k
    {
195
42.5k
        rInput.StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW);
196
42.5k
        if (rInput.good())
197
42.5k
        {
198
42.5k
            sal_uInt64 nPos = rInput.Tell();
199
42.5k
            if (nPos == 2)
200
89
                eSrcEnc = RTL_TEXTENCODING_UCS2;
201
42.4k
            else if (nPos == 3)
202
86
                SetSrcEncoding(RTL_TEXTENCODING_UTF8);
203
42.3k
            else // Try to detect encoding without BOM
204
42.3k
            {
205
42.3k
                std::vector<char> buf(65535); // Arbitrarily chosen 64KiB buffer
206
42.3k
                const size_t nSize = rInput.ReadBytes(buf.data(), buf.size());
207
42.3k
                rInput.Seek(0);
208
42.3k
                if (nSize > 0)
209
42.3k
                {
210
42.3k
                    UErrorCode uerr = U_ZERO_ERROR;
211
42.3k
                    UCharsetDetector* ucd = ucsdet_open(&uerr);
212
42.3k
                    ucsdet_setText(ucd, buf.data(), nSize, &uerr);
213
42.3k
                    if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr))
214
42.1k
                    {
215
42.1k
                        const char* pEncodingName = ucsdet_getName(match, &uerr);
216
217
42.1k
                        if (U_SUCCESS(uerr))
218
42.1k
                        {
219
42.1k
                            if (strcmp("UTF-8", pEncodingName) == 0)
220
3.85k
                            {
221
3.85k
                                SetSrcEncoding(RTL_TEXTENCODING_UTF8);
222
3.85k
                            }
223
38.3k
                            else if (strcmp("UTF-16LE", pEncodingName) == 0)
224
614
                            {
225
614
                                eSrcEnc = RTL_TEXTENCODING_UCS2;
226
614
                                rInput.SetEndian(SvStreamEndian::LITTLE);
227
614
                            }
228
37.7k
                            else if (strcmp("UTF-16BE", pEncodingName) == 0)
229
532
                            {
230
532
                                eSrcEnc = RTL_TEXTENCODING_UCS2;
231
532
                                rInput.SetEndian(SvStreamEndian::BIG);
232
532
                            }
233
42.1k
                        }
234
42.1k
                    }
235
236
42.3k
                    ucsdet_close(ucd);
237
42.3k
                }
238
42.3k
            }
239
42.5k
        }
240
42.5k
        bSwitchToUCS2 = false;
241
42.5k
    }
242
243
124M
    bool bErr;
244
124M
    nNextChPos = rInput.Tell();
245
246
124M
    if( RTL_TEXTENCODING_UCS2 == eSrcEnc )
247
1.14M
    {
248
1.14M
        sal_Unicode cUC;
249
1.14M
        rInput.ReadUtf16(cUC);
250
1.14M
        bErr = !rInput.good();
251
1.14M
        if( !bErr )
252
1.14M
        {
253
1.14M
            c = cUC;
254
1.14M
            if (rtl::isHighSurrogate(cUC))
255
12.9k
            {
256
12.9k
                const sal_uInt64 nPos = rInput.Tell();
257
12.9k
                rInput.ReadUtf16(cUC);
258
12.9k
                if (rtl::isLowSurrogate(cUC)) // can only be true when ReadUtf16 succeeded
259
525
                    c = rtl::combineSurrogates(c, cUC);
260
12.4k
                else
261
12.4k
                    rInput.Seek(nPos); // process lone high surrogate
262
12.9k
            }
263
1.14M
        }
264
1.14M
    }
265
123M
    else
266
123M
    {
267
123M
        sal_Size nChars = 0;
268
123M
        do
269
123M
        {
270
123M
            char c1;    // signed, that's the text converter expects
271
123M
            rInput.ReadChar( c1 );
272
123M
            bErr = !rInput.good();
273
123M
            if( !bErr )
274
123M
            {
275
123M
                if (
276
123M
                     RTL_TEXTENCODING_DONTKNOW == eSrcEnc ||
277
123M
                     RTL_TEXTENCODING_SYMBOL == eSrcEnc
278
123M
                   )
279
3.78M
                {
280
                    // no conversion shall take place
281
3.78M
                    c = reinterpret_cast<unsigned char&>( c1 );
282
3.78M
                    nChars = 1;
283
3.78M
                }
284
119M
                else
285
119M
                {
286
119M
                    assert(pImplData && pImplData->hConv && "no text converter!");
287
288
119M
                    sal_Unicode cUC;
289
119M
                    sal_uInt32 nInfo = 0;
290
119M
                    sal_Size nCvtBytes;
291
119M
                    nChars = rtl_convertTextToUnicode(
292
119M
                                pImplData->hConv, pImplData->hContext,
293
119M
                                &c1, 1, &cUC, 1,
294
119M
                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
295
119M
                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
296
119M
                                RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
297
119M
                                &nInfo, &nCvtBytes);
298
119M
                    if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 )
299
668k
                    {
300
                        // The conversion wasn't successful because we haven't
301
                        // read enough characters.
302
668k
                        if( pImplData->hContext != reinterpret_cast<rtl_TextToUnicodeContext>(1) )
303
663k
                        {
304
663k
                            sal_Unicode sCh[2];
305
1.56M
                            while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 )
306
898k
                            {
307
898k
                                rInput.ReadChar( c1 );
308
898k
                                bErr = !rInput.good();
309
898k
                                if( bErr )
310
1.46k
                                    break;
311
312
897k
                                nChars = rtl_convertTextToUnicode(
313
897k
                                            pImplData->hConv, pImplData->hContext,
314
897k
                                            &c1, 1, sCh , 2,
315
897k
                                            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
316
897k
                                            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
317
897k
                                            RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
318
897k
                                            &nInfo, &nCvtBytes);
319
897k
                            }
320
663k
                            if( !bErr )
321
661k
                            {
322
661k
                                if( 1 == nChars && 0 == nInfo )
323
274k
                                {
324
274k
                                    c = sal_uInt32( sCh[0] );
325
274k
                                }
326
386k
                                else if( 2 == nChars && 0 == nInfo )
327
59.4k
                                {
328
59.4k
                                    c = rtl::combineSurrogates( sCh[0], sCh[1] );
329
59.4k
                                }
330
327k
                                else if( 0 != nChars || 0 != nInfo )
331
327k
                                {
332
327k
                                    DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0,
333
327k
                                        "source buffer is too small" );
334
327k
                                    DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0,
335
327k
                                         "there is a conversion error" );
336
327k
                                    DBG_ASSERT( 0 == nChars,
337
327k
                                       "there is a converted character, but an error" );
338
                                    // There are still errors, but nothing we can
339
                                    // do
340
327k
                                    c = '?';
341
327k
                                    nChars = 1;
342
327k
                                    ++nConversionErrors;
343
327k
                                }
344
661k
                            }
345
663k
                        }
346
5.09k
                        else
347
5.09k
                        {
348
5.09k
                            char sBuffer[10];
349
5.09k
                            sBuffer[0] = c1;
350
5.09k
                            sal_uInt16 nLen = 1;
351
10.1k
                            while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 &&
352
10.1k
                                    nLen < 10 )
353
5.09k
                            {
354
5.09k
                                rInput.ReadChar( c1 );
355
5.09k
                                bErr = !rInput.good();
356
5.09k
                                if( bErr )
357
18
                                    break;
358
359
5.07k
                                sBuffer[nLen++] = c1;
360
5.07k
                                nChars = rtl_convertTextToUnicode(
361
5.07k
                                            pImplData->hConv, nullptr, sBuffer, nLen, &cUC, 1,
362
5.07k
                                            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
363
5.07k
                                            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
364
5.07k
                                            RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
365
5.07k
                                            &nInfo, &nCvtBytes);
366
5.07k
                            }
367
5.09k
                            if( !bErr )
368
5.07k
                            {
369
5.07k
                                if( 1 == nChars && 0 == nInfo )
370
3.10k
                                {
371
3.10k
                                    DBG_ASSERT( nCvtBytes == nLen,
372
3.10k
                                                "no all bytes have been converted!" );
373
3.10k
                                    c = cUC;
374
3.10k
                                }
375
1.97k
                                else
376
1.97k
                                {
377
1.97k
                                    DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0,
378
1.97k
                                        "source buffer is too small" );
379
1.97k
                                    DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0,
380
1.97k
                                         "there is a conversion error" );
381
1.97k
                                    DBG_ASSERT( 0 == nChars,
382
1.97k
                                       "there is a converted character, but an error" );
383
384
                                    // There are still errors, so we use the first
385
                                    // character and restart after that.
386
1.97k
                                    c = reinterpret_cast<unsigned char&>( sBuffer[0] );
387
1.97k
                                    rInput.SeekRel( -(nLen-1) );
388
1.97k
                                    nChars = 1;
389
1.97k
                                    ++nConversionErrors;
390
1.97k
                                }
391
5.07k
                            }
392
5.09k
                        }
393
668k
                    }
394
118M
                    else if( 1 == nChars && 0 == nInfo )
395
117M
                    {
396
                        // The conversion was successful
397
117M
                        DBG_ASSERT( nCvtBytes == 1,
398
117M
                                    "no all bytes have been converted!" );
399
117M
                        c = cUC;
400
117M
                    }
401
915k
                    else if( 0 != nChars || 0 != nInfo )
402
915k
                    {
403
915k
                        DBG_ASSERT( 0 == nChars,
404
915k
                                "there is a converted character, but an error" );
405
915k
                        DBG_ASSERT( 0 != nInfo,
406
915k
                                "there is no converted character and no error" );
407
                        // #73398#: If the character could not be converted,
408
                        // because a conversion is not available, do no conversion at all.
409
915k
                        c = reinterpret_cast<unsigned char&>( c1 );
410
915k
                        nChars = 1;
411
915k
                        ++nConversionErrors;
412
915k
                    }
413
119M
                }
414
123M
            }
415
123M
        }
416
123M
        while( 0 == nChars  && !bErr );
417
123M
    }
418
419
124M
    if ( ! rtl::isUnicodeScalarValue( c ) )
420
18.0k
        c = '?' ;
421
422
124M
    if (bFuzzing && nConversionErrors > 128)
423
2.38M
    {
424
2.38M
        SAL_WARN("svtools", "SvParser::GetNextChar too many conversion errors while fuzzing, abandoning for performance");
425
2.38M
        bErr = true;
426
2.38M
    }
427
428
124M
    if( bErr )
429
2.48M
    {
430
2.48M
        if( ERRCODE_IO_PENDING == rInput.GetError() )
431
0
        {
432
0
            eState = SvParserState::Pending;
433
0
            return c;
434
0
        }
435
2.48M
        else
436
2.48M
            return sal_Unicode(EOF);
437
2.48M
    }
438
439
121M
    if( c == '\n' )
440
2.37M
    {
441
2.37M
        IncLineNr();
442
2.37M
        SetLinePos( 1 );
443
2.37M
    }
444
119M
    else
445
119M
        IncLinePos();
446
447
121M
    return c;
448
124M
}
SvParser<int>::GetNextChar()
Line
Count
Source
187
11.5M
{
188
11.5M
    sal_uInt32 c = 0U;
189
190
    // When reading multiple bytes, we don't have to care about the file
191
    // position when we run into the pending state. The file position is
192
    // maintained by SaveState/RestoreState.
193
11.5M
    if( bSwitchToUCS2 && 0 == rInput.Tell() )
194
0
    {
195
0
        rInput.StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW);
196
0
        if (rInput.good())
197
0
        {
198
0
            sal_uInt64 nPos = rInput.Tell();
199
0
            if (nPos == 2)
200
0
                eSrcEnc = RTL_TEXTENCODING_UCS2;
201
0
            else if (nPos == 3)
202
0
                SetSrcEncoding(RTL_TEXTENCODING_UTF8);
203
0
            else // Try to detect encoding without BOM
204
0
            {
205
0
                std::vector<char> buf(65535); // Arbitrarily chosen 64KiB buffer
206
0
                const size_t nSize = rInput.ReadBytes(buf.data(), buf.size());
207
0
                rInput.Seek(0);
208
0
                if (nSize > 0)
209
0
                {
210
0
                    UErrorCode uerr = U_ZERO_ERROR;
211
0
                    UCharsetDetector* ucd = ucsdet_open(&uerr);
212
0
                    ucsdet_setText(ucd, buf.data(), nSize, &uerr);
213
0
                    if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr))
214
0
                    {
215
0
                        const char* pEncodingName = ucsdet_getName(match, &uerr);
216
217
0
                        if (U_SUCCESS(uerr))
218
0
                        {
219
0
                            if (strcmp("UTF-8", pEncodingName) == 0)
220
0
                            {
221
0
                                SetSrcEncoding(RTL_TEXTENCODING_UTF8);
222
0
                            }
223
0
                            else if (strcmp("UTF-16LE", pEncodingName) == 0)
224
0
                            {
225
0
                                eSrcEnc = RTL_TEXTENCODING_UCS2;
226
0
                                rInput.SetEndian(SvStreamEndian::LITTLE);
227
0
                            }
228
0
                            else if (strcmp("UTF-16BE", pEncodingName) == 0)
229
0
                            {
230
0
                                eSrcEnc = RTL_TEXTENCODING_UCS2;
231
0
                                rInput.SetEndian(SvStreamEndian::BIG);
232
0
                            }
233
0
                        }
234
0
                    }
235
236
0
                    ucsdet_close(ucd);
237
0
                }
238
0
            }
239
0
        }
240
0
        bSwitchToUCS2 = false;
241
0
    }
242
243
11.5M
    bool bErr;
244
11.5M
    nNextChPos = rInput.Tell();
245
246
11.5M
    if( RTL_TEXTENCODING_UCS2 == eSrcEnc )
247
0
    {
248
0
        sal_Unicode cUC;
249
0
        rInput.ReadUtf16(cUC);
250
0
        bErr = !rInput.good();
251
0
        if( !bErr )
252
0
        {
253
0
            c = cUC;
254
0
            if (rtl::isHighSurrogate(cUC))
255
0
            {
256
0
                const sal_uInt64 nPos = rInput.Tell();
257
0
                rInput.ReadUtf16(cUC);
258
0
                if (rtl::isLowSurrogate(cUC)) // can only be true when ReadUtf16 succeeded
259
0
                    c = rtl::combineSurrogates(c, cUC);
260
0
                else
261
0
                    rInput.Seek(nPos); // process lone high surrogate
262
0
            }
263
0
        }
264
0
    }
265
11.5M
    else
266
11.5M
    {
267
11.5M
        sal_Size nChars = 0;
268
11.5M
        do
269
11.5M
        {
270
11.5M
            char c1;    // signed, that's the text converter expects
271
11.5M
            rInput.ReadChar( c1 );
272
11.5M
            bErr = !rInput.good();
273
11.5M
            if( !bErr )
274
11.5M
            {
275
11.5M
                if (
276
11.5M
                     RTL_TEXTENCODING_DONTKNOW == eSrcEnc ||
277
11.5M
                     RTL_TEXTENCODING_SYMBOL == eSrcEnc
278
11.5M
                   )
279
227k
                {
280
                    // no conversion shall take place
281
227k
                    c = reinterpret_cast<unsigned char&>( c1 );
282
227k
                    nChars = 1;
283
227k
                }
284
11.3M
                else
285
11.3M
                {
286
11.3M
                    assert(pImplData && pImplData->hConv && "no text converter!");
287
288
11.3M
                    sal_Unicode cUC;
289
11.3M
                    sal_uInt32 nInfo = 0;
290
11.3M
                    sal_Size nCvtBytes;
291
11.3M
                    nChars = rtl_convertTextToUnicode(
292
11.3M
                                pImplData->hConv, pImplData->hContext,
293
11.3M
                                &c1, 1, &cUC, 1,
294
11.3M
                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
295
11.3M
                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
296
11.3M
                                RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
297
11.3M
                                &nInfo, &nCvtBytes);
298
11.3M
                    if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 )
299
11.0k
                    {
300
                        // The conversion wasn't successful because we haven't
301
                        // read enough characters.
302
11.0k
                        if( pImplData->hContext != reinterpret_cast<rtl_TextToUnicodeContext>(1) )
303
5.96k
                        {
304
5.96k
                            sal_Unicode sCh[2];
305
15.9k
                            while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 )
306
10.0k
                            {
307
10.0k
                                rInput.ReadChar( c1 );
308
10.0k
                                bErr = !rInput.good();
309
10.0k
                                if( bErr )
310
38
                                    break;
311
312
10.0k
                                nChars = rtl_convertTextToUnicode(
313
10.0k
                                            pImplData->hConv, pImplData->hContext,
314
10.0k
                                            &c1, 1, sCh , 2,
315
10.0k
                                            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
316
10.0k
                                            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
317
10.0k
                                            RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
318
10.0k
                                            &nInfo, &nCvtBytes);
319
10.0k
                            }
320
5.96k
                            if( !bErr )
321
5.92k
                            {
322
5.92k
                                if( 1 == nChars && 0 == nInfo )
323
1.72k
                                {
324
1.72k
                                    c = sal_uInt32( sCh[0] );
325
1.72k
                                }
326
4.19k
                                else if( 2 == nChars && 0 == nInfo )
327
1.22k
                                {
328
1.22k
                                    c = rtl::combineSurrogates( sCh[0], sCh[1] );
329
1.22k
                                }
330
2.97k
                                else if( 0 != nChars || 0 != nInfo )
331
2.97k
                                {
332
2.97k
                                    DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0,
333
2.97k
                                        "source buffer is too small" );
334
2.97k
                                    DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0,
335
2.97k
                                         "there is a conversion error" );
336
2.97k
                                    DBG_ASSERT( 0 == nChars,
337
2.97k
                                       "there is a converted character, but an error" );
338
                                    // There are still errors, but nothing we can
339
                                    // do
340
2.97k
                                    c = '?';
341
2.97k
                                    nChars = 1;
342
2.97k
                                    ++nConversionErrors;
343
2.97k
                                }
344
5.92k
                            }
345
5.96k
                        }
346
5.09k
                        else
347
5.09k
                        {
348
5.09k
                            char sBuffer[10];
349
5.09k
                            sBuffer[0] = c1;
350
5.09k
                            sal_uInt16 nLen = 1;
351
10.1k
                            while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 &&
352
10.1k
                                    nLen < 10 )
353
5.09k
                            {
354
5.09k
                                rInput.ReadChar( c1 );
355
5.09k
                                bErr = !rInput.good();
356
5.09k
                                if( bErr )
357
18
                                    break;
358
359
5.07k
                                sBuffer[nLen++] = c1;
360
5.07k
                                nChars = rtl_convertTextToUnicode(
361
5.07k
                                            pImplData->hConv, nullptr, sBuffer, nLen, &cUC, 1,
362
5.07k
                                            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
363
5.07k
                                            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
364
5.07k
                                            RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
365
5.07k
                                            &nInfo, &nCvtBytes);
366
5.07k
                            }
367
5.09k
                            if( !bErr )
368
5.07k
                            {
369
5.07k
                                if( 1 == nChars && 0 == nInfo )
370
3.10k
                                {
371
3.10k
                                    DBG_ASSERT( nCvtBytes == nLen,
372
3.10k
                                                "no all bytes have been converted!" );
373
3.10k
                                    c = cUC;
374
3.10k
                                }
375
1.97k
                                else
376
1.97k
                                {
377
1.97k
                                    DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0,
378
1.97k
                                        "source buffer is too small" );
379
1.97k
                                    DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0,
380
1.97k
                                         "there is a conversion error" );
381
1.97k
                                    DBG_ASSERT( 0 == nChars,
382
1.97k
                                       "there is a converted character, but an error" );
383
384
                                    // There are still errors, so we use the first
385
                                    // character and restart after that.
386
1.97k
                                    c = reinterpret_cast<unsigned char&>( sBuffer[0] );
387
1.97k
                                    rInput.SeekRel( -(nLen-1) );
388
1.97k
                                    nChars = 1;
389
1.97k
                                    ++nConversionErrors;
390
1.97k
                                }
391
5.07k
                            }
392
5.09k
                        }
393
11.0k
                    }
394
11.3M
                    else if( 1 == nChars && 0 == nInfo )
395
11.2M
                    {
396
                        // The conversion was successful
397
11.2M
                        DBG_ASSERT( nCvtBytes == 1,
398
11.2M
                                    "no all bytes have been converted!" );
399
11.2M
                        c = cUC;
400
11.2M
                    }
401
30.9k
                    else if( 0 != nChars || 0 != nInfo )
402
30.9k
                    {
403
30.9k
                        DBG_ASSERT( 0 == nChars,
404
30.9k
                                "there is a converted character, but an error" );
405
30.9k
                        DBG_ASSERT( 0 != nInfo,
406
30.9k
                                "there is no converted character and no error" );
407
                        // #73398#: If the character could not be converted,
408
                        // because a conversion is not available, do no conversion at all.
409
30.9k
                        c = reinterpret_cast<unsigned char&>( c1 );
410
30.9k
                        nChars = 1;
411
30.9k
                        ++nConversionErrors;
412
30.9k
                    }
413
11.3M
                }
414
11.5M
            }
415
11.5M
        }
416
11.5M
        while( 0 == nChars  && !bErr );
417
11.5M
    }
418
419
11.5M
    if ( ! rtl::isUnicodeScalarValue( c ) )
420
0
        c = '?' ;
421
422
11.5M
    if (bFuzzing && nConversionErrors > 128)
423
227
    {
424
227
        SAL_WARN("svtools", "SvParser::GetNextChar too many conversion errors while fuzzing, abandoning for performance");
425
227
        bErr = true;
426
227
    }
427
428
11.5M
    if( bErr )
429
25.9k
    {
430
25.9k
        if( ERRCODE_IO_PENDING == rInput.GetError() )
431
0
        {
432
0
            eState = SvParserState::Pending;
433
0
            return c;
434
0
        }
435
25.9k
        else
436
25.9k
            return sal_Unicode(EOF);
437
25.9k
    }
438
439
11.5M
    if( c == '\n' )
440
588k
    {
441
588k
        IncLineNr();
442
588k
        SetLinePos( 1 );
443
588k
    }
444
10.9M
    else
445
10.9M
        IncLinePos();
446
447
11.5M
    return c;
448
11.5M
}
SvParser<HtmlTokenId>::GetNextChar()
Line
Count
Source
187
112M
{
188
112M
    sal_uInt32 c = 0U;
189
190
    // When reading multiple bytes, we don't have to care about the file
191
    // position when we run into the pending state. The file position is
192
    // maintained by SaveState/RestoreState.
193
112M
    if( bSwitchToUCS2 && 0 == rInput.Tell() )
194
42.5k
    {
195
42.5k
        rInput.StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW);
196
42.5k
        if (rInput.good())
197
42.5k
        {
198
42.5k
            sal_uInt64 nPos = rInput.Tell();
199
42.5k
            if (nPos == 2)
200
89
                eSrcEnc = RTL_TEXTENCODING_UCS2;
201
42.4k
            else if (nPos == 3)
202
86
                SetSrcEncoding(RTL_TEXTENCODING_UTF8);
203
42.3k
            else // Try to detect encoding without BOM
204
42.3k
            {
205
42.3k
                std::vector<char> buf(65535); // Arbitrarily chosen 64KiB buffer
206
42.3k
                const size_t nSize = rInput.ReadBytes(buf.data(), buf.size());
207
42.3k
                rInput.Seek(0);
208
42.3k
                if (nSize > 0)
209
42.3k
                {
210
42.3k
                    UErrorCode uerr = U_ZERO_ERROR;
211
42.3k
                    UCharsetDetector* ucd = ucsdet_open(&uerr);
212
42.3k
                    ucsdet_setText(ucd, buf.data(), nSize, &uerr);
213
42.3k
                    if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr))
214
42.1k
                    {
215
42.1k
                        const char* pEncodingName = ucsdet_getName(match, &uerr);
216
217
42.1k
                        if (U_SUCCESS(uerr))
218
42.1k
                        {
219
42.1k
                            if (strcmp("UTF-8", pEncodingName) == 0)
220
3.85k
                            {
221
3.85k
                                SetSrcEncoding(RTL_TEXTENCODING_UTF8);
222
3.85k
                            }
223
38.3k
                            else if (strcmp("UTF-16LE", pEncodingName) == 0)
224
614
                            {
225
614
                                eSrcEnc = RTL_TEXTENCODING_UCS2;
226
614
                                rInput.SetEndian(SvStreamEndian::LITTLE);
227
614
                            }
228
37.7k
                            else if (strcmp("UTF-16BE", pEncodingName) == 0)
229
532
                            {
230
532
                                eSrcEnc = RTL_TEXTENCODING_UCS2;
231
532
                                rInput.SetEndian(SvStreamEndian::BIG);
232
532
                            }
233
42.1k
                        }
234
42.1k
                    }
235
236
42.3k
                    ucsdet_close(ucd);
237
42.3k
                }
238
42.3k
            }
239
42.5k
        }
240
42.5k
        bSwitchToUCS2 = false;
241
42.5k
    }
242
243
112M
    bool bErr;
244
112M
    nNextChPos = rInput.Tell();
245
246
112M
    if( RTL_TEXTENCODING_UCS2 == eSrcEnc )
247
1.14M
    {
248
1.14M
        sal_Unicode cUC;
249
1.14M
        rInput.ReadUtf16(cUC);
250
1.14M
        bErr = !rInput.good();
251
1.14M
        if( !bErr )
252
1.14M
        {
253
1.14M
            c = cUC;
254
1.14M
            if (rtl::isHighSurrogate(cUC))
255
12.9k
            {
256
12.9k
                const sal_uInt64 nPos = rInput.Tell();
257
12.9k
                rInput.ReadUtf16(cUC);
258
12.9k
                if (rtl::isLowSurrogate(cUC)) // can only be true when ReadUtf16 succeeded
259
525
                    c = rtl::combineSurrogates(c, cUC);
260
12.4k
                else
261
12.4k
                    rInput.Seek(nPos); // process lone high surrogate
262
12.9k
            }
263
1.14M
        }
264
1.14M
    }
265
111M
    else
266
111M
    {
267
111M
        sal_Size nChars = 0;
268
111M
        do
269
111M
        {
270
111M
            char c1;    // signed, that's the text converter expects
271
111M
            rInput.ReadChar( c1 );
272
111M
            bErr = !rInput.good();
273
111M
            if( !bErr )
274
111M
            {
275
111M
                if (
276
111M
                     RTL_TEXTENCODING_DONTKNOW == eSrcEnc ||
277
111M
                     RTL_TEXTENCODING_SYMBOL == eSrcEnc
278
111M
                   )
279
3.55M
                {
280
                    // no conversion shall take place
281
3.55M
                    c = reinterpret_cast<unsigned char&>( c1 );
282
3.55M
                    nChars = 1;
283
3.55M
                }
284
108M
                else
285
108M
                {
286
108M
                    assert(pImplData && pImplData->hConv && "no text converter!");
287
288
108M
                    sal_Unicode cUC;
289
108M
                    sal_uInt32 nInfo = 0;
290
108M
                    sal_Size nCvtBytes;
291
108M
                    nChars = rtl_convertTextToUnicode(
292
108M
                                pImplData->hConv, pImplData->hContext,
293
108M
                                &c1, 1, &cUC, 1,
294
108M
                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
295
108M
                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
296
108M
                                RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
297
108M
                                &nInfo, &nCvtBytes);
298
108M
                    if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 )
299
657k
                    {
300
                        // The conversion wasn't successful because we haven't
301
                        // read enough characters.
302
657k
                        if( pImplData->hContext != reinterpret_cast<rtl_TextToUnicodeContext>(1) )
303
657k
                        {
304
657k
                            sal_Unicode sCh[2];
305
1.54M
                            while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 )
306
888k
                            {
307
888k
                                rInput.ReadChar( c1 );
308
888k
                                bErr = !rInput.good();
309
888k
                                if( bErr )
310
1.43k
                                    break;
311
312
887k
                                nChars = rtl_convertTextToUnicode(
313
887k
                                            pImplData->hConv, pImplData->hContext,
314
887k
                                            &c1, 1, sCh , 2,
315
887k
                                            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
316
887k
                                            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
317
887k
                                            RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
318
887k
                                            &nInfo, &nCvtBytes);
319
887k
                            }
320
657k
                            if( !bErr )
321
655k
                            {
322
655k
                                if( 1 == nChars && 0 == nInfo )
323
273k
                                {
324
273k
                                    c = sal_uInt32( sCh[0] );
325
273k
                                }
326
382k
                                else if( 2 == nChars && 0 == nInfo )
327
58.2k
                                {
328
58.2k
                                    c = rtl::combineSurrogates( sCh[0], sCh[1] );
329
58.2k
                                }
330
324k
                                else if( 0 != nChars || 0 != nInfo )
331
324k
                                {
332
324k
                                    DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0,
333
324k
                                        "source buffer is too small" );
334
324k
                                    DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0,
335
324k
                                         "there is a conversion error" );
336
324k
                                    DBG_ASSERT( 0 == nChars,
337
324k
                                       "there is a converted character, but an error" );
338
                                    // There are still errors, but nothing we can
339
                                    // do
340
324k
                                    c = '?';
341
324k
                                    nChars = 1;
342
324k
                                    ++nConversionErrors;
343
324k
                                }
344
655k
                            }
345
657k
                        }
346
0
                        else
347
0
                        {
348
0
                            char sBuffer[10];
349
0
                            sBuffer[0] = c1;
350
0
                            sal_uInt16 nLen = 1;
351
0
                            while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) != 0 &&
352
0
                                    nLen < 10 )
353
0
                            {
354
0
                                rInput.ReadChar( c1 );
355
0
                                bErr = !rInput.good();
356
0
                                if( bErr )
357
0
                                    break;
358
359
0
                                sBuffer[nLen++] = c1;
360
0
                                nChars = rtl_convertTextToUnicode(
361
0
                                            pImplData->hConv, nullptr, sBuffer, nLen, &cUC, 1,
362
0
                                            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
363
0
                                            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
364
0
                                            RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
365
0
                                            &nInfo, &nCvtBytes);
366
0
                            }
367
0
                            if( !bErr )
368
0
                            {
369
0
                                if( 1 == nChars && 0 == nInfo )
370
0
                                {
371
0
                                    DBG_ASSERT( nCvtBytes == nLen,
372
0
                                                "no all bytes have been converted!" );
373
0
                                    c = cUC;
374
0
                                }
375
0
                                else
376
0
                                {
377
0
                                    DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL) == 0,
378
0
                                        "source buffer is too small" );
379
0
                                    DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL)) == 0,
380
0
                                         "there is a conversion error" );
381
0
                                    DBG_ASSERT( 0 == nChars,
382
0
                                       "there is a converted character, but an error" );
383
384
                                    // There are still errors, so we use the first
385
                                    // character and restart after that.
386
0
                                    c = reinterpret_cast<unsigned char&>( sBuffer[0] );
387
0
                                    rInput.SeekRel( -(nLen-1) );
388
0
                                    nChars = 1;
389
0
                                    ++nConversionErrors;
390
0
                                }
391
0
                            }
392
0
                        }
393
657k
                    }
394
107M
                    else if( 1 == nChars && 0 == nInfo )
395
106M
                    {
396
                        // The conversion was successful
397
106M
                        DBG_ASSERT( nCvtBytes == 1,
398
106M
                                    "no all bytes have been converted!" );
399
106M
                        c = cUC;
400
106M
                    }
401
884k
                    else if( 0 != nChars || 0 != nInfo )
402
884k
                    {
403
884k
                        DBG_ASSERT( 0 == nChars,
404
884k
                                "there is a converted character, but an error" );
405
884k
                        DBG_ASSERT( 0 != nInfo,
406
884k
                                "there is no converted character and no error" );
407
                        // #73398#: If the character could not be converted,
408
                        // because a conversion is not available, do no conversion at all.
409
884k
                        c = reinterpret_cast<unsigned char&>( c1 );
410
884k
                        nChars = 1;
411
884k
                        ++nConversionErrors;
412
884k
                    }
413
108M
                }
414
111M
            }
415
111M
        }
416
111M
        while( 0 == nChars  && !bErr );
417
111M
    }
418
419
112M
    if ( ! rtl::isUnicodeScalarValue( c ) )
420
18.0k
        c = '?' ;
421
422
112M
    if (bFuzzing && nConversionErrors > 128)
423
2.38M
    {
424
2.38M
        SAL_WARN("svtools", "SvParser::GetNextChar too many conversion errors while fuzzing, abandoning for performance");
425
2.38M
        bErr = true;
426
2.38M
    }
427
428
112M
    if( bErr )
429
2.46M
    {
430
2.46M
        if( ERRCODE_IO_PENDING == rInput.GetError() )
431
0
        {
432
0
            eState = SvParserState::Pending;
433
0
            return c;
434
0
        }
435
2.46M
        else
436
2.46M
            return sal_Unicode(EOF);
437
2.46M
    }
438
439
110M
    if( c == '\n' )
440
1.78M
    {
441
1.78M
        IncLineNr();
442
1.78M
        SetLinePos( 1 );
443
1.78M
    }
444
108M
    else
445
108M
        IncLinePos();
446
447
110M
    return c;
448
112M
}
449
450
template<typename T>
451
T SvParser<T>::GetNextToken()
452
11.2M
{
453
11.2M
    T nRet = static_cast<T>(0);
454
455
11.2M
    if( !nTokenStackPos )
456
10.6M
    {
457
10.6M
        aToken.setLength( 0 );     // empty token buffer
458
10.6M
        nTokenValue = -1;   // marker for no value read
459
10.6M
        bTokenHasValue = false;
460
461
10.6M
        nRet = GetNextToken_();
462
10.6M
        if( SvParserState::Pending == eState )
463
0
            return nRet;
464
10.6M
    }
465
466
11.2M
    ++pTokenStackPos;
467
11.2M
    if( pTokenStackPos == pTokenStack.get() + nTokenStackSize )
468
3.39M
        pTokenStackPos = pTokenStack.get();
469
470
    // pop from stack ??
471
11.2M
    if( nTokenStackPos )
472
569k
    {
473
569k
        --nTokenStackPos;
474
569k
        nTokenValue = pTokenStackPos->nTokenValue;
475
569k
        bTokenHasValue = pTokenStackPos->bTokenHasValue;
476
569k
        aToken = pTokenStackPos->sToken;
477
569k
        nRet = pTokenStackPos->nTokenId;
478
569k
        ++m_nTokenIndex;
479
569k
    }
480
    // no, now push actual value on stack
481
10.6M
    else if( SvParserState::Working == eState )
482
10.4M
    {
483
10.4M
        pTokenStackPos->sToken = aToken;
484
10.4M
        pTokenStackPos->nTokenValue = nTokenValue;
485
10.4M
        pTokenStackPos->bTokenHasValue = bTokenHasValue;
486
10.4M
        pTokenStackPos->nTokenId = nRet;
487
10.4M
        ++m_nTokenIndex;
488
10.4M
    }
489
254k
    else if( SvParserState::Accepted != eState && SvParserState::Pending != eState )
490
7.59k
        eState = SvParserState::Error;       // an error occurred
491
492
11.2M
    return nRet;
493
11.2M
}
SvParser<int>::GetNextToken()
Line
Count
Source
452
2.51M
{
453
2.51M
    T nRet = static_cast<T>(0);
454
455
2.51M
    if( !nTokenStackPos )
456
2.22M
    {
457
2.22M
        aToken.setLength( 0 );     // empty token buffer
458
2.22M
        nTokenValue = -1;   // marker for no value read
459
2.22M
        bTokenHasValue = false;
460
461
2.22M
        nRet = GetNextToken_();
462
2.22M
        if( SvParserState::Pending == eState )
463
0
            return nRet;
464
2.22M
    }
465
466
2.51M
    ++pTokenStackPos;
467
2.51M
    if( pTokenStackPos == pTokenStack.get() + nTokenStackSize )
468
499k
        pTokenStackPos = pTokenStack.get();
469
470
    // pop from stack ??
471
2.51M
    if( nTokenStackPos )
472
289k
    {
473
289k
        --nTokenStackPos;
474
289k
        nTokenValue = pTokenStackPos->nTokenValue;
475
289k
        bTokenHasValue = pTokenStackPos->bTokenHasValue;
476
289k
        aToken = pTokenStackPos->sToken;
477
289k
        nRet = pTokenStackPos->nTokenId;
478
289k
        ++m_nTokenIndex;
479
289k
    }
480
    // no, now push actual value on stack
481
2.22M
    else if( SvParserState::Working == eState )
482
2.21M
    {
483
2.21M
        pTokenStackPos->sToken = aToken;
484
2.21M
        pTokenStackPos->nTokenValue = nTokenValue;
485
2.21M
        pTokenStackPos->bTokenHasValue = bTokenHasValue;
486
2.21M
        pTokenStackPos->nTokenId = nRet;
487
2.21M
        ++m_nTokenIndex;
488
2.21M
    }
489
10.3k
    else if( SvParserState::Accepted != eState && SvParserState::Pending != eState )
490
745
        eState = SvParserState::Error;       // an error occurred
491
492
2.51M
    return nRet;
493
2.51M
}
SvParser<HtmlTokenId>::GetNextToken()
Line
Count
Source
452
8.72M
{
453
8.72M
    T nRet = static_cast<T>(0);
454
455
8.72M
    if( !nTokenStackPos )
456
8.44M
    {
457
8.44M
        aToken.setLength( 0 );     // empty token buffer
458
8.44M
        nTokenValue = -1;   // marker for no value read
459
8.44M
        bTokenHasValue = false;
460
461
8.44M
        nRet = GetNextToken_();
462
8.44M
        if( SvParserState::Pending == eState )
463
0
            return nRet;
464
8.44M
    }
465
466
8.72M
    ++pTokenStackPos;
467
8.72M
    if( pTokenStackPos == pTokenStack.get() + nTokenStackSize )
468
2.89M
        pTokenStackPos = pTokenStack.get();
469
470
    // pop from stack ??
471
8.72M
    if( nTokenStackPos )
472
280k
    {
473
280k
        --nTokenStackPos;
474
280k
        nTokenValue = pTokenStackPos->nTokenValue;
475
280k
        bTokenHasValue = pTokenStackPos->bTokenHasValue;
476
280k
        aToken = pTokenStackPos->sToken;
477
280k
        nRet = pTokenStackPos->nTokenId;
478
280k
        ++m_nTokenIndex;
479
280k
    }
480
    // no, now push actual value on stack
481
8.44M
    else if( SvParserState::Working == eState )
482
8.19M
    {
483
8.19M
        pTokenStackPos->sToken = aToken;
484
8.19M
        pTokenStackPos->nTokenValue = nTokenValue;
485
8.19M
        pTokenStackPos->bTokenHasValue = bTokenHasValue;
486
8.19M
        pTokenStackPos->nTokenId = nRet;
487
8.19M
        ++m_nTokenIndex;
488
8.19M
    }
489
244k
    else if( SvParserState::Accepted != eState && SvParserState::Pending != eState )
490
6.85k
        eState = SvParserState::Error;       // an error occurred
491
492
8.72M
    return nRet;
493
8.72M
}
494
495
// Repositions the token ring buffer by nCnt entries and makes the entry at the
// new position the current token.
//
// nCnt is forwarded to GetStackPtr(): a negative value moves toward older
// entries, a positive value toward newer ones. The number of buffered tokens
// still to be handed out (nTokenStackPos) becomes nTokenStackPos - nCnt,
// clamped to [0, nTokenStackSize].
//
// Returns the token id stored in the entry now pointed to by pTokenStackPos;
// aToken, nTokenValue and bTokenHasValue are reloaded from that same entry.
template<typename T>
496
T SvParser<T>::SkipToken( short nCnt )       // "skip" n Tokens backward
497
554k
{
498
554k
    pTokenStackPos = GetStackPtr( nCnt );
499
554k
    short nTmp = nTokenStackPos - nCnt;      // new count of buffered tokens to replay
500
554k
    if( nTmp < 0 )
501
1
        nTmp = 0;
502
554k
    else if( nTmp > nTokenStackSize )
503
0
        nTmp = nTokenStackSize;
504
554k
    nTokenStackPos = sal_uInt8(nTmp);
505
506
554k
    m_nTokenIndex -= nTmp;                   // subtracts the clamped replay count, not nCnt itself
507
508
    // restore values
509
554k
    aToken = pTokenStackPos->sToken;
510
554k
    nTokenValue = pTokenStackPos->nTokenValue;
511
554k
    bTokenHasValue = pTokenStackPos->bTokenHasValue;
512
513
554k
    return pTokenStackPos->nTokenId;
514
554k
}
SvParser<int>::SkipToken(short)
Line
Count
Source
497
273k
{
498
273k
    pTokenStackPos = GetStackPtr( nCnt );
499
273k
    short nTmp = nTokenStackPos - nCnt;
500
273k
    if( nTmp < 0 )
501
1
        nTmp = 0;
502
273k
    else if( nTmp > nTokenStackSize )
503
0
        nTmp = nTokenStackSize;
504
273k
    nTokenStackPos = sal_uInt8(nTmp);
505
506
273k
    m_nTokenIndex -= nTmp;
507
508
    // restore values
509
273k
    aToken = pTokenStackPos->sToken;
510
273k
    nTokenValue = pTokenStackPos->nTokenValue;
511
273k
    bTokenHasValue = pTokenStackPos->bTokenHasValue;
512
513
273k
    return pTokenStackPos->nTokenId;
514
273k
}
SvParser<HtmlTokenId>::SkipToken(short)
Line
Count
Source
497
280k
{
498
280k
    pTokenStackPos = GetStackPtr( nCnt );
499
280k
    short nTmp = nTokenStackPos - nCnt;
500
280k
    if( nTmp < 0 )
501
0
        nTmp = 0;
502
280k
    else if( nTmp > nTokenStackSize )
503
0
        nTmp = nTokenStackSize;
504
280k
    nTokenStackPos = sal_uInt8(nTmp);
505
506
280k
    m_nTokenIndex -= nTmp;
507
508
    // restore values
509
280k
    aToken = pTokenStackPos->sToken;
510
280k
    nTokenValue = pTokenStackPos->nTokenValue;
511
280k
    bTokenHasValue = pTokenStackPos->bTokenHasValue;
512
513
280k
    return pTokenStackPos->nTokenId;
514
280k
}
515
516
// Returns a pointer to the token-stack entry that lies nCnt slots away from the
// current position (pTokenStackPos), treating pTokenStack as a ring of
// nTokenStackSize entries.
//
//  - nCnt > 0: step forward, wrapping past the end of the array.
//  - nCnt < 0: step backward, wrapping before the start of the array.
//  - nCnt == 0: the current entry is returned.
//
// |nCnt| is limited to nTokenStackSize-1. No member is modified here; callers
// decide whether to store the result in pTokenStackPos.
template<typename T>
517
typename SvParser<T>::TokenStackType* SvParser<T>::GetStackPtr( short nCnt )
518
719k
{
519
719k
    sal_uInt8 nCurrentPos = sal_uInt8(pTokenStackPos - pTokenStack.get());  // index of current entry
520
719k
    if( nCnt > 0 )
521
209
    {
522
209
        if( nCnt >= nTokenStackSize )
523
0
            nCnt = (nTokenStackSize-1);
524
209
        if( nCurrentPos + nCnt < nTokenStackSize )
525
146
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt);
526
63
        else
527
63
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(
528
63
                nCurrentPos + (nCnt - nTokenStackSize));    // wrap around past the last slot
529
209
    }
530
719k
    else if( nCnt < 0 )
531
719k
    {
532
719k
        if( -nCnt >= nTokenStackSize )
533
0
            nCnt = -nTokenStackSize+1;
534
719k
        if( -nCnt <= nCurrentPos )
535
532k
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt);
536
186k
        else
537
186k
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(
538
186k
                nCurrentPos + (nCnt + nTokenStackSize));    // wrap around before the first slot
539
719k
    }
540
719k
    return pTokenStack.get() + nCurrentPos;
541
719k
}
SvParser<int>::GetStackPtr(short)
Line
Count
Source
518
439k
{
519
439k
    sal_uInt8 nCurrentPos = sal_uInt8(pTokenStackPos - pTokenStack.get());
520
439k
    if( nCnt > 0 )
521
209
    {
522
209
        if( nCnt >= nTokenStackSize )
523
0
            nCnt = (nTokenStackSize-1);
524
209
        if( nCurrentPos + nCnt < nTokenStackSize )
525
146
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt);
526
63
        else
527
63
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(
528
63
                nCurrentPos + (nCnt - nTokenStackSize));
529
209
    }
530
438k
    else if( nCnt < 0 )
531
438k
    {
532
438k
        if( -nCnt >= nTokenStackSize )
533
0
            nCnt = -nTokenStackSize+1;
534
438k
        if( -nCnt <= nCurrentPos )
535
346k
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt);
536
92.6k
        else
537
92.6k
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(
538
92.6k
                nCurrentPos + (nCnt + nTokenStackSize));
539
438k
    }
540
439k
    return pTokenStack.get() + nCurrentPos;
541
439k
}
SvParser<HtmlTokenId>::GetStackPtr(short)
Line
Count
Source
518
280k
{
519
280k
    sal_uInt8 nCurrentPos = sal_uInt8(pTokenStackPos - pTokenStack.get());
520
280k
    if( nCnt > 0 )
521
0
    {
522
0
        if( nCnt >= nTokenStackSize )
523
0
            nCnt = (nTokenStackSize-1);
524
0
        if( nCurrentPos + nCnt < nTokenStackSize )
525
0
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt);
526
0
        else
527
0
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(
528
0
                nCurrentPos + (nCnt - nTokenStackSize));
529
0
    }
530
280k
    else if( nCnt < 0 )
531
280k
    {
532
280k
        if( -nCnt >= nTokenStackSize )
533
0
            nCnt = -nTokenStackSize+1;
534
280k
        if( -nCnt <= nCurrentPos )
535
186k
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(nCurrentPos + nCnt);
536
93.8k
        else
537
93.8k
            nCurrentPos = sal::static_int_cast< sal_uInt8 >(
538
93.8k
                nCurrentPos + (nCnt + nTokenStackSize));
539
280k
    }
540
280k
    return pTokenStack.get() + nCurrentPos;
541
280k
}
542
543
// to read asynchronously from SvStream
544
545
// Returns pImplData->nSaveToken, or static_cast<T>(0) when no pImplData has
// been allocated. nSaveToken is written by RestoreState() (copy of the token
// passed to the last SaveState()) and zero-initialised by SaveState().
template<typename T>
546
T SvParser<T>::GetSaveToken() const
547
0
{
548
0
    return pImplData ? pImplData->nSaveToken : static_cast<T>(0);
549
0
}
Unexecuted instantiation: SvParser<int>::GetSaveToken() const
Unexecuted instantiation: SvParser<HtmlTokenId>::GetSaveToken() const
550
551
// Takes a snapshot of the parser's current reading state into pImplData so
// that RestoreState() can rewind to it later.
//
// nToken is stored alongside the snapshot (as pImplData->nToken). The snapshot
// covers the stream offset (rInput.Tell()), the token text and value, the
// line/column counters and the look-ahead character nNextCh.
template<typename T>
552
void SvParser<T>::SaveState( T nToken )
553
21.3M
{
554
    // save actual status
555
21.3M
    if( !pImplData )
556
0
    {
557
0
        pImplData.reset(new SvParser_Impl<T>);      // allocated lazily on first use
558
0
        pImplData->nSaveToken = static_cast<T>(0);
559
0
    }
560
561
21.3M
    pImplData->nFilePos = rInput.Tell();
562
21.3M
    pImplData->nToken = nToken;
563
564
21.3M
    pImplData->aToken = aToken;
565
21.3M
    pImplData->nlLineNr = nlLineNr;
566
21.3M
    pImplData->nlLinePos = nlLinePos;
567
21.3M
    pImplData->nTokenValue= nTokenValue;
568
21.3M
    pImplData->bTokenHasValue = bTokenHasValue;
569
21.3M
    pImplData->nNextCh = nNextCh;
570
21.3M
}
SvParser<int>::SaveState(int)
Line
Count
Source
553
3.62M
{
554
    // save actual status
555
3.62M
    if( !pImplData )
556
0
    {
557
0
        pImplData.reset(new SvParser_Impl<T>);
558
0
        pImplData->nSaveToken = static_cast<T>(0);
559
0
    }
560
561
3.62M
    pImplData->nFilePos = rInput.Tell();
562
3.62M
    pImplData->nToken = nToken;
563
564
3.62M
    pImplData->aToken = aToken;
565
3.62M
    pImplData->nlLineNr = nlLineNr;
566
3.62M
    pImplData->nlLinePos = nlLinePos;
567
3.62M
    pImplData->nTokenValue= nTokenValue;
568
3.62M
    pImplData->bTokenHasValue = bTokenHasValue;
569
3.62M
    pImplData->nNextCh = nNextCh;
570
3.62M
}
SvParser<HtmlTokenId>::SaveState(HtmlTokenId)
Line
Count
Source
553
17.7M
{
554
    // save actual status
555
17.7M
    if( !pImplData )
556
0
    {
557
0
        pImplData.reset(new SvParser_Impl<T>);
558
0
        pImplData->nSaveToken = static_cast<T>(0);
559
0
    }
560
561
17.7M
    pImplData->nFilePos = rInput.Tell();
562
17.7M
    pImplData->nToken = nToken;
563
564
17.7M
    pImplData->aToken = aToken;
565
17.7M
    pImplData->nlLineNr = nlLineNr;
566
17.7M
    pImplData->nlLinePos = nlLinePos;
567
17.7M
    pImplData->nTokenValue= nTokenValue;
568
17.7M
    pImplData->bTokenHasValue = bTokenHasValue;
569
17.7M
    pImplData->nNextCh = nNextCh;
570
17.7M
}
571
572
// Rewinds the parser to the snapshot taken by the last SaveState().
//
// Does nothing when no snapshot storage (pImplData) exists. Otherwise a
// pending-I/O error on rInput is cleared, the token text/value, line/column
// counters and look-ahead character are reloaded, the saved token is published
// through nSaveToken (see GetSaveToken()), and rInput is seeked back to the
// saved stream offset.
template<typename T>
573
void SvParser<T>::RestoreState()
574
0
{
575
    // restore old status
576
0
    if( !pImplData )
577
0
        return;
578
579
0
    if( ERRCODE_IO_PENDING == rInput.GetError() )
580
0
        rInput.ResetError();
581
0
    aToken = pImplData->aToken;
582
0
    nlLineNr = pImplData->nlLineNr;
583
0
    nlLinePos = pImplData->nlLinePos;
584
0
    nTokenValue= pImplData->nTokenValue;
585
0
    bTokenHasValue=pImplData->bTokenHasValue;
586
0
    nNextCh = pImplData->nNextCh;
587
588
0
    pImplData->nSaveToken = pImplData->nToken;
589
590
0
    rInput.Seek( pImplData->nFilePos );
591
0
}
Unexecuted instantiation: SvParser<int>::RestoreState()
Unexecuted instantiation: SvParser<HtmlTokenId>::RestoreState()
592
593
// Intentionally empty; the token argument is ignored. NewDataRead() calls this
// with the token recorded by SaveState() once a pending read can resume.
// NOTE(review): presumably a hook that concrete parsers override - confirm
// against the class declaration.
template<typename T>
594
void SvParser<T>::Continue( T )
595
0
{
596
0
}
Unexecuted instantiation: SvParser<int>::Continue(int)
Unexecuted instantiation: SvParser<HtmlTokenId>::Continue(HtmlTokenId)
597
598
599
// Forwards a type-erased callback to NewDataRead(): `instance` is cast to
// SvParser<T>* and `data` is passed through unchanged.
// expanded out version of
600
//   IMPL_LINK_NOARG( SvParser, NewDataRead, LinkParamNone*, void )
601
// since it can't cope with template methods
602
template<typename T>
603
0
void SvParser<T>::LinkStubNewDataRead(void * instance, LinkParamNone* data) {
604
0
    return static_cast<SvParser<T> *>(instance)->NewDataRead(data);
605
0
}
Unexecuted instantiation: SvParser<int>::LinkStubNewDataRead(void*, LinkParamNone*)
Unexecuted instantiation: SvParser<HtmlTokenId>::LinkStubNewDataRead(void*, LinkParamNone*)
606
// Callback invoked (via LinkStubNewDataRead) to react to the parser state.
//
//  - Pending: switch to Working, rewind with RestoreState(), resume through
//    Continue() with the saved token, clear a pending-I/O error on rInput and,
//    if parsing did not go back to Pending, call ReleaseRef().
//  - NotStarted / Working: nothing to do.
//  - any other state: call ReleaseRef().
template<typename T>
607
void SvParser<T>::NewDataRead(SAL_UNUSED_PARAMETER LinkParamNone*)
608
0
{
609
0
    switch( eState )
610
0
    {
611
0
    case SvParserState::Pending:
612
0
        eState = SvParserState::Working;
613
0
        RestoreState();
614
615
        // NOTE(review): pImplData is dereferenced unconditionally here, while
        // RestoreState() tolerates it being null - confirm that SaveState()
        // always runs before the parser can enter the Pending state.
0
        Continue( pImplData->nToken );
616
617
0
        if( ERRCODE_IO_PENDING == rInput.GetError() )
618
0
            rInput.ResetError();
619
620
0
        if( SvParserState::Pending != eState )
621
0
            ReleaseRef();                    // ready otherwise!
622
0
        break;
623
624
0
    case SvParserState::NotStarted:
625
0
    case SvParserState::Working:
626
0
        break;
627
628
0
    default:
629
0
        ReleaseRef();                    // ready otherwise!
630
0
        break;
631
0
    }
632
0
}
Unexecuted instantiation: SvParser<int>::NewDataRead(LinkParamNone*)
Unexecuted instantiation: SvParser<HtmlTokenId>::NewDataRead(LinkParamNone*)
633
634
// Explicit instantiations: the member templates defined in this file are
// emitted for exactly these two token types (int and HtmlTokenId).
template class SVT_DLLPUBLIC SvParser<int>;
635
template class SVT_DLLPUBLIC SvParser<HtmlTokenId>;
636
637
/*========================================================================
638
 *
639
 * SvKeyValueIterator.
640
 *
641
 *======================================================================*/
642
643
// Storage type for the key/value entries held by SvKeyValueIterator.
typedef std::vector<SvKeyValue> SvKeyValueList_Impl;
644
645
// Private state of SvKeyValueIterator: the appended entries plus a cursor.
struct SvKeyValueIterator::Impl
646
{
647
    SvKeyValueList_Impl maList;     // entries in Append() order
648
    sal_uInt16 mnPos;               // one past the index GetNext() returns next; 0 = exhausted
649
650
60.0k
    Impl() : mnPos(0) {}
651
};
652
653
60.0k
// Creates an iterator with an empty list and the cursor at 0.
SvKeyValueIterator::SvKeyValueIterator() : mpImpl(new Impl) {}
654
655
60.0k
// Defaulted destructor, defined in this file after Impl's definition.
SvKeyValueIterator::~SvKeyValueIterator() = default;
656
657
// Starts a new traversal: places the cursor behind the last entry and
// delegates to GetNext(), so the most recently appended entry is delivered
// first. Returns false (rKeyVal untouched) when the list is empty.
// NOTE(review): maList.size() is narrowed to sal_uInt16 here - confirm the
// list can never hold more than 65535 entries.
bool SvKeyValueIterator::GetFirst (SvKeyValue &rKeyVal)
658
77.2k
{
659
77.2k
    mpImpl->mnPos = mpImpl->maList.size();
660
77.2k
    return GetNext (rKeyVal);
661
77.2k
}
662
663
// Steps the cursor one entry toward the front of the list and copies that
// entry into rKeyVal. Returns true when an entry was delivered, false once
// the cursor has reached 0 (rKeyVal is then left unchanged).
bool SvKeyValueIterator::GetNext (SvKeyValue &rKeyVal)
664
131k
{
665
131k
    if (mpImpl->mnPos > 0)
666
54.0k
    {
667
54.0k
        rKeyVal = mpImpl->maList[--mpImpl->mnPos];  // pre-decrement: cursor is one past the entry to return
668
54.0k
        return true;
669
54.0k
    }
670
77.2k
    else
671
77.2k
    {
672
        // Nothing to do.
673
77.2k
        return false;
674
77.2k
    }
675
131k
}
676
677
// Adds a copy of rKeyVal at the end of the list. The cursor is not moved, so
// a traversal already in progress does not see the new entry until GetFirst()
// is called again.
void SvKeyValueIterator::Append (const SvKeyValue &rKeyVal)
678
34.1k
{
679
34.1k
    mpImpl->maList.push_back(rKeyVal);
680
34.1k
}
681
682
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */