Coverage Report

Created: 2026-02-14 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/include/svtools/parhtml.hxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#pragma once
21
22
#include <svtools/svtdllapi.h>
23
#include <svtools/svparser.hxx>
24
#include <svtools/htmltokn.h>
25
26
#include <string_view>
27
#include <vector>
28
29
namespace com :: sun :: star :: uno { template <class interface_type> class Reference; }
30
31
namespace com::sun::star {
32
    namespace document {
33
        class XDocumentProperties;
34
    }
35
}
36
37
class Color;
38
enum class HtmlOptionId;
39
40
#define HTMLFONTSZ1_DFLT 7   // HTMLFONTSZ1..7_DFLT: presumably the default sizes for HTML font sizes 1-7 (unit not shown here - confirm)
41
#define HTMLFONTSZ2_DFLT 10
42
#define HTMLFONTSZ3_DFLT 12
43
#define HTMLFONTSZ4_DFLT 14
44
#define HTMLFONTSZ5_DFLT 18
45
#define HTMLFONTSZ6_DFLT 24
46
#define HTMLFONTSZ7_DFLT 36
47
48
enum class HTMLTableFrame { Void, Above, Below, HSides, LHS, RHS, VSides, Box }; // result type of HTMLOption::GetTableFrame(), i.e. <TABLE FRAME=...>
49
50
enum class HTMLTableRules { NONE, Groups, Rows, Cols, All }; // result type of HTMLOption::GetTableRules(), i.e. <TABLE RULES=...>
51
52
enum class HTMLInputType    // result type of HTMLOption::GetInputType(), i.e. <INPUT TYPE=...>
53
{
54
    Text =      1,          // numbering starts at 1, so the value 0 names no enumerator
55
    Password,
56
    Checkbox,
57
    Radio,
58
    Range,
59
    Scribble,
60
    File,
61
    Hidden,
62
    Submit,
63
    Image,
64
    Reset,
65
    Button
66
};
67
68
enum class HTMLScriptLanguage   // type of the rLang parameter of HTMLParser::ParseScriptOptions()
69
{
70
    StarBasic,
71
    JavaScript,
72
    Unknown
73
};
74
75
template<typename EnumT>
76
struct HTMLOptionEnum       // one entry of a name -> enum table scanned by HTMLOption::GetEnum()
77
{
78
    const char *pName;  // value of an HTML option; GetEnum() stops scanning at the first entry whose pName is null
79
    EnumT       nValue; // and corresponding value of an enum
80
};
81
82
/** Representation of an HTML option (=attribute in a start tag).
83
 * The values of the options are always stored as strings.
84
 * The typed accessors (GetNumber, GetColor, ...) may only be called if
85
 * the option value actually has the matching form (a number, a color, ...).
86
 */
87
class SVT_DLLPUBLIC HTMLOption
88
{
89
    OUString aValue;          // value of the option (always as string)
90
    OUString aToken;          // name of the option as string
91
    HtmlOptionId nToken;        // and respective token
92
93
public:
94
95
    HTMLOption( HtmlOptionId nTyp, OUString aToken, OUString aValue );
96
97
    // name of the option...
98
1.65M
    HtmlOptionId GetToken() const { return nToken; }  // ... as enum (the stored nToken)
99
26.7k
    const OUString& GetTokenString() const { return aToken; } // ... as string (reference to the stored aToken)
100
101
    // value of the option ...
102
886k
    const OUString& GetString() const { return aValue; }  // ... as string (reference to the stored aValue)
103
104
    sal_uInt32 GetNumber() const;                           // ... as number
105
    sal_Int32 GetSNumber() const;                           // ... as number
106
    void GetNumbers( std::vector<sal_uInt32> &rNumbers ) const; // ... as numbers
107
    void GetColor( Color& ) const;                      // ... as color
108
109
    template<typename EnumT>    // ... as enum: look aValue up in a name table and return the matching enum value
110
    EnumT GetEnum( const HTMLOptionEnum<EnumT> *pOptEnums,  // pOptEnums: table ended by an entry with a null pName
111
                        EnumT nDflt = static_cast<EnumT>(0) ) const     // nDflt: returned when no name matches; EnumT(0) unless given
112
52.1k
    {
113
171k
        while( pOptEnums->pName )   // walk the table until the null-pName terminator
114
161k
        {
115
161k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )   // first entry whose name equals aValue (ASCII case ignored) wins
116
42.2k
                return pOptEnums->nValue;
117
119k
            pOptEnums++;
118
119k
        }
119
9.91k
        return nDflt;   // no entry matched
120
52.1k
    }
Unexecuted instantiation: HTMLInputType HTMLOption::GetEnum<HTMLInputType>(HTMLOptionEnum<HTMLInputType> const*, HTMLInputType) const
HTMLTableFrame HTMLOption::GetEnum<HTMLTableFrame>(HTMLOptionEnum<HTMLTableFrame> const*, HTMLTableFrame) const
Line
Count
Source
112
3.05k
    {
113
9.31k
        while( pOptEnums->pName )
114
8.70k
        {
115
8.70k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116
2.43k
                return pOptEnums->nValue;
117
6.26k
            pOptEnums++;
118
6.26k
        }
119
616
        return nDflt;
120
3.05k
    }
HTMLTableRules HTMLOption::GetEnum<HTMLTableRules>(HTMLOptionEnum<HTMLTableRules> const*, HTMLTableRules) const
Line
Count
Source
112
757
    {
113
2.49k
        while( pOptEnums->pName )
114
2.30k
        {
115
2.30k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116
566
                return pOptEnums->nValue;
117
1.73k
            pOptEnums++;
118
1.73k
        }
119
191
        return nDflt;
120
757
    }
SvxAdjust HTMLOption::GetEnum<SvxAdjust>(HTMLOptionEnum<SvxAdjust> const*, SvxAdjust) const
Line
Count
Source
112
33.9k
    {
113
113k
        while( pOptEnums->pName )
114
108k
        {
115
108k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116
28.9k
                return pOptEnums->nValue;
117
79.8k
            pOptEnums++;
118
79.8k
        }
119
5.02k
        return nDflt;
120
33.9k
    }
short HTMLOption::GetEnum<short>(HTMLOptionEnum<short> const*, short) const
Line
Count
Source
112
12.8k
    {
113
42.5k
        while( pOptEnums->pName )
114
38.7k
        {
115
38.7k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116
9.08k
                return pOptEnums->nValue;
117
29.6k
            pOptEnums++;
118
29.6k
        }
119
3.75k
        return nDflt;
120
12.8k
    }
SdrTextAniKind HTMLOption::GetEnum<SdrTextAniKind>(HTMLOptionEnum<SdrTextAniKind> const*, SdrTextAniKind) const
Line
Count
Source
112
50
    {
113
152
        while( pOptEnums->pName )
114
143
        {
115
143
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116
41
                return pOptEnums->nValue;
117
102
            pOptEnums++;
118
102
        }
119
9
        return nDflt;
120
50
    }
SdrTextAniDirection HTMLOption::GetEnum<SdrTextAniDirection>(HTMLOptionEnum<SdrTextAniDirection> const*, SdrTextAniDirection) const
Line
Count
Source
112
120
    {
113
226
        while( pOptEnums->pName )
114
198
        {
115
198
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116
92
                return pOptEnums->nValue;
117
106
            pOptEnums++;
118
106
        }
119
28
        return nDflt;
120
120
    }
com::sun::star::form::FormSubmitMethod HTMLOption::GetEnum<com::sun::star::form::FormSubmitMethod>(HTMLOptionEnum<com::sun::star::form::FormSubmitMethod> const*, com::sun::star::form::FormSubmitMethod) const
Line
Count
Source
112
650
    {
113
1.34k
        while( pOptEnums->pName )
114
1.23k
        {
115
1.23k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116
540
                return pOptEnums->nValue;
117
695
            pOptEnums++;
118
695
        }
119
110
        return nDflt;
120
650
    }
com::sun::star::form::FormSubmitEncoding HTMLOption::GetEnum<com::sun::star::form::FormSubmitEncoding>(HTMLOptionEnum<com::sun::star::form::FormSubmitEncoding> const*, com::sun::star::form::FormSubmitEncoding) const
Line
Count
Source
112
512
    {
113
1.35k
        while( pOptEnums->pName )
114
1.18k
        {
115
1.18k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116
349
                return pOptEnums->nValue;
117
838
            pOptEnums++;
118
838
        }
119
163
        return nDflt;
120
512
    }
Unexecuted instantiation: htmlform.cxx:(anonymous namespace)::HTMLWordWrapMode HTMLOption::GetEnum<(anonymous namespace)::HTMLWordWrapMode>(HTMLOptionEnum<(anonymous namespace)::HTMLWordWrapMode> const*, (anonymous namespace)::HTMLWordWrapMode) const
unsigned int HTMLOption::GetEnum<unsigned int>(HTMLOptionEnum<unsigned int> const*, unsigned int) const
Line
Count
Source
112
142
    {
113
146
        while( pOptEnums->pName )
114
145
        {
115
145
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116
141
                return pOptEnums->nValue;
117
4
            pOptEnums++;
118
4
        }
119
1
        return nDflt;
120
142
    }
ScrollingMode HTMLOption::GetEnum<ScrollingMode>(HTMLOptionEnum<ScrollingMode> const*, ScrollingMode) const
Line
Count
Source
112
65
    {
113
152
        while( pOptEnums->pName )
114
144
        {
115
144
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116
57
                return pOptEnums->nValue;
117
87
            pOptEnums++;
118
87
        }
119
8
        return nDflt;
120
65
    }
121
122
    template<typename EnumT>    // ... as enum: like the overload returning EnumT, but reports success instead of using a default
123
    bool GetEnum( EnumT &rEnum, const HTMLOptionEnum<EnumT> *pOptEnums ) const    // pOptEnums: table ended by an entry with a null pName
124
49.1k
    {
125
329k
        while( pOptEnums->pName )   // walk the table until the null-pName terminator
126
316k
        {
127
316k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )   // first entry whose name equals aValue (ASCII case ignored) wins
128
36.7k
            {
129
36.7k
                rEnum = pOptEnums->nValue;  // rEnum is written only on a match
130
36.7k
                return true;
131
36.7k
            }
132
280k
            pOptEnums++;
133
280k
        }
134
12.3k
        return false;   // no entry matched; rEnum is left unchanged
135
49.1k
    }
bool HTMLOption::GetEnum<IMapObjectType>(IMapObjectType&, HTMLOptionEnum<IMapObjectType> const*) const
Line
Count
Source
124
2.33k
    {
125
4.77k
        while( pOptEnums->pName )
126
4.43k
        {
127
4.43k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
128
1.99k
            {
129
1.99k
                rEnum = pOptEnums->nValue;
130
1.99k
                return true;
131
1.99k
            }
132
2.44k
            pOptEnums++;
133
2.44k
        }
134
336
        return false;
135
2.33k
    }
parhtml.cxx:bool HTMLOption::GetEnum<(anonymous namespace)::HtmlMeta>((anonymous namespace)::HtmlMeta&, HTMLOptionEnum<(anonymous namespace)::HtmlMeta> const*) const
Line
Count
Source
124
34.3k
    {
125
248k
        while( pOptEnums->pName )
126
238k
        {
127
238k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
128
23.9k
            {
129
23.9k
                rEnum = pOptEnums->nValue;
130
23.9k
                return true;
131
23.9k
            }
132
214k
            pOptEnums++;
133
214k
        }
134
10.3k
        return false;
135
34.3k
    }
Unexecuted instantiation: bool HTMLOption::GetEnum<unsigned short>(unsigned short&, HTMLOptionEnum<unsigned short> const*) const
bool HTMLOption::GetEnum<SwFieldIds>(SwFieldIds&, HTMLOptionEnum<SwFieldIds> const*) const
Line
Count
Source
124
5.96k
    {
125
36.1k
        while( pOptEnums->pName )
126
35.4k
        {
127
35.4k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
128
5.24k
            {
129
5.24k
                rEnum = pOptEnums->nValue;
130
5.24k
                return true;
131
5.24k
            }
132
30.1k
            pOptEnums++;
133
30.1k
        }
134
713
        return false;
135
5.96k
    }
bool HTMLOption::GetEnum<SwExtUserSubType>(SwExtUserSubType&, HTMLOptionEnum<SwExtUserSubType> const*) const
Line
Count
Source
124
375
    {
125
5.96k
        while( pOptEnums->pName )
126
5.60k
        {
127
5.60k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
128
19
            {
129
19
                rEnum = pOptEnums->nValue;
130
19
                return true;
131
19
            }
132
5.58k
            pOptEnums++;
133
5.58k
        }
134
356
        return false;
135
375
    }
Unexecuted instantiation: bool HTMLOption::GetEnum<SwAuthorFormat>(SwAuthorFormat&, HTMLOptionEnum<SwAuthorFormat> const*) const
Unexecuted instantiation: bool HTMLOption::GetEnum<SwPageNumSubType>(SwPageNumSubType&, HTMLOptionEnum<SwPageNumSubType> const*) const
Unexecuted instantiation: bool HTMLOption::GetEnum<SvxNumType>(SvxNumType&, HTMLOptionEnum<SvxNumType> const*) const
bool HTMLOption::GetEnum<SwDocInfoSubType>(SwDocInfoSubType&, HTMLOptionEnum<SwDocInfoSubType> const*) const
Line
Count
Source
124
3.66k
    {
125
26.5k
        while( pOptEnums->pName )
126
26.2k
        {
127
26.2k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
128
3.36k
            {
129
3.36k
                rEnum = pOptEnums->nValue;
130
3.36k
                return true;
131
3.36k
            }
132
22.8k
            pOptEnums++;
133
22.8k
        }
134
296
        return false;
135
3.66k
    }
Unexecuted instantiation: bool HTMLOption::GetEnum<SwDocStatSubType>(SwDocStatSubType&, HTMLOptionEnum<SwDocStatSubType> const*) const
Unexecuted instantiation: bool HTMLOption::GetEnum<SwFileNameFormat>(SwFileNameFormat&, HTMLOptionEnum<SwFileNameFormat> const*) const
bool HTMLOption::GetEnum<SvxAdjust>(SvxAdjust&, HTMLOptionEnum<SvxAdjust> const*) const
Line
Count
Source
124
2.11k
    {
125
6.48k
        while( pOptEnums->pName )
126
6.25k
        {
127
6.25k
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
128
1.89k
            {
129
1.89k
                rEnum = pOptEnums->nValue;
130
1.89k
                return true;
131
1.89k
            }
132
4.36k
            pOptEnums++;
133
4.36k
        }
134
228
        return false;
135
2.11k
    }
bool HTMLOption::GetEnum<HTMLScriptLanguage>(HTMLScriptLanguage&, HTMLOptionEnum<HTMLScriptLanguage> const*) const
Line
Count
Source
124
340
    {
125
846
        while( pOptEnums->pName )
126
796
        {
127
796
            if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
128
290
            {
129
290
                rEnum = pOptEnums->nValue;
130
290
                return true;
131
290
            }
132
506
            pOptEnums++;
133
506
        }
134
50
        return false;
135
340
    }
136
137
    // ... and as a few special enums
138
    HTMLInputType GetInputType() const;                 // <INPUT TYPE=...>
139
    HTMLTableFrame GetTableFrame() const;               // <TABLE FRAME=...>
140
    HTMLTableRules GetTableRules() const;               // <TABLE RULES=...>
141
    //SvxAdjust GetAdjust() const;                      // <P,TH,TD ALIGN=>
142
};
143
144
using HTMLOptions = ::std::vector<HTMLOption>; // sequence of HTMLOption objects, e.g. the options of one start tag
145
146
class SVT_DLLPUBLIC HTMLParser : public SvParser<HtmlTokenId>
147
{
148
private:
149
    mutable HTMLOptions maOptions; // options of the start tag
150
151
    bool bNewDoc        : 1;        // read new Doc?
152
    bool bIsInHeader    : 1;        // scan header section
153
    bool bReadListing   : 1;        // read listings
154
    bool bReadXMP       : 1;        // read XMP
155
    bool bReadPRE       : 1;        // read preformatted text
156
    bool bReadTextArea  : 1;        // read TEXTAREA
157
    bool bReadScript    : 1;        // read <SCRIPT>
158
    bool bReadStyle     : 1;        // read <STYLE>
159
    bool bEndTokenFound : 1;        // found </SCRIPT> or </STYLE>
160
161
    bool bPre_IgnoreNewPara : 1;    // flags for reading of PRE paragraphs
162
    bool bReadNextChar : 1;         // true: read NextChar again(JavaScript!)
163
    bool bReadComment : 1;          // NOTE(review): old text duplicated bReadNextChar's comment; presumably set while a comment is being read - confirm
164
165
    bool m_bPreserveSpaces : 1 = false;
166
167
    sal_uInt32 nPre_LinePos;            // Pos in the line in the PRE-Tag
168
169
    HtmlTokenId mnPendingOffToken;          ///< OFF token pending for a <XX.../> ON/OFF ON token
170
171
    OUString aEndToken;
172
173
    /// XML namespace, in case of XHTML.
174
    OUString maNamespace;
175
176
protected:
177
    OUString sSaveToken;             // the read tag as string
178
179
    HtmlTokenId ScanText( const sal_Unicode cBreak = 0U );
180
181
    HtmlTokenId GetNextRawToken();
182
183
    // scan next token
184
    virtual HtmlTokenId GetNextToken_() override;
185
186
    virtual ~HTMLParser() override;
187
188
13.7k
    void FinishHeader() { bIsInHeader = false; }
189
190
    void SetNamespace(std::u16string_view rNamespace);
191
192
232k
    bool GetPreserveSpaces() const { return m_bPreserveSpaces; }
193
76.6k
    void SetPreserveSpaces(bool val) { m_bPreserveSpaces = val; }
194
195
public:
196
    HTMLParser( SvStream& rIn, bool bReadNewDoc = true );
197
198
    virtual SvParserState CallParser() override;
199
200
502k
    bool IsNewDoc() const       { return bNewDoc; }
201
215k
    bool IsInHeader() const     { return bIsInHeader; }
202
312k
    bool IsReadListing() const  { return bReadListing; }
203
312k
    bool IsReadXMP() const      { return bReadXMP; }
204
2.10M
    bool IsReadPRE() const      { return bReadPRE; }
205
113k
    bool IsReadScript() const   { return bReadScript; }
206
104k
    bool IsReadStyle() const    { return bReadStyle; }
207
208
    // start PRE-/LISTING or XMP mode or filter tags respectively
209
    inline void StartPRE();
210
1.49k
    void FinishPRE() { bReadPRE = false; }
211
    HtmlTokenId FilterPRE( HtmlTokenId nToken );
212
213
    inline void StartListing();
214
87
    void FinishListing() { bReadListing = false; }
215
    HtmlTokenId FilterListing( HtmlTokenId nToken );
216
217
    inline void StartXMP();
218
316
    void FinishXMP() { bReadXMP = false; }
219
    HtmlTokenId FilterXMP( HtmlTokenId nToken );
220
221
781
    void FinishTextArea() { bReadTextArea = false; }
222
223
    // finish PRE-/LISTING- and XMP mode
224
624k
    void FinishPREListingXMP() { bReadPRE = bReadListing = bReadXMP = false; }
225
226
    // Filter the current token according to the current mode
227
    // (PRE, XMP, ...) and set the flags. Is called by Continue before
228
    // NextToken is called. If you implement own loops or call
229
    // NextToken yourself, you should call this method beforehand.
230
    HtmlTokenId FilterToken( HtmlTokenId nToken );
231
232
28
    void ReadRawData( const OUString &rEndToken ) { aEndToken = rEndToken; }
233
234
    // Token without \-sequences
235
    void UnescapeToken();
236
237
    // Determine the options. pNoConvertToken is the optional token
238
    // of an option, for which the CR/LFs are not deleted from the value
239
    // of the option.
240
    const HTMLOptions& GetOptions( HtmlOptionId const *pNoConvertToken=nullptr );
241
242
    // for asynchronous reading from the SvStream
243
    virtual void Continue( HtmlTokenId nToken ) override;
244
245
246
protected:
247
248
    static rtl_TextEncoding GetEncodingByMIME( const OUString& rMime );
249
250
    /// template method: called when ParseMetaOptions adds a user-defined meta
251
    virtual void AddMetaUserDefined( OUString const & i_rMetaName );
252
253
private:
254
    /// parse meta options into XDocumentProperties and encoding
255
    bool ParseMetaOptionsImpl( const css::uno::Reference< css::document::XDocumentProperties>&,
256
            SvKeyValueIterator*,
257
            const HTMLOptions&,
258
            rtl_TextEncoding& rEnc );
259
260
public:
261
    /// overriding method must call this implementation!
262
    virtual bool ParseMetaOptions( const css::uno::Reference< css::document::XDocumentProperties>&,
263
            SvKeyValueIterator* );
264
265
    void ParseScriptOptions( OUString& rLangString, std::u16string_view rBaseURL, HTMLScriptLanguage& rLang,
266
                             OUString& rSrc, OUString& rLibrary, OUString& rModule );
267
268
    // Remove a comment around the content of <SCRIPT> or <STYLE>.
269
    // The whole line behind a "<!--" might be deleted (for JavaScript).
270
    static void RemoveSGMLComment( OUString &rString );
271
272
    static bool InternalImgToPrivateURL( OUString& rURL );
273
    static rtl_TextEncoding GetEncodingByHttpHeader( SvKeyValueIterator *pHTTPHeader );
274
    bool SetEncodingByHTTPHeader( SvKeyValueIterator *pHTTPHeader );
275
};
276
277
inline void HTMLParser::StartPRE()  // enter PRE mode; IsReadPRE() reports true until FinishPRE()/FinishPREListingXMP() clears it
278
36.0k
{
279
36.0k
    bReadPRE = true;
280
36.0k
    bPre_IgnoreNewPara = true;  // same flag is raised by StartListing() and StartXMP()
281
36.0k
    nPre_LinePos = 0;           // restart the position-in-line counter
282
36.0k
}
283
284
inline void HTMLParser::StartListing()  // enter LISTING mode; IsReadListing() reports true until FinishListing()/FinishPREListingXMP() clears it
285
1.19k
{
286
1.19k
    bReadListing = true;
287
1.19k
    bPre_IgnoreNewPara = true;  // same flag is raised by StartPRE() and StartXMP()
288
1.19k
    nPre_LinePos = 0;           // restart the position-in-line counter
289
1.19k
}
290
291
inline void HTMLParser::StartXMP()  // enter XMP mode; IsReadXMP() reports true until FinishXMP()/FinishPREListingXMP() clears it
292
9.03k
{
293
9.03k
    bReadXMP = true;
294
9.03k
    bPre_IgnoreNewPara = true;  // same flag is raised by StartPRE() and StartListing()
295
9.03k
    nPre_LinePos = 0;           // restart the position-in-line counter
296
9.03k
}
297
298
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */