Coverage Report

Created: 2025-12-31 10:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/include/xmlreader/xmlreader.hxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#pragma once
21
22
#include <sal/config.h>
23
24
#include <stack>
25
#include <vector>
26
27
#include <osl/file.h>
28
#include <rtl/ustring.hxx>
29
#include <sal/types.h>
30
#include <xmlreader/detail/xmlreaderdllapi.hxx>
31
#include <xmlreader/pad.hxx>
32
#include <xmlreader/span.hxx>
33
34
namespace xmlreader {
35
36
// Reader for an XML file identified by a URL.  Callers fetch items one at a
// time with nextItem() and, for an element, walk its attributes with
// nextAttribute()/getAttributeValue().  Instances cannot be copied (the copy
// constructor and copy assignment operator are deleted below).
// NOTE(review): only declarations are visible in this header; any behaviour
// beyond what the signatures show should be confirmed in the implementation.
class OOO_DLLPUBLIC_XMLREADER XmlReader {
37
public:
38
    explicit XmlReader(OUString fileUrl);
39
40
    ~XmlReader();
41
42
    // Reserved namespace ids.  NOTE(review): judging by the names, the two
    // negative values mark "no namespace" / "unknown namespace" and 0 is the
    // id of the predefined xml namespace -- confirm against the implementation.
    enum { NAMESPACE_NONE = -2, NAMESPACE_UNKNOWN = -1, NAMESPACE_XML = 0 };
43
44
    // Type of nextItem's reportText parameter.  NOTE(review): presumably
    // selects whether, and in which form, text content is reported -- confirm.
    enum class Text { NONE, Raw, Normalized };
45
46
    // Return type of nextItem and of the private handle* helpers below.
    enum class Result { Begin, End, Text, Done };
47
48
    int registerNamespaceIri(Span const & iri);
49
50
    // Result::Begin: data = localName, nsId = ns
51
    // Result::End: data, nsId unused
52
    // Result::Text: data = text, nsId unused
53
    Result nextItem(Text reportText, Span * data, int * nsId);
54
55
    bool nextAttribute(int * nsId, Span * localName);
56
57
    // the span returned by getAttributeValue is only valid until the next call
58
    // to nextItem or getAttributeValue
59
    Span getAttributeValue(bool fullyNormalize);
60
61
    int getNamespaceId(Span const & prefix) const;
62
63
95.0k
    // Returns a reference to the stored fileUrl_ member, so the reference is
    // only usable while this XmlReader object is alive.
    const OUString& getUrl() const { return fileUrl_;}
64
65
private:
66
    XmlReader(const XmlReader&) = delete;
67
    XmlReader& operator=(const XmlReader&) = delete;
68
69
    typedef std::vector< Span > NamespaceIris;
70
71
    // If NamespaceData (and similarly ElementData and AttributeData) is made
72
    // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about
73
    // "'xmlreader::XmlReader' declared with greater visibility than the type of
74
    // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for
75
    // elements_ and attributes_):
76
77
    // Pairs a namespace prefix with a namespace id; stored in NamespaceList.
    struct NamespaceData {
78
        Span prefix;
79
        int nsId;
80
81
        // Default state: default-constructed prefix and nsId -1 (the same
        // value as NAMESPACE_UNKNOWN).
        NamespaceData():
82
0
            nsId(-1) {}
83
84
        NamespaceData(Span const & thePrefix, int theNsId):
85
108
            prefix(thePrefix), nsId(theNsId) {}
86
    };
87
88
    typedef std::vector< NamespaceData > NamespaceList;
89
90
    // Entry type of the elements_ stack (see ElementStack below).
    // NOTE(review): inheritedNamespaces is presumably the size of namespaces_
    // at the time the element was opened, so it can be restored when the
    // element ends -- confirm against the implementation.
    struct ElementData {
91
        Span name;
92
        NamespaceList::size_type inheritedNamespaces;
93
        int defaultNamespaceId;
94
95
        ElementData(
96
            Span const & theName,
97
            NamespaceList::size_type theInheritedNamespaces,
98
            int theDefaultNamespaceId):
99
237k
            name(theName), inheritedNamespaces(theInheritedNamespaces),
100
237k
            defaultNamespaceId(theDefaultNamespaceId)
101
237k
        {}
102
    };
103
104
    typedef std::stack< ElementData > ElementStack;
105
106
    // Plain character-pointer positions describing one attribute's name and
    // value; the struct only stores the pointers and does not own the data.
    // NOTE(review): presumably nameColon marks the ':' of a prefixed name and
    // all pointers refer into the buffer being parsed -- confirm.
    struct AttributeData {
107
        char const * nameBegin;
108
        char const * nameEnd;
109
        char const * nameColon;
110
        char const * valueBegin;
111
        char const * valueEnd;
112
113
        AttributeData(
114
            char const * theNameBegin, char const * theNameEnd,
115
            char const * theNameColon, char const * theValueBegin,
116
            char const * theValueEnd):
117
367k
            nameBegin(theNameBegin), nameEnd(theNameEnd),
118
367k
            nameColon(theNameColon), valueBegin(theValueBegin),
119
367k
            valueEnd(theValueEnd)
120
367k
        {}
121
    };
122
123
    typedef std::vector< AttributeData > Attributes;
124
125
    enum class State { Content, StartTag, EndTag, EmptyElementTag, Done };
126
127
735k
    // Returns the character at pos_ and advances pos_ by one; once pos_ has
    // reached end_ it returns '\0' and leaves pos_ unchanged.  A '\0' result
    // alone therefore does not distinguish end of input from a NUL character
    // in the input.
    SAL_DLLPRIVATE char read() { return pos_ == end_ ? '\0' : *pos_++; }
128
129
10.6M
    // Same as read() but never advances pos_: returns the character at pos_,
    // or '\0' when pos_ == end_.
    SAL_DLLPRIVATE char peek() const { return pos_ == end_ ? '\0' : *pos_; }
130
131
    SAL_DLLPRIVATE void normalizeLineEnds(Span const & text);
132
133
    SAL_DLLPRIVATE void skipSpace();
134
135
    SAL_DLLPRIVATE bool skipComment();
136
137
    SAL_DLLPRIVATE void skipProcessingInstruction();
138
139
    SAL_DLLPRIVATE void skipDocumentTypeDeclaration();
140
141
    SAL_DLLPRIVATE Span scanCdataSection();
142
143
    SAL_DLLPRIVATE bool scanName(char const ** nameColon);
144
145
    SAL_DLLPRIVATE int scanNamespaceIri(
146
        char const * begin, char const * end);
147
148
    SAL_DLLPRIVATE char const * handleReference(
149
        char const * position, char const * end);
150
151
    SAL_DLLPRIVATE Span handleAttributeValue(
152
        char const * begin, char const * end, bool fullyNormalize);
153
154
    SAL_DLLPRIVATE Result handleStartTag(int * nsId, Span * localName);
155
156
    SAL_DLLPRIVATE Result handleEndTag();
157
158
    SAL_DLLPRIVATE void handleElementEnd();
159
160
    SAL_DLLPRIVATE Result handleSkippedText(Span * data, int * nsId);
161
162
    SAL_DLLPRIVATE Result handleRawText(Span * text);
163
164
    SAL_DLLPRIVATE Result handleNormalizedText(Span * text);
165
166
    SAL_DLLPRIVATE static int toNamespaceId(NamespaceIris::size_type pos);
167
168
    // URL returned by getUrl().
    OUString const fileUrl_;
169
    // NOTE(review): fileHandle_, fileSize_ and fileAddress_ look like the
    // state of an opened/mapped file -- confirm in the constructor/destructor.
    oslFileHandle fileHandle_;
170
    sal_uInt64 fileSize_;
171
    void * fileAddress_;
172
    NamespaceIris namespaceIris_;
173
    NamespaceList namespaces_;
174
    ElementStack elements_;
175
    // Current read position and one-past-the-end of the input, as used by
    // read() and peek().
    char const * pos_;
176
    char const * end_;
177
    State state_;
178
    Attributes attributes_;
179
    Attributes::iterator currentAttribute_;
180
    bool firstAttribute_;
181
    Pad pad_;
182
};
183
184
}
185
186
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */