/src/libreoffice/sax/inc/xml2utf.hxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #pragma once |
21 | | |
22 | | #include <sal/config.h> |
23 | | |
24 | | #include <memory> |
25 | | |
26 | | #include <sal/types.h> |
27 | | #include <rtl/string.hxx> |
28 | | |
29 | | #include <com/sun/star/io/XInputStream.hpp> |
30 | | |
31 | | namespace sax_expatwrap { |
32 | | |
33 | | class Text2UnicodeConverter |
34 | | { |
35 | | /* Turns byte sequences into sal_Unicode sequences; the source encoding is chosen via the sEncoding constructor argument. */ |
36 | | public: |
37 | | Text2UnicodeConverter( const OString & sEncoding ); |
38 | | ~Text2UnicodeConverter(); |
39 | | /* convert() maps a byte sequence to a sal_Unicode sequence; canContinue() exposes the m_bCanContinue flag. */ |
40 | | css::uno::Sequence < sal_Unicode > convert( const css::uno::Sequence<sal_Int8> & ); |
41 | 56.9k | bool canContinue() const { return m_bCanContinue; } |
42 | | /* NOTE(review): when canContinue() turns false is decided in the implementation file, not visible here. */ |
43 | | private: |
44 | | void init( rtl_TextEncoding encoding ); |
45 | | /* Conversion handle and context, followed by state flags and a byte buffer. */ |
46 | | rtl_TextToUnicodeConverter m_convText2Unicode; |
47 | | rtl_TextToUnicodeContext m_contextText2Unicode; |
48 | | bool m_bCanContinue; /* value returned by canContinue() */ |
49 | | bool m_bInitialized; |
50 | | css::uno::Sequence<sal_Int8> m_seqSource; /* NOTE(review): presumably input bytes carried over between convert() calls; confirm */ |
51 | | }; |
52 | | |
53 | | /*---------------------------------------- |
54 | | * |
55 | | * Unicode2TextConverter |
56 | | * Opposite direction of Text2UnicodeConverter: sal_Unicode input, byte-sequence output. |
57 | | **-----------------------------------------*/ |
58 | | class Unicode2TextConverter |
59 | | { |
60 | | public: |
61 | | Unicode2TextConverter( rtl_TextEncoding encoding ); |
62 | | ~Unicode2TextConverter(); |
63 | | /* Takes a sal_Unicode buffer plus nLength (presumably the number of units in it; TODO confirm) and returns bytes. */ |
64 | | css::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength ); |
65 | | /* Conversion handle and context, plus a sal_Unicode buffer. */ |
66 | | private: |
67 | | rtl_UnicodeToTextConverter m_convUnicode2Text; |
68 | | rtl_UnicodeToTextContext m_contextUnicode2Text; |
69 | | css::uno::Sequence<sal_Unicode> m_seqSource; /* NOTE(review): presumably input carried over between convert() calls; confirm */ |
70 | | }; |
71 | | |
72 | | |
73 | | /*---------------------------------------- |
74 | | * |
75 | | * XMLFile2UTFConverter |
76 | | * Holds an XInputStream (setInputStream) and an encoding name (setEncoding); readAndConvert() is its data-fetching entry point. |
77 | | **-----------------------------------------*/ |
78 | | class XMLFile2UTFConverter |
79 | | { |
80 | | public: |
81 | | XMLFile2UTFConverter( ): |
82 | 303k | m_bStarted( false ) |
83 | 303k | {} |
84 | | /* Plain setters: store the stream reference in m_in and the encoding name in m_sEncoding. */ |
85 | 303k | void setInputStream( css::uno::Reference< css::io::XInputStream > const &r ) { m_in = r; } |
86 | 0 | void setEncoding( const OString &s ) { m_sEncoding = s; } |
87 | | /* NOTE(review): readAndConvert() presumably fills seq and returns the amount delivered; confirm in the implementation. */ |
88 | | |
89 | | // @param nMaxToRead The number of chars that should be read. This is not an exact number: |
90 | | // fewer or more bytes than requested may be returned. |
91 | | /// @throws css::io::IOException |
92 | | /// @throws css::io::NotConnectedException |
93 | | /// @throws css::io::BufferSizeExceededException |
94 | | /// @throws css::uno::RuntimeException |
95 | | sal_Int32 readAndConvert( css::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead ); |
96 | | |
97 | | private: |
98 | | |
99 | | // Called only on the first sequence of bytes. Tries to figure out the file format and encoding information. |
100 | | // @return TRUE when encoding information could be retrieved |
101 | | // @return FALSE when no encoding information was found in the file |
102 | | bool scanForEncoding( css::uno::Sequence<sal_Int8> &seq ); |
103 | | |
104 | | // Called only on the first sequence of bytes. Tries to figure out |
105 | | // whether enough data is available to scan for the encoding |
106 | | // @return TRUE when the encoding is retrievable |
107 | | // @return FALSE when more data is needed |
108 | | static bool isEncodingRecognizable( const css::uno::Sequence< sal_Int8 > & seq ); |
109 | | |
110 | | // Removes the encoding attribute when it appears within the text (in the first line). |
111 | | static void removeEncoding( css::uno::Sequence<sal_Int8> &seq ); |
112 | | |
113 | | // Initializes decoding depending on the m_sEncoding setting |
114 | | void initializeDecoding(); |
115 | | private: |
116 | | css::uno::Reference< css::io::XInputStream > m_in; /* set by setInputStream() */ |
117 | | |
118 | | bool m_bStarted; /* false after construction */ |
119 | | OString m_sEncoding; /* set by setEncoding() */ |
120 | | /* Owned converter objects; NOTE(review): presumably created by initializeDecoding(); confirm. */ |
121 | | std::unique_ptr<Text2UnicodeConverter> m_pText2Unicode; |
122 | | std::unique_ptr<Unicode2TextConverter> m_pUnicode2Text; |
123 | | }; |
124 | | } |
125 | | |
126 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |