/src/libreoffice/sc/source/ui/inc/impex.hxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #pragma once |
21 | | |
22 | | #include <o3tl/deleter.hxx> |
23 | | #include <sot/formats.hxx> |
24 | | #include <address.hxx> |
25 | | #include <tools/stream.hxx> |
26 | | |
27 | | #include <com/sun/star/uno/Any.hxx> |
28 | | |
29 | | class ScDocShell; |
30 | | class ScDocument; |
31 | | class ScAsciiOptions; |
32 | | |
33 | | /** |
34 | | * These options control how multi-line cells are converted during export in |
35 | | * certain lossy formats (such as csv). |
36 | | */ |
37 | | struct ScExportTextOptions |
38 | | { |
39 | | enum NewlineConversion { ToSystem, ToSpace, None }; |
40 | | ScExportTextOptions( NewlineConversion eNewlineConversion = ToSystem, sal_Unicode cSeparatorConvertTo = 0, bool bAddQuotes = false ) : |
41 | 49.2k | meNewlineConversion( eNewlineConversion ), mcSeparatorConvertTo( cSeparatorConvertTo ), mbAddQuotes( bAddQuotes ) {} |
42 | | |
43 | | NewlineConversion meNewlineConversion; |
44 | | sal_Unicode mcSeparatorConvertTo; // Convert separator to this character |
45 | | bool mbAddQuotes; |
46 | | }; |
47 | | |
48 | | class SAL_DLLPUBLIC_RTTI ScImportExport |
49 | | { |
50 | | ScDocShell* pDocSh; |
51 | | ScDocument& rDoc; |
52 | | std::unique_ptr<ScDocument, o3tl::default_delete<ScDocument>> pUndoDoc; |
53 | | ScRange aRange; |
54 | | OUString aStreamPath; |
55 | | OUString aNonConvertibleChars; |
56 | | OUString maFilterOptions; |
57 | | sal_uInt32 nSizeLimit; |
58 | | SCROW nMaxImportRow; |
59 | | sal_Unicode cSep; // Separator |
60 | | sal_Unicode cStr; // String Delimiter |
61 | | bool bFormulas; // Formula in Text? |
62 | | bool bIncludeFiltered; // include filtered rows? (default true) |
63 | | bool bAll; // no selection |
64 | | bool bSingle; // Single selection |
65 | | bool bUndo; // with Undo? |
66 | | bool bOverflowRow; // too many rows |
67 | | bool bOverflowCol; // too many columns |
68 | | bool bOverflowCell; // too much data for a cell |
69 | | bool mbApi; |
70 | | bool mbImportBroadcast; // whether or not to broadcast after data import. |
71 | | bool mbOverwriting; // Whether we could be overwriting existing values (paste). |
72 | | // In this case we cannot use the insert optimization, but we |
73 | | // do not need to broadcast after the import. |
74 | | bool mbIncludeBOM; // Whether to include a byte-order-mark in the output. |
75 | | ScExportTextOptions mExportTextOptions; |
76 | | |
77 | | std::unique_ptr<ScAsciiOptions> pExtOptions; // extended options |
78 | | |
79 | | bool StartPaste(); // Protect check, set up Undo |
80 | | void EndPaste(bool bAutoRowHeight = true); // Undo/Redo actions, Repaint |
81 | | bool Doc2Text( SvStream& ); |
82 | | bool Text2Doc( SvStream& ); |
83 | | bool Doc2Sylk( SvStream& ); |
84 | | bool Sylk2Doc( SvStream& ); |
85 | | bool Doc2HTML( SvStream&, const OUString& ); |
86 | | bool Doc2RTF( SvStream& ); |
87 | | bool Doc2Dif( SvStream& ); |
88 | | bool Dif2Doc( SvStream& ); |
89 | | bool ExtText2Doc( SvStream& ); // with pExtOptions |
90 | | bool RTF2Doc( SvStream&, const OUString& rBaseURL ); |
91 | | bool HTML2Doc( SvStream&, const OUString& rBaseURL ); |
92 | | |
93 | | public: |
94 | | ScImportExport( ScDocument& ); // the whole document |
95 | | ScImportExport( ScDocument&, const OUString& ); // Range/cell input |
96 | | SC_DLLPUBLIC ScImportExport( ScDocument&, const ScAddress& ); |
97 | | SC_DLLPUBLIC ScImportExport( ScDocument&, const ScRange& ); |
98 | | SC_DLLPUBLIC ~ScImportExport(); |
99 | | |
100 | | void SetExtOptions( const ScAsciiOptions& rOpt ); |
101 | | void SetFilterOptions( const OUString& rFilterOptions ); |
102 | 0 | bool IsRef() const { return !bAll; } |
103 | | |
104 | 0 | const ScRange& GetRange() const { return aRange; } |
105 | | |
106 | | SC_DLLPUBLIC static void EmbeddedNullTreatment( OUString & rStr ); |
107 | | |
108 | | static bool IsFormatSupported( SotClipboardFormatId nFormat ); |
109 | | static const sal_Unicode* ScanNextFieldFromString( const sal_Unicode* p, |
110 | | OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, |
111 | | bool bMergeSeps, bool& rbIsQuoted, bool& rbOverflowCell, bool bRemoveSpace ); |
112 | | |
113 | | /** ScImportExport::CountVisualWidth |
114 | | Count the width of string visually ( in multiple of western characters), considering CJK |
115 | | ideographs and CJK symbols (U+3000-U+303F) as twice the width of western characters. |
116 | | @param rStr the string. |
117 | | @param nIdx the starting index, index is incremented for each counted character. |
118 | | @param nMaxWidth the maximum width to count. |
119 | | @return the sum of the width of counted characters. |
120 | | **/ |
121 | | static sal_Int32 CountVisualWidth(std::u16string_view rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth); |
122 | | |
123 | | /** ScImportExport::CountVisualWidth |
124 | | @return the sum of the visual width of the whole string. |
125 | | **/ |
126 | | static sal_Int32 CountVisualWidth(std::u16string_view rStr); |
127 | | |
128 | 0 | void SetSeparator( sal_Unicode c ) { cSep = c; } |
129 | 0 | void SetDelimiter( sal_Unicode c ) { cStr = c; } |
130 | 0 | void SetFormulas( bool b ) { bFormulas = b; } |
131 | 0 | void SetIncludeFiltered( bool b ) { bIncludeFiltered = b; } |
132 | | |
133 | 0 | void SetStreamPath( const OUString& rPath ) { aStreamPath = rPath; } |
134 | | |
135 | | bool ImportString( const OUString&, SotClipboardFormatId ); |
136 | | bool ExportString( OUString&, SotClipboardFormatId ); |
137 | | bool ExportByteString( OString&, rtl_TextEncoding, SotClipboardFormatId ); |
138 | | |
139 | | SC_DLLPUBLIC bool ImportStream( SvStream&, const OUString& rBaseURL, SotClipboardFormatId ); |
140 | | SC_DLLPUBLIC bool ExportStream( SvStream&, const OUString& rBaseURL, SotClipboardFormatId ); |
141 | | |
142 | | bool ExportData( std::u16string_view rMimeType, |
143 | | css::uno::Any & rValue ); |
144 | | |
145 | | // after import |
146 | 0 | bool IsOverflowRow() const { return bOverflowRow; } |
147 | 0 | bool IsOverflowCol() const { return bOverflowCol; } |
148 | 0 | bool IsOverflowCell() const { return bOverflowCell; } |
149 | 0 | bool IsOverflow() const { return bOverflowRow || bOverflowCol || bOverflowCell; } |
150 | | |
151 | 0 | const OUString& GetNonConvertibleChars() const { return aNonConvertibleChars; } |
152 | | |
153 | 0 | void SetApi( bool bApi ) { mbApi = bApi; } |
154 | 0 | void SetImportBroadcast( bool b ) { mbImportBroadcast = b; } |
155 | 0 | void SetOverwriting( const bool bOverwriting ) { mbOverwriting = bOverwriting; } |
156 | 0 | void SetExportTextOptions( const ScExportTextOptions& options ) { mExportTextOptions = options; } |
157 | | |
158 | 0 | bool GetIncludeBOM() const { return mbIncludeBOM; } |
159 | | }; |
160 | | |
161 | | // Helper class for importing clipboard strings as streams. |
162 | | class ScImportStringStream : public SvMemoryStream |
163 | | { |
164 | | public: |
165 | | ScImportStringStream(const OUString& rStr); |
166 | | }; |
167 | | |
168 | | /** Read a CSV (comma separated values) data line using |
169 | | ReadUniOrByteStringLine(). |
170 | | |
171 | | @param bEmbeddedLineBreak |
172 | | If TRUE and a line-break occurs inside a field of data, |
173 | | a line feed LF '\n' and the next line are appended. Repeats |
174 | | until a line-break is not in a field. A field is determined |
175 | | by delimiting rFieldSeparators and optionally surrounded by |
176 | | a pair of cFieldQuote characters. For a line-break to be |
177 | | within a field, the field content MUST be surrounded by |
178 | | cFieldQuote characters, and the opening cFieldQuote MUST be |
179 | | at the very start of a line or follow right behind a field |
180 | | separator with no extra characters in between, with the |
181 | | exception of blanks contradictory to RFC 4180. Anything, |
182 | | including field separators and escaped quotes (by doubling |
183 | | them) may appear in a quoted field. |
184 | | |
185 | | If bEmbeddedLineBreak==FALSE, nothing is parsed and the |
186 | | string returned is simply one ReadUniOrByteStringLine(). |
187 | | |
188 | | @param rFieldSeparators |
189 | | A list of characters that each may act as a field separator. |
190 | | If rcDetectSep was 0 and a separator is detected then it is appended to |
191 | | rFieldSeparators. |
192 | | |
193 | | @param cFieldQuote |
194 | | The quote character used. |
195 | | |
196 | | @param rcDetectSep |
197 | | If 0 then attempt to detect a possible separator if |
198 | | rFieldSeparators doesn't include it already. This can be necessary because |
199 | | of the "accept broken misquoted CSV fields" feature that tries to ignore |
200 | | trailing blanks after a quoted field and if no separator follows continues |
201 | | to add content to the field assuming the single double quote was in error. |
202 | | It is also necessary if the only possible separator was not selected and |
203 | | not included in rFieldSeparators and a line starts with a quoted field, in |
204 | | which case appending lines is tried until end of file. |
205 | | If a separator is detected it is added to rFieldSeparators and the |
206 | | line is reread with the new separators |
207 | | |
208 | | @param nMaxSourceLines |
209 | | Maximum source lines to read and combine into one logical line for embedded |
210 | | new line purpose. Should be limited for the preview dialog because only |
211 | | non-matching separators selected otherwise would lead to trying to |
212 | | concatenate lines until file end. |
213 | | If 0 no limit other than the internal arbitrary resulting line length |
214 | | limit. |
215 | | |
216 | | check Stream::good() to detect IO problems during read |
217 | | |
218 | | @ATTENTION |
219 | | Note that the string returned may be truncated even inside |
220 | | a quoted field if some (arbitrary) maximum length was reached. |
221 | | There currently is no way to exactly determine the conditions, |
222 | | whether this was at a line end, or whether open quotes |
223 | | would have closed the field before the line end, as even a |
224 | | ReadUniOrByteStringLine() may return prematurely but the |
225 | | stream was positioned ahead until the real end of line. |
226 | | Additionally, due to character encoding conversions, string |
227 | | length and bytes read don't necessarily match, and |
228 | | resyncing to a previous position matching the string's |
229 | | length isn't always possible. As a result, a logical line |
230 | | with embedded line breaks and more than the maximum length |
231 | | characters will be spoiled, and a subsequent ReadCsvLine() |
232 | | may start under false preconditions. |
233 | | |
234 | | */ |
235 | | SC_DLLPUBLIC OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak, |
236 | | OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep, |
237 | | sal_uInt32 nMaxSourceLines = 0 ); |
238 | | |
239 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |