/src/libreoffice/svtools/source/svrtf/parrtf.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <sal/config.h> |
21 | | #include <sal/log.hxx> |
22 | | |
23 | | #include <comphelper/scopeguard.hxx> |
24 | | |
25 | | #include <rtl/character.hxx> |
26 | | #include <rtl/strbuf.hxx> |
27 | | #include <rtl/tencinfo.h> |
28 | | #include <rtl/ustrbuf.hxx> |
29 | | #include <tools/stream.hxx> |
30 | | #include <tools/debug.hxx> |
31 | | #include <svtools/rtftoken.h> |
32 | | #include <svtools/parrtf.hxx> |
33 | | |
// Length limit, in UTF-16 code units, for the text buffer of ScanText(): ScanText() stops
// collecting once its buffer has reached this many code units.
34 | | const int MAX_STRING_LEN = 1024; |
35 | | |
// ASCII-only digit / letter tests used by the tokenizer; they forward to
// rtl::isAsciiDigit and rtl::isAsciiAlpha respectively.
36 | 4.67M | #define RTF_ISDIGIT( c ) rtl::isAsciiDigit(c) |
37 | 12.9M | #define RTF_ISALPHA( c ) rtl::isAsciiAlpha(c) |
38 | | |
39 | | SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize ) |
40 | 9.66k | : SvParser<int>( rIn, nStackSize ) |
41 | 9.66k | , nOpenBrackets(0) |
42 | 9.66k | , nUPRLevel(0) |
43 | 9.66k | , eCodeSet(RTL_TEXTENCODING_MS_1252) |
44 | 9.66k | , nUCharOverread(1) |
45 | 9.66k | { |
46 | | // default is ANSI-CodeSet |
47 | 9.66k | SetSrcEncoding( RTL_TEXTENCODING_MS_1252 ); |
48 | 9.66k | bRTF_InTextRead = false; |
49 | 9.66k | } |
50 | | |
51 | | SvRTFParser::~SvRTFParser() |
52 | 9.66k | { |
53 | 9.66k | } |
54 | | |
55 | | |
// Produces the next RTF token, starting from the look-ahead character nNextCh.
//
// Return value:
//   - '{' / '}' for group delimiters (the parser-state stack is pushed / popped here),
//   - the EOF character value when the input is exhausted (eState becomes Accepted),
//   - RTF_TEXTTOKEN when text was collected into aToken via ScanText(),
//   - RTF_IGNOREFLAG, RTF_SUBENTRYINDEX, RTF_FORMULA, RTF_PAR for the control symbols
//     \*  \:  \|  and backslash + CR/LF,
//   - otherwise whatever GetRTFToken() yields for the control word, or
//     RTF_UNKNOWNCONTROL when it yields 0.
// A numeric parameter following a control word is stored in nTokenValue and
// bTokenHasValue is set to true.
//
// The outer loop repeats as long as no token was produced (nRet == 0) and eState is
// still Working; this is how bare CR/LF and \ucN are consumed without being reported.
56 | | int SvRTFParser::GetNextToken_() |
57 | 2.17M | { |
58 | 2.17M | int nRet = 0; |
59 | 2.31M | do { |
// bNextCh: whether a fresh look-ahead character must be fetched at the end of this round
60 | 2.31M | bool bNextCh = true; |
61 | 2.31M | switch( nNextCh ) |
62 | 2.31M | { |
63 | 1.05M | case '\\': |
64 | 1.05M | { |
65 | | // control characters |
66 | 1.05M | nNextCh = GetNextChar(); |
67 | 1.05M | switch( nNextCh ) |
68 | 1.05M | { |
// Escapes that are part of running text: make the backslash the look-ahead again,
// step the stream back by one and let ScanText() decode them.
69 | 3.48k | case '{': |
70 | 3.88k | case '}': |
71 | 15.1k | case '\\': |
72 | 15.3k | case '+': // I found it in a RTF-file |
73 | 16.3k | case '~': // nonbreaking space |
74 | 16.8k | case '-': // optional hyphen |
75 | 16.9k | case '_': // nonbreaking hyphen |
76 | 22.4k | case '\'': // HexValue |
77 | 22.4k | nNextCh = '\\'; |
78 | 22.4k | rInput.SeekRel( -1 ); |
79 | 22.4k | ScanText(); |
80 | 22.4k | nRet = RTF_TEXTTOKEN; |
// ScanText() normally leaves a valid look-ahead; only fetch a new one if it left 0
81 | 22.4k | bNextCh = 0 == nNextCh; |
82 | 22.4k | break; |
83 | | |
84 | 12.6k | case '*': // ignoreflag |
85 | 12.6k | nRet = RTF_IGNOREFLAG; |
86 | 12.6k | break; |
87 | 1.79k | case ':': // subentry in an index entry |
88 | 1.79k | nRet = RTF_SUBENTRYINDEX; |
89 | 1.79k | break; |
90 | 236 | case '|': // formula-character |
91 | 236 | nRet = RTF_FORMULA; |
92 | 236 | break; |
93 | | |
// a backslash directly followed by LF or CR is reported as RTF_PAR
94 | 287k | case 0x0a: |
95 | 297k | case 0x0d: |
96 | 297k | nRet = RTF_PAR; |
97 | 297k | break; |
98 | | |
99 | 723k | default: |
100 | 723k | if( RTF_ISALPHA( nNextCh ) ) |
101 | 659k | { |
// control word: backslash followed by a run of ASCII letters
102 | 659k | aToken = "\\"; |
103 | 659k | { |
104 | 2.43M | do { |
105 | 2.43M | aToken.appendUtf32(nNextCh); |
106 | 2.43M | nNextCh = GetNextChar(); |
107 | 2.43M | } while( RTF_ISALPHA( nNextCh ) ); |
108 | 659k | } |
109 | | |
110 | | // minus before numeric parameters |
111 | 659k | bool bNegValue = false; |
112 | 659k | if( '-' == nNextCh ) |
113 | 19.9k | { |
114 | 19.9k | bNegValue = true; |
115 | 19.9k | nNextCh = GetNextChar(); |
116 | 19.9k | } |
117 | | |
118 | | // possible numeric parameter |
119 | 659k | if( RTF_ISDIGIT( nNextCh ) ) |
120 | 271k | { |
121 | 271k | OUStringBuffer aNumber; |
122 | 594k | do { |
123 | 594k | aNumber.append(static_cast<sal_Unicode>(nNextCh)); |
124 | 594k | nNextCh = GetNextChar(); |
125 | 594k | } while( RTF_ISDIGIT( nNextCh ) ); |
126 | 271k | nTokenValue = OUString::unacquired(aNumber).toInt32(); |
127 | 271k | if( bNegValue ) |
128 | 10.5k | nTokenValue = -nTokenValue; |
129 | 271k | bTokenHasValue=true; |
130 | 271k | } |
// the '-' was not followed by a digit, so it is not a sign: make it the look-ahead
// again and step the stream back by one
131 | 387k | else if( bNegValue ) // restore minus |
132 | 9.40k | { |
133 | 9.40k | nNextCh = '-'; |
134 | 9.40k | rInput.SeekRel( -1 ); |
135 | 9.40k | } |
136 | 659k | if( ' ' == nNextCh ) // blank is part of token! |
137 | 84.3k | nNextCh = GetNextChar(); |
138 | | |
139 | | // search for the token in the table: |
140 | 659k | if( 0 == (nRet = GetRTFToken( aToken )) ) |
141 | | // Unknown Control |
142 | 99.7k | nRet = RTF_UNKNOWNCONTROL; |
143 | | |
144 | | // bug 76812 - unicode token handled as normal text |
// the look-ahead already holds the character after the control word
145 | 659k | bNextCh = false; |
146 | 659k | switch( nRet ) |
147 | 659k | { |
// \ucN: a non-negative N becomes the new overread count (narrowed to sal_uInt8) and is
// also stored in the top parser state; the token itself is not reported (nRet = 0)
148 | 2.58k | case RTF_UC: |
149 | 2.58k | if( 0 <= nTokenValue ) |
150 | 1.76k | { |
151 | 1.76k | nUCharOverread = static_cast<sal_uInt8>(nTokenValue); |
152 | 1.76k | if (!aParserStates.empty()) |
153 | 1.52k | { |
154 | | //cmc: other ifdef breaks #i3584 |
155 | 1.52k | aParserStates.top().nUCharOverread = nUCharOverread; |
156 | 1.52k | } |
157 | 1.76k | } |
158 | 2.58k | aToken.setLength( 0 ); // #i47831# erase token to prevent the token from being treated as text |
159 | | // read next token |
160 | 2.58k | nRet = 0; |
161 | 2.58k | break; |
162 | | |
// \upr (only when not already inside SkipGroup()): read tokens up to the next '{',
// skip that group, read one more token and report nothing (nRet = 0).
// nUPRLevel bounds the recursion through GetNextToken_(); beyond 256 the parser
// goes into the Error state and RTF_UPR is returned as is.
163 | 4.36k | case RTF_UPR: |
164 | 4.36k | if (!_inSkipGroup) |
165 | 3.54k | { |
166 | 3.54k | if (nUPRLevel > 256) // fairly sure > 1 is probably an error, but provide some leeway |
167 | 25 | { |
168 | 25 | SAL_WARN("svtools", "urp stack too deep"); |
169 | 25 | eState = SvParserState::Error; |
170 | 25 | break; |
171 | 25 | } |
172 | | |
173 | 3.51k | ++nUPRLevel; |
174 | | |
175 | | // UPR - overread the group with the ansi |
176 | | // information |
177 | 3.51k | int nNextToken; |
178 | 3.51k | do |
179 | 5.61k | { |
180 | 5.61k | nNextToken = GetNextToken_(); |
181 | 5.61k | } |
182 | 5.61k | while (nNextToken != '{' && nNextToken != sal_Unicode(EOF) && IsParserWorking()); |
183 | | |
184 | 3.51k | SkipGroup(); |
185 | 3.51k | GetNextToken_(); // overread the last bracket |
186 | 3.51k | nRet = 0; |
187 | | |
188 | 3.51k | --nUPRLevel; |
189 | 3.51k | } |
190 | 4.34k | break; |
191 | | |
// \uN: when ScanText() is the caller (bRTF_InTextRead set) the token is returned
// unchanged and ScanText() does the work. Otherwise the token is turned into a text
// token that starts with the code unit N, the following nUCharOverread characters
// are dropped (a \'hh escape counts as one) and ScanText() continues the text.
192 | 10.6k | case RTF_U: |
193 | 10.6k | if( !bRTF_InTextRead ) |
194 | 3.08k | { |
195 | 3.08k | nRet = RTF_TEXTTOKEN; |
196 | 3.08k | aToken = OUStringChar( static_cast<sal_Unicode>(nTokenValue) ); |
197 | | |
198 | | // overread the next n "RTF" characters. This |
199 | | // can be also \{, \}, \'88 |
200 | 9.08k | for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) |
201 | 5.99k | { |
202 | 5.99k | sal_uInt32 cAnsi = nNextCh; |
203 | 6.49k | while( 0xD == cAnsi ) |
204 | 498 | cAnsi = GetNextChar(); |
205 | 8.15k | while( 0xA == cAnsi ) |
206 | 2.15k | cAnsi = GetNextChar(); |
207 | | |
208 | 5.99k | if( '\\' == cAnsi && |
209 | 1.13k | '\'' == GetNextChar() ) |
210 | | // skip HexValue |
211 | 283 | GetHexValue(); |
212 | 5.99k | nNextCh = GetNextChar(); |
213 | 5.99k | } |
214 | 3.08k | ScanText(); |
215 | 3.08k | bNextCh = 0 == nNextCh; |
216 | 3.08k | } |
217 | 10.6k | break; |
218 | 659k | } |
219 | 659k | } |
// backslash followed by something that is neither a known symbol nor a letter:
// unless the parser is Pending, keep that character as look-ahead and produce no token
220 | 64.6k | else if( SvParserState::Pending != eState ) |
221 | 64.6k | { |
222 | | // Bug 34631 - "\ " read on - Blank as character |
223 | | // eState = SvParserState::Error; |
224 | 64.6k | bNextCh = false; |
225 | 64.6k | } |
226 | 723k | break; |
227 | 1.05M | } |
228 | 1.05M | } |
229 | 1.05M | break; |
230 | | |
231 | 1.05M | case sal_Unicode(EOF): |
232 | 12.0k | eState = SvParserState::Accepted; |
233 | 12.0k | nRet = nNextCh; |
234 | 12.0k | break; |
235 | | |
// group start: remember the current overread count and source encoding so that the
// matching '}' can restore them (no push while nOpenBrackets is negative)
236 | 155k | case '{': |
237 | 155k | { |
238 | 155k | if( 0 <= nOpenBrackets ) |
239 | 154k | { |
240 | 154k | RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() ); |
241 | 154k | aParserStates.push( aState ); |
242 | 154k | } |
243 | 155k | ++nOpenBrackets; |
244 | 155k | DBG_ASSERT( |
245 | 155k | static_cast<size_t>(nOpenBrackets) == aParserStates.size(), |
246 | 155k | "ParserStateStack unequal to bracket count" ); |
247 | 155k | nRet = nNextCh; |
248 | 155k | } |
249 | 155k | break; |
250 | | |
// group end: drop the state of the closed group and restore overread count and source
// encoding from the enclosing group, or the defaults (1 / GetCodeSet()) at top level
251 | 81.2k | case '}': |
252 | 81.2k | --nOpenBrackets; |
253 | 81.2k | if( 0 <= nOpenBrackets ) |
254 | 75.9k | { |
255 | 75.9k | aParserStates.pop(); |
256 | 75.9k | if( !aParserStates.empty() ) |
257 | 75.4k | { |
258 | 75.4k | const RtfParserState_Impl& rRPS = |
259 | 75.4k | aParserStates.top(); |
260 | 75.4k | nUCharOverread = rRPS.nUCharOverread; |
261 | 75.4k | SetSrcEncoding( rRPS.eCodeSet ); |
262 | 75.4k | } |
263 | 502 | else |
264 | 502 | { |
265 | 502 | nUCharOverread = 1; |
266 | 502 | SetSrcEncoding( GetCodeSet() ); |
267 | 502 | } |
268 | 75.9k | } |
269 | 81.2k | DBG_ASSERT( |
270 | 81.2k | static_cast<size_t>(nOpenBrackets) == aParserStates.size(), |
271 | 81.2k | "ParserStateStack unequal to bracket count" ); |
272 | 81.2k | nRet = nNextCh; |
273 | 81.2k | break; |
274 | | |
// bare CR / LF produce no token; the loop simply moves on to the next character
275 | 18.7k | case 0x0d: |
276 | 67.0k | case 0x0a: |
277 | 67.0k | break; |
278 | | |
279 | 936k | default: |
280 | | // now normal text follows |
281 | 936k | ScanText(); |
282 | 936k | nRet = RTF_TEXTTOKEN; |
283 | 936k | bNextCh = 0 == nNextCh; |
284 | 936k | break; |
285 | 2.31M | } |
286 | | |
287 | 2.31M | if( bNextCh ) |
288 | 1.11M | nNextCh = GetNextChar(); |
289 | | |
290 | 2.31M | } while( !nRet && SvParserState::Working == eState ); |
291 | 2.17M | return nRet; |
292 | 2.17M | } |
293 | | |
294 | | |
295 | | sal_Unicode SvRTFParser::GetHexValue() |
296 | 28.8k | { |
297 | | // collect Hex values |
298 | 28.8k | int n; |
299 | 28.8k | sal_Unicode nHexVal = 0; |
300 | | |
301 | 86.4k | for( n = 0; n < 2; ++n ) |
302 | 57.6k | { |
303 | 57.6k | nHexVal *= 16; |
304 | 57.6k | nNextCh = GetNextChar(); |
305 | 57.6k | if( nNextCh >= '0' && nNextCh <= '9' ) |
306 | 21.0k | nHexVal += (nNextCh - 48); |
307 | 36.6k | else if( nNextCh >= 'a' && nNextCh <= 'f' ) |
308 | 24.6k | nHexVal += (nNextCh - 87); |
309 | 11.9k | else if( nNextCh >= 'A' && nNextCh <= 'F' ) |
310 | 834 | nHexVal += (nNextCh - 55); |
311 | 57.6k | } |
312 | 28.8k | return nHexVal; |
313 | 28.8k | } |
314 | | |
// Collects running text, starting at the look-ahead character nNextCh, into a local
// buffer and appends that buffer to aToken.
//
// Handled inside the text:
//   - \'hh runs: the bytes are gathered and converted with GetSrcEncoding(),
//   - the escapes \\ \{ \} \+ (taken literally) and \~ \- \_ (mapped to U+00A0,
//     U+00AD, U+2011),
//   - \uN (via GetNextToken_(), followed by dropping nUCharOverread characters) and \ucN,
//   - bare CR / LF, which are dropped.
// Collection ends at '{' or '}', at EOF, at a 0 character, at any other control
// sequence (the backslash is made the look-ahead again), when the parser stops
// working, or once the buffer has reached MAX_STRING_LEN code units.
315 | | void SvRTFParser::ScanText() |
316 | 961k | { |
317 | 961k | const sal_Unicode cBreak = 0; |
318 | 961k | OUStringBuffer aStrBuffer; |
319 | 961k | bool bContinue = true; |
320 | 4.39M | while( bContinue && IsParserWorking() && aStrBuffer.getLength() < MAX_STRING_LEN) |
321 | 3.44M | { |
// bNextCh: whether a fresh look-ahead character must be fetched at the end of this round
322 | 3.44M | bool bNextCh = true; |
323 | 3.44M | switch( nNextCh ) |
324 | 3.44M | { |
325 | 434k | case '\\': |
326 | 434k | { |
327 | 434k | nNextCh = GetNextChar(); |
328 | 434k | switch (nNextCh) |
329 | 434k | { |
330 | 9.49k | case '\'': |
331 | 9.49k | { |
332 | | |
// Gather consecutive \'hh escapes, and the plain characters between them, as raw
// bytes so that they are converted together with the source encoding.
333 | 9.49k | OStringBuffer aByteString; |
334 | 27.4k | while (true) |
335 | 27.4k | { |
336 | 27.4k | char c = static_cast<char>(GetHexValue()); |
337 | | /* |
338 | | * Note: \'00 is a valid internal character in a |
339 | | * string in RTF. OStringBuffer supports |
340 | | * appending nulls fine |
341 | | */ |
342 | 27.4k | aByteString.append(c); |
343 | | |
344 | 27.4k | bool bBreak = false; |
345 | 27.4k | bool bEOF = false; |
346 | 27.4k | char nSlash = '\\'; |
// read on until '{', '}' or a backslash shows up (or the input ends)
347 | 236k | while (!bBreak) |
348 | 208k | { |
349 | 208k | auto next = GetNextChar(); |
350 | 208k | if (sal_Unicode(EOF) == next) |
351 | 416 | { |
352 | 416 | bEOF = true; |
353 | 416 | break; |
354 | 416 | } |
// a character above 0xFF does not fit into the byte string: convert what has been
// gathered so far and append the character itself directly
355 | 208k | if (next>0xFF) // fix for #i43933# and #i35653# |
356 | 3.08k | { |
357 | 3.08k | if (!aByteString.isEmpty()) |
358 | 1.65k | { |
359 | 1.65k | aStrBuffer.append( OStringToOUString(aByteString, GetSrcEncoding()) ); |
360 | 1.65k | aByteString.setLength(0); |
361 | 1.65k | } |
362 | 3.08k | aStrBuffer.append(static_cast<sal_Unicode>(next)); |
363 | | |
364 | 3.08k | continue; |
365 | 3.08k | } |
366 | 205k | nSlash = static_cast<char>(next); |
// CR / LF inside the run are dropped
367 | 207k | while (nSlash == 0xD || nSlash == 0xA) |
368 | 2.26k | nSlash = static_cast<char>(GetNextChar()); |
369 | | |
370 | 205k | switch (nSlash) |
371 | 205k | { |
372 | 780 | case '{': |
373 | 2.68k | case '}': |
374 | 27.0k | case '\\': |
375 | 27.0k | bBreak = true; |
376 | 27.0k | break; |
377 | 178k | default: |
378 | 178k | aByteString.append(nSlash); |
379 | 178k | break; |
380 | 205k | } |
381 | 205k | } |
382 | | |
383 | 27.4k | if (bEOF) |
384 | 416 | { |
385 | 416 | bContinue = false; // abort, string together |
386 | 416 | break; |
387 | 416 | } |
388 | | |
389 | 27.0k | nNextCh = GetNextChar(); |
390 | | |
// Only a following \' continues the byte run. Anything else: step the stream back by
// one, make the terminating character the look-ahead and leave the run.
391 | 27.0k | if (nSlash != '\\' || nNextCh != '\'') |
392 | 9.07k | { |
393 | 9.07k | rInput.SeekRel(-1); |
394 | 9.07k | nNextCh = static_cast<unsigned char>(nSlash); |
395 | 9.07k | break; |
396 | 9.07k | } |
397 | 27.0k | } |
398 | | |
399 | 9.49k | bNextCh = false; |
400 | | |
401 | 9.49k | if (!aByteString.isEmpty()) |
402 | 9.27k | { |
403 | 9.27k | aStrBuffer.append( OStringToOUString(aByteString, GetSrcEncoding()) ); |
404 | 9.27k | aByteString.setLength(0); |
405 | 9.27k | } |
406 | 9.49k | } |
407 | 0 | break; |
// escaped characters that are taken over literally
408 | 30.8k | case '\\': |
409 | 36.1k | case '}': |
410 | 48.0k | case '{': |
411 | 48.4k | case '+': // I found in a RTF file |
412 | 48.4k | aStrBuffer.append(sal_Unicode(nNextCh)); |
413 | 48.4k | break; |
414 | 779 | case '~': // nonbreaking space |
415 | 779 | aStrBuffer.append(u'\x00A0'); |
416 | 779 | break; |
417 | 1.30k | case '-': // optional hyphen |
418 | 1.30k | aStrBuffer.append(u'\x00AD'); |
419 | 1.30k | break; |
420 | 283 | case '_': // nonbreaking hyphen |
421 | 283 | aStrBuffer.append(u'\x2011'); |
422 | 283 | break; |
423 | | |
424 | 15.7k | case 'u': |
425 | | // read UNI-Code characters |
426 | 15.7k | { |
// peek at the character after the 'u', then step the stream back by two so that it is
// positioned at the 'u' again
427 | 15.7k | nNextCh = GetNextChar(); |
428 | 15.7k | rInput.SeekRel( -2 ); |
429 | | |
430 | 15.7k | if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) ) |
431 | 7.59k | { |
// \uN: let GetNextToken_() parse the control word; bRTF_InTextRead tells it to leave
// the handling of RTF_U to this function
432 | 7.59k | bRTF_InTextRead = true; |
433 | | |
434 | 7.59k | OUString sSave( aToken ); // GetNextToken_() overwrites this |
435 | 7.59k | nNextCh = '\\'; |
436 | 7.59k | int nToken = GetNextToken_(); |
437 | 7.59k | DBG_ASSERT( RTF_U == nToken, "still not a UNI-Code character" ); |
438 | | // don't convert symbol chars |
439 | 7.59k | aStrBuffer.append(static_cast< sal_Unicode >(nTokenValue)); |
440 | | |
441 | | // overread the next n "RTF" characters. This |
442 | | // can be also \{, \}, \'88 |
443 | 13.5k | for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) |
444 | 5.90k | { |
445 | 5.90k | sal_Unicode cAnsi = nNextCh; |
446 | 6.16k | while( 0xD == cAnsi ) |
447 | 255 | cAnsi = GetNextChar(); |
448 | 6.50k | while( 0xA == cAnsi ) |
449 | 596 | cAnsi = GetNextChar(); |
450 | | |
451 | 5.90k | if( '\\' == cAnsi && |
452 | 1.59k | '\'' == GetNextChar() ) |
453 | | // skip HexValue |
454 | 1.07k | GetHexValue(); |
455 | 5.90k | nNextCh = GetNextChar(); |
456 | 5.90k | } |
457 | 7.59k | bNextCh = false; |
458 | 7.59k | aToken = sSave; |
459 | 7.59k | bRTF_InTextRead = false; |
460 | 7.59k | } |
461 | 8.12k | else if ( 'c' == nNextCh ) |
462 | 2.52k | { |
463 | | // Prevent text breaking into multiple tokens. |
// \uc: move the stream past "uc" again and parse the digits here; the value is
// accumulated in a sal_uInt8, stored as the new overread count and recorded in the
// top parser state
464 | 2.52k | rInput.SeekRel( 2 ); |
465 | 2.52k | nNextCh = GetNextChar(); |
466 | 2.52k | if (RTF_ISDIGIT( nNextCh )) |
467 | 2.26k | { |
468 | 2.26k | sal_uInt8 nNewOverread = 0 ; |
469 | 2.58k | do { |
470 | 2.58k | nNewOverread *= 10; |
471 | 2.58k | nNewOverread += nNextCh - '0'; |
472 | 2.58k | nNextCh = GetNextChar(); |
473 | 2.58k | } while ( RTF_ISDIGIT( nNextCh ) ); |
474 | 2.26k | nUCharOverread = nNewOverread; |
475 | 2.26k | if (!aParserStates.empty()) |
476 | 2.21k | aParserStates.top().nUCharOverread = nNewOverread; |
477 | 2.26k | } |
// a blank directly after the control word is consumed, anything else stays the look-ahead
478 | 2.52k | bNextCh = 0x20 == nNextCh; |
479 | 2.52k | } |
480 | 5.60k | else |
481 | 5.60k | { |
// some other control word starting with 'u': end the text, backslash becomes the look-ahead
482 | 5.60k | nNextCh = '\\'; |
483 | 5.60k | bContinue = false; // abort, string together |
484 | 5.60k | } |
485 | 15.7k | } |
486 | 15.7k | break; |
487 | | |
// any other control sequence ends the text: step the stream back by one and make the
// backslash the look-ahead again
488 | 358k | default: |
489 | 358k | rInput.SeekRel( -1 ); |
490 | 358k | nNextCh = '\\'; |
491 | 358k | bContinue = false; // abort, string together |
492 | 358k | break; |
493 | 434k | } |
494 | 434k | } |
495 | 434k | break; |
496 | | |
// EOF at this point puts the parser into the Error state; '{' and '}' just end the text
497 | 434k | case sal_Unicode(EOF): |
498 | 632 | eState = SvParserState::Error; |
499 | 632 | [[fallthrough]]; |
500 | 47.9k | case '{': |
501 | 102k | case '}': |
502 | 102k | bContinue = false; |
503 | 102k | break; |
504 | | |
// bare LF / CR are dropped from the text
505 | 52.7k | case 0x0a: |
506 | 59.5k | case 0x0d: |
507 | 59.5k | break; |
508 | | |
509 | 2.84M | default: |
510 | 2.84M | if( nNextCh == cBreak || aStrBuffer.getLength() >= MAX_STRING_LEN) |
511 | 486k | bContinue = false; |
512 | 2.35M | else |
513 | 2.35M | { |
// append this character and any directly following ASCII letters / digits
514 | 4.91M | do { |
515 | | // all other characters end up in the text |
516 | 4.91M | aStrBuffer.appendUtf32(nNextCh); |
517 | | |
// EOF while collecting: hand over what was gathered and return right away
// (eState is not touched on this path)
518 | 4.91M | if (sal_Unicode(EOF) == (nNextCh = GetNextChar())) |
519 | 5.43k | { |
520 | 5.43k | if (!aStrBuffer.isEmpty()) |
521 | 5.43k | aToken.append( aStrBuffer ); |
522 | 5.43k | return; |
523 | 5.43k | } |
524 | 4.91M | } while |
525 | 2.35M | ( |
526 | 4.91M | (RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) && |
527 | 2.56M | (aStrBuffer.getLength() < MAX_STRING_LEN) |
528 | 2.35M | ); |
529 | 2.35M | bNextCh = false; |
530 | 2.35M | } |
531 | 3.44M | } |
532 | | |
533 | 3.43M | if( bContinue && bNextCh ) |
534 | 110k | nNextCh = GetNextChar(); |
535 | 3.43M | } |
536 | | |
537 | 956k | if (!aStrBuffer.isEmpty()) |
538 | 552k | aToken.append( aStrBuffer ); |
539 | 956k | } |
540 | | |
541 | | |
542 | | short SvRTFParser::_inSkipGroup=0; |
543 | | |
544 | | void SvRTFParser::SkipGroup() |
545 | 8.81k | { |
546 | 8.81k | short nBrackets=1; |
547 | 8.81k | if (_inSkipGroup>0) |
548 | 0 | return; |
549 | 8.81k | _inSkipGroup++; |
550 | | //#i16185# faking \bin keyword |
551 | 8.81k | do |
552 | 62.1k | { |
553 | 62.1k | switch (nNextCh) |
554 | 62.1k | { |
555 | 6.73k | case '{': |
556 | 6.73k | ++nBrackets; |
557 | 6.73k | break; |
558 | 10.9k | case '}': |
559 | 10.9k | if (!--nBrackets) { |
560 | 5.40k | _inSkipGroup--; |
561 | 5.40k | return; |
562 | 5.40k | } |
563 | 5.57k | break; |
564 | 62.1k | } |
565 | 56.7k | int nToken = GetNextToken_(); |
566 | 56.7k | if (nToken == RTF_BIN) |
567 | 410 | { |
568 | 410 | rInput.SeekRel(-1); |
569 | 410 | SAL_WARN_IF(nTokenValue < 0, "svtools", "negative value argument for rtf \\bin keyword"); |
570 | 410 | if (nTokenValue > 0) |
571 | 237 | rInput.SeekRel(nTokenValue); |
572 | 410 | nNextCh = GetNextChar(); |
573 | 410 | } |
574 | 59.2k | while (nNextCh==0xa || nNextCh==0xd) |
575 | 2.53k | { |
576 | 2.53k | nNextCh = GetNextChar(); |
577 | 2.53k | } |
578 | 56.7k | } while (sal_Unicode(EOF) != nNextCh && IsParserWorking()); |
579 | | |
580 | 3.41k | if( SvParserState::Pending != eState && '}' != nNextCh ) |
581 | 3.16k | eState = SvParserState::Error; |
582 | 3.41k | _inSkipGroup--; |
583 | 3.41k | } |
584 | | |
585 | 2.65k | void SvRTFParser::ReadUnknownData() { SkipGroup(); } |
586 | 32 | void SvRTFParser::ReadBitmapData() { SkipGroup(); } |
587 | | |
588 | | |
589 | | SvParserState SvRTFParser::CallParser() |
590 | 9.66k | { |
591 | 9.66k | char cFirstCh(0); |
592 | 9.66k | nNextChPos = rInput.Tell(); |
593 | 9.66k | rInput.ReadChar( cFirstCh ); |
594 | 9.66k | nNextCh = static_cast<unsigned char>(cFirstCh); |
595 | 9.66k | eState = SvParserState::Working; |
596 | 9.66k | nOpenBrackets = 0; |
597 | 9.66k | eCodeSet = RTL_TEXTENCODING_MS_1252; |
598 | 9.66k | SetSrcEncoding( eCodeSet ); |
599 | | |
600 | | // the first two tokens should be '{' and \\rtf !! |
601 | 9.66k | if( '{' == GetNextToken() && RTF_RTF == GetNextToken() ) |
602 | 9.51k | { |
603 | 9.51k | AddFirstRef(); |
604 | | // call ReleaseRef at end of this scope, even in the face of exceptions |
605 | 9.51k | comphelper::ScopeGuard g([this] { |
606 | 9.51k | if( SvParserState::Pending != eState ) |
607 | 9.51k | ReleaseRef(); // now parser is not needed anymore |
608 | 9.51k | }); |
609 | 9.51k | Continue( 0 ); |
610 | 9.51k | } |
611 | 150 | else |
612 | 150 | eState = SvParserState::Error; |
613 | | |
614 | 9.66k | return eState; |
615 | 9.66k | } |
616 | | |
617 | | void SvRTFParser::Continue( int nToken ) |
618 | 9.51k | { |
619 | | // DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(), |
620 | | // "Characterset was changed." ); |
621 | | |
622 | 9.51k | if( !nToken ) |
623 | 9.51k | nToken = GetNextToken(); |
624 | | |
625 | 9.51k | bool bLooping = false; |
626 | | |
627 | 1.68M | while (IsParserWorking() && !bLooping) |
628 | 1.67M | { |
629 | 1.67M | auto nCurrentTokenIndex = m_nTokenIndex; |
630 | 1.67M | auto nCurrentToken = nToken; |
631 | | |
632 | 1.67M | SaveState( nToken ); |
633 | 1.67M | switch( nToken ) |
634 | 1.67M | { |
635 | 47.3k | case '}': |
636 | 47.3k | if( nOpenBrackets ) |
637 | 46.9k | goto NEXTTOKEN; |
638 | 387 | eState = SvParserState::Accepted; |
639 | 387 | break; |
640 | | |
641 | 94.1k | case '{': |
642 | | // an unknown group ? |
643 | 94.1k | { |
644 | 94.1k | if( RTF_IGNOREFLAG != GetNextToken() ) |
645 | 90.5k | nToken = SkipToken(); |
646 | 3.56k | else if( RTF_UNKNOWNCONTROL != GetNextToken() ) |
647 | 2.30k | nToken = SkipToken( -2 ); |
648 | 1.25k | else |
649 | 1.25k | { |
650 | | // filter immediately |
651 | 1.25k | ReadUnknownData(); |
652 | 1.25k | nToken = GetNextToken(); |
653 | 1.25k | if( '}' != nToken ) |
654 | 41 | eState = SvParserState::Error; |
655 | 1.25k | break; // move to next token!! |
656 | 1.25k | } |
657 | 94.1k | } |
658 | 92.8k | goto NEXTTOKEN; |
659 | | |
660 | 92.8k | case RTF_UNKNOWNCONTROL: |
661 | 85.8k | break; // skip unknown token |
662 | 0 | case RTF_NEXTTYPE: |
663 | 1.21k | case RTF_ANSITYPE: |
664 | 1.21k | eCodeSet = RTL_TEXTENCODING_MS_1252; |
665 | 1.21k | SetSrcEncoding( eCodeSet ); |
666 | 1.21k | break; |
667 | 314 | case RTF_MACTYPE: |
668 | 314 | eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN; |
669 | 314 | SetSrcEncoding( eCodeSet ); |
670 | 314 | break; |
671 | 397 | case RTF_PCTYPE: |
672 | 397 | eCodeSet = RTL_TEXTENCODING_IBM_437; |
673 | 397 | SetSrcEncoding( eCodeSet ); |
674 | 397 | break; |
675 | 25 | case RTF_PCATYPE: |
676 | 25 | eCodeSet = RTL_TEXTENCODING_IBM_850; |
677 | 25 | SetSrcEncoding( eCodeSet ); |
678 | 25 | break; |
679 | 4.84k | case RTF_ANSICPG: |
680 | 4.84k | eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue); |
681 | 4.84k | SetSrcEncoding(eCodeSet); |
682 | 4.84k | break; |
683 | 1.43M | default: |
684 | 1.57M | NEXTTOKEN: |
685 | 1.57M | NextToken( nToken ); |
686 | 1.57M | break; |
687 | 1.67M | } |
688 | 1.67M | if( IsParserWorking() ) |
689 | 1.67M | SaveState( 0 ); // processed till here, |
690 | | // continue with new token! |
691 | 1.67M | nToken = GetNextToken(); |
692 | 1.67M | bLooping = nCurrentTokenIndex == m_nTokenIndex && nToken == nCurrentToken; |
693 | 1.67M | } |
694 | 9.49k | if( SvParserState::Accepted == eState && 0 < nOpenBrackets ) |
695 | 8.25k | eState = SvParserState::Error; |
696 | 9.49k | } |
697 | | |
698 | | void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc ) |
699 | 49.9k | { |
700 | 49.9k | if (eEnc == RTL_TEXTENCODING_DONTKNOW) |
701 | 28.1k | eEnc = GetCodeSet(); |
702 | | |
703 | 49.9k | if (!aParserStates.empty()) |
704 | 49.6k | aParserStates.top().eCodeSet = eEnc; |
705 | 49.9k | SetSrcEncoding(eEnc); |
706 | 49.9k | } |
707 | | |
708 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |