/src/libreoffice/svtools/source/svrtf/parrtf.cxx
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <sal/config.h> |
21 | | #include <sal/log.hxx> |
22 | | |
23 | | #include <comphelper/scopeguard.hxx> |
24 | | |
25 | | #include <rtl/character.hxx> |
26 | | #include <rtl/strbuf.hxx> |
27 | | #include <rtl/tencinfo.h> |
28 | | #include <rtl/ustrbuf.hxx> |
29 | | #include <tools/stream.hxx> |
30 | | #include <tools/debug.hxx> |
31 | | #include <svtools/rtftoken.h> |
32 | | #include <svtools/parrtf.hxx> |
33 | | |
// Length (in UTF-16 code units) at which ScanText() stops collecting
// characters into the text token it is currently building.
constexpr int MAX_STRING_LEN = 1024;

// Shorthands for the rtl ASCII classification helpers used by the tokenizer.
#define RTF_ISALPHA( c ) rtl::isAsciiAlpha(c)
#define RTF_ISDIGIT( c ) rtl::isAsciiDigit(c)
38 | | |
39 | | SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize ) |
40 | 10.4k | : SvParser<int>( rIn, nStackSize ) |
41 | 10.4k | , nOpenBrackets(0) |
42 | 10.4k | , nUPRLevel(0) |
43 | 10.4k | , eCodeSet(RTL_TEXTENCODING_MS_1252) |
44 | 10.4k | , nUCharOverread(1) |
45 | 10.4k | { |
46 | | // default is ANSI-CodeSet |
47 | 10.4k | SetSrcEncoding( RTL_TEXTENCODING_MS_1252 ); |
48 | 10.4k | bRTF_InTextRead = false; |
49 | 10.4k | } |
50 | | |
51 | | SvRTFParser::~SvRTFParser() |
52 | 10.4k | { |
53 | 10.4k | } |
54 | | |
55 | | |
56 | | int SvRTFParser::GetNextToken_() |
57 | 2.32M | { |
58 | 2.32M | int nRet = 0; |
59 | 2.46M | do { |
60 | 2.46M | bool bNextCh = true; |
61 | 2.46M | switch( nNextCh ) |
62 | 2.46M | { |
63 | 1.10M | case '\\': |
64 | 1.10M | { |
65 | | // control characters |
66 | 1.10M | nNextCh = GetNextChar(); |
67 | 1.10M | switch( nNextCh ) |
68 | 1.10M | { |
69 | 3.53k | case '{': |
70 | 3.94k | case '}': |
71 | 15.6k | case '\\': |
72 | 15.8k | case '+': // I found it in a RTF-file |
73 | 16.8k | case '~': // nonbreaking space |
74 | 17.4k | case '-': // optional hyphen |
75 | 17.5k | case '_': // nonbreaking hyphen |
76 | 22.9k | case '\'': // HexValue |
77 | 22.9k | nNextCh = '\\'; |
78 | 22.9k | rInput.SeekRel( -1 ); |
79 | 22.9k | ScanText(); |
80 | 22.9k | nRet = RTF_TEXTTOKEN; |
81 | 22.9k | bNextCh = 0 == nNextCh; |
82 | 22.9k | break; |
83 | | |
84 | 12.5k | case '*': // ignoreflag |
85 | 12.5k | nRet = RTF_IGNOREFLAG; |
86 | 12.5k | break; |
87 | 1.82k | case ':': // subentry in an index entry |
88 | 1.82k | nRet = RTF_SUBENTRYINDEX; |
89 | 1.82k | break; |
90 | 238 | case '|': // formula-character |
91 | 238 | nRet = RTF_FORMULA; |
92 | 238 | break; |
93 | | |
94 | 324k | case 0x0a: |
95 | 335k | case 0x0d: |
96 | 335k | nRet = RTF_PAR; |
97 | 335k | break; |
98 | | |
99 | 727k | default: |
100 | 727k | if( RTF_ISALPHA( nNextCh ) ) |
101 | 660k | { |
102 | 660k | aToken = "\\"; |
103 | 660k | { |
104 | 2.42M | do { |
105 | 2.42M | aToken.appendUtf32(nNextCh); |
106 | 2.42M | nNextCh = GetNextChar(); |
107 | 2.42M | } while( RTF_ISALPHA( nNextCh ) ); |
108 | 660k | } |
109 | | |
110 | | // minus before numeric parameters |
111 | 660k | bool bNegValue = false; |
112 | 660k | if( '-' == nNextCh ) |
113 | 20.4k | { |
114 | 20.4k | bNegValue = true; |
115 | 20.4k | nNextCh = GetNextChar(); |
116 | 20.4k | } |
117 | | |
118 | | // possible numeric parameter |
119 | 660k | if( RTF_ISDIGIT( nNextCh ) ) |
120 | 266k | { |
121 | 266k | OUStringBuffer aNumber; |
122 | 578k | do { |
123 | 578k | aNumber.append(static_cast<sal_Unicode>(nNextCh)); |
124 | 578k | nNextCh = GetNextChar(); |
125 | 578k | } while( RTF_ISDIGIT( nNextCh ) ); |
126 | 266k | nTokenValue = OUString::unacquired(aNumber).toInt32(); |
127 | 266k | if( bNegValue ) |
128 | 10.4k | nTokenValue = -nTokenValue; |
129 | 266k | bTokenHasValue=true; |
130 | 266k | } |
131 | 393k | else if( bNegValue ) // restore minus |
132 | 9.96k | { |
133 | 9.96k | nNextCh = '-'; |
134 | 9.96k | rInput.SeekRel( -1 ); |
135 | 9.96k | } |
136 | 660k | if( ' ' == nNextCh ) // blank is part of token! |
137 | 78.5k | nNextCh = GetNextChar(); |
138 | | |
139 | | // search for the token in the table: |
140 | 660k | if( 0 == (nRet = GetRTFToken( aToken )) ) |
141 | | // Unknown Control |
142 | 100k | nRet = RTF_UNKNOWNCONTROL; |
143 | | |
144 | | // bug 76812 - unicode token handled as normal text |
145 | 660k | bNextCh = false; |
146 | 660k | switch( nRet ) |
147 | 660k | { |
148 | 2.29k | case RTF_UC: |
149 | 2.29k | if( 0 <= nTokenValue ) |
150 | 1.42k | { |
151 | 1.42k | nUCharOverread = static_cast<sal_uInt8>(nTokenValue); |
152 | 1.42k | if (!aParserStates.empty()) |
153 | 1.10k | { |
154 | | //cmc: other ifdef breaks #i3584 |
155 | 1.10k | aParserStates.top().nUCharOverread = nUCharOverread; |
156 | 1.10k | } |
157 | 1.42k | } |
158 | 2.29k | aToken.setLength( 0 ); // #i47831# erase token to prevent the token from being treated as text |
159 | | // read next token |
160 | 2.29k | nRet = 0; |
161 | 2.29k | break; |
162 | | |
163 | 4.56k | case RTF_UPR: |
164 | 4.56k | if (!_inSkipGroup) |
165 | 3.73k | { |
166 | 3.73k | if (nUPRLevel > 256) // fairly sure > 1 is probably an error, but provide some leeway |
167 | 25 | { |
168 | 25 | SAL_WARN("svtools", "urp stack too deep"); |
169 | 25 | eState = SvParserState::Error; |
170 | 25 | break; |
171 | 25 | } |
172 | | |
173 | 3.71k | ++nUPRLevel; |
174 | | |
175 | | // UPR - overread the group with the ansi |
176 | | // information |
177 | 3.71k | int nNextToken; |
178 | 3.71k | do |
179 | 7.70k | { |
180 | 7.70k | nNextToken = GetNextToken_(); |
181 | 7.70k | } |
182 | 7.70k | while (nNextToken != '{' && nNextToken != sal_Unicode(EOF) && IsParserWorking()); |
183 | | |
184 | 3.71k | SkipGroup(); |
185 | 3.71k | GetNextToken_(); // overread the last bracket |
186 | 3.71k | nRet = 0; |
187 | | |
188 | 3.71k | --nUPRLevel; |
189 | 3.71k | } |
190 | 4.53k | break; |
191 | | |
192 | 10.2k | case RTF_U: |
193 | 10.2k | if( !bRTF_InTextRead ) |
194 | 2.68k | { |
195 | 2.68k | nRet = RTF_TEXTTOKEN; |
196 | 2.68k | aToken = OUStringChar( static_cast<sal_Unicode>(nTokenValue) ); |
197 | | |
198 | | // overread the next n "RTF" characters. This |
199 | | // can be also \{, \}, \'88 |
200 | 10.2k | for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) |
201 | 7.55k | { |
202 | 7.55k | sal_uInt32 cAnsi = nNextCh; |
203 | 8.19k | while( 0xD == cAnsi ) |
204 | 643 | cAnsi = GetNextChar(); |
205 | 9.98k | while( 0xA == cAnsi ) |
206 | 2.42k | cAnsi = GetNextChar(); |
207 | | |
208 | 7.55k | if( '\\' == cAnsi && |
209 | 7.55k | '\'' == GetNextChar() ) |
210 | | // skip HexValue |
211 | 241 | GetHexValue(); |
212 | 7.55k | nNextCh = GetNextChar(); |
213 | 7.55k | } |
214 | 2.68k | ScanText(); |
215 | 2.68k | bNextCh = 0 == nNextCh; |
216 | 2.68k | } |
217 | 10.2k | break; |
218 | 660k | } |
219 | 660k | } |
220 | 66.4k | else if( SvParserState::Pending != eState ) |
221 | 66.4k | { |
222 | | // Bug 34631 - "\ " read on - Blank as character |
223 | | // eState = SvParserState::Error; |
224 | 66.4k | bNextCh = false; |
225 | 66.4k | } |
226 | 727k | break; |
227 | 1.10M | } |
228 | 1.10M | } |
229 | 1.10M | break; |
230 | | |
231 | 1.10M | case sal_Unicode(EOF): |
232 | 12.7k | eState = SvParserState::Accepted; |
233 | 12.7k | nRet = nNextCh; |
234 | 12.7k | break; |
235 | | |
236 | 179k | case '{': |
237 | 179k | { |
238 | 179k | if( 0 <= nOpenBrackets ) |
239 | 177k | { |
240 | 177k | RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() ); |
241 | 177k | aParserStates.push( aState ); |
242 | 177k | } |
243 | 179k | ++nOpenBrackets; |
244 | 179k | DBG_ASSERT( |
245 | 179k | static_cast<size_t>(nOpenBrackets) == aParserStates.size(), |
246 | 179k | "ParserStateStack unequal to bracket count" ); |
247 | 179k | nRet = nNextCh; |
248 | 179k | } |
249 | 179k | break; |
250 | | |
251 | 79.7k | case '}': |
252 | 79.7k | --nOpenBrackets; |
253 | 79.7k | if( 0 <= nOpenBrackets ) |
254 | 73.8k | { |
255 | 73.8k | aParserStates.pop(); |
256 | 73.8k | if( !aParserStates.empty() ) |
257 | 73.3k | { |
258 | 73.3k | const RtfParserState_Impl& rRPS = |
259 | 73.3k | aParserStates.top(); |
260 | 73.3k | nUCharOverread = rRPS.nUCharOverread; |
261 | 73.3k | SetSrcEncoding( rRPS.eCodeSet ); |
262 | 73.3k | } |
263 | 500 | else |
264 | 500 | { |
265 | 500 | nUCharOverread = 1; |
266 | 500 | SetSrcEncoding( GetCodeSet() ); |
267 | 500 | } |
268 | 73.8k | } |
269 | 79.7k | DBG_ASSERT( |
270 | 79.7k | static_cast<size_t>(nOpenBrackets) == aParserStates.size(), |
271 | 79.7k | "ParserStateStack unequal to bracket count" ); |
272 | 79.7k | nRet = nNextCh; |
273 | 79.7k | break; |
274 | | |
275 | 18.5k | case 0x0d: |
276 | 66.6k | case 0x0a: |
277 | 66.6k | break; |
278 | | |
279 | 1.02M | default: |
280 | | // now normal text follows |
281 | 1.02M | ScanText(); |
282 | 1.02M | nRet = RTF_TEXTTOKEN; |
283 | 1.02M | bNextCh = 0 == nNextCh; |
284 | 1.02M | break; |
285 | 2.46M | } |
286 | | |
287 | 2.46M | if( bNextCh ) |
288 | 1.23M | nNextCh = GetNextChar(); |
289 | | |
290 | 2.46M | } while( !nRet && SvParserState::Working == eState ); |
291 | 2.32M | return nRet; |
292 | 2.32M | } |
293 | | |
294 | | |
295 | | sal_Unicode SvRTFParser::GetHexValue() |
296 | 27.7k | { |
297 | | // collect Hex values |
298 | 27.7k | int n; |
299 | 27.7k | sal_Unicode nHexVal = 0; |
300 | | |
301 | 83.3k | for( n = 0; n < 2; ++n ) |
302 | 55.5k | { |
303 | 55.5k | nHexVal *= 16; |
304 | 55.5k | nNextCh = GetNextChar(); |
305 | 55.5k | if( nNextCh >= '0' && nNextCh <= '9' ) |
306 | 18.9k | nHexVal += (nNextCh - 48); |
307 | 36.5k | else if( nNextCh >= 'a' && nNextCh <= 'f' ) |
308 | 24.0k | nHexVal += (nNextCh - 87); |
309 | 12.5k | else if( nNextCh >= 'A' && nNextCh <= 'F' ) |
310 | 943 | nHexVal += (nNextCh - 55); |
311 | 55.5k | } |
312 | 27.7k | return nHexVal; |
313 | 27.7k | } |
314 | | |
315 | | void SvRTFParser::ScanText() |
316 | 1.05M | { |
317 | 1.05M | const sal_Unicode cBreak = 0; |
318 | 1.05M | OUStringBuffer aStrBuffer; |
319 | 1.05M | bool bContinue = true; |
320 | 4.69M | while( bContinue && IsParserWorking() && aStrBuffer.getLength() < MAX_STRING_LEN) |
321 | 3.65M | { |
322 | 3.65M | bool bNextCh = true; |
323 | 3.65M | switch( nNextCh ) |
324 | 3.65M | { |
325 | 465k | case '\\': |
326 | 465k | { |
327 | 465k | nNextCh = GetNextChar(); |
328 | 465k | switch (nNextCh) |
329 | 465k | { |
330 | 9.12k | case '\'': |
331 | 9.12k | { |
332 | | |
333 | 9.12k | OStringBuffer aByteString; |
334 | 26.4k | while (true) |
335 | 26.4k | { |
336 | 26.4k | char c = static_cast<char>(GetHexValue()); |
337 | | /* |
338 | | * Note: \'00 is a valid internal character in a |
339 | | * string in RTF. OStringBuffer supports |
340 | | * appending nulls fine |
341 | | */ |
342 | 26.4k | aByteString.append(c); |
343 | | |
344 | 26.4k | bool bBreak = false; |
345 | 26.4k | bool bEOF = false; |
346 | 26.4k | char nSlash = '\\'; |
347 | 215k | while (!bBreak) |
348 | 189k | { |
349 | 189k | auto next = GetNextChar(); |
350 | 189k | if (sal_Unicode(EOF) == next) |
351 | 486 | { |
352 | 486 | bEOF = true; |
353 | 486 | break; |
354 | 486 | } |
355 | 189k | if (next>0xFF) // fix for #i43933# and #i35653# |
356 | 3.59k | { |
357 | 3.59k | if (!aByteString.isEmpty()) |
358 | 1.80k | { |
359 | 1.80k | aStrBuffer.append( OStringToOUString(aByteString, GetSrcEncoding()) ); |
360 | 1.80k | aByteString.setLength(0); |
361 | 1.80k | } |
362 | 3.59k | aStrBuffer.append(static_cast<sal_Unicode>(next)); |
363 | | |
364 | 3.59k | continue; |
365 | 3.59k | } |
366 | 185k | nSlash = static_cast<char>(next); |
367 | 188k | while (nSlash == 0xD || nSlash == 0xA) |
368 | 2.09k | nSlash = static_cast<char>(GetNextChar()); |
369 | | |
370 | 185k | switch (nSlash) |
371 | 185k | { |
372 | 803 | case '{': |
373 | 2.58k | case '}': |
374 | 25.9k | case '\\': |
375 | 25.9k | bBreak = true; |
376 | 25.9k | break; |
377 | 159k | default: |
378 | 159k | aByteString.append(nSlash); |
379 | 159k | break; |
380 | 185k | } |
381 | 185k | } |
382 | | |
383 | 26.4k | if (bEOF) |
384 | 486 | { |
385 | 486 | bContinue = false; // abort, string together |
386 | 486 | break; |
387 | 486 | } |
388 | | |
389 | 25.9k | nNextCh = GetNextChar(); |
390 | | |
391 | 25.9k | if (nSlash != '\\' || nNextCh != '\'') |
392 | 8.63k | { |
393 | 8.63k | rInput.SeekRel(-1); |
394 | 8.63k | nNextCh = static_cast<unsigned char>(nSlash); |
395 | 8.63k | break; |
396 | 8.63k | } |
397 | 25.9k | } |
398 | | |
399 | 9.12k | bNextCh = false; |
400 | | |
401 | 9.12k | if (!aByteString.isEmpty()) |
402 | 8.88k | { |
403 | 8.88k | aStrBuffer.append( OStringToOUString(aByteString, GetSrcEncoding()) ); |
404 | 8.88k | aByteString.setLength(0); |
405 | 8.88k | } |
406 | 9.12k | } |
407 | 0 | break; |
408 | 34.8k | case '\\': |
409 | 40.3k | case '}': |
410 | 52.4k | case '{': |
411 | 52.8k | case '+': // I found in a RTF file |
412 | 52.8k | aStrBuffer.append(sal_Unicode(nNextCh)); |
413 | 52.8k | break; |
414 | 758 | case '~': // nonbreaking space |
415 | 758 | aStrBuffer.append(u'\x00A0'); |
416 | 758 | break; |
417 | 1.35k | case '-': // optional hyphen |
418 | 1.35k | aStrBuffer.append(u'\x00AD'); |
419 | 1.35k | break; |
420 | 341 | case '_': // nonbreaking hyphen |
421 | 341 | aStrBuffer.append(u'\x2011'); |
422 | 341 | break; |
423 | | |
424 | 16.3k | case 'u': |
425 | | // read UNI-Code characters |
426 | 16.3k | { |
427 | 16.3k | nNextCh = GetNextChar(); |
428 | 16.3k | rInput.SeekRel( -2 ); |
429 | | |
430 | 16.3k | if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) ) |
431 | 7.52k | { |
432 | 7.52k | bRTF_InTextRead = true; |
433 | | |
434 | 7.52k | OUString sSave( aToken ); // GetNextToken_() overwrites this |
435 | 7.52k | nNextCh = '\\'; |
436 | 7.52k | int nToken = GetNextToken_(); |
437 | 7.52k | DBG_ASSERT( RTF_U == nToken, "still not a UNI-Code character" ); |
438 | | // don't convert symbol chars |
439 | 7.52k | aStrBuffer.append(static_cast< sal_Unicode >(nTokenValue)); |
440 | | |
441 | | // overread the next n "RTF" characters. This |
442 | | // can be also \{, \}, \'88 |
443 | 14.8k | for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) |
444 | 7.35k | { |
445 | 7.35k | sal_Unicode cAnsi = nNextCh; |
446 | 7.82k | while( 0xD == cAnsi ) |
447 | 464 | cAnsi = GetNextChar(); |
448 | 8.07k | while( 0xA == cAnsi ) |
449 | 717 | cAnsi = GetNextChar(); |
450 | | |
451 | 7.35k | if( '\\' == cAnsi && |
452 | 7.35k | '\'' == GetNextChar() ) |
453 | | // skip HexValue |
454 | 1.11k | GetHexValue(); |
455 | 7.35k | nNextCh = GetNextChar(); |
456 | 7.35k | } |
457 | 7.52k | bNextCh = false; |
458 | 7.52k | aToken = sSave; |
459 | 7.52k | bRTF_InTextRead = false; |
460 | 7.52k | } |
461 | 8.79k | else if ( 'c' == nNextCh ) |
462 | 2.86k | { |
463 | | // Prevent text breaking into multiple tokens. |
464 | 2.86k | rInput.SeekRel( 2 ); |
465 | 2.86k | nNextCh = GetNextChar(); |
466 | 2.86k | if (RTF_ISDIGIT( nNextCh )) |
467 | 2.42k | { |
468 | 2.42k | sal_uInt8 nNewOverread = 0 ; |
469 | 3.17k | do { |
470 | 3.17k | nNewOverread *= 10; |
471 | 3.17k | nNewOverread += nNextCh - '0'; |
472 | 3.17k | nNextCh = GetNextChar(); |
473 | 3.17k | } while ( RTF_ISDIGIT( nNextCh ) ); |
474 | 2.42k | nUCharOverread = nNewOverread; |
475 | 2.42k | if (!aParserStates.empty()) |
476 | 2.29k | aParserStates.top().nUCharOverread = nNewOverread; |
477 | 2.42k | } |
478 | 2.86k | bNextCh = 0x20 == nNextCh; |
479 | 2.86k | } |
480 | 5.93k | else |
481 | 5.93k | { |
482 | 5.93k | nNextCh = '\\'; |
483 | 5.93k | bContinue = false; // abort, string together |
484 | 5.93k | } |
485 | 16.3k | } |
486 | 16.3k | break; |
487 | | |
488 | 384k | default: |
489 | 384k | rInput.SeekRel( -1 ); |
490 | 384k | nNextCh = '\\'; |
491 | 384k | bContinue = false; // abort, string together |
492 | 384k | break; |
493 | 465k | } |
494 | 465k | } |
495 | 465k | break; |
496 | | |
497 | 465k | case sal_Unicode(EOF): |
498 | 742 | eState = SvParserState::Error; |
499 | 742 | [[fallthrough]]; |
500 | 50.1k | case '{': |
501 | 103k | case '}': |
502 | 103k | bContinue = false; |
503 | 103k | break; |
504 | | |
505 | 55.3k | case 0x0a: |
506 | 62.9k | case 0x0d: |
507 | 62.9k | break; |
508 | | |
509 | 3.02M | default: |
510 | 3.02M | if( nNextCh == cBreak || aStrBuffer.getLength() >= MAX_STRING_LEN) |
511 | 547k | bContinue = false; |
512 | 2.47M | else |
513 | 2.47M | { |
514 | 5.19M | do { |
515 | | // all other characters end up in the text |
516 | 5.19M | aStrBuffer.appendUtf32(nNextCh); |
517 | | |
518 | 5.19M | if (sal_Unicode(EOF) == (nNextCh = GetNextChar())) |
519 | 5.81k | { |
520 | 5.81k | if (!aStrBuffer.isEmpty()) |
521 | 5.81k | aToken.append( aStrBuffer ); |
522 | 5.81k | return; |
523 | 5.81k | } |
524 | 5.19M | } while |
525 | 2.47M | ( |
526 | 5.18M | (RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) && |
527 | 5.18M | (aStrBuffer.getLength() < MAX_STRING_LEN) |
528 | 2.47M | ); |
529 | 2.46M | bNextCh = false; |
530 | 2.46M | } |
531 | 3.65M | } |
532 | | |
533 | 3.64M | if( bContinue && bNextCh ) |
534 | 118k | nNextCh = GetNextChar(); |
535 | 3.64M | } |
536 | | |
537 | 1.04M | if (!aStrBuffer.isEmpty()) |
538 | 587k | aToken.append( aStrBuffer ); |
539 | 1.04M | } |
540 | | |
541 | | |
542 | | short SvRTFParser::_inSkipGroup=0; |
543 | | |
544 | | void SvRTFParser::SkipGroup() |
545 | 9.18k | { |
546 | 9.18k | short nBrackets=1; |
547 | 9.18k | if (_inSkipGroup>0) |
548 | 0 | return; |
549 | 9.18k | _inSkipGroup++; |
550 | | //#i16185# faking \bin keyword |
551 | 9.18k | do |
552 | 87.9k | { |
553 | 87.9k | switch (nNextCh) |
554 | 87.9k | { |
555 | 31.7k | case '{': |
556 | 31.7k | ++nBrackets; |
557 | 31.7k | break; |
558 | 10.4k | case '}': |
559 | 10.4k | if (!--nBrackets) { |
560 | 5.57k | _inSkipGroup--; |
561 | 5.57k | return; |
562 | 5.57k | } |
563 | 4.86k | break; |
564 | 87.9k | } |
565 | 82.3k | int nToken = GetNextToken_(); |
566 | 82.3k | if (nToken == RTF_BIN) |
567 | 409 | { |
568 | 409 | rInput.SeekRel(-1); |
569 | 409 | SAL_WARN_IF(nTokenValue < 0, "svtools", "negative value argument for rtf \\bin keyword"); |
570 | 409 | if (nTokenValue > 0) |
571 | 236 | rInput.SeekRel(nTokenValue); |
572 | 409 | nNextCh = GetNextChar(); |
573 | 409 | } |
574 | 84.6k | while (nNextCh==0xa || nNextCh==0xd) |
575 | 2.29k | { |
576 | 2.29k | nNextCh = GetNextChar(); |
577 | 2.29k | } |
578 | 82.3k | } while (sal_Unicode(EOF) != nNextCh && IsParserWorking()); |
579 | | |
580 | 3.60k | if( SvParserState::Pending != eState && '}' != nNextCh ) |
581 | 3.36k | eState = SvParserState::Error; |
582 | 3.60k | _inSkipGroup--; |
583 | 3.60k | } |
584 | | |
585 | 2.73k | void SvRTFParser::ReadUnknownData() { SkipGroup(); } |
586 | 32 | void SvRTFParser::ReadBitmapData() { SkipGroup(); } |
587 | | |
588 | | |
589 | | SvParserState SvRTFParser::CallParser() |
590 | 10.4k | { |
591 | 10.4k | char cFirstCh(0); |
592 | 10.4k | nNextChPos = rInput.Tell(); |
593 | 10.4k | rInput.ReadChar( cFirstCh ); |
594 | 10.4k | nNextCh = static_cast<unsigned char>(cFirstCh); |
595 | 10.4k | eState = SvParserState::Working; |
596 | 10.4k | nOpenBrackets = 0; |
597 | 10.4k | eCodeSet = RTL_TEXTENCODING_MS_1252; |
598 | 10.4k | SetSrcEncoding( eCodeSet ); |
599 | | |
600 | | // the first two tokens should be '{' and \\rtf !! |
601 | 10.4k | if( '{' == GetNextToken() && RTF_RTF == GetNextToken() ) |
602 | 9.87k | { |
603 | 9.87k | AddFirstRef(); |
604 | | // call ReleaseRef at end of this scope, even in the face of exceptions |
605 | 9.87k | comphelper::ScopeGuard g([this] { |
606 | 9.87k | if( SvParserState::Pending != eState ) |
607 | 9.87k | ReleaseRef(); // now parser is not needed anymore |
608 | 9.87k | }); |
609 | 9.87k | Continue( 0 ); |
610 | 9.87k | } |
611 | 576 | else |
612 | 576 | eState = SvParserState::Error; |
613 | | |
614 | 10.4k | return eState; |
615 | 10.4k | } |
616 | | |
617 | | void SvRTFParser::Continue( int nToken ) |
618 | 9.87k | { |
619 | | // DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(), |
620 | | // "Characterset was changed." ); |
621 | | |
622 | 9.87k | if( !nToken ) |
623 | 9.87k | nToken = GetNextToken(); |
624 | | |
625 | 9.87k | bool bLooping = false; |
626 | | |
627 | 1.81M | while (IsParserWorking() && !bLooping) |
628 | 1.80M | { |
629 | 1.80M | auto nCurrentTokenIndex = m_nTokenIndex; |
630 | 1.80M | auto nCurrentToken = nToken; |
631 | | |
632 | 1.80M | SaveState( nToken ); |
633 | 1.80M | switch( nToken ) |
634 | 1.80M | { |
635 | 47.1k | case '}': |
636 | 47.1k | if( nOpenBrackets ) |
637 | 46.7k | goto NEXTTOKEN; |
638 | 369 | eState = SvParserState::Accepted; |
639 | 369 | break; |
640 | | |
641 | 96.3k | case '{': |
642 | | // an unknown group ? |
643 | 96.3k | { |
644 | 96.3k | if( RTF_IGNOREFLAG != GetNextToken() ) |
645 | 92.8k | nToken = SkipToken(); |
646 | 3.52k | else if( RTF_UNKNOWNCONTROL != GetNextToken() ) |
647 | 2.28k | nToken = SkipToken( -2 ); |
648 | 1.23k | else |
649 | 1.23k | { |
650 | | // filter immediately |
651 | 1.23k | ReadUnknownData(); |
652 | 1.23k | nToken = GetNextToken(); |
653 | 1.23k | if( '}' != nToken ) |
654 | 42 | eState = SvParserState::Error; |
655 | 1.23k | break; // move to next token!! |
656 | 1.23k | } |
657 | 96.3k | } |
658 | 95.1k | goto NEXTTOKEN; |
659 | | |
660 | 95.1k | case RTF_UNKNOWNCONTROL: |
661 | 87.3k | break; // skip unknown token |
662 | 0 | case RTF_NEXTTYPE: |
663 | 1.23k | case RTF_ANSITYPE: |
664 | 1.23k | eCodeSet = RTL_TEXTENCODING_MS_1252; |
665 | 1.23k | SetSrcEncoding( eCodeSet ); |
666 | 1.23k | break; |
667 | 323 | case RTF_MACTYPE: |
668 | 323 | eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN; |
669 | 323 | SetSrcEncoding( eCodeSet ); |
670 | 323 | break; |
671 | 403 | case RTF_PCTYPE: |
672 | 403 | eCodeSet = RTL_TEXTENCODING_IBM_437; |
673 | 403 | SetSrcEncoding( eCodeSet ); |
674 | 403 | break; |
675 | 25 | case RTF_PCATYPE: |
676 | 25 | eCodeSet = RTL_TEXTENCODING_IBM_850; |
677 | 25 | SetSrcEncoding( eCodeSet ); |
678 | 25 | break; |
679 | 4.97k | case RTF_ANSICPG: |
680 | 4.97k | eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue); |
681 | 4.97k | SetSrcEncoding(eCodeSet); |
682 | 4.97k | break; |
683 | 1.56M | default: |
684 | 1.70M | NEXTTOKEN: |
685 | 1.70M | NextToken( nToken ); |
686 | 1.70M | break; |
687 | 1.80M | } |
688 | 1.80M | if( IsParserWorking() ) |
689 | 1.80M | SaveState( 0 ); // processed till here, |
690 | | // continue with new token! |
691 | 1.80M | nToken = GetNextToken(); |
692 | 1.80M | bLooping = nCurrentTokenIndex == m_nTokenIndex && nToken == nCurrentToken; |
693 | 1.80M | } |
694 | 9.84k | if( SvParserState::Accepted == eState && 0 < nOpenBrackets ) |
695 | 8.61k | eState = SvParserState::Error; |
696 | 9.84k | } |
697 | | |
698 | | void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc ) |
699 | 50.9k | { |
700 | 50.9k | if (eEnc == RTL_TEXTENCODING_DONTKNOW) |
701 | 29.8k | eEnc = GetCodeSet(); |
702 | | |
703 | 50.9k | if (!aParserStates.empty()) |
704 | 50.6k | aParserStates.top().eCodeSet = eEnc; |
705 | 50.9k | SetSrcEncoding(eEnc); |
706 | 50.9k | } |
707 | | |
708 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |