/src/libreoffice/svtools/source/svrtf/parrtf.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <sal/config.h> |
21 | | #include <sal/log.hxx> |
22 | | |
23 | | #include <comphelper/scopeguard.hxx> |
24 | | |
25 | | #include <o3tl/numeric.hxx> |
26 | | #include <rtl/character.hxx> |
27 | | #include <rtl/strbuf.hxx> |
28 | | #include <rtl/tencinfo.h> |
29 | | #include <rtl/ustrbuf.hxx> |
30 | | #include <tools/stream.hxx> |
31 | | #include <tools/debug.hxx> |
32 | | #include <svtools/rtftoken.h> |
33 | | #include <svtools/parrtf.hxx> |
34 | | |
// Upper bound on the number of UTF-16 code units ScanText() gathers into
// its local buffer before it ends the current text run.
constexpr int MAX_STRING_LEN = 1024;

// ASCII-only character classification used while tokenizing control words
// and their numeric parameters.
#define RTF_ISDIGIT( c ) rtl::isAsciiDigit(c)
#define RTF_ISALPHA( c ) rtl::isAsciiAlpha(c)
39 | | |
40 | | SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize ) |
41 | 11.5k | : SvParser<int>( rIn, nStackSize ) |
42 | 11.5k | , nOpenBrackets(0) |
43 | 11.5k | , nUPRLevel(0) |
44 | 11.5k | , eCodeSet(RTL_TEXTENCODING_MS_1252) |
45 | 11.5k | , nUCharOverread(1) |
46 | 11.5k | { |
47 | | // default is ANSI-CodeSet |
48 | 11.5k | SetSrcEncoding( RTL_TEXTENCODING_MS_1252 ); |
49 | 11.5k | bRTF_InTextRead = false; |
50 | 11.5k | } |
51 | | |
52 | | SvRTFParser::~SvRTFParser() |
53 | 11.5k | { |
54 | 11.5k | } |
55 | | |
56 | | |
// Tokenizer core: produces the next RTF token, starting at the look-ahead
// character nNextCh.
//
// Loops until a non-zero token has been determined or eState is no longer
// SvParserState::Working. The return value is one of:
//   - an RTF_* token id (control symbols, looked-up control words,
//     RTF_UNKNOWNCONTROL for control words GetRTFToken() does not know),
//   - RTF_TEXTTOKEN after ScanText() has appended a text run to aToken,
//   - the raw character for '{', '}' and sal_Unicode(EOF).
// Besides nNextCh this updates aToken, nTokenValue / bTokenHasValue,
// nOpenBrackets, aParserStates, nUCharOverread, nUPRLevel and eState.
57 | | int SvRTFParser::GetNextToken_()
58 | 2.95M | { |
59 | 2.95M | int nRet = 0; |
60 | 3.14M | do { |
// bNextCh: fetch a fresh look-ahead character at the bottom of the loop.
// Branches that already left a valid look-ahead in nNextCh clear it.
61 | 3.14M | bool bNextCh = true; |
62 | 3.14M | switch( nNextCh ) |
63 | 3.14M | { |
64 | 1.39M | case '\\': |
65 | 1.39M | { |
66 | | // control characters |
67 | 1.39M | nNextCh = GetNextChar(); |
68 | 1.39M | switch( nNextCh ) |
69 | 1.39M | { |
// Escapes that stand for text: put the backslash back as look-ahead, step
// the stream back by one and let ScanText() decode the escape.
70 | 5.53k | case '{': |
71 | 6.33k | case '}': |
72 | 21.2k | case '\\': |
73 | 21.4k | case '+': // I found it in a RTF-file |
74 | 22.4k | case '~': // nonbreaking space |
75 | 23.1k | case '-': // optional hyphen |
76 | 23.3k | case '_': // nonbreaking hyphen |
77 | 34.3k | case '\'': // HexValue |
78 | 34.3k | nNextCh = '\\'; |
79 | 34.3k | rInput.SeekRel( -1 ); |
80 | 34.3k | ScanText(); |
81 | 34.3k | nRet = RTF_TEXTTOKEN; |
// Only read another character if ScanText() left nNextCh at 0.
82 | 34.3k | bNextCh = 0 == nNextCh; |
83 | 34.3k | break; |
84 | | |
85 | 15.1k | case '*': // ignoreflag |
86 | 15.1k | nRet = RTF_IGNOREFLAG; |
87 | 15.1k | break; |
88 | 3.24k | case ':': // subentry in an index entry |
89 | 3.24k | nRet = RTF_SUBENTRYINDEX; |
90 | 3.24k | break; |
91 | 830 | case '|': // formula-character |
92 | 830 | nRet = RTF_FORMULA; |
93 | 830 | break; |
94 | | |
// A backslash directly followed by a line break is reported as RTF_PAR.
95 | 374k | case 0x0a: |
96 | 386k | case 0x0d: |
97 | 386k | nRet = RTF_PAR; |
98 | 386k | break; |
99 | | |
100 | 951k | default: |
101 | 951k | if( RTF_ISALPHA( nNextCh ) ) |
102 | 848k | { |
// Control word: collect the backslash plus all following ASCII letters.
103 | 848k | aToken = "\\"; |
104 | 848k | { |
105 | 3.07M | do { |
106 | 3.07M | aToken.appendUtf32(nNextCh); |
107 | 3.07M | nNextCh = GetNextChar(); |
108 | 3.07M | } while( RTF_ISALPHA( nNextCh ) ); |
109 | 848k | } |
110 | | |
111 | | // minus before numeric parameters |
112 | 848k | bool bNegValue = false; |
113 | 848k | if( '-' == nNextCh ) |
114 | 26.6k | { |
115 | 26.6k | bNegValue = true; |
116 | 26.6k | nNextCh = GetNextChar(); |
117 | 26.6k | } |
118 | | |
119 | | // possible numeric parameter |
120 | 848k | if( RTF_ISDIGIT( nNextCh ) ) |
121 | 323k | { |
122 | 323k | OUStringBuffer aNumber; |
123 | 716k | do { |
124 | 716k | aNumber.append(static_cast<sal_Unicode>(nNextCh)); |
125 | 716k | nNextCh = GetNextChar(); |
126 | 716k | } while( RTF_ISDIGIT( nNextCh ) ); |
127 | 323k | nTokenValue = OUString::unacquired(aNumber).toInt32(); |
128 | 323k | if( bNegValue ) |
129 | 13.6k | nTokenValue = -nTokenValue; |
130 | 323k | bTokenHasValue=true; |
131 | 323k | } |
// A '-' that was not followed by a digit does not belong to the control
// word: make it the look-ahead again and step the stream back.
132 | 525k | else if( bNegValue ) // restore minus |
133 | 12.9k | { |
134 | 12.9k | nNextCh = '-'; |
135 | 12.9k | rInput.SeekRel( -1 ); |
136 | 12.9k | } |
137 | 848k | if( ' ' == nNextCh ) // blank is part of token! |
138 | 89.8k | nNextCh = GetNextChar(); |
139 | | |
140 | | // search for the token in the table: |
141 | 848k | if( 0 == (nRet = GetRTFToken( aToken )) ) |
142 | | // Unknown Control |
143 | 165k | nRet = RTF_UNKNOWNCONTROL; |
144 | | |
145 | | // bug 76812 - unicode token handled as normal text |
// nNextCh already holds the character following the control word.
146 | 848k | bNextCh = false; |
147 | 848k | switch( nRet ) |
148 | 848k | { |
// \ucN is consumed here and never returned: a non-negative N (narrowed to
// sal_uInt8) becomes the number of characters skipped after each \u and is
// also recorded in the state of the current group, if there is one.
149 | 4.73k | case RTF_UC: |
150 | 4.73k | if( 0 <= nTokenValue ) |
151 | 3.48k | { |
152 | 3.48k | nUCharOverread = static_cast<sal_uInt8>(nTokenValue); |
153 | 3.48k | if (!aParserStates.empty()) |
154 | 3.25k | { |
155 | | //cmc: other ifdef breaks #i3584 |
156 | 3.25k | aParserStates.top().nUCharOverread = nUCharOverread; |
157 | 3.25k | } |
158 | 3.48k | } |
159 | 4.73k | aToken.setLength( 0 ); // #i47831# erase token to prevent the token from being treated as text |
160 | | // read next token |
161 | 4.73k | nRet = 0; |
162 | 4.73k | break; |
163 | | |
// \upr is only processed while no SkipGroup() is in progress. This branch
// recurses into GetNextToken_(); nUPRLevel bounds the recursion depth and
// exceeding the limit puts the parser into the Error state.
164 | 5.25k | case RTF_UPR: |
165 | 5.25k | if (!_inSkipGroup) |
166 | 4.31k | { |
167 | 4.31k | if (nUPRLevel > 256) // fairly sure > 1 is probably an error, but provide some leeway |
168 | 71 | { |
169 | 71 | SAL_WARN("svtools", "urp stack too deep"); |
170 | 71 | eState = SvParserState::Error; |
171 | 71 | break; |
172 | 71 | } |
173 | | |
174 | 4.24k | ++nUPRLevel; |
175 | | |
176 | | // UPR - overread the group with the ansi |
177 | | // information |
// Consume tokens until a '{' token, EOF, or the parser stops working.
178 | 4.24k | int nNextToken; |
179 | 4.24k | do |
180 | 6.76k | { |
181 | 6.76k | nNextToken = GetNextToken_(); |
182 | 6.76k | } |
183 | 6.76k | while (nNextToken != '{' && nNextToken != sal_Unicode(EOF) && IsParserWorking()); |
184 | | |
185 | 4.24k | SkipGroup(); |
186 | 4.24k | GetNextToken_(); // overread the last bracket |
// Nothing is reported for \upr itself; the outer loop reads on.
187 | 4.24k | nRet = 0; |
188 | | |
189 | 4.24k | --nUPRLevel; |
190 | 4.24k | } |
191 | 5.18k | break; |
192 | | |
// \uN: when ScanText() is the caller it sets bRTF_InTextRead and handles
// the character itself, so RTF_U is returned unchanged. Otherwise the
// code unit starts a new text token here.
193 | 20.3k | case RTF_U: |
194 | 20.3k | if( !bRTF_InTextRead ) |
195 | 8.64k | { |
196 | 8.64k | nRet = RTF_TEXTTOKEN; |
197 | 8.64k | aToken = OUStringChar( static_cast<sal_Unicode>(nTokenValue) ); |
198 | | |
199 | | // overread the next n "RTF" characters. This |
200 | | // can be also \{, \}, \'88 |
201 | 19.6k | for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) |
202 | 10.9k | { |
// Line breaks do not count as one of the skipped characters.
203 | 10.9k | sal_uInt32 cAnsi = nNextCh; |
204 | 11.7k | while( 0xD == cAnsi ) |
205 | 784 | cAnsi = GetNextChar(); |
206 | 13.4k | while( 0xA == cAnsi ) |
207 | 2.47k | cAnsi = GetNextChar(); |
208 | | |
// A backslash escape counts as one skipped character: the character after
// the backslash is consumed here, plus two hex digits for \'hh.
209 | 10.9k | if( '\\' == cAnsi && |
210 | 2.60k | '\'' == GetNextChar() ) |
211 | | // skip HexValue |
212 | 206 | GetHexValue(); |
213 | 10.9k | nNextCh = GetNextChar(); |
214 | 10.9k | } |
// Append any text that directly follows to the same token.
215 | 8.64k | ScanText(); |
216 | 8.64k | bNextCh = 0 == nNextCh; |
217 | 8.64k | } |
218 | 20.3k | break; |
219 | 848k | } |
220 | 848k | } |
// Backslash followed by something that is neither a known control symbol
// nor a letter: unless the parser is pending, keep that character as
// look-ahead so the next iteration handles it.
221 | 102k | else if( SvParserState::Pending != eState ) |
222 | 102k | { |
223 | | // Bug 34631 - "\ " read on - Blank as character |
224 | | // eState = SvParserState::Error; |
225 | 102k | bNextCh = false; |
226 | 102k | } |
227 | 951k | break; |
228 | 1.39M | } |
229 | 1.39M | } |
230 | 1.39M | break; |
231 | | |
// End of input: the parser is marked Accepted and EOF is returned as token.
232 | 1.39M | case sal_Unicode(EOF): |
233 | 13.9k | eState = SvParserState::Accepted; |
234 | 13.9k | nRet = nNextCh; |
235 | 13.9k | break; |
236 | | |
// Group start: remember the current \uc count and source encoding so the
// matching '}' can restore them. No state is pushed while the bracket
// count is negative (more '}' than '{' seen so far).
237 | 199k | case '{': |
238 | 199k | { |
239 | 199k | if( 0 <= nOpenBrackets ) |
240 | 197k | { |
241 | 197k | RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() ); |
242 | 197k | aParserStates.push( aState ); |
243 | 197k | } |
244 | 199k | ++nOpenBrackets; |
245 | 199k | DBG_ASSERT( |
246 | 199k | static_cast<size_t>(nOpenBrackets) == aParserStates.size(), |
247 | 199k | "ParserStateStack unequal to bracket count" ); |
248 | 199k | nRet = nNextCh; |
249 | 199k | } |
250 | 199k | break; |
251 | | |
// Group end: drop this group's state and reinstate the \uc count and
// source encoding of the enclosing group, or the defaults (1 and the
// document code set) when no group remains open.
252 | 96.2k | case '}': |
253 | 96.2k | --nOpenBrackets; |
254 | 96.2k | if( 0 <= nOpenBrackets ) |
255 | 87.0k | { |
256 | 87.0k | aParserStates.pop(); |
257 | 87.0k | if( !aParserStates.empty() ) |
258 | 86.3k | { |
259 | 86.3k | const RtfParserState_Impl& rRPS = |
260 | 86.3k | aParserStates.top(); |
261 | 86.3k | nUCharOverread = rRPS.nUCharOverread; |
262 | 86.3k | SetSrcEncoding( rRPS.eCodeSet ); |
263 | 86.3k | } |
264 | 697 | else |
265 | 697 | { |
266 | 697 | nUCharOverread = 1; |
267 | 697 | SetSrcEncoding( GetCodeSet() ); |
268 | 697 | } |
269 | 87.0k | } |
270 | 96.2k | DBG_ASSERT( |
271 | 96.2k | static_cast<size_t>(nOpenBrackets) == aParserStates.size(), |
272 | 96.2k | "ParserStateStack unequal to bracket count" ); |
273 | 96.2k | nRet = nNextCh; |
274 | 96.2k | break; |
275 | | |
// Bare line breaks between tokens produce no token; nRet stays 0 and the
// loop continues with the next character.
276 | 20.3k | case 0x0d: |
277 | 82.1k | case 0x0a: |
278 | 82.1k | break; |
279 | | |
280 | 1.35M | default: |
281 | | // now normal text follows |
282 | 1.35M | ScanText(); |
283 | 1.35M | nRet = RTF_TEXTTOKEN; |
284 | 1.35M | bNextCh = 0 == nNextCh; |
285 | 1.35M | break; |
286 | 3.14M | } |
287 | | |
288 | 3.14M | if( bNextCh ) |
289 | 1.50M | nNextCh = GetNextChar(); |
290 | | |
291 | 3.14M | } while( !nRet && SvParserState::Working == eState ); |
292 | 2.95M | return nRet; |
293 | 2.95M | } |
294 | | |
295 | | |
296 | | sal_Unicode SvRTFParser::GetHexValue() |
297 | 40.6k | { |
298 | | // collect Hex values |
299 | 40.6k | sal_uInt32 nHi = GetNextChar(); |
300 | 40.6k | sal_uInt32 nLo = GetNextChar(); |
301 | 40.6k | nNextCh = nLo; |
302 | 40.6k | return o3tl::convertToHex<sal_Unicode, 0>(nHi, nLo); |
303 | 40.6k | } |
304 | | |
// Collects a run of text, starting at the look-ahead character nNextCh,
// into a local buffer and appends that buffer to aToken.
//
// Scanning ends when a branch clears bContinue, when the parser is no
// longer working, or when the buffer has reached MAX_STRING_LEN. Escapes
// decoded here: \'hh (bytes converted with the current source encoding),
// \\ \{ \} \+ (literal character), \~ \- \_ (U+00A0, U+00AD, U+2011),
// \uN (Unicode code unit) and \ucN. Any other backslash sequence ends the
// run with the backslash restored as look-ahead.
305 | | void SvRTFParser::ScanText()
306 | 1.40M | { |
307 | 1.40M | const sal_Unicode cBreak = 0; |
308 | 1.40M | OUStringBuffer aStrBuffer; |
309 | 1.40M | bool bContinue = true; |
310 | 6.66M | while( bContinue && IsParserWorking() && aStrBuffer.getLength() < MAX_STRING_LEN) |
311 | 5.26M | { |
// bNextCh: read a fresh look-ahead at the bottom of the loop; cleared by
// branches that already left the next character in nNextCh.
312 | 5.26M | bool bNextCh = true; |
313 | 5.26M | switch( nNextCh ) |
314 | 5.26M | { |
315 | 668k | case '\\': |
316 | 668k | { |
317 | 668k | nNextCh = GetNextChar(); |
318 | 668k | switch (nNextCh) |
319 | 668k | { |
// \'hh: raw bytes are gathered in aByteString and converted in one go
// using GetSrcEncoding() - presumably so that multi-byte sequences are
// decoded as a unit.
320 | 17.5k | case '\'': |
321 | 17.5k | { |
322 | | |
323 | 17.5k | OStringBuffer aByteString; |
324 | 39.3k | while (true) |
325 | 39.3k | { |
326 | 39.3k | char c = static_cast<char>(GetHexValue()); |
327 | | /* |
328 | | * Note: \'00 is a valid internal character in a |
329 | | * string in RTF. OStringBuffer supports |
330 | | * appending nulls fine |
331 | | */ |
332 | 39.3k | aByteString.append(c); |
333 | | |
// Keep reading the characters that follow the escape until a '{', '}' or
// '\\' (or EOF) is met; plain characters join the byte string.
334 | 39.3k | bool bBreak = false; |
335 | 39.3k | bool bEOF = false; |
336 | 39.3k | char nSlash = '\\'; |
337 | 311k | while (!bBreak) |
338 | 272k | { |
339 | 272k | auto next = GetNextChar(); |
340 | 272k | if (sal_Unicode(EOF) == next) |
341 | 577 | { |
342 | 577 | bEOF = true; |
343 | 577 | break; |
344 | 577 | } |
// A value above 0xFF cannot be stored as a byte: convert the bytes
// collected so far, then append this character to the text directly.
345 | 271k | if (next>0xFF) // fix for #i43933# and #i35653# |
346 | 4.53k | { |
347 | 4.53k | if (!aByteString.isEmpty()) |
348 | 2.91k | { |
349 | 2.91k | aStrBuffer.append( OStringToOUString(aByteString, GetSrcEncoding()) ); |
350 | 2.91k | aByteString.setLength(0); |
351 | 2.91k | } |
352 | 4.53k | aStrBuffer.append(static_cast<sal_Unicode>(next)); |
353 | | |
354 | 4.53k | continue; |
355 | 4.53k | } |
// Line breaks inside the run are dropped.
356 | 267k | nSlash = static_cast<char>(next); |
357 | 269k | while (nSlash == 0xD || nSlash == 0xA) |
358 | 2.57k | nSlash = static_cast<char>(GetNextChar()); |
359 | | |
360 | 267k | switch (nSlash) |
361 | 267k | { |
362 | 1.28k | case '{': |
363 | 3.26k | case '}': |
364 | 38.7k | case '\\': |
365 | 38.7k | bBreak = true; |
366 | 38.7k | break; |
367 | 228k | default: |
368 | 228k | aByteString.append(nSlash); |
369 | 228k | break; |
370 | 267k | } |
371 | 267k | } |
372 | | |
373 | 39.3k | if (bEOF) |
374 | 577 | { |
375 | 577 | bContinue = false; // abort, string together |
376 | 577 | break; |
377 | 577 | } |
378 | | |
379 | 38.7k | nNextCh = GetNextChar(); |
380 | | |
// Unless the terminator starts another \' escape, step the stream back by
// one, make the terminator the look-ahead again and leave the hex loop.
// Otherwise the loop continues with the next GetHexValue().
381 | 38.7k | if (nSlash != '\\' || nNextCh != '\'') |
382 | 16.9k | { |
383 | 16.9k | rInput.SeekRel(-1); |
384 | 16.9k | nNextCh = static_cast<unsigned char>(nSlash); |
385 | 16.9k | break; |
386 | 16.9k | } |
387 | 38.7k | } |
388 | | |
389 | 17.5k | bNextCh = false; |
390 | | |
// Convert whatever bytes are still pending.
391 | 17.5k | if (!aByteString.isEmpty()) |
392 | 16.5k | { |
393 | 16.5k | aStrBuffer.append( OStringToOUString(aByteString, GetSrcEncoding()) ); |
394 | 16.5k | aByteString.setLength(0); |
395 | 16.5k | } |
396 | 17.5k | } |
397 | 0 | break; |
// Escaped literal characters are taken over as they are.
398 | 45.9k | case '\\': |
399 | 62.0k | case '}': |
400 | 86.1k | case '{': |
401 | 86.9k | case '+': // I found in a RTF file |
402 | 86.9k | aStrBuffer.append(sal_Unicode(nNextCh)); |
403 | 86.9k | break; |
404 | 704 | case '~': // nonbreaking space |
405 | 704 | aStrBuffer.append(u'\x00A0'); |
406 | 704 | break; |
407 | 2.09k | case '-': // optional hyphen |
408 | 2.09k | aStrBuffer.append(u'\x00AD'); |
409 | 2.09k | break; |
410 | 458 | case '_': // nonbreaking hyphen |
411 | 458 | aStrBuffer.append(u'\x2011'); |
412 | 458 | break; |
413 | | |
414 | 26.6k | case 'u': |
415 | | // read UNI-Code characters |
416 | 26.6k | { |
// Peek at the character after the 'u', then move the stream back by two
// so that the control word can be re-read from its first letter.
417 | 26.6k | nNextCh = GetNextChar(); |
418 | 26.6k | rInput.SeekRel( -2 ); |
419 | | |
420 | 26.6k | if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) ) |
421 | 11.7k | { |
// \uN: let GetNextToken_() parse the keyword and its value. With
// bRTF_InTextRead set it returns RTF_U without building a text token.
422 | 11.7k | bRTF_InTextRead = true; |
423 | | |
424 | 11.7k | OUString sSave( aToken ); // GetNextToken_() overwrites this |
425 | 11.7k | nNextCh = '\\'; |
426 | 11.7k | int nToken = GetNextToken_(); |
427 | 11.7k | DBG_ASSERT( RTF_U == nToken, "still not a UNI-Code character" ); |
428 | | // don't convert symbol chars |
429 | 11.7k | aStrBuffer.append(static_cast< sal_Unicode >(nTokenValue)); |
430 | | |
431 | | // overread the next n "RTF" characters. This |
432 | | // can be also \{, \}, \'88 |
433 | 22.4k | for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) |
434 | 10.7k | { |
// Line breaks do not count as one of the skipped characters.
435 | 10.7k | sal_Unicode cAnsi = nNextCh; |
436 | 10.9k | while( 0xD == cAnsi ) |
437 | 232 | cAnsi = GetNextChar(); |
438 | 12.0k | while( 0xA == cAnsi ) |
439 | 1.28k | cAnsi = GetNextChar(); |
440 | | |
441 | 10.7k | if( '\\' == cAnsi && |
442 | 2.48k | '\'' == GetNextChar() ) |
443 | | // skip HexValue |
444 | 1.12k | GetHexValue(); |
445 | 10.7k | nNextCh = GetNextChar(); |
446 | 10.7k | } |
// Restore the token text that GetNextToken_() replaced and leave the
// in-text mode again.
447 | 11.7k | bNextCh = false; |
448 | 11.7k | aToken = sSave; |
449 | 11.7k | bRTF_InTextRead = false; |
450 | 11.7k | } |
451 | 14.8k | else if ( 'c' == nNextCh ) |
452 | 2.98k | { |
453 | | // Prevent text breaking into multiple tokens. |
// Undo the rewind above, then parse the decimal argument of \uc here.
// The value is accumulated in a sal_uInt8 and, like in GetNextToken_(),
// stored both in nUCharOverread and in the current group's state.
454 | 2.98k | rInput.SeekRel( 2 ); |
455 | 2.98k | nNextCh = GetNextChar(); |
456 | 2.98k | if (RTF_ISDIGIT( nNextCh )) |
457 | 1.90k | { |
458 | 1.90k | sal_uInt8 nNewOverread = 0 ; |
459 | 2.97k | do { |
460 | 2.97k | nNewOverread *= 10; |
461 | 2.97k | nNewOverread += nNextCh - '0'; |
462 | 2.97k | nNextCh = GetNextChar(); |
463 | 2.97k | } while ( RTF_ISDIGIT( nNextCh ) ); |
464 | 1.90k | nUCharOverread = nNewOverread; |
465 | 1.90k | if (!aParserStates.empty()) |
466 | 1.77k | aParserStates.top().nUCharOverread = nNewOverread; |
467 | 1.90k | } |
// A single blank after the keyword belongs to it and is skipped.
468 | 2.98k | bNextCh = 0x20 == nNextCh; |
469 | 2.98k | } |
470 | 11.8k | else |
471 | 11.8k | { |
// Some other control word starting with 'u': end the text run and hand
// the backslash back to the tokenizer.
472 | 11.8k | nNextCh = '\\'; |
473 | 11.8k | bContinue = false; // abort, string together |
474 | 11.8k | } |
475 | 26.6k | } |
476 | 26.6k | break; |
477 | | |
// Any other control sequence is not text: step back over the character
// just read and stop with the backslash as look-ahead.
478 | 534k | default: |
479 | 534k | rInput.SeekRel( -1 ); |
480 | 534k | nNextCh = '\\'; |
481 | 534k | bContinue = false; // abort, string together |
482 | 534k | break; |
483 | 668k | } |
484 | 668k | } |
485 | 668k | break; |
486 | | |
// EOF at this point puts the parser into the Error state; like a group
// bracket it ends the text run.
487 | 668k | case sal_Unicode(EOF): |
488 | 759 | eState = SvParserState::Error; |
489 | 759 | [[fallthrough]]; |
490 | 68.3k | case '{': |
491 | 134k | case '}': |
492 | 134k | bContinue = false; |
493 | 134k | break; |
494 | | |
// Line breaks are not part of the text.
495 | 84.4k | case 0x0a: |
496 | 94.1k | case 0x0d: |
497 | 94.1k | break; |
498 | | |
499 | 4.36M | default: |
500 | 4.36M | if( nNextCh == cBreak || aStrBuffer.getLength() >= MAX_STRING_LEN) |
501 | 710k | bContinue = false; |
502 | 3.65M | else |
503 | 3.65M | { |
// Append the current character, then keep appending for as long as ASCII
// letters or digits follow and the length limit is not reached.
504 | 7.21M | do { |
505 | | // all other characters end up in the text |
506 | 7.21M | aStrBuffer.appendUtf32(nNextCh); |
507 | | |
// EOF inside a text run: hand over what was collected and return at once
// (eState is not changed on this path).
508 | 7.21M | if (sal_Unicode(EOF) == (nNextCh = GetNextChar())) |
509 | 6.44k | { |
510 | 6.44k | if (!aStrBuffer.isEmpty()) |
511 | 6.44k | aToken.append( aStrBuffer ); |
512 | 6.44k | return; |
513 | 6.44k | } |
514 | 7.21M | } while |
515 | 3.65M | ( |
516 | 7.21M | (RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) && |
517 | 3.56M | (aStrBuffer.getLength() < MAX_STRING_LEN) |
518 | 3.65M | ); |
519 | 3.65M | bNextCh = false; |
520 | 3.65M | } |
521 | 5.26M | } |
522 | | |
523 | 5.25M | if( bContinue && bNextCh ) |
524 | 184k | nNextCh = GetNextChar(); |
525 | 5.25M | } |
526 | | |
527 | 1.39M | if (!aStrBuffer.isEmpty()) |
528 | 825k | aToken.append( aStrBuffer ); |
529 | 1.39M | } |
530 | | |
531 | | |
532 | | short SvRTFParser::_inSkipGroup=0; |
533 | | |
534 | | void SvRTFParser::SkipGroup() |
535 | 9.40k | { |
536 | 9.40k | short nBrackets=1; |
537 | 9.40k | if (_inSkipGroup>0) |
538 | 0 | return; |
539 | 9.40k | _inSkipGroup++; |
540 | | //#i16185# faking \bin keyword |
541 | 9.40k | do |
542 | 60.1k | { |
543 | 60.1k | switch (nNextCh) |
544 | 60.1k | { |
545 | 6.30k | case '{': |
546 | 6.30k | ++nBrackets; |
547 | 6.30k | break; |
548 | 10.1k | case '}': |
549 | 10.1k | if (!--nBrackets) { |
550 | 5.38k | _inSkipGroup--; |
551 | 5.38k | return; |
552 | 5.38k | } |
553 | 4.78k | break; |
554 | 60.1k | } |
555 | 54.7k | int nToken = GetNextToken_(); |
556 | 54.7k | if (nToken == RTF_BIN) |
557 | 652 | { |
558 | 652 | rInput.SeekRel(-1); |
559 | 652 | SAL_WARN_IF(nTokenValue < 0, "svtools", "negative value argument for rtf \\bin keyword"); |
560 | 652 | if (nTokenValue > 0) |
561 | 238 | rInput.SeekRel(nTokenValue); |
562 | 652 | nNextCh = GetNextChar(); |
563 | 652 | } |
564 | 56.6k | while (nNextCh==0xa || nNextCh==0xd) |
565 | 1.89k | { |
566 | 1.89k | nNextCh = GetNextChar(); |
567 | 1.89k | } |
568 | 54.7k | } while (sal_Unicode(EOF) != nNextCh && IsParserWorking()); |
569 | | |
570 | 4.02k | if( SvParserState::Pending != eState && '}' != nNextCh ) |
571 | 3.71k | eState = SvParserState::Error; |
572 | 4.02k | _inSkipGroup--; |
573 | 4.02k | } |
574 | | |
575 | 2.59k | void SvRTFParser::ReadUnknownData() { SkipGroup(); } |
576 | 28 | void SvRTFParser::ReadBitmapData() { SkipGroup(); } |
577 | | |
578 | | |
579 | | SvParserState SvRTFParser::CallParser() |
580 | 11.5k | { |
581 | 11.5k | char cFirstCh(0); |
582 | 11.5k | nNextChPos = rInput.Tell(); |
583 | 11.5k | rInput.ReadChar( cFirstCh ); |
584 | 11.5k | nNextCh = static_cast<unsigned char>(cFirstCh); |
585 | 11.5k | eState = SvParserState::Working; |
586 | 11.5k | nOpenBrackets = 0; |
587 | 11.5k | eCodeSet = RTL_TEXTENCODING_MS_1252; |
588 | 11.5k | SetSrcEncoding( eCodeSet ); |
589 | | |
590 | | // the first two tokens should be '{' and \\rtf !! |
591 | 11.5k | if( '{' == GetNextToken() && RTF_RTF == GetNextToken() ) |
592 | 11.3k | { |
593 | 11.3k | AddFirstRef(); |
594 | | // call ReleaseRef at end of this scope, even in the face of exceptions |
595 | 11.3k | comphelper::ScopeGuard g([this] { |
596 | 11.3k | if( SvParserState::Pending != eState ) |
597 | 11.3k | ReleaseRef(); // now parser is not needed anymore |
598 | 11.3k | }); |
599 | 11.3k | Continue( 0 ); |
600 | 11.3k | } |
601 | 154 | else |
602 | 154 | eState = SvParserState::Error; |
603 | | |
604 | 11.5k | return eState; |
605 | 11.5k | } |
606 | | |
607 | | void SvRTFParser::Continue( int nToken ) |
608 | 11.3k | { |
609 | | // DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(), |
610 | | // "Characterset was changed." ); |
611 | | |
612 | 11.3k | if( !nToken ) |
613 | 11.3k | nToken = GetNextToken(); |
614 | | |
615 | 11.3k | bool bLooping = false; |
616 | | |
617 | 2.37M | while (IsParserWorking() && !bLooping) |
618 | 2.35M | { |
619 | 2.35M | auto nCurrentTokenIndex = m_nTokenIndex; |
620 | 2.35M | auto nCurrentToken = nToken; |
621 | | |
622 | 2.35M | SaveState( nToken ); |
623 | 2.35M | switch( nToken ) |
624 | 2.35M | { |
625 | 61.1k | case '}': |
626 | 61.1k | if( nOpenBrackets ) |
627 | 60.6k | goto NEXTTOKEN; |
628 | 486 | eState = SvParserState::Accepted; |
629 | 486 | break; |
630 | | |
631 | 122k | case '{': |
632 | | // an unknown group ? |
633 | 122k | { |
634 | 122k | if( RTF_IGNOREFLAG != GetNextToken() ) |
635 | 118k | nToken = SkipToken(); |
636 | 4.15k | else if( RTF_UNKNOWNCONTROL != GetNextToken() ) |
637 | 3.07k | nToken = SkipToken( -2 ); |
638 | 1.07k | else |
639 | 1.07k | { |
640 | | // filter immediately |
641 | 1.07k | ReadUnknownData(); |
642 | 1.07k | nToken = GetNextToken(); |
643 | 1.07k | if( '}' != nToken ) |
644 | 46 | eState = SvParserState::Error; |
645 | 1.07k | break; // move to next token!! |
646 | 1.07k | } |
647 | 122k | } |
648 | 121k | goto NEXTTOKEN; |
649 | | |
650 | 144k | case RTF_UNKNOWNCONTROL: |
651 | 144k | break; // skip unknown token |
652 | 0 | case RTF_NEXTTYPE: |
653 | 1.86k | case RTF_ANSITYPE: |
654 | 1.86k | eCodeSet = RTL_TEXTENCODING_MS_1252; |
655 | 1.86k | SetSrcEncoding( eCodeSet ); |
656 | 1.86k | break; |
657 | 704 | case RTF_MACTYPE: |
658 | 704 | eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN; |
659 | 704 | SetSrcEncoding( eCodeSet ); |
660 | 704 | break; |
661 | 1.00k | case RTF_PCTYPE: |
662 | 1.00k | eCodeSet = RTL_TEXTENCODING_IBM_437; |
663 | 1.00k | SetSrcEncoding( eCodeSet ); |
664 | 1.00k | break; |
665 | 137 | case RTF_PCATYPE: |
666 | 137 | eCodeSet = RTL_TEXTENCODING_IBM_850; |
667 | 137 | SetSrcEncoding( eCodeSet ); |
668 | 137 | break; |
669 | 9.77k | case RTF_ANSICPG: |
670 | 9.77k | eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue); |
671 | 9.77k | SetSrcEncoding(eCodeSet); |
672 | 9.77k | break; |
673 | 2.01M | default: |
674 | 2.19M | NEXTTOKEN: |
675 | 2.19M | NextToken( nToken ); |
676 | 2.19M | break; |
677 | 2.35M | } |
678 | 2.35M | if( IsParserWorking() ) |
679 | 2.35M | SaveState( 0 ); // processed till here, |
680 | | // continue with new token! |
681 | 2.35M | nToken = GetNextToken(); |
682 | 2.35M | bLooping = nCurrentTokenIndex == m_nTokenIndex && nToken == nCurrentToken; |
683 | 2.35M | } |
684 | 11.3k | if( SvParserState::Accepted == eState && 0 < nOpenBrackets ) |
685 | 9.82k | eState = SvParserState::Error; |
686 | 11.3k | } |
687 | | |
688 | | void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc ) |
689 | 60.2k | { |
690 | 60.2k | if (eEnc == RTL_TEXTENCODING_DONTKNOW) |
691 | 39.7k | eEnc = GetCodeSet(); |
692 | | |
693 | 60.2k | if (!aParserStates.empty()) |
694 | 59.6k | aParserStates.top().eCodeSet = eEnc; |
695 | 60.2k | SetSrcEncoding(eEnc); |
696 | 60.2k | } |
697 | | |
698 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |