Coverage Report

Created: 2025-11-16 09:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/svtools/source/svrtf/parrtf.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <sal/config.h>
21
#include <sal/log.hxx>
22
23
#include <comphelper/scopeguard.hxx>
24
25
#include <rtl/character.hxx>
26
#include <rtl/strbuf.hxx>
27
#include <rtl/tencinfo.h>
28
#include <rtl/ustrbuf.hxx>
29
#include <tools/stream.hxx>
30
#include <tools/debug.hxx>
31
#include <svtools/rtftoken.h>
32
#include <svtools/parrtf.hxx>
33
34
// Soft cap on the length of the local text buffer that ScanText() fills per
// call: once the buffer has reached this length, ScanText() stops collecting.
const int MAX_STRING_LEN = 1024;
35
36
4.67M
// ASCII-only character classification helpers; thin wrappers around
// rtl::isAsciiDigit() and rtl::isAsciiAlpha().
#define RTF_ISDIGIT( c ) rtl::isAsciiDigit(c)
37
12.9M
#define RTF_ISALPHA( c ) rtl::isAsciiAlpha(c)
38
39
// Creates an RTF parser that reads from rIn; nStackSize is handed on to the
// SvParser<int> base class unchanged.
// The parser starts outside of any group, with the MS-1252 code set and an
// overread count of one character per \u keyword.
SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize )
    : SvParser<int>( rIn, nStackSize )
    , nOpenBrackets(0)
    , nUPRLevel(0)
    , eCodeSet(RTL_TEXTENCODING_MS_1252)
    , nUCharOverread(1)
{
    // the source encoding starts out as the default (ANSI) code set that was
    // just stored in eCodeSet
    SetSrcEncoding( eCodeSet );

    // no \u keyword is being resolved from inside ScanText() yet
    bRTF_InTextRead = false;
}
50
51
// Nothing to release explicitly; the members clean up after themselves.
SvRTFParser::~SvRTFParser() = default;
54
55
56
// Reads the next RTF token, starting at the current character nNextCh.
//
// Return value:
//   - '{' or '}' for group delimiters (the parser state stack is pushed /
//     popped accordingly),
//   - sal_Unicode(EOF) at the end of the input (eState becomes Accepted),
//   - RTF_TEXTTOKEN for text; the text itself is appended to aToken by
//     ScanText(),
//   - RTF_IGNOREFLAG, RTF_SUBENTRYINDEX, RTF_FORMULA or RTF_PAR for the
//     control symbols \*, \:, \| and backslash followed by CR/LF,
//   - for a control word, the value GetRTFToken() returns for it, or
//     RTF_UNKNOWNCONTROL when that lookup yields 0,
//   - 0 only when the loop ends because eState is no longer Working.
//
// A numeric parameter following a control word is stored in nTokenValue and
// bTokenHasValue is set. \uc and \upr are consumed here and never returned
// to the caller; \u is turned into a text token unless the call comes from
// ScanText() (bRTF_InTextRead is set).
int SvRTFParser::GetNextToken_()
57
2.17M
{
58
2.17M
    int nRet = 0;
59
2.31M
    do {
60
2.31M
        bool bNextCh = true;
61
2.31M
        switch( nNextCh )
62
2.31M
        {
63
1.05M
        case '\\':
64
1.05M
            {
65
                // control characters
66
1.05M
                nNextCh = GetNextChar();
67
1.05M
                switch( nNextCh )
68
1.05M
                {
69
3.48k
                case '{':
70
3.88k
                case '}':
71
15.1k
                case '\\':
72
15.3k
                case '+':       // I found it in a RTF-file
73
16.3k
                case '~':       // nonbreaking space
74
16.8k
                case '-':       // optional hyphen
75
16.9k
                case '_':       // nonbreaking hyphen
76
22.4k
                case '\'':      // HexValue
77
22.4k
                    // escaped character: step back to the backslash and let
                    // ScanText() collect it as part of a text token
                    nNextCh = '\\';
78
22.4k
                    rInput.SeekRel( -1 );
79
22.4k
                    ScanText();
80
22.4k
                    nRet = RTF_TEXTTOKEN;
81
22.4k
                    // fetch a new character below only if ScanText() left a
                    // NUL in nNextCh
                    bNextCh = 0 == nNextCh;
82
22.4k
                    break;
83
84
12.6k
                case '*':       // ignoreflag
85
12.6k
                    nRet = RTF_IGNOREFLAG;
86
12.6k
                    break;
87
1.79k
                case ':':       // subentry in an index entry
88
1.79k
                    nRet = RTF_SUBENTRYINDEX;
89
1.79k
                    break;
90
236
                case '|':       // formula-character
91
236
                    nRet = RTF_FORMULA;
92
236
                    break;
93
94
287k
                case 0x0a:
95
297k
                case 0x0d:
96
297k
                    // backslash directly followed by a line break is
                    // reported as RTF_PAR
                    nRet = RTF_PAR;
97
297k
                    break;
98
99
723k
                default:
100
723k
                    if( RTF_ISALPHA( nNextCh ) )
101
659k
                    {
102
659k
                        // control word: backslash followed by ASCII letters
                        aToken = "\\";
103
659k
                        {
104
2.43M
                            do {
105
2.43M
                                aToken.appendUtf32(nNextCh);
106
2.43M
                                nNextCh = GetNextChar();
107
2.43M
                            } while( RTF_ISALPHA( nNextCh ) );
108
659k
                        }
109
110
                        // minus before numeric parameters
111
659k
                        bool bNegValue = false;
112
659k
                        if( '-' == nNextCh )
113
19.9k
                        {
114
19.9k
                            bNegValue = true;
115
19.9k
                            nNextCh = GetNextChar();
116
19.9k
                        }
117
118
                        // possible numeric parameter
119
659k
                        if( RTF_ISDIGIT( nNextCh ) )
120
271k
                        {
121
271k
                            OUStringBuffer aNumber;
122
594k
                            do {
123
594k
                                aNumber.append(static_cast<sal_Unicode>(nNextCh));
124
594k
                                nNextCh = GetNextChar();
125
594k
                            } while( RTF_ISDIGIT( nNextCh ) );
126
271k
                            nTokenValue = OUString::unacquired(aNumber).toInt32();
127
271k
                            if( bNegValue )
128
10.5k
                                nTokenValue = -nTokenValue;
129
271k
                            bTokenHasValue=true;
130
271k
                        }
131
387k
                        else if( bNegValue )        // restore minus
132
9.40k
                        {
133
9.40k
                            nNextCh = '-';
134
9.40k
                            rInput.SeekRel( -1 );
135
9.40k
                        }
136
659k
                        if( ' ' == nNextCh )        // blank is part of token!
137
84.3k
                            nNextCh = GetNextChar();
138
139
                        // search for the token in the table:
140
659k
                        if( 0 == (nRet = GetRTFToken( aToken )) )
141
                            // Unknown Control
142
99.7k
                            nRet = RTF_UNKNOWNCONTROL;
143
144
                        // bug 76812 - unicode token handled as normal text
145
659k
                        // nNextCh already holds the character after the
                        // control word, so do not read another one below
                        bNextCh = false;
146
659k
                        switch( nRet )
147
659k
                        {
148
2.58k
                        case RTF_UC:
149
2.58k
                            // \ucN: remember N as the new overread count, also
                            // in the state of the current group (if any);
                            // negative values are ignored
                            if( 0 <= nTokenValue )
150
1.76k
                            {
151
1.76k
                                nUCharOverread = static_cast<sal_uInt8>(nTokenValue);
152
1.76k
                                if (!aParserStates.empty())
153
1.52k
                                {
154
                                    //cmc: other ifdef breaks #i3584
155
1.52k
                                    aParserStates.top().nUCharOverread = nUCharOverread;
156
1.52k
                                }
157
1.76k
                            }
158
2.58k
                            aToken.setLength( 0 ); // #i47831# erase token to prevent the token from being treated as text
159
                            // read next token
160
2.58k
                            nRet = 0;
161
2.58k
                            break;
162
163
4.36k
                        case RTF_UPR:
164
4.36k
                            // \upr is only processed while no SkipGroup() is
                            // active; otherwise the token is returned as is
                            if (!_inSkipGroup)
165
3.54k
                            {
166
3.54k
                                if (nUPRLevel > 256) // fairly sure > 1 is probably an error, but provide some leeway
167
25
                                {
168
25
                                    SAL_WARN("svtools", "urp stack too deep");
169
25
                                    eState = SvParserState::Error;
170
25
                                    break;
171
25
                                }
172
173
3.51k
                                ++nUPRLevel;
174
175
                                // UPR - overread the group with the ansi
176
                                //       information
177
3.51k
                                int nNextToken;
178
3.51k
                                do
179
5.61k
                                {
180
5.61k
                                    nNextToken = GetNextToken_();
181
5.61k
                                }
182
5.61k
                                while (nNextToken != '{' && nNextToken != sal_Unicode(EOF) && IsParserWorking());
183
184
3.51k
                                SkipGroup();
185
3.51k
                                GetNextToken_();  // overread the last bracket
186
3.51k
                                nRet = 0;
187
188
3.51k
                                --nUPRLevel;
189
3.51k
                            }
190
4.34k
                            break;
191
192
10.6k
                        case RTF_U:
193
10.6k
                            // when called from ScanText() (bRTF_InTextRead set)
                            // the RTF_U token is returned unchanged and the
                            // caller handles the character itself
                            if( !bRTF_InTextRead )
194
3.08k
                            {
195
3.08k
                                nRet = RTF_TEXTTOKEN;
196
3.08k
                                aToken = OUStringChar( static_cast<sal_Unicode>(nTokenValue) );
197
198
                                // overread the next n "RTF" characters. This
199
                                // can be also \{, \}, \'88
200
9.08k
                                for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
201
5.99k
                                {
202
5.99k
                                    sal_uInt32 cAnsi = nNextCh;
203
6.49k
                                    while( 0xD == cAnsi )
204
498
                                        cAnsi = GetNextChar();
205
8.15k
                                    while( 0xA == cAnsi )
206
2.15k
                                        cAnsi = GetNextChar();
207
208
5.99k
                                    if( '\\' == cAnsi &&
209
1.13k
                                        '\'' == GetNextChar() )
210
                                        // skip HexValue
211
283
                                        GetHexValue();
212
5.99k
                                    nNextCh = GetNextChar();
213
5.99k
                                }
214
3.08k
                                // append any directly following text to the
                                // same token
                                ScanText();
215
3.08k
                                bNextCh = 0 == nNextCh;
216
3.08k
                            }
217
10.6k
                            break;
218
659k
                        }
219
659k
                    }
220
64.6k
                    else if( SvParserState::Pending != eState )
221
64.6k
                    {
222
                        // Bug 34631 - "\ " read on - Blank as character
223
                        // eState = SvParserState::Error;
224
64.6k
                        bNextCh = false;
225
64.6k
                    }
226
723k
                    break;
227
1.05M
                }
228
1.05M
            }
229
1.05M
            break;
230
231
1.05M
        case sal_Unicode(EOF):
232
12.0k
            // end of input: accept and hand the EOF value to the caller
            eState = SvParserState::Accepted;
233
12.0k
            nRet = nNextCh;
234
12.0k
            break;
235
236
155k
        case '{':
237
155k
            {
238
155k
                // group start: save the current overread count and source
                // encoding on the state stack
                if( 0 <= nOpenBrackets )
239
154k
                {
240
154k
                    RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() );
241
154k
                    aParserStates.push( aState );
242
154k
                }
243
155k
                ++nOpenBrackets;
244
155k
                DBG_ASSERT(
245
155k
                    static_cast<size_t>(nOpenBrackets) == aParserStates.size(),
246
155k
                    "ParserStateStack unequal to bracket count" );
247
155k
                nRet = nNextCh;
248
155k
            }
249
155k
            break;
250
251
81.2k
        case '}':
252
81.2k
            // group end: drop this group's state and restore overread count
            // and encoding from the enclosing group, or fall back to the
            // defaults when no group remains on the stack
            --nOpenBrackets;
253
81.2k
            if( 0 <= nOpenBrackets )
254
75.9k
            {
255
75.9k
                aParserStates.pop();
256
75.9k
                if( !aParserStates.empty() )
257
75.4k
                {
258
75.4k
                    const RtfParserState_Impl& rRPS =
259
75.4k
                            aParserStates.top();
260
75.4k
                    nUCharOverread = rRPS.nUCharOverread;
261
75.4k
                    SetSrcEncoding( rRPS.eCodeSet );
262
75.4k
                }
263
502
                else
264
502
                {
265
502
                    nUCharOverread = 1;
266
502
                    SetSrcEncoding( GetCodeSet() );
267
502
                }
268
75.9k
            }
269
81.2k
            DBG_ASSERT(
270
81.2k
                static_cast<size_t>(nOpenBrackets) == aParserStates.size(),
271
81.2k
                "ParserStateStack unequal to bracket count" );
272
81.2k
            nRet = nNextCh;
273
81.2k
            break;
274
275
18.7k
        case 0x0d:
276
67.0k
        case 0x0a:
277
67.0k
            // bare line breaks produce no token; the loop just moves on
            break;
278
279
936k
        default:
280
            // now normal text follows
281
936k
            ScanText();
282
936k
            nRet = RTF_TEXTTOKEN;
283
936k
            bNextCh = 0 == nNextCh;
284
936k
            break;
285
2.31M
        }
286
287
2.31M
        if( bNextCh )
288
1.11M
            nNextCh = GetNextChar();
289
290
2.31M
    } while( !nRet && SvParserState::Working == eState );
291
2.17M
    return nRet;
292
2.17M
}
293
294
295
// Reads the next two characters from the input (each one is stored in
// nNextCh as it is read) and combines them into a two-digit hexadecimal
// value, first character being the high nibble.
// A character outside 0-9, a-f and A-F contributes the digit value 0.
sal_Unicode SvRTFParser::GetHexValue()
{
    sal_Unicode nResult = 0;

    for( int nDigit = 0; nDigit < 2; ++nDigit )
    {
        nNextCh = GetNextChar();

        sal_uInt32 nNibble = 0;
        if( '0' <= nNextCh && nNextCh <= '9' )
            nNibble = nNextCh - '0';
        else if( 'a' <= nNextCh && nNextCh <= 'f' )
            nNibble = nNextCh - 'a' + 10;
        else if( 'A' <= nNextCh && nNextCh <= 'F' )
            nNibble = nNextCh - 'A' + 10;

        // shift what was collected so far up by one hex digit and add the new one
        nResult = static_cast<sal_Unicode>( nResult * 16 + nNibble );
    }

    return nResult;
}
314
315
// Collects a run of text, starting at the current character nNextCh, into a
// local buffer and appends that buffer to aToken.
//
// Handled inside the text run:
//   - \'hh hex bytes, together with the plain characters that follow them;
//     these bytes are converted to Unicode with GetSrcEncoding(),
//   - \\ \{ \} \+ as literal characters, \~ \- \_ as U+00A0 / U+00AD / U+2011,
//   - \uN (resolved through GetNextToken_() with bRTF_InTextRead set) and
//     \ucN (updates nUCharOverread),
//   - CR / LF, which are dropped.
//
// Scanning stops at an unescaped '{' or '}', at a NUL character, at EOF, at
// any other backslash sequence (nNextCh is then left as '\\'), when the
// parser is no longer working, or once the buffer has reached
// MAX_STRING_LEN.
void SvRTFParser::ScanText()
316
961k
{
317
961k
    const sal_Unicode cBreak = 0;
318
961k
    OUStringBuffer aStrBuffer;
319
961k
    bool bContinue = true;
320
4.39M
    while( bContinue && IsParserWorking() && aStrBuffer.getLength() < MAX_STRING_LEN)
321
3.44M
    {
322
3.44M
        bool bNextCh = true;
323
3.44M
        switch( nNextCh )
324
3.44M
        {
325
434k
        case '\\':
326
434k
            {
327
434k
                nNextCh = GetNextChar();
328
434k
                switch (nNextCh)
329
434k
                {
330
9.49k
                case '\'':
331
9.49k
                    {
332
333
9.49k
                        // bytes in the source encoding, converted to Unicode
                        // in one go when the sequence ends
                        OStringBuffer aByteString;
334
27.4k
                        while (true)
335
27.4k
                        {
336
27.4k
                            char c = static_cast<char>(GetHexValue());
337
                            /*
338
                             * Note: \'00 is a valid internal character in  a
339
                             * string in RTF. OStringBuffer supports
340
                             * appending nulls fine
341
                             */
342
27.4k
                            aByteString.append(c);
343
344
27.4k
                            bool bBreak = false;
345
27.4k
                            bool bEOF = false;
346
27.4k
                            char nSlash = '\\';
347
236k
                            // keep taking the characters after the hex value
                            // until '{', '}' or '\\' shows up
                            while (!bBreak)
348
208k
                            {
349
208k
                                auto next = GetNextChar();
350
208k
                                if (sal_Unicode(EOF) == next)
351
416
                                {
352
416
                                    bEOF = true;
353
416
                                    break;
354
416
                                }
355
208k
                                if (next>0xFF) // fix for #i43933# and #i35653#
356
3.08k
                                {
357
3.08k
                                    // does not fit into a byte: flush the bytes
                                    // collected so far and append it directly
                                    if (!aByteString.isEmpty())
358
1.65k
                                    {
359
1.65k
                                        aStrBuffer.append( OStringToOUString(aByteString, GetSrcEncoding()) );
360
1.65k
                                        aByteString.setLength(0);
361
1.65k
                                    }
362
3.08k
                                    aStrBuffer.append(static_cast<sal_Unicode>(next));
363
364
3.08k
                                    continue;
365
3.08k
                                }
366
205k
                                nSlash = static_cast<char>(next);
367
207k
                                while (nSlash == 0xD || nSlash == 0xA)
368
2.26k
                                    nSlash = static_cast<char>(GetNextChar());
369
370
205k
                                switch (nSlash)
371
205k
                                {
372
780
                                    case '{':
373
2.68k
                                    case '}':
374
27.0k
                                    case '\\':
375
27.0k
                                        bBreak = true;
376
27.0k
                                        break;
377
178k
                                    default:
378
178k
                                        aByteString.append(nSlash);
379
178k
                                        break;
380
205k
                                }
381
205k
                            }
382
383
27.4k
                            if (bEOF)
384
416
                            {
385
416
                                bContinue = false;        // abort, string together
386
416
                                break;
387
416
                            }
388
389
27.0k
                            nNextCh = GetNextChar();
390
391
27.0k
                            // anything but another \' ends the hex sequence:
                            // step back one character and continue with the
                            // character that stopped the inner loop
                            if (nSlash != '\\' || nNextCh != '\'')
392
9.07k
                            {
393
9.07k
                                rInput.SeekRel(-1);
394
9.07k
                                nNextCh = static_cast<unsigned char>(nSlash);
395
9.07k
                                break;
396
9.07k
                            }
397
27.0k
                        }
398
399
9.49k
                        bNextCh = false;
400
401
9.49k
                        if (!aByteString.isEmpty())
402
9.27k
                        {
403
9.27k
                            aStrBuffer.append( OStringToOUString(aByteString, GetSrcEncoding()) );
404
9.27k
                            aByteString.setLength(0);
405
9.27k
                        }
406
9.49k
                    }
407
0
                    break;
408
30.8k
                case '\\':
409
36.1k
                case '}':
410
48.0k
                case '{':
411
48.4k
                case '+':       // I found in a RTF file
412
48.4k
                    aStrBuffer.append(sal_Unicode(nNextCh));
413
48.4k
                    break;
414
779
                case '~':       // nonbreaking space
415
779
                    aStrBuffer.append(u'\x00A0');
416
779
                    break;
417
1.30k
                case '-':       // optional hyphen
418
1.30k
                    aStrBuffer.append(u'\x00AD');
419
1.30k
                    break;
420
283
                case '_':       // nonbreaking hyphen
421
283
                    aStrBuffer.append(u'\x2011');
422
283
                    break;
423
424
15.7k
                case 'u':
425
                    // read UNI-Code characters
426
15.7k
                    {
427
15.7k
                        // peek at the character after the 'u', then move the
                        // stream position back by two characters
                        nNextCh = GetNextChar();
428
15.7k
                        rInput.SeekRel( -2 );
429
430
15.7k
                        if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) )
431
7.59k
                        {
432
7.59k
                            // tells GetNextToken_() to return RTF_U as is
                            // instead of converting it into a text token
                            bRTF_InTextRead = true;
433
434
7.59k
                            OUString sSave( aToken ); // GetNextToken_() overwrites this
435
7.59k
                            nNextCh = '\\';
436
7.59k
                            int nToken = GetNextToken_();
437
7.59k
                            DBG_ASSERT( RTF_U == nToken, "still not a UNI-Code character" );
438
                            // don't convert symbol chars
439
7.59k
                            aStrBuffer.append(static_cast< sal_Unicode >(nTokenValue));
440
441
                            // overread the next n "RTF" characters. This
442
                            // can be also \{, \}, \'88
443
13.5k
                            for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
444
5.90k
                            {
445
5.90k
                                sal_Unicode cAnsi = nNextCh;
446
6.16k
                                while( 0xD == cAnsi )
447
255
                                    cAnsi = GetNextChar();
448
6.50k
                                while( 0xA == cAnsi )
449
596
                                    cAnsi = GetNextChar();
450
451
5.90k
                                if( '\\' == cAnsi &&
452
1.59k
                                    '\'' == GetNextChar() )
453
                                    // skip HexValue
454
1.07k
                                    GetHexValue();
455
5.90k
                                nNextCh = GetNextChar();
456
5.90k
                            }
457
7.59k
                            bNextCh = false;
458
7.59k
                            aToken = sSave;
459
7.59k
                            bRTF_InTextRead = false;
460
7.59k
                        }
461
8.12k
                        else if ( 'c' == nNextCh )
462
2.52k
                        {
463
                            // Prevent text breaking into multiple tokens.
464
2.52k
                            // undo the step back from above, then parse the
                            // decimal overread count that follows "\uc"
                            rInput.SeekRel( 2 );
465
2.52k
                            nNextCh = GetNextChar();
466
2.52k
                            if (RTF_ISDIGIT( nNextCh ))
467
2.26k
                            {
468
2.26k
                                sal_uInt8 nNewOverread = 0 ;
469
2.58k
                                do {
470
2.58k
                                    nNewOverread *= 10;
471
2.58k
                                    nNewOverread += nNextCh - '0';
472
2.58k
                                    nNextCh = GetNextChar();
473
2.58k
                                } while ( RTF_ISDIGIT( nNextCh ) );
474
2.26k
                                nUCharOverread = nNewOverread;
475
2.26k
                                if (!aParserStates.empty())
476
2.21k
                                    aParserStates.top().nUCharOverread = nNewOverread;
477
2.26k
                            }
478
2.52k
                            // a blank directly after the keyword is consumed
                            // along with it
                            bNextCh = 0x20 == nNextCh;
479
2.52k
                        }
480
5.60k
                        else
481
5.60k
                        {
482
5.60k
                            nNextCh = '\\';
483
5.60k
                            bContinue = false;        // abort, string together
484
5.60k
                        }
485
15.7k
                    }
486
15.7k
                    break;
487
488
358k
                default:
489
358k
                    // some other control sequence: step back and leave the
                    // backslash in nNextCh for the tokenizer
                    rInput.SeekRel( -1 );
490
358k
                    nNextCh = '\\';
491
358k
                    bContinue = false;        // abort, string together
492
358k
                    break;
493
434k
                }
494
434k
            }
495
434k
            break;
496
497
434k
        case sal_Unicode(EOF):
498
632
            eState = SvParserState::Error;
499
632
            [[fallthrough]];
500
47.9k
        case '{':
501
102k
        case '}':
502
102k
            bContinue = false;
503
102k
            break;
504
505
52.7k
        case 0x0a:
506
59.5k
        case 0x0d:
507
59.5k
            // line breaks are not part of the text
            break;
508
509
2.84M
        default:
510
2.84M
            if( nNextCh == cBreak || aStrBuffer.getLength() >= MAX_STRING_LEN)
511
486k
                bContinue = false;
512
2.35M
            else
513
2.35M
            {
514
4.91M
                // take this character plus any directly following ASCII
                // letters and digits without going through the outer switch
                do {
515
                    // all other characters end up in the text
516
4.91M
                    aStrBuffer.appendUtf32(nNextCh);
517
518
4.91M
                    if (sal_Unicode(EOF) == (nNextCh = GetNextChar()))
519
5.43k
                    {
520
5.43k
                        // EOF in the middle of a run: hand over what was
                        // collected and return; eState is not changed here
                        if (!aStrBuffer.isEmpty())
521
5.43k
                            aToken.append( aStrBuffer );
522
5.43k
                        return;
523
5.43k
                    }
524
4.91M
                } while
525
2.35M
                (
526
4.91M
                    (RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) &&
527
2.56M
                    (aStrBuffer.getLength() < MAX_STRING_LEN)
528
2.35M
                );
529
2.35M
                bNextCh = false;
530
2.35M
            }
531
3.44M
        }
532
533
3.43M
        if( bContinue && bNextCh )
534
110k
            nNextCh = GetNextChar();
535
3.43M
    }
536
537
956k
    if (!aStrBuffer.isEmpty())
538
552k
        aToken.append( aStrBuffer );
539
956k
}
540
541
542
// Static guard counter, i.e. one value for the whole class rather than per
// parser object. It is non-zero while a SkipGroup() call is in progress:
// SkipGroup() returns immediately when it is already set, and
// GetNextToken_() leaves \upr unprocessed while it is set.
short SvRTFParser::_inSkipGroup=0;
543
544
void SvRTFParser::SkipGroup()
545
8.81k
{
546
8.81k
    short nBrackets=1;
547
8.81k
    if (_inSkipGroup>0)
548
0
        return;
549
8.81k
    _inSkipGroup++;
550
//#i16185# faking \bin keyword
551
8.81k
    do
552
62.1k
    {
553
62.1k
        switch (nNextCh)
554
62.1k
        {
555
6.73k
            case '{':
556
6.73k
                ++nBrackets;
557
6.73k
                break;
558
10.9k
            case '}':
559
10.9k
                if (!--nBrackets) {
560
5.40k
                    _inSkipGroup--;
561
5.40k
                    return;
562
5.40k
                }
563
5.57k
                break;
564
62.1k
        }
565
56.7k
        int nToken = GetNextToken_();
566
56.7k
        if (nToken == RTF_BIN)
567
410
        {
568
410
            rInput.SeekRel(-1);
569
410
            SAL_WARN_IF(nTokenValue < 0, "svtools", "negative value argument for rtf \\bin keyword");
570
410
            if (nTokenValue > 0)
571
237
                rInput.SeekRel(nTokenValue);
572
410
            nNextCh = GetNextChar();
573
410
        }
574
59.2k
        while (nNextCh==0xa || nNextCh==0xd)
575
2.53k
        {
576
2.53k
            nNextCh = GetNextChar();
577
2.53k
        }
578
56.7k
    } while (sal_Unicode(EOF) != nNextCh && IsParserWorking());
579
580
3.41k
    if( SvParserState::Pending != eState && '}' != nNextCh )
581
3.16k
        eState = SvParserState::Error;
582
3.41k
    _inSkipGroup--;
583
3.41k
}
584
585
2.65k
// Unknown data is not interpreted here: the rest of the group is skipped.
void SvRTFParser::ReadUnknownData()
{
    SkipGroup();
}
586
32
// Bitmap data is not interpreted here: the rest of the group is skipped.
void SvRTFParser::ReadBitmapData()
{
    SkipGroup();
}
587
588
589
// Entry point of the parser: resets the per-run state, reads the first
// character and checks that the input starts with '{' followed by the \rtf
// keyword. If so, parsing proceeds via Continue(); otherwise the state
// becomes Error. Returns the resulting parser state.
SvParserState SvRTFParser::CallParser()
{
    // remember where the first character sits and read it
    nNextChPos = rInput.Tell();
    char cFirst(0);
    rInput.ReadChar( cFirst );
    nNextCh = static_cast<unsigned char>(cFirst);

    // start every run from the same defaults
    eState = SvParserState::Working;
    nOpenBrackets = 0;
    eCodeSet = RTL_TEXTENCODING_MS_1252;
    SetSrcEncoding( eCodeSet );

    // the first two tokens should be '{' and \\rtf !!
    const bool bIsRtf = '{' == GetNextToken() && RTF_RTF == GetNextToken();
    if( !bIsRtf )
    {
        eState = SvParserState::Error;
        return eState;
    }

    {
        AddFirstRef();
        // drop the reference again when this scope is left, even in the face
        // of exceptions - unless parsing is merely pending
        comphelper::ScopeGuard aReleaseGuard([this] {
            if( SvParserState::Pending != eState )
                ReleaseRef();       // now parser is not needed anymore
        });
        Continue( 0 );
    }

    return eState;
}
616
617
void SvRTFParser::Continue( int nToken )
618
9.51k
{
619
//  DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(),
620
//              "Characterset was changed." );
621
622
9.51k
    if( !nToken )
623
9.51k
        nToken = GetNextToken();
624
625
9.51k
    bool bLooping = false;
626
627
1.68M
    while (IsParserWorking() && !bLooping)
628
1.67M
    {
629
1.67M
        auto nCurrentTokenIndex = m_nTokenIndex;
630
1.67M
        auto nCurrentToken = nToken;
631
632
1.67M
        SaveState( nToken );
633
1.67M
        switch( nToken )
634
1.67M
        {
635
47.3k
        case '}':
636
47.3k
            if( nOpenBrackets )
637
46.9k
                goto NEXTTOKEN;
638
387
            eState = SvParserState::Accepted;
639
387
            break;
640
641
94.1k
        case '{':
642
            // an unknown group ?
643
94.1k
            {
644
94.1k
                if( RTF_IGNOREFLAG != GetNextToken() )
645
90.5k
                    nToken = SkipToken();
646
3.56k
                else if( RTF_UNKNOWNCONTROL != GetNextToken() )
647
2.30k
                    nToken = SkipToken( -2 );
648
1.25k
                else
649
1.25k
                {
650
                    // filter immediately
651
1.25k
                    ReadUnknownData();
652
1.25k
                    nToken = GetNextToken();
653
1.25k
                    if( '}' != nToken )
654
41
                        eState = SvParserState::Error;
655
1.25k
                    break;      // move to next token!!
656
1.25k
                }
657
94.1k
            }
658
92.8k
            goto NEXTTOKEN;
659
660
92.8k
        case RTF_UNKNOWNCONTROL:
661
85.8k
            break;      // skip unknown token
662
0
        case RTF_NEXTTYPE:
663
1.21k
        case RTF_ANSITYPE:
664
1.21k
            eCodeSet = RTL_TEXTENCODING_MS_1252;
665
1.21k
            SetSrcEncoding( eCodeSet );
666
1.21k
            break;
667
314
        case RTF_MACTYPE:
668
314
            eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN;
669
314
            SetSrcEncoding( eCodeSet );
670
314
            break;
671
397
        case RTF_PCTYPE:
672
397
            eCodeSet = RTL_TEXTENCODING_IBM_437;
673
397
            SetSrcEncoding( eCodeSet );
674
397
            break;
675
25
        case RTF_PCATYPE:
676
25
            eCodeSet = RTL_TEXTENCODING_IBM_850;
677
25
            SetSrcEncoding( eCodeSet );
678
25
            break;
679
4.84k
        case RTF_ANSICPG:
680
4.84k
            eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue);
681
4.84k
            SetSrcEncoding(eCodeSet);
682
4.84k
            break;
683
1.43M
        default:
684
1.57M
NEXTTOKEN:
685
1.57M
            NextToken( nToken );
686
1.57M
            break;
687
1.67M
        }
688
1.67M
        if( IsParserWorking() )
689
1.67M
            SaveState( 0 );         // processed till here,
690
                                    // continue with new token!
691
1.67M
        nToken = GetNextToken();
692
1.67M
        bLooping = nCurrentTokenIndex == m_nTokenIndex && nToken == nCurrentToken;
693
1.67M
    }
694
9.49k
    if( SvParserState::Accepted == eState && 0 < nOpenBrackets )
695
8.25k
        eState = SvParserState::Error;
696
9.49k
}
697
698
// Makes eEnc the source encoding: it is stored in the state of the current
// group (if there is one on the state stack) and passed to SetSrcEncoding().
// RTL_TEXTENCODING_DONTKNOW stands for "use the code set from GetCodeSet()".
void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc )
{
    const rtl_TextEncoding eEffective =
        (RTL_TEXTENCODING_DONTKNOW == eEnc) ? GetCodeSet() : eEnc;

    if (!aParserStates.empty())
        aParserStates.top().eCodeSet = eEffective;

    SetSrcEncoding(eEffective);
}
707
708
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */