Coverage Report

Created: 2025-07-07 10:01

/src/libreoffice/svtools/source/svrtf/parrtf.cxx
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <sal/config.h>
21
#include <sal/log.hxx>
22
23
#include <comphelper/scopeguard.hxx>
24
25
#include <rtl/character.hxx>
26
#include <rtl/strbuf.hxx>
27
#include <rtl/tencinfo.h>
28
#include <rtl/ustrbuf.hxx>
29
#include <tools/stream.hxx>
30
#include <tools/debug.hxx>
31
#include <svtools/rtftoken.h>
32
#include <svtools/parrtf.hxx>
33
34
// Soft limit on the number of UTF-16 code units ScanText() gathers before
// it hands the text collected so far back as one token.
constexpr int MAX_STRING_LEN = 1024;

// ASCII-only character classification used while tokenizing.
#define RTF_ISDIGIT( c ) rtl::isAsciiDigit(c)
#define RTF_ISALPHA( c ) rtl::isAsciiAlpha(c)
38
39
// Sets up a parser reading from rIn. The code set starts out as Windows-1252
// ("ANSI") and one fallback character is skipped after every \u keyword
// until the document says otherwise.
SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize )
    : SvParser<int>( rIn, nStackSize )
    , nOpenBrackets( 0 )
    , nUPRLevel( 0 )
    , eCodeSet( RTL_TEXTENCODING_MS_1252 )
    , nUCharOverread( 1 )
{
    // eCodeSet was initialised to MS-1252 above; use it as source encoding too
    SetSrcEncoding( eCodeSet );
    bRTF_InTextRead = false;
}
50
51
// Nothing to release explicitly; members clean up after themselves.
SvRTFParser::~SvRTFParser() = default;
54
55
56
int SvRTFParser::GetNextToken_()
57
2.32M
{
58
2.32M
    int nRet = 0;
59
2.46M
    do {
60
2.46M
        bool bNextCh = true;
61
2.46M
        switch( nNextCh )
62
2.46M
        {
63
1.10M
        case '\\':
64
1.10M
            {
65
                // control characters
66
1.10M
                nNextCh = GetNextChar();
67
1.10M
                switch( nNextCh )
68
1.10M
                {
69
3.53k
                case '{':
70
3.94k
                case '}':
71
15.6k
                case '\\':
72
15.8k
                case '+':       // I found it in a RTF-file
73
16.8k
                case '~':       // nonbreaking space
74
17.4k
                case '-':       // optional hyphen
75
17.5k
                case '_':       // nonbreaking hyphen
76
22.9k
                case '\'':      // HexValue
77
22.9k
                    nNextCh = '\\';
78
22.9k
                    rInput.SeekRel( -1 );
79
22.9k
                    ScanText();
80
22.9k
                    nRet = RTF_TEXTTOKEN;
81
22.9k
                    bNextCh = 0 == nNextCh;
82
22.9k
                    break;
83
84
12.5k
                case '*':       // ignoreflag
85
12.5k
                    nRet = RTF_IGNOREFLAG;
86
12.5k
                    break;
87
1.82k
                case ':':       // subentry in an index entry
88
1.82k
                    nRet = RTF_SUBENTRYINDEX;
89
1.82k
                    break;
90
238
                case '|':       // formula-character
91
238
                    nRet = RTF_FORMULA;
92
238
                    break;
93
94
324k
                case 0x0a:
95
335k
                case 0x0d:
96
335k
                    nRet = RTF_PAR;
97
335k
                    break;
98
99
727k
                default:
100
727k
                    if( RTF_ISALPHA( nNextCh ) )
101
660k
                    {
102
660k
                        aToken = "\\";
103
660k
                        {
104
2.42M
                            do {
105
2.42M
                                aToken.appendUtf32(nNextCh);
106
2.42M
                                nNextCh = GetNextChar();
107
2.42M
                            } while( RTF_ISALPHA( nNextCh ) );
108
660k
                        }
109
110
                        // minus before numeric parameters
111
660k
                        bool bNegValue = false;
112
660k
                        if( '-' == nNextCh )
113
20.4k
                        {
114
20.4k
                            bNegValue = true;
115
20.4k
                            nNextCh = GetNextChar();
116
20.4k
                        }
117
118
                        // possible numeric parameter
119
660k
                        if( RTF_ISDIGIT( nNextCh ) )
120
266k
                        {
121
266k
                            OUStringBuffer aNumber;
122
578k
                            do {
123
578k
                                aNumber.append(static_cast<sal_Unicode>(nNextCh));
124
578k
                                nNextCh = GetNextChar();
125
578k
                            } while( RTF_ISDIGIT( nNextCh ) );
126
266k
                            nTokenValue = OUString::unacquired(aNumber).toInt32();
127
266k
                            if( bNegValue )
128
10.4k
                                nTokenValue = -nTokenValue;
129
266k
                            bTokenHasValue=true;
130
266k
                        }
131
393k
                        else if( bNegValue )        // restore minus
132
9.96k
                        {
133
9.96k
                            nNextCh = '-';
134
9.96k
                            rInput.SeekRel( -1 );
135
9.96k
                        }
136
660k
                        if( ' ' == nNextCh )        // blank is part of token!
137
78.5k
                            nNextCh = GetNextChar();
138
139
                        // search for the token in the table:
140
660k
                        if( 0 == (nRet = GetRTFToken( aToken )) )
141
                            // Unknown Control
142
100k
                            nRet = RTF_UNKNOWNCONTROL;
143
144
                        // bug 76812 - unicode token handled as normal text
145
660k
                        bNextCh = false;
146
660k
                        switch( nRet )
147
660k
                        {
148
2.29k
                        case RTF_UC:
149
2.29k
                            if( 0 <= nTokenValue )
150
1.42k
                            {
151
1.42k
                                nUCharOverread = static_cast<sal_uInt8>(nTokenValue);
152
1.42k
                                if (!aParserStates.empty())
153
1.10k
                                {
154
                                    //cmc: other ifdef breaks #i3584
155
1.10k
                                    aParserStates.top().nUCharOverread = nUCharOverread;
156
1.10k
                                }
157
1.42k
                            }
158
2.29k
                            aToken.setLength( 0 ); // #i47831# erase token to prevent the token from being treated as text
159
                            // read next token
160
2.29k
                            nRet = 0;
161
2.29k
                            break;
162
163
4.56k
                        case RTF_UPR:
164
4.56k
                            if (!_inSkipGroup)
165
3.73k
                            {
166
3.73k
                                if (nUPRLevel > 256) // fairly sure > 1 is probably an error, but provide some leeway
167
25
                                {
168
25
                                    SAL_WARN("svtools", "urp stack too deep");
169
25
                                    eState = SvParserState::Error;
170
25
                                    break;
171
25
                                }
172
173
3.71k
                                ++nUPRLevel;
174
175
                                // UPR - overread the group with the ansi
176
                                //       information
177
3.71k
                                int nNextToken;
178
3.71k
                                do
179
7.70k
                                {
180
7.70k
                                    nNextToken = GetNextToken_();
181
7.70k
                                }
182
7.70k
                                while (nNextToken != '{' && nNextToken != sal_Unicode(EOF) && IsParserWorking());
183
184
3.71k
                                SkipGroup();
185
3.71k
                                GetNextToken_();  // overread the last bracket
186
3.71k
                                nRet = 0;
187
188
3.71k
                                --nUPRLevel;
189
3.71k
                            }
190
4.53k
                            break;
191
192
10.2k
                        case RTF_U:
193
10.2k
                            if( !bRTF_InTextRead )
194
2.68k
                            {
195
2.68k
                                nRet = RTF_TEXTTOKEN;
196
2.68k
                                aToken = OUStringChar( static_cast<sal_Unicode>(nTokenValue) );
197
198
                                // overread the next n "RTF" characters. This
199
                                // can be also \{, \}, \'88
200
10.2k
                                for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
201
7.55k
                                {
202
7.55k
                                    sal_uInt32 cAnsi = nNextCh;
203
8.19k
                                    while( 0xD == cAnsi )
204
643
                                        cAnsi = GetNextChar();
205
9.98k
                                    while( 0xA == cAnsi )
206
2.42k
                                        cAnsi = GetNextChar();
207
208
7.55k
                                    if( '\\' == cAnsi &&
209
7.55k
                                        '\'' == GetNextChar() )
210
                                        // skip HexValue
211
241
                                        GetHexValue();
212
7.55k
                                    nNextCh = GetNextChar();
213
7.55k
                                }
214
2.68k
                                ScanText();
215
2.68k
                                bNextCh = 0 == nNextCh;
216
2.68k
                            }
217
10.2k
                            break;
218
660k
                        }
219
660k
                    }
220
66.4k
                    else if( SvParserState::Pending != eState )
221
66.4k
                    {
222
                        // Bug 34631 - "\ " read on - Blank as character
223
                        // eState = SvParserState::Error;
224
66.4k
                        bNextCh = false;
225
66.4k
                    }
226
727k
                    break;
227
1.10M
                }
228
1.10M
            }
229
1.10M
            break;
230
231
1.10M
        case sal_Unicode(EOF):
232
12.7k
            eState = SvParserState::Accepted;
233
12.7k
            nRet = nNextCh;
234
12.7k
            break;
235
236
179k
        case '{':
237
179k
            {
238
179k
                if( 0 <= nOpenBrackets )
239
177k
                {
240
177k
                    RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() );
241
177k
                    aParserStates.push( aState );
242
177k
                }
243
179k
                ++nOpenBrackets;
244
179k
                DBG_ASSERT(
245
179k
                    static_cast<size_t>(nOpenBrackets) == aParserStates.size(),
246
179k
                    "ParserStateStack unequal to bracket count" );
247
179k
                nRet = nNextCh;
248
179k
            }
249
179k
            break;
250
251
79.7k
        case '}':
252
79.7k
            --nOpenBrackets;
253
79.7k
            if( 0 <= nOpenBrackets )
254
73.8k
            {
255
73.8k
                aParserStates.pop();
256
73.8k
                if( !aParserStates.empty() )
257
73.3k
                {
258
73.3k
                    const RtfParserState_Impl& rRPS =
259
73.3k
                            aParserStates.top();
260
73.3k
                    nUCharOverread = rRPS.nUCharOverread;
261
73.3k
                    SetSrcEncoding( rRPS.eCodeSet );
262
73.3k
                }
263
500
                else
264
500
                {
265
500
                    nUCharOverread = 1;
266
500
                    SetSrcEncoding( GetCodeSet() );
267
500
                }
268
73.8k
            }
269
79.7k
            DBG_ASSERT(
270
79.7k
                static_cast<size_t>(nOpenBrackets) == aParserStates.size(),
271
79.7k
                "ParserStateStack unequal to bracket count" );
272
79.7k
            nRet = nNextCh;
273
79.7k
            break;
274
275
18.5k
        case 0x0d:
276
66.6k
        case 0x0a:
277
66.6k
            break;
278
279
1.02M
        default:
280
            // now normal text follows
281
1.02M
            ScanText();
282
1.02M
            nRet = RTF_TEXTTOKEN;
283
1.02M
            bNextCh = 0 == nNextCh;
284
1.02M
            break;
285
2.46M
        }
286
287
2.46M
        if( bNextCh )
288
1.23M
            nNextCh = GetNextChar();
289
290
2.46M
    } while( !nRet && SvParserState::Working == eState );
291
2.32M
    return nRet;
292
2.32M
}
293
294
295
// Reads the next two characters from the input and combines them as a
// two-digit hexadecimal number (first character is the high nibble).
// A character that is not a hex digit contributes 0. nNextCh is left on the
// second character read.
sal_Unicode SvRTFParser::GetHexValue()
{
    sal_Unicode nResult = 0;

    for( int nDigit = 0; nDigit < 2; ++nDigit )
    {
        nResult *= 16;
        nNextCh = GetNextChar();

        const bool bDecimal = nNextCh >= '0' && nNextCh <= '9';
        const bool bLower = nNextCh >= 'a' && nNextCh <= 'f';
        const bool bUpper = nNextCh >= 'A' && nNextCh <= 'F';

        if( bDecimal )
            nResult += (nNextCh - '0');
        else if( bLower )
            nResult += (nNextCh - 'a' + 10);
        else if( bUpper )
            nResult += (nNextCh - 'A' + 10);
    }
    return nResult;
}
314
315
void SvRTFParser::ScanText()
316
1.05M
{
317
1.05M
    const sal_Unicode cBreak = 0;
318
1.05M
    OUStringBuffer aStrBuffer;
319
1.05M
    bool bContinue = true;
320
4.69M
    while( bContinue && IsParserWorking() && aStrBuffer.getLength() < MAX_STRING_LEN)
321
3.65M
    {
322
3.65M
        bool bNextCh = true;
323
3.65M
        switch( nNextCh )
324
3.65M
        {
325
465k
        case '\\':
326
465k
            {
327
465k
                nNextCh = GetNextChar();
328
465k
                switch (nNextCh)
329
465k
                {
330
9.12k
                case '\'':
331
9.12k
                    {
332
333
9.12k
                        OStringBuffer aByteString;
334
26.4k
                        while (true)
335
26.4k
                        {
336
26.4k
                            char c = static_cast<char>(GetHexValue());
337
                            /*
338
                             * Note: \'00 is a valid internal character in  a
339
                             * string in RTF. OStringBuffer supports
340
                             * appending nulls fine
341
                             */
342
26.4k
                            aByteString.append(c);
343
344
26.4k
                            bool bBreak = false;
345
26.4k
                            bool bEOF = false;
346
26.4k
                            char nSlash = '\\';
347
215k
                            while (!bBreak)
348
189k
                            {
349
189k
                                auto next = GetNextChar();
350
189k
                                if (sal_Unicode(EOF) == next)
351
486
                                {
352
486
                                    bEOF = true;
353
486
                                    break;
354
486
                                }
355
189k
                                if (next>0xFF) // fix for #i43933# and #i35653#
356
3.59k
                                {
357
3.59k
                                    if (!aByteString.isEmpty())
358
1.80k
                                    {
359
1.80k
                                        aStrBuffer.append( OStringToOUString(aByteString, GetSrcEncoding()) );
360
1.80k
                                        aByteString.setLength(0);
361
1.80k
                                    }
362
3.59k
                                    aStrBuffer.append(static_cast<sal_Unicode>(next));
363
364
3.59k
                                    continue;
365
3.59k
                                }
366
185k
                                nSlash = static_cast<char>(next);
367
188k
                                while (nSlash == 0xD || nSlash == 0xA)
368
2.09k
                                    nSlash = static_cast<char>(GetNextChar());
369
370
185k
                                switch (nSlash)
371
185k
                                {
372
803
                                    case '{':
373
2.58k
                                    case '}':
374
25.9k
                                    case '\\':
375
25.9k
                                        bBreak = true;
376
25.9k
                                        break;
377
159k
                                    default:
378
159k
                                        aByteString.append(nSlash);
379
159k
                                        break;
380
185k
                                }
381
185k
                            }
382
383
26.4k
                            if (bEOF)
384
486
                            {
385
486
                                bContinue = false;        // abort, string together
386
486
                                break;
387
486
                            }
388
389
25.9k
                            nNextCh = GetNextChar();
390
391
25.9k
                            if (nSlash != '\\' || nNextCh != '\'')
392
8.63k
                            {
393
8.63k
                                rInput.SeekRel(-1);
394
8.63k
                                nNextCh = static_cast<unsigned char>(nSlash);
395
8.63k
                                break;
396
8.63k
                            }
397
25.9k
                        }
398
399
9.12k
                        bNextCh = false;
400
401
9.12k
                        if (!aByteString.isEmpty())
402
8.88k
                        {
403
8.88k
                            aStrBuffer.append( OStringToOUString(aByteString, GetSrcEncoding()) );
404
8.88k
                            aByteString.setLength(0);
405
8.88k
                        }
406
9.12k
                    }
407
0
                    break;
408
34.8k
                case '\\':
409
40.3k
                case '}':
410
52.4k
                case '{':
411
52.8k
                case '+':       // I found in a RTF file
412
52.8k
                    aStrBuffer.append(sal_Unicode(nNextCh));
413
52.8k
                    break;
414
758
                case '~':       // nonbreaking space
415
758
                    aStrBuffer.append(u'\x00A0');
416
758
                    break;
417
1.35k
                case '-':       // optional hyphen
418
1.35k
                    aStrBuffer.append(u'\x00AD');
419
1.35k
                    break;
420
341
                case '_':       // nonbreaking hyphen
421
341
                    aStrBuffer.append(u'\x2011');
422
341
                    break;
423
424
16.3k
                case 'u':
425
                    // read UNI-Code characters
426
16.3k
                    {
427
16.3k
                        nNextCh = GetNextChar();
428
16.3k
                        rInput.SeekRel( -2 );
429
430
16.3k
                        if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) )
431
7.52k
                        {
432
7.52k
                            bRTF_InTextRead = true;
433
434
7.52k
                            OUString sSave( aToken ); // GetNextToken_() overwrites this
435
7.52k
                            nNextCh = '\\';
436
7.52k
                            int nToken = GetNextToken_();
437
7.52k
                            DBG_ASSERT( RTF_U == nToken, "still not a UNI-Code character" );
438
                            // don't convert symbol chars
439
7.52k
                            aStrBuffer.append(static_cast< sal_Unicode >(nTokenValue));
440
441
                            // overread the next n "RTF" characters. This
442
                            // can be also \{, \}, \'88
443
14.8k
                            for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
444
7.35k
                            {
445
7.35k
                                sal_Unicode cAnsi = nNextCh;
446
7.82k
                                while( 0xD == cAnsi )
447
464
                                    cAnsi = GetNextChar();
448
8.07k
                                while( 0xA == cAnsi )
449
717
                                    cAnsi = GetNextChar();
450
451
7.35k
                                if( '\\' == cAnsi &&
452
7.35k
                                    '\'' == GetNextChar() )
453
                                    // skip HexValue
454
1.11k
                                    GetHexValue();
455
7.35k
                                nNextCh = GetNextChar();
456
7.35k
                            }
457
7.52k
                            bNextCh = false;
458
7.52k
                            aToken = sSave;
459
7.52k
                            bRTF_InTextRead = false;
460
7.52k
                        }
461
8.79k
                        else if ( 'c' == nNextCh )
462
2.86k
                        {
463
                            // Prevent text breaking into multiple tokens.
464
2.86k
                            rInput.SeekRel( 2 );
465
2.86k
                            nNextCh = GetNextChar();
466
2.86k
                            if (RTF_ISDIGIT( nNextCh ))
467
2.42k
                            {
468
2.42k
                                sal_uInt8 nNewOverread = 0 ;
469
3.17k
                                do {
470
3.17k
                                    nNewOverread *= 10;
471
3.17k
                                    nNewOverread += nNextCh - '0';
472
3.17k
                                    nNextCh = GetNextChar();
473
3.17k
                                } while ( RTF_ISDIGIT( nNextCh ) );
474
2.42k
                                nUCharOverread = nNewOverread;
475
2.42k
                                if (!aParserStates.empty())
476
2.29k
                                    aParserStates.top().nUCharOverread = nNewOverread;
477
2.42k
                            }
478
2.86k
                            bNextCh = 0x20 == nNextCh;
479
2.86k
                        }
480
5.93k
                        else
481
5.93k
                        {
482
5.93k
                            nNextCh = '\\';
483
5.93k
                            bContinue = false;        // abort, string together
484
5.93k
                        }
485
16.3k
                    }
486
16.3k
                    break;
487
488
384k
                default:
489
384k
                    rInput.SeekRel( -1 );
490
384k
                    nNextCh = '\\';
491
384k
                    bContinue = false;        // abort, string together
492
384k
                    break;
493
465k
                }
494
465k
            }
495
465k
            break;
496
497
465k
        case sal_Unicode(EOF):
498
742
            eState = SvParserState::Error;
499
742
            [[fallthrough]];
500
50.1k
        case '{':
501
103k
        case '}':
502
103k
            bContinue = false;
503
103k
            break;
504
505
55.3k
        case 0x0a:
506
62.9k
        case 0x0d:
507
62.9k
            break;
508
509
3.02M
        default:
510
3.02M
            if( nNextCh == cBreak || aStrBuffer.getLength() >= MAX_STRING_LEN)
511
547k
                bContinue = false;
512
2.47M
            else
513
2.47M
            {
514
5.19M
                do {
515
                    // all other characters end up in the text
516
5.19M
                    aStrBuffer.appendUtf32(nNextCh);
517
518
5.19M
                    if (sal_Unicode(EOF) == (nNextCh = GetNextChar()))
519
5.81k
                    {
520
5.81k
                        if (!aStrBuffer.isEmpty())
521
5.81k
                            aToken.append( aStrBuffer );
522
5.81k
                        return;
523
5.81k
                    }
524
5.19M
                } while
525
2.47M
                (
526
5.18M
                    (RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) &&
527
5.18M
                    (aStrBuffer.getLength() < MAX_STRING_LEN)
528
2.47M
                );
529
2.46M
                bNextCh = false;
530
2.46M
            }
531
3.65M
        }
532
533
3.64M
        if( bContinue && bNextCh )
534
118k
            nNextCh = GetNextChar();
535
3.64M
    }
536
537
1.04M
    if (!aStrBuffer.isEmpty())
538
587k
        aToken.append( aStrBuffer );
539
1.04M
}
540
541
542
// Static member, i.e. one counter for all parser instances. SkipGroup()
// increments it while it runs and returns immediately when it is already
// positive; GetNextToken_() handles \upr only while it is zero.
short SvRTFParser::_inSkipGroup=0;
543
544
void SvRTFParser::SkipGroup()
545
9.18k
{
546
9.18k
    short nBrackets=1;
547
9.18k
    if (_inSkipGroup>0)
548
0
        return;
549
9.18k
    _inSkipGroup++;
550
//#i16185# faking \bin keyword
551
9.18k
    do
552
87.9k
    {
553
87.9k
        switch (nNextCh)
554
87.9k
        {
555
31.7k
            case '{':
556
31.7k
                ++nBrackets;
557
31.7k
                break;
558
10.4k
            case '}':
559
10.4k
                if (!--nBrackets) {
560
5.57k
                    _inSkipGroup--;
561
5.57k
                    return;
562
5.57k
                }
563
4.86k
                break;
564
87.9k
        }
565
82.3k
        int nToken = GetNextToken_();
566
82.3k
        if (nToken == RTF_BIN)
567
409
        {
568
409
            rInput.SeekRel(-1);
569
409
            SAL_WARN_IF(nTokenValue < 0, "svtools", "negative value argument for rtf \\bin keyword");
570
409
            if (nTokenValue > 0)
571
236
                rInput.SeekRel(nTokenValue);
572
409
            nNextCh = GetNextChar();
573
409
        }
574
84.6k
        while (nNextCh==0xa || nNextCh==0xd)
575
2.29k
        {
576
2.29k
            nNextCh = GetNextChar();
577
2.29k
        }
578
82.3k
    } while (sal_Unicode(EOF) != nNextCh && IsParserWorking());
579
580
3.60k
    if( SvParserState::Pending != eState && '}' != nNextCh )
581
3.36k
        eState = SvParserState::Error;
582
3.60k
    _inSkipGroup--;
583
3.60k
}
584
585
2.73k
// This implementation does not interpret unknown data; it skips the group.
void SvRTFParser::ReadUnknownData()
{
    SkipGroup();
}
586
32
// This implementation does not interpret bitmap data; it skips the group.
void SvRTFParser::ReadBitmapData()
{
    SkipGroup();
}
587
588
589
// Entry point: resets the parser, checks that the input starts with
// '{' followed by \rtf and, if so, runs Continue() over the rest.
// Returns the resulting parser state; input that does not start that way
// yields SvParserState::Error.
SvParserState SvRTFParser::CallParser()
{
    // prime the look-ahead with the first character of the stream
    char cStart(0);
    nNextChPos = rInput.Tell();
    rInput.ReadChar( cStart );
    nNextCh = static_cast<unsigned char>(cStart);

    // reset to defaults: working, no open groups, MS-1252
    eState = SvParserState::Working;
    nOpenBrackets = 0;
    eCodeSet = RTL_TEXTENCODING_MS_1252;
    SetSrcEncoding( eCodeSet );

    // the first two tokens should be '{' and \\rtf !!
    const bool bLooksLikeRtf = '{' == GetNextToken() && RTF_RTF == GetNextToken();
    if( !bLooksLikeRtf )
    {
        eState = SvParserState::Error;
        return eState;
    }

    {
        AddFirstRef();
        // The guard runs when this scope is left, also if Continue() throws.
        // A parser that is merely pending keeps its reference.
        comphelper::ScopeGuard g([this] {
            if( SvParserState::Pending != eState )
                ReleaseRef();       // now parser is not needed anymore
        });
        Continue( 0 );
    }

    return eState;
}
616
617
void SvRTFParser::Continue( int nToken )
618
9.87k
{
619
//  DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(),
620
//              "Characterset was changed." );
621
622
9.87k
    if( !nToken )
623
9.87k
        nToken = GetNextToken();
624
625
9.87k
    bool bLooping = false;
626
627
1.81M
    while (IsParserWorking() && !bLooping)
628
1.80M
    {
629
1.80M
        auto nCurrentTokenIndex = m_nTokenIndex;
630
1.80M
        auto nCurrentToken = nToken;
631
632
1.80M
        SaveState( nToken );
633
1.80M
        switch( nToken )
634
1.80M
        {
635
47.1k
        case '}':
636
47.1k
            if( nOpenBrackets )
637
46.7k
                goto NEXTTOKEN;
638
369
            eState = SvParserState::Accepted;
639
369
            break;
640
641
96.3k
        case '{':
642
            // an unknown group ?
643
96.3k
            {
644
96.3k
                if( RTF_IGNOREFLAG != GetNextToken() )
645
92.8k
                    nToken = SkipToken();
646
3.52k
                else if( RTF_UNKNOWNCONTROL != GetNextToken() )
647
2.28k
                    nToken = SkipToken( -2 );
648
1.23k
                else
649
1.23k
                {
650
                    // filter immediately
651
1.23k
                    ReadUnknownData();
652
1.23k
                    nToken = GetNextToken();
653
1.23k
                    if( '}' != nToken )
654
42
                        eState = SvParserState::Error;
655
1.23k
                    break;      // move to next token!!
656
1.23k
                }
657
96.3k
            }
658
95.1k
            goto NEXTTOKEN;
659
660
95.1k
        case RTF_UNKNOWNCONTROL:
661
87.3k
            break;      // skip unknown token
662
0
        case RTF_NEXTTYPE:
663
1.23k
        case RTF_ANSITYPE:
664
1.23k
            eCodeSet = RTL_TEXTENCODING_MS_1252;
665
1.23k
            SetSrcEncoding( eCodeSet );
666
1.23k
            break;
667
323
        case RTF_MACTYPE:
668
323
            eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN;
669
323
            SetSrcEncoding( eCodeSet );
670
323
            break;
671
403
        case RTF_PCTYPE:
672
403
            eCodeSet = RTL_TEXTENCODING_IBM_437;
673
403
            SetSrcEncoding( eCodeSet );
674
403
            break;
675
25
        case RTF_PCATYPE:
676
25
            eCodeSet = RTL_TEXTENCODING_IBM_850;
677
25
            SetSrcEncoding( eCodeSet );
678
25
            break;
679
4.97k
        case RTF_ANSICPG:
680
4.97k
            eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue);
681
4.97k
            SetSrcEncoding(eCodeSet);
682
4.97k
            break;
683
1.56M
        default:
684
1.70M
NEXTTOKEN:
685
1.70M
            NextToken( nToken );
686
1.70M
            break;
687
1.80M
        }
688
1.80M
        if( IsParserWorking() )
689
1.80M
            SaveState( 0 );         // processed till here,
690
                                    // continue with new token!
691
1.80M
        nToken = GetNextToken();
692
1.80M
        bLooping = nCurrentTokenIndex == m_nTokenIndex && nToken == nCurrentToken;
693
1.80M
    }
694
9.84k
    if( SvParserState::Accepted == eState && 0 < nOpenBrackets )
695
8.61k
        eState = SvParserState::Error;
696
9.84k
}
697
698
// Makes eEnc the source encoding and records it in the state of the current
// group, if there is one. RTL_TEXTENCODING_DONTKNOW stands for "use the
// value returned by GetCodeSet()".
void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc )
{
    const rtl_TextEncoding eEffective =
        (RTL_TEXTENCODING_DONTKNOW == eEnc) ? GetCodeSet() : eEnc;

    if (!aParserStates.empty())
        aParserStates.top().eCodeSet = eEffective;

    SetSrcEncoding(eEffective);
}
707
708
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */