Coverage Report

Created: 2025-12-08 09:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/tools/source/inet/inetmime.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <algorithm>
21
#include <limits>
22
#include <forward_list>
23
#include <memory>
24
25
#include <sal/log.hxx>
26
#include <rtl/ustring.hxx>
27
#include <rtl/strbuf.hxx>
28
#include <rtl/ustrbuf.hxx>
29
#include <rtl/tencinfo.h>
30
#include <tools/debug.hxx>
31
#include <tools/inetmime.hxx>
32
#include <rtl/character.hxx>
33
34
namespace {
35
36
rtl_TextEncoding getCharsetEncoding(const char * pBegin,
37
                                           const char * pEnd);
38
39
/** Check for US-ASCII white space character.
40
41
    @param nChar  Some UCS-4 character.
42
43
    @return  True if nChar is a US-ASCII white space character (US-ASCII
44
    0x09 or 0x20).
45
 */
46
// True exactly for US-ASCII horizontal tab (0x09) and space (0x20).
bool isWhiteSpace(sal_uInt32 nChar)
{
    switch (nChar)
    {
        case '\t':
        case ' ':
            return true;

        default:
            return false;
    }
}
50
51
/** Get the Base 64 digit weight of a US-ASCII character.
52
53
    @param nChar  Some UCS-4 character.
54
55
    @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
56
    'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
57
    corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
58
    character (US-ASCII '='), return -1; otherwise, return -2.
59
 */
60
// Maps a Base 64 alphabet character to its 6-bit value:
// 'A'..'Z' -> 0..25, 'a'..'z' -> 26..51, '0'..'9' -> 52..61,
// '+' -> 62, '/' -> 63.  The padding character '=' yields -1 and every
// other character yields -2.
int getBase64Weight(sal_uInt32 nChar)
{
    if (rtl::isAsciiUpperCase(nChar))
        return int(nChar - 'A');
    if (rtl::isAsciiLowerCase(nChar))
        return int(nChar - 'a' + 26);
    if (rtl::isAsciiDigit(nChar))
        return int(nChar - '0' + 52);

    switch (nChar)
    {
        case '+':
            return 62;
        case '/':
            return 63;
        case '=':
            return -1;
        default:
            return -2;
    }
}
69
70
bool startsWithLineFolding(const sal_Unicode * pBegin,
71
                                            const sal_Unicode * pEnd)
72
0
{
73
0
    assert(pBegin && pBegin <= pEnd && "startsWithLineFolding(): Bad sequence");
74
75
0
    return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
76
0
           && isWhiteSpace(pBegin[2]); // CR, LF
77
0
}
78
79
// Adjusts an encoding obtained from a MIME charset name: on Windows builds
// ISO-8859-1 is replaced by MS-1252; everywhere else (and for every other
// encoding) the argument is returned unchanged.
rtl_TextEncoding translateFromMIME(rtl_TextEncoding eEncoding)
{
#if defined(_WIN32)
    if (eEncoding == RTL_TEXTENCODING_ISO_8859_1)
        return RTL_TEXTENCODING_MS_1252;
#endif
    return eEncoding;
}
89
90
/** Decide whether an encoding is acceptable as a MIME charset.

    @param eEncoding  Some text encoding.

    @return  Whatever rtl_isOctetTextEncoding() reports for eEncoding.
 */
bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
91
0
{
92
0
    return rtl_isOctetTextEncoding(eEncoding);
93
0
}
94
95
std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
96
                                         const char * pEnd,
97
                                         rtl_TextEncoding eEncoding,
98
                                         sal_Size & rSize)
99
49.2k
{
100
49.2k
    if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
101
643
        return nullptr;
102
48.5k
    rtl_TextToUnicodeConverter hConverter
103
48.5k
        = rtl_createTextToUnicodeConverter(eEncoding);
104
48.5k
    rtl_TextToUnicodeContext hContext
105
48.5k
        = rtl_createTextToUnicodeContext(hConverter);
106
48.5k
    std::unique_ptr<sal_Unicode[]> pBuffer;
107
48.5k
    sal_uInt32 nInfo;
108
48.5k
    for (sal_Size nBufferSize = pEnd - pBegin;;
109
48.5k
         nBufferSize += nBufferSize / 3 + 1)
110
48.5k
    {
111
48.5k
        pBuffer.reset(new sal_Unicode[nBufferSize]);
112
48.5k
        sal_Size nSrcCvtBytes;
113
48.5k
        rSize = rtl_convertTextToUnicode(
114
48.5k
                    hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
115
48.5k
                    nBufferSize,
116
48.5k
                    RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117
48.5k
                        | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118
48.5k
                        | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
119
48.5k
                    &nInfo, &nSrcCvtBytes);
120
48.5k
        if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
121
48.5k
            break;
122
0
        pBuffer.reset();
123
0
        rtl_resetTextToUnicodeContext(hConverter, hContext);
124
0
    }
125
48.5k
    rtl_destroyTextToUnicodeContext(hConverter, hContext);
126
48.5k
    rtl_destroyTextToUnicodeConverter(hConverter);
127
48.5k
    if (nInfo != 0)
128
78
    {
129
78
        pBuffer.reset();
130
78
    }
131
48.5k
    return pBuffer;
132
49.2k
}
133
134
/** Append the UTF-8 encoding of a UCS-4 character to a byte buffer.

    Uses the original 1--6 byte UTF-8 scheme, so values up to 0x7FFFFFFF
    can be written.

    @param rSink  Buffer the encoded bytes are appended to.
    @param nChar  Character to encode; asserted to be below 0x80000000.
 */
void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
{
    // See RFC 2279 for a discussion of UTF-8.
    DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");

    // Single-byte (US-ASCII) case.
    if (nChar < 0x80)
    {
        rSink.append(char(nChar));
        return;
    }

    // Pick the number of continuation bytes and the marker bits of the
    // lead byte from the magnitude of the character.
    int nTrailing;
    sal_uInt32 nLeadMark;
    if (nChar < 0x800)
    {
        nTrailing = 1;
        nLeadMark = 0xC0;
    }
    else if (nChar < 0x10000)
    {
        nTrailing = 2;
        nLeadMark = 0xE0;
    }
    else if (nChar < 0x200000)
    {
        nTrailing = 3;
        nLeadMark = 0xF0;
    }
    else if (nChar < 0x4000000)
    {
        nTrailing = 4;
        nLeadMark = 0xF8;
    }
    else
    {
        nTrailing = 5;
        nLeadMark = 0xFC;
    }

    // Lead byte carries the topmost payload bits ...
    rSink.append(char(nChar >> (6 * nTrailing) | nLeadMark));
    // ... followed by six payload bits per continuation byte, high to low.
    for (int nShift = 6 * (nTrailing - 1); nShift >= 0; nShift -= 6)
        rSink.append(char((nChar >> nShift & 0x3F) | 0x80));
}
171
172
bool translateUTF8Char(const char *& rBegin,
173
                                 const char * pEnd,
174
                                 sal_uInt32 & rCharacter)
175
0
{
176
0
    if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
177
0
        || static_cast< unsigned char >(*rBegin) >= 0xFE)
178
0
        return false;
179
180
0
    int nCount;
181
0
    sal_uInt32 nMin;
182
0
    sal_uInt32 nUCS4;
183
0
    const char * p = rBegin;
184
0
    if (static_cast< unsigned char >(*p) < 0xE0)
185
0
    {
186
0
        nCount = 1;
187
0
        nMin = 0x80;
188
0
        nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
189
0
    }
190
0
    else if (static_cast< unsigned char >(*p) < 0xF0)
191
0
    {
192
0
        nCount = 2;
193
0
        nMin = 0x800;
194
0
        nUCS4 = static_cast< unsigned char >(*p) & 0xF;
195
0
    }
196
0
    else if (static_cast< unsigned char >(*p) < 0xF8)
197
0
    {
198
0
        nCount = 3;
199
0
        nMin = 0x10000;
200
0
        nUCS4 = static_cast< unsigned char >(*p) & 7;
201
0
    }
202
0
    else if (static_cast< unsigned char >(*p) < 0xFC)
203
0
    {
204
0
        nCount = 4;
205
0
        nMin = 0x200000;
206
0
        nUCS4 = static_cast< unsigned char >(*p) & 3;
207
0
    }
208
0
    else
209
0
    {
210
0
        nCount = 5;
211
0
        nMin = 0x4000000;
212
0
        nUCS4 = static_cast< unsigned char >(*p) & 1;
213
0
    }
214
0
    ++p;
215
216
0
    for (; nCount-- > 0; ++p)
217
0
        if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
218
0
            nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
219
0
        else
220
0
            return false;
221
222
0
    if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
223
0
        return false;
224
225
0
    rCharacter = nUCS4;
226
0
    rBegin = p;
227
0
    return true;
228
0
}
229
230
void appendISO88591(OUStringBuffer & rText, char const * pBegin,
231
                    char const * pEnd);
232
233
struct Parameter
234
{
235
    OString m_aAttribute;
236
    OString m_aCharset;
237
    OString m_aLanguage;
238
    OString m_aValue;
239
    sal_uInt32 m_nSection;
240
    bool m_bExtended;
241
242
    bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
243
25.4k
    {
244
25.4k
        int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
245
25.4k
        return nComp < 0 ||
246
13.5k
                (nComp == 0 && m_nSection < rhs.m_nSection);
247
25.4k
    }
248
    struct IsSameSection // is used to check container for duplicates with std::any_of
249
    {
250
        const OString& rAttribute;
251
        const sal_uInt32 nSection;
252
        bool operator()(const Parameter& r) const
253
66.1k
        { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
254
    };
255
};
256
257
typedef std::forward_list<Parameter> ParameterList;
258
259
bool parseParameters(ParameterList const & rInput,
260
                     INetContentTypeParameterList * pOutput);
261
262
//  appendISO88591
263
264
// Appends the bytes in [pBegin, pEnd) to rText, interpreting every byte as
// the ISO 8859-1 character with the same numeric value.
void appendISO88591(OUStringBuffer & rText, char const * pBegin,
                    char const * pEnd)
{
    sal_Int32 nCount = pEnd - pBegin;
    std::unique_ptr<sal_Unicode[]> pWide(new sal_Unicode[nCount]);
    // Widen each byte via unsigned char so values above 0x7F stay positive.
    std::transform(pBegin, pEnd, pWide.get(),
                   [](char cByte) -> sal_Unicode
                   { return static_cast<unsigned char>(cByte); });
    rText.append(pWide.get(), nCount);
}
273
274
//  parseParameters
275
276
/** Validate a list of raw parameter sections and, optionally, merge them
    into decoded parameters.

    rInput is walked in list order; scanParameters() sorts the list by
    attribute and section number before calling this function.

    @param rInput   The raw sections.
    @param pOutput  If non-null, it is cleared first and then receives one
                    entry per run of sections (a section followed by all
                    directly following sections with a non-zero section
                    number), keyed by the attribute of the run's first
                    section.

    @return  False if some section with a number greater than zero is not
    directly preceded by the section with the same attribute and the next
    lower number; true otherwise.
 */
bool parseParameters(ParameterList const & rInput,
277
                     INetContentTypeParameterList * pOutput)
278
67.0k
{
279
67.0k
    if (pOutput)
280
43.1k
        pOutput->clear();
281
282
116k
    // Consistency check: every continuation section (number > 0) must follow
    // its predecessor section of the same attribute without gaps.
    for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
283
49.7k
    {
284
49.7k
        if (it->m_nSection > 0
285
419
            && (itPrev == rInput.end()
286
143
                || itPrev->m_nSection != it->m_nSection - 1
287
77
                || itPrev->m_aAttribute != it->m_aAttribute))
288
360
            return false;
289
49.7k
    }
290
291
66.7k
    if (pOutput)
292
91.9k
        // 'it' is the first section of a run, 'itNext' ends up at the first
        // section of the following run.
        for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
293
49.1k
        {
294
49.1k
            bool bCharset = !it->m_aCharset.isEmpty();
295
49.1k
            rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
296
49.1k
            if (bCharset)
297
646
                eEncoding
298
646
                    = getCharsetEncoding(it->m_aCharset.getStr(),
299
646
                                                   it->m_aCharset.getStr()
300
646
                                                       + it->m_aCharset.getLength());
301
49.1k
            OUStringBuffer aValue(64);
302
49.1k
            bool bBadEncoding = false;
303
49.1k
            itNext = it;
304
49.1k
            // First attempt: convert every section of the run to Unicode and
            // concatenate the results.
            do
305
49.1k
            {
306
49.1k
                sal_Size nSize;
307
49.1k
                // Use the declared charset only for an extended first
                // section; otherwise try UTF-8.
                std::unique_ptr<sal_Unicode[]> pUnicode
308
49.1k
                    = convertToUnicode(itNext->m_aValue.getStr(),
309
49.1k
                                                 itNext->m_aValue.getStr()
310
49.1k
                                                     + itNext->m_aValue.getLength(),
311
49.1k
                                                 bCharset && it->m_bExtended ?
312
646
                                                     eEncoding :
313
49.1k
                                                     RTL_TEXTENCODING_UTF8,
314
49.1k
                                                 nSize);
315
49.1k
                // If the UTF-8 attempt failed, fall back to ISO 8859-1.
                if (!pUnicode && !(bCharset && it->m_bExtended))
316
78
                    pUnicode = convertToUnicode(
317
78
                                   itNext->m_aValue.getStr(),
318
78
                                   itNext->m_aValue.getStr()
319
78
                                       + itNext->m_aValue.getLength(),
320
78
                                   RTL_TEXTENCODING_ISO_8859_1, nSize);
321
49.1k
                if (!pUnicode)
322
643
                {
323
643
                    bBadEncoding = true;
324
643
                    break;
325
643
                }
326
48.5k
                aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
327
48.5k
                ++itNext;
328
48.5k
            }
329
49.1k
            while (itNext != rInput.end() && itNext->m_nSection != 0);
330
331
49.1k
            // Conversion failed: discard the partial result and redo the
            // whole run, copying the value bytes without conversion.
            if (bBadEncoding)
332
643
            {
333
643
                aValue.setLength(0);
334
643
                itNext = it;
335
643
                do
336
663
                {
337
663
                    if (itNext->m_bExtended)
338
663
                    {
339
18.0k
                        for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
340
17.3k
                            aValue.append(
341
17.3k
                                static_cast<sal_Unicode>(
342
17.3k
                                    static_cast<unsigned char>(itNext->m_aValue[i])
343
17.3k
                                    | 0xF800)); // map to unicode corporate use sub area
344
663
                    }
345
0
                    else
346
0
                    {
347
0
                        for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
348
0
                            aValue.append( itNext->m_aValue[i] );
349
0
                    }
350
663
                    ++itNext;
351
663
                }
352
663
                while (itNext != rInput.end() && itNext->m_nSection != 0);
353
643
            }
354
49.1k
            // Store the merged value under the attribute of the run's first
            // section; the last field is true only if conversion succeeded.
            auto const ret = pOutput->insert(
355
49.1k
                {it->m_aAttribute,
356
49.1k
                 {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
357
49.1k
            SAL_INFO_IF(!ret.second, "tools",
358
49.1k
                "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
359
49.1k
        }
360
66.7k
    return true;
361
66.7k
}
362
363
/** Check whether some character is valid within an RFC 2045 <token>.
364
365
    @param nChar  Some UCS-4 character.
366
367
    @return  True if nChar is valid within an RFC 2045 <token> (US-ASCII
368
    'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
369
    '-', '.', '^', '_', '`', '{', '|', '}', or '~').
370
 */
371
// True for the US-ASCII letters and digits and for the punctuation
// characters ! # $ % & ' * + - . ^ _ ` { | } ~ ; false for every other
// character, including all non-ASCII ones.
bool isTokenChar(sal_uInt32 nChar)
{
    if (rtl::isAsciiUpperCase(nChar) || rtl::isAsciiLowerCase(nChar)
        || rtl::isAsciiDigit(nChar))
        return true;

    switch (nChar)
    {
        case '!':
        case '#':
        case '$':
        case '%':
        case '&':
        case '\'':
        case '*':
        case '+':
        case '-':
        case '.':
        case '^':
        case '_':
        case '`':
        case '{':
        case '|':
        case '}':
        case '~':
            return true;

        default:
            return false;
    }
}
393
394
// Skips one parenthesised comment, honouring nested parentheses and
// backslash escapes.  Returns the position just past the closing ')' of the
// outermost comment, or pBegin if the input does not start with '(' or the
// comment is not closed before pEnd.
const sal_Unicode * skipComment(const sal_Unicode * pBegin,
                                const sal_Unicode * pEnd)
{
    assert(pBegin && pBegin <= pEnd && "skipComment(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '(')
        return pBegin;

    sal_uInt32 nDepth = 0;
    const sal_Unicode * pCur = pBegin;
    while (pCur != pEnd)
    {
        const sal_Unicode cCur = *pCur++;
        if (cCur == '(')
            ++nDepth;
        else if (cCur == ')')
        {
            if (--nDepth == 0)
                return pCur;
        }
        else if (cCur == '\\')
        {
            // An escaped character never opens or closes a comment.
            if (pCur != pEnd)
                ++pCur;
        }
    }
    return pBegin;
}
422
423
// Advances over any mix of tabs, spaces, folded line breaks (CR LF followed
// by white space) and complete parenthesised comments, and returns the first
// position that is none of these (or pEnd).
const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode * pBegin,
                                                const sal_Unicode * pEnd)
{
    assert(pBegin && pBegin <= pEnd && "skipLinearWhiteSpaceComment(): Bad sequence");

    while (pBegin != pEnd)
    {
        const sal_Unicode cCur = *pBegin;
        if (cCur == '\t' || cCur == ' ')
        {
            ++pBegin;
        }
        else if (cCur == 0x0D) // CR
        {
            // A CR only counts when it starts a line folding.
            if (!startsWithLineFolding(pBegin, pEnd))
                break;
            pBegin += 3;
        }
        else if (cCur == '(')
        {
            // An unterminated comment is not skipped.
            const sal_Unicode * pAfter = skipComment(pBegin, pEnd);
            if (pAfter == pBegin)
                break;
            pBegin = pAfter;
        }
        else
            break;
    }
    return pBegin;
}
459
460
// Skips one double-quoted string, honouring backslash escapes and allowing
// CR only as part of a line folding (CR LF white space).  Returns the
// position just past the closing quote, or pBegin if the input does not
// start with '"', contains a bare CR, or is not closed before pEnd.
const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
                                     const sal_Unicode * pEnd)
{
    assert(pBegin && pBegin <= pEnd && "skipQuotedString(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '"')
        return pBegin;

    const sal_Unicode * pCur = pBegin + 1;
    while (pCur != pEnd)
    {
        const sal_Unicode cCur = *pCur++;
        if (cCur == '"')
            return pCur;

        if (cCur == 0x0D) // CR
        {
            // Must be followed by LF and a white space character.
            if (pEnd - pCur < 2)
                return pBegin;
            if (pCur[0] != 0x0A || !isWhiteSpace(pCur[1]))
                return pBegin;
            pCur += 2;
        }
        else if (cCur == '\\')
        {
            // Skip the escaped character, if there is one.
            if (pCur != pEnd)
                ++pCur;
        }
    }
    return pBegin;
}
485
486
/** Scan a sequence of ";attribute=value" parameters.

    Each parameter may carry a section number ("attr*N") and/or the extended
    marker ("attr*" / "attr*N*"); an extended section 0 starts with a
    charset'language' prefix.  Scanning stops at the first thing that cannot
    be read as a parameter.  The sections collected up to that point are
    sorted and handed to parseParameters().

    @param pBegin       Start of the text to scan.
    @param pEnd         One past the last character to scan.
    @param pParameters  If non-null, values are decoded and the result is
                        stored there by parseParameters(); if null, the text
                        is only scanned.

    @return  pBegin if parseParameters() rejects the collected sections;
    otherwise the position where scanning stopped (the start of the first
    parameter that was not accepted, after leading white space/comments).
 */
sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
487
                                             sal_Unicode const * pEnd,
488
                                             INetContentTypeParameterList *
489
                                                 pParameters)
490
67.0k
{
491
67.0k
    ParameterList aList;
492
67.0k
    sal_Unicode const * pParameterBegin = pBegin;
493
67.0k
    for (sal_Unicode const * p = pParameterBegin;;)
494
117k
    {
495
117k
        // Every parameter is introduced by ';'.
        pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
496
117k
        if (pParameterBegin == pEnd || *pParameterBegin != ';')
497
60.0k
            break;
498
57.2k
        p = pParameterBegin + 1;
499
500
57.2k
        // Attribute name: token characters up to (not including) a '*'.
        sal_Unicode const * pAttributeBegin
501
57.2k
            = skipLinearWhiteSpaceComment(p, pEnd);
502
57.2k
        p = pAttributeBegin;
503
57.2k
        bool bDowncaseAttribute = false;
504
449k
        while (p != pEnd && isTokenChar(*p) && *p != '*')
505
391k
        {
506
391k
            bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
507
391k
            ++p;
508
391k
        }
509
57.2k
        if (p == pAttributeBegin)
510
204
            break;
511
57.0k
        OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
512
57.0k
        if (bDowncaseAttribute)
513
13.1k
            aAttribute = aAttribute.toAsciiLowerCase();
514
515
57.0k
        // Optional "*N" section number (stays 0 if no digit follows).
        sal_uInt32 nSection = 0;
516
57.0k
        if (p != pEnd && *p == '*')
517
3.83k
        {
518
3.83k
            ++p;
519
3.83k
            if (p != pEnd && rtl::isAsciiDigit(*p)
520
2.05k
                && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
521
273
                break;
522
3.83k
        }
523
524
56.7k
        // Stop at the first repeated attribute/section pair.
        bool bPresent = std::any_of(aList.begin(), aList.end(),
525
56.7k
                                    Parameter::IsSameSection{aAttribute, nSection});
526
56.7k
        if (bPresent)
527
148
            break;
528
529
56.5k
        // A further '*' marks the section as extended.
        bool bExtended = false;
530
56.5k
        if (p != pEnd && *p == '*')
531
1.25k
        {
532
1.25k
            ++p;
533
1.25k
            bExtended = true;
534
1.25k
        }
535
536
56.5k
        p = skipLinearWhiteSpaceComment(p, pEnd);
537
538
56.5k
        if (p == pEnd || *p != '=')
539
4.84k
            break;
540
541
51.7k
        p = skipLinearWhiteSpaceComment(p + 1, pEnd);
542
543
51.7k
        OString aCharset;
544
51.7k
        OString aLanguage;
545
51.7k
        OString aValue;
546
51.7k
        if (bExtended)
547
1.17k
        {
548
1.17k
            // Only section 0 carries the charset'language' prefix.
            if (nSection == 0)
549
1.13k
            {
550
1.13k
                sal_Unicode const * pCharsetBegin = p;
551
1.13k
                bool bDowncaseCharset = false;
552
27.2k
                while (p != pEnd && isTokenChar(*p) && *p != '\'')
553
26.1k
                {
554
26.1k
                    bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
555
26.1k
                    ++p;
556
26.1k
                }
557
1.13k
                if (p == pCharsetBegin)
558
10
                    break;
559
1.12k
                if (pParameters)
560
1.12k
                {
561
1.12k
                    aCharset = OString(
562
1.12k
                        pCharsetBegin,
563
1.12k
                        p - pCharsetBegin,
564
1.12k
                        RTL_TEXTENCODING_ASCII_US);
565
1.12k
                    if (bDowncaseCharset)
566
90
                        aCharset = aCharset.toAsciiLowerCase();
567
1.12k
                }
568
569
1.12k
                if (p == pEnd || *p != '\'')
570
264
                    break;
571
858
                ++p;
572
573
858
                // Language: one or more '-'-separated groups of 1 to 8
                // ASCII letters.
                sal_Unicode const * pLanguageBegin = p;
574
858
                bool bDowncaseLanguage = false;
575
858
                int nLetters = 0;
576
2.87k
                for (; p != pEnd; ++p)
577
2.83k
                    if (rtl::isAsciiAlpha(*p))
578
2.07k
                    {
579
2.07k
                        if (++nLetters > 8)
580
71
                            break;
581
2.00k
                        bDowncaseLanguage = bDowncaseLanguage
582
1.44k
                                            || rtl::isAsciiUpperCase(*p);
583
2.00k
                    }
584
761
                    else if (*p == '-')
585
44
                    {
586
44
                        if (nLetters == 0)
587
34
                            break;
588
10
                        nLetters = 0;
589
10
                    }
590
717
                    else
591
717
                        break;
592
858
                if (nLetters == 0 || nLetters > 8)
593
129
                    break;
594
729
                if (pParameters)
595
729
                {
596
729
                    aLanguage = OString(
597
729
                        pLanguageBegin,
598
729
                        p - pLanguageBegin,
599
729
                        RTL_TEXTENCODING_ASCII_US);
600
729
                    if (bDowncaseLanguage)
601
679
                        aLanguage = aLanguage.toAsciiLowerCase();
602
729
                }
603
604
729
                if (p == pEnd || *p != '\'')
605
83
                    break;
606
646
                ++p;
607
646
            }
608
690
            // Extended value: "%XX" with two hex digits yields that raw
            // byte, every other character is written as UTF-8.  The value
            // ends at the first ASCII character that is not a token
            // character.
            if (pParameters)
609
690
            {
610
690
                OStringBuffer aSink;
611
16.6k
                while (p != pEnd)
612
16.4k
                {
613
16.4k
                    auto q = p;
614
16.4k
                    sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
615
16.4k
                    if (rtl::isAscii(nChar) && !isTokenChar(nChar))
616
528
                        break;
617
15.9k
                    p = q;
618
15.9k
                    if (nChar == '%' && p + 1 < pEnd)
619
8.34k
                    {
620
8.34k
                        int nWeight1 = INetMIME::getHexWeight(p[0]);
621
8.34k
                        int nWeight2 = INetMIME::getHexWeight(p[1]);
622
8.34k
                        if (nWeight1 >= 0 && nWeight2 >= 0)
623
95
                        {
624
95
                            aSink.append(char(nWeight1 << 4 | nWeight2));
625
95
                            p += 2;
626
95
                            continue;
627
95
                        }
628
8.34k
                    }
629
15.8k
                    writeUTF8(aSink, nChar);
630
15.8k
                }
631
690
                aValue = aSink.makeStringAndClear();
632
690
            }
633
0
            else
634
0
                // Scan only: skip token and non-ASCII characters.
                while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
635
0
                    ++p;
636
690
        }
637
50.5k
        else if (p != pEnd && *p == '"')
638
6.91k
            // Quoted-string value.
            if (pParameters)
639
6.87k
            {
640
6.87k
                OStringBuffer aSink(256);
641
6.87k
                bool bInvalid = false;
642
6.87k
                for (++p;;)
643
41.8k
                {
644
41.8k
                    // Reaching the end before the closing quote is an error.
                    if (p == pEnd)
645
194
                    {
646
194
                        bInvalid = true;
647
194
                        break;
648
194
                    }
649
41.6k
                    sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
650
41.6k
                    if (nChar == '"')
651
6.65k
                        break;
652
34.9k
                    else if (nChar == 0x0D) // CR
653
0
                    {
654
0
                        // CR must start a line folding; the folding is
                        // replaced by its white space character.
                        if (pEnd - p < 2 || *p++ != 0x0A // LF
655
0
                            || !isWhiteSpace(*p))
656
0
                        {
657
0
                            bInvalid = true;
658
0
                            break;
659
0
                        }
660
0
                        nChar = static_cast<unsigned char>(*p++);
661
0
                    }
662
34.9k
                    else if (nChar == '\\')
663
922
                    {
664
922
                        // Backslash escape: take the next character as is.
                        if (p == pEnd)
665
22
                        {
666
22
                            bInvalid = true;
667
22
                            break;
668
22
                        }
669
900
                        nChar = INetMIME::getUTF32Character(p, pEnd);
670
900
                    }
671
34.9k
                    writeUTF8(aSink, nChar);
672
34.9k
                }
673
6.87k
                if (bInvalid)
674
216
                    break;
675
6.65k
                aValue = aSink.makeStringAndClear();
676
6.65k
            }
677
40
            else
678
40
            {
679
40
                // Scan only: just step over the quoted string.
                sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
680
40
                if (p == pStringEnd)
681
7
                    break;
682
33
                p = pStringEnd;
683
33
            }
684
43.6k
        else
685
43.6k
        {
686
43.6k
            // Plain value: a non-empty run of token and non-ASCII characters.
            sal_Unicode const * pTokenBegin = p;
687
326k
            while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
688
283k
                ++p;
689
43.6k
            if (p == pTokenBegin)
690
905
                break;
691
42.7k
            if (pParameters)
692
42.6k
                aValue = OString(
693
42.6k
                    pTokenBegin, p - pTokenBegin,
694
42.6k
                    RTL_TEXTENCODING_UTF8);
695
42.7k
        }
696
50.1k
        aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
697
50.1k
    }
698
67.0k
    // Order by attribute and section so that the sections of one parameter
    // are adjacent and ascending for parseParameters().
    aList.sort();
699
67.0k
    return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
700
67.0k
}
701
702
bool equalIgnoreCase(const char * pBegin1,
703
                               const char * pEnd1,
704
                               const char * pString2)
705
111k
{
706
111k
    assert(pBegin1 && pBegin1 <= pEnd1 && pString2 &&
707
111k
               "equalIgnoreCase(): Bad sequences");
708
709
113k
    while (*pString2 != 0)
710
113k
        if (pBegin1 == pEnd1
711
113k
            || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
712
113k
                != rtl::toAsciiUpperCase(
713
113k
                    static_cast<unsigned char>(*pString2++))))
714
111k
            return false;
715
114
    return pBegin1 == pEnd1;
716
111k
}
717
718
// One row of the charset-name table: a character set name paired with the
// text encoding it stands for.
struct EncodingEntry
719
{
720
    // Character set name as a C string.
    char const * m_aName;
721
    // Text encoding associated with m_aName.
    rtl_TextEncoding m_eEncoding;
722
};
723
724
// The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
725
// assignments/character-sets> as of Jan, 21 2000 12:46:00, unless  otherwise
726
// noted:
727
EncodingEntry const aEncodingMap[]
728
    = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
729
        { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
730
        { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
731
        { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
732
        { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
733
        { "ASCII", RTL_TEXTENCODING_ASCII_US },
734
        { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
735
        { "US", RTL_TEXTENCODING_ASCII_US },
736
        { "IBM367", RTL_TEXTENCODING_ASCII_US },
737
        { "CP367", RTL_TEXTENCODING_ASCII_US },
738
        { "CSASCII", RTL_TEXTENCODING_ASCII_US },
739
        { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
740
        { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
741
        { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
742
        { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
743
        { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
744
        { "L1", RTL_TEXTENCODING_ISO_8859_1 },
745
        { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
746
        { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
747
        { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
748
        { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
749
        { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
750
        { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
751
        { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
752
        { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
753
        { "L2", RTL_TEXTENCODING_ISO_8859_2 },
754
        { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
755
        { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
756
        { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
757
        { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
758
        { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
759
        { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
760
        { "L3", RTL_TEXTENCODING_ISO_8859_3 },
761
        { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
762
        { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
763
        { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
764
        { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
765
        { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
766
        { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
767
        { "L4", RTL_TEXTENCODING_ISO_8859_4 },
768
        { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
769
        { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
770
        { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
771
        { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
772
        { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
773
        { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
774
        { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
775
        { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
776
        { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
777
        { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
778
        { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
779
        { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
780
        { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
781
        { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
782
        { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
783
        { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
784
        { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
785
        { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
786
        { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
787
        { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
788
        { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
789
        { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
790
        { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
791
        { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
792
        { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
793
        { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
794
        { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
795
        { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
796
        { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
797
        { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
798
        { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
799
        { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
800
        { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
801
        { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
802
        { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
803
        { "L5", RTL_TEXTENCODING_ISO_8859_9 },
804
        { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
805
        { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
806
        { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
807
        { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
808
        { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
809
        { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
810
        { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
811
        { "IBM437", RTL_TEXTENCODING_IBM_437 },
812
        { "CP437", RTL_TEXTENCODING_IBM_437 },
813
        { "437", RTL_TEXTENCODING_IBM_437 },
814
        { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
815
        { "IBM850", RTL_TEXTENCODING_IBM_850 },
816
        { "CP850", RTL_TEXTENCODING_IBM_850 },
817
        { "850", RTL_TEXTENCODING_IBM_850 },
818
        { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
819
        { "IBM860", RTL_TEXTENCODING_IBM_860 },
820
        { "CP860", RTL_TEXTENCODING_IBM_860 },
821
        { "860", RTL_TEXTENCODING_IBM_860 },
822
        { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
823
        { "IBM861", RTL_TEXTENCODING_IBM_861 },
824
        { "CP861", RTL_TEXTENCODING_IBM_861 },
825
        { "861", RTL_TEXTENCODING_IBM_861 },
826
        { "CP-IS", RTL_TEXTENCODING_IBM_861 },
827
        { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
828
        { "IBM863", RTL_TEXTENCODING_IBM_863 },
829
        { "CP863", RTL_TEXTENCODING_IBM_863 },
830
        { "863", RTL_TEXTENCODING_IBM_863 },
831
        { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
832
        { "IBM865", RTL_TEXTENCODING_IBM_865 },
833
        { "CP865", RTL_TEXTENCODING_IBM_865 },
834
        { "865", RTL_TEXTENCODING_IBM_865 },
835
        { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
836
        { "IBM775", RTL_TEXTENCODING_IBM_775 },
837
        { "CP775", RTL_TEXTENCODING_IBM_775 },
838
        { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
839
        { "IBM852", RTL_TEXTENCODING_IBM_852 },
840
        { "CP852", RTL_TEXTENCODING_IBM_852 },
841
        { "852", RTL_TEXTENCODING_IBM_852 },
842
        { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
843
        { "IBM855", RTL_TEXTENCODING_IBM_855 },
844
        { "CP855", RTL_TEXTENCODING_IBM_855 },
845
        { "855", RTL_TEXTENCODING_IBM_855 },
846
        { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
847
        { "IBM857", RTL_TEXTENCODING_IBM_857 },
848
        { "CP857", RTL_TEXTENCODING_IBM_857 },
849
        { "857", RTL_TEXTENCODING_IBM_857 },
850
        { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
851
        { "IBM862", RTL_TEXTENCODING_IBM_862 },
852
        { "CP862", RTL_TEXTENCODING_IBM_862 },
853
        { "862", RTL_TEXTENCODING_IBM_862 },
854
        { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
855
        { "IBM864", RTL_TEXTENCODING_IBM_864 },
856
        { "CP864", RTL_TEXTENCODING_IBM_864 },
857
        { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
858
        { "IBM866", RTL_TEXTENCODING_IBM_866 },
859
        { "CP866", RTL_TEXTENCODING_IBM_866 },
860
        { "866", RTL_TEXTENCODING_IBM_866 },
861
        { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
862
        { "IBM869", RTL_TEXTENCODING_IBM_869 },
863
        { "CP869", RTL_TEXTENCODING_IBM_869 },
864
        { "869", RTL_TEXTENCODING_IBM_869 },
865
        { "CP-GR", RTL_TEXTENCODING_IBM_869 },
866
        { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
867
        { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
868
        { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
869
        { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
870
        { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
871
        { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
872
        { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
873
        { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
874
        { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
875
        { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
876
        { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
877
        { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
878
        { "GB2312", RTL_TEXTENCODING_GB_2312 },
879
        { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
880
        { "BIG5", RTL_TEXTENCODING_BIG5 },
881
        { "CSBIG5", RTL_TEXTENCODING_BIG5 },
882
        { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
883
        { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
884
          RTL_TEXTENCODING_EUC_JP },
885
        { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
886
        { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
887
        { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
888
        { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
889
        { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
890
        { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
891
        { "UTF-7", RTL_TEXTENCODING_UTF7 },
892
        { "UTF-8", RTL_TEXTENCODING_UTF8 },
893
        { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
894
        { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
895
        { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
896
        { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
897
        { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
898
        { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
899
        { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
900
        { "CSUCS4", RTL_TEXTENCODING_UCS4 },
901
        { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
902
        { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
903
904
/** Look up the text encoding for a charset name.

    @param pBegin  Start of the charset name.

    @param pEnd  One past the end of the charset name.

    @return  The encoding of the first aEncodingMap entry whose name equals
    the given range (ignoring ASCII case), or RTL_TEXTENCODING_DONTKNOW if
    no entry matches.
 */
rtl_TextEncoding getCharsetEncoding(char const * pBegin,
                                    char const * pEnd)
{
    auto const pLast = std::end(aEncodingMap);
    auto const pHit = std::find_if(
        std::begin(aEncodingMap), pLast,
        [pBegin, pEnd](EncodingEntry const & rEntry)
        { return equalIgnoreCase(pBegin, pEnd, rEntry.m_aName); });
    if (pHit == pLast)
        return RTL_TEXTENCODING_DONTKNOW;
    return pHit->m_eEncoding;
}
912
913
}
914
915
//  INetMIME
916
917
// static
918
/** Check whether a character may appear in an atom.

    Accepted are the US-ASCII letters and digits plus the punctuation
    characters ! # $ % & ' * + - / = ? ^ _ ` { | } ~.  Everything else,
    including all characters outside US-ASCII, is rejected.

    @param nChar  Some UCS-4 character.

    @return  True if nChar is an atom character.
 */
bool INetMIME::isAtomChar(sal_uInt32 nChar)
{
    bool const bDigit = nChar >= '0' && nChar <= '9';
    bool const bUpper = nChar >= 'A' && nChar <= 'Z';
    bool const bLower = nChar >= 'a' && nChar <= 'z';
    if (bDigit || bUpper || bLower)
        return true;

    switch (nChar)
    {
        case '!':
        case '#':
        case '$':
        case '%':
        case '&':
        case '\'':
        case '*':
        case '+':
        case '-':
        case '/':
        case '=':
        case '?':
        case '^':
        case '_':
        case '`':
        case '{':
        case '|':
        case '}':
        case '~':
            return true;

        default:
            return false;
    }
}
940
941
// static
942
/** Check whether a character may appear in an IMAP atom.

    Accepted are all US-ASCII characters from 0x21 to 0x7E except
    " % ( ) * \ and {.  Control characters, space, DEL and all characters
    outside US-ASCII are rejected.

    @param nChar  Some UCS-4 character.

    @return  True if nChar is an IMAP atom character.
 */
bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
{
    // Controls and space (<= 0x20), DEL (0x7F) and anything non-ASCII.
    if (nChar <= ' ' || nChar >= 0x7F)
        return false;

    switch (nChar)
    {
        case '"':
        case '%':
        case '(':
        case ')':
        case '*':
        case '\\':
        case '{':
            return false;

        default:
            return true;
    }
}
964
965
// static
966
bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
967
                               const sal_Unicode * pEnd1,
968
                               const char * pString2)
969
865k
{
970
865k
    assert(pBegin1 && pBegin1 <= pEnd1 && pString2 &&
971
865k
               "INetMIME::equalIgnoreCase(): Bad sequences");
972
973
867k
    while (*pString2 != 0)
974
867k
        if (pBegin1 == pEnd1
975
46.0k
            || (rtl::toAsciiUpperCase(*pBegin1++)
976
46.0k
                != rtl::toAsciiUpperCase(
977
46.0k
                    static_cast<unsigned char>(*pString2++))))
978
865k
            return false;
979
0
    return pBegin1 == pEnd1;
980
865k
}
981
982
// static
983
bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
984
                            const sal_Unicode * pEnd, bool bLeadingZeroes,
985
                            sal_uInt32 & rValue)
986
2.05k
{
987
2.05k
    sal_uInt64 nTheValue = 0;
988
2.05k
    const sal_Unicode * p = rBegin;
989
7.76k
    for ( ; p != pEnd; ++p)
990
7.65k
    {
991
7.65k
        int nWeight = getWeight(*p);
992
7.65k
        if (nWeight < 0)
993
1.86k
            break;
994
5.78k
        nTheValue = 10 * nTheValue + nWeight;
995
5.78k
        if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
996
78
            return false;
997
5.78k
    }
998
1.98k
    if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
999
195
        return false;
1000
1.78k
    rBegin = p;
1001
1.78k
    rValue = sal_uInt32(nTheValue);
1002
1.78k
    return true;
1003
1.98k
}
1004
1005
// static
1006
/** Scan a content type of the form "type/subtype" followed by parameters.

    Linear white space and comments are skipped before the type, around the
    '/', and before the subtype.

    @param rStr  The text to scan.

    @param pType  If not null, receives the type, converted to ASCII lower
    case.

    @param pSubType  If not null, receives the subtype, converted to ASCII
    lower case.

    @param pParameters  Passed on to scanParameters() together with the text
    following the subtype.

    @return  Null if the type, the '/', or the subtype is missing; otherwise
    the result of scanParameters().
 */
sal_Unicode const * INetMIME::scanContentType(
    std::u16string_view rStr, OUString * pType,
    OUString * pSubType, INetContentTypeParameterList * pParameters)
{
    sal_Unicode const * pStart = rStr.data();
    sal_Unicode const * pEnd = pStart + rStr.size();

    // Returns the position following a (possibly empty) run of token
    // characters that starts at pPos.
    auto const skipToken = [pEnd](sal_Unicode const * pPos)
    {
        while (pPos != pEnd && isTokenChar(*pPos))
            ++pPos;
        return pPos;
    };

    // type
    sal_Unicode const * pTypeBegin = skipLinearWhiteSpaceComment(pStart, pEnd);
    sal_Unicode const * pTypeEnd = skipToken(pTypeBegin);
    if (pTypeEnd == pTypeBegin)
        return nullptr;

    // "/"
    sal_Unicode const * pCursor = skipLinearWhiteSpaceComment(pTypeEnd, pEnd);
    if (pCursor == pEnd || *pCursor != '/')
        return nullptr;
    ++pCursor;

    // subtype
    sal_Unicode const * pSubTypeBegin
        = skipLinearWhiteSpaceComment(pCursor, pEnd);
    sal_Unicode const * pSubTypeEnd = skipToken(pSubTypeBegin);
    if (pSubTypeEnd == pSubTypeBegin)
        return nullptr;

    if (pType != nullptr)
        *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
    if (pSubType != nullptr)
        *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
            .toAsciiLowerCase();

    return scanParameters(pSubTypeEnd, pEnd, pParameters);
}
1048
1049
// static
1050
/** Decode the body of a header field, replacing encoded words by the text
    they represent.

    Encoded words are recognized anywhere in the body, not only where they
    are properly delimited by white space:  Due to a bug in
    INetCoreRFC822MessageStream::ConvertTo7Bit(), old versions of StarOffice
    send mails with header fields where encoded words can be preceded by
    '=', ',', '.', '"', or '(', and followed by '=', ',', '.', '"', ')',
    without any required white space in between.  There also appear to exist
    broken mailers that only encode single letters within words, like
    "Appel =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin".

    The accepted syntax is

      encoded-word = "=?"
          1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
          ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
          ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
           / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
          "?="

      base64 = ALPHA / DIGIT / "+" / "/"

    Text outside of encoded words is scanned for UTF-8 sequences; remaining
    non US-ASCII characters are treated as ISO-8859-1.

    @param rBody  The raw header field body.

    @return  The decoded header field body.
 */
OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
{
    const char * pBegin = rBody.getStr();
    const char * pEnd = pBegin + rBody.getLength();

    OUStringBuffer sResult;

    // Start of the raw text that has not been copied to sResult yet.
    const char * pLiteralStart = pBegin;

    // End of the raw text to flush when the next encoded word is found.
    // After an encoded word this stays in front of the white space that
    // follows it, so that white space between two adjacent encoded words is
    // dropped.
    const char * pGapStart = pBegin;

    const char * pPos = pBegin;
    while (pPos != pEnd)
    {
        if (*pPos == '=')
        {
            // Try to parse an encoded word starting at pPos.  pScan is the
            // read position of this attempt; it is only committed to pPos
            // if the whole encoded word turns out to be valid.
            const char * pScan = pPos + 1;
            bool bValid = pScan != pEnd && *pScan++ == '?';

            // Charset, optionally followed by "*" and a language tag.
            rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bValid)
            {
                const char * pCharsetStart = pScan;
                // Position of the '*' as long as what follows it still
                // looks like a well-formed language tag, null otherwise.
                const char * pLanguageMark = nullptr;
                // Number of letters in the current language subtag.
                int nSubtagLength = 0;
                for (;;)
                {
                    if (pScan == pEnd)
                    {
                        bValid = false;
                        break;
                    }
                    char const cCurrent = *pScan++;
                    if (cCurrent == '?')
                    {
                        const char * pQuestionMark = pScan - 1;
                        if (pQuestionMark == pCharsetStart)
                            bValid = false; // empty charset
                        else
                        {
                            // Cut off the language tag only if it is well
                            // formed (and does not end in an empty subtag).
                            const char * pCharsetStop
                                = pLanguageMark != nullptr
                                      && nSubtagLength != 0
                                  ? pLanguageMark : pQuestionMark;
                            eEncoding = getCharsetEncoding(
                                pCharsetStart, pCharsetStop);
                            bValid = isMIMECharsetEncoding(eEncoding);
                            eEncoding = translateFromMIME(eEncoding);
                        }
                        break;
                    }
                    if (cCurrent == '*')
                    {
                        pLanguageMark = pScan - 1;
                        nSubtagLength = 0;
                    }
                    else if (cCurrent == '-')
                    {
                        if (pLanguageMark != nullptr)
                        {
                            if (nSubtagLength == 0)
                                pLanguageMark = nullptr;
                            else
                                nSubtagLength = 0;
                        }
                    }
                    else if (pLanguageMark != nullptr
                             && (!rtl::isAsciiAlpha(
                                     static_cast<unsigned char>(cCurrent))
                                 || ++nSubtagLength > 8))
                    {
                        pLanguageMark = nullptr;
                    }
                }
            }

            // Encoding letter: "B" (base 64) or "Q" (quoted printable like).
            bool bBase64 = false;
            if (bValid)
            {
                if (pScan == pEnd)
                    bValid = false;
                else
                {
                    char const cEncoding = *pScan++;
                    if (cEncoding == 'B' || cEncoding == 'b')
                        bBase64 = true;
                    else if (cEncoding != 'Q' && cEncoding != 'q')
                        bValid = false;
                }
            }

            bValid = bValid && pScan != pEnd && *pScan++ == '?';

            // Encoded text, decoded into raw octets.
            OStringBuffer sOctets;
            if (bValid && bBase64)
            {
                for (;;)
                {
                    if (pEnd - pScan < 4)
                    {
                        bValid = false;
                        break;
                    }

                    // Decode one group of four base 64 characters.
                    bool bPadded = false;
                    int nOctetCount = 3;
                    sal_uInt32 nGroup = 0;
                    for (int nShift = 18; nShift >= 0; nShift -= 6)
                    {
                        int const nWeight = getBase64Weight(*pScan++);
                        if (nWeight == -2)
                        {
                            bValid = false;
                            break;
                        }
                        if (nWeight != -1)
                        {
                            nGroup |= nWeight << nShift;
                            continue;
                        }
                        if (bPadded)
                            continue;
                        // First padding character of the group; it must
                        // not be one of the first two characters.
                        if (nShift >= 12)
                        {
                            bValid = false;
                            break;
                        }
                        bPadded = true;
                        nOctetCount = nShift == 6 ? 1 : 2;
                    }
                    if (!bValid)
                        break;

                    for (int nShift = 16; nOctetCount-- > 0; nShift -= 8)
                        sOctets.append(char(nGroup >> nShift & 0xFF));

                    if (*pScan == '?')
                    {
                        ++pScan;
                        break;
                    }
                    // A padded group must be the last one.
                    if (bPadded)
                    {
                        bValid = false;
                        break;
                    }
                }
            }
            else if (bValid)
            {
                const char * pTextStart = pScan;
                // Start of the literal run not yet copied to sOctets.
                const char * pRunStart = pScan;
                for (;;)
                {
                    if (pScan == pEnd)
                    {
                        bValid = false;
                        break;
                    }
                    sal_uInt32 const nOctet
                        = static_cast<unsigned char>(*pScan++);
                    if (nOctet == '=')
                    {
                        // "=" followed by two hexadecimal digits.
                        if (pEnd - pScan < 2)
                        {
                            bValid = false;
                            break;
                        }
                        int const nHigh = getHexWeight(pScan[0]);
                        int const nLow = getHexWeight(pScan[1]);
                        if (nHigh < 0 || nLow < 0)
                        {
                            bValid = false;
                            break;
                        }
                        sOctets.append(
                            rBody.subView(
                                (pRunStart - pBegin),
                                (pScan - 1 - pRunStart))
                            + OStringChar(char(nHigh << 4 | nLow)));
                        pScan += 2;
                        pRunStart = pScan;
                    }
                    else if (nOctet == '?')
                    {
                        // End of the encoded text, which must not be empty.
                        if (pScan - pTextStart > 1)
                            sOctets.append(rBody.subView(
                                (pRunStart - pBegin),
                                (pScan - 1 - pRunStart)));
                        else
                            bValid = false;
                        break;
                    }
                    else if (nOctet == '_')
                    {
                        // "_" stands for a space.
                        sOctets.append(
                            rBody.subView(
                                (pRunStart - pBegin),
                                (pScan - 1 - pRunStart))
                            + OString::Concat(" "));
                        pRunStart = pScan;
                    }
                    else if (!isVisible(nOctet))
                    {
                        bValid = false;
                        break;
                    }
                }
            }

            bValid = bValid && pScan != pEnd && *pScan++ == '=';

            // Convert the octets from the announced charset to Unicode.
            std::unique_ptr<sal_Unicode[]> pConverted;
            sal_Size nConvertedLength = 0;
            if (bValid)
            {
                pConverted
                    = convertToUnicode(sOctets.getStr(),
                                       sOctets.getStr() + sOctets.getLength(),
                                       eEncoding, nConvertedLength);
                if (!pConverted)
                    bValid = false;
            }

            if (bValid)
            {
                // Flush the pending raw text, then the decoded word.
                appendISO88591(sResult, pLiteralStart, pGapStart);
                sResult.append(
                    pConverted.get(),
                    static_cast< sal_Int32 >(nConvertedLength));
                pConverted.reset();
                pPos = pScan;
                pLiteralStart = pPos;

                // Skip, but do not yet drop, white space after the word.
                pGapStart = pPos;
                while (pPos != pEnd && isWhiteSpace(*pPos))
                    ++pPos;
                continue;
            }
        }

        // Not (the start of) a valid encoded word: consume one character.
        char const cLiteral = *pPos++;
        if (cLiteral != '"' && cLiteral != '(' && cLiteral != ')')
        {
            const char * pUtf8End = pPos - 1;
            sal_uInt32 nCodePoint = 0;
            if (translateUTF8Char(pUtf8End, pEnd, nCodePoint))
            {
                appendISO88591(sResult, pLiteralStart, pPos - 1);
                sResult.appendUtf32(nCodePoint);
                pPos = pUtf8End;
                pLiteralStart = pPos;
            }
        }
        pGapStart = pPos;
    }

    appendISO88591(sResult, pLiteralStart, pEnd);
    return sResult.makeStringAndClear();
}
1382
1383
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */