/src/libreoffice/tools/source/inet/inetmime.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <algorithm> |
21 | | #include <limits> |
22 | | #include <forward_list> |
23 | | #include <memory> |
24 | | |
25 | | #include <sal/log.hxx> |
26 | | #include <rtl/ustring.hxx> |
27 | | #include <rtl/strbuf.hxx> |
28 | | #include <rtl/ustrbuf.hxx> |
29 | | #include <rtl/tencinfo.h> |
30 | | #include <tools/debug.hxx> |
31 | | #include <tools/inetmime.hxx> |
32 | | #include <rtl/character.hxx> |
33 | | |
34 | | namespace { |
35 | | |
36 | | rtl_TextEncoding getCharsetEncoding(const char * pBegin, |
37 | | const char * pEnd); |
38 | | |
39 | | /** Check for US-ASCII white space character. |
40 | | |
41 | | @param nChar Some UCS-4 character. |
42 | | |
43 | | @return True if nChar is a US-ASCII white space character (US-ASCII |
44 | | 0x09 or 0x20). |
45 | | */ |
46 | | bool isWhiteSpace(sal_uInt32 nChar) |
47 | 0 | { |
48 | 0 | return nChar == '\t' || nChar == ' '; |
49 | 0 | } |
50 | | |
51 | | /** Get the Base 64 digit weight of a US-ASCII character. |
52 | | |
53 | | @param nChar Some UCS-4 character. |
54 | | |
55 | | @return If nChar is a US-ASCII Base 64 digit character (US-ASCII |
56 | | 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the |
57 | | corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding |
58 | | character (US-ASCII '='), return -1; otherwise, return -2. |
59 | | */ |
60 | | int getBase64Weight(sal_uInt32 nChar) |
61 | 0 | { |
62 | 0 | return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') : |
63 | 0 | rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) : |
64 | 0 | rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) : |
65 | 0 | nChar == '+' ? 62 : |
66 | 0 | nChar == '/' ? 63 : |
67 | 0 | nChar == '=' ? -1 : -2; |
68 | 0 | } |
69 | | |
70 | | bool startsWithLineFolding(const sal_Unicode * pBegin, |
71 | | const sal_Unicode * pEnd) |
72 | 0 | { |
73 | 0 | assert(pBegin && pBegin <= pEnd && "startsWithLineFolding(): Bad sequence"); |
74 | |
|
75 | 0 | return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A |
76 | 0 | && isWhiteSpace(pBegin[2]); // CR, LF |
77 | 0 | } |
78 | | |
79 | | rtl_TextEncoding translateFromMIME(rtl_TextEncoding |
80 | | eEncoding) |
81 | 0 | { |
82 | | #if defined(_WIN32) |
83 | | return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ? |
84 | | RTL_TEXTENCODING_MS_1252 : eEncoding; |
85 | | #else |
86 | 0 | return eEncoding; |
87 | 0 | #endif |
88 | 0 | } |
89 | | |
90 | | bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding) |
91 | 0 | { |
92 | 0 | return rtl_isOctetTextEncoding(eEncoding); |
93 | 0 | } |
94 | | |
95 | | std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin, |
96 | | const char * pEnd, |
97 | | rtl_TextEncoding eEncoding, |
98 | | sal_Size & rSize) |
99 | 49.2k | { |
100 | 49.2k | if (eEncoding == RTL_TEXTENCODING_DONTKNOW) |
101 | 643 | return nullptr; |
102 | 48.5k | rtl_TextToUnicodeConverter hConverter |
103 | 48.5k | = rtl_createTextToUnicodeConverter(eEncoding); |
104 | 48.5k | rtl_TextToUnicodeContext hContext |
105 | 48.5k | = rtl_createTextToUnicodeContext(hConverter); |
106 | 48.5k | std::unique_ptr<sal_Unicode[]> pBuffer; |
107 | 48.5k | sal_uInt32 nInfo; |
108 | 48.5k | for (sal_Size nBufferSize = pEnd - pBegin;; |
109 | 48.5k | nBufferSize += nBufferSize / 3 + 1) |
110 | 48.5k | { |
111 | 48.5k | pBuffer.reset(new sal_Unicode[nBufferSize]); |
112 | 48.5k | sal_Size nSrcCvtBytes; |
113 | 48.5k | rSize = rtl_convertTextToUnicode( |
114 | 48.5k | hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(), |
115 | 48.5k | nBufferSize, |
116 | 48.5k | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR |
117 | 48.5k | | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR |
118 | 48.5k | | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, |
119 | 48.5k | &nInfo, &nSrcCvtBytes); |
120 | 48.5k | if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL) |
121 | 48.5k | break; |
122 | 0 | pBuffer.reset(); |
123 | 0 | rtl_resetTextToUnicodeContext(hConverter, hContext); |
124 | 0 | } |
125 | 48.5k | rtl_destroyTextToUnicodeContext(hConverter, hContext); |
126 | 48.5k | rtl_destroyTextToUnicodeConverter(hConverter); |
127 | 48.5k | if (nInfo != 0) |
128 | 78 | { |
129 | 78 | pBuffer.reset(); |
130 | 78 | } |
131 | 48.5k | return pBuffer; |
132 | 49.2k | } |
133 | | |
134 | | void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar) |
135 | 50.7k | { |
136 | | // See RFC 2279 for a discussion of UTF-8. |
137 | 50.7k | DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char"); |
138 | | |
139 | 50.7k | if (nChar < 0x80) |
140 | 45.7k | rSink.append(char(nChar)); |
141 | 5.03k | else if (nChar < 0x800) |
142 | 4.16k | rSink.append(OStringChar(char(nChar >> 6 | 0xC0)) |
143 | 4.16k | + OStringChar(char((nChar & 0x3F) | 0x80))); |
144 | 870 | else if (nChar < 0x10000) |
145 | 686 | rSink.append( |
146 | 686 | OStringChar(char(nChar >> 12 | 0xE0)) |
147 | 686 | + OStringChar(char((nChar >> 6 & 0x3F) | 0x80)) |
148 | 686 | + OStringChar(char((nChar & 0x3F) | 0x80))); |
149 | 184 | else if (nChar < 0x200000) |
150 | 184 | rSink.append( |
151 | 184 | OStringChar(char(nChar >> 18 | 0xF0)) |
152 | 184 | + OStringChar(char((nChar >> 12 & 0x3F) | 0x80)) |
153 | 184 | + OStringChar(char((nChar >> 6 & 0x3F) | 0x80)) |
154 | 184 | + OStringChar(char((nChar & 0x3F) | 0x80))); |
155 | 0 | else if (nChar < 0x4000000) |
156 | 0 | rSink.append( |
157 | 0 | OStringChar(char(nChar >> 24 | 0xF8)) |
158 | 0 | + OStringChar(char((nChar >> 18 & 0x3F) | 0x80)) |
159 | 0 | + OStringChar(char((nChar >> 12 & 0x3F) | 0x80)) |
160 | 0 | + OStringChar(char((nChar >> 6 & 0x3F) | 0x80)) |
161 | 0 | + OStringChar(char((nChar & 0x3F) | 0x80))); |
162 | 0 | else |
163 | 0 | rSink.append( |
164 | 0 | OStringChar(char(nChar >> 30 | 0xFC)) |
165 | 0 | + OStringChar(char((nChar >> 24 & 0x3F) | 0x80)) |
166 | 0 | + OStringChar(char((nChar >> 18 & 0x3F) | 0x80)) |
167 | 0 | + OStringChar(char((nChar >> 12 & 0x3F) | 0x80)) |
168 | 0 | + OStringChar(char((nChar >> 6 & 0x3F) | 0x80)) |
169 | 0 | + OStringChar(char((nChar & 0x3F) | 0x80))); |
170 | 50.7k | } |
171 | | |
172 | | bool translateUTF8Char(const char *& rBegin, |
173 | | const char * pEnd, |
174 | | sal_uInt32 & rCharacter) |
175 | 0 | { |
176 | 0 | if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80 |
177 | 0 | || static_cast< unsigned char >(*rBegin) >= 0xFE) |
178 | 0 | return false; |
179 | | |
180 | 0 | int nCount; |
181 | 0 | sal_uInt32 nMin; |
182 | 0 | sal_uInt32 nUCS4; |
183 | 0 | const char * p = rBegin; |
184 | 0 | if (static_cast< unsigned char >(*p) < 0xE0) |
185 | 0 | { |
186 | 0 | nCount = 1; |
187 | 0 | nMin = 0x80; |
188 | 0 | nUCS4 = static_cast< unsigned char >(*p) & 0x1F; |
189 | 0 | } |
190 | 0 | else if (static_cast< unsigned char >(*p) < 0xF0) |
191 | 0 | { |
192 | 0 | nCount = 2; |
193 | 0 | nMin = 0x800; |
194 | 0 | nUCS4 = static_cast< unsigned char >(*p) & 0xF; |
195 | 0 | } |
196 | 0 | else if (static_cast< unsigned char >(*p) < 0xF8) |
197 | 0 | { |
198 | 0 | nCount = 3; |
199 | 0 | nMin = 0x10000; |
200 | 0 | nUCS4 = static_cast< unsigned char >(*p) & 7; |
201 | 0 | } |
202 | 0 | else if (static_cast< unsigned char >(*p) < 0xFC) |
203 | 0 | { |
204 | 0 | nCount = 4; |
205 | 0 | nMin = 0x200000; |
206 | 0 | nUCS4 = static_cast< unsigned char >(*p) & 3; |
207 | 0 | } |
208 | 0 | else |
209 | 0 | { |
210 | 0 | nCount = 5; |
211 | 0 | nMin = 0x4000000; |
212 | 0 | nUCS4 = static_cast< unsigned char >(*p) & 1; |
213 | 0 | } |
214 | 0 | ++p; |
215 | |
|
216 | 0 | for (; nCount-- > 0; ++p) |
217 | 0 | if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80) |
218 | 0 | nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F); |
219 | 0 | else |
220 | 0 | return false; |
221 | | |
222 | 0 | if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin) |
223 | 0 | return false; |
224 | | |
225 | 0 | rCharacter = nUCS4; |
226 | 0 | rBegin = p; |
227 | 0 | return true; |
228 | 0 | } |
229 | | |
230 | | void appendISO88591(OUStringBuffer & rText, char const * pBegin, |
231 | | char const * pEnd); |
232 | | |
233 | | struct Parameter |
234 | | { |
235 | | OString m_aAttribute; |
236 | | OString m_aCharset; |
237 | | OString m_aLanguage; |
238 | | OString m_aValue; |
239 | | sal_uInt32 m_nSection; |
240 | | bool m_bExtended; |
241 | | |
242 | | bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort |
243 | 25.4k | { |
244 | 25.4k | int nComp = m_aAttribute.compareTo(rhs.m_aAttribute); |
245 | 25.4k | return nComp < 0 || |
246 | 13.5k | (nComp == 0 && m_nSection < rhs.m_nSection); |
247 | 25.4k | } |
248 | | struct IsSameSection // is used to check container for duplicates with std::any_of |
249 | | { |
250 | | const OString& rAttribute; |
251 | | const sal_uInt32 nSection; |
252 | | bool operator()(const Parameter& r) const |
253 | 66.1k | { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; } |
254 | | }; |
255 | | }; |
256 | | |
257 | | typedef std::forward_list<Parameter> ParameterList; |
258 | | |
259 | | bool parseParameters(ParameterList const & rInput, |
260 | | INetContentTypeParameterList * pOutput); |
261 | | |
262 | | // appendISO88591 |
263 | | |
264 | | void appendISO88591(OUStringBuffer & rText, char const * pBegin, |
265 | | char const * pEnd) |
266 | 0 | { |
267 | 0 | sal_Int32 nLength = pEnd - pBegin; |
268 | 0 | std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]); |
269 | 0 | for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;) |
270 | 0 | *p++ = static_cast<unsigned char>(*pBegin++); |
271 | 0 | rText.append(pBuffer.get(), nLength); |
272 | 0 | } |
273 | | |
274 | | // parseParameters |
275 | | |
276 | | bool parseParameters(ParameterList const & rInput, |
277 | | INetContentTypeParameterList * pOutput) |
278 | 67.0k | { |
279 | 67.0k | if (pOutput) |
280 | 43.1k | pOutput->clear(); |
281 | | |
282 | 116k | for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++) |
283 | 49.7k | { |
284 | 49.7k | if (it->m_nSection > 0 |
285 | 419 | && (itPrev == rInput.end() |
286 | 143 | || itPrev->m_nSection != it->m_nSection - 1 |
287 | 77 | || itPrev->m_aAttribute != it->m_aAttribute)) |
288 | 360 | return false; |
289 | 49.7k | } |
290 | | |
291 | 66.7k | if (pOutput) |
292 | 91.9k | for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext) |
293 | 49.1k | { |
294 | 49.1k | bool bCharset = !it->m_aCharset.isEmpty(); |
295 | 49.1k | rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW; |
296 | 49.1k | if (bCharset) |
297 | 646 | eEncoding |
298 | 646 | = getCharsetEncoding(it->m_aCharset.getStr(), |
299 | 646 | it->m_aCharset.getStr() |
300 | 646 | + it->m_aCharset.getLength()); |
301 | 49.1k | OUStringBuffer aValue(64); |
302 | 49.1k | bool bBadEncoding = false; |
303 | 49.1k | itNext = it; |
304 | 49.1k | do |
305 | 49.1k | { |
306 | 49.1k | sal_Size nSize; |
307 | 49.1k | std::unique_ptr<sal_Unicode[]> pUnicode |
308 | 49.1k | = convertToUnicode(itNext->m_aValue.getStr(), |
309 | 49.1k | itNext->m_aValue.getStr() |
310 | 49.1k | + itNext->m_aValue.getLength(), |
311 | 49.1k | bCharset && it->m_bExtended ? |
312 | 646 | eEncoding : |
313 | 49.1k | RTL_TEXTENCODING_UTF8, |
314 | 49.1k | nSize); |
315 | 49.1k | if (!pUnicode && !(bCharset && it->m_bExtended)) |
316 | 78 | pUnicode = convertToUnicode( |
317 | 78 | itNext->m_aValue.getStr(), |
318 | 78 | itNext->m_aValue.getStr() |
319 | 78 | + itNext->m_aValue.getLength(), |
320 | 78 | RTL_TEXTENCODING_ISO_8859_1, nSize); |
321 | 49.1k | if (!pUnicode) |
322 | 643 | { |
323 | 643 | bBadEncoding = true; |
324 | 643 | break; |
325 | 643 | } |
326 | 48.5k | aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize)); |
327 | 48.5k | ++itNext; |
328 | 48.5k | } |
329 | 49.1k | while (itNext != rInput.end() && itNext->m_nSection != 0); |
330 | | |
331 | 49.1k | if (bBadEncoding) |
332 | 643 | { |
333 | 643 | aValue.setLength(0); |
334 | 643 | itNext = it; |
335 | 643 | do |
336 | 663 | { |
337 | 663 | if (itNext->m_bExtended) |
338 | 663 | { |
339 | 18.0k | for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i) |
340 | 17.3k | aValue.append( |
341 | 17.3k | static_cast<sal_Unicode>( |
342 | 17.3k | static_cast<unsigned char>(itNext->m_aValue[i]) |
343 | 17.3k | | 0xF800)); // map to unicode corporate use sub area |
344 | 663 | } |
345 | 0 | else |
346 | 0 | { |
347 | 0 | for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i) |
348 | 0 | aValue.append( itNext->m_aValue[i] ); |
349 | 0 | } |
350 | 663 | ++itNext; |
351 | 663 | } |
352 | 663 | while (itNext != rInput.end() && itNext->m_nSection != 0); |
353 | 643 | } |
354 | 49.1k | auto const ret = pOutput->insert( |
355 | 49.1k | {it->m_aAttribute, |
356 | 49.1k | {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}}); |
357 | 49.1k | SAL_INFO_IF(!ret.second, "tools", |
358 | 49.1k | "INetMIME: dropping duplicate parameter: " << it->m_aAttribute); |
359 | 49.1k | } |
360 | 66.7k | return true; |
361 | 66.7k | } |
362 | | |
363 | | /** Check whether some character is valid within an RFC 2045 <token>. |
364 | | |
365 | | @param nChar Some UCS-4 character. |
366 | | |
367 | | @return True if nChar is valid within an RFC 2047 <token> (US-ASCII |
368 | | 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+', |
369 | | '-', '.', '^', '_', '`', '{', '|', '}', or '~'). |
370 | | */ |
371 | | bool isTokenChar(sal_uInt32 nChar) |
372 | 1.65M | { |
373 | 1.65M | static const bool aMap[128] |
374 | 1.65M | = { false, false, false, false, false, false, false, false, |
375 | 1.65M | false, false, false, false, false, false, false, false, |
376 | 1.65M | false, false, false, false, false, false, false, false, |
377 | 1.65M | false, false, false, false, false, false, false, false, |
378 | 1.65M | false, true, false, true, true, true, true, true, // !"#$%&' |
379 | 1.65M | false, false, true, true, false, true, true, false, //()*+,-./ |
380 | 1.65M | true, true, true, true, true, true, true, true, //01234567 |
381 | 1.65M | true, true, false, false, false, false, false, false, //89:;<=>? |
382 | 1.65M | false, true, true, true, true, true, true, true, //@ABCDEFG |
383 | 1.65M | true, true, true, true, true, true, true, true, //HIJKLMNO |
384 | 1.65M | true, true, true, true, true, true, true, true, //PQRSTUVW |
385 | 1.65M | true, true, true, false, false, false, true, true, //XYZ[\]^_ |
386 | 1.65M | true, true, true, true, true, true, true, true, //`abcdefg |
387 | 1.65M | true, true, true, true, true, true, true, true, //hijklmno |
388 | 1.65M | true, true, true, true, true, true, true, true, //pqrstuvw |
389 | 1.65M | true, true, true, true, true, true, true, false //xyz{|}~ |
390 | 1.65M | }; |
391 | 1.65M | return rtl::isAscii(nChar) && aMap[nChar]; |
392 | 1.65M | } |
393 | | |
394 | | const sal_Unicode * skipComment(const sal_Unicode * pBegin, |
395 | | const sal_Unicode * pEnd) |
396 | 311 | { |
397 | 311 | assert(pBegin && pBegin <= pEnd && "skipComment(): Bad sequence"); |
398 | | |
399 | 311 | if (pBegin != pEnd && *pBegin == '(') |
400 | 311 | { |
401 | 311 | sal_uInt32 nLevel = 0; |
402 | 15.1k | for (const sal_Unicode * p = pBegin; p != pEnd;) |
403 | 14.9k | switch (*p++) |
404 | 14.9k | { |
405 | 1.45k | case '(': |
406 | 1.45k | ++nLevel; |
407 | 1.45k | break; |
408 | | |
409 | 313 | case ')': |
410 | 313 | if (--nLevel == 0) |
411 | 94 | return p; |
412 | 219 | break; |
413 | | |
414 | 344 | case '\\': |
415 | 344 | if (p != pEnd) |
416 | 306 | ++p; |
417 | 344 | break; |
418 | 14.9k | } |
419 | 311 | } |
420 | 217 | return pBegin; |
421 | 311 | } |
422 | | |
423 | | const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode * |
424 | | pBegin, |
425 | | const sal_Unicode * |
426 | | pEnd) |
427 | 494k | { |
428 | 494k | assert(pBegin && pBegin <= pEnd && "skipLinearWhiteSpaceComment(): Bad sequence"); |
429 | | |
430 | 540k | while (pBegin != pEnd) |
431 | 500k | switch (*pBegin) |
432 | 500k | { |
433 | 139 | case '\t': |
434 | 45.8k | case ' ': |
435 | 45.8k | ++pBegin; |
436 | 45.8k | break; |
437 | | |
438 | 0 | case 0x0D: // CR |
439 | 0 | if (startsWithLineFolding(pBegin, pEnd)) |
440 | 0 | pBegin += 3; |
441 | 0 | else |
442 | 0 | return pBegin; |
443 | 0 | break; |
444 | | |
445 | 311 | case '(': |
446 | 311 | { |
447 | 311 | const sal_Unicode * p = skipComment(pBegin, pEnd); |
448 | 311 | if (p == pBegin) |
449 | 217 | return pBegin; |
450 | 94 | pBegin = p; |
451 | 94 | break; |
452 | 311 | } |
453 | | |
454 | 454k | default: |
455 | 454k | return pBegin; |
456 | 500k | } |
457 | 39.2k | return pBegin; |
458 | 494k | } |
459 | | |
460 | | const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin, |
461 | | const sal_Unicode * pEnd) |
462 | 40 | { |
463 | 40 | assert(pBegin && pBegin <= pEnd && "skipQuotedString(): Bad sequence"); |
464 | | |
465 | 40 | if (pBegin != pEnd && *pBegin == '"') |
466 | 1.49k | for (const sal_Unicode * p = pBegin + 1; p != pEnd;) |
467 | 1.49k | switch (*p++) |
468 | 1.49k | { |
469 | 0 | case 0x0D: // CR |
470 | 0 | if (pEnd - p < 2 || *p++ != 0x0A // LF |
471 | 0 | || !isWhiteSpace(*p++)) |
472 | 0 | return pBegin; |
473 | 0 | break; |
474 | | |
475 | 33 | case '"': |
476 | 33 | return p; |
477 | | |
478 | 1 | case '\\': |
479 | 1 | if (p != pEnd) |
480 | 1 | ++p; |
481 | 1 | break; |
482 | 1.49k | } |
483 | 7 | return pBegin; |
484 | 40 | } |
485 | | |
486 | | sal_Unicode const * scanParameters(sal_Unicode const * pBegin, |
487 | | sal_Unicode const * pEnd, |
488 | | INetContentTypeParameterList * |
489 | | pParameters) |
490 | 67.0k | { |
491 | 67.0k | ParameterList aList; |
492 | 67.0k | sal_Unicode const * pParameterBegin = pBegin; |
493 | 67.0k | for (sal_Unicode const * p = pParameterBegin;;) |
494 | 117k | { |
495 | 117k | pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd); |
496 | 117k | if (pParameterBegin == pEnd || *pParameterBegin != ';') |
497 | 60.0k | break; |
498 | 57.2k | p = pParameterBegin + 1; |
499 | | |
500 | 57.2k | sal_Unicode const * pAttributeBegin |
501 | 57.2k | = skipLinearWhiteSpaceComment(p, pEnd); |
502 | 57.2k | p = pAttributeBegin; |
503 | 57.2k | bool bDowncaseAttribute = false; |
504 | 449k | while (p != pEnd && isTokenChar(*p) && *p != '*') |
505 | 391k | { |
506 | 391k | bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p); |
507 | 391k | ++p; |
508 | 391k | } |
509 | 57.2k | if (p == pAttributeBegin) |
510 | 204 | break; |
511 | 57.0k | OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US); |
512 | 57.0k | if (bDowncaseAttribute) |
513 | 13.1k | aAttribute = aAttribute.toAsciiLowerCase(); |
514 | | |
515 | 57.0k | sal_uInt32 nSection = 0; |
516 | 57.0k | if (p != pEnd && *p == '*') |
517 | 3.83k | { |
518 | 3.83k | ++p; |
519 | 3.83k | if (p != pEnd && rtl::isAsciiDigit(*p) |
520 | 2.05k | && !INetMIME::scanUnsigned(p, pEnd, false, nSection)) |
521 | 273 | break; |
522 | 3.83k | } |
523 | | |
524 | 56.7k | bool bPresent = std::any_of(aList.begin(), aList.end(), |
525 | 56.7k | Parameter::IsSameSection{aAttribute, nSection}); |
526 | 56.7k | if (bPresent) |
527 | 148 | break; |
528 | | |
529 | 56.5k | bool bExtended = false; |
530 | 56.5k | if (p != pEnd && *p == '*') |
531 | 1.25k | { |
532 | 1.25k | ++p; |
533 | 1.25k | bExtended = true; |
534 | 1.25k | } |
535 | | |
536 | 56.5k | p = skipLinearWhiteSpaceComment(p, pEnd); |
537 | | |
538 | 56.5k | if (p == pEnd || *p != '=') |
539 | 4.84k | break; |
540 | | |
541 | 51.7k | p = skipLinearWhiteSpaceComment(p + 1, pEnd); |
542 | | |
543 | 51.7k | OString aCharset; |
544 | 51.7k | OString aLanguage; |
545 | 51.7k | OString aValue; |
546 | 51.7k | if (bExtended) |
547 | 1.17k | { |
548 | 1.17k | if (nSection == 0) |
549 | 1.13k | { |
550 | 1.13k | sal_Unicode const * pCharsetBegin = p; |
551 | 1.13k | bool bDowncaseCharset = false; |
552 | 27.2k | while (p != pEnd && isTokenChar(*p) && *p != '\'') |
553 | 26.1k | { |
554 | 26.1k | bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p); |
555 | 26.1k | ++p; |
556 | 26.1k | } |
557 | 1.13k | if (p == pCharsetBegin) |
558 | 10 | break; |
559 | 1.12k | if (pParameters) |
560 | 1.12k | { |
561 | 1.12k | aCharset = OString( |
562 | 1.12k | pCharsetBegin, |
563 | 1.12k | p - pCharsetBegin, |
564 | 1.12k | RTL_TEXTENCODING_ASCII_US); |
565 | 1.12k | if (bDowncaseCharset) |
566 | 90 | aCharset = aCharset.toAsciiLowerCase(); |
567 | 1.12k | } |
568 | | |
569 | 1.12k | if (p == pEnd || *p != '\'') |
570 | 264 | break; |
571 | 858 | ++p; |
572 | | |
573 | 858 | sal_Unicode const * pLanguageBegin = p; |
574 | 858 | bool bDowncaseLanguage = false; |
575 | 858 | int nLetters = 0; |
576 | 2.87k | for (; p != pEnd; ++p) |
577 | 2.83k | if (rtl::isAsciiAlpha(*p)) |
578 | 2.07k | { |
579 | 2.07k | if (++nLetters > 8) |
580 | 71 | break; |
581 | 2.00k | bDowncaseLanguage = bDowncaseLanguage |
582 | 1.44k | || rtl::isAsciiUpperCase(*p); |
583 | 2.00k | } |
584 | 761 | else if (*p == '-') |
585 | 44 | { |
586 | 44 | if (nLetters == 0) |
587 | 34 | break; |
588 | 10 | nLetters = 0; |
589 | 10 | } |
590 | 717 | else |
591 | 717 | break; |
592 | 858 | if (nLetters == 0 || nLetters > 8) |
593 | 129 | break; |
594 | 729 | if (pParameters) |
595 | 729 | { |
596 | 729 | aLanguage = OString( |
597 | 729 | pLanguageBegin, |
598 | 729 | p - pLanguageBegin, |
599 | 729 | RTL_TEXTENCODING_ASCII_US); |
600 | 729 | if (bDowncaseLanguage) |
601 | 679 | aLanguage = aLanguage.toAsciiLowerCase(); |
602 | 729 | } |
603 | | |
604 | 729 | if (p == pEnd || *p != '\'') |
605 | 83 | break; |
606 | 646 | ++p; |
607 | 646 | } |
608 | 690 | if (pParameters) |
609 | 690 | { |
610 | 690 | OStringBuffer aSink; |
611 | 16.6k | while (p != pEnd) |
612 | 16.4k | { |
613 | 16.4k | auto q = p; |
614 | 16.4k | sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd); |
615 | 16.4k | if (rtl::isAscii(nChar) && !isTokenChar(nChar)) |
616 | 528 | break; |
617 | 15.9k | p = q; |
618 | 15.9k | if (nChar == '%' && p + 1 < pEnd) |
619 | 8.34k | { |
620 | 8.34k | int nWeight1 = INetMIME::getHexWeight(p[0]); |
621 | 8.34k | int nWeight2 = INetMIME::getHexWeight(p[1]); |
622 | 8.34k | if (nWeight1 >= 0 && nWeight2 >= 0) |
623 | 95 | { |
624 | 95 | aSink.append(char(nWeight1 << 4 | nWeight2)); |
625 | 95 | p += 2; |
626 | 95 | continue; |
627 | 95 | } |
628 | 8.34k | } |
629 | 15.8k | writeUTF8(aSink, nChar); |
630 | 15.8k | } |
631 | 690 | aValue = aSink.makeStringAndClear(); |
632 | 690 | } |
633 | 0 | else |
634 | 0 | while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p))) |
635 | 0 | ++p; |
636 | 690 | } |
637 | 50.5k | else if (p != pEnd && *p == '"') |
638 | 6.91k | if (pParameters) |
639 | 6.87k | { |
640 | 6.87k | OStringBuffer aSink(256); |
641 | 6.87k | bool bInvalid = false; |
642 | 6.87k | for (++p;;) |
643 | 41.8k | { |
644 | 41.8k | if (p == pEnd) |
645 | 194 | { |
646 | 194 | bInvalid = true; |
647 | 194 | break; |
648 | 194 | } |
649 | 41.6k | sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd); |
650 | 41.6k | if (nChar == '"') |
651 | 6.65k | break; |
652 | 34.9k | else if (nChar == 0x0D) // CR |
653 | 0 | { |
654 | 0 | if (pEnd - p < 2 || *p++ != 0x0A // LF |
655 | 0 | || !isWhiteSpace(*p)) |
656 | 0 | { |
657 | 0 | bInvalid = true; |
658 | 0 | break; |
659 | 0 | } |
660 | 0 | nChar = static_cast<unsigned char>(*p++); |
661 | 0 | } |
662 | 34.9k | else if (nChar == '\\') |
663 | 922 | { |
664 | 922 | if (p == pEnd) |
665 | 22 | { |
666 | 22 | bInvalid = true; |
667 | 22 | break; |
668 | 22 | } |
669 | 900 | nChar = INetMIME::getUTF32Character(p, pEnd); |
670 | 900 | } |
671 | 34.9k | writeUTF8(aSink, nChar); |
672 | 34.9k | } |
673 | 6.87k | if (bInvalid) |
674 | 216 | break; |
675 | 6.65k | aValue = aSink.makeStringAndClear(); |
676 | 6.65k | } |
677 | 40 | else |
678 | 40 | { |
679 | 40 | sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd); |
680 | 40 | if (p == pStringEnd) |
681 | 7 | break; |
682 | 33 | p = pStringEnd; |
683 | 33 | } |
684 | 43.6k | else |
685 | 43.6k | { |
686 | 43.6k | sal_Unicode const * pTokenBegin = p; |
687 | 326k | while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p))) |
688 | 283k | ++p; |
689 | 43.6k | if (p == pTokenBegin) |
690 | 905 | break; |
691 | 42.7k | if (pParameters) |
692 | 42.6k | aValue = OString( |
693 | 42.6k | pTokenBegin, p - pTokenBegin, |
694 | 42.6k | RTL_TEXTENCODING_UTF8); |
695 | 42.7k | } |
696 | 50.1k | aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended}); |
697 | 50.1k | } |
698 | 67.0k | aList.sort(); |
699 | 67.0k | return parseParameters(aList, pParameters) ? pParameterBegin : pBegin; |
700 | 67.0k | } |
701 | | |
702 | | bool equalIgnoreCase(const char * pBegin1, |
703 | | const char * pEnd1, |
704 | | const char * pString2) |
705 | 111k | { |
706 | 111k | assert(pBegin1 && pBegin1 <= pEnd1 && pString2 && |
707 | 111k | "equalIgnoreCase(): Bad sequences"); |
708 | | |
709 | 113k | while (*pString2 != 0) |
710 | 113k | if (pBegin1 == pEnd1 |
711 | 113k | || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++)) |
712 | 113k | != rtl::toAsciiUpperCase( |
713 | 113k | static_cast<unsigned char>(*pString2++)))) |
714 | 111k | return false; |
715 | 114 | return pBegin1 == pEnd1; |
716 | 111k | } |
717 | | |
718 | | struct EncodingEntry |
719 | | { |
720 | | char const * m_aName; |
721 | | rtl_TextEncoding m_eEncoding; |
722 | | }; |
723 | | |
724 | | // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/ |
725 | | // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise |
726 | | // noted: |
727 | | EncodingEntry const aEncodingMap[] |
728 | | = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US }, |
729 | | { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US }, |
730 | | { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US }, |
731 | | { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US }, |
732 | | { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US }, |
733 | | { "ASCII", RTL_TEXTENCODING_ASCII_US }, |
734 | | { "ISO646-US", RTL_TEXTENCODING_ASCII_US }, |
735 | | { "US", RTL_TEXTENCODING_ASCII_US }, |
736 | | { "IBM367", RTL_TEXTENCODING_ASCII_US }, |
737 | | { "CP367", RTL_TEXTENCODING_ASCII_US }, |
738 | | { "CSASCII", RTL_TEXTENCODING_ASCII_US }, |
739 | | { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 }, |
740 | | { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 }, |
741 | | { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 }, |
742 | | { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 }, |
743 | | { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 }, |
744 | | { "L1", RTL_TEXTENCODING_ISO_8859_1 }, |
745 | | { "IBM819", RTL_TEXTENCODING_ISO_8859_1 }, |
746 | | { "CP819", RTL_TEXTENCODING_ISO_8859_1 }, |
747 | | { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 }, |
748 | | { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 }, |
749 | | { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 }, |
750 | | { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 }, |
751 | | { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 }, |
752 | | { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 }, |
753 | | { "L2", RTL_TEXTENCODING_ISO_8859_2 }, |
754 | | { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 }, |
755 | | { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 }, |
756 | | { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 }, |
757 | | { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 }, |
758 | | { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 }, |
759 | | { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 }, |
760 | | { "L3", RTL_TEXTENCODING_ISO_8859_3 }, |
761 | | { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 }, |
762 | | { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 }, |
763 | | { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 }, |
764 | | { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 }, |
765 | | { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 }, |
766 | | { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 }, |
767 | | { "L4", RTL_TEXTENCODING_ISO_8859_4 }, |
768 | | { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 }, |
769 | | { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 }, |
770 | | { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 }, |
771 | | { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 }, |
772 | | { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 }, |
773 | | { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 }, |
774 | | { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 }, |
775 | | { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 }, |
776 | | { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 }, |
777 | | { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 }, |
778 | | { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 }, |
779 | | { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 }, |
780 | | { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 }, |
781 | | { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 }, |
782 | | { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 }, |
783 | | { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 }, |
784 | | { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 }, |
785 | | { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 }, |
786 | | { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 }, |
787 | | { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 }, |
788 | | { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 }, |
789 | | { "GREEK", RTL_TEXTENCODING_ISO_8859_7 }, |
790 | | { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 }, |
791 | | { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 }, |
792 | | { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 }, |
793 | | { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 }, |
794 | | { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 }, |
795 | | { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 }, |
796 | | { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 }, |
797 | | { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 }, |
798 | | { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 }, |
799 | | { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 }, |
800 | | { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 }, |
801 | | { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 }, |
802 | | { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 }, |
803 | | { "L5", RTL_TEXTENCODING_ISO_8859_9 }, |
804 | | { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 }, |
805 | | { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047 |
806 | | { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 }, |
807 | | { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047 |
808 | | { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN }, |
809 | | { "MAC", RTL_TEXTENCODING_APPLE_ROMAN }, |
810 | | { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN }, |
811 | | { "IBM437", RTL_TEXTENCODING_IBM_437 }, |
812 | | { "CP437", RTL_TEXTENCODING_IBM_437 }, |
813 | | { "437", RTL_TEXTENCODING_IBM_437 }, |
814 | | { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 }, |
815 | | { "IBM850", RTL_TEXTENCODING_IBM_850 }, |
816 | | { "CP850", RTL_TEXTENCODING_IBM_850 }, |
817 | | { "850", RTL_TEXTENCODING_IBM_850 }, |
818 | | { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 }, |
819 | | { "IBM860", RTL_TEXTENCODING_IBM_860 }, |
820 | | { "CP860", RTL_TEXTENCODING_IBM_860 }, |
821 | | { "860", RTL_TEXTENCODING_IBM_860 }, |
822 | | { "CSIBM860", RTL_TEXTENCODING_IBM_860 }, |
823 | | { "IBM861", RTL_TEXTENCODING_IBM_861 }, |
824 | | { "CP861", RTL_TEXTENCODING_IBM_861 }, |
825 | | { "861", RTL_TEXTENCODING_IBM_861 }, |
826 | | { "CP-IS", RTL_TEXTENCODING_IBM_861 }, |
827 | | { "CSIBM861", RTL_TEXTENCODING_IBM_861 }, |
828 | | { "IBM863", RTL_TEXTENCODING_IBM_863 }, |
829 | | { "CP863", RTL_TEXTENCODING_IBM_863 }, |
830 | | { "863", RTL_TEXTENCODING_IBM_863 }, |
831 | | { "CSIBM863", RTL_TEXTENCODING_IBM_863 }, |
832 | | { "IBM865", RTL_TEXTENCODING_IBM_865 }, |
833 | | { "CP865", RTL_TEXTENCODING_IBM_865 }, |
834 | | { "865", RTL_TEXTENCODING_IBM_865 }, |
835 | | { "CSIBM865", RTL_TEXTENCODING_IBM_865 }, |
836 | | { "IBM775", RTL_TEXTENCODING_IBM_775 }, |
837 | | { "CP775", RTL_TEXTENCODING_IBM_775 }, |
838 | | { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 }, |
839 | | { "IBM852", RTL_TEXTENCODING_IBM_852 }, |
840 | | { "CP852", RTL_TEXTENCODING_IBM_852 }, |
841 | | { "852", RTL_TEXTENCODING_IBM_852 }, |
842 | | { "CSPCP852", RTL_TEXTENCODING_IBM_852 }, |
843 | | { "IBM855", RTL_TEXTENCODING_IBM_855 }, |
844 | | { "CP855", RTL_TEXTENCODING_IBM_855 }, |
845 | | { "855", RTL_TEXTENCODING_IBM_855 }, |
846 | | { "CSIBM855", RTL_TEXTENCODING_IBM_855 }, |
847 | | { "IBM857", RTL_TEXTENCODING_IBM_857 }, |
848 | | { "CP857", RTL_TEXTENCODING_IBM_857 }, |
849 | | { "857", RTL_TEXTENCODING_IBM_857 }, |
850 | | { "CSIBM857", RTL_TEXTENCODING_IBM_857 }, |
851 | | { "IBM862", RTL_TEXTENCODING_IBM_862 }, |
852 | | { "CP862", RTL_TEXTENCODING_IBM_862 }, |
853 | | { "862", RTL_TEXTENCODING_IBM_862 }, |
854 | | { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 }, |
855 | | { "IBM864", RTL_TEXTENCODING_IBM_864 }, |
856 | | { "CP864", RTL_TEXTENCODING_IBM_864 }, |
857 | | { "CSIBM864", RTL_TEXTENCODING_IBM_864 }, |
858 | | { "IBM866", RTL_TEXTENCODING_IBM_866 }, |
859 | | { "CP866", RTL_TEXTENCODING_IBM_866 }, |
860 | | { "866", RTL_TEXTENCODING_IBM_866 }, |
861 | | { "CSIBM866", RTL_TEXTENCODING_IBM_866 }, |
862 | | { "IBM869", RTL_TEXTENCODING_IBM_869 }, |
863 | | { "CP869", RTL_TEXTENCODING_IBM_869 }, |
864 | | { "869", RTL_TEXTENCODING_IBM_869 }, |
865 | | { "CP-GR", RTL_TEXTENCODING_IBM_869 }, |
866 | | { "CSIBM869", RTL_TEXTENCODING_IBM_869 }, |
867 | | { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 }, |
868 | | { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 }, |
869 | | { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 }, |
870 | | { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 }, |
871 | | { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 }, |
872 | | { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 }, |
873 | | { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 }, |
874 | | { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 }, |
875 | | { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS }, |
876 | | { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS }, |
877 | | { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS }, |
878 | | { "GB2312", RTL_TEXTENCODING_GB_2312 }, |
879 | | { "CSGB2312", RTL_TEXTENCODING_GB_2312 }, |
880 | | { "BIG5", RTL_TEXTENCODING_BIG5 }, |
881 | | { "CSBIG5", RTL_TEXTENCODING_BIG5 }, |
882 | | { "EUC-JP", RTL_TEXTENCODING_EUC_JP }, |
883 | | { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE", |
884 | | RTL_TEXTENCODING_EUC_JP }, |
885 | | { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP }, |
886 | | { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP }, |
887 | | { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP }, |
888 | | { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN }, |
889 | | { "KOI8-R", RTL_TEXTENCODING_KOI8_R }, |
890 | | { "CSKOI8R", RTL_TEXTENCODING_KOI8_R }, |
891 | | { "UTF-7", RTL_TEXTENCODING_UTF7 }, |
892 | | { "UTF-8", RTL_TEXTENCODING_UTF8 }, |
893 | | { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047 |
894 | | { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047 |
895 | | { "EUC-KR", RTL_TEXTENCODING_EUC_KR }, |
896 | | { "CSEUCKR", RTL_TEXTENCODING_EUC_KR }, |
897 | | { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR }, |
898 | | { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR }, |
899 | | { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 }, |
900 | | { "CSUCS4", RTL_TEXTENCODING_UCS4 }, |
901 | | { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 }, |
902 | | { "CSUNICODE", RTL_TEXTENCODING_UCS2 } }; |
903 | | |
904 | | rtl_TextEncoding getCharsetEncoding(char const * pBegin, |
905 | | char const * pEnd) |
906 | 646 | { |
907 | 646 | for (const EncodingEntry& i : aEncodingMap) |
908 | 111k | if (equalIgnoreCase(pBegin, pEnd, i.m_aName)) |
909 | 3 | return i.m_eEncoding; |
910 | 643 | return RTL_TEXTENCODING_DONTKNOW; |
911 | 646 | } |
912 | | |
913 | | } |
914 | | |
915 | | // INetMIME |
916 | | |
917 | | // static |
918 | | bool INetMIME::isAtomChar(sal_uInt32 nChar) |
919 | 0 | { |
920 | 0 | static const bool aMap[128] |
921 | 0 | = { false, false, false, false, false, false, false, false, |
922 | 0 | false, false, false, false, false, false, false, false, |
923 | 0 | false, false, false, false, false, false, false, false, |
924 | 0 | false, false, false, false, false, false, false, false, |
925 | 0 | false, true, false, true, true, true, true, true, // !"#$%&' |
926 | 0 | false, false, true, true, false, true, false, true, //()*+,-./ |
927 | 0 | true, true, true, true, true, true, true, true, //01234567 |
928 | 0 | true, true, false, false, false, true, false, true, //89:;<=>? |
929 | 0 | false, true, true, true, true, true, true, true, //@ABCDEFG |
930 | 0 | true, true, true, true, true, true, true, true, //HIJKLMNO |
931 | 0 | true, true, true, true, true, true, true, true, //PQRSTUVW |
932 | 0 | true, true, true, false, false, false, true, true, //XYZ[\]^_ |
933 | 0 | true, true, true, true, true, true, true, true, //`abcdefg |
934 | 0 | true, true, true, true, true, true, true, true, //hijklmno |
935 | 0 | true, true, true, true, true, true, true, true, //pqrstuvw |
936 | 0 | true, true, true, true, true, true, true, false //xyz{|}~ |
937 | 0 | }; |
938 | 0 | return rtl::isAscii(nChar) && aMap[nChar]; |
939 | 0 | } |
940 | | |
941 | | // static |
942 | | bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar) |
943 | 0 | { |
944 | 0 | static const bool aMap[128] |
945 | 0 | = { false, false, false, false, false, false, false, false, |
946 | 0 | false, false, false, false, false, false, false, false, |
947 | 0 | false, false, false, false, false, false, false, false, |
948 | 0 | false, false, false, false, false, false, false, false, |
949 | 0 | false, true, false, true, true, false, true, true, // !"#$%&' |
950 | 0 | false, false, false, true, true, true, true, true, //()*+,-./ |
951 | 0 | true, true, true, true, true, true, true, true, //01234567 |
952 | 0 | true, true, true, true, true, true, true, true, //89:;<=>? |
953 | 0 | true, true, true, true, true, true, true, true, //@ABCDEFG |
954 | 0 | true, true, true, true, true, true, true, true, //HIJKLMNO |
955 | 0 | true, true, true, true, true, true, true, true, //PQRSTUVW |
956 | 0 | true, true, true, true, false, true, true, true, //XYZ[\]^_ |
957 | 0 | true, true, true, true, true, true, true, true, //`abcdefg |
958 | 0 | true, true, true, true, true, true, true, true, //hijklmno |
959 | 0 | true, true, true, true, true, true, true, true, //pqrstuvw |
960 | 0 | true, true, true, false, true, true, true, false //xyz{|}~ |
961 | 0 | }; |
962 | 0 | return rtl::isAscii(nChar) && aMap[nChar]; |
963 | 0 | } |
964 | | |
965 | | // static |
966 | | bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1, |
967 | | const sal_Unicode * pEnd1, |
968 | | const char * pString2) |
969 | 865k | { |
970 | 865k | assert(pBegin1 && pBegin1 <= pEnd1 && pString2 && |
971 | 865k | "INetMIME::equalIgnoreCase(): Bad sequences"); |
972 | | |
973 | 867k | while (*pString2 != 0) |
974 | 867k | if (pBegin1 == pEnd1 |
975 | 46.0k | || (rtl::toAsciiUpperCase(*pBegin1++) |
976 | 46.0k | != rtl::toAsciiUpperCase( |
977 | 46.0k | static_cast<unsigned char>(*pString2++)))) |
978 | 865k | return false; |
979 | 0 | return pBegin1 == pEnd1; |
980 | 865k | } |
981 | | |
982 | | // static |
983 | | bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin, |
984 | | const sal_Unicode * pEnd, bool bLeadingZeroes, |
985 | | sal_uInt32 & rValue) |
986 | 2.05k | { |
987 | 2.05k | sal_uInt64 nTheValue = 0; |
988 | 2.05k | const sal_Unicode * p = rBegin; |
989 | 7.76k | for ( ; p != pEnd; ++p) |
990 | 7.65k | { |
991 | 7.65k | int nWeight = getWeight(*p); |
992 | 7.65k | if (nWeight < 0) |
993 | 1.86k | break; |
994 | 5.78k | nTheValue = 10 * nTheValue + nWeight; |
995 | 5.78k | if (nTheValue > std::numeric_limits< sal_uInt32 >::max()) |
996 | 78 | return false; |
997 | 5.78k | } |
998 | 1.98k | if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1))) |
999 | 195 | return false; |
1000 | 1.78k | rBegin = p; |
1001 | 1.78k | rValue = sal_uInt32(nTheValue); |
1002 | 1.78k | return true; |
1003 | 1.98k | } |
1004 | | |
1005 | | // static |
1006 | | sal_Unicode const * INetMIME::scanContentType( |
1007 | | std::u16string_view rStr, OUString * pType, |
1008 | | OUString * pSubType, INetContentTypeParameterList * pParameters) |
1009 | 72.5k | { |
1010 | 72.5k | sal_Unicode const * pBegin = rStr.data(); |
1011 | 72.5k | sal_Unicode const * pEnd = pBegin + rStr.size(); |
1012 | 72.5k | sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd); |
1013 | 72.5k | sal_Unicode const * pTypeBegin = p; |
1014 | 412k | while (p != pEnd && isTokenChar(*p)) |
1015 | 339k | { |
1016 | 339k | ++p; |
1017 | 339k | } |
1018 | 72.5k | if (p == pTypeBegin) |
1019 | 1.51k | return nullptr; |
1020 | 71.0k | sal_Unicode const * pTypeEnd = p; |
1021 | | |
1022 | 71.0k | p = skipLinearWhiteSpaceComment(p, pEnd); |
1023 | 71.0k | if (p == pEnd || *p++ != '/') |
1024 | 3.39k | return nullptr; |
1025 | | |
1026 | 67.6k | p = skipLinearWhiteSpaceComment(p, pEnd); |
1027 | 67.6k | sal_Unicode const * pSubTypeBegin = p; |
1028 | 467k | while (p != pEnd && isTokenChar(*p)) |
1029 | 399k | { |
1030 | 399k | ++p; |
1031 | 399k | } |
1032 | 67.6k | if (p == pSubTypeBegin) |
1033 | 585 | return nullptr; |
1034 | 67.0k | sal_Unicode const * pSubTypeEnd = p; |
1035 | | |
1036 | 67.0k | if (pType != nullptr) |
1037 | 43.1k | { |
1038 | 43.1k | *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase(); |
1039 | 43.1k | } |
1040 | 67.0k | if (pSubType != nullptr) |
1041 | 43.1k | { |
1042 | 43.1k | *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin) |
1043 | 43.1k | .toAsciiLowerCase(); |
1044 | 43.1k | } |
1045 | | |
1046 | 67.0k | return scanParameters(p, pEnd, pParameters); |
1047 | 67.6k | } |
1048 | | |
1049 | | // static |
1050 | | OUString INetMIME::decodeHeaderFieldBody(const OString& rBody) |
1051 | 0 | { |
1052 | | // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old |
1053 | | // versions of StarOffice send mails with header fields where encoded |
1054 | | // words can be preceded by '=', ',', '.', '"', or '(', and followed by |
1055 | | // '=', ',', '.', '"', ')', without any required white space in between. |
1056 | | // And there appear to exist some broken mailers that only encode single |
1057 | | // letters within words, like "Appel |
1058 | | // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to |
1059 | | // detect encoded words even when not properly surrounded by white space. |
1060 | | |
1061 | | // Non US-ASCII characters in rBody are treated as ISO-8859-1. |
1062 | | |
1063 | | // encoded-word = "=?" |
1064 | | // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E) |
1065 | | // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?" |
1066 | | // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==") |
1067 | | // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG)) |
1068 | | // "?=" |
1069 | | |
1070 | | // base64 = ALPHA / DIGIT / "+" / "/" |
1071 | |
|
1072 | 0 | const char * pBegin = rBody.getStr(); |
1073 | 0 | const char * pEnd = pBegin + rBody.getLength(); |
1074 | |
|
1075 | 0 | OUStringBuffer sDecoded; |
1076 | 0 | const char * pCopyBegin = pBegin; |
1077 | | |
1078 | | /* bool bStartEncodedWord = true; */ |
1079 | 0 | const char * pWSPBegin = pBegin; |
1080 | |
|
1081 | 0 | for (const char * p = pBegin; p != pEnd;) |
1082 | 0 | { |
1083 | 0 | if (*p == '=' /* && bStartEncodedWord */) |
1084 | 0 | { |
1085 | 0 | const char * q = p + 1; |
1086 | 0 | bool bEncodedWord = q != pEnd && *q++ == '?'; |
1087 | |
|
1088 | 0 | rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW; |
1089 | 0 | if (bEncodedWord) |
1090 | 0 | { |
1091 | 0 | const char * pCharsetBegin = q; |
1092 | 0 | const char * pLanguageBegin = nullptr; |
1093 | 0 | int nAlphaCount = 0; |
1094 | 0 | for (bool bDone = false; !bDone;) |
1095 | 0 | if (q == pEnd) |
1096 | 0 | { |
1097 | 0 | bEncodedWord = false; |
1098 | 0 | bDone = true; |
1099 | 0 | } |
1100 | 0 | else |
1101 | 0 | { |
1102 | 0 | char cChar = *q++; |
1103 | 0 | switch (cChar) |
1104 | 0 | { |
1105 | 0 | case '*': |
1106 | 0 | pLanguageBegin = q - 1; |
1107 | 0 | nAlphaCount = 0; |
1108 | 0 | break; |
1109 | | |
1110 | 0 | case '-': |
1111 | 0 | if (pLanguageBegin != nullptr) |
1112 | 0 | { |
1113 | 0 | if (nAlphaCount == 0) |
1114 | 0 | pLanguageBegin = nullptr; |
1115 | 0 | else |
1116 | 0 | nAlphaCount = 0; |
1117 | 0 | } |
1118 | 0 | break; |
1119 | | |
1120 | 0 | case '?': |
1121 | 0 | if (pCharsetBegin == q - 1) |
1122 | 0 | bEncodedWord = false; |
1123 | 0 | else |
1124 | 0 | { |
1125 | 0 | eCharsetEncoding |
1126 | 0 | = getCharsetEncoding( |
1127 | 0 | pCharsetBegin, |
1128 | 0 | pLanguageBegin == nullptr |
1129 | 0 | || nAlphaCount == 0 ? |
1130 | 0 | q - 1 : pLanguageBegin); |
1131 | 0 | bEncodedWord = isMIMECharsetEncoding( |
1132 | 0 | eCharsetEncoding); |
1133 | 0 | eCharsetEncoding |
1134 | 0 | = translateFromMIME(eCharsetEncoding); |
1135 | 0 | } |
1136 | 0 | bDone = true; |
1137 | 0 | break; |
1138 | | |
1139 | 0 | default: |
1140 | 0 | if (pLanguageBegin != nullptr |
1141 | 0 | && (!rtl::isAsciiAlpha( |
1142 | 0 | static_cast<unsigned char>(cChar)) |
1143 | 0 | || ++nAlphaCount > 8)) |
1144 | 0 | pLanguageBegin = nullptr; |
1145 | 0 | break; |
1146 | 0 | } |
1147 | 0 | } |
1148 | 0 | } |
1149 | | |
1150 | 0 | bool bEncodingB = false; |
1151 | 0 | if (bEncodedWord) |
1152 | 0 | { |
1153 | 0 | if (q == pEnd) |
1154 | 0 | bEncodedWord = false; |
1155 | 0 | else |
1156 | 0 | { |
1157 | 0 | switch (*q++) |
1158 | 0 | { |
1159 | 0 | case 'B': |
1160 | 0 | case 'b': |
1161 | 0 | bEncodingB = true; |
1162 | 0 | break; |
1163 | | |
1164 | 0 | case 'Q': |
1165 | 0 | case 'q': |
1166 | 0 | bEncodingB = false; |
1167 | 0 | break; |
1168 | | |
1169 | 0 | default: |
1170 | 0 | bEncodedWord = false; |
1171 | 0 | break; |
1172 | 0 | } |
1173 | 0 | } |
1174 | 0 | } |
1175 | | |
1176 | 0 | bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?'; |
1177 | |
|
1178 | 0 | OStringBuffer sText; |
1179 | 0 | if (bEncodedWord) |
1180 | 0 | { |
1181 | 0 | if (bEncodingB) |
1182 | 0 | { |
1183 | 0 | for (bool bDone = false; !bDone;) |
1184 | 0 | { |
1185 | 0 | if (pEnd - q < 4) |
1186 | 0 | { |
1187 | 0 | bEncodedWord = false; |
1188 | 0 | bDone = true; |
1189 | 0 | } |
1190 | 0 | else |
1191 | 0 | { |
1192 | 0 | bool bFinal = false; |
1193 | 0 | int nCount = 3; |
1194 | 0 | sal_uInt32 nValue = 0; |
1195 | 0 | for (int nShift = 18; nShift >= 0; nShift -= 6) |
1196 | 0 | { |
1197 | 0 | int nWeight = getBase64Weight(*q++); |
1198 | 0 | if (nWeight == -2) |
1199 | 0 | { |
1200 | 0 | bEncodedWord = false; |
1201 | 0 | bDone = true; |
1202 | 0 | break; |
1203 | 0 | } |
1204 | 0 | if (nWeight == -1) |
1205 | 0 | { |
1206 | 0 | if (!bFinal) |
1207 | 0 | { |
1208 | 0 | if (nShift >= 12) |
1209 | 0 | { |
1210 | 0 | bEncodedWord = false; |
1211 | 0 | bDone = true; |
1212 | 0 | break; |
1213 | 0 | } |
1214 | 0 | bFinal = true; |
1215 | 0 | nCount = nShift == 6 ? 1 : 2; |
1216 | 0 | } |
1217 | 0 | } |
1218 | 0 | else |
1219 | 0 | nValue |= nWeight << nShift; |
1220 | 0 | } |
1221 | 0 | if (bEncodedWord) |
1222 | 0 | { |
1223 | 0 | for (int nShift = 16; nCount-- > 0; nShift -= 8) |
1224 | 0 | sText.append(char(nValue >> nShift & 0xFF)); |
1225 | 0 | if (*q == '?') |
1226 | 0 | { |
1227 | 0 | ++q; |
1228 | 0 | bDone = true; |
1229 | 0 | } |
1230 | 0 | if (bFinal && !bDone) |
1231 | 0 | { |
1232 | 0 | bEncodedWord = false; |
1233 | 0 | bDone = true; |
1234 | 0 | } |
1235 | 0 | } |
1236 | 0 | } |
1237 | 0 | } |
1238 | 0 | } |
1239 | 0 | else |
1240 | 0 | { |
1241 | 0 | const char * pEncodedTextBegin = q; |
1242 | 0 | const char * pEncodedTextCopyBegin = q; |
1243 | 0 | for (bool bDone = false; !bDone;) |
1244 | 0 | if (q == pEnd) |
1245 | 0 | { |
1246 | 0 | bEncodedWord = false; |
1247 | 0 | bDone = true; |
1248 | 0 | } |
1249 | 0 | else |
1250 | 0 | { |
1251 | 0 | sal_uInt32 nChar = static_cast<unsigned char>(*q++); |
1252 | 0 | switch (nChar) |
1253 | 0 | { |
1254 | 0 | case '=': |
1255 | 0 | { |
1256 | 0 | if (pEnd - q < 2) |
1257 | 0 | { |
1258 | 0 | bEncodedWord = false; |
1259 | 0 | bDone = true; |
1260 | 0 | break; |
1261 | 0 | } |
1262 | 0 | int nDigit1 = getHexWeight(q[0]); |
1263 | 0 | int nDigit2 = getHexWeight(q[1]); |
1264 | 0 | if (nDigit1 < 0 || nDigit2 < 0) |
1265 | 0 | { |
1266 | 0 | bEncodedWord = false; |
1267 | 0 | bDone = true; |
1268 | 0 | break; |
1269 | 0 | } |
1270 | 0 | sText.append( |
1271 | 0 | rBody.subView( |
1272 | 0 | (pEncodedTextCopyBegin - pBegin), |
1273 | 0 | (q - 1 - pEncodedTextCopyBegin)) |
1274 | 0 | + OStringChar(char(nDigit1 << 4 | nDigit2))); |
1275 | 0 | q += 2; |
1276 | 0 | pEncodedTextCopyBegin = q; |
1277 | 0 | break; |
1278 | 0 | } |
1279 | | |
1280 | 0 | case '?': |
1281 | 0 | if (q - pEncodedTextBegin > 1) |
1282 | 0 | sText.append(rBody.subView( |
1283 | 0 | (pEncodedTextCopyBegin - pBegin), |
1284 | 0 | (q - 1 - pEncodedTextCopyBegin))); |
1285 | 0 | else |
1286 | 0 | bEncodedWord = false; |
1287 | 0 | bDone = true; |
1288 | 0 | break; |
1289 | | |
1290 | 0 | case '_': |
1291 | 0 | sText.append( |
1292 | 0 | rBody.subView( |
1293 | 0 | (pEncodedTextCopyBegin - pBegin), |
1294 | 0 | (q - 1 - pEncodedTextCopyBegin)) |
1295 | 0 | + OString::Concat(" ")); |
1296 | 0 | pEncodedTextCopyBegin = q; |
1297 | 0 | break; |
1298 | | |
1299 | 0 | default: |
1300 | 0 | if (!isVisible(nChar)) |
1301 | 0 | { |
1302 | 0 | bEncodedWord = false; |
1303 | 0 | bDone = true; |
1304 | 0 | } |
1305 | 0 | break; |
1306 | 0 | } |
1307 | 0 | } |
1308 | 0 | } |
1309 | 0 | } |
1310 | | |
1311 | 0 | bEncodedWord = bEncodedWord && q != pEnd && *q++ == '='; |
1312 | |
|
1313 | 0 | std::unique_ptr<sal_Unicode[]> pUnicodeBuffer; |
1314 | 0 | sal_Size nUnicodeSize = 0; |
1315 | 0 | if (bEncodedWord) |
1316 | 0 | { |
1317 | 0 | pUnicodeBuffer |
1318 | 0 | = convertToUnicode(sText.getStr(), |
1319 | 0 | sText.getStr() + sText.getLength(), |
1320 | 0 | eCharsetEncoding, nUnicodeSize); |
1321 | 0 | if (!pUnicodeBuffer) |
1322 | 0 | bEncodedWord = false; |
1323 | 0 | } |
1324 | |
|
1325 | 0 | if (bEncodedWord) |
1326 | 0 | { |
1327 | 0 | appendISO88591(sDecoded, pCopyBegin, pWSPBegin); |
1328 | 0 | sDecoded.append( |
1329 | 0 | pUnicodeBuffer.get(), |
1330 | 0 | static_cast< sal_Int32 >(nUnicodeSize)); |
1331 | 0 | pUnicodeBuffer.reset(); |
1332 | 0 | p = q; |
1333 | 0 | pCopyBegin = p; |
1334 | |
|
1335 | 0 | pWSPBegin = p; |
1336 | 0 | while (p != pEnd && isWhiteSpace(*p)) |
1337 | 0 | ++p; |
1338 | | /* bStartEncodedWord = p != pWSPBegin; */ |
1339 | 0 | continue; |
1340 | 0 | } |
1341 | 0 | } |
1342 | | |
1343 | 0 | if (p == pEnd) |
1344 | 0 | break; |
1345 | | |
1346 | 0 | switch (*p++) |
1347 | 0 | { |
1348 | 0 | case '"': |
1349 | | /* bStartEncodedWord = true; */ |
1350 | 0 | break; |
1351 | | |
1352 | 0 | case '(': |
1353 | | /* bStartEncodedWord = true; */ |
1354 | 0 | break; |
1355 | | |
1356 | 0 | case ')': |
1357 | | /* bStartEncodedWord = false; */ |
1358 | 0 | break; |
1359 | | |
1360 | 0 | default: |
1361 | 0 | { |
1362 | 0 | const char * pUTF8Begin = p - 1; |
1363 | 0 | const char * pUTF8End = pUTF8Begin; |
1364 | 0 | sal_uInt32 nCharacter = 0; |
1365 | 0 | if (translateUTF8Char(pUTF8End, pEnd, nCharacter)) |
1366 | 0 | { |
1367 | 0 | appendISO88591(sDecoded, pCopyBegin, p - 1); |
1368 | 0 | sDecoded.appendUtf32(nCharacter); |
1369 | 0 | p = pUTF8End; |
1370 | 0 | pCopyBegin = p; |
1371 | 0 | } |
1372 | | /* bStartEncodedWord = false; */ |
1373 | 0 | break; |
1374 | 0 | } |
1375 | 0 | } |
1376 | 0 | pWSPBegin = p; |
1377 | 0 | } |
1378 | | |
1379 | 0 | appendISO88591(sDecoded, pCopyBegin, pEnd); |
1380 | 0 | return sDecoded.makeStringAndClear(); |
1381 | 0 | } |
1382 | | |
1383 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |