/src/kcodecs/src/kcodecs.cpp
Line | Count | Source |
1 | | /* |
2 | | SPDX-FileCopyrightText: 2000-2001 Dawit Alemayehu <adawit@kde.org> |
3 | | SPDX-FileCopyrightText: 2001 Rik Hemsley (rikkus) <rik@kde.org> |
4 | | SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org> |
5 | | |
6 | | SPDX-License-Identifier: LGPL-2.0-only |
7 | | |
8 | | The encoding and decoding utilities in KCodecs with the exception of |
9 | | quoted-printable are based on the java implementation in HTTPClient |
10 | | package by Ronald Tschalär Copyright (C) 1996-1999. // krazy:exclude=copyright |
11 | | |
12 | | The quoted-printable codec as described in RFC 2045, section 6.7. is by |
13 | | Rik Hemsley (C) 2001. |
14 | | */ |
15 | | |
16 | | #include "kcodecs.h" |
17 | | #include "kcharsets.h" |
18 | | #include "kcodecs_debug.h" |
19 | | #include "kcodecs_p.h" |
20 | | #include "kcodecsbase64.h" |
21 | | #include "kcodecsqp.h" |
22 | | #include "kcodecsuuencode.h" |
23 | | |
24 | | #include <array> |
25 | | #include <cassert> |
26 | | #include <cstring> |
27 | | #include <stdlib.h> |
28 | | #include <string.h> |
29 | | |
30 | | #include <QDebug> |
31 | | #include <QStringDecoder> |
32 | | #include <QStringEncoder> |
33 | | |
34 | | namespace KCodecs |
35 | | { |
36 | | static QList<QByteArray> charsetCache; |
37 | | |
38 | | QByteArray cachedCharset(const QByteArray &name) |
39 | 27.6k | { |
40 | 57.7k | auto it = std::find_if(charsetCache.cbegin(), charsetCache.cend(), [&name](const QByteArray &charset) { |
41 | 57.7k | return name.compare(charset, Qt::CaseInsensitive) == 0; |
42 | 57.7k | }); |
43 | 27.6k | if (it != charsetCache.cend()) { |
44 | 27.6k | return *it; |
45 | 27.6k | } |
46 | | |
47 | 2 | charsetCache.append(name.toUpper()); |
48 | 2 | return charsetCache.last(); |
49 | 27.6k | } |
50 | | |
51 | | QByteArray cachedCharset(QByteArrayView name) |
52 | 607k | { |
53 | 1.07M | auto it = std::find_if(charsetCache.cbegin(), charsetCache.cend(), [&name](const QByteArray &charset) { |
54 | 1.07M | return name.compare(charset, Qt::CaseInsensitive) == 0; |
55 | 1.07M | }); |
56 | 607k | if (it != charsetCache.cend()) { |
57 | 607k | return *it; |
58 | 607k | } |
59 | | |
60 | 52 | charsetCache.append(name.toByteArray().toUpper()); |
61 | 52 | return charsetCache.last(); |
62 | 607k | } |
63 | | |
64 | | namespace CodecNames |
65 | | { |
66 | | QByteArray utf8() |
67 | 12.4k | { |
68 | 12.4k | return QByteArrayLiteral("UTF-8"); |
69 | 12.4k | } |
70 | | } |
71 | | |
72 | | Q_REQUIRED_RESULT |
73 | | QByteArray updateEncodingCharset(const QByteArray ¤tCharset, const QByteArray &nextCharset) |
74 | 635k | { |
75 | 635k | if (!nextCharset.isEmpty()) { |
76 | 634k | if (currentCharset.isEmpty()) { |
77 | 604k | return nextCharset; |
78 | 604k | } |
79 | 30.2k | if (currentCharset != nextCharset) { |
80 | | // only one charset per string supported, so change to superset charset UTF-8, |
81 | | // which should cover any possible chars |
82 | 11.7k | return CodecNames::utf8(); |
83 | 11.7k | } |
84 | 30.2k | } |
85 | 18.9k | return currentCharset; |
86 | 635k | } |
87 | | |
88 | | } // namespace KCodecs |
89 | | |
90 | | /******************************** KCodecs ********************************/ |
91 | | |
92 | | QByteArray KCodecs::quotedPrintableEncode(QByteArrayView in, bool useCRLF) |
93 | 0 | { |
94 | 0 | Codec *codec = Codec::codecForName("quoted-printable"); |
95 | 0 | return codec->encode(in, useCRLF ? Codec::NewlineCRLF : Codec::NewlineLF); |
96 | 0 | } |
97 | | |
98 | | void KCodecs::quotedPrintableEncode(QByteArrayView in, QByteArray &out, bool useCRLF) |
99 | 0 | { |
100 | 0 | out = quotedPrintableEncode(in, useCRLF ? Codec::NewlineCRLF : Codec::NewlineLF); |
101 | 0 | } |
102 | | |
103 | | QByteArray KCodecs::quotedPrintableDecode(QByteArrayView in) |
104 | 8.28k | { |
105 | 8.28k | Codec *codec = Codec::codecForName("quoted-printable"); |
106 | 8.28k | return codec->decode(in); |
107 | 8.28k | } |
108 | | |
109 | | void KCodecs::quotedPrintableDecode(QByteArrayView in, QByteArray &out) |
110 | 0 | { |
111 | 0 | out = quotedPrintableDecode(in); |
112 | 0 | } |
113 | | |
114 | | QByteArray KCodecs::base64Encode(QByteArrayView in) |
115 | 97.7k | { |
116 | 97.7k | Codec *codec = Codec::codecForName("base64"); |
117 | 97.7k | return codec->encode(in); |
118 | 97.7k | } |
119 | | |
120 | | void KCodecs::base64Encode(QByteArrayView in, QByteArray &out, bool insertLFs) |
121 | 97.7k | { |
122 | 97.7k | Q_UNUSED(insertLFs); |
123 | 97.7k | out = base64Encode(in); |
124 | 97.7k | } |
125 | | |
126 | | QByteArray KCodecs::base64Decode(QByteArrayView in) |
127 | 940 | { |
128 | 940 | Codec *codec = Codec::codecForName("base64"); |
129 | 940 | return codec->decode(in); |
130 | 940 | } |
131 | | |
132 | | void KCodecs::base64Decode(const QByteArrayView in, QByteArray &out) |
133 | 0 | { |
134 | 0 | out = base64Decode(in); |
135 | 0 | } |
136 | | |
137 | | QByteArray KCodecs::uudecode(QByteArrayView in) |
138 | 21.0k | { |
139 | 21.0k | Codec *codec = Codec::codecForName("x-uuencode"); |
140 | 21.0k | return codec->decode(in); |
141 | 21.0k | } |
142 | | |
143 | | void KCodecs::uudecode(QByteArrayView in, QByteArray &out) |
144 | 18.5k | { |
145 | 18.5k | out = uudecode(in); |
146 | 18.5k | } |
147 | | |
148 | | //@cond PRIVATE |
149 | | |
150 | | namespace KCodecs |
151 | | { |
152 | | // parse the encoded-word (scursor points to after the initial '=') |
153 | | // retry point is the next position at which a failed parsing of an encoded word |
154 | | // should be retried from. This is an optimization to avoid quadratic behavior on |
155 | | // (intentionally) corrupted input |
156 | | bool parseEncodedWord(const char *&scursor, |
157 | | const char *const send, |
158 | | QString *result, |
159 | | QByteArray *usedCS, |
160 | | const QByteArray &defaultCS, |
161 | | CharsetOption charsetOption, |
162 | | const char *&retryPoint) |
163 | 263k | { |
164 | 263k | assert(result); |
165 | | |
166 | | // make sure the caller already did a bit of the work. |
167 | 263k | assert(*(scursor - 1) == '='); |
168 | | |
169 | | // |
170 | | // STEP 1: |
171 | | // scan for the charset/language portion of the encoded-word |
172 | | // |
173 | | |
174 | 263k | retryPoint = nullptr; |
175 | 263k | char ch = *scursor++; |
176 | | |
177 | 263k | if (ch != '?') { |
178 | | // qCDebug(KCODECS_LOG) << "first"; |
179 | | // qCDebug(KCODECS_LOG) << "Premature end of encoded word"; |
180 | 165k | return false; |
181 | 165k | } |
182 | | |
183 | | // remember start of charset (i.e. just after the initial "=?") and |
184 | | // language (just after the first '*') fields: |
185 | 98.1k | const char *charsetStart = scursor; |
186 | 98.1k | const char *languageStart = nullptr; |
187 | | |
188 | | // find delimiting '?' (and the '*' separating charset and language |
189 | | // tags, if any): |
190 | 3.13M | for (; scursor != send; scursor++) { |
191 | 3.12M | if (*scursor == '?') { |
192 | 89.9k | break; |
193 | 3.03M | } else if (*scursor == '*' && languageStart == nullptr) { |
194 | 2.13k | languageStart = scursor + 1; |
195 | 2.13k | } |
196 | 3.12M | } |
197 | | |
198 | | // not found? can't be an encoded-word! |
199 | 98.1k | if (scursor == send || *scursor != '?') { |
200 | | // qCDebug(KCODECS_LOG) << "second"; |
201 | | // qCDebug(KCODECS_LOG) << "Premature end of encoded word"; |
202 | 8.16k | return false; |
203 | 8.16k | } |
204 | | |
205 | | // extract charset information (keep in mind: the size given to the |
206 | | // ctor is one off due to the \0 terminator): |
207 | 89.9k | QByteArrayView maybeCharset(charsetStart, std::min<qsizetype>((languageStart ? languageStart - 1 : scursor) - charsetStart, std::strlen(charsetStart))); |
208 | | |
209 | | // |
210 | | // STEP 2: |
211 | | // scan for the encoding portion of the encoded-word |
212 | | // |
213 | | |
214 | | // remember start of encoding (just _after_ the second '?'): |
215 | 89.9k | scursor++; |
216 | 89.9k | const char *encodingStart = scursor; |
217 | | |
218 | | // find next '?' (ending the encoding tag): |
219 | 8.05M | for (; scursor != send; scursor++) { |
220 | 8.04M | if (*scursor == '?') { |
221 | 85.4k | break; |
222 | 85.4k | } |
223 | 8.04M | } |
224 | | |
225 | | // not found? Can't be an encoded-word! |
226 | 89.9k | if (scursor == send || *scursor != '?') { |
227 | | // qCDebug(KCODECS_LOG) << "third"; |
228 | | // qCDebug(KCODECS_LOG) << "Premature end of encoded word"; |
229 | 4.53k | return false; |
230 | 4.53k | } |
231 | | |
232 | | // extract the encoding information: |
233 | 85.4k | QByteArrayView maybeEncoding(encodingStart, scursor - encodingStart); |
234 | | |
235 | | // qCDebug(KCODECS_LOG) << "parseEncodedWord: found charset == \"" << maybeCharset |
236 | | // << "\"; language == \"" << maybeLanguage |
237 | | // << "\"; encoding == \"" << maybeEncoding << "\""; |
238 | | |
239 | | // |
240 | | // STEP 3: |
241 | | // scan for encoded-text portion of encoded-word |
242 | | // |
243 | | |
244 | | // remember start of encoded-text (just after the third '?'): |
245 | 85.4k | scursor++; |
246 | 85.4k | const char *encodedTextStart = scursor; |
247 | | |
248 | | // find the '?=' sequence (ending the encoded-text): |
249 | 59.7M | for (; scursor != send; scursor++) { |
250 | 59.7M | if (*scursor == '?') { |
251 | 21.1M | if (scursor + 1 != send) { |
252 | 21.1M | if (*(scursor + 1) != '=') { // We expect a '=' after the '?', but we got something else; ignore |
253 | | // qCDebug(KCODECS_LOG) << "Stray '?' in q-encoded word, ignoring this."; |
254 | 21.0M | if (*(scursor - 1) == '=') { |
255 | 48.2k | retryPoint = scursor - 1; |
256 | 48.2k | } |
257 | 21.0M | continue; |
258 | 21.0M | } else { // yep, found a '?=' sequence |
259 | 76.3k | scursor += 2; |
260 | 76.3k | break; |
261 | 76.3k | } |
262 | 21.1M | } else { // The '?' is the last char, but we need a '=' after it! |
263 | | // qCDebug(KCODECS_LOG) << "Premature end of encoded word"; |
264 | 351 | return false; |
265 | 351 | } |
266 | 21.1M | } |
267 | 59.7M | } |
268 | | |
269 | 85.0k | if (*(scursor - 2) != '?' || *(scursor - 1) != '=' || scursor < encodedTextStart + 2) { |
270 | | // qCDebug(KCODECS_LOG) << "Premature end of encoded word"; |
271 | 8.72k | if (!retryPoint) { |
272 | 6.94k | retryPoint = scursor; |
273 | 6.94k | } |
274 | 8.72k | return false; |
275 | 8.72k | } |
276 | | |
277 | | // set end sentinel for encoded-text: |
278 | 76.3k | const char *const encodedTextEnd = scursor - 2; |
279 | | |
280 | | // |
281 | | // STEP 4: |
282 | | // setup decoders for the transfer encoding and the charset |
283 | | // |
284 | | |
285 | | // try if there's a codec for the encoding found: |
286 | 76.3k | Codec *codec = Codec::codecForName(maybeEncoding); |
287 | 76.3k | if (!codec) { |
288 | | // qCDebug(KCODECS_LOG) << "Unknown encoding" << maybeEncoding; |
289 | 16.3k | return false; |
290 | 16.3k | } |
291 | | |
292 | | // get an instance of a corresponding decoder: |
293 | 60.0k | Decoder *dec = codec->makeDecoder(); |
294 | 60.0k | assert(dec); |
295 | | |
296 | | // try if there's a (text)codec for the charset found: |
297 | 60.0k | QByteArray cs; |
298 | 60.0k | QStringDecoder textCodec; |
299 | 60.0k | if (charsetOption == KCodecs::ForceDefaultCharset || maybeCharset.isEmpty()) { |
300 | 9.81k | textCodec = QStringDecoder(defaultCS); |
301 | 9.81k | cs = cachedCharset(defaultCS); |
302 | 50.2k | } else { |
303 | 50.2k | textCodec = QStringDecoder(maybeCharset); |
304 | 50.2k | if (!textCodec.isValid()) { // no suitable codec found => use default charset |
305 | 17.8k | textCodec = QStringDecoder(defaultCS); |
306 | 17.8k | cs = cachedCharset(defaultCS); |
307 | 32.3k | } else { |
308 | 32.3k | cs = cachedCharset(maybeCharset); |
309 | 32.3k | } |
310 | 50.2k | } |
311 | 60.0k | if (usedCS) { |
312 | 60.0k | *usedCS = updateEncodingCharset(*usedCS, cs); |
313 | 60.0k | } |
314 | | |
315 | 60.0k | if (!textCodec.isValid()) { |
316 | | // qCDebug(KCODECS_LOG) << "Unknown charset" << maybeCharset; |
317 | 482 | delete dec; |
318 | 482 | return false; |
319 | 59.5k | }; |
320 | | |
321 | | // qCDebug(KCODECS_LOG) << "mimeName(): \"" << textCodec->name() << "\""; |
322 | | |
323 | | // allocate a temporary buffer to store the 8bit text: |
324 | 59.5k | int encodedTextLength = encodedTextEnd - encodedTextStart; |
325 | 59.5k | QByteArray buffer; |
326 | 59.5k | buffer.resize(codec->maxDecodedSizeFor(encodedTextLength)); |
327 | 59.5k | char *bbegin = buffer.data(); |
328 | 59.5k | char *bend = bbegin + buffer.length(); |
329 | | |
330 | | // |
331 | | // STEP 5: |
332 | | // do the actual decoding |
333 | | // |
334 | | |
335 | 59.5k | if (!dec->decode(encodedTextStart, encodedTextEnd, bbegin, bend)) { |
336 | 6.71k | qWarning() << codec->name() << "codec lies about its maxDecodedSizeFor(" << encodedTextLength << ")\nresult may be truncated"; |
337 | 6.71k | } |
338 | | |
339 | 59.5k | *result = textCodec.decode(QByteArrayView(buffer.data(), bbegin - buffer.data())); |
340 | | |
341 | | // qCDebug(KCODECS_LOG) << "result now: \"" << result << "\""; |
342 | | // cleanup: |
343 | 59.5k | delete dec; |
344 | | |
345 | 59.5k | return true; |
346 | 60.0k | } |
347 | | |
348 | | } // namespace KCodecs |
349 | | |
350 | | //@endcond |
351 | | |
352 | | QString KCodecs::decodeRFC2047String(QStringView msg) |
353 | 0 | { |
354 | 0 | QByteArray usedCS; |
355 | 0 | return decodeRFC2047String(msg.toUtf8(), &usedCS, CodecNames::utf8(), NoOption); |
356 | 0 | } |
357 | | |
358 | | QString KCodecs::decodeRFC2047String(QByteArrayView src, QByteArray *usedCS, const QByteArray &defaultCS, CharsetOption charsetOption) |
359 | 1.69M | { |
360 | 1.69M | QByteArray result; |
361 | 1.69M | QByteArray spaceBuffer; |
362 | 1.69M | const char *scursor = src.constData(); |
363 | 1.69M | const char *send = scursor + src.length(); |
364 | 1.69M | bool onlySpacesSinceLastWord = false; |
365 | 1.69M | if (usedCS) { |
366 | 1.69M | usedCS->clear(); |
367 | 1.69M | } |
368 | | |
369 | 1.69M | const char *retryPoint = nullptr; |
370 | 117M | while (scursor != send) { |
371 | | // space |
372 | 115M | if (isspace(*scursor) && onlySpacesSinceLastWord) { |
373 | 23.4k | spaceBuffer += *scursor++; |
374 | 23.4k | continue; |
375 | 23.4k | } |
376 | | |
377 | | // possible start of an encoded word |
378 | 115M | if (*scursor == '=' && (!retryPoint || retryPoint <= scursor)) { |
379 | 263k | QString decoded; |
380 | 263k | ++scursor; |
381 | 263k | const char *start = scursor; |
382 | 263k | if (parseEncodedWord(scursor, send, &decoded, usedCS, defaultCS, charsetOption, retryPoint)) { |
383 | 59.5k | result += decoded.toUtf8(); |
384 | 59.5k | onlySpacesSinceLastWord = true; |
385 | 59.5k | spaceBuffer.clear(); |
386 | 204k | } else { |
387 | 204k | if (onlySpacesSinceLastWord) { |
388 | 4.44k | result += spaceBuffer; |
389 | 4.44k | onlySpacesSinceLastWord = false; |
390 | 4.44k | } |
391 | 204k | result += '='; |
392 | 204k | scursor = start; // reset cursor after parsing failure |
393 | 204k | } |
394 | 263k | continue; |
395 | 115M | } else { |
396 | | // unencoded data |
397 | 115M | if (onlySpacesSinceLastWord) { |
398 | 36.7k | result += spaceBuffer; |
399 | 36.7k | onlySpacesSinceLastWord = false; |
400 | 36.7k | } |
401 | 115M | result += *scursor; |
402 | 115M | ++scursor; |
403 | 115M | } |
404 | 115M | } |
405 | | // If there are any chars that couldn't be decoded in UTF-8, |
406 | | // fallback to local codec |
407 | 1.69M | const QString tryUtf8 = QString::fromUtf8(result); |
408 | 1.69M | if (tryUtf8.contains(QChar(0xFFFD))) { |
409 | 574k | QStringDecoder codec(QStringDecoder::System); |
410 | 574k | if (usedCS) { |
411 | 574k | *usedCS = updateEncodingCharset(*usedCS, cachedCharset(QByteArrayView(codec.name()))); |
412 | 574k | } |
413 | 574k | return codec.decode(result); |
414 | 1.12M | } else { |
415 | 1.12M | return tryUtf8; |
416 | 1.12M | } |
417 | 1.69M | } |
418 | | |
419 | | QByteArray KCodecs::encodeRFC2047String(QStringView src, const QByteArray &charset) |
420 | 3.31M | { |
421 | 3.31M | return KCodecs::encodeRFC2047String(src, charset, RFC2047EncodingOption::NoOption); |
422 | 3.31M | } |
423 | | |
424 | | static constexpr const char reservedCharacters[] = "\"()<>@,.;:\\[]="; |
425 | | |
426 | | QByteArray KCodecs::encodeRFC2047String(QStringView src, QByteArrayView charset, KCodecs::RFC2047EncodingOption option) |
427 | 3.31M | { |
428 | 3.31M | QByteArray result; |
429 | 3.31M | int start = 0; |
430 | 3.31M | int end = 0; |
431 | 3.31M | bool nonAscii = false; |
432 | 3.31M | bool useQEncoding = false; |
433 | | |
434 | 3.31M | QStringEncoder codec(charset.constData()); |
435 | | |
436 | 3.31M | QByteArrayView usedCS; |
437 | 3.31M | if (!codec.isValid()) { |
438 | | // no codec available => try local8Bit and hope the best ;-) |
439 | 0 | codec = QStringEncoder(QStringEncoder::System); |
440 | 0 | usedCS = codec.name(); |
441 | 3.31M | } else { |
442 | 3.31M | Q_ASSERT(codec.isValid()); |
443 | 3.31M | if (charset.isEmpty()) { |
444 | 0 | usedCS = codec.name(); |
445 | 3.31M | } else { |
446 | 3.31M | usedCS = charset; |
447 | 3.31M | } |
448 | 3.31M | } |
449 | | |
450 | 3.31M | const QByteArray encoded8Bit = [&] { // encoded8Bit must be const to not detach below! |
451 | 3.31M | QByteArray encoded8Bit = codec.encode(src); |
452 | 3.31M | if (codec.hasError()) { |
453 | 706 | usedCS = CodecNames::utf8(); |
454 | 706 | codec = QStringEncoder(QStringEncoder::Utf8); |
455 | 706 | encoded8Bit = codec.encode(src); |
456 | 706 | } |
457 | 3.31M | return encoded8Bit; |
458 | 3.31M | }(); |
459 | | |
460 | 3.31M | if (usedCS.contains("8859-")) { // use "B"-Encoding for non iso-8859-x charsets |
461 | 4.27k | useQEncoding = true; |
462 | 4.27k | } |
463 | | |
464 | 3.31M | uint encoded8BitLength = encoded8Bit.length(); |
465 | 11.5M | for (unsigned int i = 0; i < encoded8BitLength; i++) { |
466 | 11.2M | if (encoded8Bit[i] == ' ') { // encoding starts at word boundaries |
467 | 205k | start = i + 1; |
468 | 205k | } |
469 | | |
470 | | // encode escape character, for japanese encodings... |
471 | 11.2M | if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033') |
472 | 8.23M | || (option == RFC2047EncodingOption::EncodeReservedCharcters && (std::strchr(reservedCharacters, encoded8Bit[i]) != nullptr))) { |
473 | 2.99M | end = start; // non us-ascii char found, now we determine where to stop encoding |
474 | 2.99M | nonAscii = true; |
475 | 2.99M | break; |
476 | 2.99M | } |
477 | 11.2M | } |
478 | | |
479 | 3.31M | if (nonAscii) { |
480 | 26.2M | while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) { |
481 | | // we encode complete words |
482 | 23.2M | end++; |
483 | 23.2M | } |
484 | | |
485 | 12.2M | for (int x = end; x < encoded8Bit.length(); x++) { |
486 | 9.21M | if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033') |
487 | 8.98M | || (option == RFC2047EncodingOption::EncodeReservedCharcters && (std::strchr(reservedCharacters, encoded8Bit[x]) != nullptr))) { |
488 | 318k | end = x; // we found another non-ascii word |
489 | | |
490 | 122M | while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) { |
491 | | // we encode complete words |
492 | 121M | end++; |
493 | 121M | } |
494 | 318k | x = end; |
495 | 318k | } |
496 | 9.21M | } |
497 | | |
498 | 2.99M | result = QByteArrayView(encoded8Bit).left(start) + "=?" + usedCS; |
499 | | |
500 | 2.99M | if (useQEncoding) { |
501 | 1.14k | result += "?Q?"; |
502 | | |
503 | 1.14k | char hexcode; // "Q"-encoding implementation described in RFC 2047 |
504 | 28.3k | for (int i = start; i < end; i++) { |
505 | 27.2k | const char c = encoded8Bit[i]; |
506 | 27.2k | if (c == ' ') { // make the result readable with not MIME-capable readers |
507 | 13 | result += '_'; |
508 | 27.2k | } else { |
509 | 27.2k | if (((c >= 'a') && (c <= 'z')) || // paranoid mode, encode *all* special chars to avoid problems |
510 | 21.2k | ((c >= 'A') && (c <= 'Z')) || // with "From" & "To" headers |
511 | 20.2k | ((c >= '0') && (c <= '9'))) { |
512 | 20.2k | result += c; |
513 | 20.2k | } else { |
514 | 6.95k | result += '='; // "stolen" from KMail ;-) |
515 | 6.95k | hexcode = ((c & 0xF0) >> 4) + 48; |
516 | 6.95k | if (hexcode >= 58) { |
517 | 1.60k | hexcode += 7; |
518 | 1.60k | } |
519 | 6.95k | result += hexcode; |
520 | 6.95k | hexcode = (c & 0x0F) + 48; |
521 | 6.95k | if (hexcode >= 58) { |
522 | 5.28k | hexcode += 7; |
523 | 5.28k | } |
524 | 6.95k | result += hexcode; |
525 | 6.95k | } |
526 | 27.2k | } |
527 | 27.2k | } |
528 | 2.99M | } else { |
529 | 2.99M | result += "?B?" + encoded8Bit.mid(start, end - start).toBase64(); |
530 | 2.99M | } |
531 | | |
532 | 2.99M | result += "?="; |
533 | 2.99M | result += QByteArrayView(encoded8Bit).right(encoded8Bit.length() - end); |
534 | 2.99M | } else { |
535 | 318k | result = encoded8Bit; |
536 | 318k | } |
537 | | |
538 | 3.31M | return result; |
539 | 3.31M | } |
540 | | |
541 | | /******************************************************************************/ |
542 | | /* KCodecs::Codec */ |
543 | | |
544 | | KCodecs::Codec *KCodecs::Codec::codecForName(QByteArrayView name) |
545 | 347k | { |
546 | 347k | struct CodecEntry { |
547 | 347k | const char *name; |
548 | 347k | std::unique_ptr<KCodecs::Codec> codec; |
549 | 347k | }; |
550 | 347k | static const std::array<CodecEntry, 6> s_codecs{{ |
551 | 347k | {"b", std::make_unique<KCodecs::Rfc2047BEncodingCodec>()}, |
552 | 347k | {"base64", std::make_unique<KCodecs::Base64Codec>()}, |
553 | 347k | {"q", std::make_unique<KCodecs::Rfc2047QEncodingCodec>()}, |
554 | 347k | {"quoted-printable", std::make_unique<KCodecs::QuotedPrintableCodec>()}, |
555 | 347k | {"x-kmime-rfc2231", std::make_unique<KCodecs::Rfc2231EncodingCodec>()}, |
556 | 347k | {"x-uuencode", std::make_unique<KCodecs::UUCodec>()}, |
557 | 347k | }}; |
558 | | |
559 | 964k | for (auto &entry : s_codecs) { |
560 | 964k | if (name.compare(entry.name, Qt::CaseInsensitive) == 0) { |
561 | 323k | return entry.codec.get(); |
562 | 323k | } |
563 | 964k | } |
564 | 347k | qWarning() << "Unknown codec" << name << "requested!"; |
565 | 24.8k | return nullptr; |
566 | 347k | } |
567 | | |
568 | | bool KCodecs::Codec::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend, NewlineType newline) const |
569 | 97.7k | { |
570 | | // get an encoder: |
571 | 97.7k | std::unique_ptr<Encoder> enc(makeEncoder(newline)); |
572 | 97.7k | if (!enc) { |
573 | 0 | qWarning() << "makeEncoder failed for" << name(); |
574 | 0 | return false; |
575 | 0 | } |
576 | | |
577 | | // encode and check for output buffer overflow: |
578 | 97.7k | while (!enc->encode(scursor, send, dcursor, dend)) { |
579 | 0 | if (dcursor == dend) { |
580 | 0 | return false; // not enough space in output buffer |
581 | 0 | } |
582 | 0 | } |
583 | | |
584 | | // finish and check for output buffer overflow: |
585 | 97.7k | while (!enc->finish(dcursor, dend)) { |
586 | 0 | if (dcursor == dend) { |
587 | 0 | return false; // not enough space in output buffer |
588 | 0 | } |
589 | 0 | } |
590 | | |
591 | 97.7k | return true; // successfully encoded. |
592 | 97.7k | } |
593 | | |
594 | | QByteArray KCodecs::Codec::encode(QByteArrayView src, NewlineType newline) const |
595 | 97.7k | { |
596 | | // allocate buffer for the worst case: |
597 | 97.7k | QByteArray result; |
598 | 97.7k | result.resize(maxEncodedSizeFor(src.size(), newline)); |
599 | | |
600 | | // set up iterators: |
601 | 97.7k | QByteArray::ConstIterator iit = src.begin(); |
602 | 97.7k | QByteArray::ConstIterator iend = src.end(); |
603 | 97.7k | QByteArray::Iterator oit = result.begin(); |
604 | 97.7k | QByteArray::ConstIterator oend = result.end(); |
605 | | |
606 | | // encode |
607 | 97.7k | if (!encode(iit, iend, oit, oend, newline)) { |
608 | 0 | qCritical() << name() << "codec lies about it's mEncodedSizeFor()"; |
609 | 0 | } |
610 | | |
611 | | // shrink result to actual size: |
612 | 97.7k | result.truncate(oit - result.begin()); |
613 | | |
614 | 97.7k | return result; |
615 | 97.7k | } |
616 | | |
617 | | QByteArray KCodecs::Codec::decode(QByteArrayView src, NewlineType newline) const |
618 | 30.2k | { |
619 | | // allocate buffer for the worst case: |
620 | 30.2k | QByteArray result; |
621 | 30.2k | result.resize(maxDecodedSizeFor(src.size(), newline)); |
622 | | |
623 | | // set up iterators: |
624 | 30.2k | QByteArray::ConstIterator iit = src.begin(); |
625 | 30.2k | QByteArray::ConstIterator iend = src.end(); |
626 | 30.2k | QByteArray::Iterator oit = result.begin(); |
627 | 30.2k | QByteArray::ConstIterator oend = result.end(); |
628 | | |
629 | | // decode |
630 | 30.2k | if (!decode(iit, iend, oit, oend, newline)) { |
631 | 0 | qCritical() << name() << "codec lies about it's maxDecodedSizeFor()"; |
632 | 0 | } |
633 | | |
634 | | // shrink result to actual size: |
635 | 30.2k | result.truncate(oit - result.begin()); |
636 | | |
637 | 30.2k | return result; |
638 | 30.2k | } |
639 | | |
640 | | bool KCodecs::Codec::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend, NewlineType newline) const |
641 | 30.2k | { |
642 | | // get a decoder: |
643 | 30.2k | std::unique_ptr<Decoder> dec(makeDecoder(newline)); |
644 | 30.2k | assert(dec); |
645 | | |
646 | | // decode and check for output buffer overflow: |
647 | 31.1k | while (!dec->decode(scursor, send, dcursor, dend)) { |
648 | 917 | if (dcursor == dend) { |
649 | 0 | return false; // not enough space in output buffer |
650 | 0 | } |
651 | 917 | } |
652 | | |
653 | | // finish and check for output buffer overflow: |
654 | 30.2k | while (!dec->finish(dcursor, dend)) { |
655 | 0 | if (dcursor == dend) { |
656 | 0 | return false; // not enough space in output buffer |
657 | 0 | } |
658 | 0 | } |
659 | | |
660 | 30.2k | return true; // successfully encoded. |
661 | 30.2k | } |
662 | | |
663 | | /******************************************************************************/ |
664 | | /* KCodecs::Encoder */ |
665 | | |
666 | | KCodecs::EncoderPrivate::EncoderPrivate(Codec::NewlineType newline) |
667 | 97.7k | : outputBufferCursor(0) |
668 | 97.7k | , newline(newline) |
669 | 97.7k | { |
670 | 97.7k | } |
671 | | |
672 | | KCodecs::Encoder::Encoder(Codec::NewlineType newline) |
673 | 97.7k | : d(new KCodecs::EncoderPrivate(newline)) |
674 | 97.7k | { |
675 | 97.7k | } |
676 | | |
677 | 97.7k | KCodecs::Encoder::~Encoder() = default; |
678 | | |
679 | | bool KCodecs::Encoder::write(char ch, char *&dcursor, const char *const dend) |
680 | 2.68M | { |
681 | 2.68M | if (dcursor != dend) { |
682 | | // if there's space in the output stream, write there: |
683 | 2.68M | *dcursor++ = ch; |
684 | 2.68M | return true; |
685 | 2.68M | } else { |
686 | | // else buffer the output: |
687 | 0 | if (d->outputBufferCursor >= maxBufferedChars) { |
688 | 0 | qCritical() << "KCodecs::Encoder: internal buffer overflow!"; |
689 | 0 | } else { |
690 | 0 | d->outputBuffer[d->outputBufferCursor++] = ch; |
691 | 0 | } |
692 | 0 | return false; |
693 | 0 | } |
694 | 2.68M | } |
695 | | |
696 | | // write as much as possible off the output buffer. Return true if |
697 | | // flushing was complete, false if some chars could not be flushed. |
698 | | bool KCodecs::Encoder::flushOutputBuffer(char *&dcursor, const char *const dend) |
699 | 97.7k | { |
700 | 97.7k | int i; |
701 | | // copy output buffer to output stream: |
702 | 97.7k | for (i = 0; dcursor != dend && i < d->outputBufferCursor; ++i) { |
703 | 0 | *dcursor++ = d->outputBuffer[i]; |
704 | 0 | } |
705 | | |
706 | | // calculate the number of missing chars: |
707 | 97.7k | int numCharsLeft = d->outputBufferCursor - i; |
708 | | // push the remaining chars to the beginning of the buffer: |
709 | 97.7k | if (numCharsLeft) { |
710 | 0 | ::memmove(d->outputBuffer, d->outputBuffer + i, numCharsLeft); |
711 | 0 | } |
712 | | // adjust cursor: |
713 | 97.7k | d->outputBufferCursor = numCharsLeft; |
714 | | |
715 | 97.7k | return !numCharsLeft; |
716 | 97.7k | } |
717 | | |
718 | | bool KCodecs::Encoder::writeCRLF(char *&dcursor, const char *const dend) |
719 | 129k | { |
720 | 129k | if (d->newline == Codec::NewlineCRLF) { |
721 | 0 | write('\r', dcursor, dend); |
722 | 0 | } |
723 | 129k | return write('\n', dcursor, dend); |
724 | 129k | } |
725 | | |
726 | | /******************************************************************************/ |
727 | | /* KCodecs::Decoder */ |
728 | | |
729 | | KCodecs::DecoderPrivate::DecoderPrivate(Codec::NewlineType newline) |
730 | 230k | : newline(newline) |
731 | 230k | { |
732 | 230k | } |
733 | | |
734 | | KCodecs::Decoder::Decoder(Codec::NewlineType newline) |
735 | 230k | : d(new KCodecs::DecoderPrivate(newline)) |
736 | 230k | { |
737 | 230k | } |
738 | | |
739 | 230k | KCodecs::Decoder::~Decoder() = default; |