/src/kcodecs/src/kcodecsqp.cpp
Line | Count | Source |
1 | | /* -*- c++ -*- |
2 | | SPDX-FileCopyrightText: 2002 Marc Mutz <mutz@kde.org> |
3 | | |
4 | | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | | */ |
6 | | |
7 | | #include "kcodecsqp.h" |
8 | | #include "kcodecs_p.h" |
9 | | |
10 | | #include <QDebug> |
11 | | |
12 | | #include <cassert> |
13 | | |
14 | | using namespace KCodecs; |
15 | | |
16 | | namespace KCodecs |
17 | | { |
18 | | // none except a-zA-Z0-9!*+-/ |
19 | | const uchar eTextMap[16] = {0x00, 0x00, 0x00, 0x00, 0x40, 0x35, 0xFF, 0xC0, 0x7F, 0xFF, 0xFF, 0xE0, 0x7F, 0xFF, 0xFF, 0xE0}; |
20 | | |
21 | | // some helpful functions: |
22 | | |
23 | | /** |
24 | | Converts a 4-bit @p value into its hexadecimal characater representation. |
25 | | So input of value [0,15] returns ['0','1',... 'F']. Input values |
26 | | greater than 15 will produce undesired results. |
27 | | @param value is an unsigned character containing the 4-bit input value. |
28 | | */ |
29 | | static inline char binToHex(uchar value) |
30 | 0 | { |
31 | 0 | if (value > 9) { |
32 | 0 | return value + 'A' - 10; |
33 | 0 | } else { |
34 | 0 | return value + '0'; |
35 | 0 | } |
36 | 0 | } |
37 | | |
38 | | /** |
39 | | Returns the high-order 4 bits of an 8-bit value in another 8-bit value. |
40 | | @param ch is an unsigned character containing the 8-bit input value. |
41 | | */ |
42 | | static inline uchar highNibble(uchar ch) |
43 | 0 | { |
44 | 0 | return ch >> 4; |
45 | 0 | } |
46 | | |
47 | | /** |
48 | | Returns the low-order 4 bits of an 8-bit value in another 8-bit value. |
49 | | @param ch is an unsigned character containing the 8-bit input value. |
50 | | */ |
51 | | static inline uchar lowNibble(uchar ch) |
52 | 0 | { |
53 | 0 | return ch & 0xF; |
54 | 0 | } |
55 | | |
56 | | // |
57 | | // QuotedPrintableCodec |
58 | | // |
59 | | |
60 | | class QuotedPrintableEncoder : public Encoder |
61 | | { |
62 | | char mInputBuffer[16]; |
63 | | uchar mCurrentLineLength; // 0..76 |
64 | | uchar mAccu; |
65 | | uint mInputBufferReadCursor : 4; // 0..15 |
66 | | uint mInputBufferWriteCursor : 4; // 0..15 |
67 | | enum { |
68 | | Never, |
69 | | AtBOL, |
70 | | Definitely, |
71 | | } mAccuNeedsEncoding; |
72 | | bool mSawLineEnd : 1; |
73 | | bool mSawCR : 1; |
74 | | bool mFinishing : 1; |
75 | | bool mFinished : 1; |
76 | | |
77 | | protected: |
78 | | friend class QuotedPrintableCodec; |
79 | | QuotedPrintableEncoder(Codec::NewlineType newline = Codec::NewlineLF) |
80 | 0 | : Encoder(newline) |
81 | 0 | , mCurrentLineLength(0) |
82 | 0 | , mAccu(0) |
83 | 0 | , mInputBufferReadCursor(0) |
84 | 0 | , mInputBufferWriteCursor(0) |
85 | 0 | , mAccuNeedsEncoding(Never) |
86 | 0 | , mSawLineEnd(false) |
87 | 0 | , mSawCR(false) |
88 | 0 | , mFinishing(false) |
89 | 0 | , mFinished(false) |
90 | 0 | { |
91 | 0 | } |
92 | | |
93 | | bool needsEncoding(uchar ch) |
94 | 0 | { |
95 | 0 | return ch > '~' || (ch < ' ' && ch != '\t') || ch == '='; |
96 | 0 | } |
97 | | bool needsEncodingAtEOL(uchar ch) |
98 | 0 | { |
99 | 0 | return ch == ' ' || ch == '\t'; |
100 | 0 | } |
101 | | bool needsEncodingAtBOL(uchar ch) |
102 | 0 | { |
103 | 0 | return ch == 'F' || ch == '.' || ch == '-'; |
104 | 0 | } |
105 | | bool fillInputBuffer(const char *&scursor, const char *const send); |
106 | | bool processNextChar(); |
107 | | void createOutputBuffer(char *&dcursor, const char *const dend); |
108 | | |
109 | | public: |
110 | | ~QuotedPrintableEncoder() override |
111 | 0 | { |
112 | 0 | } |
113 | | |
114 | | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
115 | | |
116 | | bool finish(char *&dcursor, const char *const dend) override; |
117 | | }; |
118 | | |
119 | | class QuotedPrintableDecoder : public Decoder |
120 | | { |
121 | | const char mEscapeChar; |
122 | | char mBadChar; |
123 | | /** @p accu holds the msb nibble of the hexchar or zero. */ |
124 | | uchar mAccu; |
125 | | /** @p insideHexChar is true iff we're inside an hexchar (=XY). |
126 | | Together with @ref mAccu, we can build this states: |
127 | | @li @p insideHexChar == @p false: |
128 | | normal text |
129 | | @li @p insideHexChar == @p true, @p mAccu == 0: |
130 | | saw the leading '=' |
131 | | @li @p insideHexChar == @p true, @p mAccu != 0: |
132 | | saw the first nibble '=X' |
133 | | */ |
134 | | const bool mQEncoding; |
135 | | bool mInsideHexChar; |
136 | | bool mFlushing; |
137 | | bool mExpectLF; |
138 | | bool mHaveAccu; |
139 | | /** @p mLastChar holds the first char of an encoded char, so that |
140 | | we are able to keep the first char if the second char is invalid. */ |
141 | | char mLastChar; |
142 | | |
143 | | protected: |
144 | | friend class QuotedPrintableCodec; |
145 | | friend class Rfc2047QEncodingCodec; |
146 | | friend class Rfc2231EncodingCodec; |
147 | | QuotedPrintableDecoder(Codec::NewlineType newline = Codec::NewlineLF, bool aQEncoding = false, char aEscapeChar = '=') |
148 | 89.9k | : Decoder(newline) |
149 | 89.9k | , mEscapeChar(aEscapeChar) |
150 | 89.9k | , mBadChar(0) |
151 | 89.9k | , mAccu(0) |
152 | 89.9k | , mQEncoding(aQEncoding) |
153 | 89.9k | , mInsideHexChar(false) |
154 | 89.9k | , mFlushing(false) |
155 | 89.9k | , mExpectLF(false) |
156 | 89.9k | , mHaveAccu(false) |
157 | 89.9k | , mLastChar(0) |
158 | 89.9k | { |
159 | 89.9k | } |
160 | | |
161 | | public: |
162 | | ~QuotedPrintableDecoder() override |
163 | 0 | { |
164 | 0 | } |
165 | | |
166 | | bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
167 | | bool finish(char *&dcursor, const char *const dend) override; |
168 | | }; |
169 | | |
170 | | class Rfc2047QEncodingEncoder : public Encoder |
171 | | { |
172 | | uchar mAccu; |
173 | | uchar mStepNo; |
174 | | const char mEscapeChar; |
175 | | bool mInsideFinishing : 1; |
176 | | |
177 | | protected: |
178 | | friend class Rfc2047QEncodingCodec; |
179 | | friend class Rfc2231EncodingCodec; |
180 | | Rfc2047QEncodingEncoder(Codec::NewlineType newline = Codec::NewlineLF, char aEscapeChar = '=') |
181 | 0 | : Encoder(newline) |
182 | 0 | , mAccu(0) |
183 | 0 | , mStepNo(0) |
184 | 0 | , mEscapeChar(aEscapeChar) |
185 | 0 | , mInsideFinishing(false) |
186 | 0 | { |
187 | | // else an optimization in ::encode might break. |
188 | 0 | assert(aEscapeChar == '=' || aEscapeChar == '%'); |
189 | 0 | } |
190 | | |
191 | | bool isEText(uchar ch) |
192 | 0 | { |
193 | 0 | return (ch < 128) && (eTextMap[ch / 8] & 0x80 >> ch % 8); |
194 | 0 | } |
195 | | |
196 | | // this code assumes that isEText( mEscapeChar ) == false! |
197 | | bool needsEncoding(uchar ch) |
198 | 0 | { |
199 | 0 | if (ch > 'z') { |
200 | 0 | return true; // {|}~ DEL and 8bit chars need |
201 | 0 | } |
202 | 0 | if (!isEText(ch)) { |
203 | 0 | return true; // all but a-zA-Z0-9!/*+- need, too |
204 | 0 | } |
205 | 0 | if (mEscapeChar == '%' && (ch == '*' || ch == '/')) { |
206 | 0 | return true; // not allowed in rfc2231 encoding |
207 | 0 | } |
208 | 0 | return false; |
209 | 0 | } |
210 | | |
211 | | public: |
212 | | ~Rfc2047QEncodingEncoder() override |
213 | 0 | { |
214 | 0 | } |
215 | | |
216 | | bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; |
217 | | bool finish(char *&dcursor, const char *const dend) override; |
218 | | }; |
219 | | |
220 | | // this doesn't access any member variables, so it can be defined static |
221 | | // but then we can't call it from virtual functions |
222 | | static qsizetype QuotedPrintableDecoder_maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) |
223 | 89.8k | { |
224 | | // all chars unencoded: |
225 | 89.8k | qsizetype result = insize; |
226 | | // but maybe all of them are \n and we need to make them \r\n :-o |
227 | 89.8k | if (newline == Codec::NewlineCRLF) { |
228 | 0 | result += insize; |
229 | 0 | } |
230 | | |
231 | | // there might be an accu plus escape |
232 | 89.8k | result += 2; |
233 | | |
234 | 89.8k | return result; |
235 | 89.8k | } |
236 | | |
// Allocates a new QuotedPrintableEncoder for the given newline type.
// The object is created with new and not deleted in this file, so the
// caller is presumably responsible for deleting it.
Encoder *QuotedPrintableCodec::makeEncoder(Codec::NewlineType newline) const
{
    return new QuotedPrintableEncoder(newline);
}
241 | | |
// Allocates a new QuotedPrintableDecoder with the default settings
// (no '_' handling, escape char '=').
// The object is created with new and not deleted in this file, so the
// caller is presumably responsible for deleting it.
Decoder *QuotedPrintableCodec::makeDecoder(Codec::NewlineType newline) const
{
    return new QuotedPrintableDecoder(newline);
}
246 | | |
// Returns an upper bound for the number of chars produced when decoding
// insize chars; forwards to the shared static helper.
qsizetype QuotedPrintableCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const
{
    return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline);
}
251 | | |
// Allocates a new Rfc2047QEncodingEncoder using the default escape char '='.
// The object is created with new and not deleted in this file, so the
// caller is presumably responsible for deleting it.
Encoder *Rfc2047QEncodingCodec::makeEncoder(Codec::NewlineType newline) const
{
    return new Rfc2047QEncodingEncoder(newline);
}
256 | | |
// Allocates a new QuotedPrintableDecoder in Q-encoding mode (second
// argument true: '_' decodes to SPACE) with the default escape char '='.
// The object is created with new and not deleted in this file, so the
// caller is presumably responsible for deleting it.
Decoder *Rfc2047QEncodingCodec::makeDecoder(Codec::NewlineType newline) const
{
    return new QuotedPrintableDecoder(newline, true);
}
261 | | |
// Returns an upper bound for the number of chars produced when decoding
// insize chars; forwards to the shared static helper.
qsizetype Rfc2047QEncodingCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const
{
    return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline);
}
266 | | |
// Allocates a new Rfc2047QEncodingEncoder that uses '%' as escape char.
// The object is created with new and not deleted in this file, so the
// caller is presumably responsible for deleting it.
Encoder *Rfc2231EncodingCodec::makeEncoder(Codec::NewlineType newline) const
{
    return new Rfc2047QEncodingEncoder(newline, '%');
}
271 | | |
// Allocates a new QuotedPrintableDecoder in Q-encoding mode (second
// argument true: '_' decodes to SPACE) that uses '%' as escape char.
// The object is created with new and not deleted in this file, so the
// caller is presumably responsible for deleting it.
Decoder *Rfc2231EncodingCodec::makeDecoder(Codec::NewlineType newline) const
{
    return new QuotedPrintableDecoder(newline, true, '%');
}
276 | | |
// Returns an upper bound for the number of chars produced when decoding
// insize chars; forwards to the shared static helper.
qsizetype Rfc2231EncodingCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const
{
    return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline);
}
281 | | |
282 | | /********************************************************/ |
283 | | /********************************************************/ |
284 | | /********************************************************/ |
285 | | |
286 | | bool QuotedPrintableDecoder::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
287 | 90.2k | { |
288 | 90.2k | if (d->newline == Codec::NewlineCRLF) { |
289 | 0 | qWarning() << "CRLF output for decoders isn't yet supported!"; |
290 | 0 | } |
291 | | |
292 | 63.5M | while (scursor != send && dcursor != dend) { |
293 | 63.4M | if (mFlushing) { |
294 | | // we have to flush chars in the aftermath of a decoding |
295 | | // error. The way to request a flush is to |
296 | | // - store the offending character in mBadChar and |
297 | | // - set mFlushing to true. |
298 | | // The supported cases are (H: hexchar, X: bad char): |
299 | | // =X, =HX, CR |
300 | | // mBadChar is only written out if it is not by itself illegal in |
301 | | // quoted-printable (e.g. CTLs, 8Bits). |
302 | | // A fast way to suppress mBadChar output is to set it to NUL. |
303 | 1.65M | if (mInsideHexChar) { |
304 | | // output '=' |
305 | 822k | *dcursor++ = mEscapeChar; |
306 | 822k | mInsideHexChar = false; |
307 | 836k | } else if (mHaveAccu) { |
308 | | // output the high nibble of the accumulator: |
309 | 13.2k | *dcursor++ = mLastChar; |
310 | 13.2k | mHaveAccu = false; |
311 | 13.2k | mAccu = 0; |
312 | 822k | } else { |
313 | | // output mBadChar |
314 | 822k | assert(mAccu == 0); |
315 | 822k | if (mBadChar) { |
316 | 820k | if (mBadChar == '=') { |
317 | 610k | mInsideHexChar = true; |
318 | 610k | } else { |
319 | 210k | *dcursor++ = mBadChar; |
320 | 210k | } |
321 | 820k | mBadChar = 0; |
322 | 820k | } |
323 | 822k | mFlushing = false; |
324 | 822k | } |
325 | 1.65M | continue; |
326 | 1.65M | } |
327 | 63.4M | assert(mBadChar == 0); |
328 | | |
329 | 61.8M | uchar ch = *scursor++; |
330 | | |
331 | 61.8M | if (mExpectLF && ch != '\n') { |
332 | | // qWarning() << "QuotedPrintableDecoder:" |
333 | | // "illegally formed soft linebreak or lonely CR!"; |
334 | 5.19k | mInsideHexChar = false; |
335 | 5.19k | mExpectLF = false; |
336 | 5.19k | if (mAccu != 0) { |
337 | 1.64k | return false; |
338 | 1.64k | } |
339 | 5.19k | } |
340 | | |
341 | 61.8M | if (mInsideHexChar) { |
342 | 1.05M | uchar value = 255; |
343 | | // next char(s) represent nibble instead of itself: |
344 | 1.05M | if (ch <= '9') { |
345 | 171k | if (ch >= '0') { |
346 | 126k | value = ch - '0'; |
347 | 126k | } else { |
348 | 44.6k | switch (ch) { |
349 | 2.24k | case '\r': |
350 | 2.24k | mExpectLF = true; |
351 | 2.24k | break; |
352 | 14.2k | case '\n': |
353 | | // soft line break, but only if mAccu is NUL. |
354 | 14.2k | if (!mHaveAccu) { |
355 | 6.43k | mExpectLF = false; |
356 | 6.43k | mInsideHexChar = false; |
357 | 6.43k | break; |
358 | 6.43k | } |
359 | | // else fall through |
360 | 35.9k | default: |
361 | | // qWarning() << "QuotedPrintableDecoder:" |
362 | | // "illegally formed hex char! Outputting verbatim."; |
363 | 35.9k | mBadChar = ch; |
364 | 35.9k | mFlushing = true; |
365 | 44.6k | } |
366 | 44.6k | continue; |
367 | 44.6k | } |
368 | 878k | } else { // ch > '9' |
369 | 878k | if (ch <= 'F') { |
370 | 736k | if (ch >= 'A') { |
371 | 81.7k | value = 10 + ch - 'A'; |
372 | 654k | } else { // [:-@] |
373 | 654k | mBadChar = ch; |
374 | 654k | mFlushing = true; |
375 | 654k | continue; |
376 | 654k | } |
377 | 736k | } else { // ch > 'F' |
378 | 142k | if (ch <= 'f' && ch >= 'a') { |
379 | 7.71k | value = 10 + ch - 'a'; |
380 | 134k | } else { |
381 | 134k | mBadChar = ch; |
382 | 134k | mFlushing = true; |
383 | 134k | continue; |
384 | 134k | } |
385 | 142k | } |
386 | 878k | } |
387 | | |
388 | 1.05M | assert(value < 16); |
389 | 216k | assert(mBadChar == 0); |
390 | 216k | assert(!mExpectLF); |
391 | | |
392 | 216k | if (mHaveAccu) { |
393 | 100k | *dcursor++ = char(mAccu | value); |
394 | 100k | mAccu = 0; |
395 | 100k | mHaveAccu = false; |
396 | 100k | mInsideHexChar = false; |
397 | 115k | } else { |
398 | 115k | mHaveAccu = true; |
399 | 115k | mAccu = value << 4; |
400 | 115k | mLastChar = ch; |
401 | 115k | } |
402 | 60.7M | } else { // not mInsideHexChar |
403 | 60.7M | if ((ch <= '~' && ch >= ' ') || ch == '\t') { |
404 | 24.7M | if (ch == mEscapeChar) { |
405 | 329k | mInsideHexChar = true; |
406 | 24.4M | } else if (mQEncoding && ch == '_') { |
407 | 160k | *dcursor++ = char(0x20); |
408 | 24.3M | } else { |
409 | 24.3M | *dcursor++ = char(ch); |
410 | 24.3M | } |
411 | 35.9M | } else if (ch == '\n') { |
412 | 158k | *dcursor++ = '\n'; |
413 | 158k | mExpectLF = false; |
414 | 35.8M | } else if (ch == '\r') { |
415 | 2.99k | mExpectLF = true; |
416 | 35.7M | } else { |
417 | | // qWarning() << "QuotedPrintableDecoder:" << ch << |
418 | | // "illegal character in input stream!"; |
419 | 35.7M | *dcursor++ = char(ch); |
420 | 35.7M | } |
421 | 60.7M | } |
422 | 61.8M | } |
423 | | |
424 | 88.5k | return scursor == send; |
425 | 90.2k | } |
426 | | |
427 | | bool QuotedPrintableDecoder::finish(char *&dcursor, const char *const dend) |
428 | 8.28k | { |
429 | 11.8k | while ((mInsideHexChar || mHaveAccu || mFlushing) && dcursor != dend) { |
430 | | // we have to flush chars |
431 | 3.60k | if (mInsideHexChar) { |
432 | | // output '=' |
433 | 1.35k | *dcursor++ = mEscapeChar; |
434 | 1.35k | mInsideHexChar = false; |
435 | 2.25k | } else if (mHaveAccu) { |
436 | | // output the high nibble of the accumulator: |
437 | 903 | *dcursor++ = mLastChar; |
438 | 903 | mHaveAccu = false; |
439 | 903 | mAccu = 0; |
440 | 1.35k | } else { |
441 | | // output mBadChar |
442 | 1.35k | assert(mAccu == 0); |
443 | 1.35k | if (mBadChar) { |
444 | 705 | *dcursor++ = mBadChar; |
445 | 705 | mBadChar = 0; |
446 | 705 | } |
447 | 1.35k | mFlushing = false; |
448 | 1.35k | } |
449 | 3.60k | } |
450 | | |
451 | | // return false if we are not finished yet; note that mInsideHexChar is always false |
452 | 8.28k | return !(mHaveAccu || mFlushing); |
453 | 8.28k | } |
454 | | |
455 | | bool QuotedPrintableEncoder::fillInputBuffer(const char *&scursor, const char *const send) |
456 | 0 | { |
457 | | // Don't read more if there's still a tail of a line in the buffer: |
458 | 0 | if (mSawLineEnd) { |
459 | 0 | return true; |
460 | 0 | } |
461 | | |
462 | | // Read until the buffer is full or we have found CRLF or LF (which |
463 | | // don't end up in the input buffer): |
464 | 0 | for (; (mInputBufferWriteCursor + 1) % 16 != mInputBufferReadCursor && scursor != send; mInputBufferWriteCursor++) { |
465 | 0 | char ch = *scursor++; |
466 | 0 | if (ch == '\r') { |
467 | 0 | mSawCR = true; |
468 | 0 | } else if (ch == '\n') { |
469 | | // remove the CR from the input buffer (if any) and return that |
470 | | // we found a line ending: |
471 | 0 | if (mSawCR) { |
472 | 0 | mSawCR = false; |
473 | 0 | assert(mInputBufferWriteCursor != mInputBufferReadCursor); |
474 | 0 | mInputBufferWriteCursor--; |
475 | 0 | } |
476 | 0 | mSawLineEnd = true; |
477 | 0 | return true; // saw CRLF or LF |
478 | 0 | } else { |
479 | 0 | mSawCR = false; |
480 | 0 | } |
481 | 0 | mInputBuffer[mInputBufferWriteCursor] = ch; |
482 | 0 | } |
483 | 0 | mSawLineEnd = false; |
484 | 0 | return false; // didn't see a line ending... |
485 | 0 | } |
486 | | |
487 | | bool QuotedPrintableEncoder::processNextChar() |
488 | 0 | { |
489 | | // If we process a buffer which doesn't end in a line break, we |
490 | | // can't process all of it, since the next chars that will be read |
491 | | // could be a line break. So we empty the buffer only until a fixed |
492 | | // number of chars is left (except when mFinishing, which means that |
493 | | // the data doesn't end in newline): |
494 | 0 | const int minBufferFillWithoutLineEnd = 4; |
495 | |
|
496 | 0 | assert(d->outputBufferCursor == 0); |
497 | | |
498 | 0 | int bufferFill = int(mInputBufferWriteCursor) - int(mInputBufferReadCursor); |
499 | 0 | if (bufferFill < 0) { |
500 | 0 | bufferFill += 16; |
501 | 0 | } |
502 | |
|
503 | 0 | assert(bufferFill >= 0 && bufferFill <= 15); |
504 | | |
505 | 0 | if (!mFinishing // |
506 | 0 | && !mSawLineEnd // |
507 | 0 | && bufferFill < minBufferFillWithoutLineEnd) { |
508 | 0 | return false; |
509 | 0 | } |
510 | | |
511 | | // buffer is empty, return false: |
512 | 0 | if (mInputBufferReadCursor == mInputBufferWriteCursor) { |
513 | 0 | return false; |
514 | 0 | } |
515 | | |
516 | | // Real processing goes here: |
517 | 0 | mAccu = mInputBuffer[mInputBufferReadCursor++]; |
518 | 0 | if (needsEncoding(mAccu)) { // always needs encoding or |
519 | 0 | mAccuNeedsEncoding = Definitely; |
520 | 0 | } else if ((mSawLineEnd || mFinishing) // needs encoding at end of line |
521 | 0 | && bufferFill == 1 // or end of buffer |
522 | 0 | && needsEncodingAtEOL(mAccu)) { |
523 | 0 | mAccuNeedsEncoding = Definitely; |
524 | 0 | } else if (needsEncodingAtBOL(mAccu)) { |
525 | 0 | mAccuNeedsEncoding = AtBOL; |
526 | 0 | } else { |
527 | | // never needs encoding |
528 | 0 | mAccuNeedsEncoding = Never; |
529 | 0 | } |
530 | |
|
531 | 0 | return true; |
532 | 0 | } |
533 | | |
534 | | // Outputs processed (verbatim or hex-encoded) chars and inserts soft |
535 | | // line breaks as necessary. Depends on processNextChar's directions |
536 | | // on whether to encode the current char, and whether |
537 | | // the current char is the last one in it's input line: |
538 | | void QuotedPrintableEncoder::createOutputBuffer(char *&dcursor, const char *const dend) |
539 | 0 | { |
540 | 0 | const int maxLineLength = 76; // rfc 2045 |
541 | |
|
542 | 0 | assert(d->outputBufferCursor == 0); |
543 | | |
544 | | /* clang-format off */ |
545 | 0 | bool lastOneOnThisLine = mSawLineEnd |
546 | 0 | && mInputBufferReadCursor == mInputBufferWriteCursor; |
547 | | /* clang-format on */ |
548 | |
|
549 | 0 | int neededSpace = 1; |
550 | 0 | if (mAccuNeedsEncoding == Definitely) { |
551 | 0 | neededSpace = 3; |
552 | 0 | } |
553 | | |
554 | | // reserve space for the soft hyphen (=) |
555 | 0 | if (!lastOneOnThisLine) { |
556 | 0 | neededSpace++; |
557 | 0 | } |
558 | |
|
559 | 0 | if (mCurrentLineLength > maxLineLength - neededSpace) { |
560 | | // current line too short, insert soft line break: |
561 | 0 | write('=', dcursor, dend); |
562 | 0 | writeCRLF(dcursor, dend); |
563 | 0 | mCurrentLineLength = 0; |
564 | 0 | } |
565 | |
|
566 | 0 | if (Never == mAccuNeedsEncoding // |
567 | 0 | || (AtBOL == mAccuNeedsEncoding && mCurrentLineLength != 0)) { |
568 | 0 | write(mAccu, dcursor, dend); |
569 | 0 | mCurrentLineLength++; |
570 | 0 | } else { |
571 | 0 | write('=', dcursor, dend); |
572 | 0 | write(binToHex(highNibble(mAccu)), dcursor, dend); |
573 | 0 | write(binToHex(lowNibble(mAccu)), dcursor, dend); |
574 | 0 | mCurrentLineLength += 3; |
575 | 0 | } |
576 | 0 | } |
577 | | |
578 | | bool QuotedPrintableEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
579 | 0 | { |
580 | | // support probing by the caller: |
581 | 0 | if (mFinishing) { |
582 | 0 | return true; |
583 | 0 | } |
584 | | |
585 | 0 | while (scursor != send && dcursor != dend) { |
586 | 0 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
587 | 0 | return scursor == send; |
588 | 0 | } |
589 | | |
590 | 0 | assert(d->outputBufferCursor == 0); |
591 | | |
592 | | // fill input buffer until eol has been reached or until the |
593 | | // buffer is full, whatever comes first: |
594 | 0 | fillInputBuffer(scursor, send); |
595 | |
|
596 | 0 | if (processNextChar()) { |
597 | | // there was one... |
598 | 0 | createOutputBuffer(dcursor, dend); |
599 | 0 | } else if (mSawLineEnd && mInputBufferWriteCursor == mInputBufferReadCursor) { |
600 | | // load a hard line break into output buffer: |
601 | 0 | writeCRLF(dcursor, dend); |
602 | | // signal fillInputBuffer() we are ready for the next line: |
603 | 0 | mSawLineEnd = false; |
604 | 0 | mCurrentLineLength = 0; |
605 | 0 | } else { |
606 | | // we are supposedly finished with this input block: |
607 | 0 | break; |
608 | 0 | } |
609 | 0 | } |
610 | | |
611 | | // make sure we write as much as possible and don't stop _writing_ |
612 | | // just because we have no more _input_: |
613 | 0 | if (d->outputBufferCursor) { |
614 | 0 | flushOutputBuffer(dcursor, dend); |
615 | 0 | } |
616 | |
|
617 | 0 | return scursor == send; |
618 | |
|
619 | 0 | } // encode |
620 | | |
621 | | bool QuotedPrintableEncoder::finish(char *&dcursor, const char *const dend) |
622 | 0 | { |
623 | 0 | mFinishing = true; |
624 | |
|
625 | 0 | if (mFinished) { |
626 | 0 | return flushOutputBuffer(dcursor, dend); |
627 | 0 | } |
628 | | |
629 | 0 | while (dcursor != dend) { |
630 | 0 | if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { |
631 | 0 | return false; |
632 | 0 | } |
633 | | |
634 | 0 | assert(d->outputBufferCursor == 0); |
635 | | |
636 | 0 | if (processNextChar()) { |
637 | | // there was one... |
638 | 0 | createOutputBuffer(dcursor, dend); |
639 | 0 | } else if (mSawLineEnd && mInputBufferWriteCursor == mInputBufferReadCursor) { |
640 | | // load a hard line break into output buffer: |
641 | 0 | writeCRLF(dcursor, dend); |
642 | 0 | mSawLineEnd = false; |
643 | 0 | mCurrentLineLength = 0; |
644 | 0 | } else { |
645 | 0 | mFinished = true; |
646 | 0 | return flushOutputBuffer(dcursor, dend); |
647 | 0 | } |
648 | 0 | } |
649 | | |
650 | 0 | return mFinished && !d->outputBufferCursor; |
651 | |
|
652 | 0 | } // finish |
653 | | |
654 | | bool Rfc2047QEncodingEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) |
655 | 0 | { |
656 | 0 | if (mInsideFinishing) { |
657 | 0 | return true; |
658 | 0 | } |
659 | | |
660 | 0 | while (scursor != send && dcursor != dend) { |
661 | 0 | uchar value = 0; |
662 | 0 | switch (mStepNo) { |
663 | 0 | case 0: |
664 | | // read the next char and decide if and how do encode: |
665 | 0 | mAccu = *scursor++; |
666 | 0 | if (!needsEncoding(mAccu)) { |
667 | 0 | *dcursor++ = char(mAccu); |
668 | 0 | } else if (mEscapeChar == '=' && mAccu == 0x20) { |
669 | | // shortcut encoding for 0x20 (latin-1/us-ascii SPACE) |
670 | | // (not for rfc2231 encoding) |
671 | 0 | *dcursor++ = '_'; |
672 | 0 | } else { |
673 | | // needs =XY encoding - write escape char: |
674 | 0 | *dcursor++ = mEscapeChar; |
675 | 0 | mStepNo = 1; |
676 | 0 | } |
677 | 0 | continue; |
678 | 0 | case 1: |
679 | | // extract hi-nibble: |
680 | 0 | value = highNibble(mAccu); |
681 | 0 | mStepNo = 2; |
682 | 0 | break; |
683 | 0 | case 2: |
684 | | // extract lo-nibble: |
685 | 0 | value = lowNibble(mAccu); |
686 | 0 | mStepNo = 0; |
687 | 0 | break; |
688 | 0 | default: |
689 | 0 | assert(0); |
690 | 0 | } |
691 | | |
692 | | // and write: |
693 | 0 | *dcursor++ = binToHex(value); |
694 | 0 | } |
695 | | |
696 | 0 | return scursor == send; |
697 | 0 | } // encode |
698 | | |
699 | | bool Rfc2047QEncodingEncoder::finish(char *&dcursor, const char *const dend) |
700 | 0 | { |
701 | 0 | mInsideFinishing = true; |
702 | | |
703 | | // write the last bits of mAccu, if any: |
704 | 0 | while (mStepNo != 0 && dcursor != dend) { |
705 | 0 | uchar value = 0; |
706 | 0 | switch (mStepNo) { |
707 | 0 | case 1: |
708 | | // extract hi-nibble: |
709 | 0 | value = highNibble(mAccu); |
710 | 0 | mStepNo = 2; |
711 | 0 | break; |
712 | 0 | case 2: |
713 | | // extract lo-nibble: |
714 | 0 | value = lowNibble(mAccu); |
715 | 0 | mStepNo = 0; |
716 | 0 | break; |
717 | 0 | default: |
718 | 0 | assert(0); |
719 | 0 | } |
720 | | |
721 | | // and write: |
722 | 0 | *dcursor++ = binToHex(value); |
723 | 0 | } |
724 | | |
725 | 0 | return mStepNo == 0; |
726 | 0 | } |
727 | | |
728 | | } // namespace KCodecs |