/src/kcodecs/src/kcodecsqp.cpp

Source
/*  -*- c++ -*-
    SPDX-FileCopyrightText: 2002 Marc Mutz <mutz@kde.org>

    SPDX-License-Identifier: LGPL-2.0-or-later
*/

#include "kcodecsqp.h"
#include "kcodecs_p.h"

#include <QDebug>

#include <cassert>

using namespace KCodecs;

namespace KCodecs
{
// Bitmap over the 128 ASCII codes: character ch is represented by bit
// (0x80 >> (ch % 8)) of byte eTextMap[ch / 8]. A set bit marks a character
// that Rfc2047QEncodingEncoder::isEText() accepts. The set bits are:
// none except a-zA-Z0-9!*+-/
const uchar eTextMap[16] = {0x00, 0x00, 0x00, 0x00, 0x40, 0x35, 0xFF, 0xC0, 0x7F, 0xFF, 0xFF, 0xE0, 0x7F, 0xFF, 0xFF, 0xE0};

// some helpful functions:

/**
  Maps a 4-bit @p value onto its upper-case hexadecimal digit, i.e. the
  inputs [0,15] yield '0'..'9','A'..'F'. Nothing is masked or checked:
  inputs above 15 simply continue past 'F' and are not meaningful.
  @param value is an unsigned character containing the 4-bit input value.
  @return the character representing @p value.
*/
static inline char binToHex(uchar value)
{
    // 0..9 are offset from '0'; 10..15 are offset so that 10 lands on 'A'.
    return static_cast<char>(value + (value > 9 ? 'A' - 10 : '0'));
}

/**
  Extracts the upper four bits of @p ch, shifted down into the range [0,15].
  @param ch is an unsigned character containing the 8-bit input value.
  @return the high-order nibble of @p ch.
*/
static inline uchar highNibble(uchar ch)
{
    return static_cast<uchar>(ch >> 4);
}

/**
  Extracts the lower four bits of @p ch; the result is in the range [0,15].
  @param ch is an unsigned character containing the 8-bit input value.
  @return the low-order nibble of @p ch.
*/
static inline uchar lowNibble(uchar ch)
{
    return static_cast<uchar>(ch & 0x0F);
}

//
// QuotedPrintableCodec
//

/**
  Stateful encoder for the quoted-printable content transfer encoding.

  Input characters are staged in a 16-slot ring buffer (fillInputBuffer()),
  classified one at a time (processNextChar()) and then written verbatim or
  as "=XY" with soft line breaks inserted where needed (createOutputBuffer()).
*/
class QuotedPrintableEncoder : public Encoder
{
    /** Ring buffer holding input characters that have not been classified yet. */
    char mInputBuffer[16];
    /** Number of characters already written to the current output line. */
    uchar mCurrentLineLength; // 0..76
    /** The character currently being encoded. */
    uchar mAccu;
    /** Ring buffer index of the next character to classify. */
    uint mInputBufferReadCursor : 4; // 0..15
    /** Ring buffer index of the next free slot. */
    uint mInputBufferWriteCursor : 4; // 0..15
    /** Verdict of processNextChar() for mAccu: never encode, encode only
        when it would start an output line, or always encode. */
    enum {
        Never,
        AtBOL,
        Definitely,
    } mAccuNeedsEncoding;
    /** A line ending was read and the buffered line is not fully emitted yet. */
    bool mSawLineEnd : 1;
    /** The most recently buffered character was a CR. */
    bool mSawCR : 1;
    /** finish() has been called. */
    bool mFinishing : 1;
    /** finish() found nothing left to encode. */
    bool mFinished : 1;

protected:
    friend class QuotedPrintableCodec;
    QuotedPrintableEncoder(Codec::NewlineType newline = Codec::NewlineLF)
        : Encoder(newline)
        , mCurrentLineLength(0)
        , mAccu(0)
        , mInputBufferReadCursor(0)
        , mInputBufferWriteCursor(0)
        , mAccuNeedsEncoding(Never)
        , mSawLineEnd(false)
        , mSawCR(false)
        , mFinishing(false)
        , mFinished(false)
    {
    }

    /** @return true if @p ch must be hex-encoded wherever it occurs:
        '=', anything above '~', and control chars other than TAB. */
    bool needsEncoding(uchar ch)
    {
        if (ch == '=') {
            return true;
        }
        if (ch == '\t') {
            return false;
        }
        return ch < ' ' || ch > '~';
    }

    /** @return true if @p ch (space or TAB) must be encoded when it is the
        last character of a line. */
    bool needsEncodingAtEOL(uchar ch)
    {
        switch (ch) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
        }
    }

    /** @return true if @p ch ('F', '.' or '-') must be encoded when it is
        the first character of an output line. */
    bool needsEncodingAtBOL(uchar ch)
    {
        switch (ch) {
        case 'F':
        case '.':
        case '-':
            return true;
        default:
            return false;
        }
    }

    bool fillInputBuffer(const char *&scursor, const char *const send);
    bool processNextChar();
    void createOutputBuffer(char *&dcursor, const char *const dend);

public:
    ~QuotedPrintableEncoder() override = default;

    bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override;

    bool finish(char *&dcursor, const char *const dend) override;
};

/**
  Stateful decoder shared by the quoted-printable, RFC 2047 "Q" and
  RFC 2231 codecs; the three differ only in the escape character and in
  whether '_' stands for a space.

  Escape sequence state is tracked by two flags:
  @li @p mInsideHexChar == @p false:
  normal text
  @li @p mInsideHexChar == @p true, @p mHaveAccu == @p false:
  saw the leading escape character
  @li @p mInsideHexChar == @p true, @p mHaveAccu == @p true:
  saw the escape character and the first hex digit
*/
class QuotedPrintableDecoder : public Decoder
{
    /** The character introducing a hex-encoded byte ('=' unless overridden). */
    const char mEscapeChar;
    /** Offending character of a malformed escape sequence, kept until the
        flush logic has dealt with it; NUL if there is none. */
    char mBadChar;
    /** Holds the first hex digit shifted into the high nibble, or zero. */
    uchar mAccu;
    /** If true, an unescaped '_' decodes to a space (0x20). */
    const bool mQEncoding;
    /** True while an escape sequence is being read (see class comment). */
    bool mInsideHexChar;
    /** True while the pieces of a malformed escape sequence still have to
        be written out verbatim. */
    bool mFlushing;
    /** True after a CR was read, i.e. the next character should be LF. */
    bool mExpectLF;
    /** True once the first hex digit has been stored in mAccu. */
    bool mHaveAccu;
    /** @p mLastChar holds the first char of an encoded char, so that
        we are able to keep the first char if the second char is invalid. */
    char mLastChar;

protected:
    friend class QuotedPrintableCodec;
    friend class Rfc2047QEncodingCodec;
    friend class Rfc2231EncodingCodec;
    QuotedPrintableDecoder(Codec::NewlineType newline = Codec::NewlineLF, bool aQEncoding = false, char aEscapeChar = '=')
        : Decoder(newline)
        , mEscapeChar(aEscapeChar)
        , mBadChar(0)
        , mAccu(0)
        , mQEncoding(aQEncoding)
        , mInsideHexChar(false)
        , mFlushing(false)
        , mExpectLF(false)
        , mHaveAccu(false)
        , mLastChar(0)
    {
    }

public:
    ~QuotedPrintableDecoder() override = default;

    bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override;
    bool finish(char *&dcursor, const char *const dend) override;
};

/**
  Encoder for the RFC 2047 "Q" encoding (escape character '=') and, with
  '%' as escape character, for RFC 2231 parameter values.

  Encoding a byte as escape + two hex digits takes up to three output
  characters, so the progress within such a triple is kept in @p mStepNo.
*/
class Rfc2047QEncodingEncoder : public Encoder
{
    /** The input byte currently being written out. */
    uchar mAccu;
    /** 0: read a new byte; 1: high nibble pending; 2: low nibble pending. */
    uchar mStepNo;
    /** Either '=' or '%'. */
    const char mEscapeChar;
    /** Set once finish() has been called. */
    bool mInsideFinishing : 1;

protected:
    friend class Rfc2047QEncodingCodec;
    friend class Rfc2231EncodingCodec;
    Rfc2047QEncodingEncoder(Codec::NewlineType newline = Codec::NewlineLF, char aEscapeChar = '=')
        : Encoder(newline)
        , mAccu(0)
        , mStepNo(0)
        , mEscapeChar(aEscapeChar)
        , mInsideFinishing(false)
    {
        // else an optimization in ::encode might break.
        assert(aEscapeChar == '=' || aEscapeChar == '%');
    }

    /** @return true if @p ch is 7-bit and its bit is set in eTextMap. */
    bool isEText(uchar ch)
    {
        if (ch >= 128) {
            return false;
        }
        const int bitMask = 0x80 >> (ch % 8);
        return (eTextMap[ch / 8] & bitMask) != 0;
    }

    /** @return true if @p ch cannot be written verbatim.
        This code assumes that isEText( mEscapeChar ) == false! */
    bool needsEncoding(uchar ch)
    {
        // '*' and '/' are etext, but not allowed in rfc2231 encoding:
        const bool forbiddenInRfc2231 = mEscapeChar == '%' && (ch == '*' || ch == '/');
        // ch > 'z' covers {|}~ DEL and 8bit chars; !isEText() covers
        // everything else but a-zA-Z0-9!/*+-
        return ch > 'z' || !isEText(ch) || forbiddenInRfc2231;
    }

public:
    ~Rfc2047QEncodingEncoder() override = default;

    bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override;
    bool finish(char *&dcursor, const char *const dend) override;
};

// Upper bound for the number of bytes QuotedPrintableDecoder may produce
// from insize input bytes. A free function because it needs no decoder
// state; the maxDecodedSizeFor() overrides of all three codecs in this
// file forward to it.
static qsizetype QuotedPrintableDecoder_maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline)
{
    // Worst case is input without any encoded chars (one output byte per
    // input byte). With CRLF output, every one of them might be a \n that
    // has to become \r\n, doubling the size.
    const qsizetype newlineGrowth = (newline == Codec::NewlineCRLF) ? insize : 0;

    // plus two: there might be an accu plus escape
    return insize + newlineGrowth + 2;
}

// Returns a newly allocated QuotedPrintableEncoder configured with the
// given newline type. Nothing here releases the object, so the caller
// is presumably expected to delete it.
Encoder *QuotedPrintableCodec::makeEncoder(Codec::NewlineType newline) const
{
    return new QuotedPrintableEncoder(newline);
}

// Returns a newly allocated QuotedPrintableDecoder in its default
// configuration: '=' as escape char and no special handling of '_'.
// Nothing here releases the object, so the caller is presumably
// expected to delete it.
Decoder *QuotedPrintableCodec::makeDecoder(Codec::NewlineType newline) const
{
    return new QuotedPrintableDecoder(newline);
}

// Forwards to the bound shared by all decoders in this file:
// insize (doubled for CRLF output) plus two.
qsizetype QuotedPrintableCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const
{
    return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline);
}

// Returns a newly allocated Rfc2047QEncodingEncoder using its default
// escape char '='. Nothing here releases the object, so the caller is
// presumably expected to delete it.
Encoder *Rfc2047QEncodingCodec::makeEncoder(Codec::NewlineType newline) const
{
    return new Rfc2047QEncodingEncoder(newline);
}

// Returns a newly allocated QuotedPrintableDecoder with Q-encoding
// enabled (an unescaped '_' decodes to a space) and '=' as escape char.
// Nothing here releases the object, so the caller is presumably
// expected to delete it.
Decoder *Rfc2047QEncodingCodec::makeDecoder(Codec::NewlineType newline) const
{
    return new QuotedPrintableDecoder(newline, true);
}

// Forwards to the bound shared by all decoders in this file:
// insize (doubled for CRLF output) plus two.
qsizetype Rfc2047QEncodingCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const
{
    return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline);
}

// Returns a newly allocated Rfc2047QEncodingEncoder that escapes with
// '%' instead of '='; with that escape char the encoder also encodes
// '*' and '/' and does not use the '_' shortcut for spaces. Nothing here
// releases the object, so the caller is presumably expected to delete it.
Encoder *Rfc2231EncodingCodec::makeEncoder(Codec::NewlineType newline) const
{
    return new Rfc2047QEncodingEncoder(newline, '%');
}

// Returns a newly allocated QuotedPrintableDecoder using '%' as escape
// char. Nothing here releases the object, so the caller is presumably
// expected to delete it.
// NOTE(review): the Q-encoding flag is passed as true here as well, so an
// unescaped '_' decodes to a space although the matching encoder never
// produces '_' for spaces - confirm that this is intended.
Decoder *Rfc2231EncodingCodec::makeDecoder(Codec::NewlineType newline) const
{
    return new QuotedPrintableDecoder(newline, true, '%');
}

// Forwards to the bound shared by all decoders in this file:
// insize (doubled for CRLF output) plus two.
qsizetype Rfc2231EncodingCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const
{
    return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline);
}

/********************************************************/
/********************************************************/
/********************************************************/

/**
  Decodes input from [@p scursor, @p send) into [@p dcursor, @p dend).

  Both cursors are passed by reference and are advanced as input is
  consumed and output is written. Every loop iteration writes at most one
  output byte, so the loop condition alone guards against overrunning
  @p dend. Malformed escape sequences are not dropped: their pieces are
  replayed verbatim through the flushing branch at the top of the loop.
  State that is still pending when the input runs out (a lone escape char,
  a first hex digit, a bad char) is left for finish().

  @return true if all input has been consumed; false if the loop stopped
  with input left (output space ran out) or after the early return for a
  broken line ending below.
*/
bool QuotedPrintableDecoder::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend)
{
    // Only warns; decoding proceeds and emits plain '\n' for line endings.
    if (d->newline == Codec::NewlineCRLF) {
        qWarning() << "CRLF output for decoders isn't yet supported!";
    }

    while (scursor != send && dcursor != dend) {
        if (mFlushing) {
            // we have to flush chars in the aftermath of a decoding
            // error. The way to request a flush is to
            // - store the offending character in mBadChar and
            // - set mFlushing to true.
            // The supported cases are (H: hexchar, X: bad char):
            // =X, =HX, CR
            // mBadChar is only written out if it is not by itself illegal in
            // quoted-printable (e.g. CTLs, 8Bits).
            // A fast way to suppress mBadChar output is to set it to NUL.
            //
            // One piece is handled per iteration, in input order: escape
            // char, then the first hex digit, then the bad char itself.
            if (mInsideHexChar) {
                // output '='
                *dcursor++ = mEscapeChar;
                mInsideHexChar = false;
            } else if (mHaveAccu) {
                // output the high nibble of the accumulator:
                *dcursor++ = mLastChar;
                mHaveAccu = false;
                mAccu = 0;
            } else {
                // output mBadChar
                assert(mAccu == 0);
                if (mBadChar) {
                    if (mBadChar == '=') {
                        // a '=' is not written here; it is treated as the
                        // start of a new escape sequence instead
                        mInsideHexChar = true;
                    } else {
                        *dcursor++ = mBadChar;
                    }
                    mBadChar = 0;
                }
                mFlushing = false;
            }
            continue;
        }
        assert(mBadChar == 0);

        uchar ch = *scursor++;

        if (mExpectLF && ch != '\n') {
            // qWarning() << "QuotedPrintableDecoder:"
            //              "illegally formed soft linebreak or lonely CR!";
            // A pending escape char is discarded here, and ch is then
            // handled as ordinary text by the code below.
            mInsideHexChar = false;
            mExpectLF = false;
            // NOTE(review): this tests the accumulated nibble, not mHaveAccu,
            // so a pending first hex digit '0' does not take this path, and
            // ch has already been consumed when we return - confirm intended.
            if (mAccu != 0) {
                return false;
            }
        }

        if (mInsideHexChar) {
            // 255 = "not a hex digit"; every path that leaves it at 255
            // ends in a continue, see the assert below.
            uchar value = 255;
            // next char(s) represent nibble instead of itself:
            if (ch <= '9') {
                if (ch >= '0') {
                    value = ch - '0';
                } else {
                    switch (ch) {
                    case '\r':
                        mExpectLF = true;
                        break;
                    case '\n':
                        // soft line break, but only if no first hex digit
                        // is pending (mHaveAccu is false).
                        if (!mHaveAccu) {
                            mExpectLF = false;
                            mInsideHexChar = false;
                            break;
                        }
                    // else fall through
                    default:
                        // qWarning() << "QuotedPrintableDecoder:"
                        //              "illegally formed hex char! Outputting verbatim.";
                        mBadChar = ch;
                        mFlushing = true;
                    }
                    continue;
                }
            } else { // ch > '9'
                if (ch <= 'F') {
                    if (ch >= 'A') {
                        value = 10 + ch - 'A';
                    } else { // [:-@]
                        mBadChar = ch;
                        mFlushing = true;
                        continue;
                    }
                } else { // ch > 'F'
                    // lower-case hex digits are accepted as well
                    if (ch <= 'f' && ch >= 'a') {
                        value = 10 + ch - 'a';
                    } else {
                        mBadChar = ch;
                        mFlushing = true;
                        continue;
                    }
                }
            }

            assert(value < 16);
            assert(mBadChar == 0);
            assert(!mExpectLF);

            if (mHaveAccu) {
                // second hex digit: the byte is complete
                *dcursor++ = char(mAccu | value);
                mAccu = 0;
                mHaveAccu = false;
                mInsideHexChar = false;
            } else {
                // first hex digit: keep the nibble, and the raw char in
                // case the sequence turns out malformed and must be replayed
                mHaveAccu = true;
                mAccu = value << 4;
                mLastChar = ch;
            }
        } else { // not mInsideHexChar
            if ((ch <= '~' && ch >= ' ') || ch == '\t') {
                if (ch == mEscapeChar) {
                    mInsideHexChar = true;
                } else if (mQEncoding && ch == '_') {
                    *dcursor++ = char(0x20);
                } else {
                    *dcursor++ = char(ch);
                }
            } else if (ch == '\n') {
                *dcursor++ = '\n';
                mExpectLF = false;
            } else if (ch == '\r') {
                // not written out; a following '\n' produces the line break
                mExpectLF = true;
            } else {
                // qWarning() << "QuotedPrintableDecoder:" << ch <<
                //  "illegal character in input stream!";
                // passed through unchanged nevertheless
                *dcursor++ = char(ch);
            }
        }
    }

    return scursor == send;
}

/**
  Writes out whatever decode() left pending when the input ended: a lone
  escape character, the first hex digit of an incomplete escape sequence
  and/or a bad character awaiting its flush. These are emitted verbatim,
  in input order, one byte per loop iteration.

  @param dcursor output cursor; advanced past every byte written.
  @param dend end of the output buffer.
  @return true if nothing is pending any more; false if the output buffer
  ran out first, in which case finish() has to be called again with more
  output space.
*/
bool QuotedPrintableDecoder::finish(char *&dcursor, const char *const dend)
{
    while ((mInsideHexChar || mHaveAccu || mFlushing) && dcursor != dend) {
        // we have to flush chars
        if (mInsideHexChar) {
            // output '='
            *dcursor++ = mEscapeChar;
            mInsideHexChar = false;
        } else if (mHaveAccu) {
            // output the high nibble of the accumulator:
            *dcursor++ = mLastChar;
            mHaveAccu = false;
            mAccu = 0;
        } else {
            // output mBadChar
            assert(mAccu == 0);
            if (mBadChar) {
                *dcursor++ = mBadChar;
                mBadChar = 0;
            }
            mFlushing = false;
        }
    }

    // Return false if we are not finished yet. mInsideHexChar has to be
    // checked as well: it is cleared by the first loop iteration, but when
    // we are entered with dcursor == dend the loop does not run at all and
    // a pending escape char would otherwise be dropped silently.
    return !(mInsideHexChar || mHaveAccu || mFlushing);
}

/**
  Moves input from [@p scursor, @p send) into the ring buffer until the
  buffer is full, the input is exhausted or a line ending (LF or CRLF) has
  been read. The line ending itself never stays in the buffer.

  @return true if a line ending has been seen (possibly by an earlier call
  whose line is not fully processed yet), false otherwise.
*/
bool QuotedPrintableEncoder::fillInputBuffer(const char *&scursor, const char *const send)
{
    // The tail of the previous line has to be drained before reading on:
    if (mSawLineEnd) {
        return true;
    }

    while (scursor != send) {
        // One slot always stays free, so that a full buffer can be told
        // apart from an empty one:
        const bool bufferFull = (mInputBufferWriteCursor + 1) % 16 == mInputBufferReadCursor;
        if (bufferFull) {
            break;
        }

        const char ch = *scursor++;

        if (ch == '\n') {
            if (mSawCR) {
                // CRLF: take back the CR that was buffered just before
                mSawCR = false;
                assert(mInputBufferWriteCursor != mInputBufferReadCursor);
                mInputBufferWriteCursor--;
            }
            mSawLineEnd = true;
            return true; // saw CRLF or LF
        }

        // A CR is buffered like any other char; it is only removed again
        // if the very next char turns out to be LF.
        mSawCR = (ch == '\r');
        mInputBuffer[mInputBufferWriteCursor] = ch;
        mInputBufferWriteCursor++;
    }

    mSawLineEnd = false;
    return false; // didn't see a line ending...
}

/**
  Takes the next character out of the ring buffer, stores it in mAccu and
  records in mAccuNeedsEncoding how it has to be written.

  @return true if a character was taken; false if the buffer is empty or
  if more input has to arrive before the next character can be judged.
*/
bool QuotedPrintableEncoder::processNextChar()
{
    // As long as neither a line ending has been seen nor finish() been
    // called, the chars still to come might be a line break, which changes
    // how trailing whitespace must be encoded. So this many chars are held
    // back in the buffer:
    const int minBufferFillWithoutLineEnd = 4;

    assert(d->outputBufferCursor == 0);

    // Number of buffered chars, taking the wrap-around into account:
    const int bufferFill = (int(mInputBufferWriteCursor) - int(mInputBufferReadCursor) + 16) % 16;

    assert(bufferFill >= 0 && bufferFill <= 15);

    const bool endOfLineKnown = mSawLineEnd || mFinishing;

    if (!endOfLineKnown && bufferFill < minBufferFillWithoutLineEnd) {
        return false;
    }

    // buffer is empty, return false:
    if (bufferFill == 0) {
        return false;
    }

    // Real processing goes here:
    mAccu = mInputBuffer[mInputBufferReadCursor++];

    // Whitespace that is the last buffered char of a finished line (or of
    // the whole input) has to be protected just like an unsafe char:
    const bool isLastCharOfLine = endOfLineKnown && bufferFill == 1;

    if (needsEncoding(mAccu) || (isLastCharOfLine && needsEncodingAtEOL(mAccu))) {
        mAccuNeedsEncoding = Definitely;
    } else if (needsEncodingAtBOL(mAccu)) {
        mAccuNeedsEncoding = AtBOL;
    } else {
        mAccuNeedsEncoding = Never;
    }

    return true;
}

// Writes the char held in mAccu, either verbatim or hex-encoded as
// decided by processNextChar(), and puts a soft line break in front of
// it if the current output line has no room left. Whether the char is
// the last one of its input line decides if room for a trailing soft
// line break has to be kept:
void QuotedPrintableEncoder::createOutputBuffer(char *&dcursor, const char *const dend)
{
    const int maxLineLength = 76; // rfc 2045

    assert(d->outputBufferCursor == 0);

    const bool endsInputLine = mSawLineEnd && mInputBufferReadCursor == mInputBufferWriteCursor;

    // One column for a verbatim char, three for =XY, plus one more for
    // the soft line break marker (=) unless a hard line break follows:
    const int charWidth = (mAccuNeedsEncoding == Definitely) ? 3 : 1;
    const int neededSpace = charWidth + (endsInputLine ? 0 : 1);

    if (mCurrentLineLength > maxLineLength - neededSpace) {
        // not enough room left on the current line, insert soft line break:
        write('=', dcursor, dend);
        writeCRLF(dcursor, dend);
        mCurrentLineLength = 0;
    }

    // An AtBOL char may only stay verbatim if it does not start the line:
    const bool writeVerbatim = mAccuNeedsEncoding == Never //
        || (mAccuNeedsEncoding == AtBOL && mCurrentLineLength != 0);

    if (!writeVerbatim) {
        write('=', dcursor, dend);
        write(binToHex(highNibble(mAccu)), dcursor, dend);
        write(binToHex(lowNibble(mAccu)), dcursor, dend);
        mCurrentLineLength += 3;
        return;
    }

    write(mAccu, dcursor, dend);
    mCurrentLineLength++;
}

/**
  Encodes input from [@p scursor, @p send) into [@p dcursor, @p dend);
  both cursors are advanced. Some input chars may remain in the internal
  ring buffer afterwards; they are written by later calls or by finish().

  @return true if all input has been consumed (always true once finish()
  has been called), false otherwise.
*/
bool QuotedPrintableEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend)
{
    // support probing by the caller:
    if (mFinishing) {
        return true;
    }

    while (scursor != send && dcursor != dend) {
        // Output held back earlier has to go out first:
        if (d->outputBufferCursor) {
            if (!flushOutputBuffer(dcursor, dend)) {
                return scursor == send;
            }
        }

        assert(d->outputBufferCursor == 0);

        // Read ahead until the end of the line or until the ring buffer
        // is full, whatever comes first:
        fillInputBuffer(scursor, send);

        if (processNextChar()) {
            // there was one...
            createOutputBuffer(dcursor, dend);
            continue;
        }

        const bool lineCompleted = mSawLineEnd && mInputBufferWriteCursor == mInputBufferReadCursor;
        if (!lineCompleted) {
            // we are supposedly finished with this input block:
            break;
        }

        // load a hard line break into output buffer:
        writeCRLF(dcursor, dend);
        // signal fillInputBuffer() we are ready for the next line:
        mSawLineEnd = false;
        mCurrentLineLength = 0;
    }

    // make sure we write as much as possible and don't stop _writing_
    // just because we have no more _input_:
    if (d->outputBufferCursor) {
        flushOutputBuffer(dcursor, dend);
    }

    return scursor == send;

} // encode

/**
  Encodes the chars that encode() had to hold back in the ring buffer and
  flushes pending output into [@p dcursor, @p dend).

  @return false if the output space ran out before everything was
  written; otherwise the result of the final flushOutputBuffer() call.
*/
bool QuotedPrintableEncoder::finish(char *&dcursor, const char *const dend)
{
    mFinishing = true;

    // Nothing is left to encode; at most buffered output is still pending:
    if (mFinished) {
        return flushOutputBuffer(dcursor, dend);
    }

    while (dcursor != dend) {
        if (d->outputBufferCursor) {
            if (!flushOutputBuffer(dcursor, dend)) {
                return false;
            }
        }

        assert(d->outputBufferCursor == 0);

        if (processNextChar()) {
            // there was one...
            createOutputBuffer(dcursor, dend);
            continue;
        }

        const bool lineCompleted = mSawLineEnd && mInputBufferWriteCursor == mInputBufferReadCursor;
        if (!lineCompleted) {
            // the ring buffer is drained and no line break is owed
            mFinished = true;
            return flushOutputBuffer(dcursor, dend);
        }

        // load a hard line break into output buffer:
        writeCRLF(dcursor, dend);
        mSawLineEnd = false;
        mCurrentLineLength = 0;
    }

    return mFinished && !d->outputBufferCursor;

} // finish

bool Rfc2047QEncodingEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend)
{
    if (mInsideFinishing) {
        return true;
    }

    while (scursor != send && dcursor != dend) {
        uchar value = 0;
        switch (mStepNo) {
        case 0:
            // read the next char and decide if and how do encode:
            mAccu = *scursor++;
            if (!needsEncoding(mAccu)) {
                *dcursor++ = char(mAccu);
            } else if (mEscapeChar == '=' && mAccu == 0x20) {
                // shortcut encoding for 0x20 (latin-1/us-ascii SPACE)
                // (not for rfc2231 encoding)
                *dcursor++ = '_';
            } else {
                // needs =XY encoding - write escape char:
                *dcursor++ = mEscapeChar;
                mStepNo = 1;
            }
            continue;
        case 1:
            // extract hi-nibble:
            value = highNibble(mAccu);
            mStepNo = 2;
            break;
        case 2:
            // extract lo-nibble:
            value = lowNibble(mAccu);
            mStepNo = 0;
            break;
        default:
            assert(0);
        }

        // and write:
        *dcursor++ = binToHex(value);
    }

    return scursor == send;
} // encode

bool Rfc2047QEncodingEncoder::finish(char *&dcursor, const char *const dend)
{
    mInsideFinishing = true;

    // write the last bits of mAccu, if any:
    while (mStepNo != 0 && dcursor != dend) {
        uchar value = 0;
        switch (mStepNo) {
        case 1:
            // extract hi-nibble:
            value = highNibble(mAccu);
            mStepNo = 2;
            break;
        case 2:
            // extract lo-nibble:
            value = lowNibble(mAccu);
            mStepNo = 0;
            break;
        default:
            assert(0);
        }

        // and write:
        *dcursor++ = binToHex(value);
    }

    return mStepNo == 0;
}

} // namespace KCodecs

Coverage Report

Created: 2026-05-27 07:07

Line	Count	Source
1		/* -- c++ --
2		SPDX-FileCopyrightText: 2002 Marc Mutz <mutz@kde.org>
3
4		SPDX-License-Identifier: LGPL-2.0-or-later
5		*/
6
7		#include "kcodecsqp.h"
8		#include "kcodecs_p.h"
9
10		#include <QDebug>
11
12		#include <cassert>
13
14		using namespace KCodecs;
15
16		namespace KCodecs
17		{
18		// none except a-zA-Z0-9!*+-/
19		const uchar eTextMap[16] = {0x00, 0x00, 0x00, 0x00, 0x40, 0x35, 0xFF, 0xC0, 0x7F, 0xFF, 0xFF, 0xE0, 0x7F, 0xFF, 0xFF, 0xE0};
20
21		// some helpful functions:
22
23		/**
24		Converts a 4-bit @p value into its hexadecimal characater representation.
25		So input of value [0,15] returns ['0','1',... 'F']. Input values
26		greater than 15 will produce undesired results.
27		@param value is an unsigned character containing the 4-bit input value.
28		*/
29		static inline char binToHex(uchar value)
30	0	{
31	0	if (value > 9) {
32	0	return value + 'A' - 10;
33	0	} else {
34	0	return value + '0';
35	0	}
36	0	}
37
38		/**
39		Returns the high-order 4 bits of an 8-bit value in another 8-bit value.
40		@param ch is an unsigned character containing the 8-bit input value.
41		*/
42		static inline uchar highNibble(uchar ch)
43	0	{
44	0	return ch >> 4;
45	0	}
46
47		/**
48		Returns the low-order 4 bits of an 8-bit value in another 8-bit value.
49		@param ch is an unsigned character containing the 8-bit input value.
50		*/
51		static inline uchar lowNibble(uchar ch)
52	0	{
53	0	return ch & 0xF;
54	0	}
55
56		//
57		// QuotedPrintableCodec
58		//
59
60		class QuotedPrintableEncoder : public Encoder
61		{
62		char mInputBuffer[16];
63		uchar mCurrentLineLength; // 0..76
64		uchar mAccu;
65		uint mInputBufferReadCursor : 4; // 0..15
66		uint mInputBufferWriteCursor : 4; // 0..15
67		enum {
68		Never,
69		AtBOL,
70		Definitely,
71		} mAccuNeedsEncoding;
72		bool mSawLineEnd : 1;
73		bool mSawCR : 1;
74		bool mFinishing : 1;
75		bool mFinished : 1;
76
77		protected:
78		friend class QuotedPrintableCodec;
79		QuotedPrintableEncoder(Codec::NewlineType newline = Codec::NewlineLF)
80	0	: Encoder(newline)
81	0	, mCurrentLineLength(0)
82	0	, mAccu(0)
83	0	, mInputBufferReadCursor(0)
84	0	, mInputBufferWriteCursor(0)
85	0	, mAccuNeedsEncoding(Never)
86	0	, mSawLineEnd(false)
87	0	, mSawCR(false)
88	0	, mFinishing(false)
89	0	, mFinished(false)
90	0	{
91	0	}
92
93		bool needsEncoding(uchar ch)
94	0	{
95	0	return ch > '~' \|\| (ch < ' ' && ch != '\t') \|\| ch == '=';
96	0	}
97		bool needsEncodingAtEOL(uchar ch)
98	0	{
99	0	return ch == ' ' \|\| ch == '\t';
100	0	}
101		bool needsEncodingAtBOL(uchar ch)
102	0	{
103	0	return ch == 'F' \|\| ch == '.' \|\| ch == '-';
104	0	}
105		bool fillInputBuffer(const char &scursor, const char const send);
106		bool processNextChar();
107		void createOutputBuffer(char &dcursor, const char const dend);
108
109		public:
110		~QuotedPrintableEncoder() override
111	0	{
112	0	}
113
114		bool encode(const char &scursor, const char const send, char &dcursor, const char const dend) override;
115
116		bool finish(char &dcursor, const char const dend) override;
117		};
118
119		class QuotedPrintableDecoder : public Decoder
120		{
121		const char mEscapeChar;
122		char mBadChar;
123		/** @p accu holds the msb nibble of the hexchar or zero. */
124		uchar mAccu;
125		/** @p insideHexChar is true iff we're inside an hexchar (=XY).
126		Together with @ref mAccu, we can build this states:
127		@li @p insideHexChar == @p false:
128		normal text
129		@li @p insideHexChar == @p true, @p mAccu == 0:
130		saw the leading '='
131		@li @p insideHexChar == @p true, @p mAccu != 0:
132		saw the first nibble '=X'
133		*/
134		const bool mQEncoding;
135		bool mInsideHexChar;
136		bool mFlushing;
137		bool mExpectLF;
138		bool mHaveAccu;
139		/** @p mLastChar holds the first char of an encoded char, so that
140		we are able to keep the first char if the second char is invalid. */
141		char mLastChar;
142
143		protected:
144		friend class QuotedPrintableCodec;
145		friend class Rfc2047QEncodingCodec;
146		friend class Rfc2231EncodingCodec;
147		QuotedPrintableDecoder(Codec::NewlineType newline = Codec::NewlineLF, bool aQEncoding = false, char aEscapeChar = '=')
148	89.9k	: Decoder(newline)
149	89.9k	, mEscapeChar(aEscapeChar)
150	89.9k	, mBadChar(0)
151	89.9k	, mAccu(0)
152	89.9k	, mQEncoding(aQEncoding)
153	89.9k	, mInsideHexChar(false)
154	89.9k	, mFlushing(false)
155	89.9k	, mExpectLF(false)
156	89.9k	, mHaveAccu(false)
157	89.9k	, mLastChar(0)
158	89.9k	{
159	89.9k	}
160
161		public:
162		~QuotedPrintableDecoder() override
163	0	{
164	0	}
165
166		bool decode(const char &scursor, const char const send, char &dcursor, const char const dend) override;
167		bool finish(char &dcursor, const char const dend) override;
168		};
169
170		class Rfc2047QEncodingEncoder : public Encoder
171		{
172		uchar mAccu;
173		uchar mStepNo;
174		const char mEscapeChar;
175		bool mInsideFinishing : 1;
176
177		protected:
178		friend class Rfc2047QEncodingCodec;
179		friend class Rfc2231EncodingCodec;
180		Rfc2047QEncodingEncoder(Codec::NewlineType newline = Codec::NewlineLF, char aEscapeChar = '=')
181	0	: Encoder(newline)
182	0	, mAccu(0)
183	0	, mStepNo(0)
184	0	, mEscapeChar(aEscapeChar)
185	0	, mInsideFinishing(false)
186	0	{
187		// else an optimization in ::encode might break.
188	0	assert(aEscapeChar == '=' \|\| aEscapeChar == '%');
189	0	}
190
191		bool isEText(uchar ch)
192	0	{
193	0	return (ch < 128) && (eTextMap[ch / 8] & 0x80 >> ch % 8);
194	0	}
195
196		// this code assumes that isEText( mEscapeChar ) == false!
197		bool needsEncoding(uchar ch)
198	0	{
199	0	if (ch > 'z') {
200	0	return true; // {\|}~ DEL and 8bit chars need
201	0	}
202	0	if (!isEText(ch)) {
203	0	return true; // all but a-zA-Z0-9!/*+- need, too
204	0	}
205	0	if (mEscapeChar == '%' && (ch == '*' \|\| ch == '/')) {
206	0	return true; // not allowed in rfc2231 encoding
207	0	}
208	0	return false;
209	0	}
210
211		public:
212		~Rfc2047QEncodingEncoder() override
213	0	{
214	0	}
215
216		bool encode(const char &scursor, const char const send, char &dcursor, const char const dend) override;
217		bool finish(char &dcursor, const char const dend) override;
218		};
219
220		// this doesn't access any member variables, so it can be defined static
221		// but then we can't call it from virtual functions
222		static qsizetype QuotedPrintableDecoder_maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline)
223	89.8k	{
224		// all chars unencoded:
225	89.8k	qsizetype result = insize;
226		// but maybe all of them are \n and we need to make them \r\n :-o
227	89.8k	if (newline == Codec::NewlineCRLF) {
228	0	result += insize;
229	0	}
230
231		// there might be an accu plus escape
232	89.8k	result += 2;
233
234	89.8k	return result;
235	89.8k	}
236
237		Encoder *QuotedPrintableCodec::makeEncoder(Codec::NewlineType newline) const
238	0	{
239	0	return new QuotedPrintableEncoder(newline);
240	0	}
241
242		Decoder *QuotedPrintableCodec::makeDecoder(Codec::NewlineType newline) const
243	9.14k	{
244	9.14k	return new QuotedPrintableDecoder(newline);
245	9.14k	}
246
247		qsizetype QuotedPrintableCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const
248	9.14k	{
249	9.14k	return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline);
250	9.14k	}
251
252		Encoder *Rfc2047QEncodingCodec::makeEncoder(Codec::NewlineType newline) const
253	0	{
254	0	return new Rfc2047QEncodingEncoder(newline);
255	0	}
256
257		Decoder *Rfc2047QEncodingCodec::makeDecoder(Codec::NewlineType newline) const
258	69.8k	{
259	69.8k	return new QuotedPrintableDecoder(newline, true);
260	69.8k	}
261
262		qsizetype Rfc2047QEncodingCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const
263	69.7k	{
264	69.7k	return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline);
265	69.7k	}
266
267		Encoder *Rfc2231EncodingCodec::makeEncoder(Codec::NewlineType newline) const
268	0	{
269	0	return new Rfc2047QEncodingEncoder(newline, '%');
270	0	}
271
272		Decoder *Rfc2231EncodingCodec::makeDecoder(Codec::NewlineType newline) const
273	10.9k	{
274	10.9k	return new QuotedPrintableDecoder(newline, true, '%');
275	10.9k	}
276
277		qsizetype Rfc2231EncodingCodec::maxDecodedSizeFor(qsizetype insize, Codec::NewlineType newline) const
278	10.9k	{
279	10.9k	return QuotedPrintableDecoder_maxDecodedSizeFor(insize, newline);
280	10.9k	}
281
282		/********************************************************/
283		/********************************************************/
284		/********************************************************/
285
286		bool QuotedPrintableDecoder::decode(const char &scursor, const char const send, char &dcursor, const char const dend)
287	90.2k	{
288	90.2k	if (d->newline == Codec::NewlineCRLF) {
289	0	qWarning() << "CRLF output for decoders isn't yet supported!";
290	0	}
291
292	63.5M	while (scursor != send && dcursor != dend) {
293	63.4M	if (mFlushing) {
294		// we have to flush chars in the aftermath of a decoding
295		// error. The way to request a flush is to
296		// - store the offending character in mBadChar and
297		// - set mFlushing to true.
298		// The supported cases are (H: hexchar, X: bad char):
299		// =X, =HX, CR
300		// mBadChar is only written out if it is not by itself illegal in
301		// quoted-printable (e.g. CTLs, 8Bits).
302		// A fast way to suppress mBadChar output is to set it to NUL.
303	1.65M	if (mInsideHexChar) {
304		// output '='
305	822k	*dcursor++ = mEscapeChar;
306	822k	mInsideHexChar = false;
307	836k	} else if (mHaveAccu) {
308		// output the high nibble of the accumulator:
309	13.2k	*dcursor++ = mLastChar;
310	13.2k	mHaveAccu = false;
311	13.2k	mAccu = 0;
312	822k	} else {
313		// output mBadChar
314	822k	assert(mAccu == 0);
315	822k	if (mBadChar) {
316	820k	if (mBadChar == '=') {
317	610k	mInsideHexChar = true;
318	610k	} else {
319	210k	*dcursor++ = mBadChar;
320	210k	}
321	820k	mBadChar = 0;
322	820k	}
323	822k	mFlushing = false;
324	822k	}
325	1.65M	continue;
326	1.65M	}
327	63.4M	assert(mBadChar == 0);
328
329	61.8M	uchar ch = *scursor++;
330
331	61.8M	if (mExpectLF && ch != '\n') {
332		// qWarning() << "QuotedPrintableDecoder:"
333		// "illegally formed soft linebreak or lonely CR!";
334	5.19k	mInsideHexChar = false;
335	5.19k	mExpectLF = false;
336	5.19k	if (mAccu != 0) {
337	1.64k	return false;
338	1.64k	}
339	5.19k	}
340
341	61.8M	if (mInsideHexChar) {
342	1.05M	uchar value = 255;
343		// next char(s) represent nibble instead of itself:
344	1.05M	if (ch <= '9') {
345	171k	if (ch >= '0') {
346	126k	value = ch - '0';
347	126k	} else {
348	44.6k	switch (ch) {
349	2.24k	case '\r':
350	2.24k	mExpectLF = true;
351	2.24k	break;
352	14.2k	case '\n':
353		// soft line break, but only if mAccu is NUL.
354	14.2k	if (!mHaveAccu) {
355	6.43k	mExpectLF = false;
356	6.43k	mInsideHexChar = false;
357	6.43k	break;
358	6.43k	}
359		// else fall through
360	35.9k	default:
361		// qWarning() << "QuotedPrintableDecoder:"
362		// "illegally formed hex char! Outputting verbatim.";
363	35.9k	mBadChar = ch;
364	35.9k	mFlushing = true;
365	44.6k	}
366	44.6k	continue;
367	44.6k	}
368	878k	} else { // ch > '9'
369	878k	if (ch <= 'F') {
370	736k	if (ch >= 'A') {
371	81.7k	value = 10 + ch - 'A';
372	654k	} else { // [:-@]
373	654k	mBadChar = ch;
374	654k	mFlushing = true;
375	654k	continue;
376	654k	}
377	736k	} else { // ch > 'F'
378	142k	if (ch <= 'f' && ch >= 'a') {
379	7.71k	value = 10 + ch - 'a';
380	134k	} else {
381	134k	mBadChar = ch;
382	134k	mFlushing = true;
383	134k	continue;
384	134k	}
385	142k	}
386	878k	}
387
388	1.05M	assert(value < 16);
389	216k	assert(mBadChar == 0);
390	216k	assert(!mExpectLF);
391
392	216k	if (mHaveAccu) {
393	100k	*dcursor++ = char(mAccu \| value);
394	100k	mAccu = 0;
395	100k	mHaveAccu = false;
396	100k	mInsideHexChar = false;
397	115k	} else {
398	115k	mHaveAccu = true;
399	115k	mAccu = value << 4;
400	115k	mLastChar = ch;
401	115k	}
402	60.7M	} else { // not mInsideHexChar
403	60.7M	if ((ch <= '~' && ch >= ' ') \|\| ch == '\t') {
404	24.7M	if (ch == mEscapeChar) {
405	329k	mInsideHexChar = true;
406	24.4M	} else if (mQEncoding && ch == '_') {
407	160k	*dcursor++ = char(0x20);
408	24.3M	} else {
409	24.3M	*dcursor++ = char(ch);
410	24.3M	}
411	35.9M	} else if (ch == '\n') {
412	158k	*dcursor++ = '\n';
413	158k	mExpectLF = false;
414	35.8M	} else if (ch == '\r') {
415	2.99k	mExpectLF = true;
416	35.7M	} else {
417		// qWarning() << "QuotedPrintableDecoder:" << ch <<
418		// "illegal character in input stream!";
419	35.7M	*dcursor++ = char(ch);
420	35.7M	}
421	60.7M	}
422	61.8M	}
423
424	88.5k	return scursor == send;
425	90.2k	}
426
427		bool QuotedPrintableDecoder::finish(char &dcursor, const char const dend)
428	8.28k	{
429	11.8k	while ((mInsideHexChar \|\| mHaveAccu \|\| mFlushing) && dcursor != dend) {
430		// we have to flush chars
431	3.60k	if (mInsideHexChar) {
432		// output '='
433	1.35k	*dcursor++ = mEscapeChar;
434	1.35k	mInsideHexChar = false;
435	2.25k	} else if (mHaveAccu) {
436		// output the high nibble of the accumulator:
437	903	*dcursor++ = mLastChar;
438	903	mHaveAccu = false;
439	903	mAccu = 0;
440	1.35k	} else {
441		// output mBadChar
442	1.35k	assert(mAccu == 0);
443	1.35k	if (mBadChar) {
444	705	*dcursor++ = mBadChar;
445	705	mBadChar = 0;
446	705	}
447	1.35k	mFlushing = false;
448	1.35k	}
449	3.60k	}
450
451		// return false if we are not finished yet; note that mInsideHexChar is always false
452	8.28k	return !(mHaveAccu \|\| mFlushing);
453	8.28k	}
454
455		bool QuotedPrintableEncoder::fillInputBuffer(const char &scursor, const char const send)
456	0	{
457		// Don't read more if there's still a tail of a line in the buffer:
458	0	if (mSawLineEnd) {
459	0	return true;
460	0	}
461
462		// Read until the buffer is full or we have found CRLF or LF (which
463		// don't end up in the input buffer):
464	0	for (; (mInputBufferWriteCursor + 1) % 16 != mInputBufferReadCursor && scursor != send; mInputBufferWriteCursor++) {
465	0	char ch = *scursor++;
466	0	if (ch == '\r') {
467	0	mSawCR = true;
468	0	} else if (ch == '\n') {
469		// remove the CR from the input buffer (if any) and return that
470		// we found a line ending:
471	0	if (mSawCR) {
472	0	mSawCR = false;
473	0	assert(mInputBufferWriteCursor != mInputBufferReadCursor);
474	0	mInputBufferWriteCursor--;
475	0	}
476	0	mSawLineEnd = true;
477	0	return true; // saw CRLF or LF
478	0	} else {
479	0	mSawCR = false;
480	0	}
481	0	mInputBuffer[mInputBufferWriteCursor] = ch;
482	0	}
483	0	mSawLineEnd = false;
484	0	return false; // didn't see a line ending...
485	0	}
486
487		bool QuotedPrintableEncoder::processNextChar()
488	0	{
489		// If we process a buffer which doesn't end in a line break, we
490		// can't process all of it, since the next chars that will be read
491		// could be a line break. So we empty the buffer only until a fixed
492		// number of chars is left (except when mFinishing, which means that
493		// the data doesn't end in newline):
494	0	const int minBufferFillWithoutLineEnd = 4;
495
496	0	assert(d->outputBufferCursor == 0);
497
498	0	int bufferFill = int(mInputBufferWriteCursor) - int(mInputBufferReadCursor);
499	0	if (bufferFill < 0) {
500	0	bufferFill += 16;
501	0	}
502
503	0	assert(bufferFill >= 0 && bufferFill <= 15);
504
505	0	if (!mFinishing //
506	0	&& !mSawLineEnd //
507	0	&& bufferFill < minBufferFillWithoutLineEnd) {
508	0	return false;
509	0	}
510
511		// buffer is empty, return false:
512	0	if (mInputBufferReadCursor == mInputBufferWriteCursor) {
513	0	return false;
514	0	}
515
516		// Real processing goes here:
517	0	mAccu = mInputBuffer[mInputBufferReadCursor++];
518	0	if (needsEncoding(mAccu)) { // always needs encoding or
519	0	mAccuNeedsEncoding = Definitely;
520	0	} else if ((mSawLineEnd \|\| mFinishing) // needs encoding at end of line
521	0	&& bufferFill == 1 // or end of buffer
522	0	&& needsEncodingAtEOL(mAccu)) {
523	0	mAccuNeedsEncoding = Definitely;
524	0	} else if (needsEncodingAtBOL(mAccu)) {
525	0	mAccuNeedsEncoding = AtBOL;
526	0	} else {
527		// never needs encoding
528	0	mAccuNeedsEncoding = Never;
529	0	}
530
531	0	return true;
532	0	}
533
534		// Outputs processed (verbatim or hex-encoded) chars and inserts soft
535		// line breaks as necessary. Depends on processNextChar's directions
536		// on whether to encode the current char, and whether
537		// the current char is the last one in it's input line:
538		void QuotedPrintableEncoder::createOutputBuffer(char &dcursor, const char const dend)
539	0	{
540	0	const int maxLineLength = 76; // rfc 2045
541
542	0	assert(d->outputBufferCursor == 0);
543
544		/* clang-format off */
545	0	bool lastOneOnThisLine = mSawLineEnd
546	0	&& mInputBufferReadCursor == mInputBufferWriteCursor;
547		/* clang-format on */
548
549	0	int neededSpace = 1;
550	0	if (mAccuNeedsEncoding == Definitely) {
551	0	neededSpace = 3;
552	0	}
553
554		// reserve space for the soft hyphen (=)
555	0	if (!lastOneOnThisLine) {
556	0	neededSpace++;
557	0	}
558
559	0	if (mCurrentLineLength > maxLineLength - neededSpace) {
560		// current line too short, insert soft line break:
561	0	write('=', dcursor, dend);
562	0	writeCRLF(dcursor, dend);
563	0	mCurrentLineLength = 0;
564	0	}
565
566	0	if (Never == mAccuNeedsEncoding //
567	0	\|\| (AtBOL == mAccuNeedsEncoding && mCurrentLineLength != 0)) {
568	0	write(mAccu, dcursor, dend);
569	0	mCurrentLineLength++;
570	0	} else {
571	0	write('=', dcursor, dend);
572	0	write(binToHex(highNibble(mAccu)), dcursor, dend);
573	0	write(binToHex(lowNibble(mAccu)), dcursor, dend);
574	0	mCurrentLineLength += 3;
575	0	}
576	0	}
577
578		bool QuotedPrintableEncoder::encode(const char &scursor, const char const send, char &dcursor, const char const dend)
579	0	{
580		// support probing by the caller:
581	0	if (mFinishing) {
582	0	return true;
583	0	}
584
585	0	while (scursor != send && dcursor != dend) {
586	0	if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) {
587	0	return scursor == send;
588	0	}
589
590	0	assert(d->outputBufferCursor == 0);
591
592		// fill input buffer until eol has been reached or until the
593		// buffer is full, whatever comes first:
594	0	fillInputBuffer(scursor, send);
595
596	0	if (processNextChar()) {
597		// there was one...
598	0	createOutputBuffer(dcursor, dend);
599	0	} else if (mSawLineEnd && mInputBufferWriteCursor == mInputBufferReadCursor) {
600		// load a hard line break into output buffer:
601	0	writeCRLF(dcursor, dend);
602		// signal fillInputBuffer() we are ready for the next line:
603	0	mSawLineEnd = false;
604	0	mCurrentLineLength = 0;
605	0	} else {
606		// we are supposedly finished with this input block:
607	0	break;
608	0	}
609	0	}
610
611		// make sure we write as much as possible and don't stop _writing_
612		// just because we have no more _input_:
613	0	if (d->outputBufferCursor) {
614	0	flushOutputBuffer(dcursor, dend);
615	0	}
616
617	0	return scursor == send;
618
619	0	} // encode
620
621		bool QuotedPrintableEncoder::finish(char &dcursor, const char const dend)
622	0	{
623	0	mFinishing = true;
624
625	0	if (mFinished) {
626	0	return flushOutputBuffer(dcursor, dend);
627	0	}
628
629	0	while (dcursor != dend) {
630	0	if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) {
631	0	return false;
632	0	}
633
634	0	assert(d->outputBufferCursor == 0);
635
636	0	if (processNextChar()) {
637		// there was one...
638	0	createOutputBuffer(dcursor, dend);
639	0	} else if (mSawLineEnd && mInputBufferWriteCursor == mInputBufferReadCursor) {
640		// load a hard line break into output buffer:
641	0	writeCRLF(dcursor, dend);
642	0	mSawLineEnd = false;
643	0	mCurrentLineLength = 0;
644	0	} else {
645	0	mFinished = true;
646	0	return flushOutputBuffer(dcursor, dend);
647	0	}
648	0	}
649
650	0	return mFinished && !d->outputBufferCursor;
651
652	0	} // finish
653
654		bool Rfc2047QEncodingEncoder::encode(const char &scursor, const char const send, char &dcursor, const char const dend)
655	0	{
656	0	if (mInsideFinishing) {
657	0	return true;
658	0	}
659
660	0	while (scursor != send && dcursor != dend) {
661	0	uchar value = 0;
662	0	switch (mStepNo) {
663	0	case 0:
664		// read the next char and decide if and how do encode:
665	0	mAccu = *scursor++;
666	0	if (!needsEncoding(mAccu)) {
667	0	*dcursor++ = char(mAccu);
668	0	} else if (mEscapeChar == '=' && mAccu == 0x20) {
669		// shortcut encoding for 0x20 (latin-1/us-ascii SPACE)
670		// (not for rfc2231 encoding)
671	0	*dcursor++ = '_';
672	0	} else {
673		// needs =XY encoding - write escape char:
674	0	*dcursor++ = mEscapeChar;
675	0	mStepNo = 1;
676	0	}
677	0	continue;
678	0	case 1:
679		// extract hi-nibble:
680	0	value = highNibble(mAccu);
681	0	mStepNo = 2;
682	0	break;
683	0	case 2:
684		// extract lo-nibble:
685	0	value = lowNibble(mAccu);
686	0	mStepNo = 0;
687	0	break;
688	0	default:
689	0	assert(0);
690	0	}
691
692		// and write:
693	0	*dcursor++ = binToHex(value);
694	0	}
695
696	0	return scursor == send;
697	0	} // encode
698
699		bool Rfc2047QEncodingEncoder::finish(char &dcursor, const char const dend)
700	0	{
701	0	mInsideFinishing = true;
702
703		// write the last bits of mAccu, if any:
704	0	while (mStepNo != 0 && dcursor != dend) {
705	0	uchar value = 0;
706	0	switch (mStepNo) {
707	0	case 1:
708		// extract hi-nibble:
709	0	value = highNibble(mAccu);
710	0	mStepNo = 2;
711	0	break;
712	0	case 2:
713		// extract lo-nibble:
714	0	value = lowNibble(mAccu);
715	0	mStepNo = 0;
716	0	break;
717	0	default:
718	0	assert(0);
719	0	}
720
721		// and write:
722	0	*dcursor++ = binToHex(value);
723	0	}
724
725	0	return mStepNo == 0;
726	0	}
727
728		} // namespace KCodecs