/src/x265/source/common/quant.h

Source
/*****************************************************************************
 * Copyright (C) 2013-2020 MulticoreWare, Inc
 *
 * Authors: Steve Borho <steve@borho.org>
 *          Min Chen <chenm003@163.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#ifndef X265_QUANT_H
#define X265_QUANT_H

#include "common.h"
#include "scalinglist.h"
#include "contexts.h"

namespace X265_NS {
// private namespace

class CUData;
class Entropy;
struct TUEntropyCodingParameters;

/* Cached quantization parameters derived from a scaled QP.  setQpParam()
 * recomputes the derived fields only when the QP actually changes. */
struct QpParam
{
    int rem;         /* qp % 6 */
    int per;         /* qp / 6 */
    int qp;          /* scaled QP the other fields were last derived from */
    int64_t lambda2; /* FIX8 */
    int32_t lambda;  /* FIX8, dynamic range is 18-bits in Main and 20-bits in Main10 */

    /* qp starts at MAX_INT (presumably a sentinel no caller passes, so the
     * first setQpParam() call always refreshes the derived fields).  The other
     * members are zeroed so that reading them before that first call is well
     * defined instead of yielding indeterminate values. */
    QpParam() : rem(0), per(0), qp(MAX_INT), lambda2(0), lambda(0) {}

    /* Update the cached parameters for qpScaled.  Does nothing when qpScaled
     * equals the QP that is already cached. */
    void setQpParam(int qpScaled)
    {
        if (qp != qpScaled)
        {
            rem = qpScaled % 6;
            per = qpScaled / 6;
            qp  = qpScaled;

            /* Validate the range BEFORE narrowing: converting a double that does
             * not fit into int32_t is undefined, so the check must not follow the cast */
            X265_CHECK((x265_lambda_tab[qp - QP_BD_OFFSET] * 256. + 0.5) < (double)MAX_INT, "x265_lambda_tab[] value too large\n");

            /* table values are scaled by 256 (FIX8) and rounded to nearest */
            lambda2 = (int64_t)(x265_lambda2_tab[qp - QP_BD_OFFSET] * 256. + 0.5);
            lambda  = (int32_t)(x265_lambda_tab[qp - QP_BD_OFFSET] * 256. + 0.5);
        }
    }
};

// NOTE: MUST be 16-byte aligned for asm code
// Plain data holder: three per-transform-category tables plus three pointers.
// Member order and types are part of the layout; do not reorder them without
// checking the asm code mentioned above.
struct NoiseReduction
{
    /* 0 = luma 4x4,   1 = luma 8x8,   2 = luma 16x16,   3 = luma 32x32
     * 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma 32x32
     * Intra 0..7 - Inter 8..15 */
    /* One row of MAX_NUM_TR_COEFFS 32-bit entries per category, declared
     * through ALIGN_VAR_16 to obtain the 16-byte alignment required above */
    ALIGN_VAR_16(uint32_t, nrResidualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]);
    /* One 32-bit counter per category */
    uint32_t nrCount[MAX_NUM_TR_CATEGORIES];
    /* One row of MAX_NUM_TR_COEFFS 16-bit entries per category */
    uint16_t nrOffsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
    /* Pointers to rows of MAX_NUM_TR_COEFFS entries / to counters.
     * NOTE(review): presumably these select the tables currently in use
     * (e.g. the arrays above) -- they are assigned outside this header, confirm there */
    uint16_t (*offset)[MAX_NUM_TR_COEFFS];
    uint32_t (*residualSum)[MAX_NUM_TR_COEFFS];
    uint32_t *count;
};

/* Transform / quantization interface.  Only the two coefficient-group context
 * helpers are defined inline here; every other method is implemented outside
 * this header. */
class Quant
{
protected:

    const ScalingList* m_scalingList;
    Entropy*           m_entropyCoder;

    QpParam            m_qpParam[3];    // three QpParam slots (NOTE(review): presumably one per colour plane -- confirm)

    int                m_rdoqLevel;
    int32_t            m_psyRdoqScale;  // dynamic range [0,50] * 256 = 14-bits
    int16_t*           m_resiDctCoeff;
    int16_t*           m_fencDctCoeff;
    int16_t*           m_fencShortBuf;

    enum { IEP_RATE = 32768 }; /* FIX15 cost of an equal probable bit */

public:

    NoiseReduction*    m_nr;
    NoiseReduction*    m_frameNr; // Array of NR structures, one for each frameEncoder

    Quant();
    ~Quant();

    /* one-time setup */
    bool init(double psyScale, const ScalingList& scalingList, Entropy& entropy);
    bool allocNoiseReduction(const x265_param& param);

    /* CU setup */
    void setQPforQuant(const CUData& ctu, int qp);

    uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
                          uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);

    void invtransformNxN(const CUData& cu, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
                         uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
    uint64_t ssimDistortion(const CUData& cu, const pixel* fenc, uint32_t fStride, const pixel* recon, intptr_t rstride,
                            uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx);

    /* Pattern decision for context derivation process of significant_coeff_flag.
     *
     * sigCoeffGroupFlag64 is a bit mask indexed by coefficient-group position,
     * (cgPosX, cgPosY) / cgBlkPos locate the current group and trSizeCG is the
     * transform width measured in coefficient groups.
     *
     * Returns sigRight + 2 * sigLower (0..3), where sigRight is mask bit
     * (cgBlkPos + 1) and sigLower is mask bit (cgBlkPos + trSizeCG); each is
     * forced to zero when the group lies on the last column / last row.
     * Always returns 0 when trSizeCG == 1. */
    static uint32_t calcPatternSigCtx(uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, uint32_t cgBlkPos, uint32_t trSizeCG)
    {
        if (trSizeCG == 1)
            return 0;

        X265_CHECK(trSizeCG <= 8, "transform CG is too large\n");
        X265_CHECK(cgBlkPos < 64, "cgBlkPos is too large\n");
        // NOTE: cgBlkPos + 1 can reach 64, and shifting a 64-bit value by 64 is undefined.
        //       The shift is therefore done in two steps, each with a count below 64.
        //       For cgBlkPos < 63 this is bit-identical to a single shift; for cgBlkPos == 63
        //       it yields 0, and in that case both cgPosX and cgPosY equal (trSizeCG - 1),
        //       so sigRight and sigLower are cleared to zero anyway
        const uint32_t sigPos = (uint32_t)((sigCoeffGroupFlag64 >> cgBlkPos) >> 1); // only bit 0 and bit (trSizeCG - 1) are used

        // TODO: instruction BT is faster, but _bittest64 still generate instruction 'BT m, r' in VS2012
        const uint32_t sigRight = (cgPosX != (trSizeCG - 1)) & sigPos;
        const uint32_t sigLower = (cgPosY != (trSizeCG - 1)) & (sigPos >> (trSizeCG - 1));
        return sigRight + sigLower * 2;
    }

    /* Context derivation process of coeff_abs_significant_flag.
     *
     * Same neighbour lookup as calcPatternSigCtx(), but the two flags are
     * OR-ed: returns 1 when the right or the lower neighbouring group bit is
     * set in cgGroupMask (and that neighbour exists), otherwise 0. */
    static uint32_t getSigCoeffGroupCtxInc(uint64_t cgGroupMask, uint32_t cgPosX, uint32_t cgPosY, uint32_t cgBlkPos, uint32_t trSizeCG)
    {
        X265_CHECK(cgBlkPos < 64, "cgBlkPos is too large\n");
        // NOTE: two-step shift keeps each shift count below 64; a single shift by
        //       (cgBlkPos + 1) would be undefined when cgBlkPos == 63
        const uint32_t sigPos = (uint32_t)((cgGroupMask >> cgBlkPos) >> 1); // only bit 0 and bit (trSizeCG - 1) are used
        const uint32_t sigRight = (cgPosX != (trSizeCG - 1)) & sigPos;
        const uint32_t sigLower = (cgPosY != (trSizeCG - 1)) & (sigPos >> (trSizeCG - 1));

        return (sigRight | sigLower);
    }

    /* static methods shared with entropy.cpp */
    static uint32_t getSigCtxInc(uint32_t patternSigCtx, uint32_t log2TrSize, uint32_t trSize, uint32_t blkPos, bool bIsLuma, uint32_t firstSignificanceMapContext);

protected:

    void setChromaQP(int qpin, TextType ttype, int chFmt);

    uint32_t signBitHidingHDQ(int16_t* qcoeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codingParameters, uint32_t log2TrSize);

    /* RDO quantization, instantiated per transform size (log2TrSize is a template argument) */
    template<uint32_t log2TrSize>
    uint32_t rdoQuant(const CUData& cu, int16_t* dstCoeff, TextType ttype, uint32_t absPartIdx, bool usePsy);

public:
    /* pointer-to-member type matching the rdoQuant<> signature */
    typedef uint32_t (Quant::*rdoQuant_t)(const CUData& cu, int16_t* dstCoeff, TextType ttype, uint32_t absPartIdx, bool usePsy);

private:
    /* table of rdoQuant_t entries, NUM_CU_DEPTH long; populated outside this header */
    static rdoQuant_t rdoQuant_func[NUM_CU_DEPTH];
};
}

#endif // ifndef X265_QUANT_H

Line	Count	Source
1		/*****************************************************************************
2		* Copyright (C) 2013-2020 MulticoreWare, Inc
3		*
4		* Authors: Steve Borho <steve@borho.org>
5		* Min Chen <chenm003@163.com>
6		*
7		* This program is free software; you can redistribute it and/or modify
8		* it under the terms of the GNU General Public License as published by
9		* the Free Software Foundation; either version 2 of the License, or
10		* (at your option) any later version.
11		*
12		* This program is distributed in the hope that it will be useful,
13		* but WITHOUT ANY WARRANTY; without even the implied warranty of
14		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15		* GNU General Public License for more details.
16		*
17		* You should have received a copy of the GNU General Public License
18		* along with this program; if not, write to the Free Software
19		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20		*
21		* This program is also available under a commercial proprietary license.
22		* For more information, contact us at license @ x265.com.
23		*****************************************************************************/
24
25		#ifndef X265_QUANT_H
26		#define X265_QUANT_H
27
28		#include "common.h"
29		#include "scalinglist.h"
30		#include "contexts.h"
31
32		namespace X265_NS {
33		// private namespace
34
35		class CUData;
36		class Entropy;
37		struct TUEntropyCodingParameters;
38
39		struct QpParam
40		{
41		int rem;
42		int per;
43		int qp;
44		int64_t lambda2; /* FIX8 */
45		int32_t lambda; /* FIX8, dynamic range is 18-bits in Main and 20-bits in Main10 */
46
47	67.9k	QpParam() : qp(MAX_INT) {}
48
49		void setQpParam(int qpScaled)
50	84.5k	{
51	84.5k	if (qp != qpScaled)
52	4.81k	{
53	4.81k	rem = qpScaled % 6;
54	4.81k	per = qpScaled / 6;
55	4.81k	qp = qpScaled;
56	4.81k	lambda2 = (int64_t)(x265_lambda2_tab[qp - QP_BD_OFFSET] * 256. + 0.5);
57	4.81k	lambda = (int32_t)(x265_lambda_tab[qp - QP_BD_OFFSET] * 256. + 0.5);
58	4.81k	X265_CHECK((x265_lambda_tab[qp - QP_BD_OFFSET] * 256. + 0.5) < (double)MAX_INT, "x265_lambda_tab[] value too large\n");
59	4.81k	}
60	84.5k	}
61		};
62
63		// NOTE: MUST be 16-byte aligned for asm code
64		struct NoiseReduction
65		{
66		/* 0 = luma 4x4, 1 = luma 8x8, 2 = luma 16x16, 3 = luma 32x32
67		* 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma 32x32
68		* Intra 0..7 - Inter 8..15 */
69		ALIGN_VAR_16(uint32_t, nrResidualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]);
70		uint32_t nrCount[MAX_NUM_TR_CATEGORIES];
71		uint16_t nrOffsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
72		uint16_t (*offset)[MAX_NUM_TR_COEFFS];
73		uint32_t (*residualSum)[MAX_NUM_TR_COEFFS];
74		uint32_t *count;
75		};
76
77		class Quant
78		{
79		protected:
80
81		const ScalingList* m_scalingList;
82		Entropy* m_entropyCoder;
83
84		QpParam m_qpParam[3];
85
86		int m_rdoqLevel;
87		int32_t m_psyRdoqScale; // dynamic range [0,50] * 256 = 14-bits
88		int16_t* m_resiDctCoeff;
89		int16_t* m_fencDctCoeff;
90		int16_t* m_fencShortBuf;
91
92		enum { IEP_RATE = 32768 }; /* FIX15 cost of an equal probable bit */
93
94		public:
95
96		NoiseReduction* m_nr;
97		NoiseReduction* m_frameNr; // Array of NR structures, one for each frameEncoder
98
99		Quant();
100		~Quant();
101
102		/* one-time setup */
103		bool init(double psyScale, const ScalingList& scalingList, Entropy& entropy);
104		bool allocNoiseReduction(const x265_param& param);
105
106		/* CU setup */
107		void setQPforQuant(const CUData& ctu, int qp);
108
109		uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
110		uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
111
112		void invtransformNxN(const CUData& cu, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
113		uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
114		uint64_t ssimDistortion(const CUData& cu, const pixel* fenc, uint32_t fStride, const pixel* recon, intptr_t rstride,
115		uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx);
116
117		/* Pattern decision for context derivation process of significant_coeff_flag */
118		static uint32_t calcPatternSigCtx(uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, uint32_t cgBlkPos, uint32_t trSizeCG)
119	191k	{
120	191k	if (trSizeCG == 1)
121	33.4k	return 0;
122
123	158k	X265_CHECK(trSizeCG <= 8, "transform CG is too large\n");
124	158k	X265_CHECK(cgBlkPos < 64, "cgBlkPos is too large\n");
125		// NOTE: cgBlkPos+1 may more than 63, it is invalid for shift,
126		// but in this case, both cgPosX and cgPosY equal to (trSizeCG - 1),
127		// the sigRight and sigLower will clear value to zero, the final result will be correct
128	158k	const uint32_t sigPos = (uint32_t)(sigCoeffGroupFlag64 >> (cgBlkPos + 1)); // just need lowest 7-bits valid
129
130		// TODO: instruction BT is faster, but _bittest64 still generate instruction 'BT m, r' in VS2012
131	158k	const uint32_t sigRight = (cgPosX != (trSizeCG - 1)) & sigPos;
132	158k	const uint32_t sigLower = (cgPosY != (trSizeCG - 1)) & (sigPos >> (trSizeCG - 1));
133	158k	return sigRight + sigLower * 2;
134	191k	}
135
136		/* Context derivation process of coeff_abs_significant_flag */
137		static uint32_t getSigCoeffGroupCtxInc(uint64_t cgGroupMask, uint32_t cgPosX, uint32_t cgPosY, uint32_t cgBlkPos, uint32_t trSizeCG)
138	110k	{
139	110k	X265_CHECK(cgBlkPos < 64, "cgBlkPos is too large\n");
140		// NOTE: unsafe shift operator, see NOTE in calcPatternSigCtx
141	110k	const uint32_t sigPos = (uint32_t)(cgGroupMask >> (cgBlkPos + 1)); // just need lowest 8-bits valid
142	110k	const uint32_t sigRight = (cgPosX != (trSizeCG - 1)) & sigPos;
143	110k	const uint32_t sigLower = (cgPosY != (trSizeCG - 1)) & (sigPos >> (trSizeCG - 1));
144
145	110k	return (sigRight | sigLower);
146	110k	}
147
148		/* static methods shared with entropy.cpp */
149		static uint32_t getSigCtxInc(uint32_t patternSigCtx, uint32_t log2TrSize, uint32_t trSize, uint32_t blkPos, bool bIsLuma, uint32_t firstSignificanceMapContext);
150
151		protected:
152
153		void setChromaQP(int qpin, TextType ttype, int chFmt);
154
155		uint32_t signBitHidingHDQ(int16_t* qcoeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codingParameters, uint32_t log2TrSize);
156
157		template<uint32_t log2TrSize>
158		uint32_t rdoQuant(const CUData& cu, int16_t* dstCoeff, TextType ttype, uint32_t absPartIdx, bool usePsy);
159
160		public:
161		typedef uint32_t (Quant::*rdoQuant_t)(const CUData& cu, int16_t* dstCoeff, TextType ttype, uint32_t absPartIdx, bool usePsy);
162
163		private:
164		static rdoQuant_t rdoQuant_func[NUM_CU_DEPTH];
165		};
166		}
167
168		#endif // ifndef X265_QUANT_H

Coverage Report

Created: 2022-08-24 06:15