/src/Simd/src/Simd/SimdAvx2SynetQuantizedAdd.cpp

Source
/*
* Simd Library (http://ermig1979.github.io/Simd).
*
* Copyright (c) 2011-2025 Yermalayeu Ihar.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "Simd/SimdSynetQuantizedAdd.h"
#include "Simd/SimdSynetQuantizedAddCommon.h"
#include "Simd/SimdStore.h"
#include "Simd/SimdFmadd.h"

namespace Simd
{
#if defined(SIMD_AVX2_ENABLE) && defined(SIMD_SYNET_ENABLE)   
    namespace Avx2
    {
        static void QuantizedAddUniform8u8u8u(const uint8_t* a, float aScale, int aZero, const uint8_t* b, float bScale, int bZero, size_t size, const float*, float dScale, int dZero, uint8_t* dst)
        {
            float adScale = aScale / dScale;
            float bdScale = bScale / dScale;
            float term = float(dZero) - (adScale * float(aZero) + bdScale * float(bZero));
            __m256 _adScale = _mm256_set1_ps(adScale), _bdScale = _mm256_set1_ps(bdScale), _term = _mm256_set1_ps(term);
            size_t i = 0, size4 = AlignLo(size, 4), size16 = AlignLo(size, 16), size32 = AlignLo(size, 32);
            for (; i < size32; i += 32)
                QuantizedAdd8u8u8u32(a + i, _adScale, b + i, _bdScale, _term, dst + i);
            for (; i < size16; i += 16)
                QuantizedAdd8u8u8u16(a + i, _adScale, b + i, _bdScale, _term, dst + i);
            for (; i < size4; i += 4)
                QuantizedAdd8u8u8u4(a + i, _adScale, b + i, _bdScale, _term, dst + i);
            for (; i < size; i += 1)
                QuantizedAdd8u8u8u1(a + i, _adScale, b + i, _bdScale, _term, dst + i);
        }

        // Selects the AVX2 uniform 8u+8u->8u add kernel for the given activation type.
        // Identity and ReLU both map to QuantizedAddUniform8u8u8u; any other activation yields NULL.
        static SynetQuantizedAddUniform::UniformPtr GetQuantizedAddUniform8u8u8u(SimdConvolutionActivationType type)
        {
            const bool handled = (type == SimdConvolutionActivationIdentity) || (type == SimdConvolutionActivationRelu);
            if (!handled)
                return NULL;
            return QuantizedAddUniform8u8u8u;
        }

        //-------------------------------------------------------------------------------------------------

        // Builds on the SSE4.1 implementation and, when both inputs and the output are 8-bit unsigned,
        // replaces the kernel pointer with the one chosen by GetQuantizedAddUniform8u8u8u for the
        // requested activation (which may be NULL for activations that function does not handle).
        SynetQuantizedAddUniform::SynetQuantizedAddUniform(const QuantizedAddParam& p)
            : Sse41::SynetQuantizedAddUniform(p)
        {
            const bool all8u = p.aType == SimdTensorData8u && p.bType == SimdTensorData8u && p.dType == SimdTensorData8u;
            if (!all8u)
                return; // keep whatever the base class constructor selected
            _uniform = GetQuantizedAddUniform8u8u8u(p.actType);
        }

        //-------------------------------------------------------------------------------------------------

        // Creates an AVX2 quantized-add context from the description of both inputs, the activation and the output.
        //
        // All arguments are forwarded unchanged to QuantizedAddParam.
        // Returns a newly allocated SynetQuantizedAddUniform (as void*) when the parameters are valid and
        // the uniform implementation reports itself preferable for them; returns NULL otherwise.
        // NOTE(review): the returned object is heap-allocated here; how it is released is not visible in this file.
        void* SynetQuantizedAddInit(const size_t* aShape, size_t aCount, SimdTensorDataType aType, const float* aScale, int32_t aZero,
            const size_t* bShape, size_t bCount, SimdTensorDataType bType, const float* bScale, int32_t bZero,
            SimdConvolutionActivationType actType, const float* actParams, SimdTensorDataType dstType, const float* dstScale, int32_t dstZero)
        {
            QuantizedAddParam param(aShape, aCount, aType, aScale, aZero, bShape, bCount, bType, bScale, bZero, actType, actParams, dstType, dstScale, dstZero);
            // Preferable() is only consulted once the parameters have passed validation.
            if (param.Valid() && SynetQuantizedAddUniform::Preferable(param))
                return new SynetQuantizedAddUniform(param);
            return NULL;
        }
    }
#endif
}

Coverage Report

Created: 2025-11-16 06:41

Line	Count	Source
1		/*
2		* Simd Library (http://ermig1979.github.io/Simd).
3		*
4		* Copyright (c) 2011-2025 Yermalayeu Ihar.
5		*
6		* Permission is hereby granted, free of charge, to any person obtaining a copy
7		* of this software and associated documentation files (the "Software"), to deal
8		* in the Software without restriction, including without limitation the rights
9		* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10		* copies of the Software, and to permit persons to whom the Software is
11		* furnished to do so, subject to the following conditions:
12		*
13		* The above copyright notice and this permission notice shall be included in
14		* all copies or substantial portions of the Software.
15		*
16		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17		* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18		* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19		* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20		* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21		* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22		* SOFTWARE.
23		*/
24		#include "Simd/SimdSynetQuantizedAdd.h"
25		#include "Simd/SimdSynetQuantizedAddCommon.h"
26		#include "Simd/SimdStore.h"
27		#include "Simd/SimdFmadd.h"
28
29		namespace Simd
30		{
31		#if defined(SIMD_AVX2_ENABLE) && defined(SIMD_SYNET_ENABLE)
32		namespace Avx2
33		{
34		static void QuantizedAddUniform8u8u8u(const uint8_t* a, float aScale, int aZero, const uint8_t* b, float bScale, int bZero, size_t size, const float*, float dScale, int dZero, uint8_t* dst)
35	0	{
36	0	float adScale = aScale / dScale;
37	0	float bdScale = bScale / dScale;
38	0	float term = float(dZero) - (adScale * float(aZero) + bdScale * float(bZero));
39	0	__m256 _adScale = _mm256_set1_ps(adScale), _bdScale = _mm256_set1_ps(bdScale), _term = _mm256_set1_ps(term);
40	0	size_t i = 0, size4 = AlignLo(size, 4), size16 = AlignLo(size, 16), size32 = AlignLo(size, 32);
41	0	for (; i < size32; i += 32)
42	0	QuantizedAdd8u8u8u32(a + i, _adScale, b + i, _bdScale, _term, dst + i);
43	0	for (; i < size16; i += 16)
44	0	QuantizedAdd8u8u8u16(a + i, _adScale, b + i, _bdScale, _term, dst + i);
45	0	for (; i < size4; i += 4)
46	0	QuantizedAdd8u8u8u4(a + i, _adScale, b + i, _bdScale, _term, dst + i);
47	0	for (; i < size; i += 1)
48	0	QuantizedAdd8u8u8u1(a + i, _adScale, b + i, _bdScale, _term, dst + i);
49	0	}
50
51		static SynetQuantizedAddUniform::UniformPtr GetQuantizedAddUniform8u8u8u(SimdConvolutionActivationType type)
52	0	{
53	0	switch (type)
54	0	{
55	0	case SimdConvolutionActivationIdentity:
56	0	case SimdConvolutionActivationRelu: return QuantizedAddUniform8u8u8u;
57	0	default:
58	0	return NULL;
59	0	}
60	0	}
61
62		//-------------------------------------------------------------------------------------------------
63
64		SynetQuantizedAddUniform::SynetQuantizedAddUniform(const QuantizedAddParam& p)
65	0	: Sse41::SynetQuantizedAddUniform(p)
66	0	{
67	0	if(p.aType == SimdTensorData8u && p.bType == SimdTensorData8u && p.dType == SimdTensorData8u)
68	0	_uniform = GetQuantizedAddUniform8u8u8u(p.actType);
69	0	}
70
71		//-------------------------------------------------------------------------------------------------
72
73		void* SynetQuantizedAddInit(const size_t* aShape, size_t aCount, SimdTensorDataType aType, const float* aScale, int32_t aZero,
74		const size_t* bShape, size_t bCount, SimdTensorDataType bType, const float* bScale, int32_t bZero,
75		SimdConvolutionActivationType actType, const float* actParams, SimdTensorDataType dstType, const float* dstScale, int32_t dstZero)
76	0	{
77	0	QuantizedAddParam param(aShape, aCount, aType, aScale, aZero, bShape, bCount, bType, bScale, bZero, actType, actParams, dstType, dstScale, dstZero);
78	0	if (!param.Valid())
79	0	return NULL;
80	0	if (SynetQuantizedAddUniform::Preferable(param))
81	0	return new SynetQuantizedAddUniform(param);
82	0	return NULL;
83	0	}
84		}
85		#endif
86		}