/src/Simd/src/Simd/SimdAvx2SynetQuantizedAdd.cpp
Line | Count | Source |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2025 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdSynetQuantizedAdd.h" |
25 | | #include "Simd/SimdSynetQuantizedAddCommon.h" |
26 | | #include "Simd/SimdStore.h" |
27 | | #include "Simd/SimdFmadd.h" |
28 | | |
29 | | namespace Simd |
30 | | { |
31 | | #if defined(SIMD_AVX2_ENABLE) && defined(SIMD_SYNET_ENABLE) |
32 | | namespace Avx2 |
33 | | { |
34 | | static void QuantizedAddUniform8u8u8u(const uint8_t* a, float aScale, int aZero, const uint8_t* b, float bScale, int bZero, size_t size, const float*, float dScale, int dZero, uint8_t* dst) |
35 | 0 | { |
36 | 0 | float adScale = aScale / dScale; |
37 | 0 | float bdScale = bScale / dScale; |
38 | 0 | float term = float(dZero) - (adScale * float(aZero) + bdScale * float(bZero)); |
39 | 0 | __m256 _adScale = _mm256_set1_ps(adScale), _bdScale = _mm256_set1_ps(bdScale), _term = _mm256_set1_ps(term); |
40 | 0 | size_t i = 0, size4 = AlignLo(size, 4), size16 = AlignLo(size, 16), size32 = AlignLo(size, 32); |
41 | 0 | for (; i < size32; i += 32) |
42 | 0 | QuantizedAdd8u8u8u32(a + i, _adScale, b + i, _bdScale, _term, dst + i); |
43 | 0 | for (; i < size16; i += 16) |
44 | 0 | QuantizedAdd8u8u8u16(a + i, _adScale, b + i, _bdScale, _term, dst + i); |
45 | 0 | for (; i < size4; i += 4) |
46 | 0 | QuantizedAdd8u8u8u4(a + i, _adScale, b + i, _bdScale, _term, dst + i); |
47 | 0 | for (; i < size; i += 1) |
48 | 0 | QuantizedAdd8u8u8u1(a + i, _adScale, b + i, _bdScale, _term, dst + i); |
49 | 0 | } |
50 | | |
51 | | static SynetQuantizedAddUniform::UniformPtr GetQuantizedAddUniform8u8u8u(SimdConvolutionActivationType type) |
52 | 0 | { |
53 | 0 | switch (type) |
54 | 0 | { |
55 | 0 | case SimdConvolutionActivationIdentity: |
56 | 0 | case SimdConvolutionActivationRelu: return QuantizedAddUniform8u8u8u; |
57 | 0 | default: |
58 | 0 | return NULL; |
59 | 0 | } |
60 | 0 | } |
61 | | |
62 | | //------------------------------------------------------------------------------------------------- |
63 | | |
64 | | SynetQuantizedAddUniform::SynetQuantizedAddUniform(const QuantizedAddParam& p) |
65 | 0 | : Sse41::SynetQuantizedAddUniform(p) |
66 | 0 | { |
67 | 0 | if(p.aType == SimdTensorData8u && p.bType == SimdTensorData8u && p.dType == SimdTensorData8u) |
68 | 0 | _uniform = GetQuantizedAddUniform8u8u8u(p.actType); |
69 | 0 | } |
70 | | |
71 | | //------------------------------------------------------------------------------------------------- |
72 | | |
73 | | void* SynetQuantizedAddInit(const size_t* aShape, size_t aCount, SimdTensorDataType aType, const float* aScale, int32_t aZero, |
74 | | const size_t* bShape, size_t bCount, SimdTensorDataType bType, const float* bScale, int32_t bZero, |
75 | | SimdConvolutionActivationType actType, const float* actParams, SimdTensorDataType dstType, const float* dstScale, int32_t dstZero) |
76 | 0 | { |
77 | 0 | QuantizedAddParam param(aShape, aCount, aType, aScale, aZero, bShape, bCount, bType, bScale, bZero, actType, actParams, dstType, dstScale, dstZero); |
78 | 0 | if (!param.Valid()) |
79 | 0 | return NULL; |
80 | 0 | if (SynetQuantizedAddUniform::Preferable(param)) |
81 | 0 | return new SynetQuantizedAddUniform(param); |
82 | 0 | return NULL; |
83 | 0 | } |
84 | | } |
85 | | #endif |
86 | | } |