/src/Simd/src/Simd/SimdBaseSynetScale16b.cpp
Line | Count | Source |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2025 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdSynetScale16b.h" |
25 | | #include "Simd/SimdSynetAdd16bCommon.h" |
26 | | |
27 | | namespace Simd |
28 | | { |
29 | | #if defined(SIMD_SYNET_ENABLE) |
30 | | |
31 | | SynetScale16b::SynetScale16b(const Scale16bParam& p) |
32 | 0 | : _param(p) |
33 | 0 | { |
34 | |
|
35 | 0 | } |
36 | | |
37 | | //------------------------------------------------------------------------------------------------- |
38 | | |
39 | | namespace Base |
40 | | { |
41 | | template<class S, class D> void SynetNormBias16b(const uint8_t* src8, size_t channels, size_t spatial, SimdTensorFormatType format, const float* norm, const float* bias, uint8_t* dst8) |
42 | 0 | { |
43 | 0 | const S* src = (const S*)src8; |
44 | 0 | D* dst = (D*)dst8; |
45 | 0 | if (format == SimdTensorFormatNchw) |
46 | 0 | { |
47 | 0 | for (size_t c = 0; c < channels; ++c) |
48 | 0 | { |
49 | 0 | float _norm = norm[c]; |
50 | 0 | float _bias = bias[c]; |
51 | 0 | for (size_t s = 0; s < spatial; ++s) |
52 | 0 | NormBias16b<S, D>(src[s], _norm, _bias, dst[s]); |
53 | 0 | src += spatial; |
54 | 0 | dst += spatial; |
55 | 0 | } |
56 | 0 | } |
57 | 0 | else if (format == SimdTensorFormatNhwc) |
58 | 0 | { |
59 | 0 | for (size_t s = 0; s < spatial; ++s) |
60 | 0 | { |
61 | 0 | for (size_t c = 0; c < channels; ++c) |
62 | 0 | NormBias16b<S, D>(src[c], norm[c], bias[c], dst[c]); |
63 | 0 | src += channels; |
64 | 0 | dst += channels; |
65 | 0 | } |
66 | 0 | } |
67 | 0 | else |
68 | 0 | assert(0); |
69 | 0 | } Unexecuted instantiation: void Simd::Base::SynetNormBias16b<float, float>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: void Simd::Base::SynetNormBias16b<float, unsigned short>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: void Simd::Base::SynetNormBias16b<unsigned short, float>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: void Simd::Base::SynetNormBias16b<unsigned short, unsigned short>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) |
70 | | |
71 | | template<class S, class D> void SynetNorm16b(const uint8_t* src8, size_t channels, size_t spatial, SimdTensorFormatType format, const float* norm, const float* bias, uint8_t* dst8) |
72 | 0 | { |
73 | 0 | const S* src = (const S*)src8; |
74 | 0 | D* dst = (D*)dst8; |
75 | 0 | if (format == SimdTensorFormatNchw) |
76 | 0 | { |
77 | 0 | for (size_t c = 0; c < channels; ++c) |
78 | 0 | { |
79 | 0 | float _norm = norm[c]; |
80 | 0 | for (size_t s = 0; s < spatial; ++s) |
81 | 0 | Norm16b<S, D>(src[s], _norm, dst[s]); |
82 | 0 | src += spatial; |
83 | 0 | dst += spatial; |
84 | 0 | } |
85 | 0 | } |
86 | 0 | else if (format == SimdTensorFormatNhwc) |
87 | 0 | { |
88 | 0 | for (size_t s = 0; s < spatial; ++s) |
89 | 0 | { |
90 | 0 | for (size_t c = 0; c < channels; ++c) |
91 | 0 | Norm16b<S, D>(src[c], norm[c], dst[c]); |
92 | 0 | src += channels; |
93 | 0 | dst += channels; |
94 | 0 | } |
95 | 0 | } |
96 | 0 | else |
97 | 0 | assert(0); |
98 | 0 | } Unexecuted instantiation: void Simd::Base::SynetNorm16b<float, float>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: void Simd::Base::SynetNorm16b<float, unsigned short>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: void Simd::Base::SynetNorm16b<unsigned short, float>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: void Simd::Base::SynetNorm16b<unsigned short, unsigned short>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) |
99 | | |
100 | | template<class S, class D> void SynetBias16b(const uint8_t* src8, size_t channels, size_t spatial, SimdTensorFormatType format, const float* norm, const float* bias, uint8_t* dst8) |
101 | 0 | { |
102 | 0 | const S* src = (const S*)src8; |
103 | 0 | D* dst = (D*)dst8; |
104 | 0 | if (format == SimdTensorFormatNchw) |
105 | 0 | { |
106 | 0 | for (size_t c = 0; c < channels; ++c) |
107 | 0 | { |
108 | 0 | float _bias = bias[c]; |
109 | 0 | for (size_t s = 0; s < spatial; ++s) |
110 | 0 | Bias16b<S, D>(src[s], _bias, dst[s]); |
111 | 0 | src += spatial; |
112 | 0 | dst += spatial; |
113 | 0 | } |
114 | 0 | } |
115 | 0 | else if (format == SimdTensorFormatNhwc) |
116 | 0 | { |
117 | 0 | for (size_t s = 0; s < spatial; ++s) |
118 | 0 | { |
119 | 0 | for (size_t c = 0; c < channels; ++c) |
120 | 0 | Bias16b<S, D>(src[c], bias[c], dst[c]); |
121 | 0 | src += channels; |
122 | 0 | dst += channels; |
123 | 0 | } |
124 | 0 | } |
125 | 0 | else |
126 | 0 | assert(0); |
127 | 0 | } Unexecuted instantiation: void Simd::Base::SynetBias16b<float, float>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: void Simd::Base::SynetBias16b<float, unsigned short>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: void Simd::Base::SynetBias16b<unsigned short, float>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: void Simd::Base::SynetBias16b<unsigned short, unsigned short>(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) |
128 | | |
129 | | //------------------------------------------------------------------------------------------------- |
130 | | |
131 | | template<class S, class D> static SynetScale16b::WorkerPtr GetScale16bWorker(SimdBool norm, SimdBool bias) |
132 | 0 | { |
133 | 0 | if (norm) |
134 | 0 | return bias ? SynetNormBias16b<S, D> : SynetNorm16b<S, D>; |
135 | 0 | else |
136 | 0 | return bias ? SynetBias16b<S, D> : NULL; |
137 | 0 | } Unexecuted instantiation: SimdBaseSynetScale16b.cpp:void (*Simd::Base::GetScale16bWorker<float, float>(SimdBool, SimdBool))(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: SimdBaseSynetScale16b.cpp:void (*Simd::Base::GetScale16bWorker<float, unsigned short>(SimdBool, SimdBool))(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: SimdBaseSynetScale16b.cpp:void (*Simd::Base::GetScale16bWorker<unsigned short, float>(SimdBool, SimdBool))(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: SimdBaseSynetScale16b.cpp:void (*Simd::Base::GetScale16bWorker<unsigned short, unsigned short>(SimdBool, SimdBool))(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) |
138 | | |
139 | | template<class S> static SynetScale16b::WorkerPtr GetScale16bWorker(SimdTensorDataType dType, SimdBool norm, SimdBool bias) |
140 | 0 | { |
141 | 0 | switch (dType) |
142 | 0 | { |
143 | 0 | case SimdTensorData32f: return GetScale16bWorker<S, float>(norm, bias); |
144 | 0 | case SimdTensorData16b: return GetScale16bWorker<S, uint16_t>(norm, bias); |
145 | 0 | default: |
146 | 0 | return NULL; |
147 | 0 | } |
148 | 0 | } Unexecuted instantiation: SimdBaseSynetScale16b.cpp:void (*Simd::Base::GetScale16bWorker<float>(SimdTensorDataType, SimdBool, SimdBool))(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) Unexecuted instantiation: SimdBaseSynetScale16b.cpp:void (*Simd::Base::GetScale16bWorker<unsigned short>(SimdTensorDataType, SimdBool, SimdBool))(unsigned char const*, unsigned long, unsigned long, SimdTensorFormatType, float const*, float const*, unsigned char*) |
149 | | |
150 | | static SynetScale16b::WorkerPtr GetScale16bWorker(SimdTensorDataType sType, SimdTensorDataType dType, SimdBool norm, SimdBool bias) |
151 | 0 | { |
152 | 0 | switch (sType) |
153 | 0 | { |
154 | 0 | case SimdTensorData32f: return GetScale16bWorker<float>(dType, norm, bias); |
155 | 0 | case SimdTensorData16b: return GetScale16bWorker<uint16_t>(dType, norm, bias); |
156 | 0 | default: |
157 | 0 | return NULL; |
158 | 0 | } |
159 | 0 | } |
160 | | |
161 | | //------------------------------------------------------------------------------------------------- |
162 | | |
163 | | SynetScale16b::SynetScale16b(const Scale16bParam& p) |
164 | 0 | : Simd::SynetScale16b(p) |
165 | 0 | , _worker(NULL) |
166 | 0 | { |
167 | 0 | _worker = GetScale16bWorker(p.sType, p.dType, p.norm, p.bias); |
168 | 0 | } |
169 | | |
170 | | bool SynetScale16b::Preferable(const Scale16bParam& p) |
171 | 0 | { |
172 | 0 | return true; |
173 | 0 | } |
174 | | |
175 | | void SynetScale16b::Forward(const uint8_t* src, const float* norm, const float* bias, uint8_t* dst) |
176 | 0 | { |
177 | 0 | _worker(src, _param.channels, _param.spatial, _param.format, norm, bias, dst); |
178 | 0 | } |
179 | | |
180 | | //------------------------------------------------------------------------------------------------- |
181 | | |
182 | | void* SynetScale16bInit(size_t channels, size_t spatial, SimdTensorDataType srcType, SimdTensorDataType dstType, SimdTensorFormatType format, SimdBool norm, SimdBool bias) |
183 | 0 | { |
184 | 0 | Scale16bParam param(channels, spatial, srcType, dstType, format, norm, bias); |
185 | 0 | if (!param.Valid()) |
186 | 0 | return NULL; |
187 | 0 | if (SynetScale16b::Preferable(param)) |
188 | 0 | return new SynetScale16b(param); |
189 | 0 | return NULL; |
190 | 0 | } |
191 | | } |
192 | | #endif |
193 | | } |