Line | Count | Source |
1 | | /* Copyright (c) 2008-2011 Octasic Inc. |
2 | | 2012-2017 Jean-Marc Valin */ |
3 | | /* |
4 | | Redistribution and use in source and binary forms, with or without |
5 | | modification, are permitted provided that the following conditions |
6 | | are met: |
7 | | |
8 | | - Redistributions of source code must retain the above copyright |
9 | | notice, this list of conditions and the following disclaimer. |
10 | | |
11 | | - Redistributions in binary form must reproduce the above copyright |
12 | | notice, this list of conditions and the following disclaimer in the |
13 | | documentation and/or other materials provided with the distribution. |
14 | | |
15 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
16 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
17 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
18 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
19 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
20 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
21 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
22 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
23 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
24 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #ifdef HAVE_CONFIG_H |
29 | | #include "config.h" |
30 | | #endif |
31 | | |
32 | | #include <math.h> |
33 | | #include "opus_types.h" |
34 | | #include "opus_defines.h" |
35 | | #include "arch.h" |
36 | | #include "mlp.h" |
37 | | |
38 | 65.8M | #define fmadd(a, b, c) ((a)*(b)+(c)) |
39 | | static OPUS_INLINE float tansig_approx(float x) |
40 | 32.9M | { |
41 | 32.9M | const float N0 = 952.52801514f; |
42 | 32.9M | const float N1 = 96.39235687f; |
43 | 32.9M | const float N2 = 0.60863042f; |
44 | 32.9M | const float D0 = 952.72399902f; |
45 | 32.9M | const float D1 = 413.36801147f; |
46 | 32.9M | const float D2 = 11.88600922f; |
47 | 32.9M | float X2, num, den; |
48 | 32.9M | X2 = x*x; |
49 | 32.9M | num = fmadd(fmadd(N2, X2, N1), X2, N0); |
50 | 32.9M | den = fmadd(fmadd(D2, X2, D1), X2, D0); |
51 | 32.9M | num = num*x/den; |
52 | 32.9M | return MAX32(-1.f, MIN32(1.f, num)); |
53 | 32.9M | } |
54 | | |
55 | | static OPUS_INLINE float sigmoid_approx(float x) |
56 | 15.5M | { |
57 | 15.5M | return .5f + .5f*tansig_approx(.5f*x); |
58 | 15.5M | } |
59 | | |
60 | | static void gemm_accum(float *out, const opus_int8 *weights, int rows, int cols, int col_stride, const float *x) |
61 | 2.48M | { |
62 | 2.48M | int i, j; |
63 | 57.8M | for (i=0;i<rows;i++) |
64 | 55.3M | { |
65 | 1.57G | for (j=0;j<cols;j++) |
66 | 1.51G | out[i] += weights[j*col_stride + i]*x[j]; |
67 | 55.3M | } |
68 | 2.48M | } |
69 | | |
70 | | void analysis_compute_dense(const AnalysisDenseLayer *layer, float *output, const float *input) |
71 | 621k | { |
72 | 621k | int i; |
73 | 621k | int N, M; |
74 | 621k | int stride; |
75 | 621k | M = layer->nb_inputs; |
76 | 621k | N = layer->nb_neurons; |
77 | 621k | stride = N; |
78 | 11.1M | for (i=0;i<N;i++) |
79 | 10.5M | output[i] = layer->bias[i]; |
80 | 621k | gemm_accum(output, layer->input_weights, N, M, stride, input); |
81 | 11.1M | for (i=0;i<N;i++) |
82 | 10.5M | output[i] *= WEIGHTS_SCALE; |
83 | 621k | if (layer->sigmoid) { |
84 | 932k | for (i=0;i<N;i++) |
85 | 621k | output[i] = sigmoid_approx(output[i]); |
86 | 310k | } else { |
87 | 10.2M | for (i=0;i<N;i++) |
88 | 9.94M | output[i] = tansig_approx(output[i]); |
89 | 310k | } |
90 | 621k | } |
91 | | |
92 | | void analysis_compute_gru(const AnalysisGRULayer *gru, float *state, const float *input) |
93 | 310k | { |
94 | 310k | int i; |
95 | 310k | int N, M; |
96 | 310k | int stride; |
97 | 310k | float tmp[MAX_NEURONS]; |
98 | 310k | float z[MAX_NEURONS]; |
99 | 310k | float r[MAX_NEURONS]; |
100 | 310k | float h[MAX_NEURONS]; |
101 | 310k | M = gru->nb_inputs; |
102 | 310k | N = gru->nb_neurons; |
103 | 310k | stride = 3*N; |
104 | | /* Compute update gate. */ |
105 | 7.77M | for (i=0;i<N;i++) |
106 | 7.45M | z[i] = gru->bias[i]; |
107 | 310k | gemm_accum(z, gru->input_weights, N, M, stride, input); |
108 | 310k | gemm_accum(z, gru->recurrent_weights, N, N, stride, state); |
109 | 7.77M | for (i=0;i<N;i++) |
110 | 7.45M | z[i] = sigmoid_approx(WEIGHTS_SCALE*z[i]); |
111 | | |
112 | | /* Compute reset gate. */ |
113 | 7.77M | for (i=0;i<N;i++) |
114 | 7.45M | r[i] = gru->bias[N + i]; |
115 | 310k | gemm_accum(r, &gru->input_weights[N], N, M, stride, input); |
116 | 310k | gemm_accum(r, &gru->recurrent_weights[N], N, N, stride, state); |
117 | 7.77M | for (i=0;i<N;i++) |
118 | 7.45M | r[i] = sigmoid_approx(WEIGHTS_SCALE*r[i]); |
119 | | |
120 | | /* Compute output. */ |
121 | 7.77M | for (i=0;i<N;i++) |
122 | 7.45M | h[i] = gru->bias[2*N + i]; |
123 | 7.77M | for (i=0;i<N;i++) |
124 | 7.45M | tmp[i] = state[i] * r[i]; |
125 | 310k | gemm_accum(h, &gru->input_weights[2*N], N, M, stride, input); |
126 | 310k | gemm_accum(h, &gru->recurrent_weights[2*N], N, N, stride, tmp); |
127 | 7.77M | for (i=0;i<N;i++) |
128 | 7.45M | h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*h[i]); |
129 | 7.77M | for (i=0;i<N;i++) |
130 | 7.45M | state[i] = h[i]; |
131 | 310k | } |