/src/fftw3/kernel/tensor7.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | #include "kernel/ifftw.h" |
23 | | |
24 | | static int signof(INT x) |
25 | 5.35k | { |
26 | 5.35k | if (x < 0) return -1; |
27 | 2.59k | if (x == 0) return 0; |
28 | 2.59k | /* if (x > 0) */ return 1; |
29 | 2.59k | } |
30 | | |
31 | | /* total order among iodim's */ |
32 | | int X(dimcmp)(const iodim *a, const iodim *b) |
33 | 1.16k | { |
34 | 1.16k | INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is); |
35 | 1.16k | INT sao = X(iabs)(a->os), sbo = X(iabs)(b->os); |
36 | 1.16k | INT sam = X(imin)(sai, sao), sbm = X(imin)(sbi, sbo); |
37 | | |
38 | | /* in descending order of min{istride, ostride} */ |
39 | 1.16k | if (sam != sbm) |
40 | 779 | return signof(sbm - sam); |
41 | | |
42 | | /* in case of a tie, in descending order of istride */ |
43 | 386 | if (sbi != sai) |
44 | 386 | return signof(sbi - sai); |
45 | | |
46 | | /* in case of a tie, in descending order of ostride */ |
47 | 0 | if (sbo != sao) |
48 | 0 | return signof(sbo - sao); |
49 | | |
50 | | /* in case of a tie, in ascending order of n */ |
51 | 0 | return signof(a->n - b->n); |
52 | 0 | } |
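
A minimal standalone sketch of this total order, using a hypothetical "dim" struct with plain long fields as a stand-in for the iodim/INT types from kernel/ifftw.h (an illustration of the ordering rule, not the library's code):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { long n, is, os; } dim;   /* simplified iodim stand-in */

    static long lmin(long a, long b) { return a < b ? a : b; }
    static long labs_(long x) { return x < 0 ? -x : x; }
    static int sgn(long x) { return (x > 0) - (x < 0); }

    /* same total order as X(dimcmp) above */
    static int dim_cmp(const void *pa, const void *pb)
    {
         const dim *a = pa, *b = pb;
         long sam = lmin(labs_(a->is), labs_(a->os));
         long sbm = lmin(labs_(b->is), labs_(b->os));
         if (sam != sbm) return sgn(sbm - sam);        /* descending min stride */
         if (labs_(a->is) != labs_(b->is))
              return sgn(labs_(b->is) - labs_(a->is)); /* descending |istride| */
         if (labs_(a->os) != labs_(b->os))
              return sgn(labs_(b->os) - labs_(a->os)); /* descending |ostride| */
         return sgn(a->n - b->n);                      /* ascending n */
    }

    int main(void)
    {
         dim d[3] = { {4, 1, 1}, {8, 32, 32}, {2, 4, 4} };
         qsort(d, 3, sizeof(dim), dim_cmp);
         for (int i = 0; i < 3; ++i)      /* prints n=8, then n=2, then n=4 */
              printf("n=%ld is=%ld os=%ld\n", d[i].n, d[i].is, d[i].os);
         return 0;
    }

Sorting the three dims yields n=8, n=2, n=4, i.e. descending min{|is|, |os|}, with the stride-1 dimension last.
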
53 | | |
54 | | static void canonicalize(tensor *x) |
55 | 5.35k | { |
56 | 5.35k | if (x->rnk > 1) { |
57 | 906 | qsort(x->dims, (unsigned)x->rnk, sizeof(iodim), |
58 | 906 | (int (*)(const void *, const void *))X(dimcmp)); |
59 | 906 | } |
60 | 5.35k | } |
61 | | |
62 | | static int compare_by_istride(const iodim *a, const iodim *b) |
63 | 4.18k | { |
64 | 4.18k | INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is); |
65 | | |
66 | | /* in descending order of istride */ |
67 | 4.18k | return signof(sbi - sai); |
68 | 4.18k | } |
69 | | |
70 | | static tensor *really_compress(const tensor *sz) |
71 | 8.15k | { |
72 | 8.15k | int i, rnk; |
73 | 8.15k | tensor *x; |
74 | | |
75 | 8.15k | A(FINITE_RNK(sz->rnk)); |
76 | 21.0k | for (i = rnk = 0; i < sz->rnk; ++i) { |
77 | 12.8k | A(sz->dims[i].n > 0); |
78 | 12.8k | if (sz->dims[i].n != 1) |
79 | 10.1k | ++rnk; |
80 | 12.8k | } |
81 | | |
82 | 8.15k | x = X(mktensor)(rnk); |
83 | 21.0k | for (i = rnk = 0; i < sz->rnk; ++i) { |
84 | 12.8k | if (sz->dims[i].n != 1) |
85 | 10.1k | x->dims[rnk++] = sz->dims[i]; |
86 | 12.8k | } |
87 | 8.15k | return x; |
88 | 8.15k | } |
89 | | |
90 | | /* Like tensor_copy, but eliminate n == 1 dimensions, which |
91 | | never affect any transform or transform vector. |
92 | | |
93 | | Also, we sort the tensor into a canonical order of decreasing |
94 | | strides (see X(dimcmp) for an exact definition). In general, |
95 | | processing a loop/array in order of decreasing stride will improve |
96 | | locality. Both forward and backwards traversal of the tensor are |
97 | | considered e.g. by vrank-geq1, so sorting in increasing |
98 | | vs. decreasing order is not really important. */ |
99 | | tensor *X(tensor_compress)(const tensor *sz) |
100 | 3.01k | { |
101 | 3.01k | tensor *x = really_compress(sz); |
102 | 3.01k | canonicalize(x); |
103 | 3.01k | return x; |
104 | 3.01k | } |
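
The observable effect, sketched with the dim stand-in and dim_cmp comparator from the example above (an in-place illustration only; the real code allocates a fresh tensor via X(mktensor)):

    /* drop n == 1 dimensions, then sort into canonical order */
    static int compress_dims(dim *d, int rnk)
    {
         int i, j = 0;
         for (i = 0; i < rnk; ++i)
              if (d[i].n != 1)
                   d[j++] = d[i];        /* keep only nontrivial dims */
         qsort(d, (size_t)j, sizeof(dim), dim_cmp);
         return j;                       /* the compressed rank */
    }

For a hypothetical 1 x 4 x 1 x 8 row-major layout, {n=1,is=32,os=32}, {n=4,is=8,os=8}, {n=1,is=8,os=8}, {n=8,is=1,os=1} compresses to rank 2: {n=4,is=8,os=8}, {n=8,is=1,os=1}.
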
105 | | |
106 | | /* Return whether the strides of a and b are such that they form an |
107 | | effective contiguous 1d array. Assumes that a.is >= b.is. */ |
108 | | static int strides_contig(iodim *a, iodim *b) |
109 | 6.59k | { |
110 | 6.59k | return (a->is == b->is * b->n && a->os == b->os * b->n); |
111 | 6.59k | } |
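
In terms of the dim stand-in from the sketches above, the test reads as follows (note the precondition: the caller must pass the larger-stride dimension first):

    /* outer/inner form one contiguous block iff the outer stride equals
       the inner stride times the inner length, on both I/O sides */
    static int contig(const dim *outer, const dim *inner)
    {
         return outer->is == inner->is * inner->n
             && outer->os == inner->os * inner->n;
    }

For example, {n=3,is=4,os=4} over {n=4,is=1,os=1} satisfies 4 == 1 * 4 on both sides, so the pair indexes exactly the same locations as the single dimension {n=12,is=1,os=1}.
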
112 | | |
113 | | /* Like tensor_compress, but also compress into one dimension any |
114 | | group of dimensions that form a contiguous block of indices with |
115 | | some stride. (This can safely be done for transform vector sizes.) */ |
116 | | tensor *X(tensor_compress_contiguous)(const tensor *sz) |
117 | 5.34k | { |
118 | 5.34k | int i, rnk; |
119 | 5.34k | tensor *sz2, *x; |
120 | | |
121 | 5.34k | if (X(tensor_sz)(sz) == 0) |
122 | 211 | return X(mktensor)(RNK_MINFTY); |
123 | | |
124 | 5.13k | sz2 = really_compress(sz); |
125 | 5.13k | A(FINITE_RNK(sz2->rnk)); |
126 | | |
127 | 5.13k | if (sz2->rnk <= 1) { /* nothing to compress. */ |
128 | 2.79k | if (0) { |
129 | | /* this call is redundant, because "sz->rnk <= 1" implies |
130 | | that the tensor is already canonical, but I am writing |
131 | | it explicitly because "logically" we need to canonicalize |
132 | | the tensor before returning. */ |
133 | 0 | canonicalize(sz2); |
134 | 0 | } |
135 | 2.79k | return sz2; |
136 | 2.79k | } |
137 | | |
138 | | /* sort in descending order of |istride|, so that compressible |
139 | | dimensions appear contiguously */ |
140 | 2.33k | qsort(sz2->dims, (unsigned)sz2->rnk, sizeof(iodim), |
141 | 2.33k | (int (*)(const void *, const void *))compare_by_istride); |
142 | | |
143 | | /* compute what the rank will be after compression */ |
144 | 5.63k | for (i = rnk = 1; i < sz2->rnk; ++i) |
145 | 3.29k | if (!strides_contig(sz2->dims + i - 1, sz2->dims + i)) |
146 | 1.14k | ++rnk; |
147 | | |
148 | | /* merge adjacent dimensions whenever possible */ |
149 | 2.33k | x = X(mktensor)(rnk); |
150 | 2.33k | x->dims[0] = sz2->dims[0]; |
151 | 5.63k | for (i = rnk = 1; i < sz2->rnk; ++i) { |
152 | 3.29k | if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) { |
153 | 2.15k | x->dims[rnk - 1].n *= sz2->dims[i].n; |
154 | 2.15k | x->dims[rnk - 1].is = sz2->dims[i].is; |
155 | 2.15k | x->dims[rnk - 1].os = sz2->dims[i].os; |
156 | 2.15k | } else { |
157 | 1.14k | A(rnk < x->rnk); |
158 | 1.14k | x->dims[rnk++] = sz2->dims[i]; |
159 | 1.14k | } |
160 | 3.29k | } |
161 | | |
162 | 2.33k | X(tensor_destroy)(sz2); |
163 | | |
164 | | /* reduce to canonical form */ |
165 | 2.33k | canonicalize(x); |
166 | 2.33k | return x; |
167 | 5.13k | } |
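
A sketch of the merge loop on the simplified dim array, reusing contig from the sketch above (assumes d[] is already sorted by descending |istride| and rnk >= 1, as the early return above guarantees for the real code):

    /* fold each dim that is contiguous with its left neighbor into it */
    static int merge_contig(dim *d, int rnk)
    {
         int i, j = 0;
         for (i = 1; i < rnk; ++i) {
              if (contig(&d[j], &d[i])) {
                   d[j].n *= d[i].n;    /* absorb the inner dimension */
                   d[j].is = d[i].is;   /* keep the finer strides */
                   d[j].os = d[i].os;
              } else {
                   d[++j] = d[i];       /* start a new output dimension */
              }
         }
         return j + 1;                  /* the merged rank */
    }

Because a merged dimension inherits the strides of the dim it just absorbed, comparing d[j] against d[i] is equivalent to the pairwise strides_contig(sz2->dims + i - 1, sz2->dims + i) test used above. The real code then sorts the result back into canonical order, since descending |istride| is not the canonical X(dimcmp) order.
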
168 | | |
169 | | /* The inverse of X(tensor_append): splits the sz tensor into |
170 | | tensor a followed by tensor b, where a's rank is arnk. */ |
171 | | void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b) |
172 | 0 | { |
173 | 0 | A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk)); |
174 | | |
175 | 0 | *a = X(tensor_copy_sub)(sz, 0, arnk); |
176 | 0 | *b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk); |
177 | 0 | } |
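
A hypothetical call sequence, using only entry points visible in this file (peeling the first dimension off some tensor sz):

    tensor *a, *b;
    X(tensor_split)(sz, &a, 1, &b);  /* a: rank 1, b: rank sz->rnk - 1 */
    /* ... use a and b ... */
    X(tensor_destroy)(a);
    X(tensor_destroy)(b);
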
178 | | |
179 | | /* TRUE if the two tensors are equal */ |
180 | | int X(tensor_equal)(const tensor *a, const tensor *b) |
181 | 966 | { |
182 | 966 | if (a->rnk != b->rnk) |
183 | 0 | return 0; |
184 | | |
185 | 966 | if (FINITE_RNK(a->rnk)) { |
186 | 961 | int i; |
187 | 1.99k | for (i = 0; i < a->rnk; ++i) |
188 | 1.03k | if (0 |
189 | 1.03k | || a->dims[i].n != b->dims[i].n |
190 | 1.03k | || a->dims[i].is != b->dims[i].is |
191 | 1.03k | || a->dims[i].os != b->dims[i].os |
192 | 1.03k | ) |
193 | 0 | return 0; |
194 | 961 | } |
195 | | |
196 | 966 | return 1; |
197 | 966 | } |
198 | | |
199 | | /* TRUE if the sets of input and output locations described by |
200 | | (append sz vecsz) are the same */ |
201 | | int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz) |
202 | 966 | { |
203 | 966 | tensor *t = X(tensor_append)(sz, vecsz); |
204 | 966 | tensor *ti = X(tensor_copy_inplace)(t, INPLACE_IS); |
205 | 966 | tensor *to = X(tensor_copy_inplace)(t, INPLACE_OS); |
206 | 966 | tensor *tic = X(tensor_compress_contiguous)(ti); |
207 | 966 | tensor *toc = X(tensor_compress_contiguous)(to); |
208 | | |
209 | 966 | int retval = X(tensor_equal)(tic, toc); |
210 | | |
211 | 966 | X(tensor_destroy)(t); |
212 | 966 | X(tensor_destroy4)(ti, to, tic, toc); |
213 | | |
214 | 966 | return retval; |
215 | 966 | } |
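
As a worked example (my reading of the helpers, not taken from the source): consider a hypothetical in-place 4 x 4 transpose, sz = {n=4,is=4,os=1} x {n=4,is=1,os=4} with a rank-0 (empty) vecsz. The copy ti carries the input strides on both sides, {4,4,4} x {4,1,1}, which compresses to the single dimension {16,1,1}; the copy to carries the output strides, {4,1,1} x {4,4,4}, which canonicalizes and compresses to the same {16,1,1}. The compressed tensors are equal, so the input and output location sets coincide (both are offsets 0..15) and the routine returns TRUE, even though individual elements move.
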