/src/fftw3/kernel/planner.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2000 Matteo Frigo |
3 | | * Copyright (c) 2000 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | #include "kernel/ifftw.h" |
22 | | #include <string.h> |
23 | | |
24 | | /* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU |
25 | | program in English, because English is the one language that nearly |
26 | | all programmers in all countries can read." |
27 | | |
28 | | ingemisco tanquam reus |
29 | | culpa rubet vultus meus |
30 | | supplicanti parce [rms] |
31 | | */ |
32 | | |
33 | 52.1k | #define VALIDP(solution) ((solution)->flags.hash_info & H_VALID) |
34 | 123k | #define LIVEP(solution) ((solution)->flags.hash_info & H_LIVE) |
35 | 27.9k | #define SLVNDX(solution) ((solution)->flags.slvndx) |
36 | 5.29k | #define BLISS(flags) (((flags).hash_info) & BLESSING) |
37 | 4.34k | #define INFEASIBLE_SLVNDX ((1U<<BITS_FOR_SLVNDX)-1) |
38 | | |
39 | | |
40 | 0 | #define MAXNAM 64 /* maximum length of registrar's name. |
41 | | Used for reading wisdom. There is no point |
42 | | in doing this right */ |
43 | | |
44 | | |
45 | | #ifdef FFTW_DEBUG |
46 | | static void check(hashtab *ht); |
47 | | #endif |
48 | | |
49 | | /* x <= y */ |
50 | 10.3k | #define LEQ(x, y) (((x) & (y)) == (x)) |
51 | | |
52 | | /* A subsumes B */ |
53 | | static int subsumes(const flags_t *a, unsigned slvndx_a, const flags_t *b) |
54 | 2.43k | { |
55 | 2.43k | if (slvndx_a != INFEASIBLE_SLVNDX) { |
56 | 2.41k | A(a->timelimit_impatience == 0); |
57 | 2.41k | return (LEQ(a->u, b->u) && LEQ(b->l, a->l)); |
58 | 2.41k | } else { |
59 | 20 | return (LEQ(a->l, b->l) |
60 | 20 | && a->timelimit_impatience <= b->timelimit_impatience); |
61 | 20 | } |
62 | 2.43k | } |
63 | | |
64 | | static unsigned addmod(unsigned a, unsigned b, unsigned p) |
65 | 55.7k | { |
66 | | /* gcc-2.95/sparc produces incorrect code for the fast version below. */ |
67 | | #if defined(__sparc__) && defined(__GNUC__) |
68 | | /* slow version */ |
69 | | return (a + b) % p; |
70 | | #else |
71 | | /* faster version */ |
72 | 55.7k | unsigned c = a + b; |
73 | 55.7k | return c >= p ? c - p : c; |
74 | 55.7k | #endif |
75 | 55.7k | } |
76 | | |
77 | | /* |
78 | | slvdesc management: |
79 | | */ |
80 | | static void sgrow(planner *ego) |
81 | 25 | { |
82 | 25 | unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4; |
83 | 25 | slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS); |
84 | 25 | slvdesc *otab = ego->slvdescs; |
85 | 25 | unsigned i; |
86 | | |
87 | 25 | ego->slvdescs = ntab; |
88 | 25 | ego->slvdescsiz = nsiz; |
89 | 2.80k | for (i = 0; i < osiz; ++i) |
90 | 2.77k | ntab[i] = otab[i]; |
91 | 25 | X(ifree0)(otab); |
92 | 25 | } |
93 | | |
94 | | static void register_solver(planner *ego, solver *s) |
95 | 613 | { |
96 | 613 | slvdesc *n; |
97 | 613 | int kind; |
98 | | |
99 | 613 | if (s) { /* add s to solver list */ |
100 | 613 | X(solver_use)(s); |
101 | | |
102 | 613 | A(ego->nslvdesc < INFEASIBLE_SLVNDX); |
103 | 613 | if (ego->nslvdesc >= ego->slvdescsiz) |
104 | 25 | sgrow(ego); |
105 | | |
106 | 613 | n = ego->slvdescs + ego->nslvdesc; |
107 | | |
108 | 613 | n->slv = s; |
109 | 613 | n->reg_nam = ego->cur_reg_nam; |
110 | 613 | n->reg_id = ego->cur_reg_id++; |
111 | | |
112 | 613 | A(strlen(n->reg_nam) < MAXNAM); |
113 | 613 | n->nam_hash = X(hash)(n->reg_nam); |
114 | | |
115 | 613 | kind = s->adt->problem_kind; |
116 | 613 | n->next_for_same_problem_kind = ego->slvdescs_for_problem_kind[kind]; |
117 | 613 | ego->slvdescs_for_problem_kind[kind] = (int)/*from unsigned*/ego->nslvdesc; |
118 | | |
119 | 613 | ego->nslvdesc++; |
120 | 613 | } |
121 | 613 | } |
122 | | |
123 | | static unsigned slookup(planner *ego, char *nam, int id) |
124 | 0 | { |
125 | 0 | unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */ |
126 | 0 | FORALL_SOLVERS(ego, s, sp, { |
127 | 0 | UNUSED(s); |
128 | 0 | if (sp->reg_id == id && sp->nam_hash == h |
129 | 0 | && !strcmp(sp->reg_nam, nam)) |
130 | 0 | return (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs); |
131 | 0 | }); |
132 | 0 | return INFEASIBLE_SLVNDX; |
133 | 0 | } |
134 | | |
135 | | /* Compute a MD5 hash of the configuration of the planner. |
136 | | We store it into the wisdom file to make absolutely sure that |
137 | | we are reading wisdom that is applicable */ |
138 | | static void signature_of_configuration(md5 *m, planner *ego) |
139 | 0 | { |
140 | 0 | X(md5begin)(m); |
141 | 0 | X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */ |
142 | 0 | FORALL_SOLVERS(ego, s, sp, { |
143 | 0 | UNUSED(s); |
144 | 0 | X(md5int)(m, sp->reg_id); |
145 | 0 | X(md5puts)(m, sp->reg_nam); |
146 | 0 | }); |
147 | 0 | X(md5end)(m); |
148 | 0 | } |
149 | | |
150 | | /* |
151 | | md5-related stuff: |
152 | | */ |
153 | | |
154 | | /* first hash function */ |
155 | | static unsigned h1(const hashtab *ht, const md5sig s) |
156 | 23.4k | { |
157 | 23.4k | unsigned h = s[0] % ht->hashsiz; |
158 | 23.4k | A(h == (s[0] % ht->hashsiz)); |
159 | 23.4k | return h; |
160 | 23.4k | } |
161 | | |
162 | | /* second hash function (for double hashing) */ |
163 | | static unsigned h2(const hashtab *ht, const md5sig s) |
164 | 23.4k | { |
165 | 23.4k | unsigned h = 1U + s[1] % (ht->hashsiz - 1); |
166 | 23.4k | A(h == (1U + s[1] % (ht->hashsiz - 1))); |
167 | 23.4k | return h; |
168 | 23.4k | } |
169 | | |
170 | | static void md5hash(md5 *m, const problem *p, const planner *plnr) |
171 | 3.66k | { |
172 | 3.66k | X(md5begin)(m); |
173 | 3.66k | X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */ |
174 | 3.66k | X(md5int)(m, plnr->nthr); |
175 | 3.66k | p->adt->hash(p, m); |
176 | 3.66k | X(md5end)(m); |
177 | 3.66k | } |
178 | | |
179 | | static int md5eq(const md5sig a, const md5sig b) |
180 | 42.1k | { |
181 | 42.1k | return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3]; |
182 | 42.1k | } |
183 | | |
184 | | static void sigcpy(const md5sig a, md5sig b) |
185 | 14.1k | { |
186 | 14.1k | b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3]; |
187 | 14.1k | } |
188 | | |
189 | | /* |
190 | | memoization routines : |
191 | | */ |
192 | | |
193 | | /* |
194 | | liber scriptus proferetur |
195 | | in quo totum continetur |
196 | | unde mundus iudicetur |
197 | | */ |
198 | | struct solution_s { |
199 | | md5sig s; |
200 | | flags_t flags; |
201 | | }; |
202 | | |
203 | | static solution *htab_lookup(hashtab *ht, const md5sig s, |
204 | | const flags_t *flagsp) |
205 | 6.32k | { |
206 | 6.32k | unsigned g, h = h1(ht, s), d = h2(ht, s); |
207 | 6.32k | solution *best = 0; |
208 | | |
209 | 6.32k | ++ht->lookup; |
210 | | |
211 | | /* search all entries that match; select the one with |
212 | | the lowest flags.u */ |
213 | | /* This loop may potentially traverse the whole table, since at |
214 | | least one element is guaranteed to be !LIVEP, but all elements |
215 | | may be VALIDP. Hence, we stop after at the first invalid |
216 | | element or after traversing the whole table. */ |
217 | 6.32k | g = h; |
218 | 34.9k | do { |
219 | 34.9k | solution *l = ht->solutions + g; |
220 | 34.9k | ++ht->lookup_iter; |
221 | 34.9k | if (VALIDP(l)) { |
222 | 28.6k | if (LIVEP(l) |
223 | 28.6k | && md5eq(s, l->s) |
224 | 28.6k | && subsumes(&l->flags, SLVNDX(l), flagsp) ) { |
225 | 1.64k | if (!best || LEQ(l->flags.u, best->flags.u)) |
226 | 1.64k | best = l; |
227 | 1.64k | } |
228 | 28.6k | } else |
229 | 6.32k | break; |
230 | | |
231 | 28.6k | g = addmod(g, d, ht->hashsiz); |
232 | 28.6k | } while (g != h); |
233 | | |
234 | 6.32k | if (best) |
235 | 1.64k | ++ht->succ_lookup; |
236 | 6.32k | return best; |
237 | 6.32k | } |
238 | | |
239 | | static solution *hlookup(planner *ego, const md5sig s, |
240 | | const flags_t *flagsp) |
241 | 3.66k | { |
242 | 3.66k | solution *sol = htab_lookup(&ego->htab_blessed, s, flagsp); |
243 | 3.66k | if (!sol) sol = htab_lookup(&ego->htab_unblessed, s, flagsp); |
244 | 3.66k | return sol; |
245 | 3.66k | } |
246 | | |
247 | | static void fill_slot(hashtab *ht, const md5sig s, const flags_t *flagsp, |
248 | | unsigned slvndx, solution *slot) |
249 | 14.1k | { |
250 | 14.1k | ++ht->insert; |
251 | 14.1k | ++ht->nelem; |
252 | 14.1k | A(!LIVEP(slot)); |
253 | 14.1k | slot->flags.u = flagsp->u; |
254 | 14.1k | slot->flags.l = flagsp->l; |
255 | 14.1k | slot->flags.timelimit_impatience = flagsp->timelimit_impatience; |
256 | 14.1k | slot->flags.hash_info |= H_VALID | H_LIVE; |
257 | 14.1k | SLVNDX(slot) = slvndx; |
258 | | |
259 | | /* keep this check enabled in case we add so many solvers |
260 | | that the bitfield overflows */ |
261 | 14.1k | CK(SLVNDX(slot) == slvndx); |
262 | 14.1k | sigcpy(s, slot->s); |
263 | 14.1k | } |
264 | | |
265 | | static void kill_slot(hashtab *ht, solution *slot) |
266 | 682 | { |
267 | 682 | A(LIVEP(slot)); /* ==> */ A(VALIDP(slot)); |
268 | | |
269 | 682 | --ht->nelem; |
270 | 682 | slot->flags.hash_info = H_VALID; |
271 | 682 | } |
272 | | |
273 | | static void hinsert0(hashtab *ht, const md5sig s, const flags_t *flagsp, |
274 | | unsigned slvndx) |
275 | 13.4k | { |
276 | 13.4k | solution *l; |
277 | 13.4k | unsigned g, h = h1(ht, s), d = h2(ht, s); |
278 | | |
279 | 13.4k | ++ht->insert_unknown; |
280 | | |
281 | | /* search for nonfull slot */ |
282 | 27.0k | for (g = h; ; g = addmod(g, d, ht->hashsiz)) { |
283 | 27.0k | ++ht->insert_iter; |
284 | 27.0k | l = ht->solutions + g; |
285 | 27.0k | if (!LIVEP(l)) break; |
286 | 13.5k | A((g + d) % ht->hashsiz != h); |
287 | 13.5k | } |
288 | | |
289 | 13.4k | fill_slot(ht, s, flagsp, slvndx, l); |
290 | 13.4k | } |
291 | | |
292 | | static void rehash(hashtab *ht, unsigned nsiz) |
293 | 1.23k | { |
294 | 1.23k | unsigned osiz = ht->hashsiz, h; |
295 | 1.23k | solution *osol = ht->solutions, *nsol; |
296 | | |
297 | 1.23k | nsiz = (unsigned)X(next_prime)((INT)nsiz); |
298 | 1.23k | nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT); |
299 | 1.23k | ++ht->nrehash; |
300 | | |
301 | | /* init new table */ |
302 | 17.4k | for (h = 0; h < nsiz; ++h) |
303 | 16.2k | nsol[h].flags.hash_info = 0; |
304 | | |
305 | | /* install new table */ |
306 | 1.23k | ht->hashsiz = nsiz; |
307 | 1.23k | ht->solutions = nsol; |
308 | 1.23k | ht->nelem = 0; |
309 | | |
310 | | /* copy table */ |
311 | 13.6k | for (h = 0; h < osiz; ++h) { |
312 | 12.3k | solution *l = osol + h; |
313 | 12.3k | if (LIVEP(l)) |
314 | 10.4k | hinsert0(ht, l->s, &l->flags, SLVNDX(l)); |
315 | 12.3k | } |
316 | | |
317 | 1.23k | X(ifree0)(osol); |
318 | 1.23k | } |
319 | | |
320 | | static unsigned minsz(unsigned nelem) |
321 | 5.73k | { |
322 | 5.73k | return 1U + nelem + nelem / 8U; |
323 | 5.73k | } |
324 | | |
325 | | static unsigned nextsz(unsigned nelem) |
326 | 1.23k | { |
327 | 1.23k | return minsz(minsz(nelem)); |
328 | 1.23k | } |
329 | | |
330 | | static void hgrow(hashtab *ht) |
331 | 3.26k | { |
332 | 3.26k | unsigned nelem = ht->nelem; |
333 | 3.26k | if (minsz(nelem) >= ht->hashsiz) |
334 | 1.23k | rehash(ht, nextsz(nelem)); |
335 | 3.26k | } |
336 | | |
337 | | #if 0 |
338 | | /* shrink the hash table, never used */ |
339 | | static void hshrink(hashtab *ht) |
340 | | { |
341 | | unsigned nelem = ht->nelem; |
342 | | /* always rehash after deletions */ |
343 | | rehash(ht, nextsz(nelem)); |
344 | | } |
345 | | #endif |
346 | | |
347 | | static void htab_insert(hashtab *ht, const md5sig s, const flags_t *flagsp, |
348 | | unsigned slvndx) |
349 | 3.65k | { |
350 | 3.65k | unsigned g, h = h1(ht, s), d = h2(ht, s); |
351 | 3.65k | solution *first = 0; |
352 | | |
353 | | /* Remove all entries that are subsumed by the new one. */ |
354 | | /* This loop may potentially traverse the whole table, since at |
355 | | least one element is guaranteed to be !LIVEP, but all elements |
356 | | may be VALIDP. Hence, we stop after at the first invalid |
357 | | element or after traversing the whole table. */ |
358 | 3.65k | g = h; |
359 | 17.2k | do { |
360 | 17.2k | solution *l = ht->solutions + g; |
361 | 17.2k | ++ht->insert_iter; |
362 | 17.2k | if (VALIDP(l)) { |
363 | 13.5k | if (LIVEP(l) && md5eq(s, l->s)) { |
364 | 726 | if (subsumes(flagsp, slvndx, &l->flags)) { |
365 | 682 | if (!first) first = l; |
366 | 682 | kill_slot(ht, l); |
367 | 682 | } else { |
368 | | /* It is an error to insert an element that |
369 | | is subsumed by an existing entry. */ |
370 | 44 | A(!subsumes(&l->flags, SLVNDX(l), flagsp)); |
371 | 44 | } |
372 | 726 | } |
373 | 13.5k | } else |
374 | 3.65k | break; |
375 | | |
376 | 13.5k | g = addmod(g, d, ht->hashsiz); |
377 | 13.5k | } while (g != h); |
378 | | |
379 | 3.65k | if (first) { |
380 | | /* overwrite FIRST */ |
381 | 682 | fill_slot(ht, s, flagsp, slvndx, first); |
382 | 2.97k | } else { |
383 | | /* create a new entry */ |
384 | 2.97k | hgrow(ht); |
385 | 2.97k | hinsert0(ht, s, flagsp, slvndx); |
386 | 2.97k | } |
387 | 3.65k | } |
388 | | |
389 | | static void hinsert(planner *ego, const md5sig s, const flags_t *flagsp, |
390 | | unsigned slvndx) |
391 | 3.65k | { |
392 | 3.65k | htab_insert(BLISS(*flagsp) ? &ego->htab_blessed : &ego->htab_unblessed, |
393 | 3.65k | s, flagsp, slvndx ); |
394 | 3.65k | } |
395 | | |
396 | | |
397 | | static void invoke_hook(planner *ego, plan *pln, const problem *p, |
398 | | int optimalp) |
399 | 5.51k | { |
400 | 5.51k | if (ego->hook) |
401 | 0 | ego->hook(ego, pln, p, optimalp); |
402 | 5.51k | } |
403 | | |
404 | | #ifdef FFTW_RANDOM_ESTIMATOR |
405 | | /* a "random" estimate, used for debugging to generate "random" |
406 | | plans, albeit from a deterministic seed. */ |
407 | | |
408 | | unsigned X(random_estimate_seed) = 0; |
409 | | |
410 | | static double random_estimate(const planner *ego, const plan *pln, |
411 | | const problem *p) |
412 | | { |
413 | | md5 m; |
414 | | X(md5begin)(&m); |
415 | | X(md5unsigned)(&m, X(random_estimate_seed)); |
416 | | X(md5int)(&m, ego->nthr); |
417 | | p->adt->hash(p, &m); |
418 | | X(md5putb)(&m, &pln->ops, sizeof(pln->ops)); |
419 | | X(md5putb)(&m, &pln->adt, sizeof(pln->adt)); |
420 | | X(md5end)(&m); |
421 | | return ego->cost_hook ? ego->cost_hook(p, m.s[0], COST_MAX) : m.s[0]; |
422 | | } |
423 | | |
424 | | #endif |
425 | | |
426 | | double X(iestimate_cost)(const planner *ego, const plan *pln, const problem *p) |
427 | 2.12k | { |
428 | 2.12k | double cost = |
429 | 2.12k | + pln->ops.add |
430 | 2.12k | + pln->ops.mul |
431 | | |
432 | | #if HAVE_FMA |
433 | | + pln->ops.fma |
434 | | #else |
435 | 2.12k | + 2 * pln->ops.fma |
436 | 2.12k | #endif |
437 | | |
438 | 2.12k | + pln->ops.other; |
439 | 2.12k | if (ego->cost_hook) |
440 | 0 | cost = ego->cost_hook(p, cost, COST_MAX); |
441 | 2.12k | return cost; |
442 | 2.12k | } |
443 | | |
444 | | static void evaluate_plan(planner *ego, plan *pln, const problem *p) |
445 | 2.12k | { |
446 | 2.12k | if (ESTIMATEP(ego) || !BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) { |
447 | 2.12k | ego->nplan++; |
448 | | |
449 | 2.12k | if (ESTIMATEP(ego)) { |
450 | 2.12k | estimate: |
451 | | /* heuristic */ |
452 | | #ifdef FFTW_RANDOM_ESTIMATOR |
453 | | pln->pcost = random_estimate(ego, pln, p); |
454 | | ego->epcost += X(iestimate_cost)(ego, pln, p); |
455 | | #else |
456 | 2.12k | pln->pcost = X(iestimate_cost)(ego, pln, p); |
457 | 2.12k | ego->epcost += pln->pcost; |
458 | 2.12k | #endif |
459 | 2.12k | } else { |
460 | 0 | double t = X(measure_execution_time)(ego, pln, p); |
461 | | |
462 | 0 | if (t < 0) { /* unavailable cycle counter */ |
463 | | /* Real programmers can write FORTRAN in any language */ |
464 | 0 | goto estimate; |
465 | 0 | } |
466 | | |
467 | 0 | pln->pcost = t; |
468 | 0 | ego->pcost += t; |
469 | 0 | ego->need_timeout_check = 1; |
470 | 0 | } |
471 | 2.12k | } |
472 | | |
473 | 2.12k | invoke_hook(ego, pln, p, 0); |
474 | 2.12k | } |
475 | | |
476 | | /* maintain dynamic scoping of flags, nthr: */ |
477 | | static plan *invoke_solver(planner *ego, const problem *p, solver *s, |
478 | | const flags_t *nflags) |
479 | 292k | { |
480 | 292k | flags_t flags = ego->flags; |
481 | 292k | int nthr = ego->nthr; |
482 | 292k | plan *pln; |
483 | 292k | ego->flags = *nflags; |
484 | 292k | PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; |
485 | 292k | A(p->adt->problem_kind == s->adt->problem_kind); |
486 | 292k | pln = s->adt->mkplan(s, p, ego); |
487 | 292k | ego->nthr = nthr; |
488 | 292k | ego->flags = flags; |
489 | 292k | return pln; |
490 | 292k | } |
491 | | |
492 | | /* maintain the invariant TIMED_OUT ==> NEED_TIMEOUT_CHECK */ |
493 | | static int timeout_p(planner *ego, const problem *p) |
494 | 2.05k | { |
495 | | /* do not timeout when estimating. First, the estimator is the |
496 | | planner of last resort. Second, calling X(elapsed_since)() is |
497 | | slower than estimating */ |
498 | 2.05k | if (!ESTIMATEP(ego)) { |
499 | | /* do not assume that X(elapsed_since)() is monotonic */ |
500 | 0 | if (ego->timed_out) { |
501 | 0 | A(ego->need_timeout_check); |
502 | 0 | return 1; |
503 | 0 | } |
504 | | |
505 | 0 | if (ego->timelimit >= 0 && |
506 | 0 | X(elapsed_since)(ego, p, ego->start_time) >= ego->timelimit) { |
507 | 0 | ego->timed_out = 1; |
508 | 0 | ego->need_timeout_check = 1; |
509 | 0 | return 1; |
510 | 0 | } |
511 | 0 | } |
512 | | |
513 | 2.05k | A(!ego->timed_out); |
514 | 2.05k | ego->need_timeout_check = 0; |
515 | 2.05k | return 0; |
516 | 2.05k | } |
517 | | |
518 | | static plan *search0(planner *ego, const problem *p, unsigned *slvndx, |
519 | | const flags_t *flagsp) |
520 | 2.05k | { |
521 | 2.05k | plan *best = 0; |
522 | 2.05k | int best_not_yet_timed = 1; |
523 | | |
524 | | /* Do not start a search if the planner timed out. This check is |
525 | | necessary, lest the relaxation mechanism kick in */ |
526 | 2.05k | if (timeout_p(ego, p)) |
527 | 0 | return 0; |
528 | | |
529 | 2.05k | FORALL_SOLVERS_OF_KIND(p->adt->problem_kind, ego, s, sp, { |
530 | 2.05k | plan *pln; |
531 | | |
532 | 2.05k | pln = invoke_solver(ego, p, s, flagsp); |
533 | | |
534 | 2.05k | if (ego->need_timeout_check) |
535 | 2.05k | if (timeout_p(ego, p)) { |
536 | 2.05k | X(plan_destroy_internal)(pln); |
537 | 2.05k | X(plan_destroy_internal)(best); |
538 | 2.05k | return 0; |
539 | 2.05k | } |
540 | | |
541 | 2.05k | if (pln) { |
542 | | /* read COULD_PRUNE_NOW_P because PLN may be destroyed |
543 | | before we use COULD_PRUNE_NOW_P */ |
544 | 2.05k | int could_prune_now_p = pln->could_prune_now_p; |
545 | | |
546 | 2.05k | if (best) { |
547 | 2.05k | if (best_not_yet_timed) { |
548 | 2.05k | evaluate_plan(ego, best, p); |
549 | 2.05k | best_not_yet_timed = 0; |
550 | 2.05k | } |
551 | 2.05k | evaluate_plan(ego, pln, p); |
552 | 2.05k | if (pln->pcost < best->pcost) { |
553 | 2.05k | X(plan_destroy_internal)(best); |
554 | 2.05k | best = pln; |
555 | 2.05k | *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs); |
556 | 2.05k | } else { |
557 | 2.05k | X(plan_destroy_internal)(pln); |
558 | 2.05k | } |
559 | 2.05k | } else { |
560 | 2.05k | best = pln; |
561 | 2.05k | *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs); |
562 | 2.05k | } |
563 | | |
564 | 2.05k | if (ALLOW_PRUNINGP(ego) && could_prune_now_p) |
565 | 2.05k | break; |
566 | 2.05k | } |
567 | 2.05k | }); |
568 | | |
569 | 2.05k | return best; |
570 | 2.05k | } |
571 | | |
572 | | static plan *search(planner *ego, const problem *p, unsigned *slvndx, |
573 | | flags_t *flagsp) |
574 | 2.02k | { |
575 | 2.02k | plan *pln = 0; |
576 | 2.02k | unsigned i; |
577 | | |
578 | | /* relax impatience in this order: */ |
579 | 2.02k | static const unsigned relax_tab[] = { |
580 | 2.02k | 0, /* relax nothing */ |
581 | 2.02k | NO_VRECURSE, |
582 | 2.02k | NO_FIXED_RADIX_LARGE_N, |
583 | 2.02k | NO_SLOW, |
584 | 2.02k | NO_UGLY |
585 | 2.02k | }; |
586 | | |
587 | 2.02k | unsigned l_orig = flagsp->l; |
588 | 2.02k | unsigned x = flagsp->u; |
589 | | |
590 | | /* guaranteed to be different from X */ |
591 | 2.02k | unsigned last_x = ~x; |
592 | | |
593 | 3.38k | for (i = 0; i < sizeof(relax_tab) / sizeof(relax_tab[0]); ++i) { |
594 | 3.12k | if (LEQ(l_orig, x & ~relax_tab[i])) |
595 | 2.05k | x = x & ~relax_tab[i]; |
596 | | |
597 | 3.12k | if (x != last_x) { |
598 | 2.05k | last_x = x; |
599 | 2.05k | flagsp->l = x; |
600 | 2.05k | pln = search0(ego, p, slvndx, flagsp); |
601 | 2.05k | if (pln) break; |
602 | 2.05k | } |
603 | 3.12k | } |
604 | | |
605 | 2.02k | if (!pln) { |
606 | | /* search [L_ORIG, U] */ |
607 | 268 | if (l_orig != last_x) { |
608 | 0 | last_x = l_orig; |
609 | 0 | flagsp->l = l_orig; |
610 | 0 | pln = search0(ego, p, slvndx, flagsp); |
611 | 0 | } |
612 | 268 | } |
613 | | |
614 | 2.02k | return pln; |
615 | 2.02k | } |
616 | | |
617 | | #define CHECK_FOR_BOGOSITY \ |
618 | 7.32k | if ((ego->bogosity_hook ? \ |
619 | 7.32k | (ego->wisdom_state = ego->bogosity_hook(ego->wisdom_state, p)) \ |
620 | 7.32k | : ego->wisdom_state) == WISDOM_IS_BOGUS) \ |
621 | 7.32k | goto wisdom_is_bogus; |
622 | | |
623 | | static plan *mkplan(planner *ego, const problem *p) |
624 | 3.66k | { |
625 | 3.66k | plan *pln; |
626 | 3.66k | md5 m; |
627 | 3.66k | unsigned slvndx; |
628 | 3.66k | flags_t flags_of_solution; |
629 | 3.66k | solution *sol; |
630 | 3.66k | solver *s; |
631 | | |
632 | 3.66k | ASSERT_ALIGNED_DOUBLE; |
633 | 3.66k | A(LEQ(PLNR_L(ego), PLNR_U(ego))); |
634 | | |
635 | 3.66k | if (ESTIMATEP(ego)) |
636 | 3.66k | PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; /* canonical form */ |
637 | | |
638 | | |
639 | | #ifdef FFTW_DEBUG |
640 | | check(&ego->htab_blessed); |
641 | | check(&ego->htab_unblessed); |
642 | | #endif |
643 | | |
644 | 3.66k | pln = 0; |
645 | | |
646 | 3.66k | CHECK_FOR_BOGOSITY; |
647 | | |
648 | 3.66k | ego->timed_out = 0; |
649 | | |
650 | 3.66k | ++ego->nprob; |
651 | 3.66k | md5hash(&m, p, ego); |
652 | | |
653 | 3.66k | flags_of_solution = ego->flags; |
654 | | |
655 | 3.66k | if (ego->wisdom_state != WISDOM_IGNORE_ALL) { |
656 | 3.66k | if ((sol = hlookup(ego, m.s, &flags_of_solution))) { |
657 | | /* wisdom is acceptable */ |
658 | 1.64k | wisdom_state_t owisdom_state = ego->wisdom_state; |
659 | | |
660 | | /* this hook is mainly for MPI, to make sure that |
661 | | wisdom is in sync across all processes for MPI problems */ |
662 | 1.64k | if (ego->wisdom_ok_hook && !ego->wisdom_ok_hook(p, sol->flags)) |
663 | 0 | goto do_search; /* ignore not-ok wisdom */ |
664 | | |
665 | 1.64k | slvndx = SLVNDX(sol); |
666 | | |
667 | 1.64k | if (slvndx == INFEASIBLE_SLVNDX) { |
668 | 6 | if (ego->wisdom_state == WISDOM_IGNORE_INFEASIBLE) |
669 | 0 | goto do_search; |
670 | 6 | else |
671 | 6 | return 0; /* known to be infeasible */ |
672 | 6 | } |
673 | | |
674 | 1.63k | flags_of_solution = sol->flags; |
675 | | |
676 | | /* inherit blessing either from wisdom |
677 | | or from the planner */ |
678 | 1.63k | flags_of_solution.hash_info |= BLISS(ego->flags); |
679 | | |
680 | 1.63k | ego->wisdom_state = WISDOM_ONLY; |
681 | | |
682 | 1.63k | s = ego->slvdescs[slvndx].slv; |
683 | 1.63k | if (p->adt->problem_kind != s->adt->problem_kind) |
684 | 0 | goto wisdom_is_bogus; |
685 | | |
686 | 1.63k | pln = invoke_solver(ego, p, s, &flags_of_solution); |
687 | | |
688 | 1.63k | CHECK_FOR_BOGOSITY; /* catch error in child solvers */ |
689 | | |
690 | 1.63k | sol = 0; /* Paranoia: SOL may be dangling after |
691 | | invoke_solver(); make sure we don't accidentally |
692 | | reuse it. */ |
693 | | |
694 | 1.63k | if (!pln) |
695 | 0 | goto wisdom_is_bogus; |
696 | | |
697 | 1.63k | ego->wisdom_state = owisdom_state; |
698 | | |
699 | 1.63k | goto skip_search; |
700 | 1.63k | } |
701 | 2.02k | else if (ego->nowisdom_hook) /* for MPI, make sure lack of wisdom */ |
702 | 0 | ego->nowisdom_hook(p); /* is in sync across all processes */ |
703 | 3.66k | } |
704 | | |
705 | 2.02k | do_search: |
706 | | /* cannot search in WISDOM_ONLY mode */ |
707 | 2.02k | if (ego->wisdom_state == WISDOM_ONLY) |
708 | 0 | goto wisdom_is_bogus; |
709 | | |
710 | 2.02k | flags_of_solution = ego->flags; |
711 | 2.02k | pln = search(ego, p, &slvndx, &flags_of_solution); |
712 | 2.02k | CHECK_FOR_BOGOSITY; /* catch error in child solvers */ |
713 | | |
714 | 2.02k | if (ego->timed_out) { |
715 | 0 | A(!pln); |
716 | 0 | if (PLNR_TIMELIMIT_IMPATIENCE(ego) != 0) { |
717 | | /* record (below) that this plan has failed because of |
718 | | timeout */ |
719 | 0 | flags_of_solution.hash_info |= BLESSING; |
720 | 0 | } else { |
721 | | /* this is not the top-level problem or timeout is not |
722 | | active: record no wisdom. */ |
723 | 0 | return 0; |
724 | 0 | } |
725 | 2.02k | } else { |
726 | | /* canonicalize to infinite timeout */ |
727 | 2.02k | flags_of_solution.timelimit_impatience = 0; |
728 | 2.02k | } |
729 | | |
730 | 3.65k | skip_search: |
731 | 3.65k | if (ego->wisdom_state == WISDOM_NORMAL || |
732 | 3.65k | ego->wisdom_state == WISDOM_ONLY) { |
733 | 3.65k | if (pln) { |
734 | 3.39k | hinsert(ego, m.s, &flags_of_solution, slvndx); |
735 | 3.39k | invoke_hook(ego, pln, p, 1); |
736 | 3.39k | } else { |
737 | 268 | hinsert(ego, m.s, &flags_of_solution, INFEASIBLE_SLVNDX); |
738 | 268 | } |
739 | 3.65k | } |
740 | | |
741 | 3.65k | return pln; |
742 | | |
743 | 0 | wisdom_is_bogus: |
744 | 0 | X(plan_destroy_internal)(pln); |
745 | 0 | ego->wisdom_state = WISDOM_IS_BOGUS; |
746 | 0 | return 0; |
747 | 2.02k | } |
748 | | |
749 | | static void htab_destroy(hashtab *ht) |
750 | 283 | { |
751 | 283 | X(ifree)(ht->solutions); |
752 | 283 | ht->solutions = 0; |
753 | 283 | ht->nelem = 0U; |
754 | 283 | } |
755 | | |
756 | | static void mkhashtab(hashtab *ht) |
757 | 285 | { |
758 | 285 | ht->nrehash = 0; |
759 | 285 | ht->succ_lookup = ht->lookup = ht->lookup_iter = 0; |
760 | 285 | ht->insert = ht->insert_iter = ht->insert_unknown = 0; |
761 | | |
762 | 285 | ht->solutions = 0; |
763 | 285 | ht->hashsiz = ht->nelem = 0U; |
764 | 285 | hgrow(ht); /* so that hashsiz > 0 */ |
765 | 285 | } |
766 | | |
767 | | /* destroy hash table entries. If FORGET_EVERYTHING, destroy the whole |
768 | | table. If FORGET_ACCURSED, then destroy entries that are not blessed. */ |
769 | | static void forget(planner *ego, amnesia a) |
770 | 283 | { |
771 | 283 | switch (a) { |
772 | 0 | case FORGET_EVERYTHING: |
773 | 0 | htab_destroy(&ego->htab_blessed); |
774 | 0 | mkhashtab(&ego->htab_blessed); |
775 | | /* fall through */ |
776 | 283 | case FORGET_ACCURSED: |
777 | 283 | htab_destroy(&ego->htab_unblessed); |
778 | 283 | mkhashtab(&ego->htab_unblessed); |
779 | 283 | break; |
780 | 0 | default: |
781 | 0 | break; |
782 | 283 | } |
783 | 283 | } |
784 | | |
785 | | /* FIXME: what sort of version information should we write? */ |
786 | | #define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom)) |
787 | | static const char stimeout[] = "TIMEOUT"; |
788 | | |
789 | | /* tantus labor non sit cassus */ |
790 | | static void exprt(planner *ego, printer *p) |
791 | 0 | { |
792 | 0 | unsigned h; |
793 | 0 | hashtab *ht = &ego->htab_blessed; |
794 | 0 | md5 m; |
795 | |
|
796 | 0 | signature_of_configuration(&m, ego); |
797 | |
|
798 | 0 | p->print(p, |
799 | 0 | "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n", |
800 | 0 | m.s[0], m.s[1], m.s[2], m.s[3]); |
801 | |
|
802 | 0 | for (h = 0; h < ht->hashsiz; ++h) { |
803 | 0 | solution *l = ht->solutions + h; |
804 | 0 | if (LIVEP(l)) { |
805 | 0 | const char *reg_nam; |
806 | 0 | int reg_id; |
807 | |
|
808 | 0 | if (SLVNDX(l) == INFEASIBLE_SLVNDX) { |
809 | 0 | reg_nam = stimeout; |
810 | 0 | reg_id = 0; |
811 | 0 | } else { |
812 | 0 | slvdesc *sp = ego->slvdescs + SLVNDX(l); |
813 | 0 | reg_nam = sp->reg_nam; |
814 | 0 | reg_id = sp->reg_id; |
815 | 0 | } |
816 | | |
817 | | /* qui salvandos salvas gratis |
818 | | salva me fons pietatis */ |
819 | 0 | p->print(p, " (%s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)\n", |
820 | 0 | reg_nam, reg_id, |
821 | 0 | l->flags.l, l->flags.u, l->flags.timelimit_impatience, |
822 | 0 | l->s[0], l->s[1], l->s[2], l->s[3]); |
823 | 0 | } |
824 | 0 | } |
825 | 0 | p->print(p, ")\n"); |
826 | 0 | } |
827 | | |
828 | | /* mors stupebit et natura |
829 | | cum resurget creatura */ |
830 | | static int imprt(planner *ego, scanner *sc) |
831 | 0 | { |
832 | 0 | char buf[MAXNAM + 1]; |
833 | 0 | md5uint sig[4]; |
834 | 0 | unsigned l, u, timelimit_impatience; |
835 | 0 | flags_t flags; |
836 | 0 | int reg_id; |
837 | 0 | unsigned slvndx; |
838 | 0 | hashtab *ht = &ego->htab_blessed; |
839 | 0 | hashtab old; |
840 | 0 | md5 m; |
841 | |
|
842 | 0 | if (!sc->scan(sc, |
843 | 0 | "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n", |
844 | 0 | sig + 0, sig + 1, sig + 2, sig + 3)) |
845 | 0 | return 0; /* don't need to restore hashtable */ |
846 | | |
847 | 0 | signature_of_configuration(&m, ego); |
848 | 0 | if (m.s[0] != sig[0] || m.s[1] != sig[1] || |
849 | 0 | m.s[2] != sig[2] || m.s[3] != sig[3]) { |
850 | | /* invalid configuration */ |
851 | 0 | return 0; |
852 | 0 | } |
853 | | |
854 | | /* make a backup copy of the hash table (cache the hash) */ |
855 | 0 | { |
856 | 0 | unsigned h, hsiz = ht->hashsiz; |
857 | 0 | old = *ht; |
858 | 0 | old.solutions = (solution *)MALLOC(hsiz * sizeof(solution), HASHT); |
859 | 0 | for (h = 0; h < hsiz; ++h) |
860 | 0 | old.solutions[h] = ht->solutions[h]; |
861 | 0 | } |
862 | |
|
863 | 0 | while (1) { |
864 | 0 | if (sc->scan(sc, ")")) |
865 | 0 | break; |
866 | | |
867 | | /* qua resurget ex favilla */ |
868 | 0 | if (!sc->scan(sc, "(%*s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)", |
869 | 0 | MAXNAM, buf, ®_id, &l, &u, &timelimit_impatience, |
870 | 0 | sig + 0, sig + 1, sig + 2, sig + 3)) |
871 | 0 | goto bad; |
872 | | |
873 | 0 | if (!strcmp(buf, stimeout) && reg_id == 0) { |
874 | 0 | slvndx = INFEASIBLE_SLVNDX; |
875 | 0 | } else { |
876 | 0 | if (timelimit_impatience != 0) |
877 | 0 | goto bad; |
878 | | |
879 | 0 | slvndx = slookup(ego, buf, reg_id); |
880 | 0 | if (slvndx == INFEASIBLE_SLVNDX) |
881 | 0 | goto bad; |
882 | 0 | } |
883 | | |
884 | | /* inter oves locum praesta */ |
885 | 0 | flags.l = l; |
886 | 0 | flags.u = u; |
887 | 0 | flags.timelimit_impatience = timelimit_impatience; |
888 | 0 | flags.hash_info = BLESSING; |
889 | |
|
890 | 0 | CK(flags.l == l); |
891 | 0 | CK(flags.u == u); |
892 | 0 | CK(flags.timelimit_impatience == timelimit_impatience); |
893 | |
|
894 | 0 | if (!hlookup(ego, sig, &flags)) |
895 | 0 | hinsert(ego, sig, &flags, slvndx); |
896 | 0 | } |
897 | | |
898 | 0 | X(ifree0)(old.solutions); |
899 | 0 | return 1; |
900 | | |
901 | 0 | bad: |
902 | | /* ``The wisdom of FFTW must be above suspicion.'' */ |
903 | 0 | X(ifree0)(ht->solutions); |
904 | 0 | *ht = old; |
905 | 0 | return 0; |
906 | 0 | } |
907 | | |
908 | | /* |
909 | | * create a planner |
910 | | */ |
911 | | planner *X(mkplanner)(void) |
912 | 1 | { |
913 | 1 | int i; |
914 | | |
915 | 1 | static const planner_adt padt = { |
916 | 1 | register_solver, mkplan, forget, exprt, imprt |
917 | 1 | }; |
918 | | |
919 | 1 | planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS); |
920 | | |
921 | 1 | p->adt = &padt; |
922 | 1 | p->nplan = p->nprob = 0; |
923 | 1 | p->pcost = p->epcost = 0.0; |
924 | 1 | p->hook = 0; |
925 | 1 | p->cost_hook = 0; |
926 | 1 | p->wisdom_ok_hook = 0; |
927 | 1 | p->nowisdom_hook = 0; |
928 | 1 | p->bogosity_hook = 0; |
929 | 1 | p->cur_reg_nam = 0; |
930 | 1 | p->wisdom_state = WISDOM_NORMAL; |
931 | | |
932 | 1 | p->slvdescs = 0; |
933 | 1 | p->nslvdesc = p->slvdescsiz = 0; |
934 | | |
935 | 1 | p->flags.l = 0; |
936 | 1 | p->flags.u = 0; |
937 | 1 | p->flags.timelimit_impatience = 0; |
938 | 1 | p->flags.hash_info = 0; |
939 | 1 | p->nthr = 1; |
940 | 1 | p->need_timeout_check = 1; |
941 | 1 | p->timelimit = -1; |
942 | | |
943 | 1 | mkhashtab(&p->htab_blessed); |
944 | 1 | mkhashtab(&p->htab_unblessed); |
945 | | |
946 | 9 | for (i = 0; i < PROBLEM_LAST; ++i) |
947 | 8 | p->slvdescs_for_problem_kind[i] = -1; |
948 | | |
949 | 1 | return p; |
950 | 1 | } |
951 | | |
952 | | void X(planner_destroy)(planner *ego) |
953 | 0 | { |
954 | | /* destroy hash table */ |
955 | 0 | htab_destroy(&ego->htab_blessed); |
956 | 0 | htab_destroy(&ego->htab_unblessed); |
957 | | |
958 | | /* destroy solvdesc table */ |
959 | 0 | FORALL_SOLVERS(ego, s, sp, { |
960 | 0 | UNUSED(sp); |
961 | 0 | X(solver_destroy)(s); |
962 | 0 | }); |
963 | |
|
964 | 0 | X(ifree0)(ego->slvdescs); |
965 | 0 | X(ifree)(ego); /* dona eis requiem */ |
966 | 0 | } |
967 | | |
968 | | plan *X(mkplan_d)(planner *ego, problem *p) |
969 | 3.09k | { |
970 | 3.09k | plan *pln = ego->adt->mkplan(ego, p); |
971 | 3.09k | X(problem_destroy)(p); |
972 | 3.09k | return pln; |
973 | 3.09k | } |
974 | | |
975 | | /* like X(mkplan_d), but sets/resets flags as well */ |
976 | | plan *X(mkplan_f_d)(planner *ego, problem *p, |
977 | | unsigned l_set, unsigned u_set, unsigned u_reset) |
978 | 595 | { |
979 | 595 | flags_t oflags = ego->flags; |
980 | 595 | plan *pln; |
981 | | |
982 | 595 | PLNR_U(ego) &= ~u_reset; |
983 | 595 | PLNR_L(ego) &= ~u_reset; |
984 | 595 | PLNR_L(ego) |= l_set; |
985 | 595 | PLNR_U(ego) |= u_set | l_set; |
986 | 595 | pln = X(mkplan_d)(ego, p); |
987 | 595 | ego->flags = oflags; |
988 | 595 | return pln; |
989 | 595 | } |
990 | | |
991 | | /* |
992 | | * Debugging code: |
993 | | */ |
994 | | #ifdef FFTW_DEBUG |
995 | | static void check(hashtab *ht) |
996 | | { |
997 | | unsigned live = 0; |
998 | | unsigned i; |
999 | | |
1000 | | A(ht->nelem < ht->hashsiz); |
1001 | | |
1002 | | for (i = 0; i < ht->hashsiz; ++i) { |
1003 | | solution *l = ht->solutions + i; |
1004 | | if (LIVEP(l)) |
1005 | | ++live; |
1006 | | } |
1007 | | |
1008 | | A(ht->nelem == live); |
1009 | | |
1010 | | for (i = 0; i < ht->hashsiz; ++i) { |
1011 | | solution *l1 = ht->solutions + i; |
1012 | | int foundit = 0; |
1013 | | if (LIVEP(l1)) { |
1014 | | unsigned g, h = h1(ht, l1->s), d = h2(ht, l1->s); |
1015 | | |
1016 | | g = h; |
1017 | | do { |
1018 | | solution *l = ht->solutions + g; |
1019 | | if (VALIDP(l)) { |
1020 | | if (l1 == l) |
1021 | | foundit = 1; |
1022 | | else if (LIVEP(l) && md5eq(l1->s, l->s)) { |
1023 | | A(!subsumes(&l->flags, SLVNDX(l), &l1->flags)); |
1024 | | A(!subsumes(&l1->flags, SLVNDX(l1), &l->flags)); |
1025 | | } |
1026 | | } else |
1027 | | break; |
1028 | | g = addmod(g, d, ht->hashsiz); |
1029 | | } while (g != h); |
1030 | | |
1031 | | A(foundit); |
1032 | | } |
1033 | | } |
1034 | | } |
1035 | | #endif |