Coverage Report

Created: 2026-02-14 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/kernel/planner.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2000 Matteo Frigo
3
 * Copyright (c) 2000 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
#include "kernel/ifftw.h"
22
#include <string.h>
23
24
/* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU
25
   program in English, because English is the one language that nearly
26
   all programmers in all countries can read."
27
28
                    ingemisco tanquam reus
29
        culpa rubet vultus meus
30
        supplicanti parce [rms]
31
*/
32
33
54.4k
#define VALIDP(solution) ((solution)->flags.hash_info & H_VALID)
34
129k
#define LIVEP(solution) ((solution)->flags.hash_info & H_LIVE)
35
29.3k
#define SLVNDX(solution) ((solution)->flags.slvndx)
36
5.48k
#define BLISS(flags) (((flags).hash_info) & BLESSING)
37
4.60k
#define INFEASIBLE_SLVNDX ((1U<<BITS_FOR_SLVNDX)-1)
38
39
40
0
#define MAXNAM 64  /* maximum length of registrar's name.
41
          Used for reading wisdom.  There is no point
42
          in doing this right */
43
44
45
#ifdef FFTW_DEBUG
46
static void check(hashtab *ht);
47
#endif
48
49
/* x <= y */
50
10.8k
#define LEQ(x, y) (((x) & (y)) == (x))
51
52
/* A subsumes B */
53
static int subsumes(const flags_t *a, unsigned slvndx_a, const flags_t *b)
54
2.60k
{
55
2.60k
     if (slvndx_a != INFEASIBLE_SLVNDX) {
56
2.58k
    A(a->timelimit_impatience == 0);
57
2.58k
    return (LEQ(a->u, b->u) && LEQ(b->l, a->l));
58
2.58k
     } else {
59
20
    return (LEQ(a->l, b->l) 
60
10
      && a->timelimit_impatience <= b->timelimit_impatience);
61
20
     }
62
2.60k
}
63
64
static unsigned addmod(unsigned a, unsigned b, unsigned p)
65
58.4k
{
66
     /* gcc-2.95/sparc produces incorrect code for the fast version below. */
67
#if defined(__sparc__) && defined(__GNUC__)
68
     /* slow version  */
69
     return (a + b) % p;
70
#else
71
     /* faster version */
72
58.4k
     unsigned c = a + b;
73
58.4k
     return c >= p ? c - p : c;
74
58.4k
#endif
75
58.4k
}
76
77
/*
78
  slvdesc management:
79
*/
80
static void sgrow(planner *ego)
81
25
{
82
25
     unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4;
83
25
     slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS);
84
25
     slvdesc *otab = ego->slvdescs;
85
25
     unsigned i;
86
87
25
     ego->slvdescs = ntab;
88
25
     ego->slvdescsiz = nsiz;
89
2.80k
     for (i = 0; i < osiz; ++i)
90
2.77k
    ntab[i] = otab[i];
91
25
     X(ifree0)(otab);
92
25
}
93
94
static void register_solver(planner *ego, solver *s)
95
613
{
96
613
     slvdesc *n;
97
613
     int kind;
98
99
613
     if (s) { /* add s to solver list */
100
613
    X(solver_use)(s);
101
102
613
    A(ego->nslvdesc < INFEASIBLE_SLVNDX);
103
613
    if (ego->nslvdesc >= ego->slvdescsiz)
104
25
         sgrow(ego);
105
106
613
    n = ego->slvdescs + ego->nslvdesc;
107
108
613
    n->slv = s;
109
613
    n->reg_nam = ego->cur_reg_nam;
110
613
    n->reg_id = ego->cur_reg_id++;
111
    
112
613
    A(strlen(n->reg_nam) < MAXNAM);
113
613
    n->nam_hash = X(hash)(n->reg_nam);
114
115
613
    kind = s->adt->problem_kind;
116
613
    n->next_for_same_problem_kind = ego->slvdescs_for_problem_kind[kind];
117
613
    ego->slvdescs_for_problem_kind[kind] = (int)/*from unsigned*/ego->nslvdesc;
118
119
613
    ego->nslvdesc++;
120
613
     }
121
613
}
122
123
static unsigned slookup(planner *ego, char *nam, int id)
124
0
{
125
0
     unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */
126
0
     FORALL_SOLVERS(ego, s, sp, {
127
0
    UNUSED(s);
128
0
    if (sp->reg_id == id && sp->nam_hash == h
129
0
        && !strcmp(sp->reg_nam, nam))
130
0
         return (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs);
131
0
     });
132
0
     return INFEASIBLE_SLVNDX;
133
0
}
134
135
/* Compute a MD5 hash of the configuration of the planner.
136
   We store it into the wisdom file to make absolutely sure that
137
   we are reading wisdom that is applicable */
138
static void signature_of_configuration(md5 *m, planner *ego)
139
0
{
140
0
     X(md5begin)(m);
141
0
     X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */
142
0
     FORALL_SOLVERS(ego, s, sp, {
143
0
    UNUSED(s);
144
0
    X(md5int)(m, sp->reg_id);
145
0
    X(md5puts)(m, sp->reg_nam);
146
0
     });
147
0
     X(md5end)(m);
148
0
}
149
150
/*
151
  md5-related stuff:
152
*/
153
154
/* first hash function */
155
static unsigned h1(const hashtab *ht, const md5sig s)
156
24.2k
{
157
24.2k
     unsigned h = s[0] % ht->hashsiz;
158
24.2k
     A(h == (s[0] % ht->hashsiz));
159
24.2k
     return h;
160
24.2k
}
161
162
/* second hash function (for double hashing) */
163
static unsigned h2(const hashtab *ht, const md5sig s)
164
24.2k
{
165
24.2k
     unsigned h = 1U + s[1] % (ht->hashsiz - 1);
166
24.2k
     A(h == (1U + s[1] % (ht->hashsiz - 1)));
167
24.2k
     return h;
168
24.2k
}
169
170
static void md5hash(md5 *m, const problem *p, const planner *plnr)
171
3.75k
{
172
3.75k
     X(md5begin)(m);
173
3.75k
     X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */
174
3.75k
     X(md5int)(m, plnr->nthr);
175
3.75k
     p->adt->hash(p, m);
176
3.75k
     X(md5end)(m);
177
3.75k
}
178
179
static int md5eq(const md5sig a, const md5sig b)
180
44.2k
{
181
44.2k
     return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3];
182
44.2k
}
183
184
static void sigcpy(const md5sig a, md5sig b)
185
14.7k
{
186
14.7k
     b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3];
187
14.7k
}
188
189
/*
190
  memoization routines :
191
*/
192
193
/*
194
   liber scriptus proferetur
195
   in quo totum continetur
196
   unde mundus iudicetur
197
*/
198
struct solution_s {
199
     md5sig s;
200
     flags_t flags;
201
};
202
203
static solution *htab_lookup(hashtab *ht, const md5sig s, 
204
           const flags_t *flagsp)
205
6.42k
{
206
6.42k
     unsigned g, h = h1(ht, s), d = h2(ht, s);
207
6.42k
     solution *best = 0;
208
209
6.42k
     ++ht->lookup;
210
211
     /* search all entries that match; select the one with
212
  the lowest flags.u */
213
     /* This loop may potentially traverse the whole table, since at
214
  least one element is guaranteed to be !LIVEP, but all elements
215
  may be VALIDP.  Hence, we stop after at the first invalid
216
  element or after traversing the whole table. */
217
6.42k
     g = h;
218
36.4k
     do {
219
36.4k
    solution *l = ht->solutions + g;
220
36.4k
    ++ht->lookup_iter;
221
36.4k
    if (VALIDP(l)) {
222
30.0k
         if (LIVEP(l)
223
30.0k
       && md5eq(s, l->s)
224
1.81k
       && subsumes(&l->flags, SLVNDX(l), flagsp) ) { 
225
1.73k
        if (!best || LEQ(l->flags.u, best->flags.u))
226
1.73k
       best = l;
227
1.73k
         }
228
30.0k
    } else 
229
6.42k
         break;
230
231
30.0k
    g = addmod(g, d, ht->hashsiz);
232
30.0k
     } while (g != h);
233
234
6.42k
     if (best) 
235
1.73k
    ++ht->succ_lookup;
236
6.42k
     return best;
237
6.42k
}
238
239
static solution *hlookup(planner *ego, const md5sig s, 
240
       const flags_t *flagsp)
241
3.75k
{
242
3.75k
     solution *sol = htab_lookup(&ego->htab_blessed, s, flagsp);
243
3.75k
     if (!sol) sol = htab_lookup(&ego->htab_unblessed, s, flagsp);
244
3.75k
     return sol;
245
3.75k
}
246
247
static void fill_slot(hashtab *ht, const md5sig s, const flags_t *flagsp,
248
          unsigned slvndx, solution *slot)
249
14.7k
{
250
14.7k
     ++ht->insert;
251
14.7k
     ++ht->nelem;
252
14.7k
     A(!LIVEP(slot));
253
14.7k
     slot->flags.u = flagsp->u;
254
14.7k
     slot->flags.l = flagsp->l;
255
14.7k
     slot->flags.timelimit_impatience = flagsp->timelimit_impatience;
256
14.7k
     slot->flags.hash_info |= H_VALID | H_LIVE;
257
14.7k
     SLVNDX(slot) = slvndx;
258
259
     /* keep this check enabled in case we add so many solvers
260
  that the bitfield overflows */
261
14.7k
     CK(SLVNDX(slot) == slvndx);     
262
14.7k
     sigcpy(s, slot->s);
263
14.7k
}
264
265
static void kill_slot(hashtab *ht, solution *slot)
266
732
{
267
732
     A(LIVEP(slot)); /* ==> */ A(VALIDP(slot));
268
269
732
     --ht->nelem;
270
732
     slot->flags.hash_info = H_VALID;
271
732
}
272
273
static void hinsert0(hashtab *ht, const md5sig s, const flags_t *flagsp, 
274
         unsigned slvndx)
275
14.0k
{
276
14.0k
     solution *l;
277
14.0k
     unsigned g, h = h1(ht, s), d = h2(ht, s); 
278
279
14.0k
     ++ht->insert_unknown;
280
281
     /* search for nonfull slot */
282
28.2k
     for (g = h; ; g = addmod(g, d, ht->hashsiz)) {
283
28.2k
    ++ht->insert_iter;
284
28.2k
    l = ht->solutions + g;
285
28.2k
    if (!LIVEP(l)) break;
286
14.1k
    A((g + d) % ht->hashsiz != h);
287
14.1k
     }
288
289
14.0k
     fill_slot(ht, s, flagsp, slvndx, l);
290
14.0k
}
291
292
static void rehash(hashtab *ht, unsigned nsiz)
293
1.26k
{
294
1.26k
     unsigned osiz = ht->hashsiz, h;
295
1.26k
     solution *osol = ht->solutions, *nsol;
296
297
1.26k
     nsiz = (unsigned)X(next_prime)((INT)nsiz);
298
1.26k
     nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT);
299
1.26k
     ++ht->nrehash;
300
301
     /* init new table */
302
18.2k
     for (h = 0; h < nsiz; ++h) 
303
17.0k
    nsol[h].flags.hash_info = 0;
304
305
     /* install new table */
306
1.26k
     ht->hashsiz = nsiz;
307
1.26k
     ht->solutions = nsol;
308
1.26k
     ht->nelem = 0;
309
310
     /* copy table */
311
14.3k
     for (h = 0; h < osiz; ++h) {
312
13.0k
    solution *l = osol + h;
313
13.0k
    if (LIVEP(l))
314
11.0k
         hinsert0(ht, l->s, &l->flags, SLVNDX(l));
315
13.0k
     }
316
317
1.26k
     X(ifree0)(osol);
318
1.26k
}
319
320
static unsigned minsz(unsigned nelem)
321
5.82k
{
322
5.82k
     return 1U + nelem + nelem / 8U;
323
5.82k
}
324
325
static unsigned nextsz(unsigned nelem)
326
1.26k
{
327
1.26k
     return minsz(minsz(nelem));
328
1.26k
}
329
330
static void hgrow(hashtab *ht)
331
3.30k
{
332
3.30k
     unsigned nelem = ht->nelem;
333
3.30k
     if (minsz(nelem) >= ht->hashsiz)
334
1.26k
    rehash(ht, nextsz(nelem));
335
3.30k
}
336
337
#if 0
338
/* shrink the hash table, never used */
339
static void hshrink(hashtab *ht)
340
{
341
     unsigned nelem = ht->nelem;
342
     /* always rehash after deletions */
343
     rehash(ht, nextsz(nelem));
344
}
345
#endif
346
347
static void htab_insert(hashtab *ht, const md5sig s, const flags_t *flagsp,
348
      unsigned slvndx)
349
3.74k
{
350
3.74k
     unsigned g, h = h1(ht, s), d = h2(ht, s);
351
3.74k
     solution *first = 0;
352
353
     /* Remove all entries that are subsumed by the new one.  */
354
     /* This loop may potentially traverse the whole table, since at
355
  least one element is guaranteed to be !LIVEP, but all elements
356
  may be VALIDP.  Hence, we stop after at the first invalid
357
  element or after traversing the whole table. */
358
3.74k
     g = h;
359
17.9k
     do {
360
17.9k
    solution *l = ht->solutions + g;
361
17.9k
    ++ht->insert_iter;
362
17.9k
    if (VALIDP(l)) {
363
14.1k
         if (LIVEP(l) && md5eq(s, l->s)) {
364
784
        if (subsumes(flagsp, slvndx, &l->flags)) {
365
732
       if (!first) first = l;
366
732
       kill_slot(ht, l);
367
732
        } else {
368
       /* It is an error to insert an element that
369
          is subsumed by an existing entry. */
370
52
       A(!subsumes(&l->flags, SLVNDX(l), flagsp));
371
52
        }
372
784
         }
373
14.1k
    } else 
374
3.74k
         break;
375
376
14.1k
    g = addmod(g, d, ht->hashsiz);
377
14.1k
     } while (g != h);
378
379
3.74k
     if (first) {
380
    /* overwrite FIRST */
381
732
    fill_slot(ht, s, flagsp, slvndx, first);
382
3.01k
     } else {
383
    /* create a new entry */
384
3.01k
    hgrow(ht);
385
3.01k
    hinsert0(ht, s, flagsp, slvndx);
386
3.01k
     }
387
3.74k
}
388
389
static void hinsert(planner *ego, const md5sig s, const flags_t *flagsp, 
390
        unsigned slvndx)
391
3.74k
{
392
3.74k
     htab_insert(BLISS(*flagsp) ? &ego->htab_blessed : &ego->htab_unblessed,
393
3.74k
     s, flagsp, slvndx );
394
3.74k
}
395
396
397
static void invoke_hook(planner *ego, plan *pln, const problem *p, 
398
      int optimalp)
399
5.61k
{
400
5.61k
     if (ego->hook)
401
0
    ego->hook(ego, pln, p, optimalp);
402
5.61k
}
403
404
#ifdef FFTW_RANDOM_ESTIMATOR
405
/* a "random" estimate, used for debugging to generate "random"
406
   plans, albeit from a deterministic seed. */
407
408
unsigned X(random_estimate_seed) = 0;
409
410
static double random_estimate(const planner *ego, const plan *pln, 
411
            const problem *p)
412
{
413
     md5 m;
414
     X(md5begin)(&m);
415
     X(md5unsigned)(&m, X(random_estimate_seed));
416
     X(md5int)(&m, ego->nthr);
417
     p->adt->hash(p, &m);
418
     X(md5putb)(&m, &pln->ops, sizeof(pln->ops));
419
     X(md5putb)(&m, &pln->adt, sizeof(pln->adt));
420
     X(md5end)(&m);
421
     return ego->cost_hook ? ego->cost_hook(p, m.s[0], COST_MAX) : m.s[0];
422
}
423
424
#endif
425
426
double X(iestimate_cost)(const planner *ego, const plan *pln, const problem *p)
427
2.13k
{
428
2.13k
     double cost =
429
2.13k
    + pln->ops.add
430
2.13k
    + pln->ops.mul
431
    
432
#if HAVE_FMA
433
    + pln->ops.fma
434
#else
435
2.13k
    + 2 * pln->ops.fma
436
2.13k
#endif
437
    
438
2.13k
    + pln->ops.other;
439
2.13k
     if (ego->cost_hook)
440
0
    cost = ego->cost_hook(p, cost, COST_MAX);
441
2.13k
     return cost;
442
2.13k
}
443
444
static void evaluate_plan(planner *ego, plan *pln, const problem *p)
445
2.13k
{
446
2.13k
     if (ESTIMATEP(ego) || !BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) {
447
2.13k
    ego->nplan++;
448
449
2.13k
    if (ESTIMATEP(ego)) {
450
2.13k
    estimate:
451
         /* heuristic */
452
#ifdef FFTW_RANDOM_ESTIMATOR
453
         pln->pcost = random_estimate(ego, pln, p);
454
         ego->epcost += X(iestimate_cost)(ego, pln, p);
455
#else
456
2.13k
         pln->pcost = X(iestimate_cost)(ego, pln, p);
457
2.13k
         ego->epcost += pln->pcost;
458
2.13k
#endif
459
2.13k
    } else {
460
0
         double t = X(measure_execution_time)(ego, pln, p);
461
         
462
0
         if (t < 0) {  /* unavailable cycle counter */
463
        /* Real programmers can write FORTRAN in any language */
464
0
        goto estimate;
465
0
         }
466
467
0
         pln->pcost = t;
468
0
         ego->pcost += t;
469
0
         ego->need_timeout_check = 1;
470
0
    }
471
2.13k
     }
472
     
473
2.13k
     invoke_hook(ego, pln, p, 0);
474
2.13k
}
475
476
/* maintain dynamic scoping of flags, nthr: */
477
static plan *invoke_solver(planner *ego, const problem *p, solver *s, 
478
         const flags_t *nflags)
479
290k
{
480
290k
     flags_t flags = ego->flags;
481
290k
     int nthr = ego->nthr;
482
290k
     plan *pln;
483
290k
     ego->flags = *nflags;
484
290k
     PLNR_TIMELIMIT_IMPATIENCE(ego) = 0;
485
290k
     A(p->adt->problem_kind == s->adt->problem_kind);
486
290k
     pln = s->adt->mkplan(s, p, ego);
487
290k
     ego->nthr = nthr;
488
290k
     ego->flags = flags;
489
290k
     return pln;
490
290k
}
491
492
/* maintain the invariant TIMED_OUT ==> NEED_TIMEOUT_CHECK */
493
static int timeout_p(planner *ego, const problem *p)
494
2.05k
{
495
     /* do not timeout when estimating.  First, the estimator is the
496
  planner of last resort.  Second, calling X(elapsed_since)() is
497
  slower than estimating */
498
2.05k
     if (!ESTIMATEP(ego)) {
499
    /* do not assume that X(elapsed_since)() is monotonic */
500
0
    if (ego->timed_out) {
501
0
         A(ego->need_timeout_check);
502
0
         return 1;
503
0
    }
504
505
0
    if (ego->timelimit >= 0 &&
506
0
        X(elapsed_since)(ego, p, ego->start_time) >= ego->timelimit) {
507
0
         ego->timed_out = 1;
508
0
         ego->need_timeout_check = 1;
509
0
         return 1;
510
0
    }
511
0
     }
512
513
2.05k
     A(!ego->timed_out);
514
2.05k
     ego->need_timeout_check = 0;
515
2.05k
     return 0;
516
2.05k
}
517
518
static plan *search0(planner *ego, const problem *p, unsigned *slvndx, 
519
         const flags_t *flagsp)
520
2.05k
{
521
2.05k
     plan *best = 0;
522
2.05k
     int best_not_yet_timed = 1;
523
524
     /* Do not start a search if the planner timed out. This check is
525
  necessary, lest the relaxation mechanism kick in */
526
2.05k
     if (timeout_p(ego, p))
527
0
    return 0;
528
529
2.05k
     FORALL_SOLVERS_OF_KIND(p->adt->problem_kind, ego, s, sp, {
530
2.05k
    plan *pln;
531
532
2.05k
    pln = invoke_solver(ego, p, s, flagsp);
533
534
2.05k
    if (ego->need_timeout_check) 
535
2.05k
         if (timeout_p(ego, p)) {
536
2.05k
        X(plan_destroy_internal)(pln);
537
2.05k
        X(plan_destroy_internal)(best);
538
2.05k
        return 0;
539
2.05k
         }
540
541
2.05k
    if (pln) {
542
         /* read COULD_PRUNE_NOW_P because PLN may be destroyed
543
      before we use COULD_PRUNE_NOW_P */
544
2.05k
         int could_prune_now_p = pln->could_prune_now_p;
545
546
2.05k
         if (best) {
547
2.05k
        if (best_not_yet_timed) {
548
2.05k
       evaluate_plan(ego, best, p);
549
2.05k
       best_not_yet_timed = 0;
550
2.05k
        }
551
2.05k
        evaluate_plan(ego, pln, p);
552
2.05k
        if (pln->pcost < best->pcost) {
553
2.05k
       X(plan_destroy_internal)(best);
554
2.05k
       best = pln;
555
2.05k
                         *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs);
556
2.05k
        } else {
557
2.05k
       X(plan_destroy_internal)(pln);
558
2.05k
        }
559
2.05k
         } else {
560
2.05k
        best = pln;
561
2.05k
                    *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs);                    
562
2.05k
         }
563
564
2.05k
         if (ALLOW_PRUNINGP(ego) && could_prune_now_p) 
565
2.05k
        break;
566
2.05k
    }
567
2.05k
     });
568
569
2.05k
     return best;
570
2.05k
}
571
572
static plan *search(planner *ego, const problem *p, unsigned *slvndx, 
573
        flags_t *flagsp)
574
2.01k
{
575
2.01k
     plan *pln = 0;
576
2.01k
     unsigned i;
577
578
     /* relax impatience in this order: */
579
2.01k
     static const unsigned relax_tab[] = {
580
2.01k
    0, /* relax nothing */
581
2.01k
    NO_VRECURSE,
582
2.01k
    NO_FIXED_RADIX_LARGE_N,
583
2.01k
    NO_SLOW,
584
2.01k
    NO_UGLY
585
2.01k
     };
586
587
2.01k
     unsigned l_orig = flagsp->l;
588
2.01k
     unsigned x = flagsp->u;
589
590
     /* guaranteed to be different from X */
591
2.01k
     unsigned last_x = ~x; 
592
593
3.35k
     for (i = 0; i < sizeof(relax_tab) / sizeof(relax_tab[0]); ++i) {
594
3.09k
    if (LEQ(l_orig, x & ~relax_tab[i]))
595
2.05k
         x = x & ~relax_tab[i];
596
597
3.09k
    if (x != last_x) {
598
2.05k
         last_x = x;
599
2.05k
         flagsp->l = x;
600
2.05k
         pln = search0(ego, p, slvndx, flagsp);
601
2.05k
         if (pln) break;
602
2.05k
    }
603
3.09k
     }
604
605
2.01k
     if (!pln) {
606
    /* search [L_ORIG, U] */
607
262
    if (l_orig != last_x) {
608
0
         last_x = l_orig;
609
0
         flagsp->l = l_orig;
610
0
         pln = search0(ego, p, slvndx, flagsp);
611
0
    }
612
262
     }
613
614
2.01k
     return pln;
615
2.01k
}
616
617
#define CHECK_FOR_BOGOSITY            \
618
7.50k
     if ((ego->bogosity_hook ?            \
619
7.50k
    (ego->wisdom_state = ego->bogosity_hook(ego->wisdom_state, p)) \
620
7.50k
    : ego->wisdom_state) == WISDOM_IS_BOGUS)     \
621
7.50k
    goto wisdom_is_bogus;
622
623
static plan *mkplan(planner *ego, const problem *p)
624
3.75k
{
625
3.75k
     plan *pln;
626
3.75k
     md5 m;
627
3.75k
     unsigned slvndx;
628
3.75k
     flags_t flags_of_solution;
629
3.75k
     solution *sol;
630
3.75k
     solver *s;
631
632
3.75k
     ASSERT_ALIGNED_DOUBLE;
633
3.75k
     A(LEQ(PLNR_L(ego), PLNR_U(ego)));
634
635
3.75k
     if (ESTIMATEP(ego))
636
3.75k
    PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; /* canonical form */
637
638
639
#ifdef FFTW_DEBUG
640
     check(&ego->htab_blessed);
641
     check(&ego->htab_unblessed);
642
#endif
643
644
3.75k
     pln = 0;
645
646
3.75k
     CHECK_FOR_BOGOSITY;
647
648
3.75k
     ego->timed_out = 0;
649
650
3.75k
     ++ego->nprob;
651
3.75k
     md5hash(&m, p, ego);
652
653
3.75k
     flags_of_solution = ego->flags;
654
655
3.75k
     if (ego->wisdom_state != WISDOM_IGNORE_ALL) {
656
3.75k
    if ((sol = hlookup(ego, m.s, &flags_of_solution))) { 
657
         /* wisdom is acceptable */
658
1.73k
         wisdom_state_t owisdom_state = ego->wisdom_state;
659
         
660
         /* this hook is mainly for MPI, to make sure that
661
      wisdom is in sync across all processes for MPI problems */
662
1.73k
         if (ego->wisdom_ok_hook && !ego->wisdom_ok_hook(p, sol->flags))
663
0
        goto do_search; /* ignore not-ok wisdom */
664
         
665
1.73k
         slvndx = SLVNDX(sol);
666
         
667
1.73k
         if (slvndx == INFEASIBLE_SLVNDX) {
668
6
        if (ego->wisdom_state == WISDOM_IGNORE_INFEASIBLE)
669
0
       goto do_search;
670
6
        else
671
6
       return 0;   /* known to be infeasible */
672
6
         }
673
         
674
1.73k
         flags_of_solution = sol->flags;
675
         
676
         /* inherit blessing either from wisdom
677
      or from the planner */
678
1.73k
         flags_of_solution.hash_info |= BLISS(ego->flags);
679
         
680
1.73k
         ego->wisdom_state = WISDOM_ONLY;
681
         
682
1.73k
         s = ego->slvdescs[slvndx].slv;
683
1.73k
         if (p->adt->problem_kind != s->adt->problem_kind)
684
0
        goto wisdom_is_bogus;
685
         
686
1.73k
         pln = invoke_solver(ego, p, s, &flags_of_solution);
687
         
688
1.73k
         CHECK_FOR_BOGOSITY;     /* catch error in child solvers */
689
         
690
1.73k
         sol = 0; /* Paranoia: SOL may be dangling after
691
         invoke_solver(); make sure we don't accidentally
692
         reuse it. */
693
         
694
1.73k
         if (!pln)
695
0
        goto wisdom_is_bogus;
696
         
697
1.73k
         ego->wisdom_state = owisdom_state;
698
         
699
1.73k
         goto skip_search;
700
1.73k
    }
701
2.01k
    else if (ego->nowisdom_hook) /* for MPI, make sure lack of wisdom */
702
0
         ego->nowisdom_hook(p);  /*   is in sync across all processes */
703
3.75k
     }
704
705
2.01k
 do_search:
706
     /* cannot search in WISDOM_ONLY mode */
707
2.01k
     if (ego->wisdom_state == WISDOM_ONLY)
708
0
    goto wisdom_is_bogus;
709
710
2.01k
     flags_of_solution = ego->flags;
711
2.01k
     pln = search(ego, p, &slvndx, &flags_of_solution);
712
2.01k
     CHECK_FOR_BOGOSITY;     /* catch error in child solvers */
713
714
2.01k
     if (ego->timed_out) {
715
0
    A(!pln);
716
0
    if (PLNR_TIMELIMIT_IMPATIENCE(ego) != 0) {
717
         /* record (below) that this plan has failed because of
718
      timeout */
719
0
         flags_of_solution.hash_info |= BLESSING;
720
0
    } else {
721
         /* this is not the top-level problem or timeout is not
722
      active: record no wisdom. */
723
0
         return 0;
724
0
    }
725
2.01k
     } else {
726
    /* canonicalize to infinite timeout */
727
2.01k
    flags_of_solution.timelimit_impatience = 0;
728
2.01k
     }
729
730
3.74k
 skip_search:
731
3.74k
     if (ego->wisdom_state == WISDOM_NORMAL ||
732
3.74k
   ego->wisdom_state == WISDOM_ONLY) {
733
3.74k
    if (pln) {
734
3.48k
         hinsert(ego, m.s, &flags_of_solution, slvndx);
735
3.48k
         invoke_hook(ego, pln, p, 1);
736
3.48k
    } else {
737
262
         hinsert(ego, m.s, &flags_of_solution, INFEASIBLE_SLVNDX);
738
262
    }
739
3.74k
     }
740
741
3.74k
     return pln;
742
743
0
 wisdom_is_bogus:
744
0
     X(plan_destroy_internal)(pln);
745
0
     ego->wisdom_state = WISDOM_IS_BOGUS;
746
0
     return 0;
747
2.01k
}
748
749
static void htab_destroy(hashtab *ht)
750
285
{
751
285
     X(ifree)(ht->solutions);
752
285
     ht->solutions = 0;
753
285
     ht->nelem = 0U;
754
285
}
755
756
static void mkhashtab(hashtab *ht)
757
287
{
758
287
     ht->nrehash = 0;
759
287
     ht->succ_lookup = ht->lookup = ht->lookup_iter = 0;
760
287
     ht->insert = ht->insert_iter = ht->insert_unknown = 0;
761
762
287
     ht->solutions = 0;
763
287
     ht->hashsiz = ht->nelem = 0U;
764
287
     hgrow(ht);     /* so that hashsiz > 0 */
765
287
}
766
767
/* destroy hash table entries.  If FORGET_EVERYTHING, destroy the whole
768
   table.  If FORGET_ACCURSED, then destroy entries that are not blessed. */
769
static void forget(planner *ego, amnesia a)
770
285
{
771
285
     switch (a) {
772
0
   case FORGET_EVERYTHING:
773
0
        htab_destroy(&ego->htab_blessed);
774
0
        mkhashtab(&ego->htab_blessed);
775
        /* fall through */
776
285
   case FORGET_ACCURSED:
777
285
        htab_destroy(&ego->htab_unblessed);
778
285
        mkhashtab(&ego->htab_unblessed);
779
285
        break;
780
0
   default:
781
0
        break;
782
285
     }
783
285
}
784
785
/* FIXME: what sort of version information should we write? */
786
#define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom))
787
static const char stimeout[] = "TIMEOUT";
788
789
/* tantus labor non sit cassus */
790
static void exprt(planner *ego, printer *p)
791
0
{
792
0
     unsigned h;
793
0
     hashtab *ht = &ego->htab_blessed;
794
0
     md5 m;
795
796
0
     signature_of_configuration(&m, ego);
797
798
0
     p->print(p, 
799
0
        "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n",
800
0
        m.s[0], m.s[1], m.s[2], m.s[3]);
801
802
0
     for (h = 0; h < ht->hashsiz; ++h) {
803
0
    solution *l = ht->solutions + h;
804
0
    if (LIVEP(l)) {
805
0
         const char *reg_nam;
806
0
         int reg_id;
807
808
0
         if (SLVNDX(l) == INFEASIBLE_SLVNDX) {
809
0
        reg_nam = stimeout;
810
0
        reg_id = 0;
811
0
         } else {
812
0
        slvdesc *sp = ego->slvdescs + SLVNDX(l);
813
0
        reg_nam = sp->reg_nam;
814
0
        reg_id = sp->reg_id;
815
0
         }
816
817
         /* qui salvandos salvas gratis
818
      salva me fons pietatis */
819
0
         p->print(p, "  (%s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)\n",
820
0
      reg_nam, reg_id, 
821
0
      l->flags.l, l->flags.u, l->flags.timelimit_impatience, 
822
0
      l->s[0], l->s[1], l->s[2], l->s[3]);
823
0
    }
824
0
     }
825
0
     p->print(p, ")\n");
826
0
}
827
828
/* mors stupebit et natura
829
   cum resurget creatura */
830
static int imprt(planner *ego, scanner *sc)
831
0
{
832
0
     char buf[MAXNAM + 1];
833
0
     md5uint sig[4];
834
0
     unsigned l, u, timelimit_impatience;
835
0
     flags_t flags;
836
0
     int reg_id;
837
0
     unsigned slvndx;
838
0
     hashtab *ht = &ego->htab_blessed;
839
0
     hashtab old;
840
0
     md5 m;
841
842
0
     if (!sc->scan(sc, 
843
0
       "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n",
844
0
       sig + 0, sig + 1, sig + 2, sig + 3))
845
0
    return 0; /* don't need to restore hashtable */
846
847
0
     signature_of_configuration(&m, ego);
848
0
     if (m.s[0] != sig[0] || m.s[1] != sig[1] ||
849
0
   m.s[2] != sig[2] || m.s[3] != sig[3]) {
850
    /* invalid configuration */
851
0
    return 0;
852
0
     }
853
     
854
     /* make a backup copy of the hash table (cache the hash) */
855
0
     {
856
0
    unsigned h, hsiz = ht->hashsiz;
857
0
    old = *ht;
858
0
    old.solutions = (solution *)MALLOC(hsiz * sizeof(solution), HASHT);
859
0
    for (h = 0; h < hsiz; ++h)
860
0
         old.solutions[h] = ht->solutions[h];
861
0
     }
862
863
0
     while (1) {
864
0
    if (sc->scan(sc, ")"))
865
0
         break;
866
867
    /* qua resurget ex favilla */
868
0
    if (!sc->scan(sc, "(%*s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)",
869
0
      MAXNAM, buf, &reg_id, &l, &u, &timelimit_impatience,
870
0
      sig + 0, sig + 1, sig + 2, sig + 3))
871
0
         goto bad;
872
873
0
    if (!strcmp(buf, stimeout) && reg_id == 0) {
874
0
         slvndx = INFEASIBLE_SLVNDX;
875
0
    } else {
876
0
         if (timelimit_impatience != 0)
877
0
        goto bad;
878
879
0
         slvndx = slookup(ego, buf, reg_id);
880
0
         if (slvndx == INFEASIBLE_SLVNDX)
881
0
        goto bad;
882
0
    }
883
884
    /* inter oves locum praesta */
885
0
    flags.l = l;
886
0
    flags.u = u;
887
0
    flags.timelimit_impatience = timelimit_impatience;
888
0
    flags.hash_info = BLESSING;
889
890
0
    CK(flags.l == l);
891
0
    CK(flags.u == u);
892
0
    CK(flags.timelimit_impatience == timelimit_impatience);
893
894
0
    if (!hlookup(ego, sig, &flags))
895
0
         hinsert(ego, sig, &flags, slvndx);
896
0
     }
897
898
0
     X(ifree0)(old.solutions);
899
0
     return 1;
900
901
0
 bad:
902
     /* ``The wisdom of FFTW must be above suspicion.'' */
903
0
     X(ifree0)(ht->solutions);
904
0
     *ht = old;
905
0
     return 0;
906
0
}
907
908
/*
909
 * create a planner
910
 */
911
planner *X(mkplanner)(void)
912
1
{
913
1
     int i;
914
915
1
     static const planner_adt padt = {
916
1
    register_solver, mkplan, forget, exprt, imprt
917
1
     };
918
919
1
     planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS);
920
921
1
     p->adt = &padt;
922
1
     p->nplan = p->nprob = 0;
923
1
     p->pcost = p->epcost = 0.0;
924
1
     p->hook = 0;
925
1
     p->cost_hook = 0;
926
1
     p->wisdom_ok_hook = 0;
927
1
     p->nowisdom_hook = 0;
928
1
     p->bogosity_hook = 0;
929
1
     p->cur_reg_nam = 0;
930
1
     p->wisdom_state = WISDOM_NORMAL;
931
932
1
     p->slvdescs = 0;
933
1
     p->nslvdesc = p->slvdescsiz = 0;
934
935
1
     p->flags.l = 0;
936
1
     p->flags.u = 0;
937
1
     p->flags.timelimit_impatience = 0;
938
1
     p->flags.hash_info = 0;
939
1
     p->nthr = 1;
940
1
     p->need_timeout_check = 1;
941
1
     p->timelimit = -1;
942
943
1
     mkhashtab(&p->htab_blessed);
944
1
     mkhashtab(&p->htab_unblessed);
945
946
9
     for (i = 0; i < PROBLEM_LAST; ++i)
947
8
    p->slvdescs_for_problem_kind[i] = -1;
948
949
1
     return p;
950
1
}
951
952
void X(planner_destroy)(planner *ego)
953
0
{
954
     /* destroy hash table */
955
0
     htab_destroy(&ego->htab_blessed);
956
0
     htab_destroy(&ego->htab_unblessed);
957
958
     /* destroy solvdesc table */
959
0
     FORALL_SOLVERS(ego, s, sp, {
960
0
    UNUSED(sp);
961
0
    X(solver_destroy)(s);
962
0
     });
963
964
0
     X(ifree0)(ego->slvdescs);
965
0
     X(ifree)(ego); /* dona eis requiem */
966
0
}
967
968
plan *X(mkplan_d)(planner *ego, problem *p)
969
3.18k
{
970
3.18k
     plan *pln = ego->adt->mkplan(ego, p);
971
3.18k
     X(problem_destroy)(p);
972
3.18k
     return pln;
973
3.18k
}
974
975
/* like X(mkplan_d), but sets/resets flags as well */
976
plan *X(mkplan_f_d)(planner *ego, problem *p, 
977
        unsigned l_set, unsigned u_set, unsigned u_reset)
978
635
{
979
635
     flags_t oflags = ego->flags;
980
635
     plan *pln;
981
982
635
     PLNR_U(ego) &= ~u_reset;
983
635
     PLNR_L(ego) &= ~u_reset;
984
635
     PLNR_L(ego) |= l_set;
985
635
     PLNR_U(ego) |= u_set | l_set;
986
635
     pln = X(mkplan_d)(ego, p);
987
635
     ego->flags = oflags;
988
635
     return pln;
989
635
}
990
991
/*
992
 * Debugging code:
993
 */
994
#ifdef FFTW_DEBUG
995
static void check(hashtab *ht)
996
{
997
     unsigned live = 0;
998
     unsigned i;
999
1000
     A(ht->nelem < ht->hashsiz);
1001
1002
     for (i = 0; i < ht->hashsiz; ++i) {
1003
    solution *l = ht->solutions + i; 
1004
    if (LIVEP(l)) 
1005
         ++live; 
1006
     }
1007
1008
     A(ht->nelem == live);
1009
1010
     for (i = 0; i < ht->hashsiz; ++i) {
1011
    solution *l1 = ht->solutions + i; 
1012
    int foundit = 0;
1013
    if (LIVEP(l1)) {
1014
         unsigned g, h = h1(ht, l1->s), d = h2(ht, l1->s);
1015
1016
         g = h;
1017
         do {
1018
        solution *l = ht->solutions + g;
1019
        if (VALIDP(l)) {
1020
       if (l1 == l)
1021
            foundit = 1;
1022
       else if (LIVEP(l) && md5eq(l1->s, l->s)) {
1023
            A(!subsumes(&l->flags, SLVNDX(l), &l1->flags));
1024
            A(!subsumes(&l1->flags, SLVNDX(l1), &l->flags));
1025
       }
1026
        } else 
1027
       break;
1028
        g = addmod(g, d, ht->hashsiz);
1029
         } while (g != h);
1030
1031
         A(foundit);
1032
    }
1033
     }
1034
}
1035
#endif