Coverage Report

Created: 2025-07-23 07:03

/src/fftw3/kernel/planner.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2000 Matteo Frigo
3
 * Copyright (c) 2000 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
#include "kernel/ifftw.h"
22
#include <string.h>
23
24
/* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU
25
   program in English, because English is the one language that nearly
26
   all programmers in all countries can read."
27
28
                    ingemisco tanquam reus
29
        culpa rubet vultus meus
30
        supplicanti parce [rms]
31
*/
32
33
52.1k
#define VALIDP(solution) ((solution)->flags.hash_info & H_VALID)
34
123k
#define LIVEP(solution) ((solution)->flags.hash_info & H_LIVE)
35
27.9k
#define SLVNDX(solution) ((solution)->flags.slvndx)
36
5.29k
#define BLISS(flags) (((flags).hash_info) & BLESSING)
37
4.34k
#define INFEASIBLE_SLVNDX ((1U<<BITS_FOR_SLVNDX)-1)
38
39
40
0
#define MAXNAM 64  /* maximum length of registrar's name.
41
          Used for reading wisdom.  There is no point
42
          in doing this right */
43
44
45
#ifdef FFTW_DEBUG
46
static void check(hashtab *ht);
47
#endif
48
49
/* x <= y */
50
10.3k
#define LEQ(x, y) (((x) & (y)) == (x))
51
52
/* A subsumes B */
53
static int subsumes(const flags_t *a, unsigned slvndx_a, const flags_t *b)
54
2.43k
{
55
2.43k
     if (slvndx_a != INFEASIBLE_SLVNDX) {
56
2.41k
    A(a->timelimit_impatience == 0);
57
2.41k
    return (LEQ(a->u, b->u) && LEQ(b->l, a->l));
58
2.41k
     } else {
59
20
    return (LEQ(a->l, b->l) 
60
20
      && a->timelimit_impatience <= b->timelimit_impatience);
61
20
     }
62
2.43k
}
63
64
static unsigned addmod(unsigned a, unsigned b, unsigned p)
65
55.7k
{
66
     /* gcc-2.95/sparc produces incorrect code for the fast version below. */
67
#if defined(__sparc__) && defined(__GNUC__)
68
     /* slow version  */
69
     return (a + b) % p;
70
#else
71
     /* faster version */
72
55.7k
     unsigned c = a + b;
73
55.7k
     return c >= p ? c - p : c;
74
55.7k
#endif
75
55.7k
}
76
77
/*
78
  slvdesc management:
79
*/
80
static void sgrow(planner *ego)
81
25
{
82
25
     unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4;
83
25
     slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS);
84
25
     slvdesc *otab = ego->slvdescs;
85
25
     unsigned i;
86
87
25
     ego->slvdescs = ntab;
88
25
     ego->slvdescsiz = nsiz;
89
2.80k
     for (i = 0; i < osiz; ++i)
90
2.77k
    ntab[i] = otab[i];
91
25
     X(ifree0)(otab);
92
25
}
93
94
static void register_solver(planner *ego, solver *s)
95
613
{
96
613
     slvdesc *n;
97
613
     int kind;
98
99
613
     if (s) { /* add s to solver list */
100
613
    X(solver_use)(s);
101
102
613
    A(ego->nslvdesc < INFEASIBLE_SLVNDX);
103
613
    if (ego->nslvdesc >= ego->slvdescsiz)
104
25
         sgrow(ego);
105
106
613
    n = ego->slvdescs + ego->nslvdesc;
107
108
613
    n->slv = s;
109
613
    n->reg_nam = ego->cur_reg_nam;
110
613
    n->reg_id = ego->cur_reg_id++;
111
    
112
613
    A(strlen(n->reg_nam) < MAXNAM);
113
613
    n->nam_hash = X(hash)(n->reg_nam);
114
115
613
    kind = s->adt->problem_kind;
116
613
    n->next_for_same_problem_kind = ego->slvdescs_for_problem_kind[kind];
117
613
    ego->slvdescs_for_problem_kind[kind] = (int)/*from unsigned*/ego->nslvdesc;
118
119
613
    ego->nslvdesc++;
120
613
     }
121
613
}
122
123
static unsigned slookup(planner *ego, char *nam, int id)
124
0
{
125
0
     unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */
126
0
     FORALL_SOLVERS(ego, s, sp, {
127
0
    UNUSED(s);
128
0
    if (sp->reg_id == id && sp->nam_hash == h
129
0
        && !strcmp(sp->reg_nam, nam))
130
0
         return (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs);
131
0
     });
132
0
     return INFEASIBLE_SLVNDX;
133
0
}
134
135
/* Compute a MD5 hash of the configuration of the planner.
136
   We store it into the wisdom file to make absolutely sure that
137
   we are reading wisdom that is applicable */
138
static void signature_of_configuration(md5 *m, planner *ego)
139
0
{
140
0
     X(md5begin)(m);
141
0
     X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */
142
0
     FORALL_SOLVERS(ego, s, sp, {
143
0
    UNUSED(s);
144
0
    X(md5int)(m, sp->reg_id);
145
0
    X(md5puts)(m, sp->reg_nam);
146
0
     });
147
0
     X(md5end)(m);
148
0
}
149
150
/*
151
  md5-related stuff:
152
*/
153
154
/* first hash function */
155
static unsigned h1(const hashtab *ht, const md5sig s)
156
23.4k
{
157
23.4k
     unsigned h = s[0] % ht->hashsiz;
158
23.4k
     A(h == (s[0] % ht->hashsiz));
159
23.4k
     return h;
160
23.4k
}
161
162
/* second hash function (for double hashing) */
163
static unsigned h2(const hashtab *ht, const md5sig s)
164
23.4k
{
165
23.4k
     unsigned h = 1U + s[1] % (ht->hashsiz - 1);
166
23.4k
     A(h == (1U + s[1] % (ht->hashsiz - 1)));
167
23.4k
     return h;
168
23.4k
}
169
170
static void md5hash(md5 *m, const problem *p, const planner *plnr)
171
3.66k
{
172
3.66k
     X(md5begin)(m);
173
3.66k
     X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */
174
3.66k
     X(md5int)(m, plnr->nthr);
175
3.66k
     p->adt->hash(p, m);
176
3.66k
     X(md5end)(m);
177
3.66k
}
178
179
static int md5eq(const md5sig a, const md5sig b)
180
42.1k
{
181
42.1k
     return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3];
182
42.1k
}
183
184
static void sigcpy(const md5sig a, md5sig b)
185
14.1k
{
186
14.1k
     b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3];
187
14.1k
}
188
189
/*
190
  memoization routines :
191
*/
192
193
/*
194
   liber scriptus proferetur
195
   in quo totum continetur
196
   unde mundus iudicetur
197
*/
198
struct solution_s {
199
     md5sig s;
200
     flags_t flags;
201
};
202
203
static solution *htab_lookup(hashtab *ht, const md5sig s, 
204
           const flags_t *flagsp)
205
6.32k
{
206
6.32k
     unsigned g, h = h1(ht, s), d = h2(ht, s);
207
6.32k
     solution *best = 0;
208
209
6.32k
     ++ht->lookup;
210
211
     /* search all entries that match; select the one with
212
  the lowest flags.u */
213
     /* This loop may potentially traverse the whole table, since at
214
  least one element is guaranteed to be !LIVEP, but all elements
215
  may be VALIDP.  Hence, we stop after at the first invalid
216
  element or after traversing the whole table. */
217
6.32k
     g = h;
218
34.9k
     do {
219
34.9k
    solution *l = ht->solutions + g;
220
34.9k
    ++ht->lookup_iter;
221
34.9k
    if (VALIDP(l)) {
222
28.6k
         if (LIVEP(l)
223
28.6k
       && md5eq(s, l->s)
224
28.6k
       && subsumes(&l->flags, SLVNDX(l), flagsp) ) { 
225
1.64k
        if (!best || LEQ(l->flags.u, best->flags.u))
226
1.64k
       best = l;
227
1.64k
         }
228
28.6k
    } else 
229
6.32k
         break;
230
231
28.6k
    g = addmod(g, d, ht->hashsiz);
232
28.6k
     } while (g != h);
233
234
6.32k
     if (best) 
235
1.64k
    ++ht->succ_lookup;
236
6.32k
     return best;
237
6.32k
}
238
239
static solution *hlookup(planner *ego, const md5sig s, 
240
       const flags_t *flagsp)
241
3.66k
{
242
3.66k
     solution *sol = htab_lookup(&ego->htab_blessed, s, flagsp);
243
3.66k
     if (!sol) sol = htab_lookup(&ego->htab_unblessed, s, flagsp);
244
3.66k
     return sol;
245
3.66k
}
246
247
static void fill_slot(hashtab *ht, const md5sig s, const flags_t *flagsp,
248
          unsigned slvndx, solution *slot)
249
14.1k
{
250
14.1k
     ++ht->insert;
251
14.1k
     ++ht->nelem;
252
14.1k
     A(!LIVEP(slot));
253
14.1k
     slot->flags.u = flagsp->u;
254
14.1k
     slot->flags.l = flagsp->l;
255
14.1k
     slot->flags.timelimit_impatience = flagsp->timelimit_impatience;
256
14.1k
     slot->flags.hash_info |= H_VALID | H_LIVE;
257
14.1k
     SLVNDX(slot) = slvndx;
258
259
     /* keep this check enabled in case we add so many solvers
260
  that the bitfield overflows */
261
14.1k
     CK(SLVNDX(slot) == slvndx);     
262
14.1k
     sigcpy(s, slot->s);
263
14.1k
}
264
265
static void kill_slot(hashtab *ht, solution *slot)
266
682
{
267
682
     A(LIVEP(slot)); /* ==> */ A(VALIDP(slot));
268
269
682
     --ht->nelem;
270
682
     slot->flags.hash_info = H_VALID;
271
682
}
272
273
static void hinsert0(hashtab *ht, const md5sig s, const flags_t *flagsp, 
274
         unsigned slvndx)
275
13.4k
{
276
13.4k
     solution *l;
277
13.4k
     unsigned g, h = h1(ht, s), d = h2(ht, s); 
278
279
13.4k
     ++ht->insert_unknown;
280
281
     /* search for nonfull slot */
282
27.0k
     for (g = h; ; g = addmod(g, d, ht->hashsiz)) {
283
27.0k
    ++ht->insert_iter;
284
27.0k
    l = ht->solutions + g;
285
27.0k
    if (!LIVEP(l)) break;
286
13.5k
    A((g + d) % ht->hashsiz != h);
287
13.5k
     }
288
289
13.4k
     fill_slot(ht, s, flagsp, slvndx, l);
290
13.4k
}
291
292
static void rehash(hashtab *ht, unsigned nsiz)
293
1.23k
{
294
1.23k
     unsigned osiz = ht->hashsiz, h;
295
1.23k
     solution *osol = ht->solutions, *nsol;
296
297
1.23k
     nsiz = (unsigned)X(next_prime)((INT)nsiz);
298
1.23k
     nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT);
299
1.23k
     ++ht->nrehash;
300
301
     /* init new table */
302
17.4k
     for (h = 0; h < nsiz; ++h) 
303
16.2k
    nsol[h].flags.hash_info = 0;
304
305
     /* install new table */
306
1.23k
     ht->hashsiz = nsiz;
307
1.23k
     ht->solutions = nsol;
308
1.23k
     ht->nelem = 0;
309
310
     /* copy table */
311
13.6k
     for (h = 0; h < osiz; ++h) {
312
12.3k
    solution *l = osol + h;
313
12.3k
    if (LIVEP(l))
314
10.4k
         hinsert0(ht, l->s, &l->flags, SLVNDX(l));
315
12.3k
     }
316
317
1.23k
     X(ifree0)(osol);
318
1.23k
}
319
320
static unsigned minsz(unsigned nelem)
321
5.73k
{
322
5.73k
     return 1U + nelem + nelem / 8U;
323
5.73k
}
324
325
static unsigned nextsz(unsigned nelem)
326
1.23k
{
327
1.23k
     return minsz(minsz(nelem));
328
1.23k
}
329
330
static void hgrow(hashtab *ht)
331
3.26k
{
332
3.26k
     unsigned nelem = ht->nelem;
333
3.26k
     if (minsz(nelem) >= ht->hashsiz)
334
1.23k
    rehash(ht, nextsz(nelem));
335
3.26k
}
336
337
#if 0
338
/* shrink the hash table, never used */
339
static void hshrink(hashtab *ht)
340
{
341
     unsigned nelem = ht->nelem;
342
     /* always rehash after deletions */
343
     rehash(ht, nextsz(nelem));
344
}
345
#endif
346
347
static void htab_insert(hashtab *ht, const md5sig s, const flags_t *flagsp,
348
      unsigned slvndx)
349
3.65k
{
350
3.65k
     unsigned g, h = h1(ht, s), d = h2(ht, s);
351
3.65k
     solution *first = 0;
352
353
     /* Remove all entries that are subsumed by the new one.  */
354
     /* This loop may potentially traverse the whole table, since at
355
  least one element is guaranteed to be !LIVEP, but all elements
356
  may be VALIDP.  Hence, we stop after at the first invalid
357
  element or after traversing the whole table. */
358
3.65k
     g = h;
359
17.2k
     do {
360
17.2k
    solution *l = ht->solutions + g;
361
17.2k
    ++ht->insert_iter;
362
17.2k
    if (VALIDP(l)) {
363
13.5k
         if (LIVEP(l) && md5eq(s, l->s)) {
364
726
        if (subsumes(flagsp, slvndx, &l->flags)) {
365
682
       if (!first) first = l;
366
682
       kill_slot(ht, l);
367
682
        } else {
368
       /* It is an error to insert an element that
369
          is subsumed by an existing entry. */
370
44
       A(!subsumes(&l->flags, SLVNDX(l), flagsp));
371
44
        }
372
726
         }
373
13.5k
    } else 
374
3.65k
         break;
375
376
13.5k
    g = addmod(g, d, ht->hashsiz);
377
13.5k
     } while (g != h);
378
379
3.65k
     if (first) {
380
    /* overwrite FIRST */
381
682
    fill_slot(ht, s, flagsp, slvndx, first);
382
2.97k
     } else {
383
    /* create a new entry */
384
2.97k
    hgrow(ht);
385
2.97k
    hinsert0(ht, s, flagsp, slvndx);
386
2.97k
     }
387
3.65k
}
388
389
static void hinsert(planner *ego, const md5sig s, const flags_t *flagsp, 
390
        unsigned slvndx)
391
3.65k
{
392
3.65k
     htab_insert(BLISS(*flagsp) ? &ego->htab_blessed : &ego->htab_unblessed,
393
3.65k
     s, flagsp, slvndx );
394
3.65k
}
395
396
397
static void invoke_hook(planner *ego, plan *pln, const problem *p, 
398
      int optimalp)
399
5.51k
{
400
5.51k
     if (ego->hook)
401
0
    ego->hook(ego, pln, p, optimalp);
402
5.51k
}
403
404
#ifdef FFTW_RANDOM_ESTIMATOR
405
/* a "random" estimate, used for debugging to generate "random"
406
   plans, albeit from a deterministic seed. */
407
408
unsigned X(random_estimate_seed) = 0;
409
410
static double random_estimate(const planner *ego, const plan *pln, 
411
            const problem *p)
412
{
413
     md5 m;
414
     X(md5begin)(&m);
415
     X(md5unsigned)(&m, X(random_estimate_seed));
416
     X(md5int)(&m, ego->nthr);
417
     p->adt->hash(p, &m);
418
     X(md5putb)(&m, &pln->ops, sizeof(pln->ops));
419
     X(md5putb)(&m, &pln->adt, sizeof(pln->adt));
420
     X(md5end)(&m);
421
     return ego->cost_hook ? ego->cost_hook(p, m.s[0], COST_MAX) : m.s[0];
422
}
423
424
#endif
425
426
double X(iestimate_cost)(const planner *ego, const plan *pln, const problem *p)
427
2.12k
{
428
2.12k
     double cost =
429
2.12k
    + pln->ops.add
430
2.12k
    + pln->ops.mul
431
    
432
#if HAVE_FMA
433
    + pln->ops.fma
434
#else
435
2.12k
    + 2 * pln->ops.fma
436
2.12k
#endif
437
    
438
2.12k
    + pln->ops.other;
439
2.12k
     if (ego->cost_hook)
440
0
    cost = ego->cost_hook(p, cost, COST_MAX);
441
2.12k
     return cost;
442
2.12k
}
443
444
static void evaluate_plan(planner *ego, plan *pln, const problem *p)
445
2.12k
{
446
2.12k
     if (ESTIMATEP(ego) || !BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) {
447
2.12k
    ego->nplan++;
448
449
2.12k
    if (ESTIMATEP(ego)) {
450
2.12k
    estimate:
451
         /* heuristic */
452
#ifdef FFTW_RANDOM_ESTIMATOR
453
         pln->pcost = random_estimate(ego, pln, p);
454
         ego->epcost += X(iestimate_cost)(ego, pln, p);
455
#else
456
2.12k
         pln->pcost = X(iestimate_cost)(ego, pln, p);
457
2.12k
         ego->epcost += pln->pcost;
458
2.12k
#endif
459
2.12k
    } else {
460
0
         double t = X(measure_execution_time)(ego, pln, p);
461
         
462
0
         if (t < 0) {  /* unavailable cycle counter */
463
        /* Real programmers can write FORTRAN in any language */
464
0
        goto estimate;
465
0
         }
466
467
0
         pln->pcost = t;
468
0
         ego->pcost += t;
469
0
         ego->need_timeout_check = 1;
470
0
    }
471
2.12k
     }
472
     
473
2.12k
     invoke_hook(ego, pln, p, 0);
474
2.12k
}
475
476
/* maintain dynamic scoping of flags, nthr: */
477
static plan *invoke_solver(planner *ego, const problem *p, solver *s, 
478
         const flags_t *nflags)
479
292k
{
480
292k
     flags_t flags = ego->flags;
481
292k
     int nthr = ego->nthr;
482
292k
     plan *pln;
483
292k
     ego->flags = *nflags;
484
292k
     PLNR_TIMELIMIT_IMPATIENCE(ego) = 0;
485
292k
     A(p->adt->problem_kind == s->adt->problem_kind);
486
292k
     pln = s->adt->mkplan(s, p, ego);
487
292k
     ego->nthr = nthr;
488
292k
     ego->flags = flags;
489
292k
     return pln;
490
292k
}
491
492
/* maintain the invariant TIMED_OUT ==> NEED_TIMEOUT_CHECK */
493
static int timeout_p(planner *ego, const problem *p)
494
2.05k
{
495
     /* do not timeout when estimating.  First, the estimator is the
496
  planner of last resort.  Second, calling X(elapsed_since)() is
497
  slower than estimating */
498
2.05k
     if (!ESTIMATEP(ego)) {
499
    /* do not assume that X(elapsed_since)() is monotonic */
500
0
    if (ego->timed_out) {
501
0
         A(ego->need_timeout_check);
502
0
         return 1;
503
0
    }
504
505
0
    if (ego->timelimit >= 0 &&
506
0
        X(elapsed_since)(ego, p, ego->start_time) >= ego->timelimit) {
507
0
         ego->timed_out = 1;
508
0
         ego->need_timeout_check = 1;
509
0
         return 1;
510
0
    }
511
0
     }
512
513
2.05k
     A(!ego->timed_out);
514
2.05k
     ego->need_timeout_check = 0;
515
2.05k
     return 0;
516
2.05k
}
517
518
static plan *search0(planner *ego, const problem *p, unsigned *slvndx, 
519
         const flags_t *flagsp)
520
2.05k
{
521
2.05k
     plan *best = 0;
522
2.05k
     int best_not_yet_timed = 1;
523
524
     /* Do not start a search if the planner timed out. This check is
525
  necessary, lest the relaxation mechanism kick in */
526
2.05k
     if (timeout_p(ego, p))
527
0
    return 0;
528
529
2.05k
     FORALL_SOLVERS_OF_KIND(p->adt->problem_kind, ego, s, sp, {
530
2.05k
    plan *pln;
531
532
2.05k
    pln = invoke_solver(ego, p, s, flagsp);
533
534
2.05k
    if (ego->need_timeout_check) 
535
2.05k
         if (timeout_p(ego, p)) {
536
2.05k
        X(plan_destroy_internal)(pln);
537
2.05k
        X(plan_destroy_internal)(best);
538
2.05k
        return 0;
539
2.05k
         }
540
541
2.05k
    if (pln) {
542
         /* read COULD_PRUNE_NOW_P because PLN may be destroyed
543
      before we use COULD_PRUNE_NOW_P */
544
2.05k
         int could_prune_now_p = pln->could_prune_now_p;
545
546
2.05k
         if (best) {
547
2.05k
        if (best_not_yet_timed) {
548
2.05k
       evaluate_plan(ego, best, p);
549
2.05k
       best_not_yet_timed = 0;
550
2.05k
        }
551
2.05k
        evaluate_plan(ego, pln, p);
552
2.05k
        if (pln->pcost < best->pcost) {
553
2.05k
       X(plan_destroy_internal)(best);
554
2.05k
       best = pln;
555
2.05k
                         *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs);
556
2.05k
        } else {
557
2.05k
       X(plan_destroy_internal)(pln);
558
2.05k
        }
559
2.05k
         } else {
560
2.05k
        best = pln;
561
2.05k
                    *slvndx = (unsigned)/*from ptrdiff_t*/(sp - ego->slvdescs);                    
562
2.05k
         }
563
564
2.05k
         if (ALLOW_PRUNINGP(ego) && could_prune_now_p) 
565
2.05k
        break;
566
2.05k
    }
567
2.05k
     });
568
569
2.05k
     return best;
570
2.05k
}
571
572
static plan *search(planner *ego, const problem *p, unsigned *slvndx, 
573
        flags_t *flagsp)
574
2.02k
{
575
2.02k
     plan *pln = 0;
576
2.02k
     unsigned i;
577
578
     /* relax impatience in this order: */
579
2.02k
     static const unsigned relax_tab[] = {
580
2.02k
    0, /* relax nothing */
581
2.02k
    NO_VRECURSE,
582
2.02k
    NO_FIXED_RADIX_LARGE_N,
583
2.02k
    NO_SLOW,
584
2.02k
    NO_UGLY
585
2.02k
     };
586
587
2.02k
     unsigned l_orig = flagsp->l;
588
2.02k
     unsigned x = flagsp->u;
589
590
     /* guaranteed to be different from X */
591
2.02k
     unsigned last_x = ~x; 
592
593
3.38k
     for (i = 0; i < sizeof(relax_tab) / sizeof(relax_tab[0]); ++i) {
594
3.12k
    if (LEQ(l_orig, x & ~relax_tab[i]))
595
2.05k
         x = x & ~relax_tab[i];
596
597
3.12k
    if (x != last_x) {
598
2.05k
         last_x = x;
599
2.05k
         flagsp->l = x;
600
2.05k
         pln = search0(ego, p, slvndx, flagsp);
601
2.05k
         if (pln) break;
602
2.05k
    }
603
3.12k
     }
604
605
2.02k
     if (!pln) {
606
    /* search [L_ORIG, U] */
607
268
    if (l_orig != last_x) {
608
0
         last_x = l_orig;
609
0
         flagsp->l = l_orig;
610
0
         pln = search0(ego, p, slvndx, flagsp);
611
0
    }
612
268
     }
613
614
2.02k
     return pln;
615
2.02k
}
616
617
#define CHECK_FOR_BOGOSITY            \
618
7.32k
     if ((ego->bogosity_hook ?            \
619
7.32k
    (ego->wisdom_state = ego->bogosity_hook(ego->wisdom_state, p)) \
620
7.32k
    : ego->wisdom_state) == WISDOM_IS_BOGUS)     \
621
7.32k
    goto wisdom_is_bogus;
622
623
static plan *mkplan(planner *ego, const problem *p)
624
3.66k
{
625
3.66k
     plan *pln;
626
3.66k
     md5 m;
627
3.66k
     unsigned slvndx;
628
3.66k
     flags_t flags_of_solution;
629
3.66k
     solution *sol;
630
3.66k
     solver *s;
631
632
3.66k
     ASSERT_ALIGNED_DOUBLE;
633
3.66k
     A(LEQ(PLNR_L(ego), PLNR_U(ego)));
634
635
3.66k
     if (ESTIMATEP(ego))
636
3.66k
    PLNR_TIMELIMIT_IMPATIENCE(ego) = 0; /* canonical form */
637
638
639
#ifdef FFTW_DEBUG
640
     check(&ego->htab_blessed);
641
     check(&ego->htab_unblessed);
642
#endif
643
644
3.66k
     pln = 0;
645
646
3.66k
     CHECK_FOR_BOGOSITY;
647
648
3.66k
     ego->timed_out = 0;
649
650
3.66k
     ++ego->nprob;
651
3.66k
     md5hash(&m, p, ego);
652
653
3.66k
     flags_of_solution = ego->flags;
654
655
3.66k
     if (ego->wisdom_state != WISDOM_IGNORE_ALL) {
656
3.66k
    if ((sol = hlookup(ego, m.s, &flags_of_solution))) { 
657
         /* wisdom is acceptable */
658
1.64k
         wisdom_state_t owisdom_state = ego->wisdom_state;
659
         
660
         /* this hook is mainly for MPI, to make sure that
661
      wisdom is in sync across all processes for MPI problems */
662
1.64k
         if (ego->wisdom_ok_hook && !ego->wisdom_ok_hook(p, sol->flags))
663
0
        goto do_search; /* ignore not-ok wisdom */
664
         
665
1.64k
         slvndx = SLVNDX(sol);
666
         
667
1.64k
         if (slvndx == INFEASIBLE_SLVNDX) {
668
6
        if (ego->wisdom_state == WISDOM_IGNORE_INFEASIBLE)
669
0
       goto do_search;
670
6
        else
671
6
       return 0;   /* known to be infeasible */
672
6
         }
673
         
674
1.63k
         flags_of_solution = sol->flags;
675
         
676
         /* inherit blessing either from wisdom
677
      or from the planner */
678
1.63k
         flags_of_solution.hash_info |= BLISS(ego->flags);
679
         
680
1.63k
         ego->wisdom_state = WISDOM_ONLY;
681
         
682
1.63k
         s = ego->slvdescs[slvndx].slv;
683
1.63k
         if (p->adt->problem_kind != s->adt->problem_kind)
684
0
        goto wisdom_is_bogus;
685
         
686
1.63k
         pln = invoke_solver(ego, p, s, &flags_of_solution);
687
         
688
1.63k
         CHECK_FOR_BOGOSITY;     /* catch error in child solvers */
689
         
690
1.63k
         sol = 0; /* Paranoia: SOL may be dangling after
691
         invoke_solver(); make sure we don't accidentally
692
         reuse it. */
693
         
694
1.63k
         if (!pln)
695
0
        goto wisdom_is_bogus;
696
         
697
1.63k
         ego->wisdom_state = owisdom_state;
698
         
699
1.63k
         goto skip_search;
700
1.63k
    }
701
2.02k
    else if (ego->nowisdom_hook) /* for MPI, make sure lack of wisdom */
702
0
         ego->nowisdom_hook(p);  /*   is in sync across all processes */
703
3.66k
     }
704
705
2.02k
 do_search:
706
     /* cannot search in WISDOM_ONLY mode */
707
2.02k
     if (ego->wisdom_state == WISDOM_ONLY)
708
0
    goto wisdom_is_bogus;
709
710
2.02k
     flags_of_solution = ego->flags;
711
2.02k
     pln = search(ego, p, &slvndx, &flags_of_solution);
712
2.02k
     CHECK_FOR_BOGOSITY;     /* catch error in child solvers */
713
714
2.02k
     if (ego->timed_out) {
715
0
    A(!pln);
716
0
    if (PLNR_TIMELIMIT_IMPATIENCE(ego) != 0) {
717
         /* record (below) that this plan has failed because of
718
      timeout */
719
0
         flags_of_solution.hash_info |= BLESSING;
720
0
    } else {
721
         /* this is not the top-level problem or timeout is not
722
      active: record no wisdom. */
723
0
         return 0;
724
0
    }
725
2.02k
     } else {
726
    /* canonicalize to infinite timeout */
727
2.02k
    flags_of_solution.timelimit_impatience = 0;
728
2.02k
     }
729
730
3.65k
 skip_search:
731
3.65k
     if (ego->wisdom_state == WISDOM_NORMAL ||
732
3.65k
   ego->wisdom_state == WISDOM_ONLY) {
733
3.65k
    if (pln) {
734
3.39k
         hinsert(ego, m.s, &flags_of_solution, slvndx);
735
3.39k
         invoke_hook(ego, pln, p, 1);
736
3.39k
    } else {
737
268
         hinsert(ego, m.s, &flags_of_solution, INFEASIBLE_SLVNDX);
738
268
    }
739
3.65k
     }
740
741
3.65k
     return pln;
742
743
0
 wisdom_is_bogus:
744
0
     X(plan_destroy_internal)(pln);
745
0
     ego->wisdom_state = WISDOM_IS_BOGUS;
746
0
     return 0;
747
2.02k
}
748
749
static void htab_destroy(hashtab *ht)
750
283
{
751
283
     X(ifree)(ht->solutions);
752
283
     ht->solutions = 0;
753
283
     ht->nelem = 0U;
754
283
}
755
756
static void mkhashtab(hashtab *ht)
757
285
{
758
285
     ht->nrehash = 0;
759
285
     ht->succ_lookup = ht->lookup = ht->lookup_iter = 0;
760
285
     ht->insert = ht->insert_iter = ht->insert_unknown = 0;
761
762
285
     ht->solutions = 0;
763
285
     ht->hashsiz = ht->nelem = 0U;
764
285
     hgrow(ht);     /* so that hashsiz > 0 */
765
285
}
766
767
/* destroy hash table entries.  If FORGET_EVERYTHING, destroy the whole
768
   table.  If FORGET_ACCURSED, then destroy entries that are not blessed. */
769
static void forget(planner *ego, amnesia a)
770
283
{
771
283
     switch (a) {
772
0
   case FORGET_EVERYTHING:
773
0
        htab_destroy(&ego->htab_blessed);
774
0
        mkhashtab(&ego->htab_blessed);
775
        /* fall through */
776
283
   case FORGET_ACCURSED:
777
283
        htab_destroy(&ego->htab_unblessed);
778
283
        mkhashtab(&ego->htab_unblessed);
779
283
        break;
780
0
   default:
781
0
        break;
782
283
     }
783
283
}
784
785
/* FIXME: what sort of version information should we write? */
786
#define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom))
787
static const char stimeout[] = "TIMEOUT";
788
789
/* tantus labor non sit cassus */
790
static void exprt(planner *ego, printer *p)
791
0
{
792
0
     unsigned h;
793
0
     hashtab *ht = &ego->htab_blessed;
794
0
     md5 m;
795
796
0
     signature_of_configuration(&m, ego);
797
798
0
     p->print(p, 
799
0
        "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n",
800
0
        m.s[0], m.s[1], m.s[2], m.s[3]);
801
802
0
     for (h = 0; h < ht->hashsiz; ++h) {
803
0
    solution *l = ht->solutions + h;
804
0
    if (LIVEP(l)) {
805
0
         const char *reg_nam;
806
0
         int reg_id;
807
808
0
         if (SLVNDX(l) == INFEASIBLE_SLVNDX) {
809
0
        reg_nam = stimeout;
810
0
        reg_id = 0;
811
0
         } else {
812
0
        slvdesc *sp = ego->slvdescs + SLVNDX(l);
813
0
        reg_nam = sp->reg_nam;
814
0
        reg_id = sp->reg_id;
815
0
         }
816
817
         /* qui salvandos salvas gratis
818
      salva me fons pietatis */
819
0
         p->print(p, "  (%s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)\n",
820
0
      reg_nam, reg_id, 
821
0
      l->flags.l, l->flags.u, l->flags.timelimit_impatience, 
822
0
      l->s[0], l->s[1], l->s[2], l->s[3]);
823
0
    }
824
0
     }
825
0
     p->print(p, ")\n");
826
0
}
827
828
/* mors stupebit et natura
829
   cum resurget creatura */
830
static int imprt(planner *ego, scanner *sc)
831
0
{
832
0
     char buf[MAXNAM + 1];
833
0
     md5uint sig[4];
834
0
     unsigned l, u, timelimit_impatience;
835
0
     flags_t flags;
836
0
     int reg_id;
837
0
     unsigned slvndx;
838
0
     hashtab *ht = &ego->htab_blessed;
839
0
     hashtab old;
840
0
     md5 m;
841
842
0
     if (!sc->scan(sc, 
843
0
       "(" WISDOM_PREAMBLE " #x%M #x%M #x%M #x%M\n",
844
0
       sig + 0, sig + 1, sig + 2, sig + 3))
845
0
    return 0; /* don't need to restore hashtable */
846
847
0
     signature_of_configuration(&m, ego);
848
0
     if (m.s[0] != sig[0] || m.s[1] != sig[1] ||
849
0
   m.s[2] != sig[2] || m.s[3] != sig[3]) {
850
    /* invalid configuration */
851
0
    return 0;
852
0
     }
853
     
854
     /* make a backup copy of the hash table (cache the hash) */
855
0
     {
856
0
    unsigned h, hsiz = ht->hashsiz;
857
0
    old = *ht;
858
0
    old.solutions = (solution *)MALLOC(hsiz * sizeof(solution), HASHT);
859
0
    for (h = 0; h < hsiz; ++h)
860
0
         old.solutions[h] = ht->solutions[h];
861
0
     }
862
863
0
     while (1) {
864
0
    if (sc->scan(sc, ")"))
865
0
         break;
866
867
    /* qua resurget ex favilla */
868
0
    if (!sc->scan(sc, "(%*s %d #x%x #x%x #x%x #x%M #x%M #x%M #x%M)",
869
0
      MAXNAM, buf, &reg_id, &l, &u, &timelimit_impatience,
870
0
      sig + 0, sig + 1, sig + 2, sig + 3))
871
0
         goto bad;
872
873
0
    if (!strcmp(buf, stimeout) && reg_id == 0) {
874
0
         slvndx = INFEASIBLE_SLVNDX;
875
0
    } else {
876
0
         if (timelimit_impatience != 0)
877
0
        goto bad;
878
879
0
         slvndx = slookup(ego, buf, reg_id);
880
0
         if (slvndx == INFEASIBLE_SLVNDX)
881
0
        goto bad;
882
0
    }
883
884
    /* inter oves locum praesta */
885
0
    flags.l = l;
886
0
    flags.u = u;
887
0
    flags.timelimit_impatience = timelimit_impatience;
888
0
    flags.hash_info = BLESSING;
889
890
0
    CK(flags.l == l);
891
0
    CK(flags.u == u);
892
0
    CK(flags.timelimit_impatience == timelimit_impatience);
893
894
0
    if (!hlookup(ego, sig, &flags))
895
0
         hinsert(ego, sig, &flags, slvndx);
896
0
     }
897
898
0
     X(ifree0)(old.solutions);
899
0
     return 1;
900
901
0
 bad:
902
     /* ``The wisdom of FFTW must be above suspicion.'' */
903
0
     X(ifree0)(ht->solutions);
904
0
     *ht = old;
905
0
     return 0;
906
0
}
907
908
/*
909
 * create a planner
910
 */
911
planner *X(mkplanner)(void)
912
1
{
913
1
     int i;
914
915
1
     static const planner_adt padt = {
916
1
    register_solver, mkplan, forget, exprt, imprt
917
1
     };
918
919
1
     planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS);
920
921
1
     p->adt = &padt;
922
1
     p->nplan = p->nprob = 0;
923
1
     p->pcost = p->epcost = 0.0;
924
1
     p->hook = 0;
925
1
     p->cost_hook = 0;
926
1
     p->wisdom_ok_hook = 0;
927
1
     p->nowisdom_hook = 0;
928
1
     p->bogosity_hook = 0;
929
1
     p->cur_reg_nam = 0;
930
1
     p->wisdom_state = WISDOM_NORMAL;
931
932
1
     p->slvdescs = 0;
933
1
     p->nslvdesc = p->slvdescsiz = 0;
934
935
1
     p->flags.l = 0;
936
1
     p->flags.u = 0;
937
1
     p->flags.timelimit_impatience = 0;
938
1
     p->flags.hash_info = 0;
939
1
     p->nthr = 1;
940
1
     p->need_timeout_check = 1;
941
1
     p->timelimit = -1;
942
943
1
     mkhashtab(&p->htab_blessed);
944
1
     mkhashtab(&p->htab_unblessed);
945
946
9
     for (i = 0; i < PROBLEM_LAST; ++i)
947
8
    p->slvdescs_for_problem_kind[i] = -1;
948
949
1
     return p;
950
1
}
951
952
void X(planner_destroy)(planner *ego)
953
0
{
954
     /* destroy hash table */
955
0
     htab_destroy(&ego->htab_blessed);
956
0
     htab_destroy(&ego->htab_unblessed);
957
958
     /* destroy solvdesc table */
959
0
     FORALL_SOLVERS(ego, s, sp, {
960
0
    UNUSED(sp);
961
0
    X(solver_destroy)(s);
962
0
     });
963
964
0
     X(ifree0)(ego->slvdescs);
965
0
     X(ifree)(ego); /* dona eis requiem */
966
0
}
967
968
plan *X(mkplan_d)(planner *ego, problem *p)
969
3.09k
{
970
3.09k
     plan *pln = ego->adt->mkplan(ego, p);
971
3.09k
     X(problem_destroy)(p);
972
3.09k
     return pln;
973
3.09k
}
974
975
/* like X(mkplan_d), but sets/resets flags as well */
976
plan *X(mkplan_f_d)(planner *ego, problem *p, 
977
        unsigned l_set, unsigned u_set, unsigned u_reset)
978
595
{
979
595
     flags_t oflags = ego->flags;
980
595
     plan *pln;
981
982
595
     PLNR_U(ego) &= ~u_reset;
983
595
     PLNR_L(ego) &= ~u_reset;
984
595
     PLNR_L(ego) |= l_set;
985
595
     PLNR_U(ego) |= u_set | l_set;
986
595
     pln = X(mkplan_d)(ego, p);
987
595
     ego->flags = oflags;
988
595
     return pln;
989
595
}
990
991
/*
992
 * Debugging code:
993
 */
994
#ifdef FFTW_DEBUG
995
static void check(hashtab *ht)
996
{
997
     unsigned live = 0;
998
     unsigned i;
999
1000
     A(ht->nelem < ht->hashsiz);
1001
1002
     for (i = 0; i < ht->hashsiz; ++i) {
1003
    solution *l = ht->solutions + i; 
1004
    if (LIVEP(l)) 
1005
         ++live; 
1006
     }
1007
1008
     A(ht->nelem == live);
1009
1010
     for (i = 0; i < ht->hashsiz; ++i) {
1011
    solution *l1 = ht->solutions + i; 
1012
    int foundit = 0;
1013
    if (LIVEP(l1)) {
1014
         unsigned g, h = h1(ht, l1->s), d = h2(ht, l1->s);
1015
1016
         g = h;
1017
         do {
1018
        solution *l = ht->solutions + g;
1019
        if (VALIDP(l)) {
1020
       if (l1 == l)
1021
            foundit = 1;
1022
       else if (LIVEP(l) && md5eq(l1->s, l->s)) {
1023
            A(!subsumes(&l->flags, SLVNDX(l), &l1->flags));
1024
            A(!subsumes(&l1->flags, SLVNDX(l1), &l->flags));
1025
       }
1026
        } else 
1027
       break;
1028
        g = addmod(g, d, ht->hashsiz);
1029
         } while (g != h);
1030
1031
         A(foundit);
1032
    }
1033
     }
1034
}
1035
#endif