Coverage Report

Created: 2023-02-22 06:39

/src/gmp-6.2.1/mpn/compute_powtab.c
Line
Count
Source
1
/* mpn_compute_powtab.
2
3
   Contributed to the GNU project by Torbjorn Granlund.
4
5
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 1991-2017 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
/*
38
  CAVEATS:
39
  * The exptab and powtab vectors are in opposite orders.  Probably OK.
40
  * Consider getting rid of exptab, doing bit ops on the un argument instead.
41
  * Consider rounding greatest power slightly upwards to save adjustments.
42
  * In powtab_decide, consider computing cost from just the 2-3 largest
43
    operands, since smaller operand contribute little.  This makes most sense
44
    if exptab is suppressed.
45
*/
46
47
#include "gmp-impl.h"
48
49
#ifndef DIV_1_VS_MUL_1_PERCENT
50
#define DIV_1_VS_MUL_1_PERCENT 150
51
#endif
52
53
#define SET_powers_t(dest, ptr, size, dib, b, sh) \
54
19.8k
  do {             \
55
19.8k
    dest.p = ptr;         \
56
19.8k
    dest.n = size;          \
57
19.8k
    dest.digits_in_base = dib;        \
58
19.8k
    dest.base = b;          \
59
19.8k
    dest.shift = sh;          \
60
19.8k
  } while (0)
61
62
#if DIV_1_VS_MUL_1_PERCENT > 120
63
#define HAVE_mpn_compute_powtab_mul 1
64
static void
65
mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
66
      int base, const size_t *exptab, size_t n_pows)
67
2.58k
{
68
2.58k
  mp_size_t n;
69
2.58k
  mp_ptr p, t;
70
2.58k
  mp_limb_t cy;
71
2.58k
  long start_idx;
72
2.58k
  int c;
73
2.58k
  mp_size_t shift;
74
2.58k
  long pi;
75
76
2.58k
  mp_limb_t big_base = mp_bases[base].big_base;
77
2.58k
  int chars_per_limb = mp_bases[base].chars_per_limb;
78
79
2.58k
  mp_ptr powtab_mem_ptr = powtab_mem;
80
81
2.58k
  size_t digits_in_base = chars_per_limb;
82
83
2.58k
  powers_t *pt = powtab;
84
85
2.58k
  p = powtab_mem_ptr;
86
2.58k
  powtab_mem_ptr += 1;
87
2.58k
  p[0] = big_base;
88
89
2.58k
  SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
90
2.58k
  pt++;
91
92
2.58k
  t = powtab_mem_ptr;
93
2.58k
  powtab_mem_ptr += 2;
94
2.58k
  t[1] = mpn_mul_1 (t, p, 1, big_base);
95
2.58k
  n = 2;
96
97
2.58k
  digits_in_base *= 2;
98
99
2.58k
  c = t[0] == 0;
100
2.58k
  t += c;
101
2.58k
  n -= c;
102
2.58k
  shift = c;
103
104
2.58k
  SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
105
2.58k
  p = t;
106
2.58k
  pt++;
107
108
2.58k
  if (exptab[0] == ((size_t) chars_per_limb << n_pows))
109
82
    {
110
82
      start_idx = n_pows - 2;
111
82
    }
112
2.50k
  else
113
2.50k
    {
114
2.50k
      if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
115
1.18k
  {
116
    /* 3, sometimes adjusted to 4.  */
117
1.18k
    t = powtab_mem_ptr;
118
1.18k
    powtab_mem_ptr += 4;
119
1.18k
    t[n] = cy = mpn_mul_1 (t, p, n, big_base);
120
1.18k
    n += cy != 0;;
121
122
1.18k
    digits_in_base += chars_per_limb;
123
124
1.18k
    c  = t[0] == 0;
125
1.18k
    t += c;
126
1.18k
    n -= c;
127
1.18k
    shift += c;
128
1.18k
  }
129
1.31k
      else
130
1.31k
  {
131
    /* 2 copy, will always become 3 with back-multiplication.  */
132
1.31k
    t = powtab_mem_ptr;
133
1.31k
    powtab_mem_ptr += 3;
134
1.31k
    t[0] = p[0];
135
1.31k
    t[1] = p[1];
136
1.31k
  }
137
138
2.50k
      SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
139
2.50k
      p = t;
140
2.50k
      pt++;
141
2.50k
      start_idx = n_pows - 3;
142
2.50k
    }
143
144
14.7k
  for (pi = start_idx; pi >= 0; pi--)
145
12.1k
    {
146
12.1k
      t = powtab_mem_ptr;
147
12.1k
      powtab_mem_ptr += 2 * n + 2;
148
149
12.1k
      ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
150
151
12.1k
      mpn_sqr (t, p, n);
152
153
12.1k
      digits_in_base *= 2;
154
12.1k
      n *= 2;
155
12.1k
      n -= t[n - 1] == 0;
156
12.1k
      shift *= 2;
157
158
12.1k
      c = t[0] == 0;
159
12.1k
      t += c;
160
12.1k
      n -= c;
161
12.1k
      shift += c;
162
163
      /* Adjust new value if it is too small as input to the next squaring.  */
164
12.1k
      if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
165
5.14k
  {
166
5.14k
    t[n] = cy = mpn_mul_1 (t, t, n, big_base);
167
5.14k
    n += cy != 0;
168
169
5.14k
    digits_in_base += chars_per_limb;
170
171
5.14k
    c  = t[0] == 0;
172
5.14k
    t += c;
173
5.14k
    n -= c;
174
5.14k
    shift += c;
175
5.14k
  }
176
177
12.1k
      SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
178
179
      /* Adjust previous value if it is not at its target power.  */
180
12.1k
      if (pt[-1].digits_in_base < exptab[pi + 1])
181
9.91k
  {
182
9.91k
    mp_size_t n = pt[-1].n;
183
9.91k
    mp_ptr p = pt[-1].p;
184
9.91k
    p[n] = cy = mpn_mul_1 (p, p, n, big_base);
185
9.91k
    n += cy != 0;
186
187
9.91k
    ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
188
9.91k
    pt[-1].digits_in_base = exptab[pi + 1];
189
190
9.91k
    c = p[0] == 0;
191
9.91k
    pt[-1].p = p + c;
192
9.91k
    pt[-1].n = n - c;
193
9.91k
    pt[-1].shift += c;
194
9.91k
  }
195
196
12.1k
      p = t;
197
12.1k
      pt++;
198
12.1k
    }
199
2.58k
}
200
#endif
201
202
#if DIV_1_VS_MUL_1_PERCENT < 275
203
#define HAVE_mpn_compute_powtab_div 1
204
static void
205
mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
206
      int base, const size_t *exptab, size_t n_pows)
207
{
208
  mp_ptr p, t;
209
210
  mp_limb_t big_base = mp_bases[base].big_base;
211
  int chars_per_limb = mp_bases[base].chars_per_limb;
212
213
  mp_ptr powtab_mem_ptr = powtab_mem;
214
215
  size_t digits_in_base = chars_per_limb;
216
217
  powers_t *pt = powtab;
218
219
  mp_size_t n = 1;
220
  mp_size_t shift = 0;
221
  long pi;
222
223
  p = powtab_mem_ptr;
224
  powtab_mem_ptr += 1;
225
  p[0] = big_base;
226
227
  SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
228
  pt++;
229
230
  for (pi = n_pows - 1; pi >= 0; pi--)
231
    {
232
      t = powtab_mem_ptr;
233
      powtab_mem_ptr += 2 * n;
234
235
      ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
236
237
      mpn_sqr (t, p, n);
238
      n = 2 * n - 1; n += t[n] != 0;
239
      digits_in_base *= 2;
240
241
      if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */
242
  {
243
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
244
    if (__GMP_LIKELY (base == 10))
245
      mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
246
            MP_BASES_BIG_BASE_BINVERTED_10,
247
            MP_BASES_BIG_BASE_CTZ_10);
248
    else
249
#endif
250
      /* FIXME: We could use _pi1 here if we add big_base_binverted and
251
         big_base_ctz fields to struct bases.  That would add about 2 KiB
252
         to mp_bases.c.
253
         FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
254
         mpn_bdiv_q_1 for more machines. */
255
      mpn_divexact_1 (t, t, n, big_base);
256
257
    n -= t[n - 1] == 0;
258
    digits_in_base -= chars_per_limb;
259
  }
260
261
      shift *= 2;
262
      /* Strip low zero limbs, but be careful to keep the result divisible by
263
   big_base.  */
264
      while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
265
  {
266
    t++;
267
    n--;
268
    shift++;
269
  }
270
      p = t;
271
272
      SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
273
      pt++;
274
    }
275
276
  /* Strip any remaining low zero limbs.  */
277
  pt -= n_pows + 1;
278
  for (pi = n_pows; pi >= 0; pi--)
279
    {
280
      mp_ptr t = pt[pi].p;
281
      mp_size_t shift = pt[pi].shift;
282
      mp_size_t n = pt[pi].n;
283
      int c;
284
      c = t[0] == 0;
285
      t += c;
286
      n -= c;
287
      shift += c;
288
      pt[pi].p = t;
289
      pt[pi].shift = shift;
290
      pt[pi].n = n;
291
    }
292
}
293
#endif
294
295
static long
296
powtab_decide (size_t *exptab, size_t un, int base)
297
2.58k
{
298
2.58k
  int chars_per_limb = mp_bases[base].chars_per_limb;
299
2.58k
  long n_pows = 0;
300
2.58k
  size_t pn;
301
19.8k
  for (pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
302
17.2k
    {
303
17.2k
      exptab[n_pows] = pn * chars_per_limb;
304
17.2k
      n_pows++;
305
17.2k
    }
306
2.58k
  exptab[n_pows] = chars_per_limb;
307
308
#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
309
  {
310
  size_t pn = un - 1;
311
  size_t xn = (un + 1) >> 1;
312
  unsigned mcost = 1;
313
  unsigned dcost = 1;
314
  long i;
315
  for (i = n_pows - 2; i >= 0; i--)
316
    {
317
      size_t pow = (pn >> (i + 1)) + 1;
318
319
      if (pow & 1)
320
  dcost += pow;
321
322
      if (xn != (pow << i))
323
  {
324
    if (pow > 2 && (pow & 1) == 0)
325
      mcost += 2 * pow;
326
    else
327
      mcost += pow;
328
  }
329
      else
330
  {
331
    if (pow & 1)
332
      mcost += pow;
333
  }
334
    }
335
336
  dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
337
338
  if (mcost <= dcost)
339
    return n_pows;
340
  else
341
    return -n_pows;
342
  }
343
#elif HAVE_mpn_compute_powtab_mul
344
2.58k
  return n_pows;
345
#elif HAVE_mpn_compute_powtab_div
346
  return -n_pows;
347
#else
348
#error "no powtab function available"
349
#endif
350
2.58k
}
351
352
size_t
353
mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
354
2.58k
{
355
2.58k
  size_t exptab[GMP_LIMB_BITS];
356
357
2.58k
  long n_pows = powtab_decide (exptab, un, base);
358
359
#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
360
  if (n_pows >= 0)
361
    {
362
      mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
363
      return n_pows;
364
    }
365
  else
366
    {
367
      mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
368
      return -n_pows;
369
    }
370
#elif HAVE_mpn_compute_powtab_mul
371
2.58k
  ASSERT (n_pows > 0);
372
2.58k
  mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
373
2.58k
  return n_pows;
374
#elif HAVE_mpn_compute_powtab_div
375
  ASSERT (n_pows < 0);
376
  mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
377
  return -n_pows;
378
#else
379
#error "no powtab function available"
380
#endif
381
2.58k
}