Coverage Report

Created: 2024-11-21 06:47

/src/libgmp/mpn/compute_powtab.c
Line
Count
Source
1
/* mpn_compute_powtab.
2
3
   Contributed to the GNU project by Torbjorn Granlund.
4
5
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 1991-2017 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
/*
38
  CAVEATS:
39
  * The exptab and powtab vectors are in opposite orders.  Probably OK.
40
  * Consider getting rid of exptab, doing bit ops on the un argument instead.
41
  * Consider rounding greatest power slightly upwards to save adjustments.
42
  * In powtab_decide, consider computing cost from just the 2-3 largest
43
    operands, since smaller operand contribute little.  This makes most sense
44
    if exptab is suppressed.
45
*/
46
47
#include "gmp-impl.h"
48
49
#ifndef DIV_1_VS_MUL_1_PERCENT
50
#define DIV_1_VS_MUL_1_PERCENT 150
51
#endif
52
53
#define SET_powers_t(dest, ptr, size, dib, b, sh) \
54
2.13M
  do {             \
55
2.13M
    dest.p = ptr;         \
56
2.13M
    dest.n = size;          \
57
2.13M
    dest.digits_in_base = dib;        \
58
2.13M
    dest.base = b;          \
59
2.13M
    dest.shift = sh;          \
60
2.13M
  } while (0)
61
62
#if DIV_1_VS_MUL_1_PERCENT > 120
63
#define HAVE_mpn_compute_powtab_mul 1
64
static void
65
mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
66
      int base, const size_t *exptab, size_t n_pows)
67
359k
{
68
359k
  mp_size_t n;
69
359k
  mp_ptr p, t;
70
359k
  mp_limb_t cy;
71
359k
  long start_idx;
72
359k
  int c;
73
74
359k
  mp_limb_t big_base = mp_bases[base].big_base;
75
359k
  int chars_per_limb = mp_bases[base].chars_per_limb;
76
77
359k
  mp_ptr powtab_mem_ptr = powtab_mem;
78
79
359k
  size_t digits_in_base = chars_per_limb;
80
81
359k
  powers_t *pt = powtab;
82
83
359k
  p = powtab_mem_ptr;
84
359k
  powtab_mem_ptr += 1;
85
359k
  p[0] = big_base;
86
87
359k
  SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
88
359k
  pt++;
89
90
359k
  t = powtab_mem_ptr;
91
359k
  powtab_mem_ptr += 2;
92
359k
  t[1] = mpn_mul_1 (t, p, 1, big_base);
93
359k
  n = 2;
94
95
359k
  digits_in_base *= 2;
96
97
359k
  c = t[0] == 0;
98
359k
  t += c;
99
359k
  n -= c;
100
359k
  mp_size_t shift = c;
101
102
359k
  SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
103
359k
  p = t;
104
359k
  pt++;
105
106
359k
  if (exptab[0] == ((size_t) chars_per_limb << n_pows))
107
121k
    {
108
121k
      start_idx = n_pows - 2;
109
121k
    }
110
238k
  else
111
238k
    {
112
238k
      if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
113
183k
  {
114
    /* 3, sometimes adjusted to 4.  */
115
183k
    t = powtab_mem_ptr;
116
183k
    powtab_mem_ptr += 4;
117
183k
    t[n] = cy = mpn_mul_1 (t, p, n, big_base);
118
183k
    n += cy != 0;;
119
120
183k
    digits_in_base += chars_per_limb;
121
122
183k
    c  = t[0] == 0;
123
183k
    t += c;
124
183k
    n -= c;
125
183k
    shift += c;
126
183k
  }
127
54.2k
      else
128
54.2k
  {
129
    /* 2 copy, will always become 3 with back-multiplication.  */
130
54.2k
    t = powtab_mem_ptr;
131
54.2k
    powtab_mem_ptr += 3;
132
54.2k
    t[0] = p[0];
133
54.2k
    t[1] = p[1];
134
54.2k
  }
135
136
238k
      SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
137
238k
      p = t;
138
238k
      pt++;
139
238k
      start_idx = n_pows - 3;
140
238k
    }
141
142
1.53M
  for (long pi = start_idx; pi >= 0; pi--)
143
1.18M
    {
144
1.18M
      t = powtab_mem_ptr;
145
1.18M
      powtab_mem_ptr += 2 * n + 2;
146
147
1.18M
      ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
148
149
1.18M
      mpn_sqr (t, p, n);
150
151
1.18M
      digits_in_base *= 2;
152
1.18M
      n *= 2;
153
1.18M
      n -= t[n - 1] == 0;
154
1.18M
      shift *= 2;
155
156
1.18M
      c = t[0] == 0;
157
1.18M
      t += c;
158
1.18M
      n -= c;
159
1.18M
      shift += c;
160
161
      /* Adjust new value if it is too small as input to the next squaring.  */
162
1.18M
      if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
163
413k
  {
164
413k
    t[n] = cy = mpn_mul_1 (t, t, n, big_base);
165
413k
    n += cy != 0;
166
167
413k
    digits_in_base += chars_per_limb;
168
169
413k
    c  = t[0] == 0;
170
413k
    t += c;
171
413k
    n -= c;
172
413k
    shift += c;
173
413k
  }
174
175
1.18M
      SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
176
177
      /* Adjust previous value if it is not at its target power.  */
178
1.18M
      if (pt[-1].digits_in_base < exptab[pi + 1])
179
601k
  {
180
601k
    mp_size_t n = pt[-1].n;
181
601k
    mp_ptr p = pt[-1].p;
182
601k
    p[n] = cy = mpn_mul_1 (p, p, n, big_base);
183
601k
    n += cy != 0;
184
185
601k
    ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
186
601k
    pt[-1].digits_in_base = exptab[pi + 1];
187
188
601k
    c = p[0] == 0;
189
601k
    pt[-1].p = p + c;
190
601k
    pt[-1].n = n - c;
191
601k
    pt[-1].shift += c;
192
601k
  }
193
194
1.18M
      p = t;
195
1.18M
      pt++;
196
1.18M
    }
197
359k
}
198
#endif
199
200
#if DIV_1_VS_MUL_1_PERCENT < 275
201
#define HAVE_mpn_compute_powtab_div 1
202
static void
203
mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
204
      int base, const size_t *exptab, size_t n_pows)
205
{
206
  mp_ptr p, t;
207
208
  mp_limb_t big_base = mp_bases[base].big_base;
209
  int chars_per_limb = mp_bases[base].chars_per_limb;
210
211
  mp_ptr powtab_mem_ptr = powtab_mem;
212
213
  size_t digits_in_base = chars_per_limb;
214
215
  powers_t *pt = powtab;
216
217
  p = powtab_mem_ptr;
218
  powtab_mem_ptr += 1;
219
  p[0] = big_base;
220
221
  SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
222
  pt++;
223
224
  mp_size_t n = 1;
225
  mp_size_t shift = 0;
226
  for (long pi = n_pows - 1; pi >= 0; pi--)
227
    {
228
      t = powtab_mem_ptr;
229
      powtab_mem_ptr += 2 * n;
230
231
      ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
232
233
      mpn_sqr (t, p, n);
234
      n = 2 * n - 1; n += t[n] != 0;
235
      digits_in_base *= 2;
236
237
      if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */
238
  {
239
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
240
    if (__GMP_LIKELY (base == 10))
241
      mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
242
            MP_BASES_BIG_BASE_BINVERTED_10,
243
            MP_BASES_BIG_BASE_CTZ_10);
244
    else
245
#endif
246
      /* FIXME: We could use _pi1 here if we add big_base_binverted and
247
         big_base_ctz fields to struct bases.  That would add about 2 KiB
248
         to mp_bases.c.
249
         FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
250
         mpn_bdiv_q_1 for more machines. */
251
      mpn_divexact_1 (t, t, n, big_base);
252
253
    n -= t[n - 1] == 0;
254
    digits_in_base -= chars_per_limb;
255
  }
256
257
      shift *= 2;
258
      /* Strip low zero limbs, but be careful to keep the result divisible by
259
   big_base.  */
260
      while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
261
  {
262
    t++;
263
    n--;
264
    shift++;
265
  }
266
      p = t;
267
268
      SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
269
      pt++;
270
    }
271
272
  /* Strip any remaining low zero limbs.  */
273
  pt -= n_pows + 1;
274
  for (long pi = n_pows; pi >= 0; pi--)
275
    {
276
      mp_ptr t = pt[pi].p;
277
      mp_size_t shift = pt[pi].shift;
278
      mp_size_t n = pt[pi].n;
279
      int c;
280
      c = t[0] == 0;
281
      t += c;
282
      n -= c;
283
      shift += c;
284
      pt[pi].p = t;
285
      pt[pi].shift = shift;
286
      pt[pi].n = n;
287
    }
288
}
289
#endif
290
291
static long
292
powtab_decide (size_t *exptab, size_t un, int base)
293
359k
{
294
359k
  int chars_per_limb = mp_bases[base].chars_per_limb;
295
359k
  long n_pows = 0;
296
2.13M
  for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
297
1.77M
    {
298
1.77M
      exptab[n_pows] = pn * chars_per_limb;
299
1.77M
      n_pows++;
300
1.77M
    }
301
359k
  exptab[n_pows] = chars_per_limb;
302
303
#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
304
  size_t pn = un - 1;
305
  size_t xn = (un + 1) >> 1;
306
  unsigned mcost = 1;
307
  unsigned dcost = 1;
308
  for (long i = n_pows - 2; i >= 0; i--)
309
    {
310
      size_t pow = (pn >> (i + 1)) + 1;
311
312
      if (pow & 1)
313
  dcost += pow;
314
315
      if (xn != (pow << i))
316
  {
317
    if (pow > 2 && (pow & 1) == 0)
318
      mcost += 2 * pow;
319
    else
320
      mcost += pow;
321
  }
322
      else
323
  {
324
    if (pow & 1)
325
      mcost += pow;
326
  }
327
    }
328
329
  dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
330
331
  if (mcost <= dcost)
332
    return n_pows;
333
  else
334
    return -n_pows;
335
#elif HAVE_mpn_compute_powtab_mul
336
359k
  return n_pows;
337
#elif HAVE_mpn_compute_powtab_div
338
  return -n_pows;
339
#else
340
#error "no powtab function available"
341
#endif
342
359k
}
343
344
size_t
345
mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
346
359k
{
347
359k
  size_t exptab[GMP_LIMB_BITS];
348
349
359k
  long n_pows = powtab_decide (exptab, un, base);
350
351
#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
352
  if (n_pows >= 0)
353
    {
354
      mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
355
      return n_pows;
356
    }
357
  else
358
    {
359
      mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
360
      return -n_pows;
361
    }
362
#elif HAVE_mpn_compute_powtab_mul
363
359k
  ASSERT (n_pows > 0);
364
359k
  mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
365
359k
  return n_pows;
366
#elif HAVE_mpn_compute_powtab_div
367
  ASSERT (n_pows < 0);
368
  mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
369
  return -n_pows;
370
#else
371
#error "no powtab function available"
372
#endif
373
359k
}