Coverage Report

Created: 2025-07-23 06:43

/src/gmp/mpn/compute_powtab.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_compute_powtab.
2
3
   Contributed to the GNU project by Torbjorn Granlund.
4
5
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 1991-2017 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
/*
38
  CAVEATS:
39
  * The exptab and powtab vectors are in opposite orders.  Probably OK.
40
  * Consider getting rid of exptab, doing bit ops on the un argument instead.
41
  * Consider rounding greatest power slightly upwards to save adjustments.
42
  * In powtab_decide, consider computing cost from just the 2-3 largest
43
    operands, since smaller operand contribute little.  This makes most sense
44
    if exptab is suppressed.
45
*/
46
47
#include "gmp-impl.h"
48
49
#ifndef DIV_1_VS_MUL_1_PERCENT
50
0
#define DIV_1_VS_MUL_1_PERCENT 150
51
#endif
52
53
#define SET_powers_t(dest, ptr, size, dib, b, sh) \
54
0
  do {             \
55
0
    dest.p = ptr;         \
56
0
    dest.n = size;          \
57
0
    dest.digits_in_base = dib;        \
58
0
    dest.base = b;          \
59
0
    dest.shift = sh;          \
60
0
  } while (0)
61
62
#if DIV_1_VS_MUL_1_PERCENT > 120
63
#define HAVE_mpn_compute_powtab_mul 1
64
static void
65
mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
66
      int base, const size_t *exptab, size_t n_pows)
67
0
{
68
0
  mp_size_t n;
69
0
  mp_ptr p, t;
70
0
  mp_limb_t cy;
71
0
  long start_idx;
72
0
  int c;
73
74
0
  mp_limb_t big_base = mp_bases[base].big_base;
75
0
  int chars_per_limb = mp_bases[base].chars_per_limb;
76
77
0
  mp_ptr powtab_mem_ptr = powtab_mem;
78
79
0
  size_t digits_in_base = chars_per_limb;
80
81
0
  powers_t *pt = powtab;
82
83
0
  p = powtab_mem_ptr;
84
0
  powtab_mem_ptr += 1;
85
0
  p[0] = big_base;
86
87
0
  SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
88
0
  pt++;
89
90
0
  t = powtab_mem_ptr;
91
0
  powtab_mem_ptr += 2;
92
0
  t[1] = mpn_mul_1 (t, p, 1, big_base);
93
0
  n = 2;
94
95
0
  digits_in_base *= 2;
96
97
0
  c = t[0] == 0;
98
0
  t += c;
99
0
  n -= c;
100
0
  mp_size_t shift = c;
101
102
0
  SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
103
0
  p = t;
104
0
  pt++;
105
106
0
  if (exptab[0] == ((size_t) chars_per_limb << n_pows))
107
0
    {
108
0
      start_idx = n_pows - 2;
109
0
    }
110
0
  else
111
0
    {
112
0
      if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
113
0
  {
114
    /* 3, sometimes adjusted to 4.  */
115
0
    t = powtab_mem_ptr;
116
0
    powtab_mem_ptr += 4;
117
0
    t[n] = cy = mpn_mul_1 (t, p, n, big_base);
118
0
    n += cy != 0;;
119
120
0
    digits_in_base += chars_per_limb;
121
122
0
    c  = t[0] == 0;
123
0
    t += c;
124
0
    n -= c;
125
0
    shift += c;
126
0
  }
127
0
      else
128
0
  {
129
    /* 2 copy, will always become 3 with back-multiplication.  */
130
0
    t = powtab_mem_ptr;
131
0
    powtab_mem_ptr += 3;
132
0
    t[0] = p[0];
133
0
    t[1] = p[1];
134
0
  }
135
136
0
      SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
137
0
      p = t;
138
0
      pt++;
139
0
      start_idx = n_pows - 3;
140
0
    }
141
142
0
  for (long pi = start_idx; pi >= 0; pi--)
143
0
    {
144
0
      t = powtab_mem_ptr;
145
0
      powtab_mem_ptr += 2 * n + 2;
146
147
0
      ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
148
149
0
      mpn_sqr (t, p, n);
150
151
0
      digits_in_base *= 2;
152
0
      n *= 2;
153
0
      n -= t[n - 1] == 0;
154
0
      shift *= 2;
155
156
0
      c = t[0] == 0;
157
0
      t += c;
158
0
      n -= c;
159
0
      shift += c;
160
161
      /* Adjust new value if it is too small as input to the next squaring.  */
162
0
      if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
163
0
  {
164
0
    t[n] = cy = mpn_mul_1 (t, t, n, big_base);
165
0
    n += cy != 0;
166
167
0
    digits_in_base += chars_per_limb;
168
169
0
    c  = t[0] == 0;
170
0
    t += c;
171
0
    n -= c;
172
0
    shift += c;
173
0
  }
174
175
0
      SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
176
177
      /* Adjust previous value if it is not at its target power.  */
178
0
      if (pt[-1].digits_in_base < exptab[pi + 1])
179
0
  {
180
0
    mp_size_t n = pt[-1].n;
181
0
    mp_ptr p = pt[-1].p;
182
0
    p[n] = cy = mpn_mul_1 (p, p, n, big_base);
183
0
    n += cy != 0;
184
185
0
    ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
186
0
    pt[-1].digits_in_base = exptab[pi + 1];
187
188
0
    c = p[0] == 0;
189
0
    pt[-1].p = p + c;
190
0
    pt[-1].n = n - c;
191
0
    pt[-1].shift += c;
192
0
  }
193
194
0
      p = t;
195
0
      pt++;
196
0
    }
197
0
}
198
#endif
199
200
#if DIV_1_VS_MUL_1_PERCENT < 275
201
#define HAVE_mpn_compute_powtab_div 1
202
static void
203
mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
204
      int base, const size_t *exptab, size_t n_pows)
205
0
{
206
0
  mp_ptr p, t;
207
208
0
  mp_limb_t big_base = mp_bases[base].big_base;
209
0
  int chars_per_limb = mp_bases[base].chars_per_limb;
210
211
0
  mp_ptr powtab_mem_ptr = powtab_mem;
212
213
0
  size_t digits_in_base = chars_per_limb;
214
215
0
  powers_t *pt = powtab;
216
217
0
  p = powtab_mem_ptr;
218
0
  powtab_mem_ptr += 1;
219
0
  p[0] = big_base;
220
221
0
  SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
222
0
  pt++;
223
224
0
  mp_size_t n = 1;
225
0
  mp_size_t shift = 0;
226
0
  for (long pi = n_pows - 1; pi >= 0; pi--)
227
0
    {
228
0
      t = powtab_mem_ptr;
229
0
      powtab_mem_ptr += 2 * n;
230
231
0
      ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
232
233
0
      mpn_sqr (t, p, n);
234
0
      n = 2 * n - 1; n += t[n] != 0;
235
0
      digits_in_base *= 2;
236
237
0
      if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */
238
0
  {
239
0
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
240
0
    if (__GMP_LIKELY (base == 10))
241
0
      mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
242
0
            MP_BASES_BIG_BASE_BINVERTED_10,
243
0
            MP_BASES_BIG_BASE_CTZ_10);
244
0
    else
245
0
#endif
246
      /* FIXME: We could use _pi1 here if we add big_base_binverted and
247
         big_base_ctz fields to struct bases.  That would add about 2 KiB
248
         to mp_bases.c.
249
         FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
250
         mpn_bdiv_q_1 for more machines. */
251
0
      mpn_divexact_1 (t, t, n, big_base);
252
253
0
    n -= t[n - 1] == 0;
254
0
    digits_in_base -= chars_per_limb;
255
0
  }
256
257
0
      shift *= 2;
258
      /* Strip low zero limbs, but be careful to keep the result divisible by
259
   big_base.  */
260
0
      while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
261
0
  {
262
0
    t++;
263
0
    n--;
264
0
    shift++;
265
0
  }
266
0
      p = t;
267
268
0
      SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
269
0
      pt++;
270
0
    }
271
272
  /* Strip any remaining low zero limbs.  */
273
0
  pt -= n_pows + 1;
274
0
  for (long pi = n_pows; pi >= 0; pi--)
275
0
    {
276
0
      mp_ptr t = pt[pi].p;
277
0
      mp_size_t shift = pt[pi].shift;
278
0
      mp_size_t n = pt[pi].n;
279
0
      int c;
280
0
      c = t[0] == 0;
281
0
      t += c;
282
0
      n -= c;
283
0
      shift += c;
284
0
      pt[pi].p = t;
285
0
      pt[pi].shift = shift;
286
0
      pt[pi].n = n;
287
0
    }
288
0
}
289
#endif
290
291
static long
292
powtab_decide (size_t *exptab, size_t un, int base)
293
0
{
294
0
  int chars_per_limb = mp_bases[base].chars_per_limb;
295
0
  long n_pows = 0;
296
0
  for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
297
0
    {
298
0
      exptab[n_pows] = pn * chars_per_limb;
299
0
      n_pows++;
300
0
    }
301
0
  exptab[n_pows] = chars_per_limb;
302
303
0
#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
304
0
  size_t pn = un - 1;
305
0
  size_t xn = (un + 1) >> 1;
306
0
  unsigned mcost = 1;
307
0
  unsigned dcost = 1;
308
0
  for (long i = n_pows - 2; i >= 0; i--)
309
0
    {
310
0
      size_t pow = (pn >> (i + 1)) + 1;
311
312
0
      if (pow & 1)
313
0
  dcost += pow;
314
315
0
      if (xn != (pow << i))
316
0
  {
317
0
    if (pow > 2 && (pow & 1) == 0)
318
0
      mcost += 2 * pow;
319
0
    else
320
0
      mcost += pow;
321
0
  }
322
0
      else
323
0
  {
324
0
    if (pow & 1)
325
0
      mcost += pow;
326
0
  }
327
0
    }
328
329
0
  dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
330
331
0
  if (mcost <= dcost)
332
0
    return n_pows;
333
0
  else
334
0
    return -n_pows;
335
#elif HAVE_mpn_compute_powtab_mul
336
  return n_pows;
337
#elif HAVE_mpn_compute_powtab_div
338
  return -n_pows;
339
#else
340
#error "no powtab function available"
341
#endif
342
0
}
343
344
size_t
345
mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
346
0
{
347
0
  size_t exptab[GMP_LIMB_BITS];
348
349
0
  long n_pows = powtab_decide (exptab, un, base);
350
351
0
#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
352
0
  if (n_pows >= 0)
353
0
    {
354
0
      mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
355
0
      return n_pows;
356
0
    }
357
0
  else
358
0
    {
359
0
      mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
360
0
      return -n_pows;
361
0
    }
362
#elif HAVE_mpn_compute_powtab_mul
363
  ASSERT (n_pows > 0);
364
  mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
365
  return n_pows;
366
#elif HAVE_mpn_compute_powtab_div
367
  ASSERT (n_pows < 0);
368
  mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
369
  return -n_pows;
370
#else
371
#error "no powtab function available"
372
#endif
373
0
}