Coverage Report

Created: 2025-03-06 07:58

/src/gmp/mpn/mu_bdiv_q.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_mu_bdiv_q(qp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^nn.
2
   storing the result in {qp,nn}.  Overlap allowed between Q and N; all other
3
   overlap disallowed.
4
5
   Contributed to the GNU project by Torbjorn Granlund.
6
7
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
8
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
9
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
10
11
Copyright 2005-2007, 2009, 2010, 2017 Free Software Foundation, Inc.
12
13
This file is part of the GNU MP Library.
14
15
The GNU MP Library is free software; you can redistribute it and/or modify
16
it under the terms of either:
17
18
  * the GNU Lesser General Public License as published by the Free
19
    Software Foundation; either version 3 of the License, or (at your
20
    option) any later version.
21
22
or
23
24
  * the GNU General Public License as published by the Free Software
25
    Foundation; either version 2 of the License, or (at your option) any
26
    later version.
27
28
or both in parallel, as here.
29
30
The GNU MP Library is distributed in the hope that it will be useful, but
31
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
32
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
33
for more details.
34
35
You should have received copies of the GNU General Public License and the
36
GNU Lesser General Public License along with the GNU MP Library.  If not,
37
see https://www.gnu.org/licenses/.  */
38
39
40
/*
41
   The idea of the algorithm used herein is to compute a smaller inverted value
42
   than used in the standard Barrett algorithm, and thus save time in the
43
   Newton iterations, and pay just a small price when using the inverted value
44
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
45
*/
46
47
#include "gmp-impl.h"
48
49
50
/* N = {np,nn}
51
   D = {dp,dn}
52
53
   Requirements: N >= D
54
     D >= 1
55
     D odd
56
     dn >= 2
57
     nn >= 2
58
     scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).
59
60
   Write quotient to Q = {qp,nn}.
61
62
   FIXME: When iterating, perhaps do the small step before loop, not after.
63
   FIXME: Try to avoid the scalar divisions when computing inverse size.
64
   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
65
    particular, when dn==in, tp and rp could use the same space.
66
   FIXME: Trim final quotient calculation to qn limbs of precision.
67
*/
68
/* Compute Q = {qp,nn} = {np,nn} / {dp,dn} mod B^nn (2-adic / Hensel
   division, per the file header), using an in-limb 2-adic inverse of the
   low limbs of D and developing the quotient in blocks of `in` limbs.

   qp       quotient, nn limbs (may overlap np)
   np,nn    dividend
   dp,dn    divisor; per the stated requirements D is odd and dn >= 2
   scratch  workspace of mpn_mu_bdiv_q_itch (nn, dn) limbs

   The two arms below differ only in how the scratch space is laid out;
   the layouts must stay in sync with mpn_mu_bdiv_q_itch.  */
static void
mpn_mu_bdiv_q_old (mp_ptr qp,
		   mp_srcptr np, mp_size_t nn,
		   mp_srcptr dp, mp_size_t dn,
		   mp_ptr scratch)
{
  mp_size_t qn;
  mp_size_t in;
  int cy, c0;
  mp_size_t tn, wn;

  qn = nn;			/* quotient has as many limbs as the dividend */

  ASSERT (dn >= 2);
  ASSERT (qn >= 2);

  if (qn > dn)
    {
      mp_size_t b;

      /* |_______________________|   dividend
			|________|   divisor  */

      /* Scratch layout for this arm (sizes in the trailing comments):  */
#define ip           scratch		/* in */
#define rp           (scratch + in)	/* dn or rest >= binvert_itch(in) */
#define tp           (scratch + in + dn)	/* dn+in or next_size(dn) */
#define scratch_out  (scratch + in + dn + tn)	/* mulmod_bnm1_itch(next_size(dn)) */

      /* Compute an inverse size that is a nice partition of the quotient.  */
      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */

      /* Some notes on allocation:

	 When in = dn, R dies when mpn_mullo returns, if in < dn the low in
	 limbs of R dies at that point.  We could save memory by letting T live
	 just under R, and let the upper part of T expand into R. These changes
	 should reduce itch to perhaps 3dn.
       */

      /* ip <- D^{-1} mod B^in; rp is free at this point and serves as
	 mpn_binvert's scratch.  */
      mpn_binvert (ip, dp, in, rp);

      cy = 0;			/* borrow carried between block iterations */

      /* Seed the partial remainder with the low dn dividend limbs and
	 produce the first in quotient limbs: Q0 = R * D^{-1} mod B^in.  */
      MPN_COPY (rp, np, dn);
      np += dn;
      mpn_mullo_n (qp, rp, ip, in);
      qn -= in;

      /* Main loop: each pass consumes in dividend limbs and emits in
	 quotient limbs, maintaining R = (N - Q*D) / B^{k*in} truncated
	 to dn limbs.  */
      while (qn > in)
	{
	  /* T <- D * Qblock; only the high part tp[dn+in-1...in] is needed.  */
	  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
	  else
	    {
	      /* Wrapped multiply mod B^tn-1; undo the wraparound by
		 subtracting the known low limbs (rp) from the wrapped part.  */
	      tn = mpn_mulmod_bnm1_next_size (dn);
	      mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
	      wn = dn + in - tn;		/* number of wrapped limbs */
	      if (wn > 0)
		{
		  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
		  mpn_decr_u (tp + wn, c0);
		}
	    }

	  qp += in;
	  if (dn != in)
	    {
	      /* Subtract tp[dn-1...in] from partial remainder.  */
	      cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
	      if (cy == 2)
		{
		  /* Two borrows cannot be fed to mpn_sub_nc below; fold one
		     of them into T instead.  */
		  mpn_incr_u (tp + dn, 1);
		  cy = 1;
		}
	    }
	  /* Subtract tp[dn+in-1...dn] from dividend.  */
	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
	  np += in;
	  /* Next quotient block from the updated partial remainder.  */
	  mpn_mullo_n (qp, rp, ip, in);
	  qn -= in;
	}

      /* Generate last qn limbs.
	 FIXME: It should be possible to limit precision here, since qn is
	 typically somewhat smaller than dn.  No big gains expected.  */

      /* Final (short) step: same structure as the loop body, but only
	 qn <= in limbs of quotient remain to be produced.  */
      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
	mpn_mul (tp, dp, dn, qp, in);		/* mulhi, need tp[qn+in-1...in] */
      else
	{
	  tn = mpn_mulmod_bnm1_next_size (dn);
	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
	  wn = dn + in - tn;		/* number of wrapped limbs */
	  if (wn > 0)
	    {
	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
	      mpn_decr_u (tp + wn, c0);
	    }
	}

      qp += in;
      if (dn != in)
	{
	  cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
	  if (cy == 2)
	    {
	      mpn_incr_u (tp + dn, 1);
	      cy = 1;
	    }
	}

      /* Only qn - (dn - in) fresh dividend limbs remain to fold in.  */
      mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);
      mpn_mullo_n (qp, rp, ip, qn);

#undef ip
#undef rp
#undef tp
#undef scratch_out
   }
  else
    {
      /* |_______________________|   dividend
		 |________________|   divisor  */

      /* qn <= dn: single Newton-style step with a half-sized inverse;
	 no rp region is needed in this arm.  */
#define ip           scratch		/* in */
#define tp           (scratch + in)	/* qn+in or next_size(qn) or rest >= binvert_itch(in) */
#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(qn)) */

      /* Compute half-sized inverse.  */
      in = qn - (qn >> 1);	/* ceil(qn/2) */

      mpn_binvert (ip, dp, in, tp);

      mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */

      /* T <- D * Qlow; high part holds what must be subtracted from the
	 dividend before computing the high quotient limbs.  */
      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
	mpn_mul (tp, dp, qn, qp, in);		/* mulhigh */
      else
	{
	  tn = mpn_mulmod_bnm1_next_size (qn);
	  mpn_mulmod_bnm1 (tp, tn, dp, qn, qp, in, scratch_out);
	  wn = qn + in - tn;		/* number of wrapped limbs */
	  if (wn > 0)
	    {
	      /* Here the true low product limbs equal the low dividend
		 limbs (Q*D == N mod B^in), so compare against np to decide
		 whether the wrapped subtraction borrowed.  */
	      c0 = mpn_cmp (tp, np, wn) < 0;
	      mpn_decr_u (tp + wn, c0);
	    }
	}

      mpn_sub_n (tp, np + in, tp + in, qn - in);
      mpn_mullo_n (qp + in, tp, ip, qn - in); /* high qn-in quotient limbs */

#undef ip
#undef tp
#undef scratch_out
    }
}
226
227
/* Public entry point: compute the quotient with the old routine, then
   negate it mod B^nn.  NOTE(review): the unconditional mpn_neg suggests
   this adapts mpn_mu_bdiv_q_old to a changed sign convention for bdiv
   quotients (callers expect -N/D mod B^nn) -- confirm against the
   callers of mpn_mu_bdiv_q.  */
void
mpn_mu_bdiv_q (mp_ptr qp,
	       mp_srcptr np, mp_size_t nn,
	       mp_srcptr dp, mp_size_t dn,
	       mp_ptr scratch)
{
  mpn_mu_bdiv_q_old (qp, np, nn, dp, dn, scratch);
  mpn_neg (qp, qp, nn);
}
236
237
mp_size_t
238
mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
239
0
{
240
0
  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
241
0
  mp_size_t b;
242
243
0
  ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);
244
245
0
  qn = nn;
246
247
0
  if (qn > dn)
248
0
    {
249
0
      b = (qn - 1) / dn + 1;  /* ceil(qn/dn), number of blocks */
250
0
      in = (qn - 1) / b + 1;  /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
251
0
      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
252
0
  {
253
0
    tn = dn + in;
254
0
    itch_out = 0;
255
0
  }
256
0
      else
257
0
  {
258
0
    tn = mpn_mulmod_bnm1_next_size (dn);
259
0
    itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
260
0
  }
261
0
      itches = dn + tn + itch_out;
262
0
    }
263
0
  else
264
0
    {
265
0
      in = qn - (qn >> 1);
266
0
      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
267
0
  {
268
0
    tn = qn + in;
269
0
    itch_out = 0;
270
0
  }
271
0
      else
272
0
  {
273
0
    tn = mpn_mulmod_bnm1_next_size (qn);
274
0
    itch_out = mpn_mulmod_bnm1_itch (tn, qn, in);
275
0
  }
276
0
      itches = tn + itch_out;
277
0
    }
278
279
0
  itch_binvert = mpn_binvert_itch (in);
280
0
  return in + MAX (itches, itch_binvert);
281
0
}