Coverage Report

Created: 2024-11-25 06:31

/src/gmp/mpn/hgcd2.c
Line
Count
Source
1
/* hgcd2.c
2
3
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7
Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
8
Inc.
9
10
This file is part of the GNU MP Library.
11
12
The GNU MP Library is free software; you can redistribute it and/or modify
13
it under the terms of either:
14
15
  * the GNU Lesser General Public License as published by the Free
16
    Software Foundation; either version 3 of the License, or (at your
17
    option) any later version.
18
19
or
20
21
  * the GNU General Public License as published by the Free Software
22
    Foundation; either version 2 of the License, or (at your option) any
23
    later version.
24
25
or both in parallel, as here.
26
27
The GNU MP Library is distributed in the hope that it will be useful, but
28
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
30
for more details.
31
32
You should have received copies of the GNU General Public License and the
33
GNU Lesser General Public License along with the GNU MP Library.  If not,
34
see https://www.gnu.org/licenses/.  */
35
36
#include "gmp-impl.h"
37
#include "longlong.h"
38
39
#include "mpn/generic/hgcd2-div.h"
40
41
#if GMP_NAIL_BITS != 0
42
#error Nails not implemented
43
#endif
44
45
/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
46
   matrix M. Returns 1 if we make progress, i.e. can perform at least
47
   one subtraction. Otherwise returns zero. */
48
49
/* FIXME: Possible optimizations:
50
51
   The div2 function starts with checking the most significant bit of
52
   the numerator. We can maintained normalized operands here, call
53
   hgcd with normalized operands only, which should make the code
54
   simpler and possibly faster.
55
56
   Experiment with table lookups on the most significant bits.
57
58
   This function is also a candidate for assembler implementation.
59
*/
60
int
61
mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
62
     struct hgcd_matrix1 *M)
63
608k
{
64
608k
  mp_limb_t u00, u01, u10, u11;
65
66
608k
  if (ah < 2 || bh < 2)
67
1.66k
    return 0;
68
69
606k
  if (ah > bh || (ah == bh && al > bl))
70
343k
    {
71
343k
      sub_ddmmss (ah, al, ah, al, bh, bl);
72
343k
      if (ah < 2)
73
3.19k
  return 0;
74
75
340k
      u00 = u01 = u11 = 1;
76
340k
      u10 = 0;
77
340k
    }
78
262k
  else
79
262k
    {
80
262k
      sub_ddmmss (bh, bl, bh, bl, ah, al);
81
262k
      if (bh < 2)
82
4.09k
  return 0;
83
84
258k
      u00 = u10 = u11 = 1;
85
258k
      u01 = 0;
86
258k
    }
87
88
599k
  if (ah < bh)
89
344k
    goto subtract_a;
90
91
255k
  for (;;)
92
5.68M
    {
93
5.68M
      ASSERT (ah >= bh);
94
5.68M
      if (ah == bh)
95
747
  goto done;
96
97
5.68M
      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
98
322k
  {
99
322k
    ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
100
322k
    bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
101
102
322k
    break;
103
322k
  }
104
105
      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
106
   1), affecting the second column of M. */
107
5.36M
      ASSERT (ah > bh);
108
5.36M
      sub_ddmmss (ah, al, ah, al, bh, bl);
109
110
5.36M
      if (ah < 2)
111
835
  goto done;
112
113
5.36M
      if (ah <= bh)
114
2.14M
  {
115
    /* Use q = 1 */
116
2.14M
    u01 += u00;
117
2.14M
    u11 += u10;
118
2.14M
  }
119
3.22M
      else
120
3.22M
  {
121
3.22M
    mp_limb_t r[2];
122
3.22M
    mp_limb_t q = div2 (r, ah, al, bh, bl);
123
3.22M
    al = r[0]; ah = r[1];
124
3.22M
    if (ah < 2)
125
1.78k
      {
126
        /* A is too small, but q is correct. */
127
1.78k
        u01 += q * u00;
128
1.78k
        u11 += q * u10;
129
1.78k
        goto done;
130
1.78k
      }
131
3.22M
    q++;
132
3.22M
    u01 += q * u00;
133
3.22M
    u11 += q * u10;
134
3.22M
  }
135
5.70M
    subtract_a:
136
5.70M
      ASSERT (bh >= ah);
137
5.70M
      if (ah == bh)
138
620
  goto done;
139
140
5.70M
      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
141
270k
  {
142
270k
    ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
143
270k
    bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
144
145
270k
    goto subtract_a1;
146
270k
  }
147
148
      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
149
   1), affecting the first column of M. */
150
5.43M
      sub_ddmmss (bh, bl, bh, bl, ah, al);
151
152
5.43M
      if (bh < 2)
153
754
  goto done;
154
155
5.43M
      if (bh <= ah)
156
2.13M
  {
157
    /* Use q = 1 */
158
2.13M
    u00 += u01;
159
2.13M
    u10 += u11;
160
2.13M
  }
161
3.29M
      else
162
3.29M
  {
163
3.29M
    mp_limb_t r[2];
164
3.29M
    mp_limb_t q = div2 (r, bh, bl, ah, al);
165
3.29M
    bl = r[0]; bh = r[1];
166
3.29M
    if (bh < 2)
167
1.60k
      {
168
        /* B is too small, but q is correct. */
169
1.60k
        u00 += q * u01;
170
1.60k
        u10 += q * u11;
171
1.60k
        goto done;
172
1.60k
      }
173
3.29M
    q++;
174
3.29M
    u00 += q * u01;
175
3.29M
    u10 += q * u11;
176
3.29M
  }
177
5.43M
    }
178
179
  /* NOTE: Since we discard the least significant half limb, we don't get a
180
     truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
181
  /* Single precision loop */
182
322k
  for (;;)
183
5.18M
    {
184
5.18M
      ASSERT (ah >= bh);
185
186
5.18M
      ah -= bh;
187
5.18M
      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
188
180k
  break;
189
190
4.99M
      if (ah <= bh)
191
2.25M
  {
192
    /* Use q = 1 */
193
2.25M
    u01 += u00;
194
2.25M
    u11 += u10;
195
2.25M
  }
196
2.74M
      else
197
2.74M
  {
198
2.74M
    mp_double_limb_t rq = div1 (ah, bh);
199
2.74M
    mp_limb_t q = rq.d1;
200
2.74M
    ah = rq.d0;
201
202
2.74M
    if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
203
149k
      {
204
        /* A is too small, but q is correct. */
205
149k
        u01 += q * u00;
206
149k
        u11 += q * u10;
207
149k
        break;
208
149k
      }
209
2.59M
    q++;
210
2.59M
    u01 += q * u00;
211
2.59M
    u11 += q * u10;
212
2.59M
  }
213
5.12M
    subtract_a1:
214
5.12M
      ASSERT (bh >= ah);
215
216
5.12M
      bh -= ah;
217
5.12M
      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
218
112k
  break;
219
220
5.00M
      if (bh <= ah)
221
1.96M
  {
222
    /* Use q = 1 */
223
1.96M
    u00 += u01;
224
1.96M
    u10 += u11;
225
1.96M
  }
226
3.04M
      else
227
3.04M
  {
228
3.04M
    mp_double_limb_t rq = div1 (bh, ah);
229
3.04M
    mp_limb_t q = rq.d1;
230
3.04M
    bh = rq.d0;
231
232
3.04M
    if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
233
149k
      {
234
        /* B is too small, but q is correct. */
235
149k
        u00 += q * u01;
236
149k
        u10 += q * u11;
237
149k
        break;
238
149k
      }
239
2.89M
    q++;
240
2.89M
    u00 += q * u01;
241
2.89M
    u10 += q * u11;
242
2.89M
  }
243
5.00M
    }
244
245
599k
 done:
246
599k
  M->u[0][0] = u00; M->u[0][1] = u01;
247
599k
  M->u[1][0] = u10; M->u[1][1] = u11;
248
249
599k
  return 1;
250
322k
}
251
252
/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
253
 * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
254
mp_size_t
255
mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
256
           mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
257
599k
{
258
599k
  mp_limb_t ah, bh;
259
260
  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
261
262
     r  = u00 * a
263
     r += u10 * b
264
     b *= u11
265
     b += u01 * a
266
  */
267
268
#if HAVE_NATIVE_mpn_addaddmul_1msb0
269
  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
270
  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
271
#else
272
599k
  ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
273
599k
  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
274
275
599k
  bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
276
599k
  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
277
599k
#endif
278
599k
  rp[n] = ah;
279
599k
  bp[n] = bh;
280
281
599k
  n += (ah | bh) > 0;
282
599k
  return n;
283
599k
}