Coverage Report

Created: 2024-11-25 06:27

/src/gmp/mpn/hgcd2.c
Line
Count
Source (jump to first uncovered line)
1
/* hgcd2.c
2
3
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7
Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
8
Inc.
9
10
This file is part of the GNU MP Library.
11
12
The GNU MP Library is free software; you can redistribute it and/or modify
13
it under the terms of either:
14
15
  * the GNU Lesser General Public License as published by the Free
16
    Software Foundation; either version 3 of the License, or (at your
17
    option) any later version.
18
19
or
20
21
  * the GNU General Public License as published by the Free Software
22
    Foundation; either version 2 of the License, or (at your option) any
23
    later version.
24
25
or both in parallel, as here.
26
27
The GNU MP Library is distributed in the hope that it will be useful, but
28
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
30
for more details.
31
32
You should have received copies of the GNU General Public License and the
33
GNU Lesser General Public License along with the GNU MP Library.  If not,
34
see https://www.gnu.org/licenses/.  */
35
36
#include "gmp-impl.h"
37
#include "longlong.h"
38
39
#include "mpn/generic/hgcd2-div.h"
40
41
#if GMP_NAIL_BITS != 0
42
#error Nails not implemented
43
#endif
44
45
/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
46
   matrix M. Returns 1 if we make progress, i.e. can perform at least
47
   one subtraction. Otherwise returns zero. */
48
49
/* FIXME: Possible optimizations:
50
51
   The div2 function starts with checking the most significant bit of
52
   the numerator. We can maintained normalized operands here, call
53
   hgcd with normalized operands only, which should make the code
54
   simpler and possibly faster.
55
56
   Experiment with table lookups on the most significant bits.
57
58
   This function is also a candidate for assembler implementation.
59
*/
60
int
61
mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
62
     struct hgcd_matrix1 *M)
63
34.5k
{
64
34.5k
  mp_limb_t u00, u01, u10, u11;
65
66
34.5k
  if (ah < 2 || bh < 2)
67
0
    return 0;
68
69
34.5k
  if (ah > bh || (ah == bh && al > bl))
70
23.7k
    {
71
23.7k
      sub_ddmmss (ah, al, ah, al, bh, bl);
72
23.7k
      if (ah < 2)
73
0
  return 0;
74
75
23.7k
      u00 = u01 = u11 = 1;
76
23.7k
      u10 = 0;
77
23.7k
    }
78
10.7k
  else
79
10.7k
    {
80
10.7k
      sub_ddmmss (bh, bl, bh, bl, ah, al);
81
10.7k
      if (bh < 2)
82
0
  return 0;
83
84
10.7k
      u00 = u10 = u11 = 1;
85
10.7k
      u01 = 0;
86
10.7k
    }
87
88
34.5k
  if (ah < bh)
89
23.7k
    goto subtract_a;
90
91
10.7k
  for (;;)
92
328k
    {
93
328k
      ASSERT (ah >= bh);
94
328k
      if (ah == bh)
95
0
  goto done;
96
97
328k
      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
98
17.2k
  {
99
17.2k
    ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
100
17.2k
    bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
101
102
17.2k
    break;
103
17.2k
  }
104
105
      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
106
   1), affecting the second column of M. */
107
310k
      ASSERT (ah > bh);
108
310k
      sub_ddmmss (ah, al, ah, al, bh, bl);
109
110
310k
      if (ah < 2)
111
0
  goto done;
112
113
310k
      if (ah <= bh)
114
133k
  {
115
    /* Use q = 1 */
116
133k
    u01 += u00;
117
133k
    u11 += u10;
118
133k
  }
119
176k
      else
120
176k
  {
121
176k
    mp_limb_t r[2];
122
176k
    mp_limb_t q = div2 (r, ah, al, bh, bl);
123
176k
    al = r[0]; ah = r[1];
124
176k
    if (ah < 2)
125
0
      {
126
        /* A is too small, but q is correct. */
127
0
        u01 += q * u00;
128
0
        u11 += q * u10;
129
0
        goto done;
130
0
      }
131
176k
    q++;
132
176k
    u01 += q * u00;
133
176k
    u11 += q * u10;
134
176k
  }
135
334k
    subtract_a:
136
334k
      ASSERT (bh >= ah);
137
334k
      if (ah == bh)
138
0
  goto done;
139
140
334k
      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
141
17.2k
  {
142
17.2k
    ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
143
17.2k
    bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
144
145
17.2k
    goto subtract_a1;
146
17.2k
  }
147
148
      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
149
   1), affecting the first column of M. */
150
317k
      sub_ddmmss (bh, bl, bh, bl, ah, al);
151
152
317k
      if (bh < 2)
153
0
  goto done;
154
155
317k
      if (bh <= ah)
156
114k
  {
157
    /* Use q = 1 */
158
114k
    u00 += u01;
159
114k
    u10 += u11;
160
114k
  }
161
202k
      else
162
202k
  {
163
202k
    mp_limb_t r[2];
164
202k
    mp_limb_t q = div2 (r, bh, bl, ah, al);
165
202k
    bl = r[0]; bh = r[1];
166
202k
    if (bh < 2)
167
0
      {
168
        /* B is too small, but q is correct. */
169
0
        u00 += q * u01;
170
0
        u10 += q * u11;
171
0
        goto done;
172
0
      }
173
202k
    q++;
174
202k
    u00 += q * u01;
175
202k
    u10 += q * u11;
176
202k
  }
177
317k
    }
178
179
  /* NOTE: Since we discard the least significant half limb, we don't get a
180
     truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
181
  /* Single precision loop */
182
17.2k
  for (;;)
183
321k
    {
184
321k
      ASSERT (ah >= bh);
185
186
321k
      ah -= bh;
187
321k
      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
188
15.1k
  break;
189
190
306k
      if (ah <= bh)
191
151k
  {
192
    /* Use q = 1 */
193
151k
    u01 += u00;
194
151k
    u11 += u10;
195
151k
  }
196
155k
      else
197
155k
  {
198
155k
    mp_double_limb_t rq = div1 (ah, bh);
199
155k
    mp_limb_t q = rq.d1;
200
155k
    ah = rq.d0;
201
202
155k
    if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
203
8.63k
      {
204
        /* A is too small, but q is correct. */
205
8.63k
        u01 += q * u00;
206
8.63k
        u11 += q * u10;
207
8.63k
        break;
208
8.63k
      }
209
146k
    q++;
210
146k
    u01 += q * u00;
211
146k
    u11 += q * u10;
212
146k
  }
213
315k
    subtract_a1:
214
315k
      ASSERT (bh >= ah);
215
216
315k
      bh -= ah;
217
315k
      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
218
2.15k
  break;
219
220
312k
      if (bh <= ah)
221
112k
  {
222
    /* Use q = 1 */
223
112k
    u00 += u01;
224
112k
    u10 += u11;
225
112k
  }
226
200k
      else
227
200k
  {
228
200k
    mp_double_limb_t rq = div1 (bh, ah);
229
200k
    mp_limb_t q = rq.d1;
230
200k
    bh = rq.d0;
231
232
200k
    if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
233
8.63k
      {
234
        /* B is too small, but q is correct. */
235
8.63k
        u00 += q * u01;
236
8.63k
        u10 += q * u11;
237
8.63k
        break;
238
8.63k
      }
239
192k
    q++;
240
192k
    u00 += q * u01;
241
192k
    u10 += q * u11;
242
192k
  }
243
312k
    }
244
245
34.5k
 done:
246
34.5k
  M->u[0][0] = u00; M->u[0][1] = u01;
247
34.5k
  M->u[1][0] = u10; M->u[1][1] = u11;
248
249
34.5k
  return 1;
250
17.2k
}
251
252
/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
253
 * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
254
mp_size_t
255
mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
256
           mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
257
34.5k
{
258
34.5k
  mp_limb_t ah, bh;
259
260
  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
261
262
     r  = u00 * a
263
     r += u10 * b
264
     b *= u11
265
     b += u01 * a
266
  */
267
268
#if HAVE_NATIVE_mpn_addaddmul_1msb0
269
  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
270
  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
271
#else
272
34.5k
  ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
273
34.5k
  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
274
275
34.5k
  bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
276
34.5k
  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
277
34.5k
#endif
278
34.5k
  rp[n] = ah;
279
34.5k
  bp[n] = bh;
280
281
34.5k
  n += (ah | bh) > 0;
282
34.5k
  return n;
283
34.5k
}