Coverage Report

Created: 2026-06-08 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gmp/mpn/hgcd2.c
Line
Count
Source
1
/* hgcd2.c
2
3
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7
Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
8
Inc.
9
10
This file is part of the GNU MP Library.
11
12
The GNU MP Library is free software; you can redistribute it and/or modify
13
it under the terms of either:
14
15
  * the GNU Lesser General Public License as published by the Free
16
    Software Foundation; either version 3 of the License, or (at your
17
    option) any later version.
18
19
or
20
21
  * the GNU General Public License as published by the Free Software
22
    Foundation; either version 2 of the License, or (at your option) any
23
    later version.
24
25
or both in parallel, as here.
26
27
The GNU MP Library is distributed in the hope that it will be useful, but
28
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
30
for more details.
31
32
You should have received copies of the GNU General Public License and the
33
GNU Lesser General Public License along with the GNU MP Library.  If not,
34
see https://www.gnu.org/licenses/.  */
35
36
#include "gmp-impl.h"
37
#include "longlong.h"
38
39
#include "mpn/generic/hgcd2-div.h"
40
41
#if GMP_NAIL_BITS != 0
42
#error Nails not implemented
43
#endif
44
45
/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
46
   matrix M. Returns 1 if we make progress, i.e. can perform at least
47
   one subtraction. Otherwise returns zero. */
48
49
/* FIXME: Possible optimizations:
50
51
   The div2 function starts with checking the most significant bit of
52
   the numerator. We can maintained normalized operands here, call
53
   hgcd with normalized operands only, which should make the code
54
   simpler and possibly faster.
55
56
   Experiment with table lookups on the most significant bits.
57
58
   This function is also a candidate for assembler implementation.
59
*/
60
int
61
mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
62
     struct hgcd_matrix1 *M)
63
472k
{
64
472k
  mp_limb_t u00, u01, u10, u11;
65
66
472k
  if (ah < 2 || bh < 2)
67
1.62k
    return 0;
68
69
471k
  if (ah > bh || (ah == bh && al > bl))
70
253k
    {
71
253k
      sub_ddmmss (ah, al, ah, al, bh, bl);
72
253k
      if (ah < 2)
73
3.21k
  return 0;
74
75
250k
      u00 = u01 = u11 = 1;
76
250k
      u10 = 0;
77
250k
    }
78
217k
  else
79
217k
    {
80
217k
      sub_ddmmss (bh, bl, bh, bl, ah, al);
81
217k
      if (bh < 2)
82
3.81k
  return 0;
83
84
213k
      u00 = u10 = u11 = 1;
85
213k
      u01 = 0;
86
213k
    }
87
88
463k
  if (ah < bh)
89
253k
    goto subtract_a;
90
91
210k
  for (;;)
92
4.39M
    {
93
4.39M
      ASSERT (ah >= bh);
94
4.39M
      if (ah == bh)
95
765
  goto done;
96
97
4.39M
      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
98
253k
  {
99
253k
    ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
100
253k
    bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
101
102
253k
    break;
103
253k
  }
104
105
      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
106
   1), affecting the second column of M. */
107
4.14M
      ASSERT (ah > bh);
108
4.14M
      sub_ddmmss (ah, al, ah, al, bh, bl);
109
110
4.14M
      if (ah < 2)
111
770
  goto done;
112
113
4.14M
      if (ah <= bh)
114
1.62M
  {
115
    /* Use q = 1 */
116
1.62M
    u01 += u00;
117
1.62M
    u11 += u10;
118
1.62M
  }
119
2.52M
      else
120
2.52M
  {
121
2.52M
    mp_limb_t r[2];
122
2.52M
    mp_limb_t q = div2 (r, ah, al, bh, bl);
123
2.52M
    al = r[0]; ah = r[1];
124
2.52M
    if (ah < 2)
125
1.81k
      {
126
        /* A is too small, but q is correct. */
127
1.81k
        u01 += q * u00;
128
1.81k
        u11 += q * u10;
129
1.81k
        goto done;
130
1.81k
      }
131
2.51M
    q++;
132
2.51M
    u01 += q * u00;
133
2.51M
    u11 += q * u10;
134
2.51M
  }
135
4.39M
    subtract_a:
136
4.39M
      ASSERT (bh >= ah);
137
4.39M
      if (ah == bh)
138
693
  goto done;
139
140
4.39M
      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
141
204k
  {
142
204k
    ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
143
204k
    bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
144
145
204k
    goto subtract_a1;
146
204k
  }
147
148
      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
149
   1), affecting the first column of M. */
150
4.19M
      sub_ddmmss (bh, bl, bh, bl, ah, al);
151
152
4.19M
      if (bh < 2)
153
721
  goto done;
154
155
4.19M
      if (bh <= ah)
156
1.67M
  {
157
    /* Use q = 1 */
158
1.67M
    u00 += u01;
159
1.67M
    u10 += u11;
160
1.67M
  }
161
2.51M
      else
162
2.51M
  {
163
2.51M
    mp_limb_t r[2];
164
2.51M
    mp_limb_t q = div2 (r, bh, bl, ah, al);
165
2.51M
    bl = r[0]; bh = r[1];
166
2.51M
    if (bh < 2)
167
1.56k
      {
168
        /* B is too small, but q is correct. */
169
1.56k
        u00 += q * u01;
170
1.56k
        u10 += q * u11;
171
1.56k
        goto done;
172
1.56k
      }
173
2.51M
    q++;
174
2.51M
    u00 += q * u01;
175
2.51M
    u10 += q * u11;
176
2.51M
  }
177
4.19M
    }
178
179
  /* NOTE: Since we discard the least significant half limb, we don't get a
180
     truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
181
  /* Single precision loop */
182
253k
  for (;;)
183
3.93M
    {
184
3.93M
      ASSERT (ah >= bh);
185
186
3.93M
      ah -= bh;
187
3.93M
      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
188
124k
  break;
189
190
3.80M
      if (ah <= bh)
191
1.67M
  {
192
    /* Use q = 1 */
193
1.67M
    u01 += u00;
194
1.67M
    u11 += u10;
195
1.67M
  }
196
2.13M
      else
197
2.13M
  {
198
2.13M
    mp_double_limb_t rq = div1 (ah, bh);
199
2.13M
    mp_limb_t q = rq.d1;
200
2.13M
    ah = rq.d0;
201
202
2.13M
    if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
203
116k
      {
204
        /* A is too small, but q is correct. */
205
116k
        u01 += q * u00;
206
116k
        u11 += q * u10;
207
116k
        break;
208
116k
      }
209
2.02M
    q++;
210
2.02M
    u01 += q * u00;
211
2.02M
    u11 += q * u10;
212
2.02M
  }
213
3.89M
    subtract_a1:
214
3.89M
      ASSERT (bh >= ah);
215
216
3.89M
      bh -= ah;
217
3.89M
      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
218
100k
  break;
219
220
3.79M
      if (bh <= ah)
221
1.52M
  {
222
    /* Use q = 1 */
223
1.52M
    u00 += u01;
224
1.52M
    u10 += u11;
225
1.52M
  }
226
2.27M
      else
227
2.27M
  {
228
2.27M
    mp_double_limb_t rq = div1 (bh, ah);
229
2.27M
    mp_limb_t q = rq.d1;
230
2.27M
    bh = rq.d0;
231
232
2.27M
    if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
233
116k
      {
234
        /* B is too small, but q is correct. */
235
116k
        u00 += q * u01;
236
116k
        u10 += q * u11;
237
116k
        break;
238
116k
      }
239
2.16M
    q++;
240
2.16M
    u00 += q * u01;
241
2.16M
    u10 += q * u11;
242
2.16M
  }
243
3.79M
    }
244
245
463k
 done:
246
463k
  M->u[0][0] = u00; M->u[0][1] = u01;
247
463k
  M->u[1][0] = u10; M->u[1][1] = u11;
248
249
463k
  return 1;
250
253k
}
251
252
/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
253
 * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
254
mp_size_t
255
mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
256
           mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
257
463k
{
258
463k
  mp_limb_t ah, bh;
259
260
  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
261
262
     r  = u00 * a
263
     r += u10 * b
264
     b *= u11
265
     b += u01 * a
266
  */
267
268
#if HAVE_NATIVE_mpn_addaddmul_1msb0
269
  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
270
  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
271
#else
272
463k
  ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
273
463k
  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
274
275
463k
  bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
276
463k
  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
277
463k
#endif
278
463k
  rp[n] = ah;
279
463k
  bp[n] = bh;
280
281
463k
  n += (ah | bh) > 0;
282
463k
  return n;
283
463k
}