Coverage Report

Created: 2026-03-31 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gmp/mpn/hgcd2.c
Line
Count
Source
1
/* hgcd2.c
2
3
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7
Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
8
Inc.
9
10
This file is part of the GNU MP Library.
11
12
The GNU MP Library is free software; you can redistribute it and/or modify
13
it under the terms of either:
14
15
  * the GNU Lesser General Public License as published by the Free
16
    Software Foundation; either version 3 of the License, or (at your
17
    option) any later version.
18
19
or
20
21
  * the GNU General Public License as published by the Free Software
22
    Foundation; either version 2 of the License, or (at your option) any
23
    later version.
24
25
or both in parallel, as here.
26
27
The GNU MP Library is distributed in the hope that it will be useful, but
28
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
30
for more details.
31
32
You should have received copies of the GNU General Public License and the
33
GNU Lesser General Public License along with the GNU MP Library.  If not,
34
see https://www.gnu.org/licenses/.  */
35
36
#include "gmp-impl.h"
37
#include "longlong.h"
38
39
#include "mpn/generic/hgcd2-div.h"
40
41
#if GMP_NAIL_BITS != 0
42
#error Nails not implemented
43
#endif
44
45
/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
46
   matrix M. Returns 1 if we make progress, i.e. can perform at least
47
   one subtraction. Otherwise returns zero. */
48
49
/* FIXME: Possible optimizations:
50
51
   The div2 function starts with checking the most significant bit of
52
   the numerator. We can maintained normalized operands here, call
53
   hgcd with normalized operands only, which should make the code
54
   simpler and possibly faster.
55
56
   Experiment with table lookups on the most significant bits.
57
58
   This function is also a candidate for assembler implementation.
59
*/
60
int
61
mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
62
     struct hgcd_matrix1 *M)
63
579k
{
64
579k
  mp_limb_t u00, u01, u10, u11;
65
66
579k
  if (ah < 2 || bh < 2)
67
1.50k
    return 0;
68
69
578k
  if (ah > bh || (ah == bh && al > bl))
70
326k
    {
71
326k
      sub_ddmmss (ah, al, ah, al, bh, bl);
72
326k
      if (ah < 2)
73
3.43k
  return 0;
74
75
323k
      u00 = u01 = u11 = 1;
76
323k
      u10 = 0;
77
323k
    }
78
251k
  else
79
251k
    {
80
251k
      sub_ddmmss (bh, bl, bh, bl, ah, al);
81
251k
      if (bh < 2)
82
4.01k
  return 0;
83
84
247k
      u00 = u10 = u11 = 1;
85
247k
      u01 = 0;
86
247k
    }
87
88
570k
  if (ah < bh)
89
326k
    goto subtract_a;
90
91
243k
  for (;;)
92
5.41M
    {
93
5.41M
      ASSERT (ah >= bh);
94
5.41M
      if (ah == bh)
95
810
  goto done;
96
97
5.41M
      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
98
306k
  {
99
306k
    ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
100
306k
    bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
101
102
306k
    break;
103
306k
  }
104
105
      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
106
   1), affecting the second column of M. */
107
5.10M
      ASSERT (ah > bh);
108
5.10M
      sub_ddmmss (ah, al, ah, al, bh, bl);
109
110
5.10M
      if (ah < 2)
111
919
  goto done;
112
113
5.10M
      if (ah <= bh)
114
2.03M
  {
115
    /* Use q = 1 */
116
2.03M
    u01 += u00;
117
2.03M
    u11 += u10;
118
2.03M
  }
119
3.06M
      else
120
3.06M
  {
121
3.06M
    mp_limb_t r[2];
122
3.06M
    mp_limb_t q = div2 (r, ah, al, bh, bl);
123
3.06M
    al = r[0]; ah = r[1];
124
3.06M
    if (ah < 2)
125
1.94k
      {
126
        /* A is too small, but q is correct. */
127
1.94k
        u01 += q * u00;
128
1.94k
        u11 += q * u10;
129
1.94k
        goto done;
130
1.94k
      }
131
3.06M
    q++;
132
3.06M
    u01 += q * u00;
133
3.06M
    u11 += q * u10;
134
3.06M
  }
135
5.42M
    subtract_a:
136
5.42M
      ASSERT (bh >= ah);
137
5.42M
      if (ah == bh)
138
724
  goto done;
139
140
5.42M
      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
141
257k
  {
142
257k
    ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
143
257k
    bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
144
145
257k
    goto subtract_a1;
146
257k
  }
147
148
      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
149
   1), affecting the first column of M. */
150
5.17M
      sub_ddmmss (bh, bl, bh, bl, ah, al);
151
152
5.17M
      if (bh < 2)
153
723
  goto done;
154
155
5.16M
      if (bh <= ah)
156
2.03M
  {
157
    /* Use q = 1 */
158
2.03M
    u00 += u01;
159
2.03M
    u10 += u11;
160
2.03M
  }
161
3.13M
      else
162
3.13M
  {
163
3.13M
    mp_limb_t r[2];
164
3.13M
    mp_limb_t q = div2 (r, bh, bl, ah, al);
165
3.13M
    bl = r[0]; bh = r[1];
166
3.13M
    if (bh < 2)
167
1.63k
      {
168
        /* B is too small, but q is correct. */
169
1.63k
        u00 += q * u01;
170
1.63k
        u10 += q * u11;
171
1.63k
        goto done;
172
1.63k
      }
173
3.13M
    q++;
174
3.13M
    u00 += q * u01;
175
3.13M
    u10 += q * u11;
176
3.13M
  }
177
5.16M
    }
178
179
  /* NOTE: Since we discard the least significant half limb, we don't get a
180
     truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
181
  /* Single precision loop */
182
306k
  for (;;)
183
4.92M
    {
184
4.92M
      ASSERT (ah >= bh);
185
186
4.92M
      ah -= bh;
187
4.92M
      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
188
171k
  break;
189
190
4.75M
      if (ah <= bh)
191
2.13M
  {
192
    /* Use q = 1 */
193
2.13M
    u01 += u00;
194
2.13M
    u11 += u10;
195
2.13M
  }
196
2.61M
      else
197
2.61M
  {
198
2.61M
    mp_double_limb_t rq = div1 (ah, bh);
199
2.61M
    mp_limb_t q = rq.d1;
200
2.61M
    ah = rq.d0;
201
202
2.61M
    if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
203
142k
      {
204
        /* A is too small, but q is correct. */
205
142k
        u01 += q * u00;
206
142k
        u11 += q * u10;
207
142k
        break;
208
142k
      }
209
2.47M
    q++;
210
2.47M
    u01 += q * u00;
211
2.47M
    u11 += q * u10;
212
2.47M
  }
213
4.86M
    subtract_a1:
214
4.86M
      ASSERT (bh >= ah);
215
216
4.86M
      bh -= ah;
217
4.86M
      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
218
107k
  break;
219
220
4.76M
      if (bh <= ah)
221
1.86M
  {
222
    /* Use q = 1 */
223
1.86M
    u00 += u01;
224
1.86M
    u10 += u11;
225
1.86M
  }
226
2.89M
      else
227
2.89M
  {
228
2.89M
    mp_double_limb_t rq = div1 (bh, ah);
229
2.89M
    mp_limb_t q = rq.d1;
230
2.89M
    bh = rq.d0;
231
232
2.89M
    if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
233
142k
      {
234
        /* B is too small, but q is correct. */
235
142k
        u00 += q * u01;
236
142k
        u10 += q * u11;
237
142k
        break;
238
142k
      }
239
2.75M
    q++;
240
2.75M
    u00 += q * u01;
241
2.75M
    u10 += q * u11;
242
2.75M
  }
243
4.76M
    }
244
245
570k
 done:
246
570k
  M->u[0][0] = u00; M->u[0][1] = u01;
247
570k
  M->u[1][0] = u10; M->u[1][1] = u11;
248
249
570k
  return 1;
250
306k
}
251
252
/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
253
 * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
254
mp_size_t
255
mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
256
           mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
257
570k
{
258
570k
  mp_limb_t ah, bh;
259
260
  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
261
262
     r  = u00 * a
263
     r += u10 * b
264
     b *= u11
265
     b += u01 * a
266
  */
267
268
#if HAVE_NATIVE_mpn_addaddmul_1msb0
269
  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
270
  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
271
#else
272
570k
  ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
273
570k
  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
274
275
570k
  bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
276
570k
  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
277
570k
#endif
278
570k
  rp[n] = ah;
279
570k
  bp[n] = bh;
280
281
570k
  n += (ah | bh) > 0;
282
570k
  return n;
283
570k
}