/src/libgmp/mpn/dcpi1_bdiv_qr.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed |
2 | | inverse, returning quotient and remainder. |
3 | | |
4 | | Contributed to the GNU project by Niels Möller and Torbjorn Granlund. |
5 | | |
6 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
7 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
8 | | GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. |
9 | | |
10 | | Copyright 2006, 2007, 2009, 2010, 2017 Free Software Foundation, Inc. |
11 | | |
12 | | This file is part of the GNU MP Library. |
13 | | |
14 | | The GNU MP Library is free software; you can redistribute it and/or modify |
15 | | it under the terms of either: |
16 | | |
17 | | * the GNU Lesser General Public License as published by the Free |
18 | | Software Foundation; either version 3 of the License, or (at your |
19 | | option) any later version. |
20 | | |
21 | | or |
22 | | |
23 | | * the GNU General Public License as published by the Free Software |
24 | | Foundation; either version 2 of the License, or (at your option) any |
25 | | later version. |
26 | | |
27 | | or both in parallel, as here. |
28 | | |
29 | | The GNU MP Library is distributed in the hope that it will be useful, but |
30 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
31 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
32 | | for more details. |
33 | | |
34 | | You should have received copies of the GNU General Public License and the |
35 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
36 | | see https://www.gnu.org/licenses/. */ |
37 | | |
38 | | #include "gmp-impl.h" |
39 | | |
40 | | |
41 | | /* Computes Hensel binary division of {np, 2*n} by {dp, n}. |
42 | | |
43 | | Output: |
44 | | |
45 | | q = -n * d^{-1} mod 2^{qn * GMP_NUMB_BITS}, |
46 | | |
47 | | r = (n + q * d) * 2^{-qn * GMP_NUMB_BITS} |
48 | | |
49 | | Stores q at qp. Stores the n least significant limbs of r at the high half |
50 | | of np, and returns the carry from the addition n + q*d. |
51 | | |
52 | | d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */ |
53 | | |
54 | | mp_size_t |
55 | | mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n) |
56 | 0 | { |
57 | 0 | return n; |
58 | 0 | } |
59 | | |
60 | | mp_limb_t |
61 | | mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, |
62 | | mp_limb_t dinv, mp_ptr tp) |
63 | 0 | { |
64 | 0 | mp_size_t lo, hi; |
65 | 0 | mp_limb_t cy; |
66 | 0 | mp_limb_t rh; |
67 | |
|
68 | 0 | lo = n >> 1; /* floor(n/2) */ |
69 | 0 | hi = n - lo; /* ceil(n/2) */ |
70 | |
|
71 | 0 | if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD)) |
72 | 0 | cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv); |
73 | 0 | else |
74 | 0 | cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp); |
75 | |
|
76 | 0 | mpn_mul (tp, dp + lo, hi, qp, lo); |
77 | |
|
78 | 0 | mpn_incr_u (tp + lo, cy); |
79 | 0 | rh = mpn_add (np + lo, np + lo, n + hi, tp, n); |
80 | |
|
81 | 0 | if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD)) |
82 | 0 | cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv); |
83 | 0 | else |
84 | 0 | cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp); |
85 | |
|
86 | 0 | mpn_mul (tp, qp + lo, hi, dp + hi, lo); |
87 | |
|
88 | 0 | mpn_incr_u (tp + hi, cy); |
89 | 0 | rh += mpn_add_n (np + n, np + n, tp, n); |
90 | |
|
91 | 0 | return rh; |
92 | 0 | } |
93 | | |
94 | | mp_limb_t |
95 | | mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, |
96 | | mp_srcptr dp, mp_size_t dn, mp_limb_t dinv) |
97 | 0 | { |
98 | 0 | mp_size_t qn; |
99 | 0 | mp_limb_t rr, cy; |
100 | 0 | mp_ptr tp; |
101 | 0 | TMP_DECL; |
102 | |
|
103 | 0 | TMP_MARK; |
104 | |
|
105 | 0 | ASSERT (dn >= 2); /* to adhere to mpn_sbpi1_div_qr's limits */ |
106 | 0 | ASSERT (nn - dn >= 1); /* to adhere to mpn_sbpi1_div_qr's limits */ |
107 | 0 | ASSERT (dp[0] & 1); |
108 | |
|
109 | 0 | tp = TMP_SALLOC_LIMBS (dn); |
110 | |
|
111 | 0 | qn = nn - dn; |
112 | |
|
113 | 0 | if (qn > dn) |
114 | 0 | { |
115 | | /* Reduce qn mod dn without division, optimizing small operations. */ |
116 | 0 | do |
117 | 0 | qn -= dn; |
118 | 0 | while (qn > dn); |
119 | | |
120 | | /* Perform the typically smaller block first. */ |
121 | 0 | if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD)) |
122 | 0 | cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv); |
123 | 0 | else |
124 | 0 | cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp); |
125 | |
|
126 | 0 | rr = 0; |
127 | 0 | if (qn != dn) |
128 | 0 | { |
129 | 0 | if (qn > dn - qn) |
130 | 0 | mpn_mul (tp, qp, qn, dp + qn, dn - qn); |
131 | 0 | else |
132 | 0 | mpn_mul (tp, dp + qn, dn - qn, qp, qn); |
133 | 0 | mpn_incr_u (tp + qn, cy); |
134 | |
|
135 | 0 | rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn); |
136 | 0 | cy = 0; |
137 | 0 | } |
138 | |
|
139 | 0 | np += qn; |
140 | 0 | qp += qn; |
141 | |
|
142 | 0 | qn = nn - dn - qn; |
143 | 0 | do |
144 | 0 | { |
145 | 0 | rr += mpn_add_1 (np + dn, np + dn, qn, cy); |
146 | 0 | cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp); |
147 | 0 | qp += dn; |
148 | 0 | np += dn; |
149 | 0 | qn -= dn; |
150 | 0 | } |
151 | 0 | while (qn > 0); |
152 | 0 | TMP_FREE; |
153 | 0 | return rr + cy; |
154 | 0 | } |
155 | | |
156 | 0 | if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD)) |
157 | 0 | cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv); |
158 | 0 | else |
159 | 0 | cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp); |
160 | |
|
161 | 0 | rr = 0; |
162 | 0 | if (qn != dn) |
163 | 0 | { |
164 | 0 | if (qn > dn - qn) |
165 | 0 | mpn_mul (tp, qp, qn, dp + qn, dn - qn); |
166 | 0 | else |
167 | 0 | mpn_mul (tp, dp + qn, dn - qn, qp, qn); |
168 | 0 | mpn_incr_u (tp + qn, cy); |
169 | |
|
170 | 0 | rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn); |
171 | 0 | cy = 0; |
172 | 0 | } |
173 | |
|
174 | 0 | TMP_FREE; |
175 | 0 | return rr + cy; |
176 | 0 | } |