/* mpn_divexact_1 -- mpn by limb exact division.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2000-2003, 2005, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"


/* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
   q will only be correct if d divides a exactly.

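   In outline the method is this (a sketch only; the loops below are the
   real code).  Once d is reduced to its odd part it has an inverse modulo
   the limb base B = 2^GMP_NUMB_BITS, obtained by binvert_limb.  Going
   from the least significant limb upwards, with b the borrow into limb i,

       q[i]   = (a[i] - b) * inverse  (mod B)
       next b = high limb of q[i]*d, plus 1 if a[i] < b

   so each limb costs two multiplies (one low half, one high half) and no
   division instructions at all.
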
   A separate loop is used for shift==0 because n<<GMP_LIMB_BITS doesn't
   give zero on all CPUs (for instance it doesn't on the x86s, where the
   hardware truncates the shift count).  This separate loop might run
   faster too, helping odd divisors.

   Possibilities:

   mpn_divexact_1c could be created, accepting and returning c.  This would
   let a long calculation be done piece by piece.  Currently there's no
   particular need for that, and not returning c means that a final umul can
   be skipped.

   Another use for returning c would be letting the caller know whether the
   division was in fact exact.  It would work just to return the carry bit
   "c=(l>s)" and let the caller do a final umul if interested.

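   Today that check can be done separately with mpn_mod_1, at the cost of
   an extra pass over the operand (a usage sketch):

       if (mpn_mod_1 (src, size, divisor) == 0)
         mpn_divexact_1 (dst, src, size, divisor);
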
   When the divisor is even, the factors of two could be handled with a
   separate mpn_rshift, instead of shifting on the fly.  That might be
   faster on some CPUs and would mean just the shift==0 style loop would be
   needed.

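   Such a scheme would look something like the following sketch (not what
   the code below does; note shift >= 1 whenever divisor is even, and
   exactness guarantees the shifted-out low bits of the dividend are zero):

       count_trailing_zeros (shift, divisor);
       mpn_rshift (dst, src, size, shift);
       mpn_divexact_1 (dst, dst, size, divisor >> shift);
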
   If n<<GMP_LIMB_BITS gives zero on a particular CPU then the separate
   shift==0 loop is unnecessary, and could be eliminated if there's no great
   speed difference.

   It's not clear whether "/" is the best way to handle size==1.  Alpha gcc
   2.95 for instance has a poor "/" and might prefer the modular method.
   Perhaps a tuned parameter should control this.

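   The "/" variant would just be an early exit (a sketch; it would go
   before the divisor is stripped of its factors of two):

       if (size == 1)
         {
           dst[0] = src[0] / divisor;
           return;
         }
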
   If src[size-1] < divisor then dst[size-1] will be zero, and one divide
   step could be skipped.  A test at last step for s<divisor (or ls in the
   even case) might be a good way to do that.  But if this code is often
   used with small divisors then it might not be worth bothering.  */

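/* For exposition only (not used by the library): a self-contained sketch
   of what binvert_limb computes.  For odd d, Newton's iteration
   inv = inv*(2 - d*inv) doubles the number of correct low bits of the
   inverse each pass, and inv = d is a valid seed since d*d == 1 mod 8 for
   any odd d.  binvert_limb itself seeds from a table lookup and unrolls
   the iteration.  This sketch assumes a nail-less build, where mp_limb_t
   arithmetic is mod 2^GMP_LIMB_BITS.  */
static mp_limb_t
example_binvert (mp_limb_t d)  /* hypothetical helper; d must be odd */
{
  mp_limb_t  inv = d;          /* correct to 3 bits */
  int  bits;
  for (bits = 3; bits < GMP_LIMB_BITS; bits *= 2)
    inv *= 2 - d * inv;        /* correct bits double each pass */
  return inv;                  /* d * inv == 1 mod 2^GMP_LIMB_BITS */
}
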
void
mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
{
  mp_size_t  i;
  mp_limb_t  c, h, l, ls, s, s_next, inverse, dummy;
  unsigned   shift;

  ASSERT (size >= 1);
  ASSERT (divisor != 0);
  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
  ASSERT_MPN (src, size);
  ASSERT_LIMB (divisor);

  /* Reduce an even divisor to its odd part; the factors of 2 are instead
     shifted out of the dividend on the fly below.  */
  if ((divisor & 1) == 0)
    {
      count_trailing_zeros (shift, divisor);
      divisor >>= shift;
    }
  else
    shift = 0;

  binvert_limb (inverse, divisor);  /* 1/divisor mod 2^GMP_NUMB_BITS */
  divisor <<= GMP_NAIL_BITS;

  if (shift != 0)
    {
      c = 0;

      s = src[0];

      for (i = 1; i < size; i++)
        {
          /* Form the current dividend limb, shifted down by "shift".  */
          s_next = src[i];
          ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
          s = s_next;

          SUBC_LIMB (c, l, ls, c);            /* l = ls - c, c = borrow out */

          l = (l * inverse) & GMP_NUMB_MASK;  /* quotient limb */
          dst[i - 1] = l;

          umul_ppmm (h, dummy, l, divisor);   /* borrow into the next limb */
          c += h;
        }

      /* Last limb; the division being exact, no borrow propagates out and
         the final umul can be skipped.  */
      ls = s >> shift;
      l = ls - c;
      l = (l * inverse) & GMP_NUMB_MASK;
      dst[size - 1] = l;
    }
  else
    {
      s = src[0];

      l = (s * inverse) & GMP_NUMB_MASK;      /* first quotient limb */
      dst[0] = l;
      c = 0;

      for (i = 1; i < size; i++)
        {
          umul_ppmm (h, dummy, l, divisor);   /* borrow from the previous limb */
          c += h;

          s = src[i];
          SUBC_LIMB (c, l, s, c);             /* l = s - c, c = borrow out */

          l = (l * inverse) & GMP_NUMB_MASK;  /* quotient limb */
          dst[i] = l;
        }
    }
}
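
/* Usage sketch (illustrative values, not part of this file): dividing out
   a known factor in place,

       mp_limb_t  x[2] = { 15, 0 };
       mpn_divexact_1 (x, x, 2, CNST_LIMB (3));   -- x becomes { 5, 0 }

   The requirement is only that the divisor divides the full operand
   exactly; mpz_divexact_ui is the corresponding call at the mpz level.  */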