/src/gmp/mpn/mod_34lsub1.c
Line | Count | Source |
1 | | /* mpn_mod_34lsub1 -- remainder modulo 2^(GMP_NUMB_BITS*3/4)-1. |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST |
4 | | CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN |
5 | | FUTURE GNU MP RELEASES. |
6 | | |
7 | | Copyright 2000-2002 Free Software Foundation, Inc. |
8 | | |
9 | | This file is part of the GNU MP Library. |
10 | | |
11 | | The GNU MP Library is free software; you can redistribute it and/or modify |
12 | | it under the terms of either: |
13 | | |
14 | | * the GNU Lesser General Public License as published by the Free |
15 | | Software Foundation; either version 3 of the License, or (at your |
16 | | option) any later version. |
17 | | |
18 | | or |
19 | | |
20 | | * the GNU General Public License as published by the Free Software |
21 | | Foundation; either version 2 of the License, or (at your option) any |
22 | | later version. |
23 | | |
24 | | or both in parallel, as here. |
25 | | |
26 | | The GNU MP Library is distributed in the hope that it will be useful, but |
27 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
28 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
29 | | for more details. |
30 | | |
31 | | You should have received copies of the GNU General Public License and the |
32 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
33 | | see https://www.gnu.org/licenses/. */ |
34 | | |
35 | | |
36 | | #include "gmp-impl.h" |
37 | | |
38 | | |
39 | | /* Calculate a remainder from {p,n} divided by 2^(GMP_NUMB_BITS*3/4)-1. |
40 | | The remainder is not fully reduced, it's any limb value congruent to |
41 | | {p,n} modulo that divisor. |
42 | | |
43 | | This implementation is only correct when GMP_NUMB_BITS is a multiple of |
44 | | 4. |
45 | | |
46 | | FIXME: If GMP_NAIL_BITS is some silly big value during development then |
47 | | it's possible the carry accumulators c0,c1,c2 could overflow. |
48 | | |
49 | | General notes: |
50 | | |
51 | | The basic idea is to use a set of N accumulators (N=3 in this case) to |
52 | | effectively get a remainder mod 2^(GMP_NUMB_BITS*N)-1 followed at the end |
53 | | by a reduction to GMP_NUMB_BITS*N/M bits (M=4 in this case) for a |
54 | | remainder mod 2^(GMP_NUMB_BITS*N/M)-1. N and M are chosen to give a good |
55 | | set of small prime factors in 2^(GMP_NUMB_BITS*N/M)-1. |
56 | | |
57 | | N=3 M=4 suits GMP_NUMB_BITS==32 and GMP_NUMB_BITS==64 quite well, giving |
58 | | a few more primes than a single accumulator N=1 does, and for no extra |
59 | | cost (assuming the processor has a decent number of registers). |
60 | | |
61 | | For strange nailified values of GMP_NUMB_BITS the idea would be to look |
62 | | for what N and M give good primes. With GMP_NUMB_BITS not a power of 2 |
63 | | the choices for M may be opened up a bit. But such things are probably |
64 | | best done in separate code, not grafted on here. */ |
65 | | |
66 | | #if GMP_NUMB_BITS % 4 == 0 |
67 | | |
68 | 55.7M | #define B1 (GMP_NUMB_BITS / 4) |
69 | 20.9M | #define B2 (B1 * 2) |
70 | 13.9M | #define B3 (B1 * 3) |
71 | | |
72 | 6.97M | #define M1 ((CNST_LIMB(1) << B1) - 1) |
73 | 6.97M | #define M2 ((CNST_LIMB(1) << B2) - 1) |
74 | 6.97M | #define M3 ((CNST_LIMB(1) << B3) - 1) |
75 | | |
76 | 6.97M | #define LOW0(n) ((n) & M3) |
77 | 6.97M | #define HIGH0(n) ((n) >> B3) |
78 | | |
79 | 6.97M | #define LOW1(n) (((n) & M2) << B1) |
80 | 6.97M | #define HIGH1(n) ((n) >> B2) |
81 | | |
82 | 6.97M | #define LOW2(n) (((n) & M1) << B2) |
83 | 6.97M | #define HIGH2(n) ((n) >> B1) |
84 | | |
85 | 6.97M | #define PARTS0(n) (LOW0(n) + HIGH0(n)) |
86 | 6.97M | #define PARTS1(n) (LOW1(n) + HIGH1(n)) |
87 | 6.97M | #define PARTS2(n) (LOW2(n) + HIGH2(n)) |
88 | | |
89 | | #define ADD(c,a,val) \ |
90 | 55.0M | do { \ |
91 | 55.0M | mp_limb_t new_c; \ |
92 | 55.0M | ADDC_LIMB (new_c, a, a, val); \ |
93 | 55.0M | (c) += new_c; \ |
94 | 55.0M | } while (0) |
95 | | |
96 | | mp_limb_t |
97 | | mpn_mod_34lsub1 (mp_srcptr p, mp_size_t n) |
98 | 3.48M | { |
99 | 3.48M | mp_limb_t c0, c1, c2; |
100 | 3.48M | mp_limb_t a0, a1, a2; |
101 | | |
102 | 3.48M | ASSERT (n >= 1); |
103 | 3.48M | ASSERT (n/3 < GMP_NUMB_MAX); |
104 | | |
105 | 3.48M | a0 = a1 = a2 = 0; |
106 | 3.48M | c0 = c1 = c2 = 0; |
107 | | |
108 | 20.7M | while ((n -= 3) >= 0) |
109 | 17.2M | { |
110 | 17.2M | ADD (c0, a0, p[0]); |
111 | 17.2M | ADD (c1, a1, p[1]); |
112 | 17.2M | ADD (c2, a2, p[2]); |
113 | 17.2M | p += 3; |
114 | 17.2M | } |
115 | | |
116 | 3.48M | if (n != -3) |
117 | 2.20M | { |
118 | 2.20M | ADD (c0, a0, p[0]); |
119 | 2.20M | if (n != -2) |
120 | 1.05M | ADD (c1, a1, p[1]); |
121 | 2.20M | } |
122 | | |
123 | 3.48M | return |
124 | 3.48M | PARTS0 (a0) + PARTS1 (a1) + PARTS2 (a2) |
125 | 3.48M | + PARTS1 (c0) + PARTS2 (c1) + PARTS0 (c2); |
126 | 3.48M | } |
127 | | |
128 | | #endif |