/src/libgmp/mpn/toom2_sqr.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_toom2_sqr -- Square {ap,an}. |
2 | | |
3 | | Contributed to the GNU project by Torbjorn Granlund. |
4 | | |
5 | | THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY |
6 | | SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
7 | | GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
8 | | |
9 | | Copyright 2006-2010, 2012, 2014, 2018, 2020 Free Software Foundation, Inc. |
10 | | |
11 | | This file is part of the GNU MP Library. |
12 | | |
13 | | The GNU MP Library is free software; you can redistribute it and/or modify |
14 | | it under the terms of either: |
15 | | |
16 | | * the GNU Lesser General Public License as published by the Free |
17 | | Software Foundation; either version 3 of the License, or (at your |
18 | | option) any later version. |
19 | | |
20 | | or |
21 | | |
22 | | * the GNU General Public License as published by the Free Software |
23 | | Foundation; either version 2 of the License, or (at your option) any |
24 | | later version. |
25 | | |
26 | | or both in parallel, as here. |
27 | | |
28 | | The GNU MP Library is distributed in the hope that it will be useful, but |
29 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
30 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
31 | | for more details. |
32 | | |
33 | | You should have received copies of the GNU General Public License and the |
34 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
35 | | see https://www.gnu.org/licenses/. */ |
36 | | |
37 | | |
38 | | #include "gmp-impl.h" |
39 | | |
40 | | /* Evaluate in: -1, 0, +inf |
41 | | |
42 | | <-s--><--n--> |
43 | | ____ ______ |
44 | | |_a1_|___a0_| |
45 | | |
46 | | v0 = a0 ^2 # A(0)^2 |
47 | | vm1 = (a0- a1)^2 # A(-1)^2 |
48 | | vinf= a1 ^2 # A(inf)^2 |
49 | | */ |
50 | | |
51 | | #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY |
52 | | #define MAYBE_sqr_toom2 1 |
53 | | #else |
54 | | #define MAYBE_sqr_toom2 \ |
55 | 33.6M | (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD) |
56 | | #endif |
57 | | |
58 | | #define TOOM2_SQR_REC(p, a, n, ws) \ |
59 | 16.8M | do { \ |
60 | 16.8M | if (! MAYBE_sqr_toom2 \ |
61 | 16.8M | || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \ |
62 | 16.8M | mpn_sqr_basecase (p, a, n); \ |
63 | 16.8M | else \ |
64 | 16.8M | mpn_toom2_sqr (p, a, n, ws); \ |
65 | 16.8M | } while (0) |
66 | | |
67 | | void |
68 | | mpn_toom2_sqr (mp_ptr pp, |
69 | | mp_srcptr ap, mp_size_t an, |
70 | | mp_ptr scratch) |
71 | 5.60M | { |
72 | 5.60M | const int __gmpn_cpuvec_initialized = 1; |
73 | 5.60M | mp_size_t n, s; |
74 | 5.60M | mp_limb_t cy, cy2; |
75 | 5.60M | mp_ptr asm1; |
76 | | |
77 | 14.1M | #define a0 ap |
78 | 8.35M | #define a1 (ap + n) |
79 | | |
80 | 5.60M | s = an >> 1; |
81 | 5.60M | n = an - s; |
82 | | |
83 | 5.60M | ASSERT (0 < s && s <= n && (n - s) == (an & 1)); |
84 | | |
85 | 5.60M | asm1 = pp; |
86 | | |
87 | | /* Compute asm1. */ |
88 | 5.60M | if ((an & 1) == 0) /* s == n */ |
89 | 2.69M | { |
90 | 2.69M | if (mpn_cmp (a0, a1, n) < 0) |
91 | 136k | { |
92 | 136k | mpn_sub_n (asm1, a1, a0, n); |
93 | 136k | } |
94 | 2.55M | else |
95 | 2.55M | { |
96 | 2.55M | mpn_sub_n (asm1, a0, a1, n); |
97 | 2.55M | } |
98 | 2.69M | } |
99 | 2.91M | else /* n - s == 1 */ |
100 | 2.91M | { |
101 | 2.91M | if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0) |
102 | 13.4k | { |
103 | 13.4k | mpn_sub_n (asm1, a1, a0, s); |
104 | 13.4k | asm1[s] = 0; |
105 | 13.4k | } |
106 | 2.89M | else |
107 | 2.89M | { |
108 | 2.89M | asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s); |
109 | 2.89M | } |
110 | 2.91M | } |
111 | | |
112 | 11.2M | #define v0 pp /* 2n */ |
113 | 11.2M | #define vinf (pp + 2 * n) /* s+s */ |
114 | 5.60M | #define vm1 scratch /* 2n */ |
115 | 5.60M | #define scratch_out scratch + 2 * n |
116 | | |
117 | | /* vm1, 2n limbs */ |
118 | 5.60M | TOOM2_SQR_REC (vm1, asm1, n, scratch_out); |
119 | | |
120 | | /* vinf, s+s limbs */ |
121 | 5.60M | TOOM2_SQR_REC (vinf, a1, s, scratch_out); |
122 | | |
123 | | /* v0, 2n limbs */ |
124 | 5.60M | TOOM2_SQR_REC (v0, ap, n, scratch_out); |
125 | | |
126 | | /* H(v0) + L(vinf) */ |
127 | 5.60M | cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n); |
128 | | |
129 | | /* L(v0) + H(v0) */ |
130 | 5.60M | cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n); |
131 | | |
132 | | /* L(vinf) + H(vinf) */ |
133 | 5.60M | cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n); |
134 | | |
135 | 5.60M | cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n); |
136 | | |
137 | 5.60M | ASSERT (cy + 1 <= 3); |
138 | 5.60M | ASSERT (cy2 <= 2); |
139 | | |
140 | 5.60M | if (LIKELY (cy <= 2)) { |
141 | 5.57M | MPN_INCR_U (pp + 2 * n, s + s, cy2); |
142 | 5.57M | MPN_INCR_U (pp + 3 * n, s + s - n, cy); |
143 | 5.57M | } else { /* cy is negative */ |
144 | | /* The total contribution of v0+vinf-vm1 can not be negative. */ |
145 | | #if WANT_ASSERT |
146 | | /* The borrow in cy stops the propagation of the carry cy2, */ |
147 | | ASSERT (cy2 == 1); |
148 | | cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2); |
149 | | ASSERT (cy == 0); |
150 | | #else |
151 | | /* we simply fill the area with zeros. */ |
152 | 32.8k | MPN_FILL (pp + 2 * n, n, 0); |
153 | 32.8k | #endif |
154 | 32.8k | } |
155 | 5.60M | } |