/src/boringssl/crypto/fipsmodule/ec/util.c.inc

Source
/* Copyright (c) 2015, Google Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

#include <openssl/base.h>

#include <openssl/ec.h>

#include "internal.h"


// This function looks at 5+1 scalar bits (5 current, 1 adjacent less
// significant bit), and recodes them into a signed digit for use in fast point
// multiplication: the use of signed rather than unsigned digits means that
// fewer points need to be precomputed, given that point inversion is easy (a
// precomputed point dP makes -dP available as well).
//
// BACKGROUND:
//
// Signed digits for multiplication were introduced by Booth ("A signed binary
// multiplication technique", Quart. Journ. Mech. and Applied Math., vol. IV,
// pt. 2 (1951), pp. 236-240), in that case for multiplication of integers.
// Booth's original encoding did not generally improve the density of nonzero
// digits over the binary representation, and was merely meant to simplify the
// handling of signed factors given in two's complement; but it has since been
// shown to be the basis of various signed-digit representations that do have
// further advantages, including the wNAF, using the following general
// approach:
//
// (1) Given a binary representation
//
//       b_k  ...  b_2  b_1  b_0,
//
//     of a nonnegative integer (b_k in {0, 1}), rewrite it in digits 0, 1, -1
//     by using bit-wise subtraction as follows:
//
//        b_k     b_(k-1)  ...  b_2  b_1  b_0
//      -         b_k      ...  b_3  b_2  b_1  b_0
//       -----------------------------------------
//        s_(k+1) s_k      ...  s_3  s_2  s_1  s_0
//
//     A left-shift followed by subtraction of the original value yields a new
//     representation of the same value, using signed bits s_i = b_(i-1) - b_i.
//     This representation from Booth's paper has since appeared in the
//     literature under a variety of different names including "reversed binary
//     form", "alternating greedy expansion", "mutual opposite form", and
//     "sign-alternating {+-1}-representation".
//
//     An interesting property is that among the nonzero bits, values 1 and -1
//     strictly alternate.
//
// (2) Various window schemes can be applied to the Booth representation of
//     integers: for example, right-to-left sliding windows yield the wNAF
//     (a signed-digit encoding independently discovered by various researchers
//     in the 1990s), and left-to-right sliding windows yield a left-to-right
//     equivalent of the wNAF (independently discovered by various researchers
//     around 2004).
//
// To prevent leaking information through side channels in point multiplication,
// we need to recode the given integer into a regular pattern: sliding windows
// as in wNAFs won't do, we need their fixed-window equivalent -- which is a few
// decades older: we'll be using the so-called "modified Booth encoding" due to
// MacSorley ("High-speed arithmetic in binary computers", Proc. IRE, vol. 49
// (1961), pp. 67-91), in a radix-2^5 setting.  That is, we always combine five
// signed bits into a signed digit:
//
//       s_(5j + 4) s_(5j + 3) s_(5j + 2) s_(5j + 1) s_(5j)
//
// The sign-alternating property implies that the resulting digit values are
// integers from -16 to 16.
//
// Of course, we don't actually need to compute the signed digits s_i as an
// intermediate step (that's just a nice way to see how this scheme relates
// to the wNAF): a direct computation obtains the recoded digit from the
// six bits b_(5j + 4) ... b_(5j - 1).
//
// This function takes those six bits as an integer (0 .. 63), writing the
// recoded digit to *sign (0 for positive, 1 for negative) and *digit (absolute
// value, in the range 0 .. 16).  Note that this integer essentially provides
// the input bits "shifted to the left" by one position: for example, the input
// to compute the least significant recoded digit, given that there's no bit
// b_-1, has to be b_4 b_3 b_2 b_1 b_0 0.
//
// DOUBLING CASE:
//
// Point addition formulas for short Weierstrass curves are often incomplete.
// Edge cases such as P + P or P + ∞ must be handled separately. This
// complicates constant-time requirements. P + ∞ cannot be avoided (any window
// may be zero) and is handled with constant-time selects. P + P (where P is not
// ∞) usually is not. Instead, windowing strategies are chosen to avoid this
// case. Whether this happens depends on the group order.
//
// Let w be the window width (in this function, w = 5). The non-trivial doubling
// case in single-point scalar multiplication may occur if and only if the
// 2^(w-1) bit of the group order is zero.
//
// Note the above only holds if the scalar is fully reduced and the group order
// is a prime that is much larger than 2^w. It also only holds when windows
// are applied from most significant to least significant, doubling between each
// window. It does not apply to more complex table strategies such as
// |EC_GFp_nistz256_method|.
//
// PROOF:
//
// Let n be the group order. Let l be the number of bits needed to represent n.
// Assume there exists some 0 <= k < n such that signed w-bit windowed
// multiplication hits the doubling case.
//
// Windowed multiplication consists of iterating over groups of s_i (defined
// above based on k's binary representation) from most to least significant. At
// iteration i (for i = ..., 3w, 2w, w, 0, starting from the most significant
// window), we:
//
//  1. Double the accumulator A, w times. Let A_i be the value of A at this
//     point.
//
//  2. Set A to T_i + A_i, where T_i is a precomputed multiple of P
//     corresponding to the window s_(i+w-1) ... s_i.
//
// Let j be the index such that A_j = T_j ≠ ∞. Looking at A_i and T_i as
// multiples of P, define a_i and t_i to be scalar coefficients of A_i and T_i.
// Thus a_j = t_j ≠ 0 (mod n). Note a_i and t_i may not be reduced mod n. t_i is
// the value of the w signed bits s_(i+w-1) ... s_i. a_i is computed as a_i =
// 2^w * (a_(i+w) + t_(i+w)).
//
// t_i is bounded by -2^(w-1) <= t_i <= 2^(w-1). Additionally, we may write it
// in terms of unsigned bits b_i. t_i consists of signed bits s_(i+w-1) ... s_i.
// This is computed as:
//
//         b_(i+w-2) b_(i+w-3)  ...  b_i      b_(i-1)
//      -  b_(i+w-1) b_(i+w-2)  ...  b_(i+1)  b_i
//       --------------------------------------------
//   t_i = s_(i+w-1) s_(i+w-2)  ...  s_(i+1)  s_i
//
// Observe that b_(i+w-2) through b_i occur in both terms. Let x be the integer
// represented by that bit string, i.e. 2^(w-2)*b_(i+w-2) + ... + b_i.
//
//   t_i = (2*x + b_(i-1)) - (2^(w-1)*b_(i+w-1) + x)
//       = x - 2^(w-1)*b_(i+w-1) + b_(i-1)
//
// Or, using C notation for bit operations:
//
//   t_i = (k>>i) & ((1<<(w-1)) - 1) - (k>>i) & (1<<(w-1)) + (k>>(i-1)) & 1
//
// Note b_(i-1) is added in left-shifted by one (or doubled) from its place.
// This is compensated by t_(i-w)'s subtraction term. Thus, a_i may be computed
// by adding b_l b_(l-1) ... b_(i+w+1) b_(i+w) and an extra copy of b_(i+w-1),
// with bit positions taken relative to b_i. In C notation, this is:
//
//   a_i = (k>>(i+w)) << w + ((k>>(i+w-1)) & 1) << w
//
// Observe that, while t_i may be positive or negative, a_i is bounded by
// 0 <= a_i < n + 2^w. Additionally, a_i can only be zero if b_(i+w-1) and up
// are all zero. (Note this implies a non-trivial P + (-P) is unreachable for
// all groups. That would imply the subsequent a_i is zero, which means all
// terms thus far were zero.)
//
// Returning to our doubling position, we have a_j = t_j (mod n). We now
// determine the value of a_j - t_j, which must be divisible by n. Our bounds on
// a_j and t_j imply a_j - t_j is 0 or n. If it is 0, a_j = t_j. However, 2^w
// divides a_j and -2^(w-1) <= t_j <= 2^(w-1), so this can only happen if
// a_j = t_j = 0, which is a trivial doubling. Therefore, a_j - t_j = n.
//
// Now we determine j. Suppose j > 0. w divides j, so j >= w. Then,
//
//   n = a_j - t_j = (k>>(j+w)) << w + ((k>>(j+w-1)) & 1) << w - t_j
//                <= k/2^j + 2^w - t_j
//                 < n/2^w + 2^w + 2^(w-1)
//
// n is much larger than 2^w, so this is impossible. Thus, j = 0: only the final
// addition may hit the doubling case.
//
// Finally, we consider bit patterns for n and k. Divide k into k_H + k_M + k_L
// such that k_H is the contribution from b_(l-1) .. b_w, k_M is the
// contribution from b_(w-1), and k_L is the contribution from b_(w-2) ... b_0.
// That is:
//
// - 2^w divides k_H
// - k_M is 0 or 2^(w-1)
// - 0 <= k_L < 2^(w-1)
//
// Divide n into n_H + n_M + n_L similarly. We thus have:
//
//   t_0 = (k>>0) & ((1<<(w-1)) - 1) - (k>>0) & (1<<(w-1)) + (k>>(0-1)) & 1
//       = k & ((1<<(w-1)) - 1) - k & (1<<(w-1))
//       = k_L - k_M
//
//   a_0 = (k>>(0+w)) << w + ((k>>(0+w-1)) & 1) << w
//       = (k>>w) << w + ((k>>(w-1)) & 1) << w
//       = k_H + 2*k_M
//
//                 n = a_0 - t_0
//   n_H + n_M + n_L = (k_H + 2*k_M) - (k_L - k_M)
//                   = k_H + 3*k_M - k_L
//
// k_H - k_L < k and k < n, so k_H - k_L ≠ n. Therefore k_M is not 0 and must be
// 2^(w-1). Now we consider k_H and n_H. We know k_H <= n_H. Suppose k_H = n_H.
// Then,
//
//   n_M + n_L = 3*(2^(w-1)) - k_L
//             > 3*(2^(w-1)) - 2^(w-1)
//             = 2^w
//
// Contradiction (n_M + n_L is the bottom w bits of n). Thus k_H < n_H. Suppose
// k_H <= n_H - 2*2^w. Then,
//
//   n_H + n_M + n_L = k_H + 3*(2^(w-1)) - k_L
//                  <= n_H - 2*2^w + 3*(2^(w-1)) - k_L
//        n_M + n_L <= -2^(w-1) - k_L < 0
//
// Contradiction. Thus, k_H = n_H - 2^w. (Note 2^w divides n_H and k_H.) Thus,
//
//   n_H + n_M + n_L = k_H + 3*(2^(w-1)) - k_L
//                   = n_H - 2^w + 3*(2^(w-1)) - k_L
//         n_M + n_L = 2^(w-1) - k_L
//                  <= 2^(w-1)
//
// Equality would mean 2^(w-1) divides n, which is impossible if n is prime.
// Thus n_M + n_L < 2^(w-1), so n_M is zero, proving our condition.
//
// This proof constructs k, so, to show the converse, let k_H = n_H - 2^w,
// k_M = 2^(w-1), k_L = 2^(w-1) - n_L. This will result in a non-trivial point
// doubling in the final addition and is the only such scalar.
//
// COMMON CURVES:
//
// The group orders for common curves end in the following bit patterns:
//
//   P-521: ...00001001; w = 4 is okay
//   P-384: ...01110011; w = 2, 5, 6, 7 are okay
//   P-256: ...01010001; w = 5, 7 are okay
//   P-224: ...00111101; w = 3, 4, 5, 6 are okay
void ec_GFp_nistp_recode_scalar_bits(crypto_word_t *sign, crypto_word_t *digit,
                                     crypto_word_t in) {
  // Viewing |in| as a 6-bit value, spread its top bit (bit 5) across the whole
  // word: |neg_mask| is all ones when that bit is set and zero when it is
  // clear.
  const crypto_word_t neg_mask = ~((in >> 5) - 1);

  // |flipped| is 63 - |in|; for |in| in 0 .. 63 this is its 6-bit complement.
  const crypto_word_t flipped = (1 << 6) - in - 1;

  // Mask-based select: keep |in| where |neg_mask| is zero and |flipped| where
  // it is all ones.
  const crypto_word_t chosen = (in & ~neg_mask) | (flipped & neg_mask);

  // The low bit of the mask is the sign flag (0 for positive, 1 for negative).
  *sign = neg_mask & 1;

  // Halve, rounding up, to get the absolute value of the recoded digit.
  *digit = (chosen & 1) + (chosen >> 1);
}

Coverage Report

Created: 2024-11-21 07:03

Line	Count	Source
1		/* Copyright (c) 2015, Google Inc.
2		*
3		* Permission to use, copy, modify, and/or distribute this software for any
4		* purpose with or without fee is hereby granted, provided that the above
5		* copyright notice and this permission notice appear in all copies.
6		*
7		* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8		* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9		* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10		* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11		* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12		* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13		* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15		#include <openssl/base.h>
16
17		#include <openssl/ec.h>
18
19		#include "internal.h"
20
21
22		// This function looks at 5+1 scalar bits (5 current, 1 adjacent less
23		// significant bit), and recodes them into a signed digit for use in fast point
24		// multiplication: the use of signed rather than unsigned digits means that
25		// fewer points need to be precomputed, given that point inversion is easy (a
26		// precomputed point dP makes -dP available as well).
27		//
28		// BACKGROUND:
29		//
30		// Signed digits for multiplication were introduced by Booth ("A signed binary
31		// multiplication technique", Quart. Journ. Mech. and Applied Math., vol. IV,
32		// pt. 2 (1951), pp. 236-240), in that case for multiplication of integers.
33		// Booth's original encoding did not generally improve the density of nonzero
34		// digits over the binary representation, and was merely meant to simplify the
35		// handling of signed factors given in two's complement; but it has since been
36		// shown to be the basis of various signed-digit representations that do have
37		// further advantages, including the wNAF, using the following general
38		// approach:
39		//
40		// (1) Given a binary representation
41		//
42		// b_k ... b_2 b_1 b_0,
43		//
44		// of a nonnegative integer (b_k in {0, 1}), rewrite it in digits 0, 1, -1
45		// by using bit-wise subtraction as follows:
46		//
47		// b_k b_(k-1) ... b_2 b_1 b_0
48		// - b_k ... b_3 b_2 b_1 b_0
49		// -----------------------------------------
50		// s_(k+1) s_k ... s_3 s_2 s_1 s_0
51		//
52		// A left-shift followed by subtraction of the original value yields a new
53		// representation of the same value, using signed bits s_i = b_(i-1) - b_i.
54		// This representation from Booth's paper has since appeared in the
55		// literature under a variety of different names including "reversed binary
56		// form", "alternating greedy expansion", "mutual opposite form", and
57		// "sign-alternating {+-1}-representation".
58		//
59		// An interesting property is that among the nonzero bits, values 1 and -1
60		// strictly alternate.
61		//
62		// (2) Various window schemes can be applied to the Booth representation of
63		// integers: for example, right-to-left sliding windows yield the wNAF
64		// (a signed-digit encoding independently discovered by various researchers
65		// in the 1990s), and left-to-right sliding windows yield a left-to-right
66		// equivalent of the wNAF (independently discovered by various researchers
67		// around 2004).
68		//
69		// To prevent leaking information through side channels in point multiplication,
70		// we need to recode the given integer into a regular pattern: sliding windows
71		// as in wNAFs won't do, we need their fixed-window equivalent -- which is a few
72		// decades older: we'll be using the so-called "modified Booth encoding" due to
73		// MacSorley ("High-speed arithmetic in binary computers", Proc. IRE, vol. 49
74		// (1961), pp. 67-91), in a radix-2^5 setting. That is, we always combine five
75		// signed bits into a signed digit:
76		//
77		// s_(5j + 4) s_(5j + 3) s_(5j + 2) s_(5j + 1) s_(5j)
78		//
79		// The sign-alternating property implies that the resulting digit values are
80		// integers from -16 to 16.
81		//
82		// Of course, we don't actually need to compute the signed digits s_i as an
83		// intermediate step (that's just a nice way to see how this scheme relates
84		// to the wNAF): a direct computation obtains the recoded digit from the
85		// six bits b_(5j + 4) ... b_(5j - 1).
86		//
87		// This function takes those six bits as an integer (0 .. 63), writing the
88		// recoded digit to *sign (0 for positive, 1 for negative) and *digit (absolute
89		// value, in the range 0 .. 16). Note that this integer essentially provides
90		// the input bits "shifted to the left" by one position: for example, the input
91		// to compute the least significant recoded digit, given that there's no bit
92		// b_-1, has to be b_4 b_3 b_2 b_1 b_0 0.
93		//
94		// DOUBLING CASE:
95		//
96		// Point addition formulas for short Weierstrass curves are often incomplete.
97		// Edge cases such as P + P or P + ∞ must be handled separately. This
98		// complicates constant-time requirements. P + ∞ cannot be avoided (any window
99		// may be zero) and is handled with constant-time selects. P + P (where P is not
100		// ∞) usually is not. Instead, windowing strategies are chosen to avoid this
101		// case. Whether this happens depends on the group order.
102		//
103		// Let w be the window width (in this function, w = 5). The non-trivial doubling
104		// case in single-point scalar multiplication may occur if and only if the
105		// 2^(w-1) bit of the group order is zero.
106		//
107		// Note the above only holds if the scalar is fully reduced and the group order
108		// is a prime that is much larger than 2^w. It also only holds when windows
109		// are applied from most significant to least significant, doubling between each
110		// window. It does not apply to more complex table strategies such as
111		// |EC_GFp_nistz256_method|.
112		//
113		// PROOF:
114		//
115		// Let n be the group order. Let l be the number of bits needed to represent n.
116		// Assume there exists some 0 <= k < n such that signed w-bit windowed
117		// multiplication hits the doubling case.
118		//
119		// Windowed multiplication consists of iterating over groups of s_i (defined
120		// above based on k's binary representation) from most to least significant. At
121		// iteration i (for i = ..., 3w, 2w, w, 0, starting from the most significant
122		// window), we:
123		//
124		// 1. Double the accumulator A, w times. Let A_i be the value of A at this
125		// point.
126		//
127		// 2. Set A to T_i + A_i, where T_i is a precomputed multiple of P
128		// corresponding to the window s_(i+w-1) ... s_i.
129		//
130		// Let j be the index such that A_j = T_j ≠ ∞. Looking at A_i and T_i as
131		// multiples of P, define a_i and t_i to be scalar coefficients of A_i and T_i.
132		// Thus a_j = t_j ≠ 0 (mod n). Note a_i and t_i may not be reduced mod n. t_i is
133		// the value of the w signed bits s_(i+w-1) ... s_i. a_i is computed as a_i =
134		// 2^w * (a_(i+w) + t_(i+w)).
135		//
136		// t_i is bounded by -2^(w-1) <= t_i <= 2^(w-1). Additionally, we may write it
137		// in terms of unsigned bits b_i. t_i consists of signed bits s_(i+w-1) ... s_i.
138		// This is computed as:
139		//
140		// b_(i+w-2) b_(i+w-3) ... b_i b_(i-1)
141		// - b_(i+w-1) b_(i+w-2) ... b_(i+1) b_i
142		// --------------------------------------------
143		// t_i = s_(i+w-1) s_(i+w-2) ... s_(i+1) s_i
144		//
145		// Observe that b_(i+w-2) through b_i occur in both terms. Let x be the integer
146		// represented by that bit string, i.e. 2^(w-2)*b_(i+w-2) + ... + b_i.
147		//
148		// t_i = (2*x + b_(i-1)) - (2^(w-1)*b_(i+w-1) + x)
149		// = x - 2^(w-1)*b_(i+w-1) + b_(i-1)
150		//
151		// Or, using C notation for bit operations:
152		//
153		// t_i = (k>>i) & ((1<<(w-1)) - 1) - (k>>i) & (1<<(w-1)) + (k>>(i-1)) & 1
154		//
155		// Note b_(i-1) is added in left-shifted by one (or doubled) from its place.
156		// This is compensated by t_(i-w)'s subtraction term. Thus, a_i may be computed
157		// by adding b_l b_(l-1) ... b_(i+w+1) b_(i+w) and an extra copy of b_(i+w-1),
158		// with bit positions taken relative to b_i. In C notation, this is:
159		//
160		// a_i = (k>>(i+w)) << w + ((k>>(i+w-1)) & 1) << w
161		//
162		// Observe that, while t_i may be positive or negative, a_i is bounded by
163		// 0 <= a_i < n + 2^w. Additionally, a_i can only be zero if b_(i+w-1) and up
164		// are all zero. (Note this implies a non-trivial P + (-P) is unreachable for
165		// all groups. That would imply the subsequent a_i is zero, which means all
166		// terms thus far were zero.)
167		//
168		// Returning to our doubling position, we have a_j = t_j (mod n). We now
169		// determine the value of a_j - t_j, which must be divisible by n. Our bounds on
170		// a_j and t_j imply a_j - t_j is 0 or n. If it is 0, a_j = t_j. However, 2^w
171		// divides a_j and -2^(w-1) <= t_j <= 2^(w-1), so this can only happen if
172		// a_j = t_j = 0, which is a trivial doubling. Therefore, a_j - t_j = n.
173		//
174		// Now we determine j. Suppose j > 0. w divides j, so j >= w. Then,
175		//
176		// n = a_j - t_j = (k>>(j+w)) << w + ((k>>(j+w-1)) & 1) << w - t_j
177		// <= k/2^j + 2^w - t_j
178		// < n/2^w + 2^w + 2^(w-1)
179		//
180		// n is much larger than 2^w, so this is impossible. Thus, j = 0: only the final
181		// addition may hit the doubling case.
182		//
183		// Finally, we consider bit patterns for n and k. Divide k into k_H + k_M + k_L
184		// such that k_H is the contribution from b_(l-1) .. b_w, k_M is the
185		// contribution from b_(w-1), and k_L is the contribution from b_(w-2) ... b_0.
186		// That is:
187		//
188		// - 2^w divides k_H
189		// - k_M is 0 or 2^(w-1)
190		// - 0 <= k_L < 2^(w-1)
191		//
192		// Divide n into n_H + n_M + n_L similarly. We thus have:
193		//
194		// t_0 = (k>>0) & ((1<<(w-1)) - 1) - (k>>0) & (1<<(w-1)) + (k>>(0-1)) & 1
195		// = k & ((1<<(w-1)) - 1) - k & (1<<(w-1))
196		// = k_L - k_M
197		//
198		// a_0 = (k>>(0+w)) << w + ((k>>(0+w-1)) & 1) << w
199		// = (k>>w) << w + ((k>>(w-1)) & 1) << w
200		// = k_H + 2*k_M
201		//
202		// n = a_0 - t_0
203		// n_H + n_M + n_L = (k_H + 2*k_M) - (k_L - k_M)
204		// = k_H + 3*k_M - k_L
205		//
206		// k_H - k_L < k and k < n, so k_H - k_L ≠ n. Therefore k_M is not 0 and must be
207		// 2^(w-1). Now we consider k_H and n_H. We know k_H <= n_H. Suppose k_H = n_H.
208		// Then,
209		//
210		// n_M + n_L = 3*(2^(w-1)) - k_L
211		// > 3*(2^(w-1)) - 2^(w-1)
212		// = 2^w
213		//
214		// Contradiction (n_M + n_L is the bottom w bits of n). Thus k_H < n_H. Suppose
215		// k_H <= n_H - 2*2^w. Then,
216		//
217		// n_H + n_M + n_L = k_H + 3*(2^(w-1)) - k_L
218		// <= n_H - 2*2^w + 3*(2^(w-1)) - k_L
219		// n_M + n_L <= -2^(w-1) - k_L < 0
220		//
221		// Contradiction. Thus, k_H = n_H - 2^w. (Note 2^w divides n_H and k_H.) Thus,
222		//
223		// n_H + n_M + n_L = k_H + 3*(2^(w-1)) - k_L
224		// = n_H - 2^w + 3*(2^(w-1)) - k_L
225		// n_M + n_L = 2^(w-1) - k_L
226		// <= 2^(w-1)
227		//
228		// Equality would mean 2^(w-1) divides n, which is impossible if n is prime.
229		// Thus n_M + n_L < 2^(w-1), so n_M is zero, proving our condition.
230		//
231		// This proof constructs k, so, to show the converse, let k_H = n_H - 2^w,
232		// k_M = 2^(w-1), k_L = 2^(w-1) - n_L. This will result in a non-trivial point
233		// doubling in the final addition and is the only such scalar.
234		//
235		// COMMON CURVES:
236		//
237		// The group orders for common curves end in the following bit patterns:
238		//
239		// P-521: ...00001001; w = 4 is okay
240		// P-384: ...01110011; w = 2, 5, 6, 7 are okay
241		// P-256: ...01010001; w = 5, 7 are okay
242		// P-224: ...00111101; w = 3, 4, 5, 6 are okay
243		void ec_GFp_nistp_recode_scalar_bits(crypto_word_t *sign, crypto_word_t *digit,
244	745	crypto_word_t in) {
245	745	crypto_word_t s, d;
246
247	745	s = ~((in >> 5) - 1); /* sets all bits to MSB(in), 'in' seen as
248		* 6-bit value */
249	745	d = (1 << 6) - in - 1;
250	745	d = (d & s) | (in & ~s);
251	745	d = (d >> 1) + (d & 1);
252
253	745	*sign = s & 1;
254	745	*digit = d;
255	745	}