Coverage Report

Created: 2024-07-27 06:19

/src/openssh/sntrup761.c
Line
Count
Source (jump to first uncovered line)
1
/*  $OpenBSD: sntrup761.c,v 1.6 2023/01/11 02:13:52 djm Exp $ */
2
3
/*
4
 * Public Domain, Authors:
5
 * - Daniel J. Bernstein
6
 * - Chitchanok Chuengsatiansup
7
 * - Tanja Lange
8
 * - Christine van Vredendaal
9
 */
10
11
#include "includes.h"
12
13
#ifdef USE_SNTRUP761X25519
14
15
#include <string.h>
16
#include "crypto_api.h"
17
18
#define int8 crypto_int8
19
#define uint8 crypto_uint8
20
#define int16 crypto_int16
21
8.02G
#define uint16 crypto_uint16
22
4.01G
#define int32 crypto_int32
23
32.0G
#define uint32 crypto_uint32
24
#define int64 crypto_int64
25
#define uint64 crypto_uint64
26
27
/* from supercop-20201130/crypto_sort/int32/portable4/int32_minmax.inc */
28
13.1M
#define int32_MINMAX(a,b) \
29
13.1M
do { \
30
13.1M
  int64_t ab = (int64_t)b ^ (int64_t)a; \
31
13.1M
  int64_t c = (int64_t)b - (int64_t)a; \
32
13.1M
  c ^= ab & (c ^ b); \
33
13.1M
  c >>= 31; \
34
13.1M
  c &= ab; \
35
13.1M
  a ^= c; \
36
13.1M
  b ^= c; \
37
13.1M
} while(0)
38
39
/* from supercop-20201130/crypto_sort/int32/portable4/sort.c */
40
41
42
static void crypto_sort_int32(void *array,long long n)
43
782
{
44
782
  long long top,p,q,r,i,j;
45
782
  int32 *x = array;
46
47
782
  if (n < 2) return;
48
782
  top = 1;
49
7.82k
  while (top < n - top) top += top;
50
51
8.60k
  for (p = top;p >= 1;p >>= 1) {
52
7.82k
    i = 0;
53
597k
    while (i + 2 * p <= n) {
54
2.98M
      for (j = i;j < i + p;++j)
55
2.39M
        int32_MINMAX(x[j],x[j+p]);
56
589k
      i += 2 * p;
57
589k
    }
58
369k
    for (j = i;j < n - p;++j)
59
361k
      int32_MINMAX(x[j],x[j+p]);
60
61
7.82k
    i = 0;
62
7.82k
    j = 0;
63
43.0k
    for (q = top;q > p;q >>= 1) {
64
35.1k
      if (j != i) for (;;) {
65
19.5k
        if (j == n - q) goto done;
66
19.5k
        int32 a = x[j + p];
67
90.7k
        for (r = q;r > p;r >>= 1)
68
71.1k
          int32_MINMAX(a,x[j + r]);
69
19.5k
        x[j + p] = a;
70
19.5k
        ++j;
71
19.5k
        if (j == i + p) {
72
10.1k
          i += 2 * p;
73
10.1k
          break;
74
10.1k
        }
75
19.5k
      }
76
612k
      while (i + p <= n - q) {
77
2.74M
        for (j = i;j < i + p;++j) {
78
2.16M
          int32 a = x[j + p];
79
12.1M
          for (r = q;r > p;r >>= 1)
80
10.0M
            int32_MINMAX(a,x[j+r]);
81
2.16M
          x[j + p] = a;
82
2.16M
        }
83
577k
        i += 2 * p;
84
577k
      }
85
      /* now i + p > n - q */
86
35.1k
      j = i;
87
241k
      while (j < n - q) {
88
206k
        int32 a = x[j + p];
89
451k
        for (r = q;r > p;r >>= 1)
90
244k
          int32_MINMAX(a,x[j+r]);
91
206k
        x[j + p] = a;
92
206k
        ++j;
93
206k
      }
94
95
35.1k
      done: ;
96
35.1k
    }
97
7.82k
  }
98
782
}
99
100
/* from supercop-20201130/crypto_sort/uint32/useint32/sort.c */
101
102
/* can save time by vectorizing xor loops */
103
/* can save time by integrating xor loops with int32_sort */
104
105
static void crypto_sort_uint32(void *array,long long n)
106
782
{
107
782
  crypto_uint32 *x = array;
108
782
  long long j;
109
595k
  for (j = 0;j < n;++j) x[j] ^= 0x80000000;
110
782
  crypto_sort_int32(array,n);
111
595k
  for (j = 0;j < n;++j) x[j] ^= 0x80000000;
112
782
}
113
114
/* from supercop-20201130/crypto_kem/sntrup761/ref/uint32.c */
115
116
/*
117
CPU division instruction typically takes time depending on x.
118
This software is designed to take time independent of x.
119
Time still varies depending on m; user must ensure that m is constant.
120
Time also varies on CPUs where multiplication is variable-time.
121
There could be more CPU issues.
122
There could also be compiler issues.
123
*/
124
125
static void uint32_divmod_uint14(uint32 *q,uint16 *r,uint32 x,uint16 m)
126
8.01G
{
127
8.01G
  uint32 v = 0x80000000;
128
8.01G
  uint32 qpart;
129
8.01G
  uint32 mask;
130
131
8.01G
  v /= m;
132
133
  /* caller guarantees m > 0 */
134
  /* caller guarantees m < 16384 */
135
  /* vm <= 2^31 <= vm+m-1 */
136
  /* xvm <= 2^31 x <= xvm+x(m-1) */
137
138
8.01G
  *q = 0;
139
140
8.01G
  qpart = (x*(uint64)v)>>31;
141
  /* 2^31 qpart <= xv <= 2^31 qpart + 2^31-1 */
142
  /* 2^31 qpart m <= xvm <= 2^31 qpart m + (2^31-1)m */
143
  /* 2^31 qpart m <= 2^31 x <= 2^31 qpart m + (2^31-1)m + x(m-1) */
144
  /* 0 <= 2^31 newx <= (2^31-1)m + x(m-1) */
145
  /* 0 <= newx <= (1-1/2^31)m + x(m-1)/2^31 */
146
  /* 0 <= newx <= (1-1/2^31)(2^14-1) + (2^32-1)((2^14-1)-1)/2^31 */
147
148
8.01G
  x -= qpart*m; *q += qpart;
149
  /* x <= 49146 */
150
151
8.01G
  qpart = (x*(uint64)v)>>31;
152
  /* 0 <= newx <= (1-1/2^31)m + x(m-1)/2^31 */
153
  /* 0 <= newx <= m + 49146(2^14-1)/2^31 */
154
  /* 0 <= newx <= m + 0.4 */
155
  /* 0 <= newx <= m */
156
157
8.01G
  x -= qpart*m; *q += qpart;
158
  /* x <= m */
159
160
8.01G
  x -= m; *q += 1;
161
8.01G
  mask = -(x>>31);
162
8.01G
  x += mask&(uint32)m; *q += mask;
163
  /* x < m */
164
165
8.01G
  *r = x;
166
8.01G
}
167
168
169
static uint16 uint32_mod_uint14(uint32 x,uint16 m)
170
23.5k
{
171
23.5k
  uint32 q;
172
23.5k
  uint16 r;
173
23.5k
  uint32_divmod_uint14(&q,&r,x,m);
174
23.5k
  return r;
175
23.5k
}
176
177
/* from supercop-20201130/crypto_kem/sntrup761/ref/int32.c */
178
179
static void int32_divmod_uint14(int32 *q,uint16 *r,int32 x,uint16 m)
180
4.00G
{
181
4.00G
  uint32 uq,uq2;
182
4.00G
  uint16 ur,ur2;
183
4.00G
  uint32 mask;
184
185
4.00G
  uint32_divmod_uint14(&uq,&ur,0x80000000+(uint32)x,m);
186
4.00G
  uint32_divmod_uint14(&uq2,&ur2,0x80000000,m);
187
4.00G
  ur -= ur2; uq -= uq2;
188
4.00G
  mask = -(uint32)(ur>>15);
189
4.00G
  ur += mask&m; uq += mask;
190
4.00G
  *r = ur; *q = uq;
191
4.00G
}
192
193
194
static uint16 int32_mod_uint14(int32 x,uint16 m)
195
4.00G
{
196
4.00G
  int32 q;
197
4.00G
  uint16 r;
198
4.00G
  int32_divmod_uint14(&q,&r,x,m);
199
4.00G
  return r;
200
4.00G
}
201
202
/* from supercop-20201130/crypto_kem/sntrup761/ref/paramsmenu.h */
203
/* pick one of these three: */
204
#define SIZE761
205
#undef SIZE653
206
#undef SIZE857
207
208
/* pick one of these two: */
209
#define SNTRUP /* Streamlined NTRU Prime */
210
#undef LPR /* NTRU LPRime */
211
212
/* from supercop-20201130/crypto_kem/sntrup761/ref/params.h */
213
#ifndef params_H
214
#define params_H
215
216
/* menu of parameter choices: */
217
218
219
/* what the menu means: */
220
221
#if defined(SIZE761)
222
7.33G
#define p 761
223
6.72G
#define q 4591
224
26.0k
#define Rounded_bytes 1007
225
#ifndef LPR
226
885k
#define Rq_bytes 1158
227
226k
#define w 286
228
#else
229
#define w 250
230
#define tau0 2156
231
#define tau1 114
232
#define tau2 2007
233
#define tau3 287
234
#endif
235
236
#elif defined(SIZE653)
237
#define p 653
238
#define q 4621
239
#define Rounded_bytes 865
240
#ifndef LPR
241
#define Rq_bytes 994
242
#define w 288
243
#else
244
#define w 252
245
#define tau0 2175
246
#define tau1 113
247
#define tau2 2031
248
#define tau3 290
249
#endif
250
251
#elif defined(SIZE857)
252
#define p 857
253
#define q 5167
254
#define Rounded_bytes 1152
255
#ifndef LPR
256
#define Rq_bytes 1322
257
#define w 322
258
#else
259
#define w 281
260
#define tau0 2433
261
#define tau1 101
262
#define tau2 2265
263
#define tau3 324
264
#endif
265
266
#else
267
#error "no parameter set defined"
268
#endif
269
270
#ifdef LPR
271
#define I 256
272
#endif
273
274
#endif
275
276
/* from supercop-20201130/crypto_kem/sntrup761/ref/Decode.h */
277
#ifndef Decode_H
278
#define Decode_H
279
280
281
/* Decode(R,s,M,len) */
282
/* assumes 0 < M[i] < 16384 */
283
/* produces 0 <= R[i] < M[i] */
284
285
#endif
286
287
/* from supercop-20201130/crypto_kem/sntrup761/ref/Decode.c */
288
289
static void Decode(uint16 *out,const unsigned char *S,const uint16 *M,long long len)
290
341
{
291
341
  if (len == 1) {
292
31
    if (M[0] == 1)
293
0
      *out = 0;
294
31
    else if (M[0] <= 256)
295
0
      *out = uint32_mod_uint14(S[0],M[0]);
296
31
    else
297
31
      *out = uint32_mod_uint14(S[0]+(((uint16)S[1])<<8),M[0]);
298
31
  }
299
341
  if (len > 1) {
300
310
    uint16 R2[(len+1)/2];
301
310
    uint16 M2[(len+1)/2];
302
310
    uint16 bottomr[len/2];
303
310
    uint32 bottomt[len/2];
304
310
    long long i;
305
23.8k
    for (i = 0;i < len-1;i += 2) {
306
23.5k
      uint32 m = M[i]*(uint32) M[i+1];
307
23.5k
      if (m > 256*16383) {
308
11.3k
        bottomt[i/2] = 256*256;
309
11.3k
        bottomr[i/2] = S[0]+256*S[1];
310
11.3k
        S += 2;
311
11.3k
        M2[i/2] = (((m+255)>>8)+255)>>8;
312
12.1k
      } else if (m >= 16384) {
313
12.1k
        bottomt[i/2] = 256;
314
12.1k
        bottomr[i/2] = S[0];
315
12.1k
        S += 1;
316
12.1k
        M2[i/2] = (m+255)>>8;
317
12.1k
      } else {
318
0
        bottomt[i/2] = 1;
319
0
        bottomr[i/2] = 0;
320
0
        M2[i/2] = m;
321
0
      }
322
23.5k
    }
323
310
    if (i < len)
324
124
      M2[i/2] = M[i];
325
310
    Decode(R2,S,M2,(len+1)/2);
326
23.8k
    for (i = 0;i < len-1;i += 2) {
327
23.5k
      uint32 r = bottomr[i/2];
328
23.5k
      uint32 r1;
329
23.5k
      uint16 r0;
330
23.5k
      r += bottomt[i/2]*R2[i/2];
331
23.5k
      uint32_divmod_uint14(&r1,&r0,r,M[i]);
332
23.5k
      r1 = uint32_mod_uint14(r1,M[i+1]); /* only needed for invalid inputs */
333
23.5k
      *out++ = r0;
334
23.5k
      *out++ = r1;
335
23.5k
    }
336
310
    if (i < len)
337
124
      *out++ = R2[i/2];
338
310
  }
339
341
}
340
341
/* from supercop-20201130/crypto_kem/sntrup761/ref/Encode.h */
342
#ifndef Encode_H
343
#define Encode_H
344
345
346
/* Encode(s,R,M,len) */
347
/* assumes 0 <= R[i] < M[i] < 16384 */
348
349
#endif
350
351
/* from supercop-20201130/crypto_kem/sntrup761/ref/Encode.c */
352
353
/* 0 <= R[i] < M[i] < 16384 */
354
static void Encode(unsigned char *out,const uint16 *R,const uint16 *M,long long len)
355
8.66k
{
356
8.66k
  if (len == 1) {
357
788
    uint16 r = R[0];
358
788
    uint16 m = M[0];
359
2.36k
    while (m > 1) {
360
1.57k
      *out++ = r;
361
1.57k
      r >>= 8;
362
1.57k
      m = (m+255)>>8;
363
1.57k
    }
364
788
  }
365
8.66k
  if (len > 1) {
366
7.88k
    uint16 R2[(len+1)/2];
367
7.88k
    uint16 M2[(len+1)/2];
368
7.88k
    long long i;
369
606k
    for (i = 0;i < len-1;i += 2) {
370
598k
      uint32 m0 = M[i];
371
598k
      uint32 r = R[i]+R[i+1]*m0;
372
598k
      uint32 m = M[i+1]*m0;
373
1.50M
      while (m >= 16384) {
374
907k
        *out++ = r;
375
907k
        r >>= 8;
376
907k
        m = (m+255)>>8;
377
907k
      }
378
598k
      R2[i/2] = r;
379
598k
      M2[i/2] = m;
380
598k
    }
381
7.88k
    if (i < len) {
382
3.15k
      R2[i/2] = R[i];
383
3.15k
      M2[i/2] = M[i];
384
3.15k
    }
385
7.88k
    Encode(out,R2,M2,(len+1)/2);
386
7.88k
  }
387
8.66k
}
388
389
/* from supercop-20201130/crypto_kem/sntrup761/ref/kem.c */
390
391
#ifdef LPR
392
#endif
393
394
395
/* ----- masks */
396
397
#ifndef LPR
398
399
/* return -1 if x!=0; else return 0 */
400
static int int16_nonzero_mask(int16 x)
401
2.32M
{
402
2.32M
  uint16 u = x; /* 0, else 1...65535 */
403
2.32M
  uint32 v = u; /* 0, else 1...65535 */
404
2.32M
  v = -v; /* 0, else 2^32-65535...2^32-1 */
405
2.32M
  v >>= 31; /* 0, else 1 */
406
2.32M
  return -v; /* 0, else -1 */
407
2.32M
}
408
409
#endif
410
411
/* return -1 if x<0; otherwise return 0 */
412
static int int16_negative_mask(int16 x)
413
2.32M
{
414
2.32M
  uint16 u = x;
415
2.32M
  u >>= 15;
416
2.32M
  return -(int) u;
417
  /* alternative with gcc -fwrapv: */
418
  /* x>>15 compiles to CPU's arithmetic right shift */
419
2.32M
}
420
421
/* ----- arithmetic mod 3 */
422
423
typedef int8 small;
424
425
/* F3 is always represented as -1,0,1 */
426
/* so ZZ_fromF3 is a no-op */
427
428
/* x must not be close to top int16 */
429
static small F3_freeze(int16 x)
430
1.77G
{
431
1.77G
  return int32_mod_uint14(x+1,3)-1;
432
1.77G
}
433
434
/* ----- arithmetic mod q */
435
436
4.47G
#define q12 ((q-1)/2)
437
typedef int16 Fq;
438
/* always represented as -q12...q12 */
439
/* so ZZ_fromFq is a no-op */
440
441
/* x must not be close to top int32 */
442
static Fq Fq_freeze(int32 x)
443
2.23G
{
444
2.23G
  return int32_mod_uint14(x+q12,q)-q12;
445
2.23G
}
446
447
#ifndef LPR
448
449
static Fq Fq_recip(Fq a1)
450
1.52k
{
451
1.52k
  int i = 1;
452
1.52k
  Fq ai = a1;
453
454
7.00M
  while (i < q-2) {
455
7.00M
    ai = Fq_freeze(a1*(int32)ai);
456
7.00M
    i += 1;
457
7.00M
  }
458
1.52k
  return ai;
459
1.52k
}
460
461
#endif
462
463
/* ----- Top and Right */
464
465
#ifdef LPR
466
#define tau 16
467
468
static int8 Top(Fq C)
469
{
470
  return (tau1*(int32)(C+tau0)+16384)>>15;
471
}
472
473
static Fq Right(int8 T)
474
{
475
  return Fq_freeze(tau3*(int32)T-tau2);
476
}
477
#endif
478
479
/* ----- small polynomials */
480
481
#ifndef LPR
482
483
/* 0 if Weightw_is(r), else -1 */
484
static int Weightw_mask(small *r)
485
6
{
486
6
  int weight = 0;
487
6
  int i;
488
489
4.57k
  for (i = 0;i < p;++i) weight += r[i]&1;
490
6
  return int16_nonzero_mask(weight-w);
491
6
}
492
493
/* R3_fromR(R_fromRq(r)) */
494
static void R3_fromRq(small *out,const Fq *r)
495
6
{
496
6
  int i;
497
4.57k
  for (i = 0;i < p;++i) out[i] = F3_freeze(r[i]);
498
6
}
499
500
/* h = f*g in the ring R3 */
501
static void R3_mult(small *h,const small *f,const small *g)
502
6
{
503
6
  small fg[p+p-1];
504
6
  small result;
505
6
  int i,j;
506
507
4.57k
  for (i = 0;i < p;++i) {
508
4.56k
    result = 0;
509
1.74M
    for (j = 0;j <= i;++j) result = F3_freeze(result+f[j]*g[i-j]);
510
4.56k
    fg[i] = result;
511
4.56k
  }
512
4.56k
  for (i = p;i < p+p-1;++i) {
513
4.56k
    result = 0;
514
1.73M
    for (j = i-p+1;j < p;++j) result = F3_freeze(result+f[j]*g[i-j]);
515
4.56k
    fg[i] = result;
516
4.56k
  }
517
518
4.56k
  for (i = p+p-2;i >= p;--i) {
519
4.56k
    fg[i-p] = F3_freeze(fg[i-p]+fg[i]);
520
4.56k
    fg[i-p+1] = F3_freeze(fg[i-p+1]+fg[i]);
521
4.56k
  }
522
523
4.57k
  for (i = 0;i < p;++i) h[i] = fg[i];
524
6
}
525
526
/* returns 0 if recip succeeded; else -1 */
527
static int R3_recip(small *out,const small *in)
528
763
{
529
763
  small f[p+1],g[p+1],v[p+1],r[p+1];
530
763
  int i,loop,delta;
531
763
  int sign,swap,t;
532
533
582k
  for (i = 0;i < p+1;++i) v[i] = 0;
534
582k
  for (i = 0;i < p+1;++i) r[i] = 0;
535
763
  r[0] = 1;
536
581k
  for (i = 0;i < p;++i) f[i] = 0;
537
763
  f[0] = 1; f[p-1] = f[p] = -1;
538
581k
  for (i = 0;i < p;++i) g[p-1-i] = in[i];
539
763
  g[p] = 0;
540
541
763
  delta = 1;
542
543
1.16M
  for (loop = 0;loop < 2*p-1;++loop) {
544
884M
    for (i = p;i > 0;--i) v[i] = v[i-1];
545
1.16M
    v[0] = 0;
546
547
1.16M
    sign = -g[0]*f[0];
548
1.16M
    swap = int16_negative_mask(-delta) & int16_nonzero_mask(g[0]);
549
1.16M
    delta ^= swap&(delta^-delta);
550
1.16M
    delta += 1;
551
552
885M
    for (i = 0;i < p+1;++i) {
553
884M
      t = swap&(f[i]^g[i]); f[i] ^= t; g[i] ^= t;
554
884M
      t = swap&(v[i]^r[i]); v[i] ^= t; r[i] ^= t;
555
884M
    }
556
557
885M
    for (i = 0;i < p+1;++i) g[i] = F3_freeze(g[i]+sign*f[i]);
558
885M
    for (i = 0;i < p+1;++i) r[i] = F3_freeze(r[i]+sign*v[i]);
559
560
884M
    for (i = 0;i < p;++i) g[i] = g[i+1];
561
1.16M
    g[p] = 0;
562
1.16M
  }
563
564
763
  sign = f[0];
565
581k
  for (i = 0;i < p;++i) out[i] = sign*v[p-1-i];
566
567
763
  return int16_nonzero_mask(delta);
568
763
}
569
570
#endif
571
572
/* ----- polynomials mod q */
573
574
/* h = f*g in the ring Rq */
575
static void Rq_mult_small(Fq *h,const Fq *f,const small *g)
576
794
{
577
794
  Fq fg[p+p-1];
578
794
  Fq result;
579
794
  int i,j;
580
581
605k
  for (i = 0;i < p;++i) {
582
604k
    result = 0;
583
230M
    for (j = 0;j <= i;++j) result = Fq_freeze(result+f[j]*(int32)g[i-j]);
584
604k
    fg[i] = result;
585
604k
  }
586
604k
  for (i = p;i < p+p-1;++i) {
587
603k
    result = 0;
588
230M
    for (j = i-p+1;j < p;++j) result = Fq_freeze(result+f[j]*(int32)g[i-j]);
589
603k
    fg[i] = result;
590
603k
  }
591
592
604k
  for (i = p+p-2;i >= p;--i) {
593
603k
    fg[i-p] = Fq_freeze(fg[i-p]+fg[i]);
594
603k
    fg[i-p+1] = Fq_freeze(fg[i-p+1]+fg[i]);
595
603k
  }
596
597
605k
  for (i = 0;i < p;++i) h[i] = fg[i];
598
794
}
599
600
#ifndef LPR
601
602
/* h = 3f in Rq */
603
static void Rq_mult3(Fq *h,const Fq *f)
604
6
{
605
6
  int i;
606
607
4.57k
  for (i = 0;i < p;++i) h[i] = Fq_freeze(3*f[i]);
608
6
}
609
610
/* out = 1/(3*in) in Rq */
611
/* returns 0 if recip succeeded; else -1 */
612
static int Rq_recip3(Fq *out,const small *in)
613
763
{
614
763
  Fq f[p+1],g[p+1],v[p+1],r[p+1];
615
763
  int i,loop,delta;
616
763
  int swap,t;
617
763
  int32 f0,g0;
618
763
  Fq scale;
619
620
582k
  for (i = 0;i < p+1;++i) v[i] = 0;
621
582k
  for (i = 0;i < p+1;++i) r[i] = 0;
622
763
  r[0] = Fq_recip(3);
623
581k
  for (i = 0;i < p;++i) f[i] = 0;
624
763
  f[0] = 1; f[p-1] = f[p] = -1;
625
581k
  for (i = 0;i < p;++i) g[p-1-i] = in[i];
626
763
  g[p] = 0;
627
628
763
  delta = 1;
629
630
1.16M
  for (loop = 0;loop < 2*p-1;++loop) {
631
884M
    for (i = p;i > 0;--i) v[i] = v[i-1];
632
1.16M
    v[0] = 0;
633
634
1.16M
    swap = int16_negative_mask(-delta) & int16_nonzero_mask(g[0]);
635
1.16M
    delta ^= swap&(delta^-delta);
636
1.16M
    delta += 1;
637
638
885M
    for (i = 0;i < p+1;++i) {
639
884M
      t = swap&(f[i]^g[i]); f[i] ^= t; g[i] ^= t;
640
884M
      t = swap&(v[i]^r[i]); v[i] ^= t; r[i] ^= t;
641
884M
    }
642
643
1.16M
    f0 = f[0];
644
1.16M
    g0 = g[0];
645
885M
    for (i = 0;i < p+1;++i) g[i] = Fq_freeze(f0*g[i]-g0*f[i]);
646
885M
    for (i = 0;i < p+1;++i) r[i] = Fq_freeze(f0*r[i]-g0*v[i]);
647
648
884M
    for (i = 0;i < p;++i) g[i] = g[i+1];
649
1.16M
    g[p] = 0;
650
1.16M
  }
651
652
763
  scale = Fq_recip(f[0]);
653
581k
  for (i = 0;i < p;++i) out[i] = Fq_freeze(scale*(int32)v[p-1-i]);
654
655
763
  return int16_nonzero_mask(delta);
656
763
}
657
658
#endif
659
660
/* ----- rounded polynomials mod q */
661
662
static void Round(Fq *out,const Fq *a)
663
25
{
664
25
  int i;
665
19.0k
  for (i = 0;i < p;++i) out[i] = a[i]-F3_freeze(a[i]);
666
25
}
667
668
/* ----- sorting to generate short polynomial */
669
670
static void Short_fromlist(small *out,const uint32 *in)
671
782
{
672
782
  uint32 L[p];
673
782
  int i;
674
675
224k
  for (i = 0;i < w;++i) L[i] = in[i]&(uint32)-2;
676
372k
  for (i = w;i < p;++i) L[i] = (in[i]&(uint32)-3)|1;
677
782
  crypto_sort_uint32(L,p);
678
595k
  for (i = 0;i < p;++i) out[i] = (L[i]&3)-1;
679
782
}
680
681
/* ----- underlying hash function */
682
683
27.6k
#define Hash_bytes 32
684
685
/* e.g., b = 0 means out = Hash0(in) */
686
static void Hash_prefix(unsigned char *out,int b,const unsigned char *in,int inlen)
687
882
{
688
882
  unsigned char x[inlen+1];
689
882
  unsigned char h[64];
690
882
  int i;
691
692
882
  x[0] = b;
693
944k
  for (i = 0;i < inlen;++i) x[i+1] = in[i];
694
882
  crypto_hash_sha512(h,x,inlen+1);
695
29.1k
  for (i = 0;i < 32;++i) out[i] = h[i];
696
882
}
697
698
/* ----- higher-level randomness */
699
700
static uint32 urandom32(void)
701
1.17M
{
702
1.17M
  unsigned char c[4];
703
1.17M
  uint32 out[4];
704
705
1.17M
  randombytes(c,4);
706
1.17M
  out[0] = (uint32)c[0];
707
1.17M
  out[1] = ((uint32)c[1])<<8;
708
1.17M
  out[2] = ((uint32)c[2])<<16;
709
1.17M
  out[3] = ((uint32)c[3])<<24;
710
1.17M
  return out[0]+out[1]+out[2]+out[3];
711
1.17M
}
712
713
static void Short_random(small *out)
714
782
{
715
782
  uint32 L[p];
716
782
  int i;
717
718
595k
  for (i = 0;i < p;++i) L[i] = urandom32();
719
782
  Short_fromlist(out,L);
720
782
}
721
722
#ifndef LPR
723
724
static void Small_random(small *out)
725
763
{
726
763
  int i;
727
728
581k
  for (i = 0;i < p;++i) out[i] = (((urandom32()&0x3fffffff)*3)>>30)-1;
729
763
}
730
731
#endif
732
733
/* ----- Streamlined NTRU Prime Core */
734
735
#ifndef LPR
736
737
/* h,(f,ginv) = KeyGen() */
738
static void KeyGen(Fq *h,small *f,small *ginv)
739
763
{
740
763
  small g[p];
741
763
  Fq finv[p];
742
743
763
  for (;;) {
744
763
    Small_random(g);
745
763
    if (R3_recip(ginv,g) == 0) break;
746
763
  }
747
763
  Short_random(f);
748
763
  Rq_recip3(finv,f); /* always works */
749
763
  Rq_mult_small(h,finv,g);
750
763
}
751
752
/* c = Encrypt(r,h) */
753
static void Encrypt(Fq *c,const small *r,const Fq *h)
754
25
{
755
25
  Fq hr[p];
756
757
25
  Rq_mult_small(hr,h,r);
758
25
  Round(c,hr);
759
25
}
760
761
/* r = Decrypt(c,(f,ginv)) */
762
static void Decrypt(small *r,const Fq *c,const small *f,const small *ginv)
763
6
{
764
6
  Fq cf[p];
765
6
  Fq cf3[p];
766
6
  small e[p];
767
6
  small ev[p];
768
6
  int mask;
769
6
  int i;
770
771
6
  Rq_mult_small(cf,c,f);
772
6
  Rq_mult3(cf3,cf);
773
6
  R3_fromRq(e,cf3);
774
6
  R3_mult(ev,e,ginv);
775
776
6
  mask = Weightw_mask(ev); /* 0 if weight w, else -1 */
777
1.72k
  for (i = 0;i < w;++i) r[i] = ((ev[i]^1)&~mask)^1;
778
2.85k
  for (i = w;i < p;++i) r[i] = ev[i]&~mask;
779
6
}
780
781
#endif
782
783
/* ----- NTRU LPRime Core */
784
785
#ifdef LPR
786
787
/* (G,A),a = KeyGen(G); leaves G unchanged */
788
static void KeyGen(Fq *A,small *a,const Fq *G)
789
{
790
  Fq aG[p];
791
792
  Short_random(a);
793
  Rq_mult_small(aG,G,a);
794
  Round(A,aG);
795
}
796
797
/* B,T = Encrypt(r,(G,A),b) */
798
static void Encrypt(Fq *B,int8 *T,const int8 *r,const Fq *G,const Fq *A,const small *b)
799
{
800
  Fq bG[p];
801
  Fq bA[p];
802
  int i;
803
804
  Rq_mult_small(bG,G,b);
805
  Round(B,bG);
806
  Rq_mult_small(bA,A,b);
807
  for (i = 0;i < I;++i) T[i] = Top(Fq_freeze(bA[i]+r[i]*q12));
808
}
809
810
/* r = Decrypt((B,T),a) */
811
static void Decrypt(int8 *r,const Fq *B,const int8 *T,const small *a)
812
{
813
  Fq aB[p];
814
  int i;
815
816
  Rq_mult_small(aB,B,a);
817
  for (i = 0;i < I;++i)
818
    r[i] = -int16_negative_mask(Fq_freeze(Right(T[i])-aB[i]+4*w+1));
819
}
820
821
#endif
822
823
/* ----- encoding I-bit inputs */
824
825
#ifdef LPR
826
827
#define Inputs_bytes (I/8)
828
typedef int8 Inputs[I]; /* passed by reference */
829
830
static void Inputs_encode(unsigned char *s,const Inputs r)
831
{
832
  int i;
833
  for (i = 0;i < Inputs_bytes;++i) s[i] = 0;
834
  for (i = 0;i < I;++i) s[i>>3] |= r[i]<<(i&7);
835
}
836
837
#endif
838
839
/* ----- Expand */
840
841
#ifdef LPR
842
843
static const unsigned char aes_nonce[16] = {0};
844
845
static void Expand(uint32 *L,const unsigned char *k)
846
{
847
  int i;
848
  crypto_stream_aes256ctr((unsigned char *) L,4*p,aes_nonce,k);
849
  for (i = 0;i < p;++i) {
850
    uint32 L0 = ((unsigned char *) L)[4*i];
851
    uint32 L1 = ((unsigned char *) L)[4*i+1];
852
    uint32 L2 = ((unsigned char *) L)[4*i+2];
853
    uint32 L3 = ((unsigned char *) L)[4*i+3];
854
    L[i] = L0+(L1<<8)+(L2<<16)+(L3<<24);
855
  }
856
}
857
858
#endif
859
860
/* ----- Seeds */
861
862
#ifdef LPR
863
864
#define Seeds_bytes 32
865
866
static void Seeds_random(unsigned char *s)
867
{
868
  randombytes(s,Seeds_bytes);
869
}
870
871
#endif
872
873
/* ----- Generator, HashShort */
874
875
#ifdef LPR
876
877
/* G = Generator(k) */
878
static void Generator(Fq *G,const unsigned char *k)
879
{
880
  uint32 L[p];
881
  int i;
882
883
  Expand(L,k);
884
  for (i = 0;i < p;++i) G[i] = uint32_mod_uint14(L[i],q)-q12;
885
}
886
887
/* out = HashShort(r) */
888
static void HashShort(small *out,const Inputs r)
889
{
890
  unsigned char s[Inputs_bytes];
891
  unsigned char h[Hash_bytes];
892
  uint32 L[p];
893
894
  Inputs_encode(s,r);
895
  Hash_prefix(h,5,s,sizeof s);
896
  Expand(L,h);
897
  Short_fromlist(out,L);
898
}
899
900
#endif
901
902
/* ----- NTRU LPRime Expand */
903
904
#ifdef LPR
905
906
/* (S,A),a = XKeyGen() */
907
static void XKeyGen(unsigned char *S,Fq *A,small *a)
908
{
909
  Fq G[p];
910
911
  Seeds_random(S);
912
  Generator(G,S);
913
  KeyGen(A,a,G);
914
}
915
916
/* B,T = XEncrypt(r,(S,A)) */
917
static void XEncrypt(Fq *B,int8 *T,const int8 *r,const unsigned char *S,const Fq *A)
918
{
919
  Fq G[p];
920
  small b[p];
921
922
  Generator(G,S);
923
  HashShort(b,r);
924
  Encrypt(B,T,r,G,A,b);
925
}
926
927
#define XDecrypt Decrypt
928
929
#endif
930
931
/* ----- encoding small polynomials (including short polynomials) */
932
933
3.50k
#define Small_bytes ((p+3)/4)
934
935
/* these are the only functions that rely on p mod 4 = 1 */
936
937
static void Small_encode(unsigned char *s,const small *f)
938
1.55k
{
939
1.55k
  small x;
940
1.55k
  int i;
941
942
296k
  for (i = 0;i < p/4;++i) {
943
294k
    x = *f++ + 1;
944
294k
    x += (*f++ + 1)<<2;
945
294k
    x += (*f++ + 1)<<4;
946
294k
    x += (*f++ + 1)<<6;
947
294k
    *s++ = x;
948
294k
  }
949
1.55k
  x = *f++ + 1;
950
1.55k
  *s++ = x;
951
1.55k
}
952
953
static void Small_decode(small *f,const unsigned char *s)
954
12
{
955
12
  unsigned char x;
956
12
  int i;
957
958
2.29k
  for (i = 0;i < p/4;++i) {
959
2.28k
    x = *s++;
960
2.28k
    *f++ = ((small)(x&3))-1; x >>= 2;
961
2.28k
    *f++ = ((small)(x&3))-1; x >>= 2;
962
2.28k
    *f++ = ((small)(x&3))-1; x >>= 2;
963
2.28k
    *f++ = ((small)(x&3))-1;
964
2.28k
  }
965
12
  x = *s++;
966
12
  *f++ = ((small)(x&3))-1;
967
12
}
968
969
/* ----- encoding general polynomials */
970
971
#ifndef LPR
972
973
static void Rq_encode(unsigned char *s,const Fq *r)
974
763
{
975
763
  uint16 R[p],M[p];
976
763
  int i;
977
978
581k
  for (i = 0;i < p;++i) R[i] = r[i]+q12;
979
581k
  for (i = 0;i < p;++i) M[i] = q;
980
763
  Encode(s,R,M,p);
981
763
}
982
983
static void Rq_decode(Fq *r,const unsigned char *s)
984
25
{
985
25
  uint16 R[p],M[p];
986
25
  int i;
987
988
19.0k
  for (i = 0;i < p;++i) M[i] = q;
989
25
  Decode(R,s,M,p);
990
19.0k
  for (i = 0;i < p;++i) r[i] = ((Fq)R[i])-q12;
991
25
}
992
993
#endif
994
995
/* ----- encoding rounded polynomials */
996
997
static void Rounded_encode(unsigned char *s,const Fq *r)
998
25
{
999
25
  uint16 R[p],M[p];
1000
25
  int i;
1001
1002
19.0k
  for (i = 0;i < p;++i) R[i] = ((r[i]+q12)*10923)>>15;
1003
19.0k
  for (i = 0;i < p;++i) M[i] = (q+2)/3;
1004
25
  Encode(s,R,M,p);
1005
25
}
1006
1007
static void Rounded_decode(Fq *r,const unsigned char *s)
1008
6
{
1009
6
  uint16 R[p],M[p];
1010
6
  int i;
1011
1012
4.57k
  for (i = 0;i < p;++i) M[i] = (q+2)/3;
1013
6
  Decode(R,s,M,p);
1014
4.57k
  for (i = 0;i < p;++i) r[i] = R[i]*3-q12;
1015
6
}
1016
1017
/* ----- encoding top polynomials */
1018
1019
#ifdef LPR
1020
1021
#define Top_bytes (I/2)
1022
1023
static void Top_encode(unsigned char *s,const int8 *T)
1024
{
1025
  int i;
1026
  for (i = 0;i < Top_bytes;++i)
1027
    s[i] = T[2*i]+(T[2*i+1]<<4);
1028
}
1029
1030
static void Top_decode(int8 *T,const unsigned char *s)
1031
{
1032
  int i;
1033
  for (i = 0;i < Top_bytes;++i) {
1034
    T[2*i] = s[i]&15;
1035
    T[2*i+1] = s[i]>>4;
1036
  }
1037
}
1038
1039
#endif
1040
1041
/* ----- Streamlined NTRU Prime Core plus encoding */
1042
1043
#ifndef LPR
1044
1045
typedef small Inputs[p]; /* passed by reference */
1046
19
#define Inputs_random Short_random
1047
25
#define Inputs_encode Small_encode
1048
1.97k
#define Inputs_bytes Small_bytes
1049
1050
26.0k
#define Ciphertexts_bytes Rounded_bytes
1051
769
#define SecretKeys_bytes (2*Small_bytes)
1052
885k
#define PublicKeys_bytes Rq_bytes
1053
1054
/* pk,sk = ZKeyGen() */
1055
static void ZKeyGen(unsigned char *pk,unsigned char *sk)
1056
763
{
1057
763
  Fq h[p];
1058
763
  small f[p],v[p];
1059
1060
763
  KeyGen(h,f,v);
1061
763
  Rq_encode(pk,h);
1062
763
  Small_encode(sk,f); sk += Small_bytes;
1063
763
  Small_encode(sk,v);
1064
763
}
1065
1066
/* C = ZEncrypt(r,pk) */
1067
static void ZEncrypt(unsigned char *C,const Inputs r,const unsigned char *pk)
1068
25
{
1069
25
  Fq h[p];
1070
25
  Fq c[p];
1071
25
  Rq_decode(h,pk);
1072
25
  Encrypt(c,r,h);
1073
25
  Rounded_encode(C,c);
1074
25
}
1075
1076
/* r = ZDecrypt(C,sk) */
1077
static void ZDecrypt(Inputs r,const unsigned char *C,const unsigned char *sk)
1078
6
{
1079
6
  small f[p],v[p];
1080
6
  Fq c[p];
1081
1082
6
  Small_decode(f,sk); sk += Small_bytes;
1083
6
  Small_decode(v,sk);
1084
6
  Rounded_decode(c,C);
1085
6
  Decrypt(r,c,f,v);
1086
6
}
1087
1088
#endif
1089
1090
/* ----- NTRU LPRime Expand plus encoding */
1091
1092
#ifdef LPR
1093
1094
#define Ciphertexts_bytes (Rounded_bytes+Top_bytes)
1095
#define SecretKeys_bytes Small_bytes
1096
#define PublicKeys_bytes (Seeds_bytes+Rounded_bytes)
1097
1098
static void Inputs_random(Inputs r)
1099
{
1100
  unsigned char s[Inputs_bytes];
1101
  int i;
1102
1103
  randombytes(s,sizeof s);
1104
  for (i = 0;i < I;++i) r[i] = 1&(s[i>>3]>>(i&7));
1105
}
1106
1107
/* pk,sk = ZKeyGen() */
1108
static void ZKeyGen(unsigned char *pk,unsigned char *sk)
1109
{
1110
  Fq A[p];
1111
  small a[p];
1112
1113
  XKeyGen(pk,A,a); pk += Seeds_bytes;
1114
  Rounded_encode(pk,A);
1115
  Small_encode(sk,a);
1116
}
1117
1118
/* c = ZEncrypt(r,pk) */
1119
static void ZEncrypt(unsigned char *c,const Inputs r,const unsigned char *pk)
1120
{
1121
  Fq A[p];
1122
  Fq B[p];
1123
  int8 T[I];
1124
1125
  Rounded_decode(A,pk+Seeds_bytes);
1126
  XEncrypt(B,T,r,pk,A);
1127
  Rounded_encode(c,B); c += Rounded_bytes;
1128
  Top_encode(c,T);
1129
}
1130
1131
/* r = ZDecrypt(C,sk) */
1132
static void ZDecrypt(Inputs r,const unsigned char *c,const unsigned char *sk)
1133
{
1134
  small a[p];
1135
  Fq B[p];
1136
  int8 T[I];
1137
1138
  Small_decode(a,sk);
1139
  Rounded_decode(B,c);
1140
  Top_decode(T,c+Rounded_bytes);
1141
  XDecrypt(r,B,T,a);
1142
}
1143
1144
#endif
1145
1146
/* ----- confirmation hash */
1147
1148
26.0k
#define Confirm_bytes 32
1149
1150
/* h = HashConfirm(r,pk,cache); cache is Hash4(pk) */
1151
static void HashConfirm(unsigned char *h,const unsigned char *r,const unsigned char *pk,const unsigned char *cache)
1152
25
{
1153
25
#ifndef LPR
1154
25
  unsigned char x[Hash_bytes*2];
1155
25
  int i;
1156
1157
25
  Hash_prefix(x,3,r,Inputs_bytes);
1158
825
  for (i = 0;i < Hash_bytes;++i) x[Hash_bytes+i] = cache[i];
1159
#else
1160
  unsigned char x[Inputs_bytes+Hash_bytes];
1161
  int i;
1162
1163
  for (i = 0;i < Inputs_bytes;++i) x[i] = r[i];
1164
  for (i = 0;i < Hash_bytes;++i) x[Inputs_bytes+i] = cache[i];
1165
#endif
1166
25
  Hash_prefix(h,2,x,sizeof x);
1167
25
}
1168
1169
/* ----- session-key hash */
1170
1171
/* k = HashSession(b,y,z) */
1172
static void HashSession(unsigned char *k,int b,const unsigned char *y,const unsigned char *z)
1173
25
{
1174
25
#ifndef LPR
1175
25
  unsigned char x[Hash_bytes+Ciphertexts_bytes+Confirm_bytes];
1176
25
  int i;
1177
1178
25
  Hash_prefix(x,3,y,Inputs_bytes);
1179
26.0k
  for (i = 0;i < Ciphertexts_bytes+Confirm_bytes;++i) x[Hash_bytes+i] = z[i];
1180
#else
1181
  unsigned char x[Inputs_bytes+Ciphertexts_bytes+Confirm_bytes];
1182
  int i;
1183
1184
  for (i = 0;i < Inputs_bytes;++i) x[i] = y[i];
1185
  for (i = 0;i < Ciphertexts_bytes+Confirm_bytes;++i) x[Inputs_bytes+i] = z[i];
1186
#endif
1187
25
  Hash_prefix(k,b,x,sizeof x);
1188
25
}
1189
1190
/* ----- Streamlined NTRU Prime and NTRU LPRime */
1191
1192
/* pk,sk = KEM_KeyGen() */
1193
static void KEM_KeyGen(unsigned char *pk,unsigned char *sk)
1194
763
{
1195
763
  int i;
1196
1197
763
  ZKeyGen(pk,sk); sk += SecretKeys_bytes;
1198
884k
  for (i = 0;i < PublicKeys_bytes;++i) *sk++ = pk[i];
1199
763
  randombytes(sk,Inputs_bytes); sk += Inputs_bytes;
1200
763
  Hash_prefix(sk,4,pk,PublicKeys_bytes);
1201
763
}
1202
1203
/* c,r_enc = Hide(r,pk,cache); cache is Hash4(pk) */
1204
static void Hide(unsigned char *c,unsigned char *r_enc,const Inputs r,const unsigned char *pk,const unsigned char *cache)
1205
25
{
1206
25
  Inputs_encode(r_enc,r);
1207
25
  ZEncrypt(c,r,pk); c += Ciphertexts_bytes;
1208
25
  HashConfirm(c,r_enc,pk,cache);
1209
25
}
1210
1211
/* c,k = Encap(pk) */
1212
static void Encap(unsigned char *c,unsigned char *k,const unsigned char *pk)
1213
19
{
1214
19
  Inputs r;
1215
19
  unsigned char r_enc[Inputs_bytes];
1216
19
  unsigned char cache[Hash_bytes];
1217
1218
19
  Hash_prefix(cache,4,pk,PublicKeys_bytes);
1219
19
  Inputs_random(r);
1220
19
  Hide(c,r_enc,r,pk,cache);
1221
19
  HashSession(k,1,r_enc,c);
1222
19
}
1223
1224
/* 0 if matching ciphertext+confirm, else -1 */
1225
static int Ciphertexts_diff_mask(const unsigned char *c,const unsigned char *c2)
1226
6
{
1227
6
  uint16 differentbits = 0;
1228
6
  int len = Ciphertexts_bytes+Confirm_bytes;
1229
1230
6.24k
  while (len-- > 0) differentbits |= (*c++)^(*c2++);
1231
6
  return (1&((differentbits-1)>>8))-1;
1232
6
}
1233
1234
/* k = Decap(c,sk) */
1235
static void Decap(unsigned char *k,const unsigned char *c,const unsigned char *sk)
1236
6
{
1237
6
  const unsigned char *pk = sk + SecretKeys_bytes;
1238
6
  const unsigned char *rho = pk + PublicKeys_bytes;
1239
6
  const unsigned char *cache = rho + Inputs_bytes;
1240
6
  Inputs r;
1241
6
  unsigned char r_enc[Inputs_bytes];
1242
6
  unsigned char cnew[Ciphertexts_bytes+Confirm_bytes];
1243
6
  int mask;
1244
6
  int i;
1245
1246
6
  ZDecrypt(r,c,sk);
1247
6
  Hide(cnew,r_enc,r,pk,cache);
1248
6
  mask = Ciphertexts_diff_mask(c,cnew);
1249
1.15k
  for (i = 0;i < Inputs_bytes;++i) r_enc[i] ^= mask&(r_enc[i]^rho[i]);
1250
6
  HashSession(k,1+mask,r_enc,c);
1251
6
}
1252
1253
/* ----- crypto_kem API */
1254
1255
1256
int crypto_kem_sntrup761_keypair(unsigned char *pk,unsigned char *sk)
1257
763
{
1258
763
  KEM_KeyGen(pk,sk);
1259
763
  return 0;
1260
763
}
1261
1262
int crypto_kem_sntrup761_enc(unsigned char *c,unsigned char *k,const unsigned char *pk)
1263
19
{
1264
19
  Encap(c,k,pk);
1265
19
  return 0;
1266
19
}
1267
1268
int crypto_kem_sntrup761_dec(unsigned char *k,const unsigned char *c,const unsigned char *sk)
1269
6
{
1270
6
  Decap(k,c,sk);
1271
6
  return 0;
1272
6
}
1273
#endif /* USE_SNTRUP761X25519 */