/src/libgcrypt/mpi/mpi-inv.c

Source (jump to first uncovered line)
/* mpi-inv.c  -  MPI functions
 *  Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "g10lib.h"

/*
 * This uses a modular inversion algorithm designed by Niels Möller
 * which was implemented in Nettle.  The same algorithm was later also
 * adapted to GMP in mpn_sec_invert.
 *
 * For the description of the algorithm, see Algorithm 5 in Appendix A
 * of "Fast Software Polynomial Multiplication on ARM Processors using
 * the NEON Engine" by Danilo Câmara, Conrado P. L. Gouvêa, Julio
 * López, and Ricardo Dahab:
 *   https://hal.inria.fr/hal-01506572/document
 *
 * Note that in the reference above, at the line 2 of Algorithm 5,
 * initial value of V was described as V:=1 wrongly.  It must be V:=0.
 */
static mpi_ptr_t
mpih_invm_odd (mpi_ptr_t ap, mpi_ptr_t np, mpi_size_t nsize)
{
  int secure;
  unsigned int iterations;
  mpi_ptr_t n1hp;
  mpi_ptr_t bp;
  mpi_ptr_t up, vp;

  secure = _gcry_is_secure (ap);
  up = mpi_alloc_limb_space (nsize, secure);
  MPN_ZERO (up, nsize);
  up[0] = 1;

  vp = mpi_alloc_limb_space (nsize, secure);
  MPN_ZERO (vp, nsize);

  secure = _gcry_is_secure (np);
  bp = mpi_alloc_limb_space (nsize, secure);
  MPN_COPY (bp, np, nsize);

  n1hp = mpi_alloc_limb_space (nsize, secure);
  MPN_COPY (n1hp, np, nsize);
  _gcry_mpih_rshift (n1hp, n1hp, nsize, 1);
  _gcry_mpih_add_1 (n1hp, n1hp, nsize, 1);

  iterations = 2 * nsize * BITS_PER_MPI_LIMB;

  while (iterations-- > 0)
    {
      mpi_limb_t odd_a, odd_u, underflow, borrow;

      odd_a = ap[0] & 1;

      underflow = mpih_sub_n_cond (ap, ap, bp, nsize, odd_a);
      mpih_add_n_cond (bp, bp, ap, nsize, underflow);
      mpih_abs_cond (ap, ap, nsize, underflow);
      mpih_swap_cond (up, vp, nsize, underflow);

      _gcry_mpih_rshift (ap, ap, nsize, 1);

      borrow = mpih_sub_n_cond (up, up, vp, nsize, odd_a);
      mpih_add_n_cond (up, up, np, nsize, borrow);

      odd_u = _gcry_mpih_rshift (up, up, nsize, 1) != 0;
      mpih_add_n_cond (up, up, n1hp, nsize, odd_u);
    }

  _gcry_mpi_free_limb_space (n1hp, nsize);
  _gcry_mpi_free_limb_space (up, nsize);

  if (_gcry_mpih_cmp_ui (bp, nsize, 1) == 0)
    {
      /* Inverse exists.  */
      _gcry_mpi_free_limb_space (bp, nsize);
      return vp;
    }
  else
    {
      _gcry_mpi_free_limb_space (bp, nsize);
      _gcry_mpi_free_limb_space (vp, nsize);
      return NULL;
    }
}


/*
 * Calculate the multiplicative inverse X of A mod 2^K
 * A must be positive.
 *
 * See section 7 in "A New Algorithm for Inversion mod p^k" by Çetin
 * Kaya Koç: https://eprint.iacr.org/2017/411.pdf
 */
static mpi_ptr_t
mpih_invm_pow2 (mpi_ptr_t ap, mpi_size_t asize, unsigned int k)
{
  int secure = _gcry_is_secure (ap);
  mpi_size_t i;
  unsigned int iterations;
  mpi_ptr_t xp, wp, up, vp;
  mpi_size_t usize;

  if (!(ap[0] & 1))
    return NULL;

  iterations = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB)
    * BITS_PER_MPI_LIMB;
  usize = iterations / BITS_PER_MPI_LIMB;

  up = mpi_alloc_limb_space (usize, secure);
  MPN_ZERO (up, usize);
  up[0] = 1;

  vp = mpi_alloc_limb_space (usize, secure);
  for (i = 0; i < (usize < asize ? usize : asize); i++)
    vp[i] = ap[i];
  for (; i < usize; i++)
    vp[i] = 0;
  if ((k % BITS_PER_MPI_LIMB))
    for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
      vp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);

  wp = mpi_alloc_limb_space (usize, secure);
  MPN_COPY (wp, up, usize);

  xp = mpi_alloc_limb_space (usize, secure);
  MPN_ZERO (xp, usize);

  /*
   * It can be considered that overflow at _gcry_mpih_sub_n results
   * adding 2^(USIZE*BITS_PER_MPI_LIMB), which is no problem in modulo
   * 2^K computation.
   */
  for (i = 0; i < iterations; i++)
    {
      int b0 = (up[0] & 1);

      xp[i/BITS_PER_MPI_LIMB] |= ((mpi_limb_t)b0<<(i%BITS_PER_MPI_LIMB));
      _gcry_mpih_sub_n (wp, up, vp, usize);
      mpih_set_cond (up, wp, usize, b0);
      _gcry_mpih_rshift (up, up, usize, 1);
    }

  if ((k % BITS_PER_MPI_LIMB))
    for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
      xp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);

  _gcry_mpi_free_limb_space (up, usize);
  _gcry_mpi_free_limb_space (vp, usize);
  _gcry_mpi_free_limb_space (wp, usize);

  return xp;
}


/****************
 * Calculate the multiplicative inverse X of A mod N
 * That is: Find the solution x for
 *    1 = (a*x) mod n
 */
static int
mpi_invm_generic (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
{
    int is_gcd_one;
#if 0
    /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X) */
    gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, q, t1, t2, t3;

    u = mpi_copy(a);
    v = mpi_copy(n);
    u1 = mpi_alloc_set_ui(1);
    u2 = mpi_alloc_set_ui(0);
    u3 = mpi_copy(u);
    v1 = mpi_alloc_set_ui(0);
    v2 = mpi_alloc_set_ui(1);
    v3 = mpi_copy(v);
    q  = mpi_alloc( mpi_get_nlimbs(u)+1 );
    t1 = mpi_alloc( mpi_get_nlimbs(u)+1 );
    t2 = mpi_alloc( mpi_get_nlimbs(u)+1 );
    t3 = mpi_alloc( mpi_get_nlimbs(u)+1 );
    while( mpi_cmp_ui( v3, 0 ) ) {
  mpi_fdiv_q( q, u3, v3 );
  mpi_mul(t1, v1, q); mpi_mul(t2, v2, q); mpi_mul(t3, v3, q);
  mpi_sub(t1, u1, t1); mpi_sub(t2, u2, t2); mpi_sub(t3, u3, t3);
  mpi_set(u1, v1); mpi_set(u2, v2); mpi_set(u3, v3);
  mpi_set(v1, t1); mpi_set(v2, t2); mpi_set(v3, t3);
    }
    /*  log_debug("result:\n");
  log_mpidump("q =", q );
  log_mpidump("u1=", u1);
  log_mpidump("u2=", u2);
  log_mpidump("u3=", u3);
  log_mpidump("v1=", v1);
  log_mpidump("v2=", v2); */
    mpi_set(x, u1);

    is_gcd_one = (mpi_cmp_ui (u3, 1) == 0);

    mpi_free(u1);
    mpi_free(u2);
    mpi_free(u3);
    mpi_free(v1);
    mpi_free(v2);
    mpi_free(v3);
    mpi_free(q);
    mpi_free(t1);
    mpi_free(t2);
    mpi_free(t3);
    mpi_free(u);
    mpi_free(v);
#elif 0
    /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
     * modified according to Michael Penk's solution for Exercise 35
     * (in the first edition)
     * In the third edition, it's Exercise 39, and it is described in
     * page 646 of ANSWERS TO EXERCISES chapter.
     */

    /* FIXME: we can simplify this in most cases (see Knuth) */
    gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, t1, t2, t3;
    unsigned k;
    int sign;

    u = mpi_copy(a);
    v = mpi_copy(n);
    for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
  mpi_rshift(u, u, 1);
  mpi_rshift(v, v, 1);
    }


    u1 = mpi_alloc_set_ui(1);
    u2 = mpi_alloc_set_ui(0);
    u3 = mpi_copy(u);
    v1 = mpi_copy(v);          /* !-- used as const 1 */
    v2 = mpi_alloc( mpi_get_nlimbs(u) ); mpi_sub( v2, u1, u );
    v3 = mpi_copy(v);
    if( mpi_test_bit(u, 0) ) { /* u is odd */
  t1 = mpi_alloc_set_ui(0);
  t2 = mpi_alloc_set_ui(1); t2->sign = 1;
  t3 = mpi_copy(v); t3->sign = !t3->sign;
  goto Y4;
    }
    else {
  t1 = mpi_alloc_set_ui(1);
  t2 = mpi_alloc_set_ui(0);
  t3 = mpi_copy(u);
    }
    do {
  do {
      if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
    mpi_add(t1, t1, v);
    mpi_sub(t2, t2, u);
      }
      mpi_rshift(t1, t1, 1);
      mpi_rshift(t2, t2, 1);
      mpi_rshift(t3, t3, 1);
    Y4:
      ;
  } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */

  if( !t3->sign ) {
      mpi_set(u1, t1);
      mpi_set(u2, t2);
      mpi_set(u3, t3);
  }
  else {
      mpi_sub(v1, v, t1);
      sign = u->sign; u->sign = !u->sign;
      mpi_sub(v2, u, t2);
      u->sign = sign;
      sign = t3->sign; t3->sign = !t3->sign;
      mpi_set(v3, t3);
      t3->sign = sign;
  }
  mpi_sub(t1, u1, v1);
  mpi_sub(t2, u2, v2);
  mpi_sub(t3, u3, v3);
  if( t1->sign ) {
      mpi_add(t1, t1, v);
      mpi_sub(t2, t2, u);
  }
    } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
    /* mpi_lshift( u3, u3, k ); */
    is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
    mpi_set(x, u1);

    mpi_free(u1);
    mpi_free(u2);
    mpi_free(u3);
    mpi_free(v1);
    mpi_free(v2);
    mpi_free(v3);
    mpi_free(t1);
    mpi_free(t2);
    mpi_free(t3);
#else
    /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
     * modified according to Michael Penk's solution for Exercise 35
     * with further enhancement */
    /* The reference in the comment above is for the first edition.
     * In the third edition, it's Exercise 39, and it is described in
     * page 646 of ANSWERS TO EXERCISES chapter.
     */
    gcry_mpi_t u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3;
    unsigned k;
    int sign;
    int odd ;

    u = mpi_copy(a);
    v = mpi_copy(n);

    for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
  mpi_rshift(u, u, 1);
  mpi_rshift(v, v, 1);
    }
    odd = mpi_test_bit(v,0);

    u1 = mpi_alloc_set_ui(1);
    if( !odd )
  u2 = mpi_alloc_set_ui(0);
    u3 = mpi_copy(u);
    v1 = mpi_copy(v);
    if( !odd ) {
  v2 = mpi_alloc( mpi_get_nlimbs(u) );
  mpi_sub( v2, u1, u ); /* U is used as const 1 */
    }
    v3 = mpi_copy(v);
    if( mpi_test_bit(u, 0) ) { /* u is odd */
  t1 = mpi_alloc_set_ui(0);
  if( !odd ) {
      t2 = mpi_alloc_set_ui(1); t2->sign = 1;
  }
  t3 = mpi_copy(v); t3->sign = !t3->sign;
  goto Y4;
    }
    else {
  t1 = mpi_alloc_set_ui(1);
  if( !odd )
      t2 = mpi_alloc_set_ui(0);
  t3 = mpi_copy(u);
    }
    do {
  do {
      if( !odd ) {
    if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
        mpi_add(t1, t1, v);
        mpi_sub(t2, t2, u);
    }
    mpi_rshift(t1, t1, 1);
    mpi_rshift(t2, t2, 1);
    mpi_rshift(t3, t3, 1);
      }
      else {
    if( mpi_test_bit(t1, 0) )
        mpi_add(t1, t1, v);
    mpi_rshift(t1, t1, 1);
    mpi_rshift(t3, t3, 1);
      }
    Y4:
      ;
  } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */

  if( !t3->sign ) {
      mpi_set(u1, t1);
      if( !odd )
    mpi_set(u2, t2);
      mpi_set(u3, t3);
  }
  else {
      mpi_sub(v1, v, t1);
      sign = u->sign; u->sign = !u->sign;
      if( !odd )
    mpi_sub(v2, u, t2);
      u->sign = sign;
      sign = t3->sign; t3->sign = !t3->sign;
      mpi_set(v3, t3);
      t3->sign = sign;
  }
  mpi_sub(t1, u1, v1);
  if( !odd )
      mpi_sub(t2, u2, v2);
  mpi_sub(t3, u3, v3);
  if( t1->sign ) {
      mpi_add(t1, t1, v);
      if( !odd )
    mpi_sub(t2, t2, u);
  }
    } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
    /* mpi_lshift( u3, u3, k ); */
    is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
    mpi_set(x, u1);

    mpi_free(u1);
    mpi_free(v1);
    mpi_free(t1);
    if( !odd ) {
  mpi_free(u2);
  mpi_free(v2);
  mpi_free(t2);
    }
    mpi_free(u3);
    mpi_free(v3);
    mpi_free(t3);

    mpi_free(u);
    mpi_free(v);
#endif
    return is_gcd_one;
}


/*
 * Set X to the multiplicative inverse of A mod M.  Return true if the
 * inverse exists.
 */
int
_gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
{
  mpi_ptr_t ap, xp;

  if (!mpi_cmp_ui (a, 0))
    return 0; /* Inverse does not exists.  */
  if (!mpi_cmp_ui (n, 1))
    return 0; /* Inverse does not exists.  */

  if (mpi_test_bit (n, 0))
    {
      if (a->nlimbs <= n->nlimbs)
        {
          ap = mpi_alloc_limb_space (n->nlimbs, _gcry_is_secure (a->d));
          MPN_ZERO (ap, n->nlimbs);
          MPN_COPY (ap, a->d, a->nlimbs);
        }
      else
        ap = _gcry_mpih_mod (a->d, a->nlimbs, n->d, n->nlimbs);

      xp = mpih_invm_odd (ap, n->d, n->nlimbs);
      _gcry_mpi_free_limb_space (ap, n->nlimbs);

      if (xp)
        {
          _gcry_mpi_assign_limb_space (x, xp, n->nlimbs);
          x->nlimbs = n->nlimbs;
          return 1;
        }
      else
        return 0; /* Inverse does not exists.  */
    }
  else if (!a->sign && !n->sign)
    {
      unsigned int k = mpi_trailing_zeros (n);
      mpi_size_t x1size = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB);
      mpi_size_t hsize;
      gcry_mpi_t q;
      mpi_ptr_t x1p, x2p, q_invp, hp, diffp;
      mpi_size_t i;

      if (k == _gcry_mpi_get_nbits (n) - 1)
        {
          x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);

          if (x1p)
            {
              _gcry_mpi_assign_limb_space (x, x1p, x1size);
              x->nlimbs = x1size;
              return 1;
            }
          else
            return 0; /* Inverse does not exists.  */
        }

      /* N can be expressed as P * Q, where P = 2^K.  P and Q are coprime.  */
      /*
       * Compute X1 = invm (A, P) and X2 = invm (A, Q), and combine
       * them by Garner's formula, to get X = invm (A, P*Q).
       * A special case of Chinese Remainder Theorem.
       */

      /* X1 = invm (A, P) */
      x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);
      if (!x1p)
        return 0;               /* Inverse does not exists.  */

      /* Q = N / P          */
      q = mpi_new (0);
      mpi_rshift (q, n, k);

      /* X2 = invm (A%Q, Q) */
      ap = _gcry_mpih_mod (a->d, a->nlimbs, q->d, q->nlimbs);
      x2p = mpih_invm_odd (ap, q->d, q->nlimbs);
      _gcry_mpi_free_limb_space (ap, q->nlimbs);
      if (!x2p)
        {
          _gcry_mpi_free_limb_space (x1p, x1size);
          mpi_free (q);
          return 0;             /* Inverse does not exists.  */
        }

      /* Q_inv = Q^(-1) = invm (Q, P) */
      q_invp = mpih_invm_pow2 (q->d, q->nlimbs, k);

      /* H = (X1 - X2) * Q_inv % P */
      diffp = mpi_alloc_limb_space (x1size, _gcry_is_secure (a->d));
      if (x1size >= q->nlimbs)
        _gcry_mpih_sub (diffp, x1p, x1size, x2p, q->nlimbs);
      else
  _gcry_mpih_sub_n (diffp, x1p, x2p, x1size);
      _gcry_mpi_free_limb_space (x1p, x1size);
      if ((k % BITS_PER_MPI_LIMB))
        for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
          diffp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);

      hsize = x1size * 2;
      hp = mpi_alloc_limb_space (hsize, _gcry_is_secure (a->d));
      _gcry_mpih_mul_n (hp, diffp, q_invp, x1size);
      _gcry_mpi_free_limb_space (diffp, x1size);
      _gcry_mpi_free_limb_space (q_invp, x1size);

      for (i = x1size; i < hsize; i++)
        hp[i] = 0;
      if ((k % BITS_PER_MPI_LIMB))
        for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
          hp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);

      xp = mpi_alloc_limb_space (x1size + q->nlimbs, _gcry_is_secure (a->d));
      if (x1size >= q->nlimbs)
        _gcry_mpih_mul (xp, hp, x1size, q->d, q->nlimbs);
      else
        _gcry_mpih_mul (xp, q->d, q->nlimbs, hp, x1size);

      _gcry_mpi_free_limb_space (hp, hsize);

      _gcry_mpih_add (xp, xp, x1size + q->nlimbs, x2p, q->nlimbs);
      _gcry_mpi_free_limb_space (x2p, q->nlimbs);

      _gcry_mpi_assign_limb_space (x, xp, x1size + q->nlimbs);
      x->nlimbs = x1size + q->nlimbs;

      mpi_free (q);

      return 1;
    }
  else
    return mpi_invm_generic (x, a, n);
}

Coverage Report

Created: 2022-12-08 06:10

Line	Count	Source (jump to first uncovered line)
1		/* mpi-inv.c - MPI functions
2		* Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
3		*
4		* This file is part of Libgcrypt.
5		*
6		* Libgcrypt is free software; you can redistribute it and/or modify
7		* it under the terms of the GNU Lesser General Public License as
8		* published by the Free Software Foundation; either version 2.1 of
9		* the License, or (at your option) any later version.
10		*
11		* Libgcrypt is distributed in the hope that it will be useful,
12		* but WITHOUT ANY WARRANTY; without even the implied warranty of
13		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		* GNU Lesser General Public License for more details.
15		*
16		* You should have received a copy of the GNU Lesser General Public
17		* License along with this program; if not, see <http://www.gnu.org/licenses/>.
18		*/
19
20		#include <config.h>
21		#include <stdio.h>
22		#include <stdlib.h>
23		#include "mpi-internal.h"
24		#include "g10lib.h"
25
26		/*
27		* This uses a modular inversion algorithm designed by Niels Möller
28		* which was implemented in Nettle. The same algorithm was later also
29		* adapted to GMP in mpn_sec_invert.
30		*
31		* For the description of the algorithm, see Algorithm 5 in Appendix A
32		* of "Fast Software Polynomial Multiplication on ARM Processors using
33		* the NEON Engine" by Danilo Câmara, Conrado P. L. Gouvêa, Julio
34		* López, and Ricardo Dahab:
35		* https://hal.inria.fr/hal-01506572/document
36		*
37		* Note that in the reference above, at the line 2 of Algorithm 5,
38		* initial value of V was described as V:=1 wrongly. It must be V:=0.
39		*/
40		static mpi_ptr_t
41		mpih_invm_odd (mpi_ptr_t ap, mpi_ptr_t np, mpi_size_t nsize)
42	48	{
43	48	int secure;
44	48	unsigned int iterations;
45	48	mpi_ptr_t n1hp;
46	48	mpi_ptr_t bp;
47	48	mpi_ptr_t up, vp;
48
49	48	secure = _gcry_is_secure (ap);
50	48	up = mpi_alloc_limb_space (nsize, secure);
51	48	MPN_ZERO (up, nsize);
52	48	up[0] = 1;
53
54	48	vp = mpi_alloc_limb_space (nsize, secure);
55	48	MPN_ZERO (vp, nsize);
56
57	48	secure = _gcry_is_secure (np);
58	48	bp = mpi_alloc_limb_space (nsize, secure);
59	48	MPN_COPY (bp, np, nsize);
60
61	48	n1hp = mpi_alloc_limb_space (nsize, secure);
62	48	MPN_COPY (n1hp, np, nsize);
63	48	_gcry_mpih_rshift (n1hp, n1hp, nsize, 1);
64	48	_gcry_mpih_add_1 (n1hp, n1hp, nsize, 1);
65
66	48	iterations = 2 * nsize * BITS_PER_MPI_LIMB;
67
68	24.6k	while (iterations-- > 0)
69	24.5k	{
70	24.5k	mpi_limb_t odd_a, odd_u, underflow, borrow;
71
72	24.5k	odd_a = ap[0] & 1;
73
74	24.5k	underflow = mpih_sub_n_cond (ap, ap, bp, nsize, odd_a);
75	24.5k	mpih_add_n_cond (bp, bp, ap, nsize, underflow);
76	24.5k	mpih_abs_cond (ap, ap, nsize, underflow);
77	24.5k	mpih_swap_cond (up, vp, nsize, underflow);
78
79	24.5k	_gcry_mpih_rshift (ap, ap, nsize, 1);
80
81	24.5k	borrow = mpih_sub_n_cond (up, up, vp, nsize, odd_a);
82	24.5k	mpih_add_n_cond (up, up, np, nsize, borrow);
83
84	24.5k	odd_u = _gcry_mpih_rshift (up, up, nsize, 1) != 0;
85	24.5k	mpih_add_n_cond (up, up, n1hp, nsize, odd_u);
86	24.5k	}
87
88	48	_gcry_mpi_free_limb_space (n1hp, nsize);
89	48	_gcry_mpi_free_limb_space (up, nsize);
90
91	48	if (_gcry_mpih_cmp_ui (bp, nsize, 1) == 0)
92	48	{
93		/* Inverse exists. */
94	48	_gcry_mpi_free_limb_space (bp, nsize);
95	48	return vp;
96	48	}
97	0	else
98	0	{
99	0	_gcry_mpi_free_limb_space (bp, nsize);
100	0	_gcry_mpi_free_limb_space (vp, nsize);
101	0	return NULL;
102	0	}
103	48	}
104
105
106		/*
107		* Calculate the multiplicative inverse X of A mod 2^K
108		* A must be positive.
109		*
110		* See section 7 in "A New Algorithm for Inversion mod p^k" by Çetin
111		* Kaya Koç: https://eprint.iacr.org/2017/411.pdf
112		*/
113		static mpi_ptr_t
114		mpih_invm_pow2 (mpi_ptr_t ap, mpi_size_t asize, unsigned int k)
115	0	{
116	0	int secure = _gcry_is_secure (ap);
117	0	mpi_size_t i;
118	0	unsigned int iterations;
119	0	mpi_ptr_t xp, wp, up, vp;
120	0	mpi_size_t usize;
121
122	0	if (!(ap[0] & 1))
123	0	return NULL;
124
125	0	iterations = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB)
126	0	* BITS_PER_MPI_LIMB;
127	0	usize = iterations / BITS_PER_MPI_LIMB;
128
129	0	up = mpi_alloc_limb_space (usize, secure);
130	0	MPN_ZERO (up, usize);
131	0	up[0] = 1;
132
133	0	vp = mpi_alloc_limb_space (usize, secure);
134	0	for (i = 0; i < (usize < asize ? usize : asize); i++)
135	0	vp[i] = ap[i];
136	0	for (; i < usize; i++)
137	0	vp[i] = 0;
138	0	if ((k % BITS_PER_MPI_LIMB))
139	0	for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
140	0	vp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
141
142	0	wp = mpi_alloc_limb_space (usize, secure);
143	0	MPN_COPY (wp, up, usize);
144
145	0	xp = mpi_alloc_limb_space (usize, secure);
146	0	MPN_ZERO (xp, usize);
147
148		/*
149		* It can be considered that overflow at _gcry_mpih_sub_n results
150		* adding 2^(USIZE*BITS_PER_MPI_LIMB), which is no problem in modulo
151		* 2^K computation.
152		*/
153	0	for (i = 0; i < iterations; i++)
154	0	{
155	0	int b0 = (up[0] & 1);
156
157	0	xp[i/BITS_PER_MPI_LIMB] \|= ((mpi_limb_t)b0<<(i%BITS_PER_MPI_LIMB));
158	0	_gcry_mpih_sub_n (wp, up, vp, usize);
159	0	mpih_set_cond (up, wp, usize, b0);
160	0	_gcry_mpih_rshift (up, up, usize, 1);
161	0	}
162
163	0	if ((k % BITS_PER_MPI_LIMB))
164	0	for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
165	0	xp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
166
167	0	_gcry_mpi_free_limb_space (up, usize);
168	0	_gcry_mpi_free_limb_space (vp, usize);
169	0	_gcry_mpi_free_limb_space (wp, usize);
170
171	0	return xp;
172	0	}
173
174
175		/****************
176		* Calculate the multiplicative inverse X of A mod N
177		* That is: Find the solution x for
178		* 1 = (a*x) mod n
179		*/
180		static int
181		mpi_invm_generic (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
182	0	{
183	0	int is_gcd_one;
184		#if 0
185		/* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X) */
186		gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, q, t1, t2, t3;
187
188		u = mpi_copy(a);
189		v = mpi_copy(n);
190		u1 = mpi_alloc_set_ui(1);
191		u2 = mpi_alloc_set_ui(0);
192		u3 = mpi_copy(u);
193		v1 = mpi_alloc_set_ui(0);
194		v2 = mpi_alloc_set_ui(1);
195		v3 = mpi_copy(v);
196		q = mpi_alloc( mpi_get_nlimbs(u)+1 );
197		t1 = mpi_alloc( mpi_get_nlimbs(u)+1 );
198		t2 = mpi_alloc( mpi_get_nlimbs(u)+1 );
199		t3 = mpi_alloc( mpi_get_nlimbs(u)+1 );
200		while( mpi_cmp_ui( v3, 0 ) ) {
201		mpi_fdiv_q( q, u3, v3 );
202		mpi_mul(t1, v1, q); mpi_mul(t2, v2, q); mpi_mul(t3, v3, q);
203		mpi_sub(t1, u1, t1); mpi_sub(t2, u2, t2); mpi_sub(t3, u3, t3);
204		mpi_set(u1, v1); mpi_set(u2, v2); mpi_set(u3, v3);
205		mpi_set(v1, t1); mpi_set(v2, t2); mpi_set(v3, t3);
206		}
207		/* log_debug("result:\n");
208		log_mpidump("q =", q );
209		log_mpidump("u1=", u1);
210		log_mpidump("u2=", u2);
211		log_mpidump("u3=", u3);
212		log_mpidump("v1=", v1);
213		log_mpidump("v2=", v2); */
214		mpi_set(x, u1);
215
216		is_gcd_one = (mpi_cmp_ui (u3, 1) == 0);
217
218		mpi_free(u1);
219		mpi_free(u2);
220		mpi_free(u3);
221		mpi_free(v1);
222		mpi_free(v2);
223		mpi_free(v3);
224		mpi_free(q);
225		mpi_free(t1);
226		mpi_free(t2);
227		mpi_free(t3);
228		mpi_free(u);
229		mpi_free(v);
230		#elif 0
231		/* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
232		* modified according to Michael Penk's solution for Exercise 35
233		* (in the first edition)
234		* In the third edition, it's Exercise 39, and it is described in
235		* page 646 of ANSWERS TO EXERCISES chapter.
236		*/
237
238		/* FIXME: we can simplify this in most cases (see Knuth) */
239		gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, t1, t2, t3;
240		unsigned k;
241		int sign;
242
243		u = mpi_copy(a);
244		v = mpi_copy(n);
245		for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
246		mpi_rshift(u, u, 1);
247		mpi_rshift(v, v, 1);
248		}
249
250
251		u1 = mpi_alloc_set_ui(1);
252		u2 = mpi_alloc_set_ui(0);
253		u3 = mpi_copy(u);
254		v1 = mpi_copy(v); /* !-- used as const 1 */
255		v2 = mpi_alloc( mpi_get_nlimbs(u) ); mpi_sub( v2, u1, u );
256		v3 = mpi_copy(v);
257		if( mpi_test_bit(u, 0) ) { /* u is odd */
258		t1 = mpi_alloc_set_ui(0);
259		t2 = mpi_alloc_set_ui(1); t2->sign = 1;
260		t3 = mpi_copy(v); t3->sign = !t3->sign;
261		goto Y4;
262		}
263		else {
264		t1 = mpi_alloc_set_ui(1);
265		t2 = mpi_alloc_set_ui(0);
266		t3 = mpi_copy(u);
267		}
268		do {
269		do {
270		if( mpi_test_bit(t1, 0) \|\| mpi_test_bit(t2, 0) ) { /* one is odd */
271		mpi_add(t1, t1, v);
272		mpi_sub(t2, t2, u);
273		}
274		mpi_rshift(t1, t1, 1);
275		mpi_rshift(t2, t2, 1);
276		mpi_rshift(t3, t3, 1);
277		Y4:
278		;
279		} while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
280
281		if( !t3->sign ) {
282		mpi_set(u1, t1);
283		mpi_set(u2, t2);
284		mpi_set(u3, t3);
285		}
286		else {
287		mpi_sub(v1, v, t1);
288		sign = u->sign; u->sign = !u->sign;
289		mpi_sub(v2, u, t2);
290		u->sign = sign;
291		sign = t3->sign; t3->sign = !t3->sign;
292		mpi_set(v3, t3);
293		t3->sign = sign;
294		}
295		mpi_sub(t1, u1, v1);
296		mpi_sub(t2, u2, v2);
297		mpi_sub(t3, u3, v3);
298		if( t1->sign ) {
299		mpi_add(t1, t1, v);
300		mpi_sub(t2, t2, u);
301		}
302		} while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
303		/* mpi_lshift( u3, u3, k ); */
304		is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
305		mpi_set(x, u1);
306
307		mpi_free(u1);
308		mpi_free(u2);
309		mpi_free(u3);
310		mpi_free(v1);
311		mpi_free(v2);
312		mpi_free(v3);
313		mpi_free(t1);
314		mpi_free(t2);
315		mpi_free(t3);
316		#else
317		/* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
318		* modified according to Michael Penk's solution for Exercise 35
319		* with further enhancement */
320		/* The reference in the comment above is for the first edition.
321		* In the third edition, it's Exercise 39, and it is described in
322		* page 646 of ANSWERS TO EXERCISES chapter.
323		*/
324	0	gcry_mpi_t u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3;
325	0	unsigned k;
326	0	int sign;
327	0	int odd ;
328
329	0	u = mpi_copy(a);
330	0	v = mpi_copy(n);
331
332	0	for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
333	0	mpi_rshift(u, u, 1);
334	0	mpi_rshift(v, v, 1);
335	0	}
336	0	odd = mpi_test_bit(v,0);
337
338	0	u1 = mpi_alloc_set_ui(1);
339	0	if( !odd )
340	0	u2 = mpi_alloc_set_ui(0);
341	0	u3 = mpi_copy(u);
342	0	v1 = mpi_copy(v);
343	0	if( !odd ) {
344	0	v2 = mpi_alloc( mpi_get_nlimbs(u) );
345	0	mpi_sub( v2, u1, u ); /* U is used as const 1 */
346	0	}
347	0	v3 = mpi_copy(v);
348	0	if( mpi_test_bit(u, 0) ) { /* u is odd */
349	0	t1 = mpi_alloc_set_ui(0);
350	0	if( !odd ) {
351	0	t2 = mpi_alloc_set_ui(1); t2->sign = 1;
352	0	}
353	0	t3 = mpi_copy(v); t3->sign = !t3->sign;
354	0	goto Y4;
355	0	}
356	0	else {
357	0	t1 = mpi_alloc_set_ui(1);
358	0	if( !odd )
359	0	t2 = mpi_alloc_set_ui(0);
360	0	t3 = mpi_copy(u);
361	0	}
362	0	do {
363	0	do {
364	0	if( !odd ) {
365	0	if( mpi_test_bit(t1, 0) \|\| mpi_test_bit(t2, 0) ) { /* one is odd */
366	0	mpi_add(t1, t1, v);
367	0	mpi_sub(t2, t2, u);
368	0	}
369	0	mpi_rshift(t1, t1, 1);
370	0	mpi_rshift(t2, t2, 1);
371	0	mpi_rshift(t3, t3, 1);
372	0	}
373	0	else {
374	0	if( mpi_test_bit(t1, 0) )
375	0	mpi_add(t1, t1, v);
376	0	mpi_rshift(t1, t1, 1);
377	0	mpi_rshift(t3, t3, 1);
378	0	}
379	0	Y4:
380	0	;
381	0	} while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
382
383	0	if( !t3->sign ) {
384	0	mpi_set(u1, t1);
385	0	if( !odd )
386	0	mpi_set(u2, t2);
387	0	mpi_set(u3, t3);
388	0	}
389	0	else {
390	0	mpi_sub(v1, v, t1);
391	0	sign = u->sign; u->sign = !u->sign;
392	0	if( !odd )
393	0	mpi_sub(v2, u, t2);
394	0	u->sign = sign;
395	0	sign = t3->sign; t3->sign = !t3->sign;
396	0	mpi_set(v3, t3);
397	0	t3->sign = sign;
398	0	}
399	0	mpi_sub(t1, u1, v1);
400	0	if( !odd )
401	0	mpi_sub(t2, u2, v2);
402	0	mpi_sub(t3, u3, v3);
403	0	if( t1->sign ) {
404	0	mpi_add(t1, t1, v);
405	0	if( !odd )
406	0	mpi_sub(t2, t2, u);
407	0	}
408	0	} while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
409		/* mpi_lshift( u3, u3, k ); */
410	0	is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
411	0	mpi_set(x, u1);
412
413	0	mpi_free(u1);
414	0	mpi_free(v1);
415	0	mpi_free(t1);
416	0	if( !odd ) {
417	0	mpi_free(u2);
418	0	mpi_free(v2);
419	0	mpi_free(t2);
420	0	}
421	0	mpi_free(u3);
422	0	mpi_free(v3);
423	0	mpi_free(t3);
424
425	0	mpi_free(u);
426	0	mpi_free(v);
427	0	#endif
428	0	return is_gcd_one;
429	0	}
430
431
432		/*
433		* Set X to the multiplicative inverse of A mod M. Return true if the
434		* inverse exists.
435		*/
436		int
437		_gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
438	48	{
439	48	mpi_ptr_t ap, xp;
440
441	48	if (!mpi_cmp_ui (a, 0))
442	0	return 0; /* Inverse does not exists. */
443	48	if (!mpi_cmp_ui (n, 1))
444	0	return 0; /* Inverse does not exists. */
445
446	48	if (mpi_test_bit (n, 0))
447	48	{
448	48	if (a->nlimbs <= n->nlimbs)
449	48	{
450	48	ap = mpi_alloc_limb_space (n->nlimbs, _gcry_is_secure (a->d));
451	48	MPN_ZERO (ap, n->nlimbs);
452	48	MPN_COPY (ap, a->d, a->nlimbs);
453	48	}
454	0	else
455	0	ap = _gcry_mpih_mod (a->d, a->nlimbs, n->d, n->nlimbs);
456
457	48	xp = mpih_invm_odd (ap, n->d, n->nlimbs);
458	48	_gcry_mpi_free_limb_space (ap, n->nlimbs);
459
460	48	if (xp)
461	48	{
462	48	_gcry_mpi_assign_limb_space (x, xp, n->nlimbs);
463	48	x->nlimbs = n->nlimbs;
464	48	return 1;
465	48	}
466	0	else
467	0	return 0; /* Inverse does not exists. */
468	48	}
469	0	else if (!a->sign && !n->sign)
470	0	{
471	0	unsigned int k = mpi_trailing_zeros (n);
472	0	mpi_size_t x1size = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB);
473	0	mpi_size_t hsize;
474	0	gcry_mpi_t q;
475	0	mpi_ptr_t x1p, x2p, q_invp, hp, diffp;
476	0	mpi_size_t i;
477
478	0	if (k == _gcry_mpi_get_nbits (n) - 1)
479	0	{
480	0	x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);
481
482	0	if (x1p)
483	0	{
484	0	_gcry_mpi_assign_limb_space (x, x1p, x1size);
485	0	x->nlimbs = x1size;
486	0	return 1;
487	0	}
488	0	else
489	0	return 0; /* Inverse does not exists. */
490	0	}
491
492		/* N can be expressed as P * Q, where P = 2^K. P and Q are coprime. */
493		/*
494		* Compute X1 = invm (A, P) and X2 = invm (A, Q), and combine
495		* them by Garner's formula, to get X = invm (A, P*Q).
496		* A special case of Chinese Remainder Theorem.
497		*/
498
499		/* X1 = invm (A, P) */
500	0	x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);
501	0	if (!x1p)
502	0	return 0; /* Inverse does not exists. */
503
504		/* Q = N / P */
505	0	q = mpi_new (0);
506	0	mpi_rshift (q, n, k);
507
508		/* X2 = invm (A%Q, Q) */
509	0	ap = _gcry_mpih_mod (a->d, a->nlimbs, q->d, q->nlimbs);
510	0	x2p = mpih_invm_odd (ap, q->d, q->nlimbs);
511	0	_gcry_mpi_free_limb_space (ap, q->nlimbs);
512	0	if (!x2p)
513	0	{
514	0	_gcry_mpi_free_limb_space (x1p, x1size);
515	0	mpi_free (q);
516	0	return 0; /* Inverse does not exists. */
517	0	}
518
519		/* Q_inv = Q^(-1) = invm (Q, P) */
520	0	q_invp = mpih_invm_pow2 (q->d, q->nlimbs, k);
521
522		/* H = (X1 - X2) * Q_inv % P */
523	0	diffp = mpi_alloc_limb_space (x1size, _gcry_is_secure (a->d));
524	0	if (x1size >= q->nlimbs)
525	0	_gcry_mpih_sub (diffp, x1p, x1size, x2p, q->nlimbs);
526	0	else
527	0	_gcry_mpih_sub_n (diffp, x1p, x2p, x1size);
528	0	_gcry_mpi_free_limb_space (x1p, x1size);
529	0	if ((k % BITS_PER_MPI_LIMB))
530	0	for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
531	0	diffp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
532
533	0	hsize = x1size * 2;
534	0	hp = mpi_alloc_limb_space (hsize, _gcry_is_secure (a->d));
535	0	_gcry_mpih_mul_n (hp, diffp, q_invp, x1size);
536	0	_gcry_mpi_free_limb_space (diffp, x1size);
537	0	_gcry_mpi_free_limb_space (q_invp, x1size);
538
539	0	for (i = x1size; i < hsize; i++)
540	0	hp[i] = 0;
541	0	if ((k % BITS_PER_MPI_LIMB))
542	0	for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
543	0	hp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
544
545	0	xp = mpi_alloc_limb_space (x1size + q->nlimbs, _gcry_is_secure (a->d));
546	0	if (x1size >= q->nlimbs)
547	0	_gcry_mpih_mul (xp, hp, x1size, q->d, q->nlimbs);
548	0	else
549	0	_gcry_mpih_mul (xp, q->d, q->nlimbs, hp, x1size);
550
551	0	_gcry_mpi_free_limb_space (hp, hsize);
552
553	0	_gcry_mpih_add (xp, xp, x1size + q->nlimbs, x2p, q->nlimbs);
554	0	_gcry_mpi_free_limb_space (x2p, q->nlimbs);
555
556	0	_gcry_mpi_assign_limb_space (x, xp, x1size + q->nlimbs);
557	0	x->nlimbs = x1size + q->nlimbs;
558
559	0	mpi_free (q);
560
561	0	return 1;
562	0	}
563	0	else
564	0	return mpi_invm_generic (x, a, n);
565	48	}