Coverage Report

Created: 2022-12-08 06:10

/src/libgcrypt/mpi/mpih-div.c
Line | Count | Source
  1 |       | /* mpih-div.c  -  MPI helper functions
  2 |       |  * Copyright (C) 1994, 1996, 1998, 2000,
  3 |       |  *               2001, 2002 Free Software Foundation, Inc.
  4 |       |  *
  5 |       |  * This file is part of Libgcrypt.
  6 |       |  *
  7 |       |  * Libgcrypt is free software; you can redistribute it and/or modify
  8 |       |  * it under the terms of the GNU Lesser General Public License as
  9 |       |  * published by the Free Software Foundation; either version 2.1 of
 10 |       |  * the License, or (at your option) any later version.
 11 |       |  *
 12 |       |  * Libgcrypt is distributed in the hope that it will be useful,
 13 |       |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |       |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |       |  * GNU Lesser General Public License for more details.
 16 |       |  *
 17 |       |  * You should have received a copy of the GNU Lesser General Public
 18 |       |  * License along with this program; if not, write to the Free Software
 19 |       |  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 20 |       |  *
 21 |       |  * Note: This code is heavily based on the GNU MP Library.
 22 |       |  *   Actually it's the same code with only minor changes in the
 23 |       |  *   way the data is stored; this is to support the abstraction
 24 |       |  *   of an optional secure memory allocation which may be used
 25 |       |  *   to avoid revealing of sensitive data due to paging etc.
 26 |       |  */
 27 |       |
 28 |       | #include <config.h>
 29 |       | #include <stdio.h>
 30 |       | #include <stdlib.h>
 31 |       | #include "mpi-internal.h"
 32 |       | #include "longlong.h"
 33 |       |
 34 |       | #ifndef UMUL_TIME
 35 |       | #define UMUL_TIME 1
 36 |       | #endif
 37 |       | #ifndef UDIV_TIME
 38 |       | #define UDIV_TIME UMUL_TIME
 39 |       | #endif
 40 |       |
 41 |       | /* FIXME: We should be using invert_limb (or invert_normalized_limb)
 42 |       |  * here (not udiv_qrnnd).
 43 |       |  */
 44 |       |
 45 |       | mpi_limb_t
 46 |       | _gcry_mpih_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
 47 |       |               mpi_limb_t divisor_limb)
 48 |     0 | {
 49 |     0 |     mpi_size_t i;
 50 |     0 |     mpi_limb_t n1, n0, r;
 51 |     0 |     mpi_limb_t dummy GCC_ATTR_UNUSED;
 52 |       |
 53 |       |     /* Botch: Should this be handled at all?  Rely on callers?  */
 54 |     0 |     if( !dividend_size )
 55 |     0 |   return 0;
 56 |       |
 57 |       |     /* If multiplication is much faster than division, and the
 58 |       |      * dividend is large, pre-invert the divisor, and use
 59 |       |      * only multiplications in the inner loop.
 60 |       |      *
 61 |       |      * This test should be read:
 62 |       |      *   Does it ever help to use udiv_qrnnd_preinv?
 63 |       |      *     && Does what we save compensate for the inversion overhead?
 64 |       |      */
 65 |     0 |     if( UDIV_TIME > (2 * UMUL_TIME + 6)
 66 |     0 |   && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
 67 |     0 |   int normalization_steps;
 68 |       |
 69 |     0 |   count_leading_zeros( normalization_steps, divisor_limb );
 70 |     0 |   if( normalization_steps ) {
 71 |     0 |       mpi_limb_t divisor_limb_inverted;
 72 |       |
 73 |     0 |       divisor_limb <<= normalization_steps;
 74 |       |
 75 |       |       /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
 76 |       |        * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
 77 |       |        * most significant bit (with weight 2**N) implicit.
 78 |       |        *
 79 |       |        * Special case for DIVISOR_LIMB == 100...000.
 80 |       |        */
 81 |     0 |       if( !(divisor_limb << 1) )
 82 |     0 |     divisor_limb_inverted = ~(mpi_limb_t)0;
 83 |     0 |       else
 84 |     0 |     udiv_qrnnd(divisor_limb_inverted, dummy,
 85 |     0 |          -divisor_limb, 0, divisor_limb);
 86 |       |
 87 |     0 |       n1 = dividend_ptr[dividend_size - 1];
 88 |     0 |       r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
 89 |       |
 90 |       |       /* Possible optimization:
 91 |       |        * if (r == 0
 92 |       |        * && divisor_limb > ((n1 << normalization_steps)
 93 |       |        *           | (dividend_ptr[dividend_size - 2] >> ...)))
 94 |       |        * ...one division less...
 95 |       |        */
 96 |     0 |       for( i = dividend_size - 2; i >= 0; i--) {
 97 |     0 |     n0 = dividend_ptr[i];
 98 |     0 |     UDIV_QRNND_PREINV(dummy, r, r,
 99 |     0 |            ((n1 << normalization_steps)
100 |     0 |         | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
101 |     0 |         divisor_limb, divisor_limb_inverted);
102 |     0 |     n1 = n0;
103 |     0 |       }
104 |     0 |       UDIV_QRNND_PREINV(dummy, r, r,
105 |     0 |             n1 << normalization_steps,
106 |     0 |             divisor_limb, divisor_limb_inverted);
107 |     0 |       return r >> normalization_steps;
108 |     0 |   }
109 |     0 |   else {
110 |     0 |       mpi_limb_t divisor_limb_inverted;
111 |       |
112 |       |       /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
113 |       |        * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
114 |       |        * most significant bit (with weight 2**N) implicit.
115 |       |        *
116 |       |        * Special case for DIVISOR_LIMB == 100...000.
117 |       |        */
118 |     0 |       if( !(divisor_limb << 1) )
119 |     0 |     divisor_limb_inverted = ~(mpi_limb_t)0;
120 |     0 |       else
121 |     0 |     udiv_qrnnd(divisor_limb_inverted, dummy,
122 |     0 |           -divisor_limb, 0, divisor_limb);
123 |       |
124 |     0 |       i = dividend_size - 1;
125 |     0 |       r = dividend_ptr[i];
126 |       |
127 |     0 |       if( r >= divisor_limb )
128 |     0 |     r = 0;
129 |     0 |       else
130 |     0 |     i--;
131 |       |
132 |     0 |       for( ; i >= 0; i--) {
133 |     0 |     n0 = dividend_ptr[i];
134 |     0 |     UDIV_QRNND_PREINV(dummy, r, r,
135 |     0 |           n0, divisor_limb, divisor_limb_inverted);
136 |     0 |       }
137 |     0 |       return r;
138 |     0 |   }
139 |     0 |     }
140 |     0 |     else {
141 |     0 |   if( UDIV_NEEDS_NORMALIZATION ) {
142 |     0 |       int normalization_steps;
143 |       |
144 |     0 |       count_leading_zeros(normalization_steps, divisor_limb);
145 |     0 |       if( normalization_steps ) {
146 |     0 |     divisor_limb <<= normalization_steps;
147 |       |
148 |     0 |     n1 = dividend_ptr[dividend_size - 1];
149 |     0 |     r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
150 |       |
151 |       |     /* Possible optimization:
152 |       |      * if (r == 0
153 |       |      * && divisor_limb > ((n1 << normalization_steps)
154 |       |      *       | (dividend_ptr[dividend_size - 2] >> ...)))
155 |       |      * ...one division less...
156 |       |      */
157 |     0 |     for(i = dividend_size - 2; i >= 0; i--) {
158 |     0 |         n0 = dividend_ptr[i];
159 |     0 |         udiv_qrnnd (dummy, r, r,
160 |     0 |         ((n1 << normalization_steps)
161 |     0 |        | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
162 |     0 |        divisor_limb);
163 |     0 |         n1 = n0;
164 |     0 |     }
165 |     0 |     udiv_qrnnd (dummy, r, r,
166 |     0 |           n1 << normalization_steps,
167 |     0 |           divisor_limb);
168 |     0 |     return r >> normalization_steps;
169 |     0 |       }
170 |     0 |   }
171 |       |   /* No normalization needed, either because udiv_qrnnd doesn't require
172 |       |    * it, or because DIVISOR_LIMB is already normalized.  */
173 |     0 |   i = dividend_size - 1;
174 |     0 |   r = dividend_ptr[i];
175 |       |
176 |     0 |   if(r >= divisor_limb)
177 |     0 |       r = 0;
178 |     0 |   else
179 |     0 |       i--;
180 |       |
181 |     0 |   for(; i >= 0; i--) {
182 |     0 |       n0 = dividend_ptr[i];
183 |     0 |       udiv_qrnnd (dummy, r, r, n0, divisor_limb);
184 |     0 |   }
185 |     0 |   return r;
186 |     0 |     }
187 |     0 | }
188 |       |
189 |       | /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
190 |       |  * the NSIZE-DSIZE least significant quotient limbs at QP
191 |       |  * and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
192 |       |  * non-zero, generate that many fraction bits and append them after the
193 |       |  * other quotient limbs.
194 |       |  * Return the most significant limb of the quotient, this is always 0 or 1.
195 |       |  *
196 |       |  * Preconditions:
197 |       |  * 0. NSIZE >= DSIZE.
198 |       |  * 1. The most significant bit of the divisor must be set.
199 |       |  * 2. QP must either not overlap with the input operands at all, or
200 |       |  *    QP + DSIZE >= NP must hold true.  (This means that it's
201 |       |  *    possible to put the quotient in the high part of NUM, right after the
202 |       |  *    remainder in NUM.)
203 |       |  * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
204 |       |  */
205 |       |
206 |       | mpi_limb_t
207 |       | _gcry_mpih_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
208 |       |                       mpi_ptr_t np, mpi_size_t nsize,
209 |       |                       mpi_ptr_t dp, mpi_size_t dsize)
210 | 17.5k | {
211 | 17.5k |     mpi_limb_t most_significant_q_limb = 0;
212 |       |
213 | 17.5k |     switch(dsize) {
214 |     0 |       case 0:
215 |     0 |   _gcry_divide_by_zero();
216 |     0 |   break;
217 |       |
218 |     0 |       case 1:
219 |     0 |   {
220 |     0 |       mpi_size_t i;
221 |     0 |       mpi_limb_t n1;
222 |     0 |       mpi_limb_t d;
223 |       |
224 |     0 |       d = dp[0];
225 |     0 |       n1 = np[nsize - 1];
226 |       |
227 |     0 |       if( n1 >= d ) {
228 |     0 |     n1 -= d;
229 |     0 |     most_significant_q_limb = 1;
230 |     0 |       }
231 |       |
232 |     0 |       qp += qextra_limbs;
233 |     0 |       for( i = nsize - 2; i >= 0; i--)
234 |     0 |     udiv_qrnnd( qp[i], n1, n1, np[i], d );
235 |     0 |       qp -= qextra_limbs;
236 |       |
237 |     0 |       for( i = qextra_limbs - 1; i >= 0; i-- )
238 |     0 |     udiv_qrnnd (qp[i], n1, n1, 0, d);
239 |       |
240 |     0 |       np[0] = n1;
241 |     0 |   }
242 |     0 |   break;
243 |       |
244 |     0 |       case 2:
245 |     0 |   {
246 |     0 |       mpi_size_t i;
247 |     0 |       mpi_limb_t n1, n0, n2;
248 |     0 |       mpi_limb_t d1, d0;
249 |       |
250 |     0 |       np += nsize - 2;
251 |     0 |       d1 = dp[1];
252 |     0 |       d0 = dp[0];
253 |     0 |       n1 = np[1];
254 |     0 |       n0 = np[0];
255 |       |
256 |     0 |       if( n1 >= d1 && (n1 > d1 || n0 >= d0) ) {
257 |     0 |     sub_ddmmss (n1, n0, n1, n0, d1, d0);
258 |     0 |     most_significant_q_limb = 1;
259 |     0 |       }
260 |       |
261 |     0 |       for( i = qextra_limbs + nsize - 2 - 1; i >= 0; i-- ) {
262 |     0 |     mpi_limb_t q;
263 |     0 |     mpi_limb_t r;
264 |       |
265 |     0 |     if( i >= qextra_limbs )
266 |     0 |         np--;
267 |     0 |     else
268 |     0 |         np[0] = 0;
269 |       |
270 |     0 |     if( n1 == d1 ) {
271 |       |         /* Q should be either 111..111 or 111..110.  Need special
272 |       |          * treatment of this rare case as normal division would
273 |       |          * give overflow.  */
274 |     0 |         q = ~(mpi_limb_t)0;
275 |       |
276 |     0 |         r = n0 + d1;
277 |     0 |         if( r < d1 ) {   /* Carry in the addition? */
278 |     0 |       add_ssaaaa( n1, n0, r - d0, np[0], 0, d0 );
279 |     0 |       qp[i] = q;
280 |     0 |       continue;
281 |     0 |         }
282 |     0 |         n1 = d0 - (d0 != 0?1:0);
283 |     0 |         n0 = -d0;
284 |     0 |     }
285 |     0 |     else {
286 |     0 |         udiv_qrnnd (q, r, n1, n0, d1);
287 |     0 |         umul_ppmm (n1, n0, d0, q);
288 |     0 |     }
289 |       |
290 |     0 |     n2 = np[0];
291 |     0 |         q_test:
292 |     0 |     if( n1 > r || (n1 == r && n0 > n2) ) {
293 |       |         /* The estimated Q was too large.  */
294 |     0 |         q--;
295 |     0 |         sub_ddmmss (n1, n0, n1, n0, 0, d0);
296 |     0 |         r += d1;
297 |     0 |         if( r >= d1 )    /* If not carry, test Q again.  */
298 |     0 |       goto q_test;
299 |     0 |     }
300 |       |
301 |     0 |     qp[i] = q;
302 |     0 |     sub_ddmmss (n1, n0, r, n2, n1, n0);
303 |     0 |       }
304 |     0 |       np[1] = n1;
305 |     0 |       np[0] = n0;
306 |     0 |   }
307 |     0 |   break;
308 |       |
309 | 17.5k |       default:
310 | 17.5k |   {
311 | 17.5k |       mpi_size_t i;
312 | 17.5k |       mpi_limb_t dX, d1, n0;
313 |       |
314 | 17.5k |       np += nsize - dsize;
315 | 17.5k |       dX = dp[dsize - 1];
316 | 17.5k |       d1 = dp[dsize - 2];
317 | 17.5k |       n0 = np[dsize - 1];
318 |       |
319 | 17.5k |       if( n0 >= dX ) {
320 |     0 |     if(n0 > dX || _gcry_mpih_cmp(np, dp, dsize - 1) >= 0 ) {
321 |     0 |         _gcry_mpih_sub_n(np, np, dp, dsize);
322 |     0 |         n0 = np[dsize - 1];
323 |     0 |         most_significant_q_limb = 1;
324 |     0 |     }
325 |     0 |       }
326 |       |
327 | 71.6k |       for( i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
328 | 54.1k |     mpi_limb_t q;
329 | 54.1k |     mpi_limb_t n1, n2;
330 | 54.1k |     mpi_limb_t cy_limb;
331 |       |
332 | 54.1k |     if( i >= qextra_limbs ) {
333 | 54.1k |         np--;
334 | 54.1k |         n2 = np[dsize];
335 | 54.1k |     }
336 |     0 |     else {
337 |     0 |         n2 = np[dsize - 1];
338 |     0 |         MPN_COPY_DECR (np + 1, np, dsize - 1);
339 |     0 |         np[0] = 0;
340 |     0 |     }
341 |       |
342 | 54.1k |     if( n0 == dX ) {
343 |       |         /* This might over-estimate q, but it's probably not worth
344 |       |          * the extra code here to find out.  */
345 |     0 |         q = ~(mpi_limb_t)0;
346 |     0 |     }
347 | 54.1k |     else {
348 | 54.1k |         mpi_limb_t r;
349 |       |
350 | 54.1k |         udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
351 | 54.1k |         umul_ppmm(n1, n0, d1, q);
352 |       |
353 | 56.9k |         while( n1 > r || (n1 == r && n0 > np[dsize - 2])) {
354 | 4.08k |       q--;
355 | 4.08k |       r += dX;
356 | 4.08k |       if( r < dX ) /* I.e. "carry in previous addition?" */
357 | 1.29k |           break;
358 | 2.79k |       n1 -= n0 < d1;
359 | 2.79k |       n0 -= d1;
360 | 2.79k |         }
361 | 54.1k |     }
362 |       |
363 |       |     /* Possible optimization: We already have (q * n0) and (1 * n1)
364 |       |      * after the calculation of q.  Taking advantage of that, we
365 |       |      * could make this loop make two iterations less.  */
366 | 54.1k |     cy_limb = _gcry_mpih_submul_1(np, dp, dsize, q);
367 |       |
368 | 54.1k |     if( n2 != cy_limb ) {
369 |     0 |         _gcry_mpih_add_n(np, np, dp, dsize);
370 |     0 |         q--;
371 |     0 |     }
372 |       |
373 | 54.1k |     qp[i] = q;
374 | 54.1k |     n0 = np[dsize - 1];
375 | 54.1k |       }
376 | 17.5k |   }
377 | 17.5k |     }
378 |       |
379 | 17.5k |     return most_significant_q_limb;
380 | 17.5k | }
381 |       |
382 |       |
383 |       | /****************
384 |       |  * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
385 |       |  * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
386 |       |  * Return the single-limb remainder.
387 |       |  * There are no constraints on the value of the divisor.
388 |       |  *
389 |       |  * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
390 |       |  */
391 |       |
392 |       | mpi_limb_t
393 |       | _gcry_mpih_divmod_1( mpi_ptr_t quot_ptr,
394 |       |                         mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
395 |       |                         mpi_limb_t divisor_limb)
396 |     0 | {
397 |     0 |     mpi_size_t i;
398 |     0 |     mpi_limb_t n1, n0, r;
399 |     0 |     mpi_limb_t dummy GCC_ATTR_UNUSED;
400 |       |
401 |     0 |     if( !dividend_size )
402 |     0 |   return 0;
403 |       |
404 |       |     /* If multiplication is much faster than division, and the
405 |       |      * dividend is large, pre-invert the divisor, and use
406 |       |      * only multiplications in the inner loop.
407 |       |      *
408 |       |      * This test should be read:
409 |       |      * Does it ever help to use udiv_qrnnd_preinv?
410 |       |      * && Does what we save compensate for the inversion overhead?
411 |       |      */
412 |     0 |     if( UDIV_TIME > (2 * UMUL_TIME + 6)
413 |     0 |   && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
414 |     0 |   int normalization_steps;
415 |       |
416 |     0 |   count_leading_zeros( normalization_steps, divisor_limb );
417 |     0 |   if( normalization_steps ) {
418 |     0 |       mpi_limb_t divisor_limb_inverted;
419 |       |
420 |     0 |       divisor_limb <<= normalization_steps;
421 |       |
422 |       |       /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
423 |       |        * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
424 |       |        * most significant bit (with weight 2**N) implicit.
425 |       |        */
426 |       |       /* Special case for DIVISOR_LIMB == 100...000.  */
427 |     0 |       if( !(divisor_limb << 1) )
428 |     0 |     divisor_limb_inverted = ~(mpi_limb_t)0;
429 |     0 |       else
430 |     0 |     udiv_qrnnd(divisor_limb_inverted, dummy,
431 |     0 |          -divisor_limb, 0, divisor_limb);
432 |       |
433 |     0 |       n1 = dividend_ptr[dividend_size - 1];
434 |     0 |       r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
435 |       |
436 |       |       /* Possible optimization:
437 |       |        * if (r == 0
438 |       |        * && divisor_limb > ((n1 << normalization_steps)
439 |       |        *           | (dividend_ptr[dividend_size - 2] >> ...)))
440 |       |        * ...one division less...
441 |       |        */
442 |     0 |       for( i = dividend_size - 2; i >= 0; i--) {
443 |     0 |     n0 = dividend_ptr[i];
444 |     0 |     UDIV_QRNND_PREINV( quot_ptr[i + 1], r, r,
445 |     0 |            ((n1 << normalization_steps)
446 |     0 |        | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
447 |     0 |             divisor_limb, divisor_limb_inverted);
448 |     0 |     n1 = n0;
449 |     0 |       }
450 |     0 |       UDIV_QRNND_PREINV( quot_ptr[0], r, r,
451 |     0 |              n1 << normalization_steps,
452 |     0 |              divisor_limb, divisor_limb_inverted);
453 |     0 |       return r >> normalization_steps;
454 |     0 |   }
455 |     0 |   else {
456 |     0 |       mpi_limb_t divisor_limb_inverted;
457 |       |
458 |       |       /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
459 |       |        * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
460 |       |        * most significant bit (with weight 2**N) implicit.
461 |       |        */
462 |       |       /* Special case for DIVISOR_LIMB == 100...000.  */
463 |     0 |       if( !(divisor_limb << 1) )
464 |     0 |     divisor_limb_inverted = ~(mpi_limb_t) 0;
465 |     0 |       else
466 |     0 |     udiv_qrnnd(divisor_limb_inverted, dummy,
467 |     0 |          -divisor_limb, 0, divisor_limb);
468 |       |
469 |     0 |       i = dividend_size - 1;
470 |     0 |       r = dividend_ptr[i];
471 |       |
472 |     0 |       if( r >= divisor_limb )
473 |     0 |     r = 0;
474 |     0 |       else
475 |     0 |     quot_ptr[i--] = 0;
476 |       |
477 |     0 |       for( ; i >= 0; i-- ) {
478 |     0 |     n0 = dividend_ptr[i];
479 |     0 |     UDIV_QRNND_PREINV( quot_ptr[i], r, r,
480 |     0 |            n0, divisor_limb, divisor_limb_inverted);
481 |     0 |       }
482 |     0 |       return r;
483 |     0 |   }
484 |     0 |     }
485 |     0 |     else {
486 |     0 |   if(UDIV_NEEDS_NORMALIZATION) {
487 |     0 |       int normalization_steps;
488 |       |
489 |     0 |       count_leading_zeros (normalization_steps, divisor_limb);
490 |     0 |       if( normalization_steps ) {
491 |     0 |     divisor_limb <<= normalization_steps;
492 |       |
493 |     0 |     n1 = dividend_ptr[dividend_size - 1];
494 |     0 |     r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
495 |       |
496 |       |     /* Possible optimization:
497 |       |      * if (r == 0
498 |       |      * && divisor_limb > ((n1 << normalization_steps)
499 |       |      *       | (dividend_ptr[dividend_size - 2] >> ...)))
500 |       |      * ...one division less...
501 |       |      */
502 |     0 |     for( i = dividend_size - 2; i >= 0; i--) {
503 |     0 |         n0 = dividend_ptr[i];
504 |     0 |         udiv_qrnnd (quot_ptr[i + 1], r, r,
505 |     0 |            ((n1 << normalization_steps)
506 |     0 |        | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
507 |     0 |         divisor_limb);
508 |     0 |         n1 = n0;
509 |     0 |     }
510 |     0 |     udiv_qrnnd (quot_ptr[0], r, r,
511 |     0 |           n1 << normalization_steps,
512 |     0 |           divisor_limb);
513 |     0 |     return r >> normalization_steps;
514 |     0 |       }
515 |     0 |   }
516 |       |   /* No normalization needed, either because udiv_qrnnd doesn't require
517 |       |    * it, or because DIVISOR_LIMB is already normalized.  */
518 |     0 |   i = dividend_size - 1;
519 |     0 |   r = dividend_ptr[i];
520 |       |
521 |     0 |   if(r >= divisor_limb)
522 |     0 |       r = 0;
523 |     0 |   else
524 |     0 |       quot_ptr[i--] = 0;
525 |       |
526 |     0 |   for(; i >= 0; i--) {
527 |     0 |       n0 = dividend_ptr[i];
528 |     0 |       udiv_qrnnd( quot_ptr[i], r, r, n0, divisor_limb );
529 |     0 |   }
530 |     0 |   return r;
531 |     0 |     }
532 |     0 | }
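
The counts above show that only the multi-limb path of _gcry_mpih_divrem (the "default:" arm of the dsize switch) is ever executed; _gcry_mpih_mod_1, _gcry_mpih_divmod_1, and the dsize 0/1/2 arms of _gcry_mpih_divrem have zero hits. A harness along the following lines could drive the two uncovered entry points. This is a minimal sketch, not part of the libgcrypt test suite: it assumes an in-tree build where "mpi-internal.h" is on the include path, and since these are internal (non-exported) symbols it would have to be linked against a static build of the library.

    /* exercise-div.c - hypothetical harness for the uncovered functions. */
    #include <config.h>
    #include <stdio.h>
    #include "mpi-internal.h"

    int
    main (void)
    {
        /* Four-limb dividend, least significant limb first. */
        mpi_limb_t dividend[4] = { 123, 456, 789, 1 };
        mpi_limb_t quot[4];
        mpi_limb_t r;

        /* Remainder only: drives _gcry_mpih_mod_1 (0 hits above). */
        r = _gcry_mpih_mod_1 (dividend, 4, (mpi_limb_t)97);
        printf ("mod_1 remainder: %lu\n", (unsigned long)r);

        /* Quotient and remainder: drives _gcry_mpih_divmod_1 (0 hits above). */
        r = _gcry_mpih_divmod_1 (quot, dividend, 4, (mpi_limb_t)97);
        printf ("divmod_1 remainder: %lu\n", (unsigned long)r);

        return 0;
    }

The divisor 97 is deliberately unnormalized (its most significant bit is clear), so on targets where UDIV_NEEDS_NORMALIZATION is non-zero the harness also reaches each function's normalization branch.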