Home | History | Annotate | Line # | Download | only in generic
      1  1.1.1.2  mrg /* mpn_sbpi1_div_q -- Schoolbook division using the Mller-Granlund 3/2
      2      1.1  mrg    division algorithm.
      3      1.1  mrg 
      4      1.1  mrg    Contributed to the GNU project by Torbjorn Granlund.
      5      1.1  mrg 
      6      1.1  mrg    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      7      1.1  mrg    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      8      1.1  mrg    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
      9      1.1  mrg 
     10      1.1  mrg Copyright 2007, 2009 Free Software Foundation, Inc.
     11      1.1  mrg 
     12      1.1  mrg This file is part of the GNU MP Library.
     13      1.1  mrg 
     14      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     15  1.1.1.2  mrg it under the terms of either:
     16  1.1.1.2  mrg 
     17  1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
     18  1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
     19  1.1.1.2  mrg     option) any later version.
     20  1.1.1.2  mrg 
     21  1.1.1.2  mrg or
     22  1.1.1.2  mrg 
     23  1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
     24  1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
     25  1.1.1.2  mrg     later version.
     26  1.1.1.2  mrg 
     27  1.1.1.2  mrg or both in parallel, as here.
     28      1.1  mrg 
     29      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     30      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     31  1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     32  1.1.1.2  mrg for more details.
     33      1.1  mrg 
     34  1.1.1.2  mrg You should have received copies of the GNU General Public License and the
     35  1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     36  1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
     37      1.1  mrg 
     38      1.1  mrg 
     39      1.1  mrg #include "gmp-impl.h"
     40      1.1  mrg #include "longlong.h"
     41      1.1  mrg 
     42      1.1  mrg mp_limb_t
     43      1.1  mrg mpn_sbpi1_div_q (mp_ptr qp,
     44      1.1  mrg 		 mp_ptr np, mp_size_t nn,
     45      1.1  mrg 		 mp_srcptr dp, mp_size_t dn,
     46      1.1  mrg 		 mp_limb_t dinv)
     47      1.1  mrg {
     48      1.1  mrg   mp_limb_t qh;
     49      1.1  mrg   mp_size_t qn, i;
     50      1.1  mrg   mp_limb_t n1, n0;
     51      1.1  mrg   mp_limb_t d1, d0;
     52      1.1  mrg   mp_limb_t cy, cy1;
     53      1.1  mrg   mp_limb_t q;
     54      1.1  mrg   mp_limb_t flag;
     55      1.1  mrg 
     56      1.1  mrg   mp_size_t dn_orig = dn;
     57      1.1  mrg   mp_srcptr dp_orig = dp;
     58      1.1  mrg   mp_ptr np_orig = np;
     59      1.1  mrg 
     60      1.1  mrg   ASSERT (dn > 2);
     61      1.1  mrg   ASSERT (nn >= dn);
     62      1.1  mrg   ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
     63      1.1  mrg 
     64      1.1  mrg   np += nn;
     65      1.1  mrg 
     66      1.1  mrg   qn = nn - dn;
     67      1.1  mrg   if (qn + 1 < dn)
     68      1.1  mrg     {
     69      1.1  mrg       dp += dn - (qn + 1);
     70      1.1  mrg       dn = qn + 1;
     71      1.1  mrg     }
     72      1.1  mrg 
     73      1.1  mrg   qh = mpn_cmp (np - dn, dp, dn) >= 0;
     74      1.1  mrg   if (qh != 0)
     75      1.1  mrg     mpn_sub_n (np - dn, np - dn, dp, dn);
     76      1.1  mrg 
     77      1.1  mrg   qp += qn;
     78      1.1  mrg 
     79      1.1  mrg   dn -= 2;			/* offset dn by 2 for main division loops,
     80      1.1  mrg 				   saving two iterations in mpn_submul_1.  */
     81      1.1  mrg   d1 = dp[dn + 1];
     82      1.1  mrg   d0 = dp[dn + 0];
     83      1.1  mrg 
     84      1.1  mrg   np -= 2;
     85      1.1  mrg 
     86      1.1  mrg   n1 = np[1];
     87      1.1  mrg 
     88      1.1  mrg   for (i = qn - (dn + 2); i >= 0; i--)
     89      1.1  mrg     {
     90      1.1  mrg       np--;
     91      1.1  mrg       if (UNLIKELY (n1 == d1) && np[1] == d0)
     92      1.1  mrg 	{
     93      1.1  mrg 	  q = GMP_NUMB_MASK;
     94      1.1  mrg 	  mpn_submul_1 (np - dn, dp, dn + 2, q);
     95      1.1  mrg 	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
     96      1.1  mrg 	}
     97      1.1  mrg       else
     98      1.1  mrg 	{
     99      1.1  mrg 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
    100      1.1  mrg 
    101      1.1  mrg 	  cy = mpn_submul_1 (np - dn, dp, dn, q);
    102      1.1  mrg 
    103      1.1  mrg 	  cy1 = n0 < cy;
    104      1.1  mrg 	  n0 = (n0 - cy) & GMP_NUMB_MASK;
    105      1.1  mrg 	  cy = n1 < cy1;
    106      1.1  mrg 	  n1 -= cy1;
    107      1.1  mrg 	  np[0] = n0;
    108      1.1  mrg 
    109      1.1  mrg 	  if (UNLIKELY (cy != 0))
    110      1.1  mrg 	    {
    111      1.1  mrg 	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
    112      1.1  mrg 	      q--;
    113      1.1  mrg 	    }
    114      1.1  mrg 	}
    115      1.1  mrg 
    116      1.1  mrg       *--qp = q;
    117      1.1  mrg     }
    118      1.1  mrg 
    119      1.1  mrg   flag = ~CNST_LIMB(0);
    120      1.1  mrg 
    121      1.1  mrg   if (dn >= 0)
    122      1.1  mrg     {
    123      1.1  mrg       for (i = dn; i > 0; i--)
    124      1.1  mrg 	{
    125      1.1  mrg 	  np--;
    126      1.1  mrg 	  if (UNLIKELY (n1 >= (d1 & flag)))
    127      1.1  mrg 	    {
    128      1.1  mrg 	      q = GMP_NUMB_MASK;
    129      1.1  mrg 	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
    130      1.1  mrg 
    131      1.1  mrg 	      if (UNLIKELY (n1 != cy))
    132      1.1  mrg 		{
    133      1.1  mrg 		  if (n1 < (cy & flag))
    134      1.1  mrg 		    {
    135      1.1  mrg 		      q--;
    136      1.1  mrg 		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
    137      1.1  mrg 		    }
    138      1.1  mrg 		  else
    139      1.1  mrg 		    flag = 0;
    140      1.1  mrg 		}
    141      1.1  mrg 	      n1 = np[1];
    142      1.1  mrg 	    }
    143      1.1  mrg 	  else
    144      1.1  mrg 	    {
    145      1.1  mrg 	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
    146      1.1  mrg 
    147      1.1  mrg 	      cy = mpn_submul_1 (np - dn, dp, dn, q);
    148      1.1  mrg 
    149      1.1  mrg 	      cy1 = n0 < cy;
    150      1.1  mrg 	      n0 = (n0 - cy) & GMP_NUMB_MASK;
    151      1.1  mrg 	      cy = n1 < cy1;
    152      1.1  mrg 	      n1 -= cy1;
    153      1.1  mrg 	      np[0] = n0;
    154      1.1  mrg 
    155      1.1  mrg 	      if (UNLIKELY (cy != 0))
    156      1.1  mrg 		{
    157      1.1  mrg 		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
    158      1.1  mrg 		  q--;
    159      1.1  mrg 		}
    160      1.1  mrg 	    }
    161      1.1  mrg 
    162      1.1  mrg 	  *--qp = q;
    163      1.1  mrg 
    164      1.1  mrg 	  /* Truncate operands.  */
    165      1.1  mrg 	  dn--;
    166      1.1  mrg 	  dp++;
    167      1.1  mrg 	}
    168      1.1  mrg 
    169      1.1  mrg       np--;
    170      1.1  mrg       if (UNLIKELY (n1 >= (d1 & flag)))
    171      1.1  mrg 	{
    172      1.1  mrg 	  q = GMP_NUMB_MASK;
    173      1.1  mrg 	  cy = mpn_submul_1 (np, dp, 2, q);
    174      1.1  mrg 
    175      1.1  mrg 	  if (UNLIKELY (n1 != cy))
    176      1.1  mrg 	    {
    177      1.1  mrg 	      if (n1 < (cy & flag))
    178      1.1  mrg 		{
    179      1.1  mrg 		  q--;
    180      1.1  mrg 		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
    181      1.1  mrg 		}
    182      1.1  mrg 	      else
    183      1.1  mrg 		flag = 0;
    184      1.1  mrg 	    }
    185      1.1  mrg 	  n1 = np[1];
    186      1.1  mrg 	}
    187      1.1  mrg       else
    188      1.1  mrg 	{
    189      1.1  mrg 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
    190      1.1  mrg 
    191      1.1  mrg 	  np[0] = n0;
    192      1.1  mrg 	  np[1] = n1;
    193      1.1  mrg 	}
    194      1.1  mrg 
    195      1.1  mrg       *--qp = q;
    196      1.1  mrg     }
    197      1.1  mrg   ASSERT_ALWAYS (np[1] == n1);
    198      1.1  mrg   np += 2;
    199      1.1  mrg 
    200      1.1  mrg 
    201      1.1  mrg   dn = dn_orig;
    202      1.1  mrg   if (UNLIKELY (n1 < (dn & flag)))
    203      1.1  mrg     {
    204      1.1  mrg       mp_limb_t q, x;
    205      1.1  mrg 
    206      1.1  mrg       /* The quotient may be too large if the remainder is small.  Recompute
    207      1.1  mrg 	 for above ignored operand parts, until the remainder spills.
    208      1.1  mrg 
    209      1.1  mrg 	 FIXME: The quality of this code isn't the same as the code above.
    210      1.1  mrg 	 1. We don't compute things in an optimal order, high-to-low, in order
    211      1.1  mrg 	    to terminate as quickly as possible.
    212      1.1  mrg 	 2. We mess with pointers and sizes, adding and subtracting and
    213      1.1  mrg 	    adjusting to get things right.  It surely could be streamlined.
    214      1.1  mrg 	 3. The only termination criteria are that we determine that the
    215      1.1  mrg 	    quotient needs to be adjusted, or that we have recomputed
    216      1.1  mrg 	    everything.  We should stop when the remainder is so large
    217      1.1  mrg 	    that no additional subtracting could make it spill.
    218      1.1  mrg 	 4. If nothing else, we should not do two loops of submul_1 over the
    219      1.1  mrg 	    data, instead handle both the triangularization and chopping at
    220      1.1  mrg 	    once.  */
    221      1.1  mrg 
    222      1.1  mrg       x = n1;
    223      1.1  mrg 
    224      1.1  mrg       if (dn > 2)
    225      1.1  mrg 	{
    226      1.1  mrg 	  /* Compensate for triangularization.  */
    227      1.1  mrg 	  mp_limb_t y;
    228      1.1  mrg 
    229      1.1  mrg 	  dp = dp_orig;
    230      1.1  mrg 	  if (qn + 1 < dn)
    231      1.1  mrg 	    {
    232      1.1  mrg 	      dp += dn - (qn + 1);
    233      1.1  mrg 	      dn = qn + 1;
    234      1.1  mrg 	    }
    235      1.1  mrg 
    236      1.1  mrg 	  y = np[-2];
    237      1.1  mrg 
    238      1.1  mrg 	  for (i = dn - 3; i >= 0; i--)
    239      1.1  mrg 	    {
    240      1.1  mrg 	      q = qp[i];
    241      1.1  mrg 	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
    242      1.1  mrg 
    243      1.1  mrg 	      if (y < cy)
    244      1.1  mrg 		{
    245      1.1  mrg 		  if (x == 0)
    246      1.1  mrg 		    {
    247      1.1  mrg 		      cy = mpn_sub_1 (qp, qp, qn, 1);
    248      1.1  mrg 		      ASSERT_ALWAYS (cy == 0);
    249      1.1  mrg 		      return qh - cy;
    250      1.1  mrg 		    }
    251      1.1  mrg 		  x--;
    252      1.1  mrg 		}
    253      1.1  mrg 	      y -= cy;
    254      1.1  mrg 	    }
    255      1.1  mrg 	  np[-2] = y;
    256      1.1  mrg 	}
    257      1.1  mrg 
    258      1.1  mrg       dn = dn_orig;
    259      1.1  mrg       if (qn + 1 < dn)
    260      1.1  mrg 	{
    261      1.1  mrg 	  /* Compensate for ignored dividend and divisor tails.  */
    262      1.1  mrg 
    263      1.1  mrg 	  dp = dp_orig;
    264      1.1  mrg 	  np = np_orig;
    265      1.1  mrg 
    266      1.1  mrg 	  if (qh != 0)
    267      1.1  mrg 	    {
    268      1.1  mrg 	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
    269      1.1  mrg 	      if (cy != 0)
    270      1.1  mrg 		{
    271      1.1  mrg 		  if (x == 0)
    272      1.1  mrg 		    {
    273      1.1  mrg 		      if (qn != 0)
    274      1.1  mrg 			cy = mpn_sub_1 (qp, qp, qn, 1);
    275      1.1  mrg 		      return qh - cy;
    276      1.1  mrg 		    }
    277      1.1  mrg 		  x--;
    278      1.1  mrg 		}
    279      1.1  mrg 	    }
    280      1.1  mrg 
    281      1.1  mrg 	  if (qn == 0)
    282      1.1  mrg 	    return qh;
    283      1.1  mrg 
    284      1.1  mrg 	  for (i = dn - qn - 2; i >= 0; i--)
    285      1.1  mrg 	    {
    286      1.1  mrg 	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
    287      1.1  mrg 	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
    288      1.1  mrg 	      if (cy != 0)
    289      1.1  mrg 		{
    290      1.1  mrg 		  if (x == 0)
    291      1.1  mrg 		    {
    292      1.1  mrg 		      cy = mpn_sub_1 (qp, qp, qn, 1);
    293      1.1  mrg 		      return qh;
    294      1.1  mrg 		    }
    295      1.1  mrg 		  x--;
    296      1.1  mrg 		}
    297      1.1  mrg 	    }
    298      1.1  mrg 	}
    299      1.1  mrg     }
    300      1.1  mrg 
    301      1.1  mrg   return qh;
    302      1.1  mrg }
    303