Home | History | Annotate | Line # | Download | only in generic
sbpi1_div_q.c revision 1.1
      1 /* mpn_sbpi1_div_q -- Schoolbook division using the Mller-Granlund 3/2
      2    division algorithm.
      3 
      4    Contributed to the GNU project by Torbjorn Granlund.
      5 
      6    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      7    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      8    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
      9 
     10 Copyright 2007, 2009 Free Software Foundation, Inc.
     11 
     12 This file is part of the GNU MP Library.
     13 
     14 The GNU MP Library is free software; you can redistribute it and/or modify
     15 it under the terms of the GNU Lesser General Public License as published by
     16 the Free Software Foundation; either version 3 of the License, or (at your
     17 option) any later version.
     18 
     19 The GNU MP Library is distributed in the hope that it will be useful, but
     20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     21 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     22 License for more details.
     23 
     24 You should have received a copy of the GNU Lesser General Public License
     25 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     26 
     27 
     28 #include "gmp.h"
     29 #include "gmp-impl.h"
     30 #include "longlong.h"
     31 
     32 mp_limb_t
     33 mpn_sbpi1_div_q (mp_ptr qp,
     34 		 mp_ptr np, mp_size_t nn,
     35 		 mp_srcptr dp, mp_size_t dn,
     36 		 mp_limb_t dinv)
     37 {
     38   mp_limb_t qh;
     39   mp_size_t qn, i;
     40   mp_limb_t n1, n0;
     41   mp_limb_t d1, d0;
     42   mp_limb_t cy, cy1;
     43   mp_limb_t q;
     44   mp_limb_t flag;
     45 
     46   mp_size_t dn_orig = dn;
     47   mp_srcptr dp_orig = dp;
     48   mp_ptr np_orig = np;
     49 
     50   ASSERT (dn > 2);
     51   ASSERT (nn >= dn);
     52   ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
     53 
     54   np += nn;
     55 
     56   qn = nn - dn;
     57   if (qn + 1 < dn)
     58     {
     59       dp += dn - (qn + 1);
     60       dn = qn + 1;
     61     }
     62 
     63   qh = mpn_cmp (np - dn, dp, dn) >= 0;
     64   if (qh != 0)
     65     mpn_sub_n (np - dn, np - dn, dp, dn);
     66 
     67   qp += qn;
     68 
     69   dn -= 2;			/* offset dn by 2 for main division loops,
     70 				   saving two iterations in mpn_submul_1.  */
     71   d1 = dp[dn + 1];
     72   d0 = dp[dn + 0];
     73 
     74   np -= 2;
     75 
     76   n1 = np[1];
     77 
     78   for (i = qn - (dn + 2); i >= 0; i--)
     79     {
     80       np--;
     81       if (UNLIKELY (n1 == d1) && np[1] == d0)
     82 	{
     83 	  q = GMP_NUMB_MASK;
     84 	  mpn_submul_1 (np - dn, dp, dn + 2, q);
     85 	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
     86 	}
     87       else
     88 	{
     89 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
     90 
     91 	  cy = mpn_submul_1 (np - dn, dp, dn, q);
     92 
     93 	  cy1 = n0 < cy;
     94 	  n0 = (n0 - cy) & GMP_NUMB_MASK;
     95 	  cy = n1 < cy1;
     96 	  n1 -= cy1;
     97 	  np[0] = n0;
     98 
     99 	  if (UNLIKELY (cy != 0))
    100 	    {
    101 	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
    102 	      q--;
    103 	    }
    104 	}
    105 
    106       *--qp = q;
    107     }
    108 
    109   flag = ~CNST_LIMB(0);
    110 
    111   if (dn >= 0)
    112     {
    113       for (i = dn; i > 0; i--)
    114 	{
    115 	  np--;
    116 	  if (UNLIKELY (n1 >= (d1 & flag)))
    117 	    {
    118 	      q = GMP_NUMB_MASK;
    119 	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
    120 
    121 	      if (UNLIKELY (n1 != cy))
    122 		{
    123 		  if (n1 < (cy & flag))
    124 		    {
    125 		      q--;
    126 		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
    127 		    }
    128 		  else
    129 		    flag = 0;
    130 		}
    131 	      n1 = np[1];
    132 	    }
    133 	  else
    134 	    {
    135 	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
    136 
    137 	      cy = mpn_submul_1 (np - dn, dp, dn, q);
    138 
    139 	      cy1 = n0 < cy;
    140 	      n0 = (n0 - cy) & GMP_NUMB_MASK;
    141 	      cy = n1 < cy1;
    142 	      n1 -= cy1;
    143 	      np[0] = n0;
    144 
    145 	      if (UNLIKELY (cy != 0))
    146 		{
    147 		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
    148 		  q--;
    149 		}
    150 	    }
    151 
    152 	  *--qp = q;
    153 
    154 	  /* Truncate operands.  */
    155 	  dn--;
    156 	  dp++;
    157 	}
    158 
    159       np--;
    160       if (UNLIKELY (n1 >= (d1 & flag)))
    161 	{
    162 	  q = GMP_NUMB_MASK;
    163 	  cy = mpn_submul_1 (np, dp, 2, q);
    164 
    165 	  if (UNLIKELY (n1 != cy))
    166 	    {
    167 	      if (n1 < (cy & flag))
    168 		{
    169 		  q--;
    170 		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
    171 		}
    172 	      else
    173 		flag = 0;
    174 	    }
    175 	  n1 = np[1];
    176 	}
    177       else
    178 	{
    179 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
    180 
    181 	  np[0] = n0;
    182 	  np[1] = n1;
    183 	}
    184 
    185       *--qp = q;
    186     }
    187   ASSERT_ALWAYS (np[1] == n1);
    188   np += 2;
    189 
    190 
    191   dn = dn_orig;
    192   if (UNLIKELY (n1 < (dn & flag)))
    193     {
    194       mp_limb_t q, x;
    195 
    196       /* The quotient may be too large if the remainder is small.  Recompute
    197 	 for above ignored operand parts, until the remainder spills.
    198 
    199 	 FIXME: The quality of this code isn't the same as the code above.
    200 	 1. We don't compute things in an optimal order, high-to-low, in order
    201 	    to terminate as quickly as possible.
    202 	 2. We mess with pointers and sizes, adding and subtracting and
    203 	    adjusting to get things right.  It surely could be streamlined.
    204 	 3. The only termination criteria are that we determine that the
    205 	    quotient needs to be adjusted, or that we have recomputed
    206 	    everything.  We should stop when the remainder is so large
    207 	    that no additional subtracting could make it spill.
    208 	 4. If nothing else, we should not do two loops of submul_1 over the
    209 	    data, instead handle both the triangularization and chopping at
    210 	    once.  */
    211 
    212       x = n1;
    213 
    214       if (dn > 2)
    215 	{
    216 	  /* Compensate for triangularization.  */
    217 	  mp_limb_t y;
    218 
    219 	  dp = dp_orig;
    220 	  if (qn + 1 < dn)
    221 	    {
    222 	      dp += dn - (qn + 1);
    223 	      dn = qn + 1;
    224 	    }
    225 
    226 	  y = np[-2];
    227 
    228 	  for (i = dn - 3; i >= 0; i--)
    229 	    {
    230 	      q = qp[i];
    231 	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
    232 
    233 	      if (y < cy)
    234 		{
    235 		  if (x == 0)
    236 		    {
    237 		      cy = mpn_sub_1 (qp, qp, qn, 1);
    238 		      ASSERT_ALWAYS (cy == 0);
    239 		      return qh - cy;
    240 		    }
    241 		  x--;
    242 		}
    243 	      y -= cy;
    244 	    }
    245 	  np[-2] = y;
    246 	}
    247 
    248       dn = dn_orig;
    249       if (qn + 1 < dn)
    250 	{
    251 	  /* Compensate for ignored dividend and divisor tails.  */
    252 
    253 	  dp = dp_orig;
    254 	  np = np_orig;
    255 
    256 	  if (qh != 0)
    257 	    {
    258 	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
    259 	      if (cy != 0)
    260 		{
    261 		  if (x == 0)
    262 		    {
    263 		      if (qn != 0)
    264 			cy = mpn_sub_1 (qp, qp, qn, 1);
    265 		      return qh - cy;
    266 		    }
    267 		  x--;
    268 		}
    269 	    }
    270 
    271 	  if (qn == 0)
    272 	    return qh;
    273 
    274 	  for (i = dn - qn - 2; i >= 0; i--)
    275 	    {
    276 	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
    277 	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
    278 	      if (cy != 0)
    279 		{
    280 		  if (x == 0)
    281 		    {
    282 		      cy = mpn_sub_1 (qp, qp, qn, 1);
    283 		      return qh;
    284 		    }
    285 		  x--;
    286 		}
    287 	    }
    288 	}
    289     }
    290 
    291   return qh;
    292 }
    293