Home | History | Annotate | Line # | Download | only in generic
sbpi1_div_q.c revision 1.1.1.3
      1 /* mpn_sbpi1_div_q -- Schoolbook division using the Mller-Granlund 3/2
      2    division algorithm.
      3 
      4    Contributed to the GNU project by Torbjorn Granlund.
      5 
      6    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      7    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      8    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
      9 
     10 Copyright 2007, 2009 Free Software Foundation, Inc.
     11 
     12 This file is part of the GNU MP Library.
     13 
     14 The GNU MP Library is free software; you can redistribute it and/or modify
     15 it under the terms of either:
     16 
     17   * the GNU Lesser General Public License as published by the Free
     18     Software Foundation; either version 3 of the License, or (at your
     19     option) any later version.
     20 
     21 or
     22 
     23   * the GNU General Public License as published by the Free Software
     24     Foundation; either version 2 of the License, or (at your option) any
     25     later version.
     26 
     27 or both in parallel, as here.
     28 
     29 The GNU MP Library is distributed in the hope that it will be useful, but
     30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     32 for more details.
     33 
     34 You should have received copies of the GNU General Public License and the
     35 GNU Lesser General Public License along with the GNU MP Library.  If not,
     36 see https://www.gnu.org/licenses/.  */
     37 
     38 
     39 #include "gmp-impl.h"
     40 #include "longlong.h"
     41 
     42 mp_limb_t
     43 mpn_sbpi1_div_q (mp_ptr qp,
     44 		 mp_ptr np, mp_size_t nn,
     45 		 mp_srcptr dp, mp_size_t dn,
     46 		 mp_limb_t dinv)
     47 {
     48   mp_limb_t qh;
     49   mp_size_t qn, i;
     50   mp_limb_t n1, n0;
     51   mp_limb_t d1, d0;
     52   mp_limb_t cy, cy1;
     53   mp_limb_t q;
     54   mp_limb_t flag;
     55 
     56   mp_size_t dn_orig = dn;
     57   mp_srcptr dp_orig = dp;
     58   mp_ptr np_orig = np;
     59 
     60   ASSERT (dn > 2);
     61   ASSERT (nn >= dn);
     62   ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
     63 
     64   np += nn;
     65 
     66   qn = nn - dn;
     67   if (qn + 1 < dn)
     68     {
     69       dp += dn - (qn + 1);
     70       dn = qn + 1;
     71     }
     72 
     73   qh = mpn_cmp (np - dn, dp, dn) >= 0;
     74   if (qh != 0)
     75     mpn_sub_n (np - dn, np - dn, dp, dn);
     76 
     77   qp += qn;
     78 
     79   dn -= 2;			/* offset dn by 2 for main division loops,
     80 				   saving two iterations in mpn_submul_1.  */
     81   d1 = dp[dn + 1];
     82   d0 = dp[dn + 0];
     83 
     84   np -= 2;
     85 
     86   n1 = np[1];
     87 
     88   for (i = qn - (dn + 2); i >= 0; i--)
     89     {
     90       np--;
     91       if (UNLIKELY (n1 == d1) && np[1] == d0)
     92 	{
     93 	  q = GMP_NUMB_MASK;
     94 	  mpn_submul_1 (np - dn, dp, dn + 2, q);
     95 	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
     96 	}
     97       else
     98 	{
     99 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
    100 
    101 	  cy = mpn_submul_1 (np - dn, dp, dn, q);
    102 
    103 	  cy1 = n0 < cy;
    104 	  n0 = (n0 - cy) & GMP_NUMB_MASK;
    105 	  cy = n1 < cy1;
    106 	  n1 -= cy1;
    107 	  np[0] = n0;
    108 
    109 	  if (UNLIKELY (cy != 0))
    110 	    {
    111 	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
    112 	      q--;
    113 	    }
    114 	}
    115 
    116       *--qp = q;
    117     }
    118 
    119   flag = ~CNST_LIMB(0);
    120 
    121   if (dn >= 0)
    122     {
    123       for (i = dn; i > 0; i--)
    124 	{
    125 	  np--;
    126 	  if (UNLIKELY (n1 >= (d1 & flag)))
    127 	    {
    128 	      q = GMP_NUMB_MASK;
    129 	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
    130 
    131 	      if (UNLIKELY (n1 != cy))
    132 		{
    133 		  if (n1 < (cy & flag))
    134 		    {
    135 		      q--;
    136 		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
    137 		    }
    138 		  else
    139 		    flag = 0;
    140 		}
    141 	      n1 = np[1];
    142 	    }
    143 	  else
    144 	    {
    145 	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
    146 
    147 	      cy = mpn_submul_1 (np - dn, dp, dn, q);
    148 
    149 	      cy1 = n0 < cy;
    150 	      n0 = (n0 - cy) & GMP_NUMB_MASK;
    151 	      cy = n1 < cy1;
    152 	      n1 -= cy1;
    153 	      np[0] = n0;
    154 
    155 	      if (UNLIKELY (cy != 0))
    156 		{
    157 		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
    158 		  q--;
    159 		}
    160 	    }
    161 
    162 	  *--qp = q;
    163 
    164 	  /* Truncate operands.  */
    165 	  dn--;
    166 	  dp++;
    167 	}
    168 
    169       np--;
    170       if (UNLIKELY (n1 >= (d1 & flag)))
    171 	{
    172 	  q = GMP_NUMB_MASK;
    173 	  cy = mpn_submul_1 (np, dp, 2, q);
    174 
    175 	  if (UNLIKELY (n1 != cy))
    176 	    {
    177 	      if (n1 < (cy & flag))
    178 		{
    179 		  q--;
    180 		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
    181 		}
    182 	      else
    183 		flag = 0;
    184 	    }
    185 	  n1 = np[1];
    186 	}
    187       else
    188 	{
    189 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
    190 
    191 	  np[0] = n0;
    192 	  np[1] = n1;
    193 	}
    194 
    195       *--qp = q;
    196     }
    197   ASSERT_ALWAYS (np[1] == n1);
    198   np += 2;
    199 
    200 
    201   dn = dn_orig;
    202   if (UNLIKELY (n1 < (dn & flag)))
    203     {
    204       mp_limb_t q, x;
    205 
    206       /* The quotient may be too large if the remainder is small.  Recompute
    207 	 for above ignored operand parts, until the remainder spills.
    208 
    209 	 FIXME: The quality of this code isn't the same as the code above.
    210 	 1. We don't compute things in an optimal order, high-to-low, in order
    211 	    to terminate as quickly as possible.
    212 	 2. We mess with pointers and sizes, adding and subtracting and
    213 	    adjusting to get things right.  It surely could be streamlined.
    214 	 3. The only termination criteria are that we determine that the
    215 	    quotient needs to be adjusted, or that we have recomputed
    216 	    everything.  We should stop when the remainder is so large
    217 	    that no additional subtracting could make it spill.
    218 	 4. If nothing else, we should not do two loops of submul_1 over the
    219 	    data, instead handle both the triangularization and chopping at
    220 	    once.  */
    221 
    222       x = n1;
    223 
    224       if (dn > 2)
    225 	{
    226 	  /* Compensate for triangularization.  */
    227 	  mp_limb_t y;
    228 
    229 	  dp = dp_orig;
    230 	  if (qn + 1 < dn)
    231 	    {
    232 	      dp += dn - (qn + 1);
    233 	      dn = qn + 1;
    234 	    }
    235 
    236 	  y = np[-2];
    237 
    238 	  for (i = dn - 3; i >= 0; i--)
    239 	    {
    240 	      q = qp[i];
    241 	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
    242 
    243 	      if (y < cy)
    244 		{
    245 		  if (x == 0)
    246 		    {
    247 		      cy = mpn_sub_1 (qp, qp, qn, 1);
    248 		      ASSERT_ALWAYS (cy == 0);
    249 		      return qh - cy;
    250 		    }
    251 		  x--;
    252 		}
    253 	      y -= cy;
    254 	    }
    255 	  np[-2] = y;
    256 	}
    257 
    258       dn = dn_orig;
    259       if (qn + 1 < dn)
    260 	{
    261 	  /* Compensate for ignored dividend and divisor tails.  */
    262 
    263 	  dp = dp_orig;
    264 	  np = np_orig;
    265 
    266 	  if (qh != 0)
    267 	    {
    268 	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
    269 	      if (cy != 0)
    270 		{
    271 		  if (x == 0)
    272 		    {
    273 		      if (qn != 0)
    274 			cy = mpn_sub_1 (qp, qp, qn, 1);
    275 		      return qh - cy;
    276 		    }
    277 		  x--;
    278 		}
    279 	    }
    280 
    281 	  if (qn == 0)
    282 	    return qh;
    283 
    284 	  for (i = dn - qn - 2; i >= 0; i--)
    285 	    {
    286 	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
    287 	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
    288 	      if (cy != 0)
    289 		{
    290 		  if (x == 0)
    291 		    {
    292 		      cy = mpn_sub_1 (qp, qp, qn, 1);
    293 		      return qh;
    294 		    }
    295 		  x--;
    296 		}
    297 	    }
    298 	}
    299     }
    300 
    301   return qh;
    302 }
    303