libquadmath/printf/mul.c

1.1  mrg /* mpn_mul -- Multiply two natural numbers.
1.1  mrg
1.1  mrg Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
1.1  mrg
1.1  mrg This file is part of the GNU MP Library.
1.1  mrg
1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
1.1  mrg it under the terms of the GNU Lesser General Public License as published by
1.1  mrg the Free Software Foundation; either version 2.1 of the License, or (at your
1.1  mrg option) any later version.
1.1  mrg
1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1.1  mrg License for more details.
1.1  mrg
1.1  mrg You should have received a copy of the GNU Lesser General Public License
1.1  mrg along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
1.1  mrg the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
1.1  mrg MA 02111-1307, USA. */
1.1  mrg
1.1  mrg #include <config.h>
1.1  mrg #include "gmp-impl.h"
1.1  mrg
1.1  mrg /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
1.1  mrg    and v (pointed to by VP, with VSIZE limbs), and store the result at
1.1  mrg    PRODP.  USIZE + VSIZE limbs are always stored, even if the most
1.1  mrg    significant limb of the product is zero.  Return the most significant
1.1  mrg    limb of the result.
1.1  mrg
1.1  mrg    NOTE: The space pointed to by PRODP is overwritten before U and V have
1.1  mrg    been completely read, so overlap between PRODP and either operand is
1.1  mrg    an error.
1.1  mrg
1.1  mrg    Argument constraints:
1.1  mrg    1. USIZE >= VSIZE.
1.1  mrg    2. PRODP != UP and PRODP != VP, i.e. the destination
1.1  mrg       must be distinct from the multiplier and the multiplicand.  */
1.1  mrg
1.1  mrg /* If KARATSUBA_THRESHOLD is not already defined, define it to a
1.1  mrg    value which is good on most machines.  mpn_mul compares VSIZE (a
1.1  mrg    limb count) against it: below the threshold the traditional
1.1  mrg    limb-by-limb loop is used, otherwise the work is handed to
1.1  mrg    MPN_MUL_N_RECURSE in blocks of VSIZE limbs.  */
1.1  mrg #ifndef KARATSUBA_THRESHOLD
1.1  mrg #define KARATSUBA_THRESHOLD 32
1.1  mrg #endif
1.1  mrg
1.1  mrg mp_limb_t
1.1  mrg #if __STDC__
1.1  mrg mpn_mul (mp_ptr prodp,
1.1  mrg 	 mp_srcptr up, mp_size_t usize,
1.1  mrg 	 mp_srcptr vp, mp_size_t vsize)
1.1  mrg #else
1.1  mrg mpn_mul (prodp, up, usize, vp, vsize)
1.1  mrg      mp_ptr prodp;
1.1  mrg      mp_srcptr up;
1.1  mrg      mp_size_t usize;
1.1  mrg      mp_srcptr vp;
1.1  mrg      mp_size_t vsize;
1.1  mrg #endif
1.1  mrg {
1.1  mrg   mp_ptr prod_endp = prodp + usize + vsize - 1;
1.1  mrg   mp_limb_t cy;
1.1  mrg   mp_ptr tspace;
1.1  mrg   /* PROD_ENDP remembers where the most significant product limb lives,
1.1  mrg      because PRODP itself is advanced below.  When V has fewer than
1.1  mrg      KARATSUBA_THRESHOLD limbs the whole product is formed by the
1.1  mrg      basecase branch, which returns on its own; otherwise control
1.1  mrg      continues with the block-wise code after that branch.  */
1.1  mrg   if (vsize < KARATSUBA_THRESHOLD)
1.1  mrg     {
1.1  mrg       /* Handle simple cases with traditional multiplication.
1.1  mrg
1.1  mrg 	 This is the most critical code of the entire function.  All
1.1  mrg 	 multiplies rely on this, both small and huge.  Small ones arrive
1.1  mrg 	 here immediately.  Huge ones arrive here as this is the base case
1.1  mrg 	 for Karatsuba's recursive algorithm below.  */
1.1  mrg       mp_size_t i;
1.1  mrg       mp_limb_t cy_limb;	/* Helper result, stored above each row.  */
1.1  mrg       mp_limb_t v_limb;		/* Limb of V currently being processed.  */
1.1  mrg
1.1  mrg       if (vsize == 0)
1.1  mrg 	return 0;		/* Nothing is written to PRODP here.  */
1.1  mrg
1.1  mrg       /* Multiply by the first limb in V separately, as the result can be
1.1  mrg 	 stored (not added) to PROD.  We also avoid a loop for zeroing.
1.1  mrg 	 A limb of 0 or 1 skips mpn_mul_1: the row is zeroed or copied.  */
1.1  mrg       v_limb = vp[0];
1.1  mrg       if (v_limb <= 1)
1.1  mrg 	{
1.1  mrg 	  if (v_limb == 1)
1.1  mrg 	    MPN_COPY (prodp, up, usize);
1.1  mrg 	  else
1.1  mrg 	    MPN_ZERO (prodp, usize);
1.1  mrg 	  cy_limb = 0;
1.1  mrg 	}
1.1  mrg       else
1.1  mrg 	cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);
1.1  mrg       /* The helper result becomes the limb just above the USIZE limbs
1.1  mrg 	 written so far; PRODP then moves up one limb for the next row.  */
1.1  mrg       prodp[usize] = cy_limb;
1.1  mrg       prodp++;
1.1  mrg
1.1  mrg       /* For each iteration in the outer loop, combine the USIZE limbs of
1.1  mrg 	 U with one further limb of V, and add the row to PROD at the
1.1  mrg 	 current limb offset.  A V limb of 0 adds nothing; a V limb of 1
1.1  mrg 	 uses mpn_add_n in place of mpn_addmul_1.  */
1.1  mrg       for (i = 1; i < vsize; i++)
1.1  mrg 	{
1.1  mrg 	  v_limb = vp[i];
1.1  mrg 	  if (v_limb <= 1)
1.1  mrg 	    {
1.1  mrg 	      cy_limb = 0;
1.1  mrg 	      if (v_limb == 1)
1.1  mrg 		cy_limb = mpn_add_n (prodp, prodp, up, usize);
1.1  mrg 	    }
1.1  mrg 	  else
1.1  mrg 	    cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);
1.1  mrg
1.1  mrg 	  prodp[usize] = cy_limb;
1.1  mrg 	  prodp++;
1.1  mrg 	}
1.1  mrg       return cy_limb;	/* Last limb stored, i.e. the top product limb.  */
1.1  mrg     }
1.1  mrg   /* VSIZE >= KARATSUBA_THRESHOLD from here on.  U is consumed in blocks
1.1  mrg      of VSIZE limbs.  The first block product is written straight into
1.1  mrg      PRODP; TSPACE (2 * VSIZE limbs obtained from alloca) is passed as
1.1  mrg      the last argument of MPN_MUL_N_RECURSE, presumably as scratch.
1.1  mrg      NOTE(review): the alloca sizes scale with VSIZE and are not
1.1  mrg      bounded anywhere in this function -- confirm callers keep the
1.1  mrg      operands small enough for the stack.  */
1.1  mrg   tspace = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
1.1  mrg   MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);
1.1  mrg   /* Step past the block just multiplied.  While at least VSIZE limbs of
1.1  mrg      U remain, form the next VSIZE x VSIZE product in TP, add its low
1.1  mrg      half into the VSIZE limbs at PRODP (which hold the high half of the
1.1  mrg      previous block product), and store its high half plus the carry
1.1  mrg      above them.  */
1.1  mrg   prodp += vsize;
1.1  mrg   up += vsize;
1.1  mrg   usize -= vsize;
1.1  mrg   if (usize >= vsize)
1.1  mrg     {
1.1  mrg       mp_ptr tp = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
1.1  mrg       do
1.1  mrg 	{
1.1  mrg 	  MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
1.1  mrg 	  cy = mpn_add_n (prodp, prodp, tp, vsize);
1.1  mrg 	  mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
1.1  mrg 	  prodp += vsize;
1.1  mrg 	  up += vsize;
1.1  mrg 	  usize -= vsize;
1.1  mrg 	}
1.1  mrg       while (usize >= vsize);
1.1  mrg     }
1.1  mrg
1.1  mrg   /* True: usize < vsize.  */
1.1  mrg
1.1  mrg   /* Make life simple: Recurse.  The operands are swapped so that V, now
1.1  mrg      the longer one, comes first.  The VSIZE + USIZE limb product is
1.1  mrg      written to TSPACE, which is large enough because USIZE < VSIZE and
1.1  mrg      TSPACE holds 2 * VSIZE limbs.  It is then merged into PROD the same
1.1  mrg      way as the block products above: low VSIZE limbs added, remaining
1.1  mrg      USIZE limbs plus carry stored.  */
1.1  mrg
1.1  mrg   if (usize != 0)
1.1  mrg     {
1.1  mrg       mpn_mul (tspace, vp, vsize, up, usize);
1.1  mrg       cy = mpn_add_n (prodp, prodp, tspace, vsize);
1.1  mrg       mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
1.1  mrg     }
1.1  mrg
1.1  mrg   return *prod_endp;	/* Most significant limb of the full product.  */
1.1  mrg }