1 1.1 mrg /* mpn_mul -- Multiply two natural numbers. 2 1.1 mrg 3 1.1 mrg Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. 4 1.1 mrg 5 1.1 mrg This file is part of the GNU MP Library. 6 1.1 mrg 7 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 8 1.1 mrg it under the terms of the GNU Lesser General Public License as published by 9 1.1 mrg the Free Software Foundation; either version 2.1 of the License, or (at your 10 1.1 mrg option) any later version. 11 1.1 mrg 12 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 13 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 15 1.1 mrg License for more details. 16 1.1 mrg 17 1.1 mrg You should have received a copy of the GNU Lesser General Public License 18 1.1 mrg along with the GNU MP Library; see the file COPYING.LIB. If not, write to 19 1.1 mrg the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, 20 1.1 mrg MA 02111-1307, USA. */ 21 1.1 mrg 22 1.1 mrg #include <config.h> 23 1.1 mrg #include "gmp-impl.h" 24 1.1 mrg 25 1.1 mrg /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) 26 1.1 mrg and v (pointed to by VP, with VSIZE limbs), and store the result at 27 1.1 mrg PRODP. USIZE + VSIZE limbs are always stored, but if the input 28 1.1 mrg operands are normalized. Return the most significant limb of the 29 1.1 mrg result. 30 1.1 mrg 31 1.1 mrg NOTE: The space pointed to by PRODP is overwritten before finished 32 1.1 mrg with U and V, so overlap is an error. 33 1.1 mrg 34 1.1 mrg Argument constraints: 35 1.1 mrg 1. USIZE >= VSIZE. 36 1.1 mrg 2. PRODP != UP and PRODP != VP, i.e. the destination 37 1.1 mrg must be distinct from the multiplier and the multiplicand. */ 38 1.1 mrg 39 1.1 mrg /* If KARATSUBA_THRESHOLD is not already defined, define it to a 40 1.1 mrg value which is good on most machines. */ 41 1.1 mrg #ifndef KARATSUBA_THRESHOLD 42 1.1 mrg #define KARATSUBA_THRESHOLD 32 43 1.1 mrg #endif 44 1.1 mrg 45 1.1 mrg mp_limb_t 46 1.1 mrg #if __STDC__ 47 1.1 mrg mpn_mul (mp_ptr prodp, 48 1.1 mrg mp_srcptr up, mp_size_t usize, 49 1.1 mrg mp_srcptr vp, mp_size_t vsize) 50 1.1 mrg #else 51 1.1 mrg mpn_mul (prodp, up, usize, vp, vsize) 52 1.1 mrg mp_ptr prodp; 53 1.1 mrg mp_srcptr up; 54 1.1 mrg mp_size_t usize; 55 1.1 mrg mp_srcptr vp; 56 1.1 mrg mp_size_t vsize; 57 1.1 mrg #endif 58 1.1 mrg { 59 1.1 mrg mp_ptr prod_endp = prodp + usize + vsize - 1; 60 1.1 mrg mp_limb_t cy; 61 1.1 mrg mp_ptr tspace; 62 1.1 mrg 63 1.1 mrg if (vsize < KARATSUBA_THRESHOLD) 64 1.1 mrg { 65 1.1 mrg /* Handle simple cases with traditional multiplication. 66 1.1 mrg 67 1.1 mrg This is the most critical code of the entire function. All 68 1.1 mrg multiplies rely on this, both small and huge. Small ones arrive 69 1.1 mrg here immediately. Huge ones arrive here as this is the base case 70 1.1 mrg for Karatsuba's recursive algorithm below. */ 71 1.1 mrg mp_size_t i; 72 1.1 mrg mp_limb_t cy_limb; 73 1.1 mrg mp_limb_t v_limb; 74 1.1 mrg 75 1.1 mrg if (vsize == 0) 76 1.1 mrg return 0; 77 1.1 mrg 78 1.1 mrg /* Multiply by the first limb in V separately, as the result can be 79 1.1 mrg stored (not added) to PROD. We also avoid a loop for zeroing. */ 80 1.1 mrg v_limb = vp[0]; 81 1.1 mrg if (v_limb <= 1) 82 1.1 mrg { 83 1.1 mrg if (v_limb == 1) 84 1.1 mrg MPN_COPY (prodp, up, usize); 85 1.1 mrg else 86 1.1 mrg MPN_ZERO (prodp, usize); 87 1.1 mrg cy_limb = 0; 88 1.1 mrg } 89 1.1 mrg else 90 1.1 mrg cy_limb = mpn_mul_1 (prodp, up, usize, v_limb); 91 1.1 mrg 92 1.1 mrg prodp[usize] = cy_limb; 93 1.1 mrg prodp++; 94 1.1 mrg 95 1.1 mrg /* For each iteration in the outer loop, multiply one limb from 96 1.1 mrg U with one limb from V, and add it to PROD. */ 97 1.1 mrg for (i = 1; i < vsize; i++) 98 1.1 mrg { 99 1.1 mrg v_limb = vp[i]; 100 1.1 mrg if (v_limb <= 1) 101 1.1 mrg { 102 1.1 mrg cy_limb = 0; 103 1.1 mrg if (v_limb == 1) 104 1.1 mrg cy_limb = mpn_add_n (prodp, prodp, up, usize); 105 1.1 mrg } 106 1.1 mrg else 107 1.1 mrg cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb); 108 1.1 mrg 109 1.1 mrg prodp[usize] = cy_limb; 110 1.1 mrg prodp++; 111 1.1 mrg } 112 1.1 mrg return cy_limb; 113 1.1 mrg } 114 1.1 mrg 115 1.1 mrg tspace = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB); 116 1.1 mrg MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace); 117 1.1 mrg 118 1.1 mrg prodp += vsize; 119 1.1 mrg up += vsize; 120 1.1 mrg usize -= vsize; 121 1.1 mrg if (usize >= vsize) 122 1.1 mrg { 123 1.1 mrg mp_ptr tp = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB); 124 1.1 mrg do 125 1.1 mrg { 126 1.1 mrg MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace); 127 1.1 mrg cy = mpn_add_n (prodp, prodp, tp, vsize); 128 1.1 mrg mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy); 129 1.1 mrg prodp += vsize; 130 1.1 mrg up += vsize; 131 1.1 mrg usize -= vsize; 132 1.1 mrg } 133 1.1 mrg while (usize >= vsize); 134 1.1 mrg } 135 1.1 mrg 136 1.1 mrg /* True: usize < vsize. */ 137 1.1 mrg 138 1.1 mrg /* Make life simple: Recurse. */ 139 1.1 mrg 140 1.1 mrg if (usize != 0) 141 1.1 mrg { 142 1.1 mrg mpn_mul (tspace, vp, vsize, up, usize); 143 1.1 mrg cy = mpn_add_n (prodp, prodp, tspace, vsize); 144 1.1 mrg mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy); 145 1.1 mrg } 146 1.1 mrg 147 1.1 mrg return *prod_endp; 148 1.1 mrg } 149