1 1.1 mrg /* mpn_sqrlo_basecase -- Internal routine to square a natural number 2 1.1 mrg of length n. 3 1.1 mrg 4 1.1 mrg THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY 5 1.1 mrg SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. 6 1.1 mrg 7 1.1 mrg 8 1.1.1.2 mrg Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015, 9 1.1.1.2 mrg 2016 Free Software Foundation, Inc. 10 1.1 mrg 11 1.1 mrg This file is part of the GNU MP Library. 12 1.1 mrg 13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 14 1.1 mrg it under the terms of either: 15 1.1 mrg 16 1.1 mrg * the GNU Lesser General Public License as published by the Free 17 1.1 mrg Software Foundation; either version 3 of the License, or (at your 18 1.1 mrg option) any later version. 19 1.1 mrg 20 1.1 mrg or 21 1.1 mrg 22 1.1 mrg * the GNU General Public License as published by the Free Software 23 1.1 mrg Foundation; either version 2 of the License, or (at your option) any 24 1.1 mrg later version. 25 1.1 mrg 26 1.1 mrg or both in parallel, as here. 27 1.1 mrg 28 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 29 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 1.1 mrg for more details. 32 1.1 mrg 33 1.1 mrg You should have received copies of the GNU General Public License and the 34 1.1 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 35 1.1 mrg see https://www.gnu.org/licenses/. */ 36 1.1 mrg 37 1.1 mrg #include "gmp-impl.h" 38 1.1 mrg #include "longlong.h" 39 1.1 mrg 40 1.1 mrg #ifndef SQRLO_SHORTCUT_MULTIPLICATIONS 41 1.1 mrg #if HAVE_NATIVE_mpn_addmul_1 42 1.1 mrg #define SQRLO_SHORTCUT_MULTIPLICATIONS 0 43 1.1 mrg #else 44 1.1 mrg #define SQRLO_SHORTCUT_MULTIPLICATIONS 1 45 1.1 mrg #endif 46 1.1 mrg #endif 47 1.1 mrg 48 1.1 mrg #if HAVE_NATIVE_mpn_sqr_diagonal 49 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \ 50 1.1 mrg mpn_sqr_diagonal (rp, up, n) 51 1.1 mrg #else 52 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \ 53 1.1 mrg do { \ 54 1.1 mrg mp_size_t _i; \ 55 1.1 mrg for (_i = 0; _i < (n); _i++) \ 56 1.1 mrg { \ 57 1.1 mrg mp_limb_t ul, lpl; \ 58 1.1 mrg ul = (up)[_i]; \ 59 1.1 mrg umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \ 60 1.1 mrg (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \ 61 1.1 mrg } \ 62 1.1 mrg } while (0) 63 1.1 mrg #endif 64 1.1 mrg 65 1.1 mrg #define MPN_SQRLO_DIAGONAL(rp, up, n) \ 66 1.1 mrg do { \ 67 1.1 mrg mp_size_t nhalf; \ 68 1.1 mrg nhalf = (n) >> 1; \ 69 1.1 mrg MPN_SQR_DIAGONAL ((rp), (up), nhalf); \ 70 1.1 mrg if (((n) & 1) != 0) \ 71 1.1 mrg { \ 72 1.1 mrg mp_limb_t op; \ 73 1.1 mrg op = (up)[nhalf]; \ 74 1.1 mrg (rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK; \ 75 1.1 mrg } \ 76 1.1 mrg } while (0) 77 1.1 mrg 78 1.1 mrg #if HAVE_NATIVE_mpn_addlsh1_n_ip1 79 1.1 mrg #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n) \ 80 1.1 mrg do { \ 81 1.1 mrg MPN_SQRLO_DIAGONAL((rp), (up), (n)); \ 82 1.1 mrg mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1); \ 83 1.1 mrg } while (0) 84 1.1 mrg #else 85 1.1 mrg #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n) \ 86 1.1 mrg do { \ 87 1.1 mrg MPN_SQRLO_DIAGONAL((rp), (up), (n)); \ 88 1.1 mrg mpn_lshift ((tp), (tp), (n) - 1, 1); \ 89 1.1 mrg mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1); \ 90 1.1 mrg } while (0) 91 1.1 mrg #endif 92 1.1 mrg 93 1.1 mrg /* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */ 94 1.1 mrg #define SQRLO_BASECASE_ALLOC \ 95 1.1 mrg (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1) 96 1.1 mrg 97 1.1 mrg /* Default mpn_sqrlo_basecase using mpn_addmul_1. */ 98 1.1 mrg #ifndef SQRLO_SPECIAL_CASES 99 1.1 mrg #define SQRLO_SPECIAL_CASES 2 100 1.1 mrg #endif 101 1.1.1.2 mrg 102 1.1.1.2 mrg #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY 103 1.1.1.2 mrg #define MAYBE_special_cases 1 104 1.1.1.2 mrg #else 105 1.1.1.2 mrg #define MAYBE_special_cases \ 106 1.1.1.2 mrg ((SQRLO_BASECASE_THRESHOLD <= SQRLO_SPECIAL_CASES) && (SQRLO_DC_THRESHOLD != 0)) 107 1.1.1.2 mrg #endif 108 1.1.1.2 mrg 109 1.1 mrg void 110 1.1 mrg mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 111 1.1 mrg { 112 1.1 mrg mp_limb_t ul; 113 1.1 mrg 114 1.1 mrg ASSERT (n >= 1); 115 1.1 mrg ASSERT (! MPN_OVERLAP_P (rp, n, up, n)); 116 1.1 mrg 117 1.1 mrg ul = up[0]; 118 1.1 mrg 119 1.1.1.2 mrg if (MAYBE_special_cases && n <= SQRLO_SPECIAL_CASES) 120 1.1 mrg { 121 1.1 mrg #if SQRLO_SPECIAL_CASES == 1 122 1.1 mrg rp[0] = (ul * ul) & GMP_NUMB_MASK; 123 1.1 mrg #else 124 1.1 mrg if (n == 1) 125 1.1 mrg rp[0] = (ul * ul) & GMP_NUMB_MASK; 126 1.1 mrg else 127 1.1 mrg { 128 1.1 mrg mp_limb_t hi, lo, ul1; 129 1.1 mrg umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS); 130 1.1 mrg rp[0] = lo >> GMP_NAIL_BITS; 131 1.1 mrg ul1 = up[1]; 132 1.1 mrg #if SQRLO_SPECIAL_CASES == 2 133 1.1 mrg rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK; 134 1.1 mrg #else 135 1.1 mrg if (n == 2) 136 1.1 mrg rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK; 137 1.1 mrg else 138 1.1 mrg { 139 1.1 mrg mp_limb_t hi1; 140 1.1 mrg #if GMP_NAIL_BITS != 0 141 1.1 mrg ul <<= 1; 142 1.1 mrg #endif 143 1.1 mrg umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul); 144 1.1 mrg hi1 += ul * up[2]; 145 1.1 mrg #if GMP_NAIL_BITS == 0 146 1.1 mrg hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1)); 147 1.1 mrg add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi); 148 1.1 mrg #else 149 1.1 mrg hi += lo >> GMP_NAIL_BITS; 150 1.1 mrg rp[1] = hi & GMP_NUMB_MASK; 151 1.1 mrg rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK; 152 1.1 mrg #endif 153 1.1 mrg } 154 1.1 mrg #endif 155 1.1 mrg } 156 1.1 mrg #endif 157 1.1 mrg } 158 1.1 mrg else 159 1.1 mrg { 160 1.1 mrg mp_limb_t tp[SQRLO_BASECASE_ALLOC]; 161 1.1 mrg mp_size_t i; 162 1.1 mrg 163 1.1 mrg /* must fit n-1 limbs in tp */ 164 1.1 mrg ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT); 165 1.1 mrg 166 1.1 mrg --n; 167 1.1 mrg #if SQRLO_SHORTCUT_MULTIPLICATIONS 168 1.1 mrg { 169 1.1 mrg mp_limb_t cy; 170 1.1 mrg 171 1.1 mrg cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul); 172 1.1 mrg for (i = 1; 2 * i + 1 < n; ++i) 173 1.1 mrg { 174 1.1 mrg ul = up[i]; 175 1.1 mrg cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul); 176 1.1 mrg } 177 1.1 mrg tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK; 178 1.1 mrg } 179 1.1 mrg #else 180 1.1 mrg mpn_mul_1 (tp, up + 1, n, ul); 181 1.1 mrg for (i = 1; 2 * i < n; ++i) 182 1.1 mrg mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]); 183 1.1 mrg #endif 184 1.1 mrg 185 1.1 mrg MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1); 186 1.1 mrg } 187 1.1 mrg } 188 1.1 mrg #undef SQRLO_SPECIAL_CASES 189 1.1.1.2 mrg #undef MAYBE_special_cases 190 1.1.1.2 mrg #undef SQRLO_BASECASE_ALLOC 191 1.1.1.2 mrg #undef SQRLO_SHORTCUT_MULTIPLICATIONS 192 1.1.1.2 mrg #undef MPN_SQR_DIAGONAL 193 1.1.1.2 mrg #undef MPN_SQRLO_DIAGONAL 194 1.1.1.2 mrg #undef MPN_SQRLO_DIAG_ADDLSH1 195