1 1.1 mrg /* mpn_sqr_basecase -- Internal routine to square a natural number 2 1.1 mrg of length n. 3 1.1 mrg 4 1.1 mrg THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY 5 1.1 mrg SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. 6 1.1 mrg 7 1.1 mrg 8 1.1.1.4 mrg Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free 9 1.1.1.4 mrg Software Foundation, Inc. 10 1.1 mrg 11 1.1 mrg This file is part of the GNU MP Library. 12 1.1 mrg 13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 14 1.1.1.3 mrg it under the terms of either: 15 1.1.1.3 mrg 16 1.1.1.3 mrg * the GNU Lesser General Public License as published by the Free 17 1.1.1.3 mrg Software Foundation; either version 3 of the License, or (at your 18 1.1.1.3 mrg option) any later version. 19 1.1.1.3 mrg 20 1.1.1.3 mrg or 21 1.1.1.3 mrg 22 1.1.1.3 mrg * the GNU General Public License as published by the Free Software 23 1.1.1.3 mrg Foundation; either version 2 of the License, or (at your option) any 24 1.1.1.3 mrg later version. 25 1.1.1.3 mrg 26 1.1.1.3 mrg or both in parallel, as here. 27 1.1 mrg 28 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 29 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 1.1.1.3 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 1.1.1.3 mrg for more details. 32 1.1 mrg 33 1.1.1.3 mrg You should have received copies of the GNU General Public License and the 34 1.1.1.3 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 35 1.1.1.3 mrg see https://www.gnu.org/licenses/. */ 36 1.1 mrg 37 1.1 mrg #include "gmp-impl.h" 38 1.1 mrg #include "longlong.h" 39 1.1 mrg 40 1.1 mrg 41 1.1 mrg #if HAVE_NATIVE_mpn_sqr_diagonal 42 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \ 43 1.1 mrg mpn_sqr_diagonal (rp, up, n) 44 1.1 mrg #else 45 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \ 46 1.1 mrg do { \ 47 1.1 mrg mp_size_t _i; \ 48 1.1 mrg for (_i = 0; _i < (n); _i++) \ 49 1.1 mrg { \ 50 1.1 mrg mp_limb_t ul, lpl; \ 51 1.1 mrg ul = (up)[_i]; \ 52 1.1 mrg umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \ 53 1.1 mrg (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \ 54 1.1 mrg } \ 55 1.1 mrg } while (0) 56 1.1 mrg #endif 57 1.1 mrg 58 1.1.1.2 mrg #if HAVE_NATIVE_mpn_sqr_diag_addlsh1 59 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ 60 1.1.1.2 mrg mpn_sqr_diag_addlsh1 (rp, tp, up, n) 61 1.1.1.2 mrg #else 62 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh1_n 63 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ 64 1.1.1.2 mrg do { \ 65 1.1.1.2 mrg mp_limb_t cy; \ 66 1.1.1.2 mrg MPN_SQR_DIAGONAL (rp, up, n); \ 67 1.1.1.2 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \ 68 1.1.1.2 mrg rp[2 * n - 1] += cy; \ 69 1.1.1.2 mrg } while (0) 70 1.1.1.2 mrg #else 71 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ 72 1.1.1.2 mrg do { \ 73 1.1.1.2 mrg mp_limb_t cy; \ 74 1.1.1.2 mrg MPN_SQR_DIAGONAL (rp, up, n); \ 75 1.1.1.2 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \ 76 1.1.1.2 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \ 77 1.1.1.2 mrg rp[2 * n - 1] += cy; \ 78 1.1.1.2 mrg } while (0) 79 1.1.1.2 mrg #endif 80 1.1.1.2 mrg #endif 81 1.1.1.2 mrg 82 1.1 mrg 83 1.1 mrg #undef READY_WITH_mpn_sqr_basecase 84 1.1 mrg 85 1.1 mrg 86 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s 87 1.1 mrg void 88 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 89 1.1 mrg { 90 1.1 mrg mp_size_t i; 91 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; 92 1.1 mrg mp_ptr tp = tarr; 93 1.1 mrg mp_limb_t cy; 94 1.1 mrg 95 1.1 mrg /* must fit 2*n limbs in tarr */ 96 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD); 97 1.1 mrg 98 1.1 mrg if ((n & 1) != 0) 99 1.1 mrg { 100 1.1 mrg if (n == 1) 101 1.1 mrg { 102 1.1 mrg mp_limb_t ul, lpl; 103 1.1 mrg ul = up[0]; 104 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 105 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS; 106 1.1 mrg return; 107 1.1 mrg } 108 1.1 mrg 109 1.1 mrg MPN_ZERO (tp, n); 110 1.1 mrg 111 1.1 mrg for (i = 0; i <= n - 2; i += 2) 112 1.1 mrg { 113 1.1 mrg cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 114 1.1 mrg tp[n + i] = cy; 115 1.1 mrg } 116 1.1 mrg } 117 1.1 mrg else 118 1.1 mrg { 119 1.1 mrg if (n == 2) 120 1.1 mrg { 121 1.1.1.2 mrg #if HAVE_NATIVE_mpn_mul_2 122 1.1.1.2 mrg rp[3] = mpn_mul_2 (rp, up, 2, up); 123 1.1.1.2 mrg #else 124 1.1 mrg rp[0] = 0; 125 1.1 mrg rp[1] = 0; 126 1.1 mrg rp[3] = mpn_addmul_2 (rp, up, 2, up); 127 1.1.1.2 mrg #endif 128 1.1 mrg return; 129 1.1 mrg } 130 1.1 mrg 131 1.1 mrg MPN_ZERO (tp, n); 132 1.1 mrg 133 1.1 mrg for (i = 0; i <= n - 4; i += 2) 134 1.1 mrg { 135 1.1 mrg cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 136 1.1 mrg tp[n + i] = cy; 137 1.1 mrg } 138 1.1 mrg cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]); 139 1.1 mrg tp[2 * n - 3] = cy; 140 1.1 mrg } 141 1.1 mrg 142 1.1.1.2 mrg MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n); 143 1.1 mrg } 144 1.1 mrg #define READY_WITH_mpn_sqr_basecase 145 1.1 mrg #endif 146 1.1 mrg 147 1.1 mrg 148 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2 149 1.1 mrg 150 1.1 mrg /* mpn_sqr_basecase using plain mpn_addmul_2. 151 1.1 mrg 152 1.1 mrg This is tricky, since we have to let mpn_addmul_2 make some undesirable 153 1.1 mrg multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle. 154 1.1 mrg This forces us to conditionally add or subtract the mpn_sqr_diagonal 155 1.1 mrg results. Examples of the product we form: 156 1.1 mrg 157 1.1 mrg n = 4 n = 5 n = 6 158 1.1 mrg u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1 159 1.1 mrg u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3 160 1.1 mrg u4 * u5 161 1.1 mrg add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5 162 1.1 mrg sub: u1 sub: u1 u3 sub: u1 u3 163 1.1 mrg */ 164 1.1 mrg 165 1.1 mrg void 166 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 167 1.1 mrg { 168 1.1 mrg mp_size_t i; 169 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; 170 1.1 mrg mp_ptr tp = tarr; 171 1.1 mrg mp_limb_t cy; 172 1.1 mrg 173 1.1 mrg /* must fit 2*n limbs in tarr */ 174 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD); 175 1.1 mrg 176 1.1 mrg if ((n & 1) != 0) 177 1.1 mrg { 178 1.1 mrg mp_limb_t x0, x1; 179 1.1 mrg 180 1.1 mrg if (n == 1) 181 1.1 mrg { 182 1.1 mrg mp_limb_t ul, lpl; 183 1.1 mrg ul = up[0]; 184 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 185 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS; 186 1.1 mrg return; 187 1.1 mrg } 188 1.1 mrg 189 1.1 mrg /* The code below doesn't like unnormalized operands. Since such 190 1.1 mrg operands are unusual, handle them with a dumb recursion. */ 191 1.1 mrg if (up[n - 1] == 0) 192 1.1 mrg { 193 1.1 mrg rp[2 * n - 2] = 0; 194 1.1 mrg rp[2 * n - 1] = 0; 195 1.1 mrg mpn_sqr_basecase (rp, up, n - 1); 196 1.1 mrg return; 197 1.1 mrg } 198 1.1 mrg 199 1.1 mrg MPN_ZERO (tp, n); 200 1.1 mrg 201 1.1 mrg for (i = 0; i <= n - 2; i += 2) 202 1.1 mrg { 203 1.1 mrg cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 204 1.1 mrg tp[n + i] = cy; 205 1.1 mrg } 206 1.1 mrg 207 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n); 208 1.1 mrg 209 1.1 mrg for (i = 2;; i += 4) 210 1.1 mrg { 211 1.1 mrg x0 = rp[i + 0]; 212 1.1 mrg rp[i + 0] = (-x0) & GMP_NUMB_MASK; 213 1.1 mrg x1 = rp[i + 1]; 214 1.1 mrg rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK; 215 1.1 mrg __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0); 216 1.1 mrg if (i + 4 >= 2 * n) 217 1.1 mrg break; 218 1.1 mrg mpn_incr_u (rp + i + 4, cy); 219 1.1 mrg } 220 1.1 mrg } 221 1.1 mrg else 222 1.1 mrg { 223 1.1 mrg mp_limb_t x0, x1; 224 1.1 mrg 225 1.1 mrg if (n == 2) 226 1.1 mrg { 227 1.1.1.2 mrg #if HAVE_NATIVE_mpn_mul_2 228 1.1.1.2 mrg rp[3] = mpn_mul_2 (rp, up, 2, up); 229 1.1.1.2 mrg #else 230 1.1 mrg rp[0] = 0; 231 1.1 mrg rp[1] = 0; 232 1.1 mrg rp[3] = mpn_addmul_2 (rp, up, 2, up); 233 1.1.1.2 mrg #endif 234 1.1 mrg return; 235 1.1 mrg } 236 1.1 mrg 237 1.1 mrg /* The code below doesn't like unnormalized operands. Since such 238 1.1 mrg operands are unusual, handle them with a dumb recursion. */ 239 1.1 mrg if (up[n - 1] == 0) 240 1.1 mrg { 241 1.1 mrg rp[2 * n - 2] = 0; 242 1.1 mrg rp[2 * n - 1] = 0; 243 1.1 mrg mpn_sqr_basecase (rp, up, n - 1); 244 1.1 mrg return; 245 1.1 mrg } 246 1.1 mrg 247 1.1 mrg MPN_ZERO (tp, n); 248 1.1 mrg 249 1.1 mrg for (i = 0; i <= n - 4; i += 2) 250 1.1 mrg { 251 1.1 mrg cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 252 1.1 mrg tp[n + i] = cy; 253 1.1 mrg } 254 1.1 mrg cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]); 255 1.1 mrg tp[2 * n - 3] = cy; 256 1.1 mrg 257 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n); 258 1.1 mrg 259 1.1 mrg for (i = 2;; i += 4) 260 1.1 mrg { 261 1.1 mrg x0 = rp[i + 0]; 262 1.1 mrg rp[i + 0] = (-x0) & GMP_NUMB_MASK; 263 1.1 mrg x1 = rp[i + 1]; 264 1.1 mrg rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK; 265 1.1 mrg if (i + 6 >= 2 * n) 266 1.1 mrg break; 267 1.1 mrg __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0); 268 1.1 mrg mpn_incr_u (rp + i + 4, cy); 269 1.1 mrg } 270 1.1 mrg mpn_decr_u (rp + i + 2, (x1 | x0) != 0); 271 1.1 mrg } 272 1.1 mrg 273 1.1 mrg #if HAVE_NATIVE_mpn_addlsh1_n 274 1.1 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); 275 1.1 mrg #else 276 1.1 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1); 277 1.1 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); 278 1.1 mrg #endif 279 1.1 mrg rp[2 * n - 1] += cy; 280 1.1 mrg } 281 1.1 mrg #define READY_WITH_mpn_sqr_basecase 282 1.1 mrg #endif 283 1.1 mrg 284 1.1 mrg 285 1.1.1.4 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1 286 1.1.1.4 mrg 287 1.1.1.4 mrg /* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack 288 1.1.1.4 mrg allocation. */ 289 1.1.1.4 mrg void 290 1.1.1.4 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 291 1.1.1.4 mrg { 292 1.1.1.4 mrg if (n == 1) 293 1.1.1.4 mrg { 294 1.1.1.4 mrg mp_limb_t ul, lpl; 295 1.1.1.4 mrg ul = up[0]; 296 1.1.1.4 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 297 1.1.1.4 mrg rp[0] = lpl >> GMP_NAIL_BITS; 298 1.1.1.4 mrg } 299 1.1.1.4 mrg else 300 1.1.1.4 mrg { 301 1.1.1.4 mrg mp_size_t i; 302 1.1.1.4 mrg mp_ptr xp; 303 1.1.1.4 mrg 304 1.1.1.4 mrg rp += 1; 305 1.1.1.4 mrg rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]); 306 1.1.1.4 mrg for (i = n - 2; i != 0; i--) 307 1.1.1.4 mrg { 308 1.1.1.4 mrg up += 1; 309 1.1.1.4 mrg rp += 2; 310 1.1.1.4 mrg rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]); 311 1.1.1.4 mrg } 312 1.1.1.4 mrg 313 1.1.1.4 mrg xp = rp - 2 * n + 3; 314 1.1.1.4 mrg mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n); 315 1.1.1.4 mrg } 316 1.1.1.4 mrg } 317 1.1.1.4 mrg #define READY_WITH_mpn_sqr_basecase 318 1.1.1.4 mrg #endif 319 1.1.1.4 mrg 320 1.1.1.4 mrg 321 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) 322 1.1 mrg 323 1.1 mrg /* Default mpn_sqr_basecase using mpn_addmul_1. */ 324 1.1 mrg void 325 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 326 1.1 mrg { 327 1.1 mrg mp_size_t i; 328 1.1 mrg 329 1.1 mrg ASSERT (n >= 1); 330 1.1 mrg ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n)); 331 1.1 mrg 332 1.1.1.4 mrg if (n == 1) 333 1.1.1.4 mrg { 334 1.1.1.4 mrg mp_limb_t ul, lpl; 335 1.1.1.4 mrg ul = up[0]; 336 1.1.1.4 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 337 1.1.1.4 mrg rp[0] = lpl >> GMP_NAIL_BITS; 338 1.1.1.4 mrg } 339 1.1.1.4 mrg else 340 1.1 mrg { 341 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; 342 1.1 mrg mp_ptr tp = tarr; 343 1.1 mrg mp_limb_t cy; 344 1.1 mrg 345 1.1 mrg /* must fit 2*n limbs in tarr */ 346 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD); 347 1.1 mrg 348 1.1 mrg cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]); 349 1.1 mrg tp[n - 1] = cy; 350 1.1 mrg for (i = 2; i < n; i++) 351 1.1 mrg { 352 1.1 mrg mp_limb_t cy; 353 1.1 mrg cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]); 354 1.1 mrg tp[n + i - 2] = cy; 355 1.1 mrg } 356 1.1 mrg 357 1.1.1.2 mrg MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n); 358 1.1 mrg } 359 1.1 mrg } 360 1.1.1.4 mrg #define READY_WITH_mpn_sqr_basecase 361 1.1 mrg #endif 362