1 1.1 mrg /* mpn_toom2_sqr -- Square {ap,an}. 2 1.1 mrg 3 1.1 mrg Contributed to the GNU project by Torbjorn Granlund. 4 1.1 mrg 5 1.1 mrg THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 1.1 mrg SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 1.1 mrg GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 1.1 mrg 9 1.1.1.4 mrg Copyright 2006-2010, 2012, 2014, 2018 Free Software Foundation, Inc. 10 1.1 mrg 11 1.1 mrg This file is part of the GNU MP Library. 12 1.1 mrg 13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 14 1.1.1.3 mrg it under the terms of either: 15 1.1.1.3 mrg 16 1.1.1.3 mrg * the GNU Lesser General Public License as published by the Free 17 1.1.1.3 mrg Software Foundation; either version 3 of the License, or (at your 18 1.1.1.3 mrg option) any later version. 19 1.1.1.3 mrg 20 1.1.1.3 mrg or 21 1.1.1.3 mrg 22 1.1.1.3 mrg * the GNU General Public License as published by the Free Software 23 1.1.1.3 mrg Foundation; either version 2 of the License, or (at your option) any 24 1.1.1.3 mrg later version. 25 1.1.1.3 mrg 26 1.1.1.3 mrg or both in parallel, as here. 27 1.1 mrg 28 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 29 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 1.1.1.3 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 1.1.1.3 mrg for more details. 32 1.1 mrg 33 1.1.1.3 mrg You should have received copies of the GNU General Public License and the 34 1.1.1.3 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 35 1.1.1.3 mrg see https://www.gnu.org/licenses/. */ 36 1.1 mrg 37 1.1 mrg 38 1.1 mrg #include "gmp-impl.h" 39 1.1 mrg 40 1.1 mrg /* Evaluate in: -1, 0, +inf 41 1.1 mrg 42 1.1 mrg <-s--><--n--> 43 1.1 mrg ____ ______ 44 1.1 mrg |_a1_|___a0_| 45 1.1 mrg 46 1.1 mrg v0 = a0 ^2 # A(0)^2 47 1.1 mrg vm1 = (a0- a1)^2 # A(-1)^2 48 1.1 mrg vinf= a1 ^2 # A(inf)^2 49 1.1 mrg */ 50 1.1 mrg 51 1.1.1.2 mrg #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY 52 1.1 mrg #define MAYBE_sqr_toom2 1 53 1.1 mrg #else 54 1.1 mrg #define MAYBE_sqr_toom2 \ 55 1.1 mrg (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD) 56 1.1 mrg #endif 57 1.1 mrg 58 1.1 mrg #define TOOM2_SQR_REC(p, a, n, ws) \ 59 1.1 mrg do { \ 60 1.1 mrg if (! MAYBE_sqr_toom2 \ 61 1.1 mrg || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \ 62 1.1 mrg mpn_sqr_basecase (p, a, n); \ 63 1.1 mrg else \ 64 1.1 mrg mpn_toom2_sqr (p, a, n, ws); \ 65 1.1 mrg } while (0) 66 1.1 mrg 67 1.1 mrg void 68 1.1 mrg mpn_toom2_sqr (mp_ptr pp, 69 1.1 mrg mp_srcptr ap, mp_size_t an, 70 1.1 mrg mp_ptr scratch) 71 1.1 mrg { 72 1.1.1.2 mrg const int __gmpn_cpuvec_initialized = 1; 73 1.1 mrg mp_size_t n, s; 74 1.1 mrg mp_limb_t cy, cy2; 75 1.1 mrg mp_ptr asm1; 76 1.1 mrg 77 1.1 mrg #define a0 ap 78 1.1 mrg #define a1 (ap + n) 79 1.1 mrg 80 1.1 mrg s = an >> 1; 81 1.1 mrg n = an - s; 82 1.1 mrg 83 1.1.1.3 mrg ASSERT (0 < s && s <= n && s >= n - 1); 84 1.1 mrg 85 1.1 mrg asm1 = pp; 86 1.1 mrg 87 1.1 mrg /* Compute asm1. */ 88 1.1 mrg if (s == n) 89 1.1 mrg { 90 1.1 mrg if (mpn_cmp (a0, a1, n) < 0) 91 1.1 mrg { 92 1.1 mrg mpn_sub_n (asm1, a1, a0, n); 93 1.1 mrg } 94 1.1 mrg else 95 1.1 mrg { 96 1.1 mrg mpn_sub_n (asm1, a0, a1, n); 97 1.1 mrg } 98 1.1 mrg } 99 1.1.1.3 mrg else /* n - s == 1 */ 100 1.1 mrg { 101 1.1.1.3 mrg if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0) 102 1.1 mrg { 103 1.1 mrg mpn_sub_n (asm1, a1, a0, s); 104 1.1.1.3 mrg asm1[s] = 0; 105 1.1 mrg } 106 1.1 mrg else 107 1.1 mrg { 108 1.1.1.3 mrg asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s); 109 1.1 mrg } 110 1.1 mrg } 111 1.1 mrg 112 1.1 mrg #define v0 pp /* 2n */ 113 1.1 mrg #define vinf (pp + 2 * n) /* s+s */ 114 1.1 mrg #define vm1 scratch /* 2n */ 115 1.1 mrg #define scratch_out scratch + 2 * n 116 1.1 mrg 117 1.1 mrg /* vm1, 2n limbs */ 118 1.1 mrg TOOM2_SQR_REC (vm1, asm1, n, scratch_out); 119 1.1 mrg 120 1.1 mrg /* vinf, s+s limbs */ 121 1.1 mrg TOOM2_SQR_REC (vinf, a1, s, scratch_out); 122 1.1 mrg 123 1.1 mrg /* v0, 2n limbs */ 124 1.1 mrg TOOM2_SQR_REC (v0, ap, n, scratch_out); 125 1.1 mrg 126 1.1 mrg /* H(v0) + L(vinf) */ 127 1.1 mrg cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n); 128 1.1 mrg 129 1.1 mrg /* L(v0) + H(v0) */ 130 1.1 mrg cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n); 131 1.1 mrg 132 1.1 mrg /* L(vinf) + H(vinf) */ 133 1.1 mrg cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n); 134 1.1 mrg 135 1.1 mrg cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n); 136 1.1 mrg 137 1.1.1.4 mrg ASSERT (cy + 1 <= 3); 138 1.1 mrg ASSERT (cy2 <= 2); 139 1.1 mrg 140 1.1.1.4 mrg if (LIKELY (cy <= 2)) { 141 1.1.1.4 mrg MPN_INCR_U (pp + 2 * n, s + s, cy2); 142 1.1.1.3 mrg MPN_INCR_U (pp + 3 * n, s + s - n, cy); 143 1.1.1.4 mrg } else { /* cy is negative */ 144 1.1.1.4 mrg /* The total contribution of v0+vinf-vm1 can not be negative. */ 145 1.1.1.4 mrg #if WANT_ASSERT 146 1.1.1.4 mrg /* The borrow in cy stops the propagation of the carry cy2, */ 147 1.1.1.4 mrg ASSERT (cy2 == 1); 148 1.1.1.4 mrg cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2); 149 1.1.1.4 mrg ASSERT (cy == 0); 150 1.1.1.4 mrg #else 151 1.1.1.4 mrg /* we simply fill the area with zeros. */ 152 1.1.1.4 mrg MPN_FILL (pp + 2 * n, n, 0); 153 1.1.1.4 mrg #endif 154 1.1.1.4 mrg } 155 1.1 mrg } 156