1 1.1 mrg /* hgcd.c. 2 1.1 mrg 3 1.1 mrg THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 4 1.1 mrg SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 5 1.1 mrg GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 6 1.1 mrg 7 1.1.1.3 mrg Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc. 8 1.1 mrg 9 1.1 mrg This file is part of the GNU MP Library. 10 1.1 mrg 11 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 12 1.1.1.3 mrg it under the terms of either: 13 1.1.1.3 mrg 14 1.1.1.3 mrg * the GNU Lesser General Public License as published by the Free 15 1.1.1.3 mrg Software Foundation; either version 3 of the License, or (at your 16 1.1.1.3 mrg option) any later version. 17 1.1.1.3 mrg 18 1.1.1.3 mrg or 19 1.1.1.3 mrg 20 1.1.1.3 mrg * the GNU General Public License as published by the Free Software 21 1.1.1.3 mrg Foundation; either version 2 of the License, or (at your option) any 22 1.1.1.3 mrg later version. 23 1.1.1.3 mrg 24 1.1.1.3 mrg or both in parallel, as here. 25 1.1 mrg 26 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 27 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 28 1.1.1.3 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 29 1.1.1.3 mrg for more details. 30 1.1 mrg 31 1.1.1.3 mrg You should have received copies of the GNU General Public License and the 32 1.1.1.3 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 33 1.1.1.3 mrg see https://www.gnu.org/licenses/. */ 34 1.1 mrg 35 1.1 mrg #include "gmp-impl.h" 36 1.1 mrg #include "longlong.h" 37 1.1 mrg 38 1.1 mrg 39 1.1 mrg /* Size analysis for hgcd: 40 1.1 mrg 41 1.1 mrg For the recursive calls, we have n1 <= ceil(n / 2). Then the 42 1.1 mrg storage need is determined by the storage for the recursive call 43 1.1 mrg computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1 44 1.1 mrg (after this, the storage needed for M1 can be recycled). 45 1.1 mrg 46 1.1 mrg Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1) 47 1.1 mrg = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2, 48 1.1 mrg and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total, 49 1.1 mrg 4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12. 50 1.1 mrg 51 1.1 mrg For the recursive call, we need S(n1) = S(ceil(n/2)). 52 1.1 mrg 53 1.1 mrg S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2)) 54 1.1 mrg <= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k)) 55 1.1 mrg <= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k)) 56 1.1 mrg <= 20 ceil(n/4) + 22k + S(ceil(n/2^k)) 57 1.1 mrg */ 58 1.1 mrg 59 1.1 mrg mp_size_t 60 1.1 mrg mpn_hgcd_itch (mp_size_t n) 61 1.1 mrg { 62 1.1 mrg unsigned k; 63 1.1 mrg int count; 64 1.1 mrg mp_size_t nscaled; 65 1.1 mrg 66 1.1 mrg if (BELOW_THRESHOLD (n, HGCD_THRESHOLD)) 67 1.1.1.2 mrg return n; 68 1.1 mrg 69 1.1 mrg /* Get the recursion depth. */ 70 1.1 mrg nscaled = (n - 1) / (HGCD_THRESHOLD - 1); 71 1.1 mrg count_leading_zeros (count, nscaled); 72 1.1 mrg k = GMP_LIMB_BITS - count; 73 1.1 mrg 74 1.1.1.2 mrg return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD; 75 1.1 mrg } 76 1.1 mrg 77 1.1 mrg /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M 78 1.1 mrg with elements of size at most (n+1)/2 - 1. Returns new size of a, 79 1.1 mrg b, or zero if no reduction is possible. */ 80 1.1 mrg 81 1.1 mrg mp_size_t 82 1.1 mrg mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n, 83 1.1 mrg struct hgcd_matrix *M, mp_ptr tp) 84 1.1 mrg { 85 1.1 mrg mp_size_t s = n/2 + 1; 86 1.1 mrg 87 1.1.1.2 mrg mp_size_t nn; 88 1.1 mrg int success = 0; 89 1.1 mrg 90 1.1 mrg if (n <= s) 91 1.1 mrg /* Happens when n <= 2, a fairly uninteresting case but exercised 92 1.1 mrg by the random inputs of the testsuite. */ 93 1.1 mrg return 0; 94 1.1 mrg 95 1.1 mrg ASSERT ((ap[n-1] | bp[n-1]) > 0); 96 1.1 mrg 97 1.1 mrg ASSERT ((n+1)/2 - 1 < M->alloc); 98 1.1 mrg 99 1.1.1.2 mrg if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD)) 100 1.1 mrg { 101 1.1.1.2 mrg mp_size_t n2 = (3*n)/4 + 1; 102 1.1.1.2 mrg mp_size_t p = n/2; 103 1.1 mrg 104 1.1.1.2 mrg nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp); 105 1.1.1.2 mrg if (nn) 106 1.1.1.2 mrg { 107 1.1.1.2 mrg n = nn; 108 1.1.1.2 mrg success = 1; 109 1.1.1.2 mrg } 110 1.1.1.2 mrg 111 1.1.1.3 mrg /* NOTE: It appears this loop never runs more than once (at 112 1.1.1.2 mrg least when not recursing to hgcd_appr). */ 113 1.1.1.2 mrg while (n > n2) 114 1.1.1.2 mrg { 115 1.1.1.2 mrg /* Needs n + 1 storage */ 116 1.1.1.2 mrg nn = mpn_hgcd_step (n, ap, bp, s, M, tp); 117 1.1.1.2 mrg if (!nn) 118 1.1.1.2 mrg return success ? n : 0; 119 1.1 mrg 120 1.1.1.2 mrg n = nn; 121 1.1.1.2 mrg success = 1; 122 1.1.1.2 mrg } 123 1.1 mrg 124 1.1.1.2 mrg if (n > s + 2) 125 1.1 mrg { 126 1.1.1.2 mrg struct hgcd_matrix M1; 127 1.1.1.2 mrg mp_size_t scratch; 128 1.1 mrg 129 1.1.1.2 mrg p = 2*s - n + 1; 130 1.1.1.2 mrg scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p); 131 1.1 mrg 132 1.1.1.2 mrg mpn_hgcd_matrix_init(&M1, n - p, tp); 133 1.1 mrg 134 1.1.1.2 mrg /* FIXME: Should use hgcd_reduce, but that may require more 135 1.1.1.2 mrg scratch space, which requires review. */ 136 1.1 mrg 137 1.1.1.2 mrg nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch); 138 1.1.1.2 mrg if (nn > 0) 139 1.1.1.2 mrg { 140 1.1.1.2 mrg /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */ 141 1.1.1.2 mrg ASSERT (M->n + 2 >= M1.n); 142 1.1 mrg 143 1.1.1.2 mrg /* Furthermore, assume M ends with a quotient (1, q; 0, 1), 144 1.1.1.2 mrg then either q or q + 1 is a correct quotient, and M1 will 145 1.1.1.2 mrg start with either (1, 0; 1, 1) or (2, 1; 1, 1). This 146 1.1.1.2 mrg rules out the case that the size of M * M1 is much 147 1.1.1.2 mrg smaller than the expected M->n + M1->n. */ 148 1.1 mrg 149 1.1.1.2 mrg ASSERT (M->n + M1.n < M->alloc); 150 1.1 mrg 151 1.1.1.2 mrg /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1) 152 1.1.1.2 mrg = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */ 153 1.1.1.2 mrg n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch); 154 1.1 mrg 155 1.1.1.2 mrg /* We need a bound for of M->n + M1.n. Let n be the original 156 1.1.1.2 mrg input size. Then 157 1.1.1.2 mrg 158 1.1.1.2 mrg ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2 159 1.1.1.2 mrg 160 1.1.1.2 mrg and it follows that 161 1.1.1.2 mrg 162 1.1.1.2 mrg M.n + M1.n <= ceil(n/2) + 1 163 1.1.1.2 mrg 164 1.1.1.2 mrg Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the 165 1.1.1.2 mrg amount of needed scratch space. */ 166 1.1.1.2 mrg mpn_hgcd_matrix_mul (M, &M1, tp + scratch); 167 1.1.1.2 mrg success = 1; 168 1.1.1.2 mrg } 169 1.1 mrg } 170 1.1 mrg } 171 1.1 mrg 172 1.1 mrg for (;;) 173 1.1 mrg { 174 1.1 mrg /* Needs s+3 < n */ 175 1.1.1.2 mrg nn = mpn_hgcd_step (n, ap, bp, s, M, tp); 176 1.1 mrg if (!nn) 177 1.1 mrg return success ? n : 0; 178 1.1 mrg 179 1.1 mrg n = nn; 180 1.1 mrg success = 1; 181 1.1 mrg } 182 1.1 mrg } 183