1 1.1 mrg /* hgcd_matrix.c. 2 1.1 mrg 3 1.1 mrg THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 4 1.1 mrg SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 5 1.1 mrg GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 6 1.1 mrg 7 1.1.1.2 mrg Copyright 2003-2005, 2008, 2012 Free Software Foundation, Inc. 8 1.1 mrg 9 1.1 mrg This file is part of the GNU MP Library. 10 1.1 mrg 11 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 12 1.1.1.2 mrg it under the terms of either: 13 1.1.1.2 mrg 14 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free 15 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your 16 1.1.1.2 mrg option) any later version. 17 1.1.1.2 mrg 18 1.1.1.2 mrg or 19 1.1.1.2 mrg 20 1.1.1.2 mrg * the GNU General Public License as published by the Free Software 21 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any 22 1.1.1.2 mrg later version. 23 1.1.1.2 mrg 24 1.1.1.2 mrg or both in parallel, as here. 25 1.1 mrg 26 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 27 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 28 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 29 1.1.1.2 mrg for more details. 30 1.1 mrg 31 1.1.1.2 mrg You should have received copies of the GNU General Public License and the 32 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 33 1.1.1.2 mrg see https://www.gnu.org/licenses/. */ 34 1.1 mrg 35 1.1 mrg #include "gmp-impl.h" 36 1.1 mrg #include "longlong.h" 37 1.1 mrg 38 1.1 mrg /* For input of size n, matrix elements are of size at most ceil(n/2) 39 1.1 mrg - 1, but we need two limbs extra. */ 40 1.1 mrg void 41 1.1 mrg mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p) 42 1.1 mrg { 43 1.1 mrg mp_size_t s = (n+1)/2 + 1; 44 1.1 mrg M->alloc = s; 45 1.1 mrg M->n = 1; 46 1.1 mrg MPN_ZERO (p, 4 * s); 47 1.1 mrg M->p[0][0] = p; 48 1.1 mrg M->p[0][1] = p + s; 49 1.1 mrg M->p[1][0] = p + 2 * s; 50 1.1 mrg M->p[1][1] = p + 3 * s; 51 1.1 mrg 52 1.1 mrg M->p[0][0][0] = M->p[1][1][0] = 1; 53 1.1 mrg } 54 1.1 mrg 55 1.1 mrg /* Update column COL, adding in Q * column (1-COL). Temporary storage: 56 1.1 mrg * qn + n <= M->alloc, where n is the size of the largest element in 57 1.1 mrg * column 1 - COL. */ 58 1.1 mrg void 59 1.1 mrg mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn, 60 1.1 mrg unsigned col, mp_ptr tp) 61 1.1 mrg { 62 1.1 mrg ASSERT (col < 2); 63 1.1 mrg 64 1.1 mrg if (qn == 1) 65 1.1 mrg { 66 1.1 mrg mp_limb_t q = qp[0]; 67 1.1 mrg mp_limb_t c0, c1; 68 1.1 mrg 69 1.1 mrg c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q); 70 1.1 mrg c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q); 71 1.1 mrg 72 1.1 mrg M->p[0][col][M->n] = c0; 73 1.1 mrg M->p[1][col][M->n] = c1; 74 1.1 mrg 75 1.1 mrg M->n += (c0 | c1) != 0; 76 1.1 mrg } 77 1.1 mrg else 78 1.1 mrg { 79 1.1 mrg unsigned row; 80 1.1 mrg 81 1.1 mrg /* Carries for the unlikely case that we get both high words 82 1.1 mrg from the multiplication and carries from the addition. */ 83 1.1 mrg mp_limb_t c[2]; 84 1.1 mrg mp_size_t n; 85 1.1 mrg 86 1.1 mrg /* The matrix will not necessarily grow in size by qn, so we 87 1.1 mrg need normalization in order not to overflow M. */ 88 1.1 mrg 89 1.1 mrg for (n = M->n; n + qn > M->n; n--) 90 1.1 mrg { 91 1.1 mrg ASSERT (n > 0); 92 1.1 mrg if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0) 93 1.1 mrg break; 94 1.1 mrg } 95 1.1 mrg 96 1.1 mrg ASSERT (qn + n <= M->alloc); 97 1.1 mrg 98 1.1 mrg for (row = 0; row < 2; row++) 99 1.1 mrg { 100 1.1 mrg if (qn <= n) 101 1.1 mrg mpn_mul (tp, M->p[row][1-col], n, qp, qn); 102 1.1 mrg else 103 1.1 mrg mpn_mul (tp, qp, qn, M->p[row][1-col], n); 104 1.1 mrg 105 1.1 mrg ASSERT (n + qn >= M->n); 106 1.1 mrg c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n); 107 1.1 mrg } 108 1.1 mrg 109 1.1 mrg n += qn; 110 1.1 mrg 111 1.1 mrg if (c[0] | c[1]) 112 1.1 mrg { 113 1.1 mrg M->p[0][col][n] = c[0]; 114 1.1 mrg M->p[1][col][n] = c[1]; 115 1.1 mrg n++; 116 1.1 mrg } 117 1.1 mrg else 118 1.1 mrg { 119 1.1 mrg n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0; 120 1.1 mrg ASSERT (n >= M->n); 121 1.1 mrg } 122 1.1 mrg M->n = n; 123 1.1 mrg } 124 1.1 mrg 125 1.1 mrg ASSERT (M->n < M->alloc); 126 1.1 mrg } 127 1.1 mrg 128 1.1 mrg /* Multiply M by M1 from the right. Since the M1 elements fit in 129 1.1 mrg GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs 130 1.1 mrg temporary space M->n */ 131 1.1 mrg void 132 1.1 mrg mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1, 133 1.1 mrg mp_ptr tp) 134 1.1 mrg { 135 1.1 mrg mp_size_t n0, n1; 136 1.1 mrg 137 1.1 mrg /* Could avoid copy by some swapping of pointers. */ 138 1.1 mrg MPN_COPY (tp, M->p[0][0], M->n); 139 1.1 mrg n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n); 140 1.1 mrg MPN_COPY (tp, M->p[1][0], M->n); 141 1.1 mrg n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n); 142 1.1 mrg 143 1.1 mrg /* Depends on zero initialization */ 144 1.1 mrg M->n = MAX(n0, n1); 145 1.1 mrg ASSERT (M->n < M->alloc); 146 1.1 mrg } 147 1.1 mrg 148 1.1 mrg /* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs 149 1.1 mrg of temporary storage (see mpn_matrix22_mul_itch). */ 150 1.1 mrg void 151 1.1 mrg mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1, 152 1.1 mrg mp_ptr tp) 153 1.1 mrg { 154 1.1 mrg mp_size_t n; 155 1.1 mrg 156 1.1 mrg /* About the new size of M:s elements. Since M1's diagonal elements 157 1.1 mrg are > 0, no element can decrease. The new elements are of size 158 1.1 mrg M->n + M1->n, one limb more or less. The computation of the 159 1.1 mrg matrix product produces elements of size M->n + M1->n + 1. But 160 1.1 mrg the true size, after normalization, may be three limbs smaller. 161 1.1 mrg 162 1.1 mrg The reason that the product has normalized size >= M->n + M1->n - 163 1.1 mrg 2 is subtle. It depends on the fact that M and M1 can be factored 164 1.1 mrg as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have 165 1.1 mrg M ending with a large power and M1 starting with a large power of 166 1.1 mrg the same matrix. */ 167 1.1 mrg 168 1.1 mrg /* FIXME: Strassen multiplication gives only a small speedup. In FFT 169 1.1 mrg multiplication range, this function could be sped up quite a lot 170 1.1 mrg using invariance. */ 171 1.1 mrg ASSERT (M->n + M1->n < M->alloc); 172 1.1 mrg 173 1.1 mrg ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1] 174 1.1 mrg | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0); 175 1.1 mrg 176 1.1 mrg ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1] 177 1.1 mrg | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0); 178 1.1 mrg 179 1.1 mrg mpn_matrix22_mul (M->p[0][0], M->p[0][1], 180 1.1 mrg M->p[1][0], M->p[1][1], M->n, 181 1.1 mrg M1->p[0][0], M1->p[0][1], 182 1.1 mrg M1->p[1][0], M1->p[1][1], M1->n, tp); 183 1.1 mrg 184 1.1 mrg /* Index of last potentially non-zero limb, size is one greater. */ 185 1.1 mrg n = M->n + M1->n; 186 1.1 mrg 187 1.1 mrg n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0); 188 1.1 mrg n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0); 189 1.1 mrg n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0); 190 1.1 mrg 191 1.1 mrg ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0); 192 1.1 mrg 193 1.1 mrg M->n = n + 1; 194 1.1 mrg } 195 1.1 mrg 196 1.1 mrg /* Multiplies the least significant p limbs of (a;b) by M^-1. 197 1.1 mrg Temporary space needed: 2 * (p + M->n)*/ 198 1.1 mrg mp_size_t 199 1.1 mrg mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M, 200 1.1 mrg mp_size_t n, mp_ptr ap, mp_ptr bp, 201 1.1 mrg mp_size_t p, mp_ptr tp) 202 1.1 mrg { 203 1.1 mrg /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b) 204 1.1 mrg = (r11 a - r01 b; - r10 a + r00 b */ 205 1.1 mrg 206 1.1 mrg mp_ptr t0 = tp; 207 1.1 mrg mp_ptr t1 = tp + p + M->n; 208 1.1 mrg mp_limb_t ah, bh; 209 1.1 mrg mp_limb_t cy; 210 1.1 mrg 211 1.1 mrg ASSERT (p + M->n < n); 212 1.1 mrg 213 1.1 mrg /* First compute the two values depending on a, before overwriting a */ 214 1.1 mrg 215 1.1 mrg if (M->n >= p) 216 1.1 mrg { 217 1.1 mrg mpn_mul (t0, M->p[1][1], M->n, ap, p); 218 1.1 mrg mpn_mul (t1, M->p[1][0], M->n, ap, p); 219 1.1 mrg } 220 1.1 mrg else 221 1.1 mrg { 222 1.1 mrg mpn_mul (t0, ap, p, M->p[1][1], M->n); 223 1.1 mrg mpn_mul (t1, ap, p, M->p[1][0], M->n); 224 1.1 mrg } 225 1.1 mrg 226 1.1 mrg /* Update a */ 227 1.1 mrg MPN_COPY (ap, t0, p); 228 1.1 mrg ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n); 229 1.1 mrg 230 1.1 mrg if (M->n >= p) 231 1.1 mrg mpn_mul (t0, M->p[0][1], M->n, bp, p); 232 1.1 mrg else 233 1.1 mrg mpn_mul (t0, bp, p, M->p[0][1], M->n); 234 1.1 mrg 235 1.1 mrg cy = mpn_sub (ap, ap, n, t0, p + M->n); 236 1.1 mrg ASSERT (cy <= ah); 237 1.1 mrg ah -= cy; 238 1.1 mrg 239 1.1 mrg /* Update b */ 240 1.1 mrg if (M->n >= p) 241 1.1 mrg mpn_mul (t0, M->p[0][0], M->n, bp, p); 242 1.1 mrg else 243 1.1 mrg mpn_mul (t0, bp, p, M->p[0][0], M->n); 244 1.1 mrg 245 1.1 mrg MPN_COPY (bp, t0, p); 246 1.1 mrg bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n); 247 1.1 mrg cy = mpn_sub (bp, bp, n, t1, p + M->n); 248 1.1 mrg ASSERT (cy <= bh); 249 1.1 mrg bh -= cy; 250 1.1 mrg 251 1.1 mrg if (ah > 0 || bh > 0) 252 1.1 mrg { 253 1.1 mrg ap[n] = ah; 254 1.1 mrg bp[n] = bh; 255 1.1 mrg n++; 256 1.1 mrg } 257 1.1 mrg else 258 1.1 mrg { 259 1.1 mrg /* The subtraction can reduce the size by at most one limb. */ 260 1.1 mrg if (ap[n-1] == 0 && bp[n-1] == 0) 261 1.1 mrg n--; 262 1.1 mrg } 263 1.1 mrg ASSERT (ap[n-1] > 0 || bp[n-1] > 0); 264 1.1 mrg return n; 265 1.1 mrg } 266