/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/. */

#include "gmp-impl.h"
#include "longlong.h"

/* Callback passed to mpn_gcd_subdiv_step (see the use in
   mpn_gcdext_lehmer_n in this file); p points to a struct gcdext_ctx
   carrying the cofactor state u0, u1 of un limbs each.

   Here, d is the index of the cofactor to update.  FIXME: Could use qn
   = 0 for the common case q = 1.

   Two modes, distinguished by gp:

   * gp != NULL: the reduction has produced the gcd {gp, gn}.  Copy it
     to ctx->gp, then select a cofactor -- d == 0 means +u1, d == 1
     means -u0, d < 0 means whichever of the two is smaller -- and
     store it normalized at ctx->up, recording its signed size in
     *ctx->usize (negated when the chosen cofactor is -u0).

   * gp == NULL: a quotient q = {qp, qn} was produced; update the
     cofactors as u0 += q * u1 (with the roles of u0 and u1 swapped
     when d == 1) and record the possibly grown size in ctx->un.  */
void
mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
		 mp_srcptr qp, mp_size_t qn, int d)
{
  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
  mp_size_t un = ctx->un;

  if (gp)
    {
      mp_srcptr up;

      ASSERT (gn > 0);
      ASSERT (gp[gn-1] > 0);

      MPN_COPY (ctx->gp, gp, gn);
      ctx->gn = gn;

      if (d < 0)
	{
	  int c;

	  /* Must return the smallest cofactor, +u1 or -u0 */
	  MPN_CMP (c, ctx->u0, ctx->u1, un);
	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));

	  d = c < 0;
	}

      up = d ? ctx->u0 : ctx->u1;

      MPN_NORMALIZE (up, un);
      MPN_COPY (ctx->up, up, un);

      /* Negative size signals that the cofactor is -u0 rather than +u1.  */
      *ctx->usize = d ? -un : un;
    }
  else
    {
      mp_limb_t cy;
      mp_ptr u0 = ctx->u0;
      mp_ptr u1 = ctx->u1;

      ASSERT (d >= 0);

      if (d)
	MP_PTR_SWAP (u0, u1);

      /* Strip a possible high zero limb of the quotient.  */
      qn -= (qp[qn-1] == 0);

      /* Update u0 += q * u1 */
      if (qn == 1)
	{
	  mp_limb_t q = qp[0];

	  if (q == 1)
	    /* A common case. */
	    cy = mpn_add_n (u0, u0, u1, un);
	  else
	    cy = mpn_addmul_1 (u0, u1, un, q);
	}
      else
	{
	  mp_size_t u1n;
	  mp_ptr tp;

	  u1n = un;
	  MPN_NORMALIZE (u1, u1n);

	  /* Nothing to add when u1 is zero (then u0 is unchanged).  */
	  if (u1n == 0)
	    return;

	  /* Should always have u1n == un here, and u1 >= u0. The
	     reason is that we alternate adding u0 to u1 and u1 to u0
	     (corresponding to subtractions a - b and b - a), and we
	     can get a large quotient only just after a switch, which
	     means that we'll add (a multiple of) the larger u to the
	     smaller. */

	  tp = ctx->tp;

	  /* mpn_mul requires the first operand to be at least as large
	     as the second.  */
	  if (qn > u1n)
	    mpn_mul (tp, qp, qn, u1, u1n);
	  else
	    mpn_mul (tp, u1, u1n, qp, qn);

	  /* Normalized size of the product q * u1.  */
	  u1n += qn;
	  u1n -= tp[u1n-1] == 0;

	  if (u1n >= un)
	    {
	      cy = mpn_add (u0, tp, u1n, u0, un);
	      un = u1n;
	    }
	  else
	    /* Note: Unlikely case, maybe never happens? */
	    cy = mpn_add (u0, u0, un, tp, u1n);

	}
      /* Store the carry limb; the cofactor size grows by one when it
	 is non-zero.  */
      u0[un] = cy;
      ctx->un = un + (cy > 0);
    }
}
/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
   the matrix-vector multiplication adjusting a, b. If hgcd fails, we
   need at most n for the quotient and n+1 for the u update (reusing
   the extra u). In all, 4n + 3. */

/* Computes the gcd of {ap, n} and {bp, n} by Lehmer's method, storing
   it at gp and a cofactor at up.  Returns the limb size of the gcd;
   the signed limb size of the cofactor is stored in *usize (negative
   when the cofactor is -u0, cf. the matrix comment in the body).
   ap and bp are clobbered (they serve as working storage and may be
   swapped with parts of tp).  tp supplies the scratch space described
   in the comment above -- NOTE(review): the 4n + 3 limb requirement is
   taken from that comment; confirm against callers.  */
mp_size_t
mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
		     mp_ptr ap, mp_ptr bp, mp_size_t n,
		     mp_ptr tp)
{
  mp_size_t ualloc = n + 1;

  /* Keeps track of the second row of the reduction matrix
   *
   *   M = (v0, v1 ; u0, u1)
   *
   * which correspond to the first column of the inverse
   *
   *   M^{-1} = (u1, -v1; -u0, v0)
   *
   * This implies that
   *
   *   a = u1 A (mod B)
   *   b = -u0 A (mod B)
   *
   * where A, B denotes the input values.
   */

  struct gcdext_ctx ctx;
  mp_size_t un;
  mp_ptr u0;
  mp_ptr u1;
  mp_ptr u2;

  /* Carve three (n+1)-limb cofactor areas out of tp, zeroed.  */
  MPN_ZERO (tp, 3*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;
  u2 = tp; tp += ualloc;

  /* Initial cofactors: u0 = 0, u1 = 1, one limb each.  */
  u1[0] = 1; un = 1;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usize;

  /* FIXME: Handle n == 2 differently, after the loop? */
  while (n >= 2)
    {
      struct hgcd_matrix1 M;
      mp_limb_t ah, al, bh, bl;
      mp_limb_t mask;

      mask = ap[n-1] | bp[n-1];
      ASSERT (mask > 0);

      /* Extract the two high limbs of a and b, left-shifted so that
	 at least one of ah, bh has its high bit set.  */
      if (mask & GMP_NUMB_HIGHBIT)
	{
	  ah = ap[n-1]; al = ap[n-2];
	  bh = bp[n-1]; bl = bp[n-2];
	}
      else if (n == 2)
	{
	  /* We use the full inputs without truncation, so we can
	     safely shift left. */
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
	  al = ap[0] << shift;
	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
	  bl = bp[0] << shift;
	}
      else
	{
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
	}

      /* Try an mpn_nhgcd2 step */
      if (mpn_hgcd2 (ah, al, bh, bl, &M))
	{
	  /* Apply M^{-1} to (a; b), and M to the cofactor pair.  */
	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
	  MP_PTR_SWAP (ap, tp);
	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
	  MP_PTR_SWAP (u0, u2);
	}
      else
	{
	  /* mpn_hgcd2 has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  ctx.u0 = u0;
	  ctx.u1 = u1;
	  ctx.tp = u2;
	  ctx.un = un;

	  /* Temporary storage n for the quotient and ualloc for the
	     new cofactor. */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	  /* n == 0 means the hook already stored gcd and cofactor.  */
	  if (n == 0)
	    return ctx.gn;

	  un = ctx.un;
	}
    }
  ASSERT_ALWAYS (ap[0] > 0);
  ASSERT_ALWAYS (bp[0] > 0);

  if (ap[0] == bp[0])
    {
      int c;

      /* Which cofactor to return now? Candidates are +u1 and -u0,
	 depending on which of a and b was most recently reduced,
	 which we don't keep track of. So compare and get the smallest
	 one. */

      gp[0] = ap[0];

      MPN_CMP (c, u0, u1, un);
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
	{
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usize = -un;
	}
      else
	{
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
	  MPN_COPY (up, u1, un);
	  *usize = un;
	}
      return 1;
    }
  else
    {
      mp_limb_t uh, vh;
      mp_limb_signed_t u;
      mp_limb_signed_t v;
      int negate;

      /* Finish with a single-limb extended gcd.  */
      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);

      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
	 two limbs. */

      if (u == 0)
	{
	  ASSERT (v == 1);
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usize = -un;
	  return 1;
	}
      else if (v == 0)
	{
	  ASSERT (u == 1);
	  MPN_NORMALIZE (u1, un);
	  MPN_COPY (up, u1, un);
	  *usize = un;
	  return 1;
	}
      else if (u > 0)
	{
	  negate = 0;
	  ASSERT (v < 0);
	  v = -v;
	}
      else
	{
	  negate = 1;
	  ASSERT (v > 0);
	  u = -u;
	}

      /* up = u * u1 + v * u0, both multipliers now non-negative; the
	 overall sign is carried in `negate'.  */
      uh = mpn_mul_1 (up, u1, un, u);
      vh = mpn_addmul_1 (up, u0, un, v);

      if ( (uh | vh) > 0)
	{
	  uh += vh;
	  up[un++] = uh;
	  /* uh + vh wrapped around: propagate the carry one limb up.  */
	  if (uh < vh)
	    up[un++] = 1;
	}

      MPN_NORMALIZE_NOT_ZERO (up, un);

      *usize = negate ? -un : un;
      return 1;
    }
}