1 1.1.1.2 mrg /* mpn_sbpi1_div_q -- Schoolbook division using the Mller-Granlund 3/2 2 1.1 mrg division algorithm. 3 1.1 mrg 4 1.1 mrg Contributed to the GNU project by Torbjorn Granlund. 5 1.1 mrg 6 1.1 mrg THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 7 1.1 mrg SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 1.1 mrg GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. 9 1.1 mrg 10 1.1 mrg Copyright 2007, 2009 Free Software Foundation, Inc. 11 1.1 mrg 12 1.1 mrg This file is part of the GNU MP Library. 13 1.1 mrg 14 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 15 1.1.1.2 mrg it under the terms of either: 16 1.1.1.2 mrg 17 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free 18 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your 19 1.1.1.2 mrg option) any later version. 20 1.1.1.2 mrg 21 1.1.1.2 mrg or 22 1.1.1.2 mrg 23 1.1.1.2 mrg * the GNU General Public License as published by the Free Software 24 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any 25 1.1.1.2 mrg later version. 26 1.1.1.2 mrg 27 1.1.1.2 mrg or both in parallel, as here. 28 1.1 mrg 29 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 30 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 31 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 32 1.1.1.2 mrg for more details. 33 1.1 mrg 34 1.1.1.2 mrg You should have received copies of the GNU General Public License and the 35 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 36 1.1.1.2 mrg see https://www.gnu.org/licenses/. */ 37 1.1 mrg 38 1.1 mrg 39 1.1 mrg #include "gmp-impl.h" 40 1.1 mrg #include "longlong.h" 41 1.1 mrg 42 1.1 mrg mp_limb_t 43 1.1 mrg mpn_sbpi1_div_q (mp_ptr qp, 44 1.1 mrg mp_ptr np, mp_size_t nn, 45 1.1 mrg mp_srcptr dp, mp_size_t dn, 46 1.1 mrg mp_limb_t dinv) 47 1.1 mrg { 48 1.1 mrg mp_limb_t qh; 49 1.1 mrg mp_size_t qn, i; 50 1.1 mrg mp_limb_t n1, n0; 51 1.1 mrg mp_limb_t d1, d0; 52 1.1 mrg mp_limb_t cy, cy1; 53 1.1 mrg mp_limb_t q; 54 1.1 mrg mp_limb_t flag; 55 1.1 mrg 56 1.1 mrg mp_size_t dn_orig = dn; 57 1.1 mrg mp_srcptr dp_orig = dp; 58 1.1 mrg mp_ptr np_orig = np; 59 1.1 mrg 60 1.1 mrg ASSERT (dn > 2); 61 1.1 mrg ASSERT (nn >= dn); 62 1.1 mrg ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0); 63 1.1 mrg 64 1.1 mrg np += nn; 65 1.1 mrg 66 1.1 mrg qn = nn - dn; 67 1.1 mrg if (qn + 1 < dn) 68 1.1 mrg { 69 1.1 mrg dp += dn - (qn + 1); 70 1.1 mrg dn = qn + 1; 71 1.1 mrg } 72 1.1 mrg 73 1.1 mrg qh = mpn_cmp (np - dn, dp, dn) >= 0; 74 1.1 mrg if (qh != 0) 75 1.1 mrg mpn_sub_n (np - dn, np - dn, dp, dn); 76 1.1 mrg 77 1.1 mrg qp += qn; 78 1.1 mrg 79 1.1 mrg dn -= 2; /* offset dn by 2 for main division loops, 80 1.1 mrg saving two iterations in mpn_submul_1. */ 81 1.1 mrg d1 = dp[dn + 1]; 82 1.1 mrg d0 = dp[dn + 0]; 83 1.1 mrg 84 1.1 mrg np -= 2; 85 1.1 mrg 86 1.1 mrg n1 = np[1]; 87 1.1 mrg 88 1.1 mrg for (i = qn - (dn + 2); i >= 0; i--) 89 1.1 mrg { 90 1.1 mrg np--; 91 1.1 mrg if (UNLIKELY (n1 == d1) && np[1] == d0) 92 1.1 mrg { 93 1.1 mrg q = GMP_NUMB_MASK; 94 1.1 mrg mpn_submul_1 (np - dn, dp, dn + 2, q); 95 1.1 mrg n1 = np[1]; /* update n1, last loop's value will now be invalid */ 96 1.1 mrg } 97 1.1 mrg else 98 1.1 mrg { 99 1.1 mrg udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); 100 1.1 mrg 101 1.1 mrg cy = mpn_submul_1 (np - dn, dp, dn, q); 102 1.1 mrg 103 1.1 mrg cy1 = n0 < cy; 104 1.1 mrg n0 = (n0 - cy) & GMP_NUMB_MASK; 105 1.1 mrg cy = n1 < cy1; 106 1.1 mrg n1 -= cy1; 107 1.1 mrg np[0] = n0; 108 1.1 mrg 109 1.1 mrg if (UNLIKELY (cy != 0)) 110 1.1 mrg { 111 1.1 mrg n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1); 112 1.1 mrg q--; 113 1.1 mrg } 114 1.1 mrg } 115 1.1 mrg 116 1.1 mrg *--qp = q; 117 1.1 mrg } 118 1.1 mrg 119 1.1 mrg flag = ~CNST_LIMB(0); 120 1.1 mrg 121 1.1 mrg if (dn >= 0) 122 1.1 mrg { 123 1.1 mrg for (i = dn; i > 0; i--) 124 1.1 mrg { 125 1.1 mrg np--; 126 1.1 mrg if (UNLIKELY (n1 >= (d1 & flag))) 127 1.1 mrg { 128 1.1 mrg q = GMP_NUMB_MASK; 129 1.1 mrg cy = mpn_submul_1 (np - dn, dp, dn + 2, q); 130 1.1 mrg 131 1.1 mrg if (UNLIKELY (n1 != cy)) 132 1.1 mrg { 133 1.1 mrg if (n1 < (cy & flag)) 134 1.1 mrg { 135 1.1 mrg q--; 136 1.1 mrg mpn_add_n (np - dn, np - dn, dp, dn + 2); 137 1.1 mrg } 138 1.1 mrg else 139 1.1 mrg flag = 0; 140 1.1 mrg } 141 1.1 mrg n1 = np[1]; 142 1.1 mrg } 143 1.1 mrg else 144 1.1 mrg { 145 1.1 mrg udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); 146 1.1 mrg 147 1.1 mrg cy = mpn_submul_1 (np - dn, dp, dn, q); 148 1.1 mrg 149 1.1 mrg cy1 = n0 < cy; 150 1.1 mrg n0 = (n0 - cy) & GMP_NUMB_MASK; 151 1.1 mrg cy = n1 < cy1; 152 1.1 mrg n1 -= cy1; 153 1.1 mrg np[0] = n0; 154 1.1 mrg 155 1.1 mrg if (UNLIKELY (cy != 0)) 156 1.1 mrg { 157 1.1 mrg n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1); 158 1.1 mrg q--; 159 1.1 mrg } 160 1.1 mrg } 161 1.1 mrg 162 1.1 mrg *--qp = q; 163 1.1 mrg 164 1.1 mrg /* Truncate operands. */ 165 1.1 mrg dn--; 166 1.1 mrg dp++; 167 1.1 mrg } 168 1.1 mrg 169 1.1 mrg np--; 170 1.1 mrg if (UNLIKELY (n1 >= (d1 & flag))) 171 1.1 mrg { 172 1.1 mrg q = GMP_NUMB_MASK; 173 1.1 mrg cy = mpn_submul_1 (np, dp, 2, q); 174 1.1 mrg 175 1.1 mrg if (UNLIKELY (n1 != cy)) 176 1.1 mrg { 177 1.1 mrg if (n1 < (cy & flag)) 178 1.1 mrg { 179 1.1 mrg q--; 180 1.1 mrg add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]); 181 1.1 mrg } 182 1.1 mrg else 183 1.1 mrg flag = 0; 184 1.1 mrg } 185 1.1 mrg n1 = np[1]; 186 1.1 mrg } 187 1.1 mrg else 188 1.1 mrg { 189 1.1 mrg udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); 190 1.1 mrg 191 1.1 mrg np[0] = n0; 192 1.1 mrg np[1] = n1; 193 1.1 mrg } 194 1.1 mrg 195 1.1 mrg *--qp = q; 196 1.1 mrg } 197 1.1 mrg ASSERT_ALWAYS (np[1] == n1); 198 1.1 mrg np += 2; 199 1.1 mrg 200 1.1 mrg 201 1.1 mrg dn = dn_orig; 202 1.1 mrg if (UNLIKELY (n1 < (dn & flag))) 203 1.1 mrg { 204 1.1 mrg mp_limb_t q, x; 205 1.1 mrg 206 1.1 mrg /* The quotient may be too large if the remainder is small. Recompute 207 1.1 mrg for above ignored operand parts, until the remainder spills. 208 1.1 mrg 209 1.1 mrg FIXME: The quality of this code isn't the same as the code above. 210 1.1 mrg 1. We don't compute things in an optimal order, high-to-low, in order 211 1.1 mrg to terminate as quickly as possible. 212 1.1 mrg 2. We mess with pointers and sizes, adding and subtracting and 213 1.1 mrg adjusting to get things right. It surely could be streamlined. 214 1.1 mrg 3. The only termination criteria are that we determine that the 215 1.1 mrg quotient needs to be adjusted, or that we have recomputed 216 1.1 mrg everything. We should stop when the remainder is so large 217 1.1 mrg that no additional subtracting could make it spill. 218 1.1 mrg 4. If nothing else, we should not do two loops of submul_1 over the 219 1.1 mrg data, instead handle both the triangularization and chopping at 220 1.1 mrg once. */ 221 1.1 mrg 222 1.1 mrg x = n1; 223 1.1 mrg 224 1.1 mrg if (dn > 2) 225 1.1 mrg { 226 1.1 mrg /* Compensate for triangularization. */ 227 1.1 mrg mp_limb_t y; 228 1.1 mrg 229 1.1 mrg dp = dp_orig; 230 1.1 mrg if (qn + 1 < dn) 231 1.1 mrg { 232 1.1 mrg dp += dn - (qn + 1); 233 1.1 mrg dn = qn + 1; 234 1.1 mrg } 235 1.1 mrg 236 1.1 mrg y = np[-2]; 237 1.1 mrg 238 1.1 mrg for (i = dn - 3; i >= 0; i--) 239 1.1 mrg { 240 1.1 mrg q = qp[i]; 241 1.1 mrg cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q); 242 1.1 mrg 243 1.1 mrg if (y < cy) 244 1.1 mrg { 245 1.1 mrg if (x == 0) 246 1.1 mrg { 247 1.1 mrg cy = mpn_sub_1 (qp, qp, qn, 1); 248 1.1 mrg ASSERT_ALWAYS (cy == 0); 249 1.1 mrg return qh - cy; 250 1.1 mrg } 251 1.1 mrg x--; 252 1.1 mrg } 253 1.1 mrg y -= cy; 254 1.1 mrg } 255 1.1 mrg np[-2] = y; 256 1.1 mrg } 257 1.1 mrg 258 1.1 mrg dn = dn_orig; 259 1.1 mrg if (qn + 1 < dn) 260 1.1 mrg { 261 1.1 mrg /* Compensate for ignored dividend and divisor tails. */ 262 1.1 mrg 263 1.1 mrg dp = dp_orig; 264 1.1 mrg np = np_orig; 265 1.1 mrg 266 1.1 mrg if (qh != 0) 267 1.1 mrg { 268 1.1 mrg cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1)); 269 1.1 mrg if (cy != 0) 270 1.1 mrg { 271 1.1 mrg if (x == 0) 272 1.1 mrg { 273 1.1 mrg if (qn != 0) 274 1.1 mrg cy = mpn_sub_1 (qp, qp, qn, 1); 275 1.1 mrg return qh - cy; 276 1.1 mrg } 277 1.1 mrg x--; 278 1.1 mrg } 279 1.1 mrg } 280 1.1 mrg 281 1.1 mrg if (qn == 0) 282 1.1 mrg return qh; 283 1.1 mrg 284 1.1 mrg for (i = dn - qn - 2; i >= 0; i--) 285 1.1 mrg { 286 1.1 mrg cy = mpn_submul_1 (np + i, qp, qn, dp[i]); 287 1.1 mrg cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy); 288 1.1 mrg if (cy != 0) 289 1.1 mrg { 290 1.1 mrg if (x == 0) 291 1.1 mrg { 292 1.1 mrg cy = mpn_sub_1 (qp, qp, qn, 1); 293 1.1 mrg return qh; 294 1.1 mrg } 295 1.1 mrg x--; 296 1.1 mrg } 297 1.1 mrg } 298 1.1 mrg } 299 1.1 mrg } 300 1.1 mrg 301 1.1 mrg return qh; 302 1.1 mrg } 303