Lines Matching refs:vn
44 1/6 1/5 1/4 4/13 1/3 3/8 2/5 5/11 1/2 3/5 2/3 3/4 4/5 1 vn/un
78 (pointed to by VP, with VN limbs), and store the result at PRODP. The
79 result is UN + VN limbs. Return the most significant limb of the result.
85 1. UN >= VN.
95 remaining area might have un < vn. Should we fix the toomX2 code in a
105 have the same vn threshold. This is not true, we should actually use
116 mp_srcptr vp, mp_size_t vn)
118 ASSERT (un >= vn);
119 ASSERT (vn >= 1);
120 ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));
121 ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));
125 /* When un (and thus vn) is below the toom22 range, do mul_basecase.
126 Test un and not vn here not to thwart the un >> vn code below.
129 mpn_mul_basecase (prodp, up, un, vp, vn);
131 else if (un == vn)
135 else if (vn < MUL_TOOM22_THRESHOLD)
142 || vn <= 2
144 || vn == 1
147 mpn_mul_basecase (prodp, up, un, vp, vn);
150 /* We have un >> MUL_BASECASE_MAX_UN > vn. For better memory
153 multiplication (but the last) we copy the most significant vn
158 -->vn<--
178 mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
180 MPN_COPY (tp, prodp, vn); /* preserve high triangle */
185 mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
186 cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
187 mpn_incr_u (prodp + vn, cy);
189 MPN_COPY (tp, prodp, vn); /* preserve high triangle */
193 if (un > vn)
195 mpn_mul_basecase (prodp, up, un, vp, vn);
200 mpn_mul_basecase (prodp, vp, vn, up, un);
202 cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
203 mpn_incr_u (prodp + vn, cy);
206 else if (BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD))
212 #define ITCH_TOOMX2 (9 * vn / 2 + GMP_NUMB_BITS * 2)
214 ASSERT (mpn_toom22_mul_itch ((5*vn-1)/4, vn) <= ITCH_TOOMX2); /* 5vn/2+ */
215 ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX2); /* 7vn/6+ */
216 ASSERT (mpn_toom42_mul_itch (3 * vn - 1, vn) <= ITCH_TOOMX2); /* 9vn/2+ */
219 /* FIXME: This condition (repeated in the loop below) leaves from a vn*vn
220 square to a (3vn-1)*vn rectangle. Leaving such a rectangle is hardly
222 will sometimes end up with un < vn, like in the X3 arm below. */
223 if (un >= 3 * vn)
229 ws = TMP_SALLOC_LIMBS (4 * vn);
231 mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
232 un -= 2 * vn;
233 up += 2 * vn;
234 prodp += 2 * vn;
236 while (un >= 3 * vn)
238 mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
239 un -= 2 * vn;
240 up += 2 * vn;
241 cy = mpn_add_n (prodp, prodp, ws, vn);
242 MPN_COPY (prodp + vn, ws + vn, 2 * vn);
243 mpn_incr_u (prodp + vn, cy);
244 prodp += 2 * vn;
247 /* vn <= un < 3vn */
249 if (4 * un < 5 * vn)
250 mpn_toom22_mul (ws, up, un, vp, vn, scratch);
251 else if (4 * un < 7 * vn)
252 mpn_toom32_mul (ws, up, un, vp, vn, scratch);
254 mpn_toom42_mul (ws, up, un, vp, vn, scratch);
256 cy = mpn_add_n (prodp, prodp, ws, vn);
257 MPN_COPY (prodp + vn, ws + vn, un);
258 mpn_incr_u (prodp + vn, cy);
262 if (4 * un < 5 * vn)
263 mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
264 else if (4 * un < 7 * vn)
265 mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
267 mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
271 else if (BELOW_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) ||
272 BELOW_THRESHOLD (3 * vn, MUL_FFT_THRESHOLD))
278 if (BELOW_THRESHOLD (vn, MUL_TOOM44_THRESHOLD) || !TOOM44_OK (un, vn))
284 #define ITCH_TOOMX3 (4 * vn + GMP_NUMB_BITS)
286 ASSERT (mpn_toom33_mul_itch ((7*vn-1)/6, vn) <= ITCH_TOOMX3); /* 7vn/2+ */
287 ASSERT (mpn_toom43_mul_itch ((3*vn-1)/2, vn) <= ITCH_TOOMX3); /* 9vn/4+ */
288 ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX3); /* 7vn/6+ */
289 ASSERT (mpn_toom53_mul_itch ((11*vn-1)/6, vn) <= ITCH_TOOMX3); /* 11vn/3+ */
290 ASSERT (mpn_toom42_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */
291 ASSERT (mpn_toom63_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */
294 if (2 * un >= 5 * vn)
300 ws = TMP_ALLOC_LIMBS (7 * vn >> 1);
302 if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
303 mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
305 mpn_toom63_mul (prodp, up, 2 * vn, vp, vn, scratch);
306 un -= 2 * vn;
307 up += 2 * vn;
308 prodp += 2 * vn;
310 while (2 * un >= 5 * vn) /* un >= 2.5vn */
312 if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
313 mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
315 mpn_toom63_mul (ws, up, 2 * vn, vp, vn, scratch);
316 un -= 2 * vn;
317 up += 2 * vn;
318 cy = mpn_add_n (prodp, prodp, ws, vn);
319 MPN_COPY (prodp + vn, ws + vn, 2 * vn);
320 mpn_incr_u (prodp + vn, cy);
321 prodp += 2 * vn;
324 /* vn / 2 <= un < 2.5vn */
326 if (un < vn)
327 mpn_mul (ws, vp, vn, up, un);
329 mpn_mul (ws, up, un, vp, vn);
331 cy = mpn_add_n (prodp, prodp, ws, vn);
332 MPN_COPY (prodp + vn, ws + vn, un);
333 mpn_incr_u (prodp + vn, cy);
337 if (6 * un < 7 * vn)
338 mpn_toom33_mul (prodp, up, un, vp, vn, scratch);
339 else if (2 * un < 3 * vn)
341 if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM43_THRESHOLD))
342 mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
344 mpn_toom43_mul (prodp, up, un, vp, vn, scratch);
346 else if (6 * un < 11 * vn)
348 if (4 * un < 7 * vn)
350 if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM53_THRESHOLD))
351 mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
353 mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
357 if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM53_THRESHOLD))
358 mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
360 mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
365 if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
366 mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
368 mpn_toom63_mul (prodp, up, un, vp, vn, scratch);
378 if (BELOW_THRESHOLD (vn, MUL_TOOM6H_THRESHOLD))
380 scratch = TMP_SALLOC_LIMBS (mpn_toom44_mul_itch (un, vn));
381 mpn_toom44_mul (prodp, up, un, vp, vn, scratch);
383 else if (BELOW_THRESHOLD (vn, MUL_TOOM8H_THRESHOLD))
385 scratch = TMP_SALLOC_LIMBS (mpn_toom6h_mul_itch (un, vn));
386 mpn_toom6h_mul (prodp, up, un, vp, vn, scratch);
390 scratch = TMP_ALLOC_LIMBS (mpn_toom8h_mul_itch (un, vn));
391 mpn_toom8h_mul (prodp, up, un, vp, vn, scratch);
398 if (un >= 8 * vn)
405 ws = TMP_BALLOC_LIMBS (9 * vn >> 1);
407 mpn_fft_mul (prodp, up, 3 * vn, vp, vn);
408 un -= 3 * vn;
409 up += 3 * vn;
410 prodp += 3 * vn;
412 while (2 * un >= 7 * vn) /* un >= 3.5vn */
414 mpn_fft_mul (ws, up, 3 * vn, vp, vn);
415 un -= 3 * vn;
416 up += 3 * vn;
417 cy = mpn_add_n (prodp, prodp, ws, vn);
418 MPN_COPY (prodp + vn, ws + vn, 3 * vn);
419 mpn_incr_u (prodp + vn, cy);
420 prodp += 3 * vn;
423 /* vn / 2 <= un < 3.5vn */
425 if (un < vn)
426 mpn_mul (ws, vp, vn, up, un);
428 mpn_mul (ws, up, un, vp, vn);
430 cy = mpn_add_n (prodp, prodp, ws, vn);
431 MPN_COPY (prodp + vn, ws + vn, un);
432 mpn_incr_u (prodp + vn, cy);
437 mpn_fft_mul (prodp, up, un, vp, vn);
440 return prodp[un + vn - 1]; /* historic */