toom2_sqr.c revision 1.1 1 1.1 mrg /* mpn_toom2_sqr -- Square {ap,an}.
2 1.1 mrg
3 1.1 mrg Contributed to the GNU project by Torbjorn Granlund.
4 1.1 mrg
5 1.1 mrg THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 1.1 mrg SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 1.1 mrg GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 1.1 mrg
9 1.1 mrg Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
10 1.1 mrg
11 1.1 mrg This file is part of the GNU MP Library.
12 1.1 mrg
13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
14 1.1 mrg it under the terms of the GNU Lesser General Public License as published by
15 1.1 mrg the Free Software Foundation; either version 3 of the License, or (at your
16 1.1 mrg option) any later version.
17 1.1 mrg
18 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
19 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 1.1 mrg License for more details.
22 1.1 mrg
23 1.1 mrg You should have received a copy of the GNU Lesser General Public License
24 1.1 mrg along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 1.1 mrg
26 1.1 mrg
27 1.1 mrg #include "gmp.h"
28 1.1 mrg #include "gmp-impl.h"
29 1.1 mrg
30 1.1 mrg /* Evaluate in: -1, 0, +inf
31 1.1 mrg
32 1.1 mrg <-s--><--n-->
33 1.1 mrg ____ ______
34 1.1 mrg |_a1_|___a0_|
35 1.1 mrg
36 1.1 mrg v0 = a0 ^2 # A(0)^2
37 1.1 mrg vm1 = (a0- a1)^2 # A(-1)^2
38 1.1 mrg vinf= a1 ^2 # A(inf)^2
39 1.1 mrg */
40 1.1 mrg
41 1.1 mrg #if TUNE_PROGRAM_BUILD
42 1.1 mrg #define MAYBE_sqr_toom2 1
43 1.1 mrg #else
44 1.1 mrg #define MAYBE_sqr_toom2 \
45 1.1 mrg (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
46 1.1 mrg #endif
47 1.1 mrg
48 1.1 mrg #define TOOM2_SQR_REC(p, a, n, ws) \
49 1.1 mrg do { \
50 1.1 mrg if (! MAYBE_sqr_toom2 \
51 1.1 mrg || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
52 1.1 mrg mpn_sqr_basecase (p, a, n); \
53 1.1 mrg else \
54 1.1 mrg mpn_toom2_sqr (p, a, n, ws); \
55 1.1 mrg } while (0)
56 1.1 mrg
57 1.1 mrg void
58 1.1 mrg mpn_toom2_sqr (mp_ptr pp,
59 1.1 mrg mp_srcptr ap, mp_size_t an,
60 1.1 mrg mp_ptr scratch)
61 1.1 mrg {
62 1.1 mrg mp_size_t n, s;
63 1.1 mrg mp_limb_t cy, cy2;
64 1.1 mrg mp_ptr asm1;
65 1.1 mrg
66 1.1 mrg #define a0 ap
67 1.1 mrg #define a1 (ap + n)
68 1.1 mrg
69 1.1 mrg s = an >> 1;
70 1.1 mrg n = an - s;
71 1.1 mrg
72 1.1 mrg ASSERT (0 < s && s <= n);
73 1.1 mrg
74 1.1 mrg asm1 = pp;
75 1.1 mrg
76 1.1 mrg /* Compute asm1. */
77 1.1 mrg if (s == n)
78 1.1 mrg {
79 1.1 mrg if (mpn_cmp (a0, a1, n) < 0)
80 1.1 mrg {
81 1.1 mrg mpn_sub_n (asm1, a1, a0, n);
82 1.1 mrg }
83 1.1 mrg else
84 1.1 mrg {
85 1.1 mrg mpn_sub_n (asm1, a0, a1, n);
86 1.1 mrg }
87 1.1 mrg }
88 1.1 mrg else
89 1.1 mrg {
90 1.1 mrg if (mpn_zero_p (a0 + s, n - s) && mpn_cmp (a0, a1, s) < 0)
91 1.1 mrg {
92 1.1 mrg mpn_sub_n (asm1, a1, a0, s);
93 1.1 mrg MPN_ZERO (asm1 + s, n - s);
94 1.1 mrg }
95 1.1 mrg else
96 1.1 mrg {
97 1.1 mrg mpn_sub (asm1, a0, n, a1, s);
98 1.1 mrg }
99 1.1 mrg }
100 1.1 mrg
101 1.1 mrg #define v0 pp /* 2n */
102 1.1 mrg #define vinf (pp + 2 * n) /* s+s */
103 1.1 mrg #define vm1 scratch /* 2n */
104 1.1 mrg #define scratch_out scratch + 2 * n
105 1.1 mrg
106 1.1 mrg /* vm1, 2n limbs */
107 1.1 mrg TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
108 1.1 mrg
109 1.1 mrg /* vinf, s+s limbs */
110 1.1 mrg TOOM2_SQR_REC (vinf, a1, s, scratch_out);
111 1.1 mrg
112 1.1 mrg /* v0, 2n limbs */
113 1.1 mrg TOOM2_SQR_REC (v0, ap, n, scratch_out);
114 1.1 mrg
115 1.1 mrg /* H(v0) + L(vinf) */
116 1.1 mrg cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
117 1.1 mrg
118 1.1 mrg /* L(v0) + H(v0) */
119 1.1 mrg cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
120 1.1 mrg
121 1.1 mrg /* L(vinf) + H(vinf) */
122 1.1 mrg cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
123 1.1 mrg
124 1.1 mrg cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
125 1.1 mrg
126 1.1 mrg ASSERT (cy + 1 <= 3);
127 1.1 mrg ASSERT (cy2 <= 2);
128 1.1 mrg
129 1.1 mrg mpn_incr_u (pp + 2 * n, cy2);
130 1.1 mrg if (LIKELY (cy <= 2))
131 1.1 mrg mpn_incr_u (pp + 3 * n, cy);
132 1.1 mrg else
133 1.1 mrg mpn_decr_u (pp + 3 * n, 1);
134 1.1 mrg }
135