toom2_sqr.c revision 1.1.1.3 1 1.1 mrg /* mpn_toom2_sqr -- Square {ap,an}.
2 1.1 mrg
3 1.1 mrg Contributed to the GNU project by Torbjorn Granlund.
4 1.1 mrg
5 1.1 mrg THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 1.1 mrg SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 1.1 mrg GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 1.1 mrg
9 1.1.1.3 mrg Copyright 2006-2010, 2012, 2014 Free Software Foundation, Inc.
10 1.1 mrg
11 1.1 mrg This file is part of the GNU MP Library.
12 1.1 mrg
13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
14 1.1.1.3 mrg it under the terms of either:
15 1.1.1.3 mrg
16 1.1.1.3 mrg * the GNU Lesser General Public License as published by the Free
17 1.1.1.3 mrg Software Foundation; either version 3 of the License, or (at your
18 1.1.1.3 mrg option) any later version.
19 1.1.1.3 mrg
20 1.1.1.3 mrg or
21 1.1.1.3 mrg
22 1.1.1.3 mrg * the GNU General Public License as published by the Free Software
23 1.1.1.3 mrg Foundation; either version 2 of the License, or (at your option) any
24 1.1.1.3 mrg later version.
25 1.1.1.3 mrg
26 1.1.1.3 mrg or both in parallel, as here.
27 1.1 mrg
28 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
29 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 1.1.1.3 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 1.1.1.3 mrg for more details.
32 1.1 mrg
33 1.1.1.3 mrg You should have received copies of the GNU General Public License and the
34 1.1.1.3 mrg GNU Lesser General Public License along with the GNU MP Library. If not,
35 1.1.1.3 mrg see https://www.gnu.org/licenses/. */
36 1.1 mrg
37 1.1 mrg
38 1.1 mrg #include "gmp.h"
39 1.1 mrg #include "gmp-impl.h"
40 1.1 mrg
41 1.1 mrg /* Evaluate in: -1, 0, +inf
42 1.1 mrg
43 1.1 mrg <-s--><--n-->
44 1.1 mrg ____ ______
45 1.1 mrg |_a1_|___a0_|
46 1.1 mrg
47 1.1 mrg v0 = a0 ^2 # A(0)^2
48 1.1 mrg vm1 = (a0- a1)^2 # A(-1)^2
49 1.1 mrg vinf= a1 ^2 # A(inf)^2
50 1.1 mrg */
51 1.1 mrg
52 1.1.1.2 mrg #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
53 1.1 mrg #define MAYBE_sqr_toom2 1
54 1.1 mrg #else
55 1.1 mrg #define MAYBE_sqr_toom2 \
56 1.1 mrg (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
57 1.1 mrg #endif
58 1.1 mrg
59 1.1 mrg #define TOOM2_SQR_REC(p, a, n, ws) \
60 1.1 mrg do { \
61 1.1 mrg if (! MAYBE_sqr_toom2 \
62 1.1 mrg || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
63 1.1 mrg mpn_sqr_basecase (p, a, n); \
64 1.1 mrg else \
65 1.1 mrg mpn_toom2_sqr (p, a, n, ws); \
66 1.1 mrg } while (0)
67 1.1 mrg
68 1.1 mrg void
69 1.1 mrg mpn_toom2_sqr (mp_ptr pp,
70 1.1 mrg mp_srcptr ap, mp_size_t an,
71 1.1 mrg mp_ptr scratch)
72 1.1 mrg {
73 1.1.1.2 mrg const int __gmpn_cpuvec_initialized = 1;
74 1.1 mrg mp_size_t n, s;
75 1.1 mrg mp_limb_t cy, cy2;
76 1.1 mrg mp_ptr asm1;
77 1.1 mrg
78 1.1 mrg #define a0 ap
79 1.1 mrg #define a1 (ap + n)
80 1.1 mrg
81 1.1 mrg s = an >> 1;
82 1.1 mrg n = an - s;
83 1.1 mrg
84 1.1.1.3 mrg ASSERT (0 < s && s <= n && s >= n - 1);
85 1.1 mrg
86 1.1 mrg asm1 = pp;
87 1.1 mrg
88 1.1 mrg /* Compute asm1. */
89 1.1 mrg if (s == n)
90 1.1 mrg {
91 1.1 mrg if (mpn_cmp (a0, a1, n) < 0)
92 1.1 mrg {
93 1.1 mrg mpn_sub_n (asm1, a1, a0, n);
94 1.1 mrg }
95 1.1 mrg else
96 1.1 mrg {
97 1.1 mrg mpn_sub_n (asm1, a0, a1, n);
98 1.1 mrg }
99 1.1 mrg }
100 1.1.1.3 mrg else /* n - s == 1 */
101 1.1 mrg {
102 1.1.1.3 mrg if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
103 1.1 mrg {
104 1.1 mrg mpn_sub_n (asm1, a1, a0, s);
105 1.1.1.3 mrg asm1[s] = 0;
106 1.1 mrg }
107 1.1 mrg else
108 1.1 mrg {
109 1.1.1.3 mrg asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
110 1.1 mrg }
111 1.1 mrg }
112 1.1 mrg
113 1.1 mrg #define v0 pp /* 2n */
114 1.1 mrg #define vinf (pp + 2 * n) /* s+s */
115 1.1 mrg #define vm1 scratch /* 2n */
116 1.1 mrg #define scratch_out scratch + 2 * n
117 1.1 mrg
118 1.1 mrg /* vm1, 2n limbs */
119 1.1 mrg TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
120 1.1 mrg
121 1.1 mrg /* vinf, s+s limbs */
122 1.1 mrg TOOM2_SQR_REC (vinf, a1, s, scratch_out);
123 1.1 mrg
124 1.1 mrg /* v0, 2n limbs */
125 1.1 mrg TOOM2_SQR_REC (v0, ap, n, scratch_out);
126 1.1 mrg
127 1.1 mrg /* H(v0) + L(vinf) */
128 1.1 mrg cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
129 1.1 mrg
130 1.1 mrg /* L(v0) + H(v0) */
131 1.1 mrg cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
132 1.1 mrg
133 1.1 mrg /* L(vinf) + H(vinf) */
134 1.1 mrg cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
135 1.1 mrg
136 1.1 mrg cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
137 1.1 mrg
138 1.1 mrg ASSERT (cy + 1 <= 3);
139 1.1 mrg ASSERT (cy2 <= 2);
140 1.1 mrg
141 1.1.1.3 mrg MPN_INCR_U (pp + 2 * n, s + s, cy2);
142 1.1 mrg if (LIKELY (cy <= 2))
143 1.1.1.3 mrg MPN_INCR_U (pp + 3 * n, s + s - n, cy);
144 1.1 mrg else
145 1.1.1.3 mrg MPN_DECR_U (pp + 3 * n, s + s - n, 1);
146 1.1 mrg }
147