toom2_sqr.c revision 1.1.1.2 1 1.1 mrg /* mpn_toom2_sqr -- Square {ap,an}.
2 1.1 mrg
3 1.1 mrg Contributed to the GNU project by Torbjorn Granlund.
4 1.1 mrg
5 1.1 mrg THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 1.1 mrg SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 1.1 mrg GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 1.1 mrg
9 1.1.1.2 mrg Copyright 2006, 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
10 1.1 mrg
11 1.1 mrg This file is part of the GNU MP Library.
12 1.1 mrg
13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
14 1.1 mrg it under the terms of the GNU Lesser General Public License as published by
15 1.1 mrg the Free Software Foundation; either version 3 of the License, or (at your
16 1.1 mrg option) any later version.
17 1.1 mrg
18 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
19 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 1.1 mrg License for more details.
22 1.1 mrg
23 1.1 mrg You should have received a copy of the GNU Lesser General Public License
24 1.1 mrg along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 1.1 mrg
26 1.1 mrg
27 1.1 mrg #include "gmp.h"
28 1.1 mrg #include "gmp-impl.h"
29 1.1 mrg
30 1.1 mrg /* Evaluate in: -1, 0, +inf
31 1.1 mrg
32 1.1 mrg <-s--><--n-->
33 1.1 mrg ____ ______
34 1.1 mrg |_a1_|___a0_|
35 1.1 mrg
36 1.1 mrg v0 = a0 ^2 # A(0)^2
37 1.1 mrg vm1 = (a0- a1)^2 # A(-1)^2
38 1.1 mrg vinf= a1 ^2 # A(inf)^2
39 1.1 mrg */
40 1.1 mrg
41 1.1.1.2 mrg #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
42 1.1 mrg #define MAYBE_sqr_toom2 1
43 1.1 mrg #else
44 1.1 mrg #define MAYBE_sqr_toom2 \
45 1.1 mrg (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
46 1.1 mrg #endif
47 1.1 mrg
48 1.1 mrg #define TOOM2_SQR_REC(p, a, n, ws) \
49 1.1 mrg do { \
50 1.1 mrg if (! MAYBE_sqr_toom2 \
51 1.1 mrg || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
52 1.1 mrg mpn_sqr_basecase (p, a, n); \
53 1.1 mrg else \
54 1.1 mrg mpn_toom2_sqr (p, a, n, ws); \
55 1.1 mrg } while (0)
56 1.1 mrg
57 1.1 mrg void
58 1.1 mrg mpn_toom2_sqr (mp_ptr pp,
59 1.1 mrg mp_srcptr ap, mp_size_t an,
60 1.1 mrg mp_ptr scratch)
61 1.1 mrg {
62 1.1.1.2 mrg const int __gmpn_cpuvec_initialized = 1;
63 1.1 mrg mp_size_t n, s;
64 1.1 mrg mp_limb_t cy, cy2;
65 1.1 mrg mp_ptr asm1;
66 1.1 mrg
67 1.1 mrg #define a0 ap
68 1.1 mrg #define a1 (ap + n)
69 1.1 mrg
70 1.1 mrg s = an >> 1;
71 1.1 mrg n = an - s;
72 1.1 mrg
73 1.1 mrg ASSERT (0 < s && s <= n);
74 1.1 mrg
75 1.1 mrg asm1 = pp;
76 1.1 mrg
77 1.1 mrg /* Compute asm1. */
78 1.1 mrg if (s == n)
79 1.1 mrg {
80 1.1 mrg if (mpn_cmp (a0, a1, n) < 0)
81 1.1 mrg {
82 1.1 mrg mpn_sub_n (asm1, a1, a0, n);
83 1.1 mrg }
84 1.1 mrg else
85 1.1 mrg {
86 1.1 mrg mpn_sub_n (asm1, a0, a1, n);
87 1.1 mrg }
88 1.1 mrg }
89 1.1 mrg else
90 1.1 mrg {
91 1.1 mrg if (mpn_zero_p (a0 + s, n - s) && mpn_cmp (a0, a1, s) < 0)
92 1.1 mrg {
93 1.1 mrg mpn_sub_n (asm1, a1, a0, s);
94 1.1 mrg MPN_ZERO (asm1 + s, n - s);
95 1.1 mrg }
96 1.1 mrg else
97 1.1 mrg {
98 1.1 mrg mpn_sub (asm1, a0, n, a1, s);
99 1.1 mrg }
100 1.1 mrg }
101 1.1 mrg
102 1.1 mrg #define v0 pp /* 2n */
103 1.1 mrg #define vinf (pp + 2 * n) /* s+s */
104 1.1 mrg #define vm1 scratch /* 2n */
105 1.1 mrg #define scratch_out scratch + 2 * n
106 1.1 mrg
107 1.1 mrg /* vm1, 2n limbs */
108 1.1 mrg TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
109 1.1 mrg
110 1.1 mrg /* vinf, s+s limbs */
111 1.1 mrg TOOM2_SQR_REC (vinf, a1, s, scratch_out);
112 1.1 mrg
113 1.1 mrg /* v0, 2n limbs */
114 1.1 mrg TOOM2_SQR_REC (v0, ap, n, scratch_out);
115 1.1 mrg
116 1.1 mrg /* H(v0) + L(vinf) */
117 1.1 mrg cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
118 1.1 mrg
119 1.1 mrg /* L(v0) + H(v0) */
120 1.1 mrg cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
121 1.1 mrg
122 1.1 mrg /* L(vinf) + H(vinf) */
123 1.1 mrg cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
124 1.1 mrg
125 1.1 mrg cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
126 1.1 mrg
127 1.1 mrg ASSERT (cy + 1 <= 3);
128 1.1 mrg ASSERT (cy2 <= 2);
129 1.1 mrg
130 1.1 mrg mpn_incr_u (pp + 2 * n, cy2);
131 1.1 mrg if (LIKELY (cy <= 2))
132 1.1 mrg mpn_incr_u (pp + 3 * n, cy);
133 1.1 mrg else
134 1.1 mrg mpn_decr_u (pp + 3 * n, 1);
135 1.1 mrg }
136