toom2_sqr.c revision 1.1.1.4 1 1.1 mrg /* mpn_toom2_sqr -- Square {ap,an}.
2 1.1 mrg
3 1.1 mrg Contributed to the GNU project by Torbjorn Granlund.
4 1.1 mrg
5 1.1 mrg THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 1.1 mrg SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 1.1 mrg GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 1.1 mrg
9 1.1.1.4 mrg Copyright 2006-2010, 2012, 2014, 2018 Free Software Foundation, Inc.
10 1.1 mrg
11 1.1 mrg This file is part of the GNU MP Library.
12 1.1 mrg
13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
14 1.1.1.3 mrg it under the terms of either:
15 1.1.1.3 mrg
16 1.1.1.3 mrg * the GNU Lesser General Public License as published by the Free
17 1.1.1.3 mrg Software Foundation; either version 3 of the License, or (at your
18 1.1.1.3 mrg option) any later version.
19 1.1.1.3 mrg
20 1.1.1.3 mrg or
21 1.1.1.3 mrg
22 1.1.1.3 mrg * the GNU General Public License as published by the Free Software
23 1.1.1.3 mrg Foundation; either version 2 of the License, or (at your option) any
24 1.1.1.3 mrg later version.
25 1.1.1.3 mrg
26 1.1.1.3 mrg or both in parallel, as here.
27 1.1 mrg
28 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
29 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 1.1.1.3 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 1.1.1.3 mrg for more details.
32 1.1 mrg
33 1.1.1.3 mrg You should have received copies of the GNU General Public License and the
34 1.1.1.3 mrg GNU Lesser General Public License along with the GNU MP Library. If not,
35 1.1.1.3 mrg see https://www.gnu.org/licenses/. */
36 1.1 mrg
37 1.1 mrg
38 1.1 mrg #include "gmp-impl.h"
39 1.1 mrg
40 1.1 mrg /* Evaluate in: -1, 0, +inf
41 1.1 mrg
42 1.1 mrg <-s--><--n-->
43 1.1 mrg ____ ______
44 1.1 mrg |_a1_|___a0_|
45 1.1 mrg
46 1.1 mrg v0 = a0 ^2 # A(0)^2
47 1.1 mrg vm1 = (a0- a1)^2 # A(-1)^2
48 1.1 mrg vinf= a1 ^2 # A(inf)^2
49 1.1 mrg */
50 1.1 mrg
51 1.1.1.2 mrg #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
52 1.1 mrg #define MAYBE_sqr_toom2 1
53 1.1 mrg #else
54 1.1 mrg #define MAYBE_sqr_toom2 \
55 1.1 mrg (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
56 1.1 mrg #endif
57 1.1 mrg
58 1.1 mrg #define TOOM2_SQR_REC(p, a, n, ws) \
59 1.1 mrg do { \
60 1.1 mrg if (! MAYBE_sqr_toom2 \
61 1.1 mrg || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
62 1.1 mrg mpn_sqr_basecase (p, a, n); \
63 1.1 mrg else \
64 1.1 mrg mpn_toom2_sqr (p, a, n, ws); \
65 1.1 mrg } while (0)
66 1.1 mrg
67 1.1 mrg void
68 1.1 mrg mpn_toom2_sqr (mp_ptr pp,
69 1.1 mrg mp_srcptr ap, mp_size_t an,
70 1.1 mrg mp_ptr scratch)
71 1.1 mrg {
72 1.1.1.2 mrg const int __gmpn_cpuvec_initialized = 1;
73 1.1 mrg mp_size_t n, s;
74 1.1 mrg mp_limb_t cy, cy2;
75 1.1 mrg mp_ptr asm1;
76 1.1 mrg
77 1.1 mrg #define a0 ap
78 1.1 mrg #define a1 (ap + n)
79 1.1 mrg
80 1.1 mrg s = an >> 1;
81 1.1 mrg n = an - s;
82 1.1 mrg
83 1.1.1.3 mrg ASSERT (0 < s && s <= n && s >= n - 1);
84 1.1 mrg
85 1.1 mrg asm1 = pp;
86 1.1 mrg
87 1.1 mrg /* Compute asm1. */
88 1.1 mrg if (s == n)
89 1.1 mrg {
90 1.1 mrg if (mpn_cmp (a0, a1, n) < 0)
91 1.1 mrg {
92 1.1 mrg mpn_sub_n (asm1, a1, a0, n);
93 1.1 mrg }
94 1.1 mrg else
95 1.1 mrg {
96 1.1 mrg mpn_sub_n (asm1, a0, a1, n);
97 1.1 mrg }
98 1.1 mrg }
99 1.1.1.3 mrg else /* n - s == 1 */
100 1.1 mrg {
101 1.1.1.3 mrg if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
102 1.1 mrg {
103 1.1 mrg mpn_sub_n (asm1, a1, a0, s);
104 1.1.1.3 mrg asm1[s] = 0;
105 1.1 mrg }
106 1.1 mrg else
107 1.1 mrg {
108 1.1.1.3 mrg asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
109 1.1 mrg }
110 1.1 mrg }
111 1.1 mrg
112 1.1 mrg #define v0 pp /* 2n */
113 1.1 mrg #define vinf (pp + 2 * n) /* s+s */
114 1.1 mrg #define vm1 scratch /* 2n */
115 1.1 mrg #define scratch_out scratch + 2 * n
116 1.1 mrg
117 1.1 mrg /* vm1, 2n limbs */
118 1.1 mrg TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
119 1.1 mrg
120 1.1 mrg /* vinf, s+s limbs */
121 1.1 mrg TOOM2_SQR_REC (vinf, a1, s, scratch_out);
122 1.1 mrg
123 1.1 mrg /* v0, 2n limbs */
124 1.1 mrg TOOM2_SQR_REC (v0, ap, n, scratch_out);
125 1.1 mrg
126 1.1 mrg /* H(v0) + L(vinf) */
127 1.1 mrg cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
128 1.1 mrg
129 1.1 mrg /* L(v0) + H(v0) */
130 1.1 mrg cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
131 1.1 mrg
132 1.1 mrg /* L(vinf) + H(vinf) */
133 1.1 mrg cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
134 1.1 mrg
135 1.1 mrg cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
136 1.1 mrg
137 1.1.1.4 mrg ASSERT (cy + 1 <= 3);
138 1.1 mrg ASSERT (cy2 <= 2);
139 1.1 mrg
140 1.1.1.4 mrg if (LIKELY (cy <= 2)) {
141 1.1.1.4 mrg MPN_INCR_U (pp + 2 * n, s + s, cy2);
142 1.1.1.3 mrg MPN_INCR_U (pp + 3 * n, s + s - n, cy);
143 1.1.1.4 mrg } else { /* cy is negative */
144 1.1.1.4 mrg /* The total contribution of v0+vinf-vm1 can not be negative. */
145 1.1.1.4 mrg #if WANT_ASSERT
146 1.1.1.4 mrg /* The borrow in cy stops the propagation of the carry cy2, */
147 1.1.1.4 mrg ASSERT (cy2 == 1);
148 1.1.1.4 mrg cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
149 1.1.1.4 mrg ASSERT (cy == 0);
150 1.1.1.4 mrg #else
151 1.1.1.4 mrg /* we simply fill the area with zeros. */
152 1.1.1.4 mrg MPN_FILL (pp + 2 * n, n, 0);
153 1.1.1.4 mrg #endif
154 1.1.1.4 mrg }
155 1.1 mrg }
156