sqr_basecase.c revision 1.1.1.2 1 1.1 mrg /* mpn_sqr_basecase -- Internal routine to square a natural number
2 1.1 mrg of length n.
3 1.1 mrg
4 1.1 mrg THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
5 1.1 mrg SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6 1.1 mrg
7 1.1 mrg
8 1.1 mrg Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
9 1.1.1.2 mrg 2005, 2008, 2010, 2011 Free Software Foundation, Inc.
10 1.1 mrg
11 1.1 mrg This file is part of the GNU MP Library.
12 1.1 mrg
13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
14 1.1 mrg it under the terms of the GNU Lesser General Public License as published by
15 1.1 mrg the Free Software Foundation; either version 3 of the License, or (at your
16 1.1 mrg option) any later version.
17 1.1 mrg
18 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
19 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 1.1 mrg License for more details.
22 1.1 mrg
23 1.1 mrg You should have received a copy of the GNU Lesser General Public License
24 1.1 mrg along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 1.1 mrg
26 1.1 mrg #include "gmp.h"
27 1.1 mrg #include "gmp-impl.h"
28 1.1 mrg #include "longlong.h"
29 1.1 mrg
30 1.1 mrg
31 1.1 mrg #if HAVE_NATIVE_mpn_sqr_diagonal
32 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \
33 1.1 mrg mpn_sqr_diagonal (rp, up, n)
34 1.1 mrg #else
35 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \
36 1.1 mrg do { \
37 1.1 mrg mp_size_t _i; \
38 1.1 mrg for (_i = 0; _i < (n); _i++) \
39 1.1 mrg { \
40 1.1 mrg mp_limb_t ul, lpl; \
41 1.1 mrg ul = (up)[_i]; \
42 1.1 mrg umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
43 1.1 mrg (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
44 1.1 mrg } \
45 1.1 mrg } while (0)
46 1.1 mrg #endif
47 1.1 mrg
48 1.1.1.2 mrg #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
49 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
50 1.1.1.2 mrg mpn_sqr_diag_addlsh1 (rp, tp, up, n)
51 1.1.1.2 mrg #else
52 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh1_n
53 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
54 1.1.1.2 mrg do { \
55 1.1.1.2 mrg mp_limb_t cy; \
56 1.1.1.2 mrg MPN_SQR_DIAGONAL (rp, up, n); \
57 1.1.1.2 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \
58 1.1.1.2 mrg rp[2 * n - 1] += cy; \
59 1.1.1.2 mrg } while (0)
60 1.1.1.2 mrg #else
61 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
62 1.1.1.2 mrg do { \
63 1.1.1.2 mrg mp_limb_t cy; \
64 1.1.1.2 mrg MPN_SQR_DIAGONAL (rp, up, n); \
65 1.1.1.2 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \
66 1.1.1.2 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \
67 1.1.1.2 mrg rp[2 * n - 1] += cy; \
68 1.1.1.2 mrg } while (0)
69 1.1.1.2 mrg #endif
70 1.1.1.2 mrg #endif
71 1.1.1.2 mrg
72 1.1 mrg
73 1.1 mrg #undef READY_WITH_mpn_sqr_basecase
74 1.1 mrg
75 1.1 mrg
76 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
77 1.1 mrg void
78 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
79 1.1 mrg {
80 1.1 mrg mp_size_t i;
81 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
82 1.1 mrg mp_ptr tp = tarr;
83 1.1 mrg mp_limb_t cy;
84 1.1 mrg
85 1.1 mrg /* must fit 2*n limbs in tarr */
86 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
87 1.1 mrg
88 1.1 mrg if ((n & 1) != 0)
89 1.1 mrg {
90 1.1 mrg if (n == 1)
91 1.1 mrg {
92 1.1 mrg mp_limb_t ul, lpl;
93 1.1 mrg ul = up[0];
94 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
95 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
96 1.1 mrg return;
97 1.1 mrg }
98 1.1 mrg
99 1.1 mrg MPN_ZERO (tp, n);
100 1.1 mrg
101 1.1 mrg for (i = 0; i <= n - 2; i += 2)
102 1.1 mrg {
103 1.1 mrg cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
104 1.1 mrg tp[n + i] = cy;
105 1.1 mrg }
106 1.1 mrg }
107 1.1 mrg else
108 1.1 mrg {
109 1.1 mrg if (n == 2)
110 1.1 mrg {
111 1.1.1.2 mrg #if HAVE_NATIVE_mpn_mul_2
112 1.1.1.2 mrg rp[3] = mpn_mul_2 (rp, up, 2, up);
113 1.1.1.2 mrg #else
114 1.1 mrg rp[0] = 0;
115 1.1 mrg rp[1] = 0;
116 1.1 mrg rp[3] = mpn_addmul_2 (rp, up, 2, up);
117 1.1.1.2 mrg #endif
118 1.1 mrg return;
119 1.1 mrg }
120 1.1 mrg
121 1.1 mrg MPN_ZERO (tp, n);
122 1.1 mrg
123 1.1 mrg for (i = 0; i <= n - 4; i += 2)
124 1.1 mrg {
125 1.1 mrg cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
126 1.1 mrg tp[n + i] = cy;
127 1.1 mrg }
128 1.1 mrg cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
129 1.1 mrg tp[2 * n - 3] = cy;
130 1.1 mrg }
131 1.1 mrg
132 1.1.1.2 mrg MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
133 1.1 mrg }
134 1.1 mrg #define READY_WITH_mpn_sqr_basecase
135 1.1 mrg #endif
136 1.1 mrg
137 1.1 mrg
138 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
139 1.1 mrg
140 1.1 mrg /* mpn_sqr_basecase using plain mpn_addmul_2.
141 1.1 mrg
142 1.1 mrg This is tricky, since we have to let mpn_addmul_2 make some undesirable
143 1.1 mrg multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
144 1.1 mrg This forces us to conditionally add or subtract the mpn_sqr_diagonal
145 1.1 mrg results. Examples of the product we form:
146 1.1 mrg
147 1.1 mrg n = 4 n = 5 n = 6
148 1.1 mrg u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1
149 1.1 mrg u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3
150 1.1 mrg u4 * u5
151 1.1 mrg add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5
152 1.1 mrg sub: u1 sub: u1 u3 sub: u1 u3
153 1.1 mrg */
154 1.1 mrg
155 1.1 mrg void
156 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
157 1.1 mrg {
158 1.1 mrg mp_size_t i;
159 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
160 1.1 mrg mp_ptr tp = tarr;
161 1.1 mrg mp_limb_t cy;
162 1.1 mrg
163 1.1 mrg /* must fit 2*n limbs in tarr */
164 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
165 1.1 mrg
166 1.1 mrg if ((n & 1) != 0)
167 1.1 mrg {
168 1.1 mrg mp_limb_t x0, x1;
169 1.1 mrg
170 1.1 mrg if (n == 1)
171 1.1 mrg {
172 1.1 mrg mp_limb_t ul, lpl;
173 1.1 mrg ul = up[0];
174 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
175 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
176 1.1 mrg return;
177 1.1 mrg }
178 1.1 mrg
179 1.1 mrg /* The code below doesn't like unnormalized operands. Since such
180 1.1 mrg operands are unusual, handle them with a dumb recursion. */
181 1.1 mrg if (up[n - 1] == 0)
182 1.1 mrg {
183 1.1 mrg rp[2 * n - 2] = 0;
184 1.1 mrg rp[2 * n - 1] = 0;
185 1.1 mrg mpn_sqr_basecase (rp, up, n - 1);
186 1.1 mrg return;
187 1.1 mrg }
188 1.1 mrg
189 1.1 mrg MPN_ZERO (tp, n);
190 1.1 mrg
191 1.1 mrg for (i = 0; i <= n - 2; i += 2)
192 1.1 mrg {
193 1.1 mrg cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
194 1.1 mrg tp[n + i] = cy;
195 1.1 mrg }
196 1.1 mrg
197 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n);
198 1.1 mrg
199 1.1 mrg for (i = 2;; i += 4)
200 1.1 mrg {
201 1.1 mrg x0 = rp[i + 0];
202 1.1 mrg rp[i + 0] = (-x0) & GMP_NUMB_MASK;
203 1.1 mrg x1 = rp[i + 1];
204 1.1 mrg rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
205 1.1 mrg __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
206 1.1 mrg if (i + 4 >= 2 * n)
207 1.1 mrg break;
208 1.1 mrg mpn_incr_u (rp + i + 4, cy);
209 1.1 mrg }
210 1.1 mrg }
211 1.1 mrg else
212 1.1 mrg {
213 1.1 mrg mp_limb_t x0, x1;
214 1.1 mrg
215 1.1 mrg if (n == 2)
216 1.1 mrg {
217 1.1.1.2 mrg #if HAVE_NATIVE_mpn_mul_2
218 1.1.1.2 mrg rp[3] = mpn_mul_2 (rp, up, 2, up);
219 1.1.1.2 mrg #else
220 1.1 mrg rp[0] = 0;
221 1.1 mrg rp[1] = 0;
222 1.1 mrg rp[3] = mpn_addmul_2 (rp, up, 2, up);
223 1.1.1.2 mrg #endif
224 1.1 mrg return;
225 1.1 mrg }
226 1.1 mrg
227 1.1 mrg /* The code below doesn't like unnormalized operands. Since such
228 1.1 mrg operands are unusual, handle them with a dumb recursion. */
229 1.1 mrg if (up[n - 1] == 0)
230 1.1 mrg {
231 1.1 mrg rp[2 * n - 2] = 0;
232 1.1 mrg rp[2 * n - 1] = 0;
233 1.1 mrg mpn_sqr_basecase (rp, up, n - 1);
234 1.1 mrg return;
235 1.1 mrg }
236 1.1 mrg
237 1.1 mrg MPN_ZERO (tp, n);
238 1.1 mrg
239 1.1 mrg for (i = 0; i <= n - 4; i += 2)
240 1.1 mrg {
241 1.1 mrg cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
242 1.1 mrg tp[n + i] = cy;
243 1.1 mrg }
244 1.1 mrg cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
245 1.1 mrg tp[2 * n - 3] = cy;
246 1.1 mrg
247 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n);
248 1.1 mrg
249 1.1 mrg for (i = 2;; i += 4)
250 1.1 mrg {
251 1.1 mrg x0 = rp[i + 0];
252 1.1 mrg rp[i + 0] = (-x0) & GMP_NUMB_MASK;
253 1.1 mrg x1 = rp[i + 1];
254 1.1 mrg rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
255 1.1 mrg if (i + 6 >= 2 * n)
256 1.1 mrg break;
257 1.1 mrg __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
258 1.1 mrg mpn_incr_u (rp + i + 4, cy);
259 1.1 mrg }
260 1.1 mrg mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
261 1.1 mrg }
262 1.1 mrg
263 1.1 mrg #if HAVE_NATIVE_mpn_addlsh1_n
264 1.1 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
265 1.1 mrg #else
266 1.1 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
267 1.1 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
268 1.1 mrg #endif
269 1.1 mrg rp[2 * n - 1] += cy;
270 1.1 mrg }
271 1.1 mrg #define READY_WITH_mpn_sqr_basecase
272 1.1 mrg #endif
273 1.1 mrg
274 1.1 mrg
275 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase)
276 1.1 mrg
277 1.1 mrg /* Default mpn_sqr_basecase using mpn_addmul_1. */
278 1.1 mrg
279 1.1 mrg void
280 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
281 1.1 mrg {
282 1.1 mrg mp_size_t i;
283 1.1 mrg
284 1.1 mrg ASSERT (n >= 1);
285 1.1 mrg ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
286 1.1 mrg
287 1.1 mrg {
288 1.1 mrg mp_limb_t ul, lpl;
289 1.1 mrg ul = up[0];
290 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
291 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
292 1.1 mrg }
293 1.1 mrg if (n > 1)
294 1.1 mrg {
295 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
296 1.1 mrg mp_ptr tp = tarr;
297 1.1 mrg mp_limb_t cy;
298 1.1 mrg
299 1.1 mrg /* must fit 2*n limbs in tarr */
300 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
301 1.1 mrg
302 1.1 mrg cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
303 1.1 mrg tp[n - 1] = cy;
304 1.1 mrg for (i = 2; i < n; i++)
305 1.1 mrg {
306 1.1 mrg mp_limb_t cy;
307 1.1 mrg cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
308 1.1 mrg tp[n + i - 2] = cy;
309 1.1 mrg }
310 1.1 mrg
311 1.1.1.2 mrg MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
312 1.1 mrg }
313 1.1 mrg }
314 1.1 mrg #endif
315