sqr_basecase.c revision 1.1.1.3 1 1.1 mrg /* mpn_sqr_basecase -- Internal routine to square a natural number
2 1.1 mrg of length n.
3 1.1 mrg
4 1.1 mrg THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
5 1.1 mrg SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6 1.1 mrg
7 1.1 mrg
8 1.1.1.3 mrg Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011 Free Software
9 1.1.1.3 mrg Foundation, Inc.
10 1.1 mrg
11 1.1 mrg This file is part of the GNU MP Library.
12 1.1 mrg
13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
14 1.1.1.3 mrg it under the terms of either:
15 1.1.1.3 mrg
16 1.1.1.3 mrg * the GNU Lesser General Public License as published by the Free
17 1.1.1.3 mrg Software Foundation; either version 3 of the License, or (at your
18 1.1.1.3 mrg option) any later version.
19 1.1.1.3 mrg
20 1.1.1.3 mrg or
21 1.1.1.3 mrg
22 1.1.1.3 mrg * the GNU General Public License as published by the Free Software
23 1.1.1.3 mrg Foundation; either version 2 of the License, or (at your option) any
24 1.1.1.3 mrg later version.
25 1.1.1.3 mrg
26 1.1.1.3 mrg or both in parallel, as here.
27 1.1 mrg
28 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
29 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 1.1.1.3 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 1.1.1.3 mrg for more details.
32 1.1 mrg
33 1.1.1.3 mrg You should have received copies of the GNU General Public License and the
34 1.1.1.3 mrg GNU Lesser General Public License along with the GNU MP Library. If not,
35 1.1.1.3 mrg see https://www.gnu.org/licenses/. */
36 1.1 mrg
37 1.1 mrg #include "gmp.h"
38 1.1 mrg #include "gmp-impl.h"
39 1.1 mrg #include "longlong.h"
40 1.1 mrg
41 1.1 mrg
42 1.1 mrg #if HAVE_NATIVE_mpn_sqr_diagonal
43 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \
44 1.1 mrg mpn_sqr_diagonal (rp, up, n)
45 1.1 mrg #else
46 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \
47 1.1 mrg do { \
48 1.1 mrg mp_size_t _i; \
49 1.1 mrg for (_i = 0; _i < (n); _i++) \
50 1.1 mrg { \
51 1.1 mrg mp_limb_t ul, lpl; \
52 1.1 mrg ul = (up)[_i]; \
53 1.1 mrg umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
54 1.1 mrg (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
55 1.1 mrg } \
56 1.1 mrg } while (0)
57 1.1 mrg #endif
58 1.1 mrg
59 1.1.1.2 mrg #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
60 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
61 1.1.1.2 mrg mpn_sqr_diag_addlsh1 (rp, tp, up, n)
62 1.1.1.2 mrg #else
63 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh1_n
64 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
65 1.1.1.2 mrg do { \
66 1.1.1.2 mrg mp_limb_t cy; \
67 1.1.1.2 mrg MPN_SQR_DIAGONAL (rp, up, n); \
68 1.1.1.2 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \
69 1.1.1.2 mrg rp[2 * n - 1] += cy; \
70 1.1.1.2 mrg } while (0)
71 1.1.1.2 mrg #else
72 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
73 1.1.1.2 mrg do { \
74 1.1.1.2 mrg mp_limb_t cy; \
75 1.1.1.2 mrg MPN_SQR_DIAGONAL (rp, up, n); \
76 1.1.1.2 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \
77 1.1.1.2 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \
78 1.1.1.2 mrg rp[2 * n - 1] += cy; \
79 1.1.1.2 mrg } while (0)
80 1.1.1.2 mrg #endif
81 1.1.1.2 mrg #endif
82 1.1.1.2 mrg
83 1.1 mrg
84 1.1 mrg #undef READY_WITH_mpn_sqr_basecase
85 1.1 mrg
86 1.1 mrg
87 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
88 1.1 mrg void
89 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
90 1.1 mrg {
91 1.1 mrg mp_size_t i;
92 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
93 1.1 mrg mp_ptr tp = tarr;
94 1.1 mrg mp_limb_t cy;
95 1.1 mrg
96 1.1 mrg /* must fit 2*n limbs in tarr */
97 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
98 1.1 mrg
99 1.1 mrg if ((n & 1) != 0)
100 1.1 mrg {
101 1.1 mrg if (n == 1)
102 1.1 mrg {
103 1.1 mrg mp_limb_t ul, lpl;
104 1.1 mrg ul = up[0];
105 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
106 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
107 1.1 mrg return;
108 1.1 mrg }
109 1.1 mrg
110 1.1 mrg MPN_ZERO (tp, n);
111 1.1 mrg
112 1.1 mrg for (i = 0; i <= n - 2; i += 2)
113 1.1 mrg {
114 1.1 mrg cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
115 1.1 mrg tp[n + i] = cy;
116 1.1 mrg }
117 1.1 mrg }
118 1.1 mrg else
119 1.1 mrg {
120 1.1 mrg if (n == 2)
121 1.1 mrg {
122 1.1.1.2 mrg #if HAVE_NATIVE_mpn_mul_2
123 1.1.1.2 mrg rp[3] = mpn_mul_2 (rp, up, 2, up);
124 1.1.1.2 mrg #else
125 1.1 mrg rp[0] = 0;
126 1.1 mrg rp[1] = 0;
127 1.1 mrg rp[3] = mpn_addmul_2 (rp, up, 2, up);
128 1.1.1.2 mrg #endif
129 1.1 mrg return;
130 1.1 mrg }
131 1.1 mrg
132 1.1 mrg MPN_ZERO (tp, n);
133 1.1 mrg
134 1.1 mrg for (i = 0; i <= n - 4; i += 2)
135 1.1 mrg {
136 1.1 mrg cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
137 1.1 mrg tp[n + i] = cy;
138 1.1 mrg }
139 1.1 mrg cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
140 1.1 mrg tp[2 * n - 3] = cy;
141 1.1 mrg }
142 1.1 mrg
143 1.1.1.2 mrg MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
144 1.1 mrg }
145 1.1 mrg #define READY_WITH_mpn_sqr_basecase
146 1.1 mrg #endif
147 1.1 mrg
148 1.1 mrg
149 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
150 1.1 mrg
151 1.1 mrg /* mpn_sqr_basecase using plain mpn_addmul_2.
152 1.1 mrg
153 1.1 mrg This is tricky, since we have to let mpn_addmul_2 make some undesirable
154 1.1 mrg multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
155 1.1 mrg This forces us to conditionally add or subtract the mpn_sqr_diagonal
156 1.1 mrg results. Examples of the product we form:
157 1.1 mrg
158 1.1 mrg n = 4 n = 5 n = 6
159 1.1 mrg u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1
160 1.1 mrg u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3
161 1.1 mrg u4 * u5
162 1.1 mrg add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5
163 1.1 mrg sub: u1 sub: u1 u3 sub: u1 u3
164 1.1 mrg */
165 1.1 mrg
166 1.1 mrg void
167 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
168 1.1 mrg {
169 1.1 mrg mp_size_t i;
170 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
171 1.1 mrg mp_ptr tp = tarr;
172 1.1 mrg mp_limb_t cy;
173 1.1 mrg
174 1.1 mrg /* must fit 2*n limbs in tarr */
175 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
176 1.1 mrg
177 1.1 mrg if ((n & 1) != 0)
178 1.1 mrg {
179 1.1 mrg mp_limb_t x0, x1;
180 1.1 mrg
181 1.1 mrg if (n == 1)
182 1.1 mrg {
183 1.1 mrg mp_limb_t ul, lpl;
184 1.1 mrg ul = up[0];
185 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
186 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
187 1.1 mrg return;
188 1.1 mrg }
189 1.1 mrg
190 1.1 mrg /* The code below doesn't like unnormalized operands. Since such
191 1.1 mrg operands are unusual, handle them with a dumb recursion. */
192 1.1 mrg if (up[n - 1] == 0)
193 1.1 mrg {
194 1.1 mrg rp[2 * n - 2] = 0;
195 1.1 mrg rp[2 * n - 1] = 0;
196 1.1 mrg mpn_sqr_basecase (rp, up, n - 1);
197 1.1 mrg return;
198 1.1 mrg }
199 1.1 mrg
200 1.1 mrg MPN_ZERO (tp, n);
201 1.1 mrg
202 1.1 mrg for (i = 0; i <= n - 2; i += 2)
203 1.1 mrg {
204 1.1 mrg cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
205 1.1 mrg tp[n + i] = cy;
206 1.1 mrg }
207 1.1 mrg
208 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n);
209 1.1 mrg
210 1.1 mrg for (i = 2;; i += 4)
211 1.1 mrg {
212 1.1 mrg x0 = rp[i + 0];
213 1.1 mrg rp[i + 0] = (-x0) & GMP_NUMB_MASK;
214 1.1 mrg x1 = rp[i + 1];
215 1.1 mrg rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
216 1.1 mrg __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
217 1.1 mrg if (i + 4 >= 2 * n)
218 1.1 mrg break;
219 1.1 mrg mpn_incr_u (rp + i + 4, cy);
220 1.1 mrg }
221 1.1 mrg }
222 1.1 mrg else
223 1.1 mrg {
224 1.1 mrg mp_limb_t x0, x1;
225 1.1 mrg
226 1.1 mrg if (n == 2)
227 1.1 mrg {
228 1.1.1.2 mrg #if HAVE_NATIVE_mpn_mul_2
229 1.1.1.2 mrg rp[3] = mpn_mul_2 (rp, up, 2, up);
230 1.1.1.2 mrg #else
231 1.1 mrg rp[0] = 0;
232 1.1 mrg rp[1] = 0;
233 1.1 mrg rp[3] = mpn_addmul_2 (rp, up, 2, up);
234 1.1.1.2 mrg #endif
235 1.1 mrg return;
236 1.1 mrg }
237 1.1 mrg
238 1.1 mrg /* The code below doesn't like unnormalized operands. Since such
239 1.1 mrg operands are unusual, handle them with a dumb recursion. */
240 1.1 mrg if (up[n - 1] == 0)
241 1.1 mrg {
242 1.1 mrg rp[2 * n - 2] = 0;
243 1.1 mrg rp[2 * n - 1] = 0;
244 1.1 mrg mpn_sqr_basecase (rp, up, n - 1);
245 1.1 mrg return;
246 1.1 mrg }
247 1.1 mrg
248 1.1 mrg MPN_ZERO (tp, n);
249 1.1 mrg
250 1.1 mrg for (i = 0; i <= n - 4; i += 2)
251 1.1 mrg {
252 1.1 mrg cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
253 1.1 mrg tp[n + i] = cy;
254 1.1 mrg }
255 1.1 mrg cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
256 1.1 mrg tp[2 * n - 3] = cy;
257 1.1 mrg
258 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n);
259 1.1 mrg
260 1.1 mrg for (i = 2;; i += 4)
261 1.1 mrg {
262 1.1 mrg x0 = rp[i + 0];
263 1.1 mrg rp[i + 0] = (-x0) & GMP_NUMB_MASK;
264 1.1 mrg x1 = rp[i + 1];
265 1.1 mrg rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
266 1.1 mrg if (i + 6 >= 2 * n)
267 1.1 mrg break;
268 1.1 mrg __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
269 1.1 mrg mpn_incr_u (rp + i + 4, cy);
270 1.1 mrg }
271 1.1 mrg mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
272 1.1 mrg }
273 1.1 mrg
274 1.1 mrg #if HAVE_NATIVE_mpn_addlsh1_n
275 1.1 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
276 1.1 mrg #else
277 1.1 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
278 1.1 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
279 1.1 mrg #endif
280 1.1 mrg rp[2 * n - 1] += cy;
281 1.1 mrg }
282 1.1 mrg #define READY_WITH_mpn_sqr_basecase
283 1.1 mrg #endif
284 1.1 mrg
285 1.1 mrg
286 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase)
287 1.1 mrg
288 1.1 mrg /* Default mpn_sqr_basecase using mpn_addmul_1. */
289 1.1 mrg
290 1.1 mrg void
291 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
292 1.1 mrg {
293 1.1 mrg mp_size_t i;
294 1.1 mrg
295 1.1 mrg ASSERT (n >= 1);
296 1.1 mrg ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
297 1.1 mrg
298 1.1 mrg {
299 1.1 mrg mp_limb_t ul, lpl;
300 1.1 mrg ul = up[0];
301 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
302 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
303 1.1 mrg }
304 1.1 mrg if (n > 1)
305 1.1 mrg {
306 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
307 1.1 mrg mp_ptr tp = tarr;
308 1.1 mrg mp_limb_t cy;
309 1.1 mrg
310 1.1 mrg /* must fit 2*n limbs in tarr */
311 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
312 1.1 mrg
313 1.1 mrg cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
314 1.1 mrg tp[n - 1] = cy;
315 1.1 mrg for (i = 2; i < n; i++)
316 1.1 mrg {
317 1.1 mrg mp_limb_t cy;
318 1.1 mrg cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
319 1.1 mrg tp[n + i - 2] = cy;
320 1.1 mrg }
321 1.1 mrg
322 1.1.1.2 mrg MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
323 1.1 mrg }
324 1.1 mrg }
325 1.1 mrg #endif
326