sqr_basecase.c revision 1.1.1.4 1 1.1 mrg /* mpn_sqr_basecase -- Internal routine to square a natural number
2 1.1 mrg of length n.
3 1.1 mrg
4 1.1 mrg THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
5 1.1 mrg SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6 1.1 mrg
7 1.1 mrg
8 1.1.1.4 mrg Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free
9 1.1.1.4 mrg Software Foundation, Inc.
10 1.1 mrg
11 1.1 mrg This file is part of the GNU MP Library.
12 1.1 mrg
13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
14 1.1.1.3 mrg it under the terms of either:
15 1.1.1.3 mrg
16 1.1.1.3 mrg * the GNU Lesser General Public License as published by the Free
17 1.1.1.3 mrg Software Foundation; either version 3 of the License, or (at your
18 1.1.1.3 mrg option) any later version.
19 1.1.1.3 mrg
20 1.1.1.3 mrg or
21 1.1.1.3 mrg
22 1.1.1.3 mrg * the GNU General Public License as published by the Free Software
23 1.1.1.3 mrg Foundation; either version 2 of the License, or (at your option) any
24 1.1.1.3 mrg later version.
25 1.1.1.3 mrg
26 1.1.1.3 mrg or both in parallel, as here.
27 1.1 mrg
28 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
29 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 1.1.1.3 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 1.1.1.3 mrg for more details.
32 1.1 mrg
33 1.1.1.3 mrg You should have received copies of the GNU General Public License and the
34 1.1.1.3 mrg GNU Lesser General Public License along with the GNU MP Library. If not,
35 1.1.1.3 mrg see https://www.gnu.org/licenses/. */
36 1.1 mrg
37 1.1 mrg #include "gmp-impl.h"
38 1.1 mrg #include "longlong.h"
39 1.1 mrg
40 1.1 mrg
41 1.1 mrg #if HAVE_NATIVE_mpn_sqr_diagonal
42 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \
43 1.1 mrg mpn_sqr_diagonal (rp, up, n)
44 1.1 mrg #else
45 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \
46 1.1 mrg do { \
47 1.1 mrg mp_size_t _i; \
48 1.1 mrg for (_i = 0; _i < (n); _i++) \
49 1.1 mrg { \
50 1.1 mrg mp_limb_t ul, lpl; \
51 1.1 mrg ul = (up)[_i]; \
52 1.1 mrg umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
53 1.1 mrg (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
54 1.1 mrg } \
55 1.1 mrg } while (0)
56 1.1 mrg #endif
57 1.1 mrg
58 1.1.1.2 mrg #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
59 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
60 1.1.1.2 mrg mpn_sqr_diag_addlsh1 (rp, tp, up, n)
61 1.1.1.2 mrg #else
62 1.1.1.2 mrg #if HAVE_NATIVE_mpn_addlsh1_n
63 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
64 1.1.1.2 mrg do { \
65 1.1.1.2 mrg mp_limb_t cy; \
66 1.1.1.2 mrg MPN_SQR_DIAGONAL (rp, up, n); \
67 1.1.1.2 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \
68 1.1.1.2 mrg rp[2 * n - 1] += cy; \
69 1.1.1.2 mrg } while (0)
70 1.1.1.2 mrg #else
71 1.1.1.2 mrg #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
72 1.1.1.2 mrg do { \
73 1.1.1.2 mrg mp_limb_t cy; \
74 1.1.1.2 mrg MPN_SQR_DIAGONAL (rp, up, n); \
75 1.1.1.2 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \
76 1.1.1.2 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \
77 1.1.1.2 mrg rp[2 * n - 1] += cy; \
78 1.1.1.2 mrg } while (0)
79 1.1.1.2 mrg #endif
80 1.1.1.2 mrg #endif
81 1.1.1.2 mrg
82 1.1 mrg
83 1.1 mrg #undef READY_WITH_mpn_sqr_basecase
84 1.1 mrg
85 1.1 mrg
86 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
87 1.1 mrg void
88 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
89 1.1 mrg {
90 1.1 mrg mp_size_t i;
91 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
92 1.1 mrg mp_ptr tp = tarr;
93 1.1 mrg mp_limb_t cy;
94 1.1 mrg
95 1.1 mrg /* must fit 2*n limbs in tarr */
96 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
97 1.1 mrg
98 1.1 mrg if ((n & 1) != 0)
99 1.1 mrg {
100 1.1 mrg if (n == 1)
101 1.1 mrg {
102 1.1 mrg mp_limb_t ul, lpl;
103 1.1 mrg ul = up[0];
104 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
105 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
106 1.1 mrg return;
107 1.1 mrg }
108 1.1 mrg
109 1.1 mrg MPN_ZERO (tp, n);
110 1.1 mrg
111 1.1 mrg for (i = 0; i <= n - 2; i += 2)
112 1.1 mrg {
113 1.1 mrg cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
114 1.1 mrg tp[n + i] = cy;
115 1.1 mrg }
116 1.1 mrg }
117 1.1 mrg else
118 1.1 mrg {
119 1.1 mrg if (n == 2)
120 1.1 mrg {
121 1.1.1.2 mrg #if HAVE_NATIVE_mpn_mul_2
122 1.1.1.2 mrg rp[3] = mpn_mul_2 (rp, up, 2, up);
123 1.1.1.2 mrg #else
124 1.1 mrg rp[0] = 0;
125 1.1 mrg rp[1] = 0;
126 1.1 mrg rp[3] = mpn_addmul_2 (rp, up, 2, up);
127 1.1.1.2 mrg #endif
128 1.1 mrg return;
129 1.1 mrg }
130 1.1 mrg
131 1.1 mrg MPN_ZERO (tp, n);
132 1.1 mrg
133 1.1 mrg for (i = 0; i <= n - 4; i += 2)
134 1.1 mrg {
135 1.1 mrg cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
136 1.1 mrg tp[n + i] = cy;
137 1.1 mrg }
138 1.1 mrg cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
139 1.1 mrg tp[2 * n - 3] = cy;
140 1.1 mrg }
141 1.1 mrg
142 1.1.1.2 mrg MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
143 1.1 mrg }
144 1.1 mrg #define READY_WITH_mpn_sqr_basecase
145 1.1 mrg #endif
146 1.1 mrg
147 1.1 mrg
148 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
149 1.1 mrg
150 1.1 mrg /* mpn_sqr_basecase using plain mpn_addmul_2.
151 1.1 mrg
152 1.1 mrg This is tricky, since we have to let mpn_addmul_2 make some undesirable
153 1.1 mrg multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
154 1.1 mrg This forces us to conditionally add or subtract the mpn_sqr_diagonal
155 1.1 mrg results. Examples of the product we form:
156 1.1 mrg
157 1.1 mrg n = 4 n = 5 n = 6
158 1.1 mrg u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1
159 1.1 mrg u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3
160 1.1 mrg u4 * u5
161 1.1 mrg add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5
162 1.1 mrg sub: u1 sub: u1 u3 sub: u1 u3
163 1.1 mrg */
164 1.1 mrg
165 1.1 mrg void
166 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
167 1.1 mrg {
168 1.1 mrg mp_size_t i;
169 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
170 1.1 mrg mp_ptr tp = tarr;
171 1.1 mrg mp_limb_t cy;
172 1.1 mrg
173 1.1 mrg /* must fit 2*n limbs in tarr */
174 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
175 1.1 mrg
176 1.1 mrg if ((n & 1) != 0)
177 1.1 mrg {
178 1.1 mrg mp_limb_t x0, x1;
179 1.1 mrg
180 1.1 mrg if (n == 1)
181 1.1 mrg {
182 1.1 mrg mp_limb_t ul, lpl;
183 1.1 mrg ul = up[0];
184 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
185 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
186 1.1 mrg return;
187 1.1 mrg }
188 1.1 mrg
189 1.1 mrg /* The code below doesn't like unnormalized operands. Since such
190 1.1 mrg operands are unusual, handle them with a dumb recursion. */
191 1.1 mrg if (up[n - 1] == 0)
192 1.1 mrg {
193 1.1 mrg rp[2 * n - 2] = 0;
194 1.1 mrg rp[2 * n - 1] = 0;
195 1.1 mrg mpn_sqr_basecase (rp, up, n - 1);
196 1.1 mrg return;
197 1.1 mrg }
198 1.1 mrg
199 1.1 mrg MPN_ZERO (tp, n);
200 1.1 mrg
201 1.1 mrg for (i = 0; i <= n - 2; i += 2)
202 1.1 mrg {
203 1.1 mrg cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
204 1.1 mrg tp[n + i] = cy;
205 1.1 mrg }
206 1.1 mrg
207 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n);
208 1.1 mrg
209 1.1 mrg for (i = 2;; i += 4)
210 1.1 mrg {
211 1.1 mrg x0 = rp[i + 0];
212 1.1 mrg rp[i + 0] = (-x0) & GMP_NUMB_MASK;
213 1.1 mrg x1 = rp[i + 1];
214 1.1 mrg rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
215 1.1 mrg __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
216 1.1 mrg if (i + 4 >= 2 * n)
217 1.1 mrg break;
218 1.1 mrg mpn_incr_u (rp + i + 4, cy);
219 1.1 mrg }
220 1.1 mrg }
221 1.1 mrg else
222 1.1 mrg {
223 1.1 mrg mp_limb_t x0, x1;
224 1.1 mrg
225 1.1 mrg if (n == 2)
226 1.1 mrg {
227 1.1.1.2 mrg #if HAVE_NATIVE_mpn_mul_2
228 1.1.1.2 mrg rp[3] = mpn_mul_2 (rp, up, 2, up);
229 1.1.1.2 mrg #else
230 1.1 mrg rp[0] = 0;
231 1.1 mrg rp[1] = 0;
232 1.1 mrg rp[3] = mpn_addmul_2 (rp, up, 2, up);
233 1.1.1.2 mrg #endif
234 1.1 mrg return;
235 1.1 mrg }
236 1.1 mrg
237 1.1 mrg /* The code below doesn't like unnormalized operands. Since such
238 1.1 mrg operands are unusual, handle them with a dumb recursion. */
239 1.1 mrg if (up[n - 1] == 0)
240 1.1 mrg {
241 1.1 mrg rp[2 * n - 2] = 0;
242 1.1 mrg rp[2 * n - 1] = 0;
243 1.1 mrg mpn_sqr_basecase (rp, up, n - 1);
244 1.1 mrg return;
245 1.1 mrg }
246 1.1 mrg
247 1.1 mrg MPN_ZERO (tp, n);
248 1.1 mrg
249 1.1 mrg for (i = 0; i <= n - 4; i += 2)
250 1.1 mrg {
251 1.1 mrg cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
252 1.1 mrg tp[n + i] = cy;
253 1.1 mrg }
254 1.1 mrg cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
255 1.1 mrg tp[2 * n - 3] = cy;
256 1.1 mrg
257 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n);
258 1.1 mrg
259 1.1 mrg for (i = 2;; i += 4)
260 1.1 mrg {
261 1.1 mrg x0 = rp[i + 0];
262 1.1 mrg rp[i + 0] = (-x0) & GMP_NUMB_MASK;
263 1.1 mrg x1 = rp[i + 1];
264 1.1 mrg rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
265 1.1 mrg if (i + 6 >= 2 * n)
266 1.1 mrg break;
267 1.1 mrg __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
268 1.1 mrg mpn_incr_u (rp + i + 4, cy);
269 1.1 mrg }
270 1.1 mrg mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
271 1.1 mrg }
272 1.1 mrg
273 1.1 mrg #if HAVE_NATIVE_mpn_addlsh1_n
274 1.1 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
275 1.1 mrg #else
276 1.1 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
277 1.1 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
278 1.1 mrg #endif
279 1.1 mrg rp[2 * n - 1] += cy;
280 1.1 mrg }
281 1.1 mrg #define READY_WITH_mpn_sqr_basecase
282 1.1 mrg #endif
283 1.1 mrg
284 1.1 mrg
285 1.1.1.4 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1
286 1.1.1.4 mrg
287 1.1.1.4 mrg /* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack
288 1.1.1.4 mrg allocation. */
289 1.1.1.4 mrg void
290 1.1.1.4 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
291 1.1.1.4 mrg {
292 1.1.1.4 mrg if (n == 1)
293 1.1.1.4 mrg {
294 1.1.1.4 mrg mp_limb_t ul, lpl;
295 1.1.1.4 mrg ul = up[0];
296 1.1.1.4 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
297 1.1.1.4 mrg rp[0] = lpl >> GMP_NAIL_BITS;
298 1.1.1.4 mrg }
299 1.1.1.4 mrg else
300 1.1.1.4 mrg {
301 1.1.1.4 mrg mp_size_t i;
302 1.1.1.4 mrg mp_ptr xp;
303 1.1.1.4 mrg
304 1.1.1.4 mrg rp += 1;
305 1.1.1.4 mrg rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]);
306 1.1.1.4 mrg for (i = n - 2; i != 0; i--)
307 1.1.1.4 mrg {
308 1.1.1.4 mrg up += 1;
309 1.1.1.4 mrg rp += 2;
310 1.1.1.4 mrg rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]);
311 1.1.1.4 mrg }
312 1.1.1.4 mrg
313 1.1.1.4 mrg xp = rp - 2 * n + 3;
314 1.1.1.4 mrg mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n);
315 1.1.1.4 mrg }
316 1.1.1.4 mrg }
317 1.1.1.4 mrg #define READY_WITH_mpn_sqr_basecase
318 1.1.1.4 mrg #endif
319 1.1.1.4 mrg
320 1.1.1.4 mrg
321 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase)
322 1.1 mrg
323 1.1 mrg /* Default mpn_sqr_basecase using mpn_addmul_1. */
324 1.1 mrg void
325 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
326 1.1 mrg {
327 1.1 mrg mp_size_t i;
328 1.1 mrg
329 1.1 mrg ASSERT (n >= 1);
330 1.1 mrg ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
331 1.1 mrg
332 1.1.1.4 mrg if (n == 1)
333 1.1.1.4 mrg {
334 1.1.1.4 mrg mp_limb_t ul, lpl;
335 1.1.1.4 mrg ul = up[0];
336 1.1.1.4 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
337 1.1.1.4 mrg rp[0] = lpl >> GMP_NAIL_BITS;
338 1.1.1.4 mrg }
339 1.1.1.4 mrg else
340 1.1 mrg {
341 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
342 1.1 mrg mp_ptr tp = tarr;
343 1.1 mrg mp_limb_t cy;
344 1.1 mrg
345 1.1 mrg /* must fit 2*n limbs in tarr */
346 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
347 1.1 mrg
348 1.1 mrg cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
349 1.1 mrg tp[n - 1] = cy;
350 1.1 mrg for (i = 2; i < n; i++)
351 1.1 mrg {
352 1.1 mrg mp_limb_t cy;
353 1.1 mrg cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
354 1.1 mrg tp[n + i - 2] = cy;
355 1.1 mrg }
356 1.1 mrg
357 1.1.1.2 mrg MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
358 1.1 mrg }
359 1.1 mrg }
360 1.1.1.4 mrg #define READY_WITH_mpn_sqr_basecase
361 1.1 mrg #endif
362