sqr_basecase.c revision 1.1.1.1.2.1 1 /* mpn_sqr_basecase -- Internal routine to square a natural number
2 of length n.
3
4 THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
5 SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6
7
8 Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
9 2005, 2008, 2010, 2011 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
22
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25
26 #include "gmp.h"
27 #include "gmp-impl.h"
28 #include "longlong.h"
29
30
31 #if HAVE_NATIVE_mpn_sqr_diagonal
32 #define MPN_SQR_DIAGONAL(rp, up, n) \
33 mpn_sqr_diagonal (rp, up, n)
34 #else
35 #define MPN_SQR_DIAGONAL(rp, up, n) \
36 do { \
37 mp_size_t _i; \
38 for (_i = 0; _i < (n); _i++) \
39 { \
40 mp_limb_t ul, lpl; \
41 ul = (up)[_i]; \
42 umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
43 (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
44 } \
45 } while (0)
46 #endif
47
48 #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
49 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
50 mpn_sqr_diag_addlsh1 (rp, tp, up, n)
51 #else
52 #if HAVE_NATIVE_mpn_addlsh1_n
53 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
54 do { \
55 mp_limb_t cy; \
56 MPN_SQR_DIAGONAL (rp, up, n); \
57 cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \
58 rp[2 * n - 1] += cy; \
59 } while (0)
60 #else
61 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
62 do { \
63 mp_limb_t cy; \
64 MPN_SQR_DIAGONAL (rp, up, n); \
65 cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \
66 cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \
67 rp[2 * n - 1] += cy; \
68 } while (0)
69 #endif
70 #endif
71
72
73 #undef READY_WITH_mpn_sqr_basecase
74
75
76 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
77 void
78 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
79 {
80 mp_size_t i;
81 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
82 mp_ptr tp = tarr;
83 mp_limb_t cy;
84
85 /* must fit 2*n limbs in tarr */
86 ASSERT (n <= SQR_TOOM2_THRESHOLD);
87
88 if ((n & 1) != 0)
89 {
90 if (n == 1)
91 {
92 mp_limb_t ul, lpl;
93 ul = up[0];
94 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
95 rp[0] = lpl >> GMP_NAIL_BITS;
96 return;
97 }
98
99 MPN_ZERO (tp, n);
100
101 for (i = 0; i <= n - 2; i += 2)
102 {
103 cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
104 tp[n + i] = cy;
105 }
106 }
107 else
108 {
109 if (n == 2)
110 {
111 #if HAVE_NATIVE_mpn_mul_2
112 rp[3] = mpn_mul_2 (rp, up, 2, up);
113 #else
114 rp[0] = 0;
115 rp[1] = 0;
116 rp[3] = mpn_addmul_2 (rp, up, 2, up);
117 #endif
118 return;
119 }
120
121 MPN_ZERO (tp, n);
122
123 for (i = 0; i <= n - 4; i += 2)
124 {
125 cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
126 tp[n + i] = cy;
127 }
128 cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
129 tp[2 * n - 3] = cy;
130 }
131
132 MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
133 }
134 #define READY_WITH_mpn_sqr_basecase
135 #endif
136
137
138 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
139
140 /* mpn_sqr_basecase using plain mpn_addmul_2.
141
142 This is tricky, since we have to let mpn_addmul_2 make some undesirable
143 multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
144 This forces us to conditionally add or subtract the mpn_sqr_diagonal
145 results. Examples of the product we form:
146
147 n = 4 n = 5 n = 6
148 u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1
149 u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3
150 u4 * u5
151 add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5
152 sub: u1 sub: u1 u3 sub: u1 u3
153 */
154
155 void
156 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
157 {
158 mp_size_t i;
159 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
160 mp_ptr tp = tarr;
161 mp_limb_t cy;
162
163 /* must fit 2*n limbs in tarr */
164 ASSERT (n <= SQR_TOOM2_THRESHOLD);
165
166 if ((n & 1) != 0)
167 {
168 mp_limb_t x0, x1;
169
170 if (n == 1)
171 {
172 mp_limb_t ul, lpl;
173 ul = up[0];
174 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
175 rp[0] = lpl >> GMP_NAIL_BITS;
176 return;
177 }
178
179 /* The code below doesn't like unnormalized operands. Since such
180 operands are unusual, handle them with a dumb recursion. */
181 if (up[n - 1] == 0)
182 {
183 rp[2 * n - 2] = 0;
184 rp[2 * n - 1] = 0;
185 mpn_sqr_basecase (rp, up, n - 1);
186 return;
187 }
188
189 MPN_ZERO (tp, n);
190
191 for (i = 0; i <= n - 2; i += 2)
192 {
193 cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
194 tp[n + i] = cy;
195 }
196
197 MPN_SQR_DIAGONAL (rp, up, n);
198
199 for (i = 2;; i += 4)
200 {
201 x0 = rp[i + 0];
202 rp[i + 0] = (-x0) & GMP_NUMB_MASK;
203 x1 = rp[i + 1];
204 rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
205 __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
206 if (i + 4 >= 2 * n)
207 break;
208 mpn_incr_u (rp + i + 4, cy);
209 }
210 }
211 else
212 {
213 mp_limb_t x0, x1;
214
215 if (n == 2)
216 {
217 #if HAVE_NATIVE_mpn_mul_2
218 rp[3] = mpn_mul_2 (rp, up, 2, up);
219 #else
220 rp[0] = 0;
221 rp[1] = 0;
222 rp[3] = mpn_addmul_2 (rp, up, 2, up);
223 #endif
224 return;
225 }
226
227 /* The code below doesn't like unnormalized operands. Since such
228 operands are unusual, handle them with a dumb recursion. */
229 if (up[n - 1] == 0)
230 {
231 rp[2 * n - 2] = 0;
232 rp[2 * n - 1] = 0;
233 mpn_sqr_basecase (rp, up, n - 1);
234 return;
235 }
236
237 MPN_ZERO (tp, n);
238
239 for (i = 0; i <= n - 4; i += 2)
240 {
241 cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
242 tp[n + i] = cy;
243 }
244 cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
245 tp[2 * n - 3] = cy;
246
247 MPN_SQR_DIAGONAL (rp, up, n);
248
249 for (i = 2;; i += 4)
250 {
251 x0 = rp[i + 0];
252 rp[i + 0] = (-x0) & GMP_NUMB_MASK;
253 x1 = rp[i + 1];
254 rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
255 if (i + 6 >= 2 * n)
256 break;
257 __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
258 mpn_incr_u (rp + i + 4, cy);
259 }
260 mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
261 }
262
263 #if HAVE_NATIVE_mpn_addlsh1_n
264 cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
265 #else
266 cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
267 cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
268 #endif
269 rp[2 * n - 1] += cy;
270 }
271 #define READY_WITH_mpn_sqr_basecase
272 #endif
273
274
275 #if ! defined (READY_WITH_mpn_sqr_basecase)
276
277 /* Default mpn_sqr_basecase using mpn_addmul_1. */
278
279 void
280 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
281 {
282 mp_size_t i;
283
284 ASSERT (n >= 1);
285 ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
286
287 {
288 mp_limb_t ul, lpl;
289 ul = up[0];
290 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
291 rp[0] = lpl >> GMP_NAIL_BITS;
292 }
293 if (n > 1)
294 {
295 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
296 mp_ptr tp = tarr;
297 mp_limb_t cy;
298
299 /* must fit 2*n limbs in tarr */
300 ASSERT (n <= SQR_TOOM2_THRESHOLD);
301
302 cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
303 tp[n - 1] = cy;
304 for (i = 2; i < n; i++)
305 {
306 mp_limb_t cy;
307 cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
308 tp[n + i - 2] = cy;
309 }
310
311 MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
312 }
313 }
314 #endif
315