sqr_basecase.c revision 1.1 1 1.1 mrg /* mpn_sqr_basecase -- Internal routine to square a natural number
2 1.1 mrg of length n.
3 1.1 mrg
4 1.1 mrg THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
5 1.1 mrg SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6 1.1 mrg
7 1.1 mrg
8 1.1 mrg Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
9 1.1 mrg 2005, 2008 Free Software Foundation, Inc.
10 1.1 mrg
11 1.1 mrg This file is part of the GNU MP Library.
12 1.1 mrg
13 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
14 1.1 mrg it under the terms of the GNU Lesser General Public License as published by
15 1.1 mrg the Free Software Foundation; either version 3 of the License, or (at your
16 1.1 mrg option) any later version.
17 1.1 mrg
18 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
19 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 1.1 mrg License for more details.
22 1.1 mrg
23 1.1 mrg You should have received a copy of the GNU Lesser General Public License
24 1.1 mrg along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 1.1 mrg
26 1.1 mrg #include "gmp.h"
27 1.1 mrg #include "gmp-impl.h"
28 1.1 mrg #include "longlong.h"
29 1.1 mrg
30 1.1 mrg
31 1.1 mrg #if HAVE_NATIVE_mpn_sqr_diagonal
32 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \
33 1.1 mrg mpn_sqr_diagonal (rp, up, n)
34 1.1 mrg #else
35 1.1 mrg #define MPN_SQR_DIAGONAL(rp, up, n) \
36 1.1 mrg do { \
37 1.1 mrg mp_size_t _i; \
38 1.1 mrg for (_i = 0; _i < (n); _i++) \
39 1.1 mrg { \
40 1.1 mrg mp_limb_t ul, lpl; \
41 1.1 mrg ul = (up)[_i]; \
42 1.1 mrg umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
43 1.1 mrg (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
44 1.1 mrg } \
45 1.1 mrg } while (0)
46 1.1 mrg #endif
47 1.1 mrg
48 1.1 mrg
49 1.1 mrg #undef READY_WITH_mpn_sqr_basecase
50 1.1 mrg
51 1.1 mrg
52 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
53 1.1 mrg void
54 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
55 1.1 mrg {
56 1.1 mrg mp_size_t i;
57 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
58 1.1 mrg mp_ptr tp = tarr;
59 1.1 mrg mp_limb_t cy;
60 1.1 mrg
61 1.1 mrg /* must fit 2*n limbs in tarr */
62 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
63 1.1 mrg
64 1.1 mrg if ((n & 1) != 0)
65 1.1 mrg {
66 1.1 mrg if (n == 1)
67 1.1 mrg {
68 1.1 mrg mp_limb_t ul, lpl;
69 1.1 mrg ul = up[0];
70 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
71 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
72 1.1 mrg return;
73 1.1 mrg }
74 1.1 mrg
75 1.1 mrg MPN_ZERO (tp, n);
76 1.1 mrg
77 1.1 mrg for (i = 0; i <= n - 2; i += 2)
78 1.1 mrg {
79 1.1 mrg cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
80 1.1 mrg tp[n + i] = cy;
81 1.1 mrg }
82 1.1 mrg }
83 1.1 mrg else
84 1.1 mrg {
85 1.1 mrg if (n == 2)
86 1.1 mrg {
87 1.1 mrg rp[0] = 0;
88 1.1 mrg rp[1] = 0;
89 1.1 mrg rp[3] = mpn_addmul_2 (rp, up, 2, up);
90 1.1 mrg return;
91 1.1 mrg }
92 1.1 mrg
93 1.1 mrg MPN_ZERO (tp, n);
94 1.1 mrg
95 1.1 mrg for (i = 0; i <= n - 4; i += 2)
96 1.1 mrg {
97 1.1 mrg cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
98 1.1 mrg tp[n + i] = cy;
99 1.1 mrg }
100 1.1 mrg cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
101 1.1 mrg tp[2 * n - 3] = cy;
102 1.1 mrg }
103 1.1 mrg
104 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n);
105 1.1 mrg
106 1.1 mrg #if HAVE_NATIVE_mpn_addlsh1_n
107 1.1 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
108 1.1 mrg #else
109 1.1 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
110 1.1 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
111 1.1 mrg #endif
112 1.1 mrg rp[2 * n - 1] += cy;
113 1.1 mrg }
114 1.1 mrg #define READY_WITH_mpn_sqr_basecase
115 1.1 mrg #endif
116 1.1 mrg
117 1.1 mrg
118 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
119 1.1 mrg
120 1.1 mrg /* mpn_sqr_basecase using plain mpn_addmul_2.
121 1.1 mrg
122 1.1 mrg This is tricky, since we have to let mpn_addmul_2 make some undesirable
123 1.1 mrg multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
124 1.1 mrg This forces us to conditionally add or subtract the mpn_sqr_diagonal
125 1.1 mrg results. Examples of the product we form:
126 1.1 mrg
127 1.1 mrg n = 4 n = 5 n = 6
128 1.1 mrg u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1
129 1.1 mrg u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3
130 1.1 mrg u4 * u5
131 1.1 mrg add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5
132 1.1 mrg sub: u1 sub: u1 u3 sub: u1 u3
133 1.1 mrg */
134 1.1 mrg
135 1.1 mrg void
136 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
137 1.1 mrg {
138 1.1 mrg mp_size_t i;
139 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
140 1.1 mrg mp_ptr tp = tarr;
141 1.1 mrg mp_limb_t cy;
142 1.1 mrg
143 1.1 mrg /* must fit 2*n limbs in tarr */
144 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
145 1.1 mrg
146 1.1 mrg if ((n & 1) != 0)
147 1.1 mrg {
148 1.1 mrg mp_limb_t x0, x1;
149 1.1 mrg
150 1.1 mrg if (n == 1)
151 1.1 mrg {
152 1.1 mrg mp_limb_t ul, lpl;
153 1.1 mrg ul = up[0];
154 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
155 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
156 1.1 mrg return;
157 1.1 mrg }
158 1.1 mrg
159 1.1 mrg /* The code below doesn't like unnormalized operands. Since such
160 1.1 mrg operands are unusual, handle them with a dumb recursion. */
161 1.1 mrg if (up[n - 1] == 0)
162 1.1 mrg {
163 1.1 mrg rp[2 * n - 2] = 0;
164 1.1 mrg rp[2 * n - 1] = 0;
165 1.1 mrg mpn_sqr_basecase (rp, up, n - 1);
166 1.1 mrg return;
167 1.1 mrg }
168 1.1 mrg
169 1.1 mrg MPN_ZERO (tp, n);
170 1.1 mrg
171 1.1 mrg for (i = 0; i <= n - 2; i += 2)
172 1.1 mrg {
173 1.1 mrg cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
174 1.1 mrg tp[n + i] = cy;
175 1.1 mrg }
176 1.1 mrg
177 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n);
178 1.1 mrg
179 1.1 mrg for (i = 2;; i += 4)
180 1.1 mrg {
181 1.1 mrg x0 = rp[i + 0];
182 1.1 mrg rp[i + 0] = (-x0) & GMP_NUMB_MASK;
183 1.1 mrg x1 = rp[i + 1];
184 1.1 mrg rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
185 1.1 mrg __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
186 1.1 mrg if (i + 4 >= 2 * n)
187 1.1 mrg break;
188 1.1 mrg mpn_incr_u (rp + i + 4, cy);
189 1.1 mrg }
190 1.1 mrg }
191 1.1 mrg else
192 1.1 mrg {
193 1.1 mrg mp_limb_t x0, x1;
194 1.1 mrg
195 1.1 mrg if (n == 2)
196 1.1 mrg {
197 1.1 mrg rp[0] = 0;
198 1.1 mrg rp[1] = 0;
199 1.1 mrg rp[3] = mpn_addmul_2 (rp, up, 2, up);
200 1.1 mrg return;
201 1.1 mrg }
202 1.1 mrg
203 1.1 mrg /* The code below doesn't like unnormalized operands. Since such
204 1.1 mrg operands are unusual, handle them with a dumb recursion. */
205 1.1 mrg if (up[n - 1] == 0)
206 1.1 mrg {
207 1.1 mrg rp[2 * n - 2] = 0;
208 1.1 mrg rp[2 * n - 1] = 0;
209 1.1 mrg mpn_sqr_basecase (rp, up, n - 1);
210 1.1 mrg return;
211 1.1 mrg }
212 1.1 mrg
213 1.1 mrg MPN_ZERO (tp, n);
214 1.1 mrg
215 1.1 mrg for (i = 0; i <= n - 4; i += 2)
216 1.1 mrg {
217 1.1 mrg cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
218 1.1 mrg tp[n + i] = cy;
219 1.1 mrg }
220 1.1 mrg cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
221 1.1 mrg tp[2 * n - 3] = cy;
222 1.1 mrg
223 1.1 mrg MPN_SQR_DIAGONAL (rp, up, n);
224 1.1 mrg
225 1.1 mrg for (i = 2;; i += 4)
226 1.1 mrg {
227 1.1 mrg x0 = rp[i + 0];
228 1.1 mrg rp[i + 0] = (-x0) & GMP_NUMB_MASK;
229 1.1 mrg x1 = rp[i + 1];
230 1.1 mrg rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
231 1.1 mrg if (i + 6 >= 2 * n)
232 1.1 mrg break;
233 1.1 mrg __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
234 1.1 mrg mpn_incr_u (rp + i + 4, cy);
235 1.1 mrg }
236 1.1 mrg mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
237 1.1 mrg }
238 1.1 mrg
239 1.1 mrg #if HAVE_NATIVE_mpn_addlsh1_n
240 1.1 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
241 1.1 mrg #else
242 1.1 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
243 1.1 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
244 1.1 mrg #endif
245 1.1 mrg rp[2 * n - 1] += cy;
246 1.1 mrg }
247 1.1 mrg #define READY_WITH_mpn_sqr_basecase
248 1.1 mrg #endif
249 1.1 mrg
250 1.1 mrg
251 1.1 mrg #if ! defined (READY_WITH_mpn_sqr_basecase)
252 1.1 mrg
253 1.1 mrg /* Default mpn_sqr_basecase using mpn_addmul_1. */
254 1.1 mrg
255 1.1 mrg void
256 1.1 mrg mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
257 1.1 mrg {
258 1.1 mrg mp_size_t i;
259 1.1 mrg
260 1.1 mrg ASSERT (n >= 1);
261 1.1 mrg ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
262 1.1 mrg
263 1.1 mrg {
264 1.1 mrg mp_limb_t ul, lpl;
265 1.1 mrg ul = up[0];
266 1.1 mrg umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
267 1.1 mrg rp[0] = lpl >> GMP_NAIL_BITS;
268 1.1 mrg }
269 1.1 mrg if (n > 1)
270 1.1 mrg {
271 1.1 mrg mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
272 1.1 mrg mp_ptr tp = tarr;
273 1.1 mrg mp_limb_t cy;
274 1.1 mrg
275 1.1 mrg /* must fit 2*n limbs in tarr */
276 1.1 mrg ASSERT (n <= SQR_TOOM2_THRESHOLD);
277 1.1 mrg
278 1.1 mrg cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
279 1.1 mrg tp[n - 1] = cy;
280 1.1 mrg for (i = 2; i < n; i++)
281 1.1 mrg {
282 1.1 mrg mp_limb_t cy;
283 1.1 mrg cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
284 1.1 mrg tp[n + i - 2] = cy;
285 1.1 mrg }
286 1.1 mrg MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
287 1.1 mrg
288 1.1 mrg {
289 1.1 mrg mp_limb_t cy;
290 1.1 mrg #if HAVE_NATIVE_mpn_addlsh1_n
291 1.1 mrg cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
292 1.1 mrg #else
293 1.1 mrg cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
294 1.1 mrg cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
295 1.1 mrg #endif
296 1.1 mrg rp[2 * n - 1] += cy;
297 1.1 mrg }
298 1.1 mrg }
299 1.1 mrg }
300 1.1 mrg #endif
301