mulmid.c revision 1.1.1.1.8.2 1 /* mpn_mulmid -- middle product
2
3 Contributed by David Harvey.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2011 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
22
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25
26
27 #include "gmp.h"
28 #include "gmp-impl.h"
29
30
31 #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
32
33
34 void
35 mpn_mulmid (mp_ptr rp,
36 mp_srcptr ap, mp_size_t an,
37 mp_srcptr bp, mp_size_t bn)
38 {
39 mp_size_t rn, k;
40 mp_ptr scratch, temp;
41
42 ASSERT (an >= bn);
43 ASSERT (bn >= 1);
44 ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
45 ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
46
47 if (bn < MULMID_TOOM42_THRESHOLD)
48 {
49 /* region not tall enough to make toom42 worthwhile for any portion */
50
51 if (an < CHUNK)
52 {
53 /* region not too wide either, just call basecase directly */
54 mpn_mulmid_basecase (rp, ap, an, bp, bn);
55 return;
56 }
57
58 /* Region quite wide. For better locality, use basecase on chunks:
59
60 AAABBBCC..
61 .AAABBBCC.
62 ..AAABBBCC
63 */
64
65 k = CHUNK - bn + 1; /* number of diagonals per chunk */
66
67 /* first chunk (marked A in the above diagram) */
68 mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
69
70 /* remaining chunks (B, C, etc) */
71 an -= k;
72
73 while (an >= CHUNK)
74 {
75 mp_limb_t t0, t1, cy;
76 ap += k, rp += k;
77 t0 = rp[0], t1 = rp[1];
78 mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
79 ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
80 MPN_INCR_U (rp + 1, k + 1, t1 + cy);
81 an -= k;
82 }
83
84 if (an >= bn)
85 {
86 /* last remaining chunk */
87 mp_limb_t t0, t1, cy;
88 ap += k, rp += k;
89 t0 = rp[0], t1 = rp[1];
90 mpn_mulmid_basecase (rp, ap, an, bp, bn);
91 ADDC_LIMB (cy, rp[0], rp[0], t0);
92 MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
93 }
94
95 return;
96 }
97
98 /* region is tall enough for toom42 */
99
100 rn = an - bn + 1;
101
102 if (rn < MULMID_TOOM42_THRESHOLD)
103 {
104 /* region not wide enough to make toom42 worthwhile for any portion */
105
106 TMP_DECL;
107
108 if (bn < CHUNK)
109 {
110 /* region not too tall either, just call basecase directly */
111 mpn_mulmid_basecase (rp, ap, an, bp, bn);
112 return;
113 }
114
115 /* Region quite tall. For better locality, use basecase on chunks:
116
117 AAAAA....
118 .AAAAA...
119 ..BBBBB..
120 ...BBBBB.
121 ....CCCCC
122 */
123
124 TMP_MARK;
125
126 temp = TMP_ALLOC_LIMBS (rn + 2);
127
128 /* first chunk (marked A in the above diagram) */
129 bp += bn - CHUNK, an -= bn - CHUNK;
130 mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
131
132 /* remaining chunks (B, C, etc) */
133 bn -= CHUNK;
134
135 while (bn >= CHUNK)
136 {
137 ap += CHUNK, bp -= CHUNK;
138 mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
139 mpn_add_n (rp, rp, temp, rn + 2);
140 bn -= CHUNK;
141 }
142
143 if (bn)
144 {
145 /* last remaining chunk */
146 ap += CHUNK, bp -= bn;
147 mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
148 mpn_add_n (rp, rp, temp, rn + 2);
149 }
150
151 TMP_FREE;
152 return;
153 }
154
155 /* we're definitely going to use toom42 somewhere */
156
157 if (bn > rn)
158 {
159 /* slice region into chunks, use toom42 on all chunks except possibly
160 the last:
161
162 AA....
163 .AA...
164 ..BB..
165 ...BB.
166 ....CC
167 */
168
169 TMP_DECL;
170 TMP_MARK;
171
172 temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
173 scratch = temp + rn + 2;
174
175 /* first chunk (marked A in the above diagram) */
176 bp += bn - rn;
177 mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
178
179 /* remaining chunks (B, C, etc) */
180 bn -= rn;
181
182 while (bn >= rn)
183 {
184 ap += rn, bp -= rn;
185 mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
186 mpn_add_n (rp, rp, temp, rn + 2);
187 bn -= rn;
188 }
189
190 if (bn)
191 {
192 /* last remaining chunk */
193 ap += rn, bp -= bn;
194 mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
195 mpn_add_n (rp, rp, temp, rn + 2);
196 }
197
198 TMP_FREE;
199 }
200 else
201 {
202 /* slice region into chunks, use toom42 on all chunks except possibly
203 the last:
204
205 AAABBBCC..
206 .AAABBBCC.
207 ..AAABBBCC
208 */
209
210 TMP_DECL;
211 TMP_MARK;
212
213 scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
214
215 /* first chunk (marked A in the above diagram) */
216 mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
217
218 /* remaining chunks (B, C, etc) */
219 rn -= bn;
220
221 while (rn >= bn)
222 {
223 mp_limb_t t0, t1, cy;
224 ap += bn, rp += bn;
225 t0 = rp[0], t1 = rp[1];
226 mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
227 ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
228 MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
229 rn -= bn;
230 }
231
232 TMP_FREE;
233
234 if (rn)
235 {
236 /* last remaining chunk */
237 mp_limb_t t0, t1, cy;
238 ap += bn, rp += bn;
239 t0 = rp[0], t1 = rp[1];
240 mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
241 ADDC_LIMB (cy, rp[0], rp[0], t0);
242 MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
243 }
244 }
245 }
246