softfloat-macros revision 1.3 1 1.3 christos /* $NetBSD: softfloat-macros,v 1.3 2012/03/21 02:32:26 christos Exp $ */
2 1.1 bjh21
3 1.1 bjh21 /*
4 1.1 bjh21 ===============================================================================
5 1.1 bjh21
6 1.1 bjh21 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
7 1.1 bjh21 Arithmetic Package, Release 2a.
8 1.1 bjh21
9 1.1 bjh21 Written by John R. Hauser. This work was made possible in part by the
10 1.1 bjh21 International Computer Science Institute, located at Suite 600, 1947 Center
11 1.1 bjh21 Street, Berkeley, California 94704. Funding was partially provided by the
12 1.1 bjh21 National Science Foundation under grant MIP-9311980. The original version
13 1.1 bjh21 of this code was written as part of a project to build a fixed-point vector
14 1.1 bjh21 processor in collaboration with the University of California at Berkeley,
15 1.1 bjh21 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
16 1.1 bjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
17 1.1 bjh21 arithmetic/SoftFloat.html'.
18 1.1 bjh21
19 1.1 bjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
20 1.1 bjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
21 1.1 bjh21 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
22 1.1 bjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
23 1.1 bjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
24 1.1 bjh21
25 1.1 bjh21 Derivative works are acceptable, even for commercial purposes, so long as
26 1.1 bjh21 (1) they include prominent notice that the work is derivative, and (2) they
27 1.1 bjh21 include prominent notice akin to these four paragraphs for those parts of
28 1.1 bjh21 this code that are retained.
29 1.1 bjh21
30 1.1 bjh21 ===============================================================================
31 1.1 bjh21 */
32 1.1 bjh21
/*
-------------------------------------------------------------------------------
Right-shifts the 32-bit value `a' by `count' bits with ``jamming'': if any 1
bit falls off the low end, bit 0 of the result is forced to 1 so that the
loss stays visible to later rounding.  A `count' of 32 or more is accepted;
the result is then 1 when `a' is nonzero and 0 otherwise.  The result is
written through `zPtr'.
-------------------------------------------------------------------------------
*/
INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
{
    bits32 sticky;

    if ( count == 0 ) {
        *zPtr = a;
        return;
    }
    if ( 32 <= count ) {
        /* Every bit is shifted out; only the sticky bit can survive. */
        *zPtr = ( a != 0 );
        return;
    }
    /* Moving `a' left by 32 - count keeps exactly the bits the right shift
       discards, so a nonzero value here means something was lost. */
    sticky = ( ( a<<( ( - count ) & 31 ) ) != 0 );
    *zPtr = ( a>>count ) | sticky;

}
59 1.1 bjh21
/*
-------------------------------------------------------------------------------
Right-shifts the 64-bit value `a' by `count' bits with ``jamming'': if any 1
bit falls off the low end, bit 0 of the result is forced to 1.  A `count' of
64 or more is accepted; the result is then 1 when `a' is nonzero and 0
otherwise.  The result is written through `zPtr'.
-------------------------------------------------------------------------------
*/
INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
{
    bits64 sticky;

    if ( count == 0 ) {
        *zPtr = a;
        return;
    }
    if ( 64 <= count ) {
        /* Every bit is shifted out; only the sticky bit can survive. */
        *zPtr = ( a != 0 );
        return;
    }
    /* The left shift by 64 - count isolates the bits about to be dropped. */
    sticky = ( ( a<<( ( - count ) & 63 ) ) != 0 );
    *zPtr = ( a>>count ) | sticky;

}
86 1.1 bjh21
/*
-------------------------------------------------------------------------------
Treats `a0' and `a1' as a fixed-point number with the binary point between
them (`a0' integer part, `a1' fraction) and shifts it right by `count' bits.
The integer part of the result is written through `z0Ptr'.  The fraction is
written through `z1Ptr' in a compressed form: its top bits are the bits most
recently shifted out of `a0', and its least significant bit is set whenever
any lower-order bit that was discarded was nonzero.  For `count' greater
than 64 the fraction collapses to 1 if `a0' or `a1' is nonzero, else 0.
`count' may be arbitrarily large.
-------------------------------------------------------------------------------
*/
INLINE void
 shift64ExtraRightJamming(
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
{
    bits64 intPart, fracPart;
    bits64 a1Sticky = ( a1 != 0 );

    if ( count == 0 ) {
        intPart = a0;
        fracPart = a1;
    }
    else if ( count < 64 ) {
        /* Bits leaving `a0' become the top of the fraction; all of the old
           fraction is reduced to a single sticky bit. */
        fracPart = ( a0<<( ( - count ) & 63 ) ) | a1Sticky;
        intPart = a0>>count;
    }
    else {
        intPart = 0;
        if ( count == 64 ) {
            fracPart = a0 | a1Sticky;
        }
        else {
            fracPart = ( ( a0 | a1 ) != 0 );
        }
    }
    *z1Ptr = fracPart;
    *z0Ptr = intPart;

}
133 1.1 bjh21
/*
-------------------------------------------------------------------------------
Shifts the 128-bit value formed by concatenating `a0' (high half) and `a1'
(low half) right by `count' bits.  Bits shifted off the low end are lost.
`count' may be arbitrarily large; for `count' of 128 or more the result is
0.  The high and low halves of the result are written through `z0Ptr' and
`z1Ptr' respectively.
-------------------------------------------------------------------------------
*/
INLINE void
 shift128Right(
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
{
    bits64 z0, z1;
    int8 negCount = ( - count ) & 63;

    if ( count == 0 ) {
        z1 = a1;
        z0 = a0;
    }
    else if ( count < 64 ) {
        /* Low half receives the bits that fall out of the high half. */
        z1 = ( a0<<negCount ) | ( a1>>count );
        z0 = a0>>count;
    }
    else {
        /* For 64 <= count < 128 the low half is `a0' shifted by the excess
           over 64.  The bound must be 128: this branch is only reached with
           count >= 64, so a bound of 64 would discard `a0' unconditionally. */
        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
        z0 = 0;
    }
    *z1Ptr = z1;
    *z0Ptr = z0;

}
166 1.1 bjh21
/*
-------------------------------------------------------------------------------
Shifts the 128-bit value formed by concatenating `a0' (high half) and `a1'
(low half) right by `count' bits with ``jamming'': if any 1 bit is shifted
off the low end, the least significant bit of the result is set.  `count'
may be arbitrarily large; for `count' of 128 or more the result is 1 when
the input is nonzero and 0 otherwise.  The high and low halves of the result
are written through `z0Ptr' and `z1Ptr' respectively.
-------------------------------------------------------------------------------
*/
INLINE void
 shift128RightJamming(
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
{
    bits64 hi, lo;
    int8 leftShift = ( - count ) & 63;

    if ( count == 0 ) {
        hi = a0;
        lo = a1;
    }
    else if ( count < 64 ) {
        hi = a0>>count;
        /* Carry bits across from the high half, then jam in whatever fell
           off the bottom of the low half. */
        lo = ( a0<<leftShift ) | ( a1>>count ) | ( ( a1<<leftShift ) != 0 );
    }
    else {
        hi = 0;
        if ( count == 64 ) {
            lo = a0 | ( a1 != 0 );
        }
        else if ( count < 128 ) {
            /* All of `a1' plus the low bits of `a0' are discarded. */
            lo = ( a0>>( count & 63 ) )
                     | ( ( ( a0<<leftShift ) | a1 ) != 0 );
        }
        else {
            lo = ( ( a0 | a1 ) != 0 );
        }
    }
    *z1Ptr = lo;
    *z0Ptr = hi;

}
210 1.1 bjh21
/*
-------------------------------------------------------------------------------
Treats `a0', `a1', and `a2' as a fixed-point number with the binary point
between `a1' and `a2' (so `a0':`a1' is a 128-bit integer part and `a2' the
fraction) and shifts it right by `count' bits.  The integer part of the
result is written through `z0Ptr' (high) and `z1Ptr' (low).  The fraction
is written through `z2Ptr' in a compressed form: its top bits are the bits
most recently shifted out of the integer part, and its least significant bit
is set whenever any lower-order discarded bit was nonzero.  `count' may be
arbitrarily large.
-------------------------------------------------------------------------------
*/
INLINE void
 shift128ExtraRightJamming(
     bits64 a0,
     bits64 a1,
     bits64 a2,
     int16 count,
     bits64 *z0Ptr,
     bits64 *z1Ptr,
     bits64 *z2Ptr
 )
{
    bits64 z0, z1, z2;
    int8 negCount = ( - count ) & 63;

    if ( count == 0 ) {
        /* Nothing moves, so no sticky bit is folded in. */
        *z2Ptr = a2;
        *z1Ptr = a1;
        *z0Ptr = a0;
        return;
    }
    if ( count < 64 ) {
        z0 = a0>>count;
        z1 = ( a0<<negCount ) | ( a1>>count );
        z2 = a1<<negCount;
    }
    else if ( count == 64 ) {
        z0 = 0;
        z1 = a0;
        z2 = a1;
    }
    else {
        /* Past 64 bits all of `a1' lies below the retained fraction bits,
           so it joins `a2' as sticky input. */
        a2 |= a1;
        z0 = 0;
        if ( count < 128 ) {
            z1 = a0>>( count & 63 );
            z2 = a0<<negCount;
        }
        else if ( count == 128 ) {
            z1 = 0;
            z2 = a0;
        }
        else {
            z1 = 0;
            z2 = ( a0 != 0 );
        }
    }
    z2 |= ( a2 != 0 );
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}
281 1.1 bjh21
/*
-------------------------------------------------------------------------------
Shifts the 128-bit value formed by concatenating `a0' (high half) and `a1'
(low half) left by `count' bits, discarding anything shifted off the top.
`count' must be less than 64.  The high and low halves of the result are
written through `z0Ptr' and `z1Ptr' respectively.
-------------------------------------------------------------------------------
*/
INLINE void
 shortShift128Left(
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
{
    bits64 hi = a0;

    if ( count != 0 ) {
        /* The top `count' bits of `a1' move into the bottom of the high
           half.  A zero count is excluded because it would need a right
           shift by 64. */
        hi = ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    }
    *z1Ptr = a1<<count;
    *z0Ptr = hi;

}
300 1.1 bjh21
/*
-------------------------------------------------------------------------------
Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' (most
significant first) left by `count' bits, discarding anything shifted off the
top.  `count' must be less than 64.  The three 64-bit pieces of the result
are written through `z0Ptr', `z1Ptr', and `z2Ptr', most significant first.
-------------------------------------------------------------------------------
*/
INLINE void
 shortShift192Left(
     bits64 a0,
     bits64 a1,
     bits64 a2,
     int16 count,
     bits64 *z0Ptr,
     bits64 *z1Ptr,
     bits64 *z2Ptr
 )
{
    bits64 top = a0<<count;
    bits64 mid = a1<<count;
    bits64 low = a2<<count;
    int8 carryShift;

    if ( 0 < count ) {
        /* Bits leaving the top of each word enter the bottom of the next
           more significant word. */
        carryShift = ( ( - count ) & 63 );
        mid |= a2>>carryShift;
        top |= a1>>carryShift;
    }
    *z2Ptr = low;
    *z1Ptr = mid;
    *z0Ptr = top;

}
337 1.1 bjh21
/*
-------------------------------------------------------------------------------
Adds the 128-bit value `a0':`a1' to the 128-bit value `b0':`b1' (high half
first in each pair).  The sum is taken modulo 2^128, so a carry out of the
top is discarded.  The high and low halves of the sum are written through
`z0Ptr' and `z1Ptr' respectively.
-------------------------------------------------------------------------------
*/
INLINE void
 add128(
     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
{
    bits64 lowSum = a1 + b1;
    /* An unsigned sum smaller than one of its operands has wrapped. */
    bits64 carry = ( lowSum < a1 );

    *z1Ptr = lowSum;
    *z0Ptr = a0 + b0 + carry;

}
357 1.1 bjh21
/*
-------------------------------------------------------------------------------
Adds the 192-bit value `a0':`a1':`a2' to the 192-bit value `b0':`b1':`b2'
(most significant piece first).  The sum is taken modulo 2^192, so a carry
out of the top is discarded.  The three 64-bit pieces of the sum are written
through `z0Ptr', `z1Ptr', and `z2Ptr', most significant first.
-------------------------------------------------------------------------------
*/
INLINE void
 add192(
     bits64 a0,
     bits64 a1,
     bits64 a2,
     bits64 b0,
     bits64 b1,
     bits64 b2,
     bits64 *z0Ptr,
     bits64 *z1Ptr,
     bits64 *z2Ptr
 )
{
    bits64 low, mid, top;
    int8 carryFromLow, carryFromMid;

    low = a2 + b2;
    carryFromLow = ( low < a2 );
    mid = a1 + b1;
    carryFromMid = ( mid < a1 );
    top = a0 + b0;
    /* Propagating the low carry can itself wrap the middle word; that can
       only happen when the incremented word becomes 0. */
    mid += carryFromLow;
    top += ( mid < (bits64)carryFromLow );
    top += carryFromMid;
    *z2Ptr = low;
    *z1Ptr = mid;
    *z0Ptr = top;

}
396 1.1 bjh21
/*
-------------------------------------------------------------------------------
Subtracts the 128-bit value `b0':`b1' from the 128-bit value `a0':`a1' (high
half first in each pair).  The difference is taken modulo 2^128, so a borrow
out of the top is discarded.  The high and low halves of the difference are
written through `z0Ptr' and `z1Ptr' respectively.
-------------------------------------------------------------------------------
*/
INLINE void
 sub128(
     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
{
    /* The low subtraction borrows exactly when its minuend is smaller. */
    bits64 borrow = ( a1 < b1 );
    bits64 lowDiff = a1 - b1;

    *z1Ptr = lowDiff;
    *z0Ptr = a0 - b0 - borrow;

}
415 1.1 bjh21
/*
-------------------------------------------------------------------------------
Subtracts the 192-bit value `b0':`b1':`b2' from the 192-bit value
`a0':`a1':`a2' (most significant piece first).  The difference is taken
modulo 2^192, so a borrow out of the top is discarded.  The three 64-bit
pieces of the difference are written through `z0Ptr', `z1Ptr', and `z2Ptr',
most significant first.
-------------------------------------------------------------------------------
*/
INLINE void
 sub192(
     bits64 a0,
     bits64 a1,
     bits64 a2,
     bits64 b0,
     bits64 b1,
     bits64 b2,
     bits64 *z0Ptr,
     bits64 *z1Ptr,
     bits64 *z2Ptr
 )
{
    bits64 low, mid, top;
    int8 borrowFromLow, borrowFromMid;

    low = a2 - b2;
    borrowFromLow = ( a2 < b2 );
    mid = a1 - b1;
    borrowFromMid = ( a1 < b1 );
    top = a0 - b0;
    /* Taking the low borrow out of the middle word underflows only when
       that word is still 0, so test it before decrementing. */
    top -= ( mid < (bits64)borrowFromLow );
    mid -= borrowFromLow;
    top -= borrowFromMid;
    *z2Ptr = low;
    *z1Ptr = mid;
    *z0Ptr = top;

}
454 1.1 bjh21
/*
-------------------------------------------------------------------------------
Computes the full 128-bit product of the 64-bit values `a' and `b'.  The
high and low 64-bit halves of the product are written through `z0Ptr' and
`z1Ptr' respectively.
-------------------------------------------------------------------------------
*/
INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
{
    bits64 aLo = (bits32)a;
    bits64 aHi = (bits32)(a>>32);
    bits64 bLo = (bits32)b;
    bits64 bHi = (bits32)(b>>32);
    bits64 high, low, cross, crossOther;

    /* Four 32x32->64 partial products. */
    low = aLo * bLo;
    cross = aLo * bHi;
    crossOther = aHi * bLo;
    high = aHi * bHi;

    /* The two cross terms share a weight of 2^32.  Their sum can wrap, and
       that carry is worth 2^32 in the high word. */
    cross += crossOther;
    high += ( ( (bits64) ( cross < crossOther ) )<<32 ) + ( cross>>32 );

    /* Fold the low 32 bits of the cross sum into the low word. */
    cross <<= 32;
    low += cross;
    high += ( low < cross );

    *z1Ptr = low;
    *z0Ptr = high;

}
484 1.1 bjh21
/*
-------------------------------------------------------------------------------
Multiplies the 128-bit value `a0':`a1' (high half first) by the 64-bit value
`b', giving a 192-bit product.  The three 64-bit pieces of the product are
written through `z0Ptr', `z1Ptr', and `z2Ptr', most significant first.
-------------------------------------------------------------------------------
*/
INLINE void
 mul128By64To192(
     bits64 a0,
     bits64 a1,
     bits64 b,
     bits64 *z0Ptr,
     bits64 *z1Ptr,
     bits64 *z2Ptr
 )
{
    bits64 upperHi, upperLo, lowerHi, lowerLo;

    /* a1*b supplies the bottom word plus a contribution to the middle. */
    mul64To128( a1, b, &lowerHi, &lowerLo );
    /* a0*b sits 64 bits higher. */
    mul64To128( a0, b, &upperHi, &upperLo );
    /* Combine the overlapping middle words, carrying into the top. */
    add128( upperHi, upperLo, 0, lowerHi, &upperHi, &lowerHi );
    *z2Ptr = lowerLo;
    *z1Ptr = lowerHi;
    *z0Ptr = upperHi;

}
513 1.1 bjh21
/*
-------------------------------------------------------------------------------
Multiplies the 128-bit value `a0':`a1' by the 128-bit value `b0':`b1' (high
half first in each pair), giving a 256-bit product.  The four 64-bit pieces
of the product are written through `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr',
most significant first.
-------------------------------------------------------------------------------
*/
INLINE void
 mul128To256(
     bits64 a0,
     bits64 a1,
     bits64 b0,
     bits64 b1,
     bits64 *z0Ptr,
     bits64 *z1Ptr,
     bits64 *z2Ptr,
     bits64 *z3Ptr
 )
{
    bits64 z0, z1, z2, z3;
    bits64 lowLowHi;
    bits64 lowHighHi, lowHighLo;
    bits64 highHighHi, highHighLo;
    bits64 highLowHi, highLowLo;

    /* The four 64x64 partial products. */
    mul64To128( a1, b1, &lowLowHi, &z3 );
    mul64To128( a1, b0, &lowHighHi, &lowHighLo );
    mul64To128( a0, b0, &highHighHi, &highHighLo );
    mul64To128( a0, b1, &highLowHi, &highLowLo );

    /* Accumulate them at their respective word offsets, propagating each
       carry into the next more significant word. */
    add128( lowHighHi, lowHighLo, 0, lowLowHi, &z1, &z2 );
    add128( highHighHi, highHighLo, 0, z1, &z0, &z1 );
    add128( highLowHi, highLowLo, 0, z2, &highLowHi, &z2 );
    add128( z0, z1, 0, highLowHi, &z0, &z1 );
    *z3Ptr = z3;
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}
551 1.1 bjh21
/*
-------------------------------------------------------------------------------
Estimates the 64-bit quotient of the 128-bit value `a0':`a1' divided by `b'.
The divisor `b' must be at least 2^63.  Per the routine's stated contract,
if q is the exact quotient truncated toward zero, the value returned lies
between q and q + 2 inclusive.  When `b' is less than or equal to `a0' the
quotient does not fit in 64 bits and the all-ones value is returned.
-------------------------------------------------------------------------------
*/
static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
{
    bits64 bHigh, bLowShifted;
    bits64 rem0, rem1, term0, term1;
    bits64 z;

    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    bHigh = b>>32;

    /* First estimate the upper 32 quotient bits from the top 32 bits of the
       divisor, saturating if that digit would overflow. */
    if ( bHigh<<32 <= a0 ) {
        z = LIT64( 0xFFFFFFFF00000000 );
    }
    else {
        z = ( a0 / bHigh )<<32;
    }
    mul64To128( b, z, &term0, &term1 );
    sub128( a0, a1, term0, term1, &rem0, &rem1 );

    /* A negative remainder means the digit was too large: step it down and
       add `b' (positioned 32 bits up) back in until it is non-negative. */
    bLowShifted = b<<32;
    while ( ( (sbits64) rem0 ) < 0 ) {
        z -= LIT64( 0x100000000 );
        add128( rem0, rem1, bHigh, bLowShifted, &rem0, &rem1 );
    }

    /* Estimate the lower 32 quotient bits from the remainder the same way. */
    rem0 = ( rem0<<32 ) | ( rem1>>32 );
    if ( bHigh<<32 <= rem0 ) {
        z |= 0xFFFFFFFF;
    }
    else {
        z |= rem0 / bHigh;
    }
    return z;

}
583 1.1 bjh21
#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
/*
-------------------------------------------------------------------------------
Returns an approximation to the square root of the 32-bit significand given
by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
`aExp' (the least significant bit) is 1, the integer returned approximates
2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
case, the approximation is intended to lie strictly within +/-2 of the exact
value.

A first guess is formed from the top bits of `a' and a 16-entry correction
table indexed by bits 27..30 of `a', and is then refined by division steps
of the form z' = a/z + z (suitably scaled).
-------------------------------------------------------------------------------
*/
static bits32 estimateSqrt32( int16 aExp, bits32 a )
{
    static const bits16 sqrtOddAdjustments[] = {
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    };
    static const bits16 sqrtEvenAdjustments[] = {
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    };
    int8 idx;
    bits32 z;

    idx = ( a>>27 ) & 15;
    if ( aExp & 1 ) {
        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
        z = ( ( a / z )<<14 ) + ( z<<15 );
        a >>= 1;
    }
    else {
        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
        z = a / z + z;
        /* Saturate rather than let the scaled estimate overflow 32 bits. */
        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
        /*
         * When the estimate does not exceed `a', the final division below
         * would produce a quotient too large for 32 bits.  The answer is
         * then close to 2^32 and is taken as 2^31 + a/2, i.e. `a' shifted
         * right one place with its top bit replicated.  A plain unsigned
         * shift would clear bit 31 and return about half the intended
         * value; the replication is written with unsigned operations so it
         * does not depend on how signed right shifts behave.
         */
        if ( z <= a ) return ( a>>1 ) | ( a & 0x80000000 );
    }
    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );

}
#endif
625 1.1 bjh21
/*
-------------------------------------------------------------------------------
Counts the 0 bits above the most significant 1 bit of the 32-bit value `a'.
Returns 32 when `a' is zero.
-------------------------------------------------------------------------------
*/
static int8 countLeadingZeros32( bits32 a )
{
    /* Leading-zero count of every possible 8-bit value. */
    static const int8 countLeadingZerosHigh[] = {
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    int8 zeros = 0;

    /* Normalize by 16 and then 8 bits so that the first nonzero byte, if
       any, ends up in the top byte, which the table then resolves. */
    if ( a < 0x10000 ) {
        zeros = 16;
        a <<= 16;
    }
    if ( a < 0x1000000 ) {
        zeros += 8;
        a <<= 8;
    }
    zeros += countLeadingZerosHigh[ a>>24 ];
    return zeros;

}
667 1.1 bjh21
/*
-------------------------------------------------------------------------------
Counts the 0 bits above the most significant 1 bit of the 64-bit value `a'.
Returns 64 when `a' is zero.
-------------------------------------------------------------------------------
*/
static int8 countLeadingZeros64( bits64 a )
{
    int8 zeros;
    bits32 half;

    /* Pick the 32-bit half holding the first 1 bit (the low half if the
       high half is empty) and let the 32-bit routine finish the count. */
    if ( a < ( (bits64) 1 )<<32 ) {
        zeros = 32;
        half = (bits32)a;
    }
    else {
        zeros = 0;
        half = (bits32)( a>>32 );
    }
    zeros += (int8)countLeadingZeros32( half );
    return zeros;

}
689 1.1 bjh21
/*
-------------------------------------------------------------------------------
Tests two 128-bit values for equality.  Returns 1 if `a0':`a1' equals
`b0':`b1' (high half first in each pair), and 0 otherwise.
-------------------------------------------------------------------------------
*/
INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
{

    if ( a0 != b0 ) return 0;
    return ( a1 == b1 );

}
703 1.1 bjh21
/*
-------------------------------------------------------------------------------
Unsigned 128-bit ``less than or equal'' comparison.  Returns 1 if `a0':`a1'
is less than or equal to `b0':`b1' (high half first in each pair), and 0
otherwise.
-------------------------------------------------------------------------------
*/
INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
{

    /* The high halves decide unless they are equal. */
    if ( a0 != b0 ) return ( a0 < b0 );
    return ( a1 <= b1 );

}
717 1.1 bjh21
/*
-------------------------------------------------------------------------------
Unsigned 128-bit ``less than'' comparison.  Returns 1 if `a0':`a1' is
strictly less than `b0':`b1' (high half first in each pair), and 0
otherwise.
-------------------------------------------------------------------------------
*/
INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
{

    /* The high halves decide unless they are equal. */
    if ( a0 != b0 ) return ( a0 < b0 );
    return ( a1 < b1 );

}
731 1.1 bjh21
/*
-------------------------------------------------------------------------------
Tests two 128-bit values for inequality.  Returns 1 if `a0':`a1' differs
from `b0':`b1' (high half first in each pair), and 0 otherwise.
-------------------------------------------------------------------------------
*/
INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
{

    return ! ( ( a0 == b0 ) && ( a1 == b1 ) );

}
745 1.1 bjh21
746