/* $NetBSD: softfloat-macros.h,v 1.1 2001/04/26 03:10:47 ross Exp $ */

/*
===============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/
32 1.1 ross
33 1.1 ross /*
34 1.1 ross -------------------------------------------------------------------------------
35 1.1 ross Shifts `a' right by the number of bits given in `count'. If any nonzero
36 1.1 ross bits are shifted off, they are ``jammed'' into the least significant bit of
37 1.1 ross the result by setting the least significant bit to 1. The value of `count'
38 1.1 ross can be arbitrarily large; in particular, if `count' is greater than 32, the
39 1.1 ross result will be either 0 or 1, depending on whether `a' is zero or nonzero.
40 1.1 ross The result is stored in the location pointed to by `zPtr'.
41 1.1 ross -------------------------------------------------------------------------------
42 1.1 ross */
43 1.1 ross INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
44 1.1 ross {
45 1.1 ross bits32 z;
46 1.1 ross
47 1.1 ross if ( count == 0 ) {
48 1.1 ross z = a;
49 1.1 ross }
50 1.1 ross else if ( count < 32 ) {
51 1.1 ross z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
52 1.1 ross }
53 1.1 ross else {
54 1.1 ross z = ( a != 0 );
55 1.1 ross }
56 1.1 ross *zPtr = z;
57 1.1 ross
58 1.1 ross }
59 1.1 ross
60 1.1 ross /*
61 1.1 ross -------------------------------------------------------------------------------
62 1.1 ross Shifts `a' right by the number of bits given in `count'. If any nonzero
63 1.1 ross bits are shifted off, they are ``jammed'' into the least significant bit of
64 1.1 ross the result by setting the least significant bit to 1. The value of `count'
65 1.1 ross can be arbitrarily large; in particular, if `count' is greater than 64, the
66 1.1 ross result will be either 0 or 1, depending on whether `a' is zero or nonzero.
67 1.1 ross The result is stored in the location pointed to by `zPtr'.
68 1.1 ross -------------------------------------------------------------------------------
69 1.1 ross */
70 1.1 ross INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
71 1.1 ross {
72 1.1 ross bits64 z;
73 1.1 ross
74 1.1 ross if ( count == 0 ) {
75 1.1 ross z = a;
76 1.1 ross }
77 1.1 ross else if ( count < 64 ) {
78 1.1 ross z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
79 1.1 ross }
80 1.1 ross else {
81 1.1 ross z = ( a != 0 );
82 1.1 ross }
83 1.1 ross *zPtr = z;
84 1.1 ross
85 1.1 ross }
86 1.1 ross
87 1.1 ross /*
88 1.1 ross -------------------------------------------------------------------------------
89 1.1 ross Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
90 1.1 ross _plus_ the number of bits given in `count'. The shifted result is at most
91 1.1 ross 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
92 1.1 ross bits shifted off form a second 64-bit result as follows: The _last_ bit
93 1.1 ross shifted off is the most-significant bit of the extra result, and the other
94 1.1 ross 63 bits of the extra result are all zero if and only if _all_but_the_last_
95 1.1 ross bits shifted off were all zero. This extra result is stored in the location
96 1.1 ross pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
97 1.1 ross (This routine makes more sense if `a0' and `a1' are considered to form a
98 1.1 ross fixed-point value with binary point between `a0' and `a1'. This fixed-point
99 1.1 ross value is shifted right by the number of bits given in `count', and the
100 1.1 ross integer part of the result is returned at the location pointed to by
101 1.1 ross `z0Ptr'. The fractional part of the result may be slightly corrupted as
102 1.1 ross described above, and is returned at the location pointed to by `z1Ptr'.)
103 1.1 ross -------------------------------------------------------------------------------
104 1.1 ross */
105 1.1 ross INLINE void
106 1.1 ross shift64ExtraRightJamming(
107 1.1 ross bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
108 1.1 ross {
109 1.1 ross bits64 z0, z1;
110 1.1 ross int8 negCount = ( - count ) & 63;
111 1.1 ross
112 1.1 ross if ( count == 0 ) {
113 1.1 ross z1 = a1;
114 1.1 ross z0 = a0;
115 1.1 ross }
116 1.1 ross else if ( count < 64 ) {
117 1.1 ross z1 = ( a0<<negCount ) | ( a1 != 0 );
118 1.1 ross z0 = a0>>count;
119 1.1 ross }
120 1.1 ross else {
121 1.1 ross if ( count == 64 ) {
122 1.1 ross z1 = a0 | ( a1 != 0 );
123 1.1 ross }
124 1.1 ross else {
125 1.1 ross z1 = ( ( a0 | a1 ) != 0 );
126 1.1 ross }
127 1.1 ross z0 = 0;
128 1.1 ross }
129 1.1 ross *z1Ptr = z1;
130 1.1 ross *z0Ptr = z0;
131 1.1 ross
132 1.1 ross }
133 1.1 ross
134 1.1 ross /*
135 1.1 ross -------------------------------------------------------------------------------
136 1.1 ross Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
137 1.1 ross number of bits given in `count'. Any bits shifted off are lost. The value
138 1.1 ross of `count' can be arbitrarily large; in particular, if `count' is greater
139 1.1 ross than 128, the result will be 0. The result is broken into two 64-bit pieces
140 1.1 ross which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
141 1.1 ross -------------------------------------------------------------------------------
142 1.1 ross */
143 1.1 ross INLINE void
144 1.1 ross shift128Right(
145 1.1 ross bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
146 1.1 ross {
147 1.1 ross bits64 z0, z1;
148 1.1 ross int8 negCount = ( - count ) & 63;
149 1.1 ross
150 1.1 ross if ( count == 0 ) {
151 1.1 ross z1 = a1;
152 1.1 ross z0 = a0;
153 1.1 ross }
154 1.1 ross else if ( count < 64 ) {
155 1.1 ross z1 = ( a0<<negCount ) | ( a1>>count );
156 1.1 ross z0 = a0>>count;
157 1.1 ross }
158 1.1 ross else {
159 1.1 ross z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
160 1.1 ross z0 = 0;
161 1.1 ross }
162 1.1 ross *z1Ptr = z1;
163 1.1 ross *z0Ptr = z0;
164 1.1 ross
165 1.1 ross }
166 1.1 ross
167 1.1 ross /*
168 1.1 ross -------------------------------------------------------------------------------
169 1.1 ross Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
170 1.1 ross number of bits given in `count'. If any nonzero bits are shifted off, they
171 1.1 ross are ``jammed'' into the least significant bit of the result by setting the
172 1.1 ross least significant bit to 1. The value of `count' can be arbitrarily large;
173 1.1 ross in particular, if `count' is greater than 128, the result will be either
174 1.1 ross 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
175 1.1 ross nonzero. The result is broken into two 64-bit pieces which are stored at
176 1.1 ross the locations pointed to by `z0Ptr' and `z1Ptr'.
177 1.1 ross -------------------------------------------------------------------------------
178 1.1 ross */
179 1.1 ross INLINE void
180 1.1 ross shift128RightJamming(
181 1.1 ross bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
182 1.1 ross {
183 1.1 ross bits64 z0, z1;
184 1.1 ross int8 negCount = ( - count ) & 63;
185 1.1 ross
186 1.1 ross if ( count == 0 ) {
187 1.1 ross z1 = a1;
188 1.1 ross z0 = a0;
189 1.1 ross }
190 1.1 ross else if ( count < 64 ) {
191 1.1 ross z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
192 1.1 ross z0 = a0>>count;
193 1.1 ross }
194 1.1 ross else {
195 1.1 ross if ( count == 64 ) {
196 1.1 ross z1 = a0 | ( a1 != 0 );
197 1.1 ross }
198 1.1 ross else if ( count < 128 ) {
199 1.1 ross z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
200 1.1 ross }
201 1.1 ross else {
202 1.1 ross z1 = ( ( a0 | a1 ) != 0 );
203 1.1 ross }
204 1.1 ross z0 = 0;
205 1.1 ross }
206 1.1 ross *z1Ptr = z1;
207 1.1 ross *z0Ptr = z0;
208 1.1 ross
209 1.1 ross }
210 1.1 ross
211 1.1 ross /*
212 1.1 ross -------------------------------------------------------------------------------
213 1.1 ross Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
214 1.1 ross by 64 _plus_ the number of bits given in `count'. The shifted result is
215 1.1 ross at most 128 nonzero bits; these are broken into two 64-bit pieces which are
216 1.1 ross stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
217 1.1 ross off form a third 64-bit result as follows: The _last_ bit shifted off is
218 1.1 ross the most-significant bit of the extra result, and the other 63 bits of the
219 1.1 ross extra result are all zero if and only if _all_but_the_last_ bits shifted off
220 1.1 ross were all zero. This extra result is stored in the location pointed to by
221 1.1 ross `z2Ptr'. The value of `count' can be arbitrarily large.
222 1.1 ross (This routine makes more sense if `a0', `a1', and `a2' are considered
223 1.1 ross to form a fixed-point value with binary point between `a1' and `a2'. This
224 1.1 ross fixed-point value is shifted right by the number of bits given in `count',
225 1.1 ross and the integer part of the result is returned at the locations pointed to
226 1.1 ross by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
227 1.1 ross corrupted as described above, and is returned at the location pointed to by
228 1.1 ross `z2Ptr'.)
229 1.1 ross -------------------------------------------------------------------------------
230 1.1 ross */
231 1.1 ross INLINE void
232 1.1 ross shift128ExtraRightJamming(
233 1.1 ross bits64 a0,
234 1.1 ross bits64 a1,
235 1.1 ross bits64 a2,
236 1.1 ross int16 count,
237 1.1 ross bits64 *z0Ptr,
238 1.1 ross bits64 *z1Ptr,
239 1.1 ross bits64 *z2Ptr
240 1.1 ross )
241 1.1 ross {
242 1.1 ross bits64 z0, z1, z2;
243 1.1 ross int8 negCount = ( - count ) & 63;
244 1.1 ross
245 1.1 ross if ( count == 0 ) {
246 1.1 ross z2 = a2;
247 1.1 ross z1 = a1;
248 1.1 ross z0 = a0;
249 1.1 ross }
250 1.1 ross else {
251 1.1 ross if ( count < 64 ) {
252 1.1 ross z2 = a1<<negCount;
253 1.1 ross z1 = ( a0<<negCount ) | ( a1>>count );
254 1.1 ross z0 = a0>>count;
255 1.1 ross }
256 1.1 ross else {
257 1.1 ross if ( count == 64 ) {
258 1.1 ross z2 = a1;
259 1.1 ross z1 = a0;
260 1.1 ross }
261 1.1 ross else {
262 1.1 ross a2 |= a1;
263 1.1 ross if ( count < 128 ) {
264 1.1 ross z2 = a0<<negCount;
265 1.1 ross z1 = a0>>( count & 63 );
266 1.1 ross }
267 1.1 ross else {
268 1.1 ross z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
269 1.1 ross z1 = 0;
270 1.1 ross }
271 1.1 ross }
272 1.1 ross z0 = 0;
273 1.1 ross }
274 1.1 ross z2 |= ( a2 != 0 );
275 1.1 ross }
276 1.1 ross *z2Ptr = z2;
277 1.1 ross *z1Ptr = z1;
278 1.1 ross *z0Ptr = z0;
279 1.1 ross
280 1.1 ross }
281 1.1 ross
282 1.1 ross /*
283 1.1 ross -------------------------------------------------------------------------------
284 1.1 ross Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
285 1.1 ross number of bits given in `count'. Any bits shifted off are lost. The value
286 1.1 ross of `count' must be less than 64. The result is broken into two 64-bit
287 1.1 ross pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
288 1.1 ross -------------------------------------------------------------------------------
289 1.1 ross */
290 1.1 ross INLINE void
291 1.1 ross shortShift128Left(
292 1.1 ross bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
293 1.1 ross {
294 1.1 ross
295 1.1 ross *z1Ptr = a1<<count;
296 1.1 ross *z0Ptr =
297 1.1 ross ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
298 1.1 ross
299 1.1 ross }
300 1.1 ross
301 1.1 ross /*
302 1.1 ross -------------------------------------------------------------------------------
303 1.1 ross Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
304 1.1 ross by the number of bits given in `count'. Any bits shifted off are lost.
305 1.1 ross The value of `count' must be less than 64. The result is broken into three
306 1.1 ross 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
307 1.1 ross `z1Ptr', and `z2Ptr'.
308 1.1 ross -------------------------------------------------------------------------------
309 1.1 ross */
310 1.1 ross INLINE void
311 1.1 ross shortShift192Left(
312 1.1 ross bits64 a0,
313 1.1 ross bits64 a1,
314 1.1 ross bits64 a2,
315 1.1 ross int16 count,
316 1.1 ross bits64 *z0Ptr,
317 1.1 ross bits64 *z1Ptr,
318 1.1 ross bits64 *z2Ptr
319 1.1 ross )
320 1.1 ross {
321 1.1 ross bits64 z0, z1, z2;
322 1.1 ross int8 negCount;
323 1.1 ross
324 1.1 ross z2 = a2<<count;
325 1.1 ross z1 = a1<<count;
326 1.1 ross z0 = a0<<count;
327 1.1 ross if ( 0 < count ) {
328 1.1 ross negCount = ( ( - count ) & 63 );
329 1.1 ross z1 |= a2>>negCount;
330 1.1 ross z0 |= a1>>negCount;
331 1.1 ross }
332 1.1 ross *z2Ptr = z2;
333 1.1 ross *z1Ptr = z1;
334 1.1 ross *z0Ptr = z0;
335 1.1 ross
336 1.1 ross }
337 1.1 ross
338 1.1 ross /*
339 1.1 ross -------------------------------------------------------------------------------
340 1.1 ross Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
341 1.1 ross value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
342 1.1 ross any carry out is lost. The result is broken into two 64-bit pieces which
343 1.1 ross are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
344 1.1 ross -------------------------------------------------------------------------------
345 1.1 ross */
346 1.1 ross INLINE void
347 1.1 ross add128(
348 1.1 ross bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
349 1.1 ross {
350 1.1 ross bits64 z1;
351 1.1 ross
352 1.1 ross z1 = a1 + b1;
353 1.1 ross *z1Ptr = z1;
354 1.1 ross *z0Ptr = a0 + b0 + ( z1 < a1 );
355 1.1 ross
356 1.1 ross }
357 1.1 ross
358 1.1 ross /*
359 1.1 ross -------------------------------------------------------------------------------
360 1.1 ross Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
361 1.1 ross 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
362 1.1 ross modulo 2^192, so any carry out is lost. The result is broken into three
363 1.1 ross 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
364 1.1 ross `z1Ptr', and `z2Ptr'.
365 1.1 ross -------------------------------------------------------------------------------
366 1.1 ross */
367 1.1 ross INLINE void
368 1.1 ross add192(
369 1.1 ross bits64 a0,
370 1.1 ross bits64 a1,
371 1.1 ross bits64 a2,
372 1.1 ross bits64 b0,
373 1.1 ross bits64 b1,
374 1.1 ross bits64 b2,
375 1.1 ross bits64 *z0Ptr,
376 1.1 ross bits64 *z1Ptr,
377 1.1 ross bits64 *z2Ptr
378 1.1 ross )
379 1.1 ross {
380 1.1 ross bits64 z0, z1, z2;
381 1.1 ross int8 carry0, carry1;
382 1.1 ross
383 1.1 ross z2 = a2 + b2;
384 1.1 ross carry1 = ( z2 < a2 );
385 1.1 ross z1 = a1 + b1;
386 1.1 ross carry0 = ( z1 < a1 );
387 1.1 ross z0 = a0 + b0;
388 1.1 ross z1 += carry1;
389 1.1 ross z0 += ( z1 < carry1 );
390 1.1 ross z0 += carry0;
391 1.1 ross *z2Ptr = z2;
392 1.1 ross *z1Ptr = z1;
393 1.1 ross *z0Ptr = z0;
394 1.1 ross
395 1.1 ross }
396 1.1 ross
397 1.1 ross /*
398 1.1 ross -------------------------------------------------------------------------------
399 1.1 ross Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
400 1.1 ross 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
401 1.1 ross 2^128, so any borrow out (carry out) is lost. The result is broken into two
402 1.1 ross 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
403 1.1 ross `z1Ptr'.
404 1.1 ross -------------------------------------------------------------------------------
405 1.1 ross */
406 1.1 ross INLINE void
407 1.1 ross sub128(
408 1.1 ross bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
409 1.1 ross {
410 1.1 ross
411 1.1 ross *z1Ptr = a1 - b1;
412 1.1 ross *z0Ptr = a0 - b0 - ( a1 < b1 );
413 1.1 ross
414 1.1 ross }
415 1.1 ross
416 1.1 ross /*
417 1.1 ross -------------------------------------------------------------------------------
418 1.1 ross Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
419 1.1 ross from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
420 1.1 ross Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
421 1.1 ross result is broken into three 64-bit pieces which are stored at the locations
422 1.1 ross pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
423 1.1 ross -------------------------------------------------------------------------------
424 1.1 ross */
425 1.1 ross INLINE void
426 1.1 ross sub192(
427 1.1 ross bits64 a0,
428 1.1 ross bits64 a1,
429 1.1 ross bits64 a2,
430 1.1 ross bits64 b0,
431 1.1 ross bits64 b1,
432 1.1 ross bits64 b2,
433 1.1 ross bits64 *z0Ptr,
434 1.1 ross bits64 *z1Ptr,
435 1.1 ross bits64 *z2Ptr
436 1.1 ross )
437 1.1 ross {
438 1.1 ross bits64 z0, z1, z2;
439 1.1 ross int8 borrow0, borrow1;
440 1.1 ross
441 1.1 ross z2 = a2 - b2;
442 1.1 ross borrow1 = ( a2 < b2 );
443 1.1 ross z1 = a1 - b1;
444 1.1 ross borrow0 = ( a1 < b1 );
445 1.1 ross z0 = a0 - b0;
446 1.1 ross z0 -= ( z1 < borrow1 );
447 1.1 ross z1 -= borrow1;
448 1.1 ross z0 -= borrow0;
449 1.1 ross *z2Ptr = z2;
450 1.1 ross *z1Ptr = z1;
451 1.1 ross *z0Ptr = z0;
452 1.1 ross
453 1.1 ross }
454 1.1 ross
455 1.1 ross /*
456 1.1 ross -------------------------------------------------------------------------------
457 1.1 ross Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
458 1.1 ross into two 64-bit pieces which are stored at the locations pointed to by
459 1.1 ross `z0Ptr' and `z1Ptr'.
460 1.1 ross -------------------------------------------------------------------------------
461 1.1 ross */
462 1.1 ross INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
463 1.1 ross {
464 1.1 ross bits32 aHigh, aLow, bHigh, bLow;
465 1.1 ross bits64 z0, zMiddleA, zMiddleB, z1;
466 1.1 ross
467 1.1 ross aLow = a;
468 1.1 ross aHigh = a>>32;
469 1.1 ross bLow = b;
470 1.1 ross bHigh = b>>32;
471 1.1 ross z1 = ( (bits64) aLow ) * bLow;
472 1.1 ross zMiddleA = ( (bits64) aLow ) * bHigh;
473 1.1 ross zMiddleB = ( (bits64) aHigh ) * bLow;
474 1.1 ross z0 = ( (bits64) aHigh ) * bHigh;
475 1.1 ross zMiddleA += zMiddleB;
476 1.1 ross z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
477 1.1 ross zMiddleA <<= 32;
478 1.1 ross z1 += zMiddleA;
479 1.1 ross z0 += ( z1 < zMiddleA );
480 1.1 ross *z1Ptr = z1;
481 1.1 ross *z0Ptr = z0;
482 1.1 ross
483 1.1 ross }
484 1.1 ross
485 1.1 ross /*
486 1.1 ross -------------------------------------------------------------------------------
487 1.1 ross Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
488 1.1 ross `b' to obtain a 192-bit product. The product is broken into three 64-bit
489 1.1 ross pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
490 1.1 ross `z2Ptr'.
491 1.1 ross -------------------------------------------------------------------------------
492 1.1 ross */
493 1.1 ross INLINE void
494 1.1 ross mul128By64To192(
495 1.1 ross bits64 a0,
496 1.1 ross bits64 a1,
497 1.1 ross bits64 b,
498 1.1 ross bits64 *z0Ptr,
499 1.1 ross bits64 *z1Ptr,
500 1.1 ross bits64 *z2Ptr
501 1.1 ross )
502 1.1 ross {
503 1.1 ross bits64 z0, z1, z2, more1;
504 1.1 ross
505 1.1 ross mul64To128( a1, b, &z1, &z2 );
506 1.1 ross mul64To128( a0, b, &z0, &more1 );
507 1.1 ross add128( z0, more1, 0, z1, &z0, &z1 );
508 1.1 ross *z2Ptr = z2;
509 1.1 ross *z1Ptr = z1;
510 1.1 ross *z0Ptr = z0;
511 1.1 ross
512 1.1 ross }
513 1.1 ross
514 1.1 ross /*
515 1.1 ross -------------------------------------------------------------------------------
516 1.1 ross Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
517 1.1 ross 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
518 1.1 ross product. The product is broken into four 64-bit pieces which are stored at
519 1.1 ross the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
520 1.1 ross -------------------------------------------------------------------------------
521 1.1 ross */
522 1.1 ross INLINE void
523 1.1 ross mul128To256(
524 1.1 ross bits64 a0,
525 1.1 ross bits64 a1,
526 1.1 ross bits64 b0,
527 1.1 ross bits64 b1,
528 1.1 ross bits64 *z0Ptr,
529 1.1 ross bits64 *z1Ptr,
530 1.1 ross bits64 *z2Ptr,
531 1.1 ross bits64 *z3Ptr
532 1.1 ross )
533 1.1 ross {
534 1.1 ross bits64 z0, z1, z2, z3;
535 1.1 ross bits64 more1, more2;
536 1.1 ross
537 1.1 ross mul64To128( a1, b1, &z2, &z3 );
538 1.1 ross mul64To128( a1, b0, &z1, &more2 );
539 1.1 ross add128( z1, more2, 0, z2, &z1, &z2 );
540 1.1 ross mul64To128( a0, b0, &z0, &more1 );
541 1.1 ross add128( z0, more1, 0, z1, &z0, &z1 );
542 1.1 ross mul64To128( a0, b1, &more1, &more2 );
543 1.1 ross add128( more1, more2, 0, z2, &more1, &z2 );
544 1.1 ross add128( z0, z1, 0, more1, &z0, &z1 );
545 1.1 ross *z3Ptr = z3;
546 1.1 ross *z2Ptr = z2;
547 1.1 ross *z1Ptr = z1;
548 1.1 ross *z0Ptr = z0;
549 1.1 ross
550 1.1 ross }
551 1.1 ross
552 1.1 ross /*
553 1.1 ross -------------------------------------------------------------------------------
554 1.1 ross Returns an approximation to the 64-bit integer quotient obtained by dividing
555 1.1 ross `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
556 1.1 ross divisor `b' must be at least 2^63. If q is the exact quotient truncated
557 1.1 ross toward zero, the approximation returned lies between q and q + 2 inclusive.
558 1.1 ross If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
559 1.1 ross unsigned integer is returned.
560 1.1 ross -------------------------------------------------------------------------------
561 1.1 ross */
562 1.1 ross static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
563 1.1 ross {
564 1.1 ross bits64 b0, b1;
565 1.1 ross bits64 rem0, rem1, term0, term1;
566 1.1 ross bits64 z;
567 1.1 ross
568 1.1 ross if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
569 1.1 ross b0 = b>>32;
570 1.1 ross z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
571 1.1 ross mul64To128( b, z, &term0, &term1 );
572 1.1 ross sub128( a0, a1, term0, term1, &rem0, &rem1 );
573 1.1 ross while ( ( (sbits64) rem0 ) < 0 ) {
574 1.1 ross z -= LIT64( 0x100000000 );
575 1.1 ross b1 = b<<32;
576 1.1 ross add128( rem0, rem1, b0, b1, &rem0, &rem1 );
577 1.1 ross }
578 1.1 ross rem0 = ( rem0<<32 ) | ( rem1>>32 );
579 1.1 ross z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
580 1.1 ross return z;
581 1.1 ross
582 1.1 ross }
583 1.1 ross
584 1.1 ross #ifndef SOFTFLOAT_FOR_GCC /* Not used */
585 1.1 ross /*
586 1.1 ross -------------------------------------------------------------------------------
587 1.1 ross Returns an approximation to the square root of the 32-bit significand given
588 1.1 ross by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
589 1.1 ross `aExp' (the least significant bit) is 1, the integer returned approximates
590 1.1 ross 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
591 1.1 ross is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
592 1.1 ross case, the approximation returned lies strictly within +/-2 of the exact
593 1.1 ross value.
594 1.1 ross -------------------------------------------------------------------------------
595 1.1 ross */
596 1.1 ross static bits32 estimateSqrt32( int16 aExp, bits32 a )
597 1.1 ross {
598 1.1 ross static const bits16 sqrtOddAdjustments[] = {
599 1.1 ross 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
600 1.1 ross 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
601 1.1 ross };
602 1.1 ross static const bits16 sqrtEvenAdjustments[] = {
603 1.1 ross 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
604 1.1 ross 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
605 1.1 ross };
606 1.1 ross int8 index;
607 1.1 ross bits32 z;
608 1.1 ross
609 1.1 ross index = ( a>>27 ) & 15;
610 1.1 ross if ( aExp & 1 ) {
611 1.1 ross z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
612 1.1 ross z = ( ( a / z )<<14 ) + ( z<<15 );
613 1.1 ross a >>= 1;
614 1.1 ross }
615 1.1 ross else {
616 1.1 ross z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
617 1.1 ross z = a / z + z;
618 1.1 ross z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
619 1.1 ross if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
620 1.1 ross }
621 1.1 ross return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
622 1.1 ross
623 1.1 ross }
624 1.1 ross #endif
625 1.1 ross
626 1.1 ross /*
627 1.1 ross -------------------------------------------------------------------------------
628 1.1 ross Returns the number of leading 0 bits before the most-significant 1 bit of
629 1.1 ross `a'. If `a' is zero, 32 is returned.
630 1.1 ross -------------------------------------------------------------------------------
631 1.1 ross */
632 1.1 ross static int8 countLeadingZeros32( bits32 a )
633 1.1 ross {
634 1.1 ross static const int8 countLeadingZerosHigh[] = {
635 1.1 ross 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
636 1.1 ross 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
637 1.1 ross 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
638 1.1 ross 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
639 1.1 ross 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
640 1.1 ross 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
641 1.1 ross 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
642 1.1 ross 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
643 1.1 ross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
644 1.1 ross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645 1.1 ross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646 1.1 ross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647 1.1 ross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648 1.1 ross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649 1.1 ross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650 1.1 ross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
651 1.1 ross };
652 1.1 ross int8 shiftCount;
653 1.1 ross
654 1.1 ross shiftCount = 0;
655 1.1 ross if ( a < 0x10000 ) {
656 1.1 ross shiftCount += 16;
657 1.1 ross a <<= 16;
658 1.1 ross }
659 1.1 ross if ( a < 0x1000000 ) {
660 1.1 ross shiftCount += 8;
661 1.1 ross a <<= 8;
662 1.1 ross }
663 1.1 ross shiftCount += countLeadingZerosHigh[ a>>24 ];
664 1.1 ross return shiftCount;
665 1.1 ross
666 1.1 ross }
667 1.1 ross
668 1.1 ross /*
669 1.1 ross -------------------------------------------------------------------------------
670 1.1 ross Returns the number of leading 0 bits before the most-significant 1 bit of
671 1.1 ross `a'. If `a' is zero, 64 is returned.
672 1.1 ross -------------------------------------------------------------------------------
673 1.1 ross */
674 1.1 ross static int8 countLeadingZeros64( bits64 a )
675 1.1 ross {
676 1.1 ross int8 shiftCount;
677 1.1 ross
678 1.1 ross shiftCount = 0;
679 1.1 ross if ( a < ( (bits64) 1 )<<32 ) {
680 1.1 ross shiftCount += 32;
681 1.1 ross }
682 1.1 ross else {
683 1.1 ross a >>= 32;
684 1.1 ross }
685 1.1 ross shiftCount += countLeadingZeros32( a );
686 1.1 ross return shiftCount;
687 1.1 ross
688 1.1 ross }
689 1.1 ross
690 1.1 ross /*
691 1.1 ross -------------------------------------------------------------------------------
692 1.1 ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
693 1.1 ross is equal to the 128-bit value formed by concatenating `b0' and `b1'.
694 1.1 ross Otherwise, returns 0.
695 1.1 ross -------------------------------------------------------------------------------
696 1.1 ross */
697 1.1 ross INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
698 1.1 ross {
699 1.1 ross
700 1.1 ross return ( a0 == b0 ) && ( a1 == b1 );
701 1.1 ross
702 1.1 ross }
703 1.1 ross
704 1.1 ross /*
705 1.1 ross -------------------------------------------------------------------------------
706 1.1 ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
707 1.1 ross than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
708 1.1 ross Otherwise, returns 0.
709 1.1 ross -------------------------------------------------------------------------------
710 1.1 ross */
711 1.1 ross INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
712 1.1 ross {
713 1.1 ross
714 1.1 ross return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
715 1.1 ross
716 1.1 ross }
717 1.1 ross
718 1.1 ross /*
719 1.1 ross -------------------------------------------------------------------------------
720 1.1 ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
721 1.1 ross than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
722 1.1 ross returns 0.
723 1.1 ross -------------------------------------------------------------------------------
724 1.1 ross */
725 1.1 ross INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
726 1.1 ross {
727 1.1 ross
728 1.1 ross return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
729 1.1 ross
730 1.1 ross }
731 1.1 ross
732 1.1 ross /*
733 1.1 ross -------------------------------------------------------------------------------
734 1.1 ross Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
735 1.1 ross not equal to the 128-bit value formed by concatenating `b0' and `b1'.
736 1.1 ross Otherwise, returns 0.
737 1.1 ross -------------------------------------------------------------------------------
738 1.1 ross */
739 1.1 ross INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
740 1.1 ross {
741 1.1 ross
742 1.1 ross return ( a0 != b0 ) || ( a1 != b1 );
743 1.1 ross
744 1.1 ross }
745 1.1 ross
746