/* $NetBSD: softfloat-macros,v 1.3 2012/03/21 02:32:26 christos Exp $ */

/*
===============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.
29 1.1 bjh21 30 1.1 bjh21 =============================================================================== 31 1.1 bjh21 */ 32 1.1 bjh21 33 1.1 bjh21 /* 34 1.1 bjh21 ------------------------------------------------------------------------------- 35 1.1 bjh21 Shifts `a' right by the number of bits given in `count'. If any nonzero 36 1.1 bjh21 bits are shifted off, they are ``jammed'' into the least significant bit of 37 1.1 bjh21 the result by setting the least significant bit to 1. The value of `count' 38 1.1 bjh21 can be arbitrarily large; in particular, if `count' is greater than 32, the 39 1.1 bjh21 result will be either 0 or 1, depending on whether `a' is zero or nonzero. 40 1.1 bjh21 The result is stored in the location pointed to by `zPtr'. 41 1.1 bjh21 ------------------------------------------------------------------------------- 42 1.1 bjh21 */ 43 1.1 bjh21 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) 44 1.1 bjh21 { 45 1.1 bjh21 bits32 z; 46 1.1 bjh21 47 1.1 bjh21 if ( count == 0 ) { 48 1.1 bjh21 z = a; 49 1.1 bjh21 } 50 1.1 bjh21 else if ( count < 32 ) { 51 1.1 bjh21 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 52 1.1 bjh21 } 53 1.1 bjh21 else { 54 1.1 bjh21 z = ( a != 0 ); 55 1.1 bjh21 } 56 1.1 bjh21 *zPtr = z; 57 1.1 bjh21 58 1.1 bjh21 } 59 1.1 bjh21 60 1.1 bjh21 /* 61 1.1 bjh21 ------------------------------------------------------------------------------- 62 1.1 bjh21 Shifts `a' right by the number of bits given in `count'. If any nonzero 63 1.1 bjh21 bits are shifted off, they are ``jammed'' into the least significant bit of 64 1.1 bjh21 the result by setting the least significant bit to 1. The value of `count' 65 1.1 bjh21 can be arbitrarily large; in particular, if `count' is greater than 64, the 66 1.1 bjh21 result will be either 0 or 1, depending on whether `a' is zero or nonzero. 67 1.1 bjh21 The result is stored in the location pointed to by `zPtr'. 
68 1.1 bjh21 ------------------------------------------------------------------------------- 69 1.1 bjh21 */ 70 1.1 bjh21 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) 71 1.1 bjh21 { 72 1.1 bjh21 bits64 z; 73 1.1 bjh21 74 1.1 bjh21 if ( count == 0 ) { 75 1.1 bjh21 z = a; 76 1.1 bjh21 } 77 1.1 bjh21 else if ( count < 64 ) { 78 1.1 bjh21 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); 79 1.1 bjh21 } 80 1.1 bjh21 else { 81 1.1 bjh21 z = ( a != 0 ); 82 1.1 bjh21 } 83 1.1 bjh21 *zPtr = z; 84 1.1 bjh21 85 1.1 bjh21 } 86 1.1 bjh21 87 1.1 bjh21 /* 88 1.1 bjh21 ------------------------------------------------------------------------------- 89 1.1 bjh21 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 90 1.1 bjh21 _plus_ the number of bits given in `count'. The shifted result is at most 91 1.1 bjh21 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The 92 1.1 bjh21 bits shifted off form a second 64-bit result as follows: The _last_ bit 93 1.1 bjh21 shifted off is the most-significant bit of the extra result, and the other 94 1.1 bjh21 63 bits of the extra result are all zero if and only if _all_but_the_last_ 95 1.1 bjh21 bits shifted off were all zero. This extra result is stored in the location 96 1.1 bjh21 pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 97 1.1 bjh21 (This routine makes more sense if `a0' and `a1' are considered to form a 98 1.1 bjh21 fixed-point value with binary point between `a0' and `a1'. This fixed-point 99 1.1 bjh21 value is shifted right by the number of bits given in `count', and the 100 1.1 bjh21 integer part of the result is returned at the location pointed to by 101 1.1 bjh21 `z0Ptr'. The fractional part of the result may be slightly corrupted as 102 1.1 bjh21 described above, and is returned at the location pointed to by `z1Ptr'.) 
103 1.1 bjh21 ------------------------------------------------------------------------------- 104 1.1 bjh21 */ 105 1.1 bjh21 INLINE void 106 1.1 bjh21 shift64ExtraRightJamming( 107 1.1 bjh21 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 108 1.1 bjh21 { 109 1.1 bjh21 bits64 z0, z1; 110 1.1 bjh21 int8 negCount = ( - count ) & 63; 111 1.1 bjh21 112 1.1 bjh21 if ( count == 0 ) { 113 1.1 bjh21 z1 = a1; 114 1.1 bjh21 z0 = a0; 115 1.1 bjh21 } 116 1.1 bjh21 else if ( count < 64 ) { 117 1.1 bjh21 z1 = ( a0<<negCount ) | ( a1 != 0 ); 118 1.1 bjh21 z0 = a0>>count; 119 1.1 bjh21 } 120 1.1 bjh21 else { 121 1.1 bjh21 if ( count == 64 ) { 122 1.1 bjh21 z1 = a0 | ( a1 != 0 ); 123 1.1 bjh21 } 124 1.1 bjh21 else { 125 1.1 bjh21 z1 = ( ( a0 | a1 ) != 0 ); 126 1.1 bjh21 } 127 1.1 bjh21 z0 = 0; 128 1.1 bjh21 } 129 1.1 bjh21 *z1Ptr = z1; 130 1.1 bjh21 *z0Ptr = z0; 131 1.1 bjh21 132 1.1 bjh21 } 133 1.1 bjh21 134 1.1 bjh21 /* 135 1.1 bjh21 ------------------------------------------------------------------------------- 136 1.1 bjh21 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 137 1.1 bjh21 number of bits given in `count'. Any bits shifted off are lost. The value 138 1.1 bjh21 of `count' can be arbitrarily large; in particular, if `count' is greater 139 1.1 bjh21 than 128, the result will be 0. The result is broken into two 64-bit pieces 140 1.1 bjh21 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
141 1.1 bjh21 ------------------------------------------------------------------------------- 142 1.1 bjh21 */ 143 1.1 bjh21 INLINE void 144 1.1 bjh21 shift128Right( 145 1.1 bjh21 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 146 1.1 bjh21 { 147 1.1 bjh21 bits64 z0, z1; 148 1.1 bjh21 int8 negCount = ( - count ) & 63; 149 1.1 bjh21 150 1.1 bjh21 if ( count == 0 ) { 151 1.1 bjh21 z1 = a1; 152 1.1 bjh21 z0 = a0; 153 1.1 bjh21 } 154 1.1 bjh21 else if ( count < 64 ) { 155 1.1 bjh21 z1 = ( a0<<negCount ) | ( a1>>count ); 156 1.1 bjh21 z0 = a0>>count; 157 1.1 bjh21 } 158 1.1 bjh21 else { 159 1.1 bjh21 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; 160 1.1 bjh21 z0 = 0; 161 1.1 bjh21 } 162 1.1 bjh21 *z1Ptr = z1; 163 1.1 bjh21 *z0Ptr = z0; 164 1.1 bjh21 165 1.1 bjh21 } 166 1.1 bjh21 167 1.1 bjh21 /* 168 1.1 bjh21 ------------------------------------------------------------------------------- 169 1.1 bjh21 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 170 1.1 bjh21 number of bits given in `count'. If any nonzero bits are shifted off, they 171 1.1 bjh21 are ``jammed'' into the least significant bit of the result by setting the 172 1.1 bjh21 least significant bit to 1. The value of `count' can be arbitrarily large; 173 1.1 bjh21 in particular, if `count' is greater than 128, the result will be either 174 1.1 bjh21 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or 175 1.1 bjh21 nonzero. The result is broken into two 64-bit pieces which are stored at 176 1.1 bjh21 the locations pointed to by `z0Ptr' and `z1Ptr'. 
177 1.1 bjh21 ------------------------------------------------------------------------------- 178 1.1 bjh21 */ 179 1.1 bjh21 INLINE void 180 1.1 bjh21 shift128RightJamming( 181 1.1 bjh21 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 182 1.1 bjh21 { 183 1.1 bjh21 bits64 z0, z1; 184 1.1 bjh21 int8 negCount = ( - count ) & 63; 185 1.1 bjh21 186 1.1 bjh21 if ( count == 0 ) { 187 1.1 bjh21 z1 = a1; 188 1.1 bjh21 z0 = a0; 189 1.1 bjh21 } 190 1.1 bjh21 else if ( count < 64 ) { 191 1.1 bjh21 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 192 1.1 bjh21 z0 = a0>>count; 193 1.1 bjh21 } 194 1.1 bjh21 else { 195 1.1 bjh21 if ( count == 64 ) { 196 1.1 bjh21 z1 = a0 | ( a1 != 0 ); 197 1.1 bjh21 } 198 1.1 bjh21 else if ( count < 128 ) { 199 1.1 bjh21 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 200 1.1 bjh21 } 201 1.1 bjh21 else { 202 1.1 bjh21 z1 = ( ( a0 | a1 ) != 0 ); 203 1.1 bjh21 } 204 1.1 bjh21 z0 = 0; 205 1.1 bjh21 } 206 1.1 bjh21 *z1Ptr = z1; 207 1.1 bjh21 *z0Ptr = z0; 208 1.1 bjh21 209 1.1 bjh21 } 210 1.1 bjh21 211 1.1 bjh21 /* 212 1.1 bjh21 ------------------------------------------------------------------------------- 213 1.1 bjh21 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right 214 1.1 bjh21 by 64 _plus_ the number of bits given in `count'. The shifted result is 215 1.1 bjh21 at most 128 nonzero bits; these are broken into two 64-bit pieces which are 216 1.1 bjh21 stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted 217 1.1 bjh21 off form a third 64-bit result as follows: The _last_ bit shifted off is 218 1.1 bjh21 the most-significant bit of the extra result, and the other 63 bits of the 219 1.1 bjh21 extra result are all zero if and only if _all_but_the_last_ bits shifted off 220 1.1 bjh21 were all zero. This extra result is stored in the location pointed to by 221 1.1 bjh21 `z2Ptr'. The value of `count' can be arbitrarily large. 
222 1.1 bjh21 (This routine makes more sense if `a0', `a1', and `a2' are considered 223 1.1 bjh21 to form a fixed-point value with binary point between `a1' and `a2'. This 224 1.1 bjh21 fixed-point value is shifted right by the number of bits given in `count', 225 1.1 bjh21 and the integer part of the result is returned at the locations pointed to 226 1.1 bjh21 by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 227 1.1 bjh21 corrupted as described above, and is returned at the location pointed to by 228 1.1 bjh21 `z2Ptr'.) 229 1.1 bjh21 ------------------------------------------------------------------------------- 230 1.1 bjh21 */ 231 1.1 bjh21 INLINE void 232 1.1 bjh21 shift128ExtraRightJamming( 233 1.1 bjh21 bits64 a0, 234 1.1 bjh21 bits64 a1, 235 1.1 bjh21 bits64 a2, 236 1.1 bjh21 int16 count, 237 1.1 bjh21 bits64 *z0Ptr, 238 1.1 bjh21 bits64 *z1Ptr, 239 1.1 bjh21 bits64 *z2Ptr 240 1.1 bjh21 ) 241 1.1 bjh21 { 242 1.1 bjh21 bits64 z0, z1, z2; 243 1.1 bjh21 int8 negCount = ( - count ) & 63; 244 1.1 bjh21 245 1.1 bjh21 if ( count == 0 ) { 246 1.1 bjh21 z2 = a2; 247 1.1 bjh21 z1 = a1; 248 1.1 bjh21 z0 = a0; 249 1.1 bjh21 } 250 1.1 bjh21 else { 251 1.1 bjh21 if ( count < 64 ) { 252 1.1 bjh21 z2 = a1<<negCount; 253 1.1 bjh21 z1 = ( a0<<negCount ) | ( a1>>count ); 254 1.1 bjh21 z0 = a0>>count; 255 1.1 bjh21 } 256 1.1 bjh21 else { 257 1.1 bjh21 if ( count == 64 ) { 258 1.1 bjh21 z2 = a1; 259 1.1 bjh21 z1 = a0; 260 1.1 bjh21 } 261 1.1 bjh21 else { 262 1.1 bjh21 a2 |= a1; 263 1.1 bjh21 if ( count < 128 ) { 264 1.1 bjh21 z2 = a0<<negCount; 265 1.1 bjh21 z1 = a0>>( count & 63 ); 266 1.1 bjh21 } 267 1.1 bjh21 else { 268 1.1 bjh21 z2 = ( count == 128 ) ? 
a0 : ( a0 != 0 ); 269 1.1 bjh21 z1 = 0; 270 1.1 bjh21 } 271 1.1 bjh21 } 272 1.1 bjh21 z0 = 0; 273 1.1 bjh21 } 274 1.1 bjh21 z2 |= ( a2 != 0 ); 275 1.1 bjh21 } 276 1.1 bjh21 *z2Ptr = z2; 277 1.1 bjh21 *z1Ptr = z1; 278 1.1 bjh21 *z0Ptr = z0; 279 1.1 bjh21 280 1.1 bjh21 } 281 1.1 bjh21 282 1.1 bjh21 /* 283 1.1 bjh21 ------------------------------------------------------------------------------- 284 1.1 bjh21 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the 285 1.1 bjh21 number of bits given in `count'. Any bits shifted off are lost. The value 286 1.1 bjh21 of `count' must be less than 64. The result is broken into two 64-bit 287 1.1 bjh21 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 288 1.1 bjh21 ------------------------------------------------------------------------------- 289 1.1 bjh21 */ 290 1.1 bjh21 INLINE void 291 1.1 bjh21 shortShift128Left( 292 1.1 bjh21 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 293 1.1 bjh21 { 294 1.1 bjh21 295 1.1 bjh21 *z1Ptr = a1<<count; 296 1.1 bjh21 *z0Ptr = 297 1.1 bjh21 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); 298 1.1 bjh21 299 1.1 bjh21 } 300 1.1 bjh21 301 1.1 bjh21 /* 302 1.1 bjh21 ------------------------------------------------------------------------------- 303 1.1 bjh21 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left 304 1.1 bjh21 by the number of bits given in `count'. Any bits shifted off are lost. 305 1.1 bjh21 The value of `count' must be less than 64. The result is broken into three 306 1.1 bjh21 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 307 1.1 bjh21 `z1Ptr', and `z2Ptr'. 
308 1.1 bjh21 ------------------------------------------------------------------------------- 309 1.1 bjh21 */ 310 1.1 bjh21 INLINE void 311 1.1 bjh21 shortShift192Left( 312 1.1 bjh21 bits64 a0, 313 1.1 bjh21 bits64 a1, 314 1.1 bjh21 bits64 a2, 315 1.1 bjh21 int16 count, 316 1.1 bjh21 bits64 *z0Ptr, 317 1.1 bjh21 bits64 *z1Ptr, 318 1.1 bjh21 bits64 *z2Ptr 319 1.1 bjh21 ) 320 1.1 bjh21 { 321 1.1 bjh21 bits64 z0, z1, z2; 322 1.1 bjh21 int8 negCount; 323 1.1 bjh21 324 1.1 bjh21 z2 = a2<<count; 325 1.1 bjh21 z1 = a1<<count; 326 1.1 bjh21 z0 = a0<<count; 327 1.1 bjh21 if ( 0 < count ) { 328 1.1 bjh21 negCount = ( ( - count ) & 63 ); 329 1.1 bjh21 z1 |= a2>>negCount; 330 1.1 bjh21 z0 |= a1>>negCount; 331 1.1 bjh21 } 332 1.1 bjh21 *z2Ptr = z2; 333 1.1 bjh21 *z1Ptr = z1; 334 1.1 bjh21 *z0Ptr = z0; 335 1.1 bjh21 336 1.1 bjh21 } 337 1.1 bjh21 338 1.1 bjh21 /* 339 1.1 bjh21 ------------------------------------------------------------------------------- 340 1.1 bjh21 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit 341 1.1 bjh21 value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so 342 1.1 bjh21 any carry out is lost. The result is broken into two 64-bit pieces which 343 1.1 bjh21 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
344 1.1 bjh21 ------------------------------------------------------------------------------- 345 1.1 bjh21 */ 346 1.1 bjh21 INLINE void 347 1.1 bjh21 add128( 348 1.1 bjh21 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 349 1.1 bjh21 { 350 1.1 bjh21 bits64 z1; 351 1.1 bjh21 352 1.1 bjh21 z1 = a1 + b1; 353 1.1 bjh21 *z1Ptr = z1; 354 1.1 bjh21 *z0Ptr = a0 + b0 + ( z1 < a1 ); 355 1.1 bjh21 356 1.1 bjh21 } 357 1.1 bjh21 358 1.1 bjh21 /* 359 1.1 bjh21 ------------------------------------------------------------------------------- 360 1.1 bjh21 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the 361 1.1 bjh21 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 362 1.1 bjh21 modulo 2^192, so any carry out is lost. The result is broken into three 363 1.1 bjh21 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 364 1.1 bjh21 `z1Ptr', and `z2Ptr'. 365 1.1 bjh21 ------------------------------------------------------------------------------- 366 1.1 bjh21 */ 367 1.1 bjh21 INLINE void 368 1.1 bjh21 add192( 369 1.1 bjh21 bits64 a0, 370 1.1 bjh21 bits64 a1, 371 1.1 bjh21 bits64 a2, 372 1.1 bjh21 bits64 b0, 373 1.1 bjh21 bits64 b1, 374 1.1 bjh21 bits64 b2, 375 1.1 bjh21 bits64 *z0Ptr, 376 1.1 bjh21 bits64 *z1Ptr, 377 1.1 bjh21 bits64 *z2Ptr 378 1.1 bjh21 ) 379 1.1 bjh21 { 380 1.1 bjh21 bits64 z0, z1, z2; 381 1.1 bjh21 int8 carry0, carry1; 382 1.1 bjh21 383 1.1 bjh21 z2 = a2 + b2; 384 1.1 bjh21 carry1 = ( z2 < a2 ); 385 1.1 bjh21 z1 = a1 + b1; 386 1.1 bjh21 carry0 = ( z1 < a1 ); 387 1.1 bjh21 z0 = a0 + b0; 388 1.1 bjh21 z1 += carry1; 389 1.2 tron z0 += ( z1 < (bits64)carry1 ); 390 1.1 bjh21 z0 += carry0; 391 1.1 bjh21 *z2Ptr = z2; 392 1.1 bjh21 *z1Ptr = z1; 393 1.1 bjh21 *z0Ptr = z0; 394 1.1 bjh21 395 1.1 bjh21 } 396 1.1 bjh21 397 1.1 bjh21 /* 398 1.1 bjh21 ------------------------------------------------------------------------------- 399 1.1 bjh21 Subtracts the 128-bit value formed by 
concatenating `b0' and `b1' from the 400 1.1 bjh21 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 401 1.1 bjh21 2^128, so any borrow out (carry out) is lost. The result is broken into two 402 1.1 bjh21 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and 403 1.1 bjh21 `z1Ptr'. 404 1.1 bjh21 ------------------------------------------------------------------------------- 405 1.1 bjh21 */ 406 1.1 bjh21 INLINE void 407 1.1 bjh21 sub128( 408 1.1 bjh21 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 409 1.1 bjh21 { 410 1.1 bjh21 411 1.1 bjh21 *z1Ptr = a1 - b1; 412 1.1 bjh21 *z0Ptr = a0 - b0 - ( a1 < b1 ); 413 1.1 bjh21 414 1.1 bjh21 } 415 1.1 bjh21 416 1.1 bjh21 /* 417 1.1 bjh21 ------------------------------------------------------------------------------- 418 1.1 bjh21 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' 419 1.1 bjh21 from the 192-bit value formed by concatenating `a0', `a1', and `a2'. 420 1.1 bjh21 Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The 421 1.1 bjh21 result is broken into three 64-bit pieces which are stored at the locations 422 1.1 bjh21 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 
423 1.1 bjh21 ------------------------------------------------------------------------------- 424 1.1 bjh21 */ 425 1.1 bjh21 INLINE void 426 1.1 bjh21 sub192( 427 1.1 bjh21 bits64 a0, 428 1.1 bjh21 bits64 a1, 429 1.1 bjh21 bits64 a2, 430 1.1 bjh21 bits64 b0, 431 1.1 bjh21 bits64 b1, 432 1.1 bjh21 bits64 b2, 433 1.1 bjh21 bits64 *z0Ptr, 434 1.1 bjh21 bits64 *z1Ptr, 435 1.1 bjh21 bits64 *z2Ptr 436 1.1 bjh21 ) 437 1.1 bjh21 { 438 1.1 bjh21 bits64 z0, z1, z2; 439 1.1 bjh21 int8 borrow0, borrow1; 440 1.1 bjh21 441 1.1 bjh21 z2 = a2 - b2; 442 1.1 bjh21 borrow1 = ( a2 < b2 ); 443 1.1 bjh21 z1 = a1 - b1; 444 1.1 bjh21 borrow0 = ( a1 < b1 ); 445 1.1 bjh21 z0 = a0 - b0; 446 1.2 tron z0 -= ( z1 < (bits64)borrow1 ); 447 1.1 bjh21 z1 -= borrow1; 448 1.1 bjh21 z0 -= borrow0; 449 1.1 bjh21 *z2Ptr = z2; 450 1.1 bjh21 *z1Ptr = z1; 451 1.1 bjh21 *z0Ptr = z0; 452 1.1 bjh21 453 1.1 bjh21 } 454 1.1 bjh21 455 1.1 bjh21 /* 456 1.1 bjh21 ------------------------------------------------------------------------------- 457 1.1 bjh21 Multiplies `a' by `b' to obtain a 128-bit product. The product is broken 458 1.1 bjh21 into two 64-bit pieces which are stored at the locations pointed to by 459 1.1 bjh21 `z0Ptr' and `z1Ptr'. 
460 1.1 bjh21 ------------------------------------------------------------------------------- 461 1.1 bjh21 */ 462 1.1 bjh21 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) 463 1.1 bjh21 { 464 1.1 bjh21 bits32 aHigh, aLow, bHigh, bLow; 465 1.1 bjh21 bits64 z0, zMiddleA, zMiddleB, z1; 466 1.1 bjh21 467 1.3 christos aLow = (bits32)a; 468 1.3 christos aHigh = (bits32)(a>>32); 469 1.3 christos bLow = (bits32)b; 470 1.3 christos bHigh = (bits32)(b>>32); 471 1.1 bjh21 z1 = ( (bits64) aLow ) * bLow; 472 1.1 bjh21 zMiddleA = ( (bits64) aLow ) * bHigh; 473 1.1 bjh21 zMiddleB = ( (bits64) aHigh ) * bLow; 474 1.1 bjh21 z0 = ( (bits64) aHigh ) * bHigh; 475 1.1 bjh21 zMiddleA += zMiddleB; 476 1.1 bjh21 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); 477 1.1 bjh21 zMiddleA <<= 32; 478 1.1 bjh21 z1 += zMiddleA; 479 1.1 bjh21 z0 += ( z1 < zMiddleA ); 480 1.1 bjh21 *z1Ptr = z1; 481 1.1 bjh21 *z0Ptr = z0; 482 1.1 bjh21 483 1.1 bjh21 } 484 1.1 bjh21 485 1.1 bjh21 /* 486 1.1 bjh21 ------------------------------------------------------------------------------- 487 1.1 bjh21 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by 488 1.1 bjh21 `b' to obtain a 192-bit product. The product is broken into three 64-bit 489 1.1 bjh21 pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 490 1.1 bjh21 `z2Ptr'. 
491 1.1 bjh21 ------------------------------------------------------------------------------- 492 1.1 bjh21 */ 493 1.1 bjh21 INLINE void 494 1.1 bjh21 mul128By64To192( 495 1.1 bjh21 bits64 a0, 496 1.1 bjh21 bits64 a1, 497 1.1 bjh21 bits64 b, 498 1.1 bjh21 bits64 *z0Ptr, 499 1.1 bjh21 bits64 *z1Ptr, 500 1.1 bjh21 bits64 *z2Ptr 501 1.1 bjh21 ) 502 1.1 bjh21 { 503 1.1 bjh21 bits64 z0, z1, z2, more1; 504 1.1 bjh21 505 1.1 bjh21 mul64To128( a1, b, &z1, &z2 ); 506 1.1 bjh21 mul64To128( a0, b, &z0, &more1 ); 507 1.1 bjh21 add128( z0, more1, 0, z1, &z0, &z1 ); 508 1.1 bjh21 *z2Ptr = z2; 509 1.1 bjh21 *z1Ptr = z1; 510 1.1 bjh21 *z0Ptr = z0; 511 1.1 bjh21 512 1.1 bjh21 } 513 1.1 bjh21 514 1.1 bjh21 /* 515 1.1 bjh21 ------------------------------------------------------------------------------- 516 1.1 bjh21 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the 517 1.1 bjh21 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit 518 1.1 bjh21 product. The product is broken into four 64-bit pieces which are stored at 519 1.1 bjh21 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 
520 1.1 bjh21 ------------------------------------------------------------------------------- 521 1.1 bjh21 */ 522 1.1 bjh21 INLINE void 523 1.1 bjh21 mul128To256( 524 1.1 bjh21 bits64 a0, 525 1.1 bjh21 bits64 a1, 526 1.1 bjh21 bits64 b0, 527 1.1 bjh21 bits64 b1, 528 1.1 bjh21 bits64 *z0Ptr, 529 1.1 bjh21 bits64 *z1Ptr, 530 1.1 bjh21 bits64 *z2Ptr, 531 1.1 bjh21 bits64 *z3Ptr 532 1.1 bjh21 ) 533 1.1 bjh21 { 534 1.1 bjh21 bits64 z0, z1, z2, z3; 535 1.1 bjh21 bits64 more1, more2; 536 1.1 bjh21 537 1.1 bjh21 mul64To128( a1, b1, &z2, &z3 ); 538 1.1 bjh21 mul64To128( a1, b0, &z1, &more2 ); 539 1.1 bjh21 add128( z1, more2, 0, z2, &z1, &z2 ); 540 1.1 bjh21 mul64To128( a0, b0, &z0, &more1 ); 541 1.1 bjh21 add128( z0, more1, 0, z1, &z0, &z1 ); 542 1.1 bjh21 mul64To128( a0, b1, &more1, &more2 ); 543 1.1 bjh21 add128( more1, more2, 0, z2, &more1, &z2 ); 544 1.1 bjh21 add128( z0, z1, 0, more1, &z0, &z1 ); 545 1.1 bjh21 *z3Ptr = z3; 546 1.1 bjh21 *z2Ptr = z2; 547 1.1 bjh21 *z1Ptr = z1; 548 1.1 bjh21 *z0Ptr = z0; 549 1.1 bjh21 550 1.1 bjh21 } 551 1.1 bjh21 552 1.1 bjh21 /* 553 1.1 bjh21 ------------------------------------------------------------------------------- 554 1.1 bjh21 Returns an approximation to the 64-bit integer quotient obtained by dividing 555 1.1 bjh21 `b' into the 128-bit value formed by concatenating `a0' and `a1'. The 556 1.1 bjh21 divisor `b' must be at least 2^63. If q is the exact quotient truncated 557 1.1 bjh21 toward zero, the approximation returned lies between q and q + 2 inclusive. 558 1.1 bjh21 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit 559 1.1 bjh21 unsigned integer is returned. 
560 1.1 bjh21 ------------------------------------------------------------------------------- 561 1.1 bjh21 */ 562 1.1 bjh21 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) 563 1.1 bjh21 { 564 1.1 bjh21 bits64 b0, b1; 565 1.1 bjh21 bits64 rem0, rem1, term0, term1; 566 1.1 bjh21 bits64 z; 567 1.1 bjh21 568 1.1 bjh21 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); 569 1.1 bjh21 b0 = b>>32; 570 1.1 bjh21 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; 571 1.1 bjh21 mul64To128( b, z, &term0, &term1 ); 572 1.1 bjh21 sub128( a0, a1, term0, term1, &rem0, &rem1 ); 573 1.1 bjh21 while ( ( (sbits64) rem0 ) < 0 ) { 574 1.1 bjh21 z -= LIT64( 0x100000000 ); 575 1.1 bjh21 b1 = b<<32; 576 1.1 bjh21 add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 577 1.1 bjh21 } 578 1.1 bjh21 rem0 = ( rem0<<32 ) | ( rem1>>32 ); 579 1.1 bjh21 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; 580 1.1 bjh21 return z; 581 1.1 bjh21 582 1.1 bjh21 } 583 1.1 bjh21 584 1.1 bjh21 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128) 585 1.1 bjh21 /* 586 1.1 bjh21 ------------------------------------------------------------------------------- 587 1.1 bjh21 Returns an approximation to the square root of the 32-bit significand given 588 1.1 bjh21 by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 589 1.1 bjh21 `aExp' (the least significant bit) is 1, the integer returned approximates 590 1.1 bjh21 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 591 1.1 bjh21 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 592 1.1 bjh21 case, the approximation returned lies strictly within +/-2 of the exact 593 1.1 bjh21 value. 
594 1.1 bjh21 ------------------------------------------------------------------------------- 595 1.1 bjh21 */ 596 1.1 bjh21 static bits32 estimateSqrt32( int16 aExp, bits32 a ) 597 1.1 bjh21 { 598 1.1 bjh21 static const bits16 sqrtOddAdjustments[] = { 599 1.1 bjh21 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 600 1.1 bjh21 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 601 1.1 bjh21 }; 602 1.1 bjh21 static const bits16 sqrtEvenAdjustments[] = { 603 1.1 bjh21 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 604 1.1 bjh21 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 605 1.1 bjh21 }; 606 1.1 bjh21 int8 idx; 607 1.1 bjh21 bits32 z; 608 1.1 bjh21 609 1.1 bjh21 idx = ( a>>27 ) & 15; 610 1.1 bjh21 if ( aExp & 1 ) { 611 1.1 bjh21 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ]; 612 1.1 bjh21 z = ( ( a / z )<<14 ) + ( z<<15 ); 613 1.1 bjh21 a >>= 1; 614 1.1 bjh21 } 615 1.1 bjh21 else { 616 1.1 bjh21 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ]; 617 1.1 bjh21 z = a / z + z; 618 1.1 bjh21 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 619 1.3 christos if ( z <= a ) return (bits32) ( ( (bits32) a )>>1 ); 620 1.1 bjh21 } 621 1.1 bjh21 return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); 622 1.1 bjh21 623 1.1 bjh21 } 624 1.1 bjh21 #endif 625 1.1 bjh21 626 1.1 bjh21 /* 627 1.1 bjh21 ------------------------------------------------------------------------------- 628 1.1 bjh21 Returns the number of leading 0 bits before the most-significant 1 bit of 629 1.1 bjh21 `a'. If `a' is zero, 32 is returned. 
630 1.1 bjh21 ------------------------------------------------------------------------------- 631 1.1 bjh21 */ 632 1.1 bjh21 static int8 countLeadingZeros32( bits32 a ) 633 1.1 bjh21 { 634 1.1 bjh21 static const int8 countLeadingZerosHigh[] = { 635 1.1 bjh21 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 636 1.1 bjh21 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 637 1.1 bjh21 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 638 1.1 bjh21 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 639 1.1 bjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 640 1.1 bjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 641 1.1 bjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 642 1.1 bjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 643 1.1 bjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 644 1.1 bjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 645 1.1 bjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 646 1.1 bjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 647 1.1 bjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 648 1.1 bjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 649 1.1 bjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 650 1.1 bjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 651 1.1 bjh21 }; 652 1.1 bjh21 int8 shiftCount; 653 1.1 bjh21 654 1.1 bjh21 shiftCount = 0; 655 1.1 bjh21 if ( a < 0x10000 ) { 656 1.1 bjh21 shiftCount += 16; 657 1.1 bjh21 a <<= 16; 658 1.1 bjh21 } 659 1.1 bjh21 if ( a < 0x1000000 ) { 660 1.1 bjh21 shiftCount += 8; 661 1.1 bjh21 a <<= 8; 662 1.1 bjh21 } 663 1.1 bjh21 shiftCount += countLeadingZerosHigh[ a>>24 ]; 664 1.1 bjh21 return shiftCount; 665 1.1 bjh21 666 1.1 bjh21 } 667 1.1 bjh21 668 1.1 bjh21 /* 669 1.1 bjh21 ------------------------------------------------------------------------------- 670 1.1 bjh21 Returns the number of leading 0 bits before the most-significant 1 bit of 671 1.1 bjh21 `a'. If `a' is zero, 64 is returned. 
672 1.1 bjh21 ------------------------------------------------------------------------------- 673 1.1 bjh21 */ 674 1.1 bjh21 static int8 countLeadingZeros64( bits64 a ) 675 1.1 bjh21 { 676 1.1 bjh21 int8 shiftCount; 677 1.1 bjh21 678 1.1 bjh21 shiftCount = 0; 679 1.1 bjh21 if ( a < ( (bits64) 1 )<<32 ) { 680 1.1 bjh21 shiftCount += 32; 681 1.1 bjh21 } 682 1.1 bjh21 else { 683 1.1 bjh21 a >>= 32; 684 1.1 bjh21 } 685 1.3 christos shiftCount += (int8)countLeadingZeros32( (bits32)a ); 686 1.1 bjh21 return shiftCount; 687 1.1 bjh21 688 1.1 bjh21 } 689 1.1 bjh21 690 1.1 bjh21 /* 691 1.1 bjh21 ------------------------------------------------------------------------------- 692 1.1 bjh21 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' 693 1.1 bjh21 is equal to the 128-bit value formed by concatenating `b0' and `b1'. 694 1.1 bjh21 Otherwise, returns 0. 695 1.1 bjh21 ------------------------------------------------------------------------------- 696 1.1 bjh21 */ 697 1.1 bjh21 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 698 1.1 bjh21 { 699 1.1 bjh21 700 1.1 bjh21 return ( a0 == b0 ) && ( a1 == b1 ); 701 1.1 bjh21 702 1.1 bjh21 } 703 1.1 bjh21 704 1.1 bjh21 /* 705 1.1 bjh21 ------------------------------------------------------------------------------- 706 1.1 bjh21 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 707 1.1 bjh21 than or equal to the 128-bit value formed by concatenating `b0' and `b1'. 708 1.1 bjh21 Otherwise, returns 0. 
709 1.1 bjh21 ------------------------------------------------------------------------------- 710 1.1 bjh21 */ 711 1.1 bjh21 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 712 1.1 bjh21 { 713 1.1 bjh21 714 1.1 bjh21 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 715 1.1 bjh21 716 1.1 bjh21 } 717 1.1 bjh21 718 1.1 bjh21 /* 719 1.1 bjh21 ------------------------------------------------------------------------------- 720 1.1 bjh21 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 721 1.1 bjh21 than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 722 1.1 bjh21 returns 0. 723 1.1 bjh21 ------------------------------------------------------------------------------- 724 1.1 bjh21 */ 725 1.1 bjh21 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 726 1.1 bjh21 { 727 1.1 bjh21 728 1.1 bjh21 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 729 1.1 bjh21 730 1.1 bjh21 } 731 1.1 bjh21 732 1.1 bjh21 /* 733 1.1 bjh21 ------------------------------------------------------------------------------- 734 1.1 bjh21 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 735 1.1 bjh21 not equal to the 128-bit value formed by concatenating `b0' and `b1'. 736 1.1 bjh21 Otherwise, returns 0. 737 1.1 bjh21 ------------------------------------------------------------------------------- 738 1.1 bjh21 */ 739 1.1 bjh21 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 740 1.1 bjh21 { 741 1.1 bjh21 742 1.1 bjh21 return ( a0 != b0 ) || ( a1 != b1 ); 743 1.1 bjh21 744 1.1 bjh21 } 745 1.1 bjh21 746