1 1.1 mrg /* IEEE-754 single-precision functions for Xtensa 2 1.1.1.13 mrg Copyright (C) 2006-2024 Free Software Foundation, Inc. 3 1.1 mrg Contributed by Bob Wilson (bwilson (at) tensilica.com) at Tensilica. 4 1.1 mrg 5 1.1 mrg This file is part of GCC. 6 1.1 mrg 7 1.1 mrg GCC is free software; you can redistribute it and/or modify it 8 1.1 mrg under the terms of the GNU General Public License as published by 9 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 10 1.1 mrg any later version. 11 1.1 mrg 12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT 13 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 1.1 mrg License for more details. 16 1.1 mrg 17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 18 1.1 mrg permissions described in the GCC Runtime Library Exception, version 19 1.1 mrg 3.1, as published by the Free Software Foundation. 20 1.1 mrg 21 1.1 mrg You should have received a copy of the GNU General Public License and 22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 1.1 mrg <http://www.gnu.org/licenses/>. */ 25 1.1 mrg 26 1.1 mrg #ifdef __XTENSA_EB__ 27 1.1 mrg #define xh a2 28 1.1 mrg #define xl a3 29 1.1 mrg #define yh a4 30 1.1 mrg #define yl a5 31 1.1 mrg #else 32 1.1 mrg #define xh a3 33 1.1 mrg #define xl a2 34 1.1 mrg #define yh a5 35 1.1 mrg #define yl a4 36 1.1 mrg #endif 37 1.1 mrg 38 1.1 mrg /* Warning! The branch displacements for some Xtensa branch instructions 39 1.1 mrg are quite small, and this code has been carefully laid out to keep 40 1.1 mrg branch targets in range. If you change anything, be sure to check that 41 1.1 mrg the assembler is not relaxing anything to branch over a jump. 
/* NOTE(review): This chunk is GCC's Xtensa single-precision soft-float
   support (__negsf2, __addsf3, __subsf3) flattened through CVS
   "annotate" output: each original source line is preceded by its line
   number and revision/author residue (e.g. "83 1.1.1.5 mrg"), and the
   original line breaks have been lost.  The text is preserved
   byte-for-byte below -- only standalone comment lines are added --
   because the original explicitly warns that the code layout is tuned
   to keep Xtensa branch displacements in range and must not be
   reflowed.  Register use (from the visible code): a2/a3 = x/y
   operands and result, a6 = 0x7f800000 exponent mask, a9 = guard bits
   for rounding.  */
*/ 42 1.1 mrg 43 1.1 mrg #ifdef L_negsf2 44 1.1 mrg 45 1.1 mrg .align 4 46 1.1 mrg .global __negsf2 47 1.1 mrg .type __negsf2, @function 48 1.1 mrg __negsf2: 49 1.1 mrg leaf_entry sp, 16 50 1.1 mrg movi a4, 0x80000000 51 1.1 mrg xor a2, a2, a4 52 1.1 mrg leaf_return 53 1.1 mrg 54 1.1 mrg #endif /* L_negsf2 */ 55 1.1 mrg 56 1.1 mrg #ifdef L_addsubsf3 57 1.1 mrg 58 1.1.1.5 mrg .literal_position 59 1.1 mrg /* Addition */ 60 1.1 mrg __addsf3_aux: 61 1.1 mrg 62 1.1 mrg /* Handle NaNs and Infinities. (This code is placed before the 63 1.1 mrg start of the function just to keep it in range of the limited 64 1.1 mrg branch displacements.) */ 65 1.1 mrg 66 1.1 mrg .Ladd_xnan_or_inf: 67 1.1 mrg /* If y is neither Infinity nor NaN, return x. */ 68 1.1.1.5 mrg bnall a3, a6, .Ladd_return_nan_or_inf 69 1.1 mrg /* If x is a NaN, return it. Otherwise, return y. */ 70 1.1 mrg slli a7, a2, 9 71 1.1.1.5 mrg bnez a7, .Ladd_return_nan 72 1.1 mrg 73 1.1 mrg .Ladd_ynan_or_inf: 74 1.1 mrg /* Return y. */ 75 1.1 mrg mov a2, a3 76 1.1.1.5 mrg 77 1.1.1.5 mrg .Ladd_return_nan_or_inf: 78 1.1.1.5 mrg slli a7, a2, 9 79 1.1.1.5 mrg bnez a7, .Ladd_return_nan 80 1.1.1.5 mrg leaf_return 81 1.1.1.5 mrg 82 1.1.1.5 mrg .Ladd_return_nan: 83 1.1.1.5 mrg movi a6, 0x400000 /* make it a quiet NaN */ 84 1.1.1.5 mrg or a2, a2, a6 85 1.1 mrg leaf_return 86 1.1 mrg 87 1.1 mrg .Ladd_opposite_signs: 88 1.1 mrg /* Operand signs differ. Do a subtraction. */ 89 1.1 mrg slli a7, a6, 8 90 1.1 mrg xor a3, a3, a7 91 1.1 mrg j .Lsub_same_sign 92 1.1 mrg 93 1.1 mrg .align 4 94 1.1 mrg .global __addsf3 95 1.1 mrg .type __addsf3, @function 96 1.1 mrg __addsf3: 97 1.1 mrg leaf_entry sp, 16 98 1.1 mrg movi a6, 0x7f800000 99 1.1 mrg 100 1.1 mrg /* Check if the two operands have the same sign. */ 101 1.1 mrg xor a7, a2, a3 102 1.1 mrg bltz a7, .Ladd_opposite_signs 103 1.1 mrg 104 1.1 mrg .Ladd_same_sign: 105 1.1 mrg /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). 
/* Next: __addsf3 main path -- NaN/Inf dispatch, exponent comparison,
   alignment of the smaller operand y (shifted-out bits kept in a9 for
   rounding), the add itself, and the subnormal-y entry point.  */
*/ 106 1.1 mrg ball a2, a6, .Ladd_xnan_or_inf 107 1.1 mrg ball a3, a6, .Ladd_ynan_or_inf 108 1.1 mrg 109 1.1 mrg /* Compare the exponents. The smaller operand will be shifted 110 1.1 mrg right by the exponent difference and added to the larger 111 1.1 mrg one. */ 112 1.1 mrg extui a7, a2, 23, 9 113 1.1 mrg extui a8, a3, 23, 9 114 1.1 mrg bltu a7, a8, .Ladd_shiftx 115 1.1 mrg 116 1.1 mrg .Ladd_shifty: 117 1.1 mrg /* Check if the smaller (or equal) exponent is zero. */ 118 1.1 mrg bnone a3, a6, .Ladd_yexpzero 119 1.1 mrg 120 1.1 mrg /* Replace y sign/exponent with 0x008. */ 121 1.1 mrg or a3, a3, a6 122 1.1 mrg slli a3, a3, 8 123 1.1 mrg srli a3, a3, 8 124 1.1 mrg 125 1.1 mrg .Ladd_yexpdiff: 126 1.1 mrg /* Compute the exponent difference. */ 127 1.1 mrg sub a10, a7, a8 128 1.1 mrg 129 1.1 mrg /* Exponent difference > 32 -- just return the bigger value. */ 130 1.1 mrg bgeui a10, 32, 1f 131 1.1 mrg 132 1.1 mrg /* Shift y right by the exponent difference. Any bits that are 133 1.1 mrg shifted out of y are saved in a9 for rounding the result. */ 134 1.1 mrg ssr a10 135 1.1 mrg movi a9, 0 136 1.1 mrg src a9, a3, a9 137 1.1 mrg srl a3, a3 138 1.1 mrg 139 1.1 mrg /* Do the addition. */ 140 1.1 mrg add a2, a2, a3 141 1.1 mrg 142 1.1 mrg /* Check if the add overflowed into the exponent. */ 143 1.1 mrg extui a10, a2, 23, 9 144 1.1 mrg beq a10, a7, .Ladd_round 145 1.1 mrg mov a8, a7 146 1.1 mrg j .Ladd_carry 147 1.1 mrg 148 1.1 mrg .Ladd_yexpzero: 149 1.1 mrg /* y is a subnormal value. Replace its sign/exponent with zero, 150 1.1 mrg i.e., no implicit "1.0", and increment the apparent exponent 151 1.1 mrg because subnormals behave as if they had the minimum (nonzero) 152 1.1 mrg exponent. Test for the case when both exponents are zero. */ 153 1.1 mrg slli a3, a3, 9 154 1.1 mrg srli a3, a3, 9 155 1.1 mrg bnone a2, a6, .Ladd_bothexpzero 156 1.1 mrg addi a8, a8, 1 157 1.1 mrg j .Ladd_yexpdiff 158 1.1 mrg 159 1.1 mrg .Ladd_bothexpzero: 160 1.1 mrg /* Both exponents are zero. 
/* Next: both-exponents-zero fast path (plain add, no rounding needed),
   subnormal-x entry, the mirrored "shiftx" alignment path, and
   round-to-nearest with the even tie-break in .Ladd_exactlyhalf.  */
Handle this as a special case. There 161 1.1 mrg is no need to shift or round, and the normal code for handling 162 1.1 mrg a carry into the exponent field will not work because it 163 1.1 mrg assumes there is an implicit "1.0" that needs to be added. */ 164 1.1 mrg add a2, a2, a3 165 1.1 mrg 1: leaf_return 166 1.1 mrg 167 1.1 mrg .Ladd_xexpzero: 168 1.1 mrg /* Same as "yexpzero" except skip handling the case when both 169 1.1 mrg exponents are zero. */ 170 1.1 mrg slli a2, a2, 9 171 1.1 mrg srli a2, a2, 9 172 1.1 mrg addi a7, a7, 1 173 1.1 mrg j .Ladd_xexpdiff 174 1.1 mrg 175 1.1 mrg .Ladd_shiftx: 176 1.1 mrg /* Same thing as the "shifty" code, but with x and y swapped. Also, 177 1.1 mrg because the exponent difference is always nonzero in this version, 178 1.1 mrg the shift sequence can use SLL and skip loading a constant zero. */ 179 1.1 mrg bnone a2, a6, .Ladd_xexpzero 180 1.1 mrg 181 1.1 mrg or a2, a2, a6 182 1.1 mrg slli a2, a2, 8 183 1.1 mrg srli a2, a2, 8 184 1.1 mrg 185 1.1 mrg .Ladd_xexpdiff: 186 1.1 mrg sub a10, a8, a7 187 1.1 mrg bgeui a10, 32, .Ladd_returny 188 1.1 mrg 189 1.1 mrg ssr a10 190 1.1 mrg sll a9, a2 191 1.1 mrg srl a2, a2 192 1.1 mrg 193 1.1 mrg add a2, a2, a3 194 1.1 mrg 195 1.1 mrg /* Check if the add overflowed into the exponent. */ 196 1.1 mrg extui a10, a2, 23, 9 197 1.1 mrg bne a10, a8, .Ladd_carry 198 1.1 mrg 199 1.1 mrg .Ladd_round: 200 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */ 201 1.1 mrg bgez a9, 1f 202 1.1 mrg addi a2, a2, 1 203 1.1 mrg 204 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */ 205 1.1 mrg slli a9, a9, 1 206 1.1 mrg beqz a9, .Ladd_exactlyhalf 207 1.1 mrg 1: leaf_return 208 1.1 mrg 209 1.1 mrg .Ladd_returny: 210 1.1 mrg mov a2, a3 211 1.1 mrg leaf_return 212 1.1 mrg 213 1.1 mrg .Ladd_carry: 214 1.1 mrg /* The addition has overflowed into the exponent field, so the 215 1.1 mrg value needs to be renormalized. 
/* Next: carry renormalization -- shift the mantissa right one bit and
   fold the incremented exponent back in as ((x + 1) << 22); exponent
   overflow is detected against the 0x7f800000 mask in a6 and produces
   an Infinity.  */
The mantissa of the result 216 1.1 mrg can be recovered by subtracting the original exponent and 217 1.1 mrg adding 0x800000 (which is the explicit "1.0" for the 218 1.1 mrg mantissa of the non-shifted operand -- the "1.0" for the 219 1.1 mrg shifted operand was already added). The mantissa can then 220 1.1 mrg be shifted right by one bit. The explicit "1.0" of the 221 1.1 mrg shifted mantissa then needs to be replaced by the exponent, 222 1.1 mrg incremented by one to account for the normalizing shift. 223 1.1 mrg It is faster to combine these operations: do the shift first 224 1.1 mrg and combine the additions and subtractions. If x is the 225 1.1 mrg original exponent, the result is: 226 1.1 mrg shifted mantissa - (x << 22) + (1 << 22) + (x << 23) 227 1.1 mrg or: 228 1.1 mrg shifted mantissa + ((x + 1) << 22) 229 1.1 mrg Note that the exponent is incremented here by leaving the 230 1.1 mrg explicit "1.0" of the mantissa in the exponent field. */ 231 1.1 mrg 232 1.1 mrg /* Shift x right by one bit. Save the lsb. */ 233 1.1 mrg mov a10, a2 234 1.1 mrg srli a2, a2, 1 235 1.1 mrg 236 1.1 mrg /* See explanation above. The original exponent is in a8. */ 237 1.1 mrg addi a8, a8, 1 238 1.1 mrg slli a8, a8, 22 239 1.1 mrg add a2, a2, a8 240 1.1 mrg 241 1.1 mrg /* Return an Infinity if the exponent overflowed. */ 242 1.1 mrg ball a2, a6, .Ladd_infinity 243 1.1 mrg 244 1.1 mrg /* Same thing as the "round" code except the msb of the leftover 245 1.1 mrg fraction is bit 0 of a10, with the rest of the fraction in a9. */ 246 1.1 mrg bbci.l a10, 0, 1f 247 1.1 mrg addi a2, a2, 1 248 1.1 mrg beqz a9, .Ladd_exactlyhalf 249 1.1 mrg 1: leaf_return 250 1.1 mrg 251 1.1 mrg .Ladd_infinity: 252 1.1 mrg /* Clear the mantissa. */ 253 1.1 mrg srli a2, a2, 23 254 1.1 mrg slli a2, a2, 23 255 1.1 mrg 256 1.1 mrg /* The sign bit may have been lost in a carry-out. Put it back. 
/* Next: end of the addition path, then __subsf3_aux NaN/Inf handling
   (y is negated by XORing the sign bit built as a6 << 8) and the
   __subsf3 entry point; opposite-sign operands are re-routed to
   .Ladd_same_sign.  */
*/ 257 1.1 mrg slli a8, a8, 1 258 1.1 mrg or a2, a2, a8 259 1.1 mrg leaf_return 260 1.1 mrg 261 1.1 mrg .Ladd_exactlyhalf: 262 1.1 mrg /* Round down to the nearest even value. */ 263 1.1 mrg srli a2, a2, 1 264 1.1 mrg slli a2, a2, 1 265 1.1 mrg leaf_return 266 1.1 mrg 267 1.1 mrg 268 1.1 mrg /* Subtraction */ 269 1.1 mrg __subsf3_aux: 270 1.1 mrg 271 1.1 mrg /* Handle NaNs and Infinities. (This code is placed before the 272 1.1 mrg start of the function just to keep it in range of the limited 273 1.1 mrg branch displacements.) */ 274 1.1 mrg 275 1.1 mrg .Lsub_xnan_or_inf: 276 1.1 mrg /* If y is neither Infinity nor NaN, return x. */ 277 1.1.1.5 mrg bnall a3, a6, .Lsub_return_nan_or_inf 278 1.1 mrg /* Both x and y are either NaN or Inf, so the result is NaN. */ 279 1.1.1.5 mrg 280 1.1.1.5 mrg .Lsub_return_nan: 281 1.1 mrg movi a4, 0x400000 /* make it a quiet NaN */ 282 1.1 mrg or a2, a2, a4 283 1.1.1.5 mrg leaf_return 284 1.1 mrg 285 1.1 mrg .Lsub_ynan_or_inf: 286 1.1 mrg /* Negate y and return it. */ 287 1.1 mrg slli a7, a6, 8 288 1.1 mrg xor a2, a3, a7 289 1.1.1.5 mrg 290 1.1.1.5 mrg .Lsub_return_nan_or_inf: 291 1.1.1.5 mrg slli a7, a2, 9 292 1.1.1.5 mrg bnez a7, .Lsub_return_nan 293 1.1 mrg leaf_return 294 1.1 mrg 295 1.1 mrg .Lsub_opposite_signs: 296 1.1 mrg /* Operand signs differ. Do an addition. */ 297 1.1 mrg slli a7, a6, 8 298 1.1 mrg xor a3, a3, a7 299 1.1 mrg j .Ladd_same_sign 300 1.1 mrg 301 1.1 mrg .align 4 302 1.1 mrg .global __subsf3 303 1.1 mrg .type __subsf3, @function 304 1.1 mrg __subsf3: 305 1.1 mrg leaf_entry sp, 16 306 1.1 mrg movi a6, 0x7f800000 307 1.1 mrg 308 1.1 mrg /* Check if the two operands have the same sign. */ 309 1.1 mrg xor a7, a2, a3 310 1.1 mrg bltz a7, .Lsub_opposite_signs 311 1.1 mrg 312 1.1 mrg .Lsub_same_sign: 313 1.1 mrg /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ 314 1.1 mrg ball a2, a6, .Lsub_xnan_or_inf 315 1.1 mrg ball a3, a6, .Lsub_ynan_or_inf 316 1.1 mrg 317 1.1 mrg /* Compare the operands. 
/* Next: subtraction with y the smaller operand -- align y, subtract,
   propagate the borrow from the guard bits in a9 (neg / addi -1 /
   movnez), and detect underflow into the exponent; plus the
   subnormal-y entry.  */
In contrast to addition, the entire 318 1.1 mrg value matters here. */ 319 1.1 mrg extui a7, a2, 23, 8 320 1.1 mrg extui a8, a3, 23, 8 321 1.1 mrg bltu a2, a3, .Lsub_xsmaller 322 1.1 mrg 323 1.1 mrg .Lsub_ysmaller: 324 1.1 mrg /* Check if the smaller (or equal) exponent is zero. */ 325 1.1 mrg bnone a3, a6, .Lsub_yexpzero 326 1.1 mrg 327 1.1 mrg /* Replace y sign/exponent with 0x008. */ 328 1.1 mrg or a3, a3, a6 329 1.1 mrg slli a3, a3, 8 330 1.1 mrg srli a3, a3, 8 331 1.1 mrg 332 1.1 mrg .Lsub_yexpdiff: 333 1.1 mrg /* Compute the exponent difference. */ 334 1.1 mrg sub a10, a7, a8 335 1.1 mrg 336 1.1 mrg /* Exponent difference > 32 -- just return the bigger value. */ 337 1.1 mrg bgeui a10, 32, 1f 338 1.1 mrg 339 1.1 mrg /* Shift y right by the exponent difference. Any bits that are 340 1.1 mrg shifted out of y are saved in a9 for rounding the result. */ 341 1.1 mrg ssr a10 342 1.1 mrg movi a9, 0 343 1.1 mrg src a9, a3, a9 344 1.1 mrg srl a3, a3 345 1.1 mrg 346 1.1 mrg sub a2, a2, a3 347 1.1 mrg 348 1.1 mrg /* Subtract the leftover bits in a9 from zero and propagate any 349 1.1 mrg borrow from a2. */ 350 1.1 mrg neg a9, a9 351 1.1 mrg addi a10, a2, -1 352 1.1 mrg movnez a2, a10, a9 353 1.1 mrg 354 1.1 mrg /* Check if the subtract underflowed into the exponent. */ 355 1.1 mrg extui a10, a2, 23, 8 356 1.1 mrg beq a10, a7, .Lsub_round 357 1.1 mrg j .Lsub_borrow 358 1.1 mrg 359 1.1 mrg .Lsub_yexpzero: 360 1.1 mrg /* Return zero if the inputs are equal. (For the non-subnormal 361 1.1 mrg case, subtracting the "1.0" will cause a borrow from the exponent 362 1.1 mrg and this case can be detected when handling the borrow.) */ 363 1.1 mrg beq a2, a3, .Lsub_return_zero 364 1.1 mrg 365 1.1 mrg /* y is a subnormal value. Replace its sign/exponent with zero, 366 1.1 mrg i.e., no implicit "1.0". Unless x is also a subnormal, increment 367 1.1 mrg y's apparent exponent because subnormals behave as if they had 368 1.1 mrg the minimum (nonzero) exponent. 
/* Next: negate-and-return-y path, the mirrored "xsmaller" subtraction
   (y - x with y's sign flipped so the result sign is correct), and
   the shared round-to-nearest for subtraction.  */
*/ 369 1.1 mrg slli a3, a3, 9 370 1.1 mrg srli a3, a3, 9 371 1.1 mrg bnone a2, a6, .Lsub_yexpdiff 372 1.1 mrg addi a8, a8, 1 373 1.1 mrg j .Lsub_yexpdiff 374 1.1 mrg 375 1.1 mrg .Lsub_returny: 376 1.1 mrg /* Negate and return y. */ 377 1.1 mrg slli a7, a6, 8 378 1.1 mrg xor a2, a3, a7 379 1.1 mrg 1: leaf_return 380 1.1 mrg 381 1.1 mrg .Lsub_xsmaller: 382 1.1 mrg /* Same thing as the "ysmaller" code, but with x and y swapped and 383 1.1 mrg with y negated. */ 384 1.1 mrg bnone a2, a6, .Lsub_xexpzero 385 1.1 mrg 386 1.1 mrg or a2, a2, a6 387 1.1 mrg slli a2, a2, 8 388 1.1 mrg srli a2, a2, 8 389 1.1 mrg 390 1.1 mrg .Lsub_xexpdiff: 391 1.1 mrg sub a10, a8, a7 392 1.1 mrg bgeui a10, 32, .Lsub_returny 393 1.1 mrg 394 1.1 mrg ssr a10 395 1.1 mrg movi a9, 0 396 1.1 mrg src a9, a2, a9 397 1.1 mrg srl a2, a2 398 1.1 mrg 399 1.1 mrg /* Negate y. */ 400 1.1 mrg slli a11, a6, 8 401 1.1 mrg xor a3, a3, a11 402 1.1 mrg 403 1.1 mrg sub a2, a3, a2 404 1.1 mrg 405 1.1 mrg neg a9, a9 406 1.1 mrg addi a10, a2, -1 407 1.1 mrg movnez a2, a10, a9 408 1.1 mrg 409 1.1 mrg /* Check if the subtract underflowed into the exponent. */ 410 1.1 mrg extui a10, a2, 23, 8 411 1.1 mrg bne a10, a8, .Lsub_borrow 412 1.1 mrg 413 1.1 mrg .Lsub_round: 414 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */ 415 1.1 mrg bgez a9, 1f 416 1.1 mrg addi a2, a2, 1 417 1.1 mrg 418 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */ 419 1.1 mrg slli a9, a9, 1 420 1.1 mrg beqz a9, .Lsub_exactlyhalf 421 1.1 mrg 1: leaf_return 422 1.1 mrg 423 1.1 mrg .Lsub_xexpzero: 424 1.1 mrg /* Same as "yexpzero". 
/* Next: subnormal-x entry, the zero-result exit, and borrow
   renormalization -- count leading zeros of the mantissa (do_nsau) and
   left-shift the a8/a9 mantissa pair, producing a subnormal when the
   exponent is too small to absorb the full shift.  */
*/ 425 1.1 mrg beq a2, a3, .Lsub_return_zero 426 1.1 mrg slli a2, a2, 9 427 1.1 mrg srli a2, a2, 9 428 1.1 mrg bnone a3, a6, .Lsub_xexpdiff 429 1.1 mrg addi a7, a7, 1 430 1.1 mrg j .Lsub_xexpdiff 431 1.1 mrg 432 1.1 mrg .Lsub_return_zero: 433 1.1 mrg movi a2, 0 434 1.1 mrg leaf_return 435 1.1 mrg 436 1.1 mrg .Lsub_borrow: 437 1.1 mrg /* The subtraction has underflowed into the exponent field, so the 438 1.1 mrg value needs to be renormalized. Shift the mantissa left as 439 1.1 mrg needed to remove any leading zeros and adjust the exponent 440 1.1 mrg accordingly. If the exponent is not large enough to remove 441 1.1 mrg all the leading zeros, the result will be a subnormal value. */ 442 1.1 mrg 443 1.1 mrg slli a8, a2, 9 444 1.1 mrg beqz a8, .Lsub_xzero 445 1.1 mrg do_nsau a6, a8, a7, a11 446 1.1 mrg srli a8, a8, 9 447 1.1 mrg bge a6, a10, .Lsub_subnormal 448 1.1 mrg addi a6, a6, 1 449 1.1 mrg 450 1.1 mrg .Lsub_normalize_shift: 451 1.1 mrg /* Shift the mantissa (a8/a9) left by a6. */ 452 1.1 mrg ssl a6 453 1.1 mrg src a8, a8, a9 454 1.1 mrg sll a9, a9 455 1.1 mrg 456 1.1 mrg /* Combine the shifted mantissa with the sign and exponent, 457 1.1 mrg decrementing the exponent by a6. (The exponent has already 458 1.1 mrg been decremented by one due to the borrow from the subtraction, 459 1.1 mrg but adding the mantissa will increment the exponent by one.) */ 460 1.1 mrg srli a2, a2, 23 461 1.1 mrg sub a2, a2, a6 462 1.1 mrg slli a2, a2, 23 463 1.1 mrg add a2, a2, a8 464 1.1 mrg j .Lsub_round 465 1.1 mrg 466 1.1 mrg .Lsub_exactlyhalf: 467 1.1 mrg /* Round down to the nearest even value. */ 468 1.1 mrg srli a2, a2, 1 469 1.1 mrg slli a2, a2, 1 470 1.1 mrg leaf_return 471 1.1 mrg 472 1.1 mrg .Lsub_xzero: 473 1.1 mrg /* If there was a borrow from the exponent, and the mantissa and 474 1.1 mrg guard digits are all zero, then the inputs were equal and the 475 1.1 mrg result should be zero. 
/* Next: guard-digit-only normalization (shift by min(24, a10)), the
   subnormal result case, end of the L_addsubsf3 section, and the start
   of L_mulsf3 (normalizing a subnormal x operand).  */
*/ 476 1.1 mrg beqz a9, .Lsub_return_zero 477 1.1 mrg 478 1.1 mrg /* Only the guard digit is nonzero. Shift by min(24, a10). */ 479 1.1 mrg addi a11, a10, -24 480 1.1 mrg movi a6, 24 481 1.1 mrg movltz a6, a10, a11 482 1.1 mrg j .Lsub_normalize_shift 483 1.1 mrg 484 1.1 mrg .Lsub_subnormal: 485 1.1 mrg /* The exponent is too small to shift away all the leading zeros. 486 1.1 mrg Set a6 to the current exponent (which has already been 487 1.1 mrg decremented by the borrow) so that the exponent of the result 488 1.1 mrg will be zero. Do not add 1 to a6 in this case, because: (1) 489 1.1 mrg adding the mantissa will not increment the exponent, so there is 490 1.1 mrg no need to subtract anything extra from the exponent to 491 1.1 mrg compensate, and (2) the effective exponent of a subnormal is 1 492 1.1 mrg not 0 so the shift amount must be 1 smaller than normal. */ 493 1.1 mrg mov a6, a10 494 1.1 mrg j .Lsub_normalize_shift 495 1.1 mrg 496 1.1 mrg #endif /* L_addsubsf3 */ 497 1.1 mrg 498 1.1 mrg #ifdef L_mulsf3 499 1.1 mrg 500 1.1 mrg /* Multiplication */ 501 1.1 mrg #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 502 1.1 mrg #define XCHAL_NO_MUL 1 503 1.1 mrg #endif 504 1.1 mrg 505 1.1.1.3 mrg .literal_position 506 1.1 mrg __mulsf3_aux: 507 1.1 mrg 508 1.1 mrg /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). 509 1.1 mrg (This code is placed before the start of the function just to 510 1.1 mrg keep it in range of the limited branch displacements.) */ 511 1.1 mrg 512 1.1 mrg .Lmul_xexpzero: 513 1.1 mrg /* Clear the sign bit of x. */ 514 1.1 mrg slli a2, a2, 1 515 1.1 mrg srli a2, a2, 1 516 1.1 mrg 517 1.1 mrg /* If x is zero, return zero. */ 518 1.1 mrg beqz a2, .Lmul_return_zero 519 1.1 mrg 520 1.1 mrg /* Normalize x. Adjust the exponent in a8. 
/* NOTE(review): continuation of the same CVS-annotate flattened dump:
   __mulsf3 with its four multiplier-hardware variants (MUL32_HIGH,
   MUL16/MUL32, MAC16, and a software .Lmul_mulsi3 fallback), followed
   by the XCHAL_HAVE_FP_DIV hardware __divsf3.  Code text is preserved
   byte-for-byte; only standalone review comment lines are added, since
   the original's branch-displacement-sensitive layout must not be
   reflowed.  This line: subnormal operand normalization via do_nsau,
   and the NaN/Inf dispatch paths for multiplication.  */
*/ 521 1.1 mrg do_nsau a10, a2, a11, a12 522 1.1 mrg addi a10, a10, -8 523 1.1 mrg ssl a10 524 1.1 mrg sll a2, a2 525 1.1 mrg movi a8, 1 526 1.1 mrg sub a8, a8, a10 527 1.1 mrg j .Lmul_xnormalized 528 1.1 mrg 529 1.1 mrg .Lmul_yexpzero: 530 1.1 mrg /* Clear the sign bit of y. */ 531 1.1 mrg slli a3, a3, 1 532 1.1 mrg srli a3, a3, 1 533 1.1 mrg 534 1.1 mrg /* If y is zero, return zero. */ 535 1.1 mrg beqz a3, .Lmul_return_zero 536 1.1 mrg 537 1.1 mrg /* Normalize y. Adjust the exponent in a9. */ 538 1.1 mrg do_nsau a10, a3, a11, a12 539 1.1 mrg addi a10, a10, -8 540 1.1 mrg ssl a10 541 1.1 mrg sll a3, a3 542 1.1 mrg movi a9, 1 543 1.1 mrg sub a9, a9, a10 544 1.1 mrg j .Lmul_ynormalized 545 1.1 mrg 546 1.1 mrg .Lmul_return_zero: 547 1.1 mrg /* Return zero with the appropriate sign bit. */ 548 1.1 mrg srli a2, a7, 31 549 1.1 mrg slli a2, a2, 31 550 1.1 mrg j .Lmul_done 551 1.1 mrg 552 1.1 mrg .Lmul_xnan_or_inf: 553 1.1 mrg /* If y is zero, return NaN. */ 554 1.1 mrg slli a8, a3, 1 555 1.1.1.5 mrg beqz a8, .Lmul_return_nan 556 1.1 mrg /* If y is NaN, return y. */ 557 1.1 mrg bnall a3, a6, .Lmul_returnx 558 1.1 mrg slli a8, a3, 9 559 1.1 mrg beqz a8, .Lmul_returnx 560 1.1 mrg 561 1.1 mrg .Lmul_returny: 562 1.1 mrg mov a2, a3 563 1.1 mrg 564 1.1 mrg .Lmul_returnx: 565 1.1.1.5 mrg slli a8, a2, 9 566 1.1.1.5 mrg bnez a8, .Lmul_return_nan 567 1.1 mrg /* Set the sign bit and return. */ 568 1.1 mrg extui a7, a7, 31, 1 569 1.1 mrg slli a2, a2, 1 570 1.1 mrg ssai 1 571 1.1 mrg src a2, a7, a2 572 1.1 mrg j .Lmul_done 573 1.1 mrg 574 1.1 mrg .Lmul_ynan_or_inf: 575 1.1 mrg /* If x is zero, return NaN. 
/* Next: NaN quietening (OR with 0x400000), the __mulsf3 entry with its
   per-ABI stack setup (CALL0 saves a12-a15; the no-multiply windowed
   case reserves space for CALL12), exponent extraction and addition,
   and insertion of the implicit "1.0" into both mantissas.  */
*/ 576 1.1 mrg slli a8, a2, 1 577 1.1 mrg bnez a8, .Lmul_returny 578 1.1.1.5 mrg mov a2, a3 579 1.1.1.5 mrg 580 1.1.1.5 mrg .Lmul_return_nan: 581 1.1.1.5 mrg movi a4, 0x400000 /* make it a quiet NaN */ 582 1.1.1.5 mrg or a2, a2, a4 583 1.1 mrg j .Lmul_done 584 1.1 mrg 585 1.1 mrg .align 4 586 1.1 mrg .global __mulsf3 587 1.1 mrg .type __mulsf3, @function 588 1.1 mrg __mulsf3: 589 1.1 mrg #if __XTENSA_CALL0_ABI__ 590 1.1 mrg leaf_entry sp, 32 591 1.1 mrg addi sp, sp, -32 592 1.1 mrg s32i a12, sp, 16 593 1.1 mrg s32i a13, sp, 20 594 1.1 mrg s32i a14, sp, 24 595 1.1 mrg s32i a15, sp, 28 596 1.1 mrg #elif XCHAL_NO_MUL 597 1.1 mrg /* This is not really a leaf function; allocate enough stack space 598 1.1 mrg to allow CALL12s to a helper function. */ 599 1.1 mrg leaf_entry sp, 64 600 1.1 mrg #else 601 1.1 mrg leaf_entry sp, 32 602 1.1 mrg #endif 603 1.1 mrg movi a6, 0x7f800000 604 1.1 mrg 605 1.1 mrg /* Get the sign of the result. */ 606 1.1 mrg xor a7, a2, a3 607 1.1 mrg 608 1.1 mrg /* Check for NaN and infinity. */ 609 1.1 mrg ball a2, a6, .Lmul_xnan_or_inf 610 1.1 mrg ball a3, a6, .Lmul_ynan_or_inf 611 1.1 mrg 612 1.1 mrg /* Extract the exponents. */ 613 1.1 mrg extui a8, a2, 23, 8 614 1.1 mrg extui a9, a3, 23, 8 615 1.1 mrg 616 1.1 mrg beqz a8, .Lmul_xexpzero 617 1.1 mrg .Lmul_xnormalized: 618 1.1 mrg beqz a9, .Lmul_yexpzero 619 1.1 mrg .Lmul_ynormalized: 620 1.1 mrg 621 1.1 mrg /* Add the exponents. */ 622 1.1 mrg add a8, a8, a9 623 1.1 mrg 624 1.1 mrg /* Replace sign/exponent fields with explicit "1.0". */ 625 1.1 mrg movi a10, 0xffffff 626 1.1 mrg or a2, a2, a6 627 1.1 mrg and a2, a2, a10 628 1.1 mrg or a3, a3, a6 629 1.1 mrg and a3, a3, a10 630 1.1 mrg 631 1.1 mrg /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. 
/* Next: the 32x32->64 multiply -- either MULL/MULUH directly
   (XCHAL_HAVE_MUL32_HIGH), or four 16x16 partial products; the
   CALL0 + no-multiply combination saves a0/a8 around the helper
   calls.  */
*/ 632 1.1 mrg 633 1.1 mrg #if XCHAL_HAVE_MUL32_HIGH 634 1.1 mrg 635 1.1 mrg mull a6, a2, a3 636 1.1 mrg muluh a2, a2, a3 637 1.1 mrg 638 1.1 mrg #else 639 1.1 mrg 640 1.1 mrg /* Break the inputs into 16-bit chunks and compute 4 32-bit partial 641 1.1 mrg products. These partial products are: 642 1.1 mrg 643 1.1 mrg 0 xl * yl 644 1.1 mrg 645 1.1 mrg 1 xl * yh 646 1.1 mrg 2 xh * yl 647 1.1 mrg 648 1.1 mrg 3 xh * yh 649 1.1 mrg 650 1.1 mrg If using the Mul16 or Mul32 multiplier options, these input 651 1.1 mrg chunks must be stored in separate registers. For Mac16, the 652 1.1 mrg UMUL.AA.* opcodes can specify that the inputs come from either 653 1.1 mrg half of the registers, so there is no need to shift them out 654 1.1 mrg ahead of time. If there is no multiply hardware, the 16-bit 655 1.1 mrg chunks can be extracted when setting up the arguments to the 656 1.1 mrg separate multiply function. */ 657 1.1 mrg 658 1.1 mrg #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL 659 1.1 mrg /* Calling a separate multiply function will clobber a0 and requires 660 1.1 mrg use of a8 as a temporary, so save those values now. (The function 661 1.1 mrg uses a custom ABI so nothing else needs to be saved.) */ 662 1.1 mrg s32i a0, sp, 0 663 1.1 mrg s32i a8, sp, 4 664 1.1 mrg #endif 665 1.1 mrg 666 1.1 mrg #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 667 1.1 mrg 668 1.1 mrg #define a2h a4 669 1.1 mrg #define a3h a5 670 1.1 mrg 671 1.1 mrg /* Get the high halves of the inputs into registers. */ 672 1.1 mrg srli a2h, a2, 16 673 1.1 mrg srli a3h, a3, 16 674 1.1 mrg 675 1.1 mrg #define a2l a2 676 1.1 mrg #define a3l a3 677 1.1 mrg 678 1.1 mrg #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 679 1.1 mrg /* Clear the high halves of the inputs. This does not matter 680 1.1 mrg for MUL16 because the high bits are ignored. 
/* Next: do_mul() macro definitions -- MUL16U, MULL, MAC16 (UMUL.AA.*
   followed by RSR from ACCLO, with underscore aliases to work around
   preprocessor token pasting after periods), and the software fallback
   that marshals 16-bit halves and calls .Lmul_mulsi3 via CALL0 or
   CALL12 depending on the ABI.  */
*/ 681 1.1 mrg extui a2, a2, 0, 16 682 1.1 mrg extui a3, a3, 0, 16 683 1.1 mrg #endif 684 1.1 mrg #endif /* MUL16 || MUL32 */ 685 1.1 mrg 686 1.1 mrg 687 1.1 mrg #if XCHAL_HAVE_MUL16 688 1.1 mrg 689 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ 690 1.1 mrg mul16u dst, xreg ## xhalf, yreg ## yhalf 691 1.1 mrg 692 1.1 mrg #elif XCHAL_HAVE_MUL32 693 1.1 mrg 694 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ 695 1.1 mrg mull dst, xreg ## xhalf, yreg ## yhalf 696 1.1 mrg 697 1.1 mrg #elif XCHAL_HAVE_MAC16 698 1.1 mrg 699 1.1 mrg /* The preprocessor insists on inserting a space when concatenating after 700 1.1 mrg a period in the definition of do_mul below. These macros are a workaround 701 1.1 mrg using underscores instead of periods when doing the concatenation. */ 702 1.1 mrg #define umul_aa_ll umul.aa.ll 703 1.1 mrg #define umul_aa_lh umul.aa.lh 704 1.1 mrg #define umul_aa_hl umul.aa.hl 705 1.1 mrg #define umul_aa_hh umul.aa.hh 706 1.1 mrg 707 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ 708 1.1 mrg umul_aa_ ## xhalf ## yhalf xreg, yreg; \ 709 1.1 mrg rsr dst, ACCLO 710 1.1 mrg 711 1.1 mrg #else /* no multiply hardware */ 712 1.1 mrg 713 1.1 mrg #define set_arg_l(dst, src) \ 714 1.1 mrg extui dst, src, 0, 16 715 1.1 mrg #define set_arg_h(dst, src) \ 716 1.1 mrg srli dst, src, 16 717 1.1 mrg 718 1.1 mrg #if __XTENSA_CALL0_ABI__ 719 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ 720 1.1 mrg set_arg_ ## xhalf (a13, xreg); \ 721 1.1 mrg set_arg_ ## yhalf (a14, yreg); \ 722 1.1 mrg call0 .Lmul_mulsi3; \ 723 1.1 mrg mov dst, a12 724 1.1 mrg #else 725 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ 726 1.1 mrg set_arg_ ## xhalf (a14, xreg); \ 727 1.1 mrg set_arg_ ## yhalf (a15, yreg); \ 728 1.1 mrg call12 .Lmul_mulsi3; \ 729 1.1 mrg mov dst, a14 730 1.1 mrg #endif /* __XTENSA_CALL0_ABI__ */ 731 1.1 mrg 732 1.1 mrg #endif /* no multiply hardware */ 733 1.1 mrg 734 1.1 mrg /* Add pp1 and pp2 into a6 with carry-out in a9. 
/* Next: accumulate the four partial products with manual carry
   tracking in a9 (bgeu after each add detects wraparound), normalize
   the 48-bit product with a 9- or 8-bit shift, subtract the exponent
   bias (0x80), and range-check the biased exponent (0..0xfd OK).  */
*/ 735 1.1 mrg do_mul(a6, a2, l, a3, h) /* pp 1 */ 736 1.1 mrg do_mul(a11, a2, h, a3, l) /* pp 2 */ 737 1.1 mrg movi a9, 0 738 1.1 mrg add a6, a6, a11 739 1.1 mrg bgeu a6, a11, 1f 740 1.1 mrg addi a9, a9, 1 741 1.1 mrg 1: 742 1.1 mrg /* Shift the high half of a9/a6 into position in a9. Note that 743 1.1 mrg this value can be safely incremented without any carry-outs. */ 744 1.1 mrg ssai 16 745 1.1 mrg src a9, a9, a6 746 1.1 mrg 747 1.1 mrg /* Compute the low word into a6. */ 748 1.1 mrg do_mul(a11, a2, l, a3, l) /* pp 0 */ 749 1.1 mrg sll a6, a6 750 1.1 mrg add a6, a6, a11 751 1.1 mrg bgeu a6, a11, 1f 752 1.1 mrg addi a9, a9, 1 753 1.1 mrg 1: 754 1.1 mrg /* Compute the high word into a2. */ 755 1.1 mrg do_mul(a2, a2, h, a3, h) /* pp 3 */ 756 1.1 mrg add a2, a2, a9 757 1.1 mrg 758 1.1 mrg #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL 759 1.1 mrg /* Restore values saved on the stack during the multiplication. */ 760 1.1 mrg l32i a0, sp, 0 761 1.1 mrg l32i a8, sp, 4 762 1.1 mrg #endif 763 1.1 mrg #endif /* ! XCHAL_HAVE_MUL32_HIGH */ 764 1.1 mrg 765 1.1 mrg /* Shift left by 9 bits, unless there was a carry-out from the 766 1.1 mrg multiply, in which case, shift by 8 bits and increment the 767 1.1 mrg exponent. */ 768 1.1 mrg movi a4, 9 769 1.1 mrg srli a5, a2, 24 - 9 770 1.1 mrg beqz a5, 1f 771 1.1 mrg addi a4, a4, -1 772 1.1 mrg addi a8, a8, 1 773 1.1 mrg 1: ssl a4 774 1.1 mrg src a2, a2, a6 775 1.1 mrg sll a6, a6 776 1.1 mrg 777 1.1 mrg /* Subtract the extra bias from the exponent sum (plus one to account 778 1.1 mrg for the explicit "1.0" of the mantissa that will be added to the 779 1.1 mrg exponent in the final result). */ 780 1.1 mrg movi a4, 0x80 781 1.1 mrg sub a8, a8, a4 782 1.1 mrg 783 1.1 mrg /* Check for over/underflow. The value in a8 is one less than the 784 1.1 mrg final exponent, so values in the range 0..fd are OK here. */ 785 1.1 mrg movi a4, 0xfe 786 1.1 mrg bgeu a8, a4, .Lmul_overflow 787 1.1 mrg 788 1.1 mrg .Lmul_round: 789 1.1 mrg /* Round. 
/* Next: round-to-nearest-even, combine exponent and sign with the
   mantissa, the shared epilogue (.Lmul_done restores a12-a15 under
   CALL0), overflow to +/-Infinity, and subnormal-underflow right
   shifting.  */
*/ 790 1.1 mrg bgez a6, .Lmul_rounded 791 1.1 mrg addi a2, a2, 1 792 1.1 mrg slli a6, a6, 1 793 1.1 mrg beqz a6, .Lmul_exactlyhalf 794 1.1 mrg 795 1.1 mrg .Lmul_rounded: 796 1.1 mrg /* Add the exponent to the mantissa. */ 797 1.1 mrg slli a8, a8, 23 798 1.1 mrg add a2, a2, a8 799 1.1 mrg 800 1.1 mrg .Lmul_addsign: 801 1.1 mrg /* Add the sign bit. */ 802 1.1 mrg srli a7, a7, 31 803 1.1 mrg slli a7, a7, 31 804 1.1 mrg or a2, a2, a7 805 1.1 mrg 806 1.1 mrg .Lmul_done: 807 1.1 mrg #if __XTENSA_CALL0_ABI__ 808 1.1 mrg l32i a12, sp, 16 809 1.1 mrg l32i a13, sp, 20 810 1.1 mrg l32i a14, sp, 24 811 1.1 mrg l32i a15, sp, 28 812 1.1 mrg addi sp, sp, 32 813 1.1 mrg #endif 814 1.1 mrg leaf_return 815 1.1 mrg 816 1.1 mrg .Lmul_exactlyhalf: 817 1.1 mrg /* Round down to the nearest even value. */ 818 1.1 mrg srli a2, a2, 1 819 1.1 mrg slli a2, a2, 1 820 1.1 mrg j .Lmul_rounded 821 1.1 mrg 822 1.1 mrg .Lmul_overflow: 823 1.1 mrg bltz a8, .Lmul_underflow 824 1.1 mrg /* Return +/- Infinity. */ 825 1.1 mrg movi a8, 0xff 826 1.1 mrg slli a2, a8, 23 827 1.1 mrg j .Lmul_addsign 828 1.1 mrg 829 1.1 mrg .Lmul_underflow: 830 1.1 mrg /* Create a subnormal value, where the exponent field contains zero, 831 1.1 mrg but the effective exponent is 1. The value of a8 is one less than 832 1.1 mrg the actual exponent, so just negate it to get the shift amount. */ 833 1.1 mrg neg a8, a8 834 1.1 mrg mov a9, a6 835 1.1 mrg ssr a8 836 1.1 mrg bgeui a8, 32, .Lmul_flush_to_zero 837 1.1 mrg 838 1.1 mrg /* Shift a2 right. Any bits that are shifted out of a2 are saved 839 1.1 mrg in a6 (combined with the shifted-out bits currently in a6) for 840 1.1 mrg rounding the result. */ 841 1.1 mrg sll a6, a2 842 1.1 mrg srl a2, a2 843 1.1 mrg 844 1.1 mrg /* Set the exponent to zero. */ 845 1.1 mrg movi a8, 0 846 1.1 mrg 847 1.1 mrg /* Pack any nonzero bits shifted out into a6. 
/* Next: fold previously shifted-out bits into the sticky word a6,
   flush-to-zero for deep underflow, and the shift-and-add
   .Lmul_mulsi3 helper (4 product bits per loop iteration via
   do_addx2/do_addx4/do_addx8 with movnez bit selects).  */
*/ 848 1.1 mrg beqz a9, .Lmul_round 849 1.1 mrg movi a9, 1 850 1.1 mrg or a6, a6, a9 851 1.1 mrg j .Lmul_round 852 1.1 mrg 853 1.1 mrg .Lmul_flush_to_zero: 854 1.1 mrg /* Return zero with the appropriate sign bit. */ 855 1.1 mrg srli a2, a7, 31 856 1.1 mrg slli a2, a2, 31 857 1.1 mrg j .Lmul_done 858 1.1 mrg 859 1.1 mrg #if XCHAL_NO_MUL 860 1.1 mrg 861 1.1 mrg /* For Xtensa processors with no multiply hardware, this simplified 862 1.1 mrg version of _mulsi3 is used for multiplying 16-bit chunks of 863 1.1 mrg the floating-point mantissas. When using CALL0, this function 864 1.1 mrg uses a custom ABI: the inputs are passed in a13 and a14, the 865 1.1 mrg result is returned in a12, and a8 and a15 are clobbered. */ 866 1.1 mrg .align 4 867 1.1 mrg .Lmul_mulsi3: 868 1.1 mrg leaf_entry sp, 16 869 1.1 mrg .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 870 1.1 mrg movi \dst, 0 871 1.1 mrg 1: add \tmp1, \src2, \dst 872 1.1 mrg extui \tmp2, \src1, 0, 1 873 1.1 mrg movnez \dst, \tmp1, \tmp2 874 1.1 mrg 875 1.1 mrg do_addx2 \tmp1, \src2, \dst, \tmp1 876 1.1 mrg extui \tmp2, \src1, 1, 1 877 1.1 mrg movnez \dst, \tmp1, \tmp2 878 1.1 mrg 879 1.1 mrg do_addx4 \tmp1, \src2, \dst, \tmp1 880 1.1 mrg extui \tmp2, \src1, 2, 1 881 1.1 mrg movnez \dst, \tmp1, \tmp2 882 1.1 mrg 883 1.1 mrg do_addx8 \tmp1, \src2, \dst, \tmp1 884 1.1 mrg extui \tmp2, \src1, 3, 1 885 1.1 mrg movnez \dst, \tmp1, \tmp2 886 1.1 mrg 887 1.1 mrg srli \src1, \src1, 4 888 1.1 mrg slli \src2, \src2, 4 889 1.1 mrg bnez \src1, 1b 890 1.1 mrg .endm 891 1.1 mrg #if __XTENSA_CALL0_ABI__ 892 1.1 mrg mul_mulsi3_body a12, a13, a14, a15, a8 893 1.1 mrg #else 894 1.1 mrg /* The result will be written into a2, so save that argument in a4. 
/* Next: windowed-ABI mulsi3 wrapper, end of L_mulsf3, then L_divsf3:
   the XCHAL_HAVE_FP_DIV __divsf3 uses the FPU divide-assist sequence
   (div0.s / nexp01.s / maddn.s Newton-Raphson refinement, mkdadj.s /
   addexp.s exponent handling, final divn.s); the software
   __divsf3_aux begins after the #else and continues in the next
   chunk.  */
*/ 895 1.1 mrg mov a4, a2 896 1.1 mrg mul_mulsi3_body a2, a4, a3, a5, a6 897 1.1 mrg #endif 898 1.1 mrg leaf_return 899 1.1 mrg #endif /* XCHAL_NO_MUL */ 900 1.1 mrg #endif /* L_mulsf3 */ 901 1.1 mrg 902 1.1 mrg #ifdef L_divsf3 903 1.1 mrg 904 1.1 mrg /* Division */ 905 1.1.1.6 mrg 906 1.1.1.6 mrg #if XCHAL_HAVE_FP_DIV 907 1.1.1.6 mrg 908 1.1.1.6 mrg .align 4 909 1.1.1.6 mrg .global __divsf3 910 1.1.1.6 mrg .type __divsf3, @function 911 1.1.1.6 mrg __divsf3: 912 1.1.1.6 mrg leaf_entry sp, 16 913 1.1.1.6 mrg 914 1.1.1.6 mrg wfr f1, a2 /* dividend */ 915 1.1.1.6 mrg wfr f2, a3 /* divisor */ 916 1.1.1.6 mrg 917 1.1.1.6 mrg div0.s f3, f2 918 1.1.1.6 mrg nexp01.s f4, f2 919 1.1.1.6 mrg const.s f5, 1 920 1.1.1.6 mrg maddn.s f5, f4, f3 921 1.1.1.6 mrg mov.s f6, f3 922 1.1.1.6 mrg mov.s f7, f2 923 1.1.1.6 mrg nexp01.s f2, f1 924 1.1.1.6 mrg maddn.s f6, f5, f6 925 1.1.1.6 mrg const.s f5, 1 926 1.1.1.6 mrg const.s f0, 0 927 1.1.1.6 mrg neg.s f8, f2 928 1.1.1.6 mrg maddn.s f5, f4, f6 929 1.1.1.6 mrg maddn.s f0, f8, f3 930 1.1.1.6 mrg mkdadj.s f7, f1 931 1.1.1.6 mrg maddn.s f6, f5, f6 932 1.1.1.6 mrg maddn.s f8, f4, f0 933 1.1.1.6 mrg const.s f3, 1 934 1.1.1.6 mrg maddn.s f3, f4, f6 935 1.1.1.6 mrg maddn.s f0, f8, f6 936 1.1.1.6 mrg neg.s f2, f2 937 1.1.1.6 mrg maddn.s f6, f3, f6 938 1.1.1.6 mrg maddn.s f2, f4, f0 939 1.1.1.6 mrg addexpm.s f0, f7 940 1.1.1.6 mrg addexp.s f6, f7 941 1.1.1.6 mrg divn.s f0, f2, f6 942 1.1.1.6 mrg 943 1.1.1.6 mrg rfr a2, f0 944 1.1.1.6 mrg 945 1.1.1.6 mrg leaf_return 946 1.1.1.6 mrg 947 1.1.1.6 mrg #else 948 1.1.1.6 mrg 949 1.1.1.6 mrg .literal_position 950 1.1 mrg __divsf3_aux: 951 1.1 mrg 952 1.1 mrg /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). 953 1.1 mrg (This code is placed before the start of the function just to 954 1.1 mrg keep it in range of the limited branch displacements.) */ 955 1.1 mrg 956 1.1 mrg .Ldiv_yexpzero: 957 1.1 mrg /* Clear the sign bit of y. 
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* Check for division by zero.  */
	beqz	a3, .Ldiv_yzero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
	slli	a4, a2, 1
	srli	a4, a4, 1
	srli	a2, a7, 31
	slli	a2, a2, 31
	or	a2, a2, a6
	bnez	a4, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Ldiv_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

.Ldiv_xnan_or_inf:
	/* Set the sign bit of the result.  */
	srli	a7, a3, 31
	slli	a7, a7, 31
	xor	a2, a2, a7
	/* If y is NaN or Inf, return NaN.  */
	ball	a3, a6, .Ldiv_return_nan
	slli	a7, a2, 9
	bnez	a7, .Ldiv_return_nan
	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero.  */
	slli	a8, a3, 9
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	a2, a3

.Ldiv_return_nan:
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	leaf_return

	/* float __divsf3 (float a, float b): a2 = a2 / a3 by restoring
	   long division on the mantissas.  Register roles: a6 = 0x7f800000
	   exponent mask, a7 = XOR of operands (result sign in bit 31),
	   a8/a9 = x/y exponents, a10 = quotient being built.  */
	.align	4
	.global	__divsf3
	.type	__divsf3, @function
__divsf3:
	leaf_entry	sp, 16
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Ldiv_xnan_or_inf
	ball	a3, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	a3, a2, 1f
	slli	a2, a2, 1
	addi	a8, a8, -1
1:
	/* Do the first subtraction and shift.  */
	sub	a2, a2, a3
	slli	a2, a2, 1

	/* Put the quotient into a10.  */
	movi	a10, 1

	/* Divide one bit at a time for 23 bits.  */
	movi	a9, 23
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	slli	a10, a10, 1

	/* Is this digit a 0 or 1?  */
	bltu	a2, a3, 1f

	/* Output a 1 and subtract.  */
	addi	a10, a10, 1
	sub	a2, a2, a3

	/* Shift the dividend << 1.  */
1:	slli	a2, a2, 1

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	addi	a8, a8, 0x7e

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in a2.  */
	bltu	a2, a3, .Ldiv_rounded
	addi	a10, a10, 1
	beq	a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7
	leaf_return

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a4, 1	/* 0xff */
	slli	a2, a8, 23
	j	.Ldiv_addsign

.Ldiv_exactlyhalf:
	/* Remainder is exactly half the divisor.  Round even.  */
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_flush_to_zero

	/* Shift a10 right.  Any bits that are shifted out of a10 are
	   saved in a6 for rounding the result.  */
	sll	a6, a10
	srl	a10, a10

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero remainder (in a2) into a6.  */
	beqz	a2, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10 based on the bits shifted out into a6.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a10, a10, 1
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

#endif /* XCHAL_HAVE_FP_DIV */

#endif /* L_divsf3 */

#ifdef L_cmpsf2

	/* Equal and Not Equal */

	/* int __eqsf2 / __nesf2 (float a, float b): returns 0 if a == b,
	   nonzero otherwise.  NaN compares unequal even to itself;
	   +0 and -0 compare equal.  */
	.align	4
	.global	__eqsf2
	.global	__nesf2
	.set	__nesf2, __eqsf2
	.type	__eqsf2, @function
__eqsf2:
	leaf_entry	sp, 16
	bne	a2, a3, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7f800000
	ball	a2, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Check if the mantissas are nonzero.  */
3:	slli	a7, a2, 9
	j	5f

	/* Check if x and y are zero with different signs.  */
4:	or	a7, a2, a3
	slli	a7, a7, 1

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   or x when exponent(x) = 0x7f8 and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return


	/* Greater Than */

	/* int __gtsf2 (float a, float b): > 0 if a > b; returns 0 (false)
	   if either operand is NaN.  Falls through to the shared .Lle_cmp
	   comparison in __lesf2 below — keep these functions together.  */
	.align	4
	.global	__gtsf2
	.type	__gtsf2, @function
__gtsf2:
	leaf_entry	sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return


	/* Less Than or Equal */

	/* int __lesf2 (float a, float b): <= 0 if a <= b; returns 1 (i.e.
	   "not <=") if either operand is NaN.  */
	.align	4
	.global	__lesf2
	.type	__lesf2, @function
__lesf2:
	leaf_entry	sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Shared ordered-compare tail for __gtsf2 and __lesf2:
	   returns 0 when x <= y, 1 when x > y.  */
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Lle_xneg

	/* Check if x <= y.  */
	bltu	a3, a2, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Check if y <= x.  (Negative floats reverse the unsigned order
	   of their bit patterns.)  */
	bgeu	a2, a3, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	bltz	a2, 4b

	/* Check if both x and y are zero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return


	/* Greater Than or Equal */

	/* int __gesf2 (float a, float b): >= 0 if a >= b; returns -1
	   if either operand is NaN.  Shares .Llt_cmp with __ltsf2.  */
	.align	4
	.global	__gesf2
	.type	__gesf2, @function
__gesf2:
	leaf_entry	sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, -1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, -1
	leaf_return


	/* Less Than */

	/* int __ltsf2 (float a, float b): < 0 if a < b; returns 0 (false)
	   if either operand is NaN.  */
	.align	4
	.global	__ltsf2
	.type	__ltsf2, @function
__ltsf2:
	leaf_entry	sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Shared ordered-compare tail for __gesf2 and __ltsf2:
	   returns -1 when x < y, 0 when x >= y.  */
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Llt_xneg

	/* Check if x < y.  */
	bgeu	a2, a3, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Check if y < x.  */
	bltu	a3, a2, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	bgez	a2, 5b

	/* Check if both x and y are nonzero (i.e. not +0 vs -0).  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return


	/* Unordered */

	/* int __unordsf2 (float a, float b): 1 if either operand is NaN,
	   0 otherwise.  */
	.align	4
	.global	__unordsf2
	.type	__unordsf2, @function
__unordsf2:
	leaf_entry	sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 3f
1:	ball	a3, a6, 4f
2:	movi	a2, 0
	leaf_return

3:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

4:	slli	a7, a3, 9
	beqz	a7, 2b
	movi	a2, 1
	leaf_return

#endif /* L_cmpsf2 */

#ifdef L_fixsfsi

	/* int __fixsfsi (float a): truncate a (in a2) toward zero to a
	   signed 32-bit int in a2.  Out-of-range values saturate to
	   0x7fffffff / 0x80000000; NaN is translated to +maxint.  */
	.align	4
	.global	__fixsfsi
	.type	__fixsfsi, @function
__fixsfsi:
	leaf_entry	sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 32, .Lfixsfsi_maxint
	blti	a4, 1, .Lfixsfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  (a7 still has the sign in
	   bit 31 from the "or" above.)  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixsfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfsi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixsfsi_zero:
	movi	a2, 0
	leaf_return

#endif /* L_fixsfsi */

#ifdef L_fixsfdi

	/* long long __fixsfdi (float a): truncate a (in a2) toward zero to
	   a signed 64-bit int in xh:xl (a2/a3, order depends on endianness
	   macros at the top of the file).  Saturates on overflow; NaN is
	   translated to +maxint.  */
	.align	4
	.global	__fixsfdi
	.type	__fixsfdi, @function
__fixsfdi:
	leaf_entry	sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 64, .Lfixsfdi_maxint
	blti	a4, 1, .Lfixsfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixsfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixsfdi_shifted:
	/* Negate the result if sign != 0 (two's complement of xh:xl).  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixsfdi_smallshift:
	movi	xl, 0
	sll	xl, xh
	srl	xh, xh
	j	.Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfdi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfdi_maxint:
	slli	a7, a6, 8	/* 0x80000000 */
	bgez	a2, 1f
	mov	xh, a7
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1
	leaf_return

.Lfixsfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

#endif /* L_fixsfdi */

#ifdef L_fixunssfsi

	/* unsigned __fixunssfsi (float a): truncate a (in a2) toward zero
	   to an unsigned 32-bit int in a2.  Values >= 2^32 and NaN become
	   0xffffffff; negative values produce the negated magnitude /
	   0x80000000 path below.  */
	.align	4
	.global	__fixunssfsi
	.type	__fixunssfsi, @function
__fixunssfsi:
	leaf_entry	sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 32, .Lfixunssfsi_maxint
	bltz	a4, .Lfixunssfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunssfsi_bigexp
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunssfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunssfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixunssfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunssfsi_bigexp:
	/* Handle unsigned maximum exponent case (exp - 0x7f == 31:
	   the mantissa already fills all 32 bits).  */
	bltz	a2, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 8
	leaf_return

#endif /* L_fixunssfsi */

#ifdef L_fixunssfdi

	/* unsigned long long __fixunssfdi (float a): truncate a (in a2)
	   toward zero to an unsigned 64-bit int in xh:xl.  Values >= 2^64
	   and NaN become all-ones; out-of-range negatives hit the
	   0x80000000:0 path.  */
	.align	4
	.global	__fixunssfdi
	.type	__fixunssfdi, @function
__fixunssfdi:
	leaf_entry	sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 64, .Lfixunssfdi_maxint
	bltz	a4, .Lfixunssfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunssfdi_bigexp
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunssfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunssfdi_shifted:
	/* Negate the result if sign != 0 (two's complement of xh:xl).  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunssfdi_smallshift:
	movi	xl, 0
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfdi_maxint

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunssfdi_maxint:
	bgez	a2, 1b
2:	slli	xh, a6, 8	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunssfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunssfdi_bigexp:
	/* Handle unsigned maximum exponent case (exp - 0x7f == 63).  */
	bltz	a7, 2b
	movi	xl, 0
	leaf_return		/* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

	/* float __floatunsisf (unsigned a): convert a (in a2) to float in
	   a2, rounding to nearest/even.  Shares the conversion body with
	   __floatsisf, entering with the sign forced to zero.  */
	.align	4
	.global	__floatunsisf
	.type	__floatunsisf, @function
__floatunsisf:
	leaf_entry	sp, 16
	beqz	a2, .Lfloatsisf_return

	/* Set the sign to zero and jump to the floatsisf code.  */
	movi	a7, 0
	j	.Lfloatsisf_normalize

	/* float __floatsisf (int a): convert a (in a2) to float in a2,
	   rounding to nearest/even.  a7 = sign, a4 = leading-zero count,
	   a6 = leftover fraction bits used for rounding.  */
	.align	4
	.global	__floatsisf
	.type	__floatsisf, @function
__floatsisf:
	leaf_entry	sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsisf_return

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsisf_normalize:
	/* Normalize with the first 1 bit in the msb.  */
	do_nsau	a4, a2, a5, a6
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position, with rounding bits in a6.  */
	srli	a2, a5, 8
	slli	a6, a5, (32 - 8)

	/* Set the exponent.  */
	movi	a5, 0x9d	/* 0x7e + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, a2, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, .Lfloatsisf_return
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
*/ 1723 1.1 mrg slli a6, a6, 1 1724 1.1 mrg beqz a6, .Lfloatsisf_exactlyhalf 1725 1.1 mrg 1726 1.1 mrg .Lfloatsisf_return: 1727 1.1 mrg leaf_return 1728 1.1 mrg 1729 1.1 mrg .Lfloatsisf_exactlyhalf: 1730 1.1 mrg /* Round down to the nearest even value. */ 1731 1.1 mrg srli a2, a2, 1 1732 1.1 mrg slli a2, a2, 1 1733 1.1 mrg leaf_return 1734 1.1 mrg 1735 1.1 mrg #endif /* L_floatsisf */ 1736 1.1 mrg 1737 1.1 mrg #ifdef L_floatdisf 1738 1.1 mrg 1739 1.1 mrg .align 4 1740 1.1 mrg .global __floatundisf 1741 1.1 mrg .type __floatundisf, @function 1742 1.1 mrg __floatundisf: 1743 1.1 mrg leaf_entry sp, 16 1744 1.1 mrg 1745 1.1 mrg /* Check for zero. */ 1746 1.1 mrg or a4, xh, xl 1747 1.1 mrg beqz a4, 2f 1748 1.1 mrg 1749 1.1 mrg /* Set the sign to zero and jump to the floatdisf code. */ 1750 1.1 mrg movi a7, 0 1751 1.1 mrg j .Lfloatdisf_normalize 1752 1.1 mrg 1753 1.1 mrg .align 4 1754 1.1 mrg .global __floatdisf 1755 1.1 mrg .type __floatdisf, @function 1756 1.1 mrg __floatdisf: 1757 1.1 mrg leaf_entry sp, 16 1758 1.1 mrg 1759 1.1 mrg /* Check for zero. */ 1760 1.1 mrg or a4, xh, xl 1761 1.1 mrg beqz a4, 2f 1762 1.1 mrg 1763 1.1 mrg /* Save the sign. */ 1764 1.1 mrg extui a7, xh, 31, 1 1765 1.1 mrg 1766 1.1 mrg /* Get the absolute value. */ 1767 1.1 mrg bgez xh, .Lfloatdisf_normalize 1768 1.1 mrg neg xl, xl 1769 1.1 mrg neg xh, xh 1770 1.1 mrg beqz xl, .Lfloatdisf_normalize 1771 1.1 mrg addi xh, xh, -1 1772 1.1 mrg 1773 1.1 mrg .Lfloatdisf_normalize: 1774 1.1 mrg /* Normalize with the first 1 bit in the msb of xh. */ 1775 1.1 mrg beqz xh, .Lfloatdisf_bigshift 1776 1.1 mrg do_nsau a4, xh, a5, a6 1777 1.1 mrg ssl a4 1778 1.1 mrg src xh, xh, xl 1779 1.1 mrg sll xl, xl 1780 1.1 mrg 1781 1.1 mrg .Lfloatdisf_shifted: 1782 1.1 mrg /* Shift the mantissa into position, with rounding bits in a6. 
*/ 1783 1.1 mrg ssai 8 1784 1.1 mrg sll a5, xl 1785 1.1 mrg src a6, xh, xl 1786 1.1 mrg srl xh, xh 1787 1.1 mrg beqz a5, 1f 1788 1.1 mrg movi a5, 1 1789 1.1 mrg or a6, a6, a5 1790 1.1 mrg 1: 1791 1.1 mrg /* Set the exponent. */ 1792 1.1 mrg movi a5, 0xbd /* 0x7e + 63 */ 1793 1.1 mrg sub a5, a5, a4 1794 1.1 mrg slli a5, a5, 23 1795 1.1 mrg add a2, xh, a5 1796 1.1 mrg 1797 1.1 mrg /* Add the sign. */ 1798 1.1 mrg slli a7, a7, 31 1799 1.1 mrg or a2, a2, a7 1800 1.1 mrg 1801 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */ 1802 1.1 mrg bgez a6, 2f 1803 1.1 mrg addi a2, a2, 1 /* Overflow to the exponent is OK. */ 1804 1.1 mrg 1805 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */ 1806 1.1 mrg slli a6, a6, 1 1807 1.1 mrg beqz a6, .Lfloatdisf_exactlyhalf 1808 1.1 mrg 2: leaf_return 1809 1.1 mrg 1810 1.1 mrg .Lfloatdisf_bigshift: 1811 1.1 mrg /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ 1812 1.1 mrg do_nsau a4, xl, a5, a6 1813 1.1 mrg ssl a4 1814 1.1 mrg sll xh, xl 1815 1.1 mrg movi xl, 0 1816 1.1 mrg addi a4, a4, 32 1817 1.1 mrg j .Lfloatdisf_shifted 1818 1.1 mrg 1819 1.1 mrg .Lfloatdisf_exactlyhalf: 1820 1.1 mrg /* Round down to the nearest even value. 
*/ 1821 1.1 mrg srli a2, a2, 1 1822 1.1 mrg slli a2, a2, 1 1823 1.1 mrg leaf_return 1824 1.1 mrg 1825 1.1 mrg #endif /* L_floatdisf */ 1826 1.1.1.6 mrg 1827 1.1.1.6 mrg #if XCHAL_HAVE_FP_SQRT 1828 1.1.1.6 mrg #ifdef L_sqrtf 1829 1.1.1.6 mrg /* Square root */ 1830 1.1.1.6 mrg 1831 1.1.1.6 mrg .align 4 1832 1.1.1.6 mrg .global __ieee754_sqrtf 1833 1.1.1.6 mrg .type __ieee754_sqrtf, @function 1834 1.1.1.6 mrg __ieee754_sqrtf: 1835 1.1.1.6 mrg leaf_entry sp, 16 1836 1.1.1.6 mrg 1837 1.1.1.6 mrg wfr f1, a2 1838 1.1.1.6 mrg 1839 1.1.1.6 mrg sqrt0.s f2, f1 1840 1.1.1.6 mrg const.s f3, 0 1841 1.1.1.6 mrg maddn.s f3, f2, f2 1842 1.1.1.6 mrg nexp01.s f4, f1 1843 1.1.1.6 mrg const.s f0, 3 1844 1.1.1.6 mrg addexp.s f4, f0 1845 1.1.1.6 mrg maddn.s f0, f3, f4 1846 1.1.1.6 mrg nexp01.s f3, f1 1847 1.1.1.6 mrg neg.s f5, f3 1848 1.1.1.6 mrg maddn.s f2, f0, f2 1849 1.1.1.6 mrg const.s f0, 0 1850 1.1.1.6 mrg const.s f6, 0 1851 1.1.1.6 mrg const.s f7, 0 1852 1.1.1.6 mrg maddn.s f0, f5, f2 1853 1.1.1.6 mrg maddn.s f6, f2, f4 1854 1.1.1.6 mrg const.s f4, 3 1855 1.1.1.6 mrg maddn.s f7, f4, f2 1856 1.1.1.6 mrg maddn.s f3, f0, f0 1857 1.1.1.6 mrg maddn.s f4, f6, f2 1858 1.1.1.6 mrg neg.s f2, f7 1859 1.1.1.6 mrg maddn.s f0, f3, f2 1860 1.1.1.6 mrg maddn.s f7, f4, f7 1861 1.1.1.6 mrg mksadj.s f2, f1 1862 1.1.1.6 mrg nexp01.s f1, f1 1863 1.1.1.6 mrg maddn.s f1, f0, f0 1864 1.1.1.6 mrg neg.s f3, f7 1865 1.1.1.6 mrg addexpm.s f0, f2 1866 1.1.1.6 mrg addexp.s f3, f2 1867 1.1.1.6 mrg divn.s f0, f1, f3 1868 1.1.1.6 mrg 1869 1.1.1.6 mrg rfr a2, f0 1870 1.1.1.6 mrg 1871 1.1.1.6 mrg leaf_return 1872 1.1.1.6 mrg 1873 1.1.1.6 mrg #endif /* L_sqrtf */ 1874 1.1.1.6 mrg #endif /* XCHAL_HAVE_FP_SQRT */ 1875 1.1.1.6 mrg 1876 1.1.1.6 mrg #if XCHAL_HAVE_FP_RECIP 1877 1.1.1.6 mrg #ifdef L_recipsf2 1878 1.1.1.6 mrg /* Reciprocal */ 1879 1.1.1.6 mrg 1880 1.1.1.6 mrg .align 4 1881 1.1.1.6 mrg .global __recipsf2 1882 1.1.1.6 mrg .type __recipsf2, @function 1883 1.1.1.6 mrg __recipsf2: 1884 1.1.1.6 mrg leaf_entry 
sp, 16 1885 1.1.1.6 mrg 1886 1.1.1.6 mrg wfr f1, a2 1887 1.1.1.6 mrg 1888 1.1.1.6 mrg recip0.s f0, f1 1889 1.1.1.6 mrg const.s f2, 1 1890 1.1.1.6 mrg msub.s f2, f1, f0 1891 1.1.1.6 mrg maddn.s f0, f0, f2 1892 1.1.1.6 mrg const.s f2, 1 1893 1.1.1.6 mrg msub.s f2, f1, f0 1894 1.1.1.6 mrg maddn.s f0, f0, f2 1895 1.1.1.6 mrg 1896 1.1.1.6 mrg rfr a2, f0 1897 1.1.1.6 mrg 1898 1.1.1.6 mrg leaf_return 1899 1.1.1.6 mrg 1900 1.1.1.6 mrg #endif /* L_recipsf2 */ 1901 1.1.1.6 mrg #endif /* XCHAL_HAVE_FP_RECIP */ 1902 1.1.1.6 mrg 1903 1.1.1.6 mrg #if XCHAL_HAVE_FP_RSQRT 1904 1.1.1.6 mrg #ifdef L_rsqrtsf2 1905 1.1.1.6 mrg /* Reciprocal square root */ 1906 1.1.1.6 mrg 1907 1.1.1.6 mrg .align 4 1908 1.1.1.6 mrg .global __rsqrtsf2 1909 1.1.1.6 mrg .type __rsqrtsf2, @function 1910 1.1.1.6 mrg __rsqrtsf2: 1911 1.1.1.6 mrg leaf_entry sp, 16 1912 1.1.1.6 mrg 1913 1.1.1.6 mrg wfr f1, a2 1914 1.1.1.6 mrg 1915 1.1.1.6 mrg rsqrt0.s f0, f1 1916 1.1.1.6 mrg mul.s f2, f1, f0 1917 1.1.1.6 mrg const.s f3, 3; 1918 1.1.1.6 mrg mul.s f4, f3, f0 1919 1.1.1.6 mrg const.s f5, 1 1920 1.1.1.6 mrg msub.s f5, f2, f0 1921 1.1.1.6 mrg maddn.s f0, f4, f5 1922 1.1.1.6 mrg mul.s f2, f1, f0 1923 1.1.1.6 mrg mul.s f1, f3, f0 1924 1.1.1.6 mrg const.s f3, 1 1925 1.1.1.6 mrg msub.s f3, f2, f0 1926 1.1.1.6 mrg maddn.s f0, f1, f3 1927 1.1.1.6 mrg 1928 1.1.1.6 mrg rfr a2, f0 1929 1.1.1.6 mrg 1930 1.1.1.6 mrg leaf_return 1931 1.1.1.6 mrg 1932 1.1.1.6 mrg #endif /* L_rsqrtsf2 */ 1933 1.1.1.6 mrg #endif /* XCHAL_HAVE_FP_RSQRT */ 1934