/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006-2024 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson (at) tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif
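
/* xh/xl (and yh/yl) name the high and low words of a 64-bit value held
   in the argument registers; the mapping depends on the endianness of
   the target and is used by the DImode conversion routines below.  */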

/*  Warning!  The branch displacements for some Xtensa branch instructions
    are quite small, and this code has been carefully laid out to keep
    branch targets in range.  If you change anything, be sure to check that
    the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negsf2

	.align	4
	.global	__negsf2
	.type	__negsf2, @function
__negsf2:
	leaf_entry sp, 16
	movi	a4, 0x80000000
	xor	a2, a2, a4
	leaf_return

#endif /* L_negsf2 */

#ifdef L_addsubsf3

	.literal_position
	/* Addition */
__addsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Ladd_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, .Ladd_return_nan_or_inf
	/* If x is a NaN, return it.  Otherwise, return y.  */
	slli	a7, a2, 9
	bnez	a7, .Ladd_return_nan

.Ladd_ynan_or_inf:
	/* Return y.  */
	mov	a2, a3

.Ladd_return_nan_or_inf:
	slli	a7, a2, 9
	bnez	a7, .Ladd_return_nan
	leaf_return

.Ladd_return_nan:
	movi	a6, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a6
	leaf_return

.Ladd_opposite_signs:
	/* Operand signs differ.  Do a subtraction.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Lsub_same_sign

	.align	4
	.global	__addsf3
	.type	__addsf3, @function
__addsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Ladd_opposite_signs

.Ladd_same_sign:
	/* Check if either exponent == 0xff (i.e., NaN or Infinity).  */
	ball	a2, a6, .Ladd_xnan_or_inf
	ball	a3, a6, .Ladd_ynan_or_inf

	/* Compare the exponents.  The smaller operand will be shifted
	   right by the exponent difference and added to the larger
	   one.  */
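	/* (The extui below extracts nine bits, so each value also includes
	   the sign bit above the exponent; the signs are known to be equal
	   at this point, so the comparison is unaffected.)  */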
	extui	a7, a2, 23, 9
	extui	a8, a3, 23, 9
	bltu	a7, a8, .Ladd_shiftx

.Ladd_shifty:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Ladd_yexpzero

	/* Replace y sign/exponent with 0x008 (the explicit "1.0" bit).  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Ladd_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference >= 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	/* Do the addition.  */
	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	beq	a10, a7, .Ladd_round
	mov	a8, a7
	j	.Ladd_carry

.Ladd_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0", and increment the apparent exponent
	   because subnormals behave as if they had the minimum (nonzero)
	   exponent.  Test for the case when both exponents are zero.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Ladd_bothexpzero
	addi	a8, a8, 1
	j	.Ladd_yexpdiff

.Ladd_bothexpzero:
	/* Both exponents are zero.  Handle this as a special case.  There
	   is no need to shift or round, and the normal code for handling
	   a carry into the exponent field will not work because it
	   assumes there is an implicit "1.0" that needs to be added.  */
	add	a2, a2, a3
1:	leaf_return

.Ladd_xexpzero:
	/* Same as "yexpzero" except skip handling the case when both
	   exponents are zero.  */
	slli	a2, a2, 9
	srli	a2, a2, 9
	addi	a7, a7, 1
	j	.Ladd_xexpdiff

.Ladd_shiftx:
	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
	   because the exponent difference is always nonzero in this version,
	   the shift sequence can use SLL and skip loading a constant zero.  */
	bnone	a2, a6, .Ladd_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Ladd_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Ladd_returny

	ssr	a10
	sll	a9, a2
	srl	a2, a2

	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	bne	a10, a8, .Ladd_carry

.Ladd_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_returny:
	mov	a2, a3
	leaf_return

.Ladd_carry:
	/* The addition has overflowed into the exponent field, so the
	   value needs to be renormalized.  The mantissa of the result
	   can be recovered by subtracting the original exponent and
	   adding 0x800000 (which is the explicit "1.0" for the
	   mantissa of the non-shifted operand -- the "1.0" for the
	   shifted operand was already added).  The mantissa can then
	   be shifted right by one bit.  The explicit "1.0" of the
	   shifted mantissa then needs to be replaced by the exponent,
	   incremented by one to account for the normalizing shift.
	   It is faster to combine these operations: do the shift first
	   and combine the additions and subtractions.  If x is the
	   original exponent, the result is:
	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
	   or:
	       shifted mantissa + ((x + 1) << 22)
	   Note that the exponent is incremented here by leaving the
	   explicit "1.0" of the mantissa in the exponent field.  */
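	/* For example, 1.5 + 1.5 (0x3fc00000 each): the raw sum overflows
	   to 0x40800000 with original exponent x = 0x7f, and
	   (0x40800000 >> 1) + ((0x7f + 1) << 22) = 0x40400000, i.e., 3.0.  */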

	/* Shift x right by one bit.  Save the lsb.  */
	mov	a10, a2
	srli	a2, a2, 1

	/* See explanation above.  The original exponent is in a8.  */
	addi	a8, a8, 1
	slli	a8, a8, 22
	add	a2, a2, a8

	/* Return an Infinity if the exponent overflowed.  */
	ball	a2, a6, .Ladd_infinity

	/* Same thing as the "round" code except the msb of the leftover
	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
	bbci.l	a10, 0, 1f
	addi	a2, a2, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_infinity:
	/* Clear the mantissa.  */
	srli	a2, a2, 23
	slli	a2, a2, 23

	/* The sign bit may have been lost in a carry-out.  Put it back.  */
	slli	a8, a8, 1
	or	a2, a2, a8
	leaf_return

.Ladd_exactlyhalf:
	/* Round down to the nearest even value.  */
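	/* (This is the ties-to-even case: the increment has already been
	   applied above, so clearing the lsb restores an even mantissa and
	   keeps an odd one rounded up.)  */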
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return


	/* Subtraction */
__subsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Lsub_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, .Lsub_return_nan_or_inf
	/* Both x and y are either NaN or Inf, so the result is NaN.  */

.Lsub_return_nan:
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  */
	slli	a7, a6, 8
	xor	a2, a3, a7

.Lsub_return_nan_or_inf:
	slli	a7, a2, 9
	bnez	a7, .Lsub_return_nan
	leaf_return

.Lsub_opposite_signs:
	/* Operand signs differ.  Do an addition.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Ladd_same_sign

	.align	4
	.global	__subsf3
	.type	__subsf3, @function
__subsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Lsub_opposite_signs

.Lsub_same_sign:
	/* Check if either exponent == 0xff (i.e., NaN or Infinity).  */
	ball	a2, a6, .Lsub_xnan_or_inf
	ball	a3, a6, .Lsub_ynan_or_inf

	/* Compare the operands.  In contrast to addition, the entire
	   value matters here.  */
	extui	a7, a2, 23, 8
	extui	a8, a3, 23, 8
	bltu	a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Lsub_yexpzero

	/* Replace y sign/exponent with 0x008 (the explicit "1.0" bit).  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Lsub_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference >= 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	sub	a2, a2, a3

	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from a2.  */
	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	beq	a10, a7, .Lsub_round
	j	.Lsub_borrow

.Lsub_yexpzero:
	/* Return zero if the inputs are equal.  (For the non-subnormal
	   case, subtracting the "1.0" will cause a borrow from the exponent
	   and this case can be detected when handling the borrow.)  */
	beq	a2, a3, .Lsub_return_zero

	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
	   y's apparent exponent because subnormals behave as if they had
	   the minimum (nonzero) exponent.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Lsub_yexpdiff
	addi	a8, a8, 1
	j	.Lsub_yexpdiff

.Lsub_returny:
	/* Negate and return y.  */
	slli	a7, a6, 8
	xor	a2, a3, a7
1:	leaf_return

.Lsub_xsmaller:
	/* Same thing as the "ysmaller" code, but with x and y swapped and
	   with y negated.  */
	bnone	a2, a6, .Lsub_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Lsub_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Lsub_returny

	ssr	a10
	movi	a9, 0
	src	a9, a2, a9
	srl	a2, a2

	/* Negate y.  */
	slli	a11, a6, 8
	xor	a3, a3, a11

	sub	a2, a3, a2

	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	bne	a10, a8, .Lsub_borrow

.Lsub_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Lsub_exactlyhalf
1:	leaf_return

.Lsub_xexpzero:
	/* Same as "yexpzero".  */
	beq	a2, a3, .Lsub_return_zero
	slli	a2, a2, 9
	srli	a2, a2, 9
	bnone	a3, a6, .Lsub_xexpdiff
	addi	a7, a7, 1
	j	.Lsub_xexpdiff

.Lsub_return_zero:
	movi	a2, 0
	leaf_return

.Lsub_borrow:
	/* The subtraction has underflowed into the exponent field, so the
	   value needs to be renormalized.  Shift the mantissa left as
	   needed to remove any leading zeros and adjust the exponent
	   accordingly.  If the exponent is not large enough to remove
	   all the leading zeros, the result will be a subnormal value.  */

	slli	a8, a2, 9
	beqz	a8, .Lsub_xzero
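	/* Count the leading zeros of the mantissa; do_nsau uses the NSAU
	   instruction when available and a7/a11 only as scratch registers
	   otherwise.  */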
	do_nsau	a6, a8, a7, a11
	srli	a8, a8, 9
	bge	a6, a10, .Lsub_subnormal
	addi	a6, a6, 1

.Lsub_normalize_shift:
	/* Shift the mantissa (a8/a9) left by a6.  */
	ssl	a6
	src	a8, a8, a9
	sll	a9, a9

	/* Combine the shifted mantissa with the sign and exponent,
	   decrementing the exponent by a6.  (The exponent has already
	   been decremented by one due to the borrow from the subtraction,
	   but adding the mantissa will increment the exponent by one.)  */
	srli	a2, a2, 23
	sub	a2, a2, a6
	slli	a2, a2, 23
	add	a2, a2, a8
	j	.Lsub_round

.Lsub_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Lsub_xzero:
	/* If there was a borrow from the exponent, and the mantissa and
	   guard digits are all zero, then the inputs were equal and the
	   result should be zero.  */
	beqz	a9, .Lsub_return_zero

	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
	addi	a11, a10, -24
	movi	a6, 24
	movltz	a6, a10, a11
	j	.Lsub_normalize_shift

.Lsub_subnormal:
	/* The exponent is too small to shift away all the leading zeros.
	   Set a6 to the current exponent (which has already been
	   decremented by the borrow) so that the exponent of the result
	   will be zero.  Do not add 1 to a6 in this case, because: (1)
	   adding the mantissa will not increment the exponent, so there is
	   no need to subtract anything extra from the exponent to
	   compensate, and (2) the effective exponent of a subnormal is 1
	   not 0 so the shift amount must be 1 smaller than normal. */
	mov	a6, a10
	j	.Lsub_normalize_shift

#endif /* L_addsubsf3 */

#ifdef L_mulsf3

	/* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

	.literal_position
__mulsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Lmul_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Lmul_xnormalized

.Lmul_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* If y is zero, return zero.  */
	beqz	a3, .Lmul_return_zero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Lmul_ynormalized

.Lmul_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

.Lmul_xnan_or_inf:
	/* If y is zero, return NaN.  */
	slli	a8, a3, 1
	beqz	a8, .Lmul_return_nan
	/* If y is NaN, return y.  */
	bnall	a3, a6, .Lmul_returnx
	slli	a8, a3, 9
	beqz	a8, .Lmul_returnx

.Lmul_returny:
	mov	a2, a3

.Lmul_returnx:
	slli	a8, a2, 9
	bnez	a8, .Lmul_return_nan
	/* Set the sign bit and return.  */
	extui	a7, a7, 31, 1
	slli	a2, a2, 1
	ssai	1
	src	a2, a7, a2
	j	.Lmul_done

.Lmul_ynan_or_inf:
	/* If x is zero, return NaN.  */
	slli	a8, a2, 1
	bnez	a8, .Lmul_returny
	mov	a2, a3

.Lmul_return_nan:
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	j	.Lmul_done

	.align	4
	.global	__mulsf3
	.type	__mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 64
#else
	leaf_entry sp, 32
#endif
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Lmul_xnan_or_inf
	ball	a3, a6, .Lmul_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a8, .Lmul_xexpzero
.Lmul_xnormalized:
	beqz	a9, .Lmul_yexpzero
.Lmul_ynormalized:

	/* Add the exponents.  */
	add	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

	mull	a6, a2, a3
	muluh	a2, a2, a3

#else

	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
	   products.  These partial products are:

		0 xl * yl

		1 xl * yh
		2 xh * yl

		3 xh * yh

	   If using the Mul16 or Mul32 multiplier options, these input
	   chunks must be stored in separate registers.  For Mac16, the
	   UMUL.AA.* opcodes can specify that the inputs come from either
	   half of the registers, so there is no need to shift them out
	   ahead of time.  If there is no multiply hardware, the 16-bit
	   chunks can be extracted when setting up the arguments to the
	   separate multiply function.  */
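
	/* The 64-bit product is then assembled as
	       (pp3 << 32) + ((pp1 + pp2) << 16) + pp0,
	   with the carry-outs of the partial sums propagated into the
	   high word.  */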

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Calling a separate multiply function will clobber a0 and requires
	   use of a8 as a temporary, so save those values now.  (The function
	   uses a custom ABI so nothing else needs to be saved.)  */
	s32i	a0, sp, 0
	s32i	a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
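	/* (An unsigned carry-out from the addition above is detected by the
	   sum being smaller than one of the addends, hence the bgeu.)  */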
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into a2.  */
	do_mul(a2, a2, h, a3, h)	/* pp 3 */
	add	a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore values saved on the stack during the multiplication.  */
	l32i	a0, sp, 0
	l32i	a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

	/* Shift left by 9 bits, unless there was a carry-out from the
	   multiply, in which case, shift by 8 bits and increment the
	   exponent.  */
	movi	a4, 9
	srli	a5, a2, 24 - 9
	beqz	a5, 1f
	addi	a4, a4, -1
	addi	a8, a8, 1
1:	ssl	a4
	src	a2, a2, a6
	sll	a6, a6

	/* Subtract the extra bias from the exponent sum (plus one to account
	   for the explicit "1.0" of the mantissa that will be added to the
	   exponent in the final result).  */
	movi	a4, 0x80
	sub	a8, a8, a4

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Lmul_overflow

.Lmul_round:
	/* Round.  */
	bgez	a6, .Lmul_rounded
	addi	a2, a2, 1
	slli	a6, a6, 1
	beqz	a6, .Lmul_exactlyhalf

.Lmul_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a2, a8

.Lmul_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

.Lmul_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	j	.Lmul_rounded

.Lmul_overflow:
	bltz	a8, .Lmul_underflow
	/* Return +/- Infinity.  */
	movi	a8, 0xff
	slli	a2, a8, 23
	j	.Lmul_addsign

.Lmul_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	mov	a9, a6
	ssr	a8
	bgeui	a8, 32, .Lmul_flush_to_zero

	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
	   in a6 (combined with the shifted-out bits currently in a6) for
	   rounding the result.  */
	sll	a6, a2
	srl	a2, a2

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero bits shifted out into a6.  */
	beqz	a9, .Lmul_round
	movi	a9, 1
	or	a6, a6, a9
	j	.Lmul_round

.Lmul_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
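
	/* Shift-and-add multiplication: each iteration examines four bits
	   of src1 and conditionally accumulates src2, 2*src2, 4*src2 and
	   8*src2 (via the do_addx* helper macros), then steps src1 right
	   and src2 left by four bits.  */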
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */

#ifdef L_divsf3

	/* Division */

#if XCHAL_HAVE_FP_DIV

	.align	4
	.global	__divsf3
	.type	__divsf3, @function
__divsf3:
	leaf_entry	sp, 16

	wfr		f1, a2	/* dividend */
	wfr		f2, a3	/* divisor */

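	/* This is the Xtensa FPU divide sequence: div0.s produces an
	   initial approximation to 1/divisor, the maddn.s steps refine it
	   by Newton-Raphson iteration, and mkdadj.s/addexp.s/divn.s handle
	   the exponents, rounding and special cases.  */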
	div0.s		f3, f2
	nexp01.s	f4, f2
	const.s		f5, 1
	maddn.s		f5, f4, f3
	mov.s		f6, f3
	mov.s		f7, f2
	nexp01.s	f2, f1
	maddn.s		f6, f5, f6
	const.s		f5, 1
	const.s		f0, 0
	neg.s		f8, f2
	maddn.s		f5, f4, f6
	maddn.s		f0, f8, f3
	mkdadj.s	f7, f1
	maddn.s		f6, f5, f6
	maddn.s		f8, f4, f0
	const.s		f3, 1
	maddn.s		f3, f4, f6
	maddn.s		f0, f8, f6
	neg.s		f2, f2
	maddn.s		f6, f3, f6
	maddn.s		f2, f4, f0
	addexpm.s	f0, f7
	addexp.s	f6, f7
	divn.s		f0, f2, f6

	rfr		a2, f0

	leaf_return

#else

	.literal_position
__divsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* Check for division by zero.  */
	beqz	a3, .Ldiv_yzero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
	slli	a4, a2, 1
	srli	a4, a4, 1
	srli	a2, a7, 31
	slli	a2, a2, 31
	or	a2, a2, a6
	bnez	a4, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Ldiv_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

.Ldiv_xnan_or_inf:
	/* Set the sign bit of the result.  */
	srli	a7, a3, 31
	slli	a7, a7, 31
	xor	a2, a2, a7
	/* If y is NaN or Inf, return NaN.  */
	ball	a3, a6, .Ldiv_return_nan
	slli	a7, a2, 9
	bnez	a7, .Ldiv_return_nan
	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero.  */
	slli	a8, a3, 9
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	a2, a3

.Ldiv_return_nan:
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	leaf_return

	.align	4
	.global	__divsf3
	.type	__divsf3, @function
__divsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Ldiv_xnan_or_inf
	ball	a3, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	a3, a2, 1f
	slli	a2, a2, 1
	addi	a8, a8, -1
1:
	/* Do the first subtraction and shift.  */
	sub	a2, a2, a3
	slli	a2, a2, 1

	/* Put the quotient into a10.  */
	movi	a10, 1

	/* Divide one bit at a time for 23 bits.  */
	movi	a9, 23
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
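	/* Each iteration is one step of a classic restoring division;
	   in C terms:
	       for (i = 0; i < 23; i++) {
	           q <<= 1;
	           if (rem >= y) { q |= 1; rem -= y; }
	           rem <<= 1;
	       }
	   with the quotient q in a10, the remainder in a2 and y in a3.  */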
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	slli	a10, a10, 1

	/* Is this digit a 0 or 1?  */
	bltu	a2, a3, 1f

	/* Output a 1 and subtract.  */
	addi	a10, a10, 1
	sub	a2, a2, a3

	/* Shift the dividend << 1.  */
1:	slli	a2, a2, 1

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	addi	a8, a8, 0x7e

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in a2.  */
	bltu	a2, a3, .Ldiv_rounded
	addi	a10, a10, 1
	beq	a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7
	leaf_return

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a4, 1	/* 0xff */
	slli	a2, a8, 23
	j	.Ldiv_addsign

.Ldiv_exactlyhalf:
	/* Remainder is exactly half the divisor.  Round even.  */
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_flush_to_zero

	/* Shift a10 right.  Any bits that are shifted out of a10 are
	   saved in a6 for rounding the result.  */
	sll	a6, a10
	srl	a10, a10

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero remainder (in a2) into a6.  */
	beqz	a2, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10 based on the bits shifted out into a6.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a10, a10, 1
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

#endif /* XCHAL_HAVE_FP_DIV */

#endif /* L_divsf3 */

#ifdef L_cmpsf2

	/* Equal and Not Equal */
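
	/* These routines follow the usual libgcc soft-float conventions:
	   for ordered operands, __eqsf2/__nesf2 return zero iff x == y,
	   __gtsf2/__lesf2 return a positive value iff x > y (zero or
	   negative otherwise), and __gesf2/__ltsf2 return a negative value
	   iff x < y.  A NaN operand produces a result that makes each
	   ordered predicate false, and makes __unordsf2 return nonzero.  */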

	.align	4
	.global	__eqsf2
	.global	__nesf2
	.set	__nesf2, __eqsf2
	.type	__eqsf2, @function
__eqsf2:
	leaf_entry sp, 16
	bne	a2, a3, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7f800000
	ball	a2, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Check if the mantissas are nonzero.  */
3:	slli	a7, a2, 9
	j	5f

	/* Check if x and y are zero with different signs.  */
4:	or	a7, a2, a3
	slli	a7, a7, 1

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   of x when exponent(x) == 0xff and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return


	/* Greater Than */

	.align	4
	.global	__gtsf2
	.type	__gtsf2, @function
__gtsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return


	/* Less Than or Equal */

	.align	4
	.global	__lesf2
	.type	__lesf2, @function
__lesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Lle_xneg

	/* Check if x <= y.  */
	bltu	a3, a2, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Check if y <= x.  */
	bgeu	a2, a3, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	bltz	a2, 4b

	/* Check if both x and y are zero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return


	/* Greater Than or Equal */

	.align	4
	.global	__gesf2
	.type	__gesf2, @function
__gesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, -1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, -1
	leaf_return


	/* Less Than */

	.align	4
	.global	__ltsf2
	.type	__ltsf2, @function
__ltsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Llt_xneg

	/* Check if x < y.  */
	bgeu	a2, a3, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Check if y < x.  */
	bltu	a3, a2, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	bgez	a2, 5b

	/* Check if both x and y are zero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return


	/* Unordered */

	.align	4
	.global	__unordsf2
	.type	__unordsf2, @function
__unordsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 3f
1:	ball	a3, a6, 4f
2:	movi	a2, 0
	leaf_return

3:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

4:	slli	a7, a3, 9
	beqz	a7, 2b
	movi	a2, 1
	leaf_return

#endif /* L_cmpsf2 */

#ifdef L_fixsfsi

	.align	4
	.global	__fixsfsi
	.type	__fixsfsi, @function
__fixsfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 32, .Lfixsfsi_maxint
	blti	a4, 1, .Lfixsfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5
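
	/* For example, 2.5 (0x40200000): a4 = 0x80 - 0x7e = 2, the shifted
	   mantissa is 0xa0000000, and 0xa0000000 >> (32 - 2) = 2.  */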

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixsfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfsi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixsfsi_zero:
	movi	a2, 0
	leaf_return

#endif /* L_fixsfsi */

#ifdef L_fixsfdi

	.align	4
	.global	__fixsfdi
	.type	__fixsfdi, @function
__fixsfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 64, .Lfixsfdi_maxint
	blti	a4, 1, .Lfixsfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixsfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixsfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixsfdi_smallshift:
	movi	xl, 0
	sll	xl, xh
	srl	xh, xh
	j	.Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfdi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfdi_maxint:
	slli	a7, a6, 8	/* 0x80000000 */
	bgez	a2, 1f
	mov	xh, a7
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1
	leaf_return

.Lfixsfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

#endif /* L_fixsfdi */

#ifdef L_fixunssfsi

	.align	4
	.global	__fixunssfsi
	.type	__fixunssfsi, @function
__fixunssfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 32, .Lfixunssfsi_maxint
	bltz	a4, .Lfixunssfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunssfsi_bigexp
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunssfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunssfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixunssfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunssfsi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a2, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 8
	leaf_return

#endif /* L_fixunssfsi */

#ifdef L_fixunssfdi

	.align	4
	.global	__fixunssfdi
	.type	__fixunssfdi, @function
__fixunssfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 64, .Lfixunssfdi_maxint
	bltz	a4, .Lfixunssfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunssfdi_bigexp
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunssfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunssfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunssfdi_smallshift:
	movi	xl, 0
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfdi_maxint

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunssfdi_maxint:
	bgez	a2, 1b
2:	slli	xh, a6, 8	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunssfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunssfdi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a7, 2b
	movi	xl, 0
	leaf_return		/* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

	.align	4
	.global	__floatunsisf
	.type	__floatunsisf, @function
__floatunsisf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsisf_return

	/* Set the sign to zero and jump to the floatsisf code.  */
	movi	a7, 0
	j	.Lfloatsisf_normalize

	.align	4
	.global	__floatsisf
	.type	__floatsisf, @function
__floatsisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsisf_return

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsisf_normalize:
	/* Normalize with the first 1 bit in the msb.  */
	do_nsau	a4, a2, a5, a6
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position, with rounding bits in a6.  */
	srli	a2, a5, 8
	slli	a6, a5, (32 - 8)

	/* Set the exponent.  */
	movi	a5, 0x9d	/* 0x7e + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, a2, a5
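
	/* (For example, 3 normalizes to 0xc0000000 with a4 = 30; the
	   mantissa field becomes 0x00c00000, the exponent word is
	   (0x9d - 30) << 23, and the explicit "1.0" bit carries into the
	   exponent to give 0x40400000, i.e., 3.0.)  */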

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, .Lfloatsisf_return
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
	leaf_return

.Lfloatsisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

#endif /* L_floatsisf */

#ifdef L_floatdisf

	.align	4
	.global	__floatundisf
	.type	__floatundisf, @function
__floatundisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdisf code.  */
	movi	a7, 0
	j	.Lfloatdisf_normalize

	.align	4
	.global	__floatdisf
	.type	__floatdisf, @function
__floatdisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value.  */
	bgez	xh, .Lfloatdisf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdisf_normalize
	addi	xh, xh, -1

.Lfloatdisf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  */
	beqz	xh, .Lfloatdisf_bigshift
	do_nsau	a4, xh, a5, a6
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdisf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.  */
	ssai	8
	sll	a5, xl
	src	a6, xh, xl
	srl	xh, xh
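	/* Fold any bits shifted out below the rounding word into a sticky
	   bit so that they still affect the rounding decision.  */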
	beqz	a5, 1f
	movi	a5, 1
	or	a6, a6, a5
1:
	/* Set the exponent.  */
	movi	a5, 0xbd	/* 0x7e + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, 2f
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdisf_exactlyhalf
2:	leaf_return

.Lfloatdisf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32
	j	.Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

#endif /* L_floatdisf */

#if XCHAL_HAVE_FP_SQRT
#ifdef L_sqrtf
	/* Square root */

	.align	4
	.global	__ieee754_sqrtf
	.type	__ieee754_sqrtf, @function
__ieee754_sqrtf:
	leaf_entry	sp, 16

	wfr		f1, a2

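	/* Hardware square root sequence: sqrt0.s produces an initial
	   approximation to the reciprocal square root of the input, and
	   the remaining instructions refine it and apply the exponent
	   adjustment and rounding needed to produce sqrt(x).  */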
	sqrt0.s		f2, f1
	const.s		f3, 0
	maddn.s		f3, f2, f2
	nexp01.s	f4, f1
	const.s		f0, 3
	addexp.s	f4, f0
	maddn.s		f0, f3, f4
	nexp01.s	f3, f1
	neg.s		f5, f3
	maddn.s		f2, f0, f2
	const.s		f0, 0
	const.s		f6, 0
	const.s		f7, 0
	maddn.s		f0, f5, f2
	maddn.s		f6, f2, f4
	const.s		f4, 3
	maddn.s		f7, f4, f2
	maddn.s		f3, f0, f0
	maddn.s		f4, f6, f2
	neg.s		f2, f7
	maddn.s		f0, f3, f2
	maddn.s		f7, f4, f7
	mksadj.s	f2, f1
	nexp01.s	f1, f1
	maddn.s		f1, f0, f0
	neg.s		f3, f7
	addexpm.s	f0, f2
	addexp.s	f3, f2
	divn.s		f0, f1, f3

	rfr		a2, f0

	leaf_return

#endif /* L_sqrtf */
#endif /* XCHAL_HAVE_FP_SQRT */

#if XCHAL_HAVE_FP_RECIP
#ifdef L_recipsf2
	/* Reciprocal */

	.align	4
	.global	__recipsf2
	.type	__recipsf2, @function
__recipsf2:
	leaf_entry	sp, 16

	wfr		f1, a2

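	/* recip0.s produces an initial approximation r to 1/x; each
	   msub.s/maddn.s pair below performs one Newton-Raphson step:
	       e = 1 - x * r;  r = r + r * e.  */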
	recip0.s	f0, f1
	const.s		f2, 1
	msub.s		f2, f1, f0
	maddn.s		f0, f0, f2
	const.s		f2, 1
	msub.s		f2, f1, f0
	maddn.s		f0, f0, f2

	rfr		a2, f0

	leaf_return

#endif /* L_recipsf2 */
#endif /* XCHAL_HAVE_FP_RECIP */

#if XCHAL_HAVE_FP_RSQRT
#ifdef L_rsqrtsf2
	/* Reciprocal square root */

	.align	4
	.global	__rsqrtsf2
	.type	__rsqrtsf2, @function
__rsqrtsf2:
	leaf_entry	sp, 16

	wfr		f1, a2

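	/* rsqrt0.s produces an initial approximation to 1/sqrt(x); the two
	   instruction groups below each perform one Newton-Raphson
	   refinement step.  */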
	rsqrt0.s	f0, f1
	mul.s		f2, f1, f0
	const.s		f3, 3
	mul.s		f4, f3, f0
	const.s		f5, 1
	msub.s		f5, f2, f0
	maddn.s		f0, f4, f5
	mul.s		f2, f1, f0
	mul.s		f1, f3, f0
	const.s		f3, 1
	msub.s		f3, f2, f0
	maddn.s		f0, f1, f3

	rfr		a2, f0

	leaf_return

#endif /* L_rsqrtsf2 */
#endif /* XCHAL_HAVE_FP_RSQRT */
