/* Source imported from a code-browser view of ieee754-sf.S, revision 1.1.1.3
   (viewer navigation header: Home | History | Annotate | Line # | Download |
   only in xtensa).  Each line below retains the viewer's original line
   number prefix.  */
      1 /* IEEE-754 single-precision functions for Xtensa
      2    Copyright (C) 2006-2015 Free Software Foundation, Inc.
      3    Contributed by Bob Wilson (bwilson (at) tensilica.com) at Tensilica.
      4 
      5    This file is part of GCC.
      6 
      7    GCC is free software; you can redistribute it and/or modify it
      8    under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3, or (at your option)
     10    any later version.
     11 
     12    GCC is distributed in the hope that it will be useful, but WITHOUT
     13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     15    License for more details.
     16 
     17    Under Section 7 of GPL version 3, you are granted additional
     18    permissions described in the GCC Runtime Library Exception, version
     19    3.1, as published by the Free Software Foundation.
     20 
     21    You should have received a copy of the GNU General Public License and
     22    a copy of the GCC Runtime Library Exception along with this program;
     23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24    <http://www.gnu.org/licenses/>.  */
     25 
     26 #ifdef __XTENSA_EB__
     27 #define xh a2
     28 #define xl a3
     29 #define yh a4
     30 #define yl a5
     31 #else
     32 #define xh a3
     33 #define xl a2
     34 #define yh a5
     35 #define yl a4
     36 #endif
     37 
     38 /*  Warning!  The branch displacements for some Xtensa branch instructions
     39     are quite small, and this code has been carefully laid out to keep
     40     branch targets in range.  If you change anything, be sure to check that
     41     the assembler is not relaxing anything to branch over a jump.  */
     42 
      43 #ifdef L_negsf2
      44 
/* float __negsf2 (float x)
   In:  a2 = x (IEEE-754 binary32 bit pattern in an address register).
   Out: a2 = -x.  Only the sign bit is toggled, so zeros, subnormals,
   Infinities and NaNs are all negated correctly.  Clobbers a4.  */
      45 	.align	4
      46 	.global	__negsf2
      47 	.type	__negsf2, @function
      48 __negsf2:
      49 	leaf_entry sp, 16
      50 	movi	a4, 0x80000000
      51 	xor	a2, a2, a4
      52 	leaf_return
      53 
      54 #endif /* L_negsf2 */
     55 
      56 #ifdef L_addsubsf3
      57 
      58 	/* Addition */
      59 __addsf3_aux:
      60 
      61 	/* Handle NaNs and Infinities.  (This code is placed before the
      62 	   start of the function just to keep it in range of the limited
      63 	   branch displacements.)  */
      64 
      65 .Ladd_xnan_or_inf:
      66 	/* If y is neither Infinity nor NaN, return x.  */
      67 	bnall	a3, a6, 1f
      68 	/* If x is a NaN, return it.  Otherwise, return y.  */
      69 	slli	a7, a2, 9
      70 	beqz	a7, .Ladd_ynan_or_inf
      71 1:	leaf_return
      72 
      73 .Ladd_ynan_or_inf:
      74 	/* Return y.  */
      75 	mov	a2, a3
      76 	leaf_return
      77 
      78 .Ladd_opposite_signs:
      79 	/* Operand signs differ.  Do a subtraction.  */
      80 	slli	a7, a6, 8
      81 	xor	a3, a3, a7
      82 	j	.Lsub_same_sign
      83 
/* float __addsf3 (float x, float y)
   In:  a2 = x, a3 = y (binary32 bit patterns).
   Out: a2 = x + y, rounded to nearest, ties to even.
   a6 holds the exponent mask 0x7f800000 for the whole function; a7/a8
   hold the operand exponents, a9 collects bits shifted out of the
   smaller operand for rounding, a10 is the exponent difference.  */
      84 	.align	4
      85 	.global	__addsf3
      86 	.type	__addsf3, @function
      87 __addsf3:
      88 	leaf_entry sp, 16
      89 	movi	a6, 0x7f800000
      90 
      91 	/* Check if the two operands have the same sign.  */
      92 	xor	a7, a2, a3
      93 	bltz	a7, .Ladd_opposite_signs
      94 
      95 .Ladd_same_sign:
      96 	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
      97 	ball	a2, a6, .Ladd_xnan_or_inf
      98 	ball	a3, a6, .Ladd_ynan_or_inf
      99 
     100 	/* Compare the exponents.  The smaller operand will be shifted
     101 	   right by the exponent difference and added to the larger
     102 	   one.  */
     103 	extui	a7, a2, 23, 9
     104 	extui	a8, a3, 23, 9
     105 	bltu	a7, a8, .Ladd_shiftx
     106 
     107 .Ladd_shifty:
     108 	/* Check if the smaller (or equal) exponent is zero.  */
     109 	bnone	a3, a6, .Ladd_yexpzero
     110 
     111 	/* Replace y sign/exponent with 0x008.  */
     112 	or	a3, a3, a6
     113 	slli	a3, a3, 8
     114 	srli	a3, a3, 8
     115 
     116 .Ladd_yexpdiff:
     117 	/* Compute the exponent difference.  */
     118 	sub	a10, a7, a8
     119 
     120 	/* Exponent difference > 32 -- just return the bigger value.  */
     121 	bgeui	a10, 32, 1f
     122 
     123 	/* Shift y right by the exponent difference.  Any bits that are
     124 	   shifted out of y are saved in a9 for rounding the result.  */
     125 	ssr	a10
     126 	movi	a9, 0
     127 	src	a9, a3, a9
     128 	srl	a3, a3
     129 
     130 	/* Do the addition.  */
     131 	add	a2, a2, a3
     132 
     133 	/* Check if the add overflowed into the exponent.  */
     134 	extui	a10, a2, 23, 9
     135 	beq	a10, a7, .Ladd_round
     136 	mov	a8, a7
     137 	j	.Ladd_carry
     138 
     139 .Ladd_yexpzero:
     140 	/* y is a subnormal value.  Replace its sign/exponent with zero,
     141 	   i.e., no implicit "1.0", and increment the apparent exponent
     142 	   because subnormals behave as if they had the minimum (nonzero)
     143 	   exponent.  Test for the case when both exponents are zero.  */
     144 	slli	a3, a3, 9
     145 	srli	a3, a3, 9
     146 	bnone	a2, a6, .Ladd_bothexpzero
     147 	addi	a8, a8, 1
     148 	j	.Ladd_yexpdiff
     149 
     150 .Ladd_bothexpzero:
     151 	/* Both exponents are zero.  Handle this as a special case.  There
     152 	   is no need to shift or round, and the normal code for handling
     153 	   a carry into the exponent field will not work because it
     154 	   assumes there is an implicit "1.0" that needs to be added.  */
     155 	add	a2, a2, a3
     156 1:	leaf_return
     157 
     158 .Ladd_xexpzero:
     159 	/* Same as "yexpzero" except skip handling the case when both
     160 	   exponents are zero.  */
     161 	slli	a2, a2, 9
     162 	srli	a2, a2, 9
     163 	addi	a7, a7, 1
     164 	j	.Ladd_xexpdiff
     165 
     166 .Ladd_shiftx:
     167 	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
     168 	   because the exponent difference is always nonzero in this version,
     169 	   the shift sequence can use SLL and skip loading a constant zero.  */
     170 	bnone	a2, a6, .Ladd_xexpzero
     171 
     172 	or	a2, a2, a6
     173 	slli	a2, a2, 8
     174 	srli	a2, a2, 8
     175 
     176 .Ladd_xexpdiff:
     177 	sub	a10, a8, a7
     178 	bgeui	a10, 32, .Ladd_returny
     179 
     180 	ssr	a10
     181 	sll	a9, a2
     182 	srl	a2, a2
     183 
     184 	add	a2, a2, a3
     185 
     186 	/* Check if the add overflowed into the exponent.  */
     187 	extui	a10, a2, 23, 9
     188 	bne	a10, a8, .Ladd_carry
     189 
     190 .Ladd_round:
     191 	/* Round up if the leftover fraction is >= 1/2.  */
     192 	bgez	a9, 1f
     193 	addi	a2, a2, 1
     194 
     195 	/* Check if the leftover fraction is exactly 1/2.  */
     196 	slli	a9, a9, 1
     197 	beqz	a9, .Ladd_exactlyhalf
     198 1:	leaf_return
     199 
     200 .Ladd_returny:
     201 	mov	a2, a3
     202 	leaf_return
     203 
     204 .Ladd_carry:
     205 	/* The addition has overflowed into the exponent field, so the
     206 	   value needs to be renormalized.  The mantissa of the result
     207 	   can be recovered by subtracting the original exponent and
     208 	   adding 0x800000 (which is the explicit "1.0" for the
     209 	   mantissa of the non-shifted operand -- the "1.0" for the
     210 	   shifted operand was already added).  The mantissa can then
     211 	   be shifted right by one bit.  The explicit "1.0" of the
     212 	   shifted mantissa then needs to be replaced by the exponent,
     213 	   incremented by one to account for the normalizing shift.
     214 	   It is faster to combine these operations: do the shift first
     215 	   and combine the additions and subtractions.  If x is the
     216 	   original exponent, the result is:
     217 	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
     218 	   or:
     219 	       shifted mantissa + ((x + 1) << 22)
     220 	   Note that the exponent is incremented here by leaving the
     221 	   explicit "1.0" of the mantissa in the exponent field.  */
     222 
     223 	/* Shift x right by one bit.  Save the lsb.  */
     224 	mov	a10, a2
     225 	srli	a2, a2, 1
     226 
     227 	/* See explanation above.  The original exponent is in a8.  */
     228 	addi	a8, a8, 1
     229 	slli	a8, a8, 22
     230 	add	a2, a2, a8
     231 
     232 	/* Return an Infinity if the exponent overflowed.  */
     233 	ball	a2, a6, .Ladd_infinity
     234 
     235 	/* Same thing as the "round" code except the msb of the leftover
     236 	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
     237 	bbci.l	a10, 0, 1f
     238 	addi	a2, a2, 1
     239 	beqz	a9, .Ladd_exactlyhalf
     240 1:	leaf_return
     241 
     242 .Ladd_infinity:
     243 	/* Clear the mantissa.  */
     244 	srli	a2, a2, 23
     245 	slli	a2, a2, 23
     246 
     247 	/* The sign bit may have been lost in a carry-out.  Put it back.  */
     248 	slli	a8, a8, 1
     249 	or	a2, a2, a8
     250 	leaf_return
     251 
     252 .Ladd_exactlyhalf:
     253 	/* Round down to the nearest even value.  */
     254 	srli	a2, a2, 1
     255 	slli	a2, a2, 1
     256 	leaf_return
     257 
     258 
     259 	/* Subtraction */
     260 __subsf3_aux:
     261 
     262 	/* Handle NaNs and Infinities.  (This code is placed before the
     263 	   start of the function just to keep it in range of the limited
     264 	   branch displacements.)  */
     265 
     266 .Lsub_xnan_or_inf:
     267 	/* If y is neither Infinity nor NaN, return x.  */
     268 	bnall	a3, a6, 1f
     269 	/* Both x and y are either NaN or Inf, so the result is NaN.  */
     270 	movi	a4, 0x400000	/* make it a quiet NaN */
     271 	or	a2, a2, a4
     272 1:	leaf_return
     273 
     274 .Lsub_ynan_or_inf:
     275 	/* Negate y and return it.  */
     276 	slli	a7, a6, 8
     277 	xor	a2, a3, a7
     278 	leaf_return
     279 
     280 .Lsub_opposite_signs:
     281 	/* Operand signs differ.  Do an addition.  */
     282 	slli	a7, a6, 8
     283 	xor	a3, a3, a7
     284 	j	.Ladd_same_sign
     285 
/* float __subsf3 (float x, float y)
   In:  a2 = x, a3 = y (binary32 bit patterns).
   Out: a2 = x - y, rounded to nearest, ties to even.
   a6 holds the exponent mask 0x7f800000 throughout; a7/a8 hold the
   operand exponents, a9 collects the guard bits shifted out of the
   smaller operand, a10 is the exponent difference.  */
     286 	.align	4
     287 	.global	__subsf3
     288 	.type	__subsf3, @function
     289 __subsf3:
     290 	leaf_entry sp, 16
     291 	movi	a6, 0x7f800000
     292 
     293 	/* Check if the two operands have the same sign.  */
     294 	xor	a7, a2, a3
     295 	bltz	a7, .Lsub_opposite_signs
     296 
     297 .Lsub_same_sign:
     298 	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
     299 	ball	a2, a6, .Lsub_xnan_or_inf
     300 	ball	a3, a6, .Lsub_ynan_or_inf
     301 
     302 	/* Compare the operands.  In contrast to addition, the entire
     303 	   value matters here.  */
     304 	extui	a7, a2, 23, 8
     305 	extui	a8, a3, 23, 8
     306 	bltu	a2, a3, .Lsub_xsmaller
     307 
     308 .Lsub_ysmaller:
     309 	/* Check if the smaller (or equal) exponent is zero.  */
     310 	bnone	a3, a6, .Lsub_yexpzero
     311 
     312 	/* Replace y sign/exponent with 0x008.  */
     313 	or	a3, a3, a6
     314 	slli	a3, a3, 8
     315 	srli	a3, a3, 8
     316 
     317 .Lsub_yexpdiff:
     318 	/* Compute the exponent difference.  */
     319 	sub	a10, a7, a8
     320 
     321 	/* Exponent difference > 32 -- just return the bigger value.  */
     322 	bgeui	a10, 32, 1f
     323 
     324 	/* Shift y right by the exponent difference.  Any bits that are
     325 	   shifted out of y are saved in a9 for rounding the result.  */
     326 	ssr	a10
     327 	movi	a9, 0
     328 	src	a9, a3, a9
     329 	srl	a3, a3
     330 
     331 	sub	a2, a2, a3
     332 
     333 	/* Subtract the leftover bits in a9 from zero and propagate any
     334 	   borrow from a2.  */
     335 	neg	a9, a9
     336 	addi	a10, a2, -1
     337 	movnez	a2, a10, a9
     338 
     339 	/* Check if the subtract underflowed into the exponent.  */
     340 	extui	a10, a2, 23, 8
     341 	beq	a10, a7, .Lsub_round
     342 	j	.Lsub_borrow
     343 
     344 .Lsub_yexpzero:
     345 	/* Return zero if the inputs are equal.  (For the non-subnormal
     346 	   case, subtracting the "1.0" will cause a borrow from the exponent
     347 	   and this case can be detected when handling the borrow.)  */
     348 	beq	a2, a3, .Lsub_return_zero
     349 
     350 	/* y is a subnormal value.  Replace its sign/exponent with zero,
     351 	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
     352 	   y's apparent exponent because subnormals behave as if they had
     353 	   the minimum (nonzero) exponent.  */
     354 	slli	a3, a3, 9
     355 	srli	a3, a3, 9
     356 	bnone	a2, a6, .Lsub_yexpdiff
     357 	addi	a8, a8, 1
     358 	j	.Lsub_yexpdiff
     359 
     360 .Lsub_returny:
     361 	/* Negate and return y.  */
     362 	slli	a7, a6, 8
     363 	xor	a2, a3, a7
     364 1:	leaf_return
     365 
     366 .Lsub_xsmaller:
     367 	/* Same thing as the "ysmaller" code, but with x and y swapped and
     368 	   with y negated.  */
     369 	bnone	a2, a6, .Lsub_xexpzero
     370 
     371 	or	a2, a2, a6
     372 	slli	a2, a2, 8
     373 	srli	a2, a2, 8
     374 
     375 .Lsub_xexpdiff:
     376 	sub	a10, a8, a7
     377 	bgeui	a10, 32, .Lsub_returny
     378 
     379 	ssr	a10
     380 	movi	a9, 0
     381 	src	a9, a2, a9
     382 	srl	a2, a2
     383 
     384 	/* Negate y.  */
     385 	slli	a11, a6, 8
     386 	xor	a3, a3, a11
     387 
     388 	sub	a2, a3, a2
     389 
     390 	neg	a9, a9
     391 	addi	a10, a2, -1
     392 	movnez	a2, a10, a9
     393 
     394 	/* Check if the subtract underflowed into the exponent.  */
     395 	extui	a10, a2, 23, 8
     396 	bne	a10, a8, .Lsub_borrow
     397 
     398 .Lsub_round:
     399 	/* Round up if the leftover fraction is >= 1/2.  */
     400 	bgez	a9, 1f
     401 	addi	a2, a2, 1
     402 
     403 	/* Check if the leftover fraction is exactly 1/2.  */
     404 	slli	a9, a9, 1
     405 	beqz	a9, .Lsub_exactlyhalf
     406 1:	leaf_return
     407 
     408 .Lsub_xexpzero:
     409 	/* Same as "yexpzero".  */
     410 	beq	a2, a3, .Lsub_return_zero
     411 	slli	a2, a2, 9
     412 	srli	a2, a2, 9
     413 	bnone	a3, a6, .Lsub_xexpdiff
     414 	addi	a7, a7, 1
     415 	j	.Lsub_xexpdiff
     416 
     417 .Lsub_return_zero:
     418 	movi	a2, 0
     419 	leaf_return
     420 
     421 .Lsub_borrow:
     422 	/* The subtraction has underflowed into the exponent field, so the
     423 	   value needs to be renormalized.  Shift the mantissa left as
     424 	   needed to remove any leading zeros and adjust the exponent
     425 	   accordingly.  If the exponent is not large enough to remove
     426 	   all the leading zeros, the result will be a subnormal value.  */
     427 
     428 	slli	a8, a2, 9
     429 	beqz	a8, .Lsub_xzero
     430 	do_nsau	a6, a8, a7, a11
     431 	srli	a8, a8, 9
     432 	bge	a6, a10, .Lsub_subnormal
     433 	addi	a6, a6, 1
     434 
     435 .Lsub_normalize_shift:
     436 	/* Shift the mantissa (a8/a9) left by a6.  */
     437 	ssl	a6
     438 	src	a8, a8, a9
     439 	sll	a9, a9
     440 
     441 	/* Combine the shifted mantissa with the sign and exponent,
     442 	   decrementing the exponent by a6.  (The exponent has already
     443 	   been decremented by one due to the borrow from the subtraction,
     444 	   but adding the mantissa will increment the exponent by one.)  */
     445 	srli	a2, a2, 23
     446 	sub	a2, a2, a6
     447 	slli	a2, a2, 23
     448 	add	a2, a2, a8
     449 	j	.Lsub_round
     450 
     451 .Lsub_exactlyhalf:
     452 	/* Round down to the nearest even value.  */
     453 	srli	a2, a2, 1
     454 	slli	a2, a2, 1
     455 	leaf_return
     456 
     457 .Lsub_xzero:
     458 	/* If there was a borrow from the exponent, and the mantissa and
     459 	   guard digits are all zero, then the inputs were equal and the
     460 	   result should be zero.  */
     461 	beqz	a9, .Lsub_return_zero
     462 
     463 	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
     464 	addi	a11, a10, -24
     465 	movi	a6, 24
     466 	movltz	a6, a10, a11
     467 	j	.Lsub_normalize_shift
     468 
     469 .Lsub_subnormal:
     470 	/* The exponent is too small to shift away all the leading zeros.
     471 	   Set a6 to the current exponent (which has already been
     472 	   decremented by the borrow) so that the exponent of the result
     473 	   will be zero.  Do not add 1 to a6 in this case, because: (1)
     474 	   adding the mantissa will not increment the exponent, so there is
     475 	   no need to subtract anything extra from the exponent to
     476 	   compensate, and (2) the effective exponent of a subnormal is 1
     477 	   not 0 so the shift amount must be 1 smaller than normal. */
     478 	mov	a6, a10
     479 	j	.Lsub_normalize_shift
     480 
     481 #endif /* L_addsubsf3 */
    482 
     483 #ifdef L_mulsf3
     484 
     485 	/* Multiplication */
     486 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
     487 #define XCHAL_NO_MUL 1
     488 #endif
     489 
     490 	.literal_position
     491 __mulsf3_aux:
     492 
     493 	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
     494 	   (This code is placed before the start of the function just to
     495 	   keep it in range of the limited branch displacements.)  */
     496 
     497 .Lmul_xexpzero:
     498 	/* Clear the sign bit of x.  */
     499 	slli	a2, a2, 1
     500 	srli	a2, a2, 1
     501 
     502 	/* If x is zero, return zero.  */
     503 	beqz	a2, .Lmul_return_zero
     504 
     505 	/* Normalize x.  Adjust the exponent in a8.  */
     506 	do_nsau	a10, a2, a11, a12
     507 	addi	a10, a10, -8
     508 	ssl	a10
     509 	sll	a2, a2
     510 	movi	a8, 1
     511 	sub	a8, a8, a10
     512 	j	.Lmul_xnormalized
     513 
     514 .Lmul_yexpzero:
     515 	/* Clear the sign bit of y.  */
     516 	slli	a3, a3, 1
     517 	srli	a3, a3, 1
     518 
     519 	/* If y is zero, return zero.  */
     520 	beqz	a3, .Lmul_return_zero
     521 
     522 	/* Normalize y.  Adjust the exponent in a9.  */
     523 	do_nsau	a10, a3, a11, a12
     524 	addi	a10, a10, -8
     525 	ssl	a10
     526 	sll	a3, a3
     527 	movi	a9, 1
     528 	sub	a9, a9, a10
     529 	j	.Lmul_ynormalized
     530 
     531 .Lmul_return_zero:
     532 	/* Return zero with the appropriate sign bit.  */
     533 	srli	a2, a7, 31
     534 	slli	a2, a2, 31
     535 	j	.Lmul_done
     536 
     537 .Lmul_xnan_or_inf:
     538 	/* If y is zero, return NaN.  */
     539 	slli	a8, a3, 1
     540 	bnez	a8, 1f
     541 	movi	a4, 0x400000	/* make it a quiet NaN */
     542 	or	a2, a2, a4
     543 	j	.Lmul_done
     544 1:
     545 	/* If y is NaN, return y.  */
     546 	bnall	a3, a6, .Lmul_returnx
     547 	slli	a8, a3, 9
     548 	beqz	a8, .Lmul_returnx
     549 
     550 .Lmul_returny:
     551 	mov	a2, a3
     552 
     553 .Lmul_returnx:
     554 	/* Set the sign bit and return.  */
     555 	extui	a7, a7, 31, 1
     556 	slli	a2, a2, 1
     557 	ssai	1
     558 	src	a2, a7, a2
     559 	j	.Lmul_done
     560 
     561 .Lmul_ynan_or_inf:
     562 	/* If x is zero, return NaN.  */
     563 	slli	a8, a2, 1
     564 	bnez	a8, .Lmul_returny
     565 	movi	a7, 0x400000	/* make it a quiet NaN */
     566 	or	a2, a3, a7
     567 	j	.Lmul_done
     568 
/* float __mulsf3 (float x, float y)
   In:  a2 = x, a3 = y (binary32 bit patterns).
   Out: a2 = x * y, rounded to nearest, ties to even.
   a7 carries the sign of the result (from x XOR y), a8 the exponent
   sum, a6 initially the exponent mask 0x7f800000.  The 48-bit product
   of the two 24-bit mantissas is formed in a2 (high) / a6 (low).  */
     569 	.align	4
     570 	.global	__mulsf3
     571 	.type	__mulsf3, @function
     572 __mulsf3:
     573 #if __XTENSA_CALL0_ABI__
     574 	leaf_entry sp, 32
     575 	addi	sp, sp, -32
     576 	s32i	a12, sp, 16
     577 	s32i	a13, sp, 20
     578 	s32i	a14, sp, 24
     579 	s32i	a15, sp, 28
     580 #elif XCHAL_NO_MUL
     581 	/* This is not really a leaf function; allocate enough stack space
     582 	   to allow CALL12s to a helper function.  */
     583 	leaf_entry sp, 64
     584 #else
     585 	leaf_entry sp, 32
     586 #endif
     587 	movi	a6, 0x7f800000
     588 
     589 	/* Get the sign of the result.  */
     590 	xor	a7, a2, a3
     591 
     592 	/* Check for NaN and infinity.  */
     593 	ball	a2, a6, .Lmul_xnan_or_inf
     594 	ball	a3, a6, .Lmul_ynan_or_inf
     595 
     596 	/* Extract the exponents.  */
     597 	extui	a8, a2, 23, 8
     598 	extui	a9, a3, 23, 8
     599 
     600 	beqz	a8, .Lmul_xexpzero
     601 .Lmul_xnormalized:
     602 	beqz	a9, .Lmul_yexpzero
     603 .Lmul_ynormalized:
     604 
     605 	/* Add the exponents.  */
     606 	add	a8, a8, a9
     607 
     608 	/* Replace sign/exponent fields with explicit "1.0".  */
     609 	movi	a10, 0xffffff
     610 	or	a2, a2, a6
     611 	and	a2, a2, a10
     612 	or	a3, a3, a6
     613 	and	a3, a3, a10
     614 
     615 	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */
     616 
     617 #if XCHAL_HAVE_MUL32_HIGH
     618 
     619 	mull	a6, a2, a3
     620 	muluh	a2, a2, a3
     621 
     622 #else
     623 
     624 	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
     625 	   products.  These partial products are:
     626 
     627 		0 xl * yl
     628 
     629 		1 xl * yh
     630 		2 xh * yl
     631 
     632 		3 xh * yh
     633 
     634 	   If using the Mul16 or Mul32 multiplier options, these input
     635 	   chunks must be stored in separate registers.  For Mac16, the
     636 	   UMUL.AA.* opcodes can specify that the inputs come from either
     637 	   half of the registers, so there is no need to shift them out
     638 	   ahead of time.  If there is no multiply hardware, the 16-bit
     639 	   chunks can be extracted when setting up the arguments to the
     640 	   separate multiply function.  */
     641 
     642 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
     643 	/* Calling a separate multiply function will clobber a0 and requires
     644 	   use of a8 as a temporary, so save those values now.  (The function
     645 	   uses a custom ABI so nothing else needs to be saved.)  */
     646 	s32i	a0, sp, 0
     647 	s32i	a8, sp, 4
     648 #endif
     649 
     650 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
     651 
     652 #define a2h a4
     653 #define a3h a5
     654 
     655 	/* Get the high halves of the inputs into registers.  */
     656 	srli	a2h, a2, 16
     657 	srli	a3h, a3, 16
     658 
     659 #define a2l a2
     660 #define a3l a3
     661 
     662 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
     663 	/* Clear the high halves of the inputs.  This does not matter
     664 	   for MUL16 because the high bits are ignored.  */
     665 	extui	a2, a2, 0, 16
     666 	extui	a3, a3, 0, 16
     667 #endif
     668 #endif /* MUL16 || MUL32 */
     669 
     670 
     671 #if XCHAL_HAVE_MUL16
     672 
     673 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
     674 	mul16u	dst, xreg ## xhalf, yreg ## yhalf
     675 
     676 #elif XCHAL_HAVE_MUL32
     677 
     678 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
     679 	mull	dst, xreg ## xhalf, yreg ## yhalf
     680 
     681 #elif XCHAL_HAVE_MAC16
     682 
     683 /* The preprocessor insists on inserting a space when concatenating after
     684    a period in the definition of do_mul below.  These macros are a workaround
     685    using underscores instead of periods when doing the concatenation.  */
     686 #define umul_aa_ll umul.aa.ll
     687 #define umul_aa_lh umul.aa.lh
     688 #define umul_aa_hl umul.aa.hl
     689 #define umul_aa_hh umul.aa.hh
     690 
     691 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
     692 	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
     693 	rsr	dst, ACCLO
     694 
     695 #else /* no multiply hardware */
     696 
     697 #define set_arg_l(dst, src) \
     698 	extui	dst, src, 0, 16
     699 #define set_arg_h(dst, src) \
     700 	srli	dst, src, 16
     701 
     702 #if __XTENSA_CALL0_ABI__
     703 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
     704 	set_arg_ ## xhalf (a13, xreg); \
     705 	set_arg_ ## yhalf (a14, yreg); \
     706 	call0	.Lmul_mulsi3; \
     707 	mov	dst, a12
     708 #else
     709 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
     710 	set_arg_ ## xhalf (a14, xreg); \
     711 	set_arg_ ## yhalf (a15, yreg); \
     712 	call12	.Lmul_mulsi3; \
     713 	mov	dst, a14
     714 #endif /* __XTENSA_CALL0_ABI__ */
     715 
     716 #endif /* no multiply hardware */
     717 
     718 	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
     719 	do_mul(a6, a2, l, a3, h)	/* pp 1 */
     720 	do_mul(a11, a2, h, a3, l)	/* pp 2 */
     721 	movi	a9, 0
     722 	add	a6, a6, a11
     723 	bgeu	a6, a11, 1f
     724 	addi	a9, a9, 1
     725 1:
     726 	/* Shift the high half of a9/a6 into position in a9.  Note that
     727 	   this value can be safely incremented without any carry-outs.  */
     728 	ssai	16
     729 	src	a9, a9, a6
     730 
     731 	/* Compute the low word into a6.  */
     732 	do_mul(a11, a2, l, a3, l)	/* pp 0 */
     733 	sll	a6, a6
     734 	add	a6, a6, a11
     735 	bgeu	a6, a11, 1f
     736 	addi	a9, a9, 1
     737 1:
     738 	/* Compute the high word into a2.  */
     739 	do_mul(a2, a2, h, a3, h)	/* pp 3 */
     740 	add	a2, a2, a9
     741 
     742 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
     743 	/* Restore values saved on the stack during the multiplication.  */
     744 	l32i	a0, sp, 0
     745 	l32i	a8, sp, 4
     746 #endif
     747 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
     748 
     749 	/* Shift left by 9 bits, unless there was a carry-out from the
     750 	   multiply, in which case, shift by 8 bits and increment the
     751 	   exponent.  */
     752 	movi	a4, 9
     753 	srli	a5, a2, 24 - 9
     754 	beqz	a5, 1f
     755 	addi	a4, a4, -1
     756 	addi	a8, a8, 1
     757 1:	ssl	a4
     758 	src	a2, a2, a6
     759 	sll	a6, a6
     760 
     761 	/* Subtract the extra bias from the exponent sum (plus one to account
     762 	   for the explicit "1.0" of the mantissa that will be added to the
     763 	   exponent in the final result).  */
     764 	movi	a4, 0x80
     765 	sub	a8, a8, a4
     766 
     767 	/* Check for over/underflow.  The value in a8 is one less than the
     768 	   final exponent, so values in the range 0..fd are OK here.  */
     769 	movi	a4, 0xfe
     770 	bgeu	a8, a4, .Lmul_overflow
     771 
     772 .Lmul_round:
     773 	/* Round.  */
     774 	bgez	a6, .Lmul_rounded
     775 	addi	a2, a2, 1
     776 	slli	a6, a6, 1
     777 	beqz	a6, .Lmul_exactlyhalf
     778 
     779 .Lmul_rounded:
     780 	/* Add the exponent to the mantissa.  */
     781 	slli	a8, a8, 23
     782 	add	a2, a2, a8
     783 
     784 .Lmul_addsign:
     785 	/* Add the sign bit.  */
     786 	srli	a7, a7, 31
     787 	slli	a7, a7, 31
     788 	or	a2, a2, a7
     789 
     790 .Lmul_done:
     791 #if __XTENSA_CALL0_ABI__
     792 	l32i	a12, sp, 16
     793 	l32i	a13, sp, 20
     794 	l32i	a14, sp, 24
     795 	l32i	a15, sp, 28
     796 	addi	sp, sp, 32
     797 #endif
     798 	leaf_return
     799 
     800 .Lmul_exactlyhalf:
     801 	/* Round down to the nearest even value.  */
     802 	srli	a2, a2, 1
     803 	slli	a2, a2, 1
     804 	j	.Lmul_rounded
     805 
     806 .Lmul_overflow:
     807 	bltz	a8, .Lmul_underflow
     808 	/* Return +/- Infinity.  */
     809 	movi	a8, 0xff
     810 	slli	a2, a8, 23
     811 	j	.Lmul_addsign
     812 
     813 .Lmul_underflow:
     814 	/* Create a subnormal value, where the exponent field contains zero,
     815 	   but the effective exponent is 1.  The value of a8 is one less than
     816 	   the actual exponent, so just negate it to get the shift amount.  */
     817 	neg	a8, a8
     818 	mov	a9, a6
     819 	ssr	a8
     820 	bgeui	a8, 32, .Lmul_flush_to_zero
     821 
     822 	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
     823 	   in a6 (combined with the shifted-out bits currently in a6) for
     824 	   rounding the result.  */
     825 	sll	a6, a2
     826 	srl	a2, a2
     827 
     828 	/* Set the exponent to zero.  */
     829 	movi	a8, 0
     830 
     831 	/* Pack any nonzero bits shifted out into a6.  */
     832 	beqz	a9, .Lmul_round
     833 	movi	a9, 1
     834 	or	a6, a6, a9
     835 	j	.Lmul_round
     836 
     837 .Lmul_flush_to_zero:
     838 	/* Return zero with the appropriate sign bit.  */
     839 	srli	a2, a7, 31
     840 	slli	a2, a2, 31
     841 	j	.Lmul_done
     842 
     843 #if XCHAL_NO_MUL
     844 
     845 	/* For Xtensa processors with no multiply hardware, this simplified
     846 	   version of _mulsi3 is used for multiplying 16-bit chunks of
     847 	   the floating-point mantissas.  When using CALL0, this function
     848 	   uses a custom ABI: the inputs are passed in a13 and a14, the
     849 	   result is returned in a12, and a8 and a15 are clobbered.  */
     850 	.align	4
     851 .Lmul_mulsi3:
     852 	leaf_entry sp, 16
/* Classic shift-and-add multiply, consuming four multiplier bits per
   iteration: each bit of src1 conditionally (via MOVNEZ) adds src2,
   2*src2, 4*src2 or 8*src2 into dst, then src1 is shifted right and
   src2 left by 4.  Terminates as soon as src1 is exhausted.  */
     853 	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
     854 	movi	\dst, 0
     855 1:	add	\tmp1, \src2, \dst
     856 	extui	\tmp2, \src1, 0, 1
     857 	movnez	\dst, \tmp1, \tmp2
     858 
     859 	do_addx2 \tmp1, \src2, \dst, \tmp1
     860 	extui	\tmp2, \src1, 1, 1
     861 	movnez	\dst, \tmp1, \tmp2
     862 
     863 	do_addx4 \tmp1, \src2, \dst, \tmp1
     864 	extui	\tmp2, \src1, 2, 1
     865 	movnez	\dst, \tmp1, \tmp2
     866 
     867 	do_addx8 \tmp1, \src2, \dst, \tmp1
     868 	extui	\tmp2, \src1, 3, 1
     869 	movnez	\dst, \tmp1, \tmp2
     870 
     871 	srli	\src1, \src1, 4
     872 	slli	\src2, \src2, 4
     873 	bnez	\src1, 1b
     874 	.endm
     875 #if __XTENSA_CALL0_ABI__
     876 	mul_mulsi3_body a12, a13, a14, a15, a8
     877 #else
     878 	/* The result will be written into a2, so save that argument in a4.  */
     879 	mov	a4, a2
     880 	mul_mulsi3_body a2, a4, a3, a5, a6
     881 #endif
     882 	leaf_return
     883 #endif /* XCHAL_NO_MUL */
     884 #endif /* L_mulsf3 */
    885 
     886 #ifdef L_divsf3
     887 
     888 	.literal_position
/* Out-of-line special cases for __divsf3 (below): zeros, subnormals,
   NaNs and Infinities.  On entry from __divsf3: a2 = x, a3 = y,
   a6 = 0x7f800000 (exponent mask), a7 = x XOR y (sign of result).  */
     889 	/* Division */
     890 __divsf3_aux:
     891 
     892 	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
     893 	   (This code is placed before the start of the function just to
     894 	   keep it in range of the limited branch displacements.)  */
     895 
     896 .Ldiv_yexpzero:
     897 	/* Clear the sign bit of y.  */
     898 	slli	a3, a3, 1
     899 	srli	a3, a3, 1
     900 
     901 	/* Check for division by zero.  */
     902 	beqz	a3, .Ldiv_yzero
     903 
     904 	/* Normalize y.  Adjust the exponent in a9.  */
     905 	do_nsau	a10, a3, a4, a5
     906 	addi	a10, a10, -8
     907 	ssl	a10
     908 	sll	a3, a3
     909 	movi	a9, 1
     910 	sub	a9, a9, a10
     911 	j	.Ldiv_ynormalized
     912 
     913 .Ldiv_yzero:
     914 	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
     915 	slli	a4, a2, 1
     916 	srli	a4, a4, 1
     917 	srli	a2, a7, 31
     918 	slli	a2, a2, 31
     919 	or	a2, a2, a6
     920 	bnez	a4, 1f
     921 	movi	a4, 0x400000	/* make it a quiet NaN */
     922 	or	a2, a2, a4
     923 1:	leaf_return
     924 
     925 .Ldiv_xexpzero:
     926 	/* Clear the sign bit of x.  */
     927 	slli	a2, a2, 1
     928 	srli	a2, a2, 1
     929 
     930 	/* If x is zero, return zero.  */
     931 	beqz	a2, .Ldiv_return_zero
     932 
     933 	/* Normalize x.  Adjust the exponent in a8.  */
     934 	do_nsau	a10, a2, a4, a5
     935 	addi	a10, a10, -8
     936 	ssl	a10
     937 	sll	a2, a2
     938 	movi	a8, 1
     939 	sub	a8, a8, a10
     940 	j	.Ldiv_xnormalized
     941 
     942 .Ldiv_return_zero:
     943 	/* Return zero with the appropriate sign bit.  */
     944 	srli	a2, a7, 31
     945 	slli	a2, a2, 31
     946 	leaf_return
     947 
     948 .Ldiv_xnan_or_inf:
     949 	/* Set the sign bit of the result.  */
     950 	srli	a7, a3, 31
     951 	slli	a7, a7, 31
     952 	xor	a2, a2, a7
     953 	/* If y is NaN or Inf, return NaN.  */
     954 	bnall	a3, a6, 1f
     955 	movi	a4, 0x400000	/* make it a quiet NaN */
     956 	or	a2, a2, a4
     957 1:	leaf_return
     958 
     959 .Ldiv_ynan_or_inf:
     960 	/* If y is Infinity, return zero.  */
     961 	slli	a8, a3, 9
     962 	beqz	a8, .Ldiv_return_zero
     963 	/* y is NaN; return it.  */
     964 	mov	a2, a3
     965 	leaf_return
    966 
	.align	4
	.global	__divsf3
	.type	__divsf3, @function
	/* float __divsf3 (float x, float y)
	   Single-precision division, round to nearest, ties to even.
	   In:  a2 = x, a3 = y.  Out: a2 = x / y.
	   Register roles: a6 = exponent mask 0x7f800000, a7 = x ^ y
	   (sign of result in high bit), a8 = result exponent, a10 =
	   quotient mantissa built by the divide loop.  */
__divsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Ldiv_xnan_or_inf
	ball	a3, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	a3, a2, 1f
	slli	a2, a2, 1
	addi	a8, a8, -1
1:
	/* Do the first subtraction and shift.  */
	sub	a2, a2, a3
	slli	a2, a2, 1

	/* Put the quotient into a10.  */
	movi	a10, 1

	/* Divide one bit at a time for 23 bits.  */
	movi	a9, 23
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	slli	a10, a10, 1

	/* Is this digit a 0 or 1?  */
	bltu	a2, a3, 1f

	/* Output a 1 and subtract.  */
	addi	a10, a10, 1
	sub	a2, a2, a3

	/* Shift the dividend << 1.  */
1:	slli	a2, a2, 1

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	addi	a8, a8, 0x7e

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in a2.  */
	bltu	a2, a3, .Ldiv_rounded
	addi	a10, a10, 1
	beq	a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7
	leaf_return

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a4, 1	/* 0xff */
	slli	a2, a8, 23
	j	.Ldiv_addsign

.Ldiv_exactlyhalf:
	/* Remainder is exactly half the divisor.  Round even.  */
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_flush_to_zero

	/* Shift a10 right.  Any bits that are shifted out of a10 are
	   saved in a6 for rounding the result.  */
	sll	a6, a10
	srl	a10, a10

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero remainder (in a2) into a6.  */
	beqz	a2, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10 based on the bits shifted out into a6.
	   High bit of a6 set -> leftover fraction >= 1/2: round up,
	   then round back to even if it was exactly 1/2.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a10, a10, 1
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return
   1114 
   1115 #endif /* L_divsf3 */
   1116 
   1117 #ifdef L_cmpsf2
   1118 
	/* Equal and Not Equal */

	.align	4
	.global	__eqsf2
	.global	__nesf2
	.set	__nesf2, __eqsf2
	.type	__eqsf2, @function
	/* int __eqsf2 (float x, float y)
	   Returns 0 if x == y, 1 otherwise.  NaN compares unequal to
	   everything, including itself; +0 and -0 compare equal.
	   __nesf2 is an alias with the identical return convention.  */
__eqsf2:
	leaf_entry sp, 16
	bne	a2, a3, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7f800000
	ball	a2, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Check if the mantissas are nonzero.  */
3:	slli	a7, a2, 9
	j	5f

	/* Check if x and y are zero with different signs.  */
4:	or	a7, a2, a3
	slli	a7, a7, 1

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   or x when exponent(x) = 0x7f8 and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return
   1156 
   1157 
	/* Greater Than */

	.align	4
	.global	__gtsf2
	.type	__gtsf2, @function
	/* int __gtsf2 (float x, float y)
	   Returns 1 if x > y, otherwise 0.  If either operand is NaN
	   the result is 0 ("not greater").  Shares the .Lle_cmp tail
	   with __lesf2 below.  */
__gtsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return
   1180 
   1181 
	/* Less Than or Equal */

	.align	4
	.global	__lesf2
	.type	__lesf2, @function
	/* int __lesf2 (float x, float y)
	   Returns 0 if x <= y, otherwise 1.  If either operand is NaN
	   the result is 1 ("not less than or equal").  */
__lesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Shared tail for __gtsf2/__lesf2: neither operand is NaN here.
	   Returns 0 if x <= y, else 1.  */
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Lle_xneg

	/* Check if x <= y.  */
	bltu	a3, a2, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Check if y <= x.  (Negative floats compare in reverse order
	   as unsigned bit patterns.)  */
	bgeu	a2, a3, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	bltz	a2, 4b

	/* Check if both x and y are zero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return
   1234 
   1235 
	/* Greater Than or Equal */

	.align	4
	.global	__gesf2
	.type	__gesf2, @function
	/* int __gesf2 (float x, float y)
	   Returns 0 if x >= y, otherwise -1.  If either operand is NaN
	   the result is -1 ("not greater than or equal").  Shares the
	   .Llt_cmp tail with __ltsf2 below.  */
__gesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, -1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, -1
	leaf_return
   1258 
   1259 
	/* Less Than */

	.align	4
	.global	__ltsf2
	.type	__ltsf2, @function
	/* int __ltsf2 (float x, float y)
	   Returns -1 if x < y, otherwise 0.  If either operand is NaN
	   the result is 0 ("not less").  */
__ltsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Shared tail for __gesf2/__ltsf2: neither operand is NaN here.
	   Returns -1 if x < y, else 0.  */
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Llt_xneg

	/* Check if x < y.  */
	bgeu	a2, a3, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Check if y < x.  (Negative floats compare in reverse order
	   as unsigned bit patterns.)  */
	bltu	a3, a2, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	bgez	a2, 5b

	/* Check if both x and y are nonzero.  (+0 and -0 are equal,
	   not less-than.)  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return
   1312 
   1313 
	/* Unordered */

	.align	4
	.global	__unordsf2
	.type	__unordsf2, @function
	/* int __unordsf2 (float x, float y)
	   Returns 1 if either operand is a NaN, otherwise 0.  */
__unordsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 3f
1:	ball	a3, a6, 4f
2:	movi	a2, 0
	leaf_return

	/* x has an all-ones exponent: NaN iff its mantissa is nonzero.  */
3:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

	/* y has an all-ones exponent: NaN iff its mantissa is nonzero.  */
4:	slli	a7, a3, 9
	beqz	a7, 2b
	movi	a2, 1
	leaf_return
   1336 
   1337 #endif /* L_cmpsf2 */
   1338 
   1339 #ifdef L_fixsfsi
   1340 
	.align	4
	.global	__fixsfsi
	.type	__fixsfsi, @function
	/* int __fixsfsi (float x)
	   Convert x (in a2) to a signed 32-bit integer, truncating toward
	   zero.  Out-of-range values saturate to 0x7fffffff / 0x80000000;
	   NaN converts to 0x7fffffff.  */
__fixsfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 32, .Lfixsfsi_maxint
	blti	a4, 1, .Lfixsfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7	/* a7 < 0 iff the input was negative */
	leaf_return

.Lfixsfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfsi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, a2	/* nonnegative -> 0x7fffffff */
	mov	a2, a4
	leaf_return

.Lfixsfsi_zero:
	/* abs(x) < 1 truncates to zero.  */
	movi	a2, 0
	leaf_return
   1388 
   1389 #endif /* L_fixsfsi */
   1390 
   1391 #ifdef L_fixsfdi
   1392 
	.align	4
	.global	__fixsfdi
	.type	__fixsfdi, @function
	/* long long __fixsfdi (float x)
	   Convert x (in a2) to a signed 64-bit integer (returned in
	   xh:xl), truncating toward zero.  Out-of-range values saturate
	   to INT64_MAX / INT64_MIN; NaN converts to INT64_MAX.  */
__fixsfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 64, .Lfixsfdi_maxint
	blti	a4, 1, .Lfixsfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixsfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixsfdi_shifted:
	/* Negate the 64-bit result if sign != 0 (two's complement:
	   negate both halves, then borrow from the high half if the
	   low half was nonzero).  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixsfdi_smallshift:
	/* Shift amount < 32: the result spans both result halves.  */
	movi	xl, 0
	sll	xl, xh
	srl	xh, xh
	j	.Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfdi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfdi_maxint:
	slli	a7, a6, 8	/* 0x80000000 */
	bgez	a2, 1f
	mov	xh, a7		/* negative: 0x8000000000000000 */
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1		/* positive: 0x7fffffffffffffff */
	leaf_return

.Lfixsfdi_zero:
	/* abs(x) < 1 truncates to zero.  */
	movi	xh, 0
	movi	xl, 0
	leaf_return
   1457 
   1458 #endif /* L_fixsfdi */
   1459 
   1460 #ifdef L_fixunssfsi
   1461 
	.align	4
	.global	__fixunssfsi
	.type	__fixunssfsi, @function
	/* unsigned int __fixunssfsi (float x)
	   Convert x (in a2) to an unsigned 32-bit integer, truncating
	   toward zero.  Values >= 2^32 and NaN convert to 0xffffffff;
	   negative infinity converts to 0x80000000.  */
__fixunssfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 32, .Lfixunssfsi_maxint
	bltz	a4, .Lfixunssfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunssfsi_bigexp
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7	/* a7 < 0 iff the input was negative */
	leaf_return

.Lfixunssfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunssfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, a2	/* nonnegative -> 0xffffffff */
	mov	a2, a4
	leaf_return

.Lfixunssfsi_zero:
	/* abs(x) < 1 truncates to zero.  */
	movi	a2, 0
	leaf_return

.Lfixunssfsi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a2, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 8
	leaf_return
   1522 
   1523 #endif /* L_fixunssfsi */
   1524 
   1525 #ifdef L_fixunssfdi
   1526 
	.align	4
	.global	__fixunssfdi
	.type	__fixunssfdi, @function
	/* unsigned long long __fixunssfdi (float x)
	   Convert x (in a2) to an unsigned 64-bit integer (returned in
	   xh:xl), truncating toward zero.  Values >= 2^64 and NaN convert
	   to 0xffffffffffffffff; negative infinity converts to
	   0x8000000000000000.  */
__fixunssfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 64, .Lfixunssfdi_maxint
	bltz	a4, .Lfixunssfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunssfdi_bigexp
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunssfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunssfdi_shifted:
	/* Negate the 64-bit result if sign != 0 (two's complement:
	   negate both halves, then borrow from the high half if the
	   low half was nonzero).  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunssfdi_smallshift:
	/* Shift amount < 32: the result spans both result halves.  */
	movi	xl, 0
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfdi_maxint

	/* Translate NaN to 0xffffffff.... */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunssfdi_maxint:
	bgez	a2, 1b
2:	slli	xh, a6, 8	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunssfdi_zero:
	/* abs(x) < 1 truncates to zero.  */
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunssfdi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a7, 2b
	movi	xl, 0
	leaf_return		/* no shift needed */
   1596 
   1597 #endif /* L_fixunssfdi */
   1598 
   1599 #ifdef L_floatsisf
   1600 
	.align	4
	.global	__floatunsisf
	.type	__floatunsisf, @function
	/* float __floatunsisf (unsigned int x)
	   Convert an unsigned 32-bit integer (in a2) to a float, rounding
	   to nearest, ties to even.  Falls into the __floatsisf code with
	   the sign (a7) forced to zero.  */
__floatunsisf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsisf_return

	/* Set the sign to zero and jump to the floatsisf code.  */
	movi	a7, 0
	j	.Lfloatsisf_normalize

	.align	4
	.global	__floatsisf
	.type	__floatsisf, @function
	/* float __floatsisf (int x)
	   Convert a signed 32-bit integer (in a2) to a float, rounding
	   to nearest, ties to even.  */
__floatsisf:
	leaf_entry sp, 16

	/* Check for zero.  (Input zero is already the float +0.0.)  */
	beqz	a2, .Lfloatsisf_return

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsisf_normalize:
	/* Normalize with the first 1 bit in the msb.  */
	do_nsau	a4, a2, a5, a6	/* a4 = leading zero count */
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position, with rounding bits in a6.  */
	srli	a2, a5, 8
	slli	a6, a5, (32 - 8)

	/* Set the exponent.  */
	movi	a5, 0x9d	/* 0x7e + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, a2, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, .Lfloatsisf_return
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
	leaf_return

.Lfloatsisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
   1668 
   1669 #endif /* L_floatsisf */
   1670 
   1671 #ifdef L_floatdisf
   1672 
	.align	4
	.global	__floatundisf
	.type	__floatundisf, @function
	/* float __floatundisf (unsigned long long x)
	   Convert an unsigned 64-bit integer (in xh:xl) to a float,
	   rounding to nearest, ties to even.  Falls into the __floatdisf
	   code with the sign (a7) forced to zero.  */
__floatundisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdisf code.  */
	movi	a7, 0
	j	.Lfloatdisf_normalize

	.align	4
	.global	__floatdisf
	.type	__floatdisf, @function
	/* float __floatdisf (long long x)
	   Convert a signed 64-bit integer (in xh:xl) to a float, rounding
	   to nearest, ties to even.  */
__floatdisf:
	leaf_entry sp, 16

	/* Check for zero.  (Input zero is already the float +0.0.)  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value.  (Two's-complement negate of the pair:
	   negate both halves, borrow from xh if xl was nonzero.)  */
	bgez	xh, .Lfloatdisf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdisf_normalize
	addi	xh, xh, -1

.Lfloatdisf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  */
	beqz	xh, .Lfloatdisf_bigshift
	do_nsau	a4, xh, a5, a6	/* a4 = leading zero count of xh */
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdisf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.
	   Any bits shifted out of xl are ORed into a6 as a sticky bit so
	   that rounding still sees them.  */
	ssai	8
	sll	a5, xl
	src	a6, xh, xl
	srl	xh, xh
	beqz	a5, 1f
	movi	a5, 1
	or	a6, a6, a5	/* sticky bit */
1:
	/* Set the exponent.  */
	movi	a5, 0xbd	/* 0x7e + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, 2f
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdisf_exactlyhalf
2:	leaf_return

.Lfloatdisf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32	/* account for the extra 32-bit shift */
	j	.Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
   1758 
   1759 #endif /* L_floatdisf */
   1760