Home | History | Annotate | Line # | Download | only in xtensa
      1       1.1  mrg /* IEEE-754 single-precision functions for Xtensa
      2  1.1.1.13  mrg    Copyright (C) 2006-2024 Free Software Foundation, Inc.
      3       1.1  mrg    Contributed by Bob Wilson (bwilson (at) tensilica.com) at Tensilica.
      4       1.1  mrg 
      5       1.1  mrg    This file is part of GCC.
      6       1.1  mrg 
      7       1.1  mrg    GCC is free software; you can redistribute it and/or modify it
      8       1.1  mrg    under the terms of the GNU General Public License as published by
      9       1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
     10       1.1  mrg    any later version.
     11       1.1  mrg 
     12       1.1  mrg    GCC is distributed in the hope that it will be useful, but WITHOUT
     13       1.1  mrg    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     14       1.1  mrg    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     15       1.1  mrg    License for more details.
     16       1.1  mrg 
     17       1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     18       1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     19       1.1  mrg    3.1, as published by the Free Software Foundation.
     20       1.1  mrg 
     21       1.1  mrg    You should have received a copy of the GNU General Public License and
     22       1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     23       1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24       1.1  mrg    <http://www.gnu.org/licenses/>.  */
     25       1.1  mrg 
     26       1.1  mrg #ifdef __XTENSA_EB__
     27       1.1  mrg #define xh a2
     28       1.1  mrg #define xl a3
     29       1.1  mrg #define yh a4
     30       1.1  mrg #define yl a5
     31       1.1  mrg #else
     32       1.1  mrg #define xh a3
     33       1.1  mrg #define xl a2
     34       1.1  mrg #define yh a5
     35       1.1  mrg #define yl a4
     36       1.1  mrg #endif
     37       1.1  mrg 
     38       1.1  mrg /*  Warning!  The branch displacements for some Xtensa branch instructions
     39       1.1  mrg     are quite small, and this code has been carefully laid out to keep
     40       1.1  mrg     branch targets in range.  If you change anything, be sure to check that
     41       1.1  mrg     the assembler is not relaxing anything to branch over a jump.  */
     42       1.1  mrg 
     43       1.1  mrg #ifdef L_negsf2
     44       1.1  mrg 	/* __negsf2: negate the single-precision value in a2 by flipping its sign bit.  */
     45       1.1  mrg 	.align	4
     46       1.1  mrg 	.global	__negsf2
     47       1.1  mrg 	.type	__negsf2, @function
     48       1.1  mrg __negsf2:
     49       1.1  mrg 	leaf_entry sp, 16
     50       1.1  mrg 	movi	a4, 0x80000000	/* a4 = mask for the sign bit (bit 31) */
     51       1.1  mrg 	xor	a2, a2, a4	/* toggle the sign; exponent and mantissa are untouched */
     52       1.1  mrg 	leaf_return
     53       1.1  mrg 
     54       1.1  mrg #endif /* L_negsf2 */
     55       1.1  mrg 
     56       1.1  mrg #ifdef L_addsubsf3
     57       1.1  mrg 
     58   1.1.1.5  mrg 	.literal_position
     59       1.1  mrg 	/* Addition.  Special cases for __addsf3: x is in a2, y is in a3, a6 = 0x7f800000.  */
     60       1.1  mrg __addsf3_aux:
     61       1.1  mrg 
     62       1.1  mrg 	/* Handle NaNs and Infinities.  (This code is placed before the
     63       1.1  mrg 	   start of the function just to keep it in range of the limited
     64       1.1  mrg 	   branch displacements.)  */
     65       1.1  mrg 
     66       1.1  mrg .Ladd_xnan_or_inf:
     67       1.1  mrg 	/* If y is neither Infinity nor NaN, return x.  */
     68   1.1.1.5  mrg 	bnall	a3, a6, .Ladd_return_nan_or_inf	/* taken unless every exponent bit of y is set */
     69       1.1  mrg 	/* If x is a NaN, return it.  Otherwise, return y.  */
     70       1.1  mrg 	slli	a7, a2, 9	/* a7 = mantissa of x (sign and exponent shifted out) */
     71   1.1.1.5  mrg 	bnez	a7, .Ladd_return_nan	/* nonzero mantissa: x is a NaN */
     72       1.1  mrg 
     73       1.1  mrg .Ladd_ynan_or_inf:
     74       1.1  mrg 	/* Return y.  */
     75       1.1  mrg 	mov	a2, a3
     76   1.1.1.5  mrg 
     77   1.1.1.5  mrg .Ladd_return_nan_or_inf:
     78   1.1.1.5  mrg 	slli	a7, a2, 9	/* a7 = mantissa of the value being returned */
     79   1.1.1.5  mrg 	bnez	a7, .Ladd_return_nan	/* nonzero mantissa: it is a NaN, so quiet it first */
     80   1.1.1.5  mrg 	leaf_return
     81   1.1.1.5  mrg 
     82   1.1.1.5  mrg .Ladd_return_nan:
     83   1.1.1.5  mrg 	movi	a6, 0x400000	/* make it a quiet NaN */
     84   1.1.1.5  mrg 	or	a2, a2, a6	/* set bit 22, the top mantissa bit */
     85       1.1  mrg 	leaf_return
     86       1.1  mrg 
     87       1.1  mrg .Ladd_opposite_signs:
     88       1.1  mrg 	/* Operand signs differ.  Do a subtraction.  */
     89       1.1  mrg 	slli	a7, a6, 8	/* a7 = 0x7f800000 << 8 = 0x80000000, the sign-bit mask */
     90       1.1  mrg 	xor	a3, a3, a7	/* flip the sign of y: x + y == x - (-y) */
     91       1.1  mrg 	j	.Lsub_same_sign
     92       1.1  mrg 	/* __addsf3: add the single-precision values x (a2) and y (a3); the sum is returned in a2.  */
     93       1.1  mrg 	.align	4
     94       1.1  mrg 	.global	__addsf3
     95       1.1  mrg 	.type	__addsf3, @function
     96       1.1  mrg __addsf3:
     97       1.1  mrg 	leaf_entry sp, 16
     98       1.1  mrg 	movi	a6, 0x7f800000	/* a6 = mask for the exponent field */
     99       1.1  mrg 
    100       1.1  mrg 	/* Check if the two operands have the same sign.  */
    101       1.1  mrg 	xor	a7, a2, a3	/* bit 31 of a7 is set iff the signs differ */
    102       1.1  mrg 	bltz	a7, .Ladd_opposite_signs
    103       1.1  mrg 
    104       1.1  mrg .Ladd_same_sign:
    105       1.1  mrg 	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
    106       1.1  mrg 	ball	a2, a6, .Ladd_xnan_or_inf	/* taken when every exponent bit of x is set */
    107       1.1  mrg 	ball	a3, a6, .Ladd_ynan_or_inf	/* likewise for y */
    108       1.1  mrg 
    109       1.1  mrg 	/* Compare the exponents.  The smaller operand will be shifted
    110       1.1  mrg 	   right by the exponent difference and added to the larger
    111       1.1  mrg 	   one.  */
    112       1.1  mrg 	extui	a7, a2, 23, 9	/* a7 = sign and exponent of x (bits 23..31) */
    113       1.1  mrg 	extui	a8, a3, 23, 9	/* a8 = sign and exponent of y */
    114       1.1  mrg 	bltu	a7, a8, .Ladd_shiftx	/* the signs are equal here, so this compares the exponents */
    115       1.1  mrg 
    116       1.1  mrg .Ladd_shifty:
    117       1.1  mrg 	/* Check if the smaller (or equal) exponent is zero.  */
    118       1.1  mrg 	bnone	a3, a6, .Ladd_yexpzero	/* taken when every exponent bit of y is clear */
    119       1.1  mrg 
    120       1.1  mrg 	/* Replace y sign/exponent with 0x008.  */
    121       1.1  mrg 	or	a3, a3, a6	/* set every exponent bit... */
    122       1.1  mrg 	slli	a3, a3, 8	/* ...then clear the top 8 bits, */
    123       1.1  mrg 	srli	a3, a3, 8	/* leaving bit 23 set as the explicit "1.0" */
    124       1.1  mrg 
    125       1.1  mrg .Ladd_yexpdiff:
    126       1.1  mrg 	/* Compute the exponent difference.  */
    127       1.1  mrg 	sub	a10, a7, a8
    128       1.1  mrg 
    129       1.1  mrg 	/* Exponent difference >= 32 -- just return the bigger value.  */
    130       1.1  mrg 	bgeui	a10, 32, 1f	/* label 1 is the leaf_return after .Ladd_bothexpzero; a2 still holds x */
    131       1.1  mrg 
    132       1.1  mrg 	/* Shift y right by the exponent difference.  Any bits that are
    133       1.1  mrg 	   shifted out of y are saved in a9 for rounding the result.  */
    134       1.1  mrg 	ssr	a10	/* shift amount = exponent difference */
    135       1.1  mrg 	movi	a9, 0
    136       1.1  mrg 	src	a9, a3, a9	/* a9 = the bits of y that the shift drops, left-justified */
    137       1.1  mrg 	srl	a3, a3
    138       1.1  mrg 
    139       1.1  mrg 	/* Do the addition.  */
    140       1.1  mrg 	add	a2, a2, a3	/* x still carries its sign and exponent fields */
    141       1.1  mrg 
    142       1.1  mrg 	/* Check if the add overflowed into the exponent.  */
    143       1.1  mrg 	extui	a10, a2, 23, 9	/* a10 = sign and exponent of the sum */
    144       1.1  mrg 	beq	a10, a7, .Ladd_round
    145       1.1  mrg 	mov	a8, a7	/* .Ladd_carry reads the original exponent from a8 */
    146       1.1  mrg 	j	.Ladd_carry
    147       1.1  mrg 
    148       1.1  mrg .Ladd_yexpzero:
    149       1.1  mrg 	/* y is a subnormal value.  Replace its sign/exponent with zero,
    150       1.1  mrg 	   i.e., no implicit "1.0", and increment the apparent exponent
    151       1.1  mrg 	   because subnormals behave as if they had the minimum (nonzero)
    152       1.1  mrg 	   exponent.  Test for the case when both exponents are zero.  */
    153       1.1  mrg 	slli	a3, a3, 9	/* clear the sign and exponent of y, */
    154       1.1  mrg 	srli	a3, a3, 9	/* keeping only the mantissa bits */
    155       1.1  mrg 	bnone	a2, a6, .Ladd_bothexpzero	/* the exponent field of x is zero too */
    156       1.1  mrg 	addi	a8, a8, 1
    157       1.1  mrg 	j	.Ladd_yexpdiff
    158       1.1  mrg 
    159       1.1  mrg .Ladd_bothexpzero:
    160       1.1  mrg 	/* Both exponents are zero.  Handle this as a special case.  There
    161       1.1  mrg 	   is no need to shift or round, and the normal code for handling
    162       1.1  mrg 	   a carry into the exponent field will not work because it
    163       1.1  mrg 	   assumes there is an implicit "1.0" that needs to be added.  */
    164       1.1  mrg 	add	a2, a2, a3
    165       1.1  mrg 1:	leaf_return
    166       1.1  mrg 
    167       1.1  mrg .Ladd_xexpzero:
    168       1.1  mrg 	/* Same as "yexpzero" except skip handling the case when both
    169       1.1  mrg 	   exponents are zero.  */
    170       1.1  mrg 	slli	a2, a2, 9	/* clear the sign and exponent of x, */
    171       1.1  mrg 	srli	a2, a2, 9	/* keeping only the mantissa bits */
    172       1.1  mrg 	addi	a7, a7, 1
    173       1.1  mrg 	j	.Ladd_xexpdiff
    174       1.1  mrg 
    175       1.1  mrg .Ladd_shiftx:
    176       1.1  mrg 	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
    177       1.1  mrg 	   because the exponent difference is always nonzero in this version,
    178       1.1  mrg 	   the shift sequence can use SLL and skip loading a constant zero.  */
    179       1.1  mrg 	bnone	a2, a6, .Ladd_xexpzero
    180       1.1  mrg 
    181       1.1  mrg 	or	a2, a2, a6	/* set every exponent bit... */
    182       1.1  mrg 	slli	a2, a2, 8	/* ...then clear the top 8 bits, */
    183       1.1  mrg 	srli	a2, a2, 8	/* leaving bit 23 set as the explicit "1.0" */
    184       1.1  mrg 
    185       1.1  mrg .Ladd_xexpdiff:
    186       1.1  mrg 	sub	a10, a8, a7
    187       1.1  mrg 	bgeui	a10, 32, .Ladd_returny	/* difference >= 32: just return y */
    188       1.1  mrg 
    189       1.1  mrg 	ssr	a10	/* shift amount = exponent difference */
    190       1.1  mrg 	sll	a9, a2	/* a9 = a2 << (32 - difference): the bits that the srl drops */
    191       1.1  mrg 	srl	a2, a2
    192       1.1  mrg 
    193       1.1  mrg 	add	a2, a2, a3	/* y still carries its sign and exponent fields */
    194       1.1  mrg 
    195       1.1  mrg 	/* Check if the add overflowed into the exponent.  */
    196       1.1  mrg 	extui	a10, a2, 23, 9
    197       1.1  mrg 	bne	a10, a8, .Ladd_carry	/* a8 already holds the original exponent */
    198       1.1  mrg 
    199       1.1  mrg .Ladd_round:
    200       1.1  mrg 	/* Round up if the leftover fraction is >= 1/2.  */
    201       1.1  mrg 	bgez	a9, 1f	/* msb of a9 clear: leftover < 1/2, nothing to do */
    202       1.1  mrg 	addi	a2, a2, 1
    203       1.1  mrg 
    204       1.1  mrg 	/* Check if the leftover fraction is exactly 1/2.  */
    205       1.1  mrg 	slli	a9, a9, 1	/* discard the 1/2 bit; zero means nothing lies below it */
    206       1.1  mrg 	beqz	a9, .Ladd_exactlyhalf
    207       1.1  mrg 1:	leaf_return
    208       1.1  mrg 
    209       1.1  mrg .Ladd_returny:
    210       1.1  mrg 	mov	a2, a3
    211       1.1  mrg 	leaf_return
    212       1.1  mrg 
    213       1.1  mrg .Ladd_carry:
    214       1.1  mrg 	/* The addition has overflowed into the exponent field, so the
    215       1.1  mrg 	   value needs to be renormalized.  The mantissa of the result
    216       1.1  mrg 	   can be recovered by subtracting the original exponent and
    217       1.1  mrg 	   adding 0x800000 (which is the explicit "1.0" for the
    218       1.1  mrg 	   mantissa of the non-shifted operand -- the "1.0" for the
    219       1.1  mrg 	   shifted operand was already added).  The mantissa can then
    220       1.1  mrg 	   be shifted right by one bit.  The explicit "1.0" of the
    221       1.1  mrg 	   shifted mantissa then needs to be replaced by the exponent,
    222       1.1  mrg 	   incremented by one to account for the normalizing shift.
    223       1.1  mrg 	   It is faster to combine these operations: do the shift first
    224       1.1  mrg 	   and combine the additions and subtractions.  If x is the
    225       1.1  mrg 	   original exponent, the result is:
    226       1.1  mrg 	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
    227       1.1  mrg 	   or:
    228       1.1  mrg 	       shifted mantissa + ((x + 1) << 22)
    229       1.1  mrg 	   Note that the exponent is incremented here by leaving the
    230       1.1  mrg 	   explicit "1.0" of the mantissa in the exponent field.  */
    231       1.1  mrg 
    232       1.1  mrg 	/* Shift x right by one bit.  Save the lsb.  */
    233       1.1  mrg 	mov	a10, a2	/* keep the unshifted sum; its bit 0 is tested below */
    234       1.1  mrg 	srli	a2, a2, 1
    235       1.1  mrg 
    236       1.1  mrg 	/* See explanation above.  The original exponent is in a8.  */
    237       1.1  mrg 	addi	a8, a8, 1
    238       1.1  mrg 	slli	a8, a8, 22
    239       1.1  mrg 	add	a2, a2, a8
    240       1.1  mrg 
    241       1.1  mrg 	/* Return an Infinity if the exponent overflowed.  */
    242       1.1  mrg 	ball	a2, a6, .Ladd_infinity	/* taken when every exponent bit of the result is set */
    243       1.1  mrg 
    244       1.1  mrg 	/* Same thing as the "round" code except the msb of the leftover
    245       1.1  mrg 	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
    246       1.1  mrg 	bbci.l	a10, 0, 1f	/* shifted-out bit is clear: leftover < 1/2 */
    247       1.1  mrg 	addi	a2, a2, 1
    248       1.1  mrg 	beqz	a9, .Ladd_exactlyhalf	/* nothing below the 1/2 bit: an exact tie */
    249       1.1  mrg 1:	leaf_return
    250       1.1  mrg 
    251       1.1  mrg .Ladd_infinity:
    252       1.1  mrg 	/* Clear the mantissa.  */
    253       1.1  mrg 	srli	a2, a2, 23
    254       1.1  mrg 	slli	a2, a2, 23
    255       1.1  mrg 
    256       1.1  mrg 	/* The sign bit may have been lost in a carry-out.  Put it back.  */
    257       1.1  mrg 	slli	a8, a8, 1	/* a8 was shifted left by 22 above; one more bit puts its sign at bit 31 */
    258       1.1  mrg 	or	a2, a2, a8	/* the exponent bits a8 also carries are already set in a2 */
    259       1.1  mrg 	leaf_return
    260       1.1  mrg 
    261       1.1  mrg .Ladd_exactlyhalf:
    262       1.1  mrg 	/* Round down to the nearest even value.  */
    263       1.1  mrg 	srli	a2, a2, 1	/* clear bit 0 of the result */
    264       1.1  mrg 	slli	a2, a2, 1
    265       1.1  mrg 	leaf_return
    266       1.1  mrg 
    267       1.1  mrg 
    268       1.1  mrg 	/* Subtraction.  Special cases for __subsf3: x is in a2, y is in a3, a6 = 0x7f800000.  */
    269       1.1  mrg __subsf3_aux:
    270       1.1  mrg 
    271       1.1  mrg 	/* Handle NaNs and Infinities.  (This code is placed before the
    272       1.1  mrg 	   start of the function just to keep it in range of the limited
    273       1.1  mrg 	   branch displacements.)  */
    274       1.1  mrg 
    275       1.1  mrg .Lsub_xnan_or_inf:
    276       1.1  mrg 	/* If y is neither Infinity nor NaN, return x.  */
    277   1.1.1.5  mrg 	bnall	a3, a6, .Lsub_return_nan_or_inf	/* taken unless every exponent bit of y is set */
    278       1.1  mrg 	/* Both x and y are either NaN or Inf, so the result is NaN.  */
    279   1.1.1.5  mrg 	/* (Setting the quiet bit below also turns an Infinity in a2 into a NaN.)  */
    280   1.1.1.5  mrg .Lsub_return_nan:
    281       1.1  mrg 	movi	a4, 0x400000	/* make it a quiet NaN */
    282       1.1  mrg 	or	a2, a2, a4	/* set bit 22, the top mantissa bit */
    283   1.1.1.5  mrg 	leaf_return
    284       1.1  mrg 
    285       1.1  mrg .Lsub_ynan_or_inf:
    286       1.1  mrg 	/* Negate y and return it.  */
    287       1.1  mrg 	slli	a7, a6, 8	/* a7 = 0x7f800000 << 8 = 0x80000000, the sign-bit mask */
    288       1.1  mrg 	xor	a2, a3, a7	/* a2 = y with its sign flipped */
    289   1.1.1.5  mrg 
    290   1.1.1.5  mrg .Lsub_return_nan_or_inf:
    291   1.1.1.5  mrg 	slli	a7, a2, 9	/* a7 = mantissa of the value being returned */
    292   1.1.1.5  mrg 	bnez	a7, .Lsub_return_nan	/* nonzero mantissa: it is a NaN, so quiet it first */
    293       1.1  mrg 	leaf_return
    294       1.1  mrg 
    295       1.1  mrg .Lsub_opposite_signs:
    296       1.1  mrg 	/* Operand signs differ.  Do an addition.  */
    297       1.1  mrg 	slli	a7, a6, 8	/* a7 = sign-bit mask 0x80000000 */
    298       1.1  mrg 	xor	a3, a3, a7	/* flip the sign of y: x - y == x + (-y) */
    299       1.1  mrg 	j	.Ladd_same_sign
    300       1.1  mrg 	/* __subsf3: compute x - y for the single-precision values x (a2) and y (a3); the result is returned in a2.  */
    301       1.1  mrg 	.align	4
    302       1.1  mrg 	.global	__subsf3
    303       1.1  mrg 	.type	__subsf3, @function
    304       1.1  mrg __subsf3:
    305       1.1  mrg 	leaf_entry sp, 16
    306       1.1  mrg 	movi	a6, 0x7f800000	/* a6 = mask for the exponent field */
    307       1.1  mrg 
    308       1.1  mrg 	/* Check if the two operands have the same sign.  */
    309       1.1  mrg 	xor	a7, a2, a3	/* bit 31 of a7 is set iff the signs differ */
    310       1.1  mrg 	bltz	a7, .Lsub_opposite_signs
    311       1.1  mrg 
    312       1.1  mrg .Lsub_same_sign:
    313       1.1  mrg 	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
    314       1.1  mrg 	ball	a2, a6, .Lsub_xnan_or_inf	/* taken when every exponent bit of x is set */
    315       1.1  mrg 	ball	a3, a6, .Lsub_ynan_or_inf	/* likewise for y */
    316       1.1  mrg 
    317       1.1  mrg 	/* Compare the operands.  In contrast to addition, the entire
    318       1.1  mrg 	   value matters here.  */
    319       1.1  mrg 	extui	a7, a2, 23, 8	/* a7 = exponent of x (sign bit excluded) */
    320       1.1  mrg 	extui	a8, a3, 23, 8	/* a8 = exponent of y */
    321       1.1  mrg 	bltu	a2, a3, .Lsub_xsmaller	/* the signs are equal here, so this compares magnitudes */
    322       1.1  mrg 
    323       1.1  mrg .Lsub_ysmaller:
    324       1.1  mrg 	/* Check if the smaller (or equal) exponent is zero.  */
    325       1.1  mrg 	bnone	a3, a6, .Lsub_yexpzero	/* taken when every exponent bit of y is clear */
    326       1.1  mrg 
    327       1.1  mrg 	/* Replace y sign/exponent with 0x008.  */
    328       1.1  mrg 	or	a3, a3, a6	/* set every exponent bit... */
    329       1.1  mrg 	slli	a3, a3, 8	/* ...then clear the top 8 bits, */
    330       1.1  mrg 	srli	a3, a3, 8	/* leaving bit 23 set as the explicit "1.0" */
    331       1.1  mrg 
    332       1.1  mrg .Lsub_yexpdiff:
    333       1.1  mrg 	/* Compute the exponent difference.  */
    334       1.1  mrg 	sub	a10, a7, a8
    335       1.1  mrg 
    336       1.1  mrg 	/* Exponent difference >= 32 -- just return the bigger value.  */
    337       1.1  mrg 	bgeui	a10, 32, 1f	/* label 1 is the leaf_return after .Lsub_returny; a2 still holds x */
    338       1.1  mrg 
    339       1.1  mrg 	/* Shift y right by the exponent difference.  Any bits that are
    340       1.1  mrg 	   shifted out of y are saved in a9 for rounding the result.  */
    341       1.1  mrg 	ssr	a10	/* shift amount = exponent difference */
    342       1.1  mrg 	movi	a9, 0
    343       1.1  mrg 	src	a9, a3, a9	/* a9 = the bits of y that the shift drops, left-justified */
    344       1.1  mrg 	srl	a3, a3
    345       1.1  mrg 
    346       1.1  mrg 	sub	a2, a2, a3	/* x still carries its sign and exponent fields */
    347       1.1  mrg 
    348       1.1  mrg 	/* Subtract the leftover bits in a9 from zero and propagate any
    349       1.1  mrg 	   borrow from a2.  */
    350       1.1  mrg 	neg	a9, a9	/* a9 = 0 - leftover bits */
    351       1.1  mrg 	addi	a10, a2, -1	/* a10 = the difference with a borrow applied */
    352       1.1  mrg 	movnez	a2, a10, a9	/* take the borrow only when the leftover bits were nonzero */
    353       1.1  mrg 
    354       1.1  mrg 	/* Check if the subtract underflowed into the exponent.  */
    355       1.1  mrg 	extui	a10, a2, 23, 8	/* a10 = exponent field of the difference */
    356       1.1  mrg 	beq	a10, a7, .Lsub_round
    357       1.1  mrg 	j	.Lsub_borrow
    358       1.1  mrg 
    359       1.1  mrg .Lsub_yexpzero:
    360       1.1  mrg 	/* Return zero if the inputs are equal.  (For the non-subnormal
    361       1.1  mrg 	   case, subtracting the "1.0" will cause a borrow from the exponent
    362       1.1  mrg 	   and this case can be detected when handling the borrow.)  */
    363       1.1  mrg 	beq	a2, a3, .Lsub_return_zero
    364       1.1  mrg 
    365       1.1  mrg 	/* y is a subnormal value.  Replace its sign/exponent with zero,
    366       1.1  mrg 	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
    367       1.1  mrg 	   y's apparent exponent because subnormals behave as if they had
    368       1.1  mrg 	   the minimum (nonzero) exponent.  */
    369       1.1  mrg 	slli	a3, a3, 9	/* clear the sign and exponent of y, */
    370       1.1  mrg 	srli	a3, a3, 9	/* keeping only the mantissa bits */
    371       1.1  mrg 	bnone	a2, a6, .Lsub_yexpdiff	/* x has a zero exponent field too: leave a8 alone */
    372       1.1  mrg 	addi	a8, a8, 1
    373       1.1  mrg 	j	.Lsub_yexpdiff
    374       1.1  mrg 
    375       1.1  mrg .Lsub_returny:
    376       1.1  mrg 	/* Negate and return y.  */
    377       1.1  mrg 	slli	a7, a6, 8	/* a7 = 0x7f800000 << 8 = 0x80000000, the sign-bit mask */
    378       1.1  mrg 	xor	a2, a3, a7
    379       1.1  mrg 1:	leaf_return
    380       1.1  mrg 
    381       1.1  mrg .Lsub_xsmaller:
    382       1.1  mrg 	/* Same thing as the "ysmaller" code, but with x and y swapped and
    383       1.1  mrg 	   with y negated.  */
    384       1.1  mrg 	bnone	a2, a6, .Lsub_xexpzero
    385       1.1  mrg 
    386       1.1  mrg 	or	a2, a2, a6	/* set every exponent bit... */
    387       1.1  mrg 	slli	a2, a2, 8	/* ...then clear the top 8 bits, */
    388       1.1  mrg 	srli	a2, a2, 8	/* leaving bit 23 set as the explicit "1.0" */
    389       1.1  mrg 
    390       1.1  mrg .Lsub_xexpdiff:
    391       1.1  mrg 	sub	a10, a8, a7
    392       1.1  mrg 	bgeui	a10, 32, .Lsub_returny	/* difference >= 32: the result is just -y */
    393       1.1  mrg 
    394       1.1  mrg 	ssr	a10	/* shift amount = exponent difference */
    395       1.1  mrg 	movi	a9, 0
    396       1.1  mrg 	src	a9, a2, a9	/* a9 = the bits of x that the shift drops, left-justified */
    397       1.1  mrg 	srl	a2, a2
    398       1.1  mrg 
    399       1.1  mrg 	/* Negate y.  */
    400       1.1  mrg 	slli	a11, a6, 8	/* a11 = sign-bit mask 0x80000000 */
    401       1.1  mrg 	xor	a3, a3, a11
    402       1.1  mrg 
    403       1.1  mrg 	sub	a2, a3, a2	/* -y keeps its sign and exponent fields; subtract the shifted x mantissa */
    404       1.1  mrg 
    405       1.1  mrg 	neg	a9, a9	/* a9 = 0 - leftover bits */
    406       1.1  mrg 	addi	a10, a2, -1	/* a10 = the difference with a borrow applied */
    407       1.1  mrg 	movnez	a2, a10, a9	/* take the borrow only when the leftover bits were nonzero */
    408       1.1  mrg 
    409       1.1  mrg 	/* Check if the subtract underflowed into the exponent.  */
    410       1.1  mrg 	extui	a10, a2, 23, 8	/* a10 = exponent field of the difference */
    411       1.1  mrg 	bne	a10, a8, .Lsub_borrow
    412       1.1  mrg 
    413       1.1  mrg .Lsub_round:
    414       1.1  mrg 	/* Round up if the leftover fraction is >= 1/2.  */
    415       1.1  mrg 	bgez	a9, 1f	/* msb of a9 clear: leftover < 1/2, nothing to do */
    416       1.1  mrg 	addi	a2, a2, 1
    417       1.1  mrg 
    418       1.1  mrg 	/* Check if the leftover fraction is exactly 1/2.  */
    419       1.1  mrg 	slli	a9, a9, 1	/* discard the 1/2 bit; zero means nothing lies below it */
    420       1.1  mrg 	beqz	a9, .Lsub_exactlyhalf
    421       1.1  mrg 1:	leaf_return
    422       1.1  mrg 
    423       1.1  mrg .Lsub_xexpzero:
    424       1.1  mrg 	/* Same as "yexpzero".  */
    425       1.1  mrg 	beq	a2, a3, .Lsub_return_zero
    426       1.1  mrg 	slli	a2, a2, 9	/* clear the sign and exponent of x, */
    427       1.1  mrg 	srli	a2, a2, 9	/* keeping only the mantissa bits */
    428       1.1  mrg 	bnone	a3, a6, .Lsub_xexpdiff	/* y has a zero exponent field too: leave a7 alone */
    429       1.1  mrg 	addi	a7, a7, 1
    430       1.1  mrg 	j	.Lsub_xexpdiff
    431       1.1  mrg 
    432       1.1  mrg .Lsub_return_zero:
    433       1.1  mrg 	movi	a2, 0	/* all bits clear, i.e. +0.0 */
    434       1.1  mrg 	leaf_return
    435       1.1  mrg 
    436       1.1  mrg .Lsub_borrow:
    437       1.1  mrg 	/* The subtraction has underflowed into the exponent field, so the
    438       1.1  mrg 	   value needs to be renormalized.  Shift the mantissa left as
    439       1.1  mrg 	   needed to remove any leading zeros and adjust the exponent
    440       1.1  mrg 	   accordingly.  If the exponent is not large enough to remove
    441       1.1  mrg 	   all the leading zeros, the result will be a subnormal value.  */
    442       1.1  mrg 	/* NOTE(review): do_nsau is defined elsewhere; presumably a6 = leading-zero count of a8, with a7/a11 as scratch -- confirm.  */
    443       1.1  mrg 	slli	a8, a2, 9	/* a8 = mantissa of the difference, left-justified */
    444       1.1  mrg 	beqz	a8, .Lsub_xzero
    445       1.1  mrg 	do_nsau	a6, a8, a7, a11
    446       1.1  mrg 	srli	a8, a8, 9	/* move the mantissa back down to bits 0..22 */
    447       1.1  mrg 	bge	a6, a10, .Lsub_subnormal	/* a10 = exponent field of the difference, extracted before jumping here */
    448       1.1  mrg 	addi	a6, a6, 1
    449       1.1  mrg 
    450       1.1  mrg .Lsub_normalize_shift:
    451       1.1  mrg 	/* Shift the mantissa (a8/a9) left by a6.  */
    452       1.1  mrg 	ssl	a6
    453       1.1  mrg 	src	a8, a8, a9	/* a8 = (a8 << a6) with the top a6 bits of a9 shifted in */
    454       1.1  mrg 	sll	a9, a9	/* a9 = a9 << a6 */
    455       1.1  mrg 
    456       1.1  mrg 	/* Combine the shifted mantissa with the sign and exponent,
    457       1.1  mrg 	   decrementing the exponent by a6.  (The exponent has already
    458       1.1  mrg 	   been decremented by one due to the borrow from the subtraction,
    459       1.1  mrg 	   but adding the mantissa will increment the exponent by one.)  */
    460       1.1  mrg 	srli	a2, a2, 23	/* a2 = sign and exponent only */
    461       1.1  mrg 	sub	a2, a2, a6
    462       1.1  mrg 	slli	a2, a2, 23
    463       1.1  mrg 	add	a2, a2, a8
    464       1.1  mrg 	j	.Lsub_round
    465       1.1  mrg 
    466       1.1  mrg .Lsub_exactlyhalf:
    467       1.1  mrg 	/* Round down to the nearest even value.  */
    468       1.1  mrg 	srli	a2, a2, 1	/* clear bit 0 of the result */
    469       1.1  mrg 	slli	a2, a2, 1
    470       1.1  mrg 	leaf_return
    471       1.1  mrg 
    472       1.1  mrg .Lsub_xzero:
    473       1.1  mrg 	/* If there was a borrow from the exponent, and the mantissa and
    474       1.1  mrg 	   guard digits are all zero, then the inputs were equal and the
    475       1.1  mrg 	   result should be zero.  */
    476       1.1  mrg 	beqz	a9, .Lsub_return_zero
    477       1.1  mrg 
    478       1.1  mrg 	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
    479       1.1  mrg 	addi	a11, a10, -24	/* a11 < 0 iff a10 < 24 */
    480       1.1  mrg 	movi	a6, 24
    481       1.1  mrg 	movltz	a6, a10, a11	/* a6 = a10 when a10 < 24, otherwise it stays 24 */
    482       1.1  mrg 	j	.Lsub_normalize_shift
    483       1.1  mrg 
    484       1.1  mrg .Lsub_subnormal:
    485       1.1  mrg 	/* The exponent is too small to shift away all the leading zeros.
    486       1.1  mrg 	   Set a6 to the current exponent (which has already been
    487       1.1  mrg 	   decremented by the borrow) so that the exponent of the result
    488       1.1  mrg 	   will be zero.  Do not add 1 to a6 in this case, because: (1)
    489       1.1  mrg 	   adding the mantissa will not increment the exponent, so there is
    490       1.1  mrg 	   no need to subtract anything extra from the exponent to
    491       1.1  mrg 	   compensate, and (2) the effective exponent of a subnormal is 1
    492       1.1  mrg 	   not 0 so the shift amount must be 1 smaller than normal. */
    493       1.1  mrg 	mov	a6, a10
    494       1.1  mrg 	j	.Lsub_normalize_shift
    495       1.1  mrg 
    496       1.1  mrg #endif /* L_addsubsf3 */
    497       1.1  mrg 
    498       1.1  mrg #ifdef L_mulsf3
    499       1.1  mrg 
    500       1.1  mrg 	/* Multiplication */
    501       1.1  mrg #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
    502       1.1  mrg #define XCHAL_NO_MUL 1
    503       1.1  mrg #endif
    504       1.1  mrg 
    505   1.1.1.3  mrg 	.literal_position
    506       1.1  mrg __mulsf3_aux:
    507       1.1  mrg 
    508       1.1  mrg 	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
    509       1.1  mrg 	   (This code is placed before the start of the function just to
    510       1.1  mrg 	   keep it in range of the limited branch displacements.)  */
    511       1.1  mrg 
    512       1.1  mrg .Lmul_xexpzero:
    513       1.1  mrg 	/* Clear the sign bit of x.  */
    514       1.1  mrg 	slli	a2, a2, 1
    515       1.1  mrg 	srli	a2, a2, 1
    516       1.1  mrg 
    517       1.1  mrg 	/* If x is zero, return zero.  */
    518       1.1  mrg 	beqz	a2, .Lmul_return_zero
    519       1.1  mrg 
    520       1.1  mrg 	/* Normalize x.  Adjust the exponent in a8.  */
    521       1.1  mrg 	do_nsau	a10, a2, a11, a12
    522       1.1  mrg 	addi	a10, a10, -8
    523       1.1  mrg 	ssl	a10
    524       1.1  mrg 	sll	a2, a2
    525       1.1  mrg 	movi	a8, 1
    526       1.1  mrg 	sub	a8, a8, a10
    527       1.1  mrg 	j	.Lmul_xnormalized
    528       1.1  mrg 
    529       1.1  mrg .Lmul_yexpzero:
    530       1.1  mrg 	/* Clear the sign bit of y.  */
    531       1.1  mrg 	slli	a3, a3, 1
    532       1.1  mrg 	srli	a3, a3, 1
    533       1.1  mrg 
    534       1.1  mrg 	/* If y is zero, return zero.  */
    535       1.1  mrg 	beqz	a3, .Lmul_return_zero
    536       1.1  mrg 
    537       1.1  mrg 	/* Normalize y.  Adjust the exponent in a9.  */
    538       1.1  mrg 	do_nsau	a10, a3, a11, a12
    539       1.1  mrg 	addi	a10, a10, -8
    540       1.1  mrg 	ssl	a10
    541       1.1  mrg 	sll	a3, a3
    542       1.1  mrg 	movi	a9, 1
    543       1.1  mrg 	sub	a9, a9, a10
    544       1.1  mrg 	j	.Lmul_ynormalized
    545       1.1  mrg 
    546       1.1  mrg .Lmul_return_zero:
    547       1.1  mrg 	/* Return zero with the appropriate sign bit.  */
    548       1.1  mrg 	srli	a2, a7, 31
    549       1.1  mrg 	slli	a2, a2, 31
    550       1.1  mrg 	j	.Lmul_done
    551       1.1  mrg 
    552       1.1  mrg .Lmul_xnan_or_inf:
    553       1.1  mrg 	/* If y is zero, return NaN.  */
    554       1.1  mrg 	slli	a8, a3, 1
    555   1.1.1.5  mrg 	beqz	a8, .Lmul_return_nan
    556       1.1  mrg 	/* If y is NaN, return y.  */
    557       1.1  mrg 	bnall	a3, a6, .Lmul_returnx
    558       1.1  mrg 	slli	a8, a3, 9
    559       1.1  mrg 	beqz	a8, .Lmul_returnx
    560       1.1  mrg 
    561       1.1  mrg .Lmul_returny:
    562       1.1  mrg 	mov	a2, a3
    563       1.1  mrg 
    564       1.1  mrg .Lmul_returnx:
    565   1.1.1.5  mrg 	slli	a8, a2, 9
    566   1.1.1.5  mrg 	bnez	a8, .Lmul_return_nan
    567       1.1  mrg 	/* Set the sign bit and return.  */
    568       1.1  mrg 	extui	a7, a7, 31, 1
    569       1.1  mrg 	slli	a2, a2, 1
    570       1.1  mrg 	ssai	1
    571       1.1  mrg 	src	a2, a7, a2
    572       1.1  mrg 	j	.Lmul_done
    573       1.1  mrg 
    574       1.1  mrg .Lmul_ynan_or_inf:
    575       1.1  mrg 	/* If x is zero, return NaN.  */
    576       1.1  mrg 	slli	a8, a2, 1
    577       1.1  mrg 	bnez	a8, .Lmul_returny
    578   1.1.1.5  mrg 	mov	a2, a3
    579   1.1.1.5  mrg 
    580   1.1.1.5  mrg .Lmul_return_nan:
    581   1.1.1.5  mrg 	movi	a4, 0x400000	/* make it a quiet NaN */
    582   1.1.1.5  mrg 	or	a2, a2, a4
    583       1.1  mrg 	j	.Lmul_done
    584       1.1  mrg 
    585       1.1  mrg 	.align	4
    586       1.1  mrg 	.global	__mulsf3
    587       1.1  mrg 	.type	__mulsf3, @function
    588       1.1  mrg __mulsf3:
    589       1.1  mrg #if __XTENSA_CALL0_ABI__
    590       1.1  mrg 	leaf_entry sp, 32
    591       1.1  mrg 	addi	sp, sp, -32
    592       1.1  mrg 	s32i	a12, sp, 16
    593       1.1  mrg 	s32i	a13, sp, 20
    594       1.1  mrg 	s32i	a14, sp, 24
    595       1.1  mrg 	s32i	a15, sp, 28
    596       1.1  mrg #elif XCHAL_NO_MUL
    597       1.1  mrg 	/* This is not really a leaf function; allocate enough stack space
    598       1.1  mrg 	   to allow CALL12s to a helper function.  */
    599       1.1  mrg 	leaf_entry sp, 64
    600       1.1  mrg #else
    601       1.1  mrg 	leaf_entry sp, 32
    602       1.1  mrg #endif
    603       1.1  mrg 	movi	a6, 0x7f800000
    604       1.1  mrg 
    605       1.1  mrg 	/* Get the sign of the result.  */
    606       1.1  mrg 	xor	a7, a2, a3
    607       1.1  mrg 
    608       1.1  mrg 	/* Check for NaN and infinity.  */
    609       1.1  mrg 	ball	a2, a6, .Lmul_xnan_or_inf
    610       1.1  mrg 	ball	a3, a6, .Lmul_ynan_or_inf
    611       1.1  mrg 
    612       1.1  mrg 	/* Extract the exponents.  */
    613       1.1  mrg 	extui	a8, a2, 23, 8
    614       1.1  mrg 	extui	a9, a3, 23, 8
    615       1.1  mrg 
    616       1.1  mrg 	beqz	a8, .Lmul_xexpzero
    617       1.1  mrg .Lmul_xnormalized:
    618       1.1  mrg 	beqz	a9, .Lmul_yexpzero
    619       1.1  mrg .Lmul_ynormalized:
    620       1.1  mrg 
    621       1.1  mrg 	/* Add the exponents.  */
    622       1.1  mrg 	add	a8, a8, a9
    623       1.1  mrg 
    624       1.1  mrg 	/* Replace sign/exponent fields with explicit "1.0".  */
    625       1.1  mrg 	movi	a10, 0xffffff
    626       1.1  mrg 	or	a2, a2, a6
    627       1.1  mrg 	and	a2, a2, a10
    628       1.1  mrg 	or	a3, a3, a6
    629       1.1  mrg 	and	a3, a3, a10
    630       1.1  mrg 
    631       1.1  mrg 	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */
    632       1.1  mrg 
    633       1.1  mrg #if XCHAL_HAVE_MUL32_HIGH
    634       1.1  mrg 
    635       1.1  mrg 	mull	a6, a2, a3
    636       1.1  mrg 	muluh	a2, a2, a3
    637       1.1  mrg 
    638       1.1  mrg #else
    639       1.1  mrg 
    640       1.1  mrg 	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
    641       1.1  mrg 	   products.  These partial products are:
    642       1.1  mrg 
    643       1.1  mrg 		0 xl * yl
    644       1.1  mrg 
    645       1.1  mrg 		1 xl * yh
    646       1.1  mrg 		2 xh * yl
    647       1.1  mrg 
    648       1.1  mrg 		3 xh * yh
    649       1.1  mrg 
    650       1.1  mrg 	   If using the Mul16 or Mul32 multiplier options, these input
    651       1.1  mrg 	   chunks must be stored in separate registers.  For Mac16, the
    652       1.1  mrg 	   UMUL.AA.* opcodes can specify that the inputs come from either
    653       1.1  mrg 	   half of the registers, so there is no need to shift them out
    654       1.1  mrg 	   ahead of time.  If there is no multiply hardware, the 16-bit
    655       1.1  mrg 	   chunks can be extracted when setting up the arguments to the
    656       1.1  mrg 	   separate multiply function.  */
    657       1.1  mrg 
    658       1.1  mrg #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
    659       1.1  mrg 	/* Calling a separate multiply function will clobber a0 and requires
    660       1.1  mrg 	   use of a8 as a temporary, so save those values now.  (The function
    661       1.1  mrg 	   uses a custom ABI so nothing else needs to be saved.)  */
    662       1.1  mrg 	s32i	a0, sp, 0
    663       1.1  mrg 	s32i	a8, sp, 4
    664       1.1  mrg #endif
    665       1.1  mrg 
    666       1.1  mrg #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
    667       1.1  mrg 
    668       1.1  mrg #define a2h a4
    669       1.1  mrg #define a3h a5
    670       1.1  mrg 
    671       1.1  mrg 	/* Get the high halves of the inputs into registers.  */
    672       1.1  mrg 	srli	a2h, a2, 16
    673       1.1  mrg 	srli	a3h, a3, 16
    674       1.1  mrg 
    675       1.1  mrg #define a2l a2
    676       1.1  mrg #define a3l a3
    677       1.1  mrg 
    678       1.1  mrg #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
    679       1.1  mrg 	/* Clear the high halves of the inputs.  This does not matter
    680       1.1  mrg 	   for MUL16 because the high bits are ignored.  */
    681       1.1  mrg 	extui	a2, a2, 0, 16
    682       1.1  mrg 	extui	a3, a3, 0, 16
    683       1.1  mrg #endif
    684       1.1  mrg #endif /* MUL16 || MUL32 */
    685       1.1  mrg 
    686       1.1  mrg 
    687       1.1  mrg #if XCHAL_HAVE_MUL16
    688       1.1  mrg 
    689       1.1  mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
    690       1.1  mrg 	mul16u	dst, xreg ## xhalf, yreg ## yhalf
    691       1.1  mrg 
    692       1.1  mrg #elif XCHAL_HAVE_MUL32
    693       1.1  mrg 
    694       1.1  mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
    695       1.1  mrg 	mull	dst, xreg ## xhalf, yreg ## yhalf
    696       1.1  mrg 
    697       1.1  mrg #elif XCHAL_HAVE_MAC16
    698       1.1  mrg 
    699       1.1  mrg /* The preprocessor insists on inserting a space when concatenating after
    700       1.1  mrg    a period in the definition of do_mul below.  These macros are a workaround
    701       1.1  mrg    using underscores instead of periods when doing the concatenation.  */
    702       1.1  mrg #define umul_aa_ll umul.aa.ll
    703       1.1  mrg #define umul_aa_lh umul.aa.lh
    704       1.1  mrg #define umul_aa_hl umul.aa.hl
    705       1.1  mrg #define umul_aa_hh umul.aa.hh
    706       1.1  mrg 
    707       1.1  mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
    708       1.1  mrg 	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
    709       1.1  mrg 	rsr	dst, ACCLO
    710       1.1  mrg 
    711       1.1  mrg #else /* no multiply hardware */
    712       1.1  mrg 
    713       1.1  mrg #define set_arg_l(dst, src) \
    714       1.1  mrg 	extui	dst, src, 0, 16
    715       1.1  mrg #define set_arg_h(dst, src) \
    716       1.1  mrg 	srli	dst, src, 16
    717       1.1  mrg 
    718       1.1  mrg #if __XTENSA_CALL0_ABI__
    719       1.1  mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
    720       1.1  mrg 	set_arg_ ## xhalf (a13, xreg); \
    721       1.1  mrg 	set_arg_ ## yhalf (a14, yreg); \
    722       1.1  mrg 	call0	.Lmul_mulsi3; \
    723       1.1  mrg 	mov	dst, a12
    724       1.1  mrg #else
    725       1.1  mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
    726       1.1  mrg 	set_arg_ ## xhalf (a14, xreg); \
    727       1.1  mrg 	set_arg_ ## yhalf (a15, yreg); \
    728       1.1  mrg 	call12	.Lmul_mulsi3; \
    729       1.1  mrg 	mov	dst, a14
    730       1.1  mrg #endif /* __XTENSA_CALL0_ABI__ */
    731       1.1  mrg 
    732       1.1  mrg #endif /* no multiply hardware */
    733       1.1  mrg 
    734       1.1  mrg 	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
    735       1.1  mrg 	do_mul(a6, a2, l, a3, h)	/* pp 1 */
    736       1.1  mrg 	do_mul(a11, a2, h, a3, l)	/* pp 2 */
    737       1.1  mrg 	movi	a9, 0
    738       1.1  mrg 	add	a6, a6, a11
    739       1.1  mrg 	bgeu	a6, a11, 1f
    740       1.1  mrg 	addi	a9, a9, 1
    741       1.1  mrg 1:
    742       1.1  mrg 	/* Shift the high half of a9/a6 into position in a9.  Note that
    743       1.1  mrg 	   this value can be safely incremented without any carry-outs.  */
    744       1.1  mrg 	ssai	16
    745       1.1  mrg 	src	a9, a9, a6
    746       1.1  mrg 
    747       1.1  mrg 	/* Compute the low word into a6.  */
    748       1.1  mrg 	do_mul(a11, a2, l, a3, l)	/* pp 0 */
    749       1.1  mrg 	sll	a6, a6
    750       1.1  mrg 	add	a6, a6, a11
    751       1.1  mrg 	bgeu	a6, a11, 1f
    752       1.1  mrg 	addi	a9, a9, 1
    753       1.1  mrg 1:
    754       1.1  mrg 	/* Compute the high word into a2.  */
    755       1.1  mrg 	do_mul(a2, a2, h, a3, h)	/* pp 3 */
    756       1.1  mrg 	add	a2, a2, a9
    757       1.1  mrg 
    758       1.1  mrg #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
    759       1.1  mrg 	/* Restore values saved on the stack during the multiplication.  */
    760       1.1  mrg 	l32i	a0, sp, 0
    761       1.1  mrg 	l32i	a8, sp, 4
    762       1.1  mrg #endif
    763       1.1  mrg #endif /* ! XCHAL_HAVE_MUL32_HIGH */
    764       1.1  mrg 
    765       1.1  mrg 	/* Shift left by 9 bits, unless there was a carry-out from the
    766       1.1  mrg 	   multiply, in which case, shift by 8 bits and increment the
    767       1.1  mrg 	   exponent.  */
    768       1.1  mrg 	movi	a4, 9
    769       1.1  mrg 	srli	a5, a2, 24 - 9
    770       1.1  mrg 	beqz	a5, 1f
    771       1.1  mrg 	addi	a4, a4, -1
    772       1.1  mrg 	addi	a8, a8, 1
    773       1.1  mrg 1:	ssl	a4
    774       1.1  mrg 	src	a2, a2, a6
    775       1.1  mrg 	sll	a6, a6
    776       1.1  mrg 
    777       1.1  mrg 	/* Subtract the extra bias from the exponent sum (plus one to account
    778       1.1  mrg 	   for the explicit "1.0" of the mantissa that will be added to the
    779       1.1  mrg 	   exponent in the final result).  */
    780       1.1  mrg 	movi	a4, 0x80
    781       1.1  mrg 	sub	a8, a8, a4
    782       1.1  mrg 
    783       1.1  mrg 	/* Check for over/underflow.  The value in a8 is one less than the
    784       1.1  mrg 	   final exponent, so values in the range 0..fd are OK here.  */
    785       1.1  mrg 	movi	a4, 0xfe
    786       1.1  mrg 	bgeu	a8, a4, .Lmul_overflow
    787       1.1  mrg 
    788       1.1  mrg .Lmul_round:
    789       1.1  mrg 	/* Round.  */
    790       1.1  mrg 	bgez	a6, .Lmul_rounded
    791       1.1  mrg 	addi	a2, a2, 1
    792       1.1  mrg 	slli	a6, a6, 1
    793       1.1  mrg 	beqz	a6, .Lmul_exactlyhalf
    794       1.1  mrg 
    795       1.1  mrg .Lmul_rounded:
    796       1.1  mrg 	/* Add the exponent to the mantissa.  */
    797       1.1  mrg 	slli	a8, a8, 23
    798       1.1  mrg 	add	a2, a2, a8
    799       1.1  mrg 
    800       1.1  mrg .Lmul_addsign:
    801       1.1  mrg 	/* Add the sign bit.  */
    802       1.1  mrg 	srli	a7, a7, 31
    803       1.1  mrg 	slli	a7, a7, 31
    804       1.1  mrg 	or	a2, a2, a7
    805       1.1  mrg 
    806       1.1  mrg .Lmul_done:
    807       1.1  mrg #if __XTENSA_CALL0_ABI__
    808       1.1  mrg 	l32i	a12, sp, 16
    809       1.1  mrg 	l32i	a13, sp, 20
    810       1.1  mrg 	l32i	a14, sp, 24
    811       1.1  mrg 	l32i	a15, sp, 28
    812       1.1  mrg 	addi	sp, sp, 32
    813       1.1  mrg #endif
    814       1.1  mrg 	leaf_return
    815       1.1  mrg 
    816       1.1  mrg .Lmul_exactlyhalf:
    817       1.1  mrg 	/* Round down to the nearest even value.  */
    818       1.1  mrg 	srli	a2, a2, 1
    819       1.1  mrg 	slli	a2, a2, 1
    820       1.1  mrg 	j	.Lmul_rounded
    821       1.1  mrg 
    822       1.1  mrg .Lmul_overflow:
    823       1.1  mrg 	bltz	a8, .Lmul_underflow
    824       1.1  mrg 	/* Return +/- Infinity.  */
    825       1.1  mrg 	movi	a8, 0xff
    826       1.1  mrg 	slli	a2, a8, 23
    827       1.1  mrg 	j	.Lmul_addsign
    828       1.1  mrg 
    829       1.1  mrg .Lmul_underflow:
    830       1.1  mrg 	/* Create a subnormal value, where the exponent field contains zero,
    831       1.1  mrg 	   but the effective exponent is 1.  The value of a8 is one less than
    832       1.1  mrg 	   the actual exponent, so just negate it to get the shift amount.  */
    833       1.1  mrg 	neg	a8, a8
    834       1.1  mrg 	mov	a9, a6
    835       1.1  mrg 	ssr	a8
    836       1.1  mrg 	bgeui	a8, 32, .Lmul_flush_to_zero
    837       1.1  mrg 
    838       1.1  mrg 	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
    839       1.1  mrg 	   in a6 (combined with the shifted-out bits currently in a6) for
    840       1.1  mrg 	   rounding the result.  */
    841       1.1  mrg 	sll	a6, a2
    842       1.1  mrg 	srl	a2, a2
    843       1.1  mrg 
    844       1.1  mrg 	/* Set the exponent to zero.  */
    845       1.1  mrg 	movi	a8, 0
    846       1.1  mrg 
    847       1.1  mrg 	/* Pack any nonzero bits shifted out into a6.  */
    848       1.1  mrg 	beqz	a9, .Lmul_round
    849       1.1  mrg 	movi	a9, 1
    850       1.1  mrg 	or	a6, a6, a9
    851       1.1  mrg 	j	.Lmul_round
    852       1.1  mrg 
    853       1.1  mrg .Lmul_flush_to_zero:
    854       1.1  mrg 	/* Return zero with the appropriate sign bit.  */
    855       1.1  mrg 	srli	a2, a7, 31
    856       1.1  mrg 	slli	a2, a2, 31
    857       1.1  mrg 	j	.Lmul_done
    858       1.1  mrg 
    859       1.1  mrg #if XCHAL_NO_MUL
    860       1.1  mrg 
    861       1.1  mrg 	/* For Xtensa processors with no multiply hardware, this simplified
    862       1.1  mrg 	   version of _mulsi3 is used for multiplying 16-bit chunks of
    863       1.1  mrg 	   the floating-point mantissas.  When using CALL0, this function
    864       1.1  mrg 	   uses a custom ABI: the inputs are passed in a13 and a14, the
    865       1.1  mrg 	   result is returned in a12, and a8 and a15 are clobbered (the loop also consumes a13 and a14).  */
    866       1.1  mrg 	.align	4
    867       1.1  mrg .Lmul_mulsi3:
    868       1.1  mrg 	leaf_entry sp, 16
    869       1.1  mrg 	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
    870       1.1  mrg 	movi	\dst, 0			/* dst accumulates src1 * src2 by shift-and-add */
    871       1.1  mrg 1:	add	\tmp1, \src2, \dst	/* candidate sum: dst + src2 */
    872       1.1  mrg 	extui	\tmp2, \src1, 0, 1	/* tmp2 = bit 0 of src1 */
    873       1.1  mrg 	movnez	\dst, \tmp1, \tmp2	/* keep the candidate only if that bit is set */
    874       1.1  mrg 
    875       1.1  mrg 	do_addx2 \tmp1, \src2, \dst, \tmp1	/* presumably tmp1 = 2 * src2 + dst; do_addx2 is defined outside this chunk */
    876       1.1  mrg 	extui	\tmp2, \src1, 1, 1	/* tmp2 = bit 1 of src1 */
    877       1.1  mrg 	movnez	\dst, \tmp1, \tmp2
    878       1.1  mrg 
    879       1.1  mrg 	do_addx4 \tmp1, \src2, \dst, \tmp1	/* presumably tmp1 = 4 * src2 + dst -- confirm against the macro */
    880       1.1  mrg 	extui	\tmp2, \src1, 2, 1	/* tmp2 = bit 2 of src1 */
    881       1.1  mrg 	movnez	\dst, \tmp1, \tmp2
    882       1.1  mrg 
    883       1.1  mrg 	do_addx8 \tmp1, \src2, \dst, \tmp1	/* presumably tmp1 = 8 * src2 + dst -- confirm against the macro */
    884       1.1  mrg 	extui	\tmp2, \src1, 3, 1	/* tmp2 = bit 3 of src1 */
    885       1.1  mrg 	movnez	\dst, \tmp1, \tmp2
    886       1.1  mrg 
    887       1.1  mrg 	srli	\src1, \src1, 4		/* four multiplier bits are consumed per iteration */
    888       1.1  mrg 	slli	\src2, \src2, 4		/* scale the multiplicand to match */
    889       1.1  mrg 	bnez	\src1, 1b		/* finish as soon as no multiplier bits remain */
    890       1.1  mrg 	.endm
    891       1.1  mrg #if __XTENSA_CALL0_ABI__
    892       1.1  mrg 	mul_mulsi3_body a12, a13, a14, a15, a8	/* a12 = a13 * a14; a15 and a8 are scratch */
    893       1.1  mrg #else
    894       1.1  mrg 	/* The result will be written into a2, so save that argument in a4.  */
    895       1.1  mrg 	mov	a4, a2
    896       1.1  mrg 	mul_mulsi3_body a2, a4, a3, a5, a6	/* a2 = a4 * a3; a5 and a6 are scratch */
    897       1.1  mrg #endif
    898       1.1  mrg 	leaf_return
    899       1.1  mrg #endif /* XCHAL_NO_MUL */
    900       1.1  mrg #endif /* L_mulsf3 */
    901       1.1  mrg 
    902       1.1  mrg #ifdef L_divsf3
    903       1.1  mrg 
    904       1.1  mrg 	/* Division */
    905   1.1.1.6  mrg 
    906   1.1.1.6  mrg #if XCHAL_HAVE_FP_DIV
    907   1.1.1.6  mrg 
    908   1.1.1.6  mrg 	.align	4
    909   1.1.1.6  mrg 	.global	__divsf3
    910   1.1.1.6  mrg 	.type	__divsf3, @function
    911   1.1.1.6  mrg __divsf3:
    912   1.1.1.6  mrg 	leaf_entry	sp, 16
    913   1.1.1.6  mrg 	/* Hardware-assisted x / y: x arrives in a2, y in a3, and the result leaves in a2.  */
    914   1.1.1.6  mrg 	wfr		f1, a2	/* dividend */
    915   1.1.1.6  mrg 	wfr		f2, a3	/* divisor */
    916   1.1.1.6  mrg 	/* NOTE(review): presumably the ISA-recommended div0.s/maddn.s/divn.s sequence; confirm before reordering any step.  */
    917   1.1.1.6  mrg 	div0.s		f3, f2	/* presumably an initial reciprocal estimate of the divisor -- confirm */
    918   1.1.1.6  mrg 	nexp01.s	f4, f2
    919   1.1.1.6  mrg 	const.s		f5, 1
    920   1.1.1.6  mrg 	maddn.s		f5, f4, f3
    921   1.1.1.6  mrg 	mov.s		f6, f3
    922   1.1.1.6  mrg 	mov.s		f7, f2	/* keep a copy of the divisor; f2 is overwritten on the next line */
    923   1.1.1.6  mrg 	nexp01.s	f2, f1
    924   1.1.1.6  mrg 	maddn.s		f6, f5, f6
    925   1.1.1.6  mrg 	const.s		f5, 1
    926   1.1.1.6  mrg 	const.s		f0, 0
    927   1.1.1.6  mrg 	neg.s		f8, f2
    928   1.1.1.6  mrg 	maddn.s		f5, f4, f6
    929   1.1.1.6  mrg 	maddn.s		f0, f8, f3
    930   1.1.1.6  mrg 	mkdadj.s	f7, f1
    931   1.1.1.6  mrg 	maddn.s		f6, f5, f6
    932   1.1.1.6  mrg 	maddn.s		f8, f4, f0
    933   1.1.1.6  mrg 	const.s		f3, 1
    934   1.1.1.6  mrg 	maddn.s		f3, f4, f6
    935   1.1.1.6  mrg 	maddn.s		f0, f8, f6
    936   1.1.1.6  mrg 	neg.s		f2, f2
    937   1.1.1.6  mrg 	maddn.s		f6, f3, f6
    938   1.1.1.6  mrg 	maddn.s		f2, f4, f0
    939   1.1.1.6  mrg 	addexpm.s	f0, f7
    940   1.1.1.6  mrg 	addexp.s	f6, f7
    941   1.1.1.6  mrg 	divn.s		f0, f2, f6	/* last step of the sequence; the quotient is taken from f0 below */
    942   1.1.1.6  mrg 
    943   1.1.1.6  mrg 	rfr		a2, f0	/* move the quotient back to the integer return register */
    944   1.1.1.6  mrg 
    945   1.1.1.6  mrg 	leaf_return
    946   1.1.1.6  mrg 
    947   1.1.1.6  mrg #else
    948   1.1.1.6  mrg 
    949   1.1.1.6  mrg 	.literal_position
    950       1.1  mrg __divsf3_aux:
    951       1.1  mrg 	/* Register state set up by __divsf3: a6 = 0x7f800000, a7 = x ^ y; a8/a9 = exponent fields of x/y in the *expzero handlers.  */
    952       1.1  mrg 	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
    953       1.1  mrg 	   (This code is placed before the start of the function just to
    954       1.1  mrg 	   keep it in range of the limited branch displacements.)  */
    955       1.1  mrg 
    956       1.1  mrg .Ldiv_yexpzero:
    957       1.1  mrg 	/* Clear the sign bit of y.  */
    958       1.1  mrg 	slli	a3, a3, 1
    959       1.1  mrg 	srli	a3, a3, 1
    960       1.1  mrg 
    961       1.1  mrg 	/* Check for division by zero.  */
    962       1.1  mrg 	beqz	a3, .Ldiv_yzero
    963       1.1  mrg 
    964       1.1  mrg 	/* Normalize y.  Adjust the exponent in a9.  */
    965       1.1  mrg 	do_nsau	a10, a3, a4, a5	/* presumably a10 = leading-zero count of a3, a4/a5 scratch; macro is defined outside this chunk */
    966       1.1  mrg 	addi	a10, a10, -8	/* under that assumption: shift count that puts the leading 1 on bit 23 */
    967       1.1  mrg 	ssl	a10
    968       1.1  mrg 	sll	a3, a3
    969       1.1  mrg 	movi	a9, 1
    970       1.1  mrg 	sub	a9, a9, a10	/* a9 = 1 - shift count */
    971       1.1  mrg 	j	.Ldiv_ynormalized
    972       1.1  mrg 
    973       1.1  mrg .Ldiv_yzero:
    974       1.1  mrg 	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
    975       1.1  mrg 	slli	a4, a2, 1
    976       1.1  mrg 	srli	a4, a4, 1	/* a4 = x with its sign bit cleared */
    977       1.1  mrg 	srli	a2, a7, 31
    978       1.1  mrg 	slli	a2, a2, 31	/* a2 = sign bit of the result (from x ^ y) */
    979       1.1  mrg 	or	a2, a2, a6	/* a2 = signed Infinity */
    980       1.1  mrg 	bnez	a4, 1f		/* x is nonzero: return the Infinity */
    981       1.1  mrg 	movi	a4, 0x400000	/* make it a quiet NaN */
    982       1.1  mrg 	or	a2, a2, a4
    983       1.1  mrg 1:	leaf_return
    984       1.1  mrg 
    985       1.1  mrg .Ldiv_xexpzero:
    986       1.1  mrg 	/* Clear the sign bit of x.  */
    987       1.1  mrg 	slli	a2, a2, 1
    988       1.1  mrg 	srli	a2, a2, 1
    989       1.1  mrg 
    990       1.1  mrg 	/* If x is zero, return zero.  */
    991       1.1  mrg 	beqz	a2, .Ldiv_return_zero
    992       1.1  mrg 
    993       1.1  mrg 	/* Normalize x.  Adjust the exponent in a8.  */
    994       1.1  mrg 	do_nsau	a10, a2, a4, a5	/* presumably a10 = leading-zero count of a2 -- confirm against the macro */
    995       1.1  mrg 	addi	a10, a10, -8	/* under that assumption: shift count that puts the leading 1 on bit 23 */
    996       1.1  mrg 	ssl	a10
    997       1.1  mrg 	sll	a2, a2
    998       1.1  mrg 	movi	a8, 1
    999       1.1  mrg 	sub	a8, a8, a10	/* a8 = 1 - shift count */
   1000       1.1  mrg 	j	.Ldiv_xnormalized
   1001       1.1  mrg 
   1002       1.1  mrg .Ldiv_return_zero:
   1003       1.1  mrg 	/* Return zero with the appropriate sign bit.  */
   1004       1.1  mrg 	srli	a2, a7, 31
   1005       1.1  mrg 	slli	a2, a2, 31
   1006       1.1  mrg 	leaf_return
   1007       1.1  mrg 
   1008       1.1  mrg .Ldiv_xnan_or_inf:
   1009       1.1  mrg 	/* Set the sign bit of the result.  */
   1010       1.1  mrg 	srli	a7, a3, 31
   1011       1.1  mrg 	slli	a7, a7, 31	/* a7 = sign bit of y only */
   1012       1.1  mrg 	xor	a2, a2, a7	/* flip the sign of x when y is negative */
   1013       1.1  mrg 	/* If y is NaN or Inf, return NaN.  */
   1014   1.1.1.5  mrg 	ball	a3, a6, .Ldiv_return_nan
   1015   1.1.1.5  mrg 	slli	a7, a2, 9	/* a7 = mantissa bits of x; nonzero means x is a NaN */
   1016   1.1.1.5  mrg 	bnez	a7, .Ldiv_return_nan
   1017   1.1.1.5  mrg 	leaf_return		/* x is Infinity and y is finite: return the signed Infinity */
   1018       1.1  mrg 
   1019       1.1  mrg .Ldiv_ynan_or_inf:
   1020       1.1  mrg 	/* If y is Infinity, return zero.  */
   1021       1.1  mrg 	slli	a8, a3, 9	/* a8 = mantissa bits of y */
   1022       1.1  mrg 	beqz	a8, .Ldiv_return_zero
   1023       1.1  mrg 	/* y is NaN; return it.  */
   1024       1.1  mrg 	mov	a2, a3
   1025   1.1.1.5  mrg 
   1026   1.1.1.5  mrg .Ldiv_return_nan:
   1027   1.1.1.5  mrg 	movi	a4, 0x400000	/* make it a quiet NaN */
   1028   1.1.1.5  mrg 	or	a2, a2, a4
   1029       1.1  mrg 	leaf_return
   1030       1.1  mrg 
   1031       1.1  mrg 	.align	4
   1032       1.1  mrg 	.global	__divsf3
   1033       1.1  mrg 	.type	__divsf3, @function
   1034       1.1  mrg __divsf3:
   1035       1.1  mrg 	leaf_entry sp, 16
   1036       1.1  mrg 	movi	a6, 0x7f800000	/* mask covering the 8 exponent bits */
   1037       1.1  mrg 	/* Software x / y: x arrives in a2, y in a3, and the quotient is returned in a2.  */
   1038       1.1  mrg 	/* Get the sign of the result.  */
   1039       1.1  mrg 	xor	a7, a2, a3
   1040       1.1  mrg 
   1041       1.1  mrg 	/* Check for NaN and infinity.  */
   1042       1.1  mrg 	ball	a2, a6, .Ldiv_xnan_or_inf	/* every exponent bit of x is set */
   1043       1.1  mrg 	ball	a3, a6, .Ldiv_ynan_or_inf	/* every exponent bit of y is set */
   1044       1.1  mrg 
   1045       1.1  mrg 	/* Extract the exponents.  */
   1046       1.1  mrg 	extui	a8, a2, 23, 8
   1047       1.1  mrg 	extui	a9, a3, 23, 8
   1048       1.1  mrg 
   1049       1.1  mrg 	beqz	a9, .Ldiv_yexpzero	/* y is zero or subnormal */
   1050       1.1  mrg .Ldiv_ynormalized:
   1051       1.1  mrg 	beqz	a8, .Ldiv_xexpzero	/* x is zero or subnormal */
   1052       1.1  mrg .Ldiv_xnormalized:
   1053       1.1  mrg 
   1054       1.1  mrg 	/* Subtract the exponents.  */
   1055       1.1  mrg 	sub	a8, a8, a9
   1056       1.1  mrg 
   1057       1.1  mrg 	/* Replace sign/exponent fields with explicit "1.0".  */
   1058       1.1  mrg 	movi	a10, 0xffffff
   1059       1.1  mrg 	or	a2, a2, a6	/* sets bit 23 (and the bits above it) */
   1060       1.1  mrg 	and	a2, a2, a10	/* keeps bits 0..23 only */
   1061       1.1  mrg 	or	a3, a3, a6
   1062       1.1  mrg 	and	a3, a3, a10
   1063       1.1  mrg 
   1064       1.1  mrg 	/* The first digit of the mantissa division must be a one.
   1065       1.1  mrg 	   Shift x (and adjust the exponent) as needed to make this true.  */
   1066       1.1  mrg 	bltu	a3, a2, 1f	/* y < x: no adjustment needed */
   1067       1.1  mrg 	slli	a2, a2, 1
   1068       1.1  mrg 	addi	a8, a8, -1
   1069       1.1  mrg 1:
   1070       1.1  mrg 	/* Do the first subtraction and shift.  */
   1071       1.1  mrg 	sub	a2, a2, a3
   1072       1.1  mrg 	slli	a2, a2, 1
   1073       1.1  mrg 
   1074       1.1  mrg 	/* Put the quotient into a10.  */
   1075       1.1  mrg 	movi	a10, 1		/* the leading quotient digit produced above */
   1076       1.1  mrg 
   1077       1.1  mrg 	/* Divide one bit at a time for 23 bits.  */
   1078       1.1  mrg 	movi	a9, 23
   1079       1.1  mrg #if XCHAL_HAVE_LOOPS
   1080       1.1  mrg 	loop	a9, .Ldiv_loopend
   1081       1.1  mrg #endif
   1082       1.1  mrg .Ldiv_loop:
   1083       1.1  mrg 	/* Shift the quotient << 1.  */
   1084       1.1  mrg 	slli	a10, a10, 1
   1085       1.1  mrg 
   1086       1.1  mrg 	/* Is this digit a 0 or 1?  */
   1087       1.1  mrg 	bltu	a2, a3, 1f	/* remainder < divisor: the digit is 0 */
   1088       1.1  mrg 
   1089       1.1  mrg 	/* Output a 1 and subtract.  */
   1090       1.1  mrg 	addi	a10, a10, 1
   1091       1.1  mrg 	sub	a2, a2, a3
   1092       1.1  mrg 
   1093       1.1  mrg 	/* Shift the dividend << 1.  */
   1094       1.1  mrg 1:	slli	a2, a2, 1
   1095       1.1  mrg 
   1096       1.1  mrg #if !XCHAL_HAVE_LOOPS
   1097       1.1  mrg 	addi	a9, a9, -1	/* explicit counter when the loop instruction is not used */
   1098       1.1  mrg 	bnez	a9, .Ldiv_loop
   1099       1.1  mrg #endif
   1100       1.1  mrg .Ldiv_loopend:
   1101       1.1  mrg 
   1102       1.1  mrg 	/* Add the exponent bias (less one to account for the explicit "1.0"
   1103       1.1  mrg 	   of the mantissa that will be added to the exponent in the final
   1104       1.1  mrg 	   result).  */
   1105       1.1  mrg 	addi	a8, a8, 0x7e
   1106       1.1  mrg 
   1107       1.1  mrg 	/* Check for over/underflow.  The value in a8 is one less than the
   1108       1.1  mrg 	   final exponent, so values in the range 0..fd are OK here.  */
   1109       1.1  mrg 	movi	a4, 0xfe
   1110       1.1  mrg 	bgeu	a8, a4, .Ldiv_overflow	/* unsigned compare: a negative a8 also takes this branch */
   1111       1.1  mrg 
   1112       1.1  mrg .Ldiv_round:
   1113       1.1  mrg 	/* Round.  The remainder (<< 1) is in a2.  */
   1114       1.1  mrg 	bltu	a2, a3, .Ldiv_rounded	/* less than half: keep the truncated quotient */
   1115       1.1  mrg 	addi	a10, a10, 1
   1116       1.1  mrg 	beq	a2, a3, .Ldiv_exactlyhalf
   1117       1.1  mrg 
   1118       1.1  mrg .Ldiv_rounded:
   1119       1.1  mrg 	/* Add the exponent to the mantissa.  */
   1120       1.1  mrg 	slli	a8, a8, 23
   1121       1.1  mrg 	add	a2, a10, a8	/* bit 23 of a10 (explicit 1, or a rounding carry) adds into the exponent field */
   1122       1.1  mrg 
   1123       1.1  mrg .Ldiv_addsign:
   1124       1.1  mrg 	/* Add the sign bit.  */
   1125       1.1  mrg 	srli	a7, a7, 31
   1126       1.1  mrg 	slli	a7, a7, 31
   1127       1.1  mrg 	or	a2, a2, a7
   1128       1.1  mrg 	leaf_return
   1129       1.1  mrg 
   1130       1.1  mrg .Ldiv_overflow:
   1131       1.1  mrg 	bltz	a8, .Ldiv_underflow
   1132       1.1  mrg 	/* Return +/- Infinity.  */
   1133       1.1  mrg 	addi	a8, a4, 1	/* 0xff */
   1134       1.1  mrg 	slli	a2, a8, 23
   1135       1.1  mrg 	j	.Ldiv_addsign
   1136       1.1  mrg 
   1137       1.1  mrg .Ldiv_exactlyhalf:
   1138       1.1  mrg 	/* Remainder is exactly half the divisor.  Round even.  */
   1139       1.1  mrg 	srli	a10, a10, 1
   1140       1.1  mrg 	slli	a10, a10, 1	/* clear bit 0 of the incremented quotient */
   1141       1.1  mrg 	j	.Ldiv_rounded
   1142       1.1  mrg 
   1143       1.1  mrg .Ldiv_underflow:
   1144       1.1  mrg 	/* Create a subnormal value, where the exponent field contains zero,
   1145       1.1  mrg 	   but the effective exponent is 1.  The value of a8 is one less than
   1146       1.1  mrg 	   the actual exponent, so just negate it to get the shift amount.  */
   1147       1.1  mrg 	neg	a8, a8
   1148       1.1  mrg 	ssr	a8
   1149       1.1  mrg 	bgeui	a8, 32, .Ldiv_flush_to_zero	/* shifting by 32 or more leaves nothing */
   1150       1.1  mrg 
   1151       1.1  mrg 	/* Shift a10 right.  Any bits that are shifted out of a10 are
   1152       1.1  mrg 	   saved in a6 for rounding the result.  */
   1153       1.1  mrg 	sll	a6, a10
   1154       1.1  mrg 	srl	a10, a10
   1155       1.1  mrg 
   1156       1.1  mrg 	/* Set the exponent to zero.  */
   1157       1.1  mrg 	movi	a8, 0
   1158       1.1  mrg 
   1159       1.1  mrg 	/* Pack any nonzero remainder (in a2) into a6.  */
   1160       1.1  mrg 	beqz	a2, 1f
   1161       1.1  mrg 	movi	a9, 1
   1162       1.1  mrg 	or	a6, a6, a9	/* sticky bit: something nonzero lies below the shifted-out bits */
   1163       1.1  mrg 
   1164       1.1  mrg 	/* Round a10 based on the bits shifted out into a6.  */
   1165       1.1  mrg 1:	bgez	a6, .Ldiv_rounded	/* top shifted-out bit clear: below half, no rounding */
   1166       1.1  mrg 	addi	a10, a10, 1
   1167       1.1  mrg 	slli	a6, a6, 1
   1168       1.1  mrg 	bnez	a6, .Ldiv_rounded	/* more than half: keep the increment */
   1169       1.1  mrg 	srli	a10, a10, 1
   1170       1.1  mrg 	slli	a10, a10, 1	/* exactly half: round to even */
   1171       1.1  mrg 	j	.Ldiv_rounded
   1172       1.1  mrg 
   1173       1.1  mrg .Ldiv_flush_to_zero:
   1174       1.1  mrg 	/* Return zero with the appropriate sign bit.  */
   1175       1.1  mrg 	srli	a2, a7, 31
   1176       1.1  mrg 	slli	a2, a2, 31
   1177       1.1  mrg 	leaf_return
   1178       1.1  mrg 
   1179   1.1.1.6  mrg #endif /* XCHAL_HAVE_FP_DIV */
   1180   1.1.1.6  mrg 
   1181       1.1  mrg #endif /* L_divsf3 */
   1182       1.1  mrg 
   1183       1.1  mrg #ifdef L_cmpsf2
   1184       1.1  mrg 
   1185       1.1  mrg 	/* Equal and Not Equal */
   1186       1.1  mrg 
   1187       1.1  mrg 	.align	4
   1188       1.1  mrg 	.global	__eqsf2
   1189       1.1  mrg 	.global	__nesf2
   1190       1.1  mrg 	.set	__nesf2, __eqsf2
   1191       1.1  mrg 	.type	__eqsf2, @function
   1192       1.1  mrg __eqsf2:
   1193       1.1  mrg 	leaf_entry sp, 16
   1194       1.1  mrg 	bne	a2, a3, 4f
   1195       1.1  mrg 
   1196       1.1  mrg 	/* The values are equal but NaN != NaN.  Check the exponent.  */
   1197       1.1  mrg 	movi	a6, 0x7f800000
   1198       1.1  mrg 	ball	a2, a6, 3f
   1199       1.1  mrg 
   1200       1.1  mrg 	/* Equal.  */
   1201       1.1  mrg 	movi	a2, 0
   1202       1.1  mrg 	leaf_return
   1203       1.1  mrg 
   1204       1.1  mrg 	/* Not equal.  */
   1205       1.1  mrg 2:	movi	a2, 1
   1206       1.1  mrg 	leaf_return
   1207       1.1  mrg 
   1208       1.1  mrg 	/* Check if the mantissas are nonzero.  */
   1209       1.1  mrg 3:	slli	a7, a2, 9
   1210       1.1  mrg 	j	5f
   1211       1.1  mrg 
   1212       1.1  mrg 	/* Check if x and y are zero with different signs.  */
   1213       1.1  mrg 4:	or	a7, a2, a3
   1214       1.1  mrg 	slli	a7, a7, 1
   1215       1.1  mrg 
   1216       1.1  mrg 	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
   1217       1.1  mrg 	   or x when exponent(x) = 0x7f8 and x == y.  */
   1218       1.1  mrg 5:	movi	a2, 0
   1219       1.1  mrg 	movi	a3, 1
   1220       1.1  mrg 	movnez	a2, a3, a7
   1221       1.1  mrg 	leaf_return
   1222       1.1  mrg 
   1223       1.1  mrg 
   1224       1.1  mrg 	/* Greater Than: return 1 in a2 if x (a2) > y (a3), else 0; also 0 when either operand is a NaN.  */
   1225       1.1  mrg 
   1226       1.1  mrg 	.align	4
   1227       1.1  mrg 	.global	__gtsf2
   1228       1.1  mrg 	.type	__gtsf2, @function
   1229       1.1  mrg __gtsf2:
   1230       1.1  mrg 	leaf_entry sp, 16
   1231       1.1  mrg 	movi	a6, 0x7f800000
   1232       1.1  mrg 	ball	a2, a6, 2f	/* exponent of x all ones: x may be a NaN */
   1233       1.1  mrg 1:	bnall	a3, a6, .Lle_cmp	/* y is finite: use the comparison shared with __lesf2 */
   1234       1.1  mrg 
   1235       1.1  mrg 	/* Check if y is a NaN.  */
   1236       1.1  mrg 	slli	a7, a3, 9
   1237       1.1  mrg 	beqz	a7, .Lle_cmp	/* zero mantissa: y is an Infinity, still comparable */
   1238       1.1  mrg 	movi	a2, 0
   1239       1.1  mrg 	leaf_return
   1240       1.1  mrg 
   1241       1.1  mrg 	/* Check if x is a NaN.  */
   1242       1.1  mrg 2:	slli	a7, a2, 9
   1243       1.1  mrg 	beqz	a7, 1b		/* x is an Infinity: go on and check y */
   1244       1.1  mrg 	movi	a2, 0
   1245       1.1  mrg 	leaf_return
   1246       1.1  mrg 
   1247       1.1  mrg 
   1248       1.1  mrg 	/* Less Than or Equal: return 0 in a2 if x (a2) <= y (a3), else 1; also 1 when either operand is a NaN.  */
   1249       1.1  mrg 
   1250       1.1  mrg 	.align	4
   1251       1.1  mrg 	.global	__lesf2
   1252       1.1  mrg 	.type	__lesf2, @function
   1253       1.1  mrg __lesf2:
   1254       1.1  mrg 	leaf_entry sp, 16
   1255       1.1  mrg 	movi	a6, 0x7f800000
   1256       1.1  mrg 	ball	a2, a6, 2f	/* exponent of x all ones: x may be a NaN */
   1257       1.1  mrg 1:	bnall	a3, a6, .Lle_cmp	/* y is finite: compare */
   1258       1.1  mrg 
   1259       1.1  mrg 	/* Check if y is a NaN.  */
   1260       1.1  mrg 	slli	a7, a3, 9
   1261       1.1  mrg 	beqz	a7, .Lle_cmp	/* zero mantissa: y is an Infinity, still comparable */
   1262       1.1  mrg 	movi	a2, 1
   1263       1.1  mrg 	leaf_return
   1264       1.1  mrg 
   1265       1.1  mrg 	/* Check if x is a NaN.  */
   1266       1.1  mrg 2:	slli	a7, a2, 9
   1267       1.1  mrg 	beqz	a7, 1b		/* x is an Infinity: go on and check y */
   1268       1.1  mrg 	movi	a2, 1
   1269       1.1  mrg 	leaf_return
   1270       1.1  mrg 
   1271       1.1  mrg .Lle_cmp:
   1272       1.1  mrg 	/* Check if x and y have different signs.  */
   1273       1.1  mrg 	xor	a7, a2, a3
   1274       1.1  mrg 	bltz	a7, .Lle_diff_signs
   1275       1.1  mrg 
   1276       1.1  mrg 	/* Check if x is negative.  */
   1277       1.1  mrg 	bltz	a2, .Lle_xneg
   1278       1.1  mrg 
   1279       1.1  mrg 	/* Check if x <= y.  */
   1280       1.1  mrg 	bltu	a3, a2, 5f	/* both non-negative: the bit patterns order like the values */
   1281       1.1  mrg 4:	movi	a2, 0		/* x <= y */
   1282       1.1  mrg 	leaf_return
   1283       1.1  mrg 
   1284       1.1  mrg .Lle_xneg:
   1285       1.1  mrg 	/* Check if y <= x.  */
   1286       1.1  mrg 	bgeu	a2, a3, 4b	/* both negative: the larger bit pattern is the smaller value */
   1287       1.1  mrg 5:	movi	a2, 1		/* x > y */
   1288       1.1  mrg 	leaf_return
   1289       1.1  mrg 
   1290       1.1  mrg .Lle_diff_signs:
   1291       1.1  mrg 	bltz	a2, 4b		/* x is negative, y is not: x <= y */
   1292       1.1  mrg 
   1293       1.1  mrg 	/* Check if both x and y are zero.  */
   1294       1.1  mrg 	or	a7, a2, a3
   1295       1.1  mrg 	slli	a7, a7, 1	/* drop the sign bit; zero only if both operands are zeros */
   1296       1.1  mrg 	movi	a2, 1
   1297       1.1  mrg 	movi	a3, 0
   1298       1.1  mrg 	moveqz	a2, a3, a7	/* +0 and -0 compare equal, so return 0 */
   1299       1.1  mrg 	leaf_return
   1300       1.1  mrg 
   1301       1.1  mrg 
   1302       1.1  mrg 	/* Greater Than or Equal: return 0 in a2 if x (a2) >= y (a3), else -1; also -1 when either operand is a NaN.  */
   1303       1.1  mrg 
   1304       1.1  mrg 	.align	4
   1305       1.1  mrg 	.global	__gesf2
   1306       1.1  mrg 	.type	__gesf2, @function
   1307       1.1  mrg __gesf2:
   1308       1.1  mrg 	leaf_entry sp, 16
   1309       1.1  mrg 	movi	a6, 0x7f800000
   1310       1.1  mrg 	ball	a2, a6, 2f	/* exponent of x all ones: x may be a NaN */
   1311       1.1  mrg 1:	bnall	a3, a6, .Llt_cmp	/* y is finite: use the comparison shared with __ltsf2 */
   1312       1.1  mrg 
   1313       1.1  mrg 	/* Check if y is a NaN.  */
   1314       1.1  mrg 	slli	a7, a3, 9
   1315       1.1  mrg 	beqz	a7, .Llt_cmp	/* zero mantissa: y is an Infinity, still comparable */
   1316       1.1  mrg 	movi	a2, -1
   1317       1.1  mrg 	leaf_return
   1318       1.1  mrg 
   1319       1.1  mrg 	/* Check if x is a NaN.  */
   1320       1.1  mrg 2:	slli	a7, a2, 9
   1321       1.1  mrg 	beqz	a7, 1b		/* x is an Infinity: go on and check y */
   1322       1.1  mrg 	movi	a2, -1
   1323       1.1  mrg 	leaf_return
   1324       1.1  mrg 
   1325       1.1  mrg 
   1326       1.1  mrg 	/* Less Than: return -1 in a2 if x (a2) < y (a3), else 0; also 0 when either operand is a NaN.  */
   1327       1.1  mrg 
   1328       1.1  mrg 	.align	4
   1329       1.1  mrg 	.global	__ltsf2
   1330       1.1  mrg 	.type	__ltsf2, @function
   1331       1.1  mrg __ltsf2:
   1332       1.1  mrg 	leaf_entry sp, 16
   1333       1.1  mrg 	movi	a6, 0x7f800000
   1334       1.1  mrg 	ball	a2, a6, 2f	/* exponent of x all ones: x may be a NaN */
   1335       1.1  mrg 1:	bnall	a3, a6, .Llt_cmp	/* y is finite: compare */
   1336       1.1  mrg 
   1337       1.1  mrg 	/* Check if y is a NaN.  */
   1338       1.1  mrg 	slli	a7, a3, 9
   1339       1.1  mrg 	beqz	a7, .Llt_cmp	/* zero mantissa: y is an Infinity, still comparable */
   1340       1.1  mrg 	movi	a2, 0
   1341       1.1  mrg 	leaf_return
   1342       1.1  mrg 
   1343       1.1  mrg 	/* Check if x is a NaN.  */
   1344       1.1  mrg 2:	slli	a7, a2, 9
   1345       1.1  mrg 	beqz	a7, 1b		/* x is an Infinity: go on and check y */
   1346       1.1  mrg 	movi	a2, 0
   1347       1.1  mrg 	leaf_return
   1348       1.1  mrg 
   1349       1.1  mrg .Llt_cmp:
   1350       1.1  mrg 	/* Check if x and y have different signs.  */
   1351       1.1  mrg 	xor	a7, a2, a3
   1352       1.1  mrg 	bltz	a7, .Llt_diff_signs
   1353       1.1  mrg 
   1354       1.1  mrg 	/* Check if x is negative.  */
   1355       1.1  mrg 	bltz	a2, .Llt_xneg
   1356       1.1  mrg 
   1357       1.1  mrg 	/* Check if x < y.  */
   1358       1.1  mrg 	bgeu	a2, a3, 5f	/* both non-negative: the bit patterns order like the values */
   1359       1.1  mrg 4:	movi	a2, -1		/* x < y */
   1360       1.1  mrg 	leaf_return
   1361       1.1  mrg 
   1362       1.1  mrg .Llt_xneg:
   1363       1.1  mrg 	/* Check if y < x.  */
   1364       1.1  mrg 	bltu	a3, a2, 4b	/* both negative: the larger bit pattern is the smaller value */
   1365       1.1  mrg 5:	movi	a2, 0		/* x >= y */
   1366       1.1  mrg 	leaf_return
   1367       1.1  mrg 
   1368       1.1  mrg .Llt_diff_signs:
   1369       1.1  mrg 	bgez	a2, 5b		/* x is non-negative, y is negative: not less */
   1370       1.1  mrg 
   1371       1.1  mrg 	/* x is negative and y is not: x < y unless both are zero.  */
   1372       1.1  mrg 	or	a7, a2, a3
   1373       1.1  mrg 	slli	a7, a7, 1	/* drop the sign bit; zero only if both operands are zeros */
   1374       1.1  mrg 	movi	a2, 0
   1375       1.1  mrg 	movi	a3, -1
   1376       1.1  mrg 	movnez	a2, a3, a7	/* return -1 when at least one operand is nonzero */
   1377       1.1  mrg 	leaf_return
   1378       1.1  mrg 
   1379       1.1  mrg 
   1380       1.1  mrg 	/* Unordered: return 1 in a2 if x (a2) or y (a3) is a NaN, else 0.  */
   1381       1.1  mrg 
   1382       1.1  mrg 	.align	4
   1383       1.1  mrg 	.global	__unordsf2
   1384       1.1  mrg 	.type	__unordsf2, @function
   1385       1.1  mrg __unordsf2:
   1386       1.1  mrg 	leaf_entry sp, 16
   1387       1.1  mrg 	movi	a6, 0x7f800000
   1388       1.1  mrg 	ball	a2, a6, 3f	/* exponent of x all ones: Infinity or NaN */
   1389       1.1  mrg 1:	ball	a3, a6, 4f	/* exponent of y all ones: Infinity or NaN */
   1390       1.1  mrg 2:	movi	a2, 0		/* neither operand is a NaN */
   1391       1.1  mrg 	leaf_return
   1392       1.1  mrg 
   1393       1.1  mrg 3:	slli	a7, a2, 9	/* a7 = mantissa bits of x */
   1394       1.1  mrg 	beqz	a7, 1b		/* x is an Infinity: go on and check y */
   1395       1.1  mrg 	movi	a2, 1
   1396       1.1  mrg 	leaf_return
   1397       1.1  mrg 
   1398       1.1  mrg 4:	slli	a7, a3, 9	/* a7 = mantissa bits of y */
   1399       1.1  mrg 	beqz	a7, 2b		/* y is an Infinity: ordered */
   1400       1.1  mrg 	movi	a2, 1
   1401       1.1  mrg 	leaf_return
   1402       1.1  mrg 
   1403       1.1  mrg #endif /* L_cmpsf2 */
   1404       1.1  mrg 
   1405       1.1  mrg #ifdef L_fixsfsi
   1406       1.1  mrg 
   1407       1.1  mrg 	.align	4
   1408       1.1  mrg 	.global	__fixsfsi
   1409       1.1  mrg 	.type	__fixsfsi, @function
   1410       1.1  mrg __fixsfsi:
   1411       1.1  mrg 	leaf_entry sp, 16
   1412       1.1  mrg 	/* Convert the float in a2 to a signed 32-bit integer in a2: truncates, saturates out-of-range values, NaN gives +maxint.  */
   1413       1.1  mrg 	/* Check for NaN and Infinity.  */
   1414       1.1  mrg 	movi	a6, 0x7f800000
   1415       1.1  mrg 	ball	a2, a6, .Lfixsfsi_nan_or_inf
   1416       1.1  mrg 
   1417       1.1  mrg 	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
   1418       1.1  mrg 	extui	a4, a2, 23, 8
   1419       1.1  mrg 	addi	a4, a4, -0x7e
   1420       1.1  mrg 	bgei	a4, 32, .Lfixsfsi_maxint	/* magnitude too large for 32 bits: saturate */
   1421       1.1  mrg 	blti	a4, 1, .Lfixsfsi_zero	/* magnitude below 1.0: result is 0 */
   1422       1.1  mrg 
   1423       1.1  mrg 	/* Add explicit "1.0" and shift << 8.  */
   1424       1.1  mrg 	or	a7, a2, a6	/* a7 keeps the sign bit of the input for the final select */
   1425       1.1  mrg 	slli	a5, a7, 8	/* the explicit 1 now sits in bit 31 */
   1426       1.1  mrg 
   1427       1.1  mrg 	/* Shift back to the right, based on the exponent.  */
   1428       1.1  mrg 	ssl	a4		/* shift by 32 - a4 */
   1429       1.1  mrg 	srl	a5, a5
   1430       1.1  mrg 
   1431       1.1  mrg 	/* Negate the result if sign != 0.  */
   1432       1.1  mrg 	neg	a2, a5
   1433       1.1  mrg 	movgez	a2, a5, a7	/* non-negative input: take the un-negated magnitude */
   1434       1.1  mrg 	leaf_return
   1435       1.1  mrg 
   1436       1.1  mrg .Lfixsfsi_nan_or_inf:
   1437       1.1  mrg 	/* Handle Infinity and NaN.  */
   1438       1.1  mrg 	slli	a4, a2, 9
   1439       1.1  mrg 	beqz	a4, .Lfixsfsi_maxint	/* zero mantissa: Infinity saturates according to its sign */
   1440       1.1  mrg 
   1441       1.1  mrg 	/* Translate NaN to +maxint.  */
   1442       1.1  mrg 	movi	a2, 0		/* a non-negative a2 selects +maxint below */
   1443       1.1  mrg 
   1444       1.1  mrg .Lfixsfsi_maxint:
   1445       1.1  mrg 	slli	a4, a6, 8	/* 0x80000000 */
   1446       1.1  mrg 	addi	a5, a4, -1	/* 0x7fffffff */
   1447       1.1  mrg 	movgez	a4, a5, a2	/* a2 >= 0 selects 0x7fffffff, otherwise 0x80000000 stays */
   1448       1.1  mrg 	mov	a2, a4
   1449       1.1  mrg 	leaf_return
   1450       1.1  mrg 
   1451       1.1  mrg .Lfixsfsi_zero:
   1452       1.1  mrg 	movi	a2, 0
   1453       1.1  mrg 	leaf_return
   1454       1.1  mrg 
   1455       1.1  mrg #endif /* L_fixsfsi */
   1456       1.1  mrg 
   1457       1.1  mrg #ifdef L_fixsfdi
   1458       1.1  mrg 
   1459       1.1  mrg 	.align	4
   1460       1.1  mrg 	.global	__fixsfdi
   1461       1.1  mrg 	.type	__fixsfdi, @function
   1462       1.1  mrg __fixsfdi:
   1463       1.1  mrg 	leaf_entry sp, 16
   1464       1.1  mrg 
   1465       1.1  mrg 	/* Check for NaN and Infinity.  */
   1466       1.1  mrg 	movi	a6, 0x7f800000
   1467       1.1  mrg 	ball	a2, a6, .Lfixsfdi_nan_or_inf
   1468       1.1  mrg 
   1469       1.1  mrg 	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
   1470       1.1  mrg 	extui	a4, a2, 23, 8
   1471       1.1  mrg 	addi	a4, a4, -0x7e
   1472       1.1  mrg 	bgei	a4, 64, .Lfixsfdi_maxint
   1473       1.1  mrg 	blti	a4, 1, .Lfixsfdi_zero
   1474       1.1  mrg 
   1475       1.1  mrg 	/* Add explicit "1.0" and shift << 8.  */
   1476       1.1  mrg 	or	a7, a2, a6
   1477       1.1  mrg 	slli	xh, a7, 8
   1478       1.1  mrg 
   1479       1.1  mrg 	/* Shift back to the right, based on the exponent.  */
   1480       1.1  mrg 	ssl	a4		/* shift by 64 - a4 */
   1481       1.1  mrg 	bgei	a4, 32, .Lfixsfdi_smallshift
   1482       1.1  mrg 	srl	xl, xh
   1483       1.1  mrg 	movi	xh, 0
   1484       1.1  mrg 
   1485       1.1  mrg .Lfixsfdi_shifted:
   1486       1.1  mrg 	/* Negate the result if sign != 0.  */
   1487       1.1  mrg 	bgez	a7, 1f
   1488       1.1  mrg 	neg	xl, xl
   1489       1.1  mrg 	neg	xh, xh
   1490       1.1  mrg 	beqz	xl, 1f
   1491       1.1  mrg 	addi	xh, xh, -1
   1492       1.1  mrg 1:	leaf_return
   1493       1.1  mrg 
   1494       1.1  mrg .Lfixsfdi_smallshift:
   1495       1.1  mrg 	movi	xl, 0
   1496       1.1  mrg 	sll	xl, xh
   1497       1.1  mrg 	srl	xh, xh
   1498       1.1  mrg 	j	.Lfixsfdi_shifted
   1499       1.1  mrg 
   1500       1.1  mrg .Lfixsfdi_nan_or_inf:
   1501       1.1  mrg 	/* Handle Infinity and NaN.  */
   1502       1.1  mrg 	slli	a4, a2, 9
   1503       1.1  mrg 	beqz	a4, .Lfixsfdi_maxint
   1504       1.1  mrg 
   1505       1.1  mrg 	/* Translate NaN to +maxint.  */
   1506       1.1  mrg 	movi	a2, 0
   1507       1.1  mrg 
   1508       1.1  mrg .Lfixsfdi_maxint:
   1509       1.1  mrg 	slli	a7, a6, 8	/* 0x80000000 */
   1510       1.1  mrg 	bgez	a2, 1f
   1511       1.1  mrg 	mov	xh, a7
   1512       1.1  mrg 	movi	xl, 0
   1513       1.1  mrg 	leaf_return
   1514       1.1  mrg 
   1515       1.1  mrg 1:	addi	xh, a7, -1	/* 0x7fffffff */
   1516       1.1  mrg 	movi	xl, -1
   1517       1.1  mrg 	leaf_return
   1518       1.1  mrg 
   1519       1.1  mrg .Lfixsfdi_zero:
   1520       1.1  mrg 	movi	xh, 0
   1521       1.1  mrg 	movi	xl, 0
   1522       1.1  mrg 	leaf_return
   1523       1.1  mrg 
   1524       1.1  mrg #endif /* L_fixsfdi */
   1525       1.1  mrg 
   1526       1.1  mrg #ifdef L_fixunssfsi
   1527       1.1  mrg 
   1528       1.1  mrg 	.align	4
   1529       1.1  mrg 	.global	__fixunssfsi
   1530       1.1  mrg 	.type	__fixunssfsi, @function
                       	/* __fixunssfsi: convert the single-precision value in a2 to a
                       	   32-bit unsigned integer returned in a2, discarding the fraction.
                       	   Magnitudes below 1 give 0.  Positive values of 2^32 or more and
                       	   +Infinity give 0xffffffff.  Negative values with magnitude below
                       	   2^31 return the negated magnitude; larger negative magnitudes
                       	   and -Infinity give 0x80000000.  NaN gives 0xffffffff.
                       	   Scratch: a4, a5, a6, a7.  */
   1531       1.1  mrg __fixunssfsi:
   1532       1.1  mrg 	leaf_entry sp, 16
   1533       1.1  mrg 
   1534       1.1  mrg 	/* Check for NaN and Infinity.  */
   1535       1.1  mrg 	movi	a6, 0x7f800000	/* exponent field mask */
   1536       1.1  mrg 	ball	a2, a6, .Lfixunssfsi_nan_or_inf	/* exponent all ones */
   1537       1.1  mrg 
   1538       1.1  mrg 	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
   1539       1.1  mrg 	extui	a4, a2, 23, 8	/* a4 = biased exponent */
   1540       1.1  mrg 	addi	a4, a4, -0x7f	/* a4 = unbiased exponent */
   1541       1.1  mrg 	bgei	a4, 32, .Lfixunssfsi_maxint	/* too large: saturate */
   1542       1.1  mrg 	bltz	a4, .Lfixunssfsi_zero	/* magnitude below 1 */
   1543       1.1  mrg 
   1544       1.1  mrg 	/* Add explicit "1.0" and shift << 8.  */
   1545       1.1  mrg 	or	a7, a2, a6	/* set exponent bits; sign stays in bit 31 */
   1546       1.1  mrg 	slli	a5, a7, 8	/* leading 1 at bit 31, mantissa below it */
   1547       1.1  mrg 
   1548       1.1  mrg 	/* Shift back to the right, based on the exponent.  */
   1549       1.1  mrg 	addi	a4, a4, 1	/* a4 = number of integer bits, 1..32 */
   1550       1.1  mrg 	beqi	a4, 32, .Lfixunssfsi_bigexp	/* all 32 bits used: no shift */
   1551       1.1  mrg 	ssl	a4		/* shift by 32 - a4 */
   1552       1.1  mrg 	srl	a5, a5		/* a5 = integer magnitude */
   1553       1.1  mrg 
   1554       1.1  mrg 	/* Negate the result if sign != 0.  */
   1555       1.1  mrg 	neg	a2, a5
   1556       1.1  mrg 	movgez	a2, a5, a7	/* keep +a5 when the sign bit of a7 is clear */
   1557       1.1  mrg 	leaf_return
   1558       1.1  mrg 
   1559       1.1  mrg .Lfixunssfsi_nan_or_inf:
   1560       1.1  mrg 	/* Handle Infinity and NaN.  */
   1561       1.1  mrg 	slli	a4, a2, 9	/* drop sign and exponent, keep mantissa */
   1562       1.1  mrg 	beqz	a4, .Lfixunssfsi_maxint	/* zero mantissa: Infinity */
   1563       1.1  mrg 
   1564       1.1  mrg 	/* Translate NaN to 0xffffffff.  */
   1565       1.1  mrg 	movi	a2, -1
   1566       1.1  mrg 	leaf_return
   1567       1.1  mrg 
   1568       1.1  mrg .Lfixunssfsi_maxint:
   1569       1.1  mrg 	slli	a4, a6, 8	/* 0x80000000 */
   1570       1.1  mrg 	movi	a5, -1		/* 0xffffffff */
   1571       1.1  mrg 	movgez	a4, a5, a2	/* non-negative a2 selects 0xffffffff */
   1572       1.1  mrg 	mov	a2, a4
   1573       1.1  mrg 	leaf_return
   1574       1.1  mrg 
   1575       1.1  mrg .Lfixunssfsi_zero:
   1576       1.1  mrg 	movi	a2, 0
   1577       1.1  mrg 	leaf_return
   1578       1.1  mrg 
   1579       1.1  mrg .Lfixunssfsi_bigexp:
   1580       1.1  mrg 	/* Handle unsigned maximum exponent case.  */
   1581       1.1  mrg 	bltz	a2, 1f		/* negative input cannot be represented */
   1582       1.1  mrg 	mov	a2, a5		/* no shift needed */
   1583       1.1  mrg 	leaf_return
   1584       1.1  mrg 
   1585       1.1  mrg 	/* Return 0x80000000 if negative.  */
   1586       1.1  mrg 1:	slli	a2, a6, 8
   1587       1.1  mrg 	leaf_return
   1588       1.1  mrg 
   1589       1.1  mrg #endif /* L_fixunssfsi */
   1590       1.1  mrg 
   1591       1.1  mrg #ifdef L_fixunssfdi
   1592       1.1  mrg 
   1593       1.1  mrg 	.align	4
   1594       1.1  mrg 	.global	__fixunssfdi
   1595       1.1  mrg 	.type	__fixunssfdi, @function
                       	/* __fixunssfdi: convert the single-precision value in a2 to a
                       	   64-bit unsigned integer returned in xh:xl (the endian-dependent
                       	   aliases for the a2/a3 pair defined at the top of the file),
                       	   discarding the fraction.  Magnitudes below 1 give 0.  Positive
                       	   values of 2^64 or more and +Infinity give all ones.  Negative
                       	   values with magnitude below 2^63 return the negated magnitude;
                       	   larger negative magnitudes and -Infinity give
                       	   0x8000000000000000.  NaN gives all ones.
                       	   Scratch: a4, a6, a7.  */
   1596       1.1  mrg __fixunssfdi:
   1597       1.1  mrg 	leaf_entry sp, 16
   1598       1.1  mrg 
   1599       1.1  mrg 	/* Check for NaN and Infinity.  */
   1600       1.1  mrg 	movi	a6, 0x7f800000	/* exponent field mask */
   1601       1.1  mrg 	ball	a2, a6, .Lfixunssfdi_nan_or_inf	/* exponent all ones */
   1602       1.1  mrg 
   1603       1.1  mrg 	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
   1604       1.1  mrg 	extui	a4, a2, 23, 8	/* a4 = biased exponent */
   1605       1.1  mrg 	addi	a4, a4, -0x7f	/* a4 = unbiased exponent */
   1606       1.1  mrg 	bgei	a4, 64, .Lfixunssfdi_maxint	/* too large: saturate */
   1607       1.1  mrg 	bltz	a4, .Lfixunssfdi_zero	/* magnitude below 1 */
   1608       1.1  mrg 
   1609       1.1  mrg 	/* Add explicit "1.0" and shift << 8.  From here on the sign is
                       	   read from a7, because xh or xl aliases a2 and is overwritten.  */
   1610       1.1  mrg 	or	a7, a2, a6	/* set exponent bits; sign stays in bit 31 */
   1611       1.1  mrg 	slli	xh, a7, 8	/* leading 1 at bit 31, mantissa below it */
   1612       1.1  mrg 
   1613       1.1  mrg 	/* Shift back to the right, based on the exponent.  */
   1614       1.1  mrg 	addi	a4, a4, 1	/* a4 = number of integer bits, 1..64 */
   1615       1.1  mrg 	beqi	a4, 64, .Lfixunssfdi_bigexp	/* all 64 bits used: no shift */
   1616       1.1  mrg 	ssl	a4		/* shift by 64 - a4 */
   1617       1.1  mrg 	bgei	a4, 32, .Lfixunssfdi_smallshift
   1618       1.1  mrg 	srl	xl, xh		/* a4 < 32: result fits in the low word */
   1619       1.1  mrg 	movi	xh, 0
   1620       1.1  mrg 
   1621       1.1  mrg .Lfixunssfdi_shifted:
   1622       1.1  mrg 	/* Negate the result if sign != 0.  */
   1623       1.1  mrg 	bgez	a7, 1f
   1624       1.1  mrg 	neg	xl, xl
   1625       1.1  mrg 	neg	xh, xh
   1626       1.1  mrg 	beqz	xl, 1f		/* no borrow when the low word is zero */
   1627       1.1  mrg 	addi	xh, xh, -1	/* propagate the borrow into the high word */
   1628       1.1  mrg 1:	leaf_return
   1629       1.1  mrg 
   1630       1.1  mrg .Lfixunssfdi_smallshift:
   1631       1.1  mrg 	movi	xl, 0		/* src below reads xl as the low input word */
   1632       1.1  mrg 	src	xl, xh, xl	/* xl = low word of (xh:0 >> (64 - a4)) */
   1633       1.1  mrg 	srl	xh, xh		/* xh = xh >> (64 - a4) */
   1634       1.1  mrg 	j	.Lfixunssfdi_shifted
   1635       1.1  mrg 
   1636       1.1  mrg .Lfixunssfdi_nan_or_inf:
   1637       1.1  mrg 	/* Handle Infinity and NaN.  */
   1638       1.1  mrg 	slli	a4, a2, 9	/* drop sign and exponent, keep mantissa */
   1639       1.1  mrg 	beqz	a4, .Lfixunssfdi_maxint	/* zero mantissa: Infinity */
   1640       1.1  mrg 
   1641       1.1  mrg 	/* Translate NaN to 0xffffffff.... */
   1642       1.1  mrg 1:	movi	xh, -1
   1643       1.1  mrg 	movi	xl, -1
   1644       1.1  mrg 	leaf_return
   1645       1.1  mrg 
   1646       1.1  mrg .Lfixunssfdi_maxint:
   1647       1.1  mrg 	bgez	a2, 1b		/* a2 still holds the input; positive gives all ones */
   1648       1.1  mrg 2:	slli	xh, a6, 8	/* 0x80000000 */
   1649       1.1  mrg 	movi	xl, 0
   1650       1.1  mrg 	leaf_return
   1651       1.1  mrg 
   1652       1.1  mrg .Lfixunssfdi_zero:
   1653       1.1  mrg 	movi	xh, 0
   1654       1.1  mrg 	movi	xl, 0
   1655       1.1  mrg 	leaf_return
   1656       1.1  mrg 
   1657       1.1  mrg .Lfixunssfdi_bigexp:
   1658       1.1  mrg 	/* Handle unsigned maximum exponent case.  */
   1659       1.1  mrg 	bltz	a7, 2b		/* negative input: return 0x80000000:0 */
   1660       1.1  mrg 	movi	xl, 0		/* xh already holds the top word */
   1661       1.1  mrg 	leaf_return		/* no shift needed */
   1662       1.1  mrg 
   1663       1.1  mrg #endif /* L_fixunssfdi */
   1664       1.1  mrg 
   1665       1.1  mrg #ifdef L_floatsisf
   1666       1.1  mrg 
   1667       1.1  mrg 	.align	4
   1668       1.1  mrg 	.global	__floatunsisf
   1669       1.1  mrg 	.type	__floatunsisf, @function
                       	/* __floatunsisf and __floatsisf: convert the 32-bit integer in a2
                       	   (unsigned and signed respectively) to a single-precision bit
                       	   pattern returned in a2, rounding to nearest with ties going to
                       	   the even mantissa.  A zero input is returned unchanged.  Both
                       	   entry points share the code from .Lfloatsisf_normalize onward,
                       	   with a7 holding the sign (0 or 1).  Scratch: a4, a5, a6, a7.  */
   1670       1.1  mrg __floatunsisf:
   1671       1.1  mrg 	leaf_entry sp, 16
   1672       1.1  mrg 	beqz	a2, .Lfloatsisf_return	/* zero converts to +0.0 (all bits zero) */
   1673       1.1  mrg 
   1674       1.1  mrg 	/* Set the sign to zero and jump to the floatsisf code.  */
   1675       1.1  mrg 	movi	a7, 0
   1676       1.1  mrg 	j	.Lfloatsisf_normalize
   1677       1.1  mrg 
   1678       1.1  mrg 	.align	4
   1679       1.1  mrg 	.global	__floatsisf
   1680       1.1  mrg 	.type	__floatsisf, @function
   1681       1.1  mrg __floatsisf:
   1682       1.1  mrg 	leaf_entry sp, 16
   1683       1.1  mrg 
   1684       1.1  mrg 	/* Check for zero.  */
   1685       1.1  mrg 	beqz	a2, .Lfloatsisf_return
   1686       1.1  mrg 
   1687       1.1  mrg 	/* Save the sign.  */
   1688       1.1  mrg 	extui	a7, a2, 31, 1	/* a7 = bit 31 of the input */
   1689       1.1  mrg 
   1690       1.1  mrg 	/* Get the absolute value.  */
   1691       1.1  mrg #if XCHAL_HAVE_ABS
   1692       1.1  mrg 	abs	a2, a2
   1693       1.1  mrg #else
   1694       1.1  mrg 	neg	a4, a2
   1695       1.1  mrg 	movltz	a2, a4, a2	/* take the negation only when a2 is negative */
   1696       1.1  mrg #endif
   1697       1.1  mrg 
   1698       1.1  mrg .Lfloatsisf_normalize:
   1699       1.1  mrg 	/* Normalize with the first 1 bit in the msb.  do_nsau is a macro
                       	   defined elsewhere in this file; presumably it leaves the count
                       	   of leading zero bits of a2 in a4 and uses a5 and a6 as
                       	   temporaries -- TODO confirm against its definition.  */
   1700       1.1  mrg 	do_nsau	a4, a2, a5, a6
   1701       1.1  mrg 	ssl	a4
   1702       1.1  mrg 	sll	a5, a2		/* a5 = a2 << a4 */
   1703       1.1  mrg 
   1704       1.1  mrg 	/* Shift the mantissa into position, with rounding bits in a6.  */
   1705       1.1  mrg 	srli	a2, a5, 8	/* leading 1 lands on bit 23 */
   1706       1.1  mrg 	slli	a6, a5, (32 - 8)	/* the 8 discarded bits, left-justified */
   1707       1.1  mrg 
   1708       1.1  mrg 	/* Set the exponent.  The leading 1 at bit 23 adds one to the
                       	   exponent field in the add below, which is why the constant
                       	   uses 0x7e rather than 0x7f.  */
   1709       1.1  mrg 	movi	a5, 0x9d	/* 0x7e + 31 */
   1710       1.1  mrg 	sub	a5, a5, a4
   1711       1.1  mrg 	slli	a5, a5, 23
   1712       1.1  mrg 	add	a2, a2, a5
   1713       1.1  mrg 
   1714       1.1  mrg 	/* Add the sign.  */
   1715       1.1  mrg 	slli	a7, a7, 31
   1716       1.1  mrg 	or	a2, a2, a7
   1717       1.1  mrg 
   1718       1.1  mrg 	/* Round up if the leftover fraction is >= 1/2.  */
   1719       1.1  mrg 	bgez	a6, .Lfloatsisf_return	/* top discarded bit clear: below 1/2 */
   1720       1.1  mrg 	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */
   1721       1.1  mrg 
   1722       1.1  mrg 	/* Check if the leftover fraction is exactly 1/2.  */
   1723       1.1  mrg 	slli	a6, a6, 1	/* remaining discarded bits below the 1/2 bit */
   1724       1.1  mrg 	beqz	a6, .Lfloatsisf_exactlyhalf
   1725       1.1  mrg 
   1726       1.1  mrg .Lfloatsisf_return:
   1727       1.1  mrg 	leaf_return
   1728       1.1  mrg 
   1729       1.1  mrg .Lfloatsisf_exactlyhalf:
   1730       1.1  mrg 	/* Round down to the nearest even value.  */
   1731       1.1  mrg 	srli	a2, a2, 1	/* this shift pair clears bit 0 only */
   1732       1.1  mrg 	slli	a2, a2, 1
   1733       1.1  mrg 	leaf_return
   1734       1.1  mrg 
   1735       1.1  mrg #endif /* L_floatsisf */
   1736       1.1  mrg 
   1737       1.1  mrg #ifdef L_floatdisf
   1738       1.1  mrg 
   1739       1.1  mrg 	.align	4
   1740       1.1  mrg 	.global	__floatundisf
   1741       1.1  mrg 	.type	__floatundisf, @function
                       	/* __floatundisf and __floatdisf: convert the 64-bit integer in
                       	   xh:xl (unsigned and signed respectively; xh and xl are the
                       	   endian-dependent aliases for the a2/a3 pair defined at the top
                       	   of the file) to a single-precision bit pattern returned in a2,
                       	   rounding to nearest with ties going to the even mantissa.
                       	   A zero input returns with a2 still zero.  Both entry points
                       	   share the code from .Lfloatdisf_normalize onward, with a7
                       	   holding the sign (0 or 1).  Scratch: a4, a5, a6, a7.  */
   1742       1.1  mrg __floatundisf:
   1743       1.1  mrg 	leaf_entry sp, 16
   1744       1.1  mrg 
   1745       1.1  mrg 	/* Check for zero.  */
   1746       1.1  mrg 	or	a4, xh, xl
   1747       1.1  mrg 	beqz	a4, 2f
   1748       1.1  mrg 
   1749       1.1  mrg 	/* Set the sign to zero and jump to the floatdisf code.  */
   1750       1.1  mrg 	movi	a7, 0
   1751       1.1  mrg 	j	.Lfloatdisf_normalize
   1752       1.1  mrg 
   1753       1.1  mrg 	.align	4
   1754       1.1  mrg 	.global	__floatdisf
   1755       1.1  mrg 	.type	__floatdisf, @function
   1756       1.1  mrg __floatdisf:
   1757       1.1  mrg 	leaf_entry sp, 16
   1758       1.1  mrg 
   1759       1.1  mrg 	/* Check for zero.  */
   1760       1.1  mrg 	or	a4, xh, xl
   1761       1.1  mrg 	beqz	a4, 2f
   1762       1.1  mrg 
   1763       1.1  mrg 	/* Save the sign.  */
   1764       1.1  mrg 	extui	a7, xh, 31, 1	/* a7 = bit 31 of the high word */
   1765       1.1  mrg 
   1766       1.1  mrg 	/* Get the absolute value.  */
   1767       1.1  mrg 	bgez	xh, .Lfloatdisf_normalize
   1768       1.1  mrg 	neg	xl, xl
   1769       1.1  mrg 	neg	xh, xh
   1770       1.1  mrg 	beqz	xl, .Lfloatdisf_normalize	/* no borrow when the low word is zero */
   1771       1.1  mrg 	addi	xh, xh, -1	/* propagate the borrow into the high word */
   1772       1.1  mrg 
   1773       1.1  mrg .Lfloatdisf_normalize:
   1774       1.1  mrg 	/* Normalize with the first 1 bit in the msb of xh.  do_nsau is a
                       	   macro defined elsewhere in this file; presumably it leaves the
                       	   count of leading zero bits of its second operand in a4 and
                       	   uses a5 and a6 as temporaries -- TODO confirm.  */
   1775       1.1  mrg 	beqz	xh, .Lfloatdisf_bigshift
   1776       1.1  mrg 	do_nsau	a4, xh, a5, a6
   1777       1.1  mrg 	ssl	a4
   1778       1.1  mrg 	src	xh, xh, xl	/* xh = high word of (xh:xl << a4) */
   1779       1.1  mrg 	sll	xl, xl		/* xl = xl << a4 */
   1780       1.1  mrg 
   1781       1.1  mrg .Lfloatdisf_shifted:
   1782       1.1  mrg 	/* Shift the mantissa into position, with rounding bits in a6.  */
   1783       1.1  mrg 	ssai	8
   1784       1.1  mrg 	sll	a5, xl		/* a5 = low 8 bits of xl, left-justified */
   1785       1.1  mrg 	src	a6, xh, xl	/* a6 = low word of (xh:xl >> 8) */
   1786       1.1  mrg 	srl	xh, xh		/* mantissa, leading 1 on bit 23 */
   1787       1.1  mrg 	beqz	a5, 1f
   1788       1.1  mrg 	movi	a5, 1		/* fold nonzero dropped bits into bit 0 of a6 */
   1789       1.1  mrg 	or	a6, a6, a5
   1790       1.1  mrg 1:
   1791       1.1  mrg 	/* Set the exponent.  The leading 1 at bit 23 adds one to the
                       	   exponent field in the add below, which is why the constant
                       	   uses 0x7e rather than 0x7f.  */
   1792       1.1  mrg 	movi	a5, 0xbd	/* 0x7e + 63 */
   1793       1.1  mrg 	sub	a5, a5, a4
   1794       1.1  mrg 	slli	a5, a5, 23
   1795       1.1  mrg 	add	a2, xh, a5
   1796       1.1  mrg 
   1797       1.1  mrg 	/* Add the sign.  */
   1798       1.1  mrg 	slli	a7, a7, 31
   1799       1.1  mrg 	or	a2, a2, a7
   1800       1.1  mrg 
   1801       1.1  mrg 	/* Round up if the leftover fraction is >= 1/2.  */
   1802       1.1  mrg 	bgez	a6, 2f		/* top discarded bit clear: below 1/2 */
   1803       1.1  mrg 	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */
   1804       1.1  mrg 
   1805       1.1  mrg 	/* Check if the leftover fraction is exactly 1/2.  */
   1806       1.1  mrg 	slli	a6, a6, 1	/* remaining discarded bits below the 1/2 bit */
   1807       1.1  mrg 	beqz	a6, .Lfloatdisf_exactlyhalf
   1808       1.1  mrg 2:	leaf_return
   1809       1.1  mrg 
   1810       1.1  mrg .Lfloatdisf_bigshift:
   1811       1.1  mrg 	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
   1812       1.1  mrg 	do_nsau	a4, xl, a5, a6
   1813       1.1  mrg 	ssl	a4
   1814       1.1  mrg 	sll	xh, xl		/* xh = xl << a4 */
   1815       1.1  mrg 	movi	xl, 0
   1816       1.1  mrg 	addi	a4, a4, 32	/* account for the skipped zero high word */
   1817       1.1  mrg 	j	.Lfloatdisf_shifted
   1818       1.1  mrg 
   1819       1.1  mrg .Lfloatdisf_exactlyhalf:
   1820       1.1  mrg 	/* Round down to the nearest even value.  */
   1821       1.1  mrg 	srli	a2, a2, 1	/* this shift pair clears bit 0 only */
   1822       1.1  mrg 	slli	a2, a2, 1
   1823       1.1  mrg 	leaf_return
   1824       1.1  mrg 
   1825       1.1  mrg #endif /* L_floatdisf */
   1826   1.1.1.6  mrg 
   1827   1.1.1.6  mrg #if XCHAL_HAVE_FP_SQRT
   1828   1.1.1.6  mrg #ifdef L_sqrtf
   1829   1.1.1.6  mrg 	/* Square root: __ieee754_sqrtf takes a single-precision bit pattern
                       	   in a2 and returns the result bit pattern in a2.  It is built
                       	   only when XCHAL_HAVE_FP_SQRT and L_sqrtf are both set.  The
                       	   argument is moved into f1, the value is computed in f0-f7 by
                       	   the sequence below, which starts from the sqrt0.s result and
                       	   ends with divn.s, and f0 is moved back to a2.
                       	   NOTE(review): presumably an iterative refinement of the sqrt0.s
                       	   seed; the individual steps are not derived here and their order
                       	   must not be changed -- confirm against the Xtensa
                       	   floating-point documentation.  */
   1830   1.1.1.6  mrg 
   1831   1.1.1.6  mrg 	.align	4
   1832   1.1.1.6  mrg 	.global	__ieee754_sqrtf
   1833   1.1.1.6  mrg 	.type	__ieee754_sqrtf, @function
   1834   1.1.1.6  mrg __ieee754_sqrtf:
   1835   1.1.1.6  mrg 	leaf_entry	sp, 16
   1836   1.1.1.6  mrg 
   1837   1.1.1.6  mrg 	wfr		f1, a2	/* f1 = argument bits from a2 */
   1838   1.1.1.6  mrg 
   1839   1.1.1.6  mrg 	sqrt0.s		f2, f1	/* starting value derived from the argument */
   1840   1.1.1.6  mrg 	const.s		f3, 0
   1841   1.1.1.6  mrg 	maddn.s		f3, f2, f2
   1842   1.1.1.6  mrg 	nexp01.s	f4, f1
   1843   1.1.1.6  mrg 	const.s		f0, 3
   1844   1.1.1.6  mrg 	addexp.s	f4, f0
   1845   1.1.1.6  mrg 	maddn.s		f0, f3, f4
   1846   1.1.1.6  mrg 	nexp01.s	f3, f1
   1847   1.1.1.6  mrg 	neg.s		f5, f3
   1848   1.1.1.6  mrg 	maddn.s		f2, f0, f2
   1849   1.1.1.6  mrg 	const.s		f0, 0
   1850   1.1.1.6  mrg 	const.s		f6, 0
   1851   1.1.1.6  mrg 	const.s		f7, 0
   1852   1.1.1.6  mrg 	maddn.s		f0, f5, f2
   1853   1.1.1.6  mrg 	maddn.s		f6, f2, f4
   1854   1.1.1.6  mrg 	const.s		f4, 3
   1855   1.1.1.6  mrg 	maddn.s		f7, f4, f2
   1856   1.1.1.6  mrg 	maddn.s		f3, f0, f0
   1857   1.1.1.6  mrg 	maddn.s		f4, f6, f2
   1858   1.1.1.6  mrg 	neg.s		f2, f7
   1859   1.1.1.6  mrg 	maddn.s		f0, f3, f2
   1860   1.1.1.6  mrg 	maddn.s		f7, f4, f7
   1861   1.1.1.6  mrg 	mksadj.s	f2, f1
   1862   1.1.1.6  mrg 	nexp01.s	f1, f1	/* the original argument in f1 is replaced here */
   1863   1.1.1.6  mrg 	maddn.s		f1, f0, f0
   1864   1.1.1.6  mrg 	neg.s		f3, f7
   1865   1.1.1.6  mrg 	addexpm.s	f0, f2
   1866   1.1.1.6  mrg 	addexp.s	f3, f2
   1867   1.1.1.6  mrg 	divn.s		f0, f1, f3	/* final step; result is left in f0 */
   1868   1.1.1.6  mrg 
   1869   1.1.1.6  mrg 	rfr		a2, f0	/* a2 = result bits from f0 */
   1870   1.1.1.6  mrg 
   1871   1.1.1.6  mrg 	leaf_return
   1872   1.1.1.6  mrg 
   1873   1.1.1.6  mrg #endif /* L_sqrtf */
   1874   1.1.1.6  mrg #endif /* XCHAL_HAVE_FP_SQRT */
   1875   1.1.1.6  mrg 
   1876   1.1.1.6  mrg #if XCHAL_HAVE_FP_RECIP
   1877   1.1.1.6  mrg #ifdef L_recipsf2
   1878   1.1.1.6  mrg 	/* Reciprocal: __recipsf2 takes a single-precision bit pattern in a2
                       	   and returns the result bit pattern in a2.  It is built only
                       	   when XCHAL_HAVE_FP_RECIP and L_recipsf2 are both set.  The
                       	   recip0.s result in f0 is corrected twice with the same
                       	   three-instruction step.  Uses f0, f1 and f2.
                       	   NOTE(review): const.s selects a constant by index; index 1 is
                       	   presumably 1.0, which makes each step f0 += f0 * (1 - f1 * f0)
                       	   -- confirm against the Xtensa floating-point documentation.  */
   1879   1.1.1.6  mrg 
   1880   1.1.1.6  mrg 	.align	4
   1881   1.1.1.6  mrg 	.global	__recipsf2
   1882   1.1.1.6  mrg 	.type	__recipsf2, @function
   1883   1.1.1.6  mrg __recipsf2:
   1884   1.1.1.6  mrg 	leaf_entry	sp, 16
   1885   1.1.1.6  mrg 
   1886   1.1.1.6  mrg 	wfr		f1, a2	/* f1 = argument bits from a2 */
   1887   1.1.1.6  mrg 
   1888   1.1.1.6  mrg 	recip0.s	f0, f1	/* f0 = starting estimate */
   1889   1.1.1.6  mrg 	const.s		f2, 1
   1890   1.1.1.6  mrg 	msub.s		f2, f1, f0	/* f2 -= f1 * f0 */
   1891   1.1.1.6  mrg 	maddn.s		f0, f0, f2	/* f0 += f0 * f2 */
   1892   1.1.1.6  mrg 	const.s		f2, 1	/* second pass of the same step */
   1893   1.1.1.6  mrg 	msub.s		f2, f1, f0
   1894   1.1.1.6  mrg 	maddn.s		f0, f0, f2
   1895   1.1.1.6  mrg 
   1896   1.1.1.6  mrg 	rfr		a2, f0	/* a2 = result bits from f0 */
   1897   1.1.1.6  mrg 
   1898   1.1.1.6  mrg 	leaf_return
   1899   1.1.1.6  mrg 
   1900   1.1.1.6  mrg #endif /* L_recipsf2 */
   1901   1.1.1.6  mrg #endif /* XCHAL_HAVE_FP_RECIP */
   1902   1.1.1.6  mrg 
   1903   1.1.1.6  mrg #if XCHAL_HAVE_FP_RSQRT
   1904   1.1.1.6  mrg #ifdef L_rsqrtsf2
   1905   1.1.1.6  mrg 	/* Reciprocal square root: __rsqrtsf2 takes a single-precision bit
                       	   pattern in a2 and returns the result bit pattern in a2.  It is
                       	   built only when XCHAL_HAVE_FP_RSQRT and L_rsqrtsf2 are both
                       	   set.  The rsqrt0.s result in f0 is corrected twice with the
                       	   same step.  Uses f0-f5; f1 is reused as a temporary in the
                       	   second pass once the argument is no longer needed.
                       	   NOTE(review): const.s selects a constant by index; indexes 3
                       	   and 1 are presumably 0.5 and 1.0, which makes each step
                       	   f0 += (0.5 * f0) * (1 - f1 * f0 * f0) -- confirm against the
                       	   Xtensa floating-point documentation.  */
   1906   1.1.1.6  mrg 
   1907   1.1.1.6  mrg 	.align	4
   1908   1.1.1.6  mrg 	.global	__rsqrtsf2
   1909   1.1.1.6  mrg 	.type	__rsqrtsf2, @function
   1910   1.1.1.6  mrg __rsqrtsf2:
   1911   1.1.1.6  mrg 	leaf_entry	sp, 16
   1912   1.1.1.6  mrg 
   1913   1.1.1.6  mrg 	wfr		f1, a2	/* f1 = argument bits from a2 */
   1914   1.1.1.6  mrg 
   1915   1.1.1.6  mrg 	rsqrt0.s	f0, f1	/* f0 = starting estimate */
   1916   1.1.1.6  mrg 	mul.s		f2, f1, f0	/* f2 = f1 * f0 */
   1917   1.1.1.6  mrg 	const.s		f3, 3;
   1918   1.1.1.6  mrg 	mul.s		f4, f3, f0	/* f4 = f3 * f0 */
   1919   1.1.1.6  mrg 	const.s		f5, 1
   1920   1.1.1.6  mrg 	msub.s		f5, f2, f0	/* f5 -= f2 * f0 */
   1921   1.1.1.6  mrg 	maddn.s		f0, f4, f5	/* f0 += f4 * f5 */
   1922   1.1.1.6  mrg 	mul.s		f2, f1, f0	/* second pass; last read of the argument */
   1923   1.1.1.6  mrg 	mul.s		f1, f3, f0	/* f1 now holds f3 * f0 */
   1924   1.1.1.6  mrg 	const.s		f3, 1
   1925   1.1.1.6  mrg 	msub.s		f3, f2, f0	/* f3 -= f2 * f0 */
   1926   1.1.1.6  mrg 	maddn.s		f0, f1, f3	/* f0 += f1 * f3 */
   1927   1.1.1.6  mrg 
   1928   1.1.1.6  mrg 	rfr		a2, f0	/* a2 = result bits from f0 */
   1929   1.1.1.6  mrg 
   1930   1.1.1.6  mrg 	leaf_return
   1931   1.1.1.6  mrg 
   1932   1.1.1.6  mrg #endif /* L_rsqrtsf2 */
   1933   1.1.1.6  mrg #endif /* XCHAL_HAVE_FP_RSQRT */
   1934