/* Source listing: GCC libgcc ieee754-df.S — Xtensa IEEE-754 double-precision routines. */
      1 /* IEEE-754 double-precision functions for Xtensa
      2    Copyright (C) 2006-2024 Free Software Foundation, Inc.
      3    Contributed by Bob Wilson (bwilson (at) tensilica.com) at Tensilica.
      4 
      5    This file is part of GCC.
      6 
      7    GCC is free software; you can redistribute it and/or modify it
      8    under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3, or (at your option)
     10    any later version.
     11 
     12    GCC is distributed in the hope that it will be useful, but WITHOUT
     13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     15    License for more details.
     16 
     17    Under Section 7 of GPL version 3, you are granted additional
     18    permissions described in the GCC Runtime Library Exception, version
     19    3.1, as published by the Free Software Foundation.
     20 
     21    You should have received a copy of the GNU General Public License and
     22    a copy of the GCC Runtime Library Exception along with this program;
     23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24    <http://www.gnu.org/licenses/>.  */
     25 
      26 #ifdef __XTENSA_EB__
	/* Big-endian: the high word of each double comes first in its
	   argument register pair (x in a2/a3, y in a4/a5).  */
      27 #define xh a2
      28 #define xl a3
      29 #define yh a4
      30 #define yl a5
      31 #else
	/* Little-endian: the low word of each double comes first.  */
      32 #define xh a3
      33 #define xl a2
      34 #define yh a5
      35 #define yl a4
      36 #endif
     37 
     38 /*  Warning!  The branch displacements for some Xtensa branch instructions
     39     are quite small, and this code has been carefully laid out to keep
     40     branch targets in range.  If you change anything, be sure to check that
     41     the assembler is not relaxing anything to branch over a jump.  */
     42 
      43 #ifdef L_negdf2
      44 
	/* double __negdf2 (double x)
	   Return -x by flipping the sign bit; no other bits are touched,
	   so this is correct for zeros, subnormals, Infinities and NaNs.
	   Input and output are in the xh/xl register pair.  */
      45 	.align	4
      46 	.global	__negdf2
      47 	.type	__negdf2, @function
      48 __negdf2:
      49 	leaf_entry sp, 16
      50 	movi	a4, 0x80000000	/* sign-bit mask for the high word */
      51 	xor	xh, xh, a4
      52 	leaf_return
      53 
      54 #endif /* L_negdf2 */
     55 
     56 #ifdef L_addsubdf3
     57 
      58 	.literal_position
      59 	/* Addition */
      60 __adddf3_aux:
      61 
      62 	/* Handle NaNs and Infinities.  (This code is placed before the
      63 	   start of the function just to keep it in range of the limited
      64 	   branch displacements.)  */
      65 
      66 .Ladd_xnan_or_inf:
      67 	/* If y is neither Infinity nor NaN, return x.  */
      68 	bnall	yh, a6, .Ladd_return_nan_or_inf
      69 	/* If x is a NaN, return it.  Otherwise, return y.  */
      70 	slli	a7, xh, 12
      71 	or	a7, a7, xl
      72 	bnez	a7, .Ladd_return_nan
      73 
      74 .Ladd_ynan_or_inf:
      75 	/* Return y.  */
      76 	mov	xh, yh
      77 	mov	xl, yl
      78 
      79 .Ladd_return_nan_or_inf:
	/* A zero mantissa here means Infinity; return it unchanged.  */
      80 	slli	a7, xh, 12
      81 	or	a7, a7, xl
      82 	bnez	a7, .Ladd_return_nan
      83 	leaf_return
      84 
      85 .Ladd_return_nan:
      86 	movi	a4, 0x80000	/* make it a quiet NaN */
      87 	or	xh, xh, a4
      88 	leaf_return
      89 
      90 .Ladd_opposite_signs:
      91 	/* Operand signs differ.  Do a subtraction.  */
      92 	slli	a7, a6, 11
      93 	xor	yh, yh, a7
      94 	j	.Lsub_same_sign
      95 
	/* double __adddf3 (double x, double y)
	   x is in the xh/xl register pair, y in yh/yl; the sum is
	   returned in xh/xl.  Rounding is to nearest, ties to even.
	   a6 holds the exponent mask 0x7ff00000 throughout.  */
      96 	.align	4
      97 	.global	__adddf3
      98 	.type	__adddf3, @function
      99 __adddf3:
     100 	leaf_entry sp, 16
     101 	movi	a6, 0x7ff00000
     102 
     103 	/* Check if the two operands have the same sign.  */
     104 	xor	a7, xh, yh
     105 	bltz	a7, .Ladd_opposite_signs
     106 
     107 .Ladd_same_sign:
     108 	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
     109 	ball	xh, a6, .Ladd_xnan_or_inf
     110 	ball	yh, a6, .Ladd_ynan_or_inf
     111 
     112 	/* Compare the exponents.  The smaller operand will be shifted
     113 	   right by the exponent difference and added to the larger
     114 	   one.  */
     115 	extui	a7, xh, 20, 12
     116 	extui	a8, yh, 20, 12
     117 	bltu	a7, a8, .Ladd_shiftx
     118 
     119 .Ladd_shifty:
     120 	/* Check if the smaller (or equal) exponent is zero.  */
     121 	bnone	yh, a6, .Ladd_yexpzero
     122 
     123 	/* Replace yh sign/exponent with 0x001.  */
     124 	or	yh, yh, a6
     125 	slli	yh, yh, 11
     126 	srli	yh, yh, 11
     127 
     128 .Ladd_yexpdiff:
     129 	/* Compute the exponent difference.  Optimize for difference < 32.  */
     130 	sub	a10, a7, a8
     131 	bgeui	a10, 32, .Ladd_bigshifty
     132 
     133 	/* Shift yh/yl right by the exponent difference.  Any bits that are
     134 	   shifted out of yl are saved in a9 for rounding the result.  */
     135 	ssr	a10
     136 	movi	a9, 0
     137 	src	a9, yl, a9
     138 	src	yl, yh, yl
     139 	srl	yh, yh
     140 
     141 .Ladd_addy:
     142 	/* Do the 64-bit addition.  */
     143 	add	xl, xl, yl
     144 	add	xh, xh, yh
     145 	bgeu	xl, yl, 1f
     146 	addi	xh, xh, 1
     147 1:
     148 	/* Check if the add overflowed into the exponent.  */
     149 	extui	a10, xh, 20, 12
     150 	beq	a10, a7, .Ladd_round
     151 	mov	a8, a7
     152 	j	.Ladd_carry
     153 
     154 .Ladd_yexpzero:
     155 	/* y is a subnormal value.  Replace its sign/exponent with zero,
     156 	   i.e., no implicit "1.0", and increment the apparent exponent
     157 	   because subnormals behave as if they had the minimum (nonzero)
     158 	   exponent.  Test for the case when both exponents are zero.  */
     159 	slli	yh, yh, 12
     160 	srli	yh, yh, 12
     161 	bnone	xh, a6, .Ladd_bothexpzero
     162 	addi	a8, a8, 1
     163 	j	.Ladd_yexpdiff
     164 
     165 .Ladd_bothexpzero:
     166 	/* Both exponents are zero.  Handle this as a special case.  There
     167 	   is no need to shift or round, and the normal code for handling
     168 	   a carry into the exponent field will not work because it
     169 	   assumes there is an implicit "1.0" that needs to be added.  */
     170 	add	xl, xl, yl
     171 	add	xh, xh, yh
     172 	bgeu	xl, yl, 1f
     173 	addi	xh, xh, 1
     174 1:	leaf_return
     175 
     176 .Ladd_bigshifty:
     177 	/* Exponent difference >= 64 -- just return the bigger value.  */
     178 	bgeui	a10, 64, 1b	/* return x unchanged */
     179 
     180 	/* Shift yh/yl right by the exponent difference.  Any bits that are
     181 	   shifted out are saved in a9 for rounding the result.  */
     182 	ssr	a10
     183 	sll	a11, yl		/* lost bits shifted out of yl */
     184 	src	a9, yh, yl
     185 	srl	yl, yh
     186 	movi	yh, 0
     187 	beqz	a11, .Ladd_addy
     188 	or	a9, a9, a10	/* any positive, nonzero value will work */
     189 	j	.Ladd_addy
     190 
     191 .Ladd_xexpzero:
     192 	/* Same as "yexpzero" except skip handling the case when both
     193 	   exponents are zero.  */
     194 	slli	xh, xh, 12
     195 	srli	xh, xh, 12
     196 	addi	a7, a7, 1
     197 	j	.Ladd_xexpdiff
     198 
     199 .Ladd_shiftx:
     200 	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
     201 	   because the exponent difference is always nonzero in this version,
     202 	   the shift sequence can use SLL and skip loading a constant zero.  */
     203 	bnone	xh, a6, .Ladd_xexpzero
     204 
     205 	or	xh, xh, a6
     206 	slli	xh, xh, 11
     207 	srli	xh, xh, 11
     208 
     209 .Ladd_xexpdiff:
     210 	sub	a10, a8, a7
     211 	bgeui	a10, 32, .Ladd_bigshiftx
     212 
     213 	ssr	a10
     214 	sll	a9, xl
     215 	src	xl, xh, xl
     216 	srl	xh, xh
     217 
     218 .Ladd_addx:
     219 	add	xl, xl, yl
     220 	add	xh, xh, yh
     221 	bgeu	xl, yl, 1f
     222 	addi	xh, xh, 1
     223 1:
     224 	/* Check if the add overflowed into the exponent.  */
     225 	extui	a10, xh, 20, 12
     226 	bne	a10, a8, .Ladd_carry
     227 
     228 .Ladd_round:
	/* The msb of a9 is the rounding bit; the remaining bits of a9 are
	   the "sticky" bits.  */
     229 	/* Round up if the leftover fraction is >= 1/2.  */
     230 	bgez	a9, 1f
     231 	addi	xl, xl, 1
     232 	beqz	xl, .Ladd_roundcarry
     233 
     234 	/* Check if the leftover fraction is exactly 1/2.  */
     235 	slli	a9, a9, 1
     236 	beqz	a9, .Ladd_exactlyhalf
     237 1:	leaf_return
     238 
     239 .Ladd_bigshiftx:
     240 	/* Mostly the same thing as "bigshifty"....  */
     241 	bgeui	a10, 64, .Ladd_returny	/* diff >= 64: result is y */
     242 
     243 	ssr	a10
     244 	sll	a11, xl
     245 	src	a9, xh, xl
     246 	srl	xl, xh
     247 	movi	xh, 0
     248 	beqz	a11, .Ladd_addx
     249 	or	a9, a9, a10
     250 	j	.Ladd_addx
     251 
     252 .Ladd_returny:
     253 	mov	xh, yh
     254 	mov	xl, yl
     255 	leaf_return
     256 
     257 .Ladd_carry:
     258 	/* The addition has overflowed into the exponent field, so the
     259 	   value needs to be renormalized.  The mantissa of the result
     260 	   can be recovered by subtracting the original exponent and
     261 	   adding 0x100000 (which is the explicit "1.0" for the
     262 	   mantissa of the non-shifted operand -- the "1.0" for the
     263 	   shifted operand was already added).  The mantissa can then
     264 	   be shifted right by one bit.  The explicit "1.0" of the
     265 	   shifted mantissa then needs to be replaced by the exponent,
     266 	   incremented by one to account for the normalizing shift.
     267 	   It is faster to combine these operations: do the shift first
     268 	   and combine the additions and subtractions.  If x is the
     269 	   original exponent, the result is:
     270 	       shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
     271 	   or:
     272 	       shifted mantissa + ((x + 1) << 19)
     273 	   Note that the exponent is incremented here by leaving the
     274 	   explicit "1.0" of the mantissa in the exponent field.  */
     275 
     276 	/* Shift xh/xl right by one bit.  Save the lsb of xl.  */
     277 	mov	a10, xl
     278 	ssai	1
     279 	src	xl, xh, xl
     280 	srl	xh, xh
     281 
     282 	/* See explanation above.  The original exponent is in a8.  */
     283 	addi	a8, a8, 1
     284 	slli	a8, a8, 19
     285 	add	xh, xh, a8
     286 
     287 	/* Return an Infinity if the exponent overflowed.  */
     288 	ball	xh, a6, .Ladd_infinity
     289 
     290 	/* Same thing as the "round" code except the msb of the leftover
     291 	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
     292 	bbci.l	a10, 0, 1f
     293 	addi	xl, xl, 1
     294 	beqz	xl, .Ladd_roundcarry
     295 	beqz	a9, .Ladd_exactlyhalf
     296 1:	leaf_return
     297 
     298 .Ladd_infinity:
     299 	/* Clear the mantissa.  */
     300 	movi	xl, 0
     301 	srli	xh, xh, 20
     302 	slli	xh, xh, 20
     303 
     304 	/* The sign bit may have been lost in a carry-out.  Put it back.  */
     305 	slli	a8, a8, 1
     306 	or	xh, xh, a8
     307 	leaf_return
     308 
     309 .Ladd_exactlyhalf:
     310 	/* Round down to the nearest even value.  */
     311 	srli	xl, xl, 1
     312 	slli	xl, xl, 1
     313 	leaf_return
     314 
     315 .Ladd_roundcarry:
     316 	/* xl is always zero when the rounding increment overflows, so
     317 	   there's no need to round it to an even value.  */
     318 	addi	xh, xh, 1
     319 	/* Overflow to the exponent is OK.  */
     320 	leaf_return
    321 
    322 
     323 	/* Subtraction */
     324 __subdf3_aux:
     325 
     326 	/* Handle NaNs and Infinities.  (This code is placed before the
     327 	   start of the function just to keep it in range of the limited
     328 	   branch displacements.)  */
     329 
     330 .Lsub_xnan_or_inf:
     331 	/* If y is neither Infinity nor NaN, return x.  */
     332 	bnall	yh, a6, .Lsub_return_nan_or_inf
     333 
     334 .Lsub_return_nan:
     335 	/* Both x and y are either NaN or Inf, so the result is NaN.  */
     336 	movi	a4, 0x80000	/* make it a quiet NaN */
     337 	or	xh, xh, a4
     338 	leaf_return
     339 
     340 .Lsub_ynan_or_inf:
     341 	/* Negate y and return it.  */
     342 	slli	a7, a6, 11
     343 	xor	xh, yh, a7
     344 	mov	xl, yl
     345 
     346 .Lsub_return_nan_or_inf:
	/* A zero mantissa here means Infinity; return it unchanged.  */
     347 	slli	a7, xh, 12
     348 	or	a7, a7, xl
     349 	bnez	a7, .Lsub_return_nan
     350 	leaf_return
     351 
     352 .Lsub_opposite_signs:
     353 	/* Operand signs differ.  Do an addition.  */
     354 	slli	a7, a6, 11
     355 	xor	yh, yh, a7
     356 	j	.Ladd_same_sign
     357 
	/* double __subdf3 (double x, double y)
	   x is in the xh/xl register pair, y in yh/yl; the difference
	   x - y is returned in xh/xl.  Rounding is to nearest, ties to
	   even.  a6 holds the exponent mask 0x7ff00000 throughout.  */
     358 	.align	4
     359 	.global	__subdf3
     360 	.type	__subdf3, @function
     361 __subdf3:
     362 	leaf_entry sp, 16
     363 	movi	a6, 0x7ff00000
     364 
     365 	/* Check if the two operands have the same sign.  */
     366 	xor	a7, xh, yh
     367 	bltz	a7, .Lsub_opposite_signs
     368 
     369 .Lsub_same_sign:
     370 	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
     371 	ball	xh, a6, .Lsub_xnan_or_inf
     372 	ball	yh, a6, .Lsub_ynan_or_inf
     373 
     374 	/* Compare the operands.  In contrast to addition, the entire
     375 	   value matters here.  */
     376 	extui	a7, xh, 20, 11
     377 	extui	a8, yh, 20, 11
     378 	bltu	xh, yh, .Lsub_xsmaller
     379 	beq	xh, yh, .Lsub_compare_low
     380 
     381 .Lsub_ysmaller:
     382 	/* Check if the smaller (or equal) exponent is zero.  */
     383 	bnone	yh, a6, .Lsub_yexpzero
     384 
     385 	/* Replace yh sign/exponent with 0x001.  */
     386 	or	yh, yh, a6
     387 	slli	yh, yh, 11
     388 	srli	yh, yh, 11
     389 
     390 .Lsub_yexpdiff:
     391 	/* Compute the exponent difference.  Optimize for difference < 32.  */
     392 	sub	a10, a7, a8
     393 	bgeui	a10, 32, .Lsub_bigshifty
     394 
     395 	/* Shift yh/yl right by the exponent difference.  Any bits that are
     396 	   shifted out of yl are saved in a9 for rounding the result.  */
     397 	ssr	a10
     398 	movi	a9, 0
     399 	src	a9, yl, a9
     400 	src	yl, yh, yl
     401 	srl	yh, yh
     402 
     403 .Lsub_suby:
     404 	/* Do the 64-bit subtraction.  */
     405 	sub	xh, xh, yh
     406 	bgeu	xl, yl, 1f
     407 	addi	xh, xh, -1
     408 1:	sub	xl, xl, yl
     409 
     410 	/* Subtract the leftover bits in a9 from zero and propagate any
     411 	   borrow from xh/xl.  */
     412 	neg	a9, a9
     413 	beqz	a9, 1f
     414 	addi	a5, xh, -1
     415 	moveqz	xh, a5, xl	/* borrow reaches xh only when xl == 0 */
     416 	addi	xl, xl, -1
     417 1:
     418 	/* Check if the subtract underflowed into the exponent.  */
     419 	extui	a10, xh, 20, 11
     420 	beq	a10, a7, .Lsub_round
     421 	j	.Lsub_borrow
     422 
     423 .Lsub_compare_low:
     424 	/* The high words are equal.  Compare the low words.  */
     425 	bltu	xl, yl, .Lsub_xsmaller
     426 	bltu	yl, xl, .Lsub_ysmaller
     427 	/* The operands are equal.  Return 0.0.  */
     428 	movi	xh, 0
     429 	movi	xl, 0
     430 1:	leaf_return
     431 
     432 .Lsub_yexpzero:
     433 	/* y is a subnormal value.  Replace its sign/exponent with zero,
     434 	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
     435 	   y's apparent exponent because subnormals behave as if they had
     436 	   the minimum (nonzero) exponent.  */
     437 	slli	yh, yh, 12
     438 	srli	yh, yh, 12
     439 	bnone	xh, a6, .Lsub_yexpdiff
     440 	addi	a8, a8, 1
     441 	j	.Lsub_yexpdiff
     442 
     443 .Lsub_bigshifty:
     444 	/* Exponent difference >= 64 -- just return the bigger value.  */
     445 	bgeui	a10, 64, 1b	/* return x unchanged */
     446 
     447 	/* Shift yh/yl right by the exponent difference.  Any bits that are
     448 	   shifted out are saved in a9 for rounding the result.  */
     449 	ssr	a10
     450 	sll	a11, yl		/* lost bits shifted out of yl */
     451 	src	a9, yh, yl
     452 	srl	yl, yh
     453 	movi	yh, 0
     454 	beqz	a11, .Lsub_suby
     455 	or	a9, a9, a10	/* any positive, nonzero value will work */
     456 	j	.Lsub_suby
     457 
     458 .Lsub_xsmaller:
     459 	/* Same thing as the "ysmaller" code, but with x and y swapped and
     460 	   with y negated.  */
     461 	bnone	xh, a6, .Lsub_xexpzero
     462 
     463 	or	xh, xh, a6
     464 	slli	xh, xh, 11
     465 	srli	xh, xh, 11
     466 
     467 .Lsub_xexpdiff:
     468 	sub	a10, a8, a7
     469 	bgeui	a10, 32, .Lsub_bigshiftx
     470 
     471 	ssr	a10
     472 	movi	a9, 0
     473 	src	a9, xl, a9
     474 	src	xl, xh, xl
     475 	srl	xh, xh
     476 
     477 	/* Negate y.  */
     478 	slli	a11, a6, 11
     479 	xor	yh, yh, a11
     480 
     481 .Lsub_subx:
     482 	sub	xl, yl, xl
     483 	sub	xh, yh, xh
     484 	bgeu	yl, xl, 1f
     485 	addi	xh, xh, -1
     486 1:
     487 	/* Subtract the leftover bits in a9 from zero and propagate any
     488 	   borrow from xh/xl.  */
     489 	neg	a9, a9
     490 	beqz	a9, 1f
     491 	addi	a5, xh, -1
     492 	moveqz	xh, a5, xl	/* borrow reaches xh only when xl == 0 */
     493 	addi	xl, xl, -1
     494 1:
     495 	/* Check if the subtract underflowed into the exponent.  */
     496 	extui	a10, xh, 20, 11
     497 	bne	a10, a8, .Lsub_borrow
     498 
     499 .Lsub_round:
	/* The msb of a9 is the rounding bit; the remaining bits of a9 are
	   the "sticky" bits.  */
     500 	/* Round up if the leftover fraction is >= 1/2.  */
     501 	bgez	a9, 1f
     502 	addi	xl, xl, 1
     503 	beqz	xl, .Lsub_roundcarry
     504 
     505 	/* Check if the leftover fraction is exactly 1/2.  */
     506 	slli	a9, a9, 1
     507 	beqz	a9, .Lsub_exactlyhalf
     508 1:	leaf_return
     509 
     510 .Lsub_xexpzero:
     511 	/* Same as "yexpzero".  */
     512 	slli	xh, xh, 12
     513 	srli	xh, xh, 12
     514 	bnone	yh, a6, .Lsub_xexpdiff
     515 	addi	a7, a7, 1
     516 	j	.Lsub_xexpdiff
     517 
     518 .Lsub_bigshiftx:
     519 	/* Mostly the same thing as "bigshifty", but with the sign bit of the
     520 	   shifted value set so that the subsequent subtraction flips the
     521 	   sign of y.  */
     522 	bgeui	a10, 64, .Lsub_returny	/* diff >= 64: result is -y */
     523 
     524 	ssr	a10
     525 	sll	a11, xl
     526 	src	a9, xh, xl
     527 	srl	xl, xh
     528 	slli	xh, a6, 11	/* set sign bit of xh */
     529 	beqz	a11, .Lsub_subx
     530 	or	a9, a9, a10
     531 	j	.Lsub_subx
     532 
     533 .Lsub_returny:
     534 	/* Negate and return y.  */
     535 	slli	a7, a6, 11
     536 	xor	xh, yh, a7
     537 	mov	xl, yl
     538 	leaf_return
     539 
     540 .Lsub_borrow:
     541 	/* The subtraction has underflowed into the exponent field, so the
     542 	   value needs to be renormalized.  Shift the mantissa left as
     543 	   needed to remove any leading zeros and adjust the exponent
     544 	   accordingly.  If the exponent is not large enough to remove
     545 	   all the leading zeros, the result will be a subnormal value.  */
     546 
     547 	slli	a8, xh, 12
     548 	beqz	a8, .Lsub_xhzero
     549 	do_nsau	a6, a8, a7, a11	/* a6 = leading-zero count of a8 (a7/a11 temps) */
     550 	srli	a8, a8, 12
     551 	bge	a6, a10, .Lsub_subnormal
     552 	addi	a6, a6, 1
     553 
     554 .Lsub_shift_lt32:
     555 	/* Shift the mantissa (a8/xl/a9) left by a6.  */
     556 	ssl	a6
     557 	src	a8, a8, xl
     558 	src	xl, xl, a9
     559 	sll	a9, a9
     560 
     561 	/* Combine the shifted mantissa with the sign and exponent,
     562 	   decrementing the exponent by a6.  (The exponent has already
     563 	   been decremented by one due to the borrow from the subtraction,
     564 	   but adding the mantissa will increment the exponent by one.)  */
     565 	srli	xh, xh, 20
     566 	sub	xh, xh, a6
     567 	slli	xh, xh, 20
     568 	add	xh, xh, a8
     569 	j	.Lsub_round
     570 
     571 .Lsub_exactlyhalf:
     572 	/* Round down to the nearest even value.  */
     573 	srli	xl, xl, 1
     574 	slli	xl, xl, 1
     575 	leaf_return
     576 
     577 .Lsub_roundcarry:
     578 	/* xl is always zero when the rounding increment overflows, so
     579 	   there's no need to round it to an even value.  */
     580 	addi	xh, xh, 1
     581 	/* Overflow to the exponent is OK.  */
     582 	leaf_return
     583 
     584 .Lsub_xhzero:
     585 	/* When normalizing the result, all the mantissa bits in the high
     586 	   word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
     587 	do_nsau	a6, xl, a7, a11
     588 	addi	a6, a6, 21
     589 	blt	a10, a6, .Lsub_subnormal
     590 
     591 .Lsub_normalize_shift:
     592 	bltui	a6, 32, .Lsub_shift_lt32
     593 
     594 	ssl	a6
     595 	src	a8, xl, a9
     596 	sll	xl, a9
     597 	movi	a9, 0
     598 
     599 	srli	xh, xh, 20
     600 	sub	xh, xh, a6
     601 	slli	xh, xh, 20
     602 	add	xh, xh, a8
     603 	j	.Lsub_round
     604 
     605 .Lsub_subnormal:
     606 	/* The exponent is too small to shift away all the leading zeros.
     607 	   Set a6 to the current exponent (which has already been
     608 	   decremented by the borrow) so that the exponent of the result
     609 	   will be zero.  Do not add 1 to a6 in this case, because: (1)
     610 	   adding the mantissa will not increment the exponent, so there is
     611 	   no need to subtract anything extra from the exponent to
     612 	   compensate, and (2) the effective exponent of a subnormal is 1
     613 	   not 0 so the shift amount must be 1 smaller than normal. */
     614 	mov	a6, a10
     615 	j	.Lsub_normalize_shift
    616 
    617 #endif /* L_addsubdf3 */
    618 
    619 #ifdef L_muldf3
    620 
    621 	/* Multiplication */
    622 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
    623 #define XCHAL_NO_MUL 1
    624 #endif
    625 
    626 	.literal_position
    627 __muldf3_aux:
    628 
    629 	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
    630 	   (This code is placed before the start of the function just to
    631 	   keep it in range of the limited branch displacements.)  */
    632 
    633 .Lmul_xexpzero:
    634 	/* Clear the sign bit of x.  */
    635 	slli	xh, xh, 1
    636 	srli	xh, xh, 1
    637 
    638 	/* If x is zero, return zero.  */
    639 	or	a10, xh, xl
    640 	beqz	a10, .Lmul_return_zero
    641 
    642 	/* Normalize x.  Adjust the exponent in a8.  */
    643 	beqz	xh, .Lmul_xh_zero
    644 	do_nsau	a10, xh, a11, a12
    645 	addi	a10, a10, -11
    646 	ssl	a10
    647 	src	xh, xh, xl
    648 	sll	xl, xl
    649 	movi	a8, 1
    650 	sub	a8, a8, a10
    651 	j	.Lmul_xnormalized
    652 .Lmul_xh_zero:
    653 	do_nsau	a10, xl, a11, a12
    654 	addi	a10, a10, -11
    655 	movi	a8, -31
    656 	sub	a8, a8, a10
    657 	ssl	a10
    658 	bltz	a10, .Lmul_xl_srl
    659 	sll	xh, xl
    660 	movi	xl, 0
    661 	j	.Lmul_xnormalized
    662 .Lmul_xl_srl:
    663 	srl	xh, xl
    664 	sll	xl, xl
    665 	j	.Lmul_xnormalized
    666 
    667 .Lmul_yexpzero:
    668 	/* Clear the sign bit of y.  */
    669 	slli	yh, yh, 1
    670 	srli	yh, yh, 1
    671 
    672 	/* If y is zero, return zero.  */
    673 	or	a10, yh, yl
    674 	beqz	a10, .Lmul_return_zero
    675 
    676 	/* Normalize y.  Adjust the exponent in a9.  */
    677 	beqz	yh, .Lmul_yh_zero
    678 	do_nsau	a10, yh, a11, a12
    679 	addi	a10, a10, -11
    680 	ssl	a10
    681 	src	yh, yh, yl
    682 	sll	yl, yl
    683 	movi	a9, 1
    684 	sub	a9, a9, a10
    685 	j	.Lmul_ynormalized
    686 .Lmul_yh_zero:
    687 	do_nsau	a10, yl, a11, a12
    688 	addi	a10, a10, -11
    689 	movi	a9, -31
    690 	sub	a9, a9, a10
    691 	ssl	a10
    692 	bltz	a10, .Lmul_yl_srl
    693 	sll	yh, yl
    694 	movi	yl, 0
    695 	j	.Lmul_ynormalized
    696 .Lmul_yl_srl:
    697 	srl	yh, yl
    698 	sll	yl, yl
    699 	j	.Lmul_ynormalized
    700 
    701 .Lmul_return_zero:
    702 	/* Return zero with the appropriate sign bit.  */
    703 	srli	xh, a7, 31
    704 	slli	xh, xh, 31
    705 	movi	xl, 0
    706 	j	.Lmul_done
    707 
    708 .Lmul_xnan_or_inf:
    709 	/* If y is zero, return NaN.  */
    710 	bnez	yl, 1f
    711 	slli	a8, yh, 1
    712 	beqz	a8, .Lmul_return_nan
    713 1:
    714 	/* If y is NaN, return y.  */
    715 	bnall	yh, a6, .Lmul_returnx
    716 	slli	a8, yh, 12
    717 	or	a8, a8, yl
    718 	beqz	a8, .Lmul_returnx
    719 
    720 .Lmul_returny:
    721 	mov	xh, yh
    722 	mov	xl, yl
    723 
    724 .Lmul_returnx:
    725 	slli	a8, xh, 12
    726 	or	a8, a8, xl
    727 	bnez	a8, .Lmul_return_nan
    728 	/* Set the sign bit and return.  */
    729 	extui	a7, a7, 31, 1
    730 	slli	xh, xh, 1
    731 	ssai	1
    732 	src	xh, a7, xh
    733 	j	.Lmul_done
    734 
    735 .Lmul_ynan_or_inf:
    736 	/* If x is zero, return NaN.  */
    737 	bnez	xl, .Lmul_returny
    738 	slli	a8, xh, 1
    739 	bnez	a8, .Lmul_returny
    740 	mov	xh, yh
    741 
    742 .Lmul_return_nan:
    743 	movi	a4, 0x80000	/* make it a quiet NaN */
    744 	or	xh, xh, a4
    745 	j	.Lmul_done
    746 
    747 	.align	4
    748 	.global	__muldf3
    749 	.type	__muldf3, @function
    750 __muldf3:
    751 #if __XTENSA_CALL0_ABI__
    752 	leaf_entry sp, 32
    753 	addi	sp, sp, -32
    754 	s32i	a12, sp, 16
    755 	s32i	a13, sp, 20
    756 	s32i	a14, sp, 24
    757 	s32i	a15, sp, 28
    758 #elif XCHAL_NO_MUL
    759 	/* This is not really a leaf function; allocate enough stack space
    760 	   to allow CALL12s to a helper function.  */
    761 	leaf_entry sp, 64
    762 #else
    763 	leaf_entry sp, 32
    764 #endif
    765 	movi	a6, 0x7ff00000
    766 
    767 	/* Get the sign of the result.  */
    768 	xor	a7, xh, yh
    769 
    770 	/* Check for NaN and infinity.  */
    771 	ball	xh, a6, .Lmul_xnan_or_inf
    772 	ball	yh, a6, .Lmul_ynan_or_inf
    773 
    774 	/* Extract the exponents.  */
    775 	extui	a8, xh, 20, 11
    776 	extui	a9, yh, 20, 11
    777 
    778 	beqz	a8, .Lmul_xexpzero
    779 .Lmul_xnormalized:
    780 	beqz	a9, .Lmul_yexpzero
    781 .Lmul_ynormalized:
    782 
    783 	/* Add the exponents.  */
    784 	add	a8, a8, a9
    785 
    786 	/* Replace sign/exponent fields with explicit "1.0".  */
    787 	movi	a10, 0x1fffff
    788 	or	xh, xh, a6
    789 	and	xh, xh, a10
    790 	or	yh, yh, a6
    791 	and	yh, yh, a10
    792 
    793 	/* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
    794 	   The least-significant word of the result is thrown away except
    795 	   that if it is nonzero, the lsb of a6 is set to 1.  */
    796 #if XCHAL_HAVE_MUL32_HIGH
    797 
    798 	/* Compute a6 with any carry-outs in a10.  */
    799 	movi	a10, 0
    800 	mull	a6, xl, yh
    801 	mull	a11, xh, yl
    802 	add	a6, a6, a11
    803 	bgeu	a6, a11, 1f
    804 	addi	a10, a10, 1
    805 1:
    806 	muluh	a11, xl, yl
    807 	add	a6, a6, a11
    808 	bgeu	a6, a11, 1f
    809 	addi	a10, a10, 1
    810 1:
    811 	/* If the low word of the result is nonzero, set the lsb of a6.  */
    812 	mull	a11, xl, yl
    813 	beqz	a11, 1f
    814 	movi	a9, 1
    815 	or	a6, a6, a9
    816 1:
    817 	/* Compute xl with any carry-outs in a9.  */
    818 	movi	a9, 0
    819 	mull	a11, xh, yh
    820 	add	a10, a10, a11
    821 	bgeu	a10, a11, 1f
    822 	addi	a9, a9, 1
    823 1:
    824 	muluh	a11, xh, yl
    825 	add	a10, a10, a11
    826 	bgeu	a10, a11, 1f
    827 	addi	a9, a9, 1
    828 1:
    829 	muluh	xl, xl, yh
    830 	add	xl, xl, a10
    831 	bgeu	xl, a10, 1f
    832 	addi	a9, a9, 1
    833 1:
    834 	/* Compute xh.  */
    835 	muluh	xh, xh, yh
    836 	add	xh, xh, a9
    837 
    838 #else /* ! XCHAL_HAVE_MUL32_HIGH */
    839 
    840 	/* Break the inputs into 16-bit chunks and compute 16 32-bit partial
    841 	   products.  These partial products are:
    842 
    843 		0 xll * yll
    844 
    845 		1 xll * ylh
    846 		2 xlh * yll
    847 
    848 		3 xll * yhl
    849 		4 xlh * ylh
    850 		5 xhl * yll
    851 
    852 		6 xll * yhh
    853 		7 xlh * yhl
    854 		8 xhl * ylh
    855 		9 xhh * yll
    856 
    857 		10 xlh * yhh
    858 		11 xhl * yhl
    859 		12 xhh * ylh
    860 
    861 		13 xhl * yhh
    862 		14 xhh * yhl
    863 
    864 		15 xhh * yhh
    865 
    866 	   where the input chunks are (hh, hl, lh, ll).  If using the Mul16
    867 	   or Mul32 multiplier options, these input chunks must be stored in
    868 	   separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
    869 	   that the inputs come from either half of the registers, so there
    870 	   is no need to shift them out ahead of time.  If there is no
    871 	   multiply hardware, the 16-bit chunks can be extracted when setting
    872 	   up the arguments to the separate multiply function.  */
    873 
    874 	/* Save a7 since it is needed to hold a temporary value.  */
    875 	s32i	a7, sp, 4
    876 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
    877 	/* Calling a separate multiply function will clobber a0 and requires
    878 	   use of a8 as a temporary, so save those values now.  (The function
    879 	   uses a custom ABI so nothing else needs to be saved.)  */
    880 	s32i	a0, sp, 0
    881 	s32i	a8, sp, 8
    882 #endif
    883 
    884 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
    885 
    886 #define xlh a12
    887 #define ylh a13
    888 #define xhh a14
    889 #define yhh a15
    890 
    891 	/* Get the high halves of the inputs into registers.  */
    892 	srli	xlh, xl, 16
    893 	srli	ylh, yl, 16
    894 	srli	xhh, xh, 16
    895 	srli	yhh, yh, 16
    896 
    897 #define xll xl
    898 #define yll yl
    899 #define xhl xh
    900 #define yhl yh
    901 
    902 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
    903 	/* Clear the high halves of the inputs.  This does not matter
    904 	   for MUL16 because the high bits are ignored.  */
    905 	extui	xl, xl, 0, 16
    906 	extui	xh, xh, 0, 16
    907 	extui	yl, yl, 0, 16
    908 	extui	yh, yh, 0, 16
    909 #endif
    910 #endif /* MUL16 || MUL32 */
    911 
    912 
    913 #if XCHAL_HAVE_MUL16
    914 
    915 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
    916 	mul16u	dst, xreg ## xhalf, yreg ## yhalf
    917 
    918 #elif XCHAL_HAVE_MUL32
    919 
    920 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
    921 	mull	dst, xreg ## xhalf, yreg ## yhalf
    922 
    923 #elif XCHAL_HAVE_MAC16
    924 
    925 /* The preprocessor insists on inserting a space when concatenating after
    926    a period in the definition of do_mul below.  These macros are a workaround
    927    using underscores instead of periods when doing the concatenation.  */
    928 #define umul_aa_ll umul.aa.ll
    929 #define umul_aa_lh umul.aa.lh
    930 #define umul_aa_hl umul.aa.hl
    931 #define umul_aa_hh umul.aa.hh
    932 
    933 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
    934 	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
    935 	rsr	dst, ACCLO
    936 
    937 #else /* no multiply hardware */
    938 
    939 #define set_arg_l(dst, src) \
    940 	extui	dst, src, 0, 16
    941 #define set_arg_h(dst, src) \
    942 	srli	dst, src, 16
    943 
    944 #if __XTENSA_CALL0_ABI__
    945 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
    946 	set_arg_ ## xhalf (a13, xreg); \
    947 	set_arg_ ## yhalf (a14, yreg); \
    948 	call0	.Lmul_mulsi3; \
    949 	mov	dst, a12
    950 #else
    951 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
    952 	set_arg_ ## xhalf (a14, xreg); \
    953 	set_arg_ ## yhalf (a15, yreg); \
    954 	call12	.Lmul_mulsi3; \
    955 	mov	dst, a14
    956 #endif /* __XTENSA_CALL0_ABI__ */
    957 
    958 #endif /* no multiply hardware */
    959 
    960 	/* Add pp1 and pp2 into a10 with carry-out in a9.  */
    961 	do_mul(a10, xl, l, yl, h)	/* pp 1 */
    962 	do_mul(a11, xl, h, yl, l)	/* pp 2 */
    963 	movi	a9, 0
    964 	add	a10, a10, a11
    965 	bgeu	a10, a11, 1f
    966 	addi	a9, a9, 1
    967 1:
    968 	/* Initialize a6 with a9/a10 shifted into position.  Note that
    969 	   this value can be safely incremented without any carry-outs.  */
    970 	ssai	16
    971 	src	a6, a9, a10
    972 
    973 	/* Compute the low word into a10.  */
    974 	do_mul(a11, xl, l, yl, l)	/* pp 0 */
    975 	sll	a10, a10
    976 	add	a10, a10, a11
    977 	bgeu	a10, a11, 1f
    978 	addi	a6, a6, 1
    979 1:
    980 	/* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
    981 	   This is good enough to determine the low half of a6, so that any
    982 	   nonzero bits from the low word of the result can be collapsed
    983 	   into a6, freeing up a register.  */
    984 	movi	a9, 0
    985 	do_mul(a11, xl, l, yh, l)	/* pp 3 */
    986 	add	a6, a6, a11
    987 	bgeu	a6, a11, 1f
    988 	addi	a9, a9, 1
    989 1:
    990 	do_mul(a11, xl, h, yl, h)	/* pp 4 */
    991 	add	a6, a6, a11
    992 	bgeu	a6, a11, 1f
    993 	addi	a9, a9, 1
    994 1:
    995 	do_mul(a11, xh, l, yl, l)	/* pp 5 */
    996 	add	a6, a6, a11
    997 	bgeu	a6, a11, 1f
    998 	addi	a9, a9, 1
    999 1:
   1000 	/* Collapse any nonzero bits from the low word into a6.  */
   1001 	beqz	a10, 1f
   1002 	movi	a11, 1
   1003 	or	a6, a6, a11
   1004 1:
   1005 	/* Add pp6-9 into a11 with carry-outs in a10.  */
   1006 	do_mul(a7, xl, l, yh, h)	/* pp 6 */
   1007 	do_mul(a11, xh, h, yl, l)	/* pp 9 */
   1008 	movi	a10, 0
   1009 	add	a11, a11, a7
   1010 	bgeu	a11, a7, 1f
   1011 	addi	a10, a10, 1
   1012 1:
   1013 	do_mul(a7, xl, h, yh, l)	/* pp 7 */
   1014 	add	a11, a11, a7
   1015 	bgeu	a11, a7, 1f
   1016 	addi	a10, a10, 1
   1017 1:
   1018 	do_mul(a7, xh, l, yl, h)	/* pp 8 */
   1019 	add	a11, a11, a7
   1020 	bgeu	a11, a7, 1f
   1021 	addi	a10, a10, 1
   1022 1:
   1023 	/* Shift a10/a11 into position, and add low half of a11 to a6.  */
   1024 	src	a10, a10, a11
   1025 	add	a10, a10, a9
   1026 	sll	a11, a11
   1027 	add	a6, a6, a11
   1028 	bgeu	a6, a11, 1f
   1029 	addi	a10, a10, 1
   1030 1:
   1031 	/* Add pp10-12 into xl with carry-outs in a9.  */
   1032 	movi	a9, 0
   1033 	do_mul(xl, xl, h, yh, h)	/* pp 10 */
   1034 	add	xl, xl, a10
   1035 	bgeu	xl, a10, 1f
   1036 	addi	a9, a9, 1
   1037 1:
   1038 	do_mul(a10, xh, l, yh, l)	/* pp 11 */
   1039 	add	xl, xl, a10
   1040 	bgeu	xl, a10, 1f
   1041 	addi	a9, a9, 1
   1042 1:
   1043 	do_mul(a10, xh, h, yl, h)	/* pp 12 */
   1044 	add	xl, xl, a10
   1045 	bgeu	xl, a10, 1f
   1046 	addi	a9, a9, 1
   1047 1:
   1048 	/* Add pp13-14 into a11 with carry-outs in a10.  */
   1049 	do_mul(a11, xh, l, yh, h)	/* pp 13 */
   1050 	do_mul(a7, xh, h, yh, l)	/* pp 14 */
   1051 	movi	a10, 0
   1052 	add	a11, a11, a7
   1053 	bgeu	a11, a7, 1f
   1054 	addi	a10, a10, 1
   1055 1:
   1056 	/* Shift a10/a11 into position, and add low half of a11 to a6.  */
   1057 	src	a10, a10, a11
   1058 	add	a10, a10, a9
   1059 	sll	a11, a11
   1060 	add	xl, xl, a11
   1061 	bgeu	xl, a11, 1f
   1062 	addi	a10, a10, 1
   1063 1:
   1064 	/* Compute xh.  */
   1065 	do_mul(xh, xh, h, yh, h)	/* pp 15 */
   1066 	add	xh, xh, a10
   1067 
   1068 	/* Restore values saved on the stack during the multiplication.  */
   1069 	l32i	a7, sp, 4
   1070 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
   1071 	l32i	a0, sp, 0
   1072 	l32i	a8, sp, 8
   1073 #endif
   1074 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
   1075 
   1076 	/* Shift left by 12 bits, unless there was a carry-out from the
   1077 	   multiply, in which case, shift by 11 bits and increment the
   1078 	   exponent.  Note: It is convenient to use the constant 0x3ff
   1079 	   instead of 0x400 when removing the extra exponent bias (so that
   1080 	   it is easy to construct 0x7fe for the overflow check).  Reverse
   1081 	   the logic here to decrement the exponent sum by one unless there
   1082 	   was a carry-out.  */
   1083 	movi	a4, 11
   1084 	srli	a5, xh, 21 - 12
   1085 	bnez	a5, 1f
   1086 	addi	a4, a4, 1
   1087 	addi	a8, a8, -1
   1088 1:	ssl	a4
   1089 	src	xh, xh, xl
   1090 	src	xl, xl, a6
   1091 	sll	a6, a6
   1092 
   1093 	/* Subtract the extra bias from the exponent sum (plus one to account
   1094 	   for the explicit "1.0" of the mantissa that will be added to the
   1095 	   exponent in the final result).  */
   1096 	movi	a4, 0x3ff
   1097 	sub	a8, a8, a4
   1098 
   1099 	/* Check for over/underflow.  The value in a8 is one less than the
   1100 	   final exponent, so values in the range 0..7fd are OK here.  */
   1101 	slli	a4, a4, 1	/* 0x7fe */
   1102 	bgeu	a8, a4, .Lmul_overflow
   1103 
   1104 .Lmul_round:
   1105 	/* Round.  */
   1106 	bgez	a6, .Lmul_rounded
   1107 	addi	xl, xl, 1
   1108 	beqz	xl, .Lmul_roundcarry
   1109 	slli	a6, a6, 1
   1110 	beqz	a6, .Lmul_exactlyhalf
   1111 
   1112 .Lmul_rounded:
   1113 	/* Add the exponent to the mantissa.  */
   1114 	slli	a8, a8, 20
   1115 	add	xh, xh, a8
   1116 
   1117 .Lmul_addsign:
   1118 	/* Add the sign bit.  */
   1119 	srli	a7, a7, 31
   1120 	slli	a7, a7, 31
   1121 	or	xh, xh, a7
   1122 
   1123 .Lmul_done:
   1124 #if __XTENSA_CALL0_ABI__
   1125 	l32i	a12, sp, 16
   1126 	l32i	a13, sp, 20
   1127 	l32i	a14, sp, 24
   1128 	l32i	a15, sp, 28
   1129 	addi	sp, sp, 32
   1130 #endif
   1131 	leaf_return
   1132 
   1133 .Lmul_exactlyhalf:
   1134 	/* Round down to the nearest even value.  */
   1135 	srli	xl, xl, 1
   1136 	slli	xl, xl, 1
   1137 	j	.Lmul_rounded
   1138 
   1139 .Lmul_roundcarry:
   1140 	/* xl is always zero when the rounding increment overflows, so
   1141 	   there's no need to round it to an even value.  */
   1142 	addi	xh, xh, 1
   1143 	/* Overflow is OK -- it will be added to the exponent.  */
   1144 	j	.Lmul_rounded
   1145 
   1146 .Lmul_overflow:
   1147 	bltz	a8, .Lmul_underflow
   1148 	/* Return +/- Infinity.  */
   1149 	addi	a8, a4, 1	/* 0x7ff */
   1150 	slli	xh, a8, 20
   1151 	movi	xl, 0
   1152 	j	.Lmul_addsign
   1153 
   1154 .Lmul_underflow:
   1155 	/* Create a subnormal value, where the exponent field contains zero,
   1156 	   but the effective exponent is 1.  The value of a8 is one less than
   1157 	   the actual exponent, so just negate it to get the shift amount.  */
   1158 	neg	a8, a8
   1159 	mov	a9, a6
   1160 	ssr	a8
   1161 	bgeui	a8, 32, .Lmul_bigshift
   1162 
   1163 	/* Shift xh/xl right.  Any bits that are shifted out of xl are saved
   1164 	   in a6 (combined with the shifted-out bits currently in a6) for
   1165 	   rounding the result.  */
   1166 	sll	a6, xl
   1167 	src	xl, xh, xl
   1168 	srl	xh, xh
   1169 	j	1f
   1170 
   1171 .Lmul_bigshift:
   1172 	bgeui	a8, 64, .Lmul_flush_to_zero
   1173 	sll	a10, xl		/* lost bits shifted out of xl */
   1174 	src	a6, xh, xl
   1175 	srl	xl, xh
   1176 	movi	xh, 0
   1177 	or	a9, a9, a10
   1178 
   1179 	/* Set the exponent to zero.  */
   1180 1:	movi	a8, 0
   1181 
   1182 	/* Pack any nonzero bits shifted out into a6.  */
   1183 	beqz	a9, .Lmul_round
   1184 	movi	a9, 1
   1185 	or	a6, a6, a9
   1186 	j	.Lmul_round
   1187 
   1188 .Lmul_flush_to_zero:
   1189 	/* Return zero with the appropriate sign bit.  */
   1190 	srli	xh, a7, 31
   1191 	slli	xh, xh, 31
   1192 	movi	xl, 0
   1193 	j	.Lmul_done
   1194 
#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	/* Classic shift-and-add multiply, consuming four multiplier bits
	   per loop iteration.  Each of the four steps conditionally adds
	   \src2 * 2^k (k = 0..3) into \dst: the bit is extracted with
	   extui and the add is committed with movnez, so the unrolled
	   group contains no branches.  */
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	/* Advance to the next four multiplier bits; stop as soon as no
	   set bits remain in \src1.  */
	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */
   1236 #endif /* L_muldf3 */
   1237 
   1238 #ifdef L_divdf3
   1239 
   1240 	/* Division */
   1241 
   1242 #if XCHAL_HAVE_DFP_DIV
   1243 
        .text
        .align 4
        .global __divdf3
        .type	__divdf3, @function
__divdf3:
	leaf_entry	sp, 16

	/* Hardware divide using the double-precision FP accelerator.
	   Load the operands into FP registers: f1 = x, f2 = y.  */
	wfrd		f1, xh, xl
	wfrd		f2, yh, yl

	/* Reciprocal-refinement division using the DFP divide-assist
	   instructions: div0.d seeds an initial approximation of 1/y,
	   the maddn.d chain iteratively refines it (Newton-Raphson
	   style), and mkdadj.d/addexpm.d/addexp.d manage the exponent
	   and special-case adjustments, with divn.d producing the final
	   correctly rounded quotient in f0.  NOTE(review): this is the
	   canned instruction sequence for these coprocessor ops; do not
	   reorder it — the data dependencies are exact.  */
	div0.d		f3, f2
	nexp01.d	f4, f2
	const.d		f0, 1
	maddn.d		f0, f4, f3
	const.d		f5, 0
	mov.d		f7, f2
	mkdadj.d	f7, f1
	maddn.d		f3, f0, f3
	maddn.d		f5, f0, f0
	nexp01.d	f1, f1
	div0.d		f2, f2
	maddn.d		f3, f5, f3
	const.d		f5, 1
	const.d		f0, 0
	neg.d		f6, f1
	maddn.d		f5, f4, f3
	maddn.d		f0, f6, f2
	maddn.d		f3, f5, f3
	maddn.d		f6, f4, f0
	const.d		f2, 1
	maddn.d		f2, f4, f3
	maddn.d		f0, f6, f3
	neg.d		f1, f1
	maddn.d		f3, f2, f3
	maddn.d		f1, f4, f0
	addexpm.d	f0, f7
	addexp.d	f3, f7
	divn.d		f0, f1, f3

	/* Move the quotient back to the integer return registers.  */
	rfr		xl, f0
	rfrd		xh, f0

	leaf_return
   1287 
   1288 #else
   1289 
	.literal_position

__divdf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)

	   All of these labels are reached only from __divdf3 below, with:
	     a6 = 0x7ff00000 (sign-less exponent mask)
	     a7 = xh ^ yh (sign of the result in bit 31).  */

.Ldiv_yexpzero:
	/* y has a zero exponent: either zero or subnormal.
	   Clear the sign bit of y.  */
	slli	yh, yh, 1
	srli	yh, yh, 1

	/* Check for division by zero.  */
	or	a10, yh, yl
	beqz	a10, .Ldiv_yzero

	/* Normalize y.  Adjust the exponent in a9.  */
	beqz	yh, .Ldiv_yh_zero
	do_nsau	a10, yh, a11, a9
	addi	a10, a10, -11	/* shift count to bring the MSB to bit 20 */
	ssl	a10
	src	yh, yh, yl
	sll	yl, yl
	movi	a9, 1
	sub	a9, a9, a10	/* effective exponent of a subnormal is 1 - shift */
	j	.Ldiv_ynormalized
.Ldiv_yh_zero:
	/* High word of y is zero; the first set bit is in yl.  */
	do_nsau	a10, yl, a11, a9
	addi	a10, a10, -11
	movi	a9, -31
	sub	a9, a9, a10
	ssl	a10
	bltz	a10, .Ldiv_yl_srl
	sll	yh, yl
	movi	yl, 0
	j	.Ldiv_ynormalized
.Ldiv_yl_srl:
	/* Shift count was negative: split yl across yh/yl.  */
	srl	yh, yl
	sll	yl, yl
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity
	   with the sign from a7 (exponent field forced to 0x7ff via a6).  */
	slli	xh, xh, 1
	srli	xh, xh, 1
	or	xl, xl, xh	/* xl == 0 iff x == 0 */
	srli	xh, a7, 31
	slli	xh, xh, 31
	or	xh, xh, a6
	bnez	xl, 1f
	movi	a4, 0x80000	/* make it a quiet NaN */
	or	xh, xh, a4
1:	movi	xl, 0
	leaf_return

.Ldiv_xexpzero:
	/* x has a zero exponent: either zero or subnormal.
	   Clear the sign bit of x.  */
	slli	xh, xh, 1
	srli	xh, xh, 1

	/* If x is zero, return zero.  */
	or	a10, xh, xl
	beqz	a10, .Ldiv_return_zero

	/* Normalize x.  Adjust the exponent in a8.  (Mirror image of the
	   y-normalization code above.)  */
	beqz	xh, .Ldiv_xh_zero
	do_nsau	a10, xh, a11, a8
	addi	a10, a10, -11
	ssl	a10
	src	xh, xh, xl
	sll	xl, xl
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized
.Ldiv_xh_zero:
	do_nsau	a10, xl, a11, a8
	addi	a10, a10, -11
	movi	a8, -31
	sub	a8, a8, a10
	ssl	a10
	bltz	a10, .Ldiv_xl_srl
	sll	xh, xl
	movi	xl, 0
	j	.Ldiv_xnormalized
.Ldiv_xl_srl:
	srl	xh, xl
	sll	xl, xl
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	leaf_return

.Ldiv_xnan_or_inf:
	/* Set the sign bit of the result.  */
	srli	a7, yh, 31
	slli	a7, a7, 31
	xor	xh, xh, a7
	/* If y is NaN or Inf, return NaN (Inf/Inf and NaN/anything).  */
	ball	yh, a6, .Ldiv_return_nan
	/* If x is NaN (nonzero mantissa), return NaN; else x is Inf:
	   return it as-is (Inf/finite = Inf).  */
	slli	a8, xh, 12
	or	a8, a8, xl
	bnez	a8, .Ldiv_return_nan
	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero (finite/Inf = 0).  */
	slli	a8, yh, 12
	or	a8, a8, yl
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	xh, yh
	mov	xl, yl

.Ldiv_return_nan:
	movi	a4, 0x80000	/* make it a quiet NaN */
	or	xh, xh, a4
	leaf_return

.Ldiv_highequal1:
	/* First quotient digit: high words equal, so compare low words
	   to decide whether x must be shifted left (out-of-line to keep
	   the main path's branch displacements short).  */
	bltu	xl, yl, 2f
	j	3f
   1416 
	.align	4
	.global	__divdf3
	.type	__divdf3, @function
__divdf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000

	/* Software divide: restoring long division, one quotient bit per
	   iteration.  Register roles below:
	     a6 = 0x7ff00000 exponent mask,  a7 = sign of result,
	     a8/a9 = exponents,  xh/xl = running remainder,
	     a10/a11 = quotient (high/low).  */

	/* Get the sign of the result.  */
	xor	a7, xh, yh

	/* Check for NaN and infinity.  */
	ball	xh, a6, .Ldiv_xnan_or_inf
	ball	yh, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, xh, 20, 11
	extui	a9, yh, 20, 11

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0x1fffff
	or	xh, xh, a6
	and	xh, xh, a10
	or	yh, yh, a6
	and	yh, yh, a10

	/* Set SAR for left shift by one.  */
	ssai	(32 - 1)

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	yh, xh, 3f
	beq	yh, xh, .Ldiv_highequal1
2:	src	xh, xh, xl
	sll	xl, xl
	addi	a8, a8, -1
3:
	/* Do the first subtraction and shift.  */
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1	/* borrow from the high word */
1:	sub	xl, xl, yl
	src	xh, xh, xl
	sll	xl, xl

	/* Put the quotient into a10/a11.  (The first digit is the 1
	   established above.)  */
	movi	a10, 0
	movi	a11, 1

	/* Divide one bit at a time for 52 bits.  */
	movi	a9, 52
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend	/* Xtensa zero-overhead loop */
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	src	a10, a10, a11
	sll	a11, a11

	/* Is this digit a 0 or 1?  (Compare remainder against divisor.)  */
	bltu	xh, yh, 3f
	beq	xh, yh, .Ldiv_highequal2

	/* Output a 1 and subtract.  */
2:	addi	a11, a11, 1
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1	/* borrow from the high word */
1:	sub	xl, xl, yl

	/* Shift the dividend << 1.  */
3:	src	xh, xh, xl
	sll	xl, xl

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	movi	a9, 0x3fe
	add	a8, a8, a9

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..7fd are OK here.  */
	addmi	a9, a9, 0x400	/* 0x7fe */
	bgeu	a8, a9, .Ldiv_overflow

.Ldiv_round:
	/* Round to nearest.  The remainder (<< 1) is in xh/xl, so comparing
	   it to the divisor yh/yl decides whether to round up.  */
	bltu	xh, yh, .Ldiv_rounded
	beq	xh, yh, .Ldiv_highequal3
.Ldiv_roundup:
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry

.Ldiv_rounded:
	mov	xl, a11
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 20
	add	xh, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	xh, xh, a7
	leaf_return

.Ldiv_highequal2:
	/* Digit decision when high words are equal: low words decide.  */
	bgeu	xl, yl, 2b
	j	3b

.Ldiv_highequal3:
	/* Rounding decision when high words are equal: low words decide.  */
	bltu	xl, yl, .Ldiv_rounded
	bne	xl, yl, .Ldiv_roundup

	/* Remainder is exactly half the divisor.  Round even.  */
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry
	srli	a11, a11, 1
	slli	a11, a11, 1
	j	.Ldiv_rounded

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a9, 1	/* 0x7ff */
	slli	xh, a8, 20
	movi	xl, 0
	j	.Ldiv_addsign

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_bigshift

	/* Shift a10/a11 right.  Any bits that are shifted out of a11 are
	   saved in a6 for rounding the result.  */
	sll	a6, a11
	src	a11, a10, a11
	srl	a10, a10
	j	1f

.Ldiv_bigshift:
	/* Shift amount is 32..63: the result comes entirely from a10.  */
	bgeui	a8, 64, .Ldiv_flush_to_zero
	sll	a9, a11		/* lost bits shifted out of a11 */
	src	a6, a10, a11
	srl	a11, a10
	movi	a10, 0
	or	xl, xl, a9

	/* Set the exponent to zero.  */
1:	movi	a8, 0

	/* Pack any nonzero remainder (in xh/xl) into a6 as a sticky bit.  */
	or	xh, xh, xl
	beqz	xh, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10/a11 based on the bits shifted out into a6 (MSB of a6
	   is the round bit; remaining bits are sticky).  */
1:	bgez	a6, .Ldiv_rounded
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	/* Exactly halfway: round to even.  */
	srli	a11, a11, 1
	slli	a11, a11, 1
	j	.Ldiv_rounded

.Ldiv_roundcarry:
	/* a11 is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	a10, a10, 1
	/* Overflow to the exponent field is OK.  */
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	leaf_return
   1614 
   1615 #endif /* XCHAL_HAVE_DFP_DIV */
   1616 
   1617 #endif /* L_divdf3 */
   1618 
   1619 #ifdef L_cmpdf2
   1620 
   1621 	/* Equal and Not Equal */
   1622 
	/* __eqdf2/__nedf2: returns 0 when x == y, 1 otherwise.  NaNs
	   compare unequal even to themselves; +0.0 and -0.0 compare
	   equal.  Callers test the result against zero, so the single
	   routine serves both __eqdf2 and __nedf2.  */
	.align	4
	.global	__eqdf2
	.global	__nedf2
	.set	__nedf2, __eqdf2
	.type	__eqdf2, @function
__eqdf2:
	leaf_entry sp, 16
	bne	xl, yl, 2f
	bne	xh, yh, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Exponent is all ones: NaN iff the mantissa is nonzero.  */
3:	slli	a7, xh, 12
	or	a7, a7, xl
	j	5f

	/* High words differ but low words match: x and y can still be
	   equal if they are zero with different signs.  */
4:	or	a7, xh, yh
	slli	a7, a7, 1
	or	a7, a7, xl	/* xl == yl here */

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   of x when exponent(x) = 0x7ff and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return
   1661 
   1662 
   1663 	/* Greater Than */
   1664 
	/* __gtdf2: shares the .Lle_cmp tail with __ledf2 below, which
	   returns 0 when x <= y and 1 when x > y.  Callers evaluate
	   "x > y" as (__gtdf2(x,y) > 0), so returning 0 for NaN operands
	   correctly makes the comparison false when unordered.  */
	.align	4
	.global	__gtdf2
	.type	__gtdf2, @function
__gtdf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f
1:	bnall	yh, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Lle_cmp
	movi	a2, 0		/* unordered: "greater than" is false */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b
	movi	a2, 0		/* unordered: "greater than" is false */
	leaf_return
   1687 
   1688 
   1689 	/* Less Than or Equal */
   1690 
	/* __ledf2: returns 0 when x <= y, 1 otherwise.  Callers evaluate
	   "x <= y" as (__ledf2(x,y) <= 0), so returning 1 for NaN
	   operands correctly makes the comparison false when unordered.
	   The .Lle_cmp tail is also the ordered-compare path for
	   __gtdf2 above.  */
	.align	4
	.global	__ledf2
	.type	__ledf2, @function
__ledf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f
1:	bnall	yh, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Lle_cmp
	movi	a2, 1		/* unordered: "less than or equal" is false */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b
	movi	a2, 1		/* unordered: "less than or equal" is false */
	leaf_return

.Lle_cmp:
	/* Ordered comparison: a2 = 0 if x <= y, else 1.
	   Check if x and y have different signs.  */
	xor	a7, xh, yh
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	xh, .Lle_xneg

	/* Both nonnegative: compare as unsigned 64-bit integers.  */
	bltu	xh, yh, 4f
	bne	xh, yh, 5f
	bltu	yl, xl, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Both negative: the ordering of the raw bit patterns reverses.
	   Check if y <= x.  */
	bltu	yh, xh, 4b
	bne	yh, xh, 5f
	bgeu	xl, yl, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	/* x negative, y positive => x <= y (handled by the branch).  */
	bltz	xh, 4b

	/* x positive, y negative: x <= y only if both are zero.  */
	or	a7, xh, yh
	slli	a7, a7, 1	/* drop the sign bits */
	or	a7, a7, xl
	or	a7, a7, yl
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return
   1749 
   1750 
   1751 	/* Greater Than or Equal */
   1752 
	/* __gedf2: shares the .Llt_cmp tail with __ltdf2 below, which
	   returns -1 when x < y and 0 otherwise.  Callers evaluate
	   "x >= y" as (__gedf2(x,y) >= 0), so returning -1 for NaN
	   operands correctly makes the comparison false when unordered.  */
	.align	4
	.global	__gedf2
	.type	__gedf2, @function
__gedf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f
1:	bnall	yh, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Llt_cmp
	movi	a2, -1		/* unordered: "greater than or equal" is false */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b
	movi	a2, -1		/* unordered: "greater than or equal" is false */
	leaf_return
   1775 
   1776 
   1777 	/* Less Than */
   1778 
	/* __ltdf2: returns -1 when x < y, 0 otherwise.  Callers evaluate
	   "x < y" as (__ltdf2(x,y) < 0), so returning 0 for NaN operands
	   correctly makes the comparison false when unordered.  The
	   .Llt_cmp tail is also the ordered-compare path for __gedf2
	   above.  */
	.align	4
	.global	__ltdf2
	.type	__ltdf2, @function
__ltdf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f
1:	bnall	yh, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Llt_cmp
	movi	a2, 0		/* unordered: "less than" is false */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b
	movi	a2, 0		/* unordered: "less than" is false */
	leaf_return

.Llt_cmp:
	/* Ordered comparison: a2 = -1 if x < y, else 0.
	   Check if x and y have different signs.  */
	xor	a7, xh, yh
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	xh, .Llt_xneg

	/* Both nonnegative: compare as unsigned 64-bit integers.  */
	bltu	xh, yh, 4f
	bne	xh, yh, 5f
	bgeu	xl, yl, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Both negative: the ordering of the raw bit patterns reverses.
	   Check if y < x.  */
	bltu	yh, xh, 4b
	bne	yh, xh, 5f
	bltu	yl, xl, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	/* x positive, y negative => not(x < y).  */
	bgez	xh, 5b

	/* x negative, y positive: x < y unless both are zero.  */
	or	a7, xh, yh
	slli	a7, a7, 1	/* drop the sign bits */
	or	a7, a7, xl
	or	a7, a7, yl
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return
   1837 
   1838 
   1839 	/* Unordered */
   1840 
	/* __unorddf2: returns 1 if either operand is a NaN (exponent all
	   ones and nonzero mantissa), 0 otherwise.  */
	.align	4
	.global	__unorddf2
	.type	__unorddf2, @function
__unorddf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 3f
1:	ball	yh, a6, 4f
2:	movi	a2, 0		/* ordered */
	leaf_return

	/* x has an all-ones exponent: NaN iff mantissa is nonzero.  */
3:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf, not NaN; go check y */
	movi	a2, 1
	leaf_return

	/* y has an all-ones exponent: NaN iff mantissa is nonzero.  */
4:	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, 2b		/* y is Inf, not NaN; ordered */
	movi	a2, 1
	leaf_return
   1863 
   1864 #endif /* L_cmpdf2 */
   1865 
   1866 #ifdef L_fixdfsi
   1867 
	/* __fixdfsi: convert double to signed 32-bit integer, truncating
	   toward zero.  Out-of-range values and Infinity saturate to
	   0x7fffffff / 0x80000000 by sign; NaN is mapped to +maxint.  */
	.align	4
	.global	__fixdfsi
	.type	__fixdfsi, @function
__fixdfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixdfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 19, 10	/* 0x3fe, extracted from the 0x7ff00000 constant */
	sub	a4, a4, a5
	bgei	a4, 32, .Lfixdfsi_maxint
	blti	a4, 1, .Lfixdfsi_zero	/* |x| < 1 truncates to 0 */

	/* Add explicit "1.0" and shift << 11.  (a7 keeps x's sign in
	   bit 31, since or-ing in the exponent mask leaves it intact.)  */
	or	a7, xh, a6
	ssai	(32 - 11)
	src	a5, a7, xl

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixdfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixdfsi_maxint

	/* Translate NaN to +maxint (clear xh so the movgez below picks
	   the positive saturation value).  */
	movi	xh, 0

.Lfixdfsi_maxint:
	slli	a4, a6, 11	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, xh	/* saturate by the sign of x */
	mov	a2, a4
	leaf_return

.Lfixdfsi_zero:
	movi	a2, 0
	leaf_return
   1918 
   1919 #endif /* L_fixdfsi */
   1920 
   1921 #ifdef L_fixdfdi
   1922 
	/* __fixdfdi: convert double to signed 64-bit integer, truncating
	   toward zero.  Out-of-range values and Infinity saturate to
	   INT64_MAX / INT64_MIN by sign; NaN is mapped to +maxint.  */
	.align	4
	.global	__fixdfdi
	.type	__fixdfdi, @function
__fixdfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixdfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 19, 10	/* 0x3fe, extracted from the 0x7ff00000 constant */
	sub	a4, a4, a5
	bgei	a4, 64, .Lfixdfdi_maxint
	blti	a4, 1, .Lfixdfdi_zero	/* |x| < 1 truncates to 0 */

	/* Add explicit "1.0" and shift << 11.  (a7 keeps x's sign in
	   bit 31, since or-ing in the exponent mask leaves it intact.)  */
	or	a7, xh, a6
	ssai	(32 - 11)
	src	xh, a7, xl
	sll	xl, xl

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixdfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixdfdi_shifted:
	/* Negate the 64-bit result if sign != 0 (two's complement:
	   negate both words, borrow into xh when xl is nonzero).  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixdfdi_smallshift:
	/* Right shift of less than 32: both words participate.  */
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixdfdi_shifted

.Lfixdfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixdfdi_maxint

	/* Translate NaN to +maxint (clear xh so the sign test below
	   picks the positive saturation value).  */
	movi	xh, 0

.Lfixdfdi_maxint:
	slli	a7, a6, 11	/* 0x80000000 */
	bgez	xh, 1f
	mov	xh, a7		/* INT64_MIN = 0x80000000:00000000 */
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* INT64_MAX = 0x7fffffff:ffffffff */
	movi	xl, -1
	leaf_return

.Lfixdfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return
   1990 
   1991 #endif /* L_fixdfdi */
   1992 
   1993 #ifdef L_fixunsdfsi
   1994 
	/* __fixunsdfsi: convert double to unsigned 32-bit integer,
	   truncating toward zero.  Values >= 2^32 and +Infinity saturate
	   to 0xffffffff; NaN maps to 0xffffffff; values < 1 (including
	   all strictly negative values with exponent below the bias)
	   give 0.  Negative inputs (undefined behavior for this
	   conversion) yield the negated magnitude via the neg/movgez
	   pair at the end.  */
	.align	4
	.global	__fixunsdfsi
	.type	__fixunsdfsi, @function
__fixunsdfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixunsdfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 20, 10	/* 0x3ff, extracted from the 0x7ff00000 constant */
	sub	a4, a4, a5
	bgei	a4, 32, .Lfixunsdfsi_maxint
	bltz	a4, .Lfixunsdfsi_zero

	/* Add explicit "1.0" and shift << 11.  (a7 keeps x's sign in
	   bit 31, since or-ing in the exponent mask leaves it intact.)  */
	or	a7, xh, a6
	ssai	(32 - 11)
	src	a5, a7, xl

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunsdfsi_bigexp	/* shift of 0 not expressible via ssl */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunsdfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixunsdfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunsdfsi_maxint:
	slli	a4, a6, 11	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, xh	/* saturate by the sign of x */
	mov	a2, a4
	leaf_return

.Lfixunsdfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunsdfsi_bigexp:
	/* Handle unsigned maximum exponent case (exponent exactly 31:
	   the mantissa already sits in the low 32 bits).  */
	bltz	xh, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 11
	leaf_return
   2058 
   2059 #endif /* L_fixunsdfsi */
   2060 
   2061 #ifdef L_fixunsdfdi
   2062 
	/* __fixunsdfdi: convert double to unsigned 64-bit integer,
	   truncating toward zero.  Values >= 2^64 and +Infinity saturate;
	   NaN maps to 0xffffffff:ffffffff; values < 1 give 0.  Negative
	   inputs (undefined behavior for this conversion) yield the
	   negated magnitude via the negate sequence at
	   .Lfixunsdfdi_shifted.  */
	.align	4
	.global	__fixunsdfdi
	.type	__fixunsdfdi, @function
__fixunsdfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixunsdfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 20, 10	/* 0x3ff, extracted from the 0x7ff00000 constant */
	sub	a4, a4, a5
	bgei	a4, 64, .Lfixunsdfdi_maxint
	bltz	a4, .Lfixunsdfdi_zero

	/* Add explicit "1.0" and shift << 11.  (a7 keeps x's sign in
	   bit 31, since or-ing in the exponent mask leaves it intact.)  */
	or	a7, xh, a6
	ssai	(32 - 11)
	src	xh, a7, xl
	sll	xl, xl

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunsdfdi_bigexp	/* shift of 0 not expressible via ssl */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunsdfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunsdfdi_shifted:
	/* Negate the 64-bit result if sign != 0 (two's complement:
	   negate both words, borrow into xh when xl is nonzero).  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunsdfdi_smallshift:
	/* Right shift of less than 32: both words participate.  */
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunsdfdi_shifted

.Lfixunsdfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixunsdfdi_maxint

	/* Translate NaN to 0xffffffff.... */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunsdfdi_maxint:
	bgez	xh, 1b		/* +Inf / too-large positive: all ones */
2:	slli	xh, a6, 11	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunsdfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunsdfdi_bigexp:
	/* Handle unsigned maximum exponent case (exponent exactly 63:
	   the mantissa already sits in xh/xl).  */
	bltz	a7, 2b
	leaf_return		/* no shift needed */
   2134 
   2135 #endif /* L_fixunsdfdi */
   2136 
   2137 #ifdef L_floatsidf
   2138 
	/* __floatunsidf: convert unsigned 32-bit integer to double.
	   The conversion is exact (a 32-bit value always fits in the
	   53-bit mantissa), so no rounding is needed.  */
	.align	4
	.global	__floatunsidf
	.type	__floatunsidf, @function
__floatunsidf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsidf_return_zero

	/* Set the sign to zero and jump to the floatsidf code.  */
	movi	a7, 0
	j	.Lfloatsidf_normalize

	/* __floatsidf: convert signed 32-bit integer to double (exact).  */
	.align	4
	.global	__floatsidf
	.type	__floatsidf, @function
__floatsidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsidf_return_zero

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  (For INT_MIN this yields 0x80000000,
	   which is the correct magnitude 2^31 when treated as unsigned
	   by the normalization below.)  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsidf_normalize:
	/* Normalize with the first 1 bit in the msb.  a4 = leading-zero
	   count, which also determines the exponent.  */
	do_nsau	a4, a2, a5, a6
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position.  */
	srli	xh, a5, 11
	slli	xl, a5, (32 - 11)

	/* Set the exponent.  */
	movi	a5, 0x41d	/* 0x3fe + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 20
	add	xh, xh, a5

	/* Add the sign and return. */
	slli	a7, a7, 31
	or	xh, xh, a7
	leaf_return

.Lfloatsidf_return_zero:
	/* The input (a2) is already zero, so only the other half of the
	   xh/xl result pair needs clearing to return +0.0.  */
	movi	a3, 0
	leaf_return
   2194 
   2195 #endif /* L_floatsidf */
   2196 
   2197 #ifdef L_floatdidf
   2198 
	.align	4
	.global	__floatundidf
	.type	__floatundidf, @function

/* double __floatundidf (unsigned long long): convert the unsigned
   64-bit integer in xh/xl to a double in xh/xl.  Values with more
   than 53 significant bits are rounded to nearest, ties to even.
   Shares the normalization/rounding code with __floatdidf below.  */
__floatundidf:
	leaf_entry sp, 16

	/* Check for zero.  If so, xh/xl already hold +0.0; the target
	   "2:" label is just a leaf_return.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdidf code.  */
	movi	a7, 0
	j	.Lfloatdidf_normalize

	.align	4
	.global	__floatdidf
	.type	__floatdidf, @function

/* double __floatdidf (long long): convert the signed 64-bit integer
   in xh/xl to a double in xh/xl, rounding to nearest/even.  */
__floatdidf:
	leaf_entry sp, 16

	/* Check for zero.  If so, xh/xl already hold +0.0.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value: 64-bit two's-complement negate.  Negate
	   both halves; if the negated low half is nonzero, borrow 1 from
	   the high half.  (INT64_MIN negates to itself, which is also its
	   correct unsigned magnitude 2^63.)  */
	bgez	xh, .Lfloatdidf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdidf_normalize
	addi	xh, xh, -1

.Lfloatdidf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.
	   a4 = leading-zero count used later to compute the exponent.  */
	beqz	xh, .Lfloatdidf_bigshift
	do_nsau	a4, xh, a5, a6
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdidf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.
	   After the shift, a6's msb is the round bit and the remaining
	   bits act as the sticky bits.  */
	ssai	11
	sll	a6, xl
	src	xl, xh, xl
	srl	xh, xh

	/* Set the exponent.  Unbiased exponent is (63 - a4); the base
	   0x3fe (= bias - 1) also cancels the explicit leading 1 bit.  */
	movi	a5, 0x43d	/* 0x3fe + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 20
	add	xh, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	xh, xh, a7

	/* Round up if the leftover fraction is >= 1/2 (msb of a6 set).  */
	bgez	a6, 2f
	addi	xl, xl, 1
	beqz	xl, .Lfloatdidf_roundcarry

	/* Check if the leftover fraction is exactly 1/2 (no sticky bits).  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdidf_exactlyhalf
2:	leaf_return

.Lfloatdidf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32	/* account for the 32 zero bits in xh */
	j	.Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
	/* Round down to the nearest even value by clearing the lsb.  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Lfloatdidf_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return
   2289 
   2290 #endif /* L_floatdidf */
   2291 
   2292 #ifdef L_truncdfsf2
   2293 
	.align	4
	.global	__truncdfsf2
	.type	__truncdfsf2, @function

/* float __truncdfsf2 (double): truncate the double in xh/xl to a
   single-precision float returned in a2, rounding to nearest with
   ties to even.  Overflow produces +/-Inf, NaN input produces a
   quiet NaN, and small values produce a subnormal or +/-0.  */
__truncdfsf2:
	leaf_entry sp, 16

	/* Adjust the exponent bias (double bias 0x3ff -> single bias 0x7f).  */
	movi	a4, (0x3ff - 0x7f) << 20
	sub	a5, xh, a4

	/* Check for underflow.  If the subtraction changed the sign bit,
	   the rebased exponent went negative; a zero rebased exponent is
	   also too small for a normal single.  */
	xor	a6, xh, a5
	bltz	a6, .Ltrunc_underflow
	extui	a6, a5, 20, 11
	beqz	a6, .Ltrunc_underflow

	/* Check for overflow: rebased exponent >= 255 cannot be encoded
	   as a finite single.  */
	movi	a4, 255
	bge	a6, a4, .Ltrunc_overflow

	/* Shift a5/xl << 3 into a5/a4, lining the 52-bit mantissa up with
	   the single-precision 23-bit field; a4 holds the leftover
	   fraction used for rounding.  */
	ssai	(32 - 3)
	src	a5, a5, xl
	sll	a4, xl

.Ltrunc_addsign:
	/* Add the sign bit.  */
	extui	a6, xh, 31, 1
	slli	a6, a6, 31
	or	a2, a6, a5

	/* Round up if the leftover fraction is >= 1/2 (msb of a4 set).  */
	bgez	a4, 1f
	addi	a2, a2, 1
	/* Overflow to the exponent is OK.  The answer will be correct.  */

	/* Check if the leftover fraction is exactly 1/2 (no sticky bits).  */
	slli	a4, a4, 1
	beqz	a4, .Ltrunc_exactlyhalf
1:	leaf_return

.Ltrunc_exactlyhalf:
	/* Round down to the nearest even value by clearing the lsb.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Ltrunc_overflow:
	/* Check if exponent == 0x7ff (input was Inf or NaN rather than a
	   finite value that overflows).  */
	movi	a4, 0x7ff00000
	bnall	xh, a4, 1f

	/* Check if mantissa is nonzero (i.e., the input is a NaN).  */
	slli	a5, xh, 12
	or	a5, a5, xl
	beqz	a5, 1f

	/* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
	srli	a4, a4, 1

1:	slli	a4, a4, 4	/* 0xff000000 or 0xff800000 */
	/* Add the sign bit: shift the sign in from the left, pushing the
	   exponent/mantissa pattern right into place.  */
	extui	a6, xh, 31, 1
	ssai	1
	src	a2, a6, a4
	leaf_return

.Ltrunc_underflow:
	/* Find shift count for a subnormal.  Flush to zero if >= 32.  */
	extui	a6, xh, 20, 11
	movi	a5, 0x3ff - 0x7f
	sub	a6, a5, a6
	addi	a6, a6, 1
	bgeui	a6, 32, 1f

	/* Replace the exponent with an explicit "1.0": set bit 20 (the
	   implicit bit's position) and clear the rest of the old exponent.  */
	slli	a5, a5, 13	/* 0x700000 */
	or	a5, a5, xh
	slli	a5, a5, 11
	srli	a5, a5, 11

	/* Shift the mantissa left by 3 bits (into a5/a4).  */
	ssai	(32 - 3)
	src	a5, a5, xl
	sll	a4, xl

	/* Shift right by a6.  a7 collects the bits shifted out; if any
	   are set, fold a nonzero value into a4 so the sticky-bit test
	   in the rounding code sees them.  */
	ssr	a6
	sll	a7, a4
	src	a4, a5, a4
	srl	a5, a5
	beqz	a7, .Ltrunc_addsign
	or	a4, a4, a6	/* any positive, nonzero value will work */
	j	.Ltrunc_addsign

	/* Return +/- zero.  */
1:	extui	a2, xh, 31, 1
	slli	a2, a2, 31
	leaf_return
   2393 
   2394 #endif /* L_truncdfsf2 */
   2395 
   2396 #ifdef L_extendsfdf2
   2397 
	.align	4
	.global	__extendsfdf2
	.type	__extendsfdf2, @function

/* double __extendsfdf2 (float): widen the single-precision float in
   a2 to a double returned in xh/xl.  The conversion is always exact;
   NaNs are quieted, and subnormal singles are renormalized since
   every single-precision subnormal is a normal double.  */
__extendsfdf2:
	leaf_entry sp, 16

	/* Save the sign bit and then shift it off.  */
	extui	a5, a2, 31, 1
	slli	a5, a5, 31
	slli	a4, a2, 1

	/* Extract and check the exponent.  Zero exponent means zero or
	   subnormal; 255 (tested as 256 after the increment) means Inf
	   or NaN.  */
	extui	a6, a2, 23, 8
	beqz	a6, .Lextend_expzero
	addi	a6, a6, 1
	beqi	a6, 256, .Lextend_nan_or_inf

	/* Shift >> 3 into a4/xl, moving the 23-bit mantissa into the
	   upper bits of the 52-bit double mantissa field.  */
	srli	a4, a4, 4
	slli	xl, a2, (32 - 3)

	/* Adjust the exponent bias (single bias 0x7f -> double bias 0x3ff).  */
	movi	a6, (0x3ff - 0x7f) << 20
	add	a4, a4, a6

	/* Add the sign bit.  */
	or	xh, a4, a5
	leaf_return

.Lextend_nan_or_inf:
	movi	a4, 0x7ff00000

	/* Check for NaN (nonzero mantissa).  */
	slli	a7, a2, 9
	beqz	a7, 1f

	/* a6 is 256 here; 256 << 11 sets the quiet bit of the double
	   mantissa, turning the NaN into a quiet NaN.  */
	slli	a6, a6, 11	/* 0x80000 */
	or	a4, a4, a6

	/* Add the sign and return.  */
1:	or	xh, a4, a5
	movi	xl, 0
	leaf_return

.Lextend_expzero:
	/* If the mantissa is also zero, the input is +/-0; a4 is zero, so
	   branching back to 1b above returns the signed zero.  */
	beqz	a4, 1b

	/* Subnormal input: normalize it to have 8 zero bits before the
	   first 1 bit, placing the leading 1 where a normal single's
	   implicit bit would sit.  */
	do_nsau	a7, a4, a2, a3
	addi	a7, a7, -8
	ssl	a7
	sll	a4, a4

	/* Shift >> 3 into a4/xl.  */
	slli	xl, a4, (32 - 3)
	srli	a4, a4, 3

	/* Set the exponent, compensating for the normalization shift a7.  */
	movi	a6, 0x3fe - 0x7f
	sub	a6, a6, a7
	slli	a6, a6, 20
	add	a4, a4, a6

	/* Add the sign and return.  */
	or	xh, a4, a5
	leaf_return
   2464 
   2465 #endif /* L_extendsfdf2 */
   2466 
   2467 
   2468 #if XCHAL_HAVE_DFP_SQRT
   2469 #ifdef L_sqrt
   2470 
        .text
        .align 4
        .global __ieee754_sqrt
        .type	__ieee754_sqrt, @function

/* double __ieee754_sqrt (double): square root of the double in xh/xl,
   returned in xh/xl, using the Xtensa double-precision FP accelerator
   (XCHAL_HAVE_DFP_SQRT).  This is the canonical hardware-assisted
   sequence: sqrt0.d produces an initial reciprocal-square-root seed,
   the maddn.d/addexp.d chain performs Newton-Raphson refinement, and
   divn.d delivers the final correctly-rounded result.  The exact
   per-step semantics are defined by the DFP option's ISA; the
   instruction order below should not be changed.  */
__ieee754_sqrt:
	leaf_entry	sp, 16

	/* Move the argument into FP register f1.  */
	wfrd		f1, xh, xl

	sqrt0.d		f2, f1		/* initial approximation seed */
	const.d		f4, 0
	maddn.d		f4, f2, f2
	nexp01.d	f3, f1
	const.d		f0, 3
	addexp.d	f3, f0
	maddn.d		f0, f4, f3
	nexp01.d	f4, f1
	maddn.d		f2, f0, f2
	const.d		f5, 0
	maddn.d		f5, f2, f3
	const.d		f0, 3
	maddn.d		f0, f5, f2
	neg.d		f6, f4
	maddn.d		f2, f0, f2
	const.d		f0, 0
	const.d		f5, 0
	const.d		f7, 0
	maddn.d		f0, f6, f2
	maddn.d		f5, f2, f3
	const.d		f3, 3
	maddn.d		f7, f3, f2
	maddn.d		f4, f0, f0
	maddn.d		f3, f5, f2
	neg.d		f2, f7
	maddn.d		f0, f4, f2
	maddn.d		f7, f3, f7
	mksadj.d	f2, f1		/* sign/exponent adjustment from input */
	nexp01.d	f1, f1
	maddn.d		f1, f0, f0
	neg.d		f3, f7
	addexpm.d	f0, f2
	addexp.d	f3, f2
	divn.d		f0, f1, f3	/* final rounding/quotient step */

	/* Move the result back to the integer register pair.  */
	rfr		xl, f0
	rfrd		xh, f0

	leaf_return
   2519 
   2520 #endif /* L_sqrt */
   2521 #endif /* XCHAL_HAVE_DFP_SQRT */
   2522 
   2523 #if XCHAL_HAVE_DFP_RECIP
   2524 #ifdef L_recipdf2
	/* Reciprocal: double __recipdf2 (double).  Computes 1.0 / x for
	   the double in xh/xl, returned in xh/xl, using the Xtensa DFP
	   accelerator (XCHAL_HAVE_DFP_RECIP).  recip0.d produces an
	   initial approximation which is then refined by Newton-Raphson
	   iterations (each msub.d computes a residual, each mul.d/maddn.d
	   applies the correction).  The exact per-step semantics are
	   defined by the DFP option's ISA; do not reorder.  */

	.align	4
	.global	__recipdf2
	.type	__recipdf2, @function
__recipdf2:
	leaf_entry	sp, 16

	/* Move the argument into FP register f1.  */
	wfrd		f1, xh, xl

	recip0.d	f0, f1		/* initial approximation of 1/x */
	const.d		f2, 2
	msub.d		f2, f1, f0	/* f2 = 2 - x*f0 (residual) */
	mul.d		f3, f1, f0
	const.d		f4, 2
	mul.d		f5, f0, f2	/* refined approximation */
	msub.d		f4, f3, f2
	const.d		f2, 1
	mul.d		f0, f5, f4
	msub.d		f2, f1, f0	/* f2 = 1 - x*f0 (final residual) */
	maddn.d		f0, f0, f2	/* last correction step */

	/* Move the result back to the integer register pair.  */
	rfr		xl, f0
	rfrd		xh, f0

	leaf_return
   2551 
   2552 #endif /* L_recipdf2 */
   2553 #endif /* XCHAL_HAVE_DFP_RECIP */
   2554 
   2555 #if XCHAL_HAVE_DFP_RSQRT
   2556 #ifdef L_rsqrtdf2
	/* Reciprocal square root: double __rsqrtdf2 (double).  Computes
	   1.0 / sqrt(x) for the double in xh/xl, returned in xh/xl,
	   using the Xtensa DFP accelerator (XCHAL_HAVE_DFP_RSQRT).
	   rsqrt0.d produces an initial approximation, followed by three
	   Newton-Raphson refinement rounds (each computes a residual
	   1 - x*f0*f0 via msub.d and applies a correction via maddn.d).
	   The exact per-step semantics are defined by the DFP option's
	   ISA; do not reorder.  */

	.align	4
	.global	__rsqrtdf2
	.type	__rsqrtdf2, @function
__rsqrtdf2:
	leaf_entry	sp, 16

	/* Move the argument into FP register f1.  */
	wfrd		f1, xh, xl

	rsqrt0.d	f0, f1		/* initial approximation of 1/sqrt(x) */
	/* Refinement round 1.  */
	mul.d		f2, f1, f0
	const.d		f3, 3
	mul.d		f4, f3, f0
	const.d		f5, 1
	msub.d		f5, f2, f0
	maddn.d		f0, f4, f5
	/* Refinement round 2.  */
	const.d		f2, 1
	mul.d		f4, f1, f0
	mul.d		f5, f3, f0
	msub.d		f2, f4, f0
	maddn.d		f0, f5, f2
	/* Refinement round 3.  */
	const.d		f2, 1
	mul.d		f1, f1, f0
	mul.d		f3, f3, f0
	msub.d		f2, f1, f0
	maddn.d		f0, f3, f2

	/* Move the result back to the integer register pair.  */
	rfr		xl, f0
	rfrd		xh, f0

	leaf_return
   2589 
   2590 #endif /* L_rsqrtdf2 */
   2591 #endif /* XCHAL_HAVE_DFP_RSQRT */
   2592