1 /* IEEE-754 single-precision functions for Xtensa
2 Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson (at) tensilica.com) at Tensilica.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 #ifdef __XTENSA_EB__
27 #define xh a2
28 #define xl a3
29 #define yh a4
30 #define yl a5
31 #else
32 #define xh a3
33 #define xl a2
34 #define yh a5
35 #define yl a4
36 #endif
37
38 /* Warning! The branch displacements for some Xtensa branch instructions
39 are quite small, and this code has been carefully laid out to keep
40 branch targets in range. If you change anything, be sure to check that
41 the assembler is not relaxing anything to branch over a jump. */
42
43 #ifdef L_negsf2
44
/* float __negsf2 (float x)
   Negate a single-precision value: flip the IEEE-754 sign bit.
   In:  a2 = x (raw binary32 bits).  Out: a2 = -x.  Clobbers a4.  */
45 .align 4
46 .global __negsf2
47 .type __negsf2, @function
48 __negsf2:
49 leaf_entry sp, 16
50 movi a4, 0x80000000
51 xor a2, a2, a4
52 leaf_return
53
54 #endif /* L_negsf2 */
55
56 #ifdef L_addsubsf3
57
/* Out-of-line special-case handlers for __addsf3.  They sit before the
   entry point so that the short-range conditional branches in __addsf3
   can reach them.  On entry to these labels a6 = 0x7f800000 (the
   exponent mask loaded by __addsf3).  */
58 .literal_position
59 /* Addition */
60 __addsf3_aux:
61 
62 /* Handle NaNs and Infinities. (This code is placed before the
63 start of the function just to keep it in range of the limited
64 branch displacements.) */
65 
66 .Ladd_xnan_or_inf:
67 /* If y is neither Infinity nor NaN, return x. */
68 bnall a3, a6, .Ladd_return_nan_or_inf
69 /* If x is a NaN, return it. Otherwise, return y. */
70 slli a7, a2, 9
71 bnez a7, .Ladd_return_nan
72 
73 .Ladd_ynan_or_inf:
74 /* Return y. */
75 mov a2, a3
76 
77 .Ladd_return_nan_or_inf:
78 /* Value in a2 is Inf or NaN: quiet it if the mantissa is nonzero. */
79 slli a7, a2, 9
79b bnez a7, .Ladd_return_nan
80 leaf_return
81 
82 .Ladd_return_nan:
83 movi a6, 0x400000 /* make it a quiet NaN */
84 or a2, a2, a6
85 leaf_return
86 
87 .Ladd_opposite_signs:
88 /* Operand signs differ. Do a subtraction. */
/* a6 = 0x7f800000, so (a6 << 8) = 0x80000000: flip y's sign bit and
   reuse the same-sign subtraction path. */
89 slli a7, a6, 8
90 xor a3, a3, a7
91 j .Lsub_same_sign
92
/* float __addsf3 (float x, float y)
   IEEE-754 single-precision addition, round-to-nearest-even.
   In:  a2 = x, a3 = y (raw binary32 bits).  Out: a2 = x + y.
   a6 holds the exponent mask 0x7f800000 throughout; a7/a8 hold the
   operand exponents, a9 accumulates guard bits for rounding.  */
93 .align 4
94 .global __addsf3
95 .type __addsf3, @function
96 __addsf3:
97 leaf_entry sp, 16
98 movi a6, 0x7f800000
99 
100 /* Check if the two operands have the same sign. */
101 xor a7, a2, a3
102 bltz a7, .Ladd_opposite_signs
103 
104 .Ladd_same_sign:
105 /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
106 ball a2, a6, .Ladd_xnan_or_inf
107 ball a3, a6, .Ladd_ynan_or_inf
108 
109 /* Compare the exponents. The smaller operand will be shifted
110 right by the exponent difference and added to the larger
111 one. */
112 extui a7, a2, 23, 9
113 extui a8, a3, 23, 9
114 bltu a7, a8, .Ladd_shiftx
115 
116 .Ladd_shifty:
117 /* Check if the smaller (or equal) exponent is zero. */
118 bnone a3, a6, .Ladd_yexpzero
119 
120 /* Replace y sign/exponent with 0x008. */
121 or a3, a3, a6
122 slli a3, a3, 8
123 srli a3, a3, 8
124 
125 .Ladd_yexpdiff:
126 /* Compute the exponent difference. */
127 sub a10, a7, a8
128 
129 /* Exponent difference > 32 -- just return the bigger value. */
130 bgeui a10, 32, 1f
131 
132 /* Shift y right by the exponent difference. Any bits that are
133 shifted out of y are saved in a9 for rounding the result. */
134 ssr a10
135 movi a9, 0
136 src a9, a3, a9
137 srl a3, a3
138 
139 /* Do the addition. */
140 add a2, a2, a3
141 
142 /* Check if the add overflowed into the exponent. */
143 extui a10, a2, 23, 9
144 beq a10, a7, .Ladd_round
145 mov a8, a7
146 j .Ladd_carry
147 
148 .Ladd_yexpzero:
149 /* y is a subnormal value. Replace its sign/exponent with zero,
150 i.e., no implicit "1.0", and increment the apparent exponent
151 because subnormals behave as if they had the minimum (nonzero)
152 exponent. Test for the case when both exponents are zero. */
153 slli a3, a3, 9
154 srli a3, a3, 9
155 bnone a2, a6, .Ladd_bothexpzero
156 addi a8, a8, 1
157 j .Ladd_yexpdiff
158 
159 .Ladd_bothexpzero:
160 /* Both exponents are zero. Handle this as a special case. There
161 is no need to shift or round, and the normal code for handling
162 a carry into the exponent field will not work because it
163 assumes there is an implicit "1.0" that needs to be added. */
164 add a2, a2, a3
165 1: leaf_return
166 
167 .Ladd_xexpzero:
168 /* Same as "yexpzero" except skip handling the case when both
169 exponents are zero. */
170 slli a2, a2, 9
171 srli a2, a2, 9
172 addi a7, a7, 1
173 j .Ladd_xexpdiff
174 
175 .Ladd_shiftx:
176 /* Same thing as the "shifty" code, but with x and y swapped. Also,
177 because the exponent difference is always nonzero in this version,
178 the shift sequence can use SLL and skip loading a constant zero. */
179 bnone a2, a6, .Ladd_xexpzero
180 
181 or a2, a2, a6
182 slli a2, a2, 8
183 srli a2, a2, 8
184 
185 .Ladd_xexpdiff:
186 sub a10, a8, a7
187 bgeui a10, 32, .Ladd_returny
188 
189 ssr a10
190 sll a9, a2
191 srl a2, a2
192 
193 add a2, a2, a3
194 
195 /* Check if the add overflowed into the exponent. */
196 extui a10, a2, 23, 9
197 bne a10, a8, .Ladd_carry
198 
199 .Ladd_round:
200 /* Round up if the leftover fraction is >= 1/2. */
201 bgez a9, 1f
202 addi a2, a2, 1
203 
204 /* Check if the leftover fraction is exactly 1/2. */
205 slli a9, a9, 1
206 beqz a9, .Ladd_exactlyhalf
207 1: leaf_return
208 
209 .Ladd_returny:
210 mov a2, a3
211 leaf_return
212 
213 .Ladd_carry:
214 /* The addition has overflowed into the exponent field, so the
215 value needs to be renormalized. The mantissa of the result
216 can be recovered by subtracting the original exponent and
217 adding 0x800000 (which is the explicit "1.0" for the
218 mantissa of the non-shifted operand -- the "1.0" for the
219 shifted operand was already added). The mantissa can then
220 be shifted right by one bit. The explicit "1.0" of the
221 shifted mantissa then needs to be replaced by the exponent,
222 incremented by one to account for the normalizing shift.
223 It is faster to combine these operations: do the shift first
224 and combine the additions and subtractions. If x is the
225 original exponent, the result is:
226 shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
227 or:
228 shifted mantissa + ((x + 1) << 22)
229 Note that the exponent is incremented here by leaving the
230 explicit "1.0" of the mantissa in the exponent field. */
231 
232 /* Shift x right by one bit. Save the lsb. */
233 mov a10, a2
234 srli a2, a2, 1
235 
236 /* See explanation above. The original exponent is in a8. */
237 addi a8, a8, 1
238 slli a8, a8, 22
239 add a2, a2, a8
240 
241 /* Return an Infinity if the exponent overflowed. */
242 ball a2, a6, .Ladd_infinity
243 
244 /* Same thing as the "round" code except the msb of the leftover
245 fraction is bit 0 of a10, with the rest of the fraction in a9. */
246 bbci.l a10, 0, 1f
247 addi a2, a2, 1
248 beqz a9, .Ladd_exactlyhalf
249 1: leaf_return
250 
251 .Ladd_infinity:
252 /* Clear the mantissa. */
253 srli a2, a2, 23
254 slli a2, a2, 23
255 
256 /* The sign bit may have been lost in a carry-out. Put it back. */
257 slli a8, a8, 1
258 or a2, a2, a8
259 leaf_return
260 
261 .Ladd_exactlyhalf:
262 /* Round down to the nearest even value. */
263 srli a2, a2, 1
264 slli a2, a2, 1
265 leaf_return
266
267
268 /* Subtraction */
/* Out-of-line special-case handlers for __subsf3, placed before the
   entry point to stay within the short conditional-branch range.
   On entry to these labels a6 = 0x7f800000 (exponent mask).  */
269 __subsf3_aux:
270 
271 /* Handle NaNs and Infinities. (This code is placed before the
272 start of the function just to keep it in range of the limited
273 branch displacements.) */
274 
275 .Lsub_xnan_or_inf:
276 /* If y is neither Infinity nor NaN, return x. */
277 bnall a3, a6, .Lsub_return_nan_or_inf
278 /* Both x and y are either NaN or Inf, so the result is NaN. */
279 
280 .Lsub_return_nan:
281 movi a4, 0x400000 /* make it a quiet NaN */
282 or a2, a2, a4
283 leaf_return
284 
285 .Lsub_ynan_or_inf:
286 /* Negate y and return it. */
/* (a6 << 8) = 0x80000000, so the xor flips y's sign bit. */
287 slli a7, a6, 8
288 xor a2, a3, a7
289 
290 .Lsub_return_nan_or_inf:
291 slli a7, a2, 9
292 bnez a7, .Lsub_return_nan
293 leaf_return
294 
295 .Lsub_opposite_signs:
296 /* Operand signs differ. Do an addition. */
297 slli a7, a6, 8
298 xor a3, a3, a7
299 j .Ladd_same_sign
300
/* float __subsf3 (float x, float y)
   IEEE-754 single-precision subtraction, round-to-nearest-even.
   In:  a2 = x, a3 = y (raw binary32 bits).  Out: a2 = x - y.
   a6 = exponent mask 0x7f800000; a7/a8 = operand exponents;
   a9 = guard bits for rounding; a10 = exponent difference / borrow info.  */
301 .align 4
302 .global __subsf3
303 .type __subsf3, @function
304 __subsf3:
305 leaf_entry sp, 16
306 movi a6, 0x7f800000
307 
308 /* Check if the two operands have the same sign. */
309 xor a7, a2, a3
310 bltz a7, .Lsub_opposite_signs
311 
312 .Lsub_same_sign:
313 /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
314 ball a2, a6, .Lsub_xnan_or_inf
315 ball a3, a6, .Lsub_ynan_or_inf
316 
317 /* Compare the operands. In contrast to addition, the entire
318 value matters here. */
319 extui a7, a2, 23, 8
320 extui a8, a3, 23, 8
321 bltu a2, a3, .Lsub_xsmaller
322 
323 .Lsub_ysmaller:
324 /* Check if the smaller (or equal) exponent is zero. */
325 bnone a3, a6, .Lsub_yexpzero
326 
327 /* Replace y sign/exponent with 0x008. */
328 or a3, a3, a6
329 slli a3, a3, 8
330 srli a3, a3, 8
331 
332 .Lsub_yexpdiff:
333 /* Compute the exponent difference. */
334 sub a10, a7, a8
335 
336 /* Exponent difference > 32 -- just return the bigger value. */
337 bgeui a10, 32, 1f
338 
339 /* Shift y right by the exponent difference. Any bits that are
340 shifted out of y are saved in a9 for rounding the result. */
341 ssr a10
342 movi a9, 0
343 src a9, a3, a9
344 srl a3, a3
345 
346 sub a2, a2, a3
347 
348 /* Subtract the leftover bits in a9 from zero and propagate any
349 borrow from a2. */
350 neg a9, a9
351 addi a10, a2, -1
352 movnez a2, a10, a9
353 
354 /* Check if the subtract underflowed into the exponent. */
355 extui a10, a2, 23, 8
356 beq a10, a7, .Lsub_round
357 j .Lsub_borrow
358 
359 .Lsub_yexpzero:
360 /* Return zero if the inputs are equal. (For the non-subnormal
361 case, subtracting the "1.0" will cause a borrow from the exponent
362 and this case can be detected when handling the borrow.) */
363 beq a2, a3, .Lsub_return_zero
364 
365 /* y is a subnormal value. Replace its sign/exponent with zero,
366 i.e., no implicit "1.0". Unless x is also a subnormal, increment
367 y's apparent exponent because subnormals behave as if they had
368 the minimum (nonzero) exponent. */
369 slli a3, a3, 9
370 srli a3, a3, 9
371 bnone a2, a6, .Lsub_yexpdiff
372 addi a8, a8, 1
373 j .Lsub_yexpdiff
374 
375 .Lsub_returny:
376 /* Negate and return y. */
377 slli a7, a6, 8
378 xor a2, a3, a7
379 1: leaf_return
380 
381 .Lsub_xsmaller:
382 /* Same thing as the "ysmaller" code, but with x and y swapped and
383 with y negated. */
384 bnone a2, a6, .Lsub_xexpzero
385 
386 or a2, a2, a6
387 slli a2, a2, 8
388 srli a2, a2, 8
389 
390 .Lsub_xexpdiff:
391 sub a10, a8, a7
392 bgeui a10, 32, .Lsub_returny
393 
394 ssr a10
395 movi a9, 0
396 src a9, a2, a9
397 srl a2, a2
398 
399 /* Negate y. */
400 slli a11, a6, 8
401 xor a3, a3, a11
402 
403 sub a2, a3, a2
404 
405 neg a9, a9
406 addi a10, a2, -1
407 movnez a2, a10, a9
408 
409 /* Check if the subtract underflowed into the exponent. */
410 extui a10, a2, 23, 8
411 bne a10, a8, .Lsub_borrow
412 
413 .Lsub_round:
414 /* Round up if the leftover fraction is >= 1/2. */
415 bgez a9, 1f
416 addi a2, a2, 1
417 
418 /* Check if the leftover fraction is exactly 1/2. */
419 slli a9, a9, 1
420 beqz a9, .Lsub_exactlyhalf
421 1: leaf_return
422 
423 .Lsub_xexpzero:
424 /* Same as "yexpzero". */
425 beq a2, a3, .Lsub_return_zero
426 slli a2, a2, 9
427 srli a2, a2, 9
428 bnone a3, a6, .Lsub_xexpdiff
429 addi a7, a7, 1
430 j .Lsub_xexpdiff
431 
432 .Lsub_return_zero:
433 movi a2, 0
434 leaf_return
435 
436 .Lsub_borrow:
437 /* The subtraction has underflowed into the exponent field, so the
438 value needs to be renormalized. Shift the mantissa left as
439 needed to remove any leading zeros and adjust the exponent
440 accordingly. If the exponent is not large enough to remove
441 all the leading zeros, the result will be a subnormal value. */
442 
443 slli a8, a2, 9
444 beqz a8, .Lsub_xzero
/* do_nsau: count leading zeros of a8 into a6 (macro defined elsewhere;
   a7/a11 are its temporaries). */
445 do_nsau a6, a8, a7, a11
446 srli a8, a8, 9
447 bge a6, a10, .Lsub_subnormal
448 addi a6, a6, 1
449 
450 .Lsub_normalize_shift:
451 /* Shift the mantissa (a8/a9) left by a6. */
452 ssl a6
453 src a8, a8, a9
454 sll a9, a9
455 
456 /* Combine the shifted mantissa with the sign and exponent,
457 decrementing the exponent by a6. (The exponent has already
458 been decremented by one due to the borrow from the subtraction,
459 but adding the mantissa will increment the exponent by one.) */
460 srli a2, a2, 23
461 sub a2, a2, a6
462 slli a2, a2, 23
463 add a2, a2, a8
464 j .Lsub_round
465 
466 .Lsub_exactlyhalf:
467 /* Round down to the nearest even value. */
468 srli a2, a2, 1
469 slli a2, a2, 1
470 leaf_return
471 
472 .Lsub_xzero:
473 /* If there was a borrow from the exponent, and the mantissa and
474 guard digits are all zero, then the inputs were equal and the
475 result should be zero. */
476 beqz a9, .Lsub_return_zero
477 
478 /* Only the guard digit is nonzero. Shift by min(24, a10). */
479 addi a11, a10, -24
480 movi a6, 24
481 movltz a6, a10, a11
482 j .Lsub_normalize_shift
483 
484 .Lsub_subnormal:
485 /* The exponent is too small to shift away all the leading zeros.
486 Set a6 to the current exponent (which has already been
487 decremented by the borrow) so that the exponent of the result
488 will be zero. Do not add 1 to a6 in this case, because: (1)
489 adding the mantissa will not increment the exponent, so there is
490 no need to subtract anything extra from the exponent to
491 compensate, and (2) the effective exponent of a subnormal is 1
492 not 0 so the shift amount must be 1 smaller than normal. */
493 mov a6, a10
494 j .Lsub_normalize_shift
495
496 #endif /* L_addsubsf3 */
497
498 #ifdef L_mulsf3
499
500 /* Multiplication */
501 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
502 #define XCHAL_NO_MUL 1
503 #endif
504
/* Out-of-line special-case handlers for __mulsf3 (zeros, subnormals,
   NaNs, Infinities), placed before the entry point to stay within the
   short conditional-branch range.  On entry to these labels:
   a6 = 0x7f800000 (exponent mask), a7 = x ^ y (sign of the result in
   bit 31), a8/a9 = exponents of x/y.  */
505 .literal_position
506 __mulsf3_aux:
507 
508 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
509 (This code is placed before the start of the function just to
510 keep it in range of the limited branch displacements.) */
511 
512 .Lmul_xexpzero:
513 /* Clear the sign bit of x. */
514 slli a2, a2, 1
515 srli a2, a2, 1
516 
517 /* If x is zero, return zero. */
518 beqz a2, .Lmul_return_zero
519 
520 /* Normalize x. Adjust the exponent in a8. */
521 do_nsau a10, a2, a11, a12
522 addi a10, a10, -8
523 ssl a10
524 sll a2, a2
525 movi a8, 1
526 sub a8, a8, a10
527 j .Lmul_xnormalized
528 
529 .Lmul_yexpzero:
530 /* Clear the sign bit of y. */
531 slli a3, a3, 1
532 srli a3, a3, 1
533 
534 /* If y is zero, return zero. */
535 beqz a3, .Lmul_return_zero
536 
537 /* Normalize y. Adjust the exponent in a9. */
538 do_nsau a10, a3, a11, a12
539 addi a10, a10, -8
540 ssl a10
541 sll a3, a3
542 movi a9, 1
543 sub a9, a9, a10
544 j .Lmul_ynormalized
545 
546 .Lmul_return_zero:
547 /* Return zero with the appropriate sign bit. */
548 srli a2, a7, 31
549 slli a2, a2, 31
550 j .Lmul_done
551 
552 .Lmul_xnan_or_inf:
553 /* If y is zero, return NaN. */
554 slli a8, a3, 1
555 beqz a8, .Lmul_return_nan
556 /* If y is NaN, return y. */
557 bnall a3, a6, .Lmul_returnx
558 slli a8, a3, 9
559 beqz a8, .Lmul_returnx
560 
561 .Lmul_returny:
562 mov a2, a3
563 
564 .Lmul_returnx:
565 slli a8, a2, 9
566 bnez a8, .Lmul_return_nan
567 /* Set the sign bit and return. */
568 extui a7, a7, 31, 1
569 slli a2, a2, 1
570 ssai 1
571 src a2, a7, a2
572 j .Lmul_done
573 
574 .Lmul_ynan_or_inf:
575 /* If x is zero, return NaN. */
576 slli a8, a2, 1
577 bnez a8, .Lmul_returny
578 mov a2, a3
579 
580 .Lmul_return_nan:
581 movi a4, 0x400000 /* make it a quiet NaN */
582 or a2, a2, a4
583 j .Lmul_done
584
/* float __mulsf3 (float x, float y)
   IEEE-754 single-precision multiplication, round-to-nearest-even.
   In:  a2 = x, a3 = y (raw binary32 bits).  Out: a2 = x * y.
   a7 = x ^ y (result sign in bit 31); a8 = running exponent;
   the 48-bit product ends up in a2 (high) / a6 (low guard bits).
   Several multiply strategies are selected at build time via the
   XCHAL_* configuration macros.  */
585 .align 4
586 .global __mulsf3
587 .type __mulsf3, @function
588 __mulsf3:
589 #if __XTENSA_CALL0_ABI__
590 leaf_entry sp, 32
591 addi sp, sp, -32
592 s32i a12, sp, 16
593 s32i a13, sp, 20
594 s32i a14, sp, 24
595 s32i a15, sp, 28
596 #elif XCHAL_NO_MUL
597 /* This is not really a leaf function; allocate enough stack space
598 to allow CALL12s to a helper function. */
599 leaf_entry sp, 64
600 #else
601 leaf_entry sp, 32
602 #endif
603 movi a6, 0x7f800000
604 
605 /* Get the sign of the result. */
606 xor a7, a2, a3
607 
608 /* Check for NaN and infinity. */
609 ball a2, a6, .Lmul_xnan_or_inf
610 ball a3, a6, .Lmul_ynan_or_inf
611 
612 /* Extract the exponents. */
613 extui a8, a2, 23, 8
614 extui a9, a3, 23, 8
615 
616 beqz a8, .Lmul_xexpzero
617 .Lmul_xnormalized:
618 beqz a9, .Lmul_yexpzero
619 .Lmul_ynormalized:
620 
621 /* Add the exponents. */
622 add a8, a8, a9
623 
624 /* Replace sign/exponent fields with explicit "1.0". */
625 movi a10, 0xffffff
626 or a2, a2, a6
627 and a2, a2, a10
628 or a3, a3, a6
629 and a3, a3, a10
630 
631 /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
632 
633 #if XCHAL_HAVE_MUL32_HIGH
634 
635 mull a6, a2, a3
636 muluh a2, a2, a3
637 
638 #else
639 
640 /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
641 products. These partial products are:
642 
643 0 xl * yl
644 
645 1 xl * yh
646 2 xh * yl
647 
648 3 xh * yh
649 
650 If using the Mul16 or Mul32 multiplier options, these input
651 chunks must be stored in separate registers. For Mac16, the
652 UMUL.AA.* opcodes can specify that the inputs come from either
653 half of the registers, so there is no need to shift them out
654 ahead of time. If there is no multiply hardware, the 16-bit
655 chunks can be extracted when setting up the arguments to the
656 separate multiply function. */
657 
658 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
659 /* Calling a separate multiply function will clobber a0 and requires
660 use of a8 as a temporary, so save those values now. (The function
661 uses a custom ABI so nothing else needs to be saved.) */
662 s32i a0, sp, 0
663 s32i a8, sp, 4
664 #endif
665 
666 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
667 
668 #define a2h a4
669 #define a3h a5
670 
671 /* Get the high halves of the inputs into registers. */
672 srli a2h, a2, 16
673 srli a3h, a3, 16
674 
675 #define a2l a2
676 #define a3l a3
677 
678 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
679 /* Clear the high halves of the inputs. This does not matter
680 for MUL16 because the high bits are ignored. */
681 extui a2, a2, 0, 16
682 extui a3, a3, 0, 16
683 #endif
684 #endif /* MUL16 || MUL32 */
685 
686 
687 #if XCHAL_HAVE_MUL16
688 
689 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
690 mul16u dst, xreg ## xhalf, yreg ## yhalf
691 
692 #elif XCHAL_HAVE_MUL32
693 
694 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
695 mull dst, xreg ## xhalf, yreg ## yhalf
696 
697 #elif XCHAL_HAVE_MAC16
698 
699 /* The preprocessor insists on inserting a space when concatenating after
700 a period in the definition of do_mul below. These macros are a workaround
701 using underscores instead of periods when doing the concatenation. */
702 #define umul_aa_ll umul.aa.ll
703 #define umul_aa_lh umul.aa.lh
704 #define umul_aa_hl umul.aa.hl
705 #define umul_aa_hh umul.aa.hh
706 
707 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
708 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
709 rsr dst, ACCLO
710 
711 #else /* no multiply hardware */
712 
713 #define set_arg_l(dst, src) \
714 extui dst, src, 0, 16
715 #define set_arg_h(dst, src) \
716 srli dst, src, 16
717 
718 #if __XTENSA_CALL0_ABI__
719 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
720 set_arg_ ## xhalf (a13, xreg); \
721 set_arg_ ## yhalf (a14, yreg); \
722 call0 .Lmul_mulsi3; \
723 mov dst, a12
724 #else
725 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
726 set_arg_ ## xhalf (a14, xreg); \
727 set_arg_ ## yhalf (a15, yreg); \
728 call12 .Lmul_mulsi3; \
729 mov dst, a14
730 #endif /* __XTENSA_CALL0_ABI__ */
731 
732 #endif /* no multiply hardware */
733 
734 /* Add pp1 and pp2 into a6 with carry-out in a9. */
735 do_mul(a6, a2, l, a3, h) /* pp 1 */
736 do_mul(a11, a2, h, a3, l) /* pp 2 */
737 movi a9, 0
738 add a6, a6, a11
739 bgeu a6, a11, 1f
740 addi a9, a9, 1
741 1:
742 /* Shift the high half of a9/a6 into position in a9. Note that
743 this value can be safely incremented without any carry-outs. */
744 ssai 16
745 src a9, a9, a6
746 
747 /* Compute the low word into a6. */
748 do_mul(a11, a2, l, a3, l) /* pp 0 */
749 sll a6, a6
750 add a6, a6, a11
751 bgeu a6, a11, 1f
752 addi a9, a9, 1
753 1:
754 /* Compute the high word into a2. */
755 do_mul(a2, a2, h, a3, h) /* pp 3 */
756 add a2, a2, a9
757 
758 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
759 /* Restore values saved on the stack during the multiplication. */
760 l32i a0, sp, 0
761 l32i a8, sp, 4
762 #endif
763 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
764 
765 /* Shift left by 9 bits, unless there was a carry-out from the
766 multiply, in which case, shift by 8 bits and increment the
767 exponent. */
768 movi a4, 9
769 srli a5, a2, 24 - 9
770 beqz a5, 1f
771 addi a4, a4, -1
772 addi a8, a8, 1
773 1: ssl a4
774 src a2, a2, a6
775 sll a6, a6
776 
777 /* Subtract the extra bias from the exponent sum (plus one to account
778 for the explicit "1.0" of the mantissa that will be added to the
779 exponent in the final result). */
780 movi a4, 0x80
781 sub a8, a8, a4
782 
783 /* Check for over/underflow. The value in a8 is one less than the
784 final exponent, so values in the range 0..fd are OK here. */
785 movi a4, 0xfe
786 bgeu a8, a4, .Lmul_overflow
787 
788 .Lmul_round:
789 /* Round. */
790 bgez a6, .Lmul_rounded
791 addi a2, a2, 1
792 slli a6, a6, 1
793 beqz a6, .Lmul_exactlyhalf
794 
795 .Lmul_rounded:
796 /* Add the exponent to the mantissa. */
797 slli a8, a8, 23
798 add a2, a2, a8
799 
800 .Lmul_addsign:
801 /* Add the sign bit. */
802 srli a7, a7, 31
803 slli a7, a7, 31
804 or a2, a2, a7
805 
806 .Lmul_done:
807 #if __XTENSA_CALL0_ABI__
808 l32i a12, sp, 16
809 l32i a13, sp, 20
810 l32i a14, sp, 24
811 l32i a15, sp, 28
812 addi sp, sp, 32
813 #endif
814 leaf_return
815 
816 .Lmul_exactlyhalf:
817 /* Round down to the nearest even value. */
818 srli a2, a2, 1
819 slli a2, a2, 1
820 j .Lmul_rounded
821 
822 .Lmul_overflow:
823 bltz a8, .Lmul_underflow
824 /* Return +/- Infinity. */
825 movi a8, 0xff
826 slli a2, a8, 23
827 j .Lmul_addsign
828 
829 .Lmul_underflow:
830 /* Create a subnormal value, where the exponent field contains zero,
831 but the effective exponent is 1. The value of a8 is one less than
832 the actual exponent, so just negate it to get the shift amount. */
833 neg a8, a8
834 mov a9, a6
835 ssr a8
836 bgeui a8, 32, .Lmul_flush_to_zero
837 
838 /* Shift a2 right. Any bits that are shifted out of a2 are saved
839 in a6 (combined with the shifted-out bits currently in a6) for
840 rounding the result. */
841 sll a6, a2
842 srl a2, a2
843 
844 /* Set the exponent to zero. */
845 movi a8, 0
846 
847 /* Pack any nonzero bits shifted out into a6. */
848 beqz a9, .Lmul_round
849 movi a9, 1
850 or a6, a6, a9
851 j .Lmul_round
852 
853 .Lmul_flush_to_zero:
854 /* Return zero with the appropriate sign bit. */
855 srli a2, a7, 31
856 slli a2, a2, 31
857 j .Lmul_done
858
859 #if XCHAL_NO_MUL
860 
861 /* For Xtensa processors with no multiply hardware, this simplified
862 version of _mulsi3 is used for multiplying 16-bit chunks of
863 the floating-point mantissas. When using CALL0, this function
864 uses a custom ABI: the inputs are passed in a13 and a14, the
865 result is returned in a12, and a8 and a15 are clobbered. */
/* NOTE(review): with the windowed ABI (the #else branch below) the
   callers use CALL12, so inputs arrive in a2/a3 of the new window and
   the result is returned in a2; a4-a6 are used as temporaries. */
866 .align 4
867 .Lmul_mulsi3:
868 leaf_entry sp, 16
/* Shift-and-add multiply: processes 4 bits of src1 per iteration using
   conditional moves (movnez) instead of branches. */
869 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
870 movi \dst, 0
871 1: add \tmp1, \src2, \dst
872 extui \tmp2, \src1, 0, 1
873 movnez \dst, \tmp1, \tmp2
874 
875 do_addx2 \tmp1, \src2, \dst, \tmp1
876 extui \tmp2, \src1, 1, 1
877 movnez \dst, \tmp1, \tmp2
878 
879 do_addx4 \tmp1, \src2, \dst, \tmp1
880 extui \tmp2, \src1, 2, 1
881 movnez \dst, \tmp1, \tmp2
882 
883 do_addx8 \tmp1, \src2, \dst, \tmp1
884 extui \tmp2, \src1, 3, 1
885 movnez \dst, \tmp1, \tmp2
886 
887 srli \src1, \src1, 4
888 slli \src2, \src2, 4
889 bnez \src1, 1b
890 .endm
891 #if __XTENSA_CALL0_ABI__
892 mul_mulsi3_body a12, a13, a14, a15, a8
893 #else
894 /* The result will be written into a2, so save that argument in a4. */
895 mov a4, a2
896 mul_mulsi3_body a2, a4, a3, a5, a6
897 #endif
898 leaf_return
899 #endif /* XCHAL_NO_MUL */
900 #endif /* L_mulsf3 */
901
902 #ifdef L_divsf3
903
/* Out-of-line special-case handlers for __divsf3 (zeros, subnormals,
   NaNs, Infinities), placed before the entry point to stay within the
   short conditional-branch range.  On entry to these labels:
   a6 = 0x7f800000 (exponent mask), a7 = x ^ y (result sign in bit 31),
   a8/a9 = exponents of x/y.  */
904 .literal_position
905 /* Division */
906 __divsf3_aux:
907 
908 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
909 (This code is placed before the start of the function just to
910 keep it in range of the limited branch displacements.) */
911 
912 .Ldiv_yexpzero:
913 /* Clear the sign bit of y. */
914 slli a3, a3, 1
915 srli a3, a3, 1
916 
917 /* Check for division by zero. */
918 beqz a3, .Ldiv_yzero
919 
920 /* Normalize y. Adjust the exponent in a9. */
921 do_nsau a10, a3, a4, a5
922 addi a10, a10, -8
923 ssl a10
924 sll a3, a3
925 movi a9, 1
926 sub a9, a9, a10
927 j .Ldiv_ynormalized
928 
929 .Ldiv_yzero:
930 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
931 slli a4, a2, 1
932 srli a4, a4, 1
933 srli a2, a7, 31
934 slli a2, a2, 31
935 or a2, a2, a6
936 bnez a4, 1f
937 movi a4, 0x400000 /* make it a quiet NaN */
938 or a2, a2, a4
939 1: leaf_return
940 
941 .Ldiv_xexpzero:
942 /* Clear the sign bit of x. */
943 slli a2, a2, 1
944 srli a2, a2, 1
945 
946 /* If x is zero, return zero. */
947 beqz a2, .Ldiv_return_zero
948 
949 /* Normalize x. Adjust the exponent in a8. */
950 do_nsau a10, a2, a4, a5
951 addi a10, a10, -8
952 ssl a10
953 sll a2, a2
954 movi a8, 1
955 sub a8, a8, a10
956 j .Ldiv_xnormalized
957 
958 .Ldiv_return_zero:
959 /* Return zero with the appropriate sign bit. */
960 srli a2, a7, 31
961 slli a2, a2, 31
962 leaf_return
963 
964 .Ldiv_xnan_or_inf:
965 /* Set the sign bit of the result. */
966 srli a7, a3, 31
967 slli a7, a7, 31
968 xor a2, a2, a7
969 /* If y is NaN or Inf, return NaN. */
970 ball a3, a6, .Ldiv_return_nan
971 slli a7, a2, 9
972 bnez a7, .Ldiv_return_nan
973 leaf_return
974 
975 .Ldiv_ynan_or_inf:
976 /* If y is Infinity, return zero. */
977 slli a8, a3, 9
978 beqz a8, .Ldiv_return_zero
979 /* y is NaN; return it. */
980 mov a2, a3
981 
982 .Ldiv_return_nan:
983 movi a4, 0x400000 /* make it a quiet NaN */
984 or a2, a2, a4
985 leaf_return
986
/* float __divsf3 (float x, float y)
   IEEE-754 single-precision division, round-to-nearest-even,
   implemented by bitwise restoring long division of the mantissas.
   In:  a2 = x, a3 = y (raw binary32 bits).  Out: a2 = x / y.
   a7 = x ^ y (result sign); a8 = result exponent; a10 = quotient;
   a2 doubles as the running remainder during the divide loop.  */
987 .align 4
988 .global __divsf3
989 .type __divsf3, @function
990 __divsf3:
991 leaf_entry sp, 16
992 movi a6, 0x7f800000
993 
994 /* Get the sign of the result. */
995 xor a7, a2, a3
996 
997 /* Check for NaN and infinity. */
998 ball a2, a6, .Ldiv_xnan_or_inf
999 ball a3, a6, .Ldiv_ynan_or_inf
1000 
1001 /* Extract the exponents. */
1002 extui a8, a2, 23, 8
1003 extui a9, a3, 23, 8
1004 
1005 beqz a9, .Ldiv_yexpzero
1006 .Ldiv_ynormalized:
1007 beqz a8, .Ldiv_xexpzero
1008 .Ldiv_xnormalized:
1009 
1010 /* Subtract the exponents. */
1011 sub a8, a8, a9
1012 
1013 /* Replace sign/exponent fields with explicit "1.0". */
1014 movi a10, 0xffffff
1015 or a2, a2, a6
1016 and a2, a2, a10
1017 or a3, a3, a6
1018 and a3, a3, a10
1019 
1020 /* The first digit of the mantissa division must be a one.
1021 Shift x (and adjust the exponent) as needed to make this true. */
1022 bltu a3, a2, 1f
1023 slli a2, a2, 1
1024 addi a8, a8, -1
1025 1:
1026 /* Do the first subtraction and shift. */
1027 sub a2, a2, a3
1028 slli a2, a2, 1
1029 
1030 /* Put the quotient into a10. */
1031 movi a10, 1
1032 
1033 /* Divide one bit at a time for 23 bits. */
1034 movi a9, 23
1035 #if XCHAL_HAVE_LOOPS
1036 loop a9, .Ldiv_loopend
1037 #endif
1038 .Ldiv_loop:
1039 /* Shift the quotient << 1. */
1040 slli a10, a10, 1
1041 
1042 /* Is this digit a 0 or 1? */
1043 bltu a2, a3, 1f
1044 
1045 /* Output a 1 and subtract. */
1046 addi a10, a10, 1
1047 sub a2, a2, a3
1048 
1049 /* Shift the dividend << 1. */
1050 1: slli a2, a2, 1
1051 
1052 #if !XCHAL_HAVE_LOOPS
1053 addi a9, a9, -1
1054 bnez a9, .Ldiv_loop
1055 #endif
1056 .Ldiv_loopend:
1057 
1058 /* Add the exponent bias (less one to account for the explicit "1.0"
1059 of the mantissa that will be added to the exponent in the final
1060 result). */
1061 addi a8, a8, 0x7e
1062 
1063 /* Check for over/underflow. The value in a8 is one less than the
1064 final exponent, so values in the range 0..fd are OK here. */
1065 movi a4, 0xfe
1066 bgeu a8, a4, .Ldiv_overflow
1067 
1068 .Ldiv_round:
1069 /* Round. The remainder (<< 1) is in a2. */
1070 bltu a2, a3, .Ldiv_rounded
1071 addi a10, a10, 1
1072 beq a2, a3, .Ldiv_exactlyhalf
1073 
1074 .Ldiv_rounded:
1075 /* Add the exponent to the mantissa. */
1076 slli a8, a8, 23
1077 add a2, a10, a8
1078 
1079 .Ldiv_addsign:
1080 /* Add the sign bit. */
1081 srli a7, a7, 31
1082 slli a7, a7, 31
1083 or a2, a2, a7
1084 leaf_return
1085 
1086 .Ldiv_overflow:
1087 bltz a8, .Ldiv_underflow
1088 /* Return +/- Infinity. */
1089 addi a8, a4, 1 /* 0xff */
1090 slli a2, a8, 23
1091 j .Ldiv_addsign
1092 
1093 .Ldiv_exactlyhalf:
1094 /* Remainder is exactly half the divisor. Round even. */
1095 srli a10, a10, 1
1096 slli a10, a10, 1
1097 j .Ldiv_rounded
1098 
1099 .Ldiv_underflow:
1100 /* Create a subnormal value, where the exponent field contains zero,
1101 but the effective exponent is 1. The value of a8 is one less than
1102 the actual exponent, so just negate it to get the shift amount. */
1103 neg a8, a8
1104 ssr a8
1105 bgeui a8, 32, .Ldiv_flush_to_zero
1106 
1107 /* Shift a10 right. Any bits that are shifted out of a10 are
1108 saved in a6 for rounding the result. */
1109 sll a6, a10
1110 srl a10, a10
1111 
1112 /* Set the exponent to zero. */
1113 movi a8, 0
1114 
1115 /* Pack any nonzero remainder (in a2) into a6. */
1116 beqz a2, 1f
1117 movi a9, 1
1118 or a6, a6, a9
1119 
1120 /* Round a10 based on the bits shifted out into a6. */
1121 1: bgez a6, .Ldiv_rounded
1122 addi a10, a10, 1
1123 slli a6, a6, 1
1124 bnez a6, .Ldiv_rounded
1125 srli a10, a10, 1
1126 slli a10, a10, 1
1127 j .Ldiv_rounded
1128 
1129 .Ldiv_flush_to_zero:
1130 /* Return zero with the appropriate sign bit. */
1131 srli a2, a7, 31
1132 slli a2, a2, 31
1133 leaf_return
1134
1135 #endif /* L_divsf3 */
1136
1137 #ifdef L_cmpsf2
1138
1139 /* Equal and Not Equal */
1140
/* int __eqsf2 (float x, float y)  (also aliased as __nesf2)
   Returns 0 in a2 if x == y, nonzero (1) otherwise.  NaN compares
   unequal to everything, including itself: equal bit patterns with
   exponent 0xff and a nonzero mantissa yield 1.  +0 and -0 compare
   equal (handled at label 4 below).  */
1141 .align 4
1142 .global __eqsf2
1143 .global __nesf2
1144 .set __nesf2, __eqsf2
1145 .type __eqsf2, @function
1146 __eqsf2:
1147 leaf_entry sp, 16
1148 bne a2, a3, 4f
1149 
1150 /* The values are equal but NaN != NaN. Check the exponent. */
1151 movi a6, 0x7f800000
1152 ball a2, a6, 3f
1153 
1154 /* Equal. */
1155 movi a2, 0
1156 leaf_return
1157 
1158 /* Not equal. */
1159 2: movi a2, 1
1160 leaf_return
1161 
1162 /* Check if the mantissas are nonzero. */
1163 3: slli a7, a2, 9
1164 j 5f
1165 
1166 /* Check if x and y are zero with different signs. */
1167 4: or a7, a2, a3
1168 slli a7, a7, 1
1169 
1170 /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1171 or x when exponent(x) = 0x7f8 and x == y. */
1172 5: movi a2, 0
1173 movi a3, 1
1174 movnez a2, a3, a7
1175 leaf_return
1176
1177
1178 /* Greater Than */
1179
/* int __gtsf2 (float x, float y)
   Returns a value > 0 in a2 iff x > y; returns 0 when either operand
   is a NaN (so "gt" is false for unordered operands).  The ordered
   comparison itself is shared with __lesf2 via .Lle_cmp.  */
1180 .align 4
1181 .global __gtsf2
1182 .type __gtsf2, @function
1183 __gtsf2:
1184 leaf_entry sp, 16
1185 movi a6, 0x7f800000
1186 ball a2, a6, 2f
1187 1: bnall a3, a6, .Lle_cmp
1188 
1189 /* Check if y is a NaN. */
1190 slli a7, a3, 9
1191 beqz a7, .Lle_cmp
1192 movi a2, 0
1193 leaf_return
1194 
1195 /* Check if x is a NaN. */
1196 2: slli a7, a2, 9
1197 beqz a7, 1b
1198 movi a2, 0
1199 leaf_return
1200
1201
1202 /* Less Than or Equal */
1203
/* int __lesf2 (float x, float y)
   Returns a value <= 0 in a2 iff x <= y (0 for x <= y, 1 for x > y);
   returns 1 when either operand is a NaN (unordered).  The .Lle_cmp
   tail is shared with __gtsf2.  Signed-bit-pattern comparison handles
   negatives by reversing the unsigned compare (.Lle_xneg), and +0/-0
   compare equal in .Lle_diff_signs.  */
1204 .align 4
1205 .global __lesf2
1206 .type __lesf2, @function
1207 __lesf2:
1208 leaf_entry sp, 16
1209 movi a6, 0x7f800000
1210 ball a2, a6, 2f
1211 1: bnall a3, a6, .Lle_cmp
1212 
1213 /* Check if y is a NaN. */
1214 slli a7, a3, 9
1215 beqz a7, .Lle_cmp
1216 movi a2, 1
1217 leaf_return
1218 
1219 /* Check if x is a NaN. */
1220 2: slli a7, a2, 9
1221 beqz a7, 1b
1222 movi a2, 1
1223 leaf_return
1224 
1225 .Lle_cmp:
1226 /* Check if x and y have different signs. */
1227 xor a7, a2, a3
1228 bltz a7, .Lle_diff_signs
1229 
1230 /* Check if x is negative. */
1231 bltz a2, .Lle_xneg
1232 
1233 /* Check if x <= y. */
1234 bltu a3, a2, 5f
1235 4: movi a2, 0
1236 leaf_return
1237 
1238 .Lle_xneg:
1239 /* Check if y <= x. */
1240 bgeu a2, a3, 4b
1241 5: movi a2, 1
1242 leaf_return
1243 
1244 .Lle_diff_signs:
1245 bltz a2, 4b
1246 
1247 /* Check if both x and y are zero. */
1248 or a7, a2, a3
1249 slli a7, a7, 1
1250 movi a2, 1
1251 movi a3, 0
1252 moveqz a2, a3, a7
1253 leaf_return
1254
1255
1256 /* Greater Than or Equal */
1257
1258 .align 4
1259 .global __gesf2
1260 .type __gesf2, @function

/* int __gesf2 (float x, float y)
   Inputs: a2 = x, a3 = y.  Returns a value >= 0 iff x >= y, and -1
   (i.e. < 0, so "ge" is false) when either operand is a NaN.
   Once NaNs are ruled out, control falls into .Llt_cmp (shared with
   __ltsf2 below), which yields -1 when x < y and 0 otherwise.
   Clobbers a6, a7.  */
1261 __gesf2:
1262 leaf_entry sp, 16
1263 movi a6, 0x7f800000 /* exponent mask */
1264 ball a2, a6, 2f /* x may be Inf or NaN */
1265 1: bnall a3, a6, .Llt_cmp /* y finite: ordinary compare */
1266
1267 /* Check if y is a NaN. */
1268 slli a7, a3, 9 /* nonzero mantissa => NaN */
1269 beqz a7, .Llt_cmp /* y is Inf, still ordered */
1270 movi a2, -1 /* unordered: ge is false */
1271 leaf_return
1272
1273 /* Check if x is a NaN. */
1274 2: slli a7, a2, 9
1275 beqz a7, 1b /* x is Inf: go check y */
1276 movi a2, -1 /* unordered: ge is false */
1277 leaf_return
1278
1279
1280 /* Less Than */
1281
1282 .align 4
1283 .global __ltsf2
1284 .type __ltsf2, @function

/* int __ltsf2 (float x, float y)
   Inputs: a2 = x, a3 = y.  Returns a value < 0 iff x < y, and 0
   (so "lt" is false) when either operand is a NaN.
   The ordered-compare tail .Llt_cmp is shared with __gesf2 above.
   Clobbers a6, a7.  */
1285 __ltsf2:
1286 leaf_entry sp, 16
1287 movi a6, 0x7f800000 /* exponent mask */
1288 ball a2, a6, 2f /* x may be Inf or NaN */
1289 1: bnall a3, a6, .Llt_cmp /* y finite: ordinary compare */
1290
1291 /* Check if y is a NaN. */
1292 slli a7, a3, 9 /* nonzero mantissa => NaN */
1293 beqz a7, .Llt_cmp /* y is Inf, still ordered */
1294 movi a2, 0 /* unordered: lt is false */
1295 leaf_return
1296
1297 /* Check if x is a NaN. */
1298 2: slli a7, a2, 9
1299 beqz a7, 1b /* x is Inf: go check y */
1300 movi a2, 0 /* unordered: lt is false */
1301 leaf_return
1302
/* Shared ordered compare: returns -1 when x < y, 0 otherwise.
   Sign-magnitude encoding: unsigned compare of bit patterns orders
   two nonnegative values directly and two negative values in
   reverse.  */
1303 .Llt_cmp:
1304 /* Check if x and y have different signs. */
1305 xor a7, a2, a3
1306 bltz a7, .Llt_diff_signs
1307
1308 /* Check if x is negative. */
1309 bltz a2, .Llt_xneg
1310
1311 /* Check if x < y. */
1312 bgeu a2, a3, 5f /* both >= 0: bit patterns order directly */
1313 4: movi a2, -1 /* x < y */
1314 leaf_return
1315
1316 .Llt_xneg:
1317 /* Check if y < x. */
1318 bltu a3, a2, 4b /* both < 0: unsigned order is reversed */
1319 5: movi a2, 0 /* x >= y */
1320 leaf_return
1321
1322 .Llt_diff_signs:
1323 bgez a2, 5b /* x >= 0 > y (modulo zeros): not x < y */
1324
1325 /* Check if both x and y are nonzero. */
1326 or a7, a2, a3
1327 slli a7, a7, 1 /* discard signs; zero iff -0 vs +0 */
1328 movi a2, 0
1329 movi a3, -1
1330 movnez a2, a3, a7 /* lt holds unless both are zeros */
1331 leaf_return
1332
1333
1334 /* Unordered */
1335
1336 .align 4
1337 .global __unordsf2
1338 .type __unordsf2, @function

/* int __unordsf2 (float x, float y)
   Inputs: a2 = x, a3 = y.  Returns 1 if either operand is a NaN
   (unordered), 0 otherwise.  A NaN has an all-ones exponent and a
   nonzero mantissa; an all-ones exponent with zero mantissa is
   Infinity, which is ordered.  Clobbers a6, a7.  */
1339 __unordsf2:
1340 leaf_entry sp, 16
1341 movi a6, 0x7f800000 /* exponent mask */
1342 ball a2, a6, 3f /* x may be Inf or NaN */
1343 1: ball a3, a6, 4f /* y may be Inf or NaN */
1344 2: movi a2, 0 /* both ordered */
1345 leaf_return
1346
1347 3: slli a7, a2, 9 /* x mantissa; nonzero => NaN */
1348 beqz a7, 1b /* x is Inf: go check y */
1349 movi a2, 1
1350 leaf_return
1351
1352 4: slli a7, a3, 9 /* y mantissa; nonzero => NaN */
1353 beqz a7, 2b /* y is Inf: ordered */
1354 movi a2, 1
1355 leaf_return
1356
1357 #endif /* L_cmpsf2 */
1358
1359 #ifdef L_fixsfsi
1360
1361 .align 4
1362 .global __fixsfsi
1363 .type __fixsfsi, @function

/* int __fixsfsi (float x)
   Converts a2 (IEEE-754 single) to a signed 32-bit integer in a2,
   truncating toward zero.  Out-of-range values and +/-Infinity
   saturate to 0x7fffffff / 0x80000000; NaN converts to 0x7fffffff
   (it is forced positive, then takes the +maxint path).
   Clobbers a4, a5, a6, a7.  */
1364 __fixsfsi:
1365 leaf_entry sp, 16
1366
1367 /* Check for NaN and Infinity. */
1368 movi a6, 0x7f800000 /* exponent mask */
1369 ball a2, a6, .Lfixsfsi_nan_or_inf
1370
1371 /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
1372 extui a4, a2, 23, 8
1373 addi a4, a4, -0x7e /* a4 = number of integer bits */
1374 bgei a4, 32, .Lfixsfsi_maxint /* |x| >= 2^31: saturate */
1375 blti a4, 1, .Lfixsfsi_zero /* |x| < 1: truncates to 0 */
1376
1377 /* Add explicit "1.0" and shift << 8. */
1378 or a7, a2, a6 /* note: a7 keeps x's sign bit */
1379 slli a5, a7, 8 /* mantissa in msb position */
1380
1381 /* Shift back to the right, based on the exponent. */
1382 ssl a4 /* shift by 32 - a4 */
1383 srl a5, a5
1384
1385 /* Negate the result if sign != 0. */
1386 neg a2, a5
1387 movgez a2, a5, a7 /* sign of a7 == sign of x */
1388 leaf_return
1389
1390 .Lfixsfsi_nan_or_inf:
1391 /* Handle Infinity and NaN. */
1392 slli a4, a2, 9 /* mantissa; zero => Infinity */
1393 beqz a4, .Lfixsfsi_maxint
1394
1395 /* Translate NaN to +maxint. */
1396 movi a2, 0 /* clear sign so movgez picks 0x7fffffff */
1397
1398 .Lfixsfsi_maxint:
1399 slli a4, a6, 8 /* 0x80000000 */
1400 addi a5, a4, -1 /* 0x7fffffff */
1401 movgez a4, a5, a2 /* pick +maxint when x >= 0 */
1402 mov a2, a4
1403 leaf_return
1404
1405 .Lfixsfsi_zero:
1406 movi a2, 0
1407 leaf_return
1408
1409 #endif /* L_fixsfsi */
1410
1411 #ifdef L_fixsfdi
1412
1413 .align 4
1414 .global __fixsfdi
1415 .type __fixsfdi, @function

/* long long __fixsfdi (float x)
   Converts a2 (IEEE-754 single) to a signed 64-bit integer returned
   in xh:xl (the xh/xl register aliases defined at the top of this
   file select a2/a3 according to endianness), truncating toward
   zero.  Out-of-range values and +/-Infinity saturate to
   0x7fffffffffffffff / 0x8000000000000000; NaN converts to the
   positive maximum.  Clobbers a4, a6, a7.  */
1416 __fixsfdi:
1417 leaf_entry sp, 16
1418
1419 /* Check for NaN and Infinity. */
1420 movi a6, 0x7f800000 /* exponent mask */
1421 ball a2, a6, .Lfixsfdi_nan_or_inf
1422
1423 /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
1424 extui a4, a2, 23, 8
1425 addi a4, a4, -0x7e /* a4 = number of integer bits */
1426 bgei a4, 64, .Lfixsfdi_maxint /* |x| >= 2^63: saturate */
1427 blti a4, 1, .Lfixsfdi_zero /* |x| < 1: truncates to 0 */
1428
1429 /* Add explicit "1.0" and shift << 8. */
1430 or a7, a2, a6 /* note: a7 keeps x's sign bit */
1431 slli xh, a7, 8 /* mantissa in msb position */
1432
1433 /* Shift back to the right, based on the exponent. */
1434 ssl a4 /* shift by 64 - a4 */
1435 bgei a4, 32, .Lfixsfdi_smallshift /* result needs both words */
1436 srl xl, xh /* result fits in the low word */
1437 movi xh, 0
1438
1439 .Lfixsfdi_shifted:
1440 /* Negate the result if sign != 0. */
1441 bgez a7, 1f
1442 neg xl, xl /* 64-bit negate: negate low word, */
1443 neg xh, xh /* negate high word, and subtract */
1444 beqz xl, 1f /* the borrow unless low is zero */
1445 addi xh, xh, -1
1446 1: leaf_return
1447
1448 .Lfixsfdi_smallshift:
/* Exponent >= 32: the mantissa spans both result words; SAR (set by
   the ssl above) drives the funnel shift. */
1449 movi xl, 0
1450 sll xl, xh /* low word: mantissa shifted left */
1451 srl xh, xh /* high word: mantissa shifted right */
1452 j .Lfixsfdi_shifted
1453
1454 .Lfixsfdi_nan_or_inf:
1455 /* Handle Infinity and NaN. */
1456 slli a4, a2, 9 /* mantissa; zero => Infinity */
1457 beqz a4, .Lfixsfdi_maxint
1458
1459 /* Translate NaN to +maxint. */
1460 movi a2, 0 /* clear sign so the positive path is taken */
1461
1462 .Lfixsfdi_maxint:
1463 slli a7, a6, 8 /* 0x80000000 */
1464 bgez a2, 1f
1465 mov xh, a7 /* negative: 0x8000000000000000 */
1466 movi xl, 0
1467 leaf_return
1468
1469 1: addi xh, a7, -1 /* 0x7fffffff */
1470 movi xl, -1 /* positive: 0x7fffffffffffffff */
1471 leaf_return
1472
1473 .Lfixsfdi_zero:
1474 movi xh, 0
1475 movi xl, 0
1476 leaf_return
1477
1478 #endif /* L_fixsfdi */
1479
1480 #ifdef L_fixunssfsi
1481
1482 .align 4
1483 .global __fixunssfsi
1484 .type __fixunssfsi, @function

/* unsigned __fixunssfsi (float x)
   Converts a2 (IEEE-754 single) to an unsigned 32-bit integer in
   a2, truncating toward zero.  Values >= 2^32 saturate to
   0xffffffff; negative out-of-range values yield 0x80000000; NaN
   converts to 0xffffffff.  (Conversion of negative or NaN input is
   undefined by C, so these are just the chosen results.)
   Clobbers a4, a5, a6, a7.  */
1485 __fixunssfsi:
1486 leaf_entry sp, 16
1487
1488 /* Check for NaN and Infinity. */
1489 movi a6, 0x7f800000 /* exponent mask */
1490 ball a2, a6, .Lfixunssfsi_nan_or_inf
1491
1492 /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
1493 extui a4, a2, 23, 8
1494 addi a4, a4, -0x7f
1495 bgei a4, 32, .Lfixunssfsi_maxint /* |x| >= 2^32: saturate */
1496 bltz a4, .Lfixunssfsi_zero /* |x| < 1: truncates to 0 */
1497
1498 /* Add explicit "1.0" and shift << 8. */
1499 or a7, a2, a6 /* note: a7 keeps x's sign bit */
1500 slli a5, a7, 8 /* mantissa in msb position */
1501
1502 /* Shift back to the right, based on the exponent. */
1503 addi a4, a4, 1 /* a4 = number of integer bits */
1504 beqi a4, 32, .Lfixunssfsi_bigexp /* full 32 bits: no shift */
1505 ssl a4 /* shift by 32 - a4 */
1506 srl a5, a5
1507
1508 /* Negate the result if sign != 0. */
1509 neg a2, a5
1510 movgez a2, a5, a7 /* sign of a7 == sign of x */
1511 leaf_return
1512
1513 .Lfixunssfsi_nan_or_inf:
1514 /* Handle Infinity and NaN. */
1515 slli a4, a2, 9 /* mantissa; zero => Infinity */
1516 beqz a4, .Lfixunssfsi_maxint
1517
1518 /* Translate NaN to 0xffffffff. */
1519 movi a2, -1
1520 leaf_return
1521
1522 .Lfixunssfsi_maxint:
1523 slli a4, a6, 8 /* 0x80000000 */
1524 movi a5, -1 /* 0xffffffff */
1525 movgez a4, a5, a2 /* 0xffffffff when x >= 0 */
1526 mov a2, a4
1527 leaf_return
1528
1529 .Lfixunssfsi_zero:
1530 movi a2, 0
1531 leaf_return
1532
1533 .Lfixunssfsi_bigexp:
1534 /* Handle unsigned maximum exponent case. */
1535 bltz a2, 1f
1536 mov a2, a5 /* no shift needed */
1537 leaf_return
1538
1539 /* Return 0x80000000 if negative. */
1540 1: slli a2, a6, 8
1541 leaf_return
1542
1543 #endif /* L_fixunssfsi */
1544
1545 #ifdef L_fixunssfdi
1546
1547 .align 4
1548 .global __fixunssfdi
1549 .type __fixunssfdi, @function

/* unsigned long long __fixunssfdi (float x)
   Converts a2 (IEEE-754 single) to an unsigned 64-bit integer
   returned in xh:xl (endianness-dependent aliases of a2/a3),
   truncating toward zero.  Values >= 2^64 saturate to
   0xffffffffffffffff; negative out-of-range values yield
   0x8000000000000000; NaN converts to 0xffffffffffffffff.
   (Negative/NaN input is undefined by C; these are the chosen
   results.)  Clobbers a4, a6, a7.  */
1550 __fixunssfdi:
1551 leaf_entry sp, 16
1552
1553 /* Check for NaN and Infinity. */
1554 movi a6, 0x7f800000 /* exponent mask */
1555 ball a2, a6, .Lfixunssfdi_nan_or_inf
1556
1557 /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
1558 extui a4, a2, 23, 8
1559 addi a4, a4, -0x7f
1560 bgei a4, 64, .Lfixunssfdi_maxint /* |x| >= 2^64: saturate */
1561 bltz a4, .Lfixunssfdi_zero /* |x| < 1: truncates to 0 */
1562
1563 /* Add explicit "1.0" and shift << 8. */
1564 or a7, a2, a6 /* note: a7 keeps x's sign bit */
1565 slli xh, a7, 8 /* mantissa in msb position */
1566
1567 /* Shift back to the right, based on the exponent. */
1568 addi a4, a4, 1 /* a4 = number of integer bits */
1569 beqi a4, 64, .Lfixunssfdi_bigexp /* full 64 bits: no shift */
1570 ssl a4 /* shift by 64 - a4 */
1571 bgei a4, 32, .Lfixunssfdi_smallshift /* result needs both words */
1572 srl xl, xh /* result fits in the low word */
1573 movi xh, 0
1574
1575 .Lfixunssfdi_shifted:
1576 /* Negate the result if sign != 0. */
1577 bgez a7, 1f
1578 neg xl, xl /* 64-bit negate: negate low word, */
1579 neg xh, xh /* negate high word, and subtract */
1580 beqz xl, 1f /* the borrow unless low is zero */
1581 addi xh, xh, -1
1582 1: leaf_return
1583
1584 .Lfixunssfdi_smallshift:
/* Exponent >= 32: the mantissa spans both result words; SAR (set by
   the ssl above) drives the funnel shift. */
1585 movi xl, 0
1586 src xl, xh, xl /* low word: mantissa bits shifted in */
1587 srl xh, xh /* high word: mantissa shifted right */
1588 j .Lfixunssfdi_shifted
1589
1590 .Lfixunssfdi_nan_or_inf:
1591 /* Handle Infinity and NaN. */
1592 slli a4, a2, 9 /* mantissa; zero => Infinity */
1593 beqz a4, .Lfixunssfdi_maxint
1594
1595 /* Translate NaN to 0xffffffff.... */
1596 1: movi xh, -1
1597 movi xl, -1
1598 leaf_return
1599
1600 .Lfixunssfdi_maxint:
1601 bgez a2, 1b /* positive overflow: all ones */
1602 2: slli xh, a6, 8 /* 0x80000000 */
1603 movi xl, 0 /* negative: 0x8000000000000000 */
1604 leaf_return
1605
1606 .Lfixunssfdi_zero:
1607 movi xh, 0
1608 movi xl, 0
1609 leaf_return
1610
1611 .Lfixunssfdi_bigexp:
1612 /* Handle unsigned maximum exponent case. */
1613 bltz a7, 2b /* negative: return 0x8000000000000000 */
1614 movi xl, 0
1615 leaf_return /* no shift needed */
1616
1617 #endif /* L_fixunssfdi */
1618
1619 #ifdef L_floatsisf
1620
1621 .align 4
1622 .global __floatunsisf
1623 .type __floatunsisf, @function

/* float __floatunsisf (unsigned x)
   Converts the unsigned 32-bit integer in a2 to an IEEE-754 single
   in a2, rounding to nearest, ties to even.  Clears the sign and
   shares the normalization/rounding code with __floatsisf below.
   Clobbers a4, a5, a6, a7.  */
1624 __floatunsisf:
1625 leaf_entry sp, 16
1626 beqz a2, .Lfloatsisf_return /* 0 converts to +0.0 (same bits) */
1627
1628 /* Set the sign to zero and jump to the floatsisf code. */
1629 movi a7, 0
1630 j .Lfloatsisf_normalize
1631
1632 .align 4
1633 .global __floatsisf
1634 .type __floatsisf, @function

/* float __floatsisf (int x)
   Converts the signed 32-bit integer in a2 to an IEEE-754 single in
   a2, rounding to nearest, ties to even.  a7 holds the sign bit;
   the magnitude is normalized so its leading 1 is in the msb, then
   shifted into mantissa position with the spilled bits kept for
   rounding.  Clobbers a4, a5, a6, a7.  */
1635 __floatsisf:
1636 leaf_entry sp, 16
1637
1638 /* Check for zero. */
1639 beqz a2, .Lfloatsisf_return /* 0 converts to +0.0 (same bits) */
1640
1641 /* Save the sign. */
1642 extui a7, a2, 31, 1
1643
1644 /* Get the absolute value. */
1645 #if XCHAL_HAVE_ABS
1646 abs a2, a2
1647 #else
1648 neg a4, a2
1649 movltz a2, a4, a2 /* a2 = (a2 < 0) ? -a2 : a2 */
1650 #endif
1651
1652 .Lfloatsisf_normalize:
1653 /* Normalize with the first 1 bit in the msb. */
1654 do_nsau a4, a2, a5, a6 /* a4 = count of leading zeros */
1655 ssl a4
1656 sll a5, a2
1657
1658 /* Shift the mantissa into position, with rounding bits in a6. */
1659 srli a2, a5, 8
1660 slli a6, a5, (32 - 8) /* a6 = bits shifted off the bottom */
1661
1662 /* Set the exponent. */
1663 movi a5, 0x9d /* 0x7e + 31 */
1664 sub a5, a5, a4 /* biased exponent for 31 - a4 int bits */
1665 slli a5, a5, 23
1666 add a2, a2, a5 /* implicit leading 1 adds into exponent */
1667
1668 /* Add the sign. */
1669 slli a7, a7, 31
1670 or a2, a2, a7
1671
1672 /* Round up if the leftover fraction is >= 1/2. */
1673 bgez a6, .Lfloatsisf_return
1674 addi a2, a2, 1 /* Overflow to the exponent is OK. */
1675
1676 /* Check if the leftover fraction is exactly 1/2. */
1677 slli a6, a6, 1
1678 beqz a6, .Lfloatsisf_exactlyhalf
1679
1680 .Lfloatsisf_return:
1681 leaf_return
1682
1683 .Lfloatsisf_exactlyhalf:
1684 /* Round down to the nearest even value. */
1685 srli a2, a2, 1
1686 slli a2, a2, 1 /* clear bit 0: ties go to even */
1687 leaf_return
1688
1689 #endif /* L_floatsisf */
1690
1691 #ifdef L_floatdisf
1692
1693 .align 4
1694 .global __floatundisf
1695 .type __floatundisf, @function

/* float __floatundisf (unsigned long long x)
   Converts the unsigned 64-bit integer in xh:xl (endianness-
   dependent aliases of a2/a3) to an IEEE-754 single in a2, rounding
   to nearest, ties to even.  Clears the sign and shares the
   normalization/rounding code with __floatdisf below.
   Clobbers a4, a5, a6, a7.  */
1696 __floatundisf:
1697 leaf_entry sp, 16
1698
1699 /* Check for zero. */
1700 or a4, xh, xl
1701 beqz a4, 2f /* 0 converts to +0.0; a2 already 0 */
1702
1703 /* Set the sign to zero and jump to the floatdisf code. */
1704 movi a7, 0
1705 j .Lfloatdisf_normalize
1706
1707 .align 4
1708 .global __floatdisf
1709 .type __floatdisf, @function

/* float __floatdisf (long long x)
   Converts the signed 64-bit integer in xh:xl to an IEEE-754 single
   in a2, rounding to nearest, ties to even.  a7 holds the sign; the
   64-bit magnitude is normalized so its leading 1 is in the msb of
   xh, then the top bits become the mantissa and all remaining low
   bits are collapsed into a6 (with a sticky bit) for rounding.
   Clobbers a4, a5, a6, a7.  */
1710 __floatdisf:
1711 leaf_entry sp, 16
1712
1713 /* Check for zero. */
1714 or a4, xh, xl
1715 beqz a4, 2f /* 0 converts to +0.0; a2 already 0 */
1716
1717 /* Save the sign. */
1718 extui a7, xh, 31, 1
1719
1720 /* Get the absolute value. */
1721 bgez xh, .Lfloatdisf_normalize
1722 neg xl, xl /* 64-bit negate: negate low word, */
1723 neg xh, xh /* negate high word, and subtract */
1724 beqz xl, .Lfloatdisf_normalize /* the borrow unless low is zero */
1725 addi xh, xh, -1
1726
1727 .Lfloatdisf_normalize:
1728 /* Normalize with the first 1 bit in the msb of xh. */
1729 beqz xh, .Lfloatdisf_bigshift /* need a shift of >= 32 bits */
1730 do_nsau a4, xh, a5, a6 /* a4 = leading zeros of xh */
1731 ssl a4
1732 src xh, xh, xl /* funnel-shift xh:xl left by a4 */
1733 sll xl, xl
1734
1735 .Lfloatdisf_shifted:
1736 /* Shift the mantissa into position, with rounding bits in a6. */
1737 ssai 8 /* shift amount = 8 */
1738 sll a5, xl /* a5 = bits shifted out of xl entirely */
1739 src a6, xh, xl /* a6 = rounding bits */
1740 srl xh, xh /* xh = 24-bit mantissa (with leading 1) */
1741 beqz a5, 1f
1742 movi a5, 1
1743 or a6, a6, a5 /* fold lost bits into a sticky bit */
1744 1:
1745 /* Set the exponent. */
1746 movi a5, 0xbd /* 0x7e + 63 */
1747 sub a5, a5, a4 /* biased exponent for 63 - a4 int bits */
1748 slli a5, a5, 23
1749 add a2, xh, a5 /* implicit leading 1 adds into exponent */
1750
1751 /* Add the sign. */
1752 slli a7, a7, 31
1753 or a2, a2, a7
1754
1755 /* Round up if the leftover fraction is >= 1/2. */
1756 bgez a6, 2f
1757 addi a2, a2, 1 /* Overflow to the exponent is OK. */
1758
1759 /* Check if the leftover fraction is exactly 1/2. */
1760 slli a6, a6, 1
1761 beqz a6, .Lfloatdisf_exactlyhalf
1762 2: leaf_return
1763
1764 .Lfloatdisf_bigshift:
1765 /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
1766 do_nsau a4, xl, a5, a6 /* a4 = leading zeros of xl */
1767 ssl a4
1768 sll xh, xl
1769 movi xl, 0
1770 addi a4, a4, 32 /* account for the empty high word */
1771 j .Lfloatdisf_shifted
1772
1773 .Lfloatdisf_exactlyhalf:
1774 /* Round down to the nearest even value. */
1775 srli a2, a2, 1
1776 slli a2, a2, 1 /* clear bit 0: ties go to even */
1777 leaf_return
1778
1779 #endif /* L_floatdisf */
1780