/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
25
/* Map the high/low halves of a 64-bit value held in an a2/a3 (and a4/a5)
   register pair onto the correct registers for the configured endianness.
   NOTE(review): these names are not referenced by the single-precision
   code visible in this file; presumably shared with the double-precision
   variant -- confirm before removing.  */
#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */
42
#ifdef L_negsf2

/* float __negsf2 (float x)
   Negation: flip the sign bit only (works for zeros, subnormals,
   Infinities and NaNs alike).  Argument in a2, result in a2.  */
	.align	4
	.global	__negsf2
	.type	__negsf2, @function
__negsf2:
	leaf_entry sp, 16
	movi	a4, 0x80000000		/* sign-bit mask */
	xor	a2, a2, a4
	leaf_return

#endif /* L_negsf2 */
55
#ifdef L_addsubsf3

/* Addition.
   float __addsf3 (float x, float y): x in a2, y in a3, result in a2.
   Rounds to nearest, ties to even.  a6 holds the exponent-field mask
   0x7f800000 throughout; a9 collects the bits shifted out of the
   smaller operand for rounding.  */
__addsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Ladd_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, 1f
	/* If x is a NaN, return it.  Otherwise, return y.  */
	slli	a7, a2, 9
	beqz	a7, .Ladd_ynan_or_inf
1:	leaf_return

.Ladd_ynan_or_inf:
	/* Return y.  */
	mov	a2, a3
	leaf_return

.Ladd_opposite_signs:
	/* Operand signs differ.  Do a subtraction.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Lsub_same_sign

	.align	4
	.global	__addsf3
	.type	__addsf3, @function
__addsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Ladd_opposite_signs

.Ladd_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Ladd_xnan_or_inf
	ball	a3, a6, .Ladd_ynan_or_inf

	/* Compare the exponents.  The smaller operand will be shifted
	   right by the exponent difference and added to the larger
	   one.  */
	extui	a7, a2, 23, 9
	extui	a8, a3, 23, 9
	bltu	a7, a8, .Ladd_shiftx

.Ladd_shifty:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Ladd_yexpzero

	/* Replace y sign/exponent with 0x008.  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Ladd_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference > 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	/* Do the addition.  */
	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	beq	a10, a7, .Ladd_round
	mov	a8, a7
	j	.Ladd_carry

.Ladd_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0", and increment the apparent exponent
	   because subnormals behave as if they had the minimum (nonzero)
	   exponent.  Test for the case when both exponents are zero.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Ladd_bothexpzero
	addi	a8, a8, 1
	j	.Ladd_yexpdiff

.Ladd_bothexpzero:
	/* Both exponents are zero.  Handle this as a special case.  There
	   is no need to shift or round, and the normal code for handling
	   a carry into the exponent field will not work because it
	   assumes there is an implicit "1.0" that needs to be added.  */
	add	a2, a2, a3
1:	leaf_return

.Ladd_xexpzero:
	/* Same as "yexpzero" except skip handling the case when both
	   exponents are zero.  */
	slli	a2, a2, 9
	srli	a2, a2, 9
	addi	a7, a7, 1
	j	.Ladd_xexpdiff

.Ladd_shiftx:
	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
	   because the exponent difference is always nonzero in this version,
	   the shift sequence can use SLL and skip loading a constant zero.  */
	bnone	a2, a6, .Ladd_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Ladd_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Ladd_returny

	ssr	a10
	sll	a9, a2
	srl	a2, a2

	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	bne	a10, a8, .Ladd_carry

.Ladd_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_returny:
	mov	a2, a3
	leaf_return

.Ladd_carry:
	/* The addition has overflowed into the exponent field, so the
	   value needs to be renormalized.  The mantissa of the result
	   can be recovered by subtracting the original exponent and
	   adding 0x800000 (which is the explicit "1.0" for the
	   mantissa of the non-shifted operand -- the "1.0" for the
	   shifted operand was already added).  The mantissa can then
	   be shifted right by one bit.  The explicit "1.0" of the
	   shifted mantissa then needs to be replaced by the exponent,
	   incremented by one to account for the normalizing shift.
	   It is faster to combine these operations: do the shift first
	   and combine the additions and subtractions.  If x is the
	   original exponent, the result is:
	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
	   or:
	       shifted mantissa + ((x + 1) << 22)
	   Note that the exponent is incremented here by leaving the
	   explicit "1.0" of the mantissa in the exponent field.  */

	/* Shift x right by one bit.  Save the lsb.  */
	mov	a10, a2
	srli	a2, a2, 1

	/* See explanation above.  The original exponent is in a8.  */
	addi	a8, a8, 1
	slli	a8, a8, 22
	add	a2, a2, a8

	/* Return an Infinity if the exponent overflowed.  */
	ball	a2, a6, .Ladd_infinity

	/* Same thing as the "round" code except the msb of the leftover
	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
	bbci.l	a10, 0, 1f
	addi	a2, a2, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_infinity:
	/* Clear the mantissa.  */
	srli	a2, a2, 23
	slli	a2, a2, 23

	/* The sign bit may have been lost in a carry-out.  Put it back.  */
	slli	a8, a8, 1
	or	a2, a2, a8
	leaf_return

.Ladd_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
258
/* Subtraction.
   float __subsf3 (float x, float y): x in a2, y in a3, result in a2.
   Rounds to nearest, ties to even.  Shares .Ladd_same_sign with
   __addsf3 when the operand signs differ.  a6 holds the exponent
   mask 0x7f800000; a9 collects guard bits for rounding.  */
__subsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Lsub_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, 1f
	/* Both x and y are either NaN or Inf, so the result is NaN.  */
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  */
	slli	a7, a6, 8
	xor	a2, a3, a7
	leaf_return

.Lsub_opposite_signs:
	/* Operand signs differ.  Do an addition.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Ladd_same_sign

	.align	4
	.global	__subsf3
	.type	__subsf3, @function
__subsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Lsub_opposite_signs

.Lsub_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Lsub_xnan_or_inf
	ball	a3, a6, .Lsub_ynan_or_inf

	/* Compare the operands.  In contrast to addition, the entire
	   value matters here.  */
	extui	a7, a2, 23, 8
	extui	a8, a3, 23, 8
	bltu	a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Lsub_yexpzero

	/* Replace y sign/exponent with 0x008.  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Lsub_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference > 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	sub	a2, a2, a3

	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from a2.  */
	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	beq	a10, a7, .Lsub_round
	j	.Lsub_borrow

.Lsub_yexpzero:
	/* Return zero if the inputs are equal.  (For the non-subnormal
	   case, subtracting the "1.0" will cause a borrow from the exponent
	   and this case can be detected when handling the borrow.)  */
	beq	a2, a3, .Lsub_return_zero

	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
	   y's apparent exponent because subnormals behave as if they had
	   the minimum (nonzero) exponent.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Lsub_yexpdiff
	addi	a8, a8, 1
	j	.Lsub_yexpdiff

.Lsub_returny:
	/* Negate and return y.  */
	slli	a7, a6, 8
	xor	a2, a3, a7
1:	leaf_return

.Lsub_xsmaller:
	/* Same thing as the "ysmaller" code, but with x and y swapped and
	   with y negated.  */
	bnone	a2, a6, .Lsub_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Lsub_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Lsub_returny

	ssr	a10
	movi	a9, 0
	src	a9, a2, a9
	srl	a2, a2

	/* Negate y.  */
	slli	a11, a6, 8
	xor	a3, a3, a11

	sub	a2, a3, a2

	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	bne	a10, a8, .Lsub_borrow

.Lsub_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Lsub_exactlyhalf
1:	leaf_return

.Lsub_xexpzero:
	/* Same as "yexpzero".  */
	beq	a2, a3, .Lsub_return_zero
	slli	a2, a2, 9
	srli	a2, a2, 9
	bnone	a3, a6, .Lsub_xexpdiff
	addi	a7, a7, 1
	j	.Lsub_xexpdiff

.Lsub_return_zero:
	movi	a2, 0
	leaf_return

.Lsub_borrow:
	/* The subtraction has underflowed into the exponent field, so the
	   value needs to be renormalized.  Shift the mantissa left as
	   needed to remove any leading zeros and adjust the exponent
	   accordingly.  If the exponent is not large enough to remove
	   all the leading zeros, the result will be a subnormal value.  */

	slli	a8, a2, 9
	beqz	a8, .Lsub_xzero
	do_nsau	a6, a8, a7, a11
	srli	a8, a8, 9
	bge	a6, a10, .Lsub_subnormal
	addi	a6, a6, 1

.Lsub_normalize_shift:
	/* Shift the mantissa (a8/a9) left by a6.  */
	ssl	a6
	src	a8, a8, a9
	sll	a9, a9

	/* Combine the shifted mantissa with the sign and exponent,
	   decrementing the exponent by a6.  (The exponent has already
	   been decremented by one due to the borrow from the subtraction,
	   but adding the mantissa will increment the exponent by one.)  */
	srli	a2, a2, 23
	sub	a2, a2, a6
	slli	a2, a2, 23
	add	a2, a2, a8
	j	.Lsub_round

.Lsub_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Lsub_xzero:
	/* If there was a borrow from the exponent, and the mantissa and
	   guard digits are all zero, then the inputs were equal and the
	   result should be zero.  */
	beqz	a9, .Lsub_return_zero

	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
	addi	a11, a10, -24
	movi	a6, 24
	movltz	a6, a10, a11
	j	.Lsub_normalize_shift

.Lsub_subnormal:
	/* The exponent is too small to shift away all the leading zeros.
	   Set a6 to the current exponent (which has already been
	   decremented by the borrow) so that the exponent of the result
	   will be zero.  Do not add 1 to a6 in this case, because: (1)
	   adding the mantissa will not increment the exponent, so there is
	   no need to subtract anything extra from the exponent to
	   compensate, and (2) the effective exponent of a subnormal is 1
	   not 0 so the shift amount must be 1 smaller than normal.  */
	mov	a6, a10
	j	.Lsub_normalize_shift

#endif /* L_addsubsf3 */
482
#ifdef L_mulsf3

/* Multiplication.
   float __mulsf3 (float x, float y): x in a2, y in a3, result in a2.
   Rounds to nearest, ties to even.  The 24x24-bit mantissa product is
   computed as a 32x32->64-bit multiply, using MUL32_HIGH when available,
   otherwise four 16x16 partial products via MUL16/MUL32/MAC16 or, with
   no multiply hardware at all, the software helper .Lmul_mulsi3.  */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

	.literal_position
__mulsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Lmul_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Lmul_xnormalized

.Lmul_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* If y is zero, return zero.  */
	beqz	a3, .Lmul_return_zero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Lmul_ynormalized

.Lmul_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

.Lmul_xnan_or_inf:
	/* If y is zero, return NaN.  */
	slli	a8, a3, 1
	bnez	a8, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	j	.Lmul_done
1:
	/* If y is NaN, return y.  */
	bnall	a3, a6, .Lmul_returnx
	slli	a8, a3, 9
	beqz	a8, .Lmul_returnx

.Lmul_returny:
	mov	a2, a3

.Lmul_returnx:
	/* Set the sign bit and return.  */
	extui	a7, a7, 31, 1
	slli	a2, a2, 1
	ssai	1
	src	a2, a7, a2
	j	.Lmul_done

.Lmul_ynan_or_inf:
	/* If x is zero, return NaN.  */
	slli	a8, a2, 1
	bnez	a8, .Lmul_returny
	movi	a7, 0x400000	/* make it a quiet NaN */
	or	a2, a3, a7
	j	.Lmul_done

	.align	4
	.global	__mulsf3
	.type	__mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 64
#else
	leaf_entry sp, 32
#endif
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Lmul_xnan_or_inf
	ball	a3, a6, .Lmul_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a8, .Lmul_xexpzero
.Lmul_xnormalized:
	beqz	a9, .Lmul_yexpzero
.Lmul_ynormalized:

	/* Add the exponents.  */
	add	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

	mull	a6, a2, a3
	muluh	a2, a2, a3

#else

	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
	   products.  These partial products are:

		0 xl * yl

		1 xl * yh
		2 xh * yl

		3 xh * yh

	   If using the Mul16 or Mul32 multiplier options, these input
	   chunks must be stored in separate registers.  For Mac16, the
	   UMUL.AA.* opcodes can specify that the inputs come from either
	   half of the registers, so there is no need to shift them out
	   ahead of time.  If there is no multiply hardware, the 16-bit
	   chunks can be extracted when setting up the arguments to the
	   separate multiply function.  */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Calling a separate multiply function will clobber a0 and requires
	   use of a8 as a temporary, so save those values now.  (The function
	   uses a custom ABI so nothing else needs to be saved.)  */
	s32i	a0, sp, 0
	s32i	a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into a2.  */
	do_mul(a2, a2, h, a3, h)	/* pp 3 */
	add	a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore values saved on the stack during the multiplication.  */
	l32i	a0, sp, 0
	l32i	a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

	/* Shift left by 9 bits, unless there was a carry-out from the
	   multiply, in which case, shift by 8 bits and increment the
	   exponent.  */
	movi	a4, 9
	srli	a5, a2, 24 - 9
	beqz	a5, 1f
	addi	a4, a4, -1
	addi	a8, a8, 1
1:	ssl	a4
	src	a2, a2, a6
	sll	a6, a6

	/* Subtract the extra bias from the exponent sum (plus one to account
	   for the explicit "1.0" of the mantissa that will be added to the
	   exponent in the final result).  */
	movi	a4, 0x80
	sub	a8, a8, a4

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Lmul_overflow

.Lmul_round:
	/* Round.  */
	bgez	a6, .Lmul_rounded
	addi	a2, a2, 1
	slli	a6, a6, 1
	beqz	a6, .Lmul_exactlyhalf

.Lmul_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a2, a8

.Lmul_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

.Lmul_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	j	.Lmul_rounded

.Lmul_overflow:
	bltz	a8, .Lmul_underflow
	/* Return +/- Infinity.  */
	movi	a8, 0xff
	slli	a2, a8, 23
	j	.Lmul_addsign

.Lmul_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	mov	a9, a6
	ssr	a8
	bgeui	a8, 32, .Lmul_flush_to_zero

	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
	   in a6 (combined with the shifted-out bits currently in a6) for
	   rounding the result.  */
	sll	a6, a2
	srl	a2, a2

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero bits shifted out into a6.  */
	beqz	a9, .Lmul_round
	movi	a9, 1
	or	a6, a6, a9
	j	.Lmul_round

.Lmul_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */
885
#ifdef L_divsf3

	.literal_position
/* Division.
   float __divsf3 (float x, float y): x in a2, y in a3, result in a2.
   Rounds to nearest, ties to even.  Uses a simple one-bit-per-iteration
   restoring division of the mantissas (23 iterations after the first
   subtract).  a6 holds the exponent mask 0x7f800000; a7 carries the
   result sign; a10 accumulates the quotient.  */
__divsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* Check for division by zero.  */
	beqz	a3, .Ldiv_yzero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
	slli	a4, a2, 1
	srli	a4, a4, 1
	srli	a2, a7, 31
	slli	a2, a2, 31
	or	a2, a2, a6
	bnez	a4, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Ldiv_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

.Ldiv_xnan_or_inf:
	/* Set the sign bit of the result.  */
	srli	a7, a3, 31
	slli	a7, a7, 31
	xor	a2, a2, a7
	/* If y is NaN or Inf, return NaN.  */
	bnall	a3, a6, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero.  */
	slli	a8, a3, 9
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	a2, a3
	leaf_return

	.align	4
	.global	__divsf3
	.type	__divsf3, @function
__divsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Ldiv_xnan_or_inf
	ball	a3, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	a3, a2, 1f
	slli	a2, a2, 1
	addi	a8, a8, -1
1:
	/* Do the first subtraction and shift.  */
	sub	a2, a2, a3
	slli	a2, a2, 1

	/* Put the quotient into a10.  */
	movi	a10, 1

	/* Divide one bit at a time for 23 bits.  */
	movi	a9, 23
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	slli	a10, a10, 1

	/* Is this digit a 0 or 1?  */
	bltu	a2, a3, 1f

	/* Output a 1 and subtract.  */
	addi	a10, a10, 1
	sub	a2, a2, a3

	/* Shift the dividend << 1.  */
1:	slli	a2, a2, 1

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	addi	a8, a8, 0x7e

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in a2.  */
	bltu	a2, a3, .Ldiv_rounded
	addi	a10, a10, 1
	beq	a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7
	leaf_return

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a4, 1	/* 0xff */
	slli	a2, a8, 23
	j	.Ldiv_addsign

.Ldiv_exactlyhalf:
	/* Remainder is exactly half the divisor.  Round even.  */
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_flush_to_zero

	/* Shift a10 right.  Any bits that are shifted out of a10 are
	   saved in a6 for rounding the result.  */
	sll	a6, a10
	srl	a10, a10

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero remainder (in a2) into a6.  */
	beqz	a2, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10 based on the bits shifted out into a6.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a10, a10, 1
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

#endif /* L_divsf3 */
1116
#ifdef L_cmpsf2

/* Equal and Not Equal.
   int __eqsf2 (float x, float y) / int __nesf2 (float x, float y):
   x in a2, y in a3; returns 0 in a2 iff x == y (so NaN compares
   unequal, and +0 == -0).  __nesf2 is an alias of __eqsf2 because
   both use the same zero/nonzero convention.  */

	.align	4
	.global	__eqsf2
	.global	__nesf2
	.set	__nesf2, __eqsf2
	.type	__eqsf2, @function
__eqsf2:
	leaf_entry sp, 16
	bne	a2, a3, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7f800000
	ball	a2, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Check if the mantissas are nonzero.  */
3:	slli	a7, a2, 9
	j	5f

	/* Check if x and y are zero with different signs.  */
4:	or	a7, a2, a3
	slli	a7, a7, 1

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   or x when exponent(x) = 0x7f8 and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return
1156
1157
/* Greater Than.
   int __gtsf2 (float x, float y): returns a value > 0 iff x > y.
   If either operand is a NaN, returns 0 (so the "greater than"
   predicate is false).  Falls through to the shared .Lle_cmp
   comparison code in __lesf2.  */

	.align	4
	.global	__gtsf2
	.type	__gtsf2, @function
__gtsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return
1180
1181
/* Less Than or Equal.
   int __lesf2 (float x, float y): returns a value <= 0 iff x <= y.
   If either operand is a NaN, returns 1 (predicate false).
   .Lle_cmp is shared with __gtsf2, which uses the same return
   convention.  */

	.align	4
	.global	__lesf2
	.type	__lesf2, @function
__lesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Lle_xneg

	/* Check if x <= y.  */
	bltu	a3, a2, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Check if y <= x.  */
	bgeu	a2, a3, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	bltz	a2, 4b

	/* Check if both x and y are zero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return
1234
1235
/* Greater Than or Equal */

/* __gesf2: result >= 0 iff a2 >= a3 as single-precision floats.
   Returns -1 when either operand is a NaN, so a ">=" test on the
   result is false.  The ordered-compare tail is the shared
   .Llt_cmp code in __ltsf2 (returns -1 for x < y, 0 otherwise).  */

	.align	4
	.global	__gesf2
	.type	__gesf2, @function
__gesf2:
	leaf_entry	sp, 16
	movi	a6, 0x7f800000		/* exponent mask */
	ball	a2, a6, 2f		/* x has max exponent: Inf or NaN */
1:	bnall	a3, a6, .Llt_cmp	/* y finite: do the ordered compare */

	/* Check if y is a NaN.  */
	slli	a7, a3, 9		/* y's mantissa bits */
	beqz	a7, .Llt_cmp		/* y is Inf, not NaN */
	movi	a2, -1			/* NaN: "x >= y" is false */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b			/* x is Inf: go check y */
	movi	a2, -1
	leaf_return
1258
1259
/* Less Than */

/* __ltsf2: result < 0 iff a2 < a3 as single-precision floats.
   Returns 0 when either operand is a NaN, so a "<" test on the
   result is false.  The .Llt_cmp tail is also the ordered-compare
   path for __gesf2 above.  */

	.align	4
	.global	__ltsf2
	.type	__ltsf2, @function
__ltsf2:
	leaf_entry	sp, 16
	movi	a6, 0x7f800000		/* exponent mask */
	ball	a2, a6, 2f		/* x has max exponent: Inf or NaN */
1:	bnall	a3, a6, .Llt_cmp	/* y finite: do the ordered compare */

	/* Check if y is a NaN.  */
	slli	a7, a3, 9		/* y's mantissa bits */
	beqz	a7, .Llt_cmp		/* y is Inf, not NaN */
	movi	a2, 0			/* NaN: "x < y" is false */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b			/* x is Inf: go check y */
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* No NaNs from here on.  Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Llt_xneg

	/* Both nonnegative: float order matches unsigned order of the
	   bit patterns.  Check if x < y.  */
	bgeu	a2, a3, 5f
4:	movi	a2, -1			/* x < y */
	leaf_return

.Llt_xneg:
	/* Both negative: bit-pattern order is reversed.
	   Check if y < x.  */
	bltu	a3, a2, 4b
5:	movi	a2, 0			/* x >= y */
	leaf_return

.Llt_diff_signs:
	bgez	a2, 5b			/* x >= 0 > y: not less */

	/* x < 0 <= y: x < y unless both are zeros (+0 == -0).
	   Check if both x and y are nonzero.  */
	or	a7, a2, a3
	slli	a7, a7, 1		/* discard the sign bits */
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7		/* a2 = -1 iff not both +/-0 */
	leaf_return
1312
1313
/* Unordered */

/* __unordsf2: return 1 if either a2 or a3 is a NaN, else 0.  */

	.align	4
	.global	__unordsf2
	.type	__unordsf2, @function
__unordsf2:
	leaf_entry	sp, 16
	movi	a6, 0x7f800000		/* exponent mask */
	ball	a2, a6, 3f		/* x has max exponent: Inf or NaN */
1:	ball	a3, a6, 4f		/* y has max exponent: Inf or NaN */
2:	movi	a2, 0			/* neither is a NaN: ordered */
	leaf_return

3:	slli	a7, a2, 9		/* x's mantissa bits */
	beqz	a7, 1b			/* x is Inf: go check y */
	movi	a2, 1			/* x is a NaN */
	leaf_return

4:	slli	a7, a3, 9		/* y's mantissa bits */
	beqz	a7, 2b			/* y is Inf: ordered */
	movi	a2, 1			/* y is a NaN */
	leaf_return
1336
1337 #endif /* L_cmpsf2 */
1338
1339 #ifdef L_fixsfsi
1340
/* __fixsfsi: convert single-precision float (a2) to signed 32-bit
   integer, truncating toward zero.  Out-of-range positives and NaN
   return 0x7fffffff; out-of-range negatives return 0x80000000.  */

	.align	4
	.global	__fixsfsi
	.type	__fixsfsi, @function
__fixsfsi:
	leaf_entry	sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000		/* exponent mask */
	ball	a2, a6, .Lfixsfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e		/* a4 = number of integer bits */
	bgei	a4, 32, .Lfixsfsi_maxint	/* |x| >= 2^31: overflow */
	blti	a4, 1, .Lfixsfsi_zero		/* |x| < 1: truncates to 0 */

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6		/* a7 keeps the sign bit for later */
	slli	a5, a7, 8		/* mantissa with hidden 1 at bit 31 */

	/* Shift back to the right, based on the exponent.  */
	ssl	a4			/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7		/* a7 >= 0 means x was positive */
	leaf_return

.Lfixsfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9		/* mantissa bits */
	beqz	a4, .Lfixsfsi_maxint	/* Infinity */

	/* Translate NaN to +maxint.  */
	movi	a2, 0			/* clear the sign so maxint is positive */

.Lfixsfsi_maxint:
	slli	a4, a6, 8		/* 0x80000000 */
	addi	a5, a4, -1		/* 0x7fffffff */
	movgez	a4, a5, a2		/* nonnegative -> 0x7fffffff */
	mov	a2, a4
	leaf_return

.Lfixsfsi_zero:
	movi	a2, 0
	leaf_return
1388
1389 #endif /* L_fixsfsi */
1390
1391 #ifdef L_fixsfdi
1392
/* __fixsfdi: convert single-precision float (a2) to signed 64-bit
   integer in xh:xl (register mapping depends on __XTENSA_EB__),
   truncating toward zero.  Out-of-range positives and NaN return
   0x7fffffffffffffff; out-of-range negatives 0x8000000000000000.  */

	.align	4
	.global	__fixsfdi
	.type	__fixsfdi, @function
__fixsfdi:
	leaf_entry	sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000		/* exponent mask */
	ball	a2, a6, .Lfixsfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e		/* a4 = number of integer bits */
	bgei	a4, 64, .Lfixsfdi_maxint	/* |x| >= 2^63: overflow */
	blti	a4, 1, .Lfixsfdi_zero		/* |x| < 1: truncates to 0 */

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6		/* a7 keeps the sign bit for later */
	slli	xh, a7, 8		/* mantissa with hidden 1 at bit 31 */

	/* Shift back to the right, based on the exponent.  */
	ssl	a4			/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixsfdi_smallshift	/* result reaches the high word */
	srl	xl, xh			/* whole result fits in the low word */
	movi	xh, 0

.Lfixsfdi_shifted:
	/* Negate the 64-bit result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1		/* propagate the borrow to the high word */
1:	leaf_return

.Lfixsfdi_smallshift:
	/* Right shift by fewer than 32 bits: split across xh:xl.  */
	movi	xl, 0
	sll	xl, xh			/* xl = bits shifted out of xh */
	srl	xh, xh
	j	.Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9		/* mantissa bits */
	beqz	a4, .Lfixsfdi_maxint	/* Infinity */

	/* Translate NaN to +maxint.  */
	movi	a2, 0			/* clear the sign so maxint is positive */

.Lfixsfdi_maxint:
	slli	a7, a6, 8		/* 0x80000000 */
	bgez	a2, 1f
	mov	xh, a7			/* negative: 0x80000000_00000000 */
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1		/* 0x7fffffff */
	movi	xl, -1			/* positive: 0x7fffffff_ffffffff */
	leaf_return

.Lfixsfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return
1457
1458 #endif /* L_fixsfdi */
1459
1460 #ifdef L_fixunssfsi
1461
/* __fixunssfsi: convert single-precision float (a2) to unsigned
   32-bit integer, truncating toward zero.  NaN and positive overflow
   return 0xffffffff; negative overflow returns 0x80000000.  */

	.align	4
	.global	__fixunssfsi
	.type	__fixunssfsi, @function
__fixunssfsi:
	leaf_entry	sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000		/* exponent mask */
	ball	a2, a6, .Lfixunssfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 32, .Lfixunssfsi_maxint	/* |x| >= 2^32: overflow */
	bltz	a4, .Lfixunssfsi_zero		/* |x| < 1: truncates to 0 */

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6		/* a7 keeps the sign bit for later */
	slli	a5, a7, 8		/* mantissa with hidden 1 at bit 31 */

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1		/* a4 = number of integer bits, 1..32 */
	beqi	a4, 32, .Lfixunssfsi_bigexp	/* shift of 0: special case */
	ssl	a4			/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7		/* a7 >= 0 means x was positive */
	leaf_return

.Lfixunssfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9		/* mantissa bits */
	beqz	a4, .Lfixunssfsi_maxint	/* Infinity */

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunssfsi_maxint:
	slli	a4, a6, 8		/* 0x80000000 */
	movi	a5, -1			/* 0xffffffff */
	movgez	a4, a5, a2		/* positive overflow -> 0xffffffff */
	mov	a2, a4
	leaf_return

.Lfixunssfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunssfsi_bigexp:
	/* Handle unsigned maximum exponent case (exactly 32 integer bits).  */
	bltz	a2, 1f
	mov	a2, a5			/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 8
	leaf_return
1522
1523 #endif /* L_fixunssfsi */
1524
1525 #ifdef L_fixunssfdi
1526
/* __fixunssfdi: convert single-precision float (a2) to unsigned
   64-bit integer in xh:xl, truncating toward zero.  NaN and positive
   overflow return all ones; negative overflow returns
   0x8000000000000000.  */

	.align	4
	.global	__fixunssfdi
	.type	__fixunssfdi, @function
__fixunssfdi:
	leaf_entry	sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000		/* exponent mask */
	ball	a2, a6, .Lfixunssfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 64, .Lfixunssfdi_maxint	/* |x| >= 2^64: overflow */
	bltz	a4, .Lfixunssfdi_zero		/* |x| < 1: truncates to 0 */

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6		/* a7 keeps the sign bit for later */
	slli	xh, a7, 8		/* mantissa with hidden 1 at bit 31 */

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1		/* a4 = number of integer bits, 1..64 */
	beqi	a4, 64, .Lfixunssfdi_bigexp	/* shift of 0: special case */
	ssl	a4			/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunssfdi_smallshift	/* result reaches the high word */
	srl	xl, xh			/* whole result fits in the low word */
	movi	xh, 0

.Lfixunssfdi_shifted:
	/* Negate the 64-bit result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1		/* propagate the borrow to the high word */
1:	leaf_return

.Lfixunssfdi_smallshift:
	/* Right shift by fewer than 32 bits: split across xh:xl.  */
	movi	xl, 0
	src	xl, xh, xl		/* xl = bits shifted out of xh */
	srl	xh, xh
	j	.Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9		/* mantissa bits */
	beqz	a4, .Lfixunssfdi_maxint	/* Infinity */

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunssfdi_maxint:
	bgez	a2, 1b			/* positive overflow -> all ones */
2:	slli	xh, a6, 8		/* 0x80000000 */
	movi	xl, 0			/* negative: 0x80000000_00000000 */
	leaf_return

.Lfixunssfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunssfdi_bigexp:
	/* Handle unsigned maximum exponent case (exactly 64 integer bits).  */
	bltz	a7, 2b			/* negative: 0x80000000_00000000 */
	movi	xl, 0
	leaf_return			/* no shift needed */
1596
1597 #endif /* L_fixunssfdi */
1598
1599 #ifdef L_floatsisf
1600
/* __floatunsisf / __floatsisf: convert a 32-bit integer (a2,
   unsigned resp. signed) to single-precision float in a2, rounding
   to nearest-even.  __floatunsisf forces the sign to zero and shares
   the normalization/rounding code with __floatsisf.  */

	.align	4
	.global	__floatunsisf
	.type	__floatunsisf, @function
__floatunsisf:
	leaf_entry	sp, 16
	beqz	a2, .Lfloatsisf_return	/* 0 converts to +0.0 */

	/* Set the sign to zero and jump to the floatsisf code.  */
	movi	a7, 0
	j	.Lfloatsisf_normalize

	.align	4
	.global	__floatsisf
	.type	__floatsisf, @function
__floatsisf:
	leaf_entry	sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsisf_return

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsisf_normalize:
	/* Normalize with the first 1 bit in the msb.  */
	do_nsau	a4, a2, a5, a6		/* a4 = leading-zero count of a2 */
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position, with rounding bits in a6.  */
	srli	a2, a5, 8		/* leading 1 now at bit 23 */
	slli	a6, a5, (32 - 8)	/* a6 = the 8 bits shifted out */

	/* Set the exponent.  (One less than the biased exponent, since
	   the explicit mantissa bit at position 23 adds 1 to the
	   exponent field.)  */
	movi	a5, 0x9d		/* 0x7e + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, a2, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, .Lfloatsisf_return
	addi	a2, a2, 1		/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
	leaf_return

.Lfloatsisf_exactlyhalf:
	/* Round down to the nearest even value (clear bit 0).  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
1668
1669 #endif /* L_floatsisf */
1670
1671 #ifdef L_floatdisf
1672
/* __floatundisf / __floatdisf: convert a 64-bit integer in xh:xl
   (unsigned resp. signed) to single-precision float in a2, rounding
   to nearest-even.  __floatundisf forces the sign to zero and shares
   the normalization/rounding code with __floatdisf.  */

	.align	4
	.global	__floatundisf
	.type	__floatundisf, @function
__floatundisf:
	leaf_entry	sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f			/* 0 converts to +0.0 */

	/* Set the sign to zero and jump to the floatdisf code.  */
	movi	a7, 0
	j	.Lfloatdisf_normalize

	.align	4
	.global	__floatdisf
	.type	__floatdisf, @function
__floatdisf:
	leaf_entry	sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value (64-bit negate when negative).  */
	bgez	xh, .Lfloatdisf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdisf_normalize
	addi	xh, xh, -1		/* propagate the borrow to the high word */

.Lfloatdisf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  */
	beqz	xh, .Lfloatdisf_bigshift
	do_nsau	a4, xh, a5, a6		/* a4 = leading-zero count of xh */
	ssl	a4
	src	xh, xh, xl		/* 64-bit left shift by a4 ... */
	sll	xl, xl			/* ... across the xh:xl pair */

.Lfloatdisf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.  */
	ssai	8
	sll	a5, xl			/* a5 = sticky bits below a6 */
	src	a6, xh, xl
	srl	xh, xh
	beqz	a5, 1f
	movi	a5, 1
	or	a6, a6, a5		/* fold sticky bits into bit 0 of a6 */
1:
	/* Set the exponent.  (One less than the biased exponent, since
	   the explicit mantissa bit at position 23 adds 1 to the
	   exponent field.)  */
	movi	a5, 0xbd		/* 0x7e + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, 2f
	addi	a2, a2, 1		/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdisf_exactlyhalf
2:	leaf_return

.Lfloatdisf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32		/* account for the extra word of shift */
	j	.Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
	/* Round down to the nearest even value (clear bit 0).  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
1758
1759 #endif /* L_floatdisf */
1760