ieee754-df.S revision 1.1.1.2 1 /* ieee754-df.S double-precision floating point support for ARM
2
3 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 Contributed by Nicolas Pitre (nico (at) cam.org)
5
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
19
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
24
25 /*
26 * Notes:
27 *
28 * The goal of this code is to be as fast as possible. This is
29 * not meant to be easy to understand for the casual reader.
30 * For slightly simpler code please see the single precision version
31 * of this file.
32 *
33 * Only the default rounding mode is intended for best performances.
34 * Exceptions aren't supported yet, but that can be added quite easily
35 * if necessary without impacting performances.
36 */
37
38
39 #ifndef __ARMEB__
40 #define xl r0
41 #define xh r1
42 #define yl r2
43 #define yh r3
44 #else
45 #define xh r0
46 #define xl r1
47 #define yh r2
48 #define yl r3
49 #endif
50
51
52 #ifdef L_arm_negdf2
53
@ double __negdf2(double x) / __aeabi_dneg(double x): return -x.
@ The argument arrives in xh-xl (r0/r1 paired per the __ARMEB__ #defines
@ at the top of the file).  Only the IEEE-754 sign bit in the high word
@ is flipped; mantissa and exponent are untouched, so INF/NAN/zero are
@ negated bit-wise as well, and no flags or other registers are changed.
54 ARM_FUNC_START negdf2
55 ARM_FUNC_ALIAS aeabi_dneg negdf2
56
57 @ flip sign bit
58 eor xh, xh, #0x80000000
59 RET
60
61 FUNC_END aeabi_dneg
62 FUNC_END negdf2
63
64 #endif
65
66 #ifdef L_arm_addsubdf3
67
@ double __aeabi_drsub(double x, double y): reversed subtract, y - x.
@ Implemented by negating the first argument and falling into the
@ common add path below.
68 ARM_FUNC_START aeabi_drsub
69
70 eor xh, xh, #0x80000000 @ flip sign bit of first arg
71 b 1f
72
@ double __subdf3(double x, double y) / __aeabi_dsub: x - y.
@ Implemented as x + (-y) by flipping y's sign and falling into adddf3.
73 ARM_FUNC_START subdf3
74 ARM_FUNC_ALIAS aeabi_dsub subdf3
75
76 eor yh, yh, #0x80000000 @ flip sign bit of second arg
77 #if defined(__INTERWORKING_STUBS__)
78 b 1f @ Skip Thumb-code prologue
79 #endif
80
@ double __adddf3(double x, double y) / __aeabi_dadd: x + y.
@ Register roles in the common body (per the comments below):
@   r4 = exponent of the larger operand, r5 = positive exponent
@   difference (later: result sign bit), ip = guard/sticky bits,
@   lr = scratch shift count.  do_push/do_it/shift1/shiftop/COND/RETLDM
@ are macros shared with the other soft-float sources — presumably
@ from lib1funcs; TODO confirm their exact definitions there.
81 ARM_FUNC_START adddf3
82 ARM_FUNC_ALIAS aeabi_dadd adddf3
83
84 1: do_push {r4, r5, lr}
85
86 @ Look for zeroes, equal values, INF, or NAN.
87 shift1 lsl, r4, xh, #1
88 shift1 lsl, r5, yh, #1
89 teq r4, r5
90 do_it eq
91 teqeq xl, yl
92 do_it ne, ttt
93 COND(orr,s,ne) ip, r4, xl
94 COND(orr,s,ne) ip, r5, yl
95 COND(mvn,s,ne) ip, r4, asr #21
96 COND(mvn,s,ne) ip, r5, asr #21
97 beq LSYM(Lad_s)
98
99 @ Compute exponent difference. Make largest exponent in r4,
100 @ corresponding arg in xh-xl, and positive exponent difference in r5.
101 shift1 lsr, r4, r4, #21
102 rsbs r5, r4, r5, lsr #21
103 do_it lt
104 rsblt r5, r5, #0
105 ble 1f
106 add r4, r4, r5
@ Three-XOR swap of xh-xl with yh-yl (no scratch register needed),
@ so the operand with the larger exponent ends up in xh-xl.
107 eor yl, xl, yl
108 eor yh, xh, yh
109 eor xl, yl, xl
110 eor xh, yh, xh
111 eor yl, xl, yl
112 eor yh, xh, yh
113 1:
114 @ If exponent difference is too large, return largest argument
115 @ already in xh-xl. We need up to 54 bit to handle proper rounding
116 @ of 0x1p54 - 1.1.
117 cmp r5, #54
118 do_it hi
119 RETLDM "r4, r5" hi
120
121 @ Convert mantissa to signed integer.
122 tst xh, #0x80000000
123 mov xh, xh, lsl #12
124 mov ip, #0x00100000
125 orr xh, ip, xh, lsr #12
126 beq 1f
127 #if defined(__thumb2__)
128 negs xl, xl
129 sbc xh, xh, xh, lsl #1
130 #else
131 rsbs xl, xl, #0
132 rsc xh, xh, #0
133 #endif
134 1:
135 tst yh, #0x80000000
136 mov yh, yh, lsl #12
137 orr yh, ip, yh, lsr #12
138 beq 1f
139 #if defined(__thumb2__)
140 negs yl, yl
141 sbc yh, yh, yh, lsl #1
142 #else
143 rsbs yl, yl, #0
144 rsc yh, yh, #0
145 #endif
146 1:
147 @ If exponent == difference, one or both args were denormalized.
148 @ Since this is not common case, rescale them off line.
149 teq r4, r5
150 beq LSYM(Lad_d)
151 LSYM(Lad_x):
152
153 @ Compensate for the exponent overlapping the mantissa MSB added later
154 sub r4, r4, #1
155
156 @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
157 rsbs lr, r5, #32
158 blt 1f
159 shift1 lsl, ip, yl, lr
160 shiftop adds xl xl yl lsr r5 yl
161 adc xh, xh, #0
162 shiftop adds xl xl yh lsl lr yl
163 shiftop adcs xh xh yh asr r5 yh
164 b 2f
165 1: sub r5, r5, #32
166 add lr, lr, #32
167 cmp yl, #1
168 shift1 lsl,ip, yh, lr
169 do_it cs
170 orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later
171 shiftop adds xl xl yh asr r5 yh
172 adcs xh, xh, yh, asr #31
173 2:
174 @ We now have a result in xh-xl-ip.
175 @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
176 and r5, xh, #0x80000000
177 bpl LSYM(Lad_p)
178 #if defined(__thumb2__)
179 mov lr, #0
180 negs ip, ip
181 sbcs xl, lr, xl
182 sbc xh, lr, xh
183 #else
184 rsbs ip, ip, #0
185 rscs xl, xl, #0
186 rsc xh, xh, #0
187 #endif
188
189 @ Determine how to normalize the result.
190 LSYM(Lad_p):
191 cmp xh, #0x00100000
192 bcc LSYM(Lad_a)
193 cmp xh, #0x00200000
194 bcc LSYM(Lad_e)
195
196 @ Result needs to be shifted right.
197 movs xh, xh, lsr #1
198 movs xl, xl, rrx
199 mov ip, ip, rrx
200 add r4, r4, #1
201
202 @ Make sure we did not bust our exponent.
203 mov r2, r4, lsl #21
204 cmn r2, #(2 << 21)
205 bcs LSYM(Lad_o)
206
207 @ Our result is now properly aligned into xh-xl, remaining bits in ip.
208 @ Round with MSB of ip. If halfway between two numbers, round towards
209 @ LSB of xl = 0.
210 @ Pack final result together.
211 LSYM(Lad_e):
212 cmp ip, #0x80000000
213 do_it eq
214 COND(mov,s,eq) ip, xl, lsr #1
215 adcs xl, xl, #0
216 adc xh, xh, r4, lsl #20
217 orr xh, xh, r5
218 RETLDM "r4, r5"
219
220 @ Result must be shifted left and exponent adjusted.
221 LSYM(Lad_a):
222 movs ip, ip, lsl #1
223 adcs xl, xl, xl
224 adc xh, xh, xh
225 tst xh, #0x00100000
226 sub r4, r4, #1
227 bne LSYM(Lad_e)
228
229 @ No rounding necessary since ip will always be 0 at this point.
@ Lad_l is also the entry point used by the int-to-double and
@ float-to-double conversions below: it normalizes the value in xh-xl
@ given exponent r4 and sign r5.
230 LSYM(Lad_l):
231
232 #if __ARM_ARCH__ < 5
233
@ No clz available: compute the leading-zero count of xh-xl into r3
@ with a branchless binary search (result offset so r3 is the needed
@ left-shift amount for a 53-bit mantissa).
234 teq xh, #0
235 movne r3, #20
236 moveq r3, #52
237 moveq xh, xl
238 moveq xl, #0
239 mov r2, xh
240 cmp r2, #(1 << 16)
241 movhs r2, r2, lsr #16
242 subhs r3, r3, #16
243 cmp r2, #(1 << 8)
244 movhs r2, r2, lsr #8
245 subhs r3, r3, #8
246 cmp r2, #(1 << 4)
247 movhs r2, r2, lsr #4
248 subhs r3, r3, #4
249 cmp r2, #(1 << 2)
250 subhs r3, r3, #2
251 sublo r3, r3, r2, lsr #1
252 sub r3, r3, r2, lsr #3
253
254 #else
255
256 teq xh, #0
257 do_it eq, t
258 moveq xh, xl
259 moveq xl, #0
260 clz r3, xh
261 do_it eq
262 addeq r3, r3, #32
263 sub r3, r3, #11
264
265 #endif
266
267 @ determine how to shift the value.
268 subs r2, r3, #32
269 bge 2f
270 adds r2, r2, #12
271 ble 1f
272
273 @ shift value left 21 to 31 bits, or actually right 11 to 1 bits
274 @ since a register switch happened above.
275 add ip, r2, #20
276 rsb r2, r2, #12
277 shift1 lsl, xl, xh, ip
278 shift1 lsr, xh, xh, r2
279 b 3f
280
281 @ actually shift value left 1 to 20 bits, which might also represent
282 @ 32 to 52 bits if counting the register switch that happened earlier.
283 1: add r2, r2, #20
284 2: do_it le
285 rsble ip, r2, #32
286 shift1 lsl, xh, xh, r2
287 #if defined(__thumb2__)
288 lsr ip, xl, ip
289 itt le
290 orrle xh, xh, ip
291 lslle xl, xl, r2
292 #else
293 orrle xh, xh, xl, lsr ip
294 movle xl, xl, lsl r2
295 #endif
296
297 @ adjust exponent accordingly.
298 3: subs r4, r4, r3
299 do_it ge, tt
300 addge xh, xh, r4, lsl #20
301 orrge xh, xh, r5
302 RETLDM "r4, r5" ge
303
304 @ Exponent too small, denormalize result.
305 @ Find out proper shift value.
306 mvn r4, r4
307 subs r4, r4, #31
308 bge 2f
309 adds r4, r4, #12
310 bgt 1f
311
312 @ shift result right of 1 to 20 bits, sign is in r5.
313 add r4, r4, #20
314 rsb r2, r4, #32
315 shift1 lsr, xl, xl, r4
316 shiftop orr xl xl xh lsl r2 yh
317 shiftop orr xh r5 xh lsr r4 yh
318 RETLDM "r4, r5"
319
320 @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
321 @ a register switch from xh to xl.
322 1: rsb r4, r4, #12
323 rsb r2, r4, #32
324 shift1 lsr, xl, xl, r2
325 shiftop orr xl xl xh lsl r4 yh
326 mov xh, r5
327 RETLDM "r4, r5"
328
329 @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
330 @ from xh to xl.
331 2: shift1 lsr, xl, xh, r4
332 mov xh, r5
333 RETLDM "r4, r5"
334
335 @ Adjust exponents for denormalized arguments.
336 @ Note that r4 must not remain equal to 0.
337 LSYM(Lad_d):
338 teq r4, #0
339 eor yh, yh, #0x00100000
340 do_it eq, te
341 eoreq xh, xh, #0x00100000
342 addeq r4, r4, #1
343 subne r5, r5, #1
344 b LSYM(Lad_x)
345
@ Special-value dispatch: at least one operand is zero, or the operands
@ are bit-equal (possibly opposite-signed), or INF/NAN is involved.
346
347 LSYM(Lad_s):
348 mvns ip, r4, asr #21
349 do_it ne
350 COND(mvn,s,ne) ip, r5, asr #21
351 beq LSYM(Lad_i)
352
353 teq r4, r5
354 do_it eq
355 teqeq xl, yl
356 beq 1f
357
358 @ Result is x + 0.0 = x or 0.0 + y = y.
359 orrs ip, r4, xl
360 do_it eq, t
361 moveq xh, yh
362 moveq xl, yl
363 RETLDM "r4, r5"
364
365 1: teq xh, yh
366
367 @ Result is x - x = 0.
368 do_it ne, tt
369 movne xh, #0
370 movne xl, #0
371 RETLDM "r4, r5" ne
372
373 @ Result is x + x = 2x.
374 movs ip, r4, lsr #21
375 bne 2f
376 movs xl, xl, lsl #1
377 adcs xh, xh, xh
378 do_it cs
379 orrcs xh, xh, #0x80000000
380 RETLDM "r4, r5"
381 2: adds r4, r4, #(2 << 21)
382 do_it cc, t
383 addcc xh, xh, #(1 << 20)
384 RETLDM "r4, r5" cc
385 and r5, xh, #0x80000000
386
387 @ Overflow: return INF.
388 LSYM(Lad_o):
389 orr xh, r5, #0x7f000000
390 orr xh, xh, #0x00f00000
391 mov xl, #0
392 RETLDM "r4, r5"
393
394 @ At least one of x or y is INF/NAN.
395 @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
396 @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
397 @ if either is NAN: return NAN
398 @ if opposite sign: return NAN
399 @ otherwise return xh-xl (which is INF or -INF)
400 LSYM(Lad_i):
401 mvns ip, r4, asr #21
402 do_it ne, te
403 movne xh, yh
404 movne xl, yl
405 COND(mvn,s,eq) ip, r5, asr #21
406 do_it ne, t
407 movne yh, xh
408 movne yl, xl
409 orrs r4, xl, xh, lsl #12
410 do_it eq, te
411 COND(orr,s,eq) r5, yl, yh, lsl #12
412 teqeq xh, yh
413 orrne xh, xh, #0x00080000 @ quiet NAN
414 RETLDM "r4, r5"
415
416 FUNC_END aeabi_dsub
417 FUNC_END subdf3
418 FUNC_END aeabi_dadd
419 FUNC_END adddf3
420
@ double __floatunsidf(unsigned int x) / __aeabi_ui2d: convert an
@ unsigned 32-bit integer to double.  Zero returns immediately;
@ otherwise the value is placed in xh-xl with a biased exponent in r4
@ and sign 0 in r5, then control joins the adddf3 normalization code
@ at Lad_l (so this function requires L_arm_addsubdf3 to be present).
421 ARM_FUNC_START floatunsidf
422 ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
423
424 teq r0, #0
425 do_it eq, t
426 moveq r1, #0
427 RETc(eq)
428 do_push {r4, r5, lr}
429 mov r4, #0x400 @ initial exponent
430 add r4, r4, #(52-1 - 1)
431 mov r5, #0 @ sign bit is 0
432 .ifnc xl, r0
433 mov xl, r0
434 .endif
435 mov xh, #0
436 b LSYM(Lad_l)
437
438 FUNC_END aeabi_ui2d
439 FUNC_END floatunsidf
440
@ double __floatsidf(int x) / __aeabi_i2d: convert a signed 32-bit
@ integer to double.  The sign bit is isolated into r5 and the absolute
@ value taken before joining the shared normalization path at Lad_l
@ (requires the L_arm_addsubdf3 code to be linked in).
441 ARM_FUNC_START floatsidf
442 ARM_FUNC_ALIAS aeabi_i2d floatsidf
443
444 teq r0, #0
445 do_it eq, t
446 moveq r1, #0
447 RETc(eq)
448 do_push {r4, r5, lr}
449 mov r4, #0x400 @ initial exponent
450 add r4, r4, #(52-1 - 1)
451 ands r5, r0, #0x80000000 @ sign bit in r5
452 do_it mi
453 rsbmi r0, r0, #0 @ absolute value
454 .ifnc xl, r0
455 mov xl, r0
456 .endif
457 mov xh, #0
458 b LSYM(Lad_l)
459
460 FUNC_END aeabi_i2d
461 FUNC_END floatsidf
462
@ double __extendsfdf2(float x) / __aeabi_f2d: widen single to double.
@ Normal values are rebiased in-line (exponent bias 127 -> 1023 via the
@ 0x38000000 fixup) and returned without touching the stack.  Zero and
@ INF/NAN pass through unchanged; denormals fall through to the shared
@ normalization code at Lad_l with exponent 0x380 and sign in r5
@ (requires the L_arm_addsubdf3 code to be linked in).
463 ARM_FUNC_START extendsfdf2
464 ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
465
466 movs r2, r0, lsl #1 @ toss sign bit
467 mov xh, r2, asr #3 @ stretch exponent
468 mov xh, xh, rrx @ retrieve sign bit
469 mov xl, r2, lsl #28 @ retrieve remaining bits
470 do_it ne, ttt
471 COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent
472 teqne r3, #0xff000000 @ if not 0, check if INF or NAN
473 eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
474 RETc(ne) @ and return it.
475
476 teq r2, #0 @ if actually 0
477 do_it ne, e
478 teqne r3, #0xff000000 @ or INF or NAN
479 RETc(eq) @ we are done already.
480
481 @ value was denormalized. We can normalize it now.
482 do_push {r4, r5, lr}
483 mov r4, #0x380 @ setup corresponding exponent
484 and r5, xh, #0x80000000 @ move sign bit in r5
485 bic xh, xh, #0x80000000
486 b LSYM(Lad_l)
487
488 FUNC_END aeabi_f2d
489 FUNC_END extendsfdf2
490
@ double __floatundidf(unsigned long long x) / __aeabi_ul2d: convert an
@ unsigned 64-bit integer to double.  Sets sign r5 = 0 and shares the
@ body of floatdidf below (both then join the adddf3 code at Lad_p,
@ so L_arm_addsubdf3 must be linked in).
491 ARM_FUNC_START floatundidf
492 ARM_FUNC_ALIAS aeabi_ul2d floatundidf
493
494 orrs r2, r0, r1
495 do_it eq
496 RETc(eq)
497
498 do_push {r4, r5, lr}
499
500 mov r5, #0
501 b 2f
502
@ double __floatdidf(long long x) / __aeabi_l2d: convert a signed
@ 64-bit integer to double.  ah-al name the integer argument words —
@ presumably defined alongside xh/xl in a shared header; TODO confirm.
503 ARM_FUNC_START floatdidf
504 ARM_FUNC_ALIAS aeabi_l2d floatdidf
505
506 orrs r2, r0, r1
507 do_it eq
508 RETc(eq)
509
510 do_push {r4, r5, lr}
511
512 ands r5, ah, #0x80000000 @ sign bit in r5
513 bpl 2f
514 #if defined(__thumb2__)
515 negs al, al
516 sbc ah, ah, ah, lsl #1
517 #else
518 rsbs al, al, #0
519 rsc ah, ah, #0
520 #endif
521 2:
522 mov r4, #0x400 @ initial exponent
523 add r4, r4, #(52-1 - 1)
524
525 @ If FP word order does not match integer word order, swap the words.
526 .ifnc xh, ah
527 mov ip, al
528 mov xh, ah
529 mov xl, ip
530 .endif
531
@ If the magnitude already fits in 53 bits, normalization in Lad_p is
@ enough; otherwise pre-shift right so the rounding path sees at most
@ one extra guard word in ip.
532 movs ip, xh, lsr #22
533 beq LSYM(Lad_p)
534
535 @ The value is too big. Scale it down a bit...
536 mov r2, #3
537 movs ip, ip, lsr #3
538 do_it ne
539 addne r2, r2, #3
540 movs ip, ip, lsr #3
541 do_it ne
542 addne r2, r2, #3
543 add r2, r2, ip, lsr #3
544
545 rsb r3, r2, #32
546 shift1 lsl, ip, xl, r3
547 shift1 lsr, xl, xl, r2
548 shiftop orr xl xl xh lsl r3 lr
549 shift1 lsr, xh, xh, r2
550 add r4, r4, r2
551 b LSYM(Lad_p)
552
553 FUNC_END floatdidf
554 FUNC_END aeabi_l2d
555 FUNC_END floatundidf
556 FUNC_END aeabi_ul2d
557
558 #endif /* L_addsubdf3 */
559
560 #ifdef L_arm_muldivdf3
561
@ double __muldf3(double x, double y) / __aeabi_dmul: x * y.
@ Register roles: r4/r5 = operand exponents (later combined in r4),
@ r6 = sign scratch, ip = exponent mask 0x7ff then product low bits,
@ lr-r5-r6 = 106-bit product accumulator.  Zero/denormal/INF/NAN
@ operands are detected up front and diverted to Lml_s.
562 ARM_FUNC_START muldf3
563 ARM_FUNC_ALIAS aeabi_dmul muldf3
564 do_push {r4, r5, r6, lr}
565
566 @ Mask out exponents, trap any zero/denormal/INF/NAN.
567 mov ip, #0xff
568 orr ip, ip, #0x700
569 ands r4, ip, xh, lsr #20
570 do_it ne, tte
571 COND(and,s,ne) r5, ip, yh, lsr #20
572 teqne r4, ip
573 teqne r5, ip
574 bleq LSYM(Lml_s)
575
576 @ Add exponents together
577 add r4, r4, r5
578
579 @ Determine final sign.
580 eor r6, xh, yh
581
582 @ Convert mantissa to unsigned integer.
583 @ If power of two, branch to a separate path.
584 bic xh, xh, ip, lsl #21
585 bic yh, yh, ip, lsl #21
586 orrs r5, xl, xh, lsl #12
587 do_it ne
588 COND(orr,s,ne) r5, yl, yh, lsl #12
589 orr xh, xh, #0x00100000
590 orr yh, yh, #0x00100000
591 beq LSYM(Lml_1)
592
593 #if __ARM_ARCH__ < 4
594
595 @ Put sign bit in r6, which will be restored in yl later.
596 and r6, r6, #0x80000000
597
@ 53x53-bit multiply built from 16x16->32 partial products, since
@ umull is unavailable before ARMv4.
598 @ Well, no way to make it shorter without the umull instruction.
599 stmfd sp!, {r6, r7, r8, r9, sl, fp}
600 mov r7, xl, lsr #16
601 mov r8, yl, lsr #16
602 mov r9, xh, lsr #16
603 mov sl, yh, lsr #16
604 bic xl, xl, r7, lsl #16
605 bic yl, yl, r8, lsl #16
606 bic xh, xh, r9, lsl #16
607 bic yh, yh, sl, lsl #16
608 mul ip, xl, yl
609 mul fp, xl, r8
610 mov lr, #0
611 adds ip, ip, fp, lsl #16
612 adc lr, lr, fp, lsr #16
613 mul fp, r7, yl
614 adds ip, ip, fp, lsl #16
615 adc lr, lr, fp, lsr #16
616 mul fp, xl, sl
617 mov r5, #0
618 adds lr, lr, fp, lsl #16
619 adc r5, r5, fp, lsr #16
620 mul fp, r7, yh
621 adds lr, lr, fp, lsl #16
622 adc r5, r5, fp, lsr #16
623 mul fp, xh, r8
624 adds lr, lr, fp, lsl #16
625 adc r5, r5, fp, lsr #16
626 mul fp, r9, yl
627 adds lr, lr, fp, lsl #16
628 adc r5, r5, fp, lsr #16
629 mul fp, xh, sl
630 mul r6, r9, sl
631 adds r5, r5, fp, lsl #16
632 adc r6, r6, fp, lsr #16
633 mul fp, r9, yh
634 adds r5, r5, fp, lsl #16
635 adc r6, r6, fp, lsr #16
636 mul fp, xl, yh
637 adds lr, lr, fp
638 mul fp, r7, sl
639 adcs r5, r5, fp
640 mul fp, xh, yl
641 adc r6, r6, #0
642 adds lr, lr, fp
643 mul fp, r9, r8
644 adcs r5, r5, fp
645 mul fp, r7, r8
646 adc r6, r6, #0
647 adds lr, lr, fp
648 mul fp, xh, yh
649 adcs r5, r5, fp
650 adc r6, r6, #0
651 ldmfd sp!, {yl, r7, r8, r9, sl, fp}
652
653 #else
654
655 @ Here is the actual multiplication.
656 umull ip, lr, xl, yl
657 mov r5, #0
658 umlal lr, r5, xh, yl
659 and yl, r6, #0x80000000
660 umlal lr, r5, xl, yh
661 mov r6, #0
662 umlal r5, r6, xh, yh
663
664 #endif
665
666 @ The LSBs in ip are only significant for the final rounding.
667 @ Fold them into lr.
668 teq ip, #0
669 do_it ne
670 orrne lr, lr, #1
671
672 @ Adjust result upon the MSB position.
673 sub r4, r4, #0xff
674 cmp r6, #(1 << (20-11))
675 sbc r4, r4, #0x300
676 bcs 1f
677 movs lr, lr, lsl #1
678 adcs r5, r5, r5
679 adc r6, r6, r6
680 1:
681 @ Shift to final position, add sign to result.
682 orr xh, yl, r6, lsl #11
683 orr xh, xh, r5, lsr #21
684 mov xl, r5, lsl #11
685 orr xl, xl, lr, lsr #21
686 mov lr, lr, lsl #11
687
688 @ Check exponent range for under/overflow.
689 subs ip, r4, #(254 - 1)
690 do_it hi
691 cmphi ip, #0x700
692 bhi LSYM(Lml_u)
693
694 @ Round the result, merge final exponent.
695 cmp lr, #0x80000000
696 do_it eq
697 COND(mov,s,eq) lr, xl, lsr #1
698 adcs xl, xl, #0
699 adc xh, xh, r4, lsl #20
700 RETLDM "r4, r5, r6"
701
702 @ Multiplication by 0x1p*: let's shortcut a lot of code.
703 LSYM(Lml_1):
704 and r6, r6, #0x80000000
705 orr xh, r6, xh
706 orr xl, xl, yl
707 eor xh, xh, yh
708 subs r4, r4, ip, lsr #1
709 do_it gt, tt
710 COND(rsb,s,gt) r5, r4, ip
711 orrgt xh, xh, r4, lsl #20
712 RETLDM "r4, r5, r6" gt
713
714 @ Under/overflow: fix things up for the code below.
715 orr xh, xh, #0x00100000
716 mov lr, #0
717 subs r4, r4, #1
718
@ Lml_u is shared with divdf3: on entry r4 holds the (possibly
@ out-of-range) exponent and lr holds sticky rounding bits.
719 LSYM(Lml_u):
720 @ Overflow?
721 bgt LSYM(Lml_o)
722
723 @ Check if denormalized result is possible, otherwise return signed 0.
724 cmn r4, #(53 + 1)
725 do_it le, tt
726 movle xl, #0
727 bicle xh, xh, #0x7fffffff
728 RETLDM "r4, r5, r6" le
729
730 @ Find out proper shift value.
731 rsb r4, r4, #0
732 subs r4, r4, #32
733 bge 2f
734 adds r4, r4, #12
735 bgt 1f
736
737 @ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
738 add r4, r4, #20
739 rsb r5, r4, #32
740 shift1 lsl, r3, xl, r5
741 shift1 lsr, xl, xl, r4
742 shiftop orr xl xl xh lsl r5 r2
743 and r2, xh, #0x80000000
744 bic xh, xh, #0x80000000
745 adds xl, xl, r3, lsr #31
746 shiftop adc xh r2 xh lsr r4 r6
747 orrs lr, lr, r3, lsl #1
748 do_it eq
749 biceq xl, xl, r3, lsr #31
750 RETLDM "r4, r5, r6"
751
752 @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
753 @ a register switch from xh to xl. Then round.
754 1: rsb r4, r4, #12
755 rsb r5, r4, #32
756 shift1 lsl, r3, xl, r4
757 shift1 lsr, xl, xl, r5
758 shiftop orr xl xl xh lsl r4 r2
759 bic xh, xh, #0x7fffffff
760 adds xl, xl, r3, lsr #31
761 adc xh, xh, #0
762 orrs lr, lr, r3, lsl #1
763 do_it eq
764 biceq xl, xl, r3, lsr #31
765 RETLDM "r4, r5, r6"
766
767 @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
768 @ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
769 2: rsb r5, r4, #32
770 shiftop orr lr lr xl lsl r5 r2
771 shift1 lsr, r3, xl, r4
772 shiftop orr r3 r3 xh lsl r5 r2
773 shift1 lsr, xl, xh, r4
774 bic xh, xh, #0x7fffffff
775 shiftop bic xl xl xh lsr r4 r2
776 add xl, xl, r3, lsr #31
777 orrs lr, lr, r3, lsl #1
778 do_it eq
779 biceq xl, xl, r3, lsr #31
780 RETLDM "r4, r5, r6"
781
782 @ One or both arguments are denormalized.
783 @ Scale them leftwards and preserve sign bit.
784 LSYM(Lml_d):
785 teq r4, #0
786 bne 2f
787 and r6, xh, #0x80000000
788 1: movs xl, xl, lsl #1
789 adc xh, xh, xh
790 tst xh, #0x00100000
791 do_it eq
792 subeq r4, r4, #1
793 beq 1b
794 orr xh, xh, r6
795 teq r5, #0
796 do_it ne
797 RETc(ne)
798 2: and r6, yh, #0x80000000
799 3: movs yl, yl, lsl #1
800 adc yh, yh, yh
801 tst yh, #0x00100000
802 do_it eq
803 subeq r5, r5, #1
804 beq 3b
805 orr yh, yh, r6
806 RET
807
@ Special-case dispatch for muldf3 (called with bleq, so returning with
@ RET above resumes the main path after rescaling denormals).
808 LSYM(Lml_s):
809 @ Isolate the INF and NAN cases away
810 teq r4, ip
811 and r5, ip, yh, lsr #20
812 do_it ne
813 teqne r5, ip
814 beq 1f
815
816 @ Here, one or more arguments are either denormalized or zero.
817 orrs r6, xl, xh, lsl #1
818 do_it ne
819 COND(orr,s,ne) r6, yl, yh, lsl #1
820 bne LSYM(Lml_d)
821
822 @ Result is 0, but determine sign anyway.
823 LSYM(Lml_z):
824 eor xh, xh, yh
825 and xh, xh, #0x80000000
826 mov xl, #0
827 RETLDM "r4, r5, r6"
828
829 1: @ One or both args are INF or NAN.
830 orrs r6, xl, xh, lsl #1
831 do_it eq, te
832 moveq xl, yl
833 moveq xh, yh
834 COND(orr,s,ne) r6, yl, yh, lsl #1
835 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
836 teq r4, ip
837 bne 1f
838 orrs r6, xl, xh, lsl #12
839 bne LSYM(Lml_n) @ NAN * <anything> -> NAN
840 1: teq r5, ip
841 bne LSYM(Lml_i)
842 orrs r6, yl, yh, lsl #12
843 do_it ne, t
844 movne xl, yl
845 movne xh, yh
846 bne LSYM(Lml_n) @ <anything> * NAN -> NAN
847
848 @ Result is INF, but we need to determine its sign.
849 LSYM(Lml_i):
850 eor xh, xh, yh
851
852 @ Overflow: return INF (sign already in xh).
853 LSYM(Lml_o):
854 and xh, xh, #0x80000000
855 orr xh, xh, #0x7f000000
856 orr xh, xh, #0x00f00000
857 mov xl, #0
858 RETLDM "r4, r5, r6"
859
860 @ Return a quiet NAN.
861 LSYM(Lml_n):
862 orr xh, xh, #0x7f000000
863 orr xh, xh, #0x00f80000
864 RETLDM "r4, r5, r6"
865
866 FUNC_END aeabi_dmul
867 FUNC_END muldf3
868
@ double __divdf3(double x, double y) / __aeabi_ddiv: x / y.
@ Register roles: r4/r5 = exponents (r4 becomes result exponent),
@ lr = final sign, r5-r6 = dividend mantissa, yh-yl = divisor mantissa,
@ xh-xl = quotient under construction, ip = current quotient bit mask.
@ The nibble-at-a-time restoring division below produces 4 bits per
@ pass; special operands are diverted to Ldv_s, and under/overflow
@ reuses muldf3's Lml_u tail.
869 ARM_FUNC_START divdf3
870 ARM_FUNC_ALIAS aeabi_ddiv divdf3
871
872 do_push {r4, r5, r6, lr}
873
874 @ Mask out exponents, trap any zero/denormal/INF/NAN.
875 mov ip, #0xff
876 orr ip, ip, #0x700
877 ands r4, ip, xh, lsr #20
878 do_it ne, tte
879 COND(and,s,ne) r5, ip, yh, lsr #20
880 teqne r4, ip
881 teqne r5, ip
882 bleq LSYM(Ldv_s)
883
884 @ Subtract divisor exponent from dividend's.
885 sub r4, r4, r5
886
887 @ Preserve final sign into lr.
888 eor lr, xh, yh
889
890 @ Convert mantissa to unsigned integer.
891 @ Dividend -> r5-r6, divisor -> yh-yl.
892 orrs r5, yl, yh, lsl #12
893 mov xh, xh, lsl #12
894 beq LSYM(Ldv_1)
895 mov yh, yh, lsl #12
896 mov r5, #0x10000000
897 orr yh, r5, yh, lsr #4
898 orr yh, yh, yl, lsr #24
899 mov yl, yl, lsl #8
900 orr r5, r5, xh, lsr #4
901 orr r5, r5, xl, lsr #24
902 mov r6, xl, lsl #8
903
904 @ Initialize xh with final sign bit.
905 and xh, lr, #0x80000000
906
907 @ Ensure result will land to known bit position.
908 @ Apply exponent bias accordingly.
909 cmp r5, yh
910 do_it eq
911 cmpeq r6, yl
912 adc r4, r4, #(255 - 2)
913 add r4, r4, #0x300
914 bcs 1f
915 movs yh, yh, lsr #1
916 mov yl, yl, rrx
917 1:
918 @ Perform first subtraction to align result to a nibble.
919 subs r6, r6, yl
920 sbc r5, r5, yh
921 movs yh, yh, lsr #1
922 mov yl, yl, rrx
923 mov xl, #0x00100000
924 mov ip, #0x00080000
925
926 @ The actual division loop.
@ Each of the four unrolled steps: trial-subtract the (halved) divisor
@ from the remainder r5-r6; on no-borrow keep the difference and set
@ the corresponding quotient bit in xl via ip.
927 1: subs lr, r6, yl
928 sbcs lr, r5, yh
929 do_it cs, tt
930 subcs r6, r6, yl
931 movcs r5, lr
932 orrcs xl, xl, ip
933 movs yh, yh, lsr #1
934 mov yl, yl, rrx
935 subs lr, r6, yl
936 sbcs lr, r5, yh
937 do_it cs, tt
938 subcs r6, r6, yl
939 movcs r5, lr
940 orrcs xl, xl, ip, lsr #1
941 movs yh, yh, lsr #1
942 mov yl, yl, rrx
943 subs lr, r6, yl
944 sbcs lr, r5, yh
945 do_it cs, tt
946 subcs r6, r6, yl
947 movcs r5, lr
948 orrcs xl, xl, ip, lsr #2
949 movs yh, yh, lsr #1
950 mov yl, yl, rrx
951 subs lr, r6, yl
952 sbcs lr, r5, yh
953 do_it cs, tt
954 subcs r6, r6, yl
955 movcs r5, lr
956 orrcs xl, xl, ip, lsr #3
957
958 orrs lr, r5, r6
959 beq 2f
960 mov r5, r5, lsl #4
961 orr r5, r5, r6, lsr #28
962 mov r6, r6, lsl #4
963 mov yh, yh, lsl #3
964 orr yh, yh, yl, lsr #29
965 mov yl, yl, lsl #3
966 movs ip, ip, lsr #4
967 bne 1b
968
969 @ We are done with a word of the result.
970 @ Loop again for the low word if this pass was for the high word.
971 tst xh, #0x00100000
972 bne 3f
973 orr xh, xh, xl
974 mov xl, #0
975 mov ip, #0x80000000
976 b 1b
977 2:
978 @ Be sure result starts in the high word.
979 tst xh, #0x00100000
980 do_it eq, t
981 orreq xh, xh, xl
982 moveq xl, #0
983 3:
984 @ Check exponent range for under/overflow.
985 subs ip, r4, #(254 - 1)
986 do_it hi
987 cmphi ip, #0x700
988 bhi LSYM(Lml_u)
989
990 @ Round the result, merge final exponent.
991 subs ip, r5, yh
992 do_it eq, t
993 COND(sub,s,eq) ip, r6, yl
994 COND(mov,s,eq) ip, xl, lsr #1
995 adcs xl, xl, #0
996 adc xh, xh, r4, lsl #20
997 RETLDM "r4, r5, r6"
998
999 @ Division by 0x1p*: shortcut a lot of code.
1000 LSYM(Ldv_1):
1001 and lr, lr, #0x80000000
1002 orr xh, lr, xh, lsr #12
1003 adds r4, r4, ip, lsr #1
1004 do_it gt, tt
1005 COND(rsb,s,gt) r5, r4, ip
1006 orrgt xh, xh, r4, lsl #20
1007 RETLDM "r4, r5, r6" gt
1008
1009 orr xh, xh, #0x00100000
1010 mov lr, #0
1011 subs r4, r4, #1
1012 b LSYM(Lml_u)
1013
1014 @ Result might need to be denormalized: put remainder bits
1015 @ in lr for rounding considerations.
1016 LSYM(Ldv_u):
1017 orr lr, r5, r6
1018 b LSYM(Lml_u)
1019
1020 @ One or both arguments is either INF, NAN or zero.
1021 LSYM(Ldv_s):
1022 and r5, ip, yh, lsr #20
1023 teq r4, ip
1024 do_it eq
1025 teqeq r5, ip
1026 beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
1027 teq r4, ip
1028 bne 1f
1029 orrs r4, xl, xh, lsl #12
1030 bne LSYM(Lml_n) @ NAN / <anything> -> NAN
1031 teq r5, ip
1032 bne LSYM(Lml_i) @ INF / <anything> -> INF
1033 mov xl, yl
1034 mov xh, yh
1035 b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
1036 1: teq r5, ip
1037 bne 2f
1038 orrs r5, yl, yh, lsl #12
1039 beq LSYM(Lml_z) @ <anything> / INF -> 0
1040 mov xl, yl
1041 mov xh, yh
1042 b LSYM(Lml_n) @ <anything> / NAN -> NAN
1043 2: @ If both are nonzero, we need to normalize and resume above.
1044 orrs r6, xl, xh, lsl #1
1045 do_it ne
1046 COND(orr,s,ne) r6, yl, yh, lsl #1
1047 bne LSYM(Lml_d)
1048 @ One or both arguments are 0.
1049 orrs r4, xl, xh, lsl #1
1050 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
1051 orrs r5, yl, yh, lsl #1
1052 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
1053 b LSYM(Lml_n) @ 0 / 0 -> NAN
1054
1055 FUNC_END aeabi_ddiv
1056 FUNC_END divdf3
1057
1058 #endif /* L_muldivdf3 */
1059
1060 #ifdef L_arm_cmpdf2
1061
1062 @ Note: only r0 (return value) and ip are clobbered here.
1063
@ Three-way comparison entry points sharing one body.  Each entry
@ stashes its "unordered" return value on the stack before branching
@ to the common code: gtdf2/gedf2 use -1, ltdf2/ledf2 use +1, and
@ cmpdf2/nedf2/eqdf2 use +1 so the libgcc truth conventions hold.
@ Ordered results are -1 / 0 / +1 for less / equal / greater.
1064 ARM_FUNC_START gtdf2
1065 ARM_FUNC_ALIAS gedf2 gtdf2
1066 mov ip, #-1
1067 b 1f
1068
1069 ARM_FUNC_START ltdf2
1070 ARM_FUNC_ALIAS ledf2 ltdf2
1071 mov ip, #1
1072 b 1f
1073
1074 ARM_FUNC_START cmpdf2
1075 ARM_FUNC_ALIAS nedf2 cmpdf2
1076 ARM_FUNC_ALIAS eqdf2 cmpdf2
1077 mov ip, #1 @ how should we specify unordered here?
1078
1079 1: str ip, [sp, #-4]!
1080
1081 @ Trap any INF/NAN first.
1082 mov ip, xh, lsl #1
1083 mvns ip, ip, asr #21
1084 mov ip, yh, lsl #1
1085 do_it ne
1086 COND(mvn,s,ne) ip, ip, asr #21
1087 beq 3f
1088
1089 @ Test for equality.
1090 @ Note that 0.0 is equal to -0.0.
1091 2: add sp, sp, #4
1092 orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
1093 do_it eq, e
1094 COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0
1095 teqne xh, yh @ or xh == yh
1096 do_it eq, tt
1097 teqeq xl, yl @ and xl == yl
1098 moveq r0, #0 @ then equal.
1099 RETc(eq)
1100
1101 @ Clear C flag
1102 cmn r0, #0
1103
1104 @ Compare sign,
1105 teq xh, yh
1106
1107 @ Compare values if same sign
1108 do_it pl
1109 cmppl xh, yh
1110 do_it eq
1111 cmpeq xl, yl
1112
1113 @ Result:
1114 do_it cs, e
1115 movcs r0, yh, asr #31
1116 mvncc r0, yh, asr #31
1117 orr r0, r0, #1
1118 RET
1119
1120 @ Look for a NAN.
1121 3: mov ip, xh, lsl #1
1122 mvns ip, ip, asr #21
1123 bne 4f
1124 orrs ip, xl, xh, lsl #12
1125 bne 5f @ x is NAN
1126 4: mov ip, yh, lsl #1
1127 mvns ip, ip, asr #21
1128 bne 2b
1129 orrs ip, yl, yh, lsl #12
1130 beq 2b @ y is not NAN
1131 5: ldr r0, [sp], #4 @ unordered return code
1132 RET
1133
1134 FUNC_END gedf2
1135 FUNC_END gtdf2
1136 FUNC_END ledf2
1137 FUNC_END ltdf2
1138 FUNC_END nedf2
1139 FUNC_END eqdf2
1140 FUNC_END cmpdf2
1141
@ __aeabi_cdrcmple: flag-returning reversed compare (y ? x).
@ Swaps the two double operands in r0-r3 and falls into the common
@ status-returning body below.
1142 ARM_FUNC_START aeabi_cdrcmple
1143
1144 mov ip, r0
1145 mov r0, r2
1146 mov r2, ip
1147 mov ip, r1
1148 mov r1, r3
1149 mov r3, ip
1150 b 6f
1151
@ __aeabi_cdcmpeq / __aeabi_cdcmple: compare x ? y and return the
@ result in the CPSR flags (Z set on equal, C set on greater-or-equal)
@ rather than in r0.
1152 ARM_FUNC_START aeabi_cdcmpeq
1153 ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
1154
1155 @ The status-returning routines are required to preserve all
1156 @ registers except ip, lr, and cpsr.
1157 6: do_push {r0, lr}
1158 ARM_CALL cmpdf2
1159 @ Set the Z flag correctly, and the C flag unconditionally.
1160 cmp r0, #0
1161 @ Clear the C flag if the return value was -1, indicating
1162 @ that the first operand was smaller than the second.
1163 do_it mi
1164 cmnmi r0, #0
1165 RETLDM "r0"
1166
1167 FUNC_END aeabi_cdcmple
1168 FUNC_END aeabi_cdcmpeq
1169 FUNC_END aeabi_cdrcmple
1170
@ int __aeabi_dcmpeq(double x, double y): 1 if x == y, else 0
@ (unordered counts as not-equal).  Wraps the flag-returning
@ aeabi_cdcmple above; the 8-byte lr slot keeps sp 8-byte aligned.
1171 ARM_FUNC_START aeabi_dcmpeq
1172
1173 str lr, [sp, #-8]!
1174 ARM_CALL aeabi_cdcmple
1175 do_it eq, e
1176 moveq r0, #1 @ Equal to.
1177 movne r0, #0 @ Less than, greater than, or unordered.
1178 RETLDM
1179
1180 FUNC_END aeabi_dcmpeq
1181
@ int __aeabi_dcmplt(double x, double y): 1 if x < y, else 0
@ (unordered counts as not-less).  Uses the C flag from aeabi_cdcmple.
1182 ARM_FUNC_START aeabi_dcmplt
1183
1184 str lr, [sp, #-8]!
1185 ARM_CALL aeabi_cdcmple
1186 do_it cc, e
1187 movcc r0, #1 @ Less than.
1188 movcs r0, #0 @ Equal to, greater than, or unordered.
1189 RETLDM
1190
1191 FUNC_END aeabi_dcmplt
1192
@ int __aeabi_dcmple(double x, double y): 1 if x <= y, else 0
@ (unordered counts as not-less-or-equal).
1193 ARM_FUNC_START aeabi_dcmple
1194
1195 str lr, [sp, #-8]!
1196 ARM_CALL aeabi_cdcmple
1197 do_it ls, e
1198 movls r0, #1 @ Less than or equal to.
1199 movhi r0, #0 @ Greater than or unordered.
1200 RETLDM
1201
1202 FUNC_END aeabi_dcmple
1203
@ int __aeabi_dcmpge(double x, double y): 1 if x >= y, else 0
@ (unordered counts as not-greater-or-equal).  Uses the reversed
@ compare so the flag sense matches the <= test.
1204 ARM_FUNC_START aeabi_dcmpge
1205
1206 str lr, [sp, #-8]!
1207 ARM_CALL aeabi_cdrcmple
1208 do_it ls, e
1209 movls r0, #1 @ Operand 2 is less than or equal to operand 1.
1210 movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
1211 RETLDM
1212
1213 FUNC_END aeabi_dcmpge
1214
@ int __aeabi_dcmpgt(double x, double y): 1 if x > y, else 0
@ (unordered counts as not-greater).  Uses the reversed compare.
1215 ARM_FUNC_START aeabi_dcmpgt
1216
1217 str lr, [sp, #-8]!
1218 ARM_CALL aeabi_cdrcmple
1219 do_it cc, e
1220 movcc r0, #1 @ Operand 2 is less than operand 1.
1221 movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
1222 @ or they are unordered.
1223 RETLDM
1224
1225 FUNC_END aeabi_dcmpgt
1226
1227 #endif /* L_cmpdf2 */
1228
1229 #ifdef L_arm_unorddf2
1230
@ int __unorddf2(double x, double y) / __aeabi_dcmpun: 1 if either
@ argument is a NAN (comparison would be unordered), else 0.
@ A NAN has all-ones exponent (detected by the mvns of the shifted
@ high word) and a nonzero mantissa (the orrs test).
1231 ARM_FUNC_START unorddf2
1232 ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
1233
1234 mov ip, xh, lsl #1
1235 mvns ip, ip, asr #21
1236 bne 1f
1237 orrs ip, xl, xh, lsl #12
1238 bne 3f @ x is NAN
1239 1: mov ip, yh, lsl #1
1240 mvns ip, ip, asr #21
1241 bne 2f
1242 orrs ip, yl, yh, lsl #12
1243 bne 3f @ y is NAN
1244 2: mov r0, #0 @ arguments are ordered.
1245 RET
1246
1247 3: mov r0, #1 @ arguments are unordered.
1248 RET
1249
1250 FUNC_END aeabi_dcmpun
1251 FUNC_END unorddf2
1252
1253 #endif /* L_unorddf2 */
1254
1255 #ifdef L_arm_fixdfsi
1256
@ int __fixdfsi(double x) / __aeabi_d2iz: convert double to signed
@ 32-bit int, truncating toward zero.  Too-small values return 0;
@ out-of-range values saturate to 0x7fffffff / 0x80000000 by sign;
@ NAN returns 0.
1257 ARM_FUNC_START fixdfsi
1258 ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
1259
1260 @ check exponent range.
1261 mov r2, xh, lsl #1
1262 adds r2, r2, #(1 << 21)
1263 bcs 2f @ value is INF or NAN
1264 bpl 1f @ value is too small
1265 mov r3, #(0xfffffc00 + 31)
1266 subs r2, r3, r2, asr #21
1267 bls 3f @ value is too large
1268
1269 @ scale value
1270 mov r3, xh, lsl #11
1271 orr r3, r3, #0x80000000
1272 orr r3, r3, xl, lsr #21
1273 tst xh, #0x80000000 @ the sign bit
1274 shift1 lsr, r0, r3, r2
1275 do_it ne
1276 rsbne r0, r0, #0
1277 RET
1278
1279 1: mov r0, #0
1280 RET
1281
1282 2: orrs xl, xl, xh, lsl #12
1283 bne 4f @ x is NAN.
1284 3: ands r0, xh, #0x80000000 @ the sign bit
1285 do_it eq
1286 moveq r0, #0x7fffffff @ maximum signed positive si
1287 RET
1288
1289 4: mov r0, #0 @ How should we convert NAN?
1290 RET
1291
1292 FUNC_END aeabi_d2iz
1293 FUNC_END fixdfsi
1294
1295 #endif /* L_fixdfsi */
1296
1297 #ifdef L_arm_fixunsdfsi
1298
@ unsigned __fixunsdfsi(double x) / __aeabi_d2uiz: convert double to
@ unsigned 32-bit int, truncating toward zero.  Negative or too-small
@ values return 0; too-large values and INF saturate to 0xffffffff;
@ NAN returns 0.
1299 ARM_FUNC_START fixunsdfsi
1300 ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
1301
1302 @ check exponent range.
1303 movs r2, xh, lsl #1
1304 bcs 1f @ value is negative
1305 adds r2, r2, #(1 << 21)
1306 bcs 2f @ value is INF or NAN
1307 bpl 1f @ value is too small
1308 mov r3, #(0xfffffc00 + 31)
1309 subs r2, r3, r2, asr #21
1310 bmi 3f @ value is too large
1311
1312 @ scale value
1313 mov r3, xh, lsl #11
1314 orr r3, r3, #0x80000000
1315 orr r3, r3, xl, lsr #21
1316 shift1 lsr, r0, r3, r2
1317 RET
1318
1319 1: mov r0, #0
1320 RET
1321
1322 2: orrs xl, xl, xh, lsl #12
1323 bne 4f @ value is NAN.
1324 3: mov r0, #0xffffffff @ maximum unsigned si
1325 RET
1326
1327 4: mov r0, #0 @ How should we convert NAN?
1328 RET
1329
1330 FUNC_END aeabi_d2uiz
1331 FUNC_END fixunsdfsi
1332
1333 #endif /* L_fixunsdfsi */
1334
1335 #ifdef L_arm_truncdfsf2
1336
@ float __truncdfsf2(double x) / __aeabi_d2f: narrow double to single
@ precision with round-to-nearest-even.  In-range values are rebiased
@ and rounded in-line; underflow produces a denormal or signed zero;
@ overflow produces signed INF; NAN produces a single-precision
@ quiet NAN (0x7fc00000).
1337 ARM_FUNC_START truncdfsf2
1338 ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
1339
1340 @ check exponent range.
1341 mov r2, xh, lsl #1
1342 subs r3, r2, #((1023 - 127) << 21)
1343 do_it cs, t
1344 COND(sub,s,cs) ip, r3, #(1 << 21)
1345 COND(rsb,s,cs) ip, ip, #(254 << 21)
1346 bls 2f @ value is out of range
1347
1348 1: @ shift and round mantissa
1349 and ip, xh, #0x80000000
1350 mov r2, xl, lsl #3
1351 orr xl, ip, xl, lsr #29
1352 cmp r2, #0x80000000
1353 adc r0, xl, r3, lsl #2
1354 do_it eq
1355 biceq r0, r0, #1 @ round to even on exact ties
1356 RET
1357
1358 2: @ either overflow or underflow
1359 tst xh, #0x40000000
1360 bne 3f @ overflow
1361
1362 @ check if denormalized value is possible
1363 adds r2, r3, #(23 << 21)
1364 do_it lt, t
1365 andlt r0, xh, #0x80000000 @ too small, return signed 0.
1366 RETc(lt)
1367
1368 @ denormalize value so we can resume with the code above afterwards.
1369 orr xh, xh, #0x00100000
1370 mov r2, r2, lsr #21
1371 rsb r2, r2, #24
1372 rsb ip, r2, #32
1373 #if defined(__thumb2__)
1374 lsls r3, xl, ip
1375 #else
1376 movs r3, xl, lsl ip
1377 #endif
1378 shift1 lsr, xl, xl, r2
1379 do_it ne
1380 orrne xl, xl, #1 @ fold r3 for rounding considerations.
1381 mov r3, xh, lsl #11
1382 mov r3, r3, lsr #11
1383 shiftop orr xl xl r3 lsl ip ip
1384 shift1 lsr, r3, r3, r2
1385 mov r3, r3, lsl #1
1386 b 1b
1387
1388 3: @ check for NAN
1389 mvns r3, r2, asr #21
1390 bne 5f @ simple overflow
1391 orrs r3, xl, xh, lsl #12
1392 do_it ne, tt
1393 movne r0, #0x7f000000
1394 orrne r0, r0, #0x00c00000
1395 RETc(ne) @ return NAN
1396
1397 5: @ return INF with sign
1398 and r0, xh, #0x80000000
1399 orr r0, r0, #0x7f000000
1400 orr r0, r0, #0x00800000
1401 RET
1402
1403 FUNC_END aeabi_d2f
1404 FUNC_END truncdfsf2
1405
1406 #endif /* L_truncdfsf2 */