@ libgcc routines for ARM cpu.
2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
3
4 /* Copyright (C) 1995-2020 Free Software Foundation, Inc.
5
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
19
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
24
25 /* Everything in this file should now use unified syntax. */
26
27 .syntax unified
28
29 /* An executable stack is *not* required for these functions. */
30 #if defined(__ELF__) && defined(__linux__)
31 .section .note.GNU-stack,"",%progbits
32 .previous
33 #endif /* __ELF__ and __linux__ */
34
35 #ifdef __ARM_EABI__
36 /* Some attributes that are common to all routines in this file. */
37 /* Tag_ABI_align_needed: This code does not require 8-byte
38 alignment from the caller. */
39 /* .eabi_attribute 24, 0 -- default setting. */
40 /* Tag_ABI_align_preserved: This code preserves 8-byte
41 alignment in any callee. */
42 .eabi_attribute 25, 1
43 #endif /* __ARM_EABI__ */
44 /* ------------------------------------------------------------------------ */
45
46 /* We need to know what prefix to add to function names. */
47
#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  CONCAT1 expands its arguments before
   pasting them via CONCAT2.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels (e.g. a leading underscore
   on targets whose ABI demands one).  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
60
/* ELF gets PLT-indirected calls (where supported), symbol type/size
   annotations, and ".x"-style local labels; other object formats get
   empty stubs and plain local labels.  */
#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#elif defined __vxworks && !defined __PIC__
#define __PLT__ /* Not supported by the kernel loader.  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif
78
79 /* Function end macros. Variants for interworking. */
80
81 /* There are times when we might prefer Thumb1 code even if ARM code is
82 permitted, for example, the code might be smaller, or there might be
83 interworking problems with switching to ARM state if interworking is
84 disabled. */
#if (defined(__thumb__)			\
     && !defined(__thumb2__)		\
     && (!defined(__THUMB_INTERWORK__)	\
	 || defined (__OPTIMIZE_SIZE__)	\
	 || !__ARM_ARCH_ISA_ARM))
# define __prefer_thumb__
#endif

/* Set when the target has no ARM (32-bit) ISA at all and only
   Thumb-1 is available (e.g. ARMv6-M).  */
#if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1
#define NOT_ISA_TARGET_32BIT 1
#endif
96
/* How to return from a function call depends on the architecture variant.
   Architectures with BX use it (required for ARM<->Thumb interworking);
   older ones fall back to "mov pc, lr".  */

#if (__ARM_ARCH > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif
125
/* Hand-emit a .debug_frame annotation marking register \reg as restored
   and setting the new CFA offset, \advance bytes into the function.
   ELF only; a no-op elsewhere.  */
.macro cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
/* Hand-emit a .debug_frame annotation recording that register \reg was
   saved at \offset from the CFA (offset is negative, hence the /-4 to
   get the data-alignment-factored value) and that the CFA offset is now
   \cfa_offset, \advance bytes into the function.  ELF only.  */
.macro cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
/* Open hand-written .debug_frame info: emit a minimal CIE (return
   address column 14 = lr, CFA = r13/sp + 0, data alignment -4) and the
   header of an FDE covering \start_label..\end_label.  The FDE is
   closed by cfi_end.  ELF only.  */
.macro cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
	.4byte	0xffffffff	@ CIE Identifier Tag
	.byte	0x1	@ CIE Version
	.ascii	"\0"	@ CIE Augmentation
	.uleb128 0x1	@ CIE Code Alignment Factor
	.sleb128 -4	@ CIE Data Alignment Factor
	.byte	0xe	@ CIE RA Column
	.byte	0xc	@ DW_CFA_def_cfa
	.uleb128 0xd
	.uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm
/* Close the FDE opened by cfi_start and define \end_label at the
   current location.  Note \end_label is only defined on ELF, matching
   cfi_start's references to it.  */
.macro cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm
184
/* Pop \regs (plus the return address) from the stack and return,
   using BX for interworking-safe returns when __INTERWORKING__ is
   defined, otherwise loading PC directly.  \cond conditionalizes the
   whole sequence; \unwind, if given, is the function's start label so
   the restore can be annotated in .debug_frame.
   Don't pass dirn, it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, lr}
# else
	ldm\cond\dirn	sp!, {\regs, lr}
# endif
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	/* Caller is responsible for providing IT instruction.  */
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, pc}
# else
	ldm\cond\dirn	sp!, {\regs, pc}
# endif
	.endif
#endif
.endm
216
/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.
     do_it    - emit an IT block on Thumb-2, nothing on ARM.
     shift1   - shift-by-register (separate mov on ARM, direct op on Thumb-2).
     do_push/do_pop - push/pop a register list.
     shiftop  - ALU op with a register-shifted operand.  */
#if defined(__thumb2__)
.macro do_it cond, suffix=""
	it\suffix	\cond
.endm
.macro shift1 op, arg0, arg1, arg2
	\op	\arg0, \arg1, \arg2
.endm
#define do_push	push
#define do_pop	pop
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\shiftop \tmp, \src2, \shiftreg
	\name \dest, \src1, \tmp
.endm
#else
/* ARM versions: IT is not needed, and shifted operands are free.  */
.macro do_it cond, suffix=""
.endm
.macro shift1 op, arg0, arg1, arg2
	mov	\arg0, \arg1, \op \arg2
.endm
#define do_push	stmfd sp!,
#define do_pop	ldmfd sp!,
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\name \dest, \src1, \src2, \shiftop \shiftreg
.endm
#endif
247
/* Paste a condition suffix onto a two-part mnemonic.  */
#define COND(op1, op2, cond) op1 ## op2 ## cond
249
/* ARM-mode division-by-zero tail.  EABI: saturate r0 per \signed
   (all-ones for unsigned, INT_MAX/INT_MIN by sign for signed) and
   tail-call __aeabi_idiv0.  Non-EABI: call __div0 and return 0.  */
#ifdef __ARM_EABI__
.macro	ARM_LDIV0 name signed
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM (__aeabi_idiv0) __PLT__
.endm
#else
.macro	ARM_LDIV0 name signed
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm
#endif
270
271
/* Thumb-mode division-by-zero tail, same contract as ARM_LDIV0.
   Three EABI variants: Thumb-1-only targets call __aeabi_idiv0 via BL,
   Thumb-2 saturates in Thumb state, and ARM-capable Thumb-1 targets
   switch to ARM state via "bx pc" first.  */
#ifdef __ARM_EABI__
.macro THUMB_LDIV0 name signed
#ifdef NOT_ISA_TARGET_32BIT

	push	{r0, lr}
	movs	r0, #0
	bl	SYM(__aeabi_idiv0)
	@ We know we are not on armv4t, so pop pc is safe.
	pop	{r1, pc}

#elif defined(__thumb2__)
	.syntax unified
	.ifc \signed, unsigned
	cbz	r0, 1f
	mov	r0, #0xffffffff
1:
	.else
	cmp	r0, #0
	do_it	gt
	movgt	r0, #0x7fffffff
	do_it	lt
	movlt	r0, #0x80000000
	.endif
	b.w	SYM(__aeabi_idiv0) __PLT__
#else
	.align	2
	bx	pc		@ switch to ARM state for the code below
	nop
	.arm
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM(__aeabi_idiv0) __PLT__
	.thumb
#endif
.endm
#else
/* Non-EABI: call __div0 and return 0.  */
.macro THUMB_LDIV0 name signed
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	movs	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm
#endif
326
/* Close a function opened with FUNC_START: emit its .size.  */
.macro FUNC_END name
	SIZE (__\name)
.endm
330
/* Close a division routine: emit the shared Ldiv0 divide-by-zero
   handler (Thumb or ARM flavor), its unwind info, and the .size.  */
.macro DIV_FUNC_END name signed
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name \signed
#else
	ARM_LDIV0 \name \signed
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm
342
/* Open a global Thumb function named exactly \name (no "__" prefix
   added, unlike FUNC_START).  */
.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm
349
/* Function start macros.  Variants for ARM and Thumb.
   THUMB_CODE forces Thumb encoding, THUMB_FUNC marks the symbol as a
   Thumb entry point; both expand to nothing for ARM.  THUMB_SYNTAX is
   now always empty (the file is fully unified syntax), so the former
   __thumb2__/Thumb-1 distinction — whose two branches had become
   identical — has been collapsed.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#define THUMB_SYNTAX
#else
#define THUMB_FUNC
#define THUMB_CODE
#define THUMB_SYNTAX
#endif
365
/* Open a global function __\name in .text, in the mode (ARM/Thumb)
   selected by the THUMB_* macros above.  */
.macro FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
	THUMB_SYNTAX
SYM (__\name):
.endm
376
/* Define a typed (non-global) symbol \name at the current location.  */
.macro ARM_SYM_START name
	TYPE (\name)
	.align 0
SYM (\name):
.endm
382
/* Close a symbol opened with ARM_SYM_START: emit its .size.  */
.macro SYM_END name
	SIZE (\name)
.endm
386
/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  ARM_CALL branches to such a function,
   and EQUIV is the directive used to alias one (thumb_set vs set).  */

#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro ARM_FUNC_START name
	FUNC_START \name
	.syntax unified
.endm
#define EQUIV .thumb_set
.macro  ARM_CALL name
	bl	__\name
.endm

#elif defined(__INTERWORKING_STUBS__)

/* Thumb entry point followed by a switch to ARM state; the _L__\name
   label marks the start of the ARM-mode body.  */
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro  ARM_CALL name
	bl	_L__\name
.endm

#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

#ifdef NOT_ISA_TARGET_32BIT
/* No ARM state available: only EQUIV is meaningful here.  */
#define EQUIV .thumb_set
#else
/* Plain ARM-mode entry point.  */
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
	bl	__\name
.endm
#endif

#endif
440
/* Make __\new a global alias for __\old, preserving the Thumb entry
   bit when assembling for Thumb.  */
.macro FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm
449
/* Alias for functions declared with ARM_FUNC_START; also aliases the
   internal _L__ ARM-mode entry label when interworking stubs are in
   use.  Not provided on Thumb-1-only targets.  */
#ifndef NOT_ISA_TARGET_32BIT
.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm
#endif
459
/* Register aliases for the two 64-bit arguments passed in r0-r3:
   xxh/xxl are the high/low words of the first, yyh/yyl of the second.
   Word order within each pair follows target endianness.  */
#ifdef __ARMEB__
#define xxh r0
#define xxl r1
#define yyh r2
#define yyl r3
#else
#define xxh r1
#define xxl r0
#define yyh r3
#define yyl r2
#endif
471
/* Declare __\name as a weak symbol (EABI builds only).  */
#ifdef __ARM_EABI__
.macro	WEAK name
	.weak SYM (__\name)
.endm
#endif
477
#ifdef __thumb__
/* Register aliases used by the Thumb division code below.  Note that
   "overdone" and "result" both name r2: they are used by the mutually
   exclusive modulo (.if \modulo) and divide (.else) paths of
   THUMB_DIV_MOD_BODY.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif
494
495 /* ------------------------------------------------------------------------ */
496 /* Bodies of the division and modulo routines. */
497 /* ------------------------------------------------------------------------ */
498
/* Core unsigned-division loop.  On exit \result holds the quotient and
   \dividend has been reduced to the remainder; \divisor and \curbit
   are clobbered.  Three variants: an unrolled CLZ-based jump-table
   version for speed, a CLZ-based normalization for -Os, and a pure
   shift/compare fallback when CLZ is unavailable.
   Fix: "movnes" is divided-syntax; under the file-wide ".syntax
   unified" the flag-setting conditional form must be spelt "movsne".  */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if defined (__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__)

#if defined (__thumb2__)
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsb	\curbit, \curbit, #31
	adr	\result, 1f
	add	\curbit, \result, \curbit, lsl #4
	mov	\result, #0
	mov	pc, \curbit
.p2align 3
1:
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	@ Each unrolled step is padded to 16 bytes so the computed jump
	@ above can index it directly.
	cmp.w	\dividend, \divisor, lsl #shift
	nop.n
	adc.w	\result, \result, \result
	it	cs
	subcs.w	\dividend, \dividend, \divisor, lsl #shift
	.endr
#else
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2	@ jump into the unrolled sequence
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr
#endif

#else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
#if defined (__ARM_FEATURE_CLZ)

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* !defined (__ARM_FEATURE_CLZ) */

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* !defined (__ARM_FEATURE_CLZ) */

	@ Division loop, unrolled 4 bits per iteration.
1:	cmp	\dividend, \divisor
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	do_it	ne, t
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */

.endm
611 /* ------------------------------------------------------------------------ */
/* Compute \order = bit position of the (power-of-two) \divisor, i.e.
   log2(\divisor), so the caller can divide by shifting.  Without CLZ,
   \divisor is clobbered by a binary search; with CLZ it is preserved.  */
.macro ARM_DIV2_ORDER divisor, order

#if defined (__ARM_FEATURE_CLZ)

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
641 /* ------------------------------------------------------------------------ */
/* Core unsigned-modulo loop.  On exit \dividend holds the remainder;
   \divisor, \order and \spare are clobbered.  Variants parallel
   ARM_DIV_BODY: unrolled CLZ jump table, CLZ normalization for -Os,
   and a shift/compare fallback.
   Fix: "subges" is divided-syntax; under the file-wide ".syntax
   unified" the flag-setting conditional form must be spelt "subsge".  */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if defined(__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__)

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3	@ jump into the unrolled sequence
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
#if defined (__ARM_FEATURE_CLZ)

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* !defined (__ARM_FEATURE_CLZ) */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* !defined (__ARM_FEATURE_CLZ) */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */

.endm
730 /* ------------------------------------------------------------------------ */
/* Thumb-1 unsigned divide/modulo core, using the register aliases
   declared above (dividend, divisor, result/overdone, curbit, work).
   \modulo selects the variant: 1 leaves the remainder in "dividend",
   0 leaves the quotient in "result".  The caller sets curbit=1 and,
   for divide, result=0 beforehand.  */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	movs	work, #1
	lsls	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsls	divisor, #4
	lsls	curbit, #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsls	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsls	divisor, #1
	lsls	curbit, #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
	.if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done, we can fix them up
	@ afterwards.
	movs	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	subs	dividend, dividend, divisor
LSYM(Lover1):
	lsrs	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	subs	dividend, dividend, work
	@ Record this subtraction: rotate curbit right by 1 and OR it
	@ into overdone (ip preserves curbit across the rotate).
	mov	ip, curbit
	movs	work, #1
	rors	curbit, work
	orrs	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsrs	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	subs	dividend, dividend, work
	mov	ip, curbit
	movs	work, #2
	rors	curbit, work
	orrs	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsrs	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	subs	dividend, dividend, work
	mov	ip, curbit
	movs	work, #3
	rors	curbit, work
	orrs	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
	.else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	subs	dividend, dividend, divisor
	orrs	result, result, curbit
LSYM(Lover1):
	lsrs	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	subs	dividend, dividend, work
	lsrs	work, curbit, #1
	orrs	result, work
LSYM(Lover2):
	lsrs	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	subs	dividend, dividend, work
	lsrs	work, curbit, #2
	orrs	result, work
LSYM(Lover3):
	lsrs	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	subs	dividend, dividend, work
	lsrs	work, curbit, #3
	orrs	result, work
LSYM(Lover4):
	.endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsrs	curbit, #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsrs	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
	.if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	movs	work, #0xe
	lsls	work, #28
	ands	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	movs	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	mov	curbit, ip
	movs	work, #3
	rors	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsrs	work, divisor, #3
	adds	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	movs	work, #2
	rors	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsrs	work, divisor, #2
	adds	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	movs	work, #1
	rors	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsrs	work, divisor, #1
	adds	dividend, work
	.endif
LSYM(Lgot_result):
.endm
887
888 /* If performance is preferred, the following functions are provided. */
889 #if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
890
/* Entry dispatch for div(n): jump to \label if the dividend shifted
   right by \n is lower (unsigned) than the divisor.  */
.macro BranchToDiv n, label
	lsrs	curbit, dividend, \n
	cmp	curbit, divisor
	blo	\label
.endm
897
/* Body of div(n).  Compare (dividend >> \n) with the divisor; if it is
   not lower, subtract (divisor << \n) from the dividend.  Either way,
   shift the resulting carry (the quotient bit) into "result".  */
.macro DoDiv n
	lsrs	curbit, dividend, \n
	cmp	curbit, divisor
	bcc	1f
	lsls	curbit, divisor, \n
	subs	dividend, dividend, curbit

1:	adcs	result, result
.endm
909
/* The body of division with positive divisor.  Unless the divisor is very
   big, shift it up in multiples of four bits, since this is the amount of
   unwinding in the main division loop.  Continue shifting until the divisor
   is larger than the dividend.  On exit the quotient is in "dividend"
   (r0) and the remainder in "divisor" (r1) — see the aeabi_uidivmod
   caller below.
   Fix: the macro invocations were spelt "Dodiv" while the macro is
   defined as "DoDiv"; this only worked because gas matches macro names
   case-insensitively.  Normalized to "DoDiv" to match the definition
   and THUMB1_Div_Negative.  */
.macro THUMB1_Div_Positive
	movs	result, #0
	BranchToDiv #1, LSYM(Lthumb1_div1)
	BranchToDiv #4, LSYM(Lthumb1_div4)
	BranchToDiv #8, LSYM(Lthumb1_div8)
	BranchToDiv #12, LSYM(Lthumb1_div12)
	BranchToDiv #16, LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_large_positive):
	movs	result, #0xff
	lsls	divisor, divisor, #8
	rev	result, result
	lsrs	curbit, dividend, #16
	cmp	curbit, divisor
	blo	1f
	asrs	result, #8
	lsls	divisor, divisor, #8
	beq	LSYM(Ldivbyzero_waypoint)

1:	lsrs	curbit, dividend, #12
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div12)
	b	LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_loop):
	lsrs	divisor, divisor, #8
LSYM(Lthumb1_div16):
	DoDiv	#15
	DoDiv	#14
	DoDiv	#13
	DoDiv	#12
LSYM(Lthumb1_div12):
	DoDiv	#11
	DoDiv	#10
	DoDiv	#9
	DoDiv	#8
	bcs	LSYM(Lthumb1_div_loop)
LSYM(Lthumb1_div8):
	DoDiv	#7
	DoDiv	#6
	DoDiv	#5
LSYM(Lthumb1_div5):
	DoDiv	#4
LSYM(Lthumb1_div4):
	DoDiv	#3
LSYM(Lthumb1_div3):
	DoDiv	#2
LSYM(Lthumb1_div2):
	DoDiv	#1
LSYM(Lthumb1_div1):
	subs	divisor, dividend, divisor
	bcs	1f
	cpy	divisor, dividend

1:	adcs	result, result
	cpy	dividend, result
	RET

LSYM(Ldivbyzero_waypoint):
	b	LSYM(Ldiv0)
.endm
973
/* The body of division when operands may be negative.  Similar to
   THUMB1_Div_Positive except that the shift steps are in multiples
   of six bits.  The operands' signs are saved (XORed into "curbit",
   kept in ip) so the quotient and remainder can be negated on exit.  */
.macro THUMB1_Div_Negative
	lsrs	result, divisor, #31
	beq	1f
	negs	divisor, divisor

1:	asrs	curbit, dividend, #32	@ shift by 32: carry = sign bit
	bcc	2f
	negs	dividend, dividend

2:	eors	curbit, result		@ sign of the quotient
	movs	result, #0
	cpy	ip, curbit
	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_large):
	movs	result, #0xfc
	lsls	divisor, divisor, #6
	rev	result, result
	lsrs	curbit, dividend, #8
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)

	lsls	divisor, divisor, #6
	asrs	result, result, #6
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)

	lsls	divisor, divisor, #6
	asrs	result, result, #6
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)

	lsls	divisor, divisor, #6
	beq	LSYM(Ldivbyzero_negative)
	asrs	result, result, #6
	b	LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_negative_loop):
	lsrs	divisor, divisor, #6
LSYM(Lthumb1_div_negative8):
	DoDiv	#7
	DoDiv	#6
	DoDiv	#5
	DoDiv	#4
LSYM(Lthumb1_div_negative4):
	DoDiv	#3
	DoDiv	#2
	bcs	LSYM(Lthumb1_div_negative_loop)
	DoDiv	#1
	subs	divisor, dividend, divisor
	bcs	1f
	cpy	divisor, dividend

	@ Apply the saved signs: carry/sign of the ip value select
	@ negation of the quotient and remainder.
1:	cpy	curbit, ip
	adcs	result, result
	asrs	curbit, curbit, #1
	cpy	dividend, result
	bcc	2f
	negs	dividend, dividend
	cmp	curbit, #0

2:	bpl	3f
	negs	divisor, divisor

3:	RET

LSYM(Ldivbyzero_negative):
	cpy	curbit, ip
	asrs	curbit, curbit, #1
	bcc	LSYM(Ldiv0)
	negs	dividend, dividend
.endm
1048 #endif /* ARM Thumb version. */
1049
1050 /* ------------------------------------------------------------------------ */
1051 /* Start of the Real Functions */
1052 /* ------------------------------------------------------------------------ */
1053 #ifdef L_udivsi3
1054
/* __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
   In: r0 = dividend, r1 = divisor.  Out: r0 = quotient.
   Branches to Ldiv0 (emitted by DIV_FUNC_END below) when r1 == 0.  */
#if defined(__prefer_thumb__)

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3
#if defined(__OPTIMIZE_SIZE__)

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(udivsi3_skip_div0_test):
	movs	curbit, #1
	movs	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	movs	r0, result
	pop	{ work }
	RET

/* Implementation of aeabi_uidiv for ARMv6m.  This version is only
   used in ARMv6-M when we need an efficient implementation.  */
#else
LSYM(udivsi3_skip_div0_test):
	THUMB1_Div_Positive

#endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)

/* Hardware divide available: use it directly.  */
	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	udiv	r0, r0, r1
	RET

#else /* ARM version/Thumb-2.  */

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	/* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
	   check for division-by-zero a second time.  */
LSYM(udivsi3_skip_div0_test):
	subs	r2, r1, #1
	do_it	eq
	RETc(eq)			@ divisor == 1: quotient is r0
	bcc	LSYM(Ldiv0)		@ divisor == 0
	cmp	r0, r1
	bls	11f			@ dividend <= divisor
	tst	r1, r2
	beq	12f			@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

	@ dividend <= divisor: quotient is 1 (equal) or 0 (lower).
11:	do_it	eq, e
	moveq	r0, #1
	movne	r0, #0
	RET

	@ Power-of-two divisor: divide by shifting.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3 unsigned
1131
/* __aeabi_uidivmod: unsigned 32-bit divide returning both results.
   In: r0 = dividend, r1 = divisor.  Out: r0 = quotient, r1 = remainder
   (remainder computed as r1 - quotient * r1_orig where needed).  */
#if defined(__prefer_thumb__)
FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
# if defined(__OPTIMIZE_SIZE__)
	push	{r0, r1, lr}
	bl	LSYM(udivsi3_skip_div0_test)
	POP	{r1, r2, r3}
	muls	r2, r0			@ r2 = quotient * divisor
	subs	r1, r1, r2		@ remainder = dividend - r2
	bx	r3
# else
	/* Both the quotient and remainder are calculated simultaneously
	   in THUMB1_Div_Positive.  There is no need to calculate the
	   remainder again here.  */
	b	LSYM(udivsi3_skip_div0_test)
	RET
# endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	udiv	r0, r0, r1
	mls	r1, r0, r1, r2		@ remainder = dividend - quot*divisor
	RET
#else
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(udivsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ remainder = dividend - r3
	RET
#endif
	FUNC_END aeabi_uidivmod
1171
1172 #endif /* L_udivsi3 */
1173 /* ------------------------------------------------------------------------ */
1174 #ifdef L_umodsi3
1175
/* __umodsi3: unsigned 32-bit modulo.
   In: r0 = dividend, r1 = divisor.  Out: r0 = remainder.
   Branches to Ldiv0 (emitted by DIV_FUNC_END below) when r1 == 0.  */
#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1

	ARM_FUNC_START umodsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	udiv	r2, r0, r1
	mls	r0, r1, r2, r0		@ r0 = dividend - quot*divisor
	RET

#elif defined(__thumb__)

	FUNC_START umodsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	movs	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET				@ dividend < divisor: it is the remainder

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version.  */

	FUNC_START umodsi3

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif  /* ARM version.  */

	DIV_FUNC_END umodsi3 unsigned
1224
1225 #endif /* L_umodsi3 */
1226 /* ------------------------------------------------------------------------ */
#ifdef L_divsi3

#if defined(__prefer_thumb__)

/* __divsi3 / __aeabi_idiv: signed 32-bit division, r0 = r0 / r1.  */
FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3
#if defined(__OPTIMIZE_SIZE__)

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	push	{ work }
	movs	work, dividend
	eors	work, divisor		@ Save the sign of the result.
	mov	ip, work
	movs	curbit, #1
	movs	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	negs	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	negs	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	movs	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	negs	r0, r0			@ signs differed: negate the quotient
LSYM(Lover12):
	pop	{ work }
	RET

/* Implementation of aeabi_idiv for ARMv6m.  This version is only
   used in ARMv6-M when we need an efficient implementation.  */
#else
LSYM(divsi3_skip_div0_test):
	cpy	curbit, dividend
	orrs	curbit, divisor		@ negative if either operand is negative
	bmi	LSYM(Lthumb1_div_negative)

LSYM(Lthumb1_div_positive):
	THUMB1_Div_Positive

LSYM(Lthumb1_div_negative):
	THUMB1_Div_Negative

#endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)

/* Hardware integer divide available: a single sdiv suffices.  */
ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	sdiv	r0, r0, r1
	RET

#else /* ARM/Thumb-2 version.  */

ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	eor	ip, r0, r1		@ save the sign of the result.
	do_it	mi
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	subs	r2, r1, #1		@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	do_it	mi
	rsbmi	r3, r0, #0		@ positive dividend value
	cmp	r3, r1
	bls	11f			@ |dividend| <= divisor: result is 0 or +/-1
	tst	r1, r2			@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	do_it	mi
	rsbmi	r0, r0, #0		@ signs differed: negate the quotient
	RET

10:	teq	ip, r0			@ same sign ?
	do_it	mi
	rsbmi	r0, r0, #0
	RET

11:	do_it	lo
	movlo	r0, #0			@ |dividend| < divisor: quotient is 0
	do_it	eq,t
	moveq	r0, ip, asr #31		@ |dividend| == divisor: quotient is +/-1,
	orreq	r0, r0, #1		@ sign taken from ip
	RET

12:	ARM_DIV2_ORDER r1, r2		@ power-of-2 divisor: divide by shifting

	cmp	ip, #0
	mov	r0, r3, lsr r2
	do_it	mi
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3 signed
1343
#if defined(__prefer_thumb__)
/* __aeabi_idivmod: signed division returning the quotient in r0 and
   the remainder in r1, per the ARM run-time ABI.  */
FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
# if defined(__OPTIMIZE_SIZE__)
	push	{r0, r1, lr}
	bl	LSYM(divsi3_skip_div0_test)
	POP	{r1, r2, r3}		@ r1 = dividend, r2 = divisor, r3 = return addr
	muls	r2, r0			@ r2 = quotient * divisor
	subs	r1, r1, r2		@ r1 = remainder
	bx	r3
# else
	/* Both the quotient and remainder are calculated simultaneously
	   in THUMB1_Div_Positive and THUMB1_Div_Negative.  There is no
	   need to calculate the remainder again here.  */
	b	LSYM(divsi3_skip_div0_test)
	RET
# endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	sdiv	r0, r0, r1		@ r0 = quotient
	mls	r1, r0, r1, r2		@ r1 = dividend - quotient * divisor
	RET
#else
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(divsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }	@ r1 = dividend, r2 = divisor
	mul	r3, r2, r0		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = remainder
	RET
#endif
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
1385 /* ------------------------------------------------------------------------ */
#ifdef L_modsi3

#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1

/* __modsi3: signed r0 % r1 using the hardware divide instruction.  */
ARM_FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	sdiv	r2, r0, r1		@ r2 = quotient
	mls	r0, r1, r2, r0		@ r0 = r0 - r1 * r2 = remainder
	RET

#elif defined(__thumb__)

FUNC_START modsi3

	movs	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	negs	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ Need to save the sign of the dividend, unfortunately, we need
	@ work later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	negs	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }		@ recover the original dividend
	cmp	work, #0
	bpl	LSYM(Lover12)
	negs	dividend, dividend	@ remainder takes the sign of the dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	movs	ip, r0			@ preserve sign of dividend
	rsbmi	r0, r0, #0		@ if negative make positive
	subs	r2, r1, #1		@ compare divisor with 1
	cmpne	r0, r1			@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2			@ see if divisor is power of 2
	andeq	r0, r0, r2		@ if so, remainder = dividend & (divisor - 1)
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0		@ remainder takes the sign of the dividend
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3 signed

#endif /* L_modsi3 */
1458 /* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

/* Default divide-by-zero handlers: simply return to the caller.  The
   result left in the caller's registers is unspecified, as the ABI
   permits.  Declared WEAK so applications can override them.  */
#ifdef __ARM_EABI__
	WEAK aeabi_idiv0
	WEAK aeabi_ldiv0
FUNC_START aeabi_idiv0
FUNC_START aeabi_ldiv0
	RET
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
FUNC_START div0
	RET
	FUNC_END div0
#endif

#endif /* L_dvmd_tls */
1476 /* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls.
@ Raises SIGFPE via raise(3) and then returns to the caller.

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE	8

#ifdef __ARM_EABI__
	cfi_start	__aeabi_ldiv0, LSYM(Lend_aeabi_ldiv0)
	WEAK aeabi_idiv0
	WEAK aeabi_ldiv0
ARM_FUNC_START aeabi_idiv0
ARM_FUNC_START aeabi_ldiv0
	do_push	{r1, lr}		@ r1 pushed only to keep sp 8-byte aligned
98:	cfi_push 98b - __aeabi_ldiv0, 0xe, -0x4, 0x8
#else
	cfi_start	__div0, LSYM(Lend_div0)
ARM_FUNC_START div0
	do_push	{r1, lr}
98:	cfi_push 98b - __div0, 0xe, -0x4, 0x8
#endif

	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1 unwind=98b

#ifdef __ARM_EABI__
	cfi_end	LSYM(Lend_aeabi_ldiv0)
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	cfi_end	LSYM(Lend_div0)
	FUNC_END div0
#endif

#endif /* L_dvmd_lnx */
#ifdef L_clear_cache
#if defined __ARM_EABI__ && defined __linux__
@ EABI GNU/Linux call to cacheflush syscall.
@ In: r0 = start address, r1 = end address (from the caller).
ARM_FUNC_START clear_cache
	do_push	{r7}
	/* r7 = 0xf0002, the __ARM_NR_cacheflush private syscall number.  */
#if __ARM_ARCH >= 7 || defined(__ARM_ARCH_6T2__)
	movw	r7, #2
	movt	r7, #0xf
#else
	mov	r7, #0xf0000
	add	r7, r7, #2
#endif
	mov	r2, #0			@ flags argument must be zero
	swi	0
	do_pop	{r7}
	RET
	FUNC_END clear_cache
#else
#error "This is only for ARM EABI GNU/Linux"
#endif
#endif /* L_clear_cache */
1533
#ifdef L_speculation_barrier
/* __speculation_barrier: prevent speculative execution from crossing
   this point (Spectre-style mitigation helper).  */
FUNC_START speculation_barrier
#if __ARM_ARCH >= 7
	isb
	dsb	sy
#elif defined __ARM_EABI__ && defined __linux__
	/* We don't have a speculation barrier directly for this
	   platform/architecture variant.  But we can use a kernel
	   clear_cache service routine which will emit such instructions
	   if run on a later version of the architecture.  We don't
	   really want to flush the cache, but we must give it a valid
	   address, so just clear pc..pc+1.  */
#if defined __thumb__ && !defined __thumb2__
	push	{r7}
	movs	r7, #0xf		@ build __ARM_NR_cacheflush (0xf0002)
	lsls	r7, #16
	adds	r7, #2
	adr	r0, . + 4		@ r0/r1 = trivial 1-byte range at pc
	adds	r1, r0, #1
	movs	r2, #0
	svc	0
	pop	{r7}
#else
	do_push	{r7}
#ifdef __ARM_ARCH_6T2__
	movw	r7, #2
	movt	r7, #0xf
#else
	mov	r7, #0xf0000
	add	r7, r7, #2
#endif
	add	r0, pc, #0 /* ADR.  */
	add	r1, r0, #1
	mov	r2, #0
	svc	0
	do_pop	{r7}
#endif /* Thumb1 only */
#else
#warning "No speculation barrier defined for this platform"
#endif
	RET
	FUNC_END speculation_barrier
#endif
1577 /* ------------------------------------------------------------------------ */
1578 /* Dword shift operations. */
1579 /* All the following Dword shift variants rely on the fact that
1580 shft xxx, Reg
1581 is in fact done as
1582 shft xxx, (Reg & 255)
1583 so for Reg value in (32...63) and (-1...-31) we will get zero (in the
1584 case of logical shifts) or the sign (for asr). */
1585
1586 #ifdef __ARMEB__
1587 #define al r1
1588 #define ah r0
1589 #else
1590 #define al r0
1591 #define ah r1
1592 #endif
1593
1594 /* Prevent __aeabi double-word shifts from being produced on SymbianOS. */
1595 #ifndef __symbian__
1596
#ifdef L_lshrdi3

/* __lshrdi3 / __aeabi_llsr: 64-bit logical shift right.
   Value in {ah:al}, shift count in r2; result in {ah:al}.  Relies on
   the (Reg & 255) shift-count behavior described above, so the
   out-of-range partial shifts below contribute zero.  */
FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
	lsrs	al, r2			@ low word, correct when r2 < 32
	movs	r3, ah
	lsrs	ah, r2
	mov	ip, r3
	subs	r2, #32
	lsrs	r3, r2			@ high -> low when r2 >= 32
	orrs	al, r3
	negs	r2, r2
	mov	r3, ip
	lsls	r3, r2			@ bits shifted out of AH when r2 < 32
	orrs	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3		@ shift count >= 32
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, lsr r2
	RET
#endif
	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif
1628
#ifdef L_ashrdi3

/* __ashrdi3 / __aeabi_lasr: 64-bit arithmetic shift right.
   Value in {ah:al}, shift count in r2; result in {ah:al}.  */
FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
	lsrs	al, r2			@ low word, correct when r2 < 32
	movs	r3, ah
	asrs	ah, r2
	subs	r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	bmi	1f
	mov	ip, r3
	asrs	r3, r2			@ high -> low when r2 >= 32
	orrs	al, r3
	mov	r3, ip
1:
	negs	r2, r2
	lsls	r3, r2			@ bits shifted out of AH when r2 < 32
	orrs	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3		@ shift count >= 32
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, asr r2
	RET
#endif

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif
1665
#ifdef L_ashldi3

/* __ashldi3 / __aeabi_llsl: 64-bit shift left.
   Value in {ah:al}, shift count in r2; result in {ah:al}.  */
FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
	lsls	ah, r2			@ high word, correct when r2 < 32
	movs	r3, al
	lsls	al, r2
	mov	ip, r3
	subs	r2, #32
	lsls	r3, r2			@ low -> high when r2 >= 32
	orrs	ah, r3
	negs	r2, r2
	mov	r3, ip
	lsrs	r3, r2			@ bits shifted out of AL when r2 < 32
	orrs	ah, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3		@ shift count >= 32
	orrmi	ah, ah, al, lsr ip
	mov	al, al, lsl r2
	RET
#endif
	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif
1697
1698 #endif /* __symbian__ */
1699
#ifdef L_clzsi2
#ifdef NOT_ISA_TARGET_32BIT
/* __clzsi2 for Thumb-1 cores without CLZ: binary search over 16/8/4-bit
   chunks, then a 16-entry table for the remaining nibble.  */
FUNC_START clzsi2
	movs	r1, #28			@ r1 accumulates the leading-zero count
	movs	r3, #1
	lsls	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsrs	r0, r0, #16
	subs	r1, r1, #16
2:	lsrs	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsrs	r0, r0, #8
	subs	r1, r1, #8
2:	lsrs	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsrs	r0, r0, #4
	subs	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]		@ table lookup on the final nibble
	adds	r0, r0, r1
	bx	lr
	.align 2
1:
	.byte	4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
	FUNC_END clzsi2
#else
ARM_FUNC_START clzsi2
# if defined (__ARM_FEATURE_CLZ)
	clz	r0, r0
	RET
# else
	mov	r1, #28
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]
	add	r0, r0, r1
	RET
	.align 2
1:
	.byte	4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
# endif /* !defined (__ARM_FEATURE_CLZ) */
	FUNC_END clzsi2
#endif
#endif /* L_clzsi2 */
1758
#ifdef L_clzdi2
#if !defined (__ARM_FEATURE_CLZ)

/* __clzdi2 without a CLZ instruction: run __clzsi2 on the relevant
   32-bit word of {xxh:xxl} and add 32 when the high word is zero.  */
# ifdef NOT_ISA_TARGET_32BIT
FUNC_START clzdi2
	push	{r4, lr}
	cmp	xxh, #0
	bne	1f
# ifdef __ARMEB__
	movs	r0, xxl			@ high word zero: count in low word
	bl	__clzsi2
	adds	r0, r0, #32
	b	2f
1:
	bl	__clzsi2		@ r0 already holds the high word
# else
	bl	__clzsi2		@ high word zero: r0 already holds xxl
	adds	r0, r0, #32
	b	2f
1:
	movs	r0, xxh
	bl	__clzsi2
# endif
2:
	pop	{r4, pc}
# else /* NOT_ISA_TARGET_32BIT */
ARM_FUNC_START clzdi2
	do_push	{r4, lr}
	cmp	xxh, #0
	bne	1f
# ifdef __ARMEB__
	mov	r0, xxl
	bl	__clzsi2
	add	r0, r0, #32
	b	2f
1:
	bl	__clzsi2
# else
	bl	__clzsi2
	add	r0, r0, #32
	b	2f
1:
	mov	r0, xxh
	bl	__clzsi2
# endif
2:
	RETLDM	r4
	FUNC_END clzdi2
# endif /* NOT_ISA_TARGET_32BIT */

#else /* defined (__ARM_FEATURE_CLZ) */

ARM_FUNC_START clzdi2
	cmp	xxh, #0
	do_it	eq, et
	clzeq	r0, xxl
	clzne	r0, xxh
	addeq	r0, r0, #32		@ whole high word was zero
	RET
	FUNC_END clzdi2

#endif
#endif /* L_clzdi2 */
1822
#ifdef L_ctzsi2
/* __ctzsi2: count trailing zeros.  r0 & -r0 isolates the lowest set
   bit; the leading-zero machinery (table or CLZ) then yields the
   trailing-zero count.  */
#ifdef NOT_ISA_TARGET_32BIT
FUNC_START ctzsi2
	negs	r1, r0
	ands	r0, r0, r1		@ keep only the lowest set bit
	movs	r1, #28
	movs	r3, #1
	lsls	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsrs	r0, r0, #16
	subs	r1, r1, #16
2:	lsrs	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsrs	r0, r0, #8
	subs	r1, r1, #8
2:	lsrs	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsrs	r0, r0, #4
	subs	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]
	subs	r0, r0, r1
	bx	lr
	.align 2
1:
	.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
	FUNC_END ctzsi2
#else
ARM_FUNC_START ctzsi2
	rsb	r1, r0, #0
	and	r0, r0, r1		@ keep only the lowest set bit
# if defined (__ARM_FEATURE_CLZ)
	clz	r0, r0
	rsb	r0, r0, #31		@ ctz(x) = 31 - clz(x & -x)
	RET
# else
	mov	r1, #28
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]
	sub	r0, r0, r1
	RET
	.align 2
1:
	.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
# endif /* !defined (__ARM_FEATURE_CLZ) */
	FUNC_END ctzsi2
#endif
#endif /* L_ctzsi2 */
1886
1887 /* ------------------------------------------------------------------------ */
1888 /* These next two sections are here despite the fact that they contain Thumb
1889 assembler because their presence allows interworked code to be linked even
1890 when the GCC library is this one. */
1891
1892 /* Do not build the interworking functions when the target architecture does
1893 not support Thumb instructions. (This can be a multilib option). */
1894 #if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
1895 || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
1896 || __ARM_ARCH >= 6
1897
#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */

	.text
	.align 0
	.force_thumb

/* Emit _call_via_<reg>: a two-instruction Thumb stub that simply
   BXes to the address held in <reg>.  */
.macro call_via register
THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */
1936
1937 /* Don't bother with the old interworking routines for Thumb-2. */
1938 /* ??? Maybe only omit these on "m" variants. */
1939 #if !defined(__thumb2__) && __ARM_ARCH_ISA_ARM
1940
#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */

	.text
	.align 0

	.code	32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]		@ recover return address saved by the stub
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]
	bx	lr

/* Emit an interworking stub that stashes the return address at
   [\frame, #-4] instead of on the stack.  */
.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc			@ switch to ARM state
	nop

	.code	32
	tst	\register, #1		@ Thumb target?  BX handles it directly
	streq	lr, [\frame, #-4]	@ ARM target: save real return address
	adreq	lr, _arm_return_\frame	@ and return through the fixup stub
	bx	\register

	SIZE	(\name)
.endm

/* Emit the plain (stack-based) stub plus the r7/r11 frame variants.  */
.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-8]!		@ ARM target: push return address
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  */
	.code 16

	THUMB_FUNC_START _interwork_call_via_lr

	bx	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr, pc}		@ ARM target: push return address
	mov	ip, lr			@ target address, freeing lr
	adreq	lr, _arm_return
	bx	ip

	SIZE (_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
2065 #endif /* !__thumb2__ */
2066
/* Functions to support compact pic switch tables in thumb1 state.
   All these routines take an index into the table in r0.  The
   table is at LR & ~1 (but this must be rounded up in the case
   of 32-bit entries).  They are only permitted to clobber r12
   and r14 and r0 must be preserved on exit.  */
#ifdef L_thumb1_case_sqi

/* __gnu_thumb1_case_sqi: switch dispatch through a table of signed
   byte offsets (in halfwords) located at LR & ~1.  In: r0 = index,
   preserved on exit; only r12 and r14 may be clobbered.  */
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_sqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ clear the Thumb bit to get the table address
	lsls	r1, r1, #1
	ldrsb	r1, [r1, r0]		@ signed byte offset for this case
	lsls	r1, r1, #1		@ offsets are counted in halfwords
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_sqi)
#endif
2090
#ifdef L_thumb1_case_uqi

/* __gnu_thumb1_case_uqi: as __gnu_thumb1_case_sqi, but the table
   holds unsigned byte offsets.  r0 = index, preserved on exit.  */
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ clear the Thumb bit to get the table address
	lsls	r1, r1, #1
	ldrb	r1, [r1, r0]		@ unsigned byte offset for this case
	lsls	r1, r1, #1		@ offsets are counted in halfwords
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uqi)
#endif
2109
#ifdef L_thumb1_case_shi

/* __gnu_thumb1_case_shi: switch dispatch through a table of signed
   halfword offsets at LR & ~1.  r0 = index, preserved on exit.  */
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_shi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ clear the Thumb bit
	lsls	r0, r0, #1		@ scale index for halfword entries
	lsls	r1, r1, #1
	ldrsh	r1, [r1, r0]		@ signed halfword offset for this case
	lsls	r1, r1, #1		@ offsets are counted in halfwords
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_shi)
#endif
2129
#ifdef L_thumb1_case_uhi

/* __gnu_thumb1_case_uhi: as __gnu_thumb1_case_shi, but the table
   holds unsigned halfword offsets.  r0 = index, preserved on exit.  */
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uhi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ clear the Thumb bit
	lsls	r0, r0, #1		@ scale index for halfword entries
	lsls	r1, r1, #1
	ldrh	r1, [r1, r0]		@ unsigned halfword offset for this case
	lsls	r1, r1, #1		@ offsets are counted in halfwords
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uhi)
#endif
2149
#ifdef L_thumb1_case_si

/* __gnu_thumb1_case_si: switch dispatch through a word-aligned table
   of 32-bit offsets following the call; entries are relative to the
   table base.  r0 = index, preserved on exit.  */
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_si
	push	{r0, r1}
	mov	r1, lr
	adds.n	r1, r1, #2	/* Align to word.  */
	lsrs	r1, r1, #2
	lsls	r0, r0, #2	@ scale index for word entries
	lsls	r1, r1, #2	@ r1 = word-aligned table base
	ldr	r0, [r1, r0]
	adds	r0, r0, r1	@ entry is relative to the table base
	mov	lr, r0
	pop	{r0, r1}
	mov	pc, lr		/* We know we were called from thumb code.  */
	SIZE (__gnu_thumb1_case_si)
#endif
2170
2171 #endif /* Arch supports thumb. */
2172
/* DWARF CFI bracketing helpers used by the IEEE754/bpabi sources
   included below.  */
	.macro	CFI_START_FUNCTION
	.cfi_startproc
	.cfi_remember_state
	.endm

	.macro	CFI_END_FUNCTION
	.cfi_restore_state
	.cfi_endproc
	.endm
2182
2183 #ifndef __symbian__
2184 /* The condition here must match the one in gcc/config/arm/elf.h and
2185 libgcc/config/arm/t-elf. */
2186 #ifndef NOT_ISA_TARGET_32BIT
2187 #include "ieee754-df.S"
2188 #include "ieee754-sf.S"
2189 #include "bpabi.S"
2190 #else /* NOT_ISA_TARGET_32BIT */
2191 #include "bpabi-v6m.S"
2192 #endif /* NOT_ISA_TARGET_32BIT */
2193 #endif /* !__symbian__ */
2194