@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
3
/* Copyright (C) 1995-2017 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
24
/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
/* Emit an empty .note.GNU-stack section so the linker does not mark the
   output as needing an executable stack.  */
.section .note.GNU-stack,"",%progbits
.previous
#endif /* __ELF__ and __linux__ */

#ifdef __ARM_EABI__
/* Some attributes that are common to all routines in this file.  */
/* Tag_ABI_align_needed: This code does not require 8-byte
   alignment from the caller.  */
/* .eabi_attribute 24, 0 -- default setting.  */
/* Tag_ABI_align_preserved: This code preserves 8-byte
   alignment in any callee.  */
	.eabi_attribute 25, 1
#endif /* __ARM_EABI__ */
40 /* ------------------------------------------------------------------------ */
41
/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.
   SYM(x) expands to the platform's user-label form of x (e.g. "_x"
   where the ABI prepends an underscore).  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#elif defined __vxworks && !defined __PIC__
#define __PLT__ /* Not supported by the kernel loader.  */
#else
#define __PLT__ (PLT)
#endif
/* ELF symbol-table bookkeeping: mark functions and record their size.  */
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
/* Local symbols get a '.' prefix on ELF to keep them out of the
   symbol table.  */
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif
74
/* Function end macros.  Variants for interworking.  */

/* Collapse the many __ARM_ARCH_<x>__ feature macros into a single
   numeric __ARM_ARCH__ that the rest of this file can compare.  */

#if defined(__ARM_ARCH_2__)
# define __ARM_ARCH__ 2
#endif

#if defined(__ARM_ARCH_3__)
# define __ARM_ARCH__ 3
#endif

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
	|| defined(__ARM_ARCH_6M__)
# define __ARM_ARCH__ 6
#endif

#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
	|| defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
	|| defined(__ARM_ARCH_7EM__)
# define __ARM_ARCH__ 7
#endif

#if defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8M_BASE__) \
	|| defined(__ARM_ARCH_8M_MAIN__)
# define __ARM_ARCH__ 8
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* There are times when we might prefer Thumb1 code even if ARM code is
   permitted, for example, the code might be smaller, or there might be
   interworking problems with switching to ARM state if interworking is
   disabled.  */
#if (defined(__thumb__)			\
     && !defined(__thumb2__)		\
     && (!defined(__THUMB_INTERWORK__)	\
	 || defined (__OPTIMIZE_SIZE__)	\
	 || !__ARM_ARCH_ISA_ARM))
# define __prefer_thumb__
#endif

/* Thumb1-only targets (e.g. ARMv6-M): no 32-bit ARM instruction set.  */
#if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1
#define NOT_ISA_TARGET_32BIT 1
#endif
135
/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

/* BX is available: return with bx so ARM/Thumb interworking works.
   RETc(x) is the conditional form (condition pasted onto bx).  */
# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

/* Pre-v4t: no BX instruction; a plain mov to pc returns (no state
   change is possible on these cores anyway).  */
# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif
164
/* Hand-rolled DWARF call-frame info, emitted directly into .debug_frame.
   These macros append raw CFA opcodes; the byte values and operand
   encodings must match the DWARF spec exactly.  */

/* Record that at \advance bytes into the function, register \reg has been
   restored and the CFA is now at sp + \cfa_offset.  */
.macro cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm

/* Record that at \advance bytes into the function, register \reg was saved
   at CFA + \offset (\offset is negative; the data alignment factor is -4,
   hence the division by -4) and the CFA is now at sp + \cfa_offset.  */
.macro cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm

/* Open a CIE + FDE pair covering \start_label..\end_label.  */
.macro cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
	.4byte	0xffffffff	@ CIE Identifier Tag
	.byte	0x1	@ CIE Version
	.ascii	"\0"	@ CIE Augmentation
	.uleb128 0x1	@ CIE Code Alignment Factor
	.sleb128 -4	@ CIE Data Alignment Factor
	.byte	0xe	@ CIE RA Column
	.byte	0xc	@ DW_CFA_def_cfa
	.uleb128 0xd
	.uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm

/* Close the FDE opened by cfi_start and define \end_label.  */
.macro cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align 2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm
223
/* Don't pass dirn, it's there just to get token pasting right.  */

/* Pop \regs (plus the saved return address) off the stack and return.
   With interworking, LR is reloaded and the return is a bx so the
   caller's instruction set is honoured; otherwise PC is popped directly.
   \unwind, if given, is the function symbol used to emit CFI for the
   restore of LR.  */
.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, lr}
# else
	ldm\cond\dirn	sp!, {\regs, lr}
# endif
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	/* Caller is responsible for providing IT instruction.  */
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, pc}
# else
	ldm\cond\dirn	sp!, {\regs, pc}
# endif
	.endif
#endif
.endm
255
/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.  */
#if defined(__thumb2__)
/* Thumb-2 requires an IT instruction before each conditional op.  */
.macro do_it cond, suffix=""
	it\suffix	\cond
.endm
/* Shift by register: a real three-operand instruction on Thumb-2.  */
.macro shift1 op, arg0, arg1, arg2
	\op	\arg0, \arg1, \arg2
.endm
#define do_push	push
#define do_pop	pop
/* Condition code goes after the second mnemonic part on Thumb-2.  */
#define COND(op1, op2, cond) op1 ## op2 ## cond
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\shiftop \tmp, \src2, \shiftreg
	\name \dest, \src1, \tmp
.endm
#else
/* ARM mode: conditional execution is implicit, so do_it is a no-op.  */
.macro do_it cond, suffix=""
.endm
/* Shift by register: expressed as a shifted-operand mov in ARM mode.  */
.macro shift1 op, arg0, arg1, arg2
	mov	\arg0, \arg1, \op \arg2
.endm
#define do_push	stmfd sp!,
#define do_pop	ldmfd sp!,
/* Condition code goes between the two mnemonic parts in ARM mode.  */
#define COND(op1, op2, cond) op1 ## cond ## op2
/* ARM mode can fold the register shift into the operand directly.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\name \dest, \src1, \src2, \shiftop \shiftreg
.endm
#endif
288
#ifdef __ARM_EABI__
/* Divide-by-zero handler, ARM-mode, EABI: set r0 to the EABI-specified
   result (0 or the saturated signed value, matching the sign of the
   dividend already in r0) and tail-call __aeabi_idiv0.  */
.macro ARM_LDIV0 name signed
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM (__aeabi_idiv0) __PLT__
.endm
#else
/* Divide-by-zero handler, ARM-mode, non-EABI: call __div0 and return 0.  */
.macro ARM_LDIV0 name signed
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm
#endif
309
310
#ifdef __ARM_EABI__
/* Divide-by-zero handler, Thumb entry, EABI: set r0 per the EABI
   (0 for unsigned, saturated value matching the dividend's sign for
   signed) and transfer to __aeabi_idiv0.  Three variants: Thumb1-only
   targets, Thumb-2, and a Thumb1->ARM trampoline.  */
.macro THUMB_LDIV0 name signed
#ifdef NOT_ISA_TARGET_32BIT

	push	{r0, lr}
	mov	r0, #0
	bl	SYM(__aeabi_idiv0)
	@ We know we are not on armv4t, so pop pc is safe.
	pop	{r1, pc}

#elif defined(__thumb2__)
	.syntax unified
	.ifc \signed, unsigned
	cbz	r0, 1f
	mov	r0, #0xffffffff
1:
	.else
	cmp	r0, #0
	do_it	gt
	movgt	r0, #0x7fffffff
	do_it	lt
	movlt	r0, #0x80000000
	.endif
	b.w	SYM(__aeabi_idiv0) __PLT__
#else
	/* Thumb1 with ARM available: switch to ARM mode via bx pc.  */
	.align	2
	bx	pc
	nop
	.arm
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM(__aeabi_idiv0) __PLT__
	.thumb
#endif
.endm
#else
/* Divide-by-zero handler, Thumb, non-EABI: call __div0 and return 0.  */
.macro THUMB_LDIV0 name signed
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm
#endif
365
/* Emit the ELF size directive for function __\name.  */
.macro FUNC_END name
	SIZE (__\name)
.endm

/* Close a division routine: emit the shared Ldiv0 divide-by-zero tail
   (Thumb or ARM flavour), the CFI end marker, and the size directive.  */
.macro DIV_FUNC_END name signed
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name \signed
#else
	ARM_LDIV0 \name \signed
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm

/* Declare and begin a global Thumb function \name (no "__" prefix).  */
.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm
388
/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX .syntax divided
# else
#define THUMB_SYNTAX
# endif
#else
#define THUMB_FUNC
#define THUMB_CODE
#define THUMB_SYNTAX
#endif

/* Declare and begin global function __\name in the compilation mode's
   default state.  If \sp_section is "function_section", place it in its
   own .text.__\name section (for function-section builds / gc-sections).  */
.macro FUNC_START name sp_section=
  .ifc \sp_section, function_section
	.section	.text.__\name,"ax",%progbits
  .else
	.text
  .endif
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
	THUMB_SYNTAX
SYM (__\name):
.endm

/* Begin a non-global (file-visible) ARM symbol \name.  */
.macro ARM_SYM_START name
	TYPE (\name)
	.align 0
SYM (\name):
.endm

/* Emit the size directive for symbol \name.  */
.macro SYM_END name
	SIZE (\name)
.endm
429
/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro ARM_FUNC_START name sp_section=
       FUNC_START \name \sp_section
       .syntax unified
.endm
#define EQUIV .thumb_set
.macro  ARM_CALL name
	bl	__\name
.endm

#elif defined(__INTERWORKING_STUBS__)

/* armv4t, Thumb without interworking: each ARM-mode routine gets a
   Thumb entry stub that switches to ARM state via bx pc.  */
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro  ARM_CALL name
	bl	_L__\name
.endm

#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

#ifdef NOT_ISA_TARGET_32BIT
#define EQUIV .thumb_set
#else
/* Plain ARM-mode entry; \sp_section as for FUNC_START.  */
.macro	ARM_FUNC_START name sp_section=
  .ifc \sp_section, function_section
	.section	.text.__\name,"ax",%progbits
  .else
	.text
  .endif
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
	bl	__\name
.endm
#endif

#endif

/* Make __\new a global alias for __\old (Thumb-aware when needed).  */
.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

#ifndef NOT_ISA_TARGET_32BIT
/* Alias for ARM-mode entry points; also aliases the _L__ ARM-mode hook
   when interworking stubs are in use.  */
.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm
#endif
506
/* Names for the halves of a 64-bit argument pair (xx in r0/r1, yy in
   r2/r3); which register holds the high word depends on endianness.  */
#ifdef __ARMEB__
#define xxh r0
#define xxl r1
#define yyh r2
#define yyl r3
#else
#define xxh r1
#define xxl r0
#define yyh r3
#define yyl r2
#endif

#ifdef __ARM_EABI__
/* Declare __\name as a weak symbol.  */
.macro	WEAK name
	.weak SYM (__\name)
.endm
#endif

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif
541
/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
/* Unsigned 32-bit division core (ARM / Thumb-2): \result = \dividend
   / \divisor, with \dividend left holding the remainder.  \curbit is
   scratch.  Assumes \divisor != 0 and not a power of two (callers check).  */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

#if defined (__thumb2__)
	/* Use clz to find the quotient bit count, then computed-branch
	   into an unrolled 32-step shift/subtract sequence.  Each step
	   is padded to 16 bytes (hence .p2align 3 + nop.n) so the jump
	   offset is simply (31 - bits) * 16.  */
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsb	\curbit, \curbit, #31
	adr	\result, 1f
	add	\curbit, \result, \curbit, lsl #4
	mov	\result, #0
	mov	pc, \curbit
.p2align 3
1:
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp.w	\dividend, \divisor, lsl #shift
	nop.n
	adc.w	\result, \result, \result
	it	cs
	subcs.w	\dividend, \dividend, \divisor, lsl #shift
	.endr
#else
	/* ARM mode: same idea, branching into the unrolled sequence with
	   an add to pc; each unrolled step is 3 instructions (12 bytes),
	   hence the multiply-by-3 via add + lsl #1.  */
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr
#endif

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	/* Size-optimized with clz: align the divisor's leading bit with
	   the dividend's, set \curbit to the matching quotient bit.  */
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* __ARM_ARCH__ < 5 */

	@ Division loop: four quotient bits per iteration, shifting the
	@ divisor (and curbit) right as we go.
1:	cmp	\dividend, \divisor
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	do_it	ne, t
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
/* Compute \order = floor(log2(\divisor)) for a power-of-two divisor
   (used to reduce division to a shift).  Clobbers \divisor when no clz
   is available.  */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	/* Binary search on the top set bit, 16/8/4 bits at a time.  */
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
/* ------------------------------------------------------------------------ */
/* Unsigned 32-bit modulo core (ARM mode): leaves \dividend holding
   \dividend mod \divisor.  \order and \spare are scratch.  Assumes
   \divisor != 0 and not a power of two (callers handle those).  */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	/* clz-based: computed branch into an unrolled 32-step
	   compare/subtract sequence (2 instructions = 8 bytes per step,
	   hence lsl #3).  */
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	/* Align divisor's leading bit with the dividend's; \order counts
	   how far it was shifted.  */
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
/* Thumb1 division/modulo core, using the register aliases declared above
   (dividend=r0, divisor=r1, result/overdone=r2, curbit=r3, work=r4).
   \modulo = 1 leaves the remainder in "dividend"; \modulo = 0 builds the
   quotient in "result".  Assumes dividend >= divisor on entry and
   divisor != 0 (callers check).  */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done, we can fix them up
	@ afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	@ Record this subtraction as bit (curbit ror 1) in overdone.
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	@ Undo the lsr #3 subtraction if it was recorded as unneeded.
	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	@ Undo the lsr #2 subtraction if it was recorded as unneeded.
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	@ Undo the lsr #1 subtraction if it was recorded as unneeded.
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm
933
/* If performance is preferred, the following functions are provided.  */
#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)

/* Branch to \label if (dividend >> \n) is lower than the divisor,
   i.e. the quotient needs no more than \n bits.  */
.macro BranchToDiv n, label
	lsr	curbit, dividend, \n
	cmp	curbit, divisor
	blo	\label
.endm

/* Body of div(n): one restoring-division step.  If (divisor << \n) fits
   in the dividend, subtract it; the resulting carry flag (quotient bit)
   is shifted into "result" by the adc.  */
.macro DoDiv n
	lsr	curbit, dividend, \n
	cmp	curbit, divisor
	bcc	1f
	lsl	curbit, divisor, \n
	sub	dividend, dividend, curbit

1:	adc	result, result
.endm
955
/* The body of division with positive divisor.  Unless the divisor is very
   big, shift it up in multiples of four bits, since this is the amount of
   unwinding in the main division loop.  Continue shifting until the divisor
   is larger than the dividend.
   In:  dividend (r0), divisor (r1).  Out: quotient in r0, remainder in r1.
   Falls through to LSYM(Ldiv0) on division by zero.
   NOTE: gas macro names are case-sensitive; the divide-step macro defined
   above is "DoDiv" — the previous "Dodiv" spelling assembled as an unknown
   opcode and broke the build.  */
.macro THUMB1_Div_Positive
	mov	result, #0
	BranchToDiv #1, LSYM(Lthumb1_div1)
	BranchToDiv #4, LSYM(Lthumb1_div4)
	BranchToDiv #8, LSYM(Lthumb1_div8)
	BranchToDiv #12, LSYM(Lthumb1_div12)
	BranchToDiv #16, LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_large_positive):
	@ Large quotient: pre-shift the divisor left by 8 (or 16) bits and
	@ seed "result" so the adc steps land in the right bit positions.
	mov	result, #0xff
	lsl	divisor, divisor, #8
	rev	result, result
	lsr	curbit, dividend, #16
	cmp	curbit, divisor
	blo	1f
	asr	result, #8
	lsl	divisor, divisor, #8
	beq	LSYM(Ldivbyzero_waypoint)

1:	lsr	curbit, dividend, #12
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div12)
	b	LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_loop):
	lsr	divisor, divisor, #8
LSYM(Lthumb1_div16):
	DoDiv	#15
	DoDiv	#14
	DoDiv	#13
	DoDiv	#12
LSYM(Lthumb1_div12):
	DoDiv	#11
	DoDiv	#10
	DoDiv	#9
	DoDiv	#8
	bcs	LSYM(Lthumb1_div_loop)
LSYM(Lthumb1_div8):
	DoDiv	#7
	DoDiv	#6
	DoDiv	#5
LSYM(Lthumb1_div5):
	DoDiv	#4
LSYM(Lthumb1_div4):
	DoDiv	#3
LSYM(Lthumb1_div3):
	DoDiv	#2
LSYM(Lthumb1_div2):
	DoDiv	#1
LSYM(Lthumb1_div1):
	@ Final step; leave the remainder in "divisor" (r1).
	sub	divisor, dividend, divisor
	bcs	1f
	cpy	divisor, dividend

1:	adc	result, result
	cpy	dividend, result
	RET

LSYM(Ldivbyzero_waypoint):
	b	LSYM(Ldiv0)
.endm
1019
/* The body of division with negative divisor.  Similar with
   THUMB1_Div_Positive except that the shift steps are in multiples
   of six bits.  Handles the signs: operands are negated to positive,
   divided, and the quotient/remainder signs fixed up from the saved
   sign bits (kept in ip).  */
.macro THUMB1_Div_Negative
	lsr	result, divisor, #31
	beq	1f
	neg	divisor, divisor

	@ asr #32 leaves the dividend's sign bit in the carry flag.
1:	asr	curbit, dividend, #32
	bcc	2f
	neg	dividend, dividend

	@ curbit = sign(dividend) ^ sign(divisor); stash it in ip.
2:	eor	curbit, result
	mov	result, #0
	cpy	ip, curbit
	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_large):
	mov	result, #0xfc
	lsl	divisor, divisor, #6
	rev	result, result
	lsr	curbit, dividend, #8
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)

	lsl	divisor, divisor, #6
	asr	result, result, #6
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)

	lsl	divisor, divisor, #6
	asr	result, result, #6
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)

	lsl	divisor, divisor, #6
	beq	LSYM(Ldivbyzero_negative)
	asr	result, result, #6
	b	LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_negative_loop):
	lsr	divisor, divisor, #6
LSYM(Lthumb1_div_negative8):
	DoDiv	#7
	DoDiv	#6
	DoDiv	#5
	DoDiv	#4
LSYM(Lthumb1_div_negative4):
	DoDiv	#3
	DoDiv	#2
	bcs	LSYM(Lthumb1_div_negative_loop)
	DoDiv	#1
	sub	divisor, dividend, divisor
	bcs	1f
	cpy	divisor, dividend

	@ Recover the sign word from ip and apply it: bit 0 (shifted into
	@ carry) gives the remainder's sign, the remaining sign bit gives
	@ the quotient's.
1:	cpy	curbit, ip
	adc	result, result
	asr	curbit, curbit, #1
	cpy	dividend, result
	bcc	2f
	neg	dividend, dividend
	cmp	curbit, #0

2:	bpl	3f
	neg	divisor, divisor

3:	RET

LSYM(Ldivbyzero_negative):
	cpy	curbit, ip
	asr	curbit, curbit, #1
	bcc	LSYM(Ldiv0)
	neg	dividend, dividend
.endm
#endif /* ARM Thumb version.  */
1095
/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

/* __udivsi3 / __aeabi_uidiv: unsigned 32-bit division, r0 = r0 / r1.
   Divide-by-zero falls into LSYM(Ldiv0), emitted by DIV_FUNC_END.  */
#if defined(__prefer_thumb__)

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3
#if defined(__OPTIMIZE_SIZE__)

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(udivsi3_skip_div0_test):
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

/* Implementation of aeabi_uidiv for ARMv6m.  This version is only
   used in ARMv6-M when we need an efficient implementation.  */
#else
LSYM(udivsi3_skip_div0_test):
	THUMB1_Div_Positive

#endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)

	/* Hardware divide available: single udiv instruction.  */
	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	udiv	r0, r0, r1
	RET

#else /* ARM version/Thumb-2.  */

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	/* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
	   check for division-by-zero a second time.  */
LSYM(udivsi3_skip_div0_test):
	/* r2 = r1 - 1; carry clear => r1 was 0; r1 & r2 == 0 => power of 2.  */
	subs	r2, r1, #1
	do_it	eq
	RETc(eq)			@ Divisor is 1: result is the dividend.
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f			@ dividend <= divisor: result is 0 or 1.
	tst	r1, r2
	beq	12f			@ Power-of-two divisor: just shift.

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

11:	do_it	eq, e
	moveq	r0, #1
	movne	r0, #0
	RET

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3 unsigned

/* __aeabi_uidivmod: quotient in r0, remainder in r1.  */
#if defined(__prefer_thumb__)
FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
# if defined(__OPTIMIZE_SIZE__)
	push	{r0, r1, lr}
	bl	LSYM(udivsi3_skip_div0_test)
	POP	{r1, r2, r3}
	@ remainder = original dividend - quotient * divisor
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
# else
	/* Both the quotient and remainder are calculated simultaneously
	   in THUMB1_Div_Positive.  There is no need to calculate the
	   remainder again here.  */
	b	LSYM(udivsi3_skip_div0_test)
	RET
# endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	udiv	r0, r0, r1
	mls	r1, r0, r1, r2		@ r1 = r2 - r0 * r1 (remainder)
	RET
#else
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(udivsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	@ remainder = original dividend - quotient * divisor
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_uidivmod

#endif /* L_udivsi3 */
1219 /* ------------------------------------------------------------------------ */
1220 #ifdef L_umodsi3
1221 
@ Unsigned modulo: r0 = r0 % r1.
1222 #if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1
1223 
1224 ARM_FUNC_START umodsi3
1225 
1226 	cmp	r1, #0
1227 	beq	LSYM(Ldiv0)
@ r2 = quotient; remainder = r0 - r1 * quotient via MLS.
1228 	udiv	r2, r0, r1
1229 	mls 	r0, r1, r2, r0
1230 	RET
1231 
1232 #elif defined(__thumb__)
1233 
1234 	FUNC_START umodsi3
1235 
1236 	cmp	divisor, #0
1237 	beq	LSYM(Ldiv0)
1238 	mov	curbit, #1
@ dividend < divisor: the dividend already is the remainder.
1239 	cmp	dividend, divisor
1240 	bhs	LSYM(Lover10)
1241 	RET	
1242 
1243 LSYM(Lover10):
1244 	push	{ work }
1245 
@ Shift-and-subtract loop; argument 1 selects modulo (remainder) mode.
1246 	THUMB_DIV_MOD_BODY 1
1247 	
1248 	pop	{ work }
1249 	RET
1250 
1251 #else  /* ARM version.  */
1252 	
1253 	FUNC_START umodsi3
1254 
@ subs leaves carry clear iff divisor == 0, Z set iff divisor == 1.
1255 	subs	r2, r1, #1			@ compare divisor with 1
1256 	bcc	LSYM(Ldiv0)
1257 	cmpne	r0, r1				@ compare dividend with divisor
1258 	moveq   r0, #0
1259 	tsthi	r1, r2				@ see if divisor is power of 2
@ Power-of-two divisor: remainder is just the low bits of the dividend.
1260 	andeq	r0, r0, r2
@ Return now for the trivial cases (dividend <= divisor, or power of 2).
1261 	RETc(ls)
1262 
1263 	ARM_MOD_BODY r0, r1, r2, r3
1264 	
1265 	RET	
1266 
1267 #endif /* ARM version.  */
1268 	
1269 	DIV_FUNC_END umodsi3 unsigned
1270 
1271 #endif /* L_umodsi3 */
1272 /* ------------------------------------------------------------------------ */
1273 #ifdef L_divsi3
1274 
@ Signed divide: r0 = r0 / r1 (quotient truncated toward zero).
1275 #if defined(__prefer_thumb__)
1276 
1277 	FUNC_START divsi3
1278 	FUNC_ALIAS aeabi_idiv divsi3
1279 #if defined(__OPTIMIZE_SIZE__)
1280 
1281 	cmp	divisor, #0
1282 	beq	LSYM(Ldiv0)
1283 LSYM(divsi3_skip_div0_test):
1284 	push	{ work }
1285 	mov	work, dividend
1286 	eor	work, divisor		@ Save the sign of the result.
1287 	mov	ip, work
1288 	mov	curbit, #1
1289 	mov	result, #0
@ Make both operands positive; the division loop below is unsigned.
1290 	cmp	divisor, #0
1291 	bpl	LSYM(Lover10)
1292 	neg	divisor, divisor	@ Loops below use unsigned.
1293 LSYM(Lover10):
1294 	cmp	dividend, #0
1295 	bpl	LSYM(Lover11)
1296 	neg	dividend, dividend
1297 LSYM(Lover11):
1298 	cmp	dividend, divisor
1299 	blo	LSYM(Lgot_result)
1300 
1301 	THUMB_DIV_MOD_BODY 0
1302 	
1303 	mov	r0, result
@ Restore the saved result sign (ip) and negate the quotient if needed.
1304 	mov	work, ip
1305 	cmp	work, #0
1306 	bpl	LSYM(Lover12)
1307 	neg	r0, r0
1308 LSYM(Lover12):
1309 	pop	{ work }
1310 	RET
1311 
1312 /* Implementation of aeabi_idiv for ARMv6m.  This version is only
1313    used in ARMv6-M when we need an efficient implementation.  */
1314 #else
1315 LSYM(divsi3_skip_div0_test):
@ Sign of (dividend | divisor) is negative iff either operand is negative.
1316 	cpy	curbit, dividend
1317 	orr	curbit, divisor
1318 	bmi	LSYM(Lthumb1_div_negative)
1319 
1320 LSYM(Lthumb1_div_positive):
1321 	THUMB1_Div_Positive
1322 
1323 LSYM(Lthumb1_div_negative):
1324 	THUMB1_Div_Negative
1325 
1326 #endif /* __OPTIMIZE_SIZE__ */
1327 
1328 #elif defined(__ARM_ARCH_EXT_IDIV__)
1329 
1330 	ARM_FUNC_START divsi3
1331 	ARM_FUNC_ALIAS aeabi_idiv divsi3
1332 
1333 	cmp 	r1, #0
1334 	beq	LSYM(Ldiv0)
1335 	sdiv	r0, r0, r1
1336 	RET
1337 
1338 #else /* ARM/Thumb-2 version.  */
1339 	
1340 	ARM_FUNC_START divsi3	
1341 	ARM_FUNC_ALIAS aeabi_idiv divsi3
1342 
1343 	cmp	r1, #0
1344 	beq	LSYM(Ldiv0)
1345 LSYM(divsi3_skip_div0_test):
1346 	eor	ip, r0, r1		@ save the sign of the result.
1347 	do_it	mi
1348 	rsbmi	r1, r1, #0		@ loops below use unsigned.
1349 	subs	r2, r1, #1		@ division by 1 or -1 ?
1350 	beq	10f
1351 	movs	r3, r0
1352 	do_it	mi
1353 	rsbmi	r3, r0, #0		@ positive dividend value
1354 	cmp	r3, r1
1355 	bls	11f
1356 	tst	r1, r2			@ divisor is power of 2 ?
1357 	beq	12f
1358 
@ Unsigned shift-and-subtract divide; quotient left in r0.
1359 	ARM_DIV_BODY r3, r1, r0, r2
1360 	
@ Apply the saved result sign from ip.
1361 	cmp	ip, #0
1362 	do_it	mi
1363 	rsbmi	r0, r0, #0
1364 	RET	
1365 
@ Division by 1 or -1: result is +/- dividend depending on saved sign.
1366 10:	teq	ip, r0			@ same sign ?
1367 	do_it	mi
1368 	rsbmi	r0, r0, #0
1369 	RET	
1370 
@ |dividend| <= |divisor|: quotient is 0, or +/-1 when the magnitudes match.
1371 11:	do_it	lo
1372 	movlo	r0, #0
1373 	do_it	eq,t
1374 	moveq	r0, ip, asr #31
1375 	orreq	r0, r0, #1
1376 	RET
1377 
@ Power-of-two divisor: shift, then fix up the sign.
1378 12:	ARM_DIV2_ORDER r1, r2
1379 
1380 	cmp	ip, #0
1381 	mov	r0, r3, lsr r2
1382 	do_it	mi
1383 	rsbmi	r0, r0, #0
1384 	RET
1385 
1386 #endif /* ARM version */
1387 	
1388 	DIV_FUNC_END divsi3 signed
1389
@ AEABI combined signed divide/modulo: on return r0 = quotient,
@ r1 = remainder (per the ARM run-time ABI __aeabi_idivmod contract).
1390 #if defined(__prefer_thumb__)
1391 FUNC_START aeabi_idivmod
1392 	cmp	r1, #0
1393 	beq	LSYM(Ldiv0)
1394 # if defined(__OPTIMIZE_SIZE__)
@ Save operands, divide, then remainder = dividend - quotient * divisor.
1395 	push	{r0, r1, lr}
1396 	bl	LSYM(divsi3_skip_div0_test)
@ After the pop: r1 = original dividend, r2 = original divisor, r3 = lr.
1397 	POP	{r1, r2, r3}
1398 	mul	r2, r0
1399 	sub	r1, r1, r2
1400 	bx	r3
1401 # else
1402 	/* Both the quotient and remainder are calculated simultaneously
1403 	   in THUMB1_Div_Positive and THUMB1_Div_Negative.  There is no
1404 	   need to calculate the remainder again here.  */
1405 	b	LSYM(divsi3_skip_div0_test)
1406 	RET
1407 # endif /* __OPTIMIZE_SIZE__ */
1408 
1409 #elif defined(__ARM_ARCH_EXT_IDIV__)
1410 ARM_FUNC_START aeabi_idivmod
1411 	cmp 	r1, #0
1412 	beq	LSYM(Ldiv0)
@ r2 keeps the dividend; MLS computes r1 = r2 - quotient * divisor.
1413 	mov 	r2, r0
1414 	sdiv	r0, r0, r1
1415 	mls 	r1, r0, r1, r2
1416 	RET
1417 #else
1418 ARM_FUNC_START aeabi_idivmod
1419 	cmp	r1, #0
1420 	beq	LSYM(Ldiv0)
1421 	stmfd	sp!, { r0, r1, lr }
1422 	bl	LSYM(divsi3_skip_div0_test)
@ r1 = original dividend, r2 = original divisor; remainder = r1 - r2*quotient.
1423 	ldmfd	sp!, { r1, r2, lr }
1424 	mul	r3, r2, r0
1425 	sub	r1, r1, r3
1426 	RET
1427 #endif
1428 	FUNC_END aeabi_idivmod
1429
1430 #endif /* L_divsi3 */
1431 /* ------------------------------------------------------------------------ */
1432 #ifdef L_modsi3
1433 
@ Signed modulo: r0 = r0 % r1 (remainder takes the sign of the dividend).
1434 #if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1
1435 
1436 	ARM_FUNC_START modsi3
1437 
1438 	cmp	r1, #0
1439 	beq	LSYM(Ldiv0)
1440 
@ r2 = quotient; remainder = r0 - r1 * quotient via MLS.
1441 	sdiv	r2, r0, r1
1442 	mls 	r0, r1, r2, r0
1443 	RET
1444 
1445 #elif defined(__thumb__)
1446 
1447 	FUNC_START modsi3
1448 
1449 	mov	curbit, #1
1450 	cmp	divisor, #0
1451 	beq	LSYM(Ldiv0)
1452 	bpl	LSYM(Lover10)
1453 	neg	divisor, divisor	@ Loops below use unsigned.
1454 LSYM(Lover10):
1455 	push	{ work }
1456 	@ Need to save the sign of the dividend, unfortunately, we need
1457 	@ work later on.  Must do this after saving the original value of
1458 	@ the work register, because we will pop this value off first.
1459 	push	{ dividend }
1460 	cmp	dividend, #0
1461 	bpl	LSYM(Lover11)
1462 	neg	dividend, dividend
1463 LSYM(Lover11):
1464 	cmp	dividend, divisor
1465 	blo	LSYM(Lgot_result)
@ Argument 1 selects modulo (remainder) mode of the shared loop.
1466 
1467 	THUMB_DIV_MOD_BODY 1
1468 		
@ First pop recovers the original dividend: restore the remainder's sign.
1469 	pop	{ work }
1470 	cmp	work, #0
1471 	bpl	LSYM(Lover12)
1472 	neg	dividend, dividend
1473 LSYM(Lover12):
1474 	pop	{ work }
1475 	RET	
1476 
1477 #else /* ARM version.  */
1478 	
1479 	FUNC_START modsi3
1480 
@ N flag from this cmp drives the rsbmi below (negative divisor -> negate).
1481 	cmp	r1, #0
1482 	beq	LSYM(Ldiv0)
1483 	rsbmi	r1, r1, #0		@ loops below use unsigned.
1484 	movs	ip, r0			@ preserve sign of dividend
1485 	rsbmi	r0, r0, #0		@ if negative make positive
1486 	subs	r2, r1, #1		@ compare divisor with 1
1487 	cmpne	r0, r1			@ compare dividend with divisor
1488 	moveq	r0, #0
1489 	tsthi	r1, r2			@ see if divisor is power of 2
1490 	andeq	r0, r0, r2
@ Trivial cases (divisor 1, dividend <= divisor, power of 2) skip the loop.
1491 	bls	10f
1492 
1493 	ARM_MOD_BODY r0, r1, r2, r3
1494 	
@ Restore the dividend's sign (saved in ip) on the remainder.
1495 10:	cmp	ip, #0
1496 	rsbmi	r0, r0, #0
1497 	RET	
1498 
1499 #endif /* ARM version */
1500 	
1501 	DIV_FUNC_END modsi3 signed
1502 
1503 #endif /* L_modsi3 */
1504 /* ------------------------------------------------------------------------ */
1505 #ifdef L_dvmd_tls
1506 
@ Default division-by-zero handlers: simply return.  Declared weak so a
@ program (or the Linux variant below) can override them.
1507 #ifdef __ARM_EABI__
1508 	WEAK aeabi_idiv0
1509 	WEAK aeabi_ldiv0
1510 	FUNC_START aeabi_idiv0
1511 	FUNC_START aeabi_ldiv0
1512 	RET
1513 	FUNC_END aeabi_ldiv0
1514 	FUNC_END aeabi_idiv0
1515 #else
1516 	FUNC_START div0
1517 	RET
1518 	FUNC_END div0
1519 #endif
1520 	
1521 #endif /* L_dvmd_tls */
1522 /* ------------------------------------------------------------------------ */
1523 #ifdef L_dvmd_lnx
1524 @ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls
1525 
1526 /* Constant taken from <asm/signal.h>.  */
1527 #define SIGFPE	8
1528 
@ Raises SIGFPE via raise(3); CFI annotations let the unwinder walk
@ through the handler.
1529 #ifdef __ARM_EABI__
1530 	cfi_start	__aeabi_ldiv0, LSYM(Lend_aeabi_ldiv0)
1531 	WEAK aeabi_idiv0
1532 	WEAK aeabi_ldiv0
1533 	ARM_FUNC_START aeabi_idiv0
1534 	ARM_FUNC_START aeabi_ldiv0
1535 	do_push	{r1, lr}
1536 98:	cfi_push 98b - __aeabi_ldiv0, 0xe, -0x4, 0x8
1537 #else
1538 	cfi_start	__div0, LSYM(Lend_div0)
1539 	ARM_FUNC_START div0
1540 	do_push	{r1, lr}
1541 98:	cfi_push 98b - __div0, 0xe, -0x4, 0x8
1542 #endif
1543 
1544 	mov	r0, #SIGFPE
1545 	bl	SYM(raise) __PLT__
1546 	RETLDM	r1 unwind=98b
1547 
1548 #ifdef __ARM_EABI__
1549 	cfi_end	LSYM(Lend_aeabi_ldiv0)
1550 	FUNC_END aeabi_ldiv0
1551 	FUNC_END aeabi_idiv0
1552 #else
1553 	cfi_end	LSYM(Lend_div0)
1554 	FUNC_END div0
1555 #endif
1556 	
1557 #endif /* L_dvmd_lnx */
1558 #ifdef L_clear_cache
1559 #if defined __ARM_EABI__ && defined __linux__
1560 @ EABI GNU/Linux call to cacheflush syscall.
@ r0/r1 (start/end) pass straight through to the kernel; r2 = flags = 0.
@ 0xf0002 is __ARM_NR_cacheflush (an ARM-private syscall number).
1561 	ARM_FUNC_START clear_cache
1562 	do_push	{r7}
1563 #if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
1564 	movw	r7, #2
1565 	movt	r7, #0xf
1566 #else
1567 	mov	r7, #0xf0000
1568 	add	r7, r7, #2
1569 #endif
1570 	mov	r2, #0
1571 	swi	0
1572 	do_pop	{r7}
1573 	RET
1574 	FUNC_END clear_cache
1575 #else
1576 #error "This is only for ARM EABI GNU/Linux"
1577 #endif
1578 #endif /* L_clear_cache */
1579 /* ------------------------------------------------------------------------ */
1580 /* Dword shift operations. */
1581 /* All the following Dword shift variants rely on the fact that
1582 shft xxx, Reg
1583 is in fact done as
1584 shft xxx, (Reg & 255)
1585 so for Reg value in (32...63) and (-1...-31) we will get zero (in the
1586 case of logical shifts) or the sign (for asr). */
1587
1588 #ifdef __ARMEB__
1589 #define al r1
1590 #define ah r0
1591 #else
1592 #define al r0
1593 #define ah r1
1594 #endif
1595
1596 /* Prevent __aeabi double-word shifts from being produced on SymbianOS. */
1597 #ifndef __symbian__
1598
1599 #ifdef L_lshrdi3
1600 
@ 64-bit logical shift right: {ah:al} >>= r2.  Relies on out-of-range
@ register shifts yielding zero (see the note above) so no explicit
@ branch on r2 >= 32 is needed.
1601 	FUNC_START lshrdi3
1602 	FUNC_ALIAS aeabi_llsr lshrdi3
1603 	
1604 #ifdef __thumb__
1605 	lsr	al, r2
1606 	mov	r3, ah
1607 	lsr	ah, r2
1608 	mov	ip, r3
@ al |= ah >> (r2 - 32)   (contributes only when r2 > 32)
1609 	sub	r2, #32
1610 	lsr	r3, r2
1611 	orr	al, r3
@ al |= ah << (32 - r2)   (contributes only when r2 < 32)
1612 	neg	r2, r2
1613 	mov	r3, ip
1614 	lsl	r3, r2
1615 	orr	al, r3
1616 	RET
1617 #else
1618 	subs	r3, r2, #32
1619 	rsb	ip, r2, #32
1620 	movmi	al, al, lsr r2
1621 	movpl	al, ah, lsr r3
1622 	orrmi	al, al, ah, lsl ip
1623 	mov	ah, ah, lsr r2
1624 	RET
1625 #endif
1626 	FUNC_END aeabi_llsr
1627 	FUNC_END lshrdi3
1628 
1629 #endif
1630
1631 #ifdef L_ashrdi3
1632 	
@ 64-bit arithmetic shift right: {ah:al} >>= r2 (sign-propagating).
1633 	FUNC_START ashrdi3
1634 	FUNC_ALIAS aeabi_lasr ashrdi3
1635 	
1636 #ifdef __thumb__
1637 	lsr	al, r2
1638 	mov	r3, ah
1639 	asr	ah, r2
1640 	sub	r2, #32
1641 	@ If r2 is negative at this point the following step would OR
1642 	@ the sign bit into all of AL.  That's not what we want...
1643 	bmi	1f
1644 	mov	ip, r3
1645 	asr	r3, r2
1646 	orr	al, r3
1647 	mov	r3, ip
1648 1:
@ al |= ah << (32 - r2)   (contributes only when r2 < 32)
1649 	neg	r2, r2
1650 	lsl	r3, r2
1651 	orr	al, r3
1652 	RET
1653 #else
1654 	subs	r3, r2, #32
1655 	rsb	ip, r2, #32
1656 	movmi	al, al, lsr r2
1657 	movpl	al, ah, asr r3
1658 	orrmi	al, al, ah, lsl ip
1659 	mov	ah, ah, asr r2
1660 	RET
1661 #endif
1662 
1663 	FUNC_END aeabi_lasr
1664 	FUNC_END ashrdi3
1665 
1666 #endif
1667
1668 #ifdef L_ashldi3
1669 
@ 64-bit shift left: {ah:al} <<= r2.  Mirror image of lshrdi3; relies on
@ out-of-range register shifts yielding zero.
1670 	FUNC_START ashldi3
1671 	FUNC_ALIAS aeabi_llsl ashldi3
1672 	
1673 #ifdef __thumb__
1674 	lsl	ah, r2
1675 	mov	r3, al
1676 	lsl	al, r2
1677 	mov	ip, r3
@ ah |= al << (r2 - 32)   (contributes only when r2 > 32)
1678 	sub	r2, #32
1679 	lsl	r3, r2
1680 	orr	ah, r3
@ ah |= al >> (32 - r2)   (contributes only when r2 < 32)
1681 	neg	r2, r2
1682 	mov	r3, ip
1683 	lsr	r3, r2
1684 	orr	ah, r3
1685 	RET
1686 #else
1687 	subs	r3, r2, #32
1688 	rsb	ip, r2, #32
1689 	movmi	ah, ah, lsl r2
1690 	movpl	ah, al, lsl r3
1691 	orrmi	ah, ah, al, lsr ip
1692 	mov	al, al, lsl r2
1693 	RET
1694 #endif
1695 	FUNC_END aeabi_llsl
1696 	FUNC_END ashldi3
1697 
1698 #endif
1699
1700 #endif /* __symbian__ */
1701
1702 #if (__ARM_ARCH_ISA_THUMB == 2 \
1703 || (__ARM_ARCH_ISA_ARM \
1704 && (__ARM_ARCH__ > 5 \
1705 || (__ARM_ARCH__ == 5 && __ARM_ARCH_ISA_THUMB))))
1706 #define HAVE_ARM_CLZ 1
1707 #endif
1708
1709 #ifdef L_clzsi2
@ Count leading zeros of r0.  Without a CLZ instruction this does a
@ binary search (16/8/4-bit steps) and finishes with a 16-entry nibble
@ lookup table; r1 accumulates 28 minus the bits shifted out.
1710 #ifdef NOT_ISA_TARGET_32BIT
1711 FUNC_START clzsi2
1712 	mov	r1, #28
1713 	mov	r3, #1
1714 	lsl	r3, r3, #16
1715 	cmp	r0, r3 /* 0x10000 */
1716 	bcc	2f
1717 	lsr	r0, r0, #16
1718 	sub	r1, r1, #16
1719 2:	lsr	r3, r3, #8
1720 	cmp	r0, r3 /* #0x100 */
1721 	bcc	2f
1722 	lsr	r0, r0, #8
1723 	sub	r1, r1, #8
1724 2:	lsr	r3, r3, #4
1725 	cmp	r0, r3 /* #0x10 */
1726 	bcc	2f
1727 	lsr	r0, r0, #4
1728 	sub	r1, r1, #4
1729 2:	adr	r2, 1f
1730 	ldrb	r0, [r2, r0]
1731 	add	r0, r0, r1
1732 	bx lr
1733 .align 2
1734 1:
@ clz of each 4-bit value 0..15 (clz(0) taken as 4 within the nibble).
1735 .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1736 	FUNC_END clzsi2
1737 #else
1738 ARM_FUNC_START clzsi2
1739 # if defined(HAVE_ARM_CLZ)
1740 	clz	r0, r0
1741 	RET
1742 # else
1743 	mov	r1, #28
1744 	cmp	r0, #0x10000
1745 	do_it	cs, t
1746 	movcs	r0, r0, lsr #16
1747 	subcs	r1, r1, #16
1748 	cmp	r0, #0x100
1749 	do_it	cs, t
1750 	movcs	r0, r0, lsr #8
1751 	subcs	r1, r1, #8
1752 	cmp	r0, #0x10
1753 	do_it	cs, t
1754 	movcs	r0, r0, lsr #4
1755 	subcs	r1, r1, #4
1756 	adr	r2, 1f
1757 	ldrb	r0, [r2, r0]
1758 	add	r0, r0, r1
1759 	RET
1760 .align 2
1761 1:
1762 .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1763 # endif /* !HAVE_ARM_CLZ */
1764 	FUNC_END clzsi2
1765 #endif
1766 #endif /* L_clzsi2 */
1767
1768 #ifdef L_clzdi2
@ Count leading zeros of a 64-bit value held in the xxh/xxl register
@ pair (macros defined earlier; endian-dependent halves).
1769 #if !defined(HAVE_ARM_CLZ)
1770 
1771 # ifdef NOT_ISA_TARGET_32BIT
1772 FUNC_START clzdi2
1773 	push	{r4, lr}
1774 # else
1775 ARM_FUNC_START clzdi2
1776 	do_push	{r4, lr}
1777 # endif
@ High word zero: answer is 32 + clz(low word); otherwise clz(high word).
1778 	cmp	xxh, #0
1779 	bne	1f
1780 # ifdef __ARMEB__
1781 	mov	r0, xxl
1782 	bl	__clzsi2
1783 	add	r0, r0, #32
1784 	b 2f
1785 1:
1786 	bl	__clzsi2
1787 # else
1788 	bl	__clzsi2
1789 	add	r0, r0, #32
1790 	b 2f
1791 1:
1792 	mov	r0, xxh
1793 	bl	__clzsi2
1794 # endif
1795 2:
1796 # ifdef NOT_ISA_TARGET_32BIT
1797 	pop	{r4, pc}
1798 # else
1799 	RETLDM	r4
1800 # endif
1801 	FUNC_END clzdi2
1802 
1803 #else /* HAVE_ARM_CLZ */
1804 
@ With CLZ in hardware the whole thing is branch-free.
1805 ARM_FUNC_START clzdi2
1806 	cmp	xxh, #0
1807 	do_it	eq, et
1808 	clzeq	r0, xxl
1809 	clzne	r0, xxh
1810 	addeq	r0, r0, #32
1811 	RET
1812 	FUNC_END clzdi2
1813 
1814 #endif
1815 #endif /* L_clzdi2 */
1816
1817 #ifdef L_ctzsi2
@ Count trailing zeros of r0.  First isolate the lowest set bit with
@ r0 &= -r0, then locate that single bit with the same binary-search +
@ table scheme as clzsi2 (table values chosen so result = table - r1).
1818 #ifdef NOT_ISA_TARGET_32BIT
1819 FUNC_START ctzsi2
1820 	neg	r1, r0
1821 	and	r0, r0, r1
1822 	mov	r1, #28
1823 	mov	r3, #1
1824 	lsl	r3, r3, #16
1825 	cmp	r0, r3 /* 0x10000 */
1826 	bcc	2f
1827 	lsr	r0, r0, #16
1828 	sub	r1, r1, #16
1829 2:	lsr	r3, r3, #8
1830 	cmp	r0, r3 /* #0x100 */
1831 	bcc	2f
1832 	lsr	r0, r0, #8
1833 	sub	r1, r1, #8
1834 2:	lsr	r3, r3, #4
1835 	cmp	r0, r3 /* #0x10 */
1836 	bcc	2f
1837 	lsr	r0, r0, #4
1838 	sub	r1, r1, #4
1839 2:	adr	r2, 1f
1840 	ldrb	r0, [r2, r0]
1841 	sub	r0, r0, r1
1842 	bx lr
1843 .align 2
1844 1:
1845 .byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1846 	FUNC_END ctzsi2
1847 #else
1848 ARM_FUNC_START ctzsi2
1849 	rsb	r1, r0, #0
1850 	and	r0, r0, r1
1851 # if defined(HAVE_ARM_CLZ)
@ With one bit set, ctz = 31 - clz.
1852 	clz	r0, r0
1853 	rsb	r0, r0, #31
1854 	RET
1855 # else
1856 	mov	r1, #28
1857 	cmp	r0, #0x10000
1858 	do_it	cs, t
1859 	movcs	r0, r0, lsr #16
1860 	subcs	r1, r1, #16
1861 	cmp	r0, #0x100
1862 	do_it	cs, t
1863 	movcs	r0, r0, lsr #8
1864 	subcs	r1, r1, #8
1865 	cmp	r0, #0x10
1866 	do_it	cs, t
1867 	movcs	r0, r0, lsr #4
1868 	subcs	r1, r1, #4
1869 	adr	r2, 1f
1870 	ldrb	r0, [r2, r0]
1871 	sub	r0, r0, r1
1872 	RET
1873 .align 2
1874 1:
1875 .byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1876 # endif /* !HAVE_ARM_CLZ */
1877 	FUNC_END ctzsi2
1878 #endif
1879 #endif /* L_ctzsi2 */
1880
1881 /* ------------------------------------------------------------------------ */
1882 /* These next two sections are here despite the fact that they contain Thumb
1883 assembler because their presence allows interworked code to be linked even
1884 when the GCC library is this one. */
1885
1886 /* Do not build the interworking functions when the target architecture does
1887 not support Thumb instructions. (This can be a multilib option). */
1888 #if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
1889 || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
1890 || __ARM_ARCH__ >= 6
1891
1892 #if defined L_call_via_rX
1893 
1894 /* These labels & instructions are used by the Arm/Thumb interworking code. 
1895    The address of function to be called is loaded into a register and then 
1896    one of these labels is called via a BL instruction.  This puts the 
1897    return address into the link register with the bottom bit set, and the 
1898    code here switches to the correct mode before executing the function.  */
1899 	
1900 	.text
1901 	.align 0
1902         .force_thumb
1903 
@ Emits _call_via_<reg>: a two-byte Thumb stub that simply BXes to the
@ target address held in <reg>; BX switches instruction set as needed.
1904 .macro call_via register
1905 	THUMB_FUNC_START _call_via_\register
1906 
1907 	bx	\register
1908 	nop
1909 
1910 	SIZE	(_call_via_\register)
1911 .endm
1912 
1913 	call_via r0
1914 	call_via r1
1915 	call_via r2
1916 	call_via r3
1917 	call_via r4
1918 	call_via r5
1919 	call_via r6
1920 	call_via r7
1921 	call_via r8
1922 	call_via r9
1923 	call_via sl
1924 	call_via fp
1925 	call_via ip
1926 	call_via sp
1927 	call_via lr
1928 
1929 #endif /* L_call_via_rX */
1930
1931 /* Don't bother with the old interworking routines for Thumb-2. */
1932 /* ??? Maybe only omit these on "m" variants. */
1933 #if !defined(__thumb2__) && __ARM_ARCH_ISA_ARM
1934
1935 #if defined L_interwork_call_via_rX
1936 
1937 /* These labels & instructions are used by the Arm/Thumb interworking code,
1938    when the target address is in an unknown instruction set.  The address 
1939    of function to be called is loaded into a register and then one of these
1940    labels is called via a BL instruction.  This puts the return address 
1941    into the link register with the bottom bit set, and the code here 
1942    switches to the correct mode before executing the function.  Unfortunately
1943    the target code cannot be relied upon to return via a BX instruction, so
1944    instead we have to store the return address on the stack and allow the
1945    called function to return here instead.  Upon return we recover the real
1946    return address and use a BX to get back to Thumb mode.
1947 
1948    There are three variations of this code.  The first,
1949    _interwork_call_via_rN(), will push the return address onto the
1950    stack and pop it in _arm_return().  It should only be used if all
1951    arguments are passed in registers.
1952 
1953    The second, _interwork_r7_call_via_rN(), instead stores the return
1954    address at [r7, #-4].  It is the caller's responsibility to ensure
1955    that this address is valid and contains no useful data.
1956 
1957    The third, _interwork_r11_call_via_rN(), works in the same way but
1958    uses r11 instead of r7.  It is useful if the caller does not really
1959    need a frame pointer.  */
1960 	
1961 	.text
1962 	.align 0
1963 
1964 	.code   32
1965 	.globl _arm_return
1966 LSYM(Lstart_arm_return):
1967 	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
1968 	cfi_push	0, 0xe, -0x8, 0x8
1969 	nop	@ This nop is for the benefit of debuggers, so that
1970 		@ backtraces will use the correct unwind information.
1971 _arm_return:
@ Pop the real (Thumb) return address pushed by the stub and BX to it.
1972 	RETLDM	unwind=LSYM(Lstart_arm_return)
1973 	cfi_end	LSYM(Lend_arm_return)
1974 
1975 	.globl _arm_return_r7
1976 _arm_return_r7:
1977 	ldr	lr, [r7, #-4]
1978 	bx	lr
1979 
1980 	.globl _arm_return_r11
1981 _arm_return_r11:
1982 	ldr	lr, [r11, #-4]
1983 	bx	lr
1984 
@ Emits an interworking stub that stashes the return address at
@ [\frame, #-4] instead of on the stack, then BXes to \register.
1985 .macro interwork_with_frame frame, register, name, return
1986 	.code	16
1987 
1988 	THUMB_FUNC_START \name
1989 
@ bx pc switches to ARM state (pc reads as aligned, bit 0 clear).
1990 	bx	pc
1991 	nop
1992 
1993 	.code	32
@ Bit 0 of the target clear means an ARM-state callee: store the return
@ address and route its return through _arm_return_\frame.
1994 	tst	\register, #1
1995 	streq	lr, [\frame, #-4]
1996 	adreq	lr, _arm_return_\frame
1997 	bx	\register
1998 
1999 	SIZE	(\name)
2000 .endm
2001 
@ Emits _interwork_call_via_<reg> plus the r7/r11 framed variants.
2002 .macro interwork register
2003 	.code	16
2004 
2005 	THUMB_FUNC_START _interwork_call_via_\register
2006 
2007 	bx	pc
2008 	nop
2009 
2010 	.code	32
2011 	.globl LSYM(Lchange_\register)
2012 LSYM(Lchange_\register):
2013 	tst	\register, #1
@ ARM-state callee: push the return address; _arm_return pops it.
2014 	streq	lr, [sp, #-8]!
2015 	adreq	lr, _arm_return
2016 	bx	\register
2017 
2018 	SIZE	(_interwork_call_via_\register)
2019 
2020 	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
2021 	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
2022 .endm
2023 	
2024 	interwork r0
2025 	interwork r1
2026 	interwork r2
2027 	interwork r3
2028 	interwork r4
2029 	interwork r5
2030 	interwork r6
2031 	interwork r7
2032 	interwork r8
2033 	interwork r9
2034 	interwork sl
2035 	interwork fp
2036 	interwork ip
2037 	interwork sp
2038 	
2039 	/* The LR case has to be handled a little differently...  */
2040 	.code 16
2041 
2042 	THUMB_FUNC_START _interwork_call_via_lr
2043 
2044 	bx 	pc
2045 	nop
2046 	
2047 	.code 32
2048 	.globl .Lchange_lr
2049 .Lchange_lr:
2050 	tst	lr, #1
2051 	stmeqdb	r13!, {lr, pc}
@ lr is both target and return address: move the target to ip before
@ lr is redirected to _arm_return.
2052 	mov	ip, lr
2053 	adreq	lr, _arm_return
2054 	bx	ip
2055 	
2056 	SIZE	(_interwork_call_via_lr)
2057 	
2058 #endif /* L_interwork_call_via_rX */
2059 #endif /* !__thumb2__ */
2060
2061 /* Functions to support compact PIC switch tables in thumb1 state.
2062    All these routines take an index into the table in r0.  The
2063    table is at LR & ~1 (but this must be rounded up in the case
2064    of 32-bit entries).  They are only permitted to clobber r12
2065    and r14 and r0 must be preserved on exit.  */
2066 #ifdef L_thumb1_case_sqi
2067 	
@ Compact switch-table dispatch, signed byte entries.  Table follows the
@ BL at lr & ~1; entry = offset/2 relative to lr.  Preserves r0.
2068 	.text
2069 	.align 0
2070         .force_thumb
2071 	.syntax unified
2072 	THUMB_FUNC_START __gnu_thumb1_case_sqi
2073 	push	{r1}
@ Clear the Thumb bit of lr to get the table base address.
2074 	mov	r1, lr
2075 	lsrs	r1, r1, #1
2076 	lsls	r1, r1, #1
2077 	ldrsb	r1, [r1, r0]
@ Entries are half-word offsets: scale by 2 and branch past the table.
2078 	lsls	r1, r1, #1
2079 	add	lr, lr, r1
2080 	pop	{r1}
2081 	bx	lr
2082 	SIZE (__gnu_thumb1_case_sqi)
2083 #endif
2084 
2085 #ifdef L_thumb1_case_uqi
2086 	
@ As above, but unsigned byte entries (ldrb instead of ldrsb).
2087 	.text
2088 	.align 0
2089         .force_thumb
2090 	.syntax unified
2091 	THUMB_FUNC_START __gnu_thumb1_case_uqi
2092 	push	{r1}
2093 	mov	r1, lr
2094 	lsrs	r1, r1, #1
2095 	lsls	r1, r1, #1
2096 	ldrb	r1, [r1, r0]
2097 	lsls	r1, r1, #1
2098 	add	lr, lr, r1
2099 	pop	{r1}
2100 	bx	lr
2101 	SIZE (__gnu_thumb1_case_uqi)
2102 #endif
2103 
2104 #ifdef L_thumb1_case_shi
2105 	
@ Signed half-word entries: index must be scaled by 2 before the load.
2106 	.text
2107 	.align 0
2108         .force_thumb
2109 	.syntax unified
2110 	THUMB_FUNC_START __gnu_thumb1_case_shi
2111 	push	{r0, r1}
2112 	mov	r1, lr
2113 	lsrs	r1, r1, #1
2114 	lsls	r0, r0, #1
2115 	lsls	r1, r1, #1
2116 	ldrsh	r1, [r1, r0]
2117 	lsls	r1, r1, #1
2118 	add	lr, lr, r1
2119 	pop	{r0, r1}
2120 	bx	lr
2121 	SIZE (__gnu_thumb1_case_shi)
2122 #endif
2123 
2124 #ifdef L_thumb1_case_uhi
2125 	
@ As above, but unsigned half-word entries (ldrh instead of ldrsh).
2126 	.text
2127 	.align 0
2128         .force_thumb
2129 	.syntax unified
2130 	THUMB_FUNC_START __gnu_thumb1_case_uhi
2131 	push	{r0, r1}
2132 	mov	r1, lr
2133 	lsrs	r1, r1, #1
2134 	lsls	r0, r0, #1
2135 	lsls	r1, r1, #1
2136 	ldrh	r1, [r1, r0]
2137 	lsls	r1, r1, #1
2138 	add	lr, lr, r1
2139 	pop	{r0, r1}
2140 	bx	lr
2141 	SIZE (__gnu_thumb1_case_uhi)
2142 #endif
2143 
2144 #ifdef L_thumb1_case_si
2145 	
@ Word entries: table base must additionally be rounded up to a word
@ boundary, and entries are byte offsets relative to the table base.
2146 	.text
2147 	.align 0
2148         .force_thumb
2149 	.syntax unified
2150 	THUMB_FUNC_START __gnu_thumb1_case_si
2151 	push	{r0, r1}
2152 	mov	r1, lr
2153 	adds.n	r1, r1, #2	/* Align to word.  */
2154 	lsrs	r1, r1, #2
2155 	lsls	r0, r0, #2
2156 	lsls	r1, r1, #2
2157 	ldr	r0, [r1, r0]
2158 	adds	r0, r0, r1
2159 	mov	lr, r0
2160 	pop	{r0, r1}
2161 	mov	pc, lr		/* We know we were called from thumb code.  */
2162 	SIZE (__gnu_thumb1_case_si)
2163 #endif
2164
2165 #endif /* Arch supports thumb. */
2166
@ Shared CFI bracketing for the routines in the included ieee754/bpabi
@ sources: open a frame and snapshot state, restore and close it.
2167 .macro	CFI_START_FUNCTION
2168 	.cfi_startproc
2169 	.cfi_remember_state
2170 .endm
2171 
2172 .macro	CFI_END_FUNCTION
2173 	.cfi_restore_state
2174 	.cfi_endproc
2175 .endm
2176
2177 #ifndef __symbian__
2178 /* The condition here must match the one in gcc/config/arm/elf.h. */
2179 #ifndef NOT_ISA_TARGET_32BIT
2180 #include "ieee754-df.S"
2181 #include "ieee754-sf.S"
2182 #include "bpabi.S"
2183 #else /* NOT_ISA_TARGET_32BIT */
2184 #include "bpabi-v6m.S"
2185 #endif /* NOT_ISA_TARGET_32BIT */
2186 #endif /* !__symbian__ */
2187