@ libgcc routines for ARM cpu.
2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
3
4 /* Copyright (C) 1995-2013 Free Software Foundation, Inc.
5
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
19
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
24
25 /* An executable stack is *not* required for these functions. */
26 #if defined(__ELF__) && defined(__linux__)
27 .section .note.GNU-stack,"",%progbits
28 .previous
29 #endif /* __ELF__ and __linux__ */
30
31 #ifdef __ARM_EABI__
32 /* Some attributes that are common to all routines in this file. */
33 /* Tag_ABI_align_needed: This code does not require 8-byte
34 alignment from the caller. */
35 /* .eabi_attribute 24, 0 -- default setting. */
36 /* Tag_ABI_align_preserved: This code preserves 8-byte
37 alignment in any callee. */
38 .eabi_attribute 25, 1
39 #endif /* __ARM_EABI__ */
40 /* ------------------------------------------------------------------------ */
41
/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  CONCAT1 expands its arguments before
   pasting, so SYM(x) below yields e.g. `_x' when the prefix is `_'.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#elif defined __vxworks && !defined __PIC__
#define __PLT__ /* Not supported by the kernel loader.  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
/* LSYM gives the label a leading `.' so it stays local (not exported)
   in the ELF symbol table.  */
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif
74
/* Function end macros.  Variants for interworking.  */

/* Collapse the many __ARM_ARCH_<x>__ feature-test macros into a single
   numeric __ARM_ARCH__ so later code can use ordinary comparisons.  */

#if defined(__ARM_ARCH_2__)
# define __ARM_ARCH__ 2
#endif

#if defined(__ARM_ARCH_3__)
# define __ARM_ARCH__ 3
#endif

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
	|| defined(__ARM_ARCH_6M__)
# define __ARM_ARCH__ 6
#endif

#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
	|| defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
	|| defined(__ARM_ARCH_7EM__)
# define __ARM_ARCH__ 7
#endif

#if defined(__ARM_ARCH_8A__)
# define __ARM_ARCH__ 8
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* There are times when we might prefer Thumb1 code even if ARM code is
   permitted, for example, the code might be smaller, or there might be
   interworking problems with switching to ARM state if interworking is
   disabled.  */
#if (defined(__thumb__)			\
     && !defined(__thumb2__)		\
     && (!defined(__THUMB_INTERWORK__)	\
	 || defined (__OPTIMIZE_SIZE__)	\
	 || defined(__ARM_ARCH_6M__)))
# define __prefer_thumb__
#endif
130
/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

/* bx is available: use it so a return into either ARM or Thumb state
   works.  RETc(x) appends a condition code, e.g. RETc(eq) -> bxeq lr.  */
# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

/* Pre-v4t: no bx, return by moving lr into pc (no state change).  */
# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif
159
/* Emit raw DWARF call-frame bytes into .debug_frame recording that
   register \reg has been restored at address \advance, and that the
   CFA offset is now \cfa_offset.  No-op on non-ELF targets.  */
.macro cfi_pop advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
/* Emit raw DWARF call-frame bytes into .debug_frame recording that at
   address \advance register \reg was saved at \offset (a negative byte
   offset, encoded in 4-byte data-alignment units per the CIE below),
   with the CFA offset now \cfa_offset.  No-op on non-ELF targets.  */
.macro cfi_push advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
/* Open a hand-rolled .debug_frame CIE + FDE covering the region from
   \start_label to \end_label.  Paired with cfi_end, which closes the
   FDE.  No-op on non-ELF targets.  */
.macro cfi_start start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
	.4byte	0xffffffff	@ CIE Identifier Tag
	.byte	0x1	@ CIE Version
	.ascii	"\0"	@ CIE Augmentation
	.uleb128 0x1	@ CIE Code Alignment Factor
	.sleb128 -4	@ CIE Data Alignment Factor
	.byte	0xe	@ CIE RA Column (lr is DWARF register 14)
	.byte	0xc	@ DW_CFA_def_cfa
	.uleb128 0xd	@   ...CFA is sp (register 13)...
	.uleb128 0x0	@   ...at offset 0.

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm
/* Close the FDE opened by cfi_start and define \end_label at the
   current text location.  No-op on non-ELF targets.  */
.macro cfi_end end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align 2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm
218
/* Return from a function, popping \regs (plus the return address) from
   the stack.  With interworking the return address is popped into lr
   and we bx lr; otherwise it is popped straight into pc.  \unwind, if
   given, is the function's entry label used to emit CFA bookkeeping.
   Don't pass dirn, it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, lr}
# else
	ldm\cond\dirn	sp!, {\regs, lr}
# endif
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	/* Caller is responsible for providing IT instruction.  */
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, pc}
# else
	ldm\cond\dirn	sp!, {\regs, pc}
# endif
	.endif
#endif
.endm
250
/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.  */
#if defined(__thumb2__)
/* Thumb-2: conditional execution needs an explicit IT instruction.  */
.macro do_it cond, suffix=""
	it\suffix	\cond
.endm
/* Thumb-2 shifts are separate instructions, not operand modifiers.  */
.macro shift1 op, arg0, arg1, arg2
	\op	\arg0, \arg1, \arg2
.endm
#define do_push	push
#define do_pop	pop
#define COND(op1, op2, cond) op1 ## op2 ## cond
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\shiftop \tmp, \src2, \shiftreg
	\name \dest, \src1, \tmp
.endm
#else
/* ARM: conditional execution is implicit, so do_it expands to nothing.  */
.macro do_it cond, suffix=""
.endm
.macro shift1 op, arg0, arg1, arg2
	mov	\arg0, \arg1, \op \arg2
.endm
#define do_push	stmfd sp!,
#define do_pop	ldmfd sp!,
/* Note the condition code sits between opcode parts in ARM syntax.  */
#define COND(op1, op2, cond) op1 ## cond ## op2
/* ARM can fold the register-specified shift into one instruction.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\name \dest, \src1, \src2, \shiftop \shiftreg
.endm
#endif
283
#ifdef __ARM_EABI__
/* ARM-state divide-by-zero tail: load the EABI-mandated saturated
   result (0xffffffff for unsigned; INT_MAX/INT_MIN by the sign of the
   dividend in r0 for signed) and branch to __aeabi_idiv0.  */
.macro ARM_LDIV0 name signed
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM (__aeabi_idiv0) __PLT__
.endm
#else
/* Non-EABI variant: call __div0 (which may raise a signal) and, if it
   returns, hand back 0.  */
.macro ARM_LDIV0 name signed
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm
#endif
304
305
#ifdef __ARM_EABI__
/* Thumb-state divide-by-zero tail.  Like ARM_LDIV0: load the EABI
   saturated result into r0 and transfer to __aeabi_idiv0.  Three
   variants: Thumb-1-only v6-M, Thumb-2, and classic Thumb-1 (which
   drops to ARM state for the conditional moves and long branch).  */
.macro THUMB_LDIV0 name signed
#if defined(__ARM_ARCH_6M__)
	.ifc	\signed, unsigned
	cmp	r0, #0
	beq	1f
	mov	r0, #0
	mvn	r0, r0		@ 0xffffffff
1:
	.else
	cmp	r0, #0
	beq	2f
	blt	3f
	mov	r0, #0
	mvn	r0, r0
	lsr	r0, r0, #1	@ 0x7fffffff
	b	2f
3:	mov	r0, #0x80
	lsl	r0, r0, #24	@ 0x80000000
2:
	.endif
	@ v6-M has no long branch; build the absolute address of
	@ __aeabi_idiv0 from the PC-relative offset stored at 4f and
	@ "return" to it by rewriting the stacked pc slot.
	push	{r0, r1, r2}
	ldr	r0, 4f
	adr	r1, 4f
	add	r0, r1
	str	r0, [sp, #8]
	@ We know we are not on armv4t, so pop pc is safe.
	pop	{r0, r1, pc}
	.align	2
4:
	.word	__aeabi_idiv0 - 4b
#elif defined(__thumb2__)
	.syntax unified
	.ifc	\signed, unsigned
	cbz	r0, 1f
	mov	r0, #0xffffffff
1:
	.else
	cmp	r0, #0
	do_it	gt
	movgt	r0, #0x7fffffff
	do_it	lt
	movlt	r0, #0x80000000
	.endif
	b.w	SYM(__aeabi_idiv0) __PLT__
#else
	@ Classic Thumb-1: switch to ARM state (bx pc lands two
	@ instructions ahead, in ARM mode) for conditional moves and
	@ the long branch.
	.align	2
	bx	pc
	nop
	.arm
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM(__aeabi_idiv0) __PLT__
	.thumb
#endif
.endm
#else
/* Non-EABI variant: call __div0, return 0 to the original caller.  */
.macro THUMB_LDIV0 name signed
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm
#endif
381
/* Mark the end of function __\name (emits .size on ELF).  */
.macro FUNC_END name
	SIZE (__\name)
.endm

/* Close a division routine: emit the shared Ldiv0 divide-by-zero
   handler (Thumb or ARM flavour), its debug-frame info, and the
   function-end marker.  */
.macro DIV_FUNC_END name signed
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name \signed
#else
	ARM_LDIV0 \name \signed
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm
397
/* Declare a global Thumb entry point \name (no __ prefix added).  */
.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX .syntax divided
# else
#define THUMB_SYNTAX
# endif
#else
#define THUMB_FUNC
#define THUMB_CODE
#define THUMB_SYNTAX
#endif

/* Declare a global entry point __\name in the mode (ARM/Thumb) the
   file is being compiled for.  */
.macro FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
	THUMB_SYNTAX
SYM (__\name):
.endm
431
/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro ARM_FUNC_START name
       FUNC_START \name
       .syntax unified
.endm
#define EQUIV .thumb_set
.macro  ARM_CALL name
	bl	__\name
.endm

#elif defined(__INTERWORKING_STUBS__)

/* Entry point gets a Thumb stub that switches to ARM state; the real
   ARM body starts at the local label _L__\name.  */
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro  ARM_CALL name
	bl	_L__\name
.endm

#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

#ifdef __ARM_ARCH_6M__
/* v6-M has no ARM state, so no ARM_FUNC_START/ARM_CALL here.  */
#define EQUIV .thumb_set
#else
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
	bl	__\name
.endm
#endif

#endif

/* Make __\new an alias for __\old, using .thumb_set in Thumb mode so
   the alias keeps the Thumb bit.  */
.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

#ifndef __ARM_ARCH_6M__
/* Alias for a function declared with ARM_FUNC_START; also aliases the
   _L__ ARM-mode entry when interworking stubs are in use.  */
.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm
#endif
504
/* Names for the high/low halves of a 64-bit argument pair (r0/r1 and
   r2/r3); which register holds which half depends on endianness.  */
#ifdef __ARMEB__
#define xxh r0
#define xxl r1
#define yyh r2
#define yyl r3
#else
#define xxh r1
#define xxl r0
#define yyh r3
#define yyl r2
#endif

#ifdef __ARM_EABI__
/* Declare __\name as a weak symbol so applications may override it.  */
.macro	WEAK name
	.weak SYM (__\name)
.endm
#endif

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif
539
540 /* ------------------------------------------------------------------------ */
541 /* Bodies of the division and modulo routines. */
542 /* ------------------------------------------------------------------------ */
/* Core unsigned 32-bit division loop (ARM / Thumb-2 state).
   In:  \dividend, \divisor   (unsigned; divisor != 0, dividend >= divisor,
				divisor not a power of two -- callers test
				those cases first)
   Out: \result   = quotient
	\dividend = remainder
   Clobbers \curbit and the condition flags.  */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

#if defined (__thumb2__)
	@ Use clz to find how far the divisor can be shifted up, then
	@ jump into the middle of the fully unrolled shift-subtract
	@ table below, skipping the iterations that cannot contribute.
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsb	\curbit, \curbit, #31
	adr	\result, 1f
	add	\curbit, \result, \curbit, lsl #4
	mov	\result, #0
	mov	pc, \curbit
.p2align 3
1:
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	@ Each unrolled step must be exactly 16 bytes so the computed
	@ jump above lands on a step boundary; the nop.n pads it.
	cmp.w	\dividend, \divisor, lsl #shift
	nop.n
	adc.w	\result, \result, \result
	it	cs
	subcs.w	\dividend, \dividend, \divisor, lsl #shift
	.endr
#else
	@ ARM state: same idea, each unrolled step is 3 instructions
	@ (12 bytes), hence curbit*3 scaled by 4 for the pc-relative jump.
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr
#endif

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	@ Size-optimised v5+: align divisor with dividend using clz,
	@ then fall into the 4-way unrolled loop below.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* __ARM_ARCH__ < 5 */

	@ Division loop, unrolled 4 ways: try the divisor at the current
	@ shift and at 1, 2 and 3 further right, accumulating quotient
	@ bits via curbit.
1:	cmp	\dividend, \divisor
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	do_it	ne, t
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
655 /* ------------------------------------------------------------------------ */
/* Compute \order = log2(\divisor), for \divisor a power of two.
   On v5+ this is a clz; earlier architectures do a branch-free binary
   search, destroying \divisor in the process.  */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
685 /* ------------------------------------------------------------------------ */
/* Core unsigned 32-bit modulo loop (ARM state).
   In:  \dividend, \divisor   (unsigned; divisor != 0)
   Out: \dividend = \dividend % \divisor
   Clobbers \order, \spare, \divisor and the condition flags.  */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	@ Use clz to find the shift range, then jump into the unrolled
	@ shift-subtract table (each step is 2 instructions = 8 bytes,
	@ hence the lsl #3 on the pc-relative jump).
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	@ Size-optimised v5+: align divisor with dividend, tracking the
	@ shift count in \order for the loop below.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
774 /* ------------------------------------------------------------------------ */
/* Thumb-1 unsigned divide/modulo core, using the register aliases
   (work, dividend, divisor, result, curbit, overdone) declared above
   under #ifdef __thumb__.
   \modulo == 0: build the quotient in `result'.
   \modulo == 1: leave the remainder in `dividend'; since the unrolled
   loop can subtract shifted copies of the divisor that were too large
   on the final pass, record each such subtraction as a bit in
   `overdone' (rotated copies of curbit, original curbit kept in ip)
   and add the excess back afterwards.  */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done, we can fix them up
	@ afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm
931 /* ------------------------------------------------------------------------ */
932 /* Start of the Real Functions */
933 /* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

/* unsigned int __udivsi3 (unsigned int dividend, unsigned int divisor)
   Also exported as __aeabi_uidiv, plus __aeabi_uidivmod which returns
   quotient in r0 and remainder in r1.  Division by zero branches to
   the shared Ldiv0 handler emitted by DIV_FUNC_END.  */

#if defined(__prefer_thumb__)

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(udivsi3_skip_div0_test):
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	@ Hardware divide available: just guard against zero and use it.
	cmp	r1, #0
	beq	LSYM(Ldiv0)

	udiv	r0, r0, r1
	RET

#else /* ARM version/Thumb-2.  */

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	/* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
	   check for division-by-zero a second time.  */
LSYM(udivsi3_skip_div0_test):
	subs	r2, r1, #1			@ also traps divisor == 0 via carry
	do_it	eq
	RETc(eq)				@ divisor == 1: quotient is dividend
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f				@ dividend <= divisor: quotient is 0 or 1
	tst	r1, r2
	beq	12f				@ divisor is a power of two: just shift

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

11:	do_it	eq, e
	moveq	r0, #1
	movne	r0, #0
	RET

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3 unsigned

#if defined(__prefer_thumb__)
FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	push	{r0, r1, lr}
	bl	LSYM(udivsi3_skip_div0_test)
	POP	{r1, r2, r3}
	@ Reconstruct the remainder: r1 = dividend - divisor * quotient.
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	udiv	r0, r0, r1
	mls	r1, r0, r1, r2
	RET
#else
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(udivsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	@ Reconstruct the remainder: r1 = dividend - divisor * quotient.
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_uidivmod

#endif /* L_udivsi3 */
1036 /* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

/* unsigned int __umodsi3 (unsigned int dividend, unsigned int divisor)
   Returns dividend % divisor in r0; division by zero branches to the
   shared Ldiv0 handler emitted by DIV_FUNC_END.  */

#ifdef __ARM_ARCH_EXT_IDIV__

	ARM_FUNC_START umodsi3

	@ Hardware divide: remainder = r0 - (r0 / r1) * r1 via mls.
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	udiv	r2, r0, r1
	mls	r0, r1, r2, r0
	RET

#elif defined(__thumb__)

	FUNC_START umodsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET				@ dividend < divisor: it is the remainder

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version.  */

	FUNC_START umodsi3

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)				@ trivial cases handled above

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif /* ARM version.  */

	DIV_FUNC_END umodsi3 unsigned

#endif /* L_umodsi3 */
1089 /* ------------------------------------------------------------------------ */
#ifdef L_divsi3

/* int __divsi3 (int dividend, int divisor)
   Also exported as __aeabi_idiv, plus __aeabi_idivmod which returns
   quotient in r0 and remainder in r1.  Strategy: record the sign of
   the result (dividend ^ divisor), divide the absolute values with the
   unsigned core, then negate if needed.  Division by zero branches to
   the shared Ldiv0 handler emitted by DIV_FUNC_END.  */

#if defined(__prefer_thumb__)

	FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	r0, r0			@ Result sign differs: negate quotient.
LSYM(Lover12):
	pop	{ work }
	RET

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	@ Hardware signed divide.
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	sdiv	r0, r0, r1
	RET

#else /* ARM/Thumb-2 version.  */

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	eor	ip, r0, r1			@ save the sign of the result.
	do_it	mi
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	do_it	mi
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ |dividend| <= |divisor|
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	do_it	mi
	rsbmi	r0, r0, #0
	RET

10:	teq	ip, r0				@ same sign ?
	do_it	mi
	rsbmi	r0, r0, #0
	RET

11:	do_it	lo
	movlo	r0, #0
	do_it	eq,t
	moveq	r0, ip, asr #31			@ equal magnitudes: result is +/-1
	orreq	r0, r0, #1
	RET

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	do_it	mi
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3 signed

#if defined(__prefer_thumb__)
FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	push	{r0, r1, lr}
	bl	LSYM(divsi3_skip_div0_test)
	POP	{r1, r2, r3}
	@ Reconstruct the remainder: r1 = dividend - divisor * quotient.
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	sdiv	r0, r0, r1
	mls	r1, r0, r1, r2
	RET
#else
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(divsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	@ Reconstruct the remainder: r1 = dividend - divisor * quotient.
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
1222 /* ------------------------------------------------------------------------ */
#ifdef L_modsi3

/* int __modsi3 (int dividend, int divisor)
   Returns dividend % divisor in r0.  The remainder takes the sign of
   the dividend (C semantics); the divisor's sign is irrelevant, so both
   operands are made positive for the unsigned core.  Division by zero
   branches to the shared Ldiv0 handler emitted by DIV_FUNC_END.  */

#if defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	sdiv	r2, r0, r1
	mls	r0, r1, r2, r0
	RET

#elif defined(__thumb__)

	FUNC_START modsi3

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ Need to save the sign of the dividend, unfortunately, we need
	@ work later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }		@ original dividend: recover its sign
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f				@ trivial cases: just fix the sign

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3 signed

#endif /* L_modsi3 */
1295 /* ------------------------------------------------------------------------ */
1296 #ifdef L_dvmd_tls
1297
@ Divide-by-zero handlers, "do nothing" flavour: just return to the
@ caller.  The EABI entries are declared WEAK so that an application or
@ C library can provide its own (e.g. trapping) versions.
1298 #ifdef __ARM_EABI__
1299 WEAK aeabi_idiv0
1300 WEAK aeabi_ldiv0
1301 FUNC_START aeabi_idiv0
1302 FUNC_START aeabi_ldiv0 @ both names share the single RET below
1303 RET
1304 FUNC_END aeabi_ldiv0
1305 FUNC_END aeabi_idiv0
1306 #else
1307 FUNC_START div0
1308 RET
1309 FUNC_END div0
1310 #endif
1311
1312 #endif /* L_dvmd_tls */
1313 /* ------------------------------------------------------------------------ */
1314 #ifdef L_dvmd_lnx
1315 @ GNU/Linux division-by-zero handler. Used in place of L_dvmd_tls
1316
1317 /* Constant taken from <asm/signal.h>. */
1318 #define SIGFPE 8
1319
1320 #ifdef __ARM_EABI__
1321 WEAK aeabi_idiv0
1322 WEAK aeabi_ldiv0
1323 ARM_FUNC_START aeabi_idiv0
1324 ARM_FUNC_START aeabi_ldiv0
1325 #else
1326 ARM_FUNC_START div0
1327 #endif
1328
@ Raise SIGFPE via libc raise(), then return to the caller.  r1 is
@ pushed alongside lr; presumably this keeps the stack 8-byte aligned
@ across the call (Tag_ABI_align_preserved) -- confirm against RETLDM.
1329 do_push {r1, lr}
1330 mov r0, #SIGFPE
1331 bl SYM(raise) __PLT__ @ PLT-indirect for shared-library builds
1332 RETLDM r1 @ pop r1 and return
1333
1334 #ifdef __ARM_EABI__
1335 FUNC_END aeabi_ldiv0
1336 FUNC_END aeabi_idiv0
1337 #else
1338 FUNC_END div0
1339 #endif
1340
1341 #endif /* L_dvmd_lnx */
1342 #ifdef L_clear_cache
1343 #if defined __ARM_EABI__ && defined __linux__
1344 @ EABI GNU/Linux call to cacheflush syscall.
@ __clear_cache(start, end): r0/r1 already hold the range from the
@ caller; r2 = 0 is the flags argument; r7 = 0xf0002, the EABI
@ __ARM_NR_cacheflush syscall number.  r7 is callee-saved, so it is
@ preserved around the syscall.
1345 ARM_FUNC_START clear_cache
1346 do_push {r7}
1347 #if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
1348 movw r7, #2 @ r7 = 0x000f0002 via movw/movt pair
1349 movt r7, #0xf
1350 #else
1351 mov r7, #0xf0000 @ older encodings: build 0xf0002 in two steps
1352 add r7, r7, #2
1353 #endif
1354 mov r2, #0 @ flags = 0
1355 swi 0
1356 do_pop {r7}
1357 RET
1358 FUNC_END clear_cache
1359 #else
1360 #error "This is only for ARM EABI GNU/Linux"
1361 #endif
1362 #endif /* L_clear_cache */
1363 /* ------------------------------------------------------------------------ */
1364 /* Dword shift operations. */
1365 /* All the following Dword shift variants rely on the fact that
1366 shft xxx, Reg
1367 is in fact done as
1368 shft xxx, (Reg & 255)
1369 so for Reg value in (32...63) and (-1...-31) we will get zero (in the
1370 case of logical shifts) or the sign (for asr). */
1371
/* al/ah name the low/high word of the 64-bit value in r0:r1, which
swap with endianness. */
1372 #ifdef __ARMEB__
1373 #define al r1
1374 #define ah r0
1375 #else
1376 #define al r0
1377 #define ah r1
1378 #endif
1379
1380 /* Prevent __aeabi double-word shifts from being produced on SymbianOS. */
1381 #ifndef __symbian__
1382
1383 #ifdef L_lshrdi3
1384
@ __lshrdi3 / __aeabi_llsr: 64-bit logical shift right.
@ In: ah:al = value, r2 = shift count.  Out: ah:al = value >> r2.
@ Both variants cover count < 32 and count >= 32 branchlessly, relying
@ on out-of-range register shifts yielding zero (see note above).
1385 FUNC_START lshrdi3
1386 FUNC_ALIAS aeabi_llsr lshrdi3
1387
1388 #ifdef __thumb__
1389 lsr al, r2 @ al >>= n (zero if n >= 32)
1390 mov r3, ah
1391 lsr ah, r2 @ ah >>= n
1392 mov ip, r3 @ ip = original ah
1393 sub r2, #32
1394 lsr r3, r2 @ n >= 32: bits of old ah >> (n-32)...
1395 orr al, r3 @ ...land in al
1396 neg r2, r2 @ r2 = 32 - n
1397 mov r3, ip
1398 lsl r3, r2 @ n < 32: old ah << (32-n)...
1399 orr al, r3 @ ...fills al's high bits
1400 RET
1401 #else
1402 subs r3, r2, #32 @ r3 = n - 32; flags select the two regimes
1403 rsb ip, r2, #32 @ ip = 32 - n
1404 movmi al, al, lsr r2 @ n < 32
1405 movpl al, ah, lsr r3 @ n >= 32: al comes entirely from ah
1406 orrmi al, al, ah, lsl ip @ n < 32: merge ah's low bits into al
1407 mov ah, ah, lsr r2 @ ah >>= n (zero when n >= 32)
1408 RET
1409 #endif
1410 FUNC_END aeabi_llsr
1411 FUNC_END lshrdi3
1412
1413 #endif
1414
1415 #ifdef L_ashrdi3
1416
@ __ashrdi3 / __aeabi_lasr: 64-bit arithmetic shift right.
@ In: ah:al = value, r2 = shift count.  Out: ah:al = value >> r2
@ (sign-propagating).  Same structure as lshrdi3, except the Thumb
@ path must branch: an out-of-range asr yields the sign, not zero.
1417 FUNC_START ashrdi3
1418 FUNC_ALIAS aeabi_lasr ashrdi3
1419
1420 #ifdef __thumb__
1421 lsr al, r2 @ al >>= n (logical: low word has no sign)
1422 mov r3, ah
1423 asr ah, r2 @ ah >>= n (arithmetic)
1424 sub r2, #32
1425 @ If r2 is negative at this point the following step would OR
1426 @ the sign bit into all of AL. That's not what we want...
1427 bmi 1f
1428 mov ip, r3
1429 asr r3, r2 @ n >= 32: old ah >> (n-32) into al
1430 orr al, r3
1431 mov r3, ip
1432 1:
1433 neg r2, r2 @ r2 = 32 - n
1434 lsl r3, r2 @ n < 32: old ah << (32-n)
1435 orr al, r3
1436 RET
1437 #else
1438 subs r3, r2, #32 @ r3 = n - 32; flags select the regime
1439 rsb ip, r2, #32 @ ip = 32 - n
1440 movmi al, al, lsr r2 @ n < 32
1441 movpl al, ah, asr r3 @ n >= 32: al from ah, sign-extended
1442 orrmi al, al, ah, lsl ip @ n < 32: merge ah's low bits into al
1443 mov ah, ah, asr r2 @ ah >>= n (all sign bits when n >= 32)
1444 RET
1445 #endif
1446
1447 FUNC_END aeabi_lasr
1448 FUNC_END ashrdi3
1449
1450 #endif
1451
1452 #ifdef L_ashldi3
1453
@ __ashldi3 / __aeabi_llsl: 64-bit shift left.
@ In: ah:al = value, r2 = shift count.  Out: ah:al = value << r2.
@ Mirror image of lshrdi3: bits move from al up into ah.
1454 FUNC_START ashldi3
1455 FUNC_ALIAS aeabi_llsl ashldi3
1456
1457 #ifdef __thumb__
1458 lsl ah, r2 @ ah <<= n (zero if n >= 32)
1459 mov r3, al
1460 lsl al, r2 @ al <<= n
1461 mov ip, r3 @ ip = original al
1462 sub r2, #32
1463 lsl r3, r2 @ n >= 32: old al << (n-32) into ah
1464 orr ah, r3
1465 neg r2, r2 @ r2 = 32 - n
1466 mov r3, ip
1467 lsr r3, r2 @ n < 32: old al >> (32-n)
1468 orr ah, r3 @ fills ah's low bits
1469 RET
1470 #else
1471 subs r3, r2, #32 @ r3 = n - 32; flags select the regime
1472 rsb ip, r2, #32 @ ip = 32 - n
1473 movmi ah, ah, lsl r2 @ n < 32
1474 movpl ah, al, lsl r3 @ n >= 32: ah comes entirely from al
1475 orrmi ah, ah, al, lsr ip @ n < 32: merge al's high bits into ah
1476 mov al, al, lsl r2 @ al <<= n (zero when n >= 32)
1477 RET
1478 #endif
1479 FUNC_END aeabi_llsl
1480 FUNC_END ashldi3
1481
1482 #endif
1483
1484 #endif /* __symbian__ */
1485
/* Architectures with a hardware CLZ instruction (v5E/v5TE/v5TEJ and
   v6+, excluding v6-M which lacks the ARM encoding). */
1486 #if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
1487 || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
1488 || defined(__ARM_ARCH_5TEJ__)
1489 #define HAVE_ARM_CLZ 1
1490 #endif
1491
1492 #ifdef L_clzsi2
@ __clzsi2: count leading zeros of r0, result in r0.
@ Software variants: binary search narrows r0 to its top nibble while
@ accumulating an offset in r1, then a 16-entry table supplies the CLZ
@ of the remaining nibble.
1493 #if defined(__ARM_ARCH_6M__)
1494 FUNC_START clzsi2
1495 mov r1, #28 @ offset assuming value fits in low nibble
1496 mov r3, #1
1497 lsl r3, r3, #16 @ r3 = 0x10000 (v6-M has no big immediates)
1498 cmp r0, r3 /* 0x10000 */
1499 bcc 2f
1500 lsr r0, r0, #16 @ value uses the high half
1501 sub r1, r1, #16
1502 2: lsr r3, r3, #8
1503 cmp r0, r3 /* #0x100 */
1504 bcc 2f
1505 lsr r0, r0, #8
1506 sub r1, r1, #8
1507 2: lsr r3, r3, #4
1508 cmp r0, r3 /* #0x10 */
1509 bcc 2f
1510 lsr r0, r0, #4
1511 sub r1, r1, #4
1512 2: adr r2, 1f
1513 ldrb r0, [r2, r0] @ CLZ of the final nibble (table below)
1514 add r0, r0, r1
1515 bx lr
1516 .align 2
1517 1:
1518 .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1519 FUNC_END clzsi2
1520 #else
1521 ARM_FUNC_START clzsi2
1522 # if defined(HAVE_ARM_CLZ)
1523 clz r0, r0
1524 RET
1525 # else
@ Same binary search as above, using conditional execution.
1526 mov r1, #28
1527 cmp r0, #0x10000
1528 do_it cs, t
1529 movcs r0, r0, lsr #16
1530 subcs r1, r1, #16
1531 cmp r0, #0x100
1532 do_it cs, t
1533 movcs r0, r0, lsr #8
1534 subcs r1, r1, #8
1535 cmp r0, #0x10
1536 do_it cs, t
1537 movcs r0, r0, lsr #4
1538 subcs r1, r1, #4
1539 adr r2, 1f
1540 ldrb r0, [r2, r0]
1541 add r0, r0, r1
1542 RET
1543 .align 2
1544 1:
1545 .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1546 # endif /* !HAVE_ARM_CLZ */
1547 FUNC_END clzsi2
1548 #endif
1549 #endif /* L_clzsi2 */
1550
1551 #ifdef L_clzdi2
@ __clzdi2: count leading zeros of the 64-bit value in xxh:xxl
@ (register aliases for the high/low words, defined earlier in this
@ file), result in r0.
1552 #if !defined(HAVE_ARM_CLZ)
1553
@ No hardware clz: dispatch to __clzsi2 on the relevant word.  r4 is
@ pushed with lr -- presumably to keep 8-byte stack alignment across
@ the call (Tag_ABI_align_preserved); confirm before changing.
1554 # if defined(__ARM_ARCH_6M__)
1555 FUNC_START clzdi2
1556 push {r4, lr}
1557 # else
1558 ARM_FUNC_START clzdi2
1559 do_push {r4, lr}
1560 # endif
1561 cmp xxh, #0 @ high word zero -> answer is 32 + clz(low)
1562 bne 1f
@ The two byte orders differ in which register already holds the word
@ that __clzsi2 (which takes its argument in r0) needs.
1563 # ifdef __ARMEB__
1564 mov r0, xxl
1565 bl __clzsi2
1566 add r0, r0, #32
1567 b 2f
1568 1:
1569 bl __clzsi2 @ big-endian: high word is already r0
1570 # else
1571 bl __clzsi2 @ little-endian: low word is already r0
1572 add r0, r0, #32
1573 b 2f
1574 1:
1575 mov r0, xxh
1576 bl __clzsi2
1577 # endif
1578 2:
1579 # if defined(__ARM_ARCH_6M__)
1580 pop {r4, pc}
1581 # else
1582 RETLDM r4
1583 # endif
1584 FUNC_END clzdi2
1585
1586 #else /* HAVE_ARM_CLZ */
1587
@ Hardware clz: pick the word and add 32 when the high word is zero.
1588 ARM_FUNC_START clzdi2
1589 cmp xxh, #0
1590 do_it eq, et @ ITET eq covers the next three instructions
1591 clzeq r0, xxl
1592 clzne r0, xxh
1593 addeq r0, r0, #32
1594 RET
1595 FUNC_END clzdi2
1596
1597 #endif
1598 #endif /* L_clzdi2 */
1599
1600 #ifdef L_ctzsi2
@ __ctzsi2: count trailing zeros of r0, result in r0.
@ Strategy: isolate the lowest set bit with r0 & -r0, then locate its
@ position -- via 31-clz where hardware clz exists, otherwise via the
@ same binary search as __clzsi2 but with a table biased so that
@ table[nibble] - offset yields the trailing-zero count.
@ (Result for r0 == 0 is not meaningful, as is usual for __ctzsi2.)
1601 #if defined(__ARM_ARCH_6M__)
1602 FUNC_START ctzsi2
1603 neg r1, r0
1604 and r0, r0, r1 @ r0 = lowest set bit only
1605 mov r1, #28
1606 mov r3, #1
1607 lsl r3, r3, #16
1608 cmp r0, r3 /* 0x10000 */
1609 bcc 2f
1610 lsr r0, r0, #16
1611 sub r1, r1, #16
1612 2: lsr r3, r3, #8
1613 cmp r0, r3 /* #0x100 */
1614 bcc 2f
1615 lsr r0, r0, #8
1616 sub r1, r1, #8
1617 2: lsr r3, r3, #4
1618 cmp r0, r3 /* #0x10 */
1619 bcc 2f
1620 lsr r0, r0, #4
1621 sub r1, r1, #4
1622 2: adr r2, 1f
1623 ldrb r0, [r2, r0]
1624 sub r0, r0, r1 @ table value minus offset = trailing zeros
1625 bx lr
1626 .align 2
1627 1:
1628 .byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1629 FUNC_END ctzsi2
1630 #else
1631 ARM_FUNC_START ctzsi2
1632 rsb r1, r0, #0
1633 and r0, r0, r1 @ r0 = lowest set bit only
1634 # if defined(HAVE_ARM_CLZ)
1635 clz r0, r0
1636 rsb r0, r0, #31 @ ctz = 31 - clz(lowest bit)
1637 RET
1638 # else
1639 mov r1, #28
1640 cmp r0, #0x10000
1641 do_it cs, t
1642 movcs r0, r0, lsr #16
1643 subcs r1, r1, #16
1644 cmp r0, #0x100
1645 do_it cs, t
1646 movcs r0, r0, lsr #8
1647 subcs r1, r1, #8
1648 cmp r0, #0x10
1649 do_it cs, t
1650 movcs r0, r0, lsr #4
1651 subcs r1, r1, #4
1652 adr r2, 1f
1653 ldrb r0, [r2, r0]
1654 sub r0, r0, r1
1655 RET
1656 .align 2
1657 1:
1658 .byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1659 # endif /* !HAVE_ARM_CLZ */
1660 FUNC_END ctzsi2
1661 #endif
1662 #endif /* L_ctzsi2 */
1663
1664 /* ------------------------------------------------------------------------ */
1665 /* These next two sections are here despite the fact that they contain Thumb
1666 assembler because their presence allows interworked code to be linked even
1667 when the GCC library is this one. */
1668
1669 /* Do not build the interworking functions when the target architecture does
1670 not support Thumb instructions. (This can be a multilib option). */
1671 #if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
1672 || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
1673 || __ARM_ARCH__ >= 6
1674
1675 #if defined L_call_via_rX
1676
1677 /* These labels & instructions are used by the Arm/Thumb interworking code.
1678 The address of function to be called is loaded into a register and then
1679 one of these labels is called via a BL instruction. This puts the
1680 return address into the link register with the bottom bit set, and the
1681 code here switches to the correct mode before executing the function. */
1682
1683 .text
1684 .align 0
1685 .force_thumb
1686
/* Emit _call_via_<register>: a Thumb stub that simply BXes to the
   address held in that register (BX switches mode based on bit 0). */
1687 .macro call_via register
1688 THUMB_FUNC_START _call_via_\register
1689
1690 bx \register
1691 nop @ padding; keeps the stub size uniform
1692
1693 SIZE (_call_via_\register)
1694 .endm
1695
/* One stub per register that could hold a function pointer. */
1696 call_via r0
1697 call_via r1
1698 call_via r2
1699 call_via r3
1700 call_via r4
1701 call_via r5
1702 call_via r6
1703 call_via r7
1704 call_via r8
1705 call_via r9
1706 call_via sl
1707 call_via fp
1708 call_via ip
1709 call_via sp
1710 call_via lr
1711
1712 #endif /* L_call_via_rX */
1713
1714 /* Don't bother with the old interworking routines for Thumb-2. */
1715 /* ??? Maybe only omit these on "m" variants. */
1716 #if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)
1717
1718 #if defined L_interwork_call_via_rX
1719
1720 /* These labels & instructions are used by the Arm/Thumb interworking code,
1721 when the target address is in an unknown instruction set. The address
1722 of function to be called is loaded into a register and then one of these
1723 labels is called via a BL instruction. This puts the return address
1724 into the link register with the bottom bit set, and the code here
1725 switches to the correct mode before executing the function. Unfortunately
1726 the target code cannot be relied upon to return via a BX instruction, so
1727 instead we have to store the return address on the stack and allow the
1728 called function to return here instead. Upon return we recover the real
1729 return address and use a BX to get back to Thumb mode.
1730
1731 There are three variations of this code. The first,
1732 _interwork_call_via_rN(), will push the return address onto the
1733 stack and pop it in _arm_return(). It should only be used if all
1734 arguments are passed in registers.
1735
1736 The second, _interwork_r7_call_via_rN(), instead stores the return
1737 address at [r7, #-4]. It is the caller's responsibility to ensure
1738 that this address is valid and contains no useful data.
1739
1740 The third, _interwork_r11_call_via_rN(), works in the same way but
1741 uses r11 instead of r7. It is useful if the caller does not really
1742 need a frame pointer. */
1743
1744 .text
1745 .align 0
1746
/* _arm_return: landing pad for variant 1 -- pops the real return
   address (pushed by the interwork stub) and BXes back to the caller.
   The cfi_* macros emit unwind info describing the pushed lr. */
1747 .code 32
1748 .globl _arm_return
1749 LSYM(Lstart_arm_return):
1750 cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
1751 cfi_push 0, 0xe, -0x8, 0x8
1752 nop @ This nop is for the benefit of debuggers, so that
1753 @ backtraces will use the correct unwind information.
1754 _arm_return:
1755 RETLDM unwind=LSYM(Lstart_arm_return)
1756 cfi_end LSYM(Lend_arm_return)
1757
/* Landing pads for variants 2 and 3: return address was stashed at
   [frame_reg, #-4] rather than on the stack. */
1758 .globl _arm_return_r7
1759 _arm_return_r7:
1760 ldr lr, [r7, #-4]
1761 bx lr
1762
1763 .globl _arm_return_r11
1764 _arm_return_r11:
1765 ldr lr, [r11, #-4]
1766 bx lr
1767
/* Emit one frame-register-based interwork stub (variants 2 and 3 in
   the comment above): starts in Thumb, switches to ARM via bx pc,
   then -- only when the target is ARM code (bit 0 clear) -- stashes
   lr at [\frame, #-4] and redirects the return through
   _arm_return_\frame.  Thumb targets are entered directly. */
1768 .macro interwork_with_frame frame, register, name, return
1769 .code 16
1770
1771 THUMB_FUNC_START \name
1772
1773 bx pc @ switch to ARM state (pc has bit 0 clear)
1774 nop @ pad so the ARM code below is word-aligned
1775
1776 .code 32
1777 tst \register, #1 @ bit 0 set -> target is Thumb
1778 streq lr, [\frame, #-4] @ ARM target: stash real return address
1779 adreq lr, _arm_return_\frame @ and return via the landing pad
1780 bx \register
1781
1782 SIZE (\name)
1783 .endm
1784
/* Emit the three interwork stubs for one register: the stack-based
   _interwork_call_via_<reg> (variant 1), plus the r7- and r11-framed
   forms via interwork_with_frame. */
1785 .macro interwork register
1786 .code 16
1787
1788 THUMB_FUNC_START _interwork_call_via_\register
1789
1790 bx pc @ switch to ARM state
1791 nop
1792
1793 .code 32
1794 .globl LSYM(Lchange_\register)
1795 LSYM(Lchange_\register):
1796 tst \register, #1 @ bit 0 set -> target is Thumb
1797 streq lr, [sp, #-8]! @ ARM target: push real return address
1798 adreq lr, _arm_return @ return will go via _arm_return
1799 bx \register
1800
1801 SIZE (_interwork_call_via_\register)
1802
1803 interwork_with_frame r7,\register,_interwork_r7_call_via_\register
1804 interwork_with_frame r11,\register,_interwork_r11_call_via_\register
1805 .endm
1806
/* Stubs for every register that might hold a function pointer.  lr is
   handled separately below because it is itself clobbered. */
1807 interwork r0
1808 interwork r1
1809 interwork r2
1810 interwork r3
1811 interwork r4
1812 interwork r5
1813 interwork r6
1814 interwork r7
1815 interwork r8
1816 interwork r9
1817 interwork sl
1818 interwork fp
1819 interwork ip
1820 interwork sp
1821
1822 /* The LR case has to be handled a little differently... */
1823 .code 16
1824
1825 THUMB_FUNC_START _interwork_call_via_lr
1826
1827 bx pc @ switch to ARM state
1828 nop
1829
1830 .code 32
1831 .globl .Lchange_lr
1832 .Lchange_lr:
1833 tst lr, #1 @ bit 0 set -> target is Thumb
1834 stmeqdb r13!, {lr, pc} @ ARM target: push target & return address
1835 mov ip, lr @ keep target; lr is about to be repointed
1836 adreq lr, _arm_return
1837 bx ip
1838
1839 SIZE (_interwork_call_via_lr)
1840
1841 #endif /* L_interwork_call_via_rX */
1842 #endif /* !__thumb2__ */
1843
1844 /* Functions to support compact pic switch tables in thumb1 state.
1845 All these routines take an index into the table in r0. The
1846 table is at LR & ~1 (but this must be rounded up in the case
1847 of 32-bit entries). They are only permitted to clobber r12
1848 and r14 and r0 must be preserved on exit. */
1849 #ifdef L_thumb1_case_sqi
1850
@ __gnu_thumb1_case_sqi: switch dispatch through a table of signed
@ bytes placed right after the BL that got us here.  Each entry is an
@ offset in halfwords (Thumb instruction units) from the table start.
1851 .text
1852 .align 0
1853 .force_thumb
1854 .syntax unified
1855 THUMB_FUNC_START __gnu_thumb1_case_sqi
1856 push {r1}
1857 mov r1, lr
1858 lsrs r1, r1, #1
1859 lsls r1, r1, #1 @ r1 = lr & ~1 = table address
1860 ldrsb r1, [r1, r0] @ sign-extended byte entry at index r0
1861 lsls r1, r1, #1 @ halfwords -> bytes
1862 add lr, lr, r1 @ redirect the return into the case body
1863 pop {r1}
1864 bx lr
1865 SIZE (__gnu_thumb1_case_sqi)
1866 #endif
1867
1868 #ifdef L_thumb1_case_uqi
1869
@ __gnu_thumb1_case_uqi: as _sqi above, but the table entries are
@ unsigned bytes (ldrb instead of ldrsb).
1870 .text
1871 .align 0
1872 .force_thumb
1873 .syntax unified
1874 THUMB_FUNC_START __gnu_thumb1_case_uqi
1875 push {r1}
1876 mov r1, lr
1877 lsrs r1, r1, #1
1878 lsls r1, r1, #1 @ r1 = lr & ~1 = table address
1879 ldrb r1, [r1, r0] @ zero-extended byte entry at index r0
1880 lsls r1, r1, #1 @ halfwords -> bytes
1881 add lr, lr, r1 @ redirect the return into the case body
1882 pop {r1}
1883 bx lr
1884 SIZE (__gnu_thumb1_case_uqi)
1885 #endif
1886
1887 #ifdef L_thumb1_case_shi
1888
@ __gnu_thumb1_case_shi: signed-halfword table entries; the index is
@ scaled by 2 and r0 is saved/restored since the scaling clobbers it.
1889 .text
1890 .align 0
1891 .force_thumb
1892 .syntax unified
1893 THUMB_FUNC_START __gnu_thumb1_case_shi
1894 push {r0, r1}
1895 mov r1, lr
1896 lsrs r1, r1, #1
1897 lsls r0, r0, #1 @ index -> byte offset (2 bytes/entry)
1898 lsls r1, r1, #1 @ r1 = lr & ~1 = table address
1899 ldrsh r1, [r1, r0] @ sign-extended halfword entry
1900 lsls r1, r1, #1 @ halfwords -> bytes
1901 add lr, lr, r1 @ redirect the return into the case body
1902 pop {r0, r1}
1903 bx lr
1904 SIZE (__gnu_thumb1_case_shi)
1905 #endif
1906
1907 #ifdef L_thumb1_case_uhi
1908
@ __gnu_thumb1_case_uhi: as _shi above, but with unsigned halfword
@ entries (ldrh instead of ldrsh).
1909 .text
1910 .align 0
1911 .force_thumb
1912 .syntax unified
1913 THUMB_FUNC_START __gnu_thumb1_case_uhi
1914 push {r0, r1}
1915 mov r1, lr
1916 lsrs r1, r1, #1
1917 lsls r0, r0, #1 @ index -> byte offset (2 bytes/entry)
1918 lsls r1, r1, #1 @ r1 = lr & ~1 = table address
1919 ldrh r1, [r1, r0] @ zero-extended halfword entry
1920 lsls r1, r1, #1 @ halfwords -> bytes
1921 add lr, lr, r1 @ redirect the return into the case body
1922 pop {r0, r1}
1923 bx lr
1924 SIZE (__gnu_thumb1_case_uhi)
1925 #endif
1926
1927 #ifdef L_thumb1_case_si
1928
@ __gnu_thumb1_case_si: 32-bit table entries.  The table follows the
@ BL but must be word-aligned, so lr+2 is rounded down to a word
@ boundary first; entries are byte offsets relative to the table base.
1929 .text
1930 .align 0
1931 .force_thumb
1932 .syntax unified
1933 THUMB_FUNC_START __gnu_thumb1_case_si
1934 push {r0, r1}
1935 mov r1, lr
1936 adds.n r1, r1, #2 /* Align to word. */
1937 lsrs r1, r1, #2
1938 lsls r0, r0, #2 @ index -> byte offset (4 bytes/entry)
1939 lsls r1, r1, #2 @ r1 = (lr+2) & ~3 = aligned table address
1940 ldr r0, [r1, r0] @ 32-bit entry
1941 adds r0, r0, r1 @ target = table base + entry
1942 mov lr, r0
1943 pop {r0, r1}
1944 mov pc, lr /* We know we were called from thumb code. */
1945 SIZE (__gnu_thumb1_case_si)
1946 #endif
1947
1948 #endif /* Arch supports thumb. */
1949
1950 #ifndef __symbian__
1951 #ifndef __ARM_ARCH_6M__
1952 #include "ieee754-df.S"
1953 #include "ieee754-sf.S"
1954 #include "bpabi.S"
1955 #else /* __ARM_ARCH_6M__ */
1956 #include "bpabi-v6m.S"
1957 #endif /* __ARM_ARCH_6M__ */
1958 #endif /* !__symbian__ */
1959