lib1funcs.S revision 1.1.1.2 1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998-2015 Free Software Foundation, Inc.
3 Contributed by Denis Chertykov <chertykov (at) gmail.com>
4
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #if defined (__AVR_TINY__)
25 #define __zero_reg__ r17
26 #define __tmp_reg__ r16
27 #else
28 #define __zero_reg__ r1
29 #define __tmp_reg__ r0
30 #endif
31 #define __SREG__ 0x3f
32 #if defined (__AVR_HAVE_SPH__)
33 #define __SP_H__ 0x3e
34 #endif
35 #define __SP_L__ 0x3d
36 #define __RAMPZ__ 0x3B
37 #define __EIND__ 0x3C
38
39 /* Most of the functions here are called directly from avr.md
40 patterns, instead of using the standard libcall mechanisms.
41 This can make better code because GCC knows exactly which
42 of the call-used registers (not all of them) are clobbered. */
43
44 /* FIXME: At present, there is no SORT directive in the linker
45 script so that we must not assume that different modules
46 in the same input section like .libgcc.text.mul will be
47 located close together. Therefore, we cannot use
48 RCALL/RJMP to call a function like __udivmodhi4 from
49 __divmodhi4 and have to use lengthy XCALL/XJMP even
50 though they are in the same input section and all same
51 input sections together are small enough to reach every
52 location with a RCALL/RJMP instruction. */
53
54 #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 #error device not supported
56 #endif
57
;; Copy the LOW byte of a 16-bit register pair.  On devices with MOVW this
;; copies the whole pair at once (mov_h below is then empty), so mov_l/mov_h
;; must always be used together on adjacent, even-aligned register pairs.
58 .macro mov_l r_dest, r_src
59 #if defined (__AVR_HAVE_MOVW__)
60 movw \r_dest, \r_src
61 #else
62 mov \r_dest, \r_src
63 #endif
64 .endm
65
;; Copy the HIGH byte of a 16-bit register pair.  Counterpart of mov_l:
;; on MOVW devices the pair was already copied by mov_l, so this expands
;; to nothing.
66 .macro mov_h r_dest, r_src
67 #if defined (__AVR_HAVE_MOVW__)
68 ; empty
69 #else
70 mov \r_dest, \r_src
71 #endif
72 .endm
73
;; Word move: r_dest+1:r_dest = r_src+1:r_src.  Arguments must be plain
;; register NUMBERS (not rNN names) so that \r_src+1 evaluates to the next
;; register on non-MOVW devices; MOVW additionally requires even alignment.
74 .macro wmov r_dest, r_src
75 #if defined (__AVR_HAVE_MOVW__)
76 movw \r_dest, \r_src
77 #else
78 mov \r_dest, \r_src
79 mov \r_dest+1, \r_src+1
80 #endif
81 .endm
82
83 #if defined (__AVR_HAVE_JMP_CALL__)
84 #define XCALL call
85 #define XJMP jmp
86 #else
87 #define XCALL rcall
88 #define XJMP rjmp
89 #endif
90
91 #if defined (__AVR_HAVE_EIJMP_EICALL__)
92 #define XICALL eicall
93 #define XIJMP eijmp
94 #else
95 #define XICALL icall
96 #define XIJMP ijmp
97 #endif
98
99 ;; Prologue stuff
100
;; Emit a tail-jump into __prologue_saves__ so that the last \n_pushed of
;; R18..R28,R29 are pushed and a frame of \n_frame bytes is allocated.
;; X (R27:R26) carries the frame size, Z (R31:R30) the return point inside
;; this expansion (gs() resolves the stub address on >128KiB devices).
;; The jump offset skips the PUSH instructions for the registers NOT saved.
101 .macro do_prologue_saves n_pushed n_frame=0
102 ldi r26, lo8(\n_frame)
103 ldi r27, hi8(\n_frame)
104 ldi r30, lo8(gs(.L_prologue_saves.\@))
105 ldi r31, hi8(gs(.L_prologue_saves.\@))
106 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107 .L_prologue_saves.\@:
108 .endm
109
110 ;; Epilogue stuff
111
;; Counterpart of do_prologue_saves: deallocate a \n_frame-byte frame and
;; tail-jump into __epilogue_restores__ to pop the saved registers and RET.
;; Y (R29:R28) is loaded with SP and advanced past the frame; R30 carries
;; the register count for the restore stub.  SUBI/SBCI with the negated
;; frame size is used where the constant exceeds ADIW's 6-bit range.
112 .macro do_epilogue_restores n_pushed n_frame=0
113 in r28, __SP_L__
114 #ifdef __AVR_HAVE_SPH__
115 in r29, __SP_H__
116 .if \n_frame > 63
117 subi r28, lo8(-\n_frame)
118 sbci r29, hi8(-\n_frame)
119 .elseif \n_frame > 0
120 adiw r28, \n_frame
121 .endif
122 #else
123 clr r29
124 .if \n_frame > 0
125 subi r28, lo8(-\n_frame)
126 .endif
127 #endif /* HAVE SPH */
128 ldi r30, \n_pushed
129 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
130 .endm
131
132 ;; Support function entry and exit for convenience
133
;; 16-bit subtract-immediate on a register pair.  Non-TINY devices use SBIW
;; (restricted to pairs R24/R26/R28/R30 and immediates 0..63); AVR_TINY has
;; no SBIW, so the pair is handled byte-wise with SUBI/SBCI.
134 .macro wsubi r_arg1, i_arg2
135 #if defined (__AVR_TINY__)
136 subi \r_arg1, lo8(\i_arg2)
137 sbci \r_arg1+1, hi8(\i_arg2)
138 #else
139 sbiw \r_arg1, \i_arg2
140 #endif
141 .endm
142
;; 16-bit add-immediate on a register pair, mirror of wsubi.  On AVR_TINY
;; the addition is expressed as subtraction of the negated constant because
;; AVR has no ADDI/ADCI instructions.
143 .macro waddi r_arg1, i_arg2
144 #if defined (__AVR_TINY__)
145 subi \r_arg1, lo8(-\i_arg2)
146 sbci \r_arg1+1, hi8(-\i_arg2)
147 #else
148 adiw \r_arg1, \i_arg2
149 #endif
150 .endm
151
;; Open a global function definition: export the symbol, start debug-info
;; bookkeeping (.func) and place the entry label.  Paired with ENDF.
152 .macro DEFUN name
153 .global \name
154 .func \name
155 \name:
156 .endm
157
;; Close a DEFUN: record the symbol size for the ELF symbol table and end
;; the .func region.
158 .macro ENDF name
159 .size \name, .-\name
160 .endfunc
161 .endm
162
;; Define a zero-size global alias label at the current location, falling
;; through into the code that follows (used to give one entry point several
;; names).
163 .macro FALIAS name
164 .global \name
165 .func \name
166 \name:
167 .size \name, .-\name
168 .endfunc
169 .endm
170
171 ;; Skip next instruction, typically a jump target
172 #if defined(__AVR_TINY__)
173 #define skip cpse 0,0
174 #else
175 #define skip cpse 16,16
176 #endif
177
178 ;; Negate a 2-byte value held in consecutive registers
;; Two's-complement negate a 16-bit value in \reg+1:\reg.
;; NEG sets carry unless \reg is 0; SBCI ,-1 then adds the +1 borrow into
;; the complemented high byte.  SBCI needs \reg+1 >= R16.
179 .macro NEG2 reg
180 com \reg+1
181 neg \reg
182 sbci \reg+1, -1
183 .endm
184
185 ;; Negate a 4-byte value held in consecutive registers
186 ;; Sets the V flag for signed overflow tests if REG >= 16
;; Two's-complement negate a 32-bit value in \reg+3..\reg.
;; Upper-register variant uses NEG + SBCI ,-1 carry propagation (and sets V
;; for signed-overflow tests); the low-register variant cannot use SBCI, so
;; it complements all bytes and adds back the carry from "com; sec"-style
;; propagation via ADC with __zero_reg__.
187 .macro NEG4 reg
188 com \reg+3
189 com \reg+2
190 com \reg+1
191 .if \reg >= 16
192 neg \reg
193 sbci \reg+1, -1
194 sbci \reg+2, -1
195 sbci \reg+3, -1
196 .else
197 com \reg
198 adc \reg, __zero_reg__
199 adc \reg+1, __zero_reg__
200 adc \reg+2, __zero_reg__
201 adc \reg+3, __zero_reg__
202 .endif
203 .endm
204
205 #define exp_lo(N) hlo8 ((N) << 23)
206 #define exp_hi(N) hhi8 ((N) << 23)
207
208
209 .section .text.libgcc.mul, "ax", @progbits
211
212 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
213 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
214 #if !defined (__AVR_HAVE_MUL__)
215 /*******************************************************
216 Multiplication 8 x 8 without MUL
217 *******************************************************/
218 #if defined (L_mulqi3)
219
220 #define r_arg2 r22 /* multiplicand */
221 #define r_arg1 r24 /* multiplier */
222 #define r_res __tmp_reg__ /* result */
223
;; 8 x 8 -> 8 multiply (shift-and-add) for devices without MUL.
;; In:  r24 = multiplier, r22 = multiplicand.  Out: r24 = product.
;; Clobbers: __tmp_reg__, r22.  Terminates early once either operand
;; shifts down/up to zero.
224 DEFUN __mulqi3
225 clr r_res ; clear result
226 __mulqi3_loop:
227 sbrc r_arg1,0
228 add r_res,r_arg2 ; if (multiplier & 1) result += multiplicand
229 add r_arg2,r_arg2 ; shift multiplicand
230 breq __mulqi3_exit ; while multiplicand != 0
231 lsr r_arg1 ;
232 brne __mulqi3_loop ; exit if multiplier = 0
233 __mulqi3_exit:
234 mov r_arg1,r_res ; result to return register
235 ret
236 ENDF __mulqi3
237
238 #undef r_arg2
239 #undef r_arg1
240 #undef r_res
241
242 #endif /* defined (L_mulqi3) */
243
244
245 /*******************************************************
246 Widening Multiplication 16 = 8 x 8 without MUL
247 Multiplication 16 x 16 without MUL
248 *******************************************************/
249
250 #define A0 22
251 #define A1 23
252 #define B0 24
253 #define BB0 20
254 #define B1 25
255 ;; Output overlaps input, thus expand result in CC0/1
256 #define C0 24
257 #define C1 25
258 #define CC0 __tmp_reg__
259 #define CC1 21
260
261 #if defined (L_umulqihi3)
262 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
263 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
264 ;;; Clobbers: __tmp_reg__, R21..R23
;; Widening unsigned 8 x 8 -> 16 multiply without MUL: zero-extend both
;; operands and delegate to the 16-bit routine.
265 DEFUN __umulqihi3
266 clr A1
267 clr B1
268 XJMP __mulhi3
269 ENDF __umulqihi3
270 #endif /* L_umulqihi3 */
271
272 #if defined (L_mulqihi3)
273 ;;; R25:R24 = (signed int) R22 * (signed int) R24
274 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
275 ;;; Clobbers: __tmp_reg__, R20..R23
;; Widening signed 8 x 8 -> 16 multiply without MUL.  B is sign-extended
;; up front; A is only ZERO-extended (faster in __mulhi3) and the missing
;; sign contribution (-B << 8, i.e. subtracting B from the high byte) is
;; patched in after the multiplication via BB0.
276 DEFUN __mulqihi3
277 ;; Sign-extend B0
278 clr B1
279 sbrc B0, 7
280 com B1
281 ;; The multiplication runs twice as fast if A1 is zero, thus:
282 ;; Zero-extend A0
283 clr A1
284 #ifdef __AVR_HAVE_JMP_CALL__
285 ;; Store B0 * sign of A
286 clr BB0
287 sbrc A0, 7
288 mov BB0, B0
289 call __mulhi3
290 #else /* have no CALL */
291 ;; Skip sign-extension of A if A >= 0
292 ;; Same size as with the first alternative but avoids errata skip
293 ;; and is faster if A >= 0
294 sbrs A0, 7
295 rjmp __mulhi3 ; tail-call: BB0 correction not needed for A >= 0
296 ;; If A < 0 store B
297 mov BB0, B0
298 rcall __mulhi3
299 #endif /* HAVE_JMP_CALL */
300 ;; 1-extend A after the multiplication
301 sub C1, BB0
302 ret
303 ENDF __mulqihi3
304 #endif /* L_mulqihi3 */
305
306 #if defined (L_mulhi3)
307 ;;; R25:R24 = R23:R22 * R25:R24
308 ;;; (C1:C0) = (A1:A0) * (B1:B0)
309 ;;; Clobbers: __tmp_reg__, R21..R23
;; 16 x 16 -> 16 shift-and-add multiply without MUL.
;; In: A1:A0 (R23:R22) and B1:B0 (R25:R24).  Out: C1:C0 (R25:R24).
;; Expands the product in CC1:CC0 because the output registers overlap B.
;; Loop invariant: CC += B for every set bit of A, B doubled per bit;
;; exits early when either A or B becomes 0.
310 DEFUN __mulhi3
311
312 ;; Clear result
313 clr CC0
314 clr CC1
315 rjmp 3f
316 1:
317 ;; Bit n of A is 1 --> C += B << n
318 add CC0, B0
319 adc CC1, B1
320 2:
321 lsl B0
322 rol B1
323 3:
324 ;; If B == 0 we are ready
325 wsubi B0, 0
326 breq 9f
327
328 ;; Carry = n-th bit of A
329 lsr A1
330 ror A0
331 ;; If bit n of A is set, then go add B * 2^n to C
332 brcs 1b
333
334 ;; Carry = 0 --> The ROR above acts like CP A0, 0
335 ;; Thus, it is sufficient to CPC the high part to test A against 0
336 cpc A1, __zero_reg__
337 ;; Only proceed if A != 0
338 brne 2b
339 9:
340 ;; Move Result into place
341 mov C0, CC0
342 mov C1, CC1
343 ret
344 ENDF __mulhi3
345 #endif /* L_mulhi3 */
346
347 #undef A0
348 #undef A1
349 #undef B0
350 #undef BB0
351 #undef B1
352 #undef C0
353 #undef C1
354 #undef CC0
355 #undef CC1
356
357
358 #define A0 22
360 #define A1 A0+1
361 #define A2 A0+2
362 #define A3 A0+3
363
364 #define B0 18
365 #define B1 B0+1
366 #define B2 B0+2
367 #define B3 B0+3
368
369 #define CC0 26
370 #define CC1 CC0+1
371 #define CC2 30
372 #define CC3 CC2+1
373
374 #define C0 22
375 #define C1 C0+1
376 #define C2 C0+2
377 #define C3 C0+3
378
379 /*******************************************************
380 Widening Multiplication 32 = 16 x 16 without MUL
381 *******************************************************/
382
383 #if defined (L_umulhisi3)
;; Widening unsigned 16 x 16 -> 32 multiply without MUL: move B into the
;; B3:B0 slot expected by __mulsi3, zero-extend both operands (A2 is
;; cleared via the already-zero B2 pair), and tail-call __mulsi3.
384 DEFUN __umulhisi3
385 wmov B0, 24
386 ;; Zero-extend B
387 clr B2
388 clr B3
389 ;; Zero-extend A
390 wmov A2, B2
391 XJMP __mulsi3
392 ENDF __umulhisi3
393 #endif /* L_umulhisi3 */
394
395 #if defined (L_mulhisi3)
;; Widening signed 16 x 16 -> 32 multiply without MUL.  B is sign-extended
;; by shifting its sign bit into carry (LSL r25) and broadcasting the
;; borrow (SBC B2,B2 yields 0x00 or 0xFF).  A is zero-extended for speed
;; where the skip-errata allows it, with the sign correction folded into
;; the initial high result instead.
396 DEFUN __mulhisi3
397 wmov B0, 24
398 ;; Sign-extend B
399 lsl r25
400 sbc B2, B2
401 mov B3, B2
402 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
403 ;; Sign-extend A
404 clr A2
405 sbrc A1, 7
406 com A2
407 mov A3, A2
408 XJMP __mulsi3
409 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
410 ;; Zero-extend A and __mulsi3 will run at least twice as fast
411 ;; compared to a sign-extended A.
412 clr A2
413 clr A3
414 sbrs A1, 7
415 XJMP __mulsi3
416 ;; If A < 0 then perform the B * 0xffff.... before the
417 ;; very multiplication by initializing the high part of the
418 ;; result CC with -B.
419 wmov CC2, A2
420 sub CC2, B0
421 sbc CC3, B1
422 XJMP __mulsi3_helper
423 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
424 ENDF __mulhisi3
425 #endif /* L_mulhisi3 */
426
427
428 /*******************************************************
429 Multiplication 32 x 32 without MUL
430 *******************************************************/
431
432 #if defined (L_mulsi3)
;; 32 x 32 -> 32 shift-and-add multiply without MUL.
;; In: A3:A0 (R25:R22) and B3:B0 (R21:R18); Out: C3:C0 (R25:R22).
;; On AVR_TINY, B lives in call-saved registers and the B argument is
;; fetched from the caller's stack frame via X.
;; __mulsi3 clears the high result half and falls through to
;; __mulsi3_helper, which __mulhisi3 also enters directly with CC2/CC3
;; pre-loaded with a sign correction.
433 DEFUN __mulsi3
434 #if defined (__AVR_TINY__)
435 in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
436 in r27, __SP_H__
437 subi r26, lo8(-3) ; Add 3 to point past return address
438 sbci r27, hi8(-3)
439 push B0 ; save callee saved regs
440 push B1
441 ld B0, X+ ; load from caller stack
442 ld B1, X+
443 ld B2, X+
444 ld B3, X
445 #endif
446 ;; Clear result
447 clr CC2
448 clr CC3
449 ;; FALLTHRU
450 ENDF __mulsi3
451
452 DEFUN __mulsi3_helper
453 clr CC0
454 clr CC1
455 rjmp 3f
456
457 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
458 ;; CC += B
459 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
460
461 2: ;; B <<= 1
462 lsl B0 $ rol B1 $ rol B2 $ rol B3
463
464 3: ;; A >>= 1: Carry = n-th bit of A
465 lsr A3 $ ror A2 $ ror A1 $ ror A0
466
467 brcs 1b
468 ;; Only continue if A != 0
469 sbci A1, 0 ; carry is known clear here: tests A1:A0 == 0
470 brne 2b
471 wsubi A2, 0 ; tests A3:A2 == 0
472 brne 2b
473
474 ;; All bits of A are consumed: Copy result to return register C
475 wmov C0, CC0
476 wmov C2, CC2
477 #if defined (__AVR_TINY__)
478 pop B1 ; restore callee saved regs
479 pop B0
480 #endif /* defined (__AVR_TINY__) */
481
482 ret
483 ENDF __mulsi3_helper
484 #endif /* L_mulsi3 */
485
486 #undef A0
487 #undef A1
488 #undef A2
489 #undef A3
490 #undef B0
491 #undef B1
492 #undef B2
493 #undef B3
494 #undef C0
495 #undef C1
496 #undef C2
497 #undef C3
498 #undef CC0
499 #undef CC1
500 #undef CC2
501 #undef CC3
502
503 #endif /* !defined (__AVR_HAVE_MUL__) */
504 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
505
506 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
508 #if defined (__AVR_HAVE_MUL__)
509 #define A0 26
510 #define B0 18
511 #define C0 22
512
513 #define A1 A0+1
514
515 #define B1 B0+1
516 #define B2 B0+2
517 #define B3 B0+3
518
519 #define C1 C0+1
520 #define C2 C0+2
521 #define C3 C0+3
522
523 /*******************************************************
524 Widening Multiplication 32 = 16 x 16 with MUL
525 *******************************************************/
526
527 #if defined (L_mulhisi3)
528 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
529 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
530 ;;; Clobbers: __tmp_reg__
;; Widening signed 16 x 16 -> 32 with MUL: compute the unsigned product,
;; then correct for each negative operand by subtracting the other operand
;; from the high word (standard unsigned->signed widening fixup).  The A
;; fixup is shared with __usmulhisi3 via __usmulhisi3_tail.
531 DEFUN __mulhisi3
532 XCALL __umulhisi3
533 ;; Sign-extend B
534 tst B1
535 brpl 1f
536 sub C2, A0
537 sbc C3, A1
538 1: ;; Sign-extend A
539 XJMP __usmulhisi3_tail
540 ENDF __mulhisi3
541 #endif /* L_mulhisi3 */
542
543 #if defined (L_usmulhisi3)
544 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
545 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
546 ;;; Clobbers: __tmp_reg__
;; Widening signed x unsigned 16 x 16 -> 32 with MUL: unsigned product,
;; then fall through into the shared tail that subtracts B from the high
;; word when A is negative.
547 DEFUN __usmulhisi3
548 XCALL __umulhisi3
549 ;; FALLTHRU
550 ENDF __usmulhisi3
551
552 DEFUN __usmulhisi3_tail
553 ;; Sign-extend A
554 sbrs A1, 7
555 ret
556 sub C2, B0
557 sbc C3, B1
558 ret
559 ENDF __usmulhisi3_tail
560 #endif /* L_usmulhisi3 */
561
562 #if defined (L_umulhisi3)
563 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
564 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
565 ;;; Clobbers: __tmp_reg__
;; Widening unsigned 16 x 16 -> 32 using the hardware MUL, schoolbook
;; partial products: A0*B0 -> C1:C0, A1*B1 -> C3:C2, then the two cross
;; products A0*B1 and A1*B0 are accumulated into C3:C1.  On small-flash
;; devices the accumulation code is shared via RCALL 1f instead of being
;; duplicated.  Clobbers __tmp_reg__ (R1:R0 via MUL); __zero_reg__ is
;; restored before returning.
566 DEFUN __umulhisi3
567 mul A0, B0
568 movw C0, r0
569 mul A1, B1
570 movw C2, r0
571 mul A0, B1
572 #ifdef __AVR_HAVE_JMP_CALL__
573 ;; This function is used by many other routines, often multiple times.
574 ;; Therefore, if the flash size is not too limited, avoid the RCALL
575 ;; and invest 6 bytes to speed things up.
576 add C1, r0
577 adc C2, r1
578 clr __zero_reg__
579 adc C3, __zero_reg__
580 #else
581 rcall 1f
582 #endif
583 mul A1, B0
584 1: add C1, r0
585 adc C2, r1
586 clr __zero_reg__
587 adc C3, __zero_reg__
588 ret
589 ENDF __umulhisi3
590 #endif /* L_umulhisi3 */
591
592 /*******************************************************
593 Widening Multiplication 32 = 16 x 32 with MUL
594 *******************************************************/
595
596 #if defined (L_mulshisi3)
597 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
598 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
599 ;;; Clobbers: __tmp_reg__
;; Widening signed 16 x 32 -> 32 with MUL: dispatch on the sign of A.
;; A >= 0: plain unsigned multiply; A < 0: one-extended variant which
;; subtracts B << 16 from the result after the unsigned multiply.
600 DEFUN __mulshisi3
601 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
602 ;; Some cores have problem skipping 2-word instruction
603 tst A1
604 brmi __mulohisi3
605 #else
606 sbrs A1, 7
607 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
608 XJMP __muluhisi3
609 ;; FALLTHRU
610 ENDF __mulshisi3
611
612 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
613 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
614 ;;; Clobbers: __tmp_reg__
615 DEFUN __mulohisi3
616 XCALL __muluhisi3
617 ;; One-extend R27:R26 (A1:A0)
618 sub C2, B0
619 sbc C3, B1
620 ret
621 ENDF __mulohisi3
622 #endif /* L_mulshisi3 */
623
624 #if defined (L_muluhisi3)
625 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
626 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
627 ;;; Clobbers: __tmp_reg__
;; 16 x 32 -> 32 with MUL: low 32 bits of (zero-extended A1:A0) * B3:B0.
;; The 16x16 low product comes from __umulhisi3; the remaining partial
;; products A0*B3, A1*B2 (byte 3 only) and A0*B2 (bytes 2..3) are added
;; on top.  Clobbers R0/R1; __zero_reg__ restored before RET.
628 DEFUN __muluhisi3
629 XCALL __umulhisi3
630 mul A0, B3
631 add C3, r0
632 mul A1, B2
633 add C3, r0
634 mul A0, B2
635 add C2, r0
636 adc C3, r1
637 clr __zero_reg__
638 ret
639 ENDF __muluhisi3
640 #endif /* L_muluhisi3 */
641
642 /*******************************************************
643 Multiplication 32 x 32 with MUL
644 *******************************************************/
645
646 #if defined (L_mulsi3)
647 ;;; R25:R22 = R25:R22 * R21:R18
648 ;;; (C3:C0) = C3:C0 * B3:B0
649 ;;; Clobbers: R26, R27, __tmp_reg__
;; 32 x 32 -> 32 with MUL.  The low half of C doubles as A for the 16x32
;; step (__muluhisi3); the high half of C is stacked across that call and
;; then combined with the byte products of A-high * B-low that only affect
;; the upper result bytes.  Clobbers R26, R27, R0/R1 (restored).
650 DEFUN __mulsi3
651 movw A0, C0
652 push C2
653 push C3
654 XCALL __muluhisi3
655 pop A1
656 pop A0
657 ;; A1:A0 now contains the high word of A
658 mul A0, B0
659 add C2, r0
660 adc C3, r1
661 mul A0, B1
662 add C3, r0
663 mul A1, B0
664 add C3, r0
665 clr __zero_reg__
666 ret
667 ENDF __mulsi3
668 #endif /* L_mulsi3 */
669
670 #undef A0
671 #undef A1
672
673 #undef B0
674 #undef B1
675 #undef B2
676 #undef B3
677
678 #undef C0
679 #undef C1
680 #undef C2
681 #undef C3
682
683 #endif /* __AVR_HAVE_MUL__ */
684
685 /*******************************************************
686 Multiplication 24 x 24 with MUL
687 *******************************************************/
688
689 #if defined (L_mulpsi3)
690
691 ;; A[0..2]: In: Multiplicand; Out: Product
692 #define A0 22
693 #define A1 A0+1
694 #define A2 A0+2
695
696 ;; B[0..2]: In: Multiplier
697 #define B0 18
698 #define B1 B0+1
699 #define B2 B0+2
700
701 #if defined (__AVR_HAVE_MUL__)
702
703 ;; C[0..2]: Expand Result
704 #define C0 22
705 #define C1 C0+1
706 #define C2 C0+2
707
708 ;; R24:R22 *= R20:R18
709 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
710
711 #define AA0 26
712 #define AA2 21
713
;; 24 x 24 -> 24 with MUL.  A is first copied aside (AA) because the
;; result overlaps A; the 16x16 kernel produces the low product, then the
;; two third-byte cross products AA2*B0 and AA0*B2 are added into C2.
714 DEFUN __mulpsi3
715 wmov AA0, A0
716 mov AA2, A2
717 XCALL __umulhisi3
718 mul AA2, B0 $ add C2, r0
719 mul AA0, B2 $ add C2, r0
720 clr __zero_reg__
721 ret
722 ENDF __mulpsi3
723
724 #undef AA2
725 #undef AA0
726
727 #undef C2
728 #undef C1
729 #undef C0
730
731 #else /* !HAVE_MUL */
732 ;; C[0..2]: Expand Result
733 #if defined (__AVR_TINY__)
734 #define C0 16
735 #else
736 #define C0 0
737 #endif /* defined (__AVR_TINY__) */
738 #define C1 C0+1
739 #define C2 21
740
741 ;; R24:R22 *= R20:R18
742 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
743
;; 24 x 24 -> 24 shift-and-add multiply without MUL.
;; In: A2:A0 (R24:R22), B2:B0 (R20:R18); Out: A2:A0.
;; The result is expanded in C[] (__tmp_reg__ based) and copied back at
;; the end.  On AVR_TINY, B is reloaded from the caller's stack via X,
;; as R18/R19 are call-saved there.
744 DEFUN __mulpsi3
745 #if defined (__AVR_TINY__)
746 in r26,__SP_L__
747 in r27,__SP_H__
748 subi r26, lo8(-3) ; Add 3 to point past return address
749 sbci r27, hi8(-3)
750 push B0 ; save callee saved regs
751 push B1
752 ld B0,X+ ; load from caller stack
753 ld B1,X+
754 ld B2,X+
755 #endif /* defined (__AVR_TINY__) */
756
757 ;; C[] = 0
758 clr __tmp_reg__
759 clr C2
760
761 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
762 LSR B2 $ ror B1 $ ror B0
763
764 ;; If the N-th Bit of B[] was set...
765 brcc 1f
766
767 ;; ...then add A[] * 2^N to the Result C[]
768 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
769
770 1: ;; Multiply A[] by 2
771 LSL A0 $ rol A1 $ rol A2
772
773 ;; Loop until B[] is 0
774 subi B0,0 $ sbci B1,0 $ sbci B2,0
775 brne 0b
776
777 ;; Copy C[] to the return Register A[]
778 wmov A0, C0
779 mov A2, C2
780
781 clr __zero_reg__
782 #if defined (__AVR_TINY__)
783 pop B1
784 pop B0
785 #endif /* (__AVR_TINY__) */
786 ret
787 ENDF __mulpsi3
788
789 #undef C2
790 #undef C1
791 #undef C0
792
793 #endif /* HAVE_MUL */
794
795 #undef B2
796 #undef B1
797 #undef B0
798
799 #undef A2
800 #undef A1
801 #undef A0
802
803 #endif /* L_mulpsi3 */
804
805 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
806
807 ;; A[0..2]: In: Multiplicand
808 #define A0 22
809 #define A1 A0+1
810 #define A2 A0+2
811
812 ;; BB: In: Multiplier
813 #define BB 25
814
815 ;; C[0..2]: Result
816 #define C0 18
817 #define C1 C0+1
818 #define C2 C0+2
819
820 ;; C[] = A[] * sign_extend (BB)
;; 24-bit = 24-bit * sign-extended 8-bit, with MUL.
;; C[] = A[] * sign_extend (BB): byte products accumulated, then the
;; one-extension correction (subtract A << 8) applied only if BB < 0.
821 DEFUN __mulsqipsi3
822 mul A0, BB
823 movw C0, r0
824 mul A2, BB
825 mov C2, r0
826 mul A1, BB
827 add C1, r0
828 adc C2, r1
829 clr __zero_reg__
830 sbrs BB, 7
831 ret
832 ;; One-extend BB
833 sub C1, A0
834 sbc C2, A1
835 ret
836 ENDF __mulsqipsi3
837
838 #undef C2
839 #undef C1
840 #undef C0
841
842 #undef BB
843
844 #undef A2
845 #undef A1
846 #undef A0
847
848 #endif /* L_mulsqipsi3 && HAVE_MUL */
849
850 /*******************************************************
851 Multiplication 64 x 64
852 *******************************************************/
853
854 ;; A[] = A[] * B[]
855
856 ;; A[0..7]: In: Multiplicand
857 ;; Out: Product
858 #define A0 18
859 #define A1 A0+1
860 #define A2 A0+2
861 #define A3 A0+3
862 #define A4 A0+4
863 #define A5 A0+5
864 #define A6 A0+6
865 #define A7 A0+7
866
867 ;; B[0..7]: In: Multiplier
868 #define B0 10
869 #define B1 B0+1
870 #define B2 B0+2
871 #define B3 B0+3
872 #define B4 B0+4
873 #define B5 B0+5
874 #define B6 B0+6
875 #define B7 B0+7
876
877 #ifndef __AVR_TINY__
878 #if defined (__AVR_HAVE_MUL__)
879 ;; Define C[] for convenience
880 ;; Notice that parts of C[] overlap A[] respective B[]
881 #define C0 16
882 #define C1 C0+1
883 #define C2 20
884 #define C3 C2+1
885 #define C4 28
886 #define C5 C4+1
887 #define C6 C4+2
888 #define C7 C4+3
889
890 #if defined (L_muldi3)
891
892 ;; A[] *= B[]
893 ;; R25:R18 *= R17:R10
894 ;; Ordinary ABI-Function
895
;; 64 x 64 -> 64 with MUL, as a 4x4 word (16-bit) schoolbook product.
;; Word products whose result lands entirely in the top bytes are done
;; with inline byte MULs; the rest use __umulhisi3 / __muldi3_6 with the
;; multiplicand passed in X (R27:R26).  Operands that __umulhisi3
;; clobbers are staged on the stack and popped back as needed.
;; Call-saved R16, R17, R28, R29 (overlapped by C[]) are preserved.
896 DEFUN __muldi3
897 push r29
898 push r28
899 push r17
900 push r16
901
902 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
903
904 ;; 3 * 0 + 0 * 3
905 mul A7,B0 $ $ mov C7,r0
906 mul A0,B7 $ $ add C7,r0
907 mul A6,B1 $ $ add C7,r0
908 mul A6,B0 $ mov C6,r0 $ add C7,r1
909 mul B6,A1 $ $ add C7,r0
910 mul B6,A0 $ add C6,r0 $ adc C7,r1
911
912 ;; 1 * 2
913 mul A2,B4 $ add C6,r0 $ adc C7,r1
914 mul A3,B4 $ $ add C7,r0
915 mul A2,B5 $ $ add C7,r0
916
917 push A5
918 push A4
919 push B1
920 push B0
921 push A3
922 push A2
923
924 ;; 0 * 0
925 wmov 26, B0
926 XCALL __umulhisi3
927 wmov C0, 22
928 wmov C2, 24
929
930 ;; 0 * 2
931 wmov 26, B4
932 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
933
934 wmov 26, B2
935 ;; 0 * 1
936 XCALL __muldi3_6
937
938 pop A0
939 pop A1
940 ;; 1 * 1
941 wmov 26, B2
942 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
943
944 pop r26
945 pop r27
946 ;; 1 * 0
947 XCALL __muldi3_6
948
949 pop A0
950 pop A1
951 ;; 2 * 0
952 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
953
954 ;; 2 * 1
955 wmov 26, B2
956 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
957
958 ;; A[] = C[]
959 wmov A0, C0
960 ;; A2 = C2 already
961 wmov A4, C4
962 wmov A6, C6
963
964 clr __zero_reg__
965 pop r16
966 pop r17
967 pop r28
968 pop r29
969 ret
970 ENDF __muldi3
971 #endif /* L_muldi3 */
972
973 #if defined (L_muldi3_6)
974 ;; A helper for some 64-bit multiplications with MUL available
;; Shared tail for 64-bit multiplies: X * Z (via __umulhisi3) added into
;; C5..C2 starting at byte offset 2 ("*_6" = shifted by 6 nibbles?), with
;; the final carry rippled into C7:C6 by ADIW.  NOTE(review): the exact
;; origin of the "_6" suffix is not evident from this file.
975 DEFUN __muldi3_6
976 __muldi3_6:
977 XCALL __umulhisi3
978 add C2, 22
979 adc C3, 23
980 adc C4, 24
981 adc C5, 25
982 brcc 0f
983 adiw C6, 1
984 0: ret
985 ENDF __muldi3_6
986 #endif /* L_muldi3_6 */
987
988 #undef C7
989 #undef C6
990 #undef C5
991 #undef C4
992 #undef C3
993 #undef C2
994 #undef C1
995 #undef C0
996
997 #else /* !HAVE_MUL */
998
999 #if defined (L_muldi3)
1000
1001 #define C0 26
1002 #define C1 C0+1
1003 #define C2 C0+2
1004 #define C3 C0+3
1005 #define C4 C0+4
1006 #define C5 C0+5
1007 #define C6 0
1008 #define C7 C6+1
1009
1010 #define Loop 9
1011
1012 ;; A[] *= B[]
1013 ;; R25:R18 *= R17:R10
1014 ;; Ordinary ABI-Function
1015
;; 64 x 64 -> 64 shift-and-add multiply without MUL, 64 fixed iterations.
;; B[] is rotated (not shifted) right so it regains its original value
;; when the loop finishes; A[] is doubled each iteration; set bits of B
;; accumulate A into C[].  Call-saved R28/R29 (part of C) and the Loop
;; counter register are preserved on the stack.
1016 DEFUN __muldi3
1017 push r29
1018 push r28
1019 push Loop
1020
1021 ldi C0, 64
1022 mov Loop, C0
1023
1024 ;; C[] = 0
1025 clr __tmp_reg__
1026 wmov C0, 0
1027 wmov C2, 0
1028 wmov C4, 0
1029
1030 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1031 ;; where N = 64 - Loop.
1032 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1033 ;; B[] will have its initial Value again.
1034 LSR B7 $ ror B6 $ ror B5 $ ror B4
1035 ror B3 $ ror B2 $ ror B1 $ ror B0
1036
1037 ;; If the N-th Bit of B[] was set then...
1038 brcc 1f
1039 ;; ...finish Rotation...
1040 ori B7, 1 << 7
1041
1042 ;; ...and add A[] * 2^N to the Result C[]
1043 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1044 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1045
1046 1: ;; Multiply A[] by 2
1047 LSL A0 $ rol A1 $ rol A2 $ rol A3
1048 rol A4 $ rol A5 $ rol A6 $ rol A7
1049
1050 dec Loop
1051 brne 0b
1052
1053 ;; We expanded the Result in C[]
1054 ;; Copy Result to the Return Register A[]
1055 wmov A0, C0
1056 wmov A2, C2
1057 wmov A4, C4
1058 wmov A6, C6
1059
1060 clr __zero_reg__
1061 pop Loop
1062 pop r28
1063 pop r29
1064 ret
1065 ENDF __muldi3
1066
1067 #undef Loop
1068
1069 #undef C7
1070 #undef C6
1071 #undef C5
1072 #undef C4
1073 #undef C3
1074 #undef C2
1075 #undef C1
1076 #undef C0
1077
1078 #endif /* L_muldi3 */
1079 #endif /* HAVE_MUL */
1080 #endif /* if not __AVR_TINY__ */
1081
1082 #undef B7
1083 #undef B6
1084 #undef B5
1085 #undef B4
1086 #undef B3
1087 #undef B2
1088 #undef B1
1089 #undef B0
1090
1091 #undef A7
1092 #undef A6
1093 #undef A5
1094 #undef A4
1095 #undef A3
1096 #undef A2
1097 #undef A1
1098 #undef A0
1099
1100 /*******************************************************
1101 Widening Multiplication 64 = 32 x 32 with MUL
1102 *******************************************************/
1103
1104 #if defined (__AVR_HAVE_MUL__)
1105 #define A0 r22
1106 #define A1 r23
1107 #define A2 r24
1108 #define A3 r25
1109
1110 #define B0 r18
1111 #define B1 r19
1112 #define B2 r20
1113 #define B3 r21
1114
1115 #define C0 18
1116 #define C1 C0+1
1117 #define C2 20
1118 #define C3 C2+1
1119 #define C4 28
1120 #define C5 C4+1
1121 #define C6 C4+2
1122 #define C7 C4+3
1123
1124 #if defined (L_umulsidi3)
1125
1126 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1127
1128 ;; R18[8] = R22[4] * R18[4]
1129 ;;
1130 ;; Ordinary ABI Function, but additionally sets
1131 ;; X = R20[2] = B2[2]
1132 ;; Z = R22[2] = A0[2]
;; Widening 64 = 32 x 32 with MUL.  __umulsidi3 clears T (T = sign of A)
;; and falls through into the helper, which __mulsidi3 also enters with T
;; set when A < 0.  Four 16x16 word products are combined; when T is set
;; the unsigned high product is corrected by subtracting B.  The inline
;; register maps ("18 20 22 ...") track which value lives where between
;; the __umulhisi3 / __muldi3_6 calls.  Additionally returns X = B2[2]
;; and Z = A0[2] for use by __mulsidi3's sign fixup.
1133 DEFUN __umulsidi3
1134 clt
1135 ;; FALLTHRU
1136 ENDF __umulsidi3
1137 ;; T = sign (A)
1138 DEFUN __umulsidi3_helper
1139 push 29 $ push 28 ; Y
1140 wmov 30, A2
1141 ;; Counting in Words, we have to perform 4 Multiplications
1142 ;; 0 * 0
1143 wmov 26, A0
1144 XCALL __umulhisi3
1145 push 23 $ push 22 ; C0
1146 wmov 28, B0
1147 wmov 18, B2
1148 wmov C2, 24
1149 push 27 $ push 26 ; A0
1150 push 19 $ push 18 ; B2
1151 ;;
1152 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1153 ;; B2 C2 -- -- -- B0 A2
1154 ;; 1 * 1
1155 wmov 26, 30 ; A2
1156 XCALL __umulhisi3
1157 ;; Sign-extend A. T holds the sign of A
1158 brtc 0f
1159 ;; Subtract B from the high part of the result
1160 sub 22, 28
1161 sbc 23, 29
1162 sbc 24, 18
1163 sbc 25, 19
1164 0: wmov 18, 28 ;; B0
1165 wmov C4, 22
1166 wmov C6, 24
1167 ;;
1168 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1169 ;; B0 C2 -- -- A2 C4 C6
1170 ;;
1171 ;; 1 * 0
1172 XCALL __muldi3_6
1173 ;; 0 * 1
1174 pop 26 $ pop 27 ;; B2
1175 pop 18 $ pop 19 ;; A0
1176 XCALL __muldi3_6
1177
1178 ;; Move result C into place and save A0 in Z
1179 wmov 22, C4
1180 wmov 24, C6
1181 wmov 30, 18 ; A0
1182 pop C0 $ pop C1
1183
1184 ;; Epilogue
1185 pop 28 $ pop 29 ;; Y
1186 ret
1187 ENDF __umulsidi3_helper
1188 #endif /* L_umulsidi3 */
1189
1190
1191 #if defined (L_mulsidi3)
1192
1193 ;; Signed widening 64 = 32 * 32 Multiplication
1194 ;;
1195 ;; R18[8] = R22[4] * R18[4]
1196 ;; Ordinary ABI Function
;; Signed widening 64 = 32 x 32 with MUL.  Records sign(A) in T for the
;; helper; if B >= 0 this is a plain tail-call.  If B < 0 the result must
;; additionally be corrected by subtracting A << 32, using A0 preserved
;; in Z by the helper and A3:A2 preserved here on the stack.
1197 DEFUN __mulsidi3
1198 bst A3, 7
1199 sbrs B3, 7 ; Enhanced core has no skip bug
1200 XJMP __umulsidi3_helper
1201
1202 ;; B needs sign-extension
1203 push A3
1204 push A2
1205 XCALL __umulsidi3_helper
1206 ;; A0 survived in Z
1207 sub r22, r30
1208 sbc r23, r31
1209 pop r26
1210 pop r27
1211 sbc r24, r26
1212 sbc r25, r27
1213 ret
1214 ENDF __mulsidi3
1215 #endif /* L_mulsidi3 */
1216
1217 #undef A0
1218 #undef A1
1219 #undef A2
1220 #undef A3
1221 #undef B0
1222 #undef B1
1223 #undef B2
1224 #undef B3
1225 #undef C0
1226 #undef C1
1227 #undef C2
1228 #undef C3
1229 #undef C4
1230 #undef C5
1231 #undef C6
1232 #undef C7
1233 #endif /* HAVE_MUL */
1234
1235 /**********************************************************
1236 Widening Multiplication 64 = 32 x 32 without MUL
1237 **********************************************************/
1238 #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1239 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1240 #define A0 18
1241 #define A1 A0+1
1242 #define A2 A0+2
1243 #define A3 A0+3
1244 #define A4 A0+4
1245 #define A5 A0+5
1246 #define A6 A0+6
1247 #define A7 A0+7
1248
1249 #define B0 10
1250 #define B1 B0+1
1251 #define B2 B0+2
1252 #define B3 B0+3
1253 #define B4 B0+4
1254 #define B5 B0+5
1255 #define B6 B0+6
1256 #define B7 B0+7
1257
1258 #define AA0 22
1259 #define AA1 AA0+1
1260 #define AA2 AA0+2
1261 #define AA3 AA0+3
1262
1263 #define BB0 18
1264 #define BB1 BB0+1
1265 #define BB2 BB0+2
1266 #define BB3 BB0+3
1267
1268 #define Mask r30
1269
1270 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1271 ;;
1272 ;; R18[8] = R22[4] * R18[4]
1273 ;; Ordinary ABI Function
;; Signed / unsigned widening 64 = 32 x 32 without MUL, sharing one body.
;; __mulsidi3 sets T and skips the CLT at the __umulsidi3 entry; Mask
;; (0xFF or 0x7F via BLD) selects whether the top bit of each operand
;; takes part in the sign extension: "AND; LSL; SBC rN,rN" broadcasts the
;; (masked) sign into the upper four bytes.  The 64-bit product itself is
;; delegated to the non-MUL __muldi3.  Saves R10..R17, R28, R29 via the
;; prologue/epilogue stubs.
1274 DEFUN __mulsidi3
1275 set
1276 skip
1277 ;; FALLTHRU
1278 ENDF __mulsidi3
1279
1280 DEFUN __umulsidi3
1281 clt ; skipped
1282 ;; Save 10 Registers: R10..R17, R28, R29
1283 do_prologue_saves 10
1284 ldi Mask, 0xff
1285 bld Mask, 7
1286 ;; Move B into place...
1287 wmov B0, BB0
1288 wmov B2, BB2
1289 ;; ...and extend it
1290 and BB3, Mask
1291 lsl BB3
1292 sbc B4, B4
1293 mov B5, B4
1294 wmov B6, B4
1295 ;; Move A into place...
1296 wmov A0, AA0
1297 wmov A2, AA2
1298 ;; ...and extend it
1299 and AA3, Mask
1300 lsl AA3
1301 sbc A4, A4
1302 mov A5, A4
1303 wmov A6, A4
1304 XCALL __muldi3
1305 do_epilogue_restores 10
1306 ENDF __umulsidi3
1307
1308 #undef A0
1309 #undef A1
1310 #undef A2
1311 #undef A3
1312 #undef A4
1313 #undef A5
1314 #undef A6
1315 #undef A7
1316 #undef B0
1317 #undef B1
1318 #undef B2
1319 #undef B3
1320 #undef B4
1321 #undef B5
1322 #undef B6
1323 #undef B7
1324 #undef AA0
1325 #undef AA1
1326 #undef AA2
1327 #undef AA3
1328 #undef BB0
1329 #undef BB1
1330 #undef BB2
1331 #undef BB3
1332 #undef Mask
1333 #endif /* L_mulsidi3 && !HAVE_MUL */
1334 #endif /* if not __AVR_TINY__ */
1335 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1336
1337
1338 .section .text.libgcc.div, "ax", @progbits
1340
1341 /*******************************************************
1342 Division 8 / 8 => (result + remainder)
1343 *******************************************************/
1344 #define r_rem r25 /* remainder */
1345 #define r_arg1 r24 /* dividend, quotient */
1346 #define r_arg2 r22 /* divisor */
1347 #define r_cnt r23 /* loop count */
1348
1349 #if defined (L_udivmodqi4)
;; Unsigned 8 / 8 restoring division, 8 quotient bits in 9 loop passes.
;; In: r24 = dividend, r22 = divisor.
;; Out: r24 = quotient, r25 = remainder.
;; The quotient bits are shifted into r24 inverted (carry-complement
;; trick), hence the final COM.
1350 DEFUN __udivmodqi4
1351 sub r_rem,r_rem ; clear remainder and carry
1352 ldi r_cnt,9 ; init loop counter
1353 rjmp __udivmodqi4_ep ; jump to entry point
1354 __udivmodqi4_loop:
1355 rol r_rem ; shift dividend into remainder
1356 cp r_rem,r_arg2 ; compare remainder & divisor
1357 brcs __udivmodqi4_ep ; remainder < divisor
1358 sub r_rem,r_arg2 ; reduce remainder by divisor
1359 __udivmodqi4_ep:
1360 rol r_arg1 ; shift dividend (with CARRY)
1361 dec r_cnt ; decrement loop counter
1362 brne __udivmodqi4_loop
1363 com r_arg1 ; complement result
1364 ; because C flag was complemented in loop
1365 ret
1366 ENDF __udivmodqi4
1367 #endif /* defined (L_udivmodqi4) */
1368
1369 #if defined (L_divmodqi4)
;; Signed 8 / 8 division: take absolute values, divide unsigned, then fix
;; signs — remainder takes the sign of the dividend (stored in T),
;; quotient is negative iff operand signs differ (kept in bit 7 of
;; __tmp_reg__).
1370 DEFUN __divmodqi4
1371 bst r_arg1,7 ; store sign of dividend
1372 mov __tmp_reg__,r_arg1
1373 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1374 sbrc r_arg1,7
1375 neg r_arg1 ; dividend negative : negate
1376 sbrc r_arg2,7
1377 neg r_arg2 ; divisor negative : negate
1378 XCALL __udivmodqi4 ; do the unsigned div/mod
1379 brtc __divmodqi4_1
1380 neg r_rem ; correct remainder sign
1381 __divmodqi4_1:
1382 sbrc __tmp_reg__,7
1383 neg r_arg1 ; correct result sign
1384 __divmodqi4_exit:
1385 ret
1386 ENDF __divmodqi4
1387 #endif /* defined (L_divmodqi4) */
1388
1389 #undef r_rem
1390 #undef r_arg1
1391 #undef r_arg2
1392 #undef r_cnt
1393
1394
1395 /*******************************************************
1396 Division 16 / 16 => (result + remainder)
1397 *******************************************************/
1398 #define r_remL r26 /* remainder Low */
1399 #define r_remH r27 /* remainder High */
1400
1401 /* return: remainder */
1402 #define r_arg1L r24 /* dividend Low */
1403 #define r_arg1H r25 /* dividend High */
1404
1405 /* return: quotient */
1406 #define r_arg2L r22 /* divisor Low */
1407 #define r_arg2H r23 /* divisor High */
1408
1409 #define r_cnt r21 /* loop count */
1410
1411 #if defined (L_udivmodhi4)
;; Unsigned 16 / 16 restoring division, 16 quotient bits in 17 passes.
;; In: r25:r24 = dividend, r23:r22 = divisor.
;; Out: r23:r22 = quotient, r25:r24 = remainder (div()-style layout).
;; As in the 8-bit variant, quotient bits accumulate inverted and are
;; fixed up with COM at the end.
1412 DEFUN __udivmodhi4
1413 sub r_remL,r_remL
1414 sub r_remH,r_remH ; clear remainder and carry
1415 ldi r_cnt,17 ; init loop counter
1416 rjmp __udivmodhi4_ep ; jump to entry point
1417 __udivmodhi4_loop:
1418 rol r_remL ; shift dividend into remainder
1419 rol r_remH
1420 cp r_remL,r_arg2L ; compare remainder & divisor
1421 cpc r_remH,r_arg2H
1422 brcs __udivmodhi4_ep ; remainder < divisor
1423 sub r_remL,r_arg2L ; reduce remainder by divisor
1424 sbc r_remH,r_arg2H
1425 __udivmodhi4_ep:
1426 rol r_arg1L ; shift dividend (with CARRY)
1427 rol r_arg1H
1428 dec r_cnt ; decrement loop counter
1429 brne __udivmodhi4_loop
1430 com r_arg1L ; complement result
1431 com r_arg1H ; because C flag was complemented in loop
1432 ; div/mod results to return registers, as for the div() function
1433 mov_l r_arg2L, r_arg1L ; quotient
1434 mov_h r_arg2H, r_arg1H
1435 mov_l r_arg1L, r_remL ; remainder
1436 mov_h r_arg1H, r_remH
1437 ret
1438 ENDF __udivmodhi4
1439 #endif /* defined (L_udivmodhi4) */
1440
1441 #if defined (L_divmodhi4)
;; Signed 16-bit division/modulo; also exported as `_div'.
;; Same register layout as __udivmodhi4.  The sign fixups are done
;; by RCALLing into the negate snippets below; note that
;; "brtc __divmodhi4_exit" deliberately FALLS THROUGH into
;; __divmodhi4_neg1 to negate the remainder when T (dividend sign)
;; is set.
1442 DEFUN __divmodhi4
1443     .global _div
1444 _div:
1445         bst     r_arg1H,7       ; store sign of dividend in T
1446         mov     __tmp_reg__,r_arg2H
1447         brtc    0f
1448         com     __tmp_reg__     ; r0.7 is sign of result
1449         rcall   __divmodhi4_neg1 ; dividend negative: negate
1450 0:
1451         sbrc    r_arg2H,7
1452         rcall   __divmodhi4_neg2 ; divisor negative: negate
1453         XCALL   __udivmodhi4    ; do the unsigned div/mod
1454         sbrc    __tmp_reg__,7
1455         rcall   __divmodhi4_neg2 ; correct remainder sign
1456         brtc    __divmodhi4_exit ; else fall through: negate remainder
1457 __divmodhi4_neg1:
1458         ;; correct dividend/remainder sign (negate r_arg1H:r_arg1L)
1459         com     r_arg1H
1460         neg     r_arg1L
1461         sbci    r_arg1H,0xff
1462         ret
1463 __divmodhi4_neg2:
1464         ;; correct divisor/result sign (negate r_arg2H:r_arg2L)
1465         com     r_arg2H
1466         neg     r_arg2L
1467         sbci    r_arg2H,0xff
1468 __divmodhi4_exit:
1469         ret
1470 ENDF __divmodhi4
1471 #endif /* defined (L_divmodhi4) */
1472
1473 #undef r_remH
1474 #undef r_remL
1475
1476 #undef r_arg1H
1477 #undef r_arg1L
1478
1479 #undef r_arg2H
1480 #undef r_arg2L
1481
1482 #undef r_cnt
1483
1484 /*******************************************************
1485 Division 24 / 24 => (result + remainder)
1486 *******************************************************/
1487
1488 ;; A[0..2]: In: Dividend; Out: Quotient
1489 #define A0 22
1490 #define A1 A0+1
1491 #define A2 A0+2
1492
1493 ;; B[0..2]: In: Divisor; Out: Remainder
1494 #define B0 18
1495 #define B1 B0+1
1496 #define B2 B0+2
1497
1498 ;; C[0..2]: Expand remainder
1499 #define C0 __zero_reg__
1500 #define C1 26
1501 #define C2 25
1502
1503 ;; Loop counter
1504 #define r_cnt 21
1505
1506 #if defined (L_udivmodpsi4)
;; Unsigned 24-bit division/modulo (shift-and-subtract, 25 rounds).
1507 ;; R24:R22 = R24:R22 udiv R20:R18
1508 ;; R20:R18 = R24:R22 umod R20:R18
1509 ;; Clobbers: R21, R25, R26
1510
1511 DEFUN __udivmodpsi4
1512     ; init loop counter
1513     ldi r_cnt, 24+1
1514     ; Clear remainder and carry.  C0 is already 0
1515     clr C1
1516     sub C2, C2
1517     ; jump to entry point
1518     rjmp __udivmodpsi4_start
1519 __udivmodpsi4_loop:
1520     ; shift dividend into remainder
1521     rol C0
1522     rol C1
1523     rol C2
1524     ; compare remainder & divisor
1525     cp  C0, B0
1526     cpc C1, B1
1527     cpc C2, B2
1528     brcs __udivmodpsi4_start ; remainder < divisor
1529     sub C0, B0 ; subtract divisor from remainder
1530     sbc C1, B1
1531     sbc C2, B2
1532 __udivmodpsi4_start:
1533     ; shift dividend (with CARRY = complemented quotient bit)
1534     rol A0
1535     rol A1
1536     rol A2
1537     ; decrement loop counter
1538     dec r_cnt
1539     brne __udivmodpsi4_loop
1540     com A0 ; complement result: C flag was complemented in loop
1541     com A1
1542     com A2
1543     ; div/mod results to return registers
1544     ; remainder
1545     mov B0, C0
1546     mov B1, C1
1547     mov B2, C2
1548     clr __zero_reg__ ; C0 aliased __zero_reg__; restore it to 0
1549     ret
1550 ENDF __udivmodpsi4
1551 #endif /* defined (L_udivmodpsi4) */
1552
1553 #if defined (L_divmodpsi4)
;; Signed 24-bit division/modulo.  Wraps __udivmodpsi4 with sign
;; handling: T holds the dividend's sign (-> remainder sign),
;; __tmp_reg__ bit 7 holds the quotient's sign.  Note the
;; "brtc __divmodpsi4_end" falls through into __divmodpsi4_negB
;; to negate the remainder when T is set.
1554 ;; R24:R22 = R24:R22 div R20:R18
1555 ;; R20:R18 = R24:R22 mod R20:R18
1556 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1557
1558 DEFUN __divmodpsi4
1559     ; R0.7 will contain the sign of the result:
1560     ; R0.7 = A.sign ^ B.sign
1561     mov __tmp_reg__, B2
1562     ; T-flag = sign of dividend
1563     bst A2, 7
1564     brtc 0f
1565     com __tmp_reg__
1566     ; Adjust dividend's sign
1567     rcall __divmodpsi4_negA
1568 0:
1569     ; Adjust divisor's sign
1570     sbrc B2, 7
1571     rcall __divmodpsi4_negB
1572
1573     ; Do the unsigned div/mod
1574     XCALL __udivmodpsi4
1575
1576     ; Adjust quotient's sign
1577     sbrc __tmp_reg__, 7
1578     rcall __divmodpsi4_negA
1579
1580     ; Adjust remainder's sign (fall through to negB if T set)
1581     brtc __divmodpsi4_end
1582
1583 __divmodpsi4_negB:
1584     ; Correct divisor/remainder sign: B[] = -B[]
1585     com B2
1586     com B1
1587     neg B0
1588     sbci B1, -1
1589     sbci B2, -1
1590     ret
1591
1592     ; Correct dividend/quotient sign: A[] = -A[]
1593 __divmodpsi4_negA:
1594     com A2
1595     com A1
1596     neg A0
1597     sbci A1, -1
1598     sbci A2, -1
1599 __divmodpsi4_end:
1600     ret
1601
1602 ENDF __divmodpsi4
1603 #endif /* defined (L_divmodpsi4) */
1604
1605 #undef A0
1606 #undef A1
1607 #undef A2
1608
1609 #undef B0
1610 #undef B1
1611 #undef B2
1612
1613 #undef C0
1614 #undef C1
1615 #undef C2
1616
1617 #undef r_cnt
1618
1619 /*******************************************************
1620 Division 32 / 32 => (result + remainder)
1621 *******************************************************/
1622 #define r_remHH r31 /* remainder High */
1623 #define r_remHL r30
1624 #define r_remH r27
1625 #define r_remL r26 /* remainder Low */
1626
1627 /* return: remainder */
1628 #define r_arg1HH r25 /* dividend High */
1629 #define r_arg1HL r24
1630 #define r_arg1H r23
1631 #define r_arg1L r22 /* dividend Low */
1632
1633 /* return: quotient */
1634 #define r_arg2HH r21 /* divisor High */
1635 #define r_arg2HL r20
1636 #define r_arg2H r19
1637 #define r_arg2L r18 /* divisor Low */
1638
1639 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1640
1641 #if defined (L_udivmodsi4)
;; Unsigned 32-bit division/modulo (shift-and-subtract, 33 rounds).
;; In:  r25:r22 = dividend, r21:r18 = divisor.
;; Out: r21:r18 = quotient, r25:r22 = remainder (ldiv() layout).
;; r_cnt is __zero_reg__: the loop exits with it back at 0,
;; so no save/restore is needed.
1642 DEFUN __udivmodsi4
1643         ldi     r_remL, 33      ; init loop counter
1644         mov     r_cnt, r_remL
1645         sub     r_remL,r_remL
1646         sub     r_remH,r_remH   ; clear remainder and carry
1647         mov_l   r_remHL, r_remL
1648         mov_h   r_remHH, r_remH
1649         rjmp    __udivmodsi4_ep ; jump to entry point
1650 __udivmodsi4_loop:
1651         rol     r_remL          ; shift dividend into remainder
1652         rol     r_remH
1653         rol     r_remHL
1654         rol     r_remHH
1655         cp      r_remL,r_arg2L  ; compare remainder & divisor
1656         cpc     r_remH,r_arg2H
1657         cpc     r_remHL,r_arg2HL
1658         cpc     r_remHH,r_arg2HH
1659         brcs    __udivmodsi4_ep ; remainder < divisor
1660         sub     r_remL,r_arg2L  ; subtract divisor from remainder
1661         sbc     r_remH,r_arg2H
1662         sbc     r_remHL,r_arg2HL
1663         sbc     r_remHH,r_arg2HH
1664 __udivmodsi4_ep:
1665         rol     r_arg1L         ; shift dividend (with CARRY)
1666         rol     r_arg1H
1667         rol     r_arg1HL
1668         rol     r_arg1HH
1669         dec     r_cnt           ; decrement loop counter
1670         brne    __udivmodsi4_loop
1671 ; __zero_reg__ now restored (r_cnt == 0)
1672         com     r_arg1L         ; complement result
1673         com     r_arg1H         ; because C flag was complemented in loop
1674         com     r_arg1HL
1675         com     r_arg1HH
1676 ; div/mod results to return registers, as for the ldiv() function
1677         mov_l   r_arg2L, r_arg1L        ; quotient
1678         mov_h   r_arg2H, r_arg1H
1679         mov_l   r_arg2HL, r_arg1HL
1680         mov_h   r_arg2HH, r_arg1HH
1681         mov_l   r_arg1L, r_remL         ; remainder
1682         mov_h   r_arg1H, r_remH
1683         mov_l   r_arg1HL, r_remHL
1684         mov_h   r_arg1HH, r_remHH
1685         ret
1686 ENDF __udivmodsi4
1687 #endif /* defined (L_udivmodsi4) */
1688
1689 #if defined (L_divmodsi4)
;; Signed 32-bit division/modulo, wrapping __udivmodsi4.
;; T = dividend sign (-> remainder sign), __tmp_reg__ bit 7 =
;; quotient sign.  The dividend/remainder (r25:r22) is negated
;; via __negsi2; the divisor/quotient (r21:r18) via the local
;; __divmodsi4_neg2 snippet.
1690 DEFUN __divmodsi4
1691         mov     __tmp_reg__,r_arg2HH
1692         bst     r_arg1HH,7      ; store sign of dividend in T
1693         brtc    0f
1694         com     __tmp_reg__     ; r0.7 is sign of result
1695         XCALL   __negsi2        ; dividend negative: negate
1696 0:
1697         sbrc    r_arg2HH,7
1698         rcall   __divmodsi4_neg2 ; divisor negative: negate
1699         XCALL   __udivmodsi4    ; do the unsigned div/mod
1700         sbrc    __tmp_reg__, 7  ; correct quotient sign
1701         rcall   __divmodsi4_neg2
1702         brtc    __divmodsi4_exit ; correct remainder sign
1703         XJMP    __negsi2        ; tail-call: negate remainder
1704 __divmodsi4_neg2:
1705         ;; correct divisor/quotient sign: negate r_arg2HH..r_arg2L
1706         com     r_arg2HH
1707         com     r_arg2HL
1708         com     r_arg2H
1709         neg     r_arg2L
1710         sbci    r_arg2H,0xff
1711         sbci    r_arg2HL,0xff
1712         sbci    r_arg2HH,0xff
1713 __divmodsi4_exit:
1714         ret
1715 ENDF __divmodsi4
1716 #endif /* defined (L_divmodsi4) */
1717
1718 #if defined (L_negsi2)
1719 ;; (set (reg:SI 22)
1720 ;;      (neg:SI (reg:SI 22)))
1721 ;; Sets the V flag for signed overflow tests
;; Negate the 32-bit value in R25:R22 in place (NEG4 macro).
1722 DEFUN __negsi2
1723         NEG4    22
1724         ret
1725 ENDF __negsi2
1726 #endif /* L_negsi2 */
1727
1728 #undef r_remHH
1729 #undef r_remHL
1730 #undef r_remH
1731 #undef r_remL
1732 #undef r_arg1HH
1733 #undef r_arg1HL
1734 #undef r_arg1H
1735 #undef r_arg1L
1736 #undef r_arg2HH
1737 #undef r_arg2HL
1738 #undef r_arg2H
1739 #undef r_arg2L
1740 #undef r_cnt
1741
1742 /* *di routines use registers below R19 and won't work with tiny arch
1743 right now. */
1744
1745 #if !defined (__AVR_TINY__)
1746 /*******************************************************
1747 Division 64 / 64
1748 Modulo 64 % 64
1749 *******************************************************/
1750
1751 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1752 ;; at least 16k of Program Memory. For smaller Devices, depend
1753 ;; on MOVW and SP Size.  There is a connection between SP Size and
1754 ;; Flash Size so that SP Size can be used to test for Flash Size.
1755
1756 #if defined (__AVR_HAVE_JMP_CALL__)
1757 # define SPEED_DIV 8
1758 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1759 # define SPEED_DIV 16
1760 #else
1761 # define SPEED_DIV 0
1762 #endif
1763
1764 ;; A[0..7]: In: Dividend;
1765 ;; Out: Quotient (T = 0)
1766 ;; Out: Remainder (T = 1)
1767 #define A0 18
1768 #define A1 A0+1
1769 #define A2 A0+2
1770 #define A3 A0+3
1771 #define A4 A0+4
1772 #define A5 A0+5
1773 #define A6 A0+6
1774 #define A7 A0+7
1775
1776 ;; B[0..7]: In: Divisor; Out: Clobber
1777 #define B0 10
1778 #define B1 B0+1
1779 #define B2 B0+2
1780 #define B3 B0+3
1781 #define B4 B0+4
1782 #define B5 B0+5
1783 #define B6 B0+6
1784 #define B7 B0+7
1785
1786 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1787 #define C0 8
1788 #define C1 C0+1
1789 #define C2 30
1790 #define C3 C2+1
1791 #define C4 28
1792 #define C5 C4+1
1793 #define C6 26
1794 #define C7 C6+1
1795
1796 ;; Holds Signs during Division Routine
1797 #define SS __tmp_reg__
1798
1799 ;; Bit-Counter in Division Routine
1800 #define R_cnt __zero_reg__
1801
1802 ;; Scratch Register for Negation
1803 #define NN r31
1804
1805 #if defined (L_udivdi3)
1806
1807 ;; R25:R18 = R24:R18 umod R17:R10
1808 ;; Ordinary ABI-Function
;; T = 1 selects "remainder" mode of the common worker below.
1809
1810 DEFUN __umoddi3
1811     set
1812     rjmp __udivdi3_umoddi3
1813 ENDF  __umoddi3
1814
1815 ;; R25:R18 = R24:R18 udiv R17:R10
1816 ;; Ordinary ABI-Function
;; T = 0 selects "quotient" mode; falls through into the worker.
1817
1818 DEFUN __udivdi3
1819     clt
1820 ENDF  __udivdi3
1821
;; Common tail: save the call-saved registers that __udivmod64
;; clobbers (C0, C1 = R8, R9; C4, C5 = R28, R29), run the worker,
;; restore, return.
1822 DEFUN __udivdi3_umoddi3
1823     push  C0
1824     push  C1
1825     push  C4
1826     push  C5
1827     XCALL __udivmod64
1828     pop   C5
1829     pop   C4
1830     pop   C1
1831     pop   C0
1832     ret
1833 ENDF  __udivdi3_umoddi3
1834 #endif /* L_udivdi3 */
1835
1836 #if defined (L_udivmod64)
1837
1838 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1839 ;; No Registers saved/restored; the Callers will take Care.
1840 ;; Preserves B[] and T-flag
1841 ;; T = 0: Compute Quotient in A[]
1842 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
;;
;; A[] = R25..R18 (dividend), B[] = R17..R10 (divisor),
;; C[] = scattered remainder accumulator, R_cnt = __zero_reg__
;; (0 again after the loop).  SPEED_DIV > 0 enables pre-skipping
;; whole 8- or 32-bit chunks before the 1-bit restoring loop.
1843
1844 DEFUN __udivmod64
1845
1846     ;; Clear Remainder (C6, C7 will follow)
1847     clr  C0
1848     clr  C1
1849     wmov C2, C0
1850     wmov C4, C0
1851     ldi  C7, 64
1852
1853 #if SPEED_DIV == 0 || SPEED_DIV == 16
1854     ;; Initialize Loop-Counter
1855     mov  R_cnt, C7
1856     wmov C6, C0
1857 #endif /* SPEED_DIV */
1858
1859 #if SPEED_DIV == 8
1860
1861     push A7
1862     clr  C6
1863
1864 1:  ;; Compare shifted Dividend against Divisor
1865     ;; If -- even after Shifting -- it is smaller...
1866     CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
1867     cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
1868     brcc 2f
1869
1870     ;; ...then we can subtract it.  Thus, it is legal to shift left
1871                $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
1872     mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
1873     mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
1874     mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0
1875
1876     ;; 8 Bits are done
1877     subi C7, 8
1878     brne 1b
1879
1880     ;; Shifted 64 Bits:  A7 has traveled to C7
1881     pop  C7
1882     ;; Divisor is greater than Dividend. We have:
1883     ;; A[] % B[] = A[]
1884     ;; A[] / B[] = 0
1885     ;; Thus, we can return immediately
1886     rjmp 5f
1887
1888 2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
1889     mov  R_cnt, C7
1890
1891     ;; Push of A7 is not needed because C7 is still 0
1892     pop  C7
1893     clr  C7
1894
1895 #elif  SPEED_DIV == 16
1896
1897     ;; Compare shifted Dividend against Divisor
1898     cp   A7, B3
1899     cpc  C0, B4
1900     cpc  C1, B5
1901     cpc  C2, B6
1902     cpc  C3, B7
1903     brcc 2f
1904
1905     ;; Divisor is greater than shifted Dividend:  We can shift the Dividend
1906     ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1907     wmov  C2,A6  $  wmov C0,A4
1908     wmov  A6,A2  $  wmov A4,A0
1909     wmov  A2,C6  $  wmov A0,C4
1910
1911     ;; Set Bit Counter to 32
1912     lsr  R_cnt
1913 2:
1914 #elif SPEED_DIV
1915 #error SPEED_DIV = ?
1916 #endif /* SPEED_DIV */
1917
1918     ;; The very Division + Remainder Routine
1919
1920 3:  ;; Left-shift Dividend...
1921     lsl A0     $  rol A1     $  rol A2     $  rol A3
1922     rol A4     $  rol A5     $  rol A6     $  rol A7
1923
1924     ;; ...into Remainder
1925     rol C0     $  rol C1     $  rol C2     $  rol C3
1926     rol C4     $  rol C5     $  rol C6     $  rol C7
1927
1928     ;; Compare Remainder and Divisor
1929     CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
1930     cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7
1931
1932     brcs 4f
1933
1934     ;; Divisor fits into Remainder:  Subtract it from Remainder...
1935     SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
1936     sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7
1937
1938     ;; ...and set according Bit in the upcoming Quotient
1939     ;; The Bit will travel to its final Position
1940     ori A0, 1
1941
1942 4:  ;; This Bit is done
1943     dec  R_cnt
1944     brne 3b
1945     ;; __zero_reg__ is 0 again
1946
1947     ;; T = 0: We are fine with the Quotient in A[]
1948     ;; T = 1: Copy Remainder to A[]
1949 5:  brtc 6f
1950     wmov A0, C0
1951     wmov A2, C2
1952     wmov A4, C4
1953     wmov A6, C6
1954     ;; Move the Sign of the Result to SS.7
1955     lsl  SS
1956
1957 6:  ret
1958
1959 ENDF __udivmod64
1960 #endif /* L_udivmod64 */
1961
1962
1963 #if defined (L_divdi3)
1964
1965 ;; R25:R18 = R24:R18 mod R17:R10
1966 ;; Ordinary ABI-Function
;; T = 1 selects "remainder" mode of the common worker below.
1967
1968 DEFUN __moddi3
1969     set
1970     rjmp __divdi3_moddi3
1971 ENDF  __moddi3
1972
1973 ;; R25:R18 = R24:R18 div R17:R10
1974 ;; Ordinary ABI-Function
;; T = 0 selects "quotient" mode; falls through into the worker.
1975
1976 DEFUN __divdi3
1977     clt
1978 ENDF  __divdi3
1979
;; Common signed 64-bit worker.  Negates operands as needed,
;; calls __udivmod64, then negates the result according to
;; SS.7 (sign of quotient or -- after the worker's "lsl SS" in
;; remainder mode -- sign of the remainder).
1980 DEFUN __divdi3_moddi3
1981 #if SPEED_DIV
1982     mov  r31, A7
1983     or   r31, B7
1984     brmi 0f
1985     ;; Both Signs are 0:  the following Complexity is not needed
1986     XJMP __udivdi3_umoddi3
1987 #endif /* SPEED_DIV */
1988
1989 0:  ;; The Prologue
1990     ;; Save 12 Registers:  Y, 17...8
1991     ;; No Frame needed
1992     do_prologue_saves 12
1993
1994     ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1995     ;; SS.6 will contain the Sign of the Remainder (A.sign)
1996     mov  SS, A7
1997     asr  SS
1998     ;; Adjust Dividend's Sign as needed
1999 #if SPEED_DIV
2000     ;; Compiling for Speed we know that at least one Sign must be < 0
2001     ;; Thus, if A[] >= 0 then we know B[] < 0
2002     brpl 22f
2003 #else
2004     brpl 21f
2005 #endif /* SPEED_DIV */
2006
2007     XCALL __negdi2
2008
2009     ;; Adjust Divisor's Sign and SS.7 as needed
2010 21: tst  B7
2011     brpl 3f
2012 22: ldi  NN, 1 << 7
2013     eor  SS, NN
2014
2015     ldi NN, -1
2016     com B4     $  com B5     $  com B6     $  com B7
2017                $  com B1     $  com B2     $  com B3
2018     NEG B0
2019                $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
2020     sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN
2021
2022 3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2023     XCALL __udivmod64
2024
2025     ;; Adjust Result's Sign
2026 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2027     tst  SS
2028     brpl 4f
2029 #else
2030     sbrc SS, 7
2031 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2032     XCALL __negdi2
2033
2034 4:  ;; Epilogue: Restore 12 Registers and return
2035     do_epilogue_restores 12
2036
2037 ENDF __divdi3_moddi3
2038
2039 #endif /* L_divdi3 */
2040
2041 #undef R_cnt
2042 #undef SS
2043 #undef NN
2044
2045 .section .text.libgcc, "ax", @progbits
2046
2047 #define TT __tmp_reg__
2048
2049 #if defined (L_adddi3)
2050 ;; (set (reg:DI 18)
2051 ;;      (plus:DI (reg:DI 18)
2052 ;;               (reg:DI 10)))
2053 ;; Sets the V flag for signed overflow tests
2054 ;; Sets the C flag for unsigned overflow tests
;; 64-bit add:  A[] (R25..R18) += B[] (R17..R10).
2055 DEFUN __adddi3
2056     ADD A0,B0  $  adc A1,B1  $  adc A2,B2  $  adc A3,B3
2057     adc A4,B4  $  adc A5,B5  $  adc A6,B6  $  adc A7,B7
2058     ret
2059 ENDF __adddi3
2060 #endif /* L_adddi3 */
2061
2062 #if defined (L_adddi3_s8)
2063 ;; (set (reg:DI 18)
2064 ;;      (plus:DI (reg:DI 18)
2065 ;;               (sign_extend:SI (reg:QI 26))))
2066 ;; Sets the V flag for signed overflow tests
2067 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
;; A[] += sign_extend (R26); TT holds the replicated sign byte.
2068 DEFUN __adddi3_s8
2069     clr     TT
2070     sbrc    r26, 7
2071     com     TT
2072     ADD A0,r26 $  adc A1,TT  $  adc A2,TT  $  adc A3,TT
2073     adc A4,TT  $  adc A5,TT  $  adc A6,TT  $  adc A7,TT
2074     ret
2075 ENDF __adddi3_s8
2076 #endif /* L_adddi3_s8 */
2077
2078 #if defined (L_subdi3)
2079 ;; (set (reg:DI 18)
2080 ;;      (minus:DI (reg:DI 18)
2081 ;;                (reg:DI 10)))
2082 ;; Sets the V flag for signed overflow tests
2083 ;; Sets the C flag for unsigned overflow tests
;; 64-bit subtract:  A[] (R25..R18) -= B[] (R17..R10).
2084 DEFUN __subdi3
2085     SUB A0,B0  $  sbc A1,B1  $  sbc A2,B2  $  sbc A3,B3
2086     sbc A4,B4  $  sbc A5,B5  $  sbc A6,B6  $  sbc A7,B7
2087     ret
2088 ENDF __subdi3
2089 #endif /* L_subdi3 */
2090
2091 #if defined (L_cmpdi2)
2092 ;; (set (cc0)
2093 ;;      (compare (reg:DI 18)
2094 ;;               (reg:DI 10)))
;; 64-bit compare A[] vs. B[]; result only in the flags.
2095 DEFUN __cmpdi2
2096     CP  A0,B0  $  cpc A1,B1  $  cpc A2,B2  $  cpc A3,B3
2097     cpc A4,B4  $  cpc A5,B5  $  cpc A6,B6  $  cpc A7,B7
2098     ret
2099 ENDF __cmpdi2
2100 #endif /* L_cmpdi2 */
2101
2102 #if defined (L_cmpdi2_s8)
2103 ;; (set (cc0)
2104 ;;      (compare (reg:DI 18)
2105 ;;               (sign_extend:SI (reg:QI 26))))
;; Compare A[] against sign-extended R26; TT = replicated sign byte.
2106 DEFUN __cmpdi2_s8
2107     clr     TT
2108     sbrc    r26, 7
2109     com     TT
2110     CP  A0,r26 $  cpc A1,TT  $  cpc A2,TT  $  cpc A3,TT
2111     cpc A4,TT  $  cpc A5,TT  $  cpc A6,TT  $  cpc A7,TT
2112     ret
2113 ENDF __cmpdi2_s8
2114 #endif /* L_cmpdi2_s8 */
2115
2116 #if defined (L_negdi2)
2117 ;; (set (reg:DI 18)
2118 ;;      (neg:DI (reg:DI 18)))
2119 ;; Sets the V flag for signed overflow tests
;; 64-bit negate in place: A[] = -A[] (complement all, add 1 via
;; NEG of the low byte and the SBCI -1 carry chain).
2120 DEFUN __negdi2
2121
2122     com  A4    $  com  A5    $  com  A6    $  com  A7
2123                $  com  A1    $  com  A2    $  com  A3
2124     NEG  A0
2125                $  sbci A1,-1 $  sbci A2,-1 $  sbci A3,-1
2126     sbci A4,-1 $  sbci A5,-1 $  sbci A6,-1 $  sbci A7,-1
2127     ret
2128
2129 ENDF __negdi2
2130 #endif /* L_negdi2 */
2131
2132 #undef TT
2133
2134 #undef C7
2135 #undef C6
2136 #undef C5
2137 #undef C4
2138 #undef C3
2139 #undef C2
2140 #undef C1
2141 #undef C0
2142
2143 #undef B7
2144 #undef B6
2145 #undef B5
2146 #undef B4
2147 #undef B3
2148 #undef B2
2149 #undef B1
2150 #undef B0
2151
2152 #undef A7
2153 #undef A6
2154 #undef A5
2155 #undef A4
2156 #undef A3
2157 #undef A2
2158 #undef A1
2159 #undef A0
2160
2161 #endif /* !defined (__AVR_TINY__) */
2162
2163
2164 .section .text.libgcc.prologue, "ax", @progbits
2166
2167 /**********************************
2168 * This is a prologue subroutine
2169 **********************************/
2170 #if !defined (__AVR_TINY__)
2171 #if defined (L_prologue)
2172
2173 ;; This function does not clobber T-flag; 64-bit division relies on it
;; Common prologue helper: pushes r2..r17, r28, r29, then allocates
;; a stack frame of X (= r27:r26) bytes and continues at the address
;; the caller supplied (XIJMP -- presumably via Z; macro defined
;; elsewhere in this file).  The three SP variants handle 8-bit SP,
;; XMEGA's atomic SP writes, and the classic cli-protected update.
2174 DEFUN __prologue_saves__
2175         push r2
2176         push r3
2177         push r4
2178         push r5
2179         push r6
2180         push r7
2181         push r8
2182         push r9
2183         push r10
2184         push r11
2185         push r12
2186         push r13
2187         push r14
2188         push r15
2189         push r16
2190         push r17
2191         push r28
2192         push r29
2193 #if !defined (__AVR_HAVE_SPH__)
2194         in      r28,__SP_L__
2195         sub     r28,r26
2196         out     __SP_L__,r28
2197         clr     r29
2198 #elif defined (__AVR_XMEGA__)
2199         in      r28,__SP_L__
2200         in      r29,__SP_H__
2201         sub     r28,r26
2202         sbc     r29,r27
2203         out     __SP_L__,r28
2204         out     __SP_H__,r29
2205 #else
2206         in      r28,__SP_L__
2207         in      r29,__SP_H__
2208         sub     r28,r26
2209         sbc     r29,r27
2210         in      __tmp_reg__,__SREG__
2211         cli
2212         out     __SP_H__,r29
2213         out     __SREG__,__tmp_reg__
2214         out     __SP_L__,r28
2215 #endif /* #SP = 8/16 */
2216
2217         XIJMP
2218
2219 ENDF __prologue_saves__
2220 #endif /* defined (L_prologue) */
2221
2222 /*
2223 * This is an epilogue subroutine
2224 */
2225 #if defined (L_epilogue)
2226
;; Common epilogue helper: reloads r2..r17 and the saved Y pair from
;; the frame (via Y displacements), releases the frame (size in r30)
;; and returns.  Mirrors __prologue_saves__'s three SP variants.
2227 DEFUN __epilogue_restores__
2228         ldd     r2,Y+18
2229         ldd     r3,Y+17
2230         ldd     r4,Y+16
2231         ldd     r5,Y+15
2232         ldd     r6,Y+14
2233         ldd     r7,Y+13
2234         ldd     r8,Y+12
2235         ldd     r9,Y+11
2236         ldd     r10,Y+10
2237         ldd     r11,Y+9
2238         ldd     r12,Y+8
2239         ldd     r13,Y+7
2240         ldd     r14,Y+6
2241         ldd     r15,Y+5
2242         ldd     r16,Y+4
2243         ldd     r17,Y+3
2244         ldd     r26,Y+2
2245 #if !defined (__AVR_HAVE_SPH__)
2246         ldd     r29,Y+1
2247         add     r28,r30
2248         out     __SP_L__,r28
2249         mov     r28, r26
2250 #elif defined (__AVR_XMEGA__)
2251         ldd     r27,Y+1
2252         add     r28,r30
2253         adc     r29,__zero_reg__
2254         out     __SP_L__,r28
2255         out     __SP_H__,r29
2256         wmov    28, 26
2257 #else
2258         ldd     r27,Y+1
2259         add     r28,r30
2260         adc     r29,__zero_reg__
2261         in      __tmp_reg__,__SREG__
2262         cli
2263         out     __SP_H__,r29
2264         out     __SREG__,__tmp_reg__
2265         out     __SP_L__,r28
2266         mov_l   r28, r26
2267         mov_h   r29, r27
2268 #endif /* #SP = 8/16 */
2269         ret
2270 ENDF __epilogue_restores__
2271 #endif /* defined (L_epilogue) */
2272 #endif /* !defined (__AVR_TINY__) */
2273
2274 #ifdef L_exit
2275         .section .fini9,"ax",@progbits
;; _exit / weak exit: entry of the .fini chain.  The ld script
;; concatenates .fini9 ... .fini0, so execution falls through the
;; fini code and ends in the infinite loop below with interrupts off.
2276 DEFUN   _exit
2277         .weak   exit
2278 exit:
2279 ENDF    _exit
2280
2281 /* Code from .fini8 ... .fini1 sections inserted by ld script.  */
2282
2283         .section .fini0,"ax",@progbits
2284         cli
2285 __stop_program:
2286         rjmp    __stop_program
2287 #endif /* defined (L_exit) */
2288
2289 #ifdef L_cleanup
;; Weak no-op _cleanup stub; may be overridden by the C library.
2290         .weak   _cleanup
2291         .func   _cleanup
2292 _cleanup:
2293         ret
2294 .endfunc
2295 #endif /* defined (L_cleanup) */
2296
2297
2298 .section .text.libgcc, "ax", @progbits
2300
2301 #ifdef L_tablejump2
;; Indirect jump through a flash jumptable.
;; In: Z (plus R24 on EIJMP/EICALL devices) = WORD address of the
;; jumptable entry.  Converts it to a byte address, reads the target
;; word address from flash (or RAM on AVR_TINY) and jumps there.
2302 DEFUN __tablejump2__
2303     lsl r30
2304     rol r31
2305 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2306     ;; Word address of gs() jumptable entry in R24:Z
2307     rol r24
2308     out __RAMPZ__, r24
2309 #elif defined (__AVR_HAVE_ELPM__)
2310     ;; Word address of jumptable entry in Z
2311     clr __tmp_reg__
2312     rol __tmp_reg__
2313     out __RAMPZ__, __tmp_reg__
2314 #endif
2315
2316     ;; Read word address from jumptable and jump
2317
2318 #if defined (__AVR_HAVE_ELPMX__)
2319     elpm __tmp_reg__, Z+
2320     elpm r31, Z
2321     mov r30, __tmp_reg__
2322 #ifdef __AVR_HAVE_RAMPD__
2323     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2324     out __RAMPZ__, __zero_reg__
2325 #endif /* RAMPD */
2326     XIJMP
2327 #elif defined (__AVR_HAVE_ELPM__)
2328     elpm
2329     push r0
2330     adiw r30, 1
2331     elpm
2332     push r0
2333     ret ; "return" to the pushed target address
2334 #elif defined (__AVR_HAVE_LPMX__)
2335     lpm __tmp_reg__, Z+
2336     lpm r31, Z
2337     mov r30, __tmp_reg__
2338     ijmp
2339 #elif defined (__AVR_TINY__)
2340     wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2341     ld __tmp_reg__, Z+
2342     ld r31, Z  ; Use ld instead of lpm to load Z
2343     mov r30, __tmp_reg__
2344     ijmp
2345 #else
2346     lpm
2347     push r0
2348     adiw r30, 1
2349     lpm
2350     push r0
2351     ret ; "return" to the pushed target address
2352 #endif
2353 ENDF __tablejump2__
2354 #endif /* L_tablejump2 */
2355
2356 #if defined(__AVR_TINY__)
2357 #ifdef L_copy_data
2358         .section .init4,"ax",@progbits
;; Startup code: copy the .data image from program memory to RAM.
;; X = destination (RAM), Z = source; loop until X reaches __data_end.
;; On AVR_TINY flash is memory-mapped, so plain ld works.
2359         .global __do_copy_data
2360 __do_copy_data:
2361         ldi     r18, hi8(__data_end)
2362         ldi     r26, lo8(__data_start)
2363         ldi     r27, hi8(__data_start)
2364         ldi     r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2365         ldi     r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2366         rjmp    .L__do_copy_data_start
2367 .L__do_copy_data_loop:
2368         ld      r19, z+
2369         st      X+, r19
2370 .L__do_copy_data_start:
2371         cpi     r26, lo8(__data_end)
2372         cpc     r27, r18
2373         brne    .L__do_copy_data_loop
2374 #endif
2375 #else
2376 #ifdef L_copy_data
2377         .section .init4,"ax",@progbits
;; Startup code: copy .data from flash to RAM.  Three variants
;; depending on flash-read capability: ELPMX, plain ELPM (with manual
;; RAMPZ carry handling when Z wraps at 64 KiB), or LPM(X).
2378 DEFUN __do_copy_data
2379 #if defined(__AVR_HAVE_ELPMX__)
2380         ldi     r17, hi8(__data_end)
2381         ldi     r26, lo8(__data_start)
2382         ldi     r27, hi8(__data_start)
2383         ldi     r30, lo8(__data_load_start)
2384         ldi     r31, hi8(__data_load_start)
2385         ldi     r16, hh8(__data_load_start)
2386         out     __RAMPZ__, r16
2387         rjmp    .L__do_copy_data_start
2388 .L__do_copy_data_loop:
2389         elpm    r0, Z+
2390         st      X+, r0
2391 .L__do_copy_data_start:
2392         cpi     r26, lo8(__data_end)
2393         cpc     r27, r17
2394         brne    .L__do_copy_data_loop
2395 #elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2396         ldi     r17, hi8(__data_end)
2397         ldi     r26, lo8(__data_start)
2398         ldi     r27, hi8(__data_start)
2399         ldi     r30, lo8(__data_load_start)
2400         ldi     r31, hi8(__data_load_start)
2401         ldi     r16, hh8(__data_load_start - 0x10000)
2402 .L__do_copy_data_carry:
2403         inc     r16             ; bump RAMPZ when Z wrapped around
2404         out     __RAMPZ__, r16
2405         rjmp    .L__do_copy_data_start
2406 .L__do_copy_data_loop:
2407         elpm
2408         st      X+, r0
2409         adiw    r30, 1
2410         brcs    .L__do_copy_data_carry
2411 .L__do_copy_data_start:
2412         cpi     r26, lo8(__data_end)
2413         cpc     r27, r17
2414         brne    .L__do_copy_data_loop
2415 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2416         ldi     r17, hi8(__data_end)
2417         ldi     r26, lo8(__data_start)
2418         ldi     r27, hi8(__data_start)
2419         ldi     r30, lo8(__data_load_start)
2420         ldi     r31, hi8(__data_load_start)
2421         rjmp    .L__do_copy_data_start
2422 .L__do_copy_data_loop:
2423 #if defined (__AVR_HAVE_LPMX__)
2424         lpm     r0, Z+
2425 #else
2426         lpm
2427         adiw    r30, 1
2428 #endif
2429         st      X+, r0
2430 .L__do_copy_data_start:
2431         cpi     r26, lo8(__data_end)
2432         cpc     r27, r17
2433         brne    .L__do_copy_data_loop
2434 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2435 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2436         ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2437         out     __RAMPZ__, __zero_reg__
2438 #endif /* ELPM && RAMPD */
2439 ENDF __do_copy_data
2440 #endif /* L_copy_data */
2441 #endif /* !defined (__AVR_TINY__) */
2442
2443 /* __do_clear_bss is only necessary if there is anything in .bss section. */
2444
2445 #ifdef L_clear_bss
2446         .section .init4,"ax",@progbits
;; Startup code: zero-fill .bss.  X walks from __bss_start to
;; __bss_end storing __zero_reg__; r18 caches hi8(__bss_end) for
;; the 16-bit end-of-range compare.
2447 DEFUN __do_clear_bss
2448         ldi     r18, hi8(__bss_end)
2449         ldi     r26, lo8(__bss_start)
2450         ldi     r27, hi8(__bss_start)
2451         rjmp    .do_clear_bss_start
2452 .do_clear_bss_loop:
2453         st      X+, __zero_reg__
2454 .do_clear_bss_start:
2455         cpi     r26, lo8(__bss_end)
2456         cpc     r27, r18
2457         brne    .do_clear_bss_loop
2458 ENDF __do_clear_bss
2459 #endif /* L_clear_bss */
2460
2461 /* __do_global_ctors and __do_global_dtors are only necessary
2462 if there are any constructors/destructors. */
2463
2464 #if defined(__AVR_TINY__)
2465 #define cdtors_tst_reg r18
2466 #else
2467 #define cdtors_tst_reg r17
2468 #endif
2469
2470 #ifdef L_ctors
2471         .section .init6,"ax",@progbits
;; Call all static constructors.  Y (r29:r28) walks the .ctors
;; table of word addresses from __ctors_end DOWN to __ctors_start;
;; each entry is invoked through __tablejump2__.  On EIJMP/EICALL
;; devices r16/r24 carry the high (hh8) address byte.
2472 DEFUN __do_global_ctors
2473     ldi     cdtors_tst_reg, pm_hi8(__ctors_start)
2474     ldi     r28, pm_lo8(__ctors_end)
2475     ldi     r29, pm_hi8(__ctors_end)
2476 #ifdef __AVR_HAVE_EIJMP_EICALL__
2477     ldi     r16, pm_hh8(__ctors_end)
2478 #endif /* HAVE_EIJMP */
2479     rjmp    .L__do_global_ctors_start
2480 .L__do_global_ctors_loop:
2481     wsubi   28, 1
2482 #ifdef __AVR_HAVE_EIJMP_EICALL__
2483     sbc     r16, __zero_reg__
2484     mov     r24, r16
2485 #endif /* HAVE_EIJMP */
2486     mov_h   r31, r29
2487     mov_l   r30, r28
2488     XCALL   __tablejump2__
2489 .L__do_global_ctors_start:
2490     cpi     r28, pm_lo8(__ctors_start)
2491     cpc     r29, cdtors_tst_reg
2492 #ifdef __AVR_HAVE_EIJMP_EICALL__
2493     ldi     r24, pm_hh8(__ctors_start)
2494     cpc     r16, r24
2495 #endif /* HAVE_EIJMP */
2496     brne    .L__do_global_ctors_loop
2497 ENDF __do_global_ctors
2498 #endif /* L_ctors */
2499
2500 #ifdef L_dtors
2501         .section .fini6,"ax",@progbits
;; Call all static destructors.  Mirror image of __do_global_ctors:
;; Y walks the .dtors table UPWARD from __dtors_start to __dtors_end,
;; calling each entry through __tablejump2__.
2502 DEFUN __do_global_dtors
2503     ldi     cdtors_tst_reg, pm_hi8(__dtors_end)
2504     ldi     r28, pm_lo8(__dtors_start)
2505     ldi     r29, pm_hi8(__dtors_start)
2506 #ifdef __AVR_HAVE_EIJMP_EICALL__
2507     ldi     r16, pm_hh8(__dtors_start)
2508 #endif /* HAVE_EIJMP */
2509     rjmp    .L__do_global_dtors_start
2510 .L__do_global_dtors_loop:
2511 #ifdef __AVR_HAVE_EIJMP_EICALL__
2512     mov     r24, r16
2513 #endif /* HAVE_EIJMP */
2514     mov_h   r31, r29
2515     mov_l   r30, r28
2516     XCALL   __tablejump2__
2517     waddi   28, 1
2518 #ifdef __AVR_HAVE_EIJMP_EICALL__
2519     adc     r16, __zero_reg__
2520 #endif /* HAVE_EIJMP */
2521 .L__do_global_dtors_start:
2522     cpi     r28, pm_lo8(__dtors_end)
2523     cpc     r29, cdtors_tst_reg
2524 #ifdef __AVR_HAVE_EIJMP_EICALL__
2525     ldi     r24, pm_hh8(__dtors_end)
2526     cpc     r16, r24
2527 #endif /* HAVE_EIJMP */
2528     brne    .L__do_global_dtors_loop
2529 ENDF __do_global_dtors
2530 #endif /* L_dtors */
2531
2532 #undef cdtors_tst_reg
2533
2534 .section .text.libgcc, "ax", @progbits
2535
2536 #if !defined (__AVR_TINY__)
2537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2538 ;; Loading n bytes from Flash; n = 3,4
2539 ;; R22... = Flash[Z]
2540 ;; Clobbers: __tmp_reg__
2541
2542 #if (defined (L_load_3)            \
2543      || defined (L_load_4))        \
2544     && !defined (__AVR_HAVE_LPMX__)
2545
2546 ;; Destination
2547 #define D0  22
2548 #define D1  D0+1
2549 #define D2  D0+2
2550 #define D3  D0+3
2551
;; Load one byte from flash at Z into \dest (clobbers r0 via lpm)
;; and advance Z -- except after the last byte (\dest == D0+\n-1),
;; where Z is rewound to its starting value instead.
2552 .macro  .load dest, n
2553     lpm     \dest, Z
2554     mov     \dest, r0
2555 .if \dest != D0+\n-1
2556     adiw    r30, 1
2557 .else
2558     sbiw    r30, \n-1
2559 .endif
2560 .endm
2561
2562 #if defined (L_load_3)
;; Load 3 bytes from flash at Z into R24:R22, preserving Z and R25.
2563 DEFUN __load_3
2564     push  D3
2565     XCALL __load_4
2566     pop   D3
2567     ret
2568 ENDF __load_3
2569 #endif /* L_load_3 */
2570
2571 #if defined (L_load_4)
;; Load 4 bytes from flash at Z into R25:R22, preserving Z.
2572 DEFUN __load_4
2573     .load D0, 4
2574     .load D1, 4
2575     .load D2, 4
2576     .load D3, 4
2577     ret
2578 ENDF __load_4
2579 #endif /* L_load_4 */
2580
2581 #endif /* L_load_3 || L_load_4 */
2582 #endif /* !defined (__AVR_TINY__) */
2583
2584 #if !defined (__AVR_TINY__)
2585 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2586 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2587 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2588 ;; Clobbers: __tmp_reg__, R21, R30, R31
2589
2590 #if (defined (L_xload_1)            \
2591      || defined (L_xload_2)         \
2592      || defined (L_xload_3)         \
2593      || defined (L_xload_4))
2594
2595 ;; Destination
2596 #define D0  22
2597 #define D1  D0+1
2598 #define D2  D0+2
2599 #define D3  D0+3
2600
2601 ;; Register containing bits 16+ of the address
2602
2603 #define HHI8  21
2604
;; Load one flash byte into \dest and step Z, using the best
;; available instruction (ELPM Z+ / ELPM / LPM Z+ / LPM).  On plain
;; ELPM devices, also propagate the Z carry into RAMPZ between
;; bytes; on RAMPD devices, clear RAMPZ again after the last byte.
2605 .macro  .xload dest, n
2606 #if defined (__AVR_HAVE_ELPMX__)
2607     elpm    \dest, Z+
2608 #elif defined (__AVR_HAVE_ELPM__)
2609     elpm
2610     mov     \dest, r0
2611 .if \dest != D0+\n-1
2612     adiw    r30, 1
2613     adc     HHI8, __zero_reg__
2614     out     __RAMPZ__, HHI8
2615 .endif
2616 #elif defined (__AVR_HAVE_LPMX__)
2617     lpm     \dest, Z+
2618 #else
2619     lpm
2620     mov     \dest, r0
2621 .if \dest != D0+\n-1
2622     adiw    r30, 1
2623 .endif
2624 #endif
2625 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2626 .if \dest == D0+\n-1
2627     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2628     out     __RAMPZ__, __zero_reg__
2629 .endif
2630 #endif
2631 .endm ; .xload
2632
2633 #if defined (L_xload_1)
;; Load 1 byte from flash (HHI8.7 clear) or RAM (HHI8.7 set) at Z.
2634 DEFUN __xload_1
2635 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2636     sbrc    HHI8, 7
2637     ld      D0, Z
2638     sbrs    HHI8, 7
2639     lpm     D0, Z
2640     ret
2641 #else
2642     sbrc    HHI8, 7
2643     rjmp    1f
2644 #if defined (__AVR_HAVE_ELPM__)
2645     out     __RAMPZ__, HHI8
2646 #endif /* __AVR_HAVE_ELPM__ */
2647     .xload  D0, 1
2648     ret
2649 1:  ld      D0, Z
2650     ret
2651 #endif /* LPMx && ! ELPM */
2652 ENDF __xload_1
2653 #endif /* L_xload_1 */
2654
2655 #if defined (L_xload_2)
;; Load 2 bytes from flash (HHI8.7 clear) or RAM (HHI8.7 set) at Z.
2656 DEFUN __xload_2
2657     sbrc    HHI8, 7
2658     rjmp    1f
2659 #if defined (__AVR_HAVE_ELPM__)
2660     out     __RAMPZ__, HHI8
2661 #endif /* __AVR_HAVE_ELPM__ */
2662     .xload  D0, 2
2663     .xload  D1, 2
2664     ret
2665 1:  ld      D0, Z+
2666     ld      D1, Z+
2667     ret
2668 ENDF __xload_2
2669 #endif /* L_xload_2 */
2670
2671 #if defined (L_xload_3)
;; Load 3 bytes from flash (HHI8.7 clear) or RAM (HHI8.7 set) at Z.
2672 DEFUN __xload_3
2673     sbrc    HHI8, 7
2674     rjmp    1f
2675 #if defined (__AVR_HAVE_ELPM__)
2676     out     __RAMPZ__, HHI8
2677 #endif /* __AVR_HAVE_ELPM__ */
2678     .xload  D0, 3
2679     .xload  D1, 3
2680     .xload  D2, 3
2681     ret
2682 1:  ld      D0, Z+
2683     ld      D1, Z+
2684     ld      D2, Z+
2685     ret
2686 ENDF __xload_3
2687 #endif /* L_xload_3 */
2688
2689 #if defined (L_xload_4)
;; Load 4 bytes from flash (HHI8.7 clear) or RAM (HHI8.7 set) at Z.
2690 DEFUN __xload_4
2691     sbrc    HHI8, 7
2692     rjmp    1f
2693 #if defined (__AVR_HAVE_ELPM__)
2694     out     __RAMPZ__, HHI8
2695 #endif /* __AVR_HAVE_ELPM__ */
2696     .xload  D0, 4
2697     .xload  D1, 4
2698     .xload  D2, 4
2699     .xload  D3, 4
2700     ret
2701 1:  ld      D0, Z+
2702     ld      D1, Z+
2703     ld      D2, Z+
2704     ld      D3, Z+
2705     ret
2706 ENDF __xload_4
2707 #endif /* L_xload_4 */
2708
2709 #endif /* L_xload_{1|2|3|4} */
2710 #endif /* if !defined (__AVR_TINY__) */
2711
2712 #if !defined (__AVR_TINY__)
2713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2714 ;; memcopy from Address Space __pgmx to RAM
2715 ;; R23:Z = Source Address
2716 ;; X = Destination Address
2717 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2718
2719 #if defined (L_movmemx)
2720
2721 #define HHI8 23
2722 #define LOOP 24
2723
;; Entry point for an 8-bit byte count in LOOP (r24).
2724 DEFUN __movmemx_qi
2725 ;; #Bytes to copy fits in 8 Bits (1..255)
2726 ;; Zero-extend Loop Counter
2727 clr LOOP+1
2728 ;; FALLTHRU
2729 ENDF __movmemx_qi
2730
;; Copy LOOP+1:LOOP (r25:r24) bytes from HHI8:Z to X.
;; HHI8 bit 7 set => source is RAM; clear => source is flash.
2731 DEFUN __movmemx_hi
2732
2733 ;; Read from where?
2734 sbrc HHI8, 7
2735 rjmp 1f
2736
2737 ;; Read from Flash
2738
2739 #if defined (__AVR_HAVE_ELPM__)
;; RAMPZ supplies the high address byte for ELPM.
2740 out __RAMPZ__, HHI8
2741 #endif
2742
2743 0: ;; Load 1 Byte from Flash...
2744
2745 #if defined (__AVR_HAVE_ELPMX__)
2746 elpm r0, Z+
2747 #elif defined (__AVR_HAVE_ELPM__)
;; No ELPMX: increment Z by hand and propagate the carry into RAMPZ
;; so the copy may cross a 64 KiB flash boundary.
2748 elpm
2749 adiw r30, 1
2750 adc HHI8, __zero_reg__
2751 out __RAMPZ__, HHI8
2752 #elif defined (__AVR_HAVE_LPMX__)
2753 lpm r0, Z+
2754 #else
2755 lpm
2756 adiw r30, 1
2757 #endif
2758
2759 ;; ...and store that Byte to RAM Destination
2760 st X+, r0
2761 sbiw LOOP, 1
2762 brne 0b
2763 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2764 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2765 out __RAMPZ__, __zero_reg__
2766 #endif /* ELPM && RAMPD */
2767 ret
2768
2769 ;; Read from RAM
2770
2771 1: ;; Read 1 Byte from RAM...
2772 ld r0, Z+
2773 ;; and store that Byte to RAM Destination
2774 st X+, r0
2775 sbiw LOOP, 1
2776 brne 1b
2777 ret
2778 ENDF __movmemx_hi
2779
2780 #undef HHI8
2781 #undef LOOP
2782
2783 #endif /* L_movmemx */
2784 #endif /* !defined (__AVR_TINY__) */
2785
2786
2787 .section .text.libgcc.builtins, "ax", @progbits
2789
2790 /**********************************
2791 * Find first set Bit (ffs)
2792 **********************************/
2793
2794 #if defined (L_ffssi2)
2795 ;; find first set bit
2796 ;; r25:r24 = ffs32 (r25:r22)
2797 ;; clobbers: r22, r26
;; Scan bytes from LSB; r26 accumulates 8 per all-zero low byte, then
;; the first non-zero byte is handed to __loop_ffsqi2 in r24.
2798 DEFUN __ffssi2
2799 clr r26
2800 tst r22
2801 brne 1f
2802 subi r26, -8
2803 or r22, r23
2804 brne 1f
2805 subi r26, -8
2806 or r22, r24
2807 brne 1f
2808 subi r26, -8
2809 or r22, r25
2810 brne 1f
;; All four bytes zero: input was 0, so r25:r24 still hold 0 = ffs(0).
2811 ret
2812 1: mov r24, r22
2813 XJMP __loop_ffsqi2
2814 ENDF __ffssi2
2815 #endif /* defined (L_ffssi2) */
2816
2817 #if defined (L_ffshi2)
2818 ;; find first set bit
2819 ;; r25:r24 = ffs16 (r25:r24)
2820 ;; clobbers: r26
2821 DEFUN __ffshi2
2822 clr r26
2823 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2824 ;; Some cores have problem skipping 2-word instruction
2825 tst r24
2826 breq 2f
2827 #else
;; Skip the (2-word) XJMP when the low byte is zero.
2828 cpse r24, __zero_reg__
2829 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2830 1: XJMP __loop_ffsqi2
;; Low byte zero: count 8 and scan the high byte; if that is also
;; zero, r25:r24 = 0 = ffs(0).
2831 2: ldi r26, 8
2832 or r24, r25
2833 brne 1b
2834 ret
2835 ENDF __ffshi2
2836 #endif /* defined (L_ffshi2) */
2837
2838 #if defined (L_loop_ffsqi2)
2839 ;; Helper for ffshi2, ffssi2
2840 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2841 ;; r24 must be != 0
2842 ;; clobbers: r26
;; Shift r24 right until a 1 falls into carry; r26 counts the shifts.
2843 DEFUN __loop_ffsqi2
2844 inc r26
2845 lsr r24
2846 brcc __loop_ffsqi2
2847 mov r24, r26
2848 clr r25
2849 ret
2850 ENDF __loop_ffsqi2
2851 #endif /* defined (L_loop_ffsqi2) */
2852
2853
2854 /**********************************
2856 * Count trailing Zeros (ctz)
2857 **********************************/
2858
2859 #if defined (L_ctzsi2)
2860 ;; count trailing zeros
2861 ;; r25:r24 = ctz32 (r25:r22)
2862 ;; clobbers: r26, r22
2863 ;; ctz(0) = 255
2864 ;; Note that ctz(0) is undefined for GCC
;; ctz(x) = ffs(x) - 1; for x = 0, ffs returns 0 and the dec yields 255.
2865 DEFUN __ctzsi2
2866 XCALL __ffssi2
2867 dec r24
2868 ret
2869 ENDF __ctzsi2
2870 #endif /* defined (L_ctzsi2) */
2871
2872 #if defined (L_ctzhi2)
2873 ;; count trailing zeros
2874 ;; r25:r24 = ctz16 (r25:r24)
2875 ;; clobbers: r26
2876 ;; ctz(0) = 255
2877 ;; Note that ctz(0) is undefined for GCC
;; ctz(x) = ffs(x) - 1; for x = 0, ffs returns 0 and the dec yields 255.
2878 DEFUN __ctzhi2
2879 XCALL __ffshi2
2880 dec r24
2881 ret
2882 ENDF __ctzhi2
2883 #endif /* defined (L_ctzhi2) */
2884
2885
2886 /**********************************
2888 * Count leading Zeros (clz)
2889 **********************************/
2890
2891 #if defined (L_clzdi2)
2892 ;; count leading zeros
2893 ;; r25:r24 = clz64 (r25:r18)
2894 ;; clobbers: r22, r23, r26
;; clz of the high 32 bits; only if that returned 32 (bit 5 set) is
;; the low word scanned, with 32 added to its result.
2895 DEFUN __clzdi2
2896 XCALL __clzsi2
2897 sbrs r24, 5
2898 ret
2899 mov_l r22, r18
2900 mov_h r23, r19
2901 mov_l r24, r20
2902 mov_h r25, r21
2903 XCALL __clzsi2
2904 subi r24, -32
2905 ret
2906 ENDF __clzdi2
2907 #endif /* defined (L_clzdi2) */
2908
2909 #if defined (L_clzsi2)
2910 ;; count leading zeros
2911 ;; r25:r24 = clz32 (r25:r22)
2912 ;; clobbers: r26
;; clz of the high 16 bits; only if that returned 16 (bit 4 set) is
;; the low word scanned, with 16 added to its result.
2913 DEFUN __clzsi2
2914 XCALL __clzhi2
2915 sbrs r24, 4
2916 ret
2917 mov_l r24, r22
2918 mov_h r25, r23
2919 XCALL __clzhi2
2920 subi r24, -16
2921 ret
2922 ENDF __clzsi2
2923 #endif /* defined (L_clzsi2) */
2924
2925 #if defined (L_clzhi2)
2926 ;; count leading zeros
2927 ;; r25:r24 = clz16 (r25:r24)
2928 ;; clobbers: r26
;; r26 accumulates the count: +8 if the high byte is zero, +3 and a
;; nibble swap if the remaining byte is < 16, then shift left until
;; a 1 reaches carry.  clz(0) returns 16.
2929 DEFUN __clzhi2
2930 clr r26
2931 tst r25
2932 brne 1f
2933 subi r26, -8
2934 or r25, r24
2935 brne 1f
;; Both bytes zero: clz16(0) = 16.
2936 ldi r24, 16
2937 ret
;; High nibble of the scanned byte zero: pre-count 3 and swap so the
;; shift loop starts at the low nibble.
2938 1: cpi r25, 16
2939 brsh 3f
2940 subi r26, -3
2941 swap r25
2942 2: inc r26
2943 3: lsl r25
2944 brcc 2b
2945 mov r24, r26
2946 clr r25
2947 ret
2948 ENDF __clzhi2
2949 #endif /* defined (L_clzhi2) */
2950
2951
2952 /**********************************
2954 * Parity
2955 **********************************/
2956
2957 #if defined (L_paritydi2)
2958 ;; r25:r24 = parity64 (r25:r18)
2959 ;; clobbers: __tmp_reg__
;; XOR-fold the upper four bytes into r24, then reuse parity32.
2960 DEFUN __paritydi2
2961 eor r24, r18
2962 eor r24, r19
2963 eor r24, r20
2964 eor r24, r21
2965 XJMP __paritysi2
2966 ENDF __paritydi2
2967 #endif /* defined (L_paritydi2) */
2968
2969 #if defined (L_paritysi2)
2970 ;; r25:r24 = parity32 (r25:r22)
2971 ;; clobbers: __tmp_reg__
;; XOR-fold the lower two bytes into r24, then reuse parity16.
2972 DEFUN __paritysi2
2973 eor r24, r22
2974 eor r24, r23
2975 XJMP __parityhi2
2976 ENDF __paritysi2
2977 #endif /* defined (L_paritysi2) */
2978
2979 #if defined (L_parityhi2)
2980 ;; r25:r24 = parity16 (r25:r24)
2981 ;; clobbers: __tmp_reg__
;; Fold the high byte into the low byte and fall through to parity8.
2982 DEFUN __parityhi2
2983 eor r24, r25
2984 ;; FALLTHRU
2985 ENDF __parityhi2
2986
2987 ;; r25:r24 = parity8 (r24)
2988 ;; clobbers: __tmp_reg__
;; Successive XOR-folds: 8 bits -> 4 bits -> bits {3,0} -> bit 0.
2989 DEFUN __parityqi2
2990 ;; parity is in r24[0..7]
2991 mov __tmp_reg__, r24
2992 swap __tmp_reg__
2993 eor r24, __tmp_reg__
2994 ;; parity is in r24[0..3]
2995 subi r24, -4
2996 andi r24, -5
2997 subi r24, -6
2998 ;; parity is in r24[0,3]
2999 sbrc r24, 3
3000 inc r24
3001 ;; parity is in r24[0]
3002 andi r24, 1
3003 clr r25
3004 ret
3005 ENDF __parityqi2
3006 #endif /* defined (L_parityhi2) */
3007
3008
3009 /**********************************
3011 * Population Count
3012 **********************************/
3013
3014 #if defined (L_popcounthi2)
3015 ;; population count
3016 ;; r25:r24 = popcount16 (r25:r24)
3017 ;; clobbers: __tmp_reg__
;; popcount8 of each byte; the low-byte count is pushed and added back
;; by the shared __popcounthi2_tail.
3018 DEFUN __popcounthi2
3019 XCALL __popcountqi2
3020 push r24
3021 mov r24, r25
3022 XCALL __popcountqi2
3023 clr r25
3024 ;; FALLTHRU
3025 ENDF __popcounthi2
3026
;; Tail helper shared with popcount32/64: r24 += popped partial count.
3027 DEFUN __popcounthi2_tail
3028 pop __tmp_reg__
3029 add r24, __tmp_reg__
3030 ret
3031 ENDF __popcounthi2_tail
3032 #endif /* defined (L_popcounthi2) */
3033
3034 #if defined (L_popcountsi2)
3035 ;; population count
3036 ;; r25:r24 = popcount32 (r25:r22)
3037 ;; clobbers: __tmp_reg__
;; popcount16 of each half; partial count is pushed and summed by
;; __popcounthi2_tail.
3038 DEFUN __popcountsi2
3039 XCALL __popcounthi2
3040 push r24
3041 mov_l r24, r22
3042 mov_h r25, r23
3043 XCALL __popcounthi2
3044 XJMP __popcounthi2_tail
3045 ENDF __popcountsi2
3046 #endif /* defined (L_popcountsi2) */
3047
3048 #if defined (L_popcountdi2)
3049 ;; population count
3050 ;; r25:r24 = popcount64 (r25:r18)
3051 ;; clobbers: r22, r23, __tmp_reg__
;; popcount32 of each half; partial count is pushed and summed by
;; __popcounthi2_tail.
3052 DEFUN __popcountdi2
3053 XCALL __popcountsi2
3054 push r24
3055 mov_l r22, r18
3056 mov_h r23, r19
3057 mov_l r24, r20
3058 mov_h r25, r21
3059 XCALL __popcountsi2
3060 XJMP __popcounthi2_tail
3061 ENDF __popcountdi2
3062 #endif /* defined (L_popcountdi2) */
3063
3064 #if defined (L_popcountqi2)
3065 ;; population count
3066 ;; r24 = popcount8 (r24)
3067 ;; clobbers: __tmp_reg__
;; r24 starts as bit 0; each lsr moves the next bit into carry and
;; adc accumulates it.  After the 7th lsr __tmp_reg__ is 0, so the
;; final "adc r24, __tmp_reg__" adds just the carry (bit 7).
3068 DEFUN __popcountqi2
3069 mov __tmp_reg__, r24
3070 andi r24, 1
3071 lsr __tmp_reg__
3072 lsr __tmp_reg__
3073 adc r24, __zero_reg__
3074 lsr __tmp_reg__
3075 adc r24, __zero_reg__
3076 lsr __tmp_reg__
3077 adc r24, __zero_reg__
3078 lsr __tmp_reg__
3079 adc r24, __zero_reg__
3080 lsr __tmp_reg__
3081 adc r24, __zero_reg__
3082 lsr __tmp_reg__
3083 adc r24, __tmp_reg__
3084 ret
3085 ENDF __popcountqi2
3086 #endif /* defined (L_popcountqi2) */
3087
3088
3089 /**********************************
3091 * Swap bytes
3092 **********************************/
3093
3094 ;; swap two registers with different register number
;; Classic XOR swap; needs no temporary but requires \a != \b.
3095 .macro bswap a, b
3096 eor \a, \b
3097 eor \b, \a
3098 eor \a, \b
3099 .endm
3100
3101 #if defined (L_bswapsi2)
3102 ;; swap bytes
3103 ;; r25:r22 = bswap32 (r25:r22)
;; Reverse byte order by exchanging the outer and inner byte pairs.
3104 DEFUN __bswapsi2
3105 bswap r22, r25
3106 bswap r23, r24
3107 ret
3108 ENDF __bswapsi2
3109 #endif /* defined (L_bswapsi2) */
3110
3111 #if defined (L_bswapdi2)
3112 ;; swap bytes
3113 ;; r25:r18 = bswap64 (r25:r18)
;; Reverse byte order by exchanging the four mirrored byte pairs.
3114 DEFUN __bswapdi2
3115 bswap r18, r25
3116 bswap r19, r24
3117 bswap r20, r23
3118 bswap r21, r22
3119 ret
3120 ENDF __bswapdi2
3121 #endif /* defined (L_bswapdi2) */
3122
3123
3124 /**********************************
3126 * 64-bit shifts
3127 **********************************/
3128
3129 #if defined (L_ashrdi3)
3130 ;; Arithmetic shift right
3131 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
;; Store the sign bit in __zero_reg__ bit 0 and fall through; the
;; shared body turns it into the fill byte.
3132 DEFUN __ashrdi3
3133 bst r25, 7
3134 bld __zero_reg__, 0
3135 ;; FALLTHRU
3136 ENDF __ashrdi3
3137
3138 ;; Logic shift right
3139 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
;; lsr moves the (possible) sign bit into carry and restores
;; __zero_reg__ to 0; sbc then makes __tmp_reg__ the fill byte:
;; 0xFF for a negative ashr input, 0x00 otherwise.
3140 DEFUN __lshrdi3
3141 lsr __zero_reg__
3142 sbc __tmp_reg__, __tmp_reg__
3143 push r16
;; Whole-byte moves while the count is >= 8, filling from __tmp_reg__.
3144 0: cpi r16, 8
3145 brlo 2f
3146 subi r16, 8
3147 mov r18, r19
3148 mov r19, r20
3149 mov r20, r21
3150 mov r21, r22
3151 mov r22, r23
3152 mov r23, r24
3153 mov r24, r25
3154 mov r25, __tmp_reg__
3155 rjmp 0b
;; Remaining 0..7 single-bit shifts; asr keeps __tmp_reg__'s fill value.
3156 1: asr __tmp_reg__
3157 ror r25
3158 ror r24
3159 ror r23
3160 ror r22
3161 ror r21
3162 ror r20
3163 ror r19
3164 ror r18
3165 2: dec r16
3166 brpl 1b
3167 pop r16
3168 ret
3169 ENDF __lshrdi3
3170 #endif /* defined (L_ashrdi3) */
3171
3172 #if defined (L_ashldi3)
3173 ;; Shift left
3174 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
;; Whole-byte moves while the count is >= 8, then 0..7 single-bit
;; shifts.  r16 is preserved via push/pop.
3175 DEFUN __ashldi3
3176 push r16
3177 0: cpi r16, 8
3178 brlo 2f
3179 mov r25, r24
3180 mov r24, r23
3181 mov r23, r22
3182 mov r22, r21
3183 mov r21, r20
3184 mov r20, r19
3185 mov r19, r18
3186 clr r18
3187 subi r16, 8
3188 rjmp 0b
3189 1: lsl r18
3190 rol r19
3191 rol r20
3192 rol r21
3193 rol r22
3194 rol r23
3195 rol r24
3196 rol r25
3197 2: dec r16
3198 brpl 1b
3199 pop r16
3200 ret
3201 ENDF __ashldi3
3202 #endif /* defined (L_ashldi3) */
3203
3204 #if defined (L_rotldi3)
3205 ;; Rotate left
3206 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
;; Whole-byte rotations while the count is >= 8 (top byte wraps to
;; the bottom via __tmp_reg__), then 0..7 single-bit rotates where
;; adc feeds the carried-out MSB back into bit 0.
3207 DEFUN __rotldi3
3208 push r16
3209 0: cpi r16, 8
3210 brlo 2f
3211 subi r16, 8
3212 mov __tmp_reg__, r25
3213 mov r25, r24
3214 mov r24, r23
3215 mov r23, r22
3216 mov r22, r21
3217 mov r21, r20
3218 mov r20, r19
3219 mov r19, r18
3220 mov r18, __tmp_reg__
3221 rjmp 0b
3222 1: lsl r18
3223 rol r19
3224 rol r20
3225 rol r21
3226 rol r22
3227 rol r23
3228 rol r24
3229 rol r25
3230 adc r18, __zero_reg__
3231 2: dec r16
3232 brpl 1b
3233 pop r16
3234 ret
3235 ENDF __rotldi3
3236 #endif /* defined (L_rotldi3) */
3237
3238
3239 .section .text.libgcc.fmul, "ax", @progbits
3241
3242 /***********************************************************/
3243 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3244 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3245 /***********************************************************/
3246
3247 #define A1 24
3248 #define B1 25
3249 #define C0 22
3250 #define C1 23
3251 #define A0 __tmp_reg__
3252
3253 #ifdef L_fmuls
3254 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3255 ;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x signed: record the result sign in A0.7, make B1
;;; non-negative, and let __fmulsu_exit handle |A1| and negation.
3256 DEFUN __fmuls
3257 ;; A0.7 = negate result?
3258 mov A0, A1
3259 eor A0, B1
3260 ;; B1 = |B1|
3261 sbrc B1, 7
3262 neg B1
3263 XJMP __fmulsu_exit
3264 ENDF __fmuls
3265 #endif /* L_fmuls */
3266
3267 #ifdef L_fmulsu
3268 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3269 ;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x unsigned: the result sign is just A1's sign, copied to A0.7.
3270 DEFUN __fmulsu
3271 ;; A0.7 = negate result?
3272 mov A0, A1
3273 ;; FALLTHRU
3274 ENDF __fmulsu
3275
3276 ;; Helper for __fmuls and __fmulsu
;; Takes |A1|, multiplies unsigned, then negates C iff A0.7 is set.
3277 DEFUN __fmulsu_exit
3278 ;; A1 = |A1|
3279 sbrc A1, 7
3280 neg A1
3281 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3282 ;; Some cores have problem skipping 2-word instruction
3283 tst A0
3284 brmi 1f
3285 #else
;; Skip the (2-word) XJMP when A0.7 is set.
3286 sbrs A0, 7
3287 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3288 XJMP __fmul
3289 1: XCALL __fmul
3290 ;; C = -C iff A0.7 = 1
3291 NEG2 C0
3292 ret
3293 ENDF __fmulsu_exit
3294 #endif /* L_fmulsu */
3295
3296
3297 #ifdef L_fmul
3298 ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
3299 ;;; Clobbers: r24, r25, __tmp_reg__
;;; Unsigned 1.7 x 1.7 fractional multiply by shift-and-add: for each
;;; set MSB of B, add the (progressively right-shifted) 16-bit A1:A0
;;; into C1:C0.  Loop ends when B becomes zero.
3300 DEFUN __fmul
3301 ; clear result
3302 clr C0
3303 clr C1
3304 clr A0
3305 1: tst B1
3306 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
3307 2: brpl 3f
3308 ;; C += A
3309 add C0, A0
3310 adc C1, A1
3311 3: ;; A >>= 1
3312 lsr A1
3313 ror A0
3314 ;; B <<= 1
3315 lsl B1
3316 brne 2b
3317 ret
3318 ENDF __fmul
3319 #endif /* L_fmul */
3320
3321 #undef A0
3322 #undef A1
3323 #undef B1
3324 #undef C0
3325 #undef C1
3326
3327 #include "lib1funcs-fixed.S"
3328