lib1funcs.S revision 1.1.1.7 1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998-2018 Free Software Foundation, Inc.
3 Contributed by Denis Chertykov <chertykov (at) gmail.com>
4
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #if defined (__AVR_TINY__)
25 #define __zero_reg__ r17
26 #define __tmp_reg__ r16
27 #else
28 #define __zero_reg__ r1
29 #define __tmp_reg__ r0
30 #endif
31 #define __SREG__ 0x3f
32 #if defined (__AVR_HAVE_SPH__)
33 #define __SP_H__ 0x3e
34 #endif
35 #define __SP_L__ 0x3d
36 #define __RAMPZ__ 0x3B
37 #define __EIND__ 0x3C
38
39 /* Most of the functions here are called directly from avr.md
40 patterns, instead of using the standard libcall mechanisms.
41 This can make better code because GCC knows exactly which
42 of the call-used registers (not all of them) are clobbered. */
43
44 /* FIXME: At present, there is no SORT directive in the linker
45 script so that we must not assume that different modules
46 in the same input section like .libgcc.text.mul will be
47 located close together. Therefore, we cannot use
48 RCALL/RJMP to call a function like __udivmodhi4 from
49 __divmodhi4 and have to use lengthy XCALL/XJMP even
50 though they are in the same input section and all same
51 input sections together are small enough to reach every
52 location with a RCALL/RJMP instruction. */
53
54 #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 #error device not supported
56 #endif
57
;; mov_l DEST, SRC -- copy the low byte of a 16-bit register pair.
;; On devices with MOVW this copies BOTH bytes at once; the companion
;; mov_h is then empty.  Always use mov_l/mov_h as a pair.
58 .macro mov_l r_dest, r_src
59 #if defined (__AVR_HAVE_MOVW__)
60 movw \r_dest, \r_src
61 #else
62 mov \r_dest, \r_src
63 #endif
64 .endm
65
;; mov_h DEST, SRC -- copy the high byte of a 16-bit register pair.
;; Empty on MOVW devices because the preceding mov_l already moved
;; both bytes.
66 .macro mov_h r_dest, r_src
67 #if defined (__AVR_HAVE_MOVW__)
68 ; empty
69 #else
70 mov \r_dest, \r_src
71 #endif
72 .endm
73
;; wmov DEST, SRC -- copy a 16-bit register pair in one macro.
;; Without MOVW this expands to two MOVs; note the \r_dest+1 arithmetic
;; requires the arguments to be plain register NUMBERS (e.g. 26),
;; not register names like r26.
74 .macro wmov r_dest, r_src
75 #if defined (__AVR_HAVE_MOVW__)
76 movw \r_dest, \r_src
77 #else
78 mov \r_dest, \r_src
79 mov \r_dest+1, \r_src+1
80 #endif
81 .endm
82
83 #if defined (__AVR_HAVE_JMP_CALL__)
84 #define XCALL call
85 #define XJMP jmp
86 #else
87 #define XCALL rcall
88 #define XJMP rjmp
89 #endif
90
91 #if defined (__AVR_HAVE_EIJMP_EICALL__)
92 #define XICALL eicall
93 #define XIJMP eijmp
94 #else
95 #define XICALL icall
96 #define XIJMP ijmp
97 #endif
98
99 ;; Prologue stuff
100
;; do_prologue_saves N_PUSHED [N_FRAME]
;; Save N_PUSHED call-saved registers and set up a frame of N_FRAME
;; bytes by jumping into the shared __prologue_saves__ sequence:
;; X (r27:r26) carries the frame size, Z (r31:r30) the address to
;; continue at.  The entry offset skips the saves that are not needed.
101 .macro do_prologue_saves n_pushed n_frame=0
102 ldi r26, lo8(\n_frame)
103 ldi r27, hi8(\n_frame)
104 ldi r30, lo8(gs(.L_prologue_saves.\@))
105 ldi r31, hi8(gs(.L_prologue_saves.\@))
106 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107 .L_prologue_saves.\@:
108 .endm
109
110 ;; Epilogue stuff
111
;; do_epilogue_restores N_PUSHED [N_FRAME]
;; Drop a frame of N_FRAME bytes, then restore N_PUSHED call-saved
;; registers and return by jumping into the shared
;; __epilogue_restores__ sequence; r30 tells it how many registers
;; to restore.
112 .macro do_epilogue_restores n_pushed n_frame=0
113 in r28, __SP_L__
114 #ifdef __AVR_HAVE_SPH__
115 in r29, __SP_H__
;; Frame larger than ADIW's 6-bit immediate: add via SUBI/SBCI of -frame.
116 .if \n_frame > 63
117 subi r28, lo8(-\n_frame)
118 sbci r29, hi8(-\n_frame)
119 .elseif \n_frame > 0
120 adiw r28, \n_frame
121 .endif
122 #else
;; Devices without SPH: the high byte of the frame pointer is 0.
123 clr r29
124 .if \n_frame > 0
125 subi r28, lo8(-\n_frame)
126 .endif
127 #endif /* HAVE SPH */
128 ldi r30, \n_pushed
129 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
130 .endm
131
132 ;; Support function entry and exit for convenience
133
;; wsubi REG, IMM -- subtract a 16-bit immediate from a register pair.
;; AVR_TINY has no SBIW, so fall back to SUBI/SBCI there; the argument
;; must be a plain register number so \r_arg1+1 works.
134 .macro wsubi r_arg1, i_arg2
135 #if defined (__AVR_TINY__)
136 subi \r_arg1, lo8(\i_arg2)
137 sbci \r_arg1+1, hi8(\i_arg2)
138 #else
139 sbiw \r_arg1, \i_arg2
140 #endif
141 .endm
142
;; waddi REG, IMM -- add a 16-bit immediate to a register pair.
;; AVR_TINY has no ADIW; there is no ADDI either, so the TINY variant
;; subtracts the negated immediate instead.
143 .macro waddi r_arg1, i_arg2
144 #if defined (__AVR_TINY__)
145 subi \r_arg1, lo8(-\i_arg2)
146 sbci \r_arg1+1, hi8(-\i_arg2)
147 #else
148 adiw \r_arg1, \i_arg2
149 #endif
150 .endm
151
;; DEFUN NAME -- open a global function: export NAME, start .func
;; debug bookkeeping, and place the entry label.  Close with ENDF.
152 .macro DEFUN name
153 .global \name
154 .func \name
155 \name:
156 .endm
157
;; ENDF NAME -- close a function opened by DEFUN and record its size
;; for the symbol table.
158 .macro ENDF name
159 .size \name, .-\name
160 .endfunc
161 .endm
162
;; FALIAS NAME -- emit NAME as a zero-size global alias for the code
;; that follows (label plus .func/.size bookkeeping, no body of its own).
163 .macro FALIAS name
164 .global \name
165 .func \name
166 \name:
167 .size \name, .-\name
168 .endfunc
169 .endm
170
171 ;; Skip next instruction, typically a jump target
172 #if defined(__AVR_TINY__)
173 #define skip cpse 0,0
174 #else
175 #define skip cpse 16,16
176 #endif
177
178 ;; Negate a 2-byte value held in consecutive registers
;; NEG2 REG -- two's-complement negate REG+1:REG.
;; -x == ~x + 1: complement the high byte, NEG the low byte (which
;; sets C unless the byte was zero), then SBCI ...,-1 adds 1-C to
;; propagate the borrow into the high byte.
179 .macro NEG2 reg
180 com \reg+1
181 neg \reg
182 sbci \reg+1, -1
183 .endm
184
185 ;; Negate a 4-byte value held in consecutive registers
186 ;; Sets the V flag for signed overflow tests if REG >= 16
;; NEG4 REG -- two's-complement negate the 4-byte value REG+3..REG.
187 .macro NEG4 reg
188 com \reg+3
189 com \reg+2
190 com \reg+1
;; Registers >= r16 can use SBCI to propagate the +1 of ~x + 1.
191 .if \reg >= 16
192 neg \reg
193 sbci \reg+1, -1
194 sbci \reg+2, -1
195 sbci \reg+3, -1
196 .else
;; Low registers have no immediate ops: COM always sets Carry,
;; which supplies the "+1" for the following ADC chain.
197 com \reg
198 adc \reg, __zero_reg__
199 adc \reg+1, __zero_reg__
200 adc \reg+2, __zero_reg__
201 adc \reg+3, __zero_reg__
202 .endif
203 .endm
204
205 #define exp_lo(N) hlo8 ((N) << 23)
206 #define exp_hi(N) hhi8 ((N) << 23)
207
208
209 .section .text.libgcc.mul, "ax", @progbits
211
212 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
213 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
214 #if !defined (__AVR_HAVE_MUL__)
215 /*******************************************************
216 Multiplication 8 x 8 without MUL
217 *******************************************************/
218 #if defined (L_mulqi3)
219
220 #define r_arg2 r22 /* multiplicand */
221 #define r_arg1 r24 /* multiplier */
222 #define r_res __tmp_reg__ /* result */
223
;; R24 = R24 * R22 by shift-and-add.
;; Terminates early as soon as either operand becomes zero.
;; Clobbers: r22 (shifted away) and __tmp_reg__.
224 DEFUN __mulqi3
225 clr r_res ; clear result
226 __mulqi3_loop:
227 sbrc r_arg1,0 ; if multiplier bit 0 set...
228 add r_res,r_arg2 ; ...accumulate the multiplicand
229 add r_arg2,r_arg2 ; shift multiplicand
230 breq __mulqi3_exit ; while multiplicand != 0
231 lsr r_arg1 ;
232 brne __mulqi3_loop ; exit if multiplier = 0
233 __mulqi3_exit:
234 mov r_arg1,r_res ; result to return register
235 ret
236 ENDF __mulqi3
237
238 #undef r_arg2
239 #undef r_arg1
240 #undef r_res
241
242 #endif /* defined (L_mulqi3) */
243
244
245 /*******************************************************
246 Widening Multiplication 16 = 8 x 8 without MUL
247 Multiplication 16 x 16 without MUL
248 *******************************************************/
249
250 #define A0 22
251 #define A1 23
252 #define B0 24
253 #define BB0 20
254 #define B1 25
255 ;; Output overlaps input, thus expand result in CC0/1
256 #define C0 24
257 #define C1 25
258 #define CC0 __tmp_reg__
259 #define CC1 21
260
261 #if defined (L_umulqihi3)
262 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
263 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
264 ;;; Clobbers: __tmp_reg__, R21..R23
;; Zero-extend both 8-bit inputs and reuse the 16x16 routine.
265 DEFUN __umulqihi3
266 clr A1
267 clr B1
268 XJMP __mulhi3
269 ENDF __umulqihi3
270 #endif /* L_umulqihi3 */
271
272 #if defined (L_mulqihi3)
273 ;;; R25:R24 = (signed int) R22 * (signed int) R24
274 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
275 ;;; Clobbers: __tmp_reg__, R20..R23
;; Strategy: compute the product with A zero-extended (faster in the
;; shift-and-add loop), then fix up the high byte afterwards:
;; if A < 0 the unsigned result is off by B * 256, so BB0 holds B0
;; when A is negative and 0 otherwise, and C1 -= BB0 at the end.
276 DEFUN __mulqihi3
277 ;; Sign-extend B0
278 clr B1
279 sbrc B0, 7
280 com B1
281 ;; The multiplication runs twice as fast if A1 is zero, thus:
282 ;; Zero-extend A0
283 clr A1
284 #ifdef __AVR_HAVE_JMP_CALL__
285 ;; Store B0 * sign of A
286 clr BB0
287 sbrc A0, 7
288 mov BB0, B0
289 call __mulhi3
290 #else /* have no CALL */
291 ;; Skip sign-extension of A if A >= 0
292 ;; Same size as with the first alternative but avoids errata skip
293 ;; and is faster if A >= 0
;; A >= 0: tail-jump straight to __mulhi3 (no fixup needed).
294 sbrs A0, 7
295 rjmp __mulhi3
296 ;; If A < 0 store B
297 mov BB0, B0
298 rcall __mulhi3
299 #endif /* HAVE_JMP_CALL */
300 ;; 1-extend A after the multiplication
301 sub C1, BB0
302 ret
303 ENDF __mulqihi3
304 #endif /* L_mulqihi3 */
305
306 #if defined (L_mulhi3)
307 ;;; R25:R24 = R23:R22 * R25:R24
308 ;;; (C1:C0) = (A1:A0) * (B1:B0)
309 ;;; Clobbers: __tmp_reg__, R21..R23
;; Shift-and-add: scan A from its LSB, accumulating left-shifted B
;; in CC1:CC0 (which do not overlap the B0/B1 input registers).
;; Exits early when either A or B runs out of one-bits.
310 DEFUN __mulhi3
311
312 ;; Clear result
313 clr CC0
314 clr CC1
315 rjmp 3f
316 1:
317 ;; Bit n of A is 1 --> C += B << n
318 add CC0, B0
319 adc CC1, B1
320 2:
321 lsl B0
322 rol B1
323 3:
324 ;; If B == 0 we are ready
325 wsubi B0, 0
326 breq 9f
327
328 ;; Carry = n-th bit of A
329 lsr A1
330 ror A0
331 ;; If bit n of A is set, then go add B * 2^n to C
332 brcs 1b
333
334 ;; Carry = 0 --> The ROR above acts like CP A0, 0
335 ;; Thus, it is sufficient to CPC the high part to test A against 0
336 cpc A1, __zero_reg__
337 ;; Only proceed if A != 0
338 brne 2b
339 9:
340 ;; Move Result into place
341 mov C0, CC0
342 mov C1, CC1
343 ret
344 ENDF __mulhi3
345 #endif /* L_mulhi3 */
346
347 #undef A0
348 #undef A1
349 #undef B0
350 #undef BB0
351 #undef B1
352 #undef C0
353 #undef C1
354 #undef CC0
355 #undef CC1
356
357
358 #define A0 22
360 #define A1 A0+1
361 #define A2 A0+2
362 #define A3 A0+3
363
364 #define B0 18
365 #define B1 B0+1
366 #define B2 B0+2
367 #define B3 B0+3
368
369 #define CC0 26
370 #define CC1 CC0+1
371 #define CC2 30
372 #define CC3 CC2+1
373
374 #define C0 22
375 #define C1 C0+1
376 #define C2 C0+2
377 #define C3 C0+3
378
379 /*******************************************************
380 Widening Multiplication 32 = 16 x 16 without MUL
381 *******************************************************/
382
383 #if defined (L_umulhisi3)
;; Widening 32 = 16 x 16 unsigned multiply without MUL:
;; move B (R25:R24) into B1:B0, zero-extend both operands and
;; fall through to the 32x32 routine.
384 DEFUN __umulhisi3
385 wmov B0, 24
386 ;; Zero-extend B
387 clr B2
388 clr B3
389 ;; Zero-extend A
;; B2/B3 are known to be zero here, so a word move clears A3:A2.
390 wmov A2, B2
391 XJMP __mulsi3
392 ENDF __umulhisi3
393 #endif /* L_umulhisi3 */
394
395 #if defined (L_mulhisi3)
;; Widening 32 = 16 x 16 signed multiply without MUL.
396 DEFUN __mulhisi3
397 wmov B0, 24
398 ;; Sign-extend B
;; LSL moves B's sign bit into Carry; SBC Bn,Bn then yields
;; 0x00 or 0xFF, replicating the sign into B3:B2.
399 lsl r25
400 sbc B2, B2
401 mov B3, B2
402 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
403 ;; Sign-extend A
404 clr A2
405 sbrc A1, 7
406 com A2
407 mov A3, A2
408 XJMP __mulsi3
409 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
410 ;; Zero-extend A and __mulsi3 will run at least twice as fast
411 ;; compared to a sign-extended A.
412 clr A2
413 clr A3
414 sbrs A1, 7
415 XJMP __mulsi3
416 ;; If A < 0 then perform the B * 0xffff.... before the
417 ;; very multiplication by initializing the high part of the
418 ;; result CC with -B.
;; A3:A2 are zero here, so wmov clears CC3:CC2 before subtracting B.
419 wmov CC2, A2
420 sub CC2, B0
421 sbc CC3, B1
422 XJMP __mulsi3_helper
423 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
424 ENDF __mulhisi3
425 #endif /* L_mulhisi3 */
426
427
428 /*******************************************************
429 Multiplication 32 x 32 without MUL
430 *******************************************************/
431
432 #if defined (L_mulsi3)
;; 32 x 32 multiply without MUL: R25:R22 *= B (R21:R18, or for
;; AVR_TINY fetched from the caller's stack).
;; NB: `$' separates several instructions written on one line.
433 DEFUN __mulsi3
434 #if defined (__AVR_TINY__)
435 in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
436 in r27, __SP_H__
437 subi r26, lo8(-3) ; Add 3 to point past return address
438 sbci r27, hi8(-3)
439 push B0 ; save callee saved regs
440 push B1
441 ld B0, X+ ; load from caller stack
442 ld B1, X+
443 ld B2, X+
444 ld B3, X
445 #endif
446 ;; Clear result
447 clr CC2
448 clr CC3
449 ;; FALLTHRU
450 ENDF __mulsi3
451
;; Entry with CC3:CC2 pre-initialized (used by __mulhisi3 to fold in
;; the sign fixup before the shift-and-add loop runs).
452 DEFUN __mulsi3_helper
453 clr CC0
454 clr CC1
455 rjmp 3f
456
457 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
458 ;; CC += B
459 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
460
461 2: ;; B <<= 1
462 lsl B0 $ rol B1 $ rol B2 $ rol B3
463
464 3: ;; A >>= 1: Carry = n-th bit of A
465 lsr A3 $ ror A2 $ ror A1 $ ror A0
466
467 brcs 1b
468 ;; Only continue if A != 0
;; SBC/SBCI keep Z only if it was already set, so this chains with
;; the Z flag from ROR A0 above: together with the 16-bit test of
;; A3:A2 below, all four bytes of A are checked against zero.
469 sbci A1, 0
470 brne 2b
471 wsubi A2, 0
472 brne 2b
473
474 ;; All bits of A are consumed: Copy result to return register C
475 wmov C0, CC0
476 wmov C2, CC2
477 #if defined (__AVR_TINY__)
478 pop B1 ; restore callee saved regs
479 pop B0
480 #endif /* defined (__AVR_TINY__) */
481
482 ret
483 ENDF __mulsi3_helper
484 #endif /* L_mulsi3 */
485
486 #undef A0
487 #undef A1
488 #undef A2
489 #undef A3
490 #undef B0
491 #undef B1
492 #undef B2
493 #undef B3
494 #undef C0
495 #undef C1
496 #undef C2
497 #undef C3
498 #undef CC0
499 #undef CC1
500 #undef CC2
501 #undef CC3
502
503 #endif /* !defined (__AVR_HAVE_MUL__) */
504 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
505
506 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
508 #if defined (__AVR_HAVE_MUL__)
509 #define A0 26
510 #define B0 18
511 #define C0 22
512
513 #define A1 A0+1
514
515 #define B1 B0+1
516 #define B2 B0+2
517 #define B3 B0+3
518
519 #define C1 C0+1
520 #define C2 C0+2
521 #define C3 C0+3
522
523 /*******************************************************
524 Widening Multiplication 32 = 16 x 16 with MUL
525 *******************************************************/
526
527 #if defined (L_mulhisi3)
528 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
529 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
530 ;;; Clobbers: __tmp_reg__
;; Compute the unsigned product first, then correct for the signs:
;; each negative 16-bit operand makes the unsigned result too large
;; by (other operand) << 16.
531 DEFUN __mulhisi3
532 XCALL __umulhisi3
533 ;; Sign-extend B
534 tst B1
535 brpl 1f
;; B < 0: subtract A << 16 from the result.
536 sub C2, A0
537 sbc C3, A1
538 1: ;; Sign-extend A
539 XJMP __usmulhisi3_tail
540 ENDF __mulhisi3
541 #endif /* L_mulhisi3 */
542
543 #if defined (L_usmulhisi3)
544 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
545 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
546 ;;; Clobbers: __tmp_reg__
547 DEFUN __usmulhisi3
548 XCALL __umulhisi3
549 ;; FALLTHRU
550 ENDF __usmulhisi3
551
;; Shared sign fixup: if A < 0 the unsigned product is too large by
;; B << 16, so subtract it from the high word.  Also used by
;; __mulhisi3 after it has handled B's sign.
552 DEFUN __usmulhisi3_tail
553 ;; Sign-extend A
554 sbrs A1, 7
555 ret
556 sub C2, B0
557 sbc C3, B1
558 ret
559 ENDF __usmulhisi3_tail
560 #endif /* L_usmulhisi3 */
561
562 #if defined (L_umulhisi3)
563 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
564 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
565 ;;; Clobbers: __tmp_reg__
;; Schoolbook 16x16: A0*B0 -> C1:C0, A1*B1 -> C3:C2, then the two
;; cross products A0*B1 and A1*B0 are accumulated at offset 8.
566 DEFUN __umulhisi3
567 mul A0, B0
568 movw C0, r0
569 mul A1, B1
570 movw C2, r0
571 mul A0, B1
572 #ifdef __AVR_HAVE_JMP_CALL__
573 ;; This function is used by many other routines, often multiple times.
574 ;; Therefore, if the flash size is not too limited, avoid the RCALL
575 ;; and invest 6 bytes to speed things up.
576 add C1, r0
577 adc C2, r1
578 clr __zero_reg__
579 adc C3, __zero_reg__
580 #else
;; Small flash: run the accumulation tail at 1: as a subroutine for
;; the A0*B1 product; after it returns, MUL A1,B0 executes and falls
;; through the same tail a second time.
581 rcall 1f
582 #endif
583 mul A1, B0
584 1: add C1, r0
585 adc C2, r1
586 clr __zero_reg__
587 adc C3, __zero_reg__
588 ret
589 ENDF __umulhisi3
590 #endif /* L_umulhisi3 */
591
592 /*******************************************************
593 Widening Multiplication 32 = 16 x 32 with MUL
594 *******************************************************/
595
596 #if defined (L_mulshisi3)
597 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
598 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
599 ;;; Clobbers: __tmp_reg__
600 DEFUN __mulshisi3
601 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
602 ;; Some cores have problem skipping 2-word instruction
603 tst A1
604 brmi __mulohisi3
605 #else
;; A >= 0: skip is not taken and the XJMP below runs.
606 sbrs A1, 7
607 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
608 XJMP __muluhisi3
609 ;; FALLTHRU
610 ENDF __mulshisi3
611
612 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
613 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
614 ;;; Clobbers: __tmp_reg__
;; A is treated as 0xffff0000 | A1:A0, i.e. the unsigned product
;; minus B << 16.
615 DEFUN __mulohisi3
616 XCALL __muluhisi3
617 ;; One-extend R27:R26 (A1:A0)
618 sub C2, B0
619 sbc C3, B1
620 ret
621 ENDF __mulohisi3
622 #endif /* L_mulshisi3 */
623
624 #if defined (L_muluhisi3)
625 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
626 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
627 ;;; Clobbers: __tmp_reg__
;; 16 x 32 multiply keeping only the low 32 bits of the result:
;; start from the 16x16 widening product, then accumulate the
;; partial products A0*B3, A1*B2 (byte 3 only) and A0*B2 (bytes 2-3).
;; Anything that carries past bit 31 is deliberately discarded.
628 DEFUN __muluhisi3
629 XCALL __umulhisi3
630 mul A0, B3
631 add C3, r0
632 mul A1, B2
633 add C3, r0
634 mul A0, B2
635 add C2, r0
636 adc C3, r1
637 clr __zero_reg__
638 ret
639 ENDF __muluhisi3
640 #endif /* L_muluhisi3 */
641
642 /*******************************************************
643 Multiplication 32 x 32 with MUL
644 *******************************************************/
645
646 #if defined (L_mulsi3)
647 ;;; R25:R22 = R25:R22 * R21:R18
648 ;;; (C3:C0) = C3:C0 * B3:B0
649 ;;; Clobbers: R26, R27, __tmp_reg__
;; 32 x 32 (low 32 bits): let __muluhisi3 handle A.low * B, then add
;; the partial products of A.high (restored from the stack) that
;; still land below bit 32.
650 DEFUN __mulsi3
651 movw A0, C0
652 push C2
653 push C3
654 XCALL __muluhisi3
655 pop A1
656 pop A0
657 ;; A1:A0 now contains the high word of A
658 mul A0, B0
659 add C2, r0
660 adc C3, r1
661 mul A0, B1
662 add C3, r0
663 mul A1, B0
664 add C3, r0
665 clr __zero_reg__
666 ret
667 ENDF __mulsi3
668 #endif /* L_mulsi3 */
669
670 #undef A0
671 #undef A1
672
673 #undef B0
674 #undef B1
675 #undef B2
676 #undef B3
677
678 #undef C0
679 #undef C1
680 #undef C2
681 #undef C3
682
683 #endif /* __AVR_HAVE_MUL__ */
684
685 /*******************************************************
686 Multiplication 24 x 24 with MUL
687 *******************************************************/
688
689 #if defined (L_mulpsi3)
690
691 ;; A[0..2]: In: Multiplicand; Out: Product
692 #define A0 22
693 #define A1 A0+1
694 #define A2 A0+2
695
696 ;; B[0..2]: In: Multiplier
697 #define B0 18
698 #define B1 B0+1
699 #define B2 B0+2
700
701 #if defined (__AVR_HAVE_MUL__)
702
703 ;; C[0..2]: Expand Result
704 #define C0 22
705 #define C1 C0+1
706 #define C2 C0+2
707
708 ;; R24:R22 *= R20:R18
709 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
710
711 #define AA0 26
712 #define AA2 21
713
;; 24 x 24 multiply with MUL: copy A out of the way (X + AA2), let
;; __umulhisi3 produce the 32-bit product of the low words, then add
;; the two cross products that only affect byte 2.
714 DEFUN __mulpsi3
715 wmov AA0, A0
716 mov AA2, A2
717 XCALL __umulhisi3
718 mul AA2, B0 $ add C2, r0
719 mul AA0, B2 $ add C2, r0
720 clr __zero_reg__
721 ret
722 ENDF __mulpsi3
723
724 #undef AA2
725 #undef AA0
726
727 #undef C2
728 #undef C1
729 #undef C0
730
731 #else /* !HAVE_MUL */
732 ;; C[0..2]: Expand Result
733 #if defined (__AVR_TINY__)
734 #define C0 16
735 #else
736 #define C0 0
737 #endif /* defined (__AVR_TINY__) */
738 #define C1 C0+1
739 #define C2 21
740
741 ;; R24:R22 *= R20:R18
742 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
743
;; 24 x 24 multiply without MUL, shift-and-add over B's 24 bits.
;; Note the accumulator aliasing: C0 is __tmp_reg__ (r16 on AVR_TINY)
;; and C1 is __zero_reg__, which is used as a scratch byte here and
;; restored by the CLR near the end.
744 DEFUN __mulpsi3
745 #if defined (__AVR_TINY__)
746 in r26,__SP_L__
747 in r27,__SP_H__
748 subi r26, lo8(-3) ; Add 3 to point past return address
749 sbci r27, hi8(-3)
750 push B0 ; save callee saved regs
751 push B1
752 ld B0,X+ ; load from caller stack
753 ld B1,X+
754 ld B2,X+
755 #endif /* defined (__AVR_TINY__) */
756
757 ;; C[] = 0
758 clr __tmp_reg__
759 clr C2
760
761 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
762 LSR B2 $ ror B1 $ ror B0
763
764 ;; If the N-th Bit of B[] was set...
765 brcc 1f
766
767 ;; ...then add A[] * 2^N to the Result C[]
768 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
769
770 1: ;; Multiply A[] by 2
771 LSL A0 $ rol A1 $ rol A2
772
773 ;; Loop until B[] is 0
;; SUBI/SBCI of 0 chain the Z flag across all three bytes of B.
774 subi B0,0 $ sbci B1,0 $ sbci B2,0
775 brne 0b
776
777 ;; Copy C[] to the return Register A[]
778 wmov A0, C0
779 mov A2, C2
780
;; Re-establish the zero register (it served as accumulator byte C1).
781 clr __zero_reg__
782 #if defined (__AVR_TINY__)
783 pop B1
784 pop B0
785 #endif /* (__AVR_TINY__) */
786 ret
787 ENDF __mulpsi3
788
789 #undef C2
790 #undef C1
791 #undef C0
792
793 #endif /* HAVE_MUL */
794
795 #undef B2
796 #undef B1
797 #undef B0
798
799 #undef A2
800 #undef A1
801 #undef A0
802
803 #endif /* L_mulpsi3 */
804
805 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
806
807 ;; A[0..2]: In: Multiplicand
808 #define A0 22
809 #define A1 A0+1
810 #define A2 A0+2
811
812 ;; BB: In: Multiplier
813 #define BB 25
814
815 ;; C[0..2]: Result
816 #define C0 18
817 #define C1 C0+1
818 #define C2 C0+2
819
820 ;; C[] = A[] * sign_extend (BB)
;; C[0..2] = A[0..2] * sign_extend (BB): 24-bit by signed-8 multiply.
;; Unsigned partial products first; if BB < 0 the result is too large
;; by A << 8, which the tail subtracts ("one-extend BB").
821 DEFUN __mulsqipsi3
822 mul A0, BB
823 movw C0, r0
824 mul A2, BB
825 mov C2, r0
826 mul A1, BB
827 add C1, r0
828 adc C2, r1
829 clr __zero_reg__
830 sbrs BB, 7
831 ret
832 ;; One-extend BB
833 sub C1, A0
834 sbc C2, A1
835 ret
836 ENDF __mulsqipsi3
837
838 #undef C2
839 #undef C1
840 #undef C0
841
842 #undef BB
843
844 #undef A2
845 #undef A1
846 #undef A0
847
848 #endif /* L_mulsqipsi3 && HAVE_MUL */
849
850 /*******************************************************
851 Multiplication 64 x 64
852 *******************************************************/
853
854 ;; A[] = A[] * B[]
855
856 ;; A[0..7]: In: Multiplicand
857 ;; Out: Product
858 #define A0 18
859 #define A1 A0+1
860 #define A2 A0+2
861 #define A3 A0+3
862 #define A4 A0+4
863 #define A5 A0+5
864 #define A6 A0+6
865 #define A7 A0+7
866
867 ;; B[0..7]: In: Multiplier
868 #define B0 10
869 #define B1 B0+1
870 #define B2 B0+2
871 #define B3 B0+3
872 #define B4 B0+4
873 #define B5 B0+5
874 #define B6 B0+6
875 #define B7 B0+7
876
877 #ifndef __AVR_TINY__
878 #if defined (__AVR_HAVE_MUL__)
879 ;; Define C[] for convenience
880 ;; Notice that parts of C[] overlap A[] respective B[]
881 #define C0 16
882 #define C1 C0+1
883 #define C2 20
884 #define C3 C2+1
885 #define C4 28
886 #define C5 C4+1
887 #define C6 C4+2
888 #define C7 C4+3
889
890 #if defined (L_muldi3)
891
892 ;; A[] *= B[]
893 ;; R25:R18 *= R17:R10
894 ;; Ordinary ABI-Function
895
;; 64 x 64 multiply (low 64 bits) with MUL, treated as a 4x4 matrix
;; of 16-bit word products.  "i * j" comments name the word indices of
;; the product A.word[i] * B.word[j]; products are routed through
;; __umulhisi3 (operands in X and R19:R18, result in R25:R22) and the
;; __muldi3_6 helper, which accumulates at word offset 1.
;; NB: `$' separates several instructions written on one line.
896 DEFUN __muldi3
897 push r29
898 push r28
899 push r17
900 push r16
901
902 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
903
904 ;; 3 * 0 + 0 * 3
;; Byte-level products that only touch the top bytes C7/C6.
905 mul A7,B0 $ $ mov C7,r0
906 mul A0,B7 $ $ add C7,r0
907 mul A6,B1 $ $ add C7,r0
908 mul A6,B0 $ mov C6,r0 $ add C7,r1
909 mul B6,A1 $ $ add C7,r0
910 mul B6,A0 $ add C6,r0 $ adc C7,r1
911
912 ;; 1 * 2
913 mul A2,B4 $ add C6,r0 $ adc C7,r1
914 mul A3,B4 $ $ add C7,r0
915 mul A2,B5 $ $ add C7,r0
916
;; Save operand words that later calls will clobber.
917 push A5
918 push A4
919 push B1
920 push B0
921 push A3
922 push A2
923
924 ;; 0 * 0
925 wmov 26, B0
926 XCALL __umulhisi3
927 wmov C0, 22
928 wmov C2, 24
929
930 ;; 0 * 2
931 wmov 26, B4
932 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
933
934 wmov 26, B2
935 ;; 0 * 1
936 XCALL __muldi3_6
937
938 pop A0
939 pop A1
940 ;; 1 * 1
941 wmov 26, B2
942 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
943
944 pop r26
945 pop r27
946 ;; 1 * 0
947 XCALL __muldi3_6
948
949 pop A0
950 pop A1
951 ;; 2 * 0
952 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
953
954 ;; 2 * 1
955 wmov 26, B2
956 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
957
958 ;; A[] = C[]
959 wmov A0, C0
960 ;; A2 = C2 already
961 wmov A4, C4
962 wmov A6, C6
963
964 pop r16
965 pop r17
966 pop r28
967 pop r29
968 ret
969 ENDF __muldi3
970 #endif /* L_muldi3 */
971
972 #if defined (L_muldi3_6)
973 ;; A helper for some 64-bit multiplications with MUL available
;; Calls __umulhisi3 (operands pre-loaded in X and R19:R18) and adds
;; its 32-bit result R25:R22 into the accumulator at word offset 1
;; (C5:C2), propagating a final carry into C7:C6.
974 DEFUN __muldi3_6
;; NOTE(review): this label looks redundant -- DEFUN already emits
;; `__muldi3_6:'; confirm against the assembler's redefinition rules.
975 __muldi3_6:
976 XCALL __umulhisi3
977 add C2, 22
978 adc C3, 23
979 adc C4, 24
980 adc C5, 25
981 brcc 0f
982 adiw C6, 1
983 0: ret
984 ENDF __muldi3_6
985 #endif /* L_muldi3_6 */
986
987 #undef C7
988 #undef C6
989 #undef C5
990 #undef C4
991 #undef C3
992 #undef C2
993 #undef C1
994 #undef C0
995
996 #else /* !HAVE_MUL */
997
998 #if defined (L_muldi3)
999
1000 #define C0 26
1001 #define C1 C0+1
1002 #define C2 C0+2
1003 #define C3 C0+3
1004 #define C4 C0+4
1005 #define C5 C0+5
1006 #define C6 0
1007 #define C7 C6+1
1008
1009 #define Loop 9
1010
1011 ;; A[] *= B[]
1012 ;; R25:R18 *= R17:R10
1013 ;; Ordinary ABI-Function
1014
;; 64 x 64 shift-and-add over all 64 bits of B.  Accumulator aliasing:
;; C6 is __tmp_reg__ (r0) and C7 is __zero_reg__ (r1), restored by the
;; CLR at the end; the loop counter lives in call-saved r9.
;; NB: `$' separates several instructions written on one line.
1015 DEFUN __muldi3
;; NOTE(review): r28/r29 are saved/restored but not obviously used in
;; this body -- confirm whether the Y saves are still required.
1016 push r29
1017 push r28
1018 push Loop
1019
1020 ldi C0, 64
1021 mov Loop, C0
1022
1023 ;; C[] = 0
;; Clearing r0 (with r1 already zero) lets the wmov's below zero the
;; whole accumulator by copying the r1:r0 pair.
1024 clr __tmp_reg__
1025 wmov C0, 0
1026 wmov C2, 0
1027 wmov C4, 0
1028
1029 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1030 ;; where N = 64 - Loop.
1031 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1032 ;; B[] will have its initial Value again.
1033 LSR B7 $ ror B6 $ ror B5 $ ror B4
1034 ror B3 $ ror B2 $ ror B1 $ ror B0
1035
1036 ;; If the N-th Bit of B[] was set then...
1037 brcc 1f
1038 ;; ...finish Rotation...
1039 ori B7, 1 << 7
1040
1041 ;; ...and add A[] * 2^N to the Result C[]
1042 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1043 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1044
1045 1: ;; Multiply A[] by 2
1046 LSL A0 $ rol A1 $ rol A2 $ rol A3
1047 rol A4 $ rol A5 $ rol A6 $ rol A7
1048
1049 dec Loop
1050 brne 0b
1051
1052 ;; We expanded the Result in C[]
1053 ;; Copy Result to the Return Register A[]
1054 wmov A0, C0
1055 wmov A2, C2
1056 wmov A4, C4
1057 wmov A6, C6
1058
;; Re-establish the zero register (it served as accumulator byte C7).
1059 clr __zero_reg__
1060 pop Loop
1061 pop r28
1062 pop r29
1063 ret
1064 ENDF __muldi3
1065
1066 #undef Loop
1067
1068 #undef C7
1069 #undef C6
1070 #undef C5
1071 #undef C4
1072 #undef C3
1073 #undef C2
1074 #undef C1
1075 #undef C0
1076
1077 #endif /* L_muldi3 */
1078 #endif /* HAVE_MUL */
1079 #endif /* if not __AVR_TINY__ */
1080
1081 #undef B7
1082 #undef B6
1083 #undef B5
1084 #undef B4
1085 #undef B3
1086 #undef B2
1087 #undef B1
1088 #undef B0
1089
1090 #undef A7
1091 #undef A6
1092 #undef A5
1093 #undef A4
1094 #undef A3
1095 #undef A2
1096 #undef A1
1097 #undef A0
1098
1099 /*******************************************************
1100 Widening Multiplication 64 = 32 x 32 with MUL
1101 *******************************************************/
1102
1103 #if defined (__AVR_HAVE_MUL__)
1104 #define A0 r22
1105 #define A1 r23
1106 #define A2 r24
1107 #define A3 r25
1108
1109 #define B0 r18
1110 #define B1 r19
1111 #define B2 r20
1112 #define B3 r21
1113
1114 #define C0 18
1115 #define C1 C0+1
1116 #define C2 20
1117 #define C3 C2+1
1118 #define C4 28
1119 #define C5 C4+1
1120 #define C6 C4+2
1121 #define C7 C4+3
1122
1123 #if defined (L_umulsidi3)
1124
1125 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1126
1127 ;; R18[8] = R22[4] * R18[4]
1128 ;;
1129 ;; Ordinary ABI Function, but additionally sets
1130 ;; X = R20[2] = B2[2]
1131 ;; Z = R22[2] = A0[2]
1132 DEFUN __umulsidi3
;; Unsigned entry: clear T so the helper skips the sign fixup.
1133 clt
1134 ;; FALLTHRU
1135 ENDF __umulsidi3
1136 ;; T = sign (A)
;; Shared core for __umulsidi3/__mulsidi3: four word products
;; (via __umulhisi3 and __muldi3_6) with the spill/reload schedule
;; tracked by the register-layout comments below.
1137 DEFUN __umulsidi3_helper
1138 push 29 $ push 28 ; Y
1139 wmov 30, A2
1140 ;; Counting in Words, we have to perform 4 Multiplications
1141 ;; 0 * 0
1142 wmov 26, A0
1143 XCALL __umulhisi3
1144 push 23 $ push 22 ; C0
1145 wmov 28, B0
1146 wmov 18, B2
1147 wmov C2, 24
1148 push 27 $ push 26 ; A0
1149 push 19 $ push 18 ; B2
1150 ;;
1151 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1152 ;; B2 C2 -- -- -- B0 A2
1153 ;; 1 * 1
1154 wmov 26, 30 ; A2
1155 XCALL __umulhisi3
1156 ;; Sign-extend A. T holds the sign of A
1157 brtc 0f
1158 ;; Subtract B from the high part of the result
1159 sub 22, 28
1160 sbc 23, 29
1161 sbc 24, 18
1162 sbc 25, 19
1163 0: wmov 18, 28 ;; B0
1164 wmov C4, 22
1165 wmov C6, 24
1166 ;;
1167 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1168 ;; B0 C2 -- -- A2 C4 C6
1169 ;;
1170 ;; 1 * 0
1171 XCALL __muldi3_6
1172 ;; 0 * 1
1173 pop 26 $ pop 27 ;; B2
1174 pop 18 $ pop 19 ;; A0
1175 XCALL __muldi3_6
1176
1177 ;; Move result C into place and save A0 in Z
1178 wmov 22, C4
1179 wmov 24, C6
1180 wmov 30, 18 ; A0
1181 pop C0 $ pop C1
1182
1183 ;; Epilogue
1184 pop 28 $ pop 29 ;; Y
1185 ret
1186 ENDF __umulsidi3_helper
1187 #endif /* L_umulsidi3 */
1188
1189
1190 #if defined (L_mulsidi3)
1191
1192 ;; Signed widening 64 = 32 * 32 Multiplication
1193 ;;
1194 ;; R18[8] = R22[4] * R18[4]
1195 ;; Ordinary ABI Function
;; T records A's sign for __umulsidi3_helper; B >= 0 needs no further
;; work, so the helper is tail-called directly in that case.
1196 DEFUN __mulsidi3
1197 bst A3, 7
1198 sbrs B3, 7 ; Enhanced core has no skip bug
1199 XJMP __umulsidi3_helper
1200
1201 ;; B needs sign-extension
;; B < 0: the unsigned result is too large by A << 32.  A's high word
;; is preserved across the call on the stack, its low word in Z.
1202 push A3
1203 push A2
1204 XCALL __umulsidi3_helper
1205 ;; A0 survived in Z
1206 sub r22, r30
1207 sbc r23, r31
1208 pop r26
1209 pop r27
1210 sbc r24, r26
1211 sbc r25, r27
1212 ret
1213 ENDF __mulsidi3
1214 #endif /* L_mulsidi3 */
1215
1216 #undef A0
1217 #undef A1
1218 #undef A2
1219 #undef A3
1220 #undef B0
1221 #undef B1
1222 #undef B2
1223 #undef B3
1224 #undef C0
1225 #undef C1
1226 #undef C2
1227 #undef C3
1228 #undef C4
1229 #undef C5
1230 #undef C6
1231 #undef C7
1232 #endif /* HAVE_MUL */
1233
1234 /**********************************************************
1235 Widening Multiplication 64 = 32 x 32 without MUL
1236 **********************************************************/
1237 #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1238 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1239 #define A0 18
1240 #define A1 A0+1
1241 #define A2 A0+2
1242 #define A3 A0+3
1243 #define A4 A0+4
1244 #define A5 A0+5
1245 #define A6 A0+6
1246 #define A7 A0+7
1247
1248 #define B0 10
1249 #define B1 B0+1
1250 #define B2 B0+2
1251 #define B3 B0+3
1252 #define B4 B0+4
1253 #define B5 B0+5
1254 #define B6 B0+6
1255 #define B7 B0+7
1256
1257 #define AA0 22
1258 #define AA1 AA0+1
1259 #define AA2 AA0+2
1260 #define AA3 AA0+3
1261
1262 #define BB0 18
1263 #define BB1 BB0+1
1264 #define BB2 BB0+2
1265 #define BB3 BB0+3
1266
1267 #define Mask r30
1268
1269 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1270 ;;
1271 ;; R18[8] = R22[4] * R18[4]
1272 ;; Ordinary ABI Function
;; Signed entry: SET makes T = 1, then `skip' (CPSE) jumps over the
;; CLT at the unsigned entry point, so T distinguishes the two.
1273 DEFUN __mulsidi3
1274 set
1275 skip
1276 ;; FALLTHRU
1277 ENDF __mulsidi3
1278
;; T selects the extension: Mask = 0xff (signed, keep the sign bit)
;; or 0x7f (unsigned, so LSL below shifts out a 0 and SBC clears the
;; extension bytes).  Note B/A are copied into place BEFORE their top
;; bytes BB3/AA3 are consumed to compute the extension.
1279 DEFUN __umulsidi3
1280 clt ; skipped
1281 ;; Save 10 Registers: R10..R17, R28, R29
1282 do_prologue_saves 10
1283 ldi Mask, 0xff
1284 bld Mask, 7
1285 ;; Move B into place...
1286 wmov B0, BB0
1287 wmov B2, BB2
1288 ;; ...and extend it
1289 and BB3, Mask
1290 lsl BB3
1291 sbc B4, B4
1292 mov B5, B4
1293 wmov B6, B4
1294 ;; Move A into place...
1295 wmov A0, AA0
1296 wmov A2, AA2
1297 ;; ...and extend it
1298 and AA3, Mask
1299 lsl AA3
1300 sbc A4, A4
1301 mov A5, A4
1302 wmov A6, A4
1303 XCALL __muldi3
1304 do_epilogue_restores 10
1305 ENDF __umulsidi3
1306
1307 #undef A0
1308 #undef A1
1309 #undef A2
1310 #undef A3
1311 #undef A4
1312 #undef A5
1313 #undef A6
1314 #undef A7
1315 #undef B0
1316 #undef B1
1317 #undef B2
1318 #undef B3
1319 #undef B4
1320 #undef B5
1321 #undef B6
1322 #undef B7
1323 #undef AA0
1324 #undef AA1
1325 #undef AA2
1326 #undef AA3
1327 #undef BB0
1328 #undef BB1
1329 #undef BB2
1330 #undef BB3
1331 #undef Mask
1332 #endif /* L_mulsidi3 && !HAVE_MUL */
1333 #endif /* if not __AVR_TINY__ */
1334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1335
1336
1337 .section .text.libgcc.div, "ax", @progbits
1339
1340 /*******************************************************
1341 Division 8 / 8 => (result + remainder)
1342 *******************************************************/
1343 #define r_rem r25 /* remainder */
1344 #define r_arg1 r24 /* dividend, quotient */
1345 #define r_arg2 r22 /* divisor */
1346 #define r_cnt r23 /* loop count */
1347
1348 #if defined (L_udivmodqi4)
;; Unsigned 8/8 division: R24 / R22 -> quotient R24, remainder R25.
;; Restoring shift-subtract division; the quotient bit is the inverse
;; of the borrow, so the quotient accumulates complemented and is
;; fixed with COM at the end.
1349 DEFUN __udivmodqi4
1350 sub r_rem,r_rem ; clear remainder and carry
1351 ldi r_cnt,9 ; init loop counter
1352 rjmp __udivmodqi4_ep ; jump to entry point
1353 __udivmodqi4_loop:
1354 rol r_rem ; shift dividend into remainder
1355 cp r_rem,r_arg2 ; compare remainder & divisor
1356 brcs __udivmodqi4_ep ; remainder < divisor
1357 sub r_rem,r_arg2 ; reduce remainder by divisor
1358 __udivmodqi4_ep:
1359 rol r_arg1 ; shift dividend (with CARRY)
1360 dec r_cnt ; decrement loop counter
1361 brne __udivmodqi4_loop
1362 com r_arg1 ; complement result
1363 ; because C flag was complemented in loop
1364 ret
1365 ENDF __udivmodqi4
1366 #endif /* defined (L_udivmodqi4) */
1367
1368 #if defined (L_divmodqi4)
;; Signed 8/8 division: negate the operands to positive, divide
;; unsigned, then restore signs.  T = sign of the dividend (and hence
;; of the remainder); __tmp_reg__ bit 7 = sign of the quotient.
1369 DEFUN __divmodqi4
1370 bst r_arg1,7 ; store sign of dividend
1371 mov __tmp_reg__,r_arg1
1372 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1373 sbrc r_arg1,7
1374 neg r_arg1 ; dividend negative : negate
1375 sbrc r_arg2,7
1376 neg r_arg2 ; divisor negative : negate
1377 XCALL __udivmodqi4 ; do the unsigned div/mod
1378 brtc __divmodqi4_1
1379 neg r_rem ; correct remainder sign
1380 __divmodqi4_1:
1381 sbrc __tmp_reg__,7
1382 neg r_arg1 ; correct result sign
1383 __divmodqi4_exit:
1384 ret
1385 ENDF __divmodqi4
1386 #endif /* defined (L_divmodqi4) */
1387
1388 #undef r_rem
1389 #undef r_arg1
1390 #undef r_arg2
1391 #undef r_cnt
1392
1393
1394 /*******************************************************
1395 Division 16 / 16 => (result + remainder)
1396 *******************************************************/
1397 #define r_remL r26 /* remainder Low */
1398 #define r_remH r27 /* remainder High */
1399
1400 /* return: remainder */
1401 #define r_arg1L r24 /* dividend Low */
1402 #define r_arg1H r25 /* dividend High */
1403
1404 /* return: quotient */
1405 #define r_arg2L r22 /* divisor Low */
1406 #define r_arg2H r23 /* divisor High */
1407
1408 #define r_cnt r21 /* loop count */
1409
1410 #if defined (L_udivmodhi4)
;; Unsigned 16/16 division: R25:R24 / R23:R22 ->
;; quotient R23:R22, remainder R25:R24.
;; Restoring shift-subtract division; quotient bits accumulate
;; complemented (borrow inverted) and are fixed with COM at the end.
1411 DEFUN __udivmodhi4
1412 sub r_remL,r_remL
1413 sub r_remH,r_remH ; clear remainder and carry
1414 ldi r_cnt,17 ; init loop counter
1415 rjmp __udivmodhi4_ep ; jump to entry point
1416 __udivmodhi4_loop:
1417 rol r_remL ; shift dividend into remainder
1418 rol r_remH
1419 cp r_remL,r_arg2L ; compare remainder & divisor
1420 cpc r_remH,r_arg2H
1421 brcs __udivmodhi4_ep ; remainder < divisor
1422 sub r_remL,r_arg2L ; reduce remainder by divisor
1423 sbc r_remH,r_arg2H
1424 __udivmodhi4_ep:
1425 rol r_arg1L ; shift dividend (with CARRY)
1426 rol r_arg1H
1427 dec r_cnt ; decrement loop counter
1428 brne __udivmodhi4_loop
1429 com r_arg1L
1430 com r_arg1H
1431 ; div/mod results to return registers, as for the div() function
1432 mov_l r_arg2L, r_arg1L ; quotient
1433 mov_h r_arg2H, r_arg1H
1434 mov_l r_arg1L, r_remL ; remainder
1435 mov_h r_arg1H, r_remH
1436 ret
1437 ENDF __udivmodhi4
1438 #endif /* defined (L_udivmodhi4) */
1439
1440 #if defined (L_divmodhi4)
;; Signed 16-bit division and modulo; also exported as _div.
;; The two negate helpers double as pre-division operand fix-up and
;; post-division result fix-up; the final remainder fix-up is reached by
;; falling through into __divmodhi4_neg1.
1441 DEFUN __divmodhi4
1442 .global _div
1443 _div:
1444 bst r_arg1H,7 ; T = sign of dividend
1445 mov __tmp_reg__,r_arg2H
1446 brtc 0f
1447 com __tmp_reg__ ; r0.7 is sign of result
1448 rcall __divmodhi4_neg1 ; dividend negative: negate
1449 0:
1450 sbrc r_arg2H,7
1451 rcall __divmodhi4_neg2 ; divisor negative: negate
1452 XCALL __udivmodhi4 ; do the unsigned div/mod
1453 sbrc __tmp_reg__,7
1454 rcall __divmodhi4_neg2 ; correct quotient sign (quotient is in r_arg2)
1455 brtc __divmodhi4_exit ; dividend was >= 0: remainder already correct
1456 __divmodhi4_neg1:
1457 ;; negate dividend (before div) resp. remainder (after div)
1458 com r_arg1H
1459 neg r_arg1L
1460 sbci r_arg1H,0xff
1461 ret
1462 __divmodhi4_neg2:
1463 ;; negate divisor (before div) resp. quotient (after div)
1464 com r_arg2H
1465 neg r_arg2L
1466 sbci r_arg2H,0xff
1467 __divmodhi4_exit:
1468 ret
1469 ENDF __divmodhi4
1470 #endif /* defined (L_divmodhi4) */
1471
1472 #undef r_remH
1473 #undef r_remL
1474
1475 #undef r_arg1H
1476 #undef r_arg1L
1477
1478 #undef r_arg2H
1479 #undef r_arg2L
1480
1481 #undef r_cnt
1482
1483 /*******************************************************
1484 Division 24 / 24 => (result + remainder)
1485 *******************************************************/
1486
1487 ;; A[0..2]: In: Dividend; Out: Quotient
1488 #define A0 22
1489 #define A1 A0+1
1490 #define A2 A0+2
1491
1492 ;; B[0..2]: In: Divisor; Out: Remainder
1493 #define B0 18
1494 #define B1 B0+1
1495 #define B2 B0+2
1496
1497 ;; C[0..2]: Expand remainder
1498 #define C0 __zero_reg__
1499 #define C1 26
1500 #define C2 25
1501
1502 ;; Loop counter
1503 #define r_cnt 21
1504
1505 #if defined (L_udivmodpsi4)
1506 ;; R24:R22 = R24:R22 udiv R20:R18
1507 ;; R20:R18 = R24:R22 umod R20:R18
1508 ;; Clobbers: R21, R25, R26
;; Unsigned 24-bit division and modulo (restoring shift-subtract).
;; Note C0 aliases __zero_reg__; it is restored to 0 before returning.
1509
1510 DEFUN __udivmodpsi4
1511 ; init loop counter: 24 bits + 1 entry step
1512 ldi r_cnt, 24+1
1513 ; Clear remainder and carry. C0 is already 0
1514 clr C1
1515 sub C2, C2
1516 ; jump to entry point
1517 rjmp __udivmodpsi4_start
1518 __udivmodpsi4_loop:
1519 ; shift dividend into remainder
1520 rol C0
1521 rol C1
1522 rol C2
1523 ; compare remainder & divisor
1524 cp C0, B0
1525 cpc C1, B1
1526 cpc C2, B2
1527 brcs __udivmodpsi4_start ; remainder < divisor
1528 sub C0, B0 ; remainder >= divisor: subtract divisor
1529 sbc C1, B1
1530 sbc C2, B2
1531 __udivmodpsi4_start:
1532 ; shift dividend (with CARRY = inverted quotient bit)
1533 rol A0
1534 rol A1
1535 rol A2
1536 ; decrement loop counter
1537 dec r_cnt
1538 brne __udivmodpsi4_loop
1539 com A0 ; complement result: C was inverted in the loop
1540 com A1
1541 com A2
1542 ; div/mod results to return registers
1543 ; remainder
1544 mov B0, C0
1545 mov B1, C1
1546 mov B2, C2
1547 clr __zero_reg__ ; C0: restore the zero register
1548 ret
1549 ENDF __udivmodpsi4
1550 #endif /* defined (L_udivmodpsi4) */
1551
1552 #if defined (L_divmodpsi4)
1553 ;; R24:R22 = R24:R22 div R20:R18
1554 ;; R20:R18 = R24:R22 mod R20:R18
1555 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
;; Signed 24-bit division and modulo.  The negate helpers double as
;; pre-division operand fix-up and post-division result fix-up: after
;; __udivmodpsi4 the quotient is in A[], the remainder in B[].
1556
1557 DEFUN __divmodpsi4
1558 ; R0.7 will contain the sign of the result:
1559 ; R0.7 = A.sign ^ B.sign
1560 mov __tmp_reg__, B2
1561 ; T-flag = sign of dividend
1562 bst A2, 7
1563 brtc 0f
1564 com __tmp_reg__
1565 ; Adjust dividend's sign
1566 rcall __divmodpsi4_negA
1567 0:
1568 ; Adjust divisor's sign
1569 sbrc B2, 7
1570 rcall __divmodpsi4_negB
1571
1572 ; Do the unsigned div/mod
1573 XCALL __udivmodpsi4
1574
1575 ; Adjust quotient's sign
1576 sbrc __tmp_reg__, 7
1577 rcall __divmodpsi4_negA
1578
1579 ; Adjust remainder's sign: T = 1 iff dividend was negative
1580 brtc __divmodpsi4_end
1581
1582 __divmodpsi4_negB:
1583 ; Correct divisor/remainder sign
1584 com B2
1585 com B1
1586 neg B0
1587 sbci B1, -1
1588 sbci B2, -1
1589 ret
1590
1591 ; Correct dividend/quotient sign
1592 __divmodpsi4_negA:
1593 com A2
1594 com A1
1595 neg A0
1596 sbci A1, -1
1597 sbci A2, -1
1598 __divmodpsi4_end:
1599 ret
1600
1601 ENDF __divmodpsi4
1602 #endif /* defined (L_divmodpsi4) */
1603
1604 #undef A0
1605 #undef A1
1606 #undef A2
1607
1608 #undef B0
1609 #undef B1
1610 #undef B2
1611
1612 #undef C0
1613 #undef C1
1614 #undef C2
1615
1616 #undef r_cnt
1617
1618 /*******************************************************
1619 Division 32 / 32 => (result + remainder)
1620 *******************************************************/
1621 #define r_remHH r31 /* remainder High */
1622 #define r_remHL r30
1623 #define r_remH r27
1624 #define r_remL r26 /* remainder Low */
1625
1626 /* return: remainder */
1627 #define r_arg1HH r25 /* dividend High */
1628 #define r_arg1HL r24
1629 #define r_arg1H r23
1630 #define r_arg1L r22 /* dividend Low */
1631
1632 /* return: quotient */
1633 #define r_arg2HH r21 /* divisor High */
1634 #define r_arg2HL r20
1635 #define r_arg2H r19
1636 #define r_arg2L r18 /* divisor Low */
1637
1638 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1639
1640 #if defined (L_udivmodsi4)
;; Unsigned 32-bit division and modulo (restoring shift-subtract).
;; In:  R25:R22 = dividend, R21:R18 = divisor.
;; Out: R21:R18 = quotient, R25:R22 = remainder (ldiv() layout).
;; Uses __zero_reg__ as bit counter; it is 0 again when the loop ends.
1641 DEFUN __udivmodsi4
1642 ldi r_remL, 33 ; init loop counter: 32 bits + 1 entry step
1643 mov r_cnt, r_remL
1644 sub r_remL,r_remL
1645 sub r_remH,r_remH ; clear remainder and carry
1646 mov_l r_remHL, r_remL
1647 mov_h r_remHH, r_remH
1648 rjmp __udivmodsi4_ep ; jump to entry point
1649 __udivmodsi4_loop:
1650 rol r_remL ; shift dividend into remainder
1651 rol r_remH
1652 rol r_remHL
1653 rol r_remHH
1654 cp r_remL,r_arg2L ; compare remainder & divisor
1655 cpc r_remH,r_arg2H
1656 cpc r_remHL,r_arg2HL
1657 cpc r_remHH,r_arg2HH
1658 brcs __udivmodsi4_ep ; remainder < divisor
1659 sub r_remL,r_arg2L ; remainder >= divisor: subtract divisor
1660 sbc r_remH,r_arg2H
1661 sbc r_remHL,r_arg2HL
1662 sbc r_remHH,r_arg2HH
1663 __udivmodsi4_ep:
1664 rol r_arg1L ; shift dividend (with CARRY = inverted quotient bit)
1665 rol r_arg1H
1666 rol r_arg1HL
1667 rol r_arg1HH
1668 dec r_cnt ; decrement loop counter
1669 brne __udivmodsi4_loop
1670 ; __zero_reg__ now restored (r_cnt == 0)
1671 com r_arg1L ; complement result: C was inverted in the loop
1672 com r_arg1H
1673 com r_arg1HL
1674 com r_arg1HH
1675 ; div/mod results to return registers, as for the ldiv() function
1676 mov_l r_arg2L, r_arg1L ; quotient
1677 mov_h r_arg2H, r_arg1H
1678 mov_l r_arg2HL, r_arg1HL
1679 mov_h r_arg2HH, r_arg1HH
1680 mov_l r_arg1L, r_remL ; remainder
1681 mov_h r_arg1H, r_remH
1682 mov_l r_arg1HL, r_remHL
1683 mov_h r_arg1HH, r_remHH
1684 ret
1685 ENDF __udivmodsi4
1686 #endif /* defined (L_udivmodsi4) */
1687
1688 #if defined (L_divmodsi4)
;; Signed 32-bit division and modulo.
;; Dividend/remainder negation is delegated to __negsi2 (operates on
;; R25:R22); __divmodsi4_neg2 negates the divisor before the division
;; resp. the quotient after it (both live in R21:R18).
1689 DEFUN __divmodsi4
1690 mov __tmp_reg__,r_arg2HH
1691 bst r_arg1HH,7 ; T = sign of dividend
1692 brtc 0f
1693 com __tmp_reg__ ; r0.7 is sign of result
1694 XCALL __negsi2 ; dividend negative: negate
1695 0:
1696 sbrc r_arg2HH,7
1697 rcall __divmodsi4_neg2 ; divisor negative: negate
1698 XCALL __udivmodsi4 ; do the unsigned div/mod
1699 sbrc __tmp_reg__, 7 ; correct quotient sign
1700 rcall __divmodsi4_neg2
1701 brtc __divmodsi4_exit ; correct remainder sign
1702 XJMP __negsi2
1703 __divmodsi4_neg2:
1704 ;; correct divisor/quotient sign
1705 com r_arg2HH
1706 com r_arg2HL
1707 com r_arg2H
1708 neg r_arg2L
1709 sbci r_arg2H,0xff
1710 sbci r_arg2HL,0xff
1711 sbci r_arg2HH,0xff
1712 __divmodsi4_exit:
1713 ret
1714 ENDF __divmodsi4
1715 #endif /* defined (L_divmodsi4) */
1716
1717 #if defined (L_negsi2)
1718 ;; (set (reg:SI 22)
1719 ;; (neg:SI (reg:SI 22)))
1720 ;; Sets the V flag for signed overflow tests
;; Negate the 32-bit value in R25:R22 in place (NEG4 macro).
1721 DEFUN __negsi2
1722 NEG4 22
1723 ret
1724 ENDF __negsi2
1725 #endif /* L_negsi2 */
1726
1727 #undef r_remHH
1728 #undef r_remHL
1729 #undef r_remH
1730 #undef r_remL
1731 #undef r_arg1HH
1732 #undef r_arg1HL
1733 #undef r_arg1H
1734 #undef r_arg1L
1735 #undef r_arg2HH
1736 #undef r_arg2HL
1737 #undef r_arg2H
1738 #undef r_arg2L
1739 #undef r_cnt
1740
1741 /* *di routines use registers below R19 and won't work with tiny arch
1742 right now. */
1743
1744 #if !defined (__AVR_TINY__)
1745 /*******************************************************
1746 Division 64 / 64
1747 Modulo 64 % 64
1748 *******************************************************/
1749
1750 ;; Use the speed-optimized version on "big" devices, i.e. devices with
1751 ;; at least 16 KiB of program memory.  For smaller devices, depend
1752 ;; on MOVW and SP size.  There is a connection between SP size and
1753 ;; flash size, so that SP size can be used to test for flash size.
1754
1755 #if defined (__AVR_HAVE_JMP_CALL__)
1756 # define SPEED_DIV 8
1757 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1758 # define SPEED_DIV 16
1759 #else
1760 # define SPEED_DIV 0
1761 #endif
1762
1763 ;; A[0..7]: In: Dividend;
1764 ;; Out: Quotient (T = 0)
1765 ;; Out: Remainder (T = 1)
1766 #define A0 18
1767 #define A1 A0+1
1768 #define A2 A0+2
1769 #define A3 A0+3
1770 #define A4 A0+4
1771 #define A5 A0+5
1772 #define A6 A0+6
1773 #define A7 A0+7
1774
1775 ;; B[0..7]: In: Divisor; Out: Clobber
1776 #define B0 10
1777 #define B1 B0+1
1778 #define B2 B0+2
1779 #define B3 B0+3
1780 #define B4 B0+4
1781 #define B5 B0+5
1782 #define B6 B0+6
1783 #define B7 B0+7
1784
1785 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1786 #define C0 8
1787 #define C1 C0+1
1788 #define C2 30
1789 #define C3 C2+1
1790 #define C4 28
1791 #define C5 C4+1
1792 #define C6 26
1793 #define C7 C6+1
1794
1795 ;; Holds Signs during Division Routine
1796 #define SS __tmp_reg__
1797
1798 ;; Bit-Counter in Division Routine
1799 #define R_cnt __zero_reg__
1800
1801 ;; Scratch Register for Negation
1802 #define NN r31
1803
1804 #if defined (L_udivdi3)
1805
1806 ;; R25:R18 = R24:R18 umod R17:R10
1807 ;; Ordinary ABI-Function
1808
;; Entry stub: T = 1 selects the remainder from the common worker.
1809 DEFUN __umoddi3
1810 set
1811 rjmp __udivdi3_umoddi3
1812 ENDF __umoddi3
1813
1814 ;; R25:R18 = R24:R18 udiv R17:R10
1815 ;; Ordinary ABI-Function
1816
;; Entry stub: T = 0 selects the quotient; falls through to the worker.
1817 DEFUN __udivdi3
1818 clt
1819 ENDF __udivdi3
1820
;; Save the call-saved registers that __udivmod64 uses for its remainder
;; (C0, C1, C4, C5), then delegate.
1821 DEFUN __udivdi3_umoddi3
1822 push C0
1823 push C1
1824 push C4
1825 push C5
1826 XCALL __udivmod64
1827 pop C5
1828 pop C4
1829 pop C1
1830 pop C0
1831 ret
1832 ENDF __udivdi3_umoddi3
1833 #endif /* L_udivdi3 */
1834
1835 #if defined (L_udivmod64)
1836
1837 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1838 ;; No Registers saved/restored; the Callers will take Care.
1839 ;; Preserves B[] and T-flag
1840 ;; T = 0: Compute Quotient in A[]
1841 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1842
1843 DEFUN __udivmod64
1844
1845 ;; Clear Remainder (C6, C7 will follow)
1846 clr C0
1847 clr C1
1848 wmov C2, C0
1849 wmov C4, C0
1850 ldi C7, 64 ; total Number of Bits; also used as Bit Counter Seed
1851
1852 #if SPEED_DIV == 0 || SPEED_DIV == 16
1853 ;; Initialize Loop-Counter
1854 mov R_cnt, C7
1855 wmov C6, C0
1856 #endif /* SPEED_DIV */
1857
1858 #if SPEED_DIV == 8
;; Byte-wise Pre-Shift: skip whole Bytes of leading Zeros in the Dividend
;; so the Bit Loop below only runs for the Bits that remain.
1859
1860 push A7
1861 clr C6
1862
1863 1: ;; Compare shifted Dividend against Divisor
1864 ;; If -- even after Shifting -- it is smaller...
1865 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1866 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1867 brcc 2f
1868
1869 ;; ...then we can subtract it. Thus, it is legal to shift left
1870 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1871 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1872 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1873 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1874
1875 ;; 8 Bits are done
1876 subi C7, 8
1877 brne 1b
1878
1879 ;; Shifted 64 Bits: A7 has traveled to C7
1880 pop C7
1881 ;; Divisor is greater than Dividend. We have:
1882 ;; A[] % B[] = A[]
1883 ;; A[] / B[] = 0
1884 ;; Thus, we can return immediately
1885 rjmp 5f
1886
1887 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1888 mov R_cnt, C7
1889
1890 ;; Push of A7 is not needed because C7 is still 0
1891 pop C7
1892 clr C7
1893
1894 #elif SPEED_DIV == 16
1895
1896 ;; Compare shifted Dividend against Divisor
1897 cp A7, B3
1898 cpc C0, B4
1899 cpc C1, B5
1900 cpc C2, B6
1901 cpc C3, B7
1902 brcc 2f
1903
1904 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1905 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1906 wmov C2,A6 $ wmov C0,A4
1907 wmov A6,A2 $ wmov A4,A0
1908 wmov A2,C6 $ wmov A0,C4
1909
1910 ;; Set Bit Counter to 32
1911 lsr R_cnt
1912 2:
1913 #elif SPEED_DIV
1914 #error SPEED_DIV = ?
1915 #endif /* SPEED_DIV */
1916
1917 ;; The very Division + Remainder Routine
1918
1919 3: ;; Left-shift Dividend...
1920 lsl A0 $ rol A1 $ rol A2 $ rol A3
1921 rol A4 $ rol A5 $ rol A6 $ rol A7
1922
1923 ;; ...into Remainder
1924 rol C0 $ rol C1 $ rol C2 $ rol C3
1925 rol C4 $ rol C5 $ rol C6 $ rol C7
1926
1927 ;; Compare Remainder and Divisor
1928 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1929 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1930
1931 brcs 4f
1932
1933 ;; Divisor fits into Remainder: Subtract it from Remainder...
1934 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1935 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1936
1937 ;; ...and set according Bit in the upcoming Quotient
1938 ;; The Bit will travel to its final Position
1939 ori A0, 1
1940
1941 4: ;; This Bit is done
1942 dec R_cnt
1943 brne 3b
1944 ;; __zero_reg__ is 0 again
1945
1946 ;; T = 0: We are fine with the Quotient in A[]
1947 ;; T = 1: Copy Remainder to A[]
1948 5: brtc 6f
1949 wmov A0, C0
1950 wmov A2, C2
1951 wmov A4, C4
1952 wmov A6, C6
1953 ;; Move the Sign of the Result to SS.7
1954 lsl SS
1955
1956 6: ret
1957
1958 ENDF __udivmod64
1959 #endif /* L_udivmod64 */
1960
1961
1962 #if defined (L_divdi3)
1963
1964 ;; R25:R18 = R24:R18 mod R17:R10
1965 ;; Ordinary ABI-Function
1966
;; Entry stub: T = 1 selects the remainder from the common worker.
1967 DEFUN __moddi3
1968 set
1969 rjmp __divdi3_moddi3
1970 ENDF __moddi3
1971
1972 ;; R25:R18 = R24:R18 div R17:R10
1973 ;; Ordinary ABI-Function
1974
;; Entry stub: T = 0 selects the quotient; falls through to the worker.
1975 DEFUN __divdi3
1976 clt
1977 ENDF __divdi3
1978
;; Common signed 64-bit div/mod worker: strip the operand signs, run
;; __udivmod64, then negate the result according to SS (see below).
1979 DEFUN __divdi3_moddi3
1980 #if SPEED_DIV
1981 mov r31, A7
1982 or r31, B7
1983 brmi 0f
1984 ;; Both Signs are 0: the following Complexity is not needed
1985 XJMP __udivdi3_umoddi3
1986 #endif /* SPEED_DIV */
1987
1988 0: ;; The Prologue
1989 ;; Save 12 Registers: Y, 17...8
1990 ;; No Frame needed
1991 do_prologue_saves 12
1992
1993 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1994 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1995 mov SS, A7
1996 asr SS
1997 ;; Adjust Dividend's Sign as needed
1998 #if SPEED_DIV
1999 ;; Compiling for Speed we know that at least one Sign must be < 0
2000 ;; Thus, if A[] >= 0 then we know B[] < 0
2001 brpl 22f
2002 #else
2003 brpl 21f
2004 #endif /* SPEED_DIV */
2005
2006 XCALL __negdi2
2007
2008 ;; Adjust Divisor's Sign and SS.7 as needed
2009 21: tst B7
2010 brpl 3f
2011 22: ldi NN, 1 << 7
2012 eor SS, NN
2013
2014 ldi NN, -1
2015 com B4 $ com B5 $ com B6 $ com B7
2016 $ com B1 $ com B2 $ com B3
2017 NEG B0
2018 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
2019 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
2020
2021 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2022 XCALL __udivmod64
2023
2024 ;; Adjust Result's Sign
2025 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2026 tst SS
2027 brpl 4f
2028 #else
2029 sbrc SS, 7
2030 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2031 XCALL __negdi2
2032
2033 4: ;; Epilogue: Restore 12 Registers and return
2034 do_epilogue_restores 12
2035
2036 ENDF __divdi3_moddi3
2037
2038 #endif /* L_divdi3 */
2039
2040 #undef R_cnt
2041 #undef SS
2042 #undef NN
2043
2044 .section .text.libgcc, "ax", @progbits
2045
2046 #define TT __tmp_reg__
2047
2048 #if defined (L_adddi3)
2049 ;; (set (reg:DI 18)
2050 ;; (plus:DI (reg:DI 18)
2051 ;; (reg:DI 10)))
2052 ;; Sets the V flag for signed overflow tests
2053 ;; Sets the C flag for unsigned overflow tests
;; 64-bit addition: A[] += B[] (byte-wise add with carry chain).
2054 DEFUN __adddi3
2055 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
2056 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
2057 ret
2058 ENDF __adddi3
2059 #endif /* L_adddi3 */
2060
2061 #if defined (L_adddi3_s8)
2062 ;; (set (reg:DI 18)
2063 ;; (plus:DI (reg:DI 18)
2064 ;; (sign_extend:SI (reg:QI 26))))
2065 ;; Sets the V flag for signed overflow tests
2066 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
;; Add a sign-extended 8-bit value (R26) to the 64-bit A[]:
;; TT is 0x00 or 0xff depending on R26's sign bit.
2067 DEFUN __adddi3_s8
2068 clr TT
2069 sbrc r26, 7
2070 com TT ; TT = sign extension of R26
2071 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2072 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
2073 ret
2074 ENDF __adddi3_s8
2075 #endif /* L_adddi3_s8 */
2076
2077 #if defined (L_subdi3)
2078 ;; (set (reg:DI 18)
2079 ;; (minus:DI (reg:DI 18)
2080 ;; (reg:DI 10)))
2081 ;; Sets the V flag for signed overflow tests
2082 ;; Sets the C flag for unsigned overflow tests
;; 64-bit subtraction: A[] -= B[] (byte-wise subtract with borrow chain).
2083 DEFUN __subdi3
2084 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2085 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2086 ret
2087 ENDF __subdi3
2088 #endif /* L_subdi3 */
2089
2090 #if defined (L_cmpdi2)
2091 ;; (set (cc0)
2092 ;; (compare (reg:DI 18)
2093 ;; (reg:DI 10)))
;; 64-bit compare A[] vs B[]: only sets the condition flags.
2094 DEFUN __cmpdi2
2095 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2096 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2097 ret
2098 ENDF __cmpdi2
2099 #endif /* L_cmpdi2 */
2100
2101 #if defined (L_cmpdi2_s8)
2102 ;; (set (cc0)
2103 ;; (compare (reg:DI 18)
2104 ;; (sign_extend:SI (reg:QI 26))))
;; Compare 64-bit A[] against sign-extended 8-bit R26;
;; TT is 0x00 or 0xff depending on R26's sign bit.
2105 DEFUN __cmpdi2_s8
2106 clr TT
2107 sbrc r26, 7
2108 com TT ; TT = sign extension of R26
2109 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2110 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2111 ret
2112 ENDF __cmpdi2_s8
2113 #endif /* L_cmpdi2_s8 */
2114
2115 #if defined (L_negdi2)
2116 ;; (set (reg:DI 18)
2117 ;; (neg:DI (reg:DI 18)))
2118 ;; Sets the V flag for signed overflow tests
;; 64-bit negation: complement all bytes, then add 1 via the
;; NEG/SBCI borrow chain (two's complement).
2119 DEFUN __negdi2
2120
2121 com A4 $ com A5 $ com A6 $ com A7
2122 $ com A1 $ com A2 $ com A3
2123 NEG A0
2124 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2125 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2126 ret
2127
2128 ENDF __negdi2
2129 #endif /* L_negdi2 */
2130
2131 #undef TT
2132
2133 #undef C7
2134 #undef C6
2135 #undef C5
2136 #undef C4
2137 #undef C3
2138 #undef C2
2139 #undef C1
2140 #undef C0
2141
2142 #undef B7
2143 #undef B6
2144 #undef B5
2145 #undef B4
2146 #undef B3
2147 #undef B2
2148 #undef B1
2149 #undef B0
2150
2151 #undef A7
2152 #undef A6
2153 #undef A5
2154 #undef A4
2155 #undef A3
2156 #undef A2
2157 #undef A1
2158 #undef A0
2159
2160 #endif /* !defined (__AVR_TINY__) */
2161
2162
2163 .section .text.libgcc.prologue, "ax", @progbits
2165
2166 /**********************************
2167 * This is a prologue subroutine
2168 **********************************/
2169 #if !defined (__AVR_TINY__)
2170 #if defined (L_prologue)
2171
2172 ;; This function does not clobber T-flag; 64-bit division relies on it
;; Push all call-saved registers, then allocate a frame by subtracting
;; X (R27:R26) from SP.  Returns to the caller's body via XIJMP, i.e.
;; through Z (plus EIND on large devices).
2173 DEFUN __prologue_saves__
2174 push r2
2175 push r3
2176 push r4
2177 push r5
2178 push r6
2179 push r7
2180 push r8
2181 push r9
2182 push r10
2183 push r11
2184 push r12
2185 push r13
2186 push r14
2187 push r15
2188 push r16
2189 push r17
2190 push r28
2191 push r29
2192 #if !defined (__AVR_HAVE_SPH__)
2193 in r28,__SP_L__
2194 sub r28,r26 ; allocate frame: SP -= frame size (8-bit SP)
2195 out __SP_L__,r28
2196 clr r29
2197 #elif defined (__AVR_XMEGA__)
2198 in r28,__SP_L__
2199 in r29,__SP_H__
2200 sub r28,r26 ; allocate frame: SP -= frame size
2201 sbc r29,r27
2202 out __SP_L__,r28 ; XMEGA updates SP atomically, no cli needed
2203 out __SP_H__,r29
2204 #else
2205 in r28,__SP_L__
2206 in r29,__SP_H__
2207 sub r28,r26 ; allocate frame: SP -= frame size
2208 sbc r29,r27
2209 in __tmp_reg__,__SREG__
2210 cli ; disable interrupts around the non-atomic SP update
2211 out __SP_H__,r29
2212 out __SREG__,__tmp_reg__ ; restore I-flag; takes effect after next insn
2213 out __SP_L__,r28
2214 #endif /* #SP = 8/16 */
2215
2216 XIJMP ; continue in the caller's body (address in Z)
2217
2218 ENDF __prologue_saves__
2219 #endif /* defined (L_prologue) */
2220
2221 /*
2222 * This is an epilogue subroutine
2223 */
2224 #if defined (L_epilogue)
2225
;; Reload all call-saved registers from the frame (Y-relative), release
;; the frame by adding R30 to SP, and return to the function's caller.
2226 DEFUN __epilogue_restores__
2227 ldd r2,Y+18
2228 ldd r3,Y+17
2229 ldd r4,Y+16
2230 ldd r5,Y+15
2231 ldd r6,Y+14
2232 ldd r7,Y+13
2233 ldd r8,Y+12
2234 ldd r9,Y+11
2235 ldd r10,Y+10
2236 ldd r11,Y+9
2237 ldd r12,Y+8
2238 ldd r13,Y+7
2239 ldd r14,Y+6
2240 ldd r15,Y+5
2241 ldd r16,Y+4
2242 ldd r17,Y+3
2243 ldd r26,Y+2 ; caller's Y, low byte (moved into r28 below)
2244 #if !defined (__AVR_HAVE_SPH__)
2245 ldd r29,Y+1
2246 add r28,r30 ; release frame: SP += R30 (8-bit SP)
2247 out __SP_L__,r28
2248 mov r28, r26
2249 #elif defined (__AVR_XMEGA__)
2250 ldd r27,Y+1 ; caller's Y, high byte
2251 add r28,r30 ; release frame: SP += R30
2252 adc r29,__zero_reg__
2253 out __SP_L__,r28 ; XMEGA updates SP atomically, no cli needed
2254 out __SP_H__,r29
2255 wmov 28, 26 ; restore caller's Y
2256 #else
2257 ldd r27,Y+1 ; caller's Y, high byte
2258 add r28,r30 ; release frame: SP += R30
2259 adc r29,__zero_reg__
2260 in __tmp_reg__,__SREG__
2261 cli ; disable interrupts around the non-atomic SP update
2262 out __SP_H__,r29
2263 out __SREG__,__tmp_reg__ ; restore I-flag; takes effect after next insn
2264 out __SP_L__,r28
2265 mov_l r28, r26 ; restore caller's Y
2266 mov_h r29, r27
2267 #endif /* #SP = 8/16 */
2268 ret
2269 ENDF __epilogue_restores__
2270 #endif /* defined (L_epilogue) */
2271 #endif /* !defined (__AVR_TINY__) */
2272
2273 #ifdef L_exit
2274 .section .fini9,"ax",@progbits
;; _exit is placed in .fini9 so execution falls through the .fini8..1
;; sections (inserted by the linker script) into .fini0 below.
;; exit is a weak alias for _exit.
2275 DEFUN _exit
2276 .weak exit
2277 exit:
2278 ENDF _exit
2279
2280 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2281
2282 .section .fini0,"ax",@progbits
2283 cli ; disable interrupts, then hang forever
2284 __stop_program:
2285 rjmp __stop_program
2286 #endif /* defined (L_exit) */
2287
2288 #ifdef L_cleanup
;; Weak no-op _cleanup stub; may be overridden by a real implementation.
2289 .weak _cleanup
2290 .func _cleanup
2291 _cleanup:
2292 ret
2293 .endfunc
2294 #endif /* defined (L_cleanup) */
2295
2296
2297 .section .text.libgcc, "ax", @progbits
2299
2300 #ifdef L_tablejump2
;; Jump via a jumptable: Z holds the word address of the table entry;
;; it is doubled to a byte address first, then the target word address
;; is fetched from flash and jumped to.
2301 DEFUN __tablejump2__
2302 lsl r30 ; convert word address to byte address
2303 rol r31
2304 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2305 ;; Word address of gs() jumptable entry in R24:Z
2306 rol r24
2307 out __RAMPZ__, r24
2308 #elif defined (__AVR_HAVE_ELPM__)
2309 ;; Word address of jumptable entry in Z
2310 clr __tmp_reg__
2311 rol __tmp_reg__ ; catch the carry shifted out of Z
2312 out __RAMPZ__, __tmp_reg__
2313 #endif
2314
2315 ;; Read word address from jumptable and jump
2316
2317 #if defined (__AVR_HAVE_ELPMX__)
2318 elpm __tmp_reg__, Z+
2319 elpm r31, Z
2320 mov r30, __tmp_reg__
2321 #ifdef __AVR_HAVE_RAMPD__
2322 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2323 out __RAMPZ__, __zero_reg__
2324 #endif /* RAMPD */
2325 XIJMP
2326 #elif defined (__AVR_HAVE_ELPM__)
2327 elpm
2328 push r0 ; push target address and "return" to it
2329 adiw r30, 1
2330 elpm
2331 push r0
2332 ret
2333 #elif defined (__AVR_HAVE_LPMX__)
2334 lpm __tmp_reg__, Z+
2335 lpm r31, Z
2336 mov r30, __tmp_reg__
2337 ijmp
2338 #elif defined (__AVR_TINY__)
2339 wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2340 ld __tmp_reg__, Z+
2341 ld r31, Z ; Use ld instead of lpm to load Z
2342 mov r30, __tmp_reg__
2343 ijmp
2344 #else
2345 lpm
2346 push r0 ; push target address and "return" to it
2347 adiw r30, 1
2348 lpm
2349 push r0
2350 ret
2351 #endif
2352 ENDF __tablejump2__
2353 #endif /* L_tablejump2 */
2354
2355 #if defined(__AVR_TINY__)
2356 #ifdef L_copy_data
2357 .section .init4,"ax",@progbits
;; AVR_TINY startup: copy .data initializers from flash to RAM.
;; Flash is memory-mapped at __AVR_TINY_PM_BASE_ADDRESS__, so plain
;; ld via Z works instead of lpm.
2358 .global __do_copy_data
2359 __do_copy_data:
2360 ldi r18, hi8(__data_end)
2361 ldi r26, lo8(__data_start) ; X = RAM destination
2362 ldi r27, hi8(__data_start)
2363 ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__) ; Z = flash source
2364 ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2365 rjmp .L__do_copy_data_start
2366 .L__do_copy_data_loop:
2367 ld r19, z+
2368 st X+, r19
2369 .L__do_copy_data_start:
2370 cpi r26, lo8(__data_end) ; loop until X reaches __data_end
2371 cpc r27, r18
2372 brne .L__do_copy_data_loop
2373 #endif
2374 #else
2375 #ifdef L_copy_data
2376 .section .init4,"ax",@progbits
;; Startup code: copy the .data initializers from flash (Z, optionally
;; extended by RAMPZ) to RAM (X), one byte at a time until X reaches
;; __data_end.  The flash-read instruction depends on the device.
2377 DEFUN __do_copy_data
2378 #if defined(__AVR_HAVE_ELPMX__)
2379 ldi r17, hi8(__data_end)
2380 ldi r26, lo8(__data_start) ; X = RAM destination
2381 ldi r27, hi8(__data_start)
2382 ldi r30, lo8(__data_load_start) ; Z = flash source
2383 ldi r31, hi8(__data_load_start)
2384 ldi r16, hh8(__data_load_start)
2385 out __RAMPZ__, r16
2386 rjmp .L__do_copy_data_start
2387 .L__do_copy_data_loop:
2388 elpm r0, Z+
2389 st X+, r0
2390 .L__do_copy_data_start:
2391 cpi r26, lo8(__data_end) ; loop until X reaches __data_end
2392 cpc r27, r17
2393 brne .L__do_copy_data_loop
2394 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2395 ldi r17, hi8(__data_end)
2396 ldi r26, lo8(__data_start) ; X = RAM destination
2397 ldi r27, hi8(__data_start)
2398 ldi r30, lo8(__data_load_start) ; Z = flash source
2399 ldi r31, hi8(__data_load_start)
2400 ldi r16, hh8(__data_load_start - 0x10000)
2401 .L__do_copy_data_carry:
2402 inc r16 ; bump RAMPZ on 64 KiB carry (and initially)
2403 out __RAMPZ__, r16
2404 rjmp .L__do_copy_data_start
2405 .L__do_copy_data_loop:
2406 elpm
2407 st X+, r0
2408 adiw r30, 1
2409 brcs .L__do_copy_data_carry
2410 .L__do_copy_data_start:
2411 cpi r26, lo8(__data_end) ; loop until X reaches __data_end
2412 cpc r27, r17
2413 brne .L__do_copy_data_loop
2414 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2415 ldi r17, hi8(__data_end)
2416 ldi r26, lo8(__data_start) ; X = RAM destination
2417 ldi r27, hi8(__data_start)
2418 ldi r30, lo8(__data_load_start) ; Z = flash source
2419 ldi r31, hi8(__data_load_start)
2420 rjmp .L__do_copy_data_start
2421 .L__do_copy_data_loop:
2422 #if defined (__AVR_HAVE_LPMX__)
2423 lpm r0, Z+
2424 #else
2425 lpm
2426 adiw r30, 1
2427 #endif
2428 st X+, r0
2429 .L__do_copy_data_start:
2430 cpi r26, lo8(__data_end) ; loop until X reaches __data_end
2431 cpc r27, r17
2432 brne .L__do_copy_data_loop
2433 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2434 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2435 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2436 out __RAMPZ__, __zero_reg__
2437 #endif /* ELPM && RAMPD */
2438 ENDF __do_copy_data
2439 #endif /* L_copy_data */
2440 #endif /* !defined (__AVR_TINY__) */
2441
2442 /* __do_clear_bss is only necessary if there is anything in .bss section. */
2443
2444 #ifdef L_clear_bss
2445 .section .init4,"ax",@progbits
;; Startup code: zero the .bss section, storing __zero_reg__ through X
;; until X reaches __bss_end.
2446 DEFUN __do_clear_bss
2447 ldi r18, hi8(__bss_end)
2448 ldi r26, lo8(__bss_start) ; X = start of .bss
2449 ldi r27, hi8(__bss_start)
2450 rjmp .do_clear_bss_start
2451 .do_clear_bss_loop:
2452 st X+, __zero_reg__
2453 .do_clear_bss_start:
2454 cpi r26, lo8(__bss_end) ; loop until X reaches __bss_end
2455 cpc r27, r18
2456 brne .do_clear_bss_loop
2457 ENDF __do_clear_bss
2458 #endif /* L_clear_bss */
2459
2460 /* __do_global_ctors and __do_global_dtors are only necessary
2461 if there are any constructors/destructors. */
2462
2463 #if defined(__AVR_TINY__)
2464 #define cdtors_tst_reg r18
2465 #else
2466 #define cdtors_tst_reg r17
2467 #endif
2468
2469 #ifdef L_ctors
2470 .section .init6,"ax",@progbits
;; Startup code: call every constructor in the __ctors table.
;; Walks Y (word address) DOWN from __ctors_end to __ctors_start and
;; dispatches each entry through __tablejump2__.
2471 DEFUN __do_global_ctors
2472 ldi cdtors_tst_reg, pm_hi8(__ctors_start) ; hi8 of loop end for cpc
2473 ldi r28, pm_lo8(__ctors_end)
2474 ldi r29, pm_hi8(__ctors_end)
2475 #ifdef __AVR_HAVE_EIJMP_EICALL__
2476 ldi r16, pm_hh8(__ctors_end) ; hh8 of table pointer (> 128 KiB flash)
2477 #endif /* HAVE_EIJMP */
2478 rjmp .L__do_global_ctors_start
2479 .L__do_global_ctors_loop:
2480 wsubi 28, 1 ; pre-decrement: walk table downwards
2481 #ifdef __AVR_HAVE_EIJMP_EICALL__
2482 sbc r16, __zero_reg__
2483 mov r24, r16 ; R24 = hh8 for __tablejump2__
2484 #endif /* HAVE_EIJMP */
2485 mov_h r31, r29
2486 mov_l r30, r28 ; Z = word address of table entry
2487 XCALL __tablejump2__
2488 .L__do_global_ctors_start:
2489 cpi r28, pm_lo8(__ctors_start)
2490 cpc r29, cdtors_tst_reg
2491 #ifdef __AVR_HAVE_EIJMP_EICALL__
2492 ldi r24, pm_hh8(__ctors_start)
2493 cpc r16, r24
2494 #endif /* HAVE_EIJMP */
2495 brne .L__do_global_ctors_loop
2496 ENDF __do_global_ctors
2497 #endif /* L_ctors */
2498
2499 #ifdef L_dtors
2500 .section .fini6,"ax",@progbits
;; Shutdown code: call every destructor in the __dtors table.
;; Walks Y (word address) UP from __dtors_start to __dtors_end and
;; dispatches each entry through __tablejump2__.
2501 DEFUN __do_global_dtors
2502 ldi cdtors_tst_reg, pm_hi8(__dtors_end) ; hi8 of loop end for cpc
2503 ldi r28, pm_lo8(__dtors_start)
2504 ldi r29, pm_hi8(__dtors_start)
2505 #ifdef __AVR_HAVE_EIJMP_EICALL__
2506 ldi r16, pm_hh8(__dtors_start) ; hh8 of table pointer (> 128 KiB flash)
2507 #endif /* HAVE_EIJMP */
2508 rjmp .L__do_global_dtors_start
2509 .L__do_global_dtors_loop:
2510 #ifdef __AVR_HAVE_EIJMP_EICALL__
2511 mov r24, r16 ; R24 = hh8 for __tablejump2__
2512 #endif /* HAVE_EIJMP */
2513 mov_h r31, r29
2514 mov_l r30, r28 ; Z = word address of table entry
2515 XCALL __tablejump2__
2516 waddi 28, 1 ; post-increment: walk table upwards
2517 #ifdef __AVR_HAVE_EIJMP_EICALL__
2518 adc r16, __zero_reg__
2519 #endif /* HAVE_EIJMP */
2520 .L__do_global_dtors_start:
2521 cpi r28, pm_lo8(__dtors_end)
2522 cpc r29, cdtors_tst_reg
2523 #ifdef __AVR_HAVE_EIJMP_EICALL__
2524 ldi r24, pm_hh8(__dtors_end)
2525 cpc r16, r24
2526 #endif /* HAVE_EIJMP */
2527 brne .L__do_global_dtors_loop
2528 ENDF __do_global_dtors
2529 #endif /* L_dtors */
2530
2531 #undef cdtors_tst_reg
2532
2533 .section .text.libgcc, "ax", @progbits
2534
2535 #if !defined (__AVR_TINY__)
2536 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2537 ;; Loading n bytes from Flash; n = 3,4
2538 ;; R22... = Flash[Z]
2539 ;; Clobbers: __tmp_reg__
2540
2541 #if (defined (L_load_3) \
2542 || defined (L_load_4)) \
2543 && !defined (__AVR_HAVE_LPMX__)
2544
2545 ;; Destination
2546 #define D0 22
2547 #define D1 D0+1
2548 #define D2 D0+2
2549 #define D3 D0+3
2550
;; Load one byte from flash into \dest via plain lpm (no LPMX on this
;; device).  Advances Z between bytes; after the LAST byte of an n-byte
;; load, rewinds Z to its original value instead.
2551 .macro .load dest, n
2552 lpm
2553 mov \dest, r0
2554 .if \dest != D0+\n-1
2555 adiw r30, 1
2556 .else
2557 sbiw r30, \n-1 ; last byte: restore Z
2558 .endif
2559 .endm
2560
2561 #if defined (L_load_3)
;; Load 3 bytes from flash at Z into R24:R22 by delegating to __load_4
;; and preserving the fourth destination byte (D3) around the call.
2562 DEFUN __load_3
2563 push D3
2564 XCALL __load_4
2565 pop D3
2566 ret
2567 ENDF __load_3
2568 #endif /* L_load_3 */
2569
2570 #if defined (L_load_4)
;; Load 4 bytes from flash at Z into R25:R22; Z is preserved.
2571 DEFUN __load_4
2572 .load D0, 4
2573 .load D1, 4
2574 .load D2, 4
2575 .load D3, 4
2576 ret
2577 ENDF __load_4
2578 #endif /* L_load_4 */
2579
2580 #endif /* L_load_3 || L_load_4 */
2581 #endif /* !defined (__AVR_TINY__) */
2582
2583 #if !defined (__AVR_TINY__)
2584 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2585 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2586 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2587 ;; Clobbers: __tmp_reg__, R21, R30, R31
2588
2589 #if (defined (L_xload_1) \
2590 || defined (L_xload_2) \
2591 || defined (L_xload_3) \
2592 || defined (L_xload_4))
2593
2594 ;; Destination
2595 #define D0 22
2596 #define D1 D0+1
2597 #define D2 D0+2
2598 #define D3 D0+3
2599
2600 ;; Register containing bits 16+ of the address
2601
2602 #define HHI8 21
2603
;; Load one byte from flash into \dest, using the best instruction the
;; device offers (ELPM/LPM variants); propagates a 64 KiB carry into
;; RAMPZ where needed, and resets RAMPZ after the last byte on RAMPD
;; devices so EBI reads are not corrupted.
2604 .macro .xload dest, n
2605 #if defined (__AVR_HAVE_ELPMX__)
2606 elpm \dest, Z+
2607 #elif defined (__AVR_HAVE_ELPM__)
2608 elpm
2609 mov \dest, r0
2610 .if \dest != D0+\n-1
2611 adiw r30, 1
2612 adc HHI8, __zero_reg__ ; propagate 64 KiB carry into RAMPZ
2613 out __RAMPZ__, HHI8
2614 .endif
2615 #elif defined (__AVR_HAVE_LPMX__)
2616 lpm \dest, Z+
2617 #else
2618 lpm
2619 mov \dest, r0
2620 .if \dest != D0+\n-1
2621 adiw r30, 1
2622 .endif
2623 #endif
2624 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2625 .if \dest == D0+\n-1
2626 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2627 out __RAMPZ__, __zero_reg__
2628 .endif
2629 #endif
2630 .endm ; .xload
2631
2632 #if defined (L_xload_1)
;; Load 1 byte from flash (HHI8.7 = 0) or RAM (HHI8.7 = 1) at Z into R22.
2633 DEFUN __xload_1
2634 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2635 sbrc HHI8, 7 ; HHI8.7 selects RAM vs flash
2636 ld D0, Z
2637 sbrs HHI8, 7
2638 lpm D0, Z
2639 ret
2640 #else
2641 sbrc HHI8, 7 ; HHI8.7 set: read from RAM
2642 rjmp 1f
2643 #if defined (__AVR_HAVE_ELPM__)
2644 out __RAMPZ__, HHI8
2645 #endif /* __AVR_HAVE_ELPM__ */
2646 .xload D0, 1
2647 ret
2648 1: ld D0, Z
2649 ret
2650 #endif /* LPMx && ! ELPM */
2651 ENDF __xload_1
2652 #endif /* L_xload_1 */
2653
2654 #if defined (L_xload_2)
;; Load 2 bytes from flash (HHI8.7 = 0) or RAM (HHI8.7 = 1) at Z into R23:R22.
2655 DEFUN __xload_2
2656 sbrc HHI8, 7 ; HHI8.7 set: read from RAM
2657 rjmp 1f
2658 #if defined (__AVR_HAVE_ELPM__)
2659 out __RAMPZ__, HHI8
2660 #endif /* __AVR_HAVE_ELPM__ */
2661 .xload D0, 2
2662 .xload D1, 2
2663 ret
2664 1: ld D0, Z+
2665 ld D1, Z+
2666 ret
2667 ENDF __xload_2
2668 #endif /* L_xload_2 */
2669
2670 #if defined (L_xload_3)
;; Load 3 bytes from flash (HHI8.7 = 0) or RAM (HHI8.7 = 1) at Z into R24:R22.
2671 DEFUN __xload_3
2672 sbrc HHI8, 7 ; HHI8.7 set: read from RAM
2673 rjmp 1f
2674 #if defined (__AVR_HAVE_ELPM__)
2675 out __RAMPZ__, HHI8
2676 #endif /* __AVR_HAVE_ELPM__ */
2677 .xload D0, 3
2678 .xload D1, 3
2679 .xload D2, 3
2680 ret
2681 1: ld D0, Z+
2682 ld D1, Z+
2683 ld D2, Z+
2684 ret
2685 ENDF __xload_3
2686 #endif /* L_xload_3 */
2687
2688 #if defined (L_xload_4)
;; Load 4 bytes from flash (HHI8.7 = 0) or RAM (HHI8.7 = 1) at Z into R25:R22.
2689 DEFUN __xload_4
2690 sbrc HHI8, 7 ; HHI8.7 set: read from RAM
2691 rjmp 1f
2692 #if defined (__AVR_HAVE_ELPM__)
2693 out __RAMPZ__, HHI8
2694 #endif /* __AVR_HAVE_ELPM__ */
2695 .xload D0, 4
2696 .xload D1, 4
2697 .xload D2, 4
2698 .xload D3, 4
2699 ret
2700 1: ld D0, Z+
2701 ld D1, Z+
2702 ld D2, Z+
2703 ld D3, Z+
2704 ret
2705 ENDF __xload_4
2706 #endif /* L_xload_4 */
2707
2708 #endif /* L_xload_{1|2|3|4} */
2709 #endif /* if !defined (__AVR_TINY__) */
2710
2711 #if !defined (__AVR_TINY__)
2712 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2713 ;; memcopy from Address Space __pgmx to RAM
2714 ;; R23:Z = Source Address
2715 ;; X = Destination Address
2716 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2717
2718 #if defined (L_movmemx)
2719
2720 #define HHI8 23
2721 #define LOOP 24
2722
;; Copy LOOP bytes from __pgmx source R23:Z (flash if HHI8.7 = 0,
;; RAM if HHI8.7 = 1) to RAM destination X.
2723 DEFUN __movmemx_qi
2724 ;; #Bytes to copy fits in 8 Bits (1..255)
2725 ;; Zero-extend Loop Counter
2726 clr LOOP+1
2727 ;; FALLTHRU
2728 ENDF __movmemx_qi
2729
2730 DEFUN __movmemx_hi
2731
2732 ;; Read from where?  HHI8.7 set means RAM source
2733 sbrc HHI8, 7
2734 rjmp 1f
2735
2736 ;; Read from Flash
2737
2738 #if defined (__AVR_HAVE_ELPM__)
2739 out __RAMPZ__, HHI8
2740 #endif
2741
2742 0: ;; Load 1 Byte from Flash...
2743
2744 #if defined (__AVR_HAVE_ELPMX__)
2745 elpm r0, Z+
2746 #elif defined (__AVR_HAVE_ELPM__)
2747 elpm
2748 adiw r30, 1
2749 adc HHI8, __zero_reg__ ; propagate 64 KiB carry into RAMPZ
2750 out __RAMPZ__, HHI8
2751 #elif defined (__AVR_HAVE_LPMX__)
2752 lpm r0, Z+
2753 #else
2754 lpm
2755 adiw r30, 1
2756 #endif
2757
2758 ;; ...and store that Byte to RAM Destination
2759 st X+, r0
2760 sbiw LOOP, 1
2761 brne 0b
2762 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2763 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2764 out __RAMPZ__, __zero_reg__
2765 #endif /* ELPM && RAMPD */
2766 ret
2767
2768 ;; Read from RAM
2769
2770 1: ;; Read 1 Byte from RAM...
2771 ld r0, Z+
2772 ;; and store that Byte to RAM Destination
2773 st X+, r0
2774 sbiw LOOP, 1
2775 brne 1b
2776 ret
2777 ENDF __movmemx_hi
2778
2779 #undef HHI8
2780 #undef LOOP
2781
2782 #endif /* L_movmemx */
2783 #endif /* !defined (__AVR_TINY__) */
2784
2785
2786 .section .text.libgcc.builtins, "ax", @progbits
2788
2789 /**********************************
2790 * Find first set Bit (ffs)
2791 **********************************/
2792
2793 #if defined (L_ffssi2)
2794 ;; find first set bit
2795 ;; r25:r24 = ffs32 (r25:r22)
2796 ;; clobbers: r22, r26
;; Result is 1-based bit position of the lowest set bit; 0 for input 0.
;; Strategy: find the lowest non-zero byte, accumulate an offset of 8
;; per skipped byte in r26, then scan that byte in __loop_ffsqi2.
;; OR-ing the higher bytes into r22 is harmless: lower bytes on this
;; path are known to be zero.
2797 DEFUN __ffssi2
2798 clr r26 ; r26 = bit-position offset of current byte
2799 tst r22
2800 brne 1f
2801 subi r26, -8 ; r26 += 8
2802 or r22, r23
2803 brne 1f
2804 subi r26, -8
2805 or r22, r24
2806 brne 1f
2807 subi r26, -8
2808 or r22, r25
2809 brne 1f
2810 ret ; all bytes zero -> r25:r24 == 0 here, i.e. ffs(0) = 0
2811 1: mov r24, r22 ; non-zero byte found: scan it bit by bit
2812 XJMP __loop_ffsqi2
2813 ENDF __ffssi2
2814 #endif /* defined (L_ffssi2) */
2815
2816 #if defined (L_ffshi2)
2817 ;; find first set bit
2818 ;; r25:r24 = ffs16 (r25:r24)
2819 ;; clobbers: r26
;; Result is 1-based bit position of the lowest set bit; 0 for input 0.
2820 DEFUN __ffshi2
2821 clr r26 ; r26 = bit-position offset (0 for low byte, 8 for high)
2822 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2823 ;; Some cores have problem skipping 2-word instruction
2824 tst r24
2825 breq 2f
2826 #else
;; Skip the (possibly 2-word) XJMP when the low byte is zero.
2827 cpse r24, __zero_reg__
2828 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2829 1: XJMP __loop_ffsqi2 ; scan the byte in r24
2830 2: ldi r26, 8 ; low byte zero: offset 8, scan the high byte
2831 or r24, r25
2832 brne 1b
2833 ret ; both bytes zero -> r25:r24 == 0, i.e. ffs(0) = 0
2834 ENDF __ffshi2
2835 #endif /* defined (L_ffshi2) */
2836
2837 #if defined (L_loop_ffsqi2)
2838 ;; Helper for ffshi2, ffssi2
2839 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2840 ;; r24 must be != 0
2841 ;; clobbers: r26
;; Shift r24 right until the set bit falls out into carry, counting
;; positions in r26 on top of the caller-supplied byte offset.
2842 DEFUN __loop_ffsqi2
2843 inc r26
2844 lsr r24
2845 brcc __loop_ffsqi2 ; loop until a 1 bit is shifted into carry
2846 mov r24, r26
2847 clr r25 ; zero-extend the result to 16 bits
2848 ret
2849 ENDF __loop_ffsqi2
2850 #endif /* defined (L_loop_ffsqi2) */
2851
2852
2853 /**********************************
2855 * Count trailing Zeros (ctz)
2856 **********************************/
2857
2858 #if defined (L_ctzsi2)
2859 ;; count trailing zeros
2860 ;; r25:r24 = ctz32 (r25:r22)
2861 ;; clobbers: r26, r22
2862 ;; ctz(0) = 255
2863 ;; Note that ctz(0) is undefined for GCC
;; ctz(x) = ffs(x) - 1; ffs(0) returns 0, so dec yields 255 (0xFF) in
;; r24 for a zero input.
2864 DEFUN __ctzsi2
2865 XCALL __ffssi2
2866 dec r24
2867 ret
2868 ENDF __ctzsi2
2869 #endif /* defined (L_ctzsi2) */
2870
2871 #if defined (L_ctzhi2)
2872 ;; count trailing zeros
2873 ;; r25:r24 = ctz16 (r25:r24)
2874 ;; clobbers: r26
2875 ;; ctz(0) = 255
2876 ;; Note that ctz(0) is undefined for GCC
;; ctz(x) = ffs(x) - 1; ffs(0) returns 0, so dec yields 255 (0xFF) in
;; r24 for a zero input.
2877 DEFUN __ctzhi2
2878 XCALL __ffshi2
2879 dec r24
2880 ret
2881 ENDF __ctzhi2
2882 #endif /* defined (L_ctzhi2) */
2883
2884
2885 /**********************************
2887 * Count leading Zeros (clz)
2888 **********************************/
2889
2890 #if defined (L_clzdi2)
2891 ;; count leading zeros
2892 ;; r25:r24 = clz64 (r25:r18)
2893 ;; clobbers: r22, r23, r26
;; Count the high 32 bits first.  clz32 of a zero word is 32 (bit 5
;; set), so r24 bit 5 tells us whether to continue with the low word
;; and add 32 to its count.
2894 DEFUN __clzdi2
2895 XCALL __clzsi2 ; clz of high word r25:r22
2896 sbrs r24, 5 ; result < 32 -> high word non-zero, done
2897 ret
2898 mov_l r22, r18 ; move low word r21:r18 into the clzsi2 argument
2899 mov_h r23, r19
2900 mov_l r24, r20
2901 mov_h r25, r21
2902 XCALL __clzsi2
2903 subi r24, -32 ; r24 += 32 (count cannot carry into r25)
2904 ret
2905 ENDF __clzdi2
2906 #endif /* defined (L_clzdi2) */
2907
2908 #if defined (L_clzsi2)
2909 ;; count leading zeros
2910 ;; r25:r24 = clz32 (r25:r22)
2911 ;; clobbers: r26
;; Count the high 16 bits first.  clz16 of zero is 16 (bit 4 set), so
;; r24 bit 4 tells us whether to continue with the low half-word and
;; add 16 to its count.
2912 DEFUN __clzsi2
2913 XCALL __clzhi2 ; clz of high half r25:r24
2914 sbrs r24, 4 ; result < 16 -> high half non-zero, done
2915 ret
2916 mov_l r24, r22 ; count the low half r23:r22
2917 mov_h r25, r23
2918 XCALL __clzhi2
2919 subi r24, -16 ; r24 += 16
2920 ret
2921 ENDF __clzsi2
2922 #endif /* defined (L_clzsi2) */
2923
2924 #if defined (L_clzhi2)
2925 ;; count leading zeros
2926 ;; r25:r24 = clz16 (r25:r24)
2927 ;; clobbers: r26
;; clz(0) = 16.  r26 accumulates the count: +8 if the high byte is
;; zero, +3 plus a swap if the byte's top nibble is zero, then a
;; shift-left loop counts the remaining leading zeros bit by bit.
2928 DEFUN __clzhi2
2929 clr r26
2930 tst r25
2931 brne 1f
2932 subi r26, -8 ; high byte zero: r26 += 8, scan the low byte
2933 or r25, r24
2934 brne 1f
2935 ldi r24, 16 ; whole word zero: clz = 16
2936 ret
2937 1: cpi r25, 16
2938 brsh 3f ; top nibble non-empty: enter loop at the lsl
2939 subi r26, -3 ; top nibble zero: credit 3 (4th comes from the loop)
2940 swap r25 ; move low nibble up for the bit loop
2941 2: inc r26
2942 3: lsl r25
2943 brcc 2b ; loop until the leading 1 shifts into carry
2944 mov r24, r26
2945 clr r25 ; zero-extend the result to 16 bits
2946 ret
2947 ENDF __clzhi2
2948 #endif /* defined (L_clzhi2) */
2949
2950
2951 /**********************************
2953 * Parity
2954 **********************************/
2955
2956 #if defined (L_paritydi2)
2957 ;; r25:r24 = parity64 (r25:r18)
2958 ;; clobbers: __tmp_reg__
;; Parity is XOR-reducible: fold the low four bytes into r24, then let
;; __paritysi2 / __parityhi2 / __parityqi2 finish the reduction.
2959 DEFUN __paritydi2
2960 eor r24, r18
2961 eor r24, r19
2962 eor r24, r20
2963 eor r24, r21
2964 XJMP __paritysi2
2965 ENDF __paritydi2
2966 #endif /* defined (L_paritydi2) */
2967
2968 #if defined (L_paritysi2)
2969 ;; r25:r24 = parity32 (r25:r22)
2970 ;; clobbers: __tmp_reg__
;; XOR-fold the two low bytes into r24 and tail-call the 16-bit case.
2971 DEFUN __paritysi2
2972 eor r24, r22
2973 eor r24, r23
2974 XJMP __parityhi2
2975 ENDF __paritysi2
2976 #endif /* defined (L_paritysi2) */
2977
2978 #if defined (L_parityhi2)
2979 ;; r25:r24 = parity16 (r25:r24)
2980 ;; clobbers: __tmp_reg__
;; XOR-fold the high byte into r24, then fall through to the 8-bit case.
2981 DEFUN __parityhi2
2982 eor r24, r25
2983 ;; FALLTHRU
2984 ENDF __parityhi2
2985 
2986 ;; r25:r24 = parity8 (r24)
2987 ;; clobbers: __tmp_reg__
;; Fold nibbles with swap+eor, then use an arithmetic bit trick (the
;; subi/andi/subi sequence) to reduce the four low bits so that the
;; parity ends up spread over r24 bits 0 and 3, which the final
;; sbrc/inc combines into bit 0.
2988 DEFUN __parityqi2
2989 ;; parity is in r24[0..7]
2990 mov __tmp_reg__, r24
2991 swap __tmp_reg__
2992 eor r24, __tmp_reg__ ; XOR high nibble into low nibble
2993 ;; parity is in r24[0..3]
2994 subi r24, -4
2995 andi r24, -5
2996 subi r24, -6
2997 ;; parity is in r24[0,3]
2998 sbrc r24, 3
2999 inc r24 ; fold bit 3 into bit 0
3000 ;; parity is in r24[0]
3001 andi r24, 1
3002 clr r25 ; zero-extend the result to 16 bits
3003 ret
3004 ENDF __parityqi2
3005 #endif /* defined (L_parityhi2) */
3006
3007
3008 /**********************************
3010 * Population Count
3011 **********************************/
3012
3013 #if defined (L_popcounthi2)
3014 ;; population count
3015 ;; r25:r24 = popcount16 (r25:r24)
3016 ;; clobbers: __tmp_reg__
;; Count each byte with __popcountqi2; the low byte's count is parked
;; on the stack and added back in the shared tail below.
3017 DEFUN __popcounthi2
3018 XCALL __popcountqi2 ; count of low byte
3019 push r24 ; save it across the second call
3020 mov r24, r25
3021 XCALL __popcountqi2 ; count of high byte
3022 clr r25 ; zero-extend the result to 16 bits
3023 ;; FALLTHRU
3024 ENDF __popcounthi2
3025 
;; Shared tail: r24 += popped partial count.  Also used by
;; __popcountsi2 and __popcountdi2.
3026 DEFUN __popcounthi2_tail
3027 pop __tmp_reg__
3028 add r24, __tmp_reg__
3029 ret
3030 ENDF __popcounthi2_tail
3031 #endif /* defined (L_popcounthi2) */
3032
3033 #if defined (L_popcountsi2)
3034 ;; population count
3035 ;; r25:r24 = popcount32 (r25:r22)
3036 ;; clobbers: __tmp_reg__
;; Count high and low half-words with __popcounthi2, stacking the
;; first partial count and adding it back via __popcounthi2_tail.
3037 DEFUN __popcountsi2
3038 XCALL __popcounthi2 ; count of high half r25:r24
3039 push r24
3040 mov_l r24, r22 ; count the low half r23:r22
3041 mov_h r25, r23
3042 XCALL __popcounthi2
3043 XJMP __popcounthi2_tail ; r24 += pushed partial count
3044 ENDF __popcountsi2
3045 #endif /* defined (L_popcountsi2) */
3046
3047 #if defined (L_popcountdi2)
3048 ;; population count
3049 ;; r25:r24 = popcount64 (r25:r18)
3050 ;; clobbers: r22, r23, __tmp_reg__
;; Count high and low 32-bit words with __popcountsi2, stacking the
;; first partial count and adding it back via __popcounthi2_tail.
3051 DEFUN __popcountdi2
3052 XCALL __popcountsi2 ; count of high word r25:r22
3053 push r24
3054 mov_l r22, r18 ; count the low word r21:r18
3055 mov_h r23, r19
3056 mov_l r24, r20
3057 mov_h r25, r21
3058 XCALL __popcountsi2
3059 XJMP __popcounthi2_tail ; r24 += pushed partial count
3060 ENDF __popcountdi2
3061 #endif /* defined (L_popcountdi2) */
3062
3063 #if defined (L_popcountqi2)
3064 ;; population count
3065 ;; r24 = popcount8 (r24)
3066 ;; clobbers: __tmp_reg__
;; Keep bit 0 in r24, then shift the copy right and accumulate each
;; bit from the carry flag via adc.  After the 7th lsr, carry holds
;; bit 6 and __tmp_reg__ holds bit 7, so the final
;; "adc r24, __tmp_reg__" adds both at once.
3067 DEFUN __popcountqi2
3068 mov __tmp_reg__, r24
3069 andi r24, 1 ; r24 = bit 0
3070 lsr __tmp_reg__
3071 lsr __tmp_reg__ ; C = bit 1
3072 adc r24, __zero_reg__
3073 lsr __tmp_reg__ ; C = bit 2
3074 adc r24, __zero_reg__
3075 lsr __tmp_reg__ ; C = bit 3
3076 adc r24, __zero_reg__
3077 lsr __tmp_reg__ ; C = bit 4
3078 adc r24, __zero_reg__
3079 lsr __tmp_reg__ ; C = bit 5
3080 adc r24, __zero_reg__
3081 lsr __tmp_reg__ ; C = bit 6, __tmp_reg__ = bit 7
3082 adc r24, __tmp_reg__ ; add bits 6 and 7 together
3083 ret
3084 ENDF __popcountqi2
3085 #endif /* defined (L_popcountqi2) */
3086
3087
3088 /**********************************
3090 * Swap bytes
3091 **********************************/
3092
3093 ;; swap two registers with different register number
;; Classic XOR swap; needs no scratch register, but \a and \b must be
;; distinct registers or both end up zero.
3094 .macro bswap a, b
3095 eor \a, \b
3096 eor \b, \a
3097 eor \a, \b
3098 .endm
3099
3100 #if defined (L_bswapsi2)
3101 ;; swap bytes
3102 ;; r25:r22 = bswap32 (r25:r22)
;; Reverse byte order in place by exchanging the outer and inner
;; register pairs with the XOR-swap macro above.
3103 DEFUN __bswapsi2
3104 bswap r22, r25 ; exchange bytes 0 and 3
3105 bswap r23, r24 ; exchange bytes 1 and 2
3106 ret
3107 ENDF __bswapsi2
3108 #endif /* defined (L_bswapsi2) */
3109
3110 #if defined (L_bswapdi2)
3111 ;; swap bytes
3112 ;; r25:r18 = bswap64 (r25:r18)
;; Reverse byte order in place by exchanging the four symmetric
;; register pairs with the XOR-swap macro above.
3113 DEFUN __bswapdi2
3114 bswap r18, r25 ; exchange bytes 0 and 7
3115 bswap r19, r24 ; exchange bytes 1 and 6
3116 bswap r20, r23 ; exchange bytes 2 and 5
3117 bswap r21, r22 ; exchange bytes 3 and 4
3118 ret
3119 ENDF __bswapdi2
3120 #endif /* defined (L_bswapdi2) */
3121
3122
3123 /**********************************
3125 * 64-bit shifts
3126 **********************************/
3127
3128 #if defined (L_ashrdi3)
3129 
3130 #define SS __zero_reg__
3131 
3132 ;; Arithmetic shift right
3133 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
;; SS (reusing __zero_reg__) supplies the fill byte shifted in from
;; the top: it stays 0x00 for a non-negative value and is complemented
;; to 0xFF when r25 bit 7 is set, then falls through to the common
;; shift loop below.  __zero_reg__ is restored to 0 before returning.
3134 DEFUN __ashrdi3
3135 sbrc r25, 7
3136 com SS ; negative input: fill byte = 0xFF
3137 ;; FALLTHRU
3138 ENDF __ashrdi3
3139 
3140 ;; Logic shift right
3141 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
;; Only the low count byte r16 is used (r17 is never read).  r16 is
;; preserved across the call via __tmp_reg__.
3142 DEFUN __lshrdi3
3143 ;; Signs are in SS (zero_reg)
3144 mov __tmp_reg__, r16 ; save shift count
3145 0: cpi r16, 8
3146 brlo 2f
3147 subi r16, 8 ; shift right by whole bytes first
3148 mov r18, r19
3149 mov r19, r20
3150 mov r20, r21
3151 mov r21, r22
3152 mov r22, r23
3153 mov r23, r24
3154 mov r24, r25
3155 mov r25, SS ; top byte refilled from SS
3156 rjmp 0b
3157 1: asr SS ; keeps SS at 0x00 or 0xFF, C = fill bit
3158 ror r25 ; then shift right by the remaining 0..7 bits
3159 ror r24
3160 ror r23
3161 ror r22
3162 ror r21
3163 ror r20
3164 ror r19
3165 ror r18
3166 2: dec r16
3167 brpl 1b ; bit loop runs (remaining count) times
3168 clr __zero_reg__ ; restore the zero register (may hold 0xFF)
3169 mov r16, __tmp_reg__ ; restore shift count
3170 ret
3171 ENDF __lshrdi3
3172 
3173 #undef SS
3174 
3175 #endif /* defined (L_ashrdi3) */
3176
3177 #if defined (L_ashldi3)
3178 ;; Shift left
3179 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3180 ;; This function does not clobber T.
;; Only the low count byte r16 is used (r17 is never read).  Shift by
;; whole bytes first, then by the remaining 0..7 bits.  r16 is
;; preserved across the call via __tmp_reg__.
3181 DEFUN __ashldi3
3182 mov __tmp_reg__, r16 ; save shift count
3183 0: cpi r16, 8
3184 brlo 2f
3185 mov r25, r24 ; shift left by one whole byte
3186 mov r24, r23
3187 mov r23, r22
3188 mov r22, r21
3189 mov r21, r20
3190 mov r20, r19
3191 mov r19, r18
3192 clr r18 ; zero shifted in at the bottom
3193 subi r16, 8
3194 rjmp 0b
3195 1: lsl r18 ; shift left by one bit, carry rippling up
3196 rol r19
3197 rol r20
3198 rol r21
3199 rol r22
3200 rol r23
3201 rol r24
3202 rol r25
3203 2: dec r16
3204 brpl 1b ; bit loop runs (remaining count) times
3205 mov r16, __tmp_reg__ ; restore shift count
3206 ret
3207 ENDF __ashldi3
3208 #endif /* defined (L_ashldi3) */
3209
3210 #if defined (L_rotldi3)
3211 ;; Rotate left
3212 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
;; Only the low count byte r16 is used (r17 is never read).  Rotate by
;; whole bytes first, then bit by bit; the bit step wraps the carry
;; out of r25 back into bit 0 of r18 via adc.  r16 is preserved on the
;; stack.
3213 DEFUN __rotldi3
3214 push r16 ; save rotate count
3215 0: cpi r16, 8
3216 brlo 2f
3217 subi r16, 8 ; rotate left by one whole byte
3218 mov __tmp_reg__, r25
3219 mov r25, r24
3220 mov r24, r23
3221 mov r23, r22
3222 mov r22, r21
3223 mov r21, r20
3224 mov r20, r19
3225 mov r19, r18
3226 mov r18, __tmp_reg__ ; old top byte wraps to the bottom
3227 rjmp 0b
3228 1: lsl r18 ; rotate left by one bit
3229 rol r19
3230 rol r20
3231 rol r21
3232 rol r22
3233 rol r23
3234 rol r24
3235 rol r25
3236 adc r18, __zero_reg__ ; wrap carry out of r25 into bit 0
3237 2: dec r16
3238 brpl 1b ; bit loop runs (remaining count) times
3239 pop r16 ; restore rotate count
3240 ret
3241 ENDF __rotldi3
3242 #endif /* defined (L_rotldi3) */
3243
3244
3245 .section .text.libgcc.fmul, "ax", @progbits
3247
3248 /***********************************************************/
3249 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3250 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3251 /***********************************************************/
3252
3253 #define A1 24
3254 #define B1 25
3255 #define C0 22
3256 #define C1 23
3257 #define A0 __tmp_reg__
3258
3259 #ifdef L_fmuls
3260 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3261 ;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x signed: record the result sign (sign(A) XOR sign(B)) in
;;; A0 bit 7, make B1 non-negative, and let __fmulsu_exit handle |A1|
;;; and the unsigned multiply plus conditional negation.
3262 DEFUN __fmuls
3263 ;; A0.7 = negate result?
3264 mov A0, A1
3265 eor A0, B1 ; A0.7 = sign(A1) ^ sign(B1)
3266 ;; B1 = |B1|
3267 sbrc B1, 7
3268 neg B1
3269 XJMP __fmulsu_exit
3270 ENDF __fmuls
3271 #endif /* L_fmuls */
3272
3273 #ifdef L_fmulsu
3274 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3275 ;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x unsigned: the result sign is just sign(A), recorded in
;;; A0 bit 7; falls through to the shared helper below.
3276 DEFUN __fmulsu
3277 ;; A0.7 = negate result?
3278 mov A0, A1
3279 ;; FALLTHRU
3280 ENDF __fmulsu
3281 
3282 ;; Helper for __fmuls and __fmulsu
;; On entry A0.7 holds the "negate result" flag and B1 is already
;; non-negative; take |A1|, run the unsigned __fmul, and negate the
;; 16-bit result C1:C0 if the flag was set.
3283 DEFUN __fmulsu_exit
3284 ;; A1 = |A1|
3285 sbrc A1, 7
3286 neg A1
3287 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3288 ;; Some cores have problem skipping 2-word instruction
3289 tst A0
3290 brmi 1f
3291 #else
;; Skip the (possibly 2-word) XJMP when the negate flag is set.
3292 sbrs A0, 7
3293 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3294 XJMP __fmul ; positive result: tail-call the unsigned multiply
3295 1: XCALL __fmul
3296 ;; C = -C iff A0.7 = 1
3297 NEG2 C0 ; 16-bit negate of C1:C0
3298 ret
3299 ENDF __fmulsu_exit
3300 #endif /* L_fmulsu */
3301
3302
3303 #ifdef L_fmul
3304 ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
3305 ;;; Clobbers: r24, r25, __tmp_reg__
;;; Shift-and-add fractional multiply.  A starts as A1:A0 = r24:0
;;; (i.e. r24 << 8); for each B1 bit from the MSB down, add A to C and
;;; halve A.  Summed over all bits this yields (r24 * r25) << 1, the
;;; FMUL scaling for 1.7 fixed-point.  Terminates when B1, shifted
;;; left each round, becomes zero — so fmul(x, 0) returns 0 at once.
3306 DEFUN __fmul
3307 ; clear result
3308 clr C0
3309 clr C1
3310 clr A0
3311 1: tst B1 ; set N/Z for the first brpl test
3312 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
3313 2: brpl 3f ; current MSB of B clear -> skip the add
3314 ;; C += A
3315 add C0, A0
3316 adc C1, A1
3317 3: ;; A >>= 1
3318 lsr A1
3319 ror A0
3320 ;; B <<= 1
3321 lsl B1 ; next bit into the sign position; Z set when done
3322 brne 2b
3323 ret
3324 ENDF __fmul
3325 #endif /* L_fmul */
3326
3327 #undef A0
3328 #undef A1
3329 #undef B1
3330 #undef C0
3331 #undef C1
3332
3333 #include "lib1funcs-fixed.S"
3334