/* -*- Mode: Asm -*- */
/* Copyright (C) 1998-2020 Free Software Foundation, Inc.
   Contributed by Denis Chertykov <chertykov (at) gmail.com>

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

;; Register and I/O address aliases.
;; On reduced (TINY) cores r0/r1 do not exist, so tmp/zero live in r16/r17.
#if defined (__AVR_TINY__)
#define __zero_reg__ r17
#define __tmp_reg__ r16
#else
#define __zero_reg__ r1
#define __tmp_reg__ r0
#endif
#define __SREG__ 0x3f
#if defined (__AVR_HAVE_SPH__)
#define __SP_H__ 0x3e
#endif
#define __SP_L__ 0x3d
#define __RAMPZ__ 0x3B
#define __EIND__  0x3C

/* Most of the functions here are called directly from avr.md
   patterns, instead of using the standard libcall mechanisms.
   This can make better code because GCC knows exactly which
   of the call-used registers (not all of them) are clobbered.  */

/* FIXME:  At present, there is no SORT directive in the linker
           script so that we must not assume that different modules
           in the same input section like .libgcc.text.mul will be
           located close together.  Therefore, we cannot use
           RCALL/RJMP to call a function like __udivmodhi4 from
           __divmodhi4 and have to use lengthy XCALL/XJMP even
           though they are in the same input section and all same
           input sections together are small enough to reach every
           location with a RCALL/RJMP instruction.  */

#if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
#error device not supported
#endif

;; mov_l/mov_h emulate a MOVW as two MOVs on cores without MOVW;
;; with MOVW available, mov_l does the whole word move and mov_h is empty.
.macro mov_l  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest, \r_src
#else
    mov \r_dest, \r_src
#endif
.endm

.macro mov_h  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    ; empty
#else
    mov \r_dest, \r_src
#endif
.endm

;; Word move; arguments must be numeric register numbers (not rNN names)
;; so that the +1 arithmetic works in the non-MOVW fallback.
.macro  wmov  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest,   \r_src
#else
    mov \r_dest,    \r_src
    mov \r_dest+1,  \r_src+1
#endif
.endm

#if defined (__AVR_HAVE_JMP_CALL__)
#define XCALL call
#define XJMP  jmp
#else
#define XCALL rcall
#define XJMP  rjmp
#endif

#if defined (__AVR_HAVE_EIJMP_EICALL__)
#define XICALL eicall
#define XIJMP  eijmp
#else
#define XICALL icall
#define XIJMP  ijmp
#endif

;; Prologue stuff

;; Tail-jump into __prologue_saves__ so that exactly n_pushed registers
;; are saved; X = frame size, Z = return point after the saves.
.macro do_prologue_saves n_pushed n_frame=0
    ldi r26, lo8(\n_frame)
    ldi r27, hi8(\n_frame)
    ldi r30, lo8(gs(.L_prologue_saves.\@))
    ldi r31, hi8(gs(.L_prologue_saves.\@))
    XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
.L_prologue_saves.\@:
.endm

;; Epilogue stuff

;; Deallocate n_frame bytes and tail-jump into __epilogue_restores__
;; so that exactly n_pushed registers are restored.
.macro do_epilogue_restores n_pushed n_frame=0
    in      r28, __SP_L__
#ifdef __AVR_HAVE_SPH__
    in      r29, __SP_H__
.if \n_frame > 63
    subi    r28, lo8(-\n_frame)
    sbci    r29, hi8(-\n_frame)
.elseif \n_frame > 0
    adiw    r28, \n_frame
.endif
#else
    clr     r29
.if \n_frame > 0
    subi    r28, lo8(-\n_frame)
.endif
#endif /* HAVE SPH */
    ldi     r30, \n_pushed
    XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
.endm

;; Support function entry and exit for convenience

;; 16-bit subtract-immediate; SBIW only exists on non-TINY cores.
.macro wsubi  r_arg1, i_arg2
#if defined (__AVR_TINY__)
    subi \r_arg1,   lo8(\i_arg2)
    sbci \r_arg1+1, hi8(\i_arg2)
#else
    sbiw \r_arg1, \i_arg2
#endif
.endm

;; 16-bit add-immediate (implemented by subtracting the negation on TINY).
.macro waddi  r_arg1, i_arg2
#if defined (__AVR_TINY__)
    subi \r_arg1,   lo8(-\i_arg2)
    sbci \r_arg1+1, hi8(-\i_arg2)
#else
    adiw \r_arg1, \i_arg2
#endif
.endm

.macro DEFUN name
.global \name
.func \name
\name:
.endm

.macro ENDF name
.size \name, .-\name
.endfunc
.endm

;; Zero-size alias entry point for a function that follows immediately.
.macro FALIAS name
.global \name
.func \name
\name:
.size \name, .-\name
.endfunc
.endm

;; Skip next instruction, typically a jump target
#define skip cpse 16,16

;; Negate a 2-byte value held in consecutive registers
.macro NEG2  reg
    com \reg+1
    neg \reg
    sbci \reg+1, -1
.endm

;; Negate a 4-byte value held in consecutive registers
;; Sets the V flag for signed overflow tests if REG >= 16
.macro NEG4  reg
    com \reg+3
    com \reg+2
    com \reg+1
.if \reg >= 16
    neg \reg
    sbci \reg+1, -1
    sbci \reg+2, -1
    sbci \reg+3, -1
.else
    ;; Registers < 16 cannot do SBCI; COM sets carry, which ADC propagates.
    com \reg
    adc \reg,   __zero_reg__
    adc \reg+1, __zero_reg__
    adc \reg+2, __zero_reg__
    adc \reg+3, __zero_reg__
.endif
.endm

#define exp_lo(N)  hlo8 ((N) << 23)
#define exp_hi(N)  hhi8 ((N) << 23)


.section .text.libgcc.mul, "ax", @progbits

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
#if !defined (__AVR_HAVE_MUL__)
/*******************************************************
    Multiplication  8 x 8  without MUL
*******************************************************/
#if defined (L_mulqi3)

#define r_arg2  r22             /* multiplicand */
#define r_arg1  r24             /* multiplier */
#define r_res   __tmp_reg__     /* result */

;; unsigned char __mulqi3 (A, B): R24 = R24 * R22, shift-and-add loop.
DEFUN __mulqi3
    clr r_res               ; clear result
__mulqi3_loop:
    sbrc r_arg1,0
    add  r_res,r_arg2
    add  r_arg2,r_arg2      ; shift multiplicand
    breq __mulqi3_exit      ; while multiplicand != 0
    lsr  r_arg1             ;
    brne __mulqi3_loop      ; exit if multiplier = 0
__mulqi3_exit:
    mov  r_arg1,r_res       ; result to return register
    ret
ENDF __mulqi3

#undef r_arg2
#undef r_arg1
#undef r_res

#endif /* defined (L_mulqi3) */


/*******************************************************
    Widening Multiplication  16 = 8 x 8  without MUL
    Multiplication  16 x 16  without MUL
*******************************************************/

#define A0  22
#define A1  23
#define B0  24
#define BB0 20
#define B1  25
;; Output overlaps input, thus expand result in CC0/1
#define C0  24
#define C1  25
#define CC0 __tmp_reg__
#define CC1 21
#if defined (L_umulqihi3)
;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __umulqihi3
    ;; Zero-extend both operands and reuse the 16 x 16 routine.
    clr A1
    clr B1
    XJMP __mulhi3
ENDF __umulqihi3
#endif /* L_umulqihi3 */

#if defined (L_mulqihi3)
;;; R25:R24 = (signed int) R22 * (signed int) R24
;;; (C1:C0) = (signed int) A0 * (signed int) B0
;;; Clobbers: __tmp_reg__, R20..R23
DEFUN __mulqihi3
    ;; Sign-extend B0
    clr B1
    sbrc B0, 7
    com B1
    ;; The multiplication runs twice as fast if A1 is zero, thus:
    ;; Zero-extend A0
    clr A1
#ifdef __AVR_HAVE_JMP_CALL__
    ;; Store  B0 * sign of A
    clr BB0
    sbrc A0, 7
    mov BB0, B0
    call __mulhi3
#else /* have no CALL */
    ;; Skip sign-extension of A if A >= 0
    ;; Same size as with the first alternative but avoids errata skip
    ;; and is faster if A >= 0
    sbrs A0, 7
    rjmp __mulhi3
    ;; If  A < 0  store B
    mov BB0, B0
    rcall __mulhi3
#endif /* HAVE_JMP_CALL */
    ;; 1-extend A after the multiplication
    sub C1, BB0
    ret
ENDF __mulqihi3
#endif /* L_mulqihi3 */

#if defined (L_mulhi3)
;;; R25:R24 = R23:R22 * R25:R24
;;; (C1:C0) = (A1:A0) * (B1:B0)
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __mulhi3

    ;; Clear result
    clr CC0
    clr CC1
    rjmp 3f
1:
    ;; Bit n of A is 1 --> C += B << n
    add CC0, B0
    adc CC1, B1
2:
    lsl B0
    rol B1
3:
    ;; If B == 0 we are ready
    wsubi B0, 0
    breq 9f

    ;; Carry = n-th bit of A
    lsr A1
    ror A0
    ;; If bit n of A is set, then go add  B * 2^n  to  C
    brcs 1b

    ;; Carry = 0 --> The ROR above acts like  CP A0, 0
    ;; Thus, it is sufficient to CPC the high part to test A against 0
    cpc A1, __zero_reg__
    ;; Only proceed if A != 0
    brne 2b
9:
    ;; Move Result into place
    mov C0, CC0
    mov C1, CC1
    ret
ENDF  __mulhi3
#endif /* L_mulhi3 */

#undef A0
#undef A1
#undef B0
#undef BB0
#undef B1
#undef C0
#undef C1
#undef CC0
#undef CC1


;; Register layout for the 32-bit routines without MUL.
#define A0 22
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#define B0 18
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define CC0 26
#define CC1 CC0+1
#define CC2 30
#define CC3 CC2+1

#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

/*******************************************************
    Widening Multiplication  32 = 16 x 16  without MUL
*******************************************************/

#if defined (L_umulhisi3)
DEFUN __umulhisi3
    wmov    B0, 24
    ;; Zero-extend B
    clr B2
    clr B3
    ;; Zero-extend A
    wmov    A2, B2
    XJMP __mulsi3
ENDF __umulhisi3
#endif /* L_umulhisi3 */

#if defined (L_mulhisi3)
;; R25:R22 = (signed long) R23:R22 * (signed long) R25:R24
DEFUN __mulhisi3
    wmov    B0, 24
    ;; Sign-extend B
    lsl r25
    sbc B2, B2
    mov B3, B2
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Sign-extend A
    clr A2
    sbrc A1, 7
    com A2
    mov A3, A2
    XJMP __mulsi3
#else /*  no __AVR_ERRATA_SKIP_JMP_CALL__ */
    ;; Zero-extend A and __mulsi3 will run at least twice as fast
    ;; compared to a sign-extended A.
    clr A2
    clr A3
    sbrs A1, 7
    XJMP __mulsi3
    ;; If  A < 0  then perform the  B * 0xffff.... before the
    ;; very multiplication by initializing the high part of the
    ;; result CC with -B.
    wmov    CC2, A2
    sub CC2, B0
    sbc CC3, B1
    XJMP __mulsi3_helper
#endif /*  __AVR_ERRATA_SKIP_JMP_CALL__ */
ENDF __mulhisi3
#endif /* L_mulhisi3 */


/*******************************************************
    Multiplication  32 x 32  without MUL
*******************************************************/

#if defined (L_mulsi3)
DEFUN __mulsi3
#if defined (__AVR_TINY__)
    in r26, __SP_L__            ; safe to use X, as it is CC0/CC1
    in r27, __SP_H__
    subi r26, lo8(-3)           ; Add 3 to point past return address
    sbci r27, hi8(-3)
    push B0                     ; save callee saved regs
    push B1
    ld B0, X+                   ; load from caller stack
    ld B1, X+
    ld B2, X+
    ld B3, X
#endif
    ;; Clear result
    clr CC2
    clr CC3
    ;; FALLTHRU
ENDF  __mulsi3

DEFUN __mulsi3_helper
    clr CC0
    clr CC1
    rjmp 3f

1:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
    ;; CC += B
    add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3

2:  ;; B <<= 1
    lsl B0 $ rol B1 $ rol B2 $ rol B3

3:  ;; A >>= 1:  Carry = n-th bit of A
    lsr A3 $ ror A2 $ ror A1 $ ror A0

    brcs 1b
    ;; Only continue if  A != 0
    sbci A1, 0
    brne 2b
    wsubi A2, 0
    brne 2b

    ;; All bits of A are consumed:  Copy result to return register C
    wmov C0, CC0
    wmov C2, CC2
#if defined (__AVR_TINY__)
    pop B1                      ; restore callee saved regs
    pop B0
#endif /* defined (__AVR_TINY__) */

    ret
ENDF __mulsi3_helper
#endif /* L_mulsi3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif /* !defined (__AVR_HAVE_MUL__) */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (__AVR_HAVE_MUL__)
;; Register layout for the MUL-based 32-bit routines.
#define A0 26
#define B0 18
#define C0 22

#define A1 A0+1

#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

/*******************************************************
    Widening Multiplication  32 = 16 x 16  with MUL
*******************************************************/

#if defined (L_mulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulhisi3
    XCALL __umulhisi3
    ;; Sign-extend B
    tst B1
    brpl 1f
    sub C2, A0
    sbc C3, A1
1:  ;; Sign-extend A
    XJMP __usmulhisi3_tail
ENDF __mulhisi3
#endif /* L_mulhisi3 */

#if defined (L_usmulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __usmulhisi3
    XCALL __umulhisi3
    ;; FALLTHRU
ENDF __usmulhisi3

DEFUN __usmulhisi3_tail
    ;; Sign-extend A
    sbrs A1, 7
    ret
    sub C2, B0
    sbc C3, B1
    ret
ENDF __usmulhisi3_tail
#endif /* L_usmulhisi3 */

#if defined (L_umulhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __umulhisi3
    mul A0, B0
    movw    C0, r0
    mul A1, B1
    movw    C2, r0
    mul A0, B1
#ifdef __AVR_HAVE_JMP_CALL__
    ;; This function is used by many other routines, often multiple times.
    ;; Therefore, if the flash size is not too limited, avoid the RCALL
    ;; and invest 6 Bytes to speed things up.
    add C1, r0
    adc C2, r1
    clr __zero_reg__
    adc C3, __zero_reg__
#else
    rcall   1f
#endif
    mul A1, B0
1:  add C1, r0
    adc C2, r1
    clr __zero_reg__
    adc C3, __zero_reg__
    ret
ENDF __umulhisi3
#endif /* L_umulhisi3 */

/*******************************************************
    Widening Multiplication  32 = 16 x 32  with MUL
*******************************************************/

#if defined (L_mulshisi3)
;;; R25:R22 = (signed long) R27:R26 * R21:R18
;;; (C3:C0) = (signed long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulshisi3
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problem skipping 2-word instruction
    tst A1
    brmi __mulohisi3
#else
    sbrs A1, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP __muluhisi3
    ;; FALLTHRU
ENDF __mulshisi3

;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulohisi3
    XCALL __muluhisi3
    ;; One-extend R27:R26 (A1:A0)
    sub C2, B0
    sbc C3, B1
    ret
ENDF __mulohisi3
#endif /* L_mulshisi3 */

#if defined (L_muluhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __muluhisi3
    XCALL __umulhisi3
    ;; Add the cross products that only affect the high word.
    mul A0, B3
    add C3, r0
    mul A1, B2
    add C3, r0
    mul A0, B2
    add C2, r0
    adc C3, r1
    clr __zero_reg__
    ret
ENDF __muluhisi3
#endif /* L_muluhisi3 */

/*******************************************************
    Multiplication  32 x 32  with MUL
*******************************************************/

#if defined (L_mulsi3)
;;; R25:R22 = R25:R22 * R21:R18
;;; (C3:C0) = C3:C0   * B3:B0
;;; Clobbers: R26, R27, __tmp_reg__
DEFUN __mulsi3
    movw    A0, C0
    push    C2
    push    C3
    XCALL   __muluhisi3
    pop A1
    pop A0
    ;; A1:A0 now contains the high word of A
    mul A0, B0
    add C2, r0
    adc C3, r1
    mul A0, B1
    add C3, r0
    mul A1, B0
    add C3, r0
    clr __zero_reg__
    ret
ENDF __mulsi3
#endif /* L_mulsi3 */

#undef A0
#undef A1

#undef B0
#undef B1
#undef B2
#undef B3

#undef C0
#undef C1
#undef C2
#undef C3

#endif /* __AVR_HAVE_MUL__ */

/*******************************************************
    Multiplication 24 x 24 with MUL
*******************************************************/

#if defined (L_mulpsi3)

;; A[0..2]: In: Multiplicand; Out: Product
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; B[0..2]: In: Multiplier
#define B0  18
#define B1  B0+1
#define B2  B0+2

#if defined (__AVR_HAVE_MUL__)

;; C[0..2]: Expand Result
#define C0  22
#define C1  C0+1
#define C2  C0+2

;; R24:R22 *= R20:R18
;; Clobbers: r21, r25, r26, r27, __tmp_reg__

#define AA0 26
#define AA2 21

DEFUN __mulpsi3
    ;; Copy A to scratch regs, do the 16 x 16 low part, then patch in
    ;; the two cross products that reach byte 2.
    wmov    AA0, A0
    mov AA2, A2
    XCALL __umulhisi3
    mul AA2, B0 $ add C2, r0
    mul AA0, B2 $ add C2, r0
    clr __zero_reg__
    ret
ENDF __mulpsi3

#undef AA2
#undef AA0

#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */
;; C[0..2]: Expand Result
#if defined (__AVR_TINY__)
#define C0 16
#else
#define C0 0
#endif /* defined (__AVR_TINY__) */
#define C1 C0+1
#define C2 21

;; R24:R22 *= R20:R18
;; Clobbers: __tmp_reg__, R18, R19, R20, R21

DEFUN __mulpsi3
#if defined (__AVR_TINY__)
    in r26,__SP_L__
    in r27,__SP_H__
    subi r26, lo8(-3)   ; Add 3 to point past return address
    sbci r27, hi8(-3)
    push B0             ; save callee saved regs
    push B1
    ld B0,X+            ; load from caller stack
    ld B1,X+
    ld B2,X+
#endif /* defined (__AVR_TINY__) */

    ;; C[] = 0
    clr __tmp_reg__
    clr C2

0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
    LSR B2 $ ror B1 $ ror B0

    ;; If the N-th Bit of B[] was set...
    brcc 1f

    ;; ...then add A[] * 2^N to the Result C[]
    ADD C0,A0 $ adc C1,A1 $ adc C2,A2

1:  ;; Multiply A[] by 2
    LSL A0 $ rol A1 $ rol A2

    ;; Loop until B[] is 0
    subi B0,0 $ sbci B1,0 $ sbci B2,0
    brne 0b

    ;; Copy C[] to the return Register A[]
    wmov A0, C0
    mov  A2, C2

    clr __zero_reg__
#if defined (__AVR_TINY__)
    pop B1
    pop B0
#endif /* (__AVR_TINY__) */
    ret
ENDF __mulpsi3

#undef C2
#undef C1
#undef C0

#endif /* HAVE_MUL */

#undef B2
#undef B1
#undef B0

#undef A2
#undef A1
#undef A0

#endif /* L_mulpsi3 */

#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)

;; A[0..2]: In: Multiplicand
#define A0 22
#define A1 A0+1
#define A2 A0+2

;; BB: In: Multiplier
#define BB 25

;; C[0..2]: Result
#define C0 18
#define C1 C0+1
#define C2 C0+2

;; C[] = A[] * sign_extend (BB)
DEFUN __mulsqipsi3
    mul A0, BB
    movw C0, r0
    mul A2, BB
    mov C2, r0
    mul A1, BB
    add C1, r0
    adc C2, r1
    clr __zero_reg__
    sbrs BB, 7
    ret
    ;; One-extend BB
    sub C1, A0
    sbc C2, A1
    ret
ENDF __mulsqipsi3

#undef C2
#undef C1
#undef C0

#undef BB

#undef A2
#undef A1
#undef A0

#endif /* L_mulsqipsi3  &&  HAVE_MUL */

/*******************************************************
    Multiplication  64 x 64
*******************************************************/

;; A[] = A[] * B[]

;; A[0..7]: In: Multiplicand
;; Out: Product
#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6
#define A7 A0+7

;; B[0..7]: In: Multiplier
#define B0 10
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6
#define B7 B0+7

#ifndef __AVR_TINY__
#if defined (__AVR_HAVE_MUL__)
;; Define C[] for convenience
;; Notice that parts of C[] overlap A[] respective B[]
#define C0 16
#define C1 C0+1
#define C2 20
#define C3 C2+1
#define C4 28
#define C5 C4+1
#define C6 C4+2
#define C7 C4+3

#if defined (L_muldi3)

;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push r29
    push r28
    push r17
    push r16

    ;; Counting in Words, we have to perform a 4 * 4 Multiplication

    ;; 3 * 0  +  0 * 3
    mul A7,B0 $             $ mov C7,r0
    mul A0,B7 $             $ add C7,r0
    mul A6,B1 $             $ add C7,r0
    mul A6,B0 $ mov C6,r0   $ add C7,r1
    mul B6,A1 $             $ add C7,r0
    mul B6,A0 $ add C6,r0   $ adc C7,r1

    ;; 1 * 2
    mul A2,B4 $ add C6,r0   $ adc C7,r1
    mul A3,B4 $             $ add C7,r0
    mul A2,B5 $             $ add C7,r0

    push A5
    push A4
    push B1
    push B0
    push A3
    push A2

    ;; 0 * 0
    wmov 26, B0
    XCALL __umulhisi3
    wmov C0, 22
    wmov C2, 24

    ;; 0 * 2
    wmov 26, B4
    XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25

    wmov 26, B2
    ;; 0 * 1
    XCALL __muldi3_6

    pop A0
    pop A1
    ;; 1 * 1
    wmov 26, B2
    XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

    pop r26
    pop r27
    ;; 1 * 0
    XCALL __muldi3_6

    pop A0
    pop A1
    ;; 2 * 0
    XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

    ;; 2 * 1
    wmov 26, B2
    XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23

    ;; A[] = C[]
    wmov A0, C0
    ;; A2 = C2 already
    wmov A4, C4
    wmov A6, C6

    pop r16
    pop r17
    pop r28
    pop r29
    ret
ENDF __muldi3
#endif /* L_muldi3 */

#if defined (L_muldi3_6)
;; A helper for some 64-bit multiplications with MUL available.
;; NOTE(review): the explicit `__muldi3_6:` label that followed DEFUN was
;; removed — DEFUN already emits `\name:`, so the duplicate label would make
;; the symbol doubly defined.
DEFUN __muldi3_6
    XCALL __umulhisi3
    add C2, 22
    adc C3, 23
    adc C4, 24
    adc C5, 25
    brcc 0f
    adiw C6, 1
0:  ret
ENDF __muldi3_6
#endif /* L_muldi3_6 */

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */

#if defined (L_muldi3)

#define C0 26
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
#define C4 C0+4
#define C5 C0+5
#define C6 0
#define C7 C6+1

#define Loop 9

;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push r29
    push r28
    push Loop

    ldi C0, 64
    mov Loop, C0

    ;; C[] = 0
    clr __tmp_reg__
    wmov C0, 0
    wmov C2, 0
    wmov C4, 0

0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
    ;; where N = 64 - Loop.
    ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
    ;; B[] will have its initial Value again.
    LSR B7 $ ror B6 $ ror B5 $ ror B4
    ror B3 $ ror B2 $ ror B1 $ ror B0

    ;; If the N-th Bit of B[] was set then...
    brcc 1f
    ;; ...finish Rotation...
    ori B7, 1 << 7

    ;; ...and add A[] * 2^N to the Result C[]
    ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
    adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7

1:  ;; Multiply A[] by 2
    LSL A0 $ rol A1 $ rol A2 $ rol A3
    rol A4 $ rol A5 $ rol A6 $ rol A7

    dec Loop
    brne 0b

    ;; We expanded the Result in C[]
    ;; Copy Result to the Return Register A[]
    wmov A0, C0
    wmov A2, C2
    wmov A4, C4
    wmov A6, C6

    clr __zero_reg__
    pop Loop
    pop r28
    pop r29
    ret
ENDF __muldi3

#undef Loop

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#endif /* L_muldi3 */
#endif /* HAVE_MUL */
#endif /* if not __AVR_TINY__ */

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0

/*******************************************************
   Widening Multiplication  64 = 32 x 32  with  MUL
*******************************************************/

#if defined (__AVR_HAVE_MUL__)
#define A0 r22
#define A1 r23
#define A2 r24
#define A3 r25

#define B0 r18
#define B1 r19
#define B2 r20
#define B3 r21

#define C0  18
#define C1  C0+1
#define C2  20
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  C4+2
#define C7  C4+3

#if defined (L_umulsidi3)

;; Unsigned widening 64 = 32 * 32 Multiplication with MUL

;; R18[8] = R22[4] * R18[4]
;;
;; Ordinary ABI Function, but additionally sets
;; X = R20[2] = B2[2]
;; Z = R22[2] = A0[2]
DEFUN __umulsidi3
    clt
    ;; FALLTHRU
ENDF  __umulsidi3
    ;; T = sign (A)
DEFUN __umulsidi3_helper
    push 29 $ push 28 ; Y
    wmov    30, A2
    ;; Counting in Words, we have to perform 4 Multiplications
    ;; 0 * 0
    wmov    26, A0
    XCALL __umulhisi3
    push 23 $ push 22 ; C0
    wmov    28, B0
    wmov    18, B2
    wmov    C2, 24
    push    27 $ push 26 ; A0
    push    19 $ push 18 ; B2
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B2          C2          --  --  B0  A2
    ;; 1 * 1
    wmov    26, 30  ; A2
    XCALL __umulhisi3
    ;; Sign-extend A.  T holds the sign of A
    brtc    0f
    ;; Subtract B from the high part of the result
    sub 22, 28
    sbc 23, 29
    sbc 24, 18
    sbc 25, 19
0:  wmov    18, 28 ;; B0
    wmov    C4, 22
    wmov    C6, 24
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B0  C2  --  --  A2  C4  C6
    ;;
    ;; 1 * 0
    XCALL __muldi3_6
    ;; 0 * 1
    pop 26 $ pop 27 ;; B2
    pop 18 $ pop 19 ;; A0
    XCALL __muldi3_6

    ;; Move result C into place and save A0 in Z
    wmov    22, C4
    wmov    24, C6
    wmov    30, 18 ; A0
    pop C0 $ pop C1

    ;; Epilogue
    pop 28 $ pop 29 ;; Y
    ret
ENDF __umulsidi3_helper
#endif /* L_umulsidi3 */


#if defined (L_mulsidi3)

;; Signed widening 64 = 32 * 32 Multiplication
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    bst A3, 7
    sbrs    B3, 7           ; Enhanced core has no skip bug
    XJMP __umulsidi3_helper

    ;; B needs sign-extension
    push    A3
    push    A2
    XCALL __umulsidi3_helper
    ;; A0 survived in Z
    sub r22, r30
    sbc r23, r31
    pop r26
    pop r27
    sbc r24, r26
    sbc r25, r27
    ret
ENDF __mulsidi3
#endif /* L_mulsidi3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#endif /* HAVE_MUL */

/**********************************************************
    Widening Multiplication  64 = 32 x 32  without  MUL
**********************************************************/
#ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
#if defined (L_mulsidi3)  &&  !defined (__AVR_HAVE_MUL__)
#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6
#define A7 A0+7

#define B0 10
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6
#define B7 B0+7

#define AA0 22
#define AA1 AA0+1
#define AA2 AA0+2
#define AA3 AA0+3

#define BB0 18
#define BB1 BB0+1
#define BB2 BB0+2
#define BB3 BB0+3

#define Mask r30

;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    set                 ; T = 1: signed extension below
    skip
    ;; FALLTHRU
ENDF  __mulsidi3

DEFUN __umulsidi3
    clt                 ; skipped
    ;; Save 10 Registers: R10..R17, R28, R29
    do_prologue_saves 10
    ;; Mask = 0x7f for signed, 0xff for unsigned extension
    ldi Mask, 0xff
    bld Mask, 7
    ;; Move B into place...
    wmov    B0, BB0
    wmov    B2, BB2
    ;; ...and extend it
    and BB3, Mask
    lsl BB3
    sbc B4, B4
    mov B5, B4
    wmov    B6, B4
    ;; Move A into place...
    wmov    A0, AA0
    wmov    A2, AA2
    ;; ...and extend it
    and AA3, Mask
    lsl AA3
    sbc A4, A4
    mov A5, A4
    wmov    A6, A4
    XCALL __muldi3
    do_epilogue_restores 10
ENDF __umulsidi3

#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7
#undef B0
#undef B1
#undef B2
#undef B3
#undef B4
#undef B5
#undef B6
#undef B7
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef BB0
#undef BB1
#undef BB2
#undef BB3
#undef Mask
#endif /* L_mulsidi3 && !HAVE_MUL */
#endif /* if not __AVR_TINY__ */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


.section .text.libgcc.div, "ax", @progbits

/*******************************************************
       Division 8 / 8 => (result + remainder)
*******************************************************/
#define r_rem   r25     /* remainder */
#define r_arg1  r24     /* dividend, quotient */
#define r_arg2  r22     /* divisor */
#define r_cnt   r23     /* loop count */

#if defined (L_udivmodqi4)
;; R24 = R24 / R22 (in R22 after the swap), R25 = R24 % R22.
DEFUN __udivmodqi4
    sub r_rem,r_rem         ; clear remainder and carry
    ldi r_cnt,9             ; init loop counter
    rjmp    __udivmodqi4_ep ; jump to entry point
__udivmodqi4_loop:
    rol r_rem               ; shift dividend into remainder
    cp  r_rem,r_arg2        ; compare remainder & divisor
    brcs    __udivmodqi4_ep ; remainder < divisor
    sub r_rem,r_arg2        ; subtract divisor from remainder
__udivmodqi4_ep:
    rol r_arg1              ; shift dividend (with CARRY)
    dec r_cnt               ; decrement loop counter
    brne    __udivmodqi4_loop
    com r_arg1              ; complement result
                            ; because C flag was complemented in loop
    ret
ENDF __udivmodqi4
#endif /* defined (L_udivmodqi4) */

#if defined (L_divmodqi4)
;; Signed 8-bit div/mod: take magnitudes, divide unsigned, fix up signs.
DEFUN __divmodqi4
    bst     r_arg1,7        ; store sign of dividend
    mov     __tmp_reg__,r_arg1
    eor     __tmp_reg__,r_arg2; r0.7 is sign of result
    sbrc    r_arg1,7
    neg     r_arg1          ; dividend negative : negate
    sbrc    r_arg2,7
    neg     r_arg2          ; divisor negative : negate
    XCALL   __udivmodqi4    ; do the unsigned div/mod
    brtc    __divmodqi4_1
    neg     r_rem           ; correct remainder sign
__divmodqi4_1:
    sbrc    __tmp_reg__,7
    neg     r_arg1          ; correct result sign
__divmodqi4_exit:
    ret
ENDF __divmodqi4
#endif /* defined (L_divmodqi4) */

#undef r_rem
#undef r_arg1
#undef r_arg2
#undef r_cnt


/*******************************************************
       Division 16 / 16 => (result + remainder)
*******************************************************/
#define r_remL  r26     /* remainder Low */
#define r_remH  r27     /* remainder High */

/* return: remainder */
#define r_arg1L r24     /* dividend Low */
#define r_arg1H r25     /* dividend High */

/* return: quotient */
#define r_arg2L r22     /* divisor Low */
#define r_arg2H r23     /* divisor High */

#define r_cnt   r21     /* loop count */

#if defined (L_udivmodhi4)
;; R23:R22 = R25:R24 / R23:R22;  R25:R24 = R25:R24 % R23:R22.
DEFUN __udivmodhi4
    sub r_remL,r_remL
    sub r_remH,r_remH       ; clear remainder and carry
    ldi r_cnt,17            ; init loop counter
    rjmp    __udivmodhi4_ep ; jump to entry point
__udivmodhi4_loop:
    rol r_remL              ; shift dividend into remainder
    rol r_remH
    cp  r_remL,r_arg2L      ; compare remainder & divisor
    cpc r_remH,r_arg2H
    brcs    __udivmodhi4_ep ; remainder < divisor
    sub r_remL,r_arg2L      ; subtract divisor from remainder
    sbc r_remH,r_arg2H
__udivmodhi4_ep:
    rol r_arg1L             ; shift dividend (with CARRY)
    rol r_arg1H
    dec r_cnt               ; decrement loop counter
    brne    __udivmodhi4_loop
    com r_arg1L
    com r_arg1H
    ;; div/mod results to return registers, as for the div() function
    mov_l   r_arg2L, r_arg1L    ; quotient
    mov_h   r_arg2H, r_arg1H
    mov_l   r_arg1L, r_remL     ; remainder
    mov_h   r_arg1H, r_remH
    ret
ENDF __udivmodhi4
#endif /* defined (L_udivmodhi4) */

1436 #if defined (L_divmodhi4)
;; Signed 16/16 division/modulo, also exported as `_div'.
;; Negates the operands to positive, runs __udivmodhi4, then fixes signs:
;; T = dividend sign (-> remainder), __tmp_reg__.7 = result sign (-> quotient).
;; The rcall + fall-through structure below shares the two negate helpers:
;; neg1 negates r_arg1 (dividend, later the remainder), neg2 negates
;; r_arg2 (divisor, later the quotient).
1437 DEFUN __divmodhi4
1438 .global _div
1439 _div:
1440 bst r_arg1H,7 ; store sign of dividend
1441 mov __tmp_reg__,r_arg2H
1442 brtc 0f
1443 com __tmp_reg__ ; r0.7 is sign of result
1444 rcall __divmodhi4_neg1 ; dividend negative: negate
1445 0:
1446 sbrc r_arg2H,7
1447 rcall __divmodhi4_neg2 ; divisor negative: negate
1448 XCALL __udivmodhi4 ; do the unsigned div/mod
1449 sbrc __tmp_reg__,7
1450 rcall __divmodhi4_neg2 ; correct quotient sign (quotient is in r_arg2 now)
1451 brtc __divmodhi4_exit
;; T set: fall through into neg1 to correct the remainder's sign
1452 __divmodhi4_neg1:
1453 ;; correct dividend/remainder sign
1454 com r_arg1H
1455 neg r_arg1L
1456 sbci r_arg1H,0xff
1457 ret
1458 __divmodhi4_neg2:
1459 ;; correct divisor/result sign
1460 com r_arg2H
1461 neg r_arg2L
1462 sbci r_arg2H,0xff
1463 __divmodhi4_exit:
1464 ret
1465 ENDF __divmodhi4
1466 #endif /* defined (L_divmodhi4) */
1467
1468 #undef r_remH
1469 #undef r_remL
1470
1471 #undef r_arg1H
1472 #undef r_arg1L
1473
1474 #undef r_arg2H
1475 #undef r_arg2L
1476
1477 #undef r_cnt
1478
1479 /*******************************************************
1480 Division 24 / 24 => (result + remainder)
1481 *******************************************************/
1482
1483 ;; A[0..2]: In: Dividend; Out: Quotient
1484 #define A0 22
1485 #define A1 A0+1
1486 #define A2 A0+2
1487
1488 ;; B[0..2]: In: Divisor; Out: Remainder
1489 #define B0 18
1490 #define B1 B0+1
1491 #define B2 B0+2
1492
1493 ;; C[0..2]: Expand remainder
1494 #define C0 __zero_reg__
1495 #define C1 26
1496 #define C2 25
1497
1498 ;; Loop counter
1499 #define r_cnt 21
1500
1501 #if defined (L_udivmodpsi4)
;; Unsigned 24/24 division: restoring shift-and-subtract, 25 iterations.
1502 ;; R24:R22 = R24:R22 udiv R20:R18
1503 ;; R20:R18 = R24:R22 umod R20:R18
1504 ;; Clobbers: R21, R25, R26
1505
1506 DEFUN __udivmodpsi4
1507 ; init loop counter
1508 ldi r_cnt, 24+1
1509 ; Clear remainder and carry. C0 is already 0
1510 clr C1
1511 sub C2, C2
1512 ; jump to entry point
1513 rjmp __udivmodpsi4_start
1514 __udivmodpsi4_loop:
1515 ; shift dividend into remainder
1516 rol C0
1517 rol C1
1518 rol C2
1519 ; compare remainder & divisor
1520 cp C0, B0
1521 cpc C1, B1
1522 cpc C2, B2
1523 brcs __udivmodpsi4_start ; remainder < divisor
1524 sub C0, B0 ; reduce remainder by divisor
1525 sbc C1, B1
1526 sbc C2, B2
1527 __udivmodpsi4_start:
1528 ; shift dividend (with CARRY)
1529 rol A0
1530 rol A1
1531 rol A2
1532 ; decrement loop counter
1533 dec r_cnt
1534 brne __udivmodpsi4_loop
; quotient bits were collected inverted: complement to fix up
1535 com A0
1536 com A1
1537 com A2
1538 ; div/mod results to return registers
1539 ; remainder
1540 mov B0, C0
1541 mov B1, C1
1542 mov B2, C2
1543 clr __zero_reg__ ; C0 is __zero_reg__ -- restore it to 0
1544 ret
1545 ENDF __udivmodpsi4
1546 #endif /* defined (L_udivmodpsi4) */
1547
1548 #if defined (L_divmodpsi4)
;; Signed 24/24 division/modulo on top of __udivmodpsi4.
1549 ;; R24:R22 = R24:R22 div R20:R18
1550 ;; R20:R18 = R24:R22 mod R20:R18
1551 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1552
1553 DEFUN __divmodpsi4
1554 ; R0.7 will contain the sign of the result:
1555 ; R0.7 = A.sign ^ B.sign
1556 mov __tmp_reg__, B2
1557 ; T-flag = sign of dividend
1558 bst A2, 7
1559 brtc 0f
1560 com __tmp_reg__
1561 ; Adjust dividend's sign
1562 rcall __divmodpsi4_negA
1563 0:
1564 ; Adjust divisor's sign
1565 sbrc B2, 7
1566 rcall __divmodpsi4_negB
1567
1568 ; Do the unsigned div/mod
1569 XCALL __udivmodpsi4
1570
1571 ; Adjust quotient's sign
1572 sbrc __tmp_reg__, 7
1573 rcall __divmodpsi4_negA
1574
1575 ; Adjust remainder's sign
1576 brtc __divmodpsi4_end
1577
; T set: fall through into negB (B[] holds the remainder here); its RET
; returns to the caller.
1578 __divmodpsi4_negB:
1579 ; Correct divisor/remainder sign
1580 com B2
1581 com B1
1582 neg B0
1583 sbci B1, -1
1584 sbci B2, -1
1585 ret
1586
1587 ; Correct dividend/quotient sign
1588 __divmodpsi4_negA:
1589 com A2
1590 com A1
1591 neg A0
1592 sbci A1, -1
1593 sbci A2, -1
1594 __divmodpsi4_end:
1595 ret
1596
1597 ENDF __divmodpsi4
1598 #endif /* defined (L_divmodpsi4) */
1599
1600 #undef A0
1601 #undef A1
1602 #undef A2
1603
1604 #undef B0
1605 #undef B1
1606 #undef B2
1607
1608 #undef C0
1609 #undef C1
1610 #undef C2
1611
1612 #undef r_cnt
1613
1614 /*******************************************************
1615 Division 32 / 32 => (result + remainder)
1616 *******************************************************/
1617 #define r_remHH r31 /* remainder High */
1618 #define r_remHL r30
1619 #define r_remH r27
1620 #define r_remL r26 /* remainder Low */
1621
1622 /* return: remainder */
1623 #define r_arg1HH r25 /* dividend High */
1624 #define r_arg1HL r24
1625 #define r_arg1H r23
1626 #define r_arg1L r22 /* dividend Low */
1627
1628 /* return: quotient */
1629 #define r_arg2HH r21 /* divisor High */
1630 #define r_arg2HL r20
1631 #define r_arg2H r19
1632 #define r_arg2L r18 /* divisor Low */
1633
1634 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1635
1636 #if defined (L_udivmodsi4)
;; Unsigned 32/32 division: restoring shift-and-subtract, 33 iterations.
;; In:  r25:r22 = dividend, r21:r18 = divisor
;; Out: r21:r18 = quotient, r25:r22 = remainder (ldiv()-style layout)
;; Clobbers: r31:r26 (remainder accumulator); the loop counter lives in
;; __zero_reg__ and is 0 again when the loop ends.
1637 DEFUN __udivmodsi4
1638 ldi r_remL, 33 ; init loop counter
1639 mov r_cnt, r_remL
1640 sub r_remL,r_remL
1641 sub r_remH,r_remH ; clear remainder and carry
1642 mov_l r_remHL, r_remL
1643 mov_h r_remHH, r_remH
1644 rjmp __udivmodsi4_ep ; jump to entry point
1645 __udivmodsi4_loop:
1646 rol r_remL ; shift dividend into remainder
1647 rol r_remH
1648 rol r_remHL
1649 rol r_remHH
1650 cp r_remL,r_arg2L ; compare remainder & divisor
1651 cpc r_remH,r_arg2H
1652 cpc r_remHL,r_arg2HL
1653 cpc r_remHH,r_arg2HH
1654 brcs __udivmodsi4_ep ; remainder < divisor
1655 sub r_remL,r_arg2L ; reduce remainder by divisor
1656 sbc r_remH,r_arg2H
1657 sbc r_remHL,r_arg2HL
1658 sbc r_remHH,r_arg2HH
1659 __udivmodsi4_ep:
1660 rol r_arg1L ; shift dividend (with CARRY)
1661 rol r_arg1H
1662 rol r_arg1HL
1663 rol r_arg1HH
1664 dec r_cnt ; decrement loop counter
1665 brne __udivmodsi4_loop
1666 ; __zero_reg__ now restored (r_cnt == 0)
1667 com r_arg1L
1668 com r_arg1H
1669 com r_arg1HL
1670 com r_arg1HH
1671 ; div/mod results to return registers, as for the ldiv() function
1672 mov_l r_arg2L, r_arg1L ; quotient
1673 mov_h r_arg2H, r_arg1H
1674 mov_l r_arg2HL, r_arg1HL
1675 mov_h r_arg2HH, r_arg1HH
1676 mov_l r_arg1L, r_remL ; remainder
1677 mov_h r_arg1H, r_remH
1678 mov_l r_arg1HL, r_remHL
1679 mov_h r_arg1HH, r_remHH
1680 ret
1681 ENDF __udivmodsi4
1682 #endif /* defined (L_udivmodsi4) */
1683
1684 #if defined (L_divmodsi4)
;; Signed 32/32 division/modulo on top of __udivmodsi4.
;; T = dividend sign (-> remainder), __tmp_reg__.7 = result sign (-> quotient).
;; __negsi2 negates r25:r22 (dividend / remainder); the local neg2 helper
;; negates r21:r18 (divisor / quotient).
1685 DEFUN __divmodsi4
1686 mov __tmp_reg__,r_arg2HH
1687 bst r_arg1HH,7 ; store sign of dividend
1688 brtc 0f
1689 com __tmp_reg__ ; r0.7 is sign of result
1690 XCALL __negsi2 ; dividend negative: negate
1691 0:
1692 sbrc r_arg2HH,7
1693 rcall __divmodsi4_neg2 ; divisor negative: negate
1694 XCALL __udivmodsi4 ; do the unsigned div/mod
1695 sbrc __tmp_reg__, 7 ; correct quotient sign
1696 rcall __divmodsi4_neg2
1697 brtc __divmodsi4_exit ; correct remainder sign
1698 XJMP __negsi2
1699 __divmodsi4_neg2:
1700 ;; correct divisor/quotient sign
1701 com r_arg2HH
1702 com r_arg2HL
1703 com r_arg2H
1704 neg r_arg2L
1705 sbci r_arg2H,0xff
1706 sbci r_arg2HL,0xff
1707 sbci r_arg2HH,0xff
1708 __divmodsi4_exit:
1709 ret
1710 ENDF __divmodsi4
1711 #endif /* defined (L_divmodsi4) */
1712
1713 #if defined (L_negsi2)
1714 ;; (set (reg:SI 22)
1715 ;; (neg:SI (reg:SI 22)))
1716 ;; Sets the V flag for signed overflow tests
;; i.e. R25:R22 = -R25:R22 (NEG4 is a 4-byte negate macro defined
;; elsewhere in this file)
1717 DEFUN __negsi2
1718 NEG4 22
1719 ret
1720 ENDF __negsi2
1721 #endif /* L_negsi2 */
1722
1723 #undef r_remHH
1724 #undef r_remHL
1725 #undef r_remH
1726 #undef r_remL
1727 #undef r_arg1HH
1728 #undef r_arg1HL
1729 #undef r_arg1H
1730 #undef r_arg1L
1731 #undef r_arg2HH
1732 #undef r_arg2HL
1733 #undef r_arg2H
1734 #undef r_arg2L
1735 #undef r_cnt
1736
1737 /* *di routines use registers below R19 and won't work with tiny arch
1738 right now. */
1739
1740 #if !defined (__AVR_TINY__)
1741 /*******************************************************
1742 Division 64 / 64
1743 Modulo 64 % 64
1744 *******************************************************/
1745
1746 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1747 ;; at least 16k of Program Memory. For smaller Devices, depend
1748 ;; on MOVW and SP Size. There is a connection between SP Size and
1749 ;; Flash Size so that SP Size can be used to test for Flash Size.
1750
1751 #if defined (__AVR_HAVE_JMP_CALL__)
1752 # define SPEED_DIV 8
1753 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1754 # define SPEED_DIV 16
1755 #else
1756 # define SPEED_DIV 0
1757 #endif
1758
1759 ;; A[0..7]: In: Dividend;
1760 ;; Out: Quotient (T = 0)
1761 ;; Out: Remainder (T = 1)
1762 #define A0 18
1763 #define A1 A0+1
1764 #define A2 A0+2
1765 #define A3 A0+3
1766 #define A4 A0+4
1767 #define A5 A0+5
1768 #define A6 A0+6
1769 #define A7 A0+7
1770
1771 ;; B[0..7]: In: Divisor; Out: Clobber
1772 #define B0 10
1773 #define B1 B0+1
1774 #define B2 B0+2
1775 #define B3 B0+3
1776 #define B4 B0+4
1777 #define B5 B0+5
1778 #define B6 B0+6
1779 #define B7 B0+7
1780
1781 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1782 #define C0 8
1783 #define C1 C0+1
1784 #define C2 30
1785 #define C3 C2+1
1786 #define C4 28
1787 #define C5 C4+1
1788 #define C6 26
1789 #define C7 C6+1
1790
1791 ;; Holds Signs during Division Routine
1792 #define SS __tmp_reg__
1793
1794 ;; Bit-Counter in Division Routine
1795 #define R_cnt __zero_reg__
1796
1797 ;; Scratch Register for Negation
1798 #define NN r31
1799
1800 #if defined (L_udivdi3)
1801
1802 ;; R25:R18 = R25:R18 umod R17:R10
1803 ;; Ordinary ABI-Function
1804
1805 DEFUN __umoddi3
1806 set
1807 rjmp __udivdi3_umoddi3
1808 ENDF __umoddi3
1809
1810 ;; R25:R18 = R25:R18 udiv R17:R10
1811 ;; Ordinary ABI-Function
1812
1813 DEFUN __udivdi3
1814 clt
1815 ENDF __udivdi3
1816
;; Common worker: T selects umod (T = 1) vs udiv (T = 0); falls through
;; from __udivdi3 above. C0/C1/C4/C5 are r8, r9, r28, r29 -- call-saved
;; registers that __udivmod64 clobbers, hence the push/pop pairs.
1817 DEFUN __udivdi3_umoddi3
1818 push C0
1819 push C1
1820 push C4
1821 push C5
1822 XCALL __udivmod64
1823 pop C5
1824 pop C4
1825 pop C1
1826 pop C0
1827 ret
1828 ENDF __udivdi3_umoddi3
1829 #endif /* L_udivdi3 */
1830
1831 #if defined (L_udivmod64)
1832
1833 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1834 ;; No Registers saved/restored; the Callers will take Care.
1835 ;; Preserves B[] and T-flag
1836 ;; T = 0: Compute Quotient in A[]
1837 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
;; Register Map (see #defines above): A[] = r18..r25, B[] = r10..r17,
;; C[] = r8, r9, r30, r31, r28, r29, r26, r27; Counter in __zero_reg__.
1838
1839 DEFUN __udivmod64
1840
1841 ;; Clear Remainder (C6, C7 will follow)
1842 clr C0
1843 clr C1
1844 wmov C2, C0
1845 wmov C4, C0
1846 ldi C7, 64
1847
1848 #if SPEED_DIV == 0 || SPEED_DIV == 16
1849 ;; Initialize Loop-Counter
1850 mov R_cnt, C7
1851 wmov C6, C0
1852 #endif /* SPEED_DIV */
1853
1854 #if SPEED_DIV == 8
1855
1856 push A7
1857 clr C6
1858
1859 1: ;; Compare shifted Dividend against Divisor
1860 ;; If -- even after Shifting -- it is smaller...
1861 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1862 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1863 brcc 2f
1864
1865 ;; ...then we can subtract it. Thus, it is legal to shift left
1866 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1867 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1868 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1869 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1870
1871 ;; 8 Bits are done
1872 subi C7, 8
1873 brne 1b
1874
1875 ;; Shifted 64 Bits: A7 has traveled to C7
1876 pop C7
1877 ;; Divisor is greater than Dividend. We have:
1878 ;; A[] % B[] = A[]
1879 ;; A[] / B[] = 0
1880 ;; Thus, we can return immediately
1881 rjmp 5f
1882
1883 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1884 mov R_cnt, C7
1885
1886 ;; Push of A7 is not needed because C7 is still 0
1887 pop C7
1888 clr C7
1889
1890 #elif SPEED_DIV == 16
1891
1892 ;; Compare shifted Dividend against Divisor
1893 cp A7, B3
1894 cpc C0, B4
1895 cpc C1, B5
1896 cpc C2, B6
1897 cpc C3, B7
1898 brcc 2f
1899
1900 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1901 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1902 wmov C2,A6 $ wmov C0,A4
1903 wmov A6,A2 $ wmov A4,A0
1904 wmov A2,C6 $ wmov A0,C4
1905
1906 ;; Set Bit Counter to 32
1907 lsr R_cnt
1908 2:
1909 #elif SPEED_DIV
1910 #error SPEED_DIV = ?
1911 #endif /* SPEED_DIV */
1912
1913 ;; The very Division + Remainder Routine
1914
1915 3: ;; Left-shift Dividend...
1916 lsl A0 $ rol A1 $ rol A2 $ rol A3
1917 rol A4 $ rol A5 $ rol A6 $ rol A7
1918
1919 ;; ...into Remainder
1920 rol C0 $ rol C1 $ rol C2 $ rol C3
1921 rol C4 $ rol C5 $ rol C6 $ rol C7
1922
1923 ;; Compare Remainder and Divisor
1924 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1925 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1926
1927 brcs 4f
1928
1929 ;; Divisor fits into Remainder: Subtract it from Remainder...
1930 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1931 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1932
1933 ;; ...and set according Bit in the upcoming Quotient
1934 ;; The Bit will travel to its final Position
1935 ori A0, 1
1936
1937 4: ;; This Bit is done
1938 dec R_cnt
1939 brne 3b
1940 ;; __zero_reg__ is 0 again
1941
1942 ;; T = 0: We are fine with the Quotient in A[]
1943 ;; T = 1: Copy Remainder to A[]
1944 5: brtc 6f
1945 wmov A0, C0
1946 wmov A2, C2
1947 wmov A4, C4
1948 wmov A6, C6
1949 ;; Move the Sign of the Result to SS.7
1950 lsl SS
1951
1952 6: ret
1953
1954 ENDF __udivmod64
1955 #endif /* L_udivmod64 */
1956
1957
1958 #if defined (L_divdi3)
1959
1960 ;; R25:R18 = R25:R18 mod R17:R10
1961 ;; Ordinary ABI-Function
1962
1963 DEFUN __moddi3
1964 set
1965 rjmp __divdi3_moddi3
1966 ENDF __moddi3
1967
1968 ;; R25:R18 = R25:R18 div R17:R10
1969 ;; Ordinary ABI-Function
1970
1971 DEFUN __divdi3
1972 clt
1973 ENDF __divdi3
1974
;; Signed 64-bit worker: T selects mod (set by __moddi3) vs div
;; (cleared by __divdi3, which falls through here).
1975 DEFUN __divdi3_moddi3
1976 #if SPEED_DIV
1977 mov r31, A7
1978 or r31, B7
1979 brmi 0f
1980 ;; Both Signs are 0: the following Complexity is not needed
1981 XJMP __udivdi3_umoddi3
1982 #endif /* SPEED_DIV */
1983
1984 0: ;; The Prologue
1985 ;; Save 12 Registers: Y, 17...8
1986 ;; No Frame needed
1987 do_prologue_saves 12
1988
1989 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1990 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1991 mov SS, A7
1992 asr SS
1993 ;; Adjust Dividend's Sign as needed
1994 #if SPEED_DIV
1995 ;; Compiling for Speed we know that at least one Sign must be < 0
1996 ;; Thus, if A[] >= 0 then we know B[] < 0
1997 brpl 22f
1998 #else
1999 brpl 21f
2000 #endif /* SPEED_DIV */
2001
2002 XCALL __negdi2
2003
2004 ;; Adjust Divisor's Sign and SS.7 as needed
2005 21: tst B7
2006 brpl 3f
2007 22: ldi NN, 1 << 7
2008 eor SS, NN
2009
2010 ldi NN, -1
2011 com B4 $ com B5 $ com B6 $ com B7
2012 $ com B1 $ com B2 $ com B3
2013 NEG B0
2014 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
2015 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
2016
2017 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2018 XCALL __udivmod64
2019
2020 ;; Adjust Result's Sign
2021 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2022 tst SS
2023 brpl 4f
2024 #else
2025 sbrc SS, 7
2026 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2027 XCALL __negdi2
2028
2029 4: ;; Epilogue: Restore 12 Registers and return
2030 do_epilogue_restores 12
2031
2032 ENDF __divdi3_moddi3
2033
2034 #endif /* L_divdi3 */
2035
2036 #undef R_cnt
2037 #undef SS
2038 #undef NN
2039
2040 .section .text.libgcc, "ax", @progbits
2041
2042 #define TT __tmp_reg__
2043
2044 #if defined (L_adddi3)
2045 ;; (set (reg:DI 18)
2046 ;; (plus:DI (reg:DI 18)
2047 ;; (reg:DI 10)))
2048 ;; Sets the V flag for signed overflow tests
2049 ;; Sets the C flag for unsigned overflow tests
;; i.e. R25:R18 += R17:R10 (A[] += B[], byte-wise with carry propagation)
2050 DEFUN __adddi3
2051 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
2052 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
2053 ret
2054 ENDF __adddi3
2055 #endif /* L_adddi3 */
2056
2057 #if defined (L_adddi3_s8)
2058 ;; (set (reg:DI 18)
2059 ;; (plus:DI (reg:DI 18)
2060 ;; (sign_extend:SI (reg:QI 26))))
2061 ;; Sets the V flag for signed overflow tests
2062 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
;; i.e. R25:R18 += sign_extend (R26); TT is the extension byte (0 or 0xff)
2063 DEFUN __adddi3_s8
2064 clr TT
2065 sbrc r26, 7
2066 com TT
2067 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2068 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
2069 ret
2070 ENDF __adddi3_s8
2071 #endif /* L_adddi3_s8 */
2072
2073 #if defined (L_subdi3)
2074 ;; (set (reg:DI 18)
2075 ;; (minus:DI (reg:DI 18)
2076 ;; (reg:DI 10)))
2077 ;; Sets the V flag for signed overflow tests
2078 ;; Sets the C flag for unsigned overflow tests
;; i.e. R25:R18 -= R17:R10 (A[] -= B[], byte-wise with borrow propagation)
2079 DEFUN __subdi3
2080 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2081 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2082 ret
2083 ENDF __subdi3
2084 #endif /* L_subdi3 */
2085
2086 #if defined (L_cmpdi2)
2087 ;; (set (cc0)
2088 ;; (compare (reg:DI 18)
2089 ;; (reg:DI 10)))
;; i.e. set the flags for R25:R18 - R17:R10 without modifying the operands
2090 DEFUN __cmpdi2
2091 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2092 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2093 ret
2094 ENDF __cmpdi2
2095 #endif /* L_cmpdi2 */
2096
2097 #if defined (L_cmpdi2_s8)
2098 ;; (set (cc0)
2099 ;; (compare (reg:DI 18)
2100 ;; (sign_extend:SI (reg:QI 26))))
;; i.e. compare R25:R18 against sign_extend (R26); TT = extension byte
2101 DEFUN __cmpdi2_s8
2102 clr TT
2103 sbrc r26, 7
2104 com TT
2105 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2106 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2107 ret
2108 ENDF __cmpdi2_s8
2109 #endif /* L_cmpdi2_s8 */
2110
2111 #if defined (L_negdi2)
2112 ;; (set (reg:DI 18)
2113 ;; (neg:DI (reg:DI 18)))
2114 ;; Sets the V flag for signed overflow tests
;; i.e. R25:R18 = -R25:R18 (multi-byte two's-complement negate:
;; COM the upper bytes, NEG the lowest, then propagate with SBCI -1)
2115 DEFUN __negdi2
2116
2117 com A4 $ com A5 $ com A6 $ com A7
2118 $ com A1 $ com A2 $ com A3
2119 NEG A0
2120 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2121 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2122 ret
2123
2124 ENDF __negdi2
2125 #endif /* L_negdi2 */
2126
2127 #undef TT
2128
2129 #undef C7
2130 #undef C6
2131 #undef C5
2132 #undef C4
2133 #undef C3
2134 #undef C2
2135 #undef C1
2136 #undef C0
2137
2138 #undef B7
2139 #undef B6
2140 #undef B5
2141 #undef B4
2142 #undef B3
2143 #undef B2
2144 #undef B1
2145 #undef B0
2146
2147 #undef A7
2148 #undef A6
2149 #undef A5
2150 #undef A4
2151 #undef A3
2152 #undef A2
2153 #undef A1
2154 #undef A0
2155
2156 #endif /* !defined (__AVR_TINY__) */
2157
2158
2159 .section .text.libgcc.prologue, "ax", @progbits
2161
2162 /**********************************
2163 * This is a prologue subroutine
2164 **********************************/
2165 #if !defined (__AVR_TINY__)
2166 #if defined (L_prologue)
2167
2168 ;; This function does not clobber T-flag; 64-bit division relies on it
;; Common prologue helper: pushes the call-saved registers r2..r17, r28,
;; r29, then allocates a stack frame of r27:r26 bytes (r26 only on devices
;; without SPH) and leaves Y = new stack pointer. Returns to the caller
;; via XIJMP through Z. NOTE(review): the compiler appears to enter at an
;; inner offset to skip unneeded pushes (see do_prologue_saves usage) --
;; confirm against avr.cc before relying on the entry point.
2169 DEFUN __prologue_saves__
2170 push r2
2171 push r3
2172 push r4
2173 push r5
2174 push r6
2175 push r7
2176 push r8
2177 push r9
2178 push r10
2179 push r11
2180 push r12
2181 push r13
2182 push r14
2183 push r15
2184 push r16
2185 push r17
2186 push r28
2187 push r29
2188 #if !defined (__AVR_HAVE_SPH__)
;; 8-bit stack pointer: only SP_L exists, frame size is in r26
2189 in r28,__SP_L__
2190 sub r28,r26
2191 out __SP_L__,r28
2192 clr r29
2193 #elif defined (__AVR_XMEGA__)
;; XMEGA updates SP atomically, no interrupt lockout needed
2194 in r28,__SP_L__
2195 in r29,__SP_H__
2196 sub r28,r26
2197 sbc r29,r27
2198 out __SP_L__,r28
2199 out __SP_H__,r29
2200 #else
;; Classic AVR: disable interrupts around the non-atomic SP update;
;; restoring SREG before the final OUT keeps the CLI window minimal
2201 in r28,__SP_L__
2202 in r29,__SP_H__
2203 sub r28,r26
2204 sbc r29,r27
2205 in __tmp_reg__,__SREG__
2206 cli
2207 out __SP_H__,r29
2208 out __SREG__,__tmp_reg__
2209 out __SP_L__,r28
2210 #endif /* #SP = 8/16 */
2211
2212 XIJMP
2213
2214 ENDF __prologue_saves__
2215 #endif /* defined (L_prologue) */
2216
2217 /*
2218 * This is an epilogue subroutine
2219 */
2220 #if defined (L_epilogue)
2221
;; Common epilogue helper, counterpart of __prologue_saves__: reloads the
;; call-saved registers r2..r17 from the frame (addressed via Y), stages
;; the caller's saved Y in r27:r26, releases the frame by adding r30 to
;; the stack pointer, installs the caller's Y, and returns.
;; NOTE(review): like the prologue, callers appear to enter at an inner
;; offset to restore fewer registers -- confirm against avr.cc.
2222 DEFUN __epilogue_restores__
2223 ldd r2,Y+18
2224 ldd r3,Y+17
2225 ldd r4,Y+16
2226 ldd r5,Y+15
2227 ldd r6,Y+14
2228 ldd r7,Y+13
2229 ldd r8,Y+12
2230 ldd r9,Y+11
2231 ldd r10,Y+10
2232 ldd r11,Y+9
2233 ldd r12,Y+8
2234 ldd r13,Y+7
2235 ldd r14,Y+6
2236 ldd r15,Y+5
2237 ldd r16,Y+4
2238 ldd r17,Y+3
2239 ldd r26,Y+2
2240 #if !defined (__AVR_HAVE_SPH__)
;; 8-bit stack pointer: only SP_L exists
2241 ldd r29,Y+1
2242 add r28,r30
2243 out __SP_L__,r28
2244 mov r28, r26
2245 #elif defined (__AVR_XMEGA__)
;; XMEGA updates SP atomically, no interrupt lockout needed
2246 ldd r27,Y+1
2247 add r28,r30
2248 adc r29,__zero_reg__
2249 out __SP_L__,r28
2250 out __SP_H__,r29
2251 wmov 28, 26
2252 #else
;; Classic AVR: disable interrupts around the non-atomic SP update
2253 ldd r27,Y+1
2254 add r28,r30
2255 adc r29,__zero_reg__
2256 in __tmp_reg__,__SREG__
2257 cli
2258 out __SP_H__,r29
2259 out __SREG__,__tmp_reg__
2260 out __SP_L__,r28
2261 mov_l r28, r26
2262 mov_h r29, r27
2263 #endif /* #SP = 8/16 */
2264 ret
2265 ENDF __epilogue_restores__
2266 #endif /* defined (L_epilogue) */
2267 #endif /* !defined (__AVR_TINY__) */
2268
2269 #ifdef L_exit
2270 .section .fini9,"ax",@progbits
;; _exit sits at the head of the .finiN chain; `exit' is a weak alias so
;; an application or libc can override it. Execution falls through the
;; .fini8 ... .fini1 code that the linker script places after this.
2271 DEFUN _exit
2272 .weak exit
2273 exit:
2274 ENDF _exit
2275
2276 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2277
2278 .section .fini0,"ax",@progbits
;; End of the chain: disable interrupts and spin forever.
2279 cli
2280 __stop_program:
2281 rjmp __stop_program
2282 #endif /* defined (L_exit) */
2283
2284 #ifdef L_cleanup
;; No-op `_cleanup' hook; declared weak so another object file may
;; provide a real implementation.
2285 .weak _cleanup
2286 .func _cleanup
2287 _cleanup:
2288 ret
2289 .endfunc
2290 #endif /* defined (L_cleanup) */
2291
2292
2293 .section .text.libgcc, "ax", @progbits
2295
2296 #ifdef L_tablejump2
;; Indirect jump through a flash-resident jump table.
;; In: Z = WORD address of the table entry (plus R24 = high bits on
;; EIJMP/EICALL devices). The entry's word address is converted to a byte
;; address, the stored code address is fetched, and control transfers there.
;; Clobbers: __tmp_reg__, Z (and RAMPZ where it exists).
2297 DEFUN __tablejump2__
2298 lsl r30
2299 rol r31
2300 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2301 ;; Word address of gs() jumptable entry in R24:Z
2302 rol r24
2303 out __RAMPZ__, r24
2304 #elif defined (__AVR_HAVE_ELPM__)
2305 ;; Word address of jumptable entry in Z
2306 clr __tmp_reg__
2307 rol __tmp_reg__
2308 out __RAMPZ__, __tmp_reg__
2309 #endif
2310
2311 ;; Read word address from jumptable and jump
2312
2313 #if defined (__AVR_HAVE_ELPMX__)
2314 elpm __tmp_reg__, Z+
2315 elpm r31, Z
2316 mov r30, __tmp_reg__
2317 #ifdef __AVR_HAVE_RAMPD__
2318 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2319 out __RAMPZ__, __zero_reg__
2320 #endif /* RAMPD */
2321 XIJMP
2322 #elif defined (__AVR_HAVE_ELPM__)
;; No ELPM Z+: push both target bytes and use RET as the indirect jump
2323 elpm
2324 push r0
2325 adiw r30, 1
2326 elpm
2327 push r0
2328 ret
2329 #elif defined (__AVR_HAVE_LPMX__)
2330 lpm __tmp_reg__, Z+
2331 lpm r31, Z
2332 mov r30, __tmp_reg__
2333 ijmp
2334 #elif defined (__AVR_TINY__)
2335 wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2336 ld __tmp_reg__, Z+
2337 ld r31, Z ; Use ld instead of lpm to load Z
2338 mov r30, __tmp_reg__
2339 ijmp
2340 #else
;; Plain LPM only: push both target bytes and use RET as the indirect jump
2341 lpm
2342 push r0
2343 adiw r30, 1
2344 lpm
2345 push r0
2346 ret
2347 #endif
2348 ENDF __tablejump2__
2349 #endif /* L_tablejump2 */
2350
2351 #if defined(__AVR_TINY__)
2352 #ifdef L_copy_data
2353 .section .init4,"ax",@progbits
;; Startup code (.init4): copy the .data image from flash to RAM.
;; X = RAM destination, Z = flash source; on AVR_TINY, flash is mapped
;; into the data address space at __AVR_TINY_PM_BASE_ADDRESS__, so a
;; plain LD through Z reads program memory.
2354 .global __do_copy_data
2355 __do_copy_data:
2356 ldi r18, hi8(__data_end)
2357 ldi r26, lo8(__data_start)
2358 ldi r27, hi8(__data_start)
2359 ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2360 ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2361 rjmp .L__do_copy_data_start
2362 .L__do_copy_data_loop:
2363 ld r19, z+
2364 st X+, r19
2365 .L__do_copy_data_start:
2366 cpi r26, lo8(__data_end)
2367 cpc r27, r18
2368 brne .L__do_copy_data_loop
2369 #endif
2370 #else
2371 #ifdef L_copy_data
2372 .section .init4,"ax",@progbits
;; Startup code (.init4): copy the .data image from flash
;; (__data_load_start) to RAM [__data_start, __data_end).
;; X = RAM destination, Z (+ RAMPZ on large devices) = flash source.
;; r16/r17 are used freely -- this runs before main, so no register needs
;; preserving. Three variants depending on the available LPM flavor.
2373 DEFUN __do_copy_data
2374 #if defined(__AVR_HAVE_ELPMX__)
2375 ldi r17, hi8(__data_end)
2376 ldi r26, lo8(__data_start)
2377 ldi r27, hi8(__data_start)
2378 ldi r30, lo8(__data_load_start)
2379 ldi r31, hi8(__data_load_start)
2380 ldi r16, hh8(__data_load_start)
2381 out __RAMPZ__, r16
2382 rjmp .L__do_copy_data_start
2383 .L__do_copy_data_loop:
2384 elpm r0, Z+
2385 st X+, r0
2386 .L__do_copy_data_start:
2387 cpi r26, lo8(__data_end)
2388 cpc r27, r17
2389 brne .L__do_copy_data_loop
2390 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
;; ELPM without Z+ auto-increment: maintain RAMPZ by hand, bumping it
;; whenever ADIW on Z wraps (carry set)
2391 ldi r17, hi8(__data_end)
2392 ldi r26, lo8(__data_start)
2393 ldi r27, hi8(__data_start)
2394 ldi r30, lo8(__data_load_start)
2395 ldi r31, hi8(__data_load_start)
2396 ldi r16, hh8(__data_load_start - 0x10000)
2397 .L__do_copy_data_carry:
2398 inc r16
2399 out __RAMPZ__, r16
2400 rjmp .L__do_copy_data_start
2401 .L__do_copy_data_loop:
2402 elpm
2403 st X+, r0
2404 adiw r30, 1
2405 brcs .L__do_copy_data_carry
2406 .L__do_copy_data_start:
2407 cpi r26, lo8(__data_end)
2408 cpc r27, r17
2409 brne .L__do_copy_data_loop
2410 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
;; <= 64 KiB flash: plain LPM (with Z+ where available)
2411 ldi r17, hi8(__data_end)
2412 ldi r26, lo8(__data_start)
2413 ldi r27, hi8(__data_start)
2414 ldi r30, lo8(__data_load_start)
2415 ldi r31, hi8(__data_load_start)
2416 rjmp .L__do_copy_data_start
2417 .L__do_copy_data_loop:
2418 #if defined (__AVR_HAVE_LPMX__)
2419 lpm r0, Z+
2420 #else
2421 lpm
2422 adiw r30, 1
2423 #endif
2424 st X+, r0
2425 .L__do_copy_data_start:
2426 cpi r26, lo8(__data_end)
2427 cpc r27, r17
2428 brne .L__do_copy_data_loop
2429 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2430 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2431 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2432 out __RAMPZ__, __zero_reg__
2433 #endif /* ELPM && RAMPD */
2434 ENDF __do_copy_data
2435 #endif /* L_copy_data */
2436 #endif /* !defined (__AVR_TINY__) */
2437
2438 /* __do_clear_bss is only necessary if there is anything in .bss section. */
2439
2440 #ifdef L_clear_bss
2441 .section .init4,"ax",@progbits
;; Startup code (.init4): zero-fill [__bss_start, __bss_end) via X,
;; storing __zero_reg__; r18 caches hi8(__bss_end) for the 16-bit compare.
2442 DEFUN __do_clear_bss
2443 ldi r18, hi8(__bss_end)
2444 ldi r26, lo8(__bss_start)
2445 ldi r27, hi8(__bss_start)
2446 rjmp .do_clear_bss_start
2447 .do_clear_bss_loop:
2448 st X+, __zero_reg__
2449 .do_clear_bss_start:
2450 cpi r26, lo8(__bss_end)
2451 cpc r27, r18
2452 brne .do_clear_bss_loop
2453 ENDF __do_clear_bss
2454 #endif /* L_clear_bss */
2455
2456 /* __do_global_ctors and __do_global_dtors are only necessary
2457 if there are any constructors/destructors. */
2458
2459 #if defined(__AVR_TINY__)
2460 #define cdtors_tst_reg r18
2461 #else
2462 #define cdtors_tst_reg r17
2463 #endif
2464
2465 #ifdef L_ctors
2466 .section .init6,"ax",@progbits
;; Startup code (.init6): invoke every constructor in the .ctors table,
;; iterating Y = r29:r28 (a WORD address) downward from __ctors_end to
;; __ctors_start; r16/r24 carry the pm_hh8 bits on EIJMP/EICALL devices.
;; Each entry is dispatched through __tablejump2__, which returns here
;; because it is reached via XCALL.
2467 DEFUN __do_global_ctors
2468 ldi cdtors_tst_reg, pm_hi8(__ctors_start)
2469 ldi r28, pm_lo8(__ctors_end)
2470 ldi r29, pm_hi8(__ctors_end)
2471 #ifdef __AVR_HAVE_EIJMP_EICALL__
2472 ldi r16, pm_hh8(__ctors_end)
2473 #endif /* HAVE_EIJMP */
2474 rjmp .L__do_global_ctors_start
2475 .L__do_global_ctors_loop:
2476 wsubi 28, 1
2477 #ifdef __AVR_HAVE_EIJMP_EICALL__
2478 sbc r16, __zero_reg__
2479 mov r24, r16
2480 #endif /* HAVE_EIJMP */
2481 mov_h r31, r29
2482 mov_l r30, r28
2483 XCALL __tablejump2__
2484 .L__do_global_ctors_start:
2485 cpi r28, pm_lo8(__ctors_start)
2486 cpc r29, cdtors_tst_reg
2487 #ifdef __AVR_HAVE_EIJMP_EICALL__
2488 ldi r24, pm_hh8(__ctors_start)
2489 cpc r16, r24
2490 #endif /* HAVE_EIJMP */
2491 brne .L__do_global_ctors_loop
2492 ENDF __do_global_ctors
2493 #endif /* L_ctors */
2494
2495 #ifdef L_dtors
2496 .section .fini6,"ax",@progbits
;; Shutdown code (.fini6): invoke every destructor in the .dtors table,
;; iterating Y = r29:r28 (a WORD address) upward from __dtors_start to
;; __dtors_end -- the mirror image of __do_global_ctors above.
2497 DEFUN __do_global_dtors
2498 ldi cdtors_tst_reg, pm_hi8(__dtors_end)
2499 ldi r28, pm_lo8(__dtors_start)
2500 ldi r29, pm_hi8(__dtors_start)
2501 #ifdef __AVR_HAVE_EIJMP_EICALL__
2502 ldi r16, pm_hh8(__dtors_start)
2503 #endif /* HAVE_EIJMP */
2504 rjmp .L__do_global_dtors_start
2505 .L__do_global_dtors_loop:
2506 #ifdef __AVR_HAVE_EIJMP_EICALL__
2507 mov r24, r16
2508 #endif /* HAVE_EIJMP */
2509 mov_h r31, r29
2510 mov_l r30, r28
2511 XCALL __tablejump2__
2512 waddi 28, 1
2513 #ifdef __AVR_HAVE_EIJMP_EICALL__
2514 adc r16, __zero_reg__
2515 #endif /* HAVE_EIJMP */
2516 .L__do_global_dtors_start:
2517 cpi r28, pm_lo8(__dtors_end)
2518 cpc r29, cdtors_tst_reg
2519 #ifdef __AVR_HAVE_EIJMP_EICALL__
2520 ldi r24, pm_hh8(__dtors_end)
2521 cpc r16, r24
2522 #endif /* HAVE_EIJMP */
2523 brne .L__do_global_dtors_loop
2524 ENDF __do_global_dtors
2525 #endif /* L_dtors */
2526
2527 #undef cdtors_tst_reg
2528
2529 .section .text.libgcc, "ax", @progbits
2530
2531 #if !defined (__AVR_TINY__)
2532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2533 ;; Loading n bytes from Flash; n = 3,4
2534 ;; R22... = Flash[Z]
2535 ;; Clobbers: __tmp_reg__
2536
2537 #if (defined (L_load_3) \
2538 || defined (L_load_4)) \
2539 && !defined (__AVR_HAVE_LPMX__)
2540
2541 ;; Destination
2542 #define D0 22
2543 #define D1 D0+1
2544 #define D2 D0+2
2545 #define D3 D0+3
2546
;; Load one byte from Flash[Z] into \dest via plain LPM. Z advances to
;; the next byte, except after the final byte of the n-byte load, where
;; Z is rewound to its original value.
2547 .macro .load dest, n
2548 lpm
2549 mov \dest, r0
2550 .if \dest != D0+\n-1
2551 adiw r30, 1
2552 .else
2553 sbiw r30, \n-1
2554 .endif
2555 .endm
2556
2557 #if defined (L_load_3)
;; 3-byte load: reuse __load_4 and discard the 4th byte by saving D3
2558 DEFUN __load_3
2559 push D3
2560 XCALL __load_4
2561 pop D3
2562 ret
2563 ENDF __load_3
2564 #endif /* L_load_3 */
2565
2566 #if defined (L_load_4)
;; 4-byte load: R25:R22 = Flash[Z], Z preserved (rewound by the macro)
2567 DEFUN __load_4
2568 .load D0, 4
2569 .load D1, 4
2570 .load D2, 4
2571 .load D3, 4
2572 ret
2573 ENDF __load_4
2574 #endif /* L_load_4 */
2575
2576 #endif /* L_load_3 || L_load_4 */
2577 #endif /* !defined (__AVR_TINY__) */
2578
2579 #if !defined (__AVR_TINY__)
2580 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2581 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2582 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2583 ;; Clobbers: __tmp_reg__, R21, R30, R31
2584
2585 #if (defined (L_xload_1) \
2586 || defined (L_xload_2) \
2587 || defined (L_xload_3) \
2588 || defined (L_xload_4))
2589
2590 ;; Destination
2591 #define D0 22
2592 #define D1 D0+1
2593 #define D2 D0+2
2594 #define D3 D0+3
2595
2596 ;; Register containing bits 16+ of the address
2597
2598 #define HHI8 21
2599
;; Load one byte from flash into \dest with whatever LPM flavor the
;; device has, advancing Z (and RAMPZ on ELPM devices) except after the
;; final byte of an n-byte load; on ELPM+RAMPD devices the final byte
;; also resets RAMPZ to 0.
2600 .macro .xload dest, n
2601 #if defined (__AVR_HAVE_ELPMX__)
2602 elpm \dest, Z+
2603 #elif defined (__AVR_HAVE_ELPM__)
2604 elpm
2605 mov \dest, r0
2606 .if \dest != D0+\n-1
2607 adiw r30, 1
2608 adc HHI8, __zero_reg__
2609 out __RAMPZ__, HHI8
2610 .endif
2611 #elif defined (__AVR_HAVE_LPMX__)
2612 lpm \dest, Z+
2613 #else
2614 lpm
2615 mov \dest, r0
2616 .if \dest != D0+\n-1
2617 adiw r30, 1
2618 .endif
2619 #endif
2620 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2621 .if \dest == D0+\n-1
2622 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2623 out __RAMPZ__, __zero_reg__
2624 .endif
2625 #endif
2626 .endm ; .xload
2627
2628 #if defined (L_xload_1)
;; R22 = *(R21:Z); R21.7 selects RAM (set) vs flash (clear)
2629 DEFUN __xload_1
2630 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2631 sbrc HHI8, 7
2632 ld D0, Z
2633 sbrs HHI8, 7
2634 lpm D0, Z
2635 ret
2636 #else
2637 sbrc HHI8, 7
2638 rjmp 1f
2639 #if defined (__AVR_HAVE_ELPM__)
2640 out __RAMPZ__, HHI8
2641 #endif /* __AVR_HAVE_ELPM__ */
2642 .xload D0, 1
2643 ret
2644 1: ld D0, Z
2645 ret
2646 #endif /* LPMx && ! ELPM */
2647 ENDF __xload_1
2648 #endif /* L_xload_1 */
2649
2650 #if defined (L_xload_2)
;; R23:R22 = *(R21:Z); R21.7 selects RAM (set) vs flash (clear)
2651 DEFUN __xload_2
2652 sbrc HHI8, 7
2653 rjmp 1f
2654 #if defined (__AVR_HAVE_ELPM__)
2655 out __RAMPZ__, HHI8
2656 #endif /* __AVR_HAVE_ELPM__ */
2657 .xload D0, 2
2658 .xload D1, 2
2659 ret
2660 1: ld D0, Z+
2661 ld D1, Z+
2662 ret
2663 ENDF __xload_2
2664 #endif /* L_xload_2 */
2665
2666 #if defined (L_xload_3)
;; R24:R22 = *(R21:Z); R21.7 selects RAM (set) vs flash (clear)
2667 DEFUN __xload_3
2668 sbrc HHI8, 7
2669 rjmp 1f
2670 #if defined (__AVR_HAVE_ELPM__)
2671 out __RAMPZ__, HHI8
2672 #endif /* __AVR_HAVE_ELPM__ */
2673 .xload D0, 3
2674 .xload D1, 3
2675 .xload D2, 3
2676 ret
2677 1: ld D0, Z+
2678 ld D1, Z+
2679 ld D2, Z+
2680 ret
2681 ENDF __xload_3
2682 #endif /* L_xload_3 */
2683
2684 #if defined (L_xload_4)
;; R25:R22 = *(R21:Z); R21.7 selects RAM (set) vs flash (clear)
2685 DEFUN __xload_4
2686 sbrc HHI8, 7
2687 rjmp 1f
2688 #if defined (__AVR_HAVE_ELPM__)
2689 out __RAMPZ__, HHI8
2690 #endif /* __AVR_HAVE_ELPM__ */
2691 .xload D0, 4
2692 .xload D1, 4
2693 .xload D2, 4
2694 .xload D3, 4
2695 ret
2696 1: ld D0, Z+
2697 ld D1, Z+
2698 ld D2, Z+
2699 ld D3, Z+
2700 ret
2701 ENDF __xload_4
2702 #endif /* L_xload_4 */
2703
2704 #endif /* L_xload_{1|2|3|4} */
2705 #endif /* if !defined (__AVR_TINY__) */
2706
2707 #if !defined (__AVR_TINY__)
2708 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2709 ;; memcopy from Address Space __pgmx to RAM
2710 ;; R23:Z = Source Address
2711 ;; X = Destination Address
2712 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2713
2714 #if defined (L_movmemx)
2715
2716 #define HHI8 23
2717 #define LOOP 24
2718
;; 8-bit-count entry point: zero-extends the count into LOOP+1 and falls
;; through into __movmemx_hi below.
2719 DEFUN __movmemx_qi
2720 ;; #Bytes to copy fit in 8 Bits (1..255)
2721 ;; Zero-extend Loop Counter
2722 clr LOOP+1
2723 ;; FALLTHRU
2724 ENDF __movmemx_qi
2725
;; Copy R25:R24 bytes from R23:Z (flash if R23.7 clear, RAM if set)
;; to RAM at X -- see the interface comment above this #if block.
2726 DEFUN __movmemx_hi
2727
2728 ;; Read from where?
2729 sbrc HHI8, 7
2730 rjmp 1f
2731
2732 ;; Read from Flash
2733
2734 #if defined (__AVR_HAVE_ELPM__)
2735 out __RAMPZ__, HHI8
2736 #endif
2737
2738 0: ;; Load 1 Byte from Flash...
2739
2740 #if defined (__AVR_HAVE_ELPMX__)
2741 elpm r0, Z+
2742 #elif defined (__AVR_HAVE_ELPM__)
2743 elpm
2744 adiw r30, 1
2745 adc HHI8, __zero_reg__
2746 out __RAMPZ__, HHI8
2747 #elif defined (__AVR_HAVE_LPMX__)
2748 lpm r0, Z+
2749 #else
2750 lpm
2751 adiw r30, 1
2752 #endif
2753
2754 ;; ...and store that Byte to RAM Destination
2755 st X+, r0
2756 sbiw LOOP, 1
2757 brne 0b
2758 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2759 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2760 out __RAMPZ__, __zero_reg__
2761 #endif /* ELPM && RAMPD */
2762 ret
2763
2764 ;; Read from RAM
2765
2766 1: ;; Read 1 Byte from RAM...
2767 ld r0, Z+
2768 ;; and store that Byte to RAM Destination
2769 st X+, r0
2770 sbiw LOOP, 1
2771 brne 1b
2772 ret
2773 ENDF __movmemx_hi
2774
2775 #undef HHI8
2776 #undef LOOP
2777
2778 #endif /* L_movmemx */
2779 #endif /* !defined (__AVR_TINY__) */
2780
2781
2782 .section .text.libgcc.builtins, "ax", @progbits
2784
2785 /**********************************
2786 * Find first set Bit (ffs)
2787 **********************************/
2788
2789 #if defined (L_ffssi2)
2790 ;; find first set bit
2791 ;; r25:r24 = ffs32 (r25:r22)
2792 ;; clobbers: r22, r26
;; Returns 0 when the input is 0, else the 1-based index of the lowest
;; set bit.  r26 accumulates 8 per all-zero low byte, then the byte
;; containing the lowest set bit is scanned by __loop_ffsqi2.
2793 DEFUN __ffssi2
2794 clr r26
2795 tst r22
2796 brne 1f ; lowest set bit is in byte 0
2797 subi r26, -8 ; r26 += 8 (subtracting -8)
2798 or r22, r23
2799 brne 1f ; ... in byte 1
2800 subi r26, -8
2801 or r22, r24
2802 brne 1f ; ... in byte 2
2803 subi r26, -8
2804 or r22, r25
2805 brne 1f ; ... in byte 3
2806 ret ; input was 0: r25:r24 is already 0 here
2807 1: mov r24, r22
2808 XJMP __loop_ffsqi2
2809 ENDF __ffssi2
2810 #endif /* defined (L_ffssi2) */
2811
2812 #if defined (L_ffshi2)
2813 ;; find first set bit
2814 ;; r25:r24 = ffs16 (r25:r24)
2815 ;; clobbers: r26
;; Returns 0 when the input is 0, else the 1-based index of the lowest
;; set bit (offset in r26, byte scan done by __loop_ffsqi2).
2816 DEFUN __ffshi2
2817 clr r26
2818 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2819 ;; Some cores have problem skipping 2-word instruction
2820 tst r24
2821 breq 2f
2822 #else
;; CPSE skips the (possibly 2-word) XJMP below when r24 == 0.
2823 cpse r24, __zero_reg__
2824 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2825 1: XJMP __loop_ffsqi2
2826 2: ldi r26, 8 ; low byte is 0: bias result by 8, scan high byte
2827 or r24, r25
2828 brne 1b
2829 ret ; input was 0: r25:r24 is already 0 here
2830 ENDF __ffshi2
2831 #endif /* defined (L_ffshi2) */
2832
2833 #if defined (L_loop_ffsqi2)
2834 ;; Helper for ffshi2, ffssi2
2835 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2836 ;; r24 must be != 0
2837 ;; clobbers: r26
;; Shifts r24 right until a 1 falls into carry, counting shifts in r26
;; (pre-biased by the caller), yielding the 1-based bit index.
2838 DEFUN __loop_ffsqi2
2839 inc r26
2840 lsr r24
2841 brcc __loop_ffsqi2 ; carry clear: that bit was 0, keep scanning
2842 mov r24, r26
2843 clr r25 ; zero-extend the result to 16 bits
2844 ret
2845 ENDF __loop_ffsqi2
2846 #endif /* defined (L_loop_ffsqi2) */
2847
2848
2849 /**********************************
2851 * Count trailing Zeros (ctz)
2852 **********************************/
2853
2854 #if defined (L_ctzsi2)
2855 ;; count trailing zeros
2856 ;; r25:r24 = ctz32 (r25:r22)
2857 ;; clobbers: r26, r22
2858 ;; ctz(0) = 255
2859 ;; Note that ctz(0) is undefined for GCC
;; ctz(x) = ffs(x) - 1; ffs(0) = 0, hence the documented ctz(0) = 255
;; after the decrement wraps.
2860 DEFUN __ctzsi2
2861 XCALL __ffssi2
2862 dec r24
2863 ret
2864 ENDF __ctzsi2
2865 #endif /* defined (L_ctzsi2) */
2866
2867 #if defined (L_ctzhi2)
2868 ;; count trailing zeros
2869 ;; r25:r24 = ctz16 (r25:r24)
2870 ;; clobbers: r26
2871 ;; ctz(0) = 255
2872 ;; Note that ctz(0) is undefined for GCC
;; ctz(x) = ffs(x) - 1; ffs(0) = 0, hence the documented ctz(0) = 255
;; after the decrement wraps.
2873 DEFUN __ctzhi2
2874 XCALL __ffshi2
2875 dec r24
2876 ret
2877 ENDF __ctzhi2
2878 #endif /* defined (L_ctzhi2) */
2879
2880
2881 /**********************************
2883 * Count leading Zeros (clz)
2884 **********************************/
2885
2886 #if defined (L_clzdi2)
2887 ;; count leading zeros
2888 ;; r25:r24 = clz64 (r25:r18)
2889 ;; clobbers: r22, r23, r26
;; First count the high 32 bits; only if that yields 32 (bit 5 set)
;; does the low half need counting, with the result biased by +32.
2890 DEFUN __clzdi2
2891 XCALL __clzsi2
2892 sbrs r24, 5 ; result < 32: high half had a set bit, done
2893 ret
2894 mov_l r22, r18 ; move low 32 bits (r21:r18) into clzsi2's input regs
2895 mov_h r23, r19
2896 mov_l r24, r20
2897 mov_h r25, r21
2898 XCALL __clzsi2
2899 subi r24, -32 ; r24 += 32
2900 ret
2901 ENDF __clzdi2
2902 #endif /* defined (L_clzdi2) */
2903
2904 #if defined (L_clzsi2)
2905 ;; count leading zeros
2906 ;; r25:r24 = clz32 (r25:r22)
2907 ;; clobbers: r26
;; First count the high 16 bits; only if that yields 16 (bit 4 set)
;; does the low half need counting, with the result biased by +16.
2908 DEFUN __clzsi2
2909 XCALL __clzhi2
2910 sbrs r24, 4 ; result < 16: high half had a set bit, done
2911 ret
2912 mov_l r24, r22 ; count the low 16 bits
2913 mov_h r25, r23
2914 XCALL __clzhi2
2915 subi r24, -16 ; r24 += 16
2916 ret
2917 ENDF __clzsi2
2918 #endif /* defined (L_clzsi2) */
2919
2920 #if defined (L_clzhi2)
2921 ;; count leading zeros
2922 ;; r25:r24 = clz16 (r25:r24)
2923 ;; clobbers: r26
;; r26 accumulates the count: +8 per all-zero high byte, +3 and a SWAP
;; when the top nibble is clear, then one shift-left per remaining
;; leading zero until the first 1 bit reaches carry.
2924 DEFUN __clzhi2
2925 clr r26
2926 tst r25
2927 brne 1f
2928 subi r26, -8 ; high byte is 0: count starts at 8
2929 or r25, r24
2930 brne 1f
2931 ldi r24, 16 ; whole input is 0: clz = 16
2932 ret
2933 1: cpi r25, 16
2934 brsh 3f ; top nibble non-zero: scan bit by bit
2935 subi r26, -3 ; top nibble zero: credit 3, SWAP brings the low
2936 swap r25 ; nibble up (4th zero counted by the loop)
2937 2: inc r26
2938 3: lsl r25
2939 brcc 2b ; shifted-out bit was 0: keep counting
2940 mov r24, r26
2941 clr r25 ; zero-extend the result to 16 bits
2942 ret
2943 ENDF __clzhi2
2944 #endif /* defined (L_clzhi2) */
2945
2946
2947 /**********************************
2949 * Parity
2950 **********************************/
2951
2952 #if defined (L_paritydi2)
2953 ;; r25:r24 = parity64 (r25:r18)
2954 ;; clobbers: __tmp_reg__
;; XOR the low four bytes into r24, then reduce the remaining 32 bits
;; via __paritysi2.
2955 DEFUN __paritydi2
2956 eor r24, r18
2957 eor r24, r19
2958 eor r24, r20
2959 eor r24, r21
2960 XJMP __paritysi2
2961 ENDF __paritydi2
2962 #endif /* defined (L_paritydi2) */
2963
2964 #if defined (L_paritysi2)
2965 ;; r25:r24 = parity32 (r25:r22)
2966 ;; clobbers: __tmp_reg__
;; XOR the low two bytes into r24, then reduce the remaining 16 bits
;; via __parityhi2.
2967 DEFUN __paritysi2
2968 eor r24, r22
2969 eor r24, r23
2970 XJMP __parityhi2
2971 ENDF __paritysi2
2972 #endif /* defined (L_paritysi2) */
2973
2974 #if defined (L_parityhi2)
2975 ;; r25:r24 = parity16 (r25:r24)
2976 ;; clobbers: __tmp_reg__
;; Fold the high byte into the low one, then fall through to the
;; byte-parity routine.
2977 DEFUN __parityhi2
2978 eor r24, r25
2979 ;; FALLTHRU
2980 ENDF __parityhi2
2981
2982 ;; r25:r24 = parity8 (r24)
2983 ;; clobbers: __tmp_reg__
;; Branch-free parity reduction: fold nibbles with SWAP+EOR, then use
;; an add/mask/add trick to collapse 4 bits of parity into bit 0.
2984 DEFUN __parityqi2
2985 ;; parity is in r24[0..7]
2986 mov __tmp_reg__, r24
2987 swap __tmp_reg__
2988 eor r24, __tmp_reg__
2989 ;; parity is in r24[0..3]
2990 subi r24, -4
2991 andi r24, -5
2992 subi r24, -6
2993 ;; parity is in r24[0,3]
2994 sbrc r24, 3
2995 inc r24 ; fold bit 3 into bit 0
2996 ;; parity is in r24[0]
2997 andi r24, 1
2998 clr r25 ; zero-extend the result to 16 bits
2999 ret
3000 ENDF __parityqi2
3001 #endif /* defined (L_parityhi2) */
3002
3003
3004 /**********************************
3006 * Population Count
3007 **********************************/
3008
3009 #if defined (L_popcounthi2)
3010 ;; population count
3011 ;; r25:r24 = popcount16 (r25:r24)
3012 ;; clobbers: __tmp_reg__
;; Count each byte separately with __popcountqi2 and add the two
;; partial counts in the shared _tail helper (low-byte count is kept
;; on the stack across the second call).
3013 DEFUN __popcounthi2
3014 XCALL __popcountqi2
3015 push r24 ; save popcount of the low byte
3016 mov r24, r25
3017 XCALL __popcountqi2
3018 clr r25 ; zero-extend the result to 16 bits
3019 ;; FALLTHRU
3020 ENDF __popcounthi2
3021
;; Shared tail: pop the saved partial count and add it to r24.
;; Also used by __popcountsi2 and __popcountdi2.
3022 DEFUN __popcounthi2_tail
3023 pop __tmp_reg__
3024 add r24, __tmp_reg__
3025 ret
3026 ENDF __popcounthi2_tail
3027 #endif /* defined (L_popcounthi2) */
3028
3029 #if defined (L_popcountsi2)
3030 ;; population count
3031 ;; r25:r24 = popcount32 (r25:r22)
3032 ;; clobbers: __tmp_reg__
;; Count each 16-bit half with __popcounthi2; the first partial count
;; is pushed and added back in __popcounthi2_tail.
3033 DEFUN __popcountsi2
3034 XCALL __popcounthi2
3035 push r24 ; save popcount of the high half
3036 mov_l r24, r22 ; count the low 16 bits
3037 mov_h r25, r23
3038 XCALL __popcounthi2
3039 XJMP __popcounthi2_tail
3040 ENDF __popcountsi2
3041 #endif /* defined (L_popcountsi2) */
3042
3043 #if defined (L_popcountdi2)
3044 ;; population count
3045 ;; r25:r24 = popcount64 (r25:r18)
3046 ;; clobbers: r22, r23, __tmp_reg__
;; Count each 32-bit half with __popcountsi2; the first partial count
;; is pushed and added back in __popcounthi2_tail.
3047 DEFUN __popcountdi2
3048 XCALL __popcountsi2
3049 push r24 ; save popcount of the high half
3050 mov_l r22, r18 ; move low 32 bits (r21:r18) into popcountsi2's regs
3051 mov_h r23, r19
3052 mov_l r24, r20
3053 mov_h r25, r21
3054 XCALL __popcountsi2
3055 XJMP __popcounthi2_tail
3056 ENDF __popcountdi2
3057 #endif /* defined (L_popcountdi2) */
3058
3059 #if defined (L_popcountqi2)
3060 ;; population count
3061 ;; r24 = popcount8 (r24)
3062 ;; clobbers: __tmp_reg__
;; Keep bit 0 in r24, then shift a copy right and accumulate each
;; shifted-out bit via the carry flag (ADC with the zero register).
;; After 7 shifts the copy holds bit 7 in position 0 and the carry
;; holds bit 6, so the final ADC adds both at once.
3063 DEFUN __popcountqi2
3064 mov __tmp_reg__, r24
3065 andi r24, 1 ; result starts as bit 0
3066 lsr __tmp_reg__ ; discard bit 0 from the copy
3067 lsr __tmp_reg__ ; C = bit 1
3068 adc r24, __zero_reg__
3069 lsr __tmp_reg__ ; C = bit 2
3070 adc r24, __zero_reg__
3071 lsr __tmp_reg__ ; C = bit 3
3072 adc r24, __zero_reg__
3073 lsr __tmp_reg__ ; C = bit 4
3074 adc r24, __zero_reg__
3075 lsr __tmp_reg__ ; C = bit 5
3076 adc r24, __zero_reg__
3077 lsr __tmp_reg__ ; C = bit 6, tmp = bit 7
3078 adc r24, __tmp_reg__ ; add bit 7 and carried bit 6 together
3079 ret
3080 ENDF __popcountqi2
3081 #endif /* defined (L_popcountqi2) */
3082
3083
3084 /**********************************
3086 * Swap bytes
3087 **********************************/
3088
3089 ;; swap two registers with different register number
;; Classic XOR swap: needs no scratch register, but \a and \b must be
;; distinct registers (a == b would zero both).
3090 .macro bswap a, b
3091 eor \a, \b
3092 eor \b, \a
3093 eor \a, \b
3094 .endm
3095
3096 #if defined (L_bswapsi2)
3097 ;; swap bytes
3098 ;; r25:r22 = bswap32 (r25:r22)
;; Reverse byte order by exchanging the two outer and the two inner
;; bytes with the XOR-swap macro.
3099 DEFUN __bswapsi2
3100 bswap r22, r25 ; exchange outer bytes
3101 bswap r23, r24 ; exchange inner bytes
3102 ret
3103 ENDF __bswapsi2
3104 #endif /* defined (L_bswapsi2) */
3105
3106 #if defined (L_bswapdi2)
3107 ;; swap bytes
3108 ;; r25:r18 = bswap64 (r25:r18)
;; Reverse byte order by exchanging the four mirror-image byte pairs
;; with the XOR-swap macro.
3109 DEFUN __bswapdi2
3110 bswap r18, r25
3111 bswap r19, r24
3112 bswap r20, r23
3113 bswap r21, r22
3114 ret
3115 ENDF __bswapdi2
3116 #endif /* defined (L_bswapdi2) */
3117
3118
3119 /**********************************
3121 * 64-bit shifts
3122 **********************************/
3123
3124 #if defined (L_ashrdi3)
3125
3126 #define SS __zero_reg__
3127
3128 ;; Arithmetic shift right
3129 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
;; SS (borrowing the zero register) holds the fill byte: COM turns it
;; into 0xFF when the sign bit is set, then control falls through into
;; the shared shift loop below.  SS is restored to 0 before returning.
3130 DEFUN __ashrdi3
3131 sbrc r25, 7
3132 com SS ; negative input: fill with 0xFF instead of 0x00
3133 ;; FALLTHRU
3134 ENDF __ashrdi3
3135
3136 ;; Logic shift right
3137 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
;; Shift in two phases: whole-byte moves while the count is >= 8, then
;; single-bit rotations for the remainder.  Only r16 of the count is
;; used; r16 is preserved via __tmp_reg__.
3138 DEFUN __lshrdi3
3139 ;; Signs are in SS (zero_reg)
3140 mov __tmp_reg__, r16 ; save shift count for the caller
3141 0: cpi r16, 8
3142 brlo 2f ; fewer than 8 bits left: bitwise phase
3143 subi r16, 8
3144 mov r18, r19 ; shift right by one whole byte
3145 mov r19, r20
3146 mov r20, r21
3147 mov r21, r22
3148 mov r22, r23
3149 mov r23, r24
3150 mov r24, r25
3151 mov r25, SS ; fill the top byte with the sign byte
3152 rjmp 0b
3153 1: asr SS ; keep SS = 0x00/0xFF, push its LSB into carry
3154 ror r25 ; ripple the carry down through all 8 bytes
3155 ror r24
3156 ror r23
3157 ror r22
3158 ror r21
3159 ror r20
3160 ror r19
3161 ror r18
3162 2: dec r16
3163 brpl 1b ; loop runs (r16 mod 8) + 1 ... no: until r16 < 0
3164 clr __zero_reg__ ; restore the zero register (SS aliased it)
3165 mov r16, __tmp_reg__ ; restore the shift count register
3166 ret
3167 ENDF __lshrdi3
3168
3169 #undef SS
3170
3171 #endif /* defined (L_ashrdi3) */
3172
3173 #if defined (L_ashldi3)
3174 ;; Shift left
3175 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3176 ;; This function does not clobber T.
;; Shift in two phases: whole-byte moves while the count is >= 8, then
;; single-bit shifts for the remainder.  Only r16 of the count is used;
;; r16 is preserved via __tmp_reg__.
3177 DEFUN __ashldi3
3178 mov __tmp_reg__, r16 ; save shift count for the caller
3179 0: cpi r16, 8
3180 brlo 2f ; fewer than 8 bits left: bitwise phase
3181 mov r25, r24 ; shift left by one whole byte
3182 mov r24, r23
3183 mov r23, r22
3184 mov r22, r21
3185 mov r21, r20
3186 mov r20, r19
3187 mov r19, r18
3188 clr r18 ; zero-fill the bottom byte
3189 subi r16, 8
3190 rjmp 0b
3191 1: lsl r18 ; shift left by one bit, carry rippling up
3192 rol r19
3193 rol r20
3194 rol r21
3195 rol r22
3196 rol r23
3197 rol r24
3198 rol r25
3199 2: dec r16
3200 brpl 1b ; loop until r16 goes negative
3201 mov r16, __tmp_reg__ ; restore the shift count register
3202 ret
3203 ENDF __ashldi3
3204 #endif /* defined (L_ashldi3) */
3205
3206 #if defined (L_rotldi3)
3207 ;; Rotate left
3208 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
;; Rotate in two phases: whole-byte rotations while the count is >= 8,
;; then single-bit rotations for the remainder.  Only r16 of the count
;; is used; r16 is preserved on the stack.
3209 DEFUN __rotldi3
3210 push r16 ; save rotate count for the caller
3211 0: cpi r16, 8
3212 brlo 2f ; fewer than 8 bits left: bitwise phase
3213 subi r16, 8
3214 mov __tmp_reg__, r25 ; rotate left by one whole byte:
3215 mov r25, r24 ; the top byte wraps around to the bottom
3216 mov r24, r23
3217 mov r23, r22
3218 mov r22, r21
3219 mov r21, r20
3220 mov r20, r19
3221 mov r19, r18
3222 mov r18, __tmp_reg__
3223 rjmp 0b
3224 1: lsl r18 ; rotate left by one bit ...
3225 rol r19
3226 rol r20
3227 rol r21
3228 rol r22
3229 rol r23
3230 rol r24
3231 rol r25
3232 adc r18, __zero_reg__ ; ... wrapping the shifted-out MSB into bit 0
3233 2: dec r16
3234 brpl 1b ; loop until r16 goes negative
3235 pop r16 ; restore the rotate count register
3236 ret
3237 ENDF __rotldi3
3238 #endif /* defined (L_rotldi3) */
3239
3240
3241 .section .text.libgcc.fmul, "ax", @progbits
3243
3244 /***********************************************************/
3245 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3246 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3247 /***********************************************************/
3248
3249 #define A1 24
3250 #define B1 25
3251 #define C0 22
3252 #define C1 23
3253 #define A0 __tmp_reg__
3254
3255 #ifdef L_fmuls
3256 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3257 ;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x signed: record the result sign (XOR of the operand signs)
;;; in A0.7, take |B1|, and let __fmulsu_exit handle |A1| and the
;;; unsigned multiply plus optional negation.
3258 DEFUN __fmuls
3259 ;; A0.7 = negate result?
3260 mov A0, A1
3261 eor A0, B1
3262 ;; B1 = |B1|
3263 sbrc B1, 7
3264 neg B1
3265 XJMP __fmulsu_exit
3266 ENDF __fmuls
3267 #endif /* L_fmuls */
3268
3269 #ifdef L_fmulsu
3270 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3271 ;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x unsigned: the result sign is just A1's sign, recorded in
;;; A0.7; falls through into the shared exit helper.
3272 DEFUN __fmulsu
3273 ;; A0.7 = negate result?
3274 mov A0, A1
3275 ;; FALLTHRU
3276 ENDF __fmulsu
3277
3278 ;; Helper for __fmuls and __fmulsu
;; Takes |A1|, performs the unsigned fractional multiply via __fmul,
;; and negates the 16-bit result C when A0.7 says so.
3279 DEFUN __fmulsu_exit
3280 ;; A1 = |A1|
3281 sbrc A1, 7
3282 neg A1
3283 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3284 ;; Some cores have problem skipping 2-word instruction
3285 tst A0
3286 brmi 1f
3287 #else
;; Skip the (possibly 2-word) XJMP when the result must be negated.
3288 sbrs A0, 7
3289 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3290 XJMP __fmul ; no negation needed: tail-call __fmul
3291 1: XCALL __fmul
3292 ;; C = -C iff A0.7 = 1
3293 NEG2 C0
3294 ret
3295 ENDF __fmulsu_exit
3296 #endif /* L_fmulsu */
3297
3298
3299 #ifdef L_fmul
3300 ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
3301 ;;; Clobbers: r24, r25, __tmp_reg__
;;; Unsigned fractional multiply by shift-and-add: for each set bit of
;;; B (scanned MSB-first via LSL), add the progressively right-shifted
;;; 16-bit A1:A0 into the accumulator C1:C0.
3302 DEFUN __fmul
3303 ; clear result
3304 clr C0
3305 clr C1
3306 clr A0 ; A's low byte starts as 0 (A is A1.A0 fixed-point)
3307 1: tst B1
3308 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
3309 2: brpl 3f
3310 ;; C += A
3311 add C0, A0
3312 adc C1, A1
3313 3: ;; A >>= 1
3314 lsr A1
3315 ror A0
3316 ;; B <<= 1
3317 lsl B1
3318 brne 2b ; stop once no set bits remain in B
3319 ret
3320 ENDF __fmul
3321 #endif /* L_fmul */
3322
3323 #undef A0
3324 #undef A1
3325 #undef B1
3326 #undef C0
3327 #undef C1
3328
3329 #include "lib1funcs-fixed.S"
3330