/* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998-2013 Free Software Foundation, Inc.
3 Contributed by Denis Chertykov <chertykov (at) gmail.com>
4
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #define __zero_reg__ r1
25 #define __tmp_reg__ r0
26 #define __SREG__ 0x3f
27 #if defined (__AVR_HAVE_SPH__)
28 #define __SP_H__ 0x3e
29 #endif
30 #define __SP_L__ 0x3d
31 #define __RAMPZ__ 0x3B
32 #define __EIND__ 0x3C
33
34 /* Most of the functions here are called directly from avr.md
35 patterns, instead of using the standard libcall mechanisms.
36 This can make better code because GCC knows exactly which
37 of the call-used registers (not all of them) are clobbered. */
38
39 /* FIXME: At present, there is no SORT directive in the linker
40 script so that we must not assume that different modules
41 in the same input section like .libgcc.text.mul will be
42 located close together. Therefore, we cannot use
43 RCALL/RJMP to call a function like __udivmodhi4 from
44 __divmodhi4 and have to use lengthy XCALL/XJMP even
45 though they are in the same input section and all same
46 input sections together are small enough to reach every
47 location with a RCALL/RJMP instruction. */
48
49 .macro mov_l r_dest, r_src
50 #if defined (__AVR_HAVE_MOVW__)
51 movw \r_dest, \r_src
52 #else
53 mov \r_dest, \r_src
54 #endif
55 .endm
56
57 .macro mov_h r_dest, r_src
58 #if defined (__AVR_HAVE_MOVW__)
59 ; empty
60 #else
61 mov \r_dest, \r_src
62 #endif
63 .endm
64
65 .macro wmov r_dest, r_src
66 #if defined (__AVR_HAVE_MOVW__)
67 movw \r_dest, \r_src
68 #else
69 mov \r_dest, \r_src
70 mov \r_dest+1, \r_src+1
71 #endif
72 .endm
73
74 #if defined (__AVR_HAVE_JMP_CALL__)
75 #define XCALL call
76 #define XJMP jmp
77 #else
78 #define XCALL rcall
79 #define XJMP rjmp
80 #endif
81
82 ;; Prologue stuff
83
84 .macro do_prologue_saves n_pushed n_frame=0
85 ldi r26, lo8(\n_frame)
86 ldi r27, hi8(\n_frame)
87 ldi r30, lo8(gs(.L_prologue_saves.\@))
88 ldi r31, hi8(gs(.L_prologue_saves.\@))
89 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
90 .L_prologue_saves.\@:
91 .endm
92
93 ;; Epilogue stuff
94
95 .macro do_epilogue_restores n_pushed n_frame=0
96 in r28, __SP_L__
97 #ifdef __AVR_HAVE_SPH__
98 in r29, __SP_H__
99 .if \n_frame > 63
100 subi r28, lo8(-\n_frame)
101 sbci r29, hi8(-\n_frame)
102 .elseif \n_frame > 0
103 adiw r28, \n_frame
104 .endif
105 #else
106 clr r29
107 .if \n_frame > 0
108 subi r28, lo8(-\n_frame)
109 .endif
110 #endif /* HAVE SPH */
111 ldi r30, \n_pushed
112 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
113 .endm
114
115 ;; Support function entry and exit for convenience
116
117 .macro DEFUN name
118 .global \name
119 .func \name
120 \name:
121 .endm
122
123 .macro ENDF name
124 .size \name, .-\name
125 .endfunc
126 .endm
127
128 .macro FALIAS name
129 .global \name
130 .func \name
131 \name:
132 .size \name, .-\name
133 .endfunc
134 .endm
135
136 ;; Skip next instruction, typically a jump target
137 #define skip cpse 0,0
138
139 ;; Negate a 2-byte value held in consecutive registers
140 .macro NEG2 reg
141 com \reg+1
142 neg \reg
143 sbci \reg+1, -1
144 .endm
145
146 ;; Negate a 4-byte value held in consecutive registers
147 ;; Sets the V flag for signed overflow tests if REG >= 16
148 .macro NEG4 reg
149 com \reg+3
150 com \reg+2
151 com \reg+1
152 .if \reg >= 16
153 neg \reg
154 sbci \reg+1, -1
155 sbci \reg+2, -1
156 sbci \reg+3, -1
157 .else
158 com \reg
159 adc \reg, __zero_reg__
160 adc \reg+1, __zero_reg__
161 adc \reg+2, __zero_reg__
162 adc \reg+3, __zero_reg__
163 .endif
164 .endm
165
166 #define exp_lo(N) hlo8 ((N) << 23)
167 #define exp_hi(N) hhi8 ((N) << 23)
168
169
170 .section .text.libgcc.mul, "ax", @progbits
172
173 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
174 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
175 #if !defined (__AVR_HAVE_MUL__)
176 /*******************************************************
177 Multiplication 8 x 8 without MUL
178 *******************************************************/
179 #if defined (L_mulqi3)
180
181 #define r_arg2 r22 /* multiplicand */
182 #define r_arg1 r24 /* multiplier */
183 #define r_res __tmp_reg__ /* result */
184
185 DEFUN __mulqi3
186 clr r_res ; clear result
187 __mulqi3_loop:
188 sbrc r_arg1,0
189 add r_res,r_arg2
190 add r_arg2,r_arg2 ; shift multiplicand
191 breq __mulqi3_exit ; while multiplicand != 0
192 lsr r_arg1 ;
193 brne __mulqi3_loop ; exit if multiplier = 0
194 __mulqi3_exit:
195 mov r_arg1,r_res ; result to return register
196 ret
197 ENDF __mulqi3
198
199 #undef r_arg2
200 #undef r_arg1
201 #undef r_res
202
203 #endif /* defined (L_mulqi3) */
204
205
206 /*******************************************************
207 Widening Multiplication 16 = 8 x 8 without MUL
208 Multiplication 16 x 16 without MUL
209 *******************************************************/
210
211 #define A0 r22
212 #define A1 r23
213 #define B0 r24
214 #define BB0 r20
215 #define B1 r25
216 ;; Output overlaps input, thus expand result in CC0/1
217 #define C0 r24
218 #define C1 r25
219 #define CC0 __tmp_reg__
220 #define CC1 R21
221
222 #if defined (L_umulqihi3)
223 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
224 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
225 ;;; Clobbers: __tmp_reg__, R21..R23
226 DEFUN __umulqihi3
227 clr A1
228 clr B1
229 XJMP __mulhi3
230 ENDF __umulqihi3
231 #endif /* L_umulqihi3 */
232
233 #if defined (L_mulqihi3)
234 ;;; R25:R24 = (signed int) R22 * (signed int) R24
235 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
236 ;;; Clobbers: __tmp_reg__, R20..R23
237 DEFUN __mulqihi3
238 ;; Sign-extend B0
239 clr B1
240 sbrc B0, 7
241 com B1
242 ;; The multiplication runs twice as fast if A1 is zero, thus:
243 ;; Zero-extend A0
244 clr A1
245 #ifdef __AVR_HAVE_JMP_CALL__
246 ;; Store B0 * sign of A
247 clr BB0
248 sbrc A0, 7
249 mov BB0, B0
250 call __mulhi3
251 #else /* have no CALL */
252 ;; Skip sign-extension of A if A >= 0
253 ;; Same size as with the first alternative but avoids errata skip
254 ;; and is faster if A >= 0
255 sbrs A0, 7
256 rjmp __mulhi3
257 ;; If A < 0 store B
258 mov BB0, B0
259 rcall __mulhi3
260 #endif /* HAVE_JMP_CALL */
261 ;; 1-extend A after the multiplication
262 sub C1, BB0
263 ret
264 ENDF __mulqihi3
265 #endif /* L_mulqihi3 */
266
267 #if defined (L_mulhi3)
268 ;;; R25:R24 = R23:R22 * R25:R24
269 ;;; (C1:C0) = (A1:A0) * (B1:B0)
270 ;;; Clobbers: __tmp_reg__, R21..R23
271 DEFUN __mulhi3
272
273 ;; Clear result
274 clr CC0
275 clr CC1
276 rjmp 3f
277 1:
278 ;; Bit n of A is 1 --> C += B << n
279 add CC0, B0
280 adc CC1, B1
281 2:
282 lsl B0
283 rol B1
284 3:
285 ;; If B == 0 we are ready
286 sbiw B0, 0
287 breq 9f
288
289 ;; Carry = n-th bit of A
290 lsr A1
291 ror A0
292 ;; If bit n of A is set, then go add B * 2^n to C
293 brcs 1b
294
295 ;; Carry = 0 --> The ROR above acts like CP A0, 0
296 ;; Thus, it is sufficient to CPC the high part to test A against 0
297 cpc A1, __zero_reg__
298 ;; Only proceed if A != 0
299 brne 2b
300 9:
301 ;; Move Result into place
302 mov C0, CC0
303 mov C1, CC1
304 ret
305 ENDF __mulhi3
306 #endif /* L_mulhi3 */
307
308 #undef A0
309 #undef A1
310 #undef B0
311 #undef BB0
312 #undef B1
313 #undef C0
314 #undef C1
315 #undef CC0
316 #undef CC1
317
318
319 #define A0 22
321 #define A1 A0+1
322 #define A2 A0+2
323 #define A3 A0+3
324
325 #define B0 18
326 #define B1 B0+1
327 #define B2 B0+2
328 #define B3 B0+3
329
330 #define CC0 26
331 #define CC1 CC0+1
332 #define CC2 30
333 #define CC3 CC2+1
334
335 #define C0 22
336 #define C1 C0+1
337 #define C2 C0+2
338 #define C3 C0+3
339
340 /*******************************************************
341 Widening Multiplication 32 = 16 x 16 without MUL
342 *******************************************************/
343
344 #if defined (L_umulhisi3)
345 DEFUN __umulhisi3
346 wmov B0, 24
347 ;; Zero-extend B
348 clr B2
349 clr B3
350 ;; Zero-extend A
351 wmov A2, B2
352 XJMP __mulsi3
353 ENDF __umulhisi3
354 #endif /* L_umulhisi3 */
355
356 #if defined (L_mulhisi3)
357 DEFUN __mulhisi3
358 wmov B0, 24
359 ;; Sign-extend B
360 lsl r25
361 sbc B2, B2
362 mov B3, B2
363 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
364 ;; Sign-extend A
365 clr A2
366 sbrc A1, 7
367 com A2
368 mov A3, A2
369 XJMP __mulsi3
370 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
371 ;; Zero-extend A and __mulsi3 will run at least twice as fast
372 ;; compared to a sign-extended A.
373 clr A2
374 clr A3
375 sbrs A1, 7
376 XJMP __mulsi3
377 ;; If A < 0 then perform the B * 0xffff.... before the
378 ;; very multiplication by initializing the high part of the
379 ;; result CC with -B.
380 wmov CC2, A2
381 sub CC2, B0
382 sbc CC3, B1
383 XJMP __mulsi3_helper
384 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
385 ENDF __mulhisi3
386 #endif /* L_mulhisi3 */
387
388
389 /*******************************************************
390 Multiplication 32 x 32 without MUL
391 *******************************************************/
392
393 #if defined (L_mulsi3)
394 DEFUN __mulsi3
395 ;; Clear result
396 clr CC2
397 clr CC3
398 ;; FALLTHRU
399 ENDF __mulsi3
400
401 DEFUN __mulsi3_helper
402 clr CC0
403 clr CC1
404 rjmp 3f
405
406 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
407 ;; CC += B
408 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
409
410 2: ;; B <<= 1
411 lsl B0 $ rol B1 $ rol B2 $ rol B3
412
413 3: ;; A >>= 1: Carry = n-th bit of A
414 lsr A3 $ ror A2 $ ror A1 $ ror A0
415
416 brcs 1b
417 ;; Only continue if A != 0
418 sbci A1, 0
419 brne 2b
420 sbiw A2, 0
421 brne 2b
422
423 ;; All bits of A are consumed: Copy result to return register C
424 wmov C0, CC0
425 wmov C2, CC2
426 ret
427 ENDF __mulsi3_helper
428 #endif /* L_mulsi3 */
429
430 #undef A0
431 #undef A1
432 #undef A2
433 #undef A3
434 #undef B0
435 #undef B1
436 #undef B2
437 #undef B3
438 #undef C0
439 #undef C1
440 #undef C2
441 #undef C3
442 #undef CC0
443 #undef CC1
444 #undef CC2
445 #undef CC3
446
447 #endif /* !defined (__AVR_HAVE_MUL__) */
448 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
449
450 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
452 #if defined (__AVR_HAVE_MUL__)
453 #define A0 26
454 #define B0 18
455 #define C0 22
456
457 #define A1 A0+1
458
459 #define B1 B0+1
460 #define B2 B0+2
461 #define B3 B0+3
462
463 #define C1 C0+1
464 #define C2 C0+2
465 #define C3 C0+3
466
467 /*******************************************************
468 Widening Multiplication 32 = 16 x 16 with MUL
469 *******************************************************/
470
471 #if defined (L_mulhisi3)
472 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
473 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
474 ;;; Clobbers: __tmp_reg__
475 DEFUN __mulhisi3
476 XCALL __umulhisi3
477 ;; Sign-extend B
478 tst B1
479 brpl 1f
480 sub C2, A0
481 sbc C3, A1
482 1: ;; Sign-extend A
483 XJMP __usmulhisi3_tail
484 ENDF __mulhisi3
485 #endif /* L_mulhisi3 */
486
487 #if defined (L_usmulhisi3)
488 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
489 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
490 ;;; Clobbers: __tmp_reg__
491 DEFUN __usmulhisi3
492 XCALL __umulhisi3
493 ;; FALLTHRU
494 ENDF __usmulhisi3
495
496 DEFUN __usmulhisi3_tail
497 ;; Sign-extend A
498 sbrs A1, 7
499 ret
500 sub C2, B0
501 sbc C3, B1
502 ret
503 ENDF __usmulhisi3_tail
504 #endif /* L_usmulhisi3 */
505
506 #if defined (L_umulhisi3)
507 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
508 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
509 ;;; Clobbers: __tmp_reg__
510 DEFUN __umulhisi3
511 mul A0, B0
512 movw C0, r0
513 mul A1, B1
514 movw C2, r0
515 mul A0, B1
516 #ifdef __AVR_HAVE_JMP_CALL__
517 ;; This function is used by many other routines, often multiple times.
518 ;; Therefore, if the flash size is not too limited, avoid the RCALL
519 ;; and inverst 6 Bytes to speed things up.
520 add C1, r0
521 adc C2, r1
522 clr __zero_reg__
523 adc C3, __zero_reg__
524 #else
525 rcall 1f
526 #endif
527 mul A1, B0
528 1: add C1, r0
529 adc C2, r1
530 clr __zero_reg__
531 adc C3, __zero_reg__
532 ret
533 ENDF __umulhisi3
534 #endif /* L_umulhisi3 */
535
536 /*******************************************************
537 Widening Multiplication 32 = 16 x 32 with MUL
538 *******************************************************/
539
540 #if defined (L_mulshisi3)
541 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
542 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
543 ;;; Clobbers: __tmp_reg__
544 DEFUN __mulshisi3
545 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
546 ;; Some cores have problem skipping 2-word instruction
547 tst A1
548 brmi __mulohisi3
549 #else
550 sbrs A1, 7
551 #endif /* __AVR_HAVE_JMP_CALL__ */
552 XJMP __muluhisi3
553 ;; FALLTHRU
554 ENDF __mulshisi3
555
556 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
557 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
558 ;;; Clobbers: __tmp_reg__
559 DEFUN __mulohisi3
560 XCALL __muluhisi3
561 ;; One-extend R27:R26 (A1:A0)
562 sub C2, B0
563 sbc C3, B1
564 ret
565 ENDF __mulohisi3
566 #endif /* L_mulshisi3 */
567
568 #if defined (L_muluhisi3)
569 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
570 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
571 ;;; Clobbers: __tmp_reg__
572 DEFUN __muluhisi3
573 XCALL __umulhisi3
574 mul A0, B3
575 add C3, r0
576 mul A1, B2
577 add C3, r0
578 mul A0, B2
579 add C2, r0
580 adc C3, r1
581 clr __zero_reg__
582 ret
583 ENDF __muluhisi3
584 #endif /* L_muluhisi3 */
585
586 /*******************************************************
587 Multiplication 32 x 32 with MUL
588 *******************************************************/
589
590 #if defined (L_mulsi3)
591 ;;; R25:R22 = R25:R22 * R21:R18
592 ;;; (C3:C0) = C3:C0 * B3:B0
593 ;;; Clobbers: R26, R27, __tmp_reg__
594 DEFUN __mulsi3
595 movw A0, C0
596 push C2
597 push C3
598 XCALL __muluhisi3
599 pop A1
600 pop A0
601 ;; A1:A0 now contains the high word of A
602 mul A0, B0
603 add C2, r0
604 adc C3, r1
605 mul A0, B1
606 add C3, r0
607 mul A1, B0
608 add C3, r0
609 clr __zero_reg__
610 ret
611 ENDF __mulsi3
612 #endif /* L_mulsi3 */
613
614 #undef A0
615 #undef A1
616
617 #undef B0
618 #undef B1
619 #undef B2
620 #undef B3
621
622 #undef C0
623 #undef C1
624 #undef C2
625 #undef C3
626
627 #endif /* __AVR_HAVE_MUL__ */
628
629 /*******************************************************
630 Multiplication 24 x 24 with MUL
631 *******************************************************/
632
633 #if defined (L_mulpsi3)
634
635 ;; A[0..2]: In: Multiplicand; Out: Product
636 #define A0 22
637 #define A1 A0+1
638 #define A2 A0+2
639
640 ;; B[0..2]: In: Multiplier
641 #define B0 18
642 #define B1 B0+1
643 #define B2 B0+2
644
645 #if defined (__AVR_HAVE_MUL__)
646
647 ;; C[0..2]: Expand Result
648 #define C0 22
649 #define C1 C0+1
650 #define C2 C0+2
651
652 ;; R24:R22 *= R20:R18
653 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
654
655 #define AA0 26
656 #define AA2 21
657
658 DEFUN __mulpsi3
659 wmov AA0, A0
660 mov AA2, A2
661 XCALL __umulhisi3
662 mul AA2, B0 $ add C2, r0
663 mul AA0, B2 $ add C2, r0
664 clr __zero_reg__
665 ret
666 ENDF __mulpsi3
667
668 #undef AA2
669 #undef AA0
670
671 #undef C2
672 #undef C1
673 #undef C0
674
675 #else /* !HAVE_MUL */
676
677 ;; C[0..2]: Expand Result
678 #define C0 0
679 #define C1 C0+1
680 #define C2 21
681
682 ;; R24:R22 *= R20:R18
683 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
684
685 DEFUN __mulpsi3
686
687 ;; C[] = 0
688 clr __tmp_reg__
689 clr C2
690
691 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
692 LSR B2 $ ror B1 $ ror B0
693
694 ;; If the N-th Bit of B[] was set...
695 brcc 1f
696
697 ;; ...then add A[] * 2^N to the Result C[]
698 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
699
700 1: ;; Multiply A[] by 2
701 LSL A0 $ rol A1 $ rol A2
702
703 ;; Loop until B[] is 0
704 subi B0,0 $ sbci B1,0 $ sbci B2,0
705 brne 0b
706
707 ;; Copy C[] to the return Register A[]
708 wmov A0, C0
709 mov A2, C2
710
711 clr __zero_reg__
712 ret
713 ENDF __mulpsi3
714
715 #undef C2
716 #undef C1
717 #undef C0
718
719 #endif /* HAVE_MUL */
720
721 #undef B2
722 #undef B1
723 #undef B0
724
725 #undef A2
726 #undef A1
727 #undef A0
728
729 #endif /* L_mulpsi3 */
730
731 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
732
733 ;; A[0..2]: In: Multiplicand
734 #define A0 22
735 #define A1 A0+1
736 #define A2 A0+2
737
738 ;; BB: In: Multiplier
739 #define BB 25
740
741 ;; C[0..2]: Result
742 #define C0 18
743 #define C1 C0+1
744 #define C2 C0+2
745
746 ;; C[] = A[] * sign_extend (BB)
747 DEFUN __mulsqipsi3
748 mul A0, BB
749 movw C0, r0
750 mul A2, BB
751 mov C2, r0
752 mul A1, BB
753 add C1, r0
754 adc C2, r1
755 clr __zero_reg__
756 sbrs BB, 7
757 ret
758 ;; One-extend BB
759 sub C1, A0
760 sbc C2, A1
761 ret
762 ENDF __mulsqipsi3
763
764 #undef C2
765 #undef C1
766 #undef C0
767
768 #undef BB
769
770 #undef A2
771 #undef A1
772 #undef A0
773
774 #endif /* L_mulsqipsi3 && HAVE_MUL */
775
776 /*******************************************************
777 Multiplication 64 x 64
778 *******************************************************/
779
780 ;; A[] = A[] * B[]
781
782 ;; A[0..7]: In: Multiplicand
783 ;; Out: Product
784 #define A0 18
785 #define A1 A0+1
786 #define A2 A0+2
787 #define A3 A0+3
788 #define A4 A0+4
789 #define A5 A0+5
790 #define A6 A0+6
791 #define A7 A0+7
792
793 ;; B[0..7]: In: Multiplier
794 #define B0 10
795 #define B1 B0+1
796 #define B2 B0+2
797 #define B3 B0+3
798 #define B4 B0+4
799 #define B5 B0+5
800 #define B6 B0+6
801 #define B7 B0+7
802
803 #if defined (__AVR_HAVE_MUL__)
804
805 ;; Define C[] for convenience
806 ;; Notice that parts of C[] overlap A[] respective B[]
807 #define C0 16
808 #define C1 C0+1
809 #define C2 20
810 #define C3 C2+1
811 #define C4 28
812 #define C5 C4+1
813 #define C6 C4+2
814 #define C7 C4+3
815
816 #if defined (L_muldi3)
817
818 ;; A[] *= B[]
819 ;; R25:R18 *= R17:R10
820 ;; Ordinary ABI-Function
821
822 DEFUN __muldi3
823 push r29
824 push r28
825 push r17
826 push r16
827
828 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
829
830 ;; 3 * 0 + 0 * 3
831 mul A7,B0 $ $ mov C7,r0
832 mul A0,B7 $ $ add C7,r0
833 mul A6,B1 $ $ add C7,r0
834 mul A6,B0 $ mov C6,r0 $ add C7,r1
835 mul B6,A1 $ $ add C7,r0
836 mul B6,A0 $ add C6,r0 $ adc C7,r1
837
838 ;; 1 * 2
839 mul A2,B4 $ add C6,r0 $ adc C7,r1
840 mul A3,B4 $ $ add C7,r0
841 mul A2,B5 $ $ add C7,r0
842
843 push A5
844 push A4
845 push B1
846 push B0
847 push A3
848 push A2
849
850 ;; 0 * 0
851 wmov 26, B0
852 XCALL __umulhisi3
853 wmov C0, 22
854 wmov C2, 24
855
856 ;; 0 * 2
857 wmov 26, B4
858 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
859
860 wmov 26, B2
861 ;; 0 * 1
862 XCALL __muldi3_6
863
864 pop A0
865 pop A1
866 ;; 1 * 1
867 wmov 26, B2
868 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
869
870 pop r26
871 pop r27
872 ;; 1 * 0
873 XCALL __muldi3_6
874
875 pop A0
876 pop A1
877 ;; 2 * 0
878 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
879
880 ;; 2 * 1
881 wmov 26, B2
882 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
883
884 ;; A[] = C[]
885 wmov A0, C0
886 ;; A2 = C2 already
887 wmov A4, C4
888 wmov A6, C6
889
890 clr __zero_reg__
891 pop r16
892 pop r17
893 pop r28
894 pop r29
895 ret
896 ENDF __muldi3
897 #endif /* L_muldi3 */
898
899 #if defined (L_muldi3_6)
900 ;; A helper for some 64-bit multiplications with MUL available
901 DEFUN __muldi3_6
902 __muldi3_6:
903 XCALL __umulhisi3
904 add C2, 22
905 adc C3, 23
906 adc C4, 24
907 adc C5, 25
908 brcc 0f
909 adiw C6, 1
910 0: ret
911 ENDF __muldi3_6
912 #endif /* L_muldi3_6 */
913
914 #undef C7
915 #undef C6
916 #undef C5
917 #undef C4
918 #undef C3
919 #undef C2
920 #undef C1
921 #undef C0
922
923 #else /* !HAVE_MUL */
924
925 #if defined (L_muldi3)
926
927 #define C0 26
928 #define C1 C0+1
929 #define C2 C0+2
930 #define C3 C0+3
931 #define C4 C0+4
932 #define C5 C0+5
933 #define C6 0
934 #define C7 C6+1
935
936 #define Loop 9
937
938 ;; A[] *= B[]
939 ;; R25:R18 *= R17:R10
940 ;; Ordinary ABI-Function
941
942 DEFUN __muldi3
943 push r29
944 push r28
945 push Loop
946
947 ldi C0, 64
948 mov Loop, C0
949
950 ;; C[] = 0
951 clr __tmp_reg__
952 wmov C0, 0
953 wmov C2, 0
954 wmov C4, 0
955
956 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
957 ;; where N = 64 - Loop.
958 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
959 ;; B[] will have its initial Value again.
960 LSR B7 $ ror B6 $ ror B5 $ ror B4
961 ror B3 $ ror B2 $ ror B1 $ ror B0
962
963 ;; If the N-th Bit of B[] was set then...
964 brcc 1f
965 ;; ...finish Rotation...
966 ori B7, 1 << 7
967
968 ;; ...and add A[] * 2^N to the Result C[]
969 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
970 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
971
972 1: ;; Multiply A[] by 2
973 LSL A0 $ rol A1 $ rol A2 $ rol A3
974 rol A4 $ rol A5 $ rol A6 $ rol A7
975
976 dec Loop
977 brne 0b
978
979 ;; We expanded the Result in C[]
980 ;; Copy Result to the Return Register A[]
981 wmov A0, C0
982 wmov A2, C2
983 wmov A4, C4
984 wmov A6, C6
985
986 clr __zero_reg__
987 pop Loop
988 pop r28
989 pop r29
990 ret
991 ENDF __muldi3
992
993 #undef Loop
994
995 #undef C7
996 #undef C6
997 #undef C5
998 #undef C4
999 #undef C3
1000 #undef C2
1001 #undef C1
1002 #undef C0
1003
1004 #endif /* L_muldi3 */
1005 #endif /* HAVE_MUL */
1006
1007 #undef B7
1008 #undef B6
1009 #undef B5
1010 #undef B4
1011 #undef B3
1012 #undef B2
1013 #undef B1
1014 #undef B0
1015
1016 #undef A7
1017 #undef A6
1018 #undef A5
1019 #undef A4
1020 #undef A3
1021 #undef A2
1022 #undef A1
1023 #undef A0
1024
1025 /*******************************************************
1026 Widening Multiplication 64 = 32 x 32 with MUL
1027 *******************************************************/
1028
1029 #if defined (__AVR_HAVE_MUL__)
1030 #define A0 r22
1031 #define A1 r23
1032 #define A2 r24
1033 #define A3 r25
1034
1035 #define B0 r18
1036 #define B1 r19
1037 #define B2 r20
1038 #define B3 r21
1039
1040 #define C0 18
1041 #define C1 C0+1
1042 #define C2 20
1043 #define C3 C2+1
1044 #define C4 28
1045 #define C5 C4+1
1046 #define C6 C4+2
1047 #define C7 C4+3
1048
1049 #if defined (L_umulsidi3)
1050
1051 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1052
1053 ;; R18[8] = R22[4] * R18[4]
1054 ;;
1055 ;; Ordinary ABI Function, but additionally sets
1056 ;; X = R20[2] = B2[2]
1057 ;; Z = R22[2] = A0[2]
1058 DEFUN __umulsidi3
1059 clt
1060 ;; FALLTHRU
1061 ENDF __umulsidi3
1062 ;; T = sign (A)
1063 DEFUN __umulsidi3_helper
1064 push 29 $ push 28 ; Y
1065 wmov 30, A2
1066 ;; Counting in Words, we have to perform 4 Multiplications
1067 ;; 0 * 0
1068 wmov 26, A0
1069 XCALL __umulhisi3
1070 push 23 $ push 22 ; C0
1071 wmov 28, B0
1072 wmov 18, B2
1073 wmov C2, 24
1074 push 27 $ push 26 ; A0
1075 push 19 $ push 18 ; B2
1076 ;;
1077 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1078 ;; B2 C2 -- -- -- B0 A2
1079 ;; 1 * 1
1080 wmov 26, 30 ; A2
1081 XCALL __umulhisi3
1082 ;; Sign-extend A. T holds the sign of A
1083 brtc 0f
1084 ;; Subtract B from the high part of the result
1085 sub 22, 28
1086 sbc 23, 29
1087 sbc 24, 18
1088 sbc 25, 19
1089 0: wmov 18, 28 ;; B0
1090 wmov C4, 22
1091 wmov C6, 24
1092 ;;
1093 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1094 ;; B0 C2 -- -- A2 C4 C6
1095 ;;
1096 ;; 1 * 0
1097 XCALL __muldi3_6
1098 ;; 0 * 1
1099 pop 26 $ pop 27 ;; B2
1100 pop 18 $ pop 19 ;; A0
1101 XCALL __muldi3_6
1102
1103 ;; Move result C into place and save A0 in Z
1104 wmov 22, C4
1105 wmov 24, C6
1106 wmov 30, 18 ; A0
1107 pop C0 $ pop C1
1108
1109 ;; Epilogue
1110 pop 28 $ pop 29 ;; Y
1111 ret
1112 ENDF __umulsidi3_helper
1113 #endif /* L_umulsidi3 */
1114
1115
1116 #if defined (L_mulsidi3)
1117
1118 ;; Signed widening 64 = 32 * 32 Multiplication
1119 ;;
1120 ;; R18[8] = R22[4] * R18[4]
1121 ;; Ordinary ABI Function
1122 DEFUN __mulsidi3
1123 bst A3, 7
1124 sbrs B3, 7 ; Enhanced core has no skip bug
1125 XJMP __umulsidi3_helper
1126
1127 ;; B needs sign-extension
1128 push A3
1129 push A2
1130 XCALL __umulsidi3_helper
1131 ;; A0 survived in Z
1132 sub r22, r30
1133 sbc r23, r31
1134 pop r26
1135 pop r27
1136 sbc r24, r26
1137 sbc r25, r27
1138 ret
1139 ENDF __mulsidi3
1140 #endif /* L_mulsidi3 */
1141
1142 #undef A0
1143 #undef A1
1144 #undef A2
1145 #undef A3
1146 #undef B0
1147 #undef B1
1148 #undef B2
1149 #undef B3
1150 #undef C0
1151 #undef C1
1152 #undef C2
1153 #undef C3
1154 #undef C4
1155 #undef C5
1156 #undef C6
1157 #undef C7
1158 #endif /* HAVE_MUL */
1159
1160 /**********************************************************
1161 Widening Multiplication 64 = 32 x 32 without MUL
1162 **********************************************************/
1163
1164 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1165 #define A0 18
1166 #define A1 A0+1
1167 #define A2 A0+2
1168 #define A3 A0+3
1169 #define A4 A0+4
1170 #define A5 A0+5
1171 #define A6 A0+6
1172 #define A7 A0+7
1173
1174 #define B0 10
1175 #define B1 B0+1
1176 #define B2 B0+2
1177 #define B3 B0+3
1178 #define B4 B0+4
1179 #define B5 B0+5
1180 #define B6 B0+6
1181 #define B7 B0+7
1182
1183 #define AA0 22
1184 #define AA1 AA0+1
1185 #define AA2 AA0+2
1186 #define AA3 AA0+3
1187
1188 #define BB0 18
1189 #define BB1 BB0+1
1190 #define BB2 BB0+2
1191 #define BB3 BB0+3
1192
1193 #define Mask r30
1194
1195 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1196 ;;
1197 ;; R18[8] = R22[4] * R18[4]
1198 ;; Ordinary ABI Function
1199 DEFUN __mulsidi3
1200 set
1201 skip
1202 ;; FALLTHRU
1203 ENDF __mulsidi3
1204
1205 DEFUN __umulsidi3
1206 clt ; skipped
1207 ;; Save 10 Registers: R10..R17, R28, R29
1208 do_prologue_saves 10
1209 ldi Mask, 0xff
1210 bld Mask, 7
1211 ;; Move B into place...
1212 wmov B0, BB0
1213 wmov B2, BB2
1214 ;; ...and extend it
1215 and BB3, Mask
1216 lsl BB3
1217 sbc B4, B4
1218 mov B5, B4
1219 wmov B6, B4
1220 ;; Move A into place...
1221 wmov A0, AA0
1222 wmov A2, AA2
1223 ;; ...and extend it
1224 and AA3, Mask
1225 lsl AA3
1226 sbc A4, A4
1227 mov A5, A4
1228 wmov A6, A4
1229 XCALL __muldi3
1230 do_epilogue_restores 10
1231 ENDF __umulsidi3
1232
1233 #undef A0
1234 #undef A1
1235 #undef A2
1236 #undef A3
1237 #undef A4
1238 #undef A5
1239 #undef A6
1240 #undef A7
1241 #undef B0
1242 #undef B1
1243 #undef B2
1244 #undef B3
1245 #undef B4
1246 #undef B5
1247 #undef B6
1248 #undef B7
1249 #undef AA0
1250 #undef AA1
1251 #undef AA2
1252 #undef AA3
1253 #undef BB0
1254 #undef BB1
1255 #undef BB2
1256 #undef BB3
1257 #undef Mask
1258 #endif /* L_mulsidi3 && !HAVE_MUL */
1259
1260 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1261
1262
1263 .section .text.libgcc.div, "ax", @progbits
1265
1266 /*******************************************************
1267 Division 8 / 8 => (result + remainder)
1268 *******************************************************/
1269 #define r_rem r25 /* remainder */
1270 #define r_arg1 r24 /* dividend, quotient */
1271 #define r_arg2 r22 /* divisor */
1272 #define r_cnt r23 /* loop count */
1273
1274 #if defined (L_udivmodqi4)
1275 DEFUN __udivmodqi4
1276 sub r_rem,r_rem ; clear remainder and carry
1277 ldi r_cnt,9 ; init loop counter
1278 rjmp __udivmodqi4_ep ; jump to entry point
1279 __udivmodqi4_loop:
1280 rol r_rem ; shift dividend into remainder
1281 cp r_rem,r_arg2 ; compare remainder & divisor
1282 brcs __udivmodqi4_ep ; remainder <= divisor
1283 sub r_rem,r_arg2 ; restore remainder
1284 __udivmodqi4_ep:
1285 rol r_arg1 ; shift dividend (with CARRY)
1286 dec r_cnt ; decrement loop counter
1287 brne __udivmodqi4_loop
1288 com r_arg1 ; complement result
1289 ; because C flag was complemented in loop
1290 ret
1291 ENDF __udivmodqi4
1292 #endif /* defined (L_udivmodqi4) */
1293
1294 #if defined (L_divmodqi4)
1295 DEFUN __divmodqi4
1296 bst r_arg1,7 ; store sign of dividend
1297 mov __tmp_reg__,r_arg1
1298 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1299 sbrc r_arg1,7
1300 neg r_arg1 ; dividend negative : negate
1301 sbrc r_arg2,7
1302 neg r_arg2 ; divisor negative : negate
1303 XCALL __udivmodqi4 ; do the unsigned div/mod
1304 brtc __divmodqi4_1
1305 neg r_rem ; correct remainder sign
1306 __divmodqi4_1:
1307 sbrc __tmp_reg__,7
1308 neg r_arg1 ; correct result sign
1309 __divmodqi4_exit:
1310 ret
1311 ENDF __divmodqi4
1312 #endif /* defined (L_divmodqi4) */
1313
1314 #undef r_rem
1315 #undef r_arg1
1316 #undef r_arg2
1317 #undef r_cnt
1318
1319
1320 /*******************************************************
1321 Division 16 / 16 => (result + remainder)
1322 *******************************************************/
1323 #define r_remL r26 /* remainder Low */
1324 #define r_remH r27 /* remainder High */
1325
1326 /* return: remainder */
1327 #define r_arg1L r24 /* dividend Low */
1328 #define r_arg1H r25 /* dividend High */
1329
1330 /* return: quotient */
1331 #define r_arg2L r22 /* divisor Low */
1332 #define r_arg2H r23 /* divisor High */
1333
1334 #define r_cnt r21 /* loop count */
1335
1336 #if defined (L_udivmodhi4)
1337 DEFUN __udivmodhi4
1338 sub r_remL,r_remL
1339 sub r_remH,r_remH ; clear remainder and carry
1340 ldi r_cnt,17 ; init loop counter
1341 rjmp __udivmodhi4_ep ; jump to entry point
1342 __udivmodhi4_loop:
1343 rol r_remL ; shift dividend into remainder
1344 rol r_remH
1345 cp r_remL,r_arg2L ; compare remainder & divisor
1346 cpc r_remH,r_arg2H
1347 brcs __udivmodhi4_ep ; remainder < divisor
1348 sub r_remL,r_arg2L ; restore remainder
1349 sbc r_remH,r_arg2H
1350 __udivmodhi4_ep:
1351 rol r_arg1L ; shift dividend (with CARRY)
1352 rol r_arg1H
1353 dec r_cnt ; decrement loop counter
1354 brne __udivmodhi4_loop
1355 com r_arg1L
1356 com r_arg1H
1357 ; div/mod results to return registers, as for the div() function
1358 mov_l r_arg2L, r_arg1L ; quotient
1359 mov_h r_arg2H, r_arg1H
1360 mov_l r_arg1L, r_remL ; remainder
1361 mov_h r_arg1H, r_remH
1362 ret
1363 ENDF __udivmodhi4
1364 #endif /* defined (L_udivmodhi4) */
1365
1366 #if defined (L_divmodhi4)
1367 DEFUN __divmodhi4
1368 .global _div
1369 _div:
1370 bst r_arg1H,7 ; store sign of dividend
1371 mov __tmp_reg__,r_arg2H
1372 brtc 0f
1373 com __tmp_reg__ ; r0.7 is sign of result
1374 rcall __divmodhi4_neg1 ; dividend negative: negate
1375 0:
1376 sbrc r_arg2H,7
1377 rcall __divmodhi4_neg2 ; divisor negative: negate
1378 XCALL __udivmodhi4 ; do the unsigned div/mod
1379 sbrc __tmp_reg__,7
1380 rcall __divmodhi4_neg2 ; correct remainder sign
1381 brtc __divmodhi4_exit
1382 __divmodhi4_neg1:
1383 ;; correct dividend/remainder sign
1384 com r_arg1H
1385 neg r_arg1L
1386 sbci r_arg1H,0xff
1387 ret
1388 __divmodhi4_neg2:
1389 ;; correct divisor/result sign
1390 com r_arg2H
1391 neg r_arg2L
1392 sbci r_arg2H,0xff
1393 __divmodhi4_exit:
1394 ret
1395 ENDF __divmodhi4
1396 #endif /* defined (L_divmodhi4) */
1397
1398 #undef r_remH
1399 #undef r_remL
1400
1401 #undef r_arg1H
1402 #undef r_arg1L
1403
1404 #undef r_arg2H
1405 #undef r_arg2L
1406
1407 #undef r_cnt
1408
1409 /*******************************************************
1410 Division 24 / 24 => (result + remainder)
1411 *******************************************************/
1412
1413 ;; A[0..2]: In: Dividend; Out: Quotient
1414 #define A0 22
1415 #define A1 A0+1
1416 #define A2 A0+2
1417
1418 ;; B[0..2]: In: Divisor; Out: Remainder
1419 #define B0 18
1420 #define B1 B0+1
1421 #define B2 B0+2
1422
1423 ;; C[0..2]: Expand remainder
1424 #define C0 __zero_reg__
1425 #define C1 26
1426 #define C2 25
1427
1428 ;; Loop counter
1429 #define r_cnt 21
1430
1431 #if defined (L_udivmodpsi4)
1432 ;; R24:R22 = R24:R22 udiv R20:R18
1433 ;; R20:R18 = R24:R22 umod R20:R18
1434 ;; Clobbers: R21, R25, R26
;; Classic shift-and-subtract restoring division, 24 bits wide.
;; Note C0 is __zero_reg__ (borrowed as remainder byte 0) and is
;; cleared again before returning.
1435 
1436 DEFUN __udivmodpsi4
1437 ; init loop counter
;; 24 bits + 1: the entry point only shifts the dividend on the first pass,
;; so one extra iteration is needed.
1438 ldi r_cnt, 24+1
1439 ; Clear remainder and carry. C0 is already 0
1440 clr C1
1441 sub C2, C2
1442 ; jump to entry point
1443 rjmp __udivmodpsi4_start
1444 __udivmodpsi4_loop:
1445 ; shift dividend into remainder
1446 rol C0
1447 rol C1
1448 rol C2
1449 ; compare remainder & divisor
1450 cp C0, B0
1451 cpc C1, B1
1452 cpc C2, B2
1453 brcs __udivmodpsi4_start ; remainder < divisor
1454 sub C0, B0 ; subtract divisor from remainder
1455 sbc C1, B1
1456 sbc C2, B2
1457 __udivmodpsi4_start:
1458 ; shift dividend (with CARRY)
;; The carry shifted into A0 is the INVERTED quotient bit (C=1 means
;; "divisor did not fit"), hence the COM instructions after the loop.
1459 rol A0
1460 rol A1
1461 rol A2
1462 ; decrement loop counter
1463 dec r_cnt
1464 brne __udivmodpsi4_loop
;; Un-invert the accumulated quotient bits.
1465 com A0
1466 com A1
1467 com A2
1468 ; div/mod results to return registers
1469 ; remainder
1470 mov B0, C0
1471 mov B1, C1
1472 mov B2, C2
1473 clr __zero_reg__ ; C0
1474 ret
1475 ENDF __udivmodpsi4
1476 #endif /* defined (L_udivmodpsi4) */
1477
1478 #if defined (L_divmodpsi4)
1479 ;; R24:R22 = R24:R22 div R20:R18
1480 ;; R20:R18 = R24:R22 mod R20:R18
1481 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
;; Signed 24-bit div/mod: negate operands as needed, run the unsigned
;; worker, then fix up result signs. negA/negB are reused both for the
;; pre-negation of the inputs and the post-negation of quotient/remainder
;; (after __udivmodpsi4 the quotient lives in A[], the remainder in B[]).
1482 
1483 DEFUN __divmodpsi4
1484 ; R0.7 will contain the sign of the result:
1485 ; R0.7 = A.sign ^ B.sign
1486 mov __tmp_reg__, B2
1487 ; T-flag = sign of dividend
1488 bst A2, 7
1489 brtc 0f
1490 com __tmp_reg__
1491 ; Adjust dividend's sign
1492 rcall __divmodpsi4_negA
1493 0:
1494 ; Adjust divisor's sign
1495 sbrc B2, 7
1496 rcall __divmodpsi4_negB
1497 
1498 ; Do the unsigned div/mod
1499 XCALL __udivmodpsi4
1500 
1501 ; Adjust quotient's sign
1502 sbrc __tmp_reg__, 7
1503 rcall __divmodpsi4_negA
1504 
1505 ; Adjust remainder's sign
;; T set (dividend was negative): fall through into negB to negate the
;; remainder, which now occupies B[].
1506 brtc __divmodpsi4_end
1507 
1508 __divmodpsi4_negB:
1509 ; Correct divisor/remainder sign
1510 com B2
1511 com B1
1512 neg B0
1513 sbci B1, -1
1514 sbci B2, -1
1515 ret
1516 
1517 ; Correct dividend/quotient sign
1518 __divmodpsi4_negA:
1519 com A2
1520 com A1
1521 neg A0
1522 sbci A1, -1
1523 sbci A2, -1
1524 __divmodpsi4_end:
1525 ret
1526 
1527 ENDF __divmodpsi4
1528 #endif /* defined (L_divmodpsi4) */
1529
1530 #undef A0
1531 #undef A1
1532 #undef A2
1533
1534 #undef B0
1535 #undef B1
1536 #undef B2
1537
1538 #undef C0
1539 #undef C1
1540 #undef C2
1541
1542 #undef r_cnt
1543
1544 /*******************************************************
1545 Division 32 / 32 => (result + remainder)
1546 *******************************************************/
1547 #define r_remHH r31 /* remainder High */
1548 #define r_remHL r30
1549 #define r_remH r27
1550 #define r_remL r26 /* remainder Low */
1551
1552 /* return: remainder */
1553 #define r_arg1HH r25 /* dividend High */
1554 #define r_arg1HL r24
1555 #define r_arg1H r23
1556 #define r_arg1L r22 /* dividend Low */
1557
1558 /* return: quotient */
1559 #define r_arg2HH r21 /* divisor High */
1560 #define r_arg2HL r20
1561 #define r_arg2H r19
1562 #define r_arg2L r18 /* divisor Low */
1563
1564 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1565
1566 #if defined (L_udivmodsi4)
;; Unsigned 32-bit division and modulo (restoring shift-and-subtract).
;; In: dividend in r_arg1 (r25:r22), divisor in r_arg2 (r21:r18).
;; Out: remainder in r_arg1, quotient in r_arg2 (as for ldiv()).
;; r_cnt is __zero_reg__; it is zero again when the loop terminates.
1567 DEFUN __udivmodsi4
;; 32 bits + 1: the entry point only shifts the dividend on the first pass.
1568 ldi r_remL, 33 ; init loop counter
1569 mov r_cnt, r_remL
1570 sub r_remL,r_remL
1571 sub r_remH,r_remH ; clear remainder and carry
1572 mov_l r_remHL, r_remL
1573 mov_h r_remHH, r_remH
1574 rjmp __udivmodsi4_ep ; jump to entry point
1575 __udivmodsi4_loop:
1576 rol r_remL ; shift dividend into remainder
1577 rol r_remH
1578 rol r_remHL
1579 rol r_remHH
1580 cp r_remL,r_arg2L ; compare remainder & divisor
1581 cpc r_remH,r_arg2H
1582 cpc r_remHL,r_arg2HL
1583 cpc r_remHH,r_arg2HH
1584 brcs __udivmodsi4_ep ; remainder < divisor
1585 sub r_remL,r_arg2L ; subtract divisor from remainder
1586 sbc r_remH,r_arg2H
1587 sbc r_remHL,r_arg2HL
1588 sbc r_remHH,r_arg2HH
1589 __udivmodsi4_ep:
;; Carry shifted into arg1 is the INVERTED quotient bit; COM below fixes it.
1590 rol r_arg1L ; shift dividend (with CARRY)
1591 rol r_arg1H
1592 rol r_arg1HL
1593 rol r_arg1HH
1594 dec r_cnt ; decrement loop counter
1595 brne __udivmodsi4_loop
1596 ; __zero_reg__ now restored (r_cnt == 0)
1597 com r_arg1L
1598 com r_arg1H
1599 com r_arg1HL
1600 com r_arg1HH
1601 ; div/mod results to return registers, as for the ldiv() function
1602 mov_l r_arg2L, r_arg1L ; quotient
1603 mov_h r_arg2H, r_arg1H
1604 mov_l r_arg2HL, r_arg1HL
1605 mov_h r_arg2HH, r_arg1HH
1606 mov_l r_arg1L, r_remL ; remainder
1607 mov_h r_arg1H, r_remH
1608 mov_l r_arg1HL, r_remHL
1609 mov_h r_arg1HH, r_remHH
1610 ret
1611 ENDF __udivmodsi4
1612 #endif /* defined (L_udivmodsi4) */
1613
1614 #if defined (L_divmodsi4)
;; Signed 32-bit division and modulo on top of __udivmodsi4.
;; T flag = dividend sign (drives remainder negation via __negsi2);
;; r0.7 = dividend.sign ^ divisor.sign (drives quotient negation).
;; __negsi2 negates r25:r22 (arg1); the local neg2 negates r21:r18 (arg2).
1615 DEFUN __divmodsi4
1616 mov __tmp_reg__,r_arg2HH
1617 bst r_arg1HH,7 ; store sign of dividend
1618 brtc 0f
1619 com __tmp_reg__ ; r0.7 is sign of result
1620 XCALL __negsi2 ; dividend negative: negate
1621 0:
1622 sbrc r_arg2HH,7
1623 rcall __divmodsi4_neg2 ; divisor negative: negate
1624 XCALL __udivmodsi4 ; do the unsigned div/mod
1625 sbrc __tmp_reg__, 7 ; correct quotient sign
1626 rcall __divmodsi4_neg2
1627 brtc __divmodsi4_exit ; correct remainder sign
1628 XJMP __negsi2
1629 __divmodsi4_neg2:
1630 ;; correct divisor/quotient sign
1631 com r_arg2HH
1632 com r_arg2HL
1633 com r_arg2H
1634 neg r_arg2L
1635 sbci r_arg2H,0xff
1636 sbci r_arg2HL,0xff
1637 sbci r_arg2HH,0xff
1638 __divmodsi4_exit:
1639 ret
1640 ENDF __divmodsi4
1641 #endif /* defined (L_divmodsi4) */
1642
1643 #if defined (L_negsi2)
1644 ;; (set (reg:SI 22)
1645 ;; (neg:SI (reg:SI 22)))
1646 ;; Sets the V flag for signed overflow tests
;; Two's-complement negation of the 32-bit value in r25:r22.
;; NEG4 is a macro defined earlier in this file (not visible here) —
;; presumably it expands to the COM/NEG/SBCI negation sequence used by
;; the divmod routines above.
1647 DEFUN __negsi2
1648 NEG4 22
1649 ret
1650 ENDF __negsi2
1651 #endif /* L_negsi2 */
1652
1653 #undef r_remHH
1654 #undef r_remHL
1655 #undef r_remH
1656 #undef r_remL
1657 #undef r_arg1HH
1658 #undef r_arg1HL
1659 #undef r_arg1H
1660 #undef r_arg1L
1661 #undef r_arg2HH
1662 #undef r_arg2HL
1663 #undef r_arg2H
1664 #undef r_arg2L
1665 #undef r_cnt
1666
1667 /*******************************************************
1668 Division 64 / 64
1669 Modulo 64 % 64
1670 *******************************************************/
1671
1672 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1673 ;; at least 16k of Program Memory. For smaller Devices, depend
1674 ;; on MOVW and SP Size. There is a Connexion between SP Size and
1675 ;; Flash Size so that SP Size can be used to test for Flash Size.
1676
1677 #if defined (__AVR_HAVE_JMP_CALL__)
1678 # define SPEED_DIV 8
1679 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1680 # define SPEED_DIV 16
1681 #else
1682 # define SPEED_DIV 0
1683 #endif
1684
1685 ;; A[0..7]: In: Dividend;
1686 ;; Out: Quotient (T = 0)
1687 ;; Out: Remainder (T = 1)
1688 #define A0 18
1689 #define A1 A0+1
1690 #define A2 A0+2
1691 #define A3 A0+3
1692 #define A4 A0+4
1693 #define A5 A0+5
1694 #define A6 A0+6
1695 #define A7 A0+7
1696
1697 ;; B[0..7]: In: Divisor; Out: Clobber
1698 #define B0 10
1699 #define B1 B0+1
1700 #define B2 B0+2
1701 #define B3 B0+3
1702 #define B4 B0+4
1703 #define B5 B0+5
1704 #define B6 B0+6
1705 #define B7 B0+7
1706
1707 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1708 #define C0 8
1709 #define C1 C0+1
1710 #define C2 30
1711 #define C3 C2+1
1712 #define C4 28
1713 #define C5 C4+1
1714 #define C6 26
1715 #define C7 C6+1
1716
1717 ;; Holds Signs during Division Routine
1718 #define SS __tmp_reg__
1719
1720 ;; Bit-Counter in Division Routine
1721 #define R_cnt __zero_reg__
1722
1723 ;; Scratch Register for Negation
1724 #define NN r31
1725
1726 #if defined (L_udivdi3)
;; 64-bit unsigned division/modulo entry points. Both funnel into
;; __udivdi3_umoddi3; the T flag selects the result (T=1: remainder,
;; T=0: quotient) as documented at __udivmod64.
1727 
1728 ;; R25:R18 = R24:R18 umod R17:R10
1729 ;; Ordinary ABI-Function
1730 
1731 DEFUN __umoddi3
1732 set
1733 rjmp __udivdi3_umoddi3
1734 ENDF __umoddi3
1735 
1736 ;; R25:R18 = R24:R18 udiv R17:R10
1737 ;; Ordinary ABI-Function
1738 
;; Falls through into __udivdi3_umoddi3 below.
1739 DEFUN __udivdi3
1740 clt
1741 ENDF __udivdi3
1742 
1743 DEFUN __udivdi3_umoddi3
;; Save the call-saved registers clobbered by the worker:
;; C0/C1 = r8/r9 and C4/C5 = r28/r29 (see the C[] defines above).
1744 push C0
1745 push C1
1746 push C4
1747 push C5
1748 XCALL __udivmod64
1749 pop C5
1750 pop C4
1751 pop C1
1752 pop C0
1753 ret
1754 ENDF __udivdi3_umoddi3
1755 #endif /* L_udivdi3 */
1756
1757 #if defined (L_udivmod64)
1758 
1759 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1760 ;; No Registers saved/restored; the Callers will take Care.
1761 ;; Preserves B[] and T-flag
1762 ;; T = 0: Compute Quotient in A[]
1763 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
;; Restoring shift-and-subtract division on A[] (dividend) / B[] (divisor)
;; with remainder accumulated in C[]. The SPEED_DIV variants pre-shift the
;; dividend by whole bytes (SPEED_DIV == 8) or one 32-bit chunk
;; (SPEED_DIV == 16) to reduce the number of single-bit loop iterations.
1764 
1765 DEFUN __udivmod64
1766 
1767 ;; Clear Remainder (C6, C7 will follow)
1768 clr C0
1769 clr C1
1770 wmov C2, C0
1771 wmov C4, C0
1772 ldi C7, 64
1773 
1774 #if SPEED_DIV == 0 || SPEED_DIV == 16
1775 ;; Initialize Loop-Counter
1776 mov R_cnt, C7
1777 wmov C6, C0
1778 #endif /* SPEED_DIV */
1779 
1780 #if SPEED_DIV == 8
1781 
1782 push A7
1783 clr C6
1784 
1785 1: ;; Compare shifted Dividend against Divisor
1786 ;; If -- even after Shifting -- it is smaller...
1787 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1788 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1789 brcc 2f
1790 
1791 ;; ...then we can subtract it. Thus, it is legal to shift left
;; Byte-wise left shift of the 128-bit value C[]:A[] by 8.
1792 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1793 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1794 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1795 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1796 
1797 ;; 8 Bits are done
1798 subi C7, 8
1799 brne 1b
1800 
1801 ;; Shifted 64 Bits: A7 has traveled to C7
1802 pop C7
1803 ;; Divisor is greater than Dividend. We have:
1804 ;; A[] % B[] = A[]
1805 ;; A[] / B[] = 0
1806 ;; Thus, we can return immediately
1807 rjmp 5f
1808 
1809 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1810 mov R_cnt, C7
1811 
1812 ;; Push of A7 is not needed because C7 is still 0
1813 pop C7
1814 clr C7
1815 
1816 #elif SPEED_DIV == 16
1817 
1818 ;; Compare shifted Dividend against Divisor
1819 cp A7, B3
1820 cpc C0, B4
1821 cpc C1, B5
1822 cpc C2, B6
1823 cpc C3, B7
1824 brcc 2f
1825 
1826 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1827 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1828 wmov C2,A6 $ wmov C0,A4
1829 wmov A6,A2 $ wmov A4,A0
1830 wmov A2,C6 $ wmov A0,C4
1831 
1832 ;; Set Bit Counter to 32
;; R_cnt was initialized to 64 above; one logical shift right halves it.
1833 lsr R_cnt
1834 2:
1835 #elif SPEED_DIV
1836 #error SPEED_DIV = ?
1837 #endif /* SPEED_DIV */
1838 
1839 ;; The very Division + Remainder Routine
1840 
1841 3: ;; Left-shift Dividend...
1842 lsl A0 $ rol A1 $ rol A2 $ rol A3
1843 rol A4 $ rol A5 $ rol A6 $ rol A7
1844 
1845 ;; ...into Remainder
1846 rol C0 $ rol C1 $ rol C2 $ rol C3
1847 rol C4 $ rol C5 $ rol C6 $ rol C7
1848 
1849 ;; Compare Remainder and Divisor
1850 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1851 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1852 
1853 brcs 4f
1854 
1855 ;; Divisor fits into Remainder: Subtract it from Remainder...
1856 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1857 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1858 
1859 ;; ...and set according Bit in the upcoming Quotient
1860 ;; The Bit will travel to its final Position
1861 ori A0, 1
1862 
1863 4: ;; This Bit is done
1864 dec R_cnt
1865 brne 3b
1866 ;; __zero_reg__ is 0 again
1867 
1868 ;; T = 0: We are fine with the Quotient in A[]
1869 ;; T = 1: Copy Remainder to A[]
1870 5: brtc 6f
1871 wmov A0, C0
1872 wmov A2, C2
1873 wmov A4, C4
1874 wmov A6, C6
1875 ;; Move the Sign of the Result to SS.7
1876 lsl SS
1877 
1878 6: ret
1879 
1880 ENDF __udivmod64
1881 #endif /* L_udivmod64 */
1882
1883
1884 #if defined (L_divdi3)
;; 64-bit signed division/modulo. T flag selects remainder (set) vs
;; quotient (clt); both funnel into __divdi3_moddi3 which negates the
;; operands as needed, calls the unsigned worker and fixes the result sign.
1885 
1886 ;; R25:R18 = R24:R18 mod R17:R10
1887 ;; Ordinary ABI-Function
1888 
1889 DEFUN __moddi3
1890 set
1891 rjmp __divdi3_moddi3
1892 ENDF __moddi3
1893 
1894 ;; R25:R18 = R24:R18 div R17:R10
1895 ;; Ordinary ABI-Function
1896 
;; Falls through into __divdi3_moddi3 below.
1897 DEFUN __divdi3
1898 clt
1899 ENDF __divdi3
1900 
1901 DEFUN __divdi3_moddi3
1902 #if SPEED_DIV
1903 mov r31, A7
1904 or r31, B7
1905 brmi 0f
1906 ;; Both Signs are 0: the following Complexity is not needed
1907 XJMP __udivdi3_umoddi3
1908 #endif /* SPEED_DIV */
1909 
1910 0: ;; The Prologue
1911 ;; Save 12 Registers: Y, 17...8
1912 ;; No Frame needed
1913 do_prologue_saves 12
1914 
1915 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1916 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1917 mov SS, A7
1918 asr SS
1919 ;; Adjust Dividend's Sign as needed
1920 #if SPEED_DIV
1921 ;; Compiling for Speed we know that at least one Sign must be < 0
1922 ;; Thus, if A[] >= 0 then we know B[] < 0
1923 brpl 22f
1924 #else
1925 brpl 21f
1926 #endif /* SPEED_DIV */
1927 
1928 XCALL __negdi2
1929 
1930 ;; Adjust Divisor's Sign and SS.7 as needed
1931 21: tst B7
1932 brpl 3f
1933 22: ldi NN, 1 << 7
1934 eor SS, NN
1935 
;; Negate the 64-bit Divisor B[]: COM all high Bytes, NEG the low Byte,
;; then propagate the Borrow with SBC against NN = -1.
1936 ldi NN, -1
1937 com B4 $ com B5 $ com B6 $ com B7
1938 $ com B1 $ com B2 $ com B3
1939 NEG B0
1940 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
1941 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
1942 
1943 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1944 XCALL __udivmod64
1945 
1946 ;; Adjust Result's Sign
;; __udivmod64 shifted SS left for T=1, so SS.7 now holds the sign of
;; whichever result (quotient or remainder) was selected.
1947 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1948 tst SS
1949 brpl 4f
1950 #else
1951 sbrc SS, 7
1952 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1953 XCALL __negdi2
1954 
1955 4: ;; Epilogue: Restore 12 Registers and return
1956 do_epilogue_restores 12
1957 
1958 ENDF __divdi3_moddi3
1959 
1960 #endif /* L_divdi3 */
1961
1962 #undef R_cnt
1963 #undef SS
1964 #undef NN
1965
1966 .section .text.libgcc, "ax", @progbits
1967
1968 #define TT __tmp_reg__
1969
1970 #if defined (L_adddi3)
1971 ;; (set (reg:DI 18)
1972 ;; (plus:DI (reg:DI 18)
1973 ;; (reg:DI 10)))
1974 ;; Sets the V flag for signed overflow tests
1975 ;; Sets the C flag for unsigned overflow tests
;; 64-bit add: A[] (r25:r18) += B[] (r17:r10); ripple-carry byte chain.
1976 DEFUN __adddi3
1977 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
1978 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
1979 ret
1980 ENDF __adddi3
1981 #endif /* L_adddi3 */
1982
1983 #if defined (L_adddi3_s8)
1984 ;; (set (reg:DI 18)
1985 ;; (plus:DI (reg:DI 18)
1986 ;; (sign_extend:SI (reg:QI 26))))
1987 ;; Sets the V flag for signed overflow tests
1988 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
;; Add a sign-extended 8-bit value (r26) to the 64-bit A[].
;; TT is 0x00 or 0xff depending on r26's sign (the sign-extension byte).
1989 DEFUN __adddi3_s8
1990 clr TT
1991 sbrc r26, 7
1992 com TT
1993 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
1994 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
1995 ret
1996 ENDF __adddi3_s8
1997 #endif /* L_adddi3_s8 */
1998
1999 #if defined (L_subdi3)
2000 ;; (set (reg:DI 18)
2001 ;; (minus:DI (reg:DI 18)
2002 ;; (reg:DI 10)))
2003 ;; Sets the V flag for signed overflow tests
2004 ;; Sets the C flag for unsigned overflow tests
;; 64-bit subtract: A[] (r25:r18) -= B[] (r17:r10); ripple-borrow chain.
2005 DEFUN __subdi3
2006 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2007 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2008 ret
2009 ENDF __subdi3
2010 #endif /* L_subdi3 */
2011
2012 #if defined (L_cmpdi2)
2013 ;; (set (cc0)
2014 ;; (compare (reg:DI 18)
2015 ;; (reg:DI 10)))
;; 64-bit compare A[] vs B[]; only the condition flags are produced.
2016 DEFUN __cmpdi2
2017 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2018 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2019 ret
2020 ENDF __cmpdi2
2021 #endif /* L_cmpdi2 */
2022
2023 #if defined (L_cmpdi2_s8)
2024 ;; (set (cc0)
2025 ;; (compare (reg:DI 18)
2026 ;; (sign_extend:SI (reg:QI 26))))
;; Compare 64-bit A[] against a sign-extended 8-bit value in r26.
;; TT becomes the sign-extension byte (0x00 or 0xff) of r26.
2027 DEFUN __cmpdi2_s8
2028 clr TT
2029 sbrc r26, 7
2030 com TT
2031 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2032 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2033 ret
2034 ENDF __cmpdi2_s8
2035 #endif /* L_cmpdi2_s8 */
2036
2037 #if defined (L_negdi2)
2038 ;; (set (reg:DI 18)
2039 ;; (neg:DI (reg:DI 18)))
2040 ;; Sets the V flag for signed overflow tests
;; 64-bit two's-complement negate of A[] (r25:r18): COM all bytes except
;; the lowest, NEG the lowest, then SBCI -1 propagates the +1 carry.
2041 DEFUN __negdi2
2042 
2043 com A4 $ com A5 $ com A6 $ com A7
2044 $ com A1 $ com A2 $ com A3
2045 NEG A0
2046 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2047 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2048 ret
2049 
2050 ENDF __negdi2
2051 #endif /* L_negdi2 */
2052
2053 #undef TT
2054
2055 #undef C7
2056 #undef C6
2057 #undef C5
2058 #undef C4
2059 #undef C3
2060 #undef C2
2061 #undef C1
2062 #undef C0
2063
2064 #undef B7
2065 #undef B6
2066 #undef B5
2067 #undef B4
2068 #undef B3
2069 #undef B2
2070 #undef B1
2071 #undef B0
2072
2073 #undef A7
2074 #undef A6
2075 #undef A5
2076 #undef A4
2077 #undef A3
2078 #undef A2
2079 #undef A1
2080 #undef A0
2081
2082
2083 .section .text.libgcc.prologue, "ax", @progbits
2085
2086 /**********************************
2087 * This is a prologue subroutine
2088 **********************************/
2089 #if defined (L_prologue)
2090
2091 ;; This function does not clobber T-flag; 64-bit division relies on it
2092 DEFUN __prologue_saves__
2093 push r2
2094 push r3
2095 push r4
2096 push r5
2097 push r6
2098 push r7
2099 push r8
2100 push r9
2101 push r10
2102 push r11
2103 push r12
2104 push r13
2105 push r14
2106 push r15
2107 push r16
2108 push r17
2109 push r28
2110 push r29
2111 #if !defined (__AVR_HAVE_SPH__)
2112 in r28,__SP_L__
2113 sub r28,r26
2114 out __SP_L__,r28
2115 clr r29
2116 #elif defined (__AVR_XMEGA__)
2117 in r28,__SP_L__
2118 in r29,__SP_H__
2119 sub r28,r26
2120 sbc r29,r27
2121 out __SP_L__,r28
2122 out __SP_H__,r29
2123 #else
2124 in r28,__SP_L__
2125 in r29,__SP_H__
2126 sub r28,r26
2127 sbc r29,r27
2128 in __tmp_reg__,__SREG__
2129 cli
2130 out __SP_H__,r29
2131 out __SREG__,__tmp_reg__
2132 out __SP_L__,r28
2133 #endif /* #SP = 8/16 */
2134
2135 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2136 eijmp
2137 #else
2138 ijmp
2139 #endif
2140
2141 ENDF __prologue_saves__
2142 #endif /* defined (L_prologue) */
2143
2144 /*
2145 * This is an epilogue subroutine
2146 */
2147 #if defined (L_epilogue)
2148 
;; Common epilogue: reloads r2..r17 and the caller's Y from the frame
;; (addressed via Y plus fixed offsets), releases the frame by adding the
;; size held in r30 to the stack pointer, and returns to the caller.
2149 DEFUN __epilogue_restores__
2150 ldd r2,Y+18
2151 ldd r3,Y+17
2152 ldd r4,Y+16
2153 ldd r5,Y+15
2154 ldd r6,Y+14
2155 ldd r7,Y+13
2156 ldd r8,Y+12
2157 ldd r9,Y+11
2158 ldd r10,Y+10
2159 ldd r11,Y+9
2160 ldd r12,Y+8
2161 ldd r13,Y+7
2162 ldd r14,Y+6
2163 ldd r15,Y+5
2164 ldd r16,Y+4
2165 ldd r17,Y+3
2166 ldd r26,Y+2
2167 #if !defined (__AVR_HAVE_SPH__)
;; 8-bit stack pointer: only SPL is updated.
2168 ldd r29,Y+1
2169 add r28,r30
2170 out __SP_L__,r28
2171 mov r28, r26
2172 #elif defined (__AVR_XMEGA__)
2173 ldd r27,Y+1
2174 add r28,r30
2175 adc r29,__zero_reg__
2176 out __SP_L__,r28
2177 out __SP_H__,r29
2178 wmov 28, 26
2179 #else
;; Classic AVR: interrupts off around the non-atomic SPH/SPL update.
2180 ldd r27,Y+1
2181 add r28,r30
2182 adc r29,__zero_reg__
2183 in __tmp_reg__,__SREG__
2184 cli
2185 out __SP_H__,r29
2186 out __SREG__,__tmp_reg__
2187 out __SP_L__,r28
2188 mov_l r28, r26
2189 mov_h r29, r27
2190 #endif /* #SP = 8/16 */
2191 ret
2192 ENDF __epilogue_restores__
2193 #endif /* defined (L_epilogue) */
2194
2195 #ifdef L_exit
;; _exit / weak exit: placed in .fini9 so that the .fini8...fini1 code the
;; linker script inserts runs "through" it; execution then reaches .fini0,
;; which disables interrupts and spins forever.
2196 .section .fini9,"ax",@progbits
2197 DEFUN _exit
2198 .weak exit
2199 exit:
2200 ENDF _exit
2201 
2202 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2203 
2204 .section .fini0,"ax",@progbits
2205 cli
2206 __stop_program:
2207 rjmp __stop_program
2208 #endif /* defined (L_exit) */
2209
2210 #ifdef L_cleanup
;; Weak no-op _cleanup stub; a real implementation elsewhere may override it.
2211 .weak _cleanup
2212 .func _cleanup
2213 _cleanup:
2214 ret
2215 .endfunc
2216 #endif /* defined (L_cleanup) */
2217
2218
2219 .section .text.libgcc, "ax", @progbits
2221
2222 #ifdef L_tablejump
;; __tablejump2__: Z holds a WORD index into a flash jump table; convert it
;; to a byte address by doubling, then fall through to __tablejump__.
2223 DEFUN __tablejump2__
2224 lsl r30
2225 rol r31
2226 ;; FALLTHRU
2227 ENDF __tablejump2__
2228 
;; __tablejump__: load the 16-bit target address from flash at byte
;; address Z and jump to it.
2229 DEFUN __tablejump__
2230 #if defined (__AVR_HAVE_LPMX__)
2231 lpm __tmp_reg__, Z+
2232 lpm r31, Z
2233 mov r30, __tmp_reg__
2234 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2235 eijmp
2236 #else
2237 ijmp
2238 #endif
2239 
2240 #else /* !HAVE_LPMX */
;; Without LPM Z+: read the two address bytes, push them (low byte first,
;; so RET pops high:low as the return address) and "return" to the target.
2241 lpm
2242 adiw r30, 1
2243 push r0
2244 lpm
2245 push r0
2246 #if defined (__AVR_HAVE_EIJMP_EICALL__)
;; >128 KiB devices: RET also pops the extra EIND byte; push it first.
2247 in __tmp_reg__, __EIND__
2248 push __tmp_reg__
2249 #endif
2250 ret
2251 #endif /* !HAVE_LPMX */
2252 ENDF __tablejump__
2253 #endif /* defined (L_tablejump) */
2254
2255 #ifdef L_copy_data
;; Startup code (.init4): copy the initialized .data image from flash
;; (__data_load_start) to RAM (__data_start .. __data_end).
;; X = RAM destination, Z = flash source, r17 = hi8 of the end address.
2256 .section .init4,"ax",@progbits
2257 DEFUN __do_copy_data
2258 #if defined(__AVR_HAVE_ELPMX__)
2259 ldi r17, hi8(__data_end)
2260 ldi r26, lo8(__data_start)
2261 ldi r27, hi8(__data_start)
2262 ldi r30, lo8(__data_load_start)
2263 ldi r31, hi8(__data_load_start)
2264 ldi r16, hh8(__data_load_start)
2265 out __RAMPZ__, r16
2266 rjmp .L__do_copy_data_start
2267 .L__do_copy_data_loop:
2268 elpm r0, Z+
2269 st X+, r0
2270 .L__do_copy_data_start:
2271 cpi r26, lo8(__data_end)
2272 cpc r27, r17
2273 brne .L__do_copy_data_loop
2274 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
;; ELPM without Z+ auto-increment: bump Z manually; when ADIW wraps the
;; 16-bit Z, carry into RAMPZ via the .L__do_copy_data_carry path.
2275 ldi r17, hi8(__data_end)
2276 ldi r26, lo8(__data_start)
2277 ldi r27, hi8(__data_start)
2278 ldi r30, lo8(__data_load_start)
2279 ldi r31, hi8(__data_load_start)
2280 ldi r16, hh8(__data_load_start - 0x10000)
2281 .L__do_copy_data_carry:
2282 inc r16
2283 out __RAMPZ__, r16
2284 rjmp .L__do_copy_data_start
2285 .L__do_copy_data_loop:
2286 elpm
2287 st X+, r0
2288 adiw r30, 1
2289 brcs .L__do_copy_data_carry
2290 .L__do_copy_data_start:
2291 cpi r26, lo8(__data_end)
2292 cpc r27, r17
2293 brne .L__do_copy_data_loop
2294 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
;; <=64 KiB flash: plain LPM is sufficient.
2295 ldi r17, hi8(__data_end)
2296 ldi r26, lo8(__data_start)
2297 ldi r27, hi8(__data_start)
2298 ldi r30, lo8(__data_load_start)
2299 ldi r31, hi8(__data_load_start)
2300 rjmp .L__do_copy_data_start
2301 .L__do_copy_data_loop:
2302 #if defined (__AVR_HAVE_LPMX__)
2303 lpm r0, Z+
2304 #else
2305 lpm
2306 adiw r30, 1
2307 #endif
2308 st X+, r0
2309 .L__do_copy_data_start:
2310 cpi r26, lo8(__data_end)
2311 cpc r27, r17
2312 brne .L__do_copy_data_loop
2313 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2314 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2315 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2316 out __RAMPZ__, __zero_reg__
2317 #endif /* ELPM && RAMPD */
2318 ENDF __do_copy_data
2319 #endif /* L_copy_data */
2320
2321 /* __do_clear_bss is only necessary if there is anything in .bss section. */
2322
2323 #ifdef L_clear_bss
;; Startup code (.init4): zero-fill .bss (__bss_start .. __bss_end).
;; X = destination pointer, r17 = hi8 of the end address.
2324 .section .init4,"ax",@progbits
2325 DEFUN __do_clear_bss
2326 ldi r17, hi8(__bss_end)
2327 ldi r26, lo8(__bss_start)
2328 ldi r27, hi8(__bss_start)
2329 rjmp .do_clear_bss_start
2330 .do_clear_bss_loop:
2331 st X+, __zero_reg__
2332 .do_clear_bss_start:
2333 cpi r26, lo8(__bss_end)
2334 cpc r27, r17
2335 brne .do_clear_bss_loop
2336 ENDF __do_clear_bss
2337 #endif /* L_clear_bss */
2338
2339 /* __do_global_ctors and __do_global_dtors are only necessary
2340 if there are any constructors/destructors. */
2341
2342 #ifdef L_ctors
;; Startup code (.init6): walk the constructor table from __ctors_end
;; DOWN to __ctors_start, dispatching each entry via __tablejump[_elpm]__.
;; Y (r29:r28) is the table cursor; on ELPM devices r16 carries hh8.
2343 .section .init6,"ax",@progbits
2344 DEFUN __do_global_ctors
2345 #if defined(__AVR_HAVE_ELPM__)
2346 ldi r17, hi8(__ctors_start)
2347 ldi r28, lo8(__ctors_end)
2348 ldi r29, hi8(__ctors_end)
2349 ldi r16, hh8(__ctors_end)
2350 rjmp .L__do_global_ctors_start
2351 .L__do_global_ctors_loop:
2352 sbiw r28, 2
2353 sbc r16, __zero_reg__
2354 mov_h r31, r29
2355 mov_l r30, r28
2356 out __RAMPZ__, r16
2357 XCALL __tablejump_elpm__
2358 .L__do_global_ctors_start:
2359 cpi r28, lo8(__ctors_start)
2360 cpc r29, r17
2361 ldi r24, hh8(__ctors_start)
2362 cpc r16, r24
2363 brne .L__do_global_ctors_loop
2364 #else
2365 ldi r17, hi8(__ctors_start)
2366 ldi r28, lo8(__ctors_end)
2367 ldi r29, hi8(__ctors_end)
2368 rjmp .L__do_global_ctors_start
2369 .L__do_global_ctors_loop:
2370 sbiw r28, 2
2371 mov_h r31, r29
2372 mov_l r30, r28
2373 XCALL __tablejump__
2374 .L__do_global_ctors_start:
2375 cpi r28, lo8(__ctors_start)
2376 cpc r29, r17
2377 brne .L__do_global_ctors_loop
2378 #endif /* defined(__AVR_HAVE_ELPM__) */
2379 ENDF __do_global_ctors
2380 #endif /* L_ctors */
2381
2382 #ifdef L_dtors
;; Shutdown code (.fini6): walk the destructor table. Note the iteration
;; directions differ per variant: the ELPM variant goes DOWN from
;; __dtors_start+... like ctors, while the plain variant goes UP from
;; __dtors_start (ADIW after the call) — opposite of __do_global_ctors.
2383 .section .fini6,"ax",@progbits
2384 DEFUN __do_global_dtors
2385 #if defined(__AVR_HAVE_ELPM__)
2386 ldi r17, hi8(__dtors_end)
2387 ldi r28, lo8(__dtors_start)
2388 ldi r29, hi8(__dtors_start)
2389 ldi r16, hh8(__dtors_start)
2390 rjmp .L__do_global_dtors_start
2391 .L__do_global_dtors_loop:
2392 sbiw r28, 2
2393 sbc r16, __zero_reg__
2394 mov_h r31, r29
2395 mov_l r30, r28
2396 out __RAMPZ__, r16
2397 XCALL __tablejump_elpm__
2398 .L__do_global_dtors_start:
2399 cpi r28, lo8(__dtors_end)
2400 cpc r29, r17
2401 ldi r24, hh8(__dtors_end)
2402 cpc r16, r24
2403 brne .L__do_global_dtors_loop
2404 #else
2405 ldi r17, hi8(__dtors_end)
2406 ldi r28, lo8(__dtors_start)
2407 ldi r29, hi8(__dtors_start)
2408 rjmp .L__do_global_dtors_start
2409 .L__do_global_dtors_loop:
2410 mov_h r31, r29
2411 mov_l r30, r28
2412 XCALL __tablejump__
2413 adiw r28, 2
2414 .L__do_global_dtors_start:
2415 cpi r28, lo8(__dtors_end)
2416 cpc r29, r17
2417 brne .L__do_global_dtors_loop
2418 #endif /* defined(__AVR_HAVE_ELPM__) */
2419 ENDF __do_global_dtors
2420 #endif /* L_dtors */
2421
2422 .section .text.libgcc, "ax", @progbits
2423
2424 #ifdef L_tablejump_elpm
;; Like __tablejump__, but reads the 16-bit target address with ELPM so
;; the jump table may live above 64 KiB (RAMPZ:Z set up by the caller).
2425 DEFUN __tablejump_elpm__
2426 #if defined (__AVR_HAVE_ELPMX__)
2427 elpm __tmp_reg__, Z+
2428 elpm r31, Z
2429 mov r30, __tmp_reg__
2430 #if defined (__AVR_HAVE_RAMPD__)
2431 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2432 out __RAMPZ__, __zero_reg__
2433 #endif /* RAMPD */
2434 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2435 eijmp
2436 #else
2437 ijmp
2438 #endif
2439 
2440 #elif defined (__AVR_HAVE_ELPM__)
;; No ELPM Z+: push the target address (low byte first) and RET into it;
;; on EIJMP devices the extra EIND byte is pushed first.
2441 elpm
2442 adiw r30, 1
2443 push r0
2444 elpm
2445 push r0
2446 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2447 in __tmp_reg__, __EIND__
2448 push __tmp_reg__
2449 #endif
2450 ret
2451 #endif
2452 ENDF __tablejump_elpm__
2453 #endif /* defined (L_tablejump_elpm) */
2454
2455 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2456 ;; Loading n bytes from Flash; n = 3,4
2457 ;; R22... = Flash[Z]
2458 ;; Clobbers: __tmp_reg__
2459
2460 #if (defined (L_load_3) \
2461 || defined (L_load_4)) \
2462 && !defined (__AVR_HAVE_LPMX__)
;; Fallback flash loaders for devices without LPM Z+: read 3 or 4 bytes
;; from flash at Z into D0... (r22...). Z is restored on the last byte.
2463 
2464 ;; Destination
2465 #define D0 22
2466 #define D1 D0+1
2467 #define D2 D0+2
2468 #define D3 D0+3
2469 
;; Read one byte from flash into \dest. Advances Z between bytes; after
;; the last byte of an n-byte load it rewinds Z to its original value.
2470 .macro .load dest, n
2471 lpm
2472 mov \dest, r0
2473 .if \dest != D0+\n-1
2474 adiw r30, 1
2475 .else
2476 sbiw r30, \n-1
2477 .endif
2478 .endm
2479 
2480 #if defined (L_load_3)
;; 3-byte load: reuse __load_4 and discard the 4th byte by saving/
;; restoring D3 around the call.
2481 DEFUN __load_3
2482 push D3
2483 XCALL __load_4
2484 pop D3
2485 ret
2486 ENDF __load_3
2487 #endif /* L_load_3 */
2488 
2489 #if defined (L_load_4)
2490 DEFUN __load_4
2491 .load D0, 4
2492 .load D1, 4
2493 .load D2, 4
2494 .load D3, 4
2495 ret
2496 ENDF __load_4
2497 #endif /* L_load_4 */
2498 
2499 #endif /* L_load_3 || L_load_4 */
2500
2501 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2502 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2503 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2504 ;; Clobbers: __tmp_reg__, R21, R30, R31
2505
2506 #if (defined (L_xload_1) \
2507 || defined (L_xload_2) \
2508 || defined (L_xload_3) \
2509 || defined (L_xload_4))
;; __xload_N: load N bytes from the 24-bit generic address space into
;; D0... (r22...). HHI8 (r21) bit 7 selects RAM (set) vs flash (clear);
;; on ELPM devices its low bits provide the RAMPZ segment.
2510 
2511 ;; Destination
2512 #define D0 22
2513 #define D1 D0+1
2514 #define D2 D0+2
2515 #define D3 D0+3
2516 
2517 ;; Register containing bits 16+ of the address
2518 
2519 #define HHI8 21
2520 
;; Read one flash byte into \dest using the best instruction available;
;; keeps RAMPZ:Z advanced between bytes and resets RAMPZ after the last
;; byte on RAMPD devices (EBI would otherwise read garbage from RAM).
2521 .macro .xload dest, n
2522 #if defined (__AVR_HAVE_ELPMX__)
2523 elpm \dest, Z+
2524 #elif defined (__AVR_HAVE_ELPM__)
2525 elpm
2526 mov \dest, r0
2527 .if \dest != D0+\n-1
2528 adiw r30, 1
;; Propagate the ADIW carry into the RAMPZ segment byte.
2529 adc HHI8, __zero_reg__
2530 out __RAMPZ__, HHI8
2531 .endif
2532 #elif defined (__AVR_HAVE_LPMX__)
2533 lpm \dest, Z+
2534 #else
2535 lpm
2536 mov \dest, r0
2537 .if \dest != D0+\n-1
2538 adiw r30, 1
2539 .endif
2540 #endif
2541 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2542 .if \dest == D0+\n-1
2543 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2544 out __RAMPZ__, __zero_reg__
2545 .endif
2546 #endif
2547 .endm ; .xload
2548 
2549 #if defined (L_xload_1)
2550 DEFUN __xload_1
2551 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
;; Single byte with LPM Z+ available: branch-free RAM/flash selection
;; via complementary skip instructions on HHI8.7.
2552 sbrc HHI8, 7
2553 ld D0, Z
2554 sbrs HHI8, 7
2555 lpm D0, Z
2556 ret
2557 #else
2558 sbrc HHI8, 7
2559 rjmp 1f
2560 #if defined (__AVR_HAVE_ELPM__)
2561 out __RAMPZ__, HHI8
2562 #endif /* __AVR_HAVE_ELPM__ */
2563 .xload D0, 1
2564 ret
2565 1: ld D0, Z
2566 ret
2567 #endif /* LPMx && ! ELPM */
2568 ENDF __xload_1
2569 #endif /* L_xload_1 */
2570 
2571 #if defined (L_xload_2)
2572 DEFUN __xload_2
2573 sbrc HHI8, 7
2574 rjmp 1f
2575 #if defined (__AVR_HAVE_ELPM__)
2576 out __RAMPZ__, HHI8
2577 #endif /* __AVR_HAVE_ELPM__ */
2578 .xload D0, 2
2579 .xload D1, 2
2580 ret
2581 1: ld D0, Z+
2582 ld D1, Z+
2583 ret
2584 ENDF __xload_2
2585 #endif /* L_xload_2 */
2586 
2587 #if defined (L_xload_3)
2588 DEFUN __xload_3
2589 sbrc HHI8, 7
2590 rjmp 1f
2591 #if defined (__AVR_HAVE_ELPM__)
2592 out __RAMPZ__, HHI8
2593 #endif /* __AVR_HAVE_ELPM__ */
2594 .xload D0, 3
2595 .xload D1, 3
2596 .xload D2, 3
2597 ret
2598 1: ld D0, Z+
2599 ld D1, Z+
2600 ld D2, Z+
2601 ret
2602 ENDF __xload_3
2603 #endif /* L_xload_3 */
2604 
2605 #if defined (L_xload_4)
2606 DEFUN __xload_4
2607 sbrc HHI8, 7
2608 rjmp 1f
2609 #if defined (__AVR_HAVE_ELPM__)
2610 out __RAMPZ__, HHI8
2611 #endif /* __AVR_HAVE_ELPM__ */
2612 .xload D0, 4
2613 .xload D1, 4
2614 .xload D2, 4
2615 .xload D3, 4
2616 ret
2617 1: ld D0, Z+
2618 ld D1, Z+
2619 ld D2, Z+
2620 ld D3, Z+
2621 ret
2622 ENDF __xload_4
2623 #endif /* L_xload_4 */
2624 
2625 #endif /* L_xload_{1|2|3|4} */
2626
2627 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2628 ;; memcopy from Address Space __pgmx to RAM
2629 ;; R23:Z = Source Address
2630 ;; X = Destination Address
2631 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2632
2633 #if defined (L_movmemx)
;; memcpy from the 24-bit generic address space (flash or RAM, selected by
;; HHI8.7) to RAM. LOOP (r25:r24) is the byte count; X is the destination
;; and HHI8:Z the source.
2634 
2635 #define HHI8 23
2636 #define LOOP 24
2637 
2638 DEFUN __movmemx_qi
2639 ;; #Bytes to copy fit in 8 Bits (1..255)
2640 ;; Zero-extend Loop Counter
2641 clr LOOP+1
2642 ;; FALLTHRU
2643 ENDF __movmemx_qi
2644 
2645 DEFUN __movmemx_hi
2646 
2647 ;; Read from where?
2648 sbrc HHI8, 7
2649 rjmp 1f
2650 
2651 ;; Read from Flash
2652 
2653 #if defined (__AVR_HAVE_ELPM__)
2654 out __RAMPZ__, HHI8
2655 #endif
2656 
2657 0: ;; Load 1 Byte from Flash...
2658 
2659 #if defined (__AVR_HAVE_ELPMX__)
2660 elpm r0, Z+
2661 #elif defined (__AVR_HAVE_ELPM__)
;; ELPM without Z+: advance Z by hand and carry into RAMPZ on overflow.
2662 elpm
2663 adiw r30, 1
2664 adc HHI8, __zero_reg__
2665 out __RAMPZ__, HHI8
2666 #elif defined (__AVR_HAVE_LPMX__)
2667 lpm r0, Z+
2668 #else
2669 lpm
2670 adiw r30, 1
2671 #endif
2672 
2673 ;; ...and store that Byte to RAM Destination
2674 st X+, r0
2675 sbiw LOOP, 1
2676 brne 0b
2677 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2678 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2679 out __RAMPZ__, __zero_reg__
2680 #endif /* ELPM && RAMPD */
2681 ret
2682 
2683 ;; Read from RAM
2684 
2685 1: ;; Read 1 Byte from RAM...
2686 ld r0, Z+
2687 ;; and store that Byte to RAM Destination
2688 st X+, r0
2689 sbiw LOOP, 1
2690 brne 1b
2691 ret
2692 ENDF __movmemx_hi
2693 
2694 #undef HHI8
2695 #undef LOOP
2696 
2697 #endif /* L_movmemx */
2698
2699
2700 .section .text.libgcc.builtins, "ax", @progbits
2702
2703 /**********************************
2704 * Find first set Bit (ffs)
2705 **********************************/
2706
2707 #if defined (L_ffssi2)
2708 ;; find first set bit
2709 ;; r25:r24 = ffs32 (r25:r22)
2710 ;; clobbers: r22, r26
;; r26 accumulates 8 per all-zero low byte; the first byte with a set bit
;; is handed to __loop_ffsqi2. ffs(0) returns 0 (r25:r24 are both 0 then).
2711 DEFUN __ffssi2
2712 clr r26
2713 tst r22
2714 brne 1f
2715 subi r26, -8
2716 or r22, r23
2717 brne 1f
2718 subi r26, -8
2719 or r22, r24
2720 brne 1f
2721 subi r26, -8
2722 or r22, r25
2723 brne 1f
;; Entire input was zero: r24 and r25 are still 0, so the result is 0.
2724 ret
2725 1: mov r24, r22
2726 XJMP __loop_ffsqi2
2727 ENDF __ffssi2
2728 #endif /* defined (L_ffssi2) */
2729
2730 #if defined (L_ffshi2)
2731 ;; find first set bit
2732 ;; r25:r24 = ffs16 (r25:r24)
2733 ;; clobbers: r26
;; Like __ffssi2 but 16-bit: skip to the high byte (bias r26 by 8) when
;; the low byte is zero; ffs(0) returns 0.
2734 DEFUN __ffshi2
2735 clr r26
2736 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2737 ;; Some cores have problem skipping 2-word instruction
2738 tst r24
2739 breq 2f
2740 #else
2741 cpse r24, __zero_reg__
2742 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2743 1: XJMP __loop_ffsqi2
2744 2: ldi r26, 8
2745 or r24, r25
2746 brne 1b
;; r25:r24 == 0 here, which is the correct ffs(0) result.
2747 ret
2748 ENDF __ffshi2
2749 #endif /* defined (L_ffshi2) */
2750
2751 #if defined (L_loop_ffsqi2)
2752 ;; Helper for ffshi2, ffssi2
2753 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2754 ;; r24 must be != 0
2755 ;; clobbers: r26
;; Shift r24 right until a 1 falls into carry, counting positions in r26;
;; the count is 1-based because r26 is incremented before the shift.
2756 DEFUN __loop_ffsqi2
2757 inc r26
2758 lsr r24
2759 brcc __loop_ffsqi2
2760 mov r24, r26
2761 clr r25
2762 ret
2763 ENDF __loop_ffsqi2
2764 #endif /* defined (L_loop_ffsqi2) */
2765
2766
2767 /**********************************
2769 * Count trailing Zeros (ctz)
2770 **********************************/
2771
#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
;; clobbers: r26, r22
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
    XCALL __ffssi2           ; 1 + bit index, or 0 for a zero argument
    dec  r24                 ; ctz = ffs - 1; 0 wraps to 255 (r25 stays 0)
    ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */
2784
#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
;; clobbers: r26
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
    XCALL __ffshi2           ; 1 + bit index, or 0 for a zero argument
    dec  r24                 ; ctz = ffs - 1; 0 wraps to 255 (r25 stays 0)
    ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */
2797
2798
2799 /**********************************
2801 * Count leading Zeros (clz)
2802 **********************************/
2803
#if defined (L_clzdi2)
;; count leading zeros
;; r25:r24 = clz64 (r25:r18)
;; clobbers: r22, r23, r26
DEFUN __clzdi2
    XCALL __clzsi2           ; clz of the high 32 bits (r25:r22)
    sbrs r24, 5              ; result < 32 => a 1-bit was found: done
    ret
    ;; High word was all zero (clz32 returned 32): scan the low word.
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __clzsi2
    subi r24, -32            ; add the 32 zeros of the high word
    ret
ENDF __clzdi2
#endif /* defined (L_clzdi2) */
2821
#if defined (L_clzsi2)
;; count leading zeros
;; r25:r24 = clz32 (r25:r22)
;; clobbers: r26
DEFUN __clzsi2
    XCALL __clzhi2           ; clz of the high 16 bits (r25:r24)
    sbrs r24, 4              ; result < 16 => a 1-bit was found: done
    ret
    ;; High half was all zero (clz16 returned 16): scan the low half.
    mov_l r24, r22
    mov_h r25, r23
    XCALL __clzhi2
    subi r24, -16            ; add the 16 zeros of the high half
    ret
ENDF __clzsi2
#endif /* defined (L_clzsi2) */
2837
#if defined (L_clzhi2)
;; count leading zeros
;; r25:r24 = clz16 (r25:r24)
;; clobbers: r26
DEFUN __clzhi2
    ;; r26 accumulates the zero count.
    clr  r26
    tst  r25
    brne 1f
    subi r26, -8             ; high byte is 0: 8 leading zeros
    or   r25, r24            ; continue scanning with the low byte
    brne 1f
    ldi  r24, 16             ; clz(0) = 16
    ret
1:  cpi  r25, 16
    brsh 3f                  ; upper nibble non-zero: start shifting
    subi r26, -3             ; upper nibble is 0: 4 zeros, the 4th
    swap r25                 ; is added by the loop entry at 2 below
2:  inc  r26
3:  lsl  r25                 ; shift until the leading 1 falls out
    brcc 2b
    mov  r24, r26
    clr  r25                 ; zero-extend the result to 16 bits
    ret
ENDF __clzhi2
#endif /* defined (L_clzhi2) */
2863
2864
2865 /**********************************
2867 * Parity
2868 **********************************/
2869
#if defined (L_paritydi2)
;; r25:r24 = parity64 (r25:r18)
;; clobbers: __tmp_reg__
;; Parity is invariant under XOR-folding, so fold the low 4 bytes
;; into r24 and defer to the 32-bit routine for r25:r22.
DEFUN __paritydi2
    eor r24, r18
    eor r24, r19
    eor r24, r20
    eor r24, r21
    XJMP __paritysi2
ENDF __paritydi2
#endif /* defined (L_paritydi2) */
2881
#if defined (L_paritysi2)
;; r25:r24 = parity32 (r25:r22)
;; clobbers: __tmp_reg__
;; XOR-fold two more bytes into r24 and defer to the 16-bit routine.
DEFUN __paritysi2
    eor r24, r22
    eor r24, r23
    XJMP __parityhi2
ENDF __paritysi2
#endif /* defined (L_paritysi2) */
2891
#if defined (L_parityhi2)
;; r25:r24 = parity16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __parityhi2
    eor r24, r25             ; fold the high byte into the low byte
    ;; FALLTHRU
ENDF __parityhi2

;; r25:r24 = parity8 (r24)
;; clobbers: __tmp_reg__
DEFUN __parityqi2
    ;; parity is in r24[0..7]
    mov  __tmp_reg__, r24
    swap __tmp_reg__         ; fold the high nibble onto the low nibble
    eor  r24, __tmp_reg__
    ;; parity is in r24[0..3]
    ;; Add/mask trick: after these three ops, bit 3 = bit3^bit2^bit1
    ;; and bit 0 is unchanged (holds for all 16 nibble values).
    subi r24, -4             ; bit 3 ^= bit 2 (carry out of bit 2)
    andi r24, -5             ; clear bit 2
    subi r24, -6             ; bit 3 ^= bit 1
    ;; parity is in r24[0,3]
    sbrc r24, 3
    inc  r24                 ; fold bit 3 into bit 0
    ;; parity is in r24[0]
    andi r24, 1              ; mask to the final 0/1 result
    clr  r25                 ; zero-extend to 16 bits
    ret
ENDF __parityqi2
#endif /* defined (L_parityhi2) */
2920
2921
2922 /**********************************
2924 * Population Count
2925 **********************************/
2926
#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __popcounthi2
    XCALL __popcountqi2      ; r24 = popcount of the low byte
    push r24                 ; stash it across the second call
    mov  r24, r25
    XCALL __popcountqi2      ; r24 = popcount of the high byte
    clr  r25                 ; zero-extend to 16 bits
    ;; FALLTHRU
ENDF __popcounthi2

;; Tail shared with __popcountsi2 / __popcountdi2:
;; pop a pushed partial count and add it to the count in r24.
DEFUN __popcounthi2_tail
    pop  __tmp_reg__
    add  r24, __tmp_reg__
    ret
ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */
2946
#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __popcountsi2
    XCALL __popcounthi2      ; count the high 16 bits (r25:r24)
    push r24                 ; stash the partial count
    mov_l r24, r22
    mov_h r25, r23
    XCALL __popcounthi2      ; count the low 16 bits
    XJMP __popcounthi2_tail  ; pop and add the partial count
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */
2960
#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
    XCALL __popcountsi2      ; count the high 32 bits (r25:r22)
    push r24                 ; stash the partial count
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __popcountsi2      ; count the low 32 bits
    XJMP __popcounthi2_tail  ; pop and add the partial count
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */
2976
#if defined (L_popcountqi2)
;; population count
;; r24 = popcount8 (r24)
;; clobbers: __tmp_reg__
;; Shift the argument bit by bit into the carry flag and accumulate
;; the carries in r24 via ADC.
DEFUN __popcountqi2
    mov  __tmp_reg__, r24
    andi r24, 1              ; start the count with bit 0
    lsr  __tmp_reg__         ; drop bit 0 (already counted)
    lsr  __tmp_reg__         ; carry = bit 1
    adc  r24, __zero_reg__
    lsr  __tmp_reg__         ; carry = bit 2
    adc  r24, __zero_reg__
    lsr  __tmp_reg__         ; carry = bit 3
    adc  r24, __zero_reg__
    lsr  __tmp_reg__         ; carry = bit 4
    adc  r24, __zero_reg__
    lsr  __tmp_reg__         ; carry = bit 5
    adc  r24, __zero_reg__
    lsr  __tmp_reg__         ; carry = bit 6, __tmp_reg__ = bit 7
    adc  r24, __tmp_reg__    ; add the last two bits in one go
    ret
ENDF __popcountqi2
#endif /* defined (L_popcountqi2) */
3000
3001
3002 /**********************************
3004 * Swap bytes
3005 **********************************/
3006
;; swap two registers with different register number
;; (classic three-EOR exchange, no temporary needed; \a and \b must
;; be distinct registers or both end up zero)
.macro bswap a, b
    eor \a, \b
    eor \b, \a
    eor \a, \b
.endm
3013
#if defined (L_bswapsi2)
;; swap bytes
;; r25:r22 = bswap32 (r25:r22)
DEFUN __bswapsi2
    bswap r22, r25           ; exchange the outer byte pair
    bswap r23, r24           ; exchange the inner byte pair
    ret
ENDF __bswapsi2
#endif /* defined (L_bswapsi2) */
3023
#if defined (L_bswapdi2)
;; swap bytes
;; r25:r18 = bswap64 (r25:r18)
DEFUN __bswapdi2
    bswap r18, r25           ; exchange byte pairs, outermost inward
    bswap r19, r24
    bswap r20, r23
    bswap r21, r22
    ret
ENDF __bswapdi2
#endif /* defined (L_bswapdi2) */
3035
3036
3037 /**********************************
3039 * 64-bit shifts
3040 **********************************/
3041
#if defined (L_ashrdi3)
;; Arithmetic shift right
;; r25:r18 = ashr64 (r25:r18, r17:r16)
;; Only the low byte (r16) of the shift count is used.
DEFUN __ashrdi3
    ;; Smuggle the sign bit to the shared code in __zero_reg__.0;
    ;; the LSR below restores __zero_reg__ to 0.
    bst  r25, 7
    bld  __zero_reg__, 0
    ;; FALLTHRU
ENDF __ashrdi3

;; Logic shift right
;; r25:r18 = lshr64 (r25:r18, r17:r16)
DEFUN __lshrdi3
    ;; When entered here, __zero_reg__ is 0, so carry and fill are 0.
    lsr  __zero_reg__             ; carry = sign, __zero_reg__ = 0 again
    sbc  __tmp_reg__, __tmp_reg__ ; fill byte: 0x00 or 0xff
    push r16
    ;; Shift by whole bytes while the count is >= 8.
0:  cpi  r16, 8
    brlo 2f
    subi r16, 8
    mov  r18, r19
    mov  r19, r20
    mov  r20, r21
    mov  r21, r22
    mov  r22, r23
    mov  r23, r24
    mov  r24, r25
    mov  r25, __tmp_reg__         ; shift in the fill byte from the top
    rjmp 0b
    ;; Shift the remaining 0..7 bits one at a time.
1:  asr  __tmp_reg__              ; carry = fill bit; fill byte unchanged
    ror  r25
    ror  r24
    ror  r23
    ror  r22
    ror  r21
    ror  r20
    ror  r19
    ror  r18
2:  dec  r16
    brpl 1b                       ; loops "remaining count" times
    pop  r16
    ret
ENDF __lshrdi3
#endif /* defined (L_ashrdi3) */
3084
#if defined (L_ashldi3)
;; Shift left
;; r25:r18 = ashl64 (r25:r18, r17:r16)
;; Only the low byte (r16) of the shift count is used.
DEFUN __ashldi3
    push r16
    ;; Shift by whole bytes while the count is >= 8.
0:  cpi  r16, 8
    brlo 2f
    mov  r25, r24
    mov  r24, r23
    mov  r23, r22
    mov  r22, r21
    mov  r21, r20
    mov  r20, r19
    mov  r19, r18
    clr  r18                 ; zero-fill from the right
    subi r16, 8
    rjmp 0b
    ;; Shift the remaining 0..7 bits one at a time.
1:  lsl  r18
    rol  r19
    rol  r20
    rol  r21
    rol  r22
    rol  r23
    rol  r24
    rol  r25
2:  dec  r16
    brpl 1b                  ; loops "remaining count" times
    pop  r16
    ret
ENDF __ashldi3
#endif /* defined (L_ashldi3) */
3116
#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
;; Only the low byte (r16) of the rotate count is used.
DEFUN __rotldi3
    push r16
    ;; Rotate by whole bytes while the count is >= 8.
0:  cpi  r16, 8
    brlo 2f
    subi r16, 8
    mov  __tmp_reg__, r25    ; wrap the top byte around to the bottom
    mov  r25, r24
    mov  r24, r23
    mov  r23, r22
    mov  r22, r21
    mov  r21, r20
    mov  r20, r19
    mov  r19, r18
    mov  r18, __tmp_reg__
    rjmp 0b
    ;; Rotate the remaining 0..7 bits one at a time.
1:  lsl  r18
    rol  r19
    rol  r20
    rol  r21
    rol  r22
    rol  r23
    rol  r24
    rol  r25
    adc  r18, __zero_reg__   ; wrap the bit shifted out of r25 into r18
2:  dec  r16
    brpl 1b                  ; loops "remaining count" times
    pop  r16
    ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */
3150
3151
3152 .section .text.libgcc.fmul, "ax", @progbits
3154
3155 /***********************************************************/
3156 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3157 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3158 /***********************************************************/
3159
3160 #define A1 24
3161 #define B1 25
3162 #define C0 22
3163 #define C1 23
3164 #define A0 __tmp_reg__
3165
#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x signed fractional multiply (1.7 x 1.7 -> 1.15 format).
DEFUN __fmuls
    ;; A0.7 = negate result?
    mov  A0, A1              ; result sign = sign(A) ^ sign(B)
    eor  A0, B1
    ;; B1 = |B1|
    sbrc B1, 7
    neg  B1
    XJMP __fmulsu_exit       ; shared tail: take |A1|, multiply, sign
ENDF __fmuls
#endif /* L_fmuls */
3179
#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x unsigned fractional multiply (1.7 x 1.7 -> 1.15 format).
DEFUN __fmulsu
    ;; A0.7 = negate result?
    mov  A0, A1              ; only A's sign decides (B is unsigned)
    ;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu
;; Expects the result sign in A0.7; computes |A1| * B1 unsigned via
;; __fmul and negates the 16-bit product iff the sign bit is set.
DEFUN __fmulsu_exit
    ;; A1 = |A1|
    sbrc A1, 7
    neg  A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problem skipping 2-word instruction
    tst  A0
    brmi 1f
#else
    sbrs A0, 7               ; skip the XJMP when the result is negative
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP __fmul              ; non-negative: tail-call plain multiply
1:  XCALL __fmul
    ;; C = -C iff A0.7 = 1
    NEG2 C0                  ; negate the 16-bit result C1:C0
    ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */
3208
3209
#ifdef L_fmul
;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Unsigned fractional multiply by shift-and-add:
;;; for each set bit of B (MSB first), add the shifted A into C.
DEFUN __fmul
    ; clear result
    clr C0
    clr C1
    clr A0                   ; A0 = low byte of the 16-bit shifted A
1:  tst B1                   ; prime N/Z flags for the first brpl
    ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
2:  brpl 3f
    ;; C += A
    add C0, A0
    adc C1, A1
3:  ;; A >>= 1
    lsr A1
    ror A0
    ;; B <<= 1
    lsl B1                   ; N = next bit of B, Z = no bits left
    brne 2b
    ret
ENDF __fmul
#endif /* L_fmul */
3233
3234 #undef A0
3235 #undef A1
3236 #undef B1
3237 #undef C0
3238 #undef C1
3239
3240 #include "lib1funcs-fixed.S"
3241