lib1funcs.S revision 1.1.1.3 1 1.1 mrg /* -*- Mode: Asm -*- */
2 1.1.1.3 mrg /* Copyright (C) 1998-2016 Free Software Foundation, Inc.
3 1.1 mrg Contributed by Denis Chertykov <chertykov (at) gmail.com>
4 1.1 mrg
5 1.1 mrg This file is free software; you can redistribute it and/or modify it
6 1.1 mrg under the terms of the GNU General Public License as published by the
7 1.1 mrg Free Software Foundation; either version 3, or (at your option) any
8 1.1 mrg later version.
9 1.1 mrg
10 1.1 mrg This file is distributed in the hope that it will be useful, but
11 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 1.1 mrg General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1.1.2 mrg #if defined (__AVR_TINY__)
25 1.1.1.2 mrg #define __zero_reg__ r17
26 1.1.1.2 mrg #define __tmp_reg__ r16
27 1.1.1.2 mrg #else
28 1.1 mrg #define __zero_reg__ r1
29 1.1 mrg #define __tmp_reg__ r0
30 1.1.1.2 mrg #endif
31 1.1 mrg #define __SREG__ 0x3f
32 1.1 mrg #if defined (__AVR_HAVE_SPH__)
33 1.1 mrg #define __SP_H__ 0x3e
34 1.1 mrg #endif
35 1.1 mrg #define __SP_L__ 0x3d
36 1.1 mrg #define __RAMPZ__ 0x3B
37 1.1 mrg #define __EIND__ 0x3C
38 1.1 mrg
39 1.1 mrg /* Most of the functions here are called directly from avr.md
40 1.1 mrg patterns, instead of using the standard libcall mechanisms.
41 1.1 mrg This can make better code because GCC knows exactly which
42 1.1 mrg of the call-used registers (not all of them) are clobbered. */
43 1.1 mrg
44 1.1 mrg /* FIXME: At present, there is no SORT directive in the linker
45 1.1 mrg script so that we must not assume that different modules
46 1.1 mrg in the same input section like .libgcc.text.mul will be
47 1.1 mrg located close together. Therefore, we cannot use
48 1.1 mrg RCALL/RJMP to call a function like __udivmodhi4 from
49 1.1 mrg __divmodhi4 and have to use lengthy XCALL/XJMP even
50 1.1 mrg though they are in the same input section and all same
51 1.1 mrg input sections together are small enough to reach every
52 1.1 mrg location with a RCALL/RJMP instruction. */
53 1.1 mrg
54 1.1.1.2 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 1.1.1.2 mrg #error device not supported
56 1.1.1.2 mrg #endif
57 1.1.1.2 mrg
;; mov_l / mov_h: copy a 16-bit register pair in two halves.
;; On cores with MOVW, mov_l moves the whole word at once and mov_h
;; expands to nothing; otherwise each moves one byte.
58 1.1 mrg .macro mov_l r_dest, r_src
59 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
60 1.1 mrg movw \r_dest, \r_src
61 1.1 mrg #else
62 1.1 mrg mov \r_dest, \r_src
63 1.1 mrg #endif
64 1.1 mrg .endm
65 1.1 mrg
66 1.1 mrg .macro mov_h r_dest, r_src
67 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
68 1.1 mrg ; empty
69 1.1 mrg #else
70 1.1 mrg mov \r_dest, \r_src
71 1.1 mrg #endif
72 1.1 mrg .endm
73 1.1 mrg
;; wmov: self-contained 16-bit move; expands to a single MOVW where
;; available, else to two byte MOVs of \r_src / \r_src+1.
74 1.1 mrg .macro wmov r_dest, r_src
75 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
76 1.1 mrg movw \r_dest, \r_src
77 1.1 mrg #else
78 1.1 mrg mov \r_dest, \r_src
79 1.1 mrg mov \r_dest+1, \r_src+1
80 1.1 mrg #endif
81 1.1 mrg .endm
82 1.1 mrg
83 1.1 mrg #if defined (__AVR_HAVE_JMP_CALL__)
84 1.1 mrg #define XCALL call
85 1.1 mrg #define XJMP jmp
86 1.1 mrg #else
87 1.1 mrg #define XCALL rcall
88 1.1 mrg #define XJMP rjmp
89 1.1 mrg #endif
90 1.1 mrg
91 1.1.1.2 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__)
92 1.1.1.2 mrg #define XICALL eicall
93 1.1.1.2 mrg #define XIJMP eijmp
94 1.1.1.2 mrg #else
95 1.1.1.2 mrg #define XICALL icall
96 1.1.1.2 mrg #define XIJMP ijmp
97 1.1.1.2 mrg #endif
98 1.1.1.2 mrg
99 1.1 mrg ;; Prologue stuff
100 1.1 mrg
;; do_prologue_saves: set up registers and tail-jump into the shared
;; __prologue_saves__ sequence.  r27:r26 = frame size, r31:r30 = the
;; return point (label after this macro).  The jump lands (18 - n_pushed)
;; instructions into __prologue_saves__ so exactly n_pushed registers
;; get saved.
101 1.1 mrg .macro do_prologue_saves n_pushed n_frame=0
102 1.1 mrg ldi r26, lo8(\n_frame)
103 1.1 mrg ldi r27, hi8(\n_frame)
104 1.1 mrg ldi r30, lo8(gs(.L_prologue_saves.\@))
105 1.1 mrg ldi r31, hi8(gs(.L_prologue_saves.\@))
106 1.1 mrg XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107 1.1 mrg .L_prologue_saves.\@:
108 1.1 mrg .endm
109 1.1 mrg
110 1.1 mrg ;; Epilogue stuff
111 1.1 mrg
;; do_epilogue_restores: drop the frame (add n_frame back to the stack
;; pointer read into r29:r28) and tail-jump into the shared
;; __epilogue_restores__ sequence at the offset that restores exactly
;; n_pushed registers.  Devices without SPH only have an 8-bit SP, so
;; r29 is simply cleared there.
112 1.1 mrg .macro do_epilogue_restores n_pushed n_frame=0
113 1.1 mrg in r28, __SP_L__
114 1.1 mrg #ifdef __AVR_HAVE_SPH__
115 1.1 mrg in r29, __SP_H__
116 1.1 mrg .if \n_frame > 63
;; subi/sbci with the negated constant == 16-bit addition of n_frame
117 1.1 mrg subi r28, lo8(-\n_frame)
118 1.1 mrg sbci r29, hi8(-\n_frame)
119 1.1 mrg .elseif \n_frame > 0
;; ADIW takes a 6-bit immediate, hence the > 63 split above
120 1.1 mrg adiw r28, \n_frame
121 1.1 mrg .endif
122 1.1 mrg #else
123 1.1 mrg clr r29
124 1.1 mrg .if \n_frame > 0
125 1.1 mrg subi r28, lo8(-\n_frame)
126 1.1 mrg .endif
127 1.1 mrg #endif /* HAVE SPH */
128 1.1 mrg ldi r30, \n_pushed
129 1.1 mrg XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
130 1.1 mrg .endm
131 1.1 mrg
132 1.1 mrg ;; Support function entry and exit for convenience
133 1.1 mrg
;; wsubi / waddi: 16-bit immediate subtract / add on a register pair.
;; AVR_TINY cores lack SBIW/ADIW, so the operation is synthesized with
;; subi/sbci on the (negated, for waddi) constant.  Note: subi/sbci
;; require r16..r31; sbiw/adiw require r24/r26/r28/r30 and a 6-bit
;; immediate -- callers must respect whichever variant is configured.
134 1.1.1.2 mrg .macro wsubi r_arg1, i_arg2
135 1.1.1.2 mrg #if defined (__AVR_TINY__)
136 1.1.1.2 mrg subi \r_arg1, lo8(\i_arg2)
137 1.1.1.2 mrg sbci \r_arg1+1, hi8(\i_arg2)
138 1.1.1.2 mrg #else
139 1.1.1.2 mrg sbiw \r_arg1, \i_arg2
140 1.1.1.2 mrg #endif
141 1.1.1.2 mrg .endm
142 1.1.1.2 mrg
143 1.1.1.2 mrg .macro waddi r_arg1, i_arg2
144 1.1.1.2 mrg #if defined (__AVR_TINY__)
;; adding i_arg2 == subtracting -i_arg2
145 1.1.1.2 mrg subi \r_arg1, lo8(-\i_arg2)
146 1.1.1.2 mrg sbci \r_arg1+1, hi8(-\i_arg2)
147 1.1.1.2 mrg #else
148 1.1.1.2 mrg adiw \r_arg1, \i_arg2
149 1.1.1.2 mrg #endif
150 1.1.1.2 mrg .endm
151 1.1.1.2 mrg
;; DEFUN name: open a global function -- emits .global, .func and the
;; entry label.  Pair each DEFUN with an ENDF of the same name.
152 1.1 mrg .macro DEFUN name
153 1.1 mrg .global \name
154 1.1 mrg .func \name
155 1.1 mrg \name:
156 1.1 mrg .endm
157 1.1 mrg
;; ENDF name: close a function opened by DEFUN and record its size.
158 1.1 mrg .macro ENDF name
159 1.1 mrg .size \name, .-\name
160 1.1 mrg .endfunc
161 1.1 mrg .endm
162 1.1 mrg
;; FALIAS name: emit a zero-size alias entry point at the current
;; location (label + size 0), used to give one body a second name.
163 1.1 mrg .macro FALIAS name
164 1.1 mrg .global \name
165 1.1 mrg .func \name
166 1.1 mrg \name:
167 1.1 mrg .size \name, .-\name
168 1.1 mrg .endfunc
169 1.1 mrg .endm
170 1.1 mrg
171 1.1 mrg ;; Skip next instruction, typically a jump target
172 1.1.1.2 mrg #if defined(__AVR_TINY__)
173 1.1 mrg #define skip cpse 0,0
174 1.1.1.2 mrg #else
175 1.1.1.2 mrg #define skip cpse 16,16
176 1.1.1.2 mrg #endif
177 1.1 mrg
178 1.1 mrg ;; Negate a 2-byte value held in consecutive registers
;; NEG2 reg: two's-complement negate the 16-bit value in reg+1:reg.
;; Uses sbci with an immediate, so reg+1 must be r16..r31 here.
179 1.1 mrg .macro NEG2 reg
180 1.1 mrg com \reg+1
181 1.1 mrg neg \reg
;; NEG leaves carry set iff reg was non-zero; sbci -1 then turns
;; the COM of the high byte into a full negate with borrow.
182 1.1 mrg sbci \reg+1, -1
183 1.1 mrg .endm
184 1.1 mrg
185 1.1 mrg ;; Negate a 4-byte value held in consecutive registers
186 1.1 mrg ;; Sets the V flag for signed overflow tests if REG >= 16
187 1.1 mrg .macro NEG4 reg
188 1.1 mrg com \reg+3
189 1.1 mrg com \reg+2
190 1.1 mrg com \reg+1
191 1.1 mrg .if \reg >= 16
;; upper registers: NEG + sbci -1 chain (immediate forms available)
192 1.1 mrg neg \reg
193 1.1 mrg sbci \reg+1, -1
194 1.1 mrg sbci \reg+2, -1
195 1.1 mrg sbci \reg+3, -1
196 1.1 mrg .else
;; lower registers: COM everything (COM sets carry), then ripple the
;; +1 through with adc against the zero register
197 1.1 mrg com \reg
198 1.1 mrg adc \reg, __zero_reg__
199 1.1 mrg adc \reg+1, __zero_reg__
200 1.1 mrg adc \reg+2, __zero_reg__
201 1.1 mrg adc \reg+3, __zero_reg__
202 1.1 mrg .endif
203 1.1 mrg .endm
204 1.1 mrg
205 1.1 mrg #define exp_lo(N) hlo8 ((N) << 23)
206 1.1 mrg #define exp_hi(N) hhi8 ((N) << 23)
207 1.1 mrg
208 1.1 mrg
209 1.1 mrg .section .text.libgcc.mul, "ax", @progbits
211 1.1 mrg
212 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
213 1.1 mrg /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
214 1.1 mrg #if !defined (__AVR_HAVE_MUL__)
215 1.1 mrg /*******************************************************
216 1.1 mrg Multiplication 8 x 8 without MUL
217 1.1 mrg *******************************************************/
218 1.1 mrg #if defined (L_mulqi3)
219 1.1 mrg
220 1.1 mrg #define r_arg2 r22 /* multiplicand */
221 1.1 mrg #define r_arg1 r24 /* multiplier */
222 1.1 mrg #define r_res __tmp_reg__ /* result */
223 1.1 mrg
;; __mulqi3: 8 x 8 -> 8 bit multiply by shift-and-add (no MUL insn).
;; In:  r24 = multiplier (r_arg1), r22 = multiplicand (r_arg2)
;; Out: r24 = product (low 8 bits)
;; Clobbers: __tmp_reg__ (r_res), r22; flags.
224 1.1 mrg DEFUN __mulqi3
225 1.1 mrg clr r_res ; clear result
226 1.1 mrg __mulqi3_loop:
227 1.1 mrg sbrc r_arg1,0
228 1.1 mrg add r_res,r_arg2
229 1.1 mrg add r_arg2,r_arg2 ; shift multiplicand
230 1.1 mrg breq __mulqi3_exit ; while multiplicand != 0
231 1.1 mrg lsr r_arg1 ;
232 1.1 mrg brne __mulqi3_loop ; exit if multiplier = 0
233 1.1 mrg __mulqi3_exit:
234 1.1 mrg mov r_arg1,r_res ; result to return register
235 1.1 mrg ret
236 1.1 mrg ENDF __mulqi3
237 1.1 mrg
238 1.1 mrg #undef r_arg2
239 1.1 mrg #undef r_arg1
240 1.1 mrg #undef r_res
241 1.1 mrg
242 1.1 mrg #endif /* defined (L_mulqi3) */
243 1.1 mrg
244 1.1 mrg
245 1.1 mrg /*******************************************************
246 1.1 mrg Widening Multiplication 16 = 8 x 8 without MUL
247 1.1 mrg Multiplication 16 x 16 without MUL
248 1.1 mrg *******************************************************/
249 1.1.1.2 mrg
250 1.1.1.2 mrg #define A0 22
251 1.1.1.2 mrg #define A1 23
252 1.1.1.2 mrg #define B0 24
253 1.1.1.2 mrg #define BB0 20
254 1.1 mrg #define B1 25
255 1.1.1.2 mrg ;; Output overlaps input, thus expand result in CC0/1
256 1.1.1.2 mrg #define C0 24
257 1.1 mrg #define C1 25
258 1.1.1.2 mrg #define CC0 __tmp_reg__
259 1.1 mrg #define CC1 21
260 1.1 mrg
261 1.1 mrg #if defined (L_umulqihi3)
262 1.1 mrg ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
263 1.1 mrg ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
264 1.1 mrg ;;; Clobbers: __tmp_reg__, R21..R23
;; Zero-extend both 8-bit operands (A1 = B1 = 0) and let __mulhi3 do
;; the widening 8 x 8 -> 16 multiply; tail jump, so __mulhi3 returns
;; straight to our caller.
265 1.1 mrg DEFUN __umulqihi3
266 1.1 mrg clr A1
267 1.1 mrg clr B1
268 1.1 mrg XJMP __mulhi3
269 1.1 mrg ENDF __umulqihi3
270 1.1 mrg #endif /* L_umulqihi3 */
271 1.1 mrg
272 1.1 mrg #if defined (L_mulqihi3)
273 1.1 mrg ;;; R25:R24 = (signed int) R22 * (signed int) R24
274 1.1 mrg ;;; (C1:C0) = (signed int) A0 * (signed int) B0
275 1.1 mrg ;;; Clobbers: __tmp_reg__, R20..R23
;; Signed widening 8 x 8 -> 16 multiply without MUL.
;; Strategy: run __mulhi3 with A zero-extended (faster), then correct
;; the sign of A afterwards: C1 -= BB0, where BB0 = B if A < 0 else 0.
;; B is sign-extended up front the ordinary way.
276 1.1 mrg DEFUN __mulqihi3
277 1.1 mrg ;; Sign-extend B0
278 1.1 mrg clr B1
279 1.1 mrg sbrc B0, 7
280 1.1 mrg com B1
281 1.1 mrg ;; The multiplication runs twice as fast if A1 is zero, thus:
282 1.1 mrg ;; Zero-extend A0
283 1.1 mrg clr A1
284 1.1 mrg #ifdef __AVR_HAVE_JMP_CALL__
285 1.1 mrg ;; Store B0 * sign of A
286 1.1 mrg clr BB0
287 1.1 mrg sbrc A0, 7
288 1.1 mrg mov BB0, B0
289 1.1 mrg call __mulhi3
290 1.1 mrg #else /* have no CALL */
291 1.1 mrg ;; Skip sign-extension of A if A >= 0
292 1.1 mrg ;; Same size as with the first alternative but avoids errata skip
293 1.1 mrg ;; and is faster if A >= 0
;; A >= 0: tail-jump, __mulhi3's result is already correct
294 1.1 mrg sbrs A0, 7
295 1.1 mrg rjmp __mulhi3
296 1.1 mrg ;; If A < 0 store B
297 1.1 mrg mov BB0, B0
298 1.1 mrg rcall __mulhi3
299 1.1 mrg #endif /* HAVE_JMP_CALL */
300 1.1 mrg ;; 1-extend A after the multiplication
301 1.1 mrg sub C1, BB0
302 1.1 mrg ret
303 1.1 mrg ENDF __mulqihi3
304 1.1 mrg #endif /* L_mulqihi3 */
305 1.1 mrg
306 1.1 mrg #if defined (L_mulhi3)
307 1.1 mrg ;;; R25:R24 = R23:R22 * R25:R24
308 1.1 mrg ;;; (C1:C0) = (A1:A0) * (B1:B0)
309 1.1 mrg ;;; Clobbers: __tmp_reg__, R21..R23
;; 16 x 16 -> 16 bit shift-and-add multiply without MUL.
;; A (A1:A0) is consumed bit by bit from the low end; B (B1:B0) is
;; doubled each round.  The product accumulates in CC1:CC0 because
;; the output registers C1:C0 overlap input B.
310 1.1 mrg DEFUN __mulhi3
311 1.1 mrg
312 1.1 mrg ;; Clear result
313 1.1 mrg clr CC0
314 1.1 mrg clr CC1
315 1.1 mrg rjmp 3f
316 1.1 mrg 1:
317 1.1 mrg ;; Bit n of A is 1 --> C += B << n
318 1.1 mrg add CC0, B0
319 1.1 mrg adc CC1, B1
320 1.1 mrg 2:
321 1.1 mrg lsl B0
322 1.1 mrg rol B1
323 1.1 mrg 3:
324 1.1.1.2 mrg ;; If B == 0 we are ready
325 1.1 mrg wsubi B0, 0
326 1.1 mrg breq 9f
327 1.1 mrg
328 1.1 mrg ;; Carry = n-th bit of A
329 1.1 mrg lsr A1
330 1.1 mrg ror A0
331 1.1 mrg ;; If bit n of A is set, then go add B * 2^n to C
332 1.1 mrg brcs 1b
333 1.1 mrg
334 1.1 mrg ;; Carry = 0 --> The ROR above acts like CP A0, 0
335 1.1 mrg ;; Thus, it is sufficient to CPC the high part to test A against 0
336 1.1 mrg cpc A1, __zero_reg__
337 1.1 mrg ;; Only proceed if A != 0
338 1.1 mrg brne 2b
339 1.1 mrg 9:
340 1.1 mrg ;; Move Result into place
341 1.1 mrg mov C0, CC0
342 1.1 mrg mov C1, CC1
343 1.1 mrg ret
344 1.1 mrg ENDF __mulhi3
345 1.1 mrg #endif /* L_mulhi3 */
346 1.1 mrg
347 1.1 mrg #undef A0
348 1.1 mrg #undef A1
349 1.1 mrg #undef B0
350 1.1 mrg #undef BB0
351 1.1 mrg #undef B1
352 1.1 mrg #undef C0
353 1.1 mrg #undef C1
354 1.1 mrg #undef CC0
355 1.1 mrg #undef CC1
356 1.1 mrg
357 1.1 mrg
358 1.1 mrg #define A0 22
360 1.1 mrg #define A1 A0+1
361 1.1 mrg #define A2 A0+2
362 1.1 mrg #define A3 A0+3
363 1.1 mrg
364 1.1 mrg #define B0 18
365 1.1 mrg #define B1 B0+1
366 1.1 mrg #define B2 B0+2
367 1.1 mrg #define B3 B0+3
368 1.1 mrg
369 1.1 mrg #define CC0 26
370 1.1 mrg #define CC1 CC0+1
371 1.1 mrg #define CC2 30
372 1.1 mrg #define CC3 CC2+1
373 1.1 mrg
374 1.1 mrg #define C0 22
375 1.1 mrg #define C1 C0+1
376 1.1 mrg #define C2 C0+2
377 1.1 mrg #define C3 C0+3
378 1.1 mrg
379 1.1 mrg /*******************************************************
380 1.1 mrg Widening Multiplication 32 = 16 x 16 without MUL
381 1.1 mrg *******************************************************/
382 1.1 mrg
383 1.1 mrg #if defined (L_umulhisi3)
;; Unsigned widening 16 x 16 -> 32 without MUL: copy B into place
;; (B0 <- r25:r24), zero-extend both operands and tail-jump to the
;; 32 x 32 routine.  wmov A2, B2 copies the two zero bytes to A's
;; high word, saving two CLRs.
384 1.1 mrg DEFUN __umulhisi3
385 1.1 mrg wmov B0, 24
386 1.1 mrg ;; Zero-extend B
387 1.1 mrg clr B2
388 1.1 mrg clr B3
389 1.1 mrg ;; Zero-extend A
390 1.1 mrg wmov A2, B2
391 1.1 mrg XJMP __mulsi3
392 1.1 mrg ENDF __umulhisi3
393 1.1 mrg #endif /* L_umulhisi3 */
394 1.1 mrg
395 1.1 mrg #if defined (L_mulhisi3)
;; Signed widening 16 x 16 -> 32 without MUL.
;; B is always sign-extended (lsl r25 moves B's sign into carry, then
;; sbc B2,B2 yields 0x00 or 0xff).  For A, the fast path zero-extends
;; and pre-biases the accumulator with -B when A < 0, which is exactly
;; the 2^16 * sign correction -- except on skip-errata cores, where a
;; plain sign-extension of A is used instead.
396 1.1 mrg DEFUN __mulhisi3
397 1.1 mrg wmov B0, 24
398 1.1 mrg ;; Sign-extend B
399 1.1 mrg lsl r25
400 1.1 mrg sbc B2, B2
401 1.1 mrg mov B3, B2
402 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
403 1.1 mrg ;; Sign-extend A
404 1.1 mrg clr A2
405 1.1 mrg sbrc A1, 7
406 1.1 mrg com A2
407 1.1 mrg mov A3, A2
408 1.1 mrg XJMP __mulsi3
409 1.1 mrg #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
410 1.1 mrg ;; Zero-extend A and __mulsi3 will run at least twice as fast
411 1.1 mrg ;; compared to a sign-extended A.
412 1.1 mrg clr A2
413 1.1 mrg clr A3
414 1.1 mrg sbrs A1, 7
415 1.1 mrg XJMP __mulsi3
416 1.1 mrg ;; If A < 0 then perform the B * 0xffff.... before the
417 1.1 mrg ;; very multiplication by initializing the high part of the
418 1.1 mrg ;; result CC with -B.
419 1.1 mrg wmov CC2, A2
420 1.1 mrg sub CC2, B0
421 1.1 mrg sbc CC3, B1
422 1.1 mrg XJMP __mulsi3_helper
423 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
424 1.1 mrg ENDF __mulhisi3
425 1.1 mrg #endif /* L_mulhisi3 */
426 1.1 mrg
427 1.1 mrg
428 1.1 mrg /*******************************************************
429 1.1 mrg Multiplication 32 x 32 without MUL
430 1.1 mrg *******************************************************/
431 1.1 mrg
432 1.1.1.2 mrg #if defined (L_mulsi3)
;; 32 x 32 -> 32 bit shift-and-add multiply without MUL.
;; __mulsi3 zeroes the accumulator's high word and falls through to
;; __mulsi3_helper, which __mulhisi3 also enters directly with a
;; pre-biased CC3:CC2.  On AVR_TINY, B arrives on the caller's stack
;; and B0/B1 are call-saved there, hence the push/ld prologue.
433 1.1.1.2 mrg DEFUN __mulsi3
434 1.1.1.2 mrg #if defined (__AVR_TINY__)
435 1.1.1.2 mrg in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
436 1.1.1.2 mrg in r27, __SP_H__
437 1.1.1.2 mrg subi r26, lo8(-3) ; Add 3 to point past return address
438 1.1.1.2 mrg sbci r27, hi8(-3)
439 1.1.1.2 mrg push B0 ; save callee saved regs
440 1.1.1.2 mrg push B1
441 1.1.1.2 mrg ld B0, X+ ; load from caller stack
442 1.1.1.2 mrg ld B1, X+
443 1.1.1.2 mrg ld B2, X+
444 1.1 mrg ld B3, X
445 1.1 mrg #endif
446 1.1 mrg ;; Clear result
447 1.1 mrg clr CC2
448 1.1 mrg clr CC3
449 1.1 mrg ;; FALLTHRU
450 1.1 mrg ENDF __mulsi3
451 1.1 mrg
452 1.1 mrg DEFUN __mulsi3_helper
453 1.1 mrg clr CC0
454 1.1 mrg clr CC1
455 1.1 mrg rjmp 3f
456 1.1 mrg
457 1.1 mrg 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
458 1.1 mrg ;; CC += B
459 1.1 mrg add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
460 1.1 mrg
461 1.1 mrg 2: ;; B <<= 1
462 1.1 mrg lsl B0 $ rol B1 $ rol B2 $ rol B3
463 1.1 mrg
464 1.1 mrg 3: ;; A >>= 1: Carry = n-th bit of A
465 1.1 mrg lsr A3 $ ror A2 $ ror A1 $ ror A0
466 1.1 mrg
467 1.1 mrg brcs 1b
468 1.1 mrg ;; Only continue if A != 0
;; carry is 0 here, and sbci/wsubi leave Z set only if it was already
;; set, so Z accumulates across the low/high word tests of A
469 1.1.1.2 mrg sbci A1, 0
470 1.1 mrg brne 2b
471 1.1 mrg wsubi A2, 0
472 1.1 mrg brne 2b
473 1.1 mrg
474 1.1 mrg ;; All bits of A are consumed: Copy result to return register C
475 1.1.1.2 mrg wmov C0, CC0
476 1.1.1.2 mrg wmov C2, CC2
477 1.1.1.2 mrg #if defined (__AVR_TINY__)
478 1.1.1.2 mrg pop B1 ; restore callee saved regs
479 1.1.1.2 mrg pop B0
480 1.1 mrg #endif /* defined (__AVR_TINY__) */
481 1.1 mrg
482 1.1 mrg ret
483 1.1 mrg ENDF __mulsi3_helper
484 1.1 mrg #endif /* L_mulsi3 */
485 1.1 mrg
486 1.1 mrg #undef A0
487 1.1 mrg #undef A1
488 1.1 mrg #undef A2
489 1.1 mrg #undef A3
490 1.1 mrg #undef B0
491 1.1 mrg #undef B1
492 1.1 mrg #undef B2
493 1.1 mrg #undef B3
494 1.1 mrg #undef C0
495 1.1 mrg #undef C1
496 1.1 mrg #undef C2
497 1.1 mrg #undef C3
498 1.1 mrg #undef CC0
499 1.1 mrg #undef CC1
500 1.1 mrg #undef CC2
501 1.1 mrg #undef CC3
502 1.1 mrg
503 1.1 mrg #endif /* !defined (__AVR_HAVE_MUL__) */
504 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
505 1.1 mrg
506 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
508 1.1 mrg #if defined (__AVR_HAVE_MUL__)
509 1.1 mrg #define A0 26
510 1.1 mrg #define B0 18
511 1.1 mrg #define C0 22
512 1.1 mrg
513 1.1 mrg #define A1 A0+1
514 1.1 mrg
515 1.1 mrg #define B1 B0+1
516 1.1 mrg #define B2 B0+2
517 1.1 mrg #define B3 B0+3
518 1.1 mrg
519 1.1 mrg #define C1 C0+1
520 1.1 mrg #define C2 C0+2
521 1.1 mrg #define C3 C0+3
522 1.1 mrg
523 1.1 mrg /*******************************************************
524 1.1 mrg Widening Multiplication 32 = 16 x 16 with MUL
525 1.1 mrg *******************************************************/
526 1.1 mrg
527 1.1 mrg #if defined (L_mulhisi3)
528 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
529 1.1 mrg ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
530 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Signed 16 x 16 -> 32 on MUL-capable cores: do the unsigned multiply
;; first, then correct the high word for each negative operand
;; (subtract the other operand from C3:C2).  The A-side correction is
;; shared with __usmulhisi3 via __usmulhisi3_tail.
531 1.1 mrg DEFUN __mulhisi3
532 1.1 mrg XCALL __umulhisi3
533 1.1 mrg ;; Sign-extend B
534 1.1 mrg tst B1
535 1.1 mrg brpl 1f
536 1.1 mrg sub C2, A0
537 1.1 mrg sbc C3, A1
538 1.1 mrg 1: ;; Sign-extend A
539 1.1 mrg XJMP __usmulhisi3_tail
540 1.1 mrg ENDF __mulhisi3
541 1.1 mrg #endif /* L_mulhisi3 */
542 1.1 mrg
543 1.1 mrg #if defined (L_usmulhisi3)
544 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
545 1.1 mrg ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
546 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Mixed signed x unsigned 16 x 16 -> 32: unsigned multiply, then
;; fall through into the shared tail that corrects for A < 0.
547 1.1 mrg DEFUN __usmulhisi3
548 1.1 mrg XCALL __umulhisi3
549 1.1 mrg ;; FALLTHRU
550 1.1 mrg ENDF __usmulhisi3
551 1.1 mrg
;; __usmulhisi3_tail: if A is negative, subtract B from the high word
;; (the 2^16 * sign correction); otherwise the unsigned product stands.
552 1.1 mrg DEFUN __usmulhisi3_tail
553 1.1 mrg ;; Sign-extend A
554 1.1 mrg sbrs A1, 7
555 1.1 mrg ret
556 1.1 mrg sub C2, B0
557 1.1 mrg sbc C3, B1
558 1.1 mrg ret
559 1.1 mrg ENDF __usmulhisi3_tail
560 1.1 mrg #endif /* L_usmulhisi3 */
561 1.1 mrg
562 1.1 mrg #if defined (L_umulhisi3)
563 1.1 mrg ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
564 1.1 mrg ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
565 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Unsigned 16 x 16 -> 32 using the hardware MUL: the four partial
;; products A0*B0, A1*B1, A0*B1, A1*B0 are summed into C3:C0.  The two
;; cross products share the same add sequence; on small-flash devices
;; it is reused via RCALL 1f, otherwise it is duplicated for speed.
566 1.1 mrg DEFUN __umulhisi3
567 1.1 mrg mul A0, B0
568 1.1 mrg movw C0, r0
569 1.1 mrg mul A1, B1
570 1.1 mrg movw C2, r0
571 1.1 mrg mul A0, B1
572 1.1 mrg #ifdef __AVR_HAVE_JMP_CALL__
573 1.1 mrg ;; This function is used by many other routines, often multiple times.
574 1.1 mrg ;; Therefore, if the flash size is not too limited, avoid the RCALL
575 1.1 mrg ;; and invest 6 bytes to speed things up.
576 1.1 mrg add C1, r0
577 1.1 mrg adc C2, r1
578 1.1 mrg clr __zero_reg__
579 1.1 mrg adc C3, __zero_reg__
580 1.1 mrg #else
;; run the add sequence below twice: once as a subroutine for A0*B1,
;; once inline for A1*B0
581 1.1 mrg rcall 1f
582 1.1 mrg #endif
583 1.1 mrg mul A1, B0
584 1.1 mrg 1: add C1, r0
585 1.1 mrg adc C2, r1
586 1.1 mrg clr __zero_reg__
587 1.1 mrg adc C3, __zero_reg__
588 1.1 mrg ret
589 1.1 mrg ENDF __umulhisi3
590 1.1 mrg #endif /* L_umulhisi3 */
591 1.1 mrg
592 1.1 mrg /*******************************************************
593 1.1 mrg Widening Multiplication 32 = 16 x 32 with MUL
594 1.1 mrg *******************************************************/
595 1.1 mrg
596 1.1 mrg #if defined (L_mulshisi3)
597 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * R21:R18
598 1.1 mrg ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
599 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Signed-16 x 32 multiply: if A >= 0 this is just the unsigned
;; variant; if A < 0 fall through to __mulohisi3 which applies the
;; one-extension correction afterwards.
600 1.1 mrg DEFUN __mulshisi3
601 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
602 1.1 mrg ;; Some cores have problem skipping 2-word instruction
603 1.1 mrg tst A1
604 1.1 mrg brmi __mulohisi3
605 1.1 mrg #else
606 1.1 mrg sbrs A1, 7
607 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
608 1.1 mrg XJMP __muluhisi3
609 1.1 mrg ;; FALLTHRU
610 1.1 mrg ENDF __mulshisi3
611 1.1 mrg
612 1.1 mrg ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
613 1.1 mrg ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
614 1.1 mrg ;;; Clobbers: __tmp_reg__
615 1.1 mrg DEFUN __mulohisi3
616 1.1 mrg XCALL __muluhisi3
617 1.1 mrg ;; One-extend R27:R26 (A1:A0)
;; treating A as 0xffffXXXX adds -B * 2^16, i.e. subtract B from C3:C2
618 1.1 mrg sub C2, B0
619 1.1 mrg sbc C3, B1
620 1.1 mrg ret
621 1.1 mrg ENDF __mulohisi3
622 1.1 mrg #endif /* L_mulshisi3 */
623 1.1 mrg
624 1.1 mrg #if defined (L_muluhisi3)
625 1.1 mrg ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
626 1.1 mrg ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
627 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Unsigned-16 x 32 -> 32: start from the 16 x 16 product of A and the
;; low word of B, then fold in the partial products with B's high word
;; (only the parts that land inside 32 bits are kept).
628 1.1 mrg DEFUN __muluhisi3
629 1.1 mrg XCALL __umulhisi3
630 1.1 mrg mul A0, B3
631 1.1 mrg add C3, r0
632 1.1 mrg mul A1, B2
633 1.1 mrg add C3, r0
634 1.1 mrg mul A0, B2
635 1.1 mrg add C2, r0
636 1.1 mrg adc C3, r1
;; MUL clobbers r1; restore the ABI's zero register before returning
637 1.1 mrg clr __zero_reg__
638 1.1 mrg ret
639 1.1 mrg ENDF __muluhisi3
640 1.1 mrg #endif /* L_muluhisi3 */
641 1.1 mrg
642 1.1 mrg /*******************************************************
643 1.1 mrg Multiplication 32 x 32 with MUL
644 1.1 mrg *******************************************************/
645 1.1 mrg
646 1.1 mrg #if defined (L_mulsi3)
647 1.1 mrg ;;; R25:R22 = R25:R22 * R21:R18
648 1.1 mrg ;;; (C3:C0) = C3:C0 * B3:B0
649 1.1 mrg ;;; Clobbers: R26, R27, __tmp_reg__
;; 32 x 32 -> 32 on MUL-capable cores.  The low word of A is copied to
;; A1:A0 (X regs); the high word is parked on the stack across the
;; __muluhisi3 call and then multiplied in with only the partial
;; products that affect the upper 16 bits of the result.
650 1.1 mrg DEFUN __mulsi3
651 1.1 mrg movw A0, C0
652 1.1 mrg push C2
653 1.1 mrg push C3
654 1.1 mrg XCALL __muluhisi3
655 1.1 mrg pop A1
656 1.1 mrg pop A0
657 1.1 mrg ;; A1:A0 now contains the high word of A
658 1.1 mrg mul A0, B0
659 1.1 mrg add C2, r0
660 1.1 mrg adc C3, r1
661 1.1 mrg mul A0, B1
662 1.1 mrg add C3, r0
663 1.1 mrg mul A1, B0
664 1.1 mrg add C3, r0
;; MUL clobbers r1; restore the ABI's zero register before returning
665 1.1 mrg clr __zero_reg__
666 1.1 mrg ret
667 1.1 mrg ENDF __mulsi3
668 1.1 mrg #endif /* L_mulsi3 */
669 1.1 mrg
670 1.1 mrg #undef A0
671 1.1 mrg #undef A1
672 1.1 mrg
673 1.1 mrg #undef B0
674 1.1 mrg #undef B1
675 1.1 mrg #undef B2
676 1.1 mrg #undef B3
677 1.1 mrg
678 1.1 mrg #undef C0
679 1.1 mrg #undef C1
680 1.1 mrg #undef C2
681 1.1 mrg #undef C3
682 1.1 mrg
683 1.1 mrg #endif /* __AVR_HAVE_MUL__ */
684 1.1 mrg
685 1.1 mrg /*******************************************************
686 1.1 mrg Multiplication 24 x 24 with MUL
687 1.1 mrg *******************************************************/
688 1.1 mrg
689 1.1 mrg #if defined (L_mulpsi3)
690 1.1 mrg
691 1.1 mrg ;; A[0..2]: In: Multiplicand; Out: Product
692 1.1 mrg #define A0 22
693 1.1 mrg #define A1 A0+1
694 1.1 mrg #define A2 A0+2
695 1.1 mrg
696 1.1 mrg ;; B[0..2]: In: Multiplier
697 1.1 mrg #define B0 18
698 1.1 mrg #define B1 B0+1
699 1.1 mrg #define B2 B0+2
700 1.1 mrg
701 1.1 mrg #if defined (__AVR_HAVE_MUL__)
702 1.1 mrg
703 1.1 mrg ;; C[0..2]: Expand Result
704 1.1 mrg #define C0 22
705 1.1 mrg #define C1 C0+1
706 1.1 mrg #define C2 C0+2
707 1.1 mrg
708 1.1 mrg ;; R24:R22 *= R20:R18
709 1.1 mrg ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
710 1.1 mrg
711 1.1 mrg #define AA0 26
712 1.1 mrg #define AA2 21
713 1.1 mrg
;; 24 x 24 -> 24 with MUL: copy A aside (AA2:AA0) because the result C
;; overlaps A, compute the 16 x 16 low product via __umulhisi3, then
;; fold the two byte-2 cross products into C2.
714 1.1 mrg DEFUN __mulpsi3
715 1.1 mrg wmov AA0, A0
716 1.1 mrg mov AA2, A2
717 1.1 mrg XCALL __umulhisi3
718 1.1 mrg mul AA2, B0 $ add C2, r0
719 1.1 mrg mul AA0, B2 $ add C2, r0
;; MUL clobbers r1; restore the ABI's zero register before returning
720 1.1 mrg clr __zero_reg__
721 1.1 mrg ret
722 1.1 mrg ENDF __mulpsi3
723 1.1 mrg
724 1.1 mrg #undef AA2
725 1.1 mrg #undef AA0
726 1.1 mrg
727 1.1 mrg #undef C2
728 1.1 mrg #undef C1
729 1.1 mrg #undef C0
730 1.1.1.2 mrg
731 1.1.1.2 mrg #else /* !HAVE_MUL */
732 1.1.1.2 mrg ;; C[0..2]: Expand Result
733 1.1 mrg #if defined (__AVR_TINY__)
734 1.1.1.2 mrg #define C0 16
735 1.1 mrg #else
736 1.1 mrg #define C0 0
737 1.1 mrg #endif /* defined (__AVR_TINY__) */
738 1.1 mrg #define C1 C0+1
739 1.1 mrg #define C2 21
740 1.1 mrg
741 1.1 mrg ;; R24:R22 *= R20:R18
742 1.1.1.2 mrg ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
743 1.1.1.2 mrg
;; 24 x 24 -> 24 shift-and-add multiply without MUL.  B is consumed
;; bit by bit; A is doubled each round; the product accumulates in
;; C2:C1:C0 (C0 = __tmp_reg__ pair, see the #defines above).  On
;; AVR_TINY, B arrives on the caller's stack and B0/B1 are call-saved.
744 1.1.1.2 mrg DEFUN __mulpsi3
745 1.1.1.2 mrg #if defined (__AVR_TINY__)
746 1.1.1.2 mrg in r26,__SP_L__
747 1.1.1.2 mrg in r27,__SP_H__
748 1.1.1.2 mrg subi r26, lo8(-3) ; Add 3 to point past return address
749 1.1.1.2 mrg sbci r27, hi8(-3)
750 1.1.1.2 mrg push B0 ; save callee saved regs
751 1.1.1.2 mrg push B1
752 1.1.1.2 mrg ld B0,X+ ; load from caller stack
753 1.1 mrg ld B1,X+
754 1.1 mrg ld B2,X+
755 1.1 mrg #endif /* defined (__AVR_TINY__) */
756 1.1 mrg
757 1.1 mrg ;; C[] = 0
758 1.1 mrg clr __tmp_reg__
759 1.1 mrg clr C2
760 1.1 mrg
761 1.1 mrg 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
762 1.1 mrg LSR B2 $ ror B1 $ ror B0
763 1.1 mrg
764 1.1 mrg ;; If the N-th Bit of B[] was set...
765 1.1 mrg brcc 1f
766 1.1 mrg
767 1.1 mrg ;; ...then add A[] * 2^N to the Result C[]
768 1.1 mrg ADD C0,A0 $ adc C1,A1 $ adc C2,A2
769 1.1 mrg
770 1.1 mrg 1: ;; Multiply A[] by 2
771 1.1 mrg LSL A0 $ rol A1 $ rol A2
772 1.1 mrg
773 1.1 mrg ;; Loop until B[] is 0
774 1.1 mrg subi B0,0 $ sbci B1,0 $ sbci B2,0
775 1.1 mrg brne 0b
776 1.1 mrg
777 1.1 mrg ;; Copy C[] to the return Register A[]
778 1.1 mrg wmov A0, C0
779 1.1.1.2 mrg mov A2, C2
780 1.1.1.2 mrg
;; C1 is __zero_reg__ on non-TINY (C0 = 0); clear it again for the ABI
781 1.1.1.2 mrg clr __zero_reg__
782 1.1.1.2 mrg #if defined (__AVR_TINY__)
783 1.1 mrg pop B1
784 1.1 mrg pop B0
785 1.1 mrg #endif /* (__AVR_TINY__) */
786 1.1 mrg ret
787 1.1 mrg ENDF __mulpsi3
788 1.1 mrg
789 1.1 mrg #undef C2
790 1.1 mrg #undef C1
791 1.1 mrg #undef C0
792 1.1 mrg
793 1.1 mrg #endif /* HAVE_MUL */
794 1.1 mrg
795 1.1 mrg #undef B2
796 1.1 mrg #undef B1
797 1.1 mrg #undef B0
798 1.1 mrg
799 1.1 mrg #undef A2
800 1.1 mrg #undef A1
801 1.1 mrg #undef A0
802 1.1 mrg
803 1.1 mrg #endif /* L_mulpsi3 */
804 1.1 mrg
805 1.1 mrg #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
806 1.1 mrg
807 1.1 mrg ;; A[0..2]: In: Multiplicand
808 1.1 mrg #define A0 22
809 1.1 mrg #define A1 A0+1
810 1.1 mrg #define A2 A0+2
811 1.1 mrg
812 1.1 mrg ;; BB: In: Multiplier
813 1.1 mrg #define BB 25
814 1.1 mrg
815 1.1 mrg ;; C[0..2]: Result
816 1.1 mrg #define C0 18
817 1.1 mrg #define C1 C0+1
818 1.1 mrg #define C2 C0+2
819 1.1 mrg
820 1.1 mrg ;; C[] = A[] * sign_extend (BB)
;; 24-bit = 24-bit x sign-extended 8-bit (MUL required).
;; C[] = A[] * BB treated as unsigned, then if BB < 0 subtract
;; A << 8 (the 2^8 * sign correction) from the upper bytes.
821 1.1 mrg DEFUN __mulsqipsi3
822 1.1 mrg mul A0, BB
823 1.1 mrg movw C0, r0
824 1.1 mrg mul A2, BB
825 1.1 mrg mov C2, r0
826 1.1 mrg mul A1, BB
827 1.1 mrg add C1, r0
828 1.1 mrg adc C2, r1
;; MUL clobbers r1; restore the ABI's zero register
829 1.1 mrg clr __zero_reg__
830 1.1 mrg sbrs BB, 7
831 1.1 mrg ret
832 1.1 mrg ;; One-extend BB
833 1.1 mrg sub C1, A0
834 1.1 mrg sbc C2, A1
835 1.1 mrg ret
836 1.1 mrg ENDF __mulsqipsi3
837 1.1 mrg
838 1.1 mrg #undef C2
839 1.1 mrg #undef C1
840 1.1 mrg #undef C0
841 1.1 mrg
842 1.1 mrg #undef BB
843 1.1 mrg
844 1.1 mrg #undef A2
845 1.1 mrg #undef A1
846 1.1 mrg #undef A0
847 1.1 mrg
848 1.1 mrg #endif /* L_mulsqipsi3 && HAVE_MUL */
849 1.1 mrg
850 1.1 mrg /*******************************************************
851 1.1 mrg Multiplication 64 x 64
852 1.1 mrg *******************************************************/
853 1.1 mrg
854 1.1 mrg ;; A[] = A[] * B[]
855 1.1 mrg
856 1.1 mrg ;; A[0..7]: In: Multiplicand
857 1.1 mrg ;; Out: Product
858 1.1 mrg #define A0 18
859 1.1 mrg #define A1 A0+1
860 1.1 mrg #define A2 A0+2
861 1.1 mrg #define A3 A0+3
862 1.1 mrg #define A4 A0+4
863 1.1 mrg #define A5 A0+5
864 1.1 mrg #define A6 A0+6
865 1.1 mrg #define A7 A0+7
866 1.1 mrg
867 1.1 mrg ;; B[0..7]: In: Multiplier
868 1.1 mrg #define B0 10
869 1.1 mrg #define B1 B0+1
870 1.1 mrg #define B2 B0+2
871 1.1 mrg #define B3 B0+3
872 1.1 mrg #define B4 B0+4
873 1.1 mrg #define B5 B0+5
874 1.1.1.2 mrg #define B6 B0+6
875 1.1 mrg #define B7 B0+7
876 1.1 mrg
877 1.1 mrg #ifndef __AVR_TINY__
878 1.1 mrg #if defined (__AVR_HAVE_MUL__)
879 1.1 mrg ;; Define C[] for convenience
880 1.1 mrg ;; Notice that parts of C[] overlap A[] respective B[]
881 1.1 mrg #define C0 16
882 1.1 mrg #define C1 C0+1
883 1.1 mrg #define C2 20
884 1.1 mrg #define C3 C2+1
885 1.1 mrg #define C4 28
886 1.1 mrg #define C5 C4+1
887 1.1 mrg #define C6 C4+2
888 1.1 mrg #define C7 C4+3
889 1.1 mrg
890 1.1 mrg #if defined (L_muldi3)
891 1.1 mrg
892 1.1 mrg ;; A[] *= B[]
893 1.1 mrg ;; R25:R18 *= R17:R10
894 1.1 mrg ;; Ordinary ABI-Function
895 1.1 mrg
;; 64 x 64 -> 64 using MUL, decomposed as a 4 x 4 multiplication of
;; 16-bit words.  Word products that only touch the top bytes are done
;; with bare MULs up front; the word-sized partial products go through
;; __umulhisi3 / __muldi3_6.  r28/r29/r17/r16 are call-saved and
;; overlap C[] / scratch, hence the saves.
896 1.1 mrg DEFUN __muldi3
897 1.1 mrg push r29
898 1.1 mrg push r28
899 1.1 mrg push r17
900 1.1 mrg push r16
901 1.1 mrg
902 1.1 mrg ;; Counting in Words, we have to perform a 4 * 4 Multiplication
903 1.1 mrg
904 1.1 mrg ;; 3 * 0 + 0 * 3
905 1.1 mrg mul A7,B0 $ $ mov C7,r0
906 1.1 mrg mul A0,B7 $ $ add C7,r0
907 1.1 mrg mul A6,B1 $ $ add C7,r0
908 1.1 mrg mul A6,B0 $ mov C6,r0 $ add C7,r1
909 1.1 mrg mul B6,A1 $ $ add C7,r0
910 1.1 mrg mul B6,A0 $ add C6,r0 $ adc C7,r1
911 1.1 mrg
912 1.1 mrg ;; 1 * 2
913 1.1 mrg mul A2,B4 $ add C6,r0 $ adc C7,r1
914 1.1 mrg mul A3,B4 $ $ add C7,r0
915 1.1 mrg mul A2,B5 $ $ add C7,r0
916 1.1 mrg
;; park operand words that will be clobbered by the helper calls
917 1.1 mrg push A5
918 1.1 mrg push A4
919 1.1 mrg push B1
920 1.1 mrg push B0
921 1.1 mrg push A3
922 1.1 mrg push A2
923 1.1 mrg
924 1.1 mrg ;; 0 * 0
925 1.1 mrg wmov 26, B0
926 1.1 mrg XCALL __umulhisi3
927 1.1 mrg wmov C0, 22
928 1.1 mrg wmov C2, 24
929 1.1 mrg
930 1.1 mrg ;; 0 * 2
931 1.1 mrg wmov 26, B4
932 1.1 mrg XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
933 1.1 mrg
934 1.1 mrg wmov 26, B2
935 1.1 mrg ;; 0 * 1
936 1.1 mrg XCALL __muldi3_6
937 1.1 mrg
938 1.1 mrg pop A0
939 1.1 mrg pop A1
940 1.1 mrg ;; 1 * 1
941 1.1 mrg wmov 26, B2
942 1.1 mrg XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
943 1.1 mrg
944 1.1 mrg pop r26
945 1.1 mrg pop r27
946 1.1 mrg ;; 1 * 0
947 1.1 mrg XCALL __muldi3_6
948 1.1 mrg
949 1.1 mrg pop A0
950 1.1 mrg pop A1
951 1.1 mrg ;; 2 * 0
952 1.1 mrg XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
953 1.1 mrg
954 1.1 mrg ;; 2 * 1
955 1.1 mrg wmov 26, B2
956 1.1 mrg XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
957 1.1 mrg
958 1.1 mrg ;; A[] = C[]
959 1.1 mrg wmov A0, C0
960 1.1 mrg ;; A2 = C2 already
961 1.1 mrg wmov A4, C4
962 1.1 mrg wmov A6, C6
963 1.1 mrg
964 1.1 mrg clr __zero_reg__
965 1.1 mrg pop r16
966 1.1 mrg pop r17
967 1.1 mrg pop r28
968 1.1 mrg pop r29
969 1.1 mrg ret
970 1.1 mrg ENDF __muldi3
971 1.1 mrg #endif /* L_muldi3 */
972 1.1 mrg
973 1.1 mrg #if defined (L_muldi3_6)
974 1.1 mrg ;; A helper for some 64-bit multiplications with MUL available
;; __muldi3_6: shared tail for __muldi3 -- multiply (via __umulhisi3)
;; and accumulate the 32-bit partial product at byte offset 2 of C[],
;; propagating a final carry into C7:C6 with ADIW.
;; NOTE(review): DEFUN already emits the `__muldi3_6:` label, so the
;; explicit label on the next line looks redundant/duplicate -- verify
;; against the assembler before touching it.
975 1.1 mrg DEFUN __muldi3_6
976 1.1 mrg __muldi3_6:
977 1.1 mrg XCALL __umulhisi3
978 1.1 mrg add C2, 22
979 1.1 mrg adc C3, 23
980 1.1 mrg adc C4, 24
981 1.1 mrg adc C5, 25
982 1.1 mrg brcc 0f
983 1.1 mrg adiw C6, 1
984 1.1 mrg 0: ret
985 1.1 mrg ENDF __muldi3_6
986 1.1 mrg #endif /* L_muldi3_6 */
987 1.1 mrg
988 1.1 mrg #undef C7
989 1.1 mrg #undef C6
990 1.1 mrg #undef C5
991 1.1 mrg #undef C4
992 1.1 mrg #undef C3
993 1.1 mrg #undef C2
994 1.1 mrg #undef C1
995 1.1 mrg #undef C0
996 1.1 mrg
997 1.1 mrg #else /* !HAVE_MUL */
998 1.1 mrg
999 1.1 mrg #if defined (L_muldi3)
1000 1.1 mrg
1001 1.1 mrg #define C0 26
1002 1.1 mrg #define C1 C0+1
1003 1.1 mrg #define C2 C0+2
1004 1.1 mrg #define C3 C0+3
1005 1.1 mrg #define C4 C0+4
1006 1.1 mrg #define C5 C0+5
1007 1.1 mrg #define C6 0
1008 1.1 mrg #define C7 C6+1
1009 1.1 mrg
1010 1.1 mrg #define Loop 9
1011 1.1 mrg
1012 1.1 mrg ;; A[] *= B[]
1013 1.1 mrg ;; R25:R18 *= R17:R10
1014 1.1 mrg ;; Ordinary ABI-Function
1015 1.1 mrg
;; 64 x 64 -> 64 shift-and-add multiply without MUL: 64 rounds, each
;; rotating one bit of B into carry, conditionally adding A * 2^N into
;; C[], and doubling A.  B is rotated fully around (>>> 64) so it ends
;; with its initial value.  Loop counter lives in r9 (call-saved).
1016 1.1 mrg DEFUN __muldi3
1017 1.1 mrg push r29
1018 1.1 mrg push r28
1019 1.1 mrg push Loop
1020 1.1 mrg
1021 1.1 mrg ldi C0, 64
1022 1.1 mrg mov Loop, C0
1023 1.1 mrg
1024 1.1 mrg ;; C[] = 0
1025 1.1 mrg clr __tmp_reg__
1026 1.1 mrg wmov C0, 0
1027 1.1 mrg wmov C2, 0
1028 1.1 mrg wmov C4, 0
1029 1.1 mrg
1030 1.1 mrg 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1031 1.1 mrg ;; where N = 64 - Loop.
1032 1.1 mrg ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1033 1.1 mrg ;; B[] will have its initial Value again.
1034 1.1 mrg LSR B7 $ ror B6 $ ror B5 $ ror B4
1035 1.1 mrg ror B3 $ ror B2 $ ror B1 $ ror B0
1036 1.1 mrg
1037 1.1 mrg ;; If the N-th Bit of B[] was set then...
1038 1.1 mrg brcc 1f
1039 1.1 mrg ;; ...finish Rotation...
1040 1.1 mrg ori B7, 1 << 7
1041 1.1 mrg
1042 1.1 mrg ;; ...and add A[] * 2^N to the Result C[]
1043 1.1 mrg ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1044 1.1 mrg adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1045 1.1 mrg
1046 1.1 mrg 1: ;; Multiply A[] by 2
1047 1.1 mrg LSL A0 $ rol A1 $ rol A2 $ rol A3
1048 1.1 mrg rol A4 $ rol A5 $ rol A6 $ rol A7
1049 1.1 mrg
1050 1.1 mrg dec Loop
1051 1.1 mrg brne 0b
1052 1.1 mrg
1053 1.1 mrg ;; We expanded the Result in C[]
1054 1.1 mrg ;; Copy Result to the Return Register A[]
1055 1.1 mrg wmov A0, C0
1056 1.1 mrg wmov A2, C2
1057 1.1 mrg wmov A4, C4
1058 1.1 mrg wmov A6, C6
1059 1.1 mrg
;; C7 is r1 here (C6 = 0, C7 = C6+1); re-establish the zero register
1060 1.1 mrg clr __zero_reg__
1061 1.1 mrg pop Loop
1062 1.1 mrg pop r28
1063 1.1 mrg pop r29
1064 1.1 mrg ret
1065 1.1 mrg ENDF __muldi3
1066 1.1 mrg
1067 1.1 mrg #undef Loop
1068 1.1 mrg
1069 1.1 mrg #undef C7
1070 1.1 mrg #undef C6
1071 1.1 mrg #undef C5
1072 1.1 mrg #undef C4
1073 1.1 mrg #undef C3
1074 1.1 mrg #undef C2
1075 1.1 mrg #undef C1
1076 1.1 mrg #undef C0
1077 1.1.1.2 mrg
1078 1.1 mrg #endif /* L_muldi3 */
1079 1.1 mrg #endif /* HAVE_MUL */
1080 1.1 mrg #endif /* if not __AVR_TINY__ */
1081 1.1 mrg
1082 1.1 mrg #undef B7
1083 1.1 mrg #undef B6
1084 1.1 mrg #undef B5
1085 1.1 mrg #undef B4
1086 1.1 mrg #undef B3
1087 1.1 mrg #undef B2
1088 1.1 mrg #undef B1
1089 1.1 mrg #undef B0
1090 1.1 mrg
1091 1.1 mrg #undef A7
1092 1.1 mrg #undef A6
1093 1.1 mrg #undef A5
1094 1.1 mrg #undef A4
1095 1.1 mrg #undef A3
1096 1.1 mrg #undef A2
1097 1.1 mrg #undef A1
1098 1.1 mrg #undef A0
1099 1.1 mrg
1100 1.1 mrg /*******************************************************
1101 1.1 mrg Widening Multiplication 64 = 32 x 32 with MUL
1102 1.1 mrg *******************************************************/
1103 1.1 mrg
1104 1.1 mrg #if defined (__AVR_HAVE_MUL__)
1105 1.1 mrg #define A0 r22
1106 1.1 mrg #define A1 r23
1107 1.1 mrg #define A2 r24
1108 1.1 mrg #define A3 r25
1109 1.1 mrg
1110 1.1 mrg #define B0 r18
1111 1.1 mrg #define B1 r19
1112 1.1 mrg #define B2 r20
1113 1.1 mrg #define B3 r21
1114 1.1 mrg
1115 1.1 mrg #define C0 18
1116 1.1 mrg #define C1 C0+1
1117 1.1 mrg #define C2 20
1118 1.1 mrg #define C3 C2+1
1119 1.1 mrg #define C4 28
1120 1.1 mrg #define C5 C4+1
1121 1.1 mrg #define C6 C4+2
1122 1.1 mrg #define C7 C4+3
1123 1.1 mrg
1124 1.1 mrg #if defined (L_umulsidi3)
1125 1.1 mrg
1126 1.1 mrg ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1127 1.1 mrg
1128 1.1 mrg ;; R18[8] = R22[4] * R18[4]
1129 1.1 mrg ;;
1130 1.1 mrg ;; Ordinary ABI Function, but additionally sets
1131 1.1 mrg ;; X = R20[2] = B2[2]
1132 1.1 mrg ;; Z = R22[2] = A0[2]
1133 1.1 mrg DEFUN __umulsidi3
1134 1.1 mrg clt
1135 1.1 mrg ;; FALLTHRU
1136 1.1 mrg ENDF __umulsidi3
1137 1.1 mrg ;; T = sign (A)
1138 1.1 mrg DEFUN __umulsidi3_helper
1139 1.1 mrg push 29 $ push 28 ; Y
1140 1.1 mrg wmov 30, A2
1141 1.1 mrg ;; Counting in Words, we have to perform 4 Multiplications
1142 1.1 mrg ;; 0 * 0
1143 1.1 mrg wmov 26, A0
1144 1.1 mrg XCALL __umulhisi3
1145 1.1 mrg push 23 $ push 22 ; C0
1146 1.1 mrg wmov 28, B0
1147 1.1 mrg wmov 18, B2
1148 1.1 mrg wmov C2, 24
1149 1.1 mrg push 27 $ push 26 ; A0
1150 1.1 mrg push 19 $ push 18 ; B2
1151 1.1 mrg ;;
1152 1.1 mrg ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1153 1.1 mrg ;; B2 C2 -- -- -- B0 A2
1154 1.1 mrg ;; 1 * 1
1155 1.1 mrg wmov 26, 30 ; A2
1156 1.1 mrg XCALL __umulhisi3
1157 1.1 mrg ;; Sign-extend A. T holds the sign of A
1158 1.1 mrg brtc 0f
1159 1.1 mrg ;; Subtract B from the high part of the result
1160 1.1 mrg sub 22, 28
1161 1.1 mrg sbc 23, 29
1162 1.1 mrg sbc 24, 18
1163 1.1 mrg sbc 25, 19
1164 1.1 mrg 0: wmov 18, 28 ;; B0
1165 1.1 mrg wmov C4, 22
1166 1.1 mrg wmov C6, 24
1167 1.1 mrg ;;
1168 1.1 mrg ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1169 1.1 mrg ;; B0 C2 -- -- A2 C4 C6
1170 1.1 mrg ;;
1171 1.1 mrg ;; 1 * 0
1172 1.1 mrg XCALL __muldi3_6
1173 1.1 mrg ;; 0 * 1
1174 1.1 mrg pop 26 $ pop 27 ;; B2
1175 1.1 mrg pop 18 $ pop 19 ;; A0
1176 1.1 mrg XCALL __muldi3_6
1177 1.1 mrg
1178 1.1 mrg ;; Move result C into place and save A0 in Z
1179 1.1 mrg wmov 22, C4
1180 1.1 mrg wmov 24, C6
1181 1.1 mrg wmov 30, 18 ; A0
1182 1.1 mrg pop C0 $ pop C1
1183 1.1 mrg
1184 1.1 mrg ;; Epilogue
1185 1.1 mrg pop 28 $ pop 29 ;; Y
1186 1.1 mrg ret
1187 1.1 mrg ENDF __umulsidi3_helper
1188 1.1 mrg #endif /* L_umulsidi3 */
1189 1.1 mrg
1190 1.1 mrg
1191 1.1 mrg #if defined (L_mulsidi3)
1192 1.1 mrg
1193 1.1 mrg ;; Signed widening 64 = 32 * 32 Multiplication
1194 1.1 mrg ;;
1195 1.1 mrg ;; R18[8] = R22[4] * R18[4]
1196 1.1 mrg ;; Ordinary ABI Function
1197 1.1 mrg DEFUN __mulsidi3
1198 1.1 mrg bst A3, 7
1199 1.1 mrg sbrs B3, 7 ; Enhanced core has no skip bug
1200 1.1 mrg XJMP __umulsidi3_helper
1201 1.1 mrg
1202 1.1 mrg ;; B needs sign-extension
1203 1.1 mrg push A3
1204 1.1 mrg push A2
1205 1.1 mrg XCALL __umulsidi3_helper
1206 1.1 mrg ;; A0 survived in Z
1207 1.1 mrg sub r22, r30
1208 1.1 mrg sbc r23, r31
1209 1.1 mrg pop r26
1210 1.1 mrg pop r27
1211 1.1 mrg sbc r24, r26
1212 1.1 mrg sbc r25, r27
1213 1.1 mrg ret
1214 1.1 mrg ENDF __mulsidi3
1215 1.1 mrg #endif /* L_mulsidi3 */
1216 1.1 mrg
1217 1.1 mrg #undef A0
1218 1.1 mrg #undef A1
1219 1.1 mrg #undef A2
1220 1.1 mrg #undef A3
1221 1.1 mrg #undef B0
1222 1.1 mrg #undef B1
1223 1.1 mrg #undef B2
1224 1.1 mrg #undef B3
1225 1.1 mrg #undef C0
1226 1.1 mrg #undef C1
1227 1.1 mrg #undef C2
1228 1.1 mrg #undef C3
1229 1.1 mrg #undef C4
1230 1.1 mrg #undef C5
1231 1.1 mrg #undef C6
1232 1.1 mrg #undef C7
1233 1.1 mrg #endif /* HAVE_MUL */
1234 1.1 mrg
1235 1.1.1.2 mrg /**********************************************************
1236 1.1 mrg Widening Multiplication 64 = 32 x 32 without MUL
1237 1.1 mrg **********************************************************/
1238 1.1 mrg #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1239 1.1 mrg #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1240 1.1 mrg #define A0 18
1241 1.1 mrg #define A1 A0+1
1242 1.1 mrg #define A2 A0+2
1243 1.1 mrg #define A3 A0+3
1244 1.1 mrg #define A4 A0+4
1245 1.1 mrg #define A5 A0+5
1246 1.1 mrg #define A6 A0+6
1247 1.1 mrg #define A7 A0+7
1248 1.1 mrg
1249 1.1 mrg #define B0 10
1250 1.1 mrg #define B1 B0+1
1251 1.1 mrg #define B2 B0+2
1252 1.1 mrg #define B3 B0+3
1253 1.1 mrg #define B4 B0+4
1254 1.1 mrg #define B5 B0+5
1255 1.1 mrg #define B6 B0+6
1256 1.1 mrg #define B7 B0+7
1257 1.1 mrg
1258 1.1 mrg #define AA0 22
1259 1.1 mrg #define AA1 AA0+1
1260 1.1 mrg #define AA2 AA0+2
1261 1.1 mrg #define AA3 AA0+3
1262 1.1 mrg
1263 1.1 mrg #define BB0 18
1264 1.1 mrg #define BB1 BB0+1
1265 1.1 mrg #define BB2 BB0+2
1266 1.1 mrg #define BB3 BB0+3
1267 1.1 mrg
1268 1.1 mrg #define Mask r30
1269 1.1 mrg
1270 1.1 mrg ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1271 1.1 mrg ;;
1272 1.1 mrg ;; R18[8] = R22[4] * R18[4]
1273 1.1 mrg ;; Ordinary ABI Function
1274 1.1 mrg DEFUN __mulsidi3
1275 1.1 mrg set
1276 1.1 mrg skip
1277 1.1 mrg ;; FALLTHRU
1278 1.1 mrg ENDF __mulsidi3
1279 1.1 mrg
1280 1.1 mrg DEFUN __umulsidi3
1281 1.1 mrg clt ; skipped
1282 1.1 mrg ;; Save 10 Registers: R10..R17, R28, R29
1283 1.1 mrg do_prologue_saves 10
1284 1.1 mrg ldi Mask, 0xff
1285 1.1 mrg bld Mask, 7
1286 1.1 mrg ;; Move B into place...
1287 1.1 mrg wmov B0, BB0
1288 1.1 mrg wmov B2, BB2
1289 1.1 mrg ;; ...and extend it
1290 1.1 mrg and BB3, Mask
1291 1.1 mrg lsl BB3
1292 1.1 mrg sbc B4, B4
1293 1.1 mrg mov B5, B4
1294 1.1 mrg wmov B6, B4
1295 1.1 mrg ;; Move A into place...
1296 1.1 mrg wmov A0, AA0
1297 1.1 mrg wmov A2, AA2
1298 1.1 mrg ;; ...and extend it
1299 1.1 mrg and AA3, Mask
1300 1.1 mrg lsl AA3
1301 1.1 mrg sbc A4, A4
1302 1.1 mrg mov A5, A4
1303 1.1 mrg wmov A6, A4
1304 1.1 mrg XCALL __muldi3
1305 1.1 mrg do_epilogue_restores 10
1306 1.1 mrg ENDF __umulsidi3
1307 1.1 mrg
1308 1.1 mrg #undef A0
1309 1.1 mrg #undef A1
1310 1.1 mrg #undef A2
1311 1.1 mrg #undef A3
1312 1.1 mrg #undef A4
1313 1.1 mrg #undef A5
1314 1.1 mrg #undef A6
1315 1.1 mrg #undef A7
1316 1.1 mrg #undef B0
1317 1.1 mrg #undef B1
1318 1.1 mrg #undef B2
1319 1.1 mrg #undef B3
1320 1.1 mrg #undef B4
1321 1.1 mrg #undef B5
1322 1.1 mrg #undef B6
1323 1.1 mrg #undef B7
1324 1.1 mrg #undef AA0
1325 1.1 mrg #undef AA1
1326 1.1 mrg #undef AA2
1327 1.1 mrg #undef AA3
1328 1.1 mrg #undef BB0
1329 1.1 mrg #undef BB1
1330 1.1 mrg #undef BB2
1331 1.1.1.2 mrg #undef BB3
1332 1.1 mrg #undef Mask
1333 1.1 mrg #endif /* L_mulsidi3 && !HAVE_MUL */
1334 1.1 mrg #endif /* if not __AVR_TINY__ */
1335 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1336 1.1 mrg
1337 1.1 mrg
1338 1.1 mrg .section .text.libgcc.div, "ax", @progbits
1340 1.1 mrg
1341 1.1 mrg /*******************************************************
1342 1.1 mrg Division 8 / 8 => (result + remainder)
1343 1.1 mrg *******************************************************/
1344 1.1 mrg #define r_rem r25 /* remainder */
1345 1.1 mrg #define r_arg1 r24 /* dividend, quotient */
1346 1.1 mrg #define r_arg2 r22 /* divisor */
1347 1.1 mrg #define r_cnt r23 /* loop count */
1348 1.1 mrg
1349 1.1 mrg #if defined (L_udivmodqi4)
1350 1.1 mrg DEFUN __udivmodqi4
1351 1.1 mrg sub r_rem,r_rem ; clear remainder and carry
1352 1.1 mrg ldi r_cnt,9 ; init loop counter
1353 1.1 mrg rjmp __udivmodqi4_ep ; jump to entry point
1354 1.1 mrg __udivmodqi4_loop:
1355 1.1 mrg rol r_rem ; shift dividend into remainder
1356 1.1 mrg cp r_rem,r_arg2 ; compare remainder & divisor
1357 1.1 mrg brcs __udivmodqi4_ep ; remainder <= divisor
1358 1.1 mrg sub r_rem,r_arg2 ; restore remainder
1359 1.1 mrg __udivmodqi4_ep:
1360 1.1 mrg rol r_arg1 ; shift dividend (with CARRY)
1361 1.1 mrg dec r_cnt ; decrement loop counter
1362 1.1 mrg brne __udivmodqi4_loop
1363 1.1 mrg com r_arg1 ; complement result
1364 1.1 mrg ; because C flag was complemented in loop
1365 1.1 mrg ret
1366 1.1 mrg ENDF __udivmodqi4
1367 1.1 mrg #endif /* defined (L_udivmodqi4) */
1368 1.1 mrg
1369 1.1 mrg #if defined (L_divmodqi4)
1370 1.1 mrg DEFUN __divmodqi4
1371 1.1 mrg bst r_arg1,7 ; store sign of dividend
1372 1.1 mrg mov __tmp_reg__,r_arg1
1373 1.1 mrg eor __tmp_reg__,r_arg2; r0.7 is sign of result
1374 1.1 mrg sbrc r_arg1,7
1375 1.1 mrg neg r_arg1 ; dividend negative : negate
1376 1.1 mrg sbrc r_arg2,7
1377 1.1 mrg neg r_arg2 ; divisor negative : negate
1378 1.1 mrg XCALL __udivmodqi4 ; do the unsigned div/mod
1379 1.1 mrg brtc __divmodqi4_1
1380 1.1 mrg neg r_rem ; correct remainder sign
1381 1.1 mrg __divmodqi4_1:
1382 1.1 mrg sbrc __tmp_reg__,7
1383 1.1 mrg neg r_arg1 ; correct result sign
1384 1.1 mrg __divmodqi4_exit:
1385 1.1 mrg ret
1386 1.1 mrg ENDF __divmodqi4
1387 1.1 mrg #endif /* defined (L_divmodqi4) */
1388 1.1 mrg
1389 1.1 mrg #undef r_rem
1390 1.1 mrg #undef r_arg1
1391 1.1 mrg #undef r_arg2
1392 1.1 mrg #undef r_cnt
1393 1.1 mrg
1394 1.1 mrg
1395 1.1 mrg /*******************************************************
1396 1.1 mrg Division 16 / 16 => (result + remainder)
1397 1.1 mrg *******************************************************/
1398 1.1 mrg #define r_remL r26 /* remainder Low */
1399 1.1 mrg #define r_remH r27 /* remainder High */
1400 1.1 mrg
1401 1.1 mrg /* return: remainder */
1402 1.1 mrg #define r_arg1L r24 /* dividend Low */
1403 1.1 mrg #define r_arg1H r25 /* dividend High */
1404 1.1 mrg
1405 1.1 mrg /* return: quotient */
1406 1.1 mrg #define r_arg2L r22 /* divisor Low */
1407 1.1 mrg #define r_arg2H r23 /* divisor High */
1408 1.1 mrg
1409 1.1 mrg #define r_cnt r21 /* loop count */
1410 1.1 mrg
1411 1.1 mrg #if defined (L_udivmodhi4)
1412 1.1 mrg DEFUN __udivmodhi4
1413 1.1 mrg sub r_remL,r_remL
1414 1.1 mrg sub r_remH,r_remH ; clear remainder and carry
1415 1.1 mrg ldi r_cnt,17 ; init loop counter
1416 1.1 mrg rjmp __udivmodhi4_ep ; jump to entry point
1417 1.1 mrg __udivmodhi4_loop:
1418 1.1 mrg rol r_remL ; shift dividend into remainder
1419 1.1 mrg rol r_remH
1420 1.1 mrg cp r_remL,r_arg2L ; compare remainder & divisor
1421 1.1 mrg cpc r_remH,r_arg2H
1422 1.1 mrg brcs __udivmodhi4_ep ; remainder < divisor
1423 1.1 mrg sub r_remL,r_arg2L ; restore remainder
1424 1.1 mrg sbc r_remH,r_arg2H
1425 1.1 mrg __udivmodhi4_ep:
1426 1.1 mrg rol r_arg1L ; shift dividend (with CARRY)
1427 1.1 mrg rol r_arg1H
1428 1.1 mrg dec r_cnt ; decrement loop counter
1429 1.1 mrg brne __udivmodhi4_loop
1430 1.1 mrg com r_arg1L
1431 1.1 mrg com r_arg1H
1432 1.1 mrg ; div/mod results to return registers, as for the div() function
1433 1.1 mrg mov_l r_arg2L, r_arg1L ; quotient
1434 1.1 mrg mov_h r_arg2H, r_arg1H
1435 1.1 mrg mov_l r_arg1L, r_remL ; remainder
1436 1.1 mrg mov_h r_arg1H, r_remH
1437 1.1 mrg ret
1438 1.1 mrg ENDF __udivmodhi4
1439 1.1 mrg #endif /* defined (L_udivmodhi4) */
1440 1.1 mrg
1441 1.1 mrg #if defined (L_divmodhi4)
1442 1.1 mrg DEFUN __divmodhi4
1443 1.1 mrg .global _div
1444 1.1 mrg _div:
1445 1.1 mrg bst r_arg1H,7 ; store sign of dividend
1446 1.1 mrg mov __tmp_reg__,r_arg2H
1447 1.1 mrg brtc 0f
1448 1.1 mrg com __tmp_reg__ ; r0.7 is sign of result
1449 1.1 mrg rcall __divmodhi4_neg1 ; dividend negative: negate
1450 1.1 mrg 0:
1451 1.1 mrg sbrc r_arg2H,7
1452 1.1 mrg rcall __divmodhi4_neg2 ; divisor negative: negate
1453 1.1 mrg XCALL __udivmodhi4 ; do the unsigned div/mod
1454 1.1 mrg sbrc __tmp_reg__,7
1455 1.1 mrg rcall __divmodhi4_neg2 ; correct remainder sign
1456 1.1 mrg brtc __divmodhi4_exit
1457 1.1 mrg __divmodhi4_neg1:
1458 1.1 mrg ;; correct dividend/remainder sign
1459 1.1 mrg com r_arg1H
1460 1.1 mrg neg r_arg1L
1461 1.1 mrg sbci r_arg1H,0xff
1462 1.1 mrg ret
1463 1.1 mrg __divmodhi4_neg2:
1464 1.1 mrg ;; correct divisor/result sign
1465 1.1 mrg com r_arg2H
1466 1.1 mrg neg r_arg2L
1467 1.1 mrg sbci r_arg2H,0xff
1468 1.1 mrg __divmodhi4_exit:
1469 1.1 mrg ret
1470 1.1 mrg ENDF __divmodhi4
1471 1.1 mrg #endif /* defined (L_divmodhi4) */
1472 1.1 mrg
1473 1.1 mrg #undef r_remH
1474 1.1 mrg #undef r_remL
1475 1.1 mrg
1476 1.1 mrg #undef r_arg1H
1477 1.1 mrg #undef r_arg1L
1478 1.1 mrg
1479 1.1 mrg #undef r_arg2H
1480 1.1 mrg #undef r_arg2L
1481 1.1 mrg
1482 1.1 mrg #undef r_cnt
1483 1.1 mrg
1484 1.1 mrg /*******************************************************
1485 1.1 mrg Division 24 / 24 => (result + remainder)
1486 1.1 mrg *******************************************************/
1487 1.1 mrg
1488 1.1 mrg ;; A[0..2]: In: Dividend; Out: Quotient
1489 1.1 mrg #define A0 22
1490 1.1 mrg #define A1 A0+1
1491 1.1 mrg #define A2 A0+2
1492 1.1 mrg
1493 1.1 mrg ;; B[0..2]: In: Divisor; Out: Remainder
1494 1.1 mrg #define B0 18
1495 1.1 mrg #define B1 B0+1
1496 1.1 mrg #define B2 B0+2
1497 1.1 mrg
1498 1.1 mrg ;; C[0..2]: Expand remainder
1499 1.1 mrg #define C0 __zero_reg__
1500 1.1 mrg #define C1 26
1501 1.1 mrg #define C2 25
1502 1.1 mrg
1503 1.1.1.2 mrg ;; Loop counter
1504 1.1 mrg #define r_cnt 21
1505 1.1 mrg
1506 1.1 mrg #if defined (L_udivmodpsi4)
1507 1.1 mrg ;; R24:R22 = R24:R24 udiv R20:R18
1508 1.1 mrg ;; R20:R18 = R24:R22 umod R20:R18
1509 1.1 mrg ;; Clobbers: R21, R25, R26
1510 1.1 mrg
1511 1.1 mrg DEFUN __udivmodpsi4
1512 1.1 mrg ; init loop counter
1513 1.1 mrg ldi r_cnt, 24+1
1514 1.1 mrg ; Clear remainder and carry. C0 is already 0
1515 1.1 mrg clr C1
1516 1.1 mrg sub C2, C2
1517 1.1 mrg ; jump to entry point
1518 1.1 mrg rjmp __udivmodpsi4_start
1519 1.1 mrg __udivmodpsi4_loop:
1520 1.1 mrg ; shift dividend into remainder
1521 1.1 mrg rol C0
1522 1.1 mrg rol C1
1523 1.1 mrg rol C2
1524 1.1 mrg ; compare remainder & divisor
1525 1.1 mrg cp C0, B0
1526 1.1 mrg cpc C1, B1
1527 1.1 mrg cpc C2, B2
1528 1.1 mrg brcs __udivmodpsi4_start ; remainder <= divisor
1529 1.1 mrg sub C0, B0 ; restore remainder
1530 1.1 mrg sbc C1, B1
1531 1.1 mrg sbc C2, B2
1532 1.1 mrg __udivmodpsi4_start:
1533 1.1 mrg ; shift dividend (with CARRY)
1534 1.1 mrg rol A0
1535 1.1 mrg rol A1
1536 1.1 mrg rol A2
1537 1.1 mrg ; decrement loop counter
1538 1.1 mrg dec r_cnt
1539 1.1 mrg brne __udivmodpsi4_loop
1540 1.1 mrg com A0
1541 1.1 mrg com A1
1542 1.1 mrg com A2
1543 1.1 mrg ; div/mod results to return registers
1544 1.1 mrg ; remainder
1545 1.1 mrg mov B0, C0
1546 1.1 mrg mov B1, C1
1547 1.1 mrg mov B2, C2
1548 1.1 mrg clr __zero_reg__ ; C0
1549 1.1 mrg ret
1550 1.1 mrg ENDF __udivmodpsi4
1551 1.1 mrg #endif /* defined (L_udivmodpsi4) */
1552 1.1 mrg
1553 1.1 mrg #if defined (L_divmodpsi4)
1554 1.1 mrg ;; R24:R22 = R24:R22 div R20:R18
1555 1.1 mrg ;; R20:R18 = R24:R22 mod R20:R18
1556 1.1 mrg ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1557 1.1 mrg
1558 1.1 mrg DEFUN __divmodpsi4
1559 1.1 mrg ; R0.7 will contain the sign of the result:
1560 1.1 mrg ; R0.7 = A.sign ^ B.sign
1561 1.1 mrg mov __tmp_reg__, B2
1562 1.1 mrg ; T-flag = sign of dividend
1563 1.1 mrg bst A2, 7
1564 1.1 mrg brtc 0f
1565 1.1 mrg com __tmp_reg__
1566 1.1 mrg ; Adjust dividend's sign
1567 1.1 mrg rcall __divmodpsi4_negA
1568 1.1 mrg 0:
1569 1.1 mrg ; Adjust divisor's sign
1570 1.1 mrg sbrc B2, 7
1571 1.1 mrg rcall __divmodpsi4_negB
1572 1.1 mrg
1573 1.1 mrg ; Do the unsigned div/mod
1574 1.1 mrg XCALL __udivmodpsi4
1575 1.1 mrg
1576 1.1 mrg ; Adjust quotient's sign
1577 1.1 mrg sbrc __tmp_reg__, 7
1578 1.1 mrg rcall __divmodpsi4_negA
1579 1.1 mrg
1580 1.1 mrg ; Adjust remainder's sign
1581 1.1 mrg brtc __divmodpsi4_end
1582 1.1 mrg
1583 1.1 mrg __divmodpsi4_negB:
1584 1.1 mrg ; Correct divisor/remainder sign
1585 1.1 mrg com B2
1586 1.1 mrg com B1
1587 1.1 mrg neg B0
1588 1.1 mrg sbci B1, -1
1589 1.1 mrg sbci B2, -1
1590 1.1 mrg ret
1591 1.1 mrg
1592 1.1 mrg ; Correct dividend/quotient sign
1593 1.1 mrg __divmodpsi4_negA:
1594 1.1 mrg com A2
1595 1.1 mrg com A1
1596 1.1 mrg neg A0
1597 1.1 mrg sbci A1, -1
1598 1.1 mrg sbci A2, -1
1599 1.1 mrg __divmodpsi4_end:
1600 1.1 mrg ret
1601 1.1 mrg
1602 1.1 mrg ENDF __divmodpsi4
1603 1.1 mrg #endif /* defined (L_divmodpsi4) */
1604 1.1 mrg
1605 1.1 mrg #undef A0
1606 1.1 mrg #undef A1
1607 1.1 mrg #undef A2
1608 1.1 mrg
1609 1.1 mrg #undef B0
1610 1.1 mrg #undef B1
1611 1.1 mrg #undef B2
1612 1.1 mrg
1613 1.1 mrg #undef C0
1614 1.1 mrg #undef C1
1615 1.1 mrg #undef C2
1616 1.1 mrg
1617 1.1 mrg #undef r_cnt
1618 1.1 mrg
1619 1.1 mrg /*******************************************************
1620 1.1 mrg Division 32 / 32 => (result + remainder)
1621 1.1 mrg *******************************************************/
1622 1.1 mrg #define r_remHH r31 /* remainder High */
1623 1.1 mrg #define r_remHL r30
1624 1.1 mrg #define r_remH r27
1625 1.1 mrg #define r_remL r26 /* remainder Low */
1626 1.1 mrg
1627 1.1 mrg /* return: remainder */
1628 1.1 mrg #define r_arg1HH r25 /* dividend High */
1629 1.1 mrg #define r_arg1HL r24
1630 1.1 mrg #define r_arg1H r23
1631 1.1 mrg #define r_arg1L r22 /* dividend Low */
1632 1.1 mrg
1633 1.1 mrg /* return: quotient */
1634 1.1 mrg #define r_arg2HH r21 /* divisor High */
1635 1.1 mrg #define r_arg2HL r20
1636 1.1 mrg #define r_arg2H r19
1637 1.1 mrg #define r_arg2L r18 /* divisor Low */
1638 1.1 mrg
1639 1.1 mrg #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1640 1.1 mrg
1641 1.1 mrg #if defined (L_udivmodsi4)
1642 1.1 mrg DEFUN __udivmodsi4
1643 1.1 mrg ldi r_remL, 33 ; init loop counter
1644 1.1 mrg mov r_cnt, r_remL
1645 1.1 mrg sub r_remL,r_remL
1646 1.1 mrg sub r_remH,r_remH ; clear remainder and carry
1647 1.1 mrg mov_l r_remHL, r_remL
1648 1.1 mrg mov_h r_remHH, r_remH
1649 1.1 mrg rjmp __udivmodsi4_ep ; jump to entry point
1650 1.1 mrg __udivmodsi4_loop:
1651 1.1 mrg rol r_remL ; shift dividend into remainder
1652 1.1 mrg rol r_remH
1653 1.1 mrg rol r_remHL
1654 1.1 mrg rol r_remHH
1655 1.1 mrg cp r_remL,r_arg2L ; compare remainder & divisor
1656 1.1 mrg cpc r_remH,r_arg2H
1657 1.1 mrg cpc r_remHL,r_arg2HL
1658 1.1 mrg cpc r_remHH,r_arg2HH
1659 1.1 mrg brcs __udivmodsi4_ep ; remainder <= divisor
1660 1.1 mrg sub r_remL,r_arg2L ; restore remainder
1661 1.1 mrg sbc r_remH,r_arg2H
1662 1.1 mrg sbc r_remHL,r_arg2HL
1663 1.1 mrg sbc r_remHH,r_arg2HH
1664 1.1 mrg __udivmodsi4_ep:
1665 1.1 mrg rol r_arg1L ; shift dividend (with CARRY)
1666 1.1 mrg rol r_arg1H
1667 1.1 mrg rol r_arg1HL
1668 1.1 mrg rol r_arg1HH
1669 1.1 mrg dec r_cnt ; decrement loop counter
1670 1.1 mrg brne __udivmodsi4_loop
1671 1.1 mrg ; __zero_reg__ now restored (r_cnt == 0)
1672 1.1 mrg com r_arg1L
1673 1.1 mrg com r_arg1H
1674 1.1 mrg com r_arg1HL
1675 1.1 mrg com r_arg1HH
1676 1.1 mrg ; div/mod results to return registers, as for the ldiv() function
1677 1.1 mrg mov_l r_arg2L, r_arg1L ; quotient
1678 1.1 mrg mov_h r_arg2H, r_arg1H
1679 1.1 mrg mov_l r_arg2HL, r_arg1HL
1680 1.1 mrg mov_h r_arg2HH, r_arg1HH
1681 1.1 mrg mov_l r_arg1L, r_remL ; remainder
1682 1.1 mrg mov_h r_arg1H, r_remH
1683 1.1 mrg mov_l r_arg1HL, r_remHL
1684 1.1 mrg mov_h r_arg1HH, r_remHH
1685 1.1 mrg ret
1686 1.1 mrg ENDF __udivmodsi4
1687 1.1 mrg #endif /* defined (L_udivmodsi4) */
1688 1.1 mrg
1689 1.1 mrg #if defined (L_divmodsi4)
1690 1.1 mrg DEFUN __divmodsi4
1691 1.1 mrg mov __tmp_reg__,r_arg2HH
1692 1.1 mrg bst r_arg1HH,7 ; store sign of dividend
1693 1.1 mrg brtc 0f
1694 1.1 mrg com __tmp_reg__ ; r0.7 is sign of result
1695 1.1 mrg XCALL __negsi2 ; dividend negative: negate
1696 1.1 mrg 0:
1697 1.1 mrg sbrc r_arg2HH,7
1698 1.1 mrg rcall __divmodsi4_neg2 ; divisor negative: negate
1699 1.1 mrg XCALL __udivmodsi4 ; do the unsigned div/mod
1700 1.1 mrg sbrc __tmp_reg__, 7 ; correct quotient sign
1701 1.1 mrg rcall __divmodsi4_neg2
1702 1.1 mrg brtc __divmodsi4_exit ; correct remainder sign
1703 1.1 mrg XJMP __negsi2
1704 1.1 mrg __divmodsi4_neg2:
1705 1.1 mrg ;; correct divisor/quotient sign
1706 1.1 mrg com r_arg2HH
1707 1.1 mrg com r_arg2HL
1708 1.1 mrg com r_arg2H
1709 1.1 mrg neg r_arg2L
1710 1.1 mrg sbci r_arg2H,0xff
1711 1.1 mrg sbci r_arg2HL,0xff
1712 1.1 mrg sbci r_arg2HH,0xff
1713 1.1 mrg __divmodsi4_exit:
1714 1.1 mrg ret
1715 1.1 mrg ENDF __divmodsi4
1716 1.1 mrg #endif /* defined (L_divmodsi4) */
1717 1.1 mrg
1718 1.1 mrg #if defined (L_negsi2)
1719 1.1 mrg ;; (set (reg:SI 22)
1720 1.1 mrg ;; (neg:SI (reg:SI 22)))
1721 1.1 mrg ;; Sets the V flag for signed overflow tests
1722 1.1 mrg DEFUN __negsi2
1723 1.1 mrg NEG4 22
1724 1.1 mrg ret
1725 1.1 mrg ENDF __negsi2
1726 1.1 mrg #endif /* L_negsi2 */
1727 1.1 mrg
1728 1.1 mrg #undef r_remHH
1729 1.1 mrg #undef r_remHL
1730 1.1 mrg #undef r_remH
1731 1.1 mrg #undef r_remL
1732 1.1 mrg #undef r_arg1HH
1733 1.1 mrg #undef r_arg1HL
1734 1.1 mrg #undef r_arg1H
1735 1.1 mrg #undef r_arg1L
1736 1.1 mrg #undef r_arg2HH
1737 1.1 mrg #undef r_arg2HL
1738 1.1.1.2 mrg #undef r_arg2H
1739 1.1.1.2 mrg #undef r_arg2L
1740 1.1.1.2 mrg #undef r_cnt
1741 1.1.1.2 mrg
1742 1.1 mrg /* *di routines use registers below R19 and won't work with tiny arch
1743 1.1 mrg right now. */
1744 1.1 mrg
1745 1.1 mrg #if !defined (__AVR_TINY__)
1746 1.1 mrg /*******************************************************
1747 1.1 mrg Division 64 / 64
1748 1.1 mrg Modulo 64 % 64
1749 1.1 mrg *******************************************************/
1750 1.1 mrg
1751 1.1 mrg ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1752 1.1 mrg ;; at least 16k of Program Memory. For smaller Devices, depend
1753 1.1 mrg ;; on MOVW and SP Size. There is a Connexion between SP Size and
1754 1.1 mrg ;; Flash Size so that SP Size can be used to test for Flash Size.
1755 1.1 mrg
1756 1.1 mrg #if defined (__AVR_HAVE_JMP_CALL__)
1757 1.1 mrg # define SPEED_DIV 8
1758 1.1 mrg #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1759 1.1 mrg # define SPEED_DIV 16
1760 1.1 mrg #else
1761 1.1 mrg # define SPEED_DIV 0
1762 1.1 mrg #endif
1763 1.1 mrg
1764 1.1 mrg ;; A[0..7]: In: Dividend;
1765 1.1 mrg ;; Out: Quotient (T = 0)
1766 1.1 mrg ;; Out: Remainder (T = 1)
1767 1.1 mrg #define A0 18
1768 1.1 mrg #define A1 A0+1
1769 1.1 mrg #define A2 A0+2
1770 1.1 mrg #define A3 A0+3
1771 1.1 mrg #define A4 A0+4
1772 1.1 mrg #define A5 A0+5
1773 1.1 mrg #define A6 A0+6
1774 1.1 mrg #define A7 A0+7
1775 1.1 mrg
1776 1.1 mrg ;; B[0..7]: In: Divisor; Out: Clobber
1777 1.1 mrg #define B0 10
1778 1.1 mrg #define B1 B0+1
1779 1.1 mrg #define B2 B0+2
1780 1.1 mrg #define B3 B0+3
1781 1.1 mrg #define B4 B0+4
1782 1.1 mrg #define B5 B0+5
1783 1.1 mrg #define B6 B0+6
1784 1.1 mrg #define B7 B0+7
1785 1.1 mrg
1786 1.1 mrg ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1787 1.1 mrg #define C0 8
1788 1.1 mrg #define C1 C0+1
1789 1.1 mrg #define C2 30
1790 1.1 mrg #define C3 C2+1
1791 1.1 mrg #define C4 28
1792 1.1 mrg #define C5 C4+1
1793 1.1 mrg #define C6 26
1794 1.1 mrg #define C7 C6+1
1795 1.1 mrg
1796 1.1 mrg ;; Holds Signs during Division Routine
1797 1.1 mrg #define SS __tmp_reg__
1798 1.1 mrg
1799 1.1 mrg ;; Bit-Counter in Division Routine
1800 1.1 mrg #define R_cnt __zero_reg__
1801 1.1 mrg
1802 1.1 mrg ;; Scratch Register for Negation
1803 1.1 mrg #define NN r31
1804 1.1 mrg
1805 1.1 mrg #if defined (L_udivdi3)
1806 1.1 mrg
1807 1.1 mrg ;; R25:R18 = R24:R18 umod R17:R10
1808 1.1 mrg ;; Ordinary ABI-Function
1809 1.1 mrg
1810 1.1 mrg DEFUN __umoddi3
1811 1.1 mrg set
1812 1.1 mrg rjmp __udivdi3_umoddi3
1813 1.1 mrg ENDF __umoddi3
1814 1.1 mrg
1815 1.1 mrg ;; R25:R18 = R24:R18 udiv R17:R10
1816 1.1 mrg ;; Ordinary ABI-Function
1817 1.1 mrg
1818 1.1 mrg DEFUN __udivdi3
1819 1.1 mrg clt
1820 1.1 mrg ENDF __udivdi3
1821 1.1 mrg
1822 1.1 mrg DEFUN __udivdi3_umoddi3
1823 1.1 mrg push C0
1824 1.1 mrg push C1
1825 1.1 mrg push C4
1826 1.1 mrg push C5
1827 1.1 mrg XCALL __udivmod64
1828 1.1 mrg pop C5
1829 1.1 mrg pop C4
1830 1.1 mrg pop C1
1831 1.1 mrg pop C0
1832 1.1 mrg ret
1833 1.1 mrg ENDF __udivdi3_umoddi3
1834 1.1 mrg #endif /* L_udivdi3 */
1835 1.1 mrg
1836 1.1 mrg #if defined (L_udivmod64)
1837 1.1 mrg
1838 1.1 mrg ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1839 1.1 mrg ;; No Registers saved/restored; the Callers will take Care.
1840 1.1 mrg ;; Preserves B[] and T-flag
1841 1.1 mrg ;; T = 0: Compute Quotient in A[]
1842 1.1 mrg ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1843 1.1 mrg
1844 1.1 mrg DEFUN __udivmod64
1845 1.1 mrg
1846 1.1 mrg ;; Clear Remainder (C6, C7 will follow)
1847 1.1 mrg clr C0
1848 1.1 mrg clr C1
1849 1.1 mrg wmov C2, C0
1850 1.1 mrg wmov C4, C0
1851 1.1 mrg ldi C7, 64
1852 1.1 mrg
1853 1.1 mrg #if SPEED_DIV == 0 || SPEED_DIV == 16
1854 1.1 mrg ;; Initialize Loop-Counter
1855 1.1 mrg mov R_cnt, C7
1856 1.1 mrg wmov C6, C0
1857 1.1 mrg #endif /* SPEED_DIV */
1858 1.1 mrg
1859 1.1 mrg #if SPEED_DIV == 8
1860 1.1 mrg
1861 1.1 mrg push A7
1862 1.1 mrg clr C6
1863 1.1 mrg
1864 1.1 mrg 1: ;; Compare shifted Devidend against Divisor
1865 1.1 mrg ;; If -- even after Shifting -- it is smaller...
1866 1.1 mrg CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1867 1.1 mrg cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1868 1.1 mrg brcc 2f
1869 1.1 mrg
1870 1.1 mrg ;; ...then we can subtract it. Thus, it is legal to shift left
1871 1.1 mrg $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1872 1.1 mrg mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1873 1.1 mrg mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1874 1.1 mrg mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1875 1.1 mrg
1876 1.1 mrg ;; 8 Bits are done
1877 1.1 mrg subi C7, 8
1878 1.1 mrg brne 1b
1879 1.1 mrg
1880 1.1 mrg ;; Shifted 64 Bits: A7 has traveled to C7
1881 1.1 mrg pop C7
1882 1.1 mrg ;; Divisor is greater than Dividend. We have:
1883 1.1 mrg ;; A[] % B[] = A[]
1884 1.1 mrg ;; A[] / B[] = 0
1885 1.1 mrg ;; Thus, we can return immediately
1886 1.1 mrg rjmp 5f
1887 1.1 mrg
1888 1.1 mrg 2: ;; Initialze Bit-Counter with Number of Bits still to be performed
1889 1.1 mrg mov R_cnt, C7
1890 1.1 mrg
1891 1.1 mrg ;; Push of A7 is not needed because C7 is still 0
1892 1.1 mrg pop C7
1893 1.1 mrg clr C7
1894 1.1 mrg
1895 1.1 mrg #elif SPEED_DIV == 16
1896 1.1 mrg
1897 1.1 mrg ;; Compare shifted Dividend against Divisor
1898 1.1 mrg cp A7, B3
1899 1.1 mrg cpc C0, B4
1900 1.1 mrg cpc C1, B5
1901 1.1 mrg cpc C2, B6
1902 1.1 mrg cpc C3, B7
1903 1.1 mrg brcc 2f
1904 1.1 mrg
1905 1.1 mrg ;; Divisor is greater than shifted Dividen: We can shift the Dividend
1906 1.1 mrg ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1907 1.1 mrg wmov C2,A6 $ wmov C0,A4
1908 1.1 mrg wmov A6,A2 $ wmov A4,A0
1909 1.1 mrg wmov A2,C6 $ wmov A0,C4
1910 1.1 mrg
1911 1.1 mrg ;; Set Bit Counter to 32
1912 1.1 mrg lsr R_cnt
1913 1.1 mrg 2:
1914 1.1 mrg #elif SPEED_DIV
1915 1.1 mrg #error SPEED_DIV = ?
1916 1.1 mrg #endif /* SPEED_DIV */
1917 1.1 mrg
1918 1.1 mrg ;; The very Division + Remainder Routine
1919 1.1 mrg
1920 1.1 mrg 3: ;; Left-shift Dividend...
1921 1.1 mrg lsl A0 $ rol A1 $ rol A2 $ rol A3
1922 1.1 mrg rol A4 $ rol A5 $ rol A6 $ rol A7
1923 1.1 mrg
1924 1.1 mrg ;; ...into Remainder
1925 1.1 mrg rol C0 $ rol C1 $ rol C2 $ rol C3
1926 1.1 mrg rol C4 $ rol C5 $ rol C6 $ rol C7
1927 1.1 mrg
1928 1.1 mrg ;; Compare Remainder and Divisor
1929 1.1 mrg CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1930 1.1 mrg cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1931 1.1 mrg
1932 1.1 mrg brcs 4f
1933 1.1 mrg
1934 1.1 mrg ;; Divisor fits into Remainder: Subtract it from Remainder...
1935 1.1 mrg SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1936 1.1 mrg sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1937 1.1 mrg
1938 1.1 mrg ;; ...and set according Bit in the upcoming Quotient
1939 1.1 mrg ;; The Bit will travel to its final Position
1940 1.1 mrg ori A0, 1
1941 1.1 mrg
1942 1.1 mrg 4: ;; This Bit is done
1943 1.1 mrg dec R_cnt
1944 1.1 mrg brne 3b
1945 1.1 mrg ;; __zero_reg__ is 0 again
1946 1.1 mrg
1947 1.1 mrg ;; T = 0: We are fine with the Quotient in A[]
1948 1.1 mrg ;; T = 1: Copy Remainder to A[]
1949 1.1 mrg 5: brtc 6f
1950 1.1 mrg wmov A0, C0
1951 1.1 mrg wmov A2, C2
1952 1.1 mrg wmov A4, C4
1953 1.1 mrg wmov A6, C6
1954 1.1 mrg ;; Move the Sign of the Result to SS.7
1955 1.1 mrg lsl SS
1956 1.1 mrg
1957 1.1 mrg 6: ret
1958 1.1 mrg
1959 1.1 mrg ENDF __udivmod64
1960 1.1 mrg #endif /* L_udivmod64 */
1961 1.1 mrg
1962 1.1 mrg
1963 1.1 mrg #if defined (L_divdi3)
1964 1.1 mrg
1965 1.1 mrg ;; R25:R18 = R24:R18 mod R17:R10
1966 1.1 mrg ;; Ordinary ABI-Function
1967 1.1 mrg
1968 1.1 mrg DEFUN __moddi3
1969 1.1 mrg set
1970 1.1 mrg rjmp __divdi3_moddi3
1971 1.1 mrg ENDF __moddi3
1972 1.1 mrg
1973 1.1 mrg ;; R25:R18 = R24:R18 div R17:R10
1974 1.1 mrg ;; Ordinary ABI-Function
1975 1.1 mrg
1976 1.1 mrg DEFUN __divdi3
1977 1.1 mrg clt
1978 1.1 mrg ENDF __divdi3
1979 1.1 mrg
1980 1.1 mrg DEFUN __divdi3_moddi3
1981 1.1 mrg #if SPEED_DIV
1982 1.1 mrg mov r31, A7
1983 1.1 mrg or r31, B7
1984 1.1 mrg brmi 0f
1985 1.1 mrg ;; Both Signs are 0: the following Complexitiy is not needed
1986 1.1 mrg XJMP __udivdi3_umoddi3
1987 1.1 mrg #endif /* SPEED_DIV */
1988 1.1 mrg
1989 1.1 mrg 0: ;; The Prologue
1990 1.1 mrg ;; Save 12 Registers: Y, 17...8
1991 1.1 mrg ;; No Frame needed
1992 1.1 mrg do_prologue_saves 12
1993 1.1 mrg
1994 1.1 mrg ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1995 1.1 mrg ;; SS.6 will contain the Sign of the Remainder (A.sign)
1996 1.1 mrg mov SS, A7
1997 1.1 mrg asr SS
1998 1.1 mrg ;; Adjust Dividend's Sign as needed
1999 1.1 mrg #if SPEED_DIV
2000 1.1 mrg ;; Compiling for Speed we know that at least one Sign must be < 0
2001 1.1 mrg ;; Thus, if A[] >= 0 then we know B[] < 0
2002 1.1 mrg brpl 22f
2003 1.1 mrg #else
2004 1.1 mrg brpl 21f
2005 1.1 mrg #endif /* SPEED_DIV */
2006 1.1 mrg
2007 1.1 mrg XCALL __negdi2
2008 1.1 mrg
2009 1.1 mrg ;; Adjust Divisor's Sign and SS.7 as needed
2010 1.1 mrg 21: tst B7
2011 1.1 mrg brpl 3f
2012 1.1 mrg 22: ldi NN, 1 << 7
2013 1.1 mrg eor SS, NN
2014 1.1 mrg
2015 1.1 mrg ldi NN, -1
2016 1.1 mrg com B4 $ com B5 $ com B6 $ com B7
2017 1.1 mrg $ com B1 $ com B2 $ com B3
2018 1.1 mrg NEG B0
2019 1.1 mrg $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
2020 1.1 mrg sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
2021 1.1 mrg
2022 1.1 mrg 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2023 1.1 mrg XCALL __udivmod64
2024 1.1 mrg
2025 1.1 mrg ;; Adjust Result's Sign
2026 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2027 1.1 mrg tst SS
2028 1.1 mrg brpl 4f
2029 1.1 mrg #else
2030 1.1 mrg sbrc SS, 7
2031 1.1 mrg #endif /* __AVR_HAVE_JMP_CALL__ */
2032 1.1 mrg XCALL __negdi2
2033 1.1 mrg
2034 1.1 mrg 4: ;; Epilogue: Restore 12 Registers and return
2035 1.1 mrg do_epilogue_restores 12
2036 1.1 mrg
2037 1.1 mrg ENDF __divdi3_moddi3
2038 1.1 mrg
2039 1.1 mrg #endif /* L_divdi3 */
2040 1.1 mrg
2041 1.1 mrg #undef R_cnt
2042 1.1 mrg #undef SS
2043 1.1 mrg #undef NN
2044 1.1 mrg
2045 1.1 mrg .section .text.libgcc, "ax", @progbits
2046 1.1 mrg
2047 1.1 mrg #define TT __tmp_reg__
2048 1.1 mrg
2049 1.1 mrg #if defined (L_adddi3)
2050 1.1 mrg ;; (set (reg:DI 18)
2051 1.1 mrg ;; (plus:DI (reg:DI 18)
2052 1.1 mrg ;; (reg:DI 10)))
2053 1.1 mrg ;; Sets the V flag for signed overflow tests
2054 1.1 mrg ;; Sets the C flag for unsigned overflow tests
2055 1.1 mrg DEFUN __adddi3
;; 64-bit addition A[] += B[]: byte 0 with ADD, the rest rippling the carry.
2056 1.1 mrg     ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
2057 1.1 mrg     adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
2058 1.1 mrg     ret
2059 1.1 mrg ENDF __adddi3
2060 1.1 mrg #endif /* L_adddi3 */
2061 1.1 mrg
2062 1.1 mrg #if defined (L_adddi3_s8)
2063 1.1 mrg ;; (set (reg:DI 18)
2064 1.1 mrg ;; (plus:DI (reg:DI 18)
2065 1.1 mrg ;; (sign_extend:SI (reg:QI 26))))
2066 1.1 mrg ;; Sets the V flag for signed overflow tests
2067 1.1 mrg ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
2068 1.1 mrg DEFUN __adddi3_s8
;; 64-bit += sign-extended 8-bit addend in R26.
;; TT becomes 0x00 or 0xFF (the sign extension of R26) for bytes 1..7.
2069 1.1 mrg     clr     TT
2070 1.1 mrg     sbrc    r26, 7
2071 1.1 mrg     com     TT
2072 1.1 mrg     ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2073 1.1 mrg     adc A4,TT  $ adc A5,TT $ adc A6,TT $ adc A7,TT
2074 1.1 mrg     ret
2075 1.1 mrg ENDF __adddi3_s8
2076 1.1 mrg #endif /* L_adddi3_s8 */
2077 1.1 mrg
2078 1.1 mrg #if defined (L_subdi3)
2079 1.1 mrg ;; (set (reg:DI 18)
2080 1.1 mrg ;; (minus:DI (reg:DI 18)
2081 1.1 mrg ;; (reg:DI 10)))
2082 1.1 mrg ;; Sets the V flag for signed overflow tests
2083 1.1 mrg ;; Sets the C flag for unsigned overflow tests
2084 1.1 mrg DEFUN __subdi3
;; 64-bit subtraction A[] -= B[]: byte 0 with SUB, the rest rippling the borrow.
2085 1.1 mrg     SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2086 1.1 mrg     sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2087 1.1 mrg     ret
2088 1.1 mrg ENDF __subdi3
2089 1.1 mrg #endif /* L_subdi3 */
2090 1.1 mrg
2091 1.1 mrg #if defined (L_cmpdi2)
2092 1.1 mrg ;; (set (cc0)
2093 1.1 mrg ;; (compare (reg:DI 18)
2094 1.1 mrg ;; (reg:DI 10)))
2095 1.1 mrg DEFUN __cmpdi2
;; 64-bit compare A[] vs B[]: result is only in SREG (like CP on a DI value).
2096 1.1 mrg     CP  A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2097 1.1 mrg     cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2098 1.1 mrg     ret
2099 1.1 mrg ENDF __cmpdi2
2100 1.1 mrg #endif /* L_cmpdi2 */
2101 1.1 mrg
2102 1.1 mrg #if defined (L_cmpdi2_s8)
2103 1.1 mrg ;; (set (cc0)
2104 1.1 mrg ;; (compare (reg:DI 18)
2105 1.1 mrg ;; (sign_extend:SI (reg:QI 26))))
2106 1.1 mrg DEFUN __cmpdi2_s8
;; 64-bit compare against sign-extended 8-bit value in R26.
;; TT becomes 0x00 or 0xFF (the sign extension of R26) for bytes 1..7.
2107 1.1 mrg     clr     TT
2108 1.1 mrg     sbrc    r26, 7
2109 1.1 mrg     com     TT
2110 1.1 mrg     CP  A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2111 1.1 mrg     cpc A4,TT  $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2112 1.1 mrg     ret
2113 1.1 mrg ENDF __cmpdi2_s8
2114 1.1 mrg #endif /* L_cmpdi2_s8 */
2115 1.1 mrg
2116 1.1 mrg #if defined (L_negdi2)
2117 1.1 mrg ;; (set (reg:DI 18)
2118 1.1 mrg ;; (neg:DI (reg:DI 18)))
2119 1.1 mrg ;; Sets the V flag for signed overflow tests
2120 1.1 mrg DEFUN __negdi2
2121 1.1 mrg
;; Two's-complement negation of A[]: one's complement of A7..A1, NEG of A0,
;; then SBCI -1 on each higher byte to propagate the borrow from byte 0.
2122 1.1 mrg     com  A4    $  com  A5    $  com  A6    $  com  A7
2123 1.1 mrg                $  com  A1    $  com  A2    $  com  A3
2124 1.1 mrg     NEG  A0
2125 1.1 mrg                $  sbci A1,-1 $  sbci A2,-1 $  sbci A3,-1
2126 1.1 mrg     sbci A4,-1 $  sbci A5,-1 $  sbci A6,-1 $  sbci A7,-1
2127 1.1 mrg     ret
2128 1.1 mrg
2129 1.1 mrg ENDF __negdi2
2130 1.1 mrg #endif /* L_negdi2 */
2131 1.1 mrg
2132 1.1 mrg #undef TT
2133 1.1 mrg
2134 1.1 mrg #undef C7
2135 1.1 mrg #undef C6
2136 1.1 mrg #undef C5
2137 1.1 mrg #undef C4
2138 1.1 mrg #undef C3
2139 1.1 mrg #undef C2
2140 1.1 mrg #undef C1
2141 1.1 mrg #undef C0
2142 1.1 mrg
2143 1.1 mrg #undef B7
2144 1.1 mrg #undef B6
2145 1.1 mrg #undef B5
2146 1.1 mrg #undef B4
2147 1.1 mrg #undef B3
2148 1.1 mrg #undef B2
2149 1.1 mrg #undef B1
2150 1.1 mrg #undef B0
2151 1.1 mrg
2152 1.1 mrg #undef A7
2153 1.1 mrg #undef A6
2154 1.1 mrg #undef A5
2155 1.1 mrg #undef A4
2156 1.1 mrg #undef A3
2157 1.1.1.2 mrg #undef A2
2158 1.1.1.2 mrg #undef A1
2159 1.1 mrg #undef A0
2160 1.1 mrg
2161 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2162 1.1 mrg
2163 1.1 mrg
2164 1.1 mrg .section .text.libgcc.prologue, "ax", @progbits
2166 1.1 mrg
2167 1.1 mrg /**********************************
2168 1.1 mrg * This is a prologue subroutine
2169 1.1 mrg **********************************/
2170 1.1 mrg #if !defined (__AVR_TINY__)
2171 1.1 mrg #if defined (L_prologue)
2172 1.1 mrg
2173 1.1 mrg ;; This function does not clobber T-flag; 64-bit division relies on it
;; Shared prologue helper: pushes the callee-saved registers R2..R17 and the
;; frame pointer Y (R28/R29), then grows the stack by the amount in X
;; (R27:R26) to set up Y as the frame pointer, and finally jumps back into
;; the caller through Z (plus EIND where applicable) via XIJMP.
;; NOTE(review): the caller is expected to have loaded X with the frame size
;; and Z with the continuation address before jumping here -- confirm against
;; the avr.md prologue expander.
2174 1.1 mrg DEFUN __prologue_saves__
2175 1.1 mrg     push r2
2176 1.1 mrg     push r3
2177 1.1 mrg     push r4
2178 1.1 mrg     push r5
2179 1.1 mrg     push r6
2180 1.1 mrg     push r7
2181 1.1 mrg     push r8
2182 1.1 mrg     push r9
2183 1.1 mrg     push r10
2184 1.1 mrg     push r11
2185 1.1 mrg     push r12
2186 1.1 mrg     push r13
2187 1.1 mrg     push r14
2188 1.1 mrg     push r15
2189 1.1 mrg     push r16
2190 1.1 mrg     push r17
2191 1.1 mrg     push r28
2192 1.1 mrg     push r29
;; Allocate the frame: SP -= X.  Three variants depending on SP width and
;; whether SP_H/SP_L can be written non-atomically (XMEGA) or must be
;; guarded by disabling interrupts (classic AVR).
2193 1.1 mrg #if !defined (__AVR_HAVE_SPH__)
2194 1.1 mrg     in   r28,__SP_L__
2195 1.1 mrg     sub  r28,r26
2196 1.1 mrg     out  __SP_L__,r28
2197 1.1 mrg     clr  r29
2198 1.1 mrg #elif defined (__AVR_XMEGA__)
2199 1.1 mrg     in   r28,__SP_L__
2200 1.1 mrg     in   r29,__SP_H__
2201 1.1 mrg     sub  r28,r26
2202 1.1 mrg     sbc  r29,r27
2203 1.1 mrg     out  __SP_L__,r28
2204 1.1 mrg     out  __SP_H__,r29
2205 1.1 mrg #else
2206 1.1 mrg     in   r28,__SP_L__
2207 1.1 mrg     in   r29,__SP_H__
2208 1.1 mrg     sub  r28,r26
2209 1.1 mrg     sbc  r29,r27
;; Classic AVR: writing SP_H unlocks SP_L for one cycle only with
;; interrupts disabled; restore SREG (and thus the I flag) in between.
2210 1.1 mrg     in   __tmp_reg__,__SREG__
2211 1.1 mrg     cli
2212 1.1.1.2 mrg     out  __SP_H__,r29
2213 1.1 mrg     out  __SREG__,__tmp_reg__
2214 1.1 mrg     out  __SP_L__,r28
2215 1.1 mrg #endif /* #SP = 8/16 */
2216 1.1 mrg
2217 1.1 mrg     XIJMP
2218 1.1 mrg
2219 1.1 mrg ENDF __prologue_saves__
2220 1.1 mrg #endif /* defined (L_prologue) */
2221 1.1 mrg
2222 1.1 mrg /*
2223 1.1 mrg * This is an epilogue subroutine
2224 1.1 mrg */
2225 1.1 mrg #if defined (L_epilogue)
2226 1.1 mrg
;; Shared epilogue helper: reloads R2..R17 and the saved Y from the frame
;; (addressed via the current Y), deallocates the frame by adding R30 (the
;; frame size, presumably loaded by the caller -- TODO confirm against the
;; avr.md epilogue expander) to SP, and returns to the function's caller.
2227 1.1 mrg DEFUN __epilogue_restores__
2228 1.1 mrg     ldd r2,Y+18
2229 1.1 mrg     ldd r3,Y+17
2230 1.1 mrg     ldd r4,Y+16
2231 1.1 mrg     ldd r5,Y+15
2232 1.1 mrg     ldd r6,Y+14
2233 1.1 mrg     ldd r7,Y+13
2234 1.1 mrg     ldd r8,Y+12
2235 1.1 mrg     ldd r9,Y+11
2236 1.1 mrg     ldd r10,Y+10
2237 1.1 mrg     ldd r11,Y+9
2238 1.1 mrg     ldd r12,Y+8
2239 1.1 mrg     ldd r13,Y+7
2240 1.1 mrg     ldd r14,Y+6
2241 1.1 mrg     ldd r15,Y+5
2242 1.1 mrg     ldd r16,Y+4
2243 1.1 mrg     ldd r17,Y+3
;; The saved Y is staged in X (R26/R27) so the stack pointer can be
;; updated through the still-valid current Y first.
2244 1.1 mrg     ldd r26,Y+2
2245 1.1 mrg #if !defined (__AVR_HAVE_SPH__)
2246 1.1 mrg     ldd r29,Y+1
2247 1.1 mrg     add r28,r30
2248 1.1 mrg     out __SP_L__,r28
2249 1.1 mrg     mov r28, r26
2250 1.1 mrg #elif defined (__AVR_XMEGA__)
2251 1.1 mrg     ldd r27,Y+1
2252 1.1 mrg     add r28,r30
2253 1.1 mrg     adc r29,__zero_reg__
2254 1.1 mrg     out __SP_L__,r28
2255 1.1 mrg     out __SP_H__,r29
2256 1.1 mrg     wmov 28, 26
2257 1.1 mrg #else
2258 1.1 mrg     ldd r27,Y+1
2259 1.1 mrg     add r28,r30
2260 1.1 mrg     adc r29,__zero_reg__
;; Classic AVR: SP_H/SP_L update must be guarded by CLI; SREG (with the
;; caller's I flag) is restored in between, as in __prologue_saves__.
2261 1.1 mrg     in  __tmp_reg__,__SREG__
2262 1.1 mrg     cli
2263 1.1 mrg     out __SP_H__,r29
2264 1.1 mrg     out __SREG__,__tmp_reg__
2265 1.1 mrg     out __SP_L__,r28
2266 1.1 mrg     mov_l r28, r26
2267 1.1.1.2 mrg     mov_h r29, r27
2268 1.1 mrg #endif /* #SP = 8/16 */
2269 1.1 mrg     ret
2270 1.1 mrg ENDF __epilogue_restores__
2271 1.1 mrg #endif /* defined (L_epilogue) */
2272 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2273 1.1 mrg
2274 1.1 mrg #ifdef L_exit
2275 1.1 mrg .section .fini9,"ax",@progbits
;; _exit lives in .fini9, the first of the .fini sections; `exit` is a weak
;; alias so applications may override it.  Execution simply falls through
;; the .fini8 ... .fini1 code (inserted by the linker script) into .fini0.
2276 1.1 mrg DEFUN _exit
2277 1.1 mrg     .weak   exit
2278 1.1 mrg exit:
2279 1.1 mrg ENDF _exit
2280 1.1 mrg
2281 1.1 mrg /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2282 1.1 mrg
2283 1.1 mrg     .section .fini0,"ax",@progbits
;; End of program: disable interrupts and spin forever.
2284 1.1 mrg     cli
2285 1.1 mrg __stop_program:
2286 1.1 mrg     rjmp __stop_program
2287 1.1 mrg #endif /* defined (L_exit) */
2288 1.1 mrg
2289 1.1 mrg #ifdef L_cleanup
;; Weak no-op _cleanup stub; applications/libc may provide a real one.
2290 1.1 mrg     .weak   _cleanup
2291 1.1 mrg     .func   _cleanup
2292 1.1 mrg _cleanup:
2293 1.1 mrg     ret
2294 1.1 mrg .endfunc
2295 1.1.1.2 mrg #endif /* defined (L_cleanup) */
2296 1.1 mrg
2297 1.1.1.2 mrg
2298 1.1.1.2 mrg .section .text.libgcc, "ax", @progbits
2300 1.1.1.2 mrg
2301 1.1.1.2 mrg #ifdef L_tablejump2
;; Indirect jump through a flash jump table.
;; In:  Z (optionally extended by R24 on EIJMP/EICALL devices) holds the
;;      WORD address of a jump-table entry; it is converted to a byte
;;      address here by shifting left.
;; Out: jumps to the word address read from that entry; does not return.
;; Clobbers: Z, __tmp_reg__, and RAMPZ on ELPM devices.
2302 1.1.1.2 mrg DEFUN __tablejump2__
2303 1.1.1.2 mrg     lsl     r30
2304 1.1.1.2 mrg     rol     r31
2305 1.1.1.2 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__)
2306 1.1.1.2 mrg     ;; Word address of gs() jumptable entry in R24:Z
2307 1.1.1.2 mrg     rol     r24
2308 1.1 mrg     out     __RAMPZ__, r24
2309 1.1 mrg #elif defined (__AVR_HAVE_ELPM__)
2310 1.1.1.2 mrg     ;; Word address of jumptable entry in Z
;; Capture the bit shifted out of Z.high as RAMPZ bit 0.
2311 1.1.1.2 mrg     clr     __tmp_reg__
2312 1.1.1.2 mrg     rol     __tmp_reg__
2313 1.1.1.2 mrg     out     __RAMPZ__, __tmp_reg__
2314 1.1.1.2 mrg #endif
2315 1.1.1.2 mrg
2316 1.1.1.2 mrg     ;; Read word address from jumptable and jump
2317 1.1.1.2 mrg
2318 1.1.1.2 mrg #if defined (__AVR_HAVE_ELPMX__)
2319 1.1.1.2 mrg     elpm    __tmp_reg__, Z+
2320 1.1.1.2 mrg     elpm    r31, Z
2321 1.1.1.2 mrg     mov     r30, __tmp_reg__
2322 1.1.1.2 mrg #ifdef __AVR_HAVE_RAMPD__
2323 1.1.1.2 mrg     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2324 1.1.1.2 mrg     out     __RAMPZ__, __zero_reg__
2325 1.1.1.2 mrg #endif /* RAMPD */
2326 1.1.1.2 mrg     XIJMP
2327 1.1.1.2 mrg #elif defined (__AVR_HAVE_ELPM__)
;; No ELPM Rd,Z+ available: read both bytes into R0, push them and "return"
;; into the target address.
2328 1.1.1.2 mrg     elpm
2329 1.1.1.2 mrg     push    r0
2330 1.1.1.2 mrg     adiw    r30, 1
2331 1.1.1.2 mrg     elpm
2332 1.1.1.2 mrg     push    r0
2333 1.1.1.2 mrg     ret
2334 1.1.1.2 mrg #elif defined (__AVR_HAVE_LPMX__)
2335 1.1.1.2 mrg     lpm     __tmp_reg__, Z+
2336 1.1.1.2 mrg     lpm     r31, Z
2337 1.1.1.2 mrg     mov     r30, __tmp_reg__
2338 1.1.1.2 mrg     ijmp
2339 1.1.1.2 mrg #elif defined (__AVR_TINY__)
;; AVR_TINY maps flash into the data space at PM_BASE: plain LD works.
2340 1.1.1.2 mrg     wsubi   30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2341 1.1.1.2 mrg     ld      __tmp_reg__, Z+
2342 1.1.1.2 mrg     ld      r31, Z  ; Use ld instead of lpm to load Z
2343 1.1.1.2 mrg     mov     r30, __tmp_reg__
2344 1.1.1.2 mrg     ijmp
2345 1.1.1.2 mrg #else
;; Plain LPM only: same push/push/ret trick as the ELPM case above.
2346 1.1 mrg     lpm
2347 1.1.1.2 mrg     push    r0
2348 1.1.1.2 mrg     adiw    r30, 1
2349 1.1 mrg     lpm
2350 1.1.1.2 mrg     push    r0
2351 1.1.1.2 mrg     ret
2352 1.1.1.2 mrg #endif
2353 1.1.1.2 mrg ENDF __tablejump2__
2354 1.1.1.2 mrg #endif /* L_tablejump2 */
2355 1.1.1.2 mrg
2356 1.1.1.2 mrg #if defined(__AVR_TINY__)
2357 1.1.1.2 mrg #ifdef L_copy_data
2358 1.1.1.2 mrg .section .init4,"ax",@progbits
;; AVR_TINY startup: copy .data's initializers from flash (memory-mapped at
;; PM_BASE, so plain LD works) to RAM.  X walks the RAM destination,
;; Z the flash source; loop until X reaches __data_end.
2359 1.1.1.2 mrg .global __do_copy_data
2360 1.1.1.2 mrg __do_copy_data:
2361 1.1.1.2 mrg     ldi     r18, hi8(__data_end)
2362 1.1.1.2 mrg     ldi     r26, lo8(__data_start)
2363 1.1.1.2 mrg     ldi     r27, hi8(__data_start)
2364 1.1.1.2 mrg     ldi     r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2365 1.1.1.2 mrg     ldi     r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2366 1.1.1.2 mrg     rjmp    .L__do_copy_data_start
2367 1.1.1.2 mrg .L__do_copy_data_loop:
2368 1.1.1.2 mrg     ld      r19, z+
2369 1.1.1.2 mrg     st      X+, r19
2370 1.1 mrg .L__do_copy_data_start:
2371 1.1 mrg     cpi     r26, lo8(__data_end)
2372 1.1 mrg     cpc     r27, r18
2373 1.1 mrg     brne    .L__do_copy_data_loop
2374 1.1 mrg #endif
2375 1.1 mrg #else
2376 1.1 mrg #ifdef L_copy_data
2377 1.1 mrg .section .init4,"ax",@progbits
;; Startup: copy .data's initializers from flash to RAM.
;; X = RAM destination, Z (plus RAMPZ on >64 KiB devices) = flash source;
;; loop until X reaches __data_end.  Three variants: ELPM Rd,Z+, plain ELPM
;; (with manual RAMPZ carry), and LPM-only devices.
2378 1.1 mrg DEFUN __do_copy_data
2379 1.1 mrg #if defined(__AVR_HAVE_ELPMX__)
2380 1.1 mrg     ldi     r17, hi8(__data_end)
2381 1.1 mrg     ldi     r26, lo8(__data_start)
2382 1.1 mrg     ldi     r27, hi8(__data_start)
2383 1.1 mrg     ldi     r30, lo8(__data_load_start)
2384 1.1 mrg     ldi     r31, hi8(__data_load_start)
2385 1.1 mrg     ldi     r16, hh8(__data_load_start)
2386 1.1 mrg     out     __RAMPZ__, r16
2387 1.1 mrg     rjmp    .L__do_copy_data_start
2388 1.1 mrg .L__do_copy_data_loop:
2389 1.1 mrg     elpm    r0, Z+
2390 1.1 mrg     st      X+, r0
2391 1.1 mrg .L__do_copy_data_start:
2392 1.1 mrg     cpi     r26, lo8(__data_end)
2393 1.1 mrg     cpc     r27, r17
2394 1.1 mrg     brne    .L__do_copy_data_loop
2395 1.1 mrg #elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
;; Plain ELPM cannot post-increment Z, so ADIW carries into RAMPZ by hand:
;; start RAMPZ one below (hh8 - 0x10000's hh8) and INC it on every carry.
2396 1.1 mrg     ldi     r17, hi8(__data_end)
2397 1.1 mrg     ldi     r26, lo8(__data_start)
2398 1.1 mrg     ldi     r27, hi8(__data_start)
2399 1.1 mrg     ldi     r30, lo8(__data_load_start)
2400 1.1 mrg     ldi     r31, hi8(__data_load_start)
2401 1.1 mrg     ldi     r16, hh8(__data_load_start - 0x10000)
2402 1.1 mrg .L__do_copy_data_carry:
2403 1.1 mrg     inc     r16
2404 1.1 mrg     out     __RAMPZ__, r16
2405 1.1 mrg     rjmp    .L__do_copy_data_start
2406 1.1 mrg .L__do_copy_data_loop:
2407 1.1 mrg     elpm
2408 1.1 mrg     st      X+, r0
2409 1.1 mrg     adiw    r30, 1
2410 1.1 mrg     brcs    .L__do_copy_data_carry
2411 1.1 mrg .L__do_copy_data_start:
2412 1.1 mrg     cpi     r26, lo8(__data_end)
2413 1.1 mrg     cpc     r27, r17
2414 1.1 mrg     brne    .L__do_copy_data_loop
2415 1.1 mrg #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
;; <= 64 KiB flash: plain LPM, with LPM Rd,Z+ where available.
2416 1.1 mrg     ldi     r17, hi8(__data_end)
2417 1.1 mrg     ldi     r26, lo8(__data_start)
2418 1.1 mrg     ldi     r27, hi8(__data_start)
2419 1.1 mrg     ldi     r30, lo8(__data_load_start)
2420 1.1 mrg     ldi     r31, hi8(__data_load_start)
2421 1.1 mrg     rjmp    .L__do_copy_data_start
2422 1.1 mrg .L__do_copy_data_loop:
2423 1.1 mrg #if defined (__AVR_HAVE_LPMX__)
2424 1.1 mrg     lpm     r0, Z+
2425 1.1 mrg #else
2426 1.1 mrg     lpm
2427 1.1 mrg     adiw    r30, 1
2428 1.1 mrg #endif
2429 1.1 mrg     st      X+, r0
2430 1.1 mrg .L__do_copy_data_start:
2431 1.1 mrg     cpi     r26, lo8(__data_end)
2432 1.1 mrg     cpc     r27, r17
2433 1.1 mrg     brne    .L__do_copy_data_loop
2434 1.1 mrg #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2435 1.1.1.2 mrg #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2436 1.1 mrg     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2437 1.1 mrg     out     __RAMPZ__, __zero_reg__
2438 1.1 mrg #endif /* ELPM && RAMPD */
2439 1.1 mrg ENDF __do_copy_data
2440 1.1 mrg #endif /* L_copy_data */
2441 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2442 1.1.1.2 mrg
2443 1.1 mrg /* __do_clear_bss is only necessary if there is anything in .bss section. */
2444 1.1 mrg
2445 1.1 mrg #ifdef L_clear_bss
2446 1.1 mrg .section .init4,"ax",@progbits
;; Startup: zero-fill .bss.  X walks from __bss_start to __bss_end,
;; storing __zero_reg__; R18 caches hi8(__bss_end) for the 16-bit compare.
2447 1.1 mrg DEFUN __do_clear_bss
2448 1.1 mrg     ldi     r18, hi8(__bss_end)
2449 1.1 mrg     ldi     r26, lo8(__bss_start)
2450 1.1.1.2 mrg     ldi     r27, hi8(__bss_start)
2451 1.1 mrg     rjmp    .do_clear_bss_start
2452 1.1 mrg .do_clear_bss_loop:
2453 1.1 mrg     st      X+, __zero_reg__
2454 1.1 mrg .do_clear_bss_start:
2455 1.1 mrg     cpi     r26, lo8(__bss_end)
2456 1.1 mrg     cpc     r27, r18
2457 1.1 mrg     brne    .do_clear_bss_loop
2458 1.1.1.2 mrg ENDF __do_clear_bss
2459 1.1.1.2 mrg #endif /* L_clear_bss */
2460 1.1.1.2 mrg
2461 1.1.1.2 mrg /* __do_global_ctors and __do_global_dtors are only necessary
2462 1.1.1.2 mrg if there are any constructors/destructors. */
2463 1.1.1.2 mrg
2464 1.1 mrg #if defined(__AVR_TINY__)
2465 1.1 mrg #define cdtors_tst_reg r18
2466 1.1 mrg #else
2467 1.1.1.2 mrg #define cdtors_tst_reg r17
2468 1.1.1.2 mrg #endif
2469 1.1.1.2 mrg
2470 1.1.1.2 mrg #ifdef L_ctors
2471 1.1.1.2 mrg .section .init6,"ax",@progbits
;; Run constructors: walk the table of word addresses from __ctors_end
;; DOWN to __ctors_start (reverse order), dispatching each entry through
;; __tablejump2__.  Y (R29:R28, plus R16 for hh8 on EIJMP devices) is the
;; running table pointer in word units.
2472 1.1.1.2 mrg DEFUN __do_global_ctors
2473 1.1.1.2 mrg     ldi     cdtors_tst_reg, pm_hi8(__ctors_start)
2474 1.1 mrg     ldi     r28, pm_lo8(__ctors_end)
2475 1.1.1.2 mrg     ldi     r29, pm_hi8(__ctors_end)
2476 1.1.1.2 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2477 1.1.1.2 mrg     ldi     r16, pm_hh8(__ctors_end)
2478 1.1.1.2 mrg #endif /* HAVE_EIJMP */
2479 1.1.1.2 mrg     rjmp    .L__do_global_ctors_start
2480 1.1.1.2 mrg .L__do_global_ctors_loop:
;; Pre-decrement the word pointer (borrow into R16 on large devices).
2481 1.1.1.2 mrg     wsubi   28, 1
2482 1.1.1.2 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2483 1.1 mrg     sbc     r16, __zero_reg__
2484 1.1.1.2 mrg     mov     r24, r16
2485 1.1.1.2 mrg #endif /* HAVE_EIJMP */
2486 1.1.1.2 mrg     mov_h   r31, r29
2487 1.1.1.2 mrg     mov_l   r30, r28
2488 1.1.1.2 mrg     XCALL   __tablejump2__
2489 1.1.1.2 mrg .L__do_global_ctors_start:
2490 1.1.1.2 mrg     cpi     r28, pm_lo8(__ctors_start)
2491 1.1 mrg     cpc     r29, cdtors_tst_reg
2492 1.1 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2493 1.1 mrg     ldi     r24, pm_hh8(__ctors_start)
2494 1.1 mrg     cpc     r16, r24
2495 1.1 mrg #endif /* HAVE_EIJMP */
2496 1.1 mrg     brne    .L__do_global_ctors_loop
2497 1.1.1.2 mrg ENDF __do_global_ctors
2498 1.1.1.2 mrg #endif /* L_ctors */
2499 1.1.1.2 mrg
2500 1.1.1.2 mrg #ifdef L_dtors
2501 1.1.1.2 mrg .section .fini6,"ax",@progbits
;; Run destructors: walk the table of word addresses from __dtors_start
;; UP to __dtors_end (forward order, the mirror of __do_global_ctors),
;; dispatching each entry through __tablejump2__.  Y (R29:R28, plus R16
;; for hh8 on EIJMP devices) is the running table pointer in word units.
2502 1.1.1.2 mrg DEFUN __do_global_dtors
2503 1.1.1.2 mrg     ldi     cdtors_tst_reg, pm_hi8(__dtors_end)
2504 1.1 mrg     ldi     r28, pm_lo8(__dtors_start)
2505 1.1.1.2 mrg     ldi     r29, pm_hi8(__dtors_start)
2506 1.1.1.2 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2507 1.1.1.2 mrg     ldi     r16, pm_hh8(__dtors_start)
2508 1.1.1.2 mrg #endif /* HAVE_EIJMP */
2509 1.1.1.2 mrg     rjmp    .L__do_global_dtors_start
2510 1.1.1.2 mrg .L__do_global_dtors_loop:
2511 1.1.1.2 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2512 1.1.1.2 mrg     mov     r24, r16
2513 1.1.1.2 mrg #endif /* HAVE_EIJMP */
2514 1.1.1.2 mrg     mov_h   r31, r29
2515 1.1 mrg     mov_l   r30, r28
2516 1.1.1.2 mrg     XCALL   __tablejump2__
;; Post-increment the word pointer (carry into R16 on large devices).
2517 1.1.1.2 mrg     waddi   28, 1
2518 1.1.1.2 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2519 1.1.1.2 mrg     adc     r16, __zero_reg__
2520 1.1.1.2 mrg #endif /* HAVE_EIJMP */
2521 1.1.1.2 mrg .L__do_global_dtors_start:
2522 1.1.1.2 mrg     cpi     r28, pm_lo8(__dtors_end)
2523 1.1 mrg     cpc     r29, cdtors_tst_reg
2524 1.1 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2525 1.1 mrg     ldi     r24, pm_hh8(__dtors_end)
2526 1.1.1.2 mrg     cpc     r16, r24
2527 1.1 mrg #endif /* HAVE_EIJMP */
2528 1.1.1.2 mrg     brne    .L__do_global_dtors_loop
2529 1.1 mrg ENDF __do_global_dtors
2530 1.1.1.2 mrg #endif /* L_dtors */
2531 1.1 mrg
2532 1.1 mrg #undef cdtors_tst_reg
2533 1.1 mrg
2534 1.1 mrg .section .text.libgcc, "ax", @progbits
2535 1.1 mrg
2536 1.1 mrg #if !defined (__AVR_TINY__)
2537 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2538 1.1 mrg ;; Loading n bytes from Flash; n = 3,4
2539 1.1 mrg ;; R22... = Flash[Z]
2540 1.1 mrg ;; Clobbers: __tmp_reg__
2541 1.1 mrg
2542 1.1 mrg #if (defined (L_load_3) \
2543 1.1 mrg || defined (L_load_4)) \
2544 1.1 mrg && !defined (__AVR_HAVE_LPMX__)
2545 1.1 mrg
2546 1.1 mrg ;; Destination
2547 1.1 mrg #define D0 22
2548 1.1 mrg #define D1 D0+1
2549 1.1 mrg #define D2 D0+2
2550 1.1 mrg #define D3 D0+3
2551 1.1 mrg
;; Load one byte from flash into \dest via plain LPM (no LPMX on this
;; device).  Advances Z after each byte, except after the last byte of the
;; n-byte sequence, where Z is rewound by n-1 to its original value.
2552 1.1 mrg .macro  .load dest, n
2553 1.1 mrg     lpm
2554 1.1 mrg     mov     \dest, r0
2555 1.1 mrg .if \dest != D0+\n-1
2556 1.1 mrg     adiw    r30, 1
2557 1.1 mrg .else
2558 1.1 mrg     sbiw    r30, \n-1
2559 1.1 mrg .endif
2560 1.1 mrg .endm
2561 1.1 mrg
2562 1.1 mrg #if defined (L_load_3)
2563 1.1 mrg DEFUN __load_3
;; 3-byte flash load = 4-byte load with the 4th destination byte (D3)
;; preserved around the call.
2564 1.1 mrg     push    D3
2565 1.1 mrg     XCALL   __load_4
2566 1.1 mrg     pop     D3
2567 1.1 mrg     ret
2568 1.1 mrg ENDF __load_3
2569 1.1 mrg #endif /* L_load_3 */
2570 1.1 mrg
2571 1.1 mrg #if defined (L_load_4)
2572 1.1 mrg DEFUN __load_4
;; Load 4 bytes R22..R25 from Flash[Z]; Z is left unchanged (the .load
;; macro rewinds it on the last byte).
2573 1.1 mrg     .load   D0, 4
2574 1.1 mrg     .load   D1, 4
2575 1.1 mrg     .load   D2, 4
2576 1.1.1.2 mrg     .load   D3, 4
2577 1.1 mrg     ret
2578 1.1.1.2 mrg ENDF __load_4
2579 1.1 mrg #endif /* L_load_4 */
2580 1.1 mrg
2581 1.1 mrg #endif /* L_load_3 || L_load_4 */
2582 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2583 1.1 mrg
2584 1.1 mrg #if !defined (__AVR_TINY__)
2585 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2586 1.1 mrg ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2587 1.1 mrg ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2588 1.1 mrg ;; Clobbers: __tmp_reg__, R21, R30, R31
2589 1.1 mrg
2590 1.1 mrg #if (defined (L_xload_1) \
2591 1.1 mrg || defined (L_xload_2) \
2592 1.1 mrg || defined (L_xload_3) \
2593 1.1 mrg || defined (L_xload_4))
2594 1.1 mrg
2595 1.1 mrg ;; Destination
2596 1.1 mrg #define D0 22
2597 1.1 mrg #define D1 D0+1
2598 1.1 mrg #define D2 D0+2
2599 1.1 mrg #define D3 D0+3
2600 1.1 mrg
2601 1.1 mrg ;; Register containing bits 16+ of the address
2602 1.1 mrg
2603 1.1 mrg #define HHI8 21
2604 1.1 mrg
;; Load one byte of an n-byte value from flash into \dest, using the best
;; instruction the device offers (ELPM Rd,Z+ / ELPM / LPM Rd,Z+ / LPM).
;; On plain-ELPM devices the 24-bit address is advanced by hand, carrying
;; into HHI8 and refreshing RAMPZ; RAMPZ is reset to 0 after the last byte
;; on RAMPD devices so EBI accesses are not corrupted.
2605 1.1 mrg .macro  .xload dest, n
2606 1.1 mrg #if defined (__AVR_HAVE_ELPMX__)
2607 1.1 mrg     elpm    \dest, Z+
2608 1.1 mrg #elif defined (__AVR_HAVE_ELPM__)
2609 1.1 mrg     elpm
2610 1.1 mrg     mov     \dest, r0
2611 1.1 mrg .if \dest != D0+\n-1
2612 1.1 mrg     adiw    r30, 1
2613 1.1 mrg     adc     HHI8, __zero_reg__
2614 1.1 mrg     out     __RAMPZ__, HHI8
2615 1.1 mrg .endif
2616 1.1 mrg #elif defined (__AVR_HAVE_LPMX__)
2617 1.1 mrg     lpm     \dest, Z+
2618 1.1 mrg #else
2619 1.1 mrg     lpm
2620 1.1 mrg     mov     \dest, r0
2621 1.1 mrg .if \dest != D0+\n-1
2622 1.1 mrg     adiw    r30, 1
2623 1.1 mrg .endif
2624 1.1 mrg #endif
2625 1.1 mrg #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2626 1.1 mrg .if \dest == D0+\n-1
2627 1.1 mrg     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2628 1.1 mrg     out     __RAMPZ__, __zero_reg__
2629 1.1 mrg .endif
2630 1.1 mrg #endif
2631 1.1 mrg .endm ; .xload
2632 1.1 mrg
2633 1.1 mrg #if defined (L_xload_1)
2634 1.1 mrg DEFUN __xload_1
;; Load 1 byte into R22 from Flash[HHI8:Z] or RAM[Z]; HHI8 (R21) bit 7
;; selects RAM.  On LPMX-only devices both loads are 1-word, so a pair of
;; SBRC/SBRS skips suffices; otherwise branch to the RAM path.
2635 1.1 mrg #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2636 1.1 mrg     sbrc    HHI8, 7
2637 1.1 mrg     ld      D0, Z
2638 1.1 mrg     sbrs    HHI8, 7
2639 1.1 mrg     lpm     D0, Z
2640 1.1 mrg     ret
2641 1.1 mrg #else
2642 1.1 mrg     sbrc    HHI8, 7
2643 1.1 mrg     rjmp    1f
2644 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2645 1.1 mrg     out     __RAMPZ__, HHI8
2646 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2647 1.1 mrg     .xload  D0, 1
2648 1.1 mrg     ret
2649 1.1 mrg 1:  ld      D0, Z
2650 1.1 mrg     ret
2651 1.1 mrg #endif /* LPMx && ! ELPM */
2652 1.1 mrg ENDF __xload_1
2653 1.1 mrg #endif /* L_xload_1 */
2654 1.1 mrg
2655 1.1 mrg #if defined (L_xload_2)
2656 1.1 mrg DEFUN __xload_2
;; Load 2 bytes into R23:R22 from Flash[HHI8:Z] or RAM[Z]; HHI8 (R21)
;; bit 7 selects RAM.
2657 1.1 mrg     sbrc    HHI8, 7
2658 1.1 mrg     rjmp    1f
2659 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2660 1.1 mrg     out     __RAMPZ__, HHI8
2661 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2662 1.1 mrg     .xload  D0, 2
2663 1.1 mrg     .xload  D1, 2
2664 1.1 mrg     ret
2665 1.1 mrg 1:  ld      D0, Z+
2666 1.1 mrg     ld      D1, Z+
2667 1.1 mrg     ret
2668 1.1 mrg ENDF __xload_2
2669 1.1 mrg #endif /* L_xload_2 */
2670 1.1 mrg
2671 1.1 mrg #if defined (L_xload_3)
2672 1.1 mrg DEFUN __xload_3
;; Load 3 bytes into R24:R22 from Flash[HHI8:Z] or RAM[Z]; HHI8 (R21)
;; bit 7 selects RAM.
2673 1.1 mrg     sbrc    HHI8, 7
2674 1.1 mrg     rjmp    1f
2675 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2676 1.1 mrg     out     __RAMPZ__, HHI8
2677 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2678 1.1 mrg     .xload  D0, 3
2679 1.1 mrg     .xload  D1, 3
2680 1.1 mrg     .xload  D2, 3
2681 1.1 mrg     ret
2682 1.1 mrg 1:  ld      D0, Z+
2683 1.1 mrg     ld      D1, Z+
2684 1.1 mrg     ld      D2, Z+
2685 1.1 mrg     ret
2686 1.1 mrg ENDF __xload_3
2687 1.1 mrg #endif /* L_xload_3 */
2688 1.1 mrg
2689 1.1 mrg #if defined (L_xload_4)
2690 1.1 mrg DEFUN __xload_4
;; Load 4 bytes into R25:R22 from Flash[HHI8:Z] or RAM[Z]; HHI8 (R21)
;; bit 7 selects RAM.
2691 1.1 mrg     sbrc    HHI8, 7
2692 1.1 mrg     rjmp    1f
2693 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2694 1.1 mrg     out     __RAMPZ__, HHI8
2695 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2696 1.1 mrg     .xload  D0, 4
2697 1.1 mrg     .xload  D1, 4
2698 1.1 mrg     .xload  D2, 4
2699 1.1 mrg     .xload  D3, 4
2700 1.1 mrg     ret
2701 1.1 mrg 1:  ld      D0, Z+
2702 1.1 mrg     ld      D1, Z+
2703 1.1 mrg     ld      D2, Z+
2704 1.1.1.2 mrg     ld      D3, Z+
2705 1.1 mrg     ret
2706 1.1.1.2 mrg ENDF __xload_4
2707 1.1 mrg #endif /* L_xload_4 */
2708 1.1 mrg
2709 1.1 mrg #endif /* L_xload_{1|2|3|4} */
2710 1.1 mrg #endif /* if !defined (__AVR_TINY__) */
2711 1.1 mrg
2712 1.1 mrg #if !defined (__AVR_TINY__)
2713 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2714 1.1 mrg ;; memcopy from Address Space __pgmx to RAM
2715 1.1 mrg ;; R23:Z = Source Address
2716 1.1 mrg ;; X = Destination Address
2717 1.1 mrg ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2718 1.1 mrg
2719 1.1 mrg #if defined (L_movmemx)
2720 1.1 mrg
2721 1.1 mrg #define HHI8 23
2722 1.1 mrg #define LOOP 24
2723 1.1 mrg
2724 1.1 mrg DEFUN __movmemx_qi
2725 1.1 mrg     ;; #Bytes to copy fit in 8 Bits (1..255)
2726 1.1 mrg     ;; Zero-extend Loop Counter
2727 1.1 mrg     clr     LOOP+1
2728 1.1 mrg     ;; FALLTHRU
2729 1.1 mrg ENDF __movmemx_qi
2730 1.1 mrg
2731 1.1 mrg DEFUN __movmemx_hi
2732 1.1 mrg
;; Copy LOOP (R25:R24) bytes from the 24-bit __pgmx source HHI8:Z to RAM
;; destination X.  HHI8 (R23) bit 7 distinguishes a RAM source from flash.
2733 1.1 mrg     ;; Read from where?
2734 1.1 mrg     sbrc    HHI8, 7
2735 1.1 mrg     rjmp    1f
2736 1.1 mrg
2737 1.1 mrg     ;; Read from Flash
2738 1.1 mrg
2739 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2740 1.1 mrg     out     __RAMPZ__, HHI8
2741 1.1 mrg #endif
2742 1.1 mrg
2743 1.1 mrg 0:  ;; Load 1 Byte from Flash...
2744 1.1 mrg
2745 1.1 mrg #if defined (__AVR_HAVE_ELPMX__)
2746 1.1 mrg     elpm    r0, Z+
2747 1.1 mrg #elif defined (__AVR_HAVE_ELPM__)
;; Plain ELPM: advance the 24-bit address by hand, carrying into RAMPZ.
2748 1.1 mrg     elpm
2749 1.1 mrg     adiw    r30, 1
2750 1.1 mrg     adc     HHI8, __zero_reg__
2751 1.1 mrg     out     __RAMPZ__, HHI8
2752 1.1 mrg #elif defined (__AVR_HAVE_LPMX__)
2753 1.1 mrg     lpm     r0, Z+
2754 1.1 mrg #else
2755 1.1 mrg     lpm
2756 1.1 mrg     adiw    r30, 1
2757 1.1 mrg #endif
2758 1.1 mrg
2759 1.1 mrg     ;; ...and store that Byte to RAM Destination
2760 1.1 mrg     st      X+, r0
2761 1.1 mrg     sbiw    LOOP, 1
2762 1.1 mrg     brne    0b
2763 1.1 mrg #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2764 1.1 mrg     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2765 1.1 mrg     out     __RAMPZ__, __zero_reg__
2766 1.1 mrg #endif /* ELPM && RAMPD */
2767 1.1 mrg     ret
2768 1.1 mrg
2769 1.1 mrg     ;; Read from RAM
2770 1.1 mrg
2771 1.1 mrg 1:  ;; Read 1 Byte from RAM...
2772 1.1 mrg     ld      r0, Z+
2773 1.1 mrg     ;; and store that Byte to RAM Destination
2774 1.1 mrg     st      X+, r0
2775 1.1 mrg     sbiw    LOOP, 1
2776 1.1 mrg     brne    1b
2777 1.1 mrg     ret
2778 1.1.1.2 mrg ENDF __movmemx_hi
2779 1.1 mrg
2780 1.1 mrg #undef HHI8
2781 1.1 mrg #undef LOOP
2782 1.1 mrg
2783 1.1 mrg #endif /* L_movmemx */
2784 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2785 1.1 mrg
2786 1.1 mrg
2787 1.1 mrg .section .text.libgcc.builtins, "ax", @progbits
2789 1.1 mrg
2790 1.1 mrg /**********************************
2791 1.1 mrg * Find first set Bit (ffs)
2792 1.1 mrg **********************************/
2793 1.1 mrg
2794 1.1 mrg #if defined (L_ffssi2)
2795 1.1 mrg ;; find first set bit
2796 1.1 mrg ;; r25:r24 = ffs32 (r25:r22)
2797 1.1 mrg ;; clobbers: r22, r26
2798 1.1 mrg DEFUN __ffssi2
;; Find the first non-zero byte from LSB upwards, accumulating a bit-offset
;; of 8 per skipped byte in R26; folding with OR lets the following bytes
;; be tested for zero.  The non-zero byte is finished by __loop_ffsqi2.
;; All four bytes zero => result 0 (returns with R25:R24 = 0 from caller's
;; point of view only if R24 was 0; ffs(0) = 0 by the ffs convention).
2799 1.1 mrg     clr     r26
2800 1.1 mrg     tst     r22
2801 1.1 mrg     brne    1f
2802 1.1 mrg     subi    r26, -8
2803 1.1 mrg     or      r22, r23
2804 1.1 mrg     brne    1f
2805 1.1 mrg     subi    r26, -8
2806 1.1 mrg     or      r22, r24
2807 1.1 mrg     brne    1f
2808 1.1 mrg     subi    r26, -8
2809 1.1 mrg     or      r22, r25
2810 1.1 mrg     brne    1f
2811 1.1 mrg     ret
2812 1.1 mrg 1:  mov     r24, r22
2813 1.1 mrg     XJMP    __loop_ffsqi2
2814 1.1 mrg ENDF __ffssi2
2815 1.1 mrg #endif /* defined (L_ffssi2) */
2816 1.1 mrg
2817 1.1 mrg #if defined (L_ffshi2)
2818 1.1 mrg ;; find first set bit
2819 1.1 mrg ;; r25:r24 = ffs16 (r25:r24)
2820 1.1 mrg ;; clobbers: r26
2821 1.1 mrg DEFUN __ffshi2
;; 16-bit ffs: if the low byte is non-zero, count within it; otherwise
;; fold the high byte in with offset 8.  Finishing is done by __loop_ffsqi2.
2822 1.1 mrg     clr     r26
2823 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2824 1.1 mrg     ;; Some cores have problem skipping 2-word instruction
2825 1.1 mrg     tst     r24
2826 1.1 mrg     breq    2f
2827 1.1 mrg #else
2828 1.1 mrg     cpse    r24, __zero_reg__
2829 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2830 1.1 mrg 1:  XJMP    __loop_ffsqi2
2831 1.1 mrg 2:  ldi     r26, 8
2832 1.1 mrg     or      r24, r25
2833 1.1 mrg     brne    1b
2834 1.1 mrg     ret
2835 1.1 mrg ENDF __ffshi2
2836 1.1 mrg #endif /* defined (L_ffshi2) */
2837 1.1 mrg
2838 1.1 mrg #if defined (L_loop_ffsqi2)
2839 1.1 mrg ;; Helper for ffshi2, ffssi2
2840 1.1 mrg ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2841 1.1 mrg ;; r24 must be != 0
2842 1.1 mrg ;; clobbers: r26
2843 1.1 mrg DEFUN __loop_ffsqi2
;; Shift R24 right until the bit that falls into carry is set; each shift
;; increments R26, so R26 ends at (byte offset) + 1-based bit position.
2844 1.1 mrg     inc     r26
2845 1.1 mrg     lsr     r24
2846 1.1 mrg     brcc    __loop_ffsqi2
2847 1.1 mrg     mov     r24, r26
2848 1.1 mrg     clr     r25
2849 1.1 mrg     ret
2850 1.1 mrg ENDF __loop_ffsqi2
2851 1.1 mrg #endif /* defined (L_loop_ffsqi2) */
2852 1.1 mrg
2853 1.1 mrg
2854 1.1 mrg /**********************************
2856 1.1 mrg * Count trailing Zeros (ctz)
2857 1.1 mrg **********************************/
2858 1.1 mrg
2859 1.1 mrg #if defined (L_ctzsi2)
2860 1.1 mrg ;; count trailing zeros
2861 1.1 mrg ;; r25:r24 = ctz32 (r25:r22)
2862 1.1 mrg ;; clobbers: r26, r22
2863 1.1 mrg ;; ctz(0) = 255
2864 1.1 mrg ;; Note that ctz(0) in undefined for GCC
;; ctz32 = ffs32 - 1 (hence ctz(0) yields 255; ctz(0) is undefined for GCC).
2865 1.1 mrg DEFUN __ctzsi2
2866 1.1 mrg     XCALL   __ffssi2
2867 1.1 mrg     dec     r24
2868 1.1 mrg     ret
2869 1.1 mrg ENDF __ctzsi2
2870 1.1 mrg #endif /* defined (L_ctzsi2) */
2871 1.1 mrg
2872 1.1 mrg #if defined (L_ctzhi2)
2873 1.1 mrg ;; count trailing zeros
2874 1.1 mrg ;; r25:r24 = ctz16 (r25:r24)
2875 1.1 mrg ;; clobbers: r26
2876 1.1 mrg ;; ctz(0) = 255
2877 1.1 mrg ;; Note that ctz(0) in undefined for GCC
;; ctz16 = ffs16 - 1 (hence ctz(0) yields 255; ctz(0) is undefined for GCC).
2878 1.1 mrg DEFUN __ctzhi2
2879 1.1 mrg     XCALL   __ffshi2
2880 1.1 mrg     dec     r24
2881 1.1 mrg     ret
2882 1.1 mrg ENDF __ctzhi2
2883 1.1 mrg #endif /* defined (L_ctzhi2) */
2884 1.1 mrg
2885 1.1 mrg
2886 1.1 mrg /**********************************
2888 1.1 mrg * Count leading Zeros (clz)
2889 1.1 mrg **********************************/
2890 1.1 mrg
2891 1.1 mrg #if defined (L_clzdi2)
2892 1.1 mrg ;; count leading zeros
2893 1.1 mrg ;; r25:r24 = clz64 (r25:r18)
2894 1.1 mrg ;; clobbers: r22, r23, r26
2895 1.1 mrg DEFUN __clzdi2
;; clz64: count the high 32 bits first; a result with bit 5 set (== 32)
;; means they were all zero, so count the low 32 bits and add 32.
2896 1.1 mrg     XCALL   __clzsi2
2897 1.1 mrg     sbrs    r24, 5
2898 1.1 mrg     ret
2899 1.1 mrg     mov_l   r22, r18
2900 1.1 mrg     mov_h   r23, r19
2901 1.1 mrg     mov_l   r24, r20
2902 1.1 mrg     mov_h   r25, r21
2903 1.1 mrg     XCALL   __clzsi2
2904 1.1 mrg     subi    r24, -32
2905 1.1 mrg     ret
2906 1.1 mrg ENDF __clzdi2
2907 1.1 mrg #endif /* defined (L_clzdi2) */
2908 1.1 mrg
2909 1.1 mrg #if defined (L_clzsi2)
2910 1.1 mrg ;; count leading zeros
2911 1.1 mrg ;; r25:r24 = clz32 (r25:r22)
2912 1.1 mrg ;; clobbers: r26
2913 1.1 mrg DEFUN __clzsi2
;; clz32: count the high 16 bits first; a result with bit 4 set (== 16)
;; means they were all zero, so count the low 16 bits and add 16.
2914 1.1 mrg     XCALL   __clzhi2
2915 1.1 mrg     sbrs    r24, 4
2916 1.1 mrg     ret
2917 1.1 mrg     mov_l   r24, r22
2918 1.1 mrg     mov_h   r25, r23
2919 1.1 mrg     XCALL   __clzhi2
2920 1.1 mrg     subi    r24, -16
2921 1.1 mrg     ret
2922 1.1 mrg ENDF __clzsi2
2923 1.1 mrg #endif /* defined (L_clzsi2) */
2924 1.1 mrg
2925 1.1 mrg #if defined (L_clzhi2)
2926 1.1 mrg ;; count leading zeros
2927 1.1 mrg ;; r25:r24 = clz16 (r25:r24)
2928 1.1 mrg ;; clobbers: r26
2929 1.1 mrg DEFUN __clzhi2
;; clz16: R26 accumulates the count.  Select the non-zero byte (offset 8
;; if the high byte is zero); a wholly-zero input returns 16.  Within the
;; byte: if its value is < 16 the top nibble is clear, so pre-add 3 and
;; SWAP to start shifting from the low nibble; then shift left, counting
;; (label 2) until a set bit reaches carry (label 3 is the loop test).
2930 1.1 mrg     clr     r26
2931 1.1 mrg     tst     r25
2932 1.1 mrg     brne    1f
2933 1.1 mrg     subi    r26, -8
2934 1.1 mrg     or      r25, r24
2935 1.1 mrg     brne    1f
2936 1.1 mrg     ldi     r24, 16
2937 1.1 mrg     ret
2938 1.1 mrg 1:  cpi     r25, 16
2939 1.1 mrg     brsh    3f
2940 1.1 mrg     subi    r26, -3
2941 1.1 mrg     swap    r25
2942 1.1 mrg 2:  inc     r26
2943 1.1 mrg 3:  lsl     r25
2944 1.1 mrg     brcc    2b
2945 1.1 mrg     mov     r24, r26
2946 1.1 mrg     clr     r25
2947 1.1 mrg     ret
2948 1.1 mrg ENDF __clzhi2
2949 1.1 mrg #endif /* defined (L_clzhi2) */
2950 1.1 mrg
2951 1.1 mrg
2952 1.1 mrg /**********************************
2954 1.1 mrg * Parity
2955 1.1 mrg **********************************/
2956 1.1 mrg
2957 1.1 mrg #if defined (L_paritydi2)
2958 1.1 mrg ;; r25:r24 = parity64 (r25:r18)
2959 1.1 mrg ;; clobbers: __tmp_reg__
2960 1.1 mrg DEFUN __paritydi2
;; parity64: XOR the upper 32 bits' bytes into R24, then fall back on the
;; 32-bit routine (parity is invariant under XOR-folding).
2961 1.1 mrg     eor     r24, r18
2962 1.1 mrg     eor     r24, r19
2963 1.1 mrg     eor     r24, r20
2964 1.1 mrg     eor     r24, r21
2965 1.1 mrg     XJMP    __paritysi2
2966 1.1 mrg ENDF __paritydi2
2967 1.1 mrg #endif /* defined (L_paritydi2) */
2968 1.1 mrg
2969 1.1 mrg #if defined (L_paritysi2)
2970 1.1 mrg ;; r25:r24 = parity32 (r25:r22)
2971 1.1 mrg ;; clobbers: __tmp_reg__
2972 1.1 mrg DEFUN __paritysi2
;; parity32: XOR-fold the low 16 bits into R24, then use the 16-bit routine.
2973 1.1 mrg     eor     r24, r22
2974 1.1 mrg     eor     r24, r23
2975 1.1 mrg     XJMP    __parityhi2
2976 1.1 mrg ENDF __paritysi2
2977 1.1 mrg #endif /* defined (L_paritysi2) */
2978 1.1 mrg
2979 1.1 mrg #if defined (L_parityhi2)
2980 1.1 mrg ;; r25:r24 = parity16 (r25:r24)
2981 1.1 mrg ;; clobbers: __tmp_reg__
2982 1.1 mrg DEFUN __parityhi2
;; parity16: XOR-fold the high byte into R24 and fall through to parity8.
2983 1.1 mrg     eor     r24, r25
2984 1.1 mrg     ;; FALLTHRU
2985 1.1 mrg ENDF __parityhi2
2986 1.1 mrg
2987 1.1 mrg ;; r25:r24 = parity8 (r24)
2988 1.1 mrg ;; clobbers: __tmp_reg__
2989 1.1 mrg DEFUN __parityqi2
2990 1.1 mrg     ;; parity is in r24[0..7]
;; Fold high nibble onto low nibble via SWAP + XOR.
2991 1.1 mrg     mov     __tmp_reg__, r24
2992 1.1 mrg     swap    __tmp_reg__
2993 1.1 mrg     eor     r24, __tmp_reg__
2994 1.1 mrg     ;; parity is in r24[0..3]
;; Constant trick (add 4, mask out bit 2, add 6) collapses the 4-bit
;; parity into bits 0 and 3 without a loop.
2995 1.1 mrg     subi    r24, -4
2996 1.1 mrg     andi    r24, -5
2997 1.1 mrg     subi    r24, -6
2998 1.1 mrg     ;; parity is in r24[0,3]
2999 1.1 mrg     sbrc    r24, 3
3000 1.1 mrg     inc     r24
3001 1.1 mrg     ;; parity is in r24[0]
3002 1.1 mrg     andi    r24, 1
3003 1.1 mrg     clr     r25
3004 1.1 mrg     ret
3005 1.1 mrg ENDF __parityqi2
3006 1.1 mrg #endif /* defined (L_parityhi2) */
3007 1.1 mrg
3008 1.1 mrg
3009 1.1 mrg /**********************************
3011 1.1 mrg * Population Count
3012 1.1 mrg **********************************/
3013 1.1 mrg
3014 1.1 mrg #if defined (L_popcounthi2)
3015 1.1 mrg ;; population count
3016 1.1 mrg ;; r25:r24 = popcount16 (r25:r24)
3017 1.1 mrg ;; clobbers: __tmp_reg__
3018 1.1 mrg DEFUN __popcounthi2
;; popcount16 = popcount8(low) + popcount8(high); the low byte's count is
;; parked on the stack and added back by the shared _tail helper.
3019 1.1 mrg     XCALL   __popcountqi2
3020 1.1 mrg     push    r24
3021 1.1 mrg     mov     r24, r25
3022 1.1 mrg     XCALL   __popcountqi2
3023 1.1 mrg     clr     r25
3024 1.1 mrg     ;; FALLTHRU
3025 1.1 mrg ENDF __popcounthi2
3026 1.1 mrg
;; Shared tail: pop the partial count pushed by the caller and add it in.
3027 1.1 mrg DEFUN __popcounthi2_tail
3028 1.1 mrg     pop     __tmp_reg__
3029 1.1 mrg     add     r24, __tmp_reg__
3030 1.1 mrg     ret
3031 1.1 mrg ENDF __popcounthi2_tail
3032 1.1 mrg #endif /* defined (L_popcounthi2) */
3033 1.1 mrg
3034 1.1 mrg #if defined (L_popcountsi2)
3035 1.1 mrg ;; population count
3036 1.1 mrg ;; r25:r24 = popcount32 (r25:r22)
3037 1.1 mrg ;; clobbers: __tmp_reg__
3038 1.1 mrg DEFUN __popcountsi2
;; popcount32 = popcount16(high) + popcount16(low), combined via the
;; stack and __popcounthi2_tail.
3039 1.1 mrg     XCALL   __popcounthi2
3040 1.1 mrg     push    r24
3041 1.1 mrg     mov_l   r24, r22
3042 1.1 mrg     mov_h   r25, r23
3043 1.1 mrg     XCALL   __popcounthi2
3044 1.1 mrg     XJMP    __popcounthi2_tail
3045 1.1 mrg ENDF __popcountsi2
3046 1.1 mrg #endif /* defined (L_popcountsi2) */
3047 1.1 mrg
3048 1.1 mrg #if defined (L_popcountdi2)
3049 1.1 mrg ;; population count
3050 1.1 mrg ;; r25:r24 = popcount64 (r25:r18)
3051 1.1 mrg ;; clobbers: r22, r23, __tmp_reg__
3052 1.1 mrg DEFUN __popcountdi2
              ;; Count the high 32 bits (r25:r22), save that count, ...
3053 1.1 mrg 	XCALL __popcountsi2
3054 1.1 mrg 	push r24
              ;; ... move the low 32 bits (r21:r18) into r25:r22 and count them.
3055 1.1 mrg 	mov_l r22, r18
3056 1.1 mrg 	mov_h r23, r19
3057 1.1 mrg 	mov_l r24, r20
3058 1.1 mrg 	mov_h r25, r21
3059 1.1 mrg 	XCALL __popcountsi2
              ;; Tail adds the saved high-half count and returns.
3060 1.1 mrg 	XJMP __popcounthi2_tail
3061 1.1 mrg ENDF __popcountdi2
3062 1.1 mrg #endif /* defined (L_popcountdi2) */
3063 1.1 mrg
3064 1.1 mrg #if defined (L_popcountqi2)
3065 1.1 mrg ;; population count
3066 1.1 mrg ;; r24 = popcount8 (r24)
3067 1.1 mrg ;; clobbers: __tmp_reg__
3068 1.1 mrg DEFUN __popcountqi2
              ;; r24 starts as bit 0 of the input; the remaining seven bits are
              ;; shifted out of __tmp_reg__ one by one and added via carry.
3069 1.1 mrg 	mov __tmp_reg__, r24
3070 1.1 mrg 	andi r24, 1
              ;; First LSR discards bit 0 (already accounted for in r24).
3071 1.1 mrg 	lsr __tmp_reg__
              ;; Each LSR moves the next bit into carry; ADC accumulates it.
3072 1.1 mrg 	lsr __tmp_reg__
3073 1.1 mrg 	adc r24, __zero_reg__
3074 1.1 mrg 	lsr __tmp_reg__
3075 1.1 mrg 	adc r24, __zero_reg__
3076 1.1 mrg 	lsr __tmp_reg__
3077 1.1 mrg 	adc r24, __zero_reg__
3078 1.1 mrg 	lsr __tmp_reg__
3079 1.1 mrg 	adc r24, __zero_reg__
3080 1.1 mrg 	lsr __tmp_reg__
3081 1.1 mrg 	adc r24, __zero_reg__
              ;; After the 7th LSR: carry = bit 6, __tmp_reg__ = bit 7.
              ;; This single ADC therefore adds both remaining bits at once.
3082 1.1 mrg 	lsr __tmp_reg__
3083 1.1 mrg 	adc r24, __tmp_reg__
3084 1.1 mrg 	ret
3085 1.1 mrg ENDF __popcountqi2
3086 1.1 mrg #endif /* defined (L_popcountqi2) */
3087 1.1 mrg
3088 1.1 mrg
3089 1.1 mrg /**********************************
3091 1.1 mrg * Swap bytes
3092 1.1 mrg **********************************/
3093 1.1 mrg
3094 1.1 mrg ;; swap two registers with different register number
              ;; Classic XOR swap: exchanges \a and \b without a scratch
              ;; register.  Must not be used with \a == \b — that would zero
              ;; the register instead of swapping.
3095 1.1 mrg .macro bswap a, b
3096 1.1 mrg 	eor \a, \b
3097 1.1 mrg 	eor \b, \a
3098 1.1 mrg 	eor \a, \b
3099 1.1 mrg .endm
3100 1.1 mrg
3101 1.1 mrg #if defined (L_bswapsi2)
3102 1.1 mrg ;; swap bytes
3103 1.1 mrg ;; r25:r22 = bswap32 (r25:r22)
              ;; Byte-reverse a 32-bit value in place: exchange the outer pair
              ;; (r22 <-> r25) and the inner pair (r23 <-> r24).  No clobbers.
3104 1.1 mrg DEFUN __bswapsi2
3105 1.1 mrg 	bswap r22, r25
3106 1.1 mrg 	bswap r23, r24
3107 1.1 mrg 	ret
3108 1.1 mrg ENDF __bswapsi2
3109 1.1 mrg #endif /* defined (L_bswapsi2) */
3110 1.1 mrg
3111 1.1 mrg #if defined (L_bswapdi2)
3112 1.1 mrg ;; swap bytes
3113 1.1 mrg ;; r25:r18 = bswap64 (r25:r18)
              ;; Byte-reverse a 64-bit value in place: exchange the four
              ;; mirror-image register pairs.  No clobbers.
3114 1.1 mrg DEFUN __bswapdi2
3115 1.1 mrg 	bswap r18, r25
3116 1.1 mrg 	bswap r19, r24
3117 1.1 mrg 	bswap r20, r23
3118 1.1 mrg 	bswap r21, r22
3119 1.1 mrg 	ret
3120 1.1 mrg ENDF __bswapdi2
3121 1.1 mrg #endif /* defined (L_bswapdi2) */
3122 1.1 mrg
3123 1.1 mrg
3124 1.1 mrg /**********************************
3126 1.1 mrg * 64-bit shifts
3127 1.1 mrg **********************************/
3128 1.1 mrg
3129 1.1 mrg #if defined (L_ashrdi3)
3130 1.1 mrg ;; Arithmetic shift right
3131 1.1 mrg ;; r25:r18 = ashr64 (r25:r18, r17:r16)
              ;; Only r16 (the low count byte) is used; shift counts are
              ;; expected to be < 64.  Clobbers __tmp_reg__; __zero_reg__ is
              ;; used transiently but restored to 0 by __lshrdi3 below.
3132 1.1 mrg DEFUN __ashrdi3
              ;; Record the sign bit in __zero_reg__.0; __lshrdi3 turns it
              ;; into the fill value shifted in from the left.
3133 1.1 mrg 	bst r25, 7
3134 1.1 mrg 	bld __zero_reg__, 0
3135 1.1 mrg 	;; FALLTHRU
3136 1.1 mrg ENDF __ashrdi3
3137 1.1 mrg
3138 1.1 mrg ;; Logic shift right
3139 1.1 mrg ;; r25:r18 = lshr64 (r25:r18, r17:r16)
3140 1.1 mrg DEFUN __lshrdi3
              ;; Move __zero_reg__.0 into carry (0 on the plain LSHR entry,
              ;; the sign bit when entered via __ashrdi3); this also restores
              ;; __zero_reg__ to 0.
3141 1.1 mrg 	lsr __zero_reg__
              ;; __tmp_reg__ = 0 - carry = 0x00 or 0xFF: the fill byte.
3142 1.1 mrg 	sbc __tmp_reg__, __tmp_reg__
              ;; r16 is call-saved in the avr ABI, so preserve it.
3143 1.1 mrg 	push r16
              ;; Whole-byte shifts while the count is >= 8.
3144 1.1 mrg 0:	cpi r16, 8
3145 1.1 mrg 	brlo 2f
3146 1.1 mrg 	subi r16, 8
3147 1.1 mrg 	mov r18, r19
3148 1.1 mrg 	mov r19, r20
3149 1.1 mrg 	mov r20, r21
3150 1.1 mrg 	mov r21, r22
3151 1.1 mrg 	mov r22, r23
3152 1.1 mrg 	mov r23, r24
3153 1.1 mrg 	mov r24, r25
              ;; Shift in the fill byte at the top.
3154 1.1 mrg 	mov r25, __tmp_reg__
3155 1.1 mrg 	rjmp 0b
              ;; Remaining 0..7 bits, one at a time.  ASR keeps __tmp_reg__ at
              ;; 0x00/0xFF and sets carry to the fill bit for the ROR chain.
3156 1.1 mrg 1:	asr __tmp_reg__
3157 1.1 mrg 	ror r25
3158 1.1 mrg 	ror r24
3159 1.1 mrg 	ror r23
3160 1.1 mrg 	ror r22
3161 1.1 mrg 	ror r21
3162 1.1 mrg 	ror r20
3163 1.1 mrg 	ror r19
3164 1.1 mrg 	ror r18
              ;; DEC first, then test: executes the bit loop exactly r16 times.
3165 1.1 mrg 2:	dec r16
3166 1.1 mrg 	brpl 1b
3167 1.1 mrg 	pop r16
3168 1.1 mrg 	ret
3169 1.1 mrg ENDF __lshrdi3
3170 1.1 mrg #endif /* defined (L_ashrdi3) */
3171 1.1 mrg
3172 1.1 mrg #if defined (L_ashldi3)
3173 1.1 mrg ;; Shift left
3174 1.1 mrg ;; r25:r18 = ashl64 (r25:r18, r17:r16)
              ;; Only r16 (the low count byte) is used.  r16 is call-saved in
              ;; the avr ABI and therefore preserved via push/pop.
3175 1.1 mrg DEFUN __ashldi3
3176 1.1 mrg 	push r16
              ;; Whole-byte shifts while the count is >= 8: move every byte up
              ;; one position and clear the lowest byte.
3177 1.1 mrg 0:	cpi r16, 8
3178 1.1 mrg 	brlo 2f
3179 1.1 mrg 	mov r25, r24
3180 1.1 mrg 	mov r24, r23
3181 1.1 mrg 	mov r23, r22
3182 1.1 mrg 	mov r22, r21
3183 1.1 mrg 	mov r21, r20
3184 1.1 mrg 	mov r20, r19
3185 1.1 mrg 	mov r19, r18
3186 1.1 mrg 	clr r18
3187 1.1 mrg 	subi r16, 8
3188 1.1 mrg 	rjmp 0b
              ;; Remaining 0..7 bits: LSL/ROL ripple the carry upwards.
3189 1.1 mrg 1:	lsl r18
3190 1.1 mrg 	rol r19
3191 1.1 mrg 	rol r20
3192 1.1 mrg 	rol r21
3193 1.1 mrg 	rol r22
3194 1.1 mrg 	rol r23
3195 1.1 mrg 	rol r24
3196 1.1 mrg 	rol r25
              ;; DEC first, then test: executes the bit loop exactly r16 times.
3197 1.1 mrg 2:	dec r16
3198 1.1 mrg 	brpl 1b
3199 1.1 mrg 	pop r16
3200 1.1 mrg 	ret
3201 1.1 mrg ENDF __ashldi3
3202 1.1 mrg #endif /* defined (L_ashldi3) */
3203 1.1 mrg
3204 1.1 mrg #if defined (L_rotldi3)
3205 1.1 mrg ;; Rotate left
3206 1.1 mrg ;; r25:r18 = rotl64 (r25:r18, r17:r16)
              ;; Only r16 (the low count byte) is used.  Clobbers __tmp_reg__;
              ;; r16 is call-saved in the avr ABI, hence the push/pop.
3207 1.1 mrg DEFUN __rotldi3
3208 1.1 mrg 	push r16
              ;; Whole-byte rotations while the count is >= 8: the top byte
              ;; wraps around into the bottom via __tmp_reg__.
3209 1.1 mrg 0:	cpi r16, 8
3210 1.1 mrg 	brlo 2f
3211 1.1 mrg 	subi r16, 8
3212 1.1 mrg 	mov __tmp_reg__, r25
3213 1.1 mrg 	mov r25, r24
3214 1.1 mrg 	mov r24, r23
3215 1.1 mrg 	mov r23, r22
3216 1.1 mrg 	mov r22, r21
3217 1.1 mrg 	mov r21, r20
3218 1.1 mrg 	mov r20, r19
3219 1.1 mrg 	mov r19, r18
3220 1.1 mrg 	mov r18, __tmp_reg__
3221 1.1 mrg 	rjmp 0b
              ;; Remaining 0..7 bits: shift left, then wrap the bit rotated
              ;; out of r25 (now in carry) back into bit 0 of r18.
3222 1.1 mrg 1:	lsl r18
3223 1.1 mrg 	rol r19
3224 1.1 mrg 	rol r20
3225 1.1 mrg 	rol r21
3226 1.1 mrg 	rol r22
3227 1.1 mrg 	rol r23
3228 1.1 mrg 	rol r24
3229 1.1 mrg 	rol r25
3230 1.1 mrg 	adc r18, __zero_reg__
              ;; DEC first, then test: executes the bit loop exactly r16 times.
3231 1.1 mrg 2:	dec r16
3232 1.1 mrg 	brpl 1b
3233 1.1 mrg 	pop r16
3234 1.1 mrg 	ret
3235 1.1 mrg ENDF __rotldi3
3236 1.1 mrg #endif /* defined (L_rotldi3) */
3237 1.1 mrg
3238 1.1 mrg
3239 1.1 mrg .section .text.libgcc.fmul, "ax", @progbits
3241 1.1 mrg
3242 1.1 mrg /***********************************************************/
3243 1.1 mrg ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3244 1.1 mrg ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3245 1.1 mrg /***********************************************************/
3246 1.1 mrg
3247 1.1 mrg #define A1 24
3248 1.1 mrg #define B1 25
3249 1.1 mrg #define C0 22
3250 1.1 mrg #define C1 23
3251 1.1 mrg #define A0 __tmp_reg__
3252 1.1 mrg
3253 1.1 mrg #ifdef L_fmuls
3254 1.1 mrg ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3255 1.1 mrg ;;; Clobbers: r24, r25, __tmp_reg__
              ;;; Signed 1.7 x 1.7 fractional multiply: reduce to the unsigned
              ;;; case by taking absolute values and remembering the sign.
3256 1.1 mrg DEFUN __fmuls
3257 1.1 mrg 	;; A0.7 = negate result?
              ;; Sign of the product = sign(A) XOR sign(B), kept in bit 7 of A0.
3258 1.1 mrg 	mov A0, A1
3259 1.1 mrg 	eor A0, B1
3260 1.1 mrg 	;; B1 = |B1|
3261 1.1 mrg 	sbrc B1, 7
3262 1.1 mrg 	neg B1
              ;; __fmulsu_exit (in the L_fmulsu module) takes |A1|, multiplies,
              ;; and negates the result iff A0.7 is set.
3263 1.1 mrg 	XJMP __fmulsu_exit
3264 1.1 mrg ENDF __fmuls
3265 1.1 mrg #endif /* L_fmuls */
3266 1.1 mrg
3267 1.1 mrg #ifdef L_fmulsu
3268 1.1 mrg ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3269 1.1 mrg ;;; Clobbers: r24, r25, __tmp_reg__
              ;;; Signed (A) x unsigned (B) fractional multiply.
3270 1.1 mrg DEFUN __fmulsu
3271 1.1 mrg 	;; A0.7 = negate result?
              ;; Only A is signed, so the product's sign is A's sign bit.
3272 1.1 mrg 	mov A0, A1
3273 1.1 mrg 	;; FALLTHRU
3274 1.1 mrg ENDF __fmulsu
3275 1.1 mrg
3276 1.1 mrg ;; Helper for __fmuls and __fmulsu
              ;; Expects the "negate result?" flag in A0.7 and |B1| in B1.
3277 1.1 mrg DEFUN __fmulsu_exit
3278 1.1 mrg 	;; A1 = |A1|
3279 1.1 mrg 	sbrc A1, 7
3280 1.1 mrg 	neg A1
3281 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3282 1.1 mrg 	;; Some cores have problem skipping 2-word instruction
              ;; (XJMP may expand to a 2-word JMP), so branch with TST/BRMI
              ;; instead of SBRS-skipping over it.
3283 1.1 mrg 	tst A0
3284 1.1 mrg 	brmi 1f
3285 1.1 mrg #else
              ;; Skip the tail-jump below iff the result must be negated.
3286 1.1 mrg 	sbrs A0, 7
3287 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
              ;; No negation needed: tail call the unsigned multiply.
3288 1.1 mrg 	XJMP __fmul
3289 1.1 mrg 1:	XCALL __fmul
3290 1.1 mrg 	;; C = -C iff A0.7 = 1
              ;; (This path is only reached when A0.7 = 1.  NEG2 is a macro
              ;; defined earlier in this file; presumably it negates the
              ;; 16-bit pair C1:C0 — confirm with its definition.)
3291 1.1 mrg 	NEG2 C0
3292 1.1 mrg 	ret
3293 1.1 mrg ENDF __fmulsu_exit
3294 1.1 mrg #endif /* L_fmulsu */
3295 1.1 mrg
3296 1.1 mrg
3297 1.1 mrg #ifdef L_fmul
3298 1.1 mrg ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
3299 1.1 mrg ;;; Clobbers: r24, r25, __tmp_reg__
              ;;; Unsigned fractional multiply: C = (A * B) << 1, computed by
              ;;; shift-and-add over the bits of B from the MSB down, with the
              ;;; 16-bit accumulator A1:A0 (A0 starts as the zero low byte).
3300 1.1 mrg DEFUN __fmul
3301 1.1 mrg 	; clear result
3302 1.1 mrg 	clr C0
3303 1.1 mrg 	clr C1
3304 1.1 mrg 	clr A0
              ;; TST provides the flags for the first BRPL test; on later
              ;; iterations LSL B1 below sets them.
3305 1.1 mrg 1:	tst B1
3306 1.1 mrg 	;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
3307 1.1 mrg 2:	brpl 3f
3308 1.1 mrg 	;; C += A
3309 1.1 mrg 	add C0, A0
3310 1.1 mrg 	adc C1, A1
3311 1.1 mrg 3:	;; A >>= 1
3312 1.1 mrg 	lsr A1
3313 1.1 mrg 	ror A0
3314 ;; B <<= 1
3315 	lsl B1
              ;; Loop until all bits of B are consumed (B1 becomes 0).
3316 	brne 2b
3317 	ret
3318 ENDF __fmul
3319 #endif /* L_fmul */
3320
3321 #undef A0
3322 #undef A1
3323 #undef B1
3324 #undef C0
3325 #undef C1
3326
3327 #include "lib1funcs-fixed.S"
3328