lib1funcs.S revision 1.1.1.4 1 1.1 mrg /* -*- Mode: Asm -*- */
2 1.1.1.4 mrg /* Copyright (C) 1998-2017 Free Software Foundation, Inc.
3 1.1 mrg Contributed by Denis Chertykov <chertykov (at) gmail.com>
4 1.1 mrg
5 1.1 mrg This file is free software; you can redistribute it and/or modify it
6 1.1 mrg under the terms of the GNU General Public License as published by the
7 1.1 mrg Free Software Foundation; either version 3, or (at your option) any
8 1.1 mrg later version.
9 1.1 mrg
10 1.1 mrg This file is distributed in the hope that it will be useful, but
11 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 1.1 mrg General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1.1.2 mrg #if defined (__AVR_TINY__)
25 1.1.1.2 mrg #define __zero_reg__ r17
26 1.1.1.2 mrg #define __tmp_reg__ r16
27 1.1.1.2 mrg #else
28 1.1 mrg #define __zero_reg__ r1
29 1.1 mrg #define __tmp_reg__ r0
30 1.1.1.2 mrg #endif
31 1.1 mrg #define __SREG__ 0x3f
32 1.1 mrg #if defined (__AVR_HAVE_SPH__)
33 1.1 mrg #define __SP_H__ 0x3e
34 1.1 mrg #endif
35 1.1 mrg #define __SP_L__ 0x3d
36 1.1 mrg #define __RAMPZ__ 0x3B
37 1.1 mrg #define __EIND__ 0x3C
38 1.1 mrg
39 1.1 mrg /* Most of the functions here are called directly from avr.md
40 1.1 mrg patterns, instead of using the standard libcall mechanisms.
41 1.1 mrg This can make better code because GCC knows exactly which
42 1.1 mrg of the call-used registers (not all of them) are clobbered. */
43 1.1 mrg
44 1.1 mrg /* FIXME: At present, there is no SORT directive in the linker
45 1.1 mrg script so that we must not assume that different modules
46 1.1 mrg in the same input section like .libgcc.text.mul will be
47 1.1 mrg located close together. Therefore, we cannot use
48 1.1 mrg RCALL/RJMP to call a function like __udivmodhi4 from
49 1.1 mrg __divmodhi4 and have to use lengthy XCALL/XJMP even
50 1.1 mrg though they are in the same input section and all same
51 1.1 mrg input sections together are small enough to reach every
52 1.1 mrg location with a RCALL/RJMP instruction. */
53 1.1 mrg
54 1.1.1.2 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 1.1.1.2 mrg #error device not supported
56 1.1.1.2 mrg #endif
57 1.1.1.2 mrg
;; mov_l DEST, SRC -- copy the LOW byte of a 16-bit register pair.
;; On devices with MOVW this moves the whole pair in one insn
;; (the companion mov_h then expands to nothing); otherwise a
;; plain single-byte MOV is emitted.
58 1.1 mrg .macro mov_l r_dest, r_src
59 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
60 1.1 mrg movw \r_dest, \r_src
61 1.1 mrg #else
62 1.1 mrg mov \r_dest, \r_src
63 1.1 mrg #endif
64 1.1 mrg .endm
65 1.1 mrg
;; mov_h DEST, SRC -- copy the HIGH byte of a 16-bit register pair.
;; Intentionally empty when MOVW exists: the matching mov_l already
;; moved both bytes of the pair.
66 1.1 mrg .macro mov_h r_dest, r_src
67 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
68 1.1 mrg ; empty
69 1.1 mrg #else
70 1.1 mrg mov \r_dest, \r_src
71 1.1 mrg #endif
72 1.1 mrg .endm
73 1.1 mrg
;; wmov DEST, SRC -- copy a full 16-bit register pair.
;; Arguments must be plain register NUMBERS (not rNN names) so the
;; fallback can form \r_dest+1 / \r_src+1 for the high byte.
74 1.1 mrg .macro wmov r_dest, r_src
75 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
76 1.1 mrg movw \r_dest, \r_src
77 1.1 mrg #else
78 1.1 mrg mov \r_dest, \r_src
79 1.1 mrg mov \r_dest+1, \r_src+1
80 1.1 mrg #endif
81 1.1 mrg .endm
82 1.1 mrg
83 1.1 mrg #if defined (__AVR_HAVE_JMP_CALL__)
84 1.1 mrg #define XCALL call
85 1.1 mrg #define XJMP jmp
86 1.1 mrg #else
87 1.1 mrg #define XCALL rcall
88 1.1 mrg #define XJMP rjmp
89 1.1 mrg #endif
90 1.1 mrg
91 1.1.1.2 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__)
92 1.1.1.2 mrg #define XICALL eicall
93 1.1.1.2 mrg #define XIJMP eijmp
94 1.1.1.2 mrg #else
95 1.1.1.2 mrg #define XICALL icall
96 1.1.1.2 mrg #define XIJMP ijmp
97 1.1.1.2 mrg #endif
98 1.1.1.2 mrg
99 1.1 mrg ;; Prologue stuff
100 1.1 mrg
;; do_prologue_saves N_PUSHED [N_FRAME]
;; Tail-jump into the __prologue_saves__ stub so that exactly
;; N_PUSHED call-saved registers get pushed.  X (r27:r26) carries
;; the frame size and Z (r31:r30) the word address (gs()) the stub
;; returns to.  Each saved register costs one 2-byte insn in the
;; stub, hence the (18 - n_pushed) * 2 entry offset.
101 1.1 mrg .macro do_prologue_saves n_pushed n_frame=0
102 1.1 mrg ldi r26, lo8(\n_frame)
103 1.1 mrg ldi r27, hi8(\n_frame)
104 1.1 mrg ldi r30, lo8(gs(.L_prologue_saves.\@))
105 1.1 mrg ldi r31, hi8(gs(.L_prologue_saves.\@))
106 1.1 mrg XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107 1.1 mrg .L_prologue_saves.\@:
108 1.1 mrg .endm
109 1.1 mrg
110 1.1 mrg ;; Epilogue stuff
111 1.1 mrg
;; do_epilogue_restores N_PUSHED [N_FRAME]
;; Point Y (r29:r28) at the register save area (current SP plus the
;; frame size), then tail-jump into __epilogue_restores__ at the
;; offset that pops exactly N_PUSHED registers and returns.
112 1.1 mrg .macro do_epilogue_restores n_pushed n_frame=0
113 1.1 mrg in r28, __SP_L__
114 1.1 mrg #ifdef __AVR_HAVE_SPH__
115 1.1 mrg in r29, __SP_H__
;; ADIW only takes a 6-bit immediate, so large frames use the
;; SUBI/SBCI-of-negative trick instead.
116 1.1 mrg .if \n_frame > 63
117 1.1 mrg subi r28, lo8(-\n_frame)
118 1.1 mrg sbci r29, hi8(-\n_frame)
119 1.1 mrg .elseif \n_frame > 0
120 1.1 mrg adiw r28, \n_frame
121 1.1 mrg .endif
122 1.1 mrg #else
;; No SPH: device has so little RAM that the high address byte is 0.
123 1.1 mrg clr r29
124 1.1 mrg .if \n_frame > 0
125 1.1 mrg subi r28, lo8(-\n_frame)
126 1.1 mrg .endif
127 1.1 mrg #endif /* HAVE SPH */
128 1.1 mrg ldi r30, \n_pushed
129 1.1 mrg XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
130 1.1 mrg .endm
131 1.1 mrg
132 1.1 mrg ;; Support function entry and exit for convenience
133 1.1 mrg
;; wsubi REGPAIR, IMM -- subtract a 16-bit immediate from a register
;; pair.  AVR_TINY has no SBIW, so fall back to SUBI/SBCI there;
;; SBIW also only exists for the upper four pairs (r24/r26/r28/r30).
134 1.1.1.2 mrg .macro wsubi r_arg1, i_arg2
135 1.1.1.2 mrg #if defined (__AVR_TINY__)
136 1.1.1.2 mrg subi \r_arg1, lo8(\i_arg2)
137 1.1.1.2 mrg sbci \r_arg1+1, hi8(\i_arg2)
138 1.1.1.2 mrg #else
139 1.1.1.2 mrg sbiw \r_arg1, \i_arg2
140 1.1.1.2 mrg #endif
141 1.1.1.2 mrg .endm
142 1.1.1.2 mrg
;; waddi REGPAIR, IMM -- add a 16-bit immediate to a register pair.
;; AVR has no 16-bit ADD-immediate besides ADIW, so the TINY path
;; subtracts the negated constant with SUBI/SBCI.
143 1.1.1.2 mrg .macro waddi r_arg1, i_arg2
144 1.1.1.2 mrg #if defined (__AVR_TINY__)
145 1.1.1.2 mrg subi \r_arg1, lo8(-\i_arg2)
146 1.1.1.2 mrg sbci \r_arg1+1, hi8(-\i_arg2)
147 1.1.1.2 mrg #else
148 1.1.1.2 mrg adiw \r_arg1, \i_arg2
149 1.1.1.2 mrg #endif
150 1.1.1.2 mrg .endm
151 1.1.1.2 mrg
;; DEFUN NAME -- open a global function: export the symbol, start a
;; .func debug scope and place the entry label.  Pair with ENDF.
152 1.1 mrg .macro DEFUN name
153 1.1 mrg .global \name
154 1.1 mrg .func \name
155 1.1 mrg \name:
156 1.1 mrg .endm
157 1.1 mrg
;; ENDF NAME -- close a function opened by DEFUN: record its ELF
;; symbol size and end the .func debug scope.
158 1.1 mrg .macro ENDF name
159 1.1 mrg .size \name, .-\name
160 1.1 mrg .endfunc
161 1.1 mrg .endm
162 1.1 mrg
;; FALIAS NAME -- define NAME as an additional (zero-size) entry
;; point for the code that follows; unlike DEFUN/ENDF the size is
;; emitted immediately, so NAME is just an alias label.
163 1.1 mrg .macro FALIAS name
164 1.1 mrg .global \name
165 1.1 mrg .func \name
166 1.1 mrg \name:
167 1.1 mrg .size \name, .-\name
168 1.1 mrg .endfunc
169 1.1 mrg .endm
170 1.1 mrg
171 1.1 mrg ;; Skip next instruction, typically a jump target
172 1.1.1.2 mrg #if defined(__AVR_TINY__)
173 1.1 mrg #define skip cpse 0,0
174 1.1.1.2 mrg #else
175 1.1.1.2 mrg #define skip cpse 16,16
176 1.1.1.2 mrg #endif
177 1.1 mrg
178 1.1 mrg ;; Negate a 2-byte value held in consecutive registers
;; NEG2 REG -- two's-complement negate the 16-bit value in REG+1:REG.
;; NEG sets carry iff the low byte was non-zero, so SBCI \reg+1,-1
;; computes ~hi + 1 - C, i.e. adds the +1 only when no borrow
;; propagates from the low byte.  Requires REG+1 >= 16 (SBCI).
179 1.1 mrg .macro NEG2 reg
180 1.1 mrg com \reg+1
181 1.1 mrg neg \reg
182 1.1 mrg sbci \reg+1, -1
183 1.1 mrg .endm
184 1.1 mrg
185 1.1 mrg ;; Negate a 4-byte value held in consecutive registers
186 1.1 mrg ;; Sets the V flag for signed overflow tests if REG >= 16
;; NEG4 REG -- two's-complement negate the 32-bit value REG+3..REG.
;; Upper registers can use NEG/SBCI (which also leaves V usable for
;; signed-overflow tests); registers below 16 have no immediate
;; insns, so complement every byte and ripple the +1 via the carry
;; that COM always sets.
187 1.1 mrg .macro NEG4 reg
188 1.1 mrg com \reg+3
189 1.1 mrg com \reg+2
190 1.1 mrg com \reg+1
191 1.1 mrg .if \reg >= 16
192 1.1 mrg neg \reg
193 1.1 mrg sbci \reg+1, -1
194 1.1 mrg sbci \reg+2, -1
195 1.1 mrg sbci \reg+3, -1
196 1.1 mrg .else
;; COM sets C = 1 unconditionally; ADC chain adds that 1 through
;; all four bytes, turning ~x into ~x + 1 = -x.
197 1.1 mrg com \reg
198 1.1 mrg adc \reg, __zero_reg__
199 1.1 mrg adc \reg+1, __zero_reg__
200 1.1 mrg adc \reg+2, __zero_reg__
201 1.1 mrg adc \reg+3, __zero_reg__
202 1.1 mrg .endif
203 1.1 mrg .endm
204 1.1 mrg
205 1.1 mrg #define exp_lo(N) hlo8 ((N) << 23)
206 1.1 mrg #define exp_hi(N) hhi8 ((N) << 23)
207 1.1 mrg
208 1.1 mrg
209 1.1 mrg .section .text.libgcc.mul, "ax", @progbits
211 1.1 mrg
212 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
213 1.1 mrg /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
214 1.1 mrg #if !defined (__AVR_HAVE_MUL__)
215 1.1 mrg /*******************************************************
216 1.1 mrg Multiplication 8 x 8 without MUL
217 1.1 mrg *******************************************************/
218 1.1 mrg #if defined (L_mulqi3)
219 1.1 mrg
220 1.1 mrg #define r_arg2 r22 /* multiplicand */
221 1.1 mrg #define r_arg1 r24 /* multiplier */
222 1.1 mrg #define r_res __tmp_reg__ /* result */
223 1.1 mrg
;;; __mulqi3: R24 = R24 * R22 -- 8 x 8 shift-and-add for cores
;;; without the MUL instruction.  Exits early once either the
;;; shifted multiplicand or the remaining multiplier becomes 0.
;;; Clobbers: __tmp_reg__, R22.
224 1.1 mrg DEFUN __mulqi3
225 1.1 mrg clr r_res ; clear result
226 1.1 mrg __mulqi3_loop:
227 1.1 mrg sbrc r_arg1,0
228 1.1 mrg add r_res,r_arg2
229 1.1 mrg add r_arg2,r_arg2 ; shift multiplicand
230 1.1 mrg breq __mulqi3_exit ; while multiplicand != 0
231 1.1 mrg lsr r_arg1 ;
232 1.1 mrg brne __mulqi3_loop ; exit if multiplier = 0
233 1.1 mrg __mulqi3_exit:
234 1.1 mrg mov r_arg1,r_res ; result to return register
235 1.1 mrg ret
236 1.1 mrg ENDF __mulqi3
237 1.1 mrg
238 1.1 mrg #undef r_arg2
239 1.1 mrg #undef r_arg1
240 1.1 mrg #undef r_res
241 1.1 mrg
242 1.1 mrg #endif /* defined (L_mulqi3) */
243 1.1 mrg
244 1.1 mrg
245 1.1 mrg /*******************************************************
246 1.1 mrg Widening Multiplication 16 = 8 x 8 without MUL
247 1.1 mrg Multiplication 16 x 16 without MUL
248 1.1 mrg *******************************************************/
249 1.1.1.2 mrg
250 1.1.1.2 mrg #define A0 22
251 1.1.1.2 mrg #define A1 23
252 1.1.1.2 mrg #define B0 24
253 1.1.1.2 mrg #define BB0 20
254 1.1 mrg #define B1 25
255 1.1.1.2 mrg ;; Output overlaps input, thus expand result in CC0/1
256 1.1.1.2 mrg #define C0 24
257 1.1 mrg #define C1 25
258 1.1.1.2 mrg #define CC0 __tmp_reg__
259 1.1 mrg #define CC1 21
260 1.1 mrg
261 1.1 mrg #if defined (L_umulqihi3)
262 1.1 mrg ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
263 1.1 mrg ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
264 1.1 mrg ;;; Clobbers: __tmp_reg__, R21..R23
;; Zero-extend both 8-bit operands and let __mulhi3 do the work.
265 1.1 mrg DEFUN __umulqihi3
266 1.1 mrg clr A1
267 1.1 mrg clr B1
268 1.1 mrg XJMP __mulhi3
269 1.1 mrg ENDF __umulqihi3
270 1.1 mrg #endif /* L_umulqihi3 */
271 1.1 mrg
272 1.1 mrg #if defined (L_mulqihi3)
273 1.1 mrg ;;; R25:R24 = (signed int) R22 * (signed int) R24
274 1.1 mrg ;;; (C1:C0) = (signed int) A0 * (signed int) B0
275 1.1 mrg ;;; Clobbers: __tmp_reg__, R20..R23
;;; Signed 8 x 8 -> 16 multiply.  B is sign-extended up front; A is
;;; only ZERO-extended (faster in __mulhi3) and the missing sign is
;;; repaired afterwards: if A < 0, BB0 holds B and is subtracted
;;; from the product's high byte (the "1-extension" fix-up).
276 1.1 mrg DEFUN __mulqihi3
277 1.1 mrg ;; Sign-extend B0
278 1.1 mrg clr B1
279 1.1 mrg sbrc B0, 7
280 1.1 mrg com B1
281 1.1 mrg ;; The multiplication runs twice as fast if A1 is zero, thus:
282 1.1 mrg ;; Zero-extend A0
283 1.1 mrg clr A1
284 1.1 mrg #ifdef __AVR_HAVE_JMP_CALL__
285 1.1 mrg ;; Store B0 * sign of A
286 1.1 mrg clr BB0
287 1.1 mrg sbrc A0, 7
288 1.1 mrg mov BB0, B0
289 1.1 mrg call __mulhi3
290 1.1 mrg #else /* have no CALL */
291 1.1 mrg ;; Skip sign-extension of A if A >= 0
292 1.1 mrg ;; Same size as with the first alternative but avoids errata skip
293 1.1 mrg ;; and is faster if A >= 0
294 1.1 mrg sbrs A0, 7
295 1.1 mrg rjmp __mulhi3
296 1.1 mrg ;; If A < 0 store B
297 1.1 mrg mov BB0, B0
298 1.1 mrg rcall __mulhi3
299 1.1 mrg #endif /* HAVE_JMP_CALL */
300 1.1 mrg ;; 1-extend A after the multiplication
301 1.1 mrg sub C1, BB0
302 1.1 mrg ret
303 1.1 mrg ENDF __mulqihi3
304 1.1 mrg #endif /* L_mulqihi3 */
305 1.1 mrg
306 1.1 mrg #if defined (L_mulhi3)
307 1.1 mrg ;;; R25:R24 = R23:R22 * R25:R24
308 1.1 mrg ;;; (C1:C0) = (A1:A0) * (B1:B0)
309 1.1 mrg ;;; Clobbers: __tmp_reg__, R21..R23
;;; 16 x 16 shift-and-add multiply for cores without MUL.
;;; Loop invariant: CC accumulates A's consumed low bits times the
;;; (left-shifted) B.  Terminates early when either B or the
;;; remaining bits of A reach zero.
310 1.1 mrg DEFUN __mulhi3
311 1.1 mrg
312 1.1 mrg ;; Clear result
313 1.1 mrg clr CC0
314 1.1 mrg clr CC1
315 1.1 mrg rjmp 3f
316 1.1 mrg 1:
317 1.1 mrg ;; Bit n of A is 1 --> C += B << n
318 1.1 mrg add CC0, B0
319 1.1 mrg adc CC1, B1
320 1.1 mrg 2:
321 1.1 mrg lsl B0
322 1.1 mrg rol B1
323 1.1 mrg 3:
324 1.1.1.2 mrg ;; If B == 0 we are ready
325 1.1 mrg wsubi B0, 0
326 1.1 mrg breq 9f
327 1.1 mrg
328 1.1 mrg ;; Carry = n-th bit of A
329 1.1 mrg lsr A1
330 1.1 mrg ror A0
331 1.1 mrg ;; If bit n of A is set, then go add B * 2^n to C
332 1.1 mrg brcs 1b
333 1.1 mrg
334 1.1 mrg ;; Carry = 0 --> The ROR above acts like CP A0, 0
335 1.1 mrg ;; Thus, it is sufficient to CPC the high part to test A against 0
336 1.1 mrg cpc A1, __zero_reg__
337 1.1 mrg ;; Only proceed if A != 0
338 1.1 mrg brne 2b
339 1.1 mrg 9:
340 1.1 mrg ;; Move Result into place
341 1.1 mrg mov C0, CC0
342 1.1 mrg mov C1, CC1
343 1.1 mrg ret
344 1.1 mrg ENDF __mulhi3
345 1.1 mrg #endif /* L_mulhi3 */
346 1.1 mrg
347 1.1 mrg #undef A0
348 1.1 mrg #undef A1
349 1.1 mrg #undef B0
350 1.1 mrg #undef BB0
351 1.1 mrg #undef B1
352 1.1 mrg #undef C0
353 1.1 mrg #undef C1
354 1.1 mrg #undef CC0
355 1.1 mrg #undef CC1
356 1.1 mrg
357 1.1 mrg
358 1.1 mrg #define A0 22
360 1.1 mrg #define A1 A0+1
361 1.1 mrg #define A2 A0+2
362 1.1 mrg #define A3 A0+3
363 1.1 mrg
364 1.1 mrg #define B0 18
365 1.1 mrg #define B1 B0+1
366 1.1 mrg #define B2 B0+2
367 1.1 mrg #define B3 B0+3
368 1.1 mrg
369 1.1 mrg #define CC0 26
370 1.1 mrg #define CC1 CC0+1
371 1.1 mrg #define CC2 30
372 1.1 mrg #define CC3 CC2+1
373 1.1 mrg
374 1.1 mrg #define C0 22
375 1.1 mrg #define C1 C0+1
376 1.1 mrg #define C2 C0+2
377 1.1 mrg #define C3 C0+3
378 1.1 mrg
379 1.1 mrg /*******************************************************
380 1.1 mrg Widening Multiplication 32 = 16 x 16 without MUL
381 1.1 mrg *******************************************************/
382 1.1 mrg
383 1.1 mrg #if defined (L_umulhisi3)
;; Unsigned 16 x 16 -> 32 without MUL: move B into place, zero-
;; extend both operands and reuse the 32 x 32 routine.
384 1.1 mrg DEFUN __umulhisi3
385 1.1 mrg wmov B0, 24
386 1.1 mrg ;; Zero-extend B
387 1.1 mrg clr B2
388 1.1 mrg clr B3
389 1.1 mrg ;; Zero-extend A
390 1.1 mrg wmov A2, B2
391 1.1 mrg XJMP __mulsi3
392 1.1 mrg ENDF __umulhisi3
393 1.1 mrg #endif /* L_umulhisi3 */
394 1.1 mrg
395 1.1 mrg #if defined (L_mulhisi3)
;; Signed 16 x 16 -> 32 without MUL.  B is sign-extended via the
;; LSL/SBC trick (B2 = 0x00 or 0xFF from B's sign bit).  On errata
;; cores A is sign-extended directly; otherwise A is zero-extended
;; for speed and, when A < 0, the high result half is pre-loaded
;; with -B so __mulsi3_helper produces the corrected product.
396 1.1 mrg DEFUN __mulhisi3
397 1.1 mrg wmov B0, 24
398 1.1 mrg ;; Sign-extend B
399 1.1 mrg lsl r25
400 1.1 mrg sbc B2, B2
401 1.1 mrg mov B3, B2
402 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
403 1.1 mrg ;; Sign-extend A
404 1.1 mrg clr A2
405 1.1 mrg sbrc A1, 7
406 1.1 mrg com A2
407 1.1 mrg mov A3, A2
408 1.1 mrg XJMP __mulsi3
409 1.1 mrg #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
410 1.1 mrg ;; Zero-extend A and __mulsi3 will run at least twice as fast
411 1.1 mrg ;; compared to a sign-extended A.
412 1.1 mrg clr A2
413 1.1 mrg clr A3
414 1.1 mrg sbrs A1, 7
415 1.1 mrg XJMP __mulsi3
416 1.1 mrg ;; If A < 0 then perform the B * 0xffff.... before the
417 1.1 mrg ;; very multiplication by initializing the high part of the
418 1.1 mrg ;; result CC with -B.
419 1.1 mrg wmov CC2, A2
420 1.1 mrg sub CC2, B0
421 1.1 mrg sbc CC3, B1
422 1.1 mrg XJMP __mulsi3_helper
423 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
424 1.1 mrg ENDF __mulhisi3
425 1.1 mrg #endif /* L_mulhisi3 */
426 1.1 mrg
427 1.1 mrg
428 1.1 mrg /*******************************************************
429 1.1 mrg Multiplication 32 x 32 without MUL
430 1.1 mrg *******************************************************/
431 1.1 mrg
432 1.1.1.2 mrg #if defined (L_mulsi3)
;; 32 x 32 multiply without MUL, entry point.  On AVR_TINY the B
;; operand is passed on the caller's stack (reduced register set),
;; so it is fetched via X; B0/B1 are call-saved there and pushed.
;; Clears the high accumulator half and falls through into
;; __mulsi3_helper.
433 1.1.1.2 mrg DEFUN __mulsi3
434 1.1.1.2 mrg #if defined (__AVR_TINY__)
435 1.1.1.2 mrg in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
436 1.1.1.2 mrg in r27, __SP_H__
437 1.1.1.2 mrg subi r26, lo8(-3) ; Add 3 to point past return address
438 1.1.1.2 mrg sbci r27, hi8(-3)
439 1.1.1.2 mrg push B0 ; save callee saved regs
440 1.1.1.2 mrg push B1
441 1.1.1.2 mrg ld B0, X+ ; load from caller stack
442 1.1.1.2 mrg ld B1, X+
443 1.1.1.2 mrg ld B2, X+
444 1.1 mrg ld B3, X
445 1.1 mrg #endif
446 1.1 mrg ;; Clear result
447 1.1 mrg clr CC2
448 1.1 mrg clr CC3
449 1.1 mrg ;; FALLTHRU
450 1.1 mrg ENDF __mulsi3
451 1.1 mrg
;; Shift-and-add core of the 32 x 32 multiply.  Expects CC2/CC3
;; pre-initialized by the caller (0, or -B for the signed fix-up in
;; __mulhisi3).  '$' separates instructions on one line.
452 1.1 mrg DEFUN __mulsi3_helper
453 1.1 mrg clr CC0
454 1.1 mrg clr CC1
455 1.1 mrg rjmp 3f
456 1.1 mrg
457 1.1 mrg 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
458 1.1 mrg ;; CC += B
459 1.1 mrg add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
460 1.1 mrg
461 1.1 mrg 2: ;; B <<= 1
462 1.1 mrg lsl B0 $ rol B1 $ rol B2 $ rol B3
463 1.1 mrg
464 1.1 mrg 3: ;; A >>= 1: Carry = n-th bit of A
465 1.1 mrg lsr A3 $ ror A2 $ ror A1 $ ror A0
466 1.1 mrg
467 1.1 mrg brcs 1b
468 1.1 mrg ;; Only continue if A != 0
;; Carry is clear here (BRCS fell through), so SBCI subtracts 0;
;; combined with Z from the ROR A0 above this tests A1:A0 == 0.
469 1.1.1.2 mrg sbci A1, 0
470 1.1 mrg brne 2b
471 1.1 mrg wsubi A2, 0
472 1.1 mrg brne 2b
473 1.1 mrg
474 1.1 mrg ;; All bits of A are consumed: Copy result to return register C
475 1.1.1.2 mrg wmov C0, CC0
476 1.1.1.2 mrg wmov C2, CC2
477 1.1.1.2 mrg #if defined (__AVR_TINY__)
478 1.1.1.2 mrg pop B1 ; restore callee saved regs
479 1.1.1.2 mrg pop B0
480 1.1 mrg #endif /* defined (__AVR_TINY__) */
481 1.1 mrg
482 1.1 mrg ret
483 1.1 mrg ENDF __mulsi3_helper
484 1.1 mrg #endif /* L_mulsi3 */
485 1.1 mrg
486 1.1 mrg #undef A0
487 1.1 mrg #undef A1
488 1.1 mrg #undef A2
489 1.1 mrg #undef A3
490 1.1 mrg #undef B0
491 1.1 mrg #undef B1
492 1.1 mrg #undef B2
493 1.1 mrg #undef B3
494 1.1 mrg #undef C0
495 1.1 mrg #undef C1
496 1.1 mrg #undef C2
497 1.1 mrg #undef C3
498 1.1 mrg #undef CC0
499 1.1 mrg #undef CC1
500 1.1 mrg #undef CC2
501 1.1 mrg #undef CC3
502 1.1 mrg
503 1.1 mrg #endif /* !defined (__AVR_HAVE_MUL__) */
504 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
505 1.1 mrg
506 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
508 1.1 mrg #if defined (__AVR_HAVE_MUL__)
509 1.1 mrg #define A0 26
510 1.1 mrg #define B0 18
511 1.1 mrg #define C0 22
512 1.1 mrg
513 1.1 mrg #define A1 A0+1
514 1.1 mrg
515 1.1 mrg #define B1 B0+1
516 1.1 mrg #define B2 B0+2
517 1.1 mrg #define B3 B0+3
518 1.1 mrg
519 1.1 mrg #define C1 C0+1
520 1.1 mrg #define C2 C0+2
521 1.1 mrg #define C3 C0+3
522 1.1 mrg
523 1.1 mrg /*******************************************************
524 1.1 mrg Widening Multiplication 32 = 16 x 16 with MUL
525 1.1 mrg *******************************************************/
526 1.1 mrg
527 1.1 mrg #if defined (L_mulhisi3)
528 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
529 1.1 mrg ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
530 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Signed 16 x 16 -> 32 with MUL: do the unsigned widening multiply,
;; then apply the standard correction -- subtract A << 16 if B < 0
;; (here) and B << 16 if A < 0 (in __usmulhisi3_tail).
531 1.1 mrg DEFUN __mulhisi3
532 1.1 mrg XCALL __umulhisi3
533 1.1 mrg ;; Sign-extend B
534 1.1 mrg tst B1
535 1.1 mrg brpl 1f
536 1.1 mrg sub C2, A0
537 1.1 mrg sbc C3, A1
538 1.1 mrg 1: ;; Sign-extend A
539 1.1 mrg XJMP __usmulhisi3_tail
540 1.1 mrg ENDF __mulhisi3
541 1.1 mrg #endif /* L_mulhisi3 */
542 1.1 mrg
543 1.1 mrg #if defined (L_usmulhisi3)
544 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
545 1.1 mrg ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
546 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Mixed signed x unsigned 16 x 16 -> 32: unsigned multiply, then
;; the shared tail subtracts B << 16 iff A was negative.
547 1.1 mrg DEFUN __usmulhisi3
548 1.1 mrg XCALL __umulhisi3
549 1.1 mrg ;; FALLTHRU
550 1.1 mrg ENDF __usmulhisi3
551 1.1 mrg
552 1.1 mrg DEFUN __usmulhisi3_tail
553 1.1 mrg ;; Sign-extend A
554 1.1 mrg sbrs A1, 7
555 1.1 mrg ret
556 1.1 mrg sub C2, B0
557 1.1 mrg sbc C3, B1
558 1.1 mrg ret
559 1.1 mrg ENDF __usmulhisi3_tail
560 1.1 mrg #endif /* L_usmulhisi3 */
561 1.1 mrg
562 1.1 mrg #if defined (L_umulhisi3)
563 1.1 mrg ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
564 1.1 mrg ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
565 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Unsigned 16 x 16 -> 32 using the hardware multiplier: schoolbook
;; with four 8 x 8 MULs.  The cross products A0*B1 and A1*B0 share
;; the same add-into-C sequence; without CALL the first one RCALLs
;; into the tail at label 1 to reuse it, with CALL the sequence is
;; duplicated to save the call overhead.
566 1.1 mrg DEFUN __umulhisi3
567 1.1 mrg mul A0, B0
568 1.1 mrg movw C0, r0
569 1.1 mrg mul A1, B1
570 1.1 mrg movw C2, r0
571 1.1 mrg mul A0, B1
572 1.1 mrg #ifdef __AVR_HAVE_JMP_CALL__
573 1.1 mrg ;; This function is used by many other routines, often multiple times.
574 1.1 mrg ;; Therefore, if the flash size is not too limited, avoid the RCALL
575 1.1 mrg ;; and invest 6 Bytes to speed things up.
576 1.1 mrg add C1, r0
577 1.1 mrg adc C2, r1
578 1.1 mrg clr __zero_reg__
579 1.1 mrg adc C3, __zero_reg__
580 1.1 mrg #else
581 1.1 mrg rcall 1f
582 1.1 mrg #endif
583 1.1 mrg mul A1, B0
584 1.1 mrg 1: add C1, r0
585 1.1 mrg adc C2, r1
586 1.1 mrg clr __zero_reg__
587 1.1 mrg adc C3, __zero_reg__
588 1.1 mrg ret
589 1.1 mrg ENDF __umulhisi3
590 1.1 mrg #endif /* L_umulhisi3 */
591 1.1 mrg
592 1.1 mrg /*******************************************************
593 1.1 mrg Widening Multiplication 32 = 16 x 32 with MUL
594 1.1 mrg *******************************************************/
595 1.1 mrg
596 1.1 mrg #if defined (L_mulshisi3)
597 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * R21:R18
598 1.1 mrg ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
599 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Signed 16 x unsigned 32 -> 32.  If A >= 0 this is just the
;; unsigned routine; if A < 0 fall through / branch to __mulohisi3
;; which applies the one-extension correction (subtract B << 16).
600 1.1 mrg DEFUN __mulshisi3
601 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
602 1.1 mrg ;; Some cores have problem skipping 2-word instruction
603 1.1 mrg tst A1
604 1.1 mrg brmi __mulohisi3
605 1.1 mrg #else
606 1.1 mrg sbrs A1, 7
607 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
608 1.1 mrg XJMP __muluhisi3
609 1.1 mrg ;; FALLTHRU
610 1.1 mrg ENDF __mulshisi3
611 1.1 mrg
612 1.1 mrg ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
613 1.1 mrg ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
614 1.1 mrg ;;; Clobbers: __tmp_reg__
615 1.1 mrg DEFUN __mulohisi3
616 1.1 mrg XCALL __muluhisi3
617 1.1 mrg ;; One-extend R27:R26 (A1:A0)
618 1.1 mrg sub C2, B0
619 1.1 mrg sbc C3, B1
620 1.1 mrg ret
621 1.1 mrg ENDF __mulohisi3
622 1.1 mrg #endif /* L_mulshisi3 */
623 1.1 mrg
624 1.1 mrg #if defined (L_muluhisi3)
625 1.1 mrg ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
626 1.1 mrg ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
627 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Unsigned 16 x 32 -> 32: low 32 bits of A1:A0 * B3:B0.  Builds on
;; __umulhisi3 (A * B1:B0) and adds the partial products that still
;; land in the 32-bit result; products only affecting C3 discard r1.
628 1.1 mrg DEFUN __muluhisi3
629 1.1 mrg XCALL __umulhisi3
630 1.1 mrg mul A0, B3
631 1.1 mrg add C3, r0
632 1.1 mrg mul A1, B2
633 1.1 mrg add C3, r0
634 1.1 mrg mul A0, B2
635 1.1 mrg add C2, r0
636 1.1 mrg adc C3, r1
637 1.1 mrg clr __zero_reg__
638 1.1 mrg ret
639 1.1 mrg ENDF __muluhisi3
640 1.1 mrg #endif /* L_muluhisi3 */
641 1.1 mrg
642 1.1 mrg /*******************************************************
643 1.1 mrg Multiplication 32 x 32 with MUL
644 1.1 mrg *******************************************************/
645 1.1 mrg
646 1.1 mrg #if defined (L_mulsi3)
647 1.1 mrg ;;; R25:R22 = R25:R22 * R21:R18
648 1.1 mrg ;;; (C3:C0) = C3:C0 * B3:B0
649 1.1 mrg ;;; Clobbers: R26, R27, __tmp_reg__
;; 32 x 32 -> 32 with MUL.  A's low word is multiplied by all of B
;; via __muluhisi3 (A's high word is saved on the stack across that
;; call), then A's high word contributes only to the upper result
;; bytes.
650 1.1 mrg DEFUN __mulsi3
651 1.1 mrg movw A0, C0
652 1.1 mrg push C2
653 1.1 mrg push C3
654 1.1 mrg XCALL __muluhisi3
655 1.1 mrg pop A1
656 1.1 mrg pop A0
657 1.1 mrg ;; A1:A0 now contains the high word of A
658 1.1 mrg mul A0, B0
659 1.1 mrg add C2, r0
660 1.1 mrg adc C3, r1
661 1.1 mrg mul A0, B1
662 1.1 mrg add C3, r0
663 1.1 mrg mul A1, B0
664 1.1 mrg add C3, r0
665 1.1 mrg clr __zero_reg__
666 1.1 mrg ret
667 1.1 mrg ENDF __mulsi3
668 1.1 mrg #endif /* L_mulsi3 */
669 1.1 mrg
670 1.1 mrg #undef A0
671 1.1 mrg #undef A1
672 1.1 mrg
673 1.1 mrg #undef B0
674 1.1 mrg #undef B1
675 1.1 mrg #undef B2
676 1.1 mrg #undef B3
677 1.1 mrg
678 1.1 mrg #undef C0
679 1.1 mrg #undef C1
680 1.1 mrg #undef C2
681 1.1 mrg #undef C3
682 1.1 mrg
683 1.1 mrg #endif /* __AVR_HAVE_MUL__ */
684 1.1 mrg
685 1.1 mrg /*******************************************************
686 1.1 mrg Multiplication 24 x 24 with MUL
687 1.1 mrg *******************************************************/
688 1.1 mrg
689 1.1 mrg #if defined (L_mulpsi3)
690 1.1 mrg
691 1.1 mrg ;; A[0..2]: In: Multiplicand; Out: Product
692 1.1 mrg #define A0 22
693 1.1 mrg #define A1 A0+1
694 1.1 mrg #define A2 A0+2
695 1.1 mrg
696 1.1 mrg ;; B[0..2]: In: Multiplier
697 1.1 mrg #define B0 18
698 1.1 mrg #define B1 B0+1
699 1.1 mrg #define B2 B0+2
700 1.1 mrg
701 1.1 mrg #if defined (__AVR_HAVE_MUL__)
702 1.1 mrg
703 1.1 mrg ;; C[0..2]: Expand Result
704 1.1 mrg #define C0 22
705 1.1 mrg #define C1 C0+1
706 1.1 mrg #define C2 C0+2
707 1.1 mrg
708 1.1 mrg ;; R24:R22 *= R20:R18
709 1.1 mrg ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
710 1.1 mrg
711 1.1 mrg #define AA0 26
712 1.1 mrg #define AA2 21
713 1.1 mrg
;; 24 x 24 -> 24 with MUL.  Copy A aside (A overlaps the result C),
;; multiply the low words via __umulhisi3, then add the two partial
;; products that reach the third result byte.
714 1.1 mrg DEFUN __mulpsi3
715 1.1 mrg wmov AA0, A0
716 1.1 mrg mov AA2, A2
717 1.1 mrg XCALL __umulhisi3
718 1.1 mrg mul AA2, B0 $ add C2, r0
719 1.1 mrg mul AA0, B2 $ add C2, r0
720 1.1 mrg clr __zero_reg__
721 1.1 mrg ret
722 1.1 mrg ENDF __mulpsi3
723 1.1 mrg
724 1.1 mrg #undef AA2
725 1.1 mrg #undef AA0
726 1.1 mrg
727 1.1 mrg #undef C2
728 1.1 mrg #undef C1
729 1.1 mrg #undef C0
730 1.1.1.2 mrg
731 1.1.1.2 mrg #else /* !HAVE_MUL */
732 1.1.1.2 mrg ;; C[0..2]: Expand Result
733 1.1 mrg #if defined (__AVR_TINY__)
734 1.1.1.2 mrg #define C0 16
735 1.1 mrg #else
736 1.1 mrg #define C0 0
737 1.1 mrg #endif /* defined (__AVR_TINY__) */
738 1.1 mrg #define C1 C0+1
739 1.1 mrg #define C2 21
740 1.1 mrg
741 1.1 mrg ;; R24:R22 *= R20:R18
742 1.1.1.2 mrg ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
743 1.1.1.2 mrg
;; 24 x 24 -> 24 shift-and-add, no MUL.  On AVR_TINY, B lives on the
;; caller's stack and B0/B1 are call-saved, hence the X-pointer load
;; and the push/pop bracketing.  C0/C1 is __tmp_reg__/__zero_reg__
;; on classic cores (r16/r17 on TINY), so __zero_reg__ is re-cleared
;; before returning.
744 1.1.1.2 mrg DEFUN __mulpsi3
745 1.1.1.2 mrg #if defined (__AVR_TINY__)
746 1.1.1.2 mrg in r26,__SP_L__
747 1.1.1.2 mrg in r27,__SP_H__
748 1.1.1.2 mrg subi r26, lo8(-3) ; Add 3 to point past return address
749 1.1.1.2 mrg sbci r27, hi8(-3)
750 1.1.1.2 mrg push B0 ; save callee saved regs
751 1.1.1.2 mrg push B1
752 1.1.1.2 mrg ld B0,X+ ; load from caller stack
753 1.1 mrg ld B1,X+
754 1.1 mrg ld B2,X+
755 1.1 mrg #endif /* defined (__AVR_TINY__) */
756 1.1 mrg
757 1.1 mrg ;; C[] = 0
758 1.1 mrg clr __tmp_reg__
759 1.1 mrg clr C2
760 1.1 mrg
761 1.1 mrg 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
762 1.1 mrg LSR B2 $ ror B1 $ ror B0
763 1.1 mrg
764 1.1 mrg ;; If the N-th Bit of B[] was set...
765 1.1 mrg brcc 1f
766 1.1 mrg
767 1.1 mrg ;; ...then add A[] * 2^N to the Result C[]
768 1.1 mrg ADD C0,A0 $ adc C1,A1 $ adc C2,A2
769 1.1 mrg
770 1.1 mrg 1: ;; Multiply A[] by 2
771 1.1 mrg LSL A0 $ rol A1 $ rol A2
772 1.1 mrg
773 1.1 mrg ;; Loop until B[] is 0
774 1.1 mrg subi B0,0 $ sbci B1,0 $ sbci B2,0
775 1.1 mrg brne 0b
776 1.1 mrg
777 1.1 mrg ;; Copy C[] to the return Register A[]
778 1.1 mrg wmov A0, C0
779 1.1.1.2 mrg mov A2, C2
780 1.1.1.2 mrg
781 1.1.1.2 mrg clr __zero_reg__
782 1.1.1.2 mrg #if defined (__AVR_TINY__)
783 1.1 mrg pop B1
784 1.1 mrg pop B0
785 1.1 mrg #endif /* (__AVR_TINY__) */
786 1.1 mrg ret
787 1.1 mrg ENDF __mulpsi3
788 1.1 mrg
789 1.1 mrg #undef C2
790 1.1 mrg #undef C1
791 1.1 mrg #undef C0
792 1.1 mrg
793 1.1 mrg #endif /* HAVE_MUL */
794 1.1 mrg
795 1.1 mrg #undef B2
796 1.1 mrg #undef B1
797 1.1 mrg #undef B0
798 1.1 mrg
799 1.1 mrg #undef A2
800 1.1 mrg #undef A1
801 1.1 mrg #undef A0
802 1.1 mrg
803 1.1 mrg #endif /* L_mulpsi3 */
804 1.1 mrg
805 1.1 mrg #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
806 1.1 mrg
807 1.1 mrg ;; A[0..2]: In: Multiplicand
808 1.1 mrg #define A0 22
809 1.1 mrg #define A1 A0+1
810 1.1 mrg #define A2 A0+2
811 1.1 mrg
812 1.1 mrg ;; BB: In: Multiplier
813 1.1 mrg #define BB 25
814 1.1 mrg
815 1.1 mrg ;; C[0..2]: Result
816 1.1 mrg #define C0 18
817 1.1 mrg #define C1 C0+1
818 1.1 mrg #define C2 C0+2
819 1.1 mrg
820 1.1 mrg ;; C[] = A[] * sign_extend (BB)
;; 24-bit C[] = 24-bit A[] * sign_extend(8-bit BB), MUL required.
;; Unsigned schoolbook first; if BB < 0 the one-extension adds
;; A * (-256), i.e. subtracts A1:A0 from C2:C1.
821 1.1 mrg DEFUN __mulsqipsi3
822 1.1 mrg mul A0, BB
823 1.1 mrg movw C0, r0
824 1.1 mrg mul A2, BB
825 1.1 mrg mov C2, r0
826 1.1 mrg mul A1, BB
827 1.1 mrg add C1, r0
828 1.1 mrg adc C2, r1
829 1.1 mrg clr __zero_reg__
830 1.1 mrg sbrs BB, 7
831 1.1 mrg ret
832 1.1 mrg ;; One-extend BB
833 1.1 mrg sub C1, A0
834 1.1 mrg sbc C2, A1
835 1.1 mrg ret
836 1.1 mrg ENDF __mulsqipsi3
837 1.1 mrg
838 1.1 mrg #undef C2
839 1.1 mrg #undef C1
840 1.1 mrg #undef C0
841 1.1 mrg
842 1.1 mrg #undef BB
843 1.1 mrg
844 1.1 mrg #undef A2
845 1.1 mrg #undef A1
846 1.1 mrg #undef A0
847 1.1 mrg
848 1.1 mrg #endif /* L_mulsqipsi3 && HAVE_MUL */
849 1.1 mrg
850 1.1 mrg /*******************************************************
851 1.1 mrg Multiplication 64 x 64
852 1.1 mrg *******************************************************/
853 1.1 mrg
854 1.1 mrg ;; A[] = A[] * B[]
855 1.1 mrg
856 1.1 mrg ;; A[0..7]: In: Multiplicand
857 1.1 mrg ;; Out: Product
858 1.1 mrg #define A0 18
859 1.1 mrg #define A1 A0+1
860 1.1 mrg #define A2 A0+2
861 1.1 mrg #define A3 A0+3
862 1.1 mrg #define A4 A0+4
863 1.1 mrg #define A5 A0+5
864 1.1 mrg #define A6 A0+6
865 1.1 mrg #define A7 A0+7
866 1.1 mrg
867 1.1 mrg ;; B[0..7]: In: Multiplier
868 1.1 mrg #define B0 10
869 1.1 mrg #define B1 B0+1
870 1.1 mrg #define B2 B0+2
871 1.1 mrg #define B3 B0+3
872 1.1 mrg #define B4 B0+4
873 1.1 mrg #define B5 B0+5
874 1.1.1.2 mrg #define B6 B0+6
875 1.1 mrg #define B7 B0+7
876 1.1 mrg
877 1.1 mrg #ifndef __AVR_TINY__
878 1.1 mrg #if defined (__AVR_HAVE_MUL__)
879 1.1 mrg ;; Define C[] for convenience
880 1.1 mrg ;; Notice that parts of C[] overlap A[] respective B[]
881 1.1 mrg #define C0 16
882 1.1 mrg #define C1 C0+1
883 1.1 mrg #define C2 20
884 1.1 mrg #define C3 C2+1
885 1.1 mrg #define C4 28
886 1.1 mrg #define C5 C4+1
887 1.1 mrg #define C6 C4+2
888 1.1 mrg #define C7 C4+3
889 1.1 mrg
890 1.1 mrg #if defined (L_muldi3)
891 1.1 mrg
892 1.1 mrg ;; A[] *= B[]
893 1.1 mrg ;; R25:R18 *= R17:R10
894 1.1 mrg ;; Ordinary ABI-Function
895 1.1 mrg
;; 64 x 64 -> 64 with MUL, as a 4 x 4 multiplication of 16-bit
;; words.  High cross-products are done with single MULs up front;
;; the word x word products go through __umulhisi3 / __muldi3_6.
;; '$' separates instructions written on one line; empty '$' slots
;; keep the partial-product columns visually aligned.
;; r16/r17 and Y are call-saved and therefore pushed.
896 1.1 mrg DEFUN __muldi3
897 1.1 mrg push r29
898 1.1 mrg push r28
899 1.1 mrg push r17
900 1.1 mrg push r16
901 1.1 mrg
902 1.1 mrg ;; Counting in Words, we have to perform a 4 * 4 Multiplication
903 1.1 mrg
904 1.1 mrg ;; 3 * 0 + 0 * 3
905 1.1 mrg mul A7,B0 $ $ mov C7,r0
906 1.1 mrg mul A0,B7 $ $ add C7,r0
907 1.1 mrg mul A6,B1 $ $ add C7,r0
908 1.1 mrg mul A6,B0 $ mov C6,r0 $ add C7,r1
909 1.1 mrg mul B6,A1 $ $ add C7,r0
910 1.1 mrg mul B6,A0 $ add C6,r0 $ adc C7,r1
911 1.1 mrg
912 1.1 mrg ;; 1 * 2
913 1.1 mrg mul A2,B4 $ add C6,r0 $ adc C7,r1
914 1.1 mrg mul A3,B4 $ $ add C7,r0
915 1.1 mrg mul A2,B5 $ $ add C7,r0
916 1.1 mrg
;; Words of A and B that will be overwritten by result/temporaries
;; are parked on the stack and popped back as operands below.
917 1.1 mrg push A5
918 1.1 mrg push A4
919 1.1 mrg push B1
920 1.1 mrg push B0
921 1.1 mrg push A3
922 1.1 mrg push A2
923 1.1 mrg
924 1.1 mrg ;; 0 * 0
925 1.1 mrg wmov 26, B0
926 1.1 mrg XCALL __umulhisi3
927 1.1 mrg wmov C0, 22
928 1.1 mrg wmov C2, 24
929 1.1 mrg
930 1.1 mrg ;; 0 * 2
931 1.1 mrg wmov 26, B4
932 1.1 mrg XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
933 1.1 mrg
934 1.1 mrg wmov 26, B2
935 1.1 mrg ;; 0 * 1
936 1.1 mrg XCALL __muldi3_6
937 1.1 mrg
938 1.1 mrg pop A0
939 1.1 mrg pop A1
940 1.1 mrg ;; 1 * 1
941 1.1 mrg wmov 26, B2
942 1.1 mrg XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
943 1.1 mrg
944 1.1 mrg pop r26
945 1.1 mrg pop r27
946 1.1 mrg ;; 1 * 0
947 1.1 mrg XCALL __muldi3_6
948 1.1 mrg
949 1.1 mrg pop A0
950 1.1 mrg pop A1
951 1.1 mrg ;; 2 * 0
952 1.1 mrg XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
953 1.1 mrg
954 1.1 mrg ;; 2 * 1
955 1.1 mrg wmov 26, B2
956 1.1 mrg XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
957 1.1 mrg
958 1.1 mrg ;; A[] = C[]
959 1.1 mrg wmov A0, C0
960 1.1 mrg ;; A2 = C2 already
961 1.1 mrg wmov A4, C4
962 1.1 mrg wmov A6, C6
963 1.1 mrg
964 1.1 mrg pop r16
965 1.1 mrg pop r17
966 1.1 mrg pop r28
967 1.1 mrg pop r29
968 1.1 mrg ret
969 1.1 mrg ENDF __muldi3
970 1.1 mrg #endif /* L_muldi3 */
971 1.1 mrg
972 1.1 mrg #if defined (L_muldi3_6)
973 1.1 mrg ;; A helper for some 64-bit multiplications with MUL available
;; Shared tail for __muldi3: add the 32-bit product returned by
;; __umulhisi3 in r25:r22 into result bytes C2..C5 and propagate a
;; final carry into the C7:C6 pair.
974 1.1 mrg DEFUN __muldi3_6
975 1.1 mrg __muldi3_6:
976 1.1 mrg XCALL __umulhisi3
977 1.1 mrg add C2, 22
978 1.1 mrg adc C3, 23
979 1.1 mrg adc C4, 24
980 1.1 mrg adc C5, 25
981 1.1 mrg brcc 0f
982 1.1 mrg adiw C6, 1
983 1.1 mrg 0: ret
984 1.1 mrg ENDF __muldi3_6
985 1.1 mrg #endif /* L_muldi3_6 */
986 1.1 mrg
987 1.1 mrg #undef C7
988 1.1 mrg #undef C6
989 1.1 mrg #undef C5
990 1.1 mrg #undef C4
991 1.1 mrg #undef C3
992 1.1 mrg #undef C2
993 1.1 mrg #undef C1
994 1.1 mrg #undef C0
995 1.1 mrg
996 1.1 mrg #else /* !HAVE_MUL */
997 1.1 mrg
998 1.1 mrg #if defined (L_muldi3)
999 1.1 mrg
1000 1.1 mrg #define C0 26
1001 1.1 mrg #define C1 C0+1
1002 1.1 mrg #define C2 C0+2
1003 1.1 mrg #define C3 C0+3
1004 1.1 mrg #define C4 C0+4
1005 1.1 mrg #define C5 C0+5
1006 1.1 mrg #define C6 0
1007 1.1 mrg #define C7 C6+1
1008 1.1 mrg
1009 1.1 mrg #define Loop 9
1010 1.1 mrg
1011 1.1 mrg ;; A[] *= B[]
1012 1.1 mrg ;; R25:R18 *= R17:R10
1013 1.1 mrg ;; Ordinary ABI-Function
1014 1.1 mrg
;; 64 x 64 -> 64 shift-and-add for cores without MUL: 64 iterations
;; of "rotate B right, conditionally add A, shift A left".  C6/C7
;; are __tmp_reg__/__zero_reg__, so "wmov C0,0" etc. copy the
;; cleared r1:r0 pair, and __zero_reg__ is re-cleared at the end.
;; Loop (r9) is call-saved and therefore pushed.
1015 1.1 mrg DEFUN __muldi3
1016 1.1 mrg push r29
1017 1.1 mrg push r28
1018 1.1 mrg push Loop
1019 1.1 mrg
1020 1.1 mrg ldi C0, 64
1021 1.1 mrg mov Loop, C0
1022 1.1 mrg
1023 1.1 mrg ;; C[] = 0
1024 1.1 mrg clr __tmp_reg__
1025 1.1 mrg wmov C0, 0
1026 1.1 mrg wmov C2, 0
1027 1.1 mrg wmov C4, 0
1028 1.1 mrg
1029 1.1 mrg 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1030 1.1 mrg ;; where N = 64 - Loop.
1031 1.1 mrg ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1032 1.1 mrg ;; B[] will have its initial Value again.
1033 1.1 mrg LSR B7 $ ror B6 $ ror B5 $ ror B4
1034 1.1 mrg ror B3 $ ror B2 $ ror B1 $ ror B0
1035 1.1 mrg
1036 1.1 mrg ;; If the N-th Bit of B[] was set then...
1037 1.1 mrg brcc 1f
1038 1.1 mrg ;; ...finish Rotation...
1039 1.1 mrg ori B7, 1 << 7
1040 1.1 mrg
1041 1.1 mrg ;; ...and add A[] * 2^N to the Result C[]
1042 1.1 mrg ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1043 1.1 mrg adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1044 1.1 mrg
1045 1.1 mrg 1: ;; Multiply A[] by 2
1046 1.1 mrg LSL A0 $ rol A1 $ rol A2 $ rol A3
1047 1.1 mrg rol A4 $ rol A5 $ rol A6 $ rol A7
1048 1.1 mrg
1049 1.1 mrg dec Loop
1050 1.1 mrg brne 0b
1051 1.1 mrg
1052 1.1 mrg ;; We expanded the Result in C[]
1053 1.1 mrg ;; Copy Result to the Return Register A[]
1054 1.1 mrg wmov A0, C0
1055 1.1 mrg wmov A2, C2
1056 1.1 mrg wmov A4, C4
1057 1.1 mrg wmov A6, C6
1058 1.1 mrg
1059 1.1 mrg clr __zero_reg__
1060 1.1 mrg pop Loop
1061 1.1 mrg pop r28
1062 1.1 mrg pop r29
1063 1.1 mrg ret
1064 1.1 mrg ENDF __muldi3
1065 1.1 mrg
1066 1.1 mrg #undef Loop
1067 1.1 mrg
1068 1.1 mrg #undef C7
1069 1.1 mrg #undef C6
1070 1.1 mrg #undef C5
1071 1.1 mrg #undef C4
1072 1.1 mrg #undef C3
1073 1.1 mrg #undef C2
1074 1.1 mrg #undef C1
1075 1.1 mrg #undef C0
1076 1.1.1.2 mrg
1077 1.1 mrg #endif /* L_muldi3 */
1078 1.1 mrg #endif /* HAVE_MUL */
1079 1.1 mrg #endif /* if not __AVR_TINY__ */
1080 1.1 mrg
1081 1.1 mrg #undef B7
1082 1.1 mrg #undef B6
1083 1.1 mrg #undef B5
1084 1.1 mrg #undef B4
1085 1.1 mrg #undef B3
1086 1.1 mrg #undef B2
1087 1.1 mrg #undef B1
1088 1.1 mrg #undef B0
1089 1.1 mrg
1090 1.1 mrg #undef A7
1091 1.1 mrg #undef A6
1092 1.1 mrg #undef A5
1093 1.1 mrg #undef A4
1094 1.1 mrg #undef A3
1095 1.1 mrg #undef A2
1096 1.1 mrg #undef A1
1097 1.1 mrg #undef A0
1098 1.1 mrg
1099 1.1 mrg /*******************************************************
1100 1.1 mrg Widening Multiplication 64 = 32 x 32 with MUL
1101 1.1 mrg *******************************************************/
1102 1.1 mrg
1103 1.1 mrg #if defined (__AVR_HAVE_MUL__)
1104 1.1 mrg #define A0 r22
1105 1.1 mrg #define A1 r23
1106 1.1 mrg #define A2 r24
1107 1.1 mrg #define A3 r25
1108 1.1 mrg
1109 1.1 mrg #define B0 r18
1110 1.1 mrg #define B1 r19
1111 1.1 mrg #define B2 r20
1112 1.1 mrg #define B3 r21
1113 1.1 mrg
1114 1.1 mrg #define C0 18
1115 1.1 mrg #define C1 C0+1
1116 1.1 mrg #define C2 20
1117 1.1 mrg #define C3 C2+1
1118 1.1 mrg #define C4 28
1119 1.1 mrg #define C5 C4+1
1120 1.1 mrg #define C6 C4+2
1121 1.1 mrg #define C7 C4+3
1122 1.1 mrg
1123 1.1 mrg #if defined (L_umulsidi3)
1124 1.1 mrg
1125 1.1 mrg ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1126 1.1 mrg
1127 1.1 mrg ;; R18[8] = R22[4] * R18[4]
1128 1.1 mrg ;;
1129 1.1 mrg ;; Ordinary ABI Function, but additionally sets
1130 1.1 mrg ;; X = R20[2] = B2[2]
1131 1.1 mrg ;; Z = R22[2] = A0[2]
1132 1.1 mrg DEFUN __umulsidi3
1133 1.1 mrg clt ; T = 0: treat A[] as unsigned in the helper
1134 1.1 mrg ;; FALLTHRU
1135 1.1 mrg ENDF __umulsidi3
1136 1.1 mrg ;; T = sign (A)
1137 1.1 mrg DEFUN __umulsidi3_helper
1138 1.1 mrg push 29 $ push 28 ; save Y (call-saved)
1139 1.1 mrg wmov 30, A2
1140 1.1 mrg ;; Counting in Words, we have to perform 4 Multiplications
1141 1.1 mrg ;; 0 * 0
1142 1.1 mrg wmov 26, A0
1143 1.1 mrg XCALL __umulhisi3
1144 1.1 mrg push 23 $ push 22 ; save C0 (low Product Word)
1145 1.1 mrg wmov 28, B0
1146 1.1 mrg wmov 18, B2
1147 1.1 mrg wmov C2, 24
1148 1.1 mrg push 27 $ push 26 ; save A0
1149 1.1 mrg push 19 $ push 18 ; save B2
1150 1.1 mrg ;;
1151 1.1 mrg ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1152 1.1 mrg ;; B2 C2 -- -- -- B0 A2
1153 1.1 mrg ;; 1 * 1
1154 1.1 mrg wmov 26, 30 ; A2
1155 1.1 mrg XCALL __umulhisi3
1156 1.1 mrg ;; Sign-extend A. T holds the sign of A
1157 1.1 mrg brtc 0f
1158 1.1 mrg ;; A < 0: Subtract B from the high Part of the Result (-B * 2^32 Correction)
1159 1.1 mrg sub 22, 28
1160 1.1 mrg sbc 23, 29
1161 1.1 mrg sbc 24, 18
1162 1.1 mrg sbc 25, 19
1163 1.1 mrg 0: wmov 18, 28 ;; B0
1164 1.1 mrg wmov C4, 22
1165 1.1 mrg wmov C6, 24
1166 1.1 mrg ;;
1167 1.1 mrg ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1168 1.1 mrg ;; B0 C2 -- -- A2 C4 C6
1169 1.1 mrg ;;
1170 1.1 mrg ;; 1 * 0 (accumulated into C[] by the helper)
1171 1.1 mrg XCALL __muldi3_6
1172 1.1 mrg ;; 0 * 1
1173 1.1 mrg pop 26 $ pop 27 ;; restore B2 (pushed above)
1174 1.1 mrg pop 18 $ pop 19 ;; restore A0
1175 1.1 mrg XCALL __muldi3_6
1176 1.1 mrg
1177 1.1 mrg ;; Move result C into place and save A0 in Z
1178 1.1 mrg wmov 22, C4
1179 1.1 mrg wmov 24, C6
1180 1.1 mrg wmov 30, 18 ; A0
1181 1.1 mrg pop C0 $ pop C1 ; low Product Word saved first
1182 1.1 mrg
1183 1.1 mrg ;; Epilogue
1184 1.1 mrg pop 28 $ pop 29 ;; restore Y
1185 1.1 mrg ret
1186 1.1 mrg ENDF __umulsidi3_helper
1187 1.1 mrg #endif /* L_umulsidi3 */
1188 1.1 mrg
1189 1.1 mrg
1190 1.1 mrg #if defined (L_mulsidi3)
1191 1.1 mrg
1192 1.1 mrg ;; Signed widening 64 = 32 * 32 Multiplication
1193 1.1 mrg ;;
1194 1.1 mrg ;; R18[8] = R22[4] * R18[4]
1195 1.1 mrg ;; Ordinary ABI Function
1196 1.1 mrg DEFUN __mulsidi3
1197 1.1 mrg bst A3, 7 ; T = sign of A for the helper
1198 1.1 mrg sbrs B3, 7 ; Enhanced core has no skip bug
1199 1.1 mrg XJMP __umulsidi3_helper ; B >= 0: helper handles A's sign via T
1200 1.1 mrg
1201 1.1 mrg ;; B needs sign-extension
1202 1.1 mrg push A3 ; save A's high Word for the Correction below
1203 1.1 mrg push A2
1204 1.1 mrg XCALL __umulsidi3_helper
1205 1.1 mrg ;; A0 survived in Z.  B < 0: subtract A * 2^32 from the high Half
1206 1.1 mrg sub r22, r30 ; A0 from Z
1207 1.1 mrg sbc r23, r31
1208 1.1 mrg pop r26 ; A2 (saved above)
1209 1.1 mrg pop r27 ; A3
1210 1.1 mrg sbc r24, r26
1211 1.1 mrg sbc r25, r27
1212 1.1 mrg ret
1213 1.1 mrg ENDF __mulsidi3
1214 1.1 mrg #endif /* L_mulsidi3 */
1215 1.1 mrg
1216 1.1 mrg #undef A0
1217 1.1 mrg #undef A1
1218 1.1 mrg #undef A2
1219 1.1 mrg #undef A3
1220 1.1 mrg #undef B0
1221 1.1 mrg #undef B1
1222 1.1 mrg #undef B2
1223 1.1 mrg #undef B3
1224 1.1 mrg #undef C0
1225 1.1 mrg #undef C1
1226 1.1 mrg #undef C2
1227 1.1 mrg #undef C3
1228 1.1 mrg #undef C4
1229 1.1 mrg #undef C5
1230 1.1 mrg #undef C6
1231 1.1 mrg #undef C7
1232 1.1 mrg #endif /* HAVE_MUL */
1233 1.1 mrg
1234 1.1.1.2 mrg /**********************************************************
1235 1.1 mrg Widening Multiplication 64 = 32 x 32 without MUL
1236 1.1 mrg **********************************************************/
1237 1.1 mrg #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1238 1.1 mrg #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1239 1.1 mrg #define A0 18
1240 1.1 mrg #define A1 A0+1
1241 1.1 mrg #define A2 A0+2
1242 1.1 mrg #define A3 A0+3
1243 1.1 mrg #define A4 A0+4
1244 1.1 mrg #define A5 A0+5
1245 1.1 mrg #define A6 A0+6
1246 1.1 mrg #define A7 A0+7
1247 1.1 mrg
1248 1.1 mrg #define B0 10
1249 1.1 mrg #define B1 B0+1
1250 1.1 mrg #define B2 B0+2
1251 1.1 mrg #define B3 B0+3
1252 1.1 mrg #define B4 B0+4
1253 1.1 mrg #define B5 B0+5
1254 1.1 mrg #define B6 B0+6
1255 1.1 mrg #define B7 B0+7
1256 1.1 mrg
1257 1.1 mrg #define AA0 22 /* 32-bit A as passed in by the Caller */
1258 1.1 mrg #define AA1 AA0+1
1259 1.1 mrg #define AA2 AA0+2
1260 1.1 mrg #define AA3 AA0+3
1261 1.1 mrg
1262 1.1 mrg #define BB0 18 /* 32-bit B as passed in by the Caller */
1263 1.1 mrg #define BB1 BB0+1
1264 1.1 mrg #define BB2 BB0+2
1265 1.1 mrg #define BB3 BB0+3
1266 1.1 mrg
1267 1.1 mrg #define Mask r30 /* 0xFF (signed) resp. 0x7F (unsigned), see bld below */
1268 1.1 mrg
1269 1.1 mrg ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1270 1.1 mrg ;;
1271 1.1 mrg ;; R18[8] = R22[4] * R18[4]
1272 1.1 mrg ;; Ordinary ABI Function
1273 1.1 mrg DEFUN __mulsidi3
1274 1.1 mrg set ; T = 1: sign-extend the Operands
1275 1.1 mrg skip ; skip the clt below
1276 1.1 mrg ;; FALLTHRU
1277 1.1 mrg ENDF __mulsidi3
1278 1.1 mrg
1279 1.1 mrg DEFUN __umulsidi3
1280 1.1 mrg clt ; skipped.  T = 0: zero-extend the Operands
1281 1.1 mrg ;; Save 10 Registers: R10..R17, R28, R29
1282 1.1 mrg do_prologue_saves 10
1283 1.1 mrg ldi Mask, 0xff
1284 1.1 mrg bld Mask, 7 ; Mask.7 = T: 0xFF if signed, 0x7F if unsigned
1285 1.1 mrg ;; Move B into place...
1286 1.1 mrg wmov B0, BB0
1287 1.1 mrg wmov B2, BB2
1288 1.1 mrg ;; ...and extend it (BB3 only serves as Scratch from here on)
1289 1.1 mrg and BB3, Mask ; clear the Sign Bit if unsigned
1290 1.1 mrg lsl BB3 ; Carry = Sign Bit resp. 0
1291 1.1 mrg sbc B4, B4 ; B4..B7 = 0x00 resp. 0xFF
1292 1.1 mrg mov B5, B4
1293 1.1 mrg wmov B6, B4
1294 1.1 mrg ;; Move A into place...
1295 1.1 mrg wmov A0, AA0
1296 1.1 mrg wmov A2, AA2
1297 1.1 mrg ;; ...and extend it
1298 1.1 mrg and AA3, Mask ; clear the Sign Bit if unsigned
1299 1.1 mrg lsl AA3 ; Carry = Sign Bit resp. 0
1300 1.1 mrg sbc A4, A4 ; A4..A7 = 0x00 resp. 0xFF
1301 1.1 mrg mov A5, A4
1302 1.1 mrg wmov A6, A4
1303 1.1 mrg XCALL __muldi3 ; 64-bit Multiplication of the extended Operands
1304 1.1 mrg do_epilogue_restores 10
1305 1.1 mrg ENDF __umulsidi3
1306 1.1 mrg
1307 1.1 mrg #undef A0
1308 1.1 mrg #undef A1
1309 1.1 mrg #undef A2
1310 1.1 mrg #undef A3
1311 1.1 mrg #undef A4
1312 1.1 mrg #undef A5
1313 1.1 mrg #undef A6
1314 1.1 mrg #undef A7
1315 1.1 mrg #undef B0
1316 1.1 mrg #undef B1
1317 1.1 mrg #undef B2
1318 1.1 mrg #undef B3
1319 1.1 mrg #undef B4
1320 1.1 mrg #undef B5
1321 1.1 mrg #undef B6
1322 1.1 mrg #undef B7
1323 1.1 mrg #undef AA0
1324 1.1 mrg #undef AA1
1325 1.1 mrg #undef AA2
1326 1.1 mrg #undef AA3
1327 1.1 mrg #undef BB0
1328 1.1 mrg #undef BB1
1329 1.1 mrg #undef BB2
1330 1.1.1.2 mrg #undef BB3
1331 1.1 mrg #undef Mask
1332 1.1 mrg #endif /* L_mulsidi3 && !HAVE_MUL */
1333 1.1 mrg #endif /* if not __AVR_TINY__ */
1334 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1335 1.1 mrg
1336 1.1 mrg
1337 1.1 mrg .section .text.libgcc.div, "ax", @progbits
1339 1.1 mrg
1340 1.1 mrg /*******************************************************
1341 1.1 mrg Division 8 / 8 => (result + remainder)
1342 1.1 mrg *******************************************************/
1343 1.1 mrg #define r_rem r25 /* remainder */
1344 1.1 mrg #define r_arg1 r24 /* dividend, quotient */
1345 1.1 mrg #define r_arg2 r22 /* divisor */
1346 1.1 mrg #define r_cnt r23 /* loop count */
1347 1.1 mrg
1348 1.1 mrg #if defined (L_udivmodqi4)
1349 1.1 mrg DEFUN __udivmodqi4
1350 1.1 mrg     sub r_rem,r_rem ; clear remainder and carry
1351 1.1 mrg     ldi r_cnt,9 ; init loop counter (8 bits + 1: first pass only shifts)
1352 1.1 mrg     rjmp __udivmodqi4_ep ; jump to entry point
1353 1.1 mrg __udivmodqi4_loop:
1354 1.1 mrg     rol r_rem ; shift dividend into remainder
1355 1.1 mrg     cp r_rem,r_arg2 ; compare remainder & divisor
1356 1.1 mrg     brcs __udivmodqi4_ep ; remainder < divisor
1357 1.1 mrg     sub r_rem,r_arg2 ; subtract divisor from remainder
1358 1.1 mrg __udivmodqi4_ep:
1359 1.1 mrg     rol r_arg1 ; shift dividend (with CARRY = complemented quotient bit)
1360 1.1 mrg     dec r_cnt ; decrement loop counter
1361 1.1 mrg     brne __udivmodqi4_loop
1362 1.1 mrg     com r_arg1 ; complement result
1363 1.1 mrg     ; because C flag was complemented in loop
1364 1.1 mrg     ret
1365 1.1 mrg ENDF __udivmodqi4
1366 1.1 mrg #endif /* defined (L_udivmodqi4) */
1367 1.1 mrg
1368 1.1 mrg #if defined (L_divmodqi4)
1369 1.1 mrg DEFUN __divmodqi4
1370 1.1 mrg     bst r_arg1,7 ; store sign of dividend in T
1371 1.1 mrg     mov __tmp_reg__,r_arg1
1372 1.1 mrg     eor __tmp_reg__,r_arg2; r0.7 is sign of result
1373 1.1 mrg     sbrc r_arg1,7
1374 1.1 mrg     neg r_arg1 ; dividend negative : negate
1375 1.1 mrg     sbrc r_arg2,7
1376 1.1 mrg     neg r_arg2 ; divisor negative : negate
1377 1.1 mrg     XCALL __udivmodqi4 ; do the unsigned div/mod
1378 1.1 mrg     brtc __divmodqi4_1 ; T clear: dividend was non-negative
1379 1.1 mrg     neg r_rem ; correct remainder sign (follows dividend)
1380 1.1 mrg __divmodqi4_1:
1381 1.1 mrg     sbrc __tmp_reg__,7
1382 1.1 mrg     neg r_arg1 ; correct result sign
1383 1.1 mrg __divmodqi4_exit:
1384 1.1 mrg     ret
1385 1.1 mrg ENDF __divmodqi4
1386 1.1 mrg #endif /* defined (L_divmodqi4) */
1387 1.1 mrg
1388 1.1 mrg #undef r_rem
1389 1.1 mrg #undef r_arg1
1390 1.1 mrg #undef r_arg2
1391 1.1 mrg #undef r_cnt
1392 1.1 mrg
1393 1.1 mrg
1394 1.1 mrg /*******************************************************
1395 1.1 mrg Division 16 / 16 => (result + remainder)
1396 1.1 mrg *******************************************************/
1397 1.1 mrg #define r_remL r26 /* remainder Low */
1398 1.1 mrg #define r_remH r27 /* remainder High */
1399 1.1 mrg
1400 1.1 mrg /* return: remainder */
1401 1.1 mrg #define r_arg1L r24 /* dividend Low */
1402 1.1 mrg #define r_arg1H r25 /* dividend High */
1403 1.1 mrg
1404 1.1 mrg /* return: quotient */
1405 1.1 mrg #define r_arg2L r22 /* divisor Low */
1406 1.1 mrg #define r_arg2H r23 /* divisor High */
1407 1.1 mrg
1408 1.1 mrg #define r_cnt r21 /* loop count */
1409 1.1 mrg
1410 1.1 mrg #if defined (L_udivmodhi4)
1411 1.1 mrg DEFUN __udivmodhi4
1412 1.1 mrg     sub r_remL,r_remL
1413 1.1 mrg     sub r_remH,r_remH ; clear remainder and carry
1414 1.1 mrg     ldi r_cnt,17 ; init loop counter (16 bits + 1: first pass only shifts)
1415 1.1 mrg     rjmp __udivmodhi4_ep ; jump to entry point
1416 1.1 mrg __udivmodhi4_loop:
1417 1.1 mrg     rol r_remL ; shift dividend into remainder
1418 1.1 mrg     rol r_remH
1419 1.1 mrg     cp r_remL,r_arg2L ; compare remainder & divisor
1420 1.1 mrg     cpc r_remH,r_arg2H
1421 1.1 mrg     brcs __udivmodhi4_ep ; remainder < divisor
1422 1.1 mrg     sub r_remL,r_arg2L ; subtract divisor from remainder
1423 1.1 mrg     sbc r_remH,r_arg2H
1424 1.1 mrg __udivmodhi4_ep:
1425 1.1 mrg     rol r_arg1L ; shift dividend (with CARRY)
1426 1.1 mrg     rol r_arg1H
1427 1.1 mrg     dec r_cnt ; decrement loop counter
1428 1.1 mrg     brne __udivmodhi4_loop
1429 1.1 mrg     com r_arg1L ; complement result
1430 1.1 mrg     com r_arg1H ; because C flag was complemented in loop
1431 1.1 mrg     ; div/mod results to return registers, as for the div() function
1432 1.1 mrg     mov_l r_arg2L, r_arg1L ; quotient
1433 1.1 mrg     mov_h r_arg2H, r_arg1H
1434 1.1 mrg     mov_l r_arg1L, r_remL ; remainder
1435 1.1 mrg     mov_h r_arg1H, r_remH
1436 1.1 mrg     ret
1437 1.1 mrg ENDF __udivmodhi4
1438 1.1 mrg #endif /* defined (L_udivmodhi4) */
1439 1.1 mrg
1440 1.1 mrg #if defined (L_divmodhi4)
1441 1.1 mrg DEFUN __divmodhi4
1442 1.1 mrg     .global _div
1443 1.1 mrg _div:
1444 1.1 mrg     bst r_arg1H,7 ; store sign of dividend in T
1445 1.1 mrg     mov __tmp_reg__,r_arg2H
1446 1.1 mrg     brtc 0f
1447 1.1 mrg     com __tmp_reg__ ; r0.7 is sign of result
1448 1.1 mrg     rcall __divmodhi4_neg1 ; dividend negative: negate
1449 1.1 mrg 0:
1450 1.1 mrg     sbrc r_arg2H,7
1451 1.1 mrg     rcall __divmodhi4_neg2 ; divisor negative: negate
1452 1.1 mrg     XCALL __udivmodhi4 ; do the unsigned div/mod
1453 1.1 mrg     sbrc __tmp_reg__,7
1454 1.1 mrg     rcall __divmodhi4_neg2 ; correct quotient sign (quotient now in r_arg2)
1455 1.1 mrg     brtc __divmodhi4_exit ; T set: fall through, negate remainder (in r_arg1)
1456 1.1 mrg __divmodhi4_neg1:
1457 1.1 mrg     ;; correct dividend/remainder sign
1458 1.1 mrg     com r_arg1H
1459 1.1 mrg     neg r_arg1L
1460 1.1 mrg     sbci r_arg1H,0xff
1461 1.1 mrg     ret
1462 1.1 mrg __divmodhi4_neg2:
1463 1.1 mrg     ;; correct divisor/quotient sign
1464 1.1 mrg     com r_arg2H
1465 1.1 mrg     neg r_arg2L
1466 1.1 mrg     sbci r_arg2H,0xff
1467 1.1 mrg __divmodhi4_exit:
1468 1.1 mrg     ret
1469 1.1 mrg ENDF __divmodhi4
1470 1.1 mrg #endif /* defined (L_divmodhi4) */
1471 1.1 mrg
1472 1.1 mrg #undef r_remH
1473 1.1 mrg #undef r_remL
1474 1.1 mrg
1475 1.1 mrg #undef r_arg1H
1476 1.1 mrg #undef r_arg1L
1477 1.1 mrg
1478 1.1 mrg #undef r_arg2H
1479 1.1 mrg #undef r_arg2L
1480 1.1 mrg
1481 1.1 mrg #undef r_cnt
1482 1.1 mrg
1483 1.1 mrg /*******************************************************
1484 1.1 mrg Division 24 / 24 => (result + remainder)
1485 1.1 mrg *******************************************************/
1486 1.1 mrg
1487 1.1 mrg ;; A[0..2]: In: Dividend; Out: Quotient
1488 1.1 mrg #define A0 22
1489 1.1 mrg #define A1 A0+1
1490 1.1 mrg #define A2 A0+2
1491 1.1 mrg
1492 1.1 mrg ;; B[0..2]: In: Divisor; Out: Remainder
1493 1.1 mrg #define B0 18
1494 1.1 mrg #define B1 B0+1
1495 1.1 mrg #define B2 B0+2
1496 1.1 mrg
1497 1.1 mrg ;; C[0..2]: Expand remainder
1498 1.1 mrg #define C0 __zero_reg__
1499 1.1 mrg #define C1 26
1500 1.1 mrg #define C2 25
1501 1.1 mrg
1502 1.1.1.2 mrg ;; Loop counter
1503 1.1 mrg #define r_cnt 21
1504 1.1 mrg
1505 1.1 mrg #if defined (L_udivmodpsi4)
1506 1.1 mrg ;; R24:R22 = R24:R22 udiv R20:R18
1507 1.1 mrg ;; R20:R18 = R24:R22 umod R20:R18
1508 1.1 mrg ;; Clobbers: R21, R25, R26
1509 1.1 mrg
1510 1.1 mrg DEFUN __udivmodpsi4
1511 1.1 mrg     ; init loop counter (24 bits + 1: first pass only shifts)
1512 1.1 mrg     ldi r_cnt, 24+1
1513 1.1 mrg     ; Clear remainder and carry.  C0 is already 0
1514 1.1 mrg     clr C1
1515 1.1 mrg     sub C2, C2
1516 1.1 mrg     ; jump to entry point
1517 1.1 mrg     rjmp __udivmodpsi4_start
1518 1.1 mrg __udivmodpsi4_loop:
1519 1.1 mrg     ; shift dividend into remainder
1520 1.1 mrg     rol C0
1521 1.1 mrg     rol C1
1522 1.1 mrg     rol C2
1523 1.1 mrg     ; compare remainder & divisor
1524 1.1 mrg     cp C0, B0
1525 1.1 mrg     cpc C1, B1
1526 1.1 mrg     cpc C2, B2
1527 1.1 mrg     brcs __udivmodpsi4_start ; remainder < divisor
1528 1.1 mrg     sub C0, B0 ; subtract divisor from remainder
1529 1.1 mrg     sbc C1, B1
1530 1.1 mrg     sbc C2, B2
1531 1.1 mrg __udivmodpsi4_start:
1532 1.1 mrg     ; shift dividend (with CARRY)
1533 1.1 mrg     rol A0
1534 1.1 mrg     rol A1
1535 1.1 mrg     rol A2
1536 1.1 mrg     ; decrement loop counter
1537 1.1 mrg     dec r_cnt
1538 1.1 mrg     brne __udivmodpsi4_loop
1539 1.1 mrg     com A0 ; complement result
1540 1.1 mrg     com A1 ; because C flag was complemented in loop
1541 1.1 mrg     com A2
1542 1.1 mrg     ; div/mod results to return registers
1543 1.1 mrg     ; remainder
1544 1.1 mrg     mov B0, C0
1545 1.1 mrg     mov B1, C1
1546 1.1 mrg     mov B2, C2
1547 1.1 mrg     clr __zero_reg__ ; C0 was __zero_reg__: restore it to 0
1548 1.1 mrg     ret
1549 1.1 mrg ENDF __udivmodpsi4
1550 1.1 mrg #endif /* defined (L_udivmodpsi4) */
1551 1.1 mrg
1552 1.1 mrg #if defined (L_divmodpsi4)
1553 1.1 mrg ;; R24:R22 = R24:R22 div R20:R18
1554 1.1 mrg ;; R20:R18 = R24:R22 mod R20:R18
1555 1.1 mrg ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1556 1.1 mrg
1557 1.1 mrg DEFUN __divmodpsi4
1558 1.1 mrg     ; R0.7 will contain the sign of the result:
1559 1.1 mrg     ; R0.7 = A.sign ^ B.sign
1560 1.1 mrg     mov __tmp_reg__, B2
1561 1.1 mrg     ; T-flag = sign of dividend
1562 1.1 mrg     bst A2, 7
1563 1.1 mrg     brtc 0f
1564 1.1 mrg     com __tmp_reg__
1565 1.1 mrg     ; Adjust dividend's sign
1566 1.1 mrg     rcall __divmodpsi4_negA
1567 1.1 mrg 0:
1568 1.1 mrg     ; Adjust divisor's sign
1569 1.1 mrg     sbrc B2, 7
1570 1.1 mrg     rcall __divmodpsi4_negB
1571 1.1 mrg
1572 1.1 mrg     ; Do the unsigned div/mod; quotient ends up in A[], remainder in B[]
1573 1.1 mrg     XCALL __udivmodpsi4
1574 1.1 mrg
1575 1.1 mrg     ; Adjust quotient's sign
1576 1.1 mrg     sbrc __tmp_reg__, 7
1577 1.1 mrg     rcall __divmodpsi4_negA
1578 1.1 mrg
1579 1.1 mrg     ; Adjust remainder's sign (T set: fall through into negB)
1580 1.1 mrg     brtc __divmodpsi4_end
1581 1.1 mrg
1582 1.1 mrg __divmodpsi4_negB:
1583 1.1 mrg     ; Correct divisor/remainder sign
1584 1.1 mrg     com B2
1585 1.1 mrg     com B1
1586 1.1 mrg     neg B0
1587 1.1 mrg     sbci B1, -1
1588 1.1 mrg     sbci B2, -1
1589 1.1 mrg     ret
1590 1.1 mrg
1591 1.1 mrg     ; Correct dividend/quotient sign
1592 1.1 mrg __divmodpsi4_negA:
1593 1.1 mrg     com A2
1594 1.1 mrg     com A1
1595 1.1 mrg     neg A0
1596 1.1 mrg     sbci A1, -1
1597 1.1 mrg     sbci A2, -1
1598 1.1 mrg __divmodpsi4_end:
1599 1.1 mrg     ret
1600 1.1 mrg
1601 1.1 mrg ENDF __divmodpsi4
1602 1.1 mrg #endif /* defined (L_divmodpsi4) */
1603 1.1 mrg
1604 1.1 mrg #undef A0
1605 1.1 mrg #undef A1
1606 1.1 mrg #undef A2
1607 1.1 mrg
1608 1.1 mrg #undef B0
1609 1.1 mrg #undef B1
1610 1.1 mrg #undef B2
1611 1.1 mrg
1612 1.1 mrg #undef C0
1613 1.1 mrg #undef C1
1614 1.1 mrg #undef C2
1615 1.1 mrg
1616 1.1 mrg #undef r_cnt
1617 1.1 mrg
1618 1.1 mrg /*******************************************************
1619 1.1 mrg Division 32 / 32 => (result + remainder)
1620 1.1 mrg *******************************************************/
1621 1.1 mrg #define r_remHH r31 /* remainder High */
1622 1.1 mrg #define r_remHL r30
1623 1.1 mrg #define r_remH r27
1624 1.1 mrg #define r_remL r26 /* remainder Low */
1625 1.1 mrg
1626 1.1 mrg /* return: remainder */
1627 1.1 mrg #define r_arg1HH r25 /* dividend High */
1628 1.1 mrg #define r_arg1HL r24
1629 1.1 mrg #define r_arg1H r23
1630 1.1 mrg #define r_arg1L r22 /* dividend Low */
1631 1.1 mrg
1632 1.1 mrg /* return: quotient */
1633 1.1 mrg #define r_arg2HH r21 /* divisor High */
1634 1.1 mrg #define r_arg2HL r20
1635 1.1 mrg #define r_arg2H r19
1636 1.1 mrg #define r_arg2L r18 /* divisor Low */
1637 1.1 mrg
1638 1.1 mrg #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1639 1.1 mrg
1640 1.1 mrg #if defined (L_udivmodsi4)
1641 1.1 mrg DEFUN __udivmodsi4
1642 1.1 mrg     ldi r_remL, 33 ; init loop counter (32 bits + 1: first pass only shifts)
1643 1.1 mrg     mov r_cnt, r_remL ; r_cnt is __zero_reg__: 0 again after the loop
1644 1.1 mrg     sub r_remL,r_remL
1645 1.1 mrg     sub r_remH,r_remH ; clear remainder and carry
1646 1.1 mrg     mov_l r_remHL, r_remL
1647 1.1 mrg     mov_h r_remHH, r_remH
1648 1.1 mrg     rjmp __udivmodsi4_ep ; jump to entry point
1649 1.1 mrg __udivmodsi4_loop:
1650 1.1 mrg     rol r_remL ; shift dividend into remainder
1651 1.1 mrg     rol r_remH
1652 1.1 mrg     rol r_remHL
1653 1.1 mrg     rol r_remHH
1654 1.1 mrg     cp r_remL,r_arg2L ; compare remainder & divisor
1655 1.1 mrg     cpc r_remH,r_arg2H
1656 1.1 mrg     cpc r_remHL,r_arg2HL
1657 1.1 mrg     cpc r_remHH,r_arg2HH
1658 1.1 mrg     brcs __udivmodsi4_ep ; remainder < divisor
1659 1.1 mrg     sub r_remL,r_arg2L ; subtract divisor from remainder
1660 1.1 mrg     sbc r_remH,r_arg2H
1661 1.1 mrg     sbc r_remHL,r_arg2HL
1662 1.1 mrg     sbc r_remHH,r_arg2HH
1663 1.1 mrg __udivmodsi4_ep:
1664 1.1 mrg     rol r_arg1L ; shift dividend (with CARRY)
1665 1.1 mrg     rol r_arg1H
1666 1.1 mrg     rol r_arg1HL
1667 1.1 mrg     rol r_arg1HH
1668 1.1 mrg     dec r_cnt ; decrement loop counter
1669 1.1 mrg     brne __udivmodsi4_loop
1670 1.1 mrg     ; __zero_reg__ now restored (r_cnt == 0)
1671 1.1 mrg     com r_arg1L ; complement result
1672 1.1 mrg     com r_arg1H ; because C flag was complemented in loop
1673 1.1 mrg     com r_arg1HL
1674 1.1 mrg     com r_arg1HH
1675 1.1 mrg     ; div/mod results to return registers, as for the ldiv() function
1676 1.1 mrg     mov_l r_arg2L, r_arg1L ; quotient
1677 1.1 mrg     mov_h r_arg2H, r_arg1H
1678 1.1 mrg     mov_l r_arg2HL, r_arg1HL
1679 1.1 mrg     mov_h r_arg2HH, r_arg1HH
1680 1.1 mrg     mov_l r_arg1L, r_remL ; remainder
1681 1.1 mrg     mov_h r_arg1H, r_remH
1682 1.1 mrg     mov_l r_arg1HL, r_remHL
1683 1.1 mrg     mov_h r_arg1HH, r_remHH
1684 1.1 mrg     ret
1685 1.1 mrg ENDF __udivmodsi4
1686 1.1 mrg #endif /* defined (L_udivmodsi4) */
1687 1.1 mrg
1688 1.1 mrg #if defined (L_divmodsi4)
1689 1.1 mrg DEFUN __divmodsi4
1690 1.1 mrg     mov __tmp_reg__,r_arg2HH
1691 1.1 mrg     bst r_arg1HH,7 ; store sign of dividend in T
1692 1.1 mrg     brtc 0f
1693 1.1 mrg     com __tmp_reg__ ; r0.7 is sign of result
1694 1.1 mrg     XCALL __negsi2 ; dividend negative: negate
1695 1.1 mrg 0:
1696 1.1 mrg     sbrc r_arg2HH,7
1697 1.1 mrg     rcall __divmodsi4_neg2 ; divisor negative: negate
1698 1.1 mrg     XCALL __udivmodsi4 ; do the unsigned div/mod
1699 1.1 mrg     sbrc __tmp_reg__, 7 ; correct quotient sign (now in r_arg2)
1700 1.1 mrg     rcall __divmodsi4_neg2
1701 1.1 mrg     brtc __divmodsi4_exit ; correct remainder sign
1702 1.1 mrg     XJMP __negsi2 ; remainder is back in R25:R22, which __negsi2 negates
1703 1.1 mrg __divmodsi4_neg2:
1704 1.1 mrg     ;; correct divisor/quotient sign
1705 1.1 mrg     com r_arg2HH
1706 1.1 mrg     com r_arg2HL
1707 1.1 mrg     com r_arg2H
1708 1.1 mrg     neg r_arg2L
1709 1.1 mrg     sbci r_arg2H,0xff
1710 1.1 mrg     sbci r_arg2HL,0xff
1711 1.1 mrg     sbci r_arg2HH,0xff
1712 1.1 mrg __divmodsi4_exit:
1713 1.1 mrg     ret
1714 1.1 mrg ENDF __divmodsi4
1715 1.1 mrg #endif /* defined (L_divmodsi4) */
1716 1.1 mrg
1717 1.1 mrg #if defined (L_negsi2)
1718 1.1 mrg ;; (set (reg:SI 22)
1719 1.1 mrg ;; (neg:SI (reg:SI 22)))
1720 1.1 mrg ;; Sets the V flag for signed overflow tests
1721 1.1 mrg DEFUN __negsi2
1722 1.1 mrg     NEG4 22 ; R25:R22 = -R25:R22
1723 1.1 mrg     ret
1724 1.1 mrg ENDF __negsi2
1725 1.1 mrg #endif /* L_negsi2 */
1726 1.1 mrg
1727 1.1 mrg #undef r_remHH
1728 1.1 mrg #undef r_remHL
1729 1.1 mrg #undef r_remH
1730 1.1 mrg #undef r_remL
1731 1.1 mrg #undef r_arg1HH
1732 1.1 mrg #undef r_arg1HL
1733 1.1 mrg #undef r_arg1H
1734 1.1 mrg #undef r_arg1L
1735 1.1 mrg #undef r_arg2HH
1736 1.1 mrg #undef r_arg2HL
1737 1.1.1.2 mrg #undef r_arg2H
1738 1.1.1.2 mrg #undef r_arg2L
1739 1.1.1.2 mrg #undef r_cnt
1740 1.1.1.2 mrg
1741 1.1 mrg /* *di routines use registers below R19 and won't work with tiny arch
1742 1.1 mrg right now. */
1743 1.1 mrg
1744 1.1 mrg #if !defined (__AVR_TINY__)
1745 1.1 mrg /*******************************************************
1746 1.1 mrg Division 64 / 64
1747 1.1 mrg Modulo 64 % 64
1748 1.1 mrg *******************************************************/
1749 1.1 mrg
1750 1.1 mrg ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1751 1.1 mrg ;; at least 16k of Program Memory. For smaller Devices, depend
1752 1.1 mrg ;; on MOVW and SP Size. There is a Connection between SP Size and
1753 1.1 mrg ;; Flash Size so that SP Size can be used to test for Flash Size.
1754 1.1 mrg
1755 1.1 mrg #if defined (__AVR_HAVE_JMP_CALL__)
1756 1.1 mrg # define SPEED_DIV 8
1757 1.1 mrg #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1758 1.1 mrg # define SPEED_DIV 16
1759 1.1 mrg #else
1760 1.1 mrg # define SPEED_DIV 0
1761 1.1 mrg #endif
1762 1.1 mrg
1763 1.1 mrg ;; A[0..7]: In: Dividend;
1764 1.1 mrg ;; Out: Quotient (T = 0)
1765 1.1 mrg ;; Out: Remainder (T = 1)
1766 1.1 mrg #define A0 18
1767 1.1 mrg #define A1 A0+1
1768 1.1 mrg #define A2 A0+2
1769 1.1 mrg #define A3 A0+3
1770 1.1 mrg #define A4 A0+4
1771 1.1 mrg #define A5 A0+5
1772 1.1 mrg #define A6 A0+6
1773 1.1 mrg #define A7 A0+7
1774 1.1 mrg
1775 1.1 mrg ;; B[0..7]: In: Divisor; Out: Clobber
1776 1.1 mrg #define B0 10
1777 1.1 mrg #define B1 B0+1
1778 1.1 mrg #define B2 B0+2
1779 1.1 mrg #define B3 B0+3
1780 1.1 mrg #define B4 B0+4
1781 1.1 mrg #define B5 B0+5
1782 1.1 mrg #define B6 B0+6
1783 1.1 mrg #define B7 B0+7
1784 1.1 mrg
1785 1.1 mrg ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1786 1.1 mrg #define C0 8
1787 1.1 mrg #define C1 C0+1
1788 1.1 mrg #define C2 30
1789 1.1 mrg #define C3 C2+1
1790 1.1 mrg #define C4 28
1791 1.1 mrg #define C5 C4+1
1792 1.1 mrg #define C6 26
1793 1.1 mrg #define C7 C6+1
1794 1.1 mrg
1795 1.1 mrg ;; Holds Signs during Division Routine
1796 1.1 mrg #define SS __tmp_reg__
1797 1.1 mrg
1798 1.1 mrg ;; Bit-Counter in Division Routine
1799 1.1 mrg #define R_cnt __zero_reg__
1800 1.1 mrg
1801 1.1 mrg ;; Scratch Register for Negation
1802 1.1 mrg #define NN r31
1803 1.1 mrg
1804 1.1 mrg #if defined (L_udivdi3)
1805 1.1 mrg
1806 1.1 mrg ;; R25:R18 = R24:R18 umod R17:R10
1807 1.1 mrg ;; Ordinary ABI-Function
1808 1.1 mrg
1809 1.1 mrg DEFUN __umoddi3
1810 1.1 mrg     set ; T = 1: __udivmod64 delivers the Remainder
1811 1.1 mrg     rjmp __udivdi3_umoddi3
1812 1.1 mrg ENDF __umoddi3
1813 1.1 mrg
1814 1.1 mrg ;; R25:R18 = R24:R18 udiv R17:R10
1815 1.1 mrg ;; Ordinary ABI-Function
1816 1.1 mrg
1817 1.1 mrg DEFUN __udivdi3
1818 1.1 mrg     clt ; T = 0: __udivmod64 delivers the Quotient
1819 1.1 mrg ENDF __udivdi3
1820 1.1 mrg
1821 1.1 mrg DEFUN __udivdi3_umoddi3
1822 1.1 mrg     push C0 ; __udivmod64 saves nothing: save C0/C1 and C4/C5 here
1823 1.1 mrg     push C1
1824 1.1 mrg     push C4
1825 1.1 mrg     push C5
1826 1.1 mrg     XCALL __udivmod64
1827 1.1 mrg     pop C5
1828 1.1 mrg     pop C4
1829 1.1 mrg     pop C1
1830 1.1 mrg     pop C0
1831 1.1 mrg     ret
1832 1.1 mrg ENDF __udivdi3_umoddi3
1833 1.1 mrg #endif /* L_udivdi3 */
1834 1.1 mrg
1835 1.1 mrg #if defined (L_udivmod64)
1836 1.1 mrg
1837 1.1 mrg ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1838 1.1 mrg ;; No Registers saved/restored; the Callers will take Care.
1839 1.1 mrg ;; Preserves B[] and T-flag
1840 1.1 mrg ;; T = 0: Compute Quotient in A[]
1841 1.1 mrg ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1842 1.1 mrg
1843 1.1 mrg DEFUN __udivmod64
1844 1.1 mrg
1845 1.1 mrg     ;; Clear Remainder (C6, C7 will follow)
1846 1.1 mrg     clr C0
1847 1.1 mrg     clr C1
1848 1.1 mrg     wmov C2, C0
1849 1.1 mrg     wmov C4, C0
1850 1.1 mrg     ldi C7, 64 ; total Bit Count; C7 serves as Scratch Counter first
1851 1.1 mrg
1852 1.1 mrg #if SPEED_DIV == 0 || SPEED_DIV == 16
1853 1.1 mrg     ;; Initialize Loop-Counter
1854 1.1 mrg     mov R_cnt, C7
1855 1.1 mrg     wmov C6, C0
1856 1.1 mrg #endif /* SPEED_DIV */
1857 1.1 mrg
1858 1.1 mrg #if SPEED_DIV == 8
1859 1.1 mrg
1860 1.1 mrg     push A7
1861 1.1 mrg     clr C6
1862 1.1 mrg
1863 1.1 mrg 1:  ;; Compare shifted Dividend against Divisor
1864 1.1 mrg     ;; If -- even after Shifting -- it is smaller...
1865 1.1 mrg     CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1866 1.1 mrg     cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1867 1.1 mrg     brcc 2f
1868 1.1 mrg
1869 1.1 mrg     ;; ...then we can subtract it. Thus, it is legal to shift left
1870 1.1 mrg     $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1871 1.1 mrg     mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1872 1.1 mrg     mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1873 1.1 mrg     mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1874 1.1 mrg
1875 1.1 mrg     ;; 8 Bits are done
1876 1.1 mrg     subi C7, 8
1877 1.1 mrg     brne 1b
1878 1.1 mrg
1879 1.1 mrg     ;; Shifted 64 Bits:  A7 has traveled to C7
1880 1.1 mrg     pop C7
1881 1.1 mrg     ;; Divisor is greater than Dividend. We have:
1882 1.1 mrg     ;; A[] % B[] = A[]
1883 1.1 mrg     ;; A[] / B[] = 0
1884 1.1 mrg     ;; Thus, we can return immediately
1885 1.1 mrg     rjmp 5f
1886 1.1 mrg
1887 1.1 mrg 2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
1888 1.1 mrg     mov R_cnt, C7
1889 1.1 mrg
1890 1.1 mrg     ;; Push of A7 is not needed because C7 is still 0
1891 1.1 mrg     pop C7
1892 1.1 mrg     clr C7
1893 1.1 mrg
1894 1.1 mrg #elif SPEED_DIV == 16
1895 1.1 mrg
1896 1.1 mrg     ;; Compare shifted Dividend against Divisor
1897 1.1 mrg     cp A7, B3
1898 1.1 mrg     cpc C0, B4
1899 1.1 mrg     cpc C1, B5
1900 1.1 mrg     cpc C2, B6
1901 1.1 mrg     cpc C3, B7
1902 1.1 mrg     brcc 2f
1903 1.1 mrg
1904 1.1 mrg     ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1905 1.1 mrg     ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1906 1.1 mrg     wmov C2,A6 $ wmov C0,A4
1907 1.1 mrg     wmov A6,A2 $ wmov A4,A0
1908 1.1 mrg     wmov A2,C6 $ wmov A0,C4
1909 1.1 mrg
1910 1.1 mrg     ;; Set Bit Counter to 32
1911 1.1 mrg     lsr R_cnt ; R_cnt was 64
1912 1.1 mrg 2:
1913 1.1 mrg #elif SPEED_DIV
1914 1.1 mrg #error SPEED_DIV = ?
1915 1.1 mrg #endif /* SPEED_DIV */
1916 1.1 mrg
1917 1.1 mrg ;; The very Division + Remainder Routine
1918 1.1 mrg
1919 1.1 mrg 3:  ;; Left-shift Dividend...
1920 1.1 mrg     lsl A0 $ rol A1 $ rol A2 $ rol A3
1921 1.1 mrg     rol A4 $ rol A5 $ rol A6 $ rol A7
1922 1.1 mrg
1923 1.1 mrg     ;; ...into Remainder
1924 1.1 mrg     rol C0 $ rol C1 $ rol C2 $ rol C3
1925 1.1 mrg     rol C4 $ rol C5 $ rol C6 $ rol C7
1926 1.1 mrg
1927 1.1 mrg     ;; Compare Remainder and Divisor
1928 1.1 mrg     CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1929 1.1 mrg     cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1930 1.1 mrg
1931 1.1 mrg     brcs 4f ; Remainder < Divisor: Quotient Bit is 0
1932 1.1 mrg
1933 1.1 mrg     ;; Divisor fits into Remainder:  Subtract it from Remainder...
1934 1.1 mrg     SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1935 1.1 mrg     sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1936 1.1 mrg
1937 1.1 mrg     ;; ...and set according Bit in the upcoming Quotient
1938 1.1 mrg     ;; The Bit will travel to its final Position
1939 1.1 mrg     ori A0, 1
1940 1.1 mrg
1941 1.1 mrg 4:  ;; This Bit is done
1942 1.1 mrg     dec R_cnt
1943 1.1 mrg     brne 3b
1944 1.1 mrg     ;; __zero_reg__ is 0 again
1945 1.1 mrg
1946 1.1 mrg     ;; T = 0: We are fine with the Quotient in A[]
1947 1.1 mrg     ;; T = 1: Copy Remainder to A[]
1948 1.1 mrg 5:  brtc 6f
1949 1.1 mrg     wmov A0, C0
1950 1.1 mrg     wmov A2, C2
1951 1.1 mrg     wmov A4, C4
1952 1.1 mrg     wmov A6, C6
1953 1.1 mrg     ;; Move the Sign of the Result to SS.7
1954 1.1 mrg     lsl SS
1955 1.1 mrg
1956 1.1 mrg 6:  ret
1957 1.1 mrg
1958 1.1 mrg ENDF __udivmod64
1959 1.1 mrg #endif /* L_udivmod64 */
1960 1.1 mrg
1961 1.1 mrg
1962 1.1 mrg #if defined (L_divdi3)
1963 1.1 mrg
1964 1.1 mrg ;; R25:R18 = R24:R18 mod R17:R10
1965 1.1 mrg ;; Ordinary ABI-Function
1966 1.1 mrg
1967 1.1 mrg DEFUN __moddi3
1968 1.1 mrg     set ; T = 1: __udivmod64 delivers the Remainder
1969 1.1 mrg     rjmp __divdi3_moddi3
1970 1.1 mrg ENDF __moddi3
1971 1.1 mrg
1972 1.1 mrg ;; R25:R18 = R24:R18 div R17:R10
1973 1.1 mrg ;; Ordinary ABI-Function
1974 1.1 mrg
1975 1.1 mrg DEFUN __divdi3
1976 1.1 mrg     clt ; T = 0: __udivmod64 delivers the Quotient
1977 1.1 mrg ENDF __divdi3
1978 1.1 mrg
1979 1.1 mrg DEFUN __divdi3_moddi3
1980 1.1 mrg #if SPEED_DIV
1981 1.1 mrg     mov r31, A7
1982 1.1 mrg     or r31, B7
1983 1.1 mrg     brmi 0f
1984 1.1 mrg     ;; Both Signs are 0: the following Complexity is not needed
1985 1.1 mrg     XJMP __udivdi3_umoddi3
1986 1.1 mrg #endif /* SPEED_DIV */
1987 1.1 mrg
1988 1.1 mrg 0:  ;; The Prologue
1989 1.1 mrg     ;; Save 12 Registers: Y, 17...8
1990 1.1 mrg     ;; No Frame needed
1991 1.1 mrg     do_prologue_saves 12
1992 1.1 mrg
1993 1.1 mrg     ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1994 1.1 mrg     ;; SS.6 will contain the Sign of the Remainder (A.sign)
1995 1.1 mrg     mov SS, A7
1996 1.1 mrg     asr SS ; duplicate A.sign into SS.7 and SS.6
1997 1.1 mrg     ;; Adjust Dividend's Sign as needed
1998 1.1 mrg #if SPEED_DIV
1999 1.1 mrg     ;; Compiling for Speed we know that at least one Sign must be < 0
2000 1.1 mrg     ;; Thus, if A[] >= 0 then we know B[] < 0
2001 1.1 mrg     brpl 22f
2002 1.1 mrg #else
2003 1.1 mrg     brpl 21f
2004 1.1 mrg #endif /* SPEED_DIV */
2005 1.1 mrg
2006 1.1 mrg     XCALL __negdi2 ; A[] = -A[]
2007 1.1 mrg
2008 1.1 mrg     ;; Adjust Divisor's Sign and SS.7 as needed
2009 1.1 mrg 21: tst B7
2010 1.1 mrg     brpl 3f
2011 1.1 mrg 22: ldi NN, 1 << 7
2012 1.1 mrg     eor SS, NN ; flip the Quotient's Sign Bit
2013 1.1 mrg
2014 1.1 mrg     ldi NN, -1 ; B[] = -B[]: complement all Bytes, then add 1
2015 1.1 mrg     com B4 $ com B5 $ com B6 $ com B7
2016 1.1 mrg     $ com B1 $ com B2 $ com B3
2017 1.1 mrg     NEG B0
2018 1.1 mrg     $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
2019 1.1 mrg     sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
2020 1.1 mrg
2021 1.1 mrg 3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2022 1.1 mrg     XCALL __udivmod64
2023 1.1 mrg
2024 1.1 mrg     ;; Adjust Result's Sign (for Modulo, __udivmod64 shifted SS.6 into SS.7)
2025 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2026 1.1 mrg     tst SS ; Skip-Bug Erratum: must not skip the 2-word XCALL below
2027 1.1 mrg     brpl 4f
2028 1.1 mrg #else
2029 1.1 mrg     sbrc SS, 7
2030 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2031 1.1 mrg     XCALL __negdi2
2032 1.1 mrg
2033 1.1 mrg 4:  ;; Epilogue: Restore 12 Registers and return
2034 1.1 mrg     do_epilogue_restores 12
2035 1.1 mrg
2036 1.1 mrg ENDF __divdi3_moddi3
2037 1.1 mrg
2038 1.1 mrg #endif /* L_divdi3 */
2039 1.1 mrg
2040 1.1 mrg #undef R_cnt
2041 1.1 mrg #undef SS
2042 1.1 mrg #undef NN
2043 1.1 mrg
2044 1.1 mrg .section .text.libgcc, "ax", @progbits
2045 1.1 mrg
2046 1.1 mrg #define TT __tmp_reg__
2047 1.1 mrg
2048 1.1 mrg #if defined (L_adddi3)
2049 1.1 mrg ;; (set (reg:DI 18)
2050 1.1 mrg ;;      (plus:DI (reg:DI 18)
2051 1.1 mrg ;;               (reg:DI 10)))
2052 1.1 mrg ;; Sets the V flag for signed overflow tests
2053 1.1 mrg ;; Sets the C flag for unsigned overflow tests
;; 64-bit add: A[] += B[], byte-wise with carry propagation.
;; '$' is this file's statement separator macro.
2054 1.1 mrg DEFUN __adddi3
2055 1.1 mrg     ADD A0,B0  $  adc A1,B1  $  adc A2,B2  $  adc A3,B3
2056 1.1 mrg     adc A4,B4  $  adc A5,B5  $  adc A6,B6  $  adc A7,B7
2057 1.1 mrg     ret
2058 1.1 mrg ENDF __adddi3
2059 1.1 mrg #endif /* L_adddi3 */
2060 1.1 mrg
2061 1.1 mrg #if defined (L_adddi3_s8)
2062 1.1 mrg ;; (set (reg:DI 18)
2063 1.1 mrg ;;      (plus:DI (reg:DI 18)
2064 1.1 mrg ;;               (sign_extend:SI (reg:QI 26))))
2065 1.1 mrg ;; Sets the V flag for signed overflow tests
2066 1.1 mrg ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
;; Add a sign-extended 8-bit value (R26) to the 64-bit A[].
;; TT becomes 0x00 or 0xff = the sign extension of R26.
2067 1.1 mrg DEFUN __adddi3_s8
2068 1.1 mrg     clr     TT
2069 1.1 mrg     sbrc    r26, 7
2070 1.1 mrg     com     TT
2071 1.1 mrg     ADD A0,r26 $  adc A1,TT  $  adc A2,TT  $  adc A3,TT
2072 1.1 mrg     adc A4,TT  $  adc A5,TT  $  adc A6,TT  $  adc A7,TT
2073 1.1 mrg     ret
2074 1.1 mrg ENDF __adddi3_s8
2075 1.1 mrg #endif /* L_adddi3_s8 */
2076 1.1 mrg
2077 1.1 mrg #if defined (L_subdi3)
2078 1.1 mrg ;; (set (reg:DI 18)
2079 1.1 mrg ;;      (minus:DI (reg:DI 18)
2080 1.1 mrg ;;                (reg:DI 10)))
2081 1.1 mrg ;; Sets the V flag for signed overflow tests
2082 1.1 mrg ;; Sets the C flag for unsigned overflow tests
;; 64-bit subtract: A[] -= B[], byte-wise with borrow propagation.
2083 1.1 mrg DEFUN __subdi3
2084 1.1 mrg     SUB A0,B0  $  sbc A1,B1  $  sbc A2,B2  $  sbc A3,B3
2085 1.1 mrg     sbc A4,B4  $  sbc A5,B5  $  sbc A6,B6  $  sbc A7,B7
2086 1.1 mrg     ret
2087 1.1 mrg ENDF __subdi3
2088 1.1 mrg #endif /* L_subdi3 */
2089 1.1 mrg
2090 1.1 mrg #if defined (L_cmpdi2)
2091 1.1 mrg ;; (set (cc0)
2092 1.1 mrg ;;      (compare (reg:DI 18)
2093 1.1 mrg ;;               (reg:DI 10)))
;; 64-bit compare A[] vs. B[]: only the condition flags are set,
;; no register is modified.
2094 1.1 mrg DEFUN __cmpdi2
2095 1.1 mrg     CP  A0,B0  $  cpc A1,B1  $  cpc A2,B2  $  cpc A3,B3
2096 1.1 mrg     cpc A4,B4  $  cpc A5,B5  $  cpc A6,B6  $  cpc A7,B7
2097 1.1 mrg     ret
2098 1.1 mrg ENDF __cmpdi2
2099 1.1 mrg #endif /* L_cmpdi2 */
2100 1.1 mrg
2101 1.1 mrg #if defined (L_cmpdi2_s8)
2102 1.1 mrg ;; (set (cc0)
2103 1.1 mrg ;;      (compare (reg:DI 18)
2104 1.1 mrg ;;               (sign_extend:SI (reg:QI 26))))
;; Compare 64-bit A[] against the sign-extension of 8-bit R26.
;; TT holds 0x00 or 0xff = sign extension of R26.
2105 1.1 mrg DEFUN __cmpdi2_s8
2106 1.1 mrg     clr     TT
2107 1.1 mrg     sbrc    r26, 7
2108 1.1 mrg     com     TT
2109 1.1 mrg     CP  A0,r26 $  cpc A1,TT  $  cpc A2,TT  $  cpc A3,TT
2110 1.1 mrg     cpc A4,TT  $  cpc A5,TT  $  cpc A6,TT  $  cpc A7,TT
2111 1.1 mrg     ret
2112 1.1 mrg ENDF __cmpdi2_s8
2113 1.1 mrg #endif /* L_cmpdi2_s8 */
2114 1.1 mrg
2115 1.1 mrg #if defined (L_negdi2)
2116 1.1 mrg ;; (set (reg:DI 18)
2117 1.1 mrg ;;      (neg:DI (reg:DI 18)))
2118 1.1 mrg ;; Sets the V flag for signed overflow tests
;; 64-bit two's-complement negate of A[]: one's complement of all bytes,
;; then add 1 via NEG of the low byte and sbci -1 borrow propagation.
2119 1.1 mrg DEFUN __negdi2
2120 1.1 mrg
2121 1.1 mrg     com  A4    $  com  A5    $  com  A6    $  com  A7
2122 1.1 mrg                $  com  A1    $  com  A2    $  com  A3
2123 1.1 mrg     NEG  A0
2124 1.1 mrg                $  sbci A1,-1 $  sbci A2,-1 $  sbci A3,-1
2125 1.1 mrg     sbci A4,-1 $  sbci A5,-1 $  sbci A6,-1 $  sbci A7,-1
2126 1.1 mrg     ret
2127 1.1 mrg
2128 1.1 mrg ENDF __negdi2
2129 1.1 mrg #endif /* L_negdi2 */
2130 1.1 mrg
2131 1.1 mrg #undef TT
2132 1.1 mrg
2133 1.1 mrg #undef C7
2134 1.1 mrg #undef C6
2135 1.1 mrg #undef C5
2136 1.1 mrg #undef C4
2137 1.1 mrg #undef C3
2138 1.1 mrg #undef C2
2139 1.1 mrg #undef C1
2140 1.1 mrg #undef C0
2141 1.1 mrg
2142 1.1 mrg #undef B7
2143 1.1 mrg #undef B6
2144 1.1 mrg #undef B5
2145 1.1 mrg #undef B4
2146 1.1 mrg #undef B3
2147 1.1 mrg #undef B2
2148 1.1 mrg #undef B1
2149 1.1 mrg #undef B0
2150 1.1 mrg
2151 1.1 mrg #undef A7
2152 1.1 mrg #undef A6
2153 1.1 mrg #undef A5
2154 1.1 mrg #undef A4
2155 1.1 mrg #undef A3
2156 1.1.1.2 mrg #undef A2
2157 1.1.1.2 mrg #undef A1
2158 1.1 mrg #undef A0
2159 1.1 mrg
2160 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2161 1.1 mrg
2162 1.1 mrg
2163 1.1 mrg .section .text.libgcc.prologue, "ax", @progbits
2165 1.1 mrg
2166 1.1 mrg /**********************************
2167 1.1 mrg * This is a prologue subroutine
2168 1.1 mrg **********************************/
2169 1.1 mrg #if !defined (__AVR_TINY__)
2170 1.1 mrg #if defined (L_prologue)
2171 1.1 mrg
2172 1.1 mrg ;; This function does not clobber T-flag; 64-bit division relies on it
;; Out-of-line prologue: pushes the call-saved registers r2..r17 and the
;; frame pointer Y (r28/r29), then allocates the frame by subtracting
;; X (r27:r26) from the stack pointer.  Returns to the caller via XIJMP
;; (indirect jump through Z, with EIND on large-flash devices).
2173 1.1 mrg DEFUN __prologue_saves__
2174 1.1 mrg     push r2
2175 1.1 mrg     push r3
2176 1.1 mrg     push r4
2177 1.1 mrg     push r5
2178 1.1 mrg     push r6
2179 1.1 mrg     push r7
2180 1.1 mrg     push r8
2181 1.1 mrg     push r9
2182 1.1 mrg     push r10
2183 1.1 mrg     push r11
2184 1.1 mrg     push r12
2185 1.1 mrg     push r13
2186 1.1 mrg     push r14
2187 1.1 mrg     push r15
2188 1.1 mrg     push r16
2189 1.1 mrg     push r17
2190 1.1 mrg     push r28
2191 1.1 mrg     push r29
;; Three SP-update variants: 8-bit SP, XMEGA (atomic SP writes in
;; hardware), and classic AVR (interrupts disabled around the 16-bit
;; SP write, SREG restored before SPL so the I-flag timing is safe).
2192 1.1 mrg #if !defined (__AVR_HAVE_SPH__)
2193 1.1 mrg     in  r28,__SP_L__
2194 1.1 mrg     sub r28,r26
2195 1.1 mrg     out __SP_L__,r28
2196 1.1 mrg     clr r29
2197 1.1 mrg #elif defined (__AVR_XMEGA__)
2198 1.1 mrg     in  r28,__SP_L__
2199 1.1 mrg     in  r29,__SP_H__
2200 1.1 mrg     sub r28,r26
2201 1.1 mrg     sbc r29,r27
2202 1.1 mrg     out __SP_L__,r28
2203 1.1 mrg     out __SP_H__,r29
2204 1.1 mrg #else
2205 1.1 mrg     in  r28,__SP_L__
2206 1.1 mrg     in  r29,__SP_H__
2207 1.1 mrg     sub r28,r26
2208 1.1 mrg     sbc r29,r27
2209 1.1 mrg     in  __tmp_reg__,__SREG__
2210 1.1 mrg     cli
2211 1.1.1.2 mrg     out __SP_H__,r29
2212 1.1 mrg     out __SREG__,__tmp_reg__
2213 1.1 mrg     out __SP_L__,r28
2214 1.1 mrg #endif /* #SP = 8/16 */
2215 1.1 mrg
2216 1.1 mrg     XIJMP
2217 1.1 mrg
2218 1.1 mrg ENDF __prologue_saves__
2219 1.1 mrg #endif /* defined (L_prologue) */
2220 1.1 mrg
2221 1.1 mrg /*
2222 1.1 mrg * This is an epilogue subroutine
2223 1.1 mrg */
2224 1.1 mrg #if defined (L_epilogue)
2225 1.1 mrg
;; Out-of-line epilogue, the counterpart of __prologue_saves__:
;; reloads r2..r17 and the saved Y from the frame via Y-relative ldd,
;; deallocates the frame (r30 holds the size added back to SP), and
;; restores the caller's frame pointer before returning.
2226 1.1 mrg DEFUN __epilogue_restores__
2227 1.1 mrg     ldd r2,Y+18
2228 1.1 mrg     ldd r3,Y+17
2229 1.1 mrg     ldd r4,Y+16
2230 1.1 mrg     ldd r5,Y+15
2231 1.1 mrg     ldd r6,Y+14
2232 1.1 mrg     ldd r7,Y+13
2233 1.1 mrg     ldd r8,Y+12
2234 1.1 mrg     ldd r9,Y+11
2235 1.1 mrg     ldd r10,Y+10
2236 1.1 mrg     ldd r11,Y+9
2237 1.1 mrg     ldd r12,Y+8
2238 1.1 mrg     ldd r13,Y+7
2239 1.1 mrg     ldd r14,Y+6
2240 1.1 mrg     ldd r15,Y+5
2241 1.1 mrg     ldd r16,Y+4
2242 1.1 mrg     ldd r17,Y+3
2243 1.1 mrg     ldd r26,Y+2
;; Same three SP-update variants as in __prologue_saves__ (8-bit SP /
;; XMEGA / classic with interrupts masked around the 16-bit SP write).
2244 1.1 mrg #if !defined (__AVR_HAVE_SPH__)
2245 1.1 mrg     ldd r29,Y+1
2246 1.1 mrg     add r28,r30
2247 1.1 mrg     out __SP_L__,r28
2248 1.1 mrg     mov r28, r26
2249 1.1 mrg #elif defined (__AVR_XMEGA__)
2250 1.1 mrg     ldd r27,Y+1
2251 1.1 mrg     add r28,r30
2252 1.1 mrg     adc r29,__zero_reg__
2253 1.1 mrg     out __SP_L__,r28
2254 1.1 mrg     out __SP_H__,r29
2255 1.1 mrg     wmov 28, 26
2256 1.1 mrg #else
2257 1.1 mrg     ldd r27,Y+1
2258 1.1 mrg     add r28,r30
2259 1.1 mrg     adc r29,__zero_reg__
2260 1.1 mrg     in __tmp_reg__,__SREG__
2261 1.1 mrg     cli
2262 1.1 mrg     out __SP_H__,r29
2263 1.1 mrg     out __SREG__,__tmp_reg__
2264 1.1 mrg     out __SP_L__,r28
2265 1.1 mrg     mov_l r28, r26
2266 1.1.1.2 mrg     mov_h r29, r27
2267 1.1 mrg #endif /* #SP = 8/16 */
2268 1.1 mrg     ret
2269 1.1 mrg ENDF __epilogue_restores__
2270 1.1 mrg #endif /* defined (L_epilogue) */
2271 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2272 1.1 mrg
2273 1.1 mrg #ifdef L_exit
;; _exit: placed in .fini9 so it runs first in the shutdown sequence.
;; 'exit' is a weak alias for the same address; the label itself has
;; no code — execution simply continues into the .fini8....fini1 code
;; that the linker script places after it.
2274 1.1 mrg     .section .fini9,"ax",@progbits
2275 1.1 mrg DEFUN _exit
2276 1.1 mrg     .weak   exit
2277 1.1 mrg exit:
2278 1.1 mrg ENDF _exit
2279 1.1 mrg
2280 1.1 mrg     /* Code from .fini8 ... .fini1 sections inserted by ld script.  */
2281 1.1 mrg
;; Final stop: disable interrupts and spin forever.
2282 1.1 mrg     .section .fini0,"ax",@progbits
2283 1.1 mrg     cli
2284 1.1 mrg __stop_program:
2285 1.1 mrg     rjmp __stop_program
2286 1.1 mrg #endif /* defined (L_exit) */
2287 1.1 mrg
2288 1.1 mrg #ifdef L_cleanup
;; Weak no-op _cleanup stub; a real implementation (e.g. from a libc)
;; can override this weak symbol.
2289 1.1 mrg .weak   _cleanup
2290 1.1 mrg .func   _cleanup
2291 1.1 mrg _cleanup:
2292 1.1 mrg     ret
2293 1.1 mrg .endfunc
2294 1.1.1.2 mrg #endif /* defined (L_cleanup) */
2295 1.1 mrg
2296 1.1.1.2 mrg
2297 1.1.1.2 mrg .section .text.libgcc, "ax", @progbits
2299 1.1.1.2 mrg
2300 1.1.1.2 mrg #ifdef L_tablejump2
;; Indirect jump through a flash-resident jump table.
;; On entry Z (and R24 for >128 KiB devices) holds a *word* index of a
;; jumptable entry; it is converted to a byte address by the shifts
;; below, the target word address is fetched from flash, and control
;; transfers to it.
2301 1.1.1.2 mrg DEFUN __tablejump2__
2302 1.1.1.2 mrg     lsl     r30
2303 1.1.1.2 mrg     rol     r31
2304 1.1.1.2 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__)
2305 1.1.1.2 mrg     ;; Word address of gs() jumptable entry in R24:Z
2306 1.1.1.2 mrg     rol     r24
2307 1.1 mrg     out     __RAMPZ__, r24
2308 1.1 mrg #elif defined (__AVR_HAVE_ELPM__)
2309 1.1.1.2 mrg     ;; Word address of jumptable entry in Z
2310 1.1.1.2 mrg     clr     __tmp_reg__
2311 1.1.1.2 mrg     rol     __tmp_reg__
2312 1.1.1.2 mrg     out     __RAMPZ__, __tmp_reg__
2313 1.1.1.2 mrg #endif
2314 1.1.1.2 mrg
2315 1.1.1.2 mrg     ;; Read word address from jumptable and jump
2316 1.1.1.2 mrg
2317 1.1.1.2 mrg #if defined (__AVR_HAVE_ELPMX__)
2318 1.1.1.2 mrg     elpm    __tmp_reg__, Z+
2319 1.1.1.2 mrg     elpm    r31, Z
2320 1.1.1.2 mrg     mov     r30, __tmp_reg__
2321 1.1.1.2 mrg #ifdef __AVR_HAVE_RAMPD__
2322 1.1.1.2 mrg     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2323 1.1.1.2 mrg     out     __RAMPZ__, __zero_reg__
2324 1.1.1.2 mrg #endif /* RAMPD */
2325 1.1.1.2 mrg     XIJMP
2326 1.1.1.2 mrg #elif defined (__AVR_HAVE_ELPM__)
;; No ELPM Z+ form: push the target address bytes and "return" into it.
2327 1.1.1.2 mrg     elpm
2328 1.1.1.2 mrg     push    r0
2329 1.1.1.2 mrg     adiw    r30, 1
2330 1.1.1.2 mrg     elpm
2331 1.1.1.2 mrg     push    r0
2332 1.1.1.2 mrg     ret
2333 1.1.1.2 mrg #elif defined (__AVR_HAVE_LPMX__)
2334 1.1.1.2 mrg     lpm     __tmp_reg__, Z+
2335 1.1.1.2 mrg     lpm     r31, Z
2336 1.1.1.2 mrg     mov     r30, __tmp_reg__
2337 1.1.1.2 mrg     ijmp
2338 1.1.1.2 mrg #elif defined (__AVR_TINY__)
2339 1.1.1.2 mrg     wsubi   30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2340 1.1.1.2 mrg     ld      __tmp_reg__, Z+
2341 1.1.1.2 mrg     ld      r31, Z      ; Use ld instead of lpm to load Z
2342 1.1.1.2 mrg     mov     r30, __tmp_reg__
2343 1.1.1.2 mrg     ijmp
2344 1.1.1.2 mrg #else
;; Plain LPM only: push target bytes and "return" into the target.
2345 1.1 mrg     lpm
2346 1.1.1.2 mrg     push    r0
2347 1.1.1.2 mrg     adiw    r30, 1
2348 1.1 mrg     lpm
2349 1.1.1.2 mrg     push    r0
2350 1.1.1.2 mrg     ret
2351 1.1.1.2 mrg #endif
2352 1.1.1.2 mrg ENDF __tablejump2__
2353 1.1.1.2 mrg #endif /* L_tablejump2 */
2354 1.1.1.2 mrg
2355 1.1.1.2 mrg #if defined(__AVR_TINY__)
2356 1.1.1.2 mrg #ifdef L_copy_data
;; Startup copy of .data initializers for AVR_TINY: flash is mapped
;; into the data address space at __AVR_TINY_PM_BASE_ADDRESS__, so a
;; plain ld/st loop copies [__data_load_start..) to
;; [__data_start..__data_end).
2357 1.1.1.2 mrg     .section .init4,"ax",@progbits
2358 1.1.1.2 mrg     .global __do_copy_data
2359 1.1.1.2 mrg __do_copy_data:
2360 1.1.1.2 mrg     ldi     r18, hi8(__data_end)
2361 1.1.1.2 mrg     ldi     r26, lo8(__data_start)
2362 1.1.1.2 mrg     ldi     r27, hi8(__data_start)
2363 1.1.1.2 mrg     ldi     r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2364 1.1.1.2 mrg     ldi     r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
;; Loop entry is the end-test so a zero-length .data copies nothing.
2365 1.1.1.2 mrg     rjmp    .L__do_copy_data_start
2366 1.1.1.2 mrg .L__do_copy_data_loop:
2367 1.1.1.2 mrg     ld      r19, z+
2368 1.1.1.2 mrg     st      X+, r19
2369 1.1 mrg .L__do_copy_data_start:
2370 1.1 mrg     cpi     r26, lo8(__data_end)
2371 1.1 mrg     cpc     r27, r18
2372 1.1 mrg     brne    .L__do_copy_data_loop
2373 1.1 mrg #endif
2374 1.1 mrg #else
2375 1.1 mrg #ifdef L_copy_data
;; Startup copy of .data initializers from flash to RAM, placed in
;; .init4.  Three variants depending on the flash-read instructions
;; available: ELPM Z+, plain ELPM (with manual RAMPZ carry handling),
;; or LPM / LPM Z+ for devices with <= 64 KiB flash.
2376 1.1 mrg     .section .init4,"ax",@progbits
2377 1.1 mrg DEFUN __do_copy_data
2378 1.1 mrg #if defined(__AVR_HAVE_ELPMX__)
2379 1.1 mrg     ldi     r17, hi8(__data_end)
2380 1.1 mrg     ldi     r26, lo8(__data_start)
2381 1.1 mrg     ldi     r27, hi8(__data_start)
2382 1.1 mrg     ldi     r30, lo8(__data_load_start)
2383 1.1 mrg     ldi     r31, hi8(__data_load_start)
2384 1.1 mrg     ldi     r16, hh8(__data_load_start)
2385 1.1 mrg     out     __RAMPZ__, r16
2386 1.1 mrg     rjmp    .L__do_copy_data_start
2387 1.1 mrg .L__do_copy_data_loop:
2388 1.1 mrg     elpm    r0, Z+
2389 1.1 mrg     st      X+, r0
2390 1.1 mrg .L__do_copy_data_start:
2391 1.1 mrg     cpi     r26, lo8(__data_end)
2392 1.1 mrg     cpc     r27, r17
2393 1.1 mrg     brne    .L__do_copy_data_loop
2394 1.1 mrg #elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
;; Plain ELPM does not auto-increment RAMPZ:Z; on 64 KiB boundary
;; crossings (carry out of ADIW) RAMPZ is bumped by re-entering at
;; .L__do_copy_data_carry.  r16 starts one below so the first inc
;; yields hh8(__data_load_start).
2395 1.1 mrg     ldi     r17, hi8(__data_end)
2396 1.1 mrg     ldi     r26, lo8(__data_start)
2397 1.1 mrg     ldi     r27, hi8(__data_start)
2398 1.1 mrg     ldi     r30, lo8(__data_load_start)
2399 1.1 mrg     ldi     r31, hi8(__data_load_start)
2400 1.1 mrg     ldi     r16, hh8(__data_load_start - 0x10000)
2401 1.1 mrg .L__do_copy_data_carry:
2402 1.1 mrg     inc     r16
2403 1.1 mrg     out     __RAMPZ__, r16
2404 1.1 mrg     rjmp    .L__do_copy_data_start
2405 1.1 mrg .L__do_copy_data_loop:
2406 1.1 mrg     elpm
2407 1.1 mrg     st      X+, r0
2408 1.1 mrg     adiw    r30, 1
2409 1.1 mrg     brcs    .L__do_copy_data_carry
2410 1.1 mrg .L__do_copy_data_start:
2411 1.1 mrg     cpi     r26, lo8(__data_end)
2412 1.1 mrg     cpc     r27, r17
2413 1.1 mrg     brne    .L__do_copy_data_loop
2414 1.1 mrg #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2415 1.1 mrg     ldi     r17, hi8(__data_end)
2416 1.1 mrg     ldi     r26, lo8(__data_start)
2417 1.1 mrg     ldi     r27, hi8(__data_start)
2418 1.1 mrg     ldi     r30, lo8(__data_load_start)
2419 1.1 mrg     ldi     r31, hi8(__data_load_start)
2420 1.1 mrg     rjmp    .L__do_copy_data_start
2421 1.1 mrg .L__do_copy_data_loop:
2422 1.1 mrg #if defined (__AVR_HAVE_LPMX__)
2423 1.1 mrg     lpm     r0, Z+
2424 1.1 mrg #else
2425 1.1 mrg     lpm
2426 1.1 mrg     adiw    r30, 1
2427 1.1 mrg #endif
2428 1.1 mrg     st      X+, r0
2429 1.1 mrg .L__do_copy_data_start:
2430 1.1 mrg     cpi     r26, lo8(__data_end)
2431 1.1 mrg     cpc     r27, r17
2432 1.1 mrg     brne    .L__do_copy_data_loop
2433 1.1 mrg #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2434 1.1.1.2 mrg #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2435 1.1 mrg     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2436 1.1 mrg     out     __RAMPZ__, __zero_reg__
2437 1.1 mrg #endif /* ELPM && RAMPD */
2438 1.1 mrg ENDF __do_copy_data
2439 1.1 mrg #endif /* L_copy_data */
2440 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2441 1.1.1.2 mrg
2442 1.1 mrg /* __do_clear_bss is only necessary if there is anything in .bss section. */
2443 1.1 mrg
2444 1.1 mrg #ifdef L_clear_bss
;; Startup zeroing of [.bss_start, .bss_end) in .init4: stores
;; __zero_reg__ through X with loop entry at the end-test, so an
;; empty .bss writes nothing.
2445 1.1 mrg     .section .init4,"ax",@progbits
2446 1.1 mrg DEFUN __do_clear_bss
2447 1.1 mrg     ldi     r18, hi8(__bss_end)
2448 1.1 mrg     ldi     r26, lo8(__bss_start)
2449 1.1.1.2 mrg     ldi     r27, hi8(__bss_start)
2450 1.1 mrg     rjmp    .do_clear_bss_start
2451 1.1 mrg .do_clear_bss_loop:
2452 1.1 mrg     st      X+, __zero_reg__
2453 1.1 mrg .do_clear_bss_start:
2454 1.1 mrg     cpi     r26, lo8(__bss_end)
2455 1.1 mrg     cpc     r27, r18
2456 1.1 mrg     brne    .do_clear_bss_loop
2457 1.1.1.2 mrg ENDF __do_clear_bss
2458 1.1.1.2 mrg #endif /* L_clear_bss */
2459 1.1.1.2 mrg
2460 1.1.1.2 mrg /* __do_global_ctors and __do_global_dtors are only necessary
2461 1.1.1.2 mrg if there are any constructors/destructors. */
2462 1.1.1.2 mrg
2463 1.1 mrg #if defined(__AVR_TINY__)
2464 1.1 mrg #define cdtors_tst_reg r18
2465 1.1 mrg #else
2466 1.1.1.2 mrg #define cdtors_tst_reg r17
2467 1.1.1.2 mrg #endif
2468 1.1.1.2 mrg
2469 1.1.1.2 mrg #ifdef L_ctors
;; Run global constructors from .init6.  Walks the ctors table from
;; __ctors_end down to __ctors_start (constructors run in reverse
;; link order), dispatching each entry through __tablejump2__.
;; Y (r29:r28) is the table cursor; r16/r24 carry the hh8 byte on
;; devices with EIJMP/EICALL (>128 KiB flash).
2470 1.1.1.2 mrg     .section .init6,"ax",@progbits
2471 1.1.1.2 mrg DEFUN __do_global_ctors
2472 1.1.1.2 mrg     ldi     cdtors_tst_reg, pm_hi8(__ctors_start)
2473 1.1 mrg     ldi     r28, pm_lo8(__ctors_end)
2474 1.1.1.2 mrg     ldi     r29, pm_hi8(__ctors_end)
2475 1.1.1.2 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2476 1.1.1.2 mrg     ldi     r16, pm_hh8(__ctors_end)
2477 1.1.1.2 mrg #endif /* HAVE_EIJMP */
2478 1.1.1.2 mrg     rjmp    .L__do_global_ctors_start
2479 1.1.1.2 mrg .L__do_global_ctors_loop:
2480 1.1.1.2 mrg     wsubi   28, 1
2481 1.1.1.2 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2482 1.1 mrg     sbc     r16, __zero_reg__
2483 1.1.1.2 mrg     mov     r24, r16
2484 1.1.1.2 mrg #endif /* HAVE_EIJMP */
2485 1.1.1.2 mrg     mov_h   r31, r29
2486 1.1.1.2 mrg     mov_l   r30, r28
2487 1.1.1.2 mrg     XCALL   __tablejump2__
2488 1.1.1.2 mrg .L__do_global_ctors_start:
2489 1.1.1.2 mrg     cpi     r28, pm_lo8(__ctors_start)
2490 1.1 mrg     cpc     r29, cdtors_tst_reg
2491 1.1 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2492 1.1 mrg     ldi     r24, pm_hh8(__ctors_start)
2493 1.1 mrg     cpc     r16, r24
2494 1.1 mrg #endif /* HAVE_EIJMP */
2495 1.1 mrg     brne    .L__do_global_ctors_loop
2496 1.1.1.2 mrg ENDF __do_global_ctors
2497 1.1.1.2 mrg #endif /* L_ctors */
2498 1.1.1.2 mrg
2499 1.1.1.2 mrg #ifdef L_dtors
;; Run global destructors from .fini6.  Mirror image of
;; __do_global_ctors: walks the dtors table forward from
;; __dtors_start to __dtors_end, dispatching each entry through
;; __tablejump2__.
2500 1.1.1.2 mrg     .section .fini6,"ax",@progbits
2501 1.1.1.2 mrg DEFUN __do_global_dtors
2502 1.1.1.2 mrg     ldi     cdtors_tst_reg, pm_hi8(__dtors_end)
2503 1.1 mrg     ldi     r28, pm_lo8(__dtors_start)
2504 1.1.1.2 mrg     ldi     r29, pm_hi8(__dtors_start)
2505 1.1.1.2 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2506 1.1.1.2 mrg     ldi     r16, pm_hh8(__dtors_start)
2507 1.1.1.2 mrg #endif /* HAVE_EIJMP */
2508 1.1.1.2 mrg     rjmp    .L__do_global_dtors_start
2509 1.1.1.2 mrg .L__do_global_dtors_loop:
2510 1.1.1.2 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2511 1.1.1.2 mrg     mov     r24, r16
2512 1.1.1.2 mrg #endif /* HAVE_EIJMP */
2513 1.1.1.2 mrg     mov_h   r31, r29
2514 1.1 mrg     mov_l   r30, r28
2515 1.1.1.2 mrg     XCALL   __tablejump2__
2516 1.1.1.2 mrg     waddi   28, 1
2517 1.1.1.2 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2518 1.1.1.2 mrg     adc     r16, __zero_reg__
2519 1.1.1.2 mrg #endif /* HAVE_EIJMP */
2520 1.1.1.2 mrg .L__do_global_dtors_start:
2521 1.1.1.2 mrg     cpi     r28, pm_lo8(__dtors_end)
2522 1.1 mrg     cpc     r29, cdtors_tst_reg
2523 1.1 mrg #ifdef __AVR_HAVE_EIJMP_EICALL__
2524 1.1 mrg     ldi     r24, pm_hh8(__dtors_end)
2525 1.1.1.2 mrg     cpc     r16, r24
2526 1.1 mrg #endif /* HAVE_EIJMP */
2527 1.1.1.2 mrg     brne    .L__do_global_dtors_loop
2528 1.1 mrg ENDF __do_global_dtors
2529 1.1.1.2 mrg #endif /* L_dtors */
2530 1.1 mrg
2531 1.1 mrg #undef cdtors_tst_reg
2532 1.1 mrg
2533 1.1 mrg .section .text.libgcc, "ax", @progbits
2534 1.1 mrg
2535 1.1 mrg #if !defined (__AVR_TINY__)
2536 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2537 1.1 mrg ;; Loading n bytes from Flash; n = 3,4
2538 1.1 mrg ;; R22... = Flash[Z]
2539 1.1 mrg ;; Clobbers: __tmp_reg__
2540 1.1 mrg
2541 1.1 mrg #if (defined (L_load_3) \
2542 1.1 mrg || defined (L_load_4)) \
2543 1.1 mrg && !defined (__AVR_HAVE_LPMX__)
2544 1.1 mrg
2545 1.1 mrg ;; Destination
2546 1.1 mrg #define D0 22
2547 1.1 mrg #define D1 D0+1
2548 1.1 mrg #define D2 D0+2
2549 1.1 mrg #define D3 D0+3
2550 1.1 mrg
;; Read one byte from flash at Z into \dest (devices without LPM Rd,Z+).
;; Z advances between bytes; after the last of the \n bytes Z is wound
;; back to its original value instead.
2551 1.1 mrg .macro  .load dest, n
2552 1.1 mrg     lpm
2553 1.1 mrg     mov     \dest, r0
2554 1.1 mrg .if \dest != D0+\n-1
2555 1.1 mrg     adiw    r30, 1
2556 1.1 mrg .else
2557 1.1 mrg     sbiw    r30, \n-1
2558 1.1 mrg .endif
2559 1.1 mrg .endm
2560 1.1 mrg
2561 1.1 mrg #if defined (L_load_3)
;; Load 3 bytes from Flash[Z] into R22..R24: reuse __load_4 and
;; preserve the caller's 4th destination byte (D3) around the call.
2562 1.1 mrg DEFUN __load_3
2563 1.1 mrg     push  D3
2564 1.1 mrg     XCALL __load_4
2565 1.1 mrg     pop   D3
2566 1.1 mrg     ret
2567 1.1 mrg ENDF __load_3
2568 1.1 mrg #endif /* L_load_3 */
2569 1.1 mrg
2570 1.1 mrg #if defined (L_load_4)
;; Load 4 bytes from Flash[Z] into R22..R25 via the .load macro;
;; Z is restored to its entry value by the final .load.
2571 1.1 mrg DEFUN __load_4
2572 1.1 mrg     .load D0, 4
2573 1.1 mrg     .load D1, 4
2574 1.1 mrg     .load D2, 4
2575 1.1.1.2 mrg     .load D3, 4
2576 1.1 mrg     ret
2577 1.1.1.2 mrg ENDF __load_4
2578 1.1 mrg #endif /* L_load_4 */
2579 1.1 mrg
2580 1.1 mrg #endif /* L_load_3 || L_load_4 */
2581 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2582 1.1 mrg
2583 1.1 mrg #if !defined (__AVR_TINY__)
2584 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2585 1.1 mrg ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2586 1.1 mrg ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2587 1.1 mrg ;; Clobbers: __tmp_reg__, R21, R30, R31
2588 1.1 mrg
2589 1.1 mrg #if (defined (L_xload_1) \
2590 1.1 mrg || defined (L_xload_2) \
2591 1.1 mrg || defined (L_xload_3) \
2592 1.1 mrg || defined (L_xload_4))
2593 1.1 mrg
2594 1.1 mrg ;; Destination
2595 1.1 mrg #define D0 22
2596 1.1 mrg #define D1 D0+1
2597 1.1 mrg #define D2 D0+2
2598 1.1 mrg #define D3 D0+3
2599 1.1 mrg
2600 1.1 mrg ;; Register containing bits 16+ of the address
2601 1.1 mrg
2602 1.1 mrg #define HHI8 21
2603 1.1 mrg
;; Read one byte from flash into \dest, choosing the best available
;; flash-read instruction.  On plain-ELPM devices RAMPZ is carried
;; manually across 64 KiB boundaries; with ELPM + RAMPD, RAMPZ is reset
;; to 0 after the last of the \n bytes.
2604 1.1 mrg .macro  .xload dest, n
2605 1.1 mrg #if defined (__AVR_HAVE_ELPMX__)
2606 1.1 mrg     elpm    \dest, Z+
2607 1.1 mrg #elif defined (__AVR_HAVE_ELPM__)
2608 1.1 mrg     elpm
2609 1.1 mrg     mov     \dest, r0
2610 1.1 mrg .if \dest != D0+\n-1
2611 1.1 mrg     adiw    r30, 1
2612 1.1 mrg     adc     HHI8, __zero_reg__
2613 1.1 mrg     out     __RAMPZ__, HHI8
2614 1.1 mrg .endif
2615 1.1 mrg #elif defined (__AVR_HAVE_LPMX__)
2616 1.1 mrg     lpm     \dest, Z+
2617 1.1 mrg #else
2618 1.1 mrg     lpm
2619 1.1 mrg     mov     \dest, r0
2620 1.1 mrg .if \dest != D0+\n-1
2621 1.1 mrg     adiw    r30, 1
2622 1.1 mrg .endif
2623 1.1 mrg #endif
2624 1.1 mrg #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2625 1.1 mrg .if \dest == D0+\n-1
2626 1.1 mrg     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2627 1.1 mrg     out     __RAMPZ__, __zero_reg__
2628 1.1 mrg .endif
2629 1.1 mrg #endif
2630 1.1 mrg .endm ; .xload
2631 1.1 mrg
2632 1.1 mrg #if defined (L_xload_1)
;; Load 1 byte from the 24-bit __pgmx address R21(HHI8):Z.
;; HHI8 bit 7 selects RAM (set) vs. flash (clear).
2633 1.1 mrg DEFUN __xload_1
2634 1.1 mrg #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
;; <= 64 KiB flash: pick ld vs. lpm with complementary skips, no branch.
2635 1.1 mrg     sbrc    HHI8, 7
2636 1.1 mrg     ld      D0, Z
2637 1.1 mrg     sbrs    HHI8, 7
2638 1.1 mrg     lpm     D0, Z
2639 1.1 mrg     ret
2640 1.1 mrg #else
2641 1.1 mrg     sbrc    HHI8, 7
2642 1.1 mrg     rjmp    1f
2643 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2644 1.1 mrg     out     __RAMPZ__, HHI8
2645 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2646 1.1 mrg     .xload  D0, 1
2647 1.1 mrg     ret
2648 1.1 mrg 1:  ld      D0, Z
2649 1.1 mrg     ret
2650 1.1 mrg #endif /* LPMx && ! ELPM */
2651 1.1 mrg ENDF __xload_1
2652 1.1 mrg #endif /* L_xload_1 */
2653 1.1 mrg
2654 1.1 mrg #if defined (L_xload_2)
;; Load 2 bytes from __pgmx address R21(HHI8):Z; HHI8.7 set = RAM source.
2655 1.1 mrg DEFUN __xload_2
2656 1.1 mrg     sbrc    HHI8, 7
2657 1.1 mrg     rjmp    1f
2658 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2659 1.1 mrg     out     __RAMPZ__, HHI8
2660 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2661 1.1 mrg     .xload  D0, 2
2662 1.1 mrg     .xload  D1, 2
2663 1.1 mrg     ret
2664 1.1 mrg 1:  ld      D0, Z+
2665 1.1 mrg     ld      D1, Z+
2666 1.1 mrg     ret
2667 1.1 mrg ENDF __xload_2
2668 1.1 mrg #endif /* L_xload_2 */
2669 1.1 mrg
2670 1.1 mrg #if defined (L_xload_3)
;; Load 3 bytes from __pgmx address R21(HHI8):Z; HHI8.7 set = RAM source.
2671 1.1 mrg DEFUN __xload_3
2672 1.1 mrg     sbrc    HHI8, 7
2673 1.1 mrg     rjmp    1f
2674 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2675 1.1 mrg     out     __RAMPZ__, HHI8
2676 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2677 1.1 mrg     .xload  D0, 3
2678 1.1 mrg     .xload  D1, 3
2679 1.1 mrg     .xload  D2, 3
2680 1.1 mrg     ret
2681 1.1 mrg 1:  ld      D0, Z+
2682 1.1 mrg     ld      D1, Z+
2683 1.1 mrg     ld      D2, Z+
2684 1.1 mrg     ret
2685 1.1 mrg ENDF __xload_3
2686 1.1 mrg #endif /* L_xload_3 */
2687 1.1 mrg
2688 1.1 mrg #if defined (L_xload_4)
;; Load 4 bytes from __pgmx address R21(HHI8):Z; HHI8.7 set = RAM source.
2689 1.1 mrg DEFUN __xload_4
2690 1.1 mrg     sbrc    HHI8, 7
2691 1.1 mrg     rjmp    1f
2692 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2693 1.1 mrg     out     __RAMPZ__, HHI8
2694 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2695 1.1 mrg     .xload  D0, 4
2696 1.1 mrg     .xload  D1, 4
2697 1.1 mrg     .xload  D2, 4
2698 1.1 mrg     .xload  D3, 4
2699 1.1 mrg     ret
2700 1.1 mrg 1:  ld      D0, Z+
2701 1.1 mrg     ld      D1, Z+
2702 1.1 mrg     ld      D2, Z+
2703 1.1.1.2 mrg     ld      D3, Z+
2704 1.1 mrg     ret
2705 1.1.1.2 mrg ENDF __xload_4
2706 1.1 mrg #endif /* L_xload_4 */
2707 1.1 mrg
2708 1.1 mrg #endif /* L_xload_{1|2|3|4} */
2709 1.1 mrg #endif /* if !defined (__AVR_TINY__) */
2710 1.1 mrg
2711 1.1 mrg #if !defined (__AVR_TINY__)
2712 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2713 1.1 mrg ;; memcopy from Address Space __pgmx to RAM
2714 1.1 mrg ;; R23:Z = Source Address
2715 1.1 mrg ;; X = Destination Address
2716 1.1 mrg ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2717 1.1 mrg
2718 1.1 mrg #if defined (L_movmemx)
2719 1.1 mrg
2720 1.1 mrg #define HHI8 23
2721 1.1 mrg #define LOOP 24
2722 1.1 mrg
2723 1.1 mrg DEFUN __movmemx_qi
2724 1.1 mrg ;; #Bytes to copy fity in 8 Bits (1..255)
2725 1.1 mrg ;; Zero-extend Loop Counter
2726 1.1 mrg clr LOOP+1
2727 1.1 mrg ;; FALLTHRU
2728 1.1 mrg ENDF __movmemx_qi
2729 1.1 mrg
2730 1.1 mrg DEFUN __movmemx_hi
2731 1.1 mrg
2732 1.1 mrg ;; Read from where?
2733 1.1 mrg sbrc HHI8, 7
2734 1.1 mrg rjmp 1f
2735 1.1 mrg
2736 1.1 mrg ;; Read from Flash
2737 1.1 mrg
2738 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2739 1.1 mrg out __RAMPZ__, HHI8
2740 1.1 mrg #endif
2741 1.1 mrg
2742 1.1 mrg 0: ;; Load 1 Byte from Flash...
2743 1.1 mrg
2744 1.1 mrg #if defined (__AVR_HAVE_ELPMX__)
2745 1.1 mrg elpm r0, Z+
2746 1.1 mrg #elif defined (__AVR_HAVE_ELPM__)
2747 1.1 mrg elpm
2748 1.1 mrg adiw r30, 1
2749 1.1 mrg adc HHI8, __zero_reg__
2750 1.1 mrg out __RAMPZ__, HHI8
2751 1.1 mrg #elif defined (__AVR_HAVE_LPMX__)
2752 1.1 mrg lpm r0, Z+
2753 1.1 mrg #else
2754 1.1 mrg lpm
2755 1.1 mrg adiw r30, 1
2756 1.1 mrg #endif
2757 1.1 mrg
2758 1.1 mrg ;; ...and store that Byte to RAM Destination
2759 1.1 mrg st X+, r0
2760 1.1 mrg sbiw LOOP, 1
2761 1.1 mrg brne 0b
2762 1.1 mrg #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2763 1.1 mrg ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2764 1.1 mrg out __RAMPZ__, __zero_reg__
2765 1.1 mrg #endif /* ELPM && RAMPD */
2766 1.1 mrg ret
2767 1.1 mrg
2768 1.1 mrg ;; Read from RAM
2769 1.1 mrg
2770 1.1 mrg 1: ;; Read 1 Byte from RAM...
2771 1.1 mrg ld r0, Z+
2772 1.1 mrg ;; and store that Byte to RAM Destination
2773 1.1 mrg st X+, r0
2774 1.1 mrg sbiw LOOP, 1
2775 1.1 mrg brne 1b
2776 1.1 mrg ret
2777 1.1.1.2 mrg ENDF __movmemx_hi
2778 1.1 mrg
2779 1.1 mrg #undef HHI8
2780 1.1 mrg #undef LOOP
2781 1.1 mrg
2782 1.1 mrg #endif /* L_movmemx */
2783 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2784 1.1 mrg
2785 1.1 mrg
2786 1.1 mrg .section .text.libgcc.builtins, "ax", @progbits
2788 1.1 mrg
2789 1.1 mrg /**********************************
2790 1.1 mrg * Find first set Bit (ffs)
2791 1.1 mrg **********************************/
2792 1.1 mrg
2793 1.1 mrg #if defined (L_ffssi2)
2794 1.1 mrg ;; find first set bit
2795 1.1 mrg ;; r25:r24 = ffs32 (r25:r22)
2796 1.1 mrg ;; clobbers: r22, r26
;; r26 accumulates the bit offset of the first non-zero byte (0, 8, 16
;; or 24); the OR-chain also makes r25:r24 end up zero for a zero input,
;; so ffs(0) = 0 as required.
2797 1.1 mrg DEFUN __ffssi2
2798 1.1 mrg     clr  r26
2799 1.1 mrg     tst  r22
2800 1.1 mrg     brne 1f
2801 1.1 mrg     subi r26, -8
2802 1.1 mrg     or   r22, r23
2803 1.1 mrg     brne 1f
2804 1.1 mrg     subi r26, -8
2805 1.1 mrg     or   r22, r24
2806 1.1 mrg     brne 1f
2807 1.1 mrg     subi r26, -8
2808 1.1 mrg     or   r22, r25
2809 1.1 mrg     brne 1f
2810 1.1 mrg     ret
2811 1.1 mrg 1:  mov  r24, r22
2812 1.1 mrg     XJMP __loop_ffsqi2
2813 1.1 mrg ENDF __ffssi2
2814 1.1 mrg #endif /* defined (L_ffssi2) */
2815 1.1 mrg
2816 1.1 mrg #if defined (L_ffshi2)
2817 1.1 mrg ;; find first set bit
2818 1.1 mrg ;; r25:r24 = ffs16 (r25:r24)
2819 1.1 mrg ;; clobbers: r26
2820 1.1 mrg DEFUN __ffshi2
2821 1.1 mrg     clr  r26
;; On skip-errata cores CPSE must not skip the 2-word XJMP, so use an
;; explicit test-and-branch there instead.
2822 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2823 1.1 mrg     ;; Some cores have problem skipping 2-word instruction
2824 1.1 mrg     tst  r24
2825 1.1 mrg     breq 2f
2826 1.1 mrg #else
2827 1.1 mrg     cpse r24, __zero_reg__
2828 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2829 1.1 mrg 1:  XJMP __loop_ffsqi2
;; Low byte zero: offset 8, scan the high byte (OR leaves 0 for ffs(0)=0).
2830 1.1 mrg 2:  ldi  r26, 8
2831 1.1 mrg     or   r24, r25
2832 1.1 mrg     brne 1b
2833 1.1 mrg     ret
2834 1.1 mrg ENDF __ffshi2
2835 1.1 mrg #endif /* defined (L_ffshi2) */
2836 1.1 mrg
2837 1.1 mrg #if defined (L_loop_ffsqi2)
2838 1.1 mrg ;; Helper for ffshi2, ffssi2
2839 1.1 mrg ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2840 1.1 mrg ;; r24 must be != 0
2841 1.1 mrg ;; clobbers: r26
;; Shift r24 right until the first 1 bit falls out into carry; r26
;; counts the shifts, giving the 1-based bit position plus the offset
;; the caller preloaded into r26.
2842 1.1 mrg DEFUN __loop_ffsqi2
2843 1.1 mrg     inc  r26
2844 1.1 mrg     lsr  r24
2845 1.1 mrg     brcc __loop_ffsqi2
2846 1.1 mrg     mov  r24, r26
2847 1.1 mrg     clr  r25
2848 1.1 mrg     ret
2849 1.1 mrg ENDF __loop_ffsqi2
2850 1.1 mrg #endif /* defined (L_loop_ffsqi2) */
2851 1.1 mrg
2852 1.1 mrg
2853 1.1 mrg /**********************************
2855 1.1 mrg * Count trailing Zeros (ctz)
2856 1.1 mrg **********************************/
2857 1.1 mrg
2858 1.1 mrg #if defined (L_ctzsi2)
2859 1.1 mrg ;; count trailing zeros
2860 1.1 mrg ;; r25:r24 = ctz32 (r25:r22)
2861 1.1 mrg ;; clobbers: r26, r22
2862 1.1 mrg ;; ctz(0) = 255
2863 1.1 mrg ;; Note that ctz(0) is undefined for GCC
;; ctz(x) = ffs(x) - 1; ffs(0) = 0 wraps to 255 via the dec.
2864 1.1 mrg DEFUN __ctzsi2
2865 1.1 mrg     XCALL __ffssi2
2866 1.1 mrg     dec r24
2867 1.1 mrg     ret
2868 1.1 mrg ENDF __ctzsi2
2869 1.1 mrg #endif /* defined (L_ctzsi2) */
2870 1.1 mrg
2871 1.1 mrg #if defined (L_ctzhi2)
2872 1.1 mrg ;; count trailing zeros
2873 1.1 mrg ;; r25:r24 = ctz16 (r25:r24)
2874 1.1 mrg ;; clobbers: r26
2875 1.1 mrg ;; ctz(0) = 255
2876 1.1 mrg ;; Note that ctz(0) is undefined for GCC
;; ctz(x) = ffs(x) - 1; ffs(0) = 0 wraps to 255 via the dec.
2877 1.1 mrg DEFUN __ctzhi2
2878 1.1 mrg     XCALL __ffshi2
2879 1.1 mrg     dec r24
2880 1.1 mrg     ret
2881 1.1 mrg ENDF __ctzhi2
2882 1.1 mrg #endif /* defined (L_ctzhi2) */
2883 1.1 mrg
2884 1.1 mrg
2885 1.1 mrg /**********************************
2887 1.1 mrg * Count leading Zeros (clz)
2888 1.1 mrg **********************************/
2889 1.1 mrg
2890 1.1 mrg #if defined (L_clzdi2)
2891 1.1 mrg ;; count leading zeros
2892 1.1 mrg ;; r25:r24 = clz64 (r25:r18)
2893 1.1 mrg ;; clobbers: r22, r23, r26
;; clz of the high 32 bits; result 32 (bit 5 set) means the high word
;; is all zero, so redo clz on the low 32 bits and add 32.
2894 1.1 mrg DEFUN __clzdi2
2895 1.1 mrg     XCALL __clzsi2
2896 1.1 mrg     sbrs  r24, 5
2897 1.1 mrg     ret
2898 1.1 mrg     mov_l r22, r18
2899 1.1 mrg     mov_h r23, r19
2900 1.1 mrg     mov_l r24, r20
2901 1.1 mrg     mov_h r25, r21
2902 1.1 mrg     XCALL __clzsi2
2903 1.1 mrg     subi  r24, -32
2904 1.1 mrg     ret
2905 1.1 mrg ENDF __clzdi2
2906 1.1 mrg #endif /* defined (L_clzdi2) */
2907 1.1 mrg
2908 1.1 mrg #if defined (L_clzsi2)
2909 1.1 mrg ;; count leading zeros
2910 1.1 mrg ;; r25:r24 = clz32 (r25:r22)
2911 1.1 mrg ;; clobbers: r26
;; clz of the high 16 bits; result 16 (bit 4 set) means the high half
;; is all zero, so redo clz on the low 16 bits and add 16.
2912 1.1 mrg DEFUN __clzsi2
2913 1.1 mrg     XCALL __clzhi2
2914 1.1 mrg     sbrs  r24, 4
2915 1.1 mrg     ret
2916 1.1 mrg     mov_l r24, r22
2917 1.1 mrg     mov_h r25, r23
2918 1.1 mrg     XCALL __clzhi2
2919 1.1 mrg     subi  r24, -16
2920 1.1 mrg     ret
2921 1.1 mrg ENDF __clzsi2
2922 1.1 mrg #endif /* defined (L_clzsi2) */
2923 1.1 mrg
2924 1.1 mrg #if defined (L_clzhi2)
2925 1.1 mrg ;; count leading zeros
2926 1.1 mrg ;; r25:r24 = clz16 (r25:r24)
2927 1.1 mrg ;; clobbers: r26
;; r26 accumulates the count: +8 if the high byte is zero (then the low
;; byte is ORed in; all-zero input returns 16), +3 and a swap when the
;; top nibble is zero, then shift left until the first 1 bit reaches
;; carry, counting one per shift.
2928 1.1 mrg DEFUN __clzhi2
2929 1.1 mrg     clr  r26
2930 1.1 mrg     tst  r25
2931 1.1 mrg     brne 1f
2932 1.1 mrg     subi r26, -8
2933 1.1 mrg     or   r25, r24
2934 1.1 mrg     brne 1f
2935 1.1 mrg     ldi  r24, 16
2936 1.1 mrg     ret
2937 1.1 mrg 1:  cpi  r25, 16
2938 1.1 mrg     brsh 3f
2939 1.1 mrg     subi r26, -3
2940 1.1 mrg     swap r25
2941 1.1 mrg 2:  inc  r26
2942 1.1 mrg 3:  lsl  r25
2943 1.1 mrg     brcc 2b
2944 1.1 mrg     mov  r24, r26
2945 1.1 mrg     clr  r25
2946 1.1 mrg     ret
2947 1.1 mrg ENDF __clzhi2
2948 1.1 mrg #endif /* defined (L_clzhi2) */
2949 1.1 mrg
2950 1.1 mrg
2951 1.1 mrg /**********************************
2953 1.1 mrg * Parity
2954 1.1 mrg **********************************/
2955 1.1 mrg
2956 1.1 mrg #if defined (L_paritydi2)
2957 1.1 mrg ;; r25:r24 = parity64 (r25:r18)
2958 1.1 mrg ;; clobbers: __tmp_reg__
;; XOR-fold the low 32 bits into the high 32, then defer to parity32.
2959 1.1 mrg DEFUN __paritydi2
2960 1.1 mrg     eor  r24, r18
2961 1.1 mrg     eor  r24, r19
2962 1.1 mrg     eor  r24, r20
2963 1.1 mrg     eor  r24, r21
2964 1.1 mrg     XJMP __paritysi2
2965 1.1 mrg ENDF __paritydi2
2966 1.1 mrg #endif /* defined (L_paritydi2) */
2967 1.1 mrg
2968 1.1 mrg #if defined (L_paritysi2)
2969 1.1 mrg ;; r25:r24 = parity32 (r25:r22)
2970 1.1 mrg ;; clobbers: __tmp_reg__
;; XOR-fold the low 16 bits into the high 16, then defer to parity16.
2971 1.1 mrg DEFUN __paritysi2
2972 1.1 mrg     eor  r24, r22
2973 1.1 mrg     eor  r24, r23
2974 1.1 mrg     XJMP __parityhi2
2975 1.1 mrg ENDF __paritysi2
2976 1.1 mrg #endif /* defined (L_paritysi2) */
2977 1.1 mrg
2978 1.1 mrg #if defined (L_parityhi2)
2979 1.1 mrg ;; r25:r24 = parity16 (r25:r24)
2980 1.1 mrg ;; clobbers: __tmp_reg__
;; XOR-fold the high byte into the low byte, then fall through into
;; the 8-bit parity below.
2981 1.1 mrg DEFUN __parityhi2
2982 1.1 mrg     eor  r24, r25
2983 1.1 mrg     ;; FALLTHRU
2984 1.1 mrg ENDF __parityhi2
2985 1.1 mrg
2986 1.1 mrg ;; r25:r24 = parity8 (r24)
2987 1.1 mrg ;; clobbers: __tmp_reg__
;; Nibble fold (swap + eor) reduces to 4 bits; the subi/andi/subi bit
;; tricks then reduce those to bits 0 and 3, which are combined into
;; bit 0 before masking.
2988 1.1 mrg DEFUN __parityqi2
2989 1.1 mrg     ;; parity is in r24[0..7]
2990 1.1 mrg     mov  __tmp_reg__, r24
2991 1.1 mrg     swap __tmp_reg__
2992 1.1 mrg     eor  r24, __tmp_reg__
2993 1.1 mrg     ;; parity is in r24[0..3]
2994 1.1 mrg     subi r24, -4
2995 1.1 mrg     andi r24, -5
2996 1.1 mrg     subi r24, -6
2997 1.1 mrg     ;; parity is in r24[0,3]
2998 1.1 mrg     sbrc r24, 3
2999 1.1 mrg     inc  r24
3000 1.1 mrg     ;; parity is in r24[0]
3001 1.1 mrg     andi r24, 1
3002 1.1 mrg     clr  r25
3003 1.1 mrg     ret
3004 1.1 mrg ENDF __parityqi2
3005 1.1 mrg #endif /* defined (L_parityhi2) */
3006 1.1 mrg
3007 1.1 mrg
3008 1.1 mrg /**********************************
3010 1.1 mrg * Population Count
3011 1.1 mrg **********************************/
3012 1.1 mrg
3013 1.1 mrg #if defined (L_popcounthi2)
3014 1.1 mrg ;; population count
3015 1.1 mrg ;; r25:r24 = popcount16 (r25:r24)
3016 1.1 mrg ;; clobbers: __tmp_reg__
;; popcount8 of the low byte is pushed, popcount8 of the high byte
;; computed, and the tail below adds the two partial counts.
3017 1.1 mrg DEFUN __popcounthi2
3018 1.1 mrg     XCALL __popcountqi2
3019 1.1 mrg     push  r24
3020 1.1 mrg     mov   r24, r25
3021 1.1 mrg     XCALL __popcountqi2
3022 1.1 mrg     clr   r25
3023 1.1 mrg     ;; FALLTHRU
3024 1.1 mrg ENDF __popcounthi2
3025 1.1 mrg
;; Shared tail: pop the previously pushed partial count and add it in.
;; Also used by __popcountsi2 / __popcountdi2.
3026 1.1 mrg DEFUN __popcounthi2_tail
3027 1.1 mrg     pop   __tmp_reg__
3028 1.1 mrg     add   r24, __tmp_reg__
3029 1.1 mrg     ret
3030 1.1 mrg ENDF __popcounthi2_tail
3031 1.1 mrg #endif /* defined (L_popcounthi2) */
3032 1.1 mrg
3033 1.1 mrg #if defined (L_popcountsi2)
3034 1.1 mrg ;; population count
3035 1.1 mrg ;; r25:r24 = popcount32 (r25:r22)
3036 1.1 mrg ;; clobbers: __tmp_reg__
;; popcount16 of each half; the pushed partial count is added by the
;; shared __popcounthi2_tail.
3037 1.1 mrg DEFUN __popcountsi2
3038 1.1 mrg     XCALL __popcounthi2
3039 1.1 mrg     push  r24
3040 1.1 mrg     mov_l r24, r22
3041 1.1 mrg     mov_h r25, r23
3042 1.1 mrg     XCALL __popcounthi2
3043 1.1 mrg     XJMP  __popcounthi2_tail
3044 1.1 mrg ENDF __popcountsi2
3045 1.1 mrg #endif /* defined (L_popcountsi2) */
3046 1.1 mrg
3047 1.1 mrg #if defined (L_popcountdi2)
3048 1.1 mrg ;; population count
3049 1.1 mrg ;; r25:r24 = popcount64 (r25:r18)
3050 1.1 mrg ;; clobbers: r22, r23, __tmp_reg__
3051 1.1 mrg DEFUN __popcountdi2
              ;; popcount64 = popcount32 (high dword r25:r22)
              ;;            + popcount32 (low dword  r21:r18).
3052 1.1 mrg XCALL __popcountsi2
              ;; Save the high-dword count, then move the low dword into the
              ;; __popcountsi2 argument registers r25:r22.
3053 1.1 mrg push r24
3054 1.1 mrg mov_l r22, r18
3055 1.1 mrg mov_h r23, r19
3056 1.1 mrg mov_l r24, r20
3057 1.1 mrg mov_h r25, r21
3058 1.1 mrg XCALL __popcountsi2
              ;; Tail adds the pushed count to r24 and returns.
3059 1.1 mrg XJMP __popcounthi2_tail
3060 1.1 mrg ENDF __popcountdi2
3061 1.1 mrg #endif /* defined (L_popcountdi2) */
3062 1.1 mrg
3063 1.1 mrg #if defined (L_popcountqi2)
3064 1.1 mrg ;; population count
3065 1.1 mrg ;; r24 = popcount8 (r24)
3066 1.1 mrg ;; clobbers: __tmp_reg__
3067 1.1 mrg DEFUN __popcountqi2
              ;; Serial popcount: keep bit 0 as the initial count, then shift
              ;; the remaining bits out of __tmp_reg__ and accumulate each
              ;; one into r24 through the carry flag.
3068 1.1 mrg mov __tmp_reg__, r24
3069 1.1 mrg andi r24, 1
              ;; Discard bit 0 -- it is already counted.
3070 1.1 mrg lsr __tmp_reg__
              ;; Bits 1..5: shift into carry, add the carry.
3071 1.1 mrg lsr __tmp_reg__
3072 1.1 mrg adc r24, __zero_reg__
3073 1.1 mrg lsr __tmp_reg__
3074 1.1 mrg adc r24, __zero_reg__
3075 1.1 mrg lsr __tmp_reg__
3076 1.1 mrg adc r24, __zero_reg__
3077 1.1 mrg lsr __tmp_reg__
3078 1.1 mrg adc r24, __zero_reg__
3079 1.1 mrg lsr __tmp_reg__
3080 1.1 mrg adc r24, __zero_reg__
              ;; After this last shift, carry = bit 6 and __tmp_reg__ = bit 7,
              ;; so a single ADC accounts for both remaining bits.
3081 1.1 mrg lsr __tmp_reg__
3082 1.1 mrg adc r24, __tmp_reg__
3083 1.1 mrg ret
3084 1.1 mrg ENDF __popcountqi2
3085 1.1 mrg #endif /* defined (L_popcountqi2) */
3086 1.1 mrg
3087 1.1 mrg
3088 1.1 mrg /**********************************
3090 1.1 mrg * Swap bytes
3091 1.1 mrg **********************************/
3092 1.1 mrg
3093 1.1 mrg ;; swap two registers with different register number
3094 1.1 mrg .macro bswap a, b
              ;; Classic three-XOR swap: needs no scratch register, but would
              ;; zero the operand if \a and \b were the same register --
              ;; hence the restriction above.
3095 1.1 mrg eor \a, \b
3096 1.1 mrg eor \b, \a
3097 1.1 mrg eor \a, \b
3098 1.1 mrg .endm
3099 1.1 mrg
3100 1.1 mrg #if defined (L_bswapsi2)
3101 1.1 mrg ;; swap bytes
3102 1.1 mrg ;; r25:r22 = bswap32 (r25:r22)
3103 1.1 mrg DEFUN __bswapsi2
              ;; Reverse the 4-byte value by exchanging the outer pair and
              ;; the inner pair of bytes.
3104 1.1 mrg bswap r22, r25
3105 1.1 mrg bswap r23, r24
3106 1.1 mrg ret
3107 1.1 mrg ENDF __bswapsi2
3108 1.1 mrg #endif /* defined (L_bswapsi2) */
3109 1.1 mrg
3110 1.1 mrg #if defined (L_bswapdi2)
3111 1.1 mrg ;; swap bytes
3112 1.1 mrg ;; r25:r18 = bswap64 (r25:r18)
3113 1.1 mrg DEFUN __bswapdi2
              ;; Reverse the 8-byte value by exchanging the four mirrored
              ;; byte pairs, outermost to innermost.
3114 1.1 mrg bswap r18, r25
3115 1.1 mrg bswap r19, r24
3116 1.1.1.4 mrg bswap r20, r23
3117 1.1.1.4 mrg bswap r21, r22
3118 1.1.1.4 mrg ret
3119 1.1 mrg ENDF __bswapdi2
3120 1.1 mrg #endif /* defined (L_bswapdi2) */
3121 1.1 mrg
3122 1.1.1.4 mrg
3123 1.1.1.4 mrg /**********************************
3125 1.1 mrg * 64-bit shifts
3126 1.1 mrg **********************************/
3127 1.1 mrg
3128 1.1 mrg #if defined (L_ashrdi3)
3129 1.1 mrg
              ;; SS is the fill byte shifted in from the left: 0x00 for a
              ;; logical shift, 0xFF for an arithmetic shift of a negative
              ;; value.  It aliases __zero_reg__, which is restored to 0
              ;; before returning.
3130 1.1.1.4 mrg #define SS __zero_reg__
3131 1.1.1.4 mrg
3132 1.1 mrg ;; Arithmetic shift right
3133 1.1 mrg ;; r25:r18 = ashr64 (r25:r18, r17:r16)
3134 1.1 mrg DEFUN __ashrdi3
              ;; Negative value: flip SS from 0x00 to 0xFF, then share the
              ;; shift loop of __lshrdi3 below.
3135 1.1 mrg sbrc r25, 7
3136 1.1 mrg com SS
3137 1.1 mrg ;; FALLTHRU
3138 1.1 mrg ENDF __ashrdi3
3139 1.1 mrg
3140 1.1 mrg ;; Logic shift right
3141 1.1 mrg ;; r25:r18 = lshr64 (r25:r18, r17:r16)
3142 1.1.1.4 mrg DEFUN __lshrdi3
3143 1.1 mrg ;; Signs are in SS (zero_reg)
              ;; Save the shift count so r16 can be handed back unchanged.
3144 1.1.1.4 mrg mov __tmp_reg__, r16
              ;; Whole-byte shifts first: while count >= 8, move every byte
              ;; down one position and refill the top byte from SS.
3145 1.1 mrg 0: cpi r16, 8
3146 1.1 mrg brlo 2f
3147 1.1 mrg subi r16, 8
3148 1.1 mrg mov r18, r19
3149 1.1 mrg mov r19, r20
3150 1.1 mrg mov r20, r21
3151 1.1 mrg mov r21, r22
3152 1.1 mrg mov r22, r23
3153 1.1 mrg mov r23, r24
3154 1.1 mrg mov r24, r25
3155 1.1.1.4 mrg mov r25, SS
3156 1.1.1.4 mrg rjmp 0b
              ;; Remaining 0..7 single-bit shifts: ASR keeps SS at 0x00/0xFF
              ;; and feeds its low bit into the ROR carry chain.
3157 1.1 mrg 1: asr SS
3158 1.1 mrg ror r25
3159 1.1.1.4 mrg ror r24
3160 1.1.1.4 mrg ror r23
3161 1.1.1.4 mrg ror r22
3162 1.1 mrg ror r21
3163 1.1 mrg ror r20
3164 1.1 mrg ror r19
3165 1.1 mrg ror r18
3166 1.1 mrg 2: dec r16
3167 1.1.1.4 mrg brpl 1b
              ;; Restore invariants: __zero_reg__ = 0 and r16 unchanged.
3168 1.1 mrg clr __zero_reg__
3169 1.1.1.4 mrg mov r16, __tmp_reg__
3170 1.1 mrg ret
3171 1.1 mrg ENDF __lshrdi3
3172 1.1 mrg
3173 1.1 mrg #undef SS
3174 1.1 mrg
3175 1.1 mrg #endif /* defined (L_ashrdi3) */
3176 1.1 mrg
3177 1.1 mrg #if defined (L_ashldi3)
3178 1.1 mrg ;; Shift left
3179 1.1 mrg ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3180 1.1 mrg ;; This function does not clobber T.
3181 1.1 mrg DEFUN __ashldi3
              ;; Save the shift count so r16 can be handed back unchanged.
3182 1.1 mrg mov __tmp_reg__, r16
              ;; Whole-byte shifts first: while count >= 8, move every byte
              ;; up one position and zero-fill the bottom byte.
3183 1.1 mrg 0: cpi r16, 8
3184 1.1 mrg brlo 2f
3185 1.1 mrg mov r25, r24
3186 1.1 mrg mov r24, r23
3187 1.1 mrg mov r23, r22
3188 1.1 mrg mov r22, r21
3189 1.1 mrg mov r21, r20
3190 1.1 mrg mov r20, r19
3191 1.1 mrg mov r19, r18
3192 1.1.1.4 mrg clr r18
3193 1.1 mrg subi r16, 8
3194 1.1 mrg rjmp 0b
              ;; Remaining 0..7 single-bit shifts through the carry chain.
3195 1.1 mrg 1: lsl r18
3196 1.1 mrg rol r19
3197 1.1 mrg rol r20
3198 1.1.1.4 mrg rol r21
3199 1.1 mrg rol r22
3200 1.1 mrg rol r23
3201 1.1 mrg rol r24
3202 1.1 mrg rol r25
3203 1.1 mrg 2: dec r16
3204 1.1 mrg brpl 1b
              ;; Restore the caller's shift count.
3205 1.1 mrg mov r16, __tmp_reg__
3206 1.1 mrg ret
3207 1.1 mrg ENDF __ashldi3
3208 1.1 mrg #endif /* defined (L_ashldi3) */
3209 1.1 mrg
3210 1.1 mrg #if defined (L_rotldi3)
3211 1.1 mrg ;; Rotate left
3212 1.1 mrg ;; r25:r18 = rotl64 (r25:r18, r17:r16)
3213 1.1 mrg DEFUN __rotldi3
              ;; r16 is saved on the stack rather than in __tmp_reg__ here,
              ;; because the byte-rotate loop below uses __tmp_reg__ as
              ;; scratch.
3214 1.1 mrg push r16
              ;; Whole-byte rotations first: while count >= 8, rotate the
              ;; 8-byte value one byte position, wrapping the top byte
              ;; around to the bottom.
3215 1.1 mrg 0: cpi r16, 8
3216 1.1 mrg brlo 2f
3217 1.1 mrg subi r16, 8
3218 1.1 mrg mov __tmp_reg__, r25
3219 1.1 mrg mov r25, r24
3220 1.1 mrg mov r24, r23
3221 1.1 mrg mov r23, r22
3222 1.1 mrg mov r22, r21
3223 1.1 mrg mov r21, r20
3224 1.1 mrg mov r20, r19
3225 1.1 mrg mov r19, r18
3226 1.1 mrg mov r18, __tmp_reg__
3227 1.1 mrg rjmp 0b
              ;; Remaining 0..7 single-bit rotations: shift left, then wrap
              ;; the carry (the old top bit) back into bit 0 via ADC.
3228 1.1 mrg 1: lsl r18
3229 1.1 mrg rol r19
3230 1.1 mrg rol r20
3231 1.1 mrg rol r21
3232 1.1 mrg rol r22
3233 1.1 mrg rol r23
3234 1.1 mrg rol r24
3235 1.1 mrg rol r25
3236 1.1 mrg adc r18, __zero_reg__
3237 1.1 mrg 2: dec r16
3238 1.1 mrg brpl 1b
              ;; Restore the caller's rotate count.
3239 1.1 mrg pop r16
3240 1.1 mrg ret
3241 1.1 mrg ENDF __rotldi3
3242 1.1 mrg #endif /* defined (L_rotldi3) */
3243 1.1 mrg
3244 1.1 mrg
3245 1.1 mrg .section .text.libgcc.fmul, "ax", @progbits
3247 1.1 mrg
3248 1.1 mrg /***********************************************************/
3249 1.1 mrg ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3250 1.1 mrg ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3251 1.1 mrg /***********************************************************/
3252 1.1 mrg
              ;; Register aliases for the __fmul* helpers below: the two
              ;; 8-bit operands arrive in r24 (A1) and r25 (B1), the 16-bit
              ;; product is built in r23:r22 (C1:C0), and __tmp_reg__ (A0)
              ;; serves as the low byte of the right-shifted A operand.
3253 1.1 mrg #define A1 24
3254 1.1 mrg #define B1 25
3255 1.1 mrg #define C0 22
3256 1.1 mrg #define C1 23
3257 1.1 mrg #define A0 __tmp_reg__
3258 1.1 mrg
3259 1.1 mrg #ifdef L_fmuls
3260 1.1 mrg ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3261 1.1 mrg ;;; Clobbers: r24, r25, __tmp_reg__
3262 1.1 mrg DEFUN __fmuls
3263 1.1 mrg ;; A0.7 = negate result?
              ;; Sign of the product is sign(A) ^ sign(B); record it in A0.7.
3264 1.1 mrg mov A0, A1
3265 1.1 mrg eor A0, B1
3266 1.1 mrg ;; B1 = |B1|
3267 1.1 mrg sbrc B1, 7
3268 1.1 mrg neg B1
              ;; __fmulsu_exit takes |A1|, performs the unsigned multiply,
              ;; and negates the result if A0.7 is set.
3269 1.1 mrg XJMP __fmulsu_exit
3270 1.1 mrg ENDF __fmuls
3271 1.1 mrg #endif /* L_fmuls */
3272 1.1 mrg
3273 1.1 mrg #ifdef L_fmulsu
3274 1.1 mrg ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3275 1.1 mrg ;;; Clobbers: r24, r25, __tmp_reg__
3276 1.1 mrg DEFUN __fmulsu
3277 1.1 mrg ;; A0.7 = negate result?
              ;; B is unsigned here, so the product's sign is just sign(A).
3278 1.1 mrg mov A0, A1
3279 1.1 mrg ;; FALLTHRU
3280 1.1 mrg ENDF __fmulsu
3281 1.1 mrg
3282 1.1 mrg ;; Helper for __fmuls and __fmulsu
              ;; Expects the "negate result" flag in A0.7 and the unsigned B
              ;; operand in B1; takes |A1|, multiplies, and fixes the sign.
3283 1.1 mrg DEFUN __fmulsu_exit
3284 1.1 mrg ;; A1 = |A1|
3285 1.1 mrg sbrc A1, 7
3286 1.1 mrg neg A1
3287 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3288 1.1 mrg ;; Some cores have problem skipping 2-word instruction
              ;; so use TST + BRMI instead of a skip over the (possibly
              ;; 2-word) XJMP below.
3289 1.1 mrg tst A0
3290 1.1 mrg brmi 1f
3291 1.1 mrg #else
3292 1.1 mrg sbrs A0, 7
3293 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
              ;; Non-negative result: tail-call __fmul directly.
3294 1.1 mrg XJMP __fmul
3295 1.1 mrg 1: XCALL __fmul
3296 1.1 mrg ;; C = -C iff A0.7 = 1
3297 1.1 mrg NEG2 C0
3298 1.1 mrg ret
3299 1.1 mrg ENDF __fmulsu_exit
3300 1.1 mrg #endif /* L_fmulsu */
3301 1.1 mrg
3302 1.1 mrg
3303 1.1 mrg #ifdef L_fmul
3304 1.1 mrg ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
3305 1.1 mrg ;;; Clobbers: r24, r25, __tmp_reg__
3306 1.1 mrg DEFUN __fmul
3307 1.1 mrg ; clear result
3308 1.1 mrg clr C0
3309 1.1 mrg clr C1
3310 1.1 mrg clr A0
              ;; Shift-and-add multiply: A (A1:A0) shifts right while B (B1)
              ;; shifts left; whenever B's current top bit is set, A is
              ;; added into the accumulator C.
3311 1.1 mrg 1: tst B1
3312 1.1 mrg ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
3313 1.1 mrg 2: brpl 3f
3314 1.1 mrg ;; C += A
3315 1.1 mrg add C0, A0
3316 1.1 mrg adc C1, A1
3317 1.1 mrg 3: ;; A >>= 1
3318 1.1 mrg lsr A1
3319 1.1 mrg ror A0
3320 ;; B <<= 1
              ;; Loop until every bit of B has been consumed (B becomes 0).
3321 lsl B1
3322 brne 2b
3323 ret
3324 ENDF __fmul
3325 #endif /* L_fmul */
3326
3327 #undef A0
3328 #undef A1
3329 #undef B1
3330 #undef C0
3331 #undef C1
3332
3333 #include "lib1funcs-fixed.S"
3334