lib1funcs.S revision 1.1.1.8 1 1.1 mrg /* -*- Mode: Asm -*- */
2 1.1.1.8 mrg /* Copyright (C) 1998-2022 Free Software Foundation, Inc.
3 1.1 mrg Contributed by Denis Chertykov <chertykov (at) gmail.com>
4 1.1 mrg
5 1.1 mrg This file is free software; you can redistribute it and/or modify it
6 1.1 mrg under the terms of the GNU General Public License as published by the
7 1.1 mrg Free Software Foundation; either version 3, or (at your option) any
8 1.1 mrg later version.
9 1.1 mrg
10 1.1 mrg This file is distributed in the hope that it will be useful, but
11 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 1.1 mrg General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1.1.2 mrg #if defined (__AVR_TINY__)
25 1.1.1.2 mrg #define __zero_reg__ r17
26 1.1.1.2 mrg #define __tmp_reg__ r16
27 1.1.1.2 mrg #else
28 1.1 mrg #define __zero_reg__ r1
29 1.1 mrg #define __tmp_reg__ r0
30 1.1.1.2 mrg #endif
31 1.1 mrg #define __SREG__ 0x3f
32 1.1 mrg #if defined (__AVR_HAVE_SPH__)
33 1.1 mrg #define __SP_H__ 0x3e
34 1.1 mrg #endif
35 1.1 mrg #define __SP_L__ 0x3d
36 1.1 mrg #define __RAMPZ__ 0x3B
37 1.1 mrg #define __EIND__ 0x3C
38 1.1 mrg
39 1.1 mrg /* Most of the functions here are called directly from avr.md
40 1.1 mrg patterns, instead of using the standard libcall mechanisms.
41 1.1 mrg This can make better code because GCC knows exactly which
42 1.1 mrg of the call-used registers (not all of them) are clobbered. */
43 1.1 mrg
44 1.1 mrg /* FIXME: At present, there is no SORT directive in the linker
45 1.1 mrg script so that we must not assume that different modules
46 1.1 mrg in the same input section like .libgcc.text.mul will be
47 1.1 mrg located close together. Therefore, we cannot use
48 1.1 mrg RCALL/RJMP to call a function like __udivmodhi4 from
49 1.1 mrg __divmodhi4 and have to use lengthy XCALL/XJMP even
50 1.1 mrg though they are in the same input section and all same
51 1.1 mrg input sections together are small enough to reach every
52 1.1 mrg location with a RCALL/RJMP instruction. */
53 1.1 mrg
54 1.1.1.2 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 1.1.1.2 mrg #error device not supported
56 1.1.1.2 mrg #endif
57 1.1.1.2 mrg
;; 16-bit register-pair copy helpers.
;; mov_l copies the low byte — or, on devices with MOVW, the whole even
;; register pair at once; mov_h copies the high byte and is therefore
;; empty when MOVW already moved both bytes.  wmov always copies a full
;; 16-bit pair, using two MOVs when MOVW is unavailable.
58 1.1 mrg .macro mov_l r_dest, r_src
59 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
60 1.1 mrg movw \r_dest, \r_src
61 1.1 mrg #else
62 1.1 mrg mov \r_dest, \r_src
63 1.1 mrg #endif
64 1.1 mrg .endm
65 1.1 mrg
66 1.1 mrg .macro mov_h r_dest, r_src
67 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
68 1.1 mrg ; empty: mov_l's MOVW already copied the high byte
69 1.1 mrg #else
70 1.1 mrg mov \r_dest, \r_src
71 1.1 mrg #endif
72 1.1 mrg .endm
73 1.1 mrg
74 1.1 mrg .macro wmov r_dest, r_src
75 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
76 1.1 mrg movw \r_dest, \r_src
77 1.1 mrg #else
78 1.1 mrg mov \r_dest, \r_src
79 1.1 mrg mov \r_dest+1, \r_src+1
80 1.1 mrg #endif
81 1.1 mrg .endm
82 1.1 mrg
83 1.1 mrg #if defined (__AVR_HAVE_JMP_CALL__)
84 1.1 mrg #define XCALL call
85 1.1 mrg #define XJMP jmp
86 1.1 mrg #else
87 1.1 mrg #define XCALL rcall
88 1.1 mrg #define XJMP rjmp
89 1.1 mrg #endif
90 1.1 mrg
91 1.1.1.2 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__)
92 1.1.1.2 mrg #define XICALL eicall
93 1.1.1.2 mrg #define XIJMP eijmp
94 1.1.1.2 mrg #else
95 1.1.1.2 mrg #define XICALL icall
96 1.1.1.2 mrg #define XIJMP ijmp
97 1.1.1.2 mrg #endif
98 1.1.1.2 mrg
99 1.1 mrg ;; Prologue stuff
100 1.1 mrg
;; do_prologue_saves: emit a prologue that saves call-saved registers
;; via the shared __prologue_saves__ sequence.  X (r27:r26) is loaded
;; with the frame size and Z (r31:r30) with the resume address; the
;; jump lands (18 - n_pushed) * 2 bytes into __prologue_saves__, which
;; presumably skips the pushes of the registers not requested — confirm
;; against __prologue_saves__'s definition (not in this chunk).
101 1.1 mrg .macro do_prologue_saves n_pushed n_frame=0
102 1.1 mrg ldi r26, lo8(\n_frame)
103 1.1 mrg ldi r27, hi8(\n_frame)
104 1.1 mrg ldi r30, lo8(gs(.L_prologue_saves.\@))
105 1.1 mrg ldi r31, hi8(gs(.L_prologue_saves.\@))
106 1.1 mrg XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107 1.1 mrg .L_prologue_saves.\@:
108 1.1 mrg .endm
109 1.1 mrg
110 1.1 mrg ;; Epilogue stuff
111 1.1 mrg
;; do_epilogue_restores: read SP into Y (r29:r28), pop the n_frame
;; bytes of locals (ADIW only encodes 0..63, hence the SUBI/SBCI form
;; for larger frames), put n_pushed into r30 and jump into the shared
;; __epilogue_restores__ sequence at the matching entry offset.
112 1.1 mrg .macro do_epilogue_restores n_pushed n_frame=0
113 1.1 mrg in r28, __SP_L__
114 1.1 mrg #ifdef __AVR_HAVE_SPH__
115 1.1 mrg in r29, __SP_H__
116 1.1 mrg .if \n_frame > 63
117 1.1 mrg subi r28, lo8(-\n_frame)
118 1.1 mrg sbci r29, hi8(-\n_frame)
119 1.1 mrg .elseif \n_frame > 0
120 1.1 mrg adiw r28, \n_frame
121 1.1 mrg .endif
122 1.1 mrg #else
123 1.1 mrg clr r29
124 1.1 mrg .if \n_frame > 0
125 1.1 mrg subi r28, lo8(-\n_frame)
126 1.1 mrg .endif
127 1.1 mrg #endif /* HAVE SPH */
128 1.1 mrg ldi r30, \n_pushed
129 1.1 mrg XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
130 1.1 mrg .endm
131 1.1 mrg
132 1.1 mrg ;; Support function entry and exit for convenience
133 1.1 mrg
;; wsubi/waddi: subtract/add a 16-bit immediate from/to a register
;; pair.  AVR_TINY cores have no SBIW/ADIW, so the operation is
;; emulated with SUBI/SBCI on lo8/hi8 of the (negated) constant.
134 1.1.1.2 mrg .macro wsubi r_arg1, i_arg2
135 1.1.1.2 mrg #if defined (__AVR_TINY__)
136 1.1.1.2 mrg subi \r_arg1, lo8(\i_arg2)
137 1.1.1.2 mrg sbci \r_arg1+1, hi8(\i_arg2)
138 1.1.1.2 mrg #else
139 1.1.1.2 mrg sbiw \r_arg1, \i_arg2
140 1.1.1.2 mrg #endif
141 1.1.1.2 mrg .endm
142 1.1.1.2 mrg
143 1.1.1.2 mrg .macro waddi r_arg1, i_arg2
144 1.1.1.2 mrg #if defined (__AVR_TINY__)
145 1.1.1.2 mrg subi \r_arg1, lo8(-\i_arg2)
146 1.1.1.2 mrg sbci \r_arg1+1, hi8(-\i_arg2)
147 1.1.1.2 mrg #else
148 1.1.1.2 mrg adiw \r_arg1, \i_arg2
149 1.1.1.2 mrg #endif
150 1.1.1.2 mrg .endm
151 1.1.1.2 mrg
;; DEFUN/ENDF: open/close a global function — export the symbol, emit
;; .func/.endfunc debug markers, and record the symbol size for ELF.
152 1.1 mrg .macro DEFUN name
153 1.1 mrg .global \name
154 1.1 mrg .func \name
155 1.1 mrg \name:
156 1.1 mrg .endm
157 1.1 mrg
158 1.1 mrg .macro ENDF name
159 1.1 mrg .size \name, .-\name
160 1.1 mrg .endfunc
161 1.1 mrg .endm
162 1.1 mrg
;; FALIAS: define a zero-size global alias label at the current
;; location (entry falls through into the code that follows).
163 1.1 mrg .macro FALIAS name
164 1.1 mrg .global \name
165 1.1 mrg .func \name
166 1.1 mrg \name:
167 1.1 mrg .size \name, .-\name
168 1.1 mrg .endfunc
169 1.1 mrg .endm
170 1.1 mrg
171 1.1 mrg ;; Skip next instruction, typically a jump target
172 1.1.1.2 mrg #define skip cpse 16,16
173 1.1 mrg
174 1.1 mrg ;; Negate a 2-byte value held in consecutive registers
;; NEG2: two's-complement negate reg+1:reg (NEG/SBCI need no carry-in).
175 1.1 mrg .macro NEG2 reg
176 1.1 mrg com \reg+1
177 1.1 mrg neg \reg
178 1.1 mrg sbci \reg+1, -1
179 1.1 mrg .endm
180 1.1 mrg
181 1.1 mrg ;; Negate a 4-byte value held in consecutive registers
182 1.1 mrg ;; Sets the V flag for signed overflow tests if REG >= 16
;; SBCI only encodes upper registers (r16..r31); for lower registers
;; fall back to COM on every byte plus an ADC ripple — COM sets Carry,
;; which supplies the "+1" of the two's complement.
183 1.1 mrg .macro NEG4 reg
184 1.1 mrg com \reg+3
185 1.1 mrg com \reg+2
186 1.1 mrg com \reg+1
187 1.1 mrg .if \reg >= 16
188 1.1 mrg neg \reg
189 1.1 mrg sbci \reg+1, -1
190 1.1 mrg sbci \reg+2, -1
191 1.1 mrg sbci \reg+3, -1
192 1.1 mrg .else
193 1.1 mrg com \reg
194 1.1 mrg adc \reg, __zero_reg__
195 1.1 mrg adc \reg+1, __zero_reg__
196 1.1 mrg adc \reg+2, __zero_reg__
197 1.1 mrg adc \reg+3, __zero_reg__
198 1.1 mrg .endif
199 1.1 mrg .endm
200 1.1 mrg
201 1.1 mrg #define exp_lo(N) hlo8 ((N) << 23)
202 1.1 mrg #define exp_hi(N) hhi8 ((N) << 23)
203 1.1 mrg
204 1.1 mrg
205 1.1 mrg .section .text.libgcc.mul, "ax", @progbits
207 1.1 mrg
208 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
209 1.1 mrg /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
210 1.1 mrg #if !defined (__AVR_HAVE_MUL__)
211 1.1 mrg /*******************************************************
212 1.1 mrg Multiplication 8 x 8 without MUL
213 1.1 mrg *******************************************************/
214 1.1 mrg #if defined (L_mulqi3)
215 1.1 mrg
216 1.1 mrg #define r_arg2 r22 /* multiplicand */
217 1.1 mrg #define r_arg1 r24 /* multiplier */
218 1.1 mrg #define r_res __tmp_reg__ /* result */
219 1.1 mrg
;; __mulqi3: 8 x 8 -> 8 bit multiply without MUL hardware.
;; In:  r24 = multiplier, r22 = multiplicand.  Out: r24 = product.
;; Clobbers: __tmp_reg__, r22.
;; Shift-and-add: add the multiplicand whenever the multiplier's low
;; bit is set; exits early once either operand shifts down to zero.
220 1.1 mrg DEFUN __mulqi3
221 1.1 mrg clr r_res ; clear result
222 1.1 mrg __mulqi3_loop:
223 1.1 mrg sbrc r_arg1,0
224 1.1 mrg add r_res,r_arg2
225 1.1 mrg add r_arg2,r_arg2 ; shift multiplicand
226 1.1 mrg breq __mulqi3_exit ; while multiplicand != 0
227 1.1 mrg lsr r_arg1 ;
228 1.1 mrg brne __mulqi3_loop ; loop while multiplier != 0
229 1.1 mrg __mulqi3_exit:
230 1.1 mrg mov r_arg1,r_res ; result to return register
231 1.1 mrg ret
232 1.1 mrg ENDF __mulqi3
233 1.1 mrg
234 1.1 mrg #undef r_arg2
235 1.1 mrg #undef r_arg1
236 1.1 mrg #undef r_res
237 1.1 mrg
238 1.1 mrg #endif /* defined (L_mulqi3) */
239 1.1 mrg
240 1.1 mrg
241 1.1 mrg /*******************************************************
242 1.1 mrg Widening Multiplication 16 = 8 x 8 without MUL
243 1.1 mrg Multiplication 16 x 16 without MUL
244 1.1 mrg *******************************************************/
245 1.1.1.2 mrg
246 1.1.1.2 mrg #define A0 22
247 1.1.1.2 mrg #define A1 23
248 1.1.1.2 mrg #define B0 24
249 1.1.1.2 mrg #define BB0 20
250 1.1 mrg #define B1 25
251 1.1.1.2 mrg ;; Output overlaps input, thus expand result in CC0/1
252 1.1.1.2 mrg #define C0 24
253 1.1 mrg #define C1 25
254 1.1.1.2 mrg #define CC0 __tmp_reg__
255 1.1 mrg #define CC1 21
256 1.1 mrg
257 1.1 mrg #if defined (L_umulqihi3)
258 1.1 mrg ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
259 1.1 mrg ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
260 1.1 mrg ;;; Clobbers: __tmp_reg__, R21..R23
;; Zero-extend both 8-bit operands and tail-call the 16x16 multiply.
261 1.1 mrg DEFUN __umulqihi3
262 1.1 mrg clr A1
263 1.1 mrg clr B1
264 1.1 mrg XJMP __mulhi3
265 1.1 mrg ENDF __umulqihi3
266 1.1 mrg #endif /* L_umulqihi3 */
267 1.1 mrg
268 1.1 mrg #if defined (L_mulqihi3)
269 1.1 mrg ;;; R25:R24 = (signed int) R22 * (signed int) R24
270 1.1 mrg ;;; (C1:C0) = (signed int) A0 * (signed int) B0
271 1.1 mrg ;;; Clobbers: __tmp_reg__, R20..R23
;; Signed 8x8 -> 16 multiply via __mulhi3.  A is deliberately only
;; zero-extended (the loop in __mulhi3 terminates faster with a zero
;; high byte); the missing sign-extension of A is compensated after
;; the call by subtracting B << 8 (held in BB0) when A was negative.
272 1.1 mrg DEFUN __mulqihi3
273 1.1 mrg ;; Sign-extend B0
274 1.1 mrg clr B1
275 1.1 mrg sbrc B0, 7
276 1.1 mrg com B1
277 1.1 mrg ;; The multiplication runs twice as fast if A1 is zero, thus:
278 1.1 mrg ;; Zero-extend A0
279 1.1 mrg clr A1
280 1.1 mrg #ifdef __AVR_HAVE_JMP_CALL__
281 1.1 mrg ;; Store B0 * sign of A
282 1.1 mrg clr BB0
283 1.1 mrg sbrc A0, 7
284 1.1 mrg mov BB0, B0
285 1.1 mrg call __mulhi3
286 1.1 mrg #else /* have no CALL */
287 1.1 mrg ;; Skip sign-extension of A if A >= 0
288 1.1 mrg ;; Same size as with the first alternative but avoids errata skip
289 1.1 mrg ;; and is faster if A >= 0
290 1.1 mrg sbrs A0, 7
291 1.1 mrg rjmp __mulhi3 ; tail call: no correction needed for A >= 0
292 1.1 mrg ;; If A < 0 store B
293 1.1 mrg mov BB0, B0
294 1.1 mrg rcall __mulhi3
295 1.1 mrg #endif /* HAVE_JMP_CALL */
296 1.1 mrg ;; 1-extend A after the multiplication
297 1.1 mrg sub C1, BB0
298 1.1 mrg ret
299 1.1 mrg ENDF __mulqihi3
300 1.1 mrg #endif /* L_mulqihi3 */
301 1.1 mrg
302 1.1 mrg #if defined (L_mulhi3)
303 1.1 mrg ;;; R25:R24 = R23:R22 * R25:R24
304 1.1 mrg ;;; (C1:C0) = (A1:A0) * (B1:B0)
305 1.1 mrg ;;; Clobbers: __tmp_reg__, R21..R23
;; 16 x 16 -> 16 shift-and-add multiply without MUL hardware.
;; Result overlaps operand B (both in r25:r24), so the product is
;; accumulated in CC1:CC0 (r21:__tmp_reg__) and copied out at the end.
;; The loop exits as soon as either B or the remaining bits of A are 0.
306 1.1 mrg DEFUN __mulhi3
307 1.1 mrg
308 1.1 mrg ;; Clear result
309 1.1 mrg clr CC0
310 1.1 mrg clr CC1
311 1.1 mrg rjmp 3f
312 1.1 mrg 1:
313 1.1 mrg ;; Bit n of A is 1 --> C += B << n
314 1.1 mrg add CC0, B0
315 1.1 mrg adc CC1, B1
316 1.1 mrg 2:
317 1.1 mrg lsl B0
318 1.1 mrg rol B1
319 1.1 mrg 3:
320 1.1.1.2 mrg ;; If B == 0 we are ready
321 1.1 mrg wsubi B0, 0
322 1.1 mrg breq 9f
323 1.1 mrg
324 1.1 mrg ;; Carry = n-th bit of A
325 1.1 mrg lsr A1
326 1.1 mrg ror A0
327 1.1 mrg ;; If bit n of A is set, then go add B * 2^n to C
328 1.1 mrg brcs 1b
329 1.1 mrg
330 1.1 mrg ;; Carry = 0 --> The ROR above acts like CP A0, 0
331 1.1 mrg ;; Thus, it is sufficient to CPC the high part to test A against 0
332 1.1 mrg cpc A1, __zero_reg__
333 1.1 mrg ;; Only proceed if A != 0
334 1.1 mrg brne 2b
335 1.1 mrg 9:
336 1.1 mrg ;; Move Result into place
337 1.1 mrg mov C0, CC0
338 1.1 mrg mov C1, CC1
339 1.1 mrg ret
340 1.1 mrg ENDF __mulhi3
341 1.1 mrg #endif /* L_mulhi3 */
342 1.1 mrg
343 1.1 mrg #undef A0
344 1.1 mrg #undef A1
345 1.1 mrg #undef B0
346 1.1 mrg #undef BB0
347 1.1 mrg #undef B1
348 1.1 mrg #undef C0
349 1.1 mrg #undef C1
350 1.1 mrg #undef CC0
351 1.1 mrg #undef CC1
352 1.1 mrg
353 1.1 mrg
354 1.1 mrg #define A0 22
356 1.1 mrg #define A1 A0+1
357 1.1 mrg #define A2 A0+2
358 1.1 mrg #define A3 A0+3
359 1.1 mrg
360 1.1 mrg #define B0 18
361 1.1 mrg #define B1 B0+1
362 1.1 mrg #define B2 B0+2
363 1.1 mrg #define B3 B0+3
364 1.1 mrg
365 1.1 mrg #define CC0 26
366 1.1 mrg #define CC1 CC0+1
367 1.1 mrg #define CC2 30
368 1.1 mrg #define CC3 CC2+1
369 1.1 mrg
370 1.1 mrg #define C0 22
371 1.1 mrg #define C1 C0+1
372 1.1 mrg #define C2 C0+2
373 1.1 mrg #define C3 C0+3
374 1.1 mrg
375 1.1 mrg /*******************************************************
376 1.1 mrg Widening Multiplication 32 = 16 x 16 without MUL
377 1.1 mrg *******************************************************/
378 1.1 mrg
379 1.1 mrg #if defined (L_umulhisi3)
;; Widening unsigned 16 x 16 -> 32 without MUL: move B into r19:r18,
;; zero-extend both operands to 32 bits and tail-call __mulsi3.
380 1.1 mrg DEFUN __umulhisi3
381 1.1 mrg wmov B0, 24
382 1.1 mrg ;; Zero-extend B
383 1.1 mrg clr B2
384 1.1 mrg clr B3
385 1.1 mrg ;; Zero-extend A
386 1.1 mrg wmov A2, B2 ; copy the two zero bytes just cleared
387 1.1 mrg XJMP __mulsi3
388 1.1 mrg ENDF __umulhisi3
389 1.1 mrg #endif /* L_umulhisi3 */
390 1.1 mrg
391 1.1 mrg #if defined (L_mulhisi3)
;; Widening signed 16 x 16 -> 32 without MUL.  B is sign-extended by
;; shifting its sign bit into Carry and broadcasting with SBC.
392 1.1 mrg DEFUN __mulhisi3
393 1.1 mrg wmov B0, 24
394 1.1 mrg ;; Sign-extend B
395 1.1 mrg lsl r25 ; Carry = sign of B (r25 already copied to B1)
396 1.1 mrg sbc B2, B2 ; 0x00 or 0xff depending on Carry
397 1.1 mrg mov B3, B2
398 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
399 1.1 mrg ;; Sign-extend A
400 1.1 mrg clr A2
401 1.1 mrg sbrc A1, 7
402 1.1 mrg com A2
403 1.1 mrg mov A3, A2
404 1.1 mrg XJMP __mulsi3
405 1.1 mrg #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
406 1.1 mrg ;; Zero-extend A and __mulsi3 will run at least twice as fast
407 1.1 mrg ;; compared to a sign-extended A.
408 1.1 mrg clr A2
409 1.1 mrg clr A3
410 1.1 mrg sbrs A1, 7
411 1.1 mrg XJMP __mulsi3
412 1.1 mrg ;; If A < 0 then perform the B * 0xffff.... before the
413 1.1 mrg ;; very multiplication by initializing the high part of the
414 1.1 mrg ;; result CC with -B.
415 1.1 mrg wmov CC2, A2 ; CC2:CC3 = 0
416 1.1 mrg sub CC2, B0
417 1.1 mrg sbc CC3, B1
418 1.1 mrg XJMP __mulsi3_helper
419 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
420 1.1 mrg ENDF __mulhisi3
421 1.1 mrg #endif /* L_mulhisi3 */
422 1.1 mrg
423 1.1 mrg
424 1.1 mrg /*******************************************************
425 1.1 mrg Multiplication 32 x 32 without MUL
426 1.1 mrg *******************************************************/
427 1.1 mrg
428 1.1.1.2 mrg #if defined (L_mulsi3)
;; 32 x 32 -> 32 multiply without MUL hardware.
;; R25:R22 = R25:R22 * R21:R18 (on AVR_TINY, B is fetched from the
;; caller's stack and r18/r19 are saved as callee-saved there).
;; The product is accumulated in CC3:CC0 because it overlaps A.
429 1.1.1.2 mrg DEFUN __mulsi3
430 1.1.1.2 mrg #if defined (__AVR_TINY__)
431 1.1.1.2 mrg in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
432 1.1.1.2 mrg in r27, __SP_H__
433 1.1.1.2 mrg subi r26, lo8(-3) ; Add 3 to point past return address
434 1.1.1.2 mrg sbci r27, hi8(-3)
435 1.1.1.2 mrg push B0 ; save callee saved regs
436 1.1.1.2 mrg push B1
437 1.1.1.2 mrg ld B0, X+ ; load from caller stack
438 1.1.1.2 mrg ld B1, X+
439 1.1.1.2 mrg ld B2, X+
440 1.1 mrg ld B3, X
441 1.1 mrg #endif
442 1.1 mrg ;; Clear result
443 1.1 mrg clr CC2
444 1.1 mrg clr CC3
445 1.1 mrg ;; FALLTHRU
446 1.1 mrg ENDF __mulsi3
447 1.1 mrg
;; Shift-and-add core; also entered directly by __mulhisi3 with CC2/CC3
;; pre-loaded with a sign correction.
448 1.1 mrg DEFUN __mulsi3_helper
449 1.1 mrg clr CC0
450 1.1 mrg clr CC1
451 1.1 mrg rjmp 3f
452 1.1 mrg
453 1.1 mrg 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
454 1.1 mrg ;; CC += B
455 1.1 mrg add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
456 1.1 mrg
457 1.1 mrg 2: ;; B <<= 1
458 1.1 mrg lsl B0 $ rol B1 $ rol B2 $ rol B3
459 1.1 mrg
460 1.1 mrg 3: ;; A >>= 1: Carry = n-th bit of A
461 1.1 mrg lsr A3 $ ror A2 $ ror A1 $ ror A0
462 1.1 mrg
463 1.1 mrg brcs 1b
464 1.1 mrg ;; Only continue if A != 0
;; Carry is 0 here, and the ROR above set Z from A0; SBCI only keeps
;; Z set if A1 is also 0, so this tests A1:A0 against 0.
465 1.1.1.2 mrg sbci A1, 0
466 1.1 mrg brne 2b
467 1.1 mrg wsubi A2, 0 ; likewise test A3:A2
468 1.1 mrg brne 2b
469 1.1 mrg
470 1.1 mrg ;; All bits of A are consumed: Copy result to return register C
471 1.1.1.2 mrg wmov C0, CC0
472 1.1.1.2 mrg wmov C2, CC2
473 1.1.1.2 mrg #if defined (__AVR_TINY__)
474 1.1.1.2 mrg pop B1 ; restore callee saved regs
475 1.1.1.2 mrg pop B0
476 1.1 mrg #endif /* defined (__AVR_TINY__) */
477 1.1 mrg
478 1.1 mrg ret
479 1.1 mrg ENDF __mulsi3_helper
480 1.1 mrg #endif /* L_mulsi3 */
481 1.1 mrg
482 1.1 mrg #undef A0
483 1.1 mrg #undef A1
484 1.1 mrg #undef A2
485 1.1 mrg #undef A3
486 1.1 mrg #undef B0
487 1.1 mrg #undef B1
488 1.1 mrg #undef B2
489 1.1 mrg #undef B3
490 1.1 mrg #undef C0
491 1.1 mrg #undef C1
492 1.1 mrg #undef C2
493 1.1 mrg #undef C3
494 1.1 mrg #undef CC0
495 1.1 mrg #undef CC1
496 1.1 mrg #undef CC2
497 1.1 mrg #undef CC3
498 1.1 mrg
499 1.1 mrg #endif /* !defined (__AVR_HAVE_MUL__) */
500 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 1.1 mrg
502 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
504 1.1 mrg #if defined (__AVR_HAVE_MUL__)
505 1.1 mrg #define A0 26
506 1.1 mrg #define B0 18
507 1.1 mrg #define C0 22
508 1.1 mrg
509 1.1 mrg #define A1 A0+1
510 1.1 mrg
511 1.1 mrg #define B1 B0+1
512 1.1 mrg #define B2 B0+2
513 1.1 mrg #define B3 B0+3
514 1.1 mrg
515 1.1 mrg #define C1 C0+1
516 1.1 mrg #define C2 C0+2
517 1.1 mrg #define C3 C0+3
518 1.1 mrg
519 1.1 mrg /*******************************************************
520 1.1 mrg Widening Multiplication 32 = 16 x 16 with MUL
521 1.1 mrg *******************************************************/
522 1.1 mrg
523 1.1 mrg #if defined (L_mulhisi3)
524 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
525 1.1 mrg ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
526 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Start from the unsigned product, then subtract A << 16 when B is
;; negative; the shared tail performs the matching correction for A.
527 1.1 mrg DEFUN __mulhisi3
528 1.1 mrg XCALL __umulhisi3
529 1.1 mrg ;; Sign-extend B
530 1.1 mrg tst B1
531 1.1 mrg brpl 1f
532 1.1 mrg sub C2, A0
533 1.1 mrg sbc C3, A1
534 1.1 mrg 1: ;; Sign-extend A
535 1.1 mrg XJMP __usmulhisi3_tail
536 1.1 mrg ENDF __mulhisi3
537 1.1 mrg #endif /* L_mulhisi3 */
538 1.1 mrg
539 1.1 mrg #if defined (L_usmulhisi3)
540 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
541 1.1 mrg ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
542 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Signed A x unsigned B: unsigned product plus a correction of the
;; high word (subtract B << 16) when A is negative.
543 1.1 mrg DEFUN __usmulhisi3
544 1.1 mrg XCALL __umulhisi3
545 1.1 mrg ;; FALLTHRU
546 1.1 mrg ENDF __usmulhisi3
547 1.1 mrg
;; Shared tail, also used by __mulhisi3 above.
548 1.1 mrg DEFUN __usmulhisi3_tail
549 1.1 mrg ;; Sign-extend A
550 1.1 mrg sbrs A1, 7
551 1.1 mrg ret ; A >= 0: nothing to correct
552 1.1 mrg sub C2, B0
553 1.1 mrg sbc C3, B1
554 1.1 mrg ret
555 1.1 mrg ENDF __usmulhisi3_tail
556 1.1 mrg #endif /* L_usmulhisi3 */
557 1.1 mrg
558 1.1 mrg #if defined (L_umulhisi3)
559 1.1 mrg ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
560 1.1 mrg ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
561 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Unsigned 16 x 16 -> 32 using the MUL instruction: four partial
;; products, with the two cross terms added into the middle bytes.
562 1.1 mrg DEFUN __umulhisi3
563 1.1 mrg mul A0, B0
564 1.1 mrg movw C0, r0
565 1.1 mrg mul A1, B1
566 1.1 mrg movw C2, r0
567 1.1 mrg mul A0, B1
568 1.1 mrg #ifdef __AVR_HAVE_JMP_CALL__
569 1.1 mrg ;; This function is used by many other routines, often multiple times.
570 1.1 mrg ;; Therefore, if the flash size is not too limited, avoid the RCALL
571 1.1 mrg ;; and invest 6 bytes to speed things up.
572 1.1 mrg add C1, r0
573 1.1 mrg adc C2, r1
574 1.1 mrg clr __zero_reg__
575 1.1 mrg adc C3, __zero_reg__
576 1.1 mrg #else
577 1.1 mrg rcall 1f ; reuse the add sequence below for this cross term
578 1.1 mrg #endif
579 1.1 mrg mul A1, B0
580 1.1 mrg 1: add C1, r0
581 1.1 mrg adc C2, r1
582 1.1 mrg clr __zero_reg__ ; MUL clobbered r1; restore the zero reg
583 1.1 mrg adc C3, __zero_reg__
584 1.1 mrg ret
585 1.1 mrg ENDF __umulhisi3
586 1.1 mrg #endif /* L_umulhisi3 */
587 1.1 mrg
588 1.1 mrg /*******************************************************
589 1.1 mrg Widening Multiplication 32 = 16 x 32 with MUL
590 1.1 mrg *******************************************************/
591 1.1 mrg
592 1.1 mrg #if defined (L_mulshisi3)
593 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * R21:R18
594 1.1 mrg ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
595 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Signed-16 x 32: dispatch to the unsigned variant, or to
;; __mulohisi3 when A is negative (one-extension correction needed).
596 1.1 mrg DEFUN __mulshisi3
597 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
598 1.1 mrg ;; Some cores have problem skipping 2-word instruction
599 1.1 mrg tst A1
600 1.1 mrg brmi __mulohisi3
601 1.1 mrg #else
602 1.1 mrg sbrs A1, 7
603 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
604 1.1 mrg XJMP __muluhisi3
605 1.1 mrg ;; FALLTHRU
606 1.1 mrg ENDF __mulshisi3
607 1.1 mrg
608 1.1 mrg ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
609 1.1 mrg ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
610 1.1 mrg ;;; Clobbers: __tmp_reg__
611 1.1 mrg DEFUN __mulohisi3
612 1.1 mrg XCALL __muluhisi3
613 1.1 mrg ;; One-extend R27:R26 (A1:A0)
614 1.1 mrg sub C2, B0
615 1.1 mrg sbc C3, B1
616 1.1 mrg ret
617 1.1 mrg ENDF __mulohisi3
618 1.1 mrg #endif /* L_mulshisi3 */
619 1.1 mrg
620 1.1 mrg #if defined (L_muluhisi3)
621 1.1 mrg ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
622 1.1 mrg ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
623 1.1 mrg ;;; Clobbers: __tmp_reg__
;; Unsigned-16 x 32 -> 32: low 32 bits of A1:A0 * B3:B0.  Starts from
;; the 16x16 product and folds in the high-word cross terms; bytes
;; that would only affect bits above 32 are dropped.
624 1.1 mrg DEFUN __muluhisi3
625 1.1 mrg XCALL __umulhisi3
626 1.1 mrg mul A0, B3
627 1.1 mrg add C3, r0 ; only the low byte of this term survives
628 1.1 mrg mul A1, B2
629 1.1 mrg add C3, r0
630 1.1 mrg mul A0, B2
631 1.1 mrg add C2, r0
632 1.1 mrg adc C3, r1
633 1.1 mrg clr __zero_reg__ ; MUL clobbered r1; restore the zero reg
634 1.1 mrg ret
635 1.1 mrg ENDF __muluhisi3
636 1.1 mrg #endif /* L_muluhisi3 */
637 1.1 mrg
638 1.1 mrg /*******************************************************
639 1.1 mrg Multiplication 32 x 32 with MUL
640 1.1 mrg *******************************************************/
641 1.1 mrg
642 1.1 mrg #if defined (L_mulsi3)
643 1.1 mrg ;;; R25:R22 = R25:R22 * R21:R18
644 1.1 mrg ;;; (C3:C0) = C3:C0 * B3:B0
645 1.1 mrg ;;; Clobbers: R26, R27, __tmp_reg__
;; 32 x 32 -> 32 with MUL: C (r25:r22) overlaps A's low word, so the
;; low word is copied into A0:A1 and the high word is parked on the
;; stack across the __muluhisi3 call.
646 1.1 mrg DEFUN __mulsi3
647 1.1 mrg movw A0, C0
648 1.1 mrg push C2
649 1.1 mrg push C3
650 1.1 mrg XCALL __muluhisi3
651 1.1 mrg pop A1
652 1.1 mrg pop A0
653 1.1 mrg ;; A1:A0 now contains the high word of A
654 1.1 mrg mul A0, B0
655 1.1 mrg add C2, r0
656 1.1 mrg adc C3, r1
657 1.1 mrg mul A0, B1
658 1.1 mrg add C3, r0 ; contributes to bits 24..31 only
659 1.1 mrg mul A1, B0
660 1.1 mrg add C3, r0
661 1.1 mrg clr __zero_reg__ ; MUL clobbered r1; restore the zero reg
662 1.1 mrg ret
663 1.1 mrg ENDF __mulsi3
664 1.1 mrg #endif /* L_mulsi3 */
665 1.1 mrg
666 1.1 mrg #undef A0
667 1.1 mrg #undef A1
668 1.1 mrg
669 1.1 mrg #undef B0
670 1.1 mrg #undef B1
671 1.1 mrg #undef B2
672 1.1 mrg #undef B3
673 1.1 mrg
674 1.1 mrg #undef C0
675 1.1 mrg #undef C1
676 1.1 mrg #undef C2
677 1.1 mrg #undef C3
678 1.1 mrg
679 1.1 mrg #endif /* __AVR_HAVE_MUL__ */
680 1.1 mrg
681 1.1 mrg /*******************************************************
682 1.1 mrg Multiplication 24 x 24 with MUL
683 1.1 mrg *******************************************************/
684 1.1 mrg
685 1.1 mrg #if defined (L_mulpsi3)
686 1.1 mrg
687 1.1 mrg ;; A[0..2]: In: Multiplicand; Out: Product
688 1.1 mrg #define A0 22
689 1.1 mrg #define A1 A0+1
690 1.1 mrg #define A2 A0+2
691 1.1 mrg
692 1.1 mrg ;; B[0..2]: In: Multiplier
693 1.1 mrg #define B0 18
694 1.1 mrg #define B1 B0+1
695 1.1 mrg #define B2 B0+2
696 1.1 mrg
697 1.1 mrg #if defined (__AVR_HAVE_MUL__)
698 1.1 mrg
699 1.1 mrg ;; C[0..2]: Expand Result
700 1.1 mrg #define C0 22
701 1.1 mrg #define C1 C0+1
702 1.1 mrg #define C2 C0+2
703 1.1 mrg
704 1.1 mrg ;; R24:R22 *= R20:R18
705 1.1 mrg ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
706 1.1 mrg
707 1.1 mrg #define AA0 26
708 1.1 mrg #define AA2 21
709 1.1 mrg
;; 24 x 24 -> 24 with MUL: copy A aside (A overlaps the result C),
;; take the 16x16 product, then add the two third-byte cross terms.
710 1.1 mrg DEFUN __mulpsi3
711 1.1 mrg wmov AA0, A0
712 1.1 mrg mov AA2, A2
713 1.1 mrg XCALL __umulhisi3
714 1.1 mrg mul AA2, B0 $ add C2, r0
715 1.1 mrg mul AA0, B2 $ add C2, r0
716 1.1 mrg clr __zero_reg__ ; MUL clobbered r1; restore the zero reg
717 1.1 mrg ret
718 1.1 mrg ENDF __mulpsi3
719 1.1 mrg
720 1.1 mrg #undef AA2
721 1.1 mrg #undef AA0
722 1.1 mrg
723 1.1 mrg #undef C2
724 1.1 mrg #undef C1
725 1.1 mrg #undef C0
726 1.1.1.2 mrg
727 1.1.1.2 mrg #else /* !HAVE_MUL */
728 1.1.1.2 mrg ;; C[0..2]: Expand Result
729 1.1 mrg #if defined (__AVR_TINY__)
730 1.1.1.2 mrg #define C0 16
731 1.1 mrg #else
732 1.1 mrg #define C0 0
733 1.1 mrg #endif /* defined (__AVR_TINY__) */
734 1.1 mrg #define C1 C0+1
735 1.1 mrg #define C2 21
736 1.1 mrg
737 1.1 mrg ;; R24:R22 *= R20:R18
738 1.1.1.2 mrg ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
739 1.1.1.2 mrg
;; 24 x 24 -> 24 shift-and-add without MUL.  The accumulator C0/C1
;; aliases __tmp_reg__/__zero_reg__ (or r16/r17 on AVR_TINY), which is
;; why the zero reg must be cleared again before returning.
740 1.1.1.2 mrg DEFUN __mulpsi3
741 1.1.1.2 mrg #if defined (__AVR_TINY__)
742 1.1.1.2 mrg in r26,__SP_L__
743 1.1.1.2 mrg in r27,__SP_H__
744 1.1.1.2 mrg subi r26, lo8(-3) ; Add 3 to point past return address
745 1.1.1.2 mrg sbci r27, hi8(-3)
746 1.1.1.2 mrg push B0 ; save callee saved regs
747 1.1.1.2 mrg push B1
748 1.1.1.2 mrg ld B0,X+ ; load from caller stack
749 1.1 mrg ld B1,X+
750 1.1 mrg ld B2,X+
751 1.1 mrg #endif /* defined (__AVR_TINY__) */
752 1.1 mrg
753 1.1 mrg ;; C[] = 0 (C1 aliases __zero_reg__, already 0)
754 1.1 mrg clr __tmp_reg__
755 1.1 mrg clr C2
756 1.1 mrg
757 1.1 mrg 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
758 1.1 mrg LSR B2 $ ror B1 $ ror B0
759 1.1 mrg
760 1.1 mrg ;; If the N-th Bit of B[] was set...
761 1.1 mrg brcc 1f
762 1.1 mrg
763 1.1 mrg ;; ...then add A[] * 2^N to the Result C[]
764 1.1 mrg ADD C0,A0 $ adc C1,A1 $ adc C2,A2
765 1.1 mrg
766 1.1 mrg 1: ;; Multiply A[] by 2
767 1.1 mrg LSL A0 $ rol A1 $ rol A2
768 1.1 mrg
769 1.1 mrg ;; Loop until B[] is 0
770 1.1 mrg subi B0,0 $ sbci B1,0 $ sbci B2,0
771 1.1 mrg brne 0b
772 1.1 mrg
773 1.1 mrg ;; Copy C[] to the return Register A[]
774 1.1 mrg wmov A0, C0
775 1.1.1.2 mrg mov A2, C2
776 1.1.1.2 mrg
777 1.1.1.2 mrg clr __zero_reg__ ; the accumulator clobbered it above
778 1.1.1.2 mrg #if defined (__AVR_TINY__)
779 1.1 mrg pop B1
780 1.1 mrg pop B0
781 1.1 mrg #endif /* (__AVR_TINY__) */
782 1.1 mrg ret
783 1.1 mrg ENDF __mulpsi3
784 1.1 mrg
785 1.1 mrg #undef C2
786 1.1 mrg #undef C1
787 1.1 mrg #undef C0
788 1.1 mrg
789 1.1 mrg #endif /* HAVE_MUL */
790 1.1 mrg
791 1.1 mrg #undef B2
792 1.1 mrg #undef B1
793 1.1 mrg #undef B0
794 1.1 mrg
795 1.1 mrg #undef A2
796 1.1 mrg #undef A1
797 1.1 mrg #undef A0
798 1.1 mrg
799 1.1 mrg #endif /* L_mulpsi3 */
800 1.1 mrg
801 1.1 mrg #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
802 1.1 mrg
803 1.1 mrg ;; A[0..2]: In: Multiplicand
804 1.1 mrg #define A0 22
805 1.1 mrg #define A1 A0+1
806 1.1 mrg #define A2 A0+2
807 1.1 mrg
808 1.1 mrg ;; BB: In: Multiplier
809 1.1 mrg #define BB 25
810 1.1 mrg
811 1.1 mrg ;; C[0..2]: Result
812 1.1 mrg #define C0 18
813 1.1 mrg #define C1 C0+1
814 1.1 mrg #define C2 C0+2
815 1.1 mrg
816 1.1 mrg ;; C[] = A[] * sign_extend (BB)
;; 24-bit C[] = 24-bit A[] * sign-extended 8-bit BB, using MUL.
;; Unsigned partial products first; if BB is negative, correct by
;; subtracting A << 8 (the one-extension of BB).
817 1.1 mrg DEFUN __mulsqipsi3
818 1.1 mrg mul A0, BB
819 1.1 mrg movw C0, r0
820 1.1 mrg mul A2, BB
821 1.1 mrg mov C2, r0
822 1.1 mrg mul A1, BB
823 1.1 mrg add C1, r0
824 1.1 mrg adc C2, r1
825 1.1 mrg clr __zero_reg__ ; MUL clobbered r1; restore the zero reg
826 1.1 mrg sbrs BB, 7
827 1.1 mrg ret ; BB >= 0: no correction needed
828 1.1 mrg ;; One-extend BB
829 1.1 mrg sub C1, A0
830 1.1 mrg sbc C2, A1
831 1.1 mrg ret
832 1.1 mrg ENDF __mulsqipsi3
833 1.1 mrg
834 1.1 mrg #undef C2
835 1.1 mrg #undef C1
836 1.1 mrg #undef C0
837 1.1 mrg
838 1.1 mrg #undef BB
839 1.1 mrg
840 1.1 mrg #undef A2
841 1.1 mrg #undef A1
842 1.1 mrg #undef A0
843 1.1 mrg
844 1.1 mrg #endif /* L_mulsqipsi3 && HAVE_MUL */
845 1.1 mrg
846 1.1 mrg /*******************************************************
847 1.1 mrg Multiplication 64 x 64
848 1.1 mrg *******************************************************/
849 1.1 mrg
850 1.1 mrg ;; A[] = A[] * B[]
851 1.1 mrg
852 1.1 mrg ;; A[0..7]: In: Multiplicand
853 1.1 mrg ;; Out: Product
854 1.1 mrg #define A0 18
855 1.1 mrg #define A1 A0+1
856 1.1 mrg #define A2 A0+2
857 1.1 mrg #define A3 A0+3
858 1.1 mrg #define A4 A0+4
859 1.1 mrg #define A5 A0+5
860 1.1 mrg #define A6 A0+6
861 1.1 mrg #define A7 A0+7
862 1.1 mrg
863 1.1 mrg ;; B[0..7]: In: Multiplier
864 1.1 mrg #define B0 10
865 1.1 mrg #define B1 B0+1
866 1.1 mrg #define B2 B0+2
867 1.1 mrg #define B3 B0+3
868 1.1 mrg #define B4 B0+4
869 1.1 mrg #define B5 B0+5
870 1.1.1.2 mrg #define B6 B0+6
871 1.1 mrg #define B7 B0+7
872 1.1 mrg
873 1.1 mrg #ifndef __AVR_TINY__
874 1.1 mrg #if defined (__AVR_HAVE_MUL__)
875 1.1 mrg ;; Define C[] for convenience
876 1.1 mrg ;; Notice that parts of C[] overlap A[] respective B[]
877 1.1 mrg #define C0 16
878 1.1 mrg #define C1 C0+1
879 1.1 mrg #define C2 20
880 1.1 mrg #define C3 C2+1
881 1.1 mrg #define C4 28
882 1.1 mrg #define C5 C4+1
883 1.1 mrg #define C6 C4+2
884 1.1 mrg #define C7 C4+3
885 1.1 mrg
886 1.1 mrg #if defined (L_muldi3)
887 1.1 mrg
888 1.1 mrg ;; A[] *= B[]
889 1.1 mrg ;; R25:R18 *= R17:R10
890 1.1 mrg ;; Ordinary ABI-Function
891 1.1 mrg
;; 64 x 64 -> 64 multiply with MUL, as a 4x4 word-wise schoolbook
;; product built on __umulhisi3/__muldi3_6.  Callee-saved r16, r17,
;; r28, r29 are preserved (C0/C1 alias r16/r17).  Word products whose
;; weight exceeds 2^63 only contribute their low bytes to C6/C7.
892 1.1 mrg DEFUN __muldi3
893 1.1 mrg push r29
894 1.1 mrg push r28
895 1.1 mrg push r17
896 1.1 mrg push r16
897 1.1 mrg
898 1.1 mrg ;; Counting in Words, we have to perform a 4 * 4 Multiplication
899 1.1 mrg
900 1.1 mrg ;; 3 * 0 + 0 * 3
901 1.1 mrg mul A7,B0 $ $ mov C7,r0
902 1.1 mrg mul A0,B7 $ $ add C7,r0
903 1.1 mrg mul A6,B1 $ $ add C7,r0
904 1.1 mrg mul A6,B0 $ mov C6,r0 $ add C7,r1
905 1.1 mrg mul B6,A1 $ $ add C7,r0
906 1.1 mrg mul B6,A0 $ add C6,r0 $ adc C7,r1
907 1.1 mrg
908 1.1 mrg ;; 1 * 2
909 1.1 mrg mul A2,B4 $ add C6,r0 $ adc C7,r1
910 1.1 mrg mul A3,B4 $ $ add C7,r0
911 1.1 mrg mul A2,B5 $ $ add C7,r0
912 1.1 mrg
;; Park operand words that later calls will clobber.
913 1.1 mrg push A5
914 1.1 mrg push A4
915 1.1 mrg push B1
916 1.1 mrg push B0
917 1.1 mrg push A3
918 1.1 mrg push A2
919 1.1 mrg
920 1.1 mrg ;; 0 * 0
921 1.1 mrg wmov 26, B0
922 1.1 mrg XCALL __umulhisi3
923 1.1 mrg wmov C0, 22
924 1.1 mrg wmov C2, 24
925 1.1 mrg
926 1.1 mrg ;; 0 * 2
927 1.1 mrg wmov 26, B4
928 1.1 mrg XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
929 1.1 mrg
930 1.1 mrg wmov 26, B2
931 1.1 mrg ;; 0 * 1
932 1.1 mrg XCALL __muldi3_6
933 1.1 mrg
934 1.1 mrg pop A0
935 1.1 mrg pop A1
936 1.1 mrg ;; 1 * 1
937 1.1 mrg wmov 26, B2
938 1.1 mrg XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
939 1.1 mrg
940 1.1 mrg pop r26
941 1.1 mrg pop r27
942 1.1 mrg ;; 1 * 0
943 1.1 mrg XCALL __muldi3_6
944 1.1 mrg
945 1.1 mrg pop A0
946 1.1 mrg pop A1
947 1.1 mrg ;; 2 * 0
948 1.1 mrg XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
949 1.1 mrg
950 1.1 mrg ;; 2 * 1
951 1.1 mrg wmov 26, B2
952 1.1 mrg XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
953 1.1 mrg
954 1.1 mrg ;; A[] = C[]
955 1.1 mrg wmov A0, C0
956 1.1 mrg ;; A2 = C2 already
957 1.1 mrg wmov A4, C4
958 1.1 mrg wmov A6, C6
959 1.1 mrg
960 1.1 mrg pop r16
961 1.1 mrg pop r17
962 1.1 mrg pop r28
963 1.1 mrg pop r29
964 1.1 mrg ret
965 1.1 mrg ENDF __muldi3
966 1.1 mrg #endif /* L_muldi3 */
967 1.1 mrg
968 1.1 mrg #if defined (L_muldi3_6)
969 1.1 mrg ;; A helper for some 64-bit multiplications with MUL available
;; Helper for __muldi3: compute __umulhisi3 and add its 32-bit result
;; (r25:r22) into C5..C2, propagating a final carry into C7:C6.
970 1.1 mrg DEFUN __muldi3_6
971 1.1 mrg __muldi3_6:
972 1.1 mrg XCALL __umulhisi3
973 1.1 mrg add C2, 22
974 1.1 mrg adc C3, 23
975 1.1 mrg adc C4, 24
976 1.1 mrg adc C5, 25
977 1.1 mrg brcc 0f
978 1.1 mrg adiw C6, 1 ; ripple carry into the top word
979 1.1 mrg 0: ret
980 1.1 mrg ENDF __muldi3_6
981 1.1 mrg #endif /* L_muldi3_6 */
982 1.1 mrg
983 1.1 mrg #undef C7
984 1.1 mrg #undef C6
985 1.1 mrg #undef C5
986 1.1 mrg #undef C4
987 1.1 mrg #undef C3
988 1.1 mrg #undef C2
989 1.1 mrg #undef C1
990 1.1 mrg #undef C0
991 1.1 mrg
992 1.1 mrg #else /* !HAVE_MUL */
993 1.1 mrg
994 1.1 mrg #if defined (L_muldi3)
995 1.1 mrg
996 1.1 mrg #define C0 26
997 1.1 mrg #define C1 C0+1
998 1.1 mrg #define C2 C0+2
999 1.1 mrg #define C3 C0+3
1000 1.1 mrg #define C4 C0+4
1001 1.1 mrg #define C5 C0+5
1002 1.1 mrg #define C6 0
1003 1.1 mrg #define C7 C6+1
1004 1.1 mrg
1005 1.1 mrg #define Loop 9
1006 1.1 mrg
1007 1.1 mrg ;; A[] *= B[]
1008 1.1 mrg ;; R25:R18 *= R17:R10
1009 1.1 mrg ;; Ordinary ABI-Function
1010 1.1 mrg
;; 64 x 64 -> 64 shift-and-add multiply without MUL: 64 iterations,
;; rotating B right (so B is restored afterwards) and shifting A left.
;; C6/C7 alias __tmp_reg__/__zero_reg__, hence the final
;; clr __zero_reg__; r9 ("Loop") and r28/r29 are saved as callee-saved.
1011 1.1 mrg DEFUN __muldi3
1012 1.1 mrg push r29
1013 1.1 mrg push r28
1014 1.1 mrg push Loop
1015 1.1 mrg
1016 1.1 mrg ldi C0, 64 ; iteration count, one per bit of B
1017 1.1 mrg mov Loop, C0
1018 1.1 mrg
1019 1.1 mrg ;; C[] = 0
1020 1.1 mrg clr __tmp_reg__
1021 1.1 mrg wmov C0, 0
1022 1.1 mrg wmov C2, 0
1023 1.1 mrg wmov C4, 0
1024 1.1 mrg
1025 1.1 mrg 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1026 1.1 mrg ;; where N = 64 - Loop.
1027 1.1 mrg ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1028 1.1 mrg ;; B[] will have its initial Value again.
1029 1.1 mrg LSR B7 $ ror B6 $ ror B5 $ ror B4
1030 1.1 mrg ror B3 $ ror B2 $ ror B1 $ ror B0
1031 1.1 mrg
1032 1.1 mrg ;; If the N-th Bit of B[] was set then...
1033 1.1 mrg brcc 1f
1034 1.1 mrg ;; ...finish Rotation...
1035 1.1 mrg ori B7, 1 << 7
1036 1.1 mrg
1037 1.1 mrg ;; ...and add A[] * 2^N to the Result C[]
1038 1.1 mrg ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1039 1.1 mrg adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1040 1.1 mrg
1041 1.1 mrg 1: ;; Multiply A[] by 2
1042 1.1 mrg LSL A0 $ rol A1 $ rol A2 $ rol A3
1043 1.1 mrg rol A4 $ rol A5 $ rol A6 $ rol A7
1044 1.1 mrg
1045 1.1 mrg dec Loop
1046 1.1 mrg brne 0b
1047 1.1 mrg
1048 1.1 mrg ;; We expanded the Result in C[]
1049 1.1 mrg ;; Copy Result to the Return Register A[]
1050 1.1 mrg wmov A0, C0
1051 1.1 mrg wmov A2, C2
1052 1.1 mrg wmov A4, C4
1053 1.1 mrg wmov A6, C6
1054 1.1 mrg
1055 1.1 mrg clr __zero_reg__ ; C7 aliased it and clobbered it above
1056 1.1 mrg pop Loop
1057 1.1 mrg pop r28
1058 1.1 mrg pop r29
1059 1.1 mrg ret
1060 1.1 mrg ENDF __muldi3
1061 1.1 mrg
1062 1.1 mrg #undef Loop
1063 1.1 mrg
1064 1.1 mrg #undef C7
1065 1.1 mrg #undef C6
1066 1.1 mrg #undef C5
1067 1.1 mrg #undef C4
1068 1.1 mrg #undef C3
1069 1.1 mrg #undef C2
1070 1.1 mrg #undef C1
1071 1.1 mrg #undef C0
1072 1.1.1.2 mrg
1073 1.1 mrg #endif /* L_muldi3 */
1074 1.1 mrg #endif /* HAVE_MUL */
1075 1.1 mrg #endif /* if not __AVR_TINY__ */
1076 1.1 mrg
1077 1.1 mrg #undef B7
1078 1.1 mrg #undef B6
1079 1.1 mrg #undef B5
1080 1.1 mrg #undef B4
1081 1.1 mrg #undef B3
1082 1.1 mrg #undef B2
1083 1.1 mrg #undef B1
1084 1.1 mrg #undef B0
1085 1.1 mrg
1086 1.1 mrg #undef A7
1087 1.1 mrg #undef A6
1088 1.1 mrg #undef A5
1089 1.1 mrg #undef A4
1090 1.1 mrg #undef A3
1091 1.1 mrg #undef A2
1092 1.1 mrg #undef A1
1093 1.1 mrg #undef A0
1094 1.1 mrg
1095 1.1 mrg /*******************************************************
1096 1.1 mrg Widening Multiplication 64 = 32 x 32 with MUL
1097 1.1 mrg *******************************************************/
1098 1.1 mrg
1099 1.1 mrg #if defined (__AVR_HAVE_MUL__)
1100 1.1 mrg #define A0 r22
1101 1.1 mrg #define A1 r23
1102 1.1 mrg #define A2 r24
1103 1.1 mrg #define A3 r25
1104 1.1 mrg
1105 1.1 mrg #define B0 r18
1106 1.1 mrg #define B1 r19
1107 1.1 mrg #define B2 r20
1108 1.1 mrg #define B3 r21
1109 1.1 mrg
1110 1.1 mrg #define C0 18
1111 1.1 mrg #define C1 C0+1
1112 1.1 mrg #define C2 20
1113 1.1 mrg #define C3 C2+1
1114 1.1 mrg #define C4 28
1115 1.1 mrg #define C5 C4+1
1116 1.1 mrg #define C6 C4+2
1117 1.1 mrg #define C7 C4+3
1118 1.1 mrg
1119 1.1 mrg #if defined (L_umulsidi3)
1120 1.1 mrg
;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
;;
;; R18[8] = R22[4] * R18[4]
;;
;; Ordinary ABI Function, but additionally sets
;; X = R20[2] = B2[2]
;; Z = R22[2] = A0[2]
DEFUN __umulsidi3
    clt                     ; T := 0 tells the helper A is unsigned
    ;; FALLTHRU into __umulsidi3_helper
ENDF __umulsidi3
;; T = sign (A)
;; Worker for __umulsidi3 / __mulsidi3 (MUL variant): builds the 64-bit
;; product from four 16x16->32 partial products ("word * word") using
;; __umulhisi3 and __muldi3_6.  If T is set, B is subtracted from the
;; high half to account for A's sign (see the brtc path below).
DEFUN __umulsidi3_helper
    push 29 $ push 28 ; Y
    wmov 30, A2
    ;; Counting in Words, we have to perform 4 Multiplications
    ;; 0 * 0
    wmov 26, A0
    XCALL __umulhisi3
    push 23 $ push 22 ; C0
    wmov 28, B0
    wmov 18, B2
    wmov C2, 24
    push 27 $ push 26 ; A0
    push 19 $ push 18 ; B2
    ;;
    ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
    ;; B2 C2 -- -- -- B0 A2
    ;; 1 * 1
    wmov 26, 30 ; A2
    XCALL __umulhisi3
    ;; Sign-extend A. T holds the sign of A
    brtc 0f
    ;; Subtract B from the high part of the result
    sub 22, 28
    sbc 23, 29
    sbc 24, 18
    sbc 25, 19
0:  wmov 18, 28 ;; B0
    wmov C4, 22
    wmov C6, 24
    ;;
    ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
    ;; B0 C2 -- -- A2 C4 C6
    ;;
    ;; 1 * 0
    XCALL __muldi3_6
    ;; 0 * 1
    pop 26 $ pop 27 ;; B2
    pop 18 $ pop 19 ;; A0
    XCALL __muldi3_6

    ;; Move result C into place and save A0 in Z
    wmov 22, C4
    wmov 24, C6
    wmov 30, 18 ; A0
    pop C0 $ pop C1

    ;; Epilogue
    pop 28 $ pop 29 ;; Y
    ret
ENDF __umulsidi3_helper
1183 1.1 mrg #endif /* L_umulsidi3 */
1184 1.1 mrg
1185 1.1 mrg
1186 1.1 mrg #if defined (L_mulsidi3)
1187 1.1 mrg
;; Signed widening 64 = 32 * 32 Multiplication
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    bst A3, 7               ; T = sign of A; the helper handles it
    sbrs B3, 7 ; Enhanced core has no skip bug
    XJMP __umulsidi3_helper ; B >= 0: tail-call, nothing more to fix

    ;; B needs sign-extension: treating signed B as unsigned overstates
    ;; it by 2^32, so subtract A from the upper 32 bits of the
    ;; unsigned product.
    push A3
    push A2
    XCALL __umulsidi3_helper
    ;; A0 survived in Z
    sub r22, r30
    sbc r23, r31
    pop r26
    pop r27
    sbc r24, r26
    sbc r25, r27
    ret
ENDF __mulsidi3
1210 1.1 mrg #endif /* L_mulsidi3 */
1211 1.1 mrg
1212 1.1 mrg #undef A0
1213 1.1 mrg #undef A1
1214 1.1 mrg #undef A2
1215 1.1 mrg #undef A3
1216 1.1 mrg #undef B0
1217 1.1 mrg #undef B1
1218 1.1 mrg #undef B2
1219 1.1 mrg #undef B3
1220 1.1 mrg #undef C0
1221 1.1 mrg #undef C1
1222 1.1 mrg #undef C2
1223 1.1 mrg #undef C3
1224 1.1 mrg #undef C4
1225 1.1 mrg #undef C5
1226 1.1 mrg #undef C6
1227 1.1 mrg #undef C7
1228 1.1 mrg #endif /* HAVE_MUL */
1229 1.1 mrg
1230 1.1.1.2 mrg /**********************************************************
1231 1.1 mrg Widening Multiplication 64 = 32 x 32 without MUL
1232 1.1 mrg **********************************************************/
1233 1.1 mrg #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1234 1.1 mrg #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1235 1.1 mrg #define A0 18
1236 1.1 mrg #define A1 A0+1
1237 1.1 mrg #define A2 A0+2
1238 1.1 mrg #define A3 A0+3
1239 1.1 mrg #define A4 A0+4
1240 1.1 mrg #define A5 A0+5
1241 1.1 mrg #define A6 A0+6
1242 1.1 mrg #define A7 A0+7
1243 1.1 mrg
1244 1.1 mrg #define B0 10
1245 1.1 mrg #define B1 B0+1
1246 1.1 mrg #define B2 B0+2
1247 1.1 mrg #define B3 B0+3
1248 1.1 mrg #define B4 B0+4
1249 1.1 mrg #define B5 B0+5
1250 1.1 mrg #define B6 B0+6
1251 1.1 mrg #define B7 B0+7
1252 1.1 mrg
1253 1.1 mrg #define AA0 22
1254 1.1 mrg #define AA1 AA0+1
1255 1.1 mrg #define AA2 AA0+2
1256 1.1 mrg #define AA3 AA0+3
1257 1.1 mrg
1258 1.1 mrg #define BB0 18
1259 1.1 mrg #define BB1 BB0+1
1260 1.1 mrg #define BB2 BB0+2
1261 1.1 mrg #define BB3 BB0+3
1262 1.1 mrg
1263 1.1 mrg #define Mask r30
1264 1.1 mrg
;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    set                     ; T = 1 selects signed extension below
    skip                    ; skip the clt of __umulsidi3 so T stays 1
    ;; FALLTHRU
ENDF __mulsidi3
1274 1.1 mrg
;; Unsigned entry: T = 0.  When entered by falling through from
;; __mulsidi3 above, the clt is skipped and T remains 1 (signed).
;; Both inputs are widened to 64 bits (zero- or sign-extension chosen
;; by T via Mask) and the full 64x64 __muldi3 does the work.
DEFUN __umulsidi3
    clt ; skipped
    ;; Save 10 Registers: R10..R17, R28, R29
    do_prologue_saves 10
    ;; Mask = 0x7f (T = 0, unsigned) or 0xff (T = 1, signed)
    ldi Mask, 0xff
    bld Mask, 7
    ;; Move B into place...
    wmov B0, BB0
    wmov B2, BB2
    ;; ...and extend it: after the AND, LSL puts the (possibly masked)
    ;; sign bit into Carry; SBC then yields 0x00 or 0xff
    and BB3, Mask
    lsl BB3
    sbc B4, B4
    mov B5, B4
    wmov B6, B4
    ;; Move A into place...
    wmov A0, AA0
    wmov A2, AA2
    ;; ...and extend it the same way
    and AA3, Mask
    lsl AA3
    sbc A4, A4
    mov A5, A4
    wmov A6, A4
    XCALL __muldi3
    do_epilogue_restores 10
ENDF __umulsidi3
1302 1.1 mrg
1303 1.1 mrg #undef A0
1304 1.1 mrg #undef A1
1305 1.1 mrg #undef A2
1306 1.1 mrg #undef A3
1307 1.1 mrg #undef A4
1308 1.1 mrg #undef A5
1309 1.1 mrg #undef A6
1310 1.1 mrg #undef A7
1311 1.1 mrg #undef B0
1312 1.1 mrg #undef B1
1313 1.1 mrg #undef B2
1314 1.1 mrg #undef B3
1315 1.1 mrg #undef B4
1316 1.1 mrg #undef B5
1317 1.1 mrg #undef B6
1318 1.1 mrg #undef B7
1319 1.1 mrg #undef AA0
1320 1.1 mrg #undef AA1
1321 1.1 mrg #undef AA2
1322 1.1 mrg #undef AA3
1323 1.1 mrg #undef BB0
1324 1.1 mrg #undef BB1
1325 1.1 mrg #undef BB2
1326 1.1.1.2 mrg #undef BB3
1327 1.1 mrg #undef Mask
1328 1.1 mrg #endif /* L_mulsidi3 && !HAVE_MUL */
1329 1.1 mrg #endif /* if not __AVR_TINY__ */
1330 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1331 1.1 mrg
1332 1.1 mrg
1333 1.1 mrg .section .text.libgcc.div, "ax", @progbits
1335 1.1 mrg
1336 1.1 mrg /*******************************************************
1337 1.1 mrg Division 8 / 8 => (result + remainder)
1338 1.1 mrg *******************************************************/
1339 1.1 mrg #define r_rem r25 /* remainder */
1340 1.1 mrg #define r_arg1 r24 /* dividend, quotient */
1341 1.1 mrg #define r_arg2 r22 /* divisor */
1342 1.1 mrg #define r_cnt r23 /* loop count */
1343 1.1 mrg
1344 1.1 mrg #if defined (L_udivmodqi4)
;; Unsigned 8 / 8 division + remainder (restoring shift-subtract).
;; 9 loop passes: the first only shifts the dividend's MSB in.
;; Quotient bits are collected COMPLEMENTED (Carry is 1 when no
;; subtraction happened), hence the final COM.
DEFUN __udivmodqi4
    sub r_rem,r_rem         ; clear remainder and carry
    ldi r_cnt,9             ; init loop counter
    rjmp __udivmodqi4_ep    ; jump to entry point
__udivmodqi4_loop:
    rol r_rem               ; shift dividend into remainder
    cp r_rem,r_arg2         ; compare remainder & divisor
    brcs __udivmodqi4_ep    ; remainder < divisor
    sub r_rem,r_arg2        ; remainder -= divisor
__udivmodqi4_ep:
    rol r_arg1              ; shift dividend (with CARRY)
    dec r_cnt               ; decrement loop counter
    brne __udivmodqi4_loop
    com r_arg1              ; complement result
                            ; because C flag was complemented in loop
    ret
ENDF __udivmodqi4
1362 1.1 mrg #endif /* defined (L_udivmodqi4) */
1363 1.1 mrg
1364 1.1 mrg #if defined (L_divmodqi4)
;; Signed 8 / 8 division + remainder: take absolute values, do the
;; unsigned div/mod, then fix signs.  Remainder takes the dividend's
;; sign (T); quotient takes A.sign ^ B.sign (kept in r0.7).
DEFUN __divmodqi4
    bst r_arg1,7            ; T = sign of dividend
    mov __tmp_reg__,r_arg1
    eor __tmp_reg__,r_arg2  ; r0.7 is sign of result
    sbrc r_arg1,7
    neg r_arg1              ; dividend negative : negate
    sbrc r_arg2,7
    neg r_arg2              ; divisor negative : negate
    XCALL __udivmodqi4      ; do the unsigned div/mod
    brtc __divmodqi4_1
    neg r_rem               ; correct remainder sign
__divmodqi4_1:
    sbrc __tmp_reg__,7
    neg r_arg1              ; correct result sign
__divmodqi4_exit:
    ret
ENDF __divmodqi4
1382 1.1 mrg #endif /* defined (L_divmodqi4) */
1383 1.1 mrg
1384 1.1 mrg #undef r_rem
1385 1.1 mrg #undef r_arg1
1386 1.1 mrg #undef r_arg2
1387 1.1 mrg #undef r_cnt
1388 1.1 mrg
1389 1.1 mrg
1390 1.1 mrg /*******************************************************
1391 1.1 mrg Division 16 / 16 => (result + remainder)
1392 1.1 mrg *******************************************************/
1393 1.1 mrg #define r_remL r26 /* remainder Low */
1394 1.1 mrg #define r_remH r27 /* remainder High */
1395 1.1 mrg
1396 1.1 mrg /* return: remainder */
1397 1.1 mrg #define r_arg1L r24 /* dividend Low */
1398 1.1 mrg #define r_arg1H r25 /* dividend High */
1399 1.1 mrg
1400 1.1 mrg /* return: quotient */
1401 1.1 mrg #define r_arg2L r22 /* divisor Low */
1402 1.1 mrg #define r_arg2H r23 /* divisor High */
1403 1.1 mrg
1404 1.1 mrg #define r_cnt r21 /* loop count */
1405 1.1 mrg
1406 1.1 mrg #if defined (L_udivmodhi4)
;; Unsigned 16 / 16 division + remainder, same complemented-carry
;; shift-subtract scheme as __udivmodqi4; 17 passes for 16 bits.
DEFUN __udivmodhi4
    sub r_remL,r_remL
    sub r_remH,r_remH       ; clear remainder and carry
    ldi r_cnt,17            ; init loop counter
    rjmp __udivmodhi4_ep    ; jump to entry point
__udivmodhi4_loop:
    rol r_remL              ; shift dividend into remainder
    rol r_remH
    cp r_remL,r_arg2L       ; compare remainder & divisor
    cpc r_remH,r_arg2H
    brcs __udivmodhi4_ep    ; remainder < divisor
    sub r_remL,r_arg2L      ; remainder -= divisor
    sbc r_remH,r_arg2H
__udivmodhi4_ep:
    rol r_arg1L             ; shift dividend (with CARRY)
    rol r_arg1H
    dec r_cnt               ; decrement loop counter
    brne __udivmodhi4_loop
    com r_arg1L             ; complement result
    com r_arg1H             ; because C flag was complemented in loop
    ; div/mod results to return registers, as for the div() function
    mov_l r_arg2L, r_arg1L  ; quotient
    mov_h r_arg2H, r_arg1H
    mov_l r_arg1L, r_remL   ; remainder
    mov_h r_arg1H, r_remH
    ret
ENDF __udivmodhi4
1434 1.1 mrg #endif /* defined (L_udivmodhi4) */
1435 1.1 mrg
1436 1.1 mrg #if defined (L_divmodhi4)
;; Signed 16 / 16 division + remainder.  Also exported as `_div'.
;; After __udivmodhi4, the QUOTIENT is in r_arg2 and the REMAINDER in
;; r_arg1 — so _neg2 fixes the quotient and _neg1 the remainder.
DEFUN __divmodhi4
    .global _div
_div:
    bst r_arg1H,7           ; store sign of dividend
    mov __tmp_reg__,r_arg2H
    brtc 0f
    com __tmp_reg__         ; r0.7 is sign of result
    rcall __divmodhi4_neg1  ; dividend negative: negate
0:
    sbrc r_arg2H,7
    rcall __divmodhi4_neg2  ; divisor negative: negate
    XCALL __udivmodhi4      ; do the unsigned div/mod
    sbrc __tmp_reg__,7
    rcall __divmodhi4_neg2  ; correct quotient sign (now in r_arg2)
    brtc __divmodhi4_exit
    ;; T set: fall into _neg1 to correct the remainder's sign
__divmodhi4_neg1:
    ;; correct dividend/remainder sign
    com r_arg1H
    neg r_arg1L
    sbci r_arg1H,0xff
    ret
__divmodhi4_neg2:
    ;; correct divisor/quotient sign
    com r_arg2H
    neg r_arg2L
    sbci r_arg2H,0xff
__divmodhi4_exit:
    ret
ENDF __divmodhi4
1466 1.1 mrg #endif /* defined (L_divmodhi4) */
1467 1.1 mrg
1468 1.1 mrg #undef r_remH
1469 1.1 mrg #undef r_remL
1470 1.1 mrg
1471 1.1 mrg #undef r_arg1H
1472 1.1 mrg #undef r_arg1L
1473 1.1 mrg
1474 1.1 mrg #undef r_arg2H
1475 1.1 mrg #undef r_arg2L
1476 1.1 mrg
1477 1.1 mrg #undef r_cnt
1478 1.1 mrg
1479 1.1 mrg /*******************************************************
1480 1.1 mrg Division 24 / 24 => (result + remainder)
1481 1.1 mrg *******************************************************/
1482 1.1 mrg
1483 1.1 mrg ;; A[0..2]: In: Dividend; Out: Quotient
1484 1.1 mrg #define A0 22
1485 1.1 mrg #define A1 A0+1
1486 1.1 mrg #define A2 A0+2
1487 1.1 mrg
1488 1.1 mrg ;; B[0..2]: In: Divisor; Out: Remainder
1489 1.1 mrg #define B0 18
1490 1.1 mrg #define B1 B0+1
1491 1.1 mrg #define B2 B0+2
1492 1.1 mrg
1493 1.1 mrg ;; C[0..2]: Expand remainder
1494 1.1 mrg #define C0 __zero_reg__
1495 1.1 mrg #define C1 26
1496 1.1 mrg #define C2 25
1497 1.1 mrg
1498 1.1.1.2 mrg ;; Loop counter
1499 1.1 mrg #define r_cnt 21
1500 1.1 mrg
1501 1.1 mrg #if defined (L_udivmodpsi4)
;; R24:R22 = R24:R22 udiv R20:R18
;; R20:R18 = R24:R22 umod R20:R18
;; Clobbers: R21, R25, R26
;;
;; Unsigned 24 / 24 division + remainder, complemented-carry
;; shift-subtract as in __udivmodqi4.  C0 aliases __zero_reg__, which
;; is why it needs no clearing up front and is re-cleared on exit.

DEFUN __udivmodpsi4
    ; init loop counter (24 bits + 1 priming shift)
    ldi r_cnt, 24+1
    ; Clear remainder and carry. C0 is already 0
    clr C1
    sub C2, C2
    ; jump to entry point
    rjmp __udivmodpsi4_start
__udivmodpsi4_loop:
    ; shift dividend into remainder
    rol C0
    rol C1
    rol C2
    ; compare remainder & divisor
    cp C0, B0
    cpc C1, B1
    cpc C2, B2
    brcs __udivmodpsi4_start ; remainder < divisor
    sub C0, B0               ; remainder -= divisor
    sbc C1, B1
    sbc C2, B2
__udivmodpsi4_start:
    ; shift dividend (with CARRY); quotient bits arrive complemented
    rol A0
    rol A1
    rol A2
    ; decrement loop counter
    dec r_cnt
    brne __udivmodpsi4_loop
    com A0
    com A1
    com A2
    ; div/mod results to return registers
    ; remainder
    mov B0, C0
    mov B1, C1
    mov B2, C2
    clr __zero_reg__ ; C0
    ret
ENDF __udivmodpsi4
1546 1.1 mrg #endif /* defined (L_udivmodpsi4) */
1547 1.1 mrg
1548 1.1 mrg #if defined (L_divmodpsi4)
;; R24:R22 = R24:R22 div R20:R18
;; R20:R18 = R24:R22 mod R20:R18
;; Clobbers: T, __tmp_reg__, R21, R25, R26

DEFUN __divmodpsi4
    ; R0.7 will contain the sign of the result:
    ; R0.7 = A.sign ^ B.sign
    mov __tmp_reg__, B2
    ; T-flag = sign of dividend
    bst A2, 7
    brtc 0f
    com __tmp_reg__
    ; Adjust dividend's sign
    rcall __divmodpsi4_negA
0:
    ; Adjust divisor's sign
    sbrc B2, 7
    rcall __divmodpsi4_negB

    ; Do the unsigned div/mod
    XCALL __udivmodpsi4

    ; Adjust quotient's sign
    sbrc __tmp_reg__, 7
    rcall __divmodpsi4_negA

    ; Adjust remainder's sign: when T is set (negative dividend) we
    ; fall into _negB, which now negates the remainder held in B[]
    brtc __divmodpsi4_end

__divmodpsi4_negB:
    ; Correct divisor/remainder sign: B[] = -B[]
    com B2
    com B1
    neg B0
    sbci B1, -1
    sbci B2, -1
    ret

    ; Correct dividend/quotient sign: A[] = -A[]
__divmodpsi4_negA:
    com A2
    com A1
    neg A0
    sbci A1, -1
    sbci A2, -1
__divmodpsi4_end:
    ret

ENDF __divmodpsi4
1598 1.1 mrg #endif /* defined (L_divmodpsi4) */
1599 1.1 mrg
1600 1.1 mrg #undef A0
1601 1.1 mrg #undef A1
1602 1.1 mrg #undef A2
1603 1.1 mrg
1604 1.1 mrg #undef B0
1605 1.1 mrg #undef B1
1606 1.1 mrg #undef B2
1607 1.1 mrg
1608 1.1 mrg #undef C0
1609 1.1 mrg #undef C1
1610 1.1 mrg #undef C2
1611 1.1 mrg
1612 1.1 mrg #undef r_cnt
1613 1.1 mrg
1614 1.1 mrg /*******************************************************
1615 1.1 mrg Division 32 / 32 => (result + remainder)
1616 1.1 mrg *******************************************************/
1617 1.1 mrg #define r_remHH r31 /* remainder High */
1618 1.1 mrg #define r_remHL r30
1619 1.1 mrg #define r_remH r27
1620 1.1 mrg #define r_remL r26 /* remainder Low */
1621 1.1 mrg
1622 1.1 mrg /* return: remainder */
1623 1.1 mrg #define r_arg1HH r25 /* dividend High */
1624 1.1 mrg #define r_arg1HL r24
1625 1.1 mrg #define r_arg1H r23
1626 1.1 mrg #define r_arg1L r22 /* dividend Low */
1627 1.1 mrg
1628 1.1 mrg /* return: quotient */
1629 1.1 mrg #define r_arg2HH r21 /* divisor High */
1630 1.1 mrg #define r_arg2HL r20
1631 1.1 mrg #define r_arg2H r19
1632 1.1 mrg #define r_arg2L r18 /* divisor Low */
1633 1.1 mrg
1634 1.1 mrg #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1635 1.1 mrg
1636 1.1 mrg #if defined (L_udivmodsi4)
;; Unsigned 32 / 32 division + remainder, complemented-carry
;; shift-subtract; 33 passes.  The loop counter lives in __zero_reg__,
;; which is legitimately 0 again when the loop terminates.
DEFUN __udivmodsi4
    ldi r_remL, 33          ; init loop counter
    mov r_cnt, r_remL
    sub r_remL,r_remL
    sub r_remH,r_remH       ; clear remainder and carry
    mov_l r_remHL, r_remL
    mov_h r_remHH, r_remH
    rjmp __udivmodsi4_ep    ; jump to entry point
__udivmodsi4_loop:
    rol r_remL              ; shift dividend into remainder
    rol r_remH
    rol r_remHL
    rol r_remHH
    cp r_remL,r_arg2L       ; compare remainder & divisor
    cpc r_remH,r_arg2H
    cpc r_remHL,r_arg2HL
    cpc r_remHH,r_arg2HH
    brcs __udivmodsi4_ep    ; remainder < divisor
    sub r_remL,r_arg2L      ; remainder -= divisor
    sbc r_remH,r_arg2H
    sbc r_remHL,r_arg2HL
    sbc r_remHH,r_arg2HH
__udivmodsi4_ep:
    rol r_arg1L             ; shift dividend (with CARRY)
    rol r_arg1H
    rol r_arg1HL
    rol r_arg1HH
    dec r_cnt               ; decrement loop counter
    brne __udivmodsi4_loop
    ; __zero_reg__ now restored (r_cnt == 0)
    com r_arg1L             ; un-complement the quotient bits
    com r_arg1H
    com r_arg1HL
    com r_arg1HH
    ; div/mod results to return registers, as for the ldiv() function
    mov_l r_arg2L, r_arg1L  ; quotient
    mov_h r_arg2H, r_arg1H
    mov_l r_arg2HL, r_arg1HL
    mov_h r_arg2HH, r_arg1HH
    mov_l r_arg1L, r_remL   ; remainder
    mov_h r_arg1H, r_remH
    mov_l r_arg1HL, r_remHL
    mov_h r_arg1HH, r_remHH
    ret
ENDF __udivmodsi4
1682 1.1 mrg #endif /* defined (L_udivmodsi4) */
1683 1.1 mrg
1684 1.1 mrg #if defined (L_divmodsi4)
;; Signed 32 / 32 division + remainder.  Dividend (r25:r22) is negated
;; via __negsi2; divisor/quotient (r21:r18) via the local _neg2 helper.
DEFUN __divmodsi4
    mov __tmp_reg__,r_arg2HH
    bst r_arg1HH,7          ; store sign of dividend
    brtc 0f
    com __tmp_reg__         ; r0.7 is sign of result
    XCALL __negsi2          ; dividend negative: negate
0:
    sbrc r_arg2HH,7
    rcall __divmodsi4_neg2  ; divisor negative: negate
    XCALL __udivmodsi4      ; do the unsigned div/mod
    sbrc __tmp_reg__, 7     ; correct quotient sign
    rcall __divmodsi4_neg2
    brtc __divmodsi4_exit   ; correct remainder sign
    XJMP __negsi2           ; tail-call: negate remainder in r25:r22
__divmodsi4_neg2:
    ;; correct divisor/quotient sign
    com r_arg2HH
    com r_arg2HL
    com r_arg2H
    neg r_arg2L
    sbci r_arg2H,0xff
    sbci r_arg2HL,0xff
    sbci r_arg2HH,0xff
__divmodsi4_exit:
    ret
ENDF __divmodsi4
1711 1.1 mrg #endif /* defined (L_divmodsi4) */
1712 1.1 mrg
1713 1.1 mrg #if defined (L_negsi2)
;; (set (reg:SI 22)
;;      (neg:SI (reg:SI 22)))
;; Negate the 32-bit value in r25:r22 in place.
;; Sets the V flag for signed overflow tests
DEFUN __negsi2
    NEG4 22
    ret
ENDF __negsi2
1721 1.1 mrg #endif /* L_negsi2 */
1722 1.1 mrg
1723 1.1 mrg #undef r_remHH
1724 1.1 mrg #undef r_remHL
1725 1.1 mrg #undef r_remH
1726 1.1 mrg #undef r_remL
1727 1.1 mrg #undef r_arg1HH
1728 1.1 mrg #undef r_arg1HL
1729 1.1 mrg #undef r_arg1H
1730 1.1 mrg #undef r_arg1L
1731 1.1 mrg #undef r_arg2HH
1732 1.1 mrg #undef r_arg2HL
1733 1.1.1.2 mrg #undef r_arg2H
1734 1.1.1.2 mrg #undef r_arg2L
1735 1.1.1.2 mrg #undef r_cnt
1736 1.1.1.2 mrg
1737 1.1 mrg /* *di routines use registers below R19 and won't work with tiny arch
1738 1.1 mrg right now. */
1739 1.1 mrg
1740 1.1 mrg #if !defined (__AVR_TINY__)
1741 1.1 mrg /*******************************************************
1742 1.1 mrg Division 64 / 64
1743 1.1 mrg Modulo 64 % 64
1744 1.1 mrg *******************************************************/
1745 1.1 mrg
1746 1.1 mrg ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1747 1.1 mrg ;; at least 16k of Program Memory. For smaller Devices, depend
1748 1.1 mrg ;; on MOVW and SP Size. There is a Connexion between SP Size and
1749 1.1 mrg ;; Flash Size so that SP Size can be used to test for Flash Size.
1750 1.1 mrg
1751 1.1 mrg #if defined (__AVR_HAVE_JMP_CALL__)
1752 1.1 mrg # define SPEED_DIV 8
1753 1.1 mrg #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1754 1.1 mrg # define SPEED_DIV 16
1755 1.1 mrg #else
1756 1.1 mrg # define SPEED_DIV 0
1757 1.1 mrg #endif
1758 1.1 mrg
1759 1.1 mrg ;; A[0..7]: In: Dividend;
1760 1.1 mrg ;; Out: Quotient (T = 0)
1761 1.1 mrg ;; Out: Remainder (T = 1)
1762 1.1 mrg #define A0 18
1763 1.1 mrg #define A1 A0+1
1764 1.1 mrg #define A2 A0+2
1765 1.1 mrg #define A3 A0+3
1766 1.1 mrg #define A4 A0+4
1767 1.1 mrg #define A5 A0+5
1768 1.1 mrg #define A6 A0+6
1769 1.1 mrg #define A7 A0+7
1770 1.1 mrg
1771 1.1 mrg ;; B[0..7]: In: Divisor; Out: Clobber
1772 1.1 mrg #define B0 10
1773 1.1 mrg #define B1 B0+1
1774 1.1 mrg #define B2 B0+2
1775 1.1 mrg #define B3 B0+3
1776 1.1 mrg #define B4 B0+4
1777 1.1 mrg #define B5 B0+5
1778 1.1 mrg #define B6 B0+6
1779 1.1 mrg #define B7 B0+7
1780 1.1 mrg
1781 1.1 mrg ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1782 1.1 mrg #define C0 8
1783 1.1 mrg #define C1 C0+1
1784 1.1 mrg #define C2 30
1785 1.1 mrg #define C3 C2+1
1786 1.1 mrg #define C4 28
1787 1.1 mrg #define C5 C4+1
1788 1.1 mrg #define C6 26
1789 1.1 mrg #define C7 C6+1
1790 1.1 mrg
1791 1.1 mrg ;; Holds Signs during Division Routine
1792 1.1 mrg #define SS __tmp_reg__
1793 1.1 mrg
1794 1.1 mrg ;; Bit-Counter in Division Routine
1795 1.1 mrg #define R_cnt __zero_reg__
1796 1.1 mrg
1797 1.1 mrg ;; Scratch Register for Negation
1798 1.1 mrg #define NN r31
1799 1.1 mrg
1800 1.1 mrg #if defined (L_udivdi3)
1801 1.1 mrg
;; R25:R18 = R24:R18 umod R17:R10
;; Ordinary ABI-Function
DEFUN __umoddi3
    set                     ; T = 1: worker returns the remainder
    rjmp __udivdi3_umoddi3
ENDF __umoddi3
1809 1.1 mrg
;; R25:R18 = R24:R18 udiv R17:R10
;; Ordinary ABI-Function
DEFUN __udivdi3
    clt                     ; T = 0: worker returns the quotient
    ;; FALLTHRU into __udivdi3_umoddi3
ENDF __udivdi3
1816 1.1 mrg
;; Common tail of __udivdi3 / __umoddi3: save the callee-saved
;; registers __udivmod64 clobbers — C0/C1 (r8/r9) and C4/C5 (Y) —
;; then run the worker.  C2/C3 (Z) and C6/C7 (X) are call-clobbered
;; and need no saving.
DEFUN __udivdi3_umoddi3
    push C0
    push C1
    push C4
    push C5
    XCALL __udivmod64
    pop C5
    pop C4
    pop C1
    pop C0
    ret
ENDF __udivdi3_umoddi3
1829 1.1 mrg #endif /* L_udivdi3 */
1830 1.1 mrg
1831 1.1 mrg #if defined (L_udivmod64)
1832 1.1 mrg
;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
;; No Registers saved/restored; the Callers will take Care.
;; Preserves B[] and T-flag
;; T = 0: Compute Quotient in A[]
;; T = 1: Compute Remainder in A[] and shift SS one Bit left
;;
;; Restoring shift-subtract over (up to) 64 bits.  SPEED_DIV variants
;; first skip whole 8-bit (SPEED_DIV == 8) or 32-bit (== 16) chunks of
;; leading zero quotient bits before entering the bit loop at 3:.

DEFUN __udivmod64

    ;; Clear Remainder (C6, C7 will follow)
    clr C0
    clr C1
    wmov C2, C0
    wmov C4, C0
    ldi C7, 64

#if SPEED_DIV == 0 || SPEED_DIV == 16
    ;; Initialize Loop-Counter
    mov R_cnt, C7
    wmov C6, C0
#endif /* SPEED_DIV */

#if SPEED_DIV == 8

    push A7
    clr C6

1:  ;; Compare shifted Dividend against Divisor
    ;; If -- even after Shifting -- it is smaller...
    CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
    cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
    brcc 2f

    ;; ...then we can subtract it. Thus, it is legal to shift left
    $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
    mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
    mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
    mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0

    ;; 8 Bits are done
    subi C7, 8
    brne 1b

    ;; Shifted 64 Bits: A7 has traveled to C7
    pop C7
    ;; Divisor is greater than Dividend. We have:
    ;; A[] % B[] = A[]
    ;; A[] / B[] = 0
    ;; Thus, we can return immediately
    rjmp 5f

2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
    mov R_cnt, C7

    ;; Push of A7 is not needed because C7 is still 0
    pop C7
    clr C7

#elif SPEED_DIV == 16

    ;; Compare shifted Dividend against Divisor
    cp A7, B3
    cpc C0, B4
    cpc C1, B5
    cpc C2, B6
    cpc C3, B7
    brcc 2f

    ;; Divisor is greater than shifted Dividend: We can shift the Dividend
    ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
    wmov C2,A6 $ wmov C0,A4
    wmov A6,A2 $ wmov A4,A0
    wmov A2,C6 $ wmov A0,C4

    ;; Set Bit Counter to 32
    lsr R_cnt
2:
#elif SPEED_DIV
#error SPEED_DIV = ?
#endif /* SPEED_DIV */

    ;; The very Division + Remainder Routine

3:  ;; Left-shift Dividend...
    lsl A0 $ rol A1 $ rol A2 $ rol A3
    rol A4 $ rol A5 $ rol A6 $ rol A7

    ;; ...into Remainder
    rol C0 $ rol C1 $ rol C2 $ rol C3
    rol C4 $ rol C5 $ rol C6 $ rol C7

    ;; Compare Remainder and Divisor
    CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
    cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7

    brcs 4f

    ;; Divisor fits into Remainder: Subtract it from Remainder...
    SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
    sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7

    ;; ...and set according Bit in the upcoming Quotient
    ;; The Bit will travel to its final Position
    ori A0, 1

4:  ;; This Bit is done
    dec R_cnt
    brne 3b
    ;; __zero_reg__ is 0 again

    ;; T = 0: We are fine with the Quotient in A[]
    ;; T = 1: Copy Remainder to A[]
5:  brtc 6f
    wmov A0, C0
    wmov A2, C2
    wmov A4, C4
    wmov A6, C6
    ;; Move the Sign of the Result to SS.7
    lsl SS

6:  ret

ENDF __udivmod64
1955 1.1 mrg #endif /* L_udivmod64 */
1956 1.1 mrg
1957 1.1 mrg
1958 1.1 mrg #if defined (L_divdi3)
1959 1.1 mrg
;; R25:R18 = R24:R18 mod R17:R10
;; Ordinary ABI-Function
DEFUN __moddi3
    set                     ; T = 1: worker returns the remainder
    rjmp __divdi3_moddi3
ENDF __moddi3
1967 1.1 mrg
;; R25:R18 = R24:R18 div R17:R10
;; Ordinary ABI-Function
DEFUN __divdi3
    clt                     ; T = 0: worker returns the quotient
    ;; FALLTHRU into __divdi3_moddi3
ENDF __divdi3
1974 1.1 mrg
;; Common tail of __divdi3 / __moddi3: take absolute values, run the
;; unsigned worker __udivmod64 (T selects quotient/remainder), then
;; negate the result as dictated by SS (set up below).
DEFUN __divdi3_moddi3
#if SPEED_DIV
    mov r31, A7
    or r31, B7
    brmi 0f
    ;; Both Signs are 0: the following Complexity is not needed
    XJMP __udivdi3_umoddi3
#endif /* SPEED_DIV */

0:  ;; The Prologue
    ;; Save 12 Registers: Y, 17...8
    ;; No Frame needed
    do_prologue_saves 12

    ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
    ;; SS.6 will contain the Sign of the Remainder (A.sign)
    mov SS, A7
    asr SS
    ;; Adjust Dividend's Sign as needed
#if SPEED_DIV
    ;; Compiling for Speed we know that at least one Sign must be < 0
    ;; Thus, if A[] >= 0 then we know B[] < 0
    brpl 22f
#else
    brpl 21f
#endif /* SPEED_DIV */

    XCALL __negdi2

    ;; Adjust Divisor's Sign and SS.7 as needed
21: tst B7
    brpl 3f
22: ldi NN, 1 << 7
    eor SS, NN              ; flip SS.7: quotient sign = A.sign ^ B.sign

    ldi NN, -1              ; B[] = -B[] (complement all, then +1 via NEG/SBC chain)
    com B4 $ com B5 $ com B6 $ com B7
    $ com B1 $ com B2 $ com B3
    NEG B0
    $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
    sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN

3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
    XCALL __udivmod64

    ;; Adjust Result's Sign
    ;; (__udivmod64 shifted SS left when computing the remainder, so
    ;; SS.7 holds the proper result sign in either mode)
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    tst SS
    brpl 4f
#else
    sbrc SS, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XCALL __negdi2

4:  ;; Epilogue: Restore 12 Registers and return
    do_epilogue_restores 12

ENDF __divdi3_moddi3
2033 1.1 mrg
2034 1.1 mrg #endif /* L_divdi3 */
2035 1.1 mrg
2036 1.1 mrg #undef R_cnt
2037 1.1 mrg #undef SS
2038 1.1 mrg #undef NN
2039 1.1 mrg
2040 1.1 mrg .section .text.libgcc, "ax", @progbits
2041 1.1 mrg
2042 1.1 mrg #define TT __tmp_reg__
2043 1.1 mrg
2044 1.1 mrg #if defined (L_adddi3)
2045 1.1 mrg ;; (set (reg:DI 18)
2046 1.1 mrg ;; (plus:DI (reg:DI 18)
2047 1.1 mrg ;; (reg:DI 10)))
2048 1.1 mrg ;; Sets the V flag for signed overflow tests
2049 1.1 mrg ;; Sets the C flag for unsigned overflow tests
2050 1.1 mrg 	DEFUN __adddi3
             	    ;; A[] (r25:r18) += B[] (r17:r10); ripple the carry through all 8 bytes.
2051 1.1 mrg 	    ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
2052 1.1 mrg 	    adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
2053 1.1 mrg 	    ret
2054 1.1 mrg 	ENDF __adddi3
2055 1.1 mrg #endif /* L_adddi3 */
2056 1.1 mrg
2057 1.1 mrg #if defined (L_adddi3_s8)
2058 1.1 mrg ;; (set (reg:DI 18)
2059 1.1 mrg ;; (plus:DI (reg:DI 18)
2060 1.1 mrg ;; (sign_extend:SI (reg:QI 26))))
2061 1.1 mrg ;; Sets the V flag for signed overflow tests
2062 1.1 mrg ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
2063 1.1 mrg 	DEFUN __adddi3_s8
             	    ;; TT (= __tmp_reg__) becomes the sign extension of r26: 0x00 or 0xff.
2064 1.1 mrg 	    clr TT
2065 1.1 mrg 	    sbrc r26, 7
2066 1.1 mrg 	    com TT
             	    ;; A[] += sign_extend64 (r26)
2067 1.1 mrg 	    ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2068 1.1 mrg 	    adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
2069 1.1 mrg 	    ret
2070 1.1 mrg 	ENDF __adddi3_s8
2071 1.1 mrg #endif /* L_adddi3_s8 */
2072 1.1 mrg
2073 1.1 mrg #if defined (L_subdi3)
2074 1.1 mrg ;; (set (reg:DI 18)
2075 1.1 mrg ;; (minus:DI (reg:DI 18)
2076 1.1 mrg ;; (reg:DI 10)))
2077 1.1 mrg ;; Sets the V flag for signed overflow tests
2078 1.1 mrg ;; Sets the C flag for unsigned overflow tests
2079 1.1 mrg 	DEFUN __subdi3
             	    ;; A[] (r25:r18) -= B[] (r17:r10); ripple the borrow through all 8 bytes.
2080 1.1 mrg 	    SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2081 1.1 mrg 	    sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2082 1.1 mrg 	    ret
2083 1.1 mrg 	ENDF __subdi3
2084 1.1 mrg #endif /* L_subdi3 */
2085 1.1 mrg
2086 1.1 mrg #if defined (L_cmpdi2)
2087 1.1 mrg ;; (set (cc0)
2088 1.1 mrg ;; (compare (reg:DI 18)
2089 1.1 mrg ;; (reg:DI 10)))
2090 1.1 mrg 	DEFUN __cmpdi2
             	    ;; Flags are set as for A[] - B[]; no register is modified.
2091 1.1 mrg 	    CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2092 1.1 mrg 	    cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2093 1.1 mrg 	    ret
2094 1.1 mrg 	ENDF __cmpdi2
2095 1.1 mrg #endif /* L_cmpdi2 */
2096 1.1 mrg
2097 1.1 mrg #if defined (L_cmpdi2_s8)
2098 1.1 mrg ;; (set (cc0)
2099 1.1 mrg ;; (compare (reg:DI 18)
2100 1.1 mrg ;; (sign_extend:SI (reg:QI 26))))
2101 1.1 mrg 	DEFUN __cmpdi2_s8
             	    ;; TT (= __tmp_reg__) becomes the sign extension of r26: 0x00 or 0xff.
2102 1.1 mrg 	    clr TT
2103 1.1 mrg 	    sbrc r26, 7
2104 1.1 mrg 	    com TT
             	    ;; Flags as for A[] - sign_extend64 (r26).
2105 1.1 mrg 	    CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2106 1.1 mrg 	    cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2107 1.1 mrg 	    ret
2108 1.1 mrg 	ENDF __cmpdi2_s8
2109 1.1 mrg #endif /* L_cmpdi2_s8 */
2110 1.1 mrg
2111 1.1 mrg #if defined (L_negdi2)
2112 1.1 mrg ;; (set (reg:DI 18)
2113 1.1 mrg ;; (neg:DI (reg:DI 18)))
2114 1.1 mrg ;; Sets the V flag for signed overflow tests
2115 1.1 mrg 	DEFUN __negdi2
2116 1.1 mrg 	
             	    ;; Two's complement: complement bytes 1..7, negate byte 0, then
             	    ;; propagate the borrow upward (sbci -1 subtracts 0 minus carry).
2117 1.1 mrg 	    com A4 $ com A5 $ com A6 $ com A7
2118 1.1 mrg 	    $ com A1 $ com A2 $ com A3
2119 1.1 mrg 	    NEG A0
2120 1.1 mrg 	    $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2121 1.1 mrg 	    sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2122 1.1 mrg 	    ret
2123 1.1 mrg 	
2124 1.1 mrg 	ENDF __negdi2
2125 1.1 mrg #endif /* L_negdi2 */
2126 1.1 mrg
2127 1.1 mrg #undef TT
2128 1.1 mrg
2129 1.1 mrg #undef C7
2130 1.1 mrg #undef C6
2131 1.1 mrg #undef C5
2132 1.1 mrg #undef C4
2133 1.1 mrg #undef C3
2134 1.1 mrg #undef C2
2135 1.1 mrg #undef C1
2136 1.1 mrg #undef C0
2137 1.1 mrg
2138 1.1 mrg #undef B7
2139 1.1 mrg #undef B6
2140 1.1 mrg #undef B5
2141 1.1 mrg #undef B4
2142 1.1 mrg #undef B3
2143 1.1 mrg #undef B2
2144 1.1 mrg #undef B1
2145 1.1 mrg #undef B0
2146 1.1 mrg
2147 1.1 mrg #undef A7
2148 1.1 mrg #undef A6
2149 1.1 mrg #undef A5
2150 1.1 mrg #undef A4
2151 1.1 mrg #undef A3
2152 1.1.1.2 mrg #undef A2
2153 1.1.1.2 mrg #undef A1
2154 1.1 mrg #undef A0
2155 1.1 mrg
2156 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2157 1.1 mrg
2158 1.1 mrg
2159 1.1 mrg .section .text.libgcc.prologue, "ax", @progbits
2161 1.1 mrg
2162 1.1 mrg /**********************************
2163 1.1 mrg * This is a prologue subroutine
2164 1.1 mrg **********************************/
2165 1.1 mrg #if !defined (__AVR_TINY__)
2166 1.1 mrg #if defined (L_prologue)
2167 1.1 mrg
2168 1.1 mrg ;; This function does not clobber T-flag; 64-bit division relies on it
             	;; Push r2..r17, r28, r29, then allocate r27:r26 bytes of stack frame
             	;; and leave Y = new stack pointer.  Continues at the address in Z
             	;; (XIJMP).  NOTE(review): callers appear to enter at an offset into
             	;; the push sequence to save fewer registers (see do_prologue_saves) —
             	;; confirm against the macro definition earlier in this file.
2169 1.1 mrg 	DEFUN __prologue_saves__
2170 1.1 mrg 	    push r2
2171 1.1 mrg 	    push r3
2172 1.1 mrg 	    push r4
2173 1.1 mrg 	    push r5
2174 1.1 mrg 	    push r6
2175 1.1 mrg 	    push r7
2176 1.1 mrg 	    push r8
2177 1.1 mrg 	    push r9
2178 1.1 mrg 	    push r10
2179 1.1 mrg 	    push r11
2180 1.1 mrg 	    push r12
2181 1.1 mrg 	    push r13
2182 1.1 mrg 	    push r14
2183 1.1 mrg 	    push r15
2184 1.1 mrg 	    push r16
2185 1.1 mrg 	    push r17
2186 1.1 mrg 	    push r28
2187 1.1 mrg 	    push r29
2188 1.1 mrg 	#if !defined (__AVR_HAVE_SPH__)
             	    ;; 8-bit stack pointer: no SPH, high byte of Y is simply 0.
2189 1.1 mrg 	    in r28,__SP_L__
2190 1.1 mrg 	    sub r28,r26
2191 1.1 mrg 	    out __SP_L__,r28
2192 1.1 mrg 	    clr r29
2193 1.1 mrg 	#elif defined (__AVR_XMEGA__)
             	    ;; XMEGA updates SP atomically; no need to disable interrupts.
2194 1.1 mrg 	    in r28,__SP_L__
2195 1.1 mrg 	    in r29,__SP_H__
2196 1.1 mrg 	    sub r28,r26
2197 1.1 mrg 	    sbc r29,r27
2198 1.1 mrg 	    out __SP_L__,r28
2199 1.1 mrg 	    out __SP_H__,r29
2200 1.1 mrg 	#else
             	    ;; Classic cores: write SPH/SPL with interrupts disabled so the
             	    ;; 16-bit SP update cannot be torn; SREG (with the old I-flag) is
             	    ;; restored in between.
2201 1.1 mrg 	    in r28,__SP_L__
2202 1.1 mrg 	    in r29,__SP_H__
2203 1.1 mrg 	    sub r28,r26
2204 1.1 mrg 	    sbc r29,r27
2205 1.1 mrg 	    in __tmp_reg__,__SREG__
2206 1.1 mrg 	    cli
2207 1.1.1.2 mrg 	    out __SP_H__,r29
2208 1.1 mrg 	    out __SREG__,__tmp_reg__
2209 1.1 mrg 	    out __SP_L__,r28
2210 1.1 mrg 	#endif /* #SP = 8/16 */
2211 1.1 mrg 	
2212 1.1 mrg 	    XIJMP
2213 1.1 mrg 	
2214 1.1 mrg 	ENDF __prologue_saves__
2215 1.1 mrg #endif /* defined (L_prologue) */
2216 1.1 mrg
2217 1.1 mrg /*
2218 1.1 mrg * This is an epilogue subroutine
2219 1.1 mrg */
2220 1.1 mrg #if defined (L_epilogue)
2221 1.1 mrg
             	;; Counterpart of __prologue_saves__: reload r2..r17 and Y from the
             	;; frame (displacements Y+1..Y+18), release r30 bytes of frame, and ret.
2222 1.1 mrg 	DEFUN __epilogue_restores__
2223 1.1 mrg 	    ldd r2,Y+18
2224 1.1 mrg 	    ldd r3,Y+17
2225 1.1 mrg 	    ldd r4,Y+16
2226 1.1 mrg 	    ldd r5,Y+15
2227 1.1 mrg 	    ldd r6,Y+14
2228 1.1 mrg 	    ldd r7,Y+13
2229 1.1 mrg 	    ldd r8,Y+12
2230 1.1 mrg 	    ldd r9,Y+11
2231 1.1 mrg 	    ldd r10,Y+10
2232 1.1 mrg 	    ldd r11,Y+9
2233 1.1 mrg 	    ldd r12,Y+8
2234 1.1 mrg 	    ldd r13,Y+7
2235 1.1 mrg 	    ldd r14,Y+6
2236 1.1 mrg 	    ldd r15,Y+5
2237 1.1 mrg 	    ldd r16,Y+4
2238 1.1 mrg 	    ldd r17,Y+3
             	    ;; Caller's Y is staged in r27:r26 until SP has been rewritten.
2239 1.1 mrg 	    ldd r26,Y+2
2240 1.1 mrg 	#if !defined (__AVR_HAVE_SPH__)
2241 1.1 mrg 	    ldd r29,Y+1
2242 1.1 mrg 	    add r28,r30
2243 1.1 mrg 	    out __SP_L__,r28
2244 1.1 mrg 	    mov r28, r26
2245 1.1 mrg 	#elif defined (__AVR_XMEGA__)
2246 1.1 mrg 	    ldd r27,Y+1
2247 1.1 mrg 	    add r28,r30
2248 1.1 mrg 	    adc r29,__zero_reg__
2249 1.1 mrg 	    out __SP_L__,r28
2250 1.1 mrg 	    out __SP_H__,r29
2251 1.1 mrg 	    wmov 28, 26
2252 1.1 mrg 	#else
             	    ;; Classic cores: interrupts disabled around the 16-bit SP write.
2253 1.1 mrg 	    ldd r27,Y+1
2254 1.1 mrg 	    add r28,r30
2255 1.1 mrg 	    adc r29,__zero_reg__
2256 1.1 mrg 	    in __tmp_reg__,__SREG__
2257 1.1 mrg 	    cli
2258 1.1 mrg 	    out __SP_H__,r29
2259 1.1 mrg 	    out __SREG__,__tmp_reg__
2260 1.1 mrg 	    out __SP_L__,r28
2261 1.1 mrg 	    mov_l r28, r26
2262 1.1.1.2 mrg 	    mov_h r29, r27
2263 1.1 mrg 	#endif /* #SP = 8/16 */
2264 1.1 mrg 	    ret
2265 1.1 mrg 	ENDF __epilogue_restores__
2266 1.1 mrg #endif /* defined (L_epilogue) */
2267 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2268 1.1 mrg
2269 1.1 mrg #ifdef L_exit
2270 1.1 mrg .section .fini9,"ax",@progbits
2271 1.1 mrg 	DEFUN _exit
2272 1.1 mrg 	    .weak exit
2273 1.1 mrg 	exit:
             	;; exit is a weak alias of _exit; both fall through into the code the
             	;; linker script places in .fini8 ... .fini0 (see below).
2274 1.1 mrg 	ENDF _exit
2275 1.1 mrg
2276 1.1 mrg /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2277 1.1 mrg
2278 1.1 mrg .section .fini0,"ax",@progbits
2279 1.1 mrg cli
2280 1.1 mrg __stop_program:
2281 1.1 mrg rjmp __stop_program
2282 1.1 mrg #endif /* defined (L_exit) */
2283 1.1 mrg
2284 1.1 mrg #ifdef L_cleanup
2285 1.1 mrg 	.weak _cleanup
2286 1.1 mrg 	.func _cleanup
2287 1.1 mrg 	_cleanup:
             	    ;; Weak default _cleanup: does nothing; a real implementation
             	    ;; elsewhere can override this symbol.
2288 1.1 mrg 	    ret
2289 1.1 mrg 	.endfunc
2290 1.1.1.2 mrg #endif /* defined (L_cleanup) */
2291 1.1 mrg
2292 1.1.1.2 mrg
2293 1.1.1.2 mrg .section .text.libgcc, "ax", @progbits
2295 1.1.1.2 mrg
2296 1.1.1.2 mrg #ifdef L_tablejump2
             	;; Jump through a jumptable entry.  In: Z (plus R24 on EIJMP devices)
             	;; holds the flash WORD address of the entry; it is converted to a
             	;; byte address, the target word address is fetched from flash, and
             	;; control transfers there.
2297 1.1.1.2 mrg 	DEFUN __tablejump2__
2298 1.1.1.2 mrg 	    lsl r30
2299 1.1.1.2 mrg 	    rol r31
2300 1.1.1.2 mrg 	#if defined (__AVR_HAVE_EIJMP_EICALL__)
2301 1.1.1.2 mrg 	    ;; Word address of gs() jumptable entry in R24:Z
2302 1.1.1.2 mrg 	    rol r24
2303 1.1 mrg 	    out __RAMPZ__, r24
2304 1.1 mrg 	#elif defined (__AVR_HAVE_ELPM__)
2305 1.1.1.2 mrg 	    ;; Word address of jumptable entry in Z
2306 1.1.1.2 mrg 	    clr __tmp_reg__
2307 1.1.1.2 mrg 	    rol __tmp_reg__
2308 1.1.1.2 mrg 	    out __RAMPZ__, __tmp_reg__
2309 1.1.1.2 mrg 	#endif
2310 1.1.1.2 mrg 	
2311 1.1.1.2 mrg 	    ;; Read word address from jumptable and jump
2312 1.1.1.2 mrg 	
2313 1.1.1.2 mrg 	#if defined (__AVR_HAVE_ELPMX__)
2314 1.1.1.2 mrg 	    elpm __tmp_reg__, Z+
2315 1.1.1.2 mrg 	    elpm r31, Z
2316 1.1.1.2 mrg 	    mov r30, __tmp_reg__
2317 1.1.1.2 mrg 	#ifdef __AVR_HAVE_RAMPD__
2318 1.1.1.2 mrg 	    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2319 1.1.1.2 mrg 	    out __RAMPZ__, __zero_reg__
2320 1.1.1.2 mrg 	#endif /* RAMPD */
2321 1.1.1.2 mrg 	    XIJMP
2322 1.1.1.2 mrg 	#elif defined (__AVR_HAVE_ELPM__)
             	    ;; No ELPM Z+ form: push the target address and "return" into it.
2323 1.1.1.2 mrg 	    elpm
2324 1.1.1.2 mrg 	    push r0
2325 1.1.1.2 mrg 	    adiw r30, 1
2326 1.1.1.2 mrg 	    elpm
2327 1.1.1.2 mrg 	    push r0
2328 1.1.1.2 mrg 	    ret
2329 1.1.1.2 mrg 	#elif defined (__AVR_HAVE_LPMX__)
2330 1.1.1.2 mrg 	    lpm __tmp_reg__, Z+
2331 1.1.1.2 mrg 	    lpm r31, Z
2332 1.1.1.2 mrg 	    mov r30, __tmp_reg__
2333 1.1.1.2 mrg 	    ijmp
2334 1.1.1.2 mrg 	#elif defined (__AVR_TINY__)
2335 1.1.1.2 mrg 	    wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2336 1.1.1.2 mrg 	    ld __tmp_reg__, Z+
2337 1.1.1.2 mrg 	    ld r31, Z ; Use ld instead of lpm to load Z
2338 1.1.1.2 mrg 	    mov r30, __tmp_reg__
2339 1.1.1.2 mrg 	    ijmp
2340 1.1.1.2 mrg 	#else
             	    ;; Plain LPM only: push the target address and "return" into it.
2341 1.1 mrg 	    lpm
2342 1.1.1.2 mrg 	    push r0
2343 1.1.1.2 mrg 	    adiw r30, 1
2344 1.1 mrg 	    lpm
2345 1.1.1.2 mrg 	    push r0
2346 1.1.1.2 mrg 	    ret
2347 1.1.1.2 mrg 	#endif
2348 1.1.1.2 mrg 	ENDF __tablejump2__
2349 1.1.1.2 mrg #endif /* L_tablejump2 */
2350 1.1.1.2 mrg
2351 1.1.1.2 mrg #if defined(__AVR_TINY__)
2352 1.1.1.2 mrg #ifdef L_copy_data
2353 1.1.1.2 mrg .section .init4,"ax",@progbits
             	;; AVR_TINY: copy the .data image from flash to RAM with plain ld/st;
             	;; flash is memory-mapped at __AVR_TINY_PM_BASE_ADDRESS__.
2354 1.1.1.2 mrg 	.global __do_copy_data
2355 1.1.1.2 mrg 	__do_copy_data:
2356 1.1.1.2 mrg 	    ldi r18, hi8(__data_end)
2357 1.1.1.2 mrg 	    ldi r26, lo8(__data_start)
2358 1.1.1.2 mrg 	    ldi r27, hi8(__data_start)
2359 1.1.1.2 mrg 	    ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2360 1.1.1.2 mrg 	    ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2361 1.1.1.2 mrg 	    rjmp .L__do_copy_data_start
2362 1.1.1.2 mrg 	.L__do_copy_data_loop:
2363 1.1.1.2 mrg 	    ld r19, z+
2364 1.1.1.2 mrg 	    st X+, r19
2365 1.1 mrg 	.L__do_copy_data_start:
2366 1.1 mrg 	    cpi r26, lo8(__data_end)
2367 1.1 mrg 	    cpc r27, r18
2368 1.1 mrg 	    brne .L__do_copy_data_loop
2369 1.1 mrg #endif
2370 1.1 mrg #else
2371 1.1 mrg #ifdef L_copy_data
2372 1.1 mrg .section .init4,"ax",@progbits
             	;; Copy the initialized-data image (__data_load_start) from flash to
             	;; .data in RAM (__data_start .. __data_end).  Three variants,
             	;; selected by the flash-read instructions the device provides.
2373 1.1 mrg 	DEFUN __do_copy_data
2374 1.1 mrg 	#if defined(__AVR_HAVE_ELPMX__)
2375 1.1 mrg 	    ldi r17, hi8(__data_end)
2376 1.1 mrg 	    ldi r26, lo8(__data_start)
2377 1.1 mrg 	    ldi r27, hi8(__data_start)
2378 1.1 mrg 	    ldi r30, lo8(__data_load_start)
2379 1.1 mrg 	    ldi r31, hi8(__data_load_start)
2380 1.1 mrg 	    ldi r16, hh8(__data_load_start)
2381 1.1 mrg 	    out __RAMPZ__, r16
2382 1.1 mrg 	    rjmp .L__do_copy_data_start
2383 1.1 mrg 	.L__do_copy_data_loop:
2384 1.1 mrg 	    elpm r0, Z+
2385 1.1 mrg 	    st X+, r0
2386 1.1 mrg 	.L__do_copy_data_start:
2387 1.1 mrg 	    cpi r26, lo8(__data_end)
2388 1.1 mrg 	    cpc r27, r17
2389 1.1 mrg 	    brne .L__do_copy_data_loop
2390 1.1 mrg 	#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
             	    ;; ELPM without auto-increment: bump RAMPZ by hand whenever
             	    ;; adiw on Z wraps (carry set).
2391 1.1 mrg 	    ldi r17, hi8(__data_end)
2392 1.1 mrg 	    ldi r26, lo8(__data_start)
2393 1.1 mrg 	    ldi r27, hi8(__data_start)
2394 1.1 mrg 	    ldi r30, lo8(__data_load_start)
2395 1.1 mrg 	    ldi r31, hi8(__data_load_start)
2396 1.1 mrg 	    ldi r16, hh8(__data_load_start - 0x10000)
2397 1.1 mrg 	.L__do_copy_data_carry:
2398 1.1 mrg 	    inc r16
2399 1.1 mrg 	    out __RAMPZ__, r16
2400 1.1 mrg 	    rjmp .L__do_copy_data_start
2401 1.1 mrg 	.L__do_copy_data_loop:
2402 1.1 mrg 	    elpm
2403 1.1 mrg 	    st X+, r0
2404 1.1 mrg 	    adiw r30, 1
2405 1.1 mrg 	    brcs .L__do_copy_data_carry
2406 1.1 mrg 	.L__do_copy_data_start:
2407 1.1 mrg 	    cpi r26, lo8(__data_end)
2408 1.1 mrg 	    cpc r27, r17
2409 1.1 mrg 	    brne .L__do_copy_data_loop
2410 1.1 mrg 	#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
             	    ;; <= 64 KiB flash: plain LPM (with Z+ where available).
2411 1.1 mrg 	    ldi r17, hi8(__data_end)
2412 1.1 mrg 	    ldi r26, lo8(__data_start)
2413 1.1 mrg 	    ldi r27, hi8(__data_start)
2414 1.1 mrg 	    ldi r30, lo8(__data_load_start)
2415 1.1 mrg 	    ldi r31, hi8(__data_load_start)
2416 1.1 mrg 	    rjmp .L__do_copy_data_start
2417 1.1 mrg 	.L__do_copy_data_loop:
2418 1.1 mrg 	#if defined (__AVR_HAVE_LPMX__)
2419 1.1 mrg 	    lpm r0, Z+
2420 1.1 mrg 	#else
2421 1.1 mrg 	    lpm
2422 1.1 mrg 	    adiw r30, 1
2423 1.1 mrg 	#endif
2424 1.1 mrg 	    st X+, r0
2425 1.1 mrg 	.L__do_copy_data_start:
2426 1.1 mrg 	    cpi r26, lo8(__data_end)
2427 1.1 mrg 	    cpc r27, r17
2428 1.1 mrg 	    brne .L__do_copy_data_loop
2429 1.1 mrg 	#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2430 1.1.1.2 mrg 	#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2431 1.1 mrg 	    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2432 1.1 mrg 	    out __RAMPZ__, __zero_reg__
2433 1.1 mrg 	#endif /* ELPM && RAMPD */
2434 1.1 mrg 	ENDF __do_copy_data
2435 1.1 mrg #endif /* L_copy_data */
2436 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2437 1.1.1.2 mrg
2438 1.1 mrg /* __do_clear_bss is only necessary if there is anything in .bss section. */
2439 1.1 mrg
2440 1.1 mrg #ifdef L_clear_bss
2441 1.1 mrg .section .init4,"ax",@progbits
             	;; Zero RAM from __bss_start up to (but excluding) __bss_end.
2442 1.1 mrg 	DEFUN __do_clear_bss
2443 1.1 mrg 	    ldi r18, hi8(__bss_end)
2444 1.1 mrg 	    ldi r26, lo8(__bss_start)
2445 1.1.1.2 mrg 	    ldi r27, hi8(__bss_start)
2446 1.1 mrg 	    rjmp .do_clear_bss_start
2447 1.1 mrg 	.do_clear_bss_loop:
2448 1.1 mrg 	    st X+, __zero_reg__
2449 1.1 mrg 	.do_clear_bss_start:
2450 1.1 mrg 	    cpi r26, lo8(__bss_end)
2451 1.1 mrg 	    cpc r27, r18
2452 1.1 mrg 	    brne .do_clear_bss_loop
2453 1.1.1.2 mrg 	ENDF __do_clear_bss
2454 1.1.1.2 mrg #endif /* L_clear_bss */
2455 1.1.1.2 mrg
2456 1.1.1.2 mrg /* __do_global_ctors and __do_global_dtors are only necessary
2457 1.1.1.2 mrg if there are any constructors/destructors. */
2458 1.1.1.2 mrg
2459 1.1 mrg #if defined(__AVR_TINY__)
2460 1.1 mrg #define cdtors_tst_reg r18
2461 1.1 mrg #else
2462 1.1.1.2 mrg #define cdtors_tst_reg r17
2463 1.1.1.2 mrg #endif
2464 1.1.1.2 mrg
2465 1.1.1.2 mrg #ifdef L_ctors
2466 1.1.1.2 mrg .section .init6,"ax",@progbits
             	;; Run every constructor from __ctors_end down towards __ctors_start,
             	;; dispatching each word-address table entry through __tablejump2__.
             	;; Y (and r16 on EIJMP devices) tracks the current entry.
2467 1.1.1.2 mrg 	DEFUN __do_global_ctors
2468 1.1.1.2 mrg 	    ldi cdtors_tst_reg, pm_hi8(__ctors_start)
2469 1.1 mrg 	    ldi r28, pm_lo8(__ctors_end)
2470 1.1.1.2 mrg 	    ldi r29, pm_hi8(__ctors_end)
2471 1.1.1.2 mrg 	#ifdef __AVR_HAVE_EIJMP_EICALL__
2472 1.1.1.2 mrg 	    ldi r16, pm_hh8(__ctors_end)
2473 1.1.1.2 mrg 	#endif /* HAVE_EIJMP */
2474 1.1.1.2 mrg 	    rjmp .L__do_global_ctors_start
2475 1.1.1.2 mrg 	.L__do_global_ctors_loop:
             	    ;; Step backwards one table entry, then call it.
2476 1.1.1.2 mrg 	    wsubi 28, 1
2477 1.1.1.2 mrg 	#ifdef __AVR_HAVE_EIJMP_EICALL__
2478 1.1 mrg 	    sbc r16, __zero_reg__
2479 1.1.1.2 mrg 	    mov r24, r16
2480 1.1.1.2 mrg 	#endif /* HAVE_EIJMP */
2481 1.1.1.2 mrg 	    mov_h r31, r29
2482 1.1.1.2 mrg 	    mov_l r30, r28
2483 1.1.1.2 mrg 	    XCALL __tablejump2__
2484 1.1.1.2 mrg 	.L__do_global_ctors_start:
2485 1.1.1.2 mrg 	    cpi r28, pm_lo8(__ctors_start)
2486 1.1 mrg 	    cpc r29, cdtors_tst_reg
2487 1.1 mrg 	#ifdef __AVR_HAVE_EIJMP_EICALL__
2488 1.1 mrg 	    ldi r24, pm_hh8(__ctors_start)
2489 1.1 mrg 	    cpc r16, r24
2490 1.1 mrg 	#endif /* HAVE_EIJMP */
2491 1.1 mrg 	    brne .L__do_global_ctors_loop
2492 1.1.1.2 mrg 	ENDF __do_global_ctors
2493 1.1.1.2 mrg #endif /* L_ctors */
2494 1.1.1.2 mrg
2495 1.1.1.2 mrg #ifdef L_dtors
2496 1.1.1.2 mrg .section .fini6,"ax",@progbits
             	;; Run every destructor from __dtors_start up towards __dtors_end,
             	;; dispatching each word-address table entry through __tablejump2__.
             	;; Y (and r16 on EIJMP devices) tracks the current entry.
2497 1.1.1.2 mrg 	DEFUN __do_global_dtors
2498 1.1.1.2 mrg 	    ldi cdtors_tst_reg, pm_hi8(__dtors_end)
2499 1.1 mrg 	    ldi r28, pm_lo8(__dtors_start)
2500 1.1.1.2 mrg 	    ldi r29, pm_hi8(__dtors_start)
2501 1.1.1.2 mrg 	#ifdef __AVR_HAVE_EIJMP_EICALL__
2502 1.1.1.2 mrg 	    ldi r16, pm_hh8(__dtors_start)
2503 1.1.1.2 mrg 	#endif /* HAVE_EIJMP */
2504 1.1.1.2 mrg 	    rjmp .L__do_global_dtors_start
2505 1.1.1.2 mrg 	.L__do_global_dtors_loop:
2506 1.1.1.2 mrg 	#ifdef __AVR_HAVE_EIJMP_EICALL__
2507 1.1.1.2 mrg 	    mov r24, r16
2508 1.1.1.2 mrg 	#endif /* HAVE_EIJMP */
2509 1.1.1.2 mrg 	    mov_h r31, r29
2510 1.1 mrg 	    mov_l r30, r28
2511 1.1.1.2 mrg 	    XCALL __tablejump2__
             	    ;; Step forwards one table entry.
2512 1.1.1.2 mrg 	    waddi 28, 1
2513 1.1.1.2 mrg 	#ifdef __AVR_HAVE_EIJMP_EICALL__
2514 1.1.1.2 mrg 	    adc r16, __zero_reg__
2515 1.1.1.2 mrg 	#endif /* HAVE_EIJMP */
2516 1.1.1.2 mrg 	.L__do_global_dtors_start:
2517 1.1.1.2 mrg 	    cpi r28, pm_lo8(__dtors_end)
2518 1.1 mrg 	    cpc r29, cdtors_tst_reg
2519 1.1 mrg 	#ifdef __AVR_HAVE_EIJMP_EICALL__
2520 1.1 mrg 	    ldi r24, pm_hh8(__dtors_end)
2521 1.1.1.2 mrg 	    cpc r16, r24
2522 1.1 mrg 	#endif /* HAVE_EIJMP */
2523 1.1.1.2 mrg 	    brne .L__do_global_dtors_loop
2524 1.1 mrg 	ENDF __do_global_dtors
2525 1.1.1.2 mrg #endif /* L_dtors */
2526 1.1 mrg
2527 1.1 mrg #undef cdtors_tst_reg
2528 1.1 mrg
2529 1.1 mrg .section .text.libgcc, "ax", @progbits
2530 1.1 mrg
2531 1.1 mrg #if !defined (__AVR_TINY__)
2532 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2533 1.1 mrg ;; Loading n bytes from Flash; n = 3,4
2534 1.1 mrg ;; R22... = Flash[Z]
2535 1.1 mrg ;; Clobbers: __tmp_reg__
2536 1.1 mrg
2537 1.1 mrg #if (defined (L_load_3) \
2538 1.1 mrg || defined (L_load_4)) \
2539 1.1 mrg && !defined (__AVR_HAVE_LPMX__)
2540 1.1 mrg
2541 1.1 mrg ;; Destination
2542 1.1 mrg #define D0 22
2543 1.1 mrg #define D1 D0+1
2544 1.1 mrg #define D2 D0+2
2545 1.1 mrg #define D3 D0+3
2546 1.1 mrg
             	;; Load one byte of an \n-byte flash read into \dest via LPM.
             	;; Z advances after each byte except the last, where it is rewound
             	;; by \n-1 so Z ends up back at its entry value.
2547 1.1 mrg 	.macro .load dest, n
2548 1.1 mrg 	    lpm
2549 1.1 mrg 	    mov \dest, r0
2550 1.1 mrg 	    .if \dest != D0+\n-1
2551 1.1 mrg 	    adiw r30, 1
2552 1.1 mrg 	    .else
2553 1.1 mrg 	    sbiw r30, \n-1
2554 1.1 mrg 	    .endif
2555 1.1 mrg 	.endm
2556 1.1 mrg
2557 1.1 mrg #if defined (L_load_3)
2558 1.1 mrg 	DEFUN __load_3
             	    ;; Load 3 bytes by running __load_4 and preserving the caller's D3.
2559 1.1 mrg 	    push D3
2560 1.1 mrg 	    XCALL __load_4
2561 1.1 mrg 	    pop D3
2562 1.1 mrg 	    ret
2563 1.1 mrg 	ENDF __load_3
2564 1.1 mrg #endif /* L_load_3 */
2565 1.1 mrg
2566 1.1 mrg #if defined (L_load_4)
2567 1.1 mrg 	DEFUN __load_4
             	    ;; D3:D0 (r25:r22) = 4 bytes at Flash[Z]; Z is preserved (see .load).
2568 1.1 mrg 	    .load D0, 4
2569 1.1 mrg 	    .load D1, 4
2570 1.1 mrg 	    .load D2, 4
2571 1.1.1.2 mrg 	    .load D3, 4
2572 1.1 mrg 	    ret
2573 1.1.1.2 mrg 	ENDF __load_4
2574 1.1 mrg #endif /* L_load_4 */
2575 1.1 mrg
2576 1.1 mrg 	#endif /* L_load_3 || L_load_4 */
2577 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2578 1.1 mrg
2579 1.1 mrg #if !defined (__AVR_TINY__)
2580 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2581 1.1 mrg ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2582 1.1 mrg ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2583 1.1 mrg ;; Clobbers: __tmp_reg__, R21, R30, R31
2584 1.1 mrg
2585 1.1 mrg #if (defined (L_xload_1) \
2586 1.1 mrg || defined (L_xload_2) \
2587 1.1 mrg || defined (L_xload_3) \
2588 1.1 mrg || defined (L_xload_4))
2589 1.1 mrg
2590 1.1 mrg ;; Destination
2591 1.1 mrg #define D0 22
2592 1.1 mrg #define D1 D0+1
2593 1.1 mrg #define D2 D0+2
2594 1.1 mrg #define D3 D0+3
2595 1.1 mrg
2596 1.1 mrg ;; Register containing bits 16+ of the address
2597 1.1 mrg
2598 1.1 mrg #define HHI8 21
2599 1.1 mrg
             	;; Load one byte of an \n-byte flash read into \dest, using the best
             	;; available flash-read instruction; on ELPM-only parts RAMPZ is
             	;; carried along by hand, and on RAMPD parts it is reset to 0 after
             	;; the final byte.
2600 1.1 mrg 	.macro .xload dest, n
2601 1.1 mrg 	#if defined (__AVR_HAVE_ELPMX__)
2602 1.1 mrg 	    elpm \dest, Z+
2603 1.1 mrg 	#elif defined (__AVR_HAVE_ELPM__)
2604 1.1 mrg 	    elpm
2605 1.1 mrg 	    mov \dest, r0
2606 1.1 mrg 	    .if \dest != D0+\n-1
2607 1.1 mrg 	    adiw r30, 1
2608 1.1 mrg 	    adc HHI8, __zero_reg__
2609 1.1 mrg 	    out __RAMPZ__, HHI8
2610 1.1 mrg 	    .endif
2611 1.1 mrg 	#elif defined (__AVR_HAVE_LPMX__)
2612 1.1 mrg 	    lpm \dest, Z+
2613 1.1 mrg 	#else
2614 1.1 mrg 	    lpm
2615 1.1 mrg 	    mov \dest, r0
2616 1.1 mrg 	    .if \dest != D0+\n-1
2617 1.1 mrg 	    adiw r30, 1
2618 1.1 mrg 	    .endif
2619 1.1 mrg 	#endif
2620 1.1 mrg 	#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2621 1.1 mrg 	    .if \dest == D0+\n-1
2622 1.1 mrg 	    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2623 1.1 mrg 	    out __RAMPZ__, __zero_reg__
2624 1.1 mrg 	    .endif
2625 1.1 mrg 	#endif
2626 1.1 mrg 	.endm ; .xload
2627 1.1 mrg
2628 1.1 mrg #if defined (L_xload_1)
2629 1.1 mrg 	DEFUN __xload_1
             	    ;; HHI8.7 set means the address is in RAM; clear means flash.
2630 1.1 mrg 	#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2631 1.1 mrg 	    sbrc HHI8, 7
2632 1.1 mrg 	    ld D0, Z
2633 1.1 mrg 	    sbrs HHI8, 7
2634 1.1 mrg 	    lpm D0, Z
2635 1.1 mrg 	    ret
2636 1.1 mrg 	#else
2637 1.1 mrg 	    sbrc HHI8, 7
2638 1.1 mrg 	    rjmp 1f
2639 1.1 mrg 	#if defined (__AVR_HAVE_ELPM__)
2640 1.1 mrg 	    out __RAMPZ__, HHI8
2641 1.1 mrg 	#endif /* __AVR_HAVE_ELPM__ */
2642 1.1 mrg 	    .xload D0, 1
2643 1.1 mrg 	    ret
2644 1.1 mrg 	1:  ld D0, Z
2645 1.1 mrg 	    ret
2646 1.1 mrg 	#endif /* LPMx && ! ELPM */
2647 1.1 mrg 	ENDF __xload_1
2648 1.1 mrg #endif /* L_xload_1 */
2649 1.1 mrg
2650 1.1 mrg #if defined (L_xload_2)
2651 1.1 mrg 	DEFUN __xload_2
             	    ;; HHI8.7 set means the address is in RAM; clear means flash.
2652 1.1 mrg 	    sbrc HHI8, 7
2653 1.1 mrg 	    rjmp 1f
2654 1.1 mrg 	#if defined (__AVR_HAVE_ELPM__)
2655 1.1 mrg 	    out __RAMPZ__, HHI8
2656 1.1 mrg 	#endif /* __AVR_HAVE_ELPM__ */
2657 1.1 mrg 	    .xload D0, 2
2658 1.1 mrg 	    .xload D1, 2
2659 1.1 mrg 	    ret
2660 1.1 mrg 	1:  ld D0, Z+
2661 1.1 mrg 	    ld D1, Z+
2662 1.1 mrg 	    ret
2663 1.1 mrg 	ENDF __xload_2
2664 1.1 mrg #endif /* L_xload_2 */
2665 1.1 mrg
2666 1.1 mrg #if defined (L_xload_3)
2667 1.1 mrg 	DEFUN __xload_3
             	    ;; HHI8.7 set means the address is in RAM; clear means flash.
2668 1.1 mrg 	    sbrc HHI8, 7
2669 1.1 mrg 	    rjmp 1f
2670 1.1 mrg 	#if defined (__AVR_HAVE_ELPM__)
2671 1.1 mrg 	    out __RAMPZ__, HHI8
2672 1.1 mrg 	#endif /* __AVR_HAVE_ELPM__ */
2673 1.1 mrg 	    .xload D0, 3
2674 1.1 mrg 	    .xload D1, 3
2675 1.1 mrg 	    .xload D2, 3
2676 1.1 mrg 	    ret
2677 1.1 mrg 	1:  ld D0, Z+
2678 1.1 mrg 	    ld D1, Z+
2679 1.1 mrg 	    ld D2, Z+
2680 1.1 mrg 	    ret
2681 1.1 mrg 	ENDF __xload_3
2682 1.1 mrg #endif /* L_xload_3 */
2683 1.1 mrg
2684 1.1 mrg #if defined (L_xload_4)
2685 1.1 mrg 	DEFUN __xload_4
             	    ;; HHI8.7 set means the address is in RAM; clear means flash.
2686 1.1 mrg 	    sbrc HHI8, 7
2687 1.1 mrg 	    rjmp 1f
2688 1.1 mrg 	#if defined (__AVR_HAVE_ELPM__)
2689 1.1 mrg 	    out __RAMPZ__, HHI8
2690 1.1 mrg 	#endif /* __AVR_HAVE_ELPM__ */
2691 1.1 mrg 	    .xload D0, 4
2692 1.1 mrg 	    .xload D1, 4
2693 1.1 mrg 	    .xload D2, 4
2694 1.1 mrg 	    .xload D3, 4
2695 1.1 mrg 	    ret
2696 1.1 mrg 	1:  ld D0, Z+
2697 1.1 mrg 	    ld D1, Z+
2698 1.1 mrg 	    ld D2, Z+
2699 1.1.1.2 mrg 	    ld D3, Z+
2700 1.1 mrg 	    ret
2701 1.1.1.2 mrg 	ENDF __xload_4
2702 1.1 mrg #endif /* L_xload_4 */
2703 1.1 mrg
2704 1.1 mrg #endif /* L_xload_{1|2|3|4} */
2705 1.1 mrg #endif /* if !defined (__AVR_TINY__) */
2706 1.1 mrg
2707 1.1 mrg #if !defined (__AVR_TINY__)
2708 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2709 1.1 mrg ;; memcopy from Address Space __pgmx to RAM
2710 1.1 mrg ;; R23:Z = Source Address
2711 1.1 mrg ;; X = Destination Address
2712 1.1 mrg ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2713 1.1 mrg
2714 1.1 mrg #if defined (L_movmemx)
2715 1.1 mrg
2716 1.1 mrg #define HHI8 23
2717 1.1 mrg #define LOOP 24
2718 1.1 mrg
2719 1.1 mrg 	DEFUN __movmemx_qi
2720 1.1 mrg 	    ;; #Bytes to copy fits in 8 Bits (1..255)
2721 1.1 mrg 	    ;; Zero-extend Loop Counter
2722 1.1 mrg 	    clr LOOP+1
2723 1.1 mrg 	    ;; FALLTHRU
2724 1.1 mrg 	ENDF __movmemx_qi
2725 1.1 mrg
             	;; Copy LOOP+1:LOOP (r25:r24) bytes to RAM[X]; the source is
             	;; Flash[HHI8:Z] when HHI8.7 is clear, RAM[Z] when it is set.
2726 1.1 mrg 	DEFUN __movmemx_hi
2727 1.1 mrg 	
2728 1.1 mrg 	    ;; Read from where?
2729 1.1 mrg 	    sbrc HHI8, 7
2730 1.1 mrg 	    rjmp 1f
2731 1.1 mrg 	
2732 1.1 mrg 	    ;; Read from Flash
2733 1.1 mrg 	
2734 1.1 mrg 	#if defined (__AVR_HAVE_ELPM__)
2735 1.1 mrg 	    out __RAMPZ__, HHI8
2736 1.1 mrg 	#endif
2737 1.1 mrg 	
2738 1.1 mrg 	0: ;; Load 1 Byte from Flash...
2739 1.1 mrg 	
2740 1.1 mrg 	#if defined (__AVR_HAVE_ELPMX__)
2741 1.1 mrg 	    elpm r0, Z+
2742 1.1 mrg 	#elif defined (__AVR_HAVE_ELPM__)
             	    ;; No ELPM Z+ form: advance Z and carry RAMPZ along by hand.
2743 1.1 mrg 	    elpm
2744 1.1 mrg 	    adiw r30, 1
2745 1.1 mrg 	    adc HHI8, __zero_reg__
2746 1.1 mrg 	    out __RAMPZ__, HHI8
2747 1.1 mrg 	#elif defined (__AVR_HAVE_LPMX__)
2748 1.1 mrg 	    lpm r0, Z+
2749 1.1 mrg 	#else
2750 1.1 mrg 	    lpm
2751 1.1 mrg 	    adiw r30, 1
2752 1.1 mrg 	#endif
2753 1.1 mrg 	
2754 1.1 mrg 	    ;; ...and store that Byte to RAM Destination
2755 1.1 mrg 	    st X+, r0
2756 1.1 mrg 	    sbiw LOOP, 1
2757 1.1 mrg 	    brne 0b
2758 1.1 mrg 	#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2759 1.1 mrg 	    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2760 1.1 mrg 	    out __RAMPZ__, __zero_reg__
2761 1.1 mrg 	#endif /* ELPM && RAMPD */
2762 1.1 mrg 	    ret
2763 1.1 mrg 	
2764 1.1 mrg 	    ;; Read from RAM
2765 1.1 mrg 	
2766 1.1 mrg 	1: ;; Read 1 Byte from RAM...
2767 1.1 mrg 	    ld r0, Z+
2768 1.1 mrg 	    ;; and store that Byte to RAM Destination
2769 1.1 mrg 	    st X+, r0
2770 1.1 mrg 	    sbiw LOOP, 1
2771 1.1 mrg 	    brne 1b
2772 1.1 mrg 	    ret
2773 1.1.1.2 mrg 	ENDF __movmemx_hi
2774 1.1 mrg
2775 1.1 mrg #undef HHI8
2776 1.1 mrg #undef LOOP
2777 1.1 mrg
2778 1.1 mrg #endif /* L_movmemx */
2779 1.1 mrg #endif /* !defined (__AVR_TINY__) */
2780 1.1 mrg
2781 1.1 mrg
2782 1.1 mrg .section .text.libgcc.builtins, "ax", @progbits
2784 1.1 mrg
2785 1.1 mrg /**********************************
2786 1.1 mrg * Find first set Bit (ffs)
2787 1.1 mrg **********************************/
2788 1.1 mrg
2789 1.1 mrg #if defined (L_ffssi2)
2790 1.1 mrg ;; find first set bit
2791 1.1 mrg ;; r25:r24 = ffs32 (r25:r22)
2792 1.1 mrg ;; clobbers: r22, r26
2793 1.1 mrg 	DEFUN __ffssi2
             	    ;; r26 accumulates 8 for every all-zero low byte; __loop_ffsqi2 then
             	    ;; adds the 1-based bit index within the first non-zero byte.
2794 1.1 mrg 	    clr r26
2795 1.1 mrg 	    tst r22
2796 1.1 mrg 	    brne 1f
2797 1.1 mrg 	    subi r26, -8
2798 1.1 mrg 	    or r22, r23
2799 1.1 mrg 	    brne 1f
2800 1.1 mrg 	    subi r26, -8
2801 1.1 mrg 	    or r22, r24
2802 1.1 mrg 	    brne 1f
2803 1.1 mrg 	    subi r26, -8
2804 1.1 mrg 	    or r22, r25
2805 1.1 mrg 	    brne 1f
             	    ;; Input was 0: return 0.
2806 1.1 mrg 	    ret
2807 1.1 mrg 	1:  mov r24, r22
2808 1.1 mrg 	    XJMP __loop_ffsqi2
2809 1.1 mrg 	ENDF __ffssi2
2810 1.1 mrg #endif /* defined (L_ffssi2) */
2811 1.1 mrg
2812 1.1 mrg #if defined (L_ffshi2)
2813 1.1 mrg ;; find first set bit
2814 1.1 mrg ;; r25:r24 = ffs16 (r25:r24)
2815 1.1 mrg ;; clobbers: r26
2816 1.1 mrg 	DEFUN __ffshi2
2817 1.1 mrg 	    clr r26
2818 1.1 mrg 	#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2819 1.1 mrg 	    ;; Some cores have problem skipping 2-word instruction
2820 1.1 mrg 	    tst r24
2821 1.1 mrg 	    breq 2f
2822 1.1 mrg 	#else
2823 1.1 mrg 	    cpse r24, __zero_reg__
2824 1.1 mrg 	#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2825 1.1 mrg 	1:  XJMP __loop_ffsqi2
             	    ;; Low byte is zero: count 8 and scan the high byte (0 -> return 0).
2826 1.1 mrg 	2:  ldi r26, 8
2827 1.1 mrg 	    or r24, r25
2828 1.1 mrg 	    brne 1b
2829 1.1 mrg 	    ret
2830 1.1 mrg 	ENDF __ffshi2
2831 1.1 mrg #endif /* defined (L_ffshi2) */
2832 1.1 mrg
2833 1.1 mrg #if defined (L_loop_ffsqi2)
2834 1.1 mrg ;; Helper for ffshi2, ffssi2
2835 1.1 mrg ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2836 1.1 mrg ;; r24 must be != 0
2837 1.1 mrg ;; clobbers: r26
2838 1.1 mrg 	DEFUN __loop_ffsqi2
             	    ;; Shift bits out of r24 until a 1 falls into carry; r26 counts.
2839 1.1 mrg 	    inc r26
2840 1.1 mrg 	    lsr r24
2841 1.1 mrg 	    brcc __loop_ffsqi2
2842 1.1 mrg 	    mov r24, r26
2843 1.1 mrg 	    clr r25
2844 1.1 mrg 	    ret
2845 1.1 mrg 	ENDF __loop_ffsqi2
2846 1.1 mrg #endif /* defined (L_loop_ffsqi2) */
2847 1.1 mrg
2848 1.1 mrg
2849 1.1 mrg /**********************************
2851 1.1 mrg * Count trailing Zeros (ctz)
2852 1.1 mrg **********************************/
2853 1.1 mrg
2854 1.1 mrg #if defined (L_ctzsi2)
2855 1.1 mrg ;; count trailing zeros
2856 1.1 mrg ;; r25:r24 = ctz32 (r25:r22)
2857 1.1 mrg ;; clobbers: r26, r22
2858 1.1 mrg ;; ctz(0) = 255
2859 1.1 mrg 	;; Note that ctz(0) is undefined for GCC
2860 1.1 mrg 	DEFUN __ctzsi2
             	    ;; ctz = ffs - 1; ffs(0) = 0, so ctz(0) wraps to 255.
2861 1.1 mrg 	    XCALL __ffssi2
2862 1.1 mrg 	    dec r24
2863 1.1 mrg 	    ret
2864 1.1 mrg 	ENDF __ctzsi2
2865 1.1 mrg #endif /* defined (L_ctzsi2) */
2866 1.1 mrg
2867 1.1 mrg #if defined (L_ctzhi2)
2868 1.1 mrg ;; count trailing zeros
2869 1.1 mrg ;; r25:r24 = ctz16 (r25:r24)
2870 1.1 mrg ;; clobbers: r26
2871 1.1 mrg ;; ctz(0) = 255
2872 1.1 mrg 	;; Note that ctz(0) is undefined for GCC
2873 1.1 mrg 	DEFUN __ctzhi2
             	    ;; ctz = ffs - 1; ffs(0) = 0, so ctz(0) wraps to 255.
2874 1.1 mrg 	    XCALL __ffshi2
2875 1.1 mrg 	    dec r24
2876 1.1 mrg 	    ret
2877 1.1 mrg 	ENDF __ctzhi2
2878 1.1 mrg #endif /* defined (L_ctzhi2) */
2879 1.1 mrg
2880 1.1 mrg
2881 1.1 mrg /**********************************
2883 1.1 mrg * Count leading Zeros (clz)
2884 1.1 mrg **********************************/
2885 1.1 mrg
2886 1.1 mrg #if defined (L_clzdi2)
2887 1.1 mrg ;; count leading zeros
2888 1.1 mrg ;; r25:r24 = clz64 (r25:r18)
2889 1.1 mrg ;; clobbers: r22, r23, r26
2890 1.1 mrg 	DEFUN __clzdi2
             	    ;; clz of the high 32 bits; if that is 32 (bit 5 set), the high half
             	    ;; was zero: count the low half and add 32.
2891 1.1 mrg 	    XCALL __clzsi2
2892 1.1 mrg 	    sbrs r24, 5
2893 1.1 mrg 	    ret
2894 1.1 mrg 	    mov_l r22, r18
2895 1.1 mrg 	    mov_h r23, r19
2896 1.1 mrg 	    mov_l r24, r20
2897 1.1 mrg 	    mov_h r25, r21
2898 1.1 mrg 	    XCALL __clzsi2
2899 1.1 mrg 	    subi r24, -32
2900 1.1 mrg 	    ret
2901 1.1 mrg 	ENDF __clzdi2
2902 1.1 mrg #endif /* defined (L_clzdi2) */
2903 1.1 mrg
2904 1.1 mrg #if defined (L_clzsi2)
2905 1.1 mrg ;; count leading zeros
2906 1.1 mrg ;; r25:r24 = clz32 (r25:r22)
2907 1.1 mrg ;; clobbers: r26
2908 1.1 mrg 	DEFUN __clzsi2
             	    ;; clz of the high 16 bits; if that is 16 (bit 4 set), the high half
             	    ;; was zero: count the low half and add 16.
2909 1.1 mrg 	    XCALL __clzhi2
2910 1.1 mrg 	    sbrs r24, 4
2911 1.1 mrg 	    ret
2912 1.1 mrg 	    mov_l r24, r22
2913 1.1 mrg 	    mov_h r25, r23
2914 1.1 mrg 	    XCALL __clzhi2
2915 1.1 mrg 	    subi r24, -16
2916 1.1 mrg 	    ret
2917 1.1 mrg 	ENDF __clzsi2
2918 1.1 mrg #endif /* defined (L_clzsi2) */
2919 1.1 mrg
2920 1.1 mrg #if defined (L_clzhi2)
2921 1.1 mrg ;; count leading zeros
2922 1.1 mrg ;; r25:r24 = clz16 (r25:r24)
2923 1.1 mrg ;; clobbers: r26
2924 1.1 mrg 	DEFUN __clzhi2
             	    ;; r26 counts leading zeros of r25:r24.
2925 1.1 mrg 	    clr r26
2926 1.1 mrg 	    tst r25
2927 1.1 mrg 	    brne 1f
             	    ;; High byte zero: count 8 and scan the low byte instead.
2928 1.1 mrg 	    subi r26, -8
2929 1.1 mrg 	    or r25, r24
2930 1.1 mrg 	    brne 1f
             	    ;; Input was 0: all 16 bits are leading zeros.
2931 1.1 mrg 	    ldi r24, 16
2932 1.1 mrg 	    ret
             	    ;; If the byte is < 16 its high nibble is zero: pre-count 3 and
             	    ;; swap nibbles so the shift loop below needs at most 5 rounds.
2933 1.1 mrg 	1:  cpi r25, 16
2934 1.1 mrg 	    brsh 3f
2935 1.1 mrg 	    subi r26, -3
2936 1.1 mrg 	    swap r25
2937 1.1 mrg 	2:  inc r26
2938 1.1 mrg 	3:  lsl r25
2939 1.1 mrg 	    brcc 2b
2940 1.1 mrg 	    mov r24, r26
2941 1.1 mrg 	    clr r25
2942 1.1 mrg 	    ret
2943 1.1 mrg 	ENDF __clzhi2
2944 1.1 mrg #endif /* defined (L_clzhi2) */
2945 1.1 mrg
2946 1.1 mrg
2947 1.1 mrg /**********************************
2949 1.1 mrg * Parity
2950 1.1 mrg **********************************/
2951 1.1 mrg
2952 1.1 mrg #if defined (L_paritydi2)
2953 1.1 mrg ;; r25:r24 = parity64 (r25:r18)
2954 1.1 mrg ;; clobbers: __tmp_reg__
2955 1.1 mrg 	DEFUN __paritydi2
             	    ;; XOR-fold the high 32 bits into r24, then reuse the 32-bit version.
2956 1.1 mrg 	    eor r24, r18
2957 1.1 mrg 	    eor r24, r19
2958 1.1 mrg 	    eor r24, r20
2959 1.1 mrg 	    eor r24, r21
2960 1.1 mrg 	    XJMP __paritysi2
2961 1.1 mrg 	ENDF __paritydi2
2962 1.1 mrg #endif /* defined (L_paritydi2) */
2963 1.1 mrg
2964 1.1 mrg #if defined (L_paritysi2)
2965 1.1 mrg ;; r25:r24 = parity32 (r25:r22)
2966 1.1 mrg ;; clobbers: __tmp_reg__
2967 1.1 mrg 	DEFUN __paritysi2
             	    ;; XOR-fold the high 16 bits into r24, then reuse the 16-bit version.
2968 1.1 mrg 	    eor r24, r22
2969 1.1 mrg 	    eor r24, r23
2970 1.1 mrg 	    XJMP __parityhi2
2971 1.1 mrg 	ENDF __paritysi2
2972 1.1 mrg #endif /* defined (L_paritysi2) */
2973 1.1 mrg
2974 1.1 mrg #if defined (L_parityhi2)
2975 1.1 mrg ;; r25:r24 = parity16 (r25:r24)
2976 1.1 mrg ;; clobbers: __tmp_reg__
2977 1.1 mrg 	DEFUN __parityhi2
             	    ;; XOR-fold the high byte into r24, then fall into the 8-bit version.
2978 1.1 mrg 	    eor r24, r25
2979 1.1 mrg 	    ;; FALLTHRU
2980 1.1 mrg 	ENDF __parityhi2
2981 1.1 mrg 	
2982 1.1 mrg 	;; r25:r24 = parity8 (r24)
2983 1.1 mrg 	;; clobbers: __tmp_reg__
2984 1.1 mrg 	DEFUN __parityqi2
2985 1.1 mrg 	    ;; parity is in r24[0..7]
2986 1.1 mrg 	    mov __tmp_reg__, r24
2987 1.1 mrg 	    swap __tmp_reg__
2988 1.1 mrg 	    eor r24, __tmp_reg__
2989 1.1 mrg 	    ;; parity is in r24[0..3]
             	    ;; The subi/andi/subi constant trick folds the nibble's parity
             	    ;; into bits 0 and 3.
2990 1.1 mrg 	    subi r24, -4
2991 1.1 mrg 	    andi r24, -5
2992 1.1 mrg 	    subi r24, -6
2993 1.1 mrg 	    ;; parity is in r24[0,3]
2994 1.1 mrg 	    sbrc r24, 3
2995 1.1 mrg 	    inc r24
2996 1.1 mrg 	    ;; parity is in r24[0]
2997 1.1 mrg 	    andi r24, 1
2998 1.1 mrg 	    clr r25
2999 1.1 mrg 	    ret
3000 1.1 mrg 	ENDF __parityqi2
3001 1.1 mrg #endif /* defined (L_parityhi2) */
3002 1.1 mrg
3003 1.1 mrg
3004 1.1 mrg /**********************************
3006 1.1 mrg * Population Count
3007 1.1 mrg **********************************/
3008 1.1 mrg
3009 1.1 mrg #if defined (L_popcounthi2)
3010 1.1 mrg ;; population count
3011 1.1 mrg ;; r25:r24 = popcount16 (r25:r24)
3012 1.1 mrg ;; clobbers: __tmp_reg__
3013 1.1 mrg 	DEFUN __popcounthi2
             	    ;; popcount8 of the low byte is stashed on the stack while the high
             	    ;; byte is counted; the tail below adds the two.
3014 1.1 mrg 	    XCALL __popcountqi2
3015 1.1 mrg 	    push r24
3016 1.1 mrg 	    mov r24, r25
3017 1.1 mrg 	    XCALL __popcountqi2
3018 1.1 mrg 	    clr r25
3019 1.1 mrg 	    ;; FALLTHRU
3020 1.1 mrg 	ENDF __popcounthi2
3021 1.1 mrg 	
             	;; Shared tail: r24 += value pushed by the caller above.
3022 1.1 mrg 	DEFUN __popcounthi2_tail
3023 1.1 mrg 	    pop __tmp_reg__
3024 1.1 mrg 	    add r24, __tmp_reg__
3025 1.1 mrg 	    ret
3026 1.1 mrg 	ENDF __popcounthi2_tail
3027 1.1 mrg #endif /* defined (L_popcounthi2) */
3028 1.1 mrg
3029 1.1 mrg #if defined (L_popcountsi2)
3030 1.1 mrg ;; population count
3031 1.1 mrg ;; r25:r24 = popcount32 (r25:r22)
3032 1.1 mrg ;; clobbers: __tmp_reg__
3033 1.1 mrg 	DEFUN __popcountsi2
             	    ;; popcount16(high) stashed on the stack, popcount16(low) computed,
             	    ;; then the shared tail adds the two.
3034 1.1 mrg 	    XCALL __popcounthi2
3035 1.1 mrg 	    push r24
3036 1.1 mrg 	    mov_l r24, r22
3037 1.1 mrg 	    mov_h r25, r23
3038 1.1 mrg 	    XCALL __popcounthi2
3039 1.1 mrg 	    XJMP __popcounthi2_tail
3040 1.1 mrg 	ENDF __popcountsi2
3041 1.1 mrg #endif /* defined (L_popcountsi2) */
3042 1.1 mrg
3043 1.1 mrg #if defined (L_popcountdi2)
3044 1.1 mrg ;; population count
3045 1.1 mrg ;; r25:r18 = popcount64 (r25:r18)
3046 1.1 mrg ;; clobbers: r22, r23, __tmp_reg__
;; Count the high 32 bits, stack that count, move the low 32 bits
;; (r21:r18) into the SI argument registers r25:r22 and count them,
;; then let __popcounthi2_tail add the stacked partial count.
3047 1.1 mrg DEFUN __popcountdi2
3048 1.1 mrg XCALL __popcountsi2 ; r24 = popcount (r25:r22), high 32 bits
3049 1.1 mrg push r24 ; popped by __popcounthi2_tail
3050 1.1 mrg mov_l r22, r18 ; low 32 bits -> r25:r22
3051 1.1 mrg mov_h r23, r19
3052 1.1 mrg mov_l r24, r20
3053 1.1 mrg mov_h r25, r21
3054 1.1 mrg XCALL __popcountsi2
3055 1.1 mrg XJMP __popcounthi2_tail ; tail-call: add the saved count and return
3056 1.1 mrg ENDF __popcountdi2
3057 1.1 mrg #endif /* defined (L_popcountdi2) */
3058 1.1 mrg
3059 1.1 mrg #if defined (L_popcountqi2)
3060 1.1 mrg ;; population count
3061 1.1 mrg ;; r24 = popcount8 (r24)
3062 1.1 mrg ;; clobbers: __tmp_reg__
;; Serial popcount: start with bit 0, then shift the copy right and
;; accumulate each bit via the carry flag (adc with the zero register).
3063 1.1 mrg DEFUN __popcountqi2
3064 1.1 mrg mov __tmp_reg__, r24 ; working copy of the input
3065 1.1 mrg andi r24, 1 ; r24 = bit 0 of the input
3066 1.1 mrg lsr __tmp_reg__ ; discard bit 0 (already counted)
3067 1.1 mrg lsr __tmp_reg__ ; C = bit 1
3068 1.1 mrg adc r24, __zero_reg__
3069 1.1 mrg lsr __tmp_reg__ ; C = bit 2
3070 1.1 mrg adc r24, __zero_reg__
3071 1.1 mrg lsr __tmp_reg__ ; C = bit 3
3072 1.1 mrg adc r24, __zero_reg__
3073 1.1 mrg lsr __tmp_reg__ ; C = bit 4
3074 1.1 mrg adc r24, __zero_reg__
3075 1.1 mrg lsr __tmp_reg__ ; C = bit 5
3076 1.1 mrg adc r24, __zero_reg__
3077 1.1 mrg lsr __tmp_reg__ ; C = bit 6, __tmp_reg__ = bit 7
3078 1.1 mrg adc r24, __tmp_reg__ ; add bits 6 (carry) and 7 (register) at once
3079 1.1 mrg ret
3080 1.1 mrg ENDF __popcountqi2
3081 1.1 mrg #endif /* defined (L_popcountqi2) */
3082 1.1 mrg
3083 1.1 mrg
3084 1.1 mrg /**********************************
3086 1.1 mrg * Swap bytes
3087 1.1 mrg **********************************/
3088 1.1 mrg
3089 1.1 mrg ;; swap two registers with different register number
;; Classic XOR swap: no scratch register needed.  The registers MUST
;; differ, otherwise both end up zero.
3090 1.1 mrg .macro bswap a, b
3091 1.1 mrg eor \a, \b
3092 1.1 mrg eor \b, \a
3093 1.1 mrg eor \a, \b
3094 1.1 mrg .endm
3095 1.1 mrg
3096 1.1 mrg #if defined (L_bswapsi2)
3097 1.1 mrg ;; swap bytes
3098 1.1 mrg ;; r25:r22 = bswap32 (r25:r22)
;; Reverse the byte order of the 32-bit value by exchanging the outer
;; and inner byte pairs with the XOR-swap macro above.
3099 1.1 mrg DEFUN __bswapsi2
3100 1.1 mrg bswap r22, r25 ; swap bytes 0 and 3
3101 1.1 mrg bswap r23, r24 ; swap bytes 1 and 2
3102 1.1 mrg ret
3103 1.1 mrg ENDF __bswapsi2
3104 1.1 mrg #endif /* defined (L_bswapsi2) */
3105 1.1 mrg
3106 1.1 mrg #if defined (L_bswapdi2)
3107 1.1 mrg ;; swap bytes
3108 1.1 mrg ;; r25:r18 = bswap64 (r25:r18)
;; Reverse the byte order of the 64-bit value by exchanging the four
;; mirrored byte pairs with the XOR-swap macro above.
3109 1.1 mrg DEFUN __bswapdi2
3110 1.1 mrg bswap r18, r25 ; swap bytes 0 and 7
3111 1.1 mrg bswap r19, r24 ; swap bytes 1 and 6
3112 1.1.1.4 mrg bswap r20, r23 ; swap bytes 2 and 5
3113 1.1.1.4 mrg bswap r21, r22 ; swap bytes 3 and 4
3114 1.1.1.4 mrg ret
3115 1.1 mrg ENDF __bswapdi2
3116 1.1 mrg #endif /* defined (L_bswapdi2) */
3117 1.1 mrg
3118 1.1.1.4 mrg
3119 1.1.1.4 mrg /**********************************
3121 1.1 mrg * 64-bit shifts
3122 1.1 mrg **********************************/
3123 1.1 mrg
3124 1.1 mrg #if defined (L_ashrdi3)
3125 1.1 mrg
;; SS is the "fill" byte shifted in from the left: 0x00 for logical
;; shifts (the zero register is 0 on entry per the avr-gcc ABI), 0xFF
;; for arithmetic shifts of negative values.  It is restored to 0
;; before returning.
3126 1.1.1.4 mrg #define SS __zero_reg__
3127 1.1.1.4 mrg
3128 1.1 mrg ;; Arithmetic shift right
3129 1.1 mrg ;; r25:r18 = ashr64 (r25:r18, r17:r16)
3130 1.1 mrg DEFUN __ashrdi3
3131 1.1 mrg sbrc r25, 7 ; negative input?
3132 1.1 mrg com SS ; then fill byte = 0xFF (SS was 0)
3133 1.1 mrg ;; FALLTHRU
3134 1.1 mrg ENDF __ashrdi3
3135 1.1 mrg
3136 1.1 mrg ;; Logic shift right
3137 1.1 mrg ;; r25:r18 = lshr64 (r25:r18, r17:r16)
3138 1.1.1.4 mrg DEFUN __lshrdi3
3139 1.1 mrg ;; Signs are in SS (zero_reg)
3140 1.1.1.4 mrg mov __tmp_reg__, r16 ; save shift count; r16 is call-saved
;; First shift whole bytes while the count is >= 8 ...
3141 1.1 mrg 0: cpi r16, 8
3142 1.1 mrg brlo 2f
3143 1.1 mrg subi r16, 8
3144 1.1 mrg mov r18, r19
3145 1.1 mrg mov r19, r20
3146 1.1 mrg mov r20, r21
3147 1.1 mrg mov r21, r22
3148 1.1 mrg mov r22, r23
3149 1.1 mrg mov r23, r24
3150 1.1 mrg mov r24, r25
3151 1.1.1.4 mrg mov r25, SS ; fill byte enters from the top
3152 1.1.1.4 mrg rjmp 0b
;; ... then shift the remaining 0..7 bits one at a time.
3153 1.1 mrg 1: asr SS ; SS unchanged (0x00/0xFF); C = fill bit
3154 1.1 mrg ror r25 ; fill bit enters bit 63
3155 1.1.1.4 mrg ror r24
3156 1.1.1.4 mrg ror r23
3157 1.1.1.4 mrg ror r22
3158 1.1 mrg ror r21
3159 1.1 mrg ror r20
3160 1.1 mrg ror r19
3161 1.1 mrg ror r18
3162 1.1 mrg 2: dec r16 ; loop while count > 0 (shift by 0 falls straight through)
3163 1.1.1.4 mrg brpl 1b
3164 1.1 mrg clr __zero_reg__ ; restore ABI zero register (SS aliases it)
3165 1.1.1.4 mrg mov r16, __tmp_reg__ ; restore caller's r16
3166 1.1 mrg ret
3167 1.1 mrg ENDF __lshrdi3
3168 1.1 mrg
3169 1.1 mrg #undef SS
3170 1.1 mrg
3171 1.1 mrg #endif /* defined (L_ashrdi3) */
3172 1.1 mrg
3173 1.1 mrg #if defined (L_ashldi3)
3174 1.1 mrg ;; Shift left
3175 1.1 mrg ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3176 1.1 mrg ;; This function does not clobber T.
;; Same two-phase scheme as __lshrdi3: shift whole bytes while the
;; count is >= 8, then single bits for the remainder.
3177 1.1 mrg DEFUN __ashldi3
3178 1.1 mrg mov __tmp_reg__, r16 ; save shift count; r16 is call-saved
3179 1.1 mrg 0: cpi r16, 8
3180 1.1 mrg brlo 2f
3181 1.1 mrg mov r25, r24 ; shift left by one whole byte
3182 1.1 mrg mov r24, r23
3183 1.1 mrg mov r23, r22
3184 1.1 mrg mov r22, r21
3185 1.1 mrg mov r21, r20
3186 1.1 mrg mov r20, r19
3187 1.1 mrg mov r19, r18
3188 1.1.1.4 mrg clr r18 ; zero enters from the bottom
3189 1.1 mrg subi r16, 8
3190 1.1 mrg rjmp 0b
3191 1.1 mrg 1: lsl r18 ; shift left by one bit, carry rippling upward
3192 1.1 mrg rol r19
3193 1.1 mrg rol r20
3194 1.1.1.4 mrg rol r21
3195 1.1 mrg rol r22
3196 1.1 mrg rol r23
3197 1.1 mrg rol r24
3198 1.1 mrg rol r25
3199 1.1 mrg 2: dec r16 ; loop while count > 0 (shift by 0 falls straight through)
3200 1.1 mrg brpl 1b
3201 1.1 mrg mov r16, __tmp_reg__ ; restore caller's r16
3202 1.1 mrg ret
3203 1.1 mrg ENDF __ashldi3
3204 1.1 mrg #endif /* defined (L_ashldi3) */
3205 1.1 mrg
3206 1.1 mrg #if defined (L_rotldi3)
3207 1.1 mrg ;; Rotate left
3208 1.1 mrg ;; r25:r18 = rotl64 (r25:r18, r17:r16)
;; Rotate whole bytes while the count is >= 8, then single bits.
;; r16 is preserved on the stack because __tmp_reg__ is needed as
;; scratch inside the byte-rotate loop.
3209 1.1 mrg DEFUN __rotldi3
3210 1.1 mrg push r16 ; save shift count; r16 is call-saved
3211 1.1 mrg 0: cpi r16, 8
3212 1.1 mrg brlo 2f
3213 1.1 mrg subi r16, 8
3214 1.1 mrg mov __tmp_reg__, r25 ; rotate left by one whole byte
3215 1.1 mrg mov r25, r24
3216 1.1 mrg mov r24, r23
3217 1.1 mrg mov r23, r22
3218 1.1 mrg mov r22, r21
3219 1.1 mrg mov r21, r20
3220 1.1 mrg mov r20, r19
3221 1.1 mrg mov r19, r18
3222 1.1 mrg mov r18, __tmp_reg__ ; old top byte wraps to the bottom
3223 1.1 mrg rjmp 0b
3224 1.1 mrg 1: lsl r18 ; rotate left by one bit ...
3225 1.1 mrg rol r19
3226 1.1 mrg rol r20
3227 1.1 mrg rol r21
3228 1.1 mrg rol r22
3229 1.1 mrg rol r23
3230 1.1 mrg rol r24
3231 1.1 mrg rol r25
3232 1.1 mrg adc r18, __zero_reg__ ; ... and wrap bit 63 (carry) back into bit 0
3233 1.1 mrg 2: dec r16 ; loop while count > 0 (rotate by 0 falls straight through)
3234 1.1 mrg brpl 1b
3235 1.1 mrg pop r16 ; restore caller's r16
3236 1.1 mrg ret
3237 1.1 mrg ENDF __rotldi3
3238 1.1 mrg #endif /* defined (L_rotldi3) */
3239 1.1 mrg
3240 1.1 mrg
3241 1.1 mrg .section .text.libgcc.fmul, "ax", @progbits
3243 1.1 mrg
3244 1.1 mrg /***********************************************************/
3245 1.1 mrg ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3246 1.1 mrg ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3247 1.1 mrg /***********************************************************/
3248 1.1 mrg
;; Register aliases for the fmul* routines below (a bare number is a
;; valid register operand in avr-as).  Operands arrive in A1 = r24 and
;; B1 = r25; the 16-bit result is built in C1:C0 = r23:r22.  A0 is the
;; low extension byte of A as it is shifted right during the multiply.
3249 1.1 mrg #define A1 24
3250 1.1 mrg #define B1 25
3251 1.1 mrg #define C0 22
3252 1.1 mrg #define C1 23
3253 1.1 mrg #define A0 __tmp_reg__
3255 1.1 mrg #ifdef L_fmuls
3256 1.1 mrg ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3257 1.1 mrg ;;; Clobbers: r24, r25, __tmp_reg__
;; Signed x signed: record the result sign (A1.7 ^ B1.7) in A0.7, take
;; |B1|, and share the rest (|A1|, multiply, conditional negate) with
;; __fmulsu via __fmulsu_exit.
3258 1.1 mrg DEFUN __fmuls
3259 1.1 mrg ;; A0.7 = negate result?
3260 1.1 mrg mov A0, A1
3261 1.1 mrg eor A0, B1 ; sign of the product = sign(A) ^ sign(B)
3262 1.1 mrg ;; B1 = |B1|
3263 1.1 mrg sbrc B1, 7
3264 1.1 mrg neg B1
3265 1.1 mrg XJMP __fmulsu_exit ; continue with |A1| and the unsigned multiply
3266 1.1 mrg ENDF __fmuls
3267 1.1 mrg #endif /* L_fmuls */
3268 1.1 mrg
3269 1.1 mrg #ifdef L_fmulsu
3270 1.1 mrg ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3271 1.1 mrg ;;; Clobbers: r24, r25, __tmp_reg__
;; Signed x unsigned: the result sign is just A1's sign, stashed in
;; A0.7.  Falls through into the shared helper.
3272 1.1 mrg DEFUN __fmulsu
3273 1.1 mrg ;; A0.7 = negate result?
3274 1.1 mrg mov A0, A1
3275 1.1 mrg ;; FALLTHRU
3276 1.1 mrg ENDF __fmulsu
3277 1.1 mrg
3278 1.1 mrg ;; Helper for __fmuls and __fmulsu
;; Expects the "negate result?" flag in A0.7 and |B1| already formed.
;; Takes |A1|, runs the unsigned multiply, and negates the 16-bit
;; result C1:C0 when the flag is set.
3279 1.1 mrg DEFUN __fmulsu_exit
3280 1.1 mrg ;; A1 = |A1|
3281 1.1 mrg sbrc A1, 7
3282 1.1 mrg neg A1
3283 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3284 1.1 mrg ;; Some cores have problem skipping 2-word instruction
3285 1.1 mrg tst A0
3286 1.1 mrg brmi 1f ; negative -> multiply, then negate below
3287 1.1 mrg #else
3288 1.1 mrg sbrs A0, 7 ; skip the tail-jump if the result must be negated
3289 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3290 1.1 mrg XJMP __fmul ; positive result: tail-call the plain multiply
3291 1.1 mrg 1: XCALL __fmul
3292 1.1 mrg ;; C = -C iff A0.7 = 1
3293 1.1 mrg NEG2 C0 ; 16-bit negate of C1:C0
3294 1.1 mrg ret
3295 1.1 mrg ENDF __fmulsu_exit
3296 1.1 mrg #endif /* L_fmulsu */
3297 1.1 mrg
3298 1.1 mrg
3299 1.1 mrg #ifdef L_fmul
3300 1.1 mrg ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
3301 1.1 mrg ;;; Clobbers: r24, r25, __tmp_reg__
;; Shift-and-add fractional multiply: for each B bit from the MSB down,
;; add the (right-shifted) A1:A0 into C1:C0.  B1 is shifted left each
;; round; the loop ends when no set bits remain in B1.
3302 1.1 mrg DEFUN __fmul
3303 1.1 mrg ; clear result
3304 1.1 mrg clr C0
3305 1.1 mrg clr C1
3306 1.1 mrg clr A0 ; A's low extension byte starts at 0
3307 1.1 mrg 1: tst B1 ; set N for the first brpl test
3308 1.1 mrg ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
3309 1.1 mrg 2: brpl 3f ; current MSB of B clear -> skip the add
3310 1.1 mrg ;; C += A
3311 1.1 mrg add C0, A0
3312 1.1 mrg adc C1, A1
3313 1.1 mrg 3: ;; A >>= 1
3314 1.1 mrg lsr A1
3315 1.1 mrg ror A0
3316 ;; B <<= 1
3317 lsl B1 ; sets N for the brpl above and Z for the brne below
3318 brne 2b ; loop until all remaining B bits are consumed
3319 ret
3320 ENDF __fmul
3321 #endif /* L_fmul */
3322
3323 #undef A0
3324 #undef A1
3325 #undef B1
3326 #undef C0
3327 #undef C1
3328
3329 #include "lib1funcs-fixed.S"
3330