lib1funcs.S revision 1.1.1.1 1 1.1 mrg /* -*- Mode: Asm -*- */
2 1.1 mrg /* Copyright (C) 1998-2013 Free Software Foundation, Inc.
3 1.1 mrg Contributed by Denis Chertykov <chertykov (at) gmail.com>
4 1.1 mrg
5 1.1 mrg This file is free software; you can redistribute it and/or modify it
6 1.1 mrg under the terms of the GNU General Public License as published by the
7 1.1 mrg Free Software Foundation; either version 3, or (at your option) any
8 1.1 mrg later version.
9 1.1 mrg
10 1.1 mrg This file is distributed in the hope that it will be useful, but
11 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 1.1 mrg General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1 mrg #define __zero_reg__ r1
25 1.1 mrg #define __tmp_reg__ r0
26 1.1 mrg #define __SREG__ 0x3f
27 1.1 mrg #if defined (__AVR_HAVE_SPH__)
28 1.1 mrg #define __SP_H__ 0x3e
29 1.1 mrg #endif
30 1.1 mrg #define __SP_L__ 0x3d
31 1.1 mrg #define __RAMPZ__ 0x3B
32 1.1 mrg #define __EIND__ 0x3C
33 1.1 mrg
34 1.1 mrg /* Most of the functions here are called directly from avr.md
35 1.1 mrg patterns, instead of using the standard libcall mechanisms.
36 1.1 mrg This can make better code because GCC knows exactly which
37 1.1 mrg of the call-used registers (not all of them) are clobbered. */
38 1.1 mrg
39 1.1 mrg /* FIXME: At present, there is no SORT directive in the linker
40 1.1 mrg script so that we must not assume that different modules
41 1.1 mrg in the same input section like .libgcc.text.mul will be
42 1.1 mrg located close together. Therefore, we cannot use
43 1.1 mrg RCALL/RJMP to call a function like __udivmodhi4 from
44 1.1 mrg __divmodhi4 and have to use lengthy XCALL/XJMP even
45 1.1 mrg though they are in the same input section and all same
46 1.1 mrg input sections together are small enough to reach every
47 1.1 mrg location with a RCALL/RJMP instruction. */
48 1.1 mrg
;; mov_l: move the LOW byte of a 16-bit pair.  On devices with MOVW this
;; single MOVW copies BOTH bytes at once; the companion mov_h is then empty.
;; Always use mov_l/mov_h as a pair on adjacent even/odd registers.
49 1.1 mrg .macro mov_l r_dest, r_src
50 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
51 1.1 mrg movw \r_dest, \r_src
52 1.1 mrg #else
53 1.1 mrg mov \r_dest, \r_src
54 1.1 mrg #endif
55 1.1 mrg .endm
56 1.1 mrg
;; mov_h: move the HIGH byte of a 16-bit pair.  Expands to nothing on MOVW
;; devices because the preceding mov_l already moved the whole word.
57 1.1 mrg .macro mov_h r_dest, r_src
58 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
59 1.1 mrg ; empty
60 1.1 mrg #else
61 1.1 mrg mov \r_dest, \r_src
62 1.1 mrg #endif
63 1.1 mrg .endm
64 1.1 mrg
;; wmov: self-contained 16-bit register-pair move; one MOVW if available,
;; otherwise two MOVs.  NOTE(review): the non-MOVW branch relies on the
;; arguments being plain register NUMBERS so that \r_src+1 / \r_dest+1
;; evaluate to the next register — callers pass e.g. "26", not "r26".
65 1.1 mrg .macro wmov r_dest, r_src
66 1.1 mrg #if defined (__AVR_HAVE_MOVW__)
67 1.1 mrg movw \r_dest, \r_src
68 1.1 mrg #else
69 1.1 mrg mov \r_dest, \r_src
70 1.1 mrg mov \r_dest+1, \r_src+1
71 1.1 mrg #endif
72 1.1 mrg .endm
73 1.1 mrg
74 1.1 mrg #if defined (__AVR_HAVE_JMP_CALL__)
75 1.1 mrg #define XCALL call
76 1.1 mrg #define XJMP jmp
77 1.1 mrg #else
78 1.1 mrg #define XCALL rcall
79 1.1 mrg #define XJMP rjmp
80 1.1 mrg #endif
81 1.1 mrg
82 1.1 mrg ;; Prologue stuff
83 1.1 mrg
;; Emit a function prologue tail: load the frame size into X (r27:r26) and
;; the return address into Z (r31:r30), then jump into the shared
;; __prologue_saves__ sequence.  Entering at offset (18 - n_pushed) * 2
;; skips PUSH instructions (2 bytes each) so exactly \n_pushed call-saved
;; registers get saved before control returns to .L_prologue_saves.\@ below.
84 1.1 mrg .macro do_prologue_saves n_pushed n_frame=0
85 1.1 mrg ldi r26, lo8(\n_frame)
86 1.1 mrg ldi r27, hi8(\n_frame)
87 1.1 mrg ldi r30, lo8(gs(.L_prologue_saves.\@))
88 1.1 mrg ldi r31, hi8(gs(.L_prologue_saves.\@))
89 1.1 mrg XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
90 1.1 mrg .L_prologue_saves.\@:
91 1.1 mrg .endm
92 1.1 mrg
93 1.1 mrg ;; Epilogue stuff
94 1.1 mrg
;; Emit a function epilogue: point Y (r29:r28) just past the frame, then
;; jump into the shared __epilogue_restores__ sequence, entering at an
;; offset that restores exactly \n_pushed call-saved registers.
;; SUBI/SBCI with lo8(-n)/hi8(-n) is the AVR idiom for ADDING n (there is
;; no ADDI instruction); ADIW is used for small frames when SPH exists.
95 1.1 mrg .macro do_epilogue_restores n_pushed n_frame=0
96 1.1 mrg in r28, __SP_L__
97 1.1 mrg #ifdef __AVR_HAVE_SPH__
98 1.1 mrg in r29, __SP_H__
99 1.1 mrg .if \n_frame > 63
100 1.1 mrg subi r28, lo8(-\n_frame)
101 1.1 mrg sbci r29, hi8(-\n_frame)
102 1.1 mrg .elseif \n_frame > 0
103 1.1 mrg adiw r28, \n_frame
104 1.1 mrg .endif
105 1.1 mrg #else
106 1.1 mrg clr r29
107 1.1 mrg .if \n_frame > 0
108 1.1 mrg subi r28, lo8(-\n_frame)
109 1.1 mrg .endif
110 1.1 mrg #endif /* HAVE SPH */
111 1.1 mrg ldi r30, \n_pushed
112 1.1 mrg XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
113 1.1 mrg .endm
114 1.1 mrg
115 1.1 mrg ;; Support function entry and exit for convenience
116 1.1 mrg
;; DEFUN: open a global function — export the symbol, start debug-info
;; bracketing (.func), and place the entry label.  Pair with ENDF.
117 1.1 mrg .macro DEFUN name
118 1.1 mrg .global \name
119 1.1 mrg .func \name
120 1.1 mrg \name:
121 1.1 mrg .endm
122 1.1 mrg
;; ENDF: close a function opened with DEFUN — record its size for the
;; ELF symbol table and end the .func debug bracket.
123 1.1 mrg .macro ENDF name
124 1.1 mrg .size \name, .-\name
125 1.1 mrg .endfunc
126 1.1 mrg .endm
127 1.1 mrg
;; FALIAS: define a zero-size global alias label at the current location,
;; used to give an existing code sequence a second entry name.
128 1.1 mrg .macro FALIAS name
129 1.1 mrg .global \name
130 1.1 mrg .func \name
131 1.1 mrg \name:
132 1.1 mrg .size \name, .-\name
133 1.1 mrg .endfunc
134 1.1 mrg .endm
135 1.1 mrg
136 1.1 mrg ;; Skip next instruction, typically a jump target
137 1.1 mrg #define skip cpse 0,0
138 1.1 mrg
139 1.1 mrg ;; Negate a 2-byte value held in consecutive registers
;; NEG2: two's-complement negate of \reg+1:\reg.
;; NEG sets Carry iff the low byte was non-zero, so SBCI \reg+1, -1
;; computes hi = ~hi + 1 - C, i.e. propagates the borrow correctly.
;; NOTE(review): SBCI takes an upper register, so \reg+1 must be r16..r31.
140 1.1 mrg .macro NEG2 reg
141 1.1 mrg com \reg+1
142 1.1 mrg neg \reg
143 1.1 mrg sbci \reg+1, -1
144 1.1 mrg .endm
145 1.1 mrg
146 1.1 mrg ;; Negate a 4-byte value held in consecutive registers
147 1.1 mrg ;; Sets the V flag for signed overflow tests if REG >= 16
;; NEG4: two's-complement negate of the 4-byte value \reg+3..\reg.
;; Two code shapes: registers >= 16 can use SBCI (immediate form); for
;; lower registers COM sets Carry = 1, so the ADC chain adds the +1 of
;; ~X + 1 and ripples it through the upper bytes without any immediate.
148 1.1 mrg .macro NEG4 reg
149 1.1 mrg com \reg+3
150 1.1 mrg com \reg+2
151 1.1 mrg com \reg+1
152 1.1 mrg .if \reg >= 16
153 1.1 mrg neg \reg
154 1.1 mrg sbci \reg+1, -1
155 1.1 mrg sbci \reg+2, -1
156 1.1 mrg sbci \reg+3, -1
157 1.1 mrg .else
158 1.1 mrg com \reg
159 1.1 mrg adc \reg, __zero_reg__
160 1.1 mrg adc \reg+1, __zero_reg__
161 1.1 mrg adc \reg+2, __zero_reg__
162 1.1 mrg adc \reg+3, __zero_reg__
163 1.1 mrg .endif
164 1.1 mrg .endm
165 1.1 mrg
166 1.1 mrg #define exp_lo(N) hlo8 ((N) << 23)
167 1.1 mrg #define exp_hi(N) hhi8 ((N) << 23)
168 1.1 mrg
169 1.1 mrg
170 1.1 mrg .section .text.libgcc.mul, "ax", @progbits
172 1.1 mrg
173 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
174 1.1 mrg /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
175 1.1 mrg #if !defined (__AVR_HAVE_MUL__)
176 1.1 mrg /*******************************************************
177 1.1 mrg Multiplication 8 x 8 without MUL
178 1.1 mrg *******************************************************/
179 1.1 mrg #if defined (L_mulqi3)
180 1.1 mrg
181 1.1 mrg #define r_arg2 r22 /* multiplicand */
182 1.1 mrg #define r_arg1 r24 /* multiplier */
183 1.1 mrg #define r_res __tmp_reg__ /* result */
184 1.1 mrg
;; __mulqi3: R24 = R24 * R22 (8 x 8 -> 8), classic shift-and-add.
;; Each iteration: if bit 0 of the multiplier is set, add the (shifted)
;; multiplicand into the accumulator; exit early once either operand is 0.
185 1.1 mrg DEFUN __mulqi3
186 1.1 mrg clr r_res ; clear result
187 1.1 mrg __mulqi3_loop:
188 1.1 mrg sbrc r_arg1,0
189 1.1 mrg add r_res,r_arg2
190 1.1 mrg add r_arg2,r_arg2 ; shift multiplicand
191 1.1 mrg breq __mulqi3_exit ; while multiplicand != 0
192 1.1 mrg lsr r_arg1 ;
193 1.1 mrg brne __mulqi3_loop ; exit if multiplier = 0
194 1.1 mrg __mulqi3_exit:
195 1.1 mrg mov r_arg1,r_res ; result to return register
196 1.1 mrg ret
197 1.1 mrg ENDF __mulqi3
198 1.1 mrg
199 1.1 mrg #undef r_arg2
200 1.1 mrg #undef r_arg1
201 1.1 mrg #undef r_res
202 1.1 mrg
203 1.1 mrg #endif /* defined (L_mulqi3) */
204 1.1 mrg
205 1.1 mrg
206 1.1 mrg /*******************************************************
207 1.1 mrg Widening Multiplication 16 = 8 x 8 without MUL
208 1.1 mrg Multiplication 16 x 16 without MUL
209 1.1 mrg *******************************************************/
210 1.1 mrg
211 1.1 mrg #define A0 r22
212 1.1 mrg #define A1 r23
213 1.1 mrg #define B0 r24
214 1.1 mrg #define BB0 r20
215 1.1 mrg #define B1 r25
216 1.1 mrg ;; Output overlaps input, thus expand result in CC0/1
217 1.1 mrg #define C0 r24
218 1.1 mrg #define C1 r25
219 1.1 mrg #define CC0 __tmp_reg__
220 1.1 mrg #define CC1 R21
221 1.1 mrg
222 1.1 mrg #if defined (L_umulqihi3)
223 1.1 mrg ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
224 1.1 mrg ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
225 1.1 mrg ;;; Clobbers: __tmp_reg__, R21..R23
;; __umulqihi3: widening unsigned 16 = 8 x 8.  Zero-extend both operands
;; into their high bytes and tail-jump into the generic 16x16 __mulhi3.
226 1.1 mrg DEFUN __umulqihi3
227 1.1 mrg clr A1
228 1.1 mrg clr B1
229 1.1 mrg XJMP __mulhi3
230 1.1 mrg ENDF __umulqihi3
231 1.1 mrg #endif /* L_umulqihi3 */
232 1.1 mrg
233 1.1 mrg #if defined (L_mulqihi3)
234 1.1 mrg ;;; R25:R24 = (signed int) R22 * (signed int) R24
235 1.1 mrg ;;; (C1:C0) = (signed int) A0 * (signed int) B0
236 1.1 mrg ;;; Clobbers: __tmp_reg__, R20..R23
;; __mulqihi3: widening signed 16 = 8 x 8.  B is sign-extended up front;
;; A is only ZERO-extended (a zero A1 makes __mulhi3 terminate faster),
;; and the missing sign of A is repaired AFTER the multiply: if A < 0,
;; BB0 holds B0 and "sub C1, BB0" subtracts B << 8 from the product,
;; which is exactly the correction for treating a negative A as unsigned.
237 1.1 mrg DEFUN __mulqihi3
238 1.1 mrg ;; Sign-extend B0
239 1.1 mrg clr B1
240 1.1 mrg sbrc B0, 7
241 1.1 mrg com B1
242 1.1 mrg ;; The multiplication runs twice as fast if A1 is zero, thus:
243 1.1 mrg ;; Zero-extend A0
244 1.1 mrg clr A1
245 1.1 mrg #ifdef __AVR_HAVE_JMP_CALL__
246 1.1 mrg ;; Store B0 * sign of A
247 1.1 mrg clr BB0
248 1.1 mrg sbrc A0, 7
249 1.1 mrg mov BB0, B0
250 1.1 mrg call __mulhi3
251 1.1 mrg #else /* have no CALL */
252 1.1 mrg ;; Skip sign-extension of A if A >= 0
253 1.1 mrg ;; Same size as with the first alternative but avoids errata skip
254 1.1 mrg ;; and is faster if A >= 0
255 1.1 mrg sbrs A0, 7
256 1.1 mrg rjmp __mulhi3
257 1.1 mrg ;; If A < 0 store B
258 1.1 mrg mov BB0, B0
259 1.1 mrg rcall __mulhi3
260 1.1 mrg #endif /* HAVE_JMP_CALL */
261 1.1 mrg ;; 1-extend A after the multiplication
262 1.1 mrg sub C1, BB0
263 1.1 mrg ret
264 1.1 mrg ENDF __mulqihi3
265 1.1 mrg #endif /* L_mulqihi3 */
266 1.1 mrg
267 1.1 mrg #if defined (L_mulhi3)
268 1.1 mrg ;;; R25:R24 = R23:R22 * R25:R24
269 1.1 mrg ;;; (C1:C0) = (A1:A0) * (B1:B0)
270 1.1 mrg ;;; Clobbers: __tmp_reg__, R21..R23
;; __mulhi3: R25:R24 = R23:R22 * R25:R24 (16 x 16, no MUL instruction).
;; Shift-and-add over the bits of A; the result is accumulated in CC1:CC0
;; because the output registers C1:C0 overlap input B1:B0.  Terminates
;; early when either B becomes 0 (fully shifted out) or A is exhausted.
271 1.1 mrg DEFUN __mulhi3
272 1.1 mrg
273 1.1 mrg ;; Clear result
274 1.1 mrg clr CC0
275 1.1 mrg clr CC1
276 1.1 mrg rjmp 3f
277 1.1 mrg 1:
278 1.1 mrg ;; Bit n of A is 1 --> C += B << n
279 1.1 mrg add CC0, B0
280 1.1 mrg adc CC1, B1
281 1.1 mrg 2:
282 1.1 mrg lsl B0
283 1.1 mrg rol B1
284 1.1 mrg 3:
285 1.1 mrg ;; If B == 0 we are ready
286 1.1 mrg sbiw B0, 0
287 1.1 mrg breq 9f
288 1.1 mrg
289 1.1 mrg ;; Carry = n-th bit of A
290 1.1 mrg lsr A1
291 1.1 mrg ror A0
292 1.1 mrg ;; If bit n of A is set, then go add B * 2^n to C
293 1.1 mrg brcs 1b
294 1.1 mrg
295 1.1 mrg ;; Carry = 0 --> The ROR above acts like CP A0, 0
296 1.1 mrg ;; Thus, it is sufficient to CPC the high part to test A against 0
297 1.1 mrg cpc A1, __zero_reg__
298 1.1 mrg ;; Only proceed if A != 0
299 1.1 mrg brne 2b
300 1.1 mrg 9:
301 1.1 mrg ;; Move Result into place
302 1.1 mrg mov C0, CC0
303 1.1 mrg mov C1, CC1
304 1.1 mrg ret
305 1.1 mrg ENDF __mulhi3
306 1.1 mrg #endif /* L_mulhi3 */
307 1.1 mrg
308 1.1 mrg #undef A0
309 1.1 mrg #undef A1
310 1.1 mrg #undef B0
311 1.1 mrg #undef BB0
312 1.1 mrg #undef B1
313 1.1 mrg #undef C0
314 1.1 mrg #undef C1
315 1.1 mrg #undef CC0
316 1.1 mrg #undef CC1
317 1.1 mrg
318 1.1 mrg
319 1.1 mrg #define A0 22
321 1.1 mrg #define A1 A0+1
322 1.1 mrg #define A2 A0+2
323 1.1 mrg #define A3 A0+3
324 1.1 mrg
325 1.1 mrg #define B0 18
326 1.1 mrg #define B1 B0+1
327 1.1 mrg #define B2 B0+2
328 1.1 mrg #define B3 B0+3
329 1.1 mrg
330 1.1 mrg #define CC0 26
331 1.1 mrg #define CC1 CC0+1
332 1.1 mrg #define CC2 30
333 1.1 mrg #define CC3 CC2+1
334 1.1 mrg
335 1.1 mrg #define C0 22
336 1.1 mrg #define C1 C0+1
337 1.1 mrg #define C2 C0+2
338 1.1 mrg #define C3 C0+3
339 1.1 mrg
340 1.1 mrg /*******************************************************
341 1.1 mrg Widening Multiplication 32 = 16 x 16 without MUL
342 1.1 mrg *******************************************************/
343 1.1 mrg
344 1.1 mrg #if defined (L_umulhisi3)
;; __umulhisi3 (no-MUL variant): widening unsigned 32 = 16 x 16.
;; Move B out of the result registers (R25:R24 -> B1:B0), zero-extend
;; both operands to 32 bits (A2 copies the just-cleared B2, saving a
;; CLR), then tail-jump to the generic 32 x 32 __mulsi3.
345 1.1 mrg DEFUN __umulhisi3
346 1.1 mrg wmov B0, 24
347 1.1 mrg ;; Zero-extend B
348 1.1 mrg clr B2
349 1.1 mrg clr B3
350 1.1 mrg ;; Zero-extend A
351 1.1 mrg wmov A2, B2
352 1.1 mrg XJMP __mulsi3
353 1.1 mrg ENDF __umulhisi3
354 1.1 mrg #endif /* L_umulhisi3 */
355 1.1 mrg
356 1.1 mrg #if defined (L_mulhisi3)
;; __mulhisi3 (no-MUL variant): widening signed 32 = 16 x 16.
;; B is sign-extended via the LSL/SBC trick (SBC B2,B2 yields 0x00 or
;; 0xFF from the carry = sign bit).  For A, the no-errata path only
;; zero-extends and, when A < 0, pre-loads the high result word CC3:CC2
;; with -B so the later unsigned multiply yields the signed product.
357 1.1 mrg DEFUN __mulhisi3
358 1.1 mrg wmov B0, 24
359 1.1 mrg ;; Sign-extend B
360 1.1 mrg lsl r25
361 1.1 mrg sbc B2, B2
362 1.1 mrg mov B3, B2
363 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
364 1.1 mrg ;; Sign-extend A
365 1.1 mrg clr A2
366 1.1 mrg sbrc A1, 7
367 1.1 mrg com A2
368 1.1 mrg mov A3, A2
369 1.1 mrg XJMP __mulsi3
370 1.1 mrg #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
371 1.1 mrg ;; Zero-extend A and __mulsi3 will run at least twice as fast
372 1.1 mrg ;; compared to a sign-extended A.
373 1.1 mrg clr A2
374 1.1 mrg clr A3
375 1.1 mrg sbrs A1, 7
376 1.1 mrg XJMP __mulsi3
377 1.1 mrg ;; If A < 0 then perform the B * 0xffff.... before the
378 1.1 mrg ;; very multiplication by initializing the high part of the
379 1.1 mrg ;; result CC with -B.
380 1.1 mrg wmov CC2, A2
381 1.1 mrg sub CC2, B0
382 1.1 mrg sbc CC3, B1
383 1.1 mrg XJMP __mulsi3_helper
384 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
385 1.1 mrg ENDF __mulhisi3
386 1.1 mrg #endif /* L_mulhisi3 */
387 1.1 mrg
388 1.1 mrg
389 1.1 mrg /*******************************************************
390 1.1 mrg Multiplication 32 x 32 without MUL
391 1.1 mrg *******************************************************/
392 1.1 mrg
393 1.1 mrg #if defined (L_mulsi3)
;; __mulsi3 (no-MUL variant): R25:R22 = R25:R22 * R21:R18, shift-and-add.
;; __mulsi3 clears the high accumulator words and FALLS THROUGH into
;; __mulsi3_helper, which callers may enter directly with CC3:CC2
;; pre-loaded (see __mulhisi3's sign-fixup trick).
;; After the LSR/ROR chain, Z = (A0 == 0); SBCI A1, 0 executes with
;; Carry known clear (BRCS fell through) and combines Z so that the
;; following BRNE tests A1:A0 != 0; SBIW A2, 0 then tests A3:A2.
394 1.1 mrg DEFUN __mulsi3
395 1.1 mrg ;; Clear result
396 1.1 mrg clr CC2
397 1.1 mrg clr CC3
398 1.1 mrg ;; FALLTHRU
399 1.1 mrg ENDF __mulsi3
400 1.1 mrg
401 1.1 mrg DEFUN __mulsi3_helper
402 1.1 mrg clr CC0
403 1.1 mrg clr CC1
404 1.1 mrg rjmp 3f
405 1.1 mrg
406 1.1 mrg 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
407 1.1 mrg ;; CC += B
408 1.1 mrg add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
409 1.1 mrg
410 1.1 mrg 2: ;; B <<= 1
411 1.1 mrg lsl B0 $ rol B1 $ rol B2 $ rol B3
412 1.1 mrg
413 1.1 mrg 3: ;; A >>= 1: Carry = n-th bit of A
414 1.1 mrg lsr A3 $ ror A2 $ ror A1 $ ror A0
415 1.1 mrg
416 1.1 mrg brcs 1b
417 1.1 mrg ;; Only continue if A != 0
418 1.1 mrg sbci A1, 0
419 1.1 mrg brne 2b
420 1.1 mrg sbiw A2, 0
421 1.1 mrg brne 2b
422 1.1 mrg
423 1.1 mrg ;; All bits of A are consumed: Copy result to return register C
424 1.1 mrg wmov C0, CC0
425 1.1 mrg wmov C2, CC2
426 1.1 mrg ret
427 1.1 mrg ENDF __mulsi3_helper
428 1.1 mrg #endif /* L_mulsi3 */
429 1.1 mrg
430 1.1 mrg #undef A0
431 1.1 mrg #undef A1
432 1.1 mrg #undef A2
433 1.1 mrg #undef A3
434 1.1 mrg #undef B0
435 1.1 mrg #undef B1
436 1.1 mrg #undef B2
437 1.1 mrg #undef B3
438 1.1 mrg #undef C0
439 1.1 mrg #undef C1
440 1.1 mrg #undef C2
441 1.1 mrg #undef C3
442 1.1 mrg #undef CC0
443 1.1 mrg #undef CC1
444 1.1 mrg #undef CC2
445 1.1 mrg #undef CC3
446 1.1 mrg
447 1.1 mrg #endif /* !defined (__AVR_HAVE_MUL__) */
448 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
449 1.1 mrg
450 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
452 1.1 mrg #if defined (__AVR_HAVE_MUL__)
453 1.1 mrg #define A0 26
454 1.1 mrg #define B0 18
455 1.1 mrg #define C0 22
456 1.1 mrg
457 1.1 mrg #define A1 A0+1
458 1.1 mrg
459 1.1 mrg #define B1 B0+1
460 1.1 mrg #define B2 B0+2
461 1.1 mrg #define B3 B0+3
462 1.1 mrg
463 1.1 mrg #define C1 C0+1
464 1.1 mrg #define C2 C0+2
465 1.1 mrg #define C3 C0+3
466 1.1 mrg
467 1.1 mrg /*******************************************************
468 1.1 mrg Widening Multiplication 32 = 16 x 16 with MUL
469 1.1 mrg *******************************************************/
470 1.1 mrg
471 1.1 mrg #if defined (L_mulhisi3)
472 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
473 1.1 mrg ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
474 1.1 mrg ;;; Clobbers: __tmp_reg__
;; __mulhisi3 (MUL variant): signed widening 32 = 16 x 16 built on the
;; unsigned __umulhisi3, then corrected per operand sign: if B < 0
;; subtract A << 16, and __usmulhisi3_tail subtracts B << 16 if A < 0.
475 1.1 mrg DEFUN __mulhisi3
476 1.1 mrg XCALL __umulhisi3
477 1.1 mrg ;; Sign-extend B
478 1.1 mrg tst B1
479 1.1 mrg brpl 1f
480 1.1 mrg sub C2, A0
481 1.1 mrg sbc C3, A1
482 1.1 mrg 1: ;; Sign-extend A
483 1.1 mrg XJMP __usmulhisi3_tail
484 1.1 mrg ENDF __mulhisi3
485 1.1 mrg #endif /* L_mulhisi3 */
486 1.1 mrg
487 1.1 mrg #if defined (L_usmulhisi3)
488 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
489 1.1 mrg ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
490 1.1 mrg ;;; Clobbers: __tmp_reg__
;; __usmulhisi3: signed A x unsigned B widening multiply — unsigned
;; product, then fall through to the shared tail that subtracts B << 16
;; when A is negative (the correction for A's sign bit).
491 1.1 mrg DEFUN __usmulhisi3
492 1.1 mrg XCALL __umulhisi3
493 1.1 mrg ;; FALLTHRU
494 1.1 mrg ENDF __usmulhisi3
495 1.1 mrg
496 1.1 mrg DEFUN __usmulhisi3_tail
497 1.1 mrg ;; Sign-extend A
498 1.1 mrg sbrs A1, 7
499 1.1 mrg ret
500 1.1 mrg sub C2, B0
501 1.1 mrg sbc C3, B1
502 1.1 mrg ret
503 1.1 mrg ENDF __usmulhisi3_tail
504 1.1 mrg #endif /* L_usmulhisi3 */
505 1.1 mrg
506 1.1 mrg #if defined (L_umulhisi3)
507 1.1 mrg ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
508 1.1 mrg ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
509 1.1 mrg ;;; Clobbers: __tmp_reg__
;; __umulhisi3 (MUL variant): unsigned widening 32 = 16 x 16 from four
;; hardware 8x8 MULs.  The two cross products A0*B1 and A1*B0 are each
;; accumulated into C3:C1 by the identical ADD/ADC sequence at label 1;
;; on small-flash devices that sequence is shared via RCALL 1f (it runs
;; once as a "subroutine", then again on the fall-through path).
;; MUL leaves its product in r1:r0, so __zero_reg__ (r1) must be — and
;; is — re-cleared before use as the zero source and before RET.
510 1.1 mrg DEFUN __umulhisi3
511 1.1 mrg mul A0, B0
512 1.1 mrg movw C0, r0
513 1.1 mrg mul A1, B1
514 1.1 mrg movw C2, r0
515 1.1 mrg mul A0, B1
516 1.1 mrg #ifdef __AVR_HAVE_JMP_CALL__
517 1.1 mrg ;; This function is used by many other routines, often multiple times.
518 1.1 mrg ;; Therefore, if the flash size is not too limited, avoid the RCALL
519 1.1 mrg ;; and invest 6 Bytes to speed things up.
520 1.1 mrg add C1, r0
521 1.1 mrg adc C2, r1
522 1.1 mrg clr __zero_reg__
523 1.1 mrg adc C3, __zero_reg__
524 1.1 mrg #else
525 1.1 mrg rcall 1f
526 1.1 mrg #endif
527 1.1 mrg mul A1, B0
528 1.1 mrg 1: add C1, r0
529 1.1 mrg adc C2, r1
530 1.1 mrg clr __zero_reg__
531 1.1 mrg adc C3, __zero_reg__
532 1.1 mrg ret
533 1.1 mrg ENDF __umulhisi3
534 1.1 mrg #endif /* L_umulhisi3 */
535 1.1 mrg
536 1.1 mrg /*******************************************************
537 1.1 mrg Widening Multiplication 32 = 16 x 32 with MUL
538 1.1 mrg *******************************************************/
539 1.1 mrg
540 1.1 mrg #if defined (L_mulshisi3)
541 1.1 mrg ;;; R25:R22 = (signed long) R27:R26 * R21:R18
542 1.1 mrg ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
543 1.1 mrg ;;; Clobbers: __tmp_reg__
;; __mulshisi3: widening 32 = signed 16 x 32.  If A >= 0 the operands can
;; go straight to the unsigned __muluhisi3; if A < 0 fall through (or
;; branch, on errata cores that cannot skip a 2-word XJMP) into
;; __mulohisi3, which one-extends A by subtracting B << 16 afterwards.
544 1.1 mrg DEFUN __mulshisi3
545 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
546 1.1 mrg ;; Some cores have problem skipping 2-word instruction
547 1.1 mrg tst A1
548 1.1 mrg brmi __mulohisi3
549 1.1 mrg #else
550 1.1 mrg sbrs A1, 7
551 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
552 1.1 mrg XJMP __muluhisi3
553 1.1 mrg ;; FALLTHRU
554 1.1 mrg ENDF __mulshisi3
555 1.1 mrg
556 1.1 mrg ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
557 1.1 mrg ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
558 1.1 mrg ;;; Clobbers: __tmp_reg__
559 1.1 mrg DEFUN __mulohisi3
560 1.1 mrg XCALL __muluhisi3
561 1.1 mrg ;; One-extend R27:R26 (A1:A0)
562 1.1 mrg sub C2, B0
563 1.1 mrg sbc C3, B1
564 1.1 mrg ret
565 1.1 mrg ENDF __mulohisi3
566 1.1 mrg #endif /* L_mulshisi3 */
567 1.1 mrg
568 1.1 mrg #if defined (L_muluhisi3)
569 1.1 mrg ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
570 1.1 mrg ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
571 1.1 mrg ;;; Clobbers: __tmp_reg__
;; __muluhisi3: 32-bit product of unsigned 16-bit A and 32-bit B.
;; The low 16 x 16 part comes from __umulhisi3; the remaining partial
;; products (A*B3 and A*B2) only affect the top bytes and are folded in
;; with plain MULs.  r1 is re-cleared before RET per the avr-gcc ABI.
572 1.1 mrg DEFUN __muluhisi3
573 1.1 mrg XCALL __umulhisi3
574 1.1 mrg mul A0, B3
575 1.1 mrg add C3, r0
576 1.1 mrg mul A1, B2
577 1.1 mrg add C3, r0
578 1.1 mrg mul A0, B2
579 1.1 mrg add C2, r0
580 1.1 mrg adc C3, r1
581 1.1 mrg clr __zero_reg__
582 1.1 mrg ret
583 1.1 mrg ENDF __muluhisi3
584 1.1 mrg #endif /* L_muluhisi3 */
585 1.1 mrg
586 1.1 mrg /*******************************************************
587 1.1 mrg Multiplication 32 x 32 with MUL
588 1.1 mrg *******************************************************/
589 1.1 mrg
590 1.1 mrg #if defined (L_mulsi3)
591 1.1 mrg ;;; R25:R22 = R25:R22 * R21:R18
592 1.1 mrg ;;; (C3:C0) = C3:C0 * B3:B0
593 1.1 mrg ;;; Clobbers: R26, R27, __tmp_reg__
;; __mulsi3 (MUL variant): 32 x 32 -> low 32 bits.  The low word of A is
;; copied to X for __muluhisi3 while the high word is parked on the
;; stack; afterwards the three remaining partial products that touch the
;; result's upper half are added in directly.
594 1.1 mrg DEFUN __mulsi3
595 1.1 mrg movw A0, C0
596 1.1 mrg push C2
597 1.1 mrg push C3
598 1.1 mrg XCALL __muluhisi3
599 1.1 mrg pop A1
600 1.1 mrg pop A0
601 1.1 mrg ;; A1:A0 now contains the high word of A
602 1.1 mrg mul A0, B0
603 1.1 mrg add C2, r0
604 1.1 mrg adc C3, r1
605 1.1 mrg mul A0, B1
606 1.1 mrg add C3, r0
607 1.1 mrg mul A1, B0
608 1.1 mrg add C3, r0
609 1.1 mrg clr __zero_reg__
610 1.1 mrg ret
611 1.1 mrg ENDF __mulsi3
612 1.1 mrg #endif /* L_mulsi3 */
613 1.1 mrg
614 1.1 mrg #undef A0
615 1.1 mrg #undef A1
616 1.1 mrg
617 1.1 mrg #undef B0
618 1.1 mrg #undef B1
619 1.1 mrg #undef B2
620 1.1 mrg #undef B3
621 1.1 mrg
622 1.1 mrg #undef C0
623 1.1 mrg #undef C1
624 1.1 mrg #undef C2
625 1.1 mrg #undef C3
626 1.1 mrg
627 1.1 mrg #endif /* __AVR_HAVE_MUL__ */
628 1.1 mrg
629 1.1 mrg /*******************************************************
630 1.1 mrg Multiplication 24 x 24 with MUL
631 1.1 mrg *******************************************************/
632 1.1 mrg
633 1.1 mrg #if defined (L_mulpsi3)
634 1.1 mrg
635 1.1 mrg ;; A[0..2]: In: Multiplicand; Out: Product
636 1.1 mrg #define A0 22
637 1.1 mrg #define A1 A0+1
638 1.1 mrg #define A2 A0+2
639 1.1 mrg
640 1.1 mrg ;; B[0..2]: In: Multiplier
641 1.1 mrg #define B0 18
642 1.1 mrg #define B1 B0+1
643 1.1 mrg #define B2 B0+2
644 1.1 mrg
645 1.1 mrg #if defined (__AVR_HAVE_MUL__)
646 1.1 mrg
647 1.1 mrg ;; C[0..2]: Expand Result
648 1.1 mrg #define C0 22
649 1.1 mrg #define C1 C0+1
650 1.1 mrg #define C2 C0+2
651 1.1 mrg
652 1.1 mrg ;; R24:R22 *= R20:R18
653 1.1 mrg ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
654 1.1 mrg
655 1.1 mrg #define AA0 26
656 1.1 mrg #define AA2 21
657 1.1 mrg
;; __mulpsi3 (MUL variant): 24 x 24 -> low 24 bits.  A is copied aside
;; (AA2:AA0) because __umulhisi3 overwrites the A/C registers; the two
;; byte-2 cross products are then added into the top result byte only.
658 1.1 mrg DEFUN __mulpsi3
659 1.1 mrg wmov AA0, A0
660 1.1 mrg mov AA2, A2
661 1.1 mrg XCALL __umulhisi3
662 1.1 mrg mul AA2, B0 $ add C2, r0
663 1.1 mrg mul AA0, B2 $ add C2, r0
664 1.1 mrg clr __zero_reg__
665 1.1 mrg ret
666 1.1 mrg ENDF __mulpsi3
667 1.1 mrg
668 1.1 mrg #undef AA2
669 1.1 mrg #undef AA0
670 1.1 mrg
671 1.1 mrg #undef C2
672 1.1 mrg #undef C1
673 1.1 mrg #undef C0
674 1.1 mrg
675 1.1 mrg #else /* !HAVE_MUL */
676 1.1 mrg
677 1.1 mrg ;; C[0..2]: Expand Result
678 1.1 mrg #define C0 0
679 1.1 mrg #define C1 C0+1
680 1.1 mrg #define C2 21
681 1.1 mrg
682 1.1 mrg ;; R24:R22 *= R20:R18
683 1.1 mrg ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
684 1.1 mrg
;; __mulpsi3 (no-MUL variant): 24 x 24 shift-and-add.
;; Result bytes live in C0 = r0 (__tmp_reg__), C1 = r1 (__zero_reg__,
;; already 0 — hence only __tmp_reg__ and C2 need clearing) and C2 = r21.
;; r1 is restored to 0 before RET as the avr-gcc ABI requires.
685 1.1 mrg DEFUN __mulpsi3
686 1.1 mrg
687 1.1 mrg ;; C[] = 0
688 1.1 mrg clr __tmp_reg__
689 1.1 mrg clr C2
690 1.1 mrg
691 1.1 mrg 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
692 1.1 mrg LSR B2 $ ror B1 $ ror B0
693 1.1 mrg
694 1.1 mrg ;; If the N-th Bit of B[] was set...
695 1.1 mrg brcc 1f
696 1.1 mrg
697 1.1 mrg ;; ...then add A[] * 2^N to the Result C[]
698 1.1 mrg ADD C0,A0 $ adc C1,A1 $ adc C2,A2
699 1.1 mrg
700 1.1 mrg 1: ;; Multiply A[] by 2
701 1.1 mrg LSL A0 $ rol A1 $ rol A2
702 1.1 mrg
703 1.1 mrg ;; Loop until B[] is 0
704 1.1 mrg subi B0,0 $ sbci B1,0 $ sbci B2,0
705 1.1 mrg brne 0b
706 1.1 mrg
707 1.1 mrg ;; Copy C[] to the return Register A[]
708 1.1 mrg wmov A0, C0
709 1.1 mrg mov A2, C2
710 1.1 mrg
711 1.1 mrg clr __zero_reg__
712 1.1 mrg ret
713 1.1 mrg ENDF __mulpsi3
714 1.1 mrg
715 1.1 mrg #undef C2
716 1.1 mrg #undef C1
717 1.1 mrg #undef C0
718 1.1 mrg
719 1.1 mrg #endif /* HAVE_MUL */
720 1.1 mrg
721 1.1 mrg #undef B2
722 1.1 mrg #undef B1
723 1.1 mrg #undef B0
724 1.1 mrg
725 1.1 mrg #undef A2
726 1.1 mrg #undef A1
727 1.1 mrg #undef A0
728 1.1 mrg
729 1.1 mrg #endif /* L_mulpsi3 */
730 1.1 mrg
731 1.1 mrg #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
732 1.1 mrg
733 1.1 mrg ;; A[0..2]: In: Multiplicand
734 1.1 mrg #define A0 22
735 1.1 mrg #define A1 A0+1
736 1.1 mrg #define A2 A0+2
737 1.1 mrg
738 1.1 mrg ;; BB: In: Multiplier
739 1.1 mrg #define BB 25
740 1.1 mrg
741 1.1 mrg ;; C[0..2]: Result
742 1.1 mrg #define C0 18
743 1.1 mrg #define C1 C0+1
744 1.1 mrg #define C2 C0+2
745 1.1 mrg
746 1.1 mrg ;; C[] = A[] * sign_extend (BB)
;; __mulsqipsi3: 24-bit C[] = 24-bit A[] * sign-extended 8-bit BB.
;; Three hardware MULs build the unsigned product; if BB is negative the
;; one-extension correction subtracts A << 8 from the top two bytes.
747 1.1 mrg DEFUN __mulsqipsi3
748 1.1 mrg mul A0, BB
749 1.1 mrg movw C0, r0
750 1.1 mrg mul A2, BB
751 1.1 mrg mov C2, r0
752 1.1 mrg mul A1, BB
753 1.1 mrg add C1, r0
754 1.1 mrg adc C2, r1
755 1.1 mrg clr __zero_reg__
756 1.1 mrg sbrs BB, 7
757 1.1 mrg ret
758 1.1 mrg ;; One-extend BB
759 1.1 mrg sub C1, A0
760 1.1 mrg sbc C2, A1
761 1.1 mrg ret
762 1.1 mrg ENDF __mulsqipsi3
763 1.1 mrg
764 1.1 mrg #undef C2
765 1.1 mrg #undef C1
766 1.1 mrg #undef C0
767 1.1 mrg
768 1.1 mrg #undef BB
769 1.1 mrg
770 1.1 mrg #undef A2
771 1.1 mrg #undef A1
772 1.1 mrg #undef A0
773 1.1 mrg
774 1.1 mrg #endif /* L_mulsqipsi3 && HAVE_MUL */
775 1.1 mrg
776 1.1 mrg /*******************************************************
777 1.1 mrg Multiplication 64 x 64
778 1.1 mrg *******************************************************/
779 1.1 mrg
780 1.1 mrg ;; A[] = A[] * B[]
781 1.1 mrg
782 1.1 mrg ;; A[0..7]: In: Multiplicand
783 1.1 mrg ;; Out: Product
784 1.1 mrg #define A0 18
785 1.1 mrg #define A1 A0+1
786 1.1 mrg #define A2 A0+2
787 1.1 mrg #define A3 A0+3
788 1.1 mrg #define A4 A0+4
789 1.1 mrg #define A5 A0+5
790 1.1 mrg #define A6 A0+6
791 1.1 mrg #define A7 A0+7
792 1.1 mrg
793 1.1 mrg ;; B[0..7]: In: Multiplier
794 1.1 mrg #define B0 10
795 1.1 mrg #define B1 B0+1
796 1.1 mrg #define B2 B0+2
797 1.1 mrg #define B3 B0+3
798 1.1 mrg #define B4 B0+4
799 1.1 mrg #define B5 B0+5
800 1.1 mrg #define B6 B0+6
801 1.1 mrg #define B7 B0+7
802 1.1 mrg
803 1.1 mrg #if defined (__AVR_HAVE_MUL__)
804 1.1 mrg
805 1.1 mrg ;; Define C[] for convenience
806 1.1 mrg ;; Notice that parts of C[] overlap A[] respective B[]
807 1.1 mrg #define C0 16
808 1.1 mrg #define C1 C0+1
809 1.1 mrg #define C2 20
810 1.1 mrg #define C3 C2+1
811 1.1 mrg #define C4 28
812 1.1 mrg #define C5 C4+1
813 1.1 mrg #define C6 C4+2
814 1.1 mrg #define C7 C4+3
815 1.1 mrg
816 1.1 mrg #if defined (L_muldi3)
817 1.1 mrg
818 1.1 mrg ;; A[] *= B[]
819 1.1 mrg ;; R25:R18 *= R17:R10
820 1.1 mrg ;; Ordinary ABI-Function
821 1.1 mrg
;; __muldi3 (MUL variant): R25:R18 = R25:R18 * R17:R10, low 64 bits only.
;; Treated as a 4-word x 4-word multiply: the word products whose indices
;; sum to 3 only feed the top bytes and are done with bare MULs first;
;; the remaining word products go through __umulhisi3 / __muldi3_6 with
;; operands staged via X (r27:r26) and the stack.  r28/r29 and r16/r17
;; are call-saved (they overlap C[] here) and so are preserved.
822 1.1 mrg DEFUN __muldi3
823 1.1 mrg push r29
824 1.1 mrg push r28
825 1.1 mrg push r17
826 1.1 mrg push r16
827 1.1 mrg
828 1.1 mrg ;; Counting in Words, we have to perform a 4 * 4 Multiplication
829 1.1 mrg
830 1.1 mrg ;; 3 * 0 + 0 * 3
831 1.1 mrg mul A7,B0 $ $ mov C7,r0
832 1.1 mrg mul A0,B7 $ $ add C7,r0
833 1.1 mrg mul A6,B1 $ $ add C7,r0
834 1.1 mrg mul A6,B0 $ mov C6,r0 $ add C7,r1
835 1.1 mrg mul B6,A1 $ $ add C7,r0
836 1.1 mrg mul B6,A0 $ add C6,r0 $ adc C7,r1
837 1.1 mrg
838 1.1 mrg ;; 1 * 2
839 1.1 mrg mul A2,B4 $ add C6,r0 $ adc C7,r1
840 1.1 mrg mul A3,B4 $ $ add C7,r0
841 1.1 mrg mul A2,B5 $ $ add C7,r0
842 1.1 mrg
843 1.1 mrg push A5
844 1.1 mrg push A4
845 1.1 mrg push B1
846 1.1 mrg push B0
847 1.1 mrg push A3
848 1.1 mrg push A2
849 1.1 mrg
850 1.1 mrg ;; 0 * 0
851 1.1 mrg wmov 26, B0
852 1.1 mrg XCALL __umulhisi3
853 1.1 mrg wmov C0, 22
854 1.1 mrg wmov C2, 24
855 1.1 mrg
856 1.1 mrg ;; 0 * 2
857 1.1 mrg wmov 26, B4
858 1.1 mrg XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
859 1.1 mrg
860 1.1 mrg wmov 26, B2
861 1.1 mrg ;; 0 * 1
862 1.1 mrg XCALL __muldi3_6
863 1.1 mrg
864 1.1 mrg pop A0
865 1.1 mrg pop A1
866 1.1 mrg ;; 1 * 1
867 1.1 mrg wmov 26, B2
868 1.1 mrg XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
869 1.1 mrg
870 1.1 mrg pop r26
871 1.1 mrg pop r27
872 1.1 mrg ;; 1 * 0
873 1.1 mrg XCALL __muldi3_6
874 1.1 mrg
875 1.1 mrg pop A0
876 1.1 mrg pop A1
877 1.1 mrg ;; 2 * 0
878 1.1 mrg XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
879 1.1 mrg
880 1.1 mrg ;; 2 * 1
881 1.1 mrg wmov 26, B2
882 1.1 mrg XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
883 1.1 mrg
884 1.1 mrg ;; A[] = C[]
885 1.1 mrg wmov A0, C0
886 1.1 mrg ;; A2 = C2 already
887 1.1 mrg wmov A4, C4
888 1.1 mrg wmov A6, C6
889 1.1 mrg
890 1.1 mrg clr __zero_reg__
891 1.1 mrg pop r16
892 1.1 mrg pop r17
893 1.1 mrg pop r28
894 1.1 mrg pop r29
895 1.1 mrg ret
896 1.1 mrg ENDF __muldi3
897 1.1 mrg #endif /* L_muldi3 */
898 1.1 mrg
899 1.1 mrg #if defined (L_muldi3_6)
900 1.1 mrg ;; A helper for some 64-bit multiplications with MUL available
;; __muldi3_6: run __umulhisi3 on the operands already staged in X and
;; r19:r18, then accumulate its 32-bit product (r25:r22) into C5:C2,
;; propagating a final carry into C7:C6 via ADIW.
901 1.1 mrg DEFUN __muldi3_6
902 1.1 mrg __muldi3_6:
903 1.1 mrg XCALL __umulhisi3
904 1.1 mrg add C2, 22
905 1.1 mrg adc C3, 23
906 1.1 mrg adc C4, 24
907 1.1 mrg adc C5, 25
908 1.1 mrg brcc 0f
909 1.1 mrg adiw C6, 1
910 1.1 mrg 0: ret
911 1.1 mrg ENDF __muldi3_6
912 1.1 mrg #endif /* L_muldi3_6 */
913 1.1 mrg
914 1.1 mrg #undef C7
915 1.1 mrg #undef C6
916 1.1 mrg #undef C5
917 1.1 mrg #undef C4
918 1.1 mrg #undef C3
919 1.1 mrg #undef C2
920 1.1 mrg #undef C1
921 1.1 mrg #undef C0
922 1.1 mrg
923 1.1 mrg #else /* !HAVE_MUL */
924 1.1 mrg
925 1.1 mrg #if defined (L_muldi3)
926 1.1 mrg
927 1.1 mrg #define C0 26
928 1.1 mrg #define C1 C0+1
929 1.1 mrg #define C2 C0+2
930 1.1 mrg #define C3 C0+3
931 1.1 mrg #define C4 C0+4
932 1.1 mrg #define C5 C0+5
933 1.1 mrg #define C6 0
934 1.1 mrg #define C7 C6+1
935 1.1 mrg
936 1.1 mrg #define Loop 9
937 1.1 mrg
938 1.1 mrg ;; A[] *= B[]
939 1.1 mrg ;; R25:R18 *= R17:R10
940 1.1 mrg ;; Ordinary ABI-Function
941 1.1 mrg
;; __muldi3 (no-MUL variant): 64 x 64 shift-and-add over 64 iterations.
;; B[] is ROTATED rather than shifted (the consumed bit is put back at
;; the top with ORI when set), so B[] regains its original value when
;; the loop ends.  C6:C7 live in r0:r1; r1 is re-zeroed before RET.
942 1.1 mrg DEFUN __muldi3
943 1.1 mrg push r29
944 1.1 mrg push r28
945 1.1 mrg push Loop
946 1.1 mrg
947 1.1 mrg ldi C0, 64
948 1.1 mrg mov Loop, C0
949 1.1 mrg
950 1.1 mrg ;; C[] = 0
951 1.1 mrg clr __tmp_reg__
952 1.1 mrg wmov C0, 0
953 1.1 mrg wmov C2, 0
954 1.1 mrg wmov C4, 0
955 1.1 mrg
956 1.1 mrg 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
957 1.1 mrg ;; where N = 64 - Loop.
958 1.1 mrg ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
959 1.1 mrg ;; B[] will have its initial Value again.
960 1.1 mrg LSR B7 $ ror B6 $ ror B5 $ ror B4
961 1.1 mrg ror B3 $ ror B2 $ ror B1 $ ror B0
962 1.1 mrg
963 1.1 mrg ;; If the N-th Bit of B[] was set then...
964 1.1 mrg brcc 1f
965 1.1 mrg ;; ...finish Rotation...
966 1.1 mrg ori B7, 1 << 7
967 1.1 mrg
968 1.1 mrg ;; ...and add A[] * 2^N to the Result C[]
969 1.1 mrg ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
970 1.1 mrg adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
971 1.1 mrg
972 1.1 mrg 1: ;; Multiply A[] by 2
973 1.1 mrg LSL A0 $ rol A1 $ rol A2 $ rol A3
974 1.1 mrg rol A4 $ rol A5 $ rol A6 $ rol A7
975 1.1 mrg
976 1.1 mrg dec Loop
977 1.1 mrg brne 0b
978 1.1 mrg
979 1.1 mrg ;; We expanded the Result in C[]
980 1.1 mrg ;; Copy Result to the Return Register A[]
981 1.1 mrg wmov A0, C0
982 1.1 mrg wmov A2, C2
983 1.1 mrg wmov A4, C4
984 1.1 mrg wmov A6, C6
985 1.1 mrg
986 1.1 mrg clr __zero_reg__
987 1.1 mrg pop Loop
988 1.1 mrg pop r28
989 1.1 mrg pop r29
990 1.1 mrg ret
991 1.1 mrg ENDF __muldi3
992 1.1 mrg
993 1.1 mrg #undef Loop
994 1.1 mrg
995 1.1 mrg #undef C7
996 1.1 mrg #undef C6
997 1.1 mrg #undef C5
998 1.1 mrg #undef C4
999 1.1 mrg #undef C3
1000 1.1 mrg #undef C2
1001 1.1 mrg #undef C1
1002 1.1 mrg #undef C0
1003 1.1 mrg
1004 1.1 mrg #endif /* L_muldi3 */
1005 1.1 mrg #endif /* HAVE_MUL */
1006 1.1 mrg
1007 1.1 mrg #undef B7
1008 1.1 mrg #undef B6
1009 1.1 mrg #undef B5
1010 1.1 mrg #undef B4
1011 1.1 mrg #undef B3
1012 1.1 mrg #undef B2
1013 1.1 mrg #undef B1
1014 1.1 mrg #undef B0
1015 1.1 mrg
1016 1.1 mrg #undef A7
1017 1.1 mrg #undef A6
1018 1.1 mrg #undef A5
1019 1.1 mrg #undef A4
1020 1.1 mrg #undef A3
1021 1.1 mrg #undef A2
1022 1.1 mrg #undef A1
1023 1.1 mrg #undef A0
1024 1.1 mrg
1025 1.1 mrg /*******************************************************
1026 1.1 mrg Widening Multiplication 64 = 32 x 32 with MUL
1027 1.1 mrg *******************************************************/
1028 1.1 mrg
1029 1.1 mrg #if defined (__AVR_HAVE_MUL__)
1030 1.1 mrg #define A0 r22
1031 1.1 mrg #define A1 r23
1032 1.1 mrg #define A2 r24
1033 1.1 mrg #define A3 r25
1034 1.1 mrg
1035 1.1 mrg #define B0 r18
1036 1.1 mrg #define B1 r19
1037 1.1 mrg #define B2 r20
1038 1.1 mrg #define B3 r21
1039 1.1 mrg
1040 1.1 mrg #define C0 18
1041 1.1 mrg #define C1 C0+1
1042 1.1 mrg #define C2 20
1043 1.1 mrg #define C3 C2+1
1044 1.1 mrg #define C4 28
1045 1.1 mrg #define C5 C4+1
1046 1.1 mrg #define C6 C4+2
1047 1.1 mrg #define C7 C4+3
1048 1.1 mrg
1049 1.1 mrg #if defined (L_umulsidi3)
1050 1.1 mrg
1051 1.1 mrg ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1052 1.1 mrg
1053 1.1 mrg ;; R18[8] = R22[4] * R18[4]
1054 1.1 mrg ;;
1055 1.1 mrg ;; Ordinary ABI Function, but additionally sets
1056 1.1 mrg ;; X = R20[2] = B2[2]
1057 1.1 mrg ;; Z = R22[2] = A0[2]
;; __umulsidi3: unsigned widening 64 = 32 x 32 (R18[8] = R22[4] * R18[4]).
;; Clears T and falls through to the helper, which interprets T as the
;; sign of A: when T is set, B << 32 is subtracted from the high part to
;; turn the unsigned product into a signed one (used by __mulsidi3).
;; Additionally leaves B2[2] in X and A0[2] in Z for the signed caller.
1058 1.1 mrg DEFUN __umulsidi3
1059 1.1 mrg clt
1060 1.1 mrg ;; FALLTHRU
1061 1.1 mrg ENDF __umulsidi3
1062 1.1 mrg ;; T = sign (A)
1063 1.1 mrg DEFUN __umulsidi3_helper
1064 1.1 mrg push 29 $ push 28 ; Y
1065 1.1 mrg wmov 30, A2
1066 1.1 mrg ;; Counting in Words, we have to perform 4 Multiplications
1067 1.1 mrg ;; 0 * 0
1068 1.1 mrg wmov 26, A0
1069 1.1 mrg XCALL __umulhisi3
1070 1.1 mrg push 23 $ push 22 ; C0
1071 1.1 mrg wmov 28, B0
1072 1.1 mrg wmov 18, B2
1073 1.1 mrg wmov C2, 24
1074 1.1 mrg push 27 $ push 26 ; A0
1075 1.1 mrg push 19 $ push 18 ; B2
1076 1.1 mrg ;;
1077 1.1 mrg ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1078 1.1 mrg ;; B2 C2 -- -- -- B0 A2
1079 1.1 mrg ;; 1 * 1
1080 1.1 mrg wmov 26, 30 ; A2
1081 1.1 mrg XCALL __umulhisi3
1082 1.1 mrg ;; Sign-extend A. T holds the sign of A
1083 1.1 mrg brtc 0f
1084 1.1 mrg ;; Subtract B from the high part of the result
1085 1.1 mrg sub 22, 28
1086 1.1 mrg sbc 23, 29
1087 1.1 mrg sbc 24, 18
1088 1.1 mrg sbc 25, 19
1089 1.1 mrg 0: wmov 18, 28 ;; B0
1090 1.1 mrg wmov C4, 22
1091 1.1 mrg wmov C6, 24
1092 1.1 mrg ;;
1093 1.1 mrg ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1094 1.1 mrg ;; B0 C2 -- -- A2 C4 C6
1095 1.1 mrg ;;
1096 1.1 mrg ;; 1 * 0
1097 1.1 mrg XCALL __muldi3_6
1098 1.1 mrg ;; 0 * 1
1099 1.1 mrg pop 26 $ pop 27 ;; B2
1100 1.1 mrg pop 18 $ pop 19 ;; A0
1101 1.1 mrg XCALL __muldi3_6
1102 1.1 mrg
1103 1.1 mrg ;; Move result C into place and save A0 in Z
1104 1.1 mrg wmov 22, C4
1105 1.1 mrg wmov 24, C6
1106 1.1 mrg wmov 30, 18 ; A0
1107 1.1 mrg pop C0 $ pop C1
1108 1.1 mrg
1109 1.1 mrg ;; Epilogue
1110 1.1 mrg pop 28 $ pop 29 ;; Y
1111 1.1 mrg ret
1112 1.1 mrg ENDF __umulsidi3_helper
1113 1.1 mrg #endif /* L_umulsidi3 */
1114 1.1 mrg
1115 1.1 mrg
1116 1.1 mrg #if defined (L_mulsidi3)
1117 1.1 mrg
1118 1.1 mrg ;; Signed widening 64 = 32 * 32 Multiplication
1119 1.1 mrg ;;
1120 1.1 mrg ;; R18[8] = R22[4] * R18[4]
1121 1.1 mrg ;; Ordinary ABI Function
1122 1.1 mrg DEFUN __mulsidi3
             ;; Signed product = unsigned product, minus A<<32 if B < 0 and
             ;; minus B<<32 if A < 0.  A's sign goes to T and is handled
             ;; inside __umulsidi3_helper; B's sign is handled here.
1123 1.1 mrg     bst A3, 7
1124 1.1 mrg     sbrs B3, 7 ; Enhanced core has no skip bug
1125 1.1 mrg     XJMP __umulsidi3_helper
1126 1.1 mrg
1127 1.1 mrg     ;; B needs sign-extension
1128 1.1 mrg     push A3
1129 1.1 mrg     push A2
1130 1.1 mrg     XCALL __umulsidi3_helper
1131 1.1 mrg     ;; A0 survived in Z
             ;; Subtract A (low half from Z, high half from the pushed A2/A3)
             ;; from the upper 32 bits of the result.
1132 1.1 mrg     sub r22, r30
1133 1.1 mrg     sbc r23, r31
1134 1.1 mrg     pop r26
1135 1.1 mrg     pop r27
1136 1.1 mrg     sbc r24, r26
1137 1.1 mrg     sbc r25, r27
1138 1.1 mrg     ret
1139 1.1 mrg ENDF __mulsidi3
1140 1.1 mrg #endif /* L_mulsidi3 */
1141 1.1 mrg
1142 1.1 mrg #undef A0
1143 1.1 mrg #undef A1
1144 1.1 mrg #undef A2
1145 1.1 mrg #undef A3
1146 1.1 mrg #undef B0
1147 1.1 mrg #undef B1
1148 1.1 mrg #undef B2
1149 1.1 mrg #undef B3
1150 1.1 mrg #undef C0
1151 1.1 mrg #undef C1
1152 1.1 mrg #undef C2
1153 1.1 mrg #undef C3
1154 1.1 mrg #undef C4
1155 1.1 mrg #undef C5
1156 1.1 mrg #undef C6
1157 1.1 mrg #undef C7
1158 1.1 mrg #endif /* HAVE_MUL */
1159 1.1 mrg
1160 1.1 mrg /**********************************************************
1161 1.1 mrg Widening Multiplication 64 = 32 x 32 without MUL
1162 1.1 mrg **********************************************************/
1163 1.1 mrg
1164 1.1 mrg #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1165 1.1 mrg #define A0 18
1166 1.1 mrg #define A1 A0+1
1167 1.1 mrg #define A2 A0+2
1168 1.1 mrg #define A3 A0+3
1169 1.1 mrg #define A4 A0+4
1170 1.1 mrg #define A5 A0+5
1171 1.1 mrg #define A6 A0+6
1172 1.1 mrg #define A7 A0+7
1173 1.1 mrg
1174 1.1 mrg #define B0 10
1175 1.1 mrg #define B1 B0+1
1176 1.1 mrg #define B2 B0+2
1177 1.1 mrg #define B3 B0+3
1178 1.1 mrg #define B4 B0+4
1179 1.1 mrg #define B5 B0+5
1180 1.1 mrg #define B6 B0+6
1181 1.1 mrg #define B7 B0+7
1182 1.1 mrg
1183 1.1 mrg #define AA0 22
1184 1.1 mrg #define AA1 AA0+1
1185 1.1 mrg #define AA2 AA0+2
1186 1.1 mrg #define AA3 AA0+3
1187 1.1 mrg
1188 1.1 mrg #define BB0 18
1189 1.1 mrg #define BB1 BB0+1
1190 1.1 mrg #define BB2 BB0+2
1191 1.1 mrg #define BB3 BB0+3
1192 1.1 mrg
1193 1.1 mrg #define Mask r30
1194 1.1 mrg
1195 1.1 mrg ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1196 1.1 mrg ;;
1197 1.1 mrg ;; R18[8] = R22[4] * R18[4]
1198 1.1 mrg ;; Ordinary ABI Function
1199 1.1 mrg DEFUN __mulsidi3
1200 1.1 mrg     set
1201 1.1 mrg     skip
1202 1.1 mrg     ;; FALLTHRU
1203 1.1 mrg ENDF __mulsidi3
1204 1.1 mrg
             ;; T selects signedness: T = 1 (from __mulsidi3) => sign-extend
             ;; the 32-bit operands to 64 bits; T = 0 => zero-extend.
1205 1.1 mrg DEFUN __umulsidi3
1206 1.1 mrg     clt ; skipped
1207 1.1 mrg     ;; Save 10 Registers: R10..R17, R28, R29
1208 1.1 mrg     do_prologue_saves 10
             ;; Mask = 0xFF if signed (T=1), 0x7F if unsigned (T=0):
             ;; ANDing the high byte then keeps or clears the sign bit, so the
             ;; LSL / SBC pair below produces 0xFF or 0x00 extension bytes.
1209 1.1 mrg     ldi Mask, 0xff
1210 1.1 mrg     bld Mask, 7
1211 1.1 mrg     ;; Move B into place...
1212 1.1 mrg     wmov B0, BB0
1213 1.1 mrg     wmov B2, BB2
1214 1.1 mrg     ;; ...and extend it
1215 1.1 mrg     and BB3, Mask
1216 1.1 mrg     lsl BB3
1217 1.1 mrg     sbc B4, B4
1218 1.1 mrg     mov B5, B4
1219 1.1 mrg     wmov B6, B4
1220 1.1 mrg     ;; Move A into place...
1221 1.1 mrg     wmov A0, AA0
1222 1.1 mrg     wmov A2, AA2
1223 1.1 mrg     ;; ...and extend it
1224 1.1 mrg     and AA3, Mask
1225 1.1 mrg     lsl AA3
1226 1.1 mrg     sbc A4, A4
1227 1.1 mrg     mov A5, A4
1228 1.1 mrg     wmov A6, A4
             ;; 64 x 64 multiply; low 64 bits of the widened product are exact.
1229 1.1 mrg     XCALL __muldi3
1230 1.1 mrg     do_epilogue_restores 10
1231 1.1 mrg ENDF __umulsidi3
1232 1.1 mrg
1233 1.1 mrg #undef A0
1234 1.1 mrg #undef A1
1235 1.1 mrg #undef A2
1236 1.1 mrg #undef A3
1237 1.1 mrg #undef A4
1238 1.1 mrg #undef A5
1239 1.1 mrg #undef A6
1240 1.1 mrg #undef A7
1241 1.1 mrg #undef B0
1242 1.1 mrg #undef B1
1243 1.1 mrg #undef B2
1244 1.1 mrg #undef B3
1245 1.1 mrg #undef B4
1246 1.1 mrg #undef B5
1247 1.1 mrg #undef B6
1248 1.1 mrg #undef B7
1249 1.1 mrg #undef AA0
1250 1.1 mrg #undef AA1
1251 1.1 mrg #undef AA2
1252 1.1 mrg #undef AA3
1253 1.1 mrg #undef BB0
1254 1.1 mrg #undef BB1
1255 1.1 mrg #undef BB2
1256 1.1 mrg #undef BB3
1257 1.1 mrg #undef Mask
1258 1.1 mrg #endif /* L_mulsidi3 && !HAVE_MUL */
1259 1.1 mrg
1260 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1261 1.1 mrg
1262 1.1 mrg
1263 1.1 mrg .section .text.libgcc.div, "ax", @progbits
1265 1.1 mrg
1266 1.1 mrg /*******************************************************
1267 1.1 mrg Division 8 / 8 => (result + remainder)
1268 1.1 mrg *******************************************************/
1269 1.1 mrg #define r_rem r25 /* remainder */
1270 1.1 mrg #define r_arg1 r24 /* dividend, quotient */
1271 1.1 mrg #define r_arg2 r22 /* divisor */
1272 1.1 mrg #define r_cnt r23 /* loop count */
1273 1.1 mrg
1274 1.1 mrg #if defined (L_udivmodqi4)
             ;; Classic 8-step shift-and-subtract division.  Each pass shifts
             ;; the complemented quotient bit (C flag) into r_arg1, which is
             ;; why the result is COMplemented at the end.
1275 1.1 mrg DEFUN __udivmodqi4
1276 1.1 mrg         sub     r_rem,r_rem     ; clear remainder and carry
1277 1.1 mrg         ldi     r_cnt,9         ; init loop counter
1278 1.1 mrg         rjmp    __udivmodqi4_ep ; jump to entry point
1279 1.1 mrg __udivmodqi4_loop:
1280 1.1 mrg         rol     r_rem           ; shift dividend into remainder
1281 1.1 mrg         cp      r_rem,r_arg2    ; compare remainder & divisor
1282 1.1 mrg         brcs    __udivmodqi4_ep ; remainder < divisor
1283 1.1 mrg         sub     r_rem,r_arg2    ; subtract divisor from remainder
1284 1.1 mrg __udivmodqi4_ep:
1285 1.1 mrg         rol     r_arg1          ; shift dividend (with CARRY)
1286 1.1 mrg         dec     r_cnt           ; decrement loop counter
1287 1.1 mrg         brne    __udivmodqi4_loop
1288 1.1 mrg         com     r_arg1          ; complement result
1289 1.1 mrg                                 ; because C flag was complemented in loop
1290 1.1 mrg         ret
1291 1.1 mrg ENDF __udivmodqi4
1292 1.1 mrg #endif /* defined (L_udivmodqi4) */
1293 1.1 mrg
1294 1.1 mrg #if defined (L_divmodqi4)
             ;; Signed 8-bit div/mod: strip the signs, do the unsigned divide,
             ;; then fix up.  T = sign of dividend (drives remainder sign);
             ;; r0.7 = sign of quotient (A.sign ^ B.sign).
1295 1.1 mrg DEFUN __divmodqi4
1296 1.1 mrg         bst     r_arg1,7        ; store sign of dividend
1297 1.1 mrg         mov     __tmp_reg__,r_arg1
1298 1.1 mrg         eor     __tmp_reg__,r_arg2; r0.7 is sign of result
1299 1.1 mrg         sbrc    r_arg1,7
1300 1.1 mrg         neg     r_arg1          ; dividend negative : negate
1301 1.1 mrg         sbrc    r_arg2,7
1302 1.1 mrg         neg     r_arg2          ; divisor negative : negate
1303 1.1 mrg         XCALL   __udivmodqi4    ; do the unsigned div/mod
1304 1.1 mrg         brtc    __divmodqi4_1
1305 1.1 mrg         neg     r_rem           ; correct remainder sign
1306 1.1 mrg __divmodqi4_1:
1307 1.1 mrg         sbrc    __tmp_reg__,7
1308 1.1 mrg         neg     r_arg1          ; correct result sign
1309 1.1 mrg __divmodqi4_exit:
1310 1.1 mrg         ret
1311 1.1 mrg ENDF __divmodqi4
1312 1.1 mrg #endif /* defined (L_divmodqi4) */
1313 1.1 mrg
1314 1.1 mrg #undef r_rem
1315 1.1 mrg #undef r_arg1
1316 1.1 mrg #undef r_arg2
1317 1.1 mrg #undef r_cnt
1318 1.1 mrg
1319 1.1 mrg
1320 1.1 mrg /*******************************************************
1321 1.1 mrg Division 16 / 16 => (result + remainder)
1322 1.1 mrg *******************************************************/
1323 1.1 mrg #define r_remL r26 /* remainder Low */
1324 1.1 mrg #define r_remH r27 /* remainder High */
1325 1.1 mrg
1326 1.1 mrg /* return: remainder */
1327 1.1 mrg #define r_arg1L r24 /* dividend Low */
1328 1.1 mrg #define r_arg1H r25 /* dividend High */
1329 1.1 mrg
1330 1.1 mrg /* return: quotient */
1331 1.1 mrg #define r_arg2L r22 /* divisor Low */
1332 1.1 mrg #define r_arg2H r23 /* divisor High */
1333 1.1 mrg
1334 1.1 mrg #define r_cnt r21 /* loop count */
1335 1.1 mrg
1336 1.1 mrg #if defined (L_udivmodhi4)
             ;; 16-bit shift-and-subtract division, same scheme as
             ;; __udivmodqi4.  Returns quotient in r23:r22 and remainder in
             ;; r25:r24, matching the div() function layout.
1337 1.1 mrg DEFUN __udivmodhi4
1338 1.1 mrg         sub     r_remL,r_remL
1339 1.1 mrg         sub     r_remH,r_remH   ; clear remainder and carry
1340 1.1 mrg         ldi     r_cnt,17        ; init loop counter
1341 1.1 mrg         rjmp    __udivmodhi4_ep ; jump to entry point
1342 1.1 mrg __udivmodhi4_loop:
1343 1.1 mrg         rol     r_remL          ; shift dividend into remainder
1344 1.1 mrg         rol     r_remH
1345 1.1 mrg         cp      r_remL,r_arg2L  ; compare remainder & divisor
1346 1.1 mrg         cpc     r_remH,r_arg2H
1347 1.1 mrg         brcs    __udivmodhi4_ep ; remainder < divisor
1348 1.1 mrg         sub     r_remL,r_arg2L  ; subtract divisor from remainder
1349 1.1 mrg         sbc     r_remH,r_arg2H
1350 1.1 mrg __udivmodhi4_ep:
1351 1.1 mrg         rol     r_arg1L         ; shift dividend (with CARRY)
1352 1.1 mrg         rol     r_arg1H
1353 1.1 mrg         dec     r_cnt           ; decrement loop counter
1354 1.1 mrg         brne    __udivmodhi4_loop
1355 1.1 mrg         com     r_arg1L         ; complement: loop shifted inverted C bits
1356 1.1 mrg         com     r_arg1H
1357 1.1 mrg         ; div/mod results to return registers, as for the div() function
1358 1.1 mrg         mov_l   r_arg2L, r_arg1L        ; quotient
1359 1.1 mrg         mov_h   r_arg2H, r_arg1H
1360 1.1 mrg         mov_l   r_arg1L, r_remL         ; remainder
1361 1.1 mrg         mov_h   r_arg1H, r_remH
1362 1.1 mrg         ret
1363 1.1 mrg ENDF __udivmodhi4
1364 1.1 mrg #endif /* defined (L_udivmodhi4) */
1365 1.1 mrg
1366 1.1 mrg #if defined (L_divmodhi4)
             ;; Signed 16-bit div/mod; also exported as _div (avr-libc alias).
             ;; T = sign of dividend, r0.7 = sign of quotient.  The final
             ;; brtc falls through into the negation helper so the remainder
             ;; fixup costs no extra call.
1367 1.1 mrg DEFUN __divmodhi4
1368 1.1 mrg     .global _div
1369 1.1 mrg _div:
1370 1.1 mrg         bst     r_arg1H,7       ; store sign of dividend
1371 1.1 mrg         mov     __tmp_reg__,r_arg2H
1372 1.1 mrg         brtc    0f
1373 1.1 mrg         com     __tmp_reg__     ; r0.7 is sign of result
1374 1.1 mrg         rcall   __divmodhi4_neg1 ; dividend negative: negate
1375 1.1 mrg 0:
1376 1.1 mrg         sbrc    r_arg2H,7
1377 1.1 mrg         rcall   __divmodhi4_neg2 ; divisor negative: negate
1378 1.1 mrg         XCALL   __udivmodhi4    ; do the unsigned div/mod
1379 1.1 mrg         sbrc    __tmp_reg__,7
1380 1.1 mrg         rcall   __divmodhi4_neg2 ; correct remainder sign
1381 1.1 mrg         brtc    __divmodhi4_exit
1382 1.1 mrg __divmodhi4_neg1:
1383 1.1 mrg         ;; correct dividend/remainder sign
1384 1.1 mrg         com     r_arg1H
1385 1.1 mrg         neg     r_arg1L
1386 1.1 mrg         sbci    r_arg1H,0xff
1387 1.1 mrg         ret
1388 1.1 mrg __divmodhi4_neg2:
1389 1.1 mrg         ;; correct divisor/result sign
1390 1.1 mrg         com     r_arg2H
1391 1.1 mrg         neg     r_arg2L
1392 1.1 mrg         sbci    r_arg2H,0xff
1393 1.1 mrg __divmodhi4_exit:
1394 1.1 mrg         ret
1395 1.1 mrg ENDF __divmodhi4
1396 1.1 mrg #endif /* defined (L_divmodhi4) */
1397 1.1 mrg
1398 1.1 mrg #undef r_remH
1399 1.1 mrg #undef r_remL
1400 1.1 mrg
1401 1.1 mrg #undef r_arg1H
1402 1.1 mrg #undef r_arg1L
1403 1.1 mrg
1404 1.1 mrg #undef r_arg2H
1405 1.1 mrg #undef r_arg2L
1406 1.1 mrg
1407 1.1 mrg #undef r_cnt
1408 1.1 mrg
1409 1.1 mrg /*******************************************************
1410 1.1 mrg Division 24 / 24 => (result + remainder)
1411 1.1 mrg *******************************************************/
1412 1.1 mrg
1413 1.1 mrg ;; A[0..2]: In: Dividend; Out: Quotient
1414 1.1 mrg #define A0 22
1415 1.1 mrg #define A1 A0+1
1416 1.1 mrg #define A2 A0+2
1417 1.1 mrg
1418 1.1 mrg ;; B[0..2]: In: Divisor; Out: Remainder
1419 1.1 mrg #define B0 18
1420 1.1 mrg #define B1 B0+1
1421 1.1 mrg #define B2 B0+2
1422 1.1 mrg
1423 1.1 mrg ;; C[0..2]: Expand remainder
1424 1.1 mrg #define C0 __zero_reg__
1425 1.1 mrg #define C1 26
1426 1.1 mrg #define C2 25
1427 1.1 mrg
1428 1.1 mrg ;; Loop counter
1429 1.1 mrg #define r_cnt 21
1430 1.1 mrg
1431 1.1 mrg #if defined (L_udivmodpsi4)
1432 1.1 mrg ;; R24:R22 = R24:R22 udiv R20:R18
1433 1.1 mrg ;; R20:R18 = R24:R22 umod R20:R18
1434 1.1 mrg ;; Clobbers: R21, R25, R26
1435 1.1 mrg
             ;; 24-bit shift-and-subtract division (same scheme as the 8/16-bit
             ;; variants).  C0 is __zero_reg__, used as remainder byte and
             ;; cleared again before returning.
1436 1.1 mrg DEFUN __udivmodpsi4
1437 1.1 mrg     ; init loop counter
1438 1.1 mrg     ldi r_cnt, 24+1
1439 1.1 mrg     ; Clear remainder and carry.  C0 is already 0
1440 1.1 mrg     clr C1
1441 1.1 mrg     sub C2, C2
1442 1.1 mrg     ; jump to entry point
1443 1.1 mrg     rjmp __udivmodpsi4_start
1444 1.1 mrg __udivmodpsi4_loop:
1445 1.1 mrg     ; shift dividend into remainder
1446 1.1 mrg     rol C0
1447 1.1 mrg     rol C1
1448 1.1 mrg     rol C2
1449 1.1 mrg     ; compare remainder & divisor
1450 1.1 mrg     cp C0, B0
1451 1.1 mrg     cpc C1, B1
1452 1.1 mrg     cpc C2, B2
1453 1.1 mrg     brcs __udivmodpsi4_start ; remainder < divisor
1454 1.1 mrg     sub C0, B0 ; subtract divisor from remainder
1455 1.1 mrg     sbc C1, B1
1456 1.1 mrg     sbc C2, B2
1457 1.1 mrg __udivmodpsi4_start:
1458 1.1 mrg     ; shift dividend (with CARRY)
1459 1.1 mrg     rol A0
1460 1.1 mrg     rol A1
1461 1.1 mrg     rol A2
1462 1.1 mrg     ; decrement loop counter
1463 1.1 mrg     dec r_cnt
1464 1.1 mrg     brne __udivmodpsi4_loop
1465 1.1 mrg     com A0 ; complement: loop shifted inverted C bits
1466 1.1 mrg     com A1
1467 1.1 mrg     com A2
1468 1.1 mrg     ; div/mod results to return registers
1469 1.1 mrg     ; remainder
1470 1.1 mrg     mov B0, C0
1471 1.1 mrg     mov B1, C1
1472 1.1 mrg     mov B2, C2
1473 1.1 mrg     clr __zero_reg__ ; C0
1474 1.1 mrg     ret
1475 1.1 mrg ENDF __udivmodpsi4
1476 1.1 mrg #endif /* defined (L_udivmodpsi4) */
1477 1.1 mrg
1478 1.1 mrg #if defined (L_divmodpsi4)
1479 1.1 mrg ;; R24:R22 = R24:R22 div R20:R18
1480 1.1 mrg ;; R20:R18 = R24:R22 mod R20:R18
1481 1.1 mrg ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1482 1.1 mrg
             ;; Signed 24-bit div/mod wrapper around __udivmodpsi4.
             ;; T = sign of dividend (remainder sign), r0.7 = quotient sign.
             ;; The final brtc falls through into __divmodpsi4_negB to
             ;; negate the remainder without an extra call.
1483 1.1 mrg DEFUN __divmodpsi4
1484 1.1 mrg     ; R0.7 will contain the sign of the result:
1485 1.1 mrg     ; R0.7 = A.sign ^ B.sign
1486 1.1 mrg     mov __tmp_reg__, B2
1487 1.1 mrg     ; T-flag = sign of dividend
1488 1.1 mrg     bst A2, 7
1489 1.1 mrg     brtc 0f
1490 1.1 mrg     com __tmp_reg__
1491 1.1 mrg     ; Adjust dividend's sign
1492 1.1 mrg     rcall __divmodpsi4_negA
1493 1.1 mrg 0:
1494 1.1 mrg     ; Adjust divisor's sign
1495 1.1 mrg     sbrc B2, 7
1496 1.1 mrg     rcall __divmodpsi4_negB
1497 1.1 mrg
1498 1.1 mrg     ; Do the unsigned div/mod
1499 1.1 mrg     XCALL __udivmodpsi4
1500 1.1 mrg
1501 1.1 mrg     ; Adjust quotient's sign
1502 1.1 mrg     sbrc __tmp_reg__, 7
1503 1.1 mrg     rcall __divmodpsi4_negA
1504 1.1 mrg
1505 1.1 mrg     ; Adjust remainder's sign
1506 1.1 mrg     brtc __divmodpsi4_end
1507 1.1 mrg
1508 1.1 mrg __divmodpsi4_negB:
1509 1.1 mrg     ; Correct divisor/remainder sign
1510 1.1 mrg     com B2
1511 1.1 mrg     com B1
1512 1.1 mrg     neg B0
1513 1.1 mrg     sbci B1, -1
1514 1.1 mrg     sbci B2, -1
1515 1.1 mrg     ret
1516 1.1 mrg
1517 1.1 mrg     ; Correct dividend/quotient sign
1518 1.1 mrg __divmodpsi4_negA:
1519 1.1 mrg     com A2
1520 1.1 mrg     com A1
1521 1.1 mrg     neg A0
1522 1.1 mrg     sbci A1, -1
1523 1.1 mrg     sbci A2, -1
1524 1.1 mrg __divmodpsi4_end:
1525 1.1 mrg     ret
1526 1.1 mrg
1527 1.1 mrg ENDF __divmodpsi4
1528 1.1 mrg #endif /* defined (L_divmodpsi4) */
1529 1.1 mrg
1530 1.1 mrg #undef A0
1531 1.1 mrg #undef A1
1532 1.1 mrg #undef A2
1533 1.1 mrg
1534 1.1 mrg #undef B0
1535 1.1 mrg #undef B1
1536 1.1 mrg #undef B2
1537 1.1 mrg
1538 1.1 mrg #undef C0
1539 1.1 mrg #undef C1
1540 1.1 mrg #undef C2
1541 1.1 mrg
1542 1.1 mrg #undef r_cnt
1543 1.1 mrg
1544 1.1 mrg /*******************************************************
1545 1.1 mrg Division 32 / 32 => (result + remainder)
1546 1.1 mrg *******************************************************/
1547 1.1 mrg #define r_remHH r31 /* remainder High */
1548 1.1 mrg #define r_remHL r30
1549 1.1 mrg #define r_remH r27
1550 1.1 mrg #define r_remL r26 /* remainder Low */
1551 1.1 mrg
1552 1.1 mrg /* return: remainder */
1553 1.1 mrg #define r_arg1HH r25 /* dividend High */
1554 1.1 mrg #define r_arg1HL r24
1555 1.1 mrg #define r_arg1H r23
1556 1.1 mrg #define r_arg1L r22 /* dividend Low */
1557 1.1 mrg
1558 1.1 mrg /* return: quotient */
1559 1.1 mrg #define r_arg2HH r21 /* divisor High */
1560 1.1 mrg #define r_arg2HL r20
1561 1.1 mrg #define r_arg2H r19
1562 1.1 mrg #define r_arg2L r18 /* divisor Low */
1563 1.1 mrg
1564 1.1 mrg #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1565 1.1 mrg
1566 1.1 mrg #if defined (L_udivmodsi4)
             ;; 32-bit shift-and-subtract division.  __zero_reg__ doubles as
             ;; the loop counter and is 0 again when the loop ends.  Results
             ;; are swapped into the ldiv() register layout on exit.
1567 1.1 mrg DEFUN __udivmodsi4
1568 1.1 mrg         ldi     r_remL, 33      ; init loop counter
1569 1.1 mrg         mov     r_cnt, r_remL
1570 1.1 mrg         sub     r_remL,r_remL
1571 1.1 mrg         sub     r_remH,r_remH   ; clear remainder and carry
1572 1.1 mrg         mov_l   r_remHL, r_remL
1573 1.1 mrg         mov_h   r_remHH, r_remH
1574 1.1 mrg         rjmp    __udivmodsi4_ep ; jump to entry point
1575 1.1 mrg __udivmodsi4_loop:
1576 1.1 mrg         rol     r_remL          ; shift dividend into remainder
1577 1.1 mrg         rol     r_remH
1578 1.1 mrg         rol     r_remHL
1579 1.1 mrg         rol     r_remHH
1580 1.1 mrg         cp      r_remL,r_arg2L  ; compare remainder & divisor
1581 1.1 mrg         cpc     r_remH,r_arg2H
1582 1.1 mrg         cpc     r_remHL,r_arg2HL
1583 1.1 mrg         cpc     r_remHH,r_arg2HH
1584 1.1 mrg         brcs    __udivmodsi4_ep ; remainder < divisor
1585 1.1 mrg         sub     r_remL,r_arg2L  ; subtract divisor from remainder
1586 1.1 mrg         sbc     r_remH,r_arg2H
1587 1.1 mrg         sbc     r_remHL,r_arg2HL
1588 1.1 mrg         sbc     r_remHH,r_arg2HH
1589 1.1 mrg __udivmodsi4_ep:
1590 1.1 mrg         rol     r_arg1L         ; shift dividend (with CARRY)
1591 1.1 mrg         rol     r_arg1H
1592 1.1 mrg         rol     r_arg1HL
1593 1.1 mrg         rol     r_arg1HH
1594 1.1 mrg         dec     r_cnt           ; decrement loop counter
1595 1.1 mrg         brne    __udivmodsi4_loop
1596 1.1 mrg         ; __zero_reg__ now restored (r_cnt == 0)
1597 1.1 mrg         com     r_arg1L         ; complement: loop shifted inverted C bits
1598 1.1 mrg         com     r_arg1H
1599 1.1 mrg         com     r_arg1HL
1600 1.1 mrg         com     r_arg1HH
1601 1.1 mrg         ; div/mod results to return registers, as for the ldiv() function
1602 1.1 mrg         mov_l   r_arg2L, r_arg1L        ; quotient
1603 1.1 mrg         mov_h   r_arg2H, r_arg1H
1604 1.1 mrg         mov_l   r_arg2HL, r_arg1HL
1605 1.1 mrg         mov_h   r_arg2HH, r_arg1HH
1606 1.1 mrg         mov_l   r_arg1L, r_remL         ; remainder
1607 1.1 mrg         mov_h   r_arg1H, r_remH
1608 1.1 mrg         mov_l   r_arg1HL, r_remHL
1609 1.1 mrg         mov_h   r_arg1HH, r_remHH
1610 1.1 mrg         ret
1611 1.1 mrg ENDF __udivmodsi4
1612 1.1 mrg #endif /* defined (L_udivmodsi4) */
1613 1.1 mrg
1614 1.1 mrg #if defined (L_divmodsi4)
             ;; Signed 32-bit div/mod wrapper.  T = dividend sign (remainder
             ;; fixup via tail-jump to __negsi2, which negates r25:r22);
             ;; r0.7 = quotient sign (fixup via __divmodsi4_neg2).
1615 1.1 mrg DEFUN __divmodsi4
1616 1.1 mrg         mov     __tmp_reg__,r_arg2HH
1617 1.1 mrg         bst     r_arg1HH,7      ; store sign of dividend
1618 1.1 mrg         brtc    0f
1619 1.1 mrg         com     __tmp_reg__     ; r0.7 is sign of result
1620 1.1 mrg         XCALL   __negsi2        ; dividend negative: negate
1621 1.1 mrg 0:
1622 1.1 mrg         sbrc    r_arg2HH,7
1623 1.1 mrg         rcall   __divmodsi4_neg2 ; divisor negative: negate
1624 1.1 mrg         XCALL   __udivmodsi4    ; do the unsigned div/mod
1625 1.1 mrg         sbrc    __tmp_reg__, 7  ; correct quotient sign
1626 1.1 mrg         rcall   __divmodsi4_neg2
1627 1.1 mrg         brtc    __divmodsi4_exit ; correct remainder sign
1628 1.1 mrg         XJMP    __negsi2
1629 1.1 mrg __divmodsi4_neg2:
1630 1.1 mrg         ;; correct divisor/quotient sign
1631 1.1 mrg         com     r_arg2HH
1632 1.1 mrg         com     r_arg2HL
1633 1.1 mrg         com     r_arg2H
1634 1.1 mrg         neg     r_arg2L
1635 1.1 mrg         sbci    r_arg2H,0xff
1636 1.1 mrg         sbci    r_arg2HL,0xff
1637 1.1 mrg         sbci    r_arg2HH,0xff
1638 1.1 mrg __divmodsi4_exit:
1639 1.1 mrg         ret
1640 1.1 mrg ENDF __divmodsi4
1641 1.1 mrg #endif /* defined (L_divmodsi4) */
1642 1.1 mrg
1643 1.1 mrg #if defined (L_negsi2)
1644 1.1 mrg ;; (set (reg:SI 22)
1645 1.1 mrg ;; (neg:SI (reg:SI 22)))
1646 1.1 mrg ;; Sets the V flag for signed overflow tests
             ;; Negate the 32-bit value in r25:r22 in place (NEG4 macro).
1647 1.1 mrg DEFUN __negsi2
1648 1.1 mrg     NEG4 22
1649 1.1 mrg     ret
1650 1.1 mrg ENDF __negsi2
1651 1.1 mrg #endif /* L_negsi2 */
1652 1.1 mrg
1653 1.1 mrg #undef r_remHH
1654 1.1 mrg #undef r_remHL
1655 1.1 mrg #undef r_remH
1656 1.1 mrg #undef r_remL
1657 1.1 mrg #undef r_arg1HH
1658 1.1 mrg #undef r_arg1HL
1659 1.1 mrg #undef r_arg1H
1660 1.1 mrg #undef r_arg1L
1661 1.1 mrg #undef r_arg2HH
1662 1.1 mrg #undef r_arg2HL
1663 1.1 mrg #undef r_arg2H
1664 1.1 mrg #undef r_arg2L
1665 1.1 mrg #undef r_cnt
1666 1.1 mrg
1667 1.1 mrg /*******************************************************
1668 1.1 mrg Division 64 / 64
1669 1.1 mrg Modulo 64 % 64
1670 1.1 mrg *******************************************************/
1671 1.1 mrg
1672 1.1 mrg ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1673 1.1 mrg ;; at least 16k of Program Memory. For smaller Devices, depend
1674 1.1 mrg ;; on MOVW and SP Size. There is a connection between SP Size and
1675 1.1 mrg ;; Flash Size so that SP Size can be used to test for Flash Size.
1676 1.1 mrg
1677 1.1 mrg #if defined (__AVR_HAVE_JMP_CALL__)
1678 1.1 mrg # define SPEED_DIV 8
1679 1.1 mrg #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1680 1.1 mrg # define SPEED_DIV 16
1681 1.1 mrg #else
1682 1.1 mrg # define SPEED_DIV 0
1683 1.1 mrg #endif
1684 1.1 mrg
1685 1.1 mrg ;; A[0..7]: In: Dividend;
1686 1.1 mrg ;; Out: Quotient (T = 0)
1687 1.1 mrg ;; Out: Remainder (T = 1)
1688 1.1 mrg #define A0 18
1689 1.1 mrg #define A1 A0+1
1690 1.1 mrg #define A2 A0+2
1691 1.1 mrg #define A3 A0+3
1692 1.1 mrg #define A4 A0+4
1693 1.1 mrg #define A5 A0+5
1694 1.1 mrg #define A6 A0+6
1695 1.1 mrg #define A7 A0+7
1696 1.1 mrg
1697 1.1 mrg ;; B[0..7]: In: Divisor; Out: Clobber
1698 1.1 mrg #define B0 10
1699 1.1 mrg #define B1 B0+1
1700 1.1 mrg #define B2 B0+2
1701 1.1 mrg #define B3 B0+3
1702 1.1 mrg #define B4 B0+4
1703 1.1 mrg #define B5 B0+5
1704 1.1 mrg #define B6 B0+6
1705 1.1 mrg #define B7 B0+7
1706 1.1 mrg
1707 1.1 mrg ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1708 1.1 mrg #define C0 8
1709 1.1 mrg #define C1 C0+1
1710 1.1 mrg #define C2 30
1711 1.1 mrg #define C3 C2+1
1712 1.1 mrg #define C4 28
1713 1.1 mrg #define C5 C4+1
1714 1.1 mrg #define C6 26
1715 1.1 mrg #define C7 C6+1
1716 1.1 mrg
1717 1.1 mrg ;; Holds Signs during Division Routine
1718 1.1 mrg #define SS __tmp_reg__
1719 1.1 mrg
1720 1.1 mrg ;; Bit-Counter in Division Routine
1721 1.1 mrg #define R_cnt __zero_reg__
1722 1.1 mrg
1723 1.1 mrg ;; Scratch Register for Negation
1724 1.1 mrg #define NN r31
1725 1.1 mrg
1726 1.1 mrg #if defined (L_udivdi3)
1727 1.1 mrg
1728 1.1 mrg ;; R25:R18 = R24:R18 umod R17:R10
1729 1.1 mrg ;; Ordinary ABI-Function
1730 1.1 mrg
             ;; T selects the worker's mode: T = 1 -> remainder, T = 0 -> quotient.
1731 1.1 mrg DEFUN __umoddi3
1732 1.1 mrg     set
1733 1.1 mrg     rjmp __udivdi3_umoddi3
1734 1.1 mrg ENDF __umoddi3
1735 1.1 mrg
1736 1.1 mrg ;; R25:R18 = R24:R18 udiv R17:R10
1737 1.1 mrg ;; Ordinary ABI-Function
1738 1.1 mrg
1739 1.1 mrg DEFUN __udivdi3
1740 1.1 mrg     clt
1741 1.1 mrg ENDF __udivdi3
1742 1.1 mrg
             ;; __udivmod64 clobbers C0/C1 (R8/R9) and C4/C5 (Y) but saves
             ;; nothing itself; preserve those call-saved registers here.
1743 1.1 mrg DEFUN __udivdi3_umoddi3
1744 1.1 mrg     push C0
1745 1.1 mrg     push C1
1746 1.1 mrg     push C4
1747 1.1 mrg     push C5
1748 1.1 mrg     XCALL __udivmod64
1749 1.1 mrg     pop C5
1750 1.1 mrg     pop C4
1751 1.1 mrg     pop C1
1752 1.1 mrg     pop C0
1753 1.1 mrg     ret
1754 1.1 mrg ENDF __udivdi3_umoddi3
1755 1.1 mrg #endif /* L_udivdi3 */
1756 1.1 mrg
1757 1.1 mrg #if defined (L_udivmod64)
1758 1.1 mrg
1759 1.1 mrg ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1760 1.1 mrg ;; No Registers saved/restored; the Callers will take Care.
1761 1.1 mrg ;; Preserves B[] and T-flag
1762 1.1 mrg ;; T = 0: Compute Quotient in A[]
1763 1.1 mrg ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1764 1.1 mrg
1765 1.1 mrg DEFUN __udivmod64
1766 1.1 mrg
1767 1.1 mrg     ;; Clear Remainder (C6, C7 will follow)
1768 1.1 mrg     clr C0
1769 1.1 mrg     clr C1
1770 1.1 mrg     wmov C2, C0
1771 1.1 mrg     wmov C4, C0
1772 1.1 mrg     ldi C7, 64
1773 1.1 mrg
1774 1.1 mrg #if SPEED_DIV == 0 || SPEED_DIV == 16
1775 1.1 mrg     ;; Initialize Loop-Counter
1776 1.1 mrg     mov R_cnt, C7
1777 1.1 mrg     wmov C6, C0
1778 1.1 mrg #endif /* SPEED_DIV */
1779 1.1 mrg
1780 1.1 mrg #if SPEED_DIV == 8
1781 1.1 mrg
             ;; Speed variant: pre-shift the dividend in whole bytes while it
             ;; is still smaller than the divisor, so the bit loop below only
             ;; runs for the bits that can actually contribute.
1782 1.1 mrg     push A7
1783 1.1 mrg     clr C6
1784 1.1 mrg
1785 1.1 mrg 1:  ;; Compare shifted Dividend against Divisor
1786 1.1 mrg     ;; If -- even after Shifting -- it is smaller...
1787 1.1 mrg     CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1788 1.1 mrg     cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1789 1.1 mrg     brcc 2f
1790 1.1 mrg
1791 1.1 mrg     ;; ...then we can subtract it. Thus, it is legal to shift left
1792 1.1 mrg                $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1793 1.1 mrg     mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1794 1.1 mrg     mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1795 1.1 mrg     mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1796 1.1 mrg
1797 1.1 mrg     ;; 8 Bits are done
1798 1.1 mrg     subi C7, 8
1799 1.1 mrg     brne 1b
1800 1.1 mrg
1801 1.1 mrg     ;; Shifted 64 Bits:  A7 has traveled to C7
1802 1.1 mrg     pop C7
1803 1.1 mrg     ;; Divisor is greater than Dividend. We have:
1804 1.1 mrg     ;; A[] % B[] = A[]
1805 1.1 mrg     ;; A[] / B[] = 0
1806 1.1 mrg     ;; Thus, we can return immediately
1807 1.1 mrg     rjmp 5f
1808 1.1 mrg
1809 1.1 mrg 2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
1810 1.1 mrg     mov R_cnt, C7
1811 1.1 mrg
1812 1.1 mrg     ;; Push of A7 is not needed because C7 is still 0
1813 1.1 mrg     pop C7
1814 1.1 mrg     clr C7
1815 1.1 mrg
1816 1.1 mrg #elif SPEED_DIV == 16
1817 1.1 mrg
1818 1.1 mrg     ;; Compare shifted Dividend against Divisor
1819 1.1 mrg     cp A7, B3
1820 1.1 mrg     cpc C0, B4
1821 1.1 mrg     cpc C1, B5
1822 1.1 mrg     cpc C2, B6
1823 1.1 mrg     cpc C3, B7
1824 1.1 mrg     brcc 2f
1825 1.1 mrg
1826 1.1 mrg     ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1827 1.1 mrg     ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1828 1.1 mrg     wmov C2,A6 $ wmov C0,A4
1829 1.1 mrg     wmov A6,A2 $ wmov A4,A0
1830 1.1 mrg     wmov A2,C6 $ wmov A0,C4
1831 1.1 mrg
1832 1.1 mrg     ;; Set Bit Counter to 32
1833 1.1 mrg     lsr R_cnt
1834 1.1 mrg 2:
1835 1.1 mrg #elif SPEED_DIV
1836 1.1 mrg #error SPEED_DIV = ?
1837 1.1 mrg #endif /* SPEED_DIV */
1838 1.1 mrg
1839 1.1 mrg     ;; The very Division + Remainder Routine
1840 1.1 mrg
1841 1.1 mrg 3:  ;; Left-shift Dividend...
1842 1.1 mrg     lsl A0 $ rol A1 $ rol A2 $ rol A3
1843 1.1 mrg     rol A4 $ rol A5 $ rol A6 $ rol A7
1844 1.1 mrg
1845 1.1 mrg     ;; ...into Remainder
1846 1.1 mrg     rol C0 $ rol C1 $ rol C2 $ rol C3
1847 1.1 mrg     rol C4 $ rol C5 $ rol C6 $ rol C7
1848 1.1 mrg
1849 1.1 mrg     ;; Compare Remainder and Divisor
1850 1.1 mrg     CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1851 1.1 mrg     cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1852 1.1 mrg
1853 1.1 mrg     brcs 4f
1854 1.1 mrg
1855 1.1 mrg     ;; Divisor fits into Remainder:  Subtract it from Remainder...
1856 1.1 mrg     SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1857 1.1 mrg     sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1858 1.1 mrg
1859 1.1 mrg     ;; ...and set according Bit in the upcoming Quotient
1860 1.1 mrg     ;; The Bit will travel to its final Position
1861 1.1 mrg     ori A0, 1
1862 1.1 mrg
1863 1.1 mrg 4:  ;; This Bit is done
1864 1.1 mrg     dec R_cnt
1865 1.1 mrg     brne 3b
1866 1.1 mrg     ;; __zero_reg__ is 0 again
1867 1.1 mrg
1868 1.1 mrg     ;; T = 0: We are fine with the Quotient in A[]
1869 1.1 mrg     ;; T = 1: Copy Remainder to A[]
1870 1.1 mrg 5:  brtc 6f
1871 1.1 mrg     wmov A0, C0
1872 1.1 mrg     wmov A2, C2
1873 1.1 mrg     wmov A4, C4
1874 1.1 mrg     wmov A6, C6
1875 1.1 mrg     ;; Move the Sign of the Result to SS.7
1876 1.1 mrg     lsl SS
1877 1.1 mrg
1878 1.1 mrg 6:  ret
1879 1.1 mrg
1880 1.1 mrg ENDF __udivmod64
1881 1.1 mrg #endif /* L_udivmod64 */
1882 1.1 mrg
1883 1.1 mrg
1884 1.1 mrg #if defined (L_divdi3)
1885 1.1 mrg
1886 1.1 mrg ;; R25:R18 = R24:R18 mod R17:R10
1887 1.1 mrg ;; Ordinary ABI-Function
1888 1.1 mrg
             ;; T selects the worker's mode: T = 1 -> remainder, T = 0 -> quotient.
1889 1.1 mrg DEFUN __moddi3
1890 1.1 mrg     set
1891 1.1 mrg     rjmp __divdi3_moddi3
1892 1.1 mrg ENDF __moddi3
1893 1.1 mrg
1894 1.1 mrg ;; R25:R18 = R24:R18 div R17:R10
1895 1.1 mrg ;; Ordinary ABI-Function
1896 1.1 mrg
1897 1.1 mrg DEFUN __divdi3
1898 1.1 mrg     clt
1899 1.1 mrg ENDF __divdi3
1900 1.1 mrg
1901 1.1 mrg DEFUN __divdi3_moddi3
1902 1.1 mrg #if SPEED_DIV
1903 1.1 mrg     mov r31, A7
1904 1.1 mrg     or r31, B7
1905 1.1 mrg     brmi 0f
1906 1.1 mrg     ;; Both Signs are 0: the following Complexity is not needed
1907 1.1 mrg     XJMP __udivdi3_umoddi3
1908 1.1 mrg #endif /* SPEED_DIV */
1909 1.1 mrg
1910 1.1 mrg 0:  ;; The Prologue
1911 1.1 mrg     ;; Save 12 Registers:  Y, 17...8
1912 1.1 mrg     ;; No Frame needed
1913 1.1 mrg     do_prologue_saves 12
1914 1.1 mrg
1915 1.1 mrg     ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1916 1.1 mrg     ;; SS.6 will contain the Sign of the Remainder (A.sign)
1917 1.1 mrg     mov SS, A7
1918 1.1 mrg     asr SS
1919 1.1 mrg     ;; Adjust Dividend's Sign as needed
1920 1.1 mrg #if SPEED_DIV
1921 1.1 mrg     ;; Compiling for Speed we know that at least one Sign must be < 0
1922 1.1 mrg     ;; Thus, if A[] >= 0 then we know B[] < 0
1923 1.1 mrg     brpl 22f
1924 1.1 mrg #else
1925 1.1 mrg     brpl 21f
1926 1.1 mrg #endif /* SPEED_DIV */
1927 1.1 mrg
1928 1.1 mrg     XCALL __negdi2
1929 1.1 mrg
1930 1.1 mrg     ;; Adjust Divisor's Sign and SS.7 as needed
1931 1.1 mrg 21: tst B7
1932 1.1 mrg     brpl 3f
1933 1.1 mrg 22: ldi NN, 1 << 7
1934 1.1 mrg     eor SS, NN
1935 1.1 mrg
             ;; Negate the 64-bit Divisor B[] (complement + propagate borrow)
1936 1.1 mrg     ldi NN, -1
1937 1.1 mrg     com B4 $ com B5 $ com B6 $ com B7
1938 1.1 mrg                $ com B1 $ com B2 $ com B3
1939 1.1 mrg     NEG B0
1940 1.1 mrg                $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
1941 1.1 mrg     sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
1942 1.1 mrg
1943 1.1 mrg 3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1944 1.1 mrg     XCALL __udivmod64
1945 1.1 mrg
1946 1.1 mrg     ;; Adjust Result's Sign
             ;; (__udivmod64 shifted SS left in Modulo mode, so SS.7 is the
             ;; sign of whichever result -- quotient or remainder -- is in A[])
1947 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1948 1.1 mrg     tst SS
1949 1.1 mrg     brpl 4f
1950 1.1 mrg #else
1951 1.1 mrg     sbrc SS, 7
1952 1.1 mrg #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1953 1.1 mrg     XCALL __negdi2
1954 1.1 mrg
1955 1.1 mrg 4:  ;; Epilogue: Restore 12 Registers and return
1956 1.1 mrg     do_epilogue_restores 12
1957 1.1 mrg
1958 1.1 mrg ENDF __divdi3_moddi3
1959 1.1 mrg
1960 1.1 mrg #endif /* L_divdi3 */
1961 1.1 mrg
1962 1.1 mrg #undef R_cnt
1963 1.1 mrg #undef SS
1964 1.1 mrg #undef NN
1965 1.1 mrg
1966 1.1 mrg .section .text.libgcc, "ax", @progbits
1967 1.1 mrg
1968 1.1 mrg #define TT __tmp_reg__
1969 1.1 mrg
1970 1.1 mrg #if defined (L_adddi3)
1971 1.1 mrg ;; (set (reg:DI 18)
1972 1.1 mrg ;; (plus:DI (reg:DI 18)
1973 1.1 mrg ;; (reg:DI 10)))
1974 1.1 mrg ;; Sets the V flag for signed overflow tests
1975 1.1 mrg ;; Sets the C flag for unsigned overflow tests
             ;; 64-bit add: A[] += B[], carry rippled through all 8 bytes.
1976 1.1 mrg DEFUN __adddi3
1977 1.1 mrg     ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
1978 1.1 mrg     adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
1979 1.1 mrg     ret
1980 1.1 mrg ENDF __adddi3
1981 1.1 mrg #endif /* L_adddi3 */
1982 1.1 mrg
1983 1.1 mrg #if defined (L_adddi3_s8)
1984 1.1 mrg ;; (set (reg:DI 18)
1985 1.1 mrg ;; (plus:DI (reg:DI 18)
1986 1.1 mrg ;; (sign_extend:SI (reg:QI 26))))
1987 1.1 mrg ;; Sets the V flag for signed overflow tests
1988 1.1 mrg ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
             ;; A[] += sign_extend(R26): TT = 0x00 or 0xFF depending on R26.7,
             ;; then a plain 64-bit ripple add.
1989 1.1 mrg DEFUN __adddi3_s8
1990 1.1 mrg     clr TT
1991 1.1 mrg     sbrc r26, 7
1992 1.1 mrg     com TT
1993 1.1 mrg     ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
1994 1.1 mrg     adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
1995 1.1 mrg     ret
1996 1.1 mrg ENDF __adddi3_s8
1997 1.1 mrg #endif /* L_adddi3_s8 */
1998 1.1 mrg
1999 1.1 mrg #if defined (L_subdi3)
2000 1.1 mrg ;; (set (reg:DI 18)
2001 1.1 mrg ;; (minus:DI (reg:DI 18)
2002 1.1 mrg ;; (reg:DI 10)))
2003 1.1 mrg ;; Sets the V flag for signed overflow tests
2004 1.1 mrg ;; Sets the C flag for unsigned overflow tests
             ;; 64-bit subtract: A[] -= B[], borrow rippled through all 8 bytes.
2005 1.1 mrg DEFUN __subdi3
2006 1.1 mrg     SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2007 1.1 mrg     sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2008 1.1 mrg     ret
2009 1.1 mrg ENDF __subdi3
2010 1.1 mrg #endif /* L_subdi3 */
2011 1.1 mrg
2012 1.1 mrg #if defined (L_cmpdi2)
2013 1.1 mrg ;; (set (cc0)
2014 1.1 mrg ;; (compare (reg:DI 18)
2015 1.1 mrg ;; (reg:DI 10)))
             ;; 64-bit compare A[] with B[]: returns only condition flags.
2016 1.1 mrg DEFUN __cmpdi2
2017 1.1 mrg     CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2018 1.1 mrg     cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2019 1.1 mrg     ret
2020 1.1 mrg ENDF __cmpdi2
2021 1.1 mrg #endif /* L_cmpdi2 */
2022 1.1 mrg
2023 1.1 mrg #if defined (L_cmpdi2_s8)
2024 1.1 mrg ;; (set (cc0)
2025 1.1 mrg ;; (compare (reg:DI 18)
2026 1.1 mrg ;; (sign_extend:SI (reg:QI 26))))
             ;; Compare A[] with sign_extend(R26): TT = 0x00 or 0xFF from
             ;; R26.7, then a 64-bit flag-only compare.
2027 1.1 mrg DEFUN __cmpdi2_s8
2028 1.1 mrg     clr TT
2029 1.1 mrg     sbrc r26, 7
2030 1.1 mrg     com TT
2031 1.1 mrg     CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2032 1.1 mrg     cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2033 1.1 mrg     ret
2034 1.1 mrg ENDF __cmpdi2_s8
2035 1.1 mrg #endif /* L_cmpdi2_s8 */
2036 1.1 mrg
2037 1.1 mrg #if defined (L_negdi2)
2038 1.1 mrg ;; (set (reg:DI 18)
2039 1.1 mrg ;; (neg:DI (reg:DI 18)))
2040 1.1 mrg ;; Sets the V flag for signed overflow tests
2041 1.1 mrg DEFUN __negdi2
2042 1.1 mrg
             ;; Two's-complement negate of A[]: complement all bytes, then
             ;; add 1 via NEG of the low byte with borrow propagation (SBCI -1).
2043 1.1 mrg     com A4 $ com A5 $ com A6 $ com A7
2044 1.1 mrg                $ com A1 $ com A2 $ com A3
2045 1.1 mrg     NEG A0
2046 1.1 mrg                $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2047 1.1 mrg     sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2048 1.1 mrg     ret
2049 1.1 mrg
2050 1.1 mrg ENDF __negdi2
2051 1.1 mrg #endif /* L_negdi2 */
2052 1.1 mrg
2053 1.1 mrg #undef TT
2054 1.1 mrg
2055 1.1 mrg #undef C7
2056 1.1 mrg #undef C6
2057 1.1 mrg #undef C5
2058 1.1 mrg #undef C4
2059 1.1 mrg #undef C3
2060 1.1 mrg #undef C2
2061 1.1 mrg #undef C1
2062 1.1 mrg #undef C0
2063 1.1 mrg
2064 1.1 mrg #undef B7
2065 1.1 mrg #undef B6
2066 1.1 mrg #undef B5
2067 1.1 mrg #undef B4
2068 1.1 mrg #undef B3
2069 1.1 mrg #undef B2
2070 1.1 mrg #undef B1
2071 1.1 mrg #undef B0
2072 1.1 mrg
2073 1.1 mrg #undef A7
2074 1.1 mrg #undef A6
2075 1.1 mrg #undef A5
2076 1.1 mrg #undef A4
2077 1.1 mrg #undef A3
2078 1.1 mrg #undef A2
2079 1.1 mrg #undef A1
2080 1.1 mrg #undef A0
2081 1.1 mrg
2082 1.1 mrg
2083 1.1 mrg .section .text.libgcc.prologue, "ax", @progbits
2085 1.1 mrg
2086 1.1 mrg /**********************************
2087 1.1 mrg * This is a prologue subroutine
2088 1.1 mrg **********************************/
2089 1.1 mrg #if defined (L_prologue)
2090 1.1 mrg
2091 1.1 mrg ;; This function does not clobber T-flag; 64-bit division relies on it
2092 1.1 mrg DEFUN __prologue_saves__
2093 1.1 mrg push r2
2094 1.1 mrg push r3
2095 1.1 mrg push r4
2096 1.1 mrg push r5
2097 1.1 mrg push r6
2098 1.1 mrg push r7
2099 1.1 mrg push r8
2100 1.1 mrg push r9
2101 1.1 mrg push r10
2102 1.1 mrg push r11
2103 1.1 mrg push r12
2104 1.1 mrg push r13
2105 1.1 mrg push r14
2106 1.1 mrg push r15
2107 1.1 mrg push r16
2108 1.1 mrg push r17
2109 1.1 mrg push r28
2110 1.1 mrg push r29
2111 1.1 mrg #if !defined (__AVR_HAVE_SPH__)
2112 1.1 mrg in r28,__SP_L__
2113 1.1 mrg sub r28,r26
2114 1.1 mrg out __SP_L__,r28
2115 1.1 mrg clr r29
2116 1.1 mrg #elif defined (__AVR_XMEGA__)
2117 1.1 mrg in r28,__SP_L__
2118 1.1 mrg in r29,__SP_H__
2119 1.1 mrg sub r28,r26
2120 1.1 mrg sbc r29,r27
2121 1.1 mrg out __SP_L__,r28
2122 1.1 mrg out __SP_H__,r29
2123 1.1 mrg #else
2124 1.1 mrg in r28,__SP_L__
2125 1.1 mrg in r29,__SP_H__
2126 1.1 mrg sub r28,r26
2127 1.1 mrg sbc r29,r27
2128 1.1 mrg in __tmp_reg__,__SREG__
2129 1.1 mrg cli
2130 1.1 mrg out __SP_H__,r29
2131 1.1 mrg out __SREG__,__tmp_reg__
2132 1.1 mrg out __SP_L__,r28
2133 1.1 mrg #endif /* #SP = 8/16 */
2134 1.1 mrg
2135 1.1 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__)
2136 1.1 mrg eijmp
2137 1.1 mrg #else
2138 1.1 mrg ijmp
2139 1.1 mrg #endif
2140 1.1 mrg
2141 1.1 mrg ENDF __prologue_saves__
2142 1.1 mrg #endif /* defined (L_prologue) */
2143 1.1 mrg
2144 1.1 mrg /*
2145 1.1 mrg * This is an epilogue subroutine
2146 1.1 mrg */
2147 1.1 mrg #if defined (L_epilogue)
2148 1.1 mrg
2149 1.1 mrg DEFUN __epilogue_restores__
2150 1.1 mrg ldd r2,Y+18
2151 1.1 mrg ldd r3,Y+17
2152 1.1 mrg ldd r4,Y+16
2153 1.1 mrg ldd r5,Y+15
2154 1.1 mrg ldd r6,Y+14
2155 1.1 mrg ldd r7,Y+13
2156 1.1 mrg ldd r8,Y+12
2157 1.1 mrg ldd r9,Y+11
2158 1.1 mrg ldd r10,Y+10
2159 1.1 mrg ldd r11,Y+9
2160 1.1 mrg ldd r12,Y+8
2161 1.1 mrg ldd r13,Y+7
2162 1.1 mrg ldd r14,Y+6
2163 1.1 mrg ldd r15,Y+5
2164 1.1 mrg ldd r16,Y+4
2165 1.1 mrg ldd r17,Y+3
2166 1.1 mrg ldd r26,Y+2
2167 1.1 mrg #if !defined (__AVR_HAVE_SPH__)
2168 1.1 mrg ldd r29,Y+1
2169 1.1 mrg add r28,r30
2170 1.1 mrg out __SP_L__,r28
2171 1.1 mrg mov r28, r26
2172 1.1 mrg #elif defined (__AVR_XMEGA__)
2173 1.1 mrg ldd r27,Y+1
2174 1.1 mrg add r28,r30
2175 1.1 mrg adc r29,__zero_reg__
2176 1.1 mrg out __SP_L__,r28
2177 1.1 mrg out __SP_H__,r29
2178 1.1 mrg wmov 28, 26
2179 1.1 mrg #else
2180 1.1 mrg ldd r27,Y+1
2181 1.1 mrg add r28,r30
2182 1.1 mrg adc r29,__zero_reg__
2183 1.1 mrg in __tmp_reg__,__SREG__
2184 1.1 mrg cli
2185 1.1 mrg out __SP_H__,r29
2186 1.1 mrg out __SREG__,__tmp_reg__
2187 1.1 mrg out __SP_L__,r28
2188 1.1 mrg mov_l r28, r26
2189 1.1 mrg mov_h r29, r27
2190 1.1 mrg #endif /* #SP = 8/16 */
2191 1.1 mrg ret
2192 1.1 mrg ENDF __epilogue_restores__
2193 1.1 mrg #endif /* defined (L_epilogue) */
2194 1.1 mrg
2195 1.1 mrg #ifdef L_exit
2196 1.1 mrg .section .fini9,"ax",@progbits
2197 1.1 mrg DEFUN _exit
2198 1.1 mrg .weak exit
2199 1.1 mrg exit:
2200 1.1 mrg ENDF _exit
2201 1.1 mrg
2202 1.1 mrg /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2203 1.1 mrg
2204 1.1 mrg .section .fini0,"ax",@progbits
2205 1.1 mrg cli
2206 1.1 mrg __stop_program:
2207 1.1 mrg rjmp __stop_program
2208 1.1 mrg #endif /* defined (L_exit) */
2209 1.1 mrg
2210 1.1 mrg #ifdef L_cleanup
2211 1.1 mrg .weak _cleanup
2212 1.1 mrg .func _cleanup
2213 1.1 mrg _cleanup:
2214 1.1 mrg ret
2215 1.1 mrg .endfunc
2216 1.1 mrg #endif /* defined (L_cleanup) */
2217 1.1 mrg
2218 1.1 mrg
2219 1.1 mrg .section .text.libgcc, "ax", @progbits
2221 1.1 mrg
2222 1.1 mrg #ifdef L_tablejump
2223 1.1 mrg DEFUN __tablejump2__
2224 1.1 mrg lsl r30
2225 1.1 mrg rol r31
2226 1.1 mrg ;; FALLTHRU
2227 1.1 mrg ENDF __tablejump2__
2228 1.1 mrg
2229 1.1 mrg DEFUN __tablejump__
2230 1.1 mrg #if defined (__AVR_HAVE_LPMX__)
2231 1.1 mrg lpm __tmp_reg__, Z+
2232 1.1 mrg lpm r31, Z
2233 1.1 mrg mov r30, __tmp_reg__
2234 1.1 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__)
2235 1.1 mrg eijmp
2236 1.1 mrg #else
2237 1.1 mrg ijmp
2238 1.1 mrg #endif
2239 1.1 mrg
2240 1.1 mrg #else /* !HAVE_LPMX */
2241 1.1 mrg lpm
2242 1.1 mrg adiw r30, 1
2243 1.1 mrg push r0
2244 1.1 mrg lpm
2245 1.1 mrg push r0
2246 1.1 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__)
2247 1.1 mrg in __tmp_reg__, __EIND__
2248 1.1 mrg push __tmp_reg__
2249 1.1 mrg #endif
2250 1.1 mrg ret
2251 1.1 mrg #endif /* !HAVE_LPMX */
2252 1.1 mrg ENDF __tablejump__
2253 1.1 mrg #endif /* defined (L_tablejump) */
2254 1.1 mrg
2255 1.1 mrg #ifdef L_copy_data
2256 1.1 mrg .section .init4,"ax",@progbits
2257 1.1 mrg DEFUN __do_copy_data
2258 1.1 mrg #if defined(__AVR_HAVE_ELPMX__)
2259 1.1 mrg ldi r17, hi8(__data_end)
2260 1.1 mrg ldi r26, lo8(__data_start)
2261 1.1 mrg ldi r27, hi8(__data_start)
2262 1.1 mrg ldi r30, lo8(__data_load_start)
2263 1.1 mrg ldi r31, hi8(__data_load_start)
2264 1.1 mrg ldi r16, hh8(__data_load_start)
2265 1.1 mrg out __RAMPZ__, r16
2266 1.1 mrg rjmp .L__do_copy_data_start
2267 1.1 mrg .L__do_copy_data_loop:
2268 1.1 mrg elpm r0, Z+
2269 1.1 mrg st X+, r0
2270 1.1 mrg .L__do_copy_data_start:
2271 1.1 mrg cpi r26, lo8(__data_end)
2272 1.1 mrg cpc r27, r17
2273 1.1 mrg brne .L__do_copy_data_loop
2274 1.1 mrg #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2275 1.1 mrg ldi r17, hi8(__data_end)
2276 1.1 mrg ldi r26, lo8(__data_start)
2277 1.1 mrg ldi r27, hi8(__data_start)
2278 1.1 mrg ldi r30, lo8(__data_load_start)
2279 1.1 mrg ldi r31, hi8(__data_load_start)
2280 1.1 mrg ldi r16, hh8(__data_load_start - 0x10000)
2281 1.1 mrg .L__do_copy_data_carry:
2282 1.1 mrg inc r16
2283 1.1 mrg out __RAMPZ__, r16
2284 1.1 mrg rjmp .L__do_copy_data_start
2285 1.1 mrg .L__do_copy_data_loop:
2286 1.1 mrg elpm
2287 1.1 mrg st X+, r0
2288 1.1 mrg adiw r30, 1
2289 1.1 mrg brcs .L__do_copy_data_carry
2290 1.1 mrg .L__do_copy_data_start:
2291 1.1 mrg cpi r26, lo8(__data_end)
2292 1.1 mrg cpc r27, r17
2293 1.1 mrg brne .L__do_copy_data_loop
2294 1.1 mrg #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2295 1.1 mrg ldi r17, hi8(__data_end)
2296 1.1 mrg ldi r26, lo8(__data_start)
2297 1.1 mrg ldi r27, hi8(__data_start)
2298 1.1 mrg ldi r30, lo8(__data_load_start)
2299 1.1 mrg ldi r31, hi8(__data_load_start)
2300 1.1 mrg rjmp .L__do_copy_data_start
2301 1.1 mrg .L__do_copy_data_loop:
2302 1.1 mrg #if defined (__AVR_HAVE_LPMX__)
2303 1.1 mrg lpm r0, Z+
2304 1.1 mrg #else
2305 1.1 mrg lpm
2306 1.1 mrg adiw r30, 1
2307 1.1 mrg #endif
2308 1.1 mrg st X+, r0
2309 1.1 mrg .L__do_copy_data_start:
2310 1.1 mrg cpi r26, lo8(__data_end)
2311 1.1 mrg cpc r27, r17
2312 1.1 mrg brne .L__do_copy_data_loop
2313 1.1 mrg #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2314 1.1 mrg #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2315 1.1 mrg ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2316 1.1 mrg out __RAMPZ__, __zero_reg__
2317 1.1 mrg #endif /* ELPM && RAMPD */
2318 1.1 mrg ENDF __do_copy_data
2319 1.1 mrg #endif /* L_copy_data */
2320 1.1 mrg
2321 1.1 mrg /* __do_clear_bss is only necessary if there is anything in .bss section. */
2322 1.1 mrg
2323 1.1 mrg #ifdef L_clear_bss
2324 1.1 mrg .section .init4,"ax",@progbits
2325 1.1 mrg DEFUN __do_clear_bss
2326 1.1 mrg ldi r17, hi8(__bss_end)
2327 1.1 mrg ldi r26, lo8(__bss_start)
2328 1.1 mrg ldi r27, hi8(__bss_start)
2329 1.1 mrg rjmp .do_clear_bss_start
2330 1.1 mrg .do_clear_bss_loop:
2331 1.1 mrg st X+, __zero_reg__
2332 1.1 mrg .do_clear_bss_start:
2333 1.1 mrg cpi r26, lo8(__bss_end)
2334 1.1 mrg cpc r27, r17
2335 1.1 mrg brne .do_clear_bss_loop
2336 1.1 mrg ENDF __do_clear_bss
2337 1.1 mrg #endif /* L_clear_bss */
2338 1.1 mrg
2339 1.1 mrg /* __do_global_ctors and __do_global_dtors are only necessary
2340 1.1 mrg if there are any constructors/destructors. */
2341 1.1 mrg
2342 1.1 mrg #ifdef L_ctors
2343 1.1 mrg .section .init6,"ax",@progbits
2344 1.1 mrg DEFUN __do_global_ctors
2345 1.1 mrg #if defined(__AVR_HAVE_ELPM__)
2346 1.1 mrg ldi r17, hi8(__ctors_start)
2347 1.1 mrg ldi r28, lo8(__ctors_end)
2348 1.1 mrg ldi r29, hi8(__ctors_end)
2349 1.1 mrg ldi r16, hh8(__ctors_end)
2350 1.1 mrg rjmp .L__do_global_ctors_start
2351 1.1 mrg .L__do_global_ctors_loop:
2352 1.1 mrg sbiw r28, 2
2353 1.1 mrg sbc r16, __zero_reg__
2354 1.1 mrg mov_h r31, r29
2355 1.1 mrg mov_l r30, r28
2356 1.1 mrg out __RAMPZ__, r16
2357 1.1 mrg XCALL __tablejump_elpm__
2358 1.1 mrg .L__do_global_ctors_start:
2359 1.1 mrg cpi r28, lo8(__ctors_start)
2360 1.1 mrg cpc r29, r17
2361 1.1 mrg ldi r24, hh8(__ctors_start)
2362 1.1 mrg cpc r16, r24
2363 1.1 mrg brne .L__do_global_ctors_loop
2364 1.1 mrg #else
2365 1.1 mrg ldi r17, hi8(__ctors_start)
2366 1.1 mrg ldi r28, lo8(__ctors_end)
2367 1.1 mrg ldi r29, hi8(__ctors_end)
2368 1.1 mrg rjmp .L__do_global_ctors_start
2369 1.1 mrg .L__do_global_ctors_loop:
2370 1.1 mrg sbiw r28, 2
2371 1.1 mrg mov_h r31, r29
2372 1.1 mrg mov_l r30, r28
2373 1.1 mrg XCALL __tablejump__
2374 1.1 mrg .L__do_global_ctors_start:
2375 1.1 mrg cpi r28, lo8(__ctors_start)
2376 1.1 mrg cpc r29, r17
2377 1.1 mrg brne .L__do_global_ctors_loop
2378 1.1 mrg #endif /* defined(__AVR_HAVE_ELPM__) */
2379 1.1 mrg ENDF __do_global_ctors
2380 1.1 mrg #endif /* L_ctors */
2381 1.1 mrg
2382 1.1 mrg #ifdef L_dtors
2383 1.1 mrg .section .fini6,"ax",@progbits
2384 1.1 mrg DEFUN __do_global_dtors
2385 1.1 mrg #if defined(__AVR_HAVE_ELPM__)
2386 1.1 mrg ldi r17, hi8(__dtors_end)
2387 1.1 mrg ldi r28, lo8(__dtors_start)
2388 1.1 mrg ldi r29, hi8(__dtors_start)
2389 1.1 mrg ldi r16, hh8(__dtors_start)
2390 1.1 mrg rjmp .L__do_global_dtors_start
2391 1.1 mrg .L__do_global_dtors_loop:
2392 1.1 mrg sbiw r28, 2
2393 1.1 mrg sbc r16, __zero_reg__
2394 1.1 mrg mov_h r31, r29
2395 1.1 mrg mov_l r30, r28
2396 1.1 mrg out __RAMPZ__, r16
2397 1.1 mrg XCALL __tablejump_elpm__
2398 1.1 mrg .L__do_global_dtors_start:
2399 1.1 mrg cpi r28, lo8(__dtors_end)
2400 1.1 mrg cpc r29, r17
2401 1.1 mrg ldi r24, hh8(__dtors_end)
2402 1.1 mrg cpc r16, r24
2403 1.1 mrg brne .L__do_global_dtors_loop
2404 1.1 mrg #else
2405 1.1 mrg ldi r17, hi8(__dtors_end)
2406 1.1 mrg ldi r28, lo8(__dtors_start)
2407 1.1 mrg ldi r29, hi8(__dtors_start)
2408 1.1 mrg rjmp .L__do_global_dtors_start
2409 1.1 mrg .L__do_global_dtors_loop:
2410 1.1 mrg mov_h r31, r29
2411 1.1 mrg mov_l r30, r28
2412 1.1 mrg XCALL __tablejump__
2413 1.1 mrg adiw r28, 2
2414 1.1 mrg .L__do_global_dtors_start:
2415 1.1 mrg cpi r28, lo8(__dtors_end)
2416 1.1 mrg cpc r29, r17
2417 1.1 mrg brne .L__do_global_dtors_loop
2418 1.1 mrg #endif /* defined(__AVR_HAVE_ELPM__) */
2419 1.1 mrg ENDF __do_global_dtors
2420 1.1 mrg #endif /* L_dtors */
2421 1.1 mrg
2422 1.1 mrg .section .text.libgcc, "ax", @progbits
2423 1.1 mrg
2424 1.1 mrg #ifdef L_tablejump_elpm
2425 1.1 mrg DEFUN __tablejump_elpm__
2426 1.1 mrg #if defined (__AVR_HAVE_ELPMX__)
2427 1.1 mrg elpm __tmp_reg__, Z+
2428 1.1 mrg elpm r31, Z
2429 1.1 mrg mov r30, __tmp_reg__
2430 1.1 mrg #if defined (__AVR_HAVE_RAMPD__)
2431 1.1 mrg ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2432 1.1 mrg out __RAMPZ__, __zero_reg__
2433 1.1 mrg #endif /* RAMPD */
2434 1.1 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__)
2435 1.1 mrg eijmp
2436 1.1 mrg #else
2437 1.1 mrg ijmp
2438 1.1 mrg #endif
2439 1.1 mrg
2440 1.1 mrg #elif defined (__AVR_HAVE_ELPM__)
2441 1.1 mrg elpm
2442 1.1 mrg adiw r30, 1
2443 1.1 mrg push r0
2444 1.1 mrg elpm
2445 1.1 mrg push r0
2446 1.1 mrg #if defined (__AVR_HAVE_EIJMP_EICALL__)
2447 1.1 mrg in __tmp_reg__, __EIND__
2448 1.1 mrg push __tmp_reg__
2449 1.1 mrg #endif
2450 1.1 mrg ret
2451 1.1 mrg #endif
2452 1.1 mrg ENDF __tablejump_elpm__
2453 1.1 mrg #endif /* defined (L_tablejump_elpm) */
2454 1.1 mrg
2455 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2456 1.1 mrg ;; Loading n bytes from Flash; n = 3,4
2457 1.1 mrg ;; R22... = Flash[Z]
2458 1.1 mrg ;; Clobbers: __tmp_reg__
2459 1.1 mrg
2460 1.1 mrg #if (defined (L_load_3) \
2461 1.1 mrg || defined (L_load_4)) \
2462 1.1 mrg && !defined (__AVR_HAVE_LPMX__)
2463 1.1 mrg
2464 1.1 mrg ;; Destination
2465 1.1 mrg #define D0 22
2466 1.1 mrg #define D1 D0+1
2467 1.1 mrg #define D2 D0+2
2468 1.1 mrg #define D3 D0+3
2469 1.1 mrg
2470 1.1 mrg .macro .load dest, n
2471 1.1 mrg lpm
2472 1.1 mrg mov \dest, r0
2473 1.1 mrg .if \dest != D0+\n-1
2474 1.1 mrg adiw r30, 1
2475 1.1 mrg .else
2476 1.1 mrg sbiw r30, \n-1
2477 1.1 mrg .endif
2478 1.1 mrg .endm
2479 1.1 mrg
2480 1.1 mrg #if defined (L_load_3)
2481 1.1 mrg DEFUN __load_3
2482 1.1 mrg push D3
2483 1.1 mrg XCALL __load_4
2484 1.1 mrg pop D3
2485 1.1 mrg ret
2486 1.1 mrg ENDF __load_3
2487 1.1 mrg #endif /* L_load_3 */
2488 1.1 mrg
2489 1.1 mrg #if defined (L_load_4)
2490 1.1 mrg DEFUN __load_4
2491 1.1 mrg .load D0, 4
2492 1.1 mrg .load D1, 4
2493 1.1 mrg .load D2, 4
2494 1.1 mrg .load D3, 4
2495 1.1 mrg ret
2496 1.1 mrg ENDF __load_4
2497 1.1 mrg #endif /* L_load_4 */
2498 1.1 mrg
2499 1.1 mrg #endif /* L_load_3 || L_load_3 */
2500 1.1 mrg
2501 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2502 1.1 mrg ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2503 1.1 mrg ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2504 1.1 mrg ;; Clobbers: __tmp_reg__, R21, R30, R31
2505 1.1 mrg
2506 1.1 mrg #if (defined (L_xload_1) \
2507 1.1 mrg || defined (L_xload_2) \
2508 1.1 mrg || defined (L_xload_3) \
2509 1.1 mrg || defined (L_xload_4))
2510 1.1 mrg
2511 1.1 mrg ;; Destination
2512 1.1 mrg #define D0 22
2513 1.1 mrg #define D1 D0+1
2514 1.1 mrg #define D2 D0+2
2515 1.1 mrg #define D3 D0+3
2516 1.1 mrg
2517 1.1 mrg ;; Register containing bits 16+ of the address
2518 1.1 mrg
2519 1.1 mrg #define HHI8 21
2520 1.1 mrg
2521 1.1 mrg .macro .xload dest, n
2522 1.1 mrg #if defined (__AVR_HAVE_ELPMX__)
2523 1.1 mrg elpm \dest, Z+
2524 1.1 mrg #elif defined (__AVR_HAVE_ELPM__)
2525 1.1 mrg elpm
2526 1.1 mrg mov \dest, r0
2527 1.1 mrg .if \dest != D0+\n-1
2528 1.1 mrg adiw r30, 1
2529 1.1 mrg adc HHI8, __zero_reg__
2530 1.1 mrg out __RAMPZ__, HHI8
2531 1.1 mrg .endif
2532 1.1 mrg #elif defined (__AVR_HAVE_LPMX__)
2533 1.1 mrg lpm \dest, Z+
2534 1.1 mrg #else
2535 1.1 mrg lpm
2536 1.1 mrg mov \dest, r0
2537 1.1 mrg .if \dest != D0+\n-1
2538 1.1 mrg adiw r30, 1
2539 1.1 mrg .endif
2540 1.1 mrg #endif
2541 1.1 mrg #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2542 1.1 mrg .if \dest == D0+\n-1
2543 1.1 mrg ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2544 1.1 mrg out __RAMPZ__, __zero_reg__
2545 1.1 mrg .endif
2546 1.1 mrg #endif
2547 1.1 mrg .endm ; .xload
2548 1.1 mrg
2549 1.1 mrg #if defined (L_xload_1)
2550 1.1 mrg DEFUN __xload_1
2551 1.1 mrg #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2552 1.1 mrg sbrc HHI8, 7
2553 1.1 mrg ld D0, Z
2554 1.1 mrg sbrs HHI8, 7
2555 1.1 mrg lpm D0, Z
2556 1.1 mrg ret
2557 1.1 mrg #else
2558 1.1 mrg sbrc HHI8, 7
2559 1.1 mrg rjmp 1f
2560 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2561 1.1 mrg out __RAMPZ__, HHI8
2562 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2563 1.1 mrg .xload D0, 1
2564 1.1 mrg ret
2565 1.1 mrg 1: ld D0, Z
2566 1.1 mrg ret
2567 1.1 mrg #endif /* LPMx && ! ELPM */
2568 1.1 mrg ENDF __xload_1
2569 1.1 mrg #endif /* L_xload_1 */
2570 1.1 mrg
2571 1.1 mrg #if defined (L_xload_2)
2572 1.1 mrg DEFUN __xload_2
2573 1.1 mrg sbrc HHI8, 7
2574 1.1 mrg rjmp 1f
2575 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2576 1.1 mrg out __RAMPZ__, HHI8
2577 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2578 1.1 mrg .xload D0, 2
2579 1.1 mrg .xload D1, 2
2580 1.1 mrg ret
2581 1.1 mrg 1: ld D0, Z+
2582 1.1 mrg ld D1, Z+
2583 1.1 mrg ret
2584 1.1 mrg ENDF __xload_2
2585 1.1 mrg #endif /* L_xload_2 */
2586 1.1 mrg
2587 1.1 mrg #if defined (L_xload_3)
2588 1.1 mrg DEFUN __xload_3
2589 1.1 mrg sbrc HHI8, 7
2590 1.1 mrg rjmp 1f
2591 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2592 1.1 mrg out __RAMPZ__, HHI8
2593 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2594 1.1 mrg .xload D0, 3
2595 1.1 mrg .xload D1, 3
2596 1.1 mrg .xload D2, 3
2597 1.1 mrg ret
2598 1.1 mrg 1: ld D0, Z+
2599 1.1 mrg ld D1, Z+
2600 1.1 mrg ld D2, Z+
2601 1.1 mrg ret
2602 1.1 mrg ENDF __xload_3
2603 1.1 mrg #endif /* L_xload_3 */
2604 1.1 mrg
2605 1.1 mrg #if defined (L_xload_4)
2606 1.1 mrg DEFUN __xload_4
2607 1.1 mrg sbrc HHI8, 7
2608 1.1 mrg rjmp 1f
2609 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2610 1.1 mrg out __RAMPZ__, HHI8
2611 1.1 mrg #endif /* __AVR_HAVE_ELPM__ */
2612 1.1 mrg .xload D0, 4
2613 1.1 mrg .xload D1, 4
2614 1.1 mrg .xload D2, 4
2615 1.1 mrg .xload D3, 4
2616 1.1 mrg ret
2617 1.1 mrg 1: ld D0, Z+
2618 1.1 mrg ld D1, Z+
2619 1.1 mrg ld D2, Z+
2620 1.1 mrg ld D3, Z+
2621 1.1 mrg ret
2622 1.1 mrg ENDF __xload_4
2623 1.1 mrg #endif /* L_xload_4 */
2624 1.1 mrg
2625 1.1 mrg #endif /* L_xload_{1|2|3|4} */
2626 1.1 mrg
2627 1.1 mrg ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2628 1.1 mrg ;; memcopy from Address Space __pgmx to RAM
2629 1.1 mrg ;; R23:Z = Source Address
2630 1.1 mrg ;; X = Destination Address
2631 1.1 mrg ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2632 1.1 mrg
2633 1.1 mrg #if defined (L_movmemx)
2634 1.1 mrg
2635 1.1 mrg #define HHI8 23
2636 1.1 mrg #define LOOP 24
2637 1.1 mrg
2638 1.1 mrg DEFUN __movmemx_qi
2639 1.1 mrg ;; #Bytes to copy fity in 8 Bits (1..255)
2640 1.1 mrg ;; Zero-extend Loop Counter
2641 1.1 mrg clr LOOP+1
2642 1.1 mrg ;; FALLTHRU
2643 1.1 mrg ENDF __movmemx_qi
2644 1.1 mrg
2645 1.1 mrg DEFUN __movmemx_hi
2646 1.1 mrg
2647 1.1 mrg ;; Read from where?
2648 1.1 mrg sbrc HHI8, 7
2649 1.1 mrg rjmp 1f
2650 1.1 mrg
2651 1.1 mrg ;; Read from Flash
2652 1.1 mrg
2653 1.1 mrg #if defined (__AVR_HAVE_ELPM__)
2654 1.1 mrg out __RAMPZ__, HHI8
2655 1.1 mrg #endif
2656 1.1 mrg
2657 1.1 mrg 0: ;; Load 1 Byte from Flash...
2658 1.1 mrg
2659 1.1 mrg #if defined (__AVR_HAVE_ELPMX__)
2660 1.1 mrg elpm r0, Z+
2661 1.1 mrg #elif defined (__AVR_HAVE_ELPM__)
2662 1.1 mrg elpm
2663 1.1 mrg adiw r30, 1
2664 1.1 mrg adc HHI8, __zero_reg__
2665 1.1 mrg out __RAMPZ__, HHI8
2666 1.1 mrg #elif defined (__AVR_HAVE_LPMX__)
2667 1.1 mrg lpm r0, Z+
2668 1.1 mrg #else
2669 1.1 mrg lpm
2670 1.1 mrg adiw r30, 1
2671 1.1 mrg #endif
2672 1.1 mrg
2673 1.1 mrg ;; ...and store that Byte to RAM Destination
2674 1.1 mrg st X+, r0
2675 1.1 mrg sbiw LOOP, 1
2676 1.1 mrg brne 0b
2677 1.1 mrg #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2678 1.1 mrg ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2679 1.1 mrg out __RAMPZ__, __zero_reg__
2680 1.1 mrg #endif /* ELPM && RAMPD */
2681 1.1 mrg ret
2682 1.1 mrg
2683 1.1 mrg ;; Read from RAM
2684 1.1 mrg
2685 1.1 mrg 1: ;; Read 1 Byte from RAM...
2686 1.1 mrg ld r0, Z+
2687 1.1 mrg ;; and store that Byte to RAM Destination
2688 1.1 mrg st X+, r0
2689 1.1 mrg sbiw LOOP, 1
2690 1.1 mrg brne 1b
2691 1.1 mrg ret
2692 1.1 mrg ENDF __movmemx_hi
2693 1.1 mrg
2694 1.1 mrg #undef HHI8
2695 1.1 mrg #undef LOOP
2696 1.1 mrg
2697 1.1 mrg #endif /* L_movmemx */
2698 1.1 mrg
2699 1.1 mrg
2700 1.1 mrg .section .text.libgcc.builtins, "ax", @progbits
2702 1.1 mrg
2703 1.1 mrg /**********************************
2704 1.1 mrg * Find first set Bit (ffs)
2705 1.1 mrg **********************************/
2706 1.1 mrg
2707 1.1 mrg #if defined (L_ffssi2)
2708 1.1 mrg ;; find first set bit
2709 1.1 mrg ;; r25:r24 = ffs32 (r25:r22)
2710 1.1 mrg ;; clobbers: r22, r26
2711 1.1 mrg DEFUN __ffssi2
2712 1.1 mrg clr r26
2713 1.1 mrg tst r22
2714 1.1 mrg brne 1f
2715 1.1 mrg subi r26, -8
2716 1.1 mrg or r22, r23
2717 1.1 mrg brne 1f
2718 1.1 mrg subi r26, -8
2719 1.1 mrg or r22, r24
2720 1.1 mrg brne 1f
2721 1.1 mrg subi r26, -8
2722 1.1 mrg or r22, r25
2723 1.1 mrg brne 1f
2724 1.1 mrg ret
2725 1.1 mrg 1: mov r24, r22
2726 1.1 mrg XJMP __loop_ffsqi2
2727 1.1 mrg ENDF __ffssi2
2728 1.1 mrg #endif /* defined (L_ffssi2) */
2729 1.1 mrg
2730 1.1 mrg #if defined (L_ffshi2)
2731 1.1 mrg ;; find first set bit
2732 1.1 mrg ;; r25:r24 = ffs16 (r25:r24)
2733 1.1 mrg ;; clobbers: r26
2734 1.1 mrg DEFUN __ffshi2
2735 1.1 mrg clr r26
2736 1.1 mrg #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2737 1.1 mrg ;; Some cores have problem skipping 2-word instruction
2738 1.1 mrg tst r24
2739 1.1 mrg breq 2f
2740 1.1 mrg #else
2741 1.1 mrg cpse r24, __zero_reg__
2742 1.1 mrg #endif /* __AVR_HAVE_JMP_CALL__ */
2743 1.1 mrg 1: XJMP __loop_ffsqi2
2744 1.1 mrg 2: ldi r26, 8
2745 1.1 mrg or r24, r25
2746 1.1 mrg brne 1b
2747 1.1 mrg ret
2748 1.1 mrg ENDF __ffshi2
2749 1.1 mrg #endif /* defined (L_ffshi2) */
2750 1.1 mrg
2751 1.1 mrg #if defined (L_loop_ffsqi2)
2752 1.1 mrg ;; Helper for ffshi2, ffssi2
2753 1.1 mrg ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2754 1.1 mrg ;; r24 must be != 0
2755 1.1 mrg ;; clobbers: r26
2756 1.1 mrg DEFUN __loop_ffsqi2
2757 1.1 mrg inc r26
2758 1.1 mrg lsr r24
2759 1.1 mrg brcc __loop_ffsqi2
2760 1.1 mrg mov r24, r26
2761 1.1 mrg clr r25
2762 1.1 mrg ret
2763 1.1 mrg ENDF __loop_ffsqi2
2764 1.1 mrg #endif /* defined (L_loop_ffsqi2) */
2765 1.1 mrg
2766 1.1 mrg
2767 1.1 mrg /**********************************
2769 1.1 mrg * Count trailing Zeros (ctz)
2770 1.1 mrg **********************************/
2771 1.1 mrg
2772 1.1 mrg #if defined (L_ctzsi2)
2773 1.1 mrg ;; count trailing zeros
2774 1.1 mrg ;; r25:r24 = ctz32 (r25:r22)
2775 1.1 mrg ;; clobbers: r26, r22
2776 1.1 mrg ;; ctz(0) = 255
2777 1.1 mrg ;; Note that ctz(0) in undefined for GCC
2778 1.1 mrg DEFUN __ctzsi2
2779 1.1 mrg XCALL __ffssi2
2780 1.1 mrg dec r24
2781 1.1 mrg ret
2782 1.1 mrg ENDF __ctzsi2
2783 1.1 mrg #endif /* defined (L_ctzsi2) */
2784 1.1 mrg
2785 1.1 mrg #if defined (L_ctzhi2)
2786 1.1 mrg ;; count trailing zeros
2787 1.1 mrg ;; r25:r24 = ctz16 (r25:r24)
2788 1.1 mrg ;; clobbers: r26
2789 1.1 mrg ;; ctz(0) = 255
2790 1.1 mrg ;; Note that ctz(0) in undefined for GCC
2791 1.1 mrg DEFUN __ctzhi2
2792 1.1 mrg XCALL __ffshi2
2793 1.1 mrg dec r24
2794 1.1 mrg ret
2795 1.1 mrg ENDF __ctzhi2
2796 1.1 mrg #endif /* defined (L_ctzhi2) */
2797 1.1 mrg
2798 1.1 mrg
2799 1.1 mrg /**********************************
2801 1.1 mrg * Count leading Zeros (clz)
2802 1.1 mrg **********************************/
2803 1.1 mrg
2804 1.1 mrg #if defined (L_clzdi2)
2805 1.1 mrg ;; count leading zeros
2806 1.1 mrg ;; r25:r24 = clz64 (r25:r18)
2807 1.1 mrg ;; clobbers: r22, r23, r26
2808 1.1 mrg DEFUN __clzdi2
2809 1.1 mrg XCALL __clzsi2
2810 1.1 mrg sbrs r24, 5
2811 1.1 mrg ret
2812 1.1 mrg mov_l r22, r18
2813 1.1 mrg mov_h r23, r19
2814 1.1 mrg mov_l r24, r20
2815 1.1 mrg mov_h r25, r21
2816 1.1 mrg XCALL __clzsi2
2817 1.1 mrg subi r24, -32
2818 1.1 mrg ret
2819 1.1 mrg ENDF __clzdi2
2820 1.1 mrg #endif /* defined (L_clzdi2) */
2821 1.1 mrg
2822 1.1 mrg #if defined (L_clzsi2)
2823 1.1 mrg ;; count leading zeros
2824 1.1 mrg ;; r25:r24 = clz32 (r25:r22)
2825 1.1 mrg ;; clobbers: r26
2826 1.1 mrg DEFUN __clzsi2
2827 1.1 mrg XCALL __clzhi2
2828 1.1 mrg sbrs r24, 4
2829 1.1 mrg ret
2830 1.1 mrg mov_l r24, r22
2831 1.1 mrg mov_h r25, r23
2832 1.1 mrg XCALL __clzhi2
2833 1.1 mrg subi r24, -16
2834 1.1 mrg ret
2835 1.1 mrg ENDF __clzsi2
2836 1.1 mrg #endif /* defined (L_clzsi2) */
2837 1.1 mrg
2838 1.1 mrg #if defined (L_clzhi2)
2839 1.1 mrg ;; count leading zeros
2840 1.1 mrg ;; r25:r24 = clz16 (r25:r24)
2841 1.1 mrg ;; clobbers: r26
2842 1.1 mrg DEFUN __clzhi2
2843 1.1 mrg clr r26
2844 1.1 mrg tst r25
2845 1.1 mrg brne 1f
2846 1.1 mrg subi r26, -8
2847 1.1 mrg or r25, r24
2848 1.1 mrg brne 1f
2849 1.1 mrg ldi r24, 16
2850 1.1 mrg ret
2851 1.1 mrg 1: cpi r25, 16
2852 1.1 mrg brsh 3f
2853 1.1 mrg subi r26, -3
2854 1.1 mrg swap r25
2855 1.1 mrg 2: inc r26
2856 1.1 mrg 3: lsl r25
2857 1.1 mrg brcc 2b
2858 1.1 mrg mov r24, r26
2859 1.1 mrg clr r25
2860 1.1 mrg ret
2861 1.1 mrg ENDF __clzhi2
2862 1.1 mrg #endif /* defined (L_clzhi2) */
2863 1.1 mrg
2864 1.1 mrg
2865 1.1 mrg /**********************************
2867 1.1 mrg * Parity
2868 1.1 mrg **********************************/
2869 1.1 mrg
2870 1.1 mrg #if defined (L_paritydi2)
2871 1.1 mrg ;; r25:r24 = parity64 (r25:r18)
2872 1.1 mrg ;; clobbers: __tmp_reg__
2873 1.1 mrg DEFUN __paritydi2
2874 1.1 mrg eor r24, r18
2875 1.1 mrg eor r24, r19
2876 1.1 mrg eor r24, r20
2877 1.1 mrg eor r24, r21
2878 1.1 mrg XJMP __paritysi2
2879 1.1 mrg ENDF __paritydi2
2880 1.1 mrg #endif /* defined (L_paritydi2) */
2881 1.1 mrg
2882 1.1 mrg #if defined (L_paritysi2)
2883 1.1 mrg ;; r25:r24 = parity32 (r25:r22)
2884 1.1 mrg ;; clobbers: __tmp_reg__
2885 1.1 mrg DEFUN __paritysi2
2886 1.1 mrg eor r24, r22
2887 1.1 mrg eor r24, r23
2888 1.1 mrg XJMP __parityhi2
2889 1.1 mrg ENDF __paritysi2
2890 1.1 mrg #endif /* defined (L_paritysi2) */
2891 1.1 mrg
2892 1.1 mrg #if defined (L_parityhi2)
2893 1.1 mrg ;; r25:r24 = parity16 (r25:r24)
2894 1.1 mrg ;; clobbers: __tmp_reg__
2895 1.1 mrg DEFUN __parityhi2
2896 1.1 mrg eor r24, r25
2897 1.1 mrg ;; FALLTHRU
2898 1.1 mrg ENDF __parityhi2
2899 1.1 mrg
2900 1.1 mrg ;; r25:r24 = parity8 (r24)
2901 1.1 mrg ;; clobbers: __tmp_reg__
2902 1.1 mrg DEFUN __parityqi2
2903 1.1 mrg ;; parity is in r24[0..7]
2904 1.1 mrg mov __tmp_reg__, r24
2905 1.1 mrg swap __tmp_reg__
2906 1.1 mrg eor r24, __tmp_reg__
2907 1.1 mrg ;; parity is in r24[0..3]
2908 1.1 mrg subi r24, -4
2909 1.1 mrg andi r24, -5
2910 1.1 mrg subi r24, -6
2911 1.1 mrg ;; parity is in r24[0,3]
2912 1.1 mrg sbrc r24, 3
2913 1.1 mrg inc r24
2914 1.1 mrg ;; parity is in r24[0]
2915 1.1 mrg andi r24, 1
2916 1.1 mrg clr r25
2917 1.1 mrg ret
2918 1.1 mrg ENDF __parityqi2
2919 1.1 mrg #endif /* defined (L_parityhi2) */
2920 1.1 mrg
2921 1.1 mrg
2922 1.1 mrg /**********************************
2924 1.1 mrg * Population Count
2925 1.1 mrg **********************************/
2926 1.1 mrg
2927 1.1 mrg #if defined (L_popcounthi2)
2928 1.1 mrg ;; population count
2929 1.1 mrg ;; r25:r24 = popcount16 (r25:r24)
2930 1.1 mrg ;; clobbers: __tmp_reg__
2931 1.1 mrg DEFUN __popcounthi2
2932 1.1 mrg XCALL __popcountqi2
2933 1.1 mrg push r24
2934 1.1 mrg mov r24, r25
2935 1.1 mrg XCALL __popcountqi2
2936 1.1 mrg clr r25
2937 1.1 mrg ;; FALLTHRU
2938 1.1 mrg ENDF __popcounthi2
2939 1.1 mrg
2940 1.1 mrg DEFUN __popcounthi2_tail
2941 1.1 mrg pop __tmp_reg__
2942 1.1 mrg add r24, __tmp_reg__
2943 1.1 mrg ret
2944 1.1 mrg ENDF __popcounthi2_tail
2945 1.1 mrg #endif /* defined (L_popcounthi2) */
2946 1.1 mrg
2947 1.1 mrg #if defined (L_popcountsi2)
2948 1.1 mrg ;; population count
2949 1.1 mrg ;; r25:r24 = popcount32 (r25:r22)
2950 1.1 mrg ;; clobbers: __tmp_reg__
2951 1.1 mrg DEFUN __popcountsi2
2952 1.1 mrg XCALL __popcounthi2
2953 1.1 mrg push r24
2954 1.1 mrg mov_l r24, r22
2955 1.1 mrg mov_h r25, r23
2956 1.1 mrg XCALL __popcounthi2
2957 1.1 mrg XJMP __popcounthi2_tail
2958 1.1 mrg ENDF __popcountsi2
2959 1.1 mrg #endif /* defined (L_popcountsi2) */
2960 1.1 mrg
2961 1.1 mrg #if defined (L_popcountdi2)
2962 1.1 mrg ;; population count
2963 1.1 mrg ;; r25:r24 = popcount64 (r25:r18)
2964 1.1 mrg ;; clobbers: r22, r23, __tmp_reg__
2965 1.1 mrg DEFUN __popcountdi2
2966 1.1 mrg XCALL __popcountsi2
2967 1.1 mrg push r24
2968 1.1 mrg mov_l r22, r18
2969 1.1 mrg mov_h r23, r19
2970 1.1 mrg mov_l r24, r20
2971 1.1 mrg mov_h r25, r21
2972 1.1 mrg XCALL __popcountsi2
2973 1.1 mrg XJMP __popcounthi2_tail
2974 1.1 mrg ENDF __popcountdi2
2975 1.1 mrg #endif /* defined (L_popcountdi2) */
2976 1.1 mrg
2977 1.1 mrg #if defined (L_popcountqi2)
2978 1.1 mrg ;; population count
2979 1.1 mrg ;; r24 = popcount8 (r24)
2980 1.1 mrg ;; clobbers: __tmp_reg__
2981 1.1 mrg DEFUN __popcountqi2
2982 1.1 mrg mov __tmp_reg__, r24
2983 1.1 mrg andi r24, 1
2984 1.1 mrg lsr __tmp_reg__
2985 1.1 mrg lsr __tmp_reg__
2986 1.1 mrg adc r24, __zero_reg__
2987 1.1 mrg lsr __tmp_reg__
2988 1.1 mrg adc r24, __zero_reg__
2989 1.1 mrg lsr __tmp_reg__
2990 1.1 mrg adc r24, __zero_reg__
2991 1.1 mrg lsr __tmp_reg__
2992 1.1 mrg adc r24, __zero_reg__
2993 1.1 mrg lsr __tmp_reg__
2994 1.1 mrg adc r24, __zero_reg__
2995 1.1 mrg lsr __tmp_reg__
2996 1.1 mrg adc r24, __tmp_reg__
2997 1.1 mrg ret
2998 1.1 mrg ENDF __popcountqi2
2999 1.1 mrg #endif /* defined (L_popcountqi2) */
3000 1.1 mrg
3001 1.1 mrg
3002 1.1 mrg /**********************************
3004 1.1 mrg * Swap bytes
3005 1.1 mrg **********************************/
3006 1.1 mrg
3007 1.1 mrg ;; swap two registers with different register number
3008 1.1 mrg .macro bswap a, b
3009 1.1 mrg eor \a, \b
3010 1.1 mrg eor \b, \a
3011 1.1 mrg eor \a, \b
3012 1.1 mrg .endm
3013 1.1 mrg
3014 1.1 mrg #if defined (L_bswapsi2)
3015 1.1 mrg ;; swap bytes
3016 1.1 mrg ;; r25:r22 = bswap32 (r25:r22)
3017 1.1 mrg DEFUN __bswapsi2
3018 1.1 mrg bswap r22, r25
3019 1.1 mrg bswap r23, r24
3020 1.1 mrg ret
3021 1.1 mrg ENDF __bswapsi2
3022 1.1 mrg #endif /* defined (L_bswapsi2) */
3023 1.1 mrg
3024 1.1 mrg #if defined (L_bswapdi2)
3025 1.1 mrg ;; swap bytes
3026 1.1 mrg ;; r25:r18 = bswap64 (r25:r18)
3027 1.1 mrg DEFUN __bswapdi2
3028 1.1 mrg bswap r18, r25
3029 1.1 mrg bswap r19, r24
3030 1.1 mrg bswap r20, r23
3031 1.1 mrg bswap r21, r22
3032 1.1 mrg ret
3033 1.1 mrg ENDF __bswapdi2
3034 1.1 mrg #endif /* defined (L_bswapdi2) */
3035 1.1 mrg
3036 1.1 mrg
3037 1.1 mrg /**********************************
3039 1.1 mrg * 64-bit shifts
3040 1.1 mrg **********************************/
3041 1.1 mrg
3042 1.1 mrg #if defined (L_ashrdi3)
3043 1.1 mrg ;; Arithmetic shift right
3044 1.1 mrg ;; r25:r18 = ashr64 (r25:r18, r17:r16)
3045 1.1 mrg DEFUN __ashrdi3
3046 1.1 mrg bst r25, 7
3047 1.1 mrg bld __zero_reg__, 0
3048 1.1 mrg ;; FALLTHRU
3049 1.1 mrg ENDF __ashrdi3
3050 1.1 mrg
3051 1.1 mrg ;; Logic shift right
3052 1.1 mrg ;; r25:r18 = lshr64 (r25:r18, r17:r16)
3053 1.1 mrg DEFUN __lshrdi3
3054 1.1 mrg lsr __zero_reg__
3055 1.1 mrg sbc __tmp_reg__, __tmp_reg__
3056 1.1 mrg push r16
3057 1.1 mrg 0: cpi r16, 8
3058 1.1 mrg brlo 2f
3059 1.1 mrg subi r16, 8
3060 1.1 mrg mov r18, r19
3061 1.1 mrg mov r19, r20
3062 1.1 mrg mov r20, r21
3063 1.1 mrg mov r21, r22
3064 1.1 mrg mov r22, r23
3065 1.1 mrg mov r23, r24
3066 1.1 mrg mov r24, r25
3067 1.1 mrg mov r25, __tmp_reg__
3068 1.1 mrg rjmp 0b
3069 1.1 mrg 1: asr __tmp_reg__
3070 1.1 mrg ror r25
3071 1.1 mrg ror r24
3072 1.1 mrg ror r23
3073 1.1 mrg ror r22
3074 1.1 mrg ror r21
3075 1.1 mrg ror r20
3076 1.1 mrg ror r19
3077 1.1 mrg ror r18
3078 1.1 mrg 2: dec r16
3079 1.1 mrg brpl 1b
3080 1.1 mrg pop r16
3081 1.1 mrg ret
3082 1.1 mrg ENDF __lshrdi3
3083 1.1 mrg #endif /* defined (L_ashrdi3) */
3084 1.1 mrg
3085 1.1 mrg #if defined (L_ashldi3)
3086 1.1 mrg ;; Shift left
3087 1.1 mrg ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3088 1.1 mrg DEFUN __ashldi3
3089 1.1 mrg push r16
3090 1.1 mrg 0: cpi r16, 8
3091 1.1 mrg brlo 2f
3092 1.1 mrg mov r25, r24
3093 1.1 mrg mov r24, r23
3094 1.1 mrg mov r23, r22
3095 1.1 mrg mov r22, r21
3096 1.1 mrg mov r21, r20
3097 1.1 mrg mov r20, r19
3098 1.1 mrg mov r19, r18
3099 1.1 mrg clr r18
3100 1.1 mrg subi r16, 8
3101 1.1 mrg rjmp 0b
3102 1.1 mrg 1: lsl r18
3103 1.1 mrg rol r19
3104 1.1 mrg rol r20
3105 1.1 mrg rol r21
3106 1.1 mrg rol r22
3107 1.1 mrg rol r23
3108 1.1 mrg rol r24
3109 1.1 mrg rol r25
3110 1.1 mrg 2: dec r16
3111 1.1 mrg brpl 1b
3112 1.1 mrg pop r16
3113 1.1 mrg ret
3114 1.1 mrg ENDF __ashldi3
3115 1.1 mrg #endif /* defined (L_ashldi3) */
3116 1.1 mrg
#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
;; Only the low count byte r16 is read; r17 is never referenced here.
DEFUN __rotldi3
	push	r16
	;; Rotate by whole bytes while 8 or more bits of the count remain:
	;; the top byte wraps around into the bottom via __tmp_reg__.
0:	cpi	r16, 8
	brlo	2f
	subi	r16, 8
	mov	__tmp_reg__, r25	; save old top byte
	mov	r25, r24
	mov	r24, r23
	mov	r23, r22
	mov	r22, r21
	mov	r21, r20
	mov	r20, r19
	mov	r19, r18
	mov	r18, __tmp_reg__	; old top byte becomes new bottom byte
	rjmp	0b
	;; Rotate the remaining 0..7 bits, one position per iteration.
1:	lsl	r18
	rol	r19
	rol	r20
	rol	r21
	rol	r22
	rol	r23
	rol	r24
	rol	r25			; C = old bit 63
	adc	r18, __zero_reg__	; wrap bit 63 back into bit 0
2:	dec	r16			; body runs once per remaining bit
	brpl	1b			; done when r16 drops below zero
	pop	r16
	ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */
3150 1.1 mrg
3151 1.1 mrg
.section .text.libgcc.fmul, "ax", @progbits

/***********************************************************/
;;; Softmul versions of FMUL, FMULS and FMULSU to implement
;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
/***********************************************************/

;; Register names shared by __fmul, __fmuls and __fmulsu below:
;; A1 and B1 hold the two 8-bit fractional operands, C1:C0 is the
;; 16-bit result, and A0 (__tmp_reg__) serves both as the low
;; extension byte of A and as the result-sign flag (bit 7).
;; Comments are kept off the #define lines because this file is
;; run through the C preprocessor.
#define A1 24
#define B1 25
#define C0 22
#define C1 23
#define A0 __tmp_reg__
3165 1.1 mrg
#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x signed: compute the result sign, strip B's sign, then
;;; share the rest of the work with __fmulsu via __fmulsu_exit.
DEFUN __fmuls
	;; A0.7 = negate result?  (sign(A) ^ sign(B))
	mov	A0, A1
	eor	A0, B1
	;; B1 = |B1|
	sbrc	B1, 7			; skip the neg when B is already non-negative
	neg	B1
	;; NOTE(review): neg leaves 0x80 (-1.0) unchanged since |-1.0| is
	;; not representable in 8 bits -- confirm this matches the hardware insn.
	XJMP	__fmulsu_exit		; take |A|, multiply unsigned, fix sign
ENDF __fmuls
#endif /* L_fmuls */
3179 1.1 mrg
#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x unsigned: only A's sign can make the result negative.
DEFUN __fmulsu
	;; A0.7 = negate result?
	mov	A0, A1			; B is unsigned, so sign(A) alone decides
	;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu
;; Expects the result sign in A0.7 and |B| in B1; computes |A|, runs
;; the unsigned multiply, and negates the result when A0.7 is set.
DEFUN __fmulsu_exit
	;; A1 = |A1|
	sbrc	A1, 7
	neg	A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
	;; Some cores have problem skipping 2-word instruction
	tst	A0
	brmi	1f
#else
	sbrs	A0, 7			; non-negative result: tail-jump to __fmul
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
	XJMP	__fmul
1:	XCALL	__fmul			; negative result: multiply, then negate
	;; C = -C iff A0.7 = 1 (this path is only reached when A0.7 was set)
	NEG2	C0
	ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */
3208 1.1 mrg
3209 1.1 mrg
#ifdef L_fmul
;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Shift-and-add: for each set bit of B (MSB first) add the
;;; progressively right-shifted A1:A0 into C1:C0.
DEFUN __fmul
	; clear result
	clr	C0
	clr	C1
	clr	A0			; low extension byte of A, filled by ror below
	;; NOTE(review): local label 1 appears unreferenced within this routine.
1:	tst	B1
	;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
2:	brpl	3f
	;; C += A
	add	C0, A0
	adc	C1, A1
3:	;; A >>= 1
	lsr	A1
	ror	A0
	;; B <<= 1
	lsl	B1
	brne	2b			; loop until all set bits of B are consumed
	ret
ENDF __fmul
#endif /* L_fmul */
3233
3234 #undef A0
3235 #undef A1
3236 #undef B1
3237 #undef C0
3238 #undef C1
3239
3240 #include "lib1funcs-fixed.S"
3241