lib1funcs.S revision 1.1 1 1.1 mrg /* Copyright (C) 1994-2013 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is free software; you can redistribute it and/or modify it
4 1.1 mrg under the terms of the GNU General Public License as published by the
5 1.1 mrg Free Software Foundation; either version 3, or (at your option) any
6 1.1 mrg later version.
7 1.1 mrg
8 1.1 mrg This file is distributed in the hope that it will be useful, but
9 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of
10 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 1.1 mrg General Public License for more details.
12 1.1 mrg
13 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
14 1.1 mrg permissions described in the GCC Runtime Library Exception, version
15 1.1 mrg 3.1, as published by the Free Software Foundation.
16 1.1 mrg
17 1.1 mrg You should have received a copy of the GNU General Public License and
18 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
19 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20 1.1 mrg <http://www.gnu.org/licenses/>. */
21 1.1 mrg
22 1.1 mrg
23 1.1 mrg !! libgcc routines for the Renesas / SuperH SH CPUs.
24 1.1 mrg !! Contributed by Steve Chamberlain.
25 1.1 mrg !! sac@cygnus.com
26 1.1 mrg
27 1.1 mrg !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
28 1.1 mrg !! recoded in assembly by Toshiyasu Morita
29 1.1 mrg !! tm@netcom.com
30 1.1 mrg
31 1.1 mrg #if defined(__ELF__) && defined(__linux__)
32 1.1 mrg .section .note.GNU-stack,"",%progbits
33 1.1 mrg .previous
34 1.1 mrg #endif
35 1.1 mrg
36 1.1 mrg /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 1.1 mrg ELF local label prefixes by J"orn Rennecke
38 1.1 mrg amylaar (at) cygnus.com */
39 1.1 mrg
40 1.1 mrg #include "lib1funcs.h"
41 1.1 mrg
42 1.1 mrg /* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
43 1.1 mrg so it is more convenient to define NO_FPSCR_VALUES here than to
44 1.1 mrg define it on the command line. */
45 1.1 mrg #if defined __vxworks && defined __PIC__
46 1.1 mrg #define NO_FPSCR_VALUES
47 1.1 mrg #endif
48 1.1 mrg
49 1.1 mrg #if ! __SH5__
50 1.1 mrg #ifdef L_ashiftrt
51 1.1 mrg .global GLOBAL(ashiftrt_r4_0)
52 1.1 mrg .global GLOBAL(ashiftrt_r4_1)
53 1.1 mrg .global GLOBAL(ashiftrt_r4_2)
54 1.1 mrg .global GLOBAL(ashiftrt_r4_3)
55 1.1 mrg .global GLOBAL(ashiftrt_r4_4)
56 1.1 mrg .global GLOBAL(ashiftrt_r4_5)
57 1.1 mrg .global GLOBAL(ashiftrt_r4_6)
58 1.1 mrg .global GLOBAL(ashiftrt_r4_7)
59 1.1 mrg .global GLOBAL(ashiftrt_r4_8)
60 1.1 mrg .global GLOBAL(ashiftrt_r4_9)
61 1.1 mrg .global GLOBAL(ashiftrt_r4_10)
62 1.1 mrg .global GLOBAL(ashiftrt_r4_11)
63 1.1 mrg .global GLOBAL(ashiftrt_r4_12)
64 1.1 mrg .global GLOBAL(ashiftrt_r4_13)
65 1.1 mrg .global GLOBAL(ashiftrt_r4_14)
66 1.1 mrg .global GLOBAL(ashiftrt_r4_15)
67 1.1 mrg .global GLOBAL(ashiftrt_r4_16)
68 1.1 mrg .global GLOBAL(ashiftrt_r4_17)
69 1.1 mrg .global GLOBAL(ashiftrt_r4_18)
70 1.1 mrg .global GLOBAL(ashiftrt_r4_19)
71 1.1 mrg .global GLOBAL(ashiftrt_r4_20)
72 1.1 mrg .global GLOBAL(ashiftrt_r4_21)
73 1.1 mrg .global GLOBAL(ashiftrt_r4_22)
74 1.1 mrg .global GLOBAL(ashiftrt_r4_23)
75 1.1 mrg .global GLOBAL(ashiftrt_r4_24)
76 1.1 mrg .global GLOBAL(ashiftrt_r4_25)
77 1.1 mrg .global GLOBAL(ashiftrt_r4_26)
78 1.1 mrg .global GLOBAL(ashiftrt_r4_27)
79 1.1 mrg .global GLOBAL(ashiftrt_r4_28)
80 1.1 mrg .global GLOBAL(ashiftrt_r4_29)
81 1.1 mrg .global GLOBAL(ashiftrt_r4_30)
82 1.1 mrg .global GLOBAL(ashiftrt_r4_31)
83 1.1 mrg .global GLOBAL(ashiftrt_r4_32)
84 1.1 mrg
85 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
86 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
87 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
88 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
89 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
90 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
91 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
92 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
93 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
94 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
95 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
96 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
97 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
98 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
99 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
100 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
101 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
102 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
103 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
104 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
105 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
106 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
107 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
108 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
109 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
110 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
111 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
112 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
113 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
114 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
115 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
116 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
117 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
118 1.1 mrg
/* Entry points for an arithmetic right shift of r4 by 31 or 32.
   rotcl moves the sign bit of r4 into T.  subc, executed in the rts
   delay slot, then computes r4 - r4 - T, which leaves 0 in r4 for a
   non-negative input and -1 for a negative one.  Both counts share
   this code.  */
119 1.1 mrg .align 1
120 1.1 mrg GLOBAL(ashiftrt_r4_32):
121 1.1 mrg GLOBAL(ashiftrt_r4_31):
122 1.1 mrg rotcl r4
123 1.1 mrg rts
124 1.1 mrg subc r4,r4
125 1.1 mrg
/* Entry points for an arithmetic right shift of r4 by 24 to 30.
   Each label from 30 down to 25 contributes one single-bit shar and
   falls through to the next label, so entering at count N executes
   N - 24 of them.  The remaining 24 bits are shifted logically
   (shlr16 followed by shlr8); exts.b in the rts delay slot then
   rebuilds the upper bits from bit 7, which holds the original sign.  */
126 1.1 mrg GLOBAL(ashiftrt_r4_30):
127 1.1 mrg shar r4
128 1.1 mrg GLOBAL(ashiftrt_r4_29):
129 1.1 mrg shar r4
130 1.1 mrg GLOBAL(ashiftrt_r4_28):
131 1.1 mrg shar r4
132 1.1 mrg GLOBAL(ashiftrt_r4_27):
133 1.1 mrg shar r4
134 1.1 mrg GLOBAL(ashiftrt_r4_26):
135 1.1 mrg shar r4
136 1.1 mrg GLOBAL(ashiftrt_r4_25):
137 1.1 mrg shar r4
138 1.1 mrg GLOBAL(ashiftrt_r4_24):
139 1.1 mrg shlr16 r4
140 1.1 mrg shlr8 r4
141 1.1 mrg rts
142 1.1 mrg exts.b r4,r4
143 1.1 mrg
/* Entry points for an arithmetic right shift of r4 by 16 to 23.
   Labels 23 down to 17 each execute one shar and fall through, so
   entering at count N executes N - 16 of them.  The final 16 bits are
   shifted logically with shlr16, and exts.w in the rts delay slot
   sign-extends the remaining low word.  */
144 1.1 mrg GLOBAL(ashiftrt_r4_23):
145 1.1 mrg shar r4
146 1.1 mrg GLOBAL(ashiftrt_r4_22):
147 1.1 mrg shar r4
148 1.1 mrg GLOBAL(ashiftrt_r4_21):
149 1.1 mrg shar r4
150 1.1 mrg GLOBAL(ashiftrt_r4_20):
151 1.1 mrg shar r4
152 1.1 mrg GLOBAL(ashiftrt_r4_19):
153 1.1 mrg shar r4
154 1.1 mrg GLOBAL(ashiftrt_r4_18):
155 1.1 mrg shar r4
156 1.1 mrg GLOBAL(ashiftrt_r4_17):
157 1.1 mrg shar r4
158 1.1 mrg GLOBAL(ashiftrt_r4_16):
159 1.1 mrg shlr16 r4
160 1.1 mrg rts
161 1.1 mrg exts.w r4,r4
162 1.1 mrg
/* Entry points for an arithmetic right shift of r4 by 1 to 15.
   This is a plain ladder of single-bit shar instructions.  Labels 15
   down to 2 each execute one shar and fall through; the last shar sits
   in the delay slot of the rts at ashiftrt_r4_1.  Entering at count N
   therefore executes exactly N shifts.  */
163 1.1 mrg GLOBAL(ashiftrt_r4_15):
164 1.1 mrg shar r4
165 1.1 mrg GLOBAL(ashiftrt_r4_14):
166 1.1 mrg shar r4
167 1.1 mrg GLOBAL(ashiftrt_r4_13):
168 1.1 mrg shar r4
169 1.1 mrg GLOBAL(ashiftrt_r4_12):
170 1.1 mrg shar r4
171 1.1 mrg GLOBAL(ashiftrt_r4_11):
172 1.1 mrg shar r4
173 1.1 mrg GLOBAL(ashiftrt_r4_10):
174 1.1 mrg shar r4
175 1.1 mrg GLOBAL(ashiftrt_r4_9):
176 1.1 mrg shar r4
177 1.1 mrg GLOBAL(ashiftrt_r4_8):
178 1.1 mrg shar r4
179 1.1 mrg GLOBAL(ashiftrt_r4_7):
180 1.1 mrg shar r4
181 1.1 mrg GLOBAL(ashiftrt_r4_6):
182 1.1 mrg shar r4
183 1.1 mrg GLOBAL(ashiftrt_r4_5):
184 1.1 mrg shar r4
185 1.1 mrg GLOBAL(ashiftrt_r4_4):
186 1.1 mrg shar r4
187 1.1 mrg GLOBAL(ashiftrt_r4_3):
188 1.1 mrg shar r4
189 1.1 mrg GLOBAL(ashiftrt_r4_2):
190 1.1 mrg shar r4
191 1.1 mrg GLOBAL(ashiftrt_r4_1):
192 1.1 mrg rts
193 1.1 mrg shar r4
194 1.1 mrg
/* Entry point for a shift count of zero: returns immediately and
   leaves r4 unchanged.  The nop only fills the rts delay slot.  */
195 1.1 mrg GLOBAL(ashiftrt_r4_0):
196 1.1 mrg rts
197 1.1 mrg nop
198 1.1 mrg
199 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_0))
200 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_1))
201 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_2))
202 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_3))
203 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_4))
204 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_5))
205 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_6))
206 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_7))
207 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_8))
208 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_9))
209 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_10))
210 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_11))
211 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_12))
212 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_13))
213 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_14))
214 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_15))
215 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_16))
216 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_17))
217 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_18))
218 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_19))
219 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_20))
220 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_21))
221 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_22))
222 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_23))
223 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_24))
224 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_25))
225 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_26))
226 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_27))
227 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_28))
228 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_29))
229 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_30))
230 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_31))
231 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_32))
232 1.1 mrg #endif
233 1.1 mrg
234 1.1 mrg #ifdef L_ashiftrt_n
235 1.1 mrg
236 1.1 mrg !
237 1.1 mrg ! GLOBAL(ashrsi3)
238 1.1 mrg !
239 1.1 mrg ! Entry:
240 1.1 mrg !
241 1.1 mrg ! r4: Value to shift
242 1.1 mrg ! r5: Shift count
243 1.1 mrg !
244 1.1 mrg ! Exit:
245 1.1 mrg !
246 1.1 mrg ! r0: Result
247 1.1 mrg !
248 1.1 mrg ! Destroys:
249 1.1 mrg !
250 1.1 mrg ! T bit, r5
251 1.1 mrg !
252 1.1 mrg
/* Dispatcher for the variable-count arithmetic right shift.
   The count in r5 is reduced to the range 0..31, used to fetch a
   one-byte offset from ashrsi3_table, and control is transferred to
   the shift sequence for that count.  The value in r4 is copied to r0
   in the branch delay slot, so all shift sequences operate on r0.  */
253 1.1 mrg .global GLOBAL(ashrsi3)
254 1.1 mrg HIDDEN_FUNC(GLOBAL(ashrsi3))
255 1.1 mrg .align 2
256 1.1 mrg GLOBAL(ashrsi3):
257 1.1 mrg mov #31,r0
258 1.1 mrg and r0,r5
259 1.1 mrg mova LOCAL(ashrsi3_table),r0
260 1.1 mrg mov.b @(r0,r5),r5
/* The __sh1__ variant adds the offset to the table address and jumps
   through r0.  The other variant uses the PC-relative braf with the
   offset alone; since the offsets are measured from ashrsi3_table, this
   relies on the table starting directly after the delay slot
   instruction below.  */
261 1.1 mrg #ifdef __sh1__
262 1.1 mrg add r5,r0
263 1.1 mrg jmp @r0
264 1.1 mrg #else
265 1.1 mrg braf r5
266 1.1 mrg #endif
267 1.1 mrg mov r4,r0
268 1.1 mrg
/* Offset table for ashrsi3.  Entry N holds the distance in bytes from
   the start of the table to the shift sequence for count N.
   The table is aligned to four bytes because its address is formed
   with mova.  Entries are single bytes read with mov.b, so every target
   has to remain within signed-byte reach of the table; keep this in
   mind before adding instructions to the sequences that follow.  */
269 1.1 mrg .align 2
270 1.1 mrg LOCAL(ashrsi3_table):
271 1.1 mrg .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
272 1.1 mrg .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
273 1.1 mrg .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
274 1.1 mrg .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
275 1.1 mrg .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
276 1.1 mrg .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
277 1.1 mrg .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
278 1.1 mrg .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
279 1.1 mrg .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
280 1.1 mrg .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
281 1.1 mrg .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
282 1.1 mrg .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
283 1.1 mrg .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
284 1.1 mrg .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
285 1.1 mrg .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
286 1.1 mrg .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
287 1.1 mrg .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
288 1.1 mrg .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
289 1.1 mrg .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
290 1.1 mrg .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
291 1.1 mrg .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
292 1.1 mrg .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
293 1.1 mrg .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
294 1.1 mrg .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
295 1.1 mrg .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
296 1.1 mrg .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
297 1.1 mrg .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
298 1.1 mrg .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
299 1.1 mrg .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
300 1.1 mrg .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
301 1.1 mrg .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
302 1.1 mrg .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
303 1.1 mrg
/* Shift sequences reached from the ashrsi3 dispatcher.  They work on
   r0, which already holds a copy of the value to shift, and each group
   ends with its own rts whose delay slot performs the last step.
   The distance of every label from ashrsi3_table is stored as a single
   byte in that table, so the size of this code matters.  */

/* Count 31: rotcl puts the sign bit into T, and subc computes
   r0 - r0 - T, giving 0 or -1.  */
304 1.1 mrg LOCAL(ashrsi3_31):
305 1.1 mrg rotcl r0
306 1.1 mrg rts
307 1.1 mrg subc r0,r0
308 1.1 mrg
/* Counts 30 to 24: N - 24 single-bit shar steps by fall-through, then
   a logical shift by 24 and a sign extension from the low byte.  */
309 1.1 mrg LOCAL(ashrsi3_30):
310 1.1 mrg shar r0
311 1.1 mrg LOCAL(ashrsi3_29):
312 1.1 mrg shar r0
313 1.1 mrg LOCAL(ashrsi3_28):
314 1.1 mrg shar r0
315 1.1 mrg LOCAL(ashrsi3_27):
316 1.1 mrg shar r0
317 1.1 mrg LOCAL(ashrsi3_26):
318 1.1 mrg shar r0
319 1.1 mrg LOCAL(ashrsi3_25):
320 1.1 mrg shar r0
321 1.1 mrg LOCAL(ashrsi3_24):
322 1.1 mrg shlr16 r0
323 1.1 mrg shlr8 r0
324 1.1 mrg rts
325 1.1 mrg exts.b r0,r0
326 1.1 mrg
/* Counts 23 to 16: N - 16 single-bit shar steps by fall-through, then
   a logical shift by 16 and a sign extension from the low word.  */
327 1.1 mrg LOCAL(ashrsi3_23):
328 1.1 mrg shar r0
329 1.1 mrg LOCAL(ashrsi3_22):
330 1.1 mrg shar r0
331 1.1 mrg LOCAL(ashrsi3_21):
332 1.1 mrg shar r0
333 1.1 mrg LOCAL(ashrsi3_20):
334 1.1 mrg shar r0
335 1.1 mrg LOCAL(ashrsi3_19):
336 1.1 mrg shar r0
337 1.1 mrg LOCAL(ashrsi3_18):
338 1.1 mrg shar r0
339 1.1 mrg LOCAL(ashrsi3_17):
340 1.1 mrg shar r0
341 1.1 mrg LOCAL(ashrsi3_16):
342 1.1 mrg shlr16 r0
343 1.1 mrg rts
344 1.1 mrg exts.w r0,r0
345 1.1 mrg
/* Counts 15 to 1: a ladder of single-bit shar steps; the last one is
   in the delay slot of the rts at ashrsi3_1.  */
346 1.1 mrg LOCAL(ashrsi3_15):
347 1.1 mrg shar r0
348 1.1 mrg LOCAL(ashrsi3_14):
349 1.1 mrg shar r0
350 1.1 mrg LOCAL(ashrsi3_13):
351 1.1 mrg shar r0
352 1.1 mrg LOCAL(ashrsi3_12):
353 1.1 mrg shar r0
354 1.1 mrg LOCAL(ashrsi3_11):
355 1.1 mrg shar r0
356 1.1 mrg LOCAL(ashrsi3_10):
357 1.1 mrg shar r0
358 1.1 mrg LOCAL(ashrsi3_9):
359 1.1 mrg shar r0
360 1.1 mrg LOCAL(ashrsi3_8):
361 1.1 mrg shar r0
362 1.1 mrg LOCAL(ashrsi3_7):
363 1.1 mrg shar r0
364 1.1 mrg LOCAL(ashrsi3_6):
365 1.1 mrg shar r0
366 1.1 mrg LOCAL(ashrsi3_5):
367 1.1 mrg shar r0
368 1.1 mrg LOCAL(ashrsi3_4):
369 1.1 mrg shar r0
370 1.1 mrg LOCAL(ashrsi3_3):
371 1.1 mrg shar r0
372 1.1 mrg LOCAL(ashrsi3_2):
373 1.1 mrg shar r0
374 1.1 mrg LOCAL(ashrsi3_1):
375 1.1 mrg rts
376 1.1 mrg shar r0
377 1.1 mrg
/* Count 0: return the copied value unchanged.  */
378 1.1 mrg LOCAL(ashrsi3_0):
379 1.1 mrg rts
380 1.1 mrg nop
381 1.1 mrg
382 1.1 mrg ENDFUNC(GLOBAL(ashrsi3))
383 1.1 mrg #endif
384 1.1 mrg
385 1.1 mrg #ifdef L_ashiftlt
386 1.1 mrg
387 1.1 mrg !
388 1.1 mrg ! GLOBAL(ashlsi3)
389 1.1 mrg ! (For compatibility with older binaries, not used by compiler)
390 1.1 mrg !
391 1.1 mrg ! Entry:
392 1.1 mrg ! r4: Value to shift
393 1.1 mrg ! r5: Shift count
394 1.1 mrg !
395 1.1 mrg ! Exit:
396 1.1 mrg ! r0: Result
397 1.1 mrg !
398 1.1 mrg ! Destroys:
399 1.1 mrg ! T bit
400 1.1 mrg !
401 1.1 mrg !
402 1.1 mrg ! GLOBAL(ashlsi3_r0)
403 1.1 mrg !
404 1.1 mrg ! Entry:
405 1.1 mrg ! r4: Value to shift
406 1.1 mrg ! r0: Shift count
407 1.1 mrg !
408 1.1 mrg ! Exit:
409 1.1 mrg ! r0: Result
410 1.1 mrg !
411 1.1 mrg ! Destroys:
412 1.1 mrg ! T bit
413 1.1 mrg
/* Dispatcher for the variable-count left shift.
   ashlsi3 takes the count in r5, moves it to r0 and continues into
   ashlsi3_r0, which takes the count in r0 directly.  The count is
   masked to 0..31 and multiplied by four (shll2) so that it indexes
   ashlsi3_table, whose slots are two instructions each.  The value in
   r4 is copied to r0 in the delay slot of the jump, so the table code
   shifts r0.  */
414 1.1 mrg .global GLOBAL(ashlsi3)
415 1.1 mrg .global GLOBAL(ashlsi3_r0)
416 1.1 mrg HIDDEN_FUNC(GLOBAL(ashlsi3))
417 1.1 mrg HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
418 1.1 mrg GLOBAL(ashlsi3):
419 1.1 mrg mov r5,r0
420 1.1 mrg .align 2
421 1.1 mrg GLOBAL(ashlsi3_r0):
422 1.1 mrg
/* The __sh1__ variant forms the absolute target with mova.  mova can
   only write r0, so the scaled count is parked in r4 while the original
   r4 is kept on the stack and restored before the jump.  The table is
   aligned to four bytes for mova.  */
423 1.1 mrg #ifdef __sh1__
424 1.1 mrg and #31,r0
425 1.1 mrg shll2 r0
426 1.1 mrg mov.l r4,@-r15
427 1.1 mrg mov r0,r4
428 1.1 mrg mova LOCAL(ashlsi3_table),r0
429 1.1 mrg add r4,r0
430 1.1 mrg mov.l @r15+,r4
431 1.1 mrg jmp @r0
432 1.1 mrg mov r4,r0
433 1.1 mrg .align 2
/* The other variant uses the PC-relative braf, which relies on the
   table starting directly after the delay slot instruction.  */
434 1.1 mrg #else
435 1.1 mrg and #31,r0
436 1.1 mrg shll2 r0
437 1.1 mrg braf r0
438 1.1 mrg mov r4,r0
439 1.1 mrg #endif
440 1.1 mrg
/* Code table for the left shift, indexed by count times four.
   Slot N is a two-instruction sequence, a branch or rts together with
   its delay slot, that shifts r0 left by N, in many cases finishing in
   one of the shared tails placed after the table.  The trailing
   comments give the count handled by each slot.
   Every slot except the last must stay exactly two instructions long,
   because the dispatcher scales the count by four.  Slot 31 is the last
   one and takes three instructions: and keeps bit 0, and rotr in the
   rts delay slot rotates that bit into bit 31.  */
441 1.1 mrg LOCAL(ashlsi3_table):
442 1.1 mrg rts // << 0
443 1.1 mrg nop
444 1.1 mrg LOCAL(ashlsi_1):
445 1.1 mrg rts // << 1
446 1.1 mrg shll r0
447 1.1 mrg LOCAL(ashlsi_2): // << 2
448 1.1 mrg rts
449 1.1 mrg shll2 r0
450 1.1 mrg bra LOCAL(ashlsi_1) // << 3
451 1.1 mrg shll2 r0
452 1.1 mrg bra LOCAL(ashlsi_2) // << 4
453 1.1 mrg shll2 r0
454 1.1 mrg bra LOCAL(ashlsi_5) // << 5
455 1.1 mrg shll r0
456 1.1 mrg bra LOCAL(ashlsi_6) // << 6
457 1.1 mrg shll2 r0
458 1.1 mrg bra LOCAL(ashlsi_7) // << 7
459 1.1 mrg shll r0
460 1.1 mrg LOCAL(ashlsi_8): // << 8
461 1.1 mrg rts
462 1.1 mrg shll8 r0
463 1.1 mrg bra LOCAL(ashlsi_8) // << 9
464 1.1 mrg shll r0
465 1.1 mrg bra LOCAL(ashlsi_8) // << 10
466 1.1 mrg shll2 r0
467 1.1 mrg bra LOCAL(ashlsi_11) // << 11
468 1.1 mrg shll r0
469 1.1 mrg bra LOCAL(ashlsi_12) // << 12
470 1.1 mrg shll2 r0
471 1.1 mrg bra LOCAL(ashlsi_13) // << 13
472 1.1 mrg shll r0
473 1.1 mrg bra LOCAL(ashlsi_14) // << 14
474 1.1 mrg shll8 r0
475 1.1 mrg bra LOCAL(ashlsi_15) // << 15
476 1.1 mrg shll8 r0
477 1.1 mrg LOCAL(ashlsi_16): // << 16
478 1.1 mrg rts
479 1.1 mrg shll16 r0
480 1.1 mrg bra LOCAL(ashlsi_16) // << 17
481 1.1 mrg shll r0
482 1.1 mrg bra LOCAL(ashlsi_16) // << 18
483 1.1 mrg shll2 r0
484 1.1 mrg bra LOCAL(ashlsi_19) // << 19
485 1.1 mrg shll r0
486 1.1 mrg bra LOCAL(ashlsi_20) // << 20
487 1.1 mrg shll2 r0
488 1.1 mrg bra LOCAL(ashlsi_21) // << 21
489 1.1 mrg shll r0
490 1.1 mrg bra LOCAL(ashlsi_22) // << 22
491 1.1 mrg shll16 r0
492 1.1 mrg bra LOCAL(ashlsi_23) // << 23
493 1.1 mrg shll16 r0
494 1.1 mrg bra LOCAL(ashlsi_16) // << 24
495 1.1 mrg shll8 r0
496 1.1 mrg bra LOCAL(ashlsi_25) // << 25
497 1.1 mrg shll r0
498 1.1 mrg bra LOCAL(ashlsi_26) // << 26
499 1.1 mrg shll2 r0
500 1.1 mrg bra LOCAL(ashlsi_27) // << 27
501 1.1 mrg shll r0
502 1.1 mrg bra LOCAL(ashlsi_28) // << 28
503 1.1 mrg shll2 r0
504 1.1 mrg bra LOCAL(ashlsi_29) // << 29
505 1.1 mrg shll16 r0
506 1.1 mrg bra LOCAL(ashlsi_30) // << 30
507 1.1 mrg shll16 r0
508 1.1 mrg and #1,r0 // << 31
509 1.1 mrg rts
510 1.1 mrg rotr r0
511 1.1 mrg
/* Shared tails for the left shift table.  Each is entered from a table
   slot whose delay slot has already shifted r0 by part of the count.
   Several rts instructions below are followed directly by the next
   label.  The first instruction under that label executes in the rts
   delay slot and is part of the shift, so nothing may be inserted or
   reordered between these sequences.  */

/* Counts 5, 6 and 7: label 7 adds 2 bits, then 2 bits here, plus the
   shll2 after the rts (2 bits) in the delay slot.  */
512 1.1 mrg LOCAL(ashlsi_7):
513 1.1 mrg shll2 r0
514 1.1 mrg LOCAL(ashlsi_5):
515 1.1 mrg LOCAL(ashlsi_6):
516 1.1 mrg shll2 r0
517 1.1 mrg rts
/* Counts 11, 12 and 13: label 13 adds 2 bits, then 8 bits, plus the
   shll2 after the rts in the delay slot.  */
518 1.1 mrg LOCAL(ashlsi_13):
519 1.1 mrg shll2 r0
520 1.1 mrg LOCAL(ashlsi_12):
521 1.1 mrg LOCAL(ashlsi_11):
522 1.1 mrg shll8 r0
523 1.1 mrg rts
/* Counts 19, 20 and 21: label 21 adds 2 bits, then 16 bits, plus the
   shll2 after the rts in the delay slot.  */
524 1.1 mrg LOCAL(ashlsi_21):
525 1.1 mrg shll2 r0
526 1.1 mrg LOCAL(ashlsi_20):
527 1.1 mrg LOCAL(ashlsi_19):
528 1.1 mrg shll16 r0
529 1.1 mrg rts
/* Counts 25 to 28: labels 27 and 28 add 2 bits, then 16 bits, plus
   shll8 in the delay slot.  */
530 1.1 mrg LOCAL(ashlsi_28):
531 1.1 mrg LOCAL(ashlsi_27):
532 1.1 mrg shll2 r0
533 1.1 mrg LOCAL(ashlsi_26):
534 1.1 mrg LOCAL(ashlsi_25):
535 1.1 mrg shll16 r0
536 1.1 mrg rts
537 1.1 mrg shll8 r0
538 1.1 mrg
/* Counts 14 and 22: the table slot shifted left by 8 or 16; going back
   right by 2 and then left by 8 gives a net 14 or 22.  The bits dropped
   by the right shift would have been shifted out anyway.  */
539 1.1 mrg LOCAL(ashlsi_22):
540 1.1 mrg LOCAL(ashlsi_14):
541 1.1 mrg shlr2 r0
542 1.1 mrg rts
543 1.1 mrg shll8 r0
544 1.1 mrg
/* Counts 15 and 23: same idea with a right shift by 1.  */
545 1.1 mrg LOCAL(ashlsi_23):
546 1.1 mrg LOCAL(ashlsi_15):
547 1.1 mrg shlr r0
548 1.1 mrg rts
549 1.1 mrg shll8 r0
550 1.1 mrg
/* Counts 29 and 30: the table slot shifted left by 16; right by 3
   (label 29) or 2 (label 30) and then left by 16 gives the net count.  */
551 1.1 mrg LOCAL(ashlsi_29):
552 1.1 mrg shlr r0
553 1.1 mrg LOCAL(ashlsi_30):
554 1.1 mrg shlr2 r0
555 1.1 mrg rts
556 1.1 mrg shll16 r0
557 1.1 mrg
558 1.1 mrg ENDFUNC(GLOBAL(ashlsi3))
559 1.1 mrg ENDFUNC(GLOBAL(ashlsi3_r0))
560 1.1 mrg #endif
561 1.1 mrg
562 1.1 mrg #ifdef L_lshiftrt
563 1.1 mrg
564 1.1 mrg !
565 1.1 mrg ! GLOBAL(lshrsi3)
566 1.1 mrg ! (For compatibility with older binaries, not used by compiler)
567 1.1 mrg !
568 1.1 mrg ! Entry:
569 1.1 mrg ! r4: Value to shift
570 1.1 mrg ! r5: Shift count
571 1.1 mrg !
572 1.1 mrg ! Exit:
573 1.1 mrg ! r0: Result
574 1.1 mrg !
575 1.1 mrg ! Destroys:
576 1.1 mrg ! T bit
577 1.1 mrg !
578 1.1 mrg !
579 1.1 mrg ! GLOBAL(lshrsi3_r0)
580 1.1 mrg !
581 1.1 mrg ! Entry:
582 1.1 mrg ! r4: Value to shift
583 1.1 mrg ! r0: Shift count
584 1.1 mrg !
585 1.1 mrg ! Exit:
586 1.1 mrg ! r0: Result
587 1.1 mrg !
588 1.1 mrg ! Destroys:
589 1.1 mrg ! T bit
590 1.1 mrg
/* Dispatcher for the variable-count logical right shift.
   lshrsi3 takes the count in r5, moves it to r0 and continues into
   lshrsi3_r0, which takes the count in r0 directly.  The count is
   masked to 0..31 and multiplied by four (shll2) so that it indexes
   lshrsi3_table, whose slots are two instructions each.  The value in
   r4 is copied to r0 in the delay slot of the jump, so the table code
   shifts r0.  */
591 1.1 mrg .global GLOBAL(lshrsi3)
592 1.1 mrg .global GLOBAL(lshrsi3_r0)
593 1.1 mrg HIDDEN_FUNC(GLOBAL(lshrsi3))
594 1.1 mrg HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
595 1.1 mrg GLOBAL(lshrsi3):
596 1.1 mrg mov r5,r0
597 1.1 mrg .align 2
598 1.1 mrg GLOBAL(lshrsi3_r0):
599 1.1 mrg
/* The __sh1__ variant forms the absolute target with mova.  mova can
   only write r0, so the scaled count is parked in r4 while the original
   r4 is kept on the stack and restored before the jump.  The table is
   aligned to four bytes for mova.  */
600 1.1 mrg #ifdef __sh1__
601 1.1 mrg and #31,r0
602 1.1 mrg shll2 r0
603 1.1 mrg mov.l r4,@-r15
604 1.1 mrg mov r0,r4
605 1.1 mrg mova LOCAL(lshrsi3_table),r0
606 1.1 mrg add r4,r0
607 1.1 mrg mov.l @r15+,r4
608 1.1 mrg jmp @r0
609 1.1 mrg mov r4,r0
610 1.1 mrg .align 2
/* The other variant uses the PC-relative braf, which relies on the
   table starting directly after the delay slot instruction.  */
611 1.1 mrg #else
612 1.1 mrg and #31,r0
613 1.1 mrg shll2 r0
614 1.1 mrg braf r0
615 1.1 mrg mov r4,r0
616 1.1 mrg #endif
/* Code table for the logical right shift, indexed by count times four.
   Slot N is a two-instruction sequence, a branch or rts together with
   its delay slot, that shifts r0 right by N, in many cases finishing in
   one of the shared tails placed after the table.  The trailing
   comments give the count handled by each slot.
   Every slot except the last must stay exactly two instructions long,
   because the dispatcher scales the count by four.  Slot 31 is the last
   one and takes three instructions: shll moves bit 31 into T, and movt
   in the rts delay slot writes T (0 or 1) to r0.  */
617 1.1 mrg LOCAL(lshrsi3_table):
618 1.1 mrg rts // >> 0
619 1.1 mrg nop
620 1.1 mrg LOCAL(lshrsi_1): // >> 1
621 1.1 mrg rts
622 1.1 mrg shlr r0
623 1.1 mrg LOCAL(lshrsi_2): // >> 2
624 1.1 mrg rts
625 1.1 mrg shlr2 r0
626 1.1 mrg bra LOCAL(lshrsi_1) // >> 3
627 1.1 mrg shlr2 r0
628 1.1 mrg bra LOCAL(lshrsi_2) // >> 4
629 1.1 mrg shlr2 r0
630 1.1 mrg bra LOCAL(lshrsi_5) // >> 5
631 1.1 mrg shlr r0
632 1.1 mrg bra LOCAL(lshrsi_6) // >> 6
633 1.1 mrg shlr2 r0
634 1.1 mrg bra LOCAL(lshrsi_7) // >> 7
635 1.1 mrg shlr r0
636 1.1 mrg LOCAL(lshrsi_8): // >> 8
637 1.1 mrg rts
638 1.1 mrg shlr8 r0
639 1.1 mrg bra LOCAL(lshrsi_8) // >> 9
640 1.1 mrg shlr r0
641 1.1 mrg bra LOCAL(lshrsi_8) // >> 10
642 1.1 mrg shlr2 r0
643 1.1 mrg bra LOCAL(lshrsi_11) // >> 11
644 1.1 mrg shlr r0
645 1.1 mrg bra LOCAL(lshrsi_12) // >> 12
646 1.1 mrg shlr2 r0
647 1.1 mrg bra LOCAL(lshrsi_13) // >> 13
648 1.1 mrg shlr r0
649 1.1 mrg bra LOCAL(lshrsi_14) // >> 14
650 1.1 mrg shlr8 r0
651 1.1 mrg bra LOCAL(lshrsi_15) // >> 15
652 1.1 mrg shlr8 r0
653 1.1 mrg LOCAL(lshrsi_16): // >> 16
654 1.1 mrg rts
655 1.1 mrg shlr16 r0
656 1.1 mrg bra LOCAL(lshrsi_16) // >> 17
657 1.1 mrg shlr r0
658 1.1 mrg bra LOCAL(lshrsi_16) // >> 18
659 1.1 mrg shlr2 r0
660 1.1 mrg bra LOCAL(lshrsi_19) // >> 19
661 1.1 mrg shlr r0
662 1.1 mrg bra LOCAL(lshrsi_20) // >> 20
663 1.1 mrg shlr2 r0
664 1.1 mrg bra LOCAL(lshrsi_21) // >> 21
665 1.1 mrg shlr r0
666 1.1 mrg bra LOCAL(lshrsi_22) // >> 22
667 1.1 mrg shlr16 r0
668 1.1 mrg bra LOCAL(lshrsi_23) // >> 23
669 1.1 mrg shlr16 r0
670 1.1 mrg bra LOCAL(lshrsi_16) // >> 24
671 1.1 mrg shlr8 r0
672 1.1 mrg bra LOCAL(lshrsi_25) // >> 25
673 1.1 mrg shlr r0
674 1.1 mrg bra LOCAL(lshrsi_26) // >> 26
675 1.1 mrg shlr2 r0
676 1.1 mrg bra LOCAL(lshrsi_27) // >> 27
677 1.1 mrg shlr r0
678 1.1 mrg bra LOCAL(lshrsi_28) // >> 28
679 1.1 mrg shlr2 r0
680 1.1 mrg bra LOCAL(lshrsi_29) // >> 29
681 1.1 mrg shlr16 r0
682 1.1 mrg bra LOCAL(lshrsi_30) // >> 30
683 1.1 mrg shlr16 r0
684 1.1 mrg shll r0 // >> 31
685 1.1 mrg rts
686 1.1 mrg movt r0
687 1.1 mrg
/* Shared tails for the logical right shift table.  Each is entered
   from a table slot whose delay slot has already shifted r0 by part of
   the count.
   Several rts instructions below are followed directly by the next
   label.  The first instruction under that label executes in the rts
   delay slot and is part of the shift, so nothing may be inserted or
   reordered between these sequences.  */

/* Counts 5, 6 and 7: label 7 adds 2 bits, then 2 bits here, plus the
   shlr2 after the rts (2 bits) in the delay slot.  */
688 1.1 mrg LOCAL(lshrsi_7):
689 1.1 mrg shlr2 r0
690 1.1 mrg LOCAL(lshrsi_5):
691 1.1 mrg LOCAL(lshrsi_6):
692 1.1 mrg shlr2 r0
693 1.1 mrg rts
/* Counts 11, 12 and 13: label 13 adds 2 bits, then 8 bits, plus the
   shlr2 after the rts in the delay slot.  */
694 1.1 mrg LOCAL(lshrsi_13):
695 1.1 mrg shlr2 r0
696 1.1 mrg LOCAL(lshrsi_12):
697 1.1 mrg LOCAL(lshrsi_11):
698 1.1 mrg shlr8 r0
699 1.1 mrg rts
/* Counts 19, 20 and 21: label 21 adds 2 bits, then 16 bits, plus the
   shlr2 after the rts in the delay slot.  */
700 1.1 mrg LOCAL(lshrsi_21):
701 1.1 mrg shlr2 r0
702 1.1 mrg LOCAL(lshrsi_20):
703 1.1 mrg LOCAL(lshrsi_19):
704 1.1 mrg shlr16 r0
705 1.1 mrg rts
/* Counts 25 to 28: labels 27 and 28 add 2 bits, then 16 bits, plus
   shlr8 in the delay slot.  */
706 1.1 mrg LOCAL(lshrsi_28):
707 1.1 mrg LOCAL(lshrsi_27):
708 1.1 mrg shlr2 r0
709 1.1 mrg LOCAL(lshrsi_26):
710 1.1 mrg LOCAL(lshrsi_25):
711 1.1 mrg shlr16 r0
712 1.1 mrg rts
713 1.1 mrg shlr8 r0
714 1.1 mrg
/* Counts 14 and 22: the table slot shifted right by 8 or 16; going
   back left by 2 and then right by 8 gives a net 14 or 22.  The bits
   dropped by the left shift would have been shifted out anyway.  */
715 1.1 mrg LOCAL(lshrsi_22):
716 1.1 mrg LOCAL(lshrsi_14):
717 1.1 mrg shll2 r0
718 1.1 mrg rts
719 1.1 mrg shlr8 r0
720 1.1 mrg
/* Counts 15 and 23: same idea with a left shift by 1.  */
721 1.1 mrg LOCAL(lshrsi_23):
722 1.1 mrg LOCAL(lshrsi_15):
723 1.1 mrg shll r0
724 1.1 mrg rts
725 1.1 mrg shlr8 r0
726 1.1 mrg
/* Counts 29 and 30: the table slot shifted right by 16; left by 3
   (label 29) or 2 (label 30) and then right by 16 gives the net
   count.  */
727 1.1 mrg LOCAL(lshrsi_29):
728 1.1 mrg shll r0
729 1.1 mrg LOCAL(lshrsi_30):
730 1.1 mrg shll2 r0
731 1.1 mrg rts
732 1.1 mrg shlr16 r0
733 1.1 mrg
734 1.1 mrg ENDFUNC(GLOBAL(lshrsi3))
735 1.1 mrg ENDFUNC(GLOBAL(lshrsi3_r0))
736 1.1 mrg #endif
737 1.1 mrg
738 1.1 mrg #ifdef L_movmem
739 1.1 mrg .text
740 1.1 mrg .balign 4
741 1.1 mrg .global GLOBAL(movmem)
742 1.1 mrg HIDDEN_FUNC(GLOBAL(movmem))
743 1.1 mrg HIDDEN_ALIAS(movstr,movmem)
744 1.1 mrg /* This would be a lot simpler if r6 contained the byte count
745 1.1 mrg minus 64, and we wouldn't be called here for a byte count of 64. */
/* General longword block copy built on the movmemSI chain.
   r4 is the destination and r5 the source; r0 carries the data.  r6 is
   the length control value: it is scaled by four on entry and reduced
   by 64 for every 64-byte group.
   NOTE(review): the exact encoding of r6 is chosen by the caller and is
   not visible in this file; confirm against the compiler before
   changing anything here.
   pr is saved on the stack because bsr overwrites it.  After the bsr it
   points at movmem_loop, so every rts at the end of the movmemSI chain
   comes back into the loop until pr is restored.  */
746 1.1 mrg GLOBAL(movmem):
747 1.1 mrg sts.l pr,@-r15
748 1.1 mrg shll2 r6
/* Enter the chain at the second instruction of movmemSI52.  The load
   for offset 48 is done here in the delay slot; the chain stores it and
   then copies offsets 44 down to 0.  */
749 1.1 mrg bsr GLOBAL(movmemSI52+2)
750 1.1 mrg mov.l @(48,r5),r0
751 1.1 mrg .balign 4
752 1.1 mrg LOCAL(movmem_loop): /* Reached with rts */
753 1.1 mrg mov.l @(60,r5),r0
754 1.1 mrg add #-64,r6
755 1.1 mrg mov.l r0,@(60,r4)
756 1.1 mrg tst r6,r6
757 1.1 mrg mov.l @(56,r5),r0
/* r6 reached exactly zero: movmem_done finishes offsets 56 and 52 and
   returns to the caller.  This bt has no delay slot.  */
758 1.1 mrg bt LOCAL(movmem_done)
759 1.1 mrg mov.l r0,@(56,r4)
760 1.1 mrg cmp/pl r6
761 1.1 mrg mov.l @(52,r5),r0
762 1.1 mrg add #64,r5
763 1.1 mrg mov.l r0,@(52,r4)
764 1.1 mrg add #64,r4
/* r6 is still positive: copy offsets 48 down to 0 of the next group
   through the chain, whose rts returns to movmem_loop.  */
765 1.1 mrg bt GLOBAL(movmemSI52)
766 1.1 mrg ! done all the large groups, do the remainder
767 1.1 mrg ! jump to movmem+
/* r6 is negative here.  The jump target is movmemSI4 + 4 + r6, an
   address inside the chain selected by the remaining amount.  The jmp
   delay slot is the lds.l under movmem_done, which restores pr so that
   the chain returns to the original caller.  */
768 1.1 mrg mova GLOBAL(movmemSI4)+4,r0
769 1.1 mrg add r6,r0
770 1.1 mrg jmp @r0
771 1.1 mrg LOCAL(movmem_done): ! share slot insn, works out aligned.
772 1.1 mrg lds.l @r15+,pr
773 1.1 mrg mov.l r0,@(56,r4)
774 1.1 mrg mov.l @(52,r5),r0
775 1.1 mrg rts
776 1.1 mrg mov.l r0,@(52,r4)
777 1.1 mrg .balign 4
778 1.1 mrg ! ??? We need aliases movstr* for movmem* for the older libraries. These
779 1.1 mrg ! aliases will be removed at some point in the future.
/* Fixed-size copy entry points.  movmemSIn copies n bytes from the
   address in r5 to the address in r4 as longwords, highest offset
   first, using r0 as the transfer register.  Each entry loads and
   stores one longword and falls through into the entry for the next
   smaller size, down to movmemSI4, which returns with its store in the
   rts delay slot.  r4 and r5 are not modified.
   Each entry is exactly two instructions.  movmem depends on that
   layout: it calls movmemSI52+2 and jumps to addresses computed from
   movmemSI4+4, so no instruction may be added, removed or reordered in
   this chain.
   The HIDDEN_ALIAS lines supply the older movstr names for the same
   entry points.  */
780 1.1 mrg .global GLOBAL(movmemSI64)
781 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI64))
782 1.1 mrg HIDDEN_ALIAS(movstrSI64,movmemSI64)
783 1.1 mrg GLOBAL(movmemSI64):
784 1.1 mrg mov.l @(60,r5),r0
785 1.1 mrg mov.l r0,@(60,r4)
786 1.1 mrg .global GLOBAL(movmemSI60)
787 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI60))
788 1.1 mrg HIDDEN_ALIAS(movstrSI60,movmemSI60)
789 1.1 mrg GLOBAL(movmemSI60):
790 1.1 mrg mov.l @(56,r5),r0
791 1.1 mrg mov.l r0,@(56,r4)
792 1.1 mrg .global GLOBAL(movmemSI56)
793 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI56))
794 1.1 mrg HIDDEN_ALIAS(movstrSI56,movmemSI56)
795 1.1 mrg GLOBAL(movmemSI56):
796 1.1 mrg mov.l @(52,r5),r0
797 1.1 mrg mov.l r0,@(52,r4)
798 1.1 mrg .global GLOBAL(movmemSI52)
799 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI52))
800 1.1 mrg HIDDEN_ALIAS(movstrSI52,movmemSI52)
801 1.1 mrg GLOBAL(movmemSI52):
802 1.1 mrg mov.l @(48,r5),r0
803 1.1 mrg mov.l r0,@(48,r4)
804 1.1 mrg .global GLOBAL(movmemSI48)
805 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI48))
806 1.1 mrg HIDDEN_ALIAS(movstrSI48,movmemSI48)
807 1.1 mrg GLOBAL(movmemSI48):
808 1.1 mrg mov.l @(44,r5),r0
809 1.1 mrg mov.l r0,@(44,r4)
810 1.1 mrg .global GLOBAL(movmemSI44)
811 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI44))
812 1.1 mrg HIDDEN_ALIAS(movstrSI44,movmemSI44)
813 1.1 mrg GLOBAL(movmemSI44):
814 1.1 mrg mov.l @(40,r5),r0
815 1.1 mrg mov.l r0,@(40,r4)
816 1.1 mrg .global GLOBAL(movmemSI40)
817 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI40))
818 1.1 mrg HIDDEN_ALIAS(movstrSI40,movmemSI40)
819 1.1 mrg GLOBAL(movmemSI40):
820 1.1 mrg mov.l @(36,r5),r0
821 1.1 mrg mov.l r0,@(36,r4)
822 1.1 mrg .global GLOBAL(movmemSI36)
823 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI36))
824 1.1 mrg HIDDEN_ALIAS(movstrSI36,movmemSI36)
825 1.1 mrg GLOBAL(movmemSI36):
826 1.1 mrg mov.l @(32,r5),r0
827 1.1 mrg mov.l r0,@(32,r4)
828 1.1 mrg .global GLOBAL(movmemSI32)
829 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI32))
830 1.1 mrg HIDDEN_ALIAS(movstrSI32,movmemSI32)
831 1.1 mrg GLOBAL(movmemSI32):
832 1.1 mrg mov.l @(28,r5),r0
833 1.1 mrg mov.l r0,@(28,r4)
834 1.1 mrg .global GLOBAL(movmemSI28)
835 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI28))
836 1.1 mrg HIDDEN_ALIAS(movstrSI28,movmemSI28)
837 1.1 mrg GLOBAL(movmemSI28):
838 1.1 mrg mov.l @(24,r5),r0
839 1.1 mrg mov.l r0,@(24,r4)
840 1.1 mrg .global GLOBAL(movmemSI24)
841 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI24))
842 1.1 mrg HIDDEN_ALIAS(movstrSI24,movmemSI24)
843 1.1 mrg GLOBAL(movmemSI24):
844 1.1 mrg mov.l @(20,r5),r0
845 1.1 mrg mov.l r0,@(20,r4)
846 1.1 mrg .global GLOBAL(movmemSI20)
847 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI20))
848 1.1 mrg HIDDEN_ALIAS(movstrSI20,movmemSI20)
849 1.1 mrg GLOBAL(movmemSI20):
850 1.1 mrg mov.l @(16,r5),r0
851 1.1 mrg mov.l r0,@(16,r4)
852 1.1 mrg .global GLOBAL(movmemSI16)
853 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI16))
854 1.1 mrg HIDDEN_ALIAS(movstrSI16,movmemSI16)
855 1.1 mrg GLOBAL(movmemSI16):
856 1.1 mrg mov.l @(12,r5),r0
857 1.1 mrg mov.l r0,@(12,r4)
858 1.1 mrg .global GLOBAL(movmemSI12)
859 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI12))
860 1.1 mrg HIDDEN_ALIAS(movstrSI12,movmemSI12)
861 1.1 mrg GLOBAL(movmemSI12):
862 1.1 mrg mov.l @(8,r5),r0
863 1.1 mrg mov.l r0,@(8,r4)
864 1.1 mrg .global GLOBAL(movmemSI8)
865 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI8))
866 1.1 mrg HIDDEN_ALIAS(movstrSI8,movmemSI8)
867 1.1 mrg GLOBAL(movmemSI8):
868 1.1 mrg mov.l @(4,r5),r0
869 1.1 mrg mov.l r0,@(4,r4)
870 1.1 mrg .global GLOBAL(movmemSI4)
871 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI4))
872 1.1 mrg HIDDEN_ALIAS(movstrSI4,movmemSI4)
873 1.1 mrg GLOBAL(movmemSI4):
874 1.1 mrg mov.l @(0,r5),r0
875 1.1 mrg rts
876 1.1 mrg mov.l r0,@(0,r4)
877 1.1 mrg
878 1.1 mrg ENDFUNC(GLOBAL(movmemSI64))
879 1.1 mrg ENDFUNC(GLOBAL(movmemSI60))
880 1.1 mrg ENDFUNC(GLOBAL(movmemSI56))
881 1.1 mrg ENDFUNC(GLOBAL(movmemSI52))
882 1.1 mrg ENDFUNC(GLOBAL(movmemSI48))
883 1.1 mrg ENDFUNC(GLOBAL(movmemSI44))
884 1.1 mrg ENDFUNC(GLOBAL(movmemSI40))
885 1.1 mrg ENDFUNC(GLOBAL(movmemSI36))
886 1.1 mrg ENDFUNC(GLOBAL(movmemSI32))
887 1.1 mrg ENDFUNC(GLOBAL(movmemSI28))
888 1.1 mrg ENDFUNC(GLOBAL(movmemSI24))
889 1.1 mrg ENDFUNC(GLOBAL(movmemSI20))
890 1.1 mrg ENDFUNC(GLOBAL(movmemSI16))
891 1.1 mrg ENDFUNC(GLOBAL(movmemSI12))
892 1.1 mrg ENDFUNC(GLOBAL(movmemSI8))
893 1.1 mrg ENDFUNC(GLOBAL(movmemSI4))
894 1.1 mrg ENDFUNC(GLOBAL(movmem))
895 1.1 mrg #endif
896 1.1 mrg
897 1.1 mrg #ifdef L_movmem_i4
898 1.1 mrg .text
899 1.1 mrg .global GLOBAL(movmem_i4_even)
900 1.1 mrg .global GLOBAL(movmem_i4_odd)
901 1.1 mrg .global GLOBAL(movmemSI12_i4)
902 1.1 mrg
903 1.1 mrg HIDDEN_FUNC(GLOBAL(movmem_i4_even))
904 1.1 mrg HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
905 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
906 1.1 mrg
907 1.1 mrg HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
908 1.1 mrg HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
909 1.1 mrg HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
910 1.1 mrg
/* Longword block copy using post-increment loads.
   r5 is the source and is advanced by every load; r4 is the
   destination; r0 to r3 carry data; r6 is a countdown tested with dt
   at two points in the loop.  movmem_i4_even and movmem_i4_odd are two
   entry points into the same loop, in which the loads run ahead of the
   matching stores.
   NOTE(review): the meaning of the r6 value and of the even and odd
   entry points is defined by the caller and is not visible here; verify
   against the compiler before changing this code.  */

/* Exit taken from the first dt in the loop.  r4 has not been advanced
   for this pass, so the two longwords held in r0 and r1 go to offsets
   16 and 20.  */
911 1.1 mrg .p2align 5
912 1.1 mrg L_movmem_2mod4_end:
913 1.1 mrg mov.l r0,@(16,r4)
914 1.1 mrg rts
915 1.1 mrg mov.l r1,@(20,r4)
916 1.1 mrg
917 1.1 mrg .p2align 2
918 1.1 mrg
/* Even entry: preload r0 and r1, then join the loop at its midpoint.  */
919 1.1 mrg GLOBAL(movmem_i4_even):
920 1.1 mrg mov.l @r5+,r0
921 1.1 mrg bra L_movmem_start_even
922 1.1 mrg mov.l @r5+,r1
923 1.1 mrg
/* Odd entry: r4 is biased by -4 so that the stores at offsets 4, 8 and
   12 land on the first three destination longwords, which lines the
   pointers up with the loop body that follows.  */
924 1.1 mrg GLOBAL(movmem_i4_odd):
925 1.1 mrg mov.l @r5+,r1
926 1.1 mrg add #-4,r4
927 1.1 mrg mov.l @r5+,r2
928 1.1 mrg mov.l @r5+,r3
929 1.1 mrg mov.l r1,@(4,r4)
930 1.1 mrg mov.l r2,@(8,r4)
931 1.1 mrg
/* Main loop.  One pass stores r3, loads the next r0 and r1, advances
   r4 by 16, loads the next r2 and r3 and stores r0, r1 and r2.  Both
   conditional branches are delayed, so the instruction after each one
   always executes.  */
932 1.1 mrg L_movmem_loop:
933 1.1 mrg mov.l r3,@(12,r4)
934 1.1 mrg dt r6
935 1.1 mrg mov.l @r5+,r0
936 1.1 mrg bt/s L_movmem_2mod4_end
937 1.1 mrg mov.l @r5+,r1
938 1.1 mrg add #16,r4
939 1.1 mrg L_movmem_start_even:
940 1.1 mrg mov.l @r5+,r2
941 1.1 mrg mov.l @r5+,r3
942 1.1 mrg mov.l r0,@r4
943 1.1 mrg dt r6
944 1.1 mrg mov.l r1,@(4,r4)
945 1.1 mrg bf/s L_movmem_loop
946 1.1 mrg mov.l r2,@(8,r4)
/* Countdown reached zero at the second dt: store the last longword in
   the rts delay slot and return.  */
947 1.1 mrg rts
948 1.1 mrg mov.l r3,@(12,r4)
949 1.1 mrg
950 1.1 mrg ENDFUNC(GLOBAL(movmem_i4_even))
951 1.1 mrg ENDFUNC(GLOBAL(movmem_i4_odd))
952 1.1 mrg
/* Copies three longwords (12 bytes) from the address in r5 to the
   address in r4.  All three loads are issued before the stores, using
   r0, r1 and r2; r4 and r5 are left unchanged.  The last store is in
   the rts delay slot.  */
953 1.1 mrg .p2align 4
954 1.1 mrg GLOBAL(movmemSI12_i4):
955 1.1 mrg mov.l @r5,r0
956 1.1 mrg mov.l @(4,r5),r1
957 1.1 mrg mov.l @(8,r5),r2
958 1.1 mrg mov.l r0,@r4
959 1.1 mrg mov.l r1,@(4,r4)
960 1.1 mrg rts
961 1.1 mrg mov.l r2,@(8,r4)
962 1.1 mrg
963 1.1 mrg ENDFUNC(GLOBAL(movmemSI12_i4))
964 1.1 mrg #endif
965 1.1 mrg
966 1.1 mrg #ifdef L_mulsi3
967 1.1 mrg
968 1.1 mrg
969 1.1 mrg .global GLOBAL(mulsi3)
970 1.1 mrg HIDDEN_FUNC(GLOBAL(mulsi3))
971 1.1 mrg
972 1.1 mrg ! r4 = aabb
973 1.1 mrg ! r5 = ccdd
974 1.1 mrg ! r0 = aabb*ccdd via partial products
975 1.1 mrg !
976 1.1 mrg ! if aa == 0 and cc == 0
977 1.1 mrg ! r0 = bb*dd
978 1.1 mrg !
979 1.1 mrg ! else
980 1.1 mrg ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
981 1.1 mrg !
982 1.1 mrg
/* 32-bit multiply assembled from 16 x 16 bit unsigned multiplies.
   Operands are in r4 and r5; the low 32 bits of the product are
   returned in r0.  r1, r2, r3, macl and the T bit are overwritten.
   The low halves are multiplied first.  If both high halves are zero
   that product is already the answer; otherwise the two cross products
   are added, shifted left by 16 and added to it.  The product of the
   two high halves only affects bits above bit 31 and is never
   computed.  */
983 1.1 mrg GLOBAL(mulsi3):
984 1.1 mrg mulu.w r4,r5 ! multiply the lsws macl=bb*dd
985 1.1 mrg mov r5,r3 ! r3 = ccdd
986 1.1 mrg swap.w r4,r2 ! r2 = bbaa
987 1.1 mrg xtrct r2,r3 ! r3 = aacc
988 1.1 mrg tst r3,r3 ! msws zero ?
989 1.1 mrg bf hiset
990 1.1 mrg rts ! yes - then we have the answer
991 1.1 mrg sts macl,r0
992 1.1 mrg
/* At least one high half is non-zero.  mulu.w reads only the low 16
   bits of each operand, so r2 supplies aa and r3 supplies cc.  */
993 1.1 mrg hiset: sts macl,r0 ! r0 = bb*dd
994 1.1 mrg mulu.w r2,r5 ! brewing macl = aa*dd
995 1.1 mrg sts macl,r1
996 1.1 mrg mulu.w r3,r4 ! brewing macl = cc*bb
997 1.1 mrg sts macl,r2
998 1.1 mrg add r1,r2
999 1.1 mrg shll16 r2
1000 1.1 mrg rts
1001 1.1 mrg add r2,r0
1002 1.1 mrg
1003 1.1 mrg ENDFUNC(GLOBAL(mulsi3))
1004 1.1 mrg #endif
1005 1.1 mrg #endif /* ! __SH5__ */
1006 1.1 mrg #ifdef L_sdivsi3_i4
1007 1.1 mrg .title "SH DIVIDE"
1008 1.1 mrg !! 4 byte integer Divide code for the Renesas SH
1009 1.1 mrg #if defined (__SH4__) || defined (__SH2A__)
1010 1.1 mrg !! args in r4 and r5, result in fpul, clobber dr0, dr2
1011 1.1 mrg
/* Signed 32-bit division carried out in the floating point unit.
   The dividend in r4 and the divisor in r5 are each moved through fpul
   and converted to double precision (dr0 and dr2).  fdiv leaves the
   quotient in dr0, and ftrc in the rts delay slot converts it back to
   an integer by truncation.  The result is returned in fpul, not in a
   general register.
   NOTE(review): this variant does not touch fpscr, so it presumably
   relies on the caller already running in double precision mode;
   confirm against the compiler.  */
1012 1.1 mrg .global GLOBAL(sdivsi3_i4)
1013 1.1 mrg HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1014 1.1 mrg GLOBAL(sdivsi3_i4):
1015 1.1 mrg lds r4,fpul
1016 1.1 mrg float fpul,dr0
1017 1.1 mrg lds r5,fpul
1018 1.1 mrg float fpul,dr2
1019 1.1 mrg fdiv dr2,dr0
1020 1.1 mrg rts
1021 1.1 mrg ftrc dr0,fpul
1022 1.1 mrg
1023 1.1 mrg ENDFUNC(GLOBAL(sdivsi3_i4))
1024 1.1 mrg #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1025 1.1 mrg !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
1026 1.1 mrg
1027 1.1 mrg #if ! __SH5__ || __SH5__ == 32
1028 1.1 mrg #if __SH5__
1029 1.1 mrg .mode SHcompact
1030 1.1 mrg #endif
1031 1.1 mrg .global GLOBAL(sdivsi3_i4)
1032 1.1 mrg HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1033 1.1 mrg GLOBAL(sdivsi3_i4):
1034 1.1 mrg sts.l fpscr,@-r15
1035 1.1 mrg mov #8,r2
1036 1.1 mrg swap.w r2,r2
1037 1.1 mrg lds r2,fpscr
1038 1.1 mrg lds r4,fpul
1039 1.1 mrg float fpul,dr0
1040 1.1 mrg lds r5,fpul
1041 1.1 mrg float fpul,dr2
1042 1.1 mrg fdiv dr2,dr0
1043 1.1 mrg ftrc dr0,fpul
1044 1.1 mrg rts
1045 1.1 mrg lds.l @r15+,fpscr
1046 1.1 mrg
1047 1.1 mrg ENDFUNC(GLOBAL(sdivsi3_i4))
1048 1.1 mrg #endif /* ! __SH5__ || __SH5__ == 32 */
1049 1.1 mrg #endif /* ! __SH4__ || __SH2A__ */
1050 1.1 mrg #endif
1051 1.1 mrg
1052 1.1 mrg #ifdef L_sdivsi3
1053 1.1 mrg /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1054 1.1 mrg sh2e/sh3e code. */
1055 1.1 mrg !!
1056 1.1 mrg !! Steve Chamberlain
1057 1.1 mrg !! sac@cygnus.com
1058 1.1 mrg !!
1059 1.1 mrg !!
1060 1.1 mrg /* sdivsi3: signed 32-bit divide. Several implementations follow; the preprocessor selects one. */
1061 1.1 mrg !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1062 1.1 mrg
1063 1.1 mrg .global GLOBAL(sdivsi3)
1064 1.1 mrg #if __SHMEDIA__
1065 1.1 mrg #if __SH5__ == 32
1066 1.1 mrg .section .text..SHmedia32,"ax"
1067 1.1 mrg #else
1068 1.1 mrg .text
1069 1.1 mrg #endif
1070 1.1 mrg .align 2
1071 1.1 mrg #if 0 /* disabled: shift-and-subtract loop, kept together with its C model */
1072 1.1 mrg /* The assembly code that follows is a hand-optimized version of the C
1073 1.1 mrg code that follows. Note that the registers that are modified are
1074 1.1 mrg exactly those listed as clobbered in the patterns divsi3_i1 and
1075 1.1 mrg divsi3_i1_media.
1076 1.1 mrg
1077 1.1 mrg int __sdivsi3 (i, j)
1078 1.1 mrg int i, j;
1079 1.1 mrg {
1080 1.1 mrg register unsigned long long r18 asm ("r18");
1081 1.1 mrg register unsigned long long r19 asm ("r19");
1082 1.1 mrg register unsigned long long r0 asm ("r0") = 0;
1083 1.1 mrg register unsigned long long r1 asm ("r1") = 1;
1084 1.1 mrg register int r2 asm ("r2") = i >> 31;
1085 1.1 mrg register int r3 asm ("r3") = j >> 31;
1086 1.1 mrg
1087 1.1 mrg r2 = r2 ? r2 : r1;
1088 1.1 mrg r3 = r3 ? r3 : r1;
1089 1.1 mrg r18 = i * r2;
1090 1.1 mrg r19 = j * r3;
1091 1.1 mrg r2 *= r3;
1092 1.1 mrg
1093 1.1 mrg r19 <<= 31;
1094 1.1 mrg r1 <<= 31;
1095 1.1 mrg do
1096 1.1 mrg if (r18 >= r19)
1097 1.1 mrg r0 |= r1, r18 -= r19;
1098 1.1 mrg while (r19 >>= 1, r1 >>= 1);
1099 1.1 mrg
1100 1.1 mrg return r2 * (int)r0;
1101 1.1 mrg }
1102 1.1 mrg */
1103 1.1 mrg GLOBAL(sdivsi3):
1104 1.1 mrg pt/l LOCAL(sdivsi3_dontadd), tr2
1105 1.1 mrg pt/l LOCAL(sdivsi3_loop), tr1
1106 1.1 mrg ptabs/l r18, tr0
1107 1.1 mrg movi 0, r0
1108 1.1 mrg movi 1, r1
1109 1.1 mrg shari.l r4, 31, r2 /* r2 = i >> 31 */
1110 1.1 mrg shari.l r5, 31, r3 /* r3 = j >> 31 */
1111 1.1 mrg cmveq r2, r1, r2 /* r2 = r2 ? r2 : 1 */
1112 1.1 mrg cmveq r3, r1, r3 /* r3 = r3 ? r3 : 1 */
1113 1.1 mrg muls.l r4, r2, r18
1114 1.1 mrg muls.l r5, r3, r19
1115 1.1 mrg muls.l r2, r3, r2 /* r2 = +1 or -1: sign applied to the result */
1116 1.1 mrg shlli r19, 31, r19
1117 1.1 mrg shlli r1, 31, r1
1118 1.1 mrg LOCAL(sdivsi3_loop):
1119 1.1 mrg bgtu r19, r18, tr2 /* skip the subtract while r19 > r18 */
1120 1.1 mrg or r0, r1, r0
1121 1.1 mrg sub r18, r19, r18
1122 1.1 mrg LOCAL(sdivsi3_dontadd):
1123 1.1 mrg shlri r1, 1, r1
1124 1.1 mrg shlri r19, 1, r19
1125 1.1 mrg bnei r1, 0, tr1 /* loop until the quotient bit mask in r1 is shifted out */
1126 1.1 mrg muls.l r0, r2, r0
1127 1.1 mrg add.l r0, r63, r0
1128 1.1 mrg blink tr0, r63
1129 1.1 mrg #elif 0 /* ! 0 */
1130 1.1 mrg // inputs: r4,r5
1131 1.1 mrg // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1132 1.1 mrg // result in r0
1133 1.1 mrg GLOBAL(sdivsi3):
1134 1.1 mrg // can create absolute value without extra latency,
1135 1.1 mrg // but dependent on proper sign extension of inputs:
1136 1.1 mrg // shari.l r5,31,r2
1137 1.1 mrg // xor r5,r2,r20
1138 1.1 mrg // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1139 1.1 mrg shari.l r5,31,r2
1140 1.1 mrg ori r2,1,r2
1141 1.1 mrg muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1142 1.1 mrg movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1143 1.1 mrg shari.l r4,31,r3
1144 1.1 mrg nsb r20,r0
1145 1.1 mrg shlld r20,r0,r25
1146 1.1 mrg shlri r25,48,r25
1147 1.1 mrg sub r19,r25,r1
1148 1.1 mrg mmulfx.w r1,r1,r2
1149 1.1 mrg mshflo.w r1,r63,r1
1150 1.1 mrg // If r4 was to be used in-place instead of r21, could use this sequence
1151 1.1 mrg // to compute absolute:
1152 1.1 mrg // sub r63,r4,r19 // compute absolute value of r4
1153 1.1 mrg // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1154 1.1 mrg // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1155 1.1 mrg ori r3,1,r3
1156 1.1 mrg mmulfx.w r25,r2,r2
1157 1.1 mrg sub r19,r0,r0
1158 1.1 mrg muls.l r4,r3,r21
1159 1.1 mrg msub.w r1,r2,r2
1160 1.1 mrg addi r2,-2,r1
1161 1.1 mrg mulu.l r21,r1,r19
1162 1.1 mrg mmulfx.w r2,r2,r2
1163 1.1 mrg shlli r1,15,r1
1164 1.1 mrg shlrd r19,r0,r19
1165 1.1 mrg mulu.l r19,r20,r3
1166 1.1 mrg mmacnfx.wl r25,r2,r1
1167 1.1 mrg ptabs r18,tr0
1168 1.1 mrg sub r21,r3,r25
1169 1.1 mrg /* r19 = first quotient estimate; r25 = r21 - r19 * r20 is what is left of the dividend. */
1170 1.1 mrg mulu.l r25,r1,r2
1171 1.1 mrg addi r0,14,r0
1172 1.1 mrg xor r4,r5,r18
1173 1.1 mrg shlrd r2,r0,r2
1174 1.1 mrg mulu.l r2,r20,r3
1175 1.1 mrg add r19,r2,r19
1176 1.1 mrg shari.l r18,31,r18
1177 1.1 mrg sub r25,r3,r25
1178 1.1 mrg /* Second step done: correction r2 was added to r19 and r2 * r20 removed from r25. r18 = sign mask of r4 ^ r5. */
1179 1.1 mrg mulu.l r25,r1,r2
1180 1.1 mrg sub r25,r20,r25
1181 1.1 mrg add r19,r18,r19
1182 1.1 mrg shlrd r2,r0,r2
1183 1.1 mrg mulu.l r2,r20,r3
1184 1.1 mrg addi r25,1,r25
1185 1.1 mrg add r19,r2,r19
1186 1.1 mrg /* Final adjust by the cmpgt result, then (q + r18) ^ r18 negates the quotient when the operand signs differ. */
1187 1.1 mrg cmpgt r25,r3,r25
1188 1.1 mrg add.l r19,r25,r0
1189 1.1 mrg xor r0,r18,r0
1190 1.1 mrg blink tr0,r63
1191 1.1 mrg #else /* ! 0 && ! 0 */
1192 1.1 mrg /* Enabled SHmedia implementation: a reciprocal of r5 is looked up in the table addressed by r20, refined, and multiplied by r4. */
1193 1.1 mrg // inputs: r4,r5
1194 1.1 mrg // clobbered: r1,r18,r19,r20,r21,r25,tr0
1195 1.1 mrg // result in r0
1196 1.1 mrg HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1197 1.1 mrg #ifndef __pic__
1198 1.1 mrg FUNC(GLOBAL(sdivsi3))
1199 1.1 mrg GLOBAL(sdivsi3): /* this is the shcompact entry point */
1200 1.1 mrg // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1201 1.1 mrg // with the SHcompact implementation, which clobbers tr1 / tr2.
1202 1.1 mrg .global GLOBAL(sdivsi3_1)
1203 1.1 mrg GLOBAL(sdivsi3_1):
1204 1.1 mrg .global GLOBAL(div_table_internal)
1205 1.1 mrg movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1206 1.1 mrg shori GLOBAL(div_table_internal) & 65535, r20 /* r20 = address of div_table_internal (non-PIC builds only) */
1207 1.1 mrg #endif
1208 1.1 mrg .global GLOBAL(sdivsi3_2)
1209 1.1 mrg // div_table in r20
1210 1.1 mrg // clobbered: r1,r18,r19,r21,r25,tr0
1211 1.1 mrg GLOBAL(sdivsi3_2):
1212 1.1 mrg nsb r5, r1
1213 1.1 mrg shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1214 1.1 mrg shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1215 1.1 mrg ldx.ub r20, r21, r19 // u0.8
1216 1.1 mrg shari r25, 32, r25 // normalize to s2.30
1217 1.1 mrg shlli r21, 1, r21
1218 1.1 mrg muls.l r25, r19, r19 // s2.38
1219 1.1 mrg ldx.w r20, r21, r21 // s2.14
1220 1.1 mrg ptabs r18, tr0
1221 1.1 mrg shari r19, 24, r19 // truncate to s2.14
1222 1.1 mrg sub r21, r19, r19 // some 11 bit inverse in s1.14
1223 1.1 mrg muls.l r19, r19, r21 // u0.28
1224 1.1 mrg sub r63, r1, r1
1225 1.1 mrg addi r1, 92, r1
1226 1.1 mrg muls.l r25, r21, r18 // s2.58
1227 1.1 mrg shlli r19, 45, r19 // multiply by two and convert to s2.58
1228 1.1 mrg /* bubble */
1229 1.1 mrg sub r19, r18, r18
1230 1.1 mrg shari r18, 28, r18 // some 22 bit inverse in s1.30
1231 1.1 mrg muls.l r18, r25, r0 // s2.60
1232 1.1 mrg muls.l r18, r4, r25 // s32.30
1233 1.1 mrg /* bubble */
1234 1.1 mrg shari r0, 16, r19 // s-16.44
1235 1.1 mrg muls.l r19, r18, r19 // s-16.74
1236 1.1 mrg shari r25, 63, r0
1237 1.1 mrg shari r4, 14, r18 // s19.-14
1238 1.1 mrg shari r19, 30, r19 // s-16.44
1239 1.1 mrg muls.l r19, r18, r19 // s15.30
1240 1.1 mrg xor r21, r0, r21 // You could also use the constant 1 << 27.
1241 1.1 mrg add r21, r25, r21
1242 1.1 mrg sub r21, r19, r21
1243 1.1 mrg shard r21, r1, r21
1244 1.1 mrg sub r21, r0, r0
1245 1.1 mrg blink tr0, r63
1246 1.1 mrg #ifndef __pic__
1247 1.1 mrg ENDFUNC(GLOBAL(sdivsi3))
1248 1.1 mrg #endif
1249 1.1 mrg ENDFUNC(GLOBAL(sdivsi3_2))
1250 1.1 mrg #endif
1251 1.1 mrg #elif defined (__SHMEDIA__)
1252 1.1 mrg /* m5compact-nofpu: selected when __SHMEDIA__ is defined but zero, so the test must be "defined"; a plain value test here could never succeed after the #if __SHMEDIA__ above. */
1253 1.1 mrg // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1254 1.1 mrg .mode SHmedia
1255 1.1 mrg .section .text..SHmedia32,"ax"
1256 1.1 mrg .align 2
1257 1.1 mrg FUNC(GLOBAL(sdivsi3))
1258 1.1 mrg GLOBAL(sdivsi3): // r4 = dividend, r5 = divisor, quotient returned in r0
1259 1.1 mrg pt/l LOCAL(sdivsi3_dontsub), tr0
1260 1.1 mrg pt/l LOCAL(sdivsi3_loop), tr1
1261 1.1 mrg ptabs/l r18,tr2 // tr2 = return address
1262 1.1 mrg shari.l r4,31,r18 // r18 = sign mask of the dividend (0 or -1)
1263 1.1 mrg shari.l r5,31,r19 // r19 = sign mask of the divisor
1264 1.1 mrg xor r4,r18,r20
1265 1.1 mrg xor r5,r19,r21
1266 1.1 mrg sub.l r20,r18,r20 // r20 = absolute value of the dividend
1267 1.1 mrg sub.l r21,r19,r21 // r21 = absolute value of the divisor
1268 1.1 mrg xor r18,r19,r19 // r19 = sign mask of the quotient
1269 1.1 mrg shlli r21,32,r25 // r25 = divisor << 32; its low 32 bits double as the loop counter
1270 1.1 mrg addi r25,-1,r21 // r21 = (divisor << 32) - 1
1271 1.1 mrg addz.l r20,r63,r20 // zero-extend the dividend to 64 bits
1272 1.1 mrg LOCAL(sdivsi3_loop):
1273 1.1 mrg shlli r20,1,r20 // shift the next dividend bit into the upper half
1274 1.1 mrg bgeu/u r21,r20,tr0 // skip the subtract while r21 >= r20
1275 1.1 mrg sub r20,r21,r20 // upper half -= divisor; the "- 1" in r21 sets the new quotient bit in the low half
1276 1.1 mrg LOCAL(sdivsi3_dontsub):
1277 1.1 mrg addi.l r25,-1,r25 // counter runs -1, -2, ... -32
1278 1.1 mrg bnei r25,-32,tr1 // 32 iterations in total
1279 1.1 mrg xor r20,r19,r20
1280 1.1 mrg sub.l r20,r19,r0 // apply the quotient sign: (q ^ s) - s
1281 1.1 mrg blink tr2,r63
1282 1.1 mrg ENDFUNC(GLOBAL(sdivsi3))
1283 1.1 mrg #else /* ! __SHMEDIA__ */
1284 1.1 mrg FUNC(GLOBAL(sdivsi3))
1285 1.1 mrg GLOBAL(sdivsi3):
1286 1.1 mrg mov r4,r1
1287 1.1 mrg mov r5,r0
1288 1.1 mrg /* r1 = dividend, r0 = divisor. A zero divisor branches to div0, which returns 0. */
1289 1.1 mrg tst r0,r0
1290 1.1 mrg bt div0
1291 1.1 mrg mov #0,r2
1292 1.1 mrg div0s r2,r1 /* T = sign bit of the dividend, since r2 is 0 */
1293 1.1 mrg subc r3,r3 /* r3 = -T: sign extension of the dividend */
1294 1.1 mrg subc r2,r1 /* r1 = r1 - T */
1295 1.1 mrg div0s r0,r3 /* set up the signed division state; 32 div1 steps follow, each rotcl shifting T into r1 */
1296 1.1 mrg rotcl r1
1297 1.1 mrg div1 r0,r3
1298 1.1 mrg rotcl r1
1299 1.1 mrg div1 r0,r3
1300 1.1 mrg rotcl r1
1301 1.1 mrg div1 r0,r3
1302 1.1 mrg rotcl r1
1303 1.1 mrg div1 r0,r3
1304 1.1 mrg rotcl r1
1305 1.1 mrg div1 r0,r3
1306 1.1 mrg rotcl r1
1307 1.1 mrg div1 r0,r3
1308 1.1 mrg rotcl r1
1309 1.1 mrg div1 r0,r3
1310 1.1 mrg rotcl r1
1311 1.1 mrg div1 r0,r3
1312 1.1 mrg rotcl r1
1313 1.1 mrg div1 r0,r3
1314 1.1 mrg rotcl r1
1315 1.1 mrg div1 r0,r3
1316 1.1 mrg rotcl r1
1317 1.1 mrg div1 r0,r3
1318 1.1 mrg rotcl r1
1319 1.1 mrg div1 r0,r3
1320 1.1 mrg rotcl r1
1321 1.1 mrg div1 r0,r3
1322 1.1 mrg rotcl r1
1323 1.1 mrg div1 r0,r3
1324 1.1 mrg rotcl r1
1325 1.1 mrg div1 r0,r3
1326 1.1 mrg rotcl r1
1327 1.1 mrg div1 r0,r3
1328 1.1 mrg rotcl r1
1329 1.1 mrg div1 r0,r3
1330 1.1 mrg rotcl r1
1331 1.1 mrg div1 r0,r3
1332 1.1 mrg rotcl r1
1333 1.1 mrg div1 r0,r3
1334 1.1 mrg rotcl r1
1335 1.1 mrg div1 r0,r3
1336 1.1 mrg rotcl r1
1337 1.1 mrg div1 r0,r3
1338 1.1 mrg rotcl r1
1339 1.1 mrg div1 r0,r3
1340 1.1 mrg rotcl r1
1341 1.1 mrg div1 r0,r3
1342 1.1 mrg rotcl r1
1343 1.1 mrg div1 r0,r3
1344 1.1 mrg rotcl r1
1345 1.1 mrg div1 r0,r3
1346 1.1 mrg rotcl r1
1347 1.1 mrg div1 r0,r3
1348 1.1 mrg rotcl r1
1349 1.1 mrg div1 r0,r3
1350 1.1 mrg rotcl r1
1351 1.1 mrg div1 r0,r3
1352 1.1 mrg rotcl r1
1353 1.1 mrg div1 r0,r3
1354 1.1 mrg rotcl r1
1355 1.1 mrg div1 r0,r3
1356 1.1 mrg rotcl r1
1357 1.1 mrg div1 r0,r3
1358 1.1 mrg rotcl r1
1359 1.1 mrg div1 r0,r3
1360 1.1 mrg rotcl r1
1361 1.1 mrg addc r2,r1 /* r1 = r1 + T (r2 is still 0) */
1362 1.1 mrg rts
1363 1.1 mrg mov r1,r0 /* delay slot: quotient returned in r0 */
1364 1.1 mrg
1365 1.1 mrg /* Division by zero is not trapped: the routine simply returns 0. */
1366 1.1 mrg div0: rts
1367 1.1 mrg mov #0,r0
1368 1.1 mrg
1369 1.1 mrg ENDFUNC(GLOBAL(sdivsi3))
1370 1.1 mrg #endif /* ! __SHMEDIA__ */
1371 1.1 mrg #endif
1372 1.1 mrg #ifdef L_udivsi3_i4
1373 1.1 mrg /* Unsigned divide on the FPU: bit 31 of each operand is flipped, the value is converted as a signed integer, and 2^31 (the double at L1) is added back before the divide. */
1374 1.1 mrg .title "SH DIVIDE"
1375 1.1 mrg !! 4 byte integer Divide code for the Renesas SH
1376 1.1 mrg #if defined (__SH4__) || defined (__SH2A__)
1377 1.1 mrg !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1378 1.1 mrg !! and t bit
1379 1.1 mrg
1380 1.1 mrg .global GLOBAL(udivsi3_i4)
1381 1.1 mrg HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1382 1.1 mrg GLOBAL(udivsi3_i4):
1383 1.1 mrg mov #1,r1
1384 1.1 mrg cmp/hi r1,r5
1385 1.1 mrg bf trivial /* taken when the divisor is 0 or 1 */
1386 1.1 mrg rotr r1 /* r1 = 0x80000000 */
1387 1.1 mrg xor r1,r4
1388 1.1 mrg lds r4,fpul
1389 1.1 mrg mova L1,r0
1390 1.1 mrg #ifdef FMOVD_WORKS
1391 1.1 mrg fmov.d @r0+,dr4
1392 1.1 mrg #else
1393 1.1 mrg fmov.s @r0+,DR40
1394 1.1 mrg fmov.s @r0,DR41
1395 1.1 mrg #endif
1396 1.1 mrg float fpul,dr0
1397 1.1 mrg xor r1,r5
1398 1.1 mrg lds r5,fpul
1399 1.1 mrg float fpul,dr2
1400 1.1 mrg fadd dr4,dr0 /* undo the bit-31 flip: dr0 = dividend as an unsigned value */
1401 1.1 mrg fadd dr4,dr2 /* likewise for the divisor */
1402 1.1 mrg fdiv dr2,dr0
1403 1.1 mrg rts
1404 1.1 mrg ftrc dr0,fpul /* delay slot: integer quotient into fpul */
1405 1.1 mrg /* Divisor 0 or 1: the dividend is returned unchanged (lds runs in the rts delay slot). */
1406 1.1 mrg trivial:
1407 1.1 mrg rts
1408 1.1 mrg lds r4,fpul
1409 1.1 mrg
1410 1.1 mrg .align 2
1411 1.1 mrg #ifdef FMOVD_WORKS
1412 1.1 mrg .align 3 ! make double below 8 byte aligned.
1413 1.1 mrg #endif
1414 1.1 mrg L1:
1415 1.1 mrg .double 2147483648
1416 1.1 mrg
1417 1.1 mrg ENDFUNC(GLOBAL(udivsi3_i4))
1418 1.1 mrg #elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__)
1419 1.1 mrg #if ! __SH5__ || __SH5__ == 32
1420 1.1 mrg !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1421 1.1 mrg .mode SHmedia
1422 1.1 mrg .global GLOBAL(udivsi3_i4)
1423 1.1 mrg HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1424 1.1 mrg GLOBAL(udivsi3_i4):
1425 1.1 mrg addz.l r4,r63,r20 /* zero-extend the dividend to 64 bits */
1426 1.1 mrg addz.l r5,r63,r21 /* zero-extend the divisor to 64 bits */
1427 1.1 mrg fmov.qd r20,dr0
1428 1.1 mrg fmov.qd r21,dr32
1429 1.1 mrg ptabs r18,tr0
1430 1.1 mrg float.qd dr0,dr0
1431 1.1 mrg float.qd dr32,dr32
1432 1.1 mrg fdiv.d dr0,dr32,dr0 /* dr0 = dividend / divisor */
1433 1.1 mrg ftrc.dq dr0,dr32
1434 1.1 mrg fmov.s fr33,fr32 /* NOTE(review): presumably puts the low word of the result where SHcompact callers read fpul - confirm */
1435 1.1 mrg blink tr0,r63
1436 1.1 mrg
1437 1.1 mrg ENDFUNC(GLOBAL(udivsi3_i4))
1438 1.1 mrg #endif /* ! __SH5__ || __SH5__ == 32 */
1439 1.1 mrg #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1440 1.1 mrg !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1441 1.1 mrg /* Same bit-31-flip scheme as the double-precision-default version, but fpscr is saved and reloaded from the word at L1 for the duration of the FP work. */
1442 1.1 mrg .global GLOBAL(udivsi3_i4)
1443 1.1 mrg HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1444 1.1 mrg GLOBAL(udivsi3_i4):
1445 1.1 mrg mov #1,r1
1446 1.1 mrg cmp/hi r1,r5
1447 1.1 mrg bf trivial /* taken when the divisor is 0 or 1 */
1448 1.1 mrg sts.l fpscr,@-r15
1449 1.1 mrg mova L1,r0
1450 1.1 mrg lds.l @r0+,fpscr /* fpscr = word at L1; r0 now points at the double that follows it */
1451 1.1 mrg rotr r1 /* r1 = 0x80000000 */
1452 1.1 mrg xor r1,r4
1453 1.1 mrg lds r4,fpul
1454 1.1 mrg #ifdef FMOVD_WORKS
1455 1.1 mrg fmov.d @r0+,dr4
1456 1.1 mrg #else
1457 1.1 mrg fmov.s @r0+,DR40
1458 1.1 mrg fmov.s @r0,DR41
1459 1.1 mrg #endif
1460 1.1 mrg float fpul,dr0
1461 1.1 mrg xor r1,r5
1462 1.1 mrg lds r5,fpul
1463 1.1 mrg float fpul,dr2
1464 1.1 mrg fadd dr4,dr0
1465 1.1 mrg fadd dr4,dr2
1466 1.1 mrg fdiv dr2,dr0
1467 1.1 mrg ftrc dr0,fpul
1468 1.1 mrg rts
1469 1.1 mrg lds.l @r15+,fpscr /* delay slot: restore the fpscr saved on entry */
1470 1.1 mrg /* NOTE(review): trivial is 8-byte aligned under FMOVD_WORKS, presumably so that the double following the 4-byte word at L1 lands on an 8-byte boundary - confirm. */
1471 1.1 mrg #ifdef FMOVD_WORKS
1472 1.1 mrg .align 3 ! make double below 8 byte aligned.
1473 1.1 mrg #endif
1474 1.1 mrg trivial:
1475 1.1 mrg rts
1476 1.1 mrg lds r4,fpul
1477 1.1 mrg /* Data: the fpscr value used during the divide, then the bias constant 2^31 as a double. */
1478 1.1 mrg .align 2
1479 1.1 mrg L1:
1480 1.1 mrg #ifndef FMOVD_WORKS
1481 1.1 mrg .long 0x80000
1482 1.1 mrg #else
1483 1.1 mrg .long 0x180000
1484 1.1 mrg #endif
1485 1.1 mrg .double 2147483648
1486 1.1 mrg
1487 1.1 mrg ENDFUNC(GLOBAL(udivsi3_i4))
1488 1.1 mrg #endif /* ! __SH4__ */
1489 1.1 mrg #endif
1490 1.1 mrg
1491 1.1 mrg #ifdef L_udivsi3
1492 1.1 mrg /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1493 1.1 mrg sh2e/sh3e code. */
1494 1.1 mrg /* udivsi3: unsigned 32-bit divide. Several implementations follow; the preprocessor selects one. */
1495 1.1 mrg !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1496 1.1 mrg .global GLOBAL(udivsi3)
1497 1.1 mrg HIDDEN_FUNC(GLOBAL(udivsi3))
1498 1.1 mrg
1499 1.1 mrg #if __SHMEDIA__
1500 1.1 mrg #if __SH5__ == 32
1501 1.1 mrg .section .text..SHmedia32,"ax"
1502 1.1 mrg #else
1503 1.1 mrg .text
1504 1.1 mrg #endif
1505 1.1 mrg .align 2
1506 1.1 mrg #if 0 /* disabled: shift-and-subtract loop, kept together with its C model */
1507 1.1 mrg /* The assembly code that follows is a hand-optimized version of the C
1508 1.1 mrg code that follows. Note that the registers that are modified are
1509 1.1 mrg exactly those listed as clobbered in the patterns udivsi3_i1 and
1510 1.1 mrg udivsi3_i1_media.
1511 1.1 mrg
1512 1.1 mrg unsigned
1513 1.1 mrg __udivsi3 (i, j)
1514 1.1 mrg unsigned i, j;
1515 1.1 mrg {
1516 1.1 mrg register unsigned long long r0 asm ("r0") = 0;
1517 1.1 mrg register unsigned long long r18 asm ("r18") = 1;
1518 1.1 mrg register unsigned long long r4 asm ("r4") = i;
1519 1.1 mrg register unsigned long long r19 asm ("r19") = j;
1520 1.1 mrg
1521 1.1 mrg r19 <<= 31;
1522 1.1 mrg r18 <<= 31;
1523 1.1 mrg do
1524 1.1 mrg if (r4 >= r19)
1525 1.1 mrg r0 |= r18, r4 -= r19;
1526 1.1 mrg while (r19 >>= 1, r18 >>= 1);
1527 1.1 mrg
1528 1.1 mrg return r0;
1529 1.1 mrg }
1530 1.1 mrg */
1531 1.1 mrg GLOBAL(udivsi3):
1532 1.1 mrg pt/l LOCAL(udivsi3_dontadd), tr2
1533 1.1 mrg pt/l LOCAL(udivsi3_loop), tr1
1534 1.1 mrg ptabs/l r18, tr0
1535 1.1 mrg movi 0, r0
1536 1.1 mrg movi 1, r18
1537 1.1 mrg addz.l r5, r63, r19 /* r19 = j, zero-extended */
1538 1.1 mrg addz.l r4, r63, r4 /* r4 = i, zero-extended */
1539 1.1 mrg shlli r19, 31, r19
1540 1.1 mrg shlli r18, 31, r18
1541 1.1 mrg LOCAL(udivsi3_loop):
1542 1.1 mrg bgtu r19, r4, tr2 /* skip the subtract while r19 > r4 */
1543 1.1 mrg or r0, r18, r0
1544 1.1 mrg sub r4, r19, r4
1545 1.1 mrg LOCAL(udivsi3_dontadd):
1546 1.1 mrg shlri r18, 1, r18
1547 1.1 mrg shlri r19, 1, r19
1548 1.1 mrg bnei r18, 0, tr1 /* loop until the quotient bit mask in r18 is shifted out */
1549 1.1 mrg blink tr0, r63
1550 1.1 mrg #else
1551 1.1 mrg GLOBAL(udivsi3):
1552 1.1 mrg // inputs: r4,r5
1553 1.1 mrg // clobbered: r18,r19,r20,r21,r22,r25,tr0
1554 1.1 mrg // result in r0.
1555 1.1 mrg addz.l r5,r63,r22
1556 1.1 mrg nsb r22,r0
1557 1.1 mrg shlld r22,r0,r25
1558 1.1 mrg shlri r25,48,r25
1559 1.1 mrg movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1560 1.1 mrg sub r20,r25,r21
1561 1.1 mrg mmulfx.w r21,r21,r19
1562 1.1 mrg mshflo.w r21,r63,r21
1563 1.1 mrg ptabs r18,tr0
1564 1.1 mrg mmulfx.w r25,r19,r19
1565 1.1 mrg sub r20,r0,r0
1566 1.1 mrg /* bubble */
1567 1.1 mrg msub.w r21,r19,r19
1568 1.1 mrg addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1569 1.1 mrg before the msub.w, but we need a different value for
1570 1.1 mrg r19 to keep errors under control. */
1571 1.1 mrg mulu.l r4,r21,r18
1572 1.1 mrg mmulfx.w r19,r19,r19
1573 1.1 mrg shlli r21,15,r21
1574 1.1 mrg shlrd r18,r0,r18
1575 1.1 mrg mulu.l r18,r22,r20
1576 1.1 mrg mmacnfx.wl r25,r19,r21
1577 1.1 mrg /* bubble */
1578 1.1 mrg sub r4,r20,r25
1579 1.1 mrg /* r18 = first quotient estimate; r25 = r4 - r18 * r22 is what is left of the dividend. */
1580 1.1 mrg mulu.l r25,r21,r19
1581 1.1 mrg addi r0,14,r0
1582 1.1 mrg /* bubble */
1583 1.1 mrg shlrd r19,r0,r19
1584 1.1 mrg mulu.l r19,r22,r20
1585 1.1 mrg add r18,r19,r18
1586 1.1 mrg /* bubble */
1587 1.1 mrg sub.l r25,r20,r25
1588 1.1 mrg /* Second step done: correction r19 was added to r18 and r19 * r22 removed from r25. */
1589 1.1 mrg mulu.l r25,r21,r19
1590 1.1 mrg addz.l r25,r63,r25
1591 1.1 mrg sub r25,r22,r25
1592 1.1 mrg shlrd r19,r0,r19
1593 1.1 mrg mulu.l r19,r22,r20
1594 1.1 mrg addi r25,1,r25
1595 1.1 mrg add r18,r19,r18
1596 1.1 mrg /* cmpgt yields 1 when the rest after the third step still reaches the divisor; that 1 is added to the quotient. */
1597 1.1 mrg cmpgt r25,r20,r25
1598 1.1 mrg add.l r18,r25,r0
1599 1.1 mrg blink tr0,r63
1600 1.1 mrg #endif
1601 1.1 mrg #elif defined (__SHMEDIA__)
1602 1.1 mrg /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1603 1.1 mrg ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1604 1.1 mrg So use a short shmedia loop. */
1605 1.1 mrg // clobbered: r20,r21,r25,tr0,tr1,tr2
1606 1.1 mrg // Selected when __SHMEDIA__ is defined but zero, so the test above must be "defined": a plain value test could never succeed after #if __SHMEDIA__.
1607 1.1 mrg .mode SHmedia
1608 1.1 mrg .section .text..SHmedia32,"ax"
1609 1.1 mrg .align 2
1610 1.1 mrg GLOBAL(udivsi3): // r4 = dividend, r5 = divisor, quotient returned in r0
1611 1.1 mrg pt/l LOCAL(udivsi3_dontsub), tr0
1612 1.1 mrg pt/l LOCAL(udivsi3_loop), tr1
1613 1.1 mrg ptabs/l r18,tr2 // tr2 = return address
1614 1.1 mrg shlli r5,32,r25 // r25 = divisor << 32; its low 32 bits double as the loop counter
1615 1.1 mrg addi r25,-1,r21 // r21 = (divisor << 32) - 1
1616 1.1 mrg addz.l r4,r63,r20 // zero-extend the dividend to 64 bits
1617 1.1 mrg LOCAL(udivsi3_loop):
1618 1.1 mrg shlli r20,1,r20 // shift the next dividend bit into the upper half
1619 1.1 mrg bgeu/u r21,r20,tr0 // skip the subtract while r21 >= r20
1620 1.1 mrg sub r20,r21,r20 // upper half -= divisor; the "- 1" in r21 sets the new quotient bit in the low half
1621 1.1 mrg LOCAL(udivsi3_dontsub):
1622 1.1 mrg addi.l r25,-1,r25 // counter runs -1, -2, ... -32
1623 1.1 mrg bnei r25,-32,tr1 // 32 iterations in total
1624 1.1 mrg add.l r20,r63,r0 // low 32 bits of r20 hold the quotient
1625 1.1 mrg blink tr2,r63
1625 1.1 mrg #else /* ! __SHMEDIA__ */
1626 1.1 mrg LOCAL(div8): /* helper: eight div1 steps, the last one in the rts delay slot */
1627 1.1 mrg div1 r5,r4
1628 1.1 mrg LOCAL(div7): /* helper: seven div1 steps, the last one in the rts delay slot */
1629 1.1 mrg div1 r5,r4; div1 r5,r4; div1 r5,r4
1630 1.1 mrg div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1631 1.1 mrg /* divx4: four div1 steps with a rotcl r0 after each of the first three; the caller supplies the remaining rotcl. */
1632 1.1 mrg LOCAL(divx4):
1633 1.1 mrg div1 r5,r4; rotcl r0
1634 1.1 mrg div1 r5,r4; rotcl r0
1635 1.1 mrg div1 r5,r4; rotcl r0
1636 1.1 mrg rts; div1 r5,r4
1637 1.1 mrg /* Entry point. A divisor that fits in 16 bits takes the fall-through path; anything larger goes to large_divisor. */
1638 1.1 mrg GLOBAL(udivsi3):
1639 1.1 mrg sts.l pr,@-r15
1640 1.1 mrg extu.w r5,r0
1641 1.1 mrg cmp/eq r5,r0
1642 1.1 mrg #ifdef __sh1__
1643 1.1 mrg bf LOCAL(large_divisor)
1644 1.1 mrg #else
1645 1.1 mrg bf/s LOCAL(large_divisor)
1646 1.1 mrg #endif
1647 1.1 mrg div0u /* delay slot of bf/s; the __sh1__ build uses plain bf, so div0u is repeated at large_divisor */
1648 1.1 mrg swap.w r4,r0
1649 1.1 mrg shlr16 r4
1650 1.1 mrg bsr LOCAL(div8)
1651 1.1 mrg shll16 r5
1652 1.1 mrg bsr LOCAL(div7)
1653 1.1 mrg div1 r5,r4
1654 1.1 mrg xtrct r4,r0
1655 1.1 mrg xtrct r0,r4
1656 1.1 mrg bsr LOCAL(div8)
1657 1.1 mrg swap.w r4,r4
1658 1.1 mrg bsr LOCAL(div7)
1659 1.1 mrg div1 r5,r4
1660 1.1 mrg lds.l @r15+,pr
1661 1.1 mrg xtrct r4,r0
1662 1.1 mrg swap.w r0,r0
1663 1.1 mrg rotcl r0
1664 1.1 mrg rts
1665 1.1 mrg shlr16 r5 /* delay slot: undo the shll16 above so r5 is handed back unchanged */
1666 1.1 mrg /* Divisor wider than 16 bits: r0 collects quotient bits through rotcl around four divx4 calls. */
1667 1.1 mrg LOCAL(large_divisor):
1668 1.1 mrg #ifdef __sh1__
1669 1.1 mrg div0u
1670 1.1 mrg #endif
1671 1.1 mrg mov #0,r0
1672 1.1 mrg xtrct r4,r0
1673 1.1 mrg xtrct r0,r4
1674 1.1 mrg bsr LOCAL(divx4)
1675 1.1 mrg rotcl r0
1676 1.1 mrg bsr LOCAL(divx4)
1677 1.1 mrg rotcl r0
1678 1.1 mrg bsr LOCAL(divx4)
1679 1.1 mrg rotcl r0
1680 1.1 mrg bsr LOCAL(divx4)
1681 1.1 mrg rotcl r0
1682 1.1 mrg lds.l @r15+,pr
1683 1.1 mrg rts
1684 1.1 mrg rotcl r0
1685 1.1 mrg
1686 1.1 mrg ENDFUNC(GLOBAL(udivsi3))
1687 1.1 mrg #endif /* ! __SHMEDIA__ */
1688 1.1 mrg #endif /* L_udivsi3 */
1689 1.1 mrg
1690 1.1 mrg #ifdef L_udivdi3
1691 1.1 mrg #if __SHMEDIA__
1692 1.1 mrg .mode SHmedia
1693 1.1 mrg .section .text..SHmedia32,"ax"
1694 1.1 mrg .align 2
1695 1.1 mrg .global GLOBAL(udivdi3)
1696 1.1 mrg FUNC(GLOBAL(udivdi3))
1697 1.1 mrg GLOBAL(udivdi3): /* unsigned 64-bit divide: r2 = dividend, r3 = divisor, quotient returned in r2 */
1698 1.1 mrg HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1699 1.1 mrg shlri r3,1,r4
1700 1.1 mrg nsb r4,r22
1701 1.1 mrg shlld r3,r22,r6
1702 1.1 mrg shlri r6,49,r5
1703 1.1 mrg movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1704 1.1 mrg sub r21,r5,r1
1705 1.1 mrg mmulfx.w r1,r1,r4
1706 1.1 mrg mshflo.w r1,r63,r1
1707 1.1 mrg sub r63,r22,r20 // r63 == 64 % 64
1708 1.1 mrg mmulfx.w r5,r4,r4
1709 1.1 mrg pta LOCAL(large_divisor),tr0
1710 1.1 mrg addi r20,32,r9
1711 1.1 mrg msub.w r1,r4,r1
1712 1.1 mrg madd.w r1,r1,r1
1713 1.1 mrg mmulfx.w r1,r1,r4
1714 1.1 mrg shlri r6,32,r7
1715 1.1 mrg bgt/u r9,r63,tr0 // large_divisor
1716 1.1 mrg mmulfx.w r5,r4,r4
1717 1.1 mrg shlri r2,32+14,r19
1718 1.1 mrg addi r22,-31,r0
1719 1.1 mrg msub.w r1,r4,r1
1720 1.1 mrg /* Fall-through: small-divisor path, reached when r9 = 32 - r22 is not positive. */
1721 1.1 mrg mulu.l r1,r7,r4
1722 1.1 mrg addi r1,-3,r5
1723 1.1 mrg mulu.l r5,r19,r5
1724 1.1 mrg sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1725 1.1 mrg shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1726 1.1 mrg the case may be, %0000000000000000 000.11111111111, still */
1727 1.1 mrg muls.l r1,r4,r4 /* leaving at least one sign bit. */
1728 1.1 mrg mulu.l r5,r3,r8
1729 1.1 mrg mshalds.l r1,r21,r1
1730 1.1 mrg shari r4,26,r4
1731 1.1 mrg shlld r8,r0,r8
1732 1.1 mrg add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1733 1.1 mrg sub r2,r8,r2
1734 1.1 mrg /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1735 1.1 mrg
1736 1.1 mrg shlri r2,22,r21
1737 1.1 mrg mulu.l r21,r1,r21
1738 1.1 mrg shlld r5,r0,r8
1739 1.1 mrg addi r20,30-22,r0
1740 1.1 mrg shlrd r21,r0,r21
1741 1.1 mrg mulu.l r21,r3,r5
1742 1.1 mrg add r8,r21,r8
1743 1.1 mrg mcmpgt.l r21,r63,r21 // See Note 1
1744 1.1 mrg addi r20,30,r0
1745 1.1 mrg mshfhi.l r63,r21,r21
1746 1.1 mrg sub r2,r5,r2
1747 1.1 mrg andc r2,r21,r2
1748 1.1 mrg
1749 1.1 mrg /* small divisor: need a third divide step */
1750 1.1 mrg mulu.l r2,r1,r7
1751 1.1 mrg ptabs r18,tr0
1752 1.1 mrg addi r2,1,r2
1753 1.1 mrg shlrd r7,r0,r7
1754 1.1 mrg mulu.l r7,r3,r5
1755 1.1 mrg add r8,r7,r8
1756 1.1 mrg sub r2,r3,r2
1757 1.1 mrg cmpgt r2,r5,r5
1758 1.1 mrg add r8,r5,r2 /* quotient = accumulated partial quotients in r8 plus the final 0/1 adjustment */
1759 1.1 mrg /* could test r3 here to check for divide by zero. */
1760 1.1 mrg blink tr0,r63
1761 1.1 mrg
1762 1.1 mrg LOCAL(large_divisor):
1763 1.1 mrg mmulfx.w r5,r4,r4
1764 1.1 mrg shlrd r2,r9,r25
1765 1.1 mrg shlri r25,32,r8
1766 1.1 mrg msub.w r1,r4,r1
1767 1.1 mrg /* Same reciprocal refinement sequence as on the small-divisor path, working on the dividend shifted right by r9 (held in r25). */
1768 1.1 mrg mulu.l r1,r7,r4
1769 1.1 mrg addi r1,-3,r5
1770 1.1 mrg mulu.l r5,r8,r5
1771 1.1 mrg sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1772 1.1 mrg shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1773 1.1 mrg the case may be, %0000000000000000 000.11111111111, still */
1774 1.1 mrg muls.l r1,r4,r4 /* leaving at least one sign bit. */
1775 1.1 mrg shlri r5,14-1,r8
1776 1.1 mrg mulu.l r8,r7,r5
1777 1.1 mrg mshalds.l r1,r21,r1
1778 1.1 mrg shari r4,26,r4
1779 1.1 mrg add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1780 1.1 mrg sub r25,r5,r25
1781 1.1 mrg /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1782 1.1 mrg
1783 1.1 mrg shlri r25,22,r21
1784 1.1 mrg mulu.l r21,r1,r21
1785 1.1 mrg pta LOCAL(no_lo_adj),tr0
1786 1.1 mrg addi r22,32,r0
1787 1.1 mrg shlri r21,40,r21
1788 1.1 mrg mulu.l r21,r7,r5
1789 1.1 mrg add r8,r21,r8
1790 1.1 mrg shlld r2,r0,r2
1791 1.1 mrg sub r25,r5,r25
1792 1.1 mrg bgtu/u r7,r25,tr0 // no_lo_adj
1793 1.1 mrg addi r8,1,r8
1794 1.1 mrg sub r25,r7,r25
1795 1.1 mrg LOCAL(no_lo_adj):
1796 1.1 mrg mextr4 r2,r25,r2
1797 1.1 mrg
1798 1.1 mrg /* large_divisor: only needs a few adjustments. */
1799 1.1 mrg mulu.l r8,r6,r5
1800 1.1 mrg ptabs r18,tr0
1801 1.1 mrg /* bubble */
1802 1.1 mrg cmpgtu r5,r2,r5
1803 1.1 mrg sub r8,r5,r2 /* quotient = r8, reduced by 1 when r8 * r6 exceeds the rest in r2 */
1804 1.1 mrg blink tr0,r63
1805 1.1 mrg ENDFUNC(GLOBAL(udivdi3))
1806 1.1 mrg /* Note 1: To shift the result of the second divide stage so that the result
1807 1.1 mrg always fits into 32 bits, yet we still reduce the rest sufficiently
1808 1.1 mrg would require a lot of instructions to do the shifts just right. Using
1809 1.1 mrg the full 64 bit shift result to multiply with the divisor would require
1810 1.1 mrg four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1811 1.1 mrg Fortunately, if the upper 32 bits of the shift result are nonzero, we
1812 1.1 mrg know that the rest after taking this partial result into account will
1813 1.1 mrg fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1814 1.1 mrg upper 32 bits of the partial result are nonzero. */
1815 1.1 mrg #endif /* __SHMEDIA__ */
1816 1.1 mrg #endif /* L_udivdi3 */
1817 1.1 mrg
1818 1.1 mrg #ifdef L_divdi3
1819 1.1 mrg #if __SHMEDIA__
1820 1.1 mrg .mode SHmedia
1821 1.1 mrg .section .text..SHmedia32,"ax"
1822 1.1 mrg .align 2
1823 1.1 mrg .global GLOBAL(divdi3)
1824 1.1 mrg FUNC(GLOBAL(divdi3))
1825 1.1 mrg GLOBAL(divdi3): /* signed 64-bit divide: r2 = dividend, r3 = divisor, quotient returned in r2 */
1826 1.1 mrg pta GLOBAL(udivdi3_internal),tr0
1827 1.1 mrg shari r2,63,r22 /* r22 = sign mask of the dividend (0 or -1) */
1828 1.1 mrg shari r3,63,r23 /* r23 = sign mask of the divisor */
1829 1.1 mrg xor r2,r22,r2
1830 1.1 mrg xor r3,r23,r3
1831 1.1 mrg sub r2,r22,r2 /* r2 = absolute value of the dividend */
1832 1.1 mrg sub r3,r23,r3 /* r3 = absolute value of the divisor */
1833 1.1 mrg beq/u r22,r23,tr0 /* equal signs: jump straight into udivdi3_internal, which returns to our caller */
1834 1.1 mrg ptabs r18,tr1 /* keep the original return address in tr1 */
1835 1.1 mrg blink tr0,r18 /* call udivdi3_internal with r18 as the link register */
1836 1.1 mrg sub r63,r2,r2 /* signs differed: negate the quotient */
1837 1.1 mrg blink tr1,r63
1838 1.1 mrg ENDFUNC(GLOBAL(divdi3))
1839 1.1 mrg #endif /* __SHMEDIA__ */
1840 1.1 mrg #endif /* L_divdi3 */
1841 1.1 mrg
1842 1.1 mrg #ifdef L_umoddi3
1843 1.1 mrg #if __SHMEDIA__
1844 1.1 mrg .mode SHmedia
1845 1.1 mrg .section .text..SHmedia32,"ax"
1846 1.1 mrg .align 2
1847 1.1 mrg .global GLOBAL(umoddi3)
1848 1.1 mrg FUNC(GLOBAL(umoddi3))
1849 1.1 mrg GLOBAL(umoddi3): /* unsigned 64-bit modulo: r2 = dividend, r3 = divisor, remainder returned in r2 */
1850 1.1 mrg HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1851 1.1 mrg shlri r3,1,r4
1852 1.1 mrg nsb r4,r22
1853 1.1 mrg shlld r3,r22,r6
1854 1.1 mrg shlri r6,49,r5
1855 1.1 mrg movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1856 1.1 mrg sub r21,r5,r1
1857 1.1 mrg mmulfx.w r1,r1,r4
1858 1.1 mrg mshflo.w r1,r63,r1
1859 1.1 mrg sub r63,r22,r20 // r63 == 64 % 64
1860 1.1 mrg mmulfx.w r5,r4,r4
1861 1.1 mrg pta LOCAL(large_divisor),tr0
1862 1.1 mrg addi r20,32,r9
1863 1.1 mrg msub.w r1,r4,r1
1864 1.1 mrg madd.w r1,r1,r1
1865 1.1 mrg mmulfx.w r1,r1,r4
1866 1.1 mrg shlri r6,32,r7
1867 1.1 mrg bgt/u r9,r63,tr0 // large_divisor
1868 1.1 mrg mmulfx.w r5,r4,r4
1869 1.1 mrg shlri r2,32+14,r19
1870 1.1 mrg addi r22,-31,r0
1871 1.1 mrg msub.w r1,r4,r1
1872 1.1 mrg /* Fall-through: small-divisor path, reached when r9 = 32 - r22 is not positive. */
1873 1.1 mrg mulu.l r1,r7,r4
1874 1.1 mrg addi r1,-3,r5
1875 1.1 mrg mulu.l r5,r19,r5
1876 1.1 mrg sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1877 1.1 mrg shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1878 1.1 mrg the case may be, %0000000000000000 000.11111111111, still */
1879 1.1 mrg muls.l r1,r4,r4 /* leaving at least one sign bit. */
1880 1.1 mrg mulu.l r5,r3,r5
1881 1.1 mrg mshalds.l r1,r21,r1
1882 1.1 mrg shari r4,26,r4
1883 1.1 mrg shlld r5,r0,r5
1884 1.1 mrg add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1885 1.1 mrg sub r2,r5,r2
1886 1.1 mrg /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1887 1.1 mrg
1888 1.1 mrg shlri r2,22,r21
1889 1.1 mrg mulu.l r21,r1,r21
1890 1.1 mrg addi r20,30-22,r0
1891 1.1 mrg /* bubble */ /* could test r3 here to check for divide by zero. */
1892 1.1 mrg shlrd r21,r0,r21
1893 1.1 mrg mulu.l r21,r3,r5
1894 1.1 mrg mcmpgt.l r21,r63,r21 // See Note 1
1895 1.1 mrg addi r20,30,r0
1896 1.1 mrg mshfhi.l r63,r21,r21
1897 1.1 mrg sub r2,r5,r2
1898 1.1 mrg andc r2,r21,r2
1899 1.1 mrg
1900 1.1 mrg /* small divisor: need a third divide step */
1901 1.1 mrg mulu.l r2,r1,r7
1902 1.1 mrg ptabs r18,tr0
1903 1.1 mrg sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1904 1.1 mrg shlrd r7,r0,r7
1905 1.1 mrg mulu.l r7,r3,r5
1906 1.1 mrg /* bubble */
1907 1.1 mrg addi r8,1,r7
1908 1.1 mrg cmpgt r7,r5,r7
1909 1.1 mrg cmvne r7,r8,r2 /* when the compare is true, use rest - r3 instead of rest */
1910 1.1 mrg sub r2,r5,r2 /* remainder = chosen rest minus the last partial product */
1911 1.1 mrg blink tr0,r63
1912 1.1 mrg
1913 1.1 mrg LOCAL(large_divisor):
1914 1.1 mrg mmulfx.w r5,r4,r4
1915 1.1 mrg shlrd r2,r9,r25
1916 1.1 mrg shlri r25,32,r8
1917 1.1 mrg msub.w r1,r4,r1
1918 1.1 mrg /* Same reciprocal refinement sequence as on the small-divisor path, working on the dividend shifted right by r9 (held in r25). */
1919 1.1 mrg mulu.l r1,r7,r4
1920 1.1 mrg addi r1,-3,r5
1921 1.1 mrg mulu.l r5,r8,r5
1922 1.1 mrg sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1923 1.1 mrg shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1924 1.1 mrg the case may be, %0000000000000000 000.11111111111, still */
1925 1.1 mrg muls.l r1,r4,r4 /* leaving at least one sign bit. */
1926 1.1 mrg shlri r5,14-1,r8
1927 1.1 mrg mulu.l r8,r7,r5
1928 1.1 mrg mshalds.l r1,r21,r1
1929 1.1 mrg shari r4,26,r4
1930 1.1 mrg add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1931 1.1 mrg sub r25,r5,r25
1932 1.1 mrg /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1933 1.1 mrg
1934 1.1 mrg shlri r25,22,r21
1935 1.1 mrg mulu.l r21,r1,r21
1936 1.1 mrg pta LOCAL(no_lo_adj),tr0
1937 1.1 mrg addi r22,32,r0
1938 1.1 mrg shlri r21,40,r21
1939 1.1 mrg mulu.l r21,r7,r5
1940 1.1 mrg add r8,r21,r8
1941 1.1 mrg shlld r2,r0,r2
1942 1.1 mrg sub r25,r5,r25
1943 1.1 mrg bgtu/u r7,r25,tr0 // no_lo_adj
1944 1.1 mrg addi r8,1,r8
1945 1.1 mrg sub r25,r7,r25
1946 1.1 mrg LOCAL(no_lo_adj):
1947 1.1 mrg mextr4 r2,r25,r2
1948 1.1 mrg
1949 1.1 mrg /* large_divisor: only needs a few adjustments. */
1950 1.1 mrg mulu.l r8,r6,r5
1951 1.1 mrg ptabs r18,tr0
1952 1.1 mrg add r2,r6,r7
1953 1.1 mrg cmpgtu r5,r2,r8
1954 1.1 mrg cmvne r8,r7,r2 /* when r5 exceeds the rest, add the shifted divisor r6 back first */
1955 1.1 mrg sub r2,r5,r2
1956 1.1 mrg shlrd r2,r22,r2 /* NOTE(review): presumably undoes the normalization shift by r22 applied when forming r6 - confirm */
1957 1.1 mrg blink tr0,r63
1958 1.1 mrg ENDFUNC(GLOBAL(umoddi3))
1959 1.1 mrg /* Note 1: To shift the result of the second divide stage so that the result
1960 1.1 mrg always fits into 32 bits, yet we still reduce the rest sufficiently
1961 1.1 mrg would require a lot of instructions to do the shifts just right. Using
1962 1.1 mrg the full 64 bit shift result to multiply with the divisor would require
1963 1.1 mrg four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1964 1.1 mrg Fortunately, if the upper 32 bits of the shift result are nonzero, we
1965 1.1 mrg know that the rest after taking this partial result into account will
1966 1.1 mrg fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1967 1.1 mrg upper 32 bits of the partial result are nonzero. */
1968 1.1 mrg #endif /* __SHMEDIA__ */
1969 1.1 mrg #endif /* L_umoddi3 */
1970 1.1 mrg
1971 1.1 mrg #ifdef L_moddi3
1972 1.1 mrg #if __SHMEDIA__
1973 1.1 mrg .mode SHmedia
1974 1.1 mrg .section .text..SHmedia32,"ax"
1975 1.1 mrg .align 2
1976 1.1 mrg .global GLOBAL(moddi3)
1977 1.1 mrg FUNC(GLOBAL(moddi3))
1978 1.1 mrg GLOBAL(moddi3): /* signed 64-bit modulo: r2 = dividend, r3 = divisor, remainder returned in r2 */
1979 1.1 mrg pta GLOBAL(umoddi3_internal),tr0
1980 1.1 mrg shari r2,63,r22 /* r22 = sign mask of the dividend (0 or -1) */
1981 1.1 mrg shari r3,63,r23 /* r23 = sign mask of the divisor */
1982 1.1 mrg xor r2,r22,r2
1983 1.1 mrg xor r3,r23,r3
1984 1.1 mrg sub r2,r22,r2 /* r2 = absolute value of the dividend */
1985 1.1 mrg sub r3,r23,r3 /* r3 = absolute value of the divisor */
1986 1.1 mrg beq/u r22,r63,tr0 /* non-negative dividend: jump straight into umoddi3_internal; only the dividend sign matters */
1987 1.1 mrg ptabs r18,tr1 /* keep the original return address in tr1 */
1988 1.1 mrg blink tr0,r18 /* call umoddi3_internal with r18 as the link register */
1989 1.1 mrg sub r63,r2,r2 /* negative dividend: negate the remainder */
1990 1.1 mrg blink tr1,r63
1991 1.1 mrg ENDFUNC(GLOBAL(moddi3))
1992 1.1 mrg #endif /* __SHMEDIA__ */
1993 1.1 mrg #endif /* L_moddi3 */
1994 1.1 mrg
1995 1.1 mrg #ifdef L_set_fpscr
1996 1.1 mrg #if !defined (__SH2A_NOFPU__)
1997 1.1 mrg #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1998 1.1 mrg #ifdef __SH5__
1999 1.1 mrg .mode SHcompact
2000 1.1 mrg #endif
/* set_fpscr: copy r4 into the FPSCR and rewrite the two 32-bit words of
   fpscr_values with two variants of r4 that differ only in bits 19 and
   20 (NOTE(review): on SH4 these look like FPSCR.PR and FPSCR.SZ --
   confirm against the CPU manual):
     variant A: bit 19 set; bit 20 set only when FMOVD_WORKS is defined,
                otherwise cleared.
     variant B: bits 19 and 20 both cleared.
   With __SH4__ or __SH2A_DOUBLE__, A is stored at offset 4 and B at
   offset 0; in the other configurations A is at offset 0 and B at
   offset 4.
   In:      r4 = new FPSCR value.
   Written: fpscr, r0, r1, r2, r3.  The PIC variants also use r12 but
            save it on the stack and restore it.  */
2001 1.1 mrg .global GLOBAL(set_fpscr)
2002 1.1 mrg HIDDEN_FUNC(GLOBAL(set_fpscr))
2003 1.1 mrg GLOBAL(set_fpscr):
2004 1.1 mrg lds r4,fpscr /* install the requested value first */
2005 1.1 mrg #ifdef __PIC__
2006 1.1 mrg mov.l r12,@-r15 /* push r12; it is used as the GOT pointer below */
2007 1.1 mrg #ifdef __vxworks
2008 1.1 mrg mov.l LOCAL(set_fpscr_L0_base),r12 /* r12 = ___GOTT_BASE__ */
2009 1.1 mrg mov.l LOCAL(set_fpscr_L0_index),r0 /* r0 = ___GOTT_INDEX__ */
2010 1.1 mrg mov.l @r12,r12 /* r12 = word stored at ___GOTT_BASE__ */
2011 1.1 mrg mov.l @(r0,r12),r12 /* r12 = table entry at that index: the GOT address */
2012 1.1 mrg #else
2013 1.1 mrg mova LOCAL(set_fpscr_L0),r0 /* r0 = address of the literal */
2014 1.1 mrg mov.l LOCAL(set_fpscr_L0),r12 /* r12 = literal contents (_GLOBAL_OFFSET_TABLE_) */
2015 1.1 mrg add r0,r12 /* r12 = GOT address */
2016 1.1 mrg #endif
2017 1.1 mrg mov.l LOCAL(set_fpscr_L1),r0 /* r0 = fpscr_values@GOT */
2018 1.1 mrg mov.l @(r0,r12),r1 /* r1 = address of fpscr_values, read from the GOT */
2019 1.1 mrg mov.l @r15+,r12 /* pop the saved r12 */
2020 1.1 mrg #else
2021 1.1 mrg mov.l LOCAL(set_fpscr_L1),r1 /* r1 = address of fpscr_values */
2022 1.1 mrg #endif
2023 1.1 mrg swap.w r4,r0 /* bring bits 16..31 of r4 into the low half so 8-bit immediates can reach them */
2024 1.1 mrg or #24,r0 /* set swapped bits 3 and 4, i.e. original bits 19 and 20 */
2025 1.1 mrg #ifndef FMOVD_WORKS
2026 1.1 mrg xor #16,r0 /* bit 20 was just set, so this clears it again */
2027 1.1 mrg #endif
2028 1.1 mrg #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2029 1.1 mrg swap.w r0,r3 /* undo the swap: r3 = variant A */
2030 1.1 mrg mov.l r3,@(4,r1) /* fpscr_values word at offset 4 = variant A */
2031 1.1 mrg #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2032 1.1 mrg swap.w r0,r2 /* undo the swap: r2 = variant A */
2033 1.1 mrg mov.l r2,@r1 /* fpscr_values word at offset 0 = variant A */
2034 1.1 mrg #endif
2035 1.1 mrg #ifndef FMOVD_WORKS
2036 1.1 mrg xor #8,r0 /* clear bit 19; bit 20 is already clear */
2037 1.1 mrg #else
2038 1.1 mrg xor #24,r0 /* clear bits 19 and 20 */
2039 1.1 mrg #endif
2040 1.1 mrg #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2041 1.1 mrg swap.w r0,r2 /* r2 = variant B */
2042 1.1 mrg rts
2043 1.1 mrg mov.l r2,@r1 /* rts delay slot: word at offset 0 = variant B */
2044 1.1 mrg #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2045 1.1 mrg swap.w r0,r3 /* r3 = variant B */
2046 1.1 mrg rts
2047 1.1 mrg mov.l r3,@(4,r1) /* rts delay slot: word at offset 4 = variant B */
2048 1.1 mrg #endif
2049 1.1 mrg .align 2
/* Literal pool read by the mov.l / mova instructions above.  */
2050 1.1 mrg #ifdef __PIC__
2051 1.1 mrg #ifdef __vxworks
2052 1.1 mrg LOCAL(set_fpscr_L0_base):
2053 1.1 mrg .long ___GOTT_BASE__
2054 1.1 mrg LOCAL(set_fpscr_L0_index):
2055 1.1 mrg .long ___GOTT_INDEX__
2056 1.1 mrg #else
2057 1.1 mrg LOCAL(set_fpscr_L0):
2058 1.1 mrg .long _GLOBAL_OFFSET_TABLE_
2059 1.1 mrg #endif
2060 1.1 mrg LOCAL(set_fpscr_L1):
2061 1.1 mrg .long GLOBAL(fpscr_values@GOT)
2062 1.1 mrg #else
2063 1.1 mrg LOCAL(set_fpscr_L1):
2064 1.1 mrg .long GLOBAL(fpscr_values)
2065 1.1 mrg #endif
2066 1.1 mrg
2067 1.1 mrg ENDFUNC(GLOBAL(set_fpscr))
/* Storage for the two words written above: 8 bytes, declared as a
   common symbol (4-byte alignment requested on ELF) unless
   NO_FPSCR_VALUES is defined.  */
2068 1.1 mrg #ifndef NO_FPSCR_VALUES
2069 1.1 mrg #ifdef __ELF__
2070 1.1 mrg .comm GLOBAL(fpscr_values),8,4
2071 1.1 mrg #else
2072 1.1 mrg .comm GLOBAL(fpscr_values),8
2073 1.1 mrg #endif /* ELF */
2074 1.1 mrg #endif /* NO_FPSCR_VALUES */
2075 1.1 mrg #endif /* SH2E / SH3E / SH4 */
2076 1.1 mrg #endif /* __SH2A_NOFPU__ */
2077 1.1 mrg #endif /* L_set_fpscr */
2078 1.1 mrg #ifdef L_ic_invalidate
2079 1.1 mrg #if __SH5__ == 32
2080 1.1 mrg .mode SHmedia
2081 1.1 mrg .section .text..SHmedia32,"ax"
2082 1.1 mrg .align 2
/* init_trampoline (SH5, 32-bit ABI): fill in a 16-byte block at r0.
     bytes 0..7   : a fixed 64-bit constant whose 16-bit pieces appear in
                    memory in the order d002, d101, 402b, 0009 for either
                    endianness (NOTE(review): these look like SHcompact
                    opcodes for two PC-relative loads, a jmp and a nop --
                    confirm against the ISA manual)
     bytes 8..11  : low 32 bits of r2
     bytes 12..15 : low 32 bits of r3
   There is no return instruction here: execution continues into the
   code that follows ENDFUNC, which is ic_invalidate operating on the
   same address in r0.
   In: r0 = block address, r2, r3 = the two 32-bit words to store.
   Written: r20, memory at r0.  */
2083 1.1 mrg .global GLOBAL(init_trampoline)
2084 1.1 mrg HIDDEN_FUNC(GLOBAL(init_trampoline))
2085 1.1 mrg GLOBAL(init_trampoline):
2086 1.1 mrg st.l r0,8,r2 /* word at r0+8 = r2 */
2087 1.1 mrg #ifdef __LITTLE_ENDIAN__
2088 1.1 mrg movi 9,r20 /* build 0x0009402bd101d002, 16 bits per shori */
2089 1.1 mrg shori 0x402b,r20
2090 1.1 mrg shori 0xd101,r20
2091 1.1 mrg shori 0xd002,r20
2092 1.1 mrg #else
2093 1.1 mrg movi 0xffffffffffffd002,r20 /* build 0xd002d101402b0009; the sign-extension bits are shifted out */
2094 1.1 mrg shori 0xd101,r20
2095 1.1 mrg shori 0x402b,r20
2096 1.1 mrg shori 9,r20
2097 1.1 mrg #endif
2098 1.1 mrg st.q r0,0,r20 /* quadword at r0+0 = the constant */
2099 1.1 mrg st.l r0,12,r3 /* word at r0+12 = r3 */
2100 1.1 mrg ENDFUNC(GLOBAL(init_trampoline))
/* ic_invalidate (SH5): make a freshly written block at r0 visible to
   instruction fetch.  The operand-cache block is written back, then the
   matching instruction-cache block is invalidated, with synco / synci
   barriers around those steps.  Control can also arrive here by falling
   through from the code placed immediately before this label.
   In: r0 = address, r18 = return address.  Written: tr0.  */
2101 1.1 mrg .global GLOBAL(ic_invalidate)
2102 1.1 mrg HIDDEN_FUNC(GLOBAL(ic_invalidate))
2103 1.1 mrg GLOBAL(ic_invalidate):
2104 1.1 mrg ocbwb r0,0 /* write back the operand-cache block holding r0+0 */
2105 1.1 mrg synco /* wait for the write-back before touching the icache */
2106 1.1 mrg icbi r0, 0 /* invalidate the instruction-cache block holding r0+0 */
2107 1.1 mrg ptabs r18, tr0 /* tr0 = return address */
2108 1.1 mrg synci /* synchronize instruction fetch */
2109 1.1 mrg blink tr0, r63 /* return; link value discarded into r63 */
2110 1.1 mrg ENDFUNC(GLOBAL(ic_invalidate))
2111 1.1 mrg #elif defined(__SH4A__)
/* ic_invalidate (SH4A): write back the operand-cache block at r4, then
   invalidate the instruction-cache block at the same address.
   In: r4 = address.  No general register is modified.  */
2112 1.1 mrg .global GLOBAL(ic_invalidate)
2113 1.1 mrg HIDDEN_FUNC(GLOBAL(ic_invalidate))
2114 1.1 mrg GLOBAL(ic_invalidate):
2115 1.1 mrg ocbwb @r4 /* write back the operand-cache block */
2116 1.1 mrg synco /* barrier between the write-back and the invalidate */
2117 1.1 mrg icbi @r4 /* invalidate the instruction-cache block */
2118 1.1 mrg rts
2119 1.1 mrg nop /* rts delay slot */
2120 1.1 mrg ENDFUNC(GLOBAL(ic_invalidate))
2121 1.1 mrg #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2122 1.1 mrg /* For system code, we use ic_invalidate_line_i, but user code
2123 1.1 mrg needs a different mechanism. A kernel call is generally not
2124 1.1 mrg available, and it would also be slow. Different SH4 variants use
2125 1.1 mrg different sizes and associativities of the Icache. We use a small
2126 1.1 mrg bit of dispatch code that can be put hidden in every shared object,
2127 1.1 mrg which calls the actual processor-specific invalidation code in a
2128 1.1 mrg separate module.
2129 1.1 mrg Or if you have operating system support, the OS could mmap the
2130 1.1 mrg processor-specific code from a single page, since it is highly
2131 1.1 mrg repetitive. */
2132 1.1 mrg .global GLOBAL(ic_invalidate)
/* ic_invalidate (SH4 dispatcher): write back the operand-cache block at
   r4, then jump into ic_invalidate_array, which performs the
   processor-specific instruction-cache invalidation and returns to our
   caller.
   The array supplies two parameters of its own:
     word at array+8 : mask applied to (r4 - array) to pick the entry
     word at array+4 : value handed to the entry in r0
   In:      r4 = address to invalidate.
   At jump: r0 = word at array+4, r1 = array address,
            r4 = incoming r4 - array address.
   Written: r0, r1, r2 (r2 only in PIC builds), r4.  */
2133 1.1 mrg HIDDEN_FUNC(GLOBAL(ic_invalidate))
2134 1.1 mrg GLOBAL(ic_invalidate):
2135 1.1 mrg #ifdef __pic__
2136 1.1 mrg #ifdef __vxworks
2137 1.1 mrg mov.l 1f,r1 /* r1 = ___GOTT_BASE__ */
2138 1.1 mrg mov.l 2f,r0 /* r0 = ___GOTT_INDEX__ */
2139 1.1 mrg mov.l @r1,r1 /* r1 = word stored at ___GOTT_BASE__ */
2140 1.1 mrg mov.l 0f,r2 /* r2 = ic_invalidate_array@GOT */
2141 1.1 mrg mov.l @(r0,r1),r0 /* r0 = GOT address taken from that table */
2142 1.1 mrg #else
2143 1.1 mrg mov.l 1f,r1 /* r1 = literal contents (_GLOBAL_OFFSET_TABLE_) */
2144 1.1 mrg mova 1f,r0 /* r0 = address of that literal */
2145 1.1 mrg mov.l 0f,r2 /* r2 = ic_invalidate_array@GOT */
2146 1.1 mrg add r1,r0 /* r0 = GOT address */
2147 1.1 mrg #endif
2148 1.1 mrg mov.l @(r0,r2),r1 /* r1 = address of ic_invalidate_array, read from the GOT */
2149 1.1 mrg #else
2150 1.1 mrg mov.l 0f,r1 /* r1 = address of ic_invalidate_array */
2151 1.1 mrg #endif
2152 1.1 mrg ocbwb @r4 /* write back the operand-cache block first */
2153 1.1 mrg mov.l @(8,r1),r0 /* r0 = mask stored at array+8 */
2154 1.1 mrg sub r1,r4 /* r4 = address relative to the array start */
2155 1.1 mrg and r4,r0 /* r0 = masked offset selecting the array entry */
2156 1.1 mrg add r1,r0 /* r0 = absolute address of that entry */
2157 1.1 mrg jmp @r0 /* the entry returns straight to our caller */
2158 1.1 mrg mov.l @(4,r1),r0 /* jmp delay slot: r0 = word stored at array+4 */
2159 1.1 mrg .align 2
/* Literal pool read by the mov.l / mova instructions above.  */
2160 1.1 mrg #ifndef __pic__
2161 1.1 mrg 0: .long GLOBAL(ic_invalidate_array)
2162 1.1 mrg #else /* __pic__ */
2163 1.1 mrg .global GLOBAL(ic_invalidate_array)
2164 1.1 mrg 0: .long GLOBAL(ic_invalidate_array)@GOT
2165 1.1 mrg #ifdef __vxworks
2166 1.1 mrg 1: .long ___GOTT_BASE__
2167 1.1 mrg 2: .long ___GOTT_INDEX__
2168 1.1 mrg #else
2169 1.1 mrg 1: .long _GLOBAL_OFFSET_TABLE_
2170 1.1 mrg #endif
2171 1.1 mrg #endif /* __pic__ */
/* ENDFUNC sits outside the __pic__ conditional so that non-PIC builds
   also close the function opened by HIDDEN_FUNC above.  */
2172 1.1 mrg ENDFUNC(GLOBAL(ic_invalidate))
2173 1.1 mrg #endif /* SH4 */
2174 1.1 mrg #endif /* L_ic_invalidate */
2175 1.1 mrg
2176 1.1 mrg #ifdef L_ic_invalidate_array
2177 1.1 mrg #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
2178 1.1 mrg .global GLOBAL(ic_invalidate_array)
/* ic_invalidate_array (SH4A flavour): a single routine instead of a
   per-cache-line table.  It adds r1 back to r4, which undoes the
   "sub r1,r4" performed by the SH4 ic_invalidate dispatcher, and then
   invalidates the instruction-cache block at r4 with icbi.
   In: r1 = address of this array, r4 = target address minus r1.
   NOTE(review): the .global directive appears twice; the second one is
   redundant.
   NOTE(review): the SH4 dispatcher reads its mask from byte offset 8 of
   this symbol.  Five 16-bit instructions come first, so the ".long 0"
   below appears to land at offset 12 and offset 8 would hold the nop
   plus alignment padding -- confirm the intended layout.  */
2179 1.1 mrg /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2180 1.1 mrg .global GLOBAL(ic_invalidate_array)
2181 1.1 mrg FUNC(GLOBAL(ic_invalidate_array))
2182 1.1 mrg GLOBAL(ic_invalidate_array):
2183 1.1 mrg add r1,r4 /* r4 = absolute address again */
2184 1.1 mrg synco /* barrier before the invalidate */
2185 1.1 mrg icbi @r4 /* invalidate the instruction-cache block at r4 */
2186 1.1 mrg rts
2187 1.1 mrg nop /* rts delay slot */
2188 1.1 mrg .align 2
2189 1.1 mrg .long 0 /* zero word, presumably meant as the dispatcher mask -- see the layout note above */
2190 1.1 mrg ENDFUNC(GLOBAL(ic_invalidate_array))
2191 1.1 mrg #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2192 1.1 mrg .global GLOBAL(ic_invalidate_array)
/* ic_invalidate_array (SH4): a table made of 32-byte entries, aligned to
   32 bytes, covering WAY_SIZE * WAYS bytes in total.  The SH4
   ic_invalidate dispatcher in this file loads the word at byte offset 8
   as a mask and the word at byte offset 4 into r0, then jumps to the
   entry selected by the masked offset.  In every variant below the
   word at offset 8 of each entry is WAY_SIZE - 32.
   WAYS and WAY_SIZE default to 4 and 0x4000 and may be predefined
   (both together, since only WAYS is tested).  */
2193 1.1 mrg .p2align 5
2194 1.1 mrg FUNC(GLOBAL(ic_invalidate_array))
2195 1.1 mrg /* This must be aligned to the beginning of a cache line. */
2196 1.1 mrg GLOBAL(ic_invalidate_array):
2197 1.1 mrg #ifndef WAYS
2198 1.1 mrg #define WAYS 4
2199 1.1 mrg #define WAY_SIZE 0x4000
2200 1.1 mrg #endif
2201 1.1 mrg #if WAYS == 1
/* Direct-mapped layout: each entry simply returns.  Entry = rts + nop
   (4 bytes) followed by seven words of WAY_SIZE - 32 (28 bytes).  */
2202 1.1 mrg .rept WAY_SIZE * WAYS / 32
2203 1.1 mrg rts
2204 1.1 mrg nop
2205 1.1 mrg .rept 7
2206 1.1 mrg .long WAY_SIZE - 32
2207 1.1 mrg .endr
2208 1.1 mrg .endr
2209 1.1 mrg #elif WAYS <= 6
/* Entry layout (32 bytes):
     offset 0  : braf r0 with "add #-8,r0" in its delay slot
     offset 4  : .long WAY_SIZE + 8   (loaded into r0 by the dispatcher)
     offset 8  : .long WAY_SIZE - 32  (the dispatcher mask)
     offset 12 : WAYS-2 further "braf r0; nop" pairs
     then      : 7-WAYS "rts; nop" pairs filling the rest of the entry
   Each braf jumps forward by r0 relative to itself, so successive
   branches execute from entries roughly WAY_SIZE bytes apart before an
   rts returns to the dispatcher's caller.  */
2210 1.1 mrg .rept WAY_SIZE * WAYS / 32
2211 1.1 mrg braf r0
2212 1.1 mrg add #-8,r0 /* delay slot: r0 drops from WAY_SIZE + 8 to WAY_SIZE for the later brafs */
2213 1.1 mrg .long WAY_SIZE + 8
2214 1.1 mrg .long WAY_SIZE - 32
2215 1.1 mrg .rept WAYS-2
2216 1.1 mrg braf r0
2217 1.1 mrg nop
2218 1.1 mrg .endr
2219 1.1 mrg .rept 7 - WAYS
2220 1.1 mrg rts
2221 1.1 mrg nop
2222 1.1 mrg .endr
2223 1.1 mrg .endr
2224 1.1 mrg #else /* WAYS > 6 */
2225 1.1 mrg /* This variant needs two different pages for mmap-ing. */
/* First WAYS-1 ways: entry = braf r0; nop; .long WAY_SIZE; then six
   words of WAY_SIZE - 32.  */
2226 1.1 mrg .rept WAYS-1
2227 1.1 mrg .rept WAY_SIZE / 32
2228 1.1 mrg braf r0
2229 1.1 mrg nop
2230 1.1 mrg .long WAY_SIZE
2231 1.1 mrg .rept 6
2232 1.1 mrg .long WAY_SIZE - 32
2233 1.1 mrg .endr
2234 1.1 mrg .endr
2235 1.1 mrg .endr
/* Last way: entry = rts followed by fifteen nops (32 bytes).  */
2236 1.1 mrg .rept WAY_SIZE / 32
2237 1.1 mrg rts
2238 1.1 mrg .rept 15
2239 1.1 mrg nop
2240 1.1 mrg .endr
2241 1.1 mrg .endr
2242 1.1 mrg #endif /* WAYS */
2243 1.1 mrg ENDFUNC(GLOBAL(ic_invalidate_array))
2244 1.1 mrg #endif /* SH4 */
2245 1.1 mrg #endif /* L_ic_invalidate_array */
2246 1.1 mrg
2247 1.1 mrg #if defined (__SH5__) && __SH5__ == 32
2248 1.1 mrg #ifdef L_shcompact_call_trampoline
2249 1.1 mrg .section .rodata
2250 1.1 mrg .align 1
/* ct_main_table: dispatch table of GCC_shcompact_call_trampoline.
   Each entry is a 16-bit offset from ct_main_label to a handler.  The
   trampoline indexes it with 2 * nsb(cookie) from a base address of
   (ct_main_table - 31 * 2), so the first entry is reached when nsb
   yields 31.  Judging by the bits each handler clears, the first entry
   corresponds to cookie bit 31 being the highest set bit, each
   following entry to the next lower bit, and the final entry to a
   cookie with no bits left.  */
2251 1.1 mrg LOCAL(ct_main_table):
2252 1.1 mrg .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2253 1.1 mrg .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2254 1.1 mrg .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2255 1.1 mrg .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2256 1.1 mrg .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2257 1.1 mrg .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2258 1.1 mrg .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2259 1.1 mrg .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2260 1.1 mrg .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2261 1.1 mrg .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2262 1.1 mrg .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2263 1.1 mrg .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2264 1.1 mrg .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) /* from r6 on there are four entries per register: fph, fpl, ld, pop */
2265 1.1 mrg .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2266 1.1 mrg .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2267 1.1 mrg .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2268 1.1 mrg .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2269 1.1 mrg .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2270 1.1 mrg .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2271 1.1 mrg .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2272 1.1 mrg .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2273 1.1 mrg .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2274 1.1 mrg .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2275 1.1 mrg .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2276 1.1 mrg .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2277 1.1 mrg .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2278 1.1 mrg .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2279 1.1 mrg .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2280 1.1 mrg .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) /* two entries share the pop-sequence handler */
2281 1.1 mrg .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2282 1.1 mrg .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2283 1.1 mrg .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2284 1.1 mrg .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) /* last entry: nothing left to do, just call */
2285 1.1 mrg .mode SHmedia
2286 1.1 mrg .section .text..SHmedia32, "ax"
2287 1.1 mrg .align 2
2288 1.1 mrg
2289 1.1 mrg /* This function loads 64-bit general-purpose registers from the
2290 1.1 mrg stack, from a memory address contained in them or from an FP
2291 1.1 mrg register, according to a cookie passed in r1. Its execution
2292 1.1 mrg time is linear on the number of registers that actually have
2293 1.1 mrg to be copied. See sh.h for details on the actual bit pattern.
2294 1.1 mrg
2295 1.1 mrg The function to be called is passed in r0. If a 32-bit return
2296 1.1 mrg value is expected, the actual function will be tail-called,
2297 1.1 mrg otherwise the return address will be stored in r10 (that the
2298 1.1 mrg caller should expect to be clobbered) and the return value
2299 1.1 mrg will be expanded into r2/r3 upon return. */
2300 1.1 mrg
/* Register roles inside the trampoline:
     tr0 = function to call, tr1 = ct_loop, tr2 = current handler
     r0  = biased base address of ct_main_table
     r1  = remaining cookie bits; each handler clears its own bits and
           branches back through tr1
     r28..r34 = scratch  */
2301 1.1 mrg .global GLOBAL(GCC_shcompact_call_trampoline)
2302 1.1 mrg FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2303 1.1 mrg GLOBAL(GCC_shcompact_call_trampoline):
2304 1.1 mrg ptabs/l r0, tr0 /* Prepare to call the actual function. */
2305 1.1 mrg movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2306 1.1 mrg pt/l LOCAL(ct_loop), tr1 /* tr1 = loop head, the branch target of every handler */
2307 1.1 mrg addz.l r1, r63, r1 /* keep only the low 32 bits of the cookie, zero-extended */
2308 1.1 mrg shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 /* r0 = table base biased by -31 entries */
2309 1.1 mrg LOCAL(ct_loop):
2310 1.1 mrg nsb r1, r28 /* r28 grows as the highest set cookie bit gets lower */
2311 1.1 mrg shlli r28, 1, r29 /* scale to 16-bit table entries */
2312 1.1 mrg ldx.w r0, r29, r30 /* r30 = handler offset relative to ct_main_label */
2313 1.1 mrg LOCAL(ct_main_label):
2314 1.1 mrg ptrel/l r30, tr2 /* tr2 = this instruction + r30 */
2315 1.1 mrg blink tr2, r63 /* jump to the handler */
/* FP-copy handlers of GCC_shcompact_call_trampoline.  Each one moves a
   double-precision FP register into a general register, clears that
   register's field in the cookie (r1) and branches back to ct_loop
   through tr1.  Where several source registers are possible, the code
   either branches conditionally or computes a jump into a small table
   of two-instruction "fmov.dq; blink" slots (8 bytes per slot).  */
2316 1.1 mrg LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2317 1.1 mrg /* It must be dr0, so just do it. */
2318 1.1 mrg fmov.dq dr0, r2
2319 1.1 mrg movi 7, r30
2320 1.1 mrg shlli r30, 29, r31 /* r31 = 7 << 29: the r2 field of the cookie */
2321 1.1 mrg andc r1, r31, r1 /* clear that field */
2322 1.1 mrg blink tr1, r63 /* back to ct_loop */
2323 1.1 mrg LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2324 1.1 mrg /* It is either dr0 or dr2. */
2325 1.1 mrg movi 7, r30
2326 1.1 mrg shlri r1, 26, r32 /* r32 = cookie from bit 26 upwards, read before the field is cleared */
2327 1.1 mrg shlli r30, 26, r31 /* r31 = 7 << 26: the r3 field */
2328 1.1 mrg andc r1, r31, r1
2329 1.1 mrg fmov.dq dr0, r3 /* copy dr0 unconditionally ... */
2330 1.1 mrg beqi/l r32, 4, tr1 /* ... done if the field value was exactly 4 */
2331 1.1 mrg fmov.dq dr2, r3 /* otherwise overwrite with dr2 */
2332 1.1 mrg blink tr1, r63
2333 1.1 mrg LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2334 1.1 mrg shlri r1, 23 - 3, r34
2335 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 24:23): byte offset of the slot */
2336 1.1 mrg addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2337 1.1 mrg LOCAL(ct_r4_fp_base):
2338 1.1 mrg ptrel/l r32, tr2 /* tr2 = selected slot in ct_r4_fp_copy */
2339 1.1 mrg movi 7, r30
2340 1.1 mrg shlli r30, 23, r31
2341 1.1 mrg andc r1, r31, r1 /* clear the r4 field (7 << 23) */
2342 1.1 mrg blink tr2, r63
2343 1.1 mrg LOCAL(ct_r4_fp_copy):
2344 1.1 mrg fmov.dq dr0, r4
2345 1.1 mrg blink tr1, r63
2346 1.1 mrg fmov.dq dr2, r4
2347 1.1 mrg blink tr1, r63
2348 1.1 mrg fmov.dq dr4, r4
2349 1.1 mrg blink tr1, r63
2350 1.1 mrg LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2351 1.1 mrg shlri r1, 20 - 3, r34
2352 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 21:20) */
2353 1.1 mrg addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2354 1.1 mrg LOCAL(ct_r5_fp_base):
2355 1.1 mrg ptrel/l r32, tr2
2356 1.1 mrg movi 7, r30
2357 1.1 mrg shlli r30, 20, r31
2358 1.1 mrg andc r1, r31, r1 /* clear the r5 field (7 << 20) */
2359 1.1 mrg blink tr2, r63
2360 1.1 mrg LOCAL(ct_r5_fp_copy):
2361 1.1 mrg fmov.dq dr0, r5
2362 1.1 mrg blink tr1, r63
2363 1.1 mrg fmov.dq dr2, r5
2364 1.1 mrg blink tr1, r63
2365 1.1 mrg fmov.dq dr4, r5
2366 1.1 mrg blink tr1, r63
2367 1.1 mrg fmov.dq dr6, r5
2368 1.1 mrg blink tr1, r63
2369 1.1 mrg LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2370 1.1 mrg /* It must be dr8. */
2371 1.1 mrg fmov.dq dr8, r6
2372 1.1 mrg movi 15, r30
2373 1.1 mrg shlli r30, 16, r31
2374 1.1 mrg andc r1, r31, r1 /* clear the whole 4-bit r6 field (15 << 16) */
2375 1.1 mrg blink tr1, r63
2376 1.1 mrg LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2377 1.1 mrg shlri r1, 16 - 3, r34
2378 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 17:16) */
2379 1.1 mrg addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2380 1.1 mrg LOCAL(ct_r6_fp_base):
2381 1.1 mrg ptrel/l r32, tr2
2382 1.1 mrg movi 7, r30
2383 1.1 mrg shlli r30, 16, r31
2384 1.1 mrg andc r1, r31, r1 /* clear the low three bits of the r6 field (7 << 16) */
2385 1.1 mrg blink tr2, r63
2386 1.1 mrg LOCAL(ct_r6_fp_copy):
2387 1.1 mrg fmov.dq dr0, r6
2388 1.1 mrg blink tr1, r63
2389 1.1 mrg fmov.dq dr2, r6
2390 1.1 mrg blink tr1, r63
2391 1.1 mrg fmov.dq dr4, r6
2392 1.1 mrg blink tr1, r63
2393 1.1 mrg fmov.dq dr6, r6
2394 1.1 mrg blink tr1, r63
2395 1.1 mrg LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2396 1.1 mrg /* It is either dr8 or dr10. */
2397 1.1 mrg movi 15 << 12, r31
2398 1.1 mrg shlri r1, 12, r32 /* r32 = cookie from bit 12 upwards, read before the field is cleared */
2399 1.1 mrg andc r1, r31, r1 /* clear the r7 field (15 << 12) */
2400 1.1 mrg fmov.dq dr8, r7 /* copy dr8 unconditionally ... */
2401 1.1 mrg beqi/l r32, 8, tr1 /* ... done if the field value was exactly 8 */
2402 1.1 mrg fmov.dq dr10, r7 /* otherwise overwrite with dr10 */
2403 1.1 mrg blink tr1, r63
2404 1.1 mrg LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2405 1.1 mrg shlri r1, 12 - 3, r34
2406 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 13:12) */
2407 1.1 mrg addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2408 1.1 mrg LOCAL(ct_r7_fp_base):
2409 1.1 mrg ptrel/l r32, tr2
2410 1.1 mrg movi 7 << 12, r31
2411 1.1 mrg andc r1, r31, r1
2412 1.1 mrg blink tr2, r63
2413 1.1 mrg LOCAL(ct_r7_fp_copy):
2414 1.1 mrg fmov.dq dr0, r7
2415 1.1 mrg blink tr1, r63
2416 1.1 mrg fmov.dq dr2, r7
2417 1.1 mrg blink tr1, r63
2418 1.1 mrg fmov.dq dr4, r7
2419 1.1 mrg blink tr1, r63
2420 1.1 mrg fmov.dq dr6, r7
2421 1.1 mrg blink tr1, r63
2422 1.1 mrg LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2423 1.1 mrg /* It is either dr8 or dr10. */
2424 1.1 mrg movi 15 << 8, r31
2425 1.1 mrg andi r1, 1 << 8, r32 /* r32 = lowest bit of the r8 field: selects dr8 (0) or dr10 (1) */
2426 1.1 mrg andc r1, r31, r1 /* clear the r8 field (15 << 8) */
2427 1.1 mrg fmov.dq dr8, r8
2428 1.1 mrg beq/l r32, r63, tr1 /* selector bit clear: dr8 was right */
2429 1.1 mrg fmov.dq dr10, r8
2430 1.1 mrg blink tr1, r63
2431 1.1 mrg LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2432 1.1 mrg shlri r1, 8 - 3, r34
2433 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 9:8) */
2434 1.1 mrg addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2435 1.1 mrg LOCAL(ct_r8_fp_base):
2436 1.1 mrg ptrel/l r32, tr2
2437 1.1 mrg movi 7 << 8, r31
2438 1.1 mrg andc r1, r31, r1
2439 1.1 mrg blink tr2, r63
2440 1.1 mrg LOCAL(ct_r8_fp_copy):
2441 1.1 mrg fmov.dq dr0, r8
2442 1.1 mrg blink tr1, r63
2443 1.1 mrg fmov.dq dr2, r8
2444 1.1 mrg blink tr1, r63
2445 1.1 mrg fmov.dq dr4, r8
2446 1.1 mrg blink tr1, r63
2447 1.1 mrg fmov.dq dr6, r8
2448 1.1 mrg blink tr1, r63
2449 1.1 mrg LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2450 1.1 mrg /* It is either dr8 or dr10. */
2451 1.1 mrg movi 15 << 4, r31
2452 1.1 mrg andi r1, 1 << 4, r32 /* r32 = lowest bit of the r9 field: selects dr8 (0) or dr10 (1) */
2453 1.1 mrg andc r1, r31, r1 /* clear the r9 field (15 << 4) */
2454 1.1 mrg fmov.dq dr8, r9
2455 1.1 mrg beq/l r32, r63, tr1 /* selector bit clear: dr8 was right */
2456 1.1 mrg fmov.dq dr10, r9
2457 1.1 mrg blink tr1, r63
2458 1.1 mrg LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2459 1.1 mrg shlri r1, 4 - 3, r34
2460 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 5:4) */
2461 1.1 mrg addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2462 1.1 mrg LOCAL(ct_r9_fp_base):
2463 1.1 mrg ptrel/l r32, tr2
2464 1.1 mrg movi 7 << 4, r31
2465 1.1 mrg andc r1, r31, r1
2466 1.1 mrg blink tr2, r63
2467 1.1 mrg LOCAL(ct_r9_fp_copy):
2468 1.1 mrg fmov.dq dr0, r9
2469 1.1 mrg blink tr1, r63
2470 1.1 mrg fmov.dq dr2, r9
2471 1.1 mrg blink tr1, r63
2472 1.1 mrg fmov.dq dr4, r9
2473 1.1 mrg blink tr1, r63
2474 1.1 mrg fmov.dq dr6, r9
2475 1.1 mrg blink tr1, r63
/* Load handlers of GCC_shcompact_call_trampoline.  Each ct_rN_ld tests
   the two low bits of register N's cookie field (mask 3 << shift) and
   clears them:
     both bits set : branch to ct_rN_load, which replaces rN by the
                     quadword it points to and returns to ct_loop;
     otherwise     : set r(N+1) = rN + 8, replace rN by the quadword it
                     points to, and fall through into the handler of the
                     next register, so consecutive registers are filled
                     from consecutive quadwords.
   ct_r9_ld ends the chain: it loads r9 and goes to ct_check_tramp.  */
2476 1.1 mrg LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2477 1.1 mrg pt/l LOCAL(ct_r2_load), tr2
2478 1.1 mrg movi 3, r30
2479 1.1 mrg shlli r30, 29, r31 /* r31 = 3 << 29 */
2480 1.1 mrg and r1, r31, r32 /* r32 = the two field bits as found in the cookie */
2481 1.1 mrg andc r1, r31, r1 /* clear them in the cookie */
2482 1.1 mrg beq/l r31, r32, tr2 /* both bits were set: single load, then back to ct_loop */
2483 1.1 mrg addi.l r2, 8, r3 /* next register gets the address of the following quadword */
2484 1.1 mrg ldx.q r2, r63, r2 /* r2 = quadword at the address r2 held */
2485 1.1 mrg /* Fall through. */
2486 1.1 mrg LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2487 1.1 mrg pt/l LOCAL(ct_r3_load), tr2
2488 1.1 mrg movi 3, r30
2489 1.1 mrg shlli r30, 26, r31
2490 1.1 mrg and r1, r31, r32
2491 1.1 mrg andc r1, r31, r1
2492 1.1 mrg beq/l r31, r32, tr2
2493 1.1 mrg addi.l r3, 8, r4
2494 1.1 mrg ldx.q r3, r63, r3 /* falls through into ct_r4_ld */
2495 1.1 mrg LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2496 1.1 mrg pt/l LOCAL(ct_r4_load), tr2
2497 1.1 mrg movi 3, r30
2498 1.1 mrg shlli r30, 23, r31
2499 1.1 mrg and r1, r31, r32
2500 1.1 mrg andc r1, r31, r1
2501 1.1 mrg beq/l r31, r32, tr2
2502 1.1 mrg addi.l r4, 8, r5
2503 1.1 mrg ldx.q r4, r63, r4 /* falls through into ct_r5_ld */
2504 1.1 mrg LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2505 1.1 mrg pt/l LOCAL(ct_r5_load), tr2
2506 1.1 mrg movi 3, r30
2507 1.1 mrg shlli r30, 20, r31
2508 1.1 mrg and r1, r31, r32
2509 1.1 mrg andc r1, r31, r1
2510 1.1 mrg beq/l r31, r32, tr2
2511 1.1 mrg addi.l r5, 8, r6
2512 1.1 mrg ldx.q r5, r63, r5 /* falls through into ct_r6_ld */
2513 1.1 mrg LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2514 1.1 mrg pt/l LOCAL(ct_r6_load), tr2
2515 1.1 mrg movi 3 << 16, r31
2516 1.1 mrg and r1, r31, r32
2517 1.1 mrg andc r1, r31, r1
2518 1.1 mrg beq/l r31, r32, tr2
2519 1.1 mrg addi.l r6, 8, r7
2520 1.1 mrg ldx.q r6, r63, r6 /* falls through into ct_r7_ld */
2521 1.1 mrg LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2522 1.1 mrg pt/l LOCAL(ct_r7_load), tr2
2523 1.1 mrg movi 3 << 12, r31
2524 1.1 mrg and r1, r31, r32
2525 1.1 mrg andc r1, r31, r1
2526 1.1 mrg beq/l r31, r32, tr2
2527 1.1 mrg addi.l r7, 8, r8
2528 1.1 mrg ldx.q r7, r63, r7 /* falls through into ct_r8_ld */
2529 1.1 mrg LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2530 1.1 mrg pt/l LOCAL(ct_r8_load), tr2
2531 1.1 mrg movi 3 << 8, r31
2532 1.1 mrg and r1, r31, r32
2533 1.1 mrg andc r1, r31, r1
2534 1.1 mrg beq/l r31, r32, tr2
2535 1.1 mrg addi.l r8, 8, r9
2536 1.1 mrg ldx.q r8, r63, r8 /* falls through into ct_r9_ld */
2537 1.1 mrg LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2538 1.1 mrg pt/l LOCAL(ct_check_tramp), tr2 /* r9 is the last register: skip the loop and finish */
2539 1.1 mrg ldx.q r9, r63, r9
2540 1.1 mrg blink tr2, r63
2541 1.1 mrg LOCAL(ct_r2_load): /* single-register loads: load, then back to ct_loop */
2542 1.1 mrg ldx.q r2, r63, r2
2543 1.1 mrg blink tr1, r63
2544 1.1 mrg LOCAL(ct_r3_load):
2545 1.1 mrg ldx.q r3, r63, r3
2546 1.1 mrg blink tr1, r63
2547 1.1 mrg LOCAL(ct_r4_load):
2548 1.1 mrg ldx.q r4, r63, r4
2549 1.1 mrg blink tr1, r63
2550 1.1 mrg LOCAL(ct_r5_load):
2551 1.1 mrg ldx.q r5, r63, r5
2552 1.1 mrg blink tr1, r63
2553 1.1 mrg LOCAL(ct_r6_load):
2554 1.1 mrg ldx.q r6, r63, r6
2555 1.1 mrg blink tr1, r63
2556 1.1 mrg LOCAL(ct_r7_load):
2557 1.1 mrg ldx.q r7, r63, r7
2558 1.1 mrg blink tr1, r63
2559 1.1 mrg LOCAL(ct_r8_load):
2560 1.1 mrg ldx.q r8, r63, r8
2561 1.1 mrg blink tr1, r63
/* Pop handlers of GCC_shcompact_call_trampoline.  Each single-register
   handler loads one quadword from the stack pointer (r15) into its
   register, advances r15 by 8, clears the register's pop bit in the
   cookie (r1) and returns to ct_loop through tr1.  ct_pop_seq instead
   jumps into the middle of a straight-line run of pops that ends with
   r9 and then continues with the code that follows ct_r9_pop.  */
2562 1.1 mrg LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2563 1.1 mrg movi 1, r30
2564 1.1 mrg ldx.q r15, r63, r2 /* r2 = quadword at the stack pointer */
2565 1.1 mrg shlli r30, 29, r31 /* r31 = 1 << 29: the pop bit for r2 */
2566 1.1 mrg addi.l r15, 8, r15 /* release the stack slot */
2567 1.1 mrg andc r1, r31, r1 /* clear the pop bit */
2568 1.1 mrg blink tr1, r63 /* back to ct_loop */
2569 1.1 mrg LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2570 1.1 mrg movi 1, r30
2571 1.1 mrg ldx.q r15, r63, r3
2572 1.1 mrg shlli r30, 26, r31
2573 1.1 mrg addi.l r15, 8, r15
2574 1.1 mrg andc r1, r31, r1
2575 1.1 mrg blink tr1, r63
2576 1.1 mrg LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2577 1.1 mrg movi 1, r30
2578 1.1 mrg ldx.q r15, r63, r4
2579 1.1 mrg shlli r30, 23, r31
2580 1.1 mrg addi.l r15, 8, r15
2581 1.1 mrg andc r1, r31, r1
2582 1.1 mrg blink tr1, r63
2583 1.1 mrg LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2584 1.1 mrg movi 1, r30
2585 1.1 mrg ldx.q r15, r63, r5
2586 1.1 mrg shlli r30, 20, r31
2587 1.1 mrg addi.l r15, 8, r15
2588 1.1 mrg andc r1, r31, r1
2589 1.1 mrg blink tr1, r63
2590 1.1 mrg LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2591 1.1 mrg movi 1, r30
2592 1.1 mrg ldx.q r15, r63, r6
2593 1.1 mrg shlli r30, 16, r31
2594 1.1 mrg addi.l r15, 8, r15
2595 1.1 mrg andc r1, r31, r1
2596 1.1 mrg blink tr1, r63
2597 1.1 mrg LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2598 1.1 mrg ldx.q r15, r63, r7
2599 1.1 mrg movi 1 << 12, r31 /* small enough to load the mask directly */
2600 1.1 mrg addi.l r15, 8, r15
2601 1.1 mrg andc r1, r31, r1
2602 1.1 mrg blink tr1, r63
2603 1.1 mrg LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2604 1.1 mrg ldx.q r15, r63, r8
2605 1.1 mrg movi 1 << 8, r31
2606 1.1 mrg addi.l r15, 8, r15
2607 1.1 mrg andc r1, r31, r1
2608 1.1 mrg blink tr1, r63
2609 1.1 mrg LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2610 1.1 mrg andi r1, 7 << 1, r30 /* r30 = 2 * (cookie bits 3:1) */
2611 1.1 mrg movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2612 1.1 mrg shlli r30, 2, r31 /* r31 = 8 * (cookie bits 3:1): 8 code bytes per popped register */
2613 1.1 mrg shori LOCAL(ct_end_of_pop_seq) & 65535, r32 /* r32 = address of ct_end_of_pop_seq */
2614 1.1 mrg sub.l r32, r31, r33 /* r33 = entry point that many pops before the end */
2615 1.1 mrg ptabs/l r33, tr2
2616 1.1 mrg blink tr2, r63
2617 1.1 mrg LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2618 1.1 mrg ldx.q r15, r63, r3
2619 1.1 mrg addi.l r15, 8, r15
2620 1.1 mrg ldx.q r15, r63, r4
2621 1.1 mrg addi.l r15, 8, r15
2622 1.1 mrg ldx.q r15, r63, r5
2623 1.1 mrg addi.l r15, 8, r15
2624 1.1 mrg ldx.q r15, r63, r6
2625 1.1 mrg addi.l r15, 8, r15
2626 1.1 mrg ldx.q r15, r63, r7
2627 1.1 mrg addi.l r15, 8, r15
2628 1.1 mrg ldx.q r15, r63, r8
2629 1.1 mrg addi.l r15, 8, r15
2630 1.1 mrg LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2631 1.1 mrg ldx.q r15, r63, r9
2632 1.1 mrg addi.l r15, 8, r15 /* no branch back: execution continues with the code after this line */
/* Final stage of GCC_shcompact_call_trampoline.  Bit 0 of the cookie
   (r1) decides how the target in tr0 is entered:
     bit 0 clear : plain tail call (ct_call_func);
     bit 0 set   : real call (ct_ret_wide); afterwards the 64-bit value
                   in r2 is split into two sign-extended 32-bit halves
                   in r2 and r3, and control returns to the original
                   caller through the address kept in r10.  */
2633 1.1 mrg LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2634 1.1 mrg LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2635 1.1 mrg pt/u LOCAL(ct_ret_wide), tr2
2636 1.1 mrg andi r1, 1, r1 /* isolate cookie bit 0 */
2637 1.1 mrg bne/u r1, r63, tr2 /* set: take the call-and-unpack path */
2638 1.1 mrg LOCAL(ct_call_func): /* Just branch to the function. */
2639 1.1 mrg blink tr0, r63 /* tail call: r18 still holds the caller return address */
2640 1.1 mrg LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2641 1.1 mrg 64-bit return value. */
2642 1.1 mrg add.l r18, r63, r10 /* r10 = return address (32-bit add) */
2643 1.1 mrg blink tr0, r18 /* call the function, linking through r18 */
2644 1.1 mrg ptabs r10, tr0 /* tr0 = original return address */
2645 1.1 mrg #if __LITTLE_ENDIAN__
2646 1.1 mrg shari r2, 32, r3 /* r3 = upper 32 bits of the result */
2647 1.1 mrg add.l r2, r63, r2 /* r2 = lower 32 bits, sign-extended */
2648 1.1 mrg #else
2649 1.1 mrg add.l r2, r63, r3 /* r3 = lower 32 bits, sign-extended */
2650 1.1 mrg shari r2, 32, r2 /* r2 = upper 32 bits of the result */
2651 1.1 mrg #endif
2652 1.1 mrg blink tr0, r63 /* return to the original caller */
2653 1.1 mrg
2654 1.1 mrg ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2655 1.1 mrg #endif /* L_shcompact_call_trampoline */
2656 1.1 mrg
2657 1.1 mrg #ifdef L_shcompact_return_trampoline
2658 1.1 mrg /* This function does the converse of the code in `ret_wide'
2659 1.1 mrg above. It is tail-called by SHcompact functions returning
2660 1.1 mrg 64-bit non-floating-point values, to pack the 32-bit values in
2661 1.1 mrg r2 and r3 into r2. */
2662 1.1 mrg
/* In:  r2, r3 = the two 32-bit halves, r18 = return address.
   Out: r2 = 64-bit value.  Little-endian: r3 supplies the upper half
        and r2 the lower half; big-endian: r2 supplies the upper half
        and r3 the lower half.
   Written: r2, r3, tr0.  */
2663 1.1 mrg .mode SHmedia
2664 1.1 mrg .section .text..SHmedia32, "ax"
2665 1.1 mrg .align 2
2666 1.1 mrg .global GLOBAL(GCC_shcompact_return_trampoline)
2667 1.1 mrg HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2668 1.1 mrg GLOBAL(GCC_shcompact_return_trampoline):
2669 1.1 mrg ptabs/l r18, tr0 /* tr0 = return address */
2670 1.1 mrg #if __LITTLE_ENDIAN__
2671 1.1 mrg addz.l r2, r63, r2 /* zero-extend the lower half so it cannot disturb the upper half */
2672 1.1 mrg shlli r3, 32, r3 /* move the upper half into bits 32..63 */
2673 1.1 mrg #else
2674 1.1 mrg addz.l r3, r63, r3 /* zero-extend the lower half so it cannot disturb the upper half */
2675 1.1 mrg shlli r2, 32, r2 /* move the upper half into bits 32..63 */
2676 1.1 mrg #endif
2677 1.1 mrg or r3, r2, r2 /* r2 = combined 64-bit value */
2678 1.1 mrg blink tr0, r63 /* return */
2679 1.1 mrg
2680 1.1 mrg ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2681 1.1 mrg #endif /* L_shcompact_return_trampoline */
2682 1.1 mrg
2683 1.1 mrg #ifdef L_shcompact_incoming_args
2684 1.1 mrg .section .rodata
2685 1.1 mrg .align 1
2686 1.1 mrg LOCAL(ia_main_table):
2687 1.1 mrg .word 1 /* Invalid, just loop */
2688 1.1 mrg .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2689 1.1 mrg .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2690 1.1 mrg .word 1 /* Invalid, just loop */
2691 1.1 mrg .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2692 1.1 mrg .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2693 1.1 mrg .word 1 /* Invalid, just loop */
2694 1.1 mrg .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2695 1.1 mrg .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2696 1.1 mrg .word 1 /* Invalid, just loop */
2697 1.1 mrg .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2698 1.1 mrg .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2699 1.1 mrg .word 1 /* Invalid, just loop */
2700 1.1 mrg .word 1 /* Invalid, just loop */
2701 1.1 mrg .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2702 1.1 mrg .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2703 1.1 mrg .word 1 /* Invalid, just loop */
2704 1.1 mrg .word 1 /* Invalid, just loop */
2705 1.1 mrg .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2706 1.1 mrg .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2707 1.1 mrg .word 1 /* Invalid, just loop */
2708 1.1 mrg .word 1 /* Invalid, just loop */
2709 1.1 mrg .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2710 1.1 mrg .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2711 1.1 mrg .word 1 /* Invalid, just loop */
2712 1.1 mrg .word 1 /* Invalid, just loop */
2713 1.1 mrg .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2714 1.1 mrg .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2715 1.1 mrg .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2716 1.1 mrg .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2717 1.1 mrg .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2718 1.1 mrg .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2719 1.1 mrg .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2720 1.1 mrg .mode SHmedia
2721 1.1 mrg .section .text..SHmedia32, "ax"
2722 1.1 mrg .align 2
2723 1.1 mrg
2724 1.1 mrg /* This function stores 64-bit general-purpose registers back in
2725 1.1 mrg the stack, and loads the address in which each register
2726 1.1 mrg was stored into itself. The lower 32 bits of r17 hold the address
2727 1.1 mrg to begin storing, and the upper 32 bits of r17 hold the cookie.
2728 1.1 mrg Its execution time is linear on the
2729 1.1 mrg number of registers that actually have to be copied, and it is
2730 1.1 mrg optimized for structures larger than 64 bits, as opposed to
2731 1.1 mrg individual `long long' arguments. See sh.h for details on the
2732 1.1 mrg actual bit pattern. */
2733 1.1 mrg
2734 1.1 mrg .global GLOBAL(GCC_shcompact_incoming_args)
2735 1.1 mrg FUNC(GLOBAL(GCC_shcompact_incoming_args))
2736 1.1 mrg GLOBAL(GCC_shcompact_incoming_args):
2737 1.1 mrg ptabs/l r18, tr0 /* Prepare to return. */
2738 1.1 mrg shlri r17, 32, r0 /* Load the cookie. */
2739 1.1 mrg movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2740 1.1 mrg pt/l LOCAL(ia_loop), tr1
2741 1.1 mrg add.l r17, r63, r17
2742 1.1 mrg shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2743 1.1 mrg LOCAL(ia_loop):
2744 1.1 mrg nsb r0, r36
2745 1.1 mrg shlli r36, 1, r37
2746 1.1 mrg ldx.w r43, r37, r38
2747 1.1 mrg LOCAL(ia_main_label):
2748 1.1 mrg ptrel/l r38, tr2
2749 1.1 mrg blink tr2, r63
2750 1.1 mrg LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2751 1.1 mrg movi 3, r38
2752 1.1 mrg shlli r38, 29, r39
2753 1.1 mrg and r0, r39, r40
2754 1.1 mrg andc r0, r39, r0
2755 1.1 mrg stx.q r17, r63, r2
2756 1.1 mrg add.l r17, r63, r2
2757 1.1 mrg addi.l r17, 8, r17
2758 1.1 mrg beq/u r39, r40, tr1
/* Fall-through chain of "store and load address" handlers for r3..r9.
   Each handler for rN (r3..r8):
     - builds a two-bit mask (3 << shift) in r39,
     - copies the masked bits of r0 into r40 and clears them in r0,
     - stores rN as a quadword at the address in r17 (r63 reads as zero),
     - replaces rN with that address, and advances r17 by 8,
     - branches to tr1 when both mask bits were set in r0; otherwise
       execution falls through into the handler for the next register.
   The r9 handler stores r9, replaces it with its address and leaves via tr0.
   NOTE(review): r0, r17, tr0 and tr1 are set up before this point and are
   not visible here; presumably r0 is the argument descriptor, r17 the
   next stack slot, tr1 the dispatch loop and tr0 the return -- confirm.  */
2759 1.1 mrg LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2760 1.1 mrg movi 3, r38
2761 1.1 mrg shlli r38, 26, r39
2762 1.1 mrg and r0, r39, r40
2763 1.1 mrg andc r0, r39, r0
2764 1.1 mrg stx.q r17, r63, r3
2765 1.1 mrg add.l r17, r63, r3
2766 1.1 mrg addi.l r17, 8, r17
2767 1.1 mrg beq/u r39, r40, tr1
2768 1.1 mrg LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2769 1.1 mrg movi 3, r38
2770 1.1 mrg shlli r38, 23, r39
2771 1.1 mrg and r0, r39, r40
2772 1.1 mrg andc r0, r39, r0
2773 1.1 mrg stx.q r17, r63, r4
2774 1.1 mrg add.l r17, r63, r4
2775 1.1 mrg addi.l r17, 8, r17
2776 1.1 mrg beq/u r39, r40, tr1
2777 1.1 mrg LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2778 1.1 mrg movi 3, r38
2779 1.1 mrg shlli r38, 20, r39
2780 1.1 mrg and r0, r39, r40
2781 1.1 mrg andc r0, r39, r0
2782 1.1 mrg stx.q r17, r63, r5
2783 1.1 mrg add.l r17, r63, r5
2784 1.1 mrg addi.l r17, 8, r17
2785 1.1 mrg beq/u r39, r40, tr1
2786 1.1 mrg LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2787 1.1 mrg movi 3, r38
2788 1.1 mrg shlli r38, 16, r39
2789 1.1 mrg and r0, r39, r40
2790 1.1 mrg andc r0, r39, r0
2791 1.1 mrg stx.q r17, r63, r6
2792 1.1 mrg add.l r17, r63, r6
2793 1.1 mrg addi.l r17, 8, r17
2794 1.1 mrg beq/u r39, r40, tr1
2795 1.1 mrg LOCAL(ia_r7_ld): /* Store r7 and load its address. */
/* From here on the mask is small enough to be loaded with a single movi.  */
2796 1.1 mrg movi 3 << 12, r39
2797 1.1 mrg and r0, r39, r40
2798 1.1 mrg andc r0, r39, r0
2799 1.1 mrg stx.q r17, r63, r7
2800 1.1 mrg add.l r17, r63, r7
2801 1.1 mrg addi.l r17, 8, r17
2802 1.1 mrg beq/u r39, r40, tr1
2803 1.1 mrg LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2804 1.1 mrg movi 3 << 8, r39
2805 1.1 mrg and r0, r39, r40
2806 1.1 mrg andc r0, r39, r0
2807 1.1 mrg stx.q r17, r63, r8
2808 1.1 mrg add.l r17, r63, r8
2809 1.1 mrg addi.l r17, 8, r17
2810 1.1 mrg beq/u r39, r40, tr1
2811 1.1 mrg LOCAL(ia_r9_ld): /* Store r9 and load its address. */
/* Last register of the chain: no mask handling, leave through tr0.  */
2812 1.1 mrg stx.q r17, r63, r9
2813 1.1 mrg add.l r17, r63, r9
2814 1.1 mrg blink tr0, r63
/* Single-register push handlers for r2..r8.
   Each handler clears one bit (1 << shift) in r0, stores the register as a
   quadword at the address in r17 (r63 reads as zero), advances r17 by 8
   and branches unconditionally to tr1.  Unlike the *_ld handlers above
   there is no fall-through: every handler ends in a blink.
   The shift amounts (29, 26, 23, 20, 16, 12, 8) are the same per-register
   values used by the corresponding *_ld handlers.
   NOTE(review): tr1 is set up outside the visible code; presumably it
   returns to the dispatch loop -- confirm.  */
2815 1.1 mrg LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2816 1.1 mrg movi 1, r38
2817 1.1 mrg shlli r38, 29, r39
2818 1.1 mrg andc r0, r39, r0
2819 1.1 mrg stx.q r17, r63, r2
2820 1.1 mrg addi.l r17, 8, r17
2821 1.1 mrg blink tr1, r63
2822 1.1 mrg LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2823 1.1 mrg movi 1, r38
2824 1.1 mrg shlli r38, 26, r39
2825 1.1 mrg andc r0, r39, r0
2826 1.1 mrg stx.q r17, r63, r3
2827 1.1 mrg addi.l r17, 8, r17
2828 1.1 mrg blink tr1, r63
2829 1.1 mrg LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2830 1.1 mrg movi 1, r38
2831 1.1 mrg shlli r38, 23, r39
2832 1.1 mrg andc r0, r39, r0
2833 1.1 mrg stx.q r17, r63, r4
2834 1.1 mrg addi.l r17, 8, r17
2835 1.1 mrg blink tr1, r63
2836 1.1 mrg LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2837 1.1 mrg movi 1, r38
2838 1.1 mrg shlli r38, 20, r39
2839 1.1 mrg andc r0, r39, r0
2840 1.1 mrg stx.q r17, r63, r5
2841 1.1 mrg addi.l r17, 8, r17
2842 1.1 mrg blink tr1, r63
2843 1.1 mrg LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2844 1.1 mrg movi 1, r38
2845 1.1 mrg shlli r38, 16, r39
2846 1.1 mrg andc r0, r39, r0
2847 1.1 mrg stx.q r17, r63, r6
2848 1.1 mrg addi.l r17, 8, r17
2849 1.1 mrg blink tr1, r63
2850 1.1 mrg LOCAL(ia_r7_push): /* Push r7 onto the stack. */
/* The remaining masks fit a single movi immediate.  */
2851 1.1 mrg movi 1 << 12, r39
2852 1.1 mrg andc r0, r39, r0
2853 1.1 mrg stx.q r17, r63, r7
2854 1.1 mrg addi.l r17, 8, r17
2855 1.1 mrg blink tr1, r63
2856 1.1 mrg LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2857 1.1 mrg movi 1 << 8, r39
2858 1.1 mrg andc r0, r39, r0
2859 1.1 mrg stx.q r17, r63, r8
2860 1.1 mrg addi.l r17, 8, r17
2861 1.1 mrg blink tr1, r63
/* Push a run of consecutive registers ending with r9.
   The three-bit field at bits 1..3 of r0 selects how many registers are
   stored.  The entry point is computed backwards from ia_end_of_push_seq:
     r38 = r0 & (7 << 1)              (field value k, times 2)
     r39 = r38 << 2                   (k * 8 bytes)
     r41 = &ia_end_of_push_seq - r39
   Every "stx.q / addi.l" pair below is 8 bytes of code, and the final
   "stx.q r9 / blink" pair is 8 bytes too, so jumping back k * 8 bytes
   stores the last k registers of r3..r9 and then returns through tr0.
   Do not insert or remove instructions between ia_stack_of_push_seq and
   ia_end_of_push_seq without adjusting this computation.
   NOTE(review): k == 0 would jump to ia_end_of_push_seq itself, i.e. past
   the return; presumably callers never request that -- confirm.  */
2862 1.1 mrg LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2863 1.1 mrg andi r0, 7 << 1, r38
/* Build the 32-bit address of ia_end_of_push_seq in r40, 16 bits at a time.  */
2864 1.1 mrg movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2865 1.1 mrg shlli r38, 2, r39
2866 1.1 mrg shori LOCAL(ia_end_of_push_seq) & 65535, r40
2867 1.1 mrg sub.l r40, r39, r41
2868 1.1 mrg ptabs/l r41, tr2
2869 1.1 mrg blink tr2, r63
2870 1.1 mrg LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2871 1.1 mrg stx.q r17, r63, r3
2872 1.1 mrg addi.l r17, 8, r17
2873 1.1 mrg stx.q r17, r63, r4
2874 1.1 mrg addi.l r17, 8, r17
2875 1.1 mrg stx.q r17, r63, r5
2876 1.1 mrg addi.l r17, 8, r17
2877 1.1 mrg stx.q r17, r63, r6
2878 1.1 mrg addi.l r17, 8, r17
2879 1.1 mrg stx.q r17, r63, r7
2880 1.1 mrg addi.l r17, 8, r17
2881 1.1 mrg stx.q r17, r63, r8
2882 1.1 mrg addi.l r17, 8, r17
2883 1.1 mrg LOCAL(ia_r9_push): /* Push r9 onto the stack. */
/* r17 is not advanced after the final store.  */
2884 1.1 mrg stx.q r17, r63, r9
2885 1.1 mrg LOCAL(ia_return): /* Return. */
2886 1.1 mrg blink tr0, r63
2887 1.1 mrg LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2888 1.1 mrg ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2889 1.1 mrg #endif /* L_shcompact_incoming_args */
2890 1.1 mrg #endif
2891 1.1 mrg #if __SH5__
2892 1.1 mrg #ifdef L_nested_trampoline
2893 1.1 mrg #if __SH5__ == 32
2894 1.1 mrg .section .text..SHmedia32,"ax"
2895 1.1 mrg #else
2896 1.1 mrg .text
2897 1.1 mrg #endif
/* GCC_nested_trampoline: position-independent SHmedia code template.
   The code finds its own address, loads a word from offset 24 of that
   address and uses it as the branch target, then loads a second word
   (offset 32 when __SH5__ == 64, offset 28 otherwise) into r1 and jumps.
   The five 4-byte instructions occupy 20 bytes, so offset 24 is the first
   8-byte-aligned location after the code.
   Clobbers r0, r1, tr0 and tr1.
   NOTE(review): the two data words are not part of this template;
   presumably they are the target function address and the static chain
   value written by whoever copies the template -- confirm.  */
2898 1.1 mrg .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2899 1.1 mrg .global GLOBAL(GCC_nested_trampoline)
2900 1.1 mrg HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2901 1.1 mrg GLOBAL(GCC_nested_trampoline):
2902 1.1 mrg .mode SHmedia
/* PC-relative target with a zero offset (r63 reads as zero), moved to r0
   so it can serve as the base address for the loads below.  */
2903 1.1 mrg ptrel/u r63, tr0
2904 1.1 mrg gettr tr0, r0
2905 1.1 mrg #if __SH5__ == 64
2906 1.1 mrg ld.q r0, 24, r1
2907 1.1 mrg #else
2908 1.1 mrg ld.l r0, 24, r1
2909 1.1 mrg #endif
/* First word becomes the branch target.  */
2910 1.1 mrg ptabs/l r1, tr1
/* Second word is left in r1 for the callee.  */
2911 1.1 mrg #if __SH5__ == 64
2912 1.1 mrg ld.q r0, 32, r1
2913 1.1 mrg #else
2914 1.1 mrg ld.l r0, 28, r1
2915 1.1 mrg #endif
2916 1.1 mrg blink tr1, r63
2917 1.1 mrg
2918 1.1 mrg ENDFUNC(GLOBAL(GCC_nested_trampoline))
2919 1.1 mrg #endif /* L_nested_trampoline */
2920 1.1 mrg #endif /* __SH5__ */
2921 1.1 mrg #if __SH5__ == 32
2922 1.1 mrg #ifdef L_push_pop_shmedia_regs
2923 1.1 mrg .section .text..SHmedia32,"ax"
2924 1.1 mrg .mode SHmedia
2925 1.1 mrg .align 2
/* GCC_push_shmedia_regs / GCC_push_shmedia_regs_nofpu
   Save a fixed set of registers on the stack addressed by r15.
   Exactly one of the two entry points is assembled, depending on
   __SH4_NOFPU__; both share the integer part below.

   FPU build:    first reserves 14*8 bytes and stores dr36, dr38, ...,
                 dr62, then falls through into the integer part, for a
                 total frame of 41*8 bytes.
   Integer part: reserves 27*8 bytes and stores
                   slots 24..26 : tr5, tr6, tr7 (read via gettr into
                                  r60, r61, r62 first)
                   slots  8..23 : r44 .. r59
                   slots  0..7  : r28 .. r35
   Returns through r18.  r60-r62 are overwritten with tr5-tr7 before
   being stored, so their incoming values are not saved.
   Clobbers tr0, r60, r61, r62; r15 is lowered by the frame size.  */
2926 1.1 mrg #ifndef __SH4_NOFPU__
2927 1.1 mrg .global GLOBAL(GCC_push_shmedia_regs)
2928 1.1 mrg FUNC(GLOBAL(GCC_push_shmedia_regs))
2929 1.1 mrg GLOBAL(GCC_push_shmedia_regs):
2930 1.1 mrg addi.l r15, -14*8, r15
2931 1.1 mrg fst.d r15, 13*8, dr62
2932 1.1 mrg fst.d r15, 12*8, dr60
2933 1.1 mrg fst.d r15, 11*8, dr58
2934 1.1 mrg fst.d r15, 10*8, dr56
2935 1.1 mrg fst.d r15, 9*8, dr54
2936 1.1 mrg fst.d r15, 8*8, dr52
2937 1.1 mrg fst.d r15, 7*8, dr50
2938 1.1 mrg fst.d r15, 6*8, dr48
2939 1.1 mrg fst.d r15, 5*8, dr46
2940 1.1 mrg fst.d r15, 4*8, dr44
2941 1.1 mrg fst.d r15, 3*8, dr42
2942 1.1 mrg fst.d r15, 2*8, dr40
2943 1.1 mrg fst.d r15, 1*8, dr38
2944 1.1 mrg fst.d r15, 0*8, dr36
2945 1.1 mrg #else /* __SH4_NOFPU__ */
2946 1.1 mrg .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2947 1.1 mrg FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2948 1.1 mrg GLOBAL(GCC_push_shmedia_regs_nofpu):
2949 1.1 mrg #endif /* ! __SH4_NOFPU__ */
/* Shared integer part: set up the return target first, then save.  */
2950 1.1 mrg ptabs/l r18, tr0
2951 1.1 mrg addi.l r15, -27*8, r15
2952 1.1 mrg gettr tr7, r62
2953 1.1 mrg gettr tr6, r61
2954 1.1 mrg gettr tr5, r60
2955 1.1 mrg st.q r15, 26*8, r62
2956 1.1 mrg st.q r15, 25*8, r61
2957 1.1 mrg st.q r15, 24*8, r60
2958 1.1 mrg st.q r15, 23*8, r59
2959 1.1 mrg st.q r15, 22*8, r58
2960 1.1 mrg st.q r15, 21*8, r57
2961 1.1 mrg st.q r15, 20*8, r56
2962 1.1 mrg st.q r15, 19*8, r55
2963 1.1 mrg st.q r15, 18*8, r54
2964 1.1 mrg st.q r15, 17*8, r53
2965 1.1 mrg st.q r15, 16*8, r52
2966 1.1 mrg st.q r15, 15*8, r51
2967 1.1 mrg st.q r15, 14*8, r50
2968 1.1 mrg st.q r15, 13*8, r49
2969 1.1 mrg st.q r15, 12*8, r48
2970 1.1 mrg st.q r15, 11*8, r47
2971 1.1 mrg st.q r15, 10*8, r46
2972 1.1 mrg st.q r15, 9*8, r45
2973 1.1 mrg st.q r15, 8*8, r44
2974 1.1 mrg st.q r15, 7*8, r35
2975 1.1 mrg st.q r15, 6*8, r34
2976 1.1 mrg st.q r15, 5*8, r33
2977 1.1 mrg st.q r15, 4*8, r32
2978 1.1 mrg st.q r15, 3*8, r31
2979 1.1 mrg st.q r15, 2*8, r30
2980 1.1 mrg st.q r15, 1*8, r29
2981 1.1 mrg st.q r15, 0*8, r28
2982 1.1 mrg blink tr0, r63
2983 1.1 mrg #ifndef __SH4_NOFPU__
2984 1.1 mrg ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2985 1.1 mrg #else
2986 1.1 mrg ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2987 1.1 mrg #endif
/* GCC_pop_shmedia_regs / GCC_pop_shmedia_regs_nofpu
   Reload the registers from a frame with the slot layout
   (27*8 integer bytes, plus 14*8 floating-point bytes in the FPU build)
   and release the frame.  Exactly one entry point is assembled,
   depending on __SH4_NOFPU__.

   r0 carries the frame size to add back to r15: 41*8 for the FPU entry,
   27*8 for the no-FPU entry.  The FPU entry reloads dr36..dr62 from
   slots 27..40 and then branches to .L0, skipping the "movi 27*8, r0"
   that the no-FPU entry executes.
   From .L0: tr5..tr7 are reloaded from slots 24..26 (through r60..r62),
   r44..r59 from slots 8..23, r28..r35 from slots 0..7.
   Returns through r18.  Clobbers r0, tr0 (and tr1 in the FPU build);
   r60-r62 are left holding the reloaded tr5-tr7 values.  */
2988 1.1 mrg #ifndef __SH4_NOFPU__
2989 1.1 mrg .global GLOBAL(GCC_pop_shmedia_regs)
2990 1.1 mrg FUNC(GLOBAL(GCC_pop_shmedia_regs))
2991 1.1 mrg GLOBAL(GCC_pop_shmedia_regs):
2992 1.1 mrg pt .L0, tr1
2993 1.1 mrg movi 41*8, r0
2994 1.1 mrg fld.d r15, 40*8, dr62
2995 1.1 mrg fld.d r15, 39*8, dr60
2996 1.1 mrg fld.d r15, 38*8, dr58
2997 1.1 mrg fld.d r15, 37*8, dr56
2998 1.1 mrg fld.d r15, 36*8, dr54
2999 1.1 mrg fld.d r15, 35*8, dr52
3000 1.1 mrg fld.d r15, 34*8, dr50
3001 1.1 mrg fld.d r15, 33*8, dr48
3002 1.1 mrg fld.d r15, 32*8, dr46
3003 1.1 mrg fld.d r15, 31*8, dr44
3004 1.1 mrg fld.d r15, 30*8, dr42
3005 1.1 mrg fld.d r15, 29*8, dr40
3006 1.1 mrg fld.d r15, 28*8, dr38
3007 1.1 mrg fld.d r15, 27*8, dr36
3008 1.1 mrg blink tr1, r63
3009 1.1 mrg #else /* __SH4_NOFPU__ */
3010 1.1 mrg .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
3011 1.1 mrg FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3012 1.1 mrg GLOBAL(GCC_pop_shmedia_regs_nofpu):
3013 1.1 mrg #endif /* ! __SH4_NOFPU__ */
/* In the FPU build this instruction is never reached: the blink above
   goes straight to .L0 and no label precedes it.  */
3014 1.1 mrg movi 27*8, r0
3015 1.1 mrg .L0:
3016 1.1 mrg ptabs r18, tr0
3017 1.1 mrg ld.q r15, 26*8, r62
3018 1.1 mrg ld.q r15, 25*8, r61
3019 1.1 mrg ld.q r15, 24*8, r60
3020 1.1 mrg ptabs r62, tr7
3021 1.1 mrg ptabs r61, tr6
3022 1.1 mrg ptabs r60, tr5
3023 1.1 mrg ld.q r15, 23*8, r59
3024 1.1 mrg ld.q r15, 22*8, r58
3025 1.1 mrg ld.q r15, 21*8, r57
3026 1.1 mrg ld.q r15, 20*8, r56
3027 1.1 mrg ld.q r15, 19*8, r55
3028 1.1 mrg ld.q r15, 18*8, r54
3029 1.1 mrg ld.q r15, 17*8, r53
3030 1.1 mrg ld.q r15, 16*8, r52
3031 1.1 mrg ld.q r15, 15*8, r51
3032 1.1 mrg ld.q r15, 14*8, r50
3033 1.1 mrg ld.q r15, 13*8, r49
3034 1.1 mrg ld.q r15, 12*8, r48
3035 1.1 mrg ld.q r15, 11*8, r47
3036 1.1 mrg ld.q r15, 10*8, r46
3037 1.1 mrg ld.q r15, 9*8, r45
3038 1.1 mrg ld.q r15, 8*8, r44
3039 1.1 mrg ld.q r15, 7*8, r35
3040 1.1 mrg ld.q r15, 6*8, r34
3041 1.1 mrg ld.q r15, 5*8, r33
3042 1.1 mrg ld.q r15, 4*8, r32
3043 1.1 mrg ld.q r15, 3*8, r31
3044 1.1 mrg ld.q r15, 2*8, r30
3045 1.1 mrg ld.q r15, 1*8, r29
3046 1.1 mrg ld.q r15, 0*8, r28
/* Release the whole frame (size chosen by the entry point).  */
3047 1.1 mrg add.l r15, r0, r15
3048 1.1 mrg blink tr0, r63
3049 1.1 mrg
3050 1.1 mrg #ifndef __SH4_NOFPU__
3051 1.1 mrg ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3052 1.1 mrg #else
3053 1.1 mrg ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3054 1.1 mrg #endif
3055 1.1 mrg #endif /* L_push_pop_shmedia_regs */
3056 1.1 mrg #endif /* __SH5__ == 32 */
3057 1.1 mrg
3058 1.1 mrg #ifdef L_div_table
3059 1.1 mrg #if __SH5__
3060 1.1 mrg #if defined(__pic__) && __SHMEDIA__
/* PIC SHmedia build only: signed 32-bit division sdivsi3, placed in the
   same object as the division table so the table can be addressed
   PC-relatively (ptb).  The quotient is formed by multiplying the dividend
   with a reciprocal of the divisor that is refined in two steps from
   table look-ups; the fixed-point format of each intermediate is noted
   on the instruction that produces it (sN.M = signed, N integer bits,
   M fraction bits; u = unsigned).
   Note that .global GLOBAL(sdivsi3) appears twice (here and again just
   before the label).  */
3061 1.1 mrg .global GLOBAL(sdivsi3)
3062 1.1 mrg FUNC(GLOBAL(sdivsi3))
3063 1.1 mrg #if __SH5__ == 32
3064 1.1 mrg .section .text..SHmedia32,"ax"
3065 1.1 mrg #else
3066 1.1 mrg .text
3067 1.1 mrg #endif
3068 1.1 mrg #if 0
3069 1.1 mrg /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3070 1.1 mrg in a text section does not work (at least for shared libraries):
3071 1.1 mrg the linker sets the LSB of the address as if this was SHmedia code. */
3072 1.1 mrg #define TEXT_DATA_BUG
3073 1.1 mrg #endif
3074 1.1 mrg .align 2
3075 1.1 mrg // inputs: r4,r5
3076 1.1 mrg // clobbered: r1,r18,r19,r20,r21,r25,tr0
3077 1.1 mrg // result in r0
3078 1.1 mrg .global GLOBAL(sdivsi3)
3079 1.1 mrg GLOBAL(sdivsi3):
/* tr0 is used temporarily to obtain the table address (moved to r20 by
   the gettr below); it is reloaded with the return address from r18
   before r18 is reused as a scratch register.  */
3080 1.1 mrg #ifdef TEXT_DATA_BUG
3081 1.1 mrg ptb datalabel Local_div_table,tr0
3082 1.1 mrg #else
3083 1.1 mrg ptb GLOBAL(div_table_internal),tr0
3084 1.1 mrg #endif
3085 1.1 mrg nsb r5, r1
3086 1.1 mrg shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3087 1.1 mrg shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3088 1.1 mrg /* bubble */
3089 1.1 mrg gettr tr0,r20
/* r21 is a signed index: byte look-up at table + r21, word look-up at
   table + 2 * r21 (the table label sits in the middle of the data).  */
3090 1.1 mrg ldx.ub r20, r21, r19 // u0.8
3091 1.1 mrg shari r25, 32, r25 // normalize to s2.30
3092 1.1 mrg shlli r21, 1, r21
3093 1.1 mrg muls.l r25, r19, r19 // s2.38
3094 1.1 mrg ldx.w r20, r21, r21 // s2.14
3095 1.1 mrg ptabs r18, tr0
3096 1.1 mrg shari r19, 24, r19 // truncate to s2.14
3097 1.1 mrg sub r21, r19, r19 // some 11 bit inverse in s1.14
3098 1.1 mrg muls.l r19, r19, r21 // u0.28
/* r1 = 92 - (shift count from nsb): final right-shift amount used by the
   shard near the end.  */
3099 1.1 mrg sub r63, r1, r1
3100 1.1 mrg addi r1, 92, r1
3101 1.1 mrg muls.l r25, r21, r18 // s2.58
3102 1.1 mrg shlli r19, 45, r19 // multiply by two and convert to s2.58
3103 1.1 mrg /* bubble */
3104 1.1 mrg sub r19, r18, r18
3105 1.1 mrg shari r18, 28, r18 // some 22 bit inverse in s1.30
3106 1.1 mrg muls.l r18, r25, r0 // s2.60
3107 1.1 mrg muls.l r18, r4, r25 // s32.30
3108 1.1 mrg /* bubble */
3109 1.1 mrg shari r0, 16, r19 // s-16.44
3110 1.1 mrg muls.l r19, r18, r19 // s-16.74
/* r0 = sign mask (0 or -1) of the dividend * inverse product.  */
3111 1.1 mrg shari r25, 63, r0
3112 1.1 mrg shari r4, 14, r18 // s19.-14
3113 1.1 mrg shari r19, 30, r19 // s-16.44
3114 1.1 mrg muls.l r19, r18, r19 // s15.30
3115 1.1 mrg xor r21, r0, r21 // You could also use the constant 1 << 27.
3116 1.1 mrg add r21, r25, r21
3117 1.1 mrg sub r21, r19, r21
3118 1.1 mrg shard r21, r1, r21
3119 1.1 mrg sub r21, r0, r0
3120 1.1 mrg blink tr0, r63
3121 1.1 mrg ENDFUNC(GLOBAL(sdivsi3))
3122 1.1 mrg /* This table has been generated by divtab.c .
3123 1.1 mrg Defects for bias -330:
3124 1.1 mrg Max defect: 6.081536e-07 at -1.000000e+00
3125 1.1 mrg Min defect: 2.849516e-08 at 1.030651e+00
3126 1.1 mrg Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3127 1.1 mrg Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3128 1.1 mrg Defect at 1: 1.238659e-07
3129 1.1 mrg Defect at -2: 1.061708e-07 */
3130 1.1 mrg #else /* ! __pic__ || ! __SHMEDIA__ */
/* Without PIC SHmedia code the object contains only the table, which
   then lives in .rodata.  */
3131 1.1 mrg .section .rodata
3132 1.1 mrg #endif /* __pic__ */
/* Local_div_table: private copy of the division table, emitted in the
   current (text) section only when the TEXT_DATA_BUG workaround is
   enabled for PIC SHmedia code.  Its contents duplicate GLOBAL(div_table).
   Layout (128 bytes; the label sits at byte offset 64):
     -64 .. -33 : 16 words, negative division constants
     -32 .. -17 : 16 bytes, negative division factors
     -16 ..  15 : unused gap (two .skip 16 around the label)
      16 ..  31 : 16 bytes, positive division factors
      32 ..  63 : 16 words, positive division constants
   After the table the section is switched to .rodata for the exported
   copy.  */
3133 1.1 mrg #if defined(TEXT_DATA_BUG) && defined(__pic__) && __SHMEDIA__
3134 1.1 mrg .balign 2
3135 1.1 mrg .type Local_div_table,@object
3136 1.1 mrg .size Local_div_table,128
3137 1.1 mrg /* negative division constants */
3138 1.1 mrg .word -16638
3139 1.1 mrg .word -17135
3140 1.1 mrg .word -17737
3141 1.1 mrg .word -18433
3142 1.1 mrg .word -19103
3143 1.1 mrg .word -19751
3144 1.1 mrg .word -20583
3145 1.1 mrg .word -21383
3146 1.1 mrg .word -22343
3147 1.1 mrg .word -23353
3148 1.1 mrg .word -24407
3149 1.1 mrg .word -25582
3150 1.1 mrg .word -26863
3151 1.1 mrg .word -28382
3152 1.1 mrg .word -29965
3153 1.1 mrg .word -31800
3154 1.1 mrg /* negative division factors */
3155 1.1 mrg .byte 66
3156 1.1 mrg .byte 70
3157 1.1 mrg .byte 75
3158 1.1 mrg .byte 81
3159 1.1 mrg .byte 87
3160 1.1 mrg .byte 93
3161 1.1 mrg .byte 101
3162 1.1 mrg .byte 109
3163 1.1 mrg .byte 119
3164 1.1 mrg .byte 130
3165 1.1 mrg .byte 142
3166 1.1 mrg .byte 156
3167 1.1 mrg .byte 172
3168 1.1 mrg .byte 192
3169 1.1 mrg .byte 214
3170 1.1 mrg .byte 241
3171 1.1 mrg .skip 16
3172 1.1 mrg Local_div_table:
3173 1.1 mrg .skip 16
3174 1.1 mrg /* positive division factors */
3175 1.1 mrg .byte 241
3176 1.1 mrg .byte 214
3177 1.1 mrg .byte 192
3178 1.1 mrg .byte 172
3179 1.1 mrg .byte 156
3180 1.1 mrg .byte 142
3181 1.1 mrg .byte 130
3182 1.1 mrg .byte 119
3183 1.1 mrg .byte 109
3184 1.1 mrg .byte 101
3185 1.1 mrg .byte 93
3186 1.1 mrg .byte 87
3187 1.1 mrg .byte 81
3188 1.1 mrg .byte 75
3189 1.1 mrg .byte 70
3190 1.1 mrg .byte 66
3191 1.1 mrg /* positive division constants */
3192 1.1 mrg .word 31801
3193 1.1 mrg .word 29966
3194 1.1 mrg .word 28383
3195 1.1 mrg .word 26864
3196 1.1 mrg .word 25583
3197 1.1 mrg .word 24408
3198 1.1 mrg .word 23354
3199 1.1 mrg .word 22344
3200 1.1 mrg .word 21384
3201 1.1 mrg .word 20584
3202 1.1 mrg .word 19752
3203 1.1 mrg .word 19104
3204 1.1 mrg .word 18434
3205 1.1 mrg .word 17738
3206 1.1 mrg .word 17136
3207 1.1 mrg .word 16639
3208 1.1 mrg .section .rodata
3209 1.1 mrg #endif /* TEXT_DATA_BUG */
/* GLOBAL(div_table): exported SH5 division table, also reachable through
   the hidden alias div_table_internal.
   The symbol is defined in the MIDDLE of the data so that a signed index
   can address both halves:
     -64 .. -33 : 16 words, negative division constants
     -32 .. -17 : 16 bytes, negative division factors
     -16 ..  15 : unused gap (two .skip 16 around the label)
      16 ..  31 : 16 bytes, positive division factors
      32 ..  63 : 16 words, positive division constants
   Factors are read as unsigned bytes at (table + index) and constants as
   words at (table + 2 * index).
   NOTE(review): .size declares 128 bytes for a symbol located 64 bytes
   into the 128-byte data block, so the declared extent runs 64 bytes
   past the last constant -- confirm this is intentional.  */
3210 1.1 mrg .balign 2
3211 1.1 mrg .type GLOBAL(div_table),@object
3212 1.1 mrg .size GLOBAL(div_table),128
3213 1.1 mrg /* negative division constants */
3214 1.1 mrg .word -16638
3215 1.1 mrg .word -17135
3216 1.1 mrg .word -17737
3217 1.1 mrg .word -18433
3218 1.1 mrg .word -19103
3219 1.1 mrg .word -19751
3220 1.1 mrg .word -20583
3221 1.1 mrg .word -21383
3222 1.1 mrg .word -22343
3223 1.1 mrg .word -23353
3224 1.1 mrg .word -24407
3225 1.1 mrg .word -25582
3226 1.1 mrg .word -26863
3227 1.1 mrg .word -28382
3228 1.1 mrg .word -29965
3229 1.1 mrg .word -31800
3230 1.1 mrg /* negative division factors */
3231 1.1 mrg .byte 66
3232 1.1 mrg .byte 70
3233 1.1 mrg .byte 75
3234 1.1 mrg .byte 81
3235 1.1 mrg .byte 87
3236 1.1 mrg .byte 93
3237 1.1 mrg .byte 101
3238 1.1 mrg .byte 109
3239 1.1 mrg .byte 119
3240 1.1 mrg .byte 130
3241 1.1 mrg .byte 142
3242 1.1 mrg .byte 156
3243 1.1 mrg .byte 172
3244 1.1 mrg .byte 192
3245 1.1 mrg .byte 214
3246 1.1 mrg .byte 241
3247 1.1 mrg .skip 16
3248 1.1 mrg .global GLOBAL(div_table)
3249 1.1 mrg GLOBAL(div_table):
3250 1.1 mrg HIDDEN_ALIAS(div_table_internal,div_table)
3251 1.1 mrg .skip 16
3252 1.1 mrg /* positive division factors */
3253 1.1 mrg .byte 241
3254 1.1 mrg .byte 214
3255 1.1 mrg .byte 192
3256 1.1 mrg .byte 172
3257 1.1 mrg .byte 156
3258 1.1 mrg .byte 142
3259 1.1 mrg .byte 130
3260 1.1 mrg .byte 119
3261 1.1 mrg .byte 109
3262 1.1 mrg .byte 101
3263 1.1 mrg .byte 93
3264 1.1 mrg .byte 87
3265 1.1 mrg .byte 81
3266 1.1 mrg .byte 75
3267 1.1 mrg .byte 70
3268 1.1 mrg .byte 66
3269 1.1 mrg /* positive division constants */
3270 1.1 mrg .word 31801
3271 1.1 mrg .word 29966
3272 1.1 mrg .word 28383
3273 1.1 mrg .word 26864
3274 1.1 mrg .word 25583
3275 1.1 mrg .word 24408
3276 1.1 mrg .word 23354
3277 1.1 mrg .word 22344
3278 1.1 mrg .word 21384
3279 1.1 mrg .word 20584
3280 1.1 mrg .word 19752
3281 1.1 mrg .word 19104
3282 1.1 mrg .word 18434
3283 1.1 mrg .word 17738
3284 1.1 mrg .word 17136
3285 1.1 mrg .word 16639
3286 1.1 mrg
3287 1.1 mrg #elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3288 1.1 mrg /* This code uses shld, thus is not suitable for SH1 / SH2. */
3289 1.1 mrg
3290 1.1 mrg /* Signed / unsigned division without use of FPU, optimized for SH4.
3291 1.1 mrg Uses a lookup table for divisors in the range -128 .. +128, and
3292 1.1 mrg div1 with case distinction for larger divisors in three more ranges.
3293 1.1 mrg The code is lumped together with the table to allow the use of mova. */
/* Byte offsets inside a 32-bit word held in memory, selected by
   endianness: L_LSB = least significant byte, L_LSWMSB = upper byte of
   the low 16 bits, L_MSWLSB = lower byte of the high 16 bits.  They are
   used with mov.b to assemble the quotient bytewise in a stack slot.  */
3294 1.1 mrg #ifdef __LITTLE_ENDIAN__
3295 1.1 mrg #define L_LSB 0
3296 1.1 mrg #define L_LSWMSB 1
3297 1.1 mrg #define L_MSWLSB 2
3298 1.1 mrg #else
3299 1.1 mrg #define L_LSB 3
3300 1.1 mrg #define L_LSWMSB 2
3301 1.1 mrg #define L_MSWLSB 1
3302 1.1 mrg #endif
3303 1.1 mrg
/* udivsi3_i4i: unsigned 32-bit division.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are saved on the stack (r15) and reloaded before returning;
   r1 is used as scratch.  mach/macl are written on the table path.
   Instructions following bra, rts, bt/s and bf/s sit in the branch
   delay slot and execute before the branch takes effect.
   The labels without the "u" prefix in this function (div_le128,
   div_le128_2, div_by_1_neg, div_by_1, div_ge64k, div_ge64k_2, div_r8,
   div_r8_2) are also branched to from sdivsi3_i4i, which has already
   pushed r4 and r5 when it arrives here.  */
3304 1.1 mrg .balign 4
3305 1.1 mrg .global GLOBAL(udivsi3_i4i)
3306 1.1 mrg FUNC(GLOBAL(udivsi3_i4i))
3307 1.1 mrg GLOBAL(udivsi3_i4i):
3308 1.1 mrg mov.w LOCAL(c128_w), r1
3309 1.1 mrg div0u
3310 1.1 mrg mov r4,r0
3311 1.1 mrg shlr8 r0
/* T = divisor > 128 (unsigned); otherwise take the table path.  */
3312 1.1 mrg cmp/hi r1,r5
3313 1.1 mrg extu.w r5,r1
3314 1.1 mrg bf LOCAL(udiv_le128)
/* T = divisor fits in 16 bits; otherwise handle the >= 64K ranges.  */
3315 1.1 mrg cmp/eq r5,r1
3316 1.1 mrg bf LOCAL(udiv_ge64k)
3317 1.1 mrg shlr r0
3318 1.1 mrg mov r5,r1
3319 1.1 mrg shll16 r5
/* Save r4 and the original divisor (copied to r1 before the shift),
   interleaved with the first div1 steps; continue in sdivsi3_i4i.  */
3320 1.1 mrg mov.l r4,@-r15
3321 1.1 mrg div1 r5,r0
3322 1.1 mrg mov.l r1,@-r15
3323 1.1 mrg div1 r5,r0
3324 1.1 mrg div1 r5,r0
3325 1.1 mrg bra LOCAL(udiv_25)
3326 1.1 mrg div1 r5,r0
3327 1.1 mrg
/* Table path, signed entry: r4/r5 are already on the stack.  */
3328 1.1 mrg LOCAL(div_le128):
3329 1.1 mrg mova LOCAL(div_table_ix),r0
3330 1.1 mrg bra LOCAL(div_le128_2)
3331 1.1 mrg mov.b @(r0,r5),r1
/* Table path, unsigned entry: save r4/r5 while fetching the index.  */
3332 1.1 mrg LOCAL(udiv_le128):
3333 1.1 mrg mov.l r4,@-r15
3334 1.1 mrg mova LOCAL(div_table_ix),r0
3335 1.1 mrg mov.b @(r0,r5),r1
3336 1.1 mrg mov.l r5,@-r15
/* r1 = byte offset into div_table_inv for this divisor.  Load the
   normalized inverse, multiply with the dividend, then fetch the shift
   count from div_table_clz.  */
3337 1.1 mrg LOCAL(div_le128_2):
3338 1.1 mrg mova LOCAL(div_table_inv),r0
3339 1.1 mrg mov.l @(r0,r1),r1
3340 1.1 mrg mov r5,r0
/* T = (divisor & 0xfe) == 0, i.e. the divisor is 0 or 1.  */
3341 1.1 mrg tst #0xfe,r0
3342 1.1 mrg mova LOCAL(div_table_clz),r0
3343 1.1 mrg dmulu.l r1,r4
3344 1.1 mrg mov.b @(r0,r5),r1
3345 1.1 mrg bt/s LOCAL(div_by_1)
3346 1.1 mrg mov r4,r0
3347 1.1 mrg mov.l @r15+,r5
3348 1.1 mrg sts mach,r0
3349 1.1 mrg /* clrt */
/* Add the dividend once more for the implicit leading 1 of the inverse;
   the carry is rotated back in before the final shift by r1.  */
3350 1.1 mrg addc r4,r0
3351 1.1 mrg mov.l @r15+,r4
3352 1.1 mrg rotcr r0
3353 1.1 mrg rts
3354 1.1 mrg shld r1,r0
3355 1.1 mrg
/* Divisor of 1: result is the dividend (negated for the signed
   negative-result entry); just restore r5 and r4.  */
3356 1.1 mrg LOCAL(div_by_1_neg):
3357 1.1 mrg neg r4,r0
3358 1.1 mrg LOCAL(div_by_1):
3359 1.1 mrg mov.l @r15+,r5
3360 1.1 mrg rts
3361 1.1 mrg mov.l @r15+,r4
3362 1.1 mrg
/* Divisor >= 64K, signed entry (T was set by the caller's cmp/hi in the
   delay slot of its branch).  */
3363 1.1 mrg LOCAL(div_ge64k):
3364 1.1 mrg bt/s LOCAL(div_r8)
3365 1.1 mrg div0u
3366 1.1 mrg shll8 r5
3367 1.1 mrg bra LOCAL(div_ge64k_2)
3368 1.1 mrg div1 r5,r0
/* Divisor >= 64K, unsigned entry.  r0 holds dividend >> 8 here; if the
   divisor is larger than that, use the 8-step path.  */
3369 1.1 mrg LOCAL(udiv_ge64k):
3370 1.1 mrg cmp/hi r0,r5
3371 1.1 mrg mov r5,r1
3372 1.1 mrg bt LOCAL(udiv_r8)
3373 1.1 mrg shll8 r5
3374 1.1 mrg mov.l r4,@-r15
3375 1.1 mrg div1 r5,r0
3376 1.1 mrg mov.l r1,@-r15
3377 1.1 mrg LOCAL(div_ge64k_2):
3378 1.1 mrg div1 r5,r0
3379 1.1 mrg mov.l LOCAL(zero_l),r1
3380 1.1 mrg .rept 4
3381 1.1 mrg div1 r5,r0
3382 1.1 mrg .endr
/* Push a zeroed scratch word in which the quotient bytes are collected.  */
3383 1.1 mrg mov.l r1,@-r15
3384 1.1 mrg div1 r5,r0
3385 1.1 mrg mov.w LOCAL(m256_w),r1
3386 1.1 mrg div1 r5,r0
3387 1.1 mrg mov.b r0,@(L_LSWMSB,r15)
/* r0 = (r0 & mask) | (r4 & ~mask) with the mask loaded from m256_w,
   done as xor / and / xor; the last xor is in the delay slot.  */
3388 1.1 mrg xor r4,r0
3389 1.1 mrg and r1,r0
3390 1.1 mrg bra LOCAL(div_ge64k_end)
3391 1.1 mrg xor r4,r0
3392 1.1 mrg
/* 8-step path, signed entry.  */
3393 1.1 mrg LOCAL(div_r8):
3394 1.1 mrg shll16 r4
3395 1.1 mrg bra LOCAL(div_r8_2)
3396 1.1 mrg shll8 r4
/* 8-step path, unsigned entry: save r4/r5 first.  */
3397 1.1 mrg LOCAL(udiv_r8):
3398 1.1 mrg mov.l r4,@-r15
3399 1.1 mrg shll16 r4
3400 1.1 mrg clrt
3401 1.1 mrg shll8 r4
3402 1.1 mrg mov.l r5,@-r15
/* r4 has been shifted left by 24 in total; quotient bits are rotated
   into r0 while r1 carries the running remainder.  r4 is reused to hold
   the divisor for the last step because r5 is reloaded early.  */
3403 1.1 mrg LOCAL(div_r8_2):
3404 1.1 mrg rotcl r4
3405 1.1 mrg mov r0,r1
3406 1.1 mrg div1 r5,r1
3407 1.1 mrg mov r4,r0
3408 1.1 mrg rotcl r0
3409 1.1 mrg mov r5,r4
3410 1.1 mrg div1 r5,r1
3411 1.1 mrg .rept 5
3412 1.1 mrg rotcl r0; div1 r5,r1
3413 1.1 mrg .endr
3414 1.1 mrg rotcl r0
3415 1.1 mrg mov.l @r15+,r5
3416 1.1 mrg div1 r4,r1
3417 1.1 mrg mov.l @r15+,r4
3418 1.1 mrg rts
3419 1.1 mrg rotcl r0
3420 1.1 mrg
3421 1.1 mrg ENDFUNC(GLOBAL(udivsi3_i4i))
3422 1.1 mrg
/* sdivsi3_i4i: signed 32-bit division.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed on entry and reloaded before every return; r1 is
   scratch.  Both operands are made non-negative first; the sign of the
   result selects between the pos_result and neg_result paths, which
   perform the same unsigned division and differ in the final negation.
   Several targets (div_le128, div_ge64k, div_by_1_neg, ...) live inside
   udivsi3_i4i, and udivsi3_i4i in turn branches to udiv_25 and
   div_ge64k_end here.  The word constants c128_w and m256_w are placed
   between the code paths.
   Instructions following bra, rts, bt/s and bf/s are delay slots.  */
3423 1.1 mrg .global GLOBAL(sdivsi3_i4i)
3424 1.1 mrg FUNC(GLOBAL(sdivsi3_i4i))
3425 1.1 mrg /* This is link-compatible with a GLOBAL(sdivsi3) call,
3426 1.1 mrg but we effectively clobber only r1. */
3427 1.1 mrg GLOBAL(sdivsi3_i4i):
3428 1.1 mrg mov.l r4,@-r15
/* T = divisor >= 0; the delay slot then sets T = dividend >= 0.  */
3429 1.1 mrg cmp/pz r5
3430 1.1 mrg mov.w LOCAL(c128_w), r1
3431 1.1 mrg bt/s LOCAL(pos_divisor)
3432 1.1 mrg cmp/pz r4
/* Negative divisor: save it, negate it.  A non-negative dividend gives a
   negative result; otherwise negate the dividend as well.  The delay
   slot sets T = |divisor| > 128 for the range test that follows.  */
3433 1.1 mrg mov.l r5,@-r15
3434 1.1 mrg neg r5,r5
3435 1.1 mrg bt/s LOCAL(neg_result)
3436 1.1 mrg cmp/hi r1,r5
3437 1.1 mrg neg r4,r4
3438 1.1 mrg LOCAL(pos_result):
3439 1.1 mrg extu.w r5,r0
/* Divisor <= 128: table path.  */
3440 1.1 mrg bf LOCAL(div_le128)
/* T = divisor fits in 16 bits; r0 = dividend >> 8.  */
3441 1.1 mrg cmp/eq r5,r0
3442 1.1 mrg mov r4,r0
3443 1.1 mrg shlr8 r0
3444 1.1 mrg bf/s LOCAL(div_ge64k)
3445 1.1 mrg cmp/hi r0,r5
3446 1.1 mrg div0u
3447 1.1 mrg shll16 r5
3448 1.1 mrg div1 r5,r0
3449 1.1 mrg div1 r5,r0
3450 1.1 mrg div1 r5,r0
/* Entered from udivsi3_i4i as well.  A zeroed scratch word is pushed and
   the quotient is stored into it byte by byte between div1 runs.  */
3451 1.1 mrg LOCAL(udiv_25):
3452 1.1 mrg mov.l LOCAL(zero_l),r1
3453 1.1 mrg div1 r5,r0
3454 1.1 mrg div1 r5,r0
3455 1.1 mrg mov.l r1,@-r15
3456 1.1 mrg .rept 3
3457 1.1 mrg div1 r5,r0
3458 1.1 mrg .endr
3459 1.1 mrg mov.b r0,@(L_MSWLSB,r15)
3460 1.1 mrg xtrct r4,r0
3461 1.1 mrg swap.w r0,r0
3462 1.1 mrg .rept 8
3463 1.1 mrg div1 r5,r0
3464 1.1 mrg .endr
3465 1.1 mrg mov.b r0,@(L_LSWMSB,r15)
3466 1.1 mrg LOCAL(div_ge64k_end):
3467 1.1 mrg .rept 8
3468 1.1 mrg div1 r5,r0
3469 1.1 mrg .endr
/* Pop the scratch word (collected upper quotient bytes) into r4, merge
   with the low byte in r0, then restore r5 and r4.  */
3470 1.1 mrg mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3471 1.1 mrg extu.b r0,r0
3472 1.1 mrg mov.l @r15+,r5
3473 1.1 mrg or r4,r0
3474 1.1 mrg mov.l @r15+,r4
3475 1.1 mrg rts
3476 1.1 mrg rotcl r0
3477 1.1 mrg
/* Table path for a negative result.  r0 holds the zero-extended low 16
   bits of the divisor on entry; T = divisor is 0 or 1 after the tst.  */
3478 1.1 mrg LOCAL(div_le128_neg):
3479 1.1 mrg tst #0xfe,r0
3480 1.1 mrg mova LOCAL(div_table_ix),r0
3481 1.1 mrg mov.b @(r0,r5),r1
3482 1.1 mrg mova LOCAL(div_table_inv),r0
3483 1.1 mrg bt/s LOCAL(div_by_1_neg)
3484 1.1 mrg mov.l @(r0,r1),r1
3485 1.1 mrg mova LOCAL(div_table_clz),r0
3486 1.1 mrg dmulu.l r1,r4
3487 1.1 mrg mov.b @(r0,r5),r1
3488 1.1 mrg mov.l @r15+,r5
3489 1.1 mrg sts mach,r0
3490 1.1 mrg /* clrt */
3491 1.1 mrg addc r4,r0
3492 1.1 mrg mov.l @r15+,r4
3493 1.1 mrg rotcr r0
3494 1.1 mrg shld r1,r0
3495 1.1 mrg rts
3496 1.1 mrg neg r0,r0
3497 1.1 mrg
/* Non-negative divisor: save it.  T still reflects dividend >= 0; the
   delay slot sets T = divisor > 128.  */
3498 1.1 mrg LOCAL(pos_divisor):
3499 1.1 mrg mov.l r5,@-r15
3500 1.1 mrg bt/s LOCAL(pos_result)
3501 1.1 mrg cmp/hi r1,r5
3502 1.1 mrg neg r4,r4
/* Same range dispatch as pos_result, for results that must be negated.  */
3503 1.1 mrg LOCAL(neg_result):
3504 1.1 mrg extu.w r5,r0
3505 1.1 mrg bf LOCAL(div_le128_neg)
3506 1.1 mrg cmp/eq r5,r0
3507 1.1 mrg mov r4,r0
3508 1.1 mrg shlr8 r0
3509 1.1 mrg bf/s LOCAL(div_ge64k_neg)
3510 1.1 mrg cmp/hi r0,r5
3511 1.1 mrg div0u
3512 1.1 mrg mov.l LOCAL(zero_l),r1
3513 1.1 mrg shll16 r5
3514 1.1 mrg div1 r5,r0
3515 1.1 mrg mov.l r1,@-r15
3516 1.1 mrg .rept 7
3517 1.1 mrg div1 r5,r0
3518 1.1 mrg .endr
3519 1.1 mrg mov.b r0,@(L_MSWLSB,r15)
3520 1.1 mrg xtrct r4,r0
3521 1.1 mrg swap.w r0,r0
3522 1.1 mrg .rept 8
3523 1.1 mrg div1 r5,r0
3524 1.1 mrg .endr
3525 1.1 mrg mov.b r0,@(L_LSWMSB,r15)
3526 1.1 mrg LOCAL(div_ge64k_neg_end):
3527 1.1 mrg .rept 8
3528 1.1 mrg div1 r5,r0
3529 1.1 mrg .endr
3530 1.1 mrg mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3531 1.1 mrg extu.b r0,r1
3532 1.1 mrg mov.l @r15+,r5
3533 1.1 mrg or r4,r1
/* Common tail for negative results: restore r4, shift in the final
   quotient bit and negate in the delay slot of rts.  */
3534 1.1 mrg LOCAL(div_r8_neg_end):
3535 1.1 mrg mov.l @r15+,r4
3536 1.1 mrg rotcl r1
3537 1.1 mrg rts
3538 1.1 mrg neg r1,r0
3539 1.1 mrg
/* Divisor >= 64K, negative result.  T comes from the cmp/hi in the delay
   slot of the branch that led here.  */
3540 1.1 mrg LOCAL(div_ge64k_neg):
3541 1.1 mrg bt/s LOCAL(div_r8_neg)
3542 1.1 mrg div0u
3543 1.1 mrg shll8 r5
3544 1.1 mrg mov.l LOCAL(zero_l),r1
3545 1.1 mrg .rept 6
3546 1.1 mrg div1 r5,r0
3547 1.1 mrg .endr
3548 1.1 mrg mov.l r1,@-r15
3549 1.1 mrg div1 r5,r0
3550 1.1 mrg mov.w LOCAL(m256_w),r1
3551 1.1 mrg div1 r5,r0
3552 1.1 mrg mov.b r0,@(L_LSWMSB,r15)
3553 1.1 mrg xor r4,r0
3554 1.1 mrg and r1,r0
3555 1.1 mrg bra LOCAL(div_ge64k_neg_end)
3556 1.1 mrg xor r4,r0
3557 1.1 mrg
/* 16-bit constant 128, loaded with mov.w as the table-range limit.  */
3558 1.1 mrg LOCAL(c128_w):
3559 1.1 mrg .word 128
3560 1.1 mrg
/* 8-step path for a negative result.  r1 collects the quotient bits and
   r0 the remainder; r4 keeps the divisor for the last step because r5 is
   reloaded from the stack beforehand.  */
3561 1.1 mrg LOCAL(div_r8_neg):
3562 1.1 mrg clrt
3563 1.1 mrg shll16 r4
3564 1.1 mrg mov r4,r1
3565 1.1 mrg shll8 r1
3566 1.1 mrg mov r5,r4
3567 1.1 mrg .rept 7
3568 1.1 mrg rotcl r1; div1 r5,r0
3569 1.1 mrg .endr
3570 1.1 mrg mov.l @r15+,r5
3571 1.1 mrg rotcl r1
3572 1.1 mrg bra LOCAL(div_r8_neg_end)
3573 1.1 mrg div1 r4,r0
3574 1.1 mrg
/* 16-bit constant 0xff00; mov.w sign-extends it when loaded.  */
3575 1.1 mrg LOCAL(m256_w):
3576 1.1 mrg .word 0xff00
3577 1.1 mrg /* This table has been generated by divtab-sh4.c. */
/* div_table_clz: one signed byte per divisor value 0..127, read with
   mov.b @(r0,r5) and used as the shift count of the final
   "shld r1,r0" on the table path (negative counts shift right).
   The entry for divisor 128 is not stored here; it is the first byte of
   div_table_ix, which follows immediately.  */
3578 1.1 mrg .balign 4
3579 1.1 mrg LOCAL(div_table_clz):
3580 1.1 mrg .byte 0
3581 1.1 mrg .byte 1
3582 1.1 mrg .byte 0
3583 1.1 mrg .byte -1
3584 1.1 mrg .byte -1
3585 1.1 mrg .byte -2
3586 1.1 mrg .byte -2
3587 1.1 mrg .byte -2
3588 1.1 mrg .byte -2
3589 1.1 mrg .byte -3
3590 1.1 mrg .byte -3
3591 1.1 mrg .byte -3
3592 1.1 mrg .byte -3
3593 1.1 mrg .byte -3
3594 1.1 mrg .byte -3
3595 1.1 mrg .byte -3
3596 1.1 mrg .byte -3
3597 1.1 mrg .byte -4
3598 1.1 mrg .byte -4
3599 1.1 mrg .byte -4
3600 1.1 mrg .byte -4
3601 1.1 mrg .byte -4
3602 1.1 mrg .byte -4
3603 1.1 mrg .byte -4
3604 1.1 mrg .byte -4
3605 1.1 mrg .byte -4
3606 1.1 mrg .byte -4
3607 1.1 mrg .byte -4
3608 1.1 mrg .byte -4
3609 1.1 mrg .byte -4
3610 1.1 mrg .byte -4
3611 1.1 mrg .byte -4
3612 1.1 mrg .byte -4
3613 1.1 mrg .byte -5
3614 1.1 mrg .byte -5
3615 1.1 mrg .byte -5
3616 1.1 mrg .byte -5
3617 1.1 mrg .byte -5
3618 1.1 mrg .byte -5
3619 1.1 mrg .byte -5
3620 1.1 mrg .byte -5
3621 1.1 mrg .byte -5
3622 1.1 mrg .byte -5
3623 1.1 mrg .byte -5
3624 1.1 mrg .byte -5
3625 1.1 mrg .byte -5
3626 1.1 mrg .byte -5
3627 1.1 mrg .byte -5
3628 1.1 mrg .byte -5
3629 1.1 mrg .byte -5
3630 1.1 mrg .byte -5
3631 1.1 mrg .byte -5
3632 1.1 mrg .byte -5
3633 1.1 mrg .byte -5
3634 1.1 mrg .byte -5
3635 1.1 mrg .byte -5
3636 1.1 mrg .byte -5
3637 1.1 mrg .byte -5
3638 1.1 mrg .byte -5
3639 1.1 mrg .byte -5
3640 1.1 mrg .byte -5
3641 1.1 mrg .byte -5
3642 1.1 mrg .byte -5
3643 1.1 mrg .byte -5
3644 1.1 mrg .byte -5
3645 1.1 mrg .byte -6
3646 1.1 mrg .byte -6
3647 1.1 mrg .byte -6
3648 1.1 mrg .byte -6
3649 1.1 mrg .byte -6
3650 1.1 mrg .byte -6
3651 1.1 mrg .byte -6
3652 1.1 mrg .byte -6
3653 1.1 mrg .byte -6
3654 1.1 mrg .byte -6
3655 1.1 mrg .byte -6
3656 1.1 mrg .byte -6
3657 1.1 mrg .byte -6
3658 1.1 mrg .byte -6
3659 1.1 mrg .byte -6
3660 1.1 mrg .byte -6
3661 1.1 mrg .byte -6
3662 1.1 mrg .byte -6
3663 1.1 mrg .byte -6
3664 1.1 mrg .byte -6
3665 1.1 mrg .byte -6
3666 1.1 mrg .byte -6
3667 1.1 mrg .byte -6
3668 1.1 mrg .byte -6
3669 1.1 mrg .byte -6
3670 1.1 mrg .byte -6
3671 1.1 mrg .byte -6
3672 1.1 mrg .byte -6
3673 1.1 mrg .byte -6
3674 1.1 mrg .byte -6
3675 1.1 mrg .byte -6
3676 1.1 mrg .byte -6
3677 1.1 mrg .byte -6
3678 1.1 mrg .byte -6
3679 1.1 mrg .byte -6
3680 1.1 mrg .byte -6
3681 1.1 mrg .byte -6
3682 1.1 mrg .byte -6
3683 1.1 mrg .byte -6
3684 1.1 mrg .byte -6
3685 1.1 mrg .byte -6
3686 1.1 mrg .byte -6
3687 1.1 mrg .byte -6
3688 1.1 mrg .byte -6
3689 1.1 mrg .byte -6
3690 1.1 mrg .byte -6
3691 1.1 mrg .byte -6
3692 1.1 mrg .byte -6
3693 1.1 mrg .byte -6
3694 1.1 mrg .byte -6
3695 1.1 mrg .byte -6
3696 1.1 mrg .byte -6
3697 1.1 mrg .byte -6
3698 1.1 mrg .byte -6
3699 1.1 mrg .byte -6
3700 1.1 mrg .byte -6
3701 1.1 mrg .byte -6
3702 1.1 mrg .byte -6
3703 1.1 mrg .byte -6
3704 1.1 mrg .byte -6
3705 1.1 mrg .byte -6
3706 1.1 mrg .byte -6
3707 1.1 mrg .byte -6
3708 1.1 mrg /* Lookup table translating positive divisor to index into table of
3709 1.1 mrg normalized inverse. N.B. the '0' entry is also the last entry of the
3710 1.1 mrg previous table, and causes an unaligned access for division by zero. */
/* Entries are signed BYTE offsets relative to div_table_inv, one per
   divisor 0..128, consumed by "mov.l @(r0,r1),r1".  Offsets range from
   -128 to +124 in multiples of 4 (except entry 0).  Every power of two
   maps to -128, which is the zero word at zero_l.  */
3711 1.1 mrg LOCAL(div_table_ix):
3712 1.1 mrg .byte -6
3713 1.1 mrg .byte -128
3714 1.1 mrg .byte -128
3715 1.1 mrg .byte 0
3716 1.1 mrg .byte -128
3717 1.1 mrg .byte -64
3718 1.1 mrg .byte 0
3719 1.1 mrg .byte 64
3720 1.1 mrg .byte -128
3721 1.1 mrg .byte -96
3722 1.1 mrg .byte -64
3723 1.1 mrg .byte -32
3724 1.1 mrg .byte 0
3725 1.1 mrg .byte 32
3726 1.1 mrg .byte 64
3727 1.1 mrg .byte 96
3728 1.1 mrg .byte -128
3729 1.1 mrg .byte -112
3730 1.1 mrg .byte -96
3731 1.1 mrg .byte -80
3732 1.1 mrg .byte -64
3733 1.1 mrg .byte -48
3734 1.1 mrg .byte -32
3735 1.1 mrg .byte -16
3736 1.1 mrg .byte 0
3737 1.1 mrg .byte 16
3738 1.1 mrg .byte 32
3739 1.1 mrg .byte 48
3740 1.1 mrg .byte 64
3741 1.1 mrg .byte 80
3742 1.1 mrg .byte 96
3743 1.1 mrg .byte 112
3744 1.1 mrg .byte -128
3745 1.1 mrg .byte -120
3746 1.1 mrg .byte -112
3747 1.1 mrg .byte -104
3748 1.1 mrg .byte -96
3749 1.1 mrg .byte -88
3750 1.1 mrg .byte -80
3751 1.1 mrg .byte -72
3752 1.1 mrg .byte -64
3753 1.1 mrg .byte -56
3754 1.1 mrg .byte -48
3755 1.1 mrg .byte -40
3756 1.1 mrg .byte -32
3757 1.1 mrg .byte -24
3758 1.1 mrg .byte -16
3759 1.1 mrg .byte -8
3760 1.1 mrg .byte 0
3761 1.1 mrg .byte 8
3762 1.1 mrg .byte 16
3763 1.1 mrg .byte 24
3764 1.1 mrg .byte 32
3765 1.1 mrg .byte 40
3766 1.1 mrg .byte 48
3767 1.1 mrg .byte 56
3768 1.1 mrg .byte 64
3769 1.1 mrg .byte 72
3770 1.1 mrg .byte 80
3771 1.1 mrg .byte 88
3772 1.1 mrg .byte 96
3773 1.1 mrg .byte 104
3774 1.1 mrg .byte 112
3775 1.1 mrg .byte 120
3776 1.1 mrg .byte -128
3777 1.1 mrg .byte -124
3778 1.1 mrg .byte -120
3779 1.1 mrg .byte -116
3780 1.1 mrg .byte -112
3781 1.1 mrg .byte -108
3782 1.1 mrg .byte -104
3783 1.1 mrg .byte -100
3784 1.1 mrg .byte -96
3785 1.1 mrg .byte -92
3786 1.1 mrg .byte -88
3787 1.1 mrg .byte -84
3788 1.1 mrg .byte -80
3789 1.1 mrg .byte -76
3790 1.1 mrg .byte -72
3791 1.1 mrg .byte -68
3792 1.1 mrg .byte -64
3793 1.1 mrg .byte -60
3794 1.1 mrg .byte -56
3795 1.1 mrg .byte -52
3796 1.1 mrg .byte -48
3797 1.1 mrg .byte -44
3798 1.1 mrg .byte -40
3799 1.1 mrg .byte -36
3800 1.1 mrg .byte -32
3801 1.1 mrg .byte -28
3802 1.1 mrg .byte -24
3803 1.1 mrg .byte -20
3804 1.1 mrg .byte -16
3805 1.1 mrg .byte -12
3806 1.1 mrg .byte -8
3807 1.1 mrg .byte -4
3808 1.1 mrg .byte 0
3809 1.1 mrg .byte 4
3810 1.1 mrg .byte 8
3811 1.1 mrg .byte 12
3812 1.1 mrg .byte 16
3813 1.1 mrg .byte 20
3814 1.1 mrg .byte 24
3815 1.1 mrg .byte 28
3816 1.1 mrg .byte 32
3817 1.1 mrg .byte 36
3818 1.1 mrg .byte 40
3819 1.1 mrg .byte 44
3820 1.1 mrg .byte 48
3821 1.1 mrg .byte 52
3822 1.1 mrg .byte 56
3823 1.1 mrg .byte 60
3824 1.1 mrg .byte 64
3825 1.1 mrg .byte 68
3826 1.1 mrg .byte 72
3827 1.1 mrg .byte 76
3828 1.1 mrg .byte 80
3829 1.1 mrg .byte 84
3830 1.1 mrg .byte 88
3831 1.1 mrg .byte 92
3832 1.1 mrg .byte 96
3833 1.1 mrg .byte 100
3834 1.1 mrg .byte 104
3835 1.1 mrg .byte 108
3836 1.1 mrg .byte 112
3837 1.1 mrg .byte 116
3838 1.1 mrg .byte 120
3839 1.1 mrg .byte 124
3840 1.1 mrg .byte -128
3841 1.1 mrg /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
/* 64 consecutive 32-bit entries.  The label div_table_inv is placed after
   the first 32 of them, so the signed byte offsets from div_table_ix
   (-128 .. +124) reach the whole array.
   The first entry doubles as LOCAL(zero_l): the division code loads it
   with "mov.l LOCAL(zero_l),r1" whenever it needs a zero word, and it is
   the inverse fraction selected for power-of-two divisors (offset -128).
   Keep exactly 32 entries between zero_l and div_table_inv.  */
3842 1.1 mrg .balign 4
3843 1.1 mrg LOCAL(zero_l):
3844 1.1 mrg .long 0x0
3845 1.1 mrg .long 0xF81F81F9
3846 1.1 mrg .long 0xF07C1F08
3847 1.1 mrg .long 0xE9131AC0
3848 1.1 mrg .long 0xE1E1E1E2
3849 1.1 mrg .long 0xDAE6076C
3850 1.1 mrg .long 0xD41D41D5
3851 1.1 mrg .long 0xCD856891
3852 1.1 mrg .long 0xC71C71C8
3853 1.1 mrg .long 0xC0E07039
3854 1.1 mrg .long 0xBACF914D
3855 1.1 mrg .long 0xB4E81B4F
3856 1.1 mrg .long 0xAF286BCB
3857 1.1 mrg .long 0xA98EF607
3858 1.1 mrg .long 0xA41A41A5
3859 1.1 mrg .long 0x9EC8E952
3860 1.1 mrg .long 0x9999999A
3861 1.1 mrg .long 0x948B0FCE
3862 1.1 mrg .long 0x8F9C18FA
3863 1.1 mrg .long 0x8ACB90F7
3864 1.1 mrg .long 0x86186187
3865 1.1 mrg .long 0x81818182
3866 1.1 mrg .long 0x7D05F418
3867 1.1 mrg .long 0x78A4C818
3868 1.1 mrg .long 0x745D1746
3869 1.1 mrg .long 0x702E05C1
3870 1.1 mrg .long 0x6C16C16D
3871 1.1 mrg .long 0x68168169
3872 1.1 mrg .long 0x642C8591
3873 1.1 mrg .long 0x60581606
3874 1.1 mrg .long 0x5C9882BA
3875 1.1 mrg .long 0x58ED2309
3876 1.1 mrg LOCAL(div_table_inv):
3877 1.1 mrg .long 0x55555556
3878 1.1 mrg .long 0x51D07EAF
3879 1.1 mrg .long 0x4E5E0A73
3880 1.1 mrg .long 0x4AFD6A06
3881 1.1 mrg .long 0x47AE147B
3882 1.1 mrg .long 0x446F8657
3883 1.1 mrg .long 0x41414142
3884 1.1 mrg .long 0x3E22CBCF
3885 1.1 mrg .long 0x3B13B13C
3886 1.1 mrg .long 0x38138139
3887 1.1 mrg .long 0x3521CFB3
3888 1.1 mrg .long 0x323E34A3
3889 1.1 mrg .long 0x2F684BDB
3890 1.1 mrg .long 0x2C9FB4D9
3891 1.1 mrg .long 0x29E4129F
3892 1.1 mrg .long 0x27350B89
3893 1.1 mrg .long 0x24924925
3894 1.1 mrg .long 0x21FB7813
3895 1.1 mrg .long 0x1F7047DD
3896 1.1 mrg .long 0x1CF06ADB
3897 1.1 mrg .long 0x1A7B9612
3898 1.1 mrg .long 0x18118119
3899 1.1 mrg .long 0x15B1E5F8
3900 1.1 mrg .long 0x135C8114
3901 1.1 mrg .long 0x11111112
3902 1.1 mrg .long 0xECF56BF
3903 1.1 mrg .long 0xC9714FC
3904 1.1 mrg .long 0xA6810A7
3905 1.1 mrg .long 0x8421085
3906 1.1 mrg .long 0x624DD30
3907 1.1 mrg .long 0x4104105
3908 1.1 mrg .long 0x2040811
3909 1.1 mrg /* maximum error: 0.987342 scaled: 0.921875*/
3910 1.1 mrg
/* The tables above are inside the sdivsi3_i4i symbol's extent so that
   mova can reach them from the code.  */
3911 1.1 mrg ENDFUNC(GLOBAL(sdivsi3_i4i))
3912 1.1 mrg #endif /* SH3 / SH4 */
3913 1.1 mrg
3914 1.1 mrg #endif /* L_div_table */
3915 1.1 mrg
3916 1.1 mrg #ifdef L_udiv_qrnnd_16
3917 1.1 mrg #if !__SHMEDIA__
/* udiv_qrnnd_16: one 16-bit quotient step of a multi-word unsigned
   division (non-SHmedia only).  Register roles are given in the comment
   below: on entry r0 = n1, r4 = n0, r5 = d, r6 = d1; on exit r0 = rn
   (remainder) and r1 = qn (quotient digit); r2 is scratch for the
   product read from macl.  r4, r5 and r6 are not written.
   Outline: 16 div1 steps of r0 by r6 give a quotient estimate in r1,
   which is multiplied by r5 (mulu.w) and subtracted from the remainder
   combined with bits of r4; the estimate is then decremented and the
   remainder corrected by adding r5 up to twice.  When n1 > d1 on entry
   the .Lots path is taken and starts from the quotient 0xffff.
   Instructions after rts are delay slots.
   NOTE(review): SL1 is a macro defined outside this chunk (presumably
   in lib1funcs.h, emitting a branch with or without delay slot) --
   confirm its expansion before editing the .Lots path.  */
3918 1.1 mrg HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
3919 1.1 mrg /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
3920 1.1 mrg /* n1 < d, but n1 might be larger than d1. */
3921 1.1 mrg .global GLOBAL(udiv_qrnnd_16)
3922 1.1 mrg .balign 8
3923 1.1 mrg GLOBAL(udiv_qrnnd_16):
3924 1.1 mrg div0u
/* T = n1 > d1: the div1 sequence cannot be used directly.  */
3925 1.1 mrg cmp/hi r6,r0
3926 1.1 mrg bt .Lots
3927 1.1 mrg .rept 16
3928 1.1 mrg div1 r6,r0
3929 1.1 mrg .endr
3930 1.1 mrg extu.w r0,r1
/* T is the last quotient bit from div1; when it is clear, add the
   divisor part back to the remainder.  rotcl then shifts T into r1.  */
3931 1.1 mrg bt 0f
3932 1.1 mrg add r6,r0
3933 1.1 mrg 0: rotcl r1
3934 1.1 mrg mulu.w r1,r5
3935 1.1 mrg xtrct r4,r0
3936 1.1 mrg swap.w r0,r0
3937 1.1 mrg sts macl,r2
/* T = r0 >= r2 (no borrow): the estimate was exact, return via 0:.  */
3938 1.1 mrg cmp/hs r2,r0
3939 1.1 mrg sub r2,r0
3940 1.1 mrg bt 0f
/* First correction; carry out of the addc means one step sufficed.  */
3941 1.1 mrg addc r5,r0
3942 1.1 mrg add #-1,r1
3943 1.1 mrg bt 0f
/* Second correction; the add in the delay slot fixes the remainder.  */
3944 1.1 mrg 1: add #-1,r1
3945 1.1 mrg rts
3946 1.1 mrg add r5,r0
3947 1.1 mrg .balign 8
3948 1.1 mrg .Lots:
3949 1.1 mrg sub r5,r0
3950 1.1 mrg swap.w r4,r1
3951 1.1 mrg xtrct r0,r1
3952 1.1 mrg clrt
3953 1.1 mrg mov r1,r0
3954 1.1 mrg addc r5,r0
3955 1.1 mrg mov #-1,r1
3956 1.1 mrg SL1(bf, 1b,
3957 1.1 mrg shlr16 r1)
3958 1.1 mrg 0: rts
3959 1.1 mrg nop
3960 1.1 mrg ENDFUNC(GLOBAL(udiv_qrnnd_16))
3961 1.1 mrg #endif /* !__SHMEDIA__ */
3962 1.1 mrg #endif /* L_udiv_qrnnd_16 */
3963