lib1funcs.S revision 1.3 1 1.3 mrg /* Copyright (C) 1994-2015 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is free software; you can redistribute it and/or modify it
4 1.1 mrg under the terms of the GNU General Public License as published by the
5 1.1 mrg Free Software Foundation; either version 3, or (at your option) any
6 1.1 mrg later version.
7 1.1 mrg
8 1.1 mrg This file is distributed in the hope that it will be useful, but
9 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of
10 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 1.1 mrg General Public License for more details.
12 1.1 mrg
13 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
14 1.1 mrg permissions described in the GCC Runtime Library Exception, version
15 1.1 mrg 3.1, as published by the Free Software Foundation.
16 1.1 mrg
17 1.1 mrg You should have received a copy of the GNU General Public License and
18 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
19 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20 1.1 mrg <http://www.gnu.org/licenses/>. */
21 1.1 mrg
22 1.1 mrg
23 1.1 mrg !! libgcc routines for the Renesas / SuperH SH CPUs.
24 1.1 mrg !! Contributed by Steve Chamberlain.
25 1.1 mrg !! sac@cygnus.com
26 1.1 mrg
27 1.1 mrg !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
28 1.1 mrg !! recoded in assembly by Toshiyasu Morita
29 1.1 mrg !! tm@netcom.com
30 1.1 mrg
31 1.1 mrg #if defined(__ELF__) && defined(__linux__)
32 1.1 mrg .section .note.GNU-stack,"",%progbits
33 1.1 mrg .previous
34 1.1 mrg #endif
35 1.1 mrg
36 1.1 mrg /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 1.1 mrg ELF local label prefixes by J"orn Rennecke
38 1.1 mrg amylaar (at) cygnus.com */
39 1.1 mrg
40 1.1 mrg #include "lib1funcs.h"
41 1.1 mrg
42 1.1 mrg /* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
43 1.1 mrg so it is more convenient to define NO_FPSCR_VALUES here than to
44 1.1 mrg define it on the command line. */
45 1.1 mrg #if defined __vxworks && defined __PIC__
46 1.1 mrg #define NO_FPSCR_VALUES
47 1.1 mrg #endif
48 1.1 mrg
49 1.1 mrg #if ! __SH5__
50 1.1 mrg #ifdef L_ashiftrt
51 1.1 mrg .global GLOBAL(ashiftrt_r4_0)
52 1.1 mrg .global GLOBAL(ashiftrt_r4_1)
53 1.1 mrg .global GLOBAL(ashiftrt_r4_2)
54 1.1 mrg .global GLOBAL(ashiftrt_r4_3)
55 1.1 mrg .global GLOBAL(ashiftrt_r4_4)
56 1.1 mrg .global GLOBAL(ashiftrt_r4_5)
57 1.1 mrg .global GLOBAL(ashiftrt_r4_6)
58 1.1 mrg .global GLOBAL(ashiftrt_r4_7)
59 1.1 mrg .global GLOBAL(ashiftrt_r4_8)
60 1.1 mrg .global GLOBAL(ashiftrt_r4_9)
61 1.1 mrg .global GLOBAL(ashiftrt_r4_10)
62 1.1 mrg .global GLOBAL(ashiftrt_r4_11)
63 1.1 mrg .global GLOBAL(ashiftrt_r4_12)
64 1.1 mrg .global GLOBAL(ashiftrt_r4_13)
65 1.1 mrg .global GLOBAL(ashiftrt_r4_14)
66 1.1 mrg .global GLOBAL(ashiftrt_r4_15)
67 1.1 mrg .global GLOBAL(ashiftrt_r4_16)
68 1.1 mrg .global GLOBAL(ashiftrt_r4_17)
69 1.1 mrg .global GLOBAL(ashiftrt_r4_18)
70 1.1 mrg .global GLOBAL(ashiftrt_r4_19)
71 1.1 mrg .global GLOBAL(ashiftrt_r4_20)
72 1.1 mrg .global GLOBAL(ashiftrt_r4_21)
73 1.1 mrg .global GLOBAL(ashiftrt_r4_22)
74 1.1 mrg .global GLOBAL(ashiftrt_r4_23)
75 1.1 mrg .global GLOBAL(ashiftrt_r4_24)
76 1.1 mrg .global GLOBAL(ashiftrt_r4_25)
77 1.1 mrg .global GLOBAL(ashiftrt_r4_26)
78 1.1 mrg .global GLOBAL(ashiftrt_r4_27)
79 1.1 mrg .global GLOBAL(ashiftrt_r4_28)
80 1.1 mrg .global GLOBAL(ashiftrt_r4_29)
81 1.1 mrg .global GLOBAL(ashiftrt_r4_30)
82 1.1 mrg .global GLOBAL(ashiftrt_r4_31)
83 1.1 mrg .global GLOBAL(ashiftrt_r4_32)
84 1.1 mrg
85 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
86 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
87 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
88 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
89 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
90 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
91 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
92 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
93 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
94 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
95 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
96 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
97 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
98 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
99 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
100 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
101 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
102 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
103 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
104 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
105 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
106 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
107 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
108 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
109 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
110 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
111 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
112 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
113 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
114 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
115 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
116 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
117 1.1 mrg HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
118 1.1 mrg
/* Fixed-count arithmetic right shifts of r4, done in place.
   Entering at ashiftrt_r4_N shifts r4 right (sign-propagating) by N.
   The entry points form fall-through chains: each label adds one shar and
   drops into the next smaller count, and each chain ends in a shortcut for
   counts 24, 16 or 1.  Only r4 and the T bit are written.  */
119 1.1 mrg .align 1
120 1.1 mrg GLOBAL(ashiftrt_r4_32):
121 1.1 mrg GLOBAL(ashiftrt_r4_31):
122 1.1 mrg rotcl r4 /* T = old bit 31 (the sign) */
123 1.1 mrg rts
124 1.1 mrg subc r4,r4 /* delay slot: r4 = r4 - r4 - T, i.e. 0 or -1 */
125 1.1 mrg
126 1.1 mrg GLOBAL(ashiftrt_r4_30):
127 1.1 mrg shar r4
128 1.1 mrg GLOBAL(ashiftrt_r4_29):
129 1.1 mrg shar r4
130 1.1 mrg GLOBAL(ashiftrt_r4_28):
131 1.1 mrg shar r4
132 1.1 mrg GLOBAL(ashiftrt_r4_27):
133 1.1 mrg shar r4
134 1.1 mrg GLOBAL(ashiftrt_r4_26):
135 1.1 mrg shar r4
136 1.1 mrg GLOBAL(ashiftrt_r4_25):
137 1.1 mrg shar r4
138 1.1 mrg GLOBAL(ashiftrt_r4_24):
139 1.1 mrg shlr16 r4 /* logical shift by 16 + 8 = 24 ... */
140 1.1 mrg shlr8 r4
141 1.1 mrg rts
142 1.1 mrg exts.b r4,r4 /* delay slot: ... then sign-extend the remaining byte */
143 1.1 mrg
144 1.1 mrg GLOBAL(ashiftrt_r4_23):
145 1.1 mrg shar r4
146 1.1 mrg GLOBAL(ashiftrt_r4_22):
147 1.1 mrg shar r4
148 1.1 mrg GLOBAL(ashiftrt_r4_21):
149 1.1 mrg shar r4
150 1.1 mrg GLOBAL(ashiftrt_r4_20):
151 1.1 mrg shar r4
152 1.1 mrg GLOBAL(ashiftrt_r4_19):
153 1.1 mrg shar r4
154 1.1 mrg GLOBAL(ashiftrt_r4_18):
155 1.1 mrg shar r4
156 1.1 mrg GLOBAL(ashiftrt_r4_17):
157 1.1 mrg shar r4
158 1.1 mrg GLOBAL(ashiftrt_r4_16):
159 1.1 mrg shlr16 r4 /* logical shift by 16 ... */
160 1.1 mrg rts
161 1.1 mrg exts.w r4,r4 /* delay slot: ... then sign-extend the remaining word */
162 1.1 mrg
163 1.1 mrg GLOBAL(ashiftrt_r4_15):
164 1.1 mrg shar r4
165 1.1 mrg GLOBAL(ashiftrt_r4_14):
166 1.1 mrg shar r4
167 1.1 mrg GLOBAL(ashiftrt_r4_13):
168 1.1 mrg shar r4
169 1.1 mrg GLOBAL(ashiftrt_r4_12):
170 1.1 mrg shar r4
171 1.1 mrg GLOBAL(ashiftrt_r4_11):
172 1.1 mrg shar r4
173 1.1 mrg GLOBAL(ashiftrt_r4_10):
174 1.1 mrg shar r4
175 1.1 mrg GLOBAL(ashiftrt_r4_9):
176 1.1 mrg shar r4
177 1.1 mrg GLOBAL(ashiftrt_r4_8):
178 1.1 mrg shar r4
179 1.1 mrg GLOBAL(ashiftrt_r4_7):
180 1.1 mrg shar r4
181 1.1 mrg GLOBAL(ashiftrt_r4_6):
182 1.1 mrg shar r4
183 1.1 mrg GLOBAL(ashiftrt_r4_5):
184 1.1 mrg shar r4
185 1.1 mrg GLOBAL(ashiftrt_r4_4):
186 1.1 mrg shar r4
187 1.1 mrg GLOBAL(ashiftrt_r4_3):
188 1.1 mrg shar r4
189 1.1 mrg GLOBAL(ashiftrt_r4_2):
190 1.1 mrg shar r4
191 1.1 mrg GLOBAL(ashiftrt_r4_1):
192 1.1 mrg rts
193 1.1 mrg shar r4 /* delay slot: the final single-bit shift */
194 1.1 mrg
195 1.1 mrg GLOBAL(ashiftrt_r4_0):
196 1.1 mrg rts
197 1.1 mrg nop /* shift by 0: r4 is returned unchanged */
198 1.1 mrg
199 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_0))
200 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_1))
201 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_2))
202 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_3))
203 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_4))
204 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_5))
205 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_6))
206 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_7))
207 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_8))
208 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_9))
209 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_10))
210 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_11))
211 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_12))
212 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_13))
213 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_14))
214 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_15))
215 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_16))
216 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_17))
217 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_18))
218 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_19))
219 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_20))
220 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_21))
221 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_22))
222 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_23))
223 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_24))
224 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_25))
225 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_26))
226 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_27))
227 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_28))
228 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_29))
229 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_30))
230 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_31))
231 1.1 mrg ENDFUNC(GLOBAL(ashiftrt_r4_32))
232 1.1 mrg #endif
233 1.1 mrg
234 1.1 mrg #ifdef L_ashiftrt_n
235 1.1 mrg
236 1.1 mrg !
237 1.1 mrg ! GLOBAL(ashrsi3)
238 1.1 mrg !
239 1.1 mrg ! Entry:
240 1.1 mrg !
241 1.1 mrg ! r4: Value to shift
242 1.1 mrg ! r5: Shift count
243 1.1 mrg !
244 1.1 mrg ! Exit:
245 1.1 mrg !
246 1.1 mrg ! r0: Result
247 1.1 mrg !
248 1.1 mrg ! Destroys:
249 1.1 mrg !
250 1.1 mrg ! T bit, r5
251 1.1 mrg !
252 1.1 mrg
/* ashrsi3: arithmetic right shift of r4 by a run-time count in r5, result
   in r0.  The count is masked to 0..31 and used to index a table of byte
   offsets; the selected handler is a fall-through chain of shar
   instructions with shortcuts for counts 31, 24 and 16.  */
253 1.1 mrg .global GLOBAL(ashrsi3)
254 1.1 mrg HIDDEN_FUNC(GLOBAL(ashrsi3))
255 1.1 mrg .align 2
256 1.1 mrg GLOBAL(ashrsi3):
257 1.1 mrg mov #31,r0
258 1.1 mrg and r0,r5 /* r5 = count & 31 */
259 1.1 mrg mova LOCAL(ashrsi3_table),r0 /* r0 = address of the offset table */
260 1.1 mrg mov.b @(r0,r5),r5 /* r5 = sign-extended byte offset of the handler from the table */
261 1.1 mrg #ifdef __sh1__
/* This path forms the absolute handler address (table + offset) and uses jmp.  */
262 1.1 mrg add r5,r0
263 1.1 mrg jmp @r0
264 1.1 mrg #else
/* braf branches to (address of braf + 4 + r5).  With the function start
   4-byte aligned, the table begins immediately after the delay slot, so
   that base address is the table itself.  */
265 1.1 mrg braf r5
266 1.1 mrg #endif
267 1.1 mrg mov r4,r0 /* delay slot: r0 = value to shift */
268 1.1 mrg
269 1.1 mrg .align 2
/* One byte per shift count: distance from the table to the handler.
   Every distance has to fit in a signed byte because it is loaded with
   mov.b above.  */
270 1.1 mrg LOCAL(ashrsi3_table):
271 1.1 mrg .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
272 1.1 mrg .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
273 1.1 mrg .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
274 1.1 mrg .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
275 1.1 mrg .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
276 1.1 mrg .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
277 1.1 mrg .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
278 1.1 mrg .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
279 1.1 mrg .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
280 1.1 mrg .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
281 1.1 mrg .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
282 1.1 mrg .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
283 1.1 mrg .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
284 1.1 mrg .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
285 1.1 mrg .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
286 1.1 mrg .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
287 1.1 mrg .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
288 1.1 mrg .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
289 1.1 mrg .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
290 1.1 mrg .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
291 1.1 mrg .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
292 1.1 mrg .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
293 1.1 mrg .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
294 1.1 mrg .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
295 1.1 mrg .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
296 1.1 mrg .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
297 1.1 mrg .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
298 1.1 mrg .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
299 1.1 mrg .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
300 1.1 mrg .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
301 1.1 mrg .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
302 1.1 mrg .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
303 1.1 mrg
304 1.1 mrg LOCAL(ashrsi3_31):
305 1.1 mrg rotcl r0 /* T = old bit 31 (the sign) */
306 1.1 mrg rts
307 1.1 mrg subc r0,r0 /* delay slot: r0 = r0 - r0 - T, i.e. 0 or -1 */
308 1.1 mrg
309 1.1 mrg LOCAL(ashrsi3_30):
310 1.1 mrg shar r0
311 1.1 mrg LOCAL(ashrsi3_29):
312 1.1 mrg shar r0
313 1.1 mrg LOCAL(ashrsi3_28):
314 1.1 mrg shar r0
315 1.1 mrg LOCAL(ashrsi3_27):
316 1.1 mrg shar r0
317 1.1 mrg LOCAL(ashrsi3_26):
318 1.1 mrg shar r0
319 1.1 mrg LOCAL(ashrsi3_25):
320 1.1 mrg shar r0
321 1.1 mrg LOCAL(ashrsi3_24):
322 1.1 mrg shlr16 r0 /* logical shift by 16 + 8 = 24 ... */
323 1.1 mrg shlr8 r0
324 1.1 mrg rts
325 1.1 mrg exts.b r0,r0 /* delay slot: ... then sign-extend the remaining byte */
326 1.1 mrg
327 1.1 mrg LOCAL(ashrsi3_23):
328 1.1 mrg shar r0
329 1.1 mrg LOCAL(ashrsi3_22):
330 1.1 mrg shar r0
331 1.1 mrg LOCAL(ashrsi3_21):
332 1.1 mrg shar r0
333 1.1 mrg LOCAL(ashrsi3_20):
334 1.1 mrg shar r0
335 1.1 mrg LOCAL(ashrsi3_19):
336 1.1 mrg shar r0
337 1.1 mrg LOCAL(ashrsi3_18):
338 1.1 mrg shar r0
339 1.1 mrg LOCAL(ashrsi3_17):
340 1.1 mrg shar r0
341 1.1 mrg LOCAL(ashrsi3_16):
342 1.1 mrg shlr16 r0 /* logical shift by 16 ... */
343 1.1 mrg rts
344 1.1 mrg exts.w r0,r0 /* delay slot: ... then sign-extend the remaining word */
345 1.1 mrg
346 1.1 mrg LOCAL(ashrsi3_15):
347 1.1 mrg shar r0
348 1.1 mrg LOCAL(ashrsi3_14):
349 1.1 mrg shar r0
350 1.1 mrg LOCAL(ashrsi3_13):
351 1.1 mrg shar r0
352 1.1 mrg LOCAL(ashrsi3_12):
353 1.1 mrg shar r0
354 1.1 mrg LOCAL(ashrsi3_11):
355 1.1 mrg shar r0
356 1.1 mrg LOCAL(ashrsi3_10):
357 1.1 mrg shar r0
358 1.1 mrg LOCAL(ashrsi3_9):
359 1.1 mrg shar r0
360 1.1 mrg LOCAL(ashrsi3_8):
361 1.1 mrg shar r0
362 1.1 mrg LOCAL(ashrsi3_7):
363 1.1 mrg shar r0
364 1.1 mrg LOCAL(ashrsi3_6):
365 1.1 mrg shar r0
366 1.1 mrg LOCAL(ashrsi3_5):
367 1.1 mrg shar r0
368 1.1 mrg LOCAL(ashrsi3_4):
369 1.1 mrg shar r0
370 1.1 mrg LOCAL(ashrsi3_3):
371 1.1 mrg shar r0
372 1.1 mrg LOCAL(ashrsi3_2):
373 1.1 mrg shar r0
374 1.1 mrg LOCAL(ashrsi3_1):
375 1.1 mrg rts
376 1.1 mrg shar r0 /* delay slot: the final single-bit shift */
377 1.1 mrg
378 1.1 mrg LOCAL(ashrsi3_0):
379 1.1 mrg rts
380 1.1 mrg nop /* shift by 0: r0 already holds the value */
381 1.1 mrg
382 1.1 mrg ENDFUNC(GLOBAL(ashrsi3))
383 1.1 mrg #endif
384 1.1 mrg
385 1.1 mrg #ifdef L_ashiftlt
386 1.1 mrg
387 1.1 mrg !
388 1.1 mrg ! GLOBAL(ashlsi3)
389 1.1 mrg ! (For compatibility with older binaries, not used by compiler)
390 1.1 mrg !
391 1.1 mrg ! Entry:
392 1.1 mrg ! r4: Value to shift
393 1.1 mrg ! r5: Shift count
394 1.1 mrg !
395 1.1 mrg ! Exit:
396 1.1 mrg ! r0: Result
397 1.1 mrg !
398 1.1 mrg ! Destroys:
399 1.1 mrg ! T bit
400 1.1 mrg !
401 1.1 mrg !
402 1.1 mrg ! GLOBAL(ashlsi3_r0)
403 1.1 mrg !
404 1.1 mrg ! Entry:
405 1.1 mrg ! r4: Value to shift
406 1.1 mrg ! r0: Shift count
407 1.1 mrg !
408 1.1 mrg ! Exit:
409 1.1 mrg ! r0: Result
410 1.1 mrg !
411 1.1 mrg ! Destroys:
412 1.1 mrg ! T bit
413 1.1 mrg
414 1.1 mrg .global GLOBAL(ashlsi3)
415 1.1 mrg .global GLOBAL(ashlsi3_r0)
416 1.1 mrg HIDDEN_FUNC(GLOBAL(ashlsi3))
417 1.1 mrg HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
/* ashlsi3 / ashlsi3_r0: left shift of r4 by a run-time count, result in r0.
   ashlsi3 takes the count in r5 and copies it to r0, then falls into
   ashlsi3_r0, which takes the count in r0.
   Dispatch: the count is masked to 0..31 and multiplied by 4, because every
   slot of the table below is exactly two instructions (4 bytes).  Slots
   that need more than two instructions branch to a tail routine after the
   table.  Do not insert or remove instructions inside the table.  */
418 1.1 mrg GLOBAL(ashlsi3):
419 1.1 mrg mov r5,r0
420 1.1 mrg .align 2
421 1.1 mrg GLOBAL(ashlsi3_r0):
422 1.1 mrg
423 1.1 mrg #ifdef __sh1__
/* This path builds the absolute slot address with mova + add and uses jmp;
   r4 is borrowed as a temporary and preserved via the stack.  */
424 1.1 mrg and #31,r0
425 1.1 mrg shll2 r0 /* r0 = (count & 31) * 4 = byte offset of the slot */
426 1.1 mrg mov.l r4,@-r15 /* save the value to shift */
427 1.1 mrg mov r0,r4
428 1.1 mrg mova LOCAL(ashlsi3_table),r0
429 1.1 mrg add r4,r0 /* r0 = address of the selected slot */
430 1.1 mrg mov.l @r15+,r4 /* restore the value to shift */
431 1.1 mrg jmp @r0
432 1.1 mrg mov r4,r0 /* delay slot: r0 = value to shift */
433 1.1 mrg .align 2
434 1.1 mrg #else
/* braf branches to (address of braf + 4 + r0); the table starts right
   after the delay slot, so r0 is used directly as the slot offset.  */
435 1.1 mrg and #31,r0
436 1.1 mrg shll2 r0 /* r0 = (count & 31) * 4 = byte offset of the slot */
437 1.1 mrg braf r0
438 1.1 mrg mov r4,r0 /* delay slot: r0 = value to shift */
439 1.1 mrg #endif
440 1.1 mrg
441 1.1 mrg LOCAL(ashlsi3_table):
442 1.1 mrg rts // << 0
443 1.1 mrg nop
444 1.1 mrg LOCAL(ashlsi_1):
445 1.1 mrg rts // << 1
446 1.1 mrg shll r0
447 1.1 mrg LOCAL(ashlsi_2): // << 2
448 1.1 mrg rts
449 1.1 mrg shll2 r0
450 1.1 mrg bra LOCAL(ashlsi_1) // << 3
451 1.1 mrg shll2 r0
452 1.1 mrg bra LOCAL(ashlsi_2) // << 4
453 1.1 mrg shll2 r0
454 1.1 mrg bra LOCAL(ashlsi_5) // << 5
455 1.1 mrg shll r0
456 1.1 mrg bra LOCAL(ashlsi_6) // << 6
457 1.1 mrg shll2 r0
458 1.1 mrg bra LOCAL(ashlsi_7) // << 7
459 1.1 mrg shll r0
460 1.1 mrg LOCAL(ashlsi_8): // << 8
461 1.1 mrg rts
462 1.1 mrg shll8 r0
463 1.1 mrg bra LOCAL(ashlsi_8) // << 9
464 1.1 mrg shll r0
465 1.1 mrg bra LOCAL(ashlsi_8) // << 10
466 1.1 mrg shll2 r0
467 1.1 mrg bra LOCAL(ashlsi_11) // << 11
468 1.1 mrg shll r0
469 1.1 mrg bra LOCAL(ashlsi_12) // << 12
470 1.1 mrg shll2 r0
471 1.1 mrg bra LOCAL(ashlsi_13) // << 13
472 1.1 mrg shll r0
473 1.1 mrg bra LOCAL(ashlsi_14) // << 14
474 1.1 mrg shll8 r0
475 1.1 mrg bra LOCAL(ashlsi_15) // << 15
476 1.1 mrg shll8 r0
477 1.1 mrg LOCAL(ashlsi_16): // << 16
478 1.1 mrg rts
479 1.1 mrg shll16 r0
480 1.1 mrg bra LOCAL(ashlsi_16) // << 17
481 1.1 mrg shll r0
482 1.1 mrg bra LOCAL(ashlsi_16) // << 18
483 1.1 mrg shll2 r0
484 1.1 mrg bra LOCAL(ashlsi_19) // << 19
485 1.1 mrg shll r0
486 1.1 mrg bra LOCAL(ashlsi_20) // << 20
487 1.1 mrg shll2 r0
488 1.1 mrg bra LOCAL(ashlsi_21) // << 21
489 1.1 mrg shll r0
490 1.1 mrg bra LOCAL(ashlsi_22) // << 22
491 1.1 mrg shll16 r0
492 1.1 mrg bra LOCAL(ashlsi_23) // << 23
493 1.1 mrg shll16 r0
494 1.1 mrg bra LOCAL(ashlsi_16) // << 24
495 1.1 mrg shll8 r0
496 1.1 mrg bra LOCAL(ashlsi_25) // << 25
497 1.1 mrg shll r0
498 1.1 mrg bra LOCAL(ashlsi_26) // << 26
499 1.1 mrg shll2 r0
500 1.1 mrg bra LOCAL(ashlsi_27) // << 27
501 1.1 mrg shll r0
502 1.1 mrg bra LOCAL(ashlsi_28) // << 28
503 1.1 mrg shll2 r0
504 1.1 mrg bra LOCAL(ashlsi_29) // << 29
505 1.1 mrg shll16 r0
506 1.1 mrg bra LOCAL(ashlsi_30) // << 30
507 1.1 mrg shll16 r0
508 1.1 mrg and #1,r0 // << 31
509 1.1 mrg rts
510 1.1 mrg rotr r0 /* delay slot: move the isolated bit 0 up to bit 31 */
511 1.1 mrg
/* Tail routines reached from the table.  The first four groups are packed
   so that the delay slot of each rts is the first instruction under the
   following label; that instruction therefore belongs to both paths.  */
512 1.1 mrg LOCAL(ashlsi_7):
513 1.1 mrg shll2 r0
514 1.1 mrg LOCAL(ashlsi_5):
515 1.1 mrg LOCAL(ashlsi_6):
516 1.1 mrg shll2 r0
517 1.1 mrg rts /* delay slot is the shll2 under ashlsi_13 */
518 1.1 mrg LOCAL(ashlsi_13):
519 1.1 mrg shll2 r0
520 1.1 mrg LOCAL(ashlsi_12):
521 1.1 mrg LOCAL(ashlsi_11):
522 1.1 mrg shll8 r0
523 1.1 mrg rts /* delay slot is the shll2 under ashlsi_21 */
524 1.1 mrg LOCAL(ashlsi_21):
525 1.1 mrg shll2 r0
526 1.1 mrg LOCAL(ashlsi_20):
527 1.1 mrg LOCAL(ashlsi_19):
528 1.1 mrg shll16 r0
529 1.1 mrg rts /* delay slot is the shll2 under ashlsi_28/ashlsi_27 */
530 1.1 mrg LOCAL(ashlsi_28):
531 1.1 mrg LOCAL(ashlsi_27):
532 1.1 mrg shll2 r0
533 1.1 mrg LOCAL(ashlsi_26):
534 1.1 mrg LOCAL(ashlsi_25):
535 1.1 mrg shll16 r0
536 1.1 mrg rts
537 1.1 mrg shll8 r0
538 1.1 mrg
/* The remaining tails first overshoot with a large left shift (done in the
   table slot), back off with a small right shift, then finish with another
   large left shift in the delay slot.  */
539 1.1 mrg LOCAL(ashlsi_22):
540 1.1 mrg LOCAL(ashlsi_14):
541 1.1 mrg shlr2 r0
542 1.1 mrg rts
543 1.1 mrg shll8 r0
544 1.1 mrg
545 1.1 mrg LOCAL(ashlsi_23):
546 1.1 mrg LOCAL(ashlsi_15):
547 1.1 mrg shlr r0
548 1.1 mrg rts
549 1.1 mrg shll8 r0
550 1.1 mrg
551 1.1 mrg LOCAL(ashlsi_29):
552 1.1 mrg shlr r0
553 1.1 mrg LOCAL(ashlsi_30):
554 1.1 mrg shlr2 r0
555 1.1 mrg rts
556 1.1 mrg shll16 r0
557 1.1 mrg
558 1.1 mrg ENDFUNC(GLOBAL(ashlsi3))
559 1.1 mrg ENDFUNC(GLOBAL(ashlsi3_r0))
560 1.1 mrg #endif
561 1.1 mrg
562 1.1 mrg #ifdef L_lshiftrt
563 1.1 mrg
564 1.1 mrg !
565 1.1 mrg ! GLOBAL(lshrsi3)
566 1.1 mrg ! (For compatibility with older binaries, not used by compiler)
567 1.1 mrg !
568 1.1 mrg ! Entry:
569 1.1 mrg ! r4: Value to shift
570 1.1 mrg ! r5: Shift count
571 1.1 mrg !
572 1.1 mrg ! Exit:
573 1.1 mrg ! r0: Result
574 1.1 mrg !
575 1.1 mrg ! Destroys:
576 1.1 mrg ! T bit
577 1.1 mrg !
578 1.1 mrg !
579 1.1 mrg ! GLOBAL(lshrsi3_r0)
580 1.1 mrg !
581 1.1 mrg ! Entry:
582 1.1 mrg ! r4: Value to shift
583 1.1 mrg ! r0: Shift count
584 1.1 mrg !
585 1.1 mrg ! Exit:
586 1.1 mrg ! r0: Result
587 1.1 mrg !
588 1.1 mrg ! Destroys:
589 1.1 mrg ! T bit
590 1.1 mrg
591 1.1 mrg .global GLOBAL(lshrsi3)
592 1.1 mrg .global GLOBAL(lshrsi3_r0)
593 1.1 mrg HIDDEN_FUNC(GLOBAL(lshrsi3))
594 1.1 mrg HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
/* lshrsi3 / lshrsi3_r0: logical right shift of r4 by a run-time count,
   result in r0.  lshrsi3 takes the count in r5 and copies it to r0, then
   falls into lshrsi3_r0, which takes the count in r0.
   Dispatch: the count is masked to 0..31 and multiplied by 4, because every
   slot of the table below is exactly two instructions (4 bytes).  Slots
   that need more than two instructions branch to a tail routine after the
   table.  Do not insert or remove instructions inside the table.  */
595 1.1 mrg GLOBAL(lshrsi3):
596 1.1 mrg mov r5,r0
597 1.1 mrg .align 2
598 1.1 mrg GLOBAL(lshrsi3_r0):
599 1.1 mrg
600 1.1 mrg #ifdef __sh1__
/* This path builds the absolute slot address with mova + add and uses jmp;
   r4 is borrowed as a temporary and preserved via the stack.  */
601 1.1 mrg and #31,r0
602 1.1 mrg shll2 r0 /* r0 = (count & 31) * 4 = byte offset of the slot */
603 1.1 mrg mov.l r4,@-r15 /* save the value to shift */
604 1.1 mrg mov r0,r4
605 1.1 mrg mova LOCAL(lshrsi3_table),r0
606 1.1 mrg add r4,r0 /* r0 = address of the selected slot */
607 1.1 mrg mov.l @r15+,r4 /* restore the value to shift */
608 1.1 mrg jmp @r0
609 1.1 mrg mov r4,r0 /* delay slot: r0 = value to shift */
610 1.1 mrg .align 2
611 1.1 mrg #else
/* braf branches to (address of braf + 4 + r0); the table starts right
   after the delay slot, so r0 is used directly as the slot offset.  */
612 1.1 mrg and #31,r0
613 1.1 mrg shll2 r0 /* r0 = (count & 31) * 4 = byte offset of the slot */
614 1.1 mrg braf r0
615 1.1 mrg mov r4,r0 /* delay slot: r0 = value to shift */
616 1.1 mrg #endif
617 1.1 mrg LOCAL(lshrsi3_table):
618 1.1 mrg rts // >> 0
619 1.1 mrg nop
620 1.1 mrg LOCAL(lshrsi_1): // >> 1
621 1.1 mrg rts
622 1.1 mrg shlr r0
623 1.1 mrg LOCAL(lshrsi_2): // >> 2
624 1.1 mrg rts
625 1.1 mrg shlr2 r0
626 1.1 mrg bra LOCAL(lshrsi_1) // >> 3
627 1.1 mrg shlr2 r0
628 1.1 mrg bra LOCAL(lshrsi_2) // >> 4
629 1.1 mrg shlr2 r0
630 1.1 mrg bra LOCAL(lshrsi_5) // >> 5
631 1.1 mrg shlr r0
632 1.1 mrg bra LOCAL(lshrsi_6) // >> 6
633 1.1 mrg shlr2 r0
634 1.1 mrg bra LOCAL(lshrsi_7) // >> 7
635 1.1 mrg shlr r0
636 1.1 mrg LOCAL(lshrsi_8): // >> 8
637 1.1 mrg rts
638 1.1 mrg shlr8 r0
639 1.1 mrg bra LOCAL(lshrsi_8) // >> 9
640 1.1 mrg shlr r0
641 1.1 mrg bra LOCAL(lshrsi_8) // >> 10
642 1.1 mrg shlr2 r0
643 1.1 mrg bra LOCAL(lshrsi_11) // >> 11
644 1.1 mrg shlr r0
645 1.1 mrg bra LOCAL(lshrsi_12) // >> 12
646 1.1 mrg shlr2 r0
647 1.1 mrg bra LOCAL(lshrsi_13) // >> 13
648 1.1 mrg shlr r0
649 1.1 mrg bra LOCAL(lshrsi_14) // >> 14
650 1.1 mrg shlr8 r0
651 1.1 mrg bra LOCAL(lshrsi_15) // >> 15
652 1.1 mrg shlr8 r0
653 1.1 mrg LOCAL(lshrsi_16): // >> 16
654 1.1 mrg rts
655 1.1 mrg shlr16 r0
656 1.1 mrg bra LOCAL(lshrsi_16) // >> 17
657 1.1 mrg shlr r0
658 1.1 mrg bra LOCAL(lshrsi_16) // >> 18
659 1.1 mrg shlr2 r0
660 1.1 mrg bra LOCAL(lshrsi_19) // >> 19
661 1.1 mrg shlr r0
662 1.1 mrg bra LOCAL(lshrsi_20) // >> 20
663 1.1 mrg shlr2 r0
664 1.1 mrg bra LOCAL(lshrsi_21) // >> 21
665 1.1 mrg shlr r0
666 1.1 mrg bra LOCAL(lshrsi_22) // >> 22
667 1.1 mrg shlr16 r0
668 1.1 mrg bra LOCAL(lshrsi_23) // >> 23
669 1.1 mrg shlr16 r0
670 1.1 mrg bra LOCAL(lshrsi_16) // >> 24
671 1.1 mrg shlr8 r0
672 1.1 mrg bra LOCAL(lshrsi_25) // >> 25
673 1.1 mrg shlr r0
674 1.1 mrg bra LOCAL(lshrsi_26) // >> 26
675 1.1 mrg shlr2 r0
676 1.1 mrg bra LOCAL(lshrsi_27) // >> 27
677 1.1 mrg shlr r0
678 1.1 mrg bra LOCAL(lshrsi_28) // >> 28
679 1.1 mrg shlr2 r0
680 1.1 mrg bra LOCAL(lshrsi_29) // >> 29
681 1.1 mrg shlr16 r0
682 1.1 mrg bra LOCAL(lshrsi_30) // >> 30
683 1.1 mrg shlr16 r0
684 1.1 mrg shll r0 // >> 31
685 1.1 mrg rts
686 1.1 mrg movt r0 /* delay slot: r0 = T, the old bit 31 captured by the shll above */
687 1.1 mrg
/* Tail routines reached from the table.  The first four groups are packed
   so that the delay slot of each rts is the first instruction under the
   following label; that instruction therefore belongs to both paths.  */
688 1.1 mrg LOCAL(lshrsi_7):
689 1.1 mrg shlr2 r0
690 1.1 mrg LOCAL(lshrsi_5):
691 1.1 mrg LOCAL(lshrsi_6):
692 1.1 mrg shlr2 r0
693 1.1 mrg rts /* delay slot is the shlr2 under lshrsi_13 */
694 1.1 mrg LOCAL(lshrsi_13):
695 1.1 mrg shlr2 r0
696 1.1 mrg LOCAL(lshrsi_12):
697 1.1 mrg LOCAL(lshrsi_11):
698 1.1 mrg shlr8 r0
699 1.1 mrg rts /* delay slot is the shlr2 under lshrsi_21 */
700 1.1 mrg LOCAL(lshrsi_21):
701 1.1 mrg shlr2 r0
702 1.1 mrg LOCAL(lshrsi_20):
703 1.1 mrg LOCAL(lshrsi_19):
704 1.1 mrg shlr16 r0
705 1.1 mrg rts /* delay slot is the shlr2 under lshrsi_28/lshrsi_27 */
706 1.1 mrg LOCAL(lshrsi_28):
707 1.1 mrg LOCAL(lshrsi_27):
708 1.1 mrg shlr2 r0
709 1.1 mrg LOCAL(lshrsi_26):
710 1.1 mrg LOCAL(lshrsi_25):
711 1.1 mrg shlr16 r0
712 1.1 mrg rts
713 1.1 mrg shlr8 r0
714 1.1 mrg
/* The remaining tails first overshoot with a large right shift (done in
   the table slot), back off with a small left shift, then finish with
   another large right shift in the delay slot.  */
715 1.1 mrg LOCAL(lshrsi_22):
716 1.1 mrg LOCAL(lshrsi_14):
717 1.1 mrg shll2 r0
718 1.1 mrg rts
719 1.1 mrg shlr8 r0
720 1.1 mrg
721 1.1 mrg LOCAL(lshrsi_23):
722 1.1 mrg LOCAL(lshrsi_15):
723 1.1 mrg shll r0
724 1.1 mrg rts
725 1.1 mrg shlr8 r0
726 1.1 mrg
727 1.1 mrg LOCAL(lshrsi_29):
728 1.1 mrg shll r0
729 1.1 mrg LOCAL(lshrsi_30):
730 1.1 mrg shll2 r0
731 1.1 mrg rts
732 1.1 mrg shlr16 r0
733 1.1 mrg
734 1.1 mrg ENDFUNC(GLOBAL(lshrsi3))
735 1.1 mrg ENDFUNC(GLOBAL(lshrsi3_r0))
736 1.1 mrg #endif
737 1.1 mrg
738 1.1 mrg #ifdef L_movmem
739 1.1 mrg .text
740 1.1 mrg .balign 4
741 1.1 mrg .global GLOBAL(movmem)
742 1.1 mrg HIDDEN_FUNC(GLOBAL(movmem))
743 1.1 mrg HIDDEN_ALIAS(movstr,movmem)
744 1.1 mrg /* This would be a lot simpler if r6 contained the byte count
745 1.1 mrg minus 64, and we wouldn't be called here for a byte count of 64. */
/* movmem: longword block copy built on the movmemSI<n> ladder below.
   As used here: r4 = destination pointer, r5 = source pointer, r6 = count,
   multiplied by 4 on entry.  r0 is the copy temporary.  pr is pushed on
   entry because the ladder is entered with bsr and its final rts is reused
   to come back into the loop.
   NOTE(review): the exact meaning/bias of the count in r6 is defined by the
   caller and is not visible in this file; see the remark just above.  */
746 1.1 mrg GLOBAL(movmem):
747 1.1 mrg sts.l pr,@-r15 /* save the caller's return address */
748 1.1 mrg shll2 r6 /* r6 *= 4 */
749 1.1 mrg bsr GLOBAL(movmemSI52+2) /* enter movmemSI52 just past its load ... */
750 1.1 mrg mov.l @(48,r5),r0 /* ... which is done here in the delay slot */
751 1.1 mrg .balign 4
752 1.1 mrg LOCAL(movmem_loop): /* Reached with rts */
753 1.1 mrg mov.l @(60,r5),r0
754 1.1 mrg add #-64,r6
755 1.1 mrg mov.l r0,@(60,r4)
756 1.1 mrg tst r6,r6 /* T = (r6 == 0) */
757 1.1 mrg mov.l @(56,r5),r0
758 1.1 mrg bt LOCAL(movmem_done)
759 1.1 mrg mov.l r0,@(56,r4)
760 1.1 mrg cmp/pl r6 /* T = (r6 > 0); the mov.l and add instructions below leave T alone */
761 1.1 mrg mov.l @(52,r5),r0
762 1.1 mrg add #64,r5
763 1.1 mrg mov.l r0,@(52,r4)
764 1.1 mrg add #64,r4
765 1.1 mrg bt GLOBAL(movmemSI52) /* r6 > 0: copy offsets 48..0 of the next group; its rts returns to movmem_loop */
766 1.1 mrg ! done all the large groups, do the remainder
767 1.1 mrg ! jump into the movmemSI ladder: r6 is negative here, so the target
! lies r6 bytes before movmemSI4+4 (each ladder entry is 4 bytes long)
768 1.1 mrg mova GLOBAL(movmemSI4)+4,r0
769 1.1 mrg add r6,r0
770 1.1 mrg jmp @r0
771 1.1 mrg LOCAL(movmem_done): ! share slot insn, works out aligned.
772 1.1 mrg lds.l @r15+,pr /* restore pr; also serves as the delay slot of the jmp above */
773 1.1 mrg mov.l r0,@(56,r4)
774 1.1 mrg mov.l @(52,r5),r0
775 1.1 mrg rts
776 1.1 mrg mov.l r0,@(52,r4)
777 1.1 mrg .balign 4
778 1.1 mrg ! ??? We need aliases movstr* for movmem* for the older libraries. These
779 1.1 mrg ! aliases will be removed at some point in the future.
/* movmemSI<n>: copy n bytes, as n/4 longwords, from r5 to r4 through r0.
   Each entry is exactly one load plus one store (4 bytes) and falls
   through to the next smaller size; movmem above relies on that spacing
   for its computed jump and for the +2 entry offset.  */
780 1.1 mrg .global GLOBAL(movmemSI64)
781 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI64))
782 1.1 mrg HIDDEN_ALIAS(movstrSI64,movmemSI64)
783 1.1 mrg GLOBAL(movmemSI64):
784 1.1 mrg mov.l @(60,r5),r0
785 1.1 mrg mov.l r0,@(60,r4)
786 1.1 mrg .global GLOBAL(movmemSI60)
787 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI60))
788 1.1 mrg HIDDEN_ALIAS(movstrSI60,movmemSI60)
789 1.1 mrg GLOBAL(movmemSI60):
790 1.1 mrg mov.l @(56,r5),r0
791 1.1 mrg mov.l r0,@(56,r4)
792 1.1 mrg .global GLOBAL(movmemSI56)
793 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI56))
794 1.1 mrg HIDDEN_ALIAS(movstrSI56,movmemSI56)
795 1.1 mrg GLOBAL(movmemSI56):
796 1.1 mrg mov.l @(52,r5),r0
797 1.1 mrg mov.l r0,@(52,r4)
798 1.1 mrg .global GLOBAL(movmemSI52)
799 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI52))
800 1.1 mrg HIDDEN_ALIAS(movstrSI52,movmemSI52)
801 1.1 mrg GLOBAL(movmemSI52):
802 1.1 mrg mov.l @(48,r5),r0
803 1.1 mrg mov.l r0,@(48,r4)
804 1.1 mrg .global GLOBAL(movmemSI48)
805 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI48))
806 1.1 mrg HIDDEN_ALIAS(movstrSI48,movmemSI48)
807 1.1 mrg GLOBAL(movmemSI48):
808 1.1 mrg mov.l @(44,r5),r0
809 1.1 mrg mov.l r0,@(44,r4)
810 1.1 mrg .global GLOBAL(movmemSI44)
811 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI44))
812 1.1 mrg HIDDEN_ALIAS(movstrSI44,movmemSI44)
813 1.1 mrg GLOBAL(movmemSI44):
814 1.1 mrg mov.l @(40,r5),r0
815 1.1 mrg mov.l r0,@(40,r4)
816 1.1 mrg .global GLOBAL(movmemSI40)
817 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI40))
818 1.1 mrg HIDDEN_ALIAS(movstrSI40,movmemSI40)
819 1.1 mrg GLOBAL(movmemSI40):
820 1.1 mrg mov.l @(36,r5),r0
821 1.1 mrg mov.l r0,@(36,r4)
822 1.1 mrg .global GLOBAL(movmemSI36)
823 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI36))
824 1.1 mrg HIDDEN_ALIAS(movstrSI36,movmemSI36)
825 1.1 mrg GLOBAL(movmemSI36):
826 1.1 mrg mov.l @(32,r5),r0
827 1.1 mrg mov.l r0,@(32,r4)
828 1.1 mrg .global GLOBAL(movmemSI32)
829 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI32))
830 1.1 mrg HIDDEN_ALIAS(movstrSI32,movmemSI32)
831 1.1 mrg GLOBAL(movmemSI32):
832 1.1 mrg mov.l @(28,r5),r0
833 1.1 mrg mov.l r0,@(28,r4)
834 1.1 mrg .global GLOBAL(movmemSI28)
835 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI28))
836 1.1 mrg HIDDEN_ALIAS(movstrSI28,movmemSI28)
837 1.1 mrg GLOBAL(movmemSI28):
838 1.1 mrg mov.l @(24,r5),r0
839 1.1 mrg mov.l r0,@(24,r4)
840 1.1 mrg .global GLOBAL(movmemSI24)
841 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI24))
842 1.1 mrg HIDDEN_ALIAS(movstrSI24,movmemSI24)
843 1.1 mrg GLOBAL(movmemSI24):
844 1.1 mrg mov.l @(20,r5),r0
845 1.1 mrg mov.l r0,@(20,r4)
846 1.1 mrg .global GLOBAL(movmemSI20)
847 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI20))
848 1.1 mrg HIDDEN_ALIAS(movstrSI20,movmemSI20)
849 1.1 mrg GLOBAL(movmemSI20):
850 1.1 mrg mov.l @(16,r5),r0
851 1.1 mrg mov.l r0,@(16,r4)
852 1.1 mrg .global GLOBAL(movmemSI16)
853 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI16))
854 1.1 mrg HIDDEN_ALIAS(movstrSI16,movmemSI16)
855 1.1 mrg GLOBAL(movmemSI16):
856 1.1 mrg mov.l @(12,r5),r0
857 1.1 mrg mov.l r0,@(12,r4)
858 1.1 mrg .global GLOBAL(movmemSI12)
859 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI12))
860 1.1 mrg HIDDEN_ALIAS(movstrSI12,movmemSI12)
861 1.1 mrg GLOBAL(movmemSI12):
862 1.1 mrg mov.l @(8,r5),r0
863 1.1 mrg mov.l r0,@(8,r4)
864 1.1 mrg .global GLOBAL(movmemSI8)
865 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI8))
866 1.1 mrg HIDDEN_ALIAS(movstrSI8,movmemSI8)
867 1.1 mrg GLOBAL(movmemSI8):
868 1.1 mrg mov.l @(4,r5),r0
869 1.1 mrg mov.l r0,@(4,r4)
870 1.1 mrg .global GLOBAL(movmemSI4)
871 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI4))
872 1.1 mrg HIDDEN_ALIAS(movstrSI4,movmemSI4)
873 1.1 mrg GLOBAL(movmemSI4):
874 1.1 mrg mov.l @(0,r5),r0
875 1.1 mrg rts
876 1.1 mrg mov.l r0,@(0,r4) /* delay slot: store the last longword */
877 1.1 mrg
878 1.1 mrg ENDFUNC(GLOBAL(movmemSI64))
879 1.1 mrg ENDFUNC(GLOBAL(movmemSI60))
880 1.1 mrg ENDFUNC(GLOBAL(movmemSI56))
881 1.1 mrg ENDFUNC(GLOBAL(movmemSI52))
882 1.1 mrg ENDFUNC(GLOBAL(movmemSI48))
883 1.1 mrg ENDFUNC(GLOBAL(movmemSI44))
884 1.1 mrg ENDFUNC(GLOBAL(movmemSI40))
885 1.1 mrg ENDFUNC(GLOBAL(movmemSI36))
886 1.1 mrg ENDFUNC(GLOBAL(movmemSI32))
887 1.1 mrg ENDFUNC(GLOBAL(movmemSI28))
888 1.1 mrg ENDFUNC(GLOBAL(movmemSI24))
889 1.1 mrg ENDFUNC(GLOBAL(movmemSI20))
890 1.1 mrg ENDFUNC(GLOBAL(movmemSI16))
891 1.1 mrg ENDFUNC(GLOBAL(movmemSI12))
892 1.1 mrg ENDFUNC(GLOBAL(movmemSI8))
893 1.1 mrg ENDFUNC(GLOBAL(movmemSI4))
894 1.1 mrg ENDFUNC(GLOBAL(movmem))
895 1.1 mrg #endif
896 1.1 mrg
897 1.1 mrg #ifdef L_movmem_i4
898 1.1 mrg .text
899 1.1 mrg .global GLOBAL(movmem_i4_even)
900 1.1 mrg .global GLOBAL(movmem_i4_odd)
901 1.1 mrg .global GLOBAL(movmemSI12_i4)
902 1.1 mrg
903 1.1 mrg HIDDEN_FUNC(GLOBAL(movmem_i4_even))
904 1.1 mrg HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
905 1.1 mrg HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
906 1.1 mrg
907 1.1 mrg HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
908 1.1 mrg HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
909 1.1 mrg HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
910 1.1 mrg
/* movmem_i4_even / movmem_i4_odd: longword copy loop using post-increment
   loads.  As used here: r4 = destination pointer, r5 = source pointer
   (advanced by every load), r6 = loop counter decremented by dt, r0-r3 =
   copy temporaries.  One full pass of the loop moves four longwords; the
   two entry points differ in where they join that pass.
   NOTE(review): the units/bias of the counter in r6 are chosen by the
   caller and are not visible in this file.  */
911 1.1 mrg .p2align 5
/* Loop exit taken from the bt/s below: store the two longwords that were
   already loaded into r0/r1 and return.  */
912 1.1 mrg L_movmem_2mod4_end:
913 1.1 mrg mov.l r0,@(16,r4)
914 1.1 mrg rts
915 1.1 mrg mov.l r1,@(20,r4)
916 1.1 mrg
917 1.1 mrg .p2align 2
918 1.1 mrg
919 1.1 mrg GLOBAL(movmem_i4_even):
920 1.1 mrg mov.l @r5+,r0
921 1.1 mrg bra L_movmem_start_even
922 1.1 mrg mov.l @r5+,r1 /* delay slot: second preloaded longword */
923 1.1 mrg
924 1.1 mrg GLOBAL(movmem_i4_odd):
925 1.1 mrg mov.l @r5+,r1
926 1.1 mrg add #-4,r4 /* bias r4 down by 4 so the stores below use the same offsets (4, 8, 12) as the loop body */
927 1.1 mrg mov.l @r5+,r2
928 1.1 mrg mov.l @r5+,r3
929 1.1 mrg mov.l r1,@(4,r4)
930 1.1 mrg mov.l r2,@(8,r4)
931 1.1 mrg
932 1.1 mrg L_movmem_loop:
933 1.1 mrg mov.l r3,@(12,r4)
934 1.1 mrg dt r6 /* r6 -= 1; T = (r6 == 0) */
935 1.1 mrg mov.l @r5+,r0
936 1.1 mrg bt/s L_movmem_2mod4_end /* counter exhausted: finish with two stores */
937 1.1 mrg mov.l @r5+,r1 /* delay slot: executed whether or not the branch is taken */
938 1.1 mrg add #16,r4
939 1.1 mrg L_movmem_start_even:
940 1.1 mrg mov.l @r5+,r2
941 1.1 mrg mov.l @r5+,r3
942 1.1 mrg mov.l r0,@r4
943 1.1 mrg dt r6 /* r6 -= 1; T = (r6 == 0) */
944 1.1 mrg mov.l r1,@(4,r4)
945 1.1 mrg bf/s L_movmem_loop /* counter not yet zero: go round again */
946 1.1 mrg mov.l r2,@(8,r4) /* delay slot: executed whether or not the branch is taken */
947 1.1 mrg rts
948 1.1 mrg mov.l r3,@(12,r4) /* delay slot: final store */
949 1.1 mrg
950 1.1 mrg ENDFUNC(GLOBAL(movmem_i4_even))
951 1.1 mrg ENDFUNC(GLOBAL(movmem_i4_odd))
952 1.1 mrg
/* movmemSI12_i4: copy 12 bytes (three longwords) from the address in r5
   to the address in r4.  All three loads are issued before the first
   store.  Uses r0, r1 and r2 as temporaries; r4 and r5 are not modified.  */
953 1.1 mrg .p2align 4
954 1.1 mrg GLOBAL(movmemSI12_i4):
955 1.1 mrg mov.l @r5,r0
956 1.1 mrg mov.l @(4,r5),r1
957 1.1 mrg mov.l @(8,r5),r2
958 1.1 mrg mov.l r0,@r4
959 1.1 mrg mov.l r1,@(4,r4)
960 1.1 mrg rts
961 1.1 mrg mov.l r2,@(8,r4) /* delay slot: last store */
962 1.1 mrg
963 1.1 mrg ENDFUNC(GLOBAL(movmemSI12_i4))
964 1.1 mrg #endif
965 1.1 mrg
966 1.1 mrg #ifdef L_mulsi3
967 1.1 mrg
968 1.1 mrg
969 1.1 mrg .global GLOBAL(mulsi3)
970 1.1 mrg HIDDEN_FUNC(GLOBAL(mulsi3))
971 1.1 mrg
! mulsi3: 32 x 32 -> 32 bit multiply built from 16 x 16 unsigned multiplies.
!
! Entry:
! r4 = aabb (aa = high 16 bits, bb = low 16 bits)
! r5 = ccdd (cc = high 16 bits, dd = low 16 bits)
!
! Exit:
! r0 = low 32 bits of r4 * r5
!
! Destroys:
! r1, r2, r3, macl, T bit
!
972 1.1 mrg ! r4 = aabb
973 1.1 mrg ! r5 = ccdd
974 1.1 mrg ! r0 = aabb*ccdd via partial products
975 1.1 mrg !
976 1.1 mrg ! if aa == 0 and cc == 0
977 1.1 mrg ! r0 = bb*dd
978 1.1 mrg !
979 1.1 mrg ! else
980 1.1 mrg ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
981 1.1 mrg !
982 1.1 mrg
983 1.1 mrg GLOBAL(mulsi3):
984 1.1 mrg mulu.w r4,r5 ! multiply the lsws macl=bb*dd
985 1.1 mrg mov r5,r3 ! r3 = ccdd
986 1.1 mrg swap.w r4,r2 ! r2 = bbaa
987 1.1 mrg xtrct r2,r3 ! r3 = aacc
988 1.1 mrg tst r3,r3 ! msws zero ?
989 1.1 mrg bf LOCAL(hiset)
990 1.1 mrg rts ! yes - then we have the answer
991 1.1 mrg sts macl,r0
992 1.1 mrg
993 1.1 mrg LOCAL(hiset): sts macl,r0 ! r0 = bb*dd
994 1.1 mrg mulu.w r2,r5 ! brewing macl = aa*dd
995 1.1 mrg sts macl,r1
996 1.1 mrg mulu.w r3,r4 ! brewing macl = cc*bb
997 1.1 mrg sts macl,r2
998 1.1 mrg add r1,r2 ! r2 = aa*dd + cc*bb
999 1.1 mrg shll16 r2 ! scale the cross terms by 65536
1000 1.1 mrg rts
1001 1.1 mrg add r2,r0 ! delay slot: r0 = bb*dd + cross terms
1002 1.1 mrg
1003 1.1 mrg ENDFUNC(GLOBAL(mulsi3))
1004 1.1 mrg #endif
1005 1.1 mrg #endif /* ! __SH5__ */
1006 1.3 mrg
1007 1.3 mrg /*------------------------------------------------------------------------------
1008 1.3 mrg 32 bit signed integer division that uses FPU double precision division. */
1009 1.3 mrg
1010 1.1 mrg #ifdef L_sdivsi3_i4
1011 1.1 mrg .title "SH DIVIDE"
1012 1.3 mrg
1013 1.1 mrg #if defined (__SH4__) || defined (__SH2A__)
1014 1.3 mrg /* This variant is used when FPSCR.PR = 1 (double precision) is the default
1015 1.3 mrg setting.
1016 1.3 mrg Args in r4 and r5, result in fpul, clobber dr0, dr2. */
1017 1.1 mrg
1018 1.1 mrg .global GLOBAL(sdivsi3_i4)
1019 1.1 mrg HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
/* sdivsi3_i4 (FPSCR.PR = 1 variant): signed 32-bit division r4 / r5.
   Both operands are converted to double, divided, and the quotient is
   truncated back to an integer that is left in fpul.  */
1020 1.1 mrg GLOBAL(sdivsi3_i4):
1021 1.1 mrg lds r4,fpul
1022 1.1 mrg float fpul,dr0 /* dr0 = (double) dividend */
1023 1.1 mrg lds r5,fpul
1024 1.1 mrg float fpul,dr2 /* dr2 = (double) divisor */
1025 1.1 mrg fdiv dr2,dr0 /* dr0 = dr0 / dr2 */
1026 1.1 mrg rts
1027 1.1 mrg ftrc dr0,fpul /* delay slot: fpul = quotient truncated to an integer */
1028 1.1 mrg
1029 1.1 mrg ENDFUNC(GLOBAL(sdivsi3_i4))
1030 1.3 mrg
1031 1.1 mrg #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1032 1.3 mrg /* This variant is used when FPSCR.PR = 0 (single precision) is the default
1033 1.3 mrg setting.
1034 1.3 mrg Args in r4 and r5, result in fpul, clobber r2, dr0, dr2.
1035 1.3 mrg For this to work, we must temporarily switch the FPU to double precision,
1036 1.3 mrg but we had better not touch FPSCR.FR. See PR 6526. */
1037 1.1 mrg
1038 1.1 mrg #if ! __SH5__ || __SH5__ == 32
1039 1.1 mrg #if __SH5__
1040 1.1 mrg .mode SHcompact
1041 1.1 mrg #endif
1042 1.1 mrg .global GLOBAL(sdivsi3_i4)
1043 1.1 mrg HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1044 1.1 mrg GLOBAL(sdivsi3_i4): // signed r4 / r5 with FPSCR.PR switched to 1 around the divide; quotient left in fpul
1045 1.3 mrg
1046 1.3 mrg #ifndef __SH4A__
1047 1.3 mrg mov.l r3,@-r15 // r3 is used as scratch below, so push it first
1048 1.3 mrg sts fpscr,r2 // r2 = FPSCR on entry, written back before returning
1049 1.3 mrg mov #8,r3
1050 1.3 mrg swap.w r3,r3 // r3 = 1 << 19 (FPSCR.PR bit)
1051 1.3 mrg or r2,r3 // OR keeps every other FPSCR bit (FR included) as it was
1052 1.3 mrg lds r3,fpscr // Set FPSCR.PR = 1.
1053 1.3 mrg lds r4,fpul
1054 1.3 mrg float fpul,dr0 // dr0 = (double) dividend
1055 1.3 mrg lds r5,fpul
1056 1.3 mrg float fpul,dr2 // dr2 = (double) divisor
1057 1.3 mrg fdiv dr2,dr0 // dr0 = dr0 / dr2
1058 1.3 mrg ftrc dr0,fpul // fpul = truncated quotient, converted while PR is still 1
1059 1.3 mrg lds r2,fpscr // write the entry FPSCR value back
1060 1.3 mrg rts
1061 1.3 mrg mov.l @r15+,r3 // delay slot: pop r3
1062 1.3 mrg #else
1063 1.3 mrg /* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. */
1064 1.3 mrg fpchg // flip PR (0 by default for this variant) to 1
1065 1.3 mrg lds r4,fpul
1066 1.3 mrg float fpul,dr0 // dr0 = (double) dividend
1067 1.3 mrg lds r5,fpul
1068 1.3 mrg float fpul,dr2 // dr2 = (double) divisor
1069 1.3 mrg fdiv dr2,dr0 // dr0 = dr0 / dr2
1070 1.3 mrg ftrc dr0,fpul // fpul = truncated quotient
1071 1.1 mrg rts
1072 1.3 mrg fpchg // delay slot: flip PR back
1073 1.3 mrg
1074 1.3 mrg #endif /* __SH4A__ */
1075 1.1 mrg
1076 1.1 mrg ENDFUNC(GLOBAL(sdivsi3_i4))
1077 1.1 mrg #endif /* ! __SH5__ || __SH5__ == 32 */
1078 1.1 mrg #endif /* ! __SH4__ || __SH2A__ */
1079 1.3 mrg #endif /* L_sdivsi3_i4 */
1080 1.1 mrg
1081 1.3 mrg //------------------------------------------------------------------------------
1082 1.1 mrg #ifdef L_sdivsi3
1083 1.1 mrg /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1084 1.1 mrg sh2e/sh3e code. */
1085 1.1 mrg !!
1086 1.1 mrg !! Steve Chamberlain
1087 1.1 mrg !! sac@cygnus.com
1088 1.1 mrg !!
1089 1.1 mrg !!
1090 1.1 mrg
1091 1.1 mrg !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1092 1.1 mrg
1093 1.1 mrg .global GLOBAL(sdivsi3)
1094 1.1 mrg #if __SHMEDIA__
1095 1.1 mrg #if __SH5__ == 32
1096 1.1 mrg .section .text..SHmedia32,"ax"
1097 1.1 mrg #else
1098 1.1 mrg .text
1099 1.1 mrg #endif
1100 1.1 mrg .align 2
1101 1.1 mrg #if 0
1102 1.1 mrg /* The assembly code that follows is a hand-optimized version of the C
1103 1.1 mrg code that follows. Note that the registers that are modified are
1104 1.1 mrg exactly those listed as clobbered in the patterns divsi3_i1 and
1105 1.1 mrg divsi3_i1_media.
1106 1.1 mrg
1107 1.1 mrg int __sdivsi3 (i, j)
1108 1.1 mrg int i, j;
1109 1.1 mrg {
1110 1.1 mrg register unsigned long long r18 asm ("r18");
1111 1.1 mrg register unsigned long long r19 asm ("r19");
1112 1.1 mrg register unsigned long long r0 asm ("r0") = 0;
1113 1.1 mrg register unsigned long long r1 asm ("r1") = 1;
1114 1.1 mrg register int r2 asm ("r2") = i >> 31;
1115 1.1 mrg register int r3 asm ("r3") = j >> 31;
1116 1.1 mrg
1117 1.1 mrg r2 = r2 ? r2 : r1;
1118 1.1 mrg r3 = r3 ? r3 : r1;
1119 1.1 mrg r18 = i * r2;
1120 1.1 mrg r19 = j * r3;
1121 1.1 mrg r2 *= r3;
1122 1.1 mrg
1123 1.1 mrg r19 <<= 31;
1124 1.1 mrg r1 <<= 31;
1125 1.1 mrg do
1126 1.1 mrg if (r18 >= r19)
1127 1.1 mrg r0 |= r1, r18 -= r19;
1128 1.1 mrg while (r19 >>= 1, r1 >>= 1);
1129 1.1 mrg
1130 1.1 mrg return r2 * (int)r0;
1131 1.1 mrg }
1132 1.1 mrg */
1133 1.1 mrg GLOBAL(sdivsi3): // disabled (#if 0) variant; trailing comments quote the C reference code above
1134 1.1 mrg pt/l LOCAL(sdivsi3_dontadd), tr2
1135 1.1 mrg pt/l LOCAL(sdivsi3_loop), tr1
1136 1.1 mrg ptabs/l r18, tr0 // tr0 = branch target taken from r18, used by the final blink
1137 1.1 mrg movi 0, r0 // r0 = 0
1138 1.1 mrg movi 1, r1 // r1 = 1
1139 1.1 mrg shari.l r4, 31, r2 // r2 = i >> 31
1140 1.1 mrg shari.l r5, 31, r3 // r3 = j >> 31
1141 1.1 mrg cmveq r2, r1, r2 // r2 = r2 ? r2 : r1
1142 1.1 mrg cmveq r3, r1, r3 // r3 = r3 ? r3 : r1
1143 1.1 mrg muls.l r4, r2, r18 // r18 = i * r2
1144 1.1 mrg muls.l r5, r3, r19 // r19 = j * r3
1145 1.1 mrg muls.l r2, r3, r2 // r2 *= r3
1146 1.1 mrg shlli r19, 31, r19 // r19 <<= 31
1147 1.1 mrg shlli r1, 31, r1 // r1 <<= 31
1148 1.1 mrg LOCAL(sdivsi3_loop):
1149 1.1 mrg bgtu r19, r18, tr2 // if (r18 >= r19) falls through, otherwise skip the update
1150 1.1 mrg or r0, r1, r0 // r0 |= r1
1151 1.1 mrg sub r18, r19, r18 // r18 -= r19
1152 1.1 mrg LOCAL(sdivsi3_dontadd):
1153 1.1 mrg shlri r1, 1, r1 // r1 >>= 1
1154 1.1 mrg shlri r19, 1, r19 // r19 >>= 1
1155 1.1 mrg bnei r1, 0, tr1 // while (r1 != 0)
1156 1.1 mrg muls.l r0, r2, r0 // r0 = r2 * (int) r0
1157 1.1 mrg add.l r0, r63, r0
1158 1.1 mrg blink tr0, r63 // branch to the target loaded by ptabs above
1159 1.1 mrg #elif 0 /* ! 0 */
1160 1.1 mrg // inputs: r4,r5
1161 1.1 mrg // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1162 1.1 mrg // result in r0
1163 1.1 mrg GLOBAL(sdivsi3): // disabled (#elif 0) variant: straight-line code, no loop
1164 1.1 mrg // can create absolute value without extra latency,
1165 1.1 mrg // but dependent on proper sign extension of inputs:
1166 1.1 mrg // shari.l r5,31,r2
1167 1.1 mrg // xor r5,r2,r20
1168 1.1 mrg // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1169 1.1 mrg shari.l r5,31,r2
1170 1.1 mrg ori r2,1,r2 // r2 = -1 or +1 according to the sign of r5
1171 1.1 mrg muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1172 1.1 mrg movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1173 1.1 mrg shari.l r4,31,r3
1174 1.1 mrg nsb r20,r0
1175 1.1 mrg shlld r20,r0,r25
1176 1.1 mrg shlri r25,48,r25
1177 1.1 mrg sub r19,r25,r1
1178 1.1 mrg mmulfx.w r1,r1,r2
1179 1.1 mrg mshflo.w r1,r63,r1
1180 1.1 mrg // If r4 was to be used in-place instead of r21, could use this sequence
1181 1.1 mrg // to compute absolute:
1182 1.1 mrg // sub r63,r4,r19 // compute absolute value of r4
1183 1.1 mrg // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1184 1.1 mrg // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1185 1.1 mrg ori r3,1,r3 // r3 = -1 or +1 according to the sign of r4
1186 1.1 mrg mmulfx.w r25,r2,r2
1187 1.1 mrg sub r19,r0,r0
1188 1.1 mrg muls.l r4,r3,r21 // r21 = r4 times its own sign (same trick as for r20 above)
1189 1.1 mrg msub.w r1,r2,r2
1190 1.1 mrg addi r2,-2,r1
1191 1.1 mrg mulu.l r21,r1,r19
1192 1.1 mrg mmulfx.w r2,r2,r2
1193 1.1 mrg shlli r1,15,r1
1194 1.1 mrg shlrd r19,r0,r19
1195 1.1 mrg mulu.l r19,r20,r3
1196 1.1 mrg mmacnfx.wl r25,r2,r1
1197 1.1 mrg ptabs r18,tr0 // tr0 = branch target from r18; r18 is reused as scratch below
1198 1.1 mrg sub r21,r3,r25
1199 1.1 mrg
1200 1.1 mrg mulu.l r25,r1,r2
1201 1.1 mrg addi r0,14,r0
1202 1.1 mrg xor r4,r5,r18
1203 1.1 mrg shlrd r2,r0,r2
1204 1.1 mrg mulu.l r2,r20,r3
1205 1.1 mrg add r19,r2,r19
1206 1.1 mrg shari.l r18,31,r18 // r18 = -1 when r4 and r5 differ in sign, else 0
1207 1.1 mrg sub r25,r3,r25
1208 1.1 mrg
1209 1.1 mrg mulu.l r25,r1,r2
1210 1.1 mrg sub r25,r20,r25
1211 1.1 mrg add r19,r18,r19 // add the sign mask now; the xor at the end completes the conditional negate
1212 1.1 mrg shlrd r2,r0,r2
1213 1.1 mrg mulu.l r2,r20,r3
1214 1.1 mrg addi r25,1,r25
1215 1.1 mrg add r19,r2,r19
1216 1.1 mrg
1217 1.1 mrg cmpgt r25,r3,r25 // r25 = 1 or 0
1218 1.1 mrg add.l r19,r25,r0
1219 1.1 mrg xor r0,r18,r0 // (x + mask) ^ mask: negates when the operand signs differ
1220 1.1 mrg blink tr0,r63
1221 1.1 mrg #else /* ! 0 && ! 0 */
1222 1.1 mrg
1223 1.1 mrg // inputs: r4,r5
1224 1.1 mrg // clobbered: r1,r18,r19,r20,r21,r25,tr0
1225 1.1 mrg // result in r0
1226 1.1 mrg HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1227 1.1 mrg #ifndef __pic__
1228 1.1 mrg FUNC(GLOBAL(sdivsi3))
1229 1.1 mrg GLOBAL(sdivsi3): /* this is the shcompact entry point */
1230 1.1 mrg // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1231 1.1 mrg // with the SHcompact implementation, which clobbers tr1 / tr2.
1232 1.1 mrg .global GLOBAL(sdivsi3_1)
1233 1.1 mrg GLOBAL(sdivsi3_1):
1234 1.1 mrg .global GLOBAL(div_table_internal)
1235 1.1 mrg movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 // upper 16 bits of the table address
1236 1.1 mrg shori GLOBAL(div_table_internal) & 65535, r20 // shift in the lower 16 bits: r20 = table address
1237 1.1 mrg #endif
1238 1.1 mrg .global GLOBAL(sdivsi3_2)
1239 1.1 mrg // div_table in r20
1240 1.1 mrg // clobbered: r1,r18,r19,r21,r25,tr0
1241 1.1 mrg GLOBAL(sdivsi3_2): // entry for callers that already hold the table address in r20; in the non-pic build the code above falls through to here
1242 1.1 mrg nsb r5, r1
1243 1.1 mrg shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1244 1.1 mrg shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1245 1.1 mrg ldx.ub r20, r21, r19 // u0.8
1246 1.1 mrg shari r25, 32, r25 // normalize to s2.30
1247 1.1 mrg shlli r21, 1, r21
1248 1.1 mrg muls.l r25, r19, r19 // s2.38
1249 1.1 mrg ldx.w r20, r21, r21 // s2.14
1250 1.1 mrg ptabs r18, tr0 // tr0 = branch target from r18; r18 is reused as scratch below
1251 1.1 mrg shari r19, 24, r19 // truncate to s2.14
1252 1.1 mrg sub r21, r19, r19 // some 11 bit inverse in s1.14
1253 1.1 mrg muls.l r19, r19, r21 // u0.28
1254 1.1 mrg sub r63, r1, r1
1255 1.1 mrg addi r1, 92, r1 // r1 = 92 - (nsb result); used as the shard count near the end
1256 1.1 mrg muls.l r25, r21, r18 // s2.58
1257 1.1 mrg shlli r19, 45, r19 // multiply by two and convert to s2.58
1258 1.1 mrg /* bubble */
1259 1.1 mrg sub r19, r18, r18
1260 1.1 mrg shari r18, 28, r18 // some 22 bit inverse in s1.30
1261 1.1 mrg muls.l r18, r25, r0 // s2.60
1262 1.1 mrg muls.l r18, r4, r25 // s32.30
1263 1.1 mrg /* bubble */
1264 1.1 mrg shari r0, 16, r19 // s-16.44
1265 1.1 mrg muls.l r19, r18, r19 // s-16.74
1266 1.1 mrg shari r25, 63, r0 // r0 = 0 or -1 from the sign of the product in r25
1267 1.1 mrg shari r4, 14, r18 // s19.-14
1268 1.1 mrg shari r19, 30, r19 // s-16.44
1269 1.1 mrg muls.l r19, r18, r19 // s15.30
1270 1.1 mrg xor r21, r0, r21 // You could also use the constant 1 << 27.
1271 1.1 mrg add r21, r25, r21
1272 1.1 mrg sub r21, r19, r21
1273 1.1 mrg shard r21, r1, r21
1274 1.1 mrg sub r21, r0, r0 // result in r0 (per the register summary above this function)
1275 1.1 mrg blink tr0, r63
1276 1.1 mrg #ifndef __pic__
1277 1.1 mrg ENDFUNC(GLOBAL(sdivsi3))
1278 1.1 mrg #endif
1279 1.1 mrg ENDFUNC(GLOBAL(sdivsi3_2))
1280 1.1 mrg #endif
1281 1.1 mrg #elif __SHMEDIA__
1282 1.1 mrg /* m5compact-nofpu */
1283 1.1 mrg // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1284 1.1 mrg .mode SHmedia
1285 1.1 mrg .section .text..SHmedia32,"ax"
1286 1.1 mrg .align 2
1287 1.1 mrg FUNC(GLOBAL(sdivsi3))
1288 1.1 mrg GLOBAL(sdivsi3): // signed r4 / r5 by a 32 step shift-and-subtract loop; result in r0
1289 1.1 mrg pt/l LOCAL(sdivsi3_dontsub), tr0
1290 1.1 mrg pt/l LOCAL(sdivsi3_loop), tr1
1291 1.1 mrg ptabs/l r18,tr2 // tr2 = branch target taken from r18, used by the final blink
1292 1.1 mrg shari.l r4,31,r18 // r18 = 0 or -1 from the sign of r4
1293 1.1 mrg shari.l r5,31,r19 // r19 = 0 or -1 from the sign of r5
1294 1.1 mrg xor r4,r18,r20
1295 1.1 mrg xor r5,r19,r21
1296 1.1 mrg sub.l r20,r18,r20 // r20 = absolute value of r4 ((x ^ mask) - mask)
1297 1.1 mrg sub.l r21,r19,r21 // r21 = absolute value of r5
1298 1.1 mrg xor r18,r19,r19 // r19 = -1 when the operand signs differ, else 0
1299 1.1 mrg shlli r21,32,r25 // r25 = divisor magnitude moved to the upper 32 bits
1300 1.1 mrg addi r25,-1,r21 // r21 = (divisor << 32) - 1
1301 1.1 mrg addz.l r20,r63,r20 // zero-extend the dividend magnitude to 64 bits
1302 1.1 mrg LOCAL(sdivsi3_loop):
1303 1.1 mrg shlli r20,1,r20
1304 1.1 mrg bgeu/u r21,r20,tr0 // skip the subtract when r21 >= r20 (unsigned)
1305 1.1 mrg sub r20,r21,r20 // subtracting (divisor << 32) - 1 also adds 1 to the low half: the quotient bit
1306 1.1 mrg LOCAL(sdivsi3_dontsub):
1307 1.1 mrg addi.l r25,-1,r25 // the low 32 bits of r25 start at 0 and serve as the loop counter
1308 1.1 mrg bnei r25,-32,tr1 // loop until the counter reaches -32
1309 1.1 mrg xor r20,r19,r20
1310 1.1 mrg sub.l r20,r19,r0 // r0 = (r20 ^ mask) - mask on 32 bits: negated when the signs differed
1311 1.1 mrg blink tr2,r63
1312 1.1 mrg ENDFUNC(GLOBAL(sdivsi3))
1313 1.1 mrg #else /* ! __SHMEDIA__ */
1314 1.1 mrg FUNC(GLOBAL(sdivsi3))
1315 1.1 mrg GLOBAL(sdivsi3): // signed r4 / r5 with 32 unrolled div1 steps; result in r0 (0 when r5 is 0)
1316 1.1 mrg mov r4,r1 // r1 = dividend; quotient bits are rotated into it below
1317 1.1 mrg mov r5,r0 // r0 = divisor
1318 1.1 mrg
1319 1.1 mrg tst r0,r0
1320 1.1 mrg bt div0 // divisor is zero: return 0
1321 1.1 mrg mov #0,r2 // r2 = 0, the zero operand for div0s/subc/addc
1322 1.1 mrg div0s r2,r1 // with r2 = 0 this leaves T = sign bit of the dividend
1323 1.1 mrg subc r3,r3 // r3 = 0 - T: upper word (sign extension) of the dividend
1324 1.1 mrg subc r2,r1 // r1 = r1 - T: a negative dividend is decremented by one
1325 1.1 mrg div0s r0,r3 // initialise the signed divide state from divisor and upper word
1326 1.1 mrg rotcl r1 // 32 rotcl/div1 pairs follow: rotcl moves T into bit 0 of r1 and bit 31 of r1 into T
1327 1.1 mrg div1 r0,r3 // one divide step on r3 with divisor r0
1328 1.1 mrg rotcl r1
1329 1.1 mrg div1 r0,r3
1330 1.1 mrg rotcl r1
1331 1.1 mrg div1 r0,r3
1332 1.1 mrg rotcl r1
1333 1.1 mrg div1 r0,r3
1334 1.1 mrg rotcl r1
1335 1.1 mrg div1 r0,r3
1336 1.1 mrg rotcl r1
1337 1.1 mrg div1 r0,r3
1338 1.1 mrg rotcl r1
1339 1.1 mrg div1 r0,r3
1340 1.1 mrg rotcl r1
1341 1.1 mrg div1 r0,r3
1342 1.1 mrg rotcl r1
1343 1.1 mrg div1 r0,r3
1344 1.1 mrg rotcl r1
1345 1.1 mrg div1 r0,r3
1346 1.1 mrg rotcl r1
1347 1.1 mrg div1 r0,r3
1348 1.1 mrg rotcl r1
1349 1.1 mrg div1 r0,r3
1350 1.1 mrg rotcl r1
1351 1.1 mrg div1 r0,r3
1352 1.1 mrg rotcl r1
1353 1.1 mrg div1 r0,r3
1354 1.1 mrg rotcl r1
1355 1.1 mrg div1 r0,r3
1356 1.1 mrg rotcl r1
1357 1.1 mrg div1 r0,r3
1358 1.1 mrg rotcl r1
1359 1.1 mrg div1 r0,r3
1360 1.1 mrg rotcl r1
1361 1.1 mrg div1 r0,r3
1362 1.1 mrg rotcl r1
1363 1.1 mrg div1 r0,r3
1364 1.1 mrg rotcl r1
1365 1.1 mrg div1 r0,r3
1366 1.1 mrg rotcl r1
1367 1.1 mrg div1 r0,r3
1368 1.1 mrg rotcl r1
1369 1.1 mrg div1 r0,r3
1370 1.1 mrg rotcl r1
1371 1.1 mrg div1 r0,r3
1372 1.1 mrg rotcl r1
1373 1.1 mrg div1 r0,r3
1374 1.1 mrg rotcl r1
1375 1.1 mrg div1 r0,r3
1376 1.1 mrg rotcl r1
1377 1.1 mrg div1 r0,r3
1378 1.1 mrg rotcl r1
1379 1.1 mrg div1 r0,r3
1380 1.1 mrg rotcl r1
1381 1.1 mrg div1 r0,r3
1382 1.1 mrg rotcl r1
1383 1.1 mrg div1 r0,r3
1384 1.1 mrg rotcl r1
1385 1.1 mrg div1 r0,r3
1386 1.1 mrg rotcl r1
1387 1.1 mrg div1 r0,r3
1388 1.1 mrg rotcl r1
1389 1.1 mrg div1 r0,r3
1390 1.1 mrg rotcl r1 // rotate the T left by the last div1 into r1
1391 1.1 mrg addc r2,r1 // r1 = r1 + T (r2 is 0)
1392 1.1 mrg rts
1393 1.1 mrg mov r1,r0 // delay slot: r0 = quotient
1394 1.1 mrg
1395 1.1 mrg
1396 1.1 mrg div0: rts
1397 1.1 mrg mov #0,r0 // delay slot: r0 = 0 for a zero divisor
1398 1.1 mrg
1399 1.1 mrg ENDFUNC(GLOBAL(sdivsi3))
1400 1.3 mrg #endif /* ! __SHMEDIA__ */
1401 1.3 mrg #endif /* L_sdivsi3 */
1402 1.3 mrg
1403 1.3 mrg /*------------------------------------------------------------------------------
1404 1.3 mrg 32 bit unsigned integer division that uses FPU double precision division. */
1405 1.3 mrg
1406 1.1 mrg #ifdef L_udivsi3_i4
1407 1.3 mrg .title "SH DIVIDE"
1408 1.1 mrg
1409 1.1 mrg #if defined (__SH4__) || defined (__SH2A__)
1410 1.3 mrg /* This variant is used when FPSCR.PR = 1 (double precision) is the default
1411 1.3 mrg setting.
1412 1.3 mrg Args in r4 and r5, result in fpul,
1413 1.3 mrg clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit */
1414 1.1 mrg
1415 1.1 mrg .global GLOBAL(udivsi3_i4)
1416 1.1 mrg HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1417 1.1 mrg GLOBAL(udivsi3_i4): // unsigned r4 / r5 through a double precision fdiv; quotient left in fpul
1418 1.3 mrg mov #1,r1
1419 1.3 mrg cmp/hi r1,r5 // T = (r5 > 1), unsigned compare
1420 1.3 mrg bf/s trivial // divisor 0 or 1: return the dividend as is; otherwise the quotient is below 2^31
1421 1.3 mrg rotr r1 // delay slot: r1 = 1 << 31
1422 1.3 mrg xor r1,r4 // flip bit 31: read as signed, r4 is now dividend - 2^31
1423 1.3 mrg lds r4,fpul
1424 1.3 mrg mova L1,r0 // r0 = address of the 2147483648.0 constant at L1
1425 1.1 mrg #ifdef FMOVD_WORKS
1426 1.3 mrg fmov.d @r0+,dr4 // dr4 = 2147483648.0, loaded in one 64 bit move
1427 1.1 mrg #else
1428 1.3 mrg fmov.s @r0+,DR40 // load the constant as two 32 bit words; DR40/DR41 are defined outside this chunk,
1429 1.3 mrg fmov.s @r0,DR41 // presumably naming the two halves of dr4 - confirm in lib1funcs.h
1430 1.1 mrg #endif
1431 1.3 mrg float fpul,dr0 // dr0 = (double) (dividend - 2^31)
1432 1.3 mrg xor r1,r5 // same bit 31 flip for the divisor
1433 1.3 mrg lds r5,fpul
1434 1.3 mrg float fpul,dr2 // dr2 = (double) (divisor - 2^31)
1435 1.3 mrg fadd dr4,dr0 // add 2^31 back: dr0 = dividend as an unsigned value
1436 1.3 mrg fadd dr4,dr2 // add 2^31 back: dr2 = divisor as an unsigned value
1437 1.3 mrg fdiv dr2,dr0 // dr0 = dr0 / dr2
1438 1.1 mrg rts
1439 1.3 mrg ftrc dr0,fpul // delay slot: fpul = truncated quotient
1440 1.1 mrg
1441 1.1 mrg trivial:
1442 1.1 mrg rts
1443 1.3 mrg lds r4,fpul // delay slot: fpul = dividend (r4 has not been modified on this path)
1444 1.1 mrg
1445 1.1 mrg .align 2
1446 1.1 mrg #ifdef FMOVD_WORKS
1447 1.3 mrg .align 3 // Make the double below 8 byte aligned.
1448 1.1 mrg #endif
1449 1.1 mrg L1:
1450 1.1 mrg .double 2147483648
1451 1.1 mrg
1452 1.1 mrg ENDFUNC(GLOBAL(udivsi3_i4))
1453 1.3 mrg
1454 1.1 mrg #elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__)
1455 1.1 mrg #if ! __SH5__ || __SH5__ == 32
1456 1.1 mrg !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1457 1.1 mrg .mode SHmedia
1458 1.1 mrg .global GLOBAL(udivsi3_i4)
1459 1.1 mrg HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1460 1.1 mrg GLOBAL(udivsi3_i4): // unsigned r4 / r5 through a 64 bit integer to double conversion and fdiv.d
1461 1.1 mrg addz.l r4,r63,r20 // r20 = r4 zero-extended to 64 bits
1462 1.1 mrg addz.l r5,r63,r21 // r21 = r5 zero-extended to 64 bits
1463 1.1 mrg fmov.qd r20,dr0 // move the 64 bit integers into FP registers
1464 1.1 mrg fmov.qd r21,dr32
1465 1.1 mrg ptabs r18,tr0 // tr0 = branch target taken from r18, used by the final blink
1466 1.1 mrg float.qd dr0,dr0 // dr0 = (double) dividend
1467 1.1 mrg float.qd dr32,dr32 // dr32 = (double) divisor
1468 1.1 mrg fdiv.d dr0,dr32,dr0 // dr0 = dr0 / dr32
1469 1.1 mrg ftrc.dq dr0,dr32 // dr32 = quotient converted back to a 64 bit integer
1470 1.1 mrg fmov.s fr33,fr32 // copy fr33 to fr32; NOTE(review): presumably fr33 is the low word of dr32 - confirm
1471 1.1 mrg blink tr0,r63
1472 1.1 mrg
1473 1.1 mrg ENDFUNC(GLOBAL(udivsi3_i4))
1474 1.1 mrg #endif /* ! __SH5__ || __SH5__ == 32 */
1475 1.3 mrg
1476 1.1 mrg #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1477 1.3 mrg /* This variant is used when FPSCR.PR = 0 (single precision) is the default
1478 1.3 mrg setting.
1479 1.3 mrg Args in r4 and r5, result in fpul,
1480 1.3 mrg clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit.
1481 1.3 mrg For this to work, we must temporarily switch the FPU to double precision,
1482 1.3 mrg but we had better not touch FPSCR.FR. See PR 6526. */
1483 1.1 mrg
1484 1.1 mrg .global GLOBAL(udivsi3_i4)
1485 1.1 mrg HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1486 1.1 mrg GLOBAL(udivsi3_i4): // unsigned r4 / r5 with FPSCR.PR switched to 1 around the divide; quotient left in fpul
1487 1.3 mrg
1488 1.3 mrg #ifndef __SH4A__
1489 1.3 mrg mov #1,r1
1490 1.3 mrg cmp/hi r1,r5 // T = (r5 > 1), unsigned compare
1491 1.3 mrg bf/s trivial // divisor 0 or 1: return the dividend as is
1492 1.3 mrg rotr r1 // r1 = 1 << 31
1493 1.3 mrg sts.l fpscr,@-r15 // push the entry FPSCR; popped again in the rts delay slot
1494 1.3 mrg xor r1,r4 // flip bit 31: read as signed, r4 is now dividend - 2^31
1495 1.3 mrg mov.l @(0,r15),r0 // r0 = the FPSCR value just pushed
1496 1.3 mrg xor r1,r5 // same bit 31 flip for the divisor
1497 1.3 mrg mov.l L2,r1 // r1 = FPSCR bits to set (constant at L2)
1498 1.3 mrg lds r4,fpul
1499 1.3 mrg or r0,r1 // OR with the entry FPSCR so all other bits stay as they were
1500 1.3 mrg mova L1,r0 // r0 = address of the 2147483648.0 constant at L1
1501 1.3 mrg lds r1,fpscr // switch to double precision (and SZ = 1 if FMOVD_WORKS)
1502 1.1 mrg #ifdef FMOVD_WORKS
1503 1.3 mrg fmov.d @r0+,dr4 // dr4 = 2147483648.0, loaded in one 64 bit move
1504 1.1 mrg #else
1505 1.3 mrg fmov.s @r0+,DR40 // load the constant as two 32 bit words; DR40/DR41 are defined outside this chunk,
1506 1.3 mrg fmov.s @r0,DR41 // presumably naming the two halves of dr4 - confirm in lib1funcs.h
1507 1.1 mrg #endif
1508 1.3 mrg float fpul,dr0 // dr0 = (double) (dividend - 2^31)
1509 1.3 mrg lds r5,fpul
1510 1.3 mrg float fpul,dr2 // dr2 = (double) (divisor - 2^31)
1511 1.3 mrg fadd dr4,dr0 // add 2^31 back: dr0 = dividend as an unsigned value
1512 1.3 mrg fadd dr4,dr2 // add 2^31 back: dr2 = divisor as an unsigned value
1513 1.3 mrg fdiv dr2,dr0 // dr0 = dr0 / dr2
1514 1.3 mrg ftrc dr0,fpul // fpul = truncated quotient, converted before FPSCR is restored
1515 1.1 mrg rts
1516 1.3 mrg lds.l @r15+,fpscr // delay slot: pop the entry FPSCR
1517 1.1 mrg
1518 1.1 mrg #ifdef FMOVD_WORKS
1519 1.3 mrg .align 3 // Make the double below 8 byte aligned.
1520 1.1 mrg #endif
1521 1.1 mrg trivial:
1522 1.1 mrg rts
1523 1.3 mrg lds r4,fpul // delay slot: fpul = dividend (r4 has not been modified on this path)
1524 1.1 mrg
1525 1.1 mrg .align 2
1526 1.3 mrg L2:
1527 1.3 mrg #ifdef FMOVD_WORKS
1528 1.3 mrg .long 0x180000 // FPSCR.PR = 1, FPSCR.SZ = 1
1529 1.3 mrg #else
1530 1.3 mrg .long 0x80000 // FPSCR.PR = 1
1531 1.3 mrg #endif
1532 1.1 mrg L1:
1533 1.3 mrg .double 2147483648
1534 1.3 mrg
1535 1.1 mrg #else
1536 1.3 mrg /* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.
1537 1.3 mrg Although on SH4A fmovd usually works, it would require either additional
1538 1.3 mrg two fschg instructions or an FPSCR push + pop. It's not worth the effort
1539 1.3 mrg for loading only one double constant. */
1540 1.3 mrg mov #1,r1
1541 1.3 mrg cmp/hi r1,r5 // T = (r5 > 1), unsigned compare
1542 1.3 mrg bf/s trivial // divisor 0 or 1: return the dividend as is
1543 1.3 mrg rotr r1 // r1 = 1 << 31
1544 1.3 mrg fpchg // flip PR (0 by default for this variant) to 1
1545 1.3 mrg mova L1,r0 // r0 = address of the 2147483648.0 constant at L1
1546 1.3 mrg xor r1,r4 // flip bit 31: read as signed, r4 is now dividend - 2^31
1547 1.3 mrg fmov.s @r0+,DR40 // load the constant as two 32 bit words (DR40/DR41 are defined outside this chunk)
1548 1.3 mrg lds r4,fpul
1549 1.3 mrg fmov.s @r0,DR41
1550 1.3 mrg xor r1,r5 // same bit 31 flip for the divisor
1551 1.3 mrg float fpul,dr0 // dr0 = (double) (dividend - 2^31)
1552 1.3 mrg lds r5,fpul
1553 1.3 mrg float fpul,dr2 // dr2 = (double) (divisor - 2^31)
1554 1.3 mrg fadd dr4,dr0 // add 2^31 back: dr0 = dividend as an unsigned value
1555 1.3 mrg fadd dr4,dr2 // add 2^31 back: dr2 = divisor as an unsigned value
1556 1.3 mrg fdiv dr2,dr0 // dr0 = dr0 / dr2
1557 1.3 mrg ftrc dr0,fpul // fpul = truncated quotient
1558 1.3 mrg rts
1559 1.3 mrg fpchg // delay slot: flip PR back
1560 1.3 mrg
1561 1.3 mrg trivial:
1562 1.3 mrg rts
1563 1.3 mrg lds r4,fpul // delay slot: fpul = dividend (this path branches away before fpchg)
1564 1.3 mrg
1565 1.3 mrg .align 2
1566 1.3 mrg L1:
1567 1.1 mrg .double 2147483648
1568 1.1 mrg
1569 1.3 mrg #endif /* __SH4A__ */
1570 1.3 mrg
1571 1.3 mrg
1572 1.1 mrg ENDFUNC(GLOBAL(udivsi3_i4))
1573 1.1 mrg #endif /* ! __SH4__ */
1574 1.3 mrg #endif /* L_udivsi3_i4 */
1575 1.1 mrg
1576 1.1 mrg #ifdef L_udivsi3
1577 1.1 mrg /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1578 1.1 mrg sh2e/sh3e code. */
1579 1.1 mrg
1580 1.1 mrg !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1581 1.1 mrg .global GLOBAL(udivsi3)
1582 1.1 mrg HIDDEN_FUNC(GLOBAL(udivsi3))
1583 1.1 mrg
1584 1.1 mrg #if __SHMEDIA__
1585 1.1 mrg #if __SH5__ == 32
1586 1.1 mrg .section .text..SHmedia32,"ax"
1587 1.1 mrg #else
1588 1.1 mrg .text
1589 1.1 mrg #endif
1590 1.1 mrg .align 2
1591 1.1 mrg #if 0
1592 1.1 mrg /* The assembly code that follows is a hand-optimized version of the C
1593 1.1 mrg code that follows. Note that the registers that are modified are
1594 1.1 mrg exactly those listed as clobbered in the patterns udivsi3_i1 and
1595 1.1 mrg udivsi3_i1_media.
1596 1.1 mrg
1597 1.1 mrg unsigned
1598 1.1 mrg __udivsi3 (i, j)
1599 1.1 mrg unsigned i, j;
1600 1.1 mrg {
1601 1.1 mrg register unsigned long long r0 asm ("r0") = 0;
1602 1.1 mrg register unsigned long long r18 asm ("r18") = 1;
1603 1.1 mrg register unsigned long long r4 asm ("r4") = i;
1604 1.1 mrg register unsigned long long r19 asm ("r19") = j;
1605 1.1 mrg
1606 1.1 mrg r19 <<= 31;
1607 1.1 mrg r18 <<= 31;
1608 1.1 mrg do
1609 1.1 mrg if (r4 >= r19)
1610 1.1 mrg r0 |= r18, r4 -= r19;
1611 1.1 mrg while (r19 >>= 1, r18 >>= 1);
1612 1.1 mrg
1613 1.1 mrg return r0;
1614 1.1 mrg }
1615 1.1 mrg */
1616 1.1 mrg GLOBAL(udivsi3): // disabled (#if 0) variant; trailing comments quote the C reference code above
1617 1.1 mrg pt/l LOCAL(udivsi3_dontadd), tr2
1618 1.1 mrg pt/l LOCAL(udivsi3_loop), tr1
1619 1.1 mrg ptabs/l r18, tr0 // tr0 = branch target taken from r18, before r18 is reused below
1620 1.1 mrg movi 0, r0 // r0 = 0
1621 1.1 mrg movi 1, r18 // r18 = 1
1622 1.1 mrg addz.l r5, r63, r19 // r19 = j, zero-extended
1623 1.1 mrg addz.l r4, r63, r4 // r4 = i, zero-extended
1624 1.1 mrg shlli r19, 31, r19 // r19 <<= 31
1625 1.1 mrg shlli r18, 31, r18 // r18 <<= 31
1626 1.1 mrg LOCAL(udivsi3_loop):
1627 1.1 mrg bgtu r19, r4, tr2 // if (r4 >= r19) falls through, otherwise skip the update
1628 1.1 mrg or r0, r18, r0 // r0 |= r18
1629 1.1 mrg sub r4, r19, r4 // r4 -= r19
1630 1.1 mrg LOCAL(udivsi3_dontadd):
1631 1.1 mrg shlri r18, 1, r18 // r18 >>= 1
1632 1.1 mrg shlri r19, 1, r19 // r19 >>= 1
1633 1.1 mrg bnei r18, 0, tr1 // while (r18 != 0)
1634 1.1 mrg blink tr0, r63 // return r0
1635 1.1 mrg #else
1636 1.1 mrg GLOBAL(udivsi3): // unsigned r4 / r5; straight-line multiply/shift sequence, no loop
1637 1.1 mrg // inputs: r4,r5
1638 1.1 mrg // clobbered: r18,r19,r20,r21,r22,r25,tr0
1639 1.1 mrg // result in r0.
1640 1.1 mrg addz.l r5,r63,r22 // r22 = divisor zero-extended to 64 bits
1641 1.1 mrg nsb r22,r0
1642 1.1 mrg shlld r22,r0,r25
1643 1.1 mrg shlri r25,48,r25
1644 1.1 mrg movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1645 1.1 mrg sub r20,r25,r21
1646 1.1 mrg mmulfx.w r21,r21,r19
1647 1.1 mrg mshflo.w r21,r63,r21
1648 1.1 mrg ptabs r18,tr0 // tr0 = branch target from r18; r18 is reused as scratch below
1649 1.1 mrg mmulfx.w r25,r19,r19
1650 1.1 mrg sub r20,r0,r0
1651 1.1 mrg /* bubble */
1652 1.1 mrg msub.w r21,r19,r19
1653 1.1 mrg addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1654 1.1 mrg before the msub.w, but we need a different value for
1655 1.1 mrg r19 to keep errors under control. */
1656 1.1 mrg mulu.l r4,r21,r18
1657 1.1 mrg mmulfx.w r19,r19,r19
1658 1.1 mrg shlli r21,15,r21
1659 1.1 mrg shlrd r18,r0,r18
1660 1.1 mrg mulu.l r18,r22,r20
1661 1.1 mrg mmacnfx.wl r25,r19,r21
1662 1.1 mrg /* bubble */
1663 1.1 mrg sub r4,r20,r25 // r25 = dividend minus (partial quotient r18 * divisor)
1664 1.1 mrg
1665 1.1 mrg mulu.l r25,r21,r19
1666 1.1 mrg addi r0,14,r0
1667 1.1 mrg /* bubble */
1668 1.1 mrg shlrd r19,r0,r19
1669 1.1 mrg mulu.l r19,r22,r20
1670 1.1 mrg add r18,r19,r18 // accumulate the next partial quotient into r18
1671 1.1 mrg /* bubble */
1672 1.1 mrg sub.l r25,r20,r25
1673 1.1 mrg
1674 1.1 mrg mulu.l r25,r21,r19
1675 1.1 mrg addz.l r25,r63,r25
1676 1.1 mrg sub r25,r22,r25
1677 1.1 mrg shlrd r19,r0,r19
1678 1.1 mrg mulu.l r19,r22,r20
1679 1.1 mrg addi r25,1,r25
1680 1.1 mrg add r18,r19,r18 // accumulate the last partial quotient into r18
1681 1.1 mrg
1682 1.1 mrg cmpgt r25,r20,r25 // r25 = 1 or 0: final correction term
1683 1.1 mrg add.l r18,r25,r0 // r0 = accumulated quotient plus the correction
1684 1.1 mrg blink tr0,r63
1685 1.1 mrg #endif
1686 1.1 mrg #elif __SHMEDIA__
1687 1.1 mrg /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1688 1.1 mrg ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1689 1.1 mrg So use a short shmedia loop. */
1690 1.1 mrg // clobbered: r20,r21,r25,tr0,tr1,tr2
1691 1.1 mrg .mode SHmedia
1692 1.1 mrg .section .text..SHmedia32,"ax"
1693 1.1 mrg .align 2
1694 1.1 mrg GLOBAL(udivsi3): // unsigned r4 / r5 by a 32 step shift-and-subtract loop; result in r0
1695 1.1 mrg pt/l LOCAL(udivsi3_dontsub), tr0
1696 1.1 mrg pt/l LOCAL(udivsi3_loop), tr1
1697 1.1 mrg ptabs/l r18,tr2 // tr2 = branch target taken from r18, used by the final blink
1698 1.1 mrg shlli r5,32,r25 // r25 = divisor moved to the upper 32 bits
1699 1.1 mrg addi r25,-1,r21 // r21 = (divisor << 32) - 1
1700 1.1 mrg addz.l r4,r63,r20 // r20 = dividend zero-extended to 64 bits
1701 1.1 mrg LOCAL(udivsi3_loop):
1702 1.1 mrg shlli r20,1,r20
1703 1.1 mrg bgeu/u r21,r20,tr0 // skip the subtract when r21 >= r20 (unsigned)
1704 1.1 mrg sub r20,r21,r20 // subtracting (divisor << 32) - 1 also adds 1 to the low half: the quotient bit
1705 1.1 mrg LOCAL(udivsi3_dontsub):
1706 1.1 mrg addi.l r25,-1,r25 // the low 32 bits of r25 start at 0 and serve as the loop counter
1707 1.1 mrg bnei r25,-32,tr1 // loop until the counter reaches -32
1708 1.1 mrg add.l r20,r63,r0 // r0 = low 32 bits of r20, where the quotient bits accumulated
1709 1.1 mrg blink tr2,r63
1710 1.1 mrg #else /* ! __SHMEDIA__ */
1711 1.1 mrg LOCAL(div8): // helper: eight div1 steps in total (one here, then falls into div7)
1712 1.1 mrg div1 r5,r4
1713 1.1 mrg LOCAL(div7): // helper: seven div1 steps, the last one in the rts delay slot
1714 1.1 mrg div1 r5,r4; div1 r5,r4; div1 r5,r4
1715 1.1 mrg div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1716 1.1 mrg
1717 1.1 mrg LOCAL(divx4): // helper: four div1 steps with a rotcl r0 after each of the first three
1718 1.1 mrg div1 r5,r4; rotcl r0
1719 1.1 mrg div1 r5,r4; rotcl r0
1720 1.1 mrg div1 r5,r4; rotcl r0
1721 1.1 mrg rts; div1 r5,r4
1722 1.1 mrg
1723 1.1 mrg GLOBAL(udivsi3): // unsigned r4 / r5; result in r0
1724 1.1 mrg sts.l pr,@-r15 // save pr: the bsr calls to the helpers overwrite it
1725 1.1 mrg extu.w r5,r0 // r0 = low 16 bits of the divisor
1726 1.1 mrg cmp/eq r5,r0 // T = divisor fits in 16 bits
1727 1.1 mrg #ifdef __sh1__
1728 1.1 mrg bf LOCAL(large_divisor) // plain bf for __sh1__; large_divisor then runs its own div0u
1729 1.1 mrg #else
1730 1.1 mrg bf/s LOCAL(large_divisor) // the div0u below is the delay slot, so it runs on both paths
1731 1.1 mrg #endif
1732 1.1 mrg div0u
1733 1.1 mrg swap.w r4,r0
1734 1.1 mrg shlr16 r4 // r4 = upper 16 bits of the dividend
1735 1.1 mrg bsr LOCAL(div8)
1736 1.1 mrg shll16 r5 // delay slot: move the 16 bit divisor into the upper half of r5
1737 1.1 mrg bsr LOCAL(div7)
1738 1.1 mrg div1 r5,r4 // delay slot: with div7 this makes eight more steps
1739 1.1 mrg xtrct r4,r0
1740 1.1 mrg xtrct r0,r4
1741 1.1 mrg bsr LOCAL(div8)
1742 1.1 mrg swap.w r4,r4
1743 1.1 mrg bsr LOCAL(div7)
1744 1.1 mrg div1 r5,r4 // delay slot: with div7 this makes eight more steps
1745 1.1 mrg lds.l @r15+,pr // restore the saved pr
1746 1.1 mrg xtrct r4,r0
1747 1.1 mrg swap.w r0,r0
1748 1.1 mrg rotcl r0 // rotate the T left by the last div1 into bit 0 of r0
1749 1.1 mrg rts
1750 1.1 mrg shlr16 r5 // delay slot: undo the shll16 so r5 holds the divisor again
1751 1.1 mrg
1752 1.1 mrg LOCAL(large_divisor): // divisor does not fit in 16 bits
1753 1.1 mrg #ifdef __sh1__
1754 1.1 mrg div0u
1755 1.1 mrg #endif
1756 1.1 mrg mov #0,r0
1757 1.1 mrg xtrct r4,r0
1758 1.1 mrg xtrct r0,r4
1759 1.1 mrg bsr LOCAL(divx4)
1760 1.1 mrg rotcl r0 // delay slot (same for the three calls below)
1761 1.1 mrg bsr LOCAL(divx4)
1762 1.1 mrg rotcl r0
1763 1.1 mrg bsr LOCAL(divx4)
1764 1.1 mrg rotcl r0
1765 1.1 mrg bsr LOCAL(divx4)
1766 1.1 mrg rotcl r0
1767 1.1 mrg lds.l @r15+,pr // restore the saved pr
1768 1.1 mrg rts
1769 1.1 mrg rotcl r0 // delay slot: rotate the T left by the last div1 into r0
1770 1.1 mrg
1771 1.1 mrg ENDFUNC(GLOBAL(udivsi3))
1772 1.1 mrg #endif /* ! __SHMEDIA__ */
1773 1.1 mrg #endif /* L_udivsi3 */
1774 1.1 mrg
1775 1.1 mrg #ifdef L_udivdi3
1776 1.1 mrg #if __SHMEDIA__
1777 1.1 mrg .mode SHmedia
1778 1.1 mrg .section .text..SHmedia32,"ax"
1779 1.1 mrg .align 2
1780 1.1 mrg .global GLOBAL(udivdi3)
1781 1.1 mrg FUNC(GLOBAL(udivdi3))
1782 1.1 mrg GLOBAL(udivdi3): // unsigned 64 bit r2 / r3; quotient returned in r2
1783 1.1 mrg HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1784 1.1 mrg shlri r3,1,r4
1785 1.1 mrg nsb r4,r22
1786 1.1 mrg shlld r3,r22,r6 // r6 = divisor shifted left by the nsb count
1787 1.1 mrg shlri r6,49,r5
1788 1.1 mrg movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1789 1.1 mrg sub r21,r5,r1
1790 1.1 mrg mmulfx.w r1,r1,r4
1791 1.1 mrg mshflo.w r1,r63,r1
1792 1.1 mrg sub r63,r22,r20 // r63 == 64 % 64
1793 1.1 mrg mmulfx.w r5,r4,r4
1794 1.1 mrg pta LOCAL(large_divisor),tr0
1795 1.1 mrg addi r20,32,r9
1796 1.1 mrg msub.w r1,r4,r1
1797 1.1 mrg madd.w r1,r1,r1
1798 1.1 mrg mmulfx.w r1,r1,r4
1799 1.1 mrg shlri r6,32,r7 // r7 = upper 32 bits of the shifted divisor
1800 1.1 mrg bgt/u r9,r63,tr0 // large_divisor
1801 1.1 mrg mmulfx.w r5,r4,r4
1802 1.1 mrg shlri r2,32+14,r19
1803 1.1 mrg addi r22,-31,r0
1804 1.1 mrg msub.w r1,r4,r1
1805 1.1 mrg
1806 1.1 mrg mulu.l r1,r7,r4
1807 1.1 mrg addi r1,-3,r5
1808 1.1 mrg mulu.l r5,r19,r5
1809 1.1 mrg sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1810 1.1 mrg shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1811 1.1 mrg the case may be, %0000000000000000 000.11111111111, still */
1812 1.1 mrg muls.l r1,r4,r4 /* leaving at least one sign bit. */
1813 1.1 mrg mulu.l r5,r3,r8
1814 1.1 mrg mshalds.l r1,r21,r1
1815 1.1 mrg shari r4,26,r4
1816 1.1 mrg shlld r8,r0,r8
1817 1.1 mrg add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1818 1.1 mrg sub r2,r8,r2
1819 1.1 mrg /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1820 1.1 mrg
1821 1.1 mrg shlri r2,22,r21
1822 1.1 mrg mulu.l r21,r1,r21
1823 1.1 mrg shlld r5,r0,r8
1824 1.1 mrg addi r20,30-22,r0
1825 1.1 mrg shlrd r21,r0,r21
1826 1.1 mrg mulu.l r21,r3,r5
1827 1.1 mrg add r8,r21,r8 // r8 accumulates the partial quotients
1828 1.1 mrg mcmpgt.l r21,r63,r21 // See Note 1
1829 1.1 mrg addi r20,30,r0
1830 1.1 mrg mshfhi.l r63,r21,r21
1831 1.1 mrg sub r2,r5,r2
1832 1.1 mrg andc r2,r21,r2
1833 1.1 mrg
1834 1.1 mrg /* small divisor: need a third divide step */
1835 1.1 mrg mulu.l r2,r1,r7
1836 1.1 mrg ptabs r18,tr0 // tr0 = branch target taken from r18, used by the blink below
1837 1.1 mrg addi r2,1,r2
1838 1.1 mrg shlrd r7,r0,r7
1839 1.1 mrg mulu.l r7,r3,r5
1840 1.1 mrg add r8,r7,r8
1841 1.1 mrg sub r2,r3,r2
1842 1.1 mrg cmpgt r2,r5,r5 // r5 = 1 or 0: final correction term
1843 1.1 mrg add r8,r5,r2 // r2 = accumulated quotient plus the correction
1844 1.1 mrg /* could test r3 here to check for divide by zero. */
1845 1.1 mrg blink tr0,r63
1846 1.1 mrg
1847 1.1 mrg LOCAL(large_divisor):
1848 1.1 mrg mmulfx.w r5,r4,r4
1849 1.1 mrg shlrd r2,r9,r25
1850 1.1 mrg shlri r25,32,r8
1851 1.1 mrg msub.w r1,r4,r1
1852 1.1 mrg
1853 1.1 mrg mulu.l r1,r7,r4
1854 1.1 mrg addi r1,-3,r5
1855 1.1 mrg mulu.l r5,r8,r5
1856 1.1 mrg sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1857 1.1 mrg shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1858 1.1 mrg the case may be, %0000000000000000 000.11111111111, still */
1859 1.1 mrg muls.l r1,r4,r4 /* leaving at least one sign bit. */
1860 1.1 mrg shlri r5,14-1,r8
1861 1.1 mrg mulu.l r8,r7,r5
1862 1.1 mrg mshalds.l r1,r21,r1
1863 1.1 mrg shari r4,26,r4
1864 1.1 mrg add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1865 1.1 mrg sub r25,r5,r25
1866 1.1 mrg /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1867 1.1 mrg
1868 1.1 mrg shlri r25,22,r21
1869 1.1 mrg mulu.l r21,r1,r21
1870 1.1 mrg pta LOCAL(no_lo_adj),tr0
1871 1.1 mrg addi r22,32,r0
1872 1.1 mrg shlri r21,40,r21
1873 1.1 mrg mulu.l r21,r7,r5
1874 1.1 mrg add r8,r21,r8 // r8 accumulates the partial quotients
1875 1.1 mrg shlld r2,r0,r2
1876 1.1 mrg sub r25,r5,r25
1877 1.1 mrg bgtu/u r7,r25,tr0 // no_lo_adj
1878 1.1 mrg addi r8,1,r8 // rest still >= r7: bump the quotient and subtract r7 once more
1879 1.1 mrg sub r25,r7,r25
1880 1.1 mrg LOCAL(no_lo_adj):
1881 1.1 mrg mextr4 r2,r25,r2
1882 1.1 mrg
1883 1.1 mrg /* large_divisor: only needs a few adjustments. */
1884 1.1 mrg mulu.l r8,r6,r5
1885 1.1 mrg ptabs r18,tr0 // tr0 = branch target taken from r18, used by the blink below
1886 1.1 mrg /* bubble */
1887 1.1 mrg cmpgtu r5,r2,r5 // r5 = 1 when r5 > r2 (unsigned), else 0
1888 1.1 mrg sub r8,r5,r2 // r2 = quotient estimate minus that correction
1889 1.1 mrg blink tr0,r63
1890 1.1 mrg ENDFUNC(GLOBAL(udivdi3))
1891 1.1 mrg /* Note 1: To shift the result of the second divide stage so that the result
1892 1.1 mrg always fits into 32 bits, yet we still reduce the rest sufficiently
1893 1.1 mrg would require a lot of instructions to do the shifts just right. Using
1894 1.1 mrg the full 64 bit shift result to multiply with the divisor would require
1895 1.1 mrg four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1896 1.1 mrg Fortunately, if the upper 32 bits of the shift result are nonzero, we
1897 1.1 mrg know that the rest after taking this partial result into account will
1898 1.1 mrg fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1899 1.1 mrg upper 32 bits of the partial result are nonzero. */
1900 1.1 mrg #endif /* __SHMEDIA__ */
1901 1.1 mrg #endif /* L_udivdi3 */
1902 1.1 mrg
1903 1.1 mrg #ifdef L_divdi3
1904 1.1 mrg #if __SHMEDIA__
1905 1.1 mrg .mode SHmedia
1906 1.1 mrg .section .text..SHmedia32,"ax"
1907 1.1 mrg .align 2
1908 1.1 mrg .global GLOBAL(divdi3)
1909 1.1 mrg FUNC(GLOBAL(divdi3))
1910 1.1 mrg GLOBAL(divdi3): // signed 64 bit r2 / r3: take magnitudes, divide via udivdi3_internal, fix the sign; result in r2
1911 1.1 mrg pta GLOBAL(udivdi3_internal),tr0
1912 1.1 mrg shari r2,63,r22 // r22 = 0 or -1 from the sign of r2
1913 1.1 mrg shari r3,63,r23 // r23 = 0 or -1 from the sign of r3
1914 1.1 mrg xor r2,r22,r2
1915 1.1 mrg xor r3,r23,r3
1916 1.1 mrg sub r2,r22,r2 // r2 = (r2 ^ mask) - mask = magnitude of the dividend
1917 1.1 mrg sub r3,r23,r3 // r3 = magnitude of the divisor
1918 1.1 mrg beq/u r22,r23,tr0 // same sign: jump straight into udivdi3_internal with r18 untouched
1919 1.1 mrg ptabs r18,tr1 // signs differ: keep our own return target in tr1
1920 1.1 mrg blink tr0,r18 // call udivdi3_internal, linking through r18
1921 1.1 mrg sub r63,r2,r2 // negate the quotient
1922 1.1 mrg blink tr1,r63
1923 1.1 mrg ENDFUNC(GLOBAL(divdi3))
1924 1.1 mrg #endif /* __SHMEDIA__ */
1925 1.1 mrg #endif /* L_divdi3 */
1926 1.1 mrg
1927 1.1 mrg #ifdef L_umoddi3
1928 1.1 mrg #if __SHMEDIA__
1929 1.1 mrg .mode SHmedia
1930 1.1 mrg .section .text..SHmedia32,"ax"
1931 1.1 mrg .align 2
1932 1.1 mrg .global GLOBAL(umoddi3)
1933 1.1 mrg FUNC(GLOBAL(umoddi3))
1934 1.1 mrg GLOBAL(umoddi3): /* Unsigned 64-bit remainder routine: reads the dividend from r2 and the divisor from r3, leaves the result in r2 and returns through r18. */
1935 1.1 mrg HIDDEN_ALIAS(umoddi3_internal,umoddi3) /* hidden entry point that moddi3 branches to */
1936 1.1 mrg shlri r3,1,r4
1937 1.1 mrg nsb r4,r22 /* r22 = left shift count used to normalize the divisor */
1938 1.1 mrg shlld r3,r22,r6 /* r6 = divisor << r22 */
1939 1.1 mrg shlri r6,49,r5 /* r5 = top 15 bits of r6 */
1940 1.1 mrg movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1941 1.1 mrg sub r21,r5,r1
1942 1.1 mrg mmulfx.w r1,r1,r4
1943 1.1 mrg mshflo.w r1,r63,r1
1944 1.1 mrg sub r63,r22,r20 // r63 == 64 % 64
1945 1.1 mrg mmulfx.w r5,r4,r4
1946 1.1 mrg pta LOCAL(large_divisor),tr0
1947 1.1 mrg addi r20,32,r9 /* r9 = 32 - r22 */
1948 1.1 mrg msub.w r1,r4,r1
1949 1.1 mrg madd.w r1,r1,r1
1950 1.1 mrg mmulfx.w r1,r1,r4
1951 1.1 mrg shlri r6,32,r7 /* r7 = upper 32 bits of the normalized divisor */
1952 1.1 mrg bgt/u r9,r63,tr0 // large_divisor
1953 1.1 mrg mmulfx.w r5,r4,r4
1954 1.1 mrg shlri r2,32+14,r19 /* r19 = dividend >> 46 */
1955 1.1 mrg addi r22,-31,r0 /* r0 = r22 - 31 */
1956 1.1 mrg msub.w r1,r4,r1
1957 1.1 mrg
1958 1.1 mrg mulu.l r1,r7,r4
1959 1.1 mrg addi r1,-3,r5
1960 1.1 mrg mulu.l r5,r19,r5
1961 1.1 mrg sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1962 1.1 mrg shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1963 1.1 mrg the case may be, %0000000000000000 000.11111111111, still */
1964 1.1 mrg muls.l r1,r4,r4 /* leaving at least one sign bit. */
1965 1.1 mrg mulu.l r5,r3,r5
1966 1.1 mrg mshalds.l r1,r21,r1
1967 1.1 mrg shari r4,26,r4
1968 1.1 mrg shlld r5,r0,r5
1969 1.1 mrg add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1970 1.1 mrg sub r2,r5,r2
1971 1.1 mrg /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1972 1.1 mrg
1973 1.1 mrg shlri r2,22,r21
1974 1.1 mrg mulu.l r21,r1,r21
1975 1.1 mrg addi r20,30-22,r0
1976 1.1 mrg /* bubble */ /* could test r3 here to check for divide by zero. */
1977 1.1 mrg shlrd r21,r0,r21
1978 1.1 mrg mulu.l r21,r3,r5
1979 1.1 mrg mcmpgt.l r21,r63,r21 // See Note 1
1980 1.1 mrg addi r20,30,r0
1981 1.1 mrg mshfhi.l r63,r21,r21
1982 1.1 mrg sub r2,r5,r2
1983 1.1 mrg andc r2,r21,r2
1984 1.1 mrg
1985 1.1 mrg /* small divisor: need a third divide step */
1986 1.1 mrg mulu.l r2,r1,r7
1987 1.1 mrg ptabs r18,tr0 /* tr0 = return address */
1988 1.1 mrg sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1989 1.1 mrg shlrd r7,r0,r7
1990 1.1 mrg mulu.l r7,r3,r5
1991 1.1 mrg /* bubble */
1992 1.1 mrg addi r8,1,r7
1993 1.1 mrg cmpgt r7,r5,r7
1994 1.1 mrg cmvne r7,r8,r2
1995 1.1 mrg sub r2,r5,r2
1996 1.1 mrg blink tr0,r63 /* return with the result in r2 */
1997 1.1 mrg
1998 1.1 mrg LOCAL(large_divisor): /* reached when r9 = 32 - r22 is positive, i.e. the normalization shift is below 32 */
1999 1.1 mrg mmulfx.w r5,r4,r4
2000 1.1 mrg shlrd r2,r9,r25 /* r25 = dividend >> (32 - r22) */
2001 1.1 mrg shlri r25,32,r8
2002 1.1 mrg msub.w r1,r4,r1
2003 1.1 mrg
2004 1.1 mrg mulu.l r1,r7,r4
2005 1.1 mrg addi r1,-3,r5
2006 1.1 mrg mulu.l r5,r8,r5
2007 1.1 mrg sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
2008 1.1 mrg shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
2009 1.1 mrg the case may be, %0000000000000000 000.11111111111, still */
2010 1.1 mrg muls.l r1,r4,r4 /* leaving at least one sign bit. */
2011 1.1 mrg shlri r5,14-1,r8
2012 1.1 mrg mulu.l r8,r7,r5
2013 1.1 mrg mshalds.l r1,r21,r1
2014 1.1 mrg shari r4,26,r4
2015 1.1 mrg add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
2016 1.1 mrg sub r25,r5,r25
2017 1.1 mrg /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
2018 1.1 mrg
2019 1.1 mrg shlri r25,22,r21
2020 1.1 mrg mulu.l r21,r1,r21
2021 1.1 mrg pta LOCAL(no_lo_adj),tr0
2022 1.1 mrg addi r22,32,r0 /* r0 = r22 + 32 */
2023 1.1 mrg shlri r21,40,r21
2024 1.1 mrg mulu.l r21,r7,r5
2025 1.1 mrg add r8,r21,r8
2026 1.1 mrg shlld r2,r0,r2
2027 1.1 mrg sub r25,r5,r25
2028 1.1 mrg bgtu/u r7,r25,tr0 // no_lo_adj
2029 1.1 mrg addi r8,1,r8
2030 1.1 mrg sub r25,r7,r25
2031 1.1 mrg LOCAL(no_lo_adj):
2032 1.1 mrg mextr4 r2,r25,r2
2033 1.1 mrg
2034 1.1 mrg /* large_divisor: only needs a few adjustments. */
2035 1.1 mrg mulu.l r8,r6,r5
2036 1.1 mrg ptabs r18,tr0 /* tr0 = return address */
2037 1.1 mrg add r2,r6,r7
2038 1.1 mrg cmpgtu r5,r2,r8
2039 1.1 mrg cmvne r8,r7,r2
2040 1.1 mrg sub r2,r5,r2
2041 1.1 mrg shlrd r2,r22,r2 /* shift right by the normalization count r22 */
2042 1.1 mrg blink tr0,r63 /* return with the result in r2 */
2043 1.1 mrg ENDFUNC(GLOBAL(umoddi3))
2044 1.1 mrg /* Note 1: To shift the result of the second divide stage so that the result
2045 1.1 mrg always fits into 32 bits, yet we still reduce the rest sufficiently
2046 1.1 mrg would require a lot of instructions to do the shifts just right. Using
2047 1.1 mrg the full 64 bit shift result to multiply with the divisor would require
2048 1.1 mrg four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
2049 1.1 mrg Fortunately, if the upper 32 bits of the shift result are nonzero, we
2050 1.1 mrg know that the rest after taking this partial result into account will
2051 1.1 mrg fit into 32 bits. So we just clear the upper 32 bits of the rest if the
2052 1.1 mrg upper 32 bits of the partial result are nonzero. */
2053 1.1 mrg #endif /* __SHMEDIA__ */
2054 1.1 mrg #endif /* L_umoddi3 */
2055 1.1 mrg
2056 1.1 mrg #ifdef L_moddi3
2057 1.1 mrg #if __SHMEDIA__
2058 1.1 mrg .mode SHmedia
2059 1.1 mrg .section .text..SHmedia32,"ax"
2060 1.1 mrg .align 2
2061 1.1 mrg .global GLOBAL(moddi3)
2062 1.1 mrg FUNC(GLOBAL(moddi3))
2063 1.1 mrg GLOBAL(moddi3): /* Signed 64-bit remainder: takes r2 and r3, replaces both by their absolute values, runs umoddi3_internal, and negates the result in r2 when the incoming r2 was negative. */
2064 1.1 mrg pta GLOBAL(umoddi3_internal),tr0 /* tr0 = unsigned remainder routine */
2065 1.1 mrg shari r2,63,r22 /* r22 = 0 or -1: sign mask of r2 */
2066 1.1 mrg shari r3,63,r23 /* r23 = 0 or -1: sign mask of r3 */
2067 1.1 mrg xor r2,r22,r2
2068 1.1 mrg xor r3,r23,r3
2069 1.1 mrg sub r2,r22,r2 /* xor with the mask then subtract the mask: r2 = absolute value of r2 */
2070 1.1 mrg sub r3,r23,r3 /* r3 = absolute value of r3 */
2071 1.1 mrg beq/u r22,r63,tr0 /* r2 was not negative: branch without linking, so the unsigned routine returns straight to our caller */
2072 1.1 mrg ptabs r18,tr1 /* keep our return address in tr1, the linking branch below overwrites r18 */
2073 1.1 mrg blink tr0,r18 /* call umoddi3_internal */
2074 1.1 mrg sub r63,r2,r2 /* r2 = -r2 */
2075 1.1 mrg blink tr1,r63 /* return */
2076 1.1 mrg ENDFUNC(GLOBAL(moddi3))
2077 1.1 mrg #endif /* __SHMEDIA__ */
2078 1.1 mrg #endif /* L_moddi3 */
2079 1.1 mrg
2080 1.1 mrg #ifdef L_set_fpscr
2081 1.1 mrg #if !defined (__SH2A_NOFPU__)
2082 1.1 mrg #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
2083 1.1 mrg #ifdef __SH5__
2084 1.1 mrg .mode SHcompact
2085 1.1 mrg #endif
2086 1.1 mrg .global GLOBAL(set_fpscr)
2087 1.1 mrg HIDDEN_FUNC(GLOBAL(set_fpscr))
2088 1.1 mrg GLOBAL(set_fpscr): /* Loads r4 into FPSCR, then stores two variants of that value (differing in bits 19 and 20) into the two words of fpscr_values. Uses r0-r3 as scratch. NOTE(review): bits 19 and 20 are presumably the PR and SZ mode bits - confirm against the hardware manual. */
2089 1.1 mrg lds r4,fpscr /* FPSCR = r4 */
2090 1.1 mrg #ifdef __PIC__
2091 1.1 mrg mov.l r12,@-r15 /* save r12, it is used as GOT pointer until the matching pop */
2092 1.1 mrg #ifdef __vxworks
2093 1.1 mrg mov.l LOCAL(set_fpscr_L0_base),r12
2094 1.1 mrg mov.l LOCAL(set_fpscr_L0_index),r0
2095 1.1 mrg mov.l @r12,r12
2096 1.1 mrg mov.l @(r0,r12),r12 /* r12 = word found at ___GOTT_INDEX__ inside the table that ___GOTT_BASE__ points to */
2097 1.1 mrg #else
2098 1.1 mrg mova LOCAL(set_fpscr_L0),r0 /* r0 = address of the literal */
2099 1.1 mrg mov.l LOCAL(set_fpscr_L0),r12 /* r12 = contents of the literal */
2100 1.1 mrg add r0,r12 /* r12 = literal address + literal value */
2101 1.1 mrg #endif
2102 1.1 mrg mov.l LOCAL(set_fpscr_L1),r0 /* r0 = fpscr_values@GOT */
2103 1.1 mrg mov.l @(r0,r12),r1 /* r1 = word loaded from r12 + r0, used below as the address of fpscr_values */
2104 1.1 mrg mov.l @r15+,r12 /* restore r12 */
2105 1.1 mrg #else
2106 1.1 mrg mov.l LOCAL(set_fpscr_L1),r1 /* non-PIC: r1 = address of fpscr_values */
2107 1.1 mrg #endif
2108 1.1 mrg swap.w r4,r0 /* swap the 16-bit halves so bits 16-23 of r4 sit in the low byte of r0 */
2109 1.1 mrg or #24,r0 /* set bits 3 and 4 of the swapped value, i.e. bits 19 and 20 of the original */
2110 1.1 mrg #ifndef FMOVD_WORKS
2111 1.1 mrg xor #16,r0 /* FMOVD_WORKS undefined: clear bit 20 again */
2112 1.1 mrg #endif
2113 1.1 mrg #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2114 1.1 mrg swap.w r0,r3 /* swap back into normal bit order */
2115 1.1 mrg mov.l r3,@(4,r1) /* first variant goes to the second word of fpscr_values */
2116 1.1 mrg #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2117 1.1 mrg swap.w r0,r2 /* swap back into normal bit order */
2118 1.1 mrg mov.l r2,@r1 /* first variant goes to the first word of fpscr_values */
2119 1.1 mrg #endif
2120 1.1 mrg #ifndef FMOVD_WORKS
2121 1.1 mrg xor #8,r0 /* flip bit 19 only, bit 20 stays clear */
2122 1.1 mrg #else
2123 1.1 mrg xor #24,r0 /* flip bits 19 and 20 together */
2124 1.1 mrg #endif
2125 1.1 mrg #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2126 1.1 mrg swap.w r0,r2
2127 1.1 mrg rts
2128 1.1 mrg mov.l r2,@r1 /* (rts delay slot) second variant goes to the first word */
2129 1.1 mrg #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2130 1.1 mrg swap.w r0,r3
2131 1.1 mrg rts
2132 1.1 mrg mov.l r3,@(4,r1) /* (rts delay slot) second variant goes to the second word */
2133 1.1 mrg #endif
2134 1.1 mrg .align 2
2135 1.1 mrg #ifdef __PIC__
2136 1.1 mrg #ifdef __vxworks
2137 1.1 mrg LOCAL(set_fpscr_L0_base):
2138 1.1 mrg .long ___GOTT_BASE__
2139 1.1 mrg LOCAL(set_fpscr_L0_index):
2140 1.1 mrg .long ___GOTT_INDEX__
2141 1.1 mrg #else
2142 1.1 mrg LOCAL(set_fpscr_L0):
2143 1.1 mrg .long _GLOBAL_OFFSET_TABLE_
2144 1.1 mrg #endif
2145 1.1 mrg LOCAL(set_fpscr_L1):
2146 1.1 mrg .long GLOBAL(fpscr_values@GOT)
2147 1.1 mrg #else
2148 1.1 mrg LOCAL(set_fpscr_L1):
2149 1.1 mrg .long GLOBAL(fpscr_values)
2150 1.1 mrg #endif
2151 1.1 mrg
2152 1.1 mrg ENDFUNC(GLOBAL(set_fpscr))
2153 1.1 mrg #ifndef NO_FPSCR_VALUES
2154 1.1 mrg #ifdef __ELF__
2155 1.1 mrg .comm GLOBAL(fpscr_values),8,4 /* 8 bytes (two words), alignment 4 */
2156 1.1 mrg #else
2157 1.1 mrg .comm GLOBAL(fpscr_values),8 /* 8 bytes (two words) */
2158 1.1 mrg #endif /* ELF */
2159 1.1 mrg #endif /* NO_FPSCR_VALUES */
2160 1.1 mrg #endif /* SH2E / SH3E / SH4 */
2161 1.1 mrg #endif /* __SH2A_NOFPU__ */
2162 1.1 mrg #endif /* L_set_fpscr */
2163 1.1 mrg #ifdef L_ic_invalidate
2164 1.1 mrg #if __SH5__ == 32
2165 1.1 mrg .mode SHmedia
2166 1.1 mrg .section .text..SHmedia32,"ax"
2167 1.1 mrg .align 2
2168 1.1 mrg .global GLOBAL(init_trampoline)
2169 1.1 mrg HIDDEN_FUNC(GLOBAL(init_trampoline))
2170 1.1 mrg GLOBAL(init_trampoline): /* Fills the 16 bytes at r0: a fixed 64-bit pattern at offset 0, r2 at offset 8 and r3 at offset 12. There is no return here, execution continues into ic_invalidate below. */
2171 1.1 mrg st.l r0,8,r2 /* 32-bit store of r2 at r0 + 8 */
2172 1.1 mrg #ifdef __LITTLE_ENDIAN__
2173 1.1 mrg movi 9,r20 /* build the 64-bit pattern 16 bits at a time; NOTE(review): the 16-bit pieces look like SHcompact opcodes - confirm */
2174 1.1 mrg shori 0x402b,r20
2175 1.1 mrg shori 0xd101,r20
2176 1.1 mrg shori 0xd002,r20
2177 1.1 mrg #else
2178 1.1 mrg movi 0xffffffffffffd002,r20 /* same four 16-bit pieces in the opposite order for big endian */
2179 1.1 mrg shori 0xd101,r20
2180 1.1 mrg shori 0x402b,r20
2181 1.1 mrg shori 9,r20
2182 1.1 mrg #endif
2183 1.1 mrg st.q r0,0,r20 /* 64-bit store of the pattern at r0 + 0 */
2184 1.1 mrg st.l r0,12,r3 /* 32-bit store of r3 at r0 + 12 */
2185 1.1 mrg ENDFUNC(GLOBAL(init_trampoline))
2186 1.1 mrg .global GLOBAL(ic_invalidate)
2187 1.1 mrg HIDDEN_FUNC(GLOBAL(ic_invalidate))
2188 1.1 mrg GLOBAL(ic_invalidate): /* SH5: write back the data cache block and invalidate the instruction cache block for the address in r0, then return through r18. */
2189 1.1 mrg ocbwb r0,0 /* write back the operand cache block at r0 */
2190 1.1 mrg synco
2191 1.1 mrg icbi r0, 0 /* invalidate the instruction cache block at r0 */
2192 1.1 mrg ptabs r18, tr0 /* tr0 = return address */
2193 1.1 mrg synci
2194 1.1 mrg blink tr0, r63 /* return */
2195 1.1 mrg ENDFUNC(GLOBAL(ic_invalidate))
2196 1.1 mrg #elif defined(__SH4A__)
2197 1.1 mrg .global GLOBAL(ic_invalidate)
2198 1.1 mrg HIDDEN_FUNC(GLOBAL(ic_invalidate))
2199 1.1 mrg GLOBAL(ic_invalidate): /* SH4A: write back the data cache block and invalidate the instruction cache block for the address in r4. */
2200 1.1 mrg ocbwb @r4 /* write back the operand cache block containing r4 */
2201 1.1 mrg synco
2202 1.1 mrg icbi @r4 /* invalidate the instruction cache block containing r4 */
2203 1.1 mrg rts
2204 1.1 mrg nop /* rts delay slot */
2205 1.1 mrg ENDFUNC(GLOBAL(ic_invalidate))
2206 1.1 mrg #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2207 1.1 mrg /* For system code, we use ic_invalidate_line_i, but user code
2208 1.1 mrg needs a different mechanism. A kernel call is generally not
2209 1.1 mrg available, and it would also be slow. Different SH4 variants use
2210 1.1 mrg different sizes and associativities of the Icache. We use a small
2211 1.1 mrg bit of dispatch code that can be put hidden in every shared object,
2212 1.1 mrg which calls the actual processor-specific invalidation code in a
2213 1.1 mrg separate module.
2214 1.1 mrg Or if you have operating system support, the OS could mmap the
2215 1.1 mrg processor-specific code from a single page, since it is highly
2216 1.1 mrg repetitive. */
2217 1.1 mrg .global GLOBAL(ic_invalidate)
2218 1.1 mrg HIDDEN_FUNC(GLOBAL(ic_invalidate))
2219 1.1 mrg GLOBAL(ic_invalidate): /* SH4 dispatcher: takes an address in r4, writes its data cache block back and jumps into ic_invalidate_array at an offset derived from that address. Uses r0-r2 as scratch. */
2220 1.1 mrg #ifdef __pic__
2221 1.1 mrg #ifdef __vxworks
2222 1.1 mrg mov.l 1f,r1
2223 1.1 mrg mov.l 2f,r0
2224 1.1 mrg mov.l @r1,r1
2225 1.1 mrg mov.l 0f,r2
2226 1.1 mrg mov.l @(r0,r1),r0 /* r0 = word found at ___GOTT_INDEX__ inside the table that ___GOTT_BASE__ points to */
2227 1.1 mrg #else
2228 1.1 mrg mov.l 1f,r1 /* r1 = contents of literal 1 */
2229 1.1 mrg mova 1f,r0 /* r0 = address of literal 1 */
2230 1.1 mrg mov.l 0f,r2 /* r2 = ic_invalidate_array@GOT */
2231 1.1 mrg add r1,r0 /* r0 = literal address + literal value */
2232 1.1 mrg #endif
2233 1.1 mrg mov.l @(r0,r2),r1 /* r1 = word loaded from r0 + r2, used below as the address of ic_invalidate_array */
2234 1.1 mrg #else
2235 1.1 mrg mov.l 0f,r1 /* non-PIC: r1 = address of ic_invalidate_array */
2236 1.1 mrg #endif
2237 1.1 mrg ocbwb @r4 /* write back the operand cache block containing r4 */
2238 1.1 mrg mov.l @(8,r1),r0 /* r0 = mask word stored at offset 8 of the array */
2239 1.1 mrg sub r1,r4 /* r4 = address - array start */
2240 1.1 mrg and r4,r0 /* keep only the offset bits selected by the mask */
2241 1.1 mrg add r1,r0 /* r0 = entry point inside the array */
2242 1.1 mrg jmp @r0
2243 1.1 mrg mov.l @(4,r1),r0 /* (jmp delay slot) r0 = word stored at offset 4 of the array */
2244 1.1 mrg .align 2
2245 1.1 mrg #ifndef __pic__
2246 1.1 mrg 0: .long GLOBAL(ic_invalidate_array)
2247 1.1 mrg #else /* __pic__ */
2248 1.1 mrg .global GLOBAL(ic_invalidate_array)
2249 1.1 mrg 0: .long GLOBAL(ic_invalidate_array)@GOT
2250 1.1 mrg #ifdef __vxworks
2251 1.1 mrg 1: .long ___GOTT_BASE__
2252 1.1 mrg 2: .long ___GOTT_INDEX__
2253 1.1 mrg #else
2254 1.1 mrg 1: .long _GLOBAL_OFFSET_TABLE_
2255 1.1 mrg #endif
2256 1.1 mrg #endif /* __pic__ */
2257 1.1 mrg ENDFUNC(GLOBAL(ic_invalidate)) /* placed after the __pic__ conditional so that non-PIC builds close the function too */
2258 1.1 mrg #endif /* SH4 */
2259 1.1 mrg #endif /* L_ic_invalidate */
2260 1.1 mrg
2261 1.1 mrg #ifdef L_ic_invalidate_array
2262 1.1 mrg #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
2263 1.1 mrg .global GLOBAL(ic_invalidate_array)
2264 1.1 mrg /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2266 1.1 mrg FUNC(GLOBAL(ic_invalidate_array))
2267 1.1 mrg GLOBAL(ic_invalidate_array): /* SH4A stand-in for the SH4 invalidation array: invalidates the instruction cache block for the address rebuilt in r4 and returns. */
2268 1.1 mrg add r1,r4 /* r4 += r1: undoes the sub r1,r4 that the SH4 ic_invalidate performs before jumping here */
2269 1.1 mrg synco
2270 1.1 mrg icbi @r4 /* invalidate the instruction cache block containing r4 */
2271 1.1 mrg rts
2272 1.1 mrg nop /* rts delay slot */
2273 1.1 mrg .align 2
2274 1.1 mrg .long 0 /* zero word following the code. NOTE(review): the SH4 ic_invalidate reads its mask from offset 8 of this symbol, but five 16-bit instructions plus .align 2 appear to place this word beyond offset 8 - confirm the intended layout. */
2275 1.1 mrg ENDFUNC(GLOBAL(ic_invalidate_array))
2276 1.1 mrg #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2277 1.1 mrg .global GLOBAL(ic_invalidate_array)
2278 1.1 mrg .p2align 5
2279 1.1 mrg FUNC(GLOBAL(ic_invalidate_array))
2280 1.1 mrg /* This must be aligned to the beginning of a cache line. */
2281 1.1 mrg GLOBAL(ic_invalidate_array): /* SH4 invalidation array: a table of 32-byte entries mixing branch code and data words. The SH4 ic_invalidate reads the words at offsets 4 and 8 and jumps into the table. */
2282 1.1 mrg #ifndef WAYS
2283 1.1 mrg #define WAYS 4 /* defaults used when the build does not define WAYS */
2284 1.1 mrg #define WAY_SIZE 0x4000
2285 1.1 mrg #endif
2286 1.1 mrg #if WAYS == 1
2287 1.1 mrg .rept WAY_SIZE * WAYS / 32 /* each iteration emits 32 bytes: rts, nop and seven data words */
2288 1.1 mrg rts
2289 1.1 mrg nop
2290 1.1 mrg .rept 7
2291 1.1 mrg .long WAY_SIZE - 32
2292 1.1 mrg .endr
2293 1.1 mrg .endr
2294 1.1 mrg #elif WAYS <= 6
2295 1.1 mrg .rept WAY_SIZE * WAYS / 32 /* each iteration emits 32 bytes: 4 of code, 8 of data, then 20 of braf and rts pairs */
2296 1.1 mrg braf r0 /* branch forward by the value in r0 */
2297 1.1 mrg add #-8,r0 /* (braf delay slot) r0 -= 8 */
2298 1.1 mrg .long WAY_SIZE + 8 /* word at offset 4 of the entry */
2299 1.1 mrg .long WAY_SIZE - 32 /* word at offset 8 of the entry */
2300 1.1 mrg .rept WAYS-2
2301 1.1 mrg braf r0
2302 1.1 mrg nop
2303 1.1 mrg .endr
2304 1.1 mrg .rept 7 - WAYS
2305 1.1 mrg rts
2306 1.1 mrg nop
2307 1.1 mrg .endr
2308 1.1 mrg .endr
2309 1.1 mrg #else /* WAYS > 6 */
2310 1.1 mrg /* This variant needs two different pages for mmap-ing. */
2311 1.1 mrg .rept WAYS-1
2312 1.1 mrg .rept WAY_SIZE / 32 /* each iteration emits 32 bytes: braf, nop and seven data words */
2313 1.1 mrg braf r0
2314 1.1 mrg nop
2315 1.1 mrg .long WAY_SIZE /* word at offset 4 of the entry */
2316 1.1 mrg .rept 6
2317 1.1 mrg .long WAY_SIZE - 32
2318 1.1 mrg .endr
2319 1.1 mrg .endr
2320 1.1 mrg .endr
2321 1.1 mrg .rept WAY_SIZE / 32 /* final run of entries: rts followed by fifteen nops, again 32 bytes each */
2322 1.1 mrg rts
2323 1.1 mrg .rept 15
2324 1.1 mrg nop
2325 1.1 mrg .endr
2326 1.1 mrg .endr
2327 1.1 mrg #endif /* WAYS */
2328 1.1 mrg ENDFUNC(GLOBAL(ic_invalidate_array))
2329 1.1 mrg #endif /* SH4 */
2330 1.1 mrg #endif /* L_ic_invalidate_array */
2331 1.1 mrg
2332 1.1 mrg #if defined (__SH5__) && __SH5__ == 32
2333 1.1 mrg #ifdef L_shcompact_call_trampoline
2334 1.1 mrg .section .rodata
2335 1.1 mrg .align 1
2336 1.1 mrg LOCAL(ct_main_table): /* Dispatch table of the call trampoline: 33 16-bit handler offsets relative to ct_main_label. The loop indexes it with nsb(cookie) - 31, so the first entry serves cookie bit 31 and the last one an all-zero cookie. */
2337 1.1 mrg .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2338 1.1 mrg .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2339 1.1 mrg .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2340 1.1 mrg .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2341 1.1 mrg .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2342 1.1 mrg .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2343 1.1 mrg .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2344 1.1 mrg .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2345 1.1 mrg .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2346 1.1 mrg .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2347 1.1 mrg .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2348 1.1 mrg .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2349 1.1 mrg .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) /* from r6 on each register has four entries: fph, fpl, ld, pop */
2350 1.1 mrg .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2351 1.1 mrg .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2352 1.1 mrg .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2353 1.1 mrg .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2354 1.1 mrg .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2355 1.1 mrg .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2356 1.1 mrg .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2357 1.1 mrg .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2358 1.1 mrg .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2359 1.1 mrg .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2360 1.1 mrg .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2361 1.1 mrg .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2362 1.1 mrg .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2363 1.1 mrg .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2364 1.1 mrg .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2365 1.1 mrg .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) /* cookie bit 3 */
2366 1.1 mrg .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) /* cookie bit 2 */
2367 1.1 mrg .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) /* cookie bit 1 */
2368 1.1 mrg .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) /* cookie bit 0 */
2369 1.1 mrg .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) /* cookie is zero */
2370 1.1 mrg .mode SHmedia
2371 1.1 mrg .section .text..SHmedia32, "ax"
2372 1.1 mrg .align 2
2373 1.1 mrg
2374 1.1 mrg /* This function loads 64-bit general-purpose registers from the
2375 1.1 mrg stack, from a memory address contained in them or from an FP
2376 1.1 mrg register, according to a cookie passed in r1. Its execution
2377 1.1 mrg time is linear on the number of registers that actually have
2378 1.1 mrg to be copied. See sh.h for details on the actual bit pattern.
2379 1.1 mrg
2380 1.1 mrg The function to be called is passed in r0. If a 32-bit return
2381 1.1 mrg value is expected, the actual function will be tail-called,
2382 1.1 mrg otherwise the return address will be stored in r10 (that the
2383 1.1 mrg caller should expect to be clobbered) and the return value
2384 1.1 mrg will be expanded into r2/r3 upon return. */
2385 1.1 mrg
2386 1.1 mrg .global GLOBAL(GCC_shcompact_call_trampoline)
2387 1.1 mrg FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2388 1.1 mrg GLOBAL(GCC_shcompact_call_trampoline): /* Entry: r0 = function to call, r1 = cookie. Sets up tr0, tr1 and the table base, then runs the dispatch loop. */
2389 1.1 mrg ptabs/l r0, tr0 /* Prepare to call the actual function. */
2390 1.1 mrg movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 /* r0 = upper half of the table address minus 62 bytes (31 entries) */
2391 1.1 mrg pt/l LOCAL(ct_loop), tr1 /* tr1 = ct_loop, the handlers branch back through it */
2392 1.1 mrg addz.l r1, r63, r1 /* zero-extend the 32-bit cookie */
2393 1.1 mrg shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 /* shift in the lower half of the biased table address */
2394 1.1 mrg LOCAL(ct_loop): /* one iteration per pending cookie field, highest set bit first */
2395 1.1 mrg nsb r1, r28 /* r28 = sign-bit count of the cookie: 31 when bit 31 is set, 63 when the cookie is zero */
2396 1.1 mrg shlli r28, 1, r29 /* scale to a byte offset into the 16-bit table */
2397 1.1 mrg ldx.w r0, r29, r30 /* r30 = handler offset relative to ct_main_label */
2398 1.1 mrg LOCAL(ct_main_label):
2399 1.1 mrg ptrel/l r30, tr2 /* tr2 = ct_main_label + r30 */
2400 1.1 mrg blink tr2, r63 /* jump to the handler */
2401 1.1 mrg LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2402 1.1 mrg /* It must be dr0, so just do it. */
2403 1.1 mrg fmov.dq dr0, r2
2404 1.1 mrg movi 7, r30
2405 1.1 mrg shlli r30, 29, r31 /* r31 = mask of cookie bits 31-29 */
2406 1.1 mrg andc r1, r31, r1 /* clear those bits in the cookie */
2407 1.1 mrg blink tr1, r63 /* back to ct_loop */
2408 1.1 mrg LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2409 1.1 mrg /* It is either dr0 or dr2. */
2410 1.1 mrg movi 7, r30
2411 1.1 mrg shlri r1, 26, r32 /* r32 = cookie >> 26, taken before the bits are cleared */
2412 1.1 mrg shlli r30, 26, r31 /* r31 = mask of cookie bits 28-26 */
2413 1.1 mrg andc r1, r31, r1
2414 1.1 mrg fmov.dq dr0, r3 /* assume dr0 first */
2415 1.1 mrg beqi/l r32, 4, tr1 /* field value 4: dr0 was the right source, back to ct_loop */
2416 1.1 mrg fmov.dq dr2, r3 /* otherwise take dr2 */
2417 1.1 mrg blink tr1, r63
2418 1.1 mrg LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2419 1.1 mrg shlri r1, 23 - 3, r34
2420 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 24-23): byte offset into the copy entries below */
2421 1.1 mrg addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2422 1.1 mrg LOCAL(ct_r4_fp_base):
2423 1.1 mrg ptrel/l r32, tr2 /* tr2 = selected copy entry */
2424 1.1 mrg movi 7, r30
2425 1.1 mrg shlli r30, 23, r31 /* r31 = mask of cookie bits 25-23 */
2426 1.1 mrg andc r1, r31, r1
2427 1.1 mrg blink tr2, r63
2428 1.1 mrg LOCAL(ct_r4_fp_copy): /* each entry is two instructions: the move, then the branch back to ct_loop */
2429 1.1 mrg fmov.dq dr0, r4
2430 1.1 mrg blink tr1, r63
2431 1.1 mrg fmov.dq dr2, r4
2432 1.1 mrg blink tr1, r63
2433 1.1 mrg fmov.dq dr4, r4
2434 1.1 mrg blink tr1, r63
2435 1.1 mrg LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2436 1.1 mrg shlri r1, 20 - 3, r34
2437 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 21-20) */
2438 1.1 mrg addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2439 1.1 mrg LOCAL(ct_r5_fp_base):
2440 1.1 mrg ptrel/l r32, tr2 /* tr2 = selected copy entry */
2441 1.1 mrg movi 7, r30
2442 1.1 mrg shlli r30, 20, r31 /* r31 = mask of cookie bits 22-20 */
2443 1.1 mrg andc r1, r31, r1
2444 1.1 mrg blink tr2, r63
2445 1.1 mrg LOCAL(ct_r5_fp_copy): /* same two-instruction entry layout as above */
2446 1.1 mrg fmov.dq dr0, r5
2447 1.1 mrg blink tr1, r63
2448 1.1 mrg fmov.dq dr2, r5
2449 1.1 mrg blink tr1, r63
2450 1.1 mrg fmov.dq dr4, r5
2451 1.1 mrg blink tr1, r63
2452 1.1 mrg fmov.dq dr6, r5
2453 1.1 mrg blink tr1, r63
2454 1.1 mrg LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2455 1.1 mrg /* It must be dr8. */
2456 1.1 mrg fmov.dq dr8, r6
2457 1.1 mrg movi 15, r30
2458 1.1 mrg shlli r30, 16, r31 /* r31 = mask of cookie bits 19-16 */
2459 1.1 mrg andc r1, r31, r1 /* clear the whole r6 field */
2460 1.1 mrg blink tr1, r63 /* back to ct_loop */
2461 1.1 mrg LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2462 1.1 mrg shlri r1, 16 - 3, r34
2463 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 17-16): byte offset into the copy entries below */
2464 1.1 mrg addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2465 1.1 mrg LOCAL(ct_r6_fp_base):
2466 1.1 mrg ptrel/l r32, tr2 /* tr2 = selected copy entry */
2467 1.1 mrg movi 7, r30
2468 1.1 mrg shlli r30, 16, r31 /* r31 = mask of cookie bits 18-16 */
2469 1.1 mrg andc r1, r31, r1
2470 1.1 mrg blink tr2, r63
2471 1.1 mrg LOCAL(ct_r6_fp_copy): /* each entry is two instructions: the move, then the branch back to ct_loop */
2472 1.1 mrg fmov.dq dr0, r6
2473 1.1 mrg blink tr1, r63
2474 1.1 mrg fmov.dq dr2, r6
2475 1.1 mrg blink tr1, r63
2476 1.1 mrg fmov.dq dr4, r6
2477 1.1 mrg blink tr1, r63
2478 1.1 mrg fmov.dq dr6, r6
2479 1.1 mrg blink tr1, r63
2480 1.1 mrg LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2481 1.1 mrg /* It is either dr8 or dr10. */
2482 1.1 mrg movi 15 << 12, r31 /* r31 = mask of cookie bits 15-12 */
2483 1.1 mrg shlri r1, 12, r32 /* r32 = cookie >> 12, taken before the bits are cleared */
2484 1.1 mrg andc r1, r31, r1
2485 1.1 mrg fmov.dq dr8, r7 /* assume dr8 first */
2486 1.1 mrg beqi/l r32, 8, tr1 /* field value 8: dr8 was the right source, back to ct_loop */
2487 1.1 mrg fmov.dq dr10, r7 /* otherwise take dr10 */
2488 1.1 mrg blink tr1, r63
2489 1.1 mrg LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2490 1.1 mrg shlri r1, 12 - 3, r34
2491 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 13-12) */
2492 1.1 mrg addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2493 1.1 mrg LOCAL(ct_r7_fp_base):
2494 1.1 mrg ptrel/l r32, tr2 /* tr2 = selected copy entry */
2495 1.1 mrg movi 7 << 12, r31 /* r31 = mask of cookie bits 14-12 */
2496 1.1 mrg andc r1, r31, r1
2497 1.1 mrg blink tr2, r63
2498 1.1 mrg LOCAL(ct_r7_fp_copy): /* same two-instruction entry layout as above */
2499 1.1 mrg fmov.dq dr0, r7
2500 1.1 mrg blink tr1, r63
2501 1.1 mrg fmov.dq dr2, r7
2502 1.1 mrg blink tr1, r63
2503 1.1 mrg fmov.dq dr4, r7
2504 1.1 mrg blink tr1, r63
2505 1.1 mrg fmov.dq dr6, r7
2506 1.1 mrg blink tr1, r63
2507 1.1 mrg LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2508 1.1 mrg /* It is either dr8 or dr10. */
2509 1.1 mrg movi 15 << 8, r31 /* r31 = mask of cookie bits 11-8 */
2510 1.1 mrg andi r1, 1 << 8, r32 /* r32 = cookie bit 8, taken before the field is cleared */
2511 1.1 mrg andc r1, r31, r1
2512 1.1 mrg fmov.dq dr8, r8 /* assume dr8 first */
2513 1.1 mrg beq/l r32, r63, tr1 /* bit 8 clear: dr8 was the right source, back to ct_loop */
2514 1.1 mrg fmov.dq dr10, r8 /* otherwise take dr10 */
2515 1.1 mrg blink tr1, r63
2516 1.1 mrg LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2517 1.1 mrg shlri r1, 8 - 3, r34
2518 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 9-8): byte offset into the copy entries below */
2519 1.1 mrg addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2520 1.1 mrg LOCAL(ct_r8_fp_base):
2521 1.1 mrg ptrel/l r32, tr2 /* tr2 = selected copy entry */
2522 1.1 mrg movi 7 << 8, r31 /* r31 = mask of cookie bits 10-8 */
2523 1.1 mrg andc r1, r31, r1
2524 1.1 mrg blink tr2, r63
2525 1.1 mrg LOCAL(ct_r8_fp_copy): /* each entry is two instructions: the move, then the branch back to ct_loop */
2526 1.1 mrg fmov.dq dr0, r8
2527 1.1 mrg blink tr1, r63
2528 1.1 mrg fmov.dq dr2, r8
2529 1.1 mrg blink tr1, r63
2530 1.1 mrg fmov.dq dr4, r8
2531 1.1 mrg blink tr1, r63
2532 1.1 mrg fmov.dq dr6, r8
2533 1.1 mrg blink tr1, r63
2534 1.1 mrg LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2535 1.1 mrg /* It is either dr8 or dr10. */
2536 1.1 mrg movi 15 << 4, r31 /* r31 = mask of cookie bits 7-4 */
2537 1.1 mrg andi r1, 1 << 4, r32 /* r32 = cookie bit 4, taken before the field is cleared */
2538 1.1 mrg andc r1, r31, r1
2539 1.1 mrg fmov.dq dr8, r9 /* assume dr8 first */
2540 1.1 mrg beq/l r32, r63, tr1 /* bit 4 clear: dr8 was the right source, back to ct_loop */
2541 1.1 mrg fmov.dq dr10, r9 /* otherwise take dr10 */
2542 1.1 mrg blink tr1, r63
2543 1.1 mrg LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2544 1.1 mrg shlri r1, 4 - 3, r34
2545 1.1 mrg andi r34, 3 << 3, r33 /* r33 = 8 * (cookie bits 5-4) */
2546 1.1 mrg addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2547 1.1 mrg LOCAL(ct_r9_fp_base):
2548 1.1 mrg ptrel/l r32, tr2 /* tr2 = selected copy entry */
2549 1.1 mrg movi 7 << 4, r31 /* r31 = mask of cookie bits 6-4 */
2550 1.1 mrg andc r1, r31, r1
2551 1.1 mrg blink tr2, r63
2552 1.1 mrg LOCAL(ct_r9_fp_copy): /* same two-instruction entry layout as above */
2553 1.1 mrg fmov.dq dr0, r9
2554 1.1 mrg blink tr1, r63
2555 1.1 mrg fmov.dq dr2, r9
2556 1.1 mrg blink tr1, r63
2557 1.1 mrg fmov.dq dr4, r9
2558 1.1 mrg blink tr1, r63
2559 1.1 mrg fmov.dq dr6, r9
2560 1.1 mrg blink tr1, r63
2561 1.1 mrg LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2562 1.1 mrg pt/l LOCAL(ct_r2_load), tr2
2563 1.1 mrg movi 3, r30
2564 1.1 mrg shlli r30, 29, r31 /* r31 = mask of cookie bits 30-29 */
2565 1.1 mrg and r1, r31, r32 /* r32 = those two bits of the cookie */
2566 1.1 mrg andc r1, r31, r1 /* clear them in the cookie */
2567 1.1 mrg beq/l r31, r32, tr2 /* both bits set: load r2 alone at ct_r2_load and go back to ct_loop */
2568 1.1 mrg addi.l r2, 8, r3 /* otherwise the next register gets the address 8 bytes further on */
2569 1.1 mrg ldx.q r2, r63, r2 /* r2 = 64-bit value at the address it held */
2570 1.1 mrg /* Fall through. */
2571 1.1 mrg LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2572 1.1 mrg pt/l LOCAL(ct_r3_load), tr2
2573 1.1 mrg movi 3, r30
2574 1.1 mrg shlli r30, 26, r31 /* r31 = mask of cookie bits 27-26 */
2575 1.1 mrg and r1, r31, r32
2576 1.1 mrg andc r1, r31, r1
2577 1.1 mrg beq/l r31, r32, tr2 /* both bits set: single load at ct_r3_load */
2578 1.1 mrg addi.l r3, 8, r4
2579 1.1 mrg ldx.q r3, r63, r3 /* then falls through to the r4 step */
2580 1.1 mrg LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2581 1.1 mrg pt/l LOCAL(ct_r4_load), tr2
2582 1.1 mrg movi 3, r30
2583 1.1 mrg shlli r30, 23, r31 /* r31 = mask of cookie bits 24-23 */
2584 1.1 mrg and r1, r31, r32
2585 1.1 mrg andc r1, r31, r1
2586 1.1 mrg beq/l r31, r32, tr2 /* both bits set: single load at ct_r4_load */
2587 1.1 mrg addi.l r4, 8, r5
2588 1.1 mrg ldx.q r4, r63, r4 /* then falls through to the r5 step */
2589 1.1 mrg LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2590 1.1 mrg pt/l LOCAL(ct_r5_load), tr2
2591 1.1 mrg movi 3, r30
2592 1.1 mrg shlli r30, 20, r31 /* r31 = mask of cookie bits 21-20 */
2593 1.1 mrg and r1, r31, r32
2594 1.1 mrg andc r1, r31, r1
2595 1.1 mrg beq/l r31, r32, tr2 /* both bits set: single load at ct_r5_load */
2596 1.1 mrg addi.l r5, 8, r6
2597 1.1 mrg ldx.q r5, r63, r5 /* then falls through to the r6 step */
2598 1.1 mrg LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2599 1.1 mrg pt/l LOCAL(ct_r6_load), tr2
2600 1.1 mrg movi 3 << 16, r31 /* r31 = mask of cookie bits 17-16 */
2601 1.1 mrg and r1, r31, r32
2602 1.1 mrg andc r1, r31, r1
2603 1.1 mrg beq/l r31, r32, tr2 /* both bits set: single load at ct_r6_load */
2604 1.1 mrg addi.l r6, 8, r7
2605 1.1 mrg ldx.q r6, r63, r6 /* then falls through to the r7 step */
2606 1.1 mrg LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2607 1.1 mrg pt/l LOCAL(ct_r7_load), tr2
2608 1.1 mrg movi 3 << 12, r31 /* r31 = mask of cookie bits 13-12 */
2609 1.1 mrg and r1, r31, r32
2610 1.1 mrg andc r1, r31, r1
2611 1.1 mrg beq/l r31, r32, tr2 /* both bits set: single load at ct_r7_load */
2612 1.1 mrg addi.l r7, 8, r8
2613 1.1 mrg ldx.q r7, r63, r7 /* then falls through to the r8 step */
2614 1.1 mrg LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2615 1.1 mrg pt/l LOCAL(ct_r8_load), tr2
2616 1.1 mrg movi 3 << 8, r31 /* r31 = mask of cookie bits 9-8 */
2617 1.1 mrg and r1, r31, r32
2618 1.1 mrg andc r1, r31, r1
2619 1.1 mrg beq/l r31, r32, tr2 /* both bits set: single load at ct_r8_load */
2620 1.1 mrg addi.l r8, 8, r9
2621 1.1 mrg ldx.q r8, r63, r8 /* then falls through to the r9 step */
2622 1.1 mrg LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2623 1.1 mrg pt/l LOCAL(ct_check_tramp), tr2
2624 1.1 mrg ldx.q r9, r63, r9
2625 1.1 mrg blink tr2, r63 /* r9 is the last register: continue at ct_check_tramp instead of ct_loop */
2626 1.1 mrg LOCAL(ct_r2_load): /* single-register loads: load in place, then back to ct_loop */
2627 1.1 mrg ldx.q r2, r63, r2
2628 1.1 mrg blink tr1, r63
2629 1.1 mrg LOCAL(ct_r3_load):
2630 1.1 mrg ldx.q r3, r63, r3
2631 1.1 mrg blink tr1, r63
2632 1.1 mrg LOCAL(ct_r4_load):
2633 1.1 mrg ldx.q r4, r63, r4
2634 1.1 mrg blink tr1, r63
2635 1.1 mrg LOCAL(ct_r5_load):
2636 1.1 mrg ldx.q r5, r63, r5
2637 1.1 mrg blink tr1, r63
2638 1.1 mrg LOCAL(ct_r6_load):
2639 1.1 mrg ldx.q r6, r63, r6
2640 1.1 mrg blink tr1, r63
2641 1.1 mrg LOCAL(ct_r7_load):
2642 1.1 mrg ldx.q r7, r63, r7
2643 1.1 mrg blink tr1, r63
2644 1.1 mrg LOCAL(ct_r8_load):
2645 1.1 mrg ldx.q r8, r63, r8
2646 1.1 mrg blink tr1, r63
2647 1.1 mrg LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2648 1.1 mrg movi 1, r30
2649 1.1 mrg ldx.q r15, r63, r2 /* r2 = 64-bit value at r15 */
2650 1.1 mrg shlli r30, 29, r31 /* r31 = cookie bit 29 */
2651 1.1 mrg addi.l r15, 8, r15 /* r15 += 8 */
2652 1.1 mrg andc r1, r31, r1 /* clear the handled bit */
2653 1.1 mrg blink tr1, r63 /* back to ct_loop */
2654 1.1 mrg LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2655 1.1 mrg movi 1, r30
2656 1.1 mrg ldx.q r15, r63, r3
2657 1.1 mrg shlli r30, 26, r31 /* r31 = cookie bit 26 */
2658 1.1 mrg addi.l r15, 8, r15
2659 1.1 mrg andc r1, r31, r1
2660 1.1 mrg blink tr1, r63
2661 1.1 mrg LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2662 1.1 mrg movi 1, r30
2663 1.1 mrg ldx.q r15, r63, r4
2664 1.1 mrg shlli r30, 23, r31 /* r31 = cookie bit 23 */
2665 1.1 mrg addi.l r15, 8, r15
2666 1.1 mrg andc r1, r31, r1
2667 1.1 mrg blink tr1, r63
2668 1.1 mrg LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2669 1.1 mrg movi 1, r30
2670 1.1 mrg ldx.q r15, r63, r5
2671 1.1 mrg shlli r30, 20, r31 /* r31 = cookie bit 20 */
2672 1.1 mrg addi.l r15, 8, r15
2673 1.1 mrg andc r1, r31, r1
2674 1.1 mrg blink tr1, r63
2675 1.1 mrg LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2676 1.1 mrg movi 1, r30
2677 1.1 mrg ldx.q r15, r63, r6
2678 1.1 mrg shlli r30, 16, r31 /* r31 = cookie bit 16 */
2679 1.1 mrg addi.l r15, 8, r15
2680 1.1 mrg andc r1, r31, r1
2681 1.1 mrg blink tr1, r63
2682 1.1 mrg LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2683 1.1 mrg ldx.q r15, r63, r7
2684 1.1 mrg movi 1 << 12, r31 /* r31 = cookie bit 12 */
2685 1.1 mrg addi.l r15, 8, r15
2686 1.1 mrg andc r1, r31, r1
2687 1.1 mrg blink tr1, r63
2688 1.1 mrg LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2689 1.1 mrg ldx.q r15, r63, r8
2690 1.1 mrg movi 1 << 8, r31 /* r31 = cookie bit 8 */
2691 1.1 mrg addi.l r15, 8, r15
2692 1.1 mrg andc r1, r31, r1
2693 1.1 mrg blink tr1, r63
2694 1.1 mrg LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2695 1.1 mrg andi r1, 7 << 1, r30 /* r30 = 2 * n, where n is the value of cookie bits 3-1 */
2696 1.1 mrg movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2697 1.1 mrg shlli r30, 2, r31 /* r31 = 8 * n, the size of n two-instruction pops */
2698 1.1 mrg shori LOCAL(ct_end_of_pop_seq) & 65535, r32 /* r32 = address of ct_end_of_pop_seq */
2699 1.1 mrg sub.l r32, r31, r33 /* r33 = entry point n pops before the end label */
2700 1.1 mrg ptabs/l r33, tr2
2701 1.1 mrg blink tr2, r63 /* run the last n pops of the sequence below */
2702 1.1 mrg LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2703 1.1 mrg ldx.q r15, r63, r3
2704 1.1 mrg addi.l r15, 8, r15
2705 1.1 mrg ldx.q r15, r63, r4
2706 1.1 mrg addi.l r15, 8, r15
2707 1.1 mrg ldx.q r15, r63, r5
2708 1.1 mrg addi.l r15, 8, r15
2709 1.1 mrg ldx.q r15, r63, r6
2710 1.1 mrg addi.l r15, 8, r15
2711 1.1 mrg ldx.q r15, r63, r7
2712 1.1 mrg addi.l r15, 8, r15
2713 1.1 mrg ldx.q r15, r63, r8
2714 1.1 mrg addi.l r15, 8, r15
2715 1.1 mrg LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2716 1.1 mrg ldx.q r15, r63, r9
2717 1.1 mrg addi.l r15, 8, r15
2718 1.1 mrg LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2719 1.1 mrg LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2720 1.1 mrg pt/u LOCAL(ct_ret_wide), tr2
2721 1.1 mrg andi r1, 1, r1 /* keep only cookie bit 0 */
2722 1.1 mrg bne/u r1, r63, tr2 /* bit 0 set: go to ct_ret_wide */
2723 1.1 mrg LOCAL(ct_call_func): /* Just branch to the function. */
2724 1.1 mrg blink tr0, r63 /* no link: the function returns straight to our caller */
2725 1.1 mrg LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2726 1.1 mrg 64-bit return value. */
2727 1.1 mrg add.l r18, r63, r10 /* keep the caller's return address in r10 */
2728 1.1 mrg blink tr0, r18 /* call the function, linking through r18 */
2729 1.1 mrg ptabs r10, tr0 /* tr0 = saved return address */
2730 1.1 mrg #if __LITTLE_ENDIAN__
2731 1.1 mrg shari r2, 32, r3 /* r3 = upper 32 bits of the result */
2732 1.1 mrg add.l r2, r63, r2 /* r2 = lower 32 bits, sign-extended */
2733 1.1 mrg #else
2734 1.1 mrg add.l r2, r63, r3 /* r3 = lower 32 bits, sign-extended */
2735 1.1 mrg shari r2, 32, r2 /* r2 = upper 32 bits of the result */
2736 1.1 mrg #endif
2737 1.1 mrg blink tr0, r63 /* return to the original caller */
2738 1.1 mrg
2739 1.1 mrg ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2740 1.1 mrg #endif /* L_shcompact_call_trampoline */
2741 1.1 mrg
2742 1.1 mrg #ifdef L_shcompact_return_trampoline
2743 1.1 mrg /* This function does the converse of the code in `ret_wide'
2744 1.1 mrg above. It is tail-called by SHcompact functions returning
2745 1.1 mrg 64-bit non-floating-point values, to pack the 32-bit values in
2746 1.1 mrg r2 and r3 into r2. */
2747 1.1 mrg
2748 1.1 mrg .mode SHmedia
2749 1.1 mrg .section .text..SHmedia32, "ax"
2750 1.1 mrg .align 2
2751 1.1 mrg .global GLOBAL(GCC_shcompact_return_trampoline)
2752 1.1 mrg HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2753 1.1 mrg GLOBAL(GCC_shcompact_return_trampoline): /* Combines the 32-bit halves held in r2 and r3 into one 64-bit value in r2 and returns through r18. Which register supplies the upper half depends on endianness. */
2754 1.1 mrg ptabs/l r18, tr0 /* tr0 = return address */
2755 1.1 mrg #if __LITTLE_ENDIAN__
2756 1.1 mrg addz.l r2, r63, r2 /* zero-extend r2: it becomes the lower half */
2757 1.1 mrg shlli r3, 32, r3 /* move r3 into the upper half */
2758 1.1 mrg #else
2759 1.1 mrg addz.l r3, r63, r3 /* zero-extend r3: it becomes the lower half */
2760 1.1 mrg shlli r2, 32, r2 /* move r2 into the upper half */
2761 1.1 mrg #endif
2762 1.1 mrg or r3, r2, r2 /* r2 = upper half combined with lower half */
2763 1.1 mrg blink tr0, r63 /* return */
2764 1.1 mrg
2765 1.1 mrg ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2766 1.1 mrg #endif /* L_shcompact_return_trampoline */
2767 1.1 mrg
2768 1.1 mrg #ifdef L_shcompact_incoming_args
2769 1.1 mrg .section .rodata
2770 1.1 mrg .align 1
2771 1.1 mrg LOCAL(ia_main_table):
2772 1.1 mrg .word 1 /* Invalid, just loop */
2773 1.1 mrg .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2774 1.1 mrg .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2775 1.1 mrg .word 1 /* Invalid, just loop */
2776 1.1 mrg .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2777 1.1 mrg .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2778 1.1 mrg .word 1 /* Invalid, just loop */
2779 1.1 mrg .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2780 1.1 mrg .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2781 1.1 mrg .word 1 /* Invalid, just loop */
2782 1.1 mrg .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2783 1.1 mrg .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2784 1.1 mrg .word 1 /* Invalid, just loop */
2785 1.1 mrg .word 1 /* Invalid, just loop */
2786 1.1 mrg .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2787 1.1 mrg .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2788 1.1 mrg .word 1 /* Invalid, just loop */
2789 1.1 mrg .word 1 /* Invalid, just loop */
2790 1.1 mrg .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2791 1.1 mrg .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2792 1.1 mrg .word 1 /* Invalid, just loop */
2793 1.1 mrg .word 1 /* Invalid, just loop */
2794 1.1 mrg .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2795 1.1 mrg .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2796 1.1 mrg .word 1 /* Invalid, just loop */
2797 1.1 mrg .word 1 /* Invalid, just loop */
2798 1.1 mrg .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2799 1.1 mrg .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2800 1.1 mrg .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2801 1.1 mrg .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2802 1.1 mrg .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2803 1.1 mrg .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2804 1.1 mrg .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2805 1.1 mrg .mode SHmedia
2806 1.1 mrg .section .text..SHmedia32, "ax"
2807 1.1 mrg .align 2
2808 1.1 mrg
2809 1.1 mrg /* This function stores 64-bit general-purpose registers back in
2810 1.1 mrg the stack, and loads the address in which each register
2811 1.1 mrg was stored into itself. The lower 32 bits of r17 hold the address
2812 1.1 mrg to begin storing, and the upper 32 bits of r17 hold the cookie.
2813 1.1 mrg Its execution time is linear on the
2814 1.1 mrg number of registers that actually have to be copied, and it is
2815 1.1 mrg optimized for structures larger than 64 bits, as opposed to
2816 1.1 mrg individual `long long' arguments. See sh.h for details on the
2817 1.1 mrg actual bit pattern. */
2818 1.1 mrg
/* In:  r17 = cookie in the upper 32 bits, store address in the lower 32; */
/*      r18 = return address; r2-r9 = the registers that may be stored. */
/* Out: each register handled by an ia_rN_ld block holds the address at */
/*      which its former contents were stored. */
/* Scratch: r0, r36-r41, r43, tr0-tr2.  r17 is advanced as stores are made. */
2819 1.1 mrg .global GLOBAL(GCC_shcompact_incoming_args)
2820 1.1 mrg FUNC(GLOBAL(GCC_shcompact_incoming_args))
2821 1.1 mrg GLOBAL(GCC_shcompact_incoming_args):
2822 1.1 mrg ptabs/l r18, tr0 /* Prepare to return. */
2823 1.1 mrg shlri r17, 32, r0 /* Load the cookie. */
/* r43 = address of ia_main_table biased by -31 entries of 2 bytes. */
/* After the shlri above the upper 32 bits of r0 are clear, so the nsb */
/* result used as table index below is presumably never less than 31. */
2824 1.1 mrg movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2825 1.1 mrg pt/l LOCAL(ia_loop), tr1
/* 32-bit add of zero (r63): keep only the address half of r17. */
2826 1.1 mrg add.l r17, r63, r17
2827 1.1 mrg shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
/* Dispatch loop: index the table of 16-bit offsets by nsb of the */
/* remaining cookie and branch relative to ia_main_label. */
2828 1.1 mrg LOCAL(ia_loop):
2829 1.1 mrg nsb r0, r36
2830 1.1 mrg shlli r36, 1, r37
2831 1.1 mrg ldx.w r43, r37, r38
2832 1.1 mrg LOCAL(ia_main_label):
2833 1.1 mrg ptrel/l r38, tr2
2834 1.1 mrg blink tr2, r63
/* ia_rN_ld pattern: r39 = two-bit mask for this register's cookie field, */
/* r40 = the field's current value; the field is cleared in r0.  rN is */
/* stored at r17, rN is replaced by that address and r17 advances by 8. */
/* If both bits of the field were set (r40 == r39) control returns to */
/* ia_loop; otherwise it falls through to the next register's block. */
2835 1.1 mrg LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2836 1.1 mrg movi 3, r38
2837 1.1 mrg shlli r38, 29, r39
2838 1.1 mrg and r0, r39, r40
2839 1.1 mrg andc r0, r39, r0
2840 1.1 mrg stx.q r17, r63, r2
2841 1.1 mrg add.l r17, r63, r2
2842 1.1 mrg addi.l r17, 8, r17
2843 1.1 mrg beq/u r39, r40, tr1
2844 1.1 mrg LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2845 1.1 mrg movi 3, r38
2846 1.1 mrg shlli r38, 26, r39
2847 1.1 mrg and r0, r39, r40
2848 1.1 mrg andc r0, r39, r0
2849 1.1 mrg stx.q r17, r63, r3
2850 1.1 mrg add.l r17, r63, r3
2851 1.1 mrg addi.l r17, 8, r17
2852 1.1 mrg beq/u r39, r40, tr1
2853 1.1 mrg LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2854 1.1 mrg movi 3, r38
2855 1.1 mrg shlli r38, 23, r39
2856 1.1 mrg and r0, r39, r40
2857 1.1 mrg andc r0, r39, r0
2858 1.1 mrg stx.q r17, r63, r4
2859 1.1 mrg add.l r17, r63, r4
2860 1.1 mrg addi.l r17, 8, r17
2861 1.1 mrg beq/u r39, r40, tr1
2862 1.1 mrg LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2863 1.1 mrg movi 3, r38
2864 1.1 mrg shlli r38, 20, r39
2865 1.1 mrg and r0, r39, r40
2866 1.1 mrg andc r0, r39, r0
2867 1.1 mrg stx.q r17, r63, r5
2868 1.1 mrg add.l r17, r63, r5
2869 1.1 mrg addi.l r17, 8, r17
2870 1.1 mrg beq/u r39, r40, tr1
2871 1.1 mrg LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2872 1.1 mrg movi 3, r38
2873 1.1 mrg shlli r38, 16, r39
2874 1.1 mrg and r0, r39, r40
2875 1.1 mrg andc r0, r39, r0
2876 1.1 mrg stx.q r17, r63, r6
2877 1.1 mrg add.l r17, r63, r6
2878 1.1 mrg addi.l r17, 8, r17
2879 1.1 mrg beq/u r39, r40, tr1
2880 1.1 mrg LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2881 1.1 mrg movi 3 << 12, r39
2882 1.1 mrg and r0, r39, r40
2883 1.1 mrg andc r0, r39, r0
2884 1.1 mrg stx.q r17, r63, r7
2885 1.1 mrg add.l r17, r63, r7
2886 1.1 mrg addi.l r17, 8, r17
2887 1.1 mrg beq/u r39, r40, tr1
2888 1.1 mrg LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2889 1.1 mrg movi 3 << 8, r39
2890 1.1 mrg and r0, r39, r40
2891 1.1 mrg andc r0, r39, r0
2892 1.1 mrg stx.q r17, r63, r8
2893 1.1 mrg add.l r17, r63, r8
2894 1.1 mrg addi.l r17, 8, r17
2895 1.1 mrg beq/u r39, r40, tr1
/* r9 is the last register: no cookie update, return directly. */
2896 1.1 mrg LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2897 1.1 mrg stx.q r17, r63, r9
2898 1.1 mrg add.l r17, r63, r9
2899 1.1 mrg blink tr0, r63
/* ia_rN_push pattern: clear this register's single cookie bit, store rN */
/* at r17, advance r17 by 8 and return to ia_loop.  rN itself is kept. */
2900 1.1 mrg LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2901 1.1 mrg movi 1, r38
2902 1.1 mrg shlli r38, 29, r39
2903 1.1 mrg andc r0, r39, r0
2904 1.1 mrg stx.q r17, r63, r2
2905 1.1 mrg addi.l r17, 8, r17
2906 1.1 mrg blink tr1, r63
2907 1.1 mrg LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2908 1.1 mrg movi 1, r38
2909 1.1 mrg shlli r38, 26, r39
2910 1.1 mrg andc r0, r39, r0
2911 1.1 mrg stx.q r17, r63, r3
2912 1.1 mrg addi.l r17, 8, r17
2913 1.1 mrg blink tr1, r63
2914 1.1 mrg LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2915 1.1 mrg movi 1, r38
2916 1.1 mrg shlli r38, 23, r39
2917 1.1 mrg andc r0, r39, r0
2918 1.1 mrg stx.q r17, r63, r4
2919 1.1 mrg addi.l r17, 8, r17
2920 1.1 mrg blink tr1, r63
2921 1.1 mrg LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2922 1.1 mrg movi 1, r38
2923 1.1 mrg shlli r38, 20, r39
2924 1.1 mrg andc r0, r39, r0
2925 1.1 mrg stx.q r17, r63, r5
2926 1.1 mrg addi.l r17, 8, r17
2927 1.1 mrg blink tr1, r63
2928 1.1 mrg LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2929 1.1 mrg movi 1, r38
2930 1.1 mrg shlli r38, 16, r39
2931 1.1 mrg andc r0, r39, r0
2932 1.1 mrg stx.q r17, r63, r6
2933 1.1 mrg addi.l r17, 8, r17
2934 1.1 mrg blink tr1, r63
2935 1.1 mrg LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2936 1.1 mrg movi 1 << 12, r39
2937 1.1 mrg andc r0, r39, r0
2938 1.1 mrg stx.q r17, r63, r7
2939 1.1 mrg addi.l r17, 8, r17
2940 1.1 mrg blink tr1, r63
2941 1.1 mrg LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2942 1.1 mrg movi 1 << 8, r39
2943 1.1 mrg andc r0, r39, r0
2944 1.1 mrg stx.q r17, r63, r8
2945 1.1 mrg addi.l r17, 8, r17
2946 1.1 mrg blink tr1, r63
/* Computed entry into the run of stores below.  r38 = cookie bits 1-3 */
/* (still shifted left by one); shifting by two more scales that count to */
/* 8 bytes per unit, i.e. one stx.q/addi.l pair of 4-byte instructions. */
/* Control enters that many bytes before ia_end_of_push_seq, so the last */
/* `count' registers up to r9 are stored and the routine returns. */
/* Do not add or remove instructions between ia_stack_of_push_seq and */
/* ia_end_of_push_seq: the entry address depends on their exact size. */
2947 1.1 mrg LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2948 1.1 mrg andi r0, 7 << 1, r38
2949 1.1 mrg movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2950 1.1 mrg shlli r38, 2, r39
2951 1.1 mrg shori LOCAL(ia_end_of_push_seq) & 65535, r40
2952 1.1 mrg sub.l r40, r39, r41
2953 1.1 mrg ptabs/l r41, tr2
2954 1.1 mrg blink tr2, r63
2955 1.1 mrg LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2956 1.1 mrg stx.q r17, r63, r3
2957 1.1 mrg addi.l r17, 8, r17
2958 1.1 mrg stx.q r17, r63, r4
2959 1.1 mrg addi.l r17, 8, r17
2960 1.1 mrg stx.q r17, r63, r5
2961 1.1 mrg addi.l r17, 8, r17
2962 1.1 mrg stx.q r17, r63, r6
2963 1.1 mrg addi.l r17, 8, r17
2964 1.1 mrg stx.q r17, r63, r7
2965 1.1 mrg addi.l r17, 8, r17
2966 1.1 mrg stx.q r17, r63, r8
2967 1.1 mrg addi.l r17, 8, r17
2968 1.1 mrg LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2969 1.1 mrg stx.q r17, r63, r9
2970 1.1 mrg LOCAL(ia_return): /* Return. */
2971 1.1 mrg blink tr0, r63
2972 1.1 mrg LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2973 1.1 mrg ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2974 1.1 mrg #endif /* L_shcompact_incoming_args */
2975 1.1 mrg #endif
2976 1.1 mrg #if __SH5__
2977 1.1 mrg #ifdef L_nested_trampoline
2978 1.1 mrg #if __SH5__ == 32
2979 1.1 mrg .section .text..SHmedia32,"ax"
2980 1.1 mrg #else
2981 1.1 mrg .text
2982 1.1 mrg #endif
2983 1.1 mrg .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
/* Trampoline code template.  It finds its own location, loads a branch */
/* target stored at offset 24 from that location and a second value */
/* stored at offset 32 (__SH5__ == 64) or 28 (otherwise) into r1, then */
/* jumps to the target.  Clobbers r0, r1, tr0 and tr1. */
/* NOTE(review): the value left in r1 is presumably the static chain, and */
/* both data words are presumably filled in when the template is copied; */
/* confirm against the trampoline initialisation in the compiler. */
2984 1.1 mrg .global GLOBAL(GCC_nested_trampoline)
2985 1.1 mrg HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2986 1.1 mrg GLOBAL(GCC_nested_trampoline):
2987 1.1 mrg .mode SHmedia
/* PC-relative target with a zero offset (r63): tr0 refers to this code. */
2988 1.1 mrg ptrel/u r63, tr0
2989 1.1 mrg gettr tr0, r0
/* Load the branch target; the load width follows the pointer size. */
2990 1.1 mrg #if __SH5__ == 64
2991 1.1 mrg ld.q r0, 24, r1
2992 1.1 mrg #else
2993 1.1 mrg ld.l r0, 24, r1
2994 1.1 mrg #endif
2995 1.1 mrg ptabs/l r1, tr1
/* Load the word that follows the target into r1 before jumping. */
2996 1.1 mrg #if __SH5__ == 64
2997 1.1 mrg ld.q r0, 32, r1
2998 1.1 mrg #else
2999 1.1 mrg ld.l r0, 28, r1
3000 1.1 mrg #endif
3001 1.1 mrg blink tr1, r63
3002 1.1 mrg
3003 1.1 mrg ENDFUNC(GLOBAL(GCC_nested_trampoline))
3004 1.1 mrg #endif /* L_nested_trampoline */
3005 1.1 mrg #endif /* __SH5__ */
3006 1.1 mrg #if __SH5__ == 32
3007 1.1 mrg #ifdef L_push_pop_shmedia_regs
3008 1.1 mrg .section .text..SHmedia32,"ax"
3009 1.1 mrg .mode SHmedia
3010 1.1 mrg .align 2
3011 1.1 mrg #ifndef __SH4_NOFPU__
3012 1.1 mrg .global GLOBAL(GCC_push_shmedia_regs)
3013 1.1 mrg FUNC(GLOBAL(GCC_push_shmedia_regs))
3014 1.1 mrg GLOBAL(GCC_push_shmedia_regs):
3015 1.1 mrg addi.l r15, -14*8, r15
3016 1.1 mrg fst.d r15, 13*8, dr62
3017 1.1 mrg fst.d r15, 12*8, dr60
3018 1.1 mrg fst.d r15, 11*8, dr58
3019 1.1 mrg fst.d r15, 10*8, dr56
3020 1.1 mrg fst.d r15, 9*8, dr54
3021 1.1 mrg fst.d r15, 8*8, dr52
3022 1.1 mrg fst.d r15, 7*8, dr50
3023 1.1 mrg fst.d r15, 6*8, dr48
3024 1.1 mrg fst.d r15, 5*8, dr46
3025 1.1 mrg fst.d r15, 4*8, dr44
3026 1.1 mrg fst.d r15, 3*8, dr42
3027 1.1 mrg fst.d r15, 2*8, dr40
3028 1.1 mrg fst.d r15, 1*8, dr38
3029 1.1 mrg fst.d r15, 0*8, dr36
3030 1.1 mrg #else /* ! __SH4_NOFPU__ */
3031 1.1 mrg .global GLOBAL(GCC_push_shmedia_regs_nofpu)
3032 1.1 mrg FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
3033 1.1 mrg GLOBAL(GCC_push_shmedia_regs_nofpu):
3034 1.1 mrg #endif /* ! __SH4_NOFPU__ */
3035 1.1 mrg ptabs/l r18, tr0
3036 1.1 mrg addi.l r15, -27*8, r15
3037 1.1 mrg gettr tr7, r62
3038 1.1 mrg gettr tr6, r61
3039 1.1 mrg gettr tr5, r60
3040 1.1 mrg st.q r15, 26*8, r62
3041 1.1 mrg st.q r15, 25*8, r61
3042 1.1 mrg st.q r15, 24*8, r60
3043 1.1 mrg st.q r15, 23*8, r59
3044 1.1 mrg st.q r15, 22*8, r58
3045 1.1 mrg st.q r15, 21*8, r57
3046 1.1 mrg st.q r15, 20*8, r56
3047 1.1 mrg st.q r15, 19*8, r55
3048 1.1 mrg st.q r15, 18*8, r54
3049 1.1 mrg st.q r15, 17*8, r53
3050 1.1 mrg st.q r15, 16*8, r52
3051 1.1 mrg st.q r15, 15*8, r51
3052 1.1 mrg st.q r15, 14*8, r50
3053 1.1 mrg st.q r15, 13*8, r49
3054 1.1 mrg st.q r15, 12*8, r48
3055 1.1 mrg st.q r15, 11*8, r47
3056 1.1 mrg st.q r15, 10*8, r46
3057 1.1 mrg st.q r15, 9*8, r45
3058 1.1 mrg st.q r15, 8*8, r44
3059 1.1 mrg st.q r15, 7*8, r35
3060 1.1 mrg st.q r15, 6*8, r34
3061 1.1 mrg st.q r15, 5*8, r33
3062 1.1 mrg st.q r15, 4*8, r32
3063 1.1 mrg st.q r15, 3*8, r31
3064 1.1 mrg st.q r15, 2*8, r30
3065 1.1 mrg st.q r15, 1*8, r29
3066 1.1 mrg st.q r15, 0*8, r28
3067 1.1 mrg blink tr0, r63
3068 1.1 mrg #ifndef __SH4_NOFPU__
3069 1.1 mrg ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
3070 1.1 mrg #else
3071 1.1 mrg ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
3072 1.1 mrg #endif
/* Reload the registers stored by the matching push routine and release */
/* the save area. */
/* Without __SH4_NOFPU__ the entry point is GCC_pop_shmedia_regs: it sets */
/* r0 = 41*8 (27 + 14 slots), reloads dr36..dr62 from slots 27-40 and */
/* branches to .L0.  With __SH4_NOFPU__ the entry point is */
/* GCC_pop_shmedia_regs_nofpu, which sets r0 = 27*8 and falls into .L0. */
/* From .L0: tr5-tr7 are reloaded through r60-r62 (slots 24-26), then */
/* r44-r59 and r28-r35; r15 is advanced by r0 and control returns to r18. */
/* On return r0 holds the frame size and r60-r62 hold the reloaded */
/* target-register values. */
3073 1.1 mrg #ifndef __SH4_NOFPU__
3074 1.1 mrg .global GLOBAL(GCC_pop_shmedia_regs)
3075 1.1 mrg FUNC(GLOBAL(GCC_pop_shmedia_regs))
3076 1.1 mrg GLOBAL(GCC_pop_shmedia_regs):
3077 1.1 mrg pt .L0, tr1
3078 1.1 mrg movi 41*8, r0
3079 1.1 mrg fld.d r15, 40*8, dr62
3080 1.1 mrg fld.d r15, 39*8, dr60
3081 1.1 mrg fld.d r15, 38*8, dr58
3082 1.1 mrg fld.d r15, 37*8, dr56
3083 1.1 mrg fld.d r15, 36*8, dr54
3084 1.1 mrg fld.d r15, 35*8, dr52
3085 1.1 mrg fld.d r15, 34*8, dr50
3086 1.1 mrg fld.d r15, 33*8, dr48
3087 1.1 mrg fld.d r15, 32*8, dr46
3088 1.1 mrg fld.d r15, 31*8, dr44
3089 1.1 mrg fld.d r15, 30*8, dr42
3090 1.1 mrg fld.d r15, 29*8, dr40
3091 1.1 mrg fld.d r15, 28*8, dr38
3092 1.1 mrg fld.d r15, 27*8, dr36
/* Skip the r0 initialisation that belongs to the other entry point. */
3093 1.1 mrg blink tr1, r63
3094 1.1 mrg #else /* ! __SH4_NOFPU__ */
3095 1.1 mrg .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
3096 1.1 mrg FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3097 1.1 mrg GLOBAL(GCC_pop_shmedia_regs_nofpu):
3098 1.1 mrg #endif /* ! __SH4_NOFPU__ */
3099 1.1 mrg movi 27*8, r0
/* Common part; r0 = number of bytes to release at the end. */
3100 1.1 mrg .L0:
3101 1.1 mrg ptabs r18, tr0
3102 1.1 mrg ld.q r15, 26*8, r62
3103 1.1 mrg ld.q r15, 25*8, r61
3104 1.1 mrg ld.q r15, 24*8, r60
3105 1.1 mrg ptabs r62, tr7
3106 1.1 mrg ptabs r61, tr6
3107 1.1 mrg ptabs r60, tr5
3108 1.1 mrg ld.q r15, 23*8, r59
3109 1.1 mrg ld.q r15, 22*8, r58
3110 1.1 mrg ld.q r15, 21*8, r57
3111 1.1 mrg ld.q r15, 20*8, r56
3112 1.1 mrg ld.q r15, 19*8, r55
3113 1.1 mrg ld.q r15, 18*8, r54
3114 1.1 mrg ld.q r15, 17*8, r53
3115 1.1 mrg ld.q r15, 16*8, r52
3116 1.1 mrg ld.q r15, 15*8, r51
3117 1.1 mrg ld.q r15, 14*8, r50
3118 1.1 mrg ld.q r15, 13*8, r49
3119 1.1 mrg ld.q r15, 12*8, r48
3120 1.1 mrg ld.q r15, 11*8, r47
3121 1.1 mrg ld.q r15, 10*8, r46
3122 1.1 mrg ld.q r15, 9*8, r45
3123 1.1 mrg ld.q r15, 8*8, r44
3124 1.1 mrg ld.q r15, 7*8, r35
3125 1.1 mrg ld.q r15, 6*8, r34
3126 1.1 mrg ld.q r15, 5*8, r33
3127 1.1 mrg ld.q r15, 4*8, r32
3128 1.1 mrg ld.q r15, 3*8, r31
3129 1.1 mrg ld.q r15, 2*8, r30
3130 1.1 mrg ld.q r15, 1*8, r29
3131 1.1 mrg ld.q r15, 0*8, r28
3132 1.1 mrg add.l r15, r0, r15
3133 1.1 mrg blink tr0, r63
3134 1.1 mrg
3135 1.1 mrg #ifndef __SH4_NOFPU__
3136 1.1 mrg ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3137 1.1 mrg #else
3138 1.1 mrg ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3139 1.1 mrg #endif
3140 1.1 mrg #endif /* L_push_pop_shmedia_regs */
3141 1.1 mrg #endif /* __SH5__ == 32 */
3142 1.1 mrg
3143 1.1 mrg #ifdef L_div_table
3144 1.1 mrg #if __SH5__
3145 1.1 mrg #if defined(__pic__) && __SHMEDIA__
/* Signed division for SHmedia position-independent code: r0 = r4 / r5. */
/* A table-based approximation of the divisor's inverse is refined in two */
/* steps (see the "11 bit" and "22 bit" notes below) and multiplied by */
/* the dividend.  The table is reached through tr0 so that no absolute */
/* address is needed. */
3146 1.1 mrg .global GLOBAL(sdivsi3)
3147 1.1 mrg FUNC(GLOBAL(sdivsi3))
3148 1.1 mrg #if __SH5__ == 32
3149 1.1 mrg .section .text..SHmedia32,"ax"
3150 1.1 mrg #else
3151 1.1 mrg .text
3152 1.1 mrg #endif
/* TEXT_DATA_BUG is defined only inside this disabled region, so unless */
/* it is defined elsewhere the Local_div_table alternative is not used. */
3153 1.1 mrg #if 0
3154 1.1 mrg /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3155 1.1 mrg in a text section does not work (at least for shared libraries):
3156 1.1 mrg the linker sets the LSB of the address as if this was SHmedia code. */
3157 1.1 mrg #define TEXT_DATA_BUG
3158 1.1 mrg #endif
3159 1.1 mrg .align 2
3160 1.1 mrg // inputs: r4,r5
3161 1.1 mrg // clobbered: r1,r18,r19,r20,r21,r25,tr0
3162 1.1 mrg // result in r0
3163 1.1 mrg .global GLOBAL(sdivsi3)
3164 1.1 mrg GLOBAL(sdivsi3):
/* tr0 temporarily carries the table address; it is copied to r20 below. */
3165 1.1 mrg #ifdef TEXT_DATA_BUG
3166 1.1 mrg ptb datalabel Local_div_table,tr0
3167 1.1 mrg #else
3168 1.1 mrg ptb GLOBAL(div_table_internal),tr0
3169 1.1 mrg #endif
3170 1.1 mrg nsb r5, r1
3171 1.1 mrg shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3172 1.1 mrg shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3173 1.1 mrg /* bubble */
3174 1.1 mrg gettr tr0,r20
/* The signed index selects a byte-sized factor at table + index ... */
3175 1.1 mrg ldx.ub r20, r21, r19 // u0.8
3176 1.1 mrg shari r25, 32, r25 // normalize to s2.30
3177 1.1 mrg shlli r21, 1, r21
3178 1.1 mrg muls.l r25, r19, r19 // s2.38
/* ... and a 16-bit constant at table + 2 * index. */
3179 1.1 mrg ldx.w r20, r21, r21 // s2.14
/* The table address now lives in r20, so tr0 can take the return */
/* address; r18 is reused as a scratch register from here on. */
3180 1.1 mrg ptabs r18, tr0
3181 1.1 mrg shari r19, 24, r19 // truncate to s2.14
3182 1.1 mrg sub r21, r19, r19 // some 11 bit inverse in s1.14
3183 1.1 mrg muls.l r19, r19, r21 // u0.28
/* r1 = 92 - nsb (r5): shift count for the final shard. */
3184 1.1 mrg sub r63, r1, r1
3185 1.1 mrg addi r1, 92, r1
3186 1.1 mrg muls.l r25, r21, r18 // s2.58
3187 1.1 mrg shlli r19, 45, r19 // multiply by two and convert to s2.58
3188 1.1 mrg /* bubble */
3189 1.1 mrg sub r19, r18, r18
3190 1.1 mrg shari r18, 28, r18 // some 22 bit inverse in s1.30
3191 1.1 mrg muls.l r18, r25, r0 // s2.60
3192 1.1 mrg muls.l r18, r4, r25 // s32.30
3193 1.1 mrg /* bubble */
3194 1.1 mrg shari r0, 16, r19 // s-16.44
3195 1.1 mrg muls.l r19, r18, r19 // s-16.74
/* r0 = 0 or -1 according to the sign of the product in r25. */
3196 1.1 mrg shari r25, 63, r0
3197 1.1 mrg shari r4, 14, r18 // s19.-14
3198 1.1 mrg shari r19, 30, r19 // s-16.44
3199 1.1 mrg muls.l r19, r18, r19 // s15.30
3200 1.1 mrg xor r21, r0, r21 // You could also use the constant 1 << 27.
3201 1.1 mrg add r21, r25, r21
3202 1.1 mrg sub r21, r19, r21
3203 1.1 mrg shard r21, r1, r21
3204 1.1 mrg sub r21, r0, r0
3205 1.1 mrg blink tr0, r63
3206 1.1 mrg ENDFUNC(GLOBAL(sdivsi3))
3207 1.1 mrg /* This table has been generated by divtab.c .
3208 1.1 mrg Defects for bias -330:
3209 1.1 mrg Max defect: 6.081536e-07 at -1.000000e+00
3210 1.1 mrg Min defect: 2.849516e-08 at 1.030651e+00
3211 1.1 mrg Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3212 1.1 mrg Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3213 1.1 mrg Defect at 1: 1.238659e-07
3214 1.1 mrg Defect at -2: 1.061708e-07 */
3215 1.1 mrg #else /* ! __pic__ || ! __SHMEDIA__ */
3216 1.1 mrg .section .rodata
3217 1.1 mrg #endif /* __pic__ */
/* Private copy of the division table, assembled only when TEXT_DATA_BUG */
/* is defined together with __pic__ and __SHMEDIA__. */
/* Layout in byte offsets relative to the Local_div_table label: */
/*   -64 .. -33  16 words, negative division constants */
/*   -32 .. -17  16 bytes, negative division factors */
/*   -16 ..  15  unused (the two .skip 16 directives) */
/*    16 ..  31  16 bytes, positive division factors */
/*    32 ..  63  16 words, positive division constants */
/* This matches the indexing in sdivsi3 above: factor = byte at */
/* label + index, constant = word at label + 2 * index.  The unused gap */
/* presumably corresponds to the "hole" in the index range noted there. */
3218 1.1 mrg #if defined(TEXT_DATA_BUG) && defined(__pic__) && __SHMEDIA__
3219 1.1 mrg .balign 2
3220 1.1 mrg .type Local_div_table,@object
3221 1.1 mrg .size Local_div_table,128
3222 1.1 mrg /* negative division constants */
3223 1.1 mrg .word -16638
3224 1.1 mrg .word -17135
3225 1.1 mrg .word -17737
3226 1.1 mrg .word -18433
3227 1.1 mrg .word -19103
3228 1.1 mrg .word -19751
3229 1.1 mrg .word -20583
3230 1.1 mrg .word -21383
3231 1.1 mrg .word -22343
3232 1.1 mrg .word -23353
3233 1.1 mrg .word -24407
3234 1.1 mrg .word -25582
3235 1.1 mrg .word -26863
3236 1.1 mrg .word -28382
3237 1.1 mrg .word -29965
3238 1.1 mrg .word -31800
3239 1.1 mrg /* negative division factors */
3240 1.1 mrg .byte 66
3241 1.1 mrg .byte 70
3242 1.1 mrg .byte 75
3243 1.1 mrg .byte 81
3244 1.1 mrg .byte 87
3245 1.1 mrg .byte 93
3246 1.1 mrg .byte 101
3247 1.1 mrg .byte 109
3248 1.1 mrg .byte 119
3249 1.1 mrg .byte 130
3250 1.1 mrg .byte 142
3251 1.1 mrg .byte 156
3252 1.1 mrg .byte 172
3253 1.1 mrg .byte 192
3254 1.1 mrg .byte 214
3255 1.1 mrg .byte 241
3256 1.1 mrg .skip 16
/* The label marks the centre of the 128-byte table, not its start. */
3257 1.1 mrg Local_div_table:
3258 1.1 mrg .skip 16
3259 1.1 mrg /* positive division factors */
3260 1.1 mrg .byte 241
3261 1.1 mrg .byte 214
3262 1.1 mrg .byte 192
3263 1.1 mrg .byte 172
3264 1.1 mrg .byte 156
3265 1.1 mrg .byte 142
3266 1.1 mrg .byte 130
3267 1.1 mrg .byte 119
3268 1.1 mrg .byte 109
3269 1.1 mrg .byte 101
3270 1.1 mrg .byte 93
3271 1.1 mrg .byte 87
3272 1.1 mrg .byte 81
3273 1.1 mrg .byte 75
3274 1.1 mrg .byte 70
3275 1.1 mrg .byte 66
3276 1.1 mrg /* positive division constants */
3277 1.1 mrg .word 31801
3278 1.1 mrg .word 29966
3279 1.1 mrg .word 28383
3280 1.1 mrg .word 26864
3281 1.1 mrg .word 25583
3282 1.1 mrg .word 24408
3283 1.1 mrg .word 23354
3284 1.1 mrg .word 22344
3285 1.1 mrg .word 21384
3286 1.1 mrg .word 20584
3287 1.1 mrg .word 19752
3288 1.1 mrg .word 19104
3289 1.1 mrg .word 18434
3290 1.1 mrg .word 17738
3291 1.1 mrg .word 17136
3292 1.1 mrg .word 16639
/* Switch to .rodata for the exported copy of the table that follows. */
3293 1.1 mrg .section .rodata
3294 1.1 mrg #endif /* TEXT_DATA_BUG */
/* Exported division table; div_table_internal is a hidden alias for it. */
/* Layout in byte offsets relative to the div_table label: */
/*   -64 .. -33  16 words, negative division constants */
/*   -32 .. -17  16 bytes, negative division factors */
/*   -16 ..  15  unused (the two .skip 16 directives) */
/*    16 ..  31  16 bytes, positive division factors */
/*    32 ..  63  16 words, positive division constants */
/* The SHmedia sdivsi3 routine in this file reads it with a signed index: */
/* factor = byte at label + index, constant = word at label + 2 * index. */
3295 1.1 mrg .balign 2
3296 1.1 mrg .type GLOBAL(div_table),@object
3297 1.1 mrg .size GLOBAL(div_table),128
3298 1.1 mrg /* negative division constants */
3299 1.1 mrg .word -16638
3300 1.1 mrg .word -17135
3301 1.1 mrg .word -17737
3302 1.1 mrg .word -18433
3303 1.1 mrg .word -19103
3304 1.1 mrg .word -19751
3305 1.1 mrg .word -20583
3306 1.1 mrg .word -21383
3307 1.1 mrg .word -22343
3308 1.1 mrg .word -23353
3309 1.1 mrg .word -24407
3310 1.1 mrg .word -25582
3311 1.1 mrg .word -26863
3312 1.1 mrg .word -28382
3313 1.1 mrg .word -29965
3314 1.1 mrg .word -31800
3315 1.1 mrg /* negative division factors */
3316 1.1 mrg .byte 66
3317 1.1 mrg .byte 70
3318 1.1 mrg .byte 75
3319 1.1 mrg .byte 81
3320 1.1 mrg .byte 87
3321 1.1 mrg .byte 93
3322 1.1 mrg .byte 101
3323 1.1 mrg .byte 109
3324 1.1 mrg .byte 119
3325 1.1 mrg .byte 130
3326 1.1 mrg .byte 142
3327 1.1 mrg .byte 156
3328 1.1 mrg .byte 172
3329 1.1 mrg .byte 192
3330 1.1 mrg .byte 214
3331 1.1 mrg .byte 241
3332 1.1 mrg .skip 16
/* The label marks the centre of the 128-byte table, not its start. */
3333 1.1 mrg .global GLOBAL(div_table)
3334 1.1 mrg GLOBAL(div_table):
3335 1.1 mrg HIDDEN_ALIAS(div_table_internal,div_table)
3336 1.1 mrg .skip 16
3337 1.1 mrg /* positive division factors */
3338 1.1 mrg .byte 241
3339 1.1 mrg .byte 214
3340 1.1 mrg .byte 192
3341 1.1 mrg .byte 172
3342 1.1 mrg .byte 156
3343 1.1 mrg .byte 142
3344 1.1 mrg .byte 130
3345 1.1 mrg .byte 119
3346 1.1 mrg .byte 109
3347 1.1 mrg .byte 101
3348 1.1 mrg .byte 93
3349 1.1 mrg .byte 87
3350 1.1 mrg .byte 81
3351 1.1 mrg .byte 75
3352 1.1 mrg .byte 70
3353 1.1 mrg .byte 66
3354 1.1 mrg /* positive division constants */
3355 1.1 mrg .word 31801
3356 1.1 mrg .word 29966
3357 1.1 mrg .word 28383
3358 1.1 mrg .word 26864
3359 1.1 mrg .word 25583
3360 1.1 mrg .word 24408
3361 1.1 mrg .word 23354
3362 1.1 mrg .word 22344
3363 1.1 mrg .word 21384
3364 1.1 mrg .word 20584
3365 1.1 mrg .word 19752
3366 1.1 mrg .word 19104
3367 1.1 mrg .word 18434
3368 1.1 mrg .word 17738
3369 1.1 mrg .word 17136
3370 1.1 mrg .word 16639
3371 1.1 mrg
3372 1.1 mrg #elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3373 1.1 mrg /* This code uses shld, thus is not suitable for SH1 / SH2. */
3374 1.1 mrg
3375 1.1 mrg /* Signed / unsigned division without use of FPU, optimized for SH4.
3376 1.1 mrg Uses a lookup table for divisors in the range -128 .. +128, and
3377 1.1 mrg div1 with case distinction for larger divisors in three more ranges.
3378 1.1 mrg The code is lumped together with the table to allow the use of mova. */
3379 1.1 mrg #ifdef __LITTLE_ENDIAN__
3380 1.1 mrg #define L_LSB 0
3381 1.1 mrg #define L_LSWMSB 1
3382 1.1 mrg #define L_MSWLSB 2
3383 1.1 mrg #else
3384 1.1 mrg #define L_LSB 3
3385 1.1 mrg #define L_LSWMSB 2
3386 1.1 mrg #define L_MSWLSB 1
3387 1.1 mrg #endif
3388 1.1 mrg
/* Unsigned division: r0 = r4 / r5. */
/* r4 and r5 are pushed on the stack and reloaded before returning; r1 is */
/* used as scratch.  Four cases are distinguished by the divisor: */
/*   r5 <= 128             inverse-table multiply (udiv_le128) */
/*   128 < r5 < 65536      div1 steps, continued at udiv_25 */
/*   r5 >= 65536, quotient may need more than 8 bits (udiv_ge64k) */
/*   r5 >= 65536, quotient fits in 8 bits (udiv_r8) */
/* udiv_25 and div_ge64k_end are labels inside sdivsi3_i4i.  Conversely */
/* div_le128, div_ge64k and div_by_1_neg are entered from sdivsi3_i4i, */
/* which has already pushed r4 and r5 at that point. */
3389 1.1 mrg .balign 4
3390 1.1 mrg .global GLOBAL(udivsi3_i4i)
3391 1.1 mrg FUNC(GLOBAL(udivsi3_i4i))
3392 1.1 mrg GLOBAL(udivsi3_i4i):
3393 1.1 mrg mov.w LOCAL(c128_w), r1
3394 1.1 mrg div0u
/* r0 = r4 >> 8, used both as div1 accumulator and for range checks. */
3395 1.1 mrg mov r4,r0
3396 1.1 mrg shlr8 r0
/* T = (r5 > 128), unsigned. */
3397 1.1 mrg cmp/hi r1,r5
3398 1.1 mrg extu.w r5,r1
3399 1.1 mrg bf LOCAL(udiv_le128)
/* T = (r5 fits in 16 bits). */
3400 1.1 mrg cmp/eq r5,r1
3401 1.1 mrg bf LOCAL(udiv_ge64k)
/* 128 < r5 < 65536: r0 = r4 >> 9, divisor moved to the upper half. */
/* r4 and the original r5 (kept in r1) are pushed between div1 steps. */
3402 1.1 mrg shlr r0
3403 1.1 mrg mov r5,r1
3404 1.1 mrg shll16 r5
3405 1.1 mrg mov.l r4,@-r15
3406 1.1 mrg div1 r5,r0
3407 1.1 mrg mov.l r1,@-r15
3408 1.1 mrg div1 r5,r0
3409 1.1 mrg div1 r5,r0
3410 1.1 mrg bra LOCAL(udiv_25)
/* Delay slot: fourth div1 step, executed before the branch is taken. */
3411 1.1 mrg div1 r5,r0
3412 1.1 mrg
/* Entry from sdivsi3_i4i (r4/r5 already on the stack). */
3413 1.1 mrg LOCAL(div_le128):
3414 1.1 mrg mova LOCAL(div_table_ix),r0
3415 1.1 mrg bra LOCAL(div_le128_2)
/* Delay slot: r1 = div_table_ix[r5]. */
3416 1.1 mrg mov.b @(r0,r5),r1
3417 1.1 mrg LOCAL(udiv_le128):
3418 1.1 mrg mov.l r4,@-r15
3419 1.1 mrg mova LOCAL(div_table_ix),r0
3420 1.1 mrg mov.b @(r0,r5),r1
3421 1.1 mrg mov.l r5,@-r15
/* r1 is a byte offset into the table of normalized inverses. */
/* For r5 == 0 the offset fetched above is -6, which makes the mov.l */
/* below unaligned (see the note at div_table_ix). */
3422 1.1 mrg LOCAL(div_le128_2):
3423 1.1 mrg mova LOCAL(div_table_inv),r0
3424 1.1 mrg mov.l @(r0,r1),r1
/* T = ((r5 & 0xfe) == 0), i.e. the divisor is 0 or 1. */
3425 1.1 mrg mov r5,r0
3426 1.1 mrg tst #0xfe,r0
3427 1.1 mrg mova LOCAL(div_table_clz),r0
/* mach:macl = inverse * dividend; r1 = shift count for this divisor. */
3428 1.1 mrg dmulu.l r1,r4
3429 1.1 mrg mov.b @(r0,r5),r1
3430 1.1 mrg bt/s LOCAL(div_by_1)
/* Delay slot: r0 = r4, the result when the branch is taken. */
3431 1.1 mrg mov r4,r0
3432 1.1 mrg mov.l @r15+,r5
3433 1.1 mrg sts mach,r0
/* T is already clear here because the bt/s above was not taken. */
/* Adding r4 accounts for the implicit leading 1 of the inverse; the */
/* carry out is brought back in by the rotcr. */
3434 1.1 mrg /* clrt */
3435 1.1 mrg addc r4,r0
3436 1.1 mrg mov.l @r15+,r4
3437 1.1 mrg rotcr r0
3438 1.1 mrg rts
/* Delay slot: final shift by the table-supplied count. */
3439 1.1 mrg shld r1,r0
3440 1.1 mrg
/* Entry from sdivsi3_i4i for a negated result. */
3441 1.1 mrg LOCAL(div_by_1_neg):
3442 1.1 mrg neg r4,r0
/* Restore r5 and r4 and return with r0 as set by the caller of this label. */
3443 1.1 mrg LOCAL(div_by_1):
3444 1.1 mrg mov.l @r15+,r5
3445 1.1 mrg rts
3446 1.1 mrg mov.l @r15+,r4
3447 1.1 mrg
/* Entry from sdivsi3_i4i; T was set by its cmp/hi r0,r5. */
3448 1.1 mrg LOCAL(div_ge64k):
3449 1.1 mrg bt/s LOCAL(div_r8)
3450 1.1 mrg div0u
3451 1.1 mrg shll8 r5
3452 1.1 mrg bra LOCAL(div_ge64k_2)
3453 1.1 mrg div1 r5,r0
/* r5 >= 65536.  With r0 = r4 >> 8, r5 > r0 means r4 < r5 * 256, so the */
/* quotient fits in 8 bits and the shorter udiv_r8 sequence is used. */
3454 1.1 mrg LOCAL(udiv_ge64k):
3455 1.1 mrg cmp/hi r0,r5
3456 1.1 mrg mov r5,r1
3457 1.1 mrg bt LOCAL(udiv_r8)
3458 1.1 mrg shll8 r5
3459 1.1 mrg mov.l r4,@-r15
3460 1.1 mrg div1 r5,r0
3461 1.1 mrg mov.l r1,@-r15
/* Seven more div1 steps; a zero word is pushed as scratch space for the */
/* byte store below. */
3462 1.1 mrg LOCAL(div_ge64k_2):
3463 1.1 mrg div1 r5,r0
3464 1.1 mrg mov.l LOCAL(zero_l),r1
3465 1.1 mrg .rept 4
3466 1.1 mrg div1 r5,r0
3467 1.1 mrg .endr
3468 1.1 mrg mov.l r1,@-r15
3469 1.1 mrg div1 r5,r0
3470 1.1 mrg mov.w LOCAL(m256_w),r1
3471 1.1 mrg div1 r5,r0
3472 1.1 mrg mov.b r0,@(L_LSWMSB,r15)
/* r0 = (r0 & r1) | (r4 & ~r1): keep the bits of r0 selected by the mask */
/* loaded from m256_w and take the remaining bits from r4. */
3473 1.1 mrg xor r4,r0
3474 1.1 mrg and r1,r0
3475 1.1 mrg bra LOCAL(div_ge64k_end)
/* Delay slot: completes the bit merge described above. */
3476 1.1 mrg xor r4,r0
3477 1.1 mrg
/* Entry from div_ge64k (signed path); r4 <<= 24 as in udiv_r8. */
3478 1.1 mrg LOCAL(div_r8):
3479 1.1 mrg shll16 r4
3480 1.1 mrg bra LOCAL(div_r8_2)
3481 1.1 mrg shll8 r4
/* Quotient fits in 8 bits: save r4/r5, move the low byte of the dividend */
/* to the top of r4 and produce the quotient with eight div1 steps. */
3482 1.1 mrg LOCAL(udiv_r8):
3483 1.1 mrg mov.l r4,@-r15
3484 1.1 mrg shll16 r4
3485 1.1 mrg clrt
3486 1.1 mrg shll8 r4
3487 1.1 mrg mov.l r5,@-r15
/* r1 is the div1 accumulator; rotcl on r0 feeds dividend bits in via T */
/* and collects the quotient bits.  r4 takes over as divisor for the */
/* last step so that r5 can be reloaded early. */
3488 1.1 mrg LOCAL(div_r8_2):
3489 1.1 mrg rotcl r4
3490 1.1 mrg mov r0,r1
3491 1.1 mrg div1 r5,r1
3492 1.1 mrg mov r4,r0
3493 1.1 mrg rotcl r0
3494 1.1 mrg mov r5,r4
3495 1.1 mrg div1 r5,r1
3496 1.1 mrg .rept 5
3497 1.1 mrg rotcl r0; div1 r5,r1
3498 1.1 mrg .endr
3499 1.1 mrg rotcl r0
3500 1.1 mrg mov.l @r15+,r5
3501 1.1 mrg div1 r4,r1
3502 1.1 mrg mov.l @r15+,r4
3503 1.1 mrg rts
/* Delay slot: shift in the last quotient bit. */
3504 1.1 mrg rotcl r0
3505 1.1 mrg
3506 1.1 mrg ENDFUNC(GLOBAL(udivsi3_i4i))
3507 1.1 mrg
3508 1.1 mrg .global GLOBAL(sdivsi3_i4i)
3509 1.1 mrg FUNC(GLOBAL(sdivsi3_i4i))
3510 1.1 mrg /* This is link-compatible with a GLOBAL(sdivsi3) call,
3511 1.1 mrg but we effectively clobber only r1. */
3512 1.1 mrg GLOBAL(sdivsi3_i4i):
3513 1.1 mrg mov.l r4,@-r15
3514 1.1 mrg cmp/pz r5
3515 1.1 mrg mov.w LOCAL(c128_w), r1
3516 1.1 mrg bt/s LOCAL(pos_divisor)
3517 1.1 mrg cmp/pz r4
3518 1.1 mrg mov.l r5,@-r15
3519 1.1 mrg neg r5,r5
3520 1.1 mrg bt/s LOCAL(neg_result)
3521 1.1 mrg cmp/hi r1,r5
3522 1.1 mrg neg r4,r4
3523 1.1 mrg LOCAL(pos_result):
3524 1.1 mrg extu.w r5,r0
3525 1.1 mrg bf LOCAL(div_le128)
3526 1.1 mrg cmp/eq r5,r0
3527 1.1 mrg mov r4,r0
3528 1.1 mrg shlr8 r0
3529 1.1 mrg bf/s LOCAL(div_ge64k)
3530 1.1 mrg cmp/hi r0,r5
3531 1.1 mrg div0u
3532 1.1 mrg shll16 r5
3533 1.1 mrg div1 r5,r0
3534 1.1 mrg div1 r5,r0
3535 1.1 mrg div1 r5,r0
3536 1.1 mrg LOCAL(udiv_25):
3537 1.1 mrg mov.l LOCAL(zero_l),r1
3538 1.1 mrg div1 r5,r0
3539 1.1 mrg div1 r5,r0
3540 1.1 mrg mov.l r1,@-r15
3541 1.1 mrg .rept 3
3542 1.1 mrg div1 r5,r0
3543 1.1 mrg .endr
3544 1.1 mrg mov.b r0,@(L_MSWLSB,r15)
3545 1.1 mrg xtrct r4,r0
3546 1.1 mrg swap.w r0,r0
3547 1.1 mrg .rept 8
3548 1.1 mrg div1 r5,r0
3549 1.1 mrg .endr
3550 1.1 mrg mov.b r0,@(L_LSWMSB,r15)
3551 1.1 mrg LOCAL(div_ge64k_end):
3552 1.1 mrg .rept 8
3553 1.1 mrg div1 r5,r0
3554 1.1 mrg .endr
3555 1.1 mrg mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3556 1.1 mrg extu.b r0,r0
3557 1.1 mrg mov.l @r15+,r5
3558 1.1 mrg or r4,r0
3559 1.1 mrg mov.l @r15+,r4
3560 1.1 mrg rts
3561 1.1 mrg rotcl r0
3562 1.1 mrg
3563 1.1 mrg LOCAL(div_le128_neg):
3564 1.1 mrg tst #0xfe,r0
3565 1.1 mrg mova LOCAL(div_table_ix),r0
3566 1.1 mrg mov.b @(r0,r5),r1
3567 1.1 mrg mova LOCAL(div_table_inv),r0
3568 1.1 mrg bt/s LOCAL(div_by_1_neg)
3569 1.1 mrg mov.l @(r0,r1),r1
3570 1.1 mrg mova LOCAL(div_table_clz),r0
3571 1.1 mrg dmulu.l r1,r4
3572 1.1 mrg mov.b @(r0,r5),r1
3573 1.1 mrg mov.l @r15+,r5
3574 1.1 mrg sts mach,r0
3575 1.1 mrg /* clrt */
3576 1.1 mrg addc r4,r0
3577 1.1 mrg mov.l @r15+,r4
3578 1.1 mrg rotcr r0
3579 1.1 mrg shld r1,r0
3580 1.1 mrg rts
3581 1.1 mrg neg r0,r0
3582 1.1 mrg
3583 1.1 mrg LOCAL(pos_divisor):
3584 1.1 mrg mov.l r5,@-r15
3585 1.1 mrg bt/s LOCAL(pos_result)
3586 1.1 mrg cmp/hi r1,r5
3587 1.1 mrg neg r4,r4
3588 1.1 mrg LOCAL(neg_result):
3589 1.1 mrg extu.w r5,r0
3590 1.1 mrg bf LOCAL(div_le128_neg)
3591 1.1 mrg cmp/eq r5,r0
3592 1.1 mrg mov r4,r0
3593 1.1 mrg shlr8 r0
3594 1.1 mrg bf/s LOCAL(div_ge64k_neg)
3595 1.1 mrg cmp/hi r0,r5
3596 1.1 mrg div0u
3597 1.1 mrg mov.l LOCAL(zero_l),r1
3598 1.1 mrg shll16 r5
3599 1.1 mrg div1 r5,r0
3600 1.1 mrg mov.l r1,@-r15
3601 1.1 mrg .rept 7
3602 1.1 mrg div1 r5,r0
3603 1.1 mrg .endr
3604 1.1 mrg mov.b r0,@(L_MSWLSB,r15)
3605 1.1 mrg xtrct r4,r0
3606 1.1 mrg swap.w r0,r0
3607 1.1 mrg .rept 8
3608 1.1 mrg div1 r5,r0
3609 1.1 mrg .endr
3610 1.1 mrg mov.b r0,@(L_LSWMSB,r15)
3611 1.1 mrg LOCAL(div_ge64k_neg_end):
3612 1.1 mrg .rept 8
3613 1.1 mrg div1 r5,r0
3614 1.1 mrg .endr
3615 1.1 mrg mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3616 1.1 mrg extu.b r0,r1
3617 1.1 mrg mov.l @r15+,r5
3618 1.1 mrg or r4,r1
3619 1.1 mrg LOCAL(div_r8_neg_end):
3620 1.1 mrg mov.l @r15+,r4
3621 1.1 mrg rotcl r1
3622 1.1 mrg rts
3623 1.1 mrg neg r1,r0
3624 1.1 mrg
3625 1.1 mrg LOCAL(div_ge64k_neg):
3626 1.1 mrg bt/s LOCAL(div_r8_neg)
3627 1.1 mrg div0u
3628 1.1 mrg shll8 r5
3629 1.1 mrg mov.l LOCAL(zero_l),r1
3630 1.1 mrg .rept 6
3631 1.1 mrg div1 r5,r0
3632 1.1 mrg .endr
3633 1.1 mrg mov.l r1,@-r15
3634 1.1 mrg div1 r5,r0
3635 1.1 mrg mov.w LOCAL(m256_w),r1
3636 1.1 mrg div1 r5,r0
3637 1.1 mrg mov.b r0,@(L_LSWMSB,r15)
3638 1.1 mrg xor r4,r0
3639 1.1 mrg and r1,r0
3640 1.1 mrg bra LOCAL(div_ge64k_neg_end)
3641 1.1 mrg xor r4,r0
3642 1.1 mrg
3643 1.1 mrg LOCAL(c128_w):
3644 1.1 mrg .word 128
3645 1.1 mrg
3646 1.1 mrg LOCAL(div_r8_neg):
3647 1.1 mrg clrt
3648 1.1 mrg shll16 r4
3649 1.1 mrg mov r4,r1
3650 1.1 mrg shll8 r1
3651 1.1 mrg mov r5,r4
3652 1.1 mrg .rept 7
3653 1.1 mrg rotcl r1; div1 r5,r0
3654 1.1 mrg .endr
3655 1.1 mrg mov.l @r15+,r5
3656 1.1 mrg rotcl r1
3657 1.1 mrg bra LOCAL(div_r8_neg_end)
3658 1.1 mrg div1 r4,r0
3659 1.1 mrg
3660 1.1 mrg LOCAL(m256_w):
3661 1.1 mrg .word 0xff00
3662 1.1 mrg /* This table has been generated by divtab-sh4.c. */
3663 1.1 mrg .balign 4
3664 1.1 mrg LOCAL(div_table_clz):
3665 1.1 mrg .byte 0
3666 1.1 mrg .byte 1
3667 1.1 mrg .byte 0
3668 1.1 mrg .byte -1
3669 1.1 mrg .byte -1
3670 1.1 mrg .byte -2
3671 1.1 mrg .byte -2
3672 1.1 mrg .byte -2
3673 1.1 mrg .byte -2
3674 1.1 mrg .byte -3
3675 1.1 mrg .byte -3
3676 1.1 mrg .byte -3
3677 1.1 mrg .byte -3
3678 1.1 mrg .byte -3
3679 1.1 mrg .byte -3
3680 1.1 mrg .byte -3
3681 1.1 mrg .byte -3
3682 1.1 mrg .byte -4
3683 1.1 mrg .byte -4
3684 1.1 mrg .byte -4
3685 1.1 mrg .byte -4
3686 1.1 mrg .byte -4
3687 1.1 mrg .byte -4
3688 1.1 mrg .byte -4
3689 1.1 mrg .byte -4
3690 1.1 mrg .byte -4
3691 1.1 mrg .byte -4
3692 1.1 mrg .byte -4
3693 1.1 mrg .byte -4
3694 1.1 mrg .byte -4
3695 1.1 mrg .byte -4
3696 1.1 mrg .byte -4
3697 1.1 mrg .byte -4
3698 1.1 mrg .byte -5
3699 1.1 mrg .byte -5
3700 1.1 mrg .byte -5
3701 1.1 mrg .byte -5
3702 1.1 mrg .byte -5
3703 1.1 mrg .byte -5
3704 1.1 mrg .byte -5
3705 1.1 mrg .byte -5
3706 1.1 mrg .byte -5
3707 1.1 mrg .byte -5
3708 1.1 mrg .byte -5
3709 1.1 mrg .byte -5
3710 1.1 mrg .byte -5
3711 1.1 mrg .byte -5
3712 1.1 mrg .byte -5
3713 1.1 mrg .byte -5
3714 1.1 mrg .byte -5
3715 1.1 mrg .byte -5
3716 1.1 mrg .byte -5
3717 1.1 mrg .byte -5
3718 1.1 mrg .byte -5
3719 1.1 mrg .byte -5
3720 1.1 mrg .byte -5
3721 1.1 mrg .byte -5
3722 1.1 mrg .byte -5
3723 1.1 mrg .byte -5
3724 1.1 mrg .byte -5
3725 1.1 mrg .byte -5
3726 1.1 mrg .byte -5
3727 1.1 mrg .byte -5
3728 1.1 mrg .byte -5
3729 1.1 mrg .byte -5
3730 1.1 mrg .byte -6
3731 1.1 mrg .byte -6
3732 1.1 mrg .byte -6
3733 1.1 mrg .byte -6
3734 1.1 mrg .byte -6
3735 1.1 mrg .byte -6
3736 1.1 mrg .byte -6
3737 1.1 mrg .byte -6
3738 1.1 mrg .byte -6
3739 1.1 mrg .byte -6
3740 1.1 mrg .byte -6
3741 1.1 mrg .byte -6
3742 1.1 mrg .byte -6
3743 1.1 mrg .byte -6
3744 1.1 mrg .byte -6
3745 1.1 mrg .byte -6
3746 1.1 mrg .byte -6
3747 1.1 mrg .byte -6
3748 1.1 mrg .byte -6
3749 1.1 mrg .byte -6
3750 1.1 mrg .byte -6
3751 1.1 mrg .byte -6
3752 1.1 mrg .byte -6
3753 1.1 mrg .byte -6
3754 1.1 mrg .byte -6
3755 1.1 mrg .byte -6
3756 1.1 mrg .byte -6
3757 1.1 mrg .byte -6
3758 1.1 mrg .byte -6
3759 1.1 mrg .byte -6
3760 1.1 mrg .byte -6
3761 1.1 mrg .byte -6
3762 1.1 mrg .byte -6
3763 1.1 mrg .byte -6
3764 1.1 mrg .byte -6
3765 1.1 mrg .byte -6
3766 1.1 mrg .byte -6
3767 1.1 mrg .byte -6
3768 1.1 mrg .byte -6
3769 1.1 mrg .byte -6
3770 1.1 mrg .byte -6
3771 1.1 mrg .byte -6
3772 1.1 mrg .byte -6
3773 1.1 mrg .byte -6
3774 1.1 mrg .byte -6
3775 1.1 mrg .byte -6
3776 1.1 mrg .byte -6
3777 1.1 mrg .byte -6
3778 1.1 mrg .byte -6
3779 1.1 mrg .byte -6
3780 1.1 mrg .byte -6
3781 1.1 mrg .byte -6
3782 1.1 mrg .byte -6
3783 1.1 mrg .byte -6
3784 1.1 mrg .byte -6
3785 1.1 mrg .byte -6
3786 1.1 mrg .byte -6
3787 1.1 mrg .byte -6
3788 1.1 mrg .byte -6
3789 1.1 mrg .byte -6
3790 1.1 mrg .byte -6
3791 1.1 mrg .byte -6
3792 1.1 mrg .byte -6
3793 1.1 mrg /* Lookup table translating positive divisor to index into table of
3794 1.1 mrg normalized inverse. N.B. the '0' entry is also the last entry of the
3795 1.1 mrg previous table, and causes an unaligned access for division by zero. */
3796 1.1 mrg LOCAL(div_table_ix):
3797 1.1 mrg .byte -6
3798 1.1 mrg .byte -128
3799 1.1 mrg .byte -128
3800 1.1 mrg .byte 0
3801 1.1 mrg .byte -128
3802 1.1 mrg .byte -64
3803 1.1 mrg .byte 0
3804 1.1 mrg .byte 64
3805 1.1 mrg .byte -128
3806 1.1 mrg .byte -96
3807 1.1 mrg .byte -64
3808 1.1 mrg .byte -32
3809 1.1 mrg .byte 0
3810 1.1 mrg .byte 32
3811 1.1 mrg .byte 64
3812 1.1 mrg .byte 96
3813 1.1 mrg .byte -128
3814 1.1 mrg .byte -112
3815 1.1 mrg .byte -96
3816 1.1 mrg .byte -80
3817 1.1 mrg .byte -64
3818 1.1 mrg .byte -48
3819 1.1 mrg .byte -32
3820 1.1 mrg .byte -16
3821 1.1 mrg .byte 0
3822 1.1 mrg .byte 16
3823 1.1 mrg .byte 32
3824 1.1 mrg .byte 48
3825 1.1 mrg .byte 64
3826 1.1 mrg .byte 80
3827 1.1 mrg .byte 96
3828 1.1 mrg .byte 112
3829 1.1 mrg .byte -128
3830 1.1 mrg .byte -120
3831 1.1 mrg .byte -112
3832 1.1 mrg .byte -104
3833 1.1 mrg .byte -96
3834 1.1 mrg .byte -88
3835 1.1 mrg .byte -80
3836 1.1 mrg .byte -72
3837 1.1 mrg .byte -64
3838 1.1 mrg .byte -56
3839 1.1 mrg .byte -48
3840 1.1 mrg .byte -40
3841 1.1 mrg .byte -32
3842 1.1 mrg .byte -24
3843 1.1 mrg .byte -16
3844 1.1 mrg .byte -8
3845 1.1 mrg .byte 0
3846 1.1 mrg .byte 8
3847 1.1 mrg .byte 16
3848 1.1 mrg .byte 24
3849 1.1 mrg .byte 32
3850 1.1 mrg .byte 40
3851 1.1 mrg .byte 48
3852 1.1 mrg .byte 56
3853 1.1 mrg .byte 64
3854 1.1 mrg .byte 72
3855 1.1 mrg .byte 80
3856 1.1 mrg .byte 88
3857 1.1 mrg .byte 96
3858 1.1 mrg .byte 104
3859 1.1 mrg .byte 112
3860 1.1 mrg .byte 120
3861 1.1 mrg .byte -128
3862 1.1 mrg .byte -124
3863 1.1 mrg .byte -120
3864 1.1 mrg .byte -116
3865 1.1 mrg .byte -112
3866 1.1 mrg .byte -108
3867 1.1 mrg .byte -104
3868 1.1 mrg .byte -100
3869 1.1 mrg .byte -96
3870 1.1 mrg .byte -92
3871 1.1 mrg .byte -88
3872 1.1 mrg .byte -84
3873 1.1 mrg .byte -80
3874 1.1 mrg .byte -76
3875 1.1 mrg .byte -72
3876 1.1 mrg .byte -68
3877 1.1 mrg .byte -64
3878 1.1 mrg .byte -60
3879 1.1 mrg .byte -56
3880 1.1 mrg .byte -52
3881 1.1 mrg .byte -48
3882 1.1 mrg .byte -44
3883 1.1 mrg .byte -40
3884 1.1 mrg .byte -36
3885 1.1 mrg .byte -32
3886 1.1 mrg .byte -28
3887 1.1 mrg .byte -24
3888 1.1 mrg .byte -20
3889 1.1 mrg .byte -16
3890 1.1 mrg .byte -12
3891 1.1 mrg .byte -8
3892 1.1 mrg .byte -4
3893 1.1 mrg .byte 0
3894 1.1 mrg .byte 4
3895 1.1 mrg .byte 8
3896 1.1 mrg .byte 12
3897 1.1 mrg .byte 16
3898 1.1 mrg .byte 20
3899 1.1 mrg .byte 24
3900 1.1 mrg .byte 28
3901 1.1 mrg .byte 32
3902 1.1 mrg .byte 36
3903 1.1 mrg .byte 40
3904 1.1 mrg .byte 44
3905 1.1 mrg .byte 48
3906 1.1 mrg .byte 52
3907 1.1 mrg .byte 56
3908 1.1 mrg .byte 60
3909 1.1 mrg .byte 64
3910 1.1 mrg .byte 68
3911 1.1 mrg .byte 72
3912 1.1 mrg .byte 76
3913 1.1 mrg .byte 80
3914 1.1 mrg .byte 84
3915 1.1 mrg .byte 88
3916 1.1 mrg .byte 92
3917 1.1 mrg .byte 96
3918 1.1 mrg .byte 100
3919 1.1 mrg .byte 104
3920 1.1 mrg .byte 108
3921 1.1 mrg .byte 112
3922 1.1 mrg .byte 116
3923 1.1 mrg .byte 120
3924 1.1 mrg .byte 124
3925 1.1 mrg .byte -128
3926 1.1 mrg /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
3927 1.1 mrg .balign 4
3928 1.1 mrg LOCAL(zero_l):
3929 1.1 mrg .long 0x0
3930 1.1 mrg .long 0xF81F81F9
3931 1.1 mrg .long 0xF07C1F08
3932 1.1 mrg .long 0xE9131AC0
3933 1.1 mrg .long 0xE1E1E1E2
3934 1.1 mrg .long 0xDAE6076C
3935 1.1 mrg .long 0xD41D41D5
3936 1.1 mrg .long 0xCD856891
3937 1.1 mrg .long 0xC71C71C8
3938 1.1 mrg .long 0xC0E07039
3939 1.1 mrg .long 0xBACF914D
3940 1.1 mrg .long 0xB4E81B4F
3941 1.1 mrg .long 0xAF286BCB
3942 1.1 mrg .long 0xA98EF607
3943 1.1 mrg .long 0xA41A41A5
3944 1.1 mrg .long 0x9EC8E952
3945 1.1 mrg .long 0x9999999A
3946 1.1 mrg .long 0x948B0FCE
3947 1.1 mrg .long 0x8F9C18FA
3948 1.1 mrg .long 0x8ACB90F7
3949 1.1 mrg .long 0x86186187
3950 1.1 mrg .long 0x81818182
3951 1.1 mrg .long 0x7D05F418
3952 1.1 mrg .long 0x78A4C818
3953 1.1 mrg .long 0x745D1746
3954 1.1 mrg .long 0x702E05C1
3955 1.1 mrg .long 0x6C16C16D
3956 1.1 mrg .long 0x68168169
3957 1.1 mrg .long 0x642C8591
3958 1.1 mrg .long 0x60581606
3959 1.1 mrg .long 0x5C9882BA
3960 1.1 mrg .long 0x58ED2309
3961 1.1 mrg LOCAL(div_table_inv):
3962 1.1 mrg .long 0x55555556
3963 1.1 mrg .long 0x51D07EAF
3964 1.1 mrg .long 0x4E5E0A73
3965 1.1 mrg .long 0x4AFD6A06
3966 1.1 mrg .long 0x47AE147B
3967 1.1 mrg .long 0x446F8657
3968 1.1 mrg .long 0x41414142
3969 1.1 mrg .long 0x3E22CBCF
3970 1.1 mrg .long 0x3B13B13C
3971 1.1 mrg .long 0x38138139
3972 1.1 mrg .long 0x3521CFB3
3973 1.1 mrg .long 0x323E34A3
3974 1.1 mrg .long 0x2F684BDB
3975 1.1 mrg .long 0x2C9FB4D9
3976 1.1 mrg .long 0x29E4129F
3977 1.1 mrg .long 0x27350B89
3978 1.1 mrg .long 0x24924925
3979 1.1 mrg .long 0x21FB7813
3980 1.1 mrg .long 0x1F7047DD
3981 1.1 mrg .long 0x1CF06ADB
3982 1.1 mrg .long 0x1A7B9612
3983 1.1 mrg .long 0x18118119
3984 1.1 mrg .long 0x15B1E5F8
3985 1.1 mrg .long 0x135C8114
3986 1.1 mrg .long 0x11111112
3987 1.1 mrg .long 0xECF56BF
3988 1.1 mrg .long 0xC9714FC
3989 1.1 mrg .long 0xA6810A7
3990 1.1 mrg .long 0x8421085
3991 1.1 mrg .long 0x624DD30
3992 1.1 mrg .long 0x4104105
3993 1.1 mrg .long 0x2040811
3994 1.1 mrg /* maximum error: 0.987342 scaled: 0.921875*/
3995 1.1 mrg
3996 1.1 mrg ENDFUNC(GLOBAL(sdivsi3_i4i))
3997 1.1 mrg #endif /* SH3 / SH4 */
3998 1.1 mrg
3999 1.1 mrg #endif /* L_div_table */
4000 1.1 mrg
4001 1.1 mrg #ifdef L_udiv_qrnnd_16
4002 1.1 mrg #if !__SHMEDIA__
4003 1.1 mrg HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) /* Divide helper built on 16 div1 steps plus a multiply-and-correct fixup; register contract on the next line. */
4004 1.1 mrg /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ /* i.e. results in r0/r1, inputs in r0/r4/r5/r6, r2 is scratch (presumably rn/qn = remainder/quotient); MACL and the T bit are also written. */
4005 1.1 mrg /* n1 < d, but n1 might be larger than d1. */
4006 1.1 mrg .global GLOBAL(udiv_qrnnd_16)
4007 1.1 mrg .balign 8
4008 1.1 mrg GLOBAL(udiv_qrnnd_16):
4009 1.1 mrg div0u /* set up the divide-step state for an unsigned division */
4010 1.1 mrg cmp/hi r6,r0 /* T = (r0 > r6), unsigned */
4011 1.1 mrg bt .Lots /* n1 > d1: skip the div1 sequence and take the separate path at .Lots */
4012 1.1 mrg .rept 16 /* assembler-time unrolling: emit the divide step 16 times */
4013 1.1 mrg div1 r6,r0 /* one divide step of r0 by r6; each step leaves its result bit in T */
4014 1.1 mrg .endr
4015 1.1 mrg extu.w r0,r1 /* r1 = zero-extended low 16 bits of r0 (the result bits div1 shifted in so far) */
4016 1.1 mrg bt 0f /* T still holds the last div1 result bit; if set, no add-back is needed */
4017 1.1 mrg add r6,r0 /* otherwise add r6 back into r0 (add leaves T untouched) */
4018 1.1 mrg 0: rotcl r1 /* shift the last result bit (T) into the bottom of r1 */
4019 1.1 mrg mulu.w r1,r5 /* MACL = low16(r1) * low16(r5), unsigned */
4020 1.1 mrg xtrct r4,r0 /* r0 = (r4 << 16) | (r0 >> 16) */
4021 1.1 mrg swap.w r0,r0 /* swap halves: r0 = (previous high half of r0 << 16) | low16(r4) */
4022 1.1 mrg sts macl,r2 /* r2 = the product (__m) */
4023 1.1 mrg cmp/hs r2,r0 /* T = (r0 >= r2), unsigned */
4024 1.1 mrg sub r2,r0 /* r0 -= r2 (sub leaves T untouched) */
4025 1.1 mrg bt 0f /* no borrow: r0 and r1 are final, return */
4026 1.1 mrg addc r5,r0 /* r0 += r5 (carry-in T is 0 on this path); T = carry out */
4027 1.1 mrg add #-1,r1 /* r1 -= 1 to match the r5 just added back */
4028 1.1 mrg bt 0f /* carry out: one correction was enough, return */
4029 1.1 mrg 1: add #-1,r1 /* second correction; also the target of the branch in the .Lots path: r1 -= 1 */
4030 1.1 mrg rts
4031 1.1 mrg add r5,r0 /* rts delay slot: r0 += r5 */
4032 1.1 mrg .balign 8
4033 1.1 mrg .Lots: /* reached only when r0 > r6 on entry; no div1 steps have been executed */
4034 1.1 mrg sub r5,r0 /* r0 = n1 - d */
4035 1.1 mrg swap.w r4,r1 /* r1 = r4 with its 16-bit halves swapped */
4036 1.1 mrg xtrct r0,r1 /* r1 = (r0 << 16) | (r1 >> 16), i.e. (low16(n1 - d) << 16) | low16(r4) */
4037 1.1 mrg clrt /* T = 0 so the addc below has no carry-in */
4038 1.1 mrg mov r1,r0
4039 1.1 mrg addc r5,r0 /* r0 = r1 + r5; T = carry out */
4040 1.1 mrg mov #-1,r1 /* r1 = all ones */
4041 1.1 mrg SL1(bf, 1b,
4042 1.1 mrg shlr16 r1) /* SL1 is defined outside this chunk. NOTE(review): it appears to branch to 1b when T is clear while also executing shlr16 r1 (r1 = 0xffff) - confirm against lib1funcs.h */
4043 1.1 mrg 0: rts /* common return point for the paths that need no further correction */
4044 1.1 mrg nop /* rts delay slot */
4045 1.1 mrg ENDFUNC(GLOBAL(udiv_qrnnd_16))
4046 1.1 mrg #endif /* !__SHMEDIA__ */
4047 1.1 mrg #endif /* L_udiv_qrnnd_16 */
4048