milli64.S revision 1.1 1 1.1 mrg /* 32 and 64-bit millicode, original author Hewlett-Packard
2 1.1 mrg adapted for gcc by Paul Bame <bame (at) debian.org>
3 1.1 mrg and Alan Modra <alan (at) linuxcare.com.au>.
4 1.1 mrg
5 1.1 mrg Copyright (C) 2001-2013 Free Software Foundation, Inc.
6 1.1 mrg
7 1.1 mrg This file is part of GCC.
8 1.1 mrg
9 1.1 mrg GCC is free software; you can redistribute it and/or modify it under
10 1.1 mrg the terms of the GNU General Public License as published by the Free
11 1.1 mrg Software Foundation; either version 3, or (at your option) any later
12 1.1 mrg version.
13 1.1 mrg
14 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 1.1 mrg for more details.
18 1.1 mrg
19 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
20 1.1 mrg permissions described in the GCC Runtime Library Exception, version
21 1.1 mrg 3.1, as published by the Free Software Foundation.
22 1.1 mrg
23 1.1 mrg You should have received a copy of the GNU General Public License and
24 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
25 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
26 1.1 mrg <http://www.gnu.org/licenses/>. */
27 1.1 mrg
28 1.1 mrg #ifdef pa64
29 1.1 mrg .level 2.0w /* pa64 builds: assemble at architecture level 2.0w */
30 1.1 mrg #endif
31 1.1 mrg
32 1.1 mrg /* Hardware General Registers: symbolic names r0..r31 for %r0..%r31. */
33 1.1 mrg r0: .reg %r0
34 1.1 mrg r1: .reg %r1
35 1.1 mrg r2: .reg %r2
36 1.1 mrg r3: .reg %r3
37 1.1 mrg r4: .reg %r4
38 1.1 mrg r5: .reg %r5
39 1.1 mrg r6: .reg %r6
40 1.1 mrg r7: .reg %r7
41 1.1 mrg r8: .reg %r8
42 1.1 mrg r9: .reg %r9
43 1.1 mrg r10: .reg %r10
44 1.1 mrg r11: .reg %r11
45 1.1 mrg r12: .reg %r12
46 1.1 mrg r13: .reg %r13
47 1.1 mrg r14: .reg %r14
48 1.1 mrg r15: .reg %r15
49 1.1 mrg r16: .reg %r16
50 1.1 mrg r17: .reg %r17
51 1.1 mrg r18: .reg %r18
52 1.1 mrg r19: .reg %r19
53 1.1 mrg r20: .reg %r20
54 1.1 mrg r21: .reg %r21
55 1.1 mrg r22: .reg %r22
56 1.1 mrg r23: .reg %r23
57 1.1 mrg r24: .reg %r24
58 1.1 mrg r25: .reg %r25
59 1.1 mrg r26: .reg %r26
60 1.1 mrg r27: .reg %r27
61 1.1 mrg r28: .reg %r28
62 1.1 mrg r29: .reg %r29
63 1.1 mrg r30: .reg %r30
64 1.1 mrg r31: .reg %r31
65 1.1 mrg
66 1.1 mrg /* Hardware Space Registers: symbolic names sr0..sr7 for %sr0..%sr7. */
67 1.1 mrg sr0: .reg %sr0
68 1.1 mrg sr1: .reg %sr1
69 1.1 mrg sr2: .reg %sr2
70 1.1 mrg sr3: .reg %sr3
71 1.1 mrg sr4: .reg %sr4
72 1.1 mrg sr5: .reg %sr5
73 1.1 mrg sr6: .reg %sr6
74 1.1 mrg sr7: .reg %sr7
75 1.1 mrg
76 1.1 mrg /* Hardware Floating Point Registers: only fr0..fr15 are named here. */
77 1.1 mrg fr0: .reg %fr0
78 1.1 mrg fr1: .reg %fr1
79 1.1 mrg fr2: .reg %fr2
80 1.1 mrg fr3: .reg %fr3
81 1.1 mrg fr4: .reg %fr4
82 1.1 mrg fr5: .reg %fr5
83 1.1 mrg fr6: .reg %fr6
84 1.1 mrg fr7: .reg %fr7
85 1.1 mrg fr8: .reg %fr8
86 1.1 mrg fr9: .reg %fr9
87 1.1 mrg fr10: .reg %fr10
88 1.1 mrg fr11: .reg %fr11
89 1.1 mrg fr12: .reg %fr12
90 1.1 mrg fr13: .reg %fr13
91 1.1 mrg fr14: .reg %fr14
92 1.1 mrg fr15: .reg %fr15
93 1.1 mrg
94 1.1 mrg /* Hardware Control Registers. */
95 1.1 mrg cr11: .reg %cr11 /* control register 11; sar below names the same register */
96 1.1 mrg sar: .reg %cr11 /* Shift Amount Register */
97 1.1 mrg
98 1.1 mrg /* Software Architecture General Registers. */
99 1.1 mrg rp: .reg r2 /* return pointer */
100 1.1 mrg #ifdef pa64
101 1.1 mrg mrp: .reg r2 /* millicode return pointer (same register as rp on pa64) */
102 1.1 mrg #else
103 1.1 mrg mrp: .reg r31 /* millicode return pointer */
104 1.1 mrg #endif
105 1.1 mrg ret0: .reg r28 /* return value */
106 1.1 mrg ret1: .reg r29 /* return value (high part of double) */
107 1.1 mrg sp: .reg r30 /* stack pointer */
108 1.1 mrg dp: .reg r27 /* data pointer */
109 1.1 mrg arg0: .reg r26 /* argument */
110 1.1 mrg arg1: .reg r25 /* argument or high part of double argument */
111 1.1 mrg arg2: .reg r24 /* argument */
112 1.1 mrg arg3: .reg r23 /* argument or high part of double argument */
113 1.1 mrg
114 1.1 mrg /* Software Architecture Space Registers. */
115 1.1 mrg /* sr0 ; return link from BLE */
116 1.1 mrg sret: .reg sr1 /* return value */
117 1.1 mrg sarg: .reg sr1 /* argument (same register as sret) */
118 1.1 mrg /* sr4 ; PC SPACE tracker */
119 1.1 mrg /* sr5 ; process private data */
120 1.1 mrg
121 1.1 mrg /* Frame Offsets (millicode convention!) Used when calling other
122 1.1 mrg millicode routines. Stack unwinding is dependent upon these
123 1.1 mrg definitions. */
124 1.1 mrg r31_slot: .equ -20 /* "current RP" slot */
125 1.1 mrg sr0_slot: .equ -16 /* "static link" slot */
126 1.1 mrg #if defined(pa64)
127 1.1 mrg mrp_slot: .equ -16 /* "current RP" slot */
128 1.1 mrg psp_slot: .equ -8 /* "previous SP" slot (defined for pa64 only) */
129 1.1 mrg #else
130 1.1 mrg mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */
131 1.1 mrg #endif
132 1.1 mrg
133 1.1 mrg
/* DEFINE(name,value) emits "name: .EQU value".
   RDEFINE(name,value) emits "name: .REG value"; the routines below use it
   to give routine-local names (temp, retreg, ...) to registers. */
134 1.1 mrg #define DEFINE(name,value)name: .EQU value
135 1.1 mrg #define RDEFINE(name,value)name: .REG value
/* Branch and return helpers. With milliext defined they expand to the
   BE/BLE forms through sr7 (branches and calls) and sr0 (returns);
   otherwise to the B/BL/BV forms. The macros whose names end in N add the
   ",n" completer. MILLIRET/MILLI_RET and MILLIRETN/MILLI_RETN are two
   spellings of the same expansion. */
136 1.1 mrg #ifdef milliext
137 1.1 mrg #define MILLI_BE(lbl) BE lbl(sr7,r0)
138 1.1 mrg #define MILLI_BEN(lbl) BE,n lbl(sr7,r0)
139 1.1 mrg #define MILLI_BLE(lbl) BLE lbl(sr7,r0)
140 1.1 mrg #define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
141 1.1 mrg #define MILLIRETN BE,n 0(sr0,mrp)
142 1.1 mrg #define MILLIRET BE 0(sr0,mrp)
143 1.1 mrg #define MILLI_RETN BE,n 0(sr0,mrp)
144 1.1 mrg #define MILLI_RET BE 0(sr0,mrp)
145 1.1 mrg #else
146 1.1 mrg #define MILLI_BE(lbl) B lbl
147 1.1 mrg #define MILLI_BEN(lbl) B,n lbl
148 1.1 mrg #define MILLI_BLE(lbl) BL lbl,mrp
149 1.1 mrg #define MILLI_BLEN(lbl) BL,n lbl,mrp
150 1.1 mrg #define MILLIRETN BV,n 0(mrp)
151 1.1 mrg #define MILLIRET BV 0(mrp)
152 1.1 mrg #define MILLI_RETN BV,n 0(mrp)
153 1.1 mrg #define MILLI_RET BV 0(mrp)
154 1.1 mrg #endif
155 1.1 mrg
/* CAT(a,b) pastes two tokens into one: with the ## operator when __STDC__
   is defined, and with the empty-comment idiom otherwise. */
156 1.1 mrg #ifdef __STDC__
157 1.1 mrg #define CAT(a,b) a##b
158 1.1 mrg #else
159 1.1 mrg #define CAT(a,b) a/**/b
160 1.1 mrg #endif
161 1.1 mrg
/* Section/subspace selection and label helpers.
   ELF: the divide and multiply routines get their own sections, each
   followed by ".align 16"; "!" separates two statements on one line.
   GSYM(sym) defines the label "sym:", LSYM(sym) defines the local label
   ".Lsym:" and LREF(sym) refers to it. Each label macro begins with "!"
   so the label starts a new statement. */
162 1.1 mrg #ifdef ELF
163 1.1 mrg #define SUBSPA_MILLI .section .text
164 1.1 mrg #define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
165 1.1 mrg #define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
166 1.1 mrg #define ATTR_MILLI
167 1.1 mrg #define SUBSPA_DATA .section .data
168 1.1 mrg #define ATTR_DATA
169 1.1 mrg #define GLOBAL $global$
170 1.1 mrg #define GSYM(sym) !sym:
171 1.1 mrg #define LSYM(sym) !CAT(.L,sym:)
172 1.1 mrg #define LREF(sym) CAT(.L,sym)
173 1.1 mrg
174 1.1 mrg #else
175 1.1 mrg
176 1.1 mrg #ifdef coff
177 1.1 mrg /* This used to be .milli but since link32 places different named
178 1.1 mrg sections in different segments millicode ends up a long ways away
179 1.1 mrg from .text (1meg?). This way they will be a lot closer.
180 1.1 mrg
181 1.1 mrg The SUBSPA_MILLI_* specify locality sets for certain millicode
182 1.1 mrg modules in order to ensure that modules that call one another are
183 1.1 mrg placed close together. Without locality sets this is unlikely to
184 1.1 mrg happen because of the Dynamite linker library search algorithm. We
185 1.1 mrg want these modules close together so that short calls always reach
186 1.1 mrg (we don't want to require long calls or use long call stubs). */
187 1.1 mrg
188 1.1 mrg #define SUBSPA_MILLI .subspa .text
189 1.1 mrg #define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
190 1.1 mrg #define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
191 1.1 mrg #define ATTR_MILLI .attr code,read,execute
192 1.1 mrg #define SUBSPA_DATA .subspa .data
193 1.1 mrg #define ATTR_DATA .attr init_data,read,write
194 1.1 mrg #define GLOBAL _gp
195 1.1 mrg #else
/* Neither ELF nor coff: all millicode shares the single $MILLICODE$
   subspace, so the _DIV and _MUL variants are plain aliases. */
196 1.1 mrg #define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
197 1.1 mrg #define SUBSPA_MILLI_DIV SUBSPA_MILLI
198 1.1 mrg #define SUBSPA_MILLI_MUL SUBSPA_MILLI
199 1.1 mrg #define ATTR_MILLI
200 1.1 mrg #define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
201 1.1 mrg #define ATTR_DATA
202 1.1 mrg #define GLOBAL $global$
203 1.1 mrg #endif
204 1.1 mrg #define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16
205 1.1 mrg
/* Non-ELF label helpers: labels are emitted without a trailing colon and
   local labels use the L$ prefix instead of .L. */
206 1.1 mrg #define GSYM(sym) !sym
207 1.1 mrg #define LSYM(sym) !CAT(L$,sym)
208 1.1 mrg #define LREF(sym) CAT(L$,sym)
209 1.1 mrg #endif
210 1.1 mrg
211 1.1 mrg #ifdef L_dyncall
/* ROUTINE: $$dyncall

   Indirect-call helper. On entry %r22 holds the call target. If bit 30
   of %r22 is clear it is used as the target address directly. If bit 30
   is set, %r22 is treated as a plabel: its two least significant bits are
   cleared, the new LTP value is loaded from offset 4 into %r19 and the
   target address is loaded from offset 0 into %r22.

   Control is then transferred to the address in %r22: with a plain BV when
   LINUX is defined, otherwise with BE after the space identifier selected
   by %r22 has been copied into %sr0 (using %r1 as scratch). The store of
   %r2 to -24(%r30) sits in the delay slot of that final branch.

   NOTE(review): ATTR_DATA is used below although this subspace holds code;
   the other routines in this file use ATTR_MILLI. The two differ only when
   "coff" is defined -- confirm whether ATTR_MILLI was intended. */
212 1.1 mrg SUBSPA_MILLI
213 1.1 mrg ATTR_DATA
214 1.1 mrg GSYM($$dyncall)
215 1.1 mrg .export $$dyncall,millicode
216 1.1 mrg .proc
217 1.1 mrg .callinfo millicode
218 1.1 mrg .entry
219 1.1 mrg bb,>=,n %r22,30,LREF(1) ; branch if not plabel address
220 1.1 mrg depi 0,31,2,%r22 ; clear the two least significant bits
221 1.1 mrg ldw 4(%r22),%r19 ; load new LTP value
222 1.1 mrg ldw 0(%r22),%r22 ; load address of target
223 1.1 mrg LSYM(1)
224 1.1 mrg #ifdef LINUX
225 1.1 mrg bv %r0(%r22) ; branch to the real target
226 1.1 mrg #else
227 1.1 mrg ldsid (%sr0,%r22),%r1 ; get the "space ident" selected by r22
228 1.1 mrg mtsp %r1,%sr0 ; move that space identifier into sr0
229 1.1 mrg be 0(%sr0,%r22) ; branch to the real target
230 1.1 mrg #endif
231 1.1 mrg stw %r2,-24(%r30) ; (delay slot) save return address into frame marker
232 1.1 mrg .exit
233 1.1 mrg .procend
234 1.1 mrg #endif
235 1.1 mrg
236 1.1 mrg #ifdef L_divI
237 1.1 mrg /* ROUTINES: $$divI, $$divoI
238 1.1 mrg
239 1.1 mrg Single precision divide for signed binary integers.
240 1.1 mrg
241 1.1 mrg The quotient is truncated towards zero.
242 1.1 mrg The sign of the quotient is the XOR of the signs of the dividend and
243 1.1 mrg divisor.
244 1.1 mrg Divide by zero is trapped.
245 1.1 mrg Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
246 1.1 mrg
247 1.1 mrg INPUT REGISTERS:
248 1.1 mrg . arg0 == dividend
249 1.1 mrg . arg1 == divisor
250 1.1 mrg . mrp == return pc
251 1.1 mrg . sr0 == return space when called externally
252 1.1 mrg
253 1.1 mrg OUTPUT REGISTERS:
254 1.1 mrg . arg0 = undefined
255 1.1 mrg . arg1 = undefined
256 1.1 mrg . ret1 = quotient
257 1.1 mrg
258 1.1 mrg OTHER REGISTERS AFFECTED:
259 1.1 mrg . r1 = undefined
260 1.1 mrg
261 1.1 mrg SIDE EFFECTS:
262 1.1 mrg . Causes a trap under the following conditions:
263 1.1 mrg . divisor is zero (traps with ADDIT,= 0,25,0)
264 1.1 mrg . dividend==-2**31 and divisor==-1 and routine is $$divoI
265 1.1 mrg . (traps with ADDO 26,25,0)
266 1.1 mrg . Changes memory at the following places:
267 1.1 mrg . NONE
268 1.1 mrg
269 1.1 mrg PERMISSIBLE CONTEXT:
270 1.1 mrg . Unwindable.
271 1.1 mrg . Suitable for internal or external millicode.
272 1.1 mrg . Assumes the special millicode register conventions.
273 1.1 mrg
274 1.1 mrg DISCUSSION:
275 1.1 mrg . Branches to other millicode routines using BE
276 1.1 mrg . $$divI_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
277 1.1 mrg .
278 1.1 mrg . For selected divisors, calls a divide by constant routine written by
279 1.1 mrg . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13.
280 1.1 mrg .
281 1.1 mrg . The only overflow case is -2**31 divided by -1.
282 1.1 mrg . Both routines return -2**31 but only $$divoI traps. */
283 1.1 mrg
284 1.1 mrg RDEFINE(temp,r1)
285 1.1 mrg RDEFINE(retreg,ret1) /* r29 */
286 1.1 mrg RDEFINE(temp1,arg0) /* second scratch; reuses the dividend register */
287 1.1 mrg SUBSPA_MILLI_DIV
288 1.1 mrg ATTR_MILLI
289 1.1 mrg .import $$divI_2,millicode
290 1.1 mrg .import $$divI_3,millicode
291 1.1 mrg .import $$divI_4,millicode
292 1.1 mrg .import $$divI_5,millicode
293 1.1 mrg .import $$divI_6,millicode
294 1.1 mrg .import $$divI_7,millicode
295 1.1 mrg .import $$divI_8,millicode
296 1.1 mrg .import $$divI_9,millicode
297 1.1 mrg .import $$divI_10,millicode
298 1.1 mrg .import $$divI_12,millicode
299 1.1 mrg .import $$divI_14,millicode
300 1.1 mrg .import $$divI_15,millicode
301 1.1 mrg .export $$divI,millicode
302 1.1 mrg .export $$divoI,millicode
303 1.1 mrg .proc
304 1.1 mrg .callinfo millicode
305 1.1 mrg .entry
306 1.1 mrg GSYM($$divoI)
307 1.1 mrg comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1; otherwise fall through into $$divI */
308 1.1 mrg GSYM($$divI)
309 1.1 mrg ldo -1(arg1),temp /* is there at most one bit set ? */
310 1.1 mrg and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */
311 1.1 mrg addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */
312 1.1 mrg b,n LREF(neg_denom)
313 1.1 mrg LSYM(pow2)
314 1.1 mrg addi,>= 0,arg0,retreg /* if numerator is negative, add the */
315 1.1 mrg add arg0,temp,retreg /* (denominator - 1) to correct for shifts */
316 1.1 mrg extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
317 1.1 mrg extrs retreg,15,16,retreg /* retreg = retreg >> 16 */
318 1.1 mrg or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
319 1.1 mrg ldi 0xcc,temp1 /* setup 0xcc in temp1 */
320 1.1 mrg extru,= arg1,23,8,temp /* test denominator with 0xff00 */
321 1.1 mrg extrs retreg,23,24,retreg /* retreg = retreg >> 8 */
322 1.1 mrg or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
323 1.1 mrg ldi 0xaa,temp /* setup 0xaa in temp */
324 1.1 mrg extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
325 1.1 mrg extrs retreg,27,28,retreg /* retreg = retreg >> 4 */
326 1.1 mrg and,= arg1,temp1,r0 /* test denominator with 0xcc */
327 1.1 mrg extrs retreg,29,30,retreg /* retreg = retreg >> 2 */
328 1.1 mrg and,= arg1,temp,r0 /* test denominator with 0xaa */
329 1.1 mrg extrs retreg,30,31,retreg /* retreg = retreg >> 1 */
330 1.1 mrg MILLIRETN
331 1.1 mrg LSYM(neg_denom)
332 1.1 mrg addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */
333 1.1 mrg b,n LREF(regular_seq)
334 1.1 mrg sub r0,arg1,temp /* make denominator positive */
335 1.1 mrg comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */
336 1.1 mrg ldo -1(temp),retreg /* is there at most one bit set ? */
337 1.1 mrg and,= temp,retreg,r0 /* if so, the denominator is power of 2 */
338 1.1 mrg b,n LREF(regular_seq)
339 1.1 mrg sub r0,arg0,retreg /* negate numerator */
340 1.1 mrg comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */
341 1.1 mrg copy retreg,arg0 /* set up arg0, arg1 and temp */
342 1.1 mrg copy temp,arg1 /* before branching to pow2 */
343 1.1 mrg b LREF(pow2)
344 1.1 mrg ldo -1(arg1),temp /* delay slot: temp = arg1 - 1, as pow2 expects */
345 1.1 mrg LSYM(regular_seq)
346 1.1 mrg comib,>>=,n 15,arg1,LREF(small_divisor) /* divisor in 0..15 (unsigned compare): use the table */
347 1.1 mrg add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
348 1.1 mrg LSYM(normal)
349 1.1 mrg subi 0,retreg,retreg /* make it positive */
350 1.1 mrg sub 0,arg1,temp /* clear carry, */
351 1.1 mrg /* negate the divisor */
352 1.1 mrg ds 0,temp,0 /* set V-bit to the comple- */
353 1.1 mrg /* ment of the divisor sign */
354 1.1 mrg add retreg,retreg,retreg /* shift msb bit into carry */
355 1.1 mrg ds r0,arg1,temp /* 1st divide step, if no carry */
356 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
357 1.1 mrg ds temp,arg1,temp /* 2nd divide step */
358 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
359 1.1 mrg ds temp,arg1,temp /* 3rd divide step */
360 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
361 1.1 mrg ds temp,arg1,temp /* 4th divide step */
362 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
363 1.1 mrg ds temp,arg1,temp /* 5th divide step */
364 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
365 1.1 mrg ds temp,arg1,temp /* 6th divide step */
366 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
367 1.1 mrg ds temp,arg1,temp /* 7th divide step */
368 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
369 1.1 mrg ds temp,arg1,temp /* 8th divide step */
370 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
371 1.1 mrg ds temp,arg1,temp /* 9th divide step */
372 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
373 1.1 mrg ds temp,arg1,temp /* 10th divide step */
374 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
375 1.1 mrg ds temp,arg1,temp /* 11th divide step */
376 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
377 1.1 mrg ds temp,arg1,temp /* 12th divide step */
378 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
379 1.1 mrg ds temp,arg1,temp /* 13th divide step */
380 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
381 1.1 mrg ds temp,arg1,temp /* 14th divide step */
382 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
383 1.1 mrg ds temp,arg1,temp /* 15th divide step */
384 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
385 1.1 mrg ds temp,arg1,temp /* 16th divide step */
386 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
387 1.1 mrg ds temp,arg1,temp /* 17th divide step */
388 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
389 1.1 mrg ds temp,arg1,temp /* 18th divide step */
390 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
391 1.1 mrg ds temp,arg1,temp /* 19th divide step */
392 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
393 1.1 mrg ds temp,arg1,temp /* 20th divide step */
394 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
395 1.1 mrg ds temp,arg1,temp /* 21st divide step */
396 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
397 1.1 mrg ds temp,arg1,temp /* 22nd divide step */
398 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
399 1.1 mrg ds temp,arg1,temp /* 23rd divide step */
400 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
401 1.1 mrg ds temp,arg1,temp /* 24th divide step */
402 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
403 1.1 mrg ds temp,arg1,temp /* 25th divide step */
404 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
405 1.1 mrg ds temp,arg1,temp /* 26th divide step */
406 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
407 1.1 mrg ds temp,arg1,temp /* 27th divide step */
408 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
409 1.1 mrg ds temp,arg1,temp /* 28th divide step */
410 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
411 1.1 mrg ds temp,arg1,temp /* 29th divide step */
412 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
413 1.1 mrg ds temp,arg1,temp /* 30th divide step */
414 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
415 1.1 mrg ds temp,arg1,temp /* 31st divide step */
416 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
417 1.1 mrg ds temp,arg1,temp /* 32nd divide step, */
418 1.1 mrg addc retreg,retreg,retreg /* shift last retreg bit into retreg */
419 1.1 mrg xor,>= arg0,arg1,0 /* get correct sign of quotient */
420 1.1 mrg sub 0,retreg,retreg /* based on operand signs */
421 1.1 mrg MILLIRETN
422 1.1 mrg nop
423 1.1 mrg
424 1.1 mrg LSYM(small_divisor)
425 1.1 mrg
426 1.1 mrg #if defined(pa64)
427 1.1 mrg /* Clear the upper 32 bits of the arg1 register. We are working with */
428 1.1 mrg /* small divisors (and 32-bit integers) We must not be misled */
429 1.1 mrg /* by "1" bits left in the upper 32 bits. */
430 1.1 mrg depd %r0,31,32,%r25
431 1.1 mrg #endif
432 1.1 mrg blr,n arg1,r0 /* dispatch into the table below, indexed by arg1 */
433 1.1 mrg nop
434 1.1 mrg /* table for divisor == 0,1, ... ,15 (two instructions per entry) */
435 1.1 mrg addit,= 0,arg1,r0 /* trap if divisor == 0 */
436 1.1 mrg nop
437 1.1 mrg MILLIRET /* divisor == 1 */
438 1.1 mrg copy arg0,retreg
439 1.1 mrg MILLI_BEN($$divI_2) /* divisor == 2 */
440 1.1 mrg nop
441 1.1 mrg MILLI_BEN($$divI_3) /* divisor == 3 */
442 1.1 mrg nop
443 1.1 mrg MILLI_BEN($$divI_4) /* divisor == 4 */
444 1.1 mrg nop
445 1.1 mrg MILLI_BEN($$divI_5) /* divisor == 5 */
446 1.1 mrg nop
447 1.1 mrg MILLI_BEN($$divI_6) /* divisor == 6 */
448 1.1 mrg nop
449 1.1 mrg MILLI_BEN($$divI_7) /* divisor == 7 */
450 1.1 mrg nop
451 1.1 mrg MILLI_BEN($$divI_8) /* divisor == 8 */
452 1.1 mrg nop
453 1.1 mrg MILLI_BEN($$divI_9) /* divisor == 9 */
454 1.1 mrg nop
455 1.1 mrg MILLI_BEN($$divI_10) /* divisor == 10 */
456 1.1 mrg nop
457 1.1 mrg b LREF(normal) /* divisor == 11 */
458 1.1 mrg add,>= 0,arg0,retreg /* delay slot: same dividend setup as at regular_seq */
459 1.1 mrg MILLI_BEN($$divI_12) /* divisor == 12 */
460 1.1 mrg nop
461 1.1 mrg b LREF(normal) /* divisor == 13 */
462 1.1 mrg add,>= 0,arg0,retreg /* delay slot: same dividend setup as at regular_seq */
463 1.1 mrg MILLI_BEN($$divI_14) /* divisor == 14 */
464 1.1 mrg nop
465 1.1 mrg MILLI_BEN($$divI_15) /* divisor == 15 */
466 1.1 mrg nop
467 1.1 mrg
468 1.1 mrg LSYM(negative1)
469 1.1 mrg sub 0,arg0,retreg /* result is negation of dividend */
470 1.1 mrg MILLIRET
471 1.1 mrg addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */
472 1.1 mrg .exit
473 1.1 mrg .procend
474 1.1 mrg .end
475 1.1 mrg #endif
476 1.1 mrg
477 1.1 mrg #ifdef L_divU
478 1.1 mrg /* ROUTINE: $$divU
479 1.1 mrg .
480 1.1 mrg . Single precision divide for unsigned integers.
481 1.1 mrg .
482 1.1 mrg . Quotient is truncated towards zero.
483 1.1 mrg . Traps on divide by zero.
484 1.1 mrg
485 1.1 mrg INPUT REGISTERS:
486 1.1 mrg . arg0 == dividend
487 1.1 mrg . arg1 == divisor
488 1.1 mrg . mrp == return pc
489 1.1 mrg . sr0 == return space when called externally
490 1.1 mrg
491 1.1 mrg OUTPUT REGISTERS:
492 1.1 mrg . arg0 = undefined
493 1.1 mrg . arg1 = undefined
494 1.1 mrg . ret1 = quotient
495 1.1 mrg
496 1.1 mrg OTHER REGISTERS AFFECTED:
497 1.1 mrg . r1 = undefined
498 1.1 mrg
499 1.1 mrg SIDE EFFECTS:
500 1.1 mrg . Causes a trap under the following conditions:
501 1.1 mrg . divisor is zero
502 1.1 mrg . Changes memory at the following places:
503 1.1 mrg . NONE
504 1.1 mrg
505 1.1 mrg PERMISSIBLE CONTEXT:
506 1.1 mrg . Unwindable.
507 1.1 mrg . Does not create a stack frame.
508 1.1 mrg . Suitable for internal or external millicode.
509 1.1 mrg . Assumes the special millicode register conventions.
510 1.1 mrg
511 1.1 mrg DISCUSSION:
512 1.1 mrg . Branches to other millicode routines using BE:
513 1.1 mrg . $$divU_# for 3,5,6,7,9,10,12,14,15
514 1.1 mrg .
515 1.1 mrg . For selected small divisors calls the special divide by constant
516 1.1 mrg . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */
517 1.1 mrg
518 1.1 mrg RDEFINE(temp,r1)
519 1.1 mrg RDEFINE(retreg,ret1) /* r29 */
520 1.1 mrg RDEFINE(temp1,arg0) /* second scratch; reuses the dividend register */
521 1.1 mrg SUBSPA_MILLI_DIV
522 1.1 mrg ATTR_MILLI
523 1.1 mrg .export $$divU,millicode
524 1.1 mrg .import $$divU_3,millicode
525 1.1 mrg .import $$divU_5,millicode
526 1.1 mrg .import $$divU_6,millicode
527 1.1 mrg .import $$divU_7,millicode
528 1.1 mrg .import $$divU_9,millicode
529 1.1 mrg .import $$divU_10,millicode
530 1.1 mrg .import $$divU_12,millicode
531 1.1 mrg .import $$divU_14,millicode
532 1.1 mrg .import $$divU_15,millicode
533 1.1 mrg .proc
534 1.1 mrg .callinfo millicode
535 1.1 mrg .entry
536 1.1 mrg GSYM($$divU)
537 1.1 mrg /* The subtract is not nullified since it does no harm and can be used
538 1.1 mrg by the two cases that branch back to "normal". */
539 1.1 mrg ldo -1(arg1),temp /* is there at most one bit set ? */
540 1.1 mrg and,= arg1,temp,r0 /* if so, denominator is power of 2 */
541 1.1 mrg b LREF(regular_seq)
542 1.1 mrg addit,= 0,arg1,0 /* trap for zero dvr (runs on both paths: delay slot or fall-through) */
543 1.1 mrg copy arg0,retreg
544 1.1 mrg extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
545 1.1 mrg extru retreg,15,16,retreg /* retreg = retreg >> 16 */
546 1.1 mrg or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
547 1.1 mrg ldi 0xcc,temp1 /* setup 0xcc in temp1 */
548 1.1 mrg extru,= arg1,23,8,temp /* test denominator with 0xff00 */
549 1.1 mrg extru retreg,23,24,retreg /* retreg = retreg >> 8 */
550 1.1 mrg or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
551 1.1 mrg ldi 0xaa,temp /* setup 0xaa in temp */
552 1.1 mrg extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
553 1.1 mrg extru retreg,27,28,retreg /* retreg = retreg >> 4 */
554 1.1 mrg and,= arg1,temp1,r0 /* test denominator with 0xcc */
555 1.1 mrg extru retreg,29,30,retreg /* retreg = retreg >> 2 */
556 1.1 mrg and,= arg1,temp,r0 /* test denominator with 0xaa */
557 1.1 mrg extru retreg,30,31,retreg /* retreg = retreg >> 1 */
558 1.1 mrg MILLIRETN
559 1.1 mrg nop
560 1.1 mrg LSYM(regular_seq)
561 1.1 mrg comib,>= 15,arg1,LREF(special_divisor) /* signed 15 >= divisor: small or high-bit-set divisor */
562 1.1 mrg subi 0,arg1,temp /* clear carry, negate the divisor */
563 1.1 mrg ds r0,temp,r0 /* set V-bit to 1 */
564 1.1 mrg LSYM(normal)
565 1.1 mrg add arg0,arg0,retreg /* shift msb bit into carry */
566 1.1 mrg ds r0,arg1,temp /* 1st divide step, if no carry */
567 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
568 1.1 mrg ds temp,arg1,temp /* 2nd divide step */
569 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
570 1.1 mrg ds temp,arg1,temp /* 3rd divide step */
571 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
572 1.1 mrg ds temp,arg1,temp /* 4th divide step */
573 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
574 1.1 mrg ds temp,arg1,temp /* 5th divide step */
575 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
576 1.1 mrg ds temp,arg1,temp /* 6th divide step */
577 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
578 1.1 mrg ds temp,arg1,temp /* 7th divide step */
579 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
580 1.1 mrg ds temp,arg1,temp /* 8th divide step */
581 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
582 1.1 mrg ds temp,arg1,temp /* 9th divide step */
583 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
584 1.1 mrg ds temp,arg1,temp /* 10th divide step */
585 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
586 1.1 mrg ds temp,arg1,temp /* 11th divide step */
587 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
588 1.1 mrg ds temp,arg1,temp /* 12th divide step */
589 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
590 1.1 mrg ds temp,arg1,temp /* 13th divide step */
591 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
592 1.1 mrg ds temp,arg1,temp /* 14th divide step */
593 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
594 1.1 mrg ds temp,arg1,temp /* 15th divide step */
595 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
596 1.1 mrg ds temp,arg1,temp /* 16th divide step */
597 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
598 1.1 mrg ds temp,arg1,temp /* 17th divide step */
599 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
600 1.1 mrg ds temp,arg1,temp /* 18th divide step */
601 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
602 1.1 mrg ds temp,arg1,temp /* 19th divide step */
603 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
604 1.1 mrg ds temp,arg1,temp /* 20th divide step */
605 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
606 1.1 mrg ds temp,arg1,temp /* 21st divide step */
607 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
608 1.1 mrg ds temp,arg1,temp /* 22nd divide step */
609 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
610 1.1 mrg ds temp,arg1,temp /* 23rd divide step */
611 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
612 1.1 mrg ds temp,arg1,temp /* 24th divide step */
613 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
614 1.1 mrg ds temp,arg1,temp /* 25th divide step */
615 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
616 1.1 mrg ds temp,arg1,temp /* 26th divide step */
617 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
618 1.1 mrg ds temp,arg1,temp /* 27th divide step */
619 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
620 1.1 mrg ds temp,arg1,temp /* 28th divide step */
621 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
622 1.1 mrg ds temp,arg1,temp /* 29th divide step */
623 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
624 1.1 mrg ds temp,arg1,temp /* 30th divide step */
625 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
626 1.1 mrg ds temp,arg1,temp /* 31st divide step */
627 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
628 1.1 mrg ds temp,arg1,temp /* 32nd divide step, */
629 1.1 mrg MILLIRET
630 1.1 mrg addc retreg,retreg,retreg /* shift last retreg bit into retreg */
631 1.1 mrg
632 1.1 mrg /* Handle the cases where divisor is a small constant or has high bit on. */
633 1.1 mrg LSYM(special_divisor)
634 1.1 mrg /* blr arg1,r0 */
635 1.1 mrg /* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */
636 1.1 mrg
637 1.1 mrg /* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
638 1.1 mrg generating such a blr, comib sequence. A problem in nullification. So I
639 1.1 mrg rewrote this code. */
640 1.1 mrg
641 1.1 mrg #if defined(pa64)
642 1.1 mrg /* Clear the upper 32 bits of the arg1 register. We are working with
643 1.1 mrg small divisors (and 32-bit unsigned integers) We must not be misled
644 1.1 mrg by "1" bits left in the upper 32 bits. */
645 1.1 mrg depd %r0,31,32,%r25
646 1.1 mrg #endif
647 1.1 mrg comib,> 0,arg1,LREF(big_divisor) /* divisor negative as signed, i.e. high bit set */
648 1.1 mrg nop
649 1.1 mrg blr arg1,r0 /* dispatch into the table below, indexed by arg1 */
650 1.1 mrg nop
651 1.1 mrg
/* Table for divisor == 0,1, ... ,15 (two instructions per entry). */
652 1.1 mrg LSYM(zero_divisor) /* this label is here to provide external visibility */
653 1.1 mrg addit,= 0,arg1,0 /* trap for zero dvr */
654 1.1 mrg nop
655 1.1 mrg MILLIRET /* divisor == 1 */
656 1.1 mrg copy arg0,retreg
657 1.1 mrg MILLIRET /* divisor == 2 */
658 1.1 mrg extru arg0,30,31,retreg
659 1.1 mrg MILLI_BEN($$divU_3) /* divisor == 3 */
660 1.1 mrg nop
661 1.1 mrg MILLIRET /* divisor == 4 */
662 1.1 mrg extru arg0,29,30,retreg
663 1.1 mrg MILLI_BEN($$divU_5) /* divisor == 5 */
664 1.1 mrg nop
665 1.1 mrg MILLI_BEN($$divU_6) /* divisor == 6 */
666 1.1 mrg nop
667 1.1 mrg MILLI_BEN($$divU_7) /* divisor == 7 */
668 1.1 mrg nop
669 1.1 mrg MILLIRET /* divisor == 8 */
670 1.1 mrg extru arg0,28,29,retreg
671 1.1 mrg MILLI_BEN($$divU_9) /* divisor == 9 */
672 1.1 mrg nop
673 1.1 mrg MILLI_BEN($$divU_10) /* divisor == 10 */
674 1.1 mrg nop
675 1.1 mrg b LREF(normal) /* divisor == 11 */
676 1.1 mrg ds r0,temp,r0 /* set V-bit to 1 */
677 1.1 mrg MILLI_BEN($$divU_12) /* divisor == 12 */
678 1.1 mrg nop
679 1.1 mrg b LREF(normal) /* divisor == 13 */
680 1.1 mrg ds r0,temp,r0 /* set V-bit to 1 */
681 1.1 mrg MILLI_BEN($$divU_14) /* divisor == 14 */
682 1.1 mrg nop
683 1.1 mrg MILLI_BEN($$divU_15) /* divisor == 15 */
684 1.1 mrg nop
685 1.1 mrg
686 1.1 mrg /* Handle the case where the high bit is on in the divisor.
687 1.1 mrg Compute: if( dividend>=divisor) quotient=1; else quotient=0;
688 1.1 mrg Note: dividend>=divisor iff dividend-divisor does not borrow
689 1.1 mrg and not borrow iff carry. */
690 1.1 mrg LSYM(big_divisor)
691 1.1 mrg sub arg0,arg1,r0
692 1.1 mrg MILLIRET
693 1.1 mrg addc r0,r0,retreg /* delay slot: retreg = carry from the subtract above */
694 1.1 mrg .exit
695 1.1 mrg .procend
696 1.1 mrg .end
697 1.1 mrg #endif
698 1.1 mrg
699 1.1 mrg #ifdef L_remI
700 1.1 mrg /* ROUTINE: $$remI
701 1.1 mrg
702 1.1 mrg DESCRIPTION:
703 1.1 mrg . $$remI returns the remainder of the division of two signed 32-bit
704 1.1 mrg . integers. The sign of the remainder is the same as the sign of
705 1.1 mrg . the dividend.
706 1.1 mrg
707 1.1 mrg
708 1.1 mrg INPUT REGISTERS:
709 1.1 mrg . arg0 == dividend
710 1.1 mrg . arg1 == divisor
711 1.1 mrg . mrp == return pc
712 1.1 mrg . sr0 == return space when called externally
713 1.1 mrg
714 1.1 mrg OUTPUT REGISTERS:
715 1.1 mrg . arg0 = destroyed
716 1.1 mrg . arg1 = destroyed
717 1.1 mrg . ret1 = remainder
718 1.1 mrg
719 1.1 mrg OTHER REGISTERS AFFECTED:
720 1.1 mrg . r1 = undefined
721 1.1 mrg
722 1.1 mrg SIDE EFFECTS:
723 1.1 mrg . Causes a trap under the following conditions: DIVIDE BY ZERO
724 1.1 mrg . Changes memory at the following places: NONE
725 1.1 mrg
726 1.1 mrg PERMISSIBLE CONTEXT:
727 1.1 mrg . Unwindable
728 1.1 mrg . Does not create a stack frame
729 1.1 mrg . Is usable for internal or external millicode
730 1.1 mrg
731 1.1 mrg DISCUSSION:
732 1.1 mrg . Calls other millicode routines via mrp: NONE
733 1.1 mrg . Calls other millicode routines: NONE */
734 1.1 mrg
735 1.1 mrg RDEFINE(tmp,r1) /* scratch; undefined on return */
736 1.1 mrg RDEFINE(retreg,ret1) /* r29 */
737 1.1 mrg
738 1.1 mrg SUBSPA_MILLI
739 1.1 mrg ATTR_MILLI
740 1.1 mrg .proc
741 1.1 mrg .callinfo millicode
742 1.1 mrg .entry
743 1.1 mrg GSYM($$remI)
744 1.1 mrg GSYM($$remoI)
745 1.1 mrg .export $$remI,MILLICODE
746 1.1 mrg .export $$remoI,MILLICODE
747 1.1 mrg ldo -1(arg1),tmp /* is there at most one bit set ? */
748 1.1 mrg and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
749 1.1 mrg addi,> 0,arg1,r0 /* if denominator > 0, use power */
750 1.1 mrg /* of 2 */
751 1.1 mrg b,n LREF(neg_denom) /* not a positive power of 2 */
752 1.1 mrg LSYM(pow2)
753 1.1 mrg comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
754 1.1 mrg and arg0,tmp,retreg /* remainder = dividend & (divisor - 1) */
755 1.1 mrg MILLIRETN
756 1.1 mrg LSYM(neg_num)
757 1.1 mrg subi 0,arg0,arg0 /* negate numerator */
758 1.1 mrg and arg0,tmp,retreg /* get the result */
759 1.1 mrg subi 0,retreg,retreg /* negate result */
760 1.1 mrg MILLIRETN
761 1.1 mrg LSYM(neg_denom)
762 1.1 mrg addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
763 1.1 mrg /* of 2 */
764 1.1 mrg b,n LREF(regular_seq)
765 1.1 mrg sub r0,arg1,tmp /* make denominator positive */
766 1.1 mrg comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
767 1.1 mrg ldo -1(tmp),retreg /* is there at most one bit set ? */
768 1.1 mrg and,= tmp,retreg,r0 /* if not, go to regular_seq */
769 1.1 mrg b,n LREF(regular_seq)
770 1.1 mrg comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
771 1.1 mrg and arg0,retreg,retreg /* get the result */
772 1.1 mrg MILLIRETN
773 1.1 mrg LSYM(neg_num_2)
774 1.1 mrg subi 0,arg0,tmp /* negate numerator */
775 1.1 mrg and tmp,retreg,retreg /* get the result */
776 1.1 mrg subi 0,retreg,retreg /* negate result */
777 1.1 mrg MILLIRETN
778 1.1 mrg LSYM(regular_seq)
779 1.1 mrg addit,= 0,arg1,0 /* trap if div by zero */
780 1.1 mrg add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
781 1.1 mrg sub 0,retreg,retreg /* make it positive */
782 1.1 mrg sub 0,arg1, tmp /* clear carry, */
783 1.1 mrg /* negate the divisor */
784 1.1 mrg ds 0, tmp,0 /* set V-bit to the comple- */
785 1.1 mrg /* ment of the divisor sign */
786 1.1 mrg or 0,0, tmp /* clear tmp */
787 1.1 mrg add retreg,retreg,retreg /* shift msb bit into carry */
788 1.1 mrg ds tmp,arg1, tmp /* 1st divide step, if no carry */
789 1.1 mrg /* out, msb of quotient = 0 */
790 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
791 1.1 mrg LSYM(t1)
792 1.1 mrg ds tmp,arg1, tmp /* 2nd divide step */
793 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
794 1.1 mrg ds tmp,arg1, tmp /* 3rd divide step */
795 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
796 1.1 mrg ds tmp,arg1, tmp /* 4th divide step */
797 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
798 1.1 mrg ds tmp,arg1, tmp /* 5th divide step */
799 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
800 1.1 mrg ds tmp,arg1, tmp /* 6th divide step */
801 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
802 1.1 mrg ds tmp,arg1, tmp /* 7th divide step */
803 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
804 1.1 mrg ds tmp,arg1, tmp /* 8th divide step */
805 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
806 1.1 mrg ds tmp,arg1, tmp /* 9th divide step */
807 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
808 1.1 mrg ds tmp,arg1, tmp /* 10th divide step */
809 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
810 1.1 mrg ds tmp,arg1, tmp /* 11th divide step */
811 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
812 1.1 mrg ds tmp,arg1, tmp /* 12th divide step */
813 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
814 1.1 mrg ds tmp,arg1, tmp /* 13th divide step */
815 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
816 1.1 mrg ds tmp,arg1, tmp /* 14th divide step */
817 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
818 1.1 mrg ds tmp,arg1, tmp /* 15th divide step */
819 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
820 1.1 mrg ds tmp,arg1, tmp /* 16th divide step */
821 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
822 1.1 mrg ds tmp,arg1, tmp /* 17th divide step */
823 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
824 1.1 mrg ds tmp,arg1, tmp /* 18th divide step */
825 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
826 1.1 mrg ds tmp,arg1, tmp /* 19th divide step */
827 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
828 1.1 mrg ds tmp,arg1, tmp /* 20th divide step */
829 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
830 1.1 mrg ds tmp,arg1, tmp /* 21st divide step */
831 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
832 1.1 mrg ds tmp,arg1, tmp /* 22nd divide step */
833 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
834 1.1 mrg ds tmp,arg1, tmp /* 23rd divide step */
835 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
836 1.1 mrg ds tmp,arg1, tmp /* 24th divide step */
837 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
838 1.1 mrg ds tmp,arg1, tmp /* 25th divide step */
839 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
840 1.1 mrg ds tmp,arg1, tmp /* 26th divide step */
841 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
842 1.1 mrg ds tmp,arg1, tmp /* 27th divide step */
843 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
844 1.1 mrg ds tmp,arg1, tmp /* 28th divide step */
845 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
846 1.1 mrg ds tmp,arg1, tmp /* 29th divide step */
847 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
848 1.1 mrg ds tmp,arg1, tmp /* 30th divide step */
849 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
850 1.1 mrg ds tmp,arg1, tmp /* 31st divide step */
851 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
852 1.1 mrg ds tmp,arg1, tmp /* 32nd divide step, */
853 1.1 mrg addc retreg,retreg,retreg /* shift last bit into retreg */
854 1.1 mrg movb,>=,n tmp,retreg,LREF(finish) /* retreg = tmp; done if tmp >= 0 */
855 1.1 mrg add,< arg1,0,0 /* if arg1 > 0, add arg1 */
856 1.1 mrg add,tr tmp,arg1,retreg /* for correcting remainder tmp */
857 1.1 mrg sub tmp,arg1,retreg /* else add absolute value arg1 */
858 1.1 mrg LSYM(finish)
859 1.1 mrg add,>= arg0,0,0 /* set sign of remainder */
860 1.1 mrg sub 0,retreg,retreg /* to sign of dividend */
861 1.1 mrg MILLIRET
862 1.1 mrg nop
863 1.1 mrg .exit
864 1.1 mrg .procend
865 1.1 mrg #ifdef milliext
866 1.1 mrg .origin 0x00000200
867 1.1 mrg #endif
868 1.1 mrg .end
869 1.1 mrg #endif
870 1.1 mrg
871 1.1 mrg #ifdef L_remU
872 1.1 mrg /* ROUTINE: $$remU
873 1.1 mrg . Single precision divide for remainder with unsigned binary integers.
874 1.1 mrg .
875 1.1 mrg . The remainder must be dividend-(dividend/divisor)*divisor.
876 1.1 mrg . Divide by zero is trapped.
877 1.1 mrg
878 1.1 mrg INPUT REGISTERS:
879 1.1 mrg . arg0 == dividend
880 1.1 mrg . arg1 == divisor
881 1.1 mrg . mrp == return pc
882 1.1 mrg . sr0 == return space when called externally
883 1.1 mrg
884 1.1 mrg OUTPUT REGISTERS:
885 1.1 mrg . arg0 = undefined
886 1.1 mrg . arg1 = undefined
887 1.1 mrg . ret1 = remainder
888 1.1 mrg
889 1.1 mrg OTHER REGISTERS AFFECTED:
890 1.1 mrg . r1 = undefined
891 1.1 mrg
892 1.1 mrg SIDE EFFECTS:
893 1.1 mrg . Causes a trap under the following conditions: DIVIDE BY ZERO
894 1.1 mrg . Changes memory at the following places: NONE
895 1.1 mrg
896 1.1 mrg PERMISSIBLE CONTEXT:
897 1.1 mrg . Unwindable.
898 1.1 mrg . Does not create a stack frame.
899 1.1 mrg . Suitable for internal or external millicode.
900 1.1 mrg . Assumes the special millicode register conventions.
901 1.1 mrg
902 1.1 mrg DISCUSSION:
903 1.1 mrg . Calls other millicode routines using mrp: NONE
904 1.1 mrg . Calls other millicode routines: NONE */
905 1.1 mrg
906 1.1 mrg
907 1.1 mrg RDEFINE(temp,r1)
908 1.1 mrg RDEFINE(rmndr,ret1) /* r29 */
909 1.1 mrg SUBSPA_MILLI
910 1.1 mrg ATTR_MILLI
911 1.1 mrg .export $$remU,millicode
912 1.1 mrg .proc
913 1.1 mrg .callinfo millicode
914 1.1 mrg .entry
915 1.1 mrg GSYM($$remU)
916 1.1 mrg ldo -1(arg1),temp /* is there at most one bit set ? */
917 1.1 mrg and,= arg1,temp,r0 /* if not, don't use power of 2 */
918 1.1 mrg b LREF(regular_seq)
919 1.1 mrg addit,= 0,arg1,r0 /* trap on div by zero */
920 1.1 mrg and arg0,temp,rmndr /* get the result for power of 2 */
921 1.1 mrg MILLIRETN
922 1.1 mrg LSYM(regular_seq)
923 1.1 mrg comib,>=,n 0,arg1,LREF(special_case) /* divisor has its high bit set */
924 1.1 mrg subi 0,arg1,rmndr /* clear carry, negate the divisor */
925 1.1 mrg ds r0,rmndr,r0 /* set V-bit to 1 */
926 1.1 mrg add arg0,arg0,temp /* shift msb bit into carry */
927 1.1 mrg ds r0,arg1,rmndr /* 1st divide step */
928 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
929 1.1 mrg ds rmndr,arg1,rmndr /* 2nd divide step */
930 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
931 1.1 mrg ds rmndr,arg1,rmndr /* 3rd divide step */
932 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
933 1.1 mrg ds rmndr,arg1,rmndr /* 4th divide step */
934 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
935 1.1 mrg ds rmndr,arg1,rmndr /* 5th divide step */
936 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
937 1.1 mrg ds rmndr,arg1,rmndr /* 6th divide step */
938 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
939 1.1 mrg ds rmndr,arg1,rmndr /* 7th divide step */
940 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
941 1.1 mrg ds rmndr,arg1,rmndr /* 8th divide step */
942 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
943 1.1 mrg ds rmndr,arg1,rmndr /* 9th divide step */
944 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
945 1.1 mrg ds rmndr,arg1,rmndr /* 10th divide step */
946 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
947 1.1 mrg ds rmndr,arg1,rmndr /* 11th divide step */
948 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
949 1.1 mrg ds rmndr,arg1,rmndr /* 12th divide step */
950 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
951 1.1 mrg ds rmndr,arg1,rmndr /* 13th divide step */
952 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
953 1.1 mrg ds rmndr,arg1,rmndr /* 14th divide step */
954 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
955 1.1 mrg ds rmndr,arg1,rmndr /* 15th divide step */
956 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
957 1.1 mrg ds rmndr,arg1,rmndr /* 16th divide step */
958 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
959 1.1 mrg ds rmndr,arg1,rmndr /* 17th divide step */
960 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
961 1.1 mrg ds rmndr,arg1,rmndr /* 18th divide step */
962 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
963 1.1 mrg ds rmndr,arg1,rmndr /* 19th divide step */
964 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
965 1.1 mrg ds rmndr,arg1,rmndr /* 20th divide step */
966 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
967 1.1 mrg ds rmndr,arg1,rmndr /* 21st divide step */
968 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
969 1.1 mrg ds rmndr,arg1,rmndr /* 22nd divide step */
970 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
971 1.1 mrg ds rmndr,arg1,rmndr /* 23rd divide step */
972 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
973 1.1 mrg ds rmndr,arg1,rmndr /* 24th divide step */
974 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
975 1.1 mrg ds rmndr,arg1,rmndr /* 25th divide step */
976 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
977 1.1 mrg ds rmndr,arg1,rmndr /* 26th divide step */
978 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
979 1.1 mrg ds rmndr,arg1,rmndr /* 27th divide step */
980 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
981 1.1 mrg ds rmndr,arg1,rmndr /* 28th divide step */
982 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
983 1.1 mrg ds rmndr,arg1,rmndr /* 29th divide step */
984 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
985 1.1 mrg ds rmndr,arg1,rmndr /* 30th divide step */
986 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
987 1.1 mrg ds rmndr,arg1,rmndr /* 31st divide step */
988 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
989 1.1 mrg ds rmndr,arg1,rmndr /* 32nd divide step */
990 1.1 mrg comiclr,<= 0,rmndr,r0 /* skip correction if rmndr >= 0 */
991 1.1 mrg add rmndr,arg1,rmndr /* correction */
992 1.1 mrg MILLIRETN
993 1.1 mrg nop
994 1.1 mrg
995 1.1 mrg /* Putting >= on the last DS and deleting COMICLR does not work! */
996 1.1 mrg LSYM(special_case)
997 1.1 mrg sub,>>= arg0,arg1,rmndr /* rmndr = dividend - divisor if no borrow, */
998 1.1 mrg copy arg0,rmndr /* else rmndr = dividend */
999 1.1 mrg MILLIRETN
1000 1.1 mrg nop
1001 1.1 mrg .exit
1002 1.1 mrg .procend
1003 1.1 mrg .end
1004 1.1 mrg #endif
1005 1.1 mrg
1006 1.1 mrg #ifdef L_div_const
1007 1.1 mrg /* ROUTINE: $$divI_2
1008 1.1 mrg . $$divI_3 $$divU_3
1009 1.1 mrg . $$divI_4
1010 1.1 mrg . $$divI_5 $$divU_5
1011 1.1 mrg . $$divI_6 $$divU_6
1012 1.1 mrg . $$divI_7 $$divU_7
1013 1.1 mrg . $$divI_8
1014 1.1 mrg . $$divI_9 $$divU_9
1015 1.1 mrg . $$divI_10 $$divU_10
1016 1.1 mrg .
1017 1.1 mrg . $$divI_12 $$divU_12
1018 1.1 mrg .
1019 1.1 mrg . $$divI_14 $$divU_14
1020 1.1 mrg . $$divI_15 $$divU_15
1021 1.1 mrg . $$divI_16
1022 1.1 mrg . $$divI_17 $$divU_17
1023 1.1 mrg .
1024 1.1 mrg . Divide by selected constants for single precision binary integers.
1025 1.1 mrg
1026 1.1 mrg INPUT REGISTERS:
1027 1.1 mrg . arg0 == dividend
1028 1.1 mrg . mrp == return pc
1029 1.1 mrg . sr0 == return space when called externally
1030 1.1 mrg
1031 1.1 mrg OUTPUT REGISTERS:
1032 1.1 mrg . arg0 = undefined
1033 1.1 mrg . arg1 = undefined
1034 1.1 mrg . ret1 = quotient
1035 1.1 mrg
1036 1.1 mrg OTHER REGISTERS AFFECTED:
1037 1.1 mrg . r1 = undefined
1038 1.1 mrg
1039 1.1 mrg SIDE EFFECTS:
1040 1.1 mrg . Causes a trap under the following conditions: NONE
1041 1.1 mrg . Changes memory at the following places: NONE
1042 1.1 mrg
1043 1.1 mrg PERMISSIBLE CONTEXT:
1044 1.1 mrg . Unwindable.
1045 1.1 mrg . Does not create a stack frame.
1046 1.1 mrg . Suitable for internal or external millicode.
1047 1.1 mrg . Assumes the special millicode register conventions.
1048 1.1 mrg
1049 1.1 mrg DISCUSSION:
1050 1.1 mrg . Calls other millicode routines using mrp: NONE
1051 1.1 mrg . Calls other millicode routines: NONE */
1052 1.1 mrg
1053 1.1 mrg
1054 1.1 mrg /* TRUNCATED DIVISION BY SMALL INTEGERS
1055 1.1 mrg
1056 1.1 mrg We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
1057 1.1 mrg (with y fixed).
1058 1.1 mrg
1059 1.1 mrg Let a = floor(z/y), for some choice of z. Note that z will be
1060 1.1 mrg chosen so that division by z is cheap.
1061 1.1 mrg
1062 1.1 mrg Let r be the remainder(z/y). In other words, r = z - ay.
1063 1.1 mrg
1064 1.1 mrg Now, our method is to choose a value for b such that
1065 1.1 mrg
1066 1.1 mrg q'(x) = floor((ax+b)/z)
1067 1.1 mrg
1068 1.1 mrg is equal to q(x) over as large a range of x as possible. If the
1069 1.1 mrg two are equal over a sufficiently large range, and if it is easy to
1070 1.1 mrg form the product (ax), and it is easy to divide by z, then we can
1071 1.1 mrg perform the division much faster than the general division algorithm.
1072 1.1 mrg
1073 1.1 mrg So, we want the following to be true:
1074 1.1 mrg
1075 1.1 mrg . For x in the following range:
1076 1.1 mrg .
1077 1.1 mrg . ky <= x < (k+1)y
1078 1.1 mrg .
1079 1.1 mrg . implies that
1080 1.1 mrg .
1081 1.1 mrg . k <= (ax+b)/z < (k+1)
1082 1.1 mrg
1083 1.1 mrg We want to determine b such that this is true for all k in the
1084 1.1 mrg range {0..K} for some maximum K.
1085 1.1 mrg
1086 1.1 mrg Since (ax+b) is an increasing function of x, we can take each
1087 1.1 mrg bound separately to determine the "best" value for b.
1088 1.1 mrg
1089 1.1 mrg (ax+b)/z < (k+1) implies
1090 1.1 mrg
1091 1.1 mrg a((k+1)y-1)+b < (k+1)z implies
1092 1.1 mrg
1093 1.1 mrg b < a + (k+1)(z-ay) implies
1094 1.1 mrg
1095 1.1 mrg b < a + (k+1)r
1096 1.1 mrg
1097 1.1 mrg This needs to be true for all k in the range {0..K}. In
1098 1.1 mrg particular, it is true for k = 0 and this leads to a maximum
1099 1.1 mrg acceptable value for b.
1100 1.1 mrg
1101 1.1 mrg b < a+r or b <= a+r-1
1102 1.1 mrg
1103 1.1 mrg Taking the other bound, we have
1104 1.1 mrg
1105 1.1 mrg k <= (ax+b)/z implies
1106 1.1 mrg
1107 1.1 mrg k <= (aky+b)/z implies
1108 1.1 mrg
1109 1.1 mrg k(z-ay) <= b implies
1110 1.1 mrg
1111 1.1 mrg kr <= b
1112 1.1 mrg
1113 1.1 mrg Clearly, the largest range for k will be achieved by maximizing b,
1114 1.1 mrg when r is not zero. When r is zero, then the simplest choice for b
1115 1.1 mrg is 0. When r is not 0, set
1116 1.1 mrg
1117 1.1 mrg . b = a+r-1
1118 1.1 mrg
1119 1.1 mrg Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
1120 1.1 mrg for all x in the range:
1121 1.1 mrg
1122 1.1 mrg . 0 <= x < (K+1)y
1123 1.1 mrg
1124 1.1 mrg We need to determine what K is. Of our two bounds,
1125 1.1 mrg
1126 1.1 mrg . b < a+(k+1)r is satisfied for all k >= 0, by construction.
1127 1.1 mrg
1128 1.1 mrg The other bound is
1129 1.1 mrg
1130 1.1 mrg . kr <= b
1131 1.1 mrg
1132 1.1 mrg This is always true if r = 0. If r is not 0 (the usual case), then
1133 1.1 mrg K = floor((a+r-1)/r), is the maximum value for k.
1134 1.1 mrg
1135 1.1 mrg Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
1136 1.1 mrg answer for q(x) = floor(x/y) when x is in the range
1137 1.1 mrg
1138 1.1 mrg (0,(K+1)y-1) K = floor((a+r-1)/r)
1139 1.1 mrg
1140 1.1 mrg To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
1141 1.1 mrg the formula for q'(x) yields the correct value of q(x) for all x
1142 1.1 mrg representable by a single word in HPPA.
1143 1.1 mrg
1144 1.1 mrg We are also constrained in that computing the product (ax), adding
1145 1.1 mrg b, and dividing by z must all be done quickly, otherwise we will be
1146 1.1 mrg better off going through the general algorithm using the DS
1147 1.1 mrg instruction, which uses approximately 70 cycles.
1148 1.1 mrg
1149 1.1 mrg For each y, there is a choice of z which satisfies the constraints
1150 1.1 mrg for (K+1)y >= 2**32. We may not, however, be able to satisfy the
1151 1.1 mrg timing constraints for arbitrary y. It seems that z being equal to
1152 1.1 mrg a power of 2 or a power of 2 minus 1 is as good as we can do, since
1153 1.1 mrg it minimizes the time to do division by z. We want the choice of z
1154 1.1 mrg to also result in a value for (a) that minimizes the computation of
1155 1.1 mrg the product (ax). This is best achieved if (a) has a regular bit
1156 1.1 mrg pattern (so the multiplication can be done with shifts and adds).
1157 1.1 mrg The value of (a) also needs to be less than 2**32 so the product is
1158 1.1 mrg always guaranteed to fit in 2 words.
1159 1.1 mrg
1160 1.1 mrg In actual practice, the following should be done:
1161 1.1 mrg
1162 1.1 mrg 1) For negative x, you should take the absolute value and remember
1163 1.1 mrg . the fact so that the result can be negated. This obviously does
1164 1.1 mrg . not apply in the unsigned case.
1165 1.1 mrg 2) For even y, you should factor out the power of 2 that divides y
1166 1.1 mrg . and divide x by it. You can then proceed by dividing by the
1167 1.1 mrg . odd factor of y.
1168 1.1 mrg
1169 1.1 mrg Here is a table of some odd values of y, and corresponding choices
1170 1.1 mrg for z which are "good".
1171 1.1 mrg
1172 1.1 mrg y z r a (hex) max x (hex)
1173 1.1 mrg
1174 1.1 mrg 3 2**32 1 55555555 100000001
1175 1.1 mrg 5 2**32 1 33333333 100000003
1176 1.1 mrg 7 2**24-1 0 249249 (infinite)
1177 1.1 mrg 9 2**24-1 0 1c71c7 (infinite)
1178 1.1 mrg 11 2**20-1 0 1745d (infinite)
1179 1.1 mrg 13 2**24-1 0 13b13b (infinite)
1180 1.1 mrg 15 2**32 1 11111111 10000000d
1181 1.1 mrg 17 2**32 1 f0f0f0f 10000000f
1182 1.1 mrg
1183 1.1 mrg If r is 1, then b = a+r-1 = a. This simplifies the computation
1184 1.1 mrg of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
1185 1.1 mrg then b = 0 is ok to use which simplifies (ax+b).
1186 1.1 mrg
1187 1.1 mrg The bit patterns for 55555555, 33333333, and 11111111 are obviously
1188 1.1 mrg very regular. The bit patterns for the other values of a above are:
1189 1.1 mrg
1190 1.1 mrg y (hex) (binary)
1191 1.1 mrg
1192 1.1 mrg 7 249249 001001001001001001001001 << regular >>
1193 1.1 mrg 9 1c71c7 000111000111000111000111 << regular >>
1194 1.1 mrg 11 1745d 000000010111010001011101 << irregular >>
1195 1.1 mrg 13 13b13b 000100111011000100111011 << irregular >>
1196 1.1 mrg
1197 1.1 mrg The bit patterns for (a) corresponding to (y) of 11 and 13 may be
1198 1.1 mrg too irregular to warrant using this method.
1199 1.1 mrg
1200 1.1 mrg When z is a power of 2 minus 1, then the division by z is slightly
1201 1.1 mrg more complicated, involving an iterative solution.
1202 1.1 mrg
1203 1.1 mrg The code presented here solves division by 1 through 17, except for
1204 1.1 mrg 11 and 13. There are algorithms for both signed and unsigned
1205 1.1 mrg quantities given.
1206 1.1 mrg
1207 1.1 mrg TIMINGS (cycles)
1208 1.1 mrg
1209 1.1 mrg divisor positive negative unsigned
1210 1.1 mrg
1211 1.1 mrg . 1 2 2 2
1212 1.1 mrg . 2 4 4 2
1213 1.1 mrg . 3 19 21 19
1214 1.1 mrg . 4 4 4 2
1215 1.1 mrg . 5 18 22 19
1216 1.1 mrg . 6 19 22 19
1217 1.1 mrg . 8 4 4 2
1218 1.1 mrg . 10 18 19 17
1219 1.1 mrg . 12 18 20 18
1220 1.1 mrg . 15 16 18 16
1221 1.1 mrg . 16 4 4 2
1222 1.1 mrg . 17 16 18 16
1223 1.1 mrg
1224 1.1 mrg Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
1225 1.1 mrg a loop body is executed until the tentative quotient is 0. The
1226 1.1 mrg number of times the loop body is executed varies depending on the
1227 1.1 mrg dividend, but is never more than two times. If the dividend is
1228 1.1 mrg less than the divisor, then the loop body is not executed at all.
1229 1.1 mrg Each iteration adds 4 cycles to the timings.
1230 1.1 mrg
1231 1.1 mrg divisor positive negative unsigned
1232 1.1 mrg
1233 1.1 mrg . 7 19+4n 20+4n 20+4n n = number of iterations
1234 1.1 mrg . 9 21+4n 22+4n 21+4n
1235 1.1 mrg . 14 21+4n 22+4n 20+4n
1236 1.1 mrg
1237 1.1 mrg To give an idea of how the number of iterations varies, here is a
1238 1.1 mrg table of dividend versus number of iterations when dividing by 7.
1239 1.1 mrg
1240 1.1 mrg smallest largest required
1241 1.1 mrg dividend dividend iterations
1242 1.1 mrg
1243 1.1 mrg . 0 6 0
1244 1.1 mrg . 7 0x6ffffff 1
1245 1.1 mrg 0x1000006 0xffffffff 2
1246 1.1 mrg
1247 1.1 mrg There is some overlap in the range of numbers requiring 1 and 2
1248 1.1 mrg iterations. */
1249 1.1 mrg
1250 1.1 mrg RDEFINE(t2,r1)
1251 1.1 mrg RDEFINE(x2,arg0) /* r26 */
1252 1.1 mrg RDEFINE(t1,arg1) /* r25 */
1253 1.1 mrg RDEFINE(x1,ret1) /* r29 */
1254 1.1 mrg
1255 1.1 mrg SUBSPA_MILLI_DIV
1256 1.1 mrg ATTR_MILLI
1257 1.1 mrg
1258 1.1 mrg .proc
1259 1.1 mrg .callinfo millicode
1260 1.1 mrg .entry
1261 1.1 mrg /* NONE of these routines require a stack frame
1262 1.1 mrg ALL of these routines are unwindable from millicode */
1263 1.1 mrg
1264 1.1 mrg GSYM($$divide_by_constant)
1265 1.1 mrg .export $$divide_by_constant,millicode
1266 1.1 mrg /* Provides a "nice" label for the code covered by the unwind descriptor
1267 1.1 mrg for things like gprof. */
1268 1.1 mrg
1269 1.1 mrg /* DIVISION BY 2 (shift by 1) */
1270 1.1 mrg GSYM($$divI_2)
1271 1.1 mrg .export $$divI_2,millicode
1272 1.1 mrg comclr,>= arg0,0,0 /* dividend >= 0: skip the bias */
1273 1.1 mrg addi 1,arg0,arg0 /* negative: add 2-1 so the shift truncates */
1274 1.1 mrg MILLIRET
1275 1.1 mrg extrs arg0,30,31,ret1 /* ret1 = arg0 >> 1 (sign-extended) */
1276 1.1 mrg
1277 1.1 mrg
1278 1.1 mrg /* DIVISION BY 4 (shift by 2) */
1279 1.1 mrg GSYM($$divI_4)
1280 1.1 mrg .export $$divI_4,millicode
1281 1.1 mrg comclr,>= arg0,0,0 /* dividend >= 0: skip the bias */
1282 1.1 mrg addi 3,arg0,arg0 /* negative: add 4-1 so the shift truncates */
1283 1.1 mrg MILLIRET
1284 1.1 mrg extrs arg0,29,30,ret1 /* ret1 = arg0 >> 2 (sign-extended) */
1285 1.1 mrg
1286 1.1 mrg
1287 1.1 mrg /* DIVISION BY 8 (shift by 3) */
1288 1.1 mrg GSYM($$divI_8)
1289 1.1 mrg .export $$divI_8,millicode
1290 1.1 mrg comclr,>= arg0,0,0 /* dividend >= 0: skip the bias */
1291 1.1 mrg addi 7,arg0,arg0 /* negative: add 8-1 so the shift truncates */
1292 1.1 mrg MILLIRET
1293 1.1 mrg extrs arg0,28,29,ret1 /* ret1 = arg0 >> 3 (sign-extended) */
1294 1.1 mrg
1295 1.1 mrg /* DIVISION BY 16 (shift by 4) */
1296 1.1 mrg GSYM($$divI_16)
1297 1.1 mrg .export $$divI_16,millicode
1298 1.1 mrg comclr,>= arg0,0,0 /* dividend >= 0: skip the bias */
1299 1.1 mrg addi 15,arg0,arg0 /* negative: add 16-1 so the shift truncates */
1300 1.1 mrg MILLIRET
1301 1.1 mrg extrs arg0,27,28,ret1 /* ret1 = arg0 >> 4 (sign-extended) */
1302 1.1 mrg
1303 1.1 mrg /****************************************************************************
1304 1.1 mrg *
1305 1.1 mrg * DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
1306 1.1 mrg *
1307 1.1 mrg * includes 3,5,15,17 and also 6,10,12
1308 1.1 mrg *
1309 1.1 mrg ****************************************************************************/
1310 1.1 mrg
1311 1.1 mrg /* DIVISION BY 3 (use z = 2**32; a = 55555555) */
1312 1.1 mrg
1313 1.1 mrg GSYM($$divI_3)
1314 1.1 mrg .export $$divI_3,millicode
1315 1.1 mrg comb,<,N x2,0,LREF(neg3)
1316 1.1 mrg
1317 1.1 mrg addi 1,x2,x2 /* this cannot overflow */
1318 1.1 mrg extru x2,1,2,x1 /* multiply by 5 to get started */
1319 1.1 mrg sh2add x2,x2,x2
1320 1.1 mrg b LREF(pos)
1321 1.1 mrg addc x1,0,x1
1322 1.1 mrg
1323 1.1 mrg LSYM(neg3)
1324 1.1 mrg subi 1,x2,x2 /* this cannot overflow */
1325 1.1 mrg extru x2,1,2,x1 /* multiply by 5 to get started */
1326 1.1 mrg sh2add x2,x2,x2
1327 1.1 mrg b LREF(neg)
1328 1.1 mrg addc x1,0,x1
1329 1.1 mrg
1330 1.1 mrg GSYM($$divU_3)
1331 1.1 mrg .export $$divU_3,millicode
1332 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1333 1.1 mrg addc 0,0,x1
1334 1.1 mrg shd x1,x2,30,t1 /* multiply by 5 to get started */
1335 1.1 mrg sh2add x2,x2,x2
1336 1.1 mrg b LREF(pos)
1337 1.1 mrg addc x1,t1,x1
1338 1.1 mrg
1339 1.1 mrg /* DIVISION BY 5 (use z = 2**32; a = 33333333) */
1340 1.1 mrg
1341 1.1 mrg GSYM($$divI_5)
1342 1.1 mrg .export $$divI_5,millicode
1343 1.1 mrg comb,<,N x2,0,LREF(neg5)
1344 1.1 mrg
1345 1.1 mrg addi 3,x2,t1 /* this cannot overflow */
1346 1.1 mrg sh1add x2,t1,x2 /* multiply by 3 to get started */
1347 1.1 mrg b LREF(pos)
1348 1.1 mrg addc 0,0,x1
1349 1.1 mrg
1350 1.1 mrg LSYM(neg5)
1351 1.1 mrg sub 0,x2,x2 /* negate x2 */
1352 1.1 mrg addi 1,x2,x2 /* this cannot overflow */
1353 1.1 mrg shd 0,x2,31,x1 /* get top bit (can be 1) */
1354 1.1 mrg sh1add x2,x2,x2 /* multiply by 3 to get started */
1355 1.1 mrg b LREF(neg)
1356 1.1 mrg addc x1,0,x1
1357 1.1 mrg
1358 1.1 mrg GSYM($$divU_5)
1359 1.1 mrg .export $$divU_5,millicode
1360 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1361 1.1 mrg addc 0,0,x1
1362 1.1 mrg shd x1,x2,31,t1 /* multiply by 3 to get started */
1363 1.1 mrg sh1add x2,x2,x2
1364 1.1 mrg b LREF(pos)
1365 1.1 mrg addc t1,x1,x1
1366 1.1 mrg
1367 1.1 mrg /* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
1368 1.1 mrg GSYM($$divI_6)
1369 1.1 mrg .export $$divI_6,millicode
1370 1.1 mrg comb,<,N x2,0,LREF(neg6)
1371 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1372 1.1 mrg addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
1373 1.1 mrg sh2add x2,t1,x2 /* multiply by 5 to get started */
1374 1.1 mrg b LREF(pos)
1375 1.1 mrg addc 0,0,x1
1376 1.1 mrg
1377 1.1 mrg LSYM(neg6)
1378 1.1 mrg subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1379 1.1 mrg /* negation and adding 1 are done */
1380 1.1 mrg /* at the same time by the SUBI */
1381 1.1 mrg extru x2,30,31,x2
1382 1.1 mrg shd 0,x2,30,x1
1383 1.1 mrg sh2add x2,x2,x2 /* multiply by 5 to get started */
1384 1.1 mrg b LREF(neg)
1385 1.1 mrg addc x1,0,x1
1386 1.1 mrg
1387 1.1 mrg GSYM($$divU_6)
1388 1.1 mrg .export $$divU_6,millicode
1389 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1390 1.1 mrg addi 1,x2,x2 /* cannot carry */
1391 1.1 mrg shd 0,x2,30,x1 /* multiply by 5 to get started */
1392 1.1 mrg sh2add x2,x2,x2
1393 1.1 mrg b LREF(pos)
1394 1.1 mrg addc x1,0,x1
1395 1.1 mrg
1396 1.1 mrg /* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
1397 1.1 mrg GSYM($$divU_10)
1398 1.1 mrg .export $$divU_10,millicode
1399 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1400 1.1 mrg addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
1401 1.1 mrg sh1add x2,t1,x2 /* multiply by 3 to get started */
1402 1.1 mrg addc 0,0,x1
1403 1.1 mrg LSYM(pos)
1404 1.1 mrg shd x1,x2,28,t1 /* multiply by 0x11 */
1405 1.1 mrg shd x2,0,28,t2
1406 1.1 mrg add x2,t2,x2
1407 1.1 mrg addc x1,t1,x1
1408 1.1 mrg LSYM(pos_for_17)
1409 1.1 mrg shd x1,x2,24,t1 /* multiply by 0x101 */
1410 1.1 mrg shd x2,0,24,t2
1411 1.1 mrg add x2,t2,x2
1412 1.1 mrg addc x1,t1,x1
1413 1.1 mrg
1414 1.1 mrg shd x1,x2,16,t1 /* multiply by 0x10001 */
1415 1.1 mrg shd x2,0,16,t2
1416 1.1 mrg add x2,t2,x2
1417 1.1 mrg MILLIRET
1418 1.1 mrg addc x1,t1,x1
1419 1.1 mrg
1420 1.1 mrg GSYM($$divI_10)
1421 1.1 mrg .export $$divI_10,millicode
1422 1.1 mrg comb,< x2,0,LREF(neg10)
1423 1.1 mrg copy 0,x1
1424 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1425 1.1 mrg addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
1426 1.1 mrg sh1add x2,x2,x2 /* multiply by 3 to get started */
1427 1.1 mrg
1428 1.1 mrg LSYM(neg10)
1429 1.1 mrg subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1430 1.1 mrg /* negation and adding 1 are done */
1431 1.1 mrg /* at the same time by the SUBI */
1432 1.1 mrg extru x2,30,31,x2
1433 1.1 mrg sh1add x2,x2,x2 /* multiply by 3 to get started */
1434 1.1 mrg LSYM(neg)
1435 1.1 mrg shd x1,x2,28,t1 /* multiply by 0x11 */
1436 1.1 mrg shd x2,0,28,t2
1437 1.1 mrg add x2,t2,x2
1438 1.1 mrg addc x1,t1,x1
1439 1.1 mrg LSYM(neg_for_17)
1440 1.1 mrg shd x1,x2,24,t1 /* multiply by 0x101 */
1441 1.1 mrg shd x2,0,24,t2
1442 1.1 mrg add x2,t2,x2
1443 1.1 mrg addc x1,t1,x1
1444 1.1 mrg
1445 1.1 mrg shd x1,x2,16,t1 /* multiply by 0x10001 */
1446 1.1 mrg shd x2,0,16,t2
1447 1.1 mrg add x2,t2,x2
1448 1.1 mrg addc x1,t1,x1
1449 1.1 mrg MILLIRET
1450 1.1 mrg sub 0,x1,x1
1451 1.1 mrg
1452 1.1 mrg /* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
1453 1.1 mrg GSYM($$divI_12)
1454 1.1 mrg .export $$divI_12,millicode
1455 1.1 mrg comb,< x2,0,LREF(neg12)
1456 1.1 mrg copy 0,x1
1457 1.1 mrg extru x2,29,30,x2 /* divide by 4 */
1458 1.1 mrg addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
1459 1.1 mrg sh2add x2,x2,x2 /* multiply by 5 to get started */
1460 1.1 mrg
1461 1.1 mrg LSYM(neg12)
1462 1.1 mrg subi 4,x2,x2 /* negate, divide by 4, and add 1 */
1463 1.1 mrg /* negation and adding 1 are done */
1464 1.1 mrg /* at the same time by the SUBI */
1465 1.1 mrg extru x2,29,30,x2
1466 1.1 mrg b LREF(neg)
1467 1.1 mrg sh2add x2,x2,x2 /* multiply by 5 to get started */
1468 1.1 mrg
1469 1.1 mrg GSYM($$divU_12)
1470 1.1 mrg .export $$divU_12,millicode
1471 1.1 mrg extru x2,29,30,x2 /* divide by 4 */
1472 1.1 mrg addi 5,x2,t1 /* cannot carry */
1473 1.1 mrg sh2add x2,t1,x2 /* multiply by 5 to get started */
1474 1.1 mrg b LREF(pos)
1475 1.1 mrg addc 0,0,x1
1476 1.1 mrg
1477 1.1 mrg /* DIVISION BY 15 (use z = 2**32; a = 11111111) */
1478 1.1 mrg GSYM($$divI_15)
1479 1.1 mrg .export $$divI_15,millicode
1480 1.1 mrg comb,< x2,0,LREF(neg15)
1481 1.1 mrg copy 0,x1
1482 1.1 mrg addib,tr 1,x2,LREF(pos)+4
1483 1.1 mrg shd x1,x2,28,t1
1484 1.1 mrg
1485 1.1 mrg LSYM(neg15)
1486 1.1 mrg b LREF(neg)
1487 1.1 mrg subi 1,x2,x2
1488 1.1 mrg
1489 1.1 mrg GSYM($$divU_15)
1490 1.1 mrg .export $$divU_15,millicode
1491 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1492 1.1 mrg b LREF(pos)
1493 1.1 mrg addc 0,0,x1
1494 1.1 mrg
1495 1.1 mrg /* DIVISION BY 17 (use z = 2**32; a = f0f0f0f)
   Signed entry point.  The dividend arrives in x2.  Each path adjusts x2
   (x2+1, or 1-x2 for a negative dividend), forms the two-word product
   <x1,x2> = 15 * x2 as 16*x2 - x2, and continues at LREF(pos_for_17) or
   LREF(neg_for_17), both defined outside this section. */
1496 1.1 mrg GSYM($$divI_17)
1497 1.1 mrg .export $$divI_17,millicode
1498 1.1 mrg comb,<,n x2,0,LREF(neg17) /* negative dividend: go to neg17, skipping the addi */
1499 1.1 mrg addi 1,x2,x2 /* this cannot overflow */
1500 1.1 mrg shd 0,x2,28,t1 /* multiply by 0xf to get started */
1501 1.1 mrg shd x2,0,28,t2 /* <t1,t2> = 16 * x2 */
1502 1.1 mrg sub t2,x2,x2 /* low word of 16*x2 - x2 */
1503 1.1 mrg b LREF(pos_for_17)
1504 1.1 mrg subb t1,0,x1 /* high word, with the borrow from the sub */
1505 1.1 mrg
1506 1.1 mrg LSYM(neg17)
1507 1.1 mrg subi 1,x2,x2 /* this cannot overflow */
1508 1.1 mrg shd 0,x2,28,t1 /* multiply by 0xf to get started */
1509 1.1 mrg shd x2,0,28,t2 /* <t1,t2> = 16 * x2 */
1510 1.1 mrg sub t2,x2,x2 /* low word of 16*x2 - x2 */
1511 1.1 mrg b LREF(neg_for_17)
1512 1.1 mrg subb t1,0,x1 /* high word, with the borrow from the sub */
1513 1.1 mrg
1514 1.1 mrg GSYM($$divU_17) /* Unsigned divide by 17: increment the dividend
   into the two-word value <x1,x2>, multiply it by 15 as 16*v - v, then
   join LREF(pos_for_17) (defined outside this section). */
1515 1.1 mrg .export $$divU_17,millicode
1516 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1517 1.1 mrg addc 0,0,x1 /* x1 = carry out of the addi above */
1518 1.1 mrg shd x1,x2,28,t1 /* multiply by 0xf to get started */
1519 1.1 mrg LSYM(u17)
1520 1.1 mrg shd x2,0,28,t2 /* t2 = low word of 16 * <x1,x2> */
1521 1.1 mrg sub t2,x2,x2 /* low word of the product */
1522 1.1 mrg b LREF(pos_for_17)
1523 1.1 mrg subb t1,x1,x1 /* high word, with the borrow from the sub */
1524 1.1 mrg
1525 1.1 mrg
1526 1.1 mrg /* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these.
1527 1.1 mrg This includes 7, 9 and also 14.
1528 1.1 mrg
1529 1.1 mrg
1530 1.1 mrg z = 2**24-1
1531 1.1 mrg r = z mod x = 0
1532 1.1 mrg
1533 1.1 mrg so choose b = 0
1534 1.1 mrg
1535 1.1 mrg Also, in order to divide by z = 2**24-1, we approximate by dividing
1536 1.1 mrg by (z+1) = 2**24 (which is easy), and then correcting.
1537 1.1 mrg
1538 1.1 mrg (ax) = (z+1)q' + r
1539 1.1 mrg . = zq' + (q'+r)
1540 1.1 mrg
1541 1.1 mrg So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
1542 1.1 mrg Then the true remainder of (ax)/z is (q'+r). Repeat the process
1543 1.1 mrg with this new remainder, adding the tentative quotients together,
1544 1.1 mrg until a tentative quotient is 0 (and then we are done). There is
1545 1.1 mrg one last correction to be done. It is possible that (q'+r) = z.
1546 1.1 mrg If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
1547 1.1 mrg in fact, we need to add 1 more to the quotient. Now, it turns
1548 1.1 mrg out that this happens if and only if the original value x is
1549 1.1 mrg an exact multiple of y. So, to avoid a three instruction test at
1550 1.1 mrg the end, instead use 1 instruction to add 1 to x at the beginning. */
1551 1.1 mrg
1552 1.1 mrg /* DIVISION BY 7 (use z = 2**24-1; a = 249249)
   Signed entry point.  The dividend arrives in x2 and the quotient is
   returned in x1.  The adjusted dividend (x2+1, or 1-x2 when negative) is
   multiplied by 0x249249 in three steps (9, then 0x41, then 0x1001) into
   the two-word value <t1,x2>.  That value is then divided by 2**24-1:
   the part above the low 24 bits is added to the quotient and also folded
   back into the low 24 bits, until nothing is left above them.
   The negative path runs the same steps and negates x1 on return.
   LREF(7) and LREF(8) are also entered from the divide-by-14 code;
   LREF(pos7) and LREF(neg7_shift) are also entered from the other
   divide-by-7 and divide-by-9 entry points. */
1553 1.1 mrg GSYM($$divI_7)
1554 1.1 mrg .export $$divI_7,millicode
1555 1.1 mrg comb,<,n x2,0,LREF(neg7) /* negative dividend: go to neg7, skipping the addi */
1556 1.1 mrg LSYM(7)
1557 1.1 mrg addi 1,x2,x2 /* cannot overflow */
1558 1.1 mrg shd 0,x2,29,x1 /* x1 = high word of 8 * x2 */
1559 1.1 mrg sh3add x2,x2,x2 /* x2 = 8*x2 + x2 */
1560 1.1 mrg addc x1,0,x1 /* <x1,x2> = 9 * x2 */
1561 1.1 mrg LSYM(pos7)
1562 1.1 mrg shd x1,x2,26,t1 /* <t1,t2> = <x1,x2> << 6 */
1563 1.1 mrg shd x2,0,26,t2
1564 1.1 mrg add x2,t2,x2 /* <x1,x2> *= 0x41 */
1565 1.1 mrg addc x1,t1,x1
1566 1.1 mrg
1567 1.1 mrg shd x1,x2,20,t1 /* <t1,t2> = <x1,x2> << 12 */
1568 1.1 mrg shd x2,0,20,t2
1569 1.1 mrg add x2,t2,x2 /* <t1,x2> = <x1,x2> * 0x1001 */
1570 1.1 mrg addc x1,t1,t1
1571 1.1 mrg
1572 1.1 mrg /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1573 1.1 mrg
1574 1.1 mrg copy 0,x1 /* quotient accumulator = 0 */
1575 1.1 mrg shd,= t1,x2,24,t1 /* tentative quotient */
   /* the ,= completer skips the addb below when that quotient is zero */
1576 1.1 mrg LSYM(1)
1577 1.1 mrg addb,tr t1,x1,LREF(2) /* add to previous quotient */
1578 1.1 mrg extru x2,31,24,x2 /* new remainder (unadjusted) */
1579 1.1 mrg
1580 1.1 mrg MILLIRETN
1581 1.1 mrg
1582 1.1 mrg LSYM(2)
1583 1.1 mrg addb,tr t1,x2,LREF(1) /* adjust remainder */
1584 1.1 mrg extru,= x2,7,8,t1 /* new quotient */
1585 1.1 mrg
1586 1.1 mrg LSYM(neg7)
1587 1.1 mrg subi 1,x2,x2 /* negate x2 and add 1 */
1588 1.1 mrg LSYM(8)
1589 1.1 mrg shd 0,x2,29,x1 /* x1 = high word of 8 * x2 */
1590 1.1 mrg sh3add x2,x2,x2 /* x2 = 8*x2 + x2 */
1591 1.1 mrg addc x1,0,x1 /* <x1,x2> = 9 * x2 */
1592 1.1 mrg
1593 1.1 mrg LSYM(neg7_shift)
1594 1.1 mrg shd x1,x2,26,t1 /* <t1,t2> = <x1,x2> << 6 */
1595 1.1 mrg shd x2,0,26,t2
1596 1.1 mrg add x2,t2,x2 /* <x1,x2> *= 0x41 */
1597 1.1 mrg addc x1,t1,x1
1598 1.1 mrg
1599 1.1 mrg shd x1,x2,20,t1 /* <t1,t2> = <x1,x2> << 12 */
1600 1.1 mrg shd x2,0,20,t2
1601 1.1 mrg add x2,t2,x2 /* <t1,x2> = <x1,x2> * 0x1001 */
1602 1.1 mrg addc x1,t1,t1
1603 1.1 mrg
1604 1.1 mrg /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1605 1.1 mrg
1606 1.1 mrg copy 0,x1 /* quotient accumulator = 0 */
1607 1.1 mrg shd,= t1,x2,24,t1 /* tentative quotient */
1608 1.1 mrg LSYM(3)
1609 1.1 mrg addb,tr t1,x1,LREF(4) /* add to previous quotient */
1610 1.1 mrg extru x2,31,24,x2 /* new remainder (unadjusted) */
1611 1.1 mrg
1612 1.1 mrg MILLIRET
1613 1.1 mrg sub 0,x1,x1 /* negate result */
1614 1.1 mrg
1615 1.1 mrg LSYM(4)
1616 1.1 mrg addb,tr t1,x2,LREF(3) /* adjust remainder */
1617 1.1 mrg extru,= x2,7,8,t1 /* new quotient */
1618 1.1 mrg
1619 1.1 mrg GSYM($$divU_7) /* Unsigned divide by 7: increment the dividend into
   the two-word value <x1,x2>, multiply it by 9 (8*v + v), then join the
   signed code at LREF(pos7). */
1620 1.1 mrg .export $$divU_7,millicode
1621 1.1 mrg addi 1,x2,x2 /* can carry */
1622 1.1 mrg addc 0,0,x1 /* x1 = carry out of the addi above */
1623 1.1 mrg shd x1,x2,29,t1 /* t1 = high word of 8 * <x1,x2> */
1624 1.1 mrg sh3add x2,x2,x2 /* x2 = 8*x2 + x2 */
1625 1.1 mrg b LREF(pos7)
1626 1.1 mrg addc t1,x1,x1 /* high word of 9 * <x1,x2>, including the carry */
1627 1.1 mrg
1628 1.1 mrg /* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7)
   Signed entry point.  The dividend arrives in x2.  Each path adjusts x2
   (x2+1, or 1-x2 for a negative dividend), forms <x1,x2> = 7 * x2 as
   8*x2 - x2, and then reuses the divide-by-7 code at LREF(pos7) or
   LREF(neg7_shift), which supplies the remaining factors 0x41 and 0x1001
   (7 * 0x41 * 0x1001 = 0x1c71c7). */
1629 1.1 mrg GSYM($$divI_9)
1630 1.1 mrg .export $$divI_9,millicode
1631 1.1 mrg comb,<,n x2,0,LREF(neg9) /* negative dividend: go to neg9, skipping the addi */
1632 1.1 mrg addi 1,x2,x2 /* cannot overflow */
1633 1.1 mrg shd 0,x2,29,t1 /* <t1,t2> = 8 * x2 */
1634 1.1 mrg shd x2,0,29,t2
1635 1.1 mrg sub t2,x2,x2 /* low word of 8*x2 - x2 */
1636 1.1 mrg b LREF(pos7)
1637 1.1 mrg subb t1,0,x1 /* high word, with the borrow from the sub */
1638 1.1 mrg
1639 1.1 mrg LSYM(neg9)
1640 1.1 mrg subi 1,x2,x2 /* negate and add 1 */
1641 1.1 mrg shd 0,x2,29,t1 /* <t1,t2> = 8 * x2 */
1642 1.1 mrg shd x2,0,29,t2
1643 1.1 mrg sub t2,x2,x2 /* low word of 8*x2 - x2 */
1644 1.1 mrg b LREF(neg7_shift)
1645 1.1 mrg subb t1,0,x1 /* high word, with the borrow from the sub */
1646 1.1 mrg
1647 1.1 mrg GSYM($$divU_9) /* Unsigned divide by 9: increment the dividend into
   the two-word value <x1,x2>, multiply it by 7 (8*v - v), then join the
   divide-by-7 code at LREF(pos7). */
1648 1.1 mrg .export $$divU_9,millicode
1649 1.1 mrg addi 1,x2,x2 /* can carry */
1650 1.1 mrg addc 0,0,x1 /* x1 = carry out of the addi above */
1651 1.1 mrg shd x1,x2,29,t1 /* t1 = high word of 8 * <x1,x2> */
1652 1.1 mrg shd x2,0,29,t2 /* t2 = low word of 8 * <x1,x2> */
1653 1.1 mrg sub t2,x2,x2 /* low word of the product */
1654 1.1 mrg b LREF(pos7)
1655 1.1 mrg subb t1,x1,x1 /* high word, with the borrow from the sub */
1656 1.1 mrg
1657 1.1 mrg /* DIVISION BY 14 (shift to divide by 2 then divide by 7)
   $$divI_14 is the signed entry point and $$divU_14 the unsigned one.
   A non-negative signed dividend falls straight through into $$divU_14,
   which halves x2 and enters the divide-by-7 code at LREF(7).  A negative
   dividend is turned into 2-x2, halved, and enters the negative
   divide-by-7 code at LREF(8). */
1658 1.1 mrg GSYM($$divI_14)
1659 1.1 mrg .export $$divI_14,millicode
1660 1.1 mrg comb,<,n x2,0,LREF(neg14) /* negative dividend: go to neg14 */
1661 1.1 mrg GSYM($$divU_14)
1662 1.1 mrg .export $$divU_14,millicode
1663 1.1 mrg b LREF(7) /* go to 7 case */
1664 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1665 1.1 mrg
1666 1.1 mrg LSYM(neg14)
1667 1.1 mrg subi 2,x2,x2 /* negate (and add 2) */
1668 1.1 mrg b LREF(8) /* go to the negative 7 case */
1669 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1670 1.1 mrg .exit
1671 1.1 mrg .procend
1672 1.1 mrg .end
1673 1.1 mrg #endif
1674 1.1 mrg
1675 1.1 mrg #ifdef L_mulI
1676 1.1 mrg /* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
1677 1.1 mrg /******************************************************************************
1678 1.1 mrg This routine is used on PA2.0 processors when gcc -mno-fpregs is used
1679 1.1 mrg
1680 1.1 mrg ROUTINE: $$mulI
1681 1.1 mrg
1682 1.1 mrg
1683 1.1 mrg DESCRIPTION:
1684 1.1 mrg
1685 1.1 mrg $$mulI multiplies two single word integers, giving a single
1686 1.1 mrg word result.
1687 1.1 mrg
1688 1.1 mrg
1689 1.1 mrg INPUT REGISTERS:
1690 1.1 mrg
1691 1.1 mrg arg0 = Operand 1
1692 1.1 mrg arg1 = Operand 2
1693 1.1 mrg r31 == return pc
1694 1.1 mrg sr0 == return space when called externally
1695 1.1 mrg
1696 1.1 mrg
1697 1.1 mrg OUTPUT REGISTERS:
1698 1.1 mrg
1699 1.1 mrg arg0 = undefined
1700 1.1 mrg arg1 = undefined
1701 1.1 mrg ret1 = result
1702 1.1 mrg
1703 1.1 mrg OTHER REGISTERS AFFECTED:
1704 1.1 mrg
1705 1.1 mrg r1 = undefined
1706 1.1 mrg
1707 1.1 mrg SIDE EFFECTS:
1708 1.1 mrg
1709 1.1 mrg Causes a trap under the following conditions: NONE
1710 1.1 mrg Changes memory at the following places: NONE
1711 1.1 mrg
1712 1.1 mrg PERMISSIBLE CONTEXT:
1713 1.1 mrg
1714 1.1 mrg Unwindable
1715 1.1 mrg Does not create a stack frame
1716 1.1 mrg Is usable for internal or external microcode
1717 1.1 mrg
1718 1.1 mrg DISCUSSION:
1719 1.1 mrg
1720 1.1 mrg Calls other millicode routines via mrp: NONE
1721 1.1 mrg Calls other millicode routines: NONE
1722 1.1 mrg
1723 1.1 mrg ***************************************************************************/
1724 1.1 mrg
1725 1.1 mrg /* Register aliases and one-instruction macros used by $$mulI.
   a0 and a1 are the two operands (%arg0, %arg1), t0 is the scratch
   register %r1 and r is the result register %ret1.
   A macro name spells out the operation it performs: "__" reads as "=",
   a single "_" between terms as "+", and "m" as "-".  Examples:
     t0__3a0      t0 = 3*a0            r__r_2t0     r = r + 2*t0
     t0__t0ma0    t0 = t0 - a0         t0__4t0_a0   t0 = 4*t0 + a0
     a0__256a0    a0 = 256*a0, i.e. a0 <<= 8
   a1_ne_0_b_lN compares a1 with 0 and branches to LREF(lN) when they
   differ.  b_n_ret_t0 and the b_e_* macros branch to the like-named
   labels that follow the x0..x255 table; note that b_e_2t04a0 and
   b_e_4t08a0 target LREF(e2t04a0) and LREF(e4t08a0), which are spelled
   without the underscore after the "e". */
1726 1.1 mrg #define a0 %arg0
1727 1.1 mrg #define a1 %arg1
1728 1.1 mrg #define t0 %r1
1729 1.1 mrg #define r %ret1
1730 1.1 mrg
1731 1.1 mrg #define a0__128a0 zdep a0,24,25,a0
1732 1.1 mrg #define a0__256a0 zdep a0,23,24,a0
1733 1.1 mrg #define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
1734 1.1 mrg #define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
1735 1.1 mrg #define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
1736 1.1 mrg #define b_n_ret_t0 b,n LREF(ret_t0)
1737 1.1 mrg #define b_e_shift b LREF(e_shift)
1738 1.1 mrg #define b_e_t0ma0 b LREF(e_t0ma0)
1739 1.1 mrg #define b_e_t0 b LREF(e_t0)
1740 1.1 mrg #define b_e_t0a0 b LREF(e_t0a0)
1741 1.1 mrg #define b_e_t02a0 b LREF(e_t02a0)
1742 1.1 mrg #define b_e_t04a0 b LREF(e_t04a0)
1743 1.1 mrg #define b_e_2t0 b LREF(e_2t0)
1744 1.1 mrg #define b_e_2t0a0 b LREF(e_2t0a0)
1745 1.1 mrg #define b_e_2t04a0 b LREF(e2t04a0)
1746 1.1 mrg #define b_e_3t0 b LREF(e_3t0)
1747 1.1 mrg #define b_e_4t0 b LREF(e_4t0)
1748 1.1 mrg #define b_e_4t0a0 b LREF(e_4t0a0)
1749 1.1 mrg #define b_e_4t08a0 b LREF(e4t08a0)
1750 1.1 mrg #define b_e_5t0 b LREF(e_5t0)
1751 1.1 mrg #define b_e_8t0 b LREF(e_8t0)
1752 1.1 mrg #define b_e_8t0a0 b LREF(e_8t0a0)
1753 1.1 mrg #define r__r_a0 add r,a0,r
1754 1.1 mrg #define r__r_2a0 sh1add a0,r,r
1755 1.1 mrg #define r__r_4a0 sh2add a0,r,r
1756 1.1 mrg #define r__r_8a0 sh3add a0,r,r
1757 1.1 mrg #define r__r_t0 add r,t0,r
1758 1.1 mrg #define r__r_2t0 sh1add t0,r,r
1759 1.1 mrg #define r__r_4t0 sh2add t0,r,r
1760 1.1 mrg #define r__r_8t0 sh3add t0,r,r
1761 1.1 mrg #define t0__3a0 sh1add a0,a0,t0
1762 1.1 mrg #define t0__4a0 sh2add a0,0,t0
1763 1.1 mrg #define t0__5a0 sh2add a0,a0,t0
1764 1.1 mrg #define t0__8a0 sh3add a0,0,t0
1765 1.1 mrg #define t0__9a0 sh3add a0,a0,t0
1766 1.1 mrg #define t0__16a0 zdep a0,27,28,t0
1767 1.1 mrg #define t0__32a0 zdep a0,26,27,t0
1768 1.1 mrg #define t0__64a0 zdep a0,25,26,t0
1769 1.1 mrg #define t0__128a0 zdep a0,24,25,t0
1770 1.1 mrg #define t0__t0ma0 sub t0,a0,t0
1771 1.1 mrg #define t0__t0_a0 add t0,a0,t0
1772 1.1 mrg #define t0__t0_2a0 sh1add a0,t0,t0
1773 1.1 mrg #define t0__t0_4a0 sh2add a0,t0,t0
1774 1.1 mrg #define t0__t0_8a0 sh3add a0,t0,t0
1775 1.1 mrg #define t0__2t0_a0 sh1add t0,a0,t0
1776 1.1 mrg #define t0__3t0 sh1add t0,t0,t0
1777 1.1 mrg #define t0__4t0 sh2add t0,0,t0
1778 1.1 mrg #define t0__4t0_a0 sh2add t0,a0,t0
1779 1.1 mrg #define t0__5t0 sh2add t0,t0,t0
1780 1.1 mrg #define t0__8t0 sh3add t0,0,t0
1781 1.1 mrg #define t0__8t0_a0 sh3add t0,a0,t0
1782 1.1 mrg #define t0__9t0 sh3add t0,t0,t0
1783 1.1 mrg #define t0__16t0 zdep t0,27,28,t0
1784 1.1 mrg #define t0__32t0 zdep t0,26,27,t0
1785 1.1 mrg #define t0__256a0 zdep a0,23,24,t0
1786 1.1 mrg
1787 1.1 mrg
1788 1.1 mrg SUBSPA_MILLI
1789 1.1 mrg ATTR_MILLI
1790 1.1 mrg .align 16
1791 1.1 mrg .proc
1792 1.1 mrg .callinfo millicode
1793 1.1 mrg .export $$mulI,millicode
1794 1.1 mrg GSYM($$mulI) /* r = a0 * a1.  After the operands are ordered and
   signs handled, a1 is consumed eight bits at a time: the low byte of a1
   selects one of the 256 four-instruction rows x0..x255 through blr,
   that row adds (byte * a0) to r, and while a1 is still non-zero control
   returns to l0/l1/l2 with a0 shifted left by 8 for the next byte. */
1795 1.1 mrg combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
1796 1.1 mrg copy 0,r /* zero out the result */
1797 1.1 mrg xor a0,a1,a0 /* swap a0 & a1 using the */
1798 1.1 mrg xor a0,a1,a1 /* old xor trick */
1799 1.1 mrg xor a0,a1,a0
1800 1.1 mrg LSYM(l4)
1801 1.1 mrg combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
1802 1.1 mrg zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1803 1.1 mrg sub,> 0,a1,t0 /* otherwise negate both and */
1804 1.1 mrg combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
1805 1.1 mrg sub 0,a0,a1 /* a1 = -a0 */
1806 1.1 mrg movb,tr,n t0,a0,LREF(l2) /* a0 = t0, always branch to l2; 10th inst. */
1807 1.1 mrg
1808 1.1 mrg LSYM(l0) r__r_t0 /* add in this partial product */
1809 1.1 mrg LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
1810 1.1 mrg LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1811 1.1 mrg LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
1812 1.1 mrg extru a1,23,24,a1 /* a1 >>= 8 ****************** */
1813 1.1 mrg
1814 1.1 mrg /*16 insts before this.
   Dispatch table for the blr at l3.  Row xN is selected when the current
   low byte of the multiplier is N, and adds N*a0 to r.  Every row is
   exactly four instructions ('!' separates them), so rows must not be
   lengthened, shortened or reordered.  A row either finishes in place
   (returning when a1 is zero, otherwise branching back to l0, l1 or l2)
   or branches to one of the shared tails that follow the table. */
1815 1.1 mrg /* a0 <<= 8 ************************** */
1816 1.1 mrg LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
1817 1.1 mrg LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
1818 1.1 mrg LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
1819 1.1 mrg LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
1820 1.1 mrg LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
1821 1.1 mrg LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
1822 1.1 mrg LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1823 1.1 mrg LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
1824 1.1 mrg LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
1825 1.1 mrg LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
1826 1.1 mrg LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1827 1.1 mrg LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1828 1.1 mrg LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1829 1.1 mrg LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1830 1.1 mrg LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1831 1.1 mrg LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
1832 1.1 mrg LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1833 1.1 mrg LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
1834 1.1 mrg LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1835 1.1 mrg LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
1836 1.1 mrg LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1837 1.1 mrg LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1838 1.1 mrg LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1839 1.1 mrg LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1840 1.1 mrg LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1841 1.1 mrg LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1842 1.1 mrg LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1843 1.1 mrg LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
1844 1.1 mrg LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1845 1.1 mrg LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1846 1.1 mrg LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1847 1.1 mrg LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1848 1.1 mrg LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1849 1.1 mrg LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1850 1.1 mrg LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1851 1.1 mrg LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
1852 1.1 mrg LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1853 1.1 mrg LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1854 1.1 mrg LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1855 1.1 mrg LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1856 1.1 mrg LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1857 1.1 mrg LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1858 1.1 mrg LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1859 1.1 mrg LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1860 1.1 mrg LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1861 1.1 mrg LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1862 1.1 mrg LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
1863 1.1 mrg LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
1864 1.1 mrg LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
1865 1.1 mrg LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
1866 1.1 mrg LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1867 1.1 mrg LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
1868 1.1 mrg LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1869 1.1 mrg LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1870 1.1 mrg LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1871 1.1 mrg LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
1872 1.1 mrg LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1873 1.1 mrg LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
1874 1.1 mrg LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1875 1.1 mrg LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
1876 1.1 mrg LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1877 1.1 mrg LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1878 1.1 mrg LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1879 1.1 mrg LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1880 1.1 mrg LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1881 1.1 mrg LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1882 1.1 mrg LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1883 1.1 mrg LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1884 1.1 mrg LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1885 1.1 mrg LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1886 1.1 mrg LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
1887 1.1 mrg LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
1888 1.1 mrg LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1889 1.1 mrg LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
1890 1.1 mrg LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1891 1.1 mrg LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1892 1.1 mrg LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1893 1.1 mrg LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1894 1.1 mrg LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
1895 1.1 mrg LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1896 1.1 mrg LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
1897 1.1 mrg LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
1898 1.1 mrg LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1899 1.1 mrg LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1900 1.1 mrg LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1901 1.1 mrg LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1902 1.1 mrg LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1903 1.1 mrg LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
1904 1.1 mrg LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1905 1.1 mrg LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1906 1.1 mrg LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1907 1.1 mrg LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
1908 1.1 mrg LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1909 1.1 mrg LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
1910 1.1 mrg LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
1911 1.1 mrg LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1912 1.1 mrg LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1913 1.1 mrg LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1914 1.1 mrg LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
1915 1.1 mrg LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1916 1.1 mrg LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
1917 1.1 mrg LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1918 1.1 mrg LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
1919 1.1 mrg LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
1920 1.1 mrg LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1921 1.1 mrg LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1922 1.1 mrg LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
1923 1.1 mrg LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
1924 1.1 mrg LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1925 1.1 mrg LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1926 1.1 mrg LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
1927 1.1 mrg LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1928 1.1 mrg LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
1929 1.1 mrg LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
1930 1.1 mrg LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
1931 1.1 mrg LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
1932 1.1 mrg LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
1933 1.1 mrg LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
1934 1.1 mrg LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
1935 1.1 mrg LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
1936 1.1 mrg LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1937 1.1 mrg LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1938 1.1 mrg LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1939 1.1 mrg LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1940 1.1 mrg LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
1941 1.1 mrg LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
1942 1.1 mrg LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1943 1.1 mrg LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1944 1.1 mrg LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1945 1.1 mrg LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
1946 1.1 mrg LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1947 1.1 mrg LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1948 1.1 mrg LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1949 1.1 mrg LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1950 1.1 mrg LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1951 1.1 mrg LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
1952 1.1 mrg LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1953 1.1 mrg LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1954 1.1 mrg LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1955 1.1 mrg LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
1956 1.1 mrg LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
1957 1.1 mrg LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
1958 1.1 mrg LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
1959 1.1 mrg LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
1960 1.1 mrg LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
1961 1.1 mrg LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
1962 1.1 mrg LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1963 1.1 mrg LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1964 1.1 mrg LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1965 1.1 mrg LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1966 1.1 mrg LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1967 1.1 mrg LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1968 1.1 mrg LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1969 1.1 mrg LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1970 1.1 mrg LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1971 1.1 mrg LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
1972 1.1 mrg LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1973 1.1 mrg LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
1974 1.1 mrg LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
1975 1.1 mrg LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1976 1.1 mrg LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
1977 1.1 mrg LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1978 1.1 mrg LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
1979 1.1 mrg LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
1980 1.1 mrg LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
1981 1.1 mrg LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1982 1.1 mrg LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
1983 1.1 mrg LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1984 1.1 mrg LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1985 1.1 mrg LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
1986 1.1 mrg LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
1987 1.1 mrg LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
1988 1.1 mrg LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
1989 1.1 mrg LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
1990 1.1 mrg LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
1991 1.1 mrg LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
1992 1.1 mrg LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
1993 1.1 mrg LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
1994 1.1 mrg LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
1995 1.1 mrg LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
1996 1.1 mrg LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
1997 1.1 mrg LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1998 1.1 mrg LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
1999 1.1 mrg LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
2000 1.1 mrg LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
2001 1.1 mrg LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
2002 1.1 mrg LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
2003 1.1 mrg LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
2004 1.1 mrg LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
2005 1.1 mrg LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
2006 1.1 mrg LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
2007 1.1 mrg LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
2008 1.1 mrg LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
2009 1.1 mrg LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
2010 1.1 mrg LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
2011 1.1 mrg LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
2012 1.1 mrg LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
2013 1.1 mrg LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
2014 1.1 mrg LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
2015 1.1 mrg LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2016 1.1 mrg LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
2017 1.1 mrg LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
2018 1.1 mrg LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
2019 1.1 mrg LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
2020 1.1 mrg LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2021 1.1 mrg LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
2022 1.1 mrg LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
2023 1.1 mrg LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2024 1.1 mrg LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
2025 1.1 mrg LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
2026 1.1 mrg LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
2027 1.1 mrg LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
2028 1.1 mrg LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
2029 1.1 mrg LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
2030 1.1 mrg LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
2031 1.1 mrg LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
2032 1.1 mrg LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
2033 1.1 mrg LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
2034 1.1 mrg LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
2035 1.1 mrg LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
2036 1.1 mrg LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
2037 1.1 mrg LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
2038 1.1 mrg LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
2039 1.1 mrg LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2040 1.1 mrg LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
2041 1.1 mrg LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
2042 1.1 mrg LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
2043 1.1 mrg LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
2044 1.1 mrg LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2045 1.1 mrg LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
2046 1.1 mrg LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
2047 1.1 mrg LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2048 1.1 mrg LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
2049 1.1 mrg LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
2050 1.1 mrg LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
2051 1.1 mrg LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
2052 1.1 mrg LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
2053 1.1 mrg LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
2054 1.1 mrg LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
2055 1.1 mrg LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
2056 1.1 mrg LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
2057 1.1 mrg LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
2058 1.1 mrg LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
2059 1.1 mrg LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
2060 1.1 mrg LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
2061 1.1 mrg LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
2062 1.1 mrg LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
2063 1.1 mrg LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
2064 1.1 mrg LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
2065 1.1 mrg LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
2066 1.1 mrg LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
2067 1.1 mrg LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
2068 1.1 mrg LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
2069 1.1 mrg LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
2070 1.1 mrg LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
2071 1.1 mrg LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
2072 1.1 mrg /*1040 insts before this.
   Shared tails for the x0..x255 rows.  Each tail applies one last step
   to t0 and/or r, then either continues with the next multiplier byte
   (a1 != 0: branch back to l0, l1 or l2) or returns.  The instruction
   written directly after a branch or MILLIRET is relied on to execute
   with it (MILLIRET and MILLIRETN are defined outside this section). */
2073 1.1 mrg LSYM(ret_t0) MILLIRET /* return; the r__r_t0 on the next line adds t0 to r */
2074 1.1 mrg LSYM(e_t0) r__r_t0 /* r += t0, then fall into e_shift */
2075 1.1 mrg LSYM(e_shift) a1_ne_0_b_l2 /* more multiplier bits: back to l2 */
2076 1.1 mrg a0__256a0 /* a0 <<= 8 *********** */
2077 1.1 mrg MILLIRETN
2078 1.1 mrg LSYM(e_t0ma0) a1_ne_0_b_l0 /* r += t0 - a0 */
2079 1.1 mrg t0__t0ma0
2080 1.1 mrg MILLIRET
2081 1.1 mrg r__r_t0
2082 1.1 mrg LSYM(e_t0a0) a1_ne_0_b_l0 /* r += t0 + a0 */
2083 1.1 mrg t0__t0_a0
2084 1.1 mrg MILLIRET
2085 1.1 mrg r__r_t0
2086 1.1 mrg LSYM(e_t02a0) a1_ne_0_b_l0 /* r += t0 + 2*a0 */
2087 1.1 mrg t0__t0_2a0
2088 1.1 mrg MILLIRET
2089 1.1 mrg r__r_t0
2090 1.1 mrg LSYM(e_t04a0) a1_ne_0_b_l0 /* r += t0 + 4*a0 */
2091 1.1 mrg t0__t0_4a0
2092 1.1 mrg MILLIRET
2093 1.1 mrg r__r_t0
2094 1.1 mrg LSYM(e_2t0) a1_ne_0_b_l1 /* r += 2*t0 */
2095 1.1 mrg r__r_2t0
2096 1.1 mrg MILLIRETN
2097 1.1 mrg LSYM(e_2t0a0) a1_ne_0_b_l0 /* r += 2*t0 + a0 */
2098 1.1 mrg t0__2t0_a0
2099 1.1 mrg MILLIRET
2100 1.1 mrg r__r_t0
2101 1.1 mrg LSYM(e2t04a0) t0__t0_2a0 /* r += 2*(t0 + 2*a0) = 2*t0 + 4*a0 */
2102 1.1 mrg a1_ne_0_b_l1
2103 1.1 mrg r__r_2t0
2104 1.1 mrg MILLIRETN
2105 1.1 mrg LSYM(e_3t0) a1_ne_0_b_l0 /* r += 3*t0 */
2106 1.1 mrg t0__3t0
2107 1.1 mrg MILLIRET
2108 1.1 mrg r__r_t0
2109 1.1 mrg LSYM(e_4t0) a1_ne_0_b_l1 /* r += 4*t0 */
2110 1.1 mrg r__r_4t0
2111 1.1 mrg MILLIRETN
2112 1.1 mrg LSYM(e_4t0a0) a1_ne_0_b_l0 /* r += 4*t0 + a0 */
2113 1.1 mrg t0__4t0_a0
2114 1.1 mrg MILLIRET
2115 1.1 mrg r__r_t0
2116 1.1 mrg LSYM(e4t08a0) t0__t0_2a0 /* r += 4*(t0 + 2*a0) = 4*t0 + 8*a0 */
2117 1.1 mrg a1_ne_0_b_l1
2118 1.1 mrg r__r_4t0
2119 1.1 mrg MILLIRETN
2120 1.1 mrg LSYM(e_5t0) a1_ne_0_b_l0 /* r += 5*t0 */
2121 1.1 mrg t0__5t0
2122 1.1 mrg MILLIRET
2123 1.1 mrg r__r_t0
2124 1.1 mrg LSYM(e_8t0) a1_ne_0_b_l1 /* r += 8*t0 */
2125 1.1 mrg r__r_8t0
2126 1.1 mrg MILLIRETN
2127 1.1 mrg LSYM(e_8t0a0) a1_ne_0_b_l0 /* r += 8*t0 + a0 */
2128 1.1 mrg t0__8t0_a0
2129 1.1 mrg MILLIRET
2130 1.1 mrg r__r_t0
2131 1.1 mrg
2132 1.1 mrg .procend
2133 1.1 mrg .end
2134 1.1 mrg #endif
2135