/* 32 and 64-bit millicode, original author Hewlett-Packard
   adapted for gcc by Paul Bame <bame (at) debian.org>
   and Alan Modra <alan (at) linuxcare.com.au>.

   Copyright (C) 2001-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif

#ifdef pa64
	/* Select the 64-bit (PA 2.0 wide) instruction set.  */
	.level	2.0w
#endif

/* Hardware General Registers.  */
r0:	.reg	%r0
r1:	.reg	%r1
r2:	.reg	%r2
r3:	.reg	%r3
r4:	.reg	%r4
r5:	.reg	%r5
r6:	.reg	%r6
r7:	.reg	%r7
r8:	.reg	%r8
r9:	.reg	%r9
r10:	.reg	%r10
r11:	.reg	%r11
r12:	.reg	%r12
r13:	.reg	%r13
r14:	.reg	%r14
r15:	.reg	%r15
r16:	.reg	%r16
r17:	.reg	%r17
r18:	.reg	%r18
r19:	.reg	%r19
r20:	.reg	%r20
r21:	.reg	%r21
r22:	.reg	%r22
r23:	.reg	%r23
r24:	.reg	%r24
r25:	.reg	%r25
r26:	.reg	%r26
r27:	.reg	%r27
r28:	.reg	%r28
r29:	.reg	%r29
r30:	.reg	%r30
r31:	.reg	%r31

/* Hardware Space Registers.  */
sr0:	.reg	%sr0
sr1:	.reg	%sr1
sr2:	.reg	%sr2
sr3:	.reg	%sr3
sr4:	.reg	%sr4
sr5:	.reg	%sr5
sr6:	.reg	%sr6
sr7:	.reg	%sr7

/* Hardware Floating Point Registers.  */
fr0:	.reg	%fr0
fr1:	.reg	%fr1
fr2:	.reg	%fr2
fr3:	.reg	%fr3
fr4:	.reg	%fr4
fr5:	.reg	%fr5
fr6:	.reg	%fr6
fr7:	.reg	%fr7
fr8:	.reg	%fr8
fr9:	.reg	%fr9
fr10:	.reg	%fr10
fr11:	.reg	%fr11
fr12:	.reg	%fr12
fr13:	.reg	%fr13
fr14:	.reg	%fr14
fr15:	.reg	%fr15

/* Hardware Control Registers.  */
cr11:	.reg	%cr11
sar:	.reg	%cr11	/* Shift Amount Register */

/* Software Architecture General Registers.  */
rp:	.reg	r2	/* return pointer */
#ifdef pa64
mrp:	.reg	r2	/* millicode return pointer */
#else
mrp:	.reg	r31	/* millicode return pointer */
#endif
ret0:	.reg	r28	/* return value */
ret1:	.reg	r29	/* return value (high part of double) */
sp:	.reg	r30	/* stack pointer */
dp:	.reg	r27	/* data pointer */
arg0:	.reg	r26	/* argument */
arg1:	.reg	r25	/* argument or high part of double argument */
arg2:	.reg	r24	/* argument */
arg3:	.reg	r23	/* argument or high part of double argument */

/* Software Architecture Space Registers.  */
/*		sr0	; return link from BLE */
sret:	.reg	sr1	/* return value */
sarg:	.reg	sr1	/* argument */
/*		sr4	; PC SPACE tracker */
/*		sr5	; process private data */

/* Frame Offsets (millicode convention!)  Used when calling other
   millicode routines.  Stack unwinding is dependent upon these
   definitions.  */
r31_slot:	.equ	-20	/* "current RP" slot */
sr0_slot:	.equ	-16	/* "static link" slot */
#if defined(pa64)
mrp_slot:	.equ	-16	/* "current RP" slot */
psp_slot:	.equ	-8	/* "previous SP" slot */
#else
mrp_slot:	.equ	-20	/* "current RP" slot (replacing "r31_slot") */
#endif

/* DEFINE/RDEFINE: declare an assembler constant / register alias.  */
#define DEFINE(name,value)name:	.EQU	value
#define RDEFINE(name,value)name:	.REG	value
#ifdef milliext
/* External millicode: inter-space branches via BE/BLE through sr7,
   returning through the space register saved in sr0 by BLE.  */
#define MILLI_BE(lbl)	BE lbl(sr7,r0)
#define MILLI_BEN(lbl)	BE,n lbl(sr7,r0)
#define MILLI_BLE(lbl)	BLE lbl(sr7,r0)
#define MILLI_BLEN(lbl)	BLE,n lbl(sr7,r0)
#define MILLIRETN	BE,n 0(sr0,mrp)
#define MILLIRET	BE 0(sr0,mrp)
#define MILLI_RETN	BE,n 0(sr0,mrp)
#define MILLI_RET	BE 0(sr0,mrp)
#else
/* Internal millicode: plain local branches, return via BV through mrp.  */
#define MILLI_BE(lbl)	B lbl
#define MILLI_BEN(lbl)	B,n lbl
#define MILLI_BLE(lbl)	BL lbl,mrp
#define MILLI_BLEN(lbl)	BL,n lbl,mrp
#define MILLIRETN	BV,n 0(mrp)
#define MILLIRET	BV 0(mrp)
#define MILLI_RETN	BV,n 0(mrp)
#define MILLI_RET	BV 0(mrp)
#endif

#ifdef __STDC__
#define CAT(a,b)	a##b
#else
#define CAT(a,b)	a/**/b
#endif

#ifdef ELF
#define SUBSPA_MILLI	 .section .text
#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits!  .align 16
#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits!  .align 16
#define ATTR_MILLI
#define SUBSPA_DATA	 .section .data
#define ATTR_DATA
#define GLOBAL		 $global$
#define GSYM(sym)	 !sym:
#define LSYM(sym)	 !CAT(.L,sym:)
#define LREF(sym)	 CAT(.L,sym)

#else

#ifdef coff
/* This used to be .milli but since link32 places different named
   sections in different segments millicode ends up a long ways away
   from .text (1meg?).  This way they will be a lot closer.

   The SUBSPA_MILLI_* specify locality sets for certain millicode
   modules in order to ensure that modules that call one another are
   placed close together.  Without locality sets this is unlikely to
   happen because of the Dynamite linker library search algorithm.  We
   want these modules close together so that short calls always reach
   (we don't want to require long calls or use long call stubs).  */

#define SUBSPA_MILLI	 .subspa .text
#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
#define ATTR_MILLI	 .attr code,read,execute
#define SUBSPA_DATA	 .subspa .data
#define ATTR_DATA	 .attr init_data,read,write
#define GLOBAL		 _gp
#else
#define SUBSPA_MILLI	 .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
#define SUBSPA_MILLI_DIV SUBSPA_MILLI
#define SUBSPA_MILLI_MUL SUBSPA_MILLI
#define ATTR_MILLI
#define SUBSPA_DATA	 .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
#define ATTR_DATA
#define GLOBAL		 $global$
#endif
#define SPACE_DATA	 .space $PRIVATE$,spnum=1,sort=16

#define GSYM(sym)	 !sym
#define LSYM(sym)	 !CAT(L$,sym)
#define LREF(sym)	 CAT(L$,sym)
#endif

#ifdef L_dyncall
/* $$dyncall: indirect call helper.  r22 holds either a function
   address or a function-pointer plabel (marked by bit 30); for a
   plabel, load the real target and the new linkage-table pointer (r19)
   from the plabel before branching.  */
	SUBSPA_MILLI
	ATTR_DATA
GSYM($$dyncall)
	.export $$dyncall,millicode
	.proc
	.callinfo	millicode
	.entry
#ifdef LINUX
	extru,<>	%r22,30,1,%r0	; nullify if plabel bit set
	bv,n	%r0(%r22)		; branch to target
	ldw	-2(%r22),%r21		; load address of target
	bv	%r0(%r21)		; branch to the real target
	ldw	2(%r22),%r19		; load new LTP value
#else
	bb,>=,n	%r22,30,LREF(1)		; branch if not plabel address
	ldw	-2(%r22),%r21		; load address of target to r21
	ldsid	(%sr0,%r21),%r1		; get the "space ident" selected by r21
	ldw	2(%r22),%r19		; load new LTP value
	mtsp	%r1,%sr0		; move that space identifier into sr0
	be	0(%sr0,%r21)		; branch to the real target
	stw	%r2,-24(%r30)		; save return address into frame marker
LSYM(1)
	ldsid	(%sr0,%r22),%r1		; get the "space ident" selected by r22
	mtsp	%r1,%sr0		; move that space identifier into sr0
	be	0(%sr0,%r22)		; branch to the target
	stw	%r2,-24(%r30)		; save return address into frame marker
#endif
	.exit
	.procend
#endif

#ifdef L_divI
/* ROUTINES:	$$divI, $$divoI

	Single precision divide for signed binary integers.

	The quotient is truncated towards zero.
	The sign of the quotient is the XOR of the signs of the dividend and
	divisor.
	Divide by zero is trapped.
	Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.

   INPUT REGISTERS:
   .	arg0 ==	dividend
   .	arg1 ==	divisor
   .	mrp  == return pc
   .	sr0  == return space when called externally

   OUTPUT REGISTERS:
   .	arg0 =	undefined
   .	arg1 =	undefined
   .	ret1 =	quotient

   OTHER REGISTERS AFFECTED:
   .	r1 =	undefined

   SIDE EFFECTS:
   .	Causes a trap under the following conditions:
   .		divisor is zero  (traps with ADDIT,=  0,25,0)
   .		dividend==-2**31  and divisor==-1 and routine is $$divoI
   .				 (traps with ADDO  26,25,0)
   .	Changes memory at the following places:
   .		NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable.
   .	Suitable for internal or external millicode.
   .	Assumes the special millicode register conventions.

   DISCUSSION:
   .	Branchs to other millicode routines using BE
   .		$$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
   .
   .	For selected divisors, calls a divide by constant routine written by
   .	Karl Pettis.  Eligible divisors are 1..15 excluding 11 and 13.
   .
   .	The only overflow case is -2**31 divided by -1.
   .	Both routines return -2**31 but only $$divoI traps.  */

RDEFINE(temp,r1)
RDEFINE(retreg,ret1)	/*  r29 */
RDEFINE(temp1,arg0)
	SUBSPA_MILLI_DIV
	ATTR_MILLI
	.import $$divI_2,millicode
	.import $$divI_3,millicode
	.import $$divI_4,millicode
	.import $$divI_5,millicode
	.import $$divI_6,millicode
	.import $$divI_7,millicode
	.import $$divI_8,millicode
	.import $$divI_9,millicode
	.import $$divI_10,millicode
	.import $$divI_12,millicode
	.import $$divI_14,millicode
	.import $$divI_15,millicode
	.export $$divI,millicode
	.export $$divoI,millicode
	.proc
	.callinfo	millicode
	.entry
GSYM($$divoI)
	comib,=,n  -1,arg1,LREF(negative1)	/*  when divisor == -1 */
GSYM($$divI)
	ldo	-1(arg1),temp		/*  is there at most one bit set ? */
	and,<>	arg1,temp,r0		/*  if not, don't use power of 2 divide */
	addi,>	0,arg1,r0		/*  if divisor > 0, use power of 2 divide */
	b,n	LREF(neg_denom)
LSYM(pow2)
	addi,>=	0,arg0,retreg		/*  if numerator is negative, add the */
	add	arg0,temp,retreg	/*  (denominator -1) to correct for shifts */
	extru,=	arg1,15,16,temp		/*  test denominator with 0xffff0000 */
	extrs	retreg,15,16,retreg	/*  retreg = retreg >> 16 */
	or	arg1,temp,arg1		/*  arg1 = arg1 | (arg1 >> 16) */
	ldi	0xcc,temp1		/*  setup 0xcc in temp1 */
	extru,=	arg1,23,8,temp		/*  test denominator with 0xff00 */
	extrs	retreg,23,24,retreg	/*  retreg = retreg >> 8 */
	or	arg1,temp,arg1		/*  arg1 = arg1 | (arg1 >> 8) */
	ldi	0xaa,temp		/*  setup 0xaa in temp */
	extru,=	arg1,27,4,r0		/*  test denominator with 0xf0 */
	extrs	retreg,27,28,retreg	/*  retreg = retreg >> 4 */
	and,=	arg1,temp1,r0		/*  test denominator with 0xcc */
	extrs	retreg,29,30,retreg	/*  retreg = retreg >> 2 */
	and,=	arg1,temp,r0		/*  test denominator with 0xaa */
	extrs	retreg,30,31,retreg	/*  retreg = retreg >> 1 */
	MILLIRETN
LSYM(neg_denom)
	addi,<	0,arg1,r0		/*  if arg1 >= 0, it's not power of 2 */
	b,n	LREF(regular_seq)
	sub	r0,arg1,temp		/*  make denominator positive */
	comb,=,n  arg1,temp,LREF(regular_seq) /*  test against 0x80000000 and 0 */
	ldo	-1(temp),retreg		/*  is there at most one bit set ? */
	and,=	temp,retreg,r0		/*  if so, the denominator is power of 2 */
	b,n	LREF(regular_seq)
	sub	r0,arg0,retreg		/*  negate numerator */
	comb,=,n  arg0,retreg,LREF(regular_seq) /*  test against 0x80000000 */
	copy	retreg,arg0		/*  set up arg0, arg1 and temp	*/
	copy	temp,arg1		/*  before branching to pow2 */
	b	LREF(pow2)
	ldo	-1(arg1),temp
LSYM(regular_seq)
	comib,>>=,n 15,arg1,LREF(small_divisor)
	add,>=	0,arg0,retreg		/*  move dividend, if retreg < 0, */
LSYM(normal)
	subi	0,retreg,retreg		/*    make it positive */
	sub	0,arg1,temp		/*  clear carry,  */
					/*    negate the divisor */
	ds	0,temp,0		/*  set V-bit to the comple- */
					/*    ment of the divisor sign */
	add	retreg,retreg,retreg	/*  shift msb bit into carry */
	ds	r0,arg1,temp		/*  1st divide step, if no carry */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  2nd divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  3rd divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  4th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  5th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  6th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  7th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  8th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  9th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  10th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  11th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  12th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  13th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  14th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  15th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  16th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  17th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  18th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  19th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  20th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  21st divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  22nd divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  23rd divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  24th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  25th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  26th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  27th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  28th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  29th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  30th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  31st divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  32nd divide step, */
	addc	retreg,retreg,retreg	/*  shift last retreg bit into retreg */
	xor,>=	arg0,arg1,0		/*  get correct sign of quotient */
	sub	0,retreg,retreg		/*    based on operand signs */
	MILLIRETN
	nop

LSYM(small_divisor)

#if defined(pa64)
/*  Clear the upper 32 bits of the arg1 register.  We are working with  */
/*  small divisors (and 32-bit integers)   We must not be mislead  */
/*  by "1" bits left in the upper 32 bits.  */
	depd	%r0,31,32,%r25
#endif
	blr,n	arg1,r0
	nop
/*  table for divisor == 0,1, ... ,15 */
	addit,=	0,arg1,r0	/*  trap if divisor == 0 */
	nop
	MILLIRET		/*  divisor == 1 */
	copy	arg0,retreg
	MILLI_BEN($$divI_2)	/*  divisor == 2 */
	nop
	MILLI_BEN($$divI_3)	/*  divisor == 3 */
	nop
	MILLI_BEN($$divI_4)	/*  divisor == 4 */
	nop
	MILLI_BEN($$divI_5)	/*  divisor == 5 */
	nop
	MILLI_BEN($$divI_6)	/*  divisor == 6 */
	nop
	MILLI_BEN($$divI_7)	/*  divisor == 7 */
	nop
	MILLI_BEN($$divI_8)	/*  divisor == 8 */
	nop
	MILLI_BEN($$divI_9)	/*  divisor == 9 */
	nop
	MILLI_BEN($$divI_10)	/*  divisor == 10 */
	nop
	b	LREF(normal)	/*  divisor == 11 */
	add,>=	0,arg0,retreg
	MILLI_BEN($$divI_12)	/*  divisor == 12 */
	nop
	b	LREF(normal)	/*  divisor == 13 */
	add,>=	0,arg0,retreg
	MILLI_BEN($$divI_14)	/*  divisor == 14 */
	nop
	MILLI_BEN($$divI_15)	/*  divisor == 15 */
	nop

LSYM(negative1)
	sub	0,arg0,retreg	/*  result is negation of dividend */
	MILLIRET
	addo	arg0,arg1,r0	/*  trap iff dividend==0x80000000 && divisor==-1 */
	.exit
	.procend
	.end
#endif

#ifdef L_divU
/* ROUTINE:	$$divU
   .
   .	Single precision divide for unsigned integers.
   .
   .	Quotient is truncated towards zero.
   .	Traps on divide by zero.

   INPUT REGISTERS:
   .	arg0 ==	dividend
   .	arg1 ==	divisor
   .	mrp  == return pc
   .	sr0  == return space when called externally

   OUTPUT REGISTERS:
   .	arg0 =	undefined
   .	arg1 =	undefined
   .	ret1 =	quotient

   OTHER REGISTERS AFFECTED:
   .	r1 =	undefined

   SIDE EFFECTS:
   .	Causes a trap under the following conditions:
   .		divisor is zero
   .	Changes memory at the following places:
   .		NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable.
   .	Does not create a stack frame.
   .	Suitable for internal or external millicode.
   .	Assumes the special millicode register conventions.

   DISCUSSION:
   .	Branchs to other millicode routines using BE:
   .		$$divU_# for 3,5,6,7,9,10,12,14,15
   .
   .	For selected small divisors calls the special divide by constant
   .	routines written by Karl Pettis.  These are: 3,5,6,7,9,10,12,14,15.  */

RDEFINE(temp,r1)
RDEFINE(retreg,ret1)	/* r29 */
RDEFINE(temp1,arg0)
	SUBSPA_MILLI_DIV
	ATTR_MILLI
	.export $$divU,millicode
	.import $$divU_3,millicode
	.import $$divU_5,millicode
	.import $$divU_6,millicode
	.import $$divU_7,millicode
	.import $$divU_9,millicode
	.import $$divU_10,millicode
	.import $$divU_12,millicode
	.import $$divU_14,millicode
	.import $$divU_15,millicode
	.proc
	.callinfo	millicode
	.entry
GSYM($$divU)
/* The subtract is not nullified since it does no harm and can be used
   by the two cases that branch back to "normal".  */
	ldo	-1(arg1),temp		/* is there at most one bit set ? */
	and,=	arg1,temp,r0		/* if so, denominator is power of 2 */
	b	LREF(regular_seq)
	addit,=	0,arg1,0		/* trap for zero dvr */
	copy	arg0,retreg
	extru,=	arg1,15,16,temp		/* test denominator with 0xffff0000 */
	extru	retreg,15,16,retreg	/* retreg = retreg >> 16 */
	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 16) */
	ldi	0xcc,temp1		/* setup 0xcc in temp1 */
	extru,=	arg1,23,8,temp		/* test denominator with 0xff00 */
	extru	retreg,23,24,retreg	/* retreg = retreg >> 8 */
	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 8) */
	ldi	0xaa,temp		/* setup 0xaa in temp */
	extru,=	arg1,27,4,r0		/* test denominator with 0xf0 */
	extru	retreg,27,28,retreg	/* retreg = retreg >> 4 */
	and,=	arg1,temp1,r0		/* test denominator with 0xcc */
	extru	retreg,29,30,retreg	/* retreg = retreg >> 2 */
	and,=	arg1,temp,r0		/* test denominator with 0xaa */
	extru	retreg,30,31,retreg	/* retreg = retreg >> 1 */
	MILLIRETN
	nop
LSYM(regular_seq)
	comib,>=  15,arg1,LREF(special_divisor)
	subi	0,arg1,temp		/* clear carry, negate the divisor */
	ds	r0,temp,r0		/* set V-bit to 1 */
LSYM(normal)
	add	arg0,arg0,retreg	/* shift msb bit into carry */
	ds	r0,arg1,temp		/* 1st divide step, if no carry */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 2nd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 3rd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 4th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 5th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 6th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 7th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 8th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 9th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 10th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 11th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 12th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 13th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 14th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 15th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 16th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 17th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 18th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 19th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 20th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 21st divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 22nd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 23rd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 24th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 25th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 26th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 27th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 28th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 29th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 30th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 31st divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 32nd divide step, */
	MILLIRET
	addc	retreg,retreg,retreg	/* shift last retreg bit into retreg */

/* Handle the cases where divisor is a small constant or has high bit on.  */
LSYM(special_divisor)
/*	blr	arg1,r0 */
/*	comib,>,n  0,arg1,LREF(big_divisor) ; nullify previous instruction */

/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
   generating such a blr, comib sequence. A problem in nullification. So I
   rewrote this code.  */

#if defined(pa64)
/* Clear the upper 32 bits of the arg1 register.  We are working with
   small divisors (and 32-bit unsigned integers)   We must not be mislead
   by "1" bits left in the upper 32 bits.  */
	depd	%r0,31,32,%r25
#endif
	comib,>	0,arg1,LREF(big_divisor)
	nop
	blr	arg1,r0
	nop

LSYM(zero_divisor)	/* this label is here to provide external visibility */
	addit,=	0,arg1,0	/* trap for zero dvr */
	nop
	MILLIRET		/* divisor == 1 */
	copy	arg0,retreg
	MILLIRET		/* divisor == 2 */
	extru	arg0,30,31,retreg
	MILLI_BEN($$divU_3)	/* divisor == 3 */
	nop
	MILLIRET		/* divisor == 4 */
	extru	arg0,29,30,retreg
	MILLI_BEN($$divU_5)	/* divisor == 5 */
	nop
	MILLI_BEN($$divU_6)	/* divisor == 6 */
	nop
	MILLI_BEN($$divU_7)	/* divisor == 7 */
	nop
	MILLIRET		/* divisor == 8 */
	extru	arg0,28,29,retreg
	MILLI_BEN($$divU_9)	/* divisor == 9 */
	nop
	MILLI_BEN($$divU_10)	/* divisor == 10 */
	nop
	b	LREF(normal)	/* divisor == 11 */
	ds	r0,temp,r0	/* set V-bit to 1 */
	MILLI_BEN($$divU_12)	/* divisor == 12 */
	nop
	b	LREF(normal)	/* divisor == 13 */
	ds	r0,temp,r0	/* set V-bit to 1 */
	MILLI_BEN($$divU_14)	/* divisor == 14 */
	nop
	MILLI_BEN($$divU_15)	/* divisor == 15 */
	nop

/* Handle the case where the high bit is on in the divisor.
   Compute:	if( dividend>=divisor) quotient=1; else quotient=0;
   Note: dividend>==divisor iff dividend-divisor does not borrow
   and	not borrow iff carry.  */
LSYM(big_divisor)
	sub	arg0,arg1,r0
	MILLIRET
	addc	r0,r0,retreg
	.exit
	.procend
	.end
#endif

712 1.1 mrg #ifdef L_remI
713 1.1 mrg /* ROUTINE: $$remI
714 1.1 mrg
715 1.1 mrg DESCRIPTION:
716 1.1 mrg . $$remI returns the remainder of the division of two signed 32-bit
717 1.1 mrg . integers. The sign of the remainder is the same as the sign of
718 1.1 mrg . the dividend.
719 1.1 mrg
720 1.1 mrg
721 1.1 mrg INPUT REGISTERS:
722 1.1 mrg . arg0 == dividend
723 1.1 mrg . arg1 == divisor
724 1.1 mrg . mrp == return pc
725 1.1 mrg . sr0 == return space when called externally
726 1.1 mrg
727 1.1 mrg OUTPUT REGISTERS:
728 1.1 mrg . arg0 = destroyed
729 1.1 mrg . arg1 = destroyed
730 1.1 mrg . ret1 = remainder
731 1.1 mrg
732 1.1 mrg OTHER REGISTERS AFFECTED:
733 1.1 mrg . r1 = undefined
734 1.1 mrg
735 1.1 mrg SIDE EFFECTS:
736 1.1 mrg . Causes a trap under the following conditions: DIVIDE BY ZERO
737 1.1 mrg . Changes memory at the following places: NONE
738 1.1 mrg
739 1.1 mrg PERMISSIBLE CONTEXT:
740 1.1 mrg . Unwindable
741 1.1 mrg . Does not create a stack frame
742 1.1 mrg . Is usable for internal or external microcode
743 1.1 mrg
744 1.1 mrg DISCUSSION:
745 1.1 mrg . Calls other millicode routines via mrp: NONE
746 1.1 mrg . Calls other millicode routines: NONE */
747 1.1 mrg
748 1.1 mrg RDEFINE(tmp,r1)
749 1.1 mrg RDEFINE(retreg,ret1)
750 1.1 mrg
751 1.1 mrg SUBSPA_MILLI
752 1.1 mrg ATTR_MILLI
753 1.1 mrg .proc
754 1.1 mrg .callinfo millicode
755 1.1 mrg .entry
756 1.1 mrg GSYM($$remI)
757 1.1 mrg GSYM($$remoI) /* alias entry point: same code as $$remI */
758 1.1 mrg .export $$remI,MILLICODE
759 1.1 mrg .export $$remoI,MILLICODE
760 1.1 mrg ldo -1(arg1),tmp /* is there at most one bit set ? */
761 1.1 mrg and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
762 1.1 mrg addi,> 0,arg1,r0 /* if denominator > 0, use power */
763 1.1 mrg /* of 2 */
764 1.1 mrg b,n LREF(neg_denom) /* reached if not a power of 2 or arg1 <= 0 */
765 1.1 mrg LSYM(pow2)
766 1.1 mrg comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
767 1.1 mrg and arg0,tmp,retreg /* get the result */
768 1.1 mrg MILLIRETN
769 1.1 mrg LSYM(neg_num)
770 1.1 mrg subi 0,arg0,arg0 /* negate numerator */
771 1.1 mrg and arg0,tmp,retreg /* get the result */
772 1.1 mrg subi 0,retreg,retreg /* negate result */
773 1.1 mrg MILLIRETN
774 1.1 mrg LSYM(neg_denom)
775 1.1 mrg addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
776 1.1 mrg /* of 2 */
777 1.1 mrg b,n LREF(regular_seq)
778 1.1 mrg sub r0,arg1,tmp /* make denominator positive */
779 1.1 mrg comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
780 1.1 mrg ldo -1(tmp),retreg /* is there at most one bit set ? */
781 1.1 mrg and,= tmp,retreg,r0 /* if not, go to regular_seq */
782 1.1 mrg b,n LREF(regular_seq)
783 1.1 mrg comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
784 1.1 mrg and arg0,retreg,retreg
785 1.1 mrg MILLIRETN
786 1.1 mrg LSYM(neg_num_2)
787 1.1 mrg subi 0,arg0,tmp /* negate numerator (0x80000000 is its own negation) */
788 1.1 mrg and tmp,retreg,retreg
789 1.1 mrg subi 0,retreg,retreg
790 1.1 mrg MILLIRETN
791 1.1 mrg LSYM(regular_seq) /* general path: 32 unrolled DS divide steps */
792 1.1 mrg addit,= 0,arg1,0 /* trap if div by zero */
793 1.1 mrg add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
794 1.1 mrg sub 0,retreg,retreg /* make it positive */
795 1.1 mrg sub 0,arg1, tmp /* clear carry, */
796 1.1 mrg /* negate the divisor */
797 1.1 mrg ds 0, tmp,0 /* set V-bit to the comple- */
798 1.1 mrg /* ment of the divisor sign */
799 1.1 mrg or 0,0, tmp /* clear tmp */
800 1.1 mrg add retreg,retreg,retreg /* shift msb bit into carry */
801 1.1 mrg ds tmp,arg1, tmp /* 1st divide step, if no carry */
802 1.1 mrg /* out, msb of quotient = 0 */
803 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
804 1.1 mrg LSYM(t1)
805 1.1 mrg ds tmp,arg1, tmp /* 2nd divide step */
806 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
807 1.1 mrg ds tmp,arg1, tmp /* 3rd divide step */
808 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
809 1.1 mrg ds tmp,arg1, tmp /* 4th divide step */
810 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
811 1.1 mrg ds tmp,arg1, tmp /* 5th divide step */
812 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
813 1.1 mrg ds tmp,arg1, tmp /* 6th divide step */
814 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
815 1.1 mrg ds tmp,arg1, tmp /* 7th divide step */
816 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
817 1.1 mrg ds tmp,arg1, tmp /* 8th divide step */
818 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
819 1.1 mrg ds tmp,arg1, tmp /* 9th divide step */
820 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
821 1.1 mrg ds tmp,arg1, tmp /* 10th divide step */
822 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
823 1.1 mrg ds tmp,arg1, tmp /* 11th divide step */
824 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
825 1.1 mrg ds tmp,arg1, tmp /* 12th divide step */
826 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
827 1.1 mrg ds tmp,arg1, tmp /* 13th divide step */
828 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
829 1.1 mrg ds tmp,arg1, tmp /* 14th divide step */
830 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
831 1.1 mrg ds tmp,arg1, tmp /* 15th divide step */
832 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
833 1.1 mrg ds tmp,arg1, tmp /* 16th divide step */
834 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
835 1.1 mrg ds tmp,arg1, tmp /* 17th divide step */
836 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
837 1.1 mrg ds tmp,arg1, tmp /* 18th divide step */
838 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
839 1.1 mrg ds tmp,arg1, tmp /* 19th divide step */
840 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
841 1.1 mrg ds tmp,arg1, tmp /* 20th divide step */
842 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
843 1.1 mrg ds tmp,arg1, tmp /* 21st divide step */
844 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
845 1.1 mrg ds tmp,arg1, tmp /* 22nd divide step */
846 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
847 1.1 mrg ds tmp,arg1, tmp /* 23rd divide step */
848 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
849 1.1 mrg ds tmp,arg1, tmp /* 24th divide step */
850 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
851 1.1 mrg ds tmp,arg1, tmp /* 25th divide step */
852 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
853 1.1 mrg ds tmp,arg1, tmp /* 26th divide step */
854 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
855 1.1 mrg ds tmp,arg1, tmp /* 27th divide step */
856 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
857 1.1 mrg ds tmp,arg1, tmp /* 28th divide step */
858 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
859 1.1 mrg ds tmp,arg1, tmp /* 29th divide step */
860 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
861 1.1 mrg ds tmp,arg1, tmp /* 30th divide step */
862 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
863 1.1 mrg ds tmp,arg1, tmp /* 31st divide step */
864 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
865 1.1 mrg ds tmp,arg1, tmp /* 32nd divide step, */
866 1.1 mrg addc retreg,retreg,retreg /* shift last bit into retreg */
867 1.1 mrg movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
868 1.1 mrg add,< arg1,0,0 /* if arg1 > 0, add arg1 */
869 1.1 mrg add,tr tmp,arg1,retreg /* for correcting remainder tmp */
870 1.1 mrg sub tmp,arg1,retreg /* else add absolute value arg1 */
871 1.1 mrg LSYM(finish)
872 1.1 mrg add,>= arg0,0,0 /* set sign of remainder */
873 1.1 mrg sub 0,retreg,retreg /* to sign of dividend */
874 1.1 mrg MILLIRET
875 1.1 mrg nop
876 1.1 mrg .exit
877 1.1 mrg .procend
878 1.1 mrg #ifdef milliext
879 1.1 mrg .origin 0x00000200
880 1.1 mrg #endif
881 1.1 mrg .end
882 1.1 mrg #endif
883 1.1 mrg
884 1.1 mrg #ifdef L_remU
885 1.1 mrg /* ROUTINE: $$remU
886 1.1 mrg . Single precision divide for remainder with unsigned binary integers.
887 1.1 mrg .
888 1.1 mrg . The remainder must be dividend-(dividend/divisor)*divisor.
889 1.1 mrg . Divide by zero is trapped.
890 1.1 mrg
891 1.1 mrg INPUT REGISTERS:
892 1.1 mrg . arg0 == dividend
893 1.1 mrg . arg1 == divisor
894 1.1 mrg . mrp == return pc
895 1.1 mrg . sr0 == return space when called externally
896 1.1 mrg
897 1.1 mrg OUTPUT REGISTERS:
898 1.1 mrg . arg0 = undefined
899 1.1 mrg . arg1 = undefined
900 1.1 mrg . ret1 = remainder
901 1.1 mrg
902 1.1 mrg OTHER REGISTERS AFFECTED:
903 1.1 mrg . r1 = undefined
904 1.1 mrg
905 1.1 mrg SIDE EFFECTS:
906 1.1 mrg . Causes a trap under the following conditions: DIVIDE BY ZERO
907 1.1 mrg . Changes memory at the following places: NONE
908 1.1 mrg
909 1.1 mrg PERMISSIBLE CONTEXT:
910 1.1 mrg . Unwindable.
911 1.1 mrg . Does not create a stack frame.
912 1.1 mrg . Suitable for internal or external millicode.
913 1.1 mrg . Assumes the special millicode register conventions.
914 1.1 mrg
915 1.1 mrg DISCUSSION:
916 1.1 mrg . Calls other millicode routines using mrp: NONE
917 1.1 mrg . Calls other millicode routines: NONE */
918 1.1 mrg
919 1.1 mrg
920 1.1 mrg RDEFINE(temp,r1)
921 1.1 mrg RDEFINE(rmndr,ret1) /* r29 */
922 1.1 mrg SUBSPA_MILLI
923 1.1 mrg ATTR_MILLI
924 1.1 mrg .export $$remU,millicode
925 1.1 mrg .proc
926 1.1 mrg .callinfo millicode
927 1.1 mrg .entry
928 1.1 mrg GSYM($$remU)
929 1.1 mrg ldo -1(arg1),temp /* is there at most one bit set ? */
930 1.1 mrg and,= arg1,temp,r0 /* if not, don't use power of 2 */
931 1.1 mrg b LREF(regular_seq)
932 1.1 mrg addit,= 0,arg1,r0 /* trap on div by zero */
933 1.1 mrg /* (delay slot: trap check runs on both paths) */
934 1.1 mrg and arg0,temp,rmndr /* get the result for power of 2 */
935 1.1 mrg MILLIRETN
935 1.1 mrg LSYM(regular_seq)
936 1.1 mrg comib,>=,n 0,arg1,LREF(special_case) /* divisor msb set: one subtract suffices */
937 1.1 mrg subi 0,arg1,rmndr /* clear carry, negate the divisor */
938 1.1 mrg ds r0,rmndr,r0 /* set V-bit to 1 */
939 1.1 mrg add arg0,arg0,temp /* shift msb bit into carry */
940 1.1 mrg ds r0,arg1,rmndr /* 1st divide step, if no carry */
941 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
942 1.1 mrg ds rmndr,arg1,rmndr /* 2nd divide step */
943 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
944 1.1 mrg ds rmndr,arg1,rmndr /* 3rd divide step */
945 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
946 1.1 mrg ds rmndr,arg1,rmndr /* 4th divide step */
947 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
948 1.1 mrg ds rmndr,arg1,rmndr /* 5th divide step */
949 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
950 1.1 mrg ds rmndr,arg1,rmndr /* 6th divide step */
951 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
952 1.1 mrg ds rmndr,arg1,rmndr /* 7th divide step */
953 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
954 1.1 mrg ds rmndr,arg1,rmndr /* 8th divide step */
955 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
956 1.1 mrg ds rmndr,arg1,rmndr /* 9th divide step */
957 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
958 1.1 mrg ds rmndr,arg1,rmndr /* 10th divide step */
959 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
960 1.1 mrg ds rmndr,arg1,rmndr /* 11th divide step */
961 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
962 1.1 mrg ds rmndr,arg1,rmndr /* 12th divide step */
963 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
964 1.1 mrg ds rmndr,arg1,rmndr /* 13th divide step */
965 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
966 1.1 mrg ds rmndr,arg1,rmndr /* 14th divide step */
967 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
968 1.1 mrg ds rmndr,arg1,rmndr /* 15th divide step */
969 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
970 1.1 mrg ds rmndr,arg1,rmndr /* 16th divide step */
971 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
972 1.1 mrg ds rmndr,arg1,rmndr /* 17th divide step */
973 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
974 1.1 mrg ds rmndr,arg1,rmndr /* 18th divide step */
975 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
976 1.1 mrg ds rmndr,arg1,rmndr /* 19th divide step */
977 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
978 1.1 mrg ds rmndr,arg1,rmndr /* 20th divide step */
979 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
980 1.1 mrg ds rmndr,arg1,rmndr /* 21st divide step */
981 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
982 1.1 mrg ds rmndr,arg1,rmndr /* 22nd divide step */
983 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
984 1.1 mrg ds rmndr,arg1,rmndr /* 23rd divide step */
985 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
986 1.1 mrg ds rmndr,arg1,rmndr /* 24th divide step */
987 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
988 1.1 mrg ds rmndr,arg1,rmndr /* 25th divide step */
989 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
990 1.1 mrg ds rmndr,arg1,rmndr /* 26th divide step */
991 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
992 1.1 mrg ds rmndr,arg1,rmndr /* 27th divide step */
993 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
994 1.1 mrg ds rmndr,arg1,rmndr /* 28th divide step */
995 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
996 1.1 mrg ds rmndr,arg1,rmndr /* 29th divide step */
997 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
998 1.1 mrg ds rmndr,arg1,rmndr /* 30th divide step */
999 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
1000 1.1 mrg ds rmndr,arg1,rmndr /* 31st divide step */
1001 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
1002 1.1 mrg ds rmndr,arg1,rmndr /* 32nd divide step, */
1003 1.1 mrg comiclr,<= 0,rmndr,r0 /* skip correction if remainder >= 0 */
1004 1.1 mrg add rmndr,arg1,rmndr /* correction */
1005 1.1 mrg MILLIRETN
1006 1.1 mrg nop
1007 1.1 mrg
1008 1.1 mrg /* Putting >= on the last DS and deleting COMICLR does not work! */
1009 1.1 mrg LSYM(special_case)
1010 1.1 mrg sub,>>= arg0,arg1,rmndr /* if arg0 >= arg1 (unsigned), rmndr = arg0-arg1 */
1011 1.1 mrg copy arg0,rmndr /* else remainder is the dividend itself */
1012 1.1 mrg MILLIRETN
1013 1.1 mrg nop
1014 1.1 mrg .exit
1015 1.1 mrg .procend
1016 1.1 mrg .end
1017 1.1 mrg #endif
1018 1.1 mrg
1019 1.1 mrg #ifdef L_div_const
1020 1.1 mrg /* ROUTINE: $$divI_2
1021 1.1 mrg . $$divI_3 $$divU_3
1022 1.1 mrg . $$divI_4
1023 1.1 mrg . $$divI_5 $$divU_5
1024 1.1 mrg . $$divI_6 $$divU_6
1025 1.1 mrg . $$divI_7 $$divU_7
1026 1.1 mrg . $$divI_8
1027 1.1 mrg . $$divI_9 $$divU_9
1028 1.1 mrg . $$divI_10 $$divU_10
1029 1.1 mrg .
1030 1.1 mrg . $$divI_12 $$divU_12
1031 1.1 mrg .
1032 1.1 mrg . $$divI_14 $$divU_14
1033 1.1 mrg . $$divI_15 $$divU_15
1034 1.1 mrg . $$divI_16
1035 1.1 mrg . $$divI_17 $$divU_17
1036 1.1 mrg .
1037 1.1 mrg . Divide by selected constants for single precision binary integers.
1038 1.1 mrg
1039 1.1 mrg INPUT REGISTERS:
1040 1.1 mrg . arg0 == dividend
1041 1.1 mrg . mrp == return pc
1042 1.1 mrg . sr0 == return space when called externally
1043 1.1 mrg
1044 1.1 mrg OUTPUT REGISTERS:
1045 1.1 mrg . arg0 = undefined
1046 1.1 mrg . arg1 = undefined
1047 1.1 mrg . ret1 = quotient
1048 1.1 mrg
1049 1.1 mrg OTHER REGISTERS AFFECTED:
1050 1.1 mrg . r1 = undefined
1051 1.1 mrg
1052 1.1 mrg SIDE EFFECTS:
1053 1.1 mrg . Causes a trap under the following conditions: NONE
1054 1.1 mrg . Changes memory at the following places: NONE
1055 1.1 mrg
1056 1.1 mrg PERMISSIBLE CONTEXT:
1057 1.1 mrg . Unwindable.
1058 1.1 mrg . Does not create a stack frame.
1059 1.1 mrg . Suitable for internal or external millicode.
1060 1.1 mrg . Assumes the special millicode register conventions.
1061 1.1 mrg
1062 1.1 mrg DISCUSSION:
1063 1.1 mrg . Calls other millicode routines using mrp: NONE
1064 1.1 mrg . Calls other millicode routines: NONE */
1065 1.1 mrg
1066 1.1 mrg
1067 1.1 mrg /* TRUNCATED DIVISION BY SMALL INTEGERS
1068 1.1 mrg
1069 1.1 mrg We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
1070 1.1 mrg (with y fixed).
1071 1.1 mrg
1072 1.1 mrg Let a = floor(z/y), for some choice of z. Note that z will be
1073 1.1 mrg chosen so that division by z is cheap.
1074 1.1 mrg
1075 1.1 mrg Let r be the remainder(z/y). In other words, r = z - ay.
1076 1.1 mrg
1077 1.1 mrg Now, our method is to choose a value for b such that
1078 1.1 mrg
1079 1.1 mrg q'(x) = floor((ax+b)/z)
1080 1.1 mrg
1081 1.1 mrg is equal to q(x) over as large a range of x as possible. If the
1082 1.1 mrg two are equal over a sufficiently large range, and if it is easy to
1083 1.1 mrg form the product (ax), and it is easy to divide by z, then we can
1084 1.1 mrg perform the division much faster than the general division algorithm.
1085 1.1 mrg
1086 1.1 mrg So, we want the following to be true:
1087 1.1 mrg
1088 1.1 mrg . For x in the following range:
1089 1.1 mrg .
1090 1.1 mrg . ky <= x < (k+1)y
1091 1.1 mrg .
1092 1.1 mrg . implies that
1093 1.1 mrg .
1094 1.1 mrg . k <= (ax+b)/z < (k+1)
1095 1.1 mrg
1096 1.1 mrg We want to determine b such that this is true for all k in the
1097 1.1 mrg range {0..K} for some maximum K.
1098 1.1 mrg
1099 1.1 mrg Since (ax+b) is an increasing function of x, we can take each
1100 1.1 mrg bound separately to determine the "best" value for b.
1101 1.1 mrg
1102 1.1 mrg (ax+b)/z < (k+1) implies
1103 1.1 mrg
1104 1.1 mrg a((k+1)y-1)+b < (k+1)z implies
1105 1.1 mrg
1106 1.1 mrg b < a + (k+1)(z-ay) implies
1107 1.1 mrg
1108 1.1 mrg b < a + (k+1)r
1109 1.1 mrg
1110 1.1 mrg This needs to be true for all k in the range {0..K}. In
1111 1.1 mrg particular, it is true for k = 0 and this leads to a maximum
1112 1.1 mrg acceptable value for b.
1113 1.1 mrg
1114 1.1 mrg b < a+r or b <= a+r-1
1115 1.1 mrg
1116 1.1 mrg Taking the other bound, we have
1117 1.1 mrg
1118 1.1 mrg k <= (ax+b)/z implies
1119 1.1 mrg
1120 1.1 mrg k <= (aky+b)/z implies
1121 1.1 mrg
1122 1.1 mrg k(z-ay) <= b implies
1123 1.1 mrg
1124 1.1 mrg kr <= b
1125 1.1 mrg
1126 1.1 mrg Clearly, the largest range for k will be achieved by maximizing b,
1127 1.1 mrg when r is not zero. When r is zero, then the simplest choice for b
1128 1.1 mrg is 0. When r is not 0, set
1129 1.1 mrg
1130 1.1 mrg . b = a+r-1
1131 1.1 mrg
1132 1.1 mrg Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
1133 1.1 mrg for all x in the range:
1134 1.1 mrg
1135 1.1 mrg . 0 <= x < (K+1)y
1136 1.1 mrg
1137 1.1 mrg We need to determine what K is. Of our two bounds,
1138 1.1 mrg
1139 1.1 mrg . b < a+(k+1)r is satisfied for all k >= 0, by construction.
1140 1.1 mrg
1141 1.1 mrg The other bound is
1142 1.1 mrg
1143 1.1 mrg . kr <= b
1144 1.1 mrg
1145 1.1 mrg This is always true if r = 0. If r is not 0 (the usual case), then
1146 1.1 mrg K = floor((a+r-1)/r), is the maximum value for k.
1147 1.1 mrg
1148 1.1 mrg Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
1149 1.1 mrg answer for q(x) = floor(x/y) when x is in the range
1150 1.1 mrg
1151 1.1 mrg (0,(K+1)y-1) K = floor((a+r-1)/r)
1152 1.1 mrg
1153 1.1 mrg To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
1154 1.1 mrg the formula for q'(x) yields the correct value of q(x) for all x
1155 1.1 mrg representable by a single word in HPPA.
1156 1.1 mrg
1157 1.1 mrg We are also constrained in that computing the product (ax), adding
1158 1.1 mrg b, and dividing by z must all be done quickly, otherwise we will be
1159 1.1 mrg better off going through the general algorithm using the DS
1160 1.1 mrg instruction, which uses approximately 70 cycles.
1161 1.1 mrg
1162 1.1 mrg For each y, there is a choice of z which satisfies the constraints
1163 1.1 mrg for (K+1)y >= 2**32. We may not, however, be able to satisfy the
1164 1.1 mrg timing constraints for arbitrary y. It seems that z being equal to
1165 1.1 mrg a power of 2 or a power of 2 minus 1 is as good as we can do, since
1166 1.1 mrg it minimizes the time to do division by z. We want the choice of z
1167 1.1 mrg to also result in a value for (a) that minimizes the computation of
1168 1.1 mrg the product (ax). This is best achieved if (a) has a regular bit
1169 1.1 mrg pattern (so the multiplication can be done with shifts and adds).
1170 1.1 mrg The value of (a) also needs to be less than 2**32 so the product is
1171 1.1 mrg always guaranteed to fit in 2 words.
1172 1.1 mrg
1173 1.1 mrg In actual practice, the following should be done:
1174 1.1 mrg
1175 1.1 mrg 1) For negative x, you should take the absolute value and remember
1176 1.1 mrg . the fact so that the result can be negated. This obviously does
1177 1.1 mrg . not apply in the unsigned case.
1178 1.1 mrg 2) For even y, you should factor out the power of 2 that divides y
1179 1.1 mrg . and divide x by it. You can then proceed by dividing by the
1180 1.1 mrg . odd factor of y.
1181 1.1 mrg
1182 1.1 mrg Here is a table of some odd values of y, and corresponding choices
1183 1.1 mrg for z which are "good".
1184 1.1 mrg
1185 1.1 mrg y z r a (hex) max x (hex)
1186 1.1 mrg
1187 1.1 mrg 3 2**32 1 55555555 100000001
1188 1.1 mrg 5 2**32 1 33333333 100000003
1189 1.1 mrg 7 2**24-1 0 249249 (infinite)
1190 1.1 mrg 9 2**24-1 0 1c71c7 (infinite)
1191 1.1 mrg 11 2**20-1 0 1745d (infinite)
1192 1.1 mrg 13 2**24-1 0 13b13b (infinite)
1193 1.1 mrg 15 2**32 1 11111111 10000000d
1194 1.1 mrg 17 2**32 1 f0f0f0f 10000000f
1195 1.1 mrg
1196 1.1 mrg If r is 1, then b = a+r-1 = a. This simplifies the computation
1197 1.1 mrg of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
1198 1.1 mrg then b = 0 is ok to use which simplifies (ax+b).
1199 1.1 mrg
1200 1.1 mrg The bit patterns for 55555555, 33333333, and 11111111 are obviously
1201 1.1 mrg very regular. The bit patterns for the other values of a above are:
1202 1.1 mrg
1203 1.1 mrg y (hex) (binary)
1204 1.1 mrg
1205 1.1 mrg 7 249249 001001001001001001001001 << regular >>
1206 1.1 mrg 9 1c71c7 000111000111000111000111 << regular >>
1207 1.1 mrg 11 1745d 000000010111010001011101 << irregular >>
1208 1.1 mrg 13 13b13b 000100111011000100111011 << irregular >>
1209 1.1 mrg
1210 1.1 mrg The bit patterns for (a) corresponding to (y) of 11 and 13 may be
1211 1.1 mrg too irregular to warrant using this method.
1212 1.1 mrg
1213 1.1 mrg When z is a power of 2 minus 1, then the division by z is slightly
1214 1.1 mrg more complicated, involving an iterative solution.
1215 1.1 mrg
1216 1.1 mrg The code presented here solves division by 1 through 17, except for
1217 1.1 mrg 11 and 13. There are algorithms for both signed and unsigned
1218 1.1 mrg quantities given.
1219 1.1 mrg
1220 1.1 mrg TIMINGS (cycles)
1221 1.1 mrg
1222 1.1 mrg divisor positive negative unsigned
1223 1.1 mrg
1224 1.1 mrg . 1 2 2 2
1225 1.1 mrg . 2 4 4 2
1226 1.1 mrg . 3 19 21 19
1227 1.1 mrg . 4 4 4 2
1228 1.1 mrg . 5 18 22 19
1229 1.1 mrg . 6 19 22 19
1230 1.1 mrg . 8 4 4 2
1231 1.1 mrg . 10 18 19 17
1232 1.1 mrg . 12 18 20 18
1233 1.1 mrg . 15 16 18 16
1234 1.1 mrg . 16 4 4 2
1235 1.1 mrg . 17 16 18 16
1236 1.1 mrg
1237 1.1 mrg Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
1238 1.1 mrg a loop body is executed until the tentative quotient is 0. The
1239 1.1 mrg number of times the loop body is executed varies depending on the
1240 1.1 mrg dividend, but is never more than two times. If the dividend is
1241 1.1 mrg less than the divisor, then the loop body is not executed at all.
1242 1.1 mrg Each iteration adds 4 cycles to the timings.
1243 1.1 mrg
1244 1.1 mrg divisor positive negative unsigned
1245 1.1 mrg
1246 1.1 mrg . 7 19+4n 20+4n 20+4n n = number of iterations
1247 1.1 mrg . 9 21+4n 22+4n 21+4n
1248 1.1 mrg . 14 21+4n 22+4n 20+4n
1249 1.1 mrg
1250 1.1 mrg To give an idea of how the number of iterations varies, here is a
1251 1.1 mrg table of dividend versus number of iterations when dividing by 7.
1252 1.1 mrg
1253 1.1 mrg smallest largest required
1254 1.1 mrg dividend dividend iterations
1255 1.1 mrg
1256 1.1 mrg . 0 6 0
1257 1.1 mrg . 7 0x6ffffff 1
1258 1.1 mrg 0x1000006 0xffffffff 2
1259 1.1 mrg
1260 1.1 mrg There is some overlap in the range of numbers requiring 1 and 2
1261 1.1 mrg iterations. */
1262 1.1 mrg
1263 1.1 mrg RDEFINE(t2,r1)
1264 1.1 mrg RDEFINE(x2,arg0) /* r26 */
1265 1.1 mrg RDEFINE(t1,arg1) /* r25 */
1266 1.1 mrg RDEFINE(x1,ret1) /* r29 */
1267 1.1 mrg
1268 1.1 mrg SUBSPA_MILLI_DIV
1269 1.1 mrg ATTR_MILLI
1270 1.1 mrg
1271 1.1 mrg .proc
1272 1.1 mrg .callinfo millicode
1273 1.1 mrg .entry
1274 1.1 mrg /* NONE of these routines require a stack frame
1275 1.1 mrg ALL of these routines are unwindable from millicode */
1276 1.1 mrg
1277 1.1 mrg GSYM($$divide_by_constant)
1278 1.1 mrg .export $$divide_by_constant,millicode
1279 1.1 mrg /* Provides a "nice" label for the code covered by the unwind descriptor
1280 1.1 mrg for things like gprof. */
1281 1.1 mrg
1282 1.1 mrg /* DIVISION BY 2 (shift by 1) */
1283 1.1 mrg GSYM($$divI_2)
1284 1.1 mrg .export $$divI_2,millicode
1285 1.1 mrg comclr,>= arg0,0,0
1286 1.1 mrg addi 1,arg0,arg0
1287 1.1 mrg MILLIRET
1288 1.1 mrg extrs arg0,30,31,ret1
1289 1.1 mrg
1290 1.1 mrg
1291 1.1 mrg /* DIVISION BY 4 (shift by 2) */
1292 1.1 mrg GSYM($$divI_4)
1293 1.1 mrg .export $$divI_4,millicode
1294 1.1 mrg comclr,>= arg0,0,0
1295 1.1 mrg addi 3,arg0,arg0
1296 1.1 mrg MILLIRET
1297 1.1 mrg extrs arg0,29,30,ret1
1298 1.1 mrg
1299 1.1 mrg
1300 1.1 mrg /* DIVISION BY 8 (shift by 3) */
1301 1.1 mrg GSYM($$divI_8)
1302 1.1 mrg .export $$divI_8,millicode
1303 1.1 mrg comclr,>= arg0,0,0
1304 1.1 mrg addi 7,arg0,arg0
1305 1.1 mrg MILLIRET
1306 1.1 mrg extrs arg0,28,29,ret1
1307 1.1 mrg
1308 1.1 mrg /* DIVISION BY 16 (shift by 4) */
1309 1.1 mrg GSYM($$divI_16)
1310 1.1 mrg .export $$divI_16,millicode
1311 1.1 mrg comclr,>= arg0,0,0
1312 1.1 mrg addi 15,arg0,arg0
1313 1.1 mrg MILLIRET
1314 1.1 mrg extrs arg0,27,28,ret1
1315 1.1 mrg
1316 1.1 mrg /****************************************************************************
1317 1.1 mrg *
1318 1.1 mrg * DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
1319 1.1 mrg *
1320 1.1 mrg * includes 3,5,15,17 and also 6,10,12
1321 1.1 mrg *
1322 1.1 mrg ****************************************************************************/
1323 1.1 mrg
1324 1.1 mrg /* DIVISION BY 3 (use z = 2**32; a = 55555555) */
1325 1.1 mrg
1326 1.1 mrg GSYM($$divI_3)
1327 1.1 mrg .export $$divI_3,millicode
1328 1.1 mrg comb,<,N x2,0,LREF(neg3)
1329 1.1 mrg
1330 1.1 mrg addi 1,x2,x2 /* this cannot overflow */
1331 1.1 mrg extru x2,1,2,x1 /* multiply by 5 to get started */
1332 1.1 mrg sh2add x2,x2,x2
1333 1.1 mrg b LREF(pos)
1334 1.1 mrg addc x1,0,x1
1335 1.1 mrg
1336 1.1 mrg LSYM(neg3)
1337 1.1 mrg subi 1,x2,x2 /* this cannot overflow */
1338 1.1 mrg extru x2,1,2,x1 /* multiply by 5 to get started */
1339 1.1 mrg sh2add x2,x2,x2
1340 1.1 mrg b LREF(neg)
1341 1.1 mrg addc x1,0,x1
1342 1.1 mrg
1343 1.1 mrg GSYM($$divU_3)
1344 1.1 mrg .export $$divU_3,millicode
1345 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1346 1.1 mrg addc 0,0,x1
1347 1.1 mrg shd x1,x2,30,t1 /* multiply by 5 to get started */
1348 1.1 mrg sh2add x2,x2,x2
1349 1.1 mrg b LREF(pos)
1350 1.1 mrg addc x1,t1,x1
1351 1.1 mrg
1352 1.1 mrg /* DIVISION BY 5 (use z = 2**32; a = 33333333) */
1353 1.1 mrg
1354 1.1 mrg GSYM($$divI_5)
1355 1.1 mrg .export $$divI_5,millicode
1356 1.1 mrg comb,<,N x2,0,LREF(neg5)
1357 1.1 mrg
1358 1.1 mrg addi 3,x2,t1 /* this cannot overflow */
1359 1.1 mrg sh1add x2,t1,x2 /* multiply by 3 to get started */
1360 1.1 mrg b LREF(pos)
1361 1.1 mrg addc 0,0,x1
1362 1.1 mrg
1363 1.1 mrg LSYM(neg5)
1364 1.1 mrg sub 0,x2,x2 /* negate x2 */
1365 1.1 mrg addi 1,x2,x2 /* this cannot overflow */
1366 1.1 mrg shd 0,x2,31,x1 /* get top bit (can be 1) */
1367 1.1 mrg sh1add x2,x2,x2 /* multiply by 3 to get started */
1368 1.1 mrg b LREF(neg)
1369 1.1 mrg addc x1,0,x1
1370 1.1 mrg
1371 1.1 mrg GSYM($$divU_5)
1372 1.1 mrg .export $$divU_5,millicode
1373 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1374 1.1 mrg addc 0,0,x1
1375 1.1 mrg shd x1,x2,31,t1 /* multiply by 3 to get started */
1376 1.1 mrg sh1add x2,x2,x2
1377 1.1 mrg b LREF(pos)
1378 1.1 mrg addc t1,x1,x1
1379 1.1 mrg
1380 1.1 mrg /* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
1381 1.1 mrg GSYM($$divI_6)
1382 1.1 mrg .export $$divI_6,millicode
1383 1.1 mrg comb,<,N x2,0,LREF(neg6)
1384 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1385 1.1 mrg addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
1386 1.1 mrg sh2add x2,t1,x2 /* multiply by 5 to get started */
1387 1.1 mrg b LREF(pos)
1388 1.1 mrg addc 0,0,x1
1389 1.1 mrg
1390 1.1 mrg LSYM(neg6)
1391 1.1 mrg subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1392 1.1 mrg /* negation and adding 1 are done */
1393 1.1 mrg /* at the same time by the SUBI */
1394 1.1 mrg extru x2,30,31,x2
1395 1.1 mrg shd 0,x2,30,x1
1396 1.1 mrg sh2add x2,x2,x2 /* multiply by 5 to get started */
1397 1.1 mrg b LREF(neg)
1398 1.1 mrg addc x1,0,x1
1399 1.1 mrg
1400 1.1 mrg GSYM($$divU_6)
1401 1.1 mrg .export $$divU_6,millicode
1402 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1403 1.1 mrg addi 1,x2,x2 /* cannot carry */
1404 1.1 mrg shd 0,x2,30,x1 /* multiply by 5 to get started */
1405 1.1 mrg sh2add x2,x2,x2
1406 1.1 mrg b LREF(pos)
1407 1.1 mrg addc x1,0,x1
1408 1.1 mrg
1409 1.1 mrg /* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
1410 1.1 mrg GSYM($$divU_10)
1411 1.1 mrg .export $$divU_10,millicode
1412 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1413 1.1 mrg addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
1414 1.1 mrg sh1add x2,t1,x2 /* multiply by 3 to get started */
1415 1.1 mrg addc 0,0,x1
1416 1.1 mrg LSYM(pos)
1417 1.1 mrg shd x1,x2,28,t1 /* multiply by 0x11 */
1418 1.1 mrg shd x2,0,28,t2
1419 1.1 mrg add x2,t2,x2
1420 1.1 mrg addc x1,t1,x1
1421 1.1 mrg LSYM(pos_for_17)
1422 1.1 mrg shd x1,x2,24,t1 /* multiply by 0x101 */
1423 1.1 mrg shd x2,0,24,t2
1424 1.1 mrg add x2,t2,x2
1425 1.1 mrg addc x1,t1,x1
1426 1.1 mrg
1427 1.1 mrg shd x1,x2,16,t1 /* multiply by 0x10001 */
1428 1.1 mrg shd x2,0,16,t2
1429 1.1 mrg add x2,t2,x2
1430 1.1 mrg MILLIRET
1431 1.1 mrg addc x1,t1,x1
1432 1.1 mrg
1433 1.1 mrg GSYM($$divI_10)
1434 1.1 mrg .export $$divI_10,millicode
1435 1.1 mrg comb,< x2,0,LREF(neg10)
1436 1.1 mrg copy 0,x1
1437 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1438 1.1 mrg addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
1439 1.1 mrg sh1add x2,x2,x2 /* multiply by 3 to get started */
1440 1.1 mrg
1441 1.1 mrg LSYM(neg10)
1442 1.1 mrg subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1443 1.1 mrg /* negation and adding 1 are done */
1444 1.1 mrg /* at the same time by the SUBI */
1445 1.1 mrg extru x2,30,31,x2
1446 1.1 mrg sh1add x2,x2,x2 /* multiply by 3 to get started */
1447 1.1 mrg LSYM(neg)
1448 1.1 mrg shd x1,x2,28,t1 /* multiply by 0x11 */
1449 1.1 mrg shd x2,0,28,t2
1450 1.1 mrg add x2,t2,x2
1451 1.1 mrg addc x1,t1,x1
1452 1.1 mrg LSYM(neg_for_17)
1453 1.1 mrg shd x1,x2,24,t1 /* multiply by 0x101 */
1454 1.1 mrg shd x2,0,24,t2
1455 1.1 mrg add x2,t2,x2
1456 1.1 mrg addc x1,t1,x1
1457 1.1 mrg
1458 1.1 mrg shd x1,x2,16,t1 /* multiply by 0x10001 */
1459 1.1 mrg shd x2,0,16,t2
1460 1.1 mrg add x2,t2,x2
1461 1.1 mrg addc x1,t1,x1
1462 1.1 mrg MILLIRET
1463 1.1 mrg sub 0,x1,x1
1464 1.1 mrg
1465 1.1 mrg /* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
1466 1.1 mrg GSYM($$divI_12)
1467 1.1 mrg .export $$divI_12,millicode
1468 1.1 mrg comb,< x2,0,LREF(neg12)
1469 1.1 mrg copy 0,x1
1470 1.1 mrg extru x2,29,30,x2 /* divide by 4 */
1471 1.1 mrg addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
1472 1.1 mrg sh2add x2,x2,x2 /* multiply by 5 to get started */
1473 1.1 mrg
1474 1.1 mrg LSYM(neg12)
1475 1.1 mrg subi 4,x2,x2 /* negate, divide by 4, and add 1 */
1476 1.1 mrg /* negation and adding 1 are done */
1477 1.1 mrg /* at the same time by the SUBI */
1478 1.1 mrg extru x2,29,30,x2
1479 1.1 mrg b LREF(neg)
1480 1.1 mrg sh2add x2,x2,x2 /* multiply by 5 to get started */
1481 1.1 mrg
1482 1.1 mrg GSYM($$divU_12)
1483 1.1 mrg .export $$divU_12,millicode
1484 1.1 mrg extru x2,29,30,x2 /* divide by 4 */
1485 1.1 mrg addi 5,x2,t1 /* cannot carry */
1486 1.1 mrg sh2add x2,t1,x2 /* multiply by 5 to get started */
1487 1.1 mrg b LREF(pos)
1488 1.1 mrg addc 0,0,x1
1489 1.1 mrg
1490 1.1 mrg /* DIVISION BY 15 (use z = 2**32; a = 11111111) */
1491 1.1 mrg GSYM($$divI_15)
1492 1.1 mrg .export $$divI_15,millicode
1493 1.1 mrg comb,< x2,0,LREF(neg15)
1494 1.1 mrg copy 0,x1
/* No pre-scaling factor is needed for 15 (a = 0x11111111 is exactly
   0x11 * 0x101 * 0x10001): add the rounding +1 and jump one
   instruction past LREF(pos); the delay-slot shd performs what is
   presumably that skipped first instruction of the tail.  */
1495 1.1 mrg addib,tr 1,x2,LREF(pos)+4
1496 1.1 mrg shd x1,x2,28,t1
1497 1.1 mrg
1498 1.1 mrg LSYM(neg15)
/* Negative path: negate and add 1 in one subi, then run the full
   0x11 * 0x101 * 0x10001 chain at LREF(neg).  */
1499 1.1 mrg b LREF(neg)
1500 1.1 mrg subi 1,x2,x2
1501 1.1 mrg
1502 1.1 mrg GSYM($$divU_15)
1503 1.1 mrg .export $$divU_15,millicode
/* Unsigned divide by 15: the rounding +1 may carry out of bit 31, so
   capture that carry into x1 before entering the shared tail.  */
1504 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1505 1.1 mrg b LREF(pos)
1506 1.1 mrg addc 0,0,x1
1507 1.1 mrg
1508 1.1 mrg /* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
1509 1.1 mrg GSYM($$divI_17)
1510 1.1 mrg .export $$divI_17,millicode
1511 1.1 mrg comb,<,n x2,0,LREF(neg17)
1512 1.1 mrg addi 1,x2,x2 /* this cannot overflow */
/* Form 15*(x+1) across x1:x2: t2 = x<<4, t1 = bits shifted out of the
   top, x2 = 16x - x, with the borrow propagated into the high word by
   subb.  0xf * 0x101 * 0x10001 = 0xf0f0f0f, so the remaining factors
   are applied by LREF(pos_for_17) (defined earlier in the file).  */
1513 1.1 mrg shd 0,x2,28,t1 /* multiply by 0xf to get started */
1514 1.1 mrg shd x2,0,28,t2
1515 1.1 mrg sub t2,x2,x2
1516 1.1 mrg b LREF(pos_for_17)
1517 1.1 mrg subb t1,0,x1
1518 1.1 mrg
1519 1.1 mrg LSYM(neg17)
/* Negative mirror of the above: negate-and-add-1 in one subi, scale
   by 0xf, then finish at LREF(neg_for_17), which negates the quotient
   on return.  */
1520 1.1 mrg subi 1,x2,x2 /* this cannot overflow */
1521 1.1 mrg shd 0,x2,28,t1 /* multiply by 0xf to get started */
1522 1.1 mrg shd x2,0,28,t2
1523 1.1 mrg sub t2,x2,x2
1524 1.1 mrg b LREF(neg_for_17)
1525 1.1 mrg subb t1,0,x1
1526 1.1 mrg
1527 1.1 mrg GSYM($$divU_17)
1528 1.1 mrg .export $$divU_17,millicode
/* Unsigned divide by 17: add the rounding +1 (carry captured in x1),
   scale x1:x2 by 0xf, then finish in LREF(pos_for_17).  LSYM(u17) is
   an extra entry point for use by other millicode in this file --
   presumably a remainder routine; TODO confirm against the full file.  */
1529 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1530 1.1 mrg addc 0,0,x1
1531 1.1 mrg shd x1,x2,28,t1 /* multiply by 0xf to get started */
1532 1.1 mrg LSYM(u17)
1533 1.1 mrg shd x2,0,28,t2
1534 1.1 mrg sub t2,x2,x2
1535 1.1 mrg b LREF(pos_for_17)
1536 1.1 mrg subb t1,x1,x1
1537 1.1 mrg
1538 1.1 mrg
1539 1.1 mrg /* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
1540 1.1 mrg includes 7,9 and also 14
1541 1.1 mrg
1542 1.1 mrg
1543 1.1 mrg z = 2**24-1
1544 1.1 mrg r = z mod x = 0
1545 1.1 mrg
1546 1.1 mrg so choose b = 0
1547 1.1 mrg
1548 1.1 mrg Also, in order to divide by z = 2**24-1, we approximate by dividing
1549 1.1 mrg by (z+1) = 2**24 (which is easy), and then correcting.
1550 1.1 mrg
1551 1.1 mrg (ax) = (z+1)q' + r
1552 1.1 mrg . = zq' + (q'+r)
1553 1.1 mrg
1554 1.1 mrg So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
1555 1.1 mrg Then the true remainder of (ax)/z is (q'+r). Repeat the process
1556 1.1 mrg with this new remainder, adding the tentative quotients together,
1557 1.1 mrg until a tentative quotient is 0 (and then we are done). There is
1558 1.1 mrg one last correction to be done. It is possible that (q'+r) = z.
1559 1.1 mrg If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
1560 1.1 mrg in fact, we need to add 1 more to the quotient. Now, it turns
1561 1.1 mrg out that this happens if and only if the original value x is
1562 1.1 mrg an exact multiple of y. So, to avoid a three instruction test at
1563 1.1 mrg the end, instead use 1 instruction to add 1 to x at the beginning. */
1564 1.1 mrg
1565 1.1 mrg /* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
1566 1.1 mrg GSYM($$divI_7)
1567 1.1 mrg .export $$divI_7,millicode
1568 1.1 mrg comb,<,n x2,0,LREF(neg7)
/* LSYM(7) is also the entry from $$divI_14/$$divU_14, which arrive
   here after shifting their dividend right by 1.  */
1569 1.1 mrg LSYM(7)
/* Form 9*(x+1) across x1:x2: x1 gets the top 3 bits shifted out of
   the 8x term, sh3add computes 8x + x, addc folds in its carry.  */
1570 1.1 mrg addi 1,x2,x2 /* cannot overflow */
1571 1.1 mrg shd 0,x2,29,x1
1572 1.1 mrg sh3add x2,x2,x2
1573 1.1 mrg addc x1,0,x1
/* LSYM(pos7): shared positive tail, also entered from $$divU_7,
   $$divI_9 and $$divU_9.  Multiplies x1:x2 by 65 (<<6 plus 1) and then
   4097 (<<12 plus 1); 9 * 65 * 4097 = 0x249249 = (2**24-1)/7.  */
1574 1.1 mrg LSYM(pos7)
1575 1.1 mrg shd x1,x2,26,t1
1576 1.1 mrg shd x2,0,26,t2
1577 1.1 mrg add x2,t2,x2
1578 1.1 mrg addc x1,t1,x1
1579 1.1 mrg
1580 1.1 mrg shd x1,x2,20,t1
1581 1.1 mrg shd x2,0,20,t2
1582 1.1 mrg add x2,t2,x2
1583 1.1 mrg addc x1,t1,t1
1584 1.1 mrg
1585 1.1 mrg /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1586 1.1 mrg
/* Reduction loop (see the block comment above this routine): repeat
   tentative-quotient = value >> 24, accumulate quotients in x1, fold
   the quotient back into the 24-bit remainder, until the tentative
   quotient is zero -- the ,= conditions then nullify the next executed
   instruction so control drops into MILLIRETN.  */
1587 1.1 mrg copy 0,x1
1588 1.1 mrg shd,= t1,x2,24,t1 /* tentative quotient */
1589 1.1 mrg LSYM(1)
1590 1.1 mrg addb,tr t1,x1,LREF(2) /* add to previous quotient */
1591 1.1 mrg extru x2,31,24,x2 /* new remainder (unadjusted) */
1592 1.1 mrg
1593 1.1 mrg MILLIRETN
1594 1.1 mrg
1595 1.1 mrg LSYM(2)
1596 1.1 mrg addb,tr t1,x2,LREF(1) /* adjust remainder */
1597 1.1 mrg extru,= x2,7,8,t1 /* new quotient */
1598 1.1 mrg
/* Negative path: negate and add 1 in one subi.  LSYM(8) is also the
   entry from $$divI_14's negative path.  */
1599 1.1 mrg LSYM(neg7)
1600 1.1 mrg subi 1,x2,x2 /* negate x2 and add 1 */
1601 1.1 mrg LSYM(8)
1602 1.1 mrg shd 0,x2,29,x1
1603 1.1 mrg sh3add x2,x2,x2
1604 1.1 mrg addc x1,0,x1
1605 1.1 mrg
/* LSYM(neg7_shift): shared with $$divI_9's negative path, which has
   already applied its own first factor (7) before branching here.  */
1606 1.1 mrg LSYM(neg7_shift)
1607 1.1 mrg shd x1,x2,26,t1
1608 1.1 mrg shd x2,0,26,t2
1609 1.1 mrg add x2,t2,x2
1610 1.1 mrg addc x1,t1,x1
1611 1.1 mrg
1612 1.1 mrg shd x1,x2,20,t1
1613 1.1 mrg shd x2,0,20,t2
1614 1.1 mrg add x2,t2,x2
1615 1.1 mrg addc x1,t1,t1
1616 1.1 mrg
1617 1.1 mrg /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1618 1.1 mrg
/* Same reduction loop as above, but the accumulated quotient is
   negated in the return delay slot to restore the sign.  */
1619 1.1 mrg copy 0,x1
1620 1.1 mrg shd,= t1,x2,24,t1 /* tentative quotient */
1621 1.1 mrg LSYM(3)
1622 1.1 mrg addb,tr t1,x1,LREF(4) /* add to previous quotient */
1623 1.1 mrg extru x2,31,24,x2 /* new remainder (unadjusted) */
1624 1.1 mrg
1625 1.1 mrg MILLIRET
1626 1.1 mrg sub 0,x1,x1 /* negate result */
1627 1.1 mrg
1628 1.1 mrg LSYM(4)
1629 1.1 mrg addb,tr t1,x2,LREF(3) /* adjust remainder */
1630 1.1 mrg extru,= x2,7,8,t1 /* new quotient */
1631 1.1 mrg
1632 1.1 mrg GSYM($$divU_7)
1633 1.1 mrg .export $$divU_7,millicode
/* Unsigned divide by 7: the rounding +1 may carry (captured in x1);
   form 9*(x+1) across x1:x2 (t1 = top bits of the 8x term) and join
   the shared positive tail at LREF(pos7).  */
1634 1.1 mrg addi 1,x2,x2 /* can carry */
1635 1.1 mrg addc 0,0,x1
1636 1.1 mrg shd x1,x2,29,t1
1637 1.1 mrg sh3add x2,x2,x2
1638 1.1 mrg b LREF(pos7)
1639 1.1 mrg addc t1,x1,x1
1640 1.1 mrg
1641 1.1 mrg /* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
1642 1.1 mrg GSYM($$divI_9)
1643 1.1 mrg .export $$divI_9,millicode
1644 1.1 mrg comb,<,n x2,0,LREF(neg9)
/* Scale (x+1) by 7: t2 = x<<3, x2 = 8x - x, with the borrow folded
   into the high word by subb.  7 * 65 * 4097 = 0x1c71c7 = (2**24-1)/9,
   so the 65 and 4097 factors come from the shared tail LREF(pos7).  */
1645 1.1 mrg addi 1,x2,x2 /* cannot overflow */
1646 1.1 mrg shd 0,x2,29,t1
1647 1.1 mrg shd x2,0,29,t2
1648 1.1 mrg sub t2,x2,x2
1649 1.1 mrg b LREF(pos7)
1650 1.1 mrg subb t1,0,x1
1651 1.1 mrg
1652 1.1 mrg LSYM(neg9)
/* Negative mirror: negate-and-add-1, scale by 7, then finish at
   LREF(neg7_shift), which negates the quotient on return.  */
1653 1.1 mrg subi 1,x2,x2 /* negate and add 1 */
1654 1.1 mrg shd 0,x2,29,t1
1655 1.1 mrg shd x2,0,29,t2
1656 1.1 mrg sub t2,x2,x2
1657 1.1 mrg b LREF(neg7_shift)
1658 1.1 mrg subb t1,0,x1
1659 1.1 mrg
1660 1.1 mrg GSYM($$divU_9)
1661 1.1 mrg .export $$divU_9,millicode
/* Unsigned divide by 9: rounding +1 with its carry captured in x1,
   scale by 7 (borrow folded back via subb), then the shared positive
   tail at LREF(pos7).  */
1662 1.1 mrg addi 1,x2,x2 /* can carry */
1663 1.1 mrg addc 0,0,x1
1664 1.1 mrg shd x1,x2,29,t1
1665 1.1 mrg shd x2,0,29,t2
1666 1.1 mrg sub t2,x2,x2
1667 1.1 mrg b LREF(pos7)
1668 1.1 mrg subb t1,x1,x1
1669 1.1 mrg
1670 1.1 mrg /* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
1671 1.1 mrg GSYM($$divI_14)
1672 1.1 mrg .export $$divI_14,millicode
/* Signed: branch (nullifying the delay slot) to the negative path;
   otherwise fall straight through into the unsigned entry point.  */
1673 1.1 mrg comb,<,n x2,0,LREF(neg14)
1674 1.1 mrg GSYM($$divU_14)
1675 1.1 mrg .export $$divU_14,millicode
1676 1.1 mrg b LREF(7) /* go to 7 case */
1677 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1678 1.1 mrg
1679 1.1 mrg LSYM(neg14)
/* Negate and add 2 in one subi, so that after the divide-by-2 shift
   the value carries the +1 rounding term expected at LREF(8).  */
1680 1.1 mrg subi 2,x2,x2 /* negate (and add 2) */
1681 1.1 mrg b LREF(8)
1682 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1683 1.1 mrg .exit
1684 1.1 mrg .procend
1685 1.1 mrg .end
1686 1.1 mrg #endif
1687 1.1 mrg
1688 1.1 mrg #ifdef L_mulI
1689 1.1 mrg /* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
1690 1.1 mrg /******************************************************************************
1691 1.1 mrg This routine is used on PA2.0 processors when gcc -mno-fpregs is used
1692 1.1 mrg
1693 1.1 mrg ROUTINE: $$mulI
1694 1.1 mrg
1695 1.1 mrg
1696 1.1 mrg DESCRIPTION:
1697 1.1 mrg
1698 1.1 mrg $$mulI multiplies two single word integers, giving a single
1699 1.1 mrg word result.
1700 1.1 mrg
1701 1.1 mrg
1702 1.1 mrg INPUT REGISTERS:
1703 1.1 mrg
1704 1.1 mrg arg0 = Operand 1
1705 1.1 mrg arg1 = Operand 2
1706 1.1 mrg r31 == return pc
1707 1.1 mrg sr0 == return space when called externally
1708 1.1 mrg
1709 1.1 mrg
1710 1.1 mrg OUTPUT REGISTERS:
1711 1.1 mrg
1712 1.1 mrg arg0 = undefined
1713 1.1 mrg arg1 = undefined
1714 1.1 mrg ret1 = result
1715 1.1 mrg
1716 1.1 mrg OTHER REGISTERS AFFECTED:
1717 1.1 mrg
1718 1.1 mrg r1 = undefined
1719 1.1 mrg
1720 1.1 mrg SIDE EFFECTS:
1721 1.1 mrg
1722 1.1 mrg Causes a trap under the following conditions: NONE
1723 1.1 mrg Changes memory at the following places: NONE
1724 1.1 mrg
1725 1.1 mrg PERMISSIBLE CONTEXT:
1726 1.1 mrg
1727 1.1 mrg Unwindable
1728 1.1 mrg Does not create a stack frame
1729 1.1 mrg Is usable for internal or external microcode
1730 1.1 mrg
1731 1.1 mrg DISCUSSION:
1732 1.1 mrg
1733 1.1 mrg Calls other millicode routines via mrp: NONE
1734 1.1 mrg Calls other millicode routines: NONE
1735 1.1 mrg
1736 1.1 mrg ***************************************************************************/
1737 1.1 mrg
1738 1.1 mrg
/* Register aliases and single-instruction building blocks for the
   $$mulI dispatch table below.  Naming convention: "t0__3a0" reads as
   "t0 = 3*a0", "r__r_2t0" as "r += 2*t0", "a0__256a0" as "a0 *= 256".
   The b_e_* macros branch to the matching fix-up stub (LSYM(e_...))
   after the table.  */
1739 1.1 mrg #define a0 %arg0
1740 1.1 mrg #define a1 %arg1
1741 1.1 mrg #define t0 %r1
1742 1.1 mrg #define r %ret1
1743 1.1 mrg
/* zdep with position 31-n and length 32-n is a left shift by n, so
   24,25 shifts by 7 (*128) and 23,24 by 8 (*256).  */
1744 1.1 mrg #define a0__128a0 zdep a0,24,25,a0
1745 1.1 mrg #define a0__256a0 zdep a0,23,24,a0
/* a1_ne_0_b_lN: branch back into the main loop (at lN) when more
   multiplier chunks remain, i.e. a1 != 0.  */
1746 1.1 mrg #define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
1747 1.1 mrg #define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
1748 1.1 mrg #define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
1749 1.1 mrg #define b_n_ret_t0 b,n LREF(ret_t0)
1750 1.1 mrg #define b_e_shift b LREF(e_shift)
1751 1.1 mrg #define b_e_t0ma0 b LREF(e_t0ma0)
1752 1.1 mrg #define b_e_t0 b LREF(e_t0)
1753 1.1 mrg #define b_e_t0a0 b LREF(e_t0a0)
1754 1.1 mrg #define b_e_t02a0 b LREF(e_t02a0)
1755 1.1 mrg #define b_e_t04a0 b LREF(e_t04a0)
1756 1.1 mrg #define b_e_2t0 b LREF(e_2t0)
1757 1.1 mrg #define b_e_2t0a0 b LREF(e_2t0a0)
1758 1.1 mrg #define b_e_2t04a0 b LREF(e2t04a0)
1759 1.1 mrg #define b_e_3t0 b LREF(e_3t0)
1760 1.1 mrg #define b_e_4t0 b LREF(e_4t0)
1761 1.1 mrg #define b_e_4t0a0 b LREF(e_4t0a0)
1762 1.1 mrg #define b_e_4t08a0 b LREF(e4t08a0)
1763 1.1 mrg #define b_e_5t0 b LREF(e_5t0)
1764 1.1 mrg #define b_e_8t0 b LREF(e_8t0)
1765 1.1 mrg #define b_e_8t0a0 b LREF(e_8t0a0)
/* Accumulator updates: r += (1,2,4,8) * a0 or t0, each one shNadd.  */
1766 1.1 mrg #define r__r_a0 add r,a0,r
1767 1.1 mrg #define r__r_2a0 sh1add a0,r,r
1768 1.1 mrg #define r__r_4a0 sh2add a0,r,r
1769 1.1 mrg #define r__r_8a0 sh3add a0,r,r
1770 1.1 mrg #define r__r_t0 add r,t0,r
1771 1.1 mrg #define r__r_2t0 sh1add t0,r,r
1772 1.1 mrg #define r__r_4t0 sh2add t0,r,r
1773 1.1 mrg #define r__r_8t0 sh3add t0,r,r
/* Temporary updates: t0 = small multiple of a0, or t0 rescaled, each
   a single shift-and-add or zdep (shift).  */
1774 1.1 mrg #define t0__3a0 sh1add a0,a0,t0
1775 1.1 mrg #define t0__4a0 sh2add a0,0,t0
1776 1.1 mrg #define t0__5a0 sh2add a0,a0,t0
1777 1.1 mrg #define t0__8a0 sh3add a0,0,t0
1778 1.1 mrg #define t0__9a0 sh3add a0,a0,t0
1779 1.1 mrg #define t0__16a0 zdep a0,27,28,t0
1780 1.1 mrg #define t0__32a0 zdep a0,26,27,t0
1781 1.1 mrg #define t0__64a0 zdep a0,25,26,t0
1782 1.1 mrg #define t0__128a0 zdep a0,24,25,t0
1783 1.1 mrg #define t0__t0ma0 sub t0,a0,t0
1784 1.1 mrg #define t0__t0_a0 add t0,a0,t0
1785 1.1 mrg #define t0__t0_2a0 sh1add a0,t0,t0
1786 1.1 mrg #define t0__t0_4a0 sh2add a0,t0,t0
1787 1.1 mrg #define t0__t0_8a0 sh3add a0,t0,t0
1788 1.1 mrg #define t0__2t0_a0 sh1add t0,a0,t0
1789 1.1 mrg #define t0__3t0 sh1add t0,t0,t0
1790 1.1 mrg #define t0__4t0 sh2add t0,0,t0
1791 1.1 mrg #define t0__4t0_a0 sh2add t0,a0,t0
1792 1.1 mrg #define t0__5t0 sh2add t0,t0,t0
1793 1.1 mrg #define t0__8t0 sh3add t0,0,t0
1794 1.1 mrg #define t0__8t0_a0 sh3add t0,a0,t0
1795 1.1 mrg #define t0__9t0 sh3add t0,t0,t0
1796 1.1 mrg #define t0__16t0 zdep t0,27,28,t0
1797 1.1 mrg #define t0__32t0 zdep t0,26,27,t0
1798 1.1 mrg #define t0__256a0 zdep a0,23,24,t0
1799 1.1 mrg
1800 1.1 mrg
/* $$mulI: 32-bit integer multiply, result = arg0 * arg1 in ret1.
   Strategy: consume a1 eight bits at a time; for each chunk, dispatch
   through the 256-entry table below, whose entries add a0*chunk into r
   using at most four shift-and-add instructions, then shift a0 left by
   8 and a1 right by 8 until a1 is exhausted.  */
1801 1.1 mrg SUBSPA_MILLI
1802 1.1 mrg ATTR_MILLI
1803 1.1 mrg .align 16
1804 1.1 mrg .proc
1805 1.1 mrg .callinfo millicode
1806 1.1 mrg .export $$mulI,millicode
1807 1.1 mrg GSYM($$mulI)
/* Keep the (unsigned-)smaller operand in a1 so fewer chunks are
   processed.  If a0 is negative, negate both operands (the product is
   unchanged) and swap back if that broke the size ordering.  */
1808 1.1 mrg combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
1809 1.1 mrg copy 0,r /* zero out the result */
1810 1.1 mrg xor a0,a1,a0 /* swap a0 & a1 using the */
1811 1.1 mrg xor a0,a1,a1 /* old xor trick */
1812 1.1 mrg xor a0,a1,a0
1813 1.1 mrg LSYM(l4)
1814 1.1 mrg combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
1815 1.1 mrg zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1816 1.1 mrg sub,> 0,a1,t0 /* otherwise negate both and */
1817 1.1 mrg combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
1818 1.1 mrg sub 0,a0,a1
1819 1.1 mrg movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
1820 1.1 mrg
/* Main loop: fold in the current partial product, advance a0 and a1,
   and dispatch on the next 8-bit chunk.  blr scales its index by 8
   bytes; with t0 = chunk<<1 that selects 16-byte (four-instruction)
   table entries.  */
1821 1.1 mrg LSYM(l0) r__r_t0 /* add in this partial product */
1822 1.1 mrg LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
1823 1.1 mrg LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1824 1.1 mrg LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
1825 1.1 mrg extru a1,23,24,a1 /* a1 >>= 8 ****************** */
1826 1.1 mrg
1827 1.1 mrg /*16 insts before this. */
1828 1.1 mrg /* a0 <<= 8 ************************** */
/* 256-entry dispatch table: entry xN adds a0*N into r (via shift-add
   factorizations of N) and either returns when no multiplier bits
   remain or loops back for the next chunk.  Every entry is exactly
   four slots; entries that cannot finish in-line branch to a shared
   fix-up stub (b_e_*) located after the table.  */
1829 1.1 mrg LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
1830 1.1 mrg LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
1831 1.1 mrg LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
1832 1.1 mrg LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
1833 1.1 mrg LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
1834 1.1 mrg LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
1835 1.1 mrg LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1836 1.1 mrg LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
1837 1.1 mrg LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
1838 1.1 mrg LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
1839 1.1 mrg LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1840 1.1 mrg LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1841 1.1 mrg LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1842 1.1 mrg LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1843 1.1 mrg LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1844 1.1 mrg LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
1845 1.1 mrg LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1846 1.1 mrg LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
1847 1.1 mrg LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1848 1.1 mrg LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
1849 1.1 mrg LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1850 1.1 mrg LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1851 1.1 mrg LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1852 1.1 mrg LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1853 1.1 mrg LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1854 1.1 mrg LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1855 1.1 mrg LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1856 1.1 mrg LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
1857 1.1 mrg LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1858 1.1 mrg LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1859 1.1 mrg LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1860 1.1 mrg LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1861 1.1 mrg LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1862 1.1 mrg LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1863 1.1 mrg LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1864 1.1 mrg LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
1865 1.1 mrg LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1866 1.1 mrg LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1867 1.1 mrg LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1868 1.1 mrg LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1869 1.1 mrg LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1870 1.1 mrg LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1871 1.1 mrg LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1872 1.1 mrg LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1873 1.1 mrg LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1874 1.1 mrg LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1875 1.1 mrg LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
1876 1.1 mrg LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
1877 1.1 mrg LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
1878 1.1 mrg LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
1879 1.1 mrg LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1880 1.1 mrg LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
1881 1.1 mrg LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1882 1.1 mrg LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1883 1.1 mrg LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1884 1.1 mrg LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
1885 1.1 mrg LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1886 1.1 mrg LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
1887 1.1 mrg LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1888 1.1 mrg LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
1889 1.1 mrg LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1890 1.1 mrg LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1891 1.1 mrg LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1892 1.1 mrg LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1893 1.1 mrg LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1894 1.1 mrg LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1895 1.1 mrg LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1896 1.1 mrg LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1897 1.1 mrg LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1898 1.1 mrg LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1899 1.1 mrg LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
1900 1.1 mrg LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
1901 1.1 mrg LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1902 1.1 mrg LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
1903 1.1 mrg LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1904 1.1 mrg LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1905 1.1 mrg LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1906 1.1 mrg LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1907 1.1 mrg LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
1908 1.1 mrg LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1909 1.1 mrg LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
1910 1.1 mrg LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
1911 1.1 mrg LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1912 1.1 mrg LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1913 1.1 mrg LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1914 1.1 mrg LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1915 1.1 mrg LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1916 1.1 mrg LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
1917 1.1 mrg LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1918 1.1 mrg LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1919 1.1 mrg LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1920 1.1 mrg LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
1921 1.1 mrg LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1922 1.1 mrg LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
1923 1.1 mrg LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
1924 1.1 mrg LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1925 1.1 mrg LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1926 1.1 mrg LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1927 1.1 mrg LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
1928 1.1 mrg LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1929 1.1 mrg LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
1930 1.1 mrg LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1931 1.1 mrg LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
1932 1.1 mrg LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
1933 1.1 mrg LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1934 1.1 mrg LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1935 1.1 mrg LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
1936 1.1 mrg LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
1937 1.1 mrg LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1938 1.1 mrg LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1939 1.1 mrg LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
1940 1.1 mrg LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1941 1.1 mrg LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
1942 1.1 mrg LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
1943 1.1 mrg LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
1944 1.1 mrg LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
1945 1.1 mrg LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
1946 1.1 mrg LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
1947 1.1 mrg LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
1948 1.1 mrg LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
1949 1.1 mrg LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1950 1.1 mrg LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1951 1.1 mrg LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1952 1.1 mrg LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1953 1.1 mrg LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
1954 1.1 mrg LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
1955 1.1 mrg LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1956 1.1 mrg LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1957 1.1 mrg LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1958 1.1 mrg LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
1959 1.1 mrg LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1960 1.1 mrg LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1961 1.1 mrg LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1962 1.1 mrg LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1963 1.1 mrg LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1964 1.1 mrg LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
1965 1.1 mrg LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1966 1.1 mrg LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1967 1.1 mrg LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1968 1.1 mrg LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
1969 1.1 mrg LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
1970 1.1 mrg LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
1971 1.1 mrg LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
1972 1.1 mrg LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
1973 1.1 mrg LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
1974 1.1 mrg LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
1975 1.1 mrg LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1976 1.1 mrg LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1977 1.1 mrg LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1978 1.1 mrg LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1979 1.1 mrg LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1980 1.1 mrg LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1981 1.1 mrg LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1982 1.1 mrg LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1983 1.1 mrg LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1984 1.1 mrg LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
1985 1.1 mrg LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1986 1.1 mrg LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
1987 1.1 mrg LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
1988 1.1 mrg LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1989 1.1 mrg LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
1990 1.1 mrg LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1991 1.1 mrg LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
1992 1.1 mrg LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
1993 1.1 mrg LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
1994 1.1 mrg LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1995 1.1 mrg LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
1996 1.1 mrg LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1997 1.1 mrg LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1998 1.1 mrg LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
1999 1.1 mrg LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
2000 1.1 mrg LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
2001 1.1 mrg LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
2002 1.1 mrg LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
2003 1.1 mrg LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
2004 1.1 mrg LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
2005 1.1 mrg LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
2006 1.1 mrg LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
2007 1.1 mrg LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
2008 1.1 mrg LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
2009 1.1 mrg LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
2010 1.1 mrg LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
2011 1.1 mrg LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
2012 1.1 mrg LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
2013 1.1 mrg LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
2014 1.1 mrg LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
2015 1.1 mrg LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
2016 1.1 mrg LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
2017 1.1 mrg LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
2018 1.1 mrg LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
2019 1.1 mrg LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
2020 1.1 mrg LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
2021 1.1 mrg LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
2022 1.1 mrg LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
2023 1.1 mrg LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
2024 1.1 mrg LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
2025 1.1 mrg LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
2026 1.1 mrg LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
2027 1.1 mrg LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
2028 1.1 mrg LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2029 1.1 mrg LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
2030 1.1 mrg LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
2031 1.1 mrg LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
2032 1.1 mrg LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
2033 1.1 mrg LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2034 1.1 mrg LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
2035 1.1 mrg LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
2036 1.1 mrg LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2037 1.1 mrg LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
2038 1.1 mrg LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
2039 1.1 mrg LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
2040 1.1 mrg LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
2041 1.1 mrg LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
2042 1.1 mrg LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
2043 1.1 mrg LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
2044 1.1 mrg LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
2045 1.1 mrg LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
2046 1.1 mrg LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
2047 1.1 mrg LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
2048 1.1 mrg LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
2049 1.1 mrg LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
2050 1.1 mrg LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
2051 1.1 mrg LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
2052 1.1 mrg LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2053 1.1 mrg LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
2054 1.1 mrg LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
2055 1.1 mrg LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
2056 1.1 mrg LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
2057 1.1 mrg LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2058 1.1 mrg LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
2059 1.1 mrg LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
2060 1.1 mrg LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2061 1.1 mrg LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
2062 1.1 mrg LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
2063 1.1 mrg LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
2064 1.1 mrg LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
2065 1.1 mrg LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
2066 1.1 mrg LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
2067 1.1 mrg LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
2068 1.1 mrg LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
2069 1.1 mrg LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
2070 1.1 mrg LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
2071 1.1 mrg LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
2072 1.1 mrg LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
2073 1.1 mrg LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
2074 1.1 mrg LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
2075 1.1 mrg LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
2076 1.1 mrg LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
2077 1.1 mrg LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
2078 1.1 mrg LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
2079 1.1 mrg LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
2080 1.1 mrg LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
2081 1.1 mrg LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
2082 1.1 mrg LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
2083 1.1 mrg LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
2084 1.1 mrg LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
2085 1.1 mrg /*1040 insts before this. */
/* Shared fix-up stubs for table entries whose factorization needs one
   more step.  Each applies a final transformation to t0 (or folds a
   scaled t0 into r), then either returns or re-enters the main loop
   when multiplier chunks remain (a1 != 0).  The instruction after each
   conditional branch executes in its delay slot / return slot.  */
2086 1.1 mrg LSYM(ret_t0) MILLIRET
2087 1.1 mrg LSYM(e_t0) r__r_t0
2088 1.1 mrg LSYM(e_shift) a1_ne_0_b_l2
2089 1.1 mrg a0__256a0 /* a0 <<= 8 *********** */
2090 1.1 mrg MILLIRETN
2091 1.1 mrg LSYM(e_t0ma0) a1_ne_0_b_l0
2092 1.1 mrg t0__t0ma0
2093 1.1 mrg MILLIRET
2094 1.1 mrg r__r_t0
2095 1.1 mrg LSYM(e_t0a0) a1_ne_0_b_l0
2096 1.1 mrg t0__t0_a0
2097 1.1 mrg MILLIRET
2098 1.1 mrg r__r_t0
2099 1.1 mrg LSYM(e_t02a0) a1_ne_0_b_l0
2100 1.1 mrg t0__t0_2a0
2101 1.1 mrg MILLIRET
2102 1.1 mrg r__r_t0
2103 1.1 mrg LSYM(e_t04a0) a1_ne_0_b_l0
2104 1.1 mrg t0__t0_4a0
2105 1.1 mrg MILLIRET
2106 1.1 mrg r__r_t0
2107 1.1 mrg LSYM(e_2t0) a1_ne_0_b_l1
2108 1.1 mrg r__r_2t0
2109 1.1 mrg MILLIRETN
2110 1.1 mrg LSYM(e_2t0a0) a1_ne_0_b_l0
2111 1.1 mrg t0__2t0_a0
2112 1.1 mrg MILLIRET
2113 1.1 mrg r__r_t0
2114 1.1 mrg LSYM(e2t04a0) t0__t0_2a0
2115 1.1 mrg a1_ne_0_b_l1
2116 1.1 mrg r__r_2t0
2117 1.1 mrg MILLIRETN
2118 1.1 mrg LSYM(e_3t0) a1_ne_0_b_l0
2119 1.1 mrg t0__3t0
2120 1.1 mrg MILLIRET
2121 1.1 mrg r__r_t0
2122 1.1 mrg LSYM(e_4t0) a1_ne_0_b_l1
2123 1.1 mrg r__r_4t0
2124 1.1 mrg MILLIRETN
2125 1.1 mrg LSYM(e_4t0a0) a1_ne_0_b_l0
2126 1.1 mrg t0__4t0_a0
2127 1.1 mrg MILLIRET
2128 1.1 mrg r__r_t0
2129 1.1 mrg LSYM(e4t08a0) t0__t0_2a0
2130 1.1 mrg a1_ne_0_b_l1
2131 1.1 mrg r__r_4t0
2132 1.1 mrg MILLIRETN
2133 1.1 mrg LSYM(e_5t0) a1_ne_0_b_l0
2134 1.1 mrg t0__5t0
2135 1.1 mrg MILLIRET
2136 1.1 mrg r__r_t0
2137 1.1 mrg LSYM(e_8t0) a1_ne_0_b_l1
2138 1.1 mrg r__r_8t0
2139 1.1 mrg MILLIRETN
2140 1.1 mrg LSYM(e_8t0a0) a1_ne_0_b_l0
2141 1.1 mrg t0__8t0_a0
2142 1.1 mrg MILLIRET
2143 1.1 mrg r__r_t0
2144 1.1 mrg
2145 1.1 mrg .procend
2146 1.1 mrg .end
2147 1.1 mrg #endif
2148