/* 32 and 64-bit millicode, original author Hewlett-Packard
   adapted for gcc by Paul Bame <bame (at) debian.org>
   and Alan Modra <alan (at) linuxcare.com.au>.

   Copyright (C) 2001-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif

#ifdef pa64
        .level  2.0w
#endif

/* Hardware General Registers.  */
r0:     .reg    %r0
r1:     .reg    %r1
r2:     .reg    %r2
r3:     .reg    %r3
r4:     .reg    %r4
r5:     .reg    %r5
r6:     .reg    %r6
r7:     .reg    %r7
r8:     .reg    %r8
r9:     .reg    %r9
r10:    .reg    %r10
r11:    .reg    %r11
r12:    .reg    %r12
r13:    .reg    %r13
r14:    .reg    %r14
r15:    .reg    %r15
r16:    .reg    %r16
r17:    .reg    %r17
r18:    .reg    %r18
r19:    .reg    %r19
r20:    .reg    %r20
r21:    .reg    %r21
r22:    .reg    %r22
r23:    .reg    %r23
r24:    .reg    %r24
r25:    .reg    %r25
r26:    .reg    %r26
r27:    .reg    %r27
r28:    .reg    %r28
r29:    .reg    %r29
r30:    .reg    %r30
r31:    .reg    %r31

/* Hardware Space Registers.  */
sr0:    .reg    %sr0
sr1:    .reg    %sr1
sr2:    .reg    %sr2
sr3:    .reg    %sr3
sr4:    .reg    %sr4
sr5:    .reg    %sr5
sr6:    .reg    %sr6
sr7:    .reg    %sr7

/* Hardware Floating Point Registers.  */
fr0:    .reg    %fr0
fr1:    .reg    %fr1
fr2:    .reg    %fr2
fr3:    .reg    %fr3
fr4:    .reg    %fr4
fr5:    .reg    %fr5
fr6:    .reg    %fr6
fr7:    .reg    %fr7
fr8:    .reg    %fr8
fr9:    .reg    %fr9
fr10:   .reg    %fr10
fr11:   .reg    %fr11
fr12:   .reg    %fr12
fr13:   .reg    %fr13
fr14:   .reg    %fr14
fr15:   .reg    %fr15

/* Hardware Control Registers.  */
cr11:   .reg    %cr11
sar:    .reg    %cr11   /* Shift Amount Register */

/* Software Architecture General Registers.  */
rp:     .reg    r2      /* return pointer */
#ifdef pa64
mrp:    .reg    r2      /* millicode return pointer */
#else
mrp:    .reg    r31     /* millicode return pointer */
#endif
ret0:   .reg    r28     /* return value */
ret1:   .reg    r29     /* return value (high part of double) */
sp:     .reg    r30     /* stack pointer */
dp:     .reg    r27     /* data pointer */
arg0:   .reg    r26     /* argument */
arg1:   .reg    r25     /* argument or high part of double argument */
arg2:   .reg    r24     /* argument */
arg3:   .reg    r23     /* argument or high part of double argument */

/* Software Architecture Space Registers.  */
/*              sr0     ; return link from BLE */
sret:   .reg    sr1     /* return value */
sarg:   .reg    sr1     /* argument */
/*              sr4     ; PC SPACE tracker */
/*              sr5     ; process private data */

/* Frame Offsets (millicode convention!)  Used when calling other
   millicode routines.  Stack unwinding is dependent upon these
   definitions.  */
r31_slot:       .equ    -20     /* "current RP" slot */
sr0_slot:       .equ    -16     /* "static link" slot */
#if defined(pa64)
mrp_slot:       .equ    -16     /* "current RP" slot */
psp_slot:       .equ    -8      /* "previous SP" slot */
#else
mrp_slot:       .equ    -20     /* "current RP" slot (replacing "r31_slot") */
#endif

/* DEFINE/RDEFINE: declare an assembler constant / register alias.  */
#define DEFINE(name,value)name: .EQU value
#define RDEFINE(name,value)name: .REG value

/* Millicode branch/return forms.  When assembled as an external
   millicode library ("milliext"), calls use inter-space BE/BLE with
   sr7 and returns go through sr0; otherwise plain intra-space
   branches via mrp are used.  The ",n" variants nullify the delay
   slot.  */
#ifdef milliext
#define MILLI_BE(lbl)   BE    lbl(sr7,r0)
#define MILLI_BEN(lbl)  BE,n  lbl(sr7,r0)
#define MILLI_BLE(lbl)  BLE   lbl(sr7,r0)
#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
#define MILLIRETN       BE,n  0(sr0,mrp)
#define MILLIRET        BE    0(sr0,mrp)
#define MILLI_RETN      BE,n  0(sr0,mrp)
#define MILLI_RET       BE    0(sr0,mrp)
#else
#define MILLI_BE(lbl)   B     lbl
#define MILLI_BEN(lbl)  B,n   lbl
#define MILLI_BLE(lbl)  BL    lbl,mrp
#define MILLI_BLEN(lbl) BL,n  lbl,mrp
#define MILLIRETN       BV,n  0(mrp)
#define MILLIRET        BV    0(mrp)
#define MILLI_RETN      BV,n  0(mrp)
#define MILLI_RET       BV    0(mrp)
#endif

/* Token pasting, with a pre-ANSI fallback.  */
#ifdef __STDC__
#define CAT(a,b)        a##b
#else
#define CAT(a,b)        a/**/b
#endif

#ifdef ELF
#define SUBSPA_MILLI     .section .text
#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits!  .align 16
#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits!  .align 16
#define ATTR_MILLI
#define SUBSPA_DATA      .section .data
#define ATTR_DATA
#define GLOBAL           $global$
#define GSYM(sym)        !sym:
#define LSYM(sym)        !CAT(.L,sym:)
#define LREF(sym)        CAT(.L,sym)

#else

#ifdef coff
/* This used to be .milli but since link32 places different named
   sections in different segments millicode ends up a long ways away
   from .text (1meg?).  This way they will be a lot closer.

   The SUBSPA_MILLI_* specify locality sets for certain millicode
   modules in order to ensure that modules that call one another are
   placed close together.  Without locality sets this is unlikely to
   happen because of the Dynamite linker library search algorithm.  We
   want these modules close together so that short calls always reach
   (we don't want to require long calls or use long call stubs).  */

#define SUBSPA_MILLI     .subspa .text
#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
#define ATTR_MILLI       .attr code,read,execute
#define SUBSPA_DATA      .subspa .data
#define ATTR_DATA        .attr init_data,read,write
#define GLOBAL           _gp
#else
#define SUBSPA_MILLI     .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
#define SUBSPA_MILLI_DIV SUBSPA_MILLI
#define SUBSPA_MILLI_MUL SUBSPA_MILLI
#define ATTR_MILLI
#define SUBSPA_DATA      .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
#define ATTR_DATA
#define GLOBAL           $global$
#endif
#define SPACE_DATA       .space $PRIVATE$,spnum=1,sort=16

#define GSYM(sym)        !sym
#define LSYM(sym)        !CAT(L$,sym)
#define LREF(sym)        CAT(L$,sym)
#endif

#ifdef L_dyncall
        SUBSPA_MILLI
        ATTR_DATA
GSYM($$dyncall)
        .export $$dyncall,millicode
        .proc
        .callinfo       millicode
        .entry
#ifdef LINUX
        extru,<> %r22,30,1,%r0          ; nullify if plabel bit set
        bv,n    %r0(%r22)               ; branch to target
        ldw     -2(%r22),%r21           ; load address of target
        bv      %r0(%r21)               ; branch to the real target
        ldw     2(%r22),%r19            ; load new LTP value
#else
        bb,>=,n %r22,30,LREF(1)         ; branch if not plabel address
        ldw     -2(%r22),%r21           ; load address of target to r21
        ldsid   (%sr0,%r21),%r1         ; get the "space ident" selected by r21
        ldw     2(%r22),%r19            ; load new LTP value
        mtsp    %r1,%sr0                ; move that space identifier into sr0
        be      0(%sr0,%r21)            ; branch to the real target
        stw     %r2,-24(%r30)           ; save return address into frame marker
LSYM(1)
        ldsid   (%sr0,%r22),%r1         ; get the "space ident" selected by r22
        mtsp    %r1,%sr0                ; move that space identifier into sr0
        be      0(%sr0,%r22)            ; branch to the target
        stw     %r2,-24(%r30)           ; save return address into frame marker
#endif
        .exit
        .procend
#endif

#ifdef L_divI
/* ROUTINES:    $$divI, $$divoI

   Single precision divide for signed binary integers.

   The quotient is truncated towards zero.
   The sign of the quotient is the XOR of the signs of the dividend and
   divisor.
   Divide by zero is trapped.
   Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.

   INPUT REGISTERS:
   .    arg0 == dividend
   .    arg1 == divisor
   .    mrp  == return pc
   .    sr0  == return space when called externally

   OUTPUT REGISTERS:
   .    arg0 =  undefined
   .    arg1 =  undefined
   .    ret1 =  quotient

   OTHER REGISTERS AFFECTED:
   .    r1   =  undefined

   SIDE EFFECTS:
   .    Causes a trap under the following conditions:
   .            divisor is zero  (traps with ADDIT,=  0,25,0)
   .            dividend==-2**31  and divisor==-1 and routine is $$divoI
   .                            (traps with ADDO  26,25,0)
   .    Changes memory at the following places:
   .            NONE

   PERMISSIBLE CONTEXT:
   .    Unwindable.
   .    Suitable for internal or external millicode.
   .    Assumes the special millicode register conventions.

   DISCUSSION:
   .    Branches to other millicode routines using BE
   .            $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
   .
   .    For selected divisors, calls a divide by constant routine written by
   .    Karl Pettis.  Eligible divisors are 1..15 excluding 11 and 13.
   .
   .    The only overflow case is -2**31 divided by -1.
   .    Both routines return -2**31 but only $$divoI traps.  */

RDEFINE(temp,r1)
RDEFINE(retreg,ret1)    /* r29 */
RDEFINE(temp1,arg0)
        SUBSPA_MILLI_DIV
        ATTR_MILLI
        .import $$divI_2,millicode
        .import $$divI_3,millicode
        .import $$divI_4,millicode
        .import $$divI_5,millicode
        .import $$divI_6,millicode
        .import $$divI_7,millicode
        .import $$divI_8,millicode
        .import $$divI_9,millicode
        .import $$divI_10,millicode
        .import $$divI_12,millicode
        .import $$divI_14,millicode
        .import $$divI_15,millicode
        .export $$divI,millicode
        .export $$divoI,millicode
        .proc
        .callinfo       millicode
        .entry
GSYM($$divoI)
        comib,=,n  -1,arg1,LREF(negative1)      /* when divisor == -1 */
GSYM($$divI)
        ldo     -1(arg1),temp           /* is there at most one bit set ? */
        and,<>  arg1,temp,r0            /* if not, don't use power of 2 divide */
        addi,>  0,arg1,r0               /* if divisor > 0, use power of 2 divide */
        b,n     LREF(neg_denom)
LSYM(pow2)
        addi,>= 0,arg0,retreg           /* if numerator is negative, add the */
        add     arg0,temp,retreg        /* (denominator -1) to correct for shifts */
        extru,= arg1,15,16,temp         /* test denominator with 0xffff0000 */
        extrs   retreg,15,16,retreg     /* retreg = retreg >> 16 */
        or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 16) */
        ldi     0xcc,temp1              /* setup 0xcc in temp1 */
        extru,= arg1,23,8,temp          /* test denominator with 0xff00 */
        extrs   retreg,23,24,retreg     /* retreg = retreg >> 8 */
        or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 8) */
        ldi     0xaa,temp               /* setup 0xaa in temp */
        extru,= arg1,27,4,r0            /* test denominator with 0xf0 */
        extrs   retreg,27,28,retreg     /* retreg = retreg >> 4 */
        and,=   arg1,temp1,r0           /* test denominator with 0xcc */
        extrs   retreg,29,30,retreg     /* retreg = retreg >> 2 */
        and,=   arg1,temp,r0            /* test denominator with 0xaa */
        extrs   retreg,30,31,retreg     /* retreg = retreg >> 1 */
        MILLIRETN
LSYM(neg_denom)
        addi,<  0,arg1,r0               /* if arg1 >= 0, it's not power of 2 */
        b,n     LREF(regular_seq)
        sub     r0,arg1,temp            /* make denominator positive */
        comb,=,n  arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */
        ldo     -1(temp),retreg         /* is there at most one bit set ? */
        and,=   temp,retreg,r0          /* if so, the denominator is power of 2 */
        b,n     LREF(regular_seq)
        sub     r0,arg0,retreg          /* negate numerator */
        comb,=,n  arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */
        copy    retreg,arg0             /* set up arg0, arg1 and temp */
        copy    temp,arg1               /* before branching to pow2 */
        b       LREF(pow2)
        ldo     -1(arg1),temp
LSYM(regular_seq)
        comib,>>=,n 15,arg1,LREF(small_divisor)
        add,>=  0,arg0,retreg           /* move dividend, if retreg < 0, */
LSYM(normal)
        subi    0,retreg,retreg         /* make it positive */
        sub     0,arg1,temp             /* clear carry, */
                                        /* negate the divisor */
        ds      0,temp,0                /* set V-bit to the comple- */
                                        /* ment of the divisor sign */
        add     retreg,retreg,retreg    /* shift msb bit into carry */
        ds      r0,arg1,temp            /* 1st divide step, if no carry */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 2nd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 3rd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 4th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 5th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 6th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 7th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 8th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 9th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 10th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 11th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 12th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 13th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 14th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 15th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 16th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 17th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 18th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 19th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 20th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 21st divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 22nd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 23rd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 24th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 25th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 26th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 27th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 28th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 29th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 30th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 31st divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 32nd divide step, */
        addc    retreg,retreg,retreg    /* shift last retreg bit into retreg */
        xor,>=  arg0,arg1,0             /* get correct sign of quotient */
        sub     0,retreg,retreg         /* based on operand signs */
        MILLIRETN
        nop

LSYM(small_divisor)

#if defined(pa64)
/* Clear the upper 32 bits of the arg1 register.  We are working with
   small divisors (and 32-bit integers)   We must not be misled
   by "1" bits left in the upper 32 bits.  */
        depd %r0,31,32,%r25
#endif
        blr,n   arg1,r0
        nop
/* table for divisor == 0,1, ... ,15 */
        addit,= 0,arg1,r0       /* trap if divisor == 0 */
        nop
        MILLIRET                /* divisor == 1 */
        copy    arg0,retreg
        MILLI_BEN($$divI_2)     /* divisor == 2 */
        nop
        MILLI_BEN($$divI_3)     /* divisor == 3 */
        nop
        MILLI_BEN($$divI_4)     /* divisor == 4 */
        nop
        MILLI_BEN($$divI_5)     /* divisor == 5 */
        nop
        MILLI_BEN($$divI_6)     /* divisor == 6 */
        nop
        MILLI_BEN($$divI_7)     /* divisor == 7 */
        nop
        MILLI_BEN($$divI_8)     /* divisor == 8 */
        nop
        MILLI_BEN($$divI_9)     /* divisor == 9 */
        nop
        MILLI_BEN($$divI_10)    /* divisor == 10 */
        nop
        b       LREF(normal)            /* divisor == 11 */
        add,>=  0,arg0,retreg
        MILLI_BEN($$divI_12)    /* divisor == 12 */
        nop
        b       LREF(normal)            /* divisor == 13 */
        add,>=  0,arg0,retreg
        MILLI_BEN($$divI_14)    /* divisor == 14 */
        nop
        MILLI_BEN($$divI_15)    /* divisor == 15 */
        nop

LSYM(negative1)
        sub     0,arg0,retreg           /* result is negation of dividend */
        MILLIRET
        addo    arg0,arg1,r0            /* trap iff dividend==0x80000000 && divisor==-1 */
        .exit
        .procend
        .end
#endif

#ifdef L_divU
/* ROUTINE:     $$divU
   .
   .    Single precision divide for unsigned integers.
   .
   .    Quotient is truncated towards zero.
   .    Traps on divide by zero.

   INPUT REGISTERS:
   .    arg0 == dividend
   .    arg1 == divisor
   .    mrp  == return pc
   .    sr0  == return space when called externally

   OUTPUT REGISTERS:
   .    arg0 =  undefined
   .    arg1 =  undefined
   .    ret1 =  quotient

   OTHER REGISTERS AFFECTED:
   .    r1   =  undefined

   SIDE EFFECTS:
   .    Causes a trap under the following conditions:
   .            divisor is zero
   .    Changes memory at the following places:
   .            NONE

   PERMISSIBLE CONTEXT:
   .    Unwindable.
   .    Does not create a stack frame.
   .    Suitable for internal or external millicode.
   .    Assumes the special millicode register conventions.

   DISCUSSION:
   .    Branches to other millicode routines using BE:
   .            $$divU_# for 3,5,6,7,9,10,12,14,15
   .
   .    For selected small divisors calls the special divide by constant
   .    routines written by Karl Pettis.  These are: 3,5,6,7,9,10,12,14,15.  */

RDEFINE(temp,r1)
RDEFINE(retreg,ret1)    /* r29 */
RDEFINE(temp1,arg0)
        SUBSPA_MILLI_DIV
        ATTR_MILLI
        .export $$divU,millicode
        .import $$divU_3,millicode
        .import $$divU_5,millicode
        .import $$divU_6,millicode
        .import $$divU_7,millicode
        .import $$divU_9,millicode
        .import $$divU_10,millicode
        .import $$divU_12,millicode
        .import $$divU_14,millicode
        .import $$divU_15,millicode
        .proc
        .callinfo       millicode
        .entry
GSYM($$divU)
/* The subtract is not nullified since it does no harm and can be used
   by the two cases that branch back to "normal".  */
        ldo     -1(arg1),temp           /* is there at most one bit set ? */
        and,=   arg1,temp,r0            /* if so, denominator is power of 2 */
        b       LREF(regular_seq)
        addit,= 0,arg1,0                /* trap for zero dvr */
        copy    arg0,retreg
        extru,= arg1,15,16,temp         /* test denominator with 0xffff0000 */
        extru   retreg,15,16,retreg     /* retreg = retreg >> 16 */
        or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 16) */
        ldi     0xcc,temp1              /* setup 0xcc in temp1 */
        extru,= arg1,23,8,temp          /* test denominator with 0xff00 */
        extru   retreg,23,24,retreg     /* retreg = retreg >> 8 */
        or      arg1,temp,arg1          /* arg1 = arg1 | (arg1 >> 8) */
        ldi     0xaa,temp               /* setup 0xaa in temp */
        extru,= arg1,27,4,r0            /* test denominator with 0xf0 */
        extru   retreg,27,28,retreg     /* retreg = retreg >> 4 */
        and,=   arg1,temp1,r0           /* test denominator with 0xcc */
        extru   retreg,29,30,retreg     /* retreg = retreg >> 2 */
        and,=   arg1,temp,r0            /* test denominator with 0xaa */
        extru   retreg,30,31,retreg     /* retreg = retreg >> 1 */
        MILLIRETN
        nop
LSYM(regular_seq)
        comib,>=  15,arg1,LREF(special_divisor)
        subi    0,arg1,temp             /* clear carry, negate the divisor */
        ds      r0,temp,r0              /* set V-bit to 1 */
LSYM(normal)
        add     arg0,arg0,retreg        /* shift msb bit into carry */
        ds      r0,arg1,temp            /* 1st divide step, if no carry */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 2nd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 3rd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 4th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 5th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 6th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 7th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 8th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 9th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 10th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 11th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 12th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 13th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 14th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 15th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 16th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 17th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 18th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 19th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 20th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 21st divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 22nd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 23rd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 24th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 25th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 26th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 27th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 28th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 29th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 30th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 31st divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds      temp,arg1,temp          /* 32nd divide step, */
        MILLIRET
        addc    retreg,retreg,retreg    /* shift last retreg bit into retreg */

/* Handle the cases where divisor is a small constant or has high bit on.  */
LSYM(special_divisor)
/*      blr     arg1,r0 */
/*      comib,>,n  0,arg1,LREF(big_divisor) ; nullify previous instruction */

/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
   generating such a blr, comib sequence. A problem in nullification. So I
   rewrote this code.  */

#if defined(pa64)
/* Clear the upper 32 bits of the arg1 register.  We are working with
   small divisors (and 32-bit unsigned integers)   We must not be misled
   by "1" bits left in the upper 32 bits.  */
        depd %r0,31,32,%r25
#endif
        comib,> 0,arg1,LREF(big_divisor)
        nop
        blr     arg1,r0
        nop

LSYM(zero_divisor)      /* this label is here to provide external visibility */
        addit,= 0,arg1,0                /* trap for zero dvr */
        nop
        MILLIRET                        /* divisor == 1 */
        copy    arg0,retreg
        MILLIRET                        /* divisor == 2 */
        extru   arg0,30,31,retreg
        MILLI_BEN($$divU_3)             /* divisor == 3 */
        nop
        MILLIRET                        /* divisor == 4 */
        extru   arg0,29,30,retreg
        MILLI_BEN($$divU_5)             /* divisor == 5 */
        nop
        MILLI_BEN($$divU_6)             /* divisor == 6 */
        nop
        MILLI_BEN($$divU_7)             /* divisor == 7 */
        nop
        MILLIRET                        /* divisor == 8 */
        extru   arg0,28,29,retreg
        MILLI_BEN($$divU_9)             /* divisor == 9 */
        nop
        MILLI_BEN($$divU_10)            /* divisor == 10 */
        nop
        b       LREF(normal)            /* divisor == 11 */
        ds      r0,temp,r0              /* set V-bit to 1 */
        MILLI_BEN($$divU_12)            /* divisor == 12 */
        nop
        b       LREF(normal)            /* divisor == 13 */
        ds      r0,temp,r0              /* set V-bit to 1 */
        MILLI_BEN($$divU_14)            /* divisor == 14 */
        nop
        MILLI_BEN($$divU_15)            /* divisor == 15 */
        nop

/* Handle the case where the high bit is on in the divisor.
   Compute:     if( dividend>=divisor) quotient=1; else quotient=0;
   Note:        dividend>==divisor iff dividend-divisor does not borrow
   and          not borrow iff carry.  */
LSYM(big_divisor)
        sub     arg0,arg1,r0
        MILLIRET
        addc    r0,r0,retreg
        .exit
        .procend
        .end
#endif

#ifdef L_remI
/* ROUTINE:     $$remI

   DESCRIPTION:
   .    $$remI returns the remainder of the division of two signed 32-bit
   .    integers.  The sign of the remainder is the same as the sign of
   .    the dividend.


   INPUT REGISTERS:
   .    arg0 == dividend
   .    arg1 == divisor
   .    mrp  == return pc
   .    sr0  == return space when called externally

   OUTPUT REGISTERS:
   .    arg0 = destroyed
   .    arg1 = destroyed
   .    ret1 = remainder

   OTHER REGISTERS AFFECTED:
   .    r1   = undefined

   SIDE EFFECTS:
   .    Causes a trap under the following conditions:  DIVIDE BY ZERO
   .    Changes memory at the following places:  NONE

   PERMISSIBLE CONTEXT:
   .    Unwindable
   .    Does not create a stack frame
   .    Is usable for internal or external microcode

   DISCUSSION:
   .    Calls other millicode routines via mrp:  NONE
   .    Calls other millicode routines:  NONE  */

RDEFINE(tmp,r1)
RDEFINE(retreg,ret1)

        SUBSPA_MILLI
        ATTR_MILLI
        .proc
        .callinfo millicode
        .entry
GSYM($$remI)
GSYM($$remoI)
        .export $$remI,MILLICODE
        .export $$remoI,MILLICODE
        ldo             -1(arg1),tmp            /* is there at most one bit set ? */
        and,<>          arg1,tmp,r0             /* if not, don't use power of 2 */
        addi,>          0,arg1,r0               /* if denominator > 0, use power */
                                                /* of 2 */
        b,n             LREF(neg_denom)
LSYM(pow2)
        comb,>,n        0,arg0,LREF(neg_num)    /* is numerator < 0 ? */
        and             arg0,tmp,retreg         /* get the result */
        MILLIRETN
LSYM(neg_num)
        subi            0,arg0,arg0             /* negate numerator */
        and             arg0,tmp,retreg         /* get the result */
        subi            0,retreg,retreg         /* negate result */
        MILLIRETN
LSYM(neg_denom)
        addi,<          0,arg1,r0               /* if arg1 >= 0, it's not power */
                                                /* of 2 */
        b,n             LREF(regular_seq)
        sub             r0,arg1,tmp             /* make denominator positive */
        comb,=,n        arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
        ldo             -1(tmp),retreg          /* is there at most one bit set ? */
        and,=           tmp,retreg,r0           /* if not, go to regular_seq */
        b,n             LREF(regular_seq)
        comb,>,n        0,arg0,LREF(neg_num_2)  /* if arg0 < 0, negate it */
        and             arg0,retreg,retreg
        MILLIRETN
LSYM(neg_num_2)
        subi            0,arg0,tmp              /* test against 0x80000000 */
        and             tmp,retreg,retreg
        subi            0,retreg,retreg
        MILLIRETN
LSYM(regular_seq)
        addit,= 0,arg1,0                /* trap if div by zero */
        add,>=  0,arg0,retreg           /* move dividend, if retreg < 0, */
        sub     0,retreg,retreg         /* make it positive */
        sub     0,arg1, tmp             /* clear carry, */
                                        /* negate the divisor */
        ds      0, tmp,0                /* set V-bit to the comple- */
                                        /* ment of the divisor sign */
        or      0,0, tmp                /* clear  tmp */
        add     retreg,retreg,retreg    /* shift msb bit into carry */
        ds       tmp,arg1, tmp          /* 1st divide step, if no carry */
                                        /* out, msb of quotient = 0 */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
LSYM(t1)
        ds       tmp,arg1, tmp          /* 2nd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 3rd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 4th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 5th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 6th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 7th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 8th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 9th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 10th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 11th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 12th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 13th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 14th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 15th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 16th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 17th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 18th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 19th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 20th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 21st divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 22nd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 23rd divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 24th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 25th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 26th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 27th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 28th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 29th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 30th divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 31st divide step */
        addc    retreg,retreg,retreg    /* shift retreg with/into carry */
        ds       tmp,arg1, tmp          /* 32nd divide step, */
        addc    retreg,retreg,retreg    /* shift last bit into retreg */
        movb,>=,n  tmp,retreg,LREF(finish) /* branch if pos.  tmp */
        add,<   arg1,0,0                /* if arg1 > 0, add arg1 */
        add,tr   tmp,arg1,retreg        /* for correcting remainder tmp */
        sub      tmp,arg1,retreg        /* else add absolute value arg1 */
LSYM(finish)
        add,>=  arg0,0,0                /* set sign of remainder */
        sub     0,retreg,retreg         /* to sign of dividend */
        MILLIRET
        nop
        .exit
        .procend
#ifdef milliext
        .origin 0x00000200
#endif
        .end
#endif

884 #ifdef L_remU
885 /* ROUTINE: $$remU
886 . Single precision divide for remainder with unsigned binary integers.
887 .
888 . The remainder must be dividend-(dividend/divisor)*divisor.
889 . Divide by zero is trapped.
890
891 INPUT REGISTERS:
892 . arg0 == dividend
893 . arg1 == divisor
894 . mrp == return pc
895 . sr0 == return space when called externally
896
897 OUTPUT REGISTERS:
898 . arg0 = undefined
899 . arg1 = undefined
900 . ret1 = remainder
901
902 OTHER REGISTERS AFFECTED:
903 . r1 = undefined
904
905 SIDE EFFECTS:
906 . Causes a trap under the following conditions: DIVIDE BY ZERO
907 . Changes memory at the following places: NONE
908
909 PERMISSIBLE CONTEXT:
910 . Unwindable.
911 . Does not create a stack frame.
912 . Suitable for internal or external millicode.
913 . Assumes the special millicode register conventions.
914
915 DISCUSSION:
916 . Calls other millicode routines using mrp: NONE
917 . Calls other millicode routines: NONE */
918
919
/* $$remU: 32-bit unsigned remainder.
   In:  arg0 = dividend, arg1 = divisor.  Out: ret1 = arg0 mod arg1.
   Power-of-2 divisors are answered with a single AND; other divisors
   go through 32 unrolled ds (divide step) iterations.  Divide by zero
   traps via the conditional ADDIT below. */
920 RDEFINE(temp,r1)
921 RDEFINE(rmndr,ret1) /* r29 */
922 SUBSPA_MILLI
923 ATTR_MILLI
924 .export $$remU,millicode
925 .proc
926 .callinfo millicode
927 .entry
928 GSYM($$remU)
/* Fast path: arg1 & (arg1-1) == 0 means at most one bit set, i.e. a
   power of 2 (or zero, which still traps below); the remainder is then
   just the dividend's low-order bits. */
929 ldo -1(arg1),temp /* is there at most one bit set ? */
930 and,= arg1,temp,r0 /* if not, don't use power of 2 */
931 b LREF(regular_seq)
932 addit,= 0,arg1,r0 /* trap on div by zero */
933 and arg0,temp,rmndr /* get the result for power of 2 */
934 MILLIRETN
935 LSYM(regular_seq)
/* Divisors with the sign bit set (unsigned value >= 2**31) cannot use
   the ds sequence; they are handled by one compare/subtract at
   special_case, since the quotient can only be 0 or 1 there. */
936 comib,>=,n 0,arg1,LREF(special_case)
937 subi 0,arg1,rmndr /* clear carry, negate the divisor */
938 ds r0,rmndr,r0 /* set V-bit to 1 */
939 add arg0,arg0,temp /* shift msb bit into carry */
940 ds r0,arg1,rmndr /* 1st divide step, if no carry */
941 addc temp,temp,temp /* shift temp with/into carry */
942 ds rmndr,arg1,rmndr /* 2nd divide step */
943 addc temp,temp,temp /* shift temp with/into carry */
944 ds rmndr,arg1,rmndr /* 3rd divide step */
945 addc temp,temp,temp /* shift temp with/into carry */
946 ds rmndr,arg1,rmndr /* 4th divide step */
947 addc temp,temp,temp /* shift temp with/into carry */
948 ds rmndr,arg1,rmndr /* 5th divide step */
949 addc temp,temp,temp /* shift temp with/into carry */
950 ds rmndr,arg1,rmndr /* 6th divide step */
951 addc temp,temp,temp /* shift temp with/into carry */
952 ds rmndr,arg1,rmndr /* 7th divide step */
953 addc temp,temp,temp /* shift temp with/into carry */
954 ds rmndr,arg1,rmndr /* 8th divide step */
955 addc temp,temp,temp /* shift temp with/into carry */
956 ds rmndr,arg1,rmndr /* 9th divide step */
957 addc temp,temp,temp /* shift temp with/into carry */
958 ds rmndr,arg1,rmndr /* 10th divide step */
959 addc temp,temp,temp /* shift temp with/into carry */
960 ds rmndr,arg1,rmndr /* 11th divide step */
961 addc temp,temp,temp /* shift temp with/into carry */
962 ds rmndr,arg1,rmndr /* 12th divide step */
963 addc temp,temp,temp /* shift temp with/into carry */
964 ds rmndr,arg1,rmndr /* 13th divide step */
965 addc temp,temp,temp /* shift temp with/into carry */
966 ds rmndr,arg1,rmndr /* 14th divide step */
967 addc temp,temp,temp /* shift temp with/into carry */
968 ds rmndr,arg1,rmndr /* 15th divide step */
969 addc temp,temp,temp /* shift temp with/into carry */
970 ds rmndr,arg1,rmndr /* 16th divide step */
971 addc temp,temp,temp /* shift temp with/into carry */
972 ds rmndr,arg1,rmndr /* 17th divide step */
973 addc temp,temp,temp /* shift temp with/into carry */
974 ds rmndr,arg1,rmndr /* 18th divide step */
975 addc temp,temp,temp /* shift temp with/into carry */
976 ds rmndr,arg1,rmndr /* 19th divide step */
977 addc temp,temp,temp /* shift temp with/into carry */
978 ds rmndr,arg1,rmndr /* 20th divide step */
979 addc temp,temp,temp /* shift temp with/into carry */
980 ds rmndr,arg1,rmndr /* 21st divide step */
981 addc temp,temp,temp /* shift temp with/into carry */
982 ds rmndr,arg1,rmndr /* 22nd divide step */
983 addc temp,temp,temp /* shift temp with/into carry */
984 ds rmndr,arg1,rmndr /* 23rd divide step */
985 addc temp,temp,temp /* shift temp with/into carry */
986 ds rmndr,arg1,rmndr /* 24th divide step */
987 addc temp,temp,temp /* shift temp with/into carry */
988 ds rmndr,arg1,rmndr /* 25th divide step */
989 addc temp,temp,temp /* shift temp with/into carry */
990 ds rmndr,arg1,rmndr /* 26th divide step */
991 addc temp,temp,temp /* shift temp with/into carry */
992 ds rmndr,arg1,rmndr /* 27th divide step */
993 addc temp,temp,temp /* shift temp with/into carry */
994 ds rmndr,arg1,rmndr /* 28th divide step */
995 addc temp,temp,temp /* shift temp with/into carry */
996 ds rmndr,arg1,rmndr /* 29th divide step */
997 addc temp,temp,temp /* shift temp with/into carry */
998 ds rmndr,arg1,rmndr /* 30th divide step */
999 addc temp,temp,temp /* shift temp with/into carry */
1000 ds rmndr,arg1,rmndr /* 31st divide step */
1001 addc temp,temp,temp /* shift temp with/into carry */
1002 ds rmndr,arg1,rmndr /* 32nd divide step, */
/* After 32 steps the remainder may be left negative; when so, one
   final add of the divisor restores the true non-negative remainder. */
1003 comiclr,<= 0,rmndr,r0
1004 add rmndr,arg1,rmndr /* correction */
1005 MILLIRETN
1006 nop
1007 
1008 /* Putting >= on the last DS and deleting COMICLR does not work! */
1009 LSYM(special_case)
/* Divisor >= 2**31: quotient is 0 or 1, so the remainder is either
   arg0 - arg1 (when arg0 >= arg1, unsigned) or arg0 itself. */
1010 sub,>>= arg0,arg1,rmndr
1011 copy arg0,rmndr
1012 MILLIRETN
1013 nop
1014 .exit
1015 .procend
1016 .end
1017 #endif
1018
1019 #ifdef L_div_const
1020 /* ROUTINE: $$divI_2
1021 . $$divI_3 $$divU_3
1022 . $$divI_4
1023 . $$divI_5 $$divU_5
1024 . $$divI_6 $$divU_6
1025 . $$divI_7 $$divU_7
1026 . $$divI_8
1027 . $$divI_9 $$divU_9
1028 . $$divI_10 $$divU_10
1029 .
1030 . $$divI_12 $$divU_12
1031 .
1032 . $$divI_14 $$divU_14
1033 . $$divI_15 $$divU_15
1034 . $$divI_16
1035 . $$divI_17 $$divU_17
1036 .
1037 . Divide by selected constants for single precision binary integers.
1038
1039 INPUT REGISTERS:
1040 . arg0 == dividend
1041 . mrp == return pc
1042 . sr0 == return space when called externally
1043
1044 OUTPUT REGISTERS:
1045 . arg0 = undefined
1046 . arg1 = undefined
1047 . ret1 = quotient
1048
1049 OTHER REGISTERS AFFECTED:
1050 . r1 = undefined
1051
1052 SIDE EFFECTS:
1053 . Causes a trap under the following conditions: NONE
1054 . Changes memory at the following places: NONE
1055
1056 PERMISSIBLE CONTEXT:
1057 . Unwindable.
1058 . Does not create a stack frame.
1059 . Suitable for internal or external millicode.
1060 . Assumes the special millicode register conventions.
1061
1062 DISCUSSION:
1063 . Calls other millicode routines using mrp: NONE
1064 . Calls other millicode routines: NONE */
1065
1066
1067 /* TRUNCATED DIVISION BY SMALL INTEGERS
1068
1069 We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
1070 (with y fixed).
1071
1072 Let a = floor(z/y), for some choice of z. Note that z will be
1073 chosen so that division by z is cheap.
1074
1075 Let r be the remainder(z/y). In other words, r = z - ay.
1076
1077 Now, our method is to choose a value for b such that
1078
1079 q'(x) = floor((ax+b)/z)
1080
1081 is equal to q(x) over as large a range of x as possible. If the
1082 two are equal over a sufficiently large range, and if it is easy to
1083 form the product (ax), and it is easy to divide by z, then we can
1084 perform the division much faster than the general division algorithm.
1085
1086 So, we want the following to be true:
1087
1088 . For x in the following range:
1089 .
1090 . ky <= x < (k+1)y
1091 .
1092 . implies that
1093 .
1094 . k <= (ax+b)/z < (k+1)
1095
1096 We want to determine b such that this is true for all k in the
1097 range {0..K} for some maximum K.
1098
1099 Since (ax+b) is an increasing function of x, we can take each
1100 bound separately to determine the "best" value for b.
1101
1102 (ax+b)/z < (k+1) implies
1103
1104 a((k+1)y-1)+b < (k+1)z implies
1105
1106 b < a + (k+1)(z-ay) implies
1107
1108 b < a + (k+1)r
1109
1110 This needs to be true for all k in the range {0..K}. In
1111 particular, it is true for k = 0 and this leads to a maximum
1112 acceptable value for b.
1113
1114 b < a+r or b <= a+r-1
1115
1116 Taking the other bound, we have
1117
1118 k <= (ax+b)/z implies
1119
1120 k <= (aky+b)/z implies
1121
1122 k(z-ay) <= b implies
1123
1124 kr <= b
1125
1126 Clearly, the largest range for k will be achieved by maximizing b,
1127 when r is not zero. When r is zero, then the simplest choice for b
1128 is 0. When r is not 0, set
1129
1130 . b = a+r-1
1131
1132 Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
1133 for all x in the range:
1134
1135 . 0 <= x < (K+1)y
1136
1137 We need to determine what K is. Of our two bounds,
1138
1139 . b < a+(k+1)r is satisfied for all k >= 0, by construction.
1140
1141 The other bound is
1142
1143 . kr <= b
1144
1145 This is always true if r = 0. If r is not 0 (the usual case), then
1146 K = floor((a+r-1)/r), is the maximum value for k.
1147
1148 Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
1149 answer for q(x) = floor(x/y) when x is in the range
1150
1151 (0,(K+1)y-1) K = floor((a+r-1)/r)
1152
1153 To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
1154 the formula for q'(x) yields the correct value of q(x) for all x
1155 representable by a single word in HPPA.
1156
1157 We are also constrained in that computing the product (ax), adding
1158 b, and dividing by z must all be done quickly, otherwise we will be
1159 better off going through the general algorithm using the DS
1160 instruction, which uses approximately 70 cycles.
1161
1162 For each y, there is a choice of z which satisfies the constraints
1163 for (K+1)y >= 2**32. We may not, however, be able to satisfy the
1164 timing constraints for arbitrary y. It seems that z being equal to
1165 a power of 2 or a power of 2 minus 1 is as good as we can do, since
1166 it minimizes the time to do division by z. We want the choice of z
1167 to also result in a value for (a) that minimizes the computation of
1168 the product (ax). This is best achieved if (a) has a regular bit
1169 pattern (so the multiplication can be done with shifts and adds).
1170 The value of (a) also needs to be less than 2**32 so the product is
1171 always guaranteed to fit in 2 words.
1172
1173 In actual practice, the following should be done:
1174
1175 1) For negative x, you should take the absolute value and remember
1176 . the fact so that the result can be negated. This obviously does
1177 . not apply in the unsigned case.
1178 2) For even y, you should factor out the power of 2 that divides y
1179 . and divide x by it. You can then proceed by dividing by the
1180 . odd factor of y.
1181
1182 Here is a table of some odd values of y, and corresponding choices
1183 for z which are "good".
1184
1185 y z r a (hex) max x (hex)
1186
1187 3 2**32 1 55555555 100000001
1188 5 2**32 1 33333333 100000003
1189 7 2**24-1 0 249249 (infinite)
1190 9 2**24-1 0 1c71c7 (infinite)
1191 11 2**20-1 0 1745d (infinite)
1192 13 2**24-1 0 13b13b (infinite)
1193 15 2**32 1 11111111 10000000d
1194 17 2**32 1 f0f0f0f 10000000f
1195
1196 If r is 1, then b = a+r-1 = a. This simplifies the computation
1197 of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
1198 then b = 0 is ok to use which simplifies (ax+b).
1199
1200 The bit patterns for 55555555, 33333333, and 11111111 are obviously
1201 very regular. The bit patterns for the other values of a above are:
1202
1203 y (hex) (binary)
1204
1205 7 249249 001001001001001001001001 << regular >>
1206 9 1c71c7 000111000111000111000111 << regular >>
1207 11 1745d 000000010111010001011101 << irregular >>
1208 13 13b13b 000100111011000100111011 << irregular >>
1209
1210 The bit patterns for (a) corresponding to (y) of 11 and 13 may be
1211 too irregular to warrant using this method.
1212
1213 When z is a power of 2 minus 1, then the division by z is slightly
1214 more complicated, involving an iterative solution.
1215
1216 The code presented here solves division by 1 through 17, except for
1217 11 and 13. There are algorithms for both signed and unsigned
1218 quantities given.
1219
1220 TIMINGS (cycles)
1221
1222 divisor positive negative unsigned
1223
1224 . 1 2 2 2
1225 . 2 4 4 2
1226 . 3 19 21 19
1227 . 4 4 4 2
1228 . 5 18 22 19
1229 . 6 19 22 19
1230 . 8 4 4 2
1231 . 10 18 19 17
1232 . 12 18 20 18
1233 . 15 16 18 16
1234 . 16 4 4 2
1235 . 17 16 18 16
1236
1237 Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
1238 a loop body is executed until the tentative quotient is 0. The
1239 number of times the loop body is executed varies depending on the
1240 dividend, but is never more than two times. If the dividend is
1241 less than the divisor, then the loop body is not executed at all.
1242 Each iteration adds 4 cycles to the timings.
1243
1244 divisor positive negative unsigned
1245
1246 . 7 19+4n 20+4n 20+4n n = number of iterations
1247 . 9 21+4n 22+4n 21+4n
1248 . 14 21+4n 22+4n 20+4n
1249
1250 To give an idea of how the number of iterations varies, here is a
1251 table of dividend versus number of iterations when dividing by 7.
1252
1253 smallest largest required
1254 dividend dividend iterations
1255
1256 . 0 6 0
1257 . 7 0x6ffffff 1
1258 0x1000006 0xffffffff 2
1259
1260 There is some overlap in the range of numbers requiring 1 and 2
1261 iterations. */
1262
/* Register conventions shared by every $$divI_x / $$divU_x constant
   divide below: x2 (arg0) holds the dividend / low product word,
   x1 (ret1) the high product word and final quotient, t1/t2 scratch. */
1263 RDEFINE(t2,r1)
1264 RDEFINE(x2,arg0) /* r26 */
1265 RDEFINE(t1,arg1) /* r25 */
1266 RDEFINE(x1,ret1) /* r29 */
1267 
1268 SUBSPA_MILLI_DIV
1269 ATTR_MILLI
1270 
1271 .proc
1272 .callinfo millicode
1273 .entry
1274 /* NONE of these routines require a stack frame
1275 ALL of these routines are unwindable from millicode */
1276 
1277 GSYM($$divide_by_constant)
1278 .export $$divide_by_constant,millicode
1279 /* Provides a "nice" label for the code covered by the unwind descriptor
1280 for things like gprof. */
1281 
/* Signed division by 2**k: for negative dividends, add 2**k-1 first so
   the arithmetic right shift (extrs) truncates toward zero rather than
   toward minus infinity.  The comclr nullifies the addi when arg0 >= 0. */
1282 /* DIVISION BY 2 (shift by 1) */
1283 GSYM($$divI_2)
1284 .export $$divI_2,millicode
1285 comclr,>= arg0,0,0
1286 addi 1,arg0,arg0
1287 MILLIRET
1288 extrs arg0,30,31,ret1
1289 
1290 
1291 /* DIVISION BY 4 (shift by 2) */
1292 GSYM($$divI_4)
1293 .export $$divI_4,millicode
1294 comclr,>= arg0,0,0
1295 addi 3,arg0,arg0
1296 MILLIRET
1297 extrs arg0,29,30,ret1
1298 
1299 
1300 /* DIVISION BY 8 (shift by 3) */
1301 GSYM($$divI_8)
1302 .export $$divI_8,millicode
1303 comclr,>= arg0,0,0
1304 addi 7,arg0,arg0
1305 MILLIRET
1306 extrs arg0,28,29,ret1
1307 
1308 /* DIVISION BY 16 (shift by 4) */
1309 GSYM($$divI_16)
1310 .export $$divI_16,millicode
1311 comclr,>= arg0,0,0
1312 addi 15,arg0,arg0
1313 MILLIRET
1314 extrs arg0,27,28,ret1
1315
1316 /****************************************************************************
1317 *
1318 * DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
1319 *
1320 * includes 3,5,15,17 and also 6,10,12
1321 *
1322 ****************************************************************************/
1323
1324 /* DIVISION BY 3 (use z = 2**32; a = 55555555) */
1325
1326 GSYM($$divI_3)
1327 .export $$divI_3,millicode
1328 comb,<,N x2,0,LREF(neg3)
1329
1330 addi 1,x2,x2 /* this cannot overflow */
1331 extru x2,1,2,x1 /* multiply by 5 to get started */
1332 sh2add x2,x2,x2
1333 b LREF(pos)
1334 addc x1,0,x1
1335
1336 LSYM(neg3)
1337 subi 1,x2,x2 /* this cannot overflow */
1338 extru x2,1,2,x1 /* multiply by 5 to get started */
1339 sh2add x2,x2,x2
1340 b LREF(neg)
1341 addc x1,0,x1
1342
1343 GSYM($$divU_3)
1344 .export $$divU_3,millicode
1345 addi 1,x2,x2 /* this CAN overflow */
1346 addc 0,0,x1
1347 shd x1,x2,30,t1 /* multiply by 5 to get started */
1348 sh2add x2,x2,x2
1349 b LREF(pos)
1350 addc x1,t1,x1
1351
1352 /* DIVISION BY 5 (use z = 2**32; a = 33333333) */
1353
1354 GSYM($$divI_5)
1355 .export $$divI_5,millicode
1356 comb,<,N x2,0,LREF(neg5)
1357
1358 addi 3,x2,t1 /* this cannot overflow */
1359 sh1add x2,t1,x2 /* multiply by 3 to get started */
1360 b LREF(pos)
1361 addc 0,0,x1
1362
1363 LSYM(neg5)
1364 sub 0,x2,x2 /* negate x2 */
1365 addi 1,x2,x2 /* this cannot overflow */
1366 shd 0,x2,31,x1 /* get top bit (can be 1) */
1367 sh1add x2,x2,x2 /* multiply by 3 to get started */
1368 b LREF(neg)
1369 addc x1,0,x1
1370
1371 GSYM($$divU_5)
1372 .export $$divU_5,millicode
1373 addi 1,x2,x2 /* this CAN overflow */
1374 addc 0,0,x1
1375 shd x1,x2,31,t1 /* multiply by 3 to get started */
1376 sh1add x2,x2,x2
1377 b LREF(pos)
1378 addc t1,x1,x1
1379
1380 /* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
1381 GSYM($$divI_6)
1382 .export $$divI_6,millicode
1383 comb,<,N x2,0,LREF(neg6)
1384 extru x2,30,31,x2 /* divide by 2 */
1385 addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
1386 sh2add x2,t1,x2 /* multiply by 5 to get started */
1387 b LREF(pos)
1388 addc 0,0,x1
1389
1390 LSYM(neg6)
1391 subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1392 /* negation and adding 1 are done */
1393 /* at the same time by the SUBI */
1394 extru x2,30,31,x2
1395 shd 0,x2,30,x1
1396 sh2add x2,x2,x2 /* multiply by 5 to get started */
1397 b LREF(neg)
1398 addc x1,0,x1
1399
1400 GSYM($$divU_6)
1401 .export $$divU_6,millicode
1402 extru x2,30,31,x2 /* divide by 2 */
1403 addi 1,x2,x2 /* cannot carry */
1404 shd 0,x2,30,x1 /* multiply by 5 to get started */
1405 sh2add x2,x2,x2
1406 b LREF(pos)
1407 addc x1,0,x1
1408
1409 /* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
1410 GSYM($$divU_10)
1411 .export $$divU_10,millicode
1412 extru x2,30,31,x2 /* divide by 2 */
1413 addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
1414 sh1add x2,t1,x2 /* multiply by 3 to get started */
1415 addc 0,0,x1
1416 LSYM(pos)
1417 shd x1,x2,28,t1 /* multiply by 0x11 */
1418 shd x2,0,28,t2
1419 add x2,t2,x2
1420 addc x1,t1,x1
1421 LSYM(pos_for_17)
1422 shd x1,x2,24,t1 /* multiply by 0x101 */
1423 shd x2,0,24,t2
1424 add x2,t2,x2
1425 addc x1,t1,x1
1426
1427 shd x1,x2,16,t1 /* multiply by 0x10001 */
1428 shd x2,0,16,t2
1429 add x2,t2,x2
1430 MILLIRET
1431 addc x1,t1,x1
1432
1433 GSYM($$divI_10)
1434 .export $$divI_10,millicode
1435 comb,< x2,0,LREF(neg10)
1436 copy 0,x1
1437 extru x2,30,31,x2 /* divide by 2 */
1438 addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
1439 sh1add x2,x2,x2 /* multiply by 3 to get started */
1440
1441 LSYM(neg10)
1442 subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1443 /* negation and adding 1 are done */
1444 /* at the same time by the SUBI */
1445 extru x2,30,31,x2
1446 sh1add x2,x2,x2 /* multiply by 3 to get started */
1447 LSYM(neg)
1448 shd x1,x2,28,t1 /* multiply by 0x11 */
1449 shd x2,0,28,t2
1450 add x2,t2,x2
1451 addc x1,t1,x1
1452 LSYM(neg_for_17)
1453 shd x1,x2,24,t1 /* multiply by 0x101 */
1454 shd x2,0,24,t2
1455 add x2,t2,x2
1456 addc x1,t1,x1
1457
1458 shd x1,x2,16,t1 /* multiply by 0x10001 */
1459 shd x2,0,16,t2
1460 add x2,t2,x2
1461 addc x1,t1,x1
1462 MILLIRET
1463 sub 0,x1,x1
1464
1465 /* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
1466 GSYM($$divI_12)
1467 .export $$divI_12,millicode
1468 comb,< x2,0,LREF(neg12)
1469 copy 0,x1
1470 extru x2,29,30,x2 /* divide by 4 */
1471 addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
1472 sh2add x2,x2,x2 /* multiply by 5 to get started */
1473
1474 LSYM(neg12)
1475 subi 4,x2,x2 /* negate, divide by 4, and add 1 */
1476 /* negation and adding 1 are done */
1477 /* at the same time by the SUBI */
1478 extru x2,29,30,x2
1479 b LREF(neg)
1480 sh2add x2,x2,x2 /* multiply by 5 to get started */
1481
1482 GSYM($$divU_12)
1483 .export $$divU_12,millicode
1484 extru x2,29,30,x2 /* divide by 4 */
1485 addi 5,x2,t1 /* cannot carry */
1486 sh2add x2,t1,x2 /* multiply by 5 to get started */
1487 b LREF(pos)
1488 addc 0,0,x1
1489
1490 /* DIVISION BY 15 (use z = 2**32; a = 11111111) */
1491 GSYM($$divI_15)
1492 .export $$divI_15,millicode
1493 comb,< x2,0,LREF(neg15)
1494 copy 0,x1
1495 addib,tr 1,x2,LREF(pos)+4
1496 shd x1,x2,28,t1
1497
1498 LSYM(neg15)
1499 b LREF(neg)
1500 subi 1,x2,x2
1501
1502 GSYM($$divU_15)
1503 .export $$divU_15,millicode
1504 addi 1,x2,x2 /* this CAN overflow */
1505 b LREF(pos)
1506 addc 0,0,x1
1507
1508 /* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
1509 GSYM($$divI_17)
1510 .export $$divI_17,millicode
1511 comb,<,n x2,0,LREF(neg17)
1512 addi 1,x2,x2 /* this cannot overflow */
1513 shd 0,x2,28,t1 /* multiply by 0xf to get started */
1514 shd x2,0,28,t2
1515 sub t2,x2,x2
1516 b LREF(pos_for_17)
1517 subb t1,0,x1
1518
1519 LSYM(neg17)
1520 subi 1,x2,x2 /* this cannot overflow */
1521 shd 0,x2,28,t1 /* multiply by 0xf to get started */
1522 shd x2,0,28,t2
1523 sub t2,x2,x2
1524 b LREF(neg_for_17)
1525 subb t1,0,x1
1526
1527 GSYM($$divU_17)
1528 .export $$divU_17,millicode
1529 addi 1,x2,x2 /* this CAN overflow */
1530 addc 0,0,x1
1531 shd x1,x2,28,t1 /* multiply by 0xf to get started */
1532 LSYM(u17)
1533 shd x2,0,28,t2
1534 sub t2,x2,x2
1535 b LREF(pos_for_17)
1536 subb t1,x1,x1
1537
1538
1539 /* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
1540 includes 7,9 and also 14
1541
1542
1543 z = 2**24-1
1544 r = z mod x = 0
1545
1546 so choose b = 0
1547
1548 Also, in order to divide by z = 2**24-1, we approximate by dividing
1549 by (z+1) = 2**24 (which is easy), and then correcting.
1550
1551 (ax) = (z+1)q' + r
1552 . = zq' + (q'+r)
1553
1554 So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
1555 Then the true remainder of (ax)/z is (q'+r). Repeat the process
1556 with this new remainder, adding the tentative quotients together,
1557 until a tentative quotient is 0 (and then we are done). There is
1558 one last correction to be done. It is possible that (q'+r) = z.
1559 If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
1560 in fact, we need to add 1 more to the quotient. Now, it turns
1561 out that this happens if and only if the original value x is
1562 an exact multiple of y. So, to avoid a three instruction test at
1563 the end, instead use 1 instruction to add 1 to x at the beginning. */
1564
/* Divisors of 0xFFFFFF (7, 9, and 14 via /2 then /7): multiply by the
   factor a, then divide the 48-bit product by z = 2**24-1 with the
   iterative quotient/remainder correction described in the comment
   block above.  The loop runs at most twice. */
1565 /* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
1566 GSYM($$divI_7)
1567 .export $$divI_7,millicode
1568 comb,<,n x2,0,LREF(neg7)
1569 LSYM(7)
1570 addi 1,x2,x2 /* cannot overflow */
1571 shd 0,x2,29,x1
1572 sh3add x2,x2,x2
1573 addc x1,0,x1
1574 LSYM(pos7)
1575 shd x1,x2,26,t1
1576 shd x2,0,26,t2
1577 add x2,t2,x2
1578 addc x1,t1,x1
1579 
1580 shd x1,x2,20,t1
1581 shd x2,0,20,t2
1582 add x2,t2,x2
1583 addc x1,t1,t1
1584 
1585 /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1586 
/* Iterate: accumulate tentative quotients into x1, fold each quotient
   back into the remainder, until the tentative quotient is 0. */
1587 copy 0,x1
1588 shd,= t1,x2,24,t1 /* tentative quotient */
1589 LSYM(1)
1590 addb,tr t1,x1,LREF(2) /* add to previous quotient */
1591 extru x2,31,24,x2 /* new remainder (unadjusted) */
1592 
1593 MILLIRETN
1594 
1595 LSYM(2)
1596 addb,tr t1,x2,LREF(1) /* adjust remainder */
1597 extru,= x2,7,8,t1 /* new quotient */
1598 
1599 LSYM(neg7)
1600 subi 1,x2,x2 /* negate x2 and add 1 */
1601 LSYM(8)
1602 shd 0,x2,29,x1
1603 sh3add x2,x2,x2
1604 addc x1,0,x1
1605 
1606 LSYM(neg7_shift)
1607 shd x1,x2,26,t1
1608 shd x2,0,26,t2
1609 add x2,t2,x2
1610 addc x1,t1,x1
1611 
1612 shd x1,x2,20,t1
1613 shd x2,0,20,t2
1614 add x2,t2,x2
1615 addc x1,t1,t1
1616 
1617 /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1618 
/* Same correction loop as above, but the accumulated quotient is
   negated on return (negative-dividend path). */
1619 copy 0,x1
1620 shd,= t1,x2,24,t1 /* tentative quotient */
1621 LSYM(3)
1622 addb,tr t1,x1,LREF(4) /* add to previous quotient */
1623 extru x2,31,24,x2 /* new remainder (unadjusted) */
1624 
1625 MILLIRET
1626 sub 0,x1,x1 /* negate result */
1627 
1628 LSYM(4)
1629 addb,tr t1,x2,LREF(3) /* adjust remainder */
1630 extru,= x2,7,8,t1 /* new quotient */
1631 
1632 GSYM($$divU_7)
1633 .export $$divU_7,millicode
1634 addi 1,x2,x2 /* can carry */
1635 addc 0,0,x1
1636 shd x1,x2,29,t1
1637 sh3add x2,x2,x2
1638 b LREF(pos7)
1639 addc t1,x1,x1
1640 
1641 /* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
1642 GSYM($$divI_9)
1643 .export $$divI_9,millicode
1644 comb,<,n x2,0,LREF(neg9)
1645 addi 1,x2,x2 /* cannot overflow */
1646 shd 0,x2,29,t1
1647 shd x2,0,29,t2
1648 sub t2,x2,x2
1649 b LREF(pos7)
1650 subb t1,0,x1
1651 
1652 LSYM(neg9)
1653 subi 1,x2,x2 /* negate and add 1 */
1654 shd 0,x2,29,t1
1655 shd x2,0,29,t2
1656 sub t2,x2,x2
1657 b LREF(neg7_shift)
1658 subb t1,0,x1
1659 
1660 GSYM($$divU_9)
1661 .export $$divU_9,millicode
1662 addi 1,x2,x2 /* can carry */
1663 addc 0,0,x1
1664 shd x1,x2,29,t1
1665 shd x2,0,29,t2
1666 sub t2,x2,x2
1667 b LREF(pos7)
1668 subb t1,x1,x1
1669 
1670 /* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
1671 GSYM($$divI_14)
1672 .export $$divI_14,millicode
1673 comb,<,n x2,0,LREF(neg14)
1674 GSYM($$divU_14)
1675 .export $$divU_14,millicode
1676 b LREF(7) /* go to 7 case */
1677 extru x2,30,31,x2 /* divide by 2 */
1678 
1679 LSYM(neg14)
1680 subi 2,x2,x2 /* negate (and add 2) */
1681 b LREF(8)
1682 extru x2,30,31,x2 /* divide by 2 */
1683 .exit
1684 .procend
1685 .end
1686 #endif
1687
1688 #ifdef L_mulI
1689 /* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
1690 /******************************************************************************
1691 This routine is used on PA2.0 processors when gcc -mno-fpregs is used
1692
1693 ROUTINE: $$mulI
1694
1695
1696 DESCRIPTION:
1697
1698 $$mulI multiplies two single word integers, giving a single
1699 word result.
1700
1701
1702 INPUT REGISTERS:
1703
1704 arg0 = Operand 1
1705 arg1 = Operand 2
1706 r31 == return pc
1707 sr0 == return space when called externally
1708
1709
1710 OUTPUT REGISTERS:
1711
1712 arg0 = undefined
1713 arg1 = undefined
1714 ret1 = result
1715
1716 OTHER REGISTERS AFFECTED:
1717
1718 r1 = undefined
1719
1720 SIDE EFFECTS:
1721
1722 Causes a trap under the following conditions: NONE
1723 Changes memory at the following places: NONE
1724
1725 PERMISSIBLE CONTEXT:
1726
1727 Unwindable
1728 Does not create a stack frame
1729 Is usable for internal or external millicode
1730
1731 DISCUSSION:
1732
1733 Calls other millicode routines via mrp: NONE
1734 Calls other millicode routines: NONE
1735
1736 ***************************************************************************/
1737
1738
/* Shorthand macros for the $$mulI dispatch table.  Naming convention:
   "dst__expr" reads as "dst := expr", where a juxtaposed digit is a
   multiplier, "_" means plus, and "ma0" means minus a0.  E.g.
   t0__2t0_a0 is t0 := 2*t0 + a0 (one sh1add).  The a1_ne_0_b_l* macros
   branch back into the byte-scanning loop when more multiplier bits
   remain; the b_e_* macros branch to common epilogue fragments. */
1739 #define a0 %arg0
1740 #define a1 %arg1
1741 #define t0 %r1
1742 #define r %ret1
1743 
1744 #define a0__128a0 zdep a0,24,25,a0
1745 #define a0__256a0 zdep a0,23,24,a0
1746 #define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
1747 #define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
1748 #define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
1749 #define b_n_ret_t0 b,n LREF(ret_t0)
1750 #define b_e_shift b LREF(e_shift)
1751 #define b_e_t0ma0 b LREF(e_t0ma0)
1752 #define b_e_t0 b LREF(e_t0)
1753 #define b_e_t0a0 b LREF(e_t0a0)
1754 #define b_e_t02a0 b LREF(e_t02a0)
1755 #define b_e_t04a0 b LREF(e_t04a0)
1756 #define b_e_2t0 b LREF(e_2t0)
1757 #define b_e_2t0a0 b LREF(e_2t0a0)
1758 #define b_e_2t04a0 b LREF(e2t04a0)
1759 #define b_e_3t0 b LREF(e_3t0)
1760 #define b_e_4t0 b LREF(e_4t0)
1761 #define b_e_4t0a0 b LREF(e_4t0a0)
1762 #define b_e_4t08a0 b LREF(e4t08a0)
1763 #define b_e_5t0 b LREF(e_5t0)
1764 #define b_e_8t0 b LREF(e_8t0)
1765 #define b_e_8t0a0 b LREF(e_8t0a0)
1766 #define r__r_a0 add r,a0,r
1767 #define r__r_2a0 sh1add a0,r,r
1768 #define r__r_4a0 sh2add a0,r,r
1769 #define r__r_8a0 sh3add a0,r,r
1770 #define r__r_t0 add r,t0,r
1771 #define r__r_2t0 sh1add t0,r,r
1772 #define r__r_4t0 sh2add t0,r,r
1773 #define r__r_8t0 sh3add t0,r,r
1774 #define t0__3a0 sh1add a0,a0,t0
1775 #define t0__4a0 sh2add a0,0,t0
1776 #define t0__5a0 sh2add a0,a0,t0
1777 #define t0__8a0 sh3add a0,0,t0
1778 #define t0__9a0 sh3add a0,a0,t0
1779 #define t0__16a0 zdep a0,27,28,t0
1780 #define t0__32a0 zdep a0,26,27,t0
1781 #define t0__64a0 zdep a0,25,26,t0
1782 #define t0__128a0 zdep a0,24,25,t0
1783 #define t0__t0ma0 sub t0,a0,t0
1784 #define t0__t0_a0 add t0,a0,t0
1785 #define t0__t0_2a0 sh1add a0,t0,t0
1786 #define t0__t0_4a0 sh2add a0,t0,t0
1787 #define t0__t0_8a0 sh3add a0,t0,t0
1788 #define t0__2t0_a0 sh1add t0,a0,t0
1789 #define t0__3t0 sh1add t0,t0,t0
1790 #define t0__4t0 sh2add t0,0,t0
1791 #define t0__4t0_a0 sh2add t0,a0,t0
1792 #define t0__5t0 sh2add t0,t0,t0
1793 #define t0__8t0 sh3add t0,0,t0
1794 #define t0__8t0_a0 sh3add t0,a0,t0
1795 #define t0__9t0 sh3add t0,t0,t0
1796 #define t0__16t0 zdep t0,27,28,t0
1797 #define t0__32t0 zdep t0,26,27,t0
1798 #define t0__256a0 zdep a0,23,24,t0
1799
1800
1801 SUBSPA_MILLI
1802 ATTR_MILLI
1803 .align 16
1804 .proc
1805 .callinfo millicode
1806 .export $$mulI,millicode
1807 GSYM($$mulI)
1808 combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
1809 copy 0,r /* zero out the result */
1810 xor a0,a1,a0 /* swap a0 & a1 using the */
1811 xor a0,a1,a1 /* old xor trick */
1812 xor a0,a1,a0
1813 LSYM(l4)
1814 combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
1815 zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1816 sub,> 0,a1,t0 /* otherwise negate both and */
1817 combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
1818 sub 0,a0,a1
1819 movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
1820
1821 LSYM(l0) r__r_t0 /* add in this partial product */
1822 LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
1823 LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1824 LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
1825 extru a1,23,24,a1 /* a1 >>= 8 ****************** */
1826
1827 /*16 insts before this. */
1828 /* a0 <<= 8 ************************** */
1829 LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
1830 LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
1831 LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
1832 LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
1833 LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
1834 LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
1835 LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1836 LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
1837 LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
1838 LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
1839 LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1840 LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1841 LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1842 LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1843 LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1844 LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
1845 LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1846 LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
1847 LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1848 LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
1849 LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1850 LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1851 LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1852 LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1853 LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1854 LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1855 LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1856 LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
1857 LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1858 LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1859 LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1860 LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1861 LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1862 LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1863 LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1864 LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
1865 LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1866 LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1867 LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1868 LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1869 LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1870 LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1871 LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1872 LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1873 LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1874 LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1875 LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
1876 LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
1877 LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
1878 LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
1879 LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1880 LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
1881 LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1882 LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1883 LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1884 LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
1885 LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1886 LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
1887 LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1888 LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
1889 LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1890 LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1891 LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1892 LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1893 LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1894 LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1895 LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1896 LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1897 LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1898 LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1899 LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
1900 LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
1901 LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1902 LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
1903 LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1904 LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1905 LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1906 LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1907 LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
1908 LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1909 LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
1910 LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
1911 LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1912 LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1913 LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1914 LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1915 LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1916 LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
1917 LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1918 LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1919 LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1920 LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
1921 LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1922 LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
1923 LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
1924 LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1925 LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1926 LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1927 LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
1928 LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1929 LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
1930 LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1931 LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
1932 LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
1933 LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1934 LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1935 LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
1936 LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
1937 LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1938 LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1939 LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
1940 LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1941 LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
1942 LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
1943 LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
1944 LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
1945 LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
1946 LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
1947 LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
1948 LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
1949 LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1950 LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1951 LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1952 LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1953 LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
1954 LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
1955 LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1956 LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1957 LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1958 LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
1959 LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1960 LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1961 LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1962 LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1963 LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1964 LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
1965 LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1966 LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1967 LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1968 LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
1969 LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
1970 LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
1971 LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
1972 LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
1973 LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
1974 LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
1975 LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1976 LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1977 LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1978 LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1979 LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1980 LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1981 LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1982 LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1983 LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1984 LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
1985 LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1986 LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
1987 LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
1988 LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1989 LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
1990 LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1991 LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
1992 LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
1993 LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
1994 LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1995 LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
1996 LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1997 LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1998 LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
1999 LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
2000 LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
2001 LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
2002 LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
2003 LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
2004 LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
2005 LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
2006 LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
2007 LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
2008 LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
2009 LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
2010 LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
2011 LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
2012 LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
2013 LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
2014 LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
2015 LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
2016 LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
2017 LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
2018 LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
2019 LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
2020 LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
2021 LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
2022 LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
2023 LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
2024 LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
2025 LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
2026 LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
2027 LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
2028 LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2029 LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
2030 LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
2031 LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
2032 LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
2033 LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2034 LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
2035 LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
2036 LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2037 LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
2038 LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
2039 LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
2040 LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
2041 LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
2042 LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
2043 LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
2044 LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
2045 LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
2046 LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
2047 LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
2048 LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
2049 LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
2050 LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
2051 LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
2052 LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2053 LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
2054 LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
2055 LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
2056 LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
2057 LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2058 LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
2059 LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
2060 LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2061 LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
2062 LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
2063 LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
2064 LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
2065 LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
2066 LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
2067 LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
2068 LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
2069 LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
2070 LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
2071 LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
2072 LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
2073 LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
2074 LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
2075 LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
2076 LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
2077 LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
2078 LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
2079 LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
2080 LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
2081 LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
2082 LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
2083 LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
2084 LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
2085 /*1040 insts before this. */
2086 LSYM(ret_t0) MILLIRET
2087 LSYM(e_t0) r__r_t0
2088 LSYM(e_shift) a1_ne_0_b_l2
2089 a0__256a0 /* a0 <<= 8 *********** */
2090 MILLIRETN
2091 LSYM(e_t0ma0) a1_ne_0_b_l0
2092 t0__t0ma0
2093 MILLIRET
2094 r__r_t0
2095 LSYM(e_t0a0) a1_ne_0_b_l0
2096 t0__t0_a0
2097 MILLIRET
2098 r__r_t0
2099 LSYM(e_t02a0) a1_ne_0_b_l0
2100 t0__t0_2a0
2101 MILLIRET
2102 r__r_t0
2103 LSYM(e_t04a0) a1_ne_0_b_l0
2104 t0__t0_4a0
2105 MILLIRET
2106 r__r_t0
2107 LSYM(e_2t0) a1_ne_0_b_l1
2108 r__r_2t0
2109 MILLIRETN
2110 LSYM(e_2t0a0) a1_ne_0_b_l0
2111 t0__2t0_a0
2112 MILLIRET
2113 r__r_t0
2114 LSYM(e2t04a0) t0__t0_2a0
2115 a1_ne_0_b_l1
2116 r__r_2t0
2117 MILLIRETN
2118 LSYM(e_3t0) a1_ne_0_b_l0
2119 t0__3t0
2120 MILLIRET
2121 r__r_t0
2122 LSYM(e_4t0) a1_ne_0_b_l1
2123 r__r_4t0
2124 MILLIRETN
2125 LSYM(e_4t0a0) a1_ne_0_b_l0
2126 t0__4t0_a0
2127 MILLIRET
2128 r__r_t0
2129 LSYM(e4t08a0) t0__t0_2a0
2130 a1_ne_0_b_l1
2131 r__r_4t0
2132 MILLIRETN
2133 LSYM(e_5t0) a1_ne_0_b_l0
2134 t0__5t0
2135 MILLIRET
2136 r__r_t0
2137 LSYM(e_8t0) a1_ne_0_b_l1
2138 r__r_8t0
2139 MILLIRETN
2140 LSYM(e_8t0a0) a1_ne_0_b_l0
2141 t0__8t0_a0
2142 MILLIRET
2143 r__r_t0
2144
2145 .procend
2146 .end
2147 #endif
2148