/* 32 and 64-bit millicode, original author Hewlett-Packard
   adapted for gcc by Paul Bame <bame (at) debian.org>
   and Alan Modra <alan (at) linuxcare.com.au>.

   Copyright (C) 2001-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif

#ifdef pa64
	/* Select the 64-bit (PA 2.0 wide) instruction set.  */
	.level	2.0w
#endif

/* Hardware General Registers.  */
r0:	.reg	%r0
r1:	.reg	%r1
r2:	.reg	%r2
r3:	.reg	%r3
r4:	.reg	%r4
r5:	.reg	%r5
r6:	.reg	%r6
r7:	.reg	%r7
r8:	.reg	%r8
r9:	.reg	%r9
r10:	.reg	%r10
r11:	.reg	%r11
r12:	.reg	%r12
r13:	.reg	%r13
r14:	.reg	%r14
r15:	.reg	%r15
r16:	.reg	%r16
r17:	.reg	%r17
r18:	.reg	%r18
r19:	.reg	%r19
r20:	.reg	%r20
r21:	.reg	%r21
r22:	.reg	%r22
r23:	.reg	%r23
r24:	.reg	%r24
r25:	.reg	%r25
r26:	.reg	%r26
r27:	.reg	%r27
r28:	.reg	%r28
r29:	.reg	%r29
r30:	.reg	%r30
r31:	.reg	%r31

/* Hardware Space Registers.  */
sr0:	.reg	%sr0
sr1:	.reg	%sr1
sr2:	.reg	%sr2
sr3:	.reg	%sr3
sr4:	.reg	%sr4
sr5:	.reg	%sr5
sr6:	.reg	%sr6
sr7:	.reg	%sr7

/* Hardware Floating Point Registers.  */
fr0:	.reg	%fr0
fr1:	.reg	%fr1
fr2:	.reg	%fr2
fr3:	.reg	%fr3
fr4:	.reg	%fr4
fr5:	.reg	%fr5
fr6:	.reg	%fr6
fr7:	.reg	%fr7
fr8:	.reg	%fr8
fr9:	.reg	%fr9
fr10:	.reg	%fr10
fr11:	.reg	%fr11
fr12:	.reg	%fr12
fr13:	.reg	%fr13
fr14:	.reg	%fr14
fr15:	.reg	%fr15

/* Hardware Control Registers.  */
cr11:	.reg	%cr11
sar:	.reg	%cr11	/* Shift Amount Register */

/* Software Architecture General Registers.  */
rp:	.reg	r2	/* return pointer */
#ifdef pa64
mrp:	.reg	r2	/* millicode return pointer */
#else
mrp:	.reg	r31	/* millicode return pointer */
#endif
ret0:	.reg	r28	/* return value */
ret1:	.reg	r29	/* return value (high part of double) */
sp:	.reg	r30	/* stack pointer */
dp:	.reg	r27	/* data pointer */
arg0:	.reg	r26	/* argument */
arg1:	.reg	r25	/* argument or high part of double argument */
arg2:	.reg	r24	/* argument */
arg3:	.reg	r23	/* argument or high part of double argument */

/* Software Architecture Space Registers.  */
/*		sr0	; return link from BLE */
sret:	.reg	sr1	/* return value */
sarg:	.reg	sr1	/* argument */
/*		sr4	; PC SPACE tracker */
/*		sr5	; process private data */

/* Frame Offsets (millicode convention!)  Used when calling other
   millicode routines.  Stack unwinding is dependent upon these
   definitions.  */
r31_slot:	.equ	-20	/* "current RP" slot */
sr0_slot:	.equ	-16	/* "static link" slot */
#if defined(pa64)
mrp_slot:	.equ	-16	/* "current RP" slot */
psp_slot:	.equ	-8	/* "previous SP" slot */
#else
mrp_slot:	.equ	-20	/* "current RP" slot (replacing "r31_slot") */
#endif

/* DEFINE/RDEFINE: declare an assembler constant / register alias.  */
#define DEFINE(name,value)name:	.EQU	value
#define RDEFINE(name,value)name:	.REG	value
#ifdef milliext
/* External millicode: inter-space branches via BE/BLE through sr7,
   returning through the space register saved in sr0 by BLE.  */
#define MILLI_BE(lbl)	BE lbl(sr7,r0)
#define MILLI_BEN(lbl)	BE,n lbl(sr7,r0)
#define MILLI_BLE(lbl)	BLE lbl(sr7,r0)
#define MILLI_BLEN(lbl)	BLE,n lbl(sr7,r0)
#define MILLIRETN	BE,n 0(sr0,mrp)
#define MILLIRET	BE 0(sr0,mrp)
#define MILLI_RETN	BE,n 0(sr0,mrp)
#define MILLI_RET	BE 0(sr0,mrp)
#else
/* Internal millicode: plain local branches, return via BV through mrp.  */
#define MILLI_BE(lbl)	B lbl
#define MILLI_BEN(lbl)	B,n lbl
#define MILLI_BLE(lbl)	BL lbl,mrp
#define MILLI_BLEN(lbl)	BL,n lbl,mrp
#define MILLIRETN	BV,n 0(mrp)
#define MILLIRET	BV 0(mrp)
#define MILLI_RETN	BV,n 0(mrp)
#define MILLI_RET	BV 0(mrp)
#endif

#ifdef __STDC__
#define CAT(a,b)	a##b
#else
#define CAT(a,b)	a/**/b
#endif

#ifdef ELF
#define SUBSPA_MILLI	 .section .text
#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits!  .align 16
#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits!  .align 16
#define ATTR_MILLI
#define SUBSPA_DATA	 .section .data
#define ATTR_DATA
#define GLOBAL		 $global$
#define GSYM(sym)	 !sym:
#define LSYM(sym)	 !CAT(.L,sym:)
#define LREF(sym)	 CAT(.L,sym)

#else

#ifdef coff
/* This used to be .milli but since link32 places different named
   sections in different segments millicode ends up a long ways away
   from .text (1meg?).  This way they will be a lot closer.

   The SUBSPA_MILLI_* specify locality sets for certain millicode
   modules in order to ensure that modules that call one another are
   placed close together.  Without locality sets this is unlikely to
   happen because of the Dynamite linker library search algorithm.  We
   want these modules close together so that short calls always reach
   (we don't want to require long calls or use long call stubs).  */

#define SUBSPA_MILLI	 .subspa .text
#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
#define ATTR_MILLI	 .attr code,read,execute
#define SUBSPA_DATA	 .subspa .data
#define ATTR_DATA	 .attr init_data,read,write
#define GLOBAL		 _gp
#else
#define SUBSPA_MILLI	 .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
#define SUBSPA_MILLI_DIV SUBSPA_MILLI
#define SUBSPA_MILLI_MUL SUBSPA_MILLI
#define ATTR_MILLI
#define SUBSPA_DATA	 .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
#define ATTR_DATA
#define GLOBAL		 $global$
#endif
#define SPACE_DATA	 .space $PRIVATE$,spnum=1,sort=16

#define GSYM(sym)	 !sym
#define LSYM(sym)	 !CAT(L$,sym)
#define LREF(sym)	 CAT(L$,sym)
#endif

#ifdef L_dyncall
/* $$dyncall: indirect call helper.  r22 holds either a function
   address or a function-pointer plabel (marked by bit 30); for a
   plabel, load the real target and the new linkage-table pointer (r19)
   from the plabel before branching.  */
	SUBSPA_MILLI
	ATTR_DATA
GSYM($$dyncall)
	.export $$dyncall,millicode
	.proc
	.callinfo	millicode
	.entry
#ifdef LINUX
	extru,<>	%r22,30,1,%r0	; nullify if plabel bit set
	bv,n	%r0(%r22)		; branch to target
	ldw	-2(%r22),%r21		; load address of target
	bv	%r0(%r21)		; branch to the real target
	ldw	2(%r22),%r19		; load new LTP value
#else
	bb,>=,n	%r22,30,LREF(1)		; branch if not plabel address
	ldw	-2(%r22),%r21		; load address of target to r21
	ldsid	(%sr0,%r21),%r1		; get the "space ident" selected by r21
	ldw	2(%r22),%r19		; load new LTP value
	mtsp	%r1,%sr0		; move that space identifier into sr0
	be	0(%sr0,%r21)		; branch to the real target
	stw	%r2,-24(%r30)		; save return address into frame marker
LSYM(1)
	ldsid	(%sr0,%r22),%r1		; get the "space ident" selected by r22
	mtsp	%r1,%sr0		; move that space identifier into sr0
	be	0(%sr0,%r22)		; branch to the target
	stw	%r2,-24(%r30)		; save return address into frame marker
#endif
	.exit
	.procend
#endif

#ifdef L_divI
/* ROUTINES:	$$divI, $$divoI

	Single precision divide for signed binary integers.

	The quotient is truncated towards zero.
	The sign of the quotient is the XOR of the signs of the dividend and
	divisor.
	Divide by zero is trapped.
	Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.

   INPUT REGISTERS:
   .	arg0 ==	dividend
   .	arg1 ==	divisor
   .	mrp  == return pc
   .	sr0  == return space when called externally

   OUTPUT REGISTERS:
   .	arg0 =	undefined
   .	arg1 =	undefined
   .	ret1 =	quotient

   OTHER REGISTERS AFFECTED:
   .	r1 =	undefined

   SIDE EFFECTS:
   .	Causes a trap under the following conditions:
   .		divisor is zero  (traps with ADDIT,=  0,25,0)
   .		dividend==-2**31  and divisor==-1 and routine is $$divoI
   .				 (traps with ADDO  26,25,0)
   .	Changes memory at the following places:
   .		NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable.
   .	Suitable for internal or external millicode.
   .	Assumes the special millicode register conventions.

   DISCUSSION:
   .	Branchs to other millicode routines using BE
   .		$$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
   .
   .	For selected divisors, calls a divide by constant routine written by
   .	Karl Pettis.  Eligible divisors are 1..15 excluding 11 and 13.
   .
   .	The only overflow case is -2**31 divided by -1.
   .	Both routines return -2**31 but only $$divoI traps.  */

RDEFINE(temp,r1)
RDEFINE(retreg,ret1)	/*  r29 */
RDEFINE(temp1,arg0)
	SUBSPA_MILLI_DIV
	ATTR_MILLI
	.import $$divI_2,millicode
	.import $$divI_3,millicode
	.import $$divI_4,millicode
	.import $$divI_5,millicode
	.import $$divI_6,millicode
	.import $$divI_7,millicode
	.import $$divI_8,millicode
	.import $$divI_9,millicode
	.import $$divI_10,millicode
	.import $$divI_12,millicode
	.import $$divI_14,millicode
	.import $$divI_15,millicode
	.export $$divI,millicode
	.export $$divoI,millicode
	.proc
	.callinfo	millicode
	.entry
GSYM($$divoI)
	comib,=,n  -1,arg1,LREF(negative1)	/*  when divisor == -1 */
GSYM($$divI)
	ldo	-1(arg1),temp		/*  is there at most one bit set ? */
	and,<>	arg1,temp,r0		/*  if not, don't use power of 2 divide */
	addi,>	0,arg1,r0		/*  if divisor > 0, use power of 2 divide */
	b,n	LREF(neg_denom)
LSYM(pow2)
	addi,>=	0,arg0,retreg		/*  if numerator is negative, add the */
	add	arg0,temp,retreg	/*  (denominator -1) to correct for shifts */
	extru,=	arg1,15,16,temp		/*  test denominator with 0xffff0000 */
	extrs	retreg,15,16,retreg	/*  retreg = retreg >> 16 */
	or	arg1,temp,arg1		/*  arg1 = arg1 | (arg1 >> 16) */
	ldi	0xcc,temp1		/*  setup 0xcc in temp1 */
	extru,=	arg1,23,8,temp		/*  test denominator with 0xff00 */
	extrs	retreg,23,24,retreg	/*  retreg = retreg >> 8 */
	or	arg1,temp,arg1		/*  arg1 = arg1 | (arg1 >> 8) */
	ldi	0xaa,temp		/*  setup 0xaa in temp */
	extru,=	arg1,27,4,r0		/*  test denominator with 0xf0 */
	extrs	retreg,27,28,retreg	/*  retreg = retreg >> 4 */
	and,=	arg1,temp1,r0		/*  test denominator with 0xcc */
	extrs	retreg,29,30,retreg	/*  retreg = retreg >> 2 */
	and,=	arg1,temp,r0		/*  test denominator with 0xaa */
	extrs	retreg,30,31,retreg	/*  retreg = retreg >> 1 */
	MILLIRETN
LSYM(neg_denom)
	addi,<	0,arg1,r0		/*  if arg1 >= 0, it's not power of 2 */
	b,n	LREF(regular_seq)
	sub	r0,arg1,temp		/*  make denominator positive */
	comb,=,n  arg1,temp,LREF(regular_seq) /*  test against 0x80000000 and 0 */
	ldo	-1(temp),retreg		/*  is there at most one bit set ? */
	and,=	temp,retreg,r0		/*  if so, the denominator is power of 2 */
	b,n	LREF(regular_seq)
	sub	r0,arg0,retreg		/*  negate numerator */
	comb,=,n  arg0,retreg,LREF(regular_seq) /*  test against 0x80000000 */
	copy	retreg,arg0		/*  set up arg0, arg1 and temp	*/
	copy	temp,arg1		/*  before branching to pow2 */
	b	LREF(pow2)
	ldo	-1(arg1),temp
LSYM(regular_seq)
	comib,>>=,n 15,arg1,LREF(small_divisor)
	add,>=	0,arg0,retreg		/*  move dividend, if retreg < 0, */
LSYM(normal)
	subi	0,retreg,retreg		/*    make it positive */
	sub	0,arg1,temp		/*  clear carry,  */
					/*    negate the divisor */
	ds	0,temp,0		/*  set V-bit to the comple- */
					/*    ment of the divisor sign */
	add	retreg,retreg,retreg	/*  shift msb bit into carry */
	ds	r0,arg1,temp		/*  1st divide step, if no carry */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  2nd divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  3rd divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  4th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  5th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  6th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  7th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  8th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  9th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  10th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  11th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  12th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  13th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  14th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  15th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  16th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  17th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  18th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  19th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  20th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  21st divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  22nd divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  23rd divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  24th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  25th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  26th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  27th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  28th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  29th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  30th divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  31st divide step */
	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds	temp,arg1,temp		/*  32nd divide step, */
	addc	retreg,retreg,retreg	/*  shift last retreg bit into retreg */
	xor,>=	arg0,arg1,0		/*  get correct sign of quotient */
	sub	0,retreg,retreg		/*    based on operand signs */
	MILLIRETN
	nop

LSYM(small_divisor)

#if defined(pa64)
/*  Clear the upper 32 bits of the arg1 register.  We are working with  */
/*  small divisors (and 32-bit integers)   We must not be mislead  */
/*  by "1" bits left in the upper 32 bits.  */
	depd	%r0,31,32,%r25
#endif
	blr,n	arg1,r0
	nop
/*  table for divisor == 0,1, ... ,15 */
	addit,=	0,arg1,r0	/*  trap if divisor == 0 */
	nop
	MILLIRET		/*  divisor == 1 */
	copy	arg0,retreg
	MILLI_BEN($$divI_2)	/*  divisor == 2 */
	nop
	MILLI_BEN($$divI_3)	/*  divisor == 3 */
	nop
	MILLI_BEN($$divI_4)	/*  divisor == 4 */
	nop
	MILLI_BEN($$divI_5)	/*  divisor == 5 */
	nop
	MILLI_BEN($$divI_6)	/*  divisor == 6 */
	nop
	MILLI_BEN($$divI_7)	/*  divisor == 7 */
	nop
	MILLI_BEN($$divI_8)	/*  divisor == 8 */
	nop
	MILLI_BEN($$divI_9)	/*  divisor == 9 */
	nop
	MILLI_BEN($$divI_10)	/*  divisor == 10 */
	nop
	b	LREF(normal)	/*  divisor == 11 */
	add,>=	0,arg0,retreg
	MILLI_BEN($$divI_12)	/*  divisor == 12 */
	nop
	b	LREF(normal)	/*  divisor == 13 */
	add,>=	0,arg0,retreg
	MILLI_BEN($$divI_14)	/*  divisor == 14 */
	nop
	MILLI_BEN($$divI_15)	/*  divisor == 15 */
	nop

LSYM(negative1)
	sub	0,arg0,retreg	/*  result is negation of dividend */
	MILLIRET
	addo	arg0,arg1,r0	/*  trap iff dividend==0x80000000 && divisor==-1 */
	.exit
	.procend
	.end
#endif

#ifdef L_divU
/* ROUTINE:	$$divU
   .
   .	Single precision divide for unsigned integers.
   .
   .	Quotient is truncated towards zero.
   .	Traps on divide by zero.

   INPUT REGISTERS:
   .	arg0 ==	dividend
   .	arg1 ==	divisor
   .	mrp  == return pc
   .	sr0  == return space when called externally

   OUTPUT REGISTERS:
   .	arg0 =	undefined
   .	arg1 =	undefined
   .	ret1 =	quotient

   OTHER REGISTERS AFFECTED:
   .	r1 =	undefined

   SIDE EFFECTS:
   .	Causes a trap under the following conditions:
   .		divisor is zero
   .	Changes memory at the following places:
   .		NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable.
   .	Does not create a stack frame.
   .	Suitable for internal or external millicode.
   .	Assumes the special millicode register conventions.

   DISCUSSION:
   .	Branchs to other millicode routines using BE:
   .		$$divU_# for 3,5,6,7,9,10,12,14,15
   .
   .	For selected small divisors calls the special divide by constant
   .	routines written by Karl Pettis.  These are: 3,5,6,7,9,10,12,14,15.  */

RDEFINE(temp,r1)
RDEFINE(retreg,ret1)	/* r29 */
RDEFINE(temp1,arg0)
	SUBSPA_MILLI_DIV
	ATTR_MILLI
	.export $$divU,millicode
	.import $$divU_3,millicode
	.import $$divU_5,millicode
	.import $$divU_6,millicode
	.import $$divU_7,millicode
	.import $$divU_9,millicode
	.import $$divU_10,millicode
	.import $$divU_12,millicode
	.import $$divU_14,millicode
	.import $$divU_15,millicode
	.proc
	.callinfo	millicode
	.entry
GSYM($$divU)
/* The subtract is not nullified since it does no harm and can be used
   by the two cases that branch back to "normal".  */
	ldo	-1(arg1),temp		/* is there at most one bit set ? */
	and,=	arg1,temp,r0		/* if so, denominator is power of 2 */
	b	LREF(regular_seq)
	addit,=	0,arg1,0		/* trap for zero dvr */
	copy	arg0,retreg
	extru,=	arg1,15,16,temp		/* test denominator with 0xffff0000 */
	extru	retreg,15,16,retreg	/* retreg = retreg >> 16 */
	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 16) */
	ldi	0xcc,temp1		/* setup 0xcc in temp1 */
	extru,=	arg1,23,8,temp		/* test denominator with 0xff00 */
	extru	retreg,23,24,retreg	/* retreg = retreg >> 8 */
	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 8) */
	ldi	0xaa,temp		/* setup 0xaa in temp */
	extru,=	arg1,27,4,r0		/* test denominator with 0xf0 */
	extru	retreg,27,28,retreg	/* retreg = retreg >> 4 */
	and,=	arg1,temp1,r0		/* test denominator with 0xcc */
	extru	retreg,29,30,retreg	/* retreg = retreg >> 2 */
	and,=	arg1,temp,r0		/* test denominator with 0xaa */
	extru	retreg,30,31,retreg	/* retreg = retreg >> 1 */
	MILLIRETN
	nop
LSYM(regular_seq)
	comib,>=  15,arg1,LREF(special_divisor)
	subi	0,arg1,temp		/* clear carry, negate the divisor */
	ds	r0,temp,r0		/* set V-bit to 1 */
LSYM(normal)
	add	arg0,arg0,retreg	/* shift msb bit into carry */
	ds	r0,arg1,temp		/* 1st divide step, if no carry */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 2nd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 3rd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 4th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 5th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 6th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 7th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 8th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 9th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 10th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 11th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 12th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 13th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 14th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 15th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 16th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 17th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 18th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 19th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 20th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 21st divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 22nd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 23rd divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 24th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 25th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 26th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 27th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 28th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 29th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 30th divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 31st divide step */
	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
	ds	temp,arg1,temp		/* 32nd divide step, */
	MILLIRET
	addc	retreg,retreg,retreg	/* shift last retreg bit into retreg */

/* Handle the cases where divisor is a small constant or has high bit on.  */
LSYM(special_divisor)
/*	blr	arg1,r0 */
/*	comib,>,n  0,arg1,LREF(big_divisor) ; nullify previous instruction */

/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
   generating such a blr, comib sequence. A problem in nullification. So I
   rewrote this code.  */

#if defined(pa64)
/* Clear the upper 32 bits of the arg1 register.  We are working with
   small divisors (and 32-bit unsigned integers)   We must not be mislead
   by "1" bits left in the upper 32 bits.  */
	depd	%r0,31,32,%r25
#endif
	comib,>	0,arg1,LREF(big_divisor)
	nop
	blr	arg1,r0
	nop

LSYM(zero_divisor)	/* this label is here to provide external visibility */
	addit,=	0,arg1,0	/* trap for zero dvr */
	nop
	MILLIRET		/* divisor == 1 */
	copy	arg0,retreg
	MILLIRET		/* divisor == 2 */
	extru	arg0,30,31,retreg
	MILLI_BEN($$divU_3)	/* divisor == 3 */
	nop
	MILLIRET		/* divisor == 4 */
	extru	arg0,29,30,retreg
	MILLI_BEN($$divU_5)	/* divisor == 5 */
	nop
	MILLI_BEN($$divU_6)	/* divisor == 6 */
	nop
	MILLI_BEN($$divU_7)	/* divisor == 7 */
	nop
	MILLIRET		/* divisor == 8 */
	extru	arg0,28,29,retreg
	MILLI_BEN($$divU_9)	/* divisor == 9 */
	nop
	MILLI_BEN($$divU_10)	/* divisor == 10 */
	nop
	b	LREF(normal)	/* divisor == 11 */
	ds	r0,temp,r0	/* set V-bit to 1 */
	MILLI_BEN($$divU_12)	/* divisor == 12 */
	nop
	b	LREF(normal)	/* divisor == 13 */
	ds	r0,temp,r0	/* set V-bit to 1 */
	MILLI_BEN($$divU_14)	/* divisor == 14 */
	nop
	MILLI_BEN($$divU_15)	/* divisor == 15 */
	nop

/* Handle the case where the high bit is on in the divisor.
   Compute:	if( dividend>=divisor) quotient=1; else quotient=0;
   Note: dividend>==divisor iff dividend-divisor does not borrow
   and	not borrow iff carry.  */
LSYM(big_divisor)
	sub	arg0,arg1,r0
	MILLIRET
	addc	r0,r0,retreg
	.exit
	.procend
	.end
#endif

712 1.1 mrg #ifdef L_remI
713 1.1 mrg /* ROUTINE: $$remI
714 1.1 mrg
715 1.1 mrg DESCRIPTION:
716 1.1 mrg . $$remI returns the remainder of the division of two signed 32-bit
717 1.1 mrg . integers. The sign of the remainder is the same as the sign of
718 1.1 mrg . the dividend.
719 1.1 mrg
720 1.1 mrg
721 1.1 mrg INPUT REGISTERS:
722 1.1 mrg . arg0 == dividend
723 1.1 mrg . arg1 == divisor
724 1.1 mrg . mrp == return pc
725 1.1 mrg . sr0 == return space when called externally
726 1.1 mrg
727 1.1 mrg OUTPUT REGISTERS:
728 1.1 mrg . arg0 = destroyed
729 1.1 mrg . arg1 = destroyed
730 1.1 mrg . ret1 = remainder
731 1.1 mrg
732 1.1 mrg OTHER REGISTERS AFFECTED:
733 1.1 mrg . r1 = undefined
734 1.1 mrg
735 1.1 mrg SIDE EFFECTS:
736 1.1 mrg . Causes a trap under the following conditions: DIVIDE BY ZERO
737 1.1 mrg . Changes memory at the following places: NONE
738 1.1 mrg
739 1.1 mrg PERMISSIBLE CONTEXT:
740 1.1 mrg . Unwindable
741 1.1 mrg . Does not create a stack frame
742 1.1 mrg . Is usable for internal or external microcode
743 1.1 mrg
744 1.1 mrg DISCUSSION:
745 1.1 mrg . Calls other millicode routines via mrp: NONE
746 1.1 mrg . Calls other millicode routines: NONE */
747 1.1 mrg
748 1.1 mrg RDEFINE(tmp,r1)
749 1.1 mrg RDEFINE(retreg,ret1)
750 1.1 mrg
751 1.1 mrg SUBSPA_MILLI
752 1.1 mrg ATTR_MILLI
753 1.1 mrg .proc
754 1.1 mrg .callinfo millicode
755 1.1 mrg .entry
756 1.1 mrg GSYM($$remI)
757 1.1 mrg GSYM($$remoI) /* alias entry point: same code as $$remI */
758 1.1 mrg .export $$remI,MILLICODE
759 1.1 mrg .export $$remoI,MILLICODE
760 1.1 mrg ldo -1(arg1),tmp /* is there at most one bit set ? */
761 1.1 mrg and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
762 1.1 mrg addi,> 0,arg1,r0 /* if denominator > 0, use power */
763 1.1 mrg /* of 2 */
764 1.1 mrg b,n LREF(neg_denom) /* reached if not a power of 2 or arg1 <= 0 */
765 1.1 mrg LSYM(pow2)
766 1.1 mrg comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
767 1.1 mrg and arg0,tmp,retreg /* get the result */
768 1.1 mrg MILLIRETN
769 1.1 mrg LSYM(neg_num)
770 1.1 mrg subi 0,arg0,arg0 /* negate numerator */
771 1.1 mrg and arg0,tmp,retreg /* get the result */
772 1.1 mrg subi 0,retreg,retreg /* negate result */
773 1.1 mrg MILLIRETN
774 1.1 mrg LSYM(neg_denom)
775 1.1 mrg addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
776 1.1 mrg /* of 2 */
777 1.1 mrg b,n LREF(regular_seq)
778 1.1 mrg sub r0,arg1,tmp /* make denominator positive */
779 1.1 mrg comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
780 1.1 mrg ldo -1(tmp),retreg /* is there at most one bit set ? */
781 1.1 mrg and,= tmp,retreg,r0 /* if not, go to regular_seq */
782 1.1 mrg b,n LREF(regular_seq)
783 1.1 mrg comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
784 1.1 mrg and arg0,retreg,retreg
785 1.1 mrg MILLIRETN
786 1.1 mrg LSYM(neg_num_2)
787 1.1 mrg subi 0,arg0,tmp /* negate numerator (0x80000000 is its own negation) */
788 1.1 mrg and tmp,retreg,retreg
789 1.1 mrg subi 0,retreg,retreg
790 1.1 mrg MILLIRETN
791 1.1 mrg LSYM(regular_seq) /* general path: 32 unrolled DS divide steps */
792 1.1 mrg addit,= 0,arg1,0 /* trap if div by zero */
793 1.1 mrg add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
794 1.1 mrg sub 0,retreg,retreg /* make it positive */
795 1.1 mrg sub 0,arg1, tmp /* clear carry, */
796 1.1 mrg /* negate the divisor */
797 1.1 mrg ds 0, tmp,0 /* set V-bit to the comple- */
798 1.1 mrg /* ment of the divisor sign */
799 1.1 mrg or 0,0, tmp /* clear tmp */
800 1.1 mrg add retreg,retreg,retreg /* shift msb bit into carry */
801 1.1 mrg ds tmp,arg1, tmp /* 1st divide step, if no carry */
802 1.1 mrg /* out, msb of quotient = 0 */
803 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
804 1.1 mrg LSYM(t1)
805 1.1 mrg ds tmp,arg1, tmp /* 2nd divide step */
806 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
807 1.1 mrg ds tmp,arg1, tmp /* 3rd divide step */
808 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
809 1.1 mrg ds tmp,arg1, tmp /* 4th divide step */
810 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
811 1.1 mrg ds tmp,arg1, tmp /* 5th divide step */
812 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
813 1.1 mrg ds tmp,arg1, tmp /* 6th divide step */
814 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
815 1.1 mrg ds tmp,arg1, tmp /* 7th divide step */
816 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
817 1.1 mrg ds tmp,arg1, tmp /* 8th divide step */
818 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
819 1.1 mrg ds tmp,arg1, tmp /* 9th divide step */
820 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
821 1.1 mrg ds tmp,arg1, tmp /* 10th divide step */
822 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
823 1.1 mrg ds tmp,arg1, tmp /* 11th divide step */
824 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
825 1.1 mrg ds tmp,arg1, tmp /* 12th divide step */
826 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
827 1.1 mrg ds tmp,arg1, tmp /* 13th divide step */
828 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
829 1.1 mrg ds tmp,arg1, tmp /* 14th divide step */
830 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
831 1.1 mrg ds tmp,arg1, tmp /* 15th divide step */
832 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
833 1.1 mrg ds tmp,arg1, tmp /* 16th divide step */
834 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
835 1.1 mrg ds tmp,arg1, tmp /* 17th divide step */
836 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
837 1.1 mrg ds tmp,arg1, tmp /* 18th divide step */
838 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
839 1.1 mrg ds tmp,arg1, tmp /* 19th divide step */
840 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
841 1.1 mrg ds tmp,arg1, tmp /* 20th divide step */
842 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
843 1.1 mrg ds tmp,arg1, tmp /* 21st divide step */
844 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
845 1.1 mrg ds tmp,arg1, tmp /* 22nd divide step */
846 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
847 1.1 mrg ds tmp,arg1, tmp /* 23rd divide step */
848 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
849 1.1 mrg ds tmp,arg1, tmp /* 24th divide step */
850 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
851 1.1 mrg ds tmp,arg1, tmp /* 25th divide step */
852 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
853 1.1 mrg ds tmp,arg1, tmp /* 26th divide step */
854 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
855 1.1 mrg ds tmp,arg1, tmp /* 27th divide step */
856 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
857 1.1 mrg ds tmp,arg1, tmp /* 28th divide step */
858 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
859 1.1 mrg ds tmp,arg1, tmp /* 29th divide step */
860 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
861 1.1 mrg ds tmp,arg1, tmp /* 30th divide step */
862 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
863 1.1 mrg ds tmp,arg1, tmp /* 31st divide step */
864 1.1 mrg addc retreg,retreg,retreg /* shift retreg with/into carry */
865 1.1 mrg ds tmp,arg1, tmp /* 32nd divide step, */
866 1.1 mrg addc retreg,retreg,retreg /* shift last bit into retreg */
867 1.1 mrg movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
868 1.1 mrg add,< arg1,0,0 /* if arg1 > 0, add arg1 */
869 1.1 mrg add,tr tmp,arg1,retreg /* for correcting remainder tmp */
870 1.1 mrg sub tmp,arg1,retreg /* else add absolute value arg1 */
871 1.1 mrg LSYM(finish)
872 1.1 mrg add,>= arg0,0,0 /* set sign of remainder */
873 1.1 mrg sub 0,retreg,retreg /* to sign of dividend */
874 1.1 mrg MILLIRET
875 1.1 mrg nop
876 1.1 mrg .exit
877 1.1 mrg .procend
878 1.1 mrg #ifdef milliext
879 1.1 mrg .origin 0x00000200
880 1.1 mrg #endif
881 1.1 mrg .end
882 1.1 mrg #endif
883 1.1 mrg
884 1.1 mrg #ifdef L_remU
885 1.1 mrg /* ROUTINE: $$remU
886 1.1 mrg . Single precision divide for remainder with unsigned binary integers.
887 1.1 mrg .
888 1.1 mrg . The remainder must be dividend-(dividend/divisor)*divisor.
889 1.1 mrg . Divide by zero is trapped.
890 1.1 mrg
891 1.1 mrg INPUT REGISTERS:
892 1.1 mrg . arg0 == dividend
893 1.1 mrg . arg1 == divisor
894 1.1 mrg . mrp == return pc
895 1.1 mrg . sr0 == return space when called externally
896 1.1 mrg
897 1.1 mrg OUTPUT REGISTERS:
898 1.1 mrg . arg0 = undefined
899 1.1 mrg . arg1 = undefined
900 1.1 mrg . ret1 = remainder
901 1.1 mrg
902 1.1 mrg OTHER REGISTERS AFFECTED:
903 1.1 mrg . r1 = undefined
904 1.1 mrg
905 1.1 mrg SIDE EFFECTS:
906 1.1 mrg . Causes a trap under the following conditions: DIVIDE BY ZERO
907 1.1 mrg . Changes memory at the following places: NONE
908 1.1 mrg
909 1.1 mrg PERMISSIBLE CONTEXT:
910 1.1 mrg . Unwindable.
911 1.1 mrg . Does not create a stack frame.
912 1.1 mrg . Suitable for internal or external millicode.
913 1.1 mrg . Assumes the special millicode register conventions.
914 1.1 mrg
915 1.1 mrg DISCUSSION:
916 1.1 mrg . Calls other millicode routines using mrp: NONE
917 1.1 mrg . Calls other millicode routines: NONE */
918 1.1 mrg
919 1.1 mrg
920 1.1 mrg RDEFINE(temp,r1)
921 1.1 mrg RDEFINE(rmndr,ret1) /* r29 */
922 1.1 mrg SUBSPA_MILLI
923 1.1 mrg ATTR_MILLI
924 1.1 mrg .export $$remU,millicode
925 1.1 mrg .proc
926 1.1 mrg .callinfo millicode
927 1.1 mrg .entry
928 1.1 mrg GSYM($$remU)
929 1.1 mrg ldo -1(arg1),temp /* is there at most one bit set ? */
930 1.1 mrg and,= arg1,temp,r0 /* if not, don't use power of 2 */
931 1.1 mrg b LREF(regular_seq)
932 1.1 mrg addit,= 0,arg1,r0 /* trap on div by zero */
933 1.1 mrg /* (delay slot: trap check runs on both paths) */
934 1.1 mrg and arg0,temp,rmndr /* get the result for power of 2 */
935 1.1 mrg MILLIRETN
935 1.1 mrg LSYM(regular_seq)
936 1.1 mrg comib,>=,n 0,arg1,LREF(special_case) /* divisor msb set: one subtract suffices */
937 1.1 mrg subi 0,arg1,rmndr /* clear carry, negate the divisor */
938 1.1 mrg ds r0,rmndr,r0 /* set V-bit to 1 */
939 1.1 mrg add arg0,arg0,temp /* shift msb bit into carry */
940 1.1 mrg ds r0,arg1,rmndr /* 1st divide step, if no carry */
941 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
942 1.1 mrg ds rmndr,arg1,rmndr /* 2nd divide step */
943 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
944 1.1 mrg ds rmndr,arg1,rmndr /* 3rd divide step */
945 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
946 1.1 mrg ds rmndr,arg1,rmndr /* 4th divide step */
947 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
948 1.1 mrg ds rmndr,arg1,rmndr /* 5th divide step */
949 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
950 1.1 mrg ds rmndr,arg1,rmndr /* 6th divide step */
951 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
952 1.1 mrg ds rmndr,arg1,rmndr /* 7th divide step */
953 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
954 1.1 mrg ds rmndr,arg1,rmndr /* 8th divide step */
955 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
956 1.1 mrg ds rmndr,arg1,rmndr /* 9th divide step */
957 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
958 1.1 mrg ds rmndr,arg1,rmndr /* 10th divide step */
959 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
960 1.1 mrg ds rmndr,arg1,rmndr /* 11th divide step */
961 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
962 1.1 mrg ds rmndr,arg1,rmndr /* 12th divide step */
963 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
964 1.1 mrg ds rmndr,arg1,rmndr /* 13th divide step */
965 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
966 1.1 mrg ds rmndr,arg1,rmndr /* 14th divide step */
967 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
968 1.1 mrg ds rmndr,arg1,rmndr /* 15th divide step */
969 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
970 1.1 mrg ds rmndr,arg1,rmndr /* 16th divide step */
971 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
972 1.1 mrg ds rmndr,arg1,rmndr /* 17th divide step */
973 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
974 1.1 mrg ds rmndr,arg1,rmndr /* 18th divide step */
975 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
976 1.1 mrg ds rmndr,arg1,rmndr /* 19th divide step */
977 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
978 1.1 mrg ds rmndr,arg1,rmndr /* 20th divide step */
979 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
980 1.1 mrg ds rmndr,arg1,rmndr /* 21st divide step */
981 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
982 1.1 mrg ds rmndr,arg1,rmndr /* 22nd divide step */
983 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
984 1.1 mrg ds rmndr,arg1,rmndr /* 23rd divide step */
985 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
986 1.1 mrg ds rmndr,arg1,rmndr /* 24th divide step */
987 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
988 1.1 mrg ds rmndr,arg1,rmndr /* 25th divide step */
989 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
990 1.1 mrg ds rmndr,arg1,rmndr /* 26th divide step */
991 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
992 1.1 mrg ds rmndr,arg1,rmndr /* 27th divide step */
993 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
994 1.1 mrg ds rmndr,arg1,rmndr /* 28th divide step */
995 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
996 1.1 mrg ds rmndr,arg1,rmndr /* 29th divide step */
997 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
998 1.1 mrg ds rmndr,arg1,rmndr /* 30th divide step */
999 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
1000 1.1 mrg ds rmndr,arg1,rmndr /* 31st divide step */
1001 1.1 mrg addc temp,temp,temp /* shift temp with/into carry */
1002 1.1 mrg ds rmndr,arg1,rmndr /* 32nd divide step, */
1003 1.1 mrg comiclr,<= 0,rmndr,r0 /* skip correction if remainder >= 0 */
1004 1.1 mrg add rmndr,arg1,rmndr /* correction */
1005 1.1 mrg MILLIRETN
1006 1.1 mrg nop
1007 1.1 mrg
1008 1.1 mrg /* Putting >= on the last DS and deleting COMICLR does not work! */
1009 1.1 mrg LSYM(special_case)
1010 1.1 mrg sub,>>= arg0,arg1,rmndr /* if arg0 >= arg1 (unsigned), rmndr = arg0-arg1 */
1011 1.1 mrg copy arg0,rmndr /* else remainder is the dividend itself */
1012 1.1 mrg MILLIRETN
1013 1.1 mrg nop
1014 1.1 mrg .exit
1015 1.1 mrg .procend
1016 1.1 mrg .end
1017 1.1 mrg #endif
1018 1.1 mrg
1019 1.1 mrg #ifdef L_div_const
1020 1.1 mrg /* ROUTINE: $$divI_2
1021 1.1 mrg . $$divI_3 $$divU_3
1022 1.1 mrg . $$divI_4
1023 1.1 mrg . $$divI_5 $$divU_5
1024 1.1 mrg . $$divI_6 $$divU_6
1025 1.1 mrg . $$divI_7 $$divU_7
1026 1.1 mrg . $$divI_8
1027 1.1 mrg . $$divI_9 $$divU_9
1028 1.1 mrg . $$divI_10 $$divU_10
1029 1.1 mrg .
1030 1.1 mrg . $$divI_12 $$divU_12
1031 1.1 mrg .
1032 1.1 mrg . $$divI_14 $$divU_14
1033 1.1 mrg . $$divI_15 $$divU_15
1034 1.1 mrg . $$divI_16
1035 1.1 mrg . $$divI_17 $$divU_17
1036 1.1 mrg .
1037 1.1 mrg . Divide by selected constants for single precision binary integers.
1038 1.1 mrg
1039 1.1 mrg INPUT REGISTERS:
1040 1.1 mrg . arg0 == dividend
1041 1.1 mrg . mrp == return pc
1042 1.1 mrg . sr0 == return space when called externally
1043 1.1 mrg
1044 1.1 mrg OUTPUT REGISTERS:
1045 1.1 mrg . arg0 = undefined
1046 1.1 mrg . arg1 = undefined
1047 1.1 mrg . ret1 = quotient
1048 1.1 mrg
1049 1.1 mrg OTHER REGISTERS AFFECTED:
1050 1.1 mrg . r1 = undefined
1051 1.1 mrg
1052 1.1 mrg SIDE EFFECTS:
1053 1.1 mrg . Causes a trap under the following conditions: NONE
1054 1.1 mrg . Changes memory at the following places: NONE
1055 1.1 mrg
1056 1.1 mrg PERMISSIBLE CONTEXT:
1057 1.1 mrg . Unwindable.
1058 1.1 mrg . Does not create a stack frame.
1059 1.1 mrg . Suitable for internal or external millicode.
1060 1.1 mrg . Assumes the special millicode register conventions.
1061 1.1 mrg
1062 1.1 mrg DISCUSSION:
1063 1.1 mrg . Calls other millicode routines using mrp: NONE
1064 1.1 mrg . Calls other millicode routines: NONE */
1065 1.1 mrg
1066 1.1 mrg
1067 1.1 mrg /* TRUNCATED DIVISION BY SMALL INTEGERS
1068 1.1 mrg
1069 1.1 mrg We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
1070 1.1 mrg (with y fixed).
1071 1.1 mrg
1072 1.1 mrg Let a = floor(z/y), for some choice of z. Note that z will be
1073 1.1 mrg chosen so that division by z is cheap.
1074 1.1 mrg
1075 1.1 mrg Let r be the remainder(z/y). In other words, r = z - ay.
1076 1.1 mrg
1077 1.1 mrg Now, our method is to choose a value for b such that
1078 1.1 mrg
1079 1.1 mrg q'(x) = floor((ax+b)/z)
1080 1.1 mrg
1081 1.1 mrg is equal to q(x) over as large a range of x as possible. If the
1082 1.1 mrg two are equal over a sufficiently large range, and if it is easy to
1083 1.1 mrg form the product (ax), and it is easy to divide by z, then we can
1084 1.1 mrg perform the division much faster than the general division algorithm.
1085 1.1 mrg
1086 1.1 mrg So, we want the following to be true:
1087 1.1 mrg
1088 1.1 mrg . For x in the following range:
1089 1.1 mrg .
1090 1.1 mrg . ky <= x < (k+1)y
1091 1.1 mrg .
1092 1.1 mrg . implies that
1093 1.1 mrg .
1094 1.1 mrg . k <= (ax+b)/z < (k+1)
1095 1.1 mrg
1096 1.1 mrg We want to determine b such that this is true for all k in the
1097 1.1 mrg range {0..K} for some maximum K.
1098 1.1 mrg
1099 1.1 mrg Since (ax+b) is an increasing function of x, we can take each
1100 1.1 mrg bound separately to determine the "best" value for b.
1101 1.1 mrg
1102 1.1 mrg (ax+b)/z < (k+1) implies
1103 1.1 mrg
1104 1.1 mrg a((k+1)y-1)+b < (k+1)z implies
1105 1.1 mrg
1106 1.1 mrg b < a + (k+1)(z-ay) implies
1107 1.1 mrg
1108 1.1 mrg b < a + (k+1)r
1109 1.1 mrg
1110 1.1 mrg This needs to be true for all k in the range {0..K}. In
1111 1.1 mrg particular, it is true for k = 0 and this leads to a maximum
1112 1.1 mrg acceptable value for b.
1113 1.1 mrg
1114 1.1 mrg b < a+r or b <= a+r-1
1115 1.1 mrg
1116 1.1 mrg Taking the other bound, we have
1117 1.1 mrg
1118 1.1 mrg k <= (ax+b)/z implies
1119 1.1 mrg
1120 1.1 mrg k <= (aky+b)/z implies
1121 1.1 mrg
1122 1.1 mrg k(z-ay) <= b implies
1123 1.1 mrg
1124 1.1 mrg kr <= b
1125 1.1 mrg
1126 1.1 mrg Clearly, the largest range for k will be achieved by maximizing b,
1127 1.1 mrg when r is not zero. When r is zero, then the simplest choice for b
1128 1.1 mrg is 0. When r is not 0, set
1129 1.1 mrg
1130 1.1 mrg . b = a+r-1
1131 1.1 mrg
1132 1.1 mrg Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
1133 1.1 mrg for all x in the range:
1134 1.1 mrg
1135 1.1 mrg . 0 <= x < (K+1)y
1136 1.1 mrg
1137 1.1 mrg We need to determine what K is. Of our two bounds,
1138 1.1 mrg
1139 1.1 mrg . b < a+(k+1)r is satisfied for all k >= 0, by construction.
1140 1.1 mrg
1141 1.1 mrg The other bound is
1142 1.1 mrg
1143 1.1 mrg . kr <= b
1144 1.1 mrg
1145 1.1 mrg This is always true if r = 0. If r is not 0 (the usual case), then
1146 1.1 mrg K = floor((a+r-1)/r), is the maximum value for k.
1147 1.1 mrg
1148 1.1 mrg Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
1149 1.1 mrg answer for q(x) = floor(x/y) when x is in the range
1150 1.1 mrg
1151 1.1 mrg (0,(K+1)y-1) K = floor((a+r-1)/r)
1152 1.1 mrg
1153 1.1 mrg To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
1154 1.1 mrg the formula for q'(x) yields the correct value of q(x) for all x
1155 1.1 mrg representable by a single word in HPPA.
1156 1.1 mrg
1157 1.1 mrg We are also constrained in that computing the product (ax), adding
1158 1.1 mrg b, and dividing by z must all be done quickly, otherwise we will be
1159 1.1 mrg better off going through the general algorithm using the DS
1160 1.1 mrg instruction, which uses approximately 70 cycles.
1161 1.1 mrg
1162 1.1 mrg For each y, there is a choice of z which satisfies the constraints
1163 1.1 mrg for (K+1)y >= 2**32. We may not, however, be able to satisfy the
1164 1.1 mrg timing constraints for arbitrary y. It seems that z being equal to
1165 1.1 mrg a power of 2 or a power of 2 minus 1 is as good as we can do, since
1166 1.1 mrg it minimizes the time to do division by z. We want the choice of z
1167 1.1 mrg to also result in a value for (a) that minimizes the computation of
1168 1.1 mrg the product (ax). This is best achieved if (a) has a regular bit
1169 1.1 mrg pattern (so the multiplication can be done with shifts and adds).
1170 1.1 mrg The value of (a) also needs to be less than 2**32 so the product is
1171 1.1 mrg always guaranteed to fit in 2 words.
1172 1.1 mrg
1173 1.1 mrg In actual practice, the following should be done:
1174 1.1 mrg
1175 1.1 mrg 1) For negative x, you should take the absolute value and remember
1176 1.1 mrg . the fact so that the result can be negated. This obviously does
1177 1.1 mrg . not apply in the unsigned case.
1178 1.1 mrg 2) For even y, you should factor out the power of 2 that divides y
1179 1.1 mrg . and divide x by it. You can then proceed by dividing by the
1180 1.1 mrg . odd factor of y.
1181 1.1 mrg
1182 1.1 mrg Here is a table of some odd values of y, and corresponding choices
1183 1.1 mrg for z which are "good".
1184 1.1 mrg
1185 1.1 mrg y z r a (hex) max x (hex)
1186 1.1 mrg
1187 1.1 mrg 3 2**32 1 55555555 100000001
1188 1.1 mrg 5 2**32 1 33333333 100000003
1189 1.1 mrg 7 2**24-1 0 249249 (infinite)
1190 1.1 mrg 9 2**24-1 0 1c71c7 (infinite)
1191 1.1 mrg 11 2**20-1 0 1745d (infinite)
1192 1.1 mrg 13 2**24-1 0 13b13b (infinite)
1193 1.1 mrg 15 2**32 1 11111111 10000000d
1194 1.1 mrg 17 2**32 1 f0f0f0f 10000000f
1195 1.1 mrg
1196 1.1 mrg If r is 1, then b = a+r-1 = a. This simplifies the computation
1197 1.1 mrg of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
1198 1.1 mrg then b = 0 is ok to use which simplifies (ax+b).
1199 1.1 mrg
1200 1.1 mrg The bit patterns for 55555555, 33333333, and 11111111 are obviously
1201 1.1 mrg very regular. The bit patterns for the other values of a above are:
1202 1.1 mrg
1203 1.1 mrg y (hex) (binary)
1204 1.1 mrg
1205 1.1 mrg 7 249249 001001001001001001001001 << regular >>
1206 1.1 mrg 9 1c71c7 000111000111000111000111 << regular >>
1207 1.1 mrg 11 1745d 000000010111010001011101 << irregular >>
1208 1.1 mrg 13 13b13b 000100111011000100111011 << irregular >>
1209 1.1 mrg
1210 1.1 mrg The bit patterns for (a) corresponding to (y) of 11 and 13 may be
1211 1.1 mrg too irregular to warrant using this method.
1212 1.1 mrg
1213 1.1 mrg When z is a power of 2 minus 1, then the division by z is slightly
1214 1.1 mrg more complicated, involving an iterative solution.
1215 1.1 mrg
1216 1.1 mrg The code presented here solves division by 1 through 17, except for
1217 1.1 mrg 11 and 13. There are algorithms for both signed and unsigned
1218 1.1 mrg quantities given.
1219 1.1 mrg
1220 1.1 mrg TIMINGS (cycles)
1221 1.1 mrg
1222 1.1 mrg divisor positive negative unsigned
1223 1.1 mrg
1224 1.1 mrg . 1 2 2 2
1225 1.1 mrg . 2 4 4 2
1226 1.1 mrg . 3 19 21 19
1227 1.1 mrg . 4 4 4 2
1228 1.1 mrg . 5 18 22 19
1229 1.1 mrg . 6 19 22 19
1230 1.1 mrg . 8 4 4 2
1231 1.1 mrg . 10 18 19 17
1232 1.1 mrg . 12 18 20 18
1233 1.1 mrg . 15 16 18 16
1234 1.1 mrg . 16 4 4 2
1235 1.1 mrg . 17 16 18 16
1236 1.1 mrg
1237 1.1 mrg Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
1238 1.1 mrg a loop body is executed until the tentative quotient is 0. The
1239 1.1 mrg number of times the loop body is executed varies depending on the
1240 1.1 mrg dividend, but is never more than two times. If the dividend is
1241 1.1 mrg less than the divisor, then the loop body is not executed at all.
1242 1.1 mrg Each iteration adds 4 cycles to the timings.
1243 1.1 mrg
1244 1.1 mrg divisor positive negative unsigned
1245 1.1 mrg
1246 1.1 mrg . 7 19+4n 20+4n 20+4n n = number of iterations
1247 1.1 mrg . 9 21+4n 22+4n 21+4n
1248 1.1 mrg . 14 21+4n 22+4n 20+4n
1249 1.1 mrg
1250 1.1 mrg To give an idea of how the number of iterations varies, here is a
1251 1.1 mrg table of dividend versus number of iterations when dividing by 7.
1252 1.1 mrg
1253 1.1 mrg smallest largest required
1254 1.1 mrg dividend dividend iterations
1255 1.1 mrg
1256 1.1 mrg . 0 6 0
1257 1.1 mrg . 7 0x6ffffff 1
1258 1.1 mrg 0x1000006 0xffffffff 2
1259 1.1 mrg
1260 1.1 mrg There is some overlap in the range of numbers requiring 1 and 2
1261 1.1 mrg iterations. */
1262 1.1 mrg
1263 1.1 mrg RDEFINE(t2,r1)
1264 1.1 mrg RDEFINE(x2,arg0) /* r26 */
1265 1.1 mrg RDEFINE(t1,arg1) /* r25 */
1266 1.1 mrg RDEFINE(x1,ret1) /* r29 */
1267 1.1 mrg
1268 1.1 mrg SUBSPA_MILLI_DIV
1269 1.1 mrg ATTR_MILLI
1270 1.1 mrg
1271 1.1 mrg .proc
1272 1.1 mrg .callinfo millicode
1273 1.1 mrg .entry
1274 1.1 mrg /* NONE of these routines require a stack frame
1275 1.1 mrg ALL of these routines are unwindable from millicode */
1276 1.1 mrg
1277 1.1 mrg GSYM($$divide_by_constant)
1278 1.1 mrg .export $$divide_by_constant,millicode
1279 1.1 mrg /* Provides a "nice" label for the code covered by the unwind descriptor
1280 1.1 mrg for things like gprof. */
1281 1.1 mrg
1282 1.1 mrg /* DIVISION BY 2 (shift by 1) */
1283 1.1 mrg GSYM($$divI_2)
1284 1.1 mrg .export $$divI_2,millicode
1285 1.1 mrg comclr,>= arg0,0,0
1286 1.1 mrg addi 1,arg0,arg0
1287 1.1 mrg MILLIRET
1288 1.1 mrg extrs arg0,30,31,ret1
1289 1.1 mrg
1290 1.1 mrg
1291 1.1 mrg /* DIVISION BY 4 (shift by 2) */
1292 1.1 mrg GSYM($$divI_4)
1293 1.1 mrg .export $$divI_4,millicode
1294 1.1 mrg comclr,>= arg0,0,0
1295 1.1 mrg addi 3,arg0,arg0
1296 1.1 mrg MILLIRET
1297 1.1 mrg extrs arg0,29,30,ret1
1298 1.1 mrg
1299 1.1 mrg
1300 1.1 mrg /* DIVISION BY 8 (shift by 3) */
1301 1.1 mrg GSYM($$divI_8)
1302 1.1 mrg .export $$divI_8,millicode
1303 1.1 mrg comclr,>= arg0,0,0
1304 1.1 mrg addi 7,arg0,arg0
1305 1.1 mrg MILLIRET
1306 1.1 mrg extrs arg0,28,29,ret1
1307 1.1 mrg
1308 1.1 mrg /* DIVISION BY 16 (shift by 4) */
1309 1.1 mrg GSYM($$divI_16)
1310 1.1 mrg .export $$divI_16,millicode
1311 1.1 mrg comclr,>= arg0,0,0
1312 1.1 mrg addi 15,arg0,arg0
1313 1.1 mrg MILLIRET
1314 1.1 mrg extrs arg0,27,28,ret1
1315 1.1 mrg
1316 1.1 mrg /****************************************************************************
1317 1.1 mrg *
1318 1.1 mrg * DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
1319 1.1 mrg *
1320 1.1 mrg * includes 3,5,15,17 and also 6,10,12
1321 1.1 mrg *
1322 1.1 mrg ****************************************************************************/
1323 1.1 mrg
1324 1.1 mrg /* DIVISION BY 3 (use z = 2**32; a = 55555555) */
1325 1.1 mrg
1326 1.1 mrg GSYM($$divI_3)
1327 1.1 mrg .export $$divI_3,millicode
1328 1.1 mrg comb,<,N x2,0,LREF(neg3)
1329 1.1 mrg
1330 1.1 mrg addi 1,x2,x2 /* this cannot overflow */
1331 1.1 mrg extru x2,1,2,x1 /* multiply by 5 to get started */
1332 1.1 mrg sh2add x2,x2,x2
1333 1.1 mrg b LREF(pos)
1334 1.1 mrg addc x1,0,x1
1335 1.1 mrg
1336 1.1 mrg LSYM(neg3)
1337 1.1 mrg subi 1,x2,x2 /* this cannot overflow */
1338 1.1 mrg extru x2,1,2,x1 /* multiply by 5 to get started */
1339 1.1 mrg sh2add x2,x2,x2
1340 1.1 mrg b LREF(neg)
1341 1.1 mrg addc x1,0,x1
1342 1.1 mrg
1343 1.1 mrg GSYM($$divU_3)
1344 1.1 mrg .export $$divU_3,millicode
1345 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1346 1.1 mrg addc 0,0,x1
1347 1.1 mrg shd x1,x2,30,t1 /* multiply by 5 to get started */
1348 1.1 mrg sh2add x2,x2,x2
1349 1.1 mrg b LREF(pos)
1350 1.1 mrg addc x1,t1,x1
1351 1.1 mrg
1352 1.1 mrg /* DIVISION BY 5 (use z = 2**32; a = 33333333) */
1353 1.1 mrg
1354 1.1 mrg GSYM($$divI_5)
1355 1.1 mrg .export $$divI_5,millicode
1356 1.1 mrg comb,<,N x2,0,LREF(neg5)
1357 1.1 mrg
1358 1.1 mrg addi 3,x2,t1 /* this cannot overflow */
1359 1.1 mrg sh1add x2,t1,x2 /* multiply by 3 to get started */
1360 1.1 mrg b LREF(pos)
1361 1.1 mrg addc 0,0,x1
1362 1.1 mrg
1363 1.1 mrg LSYM(neg5)
1364 1.1 mrg sub 0,x2,x2 /* negate x2 */
1365 1.1 mrg addi 1,x2,x2 /* this cannot overflow */
1366 1.1 mrg shd 0,x2,31,x1 /* get top bit (can be 1) */
1367 1.1 mrg sh1add x2,x2,x2 /* multiply by 3 to get started */
1368 1.1 mrg b LREF(neg)
1369 1.1 mrg addc x1,0,x1
1370 1.1 mrg
1371 1.1 mrg GSYM($$divU_5)
1372 1.1 mrg .export $$divU_5,millicode
1373 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1374 1.1 mrg addc 0,0,x1
1375 1.1 mrg shd x1,x2,31,t1 /* multiply by 3 to get started */
1376 1.1 mrg sh1add x2,x2,x2
1377 1.1 mrg b LREF(pos)
1378 1.1 mrg addc t1,x1,x1
1379 1.1 mrg
1380 1.1 mrg /* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
1381 1.1 mrg GSYM($$divI_6)
1382 1.1 mrg .export $$divI_6,millicode
1383 1.1 mrg comb,<,N x2,0,LREF(neg6)
1384 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1385 1.1 mrg addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
1386 1.1 mrg sh2add x2,t1,x2 /* multiply by 5 to get started */
1387 1.1 mrg b LREF(pos)
1388 1.1 mrg addc 0,0,x1
1389 1.1 mrg
1390 1.1 mrg LSYM(neg6)
1391 1.1 mrg subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1392 1.1 mrg /* negation and adding 1 are done */
1393 1.1 mrg /* at the same time by the SUBI */
1394 1.1 mrg extru x2,30,31,x2
1395 1.1 mrg shd 0,x2,30,x1
1396 1.1 mrg sh2add x2,x2,x2 /* multiply by 5 to get started */
1397 1.1 mrg b LREF(neg)
1398 1.1 mrg addc x1,0,x1
1399 1.1 mrg
1400 1.1 mrg GSYM($$divU_6)
1401 1.1 mrg .export $$divU_6,millicode
1402 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1403 1.1 mrg addi 1,x2,x2 /* cannot carry */
1404 1.1 mrg shd 0,x2,30,x1 /* multiply by 5 to get started */
1405 1.1 mrg sh2add x2,x2,x2
1406 1.1 mrg b LREF(pos)
1407 1.1 mrg addc x1,0,x1
1408 1.1 mrg
1409 1.1 mrg /* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
1410 1.1 mrg GSYM($$divU_10)
1411 1.1 mrg .export $$divU_10,millicode
1412 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1413 1.1 mrg addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
1414 1.1 mrg sh1add x2,t1,x2 /* multiply by 3 to get started */
1415 1.1 mrg addc 0,0,x1
1416 1.1 mrg LSYM(pos)
1417 1.1 mrg shd x1,x2,28,t1 /* multiply by 0x11 */
1418 1.1 mrg shd x2,0,28,t2
1419 1.1 mrg add x2,t2,x2
1420 1.1 mrg addc x1,t1,x1
1421 1.1 mrg LSYM(pos_for_17)
1422 1.1 mrg shd x1,x2,24,t1 /* multiply by 0x101 */
1423 1.1 mrg shd x2,0,24,t2
1424 1.1 mrg add x2,t2,x2
1425 1.1 mrg addc x1,t1,x1
1426 1.1 mrg
1427 1.1 mrg shd x1,x2,16,t1 /* multiply by 0x10001 */
1428 1.1 mrg shd x2,0,16,t2
1429 1.1 mrg add x2,t2,x2
1430 1.1 mrg MILLIRET
1431 1.1 mrg addc x1,t1,x1
1432 1.1 mrg
1433 1.1 mrg GSYM($$divI_10)
1434 1.1 mrg .export $$divI_10,millicode
1435 1.1 mrg comb,< x2,0,LREF(neg10)
1436 1.1 mrg copy 0,x1
1437 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1438 1.1 mrg addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
1439 1.1 mrg sh1add x2,x2,x2 /* multiply by 3 to get started */
1440 1.1 mrg
1441 1.1 mrg LSYM(neg10)
1442 1.1 mrg subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1443 1.1 mrg /* negation and adding 1 are done */
1444 1.1 mrg /* at the same time by the SUBI */
1445 1.1 mrg extru x2,30,31,x2
1446 1.1 mrg sh1add x2,x2,x2 /* multiply by 3 to get started */
1447 1.1 mrg LSYM(neg)
1448 1.1 mrg shd x1,x2,28,t1 /* multiply by 0x11 */
1449 1.1 mrg shd x2,0,28,t2
1450 1.1 mrg add x2,t2,x2
1451 1.1 mrg addc x1,t1,x1
1452 1.1 mrg LSYM(neg_for_17)
1453 1.1 mrg shd x1,x2,24,t1 /* multiply by 0x101 */
1454 1.1 mrg shd x2,0,24,t2
1455 1.1 mrg add x2,t2,x2
1456 1.1 mrg addc x1,t1,x1
1457 1.1 mrg
1458 1.1 mrg shd x1,x2,16,t1 /* multiply by 0x10001 */
1459 1.1 mrg shd x2,0,16,t2
1460 1.1 mrg add x2,t2,x2
1461 1.1 mrg addc x1,t1,x1
1462 1.1 mrg MILLIRET
1463 1.1 mrg sub 0,x1,x1
1464 1.1 mrg
1465 1.1 mrg /* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
1466 1.1 mrg GSYM($$divI_12)
1467 1.1 mrg .export $$divI_12,millicode
1468 1.1 mrg comb,< x2,0,LREF(neg12)
1469 1.1 mrg copy 0,x1
1470 1.1 mrg extru x2,29,30,x2 /* divide by 4 */
1471 1.1 mrg addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
1472 1.1 mrg sh2add x2,x2,x2 /* multiply by 5 to get started */
1473 1.1 mrg
1474 1.1 mrg LSYM(neg12)
1475 1.1 mrg subi 4,x2,x2 /* negate, divide by 4, and add 1 */
1476 1.1 mrg /* negation and adding 1 are done */
1477 1.1 mrg /* at the same time by the SUBI */
1478 1.1 mrg extru x2,29,30,x2
1479 1.1 mrg b LREF(neg)
1480 1.1 mrg sh2add x2,x2,x2 /* multiply by 5 to get started */
1481 1.1 mrg
1482 1.1 mrg GSYM($$divU_12)
1483 1.1 mrg .export $$divU_12,millicode
1484 1.1 mrg extru x2,29,30,x2 /* divide by 4 */
1485 1.1 mrg addi 5,x2,t1 /* cannot carry */
1486 1.1 mrg sh2add x2,t1,x2 /* multiply by 5 to get started */
1487 1.1 mrg b LREF(pos)
1488 1.1 mrg addc 0,0,x1
1489 1.1 mrg
1490 1.1 mrg /* DIVISION BY 15 (use z = 2**32; a = 11111111) */
1491 1.1 mrg GSYM($$divI_15)
1492 1.1 mrg .export $$divI_15,millicode
1493 1.1 mrg comb,< x2,0,LREF(neg15)
1494 1.1 mrg copy 0,x1
/* No pre-scaling factor is needed for 15 (a = 0x11111111 is exactly
   0x11 * 0x101 * 0x10001): add the rounding +1 and jump one
   instruction past LREF(pos); the delay-slot shd performs what is
   presumably that skipped first instruction of the tail.  */
1495 1.1 mrg addib,tr 1,x2,LREF(pos)+4
1496 1.1 mrg shd x1,x2,28,t1
1497 1.1 mrg
1498 1.1 mrg LSYM(neg15)
/* Negative path: negate and add 1 in one subi, then run the full
   0x11 * 0x101 * 0x10001 chain at LREF(neg).  */
1499 1.1 mrg b LREF(neg)
1500 1.1 mrg subi 1,x2,x2
1501 1.1 mrg
1502 1.1 mrg GSYM($$divU_15)
1503 1.1 mrg .export $$divU_15,millicode
/* Unsigned divide by 15: the rounding +1 may carry out of bit 31, so
   capture that carry into x1 before entering the shared tail.  */
1504 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1505 1.1 mrg b LREF(pos)
1506 1.1 mrg addc 0,0,x1
1507 1.1 mrg
1508 1.1 mrg /* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
1509 1.1 mrg GSYM($$divI_17)
1510 1.1 mrg .export $$divI_17,millicode
1511 1.1 mrg comb,<,n x2,0,LREF(neg17)
1512 1.1 mrg addi 1,x2,x2 /* this cannot overflow */
/* Form 15*(x+1) across x1:x2: t2 = x<<4, t1 = bits shifted out of the
   top, x2 = 16x - x, with the borrow propagated into the high word by
   subb.  0xf * 0x101 * 0x10001 = 0xf0f0f0f, so the remaining factors
   are applied by LREF(pos_for_17) (defined earlier in the file).  */
1513 1.1 mrg shd 0,x2,28,t1 /* multiply by 0xf to get started */
1514 1.1 mrg shd x2,0,28,t2
1515 1.1 mrg sub t2,x2,x2
1516 1.1 mrg b LREF(pos_for_17)
1517 1.1 mrg subb t1,0,x1
1518 1.1 mrg
1519 1.1 mrg LSYM(neg17)
/* Negative mirror of the above: negate-and-add-1 in one subi, scale
   by 0xf, then finish at LREF(neg_for_17), which negates the quotient
   on return.  */
1520 1.1 mrg subi 1,x2,x2 /* this cannot overflow */
1521 1.1 mrg shd 0,x2,28,t1 /* multiply by 0xf to get started */
1522 1.1 mrg shd x2,0,28,t2
1523 1.1 mrg sub t2,x2,x2
1524 1.1 mrg b LREF(neg_for_17)
1525 1.1 mrg subb t1,0,x1
1526 1.1 mrg
1527 1.1 mrg GSYM($$divU_17)
1528 1.1 mrg .export $$divU_17,millicode
/* Unsigned divide by 17: add the rounding +1 (carry captured in x1),
   scale x1:x2 by 0xf, then finish in LREF(pos_for_17).  LSYM(u17) is
   an extra entry point for use by other millicode in this file --
   presumably a remainder routine; TODO confirm against the full file.  */
1529 1.1 mrg addi 1,x2,x2 /* this CAN overflow */
1530 1.1 mrg addc 0,0,x1
1531 1.1 mrg shd x1,x2,28,t1 /* multiply by 0xf to get started */
1532 1.1 mrg LSYM(u17)
1533 1.1 mrg shd x2,0,28,t2
1534 1.1 mrg sub t2,x2,x2
1535 1.1 mrg b LREF(pos_for_17)
1536 1.1 mrg subb t1,x1,x1
1537 1.1 mrg
1538 1.1 mrg
1539 1.1 mrg /* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
1540 1.1 mrg includes 7,9 and also 14
1541 1.1 mrg
1542 1.1 mrg
1543 1.1 mrg z = 2**24-1
1544 1.1 mrg r = z mod x = 0
1545 1.1 mrg
1546 1.1 mrg so choose b = 0
1547 1.1 mrg
1548 1.1 mrg Also, in order to divide by z = 2**24-1, we approximate by dividing
1549 1.1 mrg by (z+1) = 2**24 (which is easy), and then correcting.
1550 1.1 mrg
1551 1.1 mrg (ax) = (z+1)q' + r
1552 1.1 mrg . = zq' + (q'+r)
1553 1.1 mrg
1554 1.1 mrg So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
1555 1.1 mrg Then the true remainder of (ax)/z is (q'+r). Repeat the process
1556 1.1 mrg with this new remainder, adding the tentative quotients together,
1557 1.1 mrg until a tentative quotient is 0 (and then we are done). There is
1558 1.1 mrg one last correction to be done. It is possible that (q'+r) = z.
1559 1.1 mrg If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
1560 1.1 mrg in fact, we need to add 1 more to the quotient. Now, it turns
1561 1.1 mrg out that this happens if and only if the original value x is
1562 1.1 mrg an exact multiple of y. So, to avoid a three instruction test at
1563 1.1 mrg the end, instead use 1 instruction to add 1 to x at the beginning. */
1564 1.1 mrg
1565 1.1 mrg /* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
1566 1.1 mrg GSYM($$divI_7)
1567 1.1 mrg .export $$divI_7,millicode
1568 1.1 mrg comb,<,n x2,0,LREF(neg7)
/* LSYM(7) is also the entry from $$divI_14/$$divU_14, which arrive
   here after shifting their dividend right by 1.  */
1569 1.1 mrg LSYM(7)
/* Form 9*(x+1) across x1:x2: x1 gets the top 3 bits shifted out of
   the 8x term, sh3add computes 8x + x, addc folds in its carry.  */
1570 1.1 mrg addi 1,x2,x2 /* cannot overflow */
1571 1.1 mrg shd 0,x2,29,x1
1572 1.1 mrg sh3add x2,x2,x2
1573 1.1 mrg addc x1,0,x1
/* LSYM(pos7): shared positive tail, also entered from $$divU_7,
   $$divI_9 and $$divU_9.  Multiplies x1:x2 by 65 (<<6 plus 1) and then
   4097 (<<12 plus 1); 9 * 65 * 4097 = 0x249249 = (2**24-1)/7.  */
1574 1.1 mrg LSYM(pos7)
1575 1.1 mrg shd x1,x2,26,t1
1576 1.1 mrg shd x2,0,26,t2
1577 1.1 mrg add x2,t2,x2
1578 1.1 mrg addc x1,t1,x1
1579 1.1 mrg
1580 1.1 mrg shd x1,x2,20,t1
1581 1.1 mrg shd x2,0,20,t2
1582 1.1 mrg add x2,t2,x2
1583 1.1 mrg addc x1,t1,t1
1584 1.1 mrg
1585 1.1 mrg /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1586 1.1 mrg
/* Reduction loop (see the block comment above this routine): repeat
   tentative-quotient = value >> 24, accumulate quotients in x1, fold
   the quotient back into the 24-bit remainder, until the tentative
   quotient is zero -- the ,= conditions then nullify the next executed
   instruction so control drops into MILLIRETN.  */
1587 1.1 mrg copy 0,x1
1588 1.1 mrg shd,= t1,x2,24,t1 /* tentative quotient */
1589 1.1 mrg LSYM(1)
1590 1.1 mrg addb,tr t1,x1,LREF(2) /* add to previous quotient */
1591 1.1 mrg extru x2,31,24,x2 /* new remainder (unadjusted) */
1592 1.1 mrg
1593 1.1 mrg MILLIRETN
1594 1.1 mrg
1595 1.1 mrg LSYM(2)
1596 1.1 mrg addb,tr t1,x2,LREF(1) /* adjust remainder */
1597 1.1 mrg extru,= x2,7,8,t1 /* new quotient */
1598 1.1 mrg
/* Negative path: negate and add 1 in one subi.  LSYM(8) is also the
   entry from $$divI_14's negative path.  */
1599 1.1 mrg LSYM(neg7)
1600 1.1 mrg subi 1,x2,x2 /* negate x2 and add 1 */
1601 1.1 mrg LSYM(8)
1602 1.1 mrg shd 0,x2,29,x1
1603 1.1 mrg sh3add x2,x2,x2
1604 1.1 mrg addc x1,0,x1
1605 1.1 mrg
/* LSYM(neg7_shift): shared with $$divI_9's negative path, which has
   already applied its own first factor (7) before branching here.  */
1606 1.1 mrg LSYM(neg7_shift)
1607 1.1 mrg shd x1,x2,26,t1
1608 1.1 mrg shd x2,0,26,t2
1609 1.1 mrg add x2,t2,x2
1610 1.1 mrg addc x1,t1,x1
1611 1.1 mrg
1612 1.1 mrg shd x1,x2,20,t1
1613 1.1 mrg shd x2,0,20,t2
1614 1.1 mrg add x2,t2,x2
1615 1.1 mrg addc x1,t1,t1
1616 1.1 mrg
1617 1.1 mrg /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1618 1.1 mrg
/* Same reduction loop as above, but the accumulated quotient is
   negated in the return delay slot to restore the sign.  */
1619 1.1 mrg copy 0,x1
1620 1.1 mrg shd,= t1,x2,24,t1 /* tentative quotient */
1621 1.1 mrg LSYM(3)
1622 1.1 mrg addb,tr t1,x1,LREF(4) /* add to previous quotient */
1623 1.1 mrg extru x2,31,24,x2 /* new remainder (unadjusted) */
1624 1.1 mrg
1625 1.1 mrg MILLIRET
1626 1.1 mrg sub 0,x1,x1 /* negate result */
1627 1.1 mrg
1628 1.1 mrg LSYM(4)
1629 1.1 mrg addb,tr t1,x2,LREF(3) /* adjust remainder */
1630 1.1 mrg extru,= x2,7,8,t1 /* new quotient */
1631 1.1 mrg
1632 1.1 mrg GSYM($$divU_7)
1633 1.1 mrg .export $$divU_7,millicode
/* Unsigned divide by 7: the rounding +1 may carry (captured in x1);
   form 9*(x+1) across x1:x2 (t1 = top bits of the 8x term) and join
   the shared positive tail at LREF(pos7).  */
1634 1.1 mrg addi 1,x2,x2 /* can carry */
1635 1.1 mrg addc 0,0,x1
1636 1.1 mrg shd x1,x2,29,t1
1637 1.1 mrg sh3add x2,x2,x2
1638 1.1 mrg b LREF(pos7)
1639 1.1 mrg addc t1,x1,x1
1640 1.1 mrg
1641 1.1 mrg /* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
1642 1.1 mrg GSYM($$divI_9)
1643 1.1 mrg .export $$divI_9,millicode
1644 1.1 mrg comb,<,n x2,0,LREF(neg9)
/* Scale (x+1) by 7: t2 = x<<3, x2 = 8x - x, with the borrow folded
   into the high word by subb.  7 * 65 * 4097 = 0x1c71c7 = (2**24-1)/9,
   so the 65 and 4097 factors come from the shared tail LREF(pos7).  */
1645 1.1 mrg addi 1,x2,x2 /* cannot overflow */
1646 1.1 mrg shd 0,x2,29,t1
1647 1.1 mrg shd x2,0,29,t2
1648 1.1 mrg sub t2,x2,x2
1649 1.1 mrg b LREF(pos7)
1650 1.1 mrg subb t1,0,x1
1651 1.1 mrg
1652 1.1 mrg LSYM(neg9)
/* Negative mirror: negate-and-add-1, scale by 7, then finish at
   LREF(neg7_shift), which negates the quotient on return.  */
1653 1.1 mrg subi 1,x2,x2 /* negate and add 1 */
1654 1.1 mrg shd 0,x2,29,t1
1655 1.1 mrg shd x2,0,29,t2
1656 1.1 mrg sub t2,x2,x2
1657 1.1 mrg b LREF(neg7_shift)
1658 1.1 mrg subb t1,0,x1
1659 1.1 mrg
1660 1.1 mrg GSYM($$divU_9)
1661 1.1 mrg .export $$divU_9,millicode
/* Unsigned divide by 9: rounding +1 with its carry captured in x1,
   scale by 7 (borrow folded back via subb), then the shared positive
   tail at LREF(pos7).  */
1662 1.1 mrg addi 1,x2,x2 /* can carry */
1663 1.1 mrg addc 0,0,x1
1664 1.1 mrg shd x1,x2,29,t1
1665 1.1 mrg shd x2,0,29,t2
1666 1.1 mrg sub t2,x2,x2
1667 1.1 mrg b LREF(pos7)
1668 1.1 mrg subb t1,x1,x1
1669 1.1 mrg
1670 1.1 mrg /* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
1671 1.1 mrg GSYM($$divI_14)
1672 1.1 mrg .export $$divI_14,millicode
/* Signed: branch (nullifying the delay slot) to the negative path;
   otherwise fall straight through into the unsigned entry point.  */
1673 1.1 mrg comb,<,n x2,0,LREF(neg14)
1674 1.1 mrg GSYM($$divU_14)
1675 1.1 mrg .export $$divU_14,millicode
1676 1.1 mrg b LREF(7) /* go to 7 case */
1677 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1678 1.1 mrg
1679 1.1 mrg LSYM(neg14)
/* Negate and add 2 in one subi, so that after the divide-by-2 shift
   the value carries the +1 rounding term expected at LREF(8).  */
1680 1.1 mrg subi 2,x2,x2 /* negate (and add 2) */
1681 1.1 mrg b LREF(8)
1682 1.1 mrg extru x2,30,31,x2 /* divide by 2 */
1683 1.1 mrg .exit
1684 1.1 mrg .procend
1685 1.1 mrg .end
1686 1.1 mrg #endif
1687 1.1 mrg
1688 1.1 mrg #ifdef L_mulI
1689 1.1 mrg /* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
1690 1.1 mrg /******************************************************************************
1691 1.1 mrg This routine is used on PA2.0 processors when gcc -mno-fpregs is used
1692 1.1 mrg
1693 1.1 mrg ROUTINE: $$mulI
1694 1.1 mrg
1695 1.1 mrg
1696 1.1 mrg DESCRIPTION:
1697 1.1 mrg
1698 1.1 mrg $$mulI multiplies two single word integers, giving a single
1699 1.1 mrg word result.
1700 1.1 mrg
1701 1.1 mrg
1702 1.1 mrg INPUT REGISTERS:
1703 1.1 mrg
1704 1.1 mrg arg0 = Operand 1
1705 1.1 mrg arg1 = Operand 2
1706 1.1 mrg r31 == return pc
1707 1.1 mrg sr0 == return space when called externally
1708 1.1 mrg
1709 1.1 mrg
1710 1.1 mrg OUTPUT REGISTERS:
1711 1.1 mrg
1712 1.1 mrg arg0 = undefined
1713 1.1 mrg arg1 = undefined
1714 1.1 mrg ret1 = result
1715 1.1 mrg
1716 1.1 mrg OTHER REGISTERS AFFECTED:
1717 1.1 mrg
1718 1.1 mrg r1 = undefined
1719 1.1 mrg
1720 1.1 mrg SIDE EFFECTS:
1721 1.1 mrg
1722 1.1 mrg Causes a trap under the following conditions: NONE
1723 1.1 mrg Changes memory at the following places: NONE
1724 1.1 mrg
1725 1.1 mrg PERMISSIBLE CONTEXT:
1726 1.1 mrg
1727 1.1 mrg Unwindable
1728 1.1 mrg Does not create a stack frame
1729 1.1 mrg Is usable for internal or external microcode
1730 1.1 mrg
1731 1.1 mrg DISCUSSION:
1732 1.1 mrg
1733 1.1 mrg Calls other millicode routines via mrp: NONE
1734 1.1 mrg Calls other millicode routines: NONE
1735 1.1 mrg
1736 1.1 mrg ***************************************************************************/
1737 1.1 mrg
1738 1.1 mrg
/* Register aliases and single-instruction building blocks for the
   $$mulI dispatch table below.  Naming convention: "t0__3a0" reads as
   "t0 = 3*a0", "r__r_2t0" as "r += 2*t0", "a0__256a0" as "a0 *= 256".
   The b_e_* macros branch to the matching fix-up stub (LSYM(e_...))
   after the table.  */
1739 1.1 mrg #define a0 %arg0
1740 1.1 mrg #define a1 %arg1
1741 1.1 mrg #define t0 %r1
1742 1.1 mrg #define r %ret1
1743 1.1 mrg
/* zdep with position 31-n and length 32-n is a left shift by n, so
   24,25 shifts by 7 (*128) and 23,24 by 8 (*256).  */
1744 1.1 mrg #define a0__128a0 zdep a0,24,25,a0
1745 1.1 mrg #define a0__256a0 zdep a0,23,24,a0
/* a1_ne_0_b_lN: branch back into the main loop (at lN) when more
   multiplier chunks remain, i.e. a1 != 0.  */
1746 1.1 mrg #define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
1747 1.1 mrg #define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
1748 1.1 mrg #define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
1749 1.1 mrg #define b_n_ret_t0 b,n LREF(ret_t0)
1750 1.1 mrg #define b_e_shift b LREF(e_shift)
1751 1.1 mrg #define b_e_t0ma0 b LREF(e_t0ma0)
1752 1.1 mrg #define b_e_t0 b LREF(e_t0)
1753 1.1 mrg #define b_e_t0a0 b LREF(e_t0a0)
1754 1.1 mrg #define b_e_t02a0 b LREF(e_t02a0)
1755 1.1 mrg #define b_e_t04a0 b LREF(e_t04a0)
1756 1.1 mrg #define b_e_2t0 b LREF(e_2t0)
1757 1.1 mrg #define b_e_2t0a0 b LREF(e_2t0a0)
1758 1.1 mrg #define b_e_2t04a0 b LREF(e2t04a0)
1759 1.1 mrg #define b_e_3t0 b LREF(e_3t0)
1760 1.1 mrg #define b_e_4t0 b LREF(e_4t0)
1761 1.1 mrg #define b_e_4t0a0 b LREF(e_4t0a0)
1762 1.1 mrg #define b_e_4t08a0 b LREF(e4t08a0)
1763 1.1 mrg #define b_e_5t0 b LREF(e_5t0)
1764 1.1 mrg #define b_e_8t0 b LREF(e_8t0)
1765 1.1 mrg #define b_e_8t0a0 b LREF(e_8t0a0)
/* Accumulator updates: r += (1,2,4,8) * a0 or t0, each one shNadd.  */
1766 1.1 mrg #define r__r_a0 add r,a0,r
1767 1.1 mrg #define r__r_2a0 sh1add a0,r,r
1768 1.1 mrg #define r__r_4a0 sh2add a0,r,r
1769 1.1 mrg #define r__r_8a0 sh3add a0,r,r
1770 1.1 mrg #define r__r_t0 add r,t0,r
1771 1.1 mrg #define r__r_2t0 sh1add t0,r,r
1772 1.1 mrg #define r__r_4t0 sh2add t0,r,r
1773 1.1 mrg #define r__r_8t0 sh3add t0,r,r
/* Temporary updates: t0 = small multiple of a0, or t0 rescaled, each
   a single shift-and-add or zdep (shift).  */
1774 1.1 mrg #define t0__3a0 sh1add a0,a0,t0
1775 1.1 mrg #define t0__4a0 sh2add a0,0,t0
1776 1.1 mrg #define t0__5a0 sh2add a0,a0,t0
1777 1.1 mrg #define t0__8a0 sh3add a0,0,t0
1778 1.1 mrg #define t0__9a0 sh3add a0,a0,t0
1779 1.1 mrg #define t0__16a0 zdep a0,27,28,t0
1780 1.1 mrg #define t0__32a0 zdep a0,26,27,t0
1781 1.1 mrg #define t0__64a0 zdep a0,25,26,t0
1782 1.1 mrg #define t0__128a0 zdep a0,24,25,t0
1783 1.1 mrg #define t0__t0ma0 sub t0,a0,t0
1784 1.1 mrg #define t0__t0_a0 add t0,a0,t0
1785 1.1 mrg #define t0__t0_2a0 sh1add a0,t0,t0
1786 1.1 mrg #define t0__t0_4a0 sh2add a0,t0,t0
1787 1.1 mrg #define t0__t0_8a0 sh3add a0,t0,t0
1788 1.1 mrg #define t0__2t0_a0 sh1add t0,a0,t0
1789 1.1 mrg #define t0__3t0 sh1add t0,t0,t0
1790 1.1 mrg #define t0__4t0 sh2add t0,0,t0
1791 1.1 mrg #define t0__4t0_a0 sh2add t0,a0,t0
1792 1.1 mrg #define t0__5t0 sh2add t0,t0,t0
1793 1.1 mrg #define t0__8t0 sh3add t0,0,t0
1794 1.1 mrg #define t0__8t0_a0 sh3add t0,a0,t0
1795 1.1 mrg #define t0__9t0 sh3add t0,t0,t0
1796 1.1 mrg #define t0__16t0 zdep t0,27,28,t0
1797 1.1 mrg #define t0__32t0 zdep t0,26,27,t0
1798 1.1 mrg #define t0__256a0 zdep a0,23,24,t0
1799 1.1 mrg
1800 1.1 mrg
/* $$mulI: 32-bit integer multiply, result = arg0 * arg1 in ret1.
   Strategy: consume a1 eight bits at a time; for each chunk, dispatch
   through the 256-entry table below, whose entries add a0*chunk into r
   using at most four shift-and-add instructions, then shift a0 left by
   8 and a1 right by 8 until a1 is exhausted.  */
1801 1.1 mrg SUBSPA_MILLI
1802 1.1 mrg ATTR_MILLI
1803 1.1 mrg .align 16
1804 1.1 mrg .proc
1805 1.1 mrg .callinfo millicode
1806 1.1 mrg .export $$mulI,millicode
1807 1.1 mrg GSYM($$mulI)
/* Keep the (unsigned-)smaller operand in a1 so fewer chunks are
   processed.  If a0 is negative, negate both operands (the product is
   unchanged) and swap back if that broke the size ordering.  */
1808 1.1 mrg combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
1809 1.1 mrg copy 0,r /* zero out the result */
1810 1.1 mrg xor a0,a1,a0 /* swap a0 & a1 using the */
1811 1.1 mrg xor a0,a1,a1 /* old xor trick */
1812 1.1 mrg xor a0,a1,a0
1813 1.1 mrg LSYM(l4)
1814 1.1 mrg combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
1815 1.1 mrg zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1816 1.1 mrg sub,> 0,a1,t0 /* otherwise negate both and */
1817 1.1 mrg combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
1818 1.1 mrg sub 0,a0,a1
1819 1.1 mrg movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
1820 1.1 mrg
/* Main loop: fold in the current partial product, advance a0 and a1,
   and dispatch on the next 8-bit chunk.  blr scales its index by 8
   bytes; with t0 = chunk<<1 that selects 16-byte (four-instruction)
   table entries.  */
1821 1.1 mrg LSYM(l0) r__r_t0 /* add in this partial product */
1822 1.1 mrg LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
1823 1.1 mrg LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1824 1.1 mrg LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
1825 1.1 mrg extru a1,23,24,a1 /* a1 >>= 8 ****************** */
1826 1.1 mrg
1827 1.1 mrg /*16 insts before this. */
1828 1.1 mrg /* a0 <<= 8 ************************** */
/* 256-entry dispatch table: entry xN adds a0*N into r (via shift-add
   factorizations of N) and either returns when no multiplier bits
   remain or loops back for the next chunk.  Every entry is exactly
   four slots; entries that cannot finish in-line branch to a shared
   fix-up stub (b_e_*) located after the table.  */
1829 1.1 mrg LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
1830 1.1 mrg LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
1831 1.1 mrg LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
1832 1.1 mrg LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
1833 1.1 mrg LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
1834 1.1 mrg LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
1835 1.1 mrg LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1836 1.1 mrg LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
1837 1.1 mrg LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
1838 1.1 mrg LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
1839 1.1 mrg LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1840 1.1 mrg LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1841 1.1 mrg LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1842 1.1 mrg LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1843 1.1 mrg LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1844 1.1 mrg LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
1845 1.1 mrg LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1846 1.1 mrg LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
1847 1.1 mrg LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1848 1.1 mrg LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
1849 1.1 mrg LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1850 1.1 mrg LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1851 1.1 mrg LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1852 1.1 mrg LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1853 1.1 mrg LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1854 1.1 mrg LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1855 1.1 mrg LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1856 1.1 mrg LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
1857 1.1 mrg LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1858 1.1 mrg LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1859 1.1 mrg LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1860 1.1 mrg LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1861 1.1 mrg LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1862 1.1 mrg LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1863 1.1 mrg LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1864 1.1 mrg LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
1865 1.1 mrg LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1866 1.1 mrg LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1867 1.1 mrg LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1868 1.1 mrg LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1869 1.1 mrg LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1870 1.1 mrg LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1871 1.1 mrg LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1872 1.1 mrg LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1873 1.1 mrg LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1874 1.1 mrg LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1875 1.1 mrg LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
1876 1.1 mrg LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
1877 1.1 mrg LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
1878 1.1 mrg LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
1879 1.1 mrg LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1880 1.1 mrg LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
1881 1.1 mrg LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1882 1.1 mrg LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1883 1.1 mrg LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1884 1.1 mrg LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
1885 1.1 mrg LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1886 1.1 mrg LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
1887 1.1 mrg LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1888 1.1 mrg LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
1889 1.1 mrg LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1890 1.1 mrg LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1891 1.1 mrg LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1892 1.1 mrg LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1893 1.1 mrg LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1894 1.1 mrg LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1895 1.1 mrg LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1896 1.1 mrg LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1897 1.1 mrg LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1898 1.1 mrg LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1899 1.1 mrg LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
1900 1.1 mrg LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
1901 1.1 mrg LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1902 1.1 mrg LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
1903 1.1 mrg LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1904 1.1 mrg LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1905 1.1 mrg LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1906 1.1 mrg LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1907 1.1 mrg LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
1908 1.1 mrg LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1909 1.1 mrg LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
1910 1.1 mrg LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
1911 1.1 mrg LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1912 1.1 mrg LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1913 1.1 mrg LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1914 1.1 mrg LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1915 1.1 mrg LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1916 1.1 mrg LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
1917 1.1 mrg LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1918 1.1 mrg LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1919 1.1 mrg LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1920 1.1 mrg LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
1921 1.1 mrg LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1922 1.1 mrg LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
1923 1.1 mrg LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
1924 1.1 mrg LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1925 1.1 mrg LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1926 1.1 mrg LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1927 1.1 mrg LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
1928 1.1 mrg LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1929 1.1 mrg LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
1930 1.1 mrg LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1931 1.1 mrg LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
1932 1.1 mrg LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
1933 1.1 mrg LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1934 1.1 mrg LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1935 1.1 mrg LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
1936 1.1 mrg LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
1937 1.1 mrg LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1938 1.1 mrg LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1939 1.1 mrg LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
1940 1.1 mrg LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1941 1.1 mrg LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
1942 1.1 mrg LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
1943 1.1 mrg LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
1944 1.1 mrg LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
1945 1.1 mrg LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
1946 1.1 mrg LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
1947 1.1 mrg LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
1948 1.1 mrg LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
1949 1.1 mrg LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1950 1.1 mrg LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1951 1.1 mrg LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1952 1.1 mrg LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1953 1.1 mrg LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
1954 1.1 mrg LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
1955 1.1 mrg LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1956 1.1 mrg LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1957 1.1 mrg LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1958 1.1 mrg LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
1959 1.1 mrg LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1960 1.1 mrg LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1961 1.1 mrg LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1962 1.1 mrg LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1963 1.1 mrg LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1964 1.1 mrg LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
1965 1.1 mrg LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1966 1.1 mrg LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1967 1.1 mrg LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1968 1.1 mrg LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
1969 1.1 mrg LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
1970 1.1 mrg LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
1971 1.1 mrg LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
1972 1.1 mrg LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
1973 1.1 mrg LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
1974 1.1 mrg LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
1975 1.1 mrg LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1976 1.1 mrg LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1977 1.1 mrg LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1978 1.1 mrg LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1979 1.1 mrg LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1980 1.1 mrg LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1981 1.1 mrg LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1982 1.1 mrg LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1983 1.1 mrg LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1984 1.1 mrg LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
1985 1.1 mrg LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1986 1.1 mrg LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
1987 1.1 mrg LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
1988 1.1 mrg LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1989 1.1 mrg LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
1990 1.1 mrg LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1991 1.1 mrg LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
1992 1.1 mrg LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
1993 1.1 mrg LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
1994 1.1 mrg LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1995 1.1 mrg LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
1996 1.1 mrg LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1997 1.1 mrg LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1998 1.1 mrg LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
1999 1.1 mrg LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
2000 1.1 mrg LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
2001 1.1 mrg LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
2002 1.1 mrg LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
2003 1.1 mrg LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
2004 1.1 mrg LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
2005 1.1 mrg LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
2006 1.1 mrg LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
2007 1.1 mrg LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
2008 1.1 mrg LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
2009 1.1 mrg LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
2010 1.1 mrg LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
2011 1.1 mrg LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
2012 1.1 mrg LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
2013 1.1 mrg LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
2014 1.1 mrg LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
2015 1.1 mrg LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
2016 1.1 mrg LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
2017 1.1 mrg LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
2018 1.1 mrg LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
2019 1.1 mrg LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
2020 1.1 mrg LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
2021 1.1 mrg LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
2022 1.1 mrg LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
2023 1.1 mrg LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
2024 1.1 mrg LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
2025 1.1 mrg LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
2026 1.1 mrg LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
2027 1.1 mrg LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
2028 1.1 mrg LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2029 1.1 mrg LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
2030 1.1 mrg LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
2031 1.1 mrg LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
2032 1.1 mrg LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
2033 1.1 mrg LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2034 1.1 mrg LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
2035 1.1 mrg LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
2036 1.1 mrg LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2037 1.1 mrg LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
2038 1.1 mrg LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
2039 1.1 mrg LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
2040 1.1 mrg LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
2041 1.1 mrg LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
2042 1.1 mrg LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
2043 1.1 mrg LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
2044 1.1 mrg LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
2045 1.1 mrg LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
2046 1.1 mrg LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
2047 1.1 mrg LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
2048 1.1 mrg LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
2049 1.1 mrg LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
2050 1.1 mrg LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
2051 1.1 mrg LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
2052 1.1 mrg LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2053 1.1 mrg LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
2054 1.1 mrg LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
2055 1.1 mrg LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
2056 1.1 mrg LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
2057 1.1 mrg LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2058 1.1 mrg LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
2059 1.1 mrg LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
2060 1.1 mrg LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2061 1.1 mrg LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
2062 1.1 mrg LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
2063 1.1 mrg LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
2064 1.1 mrg LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
2065 1.1 mrg LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
2066 1.1 mrg LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
2067 1.1 mrg LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
2068 1.1 mrg LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
2069 1.1 mrg LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
2070 1.1 mrg LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
2071 1.1 mrg LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
2072 1.1 mrg LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
2073 1.1 mrg LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
2074 1.1 mrg LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
2075 1.1 mrg LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
2076 1.1 mrg LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
2077 1.1 mrg LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
2078 1.1 mrg LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
2079 1.1 mrg LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
2080 1.1 mrg LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
2081 1.1 mrg LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
2082 1.1 mrg LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
2083 1.1 mrg LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
2084 1.1 mrg LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
2085 1.1 mrg /*1040 insts before this. */
/* Shared fix-up stubs for table entries whose factorization needs one
   more step.  Each applies a final transformation to t0 (or folds a
   scaled t0 into r), then either returns or re-enters the main loop
   when multiplier chunks remain (a1 != 0).  The instruction after each
   conditional branch executes in its delay slot / return slot.  */
2086 1.1 mrg LSYM(ret_t0) MILLIRET
2087 1.1 mrg LSYM(e_t0) r__r_t0
2088 1.1 mrg LSYM(e_shift) a1_ne_0_b_l2
2089 1.1 mrg a0__256a0 /* a0 <<= 8 *********** */
2090 1.1 mrg MILLIRETN
2091 1.1 mrg LSYM(e_t0ma0) a1_ne_0_b_l0
2092 1.1 mrg t0__t0ma0
2093 1.1 mrg MILLIRET
2094 1.1 mrg r__r_t0
2095 1.1 mrg LSYM(e_t0a0) a1_ne_0_b_l0
2096 1.1 mrg t0__t0_a0
2097 1.1 mrg MILLIRET
2098 1.1 mrg r__r_t0
2099 1.1 mrg LSYM(e_t02a0) a1_ne_0_b_l0
2100 1.1 mrg t0__t0_2a0
2101 1.1 mrg MILLIRET
2102 1.1 mrg r__r_t0
2103 1.1 mrg LSYM(e_t04a0) a1_ne_0_b_l0
2104 1.1 mrg t0__t0_4a0
2105 1.1 mrg MILLIRET
2106 1.1 mrg r__r_t0
2107 1.1 mrg LSYM(e_2t0) a1_ne_0_b_l1
2108 1.1 mrg r__r_2t0
2109 1.1 mrg MILLIRETN
2110 1.1 mrg LSYM(e_2t0a0) a1_ne_0_b_l0
2111 1.1 mrg t0__2t0_a0
2112 1.1 mrg MILLIRET
2113 1.1 mrg r__r_t0
2114 1.1 mrg LSYM(e2t04a0) t0__t0_2a0
2115 1.1 mrg a1_ne_0_b_l1
2116 1.1 mrg r__r_2t0
2117 1.1 mrg MILLIRETN
2118 1.1 mrg LSYM(e_3t0) a1_ne_0_b_l0
2119 1.1 mrg t0__3t0
2120 1.1 mrg MILLIRET
2121 1.1 mrg r__r_t0
2122 1.1 mrg LSYM(e_4t0) a1_ne_0_b_l1
2123 1.1 mrg r__r_4t0
2124 1.1 mrg MILLIRETN
2125 1.1 mrg LSYM(e_4t0a0) a1_ne_0_b_l0
2126 1.1 mrg t0__4t0_a0
2127 1.1 mrg MILLIRET
2128 1.1 mrg r__r_t0
2129 1.1 mrg LSYM(e4t08a0) t0__t0_2a0
2130 1.1 mrg a1_ne_0_b_l1
2131 1.1 mrg r__r_4t0
2132 1.1 mrg MILLIRETN
2133 1.1 mrg LSYM(e_5t0) a1_ne_0_b_l0
2134 1.1 mrg t0__5t0
2135 1.1 mrg MILLIRET
2136 1.1 mrg r__r_t0
2137 1.1 mrg LSYM(e_8t0) a1_ne_0_b_l1
2138 1.1 mrg r__r_8t0
2139 1.1 mrg MILLIRETN
2140 1.1 mrg LSYM(e_8t0a0) a1_ne_0_b_l0
2141 1.1 mrg t0__8t0_a0
2142 1.1 mrg MILLIRET
2143 1.1 mrg r__r_t0
2144 1.1 mrg
2145 1.1 mrg .procend
2146 1.1 mrg .end
2147 1.1 mrg #endif
2148