alpha/gen/divrem.m4

1.7  cgd /*	$NetBSD: divrem.m4,v 1.7 1996/10/17 03:08:04 cgd Exp $	*/
1.1  cgd
1.1  cgd /*
1.1  cgd  * Copyright (c) 1994, 1995 Carnegie-Mellon University.
1.1  cgd  * All rights reserved.
1.1  cgd  *
1.1  cgd  * Author: Chris G. Demetriou
1.1  cgd  *
1.1  cgd  * Permission to use, copy, modify and distribute this software and
1.1  cgd  * its documentation is hereby granted, provided that both the copyright
1.1  cgd  * notice and this permission notice appear in all copies of the
1.1  cgd  * software, derivative works or modified versions, and any portions
1.1  cgd  * thereof, and that both notices appear in supporting documentation.
1.1  cgd  *
1.1  cgd  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
1.1  cgd  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
1.1  cgd  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
1.1  cgd  *
1.1  cgd  * Carnegie Mellon requests users of this software to return to
1.1  cgd  *
1.1  cgd  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
1.1  cgd  *  School of Computer Science
1.1  cgd  *  Carnegie Mellon University
1.1  cgd  *  Pittsburgh PA 15213-3890
1.1  cgd  *
1.1  cgd  * any improvements or extensions that they make and grant Carnegie the
1.1  cgd  * rights to redistribute these changes.
1.1  cgd  */
1.1  cgd
1.1  cgd /*
1.1  cgd  * Division and remainder.
1.1  cgd  *
1.1  cgd  * The use of m4 is modeled after the sparc code, but the algorithm is
1.1  cgd  * simple binary long division.
1.1  cgd  *
1.1  cgd  * Note that the loops could probably benefit from unrolling.
1.1  cgd  */
1.1  cgd
1.1  cgd /*
1.1  cgd  * M4 Parameters
1.1  cgd  * NAME		name of function to generate
1.1  cgd  * OP		OP=div: t10 / t11 -> t12; OP=rem: t10 % t11 -> t12
1.4  cgd  * S		S=true: signed; S=false: unsigned
1.1  cgd  * WORDSIZE	total number of bits
1.1  cgd  */
1.1  cgd
/*
 * Register usage.  The caller passes the dividend in t10 and the
 * divisor in t11, and receives the answer in t12; the routine returns
 * through t9.  Every other register used below is saved on entry and
 * restored on exit.  The saved copies of t10 and t11 are reloaded as
 * well, so both operands are handed back unmodified.
 *
 *	t0	single-bit mask: scan position, then current quotient bit
 *	t1	number of bit positions the divisor is moved left
 *	t2	scratch for comparison results
 *	t3	scratch
 *	t4	low bit set when the result must be negated (signed only)
 */
define(A, `t10')
define(B, `t11')
define(RESULT, `t12')

define(BIT, `t0')
define(I, `t1')
define(CC, `t2')
define(T_0, `t3')
ifelse(S, `true', `define(NEG, `t4')')

#include <machine/asm.h>

/*
 * Entry point.  Saves the working registers and both operands in a
 * 64-byte frame, divides by binary long division, and returns through
 * t9 with the quotient or remainder in t12.  Dividing by zero raises
 * PAL_gentrap with code -2; if execution resumes afterwards the routine
 * returns zero.
 *
 * Frame layout (offsets from sp):
 *	 0  t0		 8  t1		16  t2		24  t3
 *	32  t4 (signed variants only)
 *	40  t10		48  t11
 *	56  a0 (written only on the divide-by-zero path)
 */
LEAF(NAME, 0)					/* XXX */
	lda	sp, -64(sp)
	stq	BIT, 0(sp)
	stq	I, 8(sp)
	stq	CC, 16(sp)
	stq	T_0, 24(sp)
ifelse(S, `true',
`	stq	NEG, 32(sp)')
	stq	A, 40(sp)
	stq	B, 48(sp)
	mov	zero, RESULT			/* Initialize result to zero */

ifelse(S, `true',
`
	/* Compute sign of result.  If either is negative, this is easy.  */
	or	A, B, NEG			/* not the sign, but... */
	srl	NEG, WORDSIZE - 1, NEG		/* rather, or of high bits */
	blbc	NEG, Ldoit			/* neither negative? do it! */

ifelse(OP, `div',
`	xor	A, B, NEG			/* THIS is the sign! */
', `	mov	A, NEG				/* sign follows A. */
')
	srl	NEG, WORDSIZE - 1, NEG		/* make negation the low bit. */

	srl	A, WORDSIZE - 1, I		/* is A negative? */
	blbc	I, LnegB			/* no. */
	/* A is negative; flip it. */
ifelse(WORDSIZE, `32', `
	/* top 32 bits may be random junk */
	zap	A, 0xf0, A
')
	subq	zero, A, A
	srl	B, WORDSIZE - 1, I		/* is B negative? */
	blbc	I, Ldoit			/* no. */
LnegB:
	/* B is definitely negative, no matter how we got here. */
ifelse(WORDSIZE, `32', `
	/* top 32 bits may be random junk */
	zap	B, 0xf0, B
')
	subq	zero, B, B
Ldoit:
')
ifelse(WORDSIZE, `32', `
	/*
	 * Clear the top 32 bits of each operand, as they may be
	 * sign extension (if negated above), or random junk.
	 */
	zap	A, 0xf0, A
	zap	B, 0xf0, B
')

	/* kill the special cases. */
	beq	B, Ldotrap			/* division by zero! */

	cmpult	A, B, CC			/* A < B? */
	/* RESULT is already zero, from above.  A is untouched. */
	bne	CC, Lret_result

	cmpeq	A, B, CC			/* A == B? */
	cmovne	CC, 1, RESULT			/* quotient is one ... */
	cmovne	CC, zero, A			/* ... and nothing remains */
	bne	CC, Lret_result

	/*
	 * Find out how many bits of zeros are at the beginning of the divisor.
	 * The count is capped at WORDSIZE-1.
	 */
LBbits:
	ldiq	T_0, 1				/* I = 0; BIT = 1<<WORDSIZE-1 */
	mov	zero, I
	sll	T_0, WORDSIZE-1, BIT
LBloop:
	and	B, BIT, CC			/* if bit in B is set, done. */
	bne	CC, LAbits
	addq	I, 1, I				/* count it, move BIT right */
	srl	BIT, 1, BIT
	cmplt	I, WORDSIZE-1, CC		/* if I leaves one bit, done. */
	bne	CC, LBloop

	/*
	 * Take one off the count for every leading zero bit of the
	 * dividend, stopping at the first set bit or when the count
	 * reaches zero.  What is left is how far the divisor has to be
	 * moved left to line up with the dividend.
	 */
LAbits:
	beq	I, Ldodiv			/* If I = 0, divide now.  */
	ldiq	T_0, 1				/* BIT = 1<<WORDSIZE-1 */
	sll	T_0, WORDSIZE-1, BIT

LAloop:
	and	A, BIT, CC			/* if bit in A is set, done. */
	bne	CC, Ldodiv
	subq	I, 1, I				/* uncount it, move BIT right */
	srl	BIT, 1, BIT
	bne	I, LAloop			/* If I != 0, loop again */

	/*
	 * Line the divisor up, and set BIT to the quotient bit that
	 * corresponds to that alignment.
	 */
Ldodiv:
	sll	B, I, B				/* B <<= i */
	ldiq	T_0, 1
	sll	T_0, I, BIT

	/*
	 * One quotient bit per pass.  Both updates are computed every
	 * time and committed with conditional moves only when A >= B.
	 * Stop early once nothing is left of A, otherwise when BIT has
	 * been moved out of the bottom of the register.
	 */
Ldivloop:
	cmpult	A, B, CC
	or	RESULT, BIT, T_0
	cmoveq	CC, T_0, RESULT			/* A >= B: set quotient bit */
	subq	A, B, T_0
	cmoveq	CC, T_0, A			/* A >= B: A -= B */
	srl	BIT, 1, BIT
	srl	B, 1, B
	beq	A, Lret_result
	bne	BIT, Ldivloop

Lret_result:
ifelse(OP, `div',
`', `	mov	A, RESULT
')
ifelse(S, `true',
`
	/*
	 * Check to see if we should negate it.  The negation is done
	 * with plain subq: the trapping form raises an integer overflow
	 * trap whenever the unsigned answer is 1 << 63, which happens
	 * for the most negative 64-bit value divided by one (a result
	 * that is representable) and even when the negated value is
	 * not going to be selected below.
	 */
	subq	zero, RESULT, T_0
	cmovlbs	NEG, T_0, RESULT
')

	ldq	BIT, 0(sp)
	ldq	I, 8(sp)
	ldq	CC, 16(sp)
	ldq	T_0, 24(sp)
ifelse(S, `true',
`	ldq	NEG, 32(sp)')
	ldq	A, 40(sp)
	ldq	B, 48(sp)
	lda	sp, 64(sp)
	ret	zero, (t9), 1

	/*
	 * Division by zero.  a0 has to carry the trap code, but every
	 * other register this routine touches is saved and restored, so
	 * keep the caller value of a0 in the spare frame slot while the
	 * trap is raised and put it back before returning.
	 */
Ldotrap:
	stq	a0, 56(sp)
	ldiq	a0, -2			/* This is the signal to SIGFPE! */
	call_pal PAL_gentrap
	ldq	a0, 56(sp)
ifelse(OP, `div',
`', `	mov	zero, A			/* so that zero will be returned */
')
	br	zero, Lret_result

END(NAME)