/*	$NetBSD: mul.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/

/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Header: mul.s,v 1.5 92/06/25 13:24:03 torek Exp
 */

#include <machine/asm.h>
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	.asciz "@(#)mul.s	8.1 (Berkeley) 6/4/93"
#else
	RCSID("$NetBSD: mul.S,v 1.1 2005/12/20 19:28:50 christos Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

/*
 * Signed multiply, from Appendix E of the Sparc Version 8
 * Architecture Manual.
 *
 * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
 * the 64-bit product).
 *
 * This code optimizes short (less than 13-bit) multiplies.
 */
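/*
 * In rough C terms (a sketch; "a" and "b" stand for the incoming %o0
 * and %o1, and callers reach this routine through the .mul millicode
 * convention rather than a C-visible name):
 *
 *	int64_t p = (int64_t)a * (int64_t)b;
 *	%o0 = (uint32_t)p;		low 32 bits
 *	%o1 = (int32_t)(p >> 32);	high 32 bits
 */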

FUNC(.mul)
	mov	%o0, %y		! multiplier -> Y
	andncc	%o0, 0xfff, %g0	! test bits 12..31
	be	Lmul_shortway	! if zero, can do it the short way
	andcc	%g0, %g0, %o4	! zero the partial product and clear N and V

	/*
	 * Long multiply.  32 steps, followed by a final shift step.
	 */
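	/*
	 * Roughly, each mulscc step examines the low bit of %y and
	 * conditionally adds %o1 into the partial product in %o4, then
	 * the %o4/%y pair shifts right one bit; see the MULScc
	 * description in the V8 manual for the exact semantics.
	 */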
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %o1, %o4	! 13
	mulscc	%o4, %o1, %o4	! 14
	mulscc	%o4, %o1, %o4	! 15
	mulscc	%o4, %o1, %o4	! 16
	mulscc	%o4, %o1, %o4	! 17
	mulscc	%o4, %o1, %o4	! 18
	mulscc	%o4, %o1, %o4	! 19
	mulscc	%o4, %o1, %o4	! 20
	mulscc	%o4, %o1, %o4	! 21
	mulscc	%o4, %o1, %o4	! 22
	mulscc	%o4, %o1, %o4	! 23
	mulscc	%o4, %o1, %o4	! 24
	mulscc	%o4, %o1, %o4	! 25
	mulscc	%o4, %o1, %o4	! 26
	mulscc	%o4, %o1, %o4	! 27
	mulscc	%o4, %o1, %o4	! 28
	mulscc	%o4, %o1, %o4	! 29
	mulscc	%o4, %o1, %o4	! 30
	mulscc	%o4, %o1, %o4	! 31
	mulscc	%o4, %o1, %o4	! 32
	mulscc	%o4, %g0, %o4	! final shift

	! If %o0 was negative, the result is
	!	(%o0 * %o1) + (%o1 << 32)
	! We fix that here.
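	! (The multiply steps treat the value in %y as an unsigned
	! multiplier, so a negative %o0 contributes an extra 2^32, i.e.
	! an extra %o1 << 32 in the product; subtracting %o1 from the
	! upper word restores the signed result.)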

	tst	%o0
	bge	1f
	rd	%y, %o0

	! %o0 was indeed negative; fix upper 32 bits of result by subtracting
	! %o1 (i.e., return %o4 - %o1 in %o1).
	retl
	sub	%o4, %o1, %o1

1:
	retl
	mov	%o4, %o1

Lmul_shortway:
	/*
	 * Short multiply.  12 steps, followed by a final shift step.
	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
	 * but there is no problem with %o0 being negative (unlike above).
	 */
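	/*
	 * (This path is reached only when bits 12..31 of %o0 are zero, so
	 * the multiplier is a non-negative value below 4096 and the 12
	 * steps below consume all of its significant bits.)
	 */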
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %g0, %o4	! final shift

	/*
	 *  %o4 has 20 of the bits that should be in the low part of the
	 * result; %y has the bottom 12 (as %y's top 12).  That is:
	 *
	 *	  %o4		    %y
	 * +----------------+----------------+
	 * | -12- |   -20-  | -12- |   -20-  |
	 * +------(---------+------)---------+
	 *  --hi-- ----low-part----
	 *
	 * The upper 12 bits of %o4 should be sign-extended to form the
	 * high part of the product (i.e., highpart = %o4 >> 20).
	 */
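	/*
	 * In rough C terms (a sketch; o4 and y stand for the values held
	 * in %o4 and %y at this point):
	 *
	 *	lowpart  = (o4 << 12) | ((unsigned)y >> 20);
	 *	highpart = (int)o4 >> 20;	(arithmetic shift)
	 */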

	rd	%y, %o5
	sll	%o4, 12, %o0	! shift middle bits left 12
	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left
	or	%o5, %o0, %o0	! construct low part of result
	retl
	sra	%o4, 20, %o1	! ... and extract high part of result
    157