Home | History | Annotate | Line # | Download | only in gen
mul.S revision 1.1.94.2
      1  1.1.94.1    martin /*	$NetBSD: mul.S,v 1.1.94.2 2020/04/21 19:37:47 martin Exp $	*/
      2       1.1  christos 
      3       1.1  christos /*
      4       1.1  christos  * Copyright (c) 1992, 1993
      5       1.1  christos  *	The Regents of the University of California.  All rights reserved.
      6       1.1  christos  *
      7       1.1  christos  * This software was developed by the Computer Systems Engineering group
      8       1.1  christos  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
      9       1.1  christos  * contributed to Berkeley.
     10       1.1  christos  *
     11       1.1  christos  * Redistribution and use in source and binary forms, with or without
     12       1.1  christos  * modification, are permitted provided that the following conditions
     13       1.1  christos  * are met:
     14       1.1  christos  * 1. Redistributions of source code must retain the above copyright
     15       1.1  christos  *    notice, this list of conditions and the following disclaimer.
     16       1.1  christos  * 2. Redistributions in binary form must reproduce the above copyright
     17       1.1  christos  *    notice, this list of conditions and the following disclaimer in the
     18       1.1  christos  *    documentation and/or other materials provided with the distribution.
     19       1.1  christos  * 3. Neither the name of the University nor the names of its contributors
     20       1.1  christos  *    may be used to endorse or promote products derived from this software
     21       1.1  christos  *    without specific prior written permission.
     22       1.1  christos  *
     23       1.1  christos  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24       1.1  christos  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25       1.1  christos  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26       1.1  christos  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27       1.1  christos  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28       1.1  christos  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29       1.1  christos  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30       1.1  christos  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31       1.1  christos  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32       1.1  christos  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33       1.1  christos  * SUCH DAMAGE.
     34       1.1  christos  *
     35       1.1  christos  * from: Header: mul.s,v 1.5 92/06/25 13:24:03 torek Exp
     36       1.1  christos  */
     37       1.1  christos 
     38       1.1  christos #include <machine/asm.h>
     39       1.1  christos #if defined(LIBC_SCCS) && !defined(lint)
     40       1.1  christos #if 0
     41       1.1  christos 	.asciz "@(#)mul.s	8.1 (Berkeley) 6/4/93"
     42       1.1  christos #else
     43  1.1.94.1    martin 	RCSID("$NetBSD: mul.S,v 1.1.94.2 2020/04/21 19:37:47 martin Exp $")
     44       1.1  christos #endif
     45       1.1  christos #endif /* LIBC_SCCS and not lint */
     46       1.1  christos 
     47       1.1  christos /*
     48       1.1  christos  * Signed multiply, from Appendix E of the SPARC Version 8
     49       1.1  christos  * Architecture Manual.
     50       1.1  christos  *
     51       1.1  christos  * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
     52       1.1  christos  * the 64-bit product).
     53       1.1  christos  *
     54       1.1  christos  * This code optimizes short multiplies (%o0 has bits 12..31 clear).
     55       1.1  christos  */
     56       1.1  christos 
     57       1.1  christos FUNC(.mul)
     58       1.1  christos 	mov	%o0, %y		! multiplier -> Y
     59       1.1  christos 	andncc	%o0, 0xfff, %g0	! test bits 12..31
     60       1.1  christos 	be	Lmul_shortway	! if zero, can do it the short way
     61       1.1  christos 	andcc	%g0, %g0, %o4	! (delay slot) zero the partial product and clear N and V
     62       1.1  christos 
     63       1.1  christos 	/*
     64       1.1  christos 	 * Long multiply.  32 steps, followed by a final shift step.
     65       1.1  christos 	 */
     66       1.1  christos 	mulscc	%o4, %o1, %o4	! 1
     67       1.1  christos 	mulscc	%o4, %o1, %o4	! 2
     68       1.1  christos 	mulscc	%o4, %o1, %o4	! 3
     69       1.1  christos 	mulscc	%o4, %o1, %o4	! 4
     70       1.1  christos 	mulscc	%o4, %o1, %o4	! 5
     71       1.1  christos 	mulscc	%o4, %o1, %o4	! 6
     72       1.1  christos 	mulscc	%o4, %o1, %o4	! 7
     73       1.1  christos 	mulscc	%o4, %o1, %o4	! 8
     74       1.1  christos 	mulscc	%o4, %o1, %o4	! 9
     75       1.1  christos 	mulscc	%o4, %o1, %o4	! 10
     76       1.1  christos 	mulscc	%o4, %o1, %o4	! 11
     77       1.1  christos 	mulscc	%o4, %o1, %o4	! 12
     78       1.1  christos 	mulscc	%o4, %o1, %o4	! 13
     79       1.1  christos 	mulscc	%o4, %o1, %o4	! 14
     80       1.1  christos 	mulscc	%o4, %o1, %o4	! 15
     81       1.1  christos 	mulscc	%o4, %o1, %o4	! 16
     82       1.1  christos 	mulscc	%o4, %o1, %o4	! 17
     83       1.1  christos 	mulscc	%o4, %o1, %o4	! 18
     84       1.1  christos 	mulscc	%o4, %o1, %o4	! 19
     85       1.1  christos 	mulscc	%o4, %o1, %o4	! 20
     86       1.1  christos 	mulscc	%o4, %o1, %o4	! 21
     87       1.1  christos 	mulscc	%o4, %o1, %o4	! 22
     88       1.1  christos 	mulscc	%o4, %o1, %o4	! 23
     89       1.1  christos 	mulscc	%o4, %o1, %o4	! 24
     90       1.1  christos 	mulscc	%o4, %o1, %o4	! 25
     91       1.1  christos 	mulscc	%o4, %o1, %o4	! 26
     92       1.1  christos 	mulscc	%o4, %o1, %o4	! 27
     93       1.1  christos 	mulscc	%o4, %o1, %o4	! 28
     94       1.1  christos 	mulscc	%o4, %o1, %o4	! 29
     95       1.1  christos 	mulscc	%o4, %o1, %o4	! 30
     96       1.1  christos 	mulscc	%o4, %o1, %o4	! 31
     97       1.1  christos 	mulscc	%o4, %o1, %o4	! 32
     98       1.1  christos 	mulscc	%o4, %g0, %o4	! final shift
     99       1.1  christos 
    100       1.1  christos 	! If %o0 was negative, the result so far is
    101       1.1  christos 	!	(%o0 * %o1) + (%o1 << 32)
    102       1.1  christos 	! We fix that here.
    103       1.1  christos 
    104       1.1  christos 	tst	%o0		! %o0 still holds the original multiplier
    105       1.1  christos 	bge	1f		! non-negative: upper word in %o4 needs no fixup
    106       1.1  christos 	rd	%y, %o0		! (delay slot) lower 32 bits of product -> %o0
    107       1.1  christos 
    108       1.1  christos 	! %o0 was indeed negative; fix upper 32 bits of result by subtracting
    109       1.1  christos 	! %o1 (i.e., return %o4 - %o1 in %o1).
    110       1.1  christos 	retl
    111       1.1  christos 	sub	%o4, %o1, %o1	! (delay slot) corrected upper word -> %o1
    112       1.1  christos 
    113       1.1  christos 1:
    114       1.1  christos 	retl
    115       1.1  christos 	mov	%o4, %o1	! (delay slot) upper word -> %o1, no correction
    116       1.1  christos 
    117       1.1  christos Lmul_shortway:
    118       1.1  christos 	/*
    119       1.1  christos 	 * Short multiply.  12 steps, followed by a final shift step.
    120       1.1  christos 	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
    121       1.1  christos 	 * but %o0 cannot be negative here (bits 12..31 are zero): no fixup.
    122       1.1  christos 	 */
    123       1.1  christos 	mulscc	%o4, %o1, %o4	! 1
    124       1.1  christos 	mulscc	%o4, %o1, %o4	! 2
    125       1.1  christos 	mulscc	%o4, %o1, %o4	! 3
    126       1.1  christos 	mulscc	%o4, %o1, %o4	! 4
    127       1.1  christos 	mulscc	%o4, %o1, %o4	! 5
    128       1.1  christos 	mulscc	%o4, %o1, %o4	! 6
    129       1.1  christos 	mulscc	%o4, %o1, %o4	! 7
    130       1.1  christos 	mulscc	%o4, %o1, %o4	! 8
    131       1.1  christos 	mulscc	%o4, %o1, %o4	! 9
    132       1.1  christos 	mulscc	%o4, %o1, %o4	! 10
    133       1.1  christos 	mulscc	%o4, %o1, %o4	! 11
    134       1.1  christos 	mulscc	%o4, %o1, %o4	! 12
    135       1.1  christos 	mulscc	%o4, %g0, %o4	! final shift
    136       1.1  christos 
    137       1.1  christos 	/*
    138       1.1  christos 	 * %o4 has 20 of the bits that should be in the low part of the
    139       1.1  christos 	 * result; %y has the bottom 12 (as %y's top 12).  That is:
    140       1.1  christos 	 *
    141       1.1  christos 	 *	  %o4		    %y
    142       1.1  christos 	 * +----------------+----------------+
    143       1.1  christos 	 * | -12- |   -20-  | -12- |   -20-  |
    144       1.1  christos 	 * +------(---------+------)---------+
    145       1.1  christos 	 *  --hi-- ----low-part----
    146       1.1  christos 	 *
    147       1.1  christos 	 * The upper 12 bits of %o4 should be sign-extended to form the
    148       1.1  christos 	 * high part of the product (i.e., highpart = %o4 >> 20, arithmetic).
    149       1.1  christos 	 */
    150       1.1  christos 
    151       1.1  christos 	rd	%y, %o5		! %o5 = %y; its top 12 bits are the bottom 12 of the product
    152       1.1  christos 	sll	%o4, 12, %o0	! shift middle bits left 12
    153       1.1  christos 	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left
    154       1.1  christos 	or	%o5, %o0, %o0	! construct low part of result
    155       1.1  christos 	retl
    156       1.1  christos 	sra	%o4, 20, %o1	! (delay slot) ... and extract high part of result
    157