mul.S revision 1.1.94.2 1 1.1.94.1 martin /* $NetBSD: mul.S,v 1.1.94.2 2020/04/21 19:37:47 martin Exp $ */
2 1.1 christos
3 1.1 christos /*
4 1.1 christos * Copyright (c) 1992, 1993
5 1.1 christos * The Regents of the University of California. All rights reserved.
6 1.1 christos *
7 1.1 christos * This software was developed by the Computer Systems Engineering group
8 1.1 christos * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
9 1.1 christos * contributed to Berkeley.
10 1.1 christos *
11 1.1 christos * Redistribution and use in source and binary forms, with or without
12 1.1 christos * modification, are permitted provided that the following conditions
13 1.1 christos * are met:
14 1.1 christos * 1. Redistributions of source code must retain the above copyright
15 1.1 christos * notice, this list of conditions and the following disclaimer.
16 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 christos * notice, this list of conditions and the following disclaimer in the
18 1.1 christos * documentation and/or other materials provided with the distribution.
19 1.1 christos * 3. Neither the name of the University nor the names of its contributors
20 1.1 christos * may be used to endorse or promote products derived from this software
21 1.1 christos * without specific prior written permission.
22 1.1 christos *
23 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 1.1 christos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 1.1 christos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 1.1 christos * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 1.1 christos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 1.1 christos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 1.1 christos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 1.1 christos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 1.1 christos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 1.1 christos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 1.1 christos * SUCH DAMAGE.
34 1.1 christos *
35 1.1 christos * from: Header: mul.s,v 1.5 92/06/25 13:24:03 torek Exp
36 1.1 christos */
37 1.1 christos
38 1.1 christos #include <machine/asm.h>
39 1.1 christos #if defined(LIBC_SCCS) && !defined(lint)
40 1.1 christos #if 0
41 1.1 christos .asciz "@(#)mul.s 8.1 (Berkeley) 6/4/93"
42 1.1 christos #else
43 1.1.94.1 martin RCSID("$NetBSD: mul.S,v 1.1.94.2 2020/04/21 19:37:47 martin Exp $")
44 1.1 christos #endif
45 1.1 christos #endif /* LIBC_SCCS and not lint */
46 1.1 christos
47 1.1 christos /*
48 1.1 christos * Signed multiply, from Appendix E of the Sparc Version 8
49 1.1 christos * Architecture Manual.
50 1.1 christos *
51 1.1 christos * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
52 1.1 christos * the 64-bit product).
53 1.1 christos *
54 1.1 christos * This code optimizes short (less than 13-bit) multiplies.
 *
 * Register usage, as visible in the code below:
 *   in:      %o0 = multiplier (copied to %y), %o1 = multiplicand
 *   out:     %o0 = low 32 bits of the product, %o1 = high 32 bits
 *   scratch: %o4 (partial product), %o5 (short path only), %y and
 *            the integer condition codes
 *
 * The body returns with retl and contains no save/restore and no
 * reference to %sp.
 *
 * Instruction order is significant: the instruction that follows each
 * branch or retl sits in its delay slot and executes before control
 * transfers.
 * NOTE(review): the three instructions between the write to %y and the
 * first mulscc presumably also cover the V8 delayed-write window for
 * %y -- confirm against the architecture manual before reordering.
55 1.1 christos */
56 1.1 christos
57 1.1 christos FUNC(.mul)
/*
 * Path selection: (%o0 & ~0xfff) == 0 means bits 12..31 of the
 * multiplier are clear, so 12 multiply steps are enough.  The andcc
 * is in the delay slot of the be (not annulled), so %o4 is zeroed and
 * N/V are cleared on both the long and the short path.
 */
58 1.1 christos mov %o0, %y ! multiplier -> Y
59 1.1 christos andncc %o0, 0xfff, %g0 ! test bits 12..31
60 1.1 christos be Lmul_shortway ! if zero, can do it the short way
61 1.1 christos andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
62 1.1 christos
63 1.1 christos /*
64 1.1 christos * Long multiply. 32 steps, followed by a final shift step.
 *
 * Every step uses %o4 as the running partial product and %o1 as the
 * multiplicand.  The final step passes %g0 instead of %o1, so it
 * contributes nothing to the sum and only performs the shift.
 * Afterwards %o4 holds the high word and %y the low word.
65 1.1 christos */
66 1.1 christos mulscc %o4, %o1, %o4 ! 1
67 1.1 christos mulscc %o4, %o1, %o4 ! 2
68 1.1 christos mulscc %o4, %o1, %o4 ! 3
69 1.1 christos mulscc %o4, %o1, %o4 ! 4
70 1.1 christos mulscc %o4, %o1, %o4 ! 5
71 1.1 christos mulscc %o4, %o1, %o4 ! 6
72 1.1 christos mulscc %o4, %o1, %o4 ! 7
73 1.1 christos mulscc %o4, %o1, %o4 ! 8
74 1.1 christos mulscc %o4, %o1, %o4 ! 9
75 1.1 christos mulscc %o4, %o1, %o4 ! 10
76 1.1 christos mulscc %o4, %o1, %o4 ! 11
77 1.1 christos mulscc %o4, %o1, %o4 ! 12
78 1.1 christos mulscc %o4, %o1, %o4 ! 13
79 1.1 christos mulscc %o4, %o1, %o4 ! 14
80 1.1 christos mulscc %o4, %o1, %o4 ! 15
81 1.1 christos mulscc %o4, %o1, %o4 ! 16
82 1.1 christos mulscc %o4, %o1, %o4 ! 17
83 1.1 christos mulscc %o4, %o1, %o4 ! 18
84 1.1 christos mulscc %o4, %o1, %o4 ! 19
85 1.1 christos mulscc %o4, %o1, %o4 ! 20
86 1.1 christos mulscc %o4, %o1, %o4 ! 21
87 1.1 christos mulscc %o4, %o1, %o4 ! 22
88 1.1 christos mulscc %o4, %o1, %o4 ! 23
89 1.1 christos mulscc %o4, %o1, %o4 ! 24
90 1.1 christos mulscc %o4, %o1, %o4 ! 25
91 1.1 christos mulscc %o4, %o1, %o4 ! 26
92 1.1 christos mulscc %o4, %o1, %o4 ! 27
93 1.1 christos mulscc %o4, %o1, %o4 ! 28
94 1.1 christos mulscc %o4, %o1, %o4 ! 29
95 1.1 christos mulscc %o4, %o1, %o4 ! 30
96 1.1 christos mulscc %o4, %o1, %o4 ! 31
97 1.1 christos mulscc %o4, %o1, %o4 ! 32
98 1.1 christos mulscc %o4, %g0, %o4 ! final shift
99 1.1 christos
/*
 * Why a fix-up is needed: the steps above consume the multiplier from
 * %y as an unsigned quantity, so a negative %o0 is seen as
 * %o0 + 2^32.  That inflates the 64-bit result by %o1 * 2^32, i.e.
 * the high word is too large by exactly %o1.
 *
 * The sign test must come before the rd: the rd is in the delay slot
 * of the bge (not annulled), runs on both paths, and overwrites %o0
 * with the low word of the product.
 */
100 1.1 christos ! If %o0 was negative, the result is
101 1.1 christos ! (%o0 * %o1) + (%o1 << 32))
102 1.1 christos ! We fix that here.
103 1.1 christos
104 1.1 christos tst %o0
105 1.1 christos bge 1f
106 1.1 christos rd %y, %o0
107 1.1 christos
108 1.1 christos ! %o0 was indeed negative; fix upper 32 bits of result by subtracting
109 1.1 christos ! %o1 (i.e., return %o4 - %o1 in %o1).
/* The sub is in the retl delay slot, so %o1 is corrected before return. */
110 1.1 christos retl
111 1.1 christos sub %o4, %o1, %o1
112 1.1 christos
/*
 * Multiplier was non-negative: %o4 already holds the correct high
 * word; the mov in the retl delay slot copies it to %o1.
 */
113 1.1 christos 1:
114 1.1 christos retl
115 1.1 christos mov %o4, %o1
116 1.1 christos
117 1.1 christos Lmul_shortway:
118 1.1 christos /*
119 1.1 christos * Short multiply. 12 steps, followed by a final shift step.
120 1.1 christos * The resulting bits are off by 12 and (32-12) = 20 bit positions,
121 1.1 christos * but there is no problem with %o0 being negative (unlike above).
 *
 * This label is reached only when bits 12..31 of %o0 are zero (see
 * the andncc/be at entry), so %o0 is non-negative here and no
 * high-word fix-up is required.  %o4 was zeroed in the delay slot of
 * that branch.
122 1.1 christos */
123 1.1 christos mulscc %o4, %o1, %o4 ! 1
124 1.1 christos mulscc %o4, %o1, %o4 ! 2
125 1.1 christos mulscc %o4, %o1, %o4 ! 3
126 1.1 christos mulscc %o4, %o1, %o4 ! 4
127 1.1 christos mulscc %o4, %o1, %o4 ! 5
128 1.1 christos mulscc %o4, %o1, %o4 ! 6
129 1.1 christos mulscc %o4, %o1, %o4 ! 7
130 1.1 christos mulscc %o4, %o1, %o4 ! 8
131 1.1 christos mulscc %o4, %o1, %o4 ! 9
132 1.1 christos mulscc %o4, %o1, %o4 ! 10
133 1.1 christos mulscc %o4, %o1, %o4 ! 11
134 1.1 christos mulscc %o4, %o1, %o4 ! 12
135 1.1 christos mulscc %o4, %g0, %o4 ! final shift
136 1.1 christos
137 1.1 christos /*
138 1.1 christos * %o4 has 20 of the bits that should be in the low part of the
139 1.1 christos * result; %y has the bottom 12 (as %y's top 12). That is:
140 1.1 christos *
141 1.1 christos * %o4 %y
142 1.1 christos * +----------------+----------------+
143 1.1 christos * | -12- | -20- | -12- | -20- |
144 1.1 christos * +------(---------+------)---------+
145 1.1 christos * --hi-- ----low-part----
146 1.1 christos *
147 1.1 christos * The upper 12 bits of %o4 should be sign-extended to form the
148 1.1 christos * high part of the product (i.e., highpart = %o4 >> 20).
149 1.1 christos */
150 1.1 christos
/*
 * Reassemble the result:
 *   %o0 = (%o4 << 12) | (%y >> 20)   low word  (logical right shift)
 *   %o1 =  %o4 >> 20                 high word (arithmetic right shift)
 * %o5 is only a temporary for the copy of %y.  The sra is in the retl
 * delay slot; it reads %o4, which the preceding instructions leave
 * unmodified.
 */
151 1.1 christos rd %y, %o5
152 1.1 christos sll %o4, 12, %o0 ! shift middle bits left 12
153 1.1 christos srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left
154 1.1 christos or %o5, %o0, %o0 ! construct low part of result
155 1.1 christos retl
156 1.1 christos sra %o4, 20, %o1 ! ... and extract high part of result
157