divrem.m4 revision 1.2 1 1.2 chs /* $NetBSD: divrem.m4,v 1.2 2002/10/27 18:41:27 chs Exp $ */
2 1.1 eeh
3 1.1 eeh /*
4 1.1 eeh * Copyright (c) 1992, 1993
5 1.1 eeh * The Regents of the University of California. All rights reserved.
6 1.1 eeh *
7 1.1 eeh * This software was developed by the Computer Systems Engineering group
8 1.1 eeh * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
9 1.1 eeh * contributed to Berkeley.
10 1.1 eeh *
11 1.1 eeh * Redistribution and use in source and binary forms, with or without
12 1.1 eeh * modification, are permitted provided that the following conditions
13 1.1 eeh * are met:
14 1.1 eeh * 1. Redistributions of source code must retain the above copyright
15 1.1 eeh * notice, this list of conditions and the following disclaimer.
16 1.1 eeh * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 eeh * notice, this list of conditions and the following disclaimer in the
18 1.1 eeh * documentation and/or other materials provided with the distribution.
19 1.1 eeh * 3. All advertising materials mentioning features or use of this software
20 1.1 eeh * must display the following acknowledgement:
21 1.1 eeh * This product includes software developed by the University of
22 1.1 eeh * California, Berkeley and its contributors.
23 1.1 eeh * 4. Neither the name of the University nor the names of its contributors
24 1.1 eeh * may be used to endorse or promote products derived from this software
25 1.1 eeh * without specific prior written permission.
26 1.1 eeh *
27 1.1 eeh * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 1.1 eeh * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 1.1 eeh * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 1.1 eeh * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 1.1 eeh * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 1.1 eeh * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 1.1 eeh * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 1.1 eeh * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 1.1 eeh * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 1.1 eeh * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 1.1 eeh * SUCH DAMAGE.
38 1.1 eeh *
39 1.2 chs * from: Header: divrem.m4,v 1.4 92/06/25 13:23:57 torek Exp
40 1.1 eeh */
41 1.1 eeh
42 1.1 eeh /*
43 1.1 eeh * Division and remainder, from Appendix E of the Sparc Version 8
44 1.1 eeh * Architecture Manual, with fixes from Gordon Irlam.
45 1.1 eeh */
46 1.1 eeh
47 1.2 chs #if defined(LIBC_SCCS)
48 1.2 chs RCSID("$NetBSD: divrem.m4,v 1.2 2002/10/27 18:41:27 chs Exp $")
49 1.1 eeh #endif
50 1.1 eeh
51 1.1 eeh /*
52 1.1 eeh * Input: dividend and divisor in %o0 and %o1 respectively.
53 1.1 eeh *
54 1.1 eeh * m4 parameters:
55 1.1 eeh * NAME name of function to generate
56 1.1 eeh * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1
57 1.1 eeh * S S=true => signed; S=false => unsigned
58 1.1 eeh *
59 1.1 eeh * Algorithm parameters:
60 1.1 eeh * N how many bits per iteration we try to get (4)
61 1.1 eeh * WORDSIZE total number of bits (32)
62 1.1 eeh *
63 1.1 eeh * Derived constants:
64 1.1 eeh * TWOSUPN 2^N, for label generation (m4 exponentiation currently broken)
65 1.1 eeh * TOPBITS number of bits in the top `decade' of a number
66 1.1 eeh *
67 1.1 eeh * Important variables:
68 1.1 eeh * Q the partial quotient under development (initially 0)
69 1.1 eeh * R the remainder so far, initially the dividend
70 1.1 eeh * ITER number of main division loop iterations required;
71 1.1 eeh * equal to ceil(log2(quotient) / N). Note that this
72 1.1 eeh * is the log base (2^N) of the quotient.
73 1.1 eeh * V the current comparand, initially divisor*2^(ITER*N-1)
74 1.1 eeh *
75 1.1 eeh * Cost:
76 1.1 eeh * Current estimate for non-large dividend is
77 1.1 eeh * ceil(log2(quotient) / N) * (10 + 7N/2) + C
78 1.1 eeh * A large dividend is one greater than 2^(31-TOPBITS) and takes a
79 1.1 eeh * different path, as the upper bits of the quotient must be developed
80 1.1 eeh * one bit at a time.
81 1.1 eeh */
82 1.1 eeh
83 1.1 eeh define(N, `4')
84 1.1 eeh define(TWOSUPN, `16')
85 1.1 eeh define(WORDSIZE, `32')
86 1.1 eeh define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))
87 1.1 eeh
88 1.1 eeh define(dividend, `%o0')
89 1.1 eeh define(divisor, `%o1')
90 1.1 eeh define(Q, `%o2')
91 1.1 eeh define(R, `%o3')
92 1.1 eeh define(ITER, `%o4')
93 1.1 eeh define(V, `%o5')
94 1.1 eeh
95 1.1 eeh /* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */
96 1.1 eeh define(T, `%g1')
97 1.2 chs define(SC, `%g5')
98 1.1 eeh ifelse(S, `true', `define(SIGN, `%g6')')
99 1.1 eeh
100 1.1 eeh /*
101 1.1 eeh * This is the recursive definition for developing quotient digits.
102 1.1 eeh *
103 1.1 eeh * Parameters:
104 1.1 eeh * $1 the current depth, 1 <= $1 <= N
105 1.1 eeh * $2 the current accumulation of quotient bits
106 1.1 eeh * N max depth
107 1.1 eeh *
108 1.1 eeh * We add a new bit to $2 and either recurse or insert the bits in
109 1.1 eeh * the quotient. R, Q, and V are inputs and outputs as defined above;
110 1.1 eeh * the condition codes are expected to reflect the input R, and are
111 1.1 eeh * modified to reflect the output R.
112 1.1 eeh */
113 1.1 eeh define(DEVELOP_QUOTIENT_BITS,
114 1.1 eeh ` ! depth $1, accumulated bits $2
115 1.1 eeh bl L.$1.eval(TWOSUPN+$2)
116 1.1 eeh srl V,1,V
117 1.1 eeh ! remainder is positive
118 1.1 eeh subcc R,V,R
119 1.1 eeh ifelse($1, N,
120 1.1 eeh ` b 9f
121 1.1 eeh add Q, ($2*2+1), Q
122 1.1 eeh ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
123 1.1 eeh L.$1.eval(TWOSUPN+$2):
124 1.1 eeh ! remainder is negative
125 1.1 eeh addcc R,V,R
126 1.1 eeh ifelse($1, N,
127 1.1 eeh ` b 9f
128 1.1 eeh add Q, ($2*2-1), Q
129 1.1 eeh ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
130 1.1 eeh ifelse($1, 1, `9:')')
131 1.1 eeh
132 1.2 chs #include <machine/asm.h>
133 1.1 eeh #include <machine/trap.h>
134 1.1 eeh
135 1.1 eeh FUNC(NAME)
136 1.1 eeh ifelse(S, `true',
137 1.1 eeh ` ! compute sign of result; if neither is negative, no problem
138 1.1 eeh orcc divisor, dividend, %g0 ! either negative?
139 1.1 eeh bge 2f ! no, go do the divide
140 1.1 eeh ifelse(OP, `div',
141 1.1 eeh `xor divisor, dividend, SIGN',
142 1.1 eeh `mov dividend, SIGN') ! compute sign in any case
143 1.1 eeh tst divisor
144 1.1 eeh bge 1f
145 1.1 eeh tst dividend
146 1.1 eeh ! divisor is definitely negative; dividend might also be negative
147 1.1 eeh bge 2f ! if dividend not negative...
148 1.1 eeh neg divisor ! in any case, make divisor nonneg
149 1.1 eeh 1: ! dividend is negative, divisor is nonnegative
150 1.1 eeh neg dividend ! make dividend nonnegative
151 1.1 eeh 2:
152 1.1 eeh ')
153 1.1 eeh ! Ready to divide. Compute size of quotient; scale comparand.
154 1.1 eeh orcc divisor, %g0, V
155 1.1 eeh bnz 1f
156 1.1 eeh mov dividend, R
157 1.1 eeh
158 1.1 eeh ! Divide by zero trap. If it returns, return 0 (about as
159 1.1 eeh ! wrong as possible, but that is what SunOS does...).
160 1.1 eeh t ST_DIV0
161 1.1 eeh retl
162 1.1 eeh clr %o0
163 1.1 eeh
164 1.1 eeh 1:
165 1.1 eeh cmp R, V ! if divisor exceeds dividend, done
166 1.1 eeh blu Lgot_result ! (and algorithm fails otherwise)
167 1.1 eeh clr Q
168 1.1 eeh sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T
169 1.1 eeh cmp R, T
170 1.1 eeh blu Lnot_really_big
171 1.1 eeh clr ITER
172 1.1 eeh
173 1.1 eeh ! `Here the dividend is >= 2^(31-N) or so. We must be careful here,
174 1.1 eeh ! as our usual N-at-a-shot divide step will cause overflow and havoc.
175 1.1 eeh ! The number of bits in the result here is N*ITER+SC, where SC <= N.
176 1.1 eeh ! Compute ITER in an unorthodox manner: know we need to shift V into
177 1.1 eeh ! the top decade: so do not even bother to compare to R.'
178 1.1 eeh 1:
179 1.1 eeh cmp V, T
180 1.1 eeh bgeu 3f
181 1.1 eeh mov 1, SC
182 1.1 eeh sll V, N, V
183 1.1 eeh b 1b
184 1.1 eeh inc ITER
185 1.1 eeh
186 1.1 eeh ! Now compute SC.
187 1.1 eeh 2: addcc V, V, V
188 1.1 eeh bcc Lnot_too_big
189 1.1 eeh inc SC
190 1.1 eeh
191 1.1 eeh ! We get here if the divisor overflowed while shifting.
192 1.1 eeh ! This means that R has the high-order bit set.
193 1.1 eeh ! Restore V and subtract from R.
194 1.1 eeh sll T, TOPBITS, T ! high order bit
195 1.1 eeh srl V, 1, V ! rest of V
196 1.1 eeh add V, T, V
197 1.1 eeh b Ldo_single_div
198 1.1 eeh dec SC
199 1.1 eeh
200 1.1 eeh Lnot_too_big:
201 1.1 eeh 3: cmp V, R
202 1.1 eeh blu 2b
203 1.1 eeh nop
204 1.1 eeh be Ldo_single_div
205 1.1 eeh nop
206 1.1 eeh /* NB: these are commented out in the V8-Sparc manual as well */
207 1.1 eeh /* (I do not understand this) */
208 1.1 eeh ! V > R: went too far: back up 1 step
209 1.1 eeh ! srl V, 1, V
210 1.1 eeh ! dec SC
211 1.1 eeh ! do single-bit divide steps
212 1.1 eeh !
213 1.1 eeh ! We have to be careful here. We know that R >= V, so we can do the
214 1.1 eeh ! first divide step without thinking. BUT, the others are conditional,
215 1.1 eeh ! and are only done if R >= 0. Because both R and V may have the high-
216 1.1 eeh ! order bit set in the first step, just falling into the regular
217 1.1 eeh ! division loop will mess up the first time around.
218 1.1 eeh ! So we unroll slightly...
219 1.1 eeh Ldo_single_div:
220 1.1 eeh deccc SC
221 1.1 eeh bl Lend_regular_divide
222 1.1 eeh nop
223 1.1 eeh sub R, V, R
224 1.1 eeh mov 1, Q
225 1.1 eeh b Lend_single_divloop
226 1.1 eeh nop
227 1.1 eeh Lsingle_divloop:
228 1.1 eeh sll Q, 1, Q
229 1.1 eeh bl 1f
230 1.1 eeh srl V, 1, V
231 1.1 eeh ! R >= 0
232 1.1 eeh sub R, V, R
233 1.1 eeh b 2f
234 1.1 eeh inc Q
235 1.1 eeh 1: ! R < 0
236 1.1 eeh add R, V, R
237 1.1 eeh dec Q
238 1.1 eeh 2:
239 1.1 eeh Lend_single_divloop:
240 1.1 eeh deccc SC
241 1.1 eeh bge Lsingle_divloop
242 1.1 eeh tst R
243 1.1 eeh b,a Lend_regular_divide
244 1.1 eeh
245 1.1 eeh Lnot_really_big:
246 1.1 eeh 1:
247 1.1 eeh sll V, N, V
248 1.1 eeh cmp V, R
249 1.1 eeh bleu 1b
250 1.1 eeh inccc ITER
251 1.1 eeh be Lgot_result
252 1.1 eeh dec ITER
253 1.1 eeh
254 1.1 eeh tst R ! set up for initial iteration
255 1.1 eeh Ldivloop:
256 1.1 eeh sll Q, N, Q
257 1.1 eeh DEVELOP_QUOTIENT_BITS(1, 0)
258 1.1 eeh Lend_regular_divide:
259 1.1 eeh deccc ITER
260 1.1 eeh bge Ldivloop
261 1.1 eeh tst R
262 1.1 eeh bl,a Lgot_result
263 1.1 eeh ! non-restoring fixup here (one instruction only!)
264 1.1 eeh ifelse(OP, `div',
265 1.1 eeh ` dec Q
266 1.1 eeh ', ` add R, divisor, R
267 1.1 eeh ')
268 1.1 eeh
269 1.1 eeh Lgot_result:
270 1.1 eeh ifelse(S, `true',
271 1.1 eeh ` ! check to see if answer should be < 0
272 1.1 eeh tst SIGN
273 1.1 eeh bl,a 1f
274 1.1 eeh ifelse(OP, `div', `neg Q', `neg R')
275 1.1 eeh 1:')
276 1.1 eeh retl
277 1.1 eeh ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
278