n_argred.S revision 1.4 1 1.4 simonb /* $NetBSD: n_argred.S,v 1.4 1999/07/02 15:37:35 simonb Exp $ */
2 1.1 ragge /*
3 1.1 ragge * Copyright (c) 1985, 1993
4 1.1 ragge * The Regents of the University of California. All rights reserved.
5 1.1 ragge *
6 1.1 ragge * Redistribution and use in source and binary forms, with or without
7 1.1 ragge * modification, are permitted provided that the following conditions
8 1.1 ragge * are met:
9 1.1 ragge * 1. Redistributions of source code must retain the above copyright
10 1.1 ragge * notice, this list of conditions and the following disclaimer.
11 1.1 ragge * 2. Redistributions in binary form must reproduce the above copyright
12 1.1 ragge * notice, this list of conditions and the following disclaimer in the
13 1.1 ragge * documentation and/or other materials provided with the distribution.
14 1.1 ragge * 3. All advertising materials mentioning features or use of this software
15 1.1 ragge * must display the following acknowledgement:
16 1.1 ragge * This product includes software developed by the University of
17 1.1 ragge * California, Berkeley and its contributors.
18 1.1 ragge * 4. Neither the name of the University nor the names of its contributors
19 1.1 ragge * may be used to endorse or promote products derived from this software
20 1.1 ragge * without specific prior written permission.
21 1.1 ragge *
22 1.1 ragge * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 1.1 ragge * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 1.1 ragge * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 1.1 ragge * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 1.1 ragge * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 1.1 ragge * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 1.1 ragge * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 1.1 ragge * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 1.1 ragge * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 1.1 ragge * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 1.1 ragge * SUCH DAMAGE.
33 1.1 ragge *
34 1.1 ragge * @(#)argred.s 8.1 (Berkeley) 6/4/93
35 1.1 ragge */
36 1.1 ragge
37 1.1 ragge /*
38 1.1 ragge * libm$argred implements Bob Corbett's argument reduction and
39 1.1 ragge * libm$sincos implements Peter Tang's double precision sin/cos.
40 1.4 simonb *
41 1.1 ragge * Note: The two entry points libm$argred and libm$sincos are meant
42 1.1 ragge * to be used only by _sin, _cos and _tan.
43 1.1 ragge *
44 1.1 ragge * method: true range reduction to [-pi/4,pi/4], P. Tang & B. Corbett
45 1.1 ragge * S. McDonald, April 4, 1985
46 1.1 ragge */
47 1.2 matt .text
48 1.1 ragge .globl libm$argred
49 1.2 matt .type libm$argred,@label
50 1.1 ragge .globl libm$sincos
51 1.2 matt .type libm$sincos,@label
52 1.1 ragge .align 1
53 1.1 ragge
54 1.1 ragge libm$argred:
55 1.1 ragge /*
56 1.1 ragge * Compare the argument with the largest possible that can
57 1.1 ragge * be reduced by table lookup. r3 := |x| will be used in table_lookup .
58 1.1 ragge */
59 1.1 ragge movd r0,r3
60 1.1 ragge bgeq abs1
61 1.1 ragge mnegd r3,r3
62 1.1 ragge abs1:
63 1.1 ragge cmpd r3,$0d+4.55530934770520019583e+01
64 1.1 ragge blss small_arg
65 1.1 ragge jsb trigred
66 1.1 ragge rsb
67 1.1 ragge small_arg:
68 1.1 ragge jsb table_lookup
69 1.1 ragge rsb
70 1.1 ragge /*
71 1.1 ragge * At this point,
72 1.1 ragge * r0 contains the quadrant number, 0, 1, 2, or 3;
73 1.1 ragge * r2/r1 contains the reduced argument as a D-format number;
74 1.1 ragge * r3 contains a F-format extension to the reduced argument;
75 1.1 ragge * r4 contains a 0 or 1 corresponding to a sin or cos entry.
76 1.1 ragge */
77 1.1 ragge libm$sincos:
78 1.1 ragge /*
79 1.1 ragge * Compensate for a cosine entry by adding one to the quadrant number.
80 1.1 ragge */
81 1.1 ragge addl2 r4,r0
82 1.1 ragge /*
83 1.1 ragge * Polyd clobbers r5-r0 ; save X in r7/r6 .
84 1.1 ragge * This can be avoided by rewriting trigred .
85 1.1 ragge */
86 1.1 ragge movd r1,r6
87 1.1 ragge /*
88 1.1 ragge * Likewise, save alpha in r8 .
89 1.1 ragge * This can be avoided by rewriting trigred .
90 1.1 ragge */
91 1.1 ragge movf r3,r8
92 1.1 ragge /*
93 1.1 ragge * Odd or even quadrant? cosine if odd, sine otherwise.
94 1.1 ragge * Save floor(quadrant/2) in r9 ; it determines the final sign.
95 1.1 ragge */
96 1.1 ragge rotl $-1,r0,r9
97 1.1 ragge blss cosine
98 1.1 ragge sine:
99 1.1 ragge muld2 r1,r1 # Xsq = X * X
100 1.1 ragge cmpw $0x2480,r1 # [zl] Xsq > 2^-56?
101 1.1 ragge blss 1f # [zl] yes, go ahead and do polyd
102 1.1 ragge clrq r1 # [zl] work around 11/780 FPA polyd bug
103 1.1 ragge 1:
104 1.1 ragge polyd r1,$7,sin_coef # Q = P(Xsq) , of deg 7
105 1.1 ragge mulf3 $0f3.0,r8,r4 # beta = 3 * alpha
106 1.1 ragge mulf2 r0,r4 # beta = Q * beta
107 1.1 ragge addf2 r8,r4 # beta = alpha + beta
108 1.1 ragge muld2 r6,r0 # S(X) = X * Q
109 1.1 ragge /* cvtfd r4,r4 ... r5 = 0 after a polyd. */
110 1.1 ragge addd2 r4,r0 # S(X) = beta + S(X)
111 1.1 ragge addd2 r6,r0 # S(X) = X + S(X)
112 1.1 ragge brb done
113 1.1 ragge cosine:
114 1.1 ragge muld2 r6,r6 # Xsq = X * X
115 1.1 ragge beql zero_arg
116 1.1 ragge mulf2 r1,r8 # beta = X * alpha
117 1.1 ragge polyd r6,$7,cos_coef /* Q = P'(Xsq) , of deg 7 */
118 1.1 ragge subd3 r0,r8,r0 # beta = beta - Q
119 1.1 ragge subw2 $0x80,r6 # Xsq = Xsq / 2
120 1.1 ragge addd2 r0,r6 # Xsq = Xsq + beta
121 1.1 ragge zero_arg:
122 1.1 ragge subd3 r6,$0d1.0,r0 # C(X) = 1 - Xsq
123 1.1 ragge done:
124 1.1 ragge blbc r9,even
125 1.1 ragge mnegd r0,r0
126 1.1 ragge even:
127 1.1 ragge rsb
128 1.1 ragge
129 1.1 ragge .data
130 1.1 ragge .align 2
131 1.1 ragge
132 1.1 ragge sin_coef:
133 1.1 ragge .double 0d-7.53080332264191085773e-13 # s7 = 2^-29 -1.a7f2504ffc49f8..
134 1.1 ragge .double 0d+1.60573519267703489121e-10 # s6 = 2^-21 1.611adaede473c8..
135 1.1 ragge .double 0d-2.50520965150706067211e-08 # s5 = 2^-1a -1.ae644921ed8382..
136 1.1 ragge .double 0d+2.75573191800593885716e-06 # s4 = 2^-13 1.71de3a4b884278..
137 1.1 ragge .double 0d-1.98412698411850507950e-04 # s3 = 2^-0d -1.a01a01a0125e7d..
138 1.1 ragge .double 0d+8.33333333333325688985e-03 # s2 = 2^-07 1.11111111110e50
139 1.1 ragge .double 0d-1.66666666666666664354e-01 # s1 = 2^-03 -1.55555555555554
140 1.1 ragge .double 0d+0.00000000000000000000e+00 # s0 = 0
141 1.1 ragge
142 1.1 ragge cos_coef:
143 1.1 ragge .double 0d-1.13006966202629430300e-11 # s7 = 2^-25 -1.8D9BA04D1374BE..
144 1.1 ragge .double 0d+2.08746646574796004700e-09 # s6 = 2^-1D 1.1EE632650350BA..
145 1.1 ragge .double 0d-2.75573073031284417300e-07 # s5 = 2^-16 -1.27E4F31411719E..
146 1.1 ragge .double 0d+2.48015872682668025200e-05 # s4 = 2^-10 1.A01A0196B902E8..
147 1.1 ragge .double 0d-1.38888888888464709200e-03 # s3 = 2^-0A -1.6C16C16C11FACE..
148 1.1 ragge .double 0d+4.16666666666664761400e-02 # s2 = 2^-05 1.5555555555539E
149 1.1 ragge .double 0d+0.00000000000000000000e+00 # s1 = 0
150 1.1 ragge .double 0d+0.00000000000000000000e+00 # s0 = 0
151 1.1 ragge
152 1.1 ragge /*
153 1.1 ragge * Multiples of pi/2 expressed as the sum of three doubles,
154 1.1 ragge *
155 1.1 ragge * trailing: n * pi/2 , n = 0, 1, 2, ..., 29
156 1.1 ragge * trailing[n] ,
157 1.1 ragge *
158 1.1 ragge * middle: n * pi/2 , n = 0, 1, 2, ..., 29
159 1.1 ragge * middle[n] ,
160 1.1 ragge *
161 1.1 ragge * leading: n * pi/2 , n = 0, 1, 2, ..., 29
162 1.1 ragge * leading[n] ,
163 1.1 ragge *
164 1.1 ragge * where
165 1.1 ragge * leading[n] := (n * pi/2) rounded,
166 1.1 ragge * middle[n] := (n * pi/2 - leading[n]) rounded,
167 1.1 ragge * trailing[n] := (( n * pi/2 - leading[n]) - middle[n]) rounded .
168 1.1 ragge */
169 1.1 ragge trailing:
170 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 trailing
171 1.1 ragge .double 0d+4.33590506506189049611e-35 # 1 * pi/2 trailing
172 1.1 ragge .double 0d+8.67181013012378099223e-35 # 2 * pi/2 trailing
173 1.1 ragge .double 0d+1.30077151951856714215e-34 # 3 * pi/2 trailing
174 1.1 ragge .double 0d+1.73436202602475619845e-34 # 4 * pi/2 trailing
175 1.1 ragge .double 0d-1.68390735624352669192e-34 # 5 * pi/2 trailing
176 1.1 ragge .double 0d+2.60154303903713428430e-34 # 6 * pi/2 trailing
177 1.1 ragge .double 0d-8.16726343231148352150e-35 # 7 * pi/2 trailing
178 1.1 ragge .double 0d+3.46872405204951239689e-34 # 8 * pi/2 trailing
179 1.1 ragge .double 0d+3.90231455855570147991e-34 # 9 * pi/2 trailing
180 1.1 ragge .double 0d-3.36781471248705338384e-34 # 10 * pi/2 trailing
181 1.1 ragge .double 0d-1.06379439835298071785e-33 # 11 * pi/2 trailing
182 1.1 ragge .double 0d+5.20308607807426856861e-34 # 12 * pi/2 trailing
183 1.1 ragge .double 0d+5.63667658458045770509e-34 # 13 * pi/2 trailing
184 1.1 ragge .double 0d-1.63345268646229670430e-34 # 14 * pi/2 trailing
185 1.1 ragge .double 0d-1.19986217995610764801e-34 # 15 * pi/2 trailing
186 1.1 ragge .double 0d+6.93744810409902479378e-34 # 16 * pi/2 trailing
187 1.1 ragge .double 0d-8.03640094449267300110e-34 # 17 * pi/2 trailing
188 1.1 ragge .double 0d+7.80462911711140295982e-34 # 18 * pi/2 trailing
189 1.1 ragge .double 0d-7.16921993148029483506e-34 # 19 * pi/2 trailing
190 1.1 ragge .double 0d-6.73562942497410676769e-34 # 20 * pi/2 trailing
191 1.1 ragge .double 0d-6.30203891846791677593e-34 # 21 * pi/2 trailing
192 1.1 ragge .double 0d-2.12758879670596143570e-33 # 22 * pi/2 trailing
193 1.1 ragge .double 0d+2.53800212047402350390e-33 # 23 * pi/2 trailing
194 1.1 ragge .double 0d+1.04061721561485371372e-33 # 24 * pi/2 trailing
195 1.1 ragge .double 0d+6.11729905311472319056e-32 # 25 * pi/2 trailing
196 1.1 ragge .double 0d+1.12733531691609154102e-33 # 26 * pi/2 trailing
197 1.1 ragge .double 0d-3.70049587943078297272e-34 # 27 * pi/2 trailing
198 1.1 ragge .double 0d-3.26690537292459340860e-34 # 28 * pi/2 trailing
199 1.1 ragge .double 0d-1.14812616507957271361e-34 # 29 * pi/2 trailing
200 1.1 ragge
201 1.1 ragge middle:
202 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 middle
203 1.1 ragge .double 0d+5.72118872610983179676e-18 # 1 * pi/2 middle
204 1.1 ragge .double 0d+1.14423774522196635935e-17 # 2 * pi/2 middle
205 1.1 ragge .double 0d-3.83475850529283316309e-17 # 3 * pi/2 middle
206 1.1 ragge .double 0d+2.28847549044393271871e-17 # 4 * pi/2 middle
207 1.1 ragge .double 0d-2.69052076007086676522e-17 # 5 * pi/2 middle
208 1.1 ragge .double 0d-7.66951701058566632618e-17 # 6 * pi/2 middle
209 1.1 ragge .double 0d-1.54628301484890040587e-17 # 7 * pi/2 middle
210 1.1 ragge .double 0d+4.57695098088786543741e-17 # 8 * pi/2 middle
211 1.1 ragge .double 0d+1.07001849766246313192e-16 # 9 * pi/2 middle
212 1.1 ragge .double 0d-5.38104152014173353044e-17 # 10 * pi/2 middle
213 1.1 ragge .double 0d-2.14622680169080983801e-16 # 11 * pi/2 middle
214 1.1 ragge .double 0d-1.53390340211713326524e-16 # 12 * pi/2 middle
215 1.1 ragge .double 0d-9.21580002543456677056e-17 # 13 * pi/2 middle
216 1.1 ragge .double 0d-3.09256602969780081173e-17 # 14 * pi/2 middle
217 1.1 ragge .double 0d+3.03066796603896507006e-17 # 15 * pi/2 middle
218 1.1 ragge .double 0d+9.15390196177573087482e-17 # 16 * pi/2 middle
219 1.1 ragge .double 0d+1.52771359575124969107e-16 # 17 * pi/2 middle
220 1.1 ragge .double 0d+2.14003699532492626384e-16 # 18 * pi/2 middle
221 1.1 ragge .double 0d-1.68853170360202329427e-16 # 19 * pi/2 middle
222 1.1 ragge .double 0d-1.07620830402834670609e-16 # 20 * pi/2 middle
223 1.1 ragge .double 0d+3.97700719404595604379e-16 # 21 * pi/2 middle
224 1.1 ragge .double 0d-4.29245360338161967602e-16 # 22 * pi/2 middle
225 1.1 ragge .double 0d-3.68013020380794313406e-16 # 23 * pi/2 middle
226 1.1 ragge .double 0d-3.06780680423426653047e-16 # 24 * pi/2 middle
227 1.1 ragge .double 0d-2.45548340466059054318e-16 # 25 * pi/2 middle
228 1.1 ragge .double 0d-1.84316000508691335411e-16 # 26 * pi/2 middle
229 1.1 ragge .double 0d-1.23083660551323675053e-16 # 27 * pi/2 middle
230 1.1 ragge .double 0d-6.18513205939560162346e-17 # 28 * pi/2 middle
231 1.1 ragge .double 0d-6.18980636588357585202e-19 # 29 * pi/2 middle
232 1.1 ragge
233 1.1 ragge leading:
234 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 leading
235 1.1 ragge .double 0d+1.57079632679489661351e+00 # 1 * pi/2 leading
236 1.1 ragge .double 0d+3.14159265358979322702e+00 # 2 * pi/2 leading
237 1.1 ragge .double 0d+4.71238898038468989604e+00 # 3 * pi/2 leading
238 1.1 ragge .double 0d+6.28318530717958645404e+00 # 4 * pi/2 leading
239 1.1 ragge .double 0d+7.85398163397448312306e+00 # 5 * pi/2 leading
240 1.1 ragge .double 0d+9.42477796076937979208e+00 # 6 * pi/2 leading
241 1.1 ragge .double 0d+1.09955742875642763501e+01 # 7 * pi/2 leading
242 1.1 ragge .double 0d+1.25663706143591729081e+01 # 8 * pi/2 leading
243 1.1 ragge .double 0d+1.41371669411540694661e+01 # 9 * pi/2 leading
244 1.1 ragge .double 0d+1.57079632679489662461e+01 # 10 * pi/2 leading
245 1.1 ragge .double 0d+1.72787595947438630262e+01 # 11 * pi/2 leading
246 1.1 ragge .double 0d+1.88495559215387595842e+01 # 12 * pi/2 leading
247 1.1 ragge .double 0d+2.04203522483336561422e+01 # 13 * pi/2 leading
248 1.1 ragge .double 0d+2.19911485751285527002e+01 # 14 * pi/2 leading
249 1.1 ragge .double 0d+2.35619449019234492582e+01 # 15 * pi/2 leading
250 1.1 ragge .double 0d+2.51327412287183458162e+01 # 16 * pi/2 leading
251 1.1 ragge .double 0d+2.67035375555132423742e+01 # 17 * pi/2 leading
252 1.1 ragge .double 0d+2.82743338823081389322e+01 # 18 * pi/2 leading
253 1.1 ragge .double 0d+2.98451302091030359342e+01 # 19 * pi/2 leading
254 1.1 ragge .double 0d+3.14159265358979324922e+01 # 20 * pi/2 leading
255 1.1 ragge .double 0d+3.29867228626928286062e+01 # 21 * pi/2 leading
256 1.1 ragge .double 0d+3.45575191894877260523e+01 # 22 * pi/2 leading
257 1.1 ragge .double 0d+3.61283155162826226103e+01 # 23 * pi/2 leading
258 1.1 ragge .double 0d+3.76991118430775191683e+01 # 24 * pi/2 leading
259 1.1 ragge .double 0d+3.92699081698724157263e+01 # 25 * pi/2 leading
260 1.1 ragge .double 0d+4.08407044966673122843e+01 # 26 * pi/2 leading
261 1.1 ragge .double 0d+4.24115008234622088423e+01 # 27 * pi/2 leading
262 1.1 ragge .double 0d+4.39822971502571054003e+01 # 28 * pi/2 leading
263 1.1 ragge .double 0d+4.55530934770520019583e+01 # 29 * pi/2 leading
264 1.1 ragge
265 1.1 ragge twoOverPi:
266 1.1 ragge .double 0d+6.36619772367581343076e-01
267 1.1 ragge .text
268 1.1 ragge .align 1
269 1.1 ragge
270 1.1 ragge table_lookup:
271 1.1 ragge muld3 r3,twoOverPi,r0
272 1.1 ragge cvtrdl r0,r0 # n = nearest int to ((2/pi)*|x|) rnded
273 1.1 ragge mull3 $8,r0,r5
274 1.1 ragge subd2 leading(r5),r3 # p = (|x| - leading n*pi/2) exactly
275 1.1 ragge subd3 middle(r5),r3,r1 # q = (p - middle n*pi/2) rounded
276 1.1 ragge subd2 r1,r3 # r = (p - q)
277 1.1 ragge subd2 middle(r5),r3 # r = r - middle n*pi/2
278 1.1 ragge subd2 trailing(r5),r3 # r = r - trailing n*pi/2 rounded
279 1.1 ragge /*
280 1.1 ragge * If the original argument was negative,
281 1.1 ragge * negate the reduce argument and
282 1.1 ragge * adjust the octant/quadrant number.
283 1.1 ragge */
284 1.1 ragge tstw 4(ap)
285 1.1 ragge bgeq abs2
286 1.1 ragge mnegf r1,r1
287 1.1 ragge mnegf r3,r3
288 1.1 ragge /* subb3 r0,$8,r0 ...used for pi/4 reduction -S.McD */
289 1.1 ragge subb3 r0,$4,r0
290 1.1 ragge abs2:
291 1.1 ragge /*
292 1.1 ragge * Clear all unneeded octant/quadrant bits.
293 1.1 ragge */
294 1.1 ragge /* bicb2 $0xf8,r0 ...used for pi/4 reduction -S.McD */
295 1.1 ragge bicb2 $0xfc,r0
296 1.1 ragge rsb
297 1.1 ragge /*
298 1.1 ragge * p.0
299 1.1 ragge */
300 1.1 ragge .text
301 1.1 ragge .align 2
302 1.1 ragge /*
303 1.1 ragge * Only 256 (actually 225) bits of 2/pi are needed for VAX double
304 1.1 ragge * precision; this was determined by enumerating all the nearest
305 1.1 ragge * machine integer multiples of pi/2 using continued fractions.
306 1.1 ragge * (8a8d3673775b7ff7 required the most bits.) -S.McD
307 1.1 ragge */
308 1.1 ragge .long 0
309 1.1 ragge .long 0
310 1.1 ragge .long 0xaef1586d
311 1.1 ragge .long 0x9458eaf7
312 1.1 ragge .long 0x10e4107f
313 1.1 ragge .long 0xd8a5664f
314 1.1 ragge .long 0x4d377036
315 1.1 ragge .long 0x09d5f47d
316 1.1 ragge .long 0x91054a7f
317 1.1 ragge .long 0xbe60db93
318 1.1 ragge bits2opi:
319 1.1 ragge .long 0x00000028
320 1.1 ragge .long 0
321 1.1 ragge /*
322 1.1 ragge * Note: wherever you see the word `octant', read `quadrant'.
323 1.1 ragge * Currently this code is set up for pi/2 argument reduction.
324 1.1 ragge * By uncommenting/commenting the appropriate lines, it will
325 1.1 ragge * also serve as a pi/4 argument reduction code.
326 1.1 ragge */
327 1.1 ragge
328 1.1 ragge /* p.1
329 1.1 ragge * Trigred preforms argument reduction
330 1.1 ragge * for the trigonometric functions. It
331 1.1 ragge * takes one input argument, a D-format
332 1.1 ragge * number in r1/r0 . The magnitude of
333 1.1 ragge * the input argument must be greater
334 1.1 ragge * than or equal to 1/2 . Trigred produces
335 1.1 ragge * three results: the number of the octant
336 1.4 simonb * occupied by the argument, the reduced
337 1.1 ragge * argument, and an extension of the
338 1.4 simonb * reduced argument. The octant number is
339 1.4 simonb * returned in r0 . The reduced argument
340 1.4 simonb * is returned as a D-format number in
341 1.4 simonb * r2/r1 . An 8 bit extension of the
342 1.4 simonb * reduced argument is returned as an
343 1.1 ragge * F-format number in r3.
344 1.1 ragge * p.2
345 1.1 ragge */
346 1.1 ragge trigred:
347 1.1 ragge /*
348 1.1 ragge * Save the sign of the input argument.
349 1.1 ragge */
350 1.1 ragge movw r0,-(sp)
351 1.1 ragge /*
352 1.1 ragge * Extract the exponent field.
353 1.1 ragge */
354 1.1 ragge extzv $7,$7,r0,r2
355 1.1 ragge /*
356 1.1 ragge * Convert the fraction part of the input
357 1.1 ragge * argument into a quadword integer.
358 1.1 ragge */
359 1.1 ragge bicw2 $0xff80,r0
360 1.1 ragge bisb2 $0x80,r0 # -S.McD
361 1.1 ragge rotl $16,r0,r0
362 1.1 ragge rotl $16,r1,r1
363 1.1 ragge /*
364 1.1 ragge * If r1 is negative, add 1 to r0 . This
365 1.1 ragge * adjustment is made so that the two's
366 1.1 ragge * complement multiplications done later
367 1.1 ragge * will produce unsigned results.
368 1.1 ragge */
369 1.1 ragge bgeq posmid
370 1.1 ragge incl r0
371 1.1 ragge posmid:
372 1.1 ragge /* p.3
373 1.1 ragge *
374 1.1 ragge * Set r3 to the address of the first quadword
375 1.1 ragge * used to obtain the needed portion of 2/pi .
376 1.1 ragge * The address is longword aligned to ensure
377 1.1 ragge * efficient access.
378 1.1 ragge */
379 1.1 ragge ashl $-3,r2,r3
380 1.1 ragge bicb2 $3,r3
381 1.3 matt mnegl r3,r3
382 1.3 matt movab bits2opi[r3],r3
383 1.1 ragge /*
384 1.4 simonb * Set r2 to the size of the shift needed to
385 1.1 ragge * obtain the correct portion of 2/pi .
386 1.1 ragge */
387 1.1 ragge bicb2 $0xe0,r2
388 1.1 ragge /* p.4
389 1.1 ragge *
390 1.1 ragge * Move the needed 128 bits of 2/pi into
391 1.1 ragge * r11 - r8 . Adjust the numbers to allow
392 1.1 ragge * for unsigned multiplication.
393 1.1 ragge */
394 1.1 ragge ashq r2,(r3),r10
395 1.1 ragge
396 1.1 ragge subl2 $4,r3
397 1.1 ragge ashq r2,(r3),r9
398 1.1 ragge bgeq signoff1
399 1.1 ragge incl r11
400 1.1 ragge signoff1:
401 1.1 ragge subl2 $4,r3
402 1.1 ragge ashq r2,(r3),r8
403 1.1 ragge bgeq signoff2
404 1.1 ragge incl r10
405 1.1 ragge signoff2:
406 1.1 ragge subl2 $4,r3
407 1.1 ragge ashq r2,(r3),r7
408 1.1 ragge bgeq signoff3
409 1.1 ragge incl r9
410 1.1 ragge signoff3:
411 1.1 ragge /* p.5
412 1.1 ragge *
413 1.4 simonb * Multiply the contents of r0/r1 by the
414 1.1 ragge * slice of 2/pi in r11 - r8 .
415 1.1 ragge */
416 1.1 ragge emul r0,r8,$0,r4
417 1.1 ragge emul r0,r9,r5,r5
418 1.1 ragge emul r0,r10,r6,r6
419 1.1 ragge
420 1.1 ragge emul r1,r8,$0,r7
421 1.1 ragge emul r1,r9,r8,r8
422 1.1 ragge emul r1,r10,r9,r9
423 1.1 ragge emul r1,r11,r10,r10
424 1.1 ragge
425 1.1 ragge addl2 r4,r8
426 1.1 ragge adwc r5,r9
427 1.1 ragge adwc r6,r10
428 1.1 ragge /* p.6
429 1.1 ragge *
430 1.1 ragge * If there are more than five leading zeros
431 1.1 ragge * after the first two quotient bits or if there
432 1.1 ragge * are more than five leading ones after the first
433 1.1 ragge * two quotient bits, generate more fraction bits.
434 1.1 ragge * Otherwise, branch to code to produce the result.
435 1.1 ragge */
436 1.1 ragge bicl3 $0xc1ffffff,r10,r4
437 1.1 ragge beql more1
438 1.1 ragge cmpl $0x3e000000,r4
439 1.1 ragge bneq result
440 1.1 ragge more1:
441 1.1 ragge /* p.7
442 1.1 ragge *
443 1.1 ragge * generate another 32 result bits.
444 1.1 ragge */
445 1.1 ragge subl2 $4,r3
446 1.1 ragge ashq r2,(r3),r5
447 1.1 ragge bgeq signoff4
448 1.1 ragge
449 1.1 ragge emul r1,r6,$0,r4
450 1.1 ragge addl2 r1,r5
451 1.1 ragge emul r0,r6,r5,r5
452 1.1 ragge addl2 r0,r6
453 1.1 ragge brb addbits1
454 1.1 ragge
455 1.1 ragge signoff4:
456 1.1 ragge emul r1,r6,$0,r4
457 1.1 ragge emul r0,r6,r5,r5
458 1.1 ragge
459 1.1 ragge addbits1:
460 1.1 ragge addl2 r5,r7
461 1.1 ragge adwc r6,r8
462 1.1 ragge adwc $0,r9
463 1.1 ragge adwc $0,r10
464 1.1 ragge /* p.8
465 1.1 ragge *
466 1.1 ragge * Check for massive cancellation.
467 1.1 ragge */
468 1.1 ragge bicl3 $0xc0000000,r10,r6
469 1.1 ragge /* bneq more2 -S.McD Test was backwards */
470 1.1 ragge beql more2
471 1.1 ragge cmpl $0x3fffffff,r6
472 1.1 ragge bneq result
473 1.1 ragge more2:
474 1.1 ragge /* p.9
475 1.1 ragge *
476 1.1 ragge * If massive cancellation has occurred,
477 1.1 ragge * generate another 24 result bits.
478 1.4 simonb * Testing has shown there will always be
479 1.1 ragge * enough bits after this point.
480 1.1 ragge */
481 1.1 ragge subl2 $4,r3
482 1.1 ragge ashq r2,(r3),r5
483 1.1 ragge bgeq signoff5
484 1.1 ragge
485 1.1 ragge emul r0,r6,r4,r5
486 1.1 ragge addl2 r0,r6
487 1.1 ragge brb addbits2
488 1.1 ragge
489 1.1 ragge signoff5:
490 1.1 ragge emul r0,r6,r4,r5
491 1.1 ragge
492 1.1 ragge addbits2:
493 1.1 ragge addl2 r6,r7
494 1.1 ragge adwc $0,r8
495 1.1 ragge adwc $0,r9
496 1.1 ragge adwc $0,r10
497 1.1 ragge /* p.10
498 1.1 ragge *
499 1.1 ragge * The following code produces the reduced
500 1.1 ragge * argument from the product bits contained
501 1.1 ragge * in r10 - r7 .
502 1.1 ragge */
503 1.1 ragge result:
504 1.1 ragge /*
505 1.1 ragge * Extract the octant number from r10 .
506 1.1 ragge */
507 1.1 ragge /* extzv $29,$3,r10,r0 ...used for pi/4 reduction -S.McD */
508 1.1 ragge extzv $30,$2,r10,r0
509 1.1 ragge /*
510 1.1 ragge * Clear the octant bits in r10 .
511 1.1 ragge */
512 1.1 ragge /* bicl2 $0xe0000000,r10 ...used for pi/4 reduction -S.McD */
513 1.1 ragge bicl2 $0xc0000000,r10
514 1.1 ragge /*
515 1.1 ragge * Zero the sign flag.
516 1.1 ragge */
517 1.1 ragge clrl r5
518 1.1 ragge /* p.11
519 1.1 ragge *
520 1.1 ragge * Check to see if the fraction is greater than
521 1.4 simonb * or equal to one-half. If it is, add one
522 1.1 ragge * to the octant number, set the sign flag
523 1.1 ragge * on, and replace the fraction with 1 minus
524 1.1 ragge * the fraction.
525 1.1 ragge */
526 1.1 ragge /* bitl $0x10000000,r10 ...used for pi/4 reduction -S.McD */
527 1.1 ragge bitl $0x20000000,r10
528 1.1 ragge beql small
529 1.1 ragge incl r0
530 1.1 ragge incl r5
531 1.1 ragge /* subl3 r10,$0x1fffffff,r10 ...used for pi/4 reduction -S.McD */
532 1.1 ragge subl3 r10,$0x3fffffff,r10
533 1.1 ragge mcoml r9,r9
534 1.1 ragge mcoml r8,r8
535 1.1 ragge mcoml r7,r7
536 1.1 ragge small:
537 1.1 ragge /* p.12
538 1.1 ragge *
539 1.1 ragge * Test whether the first 29 bits of the ...used for pi/4 reduction -S.McD
540 1.4 simonb * Test whether the first 30 bits of the
541 1.1 ragge * fraction are zero.
542 1.1 ragge */
543 1.1 ragge tstl r10
544 1.1 ragge beql tiny
545 1.1 ragge /*
546 1.1 ragge * Find the position of the first one bit in r10 .
547 1.1 ragge */
548 1.1 ragge cvtld r10,r1
549 1.1 ragge extzv $7,$7,r1,r1
550 1.1 ragge /*
551 1.1 ragge * Compute the size of the shift needed.
552 1.1 ragge */
553 1.1 ragge subl3 r1,$32,r6
554 1.1 ragge /*
555 1.1 ragge * Shift up the high order 64 bits of the
556 1.1 ragge * product.
557 1.1 ragge */
558 1.1 ragge ashq r6,r9,r10
559 1.1 ragge ashq r6,r8,r9
560 1.1 ragge brb mult
561 1.1 ragge /* p.13
562 1.1 ragge *
563 1.1 ragge * Test to see if the sign bit of r9 is on.
564 1.1 ragge */
565 1.1 ragge tiny:
566 1.1 ragge tstl r9
567 1.1 ragge bgeq tinier
568 1.1 ragge /*
569 1.1 ragge * If it is, shift the product bits up 32 bits.
570 1.1 ragge */
571 1.1 ragge movl $32,r6
572 1.1 ragge movq r8,r10
573 1.1 ragge tstl r10
574 1.1 ragge brb mult
575 1.1 ragge /* p.14
576 1.1 ragge *
577 1.1 ragge * Test whether r9 is zero. It is probably
578 1.1 ragge * impossible for both r10 and r9 to be
579 1.1 ragge * zero, but until proven to be so, the test
580 1.1 ragge * must be made.
581 1.1 ragge */
582 1.1 ragge tinier:
583 1.1 ragge beql zero
584 1.1 ragge /*
585 1.1 ragge * Find the position of the first one bit in r9 .
586 1.1 ragge */
587 1.1 ragge cvtld r9,r1
588 1.1 ragge extzv $7,$7,r1,r1
589 1.1 ragge /*
590 1.1 ragge * Compute the size of the shift needed.
591 1.1 ragge */
592 1.1 ragge subl3 r1,$32,r1
593 1.1 ragge addl3 $32,r1,r6
594 1.1 ragge /*
595 1.1 ragge * Shift up the high order 64 bits of the
596 1.1 ragge * product.
597 1.1 ragge */
598 1.1 ragge ashq r1,r8,r10
599 1.1 ragge ashq r1,r7,r9
600 1.1 ragge brb mult
601 1.1 ragge /* p.15
602 1.1 ragge *
603 1.1 ragge * The following code sets the reduced
604 1.1 ragge * argument to zero.
605 1.1 ragge */
606 1.1 ragge zero:
607 1.1 ragge clrl r1
608 1.1 ragge clrl r2
609 1.1 ragge clrl r3
610 1.1 ragge brw return
611 1.1 ragge /* p.16
612 1.1 ragge *
613 1.1 ragge * At this point, r0 contains the octant number,
614 1.1 ragge * r6 indicates the number of bits the fraction
615 1.1 ragge * has been shifted, r5 indicates the sign of
616 1.1 ragge * the fraction, r11/r10 contain the high order
617 1.1 ragge * 64 bits of the fraction, and the condition
618 1.1 ragge * codes indicate where the sign bit of r10
619 1.1 ragge * is on. The following code multiplies the
620 1.1 ragge * fraction by pi/2 .
621 1.1 ragge */
622 1.1 ragge mult:
623 1.1 ragge /*
624 1.1 ragge * Save r11/r10 in r4/r1 . -S.McD
625 1.1 ragge */
626 1.1 ragge movl r11,r4
627 1.1 ragge movl r10,r1
628 1.1 ragge /*
629 1.1 ragge * If the sign bit of r10 is on, add 1 to r11 .
630 1.1 ragge */
631 1.1 ragge bgeq signoff6
632 1.1 ragge incl r11
633 1.1 ragge signoff6:
634 1.1 ragge /* p.17
635 1.1 ragge *
636 1.1 ragge * Move pi/2 into r3/r2 .
637 1.1 ragge */
638 1.1 ragge movq $0xc90fdaa22168c235,r2
639 1.1 ragge /*
640 1.1 ragge * Multiply the fraction by the portion of pi/2
641 1.1 ragge * in r2 .
642 1.1 ragge */
643 1.1 ragge emul r2,r10,$0,r7
644 1.1 ragge emul r2,r11,r8,r7
645 1.1 ragge /*
646 1.4 simonb * Multiply the fraction by the portion of pi/2
647 1.1 ragge * in r3 .
648 1.1 ragge */
649 1.1 ragge emul r3,r10,$0,r9
650 1.1 ragge emul r3,r11,r10,r10
651 1.1 ragge /*
652 1.1 ragge * Add the product bits together.
653 1.1 ragge */
654 1.1 ragge addl2 r7,r9
655 1.1 ragge adwc r8,r10
656 1.1 ragge adwc $0,r11
657 1.1 ragge /*
658 1.1 ragge * Compensate for not sign extending r8 above.-S.McD
659 1.1 ragge */
660 1.1 ragge tstl r8
661 1.1 ragge bgeq signoff6a
662 1.1 ragge decl r11
663 1.1 ragge signoff6a:
664 1.1 ragge /*
665 1.1 ragge * Compensate for r11/r10 being unsigned. -S.McD
666 1.1 ragge */
667 1.1 ragge addl2 r2,r10
668 1.1 ragge adwc r3,r11
669 1.1 ragge /*
670 1.1 ragge * Compensate for r3/r2 being unsigned. -S.McD
671 1.1 ragge */
672 1.1 ragge addl2 r1,r10
673 1.1 ragge adwc r4,r11
674 1.1 ragge /* p.18
675 1.1 ragge *
676 1.1 ragge * If the sign bit of r11 is zero, shift the
677 1.1 ragge * product bits up one bit and increment r6 .
678 1.1 ragge */
679 1.1 ragge blss signon
680 1.1 ragge incl r6
681 1.1 ragge ashq $1,r10,r10
682 1.1 ragge tstl r9
683 1.1 ragge bgeq signoff7
684 1.1 ragge incl r10
685 1.1 ragge signoff7:
686 1.1 ragge signon:
687 1.1 ragge /* p.19
688 1.1 ragge *
689 1.1 ragge * Shift the 56 most significant product
690 1.1 ragge * bits into r9/r8 . The sign extension
691 1.1 ragge * will be handled later.
692 1.1 ragge */
693 1.1 ragge ashq $-8,r10,r8
694 1.1 ragge /*
695 1.1 ragge * Convert the low order 8 bits of r10
696 1.1 ragge * into an F-format number.
697 1.1 ragge */
698 1.1 ragge cvtbf r10,r3
699 1.1 ragge /*
700 1.1 ragge * If the result of the conversion was
701 1.1 ragge * negative, add 1 to r9/r8 .
702 1.1 ragge */
703 1.1 ragge bgeq chop
704 1.1 ragge incl r8
705 1.1 ragge adwc $0,r9
706 1.1 ragge /*
707 1.1 ragge * If r9 is now zero, branch to special
708 1.1 ragge * code to handle that possibility.
709 1.1 ragge */
710 1.1 ragge beql carryout
711 1.1 ragge chop:
712 1.1 ragge /* p.20
713 1.1 ragge *
714 1.1 ragge * Convert the number in r9/r8 into
715 1.1 ragge * D-format number in r2/r1 .
716 1.1 ragge */
717 1.1 ragge rotl $16,r8,r2
718 1.1 ragge rotl $16,r9,r1
719 1.1 ragge /*
720 1.1 ragge * Set the exponent field to the appropriate
721 1.1 ragge * value. Note that the extra bits created by
722 1.1 ragge * sign extension are now eliminated.
723 1.1 ragge */
724 1.1 ragge subw3 r6,$131,r6
725 1.1 ragge insv r6,$7,$9,r1
726 1.1 ragge /*
727 1.1 ragge * Set the exponent field of the F-format
728 1.1 ragge * number in r3 to the appropriate value.
729 1.1 ragge */
730 1.1 ragge tstf r3
731 1.1 ragge beql return
732 1.1 ragge /* extzv $7,$8,r3,r4 -S.McD */
733 1.1 ragge extzv $7,$7,r3,r4
734 1.1 ragge addw2 r4,r6
735 1.1 ragge /* subw2 $217,r6 -S.McD */
736 1.1 ragge subw2 $64,r6
737 1.1 ragge insv r6,$7,$8,r3
738 1.1 ragge brb return
739 1.1 ragge /* p.21
740 1.1 ragge *
741 1.4 simonb * The following code generates the appropriate
742 1.1 ragge * result for the unlikely possibility that
743 1.4 simonb * rounding the number in r9/r8 resulted in
744 1.1 ragge * a carry out.
745 1.1 ragge */
746 1.1 ragge carryout:
747 1.1 ragge clrl r1
748 1.1 ragge clrl r2
749 1.1 ragge subw3 r6,$132,r6
750 1.1 ragge insv r6,$7,$9,r1
751 1.1 ragge tstf r3
752 1.1 ragge beql return
753 1.1 ragge extzv $7,$8,r3,r4
754 1.1 ragge addw2 r4,r6
755 1.1 ragge subw2 $218,r6
756 1.1 ragge insv r6,$7,$8,r3
757 1.1 ragge /* p.22
758 1.1 ragge *
759 1.1 ragge * The following code makes an needed
760 1.4 simonb * adjustments to the signs of the
761 1.1 ragge * results or to the octant number, and
762 1.1 ragge * then returns.
763 1.1 ragge */
764 1.1 ragge return:
765 1.1 ragge /*
766 1.4 simonb * Test if the fraction was greater than or
767 1.1 ragge * equal to 1/2 . If so, negate the reduced
768 1.1 ragge * argument.
769 1.1 ragge */
770 1.1 ragge blbc r5,signoff8
771 1.1 ragge mnegf r1,r1
772 1.1 ragge mnegf r3,r3
773 1.1 ragge signoff8:
774 1.1 ragge /* p.23
775 1.1 ragge *
776 1.1 ragge * If the original argument was negative,
777 1.1 ragge * negate the reduce argument and
778 1.1 ragge * adjust the octant number.
779 1.1 ragge */
780 1.1 ragge tstw (sp)+
781 1.1 ragge bgeq signoff9
782 1.1 ragge mnegf r1,r1
783 1.1 ragge mnegf r3,r3
784 1.1 ragge /* subb3 r0,$8,r0 ...used for pi/4 reduction -S.McD */
785 1.1 ragge subb3 r0,$4,r0
786 1.1 ragge signoff9:
787 1.1 ragge /*
788 1.1 ragge * Clear all unneeded octant bits.
789 1.1 ragge *
790 1.1 ragge * bicb2 $0xf8,r0 ...used for pi/4 reduction -S.McD */
791 1.1 ragge bicb2 $0xfc,r0
792 1.1 ragge /*
793 1.1 ragge * Return.
794 1.1 ragge */
795 1.1 ragge rsb
796