/*	$NetBSD: n_argred.S,v 1.7 2002/02/24 01:06:21 matt Exp $	*/
/*
 * Copyright (c) 1985, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)argred.s	8.1 (Berkeley) 6/4/93
 */

#include <machine/asm.h>

/*
 * libm$argred implements Bob Corbett's argument reduction and
 * libm$sincos implements Peter Tang's double precision sin/cos.
 *
 * Note: The two entry points libm$argred and libm$sincos are meant
 *	 to be used only by _sin, _cos and _tan.
 *
 * method: true range reduction to [-pi/4,pi/4], P. Tang & B. Corbett
 * S. McDonald, April 4, 1985
 */
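
/*
 * Illustrative note (not in the original source): conceptually, these two
 * entry points split sin/cos into "reduce the argument, then evaluate a
 * kernel on [-pi/4,pi/4]".  A rough C sketch of the idea, with made-up
 * names and none of the register-level conventions used below:
 *
 *	double sin_sketch(double x)
 *	{
 *		double r, ext;			/+ reduced argument, extension +/
 *		int quad = argred(x, &r, &ext);	/+ quadrant number 0..3 +/
 *		return sincos_kernel(quad, r, ext, 0);	/+ 0 = sin, 1 = cos +/
 *	}
 *
 * The assembly below passes these values in registers %r0-%r4 rather than
 * through a C calling sequence.
 */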

ENTRY(__libm_argred, 0)
/*
 * Compare the argument with the largest possible that can
 * be reduced by table lookup.  %r3 := |x| will be used in table_lookup .
 */
	movd	%r0,%r3
	bgeq	abs1
	mnegd	%r3,%r3
abs1:
	cmpd	%r3,$0d+4.55530934770520019583e+01
	blss	small_arg
	jsb	trigred
	rsb
small_arg:
	jsb	table_lookup
	rsb
/*
 * At this point,
 *	%r0  contains the quadrant number, 0, 1, 2, or 3;
 *	%r2/%r1  contains the reduced argument as a D-format number;
 *	%r3  contains an F-format extension to the reduced argument;
 *	%r4  contains a 0 or 1 corresponding to a sin or cos entry.
 */

ENTRY(__libm_sincos, 0)
/*
 * Compensate for a cosine entry by adding one to the quadrant number.
 */
	addl2	%r4,%r0
/*
 * Polyd clobbers %r5-%r0 ; save X in %r7/%r6 .
 * This can be avoided by rewriting trigred .
 */
	movd	%r1,%r6
/*
 * Likewise, save alpha in %r8 .
 * This can be avoided by rewriting trigred .
 */
	movf	%r3,%r8
/*
 * Odd or even quadrant?  cosine if odd, sine otherwise.
 * Save floor(quadrant/2) in %r9 ; it determines the final sign.
 */
	rotl	$-1,%r0,%r9
	blss	cosine
sine:
	muld2	%r1,%r1		# Xsq = X * X
	cmpw	$0x2480,%r1	# [zl] Xsq > 2^-56?
	blss	1f		# [zl] yes, go ahead and do polyd
	clrq	%r1		# [zl] work around 11/780 FPA polyd bug
1:
	polyd	%r1,$7,sin_coef	# Q = P(Xsq) , of deg 7
	mulf3	$0f3.0,%r8,%r4	# beta = 3 * alpha
	mulf2	%r0,%r4		# beta = Q * beta
	addf2	%r8,%r4		# beta = alpha + beta
	muld2	%r6,%r0		# S(X) = X * Q
/*	cvtfd	%r4,%r4		... %r5 = 0 after a polyd. */
	addd2	%r4,%r0		# S(X) = beta + S(X)
	addd2	%r6,%r0		# S(X) = X + S(X)
	jbr	done
cosine:
	muld2	%r6,%r6		# Xsq = X * X
	beql	zero_arg
	mulf2	%r1,%r8		# beta = X * alpha
	polyd	%r6,$7,cos_coef	/* Q = P'(Xsq) , of deg 7 */
	subd3	%r0,%r8,%r0	# beta = beta - Q
	subw2	$0x80,%r6	# Xsq = Xsq / 2
	addd2	%r0,%r6		# Xsq = Xsq + beta
zero_arg:
	subd3	%r6,$0d1.0,%r0	# C(X) = 1 - Xsq
done:
	blbc	%r9,even
	mnegd	%r0,%r0
even:
	rsb
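
/*
 * Illustrative note (not in the original source): the kernel above evaluates
 * a degree-7 polynomial in Xsq = X*X with the polyd instruction (a Horner
 * evaluation over the coefficient tables below), then folds in the F-format
 * extension alpha of the reduced argument.  Roughly, what the code computes
 * is (C-style sketch, names made up):
 *
 *	Xsq = X * X;
 *	Qs  = poly(Xsq, sin_coef);		degree-7 Horner
 *	S   = X + X*Qs + (alpha + 3.0*alpha*Qs);	sine entry
 *
 *	Qc  = poly(Xsq, cos_coef);		degree-7 Horner
 *	C   = 1.0 - (Xsq/2 + X*alpha - Qc);	cosine entry
 *
 * The final sign comes from floor(quadrant/2), saved in %r9 above.
 */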

#ifdef __ELF__
	.section .rodata
#else
	.text
#endif
	_ALIGN_TEXT

sin_coef:
	.double	0d-7.53080332264191085773e-13	# s7 = 2^-29 -1.a7f2504ffc49f8..
	.double	0d+1.60573519267703489121e-10	# s6 = 2^-21  1.611adaede473c8..
	.double	0d-2.50520965150706067211e-08	# s5 = 2^-1a -1.ae644921ed8382..
	.double	0d+2.75573191800593885716e-06	# s4 = 2^-13  1.71de3a4b884278..
	.double	0d-1.98412698411850507950e-04	# s3 = 2^-0d -1.a01a01a0125e7d..
	.double	0d+8.33333333333325688985e-03	# s2 = 2^-07  1.11111111110e50
	.double	0d-1.66666666666666664354e-01	# s1 = 2^-03 -1.55555555555554
	.double	0d+0.00000000000000000000e+00	# s0 = 0

cos_coef:
	.double	0d-1.13006966202629430300e-11	# s7 = 2^-25 -1.8D9BA04D1374BE..
	.double	0d+2.08746646574796004700e-09	# s6 = 2^-1D  1.1EE632650350BA..
	.double	0d-2.75573073031284417300e-07	# s5 = 2^-16 -1.27E4F31411719E..
	.double	0d+2.48015872682668025200e-05	# s4 = 2^-10  1.A01A0196B902E8..
	.double	0d-1.38888888888464709200e-03	# s3 = 2^-0A -1.6C16C16C11FACE..
	.double	0d+4.16666666666664761400e-02	# s2 = 2^-05  1.5555555555539E
	.double	0d+0.00000000000000000000e+00	# s1 = 0
	.double	0d+0.00000000000000000000e+00	# s0 = 0

/*
 * Multiples of pi/2 expressed as the sum of three doubles,
 *
 * trailing:	n * pi/2 ,  n = 0, 1, 2, ..., 29
 *			trailing[n] ,
 *
 * middle:	n * pi/2 ,  n = 0, 1, 2, ..., 29
 *			middle[n] ,
 *
 * leading:	n * pi/2 ,  n = 0, 1, 2, ..., 29
 *			leading[n] ,
 *
 *	where
 *		leading[n]  := (n * pi/2) rounded,
 *		middle[n]   := (n * pi/2 - leading[n]) rounded,
 *		trailing[n] := ((n * pi/2 - leading[n]) - middle[n]) rounded .
 */
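
/*
 * Illustrative note (not in the original source): table_lookup below uses
 * these tables to subtract n*pi/2 from |x| in pieces, so that subtracting
 * the leading part is exact.  Roughly, in C (made-up names; this ignores
 * the D/F format details):
 *
 *	n = nearest integer to (|x| * twoOverPi);
 *	p = |x| - leading[n];			exact
 *	q = p - middle[n];			rounded: the reduced argument
 *	r = (p - q) - middle[n] - trailing[n];	the extension
 *
 * The quadrant number is n, adjusted for the sign of x and masked to
 * two bits.
 */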
trailing:
	.double	0d+0.00000000000000000000e+00	#  0 * pi/2  trailing
	.double	0d+4.33590506506189049611e-35	#  1 * pi/2  trailing
	.double	0d+8.67181013012378099223e-35	#  2 * pi/2  trailing
	.double	0d+1.30077151951856714215e-34	#  3 * pi/2  trailing
	.double	0d+1.73436202602475619845e-34	#  4 * pi/2  trailing
	.double	0d-1.68390735624352669192e-34	#  5 * pi/2  trailing
	.double	0d+2.60154303903713428430e-34	#  6 * pi/2  trailing
	.double	0d-8.16726343231148352150e-35	#  7 * pi/2  trailing
	.double	0d+3.46872405204951239689e-34	#  8 * pi/2  trailing
	.double	0d+3.90231455855570147991e-34	#  9 * pi/2  trailing
	.double	0d-3.36781471248705338384e-34	# 10 * pi/2  trailing
	.double	0d-1.06379439835298071785e-33	# 11 * pi/2  trailing
	.double	0d+5.20308607807426856861e-34	# 12 * pi/2  trailing
	.double	0d+5.63667658458045770509e-34	# 13 * pi/2  trailing
	.double	0d-1.63345268646229670430e-34	# 14 * pi/2  trailing
	.double	0d-1.19986217995610764801e-34	# 15 * pi/2  trailing
	.double	0d+6.93744810409902479378e-34	# 16 * pi/2  trailing
	.double	0d-8.03640094449267300110e-34	# 17 * pi/2  trailing
	.double	0d+7.80462911711140295982e-34	# 18 * pi/2  trailing
	.double	0d-7.16921993148029483506e-34	# 19 * pi/2  trailing
	.double	0d-6.73562942497410676769e-34	# 20 * pi/2  trailing
	.double	0d-6.30203891846791677593e-34	# 21 * pi/2  trailing
	.double	0d-2.12758879670596143570e-33	# 22 * pi/2  trailing
	.double	0d+2.53800212047402350390e-33	# 23 * pi/2  trailing
	.double	0d+1.04061721561485371372e-33	# 24 * pi/2  trailing
	.double	0d+6.11729905311472319056e-32	# 25 * pi/2  trailing
	.double	0d+1.12733531691609154102e-33	# 26 * pi/2  trailing
	.double	0d-3.70049587943078297272e-34	# 27 * pi/2  trailing
	.double	0d-3.26690537292459340860e-34	# 28 * pi/2  trailing
	.double	0d-1.14812616507957271361e-34	# 29 * pi/2  trailing

middle:
	.double	0d+0.00000000000000000000e+00	#  0 * pi/2  middle
	.double	0d+5.72118872610983179676e-18	#  1 * pi/2  middle
	.double	0d+1.14423774522196635935e-17	#  2 * pi/2  middle
	.double	0d-3.83475850529283316309e-17	#  3 * pi/2  middle
	.double	0d+2.28847549044393271871e-17	#  4 * pi/2  middle
	.double	0d-2.69052076007086676522e-17	#  5 * pi/2  middle
	.double	0d-7.66951701058566632618e-17	#  6 * pi/2  middle
	.double	0d-1.54628301484890040587e-17	#  7 * pi/2  middle
	.double	0d+4.57695098088786543741e-17	#  8 * pi/2  middle
	.double	0d+1.07001849766246313192e-16	#  9 * pi/2  middle
	.double	0d-5.38104152014173353044e-17	# 10 * pi/2  middle
	.double	0d-2.14622680169080983801e-16	# 11 * pi/2  middle
	.double	0d-1.53390340211713326524e-16	# 12 * pi/2  middle
	.double	0d-9.21580002543456677056e-17	# 13 * pi/2  middle
	.double	0d-3.09256602969780081173e-17	# 14 * pi/2  middle
	.double	0d+3.03066796603896507006e-17	# 15 * pi/2  middle
	.double	0d+9.15390196177573087482e-17	# 16 * pi/2  middle
	.double	0d+1.52771359575124969107e-16	# 17 * pi/2  middle
	.double	0d+2.14003699532492626384e-16	# 18 * pi/2  middle
	.double	0d-1.68853170360202329427e-16	# 19 * pi/2  middle
	.double	0d-1.07620830402834670609e-16	# 20 * pi/2  middle
	.double	0d+3.97700719404595604379e-16	# 21 * pi/2  middle
	.double	0d-4.29245360338161967602e-16	# 22 * pi/2  middle
	.double	0d-3.68013020380794313406e-16	# 23 * pi/2  middle
	.double	0d-3.06780680423426653047e-16	# 24 * pi/2  middle
	.double	0d-2.45548340466059054318e-16	# 25 * pi/2  middle
	.double	0d-1.84316000508691335411e-16	# 26 * pi/2  middle
	.double	0d-1.23083660551323675053e-16	# 27 * pi/2  middle
	.double	0d-6.18513205939560162346e-17	# 28 * pi/2  middle
	.double	0d-6.18980636588357585202e-19	# 29 * pi/2  middle

leading:
	.double	0d+0.00000000000000000000e+00	#  0 * pi/2  leading
	.double	0d+1.57079632679489661351e+00	#  1 * pi/2  leading
	.double	0d+3.14159265358979322702e+00	#  2 * pi/2  leading
	.double	0d+4.71238898038468989604e+00	#  3 * pi/2  leading
	.double	0d+6.28318530717958645404e+00	#  4 * pi/2  leading
	.double	0d+7.85398163397448312306e+00	#  5 * pi/2  leading
	.double	0d+9.42477796076937979208e+00	#  6 * pi/2  leading
	.double	0d+1.09955742875642763501e+01	#  7 * pi/2  leading
	.double	0d+1.25663706143591729081e+01	#  8 * pi/2  leading
	.double	0d+1.41371669411540694661e+01	#  9 * pi/2  leading
	.double	0d+1.57079632679489662461e+01	# 10 * pi/2  leading
	.double	0d+1.72787595947438630262e+01	# 11 * pi/2  leading
	.double	0d+1.88495559215387595842e+01	# 12 * pi/2  leading
	.double	0d+2.04203522483336561422e+01	# 13 * pi/2  leading
	.double	0d+2.19911485751285527002e+01	# 14 * pi/2  leading
	.double	0d+2.35619449019234492582e+01	# 15 * pi/2  leading
	.double	0d+2.51327412287183458162e+01	# 16 * pi/2  leading
	.double	0d+2.67035375555132423742e+01	# 17 * pi/2  leading
	.double	0d+2.82743338823081389322e+01	# 18 * pi/2  leading
	.double	0d+2.98451302091030359342e+01	# 19 * pi/2  leading
	.double	0d+3.14159265358979324922e+01	# 20 * pi/2  leading
	.double	0d+3.29867228626928286062e+01	# 21 * pi/2  leading
	.double	0d+3.45575191894877260523e+01	# 22 * pi/2  leading
	.double	0d+3.61283155162826226103e+01	# 23 * pi/2  leading
	.double	0d+3.76991118430775191683e+01	# 24 * pi/2  leading
	.double	0d+3.92699081698724157263e+01	# 25 * pi/2  leading
	.double	0d+4.08407044966673122843e+01	# 26 * pi/2  leading
	.double	0d+4.24115008234622088423e+01	# 27 * pi/2  leading
	.double	0d+4.39822971502571054003e+01	# 28 * pi/2  leading
	.double	0d+4.55530934770520019583e+01	# 29 * pi/2  leading

twoOverPi:
	.double	0d+6.36619772367581343076e-01

	.text
	_ALIGN_TEXT

table_lookup:
	muld3	%r3,twoOverPi,%r0
	cvtrdl	%r0,%r0			# n = nearest int to ((2/pi)*|x|) rnded
	subd2	leading[%r0],%r3	# p = (|x| - leading n*pi/2) exactly
	subd3	middle[%r0],%r3,%r1	# q = (p - middle n*pi/2) rounded
	subd2	%r1,%r3			# r = (p - q)
	subd2	middle[%r0],%r3		# r = r - middle n*pi/2
	subd2	trailing[%r0],%r3	# r = r - trailing n*pi/2 rounded
/*
 * If the original argument was negative,
 * negate the reduced argument and
 * adjust the octant/quadrant number.
 */
	tstw	4(%ap)
	bgeq	abs2
	mnegf	%r1,%r1
	mnegf	%r3,%r3
/*	subb3	%r0,$8,%r0	...used for pi/4 reduction -S.McD */
	subb3	%r0,$4,%r0
abs2:
/*
 * Clear all unneeded octant/quadrant bits.
 */
/*	bicb2	$0xf8,%r0	...used for pi/4 reduction -S.McD */
	bicb2	$0xfc,%r0
	rsb
/*
 *  p.0
 */
#ifdef __ELF__
	.section .rodata
#else
	.text
#endif
	_ALIGN_TEXT
/*
 * Only 256 (actually 225) bits of 2/pi are needed for VAX double
 * precision; this was determined by enumerating all the nearest
 * machine integer multiples of pi/2 using continued fractions.
 * (8a8d3673775b7ff7 required the most bits.)		-S.McD
 */
	.long	0
	.long	0
	.long	0xaef1586d
	.long	0x9458eaf7
	.long	0x10e4107f
	.long	0xd8a5664f
	.long	0x4d377036
	.long	0x09d5f47d
	.long	0x91054a7f
	.long	0xbe60db93
bits2opi:
	.long	0x00000028
	.long	0
/*
 * Note: wherever you see the word `octant', read `quadrant'.
 * Currently this code is set up for pi/2 argument reduction.
 * By uncommenting/commenting the appropriate lines, it will
 * also serve as a pi/4 argument reduction code.
 */
	.text

/*  p.1
 *  Trigred performs argument reduction
 *  for the trigonometric functions.  It
 *  takes one input argument, a D-format
 *  number in %r1/%r0 .  The magnitude of
 *  the input argument must be greater
 *  than or equal to 1/2 .  Trigred produces
 *  three results:  the number of the octant
 *  occupied by the argument, the reduced
 *  argument, and an extension of the
 *  reduced argument.  The octant number is
 *  returned in %r0 .  The reduced argument
 *  is returned as a D-format number in
 *  %r2/%r1 .  An 8 bit extension of the
 *  reduced argument is returned as an
 *  F-format number in %r3 .
 *  p.2
 */
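
/*
 * Illustrative note (not in the original source): trigred is a fixed-point
 * multiplication of the argument by 2/pi, in the same spirit as what is now
 * usually called Payne-Hanek reduction.  In outline:
 *
 *	1. Split x into a 64-bit fraction and an exponent.
 *	2. From the stored bit string of 2/pi (ending at bits2opi), select
 *	   the 128-bit window that lines up with the exponent, so that whole
 *	   multiples of 2*pi drop out of the product.
 *	3. Form the wide product fraction * (2/pi window); the two lowest
 *	   bits of its integer part give the quadrant, and the bits below
 *	   the binary point are the fraction of a quadrant (more product
 *	   bits are generated on demand).
 *	4. Multiply that fraction by pi/2 (the 64-bit fixed-point constant
 *	   0xc90fdaa22168c235) to recover the reduced argument in D-format,
 *	   plus an 8-bit F-format extension.
 */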
trigred:
/*
 * Save the sign of the input argument.
 */
	movw	%r0,-(%sp)
/*
 * Extract the exponent field.
 */
	extzv	$7,$7,%r0,%r2
/*
 * Convert the fraction part of the input
 * argument into a quadword integer.
 */
	bicw2	$0xff80,%r0
	bisb2	$0x80,%r0	# -S.McD
	rotl	$16,%r0,%r0
	rotl	$16,%r1,%r1
/*
 * If %r1 is negative, add 1 to %r0 .  This
 * adjustment is made so that the two's
 * complement multiplications done later
 * will produce unsigned results.
 */
	bgeq	posmid
	incl	%r0
posmid:
/*  p.3
 *
 * Set %r3 to the address of the first quadword
 * used to obtain the needed portion of 2/pi .
 * The address is longword aligned to ensure
 * efficient access.
 */
	ashl	$-3,%r2,%r3
	bicb2	$3,%r3
	mnegl	%r3,%r3
	movab	bits2opi[%r3],%r3
/*
 * Set %r2 to the size of the shift needed to
 * obtain the correct portion of 2/pi .
 */
	bicb2	$0xe0,%r2
/*  p.4
 *
 * Move the needed 128 bits of 2/pi into
 * %r11 - %r8 .  Adjust the numbers to allow
 * for unsigned multiplication.
 */
	ashq	%r2,(%r3),%r10

	subl2	$4,%r3
	ashq	%r2,(%r3),%r9
	bgeq	signoff1
	incl	%r11
signoff1:
	subl2	$4,%r3
	ashq	%r2,(%r3),%r8
	bgeq	signoff2
	incl	%r10
signoff2:
	subl2	$4,%r3
	ashq	%r2,(%r3),%r7
	bgeq	signoff3
	incl	%r9
signoff3:
/*  p.5
 *
 * Multiply the contents of %r0/%r1 by the
 * slice of 2/pi in %r11 - %r8 .
 */
	emul	%r0,%r8,$0,%r4
	emul	%r0,%r9,%r5,%r5
	emul	%r0,%r10,%r6,%r6

	emul	%r1,%r8,$0,%r7
	emul	%r1,%r9,%r8,%r8
	emul	%r1,%r10,%r9,%r9
	emul	%r1,%r11,%r10,%r10

	addl2	%r4,%r8
	adwc	%r5,%r9
	adwc	%r6,%r10
/*  p.6
 *
 * If there are more than five leading zeros
 * after the first two quotient bits or if there
 * are more than five leading ones after the first
 * two quotient bits, generate more fraction bits.
 * Otherwise, branch to code to produce the result.
 */
	bicl3	$0xc1ffffff,%r10,%r4
	beql	more1
	cmpl	$0x3e000000,%r4
	bneq	result
more1:
/*  p.7
 *
 * Generate another 32 result bits.
 */
	subl2	$4,%r3
	ashq	%r2,(%r3),%r5
	bgeq	signoff4

	emul	%r1,%r6,$0,%r4
	addl2	%r1,%r5
	emul	%r0,%r6,%r5,%r5
	addl2	%r0,%r6
	jbr	addbits1

signoff4:
	emul	%r1,%r6,$0,%r4
	emul	%r0,%r6,%r5,%r5

addbits1:
	addl2	%r5,%r7
	adwc	%r6,%r8
	adwc	$0,%r9
	adwc	$0,%r10
/*  p.8
 *
 * Check for massive cancellation.
 */
	bicl3	$0xc0000000,%r10,%r6
/*	bneq	more2			-S.McD  Test was backwards */
	beql	more2
	cmpl	$0x3fffffff,%r6
	bneq	result
more2:
/*  p.9
 *
 * If massive cancellation has occurred,
 * generate another 24 result bits.
 * Testing has shown there will always be
 * enough bits after this point.
 */
	subl2	$4,%r3
	ashq	%r2,(%r3),%r5
	bgeq	signoff5

	emul	%r0,%r6,%r4,%r5
	addl2	%r0,%r6
	jbr	addbits2

signoff5:
	emul	%r0,%r6,%r4,%r5

addbits2:
	addl2	%r6,%r7
	adwc	$0,%r8
	adwc	$0,%r9
	adwc	$0,%r10
/*  p.10
 *
 * The following code produces the reduced
 * argument from the product bits contained
 * in %r10 - %r7 .
 */
result:
/*
 * Extract the octant number from %r10 .
 */
/*	extzv	$29,$3,%r10,%r0	...used for pi/4 reduction -S.McD */
	extzv	$30,$2,%r10,%r0
/*
 * Clear the octant bits in %r10 .
 */
/*	bicl2	$0xe0000000,%r10	...used for pi/4 reduction -S.McD */
	bicl2	$0xc0000000,%r10
/*
 * Zero the sign flag.
 */
	clrl	%r5
/*  p.11
 *
 * Check to see if the fraction is greater than
 * or equal to one-half.  If it is, add one
 * to the octant number, set the sign flag
 * on, and replace the fraction with 1 minus
 * the fraction.
 */
/*	bitl	$0x10000000,%r10	...used for pi/4 reduction -S.McD */
	bitl	$0x20000000,%r10
	beql	small
	incl	%r0
	incl	%r5
/*	subl3	%r10,$0x1fffffff,%r10	...used for pi/4 reduction -S.McD */
	subl3	%r10,$0x3fffffff,%r10
	mcoml	%r9,%r9
	mcoml	%r8,%r8
	mcoml	%r7,%r7
small:
/*  p.12
 *
 * Test whether the first 29 bits of the	...used for pi/4 reduction -S.McD
 * Test whether the first 30 bits of the
 * fraction are zero.
 */
	tstl	%r10
	beql	tiny
/*
 * Find the position of the first one bit in %r10 .
 */
	cvtld	%r10,%r1
	extzv	$7,$7,%r1,%r1
/*
 * Compute the size of the shift needed.
 */
	subl3	%r1,$32,%r6
/*
 * Shift up the high order 64 bits of the
 * product.
 */
	ashq	%r6,%r9,%r10
	ashq	%r6,%r8,%r9
	jbr	mult
/*  p.13
 *
 * Test to see if the sign bit of %r9 is on.
 */
tiny:
	tstl	%r9
	bgeq	tinier
/*
 * If it is, shift the product bits up 32 bits.
 */
	movl	$32,%r6
	movq	%r8,%r10
	tstl	%r10
	jbr	mult
/*  p.14
 *
 * Test whether %r9 is zero.  It is probably
 * impossible for both %r10 and %r9 to be
 * zero, but until proven to be so, the test
 * must be made.
 */
tinier:
	beql	zero
/*
 * Find the position of the first one bit in %r9 .
 */
	cvtld	%r9,%r1
	extzv	$7,$7,%r1,%r1
/*
 * Compute the size of the shift needed.
 */
	subl3	%r1,$32,%r1
	addl3	$32,%r1,%r6
/*
 * Shift up the high order 64 bits of the
 * product.
 */
	ashq	%r1,%r8,%r10
	ashq	%r1,%r7,%r9
	jbr	mult
/*  p.15
 *
 * The following code sets the reduced
 * argument to zero.
 */
zero:
	clrl	%r1
	clrl	%r2
	clrl	%r3
	jbr	return
/*  p.16
 *
 * At this point, %r0 contains the octant number,
 * %r6 indicates the number of bits the fraction
 * has been shifted, %r5 indicates the sign of
 * the fraction, %r11/%r10 contain the high order
 * 64 bits of the fraction, and the condition
 * codes indicate whether the sign bit of %r10
 * is on.  The following code multiplies the
 * fraction by pi/2 .
 */
mult:
/*
 * Save %r11/%r10 in %r4/%r1 .		-S.McD
 */
	movl	%r11,%r4
	movl	%r10,%r1
/*
 * If the sign bit of %r10 is on, add 1 to %r11 .
 */
	bgeq	signoff6
	incl	%r11
signoff6:
/*  p.17
 *
 * Move pi/2 into %r3/%r2 .
 */
	movq	$0xc90fdaa22168c235,%r2
/*
 * Multiply the fraction by the portion of pi/2
 * in %r2 .
 */
	emul	%r2,%r10,$0,%r7
	emul	%r2,%r11,%r8,%r7
/*
 * Multiply the fraction by the portion of pi/2
 * in %r3 .
 */
	emul	%r3,%r10,$0,%r9
	emul	%r3,%r11,%r10,%r10
/*
 * Add the product bits together.
 */
	addl2	%r7,%r9
	adwc	%r8,%r10
	adwc	$0,%r11
/*
 * Compensate for not sign extending %r8 above.	-S.McD
 */
	tstl	%r8
	bgeq	signoff6a
	decl	%r11
signoff6a:
/*
 * Compensate for %r11/%r10 being unsigned.	-S.McD
 */
	addl2	%r2,%r10
	adwc	%r3,%r11
/*
 * Compensate for %r3/%r2 being unsigned.	-S.McD
 */
	addl2	%r1,%r10
	adwc	%r4,%r11
/*  p.18
 *
 * If the sign bit of %r11 is zero, shift the
 * product bits up one bit and increment %r6 .
 */
	blss	signon
	incl	%r6
	ashq	$1,%r10,%r10
	tstl	%r9
	bgeq	signoff7
	incl	%r10
signoff7:
signon:
/*  p.19
 *
 * Shift the 56 most significant product
 * bits into %r9/%r8 .  The sign extension
 * will be handled later.
 */
	ashq	$-8,%r10,%r8
/*
 * Convert the low order 8 bits of %r10
 * into an F-format number.
 */
	cvtbf	%r10,%r3
/*
 * If the result of the conversion was
 * negative, add 1 to %r9/%r8 .
 */
	bgeq	chop
	incl	%r8
	adwc	$0,%r9
/*
 * If %r9 is now zero, branch to special
 * code to handle that possibility.
 */
	beql	carryout
chop:
/*  p.20
 *
 * Convert the number in %r9/%r8 into a
 * D-format number in %r2/%r1 .
 */
	rotl	$16,%r8,%r2
	rotl	$16,%r9,%r1
/*
 * Set the exponent field to the appropriate
 * value.  Note that the extra bits created by
 * sign extension are now eliminated.
 */
	subw3	%r6,$131,%r6
	insv	%r6,$7,$9,%r1
/*
 * Set the exponent field of the F-format
 * number in %r3 to the appropriate value.
 */
	tstf	%r3
	beql	return
/*	extzv	$7,$8,%r3,%r4			-S.McD */
	extzv	$7,$7,%r3,%r4
	addw2	%r4,%r6
/*	subw2	$217,%r6			-S.McD */
	subw2	$64,%r6
	insv	%r6,$7,$8,%r3
	jbr	return
/*  p.21
 *
 * The following code generates the appropriate
 * result for the unlikely possibility that
 * rounding the number in %r9/%r8 resulted in
 * a carry out.
 */
carryout:
	clrl	%r1
	clrl	%r2
	subw3	%r6,$132,%r6
	insv	%r6,$7,$9,%r1
	tstf	%r3
	beql	return
	extzv	$7,$8,%r3,%r4
	addw2	%r4,%r6
	subw2	$218,%r6
	insv	%r6,$7,$8,%r3
/*  p.22
 *
 * The following code makes any needed
 * adjustments to the signs of the
 * results or to the octant number, and
 * then returns.
 */
return:
/*
 * Test if the fraction was greater than or
 * equal to 1/2 .  If so, negate the reduced
 * argument.
 */
	blbc	%r5,signoff8
	mnegf	%r1,%r1
	mnegf	%r3,%r3
signoff8:
/*  p.23
 *
 * If the original argument was negative,
 * negate the reduced argument and
 * adjust the octant number.
 */
	tstw	(%sp)+
	bgeq	signoff9
	mnegf	%r1,%r1
	mnegf	%r3,%r3
/*	subb3	%r0,$8,%r0	...used for pi/4 reduction -S.McD */
	subb3	%r0,$4,%r0
signoff9:
/*
 * Clear all unneeded octant bits.
 *
 *	bicb2	$0xf8,%r0	...used for pi/4 reduction -S.McD */
	bicb2	$0xfc,%r0
/*
 * Return.
 */
	rsb