n_argred.S revision 1.8 1 1.8 agc /* $NetBSD: n_argred.S,v 1.8 2003/08/07 16:44:44 agc Exp $ */
2 1.1 ragge /*
3 1.1 ragge * Copyright (c) 1985, 1993
4 1.1 ragge * The Regents of the University of California. All rights reserved.
5 1.1 ragge *
6 1.1 ragge * Redistribution and use in source and binary forms, with or without
7 1.1 ragge * modification, are permitted provided that the following conditions
8 1.1 ragge * are met:
9 1.1 ragge * 1. Redistributions of source code must retain the above copyright
10 1.1 ragge * notice, this list of conditions and the following disclaimer.
11 1.1 ragge * 2. Redistributions in binary form must reproduce the above copyright
12 1.1 ragge * notice, this list of conditions and the following disclaimer in the
13 1.1 ragge * documentation and/or other materials provided with the distribution.
14 1.8 agc * 3. Neither the name of the University nor the names of its contributors
15 1.1 ragge * may be used to endorse or promote products derived from this software
16 1.1 ragge * without specific prior written permission.
17 1.1 ragge *
18 1.1 ragge * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 1.1 ragge * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 1.1 ragge * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 1.1 ragge * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 1.1 ragge * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 1.1 ragge * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 1.1 ragge * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 1.1 ragge * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 1.1 ragge * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 1.1 ragge * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 1.1 ragge * SUCH DAMAGE.
29 1.1 ragge *
30 1.1 ragge * @(#)argred.s 8.1 (Berkeley) 6/4/93
31 1.1 ragge */
32 1.1 ragge
33 1.5 matt #include <machine/asm.h>
34 1.5 matt
35 1.1 ragge /*
36 1.1 ragge * libm$argred implements Bob Corbett's argument reduction and
37 1.1 ragge * libm$sincos implements Peter Tang's double precision sin/cos.
38 1.4 simonb *
39 1.1 ragge * Note: The two entry points libm$argred and libm$sincos are meant
40 1.1 ragge * to be used only by _sin, _cos and _tan.
41 1.1 ragge *
42 1.1 ragge * method: true range reduction to [-pi/4,pi/4], P. Tang & B. Corbett
43 1.1 ragge * S. McDonald, April 4, 1985
44 1.1 ragge */
45 1.1 ragge
 46 1.5 matt ENTRY(__libm_argred, 0)
/*
 * Input:  x , a D-format number in %r1/%r0 (see the trigred header
 *         comment below).
 * The comparison constant below is 29 * pi/2 , the largest entry in
 * the `leading' table, i.e. the largest |x| that table_lookup handles.
 */
 47 1.1 ragge /*
 48 1.1 ragge * Compare the argument with the largest possible that can
 49 1.7 matt * be reduced by table lookup. %r3 := |x| will be used in table_lookup .
 50 1.1 ragge */
 51 1.7 matt movd %r0,%r3
 52 1.1 ragge bgeq abs1
 53 1.7 matt mnegd %r3,%r3
 54 1.1 ragge abs1:
 55 1.7 matt cmpd %r3,$0d+4.55530934770520019583e+01
 56 1.1 ragge blss small_arg
 57 1.1 ragge jsb trigred
 58 1.1 ragge rsb
 59 1.1 ragge small_arg:
 60 1.1 ragge jsb table_lookup
 61 1.1 ragge rsb
 62 1.1 ragge /*
 63 1.1 ragge * At this point,
 64 1.7 matt * %r0 contains the quadrant number, 0, 1, 2, or 3;
 65 1.7 matt * %r2/%r1 contains the reduced argument as a D-format number;
 66 1.7 matt * %r3 contains a F-format extension to the reduced argument;
 67 1.7 matt * %r4 contains a 0 or 1 corresponding to a sin or cos entry.
 68 1.1 ragge */
69 1.5 matt
 70 1.5 matt ENTRY(__libm_sincos, 0)
/*
 * Input (the register state produced by __libm_argred): quadrant
 * number in %r0 , reduced argument in %r2/%r1 , F-format extension
 * `alpha' in %r3 , and a 0 (sin) or 1 (cos) flag in %r4 .
 * Output: the requested sin or cos as a D-format number in %r1/%r0 .
 */
 71 1.1 ragge /*
 72 1.1 ragge * Compensate for a cosine entry by adding one to the quadrant number.
 73 1.1 ragge */
 74 1.7 matt addl2 %r4,%r0
 75 1.1 ragge /*
 76 1.7 matt * Polyd clobbers %r5-%r0 ; save X in %r7/%r6 .
 77 1.1 ragge * This can be avoided by rewriting trigred .
 78 1.1 ragge */
 79 1.7 matt movd %r1,%r6
 80 1.1 ragge /*
 81 1.7 matt * Likewise, save alpha in %r8 .
 82 1.1 ragge * This can be avoided by rewriting trigred .
 83 1.1 ragge */
 84 1.7 matt movf %r3,%r8
 85 1.1 ragge /*
 86 1.1 ragge * Odd or even quadrant? cosine if odd, sine otherwise.
 87 1.7 matt * Save floor(quadrant/2) in %r9 ; it determines the final sign.
 88 1.1 ragge */
 89 1.7 matt rotl $-1,%r0,%r9
 90 1.1 ragge blss cosine
 91 1.1 ragge sine:
 92 1.7 matt muld2 %r1,%r1 # Xsq = X * X
 93 1.7 matt cmpw $0x2480,%r1 # [zl] Xsq > 2^-56?
 94 1.1 ragge blss 1f # [zl] yes, go ahead and do polyd
 95 1.7 matt clrq %r1 # [zl] work around 11/780 FPA polyd bug
 96 1.1 ragge 1:
 97 1.7 matt polyd %r1,$7,sin_coef # Q = P(Xsq) , of deg 7
 98 1.7 matt mulf3 $0f3.0,%r8,%r4 # beta = 3 * alpha
 99 1.7 matt mulf2 %r0,%r4 # beta = Q * beta
100 1.7 matt addf2 %r8,%r4 # beta = alpha + beta
101 1.7 matt muld2 %r6,%r0 # S(X) = X * Q
102 1.7 matt /* cvtfd %r4,%r4 ... %r5 = 0 after a polyd. */
103 1.7 matt addd2 %r4,%r0 # S(X) = beta + S(X)
104 1.7 matt addd2 %r6,%r0 # S(X) = X + S(X)
105 1.5 matt jbr done
106 1.1 ragge cosine:
107 1.7 matt muld2 %r6,%r6 # Xsq = X * X
108 1.1 ragge beql zero_arg # Xsq == 0 , so C(X) = 1 below
109 1.7 matt mulf2 %r1,%r8 # beta = X * alpha
110 1.7 matt polyd %r6,$7,cos_coef /* Q = P'(Xsq) , of deg 7 */
111 1.7 matt subd3 %r0,%r8,%r0 # beta = beta - Q
112 1.7 matt subw2 $0x80,%r6 # Xsq = Xsq / 2
113 1.7 matt addd2 %r0,%r6 # Xsq = Xsq + beta
114 1.1 ragge zero_arg:
115 1.7 matt subd3 %r6,$0d1.0,%r0 # C(X) = 1 - Xsq
116 1.1 ragge done:
117 1.7 matt blbc %r9,even # low bit of floor(quadrant/2) gives the sign
118 1.7 matt mnegd %r0,%r0
119 1.1 ragge even:
120 1.1 ragge rsb
121 1.1 ragge
122 1.6 matt #ifdef __ELF__
123 1.6 matt .section .rodata
124 1.6 matt #else
125 1.6 matt .text
126 1.6 matt #endif
127 1.5 matt _ALIGN_TEXT
128 1.1 ragge
/*
 * Polynomial coefficients for Tang's sin/cos approximations, stored
 * highest degree first as required by the polyd instructions in
 * __libm_sincos above.
 */
129 1.1 ragge sin_coef:
130 1.1 ragge .double 0d-7.53080332264191085773e-13 # s7 = 2^-29 -1.a7f2504ffc49f8..
131 1.1 ragge .double 0d+1.60573519267703489121e-10 # s6 = 2^-21 1.611adaede473c8..
132 1.1 ragge .double 0d-2.50520965150706067211e-08 # s5 = 2^-1a -1.ae644921ed8382..
133 1.1 ragge .double 0d+2.75573191800593885716e-06 # s4 = 2^-13 1.71de3a4b884278..
134 1.1 ragge .double 0d-1.98412698411850507950e-04 # s3 = 2^-0d -1.a01a01a0125e7d..
135 1.1 ragge .double 0d+8.33333333333325688985e-03 # s2 = 2^-07 1.11111111110e50
136 1.1 ragge .double 0d-1.66666666666666664354e-01 # s1 = 2^-03 -1.55555555555554
137 1.1 ragge .double 0d+0.00000000000000000000e+00 # s0 = 0
138 1.1 ragge
139 1.1 ragge cos_coef:
140 1.1 ragge .double 0d-1.13006966202629430300e-11 # c7 = 2^-25 -1.8D9BA04D1374BE..
141 1.1 ragge .double 0d+2.08746646574796004700e-09 # c6 = 2^-1D 1.1EE632650350BA..
142 1.1 ragge .double 0d-2.75573073031284417300e-07 # c5 = 2^-16 -1.27E4F31411719E..
143 1.1 ragge .double 0d+2.48015872682668025200e-05 # c4 = 2^-10 1.A01A0196B902E8..
144 1.1 ragge .double 0d-1.38888888888464709200e-03 # c3 = 2^-0A -1.6C16C16C11FACE..
145 1.1 ragge .double 0d+4.16666666666664761400e-02 # c2 = 2^-05 1.5555555555539E
146 1.1 ragge .double 0d+0.00000000000000000000e+00 # c1 = 0
147 1.1 ragge .double 0d+0.00000000000000000000e+00 # c0 = 0
148 1.1 ragge
149 1.1 ragge /*
150 1.1 ragge * Multiples of pi/2 expressed as the sum of three doubles,
151 1.1 ragge *
152 1.1 ragge * trailing: n * pi/2 , n = 0, 1, 2, ..., 29
153 1.1 ragge * trailing[n] ,
154 1.1 ragge *
155 1.1 ragge * middle: n * pi/2 , n = 0, 1, 2, ..., 29
156 1.1 ragge * middle[n] ,
157 1.1 ragge *
158 1.1 ragge * leading: n * pi/2 , n = 0, 1, 2, ..., 29
159 1.1 ragge * leading[n] ,
160 1.1 ragge *
161 1.1 ragge * where
162 1.1 ragge * leading[n] := (n * pi/2) rounded,
163 1.1 ragge * middle[n] := (n * pi/2 - leading[n]) rounded,
164 1.1 ragge * trailing[n] := (( n * pi/2 - leading[n]) - middle[n]) rounded .
165 1.1 ragge */
166 1.1 ragge trailing:
167 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 trailing
168 1.1 ragge .double 0d+4.33590506506189049611e-35 # 1 * pi/2 trailing
169 1.1 ragge .double 0d+8.67181013012378099223e-35 # 2 * pi/2 trailing
170 1.1 ragge .double 0d+1.30077151951856714215e-34 # 3 * pi/2 trailing
171 1.1 ragge .double 0d+1.73436202602475619845e-34 # 4 * pi/2 trailing
172 1.1 ragge .double 0d-1.68390735624352669192e-34 # 5 * pi/2 trailing
173 1.1 ragge .double 0d+2.60154303903713428430e-34 # 6 * pi/2 trailing
174 1.1 ragge .double 0d-8.16726343231148352150e-35 # 7 * pi/2 trailing
175 1.1 ragge .double 0d+3.46872405204951239689e-34 # 8 * pi/2 trailing
176 1.1 ragge .double 0d+3.90231455855570147991e-34 # 9 * pi/2 trailing
177 1.1 ragge .double 0d-3.36781471248705338384e-34 # 10 * pi/2 trailing
178 1.1 ragge .double 0d-1.06379439835298071785e-33 # 11 * pi/2 trailing
179 1.1 ragge .double 0d+5.20308607807426856861e-34 # 12 * pi/2 trailing
180 1.1 ragge .double 0d+5.63667658458045770509e-34 # 13 * pi/2 trailing
181 1.1 ragge .double 0d-1.63345268646229670430e-34 # 14 * pi/2 trailing
182 1.1 ragge .double 0d-1.19986217995610764801e-34 # 15 * pi/2 trailing
183 1.1 ragge .double 0d+6.93744810409902479378e-34 # 16 * pi/2 trailing
184 1.1 ragge .double 0d-8.03640094449267300110e-34 # 17 * pi/2 trailing
185 1.1 ragge .double 0d+7.80462911711140295982e-34 # 18 * pi/2 trailing
186 1.1 ragge .double 0d-7.16921993148029483506e-34 # 19 * pi/2 trailing
187 1.1 ragge .double 0d-6.73562942497410676769e-34 # 20 * pi/2 trailing
188 1.1 ragge .double 0d-6.30203891846791677593e-34 # 21 * pi/2 trailing
189 1.1 ragge .double 0d-2.12758879670596143570e-33 # 22 * pi/2 trailing
190 1.1 ragge .double 0d+2.53800212047402350390e-33 # 23 * pi/2 trailing
191 1.1 ragge .double 0d+1.04061721561485371372e-33 # 24 * pi/2 trailing
192 1.1 ragge .double 0d+6.11729905311472319056e-32 # 25 * pi/2 trailing
193 1.1 ragge .double 0d+1.12733531691609154102e-33 # 26 * pi/2 trailing
194 1.1 ragge .double 0d-3.70049587943078297272e-34 # 27 * pi/2 trailing
195 1.1 ragge .double 0d-3.26690537292459340860e-34 # 28 * pi/2 trailing
196 1.1 ragge .double 0d-1.14812616507957271361e-34 # 29 * pi/2 trailing
197 1.1 ragge
198 1.1 ragge middle:
199 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 middle
200 1.1 ragge .double 0d+5.72118872610983179676e-18 # 1 * pi/2 middle
201 1.1 ragge .double 0d+1.14423774522196635935e-17 # 2 * pi/2 middle
202 1.1 ragge .double 0d-3.83475850529283316309e-17 # 3 * pi/2 middle
203 1.1 ragge .double 0d+2.28847549044393271871e-17 # 4 * pi/2 middle
204 1.1 ragge .double 0d-2.69052076007086676522e-17 # 5 * pi/2 middle
205 1.1 ragge .double 0d-7.66951701058566632618e-17 # 6 * pi/2 middle
206 1.1 ragge .double 0d-1.54628301484890040587e-17 # 7 * pi/2 middle
207 1.1 ragge .double 0d+4.57695098088786543741e-17 # 8 * pi/2 middle
208 1.1 ragge .double 0d+1.07001849766246313192e-16 # 9 * pi/2 middle
209 1.1 ragge .double 0d-5.38104152014173353044e-17 # 10 * pi/2 middle
210 1.1 ragge .double 0d-2.14622680169080983801e-16 # 11 * pi/2 middle
211 1.1 ragge .double 0d-1.53390340211713326524e-16 # 12 * pi/2 middle
212 1.1 ragge .double 0d-9.21580002543456677056e-17 # 13 * pi/2 middle
213 1.1 ragge .double 0d-3.09256602969780081173e-17 # 14 * pi/2 middle
214 1.1 ragge .double 0d+3.03066796603896507006e-17 # 15 * pi/2 middle
215 1.1 ragge .double 0d+9.15390196177573087482e-17 # 16 * pi/2 middle
216 1.1 ragge .double 0d+1.52771359575124969107e-16 # 17 * pi/2 middle
217 1.1 ragge .double 0d+2.14003699532492626384e-16 # 18 * pi/2 middle
218 1.1 ragge .double 0d-1.68853170360202329427e-16 # 19 * pi/2 middle
219 1.1 ragge .double 0d-1.07620830402834670609e-16 # 20 * pi/2 middle
220 1.1 ragge .double 0d+3.97700719404595604379e-16 # 21 * pi/2 middle
221 1.1 ragge .double 0d-4.29245360338161967602e-16 # 22 * pi/2 middle
222 1.1 ragge .double 0d-3.68013020380794313406e-16 # 23 * pi/2 middle
223 1.1 ragge .double 0d-3.06780680423426653047e-16 # 24 * pi/2 middle
224 1.1 ragge .double 0d-2.45548340466059054318e-16 # 25 * pi/2 middle
225 1.1 ragge .double 0d-1.84316000508691335411e-16 # 26 * pi/2 middle
226 1.1 ragge .double 0d-1.23083660551323675053e-16 # 27 * pi/2 middle
227 1.1 ragge .double 0d-6.18513205939560162346e-17 # 28 * pi/2 middle
228 1.1 ragge .double 0d-6.18980636588357585202e-19 # 29 * pi/2 middle
229 1.1 ragge
230 1.1 ragge leading:
231 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 leading
232 1.1 ragge .double 0d+1.57079632679489661351e+00 # 1 * pi/2 leading
233 1.1 ragge .double 0d+3.14159265358979322702e+00 # 2 * pi/2 leading
234 1.1 ragge .double 0d+4.71238898038468989604e+00 # 3 * pi/2 leading
235 1.1 ragge .double 0d+6.28318530717958645404e+00 # 4 * pi/2 leading
236 1.1 ragge .double 0d+7.85398163397448312306e+00 # 5 * pi/2 leading
237 1.1 ragge .double 0d+9.42477796076937979208e+00 # 6 * pi/2 leading
238 1.1 ragge .double 0d+1.09955742875642763501e+01 # 7 * pi/2 leading
239 1.1 ragge .double 0d+1.25663706143591729081e+01 # 8 * pi/2 leading
240 1.1 ragge .double 0d+1.41371669411540694661e+01 # 9 * pi/2 leading
241 1.1 ragge .double 0d+1.57079632679489662461e+01 # 10 * pi/2 leading
242 1.1 ragge .double 0d+1.72787595947438630262e+01 # 11 * pi/2 leading
243 1.1 ragge .double 0d+1.88495559215387595842e+01 # 12 * pi/2 leading
244 1.1 ragge .double 0d+2.04203522483336561422e+01 # 13 * pi/2 leading
245 1.1 ragge .double 0d+2.19911485751285527002e+01 # 14 * pi/2 leading
246 1.1 ragge .double 0d+2.35619449019234492582e+01 # 15 * pi/2 leading
247 1.1 ragge .double 0d+2.51327412287183458162e+01 # 16 * pi/2 leading
248 1.1 ragge .double 0d+2.67035375555132423742e+01 # 17 * pi/2 leading
249 1.1 ragge .double 0d+2.82743338823081389322e+01 # 18 * pi/2 leading
250 1.1 ragge .double 0d+2.98451302091030359342e+01 # 19 * pi/2 leading
251 1.1 ragge .double 0d+3.14159265358979324922e+01 # 20 * pi/2 leading
252 1.1 ragge .double 0d+3.29867228626928286062e+01 # 21 * pi/2 leading
253 1.1 ragge .double 0d+3.45575191894877260523e+01 # 22 * pi/2 leading
254 1.1 ragge .double 0d+3.61283155162826226103e+01 # 23 * pi/2 leading
255 1.1 ragge .double 0d+3.76991118430775191683e+01 # 24 * pi/2 leading
256 1.1 ragge .double 0d+3.92699081698724157263e+01 # 25 * pi/2 leading
257 1.1 ragge .double 0d+4.08407044966673122843e+01 # 26 * pi/2 leading
258 1.1 ragge .double 0d+4.24115008234622088423e+01 # 27 * pi/2 leading
259 1.1 ragge .double 0d+4.39822971502571054003e+01 # 28 * pi/2 leading
260 1.1 ragge .double 0d+4.55530934770520019583e+01 # 29 * pi/2 leading
261 1.1 ragge
262 1.1 ragge twoOverPi:
263 1.1 ragge .double 0d+6.36619772367581343076e-01 # 2/pi ; used by table_lookup to pick n
264 1.5 matt
265 1.1 ragge .text
266 1.5 matt _ALIGN_TEXT
267 1.1 ragge
/*
 * table_lookup: reduce |x| (passed in %r3 as a D-format number, with
 * the original signed x still on the argument list at 4(%ap)) by the
 * nearest multiple n*pi/2 , n = 0..29 , using the three-double tables
 * above. Returns the quadrant number in %r0 , the reduced argument in
 * %r2/%r1 , and an F-format extension to it in %r3 .
 */
268 1.1 ragge table_lookup:
269 1.7 matt muld3 %r3,twoOverPi,%r0
270 1.7 matt cvtrdl %r0,%r0 # n = nearest int to ((2/pi)*|x|) rnded
271 1.7 matt subd2 leading[%r0],%r3 # p = (|x| - leading n*pi/2) exactly
272 1.7 matt subd3 middle[%r0],%r3,%r1 # q = (p - middle n*pi/2) rounded
273 1.7 matt subd2 %r1,%r3 # r = (p - q)
274 1.7 matt subd2 middle[%r0],%r3 # r = r - middle n*pi/2
275 1.7 matt subd2 trailing[%r0],%r3 # r = r - trailing n*pi/2 rounded
276 1.1 ragge /*
277 1.1 ragge * If the original argument was negative,
278 1.1 ragge * negate the reduced argument and
279 1.1 ragge * adjust the octant/quadrant number.
280 1.1 ragge */
281 1.7 matt tstw 4(%ap)
282 1.1 ragge bgeq abs2
283 1.7 matt mnegf %r1,%r1
284 1.7 matt mnegf %r3,%r3
285 1.7 matt /* subb3 %r0,$8,%r0 ...used for pi/4 reduction -S.McD */
286 1.7 matt subb3 %r0,$4,%r0
287 1.1 ragge abs2:
288 1.1 ragge /*
289 1.1 ragge * Clear all unneeded octant/quadrant bits.
290 1.1 ragge */
291 1.7 matt /* bicb2 $0xf8,%r0 ...used for pi/4 reduction -S.McD */
292 1.7 matt bicb2 $0xfc,%r0
293 1.1 ragge rsb
294 1.1 ragge /*
295 1.1 ragge * p.0
296 1.1 ragge */
297 1.6 matt #ifdef __ELF__
298 1.6 matt .section .rodata
299 1.6 matt #else
300 1.1 ragge .text
301 1.6 matt #endif
302 1.5 matt _ALIGN_TEXT
303 1.1 ragge /*
304 1.1 ragge * Only 256 (actually 225) bits of 2/pi are needed for VAX double
305 1.1 ragge * precision; this was determined by enumerating all the nearest
306 1.1 ragge * machine integer multiples of pi/2 using continued fractions.
307 1.1 ragge * (8a8d3673775b7ff7 required the most bits.) -S.McD
308 1.1 ragge */
/*
 * The label bits2opi marks the HIGH end of the bit string: trigred
 * addresses it with a negated index (movab bits2opi[%r3]) and then
 * walks downward with subl2 $4,%r3 steps, so the longwords below are
 * stored least-significant first.
 */
309 1.1 ragge .long 0
310 1.1 ragge .long 0
311 1.1 ragge .long 0xaef1586d
312 1.1 ragge .long 0x9458eaf7
313 1.1 ragge .long 0x10e4107f
314 1.1 ragge .long 0xd8a5664f
315 1.1 ragge .long 0x4d377036
316 1.1 ragge .long 0x09d5f47d
317 1.1 ragge .long 0x91054a7f
318 1.1 ragge .long 0xbe60db93
319 1.1 ragge bits2opi:
320 1.1 ragge .long 0x00000028
321 1.1 ragge .long 0
322 1.1 ragge /*
323 1.1 ragge * Note: wherever you see the word `octant', read `quadrant'.
324 1.1 ragge * Currently this code is set up for pi/2 argument reduction.
325 1.1 ragge * By uncommenting/commenting the appropriate lines, it will
326 1.1 ragge * also serve as a pi/4 argument reduction code.
327 1.1 ragge */
328 1.6 matt .text
329 1.1 ragge
330 1.1 ragge /* p.1
331 1.1 ragge * Trigred performs argument reduction
332 1.1 ragge * for the trigonometric functions. It
333 1.1 ragge * takes one input argument, a D-format
334 1.7 matt * number in %r1/%r0 . The magnitude of
335 1.1 ragge * the input argument must be greater
336 1.1 ragge * than or equal to 1/2 . Trigred produces
337 1.1 ragge * three results: the number of the octant
338 1.4 simonb * occupied by the argument, the reduced
339 1.1 ragge * argument, and an extension of the
340 1.4 simonb * reduced argument. The octant number is
341 1.7 matt * returned in %r0 . The reduced argument
342 1.4 simonb * is returned as a D-format number in
343 1.7 matt * %r2/%r1 . An 8 bit extension of the
344 1.4 simonb * reduced argument is returned as an
345 1.7 matt * F-format number in %r3.
346 1.1 ragge * p.2
347 1.1 ragge */
348 1.1 ragge trigred:
/*
 * Entered via jsb from __libm_argred (see p.1 comment above for the
 * full input/output contract); returns via rsb.
 */
349 1.1 ragge /*
350 1.1 ragge * Save the sign of the input argument.
351 1.1 ragge */
352 1.7 matt movw %r0,-(%sp)
353 1.1 ragge /*
354 1.1 ragge * Extract the exponent field.
355 1.1 ragge */
356 1.7 matt extzv $7,$7,%r0,%r2
357 1.1 ragge /*
358 1.1 ragge * Convert the fraction part of the input
359 1.1 ragge * argument into a quadword integer.
360 1.1 ragge */
361 1.7 matt bicw2 $0xff80,%r0
362 1.7 matt bisb2 $0x80,%r0 # restore the hidden bit -S.McD
363 1.7 matt rotl $16,%r0,%r0
364 1.7 matt rotl $16,%r1,%r1
365 1.1 ragge /*
366 1.7 matt * If %r1 is negative, add 1 to %r0 . This
367 1.1 ragge * adjustment is made so that the two's
368 1.1 ragge * complement multiplications done later
369 1.1 ragge * will produce unsigned results.
370 1.1 ragge */
371 1.1 ragge bgeq posmid
372 1.7 matt incl %r0
373 1.1 ragge posmid:
374 1.1 ragge /* p.3
375 1.1 ragge *
376 1.7 matt * Set %r3 to the address of the first quadword
377 1.1 ragge * used to obtain the needed portion of 2/pi .
378 1.1 ragge * The address is longword aligned to ensure
379 1.1 ragge * efficient access.
380 1.1 ragge */
381 1.7 matt ashl $-3,%r2,%r3
382 1.7 matt bicb2 $3,%r3
383 1.7 matt mnegl %r3,%r3
384 1.7 matt movab bits2opi[%r3],%r3
385 1.1 ragge /*
386 1.7 matt * Set %r2 to the size of the shift needed to
387 1.1 ragge * obtain the correct portion of 2/pi .
388 1.1 ragge */
389 1.7 matt bicb2 $0xe0,%r2
390 1.1 ragge /* p.4
391 1.1 ragge *
392 1.1 ragge * Move the needed 128 bits of 2/pi into
393 1.7 matt * %r11 - %r8 . Adjust the numbers to allow
394 1.1 ragge * for unsigned multiplication.
395 1.1 ragge */
396 1.7 matt ashq %r2,(%r3),%r10
397 1.1 ragge
398 1.7 matt subl2 $4,%r3
399 1.7 matt ashq %r2,(%r3),%r9
400 1.1 ragge bgeq signoff1
401 1.7 matt incl %r11
402 1.1 ragge signoff1:
403 1.7 matt subl2 $4,%r3
404 1.7 matt ashq %r2,(%r3),%r8
405 1.1 ragge bgeq signoff2
406 1.7 matt incl %r10
407 1.1 ragge signoff2:
408 1.7 matt subl2 $4,%r3
409 1.7 matt ashq %r2,(%r3),%r7
410 1.1 ragge bgeq signoff3
411 1.7 matt incl %r9
412 1.1 ragge signoff3:
413 1.1 ragge /* p.5
414 1.1 ragge *
415 1.7 matt * Multiply the contents of %r0/%r1 by the
416 1.7 matt * slice of 2/pi in %r11 - %r8 .
417 1.1 ragge */
418 1.7 matt emul %r0,%r8,$0,%r4
419 1.7 matt emul %r0,%r9,%r5,%r5
420 1.7 matt emul %r0,%r10,%r6,%r6
421 1.7 matt
422 1.7 matt emul %r1,%r8,$0,%r7
423 1.7 matt emul %r1,%r9,%r8,%r8
424 1.7 matt emul %r1,%r10,%r9,%r9
425 1.7 matt emul %r1,%r11,%r10,%r10
426 1.7 matt
427 1.7 matt addl2 %r4,%r8
428 1.7 matt adwc %r5,%r9
429 1.7 matt adwc %r6,%r10
430 1.1 ragge /* p.6
431 1.1 ragge *
432 1.1 ragge * If there are more than five leading zeros
433 1.1 ragge * after the first two quotient bits or if there
434 1.1 ragge * are more than five leading ones after the first
435 1.1 ragge * two quotient bits, generate more fraction bits.
436 1.1 ragge * Otherwise, branch to code to produce the result.
437 1.1 ragge */
438 1.7 matt bicl3 $0xc1ffffff,%r10,%r4
439 1.1 ragge beql more1
440 1.7 matt cmpl $0x3e000000,%r4
441 1.1 ragge bneq result
442 1.1 ragge more1:
443 1.1 ragge /* p.7
444 1.1 ragge *
445 1.1 ragge * generate another 32 result bits.
446 1.1 ragge */
447 1.7 matt subl2 $4,%r3
448 1.7 matt ashq %r2,(%r3),%r5
449 1.1 ragge bgeq signoff4
450 1.1 ragge
451 1.7 matt emul %r1,%r6,$0,%r4
452 1.7 matt addl2 %r1,%r5
453 1.7 matt emul %r0,%r6,%r5,%r5
454 1.7 matt addl2 %r0,%r6
455 1.5 matt jbr addbits1
456 1.1 ragge
457 1.1 ragge signoff4:
458 1.7 matt emul %r1,%r6,$0,%r4
459 1.7 matt emul %r0,%r6,%r5,%r5
460 1.1 ragge
461 1.1 ragge addbits1:
462 1.7 matt addl2 %r5,%r7
463 1.7 matt adwc %r6,%r8
464 1.7 matt adwc $0,%r9
465 1.7 matt adwc $0,%r10
466 1.1 ragge /* p.8
467 1.1 ragge *
468 1.1 ragge * Check for massive cancellation.
469 1.1 ragge */
470 1.7 matt bicl3 $0xc0000000,%r10,%r6
471 1.1 ragge /* bneq more2 -S.McD Test was backwards */
472 1.1 ragge beql more2
473 1.7 matt cmpl $0x3fffffff,%r6
474 1.1 ragge bneq result
475 1.1 ragge more2:
476 1.1 ragge /* p.9
477 1.1 ragge *
478 1.1 ragge * If massive cancellation has occurred,
479 1.1 ragge * generate another 24 result bits.
480 1.4 simonb * Testing has shown there will always be
481 1.1 ragge * enough bits after this point.
482 1.1 ragge */
483 1.7 matt subl2 $4,%r3
484 1.7 matt ashq %r2,(%r3),%r5
485 1.1 ragge bgeq signoff5
486 1.1 ragge
487 1.7 matt emul %r0,%r6,%r4,%r5
488 1.7 matt addl2 %r0,%r6
489 1.5 matt jbr addbits2
490 1.1 ragge
491 1.1 ragge signoff5:
492 1.7 matt emul %r0,%r6,%r4,%r5
493 1.1 ragge
494 1.1 ragge addbits2:
495 1.7 matt addl2 %r6,%r7
496 1.7 matt adwc $0,%r8
497 1.7 matt adwc $0,%r9
498 1.7 matt adwc $0,%r10
499 1.1 ragge /* p.10
500 1.1 ragge *
501 1.1 ragge * The following code produces the reduced
502 1.1 ragge * argument from the product bits contained
503 1.7 matt * in %r10 - %r7 .
504 1.1 ragge */
505 1.1 ragge result:
506 1.1 ragge /*
507 1.7 matt * Extract the octant number from %r10 .
508 1.1 ragge */
509 1.7 matt /* extzv $29,$3,%r10,%r0 ...used for pi/4 reduction -S.McD */
510 1.7 matt extzv $30,$2,%r10,%r0
511 1.1 ragge /*
512 1.7 matt * Clear the octant bits in %r10 .
513 1.1 ragge */
514 1.7 matt /* bicl2 $0xe0000000,%r10 ...used for pi/4 reduction -S.McD */
515 1.7 matt bicl2 $0xc0000000,%r10
516 1.1 ragge /*
517 1.1 ragge * Zero the sign flag.
518 1.1 ragge */
519 1.7 matt clrl %r5
520 1.1 ragge /* p.11
521 1.1 ragge *
522 1.1 ragge * Check to see if the fraction is greater than
523 1.4 simonb * or equal to one-half. If it is, add one
524 1.1 ragge * to the octant number, set the sign flag
525 1.1 ragge * on, and replace the fraction with 1 minus
526 1.1 ragge * the fraction.
527 1.1 ragge */
528 1.7 matt /* bitl $0x10000000,%r10 ...used for pi/4 reduction -S.McD */
529 1.7 matt bitl $0x20000000,%r10
530 1.1 ragge beql small
531 1.7 matt incl %r0
532 1.7 matt incl %r5
533 1.7 matt /* subl3 %r10,$0x1fffffff,%r10 ...used for pi/4 reduction -S.McD */
534 1.7 matt subl3 %r10,$0x3fffffff,%r10
535 1.7 matt mcoml %r9,%r9
536 1.7 matt mcoml %r8,%r8
537 1.7 matt mcoml %r7,%r7
538 1.1 ragge small:
539 1.1 ragge /* p.12
540 1.1 ragge *
541 1.1 ragge * Test whether the first 29 bits of the ...used for pi/4 reduction -S.McD
542 1.4 simonb * Test whether the first 30 bits of the
543 1.1 ragge * fraction are zero.
544 1.1 ragge */
545 1.7 matt tstl %r10
546 1.1 ragge beql tiny
547 1.1 ragge /*
548 1.7 matt * Find the position of the first one bit in %r10 .
549 1.1 ragge */
550 1.7 matt cvtld %r10,%r1
551 1.7 matt extzv $7,$7,%r1,%r1
552 1.1 ragge /*
553 1.1 ragge * Compute the size of the shift needed.
554 1.1 ragge */
555 1.7 matt subl3 %r1,$32,%r6
556 1.1 ragge /*
557 1.1 ragge * Shift up the high order 64 bits of the
558 1.1 ragge * product.
559 1.1 ragge */
560 1.7 matt ashq %r6,%r9,%r10
561 1.7 matt ashq %r6,%r8,%r9
562 1.5 matt jbr mult
563 1.1 ragge /* p.13
564 1.1 ragge *
565 1.7 matt * Test to see if the sign bit of %r9 is on.
566 1.1 ragge */
567 1.1 ragge tiny:
568 1.7 matt tstl %r9
569 1.1 ragge bgeq tinier
570 1.1 ragge /*
571 1.1 ragge * If it is, shift the product bits up 32 bits.
572 1.1 ragge */
573 1.7 matt movl $32,%r6
574 1.7 matt movq %r8,%r10
575 1.7 matt tstl %r10
576 1.5 matt jbr mult
577 1.1 ragge /* p.14
578 1.1 ragge *
579 1.7 matt * Test whether %r9 is zero. It is probably
580 1.7 matt * impossible for both %r10 and %r9 to be
581 1.1 ragge * zero, but until proven to be so, the test
582 1.1 ragge * must be made.
583 1.1 ragge */
584 1.1 ragge tinier:
585 1.1 ragge beql zero
586 1.1 ragge /*
587 1.7 matt * Find the position of the first one bit in %r9 .
588 1.1 ragge */
589 1.7 matt cvtld %r9,%r1
590 1.7 matt extzv $7,$7,%r1,%r1
591 1.1 ragge /*
592 1.1 ragge * Compute the size of the shift needed.
593 1.1 ragge */
594 1.7 matt subl3 %r1,$32,%r1
595 1.7 matt addl3 $32,%r1,%r6
596 1.1 ragge /*
597 1.1 ragge * Shift up the high order 64 bits of the
598 1.1 ragge * product.
599 1.1 ragge */
600 1.7 matt ashq %r1,%r8,%r10
601 1.7 matt ashq %r1,%r7,%r9
602 1.5 matt jbr mult
603 1.1 ragge /* p.15
604 1.1 ragge *
605 1.1 ragge * The following code sets the reduced
606 1.1 ragge * argument to zero.
607 1.1 ragge */
608 1.1 ragge zero:
609 1.7 matt clrl %r1
610 1.7 matt clrl %r2
611 1.7 matt clrl %r3
612 1.5 matt jbr return
613 1.1 ragge /* p.16
614 1.1 ragge *
615 1.7 matt * At this point, %r0 contains the octant number,
616 1.7 matt * %r6 indicates the number of bits the fraction
617 1.7 matt * has been shifted, %r5 indicates the sign of
618 1.7 matt * the fraction, %r11/%r10 contain the high order
619 1.1 ragge * 64 bits of the fraction, and the condition
620 1.7 matt * codes indicate whether the sign bit of %r10
621 1.1 ragge * is on. The following code multiplies the
622 1.1 ragge * fraction by pi/2 .
623 1.1 ragge */
624 1.1 ragge mult:
625 1.1 ragge /*
626 1.7 matt * Save %r11/%r10 in %r4/%r1 . -S.McD
627 1.1 ragge */
628 1.7 matt movl %r11,%r4
629 1.7 matt movl %r10,%r1
630 1.1 ragge /*
631 1.7 matt * If the sign bit of %r10 is on, add 1 to %r11 .
632 1.1 ragge */
633 1.1 ragge bgeq signoff6
634 1.7 matt incl %r11
635 1.1 ragge signoff6:
636 1.1 ragge /* p.17
637 1.1 ragge *
638 1.7 matt * Move pi/2 into %r3/%r2 .
639 1.1 ragge */
640 1.7 matt movq $0xc90fdaa22168c235,%r2
641 1.1 ragge /*
642 1.1 ragge * Multiply the fraction by the portion of pi/2
643 1.7 matt * in %r2 .
644 1.1 ragge */
645 1.7 matt emul %r2,%r10,$0,%r7
646 1.7 matt emul %r2,%r11,%r8,%r7
647 1.1 ragge /*
648 1.4 simonb * Multiply the fraction by the portion of pi/2
649 1.7 matt * in %r3 .
650 1.1 ragge */
651 1.7 matt emul %r3,%r10,$0,%r9
652 1.7 matt emul %r3,%r11,%r10,%r10
653 1.1 ragge /*
654 1.1 ragge * Add the product bits together.
655 1.1 ragge */
656 1.7 matt addl2 %r7,%r9
657 1.7 matt adwc %r8,%r10
658 1.7 matt adwc $0,%r11
659 1.1 ragge /*
660 1.7 matt * Compensate for not sign extending %r8 above.-S.McD
661 1.1 ragge */
662 1.7 matt tstl %r8
663 1.1 ragge bgeq signoff6a
664 1.7 matt decl %r11
665 1.1 ragge signoff6a:
666 1.1 ragge /*
667 1.7 matt * Compensate for %r11/%r10 being unsigned. -S.McD
668 1.1 ragge */
669 1.7 matt addl2 %r2,%r10
670 1.7 matt adwc %r3,%r11
671 1.1 ragge /*
672 1.7 matt * Compensate for %r3/%r2 being unsigned. -S.McD
673 1.1 ragge */
674 1.7 matt addl2 %r1,%r10
675 1.7 matt adwc %r4,%r11
676 1.1 ragge /* p.18
677 1.1 ragge *
678 1.7 matt * If the sign bit of %r11 is zero, shift the
679 1.7 matt * product bits up one bit and increment %r6 .
680 1.1 ragge */
681 1.1 ragge blss signon
682 1.7 matt incl %r6
683 1.7 matt ashq $1,%r10,%r10
684 1.7 matt tstl %r9
685 1.1 ragge bgeq signoff7
686 1.7 matt incl %r10
687 1.1 ragge signoff7:
688 1.1 ragge signon:
689 1.1 ragge /* p.19
690 1.1 ragge *
691 1.1 ragge * Shift the 56 most significant product
692 1.7 matt * bits into %r9/%r8 . The sign extension
693 1.1 ragge * will be handled later.
694 1.1 ragge */
695 1.7 matt ashq $-8,%r10,%r8
696 1.1 ragge /*
697 1.7 matt * Convert the low order 8 bits of %r10
698 1.1 ragge * into an F-format number.
699 1.1 ragge */
700 1.7 matt cvtbf %r10,%r3
701 1.1 ragge /*
702 1.1 ragge * If the result of the conversion was
703 1.7 matt * negative, add 1 to %r9/%r8 .
704 1.1 ragge */
705 1.1 ragge bgeq chop
706 1.7 matt incl %r8
707 1.7 matt adwc $0,%r9
708 1.1 ragge /*
709 1.7 matt * If %r9 is now zero, branch to special
710 1.1 ragge * code to handle that possibility.
711 1.1 ragge */
712 1.1 ragge beql carryout
713 1.1 ragge chop:
714 1.1 ragge /* p.20
715 1.1 ragge *
716 1.7 matt * Convert the number in %r9/%r8 into
717 1.7 matt * D-format number in %r2/%r1 .
718 1.1 ragge */
719 1.7 matt rotl $16,%r8,%r2
720 1.7 matt rotl $16,%r9,%r1
721 1.1 ragge /*
722 1.1 ragge * Set the exponent field to the appropriate
723 1.1 ragge * value. Note that the extra bits created by
724 1.1 ragge * sign extension are now eliminated.
725 1.1 ragge */
726 1.7 matt subw3 %r6,$131,%r6
727 1.7 matt insv %r6,$7,$9,%r1
728 1.1 ragge /*
729 1.1 ragge * Set the exponent field of the F-format
730 1.7 matt * number in %r3 to the appropriate value.
731 1.1 ragge */
732 1.7 matt tstf %r3
733 1.1 ragge beql return
734 1.7 matt /* extzv $7,$8,%r3,%r4 -S.McD */
735 1.7 matt extzv $7,$7,%r3,%r4
736 1.7 matt addw2 %r4,%r6
737 1.7 matt /* subw2 $217,%r6 -S.McD */
738 1.7 matt subw2 $64,%r6
739 1.7 matt insv %r6,$7,$8,%r3
740 1.5 matt jbr return
741 1.1 ragge /* p.21
742 1.1 ragge *
743 1.4 simonb * The following code generates the appropriate
744 1.1 ragge * result for the unlikely possibility that
745 1.7 matt * rounding the number in %r9/%r8 resulted in
746 1.1 ragge * a carry out.
747 1.1 ragge */
748 1.1 ragge carryout:
749 1.7 matt clrl %r1
750 1.7 matt clrl %r2
751 1.7 matt subw3 %r6,$132,%r6
752 1.7 matt insv %r6,$7,$9,%r1
753 1.7 matt tstf %r3
754 1.1 ragge beql return
755 1.7 matt extzv $7,$8,%r3,%r4
756 1.7 matt addw2 %r4,%r6
757 1.7 matt subw2 $218,%r6
758 1.7 matt insv %r6,$7,$8,%r3
759 1.1 ragge /* p.22
760 1.1 ragge *
761 1.1 ragge * The following code makes any needed
762 1.4 simonb * adjustments to the signs of the
763 1.1 ragge * results or to the octant number, and
764 1.1 ragge * then returns.
765 1.1 ragge */
766 1.1 ragge return:
767 1.1 ragge /*
768 1.4 simonb * Test if the fraction was greater than or
769 1.1 ragge * equal to 1/2 . If so, negate the reduced
770 1.1 ragge * argument.
771 1.1 ragge */
772 1.7 matt blbc %r5,signoff8
773 1.7 matt mnegf %r1,%r1
774 1.7 matt mnegf %r3,%r3
775 1.1 ragge signoff8:
776 1.1 ragge /* p.23
777 1.1 ragge *
778 1.1 ragge * If the original argument was negative,
779 1.1 ragge * negate the reduced argument and
780 1.1 ragge * adjust the octant number.
781 1.1 ragge */
782 1.7 matt tstw (%sp)+
783 1.1 ragge bgeq signoff9
784 1.7 matt mnegf %r1,%r1
785 1.7 matt mnegf %r3,%r3
786 1.7 matt /* subb3 %r0,$8,%r0 ...used for pi/4 reduction -S.McD */
787 1.7 matt subb3 %r0,$4,%r0
788 1.1 ragge signoff9:
789 1.1 ragge /*
790 1.1 ragge * Clear all unneeded octant bits.
791 1.1 ragge *
792 1.7 matt * bicb2 $0xf8,%r0 ...used for pi/4 reduction -S.McD */
793 1.7 matt bicb2 $0xfc,%r0
794 1.1 ragge /*
795 1.1 ragge * Return.
796 1.1 ragge */
797 1.1 ragge rsb
798