n_argred.S revision 1.9 1 1.9 matt /* $NetBSD: n_argred.S,v 1.9 2007/04/19 00:37:20 matt Exp $ */
2 1.1 ragge /*
3 1.1 ragge * Copyright (c) 1985, 1993
4 1.1 ragge * The Regents of the University of California. All rights reserved.
5 1.1 ragge *
6 1.1 ragge * Redistribution and use in source and binary forms, with or without
7 1.1 ragge * modification, are permitted provided that the following conditions
8 1.1 ragge * are met:
9 1.1 ragge * 1. Redistributions of source code must retain the above copyright
10 1.1 ragge * notice, this list of conditions and the following disclaimer.
11 1.1 ragge * 2. Redistributions in binary form must reproduce the above copyright
12 1.1 ragge * notice, this list of conditions and the following disclaimer in the
13 1.1 ragge * documentation and/or other materials provided with the distribution.
14 1.8 agc * 3. Neither the name of the University nor the names of its contributors
15 1.1 ragge * may be used to endorse or promote products derived from this software
16 1.1 ragge * without specific prior written permission.
17 1.1 ragge *
18 1.1 ragge * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 1.1 ragge * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 1.1 ragge * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 1.1 ragge * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 1.1 ragge * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 1.1 ragge * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 1.1 ragge * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 1.1 ragge * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 1.1 ragge * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 1.1 ragge * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 1.1 ragge * SUCH DAMAGE.
29 1.1 ragge *
30 1.1 ragge * @(#)argred.s 8.1 (Berkeley) 6/4/93
31 1.1 ragge */
32 1.1 ragge
33 1.5 matt #include <machine/asm.h>
34 1.5 matt
35 1.1 ragge /*
36 1.1 ragge * libm$argred implements Bob Corbett's argument reduction and
37 1.1 ragge * libm$sincos implements Peter Tang's double precision sin/cos.
38 1.4 simonb *
39 1.1 ragge * Note: The two entry points libm$argred and libm$sincos are meant
40 1.1 ragge * to be used only by _sin, _cos and _tan.
41 1.1 ragge *
42 1.1 ragge * method: true range reduction to [-pi/4,pi/4], P. Tang & B. Corbett
43 1.1 ragge * S. McDonald, April 4, 1985
44 1.1 ragge */
45 1.1 ragge
46 1.9 matt .hidden __libm_argred
47 1.5 matt ENTRY(__libm_argred, 0)
48 1.1 ragge /*
49 1.1 ragge * Compare the argument with the largest possible that can
50 1.7 matt * be reduced by table lookup. %r3 := |x| will be used in table_lookup .
51 1.1 ragge */
52 1.7 matt movd %r0,%r3
53 1.1 ragge bgeq abs1
54 1.7 matt mnegd %r3,%r3
55 1.1 ragge abs1:
56 1.7 matt cmpd %r3,$0d+4.55530934770520019583e+01
57 1.1 ragge blss small_arg
58 1.1 ragge jsb trigred
59 1.1 ragge rsb
60 1.1 ragge small_arg:
61 1.1 ragge jsb table_lookup
62 1.1 ragge rsb
63 1.1 ragge /*
64 1.1 ragge * At this point,
65 1.7 matt * %r0 contains the quadrant number, 0, 1, 2, or 3;
66 1.7 matt * %r2/%r1 contains the reduced argument as a D-format number;
67 1.7 matt * %r3 contains a F-format extension to the reduced argument;
68 1.7 matt * %r4 contains a 0 or 1 corresponding to a sin or cos entry.
69 1.1 ragge */
70 1.5 matt
71 1.9 matt .hidden __libm_sincos
72 1.5 matt ENTRY(__libm_sincos, 0)
73 1.1 ragge /*
74 1.1 ragge * Compensate for a cosine entry by adding one to the quadrant number.
75 1.1 ragge */
76 1.7 matt addl2 %r4,%r0
77 1.1 ragge /*
78 1.7 matt * Polyd clobbers %r5-%r0 ; save X in %r7/%r6 .
79 1.1 ragge * This can be avoided by rewriting trigred .
80 1.1 ragge */
81 1.7 matt movd %r1,%r6
82 1.1 ragge /*
83 1.7 matt * Likewise, save alpha in %r8 .
84 1.1 ragge * This can be avoided by rewriting trigred .
85 1.1 ragge */
86 1.7 matt movf %r3,%r8
87 1.1 ragge /*
88 1.1 ragge * Odd or even quadrant? cosine if odd, sine otherwise.
89 1.7 matt * Save floor(quadrant/2) in %r9 ; it determines the final sign.
90 1.1 ragge */
91 1.7 matt rotl $-1,%r0,%r9
92 1.1 ragge blss cosine
93 1.1 ragge sine:
94 1.7 matt muld2 %r1,%r1 # Xsq = X * X
95 1.7 matt cmpw $0x2480,%r1 # [zl] Xsq > 2^-56?
96 1.1 ragge blss 1f # [zl] yes, go ahead and do polyd
97 1.7 matt clrq %r1 # [zl] work around 11/780 FPA polyd bug
98 1.1 ragge 1:
99 1.7 matt polyd %r1,$7,sin_coef # Q = P(Xsq) , of deg 7
100 1.7 matt mulf3 $0f3.0,%r8,%r4 # beta = 3 * alpha
101 1.7 matt mulf2 %r0,%r4 # beta = Q * beta
102 1.7 matt addf2 %r8,%r4 # beta = alpha + beta
103 1.7 matt muld2 %r6,%r0 # S(X) = X * Q
104 1.7 matt /* cvtfd %r4,%r4 ... %r5 = 0 after a polyd. */
105 1.7 matt addd2 %r4,%r0 # S(X) = beta + S(X)
106 1.7 matt addd2 %r6,%r0 # S(X) = X + S(X)
107 1.5 matt jbr done
108 1.1 ragge cosine:
109 1.7 matt muld2 %r6,%r6 # Xsq = X * X
110 1.1 ragge beql zero_arg
111 1.7 matt mulf2 %r1,%r8 # beta = X * alpha
112 1.7 matt polyd %r6,$7,cos_coef /* Q = P'(Xsq) , of deg 7 */
113 1.7 matt subd3 %r0,%r8,%r0 # beta = beta - Q
114 1.7 matt subw2 $0x80,%r6 # Xsq = Xsq / 2
115 1.7 matt addd2 %r0,%r6 # Xsq = Xsq + beta
116 1.1 ragge zero_arg:
117 1.7 matt subd3 %r6,$0d1.0,%r0 # C(X) = 1 - Xsq
118 1.1 ragge done:
119 1.7 matt blbc %r9,even
120 1.7 matt mnegd %r0,%r0
121 1.1 ragge even:
122 1.1 ragge rsb
123 1.1 ragge
124 1.6 matt #ifdef __ELF__
125 1.6 matt .section .rodata
126 1.6 matt #else
127 1.6 matt .text
128 1.6 matt #endif
129 1.5 matt _ALIGN_TEXT
130 1.1 ragge
131 1.1 ragge sin_coef:
132 1.1 ragge .double 0d-7.53080332264191085773e-13 # s7 = 2^-29 -1.a7f2504ffc49f8..
133 1.1 ragge .double 0d+1.60573519267703489121e-10 # s6 = 2^-21 1.611adaede473c8..
134 1.1 ragge .double 0d-2.50520965150706067211e-08 # s5 = 2^-1a -1.ae644921ed8382..
135 1.1 ragge .double 0d+2.75573191800593885716e-06 # s4 = 2^-13 1.71de3a4b884278..
136 1.1 ragge .double 0d-1.98412698411850507950e-04 # s3 = 2^-0d -1.a01a01a0125e7d..
137 1.1 ragge .double 0d+8.33333333333325688985e-03 # s2 = 2^-07 1.11111111110e50
138 1.1 ragge .double 0d-1.66666666666666664354e-01 # s1 = 2^-03 -1.55555555555554
139 1.1 ragge .double 0d+0.00000000000000000000e+00 # s0 = 0
140 1.1 ragge
141 1.1 ragge cos_coef:
142 1.1 ragge .double 0d-1.13006966202629430300e-11 # s7 = 2^-25 -1.8D9BA04D1374BE..
143 1.1 ragge .double 0d+2.08746646574796004700e-09 # s6 = 2^-1D 1.1EE632650350BA..
144 1.1 ragge .double 0d-2.75573073031284417300e-07 # s5 = 2^-16 -1.27E4F31411719E..
145 1.1 ragge .double 0d+2.48015872682668025200e-05 # s4 = 2^-10 1.A01A0196B902E8..
146 1.1 ragge .double 0d-1.38888888888464709200e-03 # s3 = 2^-0A -1.6C16C16C11FACE..
147 1.1 ragge .double 0d+4.16666666666664761400e-02 # s2 = 2^-05 1.5555555555539E
148 1.1 ragge .double 0d+0.00000000000000000000e+00 # s1 = 0
149 1.1 ragge .double 0d+0.00000000000000000000e+00 # s0 = 0
150 1.1 ragge
151 1.1 ragge /*
152 1.1 ragge * Multiples of pi/2 expressed as the sum of three doubles,
153 1.1 ragge *
154 1.1 ragge * trailing: n * pi/2 , n = 0, 1, 2, ..., 29
155 1.1 ragge * trailing[n] ,
156 1.1 ragge *
157 1.1 ragge * middle: n * pi/2 , n = 0, 1, 2, ..., 29
158 1.1 ragge * middle[n] ,
159 1.1 ragge *
160 1.1 ragge * leading: n * pi/2 , n = 0, 1, 2, ..., 29
161 1.1 ragge * leading[n] ,
162 1.1 ragge *
163 1.1 ragge * where
164 1.1 ragge * leading[n] := (n * pi/2) rounded,
165 1.1 ragge * middle[n] := (n * pi/2 - leading[n]) rounded,
166 1.1 ragge * trailing[n] := (( n * pi/2 - leading[n]) - middle[n]) rounded .
167 1.1 ragge */
168 1.1 ragge trailing:
169 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 trailing
170 1.1 ragge .double 0d+4.33590506506189049611e-35 # 1 * pi/2 trailing
171 1.1 ragge .double 0d+8.67181013012378099223e-35 # 2 * pi/2 trailing
172 1.1 ragge .double 0d+1.30077151951856714215e-34 # 3 * pi/2 trailing
173 1.1 ragge .double 0d+1.73436202602475619845e-34 # 4 * pi/2 trailing
174 1.1 ragge .double 0d-1.68390735624352669192e-34 # 5 * pi/2 trailing
175 1.1 ragge .double 0d+2.60154303903713428430e-34 # 6 * pi/2 trailing
176 1.1 ragge .double 0d-8.16726343231148352150e-35 # 7 * pi/2 trailing
177 1.1 ragge .double 0d+3.46872405204951239689e-34 # 8 * pi/2 trailing
178 1.1 ragge .double 0d+3.90231455855570147991e-34 # 9 * pi/2 trailing
179 1.1 ragge .double 0d-3.36781471248705338384e-34 # 10 * pi/2 trailing
180 1.1 ragge .double 0d-1.06379439835298071785e-33 # 11 * pi/2 trailing
181 1.1 ragge .double 0d+5.20308607807426856861e-34 # 12 * pi/2 trailing
182 1.1 ragge .double 0d+5.63667658458045770509e-34 # 13 * pi/2 trailing
183 1.1 ragge .double 0d-1.63345268646229670430e-34 # 14 * pi/2 trailing
184 1.1 ragge .double 0d-1.19986217995610764801e-34 # 15 * pi/2 trailing
185 1.1 ragge .double 0d+6.93744810409902479378e-34 # 16 * pi/2 trailing
186 1.1 ragge .double 0d-8.03640094449267300110e-34 # 17 * pi/2 trailing
187 1.1 ragge .double 0d+7.80462911711140295982e-34 # 18 * pi/2 trailing
188 1.1 ragge .double 0d-7.16921993148029483506e-34 # 19 * pi/2 trailing
189 1.1 ragge .double 0d-6.73562942497410676769e-34 # 20 * pi/2 trailing
190 1.1 ragge .double 0d-6.30203891846791677593e-34 # 21 * pi/2 trailing
191 1.1 ragge .double 0d-2.12758879670596143570e-33 # 22 * pi/2 trailing
192 1.1 ragge .double 0d+2.53800212047402350390e-33 # 23 * pi/2 trailing
193 1.1 ragge .double 0d+1.04061721561485371372e-33 # 24 * pi/2 trailing
194 1.1 ragge .double 0d+6.11729905311472319056e-32 # 25 * pi/2 trailing
195 1.1 ragge .double 0d+1.12733531691609154102e-33 # 26 * pi/2 trailing
196 1.1 ragge .double 0d-3.70049587943078297272e-34 # 27 * pi/2 trailing
197 1.1 ragge .double 0d-3.26690537292459340860e-34 # 28 * pi/2 trailing
198 1.1 ragge .double 0d-1.14812616507957271361e-34 # 29 * pi/2 trailing
199 1.1 ragge
200 1.1 ragge middle:
201 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 middle
202 1.1 ragge .double 0d+5.72118872610983179676e-18 # 1 * pi/2 middle
203 1.1 ragge .double 0d+1.14423774522196635935e-17 # 2 * pi/2 middle
204 1.1 ragge .double 0d-3.83475850529283316309e-17 # 3 * pi/2 middle
205 1.1 ragge .double 0d+2.28847549044393271871e-17 # 4 * pi/2 middle
206 1.1 ragge .double 0d-2.69052076007086676522e-17 # 5 * pi/2 middle
207 1.1 ragge .double 0d-7.66951701058566632618e-17 # 6 * pi/2 middle
208 1.1 ragge .double 0d-1.54628301484890040587e-17 # 7 * pi/2 middle
209 1.1 ragge .double 0d+4.57695098088786543741e-17 # 8 * pi/2 middle
210 1.1 ragge .double 0d+1.07001849766246313192e-16 # 9 * pi/2 middle
211 1.1 ragge .double 0d-5.38104152014173353044e-17 # 10 * pi/2 middle
212 1.1 ragge .double 0d-2.14622680169080983801e-16 # 11 * pi/2 middle
213 1.1 ragge .double 0d-1.53390340211713326524e-16 # 12 * pi/2 middle
214 1.1 ragge .double 0d-9.21580002543456677056e-17 # 13 * pi/2 middle
215 1.1 ragge .double 0d-3.09256602969780081173e-17 # 14 * pi/2 middle
216 1.1 ragge .double 0d+3.03066796603896507006e-17 # 15 * pi/2 middle
217 1.1 ragge .double 0d+9.15390196177573087482e-17 # 16 * pi/2 middle
218 1.1 ragge .double 0d+1.52771359575124969107e-16 # 17 * pi/2 middle
219 1.1 ragge .double 0d+2.14003699532492626384e-16 # 18 * pi/2 middle
220 1.1 ragge .double 0d-1.68853170360202329427e-16 # 19 * pi/2 middle
221 1.1 ragge .double 0d-1.07620830402834670609e-16 # 20 * pi/2 middle
222 1.1 ragge .double 0d+3.97700719404595604379e-16 # 21 * pi/2 middle
223 1.1 ragge .double 0d-4.29245360338161967602e-16 # 22 * pi/2 middle
224 1.1 ragge .double 0d-3.68013020380794313406e-16 # 23 * pi/2 middle
225 1.1 ragge .double 0d-3.06780680423426653047e-16 # 24 * pi/2 middle
226 1.1 ragge .double 0d-2.45548340466059054318e-16 # 25 * pi/2 middle
227 1.1 ragge .double 0d-1.84316000508691335411e-16 # 26 * pi/2 middle
228 1.1 ragge .double 0d-1.23083660551323675053e-16 # 27 * pi/2 middle
229 1.1 ragge .double 0d-6.18513205939560162346e-17 # 28 * pi/2 middle
230 1.1 ragge .double 0d-6.18980636588357585202e-19 # 29 * pi/2 middle
231 1.1 ragge
232 1.1 ragge leading:
233 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 leading
234 1.1 ragge .double 0d+1.57079632679489661351e+00 # 1 * pi/2 leading
235 1.1 ragge .double 0d+3.14159265358979322702e+00 # 2 * pi/2 leading
236 1.1 ragge .double 0d+4.71238898038468989604e+00 # 3 * pi/2 leading
237 1.1 ragge .double 0d+6.28318530717958645404e+00 # 4 * pi/2 leading
238 1.1 ragge .double 0d+7.85398163397448312306e+00 # 5 * pi/2 leading
239 1.1 ragge .double 0d+9.42477796076937979208e+00 # 6 * pi/2 leading
240 1.1 ragge .double 0d+1.09955742875642763501e+01 # 7 * pi/2 leading
241 1.1 ragge .double 0d+1.25663706143591729081e+01 # 8 * pi/2 leading
242 1.1 ragge .double 0d+1.41371669411540694661e+01 # 9 * pi/2 leading
243 1.1 ragge .double 0d+1.57079632679489662461e+01 # 10 * pi/2 leading
244 1.1 ragge .double 0d+1.72787595947438630262e+01 # 11 * pi/2 leading
245 1.1 ragge .double 0d+1.88495559215387595842e+01 # 12 * pi/2 leading
246 1.1 ragge .double 0d+2.04203522483336561422e+01 # 13 * pi/2 leading
247 1.1 ragge .double 0d+2.19911485751285527002e+01 # 14 * pi/2 leading
248 1.1 ragge .double 0d+2.35619449019234492582e+01 # 15 * pi/2 leading
249 1.1 ragge .double 0d+2.51327412287183458162e+01 # 16 * pi/2 leading
250 1.1 ragge .double 0d+2.67035375555132423742e+01 # 17 * pi/2 leading
251 1.1 ragge .double 0d+2.82743338823081389322e+01 # 18 * pi/2 leading
252 1.1 ragge .double 0d+2.98451302091030359342e+01 # 19 * pi/2 leading
253 1.1 ragge .double 0d+3.14159265358979324922e+01 # 20 * pi/2 leading
254 1.1 ragge .double 0d+3.29867228626928286062e+01 # 21 * pi/2 leading
255 1.1 ragge .double 0d+3.45575191894877260523e+01 # 22 * pi/2 leading
256 1.1 ragge .double 0d+3.61283155162826226103e+01 # 23 * pi/2 leading
257 1.1 ragge .double 0d+3.76991118430775191683e+01 # 24 * pi/2 leading
258 1.1 ragge .double 0d+3.92699081698724157263e+01 # 25 * pi/2 leading
259 1.1 ragge .double 0d+4.08407044966673122843e+01 # 26 * pi/2 leading
260 1.1 ragge .double 0d+4.24115008234622088423e+01 # 27 * pi/2 leading
261 1.1 ragge .double 0d+4.39822971502571054003e+01 # 28 * pi/2 leading
262 1.1 ragge .double 0d+4.55530934770520019583e+01 # 29 * pi/2 leading
263 1.1 ragge
264 1.1 ragge twoOverPi:
265 1.1 ragge .double 0d+6.36619772367581343076e-01
266 1.5 matt
267 1.1 ragge .text
268 1.5 matt _ALIGN_TEXT
269 1.1 ragge
270 1.1 ragge table_lookup:
271 1.7 matt muld3 %r3,twoOverPi,%r0
272 1.7 matt cvtrdl %r0,%r0 # n = nearest int to ((2/pi)*|x|) rnded
273 1.7 matt subd2 leading[%r0],%r3 # p = (|x| - leading n*pi/2) exactly
274 1.7 matt subd3 middle[%r0],%r3,%r1 # q = (p - middle n*pi/2) rounded
275 1.7 matt subd2 %r1,%r3 # r = (p - q)
276 1.7 matt subd2 middle[%r0],%r3 # r = r - middle n*pi/2
277 1.7 matt subd2 trailing[%r0],%r3 # r = r - trailing n*pi/2 rounded
278 1.1 ragge /*
279 1.1 ragge * If the original argument was negative,
280 1.1 ragge * negate the reduce argument and
281 1.1 ragge * adjust the octant/quadrant number.
282 1.1 ragge */
283 1.7 matt tstw 4(%ap)
284 1.1 ragge bgeq abs2
285 1.7 matt mnegf %r1,%r1
286 1.7 matt mnegf %r3,%r3
287 1.7 matt /* subb3 %r0,$8,%r0 ...used for pi/4 reduction -S.McD */
288 1.7 matt subb3 %r0,$4,%r0
289 1.1 ragge abs2:
290 1.1 ragge /*
291 1.1 ragge * Clear all unneeded octant/quadrant bits.
292 1.1 ragge */
293 1.7 matt /* bicb2 $0xf8,%r0 ...used for pi/4 reduction -S.McD */
294 1.7 matt bicb2 $0xfc,%r0
295 1.1 ragge rsb
296 1.1 ragge /*
297 1.1 ragge * p.0
298 1.1 ragge */
299 1.6 matt #ifdef __ELF__
300 1.6 matt .section .rodata
301 1.6 matt #else
302 1.1 ragge .text
303 1.6 matt #endif
304 1.5 matt _ALIGN_TEXT
305 1.1 ragge /*
306 1.1 ragge * Only 256 (actually 225) bits of 2/pi are needed for VAX double
307 1.1 ragge * precision; this was determined by enumerating all the nearest
308 1.1 ragge * machine integer multiples of pi/2 using continued fractions.
309 1.1 ragge * (8a8d3673775b7ff7 required the most bits.) -S.McD
310 1.1 ragge */
311 1.1 ragge .long 0
312 1.1 ragge .long 0
313 1.1 ragge .long 0xaef1586d
314 1.1 ragge .long 0x9458eaf7
315 1.1 ragge .long 0x10e4107f
316 1.1 ragge .long 0xd8a5664f
317 1.1 ragge .long 0x4d377036
318 1.1 ragge .long 0x09d5f47d
319 1.1 ragge .long 0x91054a7f
320 1.1 ragge .long 0xbe60db93
321 1.1 ragge bits2opi:
322 1.1 ragge .long 0x00000028
323 1.1 ragge .long 0
324 1.1 ragge /*
325 1.1 ragge * Note: wherever you see the word `octant', read `quadrant'.
326 1.1 ragge * Currently this code is set up for pi/2 argument reduction.
327 1.1 ragge * By uncommenting/commenting the appropriate lines, it will
328 1.1 ragge * also serve as a pi/4 argument reduction code.
329 1.1 ragge */
330 1.6 matt .text
331 1.1 ragge
332 1.1 ragge /* p.1
333 1.1 ragge * Trigred preforms argument reduction
334 1.1 ragge * for the trigonometric functions. It
335 1.1 ragge * takes one input argument, a D-format
336 1.7 matt * number in %r1/%r0 . The magnitude of
337 1.1 ragge * the input argument must be greater
338 1.1 ragge * than or equal to 1/2 . Trigred produces
339 1.1 ragge * three results: the number of the octant
340 1.4 simonb * occupied by the argument, the reduced
341 1.1 ragge * argument, and an extension of the
342 1.4 simonb * reduced argument. The octant number is
343 1.7 matt * returned in %r0 . The reduced argument
344 1.4 simonb * is returned as a D-format number in
345 1.7 matt * %r2/%r1 . An 8 bit extension of the
346 1.4 simonb * reduced argument is returned as an
347 1.7 matt * F-format number in %r3.
348 1.1 ragge * p.2
349 1.1 ragge */
350 1.1 ragge trigred:
351 1.1 ragge /*
352 1.1 ragge * Save the sign of the input argument.
353 1.1 ragge */
354 1.7 matt movw %r0,-(%sp)
355 1.1 ragge /*
356 1.1 ragge * Extract the exponent field.
357 1.1 ragge */
358 1.7 matt extzv $7,$7,%r0,%r2
359 1.1 ragge /*
360 1.1 ragge * Convert the fraction part of the input
361 1.1 ragge * argument into a quadword integer.
362 1.1 ragge */
363 1.7 matt bicw2 $0xff80,%r0
364 1.7 matt bisb2 $0x80,%r0 # -S.McD
365 1.7 matt rotl $16,%r0,%r0
366 1.7 matt rotl $16,%r1,%r1
367 1.1 ragge /*
368 1.7 matt * If %r1 is negative, add 1 to %r0 . This
369 1.1 ragge * adjustment is made so that the two's
370 1.1 ragge * complement multiplications done later
371 1.1 ragge * will produce unsigned results.
372 1.1 ragge */
373 1.1 ragge bgeq posmid
374 1.7 matt incl %r0
375 1.1 ragge posmid:
376 1.1 ragge /* p.3
377 1.1 ragge *
378 1.7 matt * Set %r3 to the address of the first quadword
379 1.1 ragge * used to obtain the needed portion of 2/pi .
380 1.1 ragge * The address is longword aligned to ensure
381 1.1 ragge * efficient access.
382 1.1 ragge */
383 1.7 matt ashl $-3,%r2,%r3
384 1.7 matt bicb2 $3,%r3
385 1.7 matt mnegl %r3,%r3
386 1.7 matt movab bits2opi[%r3],%r3
387 1.1 ragge /*
388 1.7 matt * Set %r2 to the size of the shift needed to
389 1.1 ragge * obtain the correct portion of 2/pi .
390 1.1 ragge */
391 1.7 matt bicb2 $0xe0,%r2
392 1.1 ragge /* p.4
393 1.1 ragge *
394 1.1 ragge * Move the needed 128 bits of 2/pi into
395 1.7 matt * %r11 - %r8 . Adjust the numbers to allow
396 1.1 ragge * for unsigned multiplication.
397 1.1 ragge */
398 1.7 matt ashq %r2,(%r3),%r10
399 1.1 ragge
400 1.7 matt subl2 $4,%r3
401 1.7 matt ashq %r2,(%r3),%r9
402 1.1 ragge bgeq signoff1
403 1.7 matt incl %r11
404 1.1 ragge signoff1:
405 1.7 matt subl2 $4,%r3
406 1.7 matt ashq %r2,(%r3),%r8
407 1.1 ragge bgeq signoff2
408 1.7 matt incl %r10
409 1.1 ragge signoff2:
410 1.7 matt subl2 $4,%r3
411 1.7 matt ashq %r2,(%r3),%r7
412 1.1 ragge bgeq signoff3
413 1.7 matt incl %r9
414 1.1 ragge signoff3:
415 1.1 ragge /* p.5
416 1.1 ragge *
417 1.7 matt * Multiply the contents of %r0/%r1 by the
418 1.7 matt * slice of 2/pi in %r11 - %r8 .
419 1.1 ragge */
420 1.7 matt emul %r0,%r8,$0,%r4
421 1.7 matt emul %r0,%r9,%r5,%r5
422 1.7 matt emul %r0,%r10,%r6,%r6
423 1.7 matt
424 1.7 matt emul %r1,%r8,$0,%r7
425 1.7 matt emul %r1,%r9,%r8,%r8
426 1.7 matt emul %r1,%r10,%r9,%r9
427 1.7 matt emul %r1,%r11,%r10,%r10
428 1.7 matt
429 1.7 matt addl2 %r4,%r8
430 1.7 matt adwc %r5,%r9
431 1.7 matt adwc %r6,%r10
432 1.1 ragge /* p.6
433 1.1 ragge *
434 1.1 ragge * If there are more than five leading zeros
435 1.1 ragge * after the first two quotient bits or if there
436 1.1 ragge * are more than five leading ones after the first
437 1.1 ragge * two quotient bits, generate more fraction bits.
438 1.1 ragge * Otherwise, branch to code to produce the result.
439 1.1 ragge */
440 1.7 matt bicl3 $0xc1ffffff,%r10,%r4
441 1.1 ragge beql more1
442 1.7 matt cmpl $0x3e000000,%r4
443 1.1 ragge bneq result
444 1.1 ragge more1:
445 1.1 ragge /* p.7
446 1.1 ragge *
447 1.1 ragge * generate another 32 result bits.
448 1.1 ragge */
449 1.7 matt subl2 $4,%r3
450 1.7 matt ashq %r2,(%r3),%r5
451 1.1 ragge bgeq signoff4
452 1.1 ragge
453 1.7 matt emul %r1,%r6,$0,%r4
454 1.7 matt addl2 %r1,%r5
455 1.7 matt emul %r0,%r6,%r5,%r5
456 1.7 matt addl2 %r0,%r6
457 1.5 matt jbr addbits1
458 1.1 ragge
459 1.1 ragge signoff4:
460 1.7 matt emul %r1,%r6,$0,%r4
461 1.7 matt emul %r0,%r6,%r5,%r5
462 1.1 ragge
463 1.1 ragge addbits1:
464 1.7 matt addl2 %r5,%r7
465 1.7 matt adwc %r6,%r8
466 1.7 matt adwc $0,%r9
467 1.7 matt adwc $0,%r10
468 1.1 ragge /* p.8
469 1.1 ragge *
470 1.1 ragge * Check for massive cancellation.
471 1.1 ragge */
472 1.7 matt bicl3 $0xc0000000,%r10,%r6
473 1.1 ragge /* bneq more2 -S.McD Test was backwards */
474 1.1 ragge beql more2
475 1.7 matt cmpl $0x3fffffff,%r6
476 1.1 ragge bneq result
477 1.1 ragge more2:
478 1.1 ragge /* p.9
479 1.1 ragge *
480 1.1 ragge * If massive cancellation has occurred,
481 1.1 ragge * generate another 24 result bits.
482 1.4 simonb * Testing has shown there will always be
483 1.1 ragge * enough bits after this point.
484 1.1 ragge */
485 1.7 matt subl2 $4,%r3
486 1.7 matt ashq %r2,(%r3),%r5
487 1.1 ragge bgeq signoff5
488 1.1 ragge
489 1.7 matt emul %r0,%r6,%r4,%r5
490 1.7 matt addl2 %r0,%r6
491 1.5 matt jbr addbits2
492 1.1 ragge
493 1.1 ragge signoff5:
494 1.7 matt emul %r0,%r6,%r4,%r5
495 1.1 ragge
496 1.1 ragge addbits2:
497 1.7 matt addl2 %r6,%r7
498 1.7 matt adwc $0,%r8
499 1.7 matt adwc $0,%r9
500 1.7 matt adwc $0,%r10
501 1.1 ragge /* p.10
502 1.1 ragge *
503 1.1 ragge * The following code produces the reduced
504 1.1 ragge * argument from the product bits contained
505 1.7 matt * in %r10 - %r7 .
506 1.1 ragge */
507 1.1 ragge result:
508 1.1 ragge /*
509 1.7 matt * Extract the octant number from %r10 .
510 1.1 ragge */
511 1.7 matt /* extzv $29,$3,%r10,%r0 ...used for pi/4 reduction -S.McD */
512 1.7 matt extzv $30,$2,%r10,%r0
513 1.1 ragge /*
514 1.7 matt * Clear the octant bits in %r10 .
515 1.1 ragge */
516 1.7 matt /* bicl2 $0xe0000000,%r10 ...used for pi/4 reduction -S.McD */
517 1.7 matt bicl2 $0xc0000000,%r10
518 1.1 ragge /*
519 1.1 ragge * Zero the sign flag.
520 1.1 ragge */
521 1.7 matt clrl %r5
522 1.1 ragge /* p.11
523 1.1 ragge *
524 1.1 ragge * Check to see if the fraction is greater than
525 1.4 simonb * or equal to one-half. If it is, add one
526 1.1 ragge * to the octant number, set the sign flag
527 1.1 ragge * on, and replace the fraction with 1 minus
528 1.1 ragge * the fraction.
529 1.1 ragge */
530 1.7 matt /* bitl $0x10000000,%r10 ...used for pi/4 reduction -S.McD */
531 1.7 matt bitl $0x20000000,%r10
532 1.1 ragge beql small
533 1.7 matt incl %r0
534 1.7 matt incl %r5
535 1.7 matt /* subl3 %r10,$0x1fffffff,%r10 ...used for pi/4 reduction -S.McD */
536 1.7 matt subl3 %r10,$0x3fffffff,%r10
537 1.7 matt mcoml %r9,%r9
538 1.7 matt mcoml %r8,%r8
539 1.7 matt mcoml %r7,%r7
540 1.1 ragge small:
541 1.1 ragge /* p.12
542 1.1 ragge *
543 1.1 ragge * Test whether the first 29 bits of the ...used for pi/4 reduction -S.McD
544 1.4 simonb * Test whether the first 30 bits of the
545 1.1 ragge * fraction are zero.
546 1.1 ragge */
547 1.7 matt tstl %r10
548 1.1 ragge beql tiny
549 1.1 ragge /*
550 1.7 matt * Find the position of the first one bit in %r10 .
551 1.1 ragge */
552 1.7 matt cvtld %r10,%r1
553 1.7 matt extzv $7,$7,%r1,%r1
554 1.1 ragge /*
555 1.1 ragge * Compute the size of the shift needed.
556 1.1 ragge */
557 1.7 matt subl3 %r1,$32,%r6
558 1.1 ragge /*
559 1.1 ragge * Shift up the high order 64 bits of the
560 1.1 ragge * product.
561 1.1 ragge */
562 1.7 matt ashq %r6,%r9,%r10
563 1.7 matt ashq %r6,%r8,%r9
564 1.5 matt jbr mult
565 1.1 ragge /* p.13
566 1.1 ragge *
567 1.7 matt * Test to see if the sign bit of %r9 is on.
568 1.1 ragge */
569 1.1 ragge tiny:
570 1.7 matt tstl %r9
571 1.1 ragge bgeq tinier
572 1.1 ragge /*
573 1.1 ragge * If it is, shift the product bits up 32 bits.
574 1.1 ragge */
575 1.7 matt movl $32,%r6
576 1.7 matt movq %r8,%r10
577 1.7 matt tstl %r10
578 1.5 matt jbr mult
579 1.1 ragge /* p.14
580 1.1 ragge *
581 1.7 matt * Test whether %r9 is zero. It is probably
582 1.7 matt * impossible for both %r10 and %r9 to be
583 1.1 ragge * zero, but until proven to be so, the test
584 1.1 ragge * must be made.
585 1.1 ragge */
586 1.1 ragge tinier:
587 1.1 ragge beql zero
588 1.1 ragge /*
589 1.7 matt * Find the position of the first one bit in %r9 .
590 1.1 ragge */
591 1.7 matt cvtld %r9,%r1
592 1.7 matt extzv $7,$7,%r1,%r1
593 1.1 ragge /*
594 1.1 ragge * Compute the size of the shift needed.
595 1.1 ragge */
596 1.7 matt subl3 %r1,$32,%r1
597 1.7 matt addl3 $32,%r1,%r6
598 1.1 ragge /*
599 1.1 ragge * Shift up the high order 64 bits of the
600 1.1 ragge * product.
601 1.1 ragge */
602 1.7 matt ashq %r1,%r8,%r10
603 1.7 matt ashq %r1,%r7,%r9
604 1.5 matt jbr mult
605 1.1 ragge /* p.15
606 1.1 ragge *
607 1.1 ragge * The following code sets the reduced
608 1.1 ragge * argument to zero.
609 1.1 ragge */
610 1.1 ragge zero:
611 1.7 matt clrl %r1
612 1.7 matt clrl %r2
613 1.7 matt clrl %r3
614 1.5 matt jbr return
615 1.1 ragge /* p.16
616 1.1 ragge *
617 1.7 matt * At this point, %r0 contains the octant number,
618 1.7 matt * %r6 indicates the number of bits the fraction
619 1.7 matt * has been shifted, %r5 indicates the sign of
620 1.7 matt * the fraction, %r11/%r10 contain the high order
621 1.1 ragge * 64 bits of the fraction, and the condition
622 1.7 matt * codes indicate where the sign bit of %r10
623 1.1 ragge * is on. The following code multiplies the
624 1.1 ragge * fraction by pi/2 .
625 1.1 ragge */
626 1.1 ragge mult:
627 1.1 ragge /*
628 1.7 matt * Save %r11/%r10 in %r4/%r1 . -S.McD
629 1.1 ragge */
630 1.7 matt movl %r11,%r4
631 1.7 matt movl %r10,%r1
632 1.1 ragge /*
633 1.7 matt * If the sign bit of %r10 is on, add 1 to %r11 .
634 1.1 ragge */
635 1.1 ragge bgeq signoff6
636 1.7 matt incl %r11
637 1.1 ragge signoff6:
638 1.1 ragge /* p.17
639 1.1 ragge *
640 1.7 matt * Move pi/2 into %r3/%r2 .
641 1.1 ragge */
642 1.7 matt movq $0xc90fdaa22168c235,%r2
643 1.1 ragge /*
644 1.1 ragge * Multiply the fraction by the portion of pi/2
645 1.7 matt * in %r2 .
646 1.1 ragge */
647 1.7 matt emul %r2,%r10,$0,%r7
648 1.7 matt emul %r2,%r11,%r8,%r7
649 1.1 ragge /*
650 1.4 simonb * Multiply the fraction by the portion of pi/2
651 1.7 matt * in %r3 .
652 1.1 ragge */
653 1.7 matt emul %r3,%r10,$0,%r9
654 1.7 matt emul %r3,%r11,%r10,%r10
655 1.1 ragge /*
656 1.1 ragge * Add the product bits together.
657 1.1 ragge */
658 1.7 matt addl2 %r7,%r9
659 1.7 matt adwc %r8,%r10
660 1.7 matt adwc $0,%r11
661 1.1 ragge /*
662 1.7 matt * Compensate for not sign extending %r8 above.-S.McD
663 1.1 ragge */
664 1.7 matt tstl %r8
665 1.1 ragge bgeq signoff6a
666 1.7 matt decl %r11
667 1.1 ragge signoff6a:
668 1.1 ragge /*
669 1.7 matt * Compensate for %r11/%r10 being unsigned. -S.McD
670 1.1 ragge */
671 1.7 matt addl2 %r2,%r10
672 1.7 matt adwc %r3,%r11
673 1.1 ragge /*
674 1.7 matt * Compensate for %r3/%r2 being unsigned. -S.McD
675 1.1 ragge */
676 1.7 matt addl2 %r1,%r10
677 1.7 matt adwc %r4,%r11
678 1.1 ragge /* p.18
679 1.1 ragge *
680 1.7 matt * If the sign bit of %r11 is zero, shift the
681 1.7 matt * product bits up one bit and increment %r6 .
682 1.1 ragge */
683 1.1 ragge blss signon
684 1.7 matt incl %r6
685 1.7 matt ashq $1,%r10,%r10
686 1.7 matt tstl %r9
687 1.1 ragge bgeq signoff7
688 1.7 matt incl %r10
689 1.1 ragge signoff7:
690 1.1 ragge signon:
691 1.1 ragge /* p.19
692 1.1 ragge *
693 1.1 ragge * Shift the 56 most significant product
694 1.7 matt * bits into %r9/%r8 . The sign extension
695 1.1 ragge * will be handled later.
696 1.1 ragge */
697 1.7 matt ashq $-8,%r10,%r8
698 1.1 ragge /*
699 1.7 matt * Convert the low order 8 bits of %r10
700 1.1 ragge * into an F-format number.
701 1.1 ragge */
702 1.7 matt cvtbf %r10,%r3
703 1.1 ragge /*
704 1.1 ragge * If the result of the conversion was
705 1.7 matt * negative, add 1 to %r9/%r8 .
706 1.1 ragge */
707 1.1 ragge bgeq chop
708 1.7 matt incl %r8
709 1.7 matt adwc $0,%r9
710 1.1 ragge /*
711 1.7 matt * If %r9 is now zero, branch to special
712 1.1 ragge * code to handle that possibility.
713 1.1 ragge */
714 1.1 ragge beql carryout
715 1.1 ragge chop:
716 1.1 ragge /* p.20
717 1.1 ragge *
718 1.7 matt * Convert the number in %r9/%r8 into
719 1.7 matt * D-format number in %r2/%r1 .
720 1.1 ragge */
721 1.7 matt rotl $16,%r8,%r2
722 1.7 matt rotl $16,%r9,%r1
723 1.1 ragge /*
724 1.1 ragge * Set the exponent field to the appropriate
725 1.1 ragge * value. Note that the extra bits created by
726 1.1 ragge * sign extension are now eliminated.
727 1.1 ragge */
728 1.7 matt subw3 %r6,$131,%r6
729 1.7 matt insv %r6,$7,$9,%r1
730 1.1 ragge /*
731 1.1 ragge * Set the exponent field of the F-format
732 1.7 matt * number in %r3 to the appropriate value.
733 1.1 ragge */
734 1.7 matt tstf %r3
735 1.1 ragge beql return
736 1.7 matt /* extzv $7,$8,%r3,%r4 -S.McD */
737 1.7 matt extzv $7,$7,%r3,%r4
738 1.7 matt addw2 %r4,%r6
739 1.7 matt /* subw2 $217,%r6 -S.McD */
740 1.7 matt subw2 $64,%r6
741 1.7 matt insv %r6,$7,$8,%r3
742 1.5 matt jbr return
743 1.1 ragge /* p.21
744 1.1 ragge *
745 1.4 simonb * The following code generates the appropriate
746 1.1 ragge * result for the unlikely possibility that
747 1.7 matt * rounding the number in %r9/%r8 resulted in
748 1.1 ragge * a carry out.
749 1.1 ragge */
750 1.1 ragge carryout:
751 1.7 matt clrl %r1
752 1.7 matt clrl %r2
753 1.7 matt subw3 %r6,$132,%r6
754 1.7 matt insv %r6,$7,$9,%r1
755 1.7 matt tstf %r3
756 1.1 ragge beql return
757 1.7 matt extzv $7,$8,%r3,%r4
758 1.7 matt addw2 %r4,%r6
759 1.7 matt subw2 $218,%r6
760 1.7 matt insv %r6,$7,$8,%r3
761 1.1 ragge /* p.22
762 1.1 ragge *
763 1.1 ragge * The following code makes an needed
764 1.4 simonb * adjustments to the signs of the
765 1.1 ragge * results or to the octant number, and
766 1.1 ragge * then returns.
767 1.1 ragge */
768 1.1 ragge return:
769 1.1 ragge /*
770 1.4 simonb * Test if the fraction was greater than or
771 1.1 ragge * equal to 1/2 . If so, negate the reduced
772 1.1 ragge * argument.
773 1.1 ragge */
774 1.7 matt blbc %r5,signoff8
775 1.7 matt mnegf %r1,%r1
776 1.7 matt mnegf %r3,%r3
777 1.1 ragge signoff8:
778 1.1 ragge /* p.23
779 1.1 ragge *
780 1.1 ragge * If the original argument was negative,
781 1.1 ragge * negate the reduce argument and
782 1.1 ragge * adjust the octant number.
783 1.1 ragge */
784 1.7 matt tstw (%sp)+
785 1.1 ragge bgeq signoff9
786 1.7 matt mnegf %r1,%r1
787 1.7 matt mnegf %r3,%r3
788 1.7 matt /* subb3 %r0,$8,%r0 ...used for pi/4 reduction -S.McD */
789 1.7 matt subb3 %r0,$4,%r0
790 1.1 ragge signoff9:
791 1.1 ragge /*
792 1.1 ragge * Clear all unneeded octant bits.
793 1.1 ragge *
794 1.7 matt * bicb2 $0xf8,%r0 ...used for pi/4 reduction -S.McD */
795 1.7 matt bicb2 $0xfc,%r0
796 1.1 ragge /*
797 1.1 ragge * Return.
798 1.1 ragge */
799 1.1 ragge rsb
800