n_argred.S revision 1.4 1 /* $NetBSD: n_argred.S,v 1.4 1999/07/02 15:37:35 simonb Exp $ */
2 /*
3 * Copyright (c) 1985, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)argred.s 8.1 (Berkeley) 6/4/93
35 */
36
37 /*
38 * libm$argred implements Bob Corbett's argument reduction and
39 * libm$sincos implements Peter Tang's double precision sin/cos.
40 *
41 * Note: The two entry points libm$argred and libm$sincos are meant
42 * to be used only by _sin, _cos and _tan.
43 *
44 * method: true range reduction to [-pi/4,pi/4], P. Tang & B. Corbett
45 * S. McDonald, April 4, 1985
46 */
47 .text
48 .globl libm$argred
49 .type libm$argred,@label
50 .globl libm$sincos
51 .type libm$sincos,@label
52 .align 1
53
54 libm$argred:
55 /*
56 * Compare the argument with the largest possible that can
57 * be reduced by table lookup. r3 := |x| will be used in table_lookup .
58 */
59 movd r0,r3
60 bgeq abs1
61 mnegd r3,r3
62 abs1:
63 cmpd r3,$0d+4.55530934770520019583e+01
64 blss small_arg
65 jsb trigred
66 rsb
67 small_arg:
68 jsb table_lookup
69 rsb
70 /*
71 * At this point,
72 * r0 contains the quadrant number, 0, 1, 2, or 3;
73 * r2/r1 contains the reduced argument as a D-format number;
74 * r3 contains a F-format extension to the reduced argument;
75 * r4 contains a 0 or 1 corresponding to a sin or cos entry.
76 */
77 libm$sincos:
78 /*
79 * Compensate for a cosine entry by adding one to the quadrant number.
80 */
81 addl2 r4,r0
82 /*
83 * Polyd clobbers r5-r0 ; save X in r7/r6 .
84 * This can be avoided by rewriting trigred .
85 */
86 movd r1,r6
87 /*
88 * Likewise, save alpha in r8 .
89 * This can be avoided by rewriting trigred .
90 */
91 movf r3,r8
92 /*
93 * Odd or even quadrant? cosine if odd, sine otherwise.
94 * Save floor(quadrant/2) in r9 ; it determines the final sign.
95 */
96 rotl $-1,r0,r9
97 blss cosine
98 sine:
99 muld2 r1,r1 # Xsq = X * X
100 cmpw $0x2480,r1 # [zl] Xsq > 2^-56?
101 blss 1f # [zl] yes, go ahead and do polyd
102 clrq r1 # [zl] work around 11/780 FPA polyd bug
103 1:
104 polyd r1,$7,sin_coef # Q = P(Xsq) , of deg 7
105 mulf3 $0f3.0,r8,r4 # beta = 3 * alpha
106 mulf2 r0,r4 # beta = Q * beta
107 addf2 r8,r4 # beta = alpha + beta
108 muld2 r6,r0 # S(X) = X * Q
109 /* cvtfd r4,r4 ... r5 = 0 after a polyd. */
110 addd2 r4,r0 # S(X) = beta + S(X)
111 addd2 r6,r0 # S(X) = X + S(X)
112 brb done
113 cosine:
114 muld2 r6,r6 # Xsq = X * X
115 beql zero_arg
116 mulf2 r1,r8 # beta = X * alpha
117 polyd r6,$7,cos_coef /* Q = P'(Xsq) , of deg 7 */
118 subd3 r0,r8,r0 # beta = beta - Q
119 subw2 $0x80,r6 # Xsq = Xsq / 2
120 addd2 r0,r6 # Xsq = Xsq + beta
121 zero_arg:
122 subd3 r6,$0d1.0,r0 # C(X) = 1 - Xsq
123 done:
124 blbc r9,even
125 mnegd r0,r0
126 even:
127 rsb
128
129 .data
130 .align 2
131
132 sin_coef:
133 .double 0d-7.53080332264191085773e-13 # s7 = 2^-29 -1.a7f2504ffc49f8..
134 .double 0d+1.60573519267703489121e-10 # s6 = 2^-21 1.611adaede473c8..
135 .double 0d-2.50520965150706067211e-08 # s5 = 2^-1a -1.ae644921ed8382..
136 .double 0d+2.75573191800593885716e-06 # s4 = 2^-13 1.71de3a4b884278..
137 .double 0d-1.98412698411850507950e-04 # s3 = 2^-0d -1.a01a01a0125e7d..
138 .double 0d+8.33333333333325688985e-03 # s2 = 2^-07 1.11111111110e50
139 .double 0d-1.66666666666666664354e-01 # s1 = 2^-03 -1.55555555555554
140 .double 0d+0.00000000000000000000e+00 # s0 = 0
141
142 cos_coef:
143 .double 0d-1.13006966202629430300e-11 # s7 = 2^-25 -1.8D9BA04D1374BE..
144 .double 0d+2.08746646574796004700e-09 # s6 = 2^-1D 1.1EE632650350BA..
145 .double 0d-2.75573073031284417300e-07 # s5 = 2^-16 -1.27E4F31411719E..
146 .double 0d+2.48015872682668025200e-05 # s4 = 2^-10 1.A01A0196B902E8..
147 .double 0d-1.38888888888464709200e-03 # s3 = 2^-0A -1.6C16C16C11FACE..
148 .double 0d+4.16666666666664761400e-02 # s2 = 2^-05 1.5555555555539E
149 .double 0d+0.00000000000000000000e+00 # s1 = 0
150 .double 0d+0.00000000000000000000e+00 # s0 = 0
151
152 /*
153 * Multiples of pi/2 expressed as the sum of three doubles,
154 *
155 * trailing: n * pi/2 , n = 0, 1, 2, ..., 29
156 * trailing[n] ,
157 *
158 * middle: n * pi/2 , n = 0, 1, 2, ..., 29
159 * middle[n] ,
160 *
161 * leading: n * pi/2 , n = 0, 1, 2, ..., 29
162 * leading[n] ,
163 *
164 * where
165 * leading[n] := (n * pi/2) rounded,
166 * middle[n] := (n * pi/2 - leading[n]) rounded,
167 * trailing[n] := (( n * pi/2 - leading[n]) - middle[n]) rounded .
168 */
169 trailing:
170 .double 0d+0.00000000000000000000e+00 # 0 * pi/2 trailing
171 .double 0d+4.33590506506189049611e-35 # 1 * pi/2 trailing
172 .double 0d+8.67181013012378099223e-35 # 2 * pi/2 trailing
173 .double 0d+1.30077151951856714215e-34 # 3 * pi/2 trailing
174 .double 0d+1.73436202602475619845e-34 # 4 * pi/2 trailing
175 .double 0d-1.68390735624352669192e-34 # 5 * pi/2 trailing
176 .double 0d+2.60154303903713428430e-34 # 6 * pi/2 trailing
177 .double 0d-8.16726343231148352150e-35 # 7 * pi/2 trailing
178 .double 0d+3.46872405204951239689e-34 # 8 * pi/2 trailing
179 .double 0d+3.90231455855570147991e-34 # 9 * pi/2 trailing
180 .double 0d-3.36781471248705338384e-34 # 10 * pi/2 trailing
181 .double 0d-1.06379439835298071785e-33 # 11 * pi/2 trailing
182 .double 0d+5.20308607807426856861e-34 # 12 * pi/2 trailing
183 .double 0d+5.63667658458045770509e-34 # 13 * pi/2 trailing
184 .double 0d-1.63345268646229670430e-34 # 14 * pi/2 trailing
185 .double 0d-1.19986217995610764801e-34 # 15 * pi/2 trailing
186 .double 0d+6.93744810409902479378e-34 # 16 * pi/2 trailing
187 .double 0d-8.03640094449267300110e-34 # 17 * pi/2 trailing
188 .double 0d+7.80462911711140295982e-34 # 18 * pi/2 trailing
189 .double 0d-7.16921993148029483506e-34 # 19 * pi/2 trailing
190 .double 0d-6.73562942497410676769e-34 # 20 * pi/2 trailing
191 .double 0d-6.30203891846791677593e-34 # 21 * pi/2 trailing
192 .double 0d-2.12758879670596143570e-33 # 22 * pi/2 trailing
193 .double 0d+2.53800212047402350390e-33 # 23 * pi/2 trailing
194 .double 0d+1.04061721561485371372e-33 # 24 * pi/2 trailing
195 .double 0d+6.11729905311472319056e-32 # 25 * pi/2 trailing
196 .double 0d+1.12733531691609154102e-33 # 26 * pi/2 trailing
197 .double 0d-3.70049587943078297272e-34 # 27 * pi/2 trailing
198 .double 0d-3.26690537292459340860e-34 # 28 * pi/2 trailing
199 .double 0d-1.14812616507957271361e-34 # 29 * pi/2 trailing
200
201 middle:
202 .double 0d+0.00000000000000000000e+00 # 0 * pi/2 middle
203 .double 0d+5.72118872610983179676e-18 # 1 * pi/2 middle
204 .double 0d+1.14423774522196635935e-17 # 2 * pi/2 middle
205 .double 0d-3.83475850529283316309e-17 # 3 * pi/2 middle
206 .double 0d+2.28847549044393271871e-17 # 4 * pi/2 middle
207 .double 0d-2.69052076007086676522e-17 # 5 * pi/2 middle
208 .double 0d-7.66951701058566632618e-17 # 6 * pi/2 middle
209 .double 0d-1.54628301484890040587e-17 # 7 * pi/2 middle
210 .double 0d+4.57695098088786543741e-17 # 8 * pi/2 middle
211 .double 0d+1.07001849766246313192e-16 # 9 * pi/2 middle
212 .double 0d-5.38104152014173353044e-17 # 10 * pi/2 middle
213 .double 0d-2.14622680169080983801e-16 # 11 * pi/2 middle
214 .double 0d-1.53390340211713326524e-16 # 12 * pi/2 middle
215 .double 0d-9.21580002543456677056e-17 # 13 * pi/2 middle
216 .double 0d-3.09256602969780081173e-17 # 14 * pi/2 middle
217 .double 0d+3.03066796603896507006e-17 # 15 * pi/2 middle
218 .double 0d+9.15390196177573087482e-17 # 16 * pi/2 middle
219 .double 0d+1.52771359575124969107e-16 # 17 * pi/2 middle
220 .double 0d+2.14003699532492626384e-16 # 18 * pi/2 middle
221 .double 0d-1.68853170360202329427e-16 # 19 * pi/2 middle
222 .double 0d-1.07620830402834670609e-16 # 20 * pi/2 middle
223 .double 0d+3.97700719404595604379e-16 # 21 * pi/2 middle
224 .double 0d-4.29245360338161967602e-16 # 22 * pi/2 middle
225 .double 0d-3.68013020380794313406e-16 # 23 * pi/2 middle
226 .double 0d-3.06780680423426653047e-16 # 24 * pi/2 middle
227 .double 0d-2.45548340466059054318e-16 # 25 * pi/2 middle
228 .double 0d-1.84316000508691335411e-16 # 26 * pi/2 middle
229 .double 0d-1.23083660551323675053e-16 # 27 * pi/2 middle
230 .double 0d-6.18513205939560162346e-17 # 28 * pi/2 middle
231 .double 0d-6.18980636588357585202e-19 # 29 * pi/2 middle
232
233 leading:
234 .double 0d+0.00000000000000000000e+00 # 0 * pi/2 leading
235 .double 0d+1.57079632679489661351e+00 # 1 * pi/2 leading
236 .double 0d+3.14159265358979322702e+00 # 2 * pi/2 leading
237 .double 0d+4.71238898038468989604e+00 # 3 * pi/2 leading
238 .double 0d+6.28318530717958645404e+00 # 4 * pi/2 leading
239 .double 0d+7.85398163397448312306e+00 # 5 * pi/2 leading
240 .double 0d+9.42477796076937979208e+00 # 6 * pi/2 leading
241 .double 0d+1.09955742875642763501e+01 # 7 * pi/2 leading
242 .double 0d+1.25663706143591729081e+01 # 8 * pi/2 leading
243 .double 0d+1.41371669411540694661e+01 # 9 * pi/2 leading
244 .double 0d+1.57079632679489662461e+01 # 10 * pi/2 leading
245 .double 0d+1.72787595947438630262e+01 # 11 * pi/2 leading
246 .double 0d+1.88495559215387595842e+01 # 12 * pi/2 leading
247 .double 0d+2.04203522483336561422e+01 # 13 * pi/2 leading
248 .double 0d+2.19911485751285527002e+01 # 14 * pi/2 leading
249 .double 0d+2.35619449019234492582e+01 # 15 * pi/2 leading
250 .double 0d+2.51327412287183458162e+01 # 16 * pi/2 leading
251 .double 0d+2.67035375555132423742e+01 # 17 * pi/2 leading
252 .double 0d+2.82743338823081389322e+01 # 18 * pi/2 leading
253 .double 0d+2.98451302091030359342e+01 # 19 * pi/2 leading
254 .double 0d+3.14159265358979324922e+01 # 20 * pi/2 leading
255 .double 0d+3.29867228626928286062e+01 # 21 * pi/2 leading
256 .double 0d+3.45575191894877260523e+01 # 22 * pi/2 leading
257 .double 0d+3.61283155162826226103e+01 # 23 * pi/2 leading
258 .double 0d+3.76991118430775191683e+01 # 24 * pi/2 leading
259 .double 0d+3.92699081698724157263e+01 # 25 * pi/2 leading
260 .double 0d+4.08407044966673122843e+01 # 26 * pi/2 leading
261 .double 0d+4.24115008234622088423e+01 # 27 * pi/2 leading
262 .double 0d+4.39822971502571054003e+01 # 28 * pi/2 leading
263 .double 0d+4.55530934770520019583e+01 # 29 * pi/2 leading
264
265 twoOverPi:
266 .double 0d+6.36619772367581343076e-01
267 .text
268 .align 1
269
270 table_lookup:
271 muld3 r3,twoOverPi,r0
272 cvtrdl r0,r0 # n = nearest int to ((2/pi)*|x|) rnded
273 mull3 $8,r0,r5
274 subd2 leading(r5),r3 # p = (|x| - leading n*pi/2) exactly
275 subd3 middle(r5),r3,r1 # q = (p - middle n*pi/2) rounded
276 subd2 r1,r3 # r = (p - q)
277 subd2 middle(r5),r3 # r = r - middle n*pi/2
278 subd2 trailing(r5),r3 # r = r - trailing n*pi/2 rounded
279 /*
280 * If the original argument was negative,
281 * negate the reduce argument and
282 * adjust the octant/quadrant number.
283 */
284 tstw 4(ap)
285 bgeq abs2
286 mnegf r1,r1
287 mnegf r3,r3
288 /* subb3 r0,$8,r0 ...used for pi/4 reduction -S.McD */
289 subb3 r0,$4,r0
290 abs2:
291 /*
292 * Clear all unneeded octant/quadrant bits.
293 */
294 /* bicb2 $0xf8,r0 ...used for pi/4 reduction -S.McD */
295 bicb2 $0xfc,r0
296 rsb
297 /*
298 * p.0
299 */
300 .text
301 .align 2
302 /*
303 * Only 256 (actually 225) bits of 2/pi are needed for VAX double
304 * precision; this was determined by enumerating all the nearest
305 * machine integer multiples of pi/2 using continued fractions.
306 * (8a8d3673775b7ff7 required the most bits.) -S.McD
307 */
308 .long 0
309 .long 0
310 .long 0xaef1586d
311 .long 0x9458eaf7
312 .long 0x10e4107f
313 .long 0xd8a5664f
314 .long 0x4d377036
315 .long 0x09d5f47d
316 .long 0x91054a7f
317 .long 0xbe60db93
318 bits2opi:
319 .long 0x00000028
320 .long 0
321 /*
322 * Note: wherever you see the word `octant', read `quadrant'.
323 * Currently this code is set up for pi/2 argument reduction.
324 * By uncommenting/commenting the appropriate lines, it will
325 * also serve as a pi/4 argument reduction code.
326 */
327
328 /* p.1
329 * Trigred preforms argument reduction
330 * for the trigonometric functions. It
331 * takes one input argument, a D-format
332 * number in r1/r0 . The magnitude of
333 * the input argument must be greater
334 * than or equal to 1/2 . Trigred produces
335 * three results: the number of the octant
336 * occupied by the argument, the reduced
337 * argument, and an extension of the
338 * reduced argument. The octant number is
339 * returned in r0 . The reduced argument
340 * is returned as a D-format number in
341 * r2/r1 . An 8 bit extension of the
342 * reduced argument is returned as an
343 * F-format number in r3.
344 * p.2
345 */
346 trigred:
347 /*
348 * Save the sign of the input argument.
349 */
350 movw r0,-(sp)
351 /*
352 * Extract the exponent field.
353 */
354 extzv $7,$7,r0,r2
355 /*
356 * Convert the fraction part of the input
357 * argument into a quadword integer.
358 */
359 bicw2 $0xff80,r0
360 bisb2 $0x80,r0 # -S.McD
361 rotl $16,r0,r0
362 rotl $16,r1,r1
363 /*
364 * If r1 is negative, add 1 to r0 . This
365 * adjustment is made so that the two's
366 * complement multiplications done later
367 * will produce unsigned results.
368 */
369 bgeq posmid
370 incl r0
371 posmid:
372 /* p.3
373 *
374 * Set r3 to the address of the first quadword
375 * used to obtain the needed portion of 2/pi .
376 * The address is longword aligned to ensure
377 * efficient access.
378 */
379 ashl $-3,r2,r3
380 bicb2 $3,r3
381 mnegl r3,r3
382 movab bits2opi[r3],r3
383 /*
384 * Set r2 to the size of the shift needed to
385 * obtain the correct portion of 2/pi .
386 */
387 bicb2 $0xe0,r2
388 /* p.4
389 *
390 * Move the needed 128 bits of 2/pi into
391 * r11 - r8 . Adjust the numbers to allow
392 * for unsigned multiplication.
393 */
394 ashq r2,(r3),r10
395
396 subl2 $4,r3
397 ashq r2,(r3),r9
398 bgeq signoff1
399 incl r11
400 signoff1:
401 subl2 $4,r3
402 ashq r2,(r3),r8
403 bgeq signoff2
404 incl r10
405 signoff2:
406 subl2 $4,r3
407 ashq r2,(r3),r7
408 bgeq signoff3
409 incl r9
410 signoff3:
411 /* p.5
412 *
413 * Multiply the contents of r0/r1 by the
414 * slice of 2/pi in r11 - r8 .
415 */
416 emul r0,r8,$0,r4
417 emul r0,r9,r5,r5
418 emul r0,r10,r6,r6
419
420 emul r1,r8,$0,r7
421 emul r1,r9,r8,r8
422 emul r1,r10,r9,r9
423 emul r1,r11,r10,r10
424
425 addl2 r4,r8
426 adwc r5,r9
427 adwc r6,r10
428 /* p.6
429 *
430 * If there are more than five leading zeros
431 * after the first two quotient bits or if there
432 * are more than five leading ones after the first
433 * two quotient bits, generate more fraction bits.
434 * Otherwise, branch to code to produce the result.
435 */
436 bicl3 $0xc1ffffff,r10,r4
437 beql more1
438 cmpl $0x3e000000,r4
439 bneq result
440 more1:
441 /* p.7
442 *
443 * generate another 32 result bits.
444 */
445 subl2 $4,r3
446 ashq r2,(r3),r5
447 bgeq signoff4
448
449 emul r1,r6,$0,r4
450 addl2 r1,r5
451 emul r0,r6,r5,r5
452 addl2 r0,r6
453 brb addbits1
454
455 signoff4:
456 emul r1,r6,$0,r4
457 emul r0,r6,r5,r5
458
459 addbits1:
460 addl2 r5,r7
461 adwc r6,r8
462 adwc $0,r9
463 adwc $0,r10
464 /* p.8
465 *
466 * Check for massive cancellation.
467 */
468 bicl3 $0xc0000000,r10,r6
469 /* bneq more2 -S.McD Test was backwards */
470 beql more2
471 cmpl $0x3fffffff,r6
472 bneq result
473 more2:
474 /* p.9
475 *
476 * If massive cancellation has occurred,
477 * generate another 24 result bits.
478 * Testing has shown there will always be
479 * enough bits after this point.
480 */
481 subl2 $4,r3
482 ashq r2,(r3),r5
483 bgeq signoff5
484
485 emul r0,r6,r4,r5
486 addl2 r0,r6
487 brb addbits2
488
489 signoff5:
490 emul r0,r6,r4,r5
491
492 addbits2:
493 addl2 r6,r7
494 adwc $0,r8
495 adwc $0,r9
496 adwc $0,r10
497 /* p.10
498 *
499 * The following code produces the reduced
500 * argument from the product bits contained
501 * in r10 - r7 .
502 */
503 result:
504 /*
505 * Extract the octant number from r10 .
506 */
507 /* extzv $29,$3,r10,r0 ...used for pi/4 reduction -S.McD */
508 extzv $30,$2,r10,r0
509 /*
510 * Clear the octant bits in r10 .
511 */
512 /* bicl2 $0xe0000000,r10 ...used for pi/4 reduction -S.McD */
513 bicl2 $0xc0000000,r10
514 /*
515 * Zero the sign flag.
516 */
517 clrl r5
518 /* p.11
519 *
520 * Check to see if the fraction is greater than
521 * or equal to one-half. If it is, add one
522 * to the octant number, set the sign flag
523 * on, and replace the fraction with 1 minus
524 * the fraction.
525 */
526 /* bitl $0x10000000,r10 ...used for pi/4 reduction -S.McD */
527 bitl $0x20000000,r10
528 beql small
529 incl r0
530 incl r5
531 /* subl3 r10,$0x1fffffff,r10 ...used for pi/4 reduction -S.McD */
532 subl3 r10,$0x3fffffff,r10
533 mcoml r9,r9
534 mcoml r8,r8
535 mcoml r7,r7
536 small:
537 /* p.12
538 *
539 * Test whether the first 29 bits of the ...used for pi/4 reduction -S.McD
540 * Test whether the first 30 bits of the
541 * fraction are zero.
542 */
543 tstl r10
544 beql tiny
545 /*
546 * Find the position of the first one bit in r10 .
547 */
548 cvtld r10,r1
549 extzv $7,$7,r1,r1
550 /*
551 * Compute the size of the shift needed.
552 */
553 subl3 r1,$32,r6
554 /*
555 * Shift up the high order 64 bits of the
556 * product.
557 */
558 ashq r6,r9,r10
559 ashq r6,r8,r9
560 brb mult
561 /* p.13
562 *
563 * Test to see if the sign bit of r9 is on.
564 */
565 tiny:
566 tstl r9
567 bgeq tinier
568 /*
569 * If it is, shift the product bits up 32 bits.
570 */
571 movl $32,r6
572 movq r8,r10
573 tstl r10
574 brb mult
575 /* p.14
576 *
577 * Test whether r9 is zero. It is probably
578 * impossible for both r10 and r9 to be
579 * zero, but until proven to be so, the test
580 * must be made.
581 */
582 tinier:
583 beql zero
584 /*
585 * Find the position of the first one bit in r9 .
586 */
587 cvtld r9,r1
588 extzv $7,$7,r1,r1
589 /*
590 * Compute the size of the shift needed.
591 */
592 subl3 r1,$32,r1
593 addl3 $32,r1,r6
594 /*
595 * Shift up the high order 64 bits of the
596 * product.
597 */
598 ashq r1,r8,r10
599 ashq r1,r7,r9
600 brb mult
601 /* p.15
602 *
603 * The following code sets the reduced
604 * argument to zero.
605 */
606 zero:
607 clrl r1
608 clrl r2
609 clrl r3
610 brw return
611 /* p.16
612 *
613 * At this point, r0 contains the octant number,
614 * r6 indicates the number of bits the fraction
615 * has been shifted, r5 indicates the sign of
616 * the fraction, r11/r10 contain the high order
617 * 64 bits of the fraction, and the condition
618 * codes indicate where the sign bit of r10
619 * is on. The following code multiplies the
620 * fraction by pi/2 .
621 */
622 mult:
623 /*
624 * Save r11/r10 in r4/r1 . -S.McD
625 */
626 movl r11,r4
627 movl r10,r1
628 /*
629 * If the sign bit of r10 is on, add 1 to r11 .
630 */
631 bgeq signoff6
632 incl r11
633 signoff6:
634 /* p.17
635 *
636 * Move pi/2 into r3/r2 .
637 */
638 movq $0xc90fdaa22168c235,r2
639 /*
640 * Multiply the fraction by the portion of pi/2
641 * in r2 .
642 */
643 emul r2,r10,$0,r7
644 emul r2,r11,r8,r7
645 /*
646 * Multiply the fraction by the portion of pi/2
647 * in r3 .
648 */
649 emul r3,r10,$0,r9
650 emul r3,r11,r10,r10
651 /*
652 * Add the product bits together.
653 */
654 addl2 r7,r9
655 adwc r8,r10
656 adwc $0,r11
657 /*
658 * Compensate for not sign extending r8 above.-S.McD
659 */
660 tstl r8
661 bgeq signoff6a
662 decl r11
663 signoff6a:
664 /*
665 * Compensate for r11/r10 being unsigned. -S.McD
666 */
667 addl2 r2,r10
668 adwc r3,r11
669 /*
670 * Compensate for r3/r2 being unsigned. -S.McD
671 */
672 addl2 r1,r10
673 adwc r4,r11
674 /* p.18
675 *
676 * If the sign bit of r11 is zero, shift the
677 * product bits up one bit and increment r6 .
678 */
679 blss signon
680 incl r6
681 ashq $1,r10,r10
682 tstl r9
683 bgeq signoff7
684 incl r10
685 signoff7:
686 signon:
687 /* p.19
688 *
689 * Shift the 56 most significant product
690 * bits into r9/r8 . The sign extension
691 * will be handled later.
692 */
693 ashq $-8,r10,r8
694 /*
695 * Convert the low order 8 bits of r10
696 * into an F-format number.
697 */
698 cvtbf r10,r3
699 /*
700 * If the result of the conversion was
701 * negative, add 1 to r9/r8 .
702 */
703 bgeq chop
704 incl r8
705 adwc $0,r9
706 /*
707 * If r9 is now zero, branch to special
708 * code to handle that possibility.
709 */
710 beql carryout
711 chop:
712 /* p.20
713 *
714 * Convert the number in r9/r8 into
715 * D-format number in r2/r1 .
716 */
717 rotl $16,r8,r2
718 rotl $16,r9,r1
719 /*
720 * Set the exponent field to the appropriate
721 * value. Note that the extra bits created by
722 * sign extension are now eliminated.
723 */
724 subw3 r6,$131,r6
725 insv r6,$7,$9,r1
726 /*
727 * Set the exponent field of the F-format
728 * number in r3 to the appropriate value.
729 */
730 tstf r3
731 beql return
732 /* extzv $7,$8,r3,r4 -S.McD */
733 extzv $7,$7,r3,r4
734 addw2 r4,r6
735 /* subw2 $217,r6 -S.McD */
736 subw2 $64,r6
737 insv r6,$7,$8,r3
738 brb return
739 /* p.21
740 *
741 * The following code generates the appropriate
742 * result for the unlikely possibility that
743 * rounding the number in r9/r8 resulted in
744 * a carry out.
745 */
746 carryout:
747 clrl r1
748 clrl r2
749 subw3 r6,$132,r6
750 insv r6,$7,$9,r1
751 tstf r3
752 beql return
753 extzv $7,$8,r3,r4
754 addw2 r4,r6
755 subw2 $218,r6
756 insv r6,$7,$8,r3
757 /* p.22
758 *
759 * The following code makes an needed
760 * adjustments to the signs of the
761 * results or to the octant number, and
762 * then returns.
763 */
764 return:
765 /*
766 * Test if the fraction was greater than or
767 * equal to 1/2 . If so, negate the reduced
768 * argument.
769 */
770 blbc r5,signoff8
771 mnegf r1,r1
772 mnegf r3,r3
773 signoff8:
774 /* p.23
775 *
776 * If the original argument was negative,
777 * negate the reduce argument and
778 * adjust the octant number.
779 */
780 tstw (sp)+
781 bgeq signoff9
782 mnegf r1,r1
783 mnegf r3,r3
784 /* subb3 r0,$8,r0 ...used for pi/4 reduction -S.McD */
785 subb3 r0,$4,r0
786 signoff9:
787 /*
788 * Clear all unneeded octant bits.
789 *
790 * bicb2 $0xf8,r0 ...used for pi/4 reduction -S.McD */
791 bicb2 $0xfc,r0
792 /*
793 * Return.
794 */
795 rsb
796