n_argred.S revision 1.2 1 /* $NetBSD: n_argred.S,v 1.2 1998/10/31 02:06:01 matt Exp $ */
2 /*
3 * Copyright (c) 1985, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)argred.s 8.1 (Berkeley) 6/4/93
35 */
36
37 /*
38 * libm$argred implements Bob Corbett's argument reduction and
39 * libm$sincos implements Peter Tang's double precision sin/cos.
40 *
41 * Note: The two entry points libm$argred and libm$sincos are meant
42 * to be used only by _sin, _cos and _tan.
43 *
44 * method: true range reduction to [-pi/4,pi/4], P. Tang & B. Corbett
45 * S. McDonald, April 4, 1985
46 */
47 .text
48 .globl libm$argred
49 .type libm$argred,@label
50 .globl libm$sincos
51 .type libm$sincos,@label
52 .align 1
53
54 libm$argred:
55 /*
56 * Compare the argument with the largest possible that can
57 * be reduced by table lookup. r3 := |x| will be used in table_lookup .
58 */
59 movd r0,r3
60 bgeq abs1
61 mnegd r3,r3
62 abs1:
63 cmpd r3,$0d+4.55530934770520019583e+01
64 blss small_arg
65 jsb trigred
66 rsb
67 small_arg:
68 jsb table_lookup
69 rsb
70 /*
71 * At this point,
72 * r0 contains the quadrant number, 0, 1, 2, or 3;
73 * r2/r1 contains the reduced argument as a D-format number;
74 * r3 contains a F-format extension to the reduced argument;
75 * r4 contains a 0 or 1 corresponding to a sin or cos entry.
76 */
77 libm$sincos:
78 /*
79 * Compensate for a cosine entry by adding one to the quadrant number.
80 */
81 addl2 r4,r0
82 /*
83 * Polyd clobbers r5-r0 ; save X in r7/r6 .
84 * This can be avoided by rewriting trigred .
85 */
86 movd r1,r6
87 /*
88 * Likewise, save alpha in r8 .
89 * This can be avoided by rewriting trigred .
90 */
91 movf r3,r8
92 /*
93 * Odd or even quadrant? cosine if odd, sine otherwise.
94 * Save floor(quadrant/2) in r9 ; it determines the final sign.
95 */
96 rotl $-1,r0,r9
97 blss cosine
98 sine:
99 muld2 r1,r1 # Xsq = X * X
100 cmpw $0x2480,r1 # [zl] Xsq > 2^-56?
101 blss 1f # [zl] yes, go ahead and do polyd
102 clrq r1 # [zl] work around 11/780 FPA polyd bug
103 1:
104 polyd r1,$7,sin_coef # Q = P(Xsq) , of deg 7
105 mulf3 $0f3.0,r8,r4 # beta = 3 * alpha
106 mulf2 r0,r4 # beta = Q * beta
107 addf2 r8,r4 # beta = alpha + beta
108 muld2 r6,r0 # S(X) = X * Q
109 /* cvtfd r4,r4 ... r5 = 0 after a polyd. */
110 addd2 r4,r0 # S(X) = beta + S(X)
111 addd2 r6,r0 # S(X) = X + S(X)
112 brb done
113 cosine:
114 muld2 r6,r6 # Xsq = X * X
115 beql zero_arg
116 mulf2 r1,r8 # beta = X * alpha
117 polyd r6,$7,cos_coef /* Q = P'(Xsq) , of deg 7 */
118 subd3 r0,r8,r0 # beta = beta - Q
119 subw2 $0x80,r6 # Xsq = Xsq / 2
120 addd2 r0,r6 # Xsq = Xsq + beta
121 zero_arg:
122 subd3 r6,$0d1.0,r0 # C(X) = 1 - Xsq
123 done:
124 blbc r9,even
125 mnegd r0,r0
126 even:
127 rsb
128
129 .data
130 .align 2
131
132 sin_coef:
133 .double 0d-7.53080332264191085773e-13 # s7 = 2^-29 -1.a7f2504ffc49f8..
134 .double 0d+1.60573519267703489121e-10 # s6 = 2^-21 1.611adaede473c8..
135 .double 0d-2.50520965150706067211e-08 # s5 = 2^-1a -1.ae644921ed8382..
136 .double 0d+2.75573191800593885716e-06 # s4 = 2^-13 1.71de3a4b884278..
137 .double 0d-1.98412698411850507950e-04 # s3 = 2^-0d -1.a01a01a0125e7d..
138 .double 0d+8.33333333333325688985e-03 # s2 = 2^-07 1.11111111110e50
139 .double 0d-1.66666666666666664354e-01 # s1 = 2^-03 -1.55555555555554
140 .double 0d+0.00000000000000000000e+00 # s0 = 0
141
142 cos_coef:
143 .double 0d-1.13006966202629430300e-11 # s7 = 2^-25 -1.8D9BA04D1374BE..
144 .double 0d+2.08746646574796004700e-09 # s6 = 2^-1D 1.1EE632650350BA..
145 .double 0d-2.75573073031284417300e-07 # s5 = 2^-16 -1.27E4F31411719E..
146 .double 0d+2.48015872682668025200e-05 # s4 = 2^-10 1.A01A0196B902E8..
147 .double 0d-1.38888888888464709200e-03 # s3 = 2^-0A -1.6C16C16C11FACE..
148 .double 0d+4.16666666666664761400e-02 # s2 = 2^-05 1.5555555555539E
149 .double 0d+0.00000000000000000000e+00 # s1 = 0
150 .double 0d+0.00000000000000000000e+00 # s0 = 0
151
152 /*
153 * Multiples of pi/2 expressed as the sum of three doubles,
154 *
155 * trailing: n * pi/2 , n = 0, 1, 2, ..., 29
156 * trailing[n] ,
157 *
158 * middle: n * pi/2 , n = 0, 1, 2, ..., 29
159 * middle[n] ,
160 *
161 * leading: n * pi/2 , n = 0, 1, 2, ..., 29
162 * leading[n] ,
163 *
164 * where
165 * leading[n] := (n * pi/2) rounded,
166 * middle[n] := (n * pi/2 - leading[n]) rounded,
167 * trailing[n] := (( n * pi/2 - leading[n]) - middle[n]) rounded .
168 */
169 trailing:
170 .double 0d+0.00000000000000000000e+00 # 0 * pi/2 trailing
171 .double 0d+4.33590506506189049611e-35 # 1 * pi/2 trailing
172 .double 0d+8.67181013012378099223e-35 # 2 * pi/2 trailing
173 .double 0d+1.30077151951856714215e-34 # 3 * pi/2 trailing
174 .double 0d+1.73436202602475619845e-34 # 4 * pi/2 trailing
175 .double 0d-1.68390735624352669192e-34 # 5 * pi/2 trailing
176 .double 0d+2.60154303903713428430e-34 # 6 * pi/2 trailing
177 .double 0d-8.16726343231148352150e-35 # 7 * pi/2 trailing
178 .double 0d+3.46872405204951239689e-34 # 8 * pi/2 trailing
179 .double 0d+3.90231455855570147991e-34 # 9 * pi/2 trailing
180 .double 0d-3.36781471248705338384e-34 # 10 * pi/2 trailing
181 .double 0d-1.06379439835298071785e-33 # 11 * pi/2 trailing
182 .double 0d+5.20308607807426856861e-34 # 12 * pi/2 trailing
183 .double 0d+5.63667658458045770509e-34 # 13 * pi/2 trailing
184 .double 0d-1.63345268646229670430e-34 # 14 * pi/2 trailing
185 .double 0d-1.19986217995610764801e-34 # 15 * pi/2 trailing
186 .double 0d+6.93744810409902479378e-34 # 16 * pi/2 trailing
187 .double 0d-8.03640094449267300110e-34 # 17 * pi/2 trailing
188 .double 0d+7.80462911711140295982e-34 # 18 * pi/2 trailing
189 .double 0d-7.16921993148029483506e-34 # 19 * pi/2 trailing
190 .double 0d-6.73562942497410676769e-34 # 20 * pi/2 trailing
191 .double 0d-6.30203891846791677593e-34 # 21 * pi/2 trailing
192 .double 0d-2.12758879670596143570e-33 # 22 * pi/2 trailing
193 .double 0d+2.53800212047402350390e-33 # 23 * pi/2 trailing
194 .double 0d+1.04061721561485371372e-33 # 24 * pi/2 trailing
195 .double 0d+6.11729905311472319056e-32 # 25 * pi/2 trailing
196 .double 0d+1.12733531691609154102e-33 # 26 * pi/2 trailing
197 .double 0d-3.70049587943078297272e-34 # 27 * pi/2 trailing
198 .double 0d-3.26690537292459340860e-34 # 28 * pi/2 trailing
199 .double 0d-1.14812616507957271361e-34 # 29 * pi/2 trailing
200
201 middle:
202 .double 0d+0.00000000000000000000e+00 # 0 * pi/2 middle
203 .double 0d+5.72118872610983179676e-18 # 1 * pi/2 middle
204 .double 0d+1.14423774522196635935e-17 # 2 * pi/2 middle
205 .double 0d-3.83475850529283316309e-17 # 3 * pi/2 middle
206 .double 0d+2.28847549044393271871e-17 # 4 * pi/2 middle
207 .double 0d-2.69052076007086676522e-17 # 5 * pi/2 middle
208 .double 0d-7.66951701058566632618e-17 # 6 * pi/2 middle
209 .double 0d-1.54628301484890040587e-17 # 7 * pi/2 middle
210 .double 0d+4.57695098088786543741e-17 # 8 * pi/2 middle
211 .double 0d+1.07001849766246313192e-16 # 9 * pi/2 middle
212 .double 0d-5.38104152014173353044e-17 # 10 * pi/2 middle
213 .double 0d-2.14622680169080983801e-16 # 11 * pi/2 middle
214 .double 0d-1.53390340211713326524e-16 # 12 * pi/2 middle
215 .double 0d-9.21580002543456677056e-17 # 13 * pi/2 middle
216 .double 0d-3.09256602969780081173e-17 # 14 * pi/2 middle
217 .double 0d+3.03066796603896507006e-17 # 15 * pi/2 middle
218 .double 0d+9.15390196177573087482e-17 # 16 * pi/2 middle
219 .double 0d+1.52771359575124969107e-16 # 17 * pi/2 middle
220 .double 0d+2.14003699532492626384e-16 # 18 * pi/2 middle
221 .double 0d-1.68853170360202329427e-16 # 19 * pi/2 middle
222 .double 0d-1.07620830402834670609e-16 # 20 * pi/2 middle
223 .double 0d+3.97700719404595604379e-16 # 21 * pi/2 middle
224 .double 0d-4.29245360338161967602e-16 # 22 * pi/2 middle
225 .double 0d-3.68013020380794313406e-16 # 23 * pi/2 middle
226 .double 0d-3.06780680423426653047e-16 # 24 * pi/2 middle
227 .double 0d-2.45548340466059054318e-16 # 25 * pi/2 middle
228 .double 0d-1.84316000508691335411e-16 # 26 * pi/2 middle
229 .double 0d-1.23083660551323675053e-16 # 27 * pi/2 middle
230 .double 0d-6.18513205939560162346e-17 # 28 * pi/2 middle
231 .double 0d-6.18980636588357585202e-19 # 29 * pi/2 middle
232
233 leading:
234 .double 0d+0.00000000000000000000e+00 # 0 * pi/2 leading
235 .double 0d+1.57079632679489661351e+00 # 1 * pi/2 leading
236 .double 0d+3.14159265358979322702e+00 # 2 * pi/2 leading
237 .double 0d+4.71238898038468989604e+00 # 3 * pi/2 leading
238 .double 0d+6.28318530717958645404e+00 # 4 * pi/2 leading
239 .double 0d+7.85398163397448312306e+00 # 5 * pi/2 leading
240 .double 0d+9.42477796076937979208e+00 # 6 * pi/2 leading
241 .double 0d+1.09955742875642763501e+01 # 7 * pi/2 leading
242 .double 0d+1.25663706143591729081e+01 # 8 * pi/2 leading
243 .double 0d+1.41371669411540694661e+01 # 9 * pi/2 leading
244 .double 0d+1.57079632679489662461e+01 # 10 * pi/2 leading
245 .double 0d+1.72787595947438630262e+01 # 11 * pi/2 leading
246 .double 0d+1.88495559215387595842e+01 # 12 * pi/2 leading
247 .double 0d+2.04203522483336561422e+01 # 13 * pi/2 leading
248 .double 0d+2.19911485751285527002e+01 # 14 * pi/2 leading
249 .double 0d+2.35619449019234492582e+01 # 15 * pi/2 leading
250 .double 0d+2.51327412287183458162e+01 # 16 * pi/2 leading
251 .double 0d+2.67035375555132423742e+01 # 17 * pi/2 leading
252 .double 0d+2.82743338823081389322e+01 # 18 * pi/2 leading
253 .double 0d+2.98451302091030359342e+01 # 19 * pi/2 leading
254 .double 0d+3.14159265358979324922e+01 # 20 * pi/2 leading
255 .double 0d+3.29867228626928286062e+01 # 21 * pi/2 leading
256 .double 0d+3.45575191894877260523e+01 # 22 * pi/2 leading
257 .double 0d+3.61283155162826226103e+01 # 23 * pi/2 leading
258 .double 0d+3.76991118430775191683e+01 # 24 * pi/2 leading
259 .double 0d+3.92699081698724157263e+01 # 25 * pi/2 leading
260 .double 0d+4.08407044966673122843e+01 # 26 * pi/2 leading
261 .double 0d+4.24115008234622088423e+01 # 27 * pi/2 leading
262 .double 0d+4.39822971502571054003e+01 # 28 * pi/2 leading
263 .double 0d+4.55530934770520019583e+01 # 29 * pi/2 leading
264
265 twoOverPi:
266 .double 0d+6.36619772367581343076e-01
267 .text
268 .align 1
269
270 table_lookup:
271 muld3 r3,twoOverPi,r0
272 cvtrdl r0,r0 # n = nearest int to ((2/pi)*|x|) rnded
273 mull3 $8,r0,r5
274 subd2 leading(r5),r3 # p = (|x| - leading n*pi/2) exactly
275 subd3 middle(r5),r3,r1 # q = (p - middle n*pi/2) rounded
276 subd2 r1,r3 # r = (p - q)
277 subd2 middle(r5),r3 # r = r - middle n*pi/2
278 subd2 trailing(r5),r3 # r = r - trailing n*pi/2 rounded
279 /*
280 * If the original argument was negative,
281 * negate the reduce argument and
282 * adjust the octant/quadrant number.
283 */
284 tstw 4(ap)
285 bgeq abs2
286 mnegf r1,r1
287 mnegf r3,r3
288 /* subb3 r0,$8,r0 ...used for pi/4 reduction -S.McD */
289 subb3 r0,$4,r0
290 abs2:
291 /*
292 * Clear all unneeded octant/quadrant bits.
293 */
294 /* bicb2 $0xf8,r0 ...used for pi/4 reduction -S.McD */
295 bicb2 $0xfc,r0
296 rsb
297 /*
298 * p.0
299 */
300 .text
301 .align 2
302 /*
303 * Only 256 (actually 225) bits of 2/pi are needed for VAX double
304 * precision; this was determined by enumerating all the nearest
305 * machine integer multiples of pi/2 using continued fractions.
306 * (8a8d3673775b7ff7 required the most bits.) -S.McD
307 */
308 .long 0
309 .long 0
310 .long 0xaef1586d
311 .long 0x9458eaf7
312 .long 0x10e4107f
313 .long 0xd8a5664f
314 .long 0x4d377036
315 .long 0x09d5f47d
316 .long 0x91054a7f
317 .long 0xbe60db93
318 bits2opi:
319 .long 0x00000028
320 .long 0
321 /*
322 * Note: wherever you see the word `octant', read `quadrant'.
323 * Currently this code is set up for pi/2 argument reduction.
324 * By uncommenting/commenting the appropriate lines, it will
325 * also serve as a pi/4 argument reduction code.
326 */
327
328 /* p.1
329 * Trigred preforms argument reduction
330 * for the trigonometric functions. It
331 * takes one input argument, a D-format
332 * number in r1/r0 . The magnitude of
333 * the input argument must be greater
334 * than or equal to 1/2 . Trigred produces
335 * three results: the number of the octant
336 * occupied by the argument, the reduced
337 * argument, and an extension of the
338 * reduced argument. The octant number is
339 * returned in r0 . The reduced argument
340 * is returned as a D-format number in
341 * r2/r1 . An 8 bit extension of the
342 * reduced argument is returned as an
343 * F-format number in r3.
344 * p.2
345 */
346 trigred:
347 /*
348 * Save the sign of the input argument.
349 */
350 movw r0,-(sp)
351 /*
352 * Extract the exponent field.
353 */
354 extzv $7,$7,r0,r2
355 /*
356 * Convert the fraction part of the input
357 * argument into a quadword integer.
358 */
359 bicw2 $0xff80,r0
360 bisb2 $0x80,r0 # -S.McD
361 rotl $16,r0,r0
362 rotl $16,r1,r1
363 /*
364 * If r1 is negative, add 1 to r0 . This
365 * adjustment is made so that the two's
366 * complement multiplications done later
367 * will produce unsigned results.
368 */
369 bgeq posmid
370 incl r0
371 posmid:
372 /* p.3
373 *
374 * Set r3 to the address of the first quadword
375 * used to obtain the needed portion of 2/pi .
376 * The address is longword aligned to ensure
377 * efficient access.
378 */
379 ashl $-3,r2,r3
380 bicb2 $3,r3
381 subl3 r3,$bits2opi,r3
382 /*
383 * Set r2 to the size of the shift needed to
384 * obtain the correct portion of 2/pi .
385 */
386 bicb2 $0xe0,r2
387 /* p.4
388 *
389 * Move the needed 128 bits of 2/pi into
390 * r11 - r8 . Adjust the numbers to allow
391 * for unsigned multiplication.
392 */
393 ashq r2,(r3),r10
394
395 subl2 $4,r3
396 ashq r2,(r3),r9
397 bgeq signoff1
398 incl r11
399 signoff1:
400 subl2 $4,r3
401 ashq r2,(r3),r8
402 bgeq signoff2
403 incl r10
404 signoff2:
405 subl2 $4,r3
406 ashq r2,(r3),r7
407 bgeq signoff3
408 incl r9
409 signoff3:
410 /* p.5
411 *
412 * Multiply the contents of r0/r1 by the
413 * slice of 2/pi in r11 - r8 .
414 */
415 emul r0,r8,$0,r4
416 emul r0,r9,r5,r5
417 emul r0,r10,r6,r6
418
419 emul r1,r8,$0,r7
420 emul r1,r9,r8,r8
421 emul r1,r10,r9,r9
422 emul r1,r11,r10,r10
423
424 addl2 r4,r8
425 adwc r5,r9
426 adwc r6,r10
427 /* p.6
428 *
429 * If there are more than five leading zeros
430 * after the first two quotient bits or if there
431 * are more than five leading ones after the first
432 * two quotient bits, generate more fraction bits.
433 * Otherwise, branch to code to produce the result.
434 */
435 bicl3 $0xc1ffffff,r10,r4
436 beql more1
437 cmpl $0x3e000000,r4
438 bneq result
439 more1:
440 /* p.7
441 *
442 * generate another 32 result bits.
443 */
444 subl2 $4,r3
445 ashq r2,(r3),r5
446 bgeq signoff4
447
448 emul r1,r6,$0,r4
449 addl2 r1,r5
450 emul r0,r6,r5,r5
451 addl2 r0,r6
452 brb addbits1
453
454 signoff4:
455 emul r1,r6,$0,r4
456 emul r0,r6,r5,r5
457
458 addbits1:
459 addl2 r5,r7
460 adwc r6,r8
461 adwc $0,r9
462 adwc $0,r10
463 /* p.8
464 *
465 * Check for massive cancellation.
466 */
467 bicl3 $0xc0000000,r10,r6
468 /* bneq more2 -S.McD Test was backwards */
469 beql more2
470 cmpl $0x3fffffff,r6
471 bneq result
472 more2:
473 /* p.9
474 *
475 * If massive cancellation has occurred,
476 * generate another 24 result bits.
477 * Testing has shown there will always be
478 * enough bits after this point.
479 */
480 subl2 $4,r3
481 ashq r2,(r3),r5
482 bgeq signoff5
483
484 emul r0,r6,r4,r5
485 addl2 r0,r6
486 brb addbits2
487
488 signoff5:
489 emul r0,r6,r4,r5
490
491 addbits2:
492 addl2 r6,r7
493 adwc $0,r8
494 adwc $0,r9
495 adwc $0,r10
496 /* p.10
497 *
498 * The following code produces the reduced
499 * argument from the product bits contained
500 * in r10 - r7 .
501 */
502 result:
503 /*
504 * Extract the octant number from r10 .
505 */
506 /* extzv $29,$3,r10,r0 ...used for pi/4 reduction -S.McD */
507 extzv $30,$2,r10,r0
508 /*
509 * Clear the octant bits in r10 .
510 */
511 /* bicl2 $0xe0000000,r10 ...used for pi/4 reduction -S.McD */
512 bicl2 $0xc0000000,r10
513 /*
514 * Zero the sign flag.
515 */
516 clrl r5
517 /* p.11
518 *
519 * Check to see if the fraction is greater than
520 * or equal to one-half. If it is, add one
521 * to the octant number, set the sign flag
522 * on, and replace the fraction with 1 minus
523 * the fraction.
524 */
525 /* bitl $0x10000000,r10 ...used for pi/4 reduction -S.McD */
526 bitl $0x20000000,r10
527 beql small
528 incl r0
529 incl r5
530 /* subl3 r10,$0x1fffffff,r10 ...used for pi/4 reduction -S.McD */
531 subl3 r10,$0x3fffffff,r10
532 mcoml r9,r9
533 mcoml r8,r8
534 mcoml r7,r7
535 small:
536 /* p.12
537 *
538 * Test whether the first 29 bits of the ...used for pi/4 reduction -S.McD
539 * Test whether the first 30 bits of the
540 * fraction are zero.
541 */
542 tstl r10
543 beql tiny
544 /*
545 * Find the position of the first one bit in r10 .
546 */
547 cvtld r10,r1
548 extzv $7,$7,r1,r1
549 /*
550 * Compute the size of the shift needed.
551 */
552 subl3 r1,$32,r6
553 /*
554 * Shift up the high order 64 bits of the
555 * product.
556 */
557 ashq r6,r9,r10
558 ashq r6,r8,r9
559 brb mult
560 /* p.13
561 *
562 * Test to see if the sign bit of r9 is on.
563 */
564 tiny:
565 tstl r9
566 bgeq tinier
567 /*
568 * If it is, shift the product bits up 32 bits.
569 */
570 movl $32,r6
571 movq r8,r10
572 tstl r10
573 brb mult
574 /* p.14
575 *
576 * Test whether r9 is zero. It is probably
577 * impossible for both r10 and r9 to be
578 * zero, but until proven to be so, the test
579 * must be made.
580 */
581 tinier:
582 beql zero
583 /*
584 * Find the position of the first one bit in r9 .
585 */
586 cvtld r9,r1
587 extzv $7,$7,r1,r1
588 /*
589 * Compute the size of the shift needed.
590 */
591 subl3 r1,$32,r1
592 addl3 $32,r1,r6
593 /*
594 * Shift up the high order 64 bits of the
595 * product.
596 */
597 ashq r1,r8,r10
598 ashq r1,r7,r9
599 brb mult
600 /* p.15
601 *
602 * The following code sets the reduced
603 * argument to zero.
604 */
605 zero:
606 clrl r1
607 clrl r2
608 clrl r3
609 brw return
610 /* p.16
611 *
612 * At this point, r0 contains the octant number,
613 * r6 indicates the number of bits the fraction
614 * has been shifted, r5 indicates the sign of
615 * the fraction, r11/r10 contain the high order
616 * 64 bits of the fraction, and the condition
617 * codes indicate where the sign bit of r10
618 * is on. The following code multiplies the
619 * fraction by pi/2 .
620 */
621 mult:
622 /*
623 * Save r11/r10 in r4/r1 . -S.McD
624 */
625 movl r11,r4
626 movl r10,r1
627 /*
628 * If the sign bit of r10 is on, add 1 to r11 .
629 */
630 bgeq signoff6
631 incl r11
632 signoff6:
633 /* p.17
634 *
635 * Move pi/2 into r3/r2 .
636 */
637 movq $0xc90fdaa22168c235,r2
638 /*
639 * Multiply the fraction by the portion of pi/2
640 * in r2 .
641 */
642 emul r2,r10,$0,r7
643 emul r2,r11,r8,r7
644 /*
645 * Multiply the fraction by the portion of pi/2
646 * in r3 .
647 */
648 emul r3,r10,$0,r9
649 emul r3,r11,r10,r10
650 /*
651 * Add the product bits together.
652 */
653 addl2 r7,r9
654 adwc r8,r10
655 adwc $0,r11
656 /*
657 * Compensate for not sign extending r8 above.-S.McD
658 */
659 tstl r8
660 bgeq signoff6a
661 decl r11
662 signoff6a:
663 /*
664 * Compensate for r11/r10 being unsigned. -S.McD
665 */
666 addl2 r2,r10
667 adwc r3,r11
668 /*
669 * Compensate for r3/r2 being unsigned. -S.McD
670 */
671 addl2 r1,r10
672 adwc r4,r11
673 /* p.18
674 *
675 * If the sign bit of r11 is zero, shift the
676 * product bits up one bit and increment r6 .
677 */
678 blss signon
679 incl r6
680 ashq $1,r10,r10
681 tstl r9
682 bgeq signoff7
683 incl r10
684 signoff7:
685 signon:
686 /* p.19
687 *
688 * Shift the 56 most significant product
689 * bits into r9/r8 . The sign extension
690 * will be handled later.
691 */
692 ashq $-8,r10,r8
693 /*
694 * Convert the low order 8 bits of r10
695 * into an F-format number.
696 */
697 cvtbf r10,r3
698 /*
699 * If the result of the conversion was
700 * negative, add 1 to r9/r8 .
701 */
702 bgeq chop
703 incl r8
704 adwc $0,r9
705 /*
706 * If r9 is now zero, branch to special
707 * code to handle that possibility.
708 */
709 beql carryout
710 chop:
711 /* p.20
712 *
713 * Convert the number in r9/r8 into
714 * D-format number in r2/r1 .
715 */
716 rotl $16,r8,r2
717 rotl $16,r9,r1
718 /*
719 * Set the exponent field to the appropriate
720 * value. Note that the extra bits created by
721 * sign extension are now eliminated.
722 */
723 subw3 r6,$131,r6
724 insv r6,$7,$9,r1
725 /*
726 * Set the exponent field of the F-format
727 * number in r3 to the appropriate value.
728 */
729 tstf r3
730 beql return
731 /* extzv $7,$8,r3,r4 -S.McD */
732 extzv $7,$7,r3,r4
733 addw2 r4,r6
734 /* subw2 $217,r6 -S.McD */
735 subw2 $64,r6
736 insv r6,$7,$8,r3
737 brb return
738 /* p.21
739 *
740 * The following code generates the appropriate
741 * result for the unlikely possibility that
742 * rounding the number in r9/r8 resulted in
743 * a carry out.
744 */
745 carryout:
746 clrl r1
747 clrl r2
748 subw3 r6,$132,r6
749 insv r6,$7,$9,r1
750 tstf r3
751 beql return
752 extzv $7,$8,r3,r4
753 addw2 r4,r6
754 subw2 $218,r6
755 insv r6,$7,$8,r3
756 /* p.22
757 *
758 * The following code makes an needed
759 * adjustments to the signs of the
760 * results or to the octant number, and
761 * then returns.
762 */
763 return:
764 /*
765 * Test if the fraction was greater than or
766 * equal to 1/2 . If so, negate the reduced
767 * argument.
768 */
769 blbc r5,signoff8
770 mnegf r1,r1
771 mnegf r3,r3
772 signoff8:
773 /* p.23
774 *
775 * If the original argument was negative,
776 * negate the reduce argument and
777 * adjust the octant number.
778 */
779 tstw (sp)+
780 bgeq signoff9
781 mnegf r1,r1
782 mnegf r3,r3
783 /* subb3 r0,$8,r0 ...used for pi/4 reduction -S.McD */
784 subb3 r0,$4,r0
785 signoff9:
786 /*
787 * Clear all unneeded octant bits.
788 *
789 * bicb2 $0xf8,r0 ...used for pi/4 reduction -S.McD */
790 bicb2 $0xfc,r0
791 /*
792 * Return.
793 */
794 rsb
795