arch/vax/n_argred.S

1.10  riastrad /*	$NetBSD: n_argred.S,v 1.10 2024/05/07 15:15:09 riastradh Exp $	*/
 1.1     ragge /*
 1.1     ragge  * Copyright (c) 1985, 1993
 1.1     ragge  *	The Regents of the University of California.  All rights reserved.
 1.1     ragge  *
 1.1     ragge  * Redistribution and use in source and binary forms, with or without
 1.1     ragge  * modification, are permitted provided that the following conditions
 1.1     ragge  * are met:
 1.1     ragge  * 1. Redistributions of source code must retain the above copyright
 1.1     ragge  *    notice, this list of conditions and the following disclaimer.
 1.1     ragge  * 2. Redistributions in binary form must reproduce the above copyright
 1.1     ragge  *    notice, this list of conditions and the following disclaimer in the
 1.1     ragge  *    documentation and/or other materials provided with the distribution.
 1.8       agc  * 3. Neither the name of the University nor the names of its contributors
 1.1     ragge  *    may be used to endorse or promote products derived from this software
 1.1     ragge  *    without specific prior written permission.
 1.1     ragge  *
 1.1     ragge  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 1.1     ragge  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 1.1     ragge  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 1.1     ragge  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 1.1     ragge  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 1.1     ragge  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 1.1     ragge  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 1.1     ragge  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 1.1     ragge  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 1.1     ragge  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 1.1     ragge  * SUCH DAMAGE.
 1.1     ragge  *
 1.1     ragge  *	@(#)argred.s	8.1 (Berkeley) 6/4/93
 1.1     ragge  */
 1.1     ragge
 1.5      matt #include <machine/asm.h>
 1.5      matt
 1.1     ragge /*
 1.1     ragge  *  libm$argred implements Bob Corbett's argument reduction and
 1.1     ragge  *  libm$sincos implements Peter Tang's double precision sin/cos.
 1.4    simonb  *
 1.1     ragge  *  Note: The two entry points libm$argred and libm$sincos (defined below as
 1.1     ragge  *        __libm_argred and __libm_sincos) are meant to be used only by _sin, _cos and _tan.
 1.1     ragge  *
 1.1     ragge  * method: true range reduction to [-pi/4,pi/4], P. Tang  &  B. Corbett
 1.1     ragge  * S. McDonald, April 4,  1985
 1.1     ragge  */
 1.1     ragge
 1.9      matt 	.hidden	__libm_argred
               /*
                *  __libm_argred -- dispatcher for argument reduction.
                *
                *  In:   %r1/%r0  x  (D-format)
                *        4(%ap)   read by  table_lookup  for the sign of x, so %ap
                *                 must still address an argument list whose first
                *                 word carries that sign -- presumably the one of
                *                 the calling _sin/_cos/_tan (TODO confirm).
                *  Out:  see the register summary at the end of this routine.
                *
                *  |x| strictly below the constant compared against (the same
                *  literal as the last  leading  table entry, 29 * pi/2) is handed
                *  to  table_lookup ; anything else, including equality, goes to
                *  trigred .  Both are entered with  jsb  and this routine returns
                *  with  rsb .
                */
 1.5      matt ENTRY(__libm_argred, 0)
 1.1     ragge /*
 1.1     ragge  *  Compare the argument with the largest possible that can
 1.7      matt  *  be reduced by table lookup.  %r4/%r3 := |x|  will be used in  table_lookup .
 1.1     ragge  */
 1.7      matt 	movd	%r0,%r3		/* copy x; N/Z now describe x */
 1.1     ragge 	bgeq	abs1
 1.7      matt 	mnegd	%r3,%r3
 1.1     ragge abs1:
 1.7      matt 	cmpd	%r3,$0d+4.55530934770520019583e+01
 1.1     ragge 	blss	small_arg
 1.1     ragge 	jsb	trigred
 1.1     ragge 	rsb
 1.1     ragge small_arg:
 1.1     ragge 	jsb	table_lookup
 1.1     ragge 	rsb
 1.1     ragge /*
 1.1     ragge  *  At this point,
 1.7      matt  *	   %r0  contains the quadrant number, 0, 1, 2, or 3;
 1.7      matt  *	%r2/%r1  contains the reduced argument as a D-format number;
 1.7      matt  *  	   %r3  contains a F-format extension to the reduced argument;
 1.7      matt  *          %r4  contains a  0 or 1  corresponding to a  sin or cos  entry.
 1.1     ragge  */
               /*
                *  NOTE(review): the summary above describes the state expected on
                *  entry to  __libm_sincos .  Both  table_lookup  and  trigred  use
                *  %r4 as scratch and nothing visible here loads the 0/1 flag, so
                *  presumably the caller sets %r4 after this routine returns.
                */
1.10  riastrad END(__libm_argred)
 1.5      matt
 1.9      matt 	.hidden	__libm_sincos
               /*
                *  __libm_sincos -- evaluate sin or cos of a reduced argument.
                *
                *  In:   %r0      quadrant number
                *        %r2/%r1  reduced argument  X  (D-format)
                *        %r3      F-format extension  alpha  of the reduced argument
                *        %r4      added to the quadrant on entry (0 for a sin entry,
                *                 1 for a cos entry)
                *  Out:  %r1/%r0  result (D-format)
                *
                *  %r0-%r9 are all written and none is restored here.
                *  Returns with  rsb .
                */
 1.5      matt ENTRY(__libm_sincos, 0)
 1.1     ragge /*
 1.1     ragge  *  Compensate for a cosine entry by adding one to the quadrant number.
 1.1     ragge  */
 1.7      matt 	addl2	%r4,%r0
 1.1     ragge /*
 1.7      matt  *  Polyd clobbers  %r5-%r0 ;  save  X  in  %r7/%r6 .
 1.1     ragge  *  This can be avoided by rewriting  trigred .
 1.1     ragge  */
 1.7      matt 	movd	%r1,%r6
 1.1     ragge /*
 1.7      matt  *  Likewise, save  alpha  in  %r8 .
 1.1     ragge  *  This can be avoided by rewriting  trigred .
 1.1     ragge  */
 1.7      matt 	movf	%r3,%r8
 1.1     ragge /*
 1.1     ragge  *  Odd or even quadrant?  cosine if odd, sine otherwise.
 1.7      matt  *  Save  floor(quadrant/2) in  %r9  ; it determines the final sign.
 1.1     ragge  */
               /*
                *  The rotate by -1 moves bit 0 of the quadrant into the sign bit of
                *  %r9 (tested by the  blss ) and bit 1 into bit 0 of %r9 (tested by
                *  the  blbc  at  done: ).
                */
 1.7      matt 	rotl	$-1,%r0,%r9
 1.1     ragge 	blss	cosine
 1.1     ragge sine:
 1.7      matt 	muld2	%r1,%r1		# Xsq = X * X
 1.7      matt 	cmpw	$0x2480,%r1	# [zl] Xsq > 2^-56?
 1.1     ragge 	blss	1f		# [zl] yes, go ahead and do polyd
 1.7      matt 	clrq	%r1		# [zl] work around 11/780 FPA polyd bug
 1.1     ragge 1:
 1.7      matt 	polyd	%r1,$7,sin_coef	# Q = P(Xsq) , of deg 7
 1.7      matt 	mulf3	$0f3.0,%r8,%r4	# beta = 3 * alpha
 1.7      matt 	mulf2	%r0,%r4		# beta = Q * beta
 1.7      matt 	addf2	%r8,%r4		# beta = alpha + beta
 1.7      matt 	muld2	%r6,%r0		# S(X) = X * Q
 1.7      matt /*	cvtfd	%r4,%r4		... %r5 = 0 after a polyd. */
 1.7      matt 	addd2	%r4,%r0		# S(X) = beta + S(X)
 1.7      matt 	addd2	%r6,%r0		# S(X) = X + S(X)
 1.5      matt 	jbr	done
 1.1     ragge cosine:
 1.7      matt 	muld2	%r6,%r6		# Xsq = X * X
 1.1     ragge 	beql	zero_arg
 1.7      matt 	mulf2	%r1,%r8		# beta = X * alpha
 1.7      matt 	polyd	%r6,$7,cos_coef	/* Q = P'(Xsq) , of deg 7 */
               /*
                *  NOTE(review): subd3 takes %r9/%r8 as a D-format pair, but only
                *  %r8 was written as beta (F-format).  %r9 still holds the rotated
                *  quadrant from the  rotl  above, so it is read as the low-order
                *  half of that operand.  Presumably negligible at this magnitude
                *  -- confirm.
                */
 1.7      matt 	subd3	%r0,%r8,%r0	# beta = beta - Q
 1.7      matt 	subw2	$0x80,%r6	# Xsq = Xsq / 2
 1.7      matt 	addd2	%r0,%r6		# Xsq = Xsq + beta
 1.1     ragge zero_arg:
 1.7      matt 	subd3	%r6,$0d1.0,%r0	# C(X) = 1 - Xsq
 1.1     ragge done:
 1.7      matt 	blbc	%r9,even	/* bit 0 of %r9 clear: keep the sign */
 1.7      matt 	mnegd	%r0,%r0
 1.1     ragge even:
 1.1     ragge 	rsb
1.10  riastrad END(__libm_sincos)
 1.1     ragge
 1.6      matt #ifdef __ELF__
 1.6      matt 	.section .rodata
 1.6      matt #else
 1.6      matt 	.text
 1.6      matt #endif
 1.5      matt 	_ALIGN_TEXT
 1.1     ragge
 1.1     ragge sin_coef:
               /*
                *  Coefficient table for the degree-7  polyd  at  sine: .
                *  Entries are listed from s7 down to s0; s0 is zero.
                */
 1.1     ragge 	.double	0d-7.53080332264191085773e-13	# s7 = 2^-29 -1.a7f2504ffc49f8..
 1.1     ragge 	.double	0d+1.60573519267703489121e-10	# s6 = 2^-21  1.611adaede473c8..
 1.1     ragge 	.double	0d-2.50520965150706067211e-08	# s5 = 2^-1a -1.ae644921ed8382..
 1.1     ragge 	.double	0d+2.75573191800593885716e-06	# s4 = 2^-13  1.71de3a4b884278..
 1.1     ragge 	.double	0d-1.98412698411850507950e-04	# s3 = 2^-0d -1.a01a01a0125e7d..
 1.1     ragge 	.double	0d+8.33333333333325688985e-03	# s2 = 2^-07  1.11111111110e50
 1.1     ragge 	.double	0d-1.66666666666666664354e-01	# s1 = 2^-03 -1.55555555555554
 1.1     ragge 	.double	0d+0.00000000000000000000e+00	# s0 = 0
 1.1     ragge
 1.1     ragge cos_coef:
               /*
                *  Coefficient table for the degree-7  polyd  at  cosine: .
                *  The per-line tags reuse the names s7..s0 from  sin_coef ; the two
                *  lowest-order entries are zero.
                */
 1.1     ragge 	.double	0d-1.13006966202629430300e-11	# s7 = 2^-25 -1.8D9BA04D1374BE..
 1.1     ragge 	.double	0d+2.08746646574796004700e-09	# s6 = 2^-1D  1.1EE632650350BA..
 1.1     ragge 	.double	0d-2.75573073031284417300e-07	# s5 = 2^-16 -1.27E4F31411719E..
 1.1     ragge 	.double	0d+2.48015872682668025200e-05	# s4 = 2^-10  1.A01A0196B902E8..
 1.1     ragge 	.double	0d-1.38888888888464709200e-03	# s3 = 2^-0A -1.6C16C16C11FACE..
 1.1     ragge 	.double	0d+4.16666666666664761400e-02	# s2 = 2^-05  1.5555555555539E
 1.1     ragge 	.double	0d+0.00000000000000000000e+00	# s1 = 0
 1.1     ragge 	.double	0d+0.00000000000000000000e+00	# s0 = 0
 1.1     ragge
 1.1     ragge /*
 1.1     ragge  *  Multiples of  pi/2  expressed as the sum of three doubles,
 1.1     ragge  *
 1.1     ragge  *  trailing:	n * pi/2 ,  n = 0, 1, 2, ..., 29
 1.1     ragge  *			trailing[n] ,
 1.1     ragge  *
 1.1     ragge  *  middle:	n * pi/2 ,  n = 0, 1, 2, ..., 29
 1.1     ragge  *			middle[n]   ,
 1.1     ragge  *
 1.1     ragge  *  leading:	n * pi/2 ,  n = 0, 1, 2, ..., 29
 1.1     ragge  *			leading[n]  ,
 1.1     ragge  *
 1.1     ragge  *	where
 1.1     ragge  *		leading[n]  := (n * pi/2)  rounded,
 1.1     ragge  *		middle[n]   := (n * pi/2  -  leading[n])  rounded,
 1.1     ragge  *		trailing[n] := (( n * pi/2 - leading[n]) - middle[n])  rounded .
                *
                *  All three tables are indexed by  n  through  table[%r0]  operands
                *  in  table_lookup .
                *
                *  trailing[25] is about 6.1e-32, noticeably larger than its
                *  neighbours.  It matches middle[25], which lies about 6.0e-32 below
                *  the midpoint of middle[24] and middle[26], so the three-part sum
                *  for n = 25 still lines up with n = 24 and n = 26.  Do not
                *  "correct" one of the two entries without the other.
 1.1     ragge  */
 1.1     ragge trailing:
 1.1     ragge 	.double	0d+0.00000000000000000000e+00	#  0 * pi/2  trailing
 1.1     ragge 	.double	0d+4.33590506506189049611e-35	#  1 * pi/2  trailing
 1.1     ragge 	.double	0d+8.67181013012378099223e-35	#  2 * pi/2  trailing
 1.1     ragge 	.double	0d+1.30077151951856714215e-34	#  3 * pi/2  trailing
 1.1     ragge 	.double	0d+1.73436202602475619845e-34	#  4 * pi/2  trailing
 1.1     ragge 	.double	0d-1.68390735624352669192e-34	#  5 * pi/2  trailing
 1.1     ragge 	.double	0d+2.60154303903713428430e-34	#  6 * pi/2  trailing
 1.1     ragge 	.double	0d-8.16726343231148352150e-35	#  7 * pi/2  trailing
 1.1     ragge 	.double	0d+3.46872405204951239689e-34	#  8 * pi/2  trailing
 1.1     ragge 	.double	0d+3.90231455855570147991e-34	#  9 * pi/2  trailing
 1.1     ragge 	.double	0d-3.36781471248705338384e-34	# 10 * pi/2  trailing
 1.1     ragge 	.double	0d-1.06379439835298071785e-33	# 11 * pi/2  trailing
 1.1     ragge 	.double	0d+5.20308607807426856861e-34	# 12 * pi/2  trailing
 1.1     ragge 	.double	0d+5.63667658458045770509e-34	# 13 * pi/2  trailing
 1.1     ragge 	.double	0d-1.63345268646229670430e-34	# 14 * pi/2  trailing
 1.1     ragge 	.double	0d-1.19986217995610764801e-34	# 15 * pi/2  trailing
 1.1     ragge 	.double	0d+6.93744810409902479378e-34	# 16 * pi/2  trailing
 1.1     ragge 	.double	0d-8.03640094449267300110e-34	# 17 * pi/2  trailing
 1.1     ragge 	.double	0d+7.80462911711140295982e-34	# 18 * pi/2  trailing
 1.1     ragge 	.double	0d-7.16921993148029483506e-34	# 19 * pi/2  trailing
 1.1     ragge 	.double	0d-6.73562942497410676769e-34	# 20 * pi/2  trailing
 1.1     ragge 	.double	0d-6.30203891846791677593e-34	# 21 * pi/2  trailing
 1.1     ragge 	.double	0d-2.12758879670596143570e-33	# 22 * pi/2  trailing
 1.1     ragge 	.double	0d+2.53800212047402350390e-33	# 23 * pi/2  trailing
 1.1     ragge 	.double	0d+1.04061721561485371372e-33	# 24 * pi/2  trailing
 1.1     ragge 	.double	0d+6.11729905311472319056e-32	# 25 * pi/2  trailing
 1.1     ragge 	.double	0d+1.12733531691609154102e-33	# 26 * pi/2  trailing
 1.1     ragge 	.double	0d-3.70049587943078297272e-34	# 27 * pi/2  trailing
 1.1     ragge 	.double	0d-3.26690537292459340860e-34	# 28 * pi/2  trailing
 1.1     ragge 	.double	0d-1.14812616507957271361e-34	# 29 * pi/2  trailing
 1.1     ragge
 1.1     ragge middle:
               /*
                *  Second of the three parts of  n * pi/2 , 30 entries for n = 0..29.
                *  table_lookup  subtracts middle[n] twice: once to form the reduced
                *  argument  q  and once more while forming the extension  r .
                */
 1.1     ragge 	.double	0d+0.00000000000000000000e+00	#  0 * pi/2  middle
 1.1     ragge 	.double	0d+5.72118872610983179676e-18	#  1 * pi/2  middle
 1.1     ragge 	.double	0d+1.14423774522196635935e-17	#  2 * pi/2  middle
 1.1     ragge 	.double	0d-3.83475850529283316309e-17	#  3 * pi/2  middle
 1.1     ragge 	.double	0d+2.28847549044393271871e-17	#  4 * pi/2  middle
 1.1     ragge 	.double	0d-2.69052076007086676522e-17	#  5 * pi/2  middle
 1.1     ragge 	.double	0d-7.66951701058566632618e-17	#  6 * pi/2  middle
 1.1     ragge 	.double	0d-1.54628301484890040587e-17	#  7 * pi/2  middle
 1.1     ragge 	.double	0d+4.57695098088786543741e-17	#  8 * pi/2  middle
 1.1     ragge 	.double	0d+1.07001849766246313192e-16	#  9 * pi/2  middle
 1.1     ragge 	.double	0d-5.38104152014173353044e-17	# 10 * pi/2  middle
 1.1     ragge 	.double	0d-2.14622680169080983801e-16	# 11 * pi/2  middle
 1.1     ragge 	.double	0d-1.53390340211713326524e-16	# 12 * pi/2  middle
 1.1     ragge 	.double	0d-9.21580002543456677056e-17	# 13 * pi/2  middle
 1.1     ragge 	.double	0d-3.09256602969780081173e-17	# 14 * pi/2  middle
 1.1     ragge 	.double	0d+3.03066796603896507006e-17	# 15 * pi/2  middle
 1.1     ragge 	.double	0d+9.15390196177573087482e-17	# 16 * pi/2  middle
 1.1     ragge 	.double	0d+1.52771359575124969107e-16	# 17 * pi/2  middle
 1.1     ragge 	.double	0d+2.14003699532492626384e-16	# 18 * pi/2  middle
 1.1     ragge 	.double	0d-1.68853170360202329427e-16	# 19 * pi/2  middle
 1.1     ragge 	.double	0d-1.07620830402834670609e-16	# 20 * pi/2  middle
 1.1     ragge 	.double	0d+3.97700719404595604379e-16	# 21 * pi/2  middle
 1.1     ragge 	.double	0d-4.29245360338161967602e-16	# 22 * pi/2  middle
 1.1     ragge 	.double	0d-3.68013020380794313406e-16	# 23 * pi/2  middle
 1.1     ragge 	.double	0d-3.06780680423426653047e-16	# 24 * pi/2  middle
 1.1     ragge 	.double	0d-2.45548340466059054318e-16	# 25 * pi/2  middle
 1.1     ragge 	.double	0d-1.84316000508691335411e-16	# 26 * pi/2  middle
 1.1     ragge 	.double	0d-1.23083660551323675053e-16	# 27 * pi/2  middle
 1.1     ragge 	.double	0d-6.18513205939560162346e-17	# 28 * pi/2  middle
 1.1     ragge 	.double	0d-6.18980636588357585202e-19	# 29 * pi/2  middle
 1.1     ragge
 1.1     ragge leading:
               /*
                *  First (largest) of the three parts of  n * pi/2 , 30 entries for
                *  n = 0..29.  The last entry is the same literal that
                *  __libm_argred  compares  |x|  against to choose between
                *  table_lookup  and  trigred ; keep the two in step.
                */
 1.1     ragge 	.double	0d+0.00000000000000000000e+00	#  0 * pi/2  leading
 1.1     ragge 	.double	0d+1.57079632679489661351e+00	#  1 * pi/2  leading
 1.1     ragge 	.double	0d+3.14159265358979322702e+00	#  2 * pi/2  leading
 1.1     ragge 	.double	0d+4.71238898038468989604e+00	#  3 * pi/2  leading
 1.1     ragge 	.double	0d+6.28318530717958645404e+00	#  4 * pi/2  leading
 1.1     ragge 	.double	0d+7.85398163397448312306e+00	#  5 * pi/2  leading
 1.1     ragge 	.double	0d+9.42477796076937979208e+00	#  6 * pi/2  leading
 1.1     ragge 	.double	0d+1.09955742875642763501e+01	#  7 * pi/2  leading
 1.1     ragge 	.double	0d+1.25663706143591729081e+01	#  8 * pi/2  leading
 1.1     ragge 	.double	0d+1.41371669411540694661e+01	#  9 * pi/2  leading
 1.1     ragge 	.double	0d+1.57079632679489662461e+01	# 10 * pi/2  leading
 1.1     ragge 	.double	0d+1.72787595947438630262e+01	# 11 * pi/2  leading
 1.1     ragge 	.double	0d+1.88495559215387595842e+01	# 12 * pi/2  leading
 1.1     ragge 	.double	0d+2.04203522483336561422e+01	# 13 * pi/2  leading
 1.1     ragge 	.double	0d+2.19911485751285527002e+01	# 14 * pi/2  leading
 1.1     ragge 	.double	0d+2.35619449019234492582e+01	# 15 * pi/2  leading
 1.1     ragge 	.double	0d+2.51327412287183458162e+01	# 16 * pi/2  leading
 1.1     ragge 	.double	0d+2.67035375555132423742e+01	# 17 * pi/2  leading
 1.1     ragge 	.double	0d+2.82743338823081389322e+01	# 18 * pi/2  leading
 1.1     ragge 	.double	0d+2.98451302091030359342e+01	# 19 * pi/2  leading
 1.1     ragge 	.double	0d+3.14159265358979324922e+01	# 20 * pi/2  leading
 1.1     ragge 	.double	0d+3.29867228626928286062e+01	# 21 * pi/2  leading
 1.1     ragge 	.double	0d+3.45575191894877260523e+01	# 22 * pi/2  leading
 1.1     ragge 	.double	0d+3.61283155162826226103e+01	# 23 * pi/2  leading
 1.1     ragge 	.double	0d+3.76991118430775191683e+01	# 24 * pi/2  leading
 1.1     ragge 	.double	0d+3.92699081698724157263e+01	# 25 * pi/2  leading
 1.1     ragge 	.double	0d+4.08407044966673122843e+01	# 26 * pi/2  leading
 1.1     ragge 	.double	0d+4.24115008234622088423e+01	# 27 * pi/2  leading
 1.1     ragge 	.double	0d+4.39822971502571054003e+01	# 28 * pi/2  leading
 1.1     ragge 	.double	0d+4.55530934770520019583e+01	# 29 * pi/2  leading
 1.1     ragge
 1.1     ragge twoOverPi:
               /*
                *  2/pi  as one D-format constant.  table_lookup  multiplies  |x|  by
                *  it and rounds the product to get the table index  n .
                */
 1.1     ragge 	.double	0d+6.36619772367581343076e-01
 1.5      matt
 1.1     ragge 	.text
 1.5      matt 	_ALIGN_TEXT
 1.1     ragge
 1.1     ragge table_lookup:
               /*
                *  table_lookup -- reduce  |x|  with the  leading/middle/trailing
                *  tables of  n * pi/2 .
                *
                *  In:   %r4/%r3  |x|  (D-format), as left by  __libm_argred
                *        4(%ap)   word whose sign is taken as the sign of the
                *                 original argument
                *  Out:  %r0      quadrant number, 0..3
                *        %r2/%r1  reduced argument  q  (D-format)
                *        %r3      extension  r ; it is computed as a D-format value
                *                 in %r4/%r3 but consumers take only %r3, as an
                *                 F-format number
                *  Entered with  jsb , returns with  rsb .
                */
 1.7      matt 	muld3	%r3,twoOverPi,%r0
 1.7      matt 	cvtrdl	%r0,%r0			# n = nearest int to ((2/pi)*|x|) rnded
 1.7      matt 	subd2	leading[%r0],%r3		# p = (|x| - leading n*pi/2) exactly
 1.7      matt 	subd3	middle[%r0],%r3,%r1	# q = (p - middle  n*pi/2) rounded
 1.7      matt 	subd2	%r1,%r3			# r = (p - q)
 1.7      matt 	subd2	middle[%r0],%r3		# r =  r - middle  n*pi/2
 1.7      matt 	subd2	trailing[%r0],%r3		# r =  r - trailing n*pi/2  rounded
 1.1     ragge /*
 1.1     ragge  *  If the original argument was negative,
 1.1     ragge  *  negate the reduced argument and
 1.1     ragge  *  adjust the octant/quadrant number.
 1.1     ragge  */
 1.7      matt 	tstw	4(%ap)
 1.1     ragge 	bgeq	abs2
 1.7      matt 	mnegf	%r1,%r1		/* sign of  q  is held in %r1 */
 1.7      matt 	mnegf	%r3,%r3
 1.7      matt /*	subb3	%r0,$8,%r0	...used for  pi/4  reduction -S.McD */
 1.7      matt 	subb3	%r0,$4,%r0	/* %r0 = 4 - n  (low byte only) */
 1.1     ragge abs2:
 1.1     ragge /*
 1.1     ragge  *  Clear all unneeded octant/quadrant bits.
 1.1     ragge  */
 1.7      matt /*	bicb2	$0xf8,%r0	...used for  pi/4  reduction -S.McD */
 1.7      matt 	bicb2	$0xfc,%r0	/* keep the low two bits of the low byte */
 1.1     ragge 	rsb
 1.1     ragge /*
 1.1     ragge  *						p.0
 1.1     ragge  */
 1.6      matt #ifdef __ELF__
 1.6      matt 	.section .rodata
 1.6      matt #else
 1.1     ragge 	.text
 1.6      matt #endif
 1.5      matt 	_ALIGN_TEXT
 1.1     ragge /*
 1.1     ragge  * Only 256 (actually 225) bits of 2/pi are needed for VAX double
 1.1     ragge  * precision; this was determined by enumerating all the nearest
 1.1     ragge  * machine integer multiples of pi/2 using continued fractions.
 1.1     ragge  * (8a8d3673775b7ff7 required the most bits.)		-S.McD
 1.1     ragge  */
               /*
                * Layout: ten longwords precede the  bits2opi  label and two follow
                * it.  trigred  starts at  bits2opi  minus a multiple of four bytes
                * (derived from the exponent) and then steps downwards four bytes
                * at a time, reading a quadword at each position, so the order of
                * these longwords is significant.
                */
 1.1     ragge 	.long	0
 1.1     ragge 	.long	0
 1.1     ragge 	.long	0xaef1586d
 1.1     ragge 	.long	0x9458eaf7
 1.1     ragge 	.long	0x10e4107f
 1.1     ragge 	.long	0xd8a5664f
 1.1     ragge 	.long	0x4d377036
 1.1     ragge 	.long	0x09d5f47d
 1.1     ragge 	.long	0x91054a7f
 1.1     ragge 	.long	0xbe60db93
 1.1     ragge bits2opi:
 1.1     ragge 	.long	0x00000028
 1.1     ragge 	.long	0
 1.1     ragge /*
 1.1     ragge  *  Note: wherever you see the word `octant', read `quadrant'.
 1.1     ragge  *  Currently this code is set up for  pi/2  argument reduction.
 1.1     ragge  *  By uncommenting/commenting the appropriate lines, it will
 1.1     ragge  *  also serve as a  pi/4  argument reduction code.
 1.1     ragge  */
 1.6      matt 	.text
 1.1     ragge
 1.1     ragge /*						p.1
 1.1     ragge  *  Trigred  performs argument reduction
 1.1     ragge  *  for the trigonometric functions.  It
 1.1     ragge  *  takes one input argument, a D-format
 1.7      matt  *  number in  %r1/%r0 .  The magnitude of
 1.1     ragge  *  the input argument must be greater
 1.1     ragge  *  than or equal to  1/2 .  Trigred produces
 1.1     ragge  *  three results:  the number of the octant
 1.4    simonb  *  occupied by the argument, the reduced
 1.1     ragge  *  argument, and an extension of the
 1.4    simonb  *  reduced argument.  The octant number is
 1.7      matt  *  returned in  %r0 .  The reduced argument
 1.4    simonb  *  is returned as a D-format number in
 1.7      matt  *  %r2/%r1 .  An 8 bit extension of the
 1.4    simonb  *  reduced argument is returned as an
 1.7      matt  *  F-format number in %r3.
 1.1     ragge  *						p.2
 1.1     ragge  */
 1.1     ragge trigred:
               /*
                *  Entered with  jsb  from  __libm_argred ; returns with  rsb  at
                *  return: .  %r0-%r11 are all written along the way and none is
                *  saved here.  One word is pushed below and popped again at
                *  return: , so the stack is balanced on exit.
                */
 1.1     ragge /*
 1.1     ragge  *  Save the sign of the input argument.
 1.1     ragge  */
 1.7      matt 	movw	%r0,-(%sp)
 1.1     ragge /*
 1.1     ragge  *  Extract the exponent field.
 1.1     ragge  */
               /*
                *  Only seven bits (13:7 of %r0) are taken, i.e. the exponent field
                *  without its top bit -- presumably that bit is always set for
                *  |x| >= 1/2 (TODO confirm).
                */
 1.7      matt 	extzv	$7,$7,%r0,%r2
 1.1     ragge /*
 1.1     ragge  *  Convert the fraction part of the input
 1.1     ragge  *  argument into a quadword integer.
 1.1     ragge  */
 1.7      matt 	bicw2	$0xff80,%r0	/* drop sign and exponent bits */
 1.7      matt 	bisb2	$0x80,%r0	# -S.McD
 1.7      matt 	rotl	$16,%r0,%r0	/* swap the 16-bit halves */
 1.7      matt 	rotl	$16,%r1,%r1	/* likewise; N now reflects %r1 */
 1.1     ragge /*
 1.7      matt  *  If  %r1  is negative, add  1  to  %r0 .  This
 1.1     ragge  *  adjustment is made so that the two's
 1.1     ragge  *  complement multiplications done later
 1.1     ragge  *  will produce unsigned results.
 1.1     ragge  */
 1.1     ragge 	bgeq	posmid
 1.7      matt 	incl	%r0
 1.1     ragge posmid:
 1.1     ragge /*						p.3
 1.1     ragge  *
 1.7      matt  *  Set  %r3  to the address of the first quadword
 1.1     ragge  *  used to obtain the needed portion of  2/pi .
 1.1     ragge  *  The address is longword aligned to ensure
 1.1     ragge  *  efficient access.
 1.1     ragge  */
 1.7      matt 	ashl	$-3,%r2,%r3	/* exponent bits shifted right by 3 ... */
 1.7      matt 	bicb2	$3,%r3		/* ... rounded down to a multiple of 4 */
 1.7      matt 	mnegl	%r3,%r3
 1.7      matt 	movab	bits2opi[%r3],%r3	/* %r3 = bits2opi - that many bytes */
 1.1     ragge /*
 1.7      matt  *  Set  %r2  to the size of the shift needed to
 1.1     ragge  *  obtain the correct portion of  2/pi .
 1.1     ragge  */
 1.7      matt 	bicb2	$0xe0,%r2	/* keep bits 4:0 -> shift count 0..31 */
 1.1     ragge /*						p.4
 1.1     ragge  *
 1.1     ragge  *  Move the needed  128  bits of  2/pi  into
 1.7      matt  *  %r11 - %r8 .  Adjust the numbers to allow
 1.1     ragge  *  for unsigned multiplication.
 1.1     ragge  */
               /*
                *  Each  ashq  shifts the quadword at (%r3) left by %r2 into a
                *  register pair.  %r3 then steps down one longword and the next
                *  ashq  rewrites the lower register of the previous pair.  After
                *  each step the branch tests the sign of the pair just written
                *  and, if it is set, 1 is added to the register above that pair.
                *  The last  ashq  also writes %r7, which is overwritten again by
                *  the multiplications that follow.
                */
 1.7      matt 	ashq	%r2,(%r3),%r10
 1.1     ragge
 1.7      matt 	subl2	$4,%r3
 1.7      matt 	ashq	%r2,(%r3),%r9
 1.1     ragge 	bgeq	signoff1
 1.7      matt 	incl	%r11
 1.1     ragge signoff1:
 1.7      matt 	subl2	$4,%r3
 1.7      matt 	ashq	%r2,(%r3),%r8
 1.1     ragge 	bgeq	signoff2
 1.7      matt 	incl	%r10
 1.1     ragge signoff2:
 1.7      matt 	subl2	$4,%r3
 1.7      matt 	ashq	%r2,(%r3),%r7
 1.1     ragge 	bgeq	signoff3
 1.7      matt 	incl	%r9
 1.1     ragge signoff3:
 1.1     ragge /*						p.5
 1.1     ragge  *
 1.7      matt  *  Multiply the contents of  %r0/%r1  by the
 1.7      matt  *  slice of  2/pi  in  %r11 - %r8 .
 1.1     ragge  */
               /*
                *  emul  a,b,c,d  leaves  a*b + c  in the register pair  d+1/d .
                *  In each chain below the high half of one product is passed as
                *  the addend of the next.  The first chain (multiplier %r0) builds
                *  %r6/%r5/%r4; the second (multiplier %r1) rewrites %r11-%r7 in
                *  place, consuming each 2/pi longword just before overwriting it.
                */
 1.7      matt 	emul	%r0,%r8,$0,%r4
 1.7      matt 	emul	%r0,%r9,%r5,%r5
 1.7      matt 	emul	%r0,%r10,%r6,%r6
 1.7      matt
 1.7      matt 	emul	%r1,%r8,$0,%r7
 1.7      matt 	emul	%r1,%r9,%r8,%r8
 1.7      matt 	emul	%r1,%r10,%r9,%r9
 1.7      matt 	emul	%r1,%r11,%r10,%r10
 1.7      matt
               /* Add the %r0 partial product into %r10/%r9/%r8 with carry. */
 1.7      matt 	addl2	%r4,%r8
 1.7      matt 	adwc	%r5,%r9
 1.7      matt 	adwc	%r6,%r10
 1.1     ragge /*						p.6
 1.1     ragge  *
 1.1     ragge  *  If there are more than five leading zeros
 1.1     ragge  *  after the first two quotient bits or if there
 1.1     ragge  *  are more than five leading ones after the first
 1.1     ragge  *  two quotient bits, generate more fraction bits.
 1.1     ragge  *  Otherwise, branch to code to produce the result.
 1.1     ragge  */
 1.7      matt 	bicl3	$0xc1ffffff,%r10,%r4	/* %r4 = bits 29:25 of %r10 */
 1.1     ragge 	beql	more1
 1.7      matt 	cmpl	$0x3e000000,%r4		/* all five bits set? */
 1.1     ragge 	bneq	result
 1.1     ragge more1:
 1.1     ragge /*						p.7
 1.1     ragge  *
 1.1     ragge  *  generate another  32  result bits.
 1.1     ragge  */
 1.7      matt 	subl2	$4,%r3
 1.7      matt 	ashq	%r2,(%r3),%r5	/* next 2/pi slice -> %r6/%r5 */
 1.1     ragge 	bgeq	signoff4
 1.1     ragge
               /*
                *  The new slice has its sign bit set: form the same two products
                *  as at  signoff4  and additionally add %r1 and %r0 into the high
                *  halves -- presumably the unsigned-multiplication correction
                *  mentioned at p.4 (TODO confirm).
                */
 1.7      matt 	emul	%r1,%r6,$0,%r4
 1.7      matt 	addl2	%r1,%r5
 1.7      matt 	emul	%r0,%r6,%r5,%r5
 1.7      matt 	addl2	%r0,%r6
 1.5      matt 	jbr	addbits1
 1.1     ragge
 1.1     ragge signoff4:
 1.7      matt 	emul	%r1,%r6,$0,%r4	/* %r5/%r4 = %r1 * slice */
 1.7      matt 	emul	%r0,%r6,%r5,%r5	/* %r6/%r5 = %r0 * slice + %r5 */
 1.1     ragge
 1.1     ragge addbits1:
               /* Fold %r6/%r5 into the running product %r10-%r7 with carry. */
 1.7      matt 	addl2	%r5,%r7
 1.7      matt 	adwc	%r6,%r8
 1.7      matt 	adwc	$0,%r9
 1.7      matt 	adwc	$0,%r10
 1.1     ragge /*						p.8
 1.1     ragge  *
 1.1     ragge  *  Check for massive cancellation.
 1.1     ragge  */
               /*
                *  The low 30 bits of %r10 being all zero or all one sends control
                *  on to  more2 ; anything else goes to  result .
                */
 1.7      matt 	bicl3	$0xc0000000,%r10,%r6
 1.1     ragge /*	bneq	more2			-S.McD  Test was backwards */
 1.1     ragge 	beql	more2
 1.7      matt 	cmpl	$0x3fffffff,%r6
 1.1     ragge 	bneq	result
 1.1     ragge more2:
 1.1     ragge /*						p.9
 1.1     ragge  *
 1.1     ragge  *  If massive cancellation has occurred,
 1.1     ragge  *  generate another  24  result bits.
 1.4    simonb  *  Testing has shown there will always be
 1.1     ragge  *  enough bits after this point.
 1.1     ragge  */
               /*
                *  This point is only reached by falling through from the p.8 test,
                *  so %r4 still holds the low longword of the  %r1 * slice  product
                *  formed at  more1 ; it is used as the addend below.
                */
 1.7      matt 	subl2	$4,%r3
 1.7      matt 	ashq	%r2,(%r3),%r5	/* next 2/pi slice -> %r6/%r5 */
 1.1     ragge 	bgeq	signoff5
 1.1     ragge
 1.7      matt 	emul	%r0,%r6,%r4,%r5	/* %r6/%r5 = %r0 * slice + %r4 */
 1.7      matt 	addl2	%r0,%r6		/* extra term when the slice sign bit is set */
 1.5      matt 	jbr	addbits2
 1.1     ragge
 1.1     ragge signoff5:
 1.7      matt 	emul	%r0,%r6,%r4,%r5	/* %r6/%r5 = %r0 * slice + %r4 */
 1.1     ragge
 1.1     ragge addbits2:
               /* Only the high longword %r6 is added in; carry ripples to %r10. */
 1.7      matt 	addl2	%r6,%r7
 1.7      matt 	adwc	$0,%r8
 1.7      matt 	adwc	$0,%r9
 1.7      matt 	adwc	$0,%r10
 1.1     ragge /*						p.10
 1.1     ragge  *
 1.1     ragge  *  The following code produces the reduced
 1.1     ragge  *  argument from the product bits contained
 1.7      matt  *  in  %r10 - %r7 .
 1.1     ragge  */
 1.1     ragge result:
 1.1     ragge /*
 1.7      matt  *  Extract the octant number from  %r10 .
 1.1     ragge  */
 1.7      matt /*	extzv	$29,$3,%r10,%r0	...used for  pi/4  reduction -S.McD */
 1.7      matt 	extzv	$30,$2,%r10,%r0	/* top two bits of %r10 */
 1.1     ragge /*
 1.7      matt  *  Clear the octant bits in  %r10 .
 1.1     ragge  */
 1.7      matt /*	bicl2	$0xe0000000,%r10	...used for  pi/4  reduction -S.McD */
 1.7      matt 	bicl2	$0xc0000000,%r10
 1.1     ragge /*
 1.1     ragge  *  Zero the sign flag.
 1.1     ragge  */
 1.7      matt 	clrl	%r5		/* %r5 is tested again at  return:  */
 1.1     ragge /*						p.11
 1.1     ragge  *
 1.1     ragge  *  Check to see if the fraction is greater than
 1.4    simonb  *  or equal to one-half.  If it is, add one
 1.1     ragge  *  to the octant number, set the sign flag
 1.1     ragge  *  on, and replace the fraction with  1 minus
 1.1     ragge  *  the fraction.
 1.1     ragge  */
 1.7      matt /*	bitl	$0x10000000,%r10		...used for  pi/4  reduction -S.McD */
 1.7      matt 	bitl	$0x20000000,%r10	/* bit 29: top remaining fraction bit */
 1.1     ragge 	beql	small
 1.7      matt 	incl	%r0
 1.7      matt 	incl	%r5
 1.7      matt /*	subl3	%r10,$0x1fffffff,%r10	...used for  pi/4  reduction -S.McD */
 1.7      matt 	subl3	%r10,$0x3fffffff,%r10	/* %r10 = 0x3fffffff - %r10 */
               /* The three lower longwords are replaced by their one's complement. */
 1.7      matt 	mcoml	%r9,%r9
 1.7      matt 	mcoml	%r8,%r8
 1.7      matt 	mcoml	%r7,%r7
 1.1     ragge small:
 1.1     ragge /*						p.12
 1.1     ragge  *
 1.1     ragge  *  Test whether the first  29  bits of the ...used for  pi/4  reduction -S.McD
 1.4    simonb  *  Test whether the first  30  bits of the
 1.1     ragge  *  fraction are zero.
 1.1     ragge  */
 1.7      matt 	tstl	%r10
 1.1     ragge 	beql	tiny
 1.1     ragge /*
 1.7      matt  *  Find the position of the first one bit in  %r10 .
 1.1     ragge  */
               /*
                *  The position is read back from the exponent of %r10 converted to
                *  D-format (seven bits starting at bit 7 of the converted value).
                *  cvtld  writes %r2/%r1.
                */
 1.7      matt 	cvtld	%r10,%r1
 1.7      matt 	extzv	$7,$7,%r1,%r1
 1.1     ragge /*
 1.1     ragge  *  Compute the size of the shift needed.
 1.1     ragge  */
 1.7      matt 	subl3	%r1,$32,%r6	/* %r6 = 32 - position */
 1.1     ragge /*
 1.1     ragge  *  Shift up the high order  64  bits of the
 1.1     ragge  *  product.
 1.1     ragge  */
               /*
                *  Two overlapping quadword shifts: the first puts  %r10/%r9 << %r6
                *  in %r11/%r10, the second puts  %r9/%r8 << %r6  in %r10/%r9 and so
                *  replaces %r10 with a value that includes the bits shifted up
                *  from %r8.
                */
 1.7      matt 	ashq	%r6,%r9,%r10
 1.7      matt 	ashq	%r6,%r8,%r9
 1.5      matt 	jbr	mult
 1.1     ragge /*						p.13
 1.1     ragge  *
 1.7      matt  *  Test to see if the sign bit of  %r9  is on.
 1.1     ragge  */
 1.1     ragge tiny:
               /* Reached from  small:  when %r10 is zero. */
 1.7      matt 	tstl	%r9
 1.1     ragge 	bgeq	tinier
 1.1     ragge /*
 1.1     ragge  *  If it is, shift the product bits up  32  bits.
 1.1     ragge  */
 1.7      matt 	movl	$32,%r6		/* shift count recorded for  mult/chop  */
 1.7      matt 	movq	%r8,%r10	/* %r11/%r10 = %r9/%r8 */
 1.7      matt 	tstl	%r10
 1.5      matt 	jbr	mult
 1.1     ragge /*						p.14
 1.1     ragge  *
 1.7      matt  *  Test whether  %r9  is zero.  It is probably
 1.7      matt  *  impossible for both  %r10  and  %r9  to be
 1.1     ragge  *  zero, but until proven to be so, the test
 1.1     ragge  *  must be made.
 1.1     ragge  */
 1.1     ragge tinier:
               /*
                *  No new test is made here: the  beql  relies on the condition
                *  codes left by the  tstl %r9  at  tiny: .
                */
 1.1     ragge 	beql	zero
 1.1     ragge /*
 1.7      matt  *  Find the position of the first one bit in  %r9 .
 1.1     ragge  */
 1.7      matt 	cvtld	%r9,%r1
 1.7      matt 	extzv	$7,$7,%r1,%r1
 1.1     ragge /*
 1.1     ragge  *  Compute the size of the shift needed.
 1.1     ragge  */
 1.7      matt 	subl3	%r1,$32,%r1	/* %r1 = 32 - position: shift within %r9 */
 1.7      matt 	addl3	$32,%r1,%r6	/* %r6 = that shift + 32: total recorded */
 1.1     ragge /*
 1.1     ragge  *  Shift up the high order  64  bits of the
 1.1     ragge  *  product.
 1.1     ragge  */
               /*
                *  Same overlapping pair of shifts as at  small: , one longword
                *  further down: sources %r9/%r8 and %r8/%r7, shift count %r1.
                */
 1.7      matt 	ashq	%r1,%r8,%r10
 1.7      matt 	ashq	%r1,%r7,%r9
 1.5      matt 	jbr	mult
 1.1     ragge /*						p.15
 1.1     ragge  *
 1.1     ragge  *  The following code sets the reduced
 1.1     ragge  *  argument to zero.
 1.1     ragge  */
 1.1     ragge zero:
               /*
                *  Clears the reduced argument (%r2/%r1) and its extension (%r3).
                *  %r0 (octant number) and %r5 (sign flag) keep the values set at
                *  result: .
                */
 1.7      matt 	clrl	%r1
 1.7      matt 	clrl	%r2
 1.7      matt 	clrl	%r3
 1.5      matt 	jbr	return
 1.1     ragge /*						p.16
 1.1     ragge  *
 1.7      matt  *  At this point,  %r0  contains the octant number,
 1.7      matt  *  %r6  indicates the number of bits the fraction
 1.7      matt  *  has been shifted,  %r5  indicates the sign of
 1.7      matt  *  the fraction,  %r11/%r10  contain the high order
 1.1     ragge  *  64  bits of the fraction, and the condition
 1.7      matt  *  codes indicate whether the sign bit of  %r10
 1.1     ragge  *  is on.  The following code multiplies the
 1.1     ragge  *  fraction by  pi/2 .
 1.1     ragge  */
 1.1     ragge mult:
 1.1     ragge /*
 1.7      matt  *  Save  %r11/%r10  in  %r4/%r1 .		-S.McD
 1.1     ragge  */
 1.7      matt 	movl	%r11,%r4
 1.7      matt 	movl	%r10,%r1	/* also leaves N set from %r10 */
 1.1     ragge /*
 1.7      matt  *  If the sign bit of  %r10  is on, add  1  to  %r11 .
 1.1     ragge  */
 1.1     ragge 	bgeq	signoff6
 1.7      matt 	incl	%r11
 1.1     ragge signoff6:
 1.1     ragge /*						p.17
 1.1     ragge  *
 1.7      matt  *  Move  pi/2  into  %r3/%r2 .
 1.1     ragge  */
 1.7      matt 	movq	$0xc90fdaa22168c235,%r2	/* %r2 = low longword, %r3 = high */
 1.1     ragge /*
 1.1     ragge  *  Multiply the fraction by the portion of  pi/2
 1.7      matt  *  in  %r2 .
 1.1     ragge  */
 1.7      matt 	emul	%r2,%r10,$0,%r7	/* %r8/%r7 = %r2 * %r10 */
 1.7      matt 	emul	%r2,%r11,%r8,%r7	/* %r8/%r7 = %r2 * %r11 + %r8 */
 1.1     ragge /*
 1.4    simonb  *  Multiply the fraction by the portion of  pi/2
 1.7      matt  *  in  %r3 .
 1.1     ragge  */
 1.7      matt 	emul	%r3,%r10,$0,%r9	/* %r10/%r9 = %r3 * %r10 */
 1.7      matt 	emul	%r3,%r11,%r10,%r10	/* %r11/%r10 = %r3 * %r11 + %r10 */
 1.1     ragge /*
 1.1     ragge  *  Add the product bits together.
 1.1     ragge  */
 1.7      matt 	addl2	%r7,%r9
 1.7      matt 	adwc	%r8,%r10
 1.7      matt 	adwc	$0,%r11
 1.1     ragge /*
 1.7      matt  *  Compensate for not sign extending  %r8  above.-S.McD
 1.1     ragge  */
 1.7      matt 	tstl	%r8
 1.1     ragge 	bgeq	signoff6a
 1.7      matt 	decl	%r11
 1.1     ragge signoff6a:
 1.1     ragge /*
 1.7      matt  *  Compensate for  %r11/%r10  being unsigned.	-S.McD
 1.1     ragge  */
 1.7      matt 	addl2	%r2,%r10
 1.7      matt 	adwc	%r3,%r11
 1.1     ragge /*
 1.7      matt  *  Compensate for  %r3/%r2  being unsigned.	-S.McD
 1.1     ragge  */
 1.7      matt 	addl2	%r1,%r10	/* %r4/%r1 = fraction saved at entry to  mult  */
 1.7      matt 	adwc	%r4,%r11
 1.1     ragge /*						p.18
 1.1     ragge  *
 1.7      matt  *  If the sign bit of  %r11  is zero, shift the
 1.7      matt  *  product bits up one bit and increment  %r6 .
 1.1     ragge  */
               /* The  blss  uses N as left by the  adwc  into %r11 just above. */
 1.1     ragge 	blss	signon
 1.7      matt 	incl	%r6
 1.7      matt 	ashq	$1,%r10,%r10
 1.7      matt 	tstl	%r9		/* top bit of %r9 becomes the new bit 0 */
 1.1     ragge 	bgeq	signoff7
 1.7      matt 	incl	%r10
 1.1     ragge signoff7:
 1.1     ragge signon:
 1.1     ragge /*						p.19
 1.1     ragge  *
 1.1     ragge  *  Shift the  56  most significant product
 1.7      matt  *  bits into  %r9/%r8 .  The sign extension
 1.1     ragge  *  will be handled later.
 1.1     ragge  */
 1.7      matt 	ashq	$-8,%r10,%r8	/* %r9/%r8 = %r11/%r10 shifted right by 8 */
 1.1     ragge /*
 1.7      matt  *  Convert the low order  8  bits of  %r10
 1.1     ragge  *  into an F-format number.
 1.1     ragge  */
 1.7      matt 	cvtbf	%r10,%r3	/* the byte is taken as signed */
 1.1     ragge /*
 1.1     ragge  *  If the result of the conversion was
 1.7      matt  *  negative, add  1  to  %r9/%r8 .
 1.1     ragge  */
 1.1     ragge 	bgeq	chop
 1.7      matt 	incl	%r8
 1.7      matt 	adwc	$0,%r9
 1.1     ragge /*
 1.7      matt  *  If  %r9  is now zero, branch to special
 1.1     ragge  *  code to handle that possibility.
 1.1     ragge  */
               /* Z here comes from the  adwc  into %r9. */
 1.1     ragge 	beql	carryout
 1.1     ragge chop:
 1.1     ragge /*						p.20
 1.1     ragge  *
 1.7      matt  *  Convert the number in  %r9/%r8  into
 1.7      matt  *  D-format number in  %r2/%r1 .
 1.1     ragge  */
 1.7      matt 	rotl	$16,%r8,%r2	/* swap 16-bit halves; low longword -> %r2 */
 1.7      matt 	rotl	$16,%r9,%r1	/* swap 16-bit halves; high longword -> %r1 */
 1.1     ragge /*
 1.1     ragge  *  Set the exponent field to the appropriate
 1.1     ragge  *  value.  Note that the extra bits created by
 1.1     ragge  *  sign extension are now eliminated.
 1.1     ragge  */
 1.7      matt 	subw3	%r6,$131,%r6	/* %r6 = 131 - accumulated shift count */
 1.7      matt 	insv	%r6,$7,$9,%r1	/* nine bits written, starting at bit 7 */
 1.1     ragge /*
 1.1     ragge  *  Set the exponent field of the F-format
 1.7      matt  *  number in  %r3  to the appropriate value.
 1.1     ragge  */
 1.7      matt 	tstf	%r3
 1.1     ragge 	beql	return		/* a zero extension is left as is */
 1.7      matt /*	extzv	$7,$8,%r3,%r4	-S.McD */
 1.7      matt 	extzv	$7,$7,%r3,%r4
 1.7      matt 	addw2	%r4,%r6
 1.7      matt /*	subw2	$217,%r6		-S.McD */
 1.7      matt 	subw2	$64,%r6
 1.7      matt 	insv	%r6,$7,$8,%r3	/* eight bits written, starting at bit 7 */
 1.5      matt 	jbr	return
 1.1     ragge /*						p.21
 1.1     ragge  *
 1.4    simonb  *  The following code generates the appropriate
 1.1     ragge  *  result for the unlikely possibility that
 1.7      matt  *  rounding the number in  %r9/%r8  resulted in
 1.1     ragge  *  a carry out.
 1.1     ragge  */
 1.1     ragge carryout:
               /*
                *  Same steps as  chop: , but with the fraction words cleared and
                *  the exponent constant one higher (132 instead of 131).  Falls
                *  through into  return: .
                *
                *  NOTE(review):  chop:  was changed (see its commented-out lines)
                *  from  extzv $7,$8 / subw2 $217  to  extzv $7,$7 / subw2 $64 .
                *  This path still uses the  $7,$8 / $218  form -- confirm whether
                *  the same correction was meant to apply here.
                */
 1.7      matt 	clrl	%r1
 1.7      matt 	clrl	%r2
 1.7      matt 	subw3	%r6,$132,%r6
 1.7      matt 	insv	%r6,$7,$9,%r1
 1.7      matt 	tstf	%r3
 1.1     ragge 	beql	return
 1.7      matt 	extzv	$7,$8,%r3,%r4
 1.7      matt 	addw2	%r4,%r6
 1.7      matt 	subw2	$218,%r6
 1.7      matt 	insv	%r6,$7,$8,%r3
 1.1     ragge /*						p.22
 1.1     ragge  *
 1.1     ragge  *  The following code makes any needed
 1.4    simonb  *  adjustments to the signs of the
 1.1     ragge  *  results or to the octant number, and
 1.1     ragge  *  then returns.
 1.1     ragge  */
 1.1     ragge return:
 1.1     ragge /*
 1.4    simonb  *  Test if the fraction was greater than or
 1.1     ragge  *  equal to  1/2 .  If so, negate the reduced
 1.1     ragge  *  argument.
 1.1     ragge  */
 1.7      matt 	blbc	%r5,signoff8	/* %r5 = sign flag set at  result:  */
 1.7      matt 	mnegf	%r1,%r1
 1.7      matt 	mnegf	%r3,%r3
 1.1     ragge signoff8:
 1.1     ragge /*						p.23
 1.1     ragge  *
 1.1     ragge  *  If the original argument was negative,
 1.1     ragge  *  negate the reduced argument and
 1.1     ragge  *  adjust the octant number.
 1.1     ragge  */
               /* Pops the word pushed on entry to  trigred ; its sign is that of x. */
 1.7      matt 	tstw	(%sp)+
 1.1     ragge 	bgeq	signoff9
 1.7      matt 	mnegf	%r1,%r1
 1.7      matt 	mnegf	%r3,%r3
 1.7      matt /*	subb3	%r0,$8,%r0	...used for  pi/4  reduction -S.McD */
 1.7      matt 	subb3	%r0,$4,%r0	/* %r0 = 4 - %r0  (low byte only) */
 1.1     ragge signoff9:
 1.1     ragge /*
 1.1     ragge  *  Clear all unneeded octant bits.
 1.1     ragge  *
 1.7      matt  *	bicb2	$0xf8,%r0	...used for  pi/4  reduction -S.McD */
 1.7      matt 	bicb2	$0xfc,%r0	/* keep the low two bits of the low byte */
 1.1     ragge /*
 1.1     ragge  *  Return.
 1.1     ragge  */
 1.1     ragge 	rsb