1 1.10 riastrad /* $NetBSD: n_argred.S,v 1.10 2024/05/07 15:15:09 riastradh Exp $ */ 2 1.1 ragge /* 3 1.1 ragge * Copyright (c) 1985, 1993 4 1.1 ragge * The Regents of the University of California. All rights reserved. 5 1.1 ragge * 6 1.1 ragge * Redistribution and use in source and binary forms, with or without 7 1.1 ragge * modification, are permitted provided that the following conditions 8 1.1 ragge * are met: 9 1.1 ragge * 1. Redistributions of source code must retain the above copyright 10 1.1 ragge * notice, this list of conditions and the following disclaimer. 11 1.1 ragge * 2. Redistributions in binary form must reproduce the above copyright 12 1.1 ragge * notice, this list of conditions and the following disclaimer in the 13 1.1 ragge * documentation and/or other materials provided with the distribution. 14 1.8 agc * 3. Neither the name of the University nor the names of its contributors 15 1.1 ragge * may be used to endorse or promote products derived from this software 16 1.1 ragge * without specific prior written permission. 17 1.1 ragge * 18 1.1 ragge * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19 1.1 ragge * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 1.1 ragge * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 1.1 ragge * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22 1.1 ragge * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 1.1 ragge * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 1.1 ragge * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 1.1 ragge * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 1.1 ragge * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 1.1 ragge * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 1.1 ragge * SUCH DAMAGE. 29 1.1 ragge * 30 1.1 ragge * @(#)argred.s 8.1 (Berkeley) 6/4/93 31 1.1 ragge */ 32 1.1 ragge 33 1.5 matt #include <machine/asm.h> 34 1.5 matt 35 1.1 ragge /* 36 1.1 ragge * libm$argred implements Bob Corbett's argument reduction and 37 1.1 ragge * libm$sincos implements Peter Tang's double precision sin/cos. 38 1.4 simonb * 39 1.1 ragge * Note: The two entry points libm$argred and libm$sincos are meant 40 1.1 ragge * to be used only by _sin, _cos and _tan. 41 1.1 ragge * 42 1.1 ragge * method: true range reduction to [-pi/4,pi/4], P. Tang & B. Corbett 43 1.1 ragge * S. McDonald, April 4, 1985 44 1.1 ragge */ 45 1.1 ragge 46 1.9 matt .hidden __libm_argred 47 1.5 matt ENTRY(__libm_argred, 0) 48 1.1 ragge /* 49 1.1 ragge * Compare the argument with the largest possible that can 50 1.7 matt * be reduced by table lookup. %r3 := |x| will be used in table_lookup . 51 1.1 ragge */ 52 1.7 matt movd %r0,%r3 53 1.1 ragge bgeq abs1 54 1.7 matt mnegd %r3,%r3 55 1.1 ragge abs1: 56 1.7 matt cmpd %r3,$0d+4.55530934770520019583e+01 57 1.1 ragge blss small_arg 58 1.1 ragge jsb trigred 59 1.1 ragge rsb 60 1.1 ragge small_arg: 61 1.1 ragge jsb table_lookup 62 1.1 ragge rsb 63 1.1 ragge /* 64 1.1 ragge * At this point, 65 1.7 matt * %r0 contains the quadrant number, 0, 1, 2, or 3; 66 1.7 matt * %r2/%r1 contains the reduced argument as a D-format number; 67 1.7 matt * %r3 contains a F-format extension to the reduced argument; 68 1.7 matt * %r4 contains a 0 or 1 corresponding to a sin or cos entry. 69 1.1 ragge */ 70 1.10 riastrad END(__libm_argred) 71 1.5 matt 72 1.9 matt .hidden __libm_sincos 73 1.5 matt ENTRY(__libm_sincos, 0) 74 1.1 ragge /* 75 1.1 ragge * Compensate for a cosine entry by adding one to the quadrant number. 76 1.1 ragge */ 77 1.7 matt addl2 %r4,%r0 78 1.1 ragge /* 79 1.7 matt * Polyd clobbers %r5-%r0 ; save X in %r7/%r6 . 80 1.1 ragge * This can be avoided by rewriting trigred . 81 1.1 ragge */ 82 1.7 matt movd %r1,%r6 83 1.1 ragge /* 84 1.7 matt * Likewise, save alpha in %r8 . 85 1.1 ragge * This can be avoided by rewriting trigred . 86 1.1 ragge */ 87 1.7 matt movf %r3,%r8 88 1.1 ragge /* 89 1.1 ragge * Odd or even quadrant? cosine if odd, sine otherwise. 90 1.7 matt * Save floor(quadrant/2) in %r9 ; it determines the final sign. 91 1.1 ragge */ 92 1.7 matt rotl $-1,%r0,%r9 93 1.1 ragge blss cosine 94 1.1 ragge sine: 95 1.7 matt muld2 %r1,%r1 # Xsq = X * X 96 1.7 matt cmpw $0x2480,%r1 # [zl] Xsq > 2^-56? 97 1.1 ragge blss 1f # [zl] yes, go ahead and do polyd 98 1.7 matt clrq %r1 # [zl] work around 11/780 FPA polyd bug 99 1.1 ragge 1: 100 1.7 matt polyd %r1,$7,sin_coef # Q = P(Xsq) , of deg 7 101 1.7 matt mulf3 $0f3.0,%r8,%r4 # beta = 3 * alpha 102 1.7 matt mulf2 %r0,%r4 # beta = Q * beta 103 1.7 matt addf2 %r8,%r4 # beta = alpha + beta 104 1.7 matt muld2 %r6,%r0 # S(X) = X * Q 105 1.7 matt /* cvtfd %r4,%r4 ... %r5 = 0 after a polyd. */ 106 1.7 matt addd2 %r4,%r0 # S(X) = beta + S(X) 107 1.7 matt addd2 %r6,%r0 # S(X) = X + S(X) 108 1.5 matt jbr done 109 1.1 ragge cosine: 110 1.7 matt muld2 %r6,%r6 # Xsq = X * X 111 1.1 ragge beql zero_arg 112 1.7 matt mulf2 %r1,%r8 # beta = X * alpha 113 1.7 matt polyd %r6,$7,cos_coef /* Q = P'(Xsq) , of deg 7 */ 114 1.7 matt subd3 %r0,%r8,%r0 # beta = beta - Q 115 1.7 matt subw2 $0x80,%r6 # Xsq = Xsq / 2 116 1.7 matt addd2 %r0,%r6 # Xsq = Xsq + beta 117 1.1 ragge zero_arg: 118 1.7 matt subd3 %r6,$0d1.0,%r0 # C(X) = 1 - Xsq 119 1.1 ragge done: 120 1.7 matt blbc %r9,even 121 1.7 matt mnegd %r0,%r0 122 1.1 ragge even: 123 1.1 ragge rsb 124 1.10 riastrad END(__libm_sincos) 125 1.1 ragge 126 1.6 matt #ifdef __ELF__ 127 1.6 matt .section .rodata 128 1.6 matt #else 129 1.6 matt .text 130 1.6 matt #endif 131 1.5 matt _ALIGN_TEXT 132 1.1 ragge 133 1.1 ragge sin_coef: 134 1.1 ragge .double 0d-7.53080332264191085773e-13 # s7 = 2^-29 -1.a7f2504ffc49f8.. 135 1.1 ragge .double 0d+1.60573519267703489121e-10 # s6 = 2^-21 1.611adaede473c8.. 136 1.1 ragge .double 0d-2.50520965150706067211e-08 # s5 = 2^-1a -1.ae644921ed8382.. 137 1.1 ragge .double 0d+2.75573191800593885716e-06 # s4 = 2^-13 1.71de3a4b884278.. 138 1.1 ragge .double 0d-1.98412698411850507950e-04 # s3 = 2^-0d -1.a01a01a0125e7d.. 139 1.1 ragge .double 0d+8.33333333333325688985e-03 # s2 = 2^-07 1.11111111110e50 140 1.1 ragge .double 0d-1.66666666666666664354e-01 # s1 = 2^-03 -1.55555555555554 141 1.1 ragge .double 0d+0.00000000000000000000e+00 # s0 = 0 142 1.1 ragge 143 1.1 ragge cos_coef: 144 1.1 ragge .double 0d-1.13006966202629430300e-11 # s7 = 2^-25 -1.8D9BA04D1374BE.. 145 1.1 ragge .double 0d+2.08746646574796004700e-09 # s6 = 2^-1D 1.1EE632650350BA.. 146 1.1 ragge .double 0d-2.75573073031284417300e-07 # s5 = 2^-16 -1.27E4F31411719E.. 147 1.1 ragge .double 0d+2.48015872682668025200e-05 # s4 = 2^-10 1.A01A0196B902E8.. 148 1.1 ragge .double 0d-1.38888888888464709200e-03 # s3 = 2^-0A -1.6C16C16C11FACE.. 149 1.1 ragge .double 0d+4.16666666666664761400e-02 # s2 = 2^-05 1.5555555555539E 150 1.1 ragge .double 0d+0.00000000000000000000e+00 # s1 = 0 151 1.1 ragge .double 0d+0.00000000000000000000e+00 # s0 = 0 152 1.1 ragge 153 1.1 ragge /* 154 1.1 ragge * Multiples of pi/2 expressed as the sum of three doubles, 155 1.1 ragge * 156 1.1 ragge * trailing: n * pi/2 , n = 0, 1, 2, ..., 29 157 1.1 ragge * trailing[n] , 158 1.1 ragge * 159 1.1 ragge * middle: n * pi/2 , n = 0, 1, 2, ..., 29 160 1.1 ragge * middle[n] , 161 1.1 ragge * 162 1.1 ragge * leading: n * pi/2 , n = 0, 1, 2, ..., 29 163 1.1 ragge * leading[n] , 164 1.1 ragge * 165 1.1 ragge * where 166 1.1 ragge * leading[n] := (n * pi/2) rounded, 167 1.1 ragge * middle[n] := (n * pi/2 - leading[n]) rounded, 168 1.1 ragge * trailing[n] := (( n * pi/2 - leading[n]) - middle[n]) rounded . 169 1.1 ragge */ 170 1.1 ragge trailing: 171 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 trailing 172 1.1 ragge .double 0d+4.33590506506189049611e-35 # 1 * pi/2 trailing 173 1.1 ragge .double 0d+8.67181013012378099223e-35 # 2 * pi/2 trailing 174 1.1 ragge .double 0d+1.30077151951856714215e-34 # 3 * pi/2 trailing 175 1.1 ragge .double 0d+1.73436202602475619845e-34 # 4 * pi/2 trailing 176 1.1 ragge .double 0d-1.68390735624352669192e-34 # 5 * pi/2 trailing 177 1.1 ragge .double 0d+2.60154303903713428430e-34 # 6 * pi/2 trailing 178 1.1 ragge .double 0d-8.16726343231148352150e-35 # 7 * pi/2 trailing 179 1.1 ragge .double 0d+3.46872405204951239689e-34 # 8 * pi/2 trailing 180 1.1 ragge .double 0d+3.90231455855570147991e-34 # 9 * pi/2 trailing 181 1.1 ragge .double 0d-3.36781471248705338384e-34 # 10 * pi/2 trailing 182 1.1 ragge .double 0d-1.06379439835298071785e-33 # 11 * pi/2 trailing 183 1.1 ragge .double 0d+5.20308607807426856861e-34 # 12 * pi/2 trailing 184 1.1 ragge .double 0d+5.63667658458045770509e-34 # 13 * pi/2 trailing 185 1.1 ragge .double 0d-1.63345268646229670430e-34 # 14 * pi/2 trailing 186 1.1 ragge .double 0d-1.19986217995610764801e-34 # 15 * pi/2 trailing 187 1.1 ragge .double 0d+6.93744810409902479378e-34 # 16 * pi/2 trailing 188 1.1 ragge .double 0d-8.03640094449267300110e-34 # 17 * pi/2 trailing 189 1.1 ragge .double 0d+7.80462911711140295982e-34 # 18 * pi/2 trailing 190 1.1 ragge .double 0d-7.16921993148029483506e-34 # 19 * pi/2 trailing 191 1.1 ragge .double 0d-6.73562942497410676769e-34 # 20 * pi/2 trailing 192 1.1 ragge .double 0d-6.30203891846791677593e-34 # 21 * pi/2 trailing 193 1.1 ragge .double 0d-2.12758879670596143570e-33 # 22 * pi/2 trailing 194 1.1 ragge .double 0d+2.53800212047402350390e-33 # 23 * pi/2 trailing 195 1.1 ragge .double 0d+1.04061721561485371372e-33 # 24 * pi/2 trailing 196 1.1 ragge .double 0d+6.11729905311472319056e-32 # 25 * pi/2 trailing 197 1.1 ragge .double 0d+1.12733531691609154102e-33 # 26 * pi/2 trailing 198 1.1 ragge .double 0d-3.70049587943078297272e-34 # 27 * pi/2 trailing 199 1.1 ragge .double 0d-3.26690537292459340860e-34 # 28 * pi/2 trailing 200 1.1 ragge .double 0d-1.14812616507957271361e-34 # 29 * pi/2 trailing 201 1.1 ragge 202 1.1 ragge middle: 203 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 middle 204 1.1 ragge .double 0d+5.72118872610983179676e-18 # 1 * pi/2 middle 205 1.1 ragge .double 0d+1.14423774522196635935e-17 # 2 * pi/2 middle 206 1.1 ragge .double 0d-3.83475850529283316309e-17 # 3 * pi/2 middle 207 1.1 ragge .double 0d+2.28847549044393271871e-17 # 4 * pi/2 middle 208 1.1 ragge .double 0d-2.69052076007086676522e-17 # 5 * pi/2 middle 209 1.1 ragge .double 0d-7.66951701058566632618e-17 # 6 * pi/2 middle 210 1.1 ragge .double 0d-1.54628301484890040587e-17 # 7 * pi/2 middle 211 1.1 ragge .double 0d+4.57695098088786543741e-17 # 8 * pi/2 middle 212 1.1 ragge .double 0d+1.07001849766246313192e-16 # 9 * pi/2 middle 213 1.1 ragge .double 0d-5.38104152014173353044e-17 # 10 * pi/2 middle 214 1.1 ragge .double 0d-2.14622680169080983801e-16 # 11 * pi/2 middle 215 1.1 ragge .double 0d-1.53390340211713326524e-16 # 12 * pi/2 middle 216 1.1 ragge .double 0d-9.21580002543456677056e-17 # 13 * pi/2 middle 217 1.1 ragge .double 0d-3.09256602969780081173e-17 # 14 * pi/2 middle 218 1.1 ragge .double 0d+3.03066796603896507006e-17 # 15 * pi/2 middle 219 1.1 ragge .double 0d+9.15390196177573087482e-17 # 16 * pi/2 middle 220 1.1 ragge .double 0d+1.52771359575124969107e-16 # 17 * pi/2 middle 221 1.1 ragge .double 0d+2.14003699532492626384e-16 # 18 * pi/2 middle 222 1.1 ragge .double 0d-1.68853170360202329427e-16 # 19 * pi/2 middle 223 1.1 ragge .double 0d-1.07620830402834670609e-16 # 20 * pi/2 middle 224 1.1 ragge .double 0d+3.97700719404595604379e-16 # 21 * pi/2 middle 225 1.1 ragge .double 0d-4.29245360338161967602e-16 # 22 * pi/2 middle 226 1.1 ragge .double 0d-3.68013020380794313406e-16 # 23 * pi/2 middle 227 1.1 ragge .double 0d-3.06780680423426653047e-16 # 24 * pi/2 middle 228 1.1 ragge .double 0d-2.45548340466059054318e-16 # 25 * pi/2 middle 229 1.1 ragge .double 0d-1.84316000508691335411e-16 # 26 * pi/2 middle 230 1.1 ragge .double 0d-1.23083660551323675053e-16 # 27 * pi/2 middle 231 1.1 ragge .double 0d-6.18513205939560162346e-17 # 28 * pi/2 middle 232 1.1 ragge .double 0d-6.18980636588357585202e-19 # 29 * pi/2 middle 233 1.1 ragge 234 1.1 ragge leading: 235 1.1 ragge .double 0d+0.00000000000000000000e+00 # 0 * pi/2 leading 236 1.1 ragge .double 0d+1.57079632679489661351e+00 # 1 * pi/2 leading 237 1.1 ragge .double 0d+3.14159265358979322702e+00 # 2 * pi/2 leading 238 1.1 ragge .double 0d+4.71238898038468989604e+00 # 3 * pi/2 leading 239 1.1 ragge .double 0d+6.28318530717958645404e+00 # 4 * pi/2 leading 240 1.1 ragge .double 0d+7.85398163397448312306e+00 # 5 * pi/2 leading 241 1.1 ragge .double 0d+9.42477796076937979208e+00 # 6 * pi/2 leading 242 1.1 ragge .double 0d+1.09955742875642763501e+01 # 7 * pi/2 leading 243 1.1 ragge .double 0d+1.25663706143591729081e+01 # 8 * pi/2 leading 244 1.1 ragge .double 0d+1.41371669411540694661e+01 # 9 * pi/2 leading 245 1.1 ragge .double 0d+1.57079632679489662461e+01 # 10 * pi/2 leading 246 1.1 ragge .double 0d+1.72787595947438630262e+01 # 11 * pi/2 leading 247 1.1 ragge .double 0d+1.88495559215387595842e+01 # 12 * pi/2 leading 248 1.1 ragge .double 0d+2.04203522483336561422e+01 # 13 * pi/2 leading 249 1.1 ragge .double 0d+2.19911485751285527002e+01 # 14 * pi/2 leading 250 1.1 ragge .double 0d+2.35619449019234492582e+01 # 15 * pi/2 leading 251 1.1 ragge .double 0d+2.51327412287183458162e+01 # 16 * pi/2 leading 252 1.1 ragge .double 0d+2.67035375555132423742e+01 # 17 * pi/2 leading 253 1.1 ragge .double 0d+2.82743338823081389322e+01 # 18 * pi/2 leading 254 1.1 ragge .double 0d+2.98451302091030359342e+01 # 19 * pi/2 leading 255 1.1 ragge .double 0d+3.14159265358979324922e+01 # 20 * pi/2 leading 256 1.1 ragge .double 0d+3.29867228626928286062e+01 # 21 * pi/2 leading 257 1.1 ragge .double 0d+3.45575191894877260523e+01 # 22 * pi/2 leading 258 1.1 ragge .double 0d+3.61283155162826226103e+01 # 23 * pi/2 leading 259 1.1 ragge .double 0d+3.76991118430775191683e+01 # 24 * pi/2 leading 260 1.1 ragge .double 0d+3.92699081698724157263e+01 # 25 * pi/2 leading 261 1.1 ragge .double 0d+4.08407044966673122843e+01 # 26 * pi/2 leading 262 1.1 ragge .double 0d+4.24115008234622088423e+01 # 27 * pi/2 leading 263 1.1 ragge .double 0d+4.39822971502571054003e+01 # 28 * pi/2 leading 264 1.1 ragge .double 0d+4.55530934770520019583e+01 # 29 * pi/2 leading 265 1.1 ragge 266 1.1 ragge twoOverPi: 267 1.1 ragge .double 0d+6.36619772367581343076e-01 268 1.5 matt 269 1.1 ragge .text 270 1.5 matt _ALIGN_TEXT 271 1.1 ragge 272 1.1 ragge table_lookup: 273 1.7 matt muld3 %r3,twoOverPi,%r0 274 1.7 matt cvtrdl %r0,%r0 # n = nearest int to ((2/pi)*|x|) rnded 275 1.7 matt subd2 leading[%r0],%r3 # p = (|x| - leading n*pi/2) exactly 276 1.7 matt subd3 middle[%r0],%r3,%r1 # q = (p - middle n*pi/2) rounded 277 1.7 matt subd2 %r1,%r3 # r = (p - q) 278 1.7 matt subd2 middle[%r0],%r3 # r = r - middle n*pi/2 279 1.7 matt subd2 trailing[%r0],%r3 # r = r - trailing n*pi/2 rounded 280 1.1 ragge /* 281 1.1 ragge * If the original argument was negative, 282 1.1 ragge * negate the reduce argument and 283 1.1 ragge * adjust the octant/quadrant number. 284 1.1 ragge */ 285 1.7 matt tstw 4(%ap) 286 1.1 ragge bgeq abs2 287 1.7 matt mnegf %r1,%r1 288 1.7 matt mnegf %r3,%r3 289 1.7 matt /* subb3 %r0,$8,%r0 ...used for pi/4 reduction -S.McD */ 290 1.7 matt subb3 %r0,$4,%r0 291 1.1 ragge abs2: 292 1.1 ragge /* 293 1.1 ragge * Clear all unneeded octant/quadrant bits. 294 1.1 ragge */ 295 1.7 matt /* bicb2 $0xf8,%r0 ...used for pi/4 reduction -S.McD */ 296 1.7 matt bicb2 $0xfc,%r0 297 1.1 ragge rsb 298 1.1 ragge /* 299 1.1 ragge * p.0 300 1.1 ragge */ 301 1.6 matt #ifdef __ELF__ 302 1.6 matt .section .rodata 303 1.6 matt #else 304 1.1 ragge .text 305 1.6 matt #endif 306 1.5 matt _ALIGN_TEXT 307 1.1 ragge /* 308 1.1 ragge * Only 256 (actually 225) bits of 2/pi are needed for VAX double 309 1.1 ragge * precision; this was determined by enumerating all the nearest 310 1.1 ragge * machine integer multiples of pi/2 using continued fractions. 311 1.1 ragge * (8a8d3673775b7ff7 required the most bits.) -S.McD 312 1.1 ragge */ 313 1.1 ragge .long 0 314 1.1 ragge .long 0 315 1.1 ragge .long 0xaef1586d 316 1.1 ragge .long 0x9458eaf7 317 1.1 ragge .long 0x10e4107f 318 1.1 ragge .long 0xd8a5664f 319 1.1 ragge .long 0x4d377036 320 1.1 ragge .long 0x09d5f47d 321 1.1 ragge .long 0x91054a7f 322 1.1 ragge .long 0xbe60db93 323 1.1 ragge bits2opi: 324 1.1 ragge .long 0x00000028 325 1.1 ragge .long 0 326 1.1 ragge /* 327 1.1 ragge * Note: wherever you see the word `octant', read `quadrant'. 328 1.1 ragge * Currently this code is set up for pi/2 argument reduction. 329 1.1 ragge * By uncommenting/commenting the appropriate lines, it will 330 1.1 ragge * also serve as a pi/4 argument reduction code. 331 1.1 ragge */ 332 1.6 matt .text 333 1.1 ragge 334 1.1 ragge /* p.1 335 1.1 ragge * Trigred preforms argument reduction 336 1.1 ragge * for the trigonometric functions. It 337 1.1 ragge * takes one input argument, a D-format 338 1.7 matt * number in %r1/%r0 . The magnitude of 339 1.1 ragge * the input argument must be greater 340 1.1 ragge * than or equal to 1/2 . Trigred produces 341 1.1 ragge * three results: the number of the octant 342 1.4 simonb * occupied by the argument, the reduced 343 1.1 ragge * argument, and an extension of the 344 1.4 simonb * reduced argument. The octant number is 345 1.7 matt * returned in %r0 . The reduced argument 346 1.4 simonb * is returned as a D-format number in 347 1.7 matt * %r2/%r1 . An 8 bit extension of the 348 1.4 simonb * reduced argument is returned as an 349 1.7 matt * F-format number in %r3. 350 1.1 ragge * p.2 351 1.1 ragge */ 352 1.1 ragge trigred: 353 1.1 ragge /* 354 1.1 ragge * Save the sign of the input argument. 355 1.1 ragge */ 356 1.7 matt movw %r0,-(%sp) 357 1.1 ragge /* 358 1.1 ragge * Extract the exponent field. 359 1.1 ragge */ 360 1.7 matt extzv $7,$7,%r0,%r2 361 1.1 ragge /* 362 1.1 ragge * Convert the fraction part of the input 363 1.1 ragge * argument into a quadword integer. 364 1.1 ragge */ 365 1.7 matt bicw2 $0xff80,%r0 366 1.7 matt bisb2 $0x80,%r0 # -S.McD 367 1.7 matt rotl $16,%r0,%r0 368 1.7 matt rotl $16,%r1,%r1 369 1.1 ragge /* 370 1.7 matt * If %r1 is negative, add 1 to %r0 . This 371 1.1 ragge * adjustment is made so that the two's 372 1.1 ragge * complement multiplications done later 373 1.1 ragge * will produce unsigned results. 374 1.1 ragge */ 375 1.1 ragge bgeq posmid 376 1.7 matt incl %r0 377 1.1 ragge posmid: 378 1.1 ragge /* p.3 379 1.1 ragge * 380 1.7 matt * Set %r3 to the address of the first quadword 381 1.1 ragge * used to obtain the needed portion of 2/pi . 382 1.1 ragge * The address is longword aligned to ensure 383 1.1 ragge * efficient access. 384 1.1 ragge */ 385 1.7 matt ashl $-3,%r2,%r3 386 1.7 matt bicb2 $3,%r3 387 1.7 matt mnegl %r3,%r3 388 1.7 matt movab bits2opi[%r3],%r3 389 1.1 ragge /* 390 1.7 matt * Set %r2 to the size of the shift needed to 391 1.1 ragge * obtain the correct portion of 2/pi . 392 1.1 ragge */ 393 1.7 matt bicb2 $0xe0,%r2 394 1.1 ragge /* p.4 395 1.1 ragge * 396 1.1 ragge * Move the needed 128 bits of 2/pi into 397 1.7 matt * %r11 - %r8 . Adjust the numbers to allow 398 1.1 ragge * for unsigned multiplication. 399 1.1 ragge */ 400 1.7 matt ashq %r2,(%r3),%r10 401 1.1 ragge 402 1.7 matt subl2 $4,%r3 403 1.7 matt ashq %r2,(%r3),%r9 404 1.1 ragge bgeq signoff1 405 1.7 matt incl %r11 406 1.1 ragge signoff1: 407 1.7 matt subl2 $4,%r3 408 1.7 matt ashq %r2,(%r3),%r8 409 1.1 ragge bgeq signoff2 410 1.7 matt incl %r10 411 1.1 ragge signoff2: 412 1.7 matt subl2 $4,%r3 413 1.7 matt ashq %r2,(%r3),%r7 414 1.1 ragge bgeq signoff3 415 1.7 matt incl %r9 416 1.1 ragge signoff3: 417 1.1 ragge /* p.5 418 1.1 ragge * 419 1.7 matt * Multiply the contents of %r0/%r1 by the 420 1.7 matt * slice of 2/pi in %r11 - %r8 . 421 1.1 ragge */ 422 1.7 matt emul %r0,%r8,$0,%r4 423 1.7 matt emul %r0,%r9,%r5,%r5 424 1.7 matt emul %r0,%r10,%r6,%r6 425 1.7 matt 426 1.7 matt emul %r1,%r8,$0,%r7 427 1.7 matt emul %r1,%r9,%r8,%r8 428 1.7 matt emul %r1,%r10,%r9,%r9 429 1.7 matt emul %r1,%r11,%r10,%r10 430 1.7 matt 431 1.7 matt addl2 %r4,%r8 432 1.7 matt adwc %r5,%r9 433 1.7 matt adwc %r6,%r10 434 1.1 ragge /* p.6 435 1.1 ragge * 436 1.1 ragge * If there are more than five leading zeros 437 1.1 ragge * after the first two quotient bits or if there 438 1.1 ragge * are more than five leading ones after the first 439 1.1 ragge * two quotient bits, generate more fraction bits. 440 1.1 ragge * Otherwise, branch to code to produce the result. 441 1.1 ragge */ 442 1.7 matt bicl3 $0xc1ffffff,%r10,%r4 443 1.1 ragge beql more1 444 1.7 matt cmpl $0x3e000000,%r4 445 1.1 ragge bneq result 446 1.1 ragge more1: 447 1.1 ragge /* p.7 448 1.1 ragge * 449 1.1 ragge * generate another 32 result bits. 450 1.1 ragge */ 451 1.7 matt subl2 $4,%r3 452 1.7 matt ashq %r2,(%r3),%r5 453 1.1 ragge bgeq signoff4 454 1.1 ragge 455 1.7 matt emul %r1,%r6,$0,%r4 456 1.7 matt addl2 %r1,%r5 457 1.7 matt emul %r0,%r6,%r5,%r5 458 1.7 matt addl2 %r0,%r6 459 1.5 matt jbr addbits1 460 1.1 ragge 461 1.1 ragge signoff4: 462 1.7 matt emul %r1,%r6,$0,%r4 463 1.7 matt emul %r0,%r6,%r5,%r5 464 1.1 ragge 465 1.1 ragge addbits1: 466 1.7 matt addl2 %r5,%r7 467 1.7 matt adwc %r6,%r8 468 1.7 matt adwc $0,%r9 469 1.7 matt adwc $0,%r10 470 1.1 ragge /* p.8 471 1.1 ragge * 472 1.1 ragge * Check for massive cancellation. 473 1.1 ragge */ 474 1.7 matt bicl3 $0xc0000000,%r10,%r6 475 1.1 ragge /* bneq more2 -S.McD Test was backwards */ 476 1.1 ragge beql more2 477 1.7 matt cmpl $0x3fffffff,%r6 478 1.1 ragge bneq result 479 1.1 ragge more2: 480 1.1 ragge /* p.9 481 1.1 ragge * 482 1.1 ragge * If massive cancellation has occurred, 483 1.1 ragge * generate another 24 result bits. 484 1.4 simonb * Testing has shown there will always be 485 1.1 ragge * enough bits after this point. 486 1.1 ragge */ 487 1.7 matt subl2 $4,%r3 488 1.7 matt ashq %r2,(%r3),%r5 489 1.1 ragge bgeq signoff5 490 1.1 ragge 491 1.7 matt emul %r0,%r6,%r4,%r5 492 1.7 matt addl2 %r0,%r6 493 1.5 matt jbr addbits2 494 1.1 ragge 495 1.1 ragge signoff5: 496 1.7 matt emul %r0,%r6,%r4,%r5 497 1.1 ragge 498 1.1 ragge addbits2: 499 1.7 matt addl2 %r6,%r7 500 1.7 matt adwc $0,%r8 501 1.7 matt adwc $0,%r9 502 1.7 matt adwc $0,%r10 503 1.1 ragge /* p.10 504 1.1 ragge * 505 1.1 ragge * The following code produces the reduced 506 1.1 ragge * argument from the product bits contained 507 1.7 matt * in %r10 - %r7 . 508 1.1 ragge */ 509 1.1 ragge result: 510 1.1 ragge /* 511 1.7 matt * Extract the octant number from %r10 . 512 1.1 ragge */ 513 1.7 matt /* extzv $29,$3,%r10,%r0 ...used for pi/4 reduction -S.McD */ 514 1.7 matt extzv $30,$2,%r10,%r0 515 1.1 ragge /* 516 1.7 matt * Clear the octant bits in %r10 . 517 1.1 ragge */ 518 1.7 matt /* bicl2 $0xe0000000,%r10 ...used for pi/4 reduction -S.McD */ 519 1.7 matt bicl2 $0xc0000000,%r10 520 1.1 ragge /* 521 1.1 ragge * Zero the sign flag. 522 1.1 ragge */ 523 1.7 matt clrl %r5 524 1.1 ragge /* p.11 525 1.1 ragge * 526 1.1 ragge * Check to see if the fraction is greater than 527 1.4 simonb * or equal to one-half. If it is, add one 528 1.1 ragge * to the octant number, set the sign flag 529 1.1 ragge * on, and replace the fraction with 1 minus 530 1.1 ragge * the fraction. 531 1.1 ragge */ 532 1.7 matt /* bitl $0x10000000,%r10 ...used for pi/4 reduction -S.McD */ 533 1.7 matt bitl $0x20000000,%r10 534 1.1 ragge beql small 535 1.7 matt incl %r0 536 1.7 matt incl %r5 537 1.7 matt /* subl3 %r10,$0x1fffffff,%r10 ...used for pi/4 reduction -S.McD */ 538 1.7 matt subl3 %r10,$0x3fffffff,%r10 539 1.7 matt mcoml %r9,%r9 540 1.7 matt mcoml %r8,%r8 541 1.7 matt mcoml %r7,%r7 542 1.1 ragge small: 543 1.1 ragge /* p.12 544 1.1 ragge * 545 1.1 ragge * Test whether the first 29 bits of the ...used for pi/4 reduction -S.McD 546 1.4 simonb * Test whether the first 30 bits of the 547 1.1 ragge * fraction are zero. 548 1.1 ragge */ 549 1.7 matt tstl %r10 550 1.1 ragge beql tiny 551 1.1 ragge /* 552 1.7 matt * Find the position of the first one bit in %r10 . 553 1.1 ragge */ 554 1.7 matt cvtld %r10,%r1 555 1.7 matt extzv $7,$7,%r1,%r1 556 1.1 ragge /* 557 1.1 ragge * Compute the size of the shift needed. 558 1.1 ragge */ 559 1.7 matt subl3 %r1,$32,%r6 560 1.1 ragge /* 561 1.1 ragge * Shift up the high order 64 bits of the 562 1.1 ragge * product. 563 1.1 ragge */ 564 1.7 matt ashq %r6,%r9,%r10 565 1.7 matt ashq %r6,%r8,%r9 566 1.5 matt jbr mult 567 1.1 ragge /* p.13 568 1.1 ragge * 569 1.7 matt * Test to see if the sign bit of %r9 is on. 570 1.1 ragge */ 571 1.1 ragge tiny: 572 1.7 matt tstl %r9 573 1.1 ragge bgeq tinier 574 1.1 ragge /* 575 1.1 ragge * If it is, shift the product bits up 32 bits. 576 1.1 ragge */ 577 1.7 matt movl $32,%r6 578 1.7 matt movq %r8,%r10 579 1.7 matt tstl %r10 580 1.5 matt jbr mult 581 1.1 ragge /* p.14 582 1.1 ragge * 583 1.7 matt * Test whether %r9 is zero. It is probably 584 1.7 matt * impossible for both %r10 and %r9 to be 585 1.1 ragge * zero, but until proven to be so, the test 586 1.1 ragge * must be made. 587 1.1 ragge */ 588 1.1 ragge tinier: 589 1.1 ragge beql zero 590 1.1 ragge /* 591 1.7 matt * Find the position of the first one bit in %r9 . 592 1.1 ragge */ 593 1.7 matt cvtld %r9,%r1 594 1.7 matt extzv $7,$7,%r1,%r1 595 1.1 ragge /* 596 1.1 ragge * Compute the size of the shift needed. 597 1.1 ragge */ 598 1.7 matt subl3 %r1,$32,%r1 599 1.7 matt addl3 $32,%r1,%r6 600 1.1 ragge /* 601 1.1 ragge * Shift up the high order 64 bits of the 602 1.1 ragge * product. 603 1.1 ragge */ 604 1.7 matt ashq %r1,%r8,%r10 605 1.7 matt ashq %r1,%r7,%r9 606 1.5 matt jbr mult 607 1.1 ragge /* p.15 608 1.1 ragge * 609 1.1 ragge * The following code sets the reduced 610 1.1 ragge * argument to zero. 611 1.1 ragge */ 612 1.1 ragge zero: 613 1.7 matt clrl %r1 614 1.7 matt clrl %r2 615 1.7 matt clrl %r3 616 1.5 matt jbr return 617 1.1 ragge /* p.16 618 1.1 ragge * 619 1.7 matt * At this point, %r0 contains the octant number, 620 1.7 matt * %r6 indicates the number of bits the fraction 621 1.7 matt * has been shifted, %r5 indicates the sign of 622 1.7 matt * the fraction, %r11/%r10 contain the high order 623 1.1 ragge * 64 bits of the fraction, and the condition 624 1.7 matt * codes indicate where the sign bit of %r10 625 1.1 ragge * is on. The following code multiplies the 626 1.1 ragge * fraction by pi/2 . 627 1.1 ragge */ 628 1.1 ragge mult: 629 1.1 ragge /* 630 1.7 matt * Save %r11/%r10 in %r4/%r1 . -S.McD 631 1.1 ragge */ 632 1.7 matt movl %r11,%r4 633 1.7 matt movl %r10,%r1 634 1.1 ragge /* 635 1.7 matt * If the sign bit of %r10 is on, add 1 to %r11 . 636 1.1 ragge */ 637 1.1 ragge bgeq signoff6 638 1.7 matt incl %r11 639 1.1 ragge signoff6: 640 1.1 ragge /* p.17 641 1.1 ragge * 642 1.7 matt * Move pi/2 into %r3/%r2 . 643 1.1 ragge */ 644 1.7 matt movq $0xc90fdaa22168c235,%r2 645 1.1 ragge /* 646 1.1 ragge * Multiply the fraction by the portion of pi/2 647 1.7 matt * in %r2 . 648 1.1 ragge */ 649 1.7 matt emul %r2,%r10,$0,%r7 650 1.7 matt emul %r2,%r11,%r8,%r7 651 1.1 ragge /* 652 1.4 simonb * Multiply the fraction by the portion of pi/2 653 1.7 matt * in %r3 . 654 1.1 ragge */ 655 1.7 matt emul %r3,%r10,$0,%r9 656 1.7 matt emul %r3,%r11,%r10,%r10 657 1.1 ragge /* 658 1.1 ragge * Add the product bits together. 659 1.1 ragge */ 660 1.7 matt addl2 %r7,%r9 661 1.7 matt adwc %r8,%r10 662 1.7 matt adwc $0,%r11 663 1.1 ragge /* 664 1.7 matt * Compensate for not sign extending %r8 above.-S.McD 665 1.1 ragge */ 666 1.7 matt tstl %r8 667 1.1 ragge bgeq signoff6a 668 1.7 matt decl %r11 669 1.1 ragge signoff6a: 670 1.1 ragge /* 671 1.7 matt * Compensate for %r11/%r10 being unsigned. -S.McD 672 1.1 ragge */ 673 1.7 matt addl2 %r2,%r10 674 1.7 matt adwc %r3,%r11 675 1.1 ragge /* 676 1.7 matt * Compensate for %r3/%r2 being unsigned. -S.McD 677 1.1 ragge */ 678 1.7 matt addl2 %r1,%r10 679 1.7 matt adwc %r4,%r11 680 1.1 ragge /* p.18 681 1.1 ragge * 682 1.7 matt * If the sign bit of %r11 is zero, shift the 683 1.7 matt * product bits up one bit and increment %r6 . 684 1.1 ragge */ 685 1.1 ragge blss signon 686 1.7 matt incl %r6 687 1.7 matt ashq $1,%r10,%r10 688 1.7 matt tstl %r9 689 1.1 ragge bgeq signoff7 690 1.7 matt incl %r10 691 1.1 ragge signoff7: 692 1.1 ragge signon: 693 1.1 ragge /* p.19 694 1.1 ragge * 695 1.1 ragge * Shift the 56 most significant product 696 1.7 matt * bits into %r9/%r8 . The sign extension 697 1.1 ragge * will be handled later. 698 1.1 ragge */ 699 1.7 matt ashq $-8,%r10,%r8 700 1.1 ragge /* 701 1.7 matt * Convert the low order 8 bits of %r10 702 1.1 ragge * into an F-format number. 703 1.1 ragge */ 704 1.7 matt cvtbf %r10,%r3 705 1.1 ragge /* 706 1.1 ragge * If the result of the conversion was 707 1.7 matt * negative, add 1 to %r9/%r8 . 708 1.1 ragge */ 709 1.1 ragge bgeq chop 710 1.7 matt incl %r8 711 1.7 matt adwc $0,%r9 712 1.1 ragge /* 713 1.7 matt * If %r9 is now zero, branch to special 714 1.1 ragge * code to handle that possibility. 715 1.1 ragge */ 716 1.1 ragge beql carryout 717 1.1 ragge chop: 718 1.1 ragge /* p.20 719 1.1 ragge * 720 1.7 matt * Convert the number in %r9/%r8 into 721 1.7 matt * D-format number in %r2/%r1 . 722 1.1 ragge */ 723 1.7 matt rotl $16,%r8,%r2 724 1.7 matt rotl $16,%r9,%r1 725 1.1 ragge /* 726 1.1 ragge * Set the exponent field to the appropriate 727 1.1 ragge * value. Note that the extra bits created by 728 1.1 ragge * sign extension are now eliminated. 729 1.1 ragge */ 730 1.7 matt subw3 %r6,$131,%r6 731 1.7 matt insv %r6,$7,$9,%r1 732 1.1 ragge /* 733 1.1 ragge * Set the exponent field of the F-format 734 1.7 matt * number in %r3 to the appropriate value. 735 1.1 ragge */ 736 1.7 matt tstf %r3 737 1.1 ragge beql return 738 1.7 matt /* extzv $7,$8,%r3,%r4 -S.McD */ 739 1.7 matt extzv $7,$7,%r3,%r4 740 1.7 matt addw2 %r4,%r6 741 1.7 matt /* subw2 $217,%r6 -S.McD */ 742 1.7 matt subw2 $64,%r6 743 1.7 matt insv %r6,$7,$8,%r3 744 1.5 matt jbr return 745 1.1 ragge /* p.21 746 1.1 ragge * 747 1.4 simonb * The following code generates the appropriate 748 1.1 ragge * result for the unlikely possibility that 749 1.7 matt * rounding the number in %r9/%r8 resulted in 750 1.1 ragge * a carry out. 751 1.1 ragge */ 752 1.1 ragge carryout: 753 1.7 matt clrl %r1 754 1.7 matt clrl %r2 755 1.7 matt subw3 %r6,$132,%r6 756 1.7 matt insv %r6,$7,$9,%r1 757 1.7 matt tstf %r3 758 1.1 ragge beql return 759 1.7 matt extzv $7,$8,%r3,%r4 760 1.7 matt addw2 %r4,%r6 761 1.7 matt subw2 $218,%r6 762 1.7 matt insv %r6,$7,$8,%r3 763 1.1 ragge /* p.22 764 1.1 ragge * 765 1.1 ragge * The following code makes an needed 766 1.4 simonb * adjustments to the signs of the 767 1.1 ragge * results or to the octant number, and 768 1.1 ragge * then returns. 769 1.1 ragge */ 770 1.1 ragge return: 771 1.1 ragge /* 772 1.4 simonb * Test if the fraction was greater than or 773 1.1 ragge * equal to 1/2 . If so, negate the reduced 774 1.1 ragge * argument. 775 1.1 ragge */ 776 1.7 matt blbc %r5,signoff8 777 1.7 matt mnegf %r1,%r1 778 1.7 matt mnegf %r3,%r3 779 1.1 ragge signoff8: 780 1.1 ragge /* p.23 781 1.1 ragge * 782 1.1 ragge * If the original argument was negative, 783 1.1 ragge * negate the reduce argument and 784 1.1 ragge * adjust the octant number. 785 1.1 ragge */ 786 1.7 matt tstw (%sp)+ 787 1.1 ragge bgeq signoff9 788 1.7 matt mnegf %r1,%r1 789 1.7 matt mnegf %r3,%r3 790 1.7 matt /* subb3 %r0,$8,%r0 ...used for pi/4 reduction -S.McD */ 791 1.7 matt subb3 %r0,$4,%r0 792 1.1 ragge signoff9: 793 1.1 ragge /* 794 1.1 ragge * Clear all unneeded octant bits. 795 1.1 ragge * 796 1.7 matt * bicb2 $0xf8,%r0 ...used for pi/4 reduction -S.McD */ 797 1.7 matt bicb2 $0xfc,%r0 798 1.1 ragge /* 799 1.1 ragge * Return. 800 1.1 ragge */ 801 1.1 ragge rsb 802