s_log1pf.S revision 1.10 1 1.1 jtc /*
2 1.7 salo * Written by J.T. Conklin <jtc (at) NetBSD.org>.
3 1.3 jtc * Public domain.
4 1.1 jtc */
5 1.1 jtc
6 1.8 wennmach /*
7 1.8 wennmach * Modified by Lex Wennmacher <wennmach (at) NetBSD.org>
8 1.8 wennmach * Still public domain.
9 1.8 wennmach */
10 1.8 wennmach
11 1.1 jtc #include <machine/asm.h>
12 1.1 jtc
13 1.6 fvdl #include "abi.h"
14 1.6 fvdl
15 1.10 wennmach RCSID("$NetBSD: s_log1pf.S,v 1.10 2003/09/16 18:17:11 wennmach Exp $")
16 1.1 jtc
/*
 * The log1pf() function is provided to compute an accurate value of
 * log(1 + x), even for tiny values of x. The i387 FPU provides the
 * fyl2xp1 instruction for this purpose. However, the range of this
 * instruction is limited to:
 *		-(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
 *		           -0.292893 <= x <= 0.414214
 * at least on older processor versions.
 *
 * log1pf() is implemented by testing the range of the argument.
 * If it is appropriate for fyl2xp1, this instruction is used.
 * Otherwise, we compute log1pf(x) = ln(2) * log2(1 + x) the traditional
 * way (using fyl2x).
 *
 * The range testing costs speed, but as the rationale for the very
 * existence of this function is accuracy, we accept that.
 *
 * In order to reduce the cost of testing the range, we check whether
 * the argument is in the range
 *		-0.25 < x < 0.25
 * which can be done with just one conditional branch. If x is
 * inside this range, we use fyl2xp1. Outside of this range (the
 * boundaries +-0.25 included), the use of fyl2x is accurate enough.
 *
 */
42 1.2 jtc
/*
 * float log1pf(float x)
 *
 * Computes log(1 + x).  Arguments with |x| < 0.25 are evaluated with
 * fyl2xp1; every other argument (NaN included, because an unordered
 * compare sets the tested condition bits) is evaluated by forming 1 + x
 * and using fyl2x.
 *
 * The argument is read through ARG_FLOAT_ONE and the result is in %st(0)
 * when XMM_FLOAT_EPILOGUE runs.  XMM_ONE_ARG_FLOAT_PROLOGUE,
 * ARG_FLOAT_ONE and XMM_FLOAT_EPILOGUE are not defined in this file
 * (presumably they come from "abi.h"); check there for their exact effect.
 *
 * Clobbers: %ax, EFLAGS, the x87 status word.  The range test uses two
 * x87 stack slots and pops both again.
 */
	.text
	.align	4
ENTRY(log1pf)
	XMM_ONE_ARG_FLOAT_PROLOGUE

	/*
	 * Range test.  |x| < 0.25 is equivalent to 1 > 4 * |x|, and scaling
	 * by 4 is exact, so the test needs no division and does not depend
	 * on how the assembler encodes the no-operand form of fdivp.
	 */
	flds	ARG_FLOAT_ONE		/* ... x */
	fabs				/* ... |x| */
	fadd	%st(0)			/* ... 2|x| */
	fadd	%st(0)			/* ... 4|x| */
	fld1				/* ... 4|x| 1 */
	fcompp				/* compare 1 with 4|x|, pop both */
	fnstsw	%ax
	andb	$0x45,%ah		/* C3|C2|C0: all clear iff 1 > 4|x| */
	jne	use_fyl2x		/* |x| >= 0.25, or x is NaN */

	/* |x| < 0.25: ln(2) * log2(1 + x), computed directly from x. */
	fldln2				/* ... ln(2) */
	flds	ARG_FLOAT_ONE		/* ... ln(2) x */
	fyl2xp1				/* ... ln(2)*log2(1 + x) */
	XMM_FLOAT_EPILOGUE
	ret

	.align	4
use_fyl2x:
	/* Otherwise: form 1 + x explicitly, then ln(2) * log2(1 + x). */
	fldln2				/* ... ln(2) */
	flds	ARG_FLOAT_ONE		/* ... ln(2) x */
	fld1				/* ... ln(2) x 1 */
	faddp				/* ... ln(2) 1+x */
	fyl2x				/* ... ln(2)*log2(1 + x) */
	XMM_FLOAT_EPILOGUE
	ret
77