s_log1p.S revision 1.13 1 1.1 jtc /*
2 1.10 salo * Written by J.T. Conklin <jtc (at) NetBSD.org>.
3 1.6 jtc * Public domain.
4 1.1 jtc */
5 1.1 jtc
6 1.11 wennmach /*
7 1.11 wennmach * Modified by Lex Wennmacher <wennmach (at) NetBSD.org>
8 1.11 wennmach * Still public domain.
9 1.11 wennmach */
10 1.11 wennmach
11 1.1 jtc #include <machine/asm.h>
12 1.1 jtc
13 1.9 fvdl #include "abi.h"
14 1.9 fvdl
15 1.13 wennmach RCSID("$NetBSD: s_log1p.S,v 1.13 2003/09/16 18:17:11 wennmach Exp $")
16 1.2 jtc
17 1.4 jtc /*
18 1.11 wennmach * The log1p() function is provided to compute an accurate value of
19 1.11 wennmach * log(1 + x), even for tiny values of x. The i387 FPU provides the
20 1.11 wennmach * fyl2xp1 instruction for this purpose. However, the range of this
21 1.11 wennmach * instruction is limited to:
22 1.11 wennmach * -(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
23 1.11 wennmach * -0.292893 <= x <= 0.414214
24 1.11 wennmach * at least on older processor versions.
25 1.11 wennmach *
26 1.11 wennmach * log1p() is implemented by testing the range of the argument.
27 1.11 wennmach * If it is appropriate for fyl2xp1, this instruction is used.
28 1.11 wennmach * Else, we compute log1p(x) = ln(2)*ld(1 + x) the traditional way
29 1.11 wennmach * (using fyl2x).
30 1.11 wennmach *
31 1.11 wennmach * The range testing costs speed, but as the rationale for the very
32 1.11 wennmach * existence of this function is accuracy, we accept that.
33 1.11 wennmach *
34 1.11 wennmach * In order to reduce the cost for testing the range, we check if
35 1.11 wennmach * the argument is in the range
36 1.11 wennmach * -0.25 <= x <= 0.25
37 1.11 wennmach * which can be done with just one conditional branch. If x is
38 1.11 wennmach * inside this range, we use fyl2xp1. Outside of this range,
39 1.11 wennmach * the use of fyl2x is accurate enough.
40 1.11 wennmach *
41 1.4 jtc */
42 1.5 jtc
43 1.11 wennmach .text
44 1.13 wennmach .align 4
45 1.1 jtc ENTRY(log1p)
46 1.9 fvdl XMM_ONE_ARG_DOUBLE_PROLOGUE
47 1.11 wennmach fldl ARG_DOUBLE_ONE
48 1.11 wennmach fabs
49 1.13 wennmach fld1 /* ... x 1 */
50 1.13 wennmach fadd %st(0) /* ... x 2 */
51 1.13 wennmach fadd %st(0) /* ... x 4 */
52 1.13 wennmach fld1 /* ... 4 1 */
53 1.13 wennmach fdivp /* ... x 0.25 */
54 1.11 wennmach fcompp
55 1.13 wennmach fnstsw %ax
56 1.13 wennmach andb $69,%ah
57 1.13 wennmach jne use_fyl2x
58 1.13 wennmach jmp use_fyl2xp1
59 1.13 wennmach
60 1.13 wennmach .align 4
61 1.13 wennmach use_fyl2x:
62 1.11 wennmach fldln2
63 1.11 wennmach fldl ARG_DOUBLE_ONE
64 1.11 wennmach fld1
65 1.11 wennmach faddp
66 1.11 wennmach fyl2x
67 1.11 wennmach XMM_DOUBLE_EPILOGUE
68 1.11 wennmach ret
69 1.13 wennmach
70 1.13 wennmach .align 4
71 1.13 wennmach use_fyl2xp1:
72 1.1 jtc fldln2
73 1.9 fvdl fldl ARG_DOUBLE_ONE
74 1.11 wennmach fyl2xp1
75 1.9 fvdl XMM_DOUBLE_EPILOGUE
76 1.1 jtc ret
77