arch/i387/s_log1p.S

 1.1       jtc /*
1.10      salo  * Written by J.T. Conklin <jtc (at) NetBSD.org>.
 1.6       jtc  * Public domain.
 1.1       jtc  */
 1.1       jtc
1.11  wennmach /*
1.11  wennmach  * Modified by Lex Wennmacher <wennmach (at) NetBSD.org>
1.11  wennmach  * Still public domain.
1.11  wennmach  */
1.11  wennmach
 1.1       jtc #include <machine/asm.h>
 1.1       jtc
 1.9      fvdl #include "abi.h"
 1.9      fvdl
1.14  riastrad RCSID("$NetBSD: s_log1p.S,v 1.14 2024/07/16 14:52:49 riastradh Exp $")
 1.2       jtc
 1.4       jtc /*
1.11  wennmach  * The log1p() function is provided to compute an accurate value of
1.11  wennmach  * log(1 + x), even for tiny values of x. The i387 FPU provides the
1.11  wennmach  * fyl2xp1 instruction for this purpose. However, the range of this
1.11  wennmach  * instruction is limited to:
1.11  wennmach  * 		-(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
1.11  wennmach  *                         -0.292893 <= x <= 0.414214
1.11  wennmach  * at least on older processor versions.
1.11  wennmach  *
1.11  wennmach  * log1p() is implemented by testing the range of the argument.
1.11  wennmach  * If it is appropriate for fyl2xp1, this instruction is used.
1.11  wennmach  * Otherwise, we compute log1p(x) = ln(2)*log2(1 + x) the traditional way
1.11  wennmach  * (using fyl2x).
1.11  wennmach  *
1.11  wennmach  * The range testing costs speed, but as the rationale for the very
1.11  wennmach  * existence of this function is accuracy, we accept that.
1.11  wennmach  *
1.11  wennmach  * In order to reduce the cost for testing the range, we check if
1.11  wennmach  * the argument is in the range
1.11  wennmach  *                             -0.25 < x < 0.25
1.11  wennmach  * which can be done with just one conditional branch. If x is
1.11  wennmach  * inside this range, we use fyl2xp1. Outside of this range,
1.11  wennmach  * the use of fyl2x is accurate enough.
 1.4       jtc  */
 1.5       jtc
1.14  riastrad WEAK_ALIAS(log1p, _log1p)	/* macro from <machine/asm.h>; presumably publishes log1p as a weak alias of _log1p -- confirm */
1.14  riastrad
1.11  wennmach .text
1.13  wennmach 	.align	4
1.14  riastrad ENTRY(_log1p)			/* _log1p(x): log(1 + x), choosing fyl2xp1 or fyl2x by the size of |x| */
 1.9      fvdl 	XMM_ONE_ARG_DOUBLE_PROLOGUE	/* abi.h macro (not shown); presumably makes x loadable as ARG_DOUBLE_ONE -- confirm */
1.11  wennmach 	fldl	ARG_DOUBLE_ONE		/* st0 = x */
1.11  wennmach 	fabs				/* st0 = |x| */
1.13  wennmach 	fld1				/* st0 = 1, st1 = |x| */
1.13  wennmach 	fadd	%st(0)			/* st0 = 1 + 1 = 2, st1 = |x| */
1.13  wennmach 	fadd	%st(0)			/* st0 = 2 + 2 = 4, st1 = |x| */
1.13  wennmach 	fld1				/* st0 = 1, st1 = 4, st2 = |x| */
1.13  wennmach 	fdivp				/* pops once, leaving st0 = 0.25, st1 = |x|; depends on how the assembler encodes operand-less fdivp */
1.11  wennmach 	fcompp				/* compare st0 (0.25) against st1 (|x|), then pop both: FPU stack is empty again */
1.13  wennmach 	fnstsw	%ax			/* FPU status word -> ax; condition codes C0/C2/C3 end up in ah */
1.13  wennmach 	andb	$69,%ah			/* 69 = 0x45 = C3 (0x40) | C2 (0x04) | C0 (0x01) */
1.13  wennmach 	jne	use_fyl2x		/* some bit set: 0.25 < |x| (C0), 0.25 == |x| (C3), or unordered, i.e. NaN (all three) */
1.13  wennmach 	jmp	use_fyl2xp1		/* all bits clear: |x| < 0.25 */
1.13  wennmach
1.13  wennmach 	.align	4
1.13  wennmach use_fyl2x:			/* path for |x| >= 0.25 and NaN: form 1 + x explicitly */
1.11  wennmach 	fldln2				/* st0 = ln(2) */
1.11  wennmach         fldl	ARG_DOUBLE_ONE		/* st0 = x, st1 = ln(2) */
1.11  wennmach         fld1				/* st0 = 1, st1 = x, st2 = ln(2) */
1.11  wennmach         faddp				/* st0 = 1 + x, st1 = ln(2) */
1.11  wennmach         fyl2x				/* st0 = ln(2) * log2(1 + x) */
1.11  wennmach 	XMM_DOUBLE_EPILOGUE		/* abi.h macro (not shown); presumably puts st0 where the ABI returns a double -- confirm */
1.11  wennmach         ret
1.13  wennmach
1.13  wennmach 	.align	4
1.13  wennmach use_fyl2xp1:			/* path for |x| < 0.25: x is used directly, 1 + x is never formed */
 1.1       jtc 	fldln2				/* st0 = ln(2) */
 1.9      fvdl 	fldl	ARG_DOUBLE_ONE		/* st0 = x, st1 = ln(2) */
1.11  wennmach 	fyl2xp1				/* st0 = ln(2) * log2(x + 1) */
 1.9      fvdl 	XMM_DOUBLE_EPILOGUE		/* abi.h macro (not shown); presumably puts st0 where the ABI returns a double -- confirm */
 1.1       jtc 	ret
1.14  riastrad END(_log1p)