Home | History | Annotate | Line # | Download | only in soft-fp
      1      1.1  mrg /* Software floating-point emulation.
      2      1.1  mrg    Basic two-word fraction declaration and manipulation.
      3  1.1.1.7  mrg    Copyright (C) 1997-2022 Free Software Foundation, Inc.
      4      1.1  mrg    This file is part of the GNU C Library.
      5      1.1  mrg 
      6      1.1  mrg    The GNU C Library is free software; you can redistribute it and/or
      7      1.1  mrg    modify it under the terms of the GNU Lesser General Public
      8      1.1  mrg    License as published by the Free Software Foundation; either
      9      1.1  mrg    version 2.1 of the License, or (at your option) any later version.
     10      1.1  mrg 
     11      1.1  mrg    In addition to the permissions in the GNU Lesser General Public
     12      1.1  mrg    License, the Free Software Foundation gives you unlimited
     13      1.1  mrg    permission to link the compiled version of this file into
     14      1.1  mrg    combinations with other programs, and to distribute those
     15      1.1  mrg    combinations without any restriction coming from the use of this
     16      1.1  mrg    file.  (The Lesser General Public License restrictions do apply in
     17      1.1  mrg    other respects; for example, they cover modification of the file,
     18      1.1  mrg    and distribution when not linked into a combine executable.)
     19      1.1  mrg 
     20      1.1  mrg    The GNU C Library is distributed in the hope that it will be useful,
     21      1.1  mrg    but WITHOUT ANY WARRANTY; without even the implied warranty of
     22      1.1  mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23      1.1  mrg    Lesser General Public License for more details.
     24      1.1  mrg 
     25      1.1  mrg    You should have received a copy of the GNU Lesser General Public
     26      1.1  mrg    License along with the GNU C Library; if not, see
     27  1.1.1.7  mrg    <https://www.gnu.org/licenses/>.  */
     28      1.1  mrg 
     29  1.1.1.3  mrg #ifndef SOFT_FP_OP_2_H
     30  1.1.1.3  mrg #define SOFT_FP_OP_2_H	1
     31  1.1.1.3  mrg 
     32  1.1.1.3  mrg #define _FP_FRAC_DECL_2(X)				\
     33  1.1.1.3  mrg   _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
     34  1.1.1.2  mrg #define _FP_FRAC_COPY_2(D, S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
     35  1.1.1.2  mrg #define _FP_FRAC_SET_2(X, I)	__FP_FRAC_SET_2 (X, I)
     36      1.1  mrg #define _FP_FRAC_HIGH_2(X)	(X##_f1)
     37      1.1  mrg #define _FP_FRAC_LOW_2(X)	(X##_f0)
     38  1.1.1.2  mrg #define _FP_FRAC_WORD_2(X, w)	(X##_f##w)
     39      1.1  mrg 
     40  1.1.1.2  mrg #define _FP_FRAC_SLL_2(X, N)						\
     41  1.1.1.2  mrg   (void) (((N) < _FP_W_TYPE_SIZE)					\
     42  1.1.1.2  mrg 	  ? ({								\
     43  1.1.1.2  mrg 	      if (__builtin_constant_p (N) && (N) == 1)			\
     44  1.1.1.2  mrg 		{							\
     45  1.1.1.2  mrg 		  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
     46  1.1.1.2  mrg 		  X##_f0 += X##_f0;					\
     47  1.1.1.2  mrg 		}							\
     48  1.1.1.2  mrg 	      else							\
     49  1.1.1.2  mrg 		{							\
     50  1.1.1.2  mrg 		  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
     51  1.1.1.2  mrg 		  X##_f0 <<= (N);					\
     52  1.1.1.2  mrg 		}							\
     53  1.1.1.2  mrg 	      0;							\
     54  1.1.1.2  mrg 	    })								\
     55  1.1.1.2  mrg 	  : ({								\
     56  1.1.1.2  mrg 	      X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);		\
     57  1.1.1.2  mrg 	      X##_f0 = 0;						\
     58  1.1.1.2  mrg 	    }))
     59  1.1.1.2  mrg 
     60  1.1.1.2  mrg 
     61  1.1.1.2  mrg #define _FP_FRAC_SRL_2(X, N)						\
     62  1.1.1.2  mrg   (void) (((N) < _FP_W_TYPE_SIZE)					\
     63  1.1.1.2  mrg 	  ? ({								\
     64  1.1.1.2  mrg 	      X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
     65  1.1.1.2  mrg 	      X##_f1 >>= (N);						\
     66  1.1.1.2  mrg 	    })								\
     67  1.1.1.2  mrg 	  : ({								\
     68  1.1.1.2  mrg 	      X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);		\
     69  1.1.1.2  mrg 	      X##_f1 = 0;						\
     70  1.1.1.2  mrg 	    }))
     71      1.1  mrg 
     72      1.1  mrg /* Right shift with sticky-lsb.  */
     73  1.1.1.2  mrg #define _FP_FRAC_SRST_2(X, S, N, sz)					\
     74  1.1.1.2  mrg   (void) (((N) < _FP_W_TYPE_SIZE)					\
     75  1.1.1.2  mrg 	  ? ({								\
     76  1.1.1.2  mrg 	      S = (__builtin_constant_p (N) && (N) == 1			\
     77  1.1.1.2  mrg 		   ? X##_f0 & 1						\
     78  1.1.1.2  mrg 		   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);		\
     79  1.1.1.2  mrg 	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
     80  1.1.1.2  mrg 	      X##_f1 >>= (N);						\
     81  1.1.1.2  mrg 	    })								\
     82  1.1.1.2  mrg 	  : ({								\
     83  1.1.1.2  mrg 	      S = ((((N) == _FP_W_TYPE_SIZE				\
     84  1.1.1.2  mrg 		     ? 0						\
     85  1.1.1.2  mrg 		     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))		\
     86  1.1.1.2  mrg 		    | X##_f0) != 0);					\
     87  1.1.1.2  mrg 	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));		\
     88  1.1.1.2  mrg 	      X##_f1 = 0;						\
     89  1.1.1.2  mrg 	    }))
     90  1.1.1.2  mrg 
     91  1.1.1.2  mrg #define _FP_FRAC_SRS_2(X, N, sz)					\
     92  1.1.1.2  mrg   (void) (((N) < _FP_W_TYPE_SIZE)					\
     93  1.1.1.2  mrg 	  ? ({								\
     94  1.1.1.2  mrg 	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
     95  1.1.1.2  mrg 			| (__builtin_constant_p (N) && (N) == 1		\
     96  1.1.1.2  mrg 			   ? X##_f0 & 1					\
     97  1.1.1.2  mrg 			   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
     98  1.1.1.2  mrg 	      X##_f1 >>= (N);						\
     99  1.1.1.2  mrg 	    })								\
    100  1.1.1.2  mrg 	  : ({								\
    101  1.1.1.2  mrg 	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)		\
    102  1.1.1.2  mrg 			| ((((N) == _FP_W_TYPE_SIZE			\
    103  1.1.1.2  mrg 			     ? 0					\
    104  1.1.1.2  mrg 			     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))	\
    105  1.1.1.2  mrg 			    | X##_f0) != 0));				\
    106  1.1.1.2  mrg 	      X##_f1 = 0;						\
    107  1.1.1.2  mrg 	    }))
    108  1.1.1.2  mrg 
    109  1.1.1.2  mrg #define _FP_FRAC_ADDI_2(X, I)	\
    110  1.1.1.2  mrg   __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
    111  1.1.1.2  mrg 
    112  1.1.1.2  mrg #define _FP_FRAC_ADD_2(R, X, Y)	\
    113  1.1.1.2  mrg   __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
    114  1.1.1.2  mrg 
    115  1.1.1.2  mrg #define _FP_FRAC_SUB_2(R, X, Y)	\
    116  1.1.1.2  mrg   __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
    117  1.1.1.2  mrg 
    118  1.1.1.2  mrg #define _FP_FRAC_DEC_2(X, Y)	\
    119  1.1.1.2  mrg   __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
    120  1.1.1.2  mrg 
    121  1.1.1.2  mrg #define _FP_FRAC_CLZ_2(R, X)			\
    122  1.1.1.2  mrg   do						\
    123  1.1.1.2  mrg     {						\
    124  1.1.1.2  mrg       if (X##_f1)				\
    125  1.1.1.2  mrg 	__FP_CLZ ((R), X##_f1);			\
    126  1.1.1.2  mrg       else					\
    127  1.1.1.2  mrg 	{					\
    128  1.1.1.2  mrg 	  __FP_CLZ ((R), X##_f0);		\
    129  1.1.1.2  mrg 	  (R) += _FP_W_TYPE_SIZE;		\
    130  1.1.1.2  mrg 	}					\
    131  1.1.1.2  mrg     }						\
    132  1.1.1.2  mrg   while (0)
    133      1.1  mrg 
    134  1.1.1.2  mrg /* Predicates.  */
    135  1.1.1.2  mrg #define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE) X##_f1 < 0)
    136      1.1  mrg #define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
    137  1.1.1.2  mrg #define _FP_FRAC_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
    138  1.1.1.2  mrg #define _FP_FRAC_CLEAR_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
    139  1.1.1.2  mrg #define _FP_FRAC_HIGHBIT_DW_2(fs, X)	\
    140  1.1.1.2  mrg   (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
    141      1.1  mrg #define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
    142      1.1  mrg #define _FP_FRAC_GT_2(X, Y)	\
    143      1.1  mrg   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
    144      1.1  mrg #define _FP_FRAC_GE_2(X, Y)	\
    145      1.1  mrg   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
    146      1.1  mrg 
    147      1.1  mrg #define _FP_ZEROFRAC_2		0, 0
    148      1.1  mrg #define _FP_MINFRAC_2		0, 1
    149  1.1.1.2  mrg #define _FP_MAXFRAC_2		(~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
    150      1.1  mrg 
    151  1.1.1.2  mrg /* Internals.  */
    152  1.1.1.2  mrg 
    153  1.1.1.2  mrg #define __FP_FRAC_SET_2(X, I1, I0)	(X##_f0 = I0, X##_f1 = I1)
    154  1.1.1.2  mrg 
    155  1.1.1.2  mrg #define __FP_CLZ_2(R, xh, xl)			\
    156  1.1.1.2  mrg   do						\
    157  1.1.1.2  mrg     {						\
    158  1.1.1.2  mrg       if (xh)					\
    159  1.1.1.2  mrg 	__FP_CLZ ((R), xh);			\
    160  1.1.1.2  mrg       else					\
    161  1.1.1.2  mrg 	{					\
    162  1.1.1.2  mrg 	  __FP_CLZ ((R), xl);			\
    163  1.1.1.2  mrg 	  (R) += _FP_W_TYPE_SIZE;		\
    164  1.1.1.2  mrg 	}					\
    165  1.1.1.2  mrg     }						\
    166  1.1.1.2  mrg   while (0)
    167      1.1  mrg 
    168      1.1  mrg #if 0
    169      1.1  mrg 
    170  1.1.1.2  mrg # ifndef __FP_FRAC_ADDI_2
    171  1.1.1.2  mrg #  define __FP_FRAC_ADDI_2(xh, xl, i)	\
    172      1.1  mrg   (xh += ((xl += i) < i))
    173  1.1.1.2  mrg # endif
    174  1.1.1.2  mrg # ifndef __FP_FRAC_ADD_2
    175  1.1.1.2  mrg #  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)	\
    176      1.1  mrg   (rh = xh + yh + ((rl = xl + yl) < xl))
    177  1.1.1.2  mrg # endif
    178  1.1.1.2  mrg # ifndef __FP_FRAC_SUB_2
    179  1.1.1.2  mrg #  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)	\
    180      1.1  mrg   (rh = xh - yh - ((rl = xl - yl) > xl))
    181  1.1.1.2  mrg # endif
    182  1.1.1.2  mrg # ifndef __FP_FRAC_DEC_2
    183  1.1.1.2  mrg #  define __FP_FRAC_DEC_2(xh, xl, yh, yl)		\
    184  1.1.1.2  mrg   do							\
    185  1.1.1.2  mrg     {							\
    186  1.1.1.2  mrg       UWtype __FP_FRAC_DEC_2_t = xl;			\
    187  1.1.1.2  mrg       xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t);	\
    188  1.1.1.2  mrg     }							\
    189  1.1.1.2  mrg   while (0)
    190  1.1.1.2  mrg # endif
    191      1.1  mrg 
    192      1.1  mrg #else
    193      1.1  mrg 
    194  1.1.1.2  mrg # undef __FP_FRAC_ADDI_2
    195  1.1.1.2  mrg # define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa (xh, xl, xh, xl, 0, i)
    196  1.1.1.2  mrg # undef __FP_FRAC_ADD_2
    197  1.1.1.2  mrg # define __FP_FRAC_ADD_2		add_ssaaaa
    198  1.1.1.2  mrg # undef __FP_FRAC_SUB_2
    199  1.1.1.2  mrg # define __FP_FRAC_SUB_2		sub_ddmmss
    200  1.1.1.2  mrg # undef __FP_FRAC_DEC_2
    201  1.1.1.2  mrg # define __FP_FRAC_DEC_2(xh, xl, yh, yl)	\
    202  1.1.1.2  mrg   sub_ddmmss (xh, xl, xh, xl, yh, yl)
    203      1.1  mrg 
    204      1.1  mrg #endif
    205      1.1  mrg 
    206  1.1.1.2  mrg /* Unpack the raw bits of a native fp value.  Do not classify or
    207  1.1.1.2  mrg    normalize the data.  */
    208      1.1  mrg 
    209      1.1  mrg #define _FP_UNPACK_RAW_2(fs, X, val)			\
    210  1.1.1.2  mrg   do							\
    211  1.1.1.2  mrg     {							\
    212  1.1.1.2  mrg       union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo;	\
    213  1.1.1.2  mrg       _FP_UNPACK_RAW_2_flo.flt = (val);			\
    214      1.1  mrg 							\
    215  1.1.1.2  mrg       X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0;		\
    216  1.1.1.2  mrg       X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1;		\
    217  1.1.1.2  mrg       X##_e  = _FP_UNPACK_RAW_2_flo.bits.exp;		\
    218  1.1.1.2  mrg       X##_s  = _FP_UNPACK_RAW_2_flo.bits.sign;		\
    219  1.1.1.2  mrg     }							\
    220  1.1.1.2  mrg   while (0)
    221      1.1  mrg 
    222      1.1  mrg #define _FP_UNPACK_RAW_2_P(fs, X, val)			\
    223  1.1.1.2  mrg   do							\
    224  1.1.1.2  mrg     {							\
    225  1.1.1.2  mrg       union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo	\
    226  1.1.1.2  mrg 	= (union _FP_UNION_##fs *) (val);		\
    227      1.1  mrg 							\
    228  1.1.1.2  mrg       X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0;	\
    229  1.1.1.2  mrg       X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1;	\
    230  1.1.1.2  mrg       X##_e  = _FP_UNPACK_RAW_2_P_flo->bits.exp;	\
    231  1.1.1.2  mrg       X##_s  = _FP_UNPACK_RAW_2_P_flo->bits.sign;	\
    232  1.1.1.2  mrg     }							\
    233  1.1.1.2  mrg   while (0)
    234  1.1.1.2  mrg 
    235  1.1.1.2  mrg 
    236  1.1.1.2  mrg /* Repack the raw bits of a native fp value.  */
    237  1.1.1.2  mrg 
    238  1.1.1.2  mrg #define _FP_PACK_RAW_2(fs, val, X)		\
    239  1.1.1.2  mrg   do						\
    240  1.1.1.2  mrg     {						\
    241  1.1.1.2  mrg       union _FP_UNION_##fs _FP_PACK_RAW_2_flo;	\
    242  1.1.1.2  mrg 						\
    243  1.1.1.2  mrg       _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0;	\
    244  1.1.1.2  mrg       _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1;	\
    245  1.1.1.2  mrg       _FP_PACK_RAW_2_flo.bits.exp   = X##_e;	\
    246  1.1.1.2  mrg       _FP_PACK_RAW_2_flo.bits.sign  = X##_s;	\
    247  1.1.1.2  mrg 						\
    248  1.1.1.2  mrg       (val) = _FP_PACK_RAW_2_flo.flt;		\
    249  1.1.1.2  mrg     }						\
    250  1.1.1.2  mrg   while (0)
    251      1.1  mrg 
    252      1.1  mrg #define _FP_PACK_RAW_2_P(fs, val, X)			\
    253  1.1.1.2  mrg   do							\
    254  1.1.1.2  mrg     {							\
    255  1.1.1.2  mrg       union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo	\
    256  1.1.1.2  mrg 	= (union _FP_UNION_##fs *) (val);		\
    257      1.1  mrg 							\
    258  1.1.1.2  mrg       _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0;	\
    259  1.1.1.2  mrg       _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1;	\
    260  1.1.1.2  mrg       _FP_PACK_RAW_2_P_flo->bits.exp   = X##_e;		\
    261  1.1.1.2  mrg       _FP_PACK_RAW_2_P_flo->bits.sign  = X##_s;		\
    262  1.1.1.2  mrg     }							\
    263  1.1.1.2  mrg   while (0)
    264      1.1  mrg 
    265      1.1  mrg 
    266  1.1.1.2  mrg /* Multiplication algorithms: */
    267      1.1  mrg 
    268      1.1  mrg /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
    269      1.1  mrg 
    270  1.1.1.2  mrg #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)		\
    271  1.1.1.2  mrg   do									\
    272  1.1.1.2  mrg     {									\
    273  1.1.1.2  mrg       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b);			\
    274  1.1.1.2  mrg       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c);			\
    275  1.1.1.2  mrg 									\
    276  1.1.1.2  mrg       doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0),		\
    277  1.1.1.2  mrg 	    X##_f0, Y##_f0);						\
    278  1.1.1.2  mrg       doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0,	\
    279  1.1.1.2  mrg 	    X##_f0, Y##_f1);						\
    280  1.1.1.2  mrg       doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0,	\
    281  1.1.1.2  mrg 	    X##_f1, Y##_f0);						\
    282  1.1.1.2  mrg       doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),		\
    283  1.1.1.2  mrg 	    X##_f1, Y##_f1);						\
    284  1.1.1.2  mrg 									\
    285  1.1.1.2  mrg       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
    286  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 1), 0,			\
    287  1.1.1.2  mrg 		       _FP_MUL_MEAT_DW_2_wide_b_f1,			\
    288  1.1.1.2  mrg 		       _FP_MUL_MEAT_DW_2_wide_b_f0,			\
    289  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
    290  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 1));				\
    291  1.1.1.2  mrg       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
    292  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 1), 0,			\
    293  1.1.1.2  mrg 		       _FP_MUL_MEAT_DW_2_wide_c_f1,			\
    294  1.1.1.2  mrg 		       _FP_MUL_MEAT_DW_2_wide_c_f0,			\
    295  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
    296  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 1));				\
    297  1.1.1.2  mrg     }									\
    298  1.1.1.2  mrg   while (0)
    299  1.1.1.2  mrg 
    300      1.1  mrg #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)			\
    301  1.1.1.2  mrg   do									\
    302  1.1.1.2  mrg     {									\
    303  1.1.1.2  mrg       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z);				\
    304  1.1.1.2  mrg 									\
    305  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z,	\
    306  1.1.1.2  mrg 			      X, Y, doit);				\
    307  1.1.1.2  mrg 									\
    308  1.1.1.2  mrg       /* Normalize since we know where the msb of the multiplicands	\
    309  1.1.1.2  mrg 	 were (bit B), we know that the msb of the of the product is	\
    310  1.1.1.2  mrg 	 at either 2B or 2B-1.  */					\
    311  1.1.1.2  mrg       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1,		\
    312  1.1.1.2  mrg 		      2*(wfracbits));					\
    313  1.1.1.2  mrg       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0);		\
    314  1.1.1.2  mrg       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1);		\
    315  1.1.1.2  mrg     }									\
    316  1.1.1.2  mrg   while (0)
    317      1.1  mrg 
    318      1.1  mrg /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
    319      1.1  mrg    Do only 3 multiplications instead of four. This one is for machines
    320      1.1  mrg    where multiplication is much more expensive than subtraction.  */
    321      1.1  mrg 
    322  1.1.1.2  mrg #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)		\
    323  1.1.1.2  mrg   do									\
    324  1.1.1.2  mrg     {									\
    325  1.1.1.2  mrg       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b);			\
    326  1.1.1.2  mrg       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c);			\
    327  1.1.1.2  mrg       _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d;				\
    328  1.1.1.2  mrg       int _FP_MUL_MEAT_DW_2_wide_3mul_c1;				\
    329  1.1.1.2  mrg       int _FP_MUL_MEAT_DW_2_wide_3mul_c2;				\
    330  1.1.1.2  mrg 									\
    331  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1;		\
    332  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_wide_3mul_c1					\
    333  1.1.1.2  mrg 	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0;			\
    334  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1;		\
    335  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_wide_3mul_c2					\
    336  1.1.1.2  mrg 	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0;			\
    337  1.1.1.2  mrg       doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0),	\
    338  1.1.1.2  mrg 	    X##_f0, Y##_f0);						\
    339  1.1.1.2  mrg       doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1),		\
    340  1.1.1.2  mrg 	    _FP_MUL_MEAT_DW_2_wide_3mul_b_f0,				\
    341  1.1.1.2  mrg 	    _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);				\
    342  1.1.1.2  mrg       doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1,				\
    343  1.1.1.2  mrg 	    _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1);		\
    344  1.1.1.2  mrg 									\
    345  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0					\
    346  1.1.1.2  mrg 	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c2;				\
    347  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1					\
    348  1.1.1.2  mrg 	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c1;				\
    349  1.1.1.2  mrg       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
    350  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 1),				\
    351  1.1.1.2  mrg 		       (_FP_MUL_MEAT_DW_2_wide_3mul_c1			\
    352  1.1.1.2  mrg 			& _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0,		\
    353  1.1.1.2  mrg 		       _FP_MUL_MEAT_DW_2_wide_3mul_d,			\
    354  1.1.1.2  mrg 		       0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
    355  1.1.1.2  mrg       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
    356  1.1.1.2  mrg 			_FP_MUL_MEAT_DW_2_wide_3mul_b_f0);		\
    357  1.1.1.2  mrg       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
    358  1.1.1.2  mrg 			_FP_MUL_MEAT_DW_2_wide_3mul_b_f1);		\
    359  1.1.1.2  mrg       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
    360  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 1),				\
    361  1.1.1.2  mrg 		       0, _FP_MUL_MEAT_DW_2_wide_3mul_d,		\
    362  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 0));				\
    363  1.1.1.2  mrg       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
    364  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 1), 0,			\
    365  1.1.1.2  mrg 		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,		\
    366  1.1.1.2  mrg 		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0);		\
    367  1.1.1.2  mrg       __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
    368  1.1.1.2  mrg 		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,		\
    369  1.1.1.2  mrg 		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0,		\
    370  1.1.1.2  mrg 		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2));	\
    371  1.1.1.2  mrg     }									\
    372  1.1.1.2  mrg   while (0)
    373  1.1.1.2  mrg 
    374      1.1  mrg #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)		\
    375  1.1.1.2  mrg   do									\
    376  1.1.1.2  mrg     {									\
    377  1.1.1.2  mrg       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z);			\
    378  1.1.1.2  mrg 									\
    379  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits),				\
    380  1.1.1.2  mrg 				   _FP_MUL_MEAT_2_wide_3mul_z,		\
    381  1.1.1.2  mrg 				   X, Y, doit);				\
    382  1.1.1.2  mrg 									\
    383  1.1.1.2  mrg       /* Normalize since we know where the msb of the multiplicands	\
    384  1.1.1.2  mrg 	 were (bit B), we know that the msb of the of the product is	\
    385  1.1.1.2  mrg 	 at either 2B or 2B-1.  */					\
    386  1.1.1.2  mrg       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z,			\
    387  1.1.1.2  mrg 		      (wfracbits)-1, 2*(wfracbits));			\
    388  1.1.1.2  mrg       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0);		\
    389  1.1.1.2  mrg       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1);		\
    390  1.1.1.2  mrg     }									\
    391  1.1.1.2  mrg   while (0)
    392  1.1.1.2  mrg 
    393  1.1.1.2  mrg #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)	\
    394  1.1.1.2  mrg   do							\
    395  1.1.1.2  mrg     {							\
    396  1.1.1.2  mrg       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2];		\
    397  1.1.1.2  mrg       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2];		\
    398  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0;		\
    399  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1;		\
    400  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0;		\
    401  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1;		\
    402  1.1.1.2  mrg 							\
    403  1.1.1.2  mrg       mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x,	\
    404  1.1.1.2  mrg 		 _FP_MUL_MEAT_DW_2_gmp_y, 2);		\
    405  1.1.1.2  mrg     }							\
    406  1.1.1.2  mrg   while (0)
    407      1.1  mrg 
    408      1.1  mrg #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)				\
    409  1.1.1.2  mrg   do									\
    410  1.1.1.2  mrg     {									\
    411  1.1.1.2  mrg       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z);				\
    412  1.1.1.2  mrg 									\
    413  1.1.1.2  mrg       _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y);	\
    414  1.1.1.2  mrg 									\
    415  1.1.1.2  mrg       /* Normalize since we know where the msb of the multiplicands	\
    416  1.1.1.2  mrg 	 were (bit B), we know that the msb of the of the product is	\
    417  1.1.1.2  mrg 	 at either 2B or 2B-1.  */					\
    418  1.1.1.2  mrg       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1,		\
    419  1.1.1.2  mrg 		      2*(wfracbits));					\
    420  1.1.1.2  mrg       R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0];				\
    421  1.1.1.2  mrg       R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1];				\
    422  1.1.1.2  mrg     }									\
    423  1.1.1.2  mrg   while (0)
    424      1.1  mrg 
    425      1.1  mrg /* Do at most 120x120=240 bits multiplication using double floating
    426      1.1  mrg    point multiplication.  This is useful if floating point
    427      1.1  mrg    multiplication has much bigger throughput than integer multiply.
    428      1.1  mrg    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
    429  1.1.1.2  mrg    between 106 and 120 only.
    430      1.1  mrg    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
    431      1.1  mrg    SETFETZ is a macro which will disable all FPU exceptions and set rounding
    432      1.1  mrg    towards zero,  RESETFE should optionally reset it back.  */
    433      1.1  mrg 
    434  1.1.1.2  mrg #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
    435  1.1.1.2  mrg   do									\
    436  1.1.1.2  mrg     {									\
    437  1.1.1.2  mrg       static const double _const[] =					\
    438  1.1.1.2  mrg 	{								\
    439  1.1.1.2  mrg 	  /* 2^-24 */ 5.9604644775390625e-08,				\
    440  1.1.1.2  mrg 	  /* 2^-48 */ 3.5527136788005009e-15,				\
    441  1.1.1.2  mrg 	  /* 2^-72 */ 2.1175823681357508e-22,				\
    442  1.1.1.2  mrg 	  /* 2^-96 */ 1.2621774483536189e-29,				\
    443  1.1.1.2  mrg 	  /* 2^28 */ 2.68435456e+08,					\
    444  1.1.1.2  mrg 	  /* 2^4 */ 1.600000e+01,					\
    445  1.1.1.2  mrg 	  /* 2^-20 */ 9.5367431640625e-07,				\
    446  1.1.1.2  mrg 	  /* 2^-44 */ 5.6843418860808015e-14,				\
    447  1.1.1.2  mrg 	  /* 2^-68 */ 3.3881317890172014e-21,				\
    448  1.1.1.2  mrg 	  /* 2^-92 */ 2.0194839173657902e-28,				\
    449  1.1.1.2  mrg 	  /* 2^-116 */ 1.2037062152420224e-35				\
    450  1.1.1.2  mrg 	};								\
    451  1.1.1.2  mrg       double _a240, _b240, _c240, _d240, _e240, _f240,			\
    452  1.1.1.2  mrg 	_g240, _h240, _i240, _j240, _k240;				\
    453  1.1.1.2  mrg       union { double d; UDItype i; } _l240, _m240, _n240, _o240,	\
    454  1.1.1.2  mrg 				       _p240, _q240, _r240, _s240;	\
    455  1.1.1.2  mrg       UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;		\
    456  1.1.1.2  mrg 									\
    457  1.1.1.3  mrg       _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120,	\
    458  1.1.1.3  mrg 			 "wfracbits out of range");			\
    459  1.1.1.2  mrg 									\
    460  1.1.1.2  mrg       setfetz;								\
    461  1.1.1.2  mrg 									\
    462  1.1.1.2  mrg       _e240 = (double) (long) (X##_f0 & 0xffffff);			\
    463  1.1.1.2  mrg       _j240 = (double) (long) (Y##_f0 & 0xffffff);			\
    464  1.1.1.2  mrg       _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);		\
    465  1.1.1.2  mrg       _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);		\
    466  1.1.1.2  mrg       _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
    467  1.1.1.2  mrg       _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
    468  1.1.1.2  mrg       _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);		\
    469  1.1.1.2  mrg       _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);		\
    470  1.1.1.2  mrg       _a240 = (double) (long) (X##_f1 >> 32);				\
    471  1.1.1.2  mrg       _f240 = (double) (long) (Y##_f1 >> 32);				\
    472  1.1.1.2  mrg       _e240 *= _const[3];						\
    473  1.1.1.2  mrg       _j240 *= _const[3];						\
    474  1.1.1.2  mrg       _d240 *= _const[2];						\
    475  1.1.1.2  mrg       _i240 *= _const[2];						\
    476  1.1.1.2  mrg       _c240 *= _const[1];						\
    477  1.1.1.2  mrg       _h240 *= _const[1];						\
    478  1.1.1.2  mrg       _b240 *= _const[0];						\
    479  1.1.1.2  mrg       _g240 *= _const[0];						\
    480  1.1.1.2  mrg       _s240.d =							      _e240*_j240; \
    481  1.1.1.2  mrg       _r240.d =						_d240*_j240 + _e240*_i240; \
    482  1.1.1.2  mrg       _q240.d =				  _c240*_j240 + _d240*_i240 + _e240*_h240; \
    483  1.1.1.2  mrg       _p240.d =		    _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
    484  1.1.1.2  mrg       _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
    485  1.1.1.2  mrg       _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;	\
    486  1.1.1.2  mrg       _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;		\
    487  1.1.1.2  mrg       _l240.d = _a240*_g240 + _b240*_f240;				\
    488  1.1.1.2  mrg       _k240 =   _a240*_f240;						\
    489  1.1.1.2  mrg       _r240.d += _s240.d;						\
    490  1.1.1.2  mrg       _q240.d += _r240.d;						\
    491  1.1.1.2  mrg       _p240.d += _q240.d;						\
    492  1.1.1.2  mrg       _o240.d += _p240.d;						\
    493  1.1.1.2  mrg       _n240.d += _o240.d;						\
    494  1.1.1.2  mrg       _m240.d += _n240.d;						\
    495  1.1.1.2  mrg       _l240.d += _m240.d;						\
    496  1.1.1.2  mrg       _k240 += _l240.d;							\
    497  1.1.1.2  mrg       _s240.d -= ((_const[10]+_s240.d)-_const[10]);			\
    498  1.1.1.2  mrg       _r240.d -= ((_const[9]+_r240.d)-_const[9]);			\
    499  1.1.1.2  mrg       _q240.d -= ((_const[8]+_q240.d)-_const[8]);			\
    500  1.1.1.2  mrg       _p240.d -= ((_const[7]+_p240.d)-_const[7]);			\
    501  1.1.1.2  mrg       _o240.d += _const[7];						\
    502  1.1.1.2  mrg       _n240.d += _const[6];						\
    503  1.1.1.2  mrg       _m240.d += _const[5];						\
    504  1.1.1.2  mrg       _l240.d += _const[4];						\
    505  1.1.1.2  mrg       if (_s240.d != 0.0)						\
    506  1.1.1.2  mrg 	_y240 = 1;							\
    507  1.1.1.2  mrg       if (_r240.d != 0.0)						\
    508  1.1.1.2  mrg 	_y240 = 1;							\
    509  1.1.1.2  mrg       if (_q240.d != 0.0)						\
    510  1.1.1.2  mrg 	_y240 = 1;							\
    511  1.1.1.2  mrg       if (_p240.d != 0.0)						\
    512  1.1.1.2  mrg 	_y240 = 1;							\
    513  1.1.1.2  mrg       _t240 = (DItype) _k240;						\
    514  1.1.1.2  mrg       _u240 = _l240.i;							\
    515  1.1.1.2  mrg       _v240 = _m240.i;							\
    516  1.1.1.2  mrg       _w240 = _n240.i;							\
    517  1.1.1.2  mrg       _x240 = _o240.i;							\
    518  1.1.1.2  mrg       R##_f1 = ((_t240 << (128 - (wfracbits - 1)))			\
    519  1.1.1.2  mrg 		| ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));	\
    520  1.1.1.2  mrg       R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))		\
    521  1.1.1.2  mrg 		| ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))	\
    522  1.1.1.2  mrg 		| ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))	\
    523  1.1.1.2  mrg 		| ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))	\
    524  1.1.1.2  mrg 		| _y240);						\
    525  1.1.1.2  mrg       resetfe;								\
    526  1.1.1.2  mrg     }									\
    527  1.1.1.2  mrg   while (0)
    528  1.1.1.2  mrg 
    529  1.1.1.2  mrg /* Division algorithms: */
    530      1.1  mrg 
    531      1.1  mrg #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)				\
    532  1.1.1.2  mrg   do									\
    533  1.1.1.2  mrg     {									\
    534  1.1.1.2  mrg       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2;				\
    535  1.1.1.2  mrg       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1;				\
    536  1.1.1.2  mrg       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0;				\
    537  1.1.1.2  mrg       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1;				\
    538  1.1.1.2  mrg       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0;				\
    539  1.1.1.2  mrg       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1;				\
    540  1.1.1.2  mrg       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0;				\
    541  1.1.1.2  mrg       if (_FP_FRAC_GE_2 (X, Y))						\
    542  1.1.1.2  mrg 	{								\
    543  1.1.1.2  mrg 	  _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1;			\
    544  1.1.1.2  mrg 	  _FP_DIV_MEAT_2_udiv_n_f1					\
    545  1.1.1.2  mrg 	    = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
    546  1.1.1.2  mrg 	  _FP_DIV_MEAT_2_udiv_n_f0					\
    547  1.1.1.2  mrg 	    = X##_f0 << (_FP_W_TYPE_SIZE - 1);				\
    548  1.1.1.2  mrg 	}								\
    549  1.1.1.2  mrg       else								\
    550  1.1.1.2  mrg 	{								\
    551  1.1.1.2  mrg 	  R##_e--;							\
    552  1.1.1.2  mrg 	  _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1;				\
    553  1.1.1.2  mrg 	  _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0;				\
    554  1.1.1.2  mrg 	  _FP_DIV_MEAT_2_udiv_n_f0 = 0;					\
    555  1.1.1.2  mrg 	}								\
    556  1.1.1.2  mrg 									\
    557  1.1.1.2  mrg       /* Normalize, i.e. make the most significant bit of the		\
    558  1.1.1.2  mrg 	 denominator set.  */						\
    559  1.1.1.2  mrg       _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);				\
    560  1.1.1.2  mrg 									\
    561  1.1.1.2  mrg       udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1,			\
    562  1.1.1.2  mrg 		  _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1,	\
    563  1.1.1.2  mrg 		  Y##_f1);						\
    564  1.1.1.2  mrg       umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0,	\
    565  1.1.1.2  mrg 		 R##_f1, Y##_f0);					\
    566  1.1.1.2  mrg       _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0;		\
    567  1.1.1.2  mrg       if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r))	\
    568  1.1.1.2  mrg 	{								\
    569  1.1.1.2  mrg 	  R##_f1--;							\
    570  1.1.1.2  mrg 	  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,			\
    571  1.1.1.2  mrg 			  _FP_DIV_MEAT_2_udiv_r);			\
    572  1.1.1.2  mrg 	  if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)			\
    573  1.1.1.2  mrg 	      && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,			\
    574  1.1.1.2  mrg 				_FP_DIV_MEAT_2_udiv_r))			\
    575  1.1.1.2  mrg 	    {								\
    576  1.1.1.2  mrg 	      R##_f1--;							\
    577  1.1.1.2  mrg 	      _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,			\
    578  1.1.1.2  mrg 			      _FP_DIV_MEAT_2_udiv_r);			\
    579  1.1.1.2  mrg 	    }								\
    580  1.1.1.2  mrg 	}								\
    581  1.1.1.2  mrg       _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m);	\
    582  1.1.1.2  mrg 									\
    583  1.1.1.2  mrg       if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1)				\
    584  1.1.1.2  mrg 	{								\
    585  1.1.1.2  mrg 	  /* This is a special case, not an optimization		\
    586  1.1.1.2  mrg 	     (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype).	\
    587  1.1.1.2  mrg 	     As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y,		\
    588  1.1.1.2  mrg 	     R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we	\
    589  1.1.1.2  mrg 	     know what kind of bits it is (sticky, guard, round),	\
    590  1.1.1.2  mrg 	     we don't care.  We also don't care what the reminder is,	\
    591  1.1.1.2  mrg 	     because the guard bit will be set anyway.  -jj */		\
    592  1.1.1.2  mrg 	  R##_f0 = -1;							\
    593  1.1.1.2  mrg 	}								\
    594  1.1.1.2  mrg       else								\
    595  1.1.1.2  mrg 	{								\
    596  1.1.1.2  mrg 	  udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1,			\
    597  1.1.1.2  mrg 		      _FP_DIV_MEAT_2_udiv_r_f1,				\
    598  1.1.1.2  mrg 		      _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1);		\
    599  1.1.1.2  mrg 	  umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1,				\
    600  1.1.1.2  mrg 		     _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0);		\
    601  1.1.1.2  mrg 	  _FP_DIV_MEAT_2_udiv_r_f0 = 0;					\
    602  1.1.1.2  mrg 	  if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,			\
    603  1.1.1.2  mrg 			     _FP_DIV_MEAT_2_udiv_r))			\
    604  1.1.1.2  mrg 	    {								\
    605  1.1.1.2  mrg 	      R##_f0--;							\
    606  1.1.1.2  mrg 	      _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,			\
    607  1.1.1.2  mrg 			      _FP_DIV_MEAT_2_udiv_r);			\
    608  1.1.1.2  mrg 	      if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)		\
    609  1.1.1.2  mrg 		  && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,		\
    610  1.1.1.2  mrg 				    _FP_DIV_MEAT_2_udiv_r))		\
    611  1.1.1.2  mrg 		{							\
    612  1.1.1.2  mrg 		  R##_f0--;						\
    613  1.1.1.2  mrg 		  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,		\
    614  1.1.1.2  mrg 				  _FP_DIV_MEAT_2_udiv_r);		\
    615  1.1.1.2  mrg 		}							\
    616  1.1.1.2  mrg 	    }								\
    617  1.1.1.2  mrg 	  if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r,			\
    618  1.1.1.2  mrg 			      _FP_DIV_MEAT_2_udiv_m))			\
    619  1.1.1.2  mrg 	    R##_f0 |= _FP_WORK_STICKY;					\
    620  1.1.1.2  mrg 	}								\
    621  1.1.1.2  mrg     }									\
    622  1.1.1.2  mrg   while (0)
    623  1.1.1.2  mrg 
    624  1.1.1.2  mrg 
    625  1.1.1.2  mrg /* Square root algorithms:
    626  1.1.1.2  mrg    We have just one right now, maybe Newton approximation
    627  1.1.1.2  mrg    should be added for those machines where division is fast.  */
    628  1.1.1.2  mrg 
    629  1.1.1.2  mrg #define _FP_SQRT_MEAT_2(R, S, T, X, q)				\
    630  1.1.1.2  mrg   do								\
    631  1.1.1.2  mrg     {								\
    632  1.1.1.2  mrg       while (q)							\
    633  1.1.1.2  mrg 	{							\
    634  1.1.1.2  mrg 	  T##_f1 = S##_f1 + (q);				\
    635  1.1.1.2  mrg 	  if (T##_f1 <= X##_f1)					\
    636  1.1.1.2  mrg 	    {							\
    637  1.1.1.2  mrg 	      S##_f1 = T##_f1 + (q);				\
    638  1.1.1.2  mrg 	      X##_f1 -= T##_f1;					\
    639  1.1.1.2  mrg 	      R##_f1 += (q);					\
    640  1.1.1.2  mrg 	    }							\
    641  1.1.1.2  mrg 	  _FP_FRAC_SLL_2 (X, 1);				\
    642  1.1.1.2  mrg 	  (q) >>= 1;						\
    643  1.1.1.2  mrg 	}							\
    644  1.1.1.2  mrg       (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);		\
    645  1.1.1.2  mrg       while ((q) != _FP_WORK_ROUND)				\
    646  1.1.1.2  mrg 	{							\
    647  1.1.1.2  mrg 	  T##_f0 = S##_f0 + (q);				\
    648  1.1.1.2  mrg 	  T##_f1 = S##_f1;					\
    649  1.1.1.2  mrg 	  if (T##_f1 < X##_f1					\
    650  1.1.1.2  mrg 	      || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))	\
    651  1.1.1.2  mrg 	    {							\
    652  1.1.1.2  mrg 	      S##_f0 = T##_f0 + (q);				\
    653  1.1.1.2  mrg 	      S##_f1 += (T##_f0 > S##_f0);			\
    654  1.1.1.2  mrg 	      _FP_FRAC_DEC_2 (X, T);				\
    655  1.1.1.2  mrg 	      R##_f0 += (q);					\
    656  1.1.1.2  mrg 	    }							\
    657  1.1.1.2  mrg 	  _FP_FRAC_SLL_2 (X, 1);				\
    658  1.1.1.2  mrg 	  (q) >>= 1;						\
    659  1.1.1.2  mrg 	}							\
    660  1.1.1.2  mrg       if (X##_f0 | X##_f1)					\
    661  1.1.1.2  mrg 	{							\
    662  1.1.1.2  mrg 	  if (S##_f1 < X##_f1					\
    663  1.1.1.2  mrg 	      || (S##_f1 == X##_f1 && S##_f0 < X##_f0))		\
    664  1.1.1.2  mrg 	    R##_f0 |= _FP_WORK_ROUND;				\
    665  1.1.1.2  mrg 	  R##_f0 |= _FP_WORK_STICKY;				\
    666  1.1.1.2  mrg 	}							\
    667  1.1.1.2  mrg     }								\
    668  1.1.1.2  mrg   while (0)
    669  1.1.1.2  mrg 
    670  1.1.1.2  mrg 
    671  1.1.1.2  mrg /* Assembly/disassembly for converting to/from integral types.
    672  1.1.1.2  mrg    No shifting or overflow handled here.  */
    673      1.1  mrg 
    674      1.1  mrg #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
    675  1.1.1.2  mrg   (void) (((rsize) <= _FP_W_TYPE_SIZE)		\
    676  1.1.1.2  mrg 	  ? ({ (r) = X##_f0; })			\
    677  1.1.1.2  mrg 	  : ({					\
    678  1.1.1.2  mrg 	      (r) = X##_f1;			\
    679  1.1.1.2  mrg 	      (r) <<= _FP_W_TYPE_SIZE;		\
    680  1.1.1.2  mrg 	      (r) += X##_f0;			\
    681  1.1.1.2  mrg 	    }))
    682  1.1.1.2  mrg 
    683  1.1.1.2  mrg #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)	\
    684  1.1.1.2  mrg   do						\
    685  1.1.1.2  mrg     {						\
    686  1.1.1.2  mrg       X##_f0 = (r);				\
    687  1.1.1.2  mrg       X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE	\
    688  1.1.1.2  mrg 		? 0				\
    689  1.1.1.2  mrg 		: (r) >> _FP_W_TYPE_SIZE);	\
    690  1.1.1.2  mrg     }						\
    691  1.1.1.2  mrg   while (0)
    692  1.1.1.2  mrg 
    693  1.1.1.2  mrg /* Convert FP values between word sizes.  */
    694      1.1  mrg 
    695      1.1  mrg #define _FP_FRAC_COPY_1_2(D, S)		(D##_f = S##_f0)
    696      1.1  mrg 
    697      1.1  mrg #define _FP_FRAC_COPY_2_1(D, S)		((D##_f0 = S##_f), (D##_f1 = 0))
    698      1.1  mrg 
    699  1.1.1.2  mrg #define _FP_FRAC_COPY_2_2(D, S)		_FP_FRAC_COPY_2 (D, S)
    700  1.1.1.3  mrg 
    701  1.1.1.3  mrg #endif /* !SOFT_FP_OP_2_H */
    702