/* Software floating-point emulation.
   Basic two-word fraction declaration and manipulation.
   Copyright (C) 1997-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file.  (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#ifndef SOFT_FP_OP_2_H
#define SOFT_FP_OP_2_H 1

/* A two-word fraction named X is stored in two _FP_W_TYPE variables:
   X_f0 is the low word and X_f1 is the high word.  _FP_W_TYPE,
   _FP_WS_TYPE, _FP_W_TYPE_SIZE and _FP_ZERO_INIT are not defined in
   this file and must be provided by the includer.  */

/* Declare the two words of fraction X.  */
#define _FP_FRAC_DECL_2(X) \
  _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
/* Copy both words of S into D.  */
#define _FP_FRAC_COPY_2(D, S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
/* Set X from I, where I expands to a "high, low" pair such as
   _FP_ZEROFRAC_2, _FP_MINFRAC_2 or _FP_MAXFRAC_2.  */
#define _FP_FRAC_SET_2(X, I) __FP_FRAC_SET_2 (X, I)
/* Word accessors; W in _FP_FRAC_WORD_2 is pasted, so it must be a
   literal 0 or 1.  */
#define _FP_FRAC_HIGH_2(X) (X##_f1)
#define _FP_FRAC_LOW_2(X) (X##_f0)
#define _FP_FRAC_WORD_2(X, w) (X##_f##w)

/* Shift X left by N bits, discarding bits shifted out of the high word.
   N is evaluated more than once.  Both branches shift by a count derived
   from N, so N is expected to satisfy 0 < N < 2 * _FP_W_TYPE_SIZE; N == 0
   would shift X_f0 by the full word width in the first branch.  A
   compile-time constant N == 1 is done with additions instead of
   shifts.  */
#define _FP_FRAC_SLL_2(X, N) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
          ? ({ \
              if (__builtin_constant_p (N) && (N) == 1) \
                { \
                  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
                  X##_f0 += X##_f0; \
                } \
              else \
                { \
                  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
                  X##_f0 <<= (N); \
                } \
              0; \
            }) \
          : ({ \
              X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
              X##_f0 = 0; \
            }))


/* Shift X right by N bits, discarding the bits shifted out.  The same
   restriction on N as for _FP_FRAC_SLL_2 applies.  */
#define _FP_FRAC_SRL_2(X, N) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
          ? ({ \
              X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
              X##_f1 >>= (N); \
            }) \
          : ({ \
              X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
              X##_f1 = 0; \
            }))

/* Right shift with sticky-lsb.  */

/* Shift X right by N bits and store in S whether any nonzero bit was
   shifted out (1) or not (0).  The SZ argument is not used by this
   implementation.  */
#define _FP_FRAC_SRST_2(X, S, N, sz) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
          ? ({ \
              S = (__builtin_constant_p (N) && (N) == 1 \
                   ? X##_f0 & 1 \
                   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
              X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
              X##_f1 >>= (N); \
            }) \
          : ({ \
              S = ((((N) == _FP_W_TYPE_SIZE \
                     ? 0 \
                     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
                    | X##_f0) != 0); \
              X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
              X##_f1 = 0; \
            }))

/* Shift X right by N bits, OR-ing into the least significant bit of the
   result whether any nonzero bit was shifted out.  The SZ argument is
   not used by this implementation.  */
#define _FP_FRAC_SRS_2(X, N, sz) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
          ? ({ \
              X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
                        | (__builtin_constant_p (N) && (N) == 1 \
                           ? X##_f0 & 1 \
                           : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
              X##_f1 >>= (N); \
            }) \
          : ({ \
              X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \
                        | ((((N) == _FP_W_TYPE_SIZE \
                             ? 0 \
                             : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
                            | X##_f0) != 0)); \
              X##_f1 = 0; \
            }))

/* Two-word arithmetic.  These forward the individual words to the
   __FP_FRAC_*_2 internals selected further down:
   X += I, R = X + Y, R = X - Y and X -= Y respectively.  */
#define _FP_FRAC_ADDI_2(X, I) \
  __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)

#define _FP_FRAC_ADD_2(R, X, Y) \
  __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

#define _FP_FRAC_SUB_2(R, X, Y) \
  __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

#define _FP_FRAC_DEC_2(X, Y) \
  __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)

/* Store in R the number of leading zero bits of X.  The high word is
   examined first; if it is zero, the count for the low word is used
   and _FP_W_TYPE_SIZE is added.  __FP_CLZ is defined elsewhere; the
   result when both words are zero depends on what it does for 0.  */
#define _FP_FRAC_CLZ_2(R, X) \
  do \
    { \
      if (X##_f1) \
        __FP_CLZ ((R), X##_f1); \
      else \
        { \
          __FP_CLZ ((R), X##_f0); \
          (R) += _FP_W_TYPE_SIZE; \
        } \
    } \
  while (0)

/* Predicates.  */
/* NEGP tests the sign bit of the high word; ZEROP tests both words.
   OVERP, CLEAR_OVERP and HIGHBIT_DW take a format suffix FS and use the
   _FP_FRAC_HIGH_<fs>, _FP_OVERFLOW_<fs>, _FP_FRAC_HIGH_DW_<fs> and
   _FP_HIGHBIT_DW_<fs> definitions supplied for that format elsewhere.
   EQ, GT and GE compare X and Y as unsigned two-word values, high word
   first.  */
#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) X##_f1 < 0)
#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
#define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
#define _FP_FRAC_CLEAR_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
#define _FP_FRAC_HIGHBIT_DW_2(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
#define _FP_FRAC_GT_2(X, Y) \
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
#define _FP_FRAC_GE_2(X, Y) \
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))

/* "high, low" initializer pairs for use with _FP_FRAC_SET_2.  */
#define _FP_ZEROFRAC_2 0, 0
#define _FP_MINFRAC_2 0, 1
#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)

/* Internals.  */

/* Assign the low word I0 and the high word I1 to X.  */
#define __FP_FRAC_SET_2(X, I1, I0) (X##_f0 = I0, X##_f1 = I1)

/* Same as _FP_FRAC_CLZ_2, but taking the high and low words XH and XL
   as separate arguments.  */
#define __FP_CLZ_2(R, xh, xl) \
  do \
    { \
      if (xh) \
        __FP_CLZ ((R), xh); \
      else \
        { \
          __FP_CLZ ((R), xl); \
          (R) += _FP_W_TYPE_SIZE; \
        } \
    } \
  while (0)

/* The first branch holds plain C versions of the two-word primitives
   and is disabled; the active branch maps them onto add_ssaaaa and
   sub_ddmmss, which are not defined in this file.  */
#if 0

# ifndef __FP_FRAC_ADDI_2
#  define __FP_FRAC_ADDI_2(xh, xl, i) \
  (xh += ((xl += i) < i))
# endif
# ifndef __FP_FRAC_ADD_2
#  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh + yh + ((rl = xl + yl) < xl))
# endif
# ifndef __FP_FRAC_SUB_2
#  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh - yh - ((rl = xl - yl) > xl))
# endif
# ifndef __FP_FRAC_DEC_2
#  define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
  do \
    { \
      UWtype __FP_FRAC_DEC_2_t = xl; \
      xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
    } \
  while (0)
# endif

#else

# undef __FP_FRAC_ADDI_2
# define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa (xh, xl, xh, xl, 0, i)
# undef __FP_FRAC_ADD_2
# define __FP_FRAC_ADD_2 add_ssaaaa
# undef __FP_FRAC_SUB_2
# define __FP_FRAC_SUB_2 sub_ddmmss
# undef __FP_FRAC_DEC_2
# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
  sub_ddmmss (xh, xl, xh, xl, yh, yl)

#endif

/* Unpack the raw bits of a native fp value.  Do not classify or
   normalize the data.  */

/* VAL is the value itself; the fields frac0, frac1, exp and sign of
   union _FP_UNION_<fs> are copied into X_f0, X_f1, X_e and X_s.  */
#define _FP_UNPACK_RAW_2(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
      _FP_UNPACK_RAW_2_flo.flt = (val); \
      \
      X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
      X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
      X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
      X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
    } \
  while (0)

/* As _FP_UNPACK_RAW_2, but VAL is a pointer that is cast to
   union _FP_UNION_<fs> * and read through.  */
#define _FP_UNPACK_RAW_2_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
        = (union _FP_UNION_##fs *) (val); \
      \
      X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
      X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
      X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
      X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
    } \
  while (0)


/* Repack the raw bits of a native fp value.  */

/* VAL is an lvalue that receives the packed value built from X_f0,
   X_f1, X_e and X_s.  */
#define _FP_PACK_RAW_2(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
      \
      _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
      _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
      _FP_PACK_RAW_2_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_2_flo.bits.sign = X##_s; \
      \
      (val) = _FP_PACK_RAW_2_flo.flt; \
    } \
  while (0)

/* As _FP_PACK_RAW_2, but VAL is a pointer that is cast to
   union _FP_UNION_<fs> * and written through.  */
#define _FP_PACK_RAW_2_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
        = (union _FP_UNION_##fs *) (val); \
      \
      _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
      _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
      _FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
    } \
  while (0)


/* Multiplication algorithms: */

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */

/* R is a four-word fraction that receives the full product of the
   two-word fractions X and Y.  DOIT (high, low, a, b) is the
   1W * 1W => 2W primitive.  WFRACBITS is not used here.  The four-word
   helpers (_FP_FRAC_WORD_4, __FP_FRAC_ADD_3, ...) are not defined in
   this file.  */
#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
      \
      doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
            X##_f0, Y##_f0); \
      doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
            X##_f0, Y##_f1); \
      doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
            X##_f1, Y##_f0); \
      doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
            X##_f1, Y##_f1); \
      \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_2_wide_b_f1, \
                       _FP_MUL_MEAT_DW_2_wide_b_f0, \
                       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_2_wide_c_f1, \
                       _FP_MUL_MEAT_DW_2_wide_c_f0, \
                       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1)); \
    } \
  while (0)

/* Two-word result variant: compute the four-word product, shift it
   right (with sticky bit) by WFRACBITS - 1 and keep the two low
   words in R.  */
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
      \
      _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z, \
                              X, Y, doit); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
    } \
  while (0)

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
   Do only 3 multiplications instead of four. This one is for machines
   where multiplication is much more expensive than subtraction.  */

#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
      int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
      int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
      \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
      _FP_MUL_MEAT_DW_2_wide_3mul_c1 \
        = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
      _FP_MUL_MEAT_DW_2_wide_3mul_c2 \
        = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
            X##_f0, Y##_f0); \
      doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
            _FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
            _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
            _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
      \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
        &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
        &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), \
                       (_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
                        & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_d, \
                       0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                        _FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                        _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), \
                       0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
                       _FP_FRAC_WORD_4 (R, 0)); \
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
                       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
    } \
  while (0)

/* Two-word result variant of the three-multiplication algorithm; the
   normalization is the same as in _FP_MUL_MEAT_2_wide.  */
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
      \
      _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits), \
                                   _FP_MUL_MEAT_2_wide_3mul_z, \
                                   X, Y, doit); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
                      (wfracbits)-1, 2*(wfracbits)); \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
    } \
  while (0)

/* Full product via mpn_mul_n.  R##_f is passed as the destination, so
   R must be a fraction whose storage is an array named R_f (as the
   four-word declaration used by _FP_MUL_MEAT_2_gmp below provides).
   WFRACBITS is not used here.  */
#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
      _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
      _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
      _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
      _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
      \
      mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
                 _FP_MUL_MEAT_DW_2_gmp_y, 2); \
    } \
  while (0)

/* Two-word result variant of the mpn_mul_n algorithm; the
   normalization is the same as in _FP_MUL_MEAT_2_wide.  */
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
      \
      _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
      R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
    } \
  while (0)

/* Do at most 120x120=240 bits multiplication using double floating
   point multiplication.  This is useful if floating point
   multiplication has much bigger throughput than integer multiply.
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
   between 106 and 120 only.
   Caller guarantees that X and Y have (1LL << (wfracbits - 1)) set.
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
   towards zero, RESETFE should optionally reset it back.  */

#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
  do \
    { \
      static const double _const[] = \
        { \
          /* 2^-24 */ 5.9604644775390625e-08, \
          /* 2^-48 */ 3.5527136788005009e-15, \
          /* 2^-72 */ 2.1175823681357508e-22, \
          /* 2^-96 */ 1.2621774483536189e-29, \
          /* 2^28 */ 2.68435456e+08, \
          /* 2^4 */ 1.600000e+01, \
          /* 2^-20 */ 9.5367431640625e-07, \
          /* 2^-44 */ 5.6843418860808015e-14, \
          /* 2^-68 */ 3.3881317890172014e-21, \
          /* 2^-92 */ 2.0194839173657902e-28, \
          /* 2^-116 */ 1.2037062152420224e-35 \
        }; \
      double _a240, _b240, _c240, _d240, _e240, _f240, \
        _g240, _h240, _i240, _j240, _k240; \
      union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
        _p240, _q240, _r240, _s240; \
      UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
      \
      _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120, \
                         "wfracbits out of range"); \
      \
      setfetz; \
      \
      /* Split X into 24-bit limbs _e240 (lowest) .. _a240 (highest) \
         and Y into _j240 (lowest) .. _f240 (highest).  */ \
      _e240 = (double) (long) (X##_f0 & 0xffffff); \
      _j240 = (double) (long) (Y##_f0 & 0xffffff); \
      _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
      _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
      _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
      _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
      _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
      _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
      _a240 = (double) (long) (X##_f1 >> 32); \
      _f240 = (double) (long) (Y##_f1 >> 32); \
      _e240 *= _const[3]; \
      _j240 *= _const[3]; \
      _d240 *= _const[2]; \
      _i240 *= _const[2]; \
      _c240 *= _const[1]; \
      _h240 *= _const[1]; \
      _b240 *= _const[0]; \
      _g240 *= _const[0]; \
      _s240.d = _e240*_j240; \
      _r240.d = _d240*_j240 + _e240*_i240; \
      _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240; \
      _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
      _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
      _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
      _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
      _l240.d = _a240*_g240 + _b240*_f240; \
      _k240 = _a240*_f240; \
      _r240.d += _s240.d; \
      _q240.d += _r240.d; \
      _p240.d += _q240.d; \
      _o240.d += _p240.d; \
      _n240.d += _o240.d; \
      _m240.d += _n240.d; \
      _l240.d += _m240.d; \
      _k240 += _l240.d; \
      _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
      _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
      _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
      _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
      _o240.d += _const[7]; \
      _n240.d += _const[6]; \
      _m240.d += _const[5]; \
      _l240.d += _const[4]; \
      /* Any nonzero residue in the low partial sums becomes the \
         sticky bit _y240.  */ \
      if (_s240.d != 0.0) \
        _y240 = 1; \
      if (_r240.d != 0.0) \
        _y240 = 1; \
      if (_q240.d != 0.0) \
        _y240 = 1; \
      if (_p240.d != 0.0) \
        _y240 = 1; \
      _t240 = (DItype) _k240; \
      _u240 = _l240.i; \
      _v240 = _m240.i; \
      _w240 = _n240.i; \
      _x240 = _o240.i; \
      R##_f1 = ((_t240 << (128 - ((wfracbits) - 1))) \
                | ((_u240 & 0xffffff) >> (((wfracbits) - 1) - 104))); \
      R##_f0 = (((_u240 & 0xffffff) << (168 - ((wfracbits) - 1))) \
                | ((_v240 & 0xffffff) << (144 - ((wfracbits) - 1))) \
                | ((_w240 & 0xffffff) << (120 - ((wfracbits) - 1))) \
                | ((_x240 & 0xffffff) >> (((wfracbits) - 1) - 96)) \
                | _y240); \
      resetfe; \
    } \
  while (0)

/* Division algorithms: */

/* Divide X by Y using the udiv_qrnnd and umul_ppmm primitives (not
   defined in this file), leaving the two-word quotient in R.  Y is
   shifted left in place by _FP_WFRACXBITS_<fs>, and R_e is decremented
   when X < Y.  */
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
      if (_FP_FRAC_GE_2 (X, Y)) \
        { \
          _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
          _FP_DIV_MEAT_2_udiv_n_f1 \
            = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
          _FP_DIV_MEAT_2_udiv_n_f0 \
            = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
        } \
      else \
        { \
          R##_e--; \
          _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
          _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
          _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
        } \
      \
      /* Normalize, i.e. make the most significant bit of the \
         denominator set.  */ \
      _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
      \
      udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
                  _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
                  Y##_f1); \
      umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
                 R##_f1, Y##_f0); \
      _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
      if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
        { \
          R##_f1--; \
          _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                          _FP_DIV_MEAT_2_udiv_r); \
          if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
              && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                                _FP_DIV_MEAT_2_udiv_r)) \
            { \
              R##_f1--; \
              _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                              _FP_DIV_MEAT_2_udiv_r); \
            } \
        } \
      _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
      \
      if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
        { \
          /* This is a special case, not an optimization \
             (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
             As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
             R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we \
             know what kind of bits it is (sticky, guard, round), \
             we don't care.  We also don't care what the remainder is, \
             because the guard bit will be set anyway.  -jj */ \
          R##_f0 = -1; \
        } \
      else \
        { \
          udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
                      _FP_DIV_MEAT_2_udiv_r_f1, \
                      _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
          umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
                     _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
          _FP_DIV_MEAT_2_udiv_r_f0 = 0; \
          if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                             _FP_DIV_MEAT_2_udiv_r)) \
            { \
              R##_f0--; \
              _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                              _FP_DIV_MEAT_2_udiv_r); \
              if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
                  && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                                    _FP_DIV_MEAT_2_udiv_r)) \
                { \
                  R##_f0--; \
                  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                                  _FP_DIV_MEAT_2_udiv_r); \
                } \
            } \
          if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
                              _FP_DIV_MEAT_2_udiv_m)) \
            R##_f0 |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)


/* Square root algorithms:
   We have just one right now, maybe Newton approximation
   should be added for those machines where division is fast.  */

/* Bit-by-bit square root.  R accumulates the result, S and T are
   two-word scratch fractions, X is consumed, and Q is a modifiable
   lvalue holding the current trial bit: the first loop walks Q down
   through the high word until it is zero, the second restarts at the
   top bit of the low word and stops when Q reaches _FP_WORK_ROUND.
   A nonzero remainder sets _FP_WORK_STICKY (and possibly
   _FP_WORK_ROUND) in R_f0.  */
#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
  do \
    { \
      while (q) \
        { \
          T##_f1 = S##_f1 + (q); \
          if (T##_f1 <= X##_f1) \
            { \
              S##_f1 = T##_f1 + (q); \
              X##_f1 -= T##_f1; \
              R##_f1 += (q); \
            } \
          _FP_FRAC_SLL_2 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
        { \
          T##_f0 = S##_f0 + (q); \
          T##_f1 = S##_f1; \
          if (T##_f1 < X##_f1 \
              || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
            { \
              S##_f0 = T##_f0 + (q); \
              S##_f1 += (T##_f0 > S##_f0); \
              _FP_FRAC_DEC_2 (X, T); \
              R##_f0 += (q); \
            } \
          _FP_FRAC_SLL_2 (X, 1); \
          (q) >>= 1; \
        } \
      if (X##_f0 | X##_f1) \
        { \
          if (S##_f1 < X##_f1 \
              || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
            R##_f0 |= _FP_WORK_ROUND; \
          R##_f0 |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)


/* Assembly/disassembly for converting to/from integral types.
   No shifting or overflow handled here.  */

/* Store X into the integer lvalue R, whose width in bits is RSIZE.
   Only the low word is used when RSIZE <= _FP_W_TYPE_SIZE.  */
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
  (void) (((rsize) <= _FP_W_TYPE_SIZE) \
          ? ({ (r) = X##_f0; }) \
          : ({ \
              (r) = X##_f1; \
              (r) <<= _FP_W_TYPE_SIZE; \
              (r) += X##_f0; \
            }))

/* Load X from the integer R, whose width in bits is RSIZE.  The high
   word is zero when RSIZE <= _FP_W_TYPE_SIZE.  */
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
  do \
    { \
      X##_f0 = (r); \
      X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE \
                ? 0 \
                : (r) >> _FP_W_TYPE_SIZE); \
    } \
  while (0)

/* Convert FP values between word sizes.  */

/* A one-word fraction is stored in a variable named <name>_f.  Copying
   two words to one keeps only the low word.  */
#define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)

#define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))

#define _FP_FRAC_COPY_2_2(D, S) _FP_FRAC_COPY_2 (D, S)

#endif /* !SOFT_FP_OP_2_H */