1 1.1 mrg /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. 2 1.7 mrg Copyright (C) 1991-2022 Free Software Foundation, Inc. 3 1.1 mrg 4 1.1 mrg This file is part of the GNU C Library. 5 1.1 mrg 6 1.1 mrg The GNU C Library is free software; you can redistribute it and/or 7 1.1 mrg modify it under the terms of the GNU Lesser General Public 8 1.1 mrg License as published by the Free Software Foundation; either 9 1.1 mrg version 2.1 of the License, or (at your option) any later version. 10 1.1 mrg 11 1.1 mrg In addition to the permissions in the GNU Lesser General Public 12 1.1 mrg License, the Free Software Foundation gives you unlimited 13 1.1 mrg permission to link the compiled version of this file into 14 1.1 mrg combinations with other programs, and to distribute those 15 1.1 mrg combinations without any restriction coming from the use of this 16 1.1 mrg file. (The Lesser General Public License restrictions do apply in 17 1.1 mrg other respects; for example, they cover modification of the file, 18 1.1 mrg and distribution when not linked into a combine executable.) 19 1.1 mrg 20 1.1 mrg The GNU C Library is distributed in the hope that it will be useful, 21 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 22 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 1.1 mrg Lesser General Public License for more details. 24 1.1 mrg 25 1.1 mrg You should have received a copy of the GNU Lesser General Public 26 1.1 mrg License along with the GNU C Library; if not, see 27 1.1 mrg <http://www.gnu.org/licenses/>. */ 28 1.1 mrg 29 1.1 mrg /* You have to define the following before including this file: 30 1.1 mrg 31 1.1 mrg UWtype -- An unsigned type, default type for operations (typically a "word") 32 1.1 mrg UHWtype -- An unsigned type, at least half the size of UWtype. 33 1.1 mrg UDWtype -- An unsigned type, at least twice as large a UWtype 34 1.1 mrg W_TYPE_SIZE -- size in bits of UWtype 35 1.1 mrg 36 1.1 mrg UQItype -- Unsigned 8 bit type. 37 1.1 mrg SItype, USItype -- Signed and unsigned 32 bit types. 38 1.1 mrg DItype, UDItype -- Signed and unsigned 64 bit types. 39 1.1 mrg 40 1.1 mrg On a 32 bit machine UWtype should typically be USItype; 41 1.1 mrg on a 64 bit machine, UWtype should typically be UDItype. */ 42 1.1 mrg 43 1.1 mrg #define __BITS4 (W_TYPE_SIZE / 4) 44 1.1 mrg #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) 45 1.1 mrg #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) 46 1.1 mrg #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) 47 1.1 mrg 48 1.1 mrg #ifndef W_TYPE_SIZE 49 1.1 mrg #define W_TYPE_SIZE 32 50 1.1 mrg #define UWtype USItype 51 1.1 mrg #define UHWtype USItype 52 1.1 mrg #define UDWtype UDItype 53 1.1 mrg #endif 54 1.1 mrg 55 1.1 mrg /* Used in glibc only. */ 56 1.1 mrg #ifndef attribute_hidden 57 1.1 mrg #define attribute_hidden 58 1.1 mrg #endif 59 1.1 mrg 60 1.1 mrg extern const UQItype __clz_tab[256] attribute_hidden; 61 1.1 mrg 62 1.1 mrg /* Define auxiliary asm macros. 63 1.1 mrg 64 1.1 mrg 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two 65 1.1 mrg UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype 66 1.1 mrg word product in HIGH_PROD and LOW_PROD. 67 1.1 mrg 68 1.1 mrg 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a 69 1.1 mrg UDWtype product. This is just a variant of umul_ppmm. 70 1.1 mrg 71 1.1 mrg 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 72 1.1 mrg denominator) divides a UDWtype, composed by the UWtype integers 73 1.1 mrg HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient 74 1.1 mrg in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less 75 1.1 mrg than DENOMINATOR for correct operation. If, in addition, the most 76 1.1 mrg significant bit of DENOMINATOR must be 1, then the pre-processor symbol 77 1.1 mrg UDIV_NEEDS_NORMALIZATION is defined to 1. 78 1.1 mrg 79 1.1 mrg 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 80 1.1 mrg denominator). Like udiv_qrnnd but the numbers are signed. The quotient 81 1.1 mrg is rounded towards 0. 82 1.1 mrg 83 1.1 mrg 5) count_leading_zeros(count, x) counts the number of zero-bits from the 84 1.1 mrg msb to the first nonzero bit in the UWtype X. This is the number of 85 1.1 mrg steps X needs to be shifted left to set the msb. Undefined for X == 0, 86 1.1 mrg unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 87 1.1 mrg 88 1.1 mrg 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts 89 1.1 mrg from the least significant end. 90 1.1 mrg 91 1.1 mrg 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, 92 1.1 mrg high_addend_2, low_addend_2) adds two UWtype integers, composed by 93 1.1 mrg HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 94 1.1 mrg respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow 95 1.1 mrg (i.e. carry out) is not stored anywhere, and is lost. 96 1.1 mrg 97 1.1 mrg 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, 98 1.1 mrg high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, 99 1.1 mrg composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and 100 1.1 mrg LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE 101 1.1 mrg and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, 102 1.1 mrg and is lost. 103 1.1 mrg 104 1.1 mrg If any of these macros are left undefined for a particular CPU, 105 1.1 mrg C macros are used. */ 106 1.1 mrg 107 1.1 mrg /* The CPUs come in alphabetical order below. 108 1.1 mrg 109 1.1 mrg Please add support for more CPUs here, or improve the current support 110 1.1 mrg for the CPUs below! 111 1.1 mrg (E.g. WE32100, IBM360.) */ 112 1.1 mrg 113 1.1 mrg #if defined (__GNUC__) && !defined (NO_ASM) 114 1.1 mrg 115 1.1 mrg /* We sometimes need to clobber "cc" with gcc2, but that would not be 116 1.1 mrg understood by gcc1. Use cpp to avoid major code duplication. */ 117 1.1 mrg #if __GNUC__ < 2 118 1.1 mrg #define __CLOBBER_CC 119 1.1 mrg #define __AND_CLOBBER_CC 120 1.1 mrg #else /* __GNUC__ >= 2 */ 121 1.1 mrg #define __CLOBBER_CC : "cc" 122 1.1 mrg #define __AND_CLOBBER_CC , "cc" 123 1.1 mrg #endif /* __GNUC__ < 2 */ 124 1.1 mrg 125 1.1 mrg #if defined (__aarch64__) 126 1.1 mrg 127 1.1 mrg #if W_TYPE_SIZE == 32 128 1.1 mrg #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) 129 1.1 mrg #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) 130 1.1 mrg #define COUNT_LEADING_ZEROS_0 32 131 1.1 mrg #endif /* W_TYPE_SIZE == 32 */ 132 1.1 mrg 133 1.1 mrg #if W_TYPE_SIZE == 64 134 1.1 mrg #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X)) 135 1.1 mrg #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X)) 136 1.1 mrg #define COUNT_LEADING_ZEROS_0 64 137 1.1 mrg #endif /* W_TYPE_SIZE == 64 */ 138 1.1 mrg 139 1.1 mrg #endif /* __aarch64__ */ 140 1.1 mrg 141 1.1 mrg #if defined (__alpha) && W_TYPE_SIZE == 64 142 1.1 mrg /* There is a bug in g++ before version 5 that 143 1.1 mrg errors on __builtin_alpha_umulh. */ 144 1.1 mrg #if !defined(__cplusplus) || __GNUC__ >= 5 145 1.1 mrg #define umul_ppmm(ph, pl, m0, m1) \ 146 1.1 mrg do { \ 147 1.1 mrg UDItype __m0 = (m0), __m1 = (m1); \ 148 1.1 mrg (ph) = __builtin_alpha_umulh (__m0, __m1); \ 149 1.1 mrg (pl) = __m0 * __m1; \ 150 1.1 mrg } while (0) 151 1.1 mrg #define UMUL_TIME 46 152 1.1 mrg #endif /* !c++ */ 153 1.1 mrg #ifndef LONGLONG_STANDALONE 154 1.1 mrg #define udiv_qrnnd(q, r, n1, n0, d) \ 155 1.1 mrg do { UDItype __r; \ 156 1.1 mrg (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ 157 1.1 mrg (r) = __r; \ 158 1.1 mrg } while (0) 159 1.1 mrg extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype); 160 1.1 mrg #define UDIV_TIME 220 161 1.1 mrg #endif /* LONGLONG_STANDALONE */ 162 1.1 mrg #ifdef __alpha_cix__ 163 1.1 mrg #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) 164 1.1 mrg #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) 165 1.1 mrg #define COUNT_LEADING_ZEROS_0 64 166 1.1 mrg #else 167 1.1 mrg #define count_leading_zeros(COUNT,X) \ 168 1.1 mrg do { \ 169 1.1 mrg UDItype __xr = (X), __t, __a; \ 170 1.1 mrg __t = __builtin_alpha_cmpbge (0, __xr); \ 171 1.1 mrg __a = __clz_tab[__t ^ 0xff] - 1; \ 172 1.1 mrg __t = __builtin_alpha_extbl (__xr, __a); \ 173 1.1 mrg (COUNT) = 64 - (__clz_tab[__t] + __a*8); \ 174 1.1 mrg } while (0) 175 1.1 mrg #define count_trailing_zeros(COUNT,X) \ 176 1.1 mrg do { \ 177 1.1 mrg UDItype __xr = (X), __t, __a; \ 178 1.1 mrg __t = __builtin_alpha_cmpbge (0, __xr); \ 179 1.1 mrg __t = ~__t & -~__t; \ 180 1.1 mrg __a = ((__t & 0xCC) != 0) * 2; \ 181 1.1 mrg __a += ((__t & 0xF0) != 0) * 4; \ 182 1.1 mrg __a += ((__t & 0xAA) != 0); \ 183 1.1 mrg __t = __builtin_alpha_extbl (__xr, __a); \ 184 1.1 mrg __a <<= 3; \ 185 1.1 mrg __t &= -__t; \ 186 1.1 mrg __a += ((__t & 0xCC) != 0) * 2; \ 187 1.1 mrg __a += ((__t & 0xF0) != 0) * 4; \ 188 1.1 mrg __a += ((__t & 0xAA) != 0); \ 189 1.1 mrg (COUNT) = __a; \ 190 1.1 mrg } while (0) 191 1.1 mrg #endif /* __alpha_cix__ */ 192 1.1 mrg #endif /* __alpha */ 193 1.1 mrg 194 1.1 mrg #if defined (__arc__) && W_TYPE_SIZE == 32 195 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 196 1.1 mrg __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \ 197 1.1 mrg : "=r" ((USItype) (sh)), \ 198 1.1 mrg "=&r" ((USItype) (sl)) \ 199 1.1 mrg : "%r" ((USItype) (ah)), \ 200 1.3 mrg "rICal" ((USItype) (bh)), \ 201 1.1 mrg "%r" ((USItype) (al)), \ 202 1.4 mrg "rICal" ((USItype) (bl)) \ 203 1.4 mrg : "cc") 204 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 205 1.1 mrg __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \ 206 1.1 mrg : "=r" ((USItype) (sh)), \ 207 1.1 mrg "=&r" ((USItype) (sl)) \ 208 1.1 mrg : "r" ((USItype) (ah)), \ 209 1.3 mrg "rICal" ((USItype) (bh)), \ 210 1.1 mrg "r" ((USItype) (al)), \ 211 1.4 mrg "rICal" ((USItype) (bl)) \ 212 1.4 mrg : "cc") 213 1.1 mrg 214 1.1 mrg #define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v) 215 1.1 mrg #ifdef __ARC_NORM__ 216 1.1 mrg #define count_leading_zeros(count, x) \ 217 1.1 mrg do \ 218 1.1 mrg { \ 219 1.1 mrg SItype c_; \ 220 1.1 mrg \ 221 1.1 mrg __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\ 222 1.1 mrg (count) = c_ + 1; \ 223 1.1 mrg } \ 224 1.1 mrg while (0) 225 1.1 mrg #define COUNT_LEADING_ZEROS_0 32 226 1.3 mrg #endif /* __ARC_NORM__ */ 227 1.3 mrg #endif /* __arc__ */ 228 1.1 mrg 229 1.1 mrg #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \ 230 1.1 mrg && W_TYPE_SIZE == 32 231 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 232 1.1 mrg __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \ 233 1.1 mrg : "=r" ((USItype) (sh)), \ 234 1.1 mrg "=&r" ((USItype) (sl)) \ 235 1.1 mrg : "%r" ((USItype) (ah)), \ 236 1.1 mrg "rI" ((USItype) (bh)), \ 237 1.1 mrg "%r" ((USItype) (al)), \ 238 1.1 mrg "rI" ((USItype) (bl)) __CLOBBER_CC) 239 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 240 1.1 mrg __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \ 241 1.1 mrg : "=r" ((USItype) (sh)), \ 242 1.1 mrg "=&r" ((USItype) (sl)) \ 243 1.1 mrg : "r" ((USItype) (ah)), \ 244 1.1 mrg "rI" ((USItype) (bh)), \ 245 1.1 mrg "r" ((USItype) (al)), \ 246 1.1 mrg "rI" ((USItype) (bl)) __CLOBBER_CC) 247 1.1 mrg # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \ 248 1.1 mrg || defined(__ARM_ARCH_3__) 249 1.1 mrg # define umul_ppmm(xh, xl, a, b) \ 250 1.1 mrg do { \ 251 1.1 mrg register USItype __t0, __t1, __t2; \ 252 1.1 mrg __asm__ ("%@ Inlined umul_ppmm\n" \ 253 1.1 mrg " mov %2, %5, lsr #16\n" \ 254 1.1 mrg " mov %0, %6, lsr #16\n" \ 255 1.1 mrg " bic %3, %5, %2, lsl #16\n" \ 256 1.1 mrg " bic %4, %6, %0, lsl #16\n" \ 257 1.1 mrg " mul %1, %3, %4\n" \ 258 1.1 mrg " mul %4, %2, %4\n" \ 259 1.1 mrg " mul %3, %0, %3\n" \ 260 1.1 mrg " mul %0, %2, %0\n" \ 261 1.1 mrg " adds %3, %4, %3\n" \ 262 1.1 mrg " addcs %0, %0, #65536\n" \ 263 1.1 mrg " adds %1, %1, %3, lsl #16\n" \ 264 1.1 mrg " adc %0, %0, %3, lsr #16" \ 265 1.1 mrg : "=&r" ((USItype) (xh)), \ 266 1.1 mrg "=r" ((USItype) (xl)), \ 267 1.1 mrg "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ 268 1.1 mrg : "r" ((USItype) (a)), \ 269 1.1 mrg "r" ((USItype) (b)) __CLOBBER_CC ); \ 270 1.1 mrg } while (0) 271 1.1 mrg # define UMUL_TIME 20 272 1.1 mrg # else 273 1.1 mrg # define umul_ppmm(xh, xl, a, b) \ 274 1.1 mrg do { \ 275 1.1 mrg /* Generate umull, under compiler control. */ \ 276 1.1 mrg register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \ 277 1.1 mrg (xl) = (USItype)__t0; \ 278 1.1 mrg (xh) = (USItype)(__t0 >> 32); \ 279 1.1 mrg } while (0) 280 1.1 mrg # define UMUL_TIME 3 281 1.1 mrg # endif 282 1.1 mrg # define UDIV_TIME 100 283 1.1 mrg #endif /* __arm__ */ 284 1.1 mrg 285 1.1 mrg #if defined(__arm__) 286 1.1 mrg /* Let gcc decide how best to implement count_leading_zeros. */ 287 1.1 mrg #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 288 1.1 mrg #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) 289 1.1 mrg #define COUNT_LEADING_ZEROS_0 32 290 1.1 mrg #endif 291 1.1 mrg 292 1.1 mrg #if defined (__AVR__) 293 1.1 mrg 294 1.1 mrg #if W_TYPE_SIZE == 16 295 1.1 mrg #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 296 1.1 mrg #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) 297 1.1 mrg #define COUNT_LEADING_ZEROS_0 16 298 1.1 mrg #endif /* W_TYPE_SIZE == 16 */ 299 1.1 mrg 300 1.1 mrg #if W_TYPE_SIZE == 32 301 1.1 mrg #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) 302 1.1 mrg #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) 303 1.1 mrg #define COUNT_LEADING_ZEROS_0 32 304 1.1 mrg #endif /* W_TYPE_SIZE == 32 */ 305 1.1 mrg 306 1.1 mrg #if W_TYPE_SIZE == 64 307 1.1 mrg #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X)) 308 1.1 mrg #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X)) 309 1.1 mrg #define COUNT_LEADING_ZEROS_0 64 310 1.1 mrg #endif /* W_TYPE_SIZE == 64 */ 311 1.1 mrg 312 1.1 mrg #endif /* defined (__AVR__) */ 313 1.1 mrg 314 1.1 mrg #if defined (__CRIS__) 315 1.1 mrg 316 1.1 mrg #if __CRIS_arch_version >= 3 317 1.1 mrg #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) 318 1.1 mrg #define COUNT_LEADING_ZEROS_0 32 319 1.1 mrg #endif /* __CRIS_arch_version >= 3 */ 320 1.1 mrg 321 1.1 mrg #if __CRIS_arch_version >= 8 322 1.1 mrg #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) 323 1.1 mrg #endif /* __CRIS_arch_version >= 8 */ 324 1.1 mrg 325 1.1 mrg #if __CRIS_arch_version >= 10 326 1.1 mrg #define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v)) 327 1.1 mrg #else 328 1.1 mrg #define __umulsidi3 __umulsidi3 329 1.1 mrg extern UDItype __umulsidi3 (USItype, USItype); 330 1.1 mrg #endif /* __CRIS_arch_version >= 10 */ 331 1.1 mrg 332 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 333 1.1 mrg do { \ 334 1.1 mrg UDItype __x = __umulsidi3 (u, v); \ 335 1.1 mrg (w0) = (USItype) (__x); \ 336 1.1 mrg (w1) = (USItype) (__x >> 32); \ 337 1.1 mrg } while (0) 338 1.1 mrg 339 1.1 mrg /* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for 340 1.1 mrg DFmode ("double" intrinsics, avoiding two of the three insns handling 341 1.1 mrg carry), but defining them as open-code C composing and doing the 342 1.1 mrg operation in DImode (UDImode) shows that the DImode needs work: 343 1.1 mrg register pressure from requiring neighboring registers and the 344 1.1 mrg traffic to and from them come to dominate, in the 4.7 series. */ 345 1.1 mrg 346 1.1 mrg #endif /* defined (__CRIS__) */ 347 1.1 mrg 348 1.1 mrg #if defined (__hppa) && W_TYPE_SIZE == 32 349 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 350 1.1 mrg __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \ 351 1.1 mrg : "=r" ((USItype) (sh)), \ 352 1.1 mrg "=&r" ((USItype) (sl)) \ 353 1.1 mrg : "%rM" ((USItype) (ah)), \ 354 1.1 mrg "rM" ((USItype) (bh)), \ 355 1.1 mrg "%rM" ((USItype) (al)), \ 356 1.1 mrg "rM" ((USItype) (bl))) 357 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 358 1.1 mrg __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \ 359 1.1 mrg : "=r" ((USItype) (sh)), \ 360 1.1 mrg "=&r" ((USItype) (sl)) \ 361 1.1 mrg : "rM" ((USItype) (ah)), \ 362 1.1 mrg "rM" ((USItype) (bh)), \ 363 1.1 mrg "rM" ((USItype) (al)), \ 364 1.1 mrg "rM" ((USItype) (bl))) 365 1.1 mrg #if defined (_PA_RISC1_1) 366 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 367 1.1 mrg do { \ 368 1.1 mrg union \ 369 1.1 mrg { \ 370 1.1 mrg UDItype __f; \ 371 1.1 mrg struct {USItype __w1, __w0;} __w1w0; \ 372 1.1 mrg } __t; \ 373 1.1 mrg __asm__ ("xmpyu %1,%2,%0" \ 374 1.1 mrg : "=x" (__t.__f) \ 375 1.1 mrg : "x" ((USItype) (u)), \ 376 1.1 mrg "x" ((USItype) (v))); \ 377 1.1 mrg (w1) = __t.__w1w0.__w1; \ 378 1.1 mrg (w0) = __t.__w1w0.__w0; \ 379 1.1 mrg } while (0) 380 1.1 mrg #define UMUL_TIME 8 381 1.1 mrg #else 382 1.1 mrg #define UMUL_TIME 30 383 1.1 mrg #endif 384 1.1 mrg #define UDIV_TIME 40 385 1.1 mrg #define count_leading_zeros(count, x) \ 386 1.1 mrg do { \ 387 1.1 mrg USItype __tmp; \ 388 1.1 mrg __asm__ ( \ 389 1.1 mrg "ldi 1,%0\n" \ 390 1.1 mrg " extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ 391 1.1 mrg " extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\ 392 1.1 mrg " ldo 16(%0),%0 ; Yes. Perform add.\n" \ 393 1.1 mrg " extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ 394 1.1 mrg " extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\ 395 1.1 mrg " ldo 8(%0),%0 ; Yes. Perform add.\n" \ 396 1.1 mrg " extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ 397 1.1 mrg " extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\ 398 1.1 mrg " ldo 4(%0),%0 ; Yes. Perform add.\n" \ 399 1.1 mrg " extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ 400 1.1 mrg " extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\ 401 1.1 mrg " ldo 2(%0),%0 ; Yes. Perform add.\n" \ 402 1.1 mrg " extru %1,30,1,%1 ; Extract bit 1.\n" \ 403 1.1 mrg " sub %0,%1,%0 ; Subtract it.\n" \ 404 1.1 mrg : "=r" (count), "=r" (__tmp) : "1" (x)); \ 405 1.1 mrg } while (0) 406 1.1 mrg #endif 407 1.1 mrg 408 1.1 mrg #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32 409 1.1 mrg #if !defined (__zarch__) 410 1.1 mrg #define smul_ppmm(xh, xl, m0, m1) \ 411 1.1 mrg do { \ 412 1.1 mrg union {DItype __ll; \ 413 1.1 mrg struct {USItype __h, __l;} __i; \ 414 1.1 mrg } __x; \ 415 1.1 mrg __asm__ ("lr %N0,%1\n\tmr %0,%2" \ 416 1.1 mrg : "=&r" (__x.__ll) \ 417 1.1 mrg : "r" (m0), "r" (m1)); \ 418 1.1 mrg (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ 419 1.1 mrg } while (0) 420 1.1 mrg #define sdiv_qrnnd(q, r, n1, n0, d) \ 421 1.1 mrg do { \ 422 1.1 mrg union {DItype __ll; \ 423 1.1 mrg struct {USItype __h, __l;} __i; \ 424 1.1 mrg } __x; \ 425 1.1 mrg __x.__i.__h = n1; __x.__i.__l = n0; \ 426 1.1 mrg __asm__ ("dr %0,%2" \ 427 1.1 mrg : "=r" (__x.__ll) \ 428 1.1 mrg : "0" (__x.__ll), "r" (d)); \ 429 1.1 mrg (q) = __x.__i.__l; (r) = __x.__i.__h; \ 430 1.1 mrg } while (0) 431 1.1 mrg #else 432 1.1 mrg #define smul_ppmm(xh, xl, m0, m1) \ 433 1.1 mrg do { \ 434 1.1 mrg register SItype __r0 __asm__ ("0"); \ 435 1.1 mrg register SItype __r1 __asm__ ("1") = (m0); \ 436 1.1 mrg \ 437 1.1 mrg __asm__ ("mr\t%%r0,%3" \ 438 1.1 mrg : "=r" (__r0), "=r" (__r1) \ 439 1.1 mrg : "r" (__r1), "r" (m1)); \ 440 1.1 mrg (xh) = __r0; (xl) = __r1; \ 441 1.1 mrg } while (0) 442 1.1 mrg 443 1.1 mrg #define sdiv_qrnnd(q, r, n1, n0, d) \ 444 1.1 mrg do { \ 445 1.1 mrg register SItype __r0 __asm__ ("0") = (n1); \ 446 1.1 mrg register SItype __r1 __asm__ ("1") = (n0); \ 447 1.1 mrg \ 448 1.1 mrg __asm__ ("dr\t%%r0,%4" \ 449 1.1 mrg : "=r" (__r0), "=r" (__r1) \ 450 1.1 mrg : "r" (__r0), "r" (__r1), "r" (d)); \ 451 1.1 mrg (q) = __r1; (r) = __r0; \ 452 1.1 mrg } while (0) 453 1.1 mrg #endif /* __zarch__ */ 454 1.1 mrg #endif 455 1.1 mrg 456 1.1 mrg #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 457 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 458 1.1 mrg __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \ 459 1.1 mrg : "=r" ((USItype) (sh)), \ 460 1.1 mrg "=&r" ((USItype) (sl)) \ 461 1.1 mrg : "%0" ((USItype) (ah)), \ 462 1.1 mrg "g" ((USItype) (bh)), \ 463 1.1 mrg "%1" ((USItype) (al)), \ 464 1.1 mrg "g" ((USItype) (bl))) 465 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 466 1.1 mrg __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \ 467 1.1 mrg : "=r" ((USItype) (sh)), \ 468 1.1 mrg "=&r" ((USItype) (sl)) \ 469 1.1 mrg : "0" ((USItype) (ah)), \ 470 1.1 mrg "g" ((USItype) (bh)), \ 471 1.1 mrg "1" ((USItype) (al)), \ 472 1.1 mrg "g" ((USItype) (bl))) 473 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 474 1.1 mrg __asm__ ("mul{l} %3" \ 475 1.1 mrg : "=a" ((USItype) (w0)), \ 476 1.1 mrg "=d" ((USItype) (w1)) \ 477 1.1 mrg : "%0" ((USItype) (u)), \ 478 1.1 mrg "rm" ((USItype) (v))) 479 1.1 mrg #define udiv_qrnnd(q, r, n1, n0, dv) \ 480 1.1 mrg __asm__ ("div{l} %4" \ 481 1.1 mrg : "=a" ((USItype) (q)), \ 482 1.1 mrg "=d" ((USItype) (r)) \ 483 1.1 mrg : "0" ((USItype) (n0)), \ 484 1.1 mrg "1" ((USItype) (n1)), \ 485 1.1 mrg "rm" ((USItype) (dv))) 486 1.1 mrg #define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) 487 1.1 mrg #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) 488 1.1 mrg #define UMUL_TIME 40 489 1.1 mrg #define UDIV_TIME 40 490 1.1 mrg #endif /* 80x86 */ 491 1.1 mrg 492 1.1 mrg #if defined (__x86_64__) && W_TYPE_SIZE == 64 493 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 494 1.1 mrg __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \ 495 1.1 mrg : "=r" ((UDItype) (sh)), \ 496 1.1 mrg "=&r" ((UDItype) (sl)) \ 497 1.1 mrg : "%0" ((UDItype) (ah)), \ 498 1.1 mrg "rme" ((UDItype) (bh)), \ 499 1.1 mrg "%1" ((UDItype) (al)), \ 500 1.1 mrg "rme" ((UDItype) (bl))) 501 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 502 1.1 mrg __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \ 503 1.1 mrg : "=r" ((UDItype) (sh)), \ 504 1.1 mrg "=&r" ((UDItype) (sl)) \ 505 1.1 mrg : "0" ((UDItype) (ah)), \ 506 1.1 mrg "rme" ((UDItype) (bh)), \ 507 1.1 mrg "1" ((UDItype) (al)), \ 508 1.1 mrg "rme" ((UDItype) (bl))) 509 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 510 1.1 mrg __asm__ ("mul{q} %3" \ 511 1.1 mrg : "=a" ((UDItype) (w0)), \ 512 1.1 mrg "=d" ((UDItype) (w1)) \ 513 1.1 mrg : "%0" ((UDItype) (u)), \ 514 1.1 mrg "rm" ((UDItype) (v))) 515 1.1 mrg #define udiv_qrnnd(q, r, n1, n0, dv) \ 516 1.1 mrg __asm__ ("div{q} %4" \ 517 1.1 mrg : "=a" ((UDItype) (q)), \ 518 1.1 mrg "=d" ((UDItype) (r)) \ 519 1.1 mrg : "0" ((UDItype) (n0)), \ 520 1.1 mrg "1" ((UDItype) (n1)), \ 521 1.1 mrg "rm" ((UDItype) (dv))) 522 1.1 mrg #define count_leading_zeros(count, x) ((count) = __builtin_clzll (x)) 523 1.1 mrg #define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x)) 524 1.1 mrg #define UMUL_TIME 40 525 1.1 mrg #define UDIV_TIME 40 526 1.1 mrg #endif /* x86_64 */ 527 1.1 mrg 528 1.1 mrg #if defined (__i960__) && W_TYPE_SIZE == 32 529 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 530 1.1 mrg ({union {UDItype __ll; \ 531 1.1 mrg struct {USItype __l, __h;} __i; \ 532 1.1 mrg } __xx; \ 533 1.1 mrg __asm__ ("emul %2,%1,%0" \ 534 1.1 mrg : "=d" (__xx.__ll) \ 535 1.1 mrg : "%dI" ((USItype) (u)), \ 536 1.1 mrg "dI" ((USItype) (v))); \ 537 1.1 mrg (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 538 1.1 mrg #define __umulsidi3(u, v) \ 539 1.1 mrg ({UDItype __w; \ 540 1.1 mrg __asm__ ("emul %2,%1,%0" \ 541 1.1 mrg : "=d" (__w) \ 542 1.1 mrg : "%dI" ((USItype) (u)), \ 543 1.1 mrg "dI" ((USItype) (v))); \ 544 1.1 mrg __w; }) 545 1.1 mrg #endif /* __i960__ */ 546 1.1 mrg 547 1.1 mrg #if defined (__ia64) && W_TYPE_SIZE == 64 548 1.1 mrg /* This form encourages gcc (pre-release 3.4 at least) to emit predicated 549 1.1 mrg "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency. The generic 550 1.1 mrg code using "al<bl" arithmetically comes out making an actual 0 or 1 in a 551 1.1 mrg register, which takes an extra cycle. */ 552 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 553 1.1 mrg do { \ 554 1.1 mrg UWtype __x; \ 555 1.1 mrg __x = (al) - (bl); \ 556 1.1 mrg if ((al) < (bl)) \ 557 1.1 mrg (sh) = (ah) - (bh) - 1; \ 558 1.1 mrg else \ 559 1.1 mrg (sh) = (ah) - (bh); \ 560 1.1 mrg (sl) = __x; \ 561 1.1 mrg } while (0) 562 1.1 mrg 563 1.1 mrg /* Do both product parts in assembly, since that gives better code with 564 1.1 mrg all gcc versions. Some callers will just use the upper part, and in 565 1.1 mrg that situation we waste an instruction, but not any cycles. */ 566 1.1 mrg #define umul_ppmm(ph, pl, m0, m1) \ 567 1.1 mrg __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \ 568 1.1 mrg : "=&f" (ph), "=f" (pl) \ 569 1.1 mrg : "f" (m0), "f" (m1)) 570 1.1 mrg #define count_leading_zeros(count, x) \ 571 1.1 mrg do { \ 572 1.1 mrg UWtype _x = (x), _y, _a, _c; \ 573 1.1 mrg __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \ 574 1.1 mrg __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \ 575 1.1 mrg _c = (_a - 1) << 3; \ 576 1.1 mrg _x >>= _c; \ 577 1.1 mrg if (_x >= 1 << 4) \ 578 1.1 mrg _x >>= 4, _c += 4; \ 579 1.1 mrg if (_x >= 1 << 2) \ 580 1.1 mrg _x >>= 2, _c += 2; \ 581 1.1 mrg _c += _x >> 1; \ 582 1.1 mrg (count) = W_TYPE_SIZE - 1 - _c; \ 583 1.1 mrg } while (0) 584 1.1 mrg /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1 585 1.1 mrg based, and we don't need a special case for x==0 here */ 586 1.1 mrg #define count_trailing_zeros(count, x) \ 587 1.1 mrg do { \ 588 1.1 mrg UWtype __ctz_x = (x); \ 589 1.1 mrg __asm__ ("popcnt %0 = %1" \ 590 1.1 mrg : "=r" (count) \ 591 1.1 mrg : "r" ((__ctz_x-1) & ~__ctz_x)); \ 592 1.1 mrg } while (0) 593 1.1 mrg #define UMUL_TIME 14 594 1.1 mrg #endif 595 1.1 mrg 596 1.1 mrg #if defined (__M32R__) && W_TYPE_SIZE == 32 597 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 598 1.1 mrg /* The cmp clears the condition bit. */ \ 599 1.1 mrg __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \ 600 1.1 mrg : "=r" ((USItype) (sh)), \ 601 1.1 mrg "=&r" ((USItype) (sl)) \ 602 1.1 mrg : "0" ((USItype) (ah)), \ 603 1.1 mrg "r" ((USItype) (bh)), \ 604 1.1 mrg "1" ((USItype) (al)), \ 605 1.1 mrg "r" ((USItype) (bl)) \ 606 1.1 mrg : "cbit") 607 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 608 1.1 mrg /* The cmp clears the condition bit. */ \ 609 1.1 mrg __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \ 610 1.1 mrg : "=r" ((USItype) (sh)), \ 611 1.1 mrg "=&r" ((USItype) (sl)) \ 612 1.1 mrg : "0" ((USItype) (ah)), \ 613 1.1 mrg "r" ((USItype) (bh)), \ 614 1.1 mrg "1" ((USItype) (al)), \ 615 1.1 mrg "r" ((USItype) (bl)) \ 616 1.1 mrg : "cbit") 617 1.1 mrg #endif /* __M32R__ */ 618 1.1 mrg 619 1.1 mrg #if defined (__mc68000__) && W_TYPE_SIZE == 32 620 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 621 1.1 mrg __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \ 622 1.1 mrg : "=d" ((USItype) (sh)), \ 623 1.1 mrg "=&d" ((USItype) (sl)) \ 624 1.1 mrg : "%0" ((USItype) (ah)), \ 625 1.1 mrg "d" ((USItype) (bh)), \ 626 1.1 mrg "%1" ((USItype) (al)), \ 627 1.1 mrg "g" ((USItype) (bl))) 628 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 629 1.1 mrg __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \ 630 1.1 mrg : "=d" ((USItype) (sh)), \ 631 1.1 mrg "=&d" ((USItype) (sl)) \ 632 1.1 mrg : "0" ((USItype) (ah)), \ 633 1.1 mrg "d" ((USItype) (bh)), \ 634 1.1 mrg "1" ((USItype) (al)), \ 635 1.1 mrg "g" ((USItype) (bl))) 636 1.1 mrg 637 1.1 mrg /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r. */ 638 1.1 mrg #if (defined (__mc68020__) && !defined (__mc68060__)) 639 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 640 1.1 mrg __asm__ ("mulu%.l %3,%1:%0" \ 641 1.1 mrg : "=d" ((USItype) (w0)), \ 642 1.1 mrg "=d" ((USItype) (w1)) \ 643 1.1 mrg : "%0" ((USItype) (u)), \ 644 1.1 mrg "dmi" ((USItype) (v))) 645 1.1 mrg #define UMUL_TIME 45 646 1.1 mrg #define udiv_qrnnd(q, r, n1, n0, d) \ 647 1.1 mrg __asm__ ("divu%.l %4,%1:%0" \ 648 1.1 mrg : "=d" ((USItype) (q)), \ 649 1.1 mrg "=d" ((USItype) (r)) \ 650 1.1 mrg : "0" ((USItype) (n0)), \ 651 1.1 mrg "1" ((USItype) (n1)), \ 652 1.1 mrg "dmi" ((USItype) (d))) 653 1.1 mrg #define UDIV_TIME 90 654 1.1 mrg #define sdiv_qrnnd(q, r, n1, n0, d) \ 655 1.1 mrg __asm__ ("divs%.l %4,%1:%0" \ 656 1.1 mrg : "=d" ((USItype) (q)), \ 657 1.1 mrg "=d" ((USItype) (r)) \ 658 1.1 mrg : "0" ((USItype) (n0)), \ 659 1.1 mrg "1" ((USItype) (n1)), \ 660 1.1 mrg "dmi" ((USItype) (d))) 661 1.1 mrg 662 1.1 mrg #elif defined (__mcoldfire__) /* not mc68020 */ 663 1.1 mrg 664 1.1 mrg #define umul_ppmm(xh, xl, a, b) \ 665 1.1 mrg __asm__ ("| Inlined umul_ppmm\n" \ 666 1.1 mrg " move%.l %2,%/d0\n" \ 667 1.1 mrg " move%.l %3,%/d1\n" \ 668 1.1 mrg " move%.l %/d0,%/d2\n" \ 669 1.1 mrg " swap %/d0\n" \ 670 1.1 mrg " move%.l %/d1,%/d3\n" \ 671 1.1 mrg " swap %/d1\n" \ 672 1.1 mrg " move%.w %/d2,%/d4\n" \ 673 1.1 mrg " mulu %/d3,%/d4\n" \ 674 1.1 mrg " mulu %/d1,%/d2\n" \ 675 1.1 mrg " mulu %/d0,%/d3\n" \ 676 1.1 mrg " mulu %/d0,%/d1\n" \ 677 1.1 mrg " move%.l %/d4,%/d0\n" \ 678 1.1 mrg " clr%.w %/d0\n" \ 679 1.1 mrg " swap %/d0\n" \ 680 1.1 mrg " add%.l %/d0,%/d2\n" \ 681 1.1 mrg " add%.l %/d3,%/d2\n" \ 682 1.1 mrg " jcc 1f\n" \ 683 1.1 mrg " add%.l %#65536,%/d1\n" \ 684 1.1 mrg "1: swap %/d2\n" \ 685 1.1 mrg " moveq %#0,%/d0\n" \ 686 1.1 mrg " move%.w %/d2,%/d0\n" \ 687 1.1 mrg " move%.w %/d4,%/d2\n" \ 688 1.1 mrg " move%.l %/d2,%1\n" \ 689 1.1 mrg " add%.l %/d1,%/d0\n" \ 690 1.1 mrg " move%.l %/d0,%0" \ 691 1.1 mrg : "=g" ((USItype) (xh)), \ 692 1.1 mrg "=g" ((USItype) (xl)) \ 693 1.1 mrg : "g" ((USItype) (a)), \ 694 1.1 mrg "g" ((USItype) (b)) \ 695 1.1 mrg : "d0", "d1", "d2", "d3", "d4") 696 1.1 mrg #define UMUL_TIME 100 697 1.1 mrg #define UDIV_TIME 400 698 1.1 mrg #else /* not ColdFire */ 699 1.1 mrg /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX. */ 700 1.1 mrg #define umul_ppmm(xh, xl, a, b) \ 701 1.1 mrg __asm__ ("| Inlined umul_ppmm\n" \ 702 1.1 mrg " move%.l %2,%/d0\n" \ 703 1.1 mrg " move%.l %3,%/d1\n" \ 704 1.1 mrg " move%.l %/d0,%/d2\n" \ 705 1.1 mrg " swap %/d0\n" \ 706 1.1 mrg " move%.l %/d1,%/d3\n" \ 707 1.1 mrg " swap %/d1\n" \ 708 1.1 mrg " move%.w %/d2,%/d4\n" \ 709 1.1 mrg " mulu %/d3,%/d4\n" \ 710 1.1 mrg " mulu %/d1,%/d2\n" \ 711 1.1 mrg " mulu %/d0,%/d3\n" \ 712 1.1 mrg " mulu %/d0,%/d1\n" \ 713 1.1 mrg " move%.l %/d4,%/d0\n" \ 714 1.1 mrg " eor%.w %/d0,%/d0\n" \ 715 1.1 mrg " swap %/d0\n" \ 716 1.1 mrg " add%.l %/d0,%/d2\n" \ 717 1.1 mrg " add%.l %/d3,%/d2\n" \ 718 1.1 mrg " jcc 1f\n" \ 719 1.1 mrg " add%.l %#65536,%/d1\n" \ 720 1.1 mrg "1: swap %/d2\n" \ 721 1.1 mrg " moveq %#0,%/d0\n" \ 722 1.1 mrg " move%.w %/d2,%/d0\n" \ 723 1.1 mrg " move%.w %/d4,%/d2\n" \ 724 1.1 mrg " move%.l %/d2,%1\n" \ 725 1.1 mrg " add%.l %/d1,%/d0\n" \ 726 1.1 mrg " move%.l %/d0,%0" \ 727 1.1 mrg : "=g" ((USItype) (xh)), \ 728 1.1 mrg "=g" ((USItype) (xl)) \ 729 1.1 mrg : "g" ((USItype) (a)), \ 730 1.1 mrg "g" ((USItype) (b)) \ 731 1.1 mrg : "d0", "d1", "d2", "d3", "d4") 732 1.1 mrg #define UMUL_TIME 100 733 1.1 mrg #define UDIV_TIME 400 734 1.1 mrg 735 1.1 mrg #endif /* not mc68020 */ 736 1.1 mrg 737 1.1 mrg /* The '020, '030, '040 and '060 have bitfield insns. 738 1.1 mrg cpu32 disguises as a 68020, but lacks them. */ 739 1.1 mrg #if defined (__mc68020__) && !defined (__mcpu32__) 740 1.1 mrg #define count_leading_zeros(count, x) \ 741 1.1 mrg __asm__ ("bfffo %1{%b2:%b2},%0" \ 742 1.1 mrg : "=d" ((USItype) (count)) \ 743 1.1 mrg : "od" ((USItype) (x)), "n" (0)) 744 1.1 mrg /* Some ColdFire architectures have a ff1 instruction supported via 745 1.1 mrg __builtin_clz. */ 746 1.1 mrg #elif defined (__mcfisaaplus__) || defined (__mcfisac__) 747 1.1 mrg #define count_leading_zeros(count,x) ((count) = __builtin_clz (x)) 748 1.1 mrg #define COUNT_LEADING_ZEROS_0 32 749 1.1 mrg #endif 750 1.1 mrg #endif /* mc68000 */ 751 1.1 mrg 752 1.1 mrg #if defined (__m88000__) && W_TYPE_SIZE == 32 753 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 754 1.1 mrg __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \ 755 1.1 mrg : "=r" ((USItype) (sh)), \ 756 1.1 mrg "=&r" ((USItype) (sl)) \ 757 1.1 mrg : "%rJ" ((USItype) (ah)), \ 758 1.1 mrg "rJ" ((USItype) (bh)), \ 759 1.1 mrg "%rJ" ((USItype) (al)), \ 760 1.1 mrg "rJ" ((USItype) (bl))) 761 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 762 1.1 mrg __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \ 763 1.1 mrg : "=r" ((USItype) (sh)), \ 764 1.1 mrg "=&r" ((USItype) (sl)) \ 765 1.1 mrg : "rJ" ((USItype) (ah)), \ 766 1.1 mrg "rJ" ((USItype) (bh)), \ 767 1.1 mrg "rJ" ((USItype) (al)), \ 768 1.1 mrg "rJ" ((USItype) (bl))) 769 1.1 mrg #define count_leading_zeros(count, x) \ 770 1.1 mrg do { \ 771 1.1 mrg USItype __cbtmp; \ 772 1.1 mrg __asm__ ("ff1 %0,%1" \ 773 1.1 mrg : "=r" (__cbtmp) \ 774 1.1 mrg : "r" ((USItype) (x))); \ 775 1.1 mrg (count) = __cbtmp ^ 31; \ 776 1.1 mrg } while (0) 777 1.1 mrg #define COUNT_LEADING_ZEROS_0 63 /* sic */ 778 1.1 mrg #if defined (__mc88110__) 779 1.1 mrg #define umul_ppmm(wh, wl, u, v) \ 780 1.1 mrg do { \ 781 1.1 mrg union {UDItype __ll; \ 782 1.1 mrg struct {USItype __h, __l;} __i; \ 783 1.1 mrg } __xx; \ 784 1.1 mrg __asm__ ("mulu.d %0,%1,%2" \ 785 1.1 mrg : "=r" (__xx.__ll) \ 786 1.1 mrg : "r" ((USItype) (u)), \ 787 1.1 mrg "r" ((USItype) (v))); \ 788 1.1 mrg (wh) = __xx.__i.__h; \ 789 1.1 mrg (wl) = __xx.__i.__l; \ 790 1.1 mrg } while (0) 791 1.1 mrg #define udiv_qrnnd(q, r, n1, n0, d) \ 792 1.1 mrg ({union {UDItype __ll; \ 793 1.1 mrg struct {USItype __h, __l;} __i; \ 794 1.1 mrg } __xx; \ 795 1.1 mrg USItype __q; \ 796 1.1 mrg __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 797 1.1 mrg __asm__ ("divu.d %0,%1,%2" \ 798 1.1 mrg : "=r" (__q) \ 799 1.1 mrg : "r" (__xx.__ll), \ 800 1.1 mrg "r" ((USItype) (d))); \ 801 1.1 mrg (r) = (n0) - __q * (d); (q) = __q; }) 802 1.1 mrg #define UMUL_TIME 5 803 1.1 mrg #define UDIV_TIME 25 804 1.1 mrg #else 805 1.1 mrg #define UMUL_TIME 17 806 1.1 mrg #define UDIV_TIME 150 807 1.1 mrg #endif /* __mc88110__ */ 808 1.1 mrg #endif /* __m88000__ */ 809 1.1 mrg 810 1.1 mrg #if defined (__mn10300__) 811 1.1 mrg # if defined (__AM33__) 812 1.1 mrg # define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 813 1.1 mrg # define umul_ppmm(w1, w0, u, v) \ 814 1.1 mrg asm("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v)) 815 1.1 mrg # define smul_ppmm(w1, w0, u, v) \ 816 1.1 mrg asm("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v)) 817 1.1 mrg # else 818 1.1 mrg # define umul_ppmm(w1, w0, u, v) \ 819 1.1 mrg asm("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v)) 820 1.1 mrg # define smul_ppmm(w1, w0, u, v) \ 821 1.1 mrg asm("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v)) 822 1.1 mrg # endif 823 1.1 mrg # define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 824 1.1 mrg do { \ 825 1.1 mrg DWunion __s, __a, __b; \ 826 1.1 mrg __a.s.low = (al); __a.s.high = (ah); \ 827 1.1 mrg __b.s.low = (bl); __b.s.high = (bh); \ 828 1.1 mrg __s.ll = __a.ll + __b.ll; \ 829 1.1 mrg (sl) = __s.s.low; (sh) = __s.s.high; \ 830 1.1 mrg } while (0) 831 1.1 mrg # define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 832 1.1 mrg do { \ 833 1.1 mrg DWunion __s, __a, __b; \ 834 1.1 mrg __a.s.low = (al); __a.s.high = (ah); \ 835 1.1 mrg __b.s.low = (bl); __b.s.high = (bh); \ 836 1.1 mrg __s.ll = __a.ll - __b.ll; \ 837 1.1 mrg (sl) = __s.s.low; (sh) = __s.s.high; \ 838 1.1 mrg } while (0) 839 1.1 mrg # define udiv_qrnnd(q, r, nh, nl, d) \ 840 1.1 mrg asm("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh)) 841 1.1 mrg # define sdiv_qrnnd(q, r, nh, nl, d) \ 842 1.1 mrg asm("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh)) 843 1.1 mrg # define UMUL_TIME 3 844 1.1 mrg # define UDIV_TIME 38 845 1.1 mrg #endif 846 1.1 mrg 847 1.1 mrg #if defined (__mips__) && W_TYPE_SIZE == 32 848 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 849 1.1 mrg do { \ 850 1.1 mrg UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \ 851 1.1 mrg (w1) = (USItype) (__x >> 32); \ 852 1.1 mrg (w0) = (USItype) (__x); \ 853 1.1 mrg } while (0) 854 1.1 mrg #define UMUL_TIME 10 855 1.1 mrg #define UDIV_TIME 100 856 1.1 mrg 857 1.1 mrg #if (__mips == 32 || __mips == 64) && ! defined (__mips16) 858 1.1 mrg #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 859 1.1 mrg #define COUNT_LEADING_ZEROS_0 32 860 1.1 mrg #endif 861 1.1 mrg #endif /* __mips__ */ 862 1.1 mrg 863 1.1 mrg /* FIXME: We should test _IBMR2 here when we add assembly support for the 864 1.1 mrg system vendor compilers. 865 1.1 mrg FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good 866 1.1 mrg enough, since that hits ARM and m68k too. */ 867 1.1 mrg #if (defined (_ARCH_PPC) /* AIX */ \ 868 1.1 mrg || defined (__powerpc__) /* gcc */ \ 869 1.1 mrg || defined (__POWERPC__) /* BEOS */ \ 870 1.1 mrg || defined (__ppc__) /* Darwin */ \ 871 1.1 mrg || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ 872 1.1 mrg || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ 873 1.1 mrg && CPU_FAMILY == PPC) \ 874 1.1 mrg ) && W_TYPE_SIZE == 32 875 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 876 1.1 mrg do { \ 877 1.1 mrg if (__builtin_constant_p (bh) && (bh) == 0) \ 878 1.1 mrg __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ 879 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 880 1.1 mrg else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ 881 1.1 mrg __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ 882 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 883 1.1 mrg else \ 884 1.1 mrg __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ 885 1.1 mrg : "=r" (sh), "=&r" (sl) \ 886 1.1 mrg : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ 887 1.1 mrg } while (0) 888 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 889 1.1 mrg do { \ 890 1.1 mrg if (__builtin_constant_p (ah) && (ah) == 0) \ 891 1.1 mrg __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ 892 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 893 1.1 mrg else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ 894 1.1 mrg __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ 895 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 896 1.1 mrg else if (__builtin_constant_p (bh) && (bh) == 0) \ 897 1.1 mrg __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ 898 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 899 1.1 mrg else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ 900 1.1 mrg __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ 901 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 902 1.1 mrg else \ 903 1.1 mrg __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ 904 1.1 mrg : "=r" (sh), "=&r" (sl) \ 905 1.1 mrg : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ 906 1.1 mrg } while (0) 907 1.1 mrg #define count_leading_zeros(count, x) \ 908 1.1 mrg __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x)) 909 1.1 mrg #define COUNT_LEADING_ZEROS_0 32 910 1.1 mrg #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \ 911 1.1 mrg || defined (__ppc__) \ 912 1.1 mrg || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ 913 1.1 mrg || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ 914 1.1 mrg && CPU_FAMILY == PPC) 915 1.1 mrg #define umul_ppmm(ph, pl, m0, m1) \ 916 1.1 mrg do { \ 917 1.1 mrg USItype __m0 = (m0), __m1 = (m1); \ 918 1.1 mrg __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ 919 1.1 mrg (pl) = __m0 * __m1; \ 920 1.1 mrg } while (0) 921 1.1 mrg #define UMUL_TIME 15 922 1.1 mrg #define smul_ppmm(ph, pl, m0, m1) \ 923 1.1 mrg do { \ 924 1.1 mrg SItype __m0 = (m0), __m1 = (m1); \ 925 1.1 mrg __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ 926 1.1 mrg (pl) = __m0 * __m1; \ 927 1.1 mrg } while (0) 928 1.1 mrg #define SMUL_TIME 14 929 1.1 mrg #define UDIV_TIME 120 930 1.1 mrg #endif 931 1.1 mrg #endif /* 32-bit POWER architecture variants. */ 932 1.1 mrg 933 1.1 mrg /* We should test _IBMR2 here when we add assembly support for the system 934 1.1 mrg vendor compilers. */ 935 1.1 mrg #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64 936 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 937 1.1 mrg do { \ 938 1.1 mrg if (__builtin_constant_p (bh) && (bh) == 0) \ 939 1.1 mrg __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ 940 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 941 1.1 mrg else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ 942 1.1 mrg __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ 943 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 944 1.1 mrg else \ 945 1.1 mrg __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ 946 1.1 mrg : "=r" (sh), "=&r" (sl) \ 947 1.1 mrg : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ 948 1.1 mrg } while (0) 949 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 950 1.1 mrg do { \ 951 1.1 mrg if (__builtin_constant_p (ah) && (ah) == 0) \ 952 1.1 mrg __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ 953 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 954 1.1 mrg else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ 955 1.1 mrg __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ 956 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 957 1.1 mrg else if (__builtin_constant_p (bh) && (bh) == 0) \ 958 1.1 mrg __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ 959 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 960 1.1 mrg else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ 961 1.1 mrg __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ 962 1.1 mrg : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 963 1.1 mrg else \ 964 1.1 mrg __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ 965 1.1 mrg : "=r" (sh), "=&r" (sl) \ 966 1.1 mrg : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ 967 1.1 mrg } while (0) 968 1.1 mrg #define count_leading_zeros(count, x) \ 969 1.1 mrg __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x)) 970 1.1 mrg #define COUNT_LEADING_ZEROS_0 64 971 1.1 mrg #define umul_ppmm(ph, pl, m0, m1) \ 972 1.1 mrg do { \ 973 1.1 mrg UDItype __m0 = (m0), __m1 = (m1); \ 974 1.1 mrg __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ 975 1.1 mrg (pl) = __m0 * __m1; \ 976 1.1 mrg } while (0) 977 1.1 mrg #define UMUL_TIME 15 978 1.1 mrg #define smul_ppmm(ph, pl, m0, m1) \ 979 1.1 mrg do { \ 980 1.1 mrg DItype __m0 = (m0), __m1 = (m1); \ 981 1.1 mrg __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ 982 1.1 mrg (pl) = __m0 * __m1; \ 983 1.1 mrg } while (0) 984 1.1 mrg #define SMUL_TIME 14 /* ??? */ 985 1.1 mrg #define UDIV_TIME 120 /* ??? */ 986 1.1 mrg #endif /* 64-bit PowerPC. */ 987 1.1 mrg 988 1.1 mrg #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 989 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 990 1.1 mrg __asm__ ("a %1,%5\n\tae %0,%3" \ 991 1.1 mrg : "=r" ((USItype) (sh)), \ 992 1.1 mrg "=&r" ((USItype) (sl)) \ 993 1.1 mrg : "%0" ((USItype) (ah)), \ 994 1.1 mrg "r" ((USItype) (bh)), \ 995 1.1 mrg "%1" ((USItype) (al)), \ 996 1.1 mrg "r" ((USItype) (bl))) 997 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 998 1.1 mrg __asm__ ("s %1,%5\n\tse %0,%3" \ 999 1.1 mrg : "=r" ((USItype) (sh)), \ 1000 1.1 mrg "=&r" ((USItype) (sl)) \ 1001 1.1 mrg : "0" ((USItype) (ah)), \ 1002 1.1 mrg "r" ((USItype) (bh)), \ 1003 1.1 mrg "1" ((USItype) (al)), \ 1004 1.1 mrg "r" ((USItype) (bl))) 1005 1.1 mrg #define umul_ppmm(ph, pl, m0, m1) \ 1006 1.1 mrg do { \ 1007 1.1 mrg USItype __m0 = (m0), __m1 = (m1); \ 1008 1.1 mrg __asm__ ( \ 1009 1.1 mrg "s r2,r2\n" \ 1010 1.1 mrg " mts r10,%2\n" \ 1011 1.1 mrg " m r2,%3\n" \ 1012 1.1 mrg " m r2,%3\n" \ 1013 1.1 mrg " m r2,%3\n" \ 1014 1.1 mrg " m r2,%3\n" \ 1015 1.1 mrg " m r2,%3\n" \ 1016 1.1 mrg " m r2,%3\n" \ 1017 1.1 mrg " m r2,%3\n" \ 1018 1.1 mrg " m r2,%3\n" \ 1019 1.1 mrg " m r2,%3\n" \ 1020 1.1 mrg " m r2,%3\n" \ 1021 1.1 mrg " m r2,%3\n" \ 1022 1.1 mrg " m r2,%3\n" \ 1023 1.1 mrg " m r2,%3\n" \ 1024 1.1 mrg " m r2,%3\n" \ 1025 1.1 mrg " m r2,%3\n" \ 1026 1.1 mrg " m r2,%3\n" \ 1027 1.1 mrg " cas %0,r2,r0\n" \ 1028 1.1 mrg " mfs r10,%1" \ 1029 1.1 mrg : "=r" ((USItype) (ph)), \ 1030 1.1 mrg "=r" ((USItype) (pl)) \ 1031 1.1 mrg : "%r" (__m0), \ 1032 1.1 mrg "r" (__m1) \ 1033 1.1 mrg : "r2"); \ 1034 1.1 mrg (ph) += ((((SItype) __m0 >> 31) & __m1) \ 1035 1.1 mrg + (((SItype) __m1 >> 31) & __m0)); \ 1036 1.1 mrg } while (0) 1037 1.1 mrg #define UMUL_TIME 20 1038 1.1 mrg #define UDIV_TIME 200 1039 1.1 mrg #define count_leading_zeros(count, x) \ 1040 1.1 mrg do { \ 1041 1.1 mrg if ((x) >= 0x10000) \ 1042 1.1 mrg __asm__ ("clz %0,%1" \ 1043 1.1 mrg : "=r" ((USItype) (count)) \ 1044 1.1 mrg : "r" ((USItype) (x) >> 16)); \ 1045 1.1 mrg else \ 1046 1.1 mrg { \ 1047 1.1 mrg __asm__ ("clz %0,%1" \ 1048 1.1 mrg : "=r" ((USItype) (count)) \ 1049 1.1 mrg : "r" ((USItype) (x))); \ 1050 1.1 mrg (count) += 16; \ 1051 1.1 mrg } \ 1052 1.1 mrg } while (0) 1053 1.1 mrg #endif 1054 1.1 mrg 1055 1.4 mrg #if defined(__riscv) 1056 1.4 mrg #ifdef __riscv_mul 1057 1.4 mrg #define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v)) 1058 1.4 mrg #define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b)) 1059 1.4 mrg #else 1060 1.4 mrg #if __riscv_xlen == 32 1061 1.4 mrg #define MULUW3 "call __mulsi3" 1062 1.4 mrg #elif __riscv_xlen == 64 1063 1.4 mrg #define MULUW3 "call __muldi3" 1064 1.4 mrg #else 1065 1.4 mrg #error unsupport xlen 1066 1.4 mrg #endif /* __riscv_xlen */ 1067 1.4 mrg /* We rely on the fact that MULUW3 doesn't clobber the t-registers. 1068 1.4 mrg It can get better register allocation result. */ 1069 1.4 mrg #define __muluw3(a, b) \ 1070 1.4 mrg ({ \ 1071 1.4 mrg register UWtype __op0 asm ("a0") = a; \ 1072 1.4 mrg register UWtype __op1 asm ("a1") = b; \ 1073 1.4 mrg asm volatile (MULUW3 \ 1074 1.4 mrg : "+r" (__op0), "+r" (__op1) \ 1075 1.4 mrg : \ 1076 1.4 mrg : "ra", "a2", "a3"); \ 1077 1.4 mrg __op0; \ 1078 1.4 mrg }) 1079 1.4 mrg #endif /* __riscv_mul */ 1080 1.4 mrg #define umul_ppmm(w1, w0, u, v) \ 1081 1.4 mrg do { \ 1082 1.4 mrg UWtype __x0, __x1, __x2, __x3; \ 1083 1.4 mrg UHWtype __ul, __vl, __uh, __vh; \ 1084 1.4 mrg \ 1085 1.4 mrg __ul = __ll_lowpart (u); \ 1086 1.4 mrg __uh = __ll_highpart (u); \ 1087 1.4 mrg __vl = __ll_lowpart (v); \ 1088 1.4 mrg __vh = __ll_highpart (v); \ 1089 1.4 mrg \ 1090 1.4 mrg __x0 = __muluw3 (__ul, __vl); \ 1091 1.4 mrg __x1 = __muluw3 (__ul, __vh); \ 1092 1.4 mrg __x2 = __muluw3 (__uh, __vl); \ 1093 1.4 mrg __x3 = __muluw3 (__uh, __vh); \ 1094 1.4 mrg \ 1095 1.4 mrg __x1 += __ll_highpart (__x0);/* this can't give carry */ \ 1096 1.4 mrg __x1 += __x2; /* but this indeed can */ \ 1097 1.4 mrg if (__x1 < __x2) /* did we get it? */ \ 1098 1.4 mrg __x3 += __ll_B; /* yes, add it in the proper pos. */ \ 1099 1.4 mrg \ 1100 1.4 mrg (w1) = __x3 + __ll_highpart (__x1); \ 1101 1.4 mrg (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ 1102 1.4 mrg } while (0) 1103 1.4 mrg #endif /* __riscv */ 1104 1.4 mrg 1105 1.3 mrg #if defined(__sh__) && W_TYPE_SIZE == 32 1106 1.1 mrg #ifndef __sh1__ 1107 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 1108 1.1 mrg __asm__ ( \ 1109 1.1 mrg "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \ 1110 1.1 mrg : "=r<" ((USItype)(w1)), \ 1111 1.1 mrg "=r<" ((USItype)(w0)) \ 1112 1.1 mrg : "r" ((USItype)(u)), \ 1113 1.1 mrg "r" ((USItype)(v)) \ 1114 1.1 mrg : "macl", "mach") 1115 1.1 mrg #define UMUL_TIME 5 1116 1.1 mrg #endif 1117 1.1 mrg 1118 1.1 mrg /* This is the same algorithm as __udiv_qrnnd_c. */ 1119 1.1 mrg #define UDIV_NEEDS_NORMALIZATION 1 1120 1.1 mrg 1121 1.3 mrg #ifdef __FDPIC__ 1122 1.3 mrg /* FDPIC needs a special version of the asm fragment to extract the 1123 1.3 mrg code address from the function descriptor. __udiv_qrnnd_16 is 1124 1.3 mrg assumed to be local and not to use the GOT, so loading r12 is 1125 1.3 mrg not needed. */ 1126 1.3 mrg #define udiv_qrnnd(q, r, n1, n0, d) \ 1127 1.3 mrg do { \ 1128 1.3 mrg extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ 1129 1.3 mrg __attribute__ ((visibility ("hidden"))); \ 1130 1.3 mrg /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ 1131 1.3 mrg __asm__ ( \ 1132 1.3 mrg "mov%M4 %4,r5\n" \ 1133 1.3 mrg " swap.w %3,r4\n" \ 1134 1.3 mrg " swap.w r5,r6\n" \ 1135 1.3 mrg " mov.l @%5,r2\n" \ 1136 1.3 mrg " jsr @r2\n" \ 1137 1.3 mrg " shll16 r6\n" \ 1138 1.3 mrg " swap.w r4,r4\n" \ 1139 1.3 mrg " mov.l @%5,r2\n" \ 1140 1.3 mrg " jsr @r2\n" \ 1141 1.3 mrg " swap.w r1,%0\n" \ 1142 1.3 mrg " or r1,%0" \ 1143 1.3 mrg : "=r" (q), "=&z" (r) \ 1144 1.3 mrg : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ 1145 1.3 mrg : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ 1146 1.3 mrg } while (0) 1147 1.3 mrg #else 1148 1.1 mrg #define udiv_qrnnd(q, r, n1, n0, d) \ 1149 1.1 mrg do { \ 1150 1.1 mrg extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ 1151 1.1 mrg __attribute__ ((visibility ("hidden"))); \ 1152 1.1 mrg /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ 1153 1.1 mrg __asm__ ( \ 1154 1.1 mrg "mov%M4 %4,r5\n" \ 1155 1.1 mrg " swap.w %3,r4\n" \ 1156 1.1 mrg " swap.w r5,r6\n" \ 1157 1.1 mrg " jsr @%5\n" \ 1158 1.1 mrg " shll16 r6\n" \ 1159 1.1 mrg " swap.w r4,r4\n" \ 1160 1.1 mrg " jsr @%5\n" \ 1161 1.1 mrg " swap.w r1,%0\n" \ 1162 1.1 mrg " or r1,%0" \ 1163 1.1 mrg : "=r" (q), "=&z" (r) \ 1164 1.1 mrg : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ 1165 1.1 mrg : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ 1166 1.1 mrg } while (0) 1167 1.3 mrg #endif /* __FDPIC__ */ 1168 1.1 mrg 1169 1.1 mrg #define UDIV_TIME 80 1170 1.1 mrg 1171 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1172 1.1 mrg __asm__ ("clrt;subc %5,%1; subc %4,%0" \ 1173 1.1 mrg : "=r" (sh), "=r" (sl) \ 1174 1.1 mrg : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t") 1175 1.1 mrg 1176 1.1 mrg #endif /* __sh__ */ 1177 1.1 mrg 1178 1.1 mrg #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \ 1179 1.1 mrg && W_TYPE_SIZE == 32 1180 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1181 1.1 mrg __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \ 1182 1.1 mrg : "=r" ((USItype) (sh)), \ 1183 1.1 mrg "=&r" ((USItype) (sl)) \ 1184 1.1 mrg : "%rJ" ((USItype) (ah)), \ 1185 1.1 mrg "rI" ((USItype) (bh)), \ 1186 1.1 mrg "%rJ" ((USItype) (al)), \ 1187 1.1 mrg "rI" ((USItype) (bl)) \ 1188 1.1 mrg __CLOBBER_CC) 1189 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1190 1.1 mrg __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \ 1191 1.1 mrg : "=r" ((USItype) (sh)), \ 1192 1.1 mrg "=&r" ((USItype) (sl)) \ 1193 1.1 mrg : "rJ" ((USItype) (ah)), \ 1194 1.1 mrg "rI" ((USItype) (bh)), \ 1195 1.1 mrg "rJ" ((USItype) (al)), \ 1196 1.1 mrg "rI" ((USItype) (bl)) \ 1197 1.1 mrg __CLOBBER_CC) 1198 1.1 mrg #if defined (__sparc_v9__) 1199 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 1200 1.1 mrg do { \ 1201 1.1 mrg register USItype __g1 asm ("g1"); \ 1202 1.1 mrg __asm__ ("umul\t%2,%3,%1\n\t" \ 1203 1.1 mrg "srlx\t%1, 32, %0" \ 1204 1.1 mrg : "=r" ((USItype) (w1)), \ 1205 1.1 mrg "=r" (__g1) \ 1206 1.1 mrg : "r" ((USItype) (u)), \ 1207 1.1 mrg "r" ((USItype) (v))); \ 1208 1.1 mrg (w0) = __g1; \ 1209 1.1 mrg } while (0) 1210 1.1 mrg #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ 1211 1.1 mrg __asm__ ("mov\t%2,%%y\n\t" \ 1212 1.1 mrg "udiv\t%3,%4,%0\n\t" \ 1213 1.1 mrg "umul\t%0,%4,%1\n\t" \ 1214 1.1 mrg "sub\t%3,%1,%1" \ 1215 1.1 mrg : "=&r" ((USItype) (__q)), \ 1216 1.1 mrg "=&r" ((USItype) (__r)) \ 1217 1.1 mrg : "r" ((USItype) (__n1)), \ 1218 1.1 mrg "r" ((USItype) (__n0)), \ 1219 1.1 mrg "r" ((USItype) (__d))) 1220 1.1 mrg #else 1221 1.1 mrg #if defined (__sparc_v8__) 1222 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 1223 1.1 mrg __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 1224 1.1 mrg : "=r" ((USItype) (w1)), \ 1225 1.1 mrg "=r" ((USItype) (w0)) \ 1226 1.1 mrg : "r" ((USItype) (u)), \ 1227 1.1 mrg "r" ((USItype) (v))) 1228 1.1 mrg #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ 1229 1.1 mrg __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\ 1230 1.1 mrg : "=&r" ((USItype) (__q)), \ 1231 1.1 mrg "=&r" ((USItype) (__r)) \ 1232 1.1 mrg : "r" ((USItype) (__n1)), \ 1233 1.1 mrg "r" ((USItype) (__n0)), \ 1234 1.1 mrg "r" ((USItype) (__d))) 1235 1.1 mrg #else 1236 1.1 mrg #if defined (__sparclite__) 1237 1.1 mrg /* This has hardware multiply but not divide. It also has two additional 1238 1.1 mrg instructions scan (ffs from high bit) and divscc. */ 1239 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 1240 1.1 mrg __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 1241 1.1 mrg : "=r" ((USItype) (w1)), \ 1242 1.1 mrg "=r" ((USItype) (w0)) \ 1243 1.1 mrg : "r" ((USItype) (u)), \ 1244 1.1 mrg "r" ((USItype) (v))) 1245 1.1 mrg #define udiv_qrnnd(q, r, n1, n0, d) \ 1246 1.1 mrg __asm__ ("! Inlined udiv_qrnnd\n" \ 1247 1.1 mrg " wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ 1248 1.1 mrg " tst %%g0\n" \ 1249 1.1 mrg " divscc %3,%4,%%g1\n" \ 1250 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1251 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1252 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1253 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1254 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1255 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1256 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1257 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1258 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1259 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1260 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1261 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1262 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1263 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1264 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1265 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1266 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1267 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1268 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1269 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1270 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1271 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1272 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1273 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1274 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1275 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1276 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1277 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1278 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1279 1.1 mrg " divscc %%g1,%4,%%g1\n" \ 1280 1.1 mrg " divscc %%g1,%4,%0\n" \ 1281 1.1 mrg " rd %%y,%1\n" \ 1282 1.1 mrg " bl,a 1f\n" \ 1283 1.1 mrg " add %1,%4,%1\n" \ 1284 1.1 mrg "1: ! End of inline udiv_qrnnd" \ 1285 1.1 mrg : "=r" ((USItype) (q)), \ 1286 1.1 mrg "=r" ((USItype) (r)) \ 1287 1.1 mrg : "r" ((USItype) (n1)), \ 1288 1.1 mrg "r" ((USItype) (n0)), \ 1289 1.1 mrg "rI" ((USItype) (d)) \ 1290 1.1 mrg : "g1" __AND_CLOBBER_CC) 1291 1.1 mrg #define UDIV_TIME 37 1292 1.1 mrg #define count_leading_zeros(count, x) \ 1293 1.1 mrg do { \ 1294 1.1 mrg __asm__ ("scan %1,1,%0" \ 1295 1.1 mrg : "=r" ((USItype) (count)) \ 1296 1.1 mrg : "r" ((USItype) (x))); \ 1297 1.1 mrg } while (0) 1298 1.1 mrg /* Early sparclites return 63 for an argument of 0, but they warn that future 1299 1.1 mrg implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 1300 1.1 mrg undefined. */ 1301 1.1 mrg #else 1302 1.1 mrg /* SPARC without integer multiplication and divide instructions. 1303 1.1 mrg (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */ 1304 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 1305 1.1 mrg __asm__ ("! Inlined umul_ppmm\n" \ 1306 1.1 mrg " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\ 1307 1.1 mrg " sra %3,31,%%o5 ! Don't move this insn\n" \ 1308 1.1 mrg " and %2,%%o5,%%o5 ! Don't move this insn\n" \ 1309 1.1 mrg " andcc %%g0,0,%%g1 ! Don't move this insn\n" \ 1310 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1311 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1312 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1313 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1314 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1315 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1316 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1317 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1318 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1319 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1320 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1321 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1322 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1323 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1324 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1325 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1326 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1327 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1328 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1329 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1330 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1331 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1332 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1333 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1334 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1335 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1336 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1337 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1338 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1339 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1340 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1341 1.1 mrg " mulscc %%g1,%3,%%g1\n" \ 1342 1.1 mrg " mulscc %%g1,0,%%g1\n" \ 1343 1.1 mrg " add %%g1,%%o5,%0\n" \ 1344 1.1 mrg " rd %%y,%1" \ 1345 1.1 mrg : "=r" ((USItype) (w1)), \ 1346 1.1 mrg "=r" ((USItype) (w0)) \ 1347 1.1 mrg : "%rI" ((USItype) (u)), \ 1348 1.1 mrg "r" ((USItype) (v)) \ 1349 1.1 mrg : "g1", "o5" __AND_CLOBBER_CC) 1350 1.1 mrg #define UMUL_TIME 39 /* 39 instructions */ 1351 1.1 mrg /* It's quite necessary to add this much assembler for the sparc. 1352 1.1 mrg The default udiv_qrnnd (in C) is more than 10 times slower! */ 1353 1.1 mrg #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ 1354 1.1 mrg __asm__ ("! Inlined udiv_qrnnd\n" \ 1355 1.1 mrg " mov 32,%%g1\n" \ 1356 1.1 mrg " subcc %1,%2,%%g0\n" \ 1357 1.1 mrg "1: bcs 5f\n" \ 1358 1.1 mrg " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \ 1359 1.1 mrg " sub %1,%2,%1 ! this kills msb of n\n" \ 1360 1.1 mrg " addx %1,%1,%1 ! so this can't give carry\n" \ 1361 1.1 mrg " subcc %%g1,1,%%g1\n" \ 1362 1.1 mrg "2: bne 1b\n" \ 1363 1.1 mrg " subcc %1,%2,%%g0\n" \ 1364 1.1 mrg " bcs 3f\n" \ 1365 1.1 mrg " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \ 1366 1.1 mrg " b 3f\n" \ 1367 1.1 mrg " sub %1,%2,%1 ! this kills msb of n\n" \ 1368 1.1 mrg "4: sub %1,%2,%1\n" \ 1369 1.1 mrg "5: addxcc %1,%1,%1\n" \ 1370 1.1 mrg " bcc 2b\n" \ 1371 1.1 mrg " subcc %%g1,1,%%g1\n" \ 1372 1.1 mrg "! Got carry from n. Subtract next step to cancel this carry.\n" \ 1373 1.1 mrg " bne 4b\n" \ 1374 1.1 mrg " addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \ 1375 1.1 mrg " sub %1,%2,%1\n" \ 1376 1.1 mrg "3: xnor %0,0,%0\n" \ 1377 1.1 mrg " ! End of inline udiv_qrnnd" \ 1378 1.1 mrg : "=&r" ((USItype) (__q)), \ 1379 1.1 mrg "=&r" ((USItype) (__r)) \ 1380 1.1 mrg : "r" ((USItype) (__d)), \ 1381 1.1 mrg "1" ((USItype) (__n1)), \ 1382 1.1 mrg "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC) 1383 1.1 mrg #define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ 1384 1.1 mrg #endif /* __sparclite__ */ 1385 1.1 mrg #endif /* __sparc_v8__ */ 1386 1.1 mrg #endif /* __sparc_v9__ */ 1387 1.1 mrg #endif /* sparc32 */ 1388 1.1 mrg 1389 1.1 mrg #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \ 1390 1.1 mrg && W_TYPE_SIZE == 64 1391 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1392 1.1 mrg do { \ 1393 1.1 mrg UDItype __carry = 0; \ 1394 1.1 mrg __asm__ ("addcc\t%r5,%6,%1\n\t" \ 1395 1.1 mrg "add\t%r3,%4,%0\n\t" \ 1396 1.1 mrg "movcs\t%%xcc, 1, %2\n\t" \ 1397 1.1 mrg "add\t%0, %2, %0" \ 1398 1.1 mrg : "=r" ((UDItype)(sh)), \ 1399 1.1 mrg "=&r" ((UDItype)(sl)), \ 1400 1.1 mrg "+r" (__carry) \ 1401 1.1 mrg : "%rJ" ((UDItype)(ah)), \ 1402 1.1 mrg "rI" ((UDItype)(bh)), \ 1403 1.1 mrg "%rJ" ((UDItype)(al)), \ 1404 1.1 mrg "rI" ((UDItype)(bl)) \ 1405 1.1 mrg __CLOBBER_CC); \ 1406 1.1 mrg } while (0) 1407 1.1 mrg 1408 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1409 1.1 mrg do { \ 1410 1.1 mrg UDItype __carry = 0; \ 1411 1.1 mrg __asm__ ("subcc\t%r5,%6,%1\n\t" \ 1412 1.1 mrg "sub\t%r3,%4,%0\n\t" \ 1413 1.1 mrg "movcs\t%%xcc, 1, %2\n\t" \ 1414 1.1 mrg "sub\t%0, %2, %0" \ 1415 1.1 mrg : "=r" ((UDItype)(sh)), \ 1416 1.1 mrg "=&r" ((UDItype)(sl)), \ 1417 1.1 mrg "+r" (__carry) \ 1418 1.1 mrg : "%rJ" ((UDItype)(ah)), \ 1419 1.1 mrg "rI" ((UDItype)(bh)), \ 1420 1.1 mrg "%rJ" ((UDItype)(al)), \ 1421 1.1 mrg "rI" ((UDItype)(bl)) \ 1422 1.1 mrg __CLOBBER_CC); \ 1423 1.1 mrg } while (0) 1424 1.1 mrg 1425 1.1 mrg #define umul_ppmm(wh, wl, u, v) \ 1426 1.1 mrg do { \ 1427 1.1 mrg UDItype tmp1, tmp2, tmp3, tmp4; \ 1428 1.1 mrg __asm__ __volatile__ ( \ 1429 1.1 mrg "srl %7,0,%3\n\t" \ 1430 1.1 mrg "mulx %3,%6,%1\n\t" \ 1431 1.1 mrg "srlx %6,32,%2\n\t" \ 1432 1.1 mrg "mulx %2,%3,%4\n\t" \ 1433 1.1 mrg "sllx %4,32,%5\n\t" \ 1434 1.1 mrg "srl %6,0,%3\n\t" \ 1435 1.1 mrg "sub %1,%5,%5\n\t" \ 1436 1.1 mrg "srlx %5,32,%5\n\t" \ 1437 1.1 mrg "addcc %4,%5,%4\n\t" \ 1438 1.1 mrg "srlx %7,32,%5\n\t" \ 1439 1.1 mrg "mulx %3,%5,%3\n\t" \ 1440 1.1 mrg "mulx %2,%5,%5\n\t" \ 1441 1.1 mrg "sethi %%hi(0x80000000),%2\n\t" \ 1442 1.1 mrg "addcc %4,%3,%4\n\t" \ 1443 1.1 mrg "srlx %4,32,%4\n\t" \ 1444 1.1 mrg "add %2,%2,%2\n\t" \ 1445 1.1 mrg "movcc %%xcc,%%g0,%2\n\t" \ 1446 1.1 mrg "addcc %5,%4,%5\n\t" \ 1447 1.1 mrg "sllx %3,32,%3\n\t" \ 1448 1.1 mrg "add %1,%3,%1\n\t" \ 1449 1.1 mrg "add %5,%2,%0" \ 1450 1.1 mrg : "=r" ((UDItype)(wh)), \ 1451 1.1 mrg "=&r" ((UDItype)(wl)), \ 1452 1.1 mrg "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \ 1453 1.1 mrg : "r" ((UDItype)(u)), \ 1454 1.1 mrg "r" ((UDItype)(v)) \ 1455 1.1 mrg __CLOBBER_CC); \ 1456 1.1 mrg } while (0) 1457 1.1 mrg #define UMUL_TIME 96 1458 1.1 mrg #define UDIV_TIME 230 1459 1.1 mrg #endif /* sparc64 */ 1460 1.1 mrg 1461 1.1 mrg #if defined (__vax__) && W_TYPE_SIZE == 32 1462 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1463 1.1 mrg __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ 1464 1.1 mrg : "=g" ((USItype) (sh)), \ 1465 1.1 mrg "=&g" ((USItype) (sl)) \ 1466 1.1 mrg : "%0" ((USItype) (ah)), \ 1467 1.1 mrg "g" ((USItype) (bh)), \ 1468 1.1 mrg "%1" ((USItype) (al)), \ 1469 1.1 mrg "g" ((USItype) (bl))) 1470 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1471 1.1 mrg __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \ 1472 1.1 mrg : "=g" ((USItype) (sh)), \ 1473 1.1 mrg "=&g" ((USItype) (sl)) \ 1474 1.1 mrg : "0" ((USItype) (ah)), \ 1475 1.1 mrg "g" ((USItype) (bh)), \ 1476 1.1 mrg "1" ((USItype) (al)), \ 1477 1.1 mrg "g" ((USItype) (bl))) 1478 1.1 mrg #define umul_ppmm(xh, xl, m0, m1) \ 1479 1.1 mrg do { \ 1480 1.1 mrg union { \ 1481 1.1 mrg UDItype __ll; \ 1482 1.1 mrg struct {USItype __l, __h;} __i; \ 1483 1.1 mrg } __xx; \ 1484 1.1 mrg USItype __m0 = (m0), __m1 = (m1); \ 1485 1.1 mrg __asm__ ("emul %1,%2,$0,%0" \ 1486 1.1 mrg : "=r" (__xx.__ll) \ 1487 1.1 mrg : "g" (__m0), \ 1488 1.1 mrg "g" (__m1)); \ 1489 1.1 mrg (xh) = __xx.__i.__h; \ 1490 1.1 mrg (xl) = __xx.__i.__l; \ 1491 1.1 mrg (xh) += ((((SItype) __m0 >> 31) & __m1) \ 1492 1.1 mrg + (((SItype) __m1 >> 31) & __m0)); \ 1493 1.1 mrg } while (0) 1494 1.1 mrg #define sdiv_qrnnd(q, r, n1, n0, d) \ 1495 1.1 mrg do { \ 1496 1.1 mrg union {DItype __ll; \ 1497 1.1 mrg struct {SItype __l, __h;} __i; \ 1498 1.1 mrg } __xx; \ 1499 1.1 mrg __xx.__i.__h = n1; __xx.__i.__l = n0; \ 1500 1.1 mrg __asm__ ("ediv %3,%2,%0,%1" \ 1501 1.1 mrg : "=g" (q), "=g" (r) \ 1502 1.1 mrg : "g" (__xx.__ll), "g" (d)); \ 1503 1.1 mrg } while (0) 1504 1.1 mrg #endif /* __vax__ */ 1505 1.1 mrg 1506 1.1 mrg #ifdef _TMS320C6X 1507 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1508 1.1 mrg do \ 1509 1.1 mrg { \ 1510 1.1 mrg UDItype __ll; \ 1511 1.1 mrg __asm__ ("addu .l1 %1, %2, %0" \ 1512 1.1 mrg : "=a" (__ll) : "a" (al), "a" (bl)); \ 1513 1.1 mrg (sl) = (USItype)__ll; \ 1514 1.1 mrg (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \ 1515 1.1 mrg } \ 1516 1.1 mrg while (0) 1517 1.1 mrg 1518 1.1 mrg #ifdef _TMS320C6400_PLUS 1519 1.1 mrg #define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v) 1520 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 1521 1.1 mrg do { \ 1522 1.1 mrg UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \ 1523 1.1 mrg (w1) = (USItype) (__x >> 32); \ 1524 1.1 mrg (w0) = (USItype) (__x); \ 1525 1.1 mrg } while (0) 1526 1.1 mrg #endif /* _TMS320C6400_PLUS */ 1527 1.1 mrg 1528 1.1 mrg #define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) 1529 1.1 mrg #ifdef _TMS320C6400 1530 1.1 mrg #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) 1531 1.1 mrg #endif 1532 1.1 mrg #define UMUL_TIME 4 1533 1.1 mrg #define UDIV_TIME 40 1534 1.1 mrg #endif /* _TMS320C6X */ 1535 1.1 mrg 1536 1.1 mrg #if defined (__xtensa__) && W_TYPE_SIZE == 32 1537 1.1 mrg /* This code is not Xtensa-configuration-specific, so rely on the compiler 1538 1.1 mrg to expand builtin functions depending on what configuration features 1539 1.1 mrg are available. This avoids library calls when the operation can be 1540 1.1 mrg performed in-line. */ 1541 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 1542 1.1 mrg do { \ 1543 1.1 mrg DWunion __w; \ 1544 1.1 mrg __w.ll = __builtin_umulsidi3 (u, v); \ 1545 1.1 mrg w1 = __w.s.high; \ 1546 1.1 mrg w0 = __w.s.low; \ 1547 1.1 mrg } while (0) 1548 1.1 mrg #define __umulsidi3(u, v) __builtin_umulsidi3 (u, v) 1549 1.1 mrg #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) 1550 1.1 mrg #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) 1551 1.1 mrg #endif /* __xtensa__ */ 1552 1.1 mrg 1553 1.1 mrg #if defined xstormy16 1554 1.1 mrg extern UHItype __stormy16_count_leading_zeros (UHItype); 1555 1.1 mrg #define count_leading_zeros(count, x) \ 1556 1.1 mrg do \ 1557 1.1 mrg { \ 1558 1.1 mrg UHItype size; \ 1559 1.1 mrg \ 1560 1.1 mrg /* We assume that W_TYPE_SIZE is a multiple of 16... */ \ 1561 1.1 mrg for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \ 1562 1.1 mrg { \ 1563 1.1 mrg UHItype c; \ 1564 1.1 mrg \ 1565 1.1 mrg c = __clzhi2 ((x) >> (size - 16)); \ 1566 1.1 mrg (count) += c; \ 1567 1.1 mrg if (c != 16) \ 1568 1.1 mrg break; \ 1569 1.1 mrg } \ 1570 1.1 mrg } \ 1571 1.1 mrg while (0) 1572 1.1 mrg #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE 1573 1.1 mrg #endif 1574 1.1 mrg 1575 1.1 mrg #if defined (__z8000__) && W_TYPE_SIZE == 16 1576 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1577 1.1 mrg __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ 1578 1.1 mrg : "=r" ((unsigned int)(sh)), \ 1579 1.1 mrg "=&r" ((unsigned int)(sl)) \ 1580 1.1 mrg : "%0" ((unsigned int)(ah)), \ 1581 1.1 mrg "r" ((unsigned int)(bh)), \ 1582 1.1 mrg "%1" ((unsigned int)(al)), \ 1583 1.1 mrg "rQR" ((unsigned int)(bl))) 1584 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1585 1.1 mrg __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ 1586 1.1 mrg : "=r" ((unsigned int)(sh)), \ 1587 1.1 mrg "=&r" ((unsigned int)(sl)) \ 1588 1.1 mrg : "0" ((unsigned int)(ah)), \ 1589 1.1 mrg "r" ((unsigned int)(bh)), \ 1590 1.1 mrg "1" ((unsigned int)(al)), \ 1591 1.1 mrg "rQR" ((unsigned int)(bl))) 1592 1.1 mrg #define umul_ppmm(xh, xl, m0, m1) \ 1593 1.1 mrg do { \ 1594 1.1 mrg union {long int __ll; \ 1595 1.1 mrg struct {unsigned int __h, __l;} __i; \ 1596 1.1 mrg } __xx; \ 1597 1.1 mrg unsigned int __m0 = (m0), __m1 = (m1); \ 1598 1.1 mrg __asm__ ("mult %S0,%H3" \ 1599 1.1 mrg : "=r" (__xx.__i.__h), \ 1600 1.1 mrg "=r" (__xx.__i.__l) \ 1601 1.1 mrg : "%1" (__m0), \ 1602 1.1 mrg "rQR" (__m1)); \ 1603 1.1 mrg (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1604 1.1 mrg (xh) += ((((signed int) __m0 >> 15) & __m1) \ 1605 1.1 mrg + (((signed int) __m1 >> 15) & __m0)); \ 1606 1.1 mrg } while (0) 1607 1.1 mrg #endif /* __z8000__ */ 1608 1.1 mrg 1609 1.1 mrg #endif /* __GNUC__ */ 1610 1.1 mrg 1611 1.1 mrg /* If this machine has no inline assembler, use C macros. */ 1612 1.1 mrg 1613 1.1 mrg #if !defined (add_ssaaaa) 1614 1.1 mrg #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1615 1.1 mrg do { \ 1616 1.1 mrg UWtype __x; \ 1617 1.1 mrg __x = (al) + (bl); \ 1618 1.1 mrg (sh) = (ah) + (bh) + (__x < (al)); \ 1619 1.1 mrg (sl) = __x; \ 1620 1.1 mrg } while (0) 1621 1.1 mrg #endif 1622 1.1 mrg 1623 1.1 mrg #if !defined (sub_ddmmss) 1624 1.1 mrg #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1625 1.1 mrg do { \ 1626 1.1 mrg UWtype __x; \ 1627 1.1 mrg __x = (al) - (bl); \ 1628 1.1 mrg (sh) = (ah) - (bh) - (__x > (al)); \ 1629 1.1 mrg (sl) = __x; \ 1630 1.1 mrg } while (0) 1631 1.1 mrg #endif 1632 1.1 mrg 1633 1.1 mrg /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of 1634 1.1 mrg smul_ppmm. */ 1635 1.1 mrg #if !defined (umul_ppmm) && defined (smul_ppmm) 1636 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 1637 1.1 mrg do { \ 1638 1.1 mrg UWtype __w1; \ 1639 1.1 mrg UWtype __xm0 = (u), __xm1 = (v); \ 1640 1.1 mrg smul_ppmm (__w1, w0, __xm0, __xm1); \ 1641 1.1 mrg (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \ 1642 1.1 mrg + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \ 1643 1.1 mrg } while (0) 1644 1.1 mrg #endif 1645 1.1 mrg 1646 1.1 mrg /* If we still don't have umul_ppmm, define it using plain C. */ 1647 1.1 mrg #if !defined (umul_ppmm) 1648 1.1 mrg #define umul_ppmm(w1, w0, u, v) \ 1649 1.1 mrg do { \ 1650 1.1 mrg UWtype __x0, __x1, __x2, __x3; \ 1651 1.1 mrg UHWtype __ul, __vl, __uh, __vh; \ 1652 1.1 mrg \ 1653 1.1 mrg __ul = __ll_lowpart (u); \ 1654 1.1 mrg __uh = __ll_highpart (u); \ 1655 1.1 mrg __vl = __ll_lowpart (v); \ 1656 1.1 mrg __vh = __ll_highpart (v); \ 1657 1.1 mrg \ 1658 1.1 mrg __x0 = (UWtype) __ul * __vl; \ 1659 1.1 mrg __x1 = (UWtype) __ul * __vh; \ 1660 1.1 mrg __x2 = (UWtype) __uh * __vl; \ 1661 1.1 mrg __x3 = (UWtype) __uh * __vh; \ 1662 1.1 mrg \ 1663 1.1 mrg __x1 += __ll_highpart (__x0);/* this can't give carry */ \ 1664 1.1 mrg __x1 += __x2; /* but this indeed can */ \ 1665 1.1 mrg if (__x1 < __x2) /* did we get it? */ \ 1666 1.1 mrg __x3 += __ll_B; /* yes, add it in the proper pos. */ \ 1667 1.1 mrg \ 1668 1.1 mrg (w1) = __x3 + __ll_highpart (__x1); \ 1669 1.1 mrg (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ 1670 1.1 mrg } while (0) 1671 1.1 mrg #endif 1672 1.1 mrg 1673 1.1 mrg #if !defined (__umulsidi3) 1674 1.1 mrg #define __umulsidi3(u, v) \ 1675 1.1 mrg ({DWunion __w; \ 1676 1.1 mrg umul_ppmm (__w.s.high, __w.s.low, u, v); \ 1677 1.1 mrg __w.ll; }) 1678 1.1 mrg #endif 1679 1.1 mrg 1680 1.1 mrg /* Define this unconditionally, so it can be used for debugging. */ 1681 1.1 mrg #define __udiv_qrnnd_c(q, r, n1, n0, d) \ 1682 1.1 mrg do { \ 1683 1.1 mrg UWtype __d1, __d0, __q1, __q0; \ 1684 1.1 mrg UWtype __r1, __r0, __m; \ 1685 1.1 mrg __d1 = __ll_highpart (d); \ 1686 1.1 mrg __d0 = __ll_lowpart (d); \ 1687 1.1 mrg \ 1688 1.1 mrg __r1 = (n1) % __d1; \ 1689 1.1 mrg __q1 = (n1) / __d1; \ 1690 1.1 mrg __m = (UWtype) __q1 * __d0; \ 1691 1.1 mrg __r1 = __r1 * __ll_B | __ll_highpart (n0); \ 1692 1.1 mrg if (__r1 < __m) \ 1693 1.1 mrg { \ 1694 1.1 mrg __q1--, __r1 += (d); \ 1695 1.1 mrg if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ 1696 1.1 mrg if (__r1 < __m) \ 1697 1.1 mrg __q1--, __r1 += (d); \ 1698 1.1 mrg } \ 1699 1.1 mrg __r1 -= __m; \ 1700 1.1 mrg \ 1701 1.1 mrg __r0 = __r1 % __d1; \ 1702 1.1 mrg __q0 = __r1 / __d1; \ 1703 1.1 mrg __m = (UWtype) __q0 * __d0; \ 1704 1.1 mrg __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ 1705 1.1 mrg if (__r0 < __m) \ 1706 1.1 mrg { \ 1707 1.1 mrg __q0--, __r0 += (d); \ 1708 1.1 mrg if (__r0 >= (d)) \ 1709 1.1 mrg if (__r0 < __m) \ 1710 1.1 mrg __q0--, __r0 += (d); \ 1711 1.1 mrg } \ 1712 1.1 mrg __r0 -= __m; \ 1713 1.1 mrg \ 1714 1.1 mrg (q) = (UWtype) __q1 * __ll_B | __q0; \ 1715 1.1 mrg (r) = __r0; \ 1716 1.1 mrg } while (0) 1717 1.1 mrg 1718 1.1 mrg /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through 1719 1.1 mrg __udiv_w_sdiv (defined in libgcc or elsewhere). */ 1720 1.1 mrg #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) 1721 1.1 mrg #define udiv_qrnnd(q, r, nh, nl, d) \ 1722 1.1 mrg do { \ 1723 1.1 mrg extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype); \ 1724 1.1 mrg UWtype __r; \ 1725 1.1 mrg (q) = __udiv_w_sdiv (&__r, nh, nl, d); \ 1726 1.1 mrg (r) = __r; \ 1727 1.1 mrg } while (0) 1728 1.1 mrg #endif 1729 1.1 mrg 1730 1.1 mrg /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ 1731 1.1 mrg #if !defined (udiv_qrnnd) 1732 1.1 mrg #define UDIV_NEEDS_NORMALIZATION 1 1733 1.1 mrg #define udiv_qrnnd __udiv_qrnnd_c 1734 1.1 mrg #endif 1735 1.1 mrg 1736 1.1 mrg #if !defined (count_leading_zeros) 1737 1.1 mrg #define count_leading_zeros(count, x) \ 1738 1.1 mrg do { \ 1739 1.1 mrg UWtype __xr = (x); \ 1740 1.1 mrg UWtype __a; \ 1741 1.1 mrg \ 1742 1.1 mrg if (W_TYPE_SIZE <= 32) \ 1743 1.1 mrg { \ 1744 1.1 mrg __a = __xr < ((UWtype)1<<2*__BITS4) \ 1745 1.1 mrg ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4) \ 1746 1.1 mrg : (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \ 1747 1.1 mrg } \ 1748 1.1 mrg else \ 1749 1.1 mrg { \ 1750 1.1 mrg for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ 1751 1.1 mrg if (((__xr >> __a) & 0xff) != 0) \ 1752 1.1 mrg break; \ 1753 1.1 mrg } \ 1754 1.1 mrg \ 1755 1.1 mrg (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ 1756 1.1 mrg } while (0) 1757 1.1 mrg #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE 1758 1.1 mrg #endif 1759 1.1 mrg 1760 1.1 mrg #if !defined (count_trailing_zeros) 1761 1.1 mrg /* Define count_trailing_zeros using count_leading_zeros. The latter might be 1762 1.1 mrg defined in asm, but if it is not, the C version above is good enough. */ 1763 1.1 mrg #define count_trailing_zeros(count, x) \ 1764 1.1 mrg do { \ 1765 1.1 mrg UWtype __ctz_x = (x); \ 1766 1.1 mrg UWtype __ctz_c; \ 1767 1.1 mrg count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ 1768 1.1 mrg (count) = W_TYPE_SIZE - 1 - __ctz_c; \ 1769 1.1 mrg } while (0) 1770 1.1 mrg #endif 1771 1.1 mrg 1772 1.1 mrg #ifndef UDIV_NEEDS_NORMALIZATION 1773 1.1 mrg #define UDIV_NEEDS_NORMALIZATION 0 1774 1.1 mrg #endif 1775