1 1.1 christos /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. 2 1.11 christos Copyright (C) 1991-2024 Free Software Foundation, Inc. 3 1.1 christos 4 1.1 christos This file is part of the GNU C Library. 5 1.1 christos 6 1.1 christos The GNU C Library is free software; you can redistribute it and/or 7 1.1 christos modify it under the terms of the GNU Lesser General Public 8 1.1 christos License as published by the Free Software Foundation; either 9 1.1 christos version 2.1 of the License, or (at your option) any later version. 10 1.1 christos 11 1.1 christos In addition to the permissions in the GNU Lesser General Public 12 1.1 christos License, the Free Software Foundation gives you unlimited 13 1.1 christos permission to link the compiled version of this file into 14 1.1 christos combinations with other programs, and to distribute those 15 1.1 christos combinations without any restriction coming from the use of this 16 1.1 christos file. (The Lesser General Public License restrictions do apply in 17 1.1 christos other respects; for example, they cover modification of the file, 18 1.1 christos and distribution when not linked into a combine executable.) 19 1.1 christos 20 1.1 christos The GNU C Library is distributed in the hope that it will be useful, 21 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of 22 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 1.1 christos Lesser General Public License for more details. 24 1.1 christos 25 1.1 christos You should have received a copy of the GNU Lesser General Public 26 1.1 christos License along with the GNU C Library; if not, see 27 1.1 christos <http://www.gnu.org/licenses/>. 
*/ 28 1.1 christos 29 1.1 christos /* You have to define the following before including this file: 30 1.1 christos 31 1.1 christos UWtype -- An unsigned type, default type for operations (typically a "word") 32 1.1 christos UHWtype -- An unsigned type, at least half the size of UWtype. 33 1.1 christos UDWtype -- An unsigned type, at least twice as large as UWtype 34 1.1 christos W_TYPE_SIZE -- size in bits of UWtype 35 1.1 christos 36 1.1 christos UQItype -- Unsigned 8 bit type. 37 1.1 christos SItype, USItype -- Signed and unsigned 32 bit types. 38 1.1 christos DItype, UDItype -- Signed and unsigned 64 bit types. 39 1.1 christos 40 1.1 christos On a 32 bit machine UWtype should typically be USItype; 41 1.1 christos on a 64 bit machine, UWtype should typically be UDItype. */ 42 1.1 christos 43 1.1 christos #define __BITS4 (W_TYPE_SIZE / 4) 44 1.1 christos #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) 45 1.1 christos #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) 46 1.1 christos #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) 47 1.1 christos 48 1.1 christos #ifndef W_TYPE_SIZE 49 1.1 christos #define W_TYPE_SIZE 32 50 1.1 christos #define UWtype USItype 51 1.1 christos #define UHWtype USItype 52 1.1 christos #define UDWtype UDItype 53 1.1 christos #endif 54 1.1 christos 55 1.1 christos /* Used in glibc only. */ 56 1.1 christos #ifndef attribute_hidden 57 1.1 christos #define attribute_hidden 58 1.1 christos #endif 59 1.1 christos 60 1.1 christos extern const UQItype __clz_tab[256] attribute_hidden; 61 1.1 christos 62 1.1 christos /* Define auxiliary asm macros. 63 1.1 christos 64 1.1 christos 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two 65 1.1 christos UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype 66 1.1 christos word product in HIGH_PROD and LOW_PROD. 67 1.1 christos 68 1.1 christos 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a 69 1.1 christos UDWtype product. 
This is just a variant of umul_ppmm. 70 1.1 christos 71 1.1 christos 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 72 1.1 christos denominator) divides a UDWtype, composed by the UWtype integers 73 1.1 christos HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient 74 1.1 christos in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less 75 1.1 christos than DENOMINATOR for correct operation. If, in addition, the most 76 1.1 christos significant bit of DENOMINATOR must be 1, then the pre-processor symbol 77 1.1 christos UDIV_NEEDS_NORMALIZATION is defined to 1. 78 1.1 christos 79 1.1 christos 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 80 1.1 christos denominator). Like udiv_qrnnd but the numbers are signed. The quotient 81 1.1 christos is rounded towards 0. 82 1.1 christos 83 1.1 christos 5) count_leading_zeros(count, x) counts the number of zero-bits from the 84 1.1 christos msb to the first nonzero bit in the UWtype X. This is the number of 85 1.1 christos steps X needs to be shifted left to set the msb. Undefined for X == 0, 86 1.1 christos unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 87 1.1 christos 88 1.1 christos 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts 89 1.1 christos from the least significant end. 90 1.1 christos 91 1.1 christos 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, 92 1.1 christos high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by 93 1.1 christos HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 94 1.1 christos respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow 95 1.1 christos (i.e. carry out) is not stored anywhere, and is lost. 
96 1.1 christos 97 1.1 christos 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, 98 1.1 christos high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, 99 1.1 christos composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and 100 1.1 christos LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE 101 1.1 christos and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, 102 1.1 christos and is lost. 103 1.1 christos 104 1.1 christos If any of these macros are left undefined for a particular CPU, 105 1.1 christos C macros are used. */ 106 1.1 christos 107 1.1 christos /* The CPUs come in alphabetical order below. 108 1.1 christos 109 1.1 christos Please add support for more CPUs here, or improve the current support 110 1.1 christos for the CPUs below! 111 1.1 christos (E.g. WE32100, IBM360.) */ 112 1.1 christos 113 1.1 christos #if defined (__GNUC__) && !defined (NO_ASM) 114 1.1 christos 115 1.1 christos /* We sometimes need to clobber "cc" with gcc2, but that would not be 116 1.1 christos understood by gcc1. Use cpp to avoid major code duplication. 
*/ 117 1.1 christos #if __GNUC__ < 2 118 1.1 christos #define __CLOBBER_CC 119 1.1 christos #define __AND_CLOBBER_CC 120 1.1 christos #else /* __GNUC__ >= 2 */ 121 1.1 christos #define __CLOBBER_CC : "cc" 122 1.1 christos #define __AND_CLOBBER_CC , "cc" 123 1.1 christos #endif /* __GNUC__ < 2 */ 124 1.1 christos 125 1.3 christos #if defined (__aarch64__) 126 1.3 christos 127 1.3 christos #if W_TYPE_SIZE == 32 128 1.3 christos #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) 129 1.3 christos #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) 130 1.3 christos #define COUNT_LEADING_ZEROS_0 32 131 1.3 christos #endif /* W_TYPE_SIZE == 32 */ 132 1.3 christos 133 1.3 christos #if W_TYPE_SIZE == 64 134 1.3 christos #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X)) 135 1.3 christos #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X)) 136 1.3 christos #define COUNT_LEADING_ZEROS_0 64 137 1.3 christos #endif /* W_TYPE_SIZE == 64 */ 138 1.3 christos 139 1.3 christos #endif /* __aarch64__ */ 140 1.3 christos 141 1.1 christos #if defined (__alpha) && W_TYPE_SIZE == 64 142 1.6 christos /* There is a bug in g++ before version 5 that 143 1.6 christos errors on __builtin_alpha_umulh. 
*/ 144 1.6 christos #if !defined(__cplusplus) || __GNUC__ >= 5 145 1.1 christos #define umul_ppmm(ph, pl, m0, m1) \ 146 1.1 christos do { \ 147 1.1 christos UDItype __m0 = (m0), __m1 = (m1); \ 148 1.1 christos (ph) = __builtin_alpha_umulh (__m0, __m1); \ 149 1.1 christos (pl) = __m0 * __m1; \ 150 1.1 christos } while (0) 151 1.1 christos #define UMUL_TIME 46 152 1.6 christos #endif /* !c++ */ 153 1.1 christos #ifndef LONGLONG_STANDALONE 154 1.1 christos #define udiv_qrnnd(q, r, n1, n0, d) \ 155 1.1 christos do { UDItype __r; \ 156 1.1 christos (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ 157 1.1 christos (r) = __r; \ 158 1.1 christos } while (0) 159 1.1 christos extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype); 160 1.1 christos #define UDIV_TIME 220 161 1.1 christos #endif /* LONGLONG_STANDALONE */ 162 1.1 christos #ifdef __alpha_cix__ 163 1.1 christos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) 164 1.1 christos #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) 165 1.1 christos #define COUNT_LEADING_ZEROS_0 64 166 1.1 christos #else 167 1.1 christos #define count_leading_zeros(COUNT,X) \ 168 1.1 christos do { \ 169 1.1 christos UDItype __xr = (X), __t, __a; \ 170 1.1 christos __t = __builtin_alpha_cmpbge (0, __xr); \ 171 1.1 christos __a = __clz_tab[__t ^ 0xff] - 1; \ 172 1.1 christos __t = __builtin_alpha_extbl (__xr, __a); \ 173 1.1 christos (COUNT) = 64 - (__clz_tab[__t] + __a*8); \ 174 1.1 christos } while (0) 175 1.1 christos #define count_trailing_zeros(COUNT,X) \ 176 1.1 christos do { \ 177 1.1 christos UDItype __xr = (X), __t, __a; \ 178 1.1 christos __t = __builtin_alpha_cmpbge (0, __xr); \ 179 1.1 christos __t = ~__t & -~__t; \ 180 1.1 christos __a = ((__t & 0xCC) != 0) * 2; \ 181 1.1 christos __a += ((__t & 0xF0) != 0) * 4; \ 182 1.1 christos __a += ((__t & 0xAA) != 0); \ 183 1.1 christos __t = __builtin_alpha_extbl (__xr, __a); \ 184 1.1 christos __a <<= 3; \ 185 1.1 christos __t &= -__t; \ 
186 1.1 christos __a += ((__t & 0xCC) != 0) * 2; \ 187 1.1 christos __a += ((__t & 0xF0) != 0) * 4; \ 188 1.1 christos __a += ((__t & 0xAA) != 0); \ 189 1.1 christos (COUNT) = __a; \ 190 1.1 christos } while (0) 191 1.1 christos #endif /* __alpha_cix__ */ 192 1.1 christos #endif /* __alpha */ 193 1.1 christos 194 1.1 christos #if defined (__arc__) && W_TYPE_SIZE == 32 195 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 196 1.1 christos __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \ 197 1.1 christos : "=r" ((USItype) (sh)), \ 198 1.1 christos "=&r" ((USItype) (sl)) \ 199 1.1 christos : "%r" ((USItype) (ah)), \ 200 1.8 christos "rICal" ((USItype) (bh)), \ 201 1.1 christos "%r" ((USItype) (al)), \ 202 1.9 christos "rICal" ((USItype) (bl)) \ 203 1.9 christos : "cc") 204 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 205 1.1 christos __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \ 206 1.1 christos : "=r" ((USItype) (sh)), \ 207 1.1 christos "=&r" ((USItype) (sl)) \ 208 1.1 christos : "r" ((USItype) (ah)), \ 209 1.8 christos "rICal" ((USItype) (bh)), \ 210 1.1 christos "r" ((USItype) (al)), \ 211 1.9 christos "rICal" ((USItype) (bl)) \ 212 1.9 christos : "cc") 213 1.1 christos 214 1.1 christos #define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v) 215 1.1 christos #ifdef __ARC_NORM__ 216 1.1 christos #define count_leading_zeros(count, x) \ 217 1.1 christos do \ 218 1.1 christos { \ 219 1.1 christos SItype c_; \ 220 1.1 christos \ 221 1.1 christos __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\ 222 1.1 christos (count) = c_ + 1; \ 223 1.1 christos } \ 224 1.1 christos while (0) 225 1.1 christos #define COUNT_LEADING_ZEROS_0 32 226 1.8 christos #endif /* __ARC_NORM__ */ 227 1.8 christos #endif /* __arc__ */ 228 1.1 christos 229 1.1 christos #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \ 230 1.1 christos && W_TYPE_SIZE == 32 231 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 232 1.1 
christos __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \ 233 1.1 christos : "=r" ((USItype) (sh)), \ 234 1.1 christos "=&r" ((USItype) (sl)) \ 235 1.1 christos : "%r" ((USItype) (ah)), \ 236 1.1 christos "rI" ((USItype) (bh)), \ 237 1.1 christos "%r" ((USItype) (al)), \ 238 1.1 christos "rI" ((USItype) (bl)) __CLOBBER_CC) 239 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 240 1.1 christos __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \ 241 1.1 christos : "=r" ((USItype) (sh)), \ 242 1.1 christos "=&r" ((USItype) (sl)) \ 243 1.1 christos : "r" ((USItype) (ah)), \ 244 1.1 christos "rI" ((USItype) (bh)), \ 245 1.1 christos "r" ((USItype) (al)), \ 246 1.1 christos "rI" ((USItype) (bl)) __CLOBBER_CC) 247 1.1 christos # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \ 248 1.1 christos || defined(__ARM_ARCH_3__) 249 1.1 christos # define umul_ppmm(xh, xl, a, b) \ 250 1.1 christos do { \ 251 1.1 christos register USItype __t0, __t1, __t2; \ 252 1.1 christos __asm__ ("%@ Inlined umul_ppmm\n" \ 253 1.1 christos " mov %2, %5, lsr #16\n" \ 254 1.1 christos " mov %0, %6, lsr #16\n" \ 255 1.1 christos " bic %3, %5, %2, lsl #16\n" \ 256 1.1 christos " bic %4, %6, %0, lsl #16\n" \ 257 1.1 christos " mul %1, %3, %4\n" \ 258 1.1 christos " mul %4, %2, %4\n" \ 259 1.1 christos " mul %3, %0, %3\n" \ 260 1.1 christos " mul %0, %2, %0\n" \ 261 1.1 christos " adds %3, %4, %3\n" \ 262 1.1 christos " addcs %0, %0, #65536\n" \ 263 1.1 christos " adds %1, %1, %3, lsl #16\n" \ 264 1.1 christos " adc %0, %0, %3, lsr #16" \ 265 1.1 christos : "=&r" ((USItype) (xh)), \ 266 1.1 christos "=r" ((USItype) (xl)), \ 267 1.1 christos "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ 268 1.1 christos : "r" ((USItype) (a)), \ 269 1.1 christos "r" ((USItype) (b)) __CLOBBER_CC ); \ 270 1.1 christos } while (0) 271 1.1 christos # define UMUL_TIME 20 272 1.1 christos # else 273 1.1 christos # define umul_ppmm(xh, xl, a, b) \ 274 1.1 christos do { \ 275 1.1 christos /* Generate umull, under compiler 
control. */ \ 276 1.1 christos register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \ 277 1.1 christos (xl) = (USItype)__t0; \ 278 1.1 christos (xh) = (USItype)(__t0 >> 32); \ 279 1.1 christos } while (0) 280 1.1 christos # define UMUL_TIME 3 281 1.1 christos # endif 282 1.1 christos # define UDIV_TIME 100 283 1.1 christos #endif /* __arm__ */ 284 1.1 christos 285 1.1 christos #if defined(__arm__) 286 1.1 christos /* Let gcc decide how best to implement count_leading_zeros. */ 287 1.1 christos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 288 1.1 christos #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) 289 1.1 christos #define COUNT_LEADING_ZEROS_0 32 290 1.1 christos #endif 291 1.1 christos 292 1.1 christos #if defined (__AVR__) 293 1.1 christos 294 1.1 christos #if W_TYPE_SIZE == 16 295 1.1 christos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 296 1.1 christos #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) 297 1.1 christos #define COUNT_LEADING_ZEROS_0 16 298 1.1 christos #endif /* W_TYPE_SIZE == 16 */ 299 1.1 christos 300 1.1 christos #if W_TYPE_SIZE == 32 301 1.1 christos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) 302 1.1 christos #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) 303 1.1 christos #define COUNT_LEADING_ZEROS_0 32 304 1.1 christos #endif /* W_TYPE_SIZE == 32 */ 305 1.1 christos 306 1.1 christos #if W_TYPE_SIZE == 64 307 1.1 christos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X)) 308 1.1 christos #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X)) 309 1.1 christos #define COUNT_LEADING_ZEROS_0 64 310 1.1 christos #endif /* W_TYPE_SIZE == 64 */ 311 1.1 christos 312 1.1 christos #endif /* defined (__AVR__) */ 313 1.1 christos 314 1.1 christos #if defined (__CRIS__) 315 1.1 christos 316 1.1 christos #if __CRIS_arch_version >= 3 317 1.1 christos #define 
count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) 318 1.1 christos #define COUNT_LEADING_ZEROS_0 32 319 1.1 christos #endif /* __CRIS_arch_version >= 3 */ 320 1.1 christos 321 1.1 christos #if __CRIS_arch_version >= 8 322 1.1 christos #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) 323 1.1 christos #endif /* __CRIS_arch_version >= 8 */ 324 1.1 christos 325 1.1 christos #if __CRIS_arch_version >= 10 326 1.1 christos #define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v)) 327 1.1 christos #else 328 1.1 christos #define __umulsidi3 __umulsidi3 329 1.1 christos extern UDItype __umulsidi3 (USItype, USItype); 330 1.1 christos #endif /* __CRIS_arch_version >= 10 */ 331 1.1 christos 332 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 333 1.1 christos do { \ 334 1.1 christos UDItype __x = __umulsidi3 (u, v); \ 335 1.1 christos (w0) = (USItype) (__x); \ 336 1.1 christos (w1) = (USItype) (__x >> 32); \ 337 1.1 christos } while (0) 338 1.1 christos 339 1.1 christos /* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for 340 1.1 christos DFmode ("double" intrinsics, avoiding two of the three insns handling 341 1.1 christos carry), but defining them as open-code C composing and doing the 342 1.1 christos operation in DImode (UDImode) shows that the DImode needs work: 343 1.1 christos register pressure from requiring neighboring registers and the 344 1.1 christos traffic to and from them come to dominate, in the 4.7 series. 
*/ 345 1.1 christos 346 1.1 christos #endif /* defined (__CRIS__) */ 347 1.1 christos 348 1.1 christos #if defined (__hppa) && W_TYPE_SIZE == 32 349 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 350 1.1 christos __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \ 351 1.1 christos : "=r" ((USItype) (sh)), \ 352 1.1 christos "=&r" ((USItype) (sl)) \ 353 1.1 christos : "%rM" ((USItype) (ah)), \ 354 1.1 christos "rM" ((USItype) (bh)), \ 355 1.1 christos "%rM" ((USItype) (al)), \ 356 1.1 christos "rM" ((USItype) (bl))) 357 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 358 1.1 christos __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \ 359 1.1 christos : "=r" ((USItype) (sh)), \ 360 1.1 christos "=&r" ((USItype) (sl)) \ 361 1.1 christos : "rM" ((USItype) (ah)), \ 362 1.1 christos "rM" ((USItype) (bh)), \ 363 1.1 christos "rM" ((USItype) (al)), \ 364 1.1 christos "rM" ((USItype) (bl))) 365 1.1 christos #if defined (_PA_RISC1_1) 366 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 367 1.1 christos do { \ 368 1.1 christos union \ 369 1.1 christos { \ 370 1.1 christos UDItype __f; \ 371 1.1 christos struct {USItype __w1, __w0;} __w1w0; \ 372 1.1 christos } __t; \ 373 1.1 christos __asm__ ("xmpyu %1,%2,%0" \ 374 1.1 christos : "=x" (__t.__f) \ 375 1.1 christos : "x" ((USItype) (u)), \ 376 1.1 christos "x" ((USItype) (v))); \ 377 1.1 christos (w1) = __t.__w1w0.__w1; \ 378 1.1 christos (w0) = __t.__w1w0.__w0; \ 379 1.1 christos } while (0) 380 1.1 christos #define UMUL_TIME 8 381 1.1 christos #else 382 1.1 christos #define UMUL_TIME 30 383 1.1 christos #endif 384 1.1 christos #define UDIV_TIME 40 385 1.1 christos #define count_leading_zeros(count, x) \ 386 1.1 christos do { \ 387 1.1 christos USItype __tmp; \ 388 1.1 christos __asm__ ( \ 389 1.1 christos "ldi 1,%0\n" \ 390 1.1 christos " extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ 391 1.1 christos " extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\ 392 1.1 christos " ldo 16(%0),%0 ; Yes. 
Perform add.\n" \ 393 1.1 christos " extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ 394 1.1 christos " extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\ 395 1.1 christos " ldo 8(%0),%0 ; Yes. Perform add.\n" \ 396 1.1 christos " extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ 397 1.1 christos " extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\ 398 1.1 christos " ldo 4(%0),%0 ; Yes. Perform add.\n" \ 399 1.1 christos " extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ 400 1.1 christos " extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\ 401 1.1 christos " ldo 2(%0),%0 ; Yes. Perform add.\n" \ 402 1.1 christos " extru %1,30,1,%1 ; Extract bit 1.\n" \ 403 1.1 christos " sub %0,%1,%0 ; Subtract it.\n" \ 404 1.1 christos : "=r" (count), "=r" (__tmp) : "1" (x)); \ 405 1.1 christos } while (0) 406 1.1 christos #endif 407 1.1 christos 408 1.1 christos #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32 409 1.1 christos #if !defined (__zarch__) 410 1.1 christos #define smul_ppmm(xh, xl, m0, m1) \ 411 1.1 christos do { \ 412 1.1 christos union {DItype __ll; \ 413 1.1 christos struct {USItype __h, __l;} __i; \ 414 1.1 christos } __x; \ 415 1.1 christos __asm__ ("lr %N0,%1\n\tmr %0,%2" \ 416 1.1 christos : "=&r" (__x.__ll) \ 417 1.1 christos : "r" (m0), "r" (m1)); \ 418 1.1 christos (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ 419 1.1 christos } while (0) 420 1.1 christos #define sdiv_qrnnd(q, r, n1, n0, d) \ 421 1.1 christos do { \ 422 1.1 christos union {DItype __ll; \ 423 1.1 christos struct {USItype __h, __l;} __i; \ 424 1.1 christos } __x; \ 425 1.1 christos __x.__i.__h = n1; __x.__i.__l = n0; \ 426 1.1 christos __asm__ ("dr %0,%2" \ 427 1.1 christos : "=r" (__x.__ll) \ 428 1.1 christos : "0" (__x.__ll), "r" (d)); \ 429 1.1 christos (q) = __x.__i.__l; (r) = __x.__i.__h; \ 430 1.1 christos } while (0) 431 1.1 christos #else 432 1.1 christos #define smul_ppmm(xh, xl, m0, m1) \ 433 1.1 christos do { \ 434 1.1 christos register SItype __r0 __asm__ 
("0"); \ 435 1.1 christos register SItype __r1 __asm__ ("1") = (m0); \ 436 1.1 christos \ 437 1.1 christos __asm__ ("mr\t%%r0,%3" \ 438 1.1 christos : "=r" (__r0), "=r" (__r1) \ 439 1.1 christos : "r" (__r1), "r" (m1)); \ 440 1.1 christos (xh) = __r0; (xl) = __r1; \ 441 1.1 christos } while (0) 442 1.1 christos 443 1.1 christos #define sdiv_qrnnd(q, r, n1, n0, d) \ 444 1.1 christos do { \ 445 1.1 christos register SItype __r0 __asm__ ("0") = (n1); \ 446 1.1 christos register SItype __r1 __asm__ ("1") = (n0); \ 447 1.1 christos \ 448 1.1 christos __asm__ ("dr\t%%r0,%4" \ 449 1.1 christos : "=r" (__r0), "=r" (__r1) \ 450 1.1 christos : "r" (__r0), "r" (__r1), "r" (d)); \ 451 1.1 christos (q) = __r1; (r) = __r0; \ 452 1.1 christos } while (0) 453 1.1 christos #endif /* __zarch__ */ 454 1.1 christos #endif 455 1.1 christos 456 1.1 christos #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 457 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 458 1.1 christos __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \ 459 1.1 christos : "=r" ((USItype) (sh)), \ 460 1.1 christos "=&r" ((USItype) (sl)) \ 461 1.1 christos : "%0" ((USItype) (ah)), \ 462 1.1 christos "g" ((USItype) (bh)), \ 463 1.1 christos "%1" ((USItype) (al)), \ 464 1.1 christos "g" ((USItype) (bl))) 465 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 466 1.1 christos __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \ 467 1.1 christos : "=r" ((USItype) (sh)), \ 468 1.1 christos "=&r" ((USItype) (sl)) \ 469 1.1 christos : "0" ((USItype) (ah)), \ 470 1.1 christos "g" ((USItype) (bh)), \ 471 1.1 christos "1" ((USItype) (al)), \ 472 1.1 christos "g" ((USItype) (bl))) 473 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 474 1.1 christos __asm__ ("mul{l} %3" \ 475 1.1 christos : "=a" ((USItype) (w0)), \ 476 1.1 christos "=d" ((USItype) (w1)) \ 477 1.1 christos : "%0" ((USItype) (u)), \ 478 1.1 christos "rm" ((USItype) (v))) 479 1.1 christos #define udiv_qrnnd(q, r, n1, n0, 
dv) \ 480 1.1 christos __asm__ ("div{l} %4" \ 481 1.1 christos : "=a" ((USItype) (q)), \ 482 1.1 christos "=d" ((USItype) (r)) \ 483 1.1 christos : "0" ((USItype) (n0)), \ 484 1.1 christos "1" ((USItype) (n1)), \ 485 1.1 christos "rm" ((USItype) (dv))) 486 1.1 christos #define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) 487 1.1 christos #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) 488 1.1 christos #define UMUL_TIME 40 489 1.1 christos #define UDIV_TIME 40 490 1.1 christos #endif /* 80x86 */ 491 1.1 christos 492 1.3 christos #if defined (__x86_64__) && W_TYPE_SIZE == 64 493 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 494 1.1 christos __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \ 495 1.1 christos : "=r" ((UDItype) (sh)), \ 496 1.1 christos "=&r" ((UDItype) (sl)) \ 497 1.1 christos : "%0" ((UDItype) (ah)), \ 498 1.1 christos "rme" ((UDItype) (bh)), \ 499 1.1 christos "%1" ((UDItype) (al)), \ 500 1.1 christos "rme" ((UDItype) (bl))) 501 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 502 1.1 christos __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \ 503 1.1 christos : "=r" ((UDItype) (sh)), \ 504 1.1 christos "=&r" ((UDItype) (sl)) \ 505 1.1 christos : "0" ((UDItype) (ah)), \ 506 1.1 christos "rme" ((UDItype) (bh)), \ 507 1.1 christos "1" ((UDItype) (al)), \ 508 1.1 christos "rme" ((UDItype) (bl))) 509 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 510 1.1 christos __asm__ ("mul{q} %3" \ 511 1.1 christos : "=a" ((UDItype) (w0)), \ 512 1.1 christos "=d" ((UDItype) (w1)) \ 513 1.1 christos : "%0" ((UDItype) (u)), \ 514 1.1 christos "rm" ((UDItype) (v))) 515 1.1 christos #define udiv_qrnnd(q, r, n1, n0, dv) \ 516 1.1 christos __asm__ ("div{q} %4" \ 517 1.1 christos : "=a" ((UDItype) (q)), \ 518 1.1 christos "=d" ((UDItype) (r)) \ 519 1.1 christos : "0" ((UDItype) (n0)), \ 520 1.1 christos "1" ((UDItype) (n1)), \ 521 1.1 christos "rm" ((UDItype) (dv))) 522 1.1 christos #define 
count_leading_zeros(count, x) ((count) = __builtin_clzll (x)) 523 1.1 christos #define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x)) 524 1.1 christos #define UMUL_TIME 40 525 1.1 christos #define UDIV_TIME 40 526 1.1 christos #endif /* x86_64 */ 527 1.1 christos 528 1.1 christos #if defined (__i960__) && W_TYPE_SIZE == 32 529 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 530 1.1 christos ({union {UDItype __ll; \ 531 1.1 christos struct {USItype __l, __h;} __i; \ 532 1.1 christos } __xx; \ 533 1.1 christos __asm__ ("emul %2,%1,%0" \ 534 1.1 christos : "=d" (__xx.__ll) \ 535 1.1 christos : "%dI" ((USItype) (u)), \ 536 1.1 christos "dI" ((USItype) (v))); \ 537 1.1 christos (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 538 1.1 christos #define __umulsidi3(u, v) \ 539 1.1 christos ({UDItype __w; \ 540 1.1 christos __asm__ ("emul %2,%1,%0" \ 541 1.1 christos : "=d" (__w) \ 542 1.1 christos : "%dI" ((USItype) (u)), \ 543 1.1 christos "dI" ((USItype) (v))); \ 544 1.1 christos __w; }) 545 1.1 christos #endif /* __i960__ */ 546 1.1 christos 547 1.1 christos #if defined (__ia64) && W_TYPE_SIZE == 64 548 1.1 christos /* This form encourages gcc (pre-release 3.4 at least) to emit predicated 549 1.1 christos "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency. The generic 550 1.1 christos code using "al<bl" arithmetically comes out making an actual 0 or 1 in a 551 1.1 christos register, which takes an extra cycle. */ 552 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 553 1.1 christos do { \ 554 1.1 christos UWtype __x; \ 555 1.1 christos __x = (al) - (bl); \ 556 1.1 christos if ((al) < (bl)) \ 557 1.1 christos (sh) = (ah) - (bh) - 1; \ 558 1.1 christos else \ 559 1.1 christos (sh) = (ah) - (bh); \ 560 1.1 christos (sl) = __x; \ 561 1.1 christos } while (0) 562 1.1 christos 563 1.1 christos /* Do both product parts in assembly, since that gives better code with 564 1.1 christos all gcc versions. 
Some callers will just use the upper part, and in 565 1.1 christos that situation we waste an instruction, but not any cycles. */ 566 1.1 christos #define umul_ppmm(ph, pl, m0, m1) \ 567 1.1 christos __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \ 568 1.1 christos : "=&f" (ph), "=f" (pl) \ 569 1.1 christos : "f" (m0), "f" (m1)) 570 1.1 christos #define count_leading_zeros(count, x) \ 571 1.1 christos do { \ 572 1.1 christos UWtype _x = (x), _y, _a, _c; \ 573 1.1 christos __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \ 574 1.1 christos __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \ 575 1.1 christos _c = (_a - 1) << 3; \ 576 1.1 christos _x >>= _c; \ 577 1.1 christos if (_x >= 1 << 4) \ 578 1.1 christos _x >>= 4, _c += 4; \ 579 1.1 christos if (_x >= 1 << 2) \ 580 1.1 christos _x >>= 2, _c += 2; \ 581 1.1 christos _c += _x >> 1; \ 582 1.1 christos (count) = W_TYPE_SIZE - 1 - _c; \ 583 1.1 christos } while (0) 584 1.1 christos /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1 585 1.1 christos based, and we don't need a special case for x==0 here */ 586 1.1 christos #define count_trailing_zeros(count, x) \ 587 1.1 christos do { \ 588 1.1 christos UWtype __ctz_x = (x); \ 589 1.1 christos __asm__ ("popcnt %0 = %1" \ 590 1.1 christos : "=r" (count) \ 591 1.1 christos : "r" ((__ctz_x-1) & ~__ctz_x)); \ 592 1.1 christos } while (0) 593 1.1 christos #define UMUL_TIME 14 594 1.1 christos #endif 595 1.1 christos 596 1.11 christos #ifdef __loongarch__ 597 1.11 christos # if W_TYPE_SIZE == 32 598 1.11 christos # define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) 599 1.11 christos # define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) 600 1.11 christos # define COUNT_LEADING_ZEROS_0 32 601 1.11 christos # elif W_TYPE_SIZE == 64 602 1.11 christos # define count_leading_zeros(count, x) ((count) = __builtin_clzll (x)) 603 1.11 christos # define count_trailing_zeros(count, x) ((count) = 
__builtin_ctzll (x)) 604 1.11 christos # define COUNT_LEADING_ZEROS_0 64 605 1.11 christos # endif 606 1.11 christos #endif 607 1.11 christos 608 1.1 christos #if defined (__M32R__) && W_TYPE_SIZE == 32 609 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 610 1.1 christos /* The cmp clears the condition bit. */ \ 611 1.1 christos __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \ 612 1.1 christos : "=r" ((USItype) (sh)), \ 613 1.1 christos "=&r" ((USItype) (sl)) \ 614 1.1 christos : "0" ((USItype) (ah)), \ 615 1.1 christos "r" ((USItype) (bh)), \ 616 1.1 christos "1" ((USItype) (al)), \ 617 1.1 christos "r" ((USItype) (bl)) \ 618 1.1 christos : "cbit") 619 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 620 1.1 christos /* The cmp clears the condition bit. */ \ 621 1.1 christos __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \ 622 1.1 christos : "=r" ((USItype) (sh)), \ 623 1.1 christos "=&r" ((USItype) (sl)) \ 624 1.1 christos : "0" ((USItype) (ah)), \ 625 1.1 christos "r" ((USItype) (bh)), \ 626 1.1 christos "1" ((USItype) (al)), \ 627 1.1 christos "r" ((USItype) (bl)) \ 628 1.1 christos : "cbit") 629 1.1 christos #endif /* __M32R__ */ 630 1.1 christos 631 1.1 christos #if defined (__mc68000__) && W_TYPE_SIZE == 32 632 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 633 1.1 christos __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \ 634 1.1 christos : "=d" ((USItype) (sh)), \ 635 1.1 christos "=&d" ((USItype) (sl)) \ 636 1.1 christos : "%0" ((USItype) (ah)), \ 637 1.1 christos "d" ((USItype) (bh)), \ 638 1.1 christos "%1" ((USItype) (al)), \ 639 1.1 christos "g" ((USItype) (bl))) 640 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 641 1.1 christos __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \ 642 1.1 christos : "=d" ((USItype) (sh)), \ 643 1.1 christos "=&d" ((USItype) (sl)) \ 644 1.1 christos : "0" ((USItype) (ah)), \ 645 1.1 christos "d" ((USItype) (bh)), \ 646 1.1 christos "1" ((USItype) (al)), \ 647 1.1 christos "g" ((USItype) 
(bl))) 648 1.1 christos 649 1.1 christos /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r. */ 650 1.1 christos #if (defined (__mc68020__) && !defined (__mc68060__)) 651 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 652 1.1 christos __asm__ ("mulu%.l %3,%1:%0" \ 653 1.1 christos : "=d" ((USItype) (w0)), \ 654 1.1 christos "=d" ((USItype) (w1)) \ 655 1.1 christos : "%0" ((USItype) (u)), \ 656 1.1 christos "dmi" ((USItype) (v))) 657 1.1 christos #define UMUL_TIME 45 658 1.1 christos #define udiv_qrnnd(q, r, n1, n0, d) \ 659 1.1 christos __asm__ ("divu%.l %4,%1:%0" \ 660 1.1 christos : "=d" ((USItype) (q)), \ 661 1.1 christos "=d" ((USItype) (r)) \ 662 1.1 christos : "0" ((USItype) (n0)), \ 663 1.1 christos "1" ((USItype) (n1)), \ 664 1.1 christos "dmi" ((USItype) (d))) 665 1.1 christos #define UDIV_TIME 90 666 1.1 christos #define sdiv_qrnnd(q, r, n1, n0, d) \ 667 1.1 christos __asm__ ("divs%.l %4,%1:%0" \ 668 1.1 christos : "=d" ((USItype) (q)), \ 669 1.1 christos "=d" ((USItype) (r)) \ 670 1.1 christos : "0" ((USItype) (n0)), \ 671 1.1 christos "1" ((USItype) (n1)), \ 672 1.1 christos "dmi" ((USItype) (d))) 673 1.1 christos 674 1.1 christos #elif defined (__mcoldfire__) /* not mc68020 */ 675 1.1 christos 676 1.1 christos #define umul_ppmm(xh, xl, a, b) \ 677 1.1 christos __asm__ ("| Inlined umul_ppmm\n" \ 678 1.1 christos " move%.l %2,%/d0\n" \ 679 1.1 christos " move%.l %3,%/d1\n" \ 680 1.1 christos " move%.l %/d0,%/d2\n" \ 681 1.1 christos " swap %/d0\n" \ 682 1.1 christos " move%.l %/d1,%/d3\n" \ 683 1.1 christos " swap %/d1\n" \ 684 1.1 christos " move%.w %/d2,%/d4\n" \ 685 1.1 christos " mulu %/d3,%/d4\n" \ 686 1.1 christos " mulu %/d1,%/d2\n" \ 687 1.1 christos " mulu %/d0,%/d3\n" \ 688 1.1 christos " mulu %/d0,%/d1\n" \ 689 1.1 christos " move%.l %/d4,%/d0\n" \ 690 1.1 christos " clr%.w %/d0\n" \ 691 1.1 christos " swap %/d0\n" \ 692 1.1 christos " add%.l %/d0,%/d2\n" \ 693 1.1 christos " add%.l %/d3,%/d2\n" \ 694 1.1 christos " jcc 
1f\n" \ 695 1.1 christos " add%.l %#65536,%/d1\n" \ 696 1.1 christos "1: swap %/d2\n" \ 697 1.1 christos " moveq %#0,%/d0\n" \ 698 1.1 christos " move%.w %/d2,%/d0\n" \ 699 1.1 christos " move%.w %/d4,%/d2\n" \ 700 1.1 christos " move%.l %/d2,%1\n" \ 701 1.1 christos " add%.l %/d1,%/d0\n" \ 702 1.1 christos " move%.l %/d0,%0" \ 703 1.1 christos : "=g" ((USItype) (xh)), \ 704 1.1 christos "=g" ((USItype) (xl)) \ 705 1.1 christos : "g" ((USItype) (a)), \ 706 1.1 christos "g" ((USItype) (b)) \ 707 1.1 christos : "d0", "d1", "d2", "d3", "d4") 708 1.1 christos #define UMUL_TIME 100 709 1.1 christos #define UDIV_TIME 400 710 1.1 christos #else /* not ColdFire */ 711 1.1 christos /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX. */ 712 1.1 christos #define umul_ppmm(xh, xl, a, b) \ 713 1.1 christos __asm__ ("| Inlined umul_ppmm\n" \ 714 1.1 christos " move%.l %2,%/d0\n" \ 715 1.1 christos " move%.l %3,%/d1\n" \ 716 1.1 christos " move%.l %/d0,%/d2\n" \ 717 1.1 christos " swap %/d0\n" \ 718 1.1 christos " move%.l %/d1,%/d3\n" \ 719 1.1 christos " swap %/d1\n" \ 720 1.1 christos " move%.w %/d2,%/d4\n" \ 721 1.1 christos " mulu %/d3,%/d4\n" \ 722 1.1 christos " mulu %/d1,%/d2\n" \ 723 1.1 christos " mulu %/d0,%/d3\n" \ 724 1.1 christos " mulu %/d0,%/d1\n" \ 725 1.1 christos " move%.l %/d4,%/d0\n" \ 726 1.1 christos " eor%.w %/d0,%/d0\n" \ 727 1.1 christos " swap %/d0\n" \ 728 1.1 christos " add%.l %/d0,%/d2\n" \ 729 1.1 christos " add%.l %/d3,%/d2\n" \ 730 1.1 christos " jcc 1f\n" \ 731 1.1 christos " add%.l %#65536,%/d1\n" \ 732 1.1 christos "1: swap %/d2\n" \ 733 1.1 christos " moveq %#0,%/d0\n" \ 734 1.1 christos " move%.w %/d2,%/d0\n" \ 735 1.1 christos " move%.w %/d4,%/d2\n" \ 736 1.1 christos " move%.l %/d2,%1\n" \ 737 1.1 christos " add%.l %/d1,%/d0\n" \ 738 1.1 christos " move%.l %/d0,%0" \ 739 1.1 christos : "=g" ((USItype) (xh)), \ 740 1.1 christos "=g" ((USItype) (xl)) \ 741 1.1 christos : "g" ((USItype) (a)), \ 742 1.1 christos "g" ((USItype) (b)) \ 743 
1.1 christos : "d0", "d1", "d2", "d3", "d4") 744 1.1 christos #define UMUL_TIME 100 745 1.1 christos #define UDIV_TIME 400 746 1.1 christos 747 1.1 christos #endif /* not mc68020 */ 748 1.1 christos 749 1.1 christos /* The '020, '030, '040 and '060 have bitfield insns. 750 1.1 christos cpu32 disguises as a 68020, but lacks them. */ 751 1.1 christos #if defined (__mc68020__) && !defined (__mcpu32__) 752 1.1 christos #define count_leading_zeros(count, x) \ 753 1.1 christos __asm__ ("bfffo %1{%b2:%b2},%0" \ 754 1.1 christos : "=d" ((USItype) (count)) \ 755 1.1 christos : "od" ((USItype) (x)), "n" (0)) 756 1.1 christos /* Some ColdFire architectures have a ff1 instruction supported via 757 1.1 christos __builtin_clz. */ 758 1.1 christos #elif defined (__mcfisaaplus__) || defined (__mcfisac__) 759 1.1 christos #define count_leading_zeros(count,x) ((count) = __builtin_clz (x)) 760 1.1 christos #define COUNT_LEADING_ZEROS_0 32 761 1.1 christos #endif 762 1.1 christos #endif /* mc68000 */ 763 1.1 christos 764 1.1 christos #if defined (__m88000__) && W_TYPE_SIZE == 32 765 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 766 1.1 christos __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \ 767 1.1 christos : "=r" ((USItype) (sh)), \ 768 1.1 christos "=&r" ((USItype) (sl)) \ 769 1.1 christos : "%rJ" ((USItype) (ah)), \ 770 1.1 christos "rJ" ((USItype) (bh)), \ 771 1.1 christos "%rJ" ((USItype) (al)), \ 772 1.1 christos "rJ" ((USItype) (bl))) 773 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 774 1.1 christos __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \ 775 1.1 christos : "=r" ((USItype) (sh)), \ 776 1.1 christos "=&r" ((USItype) (sl)) \ 777 1.1 christos : "rJ" ((USItype) (ah)), \ 778 1.1 christos "rJ" ((USItype) (bh)), \ 779 1.1 christos "rJ" ((USItype) (al)), \ 780 1.1 christos "rJ" ((USItype) (bl))) 781 1.1 christos #define count_leading_zeros(count, x) \ 782 1.1 christos do { \ 783 1.1 christos USItype __cbtmp; \ 784 1.1 christos __asm__ ("ff1 
%0,%1" \ 785 1.1 christos : "=r" (__cbtmp) \ 786 1.1 christos : "r" ((USItype) (x))); \ 787 1.1 christos (count) = __cbtmp ^ 31; \ 788 1.1 christos } while (0) 789 1.1 christos #define COUNT_LEADING_ZEROS_0 63 /* sic */ 790 1.1 christos #if defined (__mc88110__) 791 1.1 christos #define umul_ppmm(wh, wl, u, v) \ 792 1.1 christos do { \ 793 1.1 christos union {UDItype __ll; \ 794 1.1 christos struct {USItype __h, __l;} __i; \ 795 1.1 christos } __xx; \ 796 1.1 christos __asm__ ("mulu.d %0,%1,%2" \ 797 1.1 christos : "=r" (__xx.__ll) \ 798 1.1 christos : "r" ((USItype) (u)), \ 799 1.1 christos "r" ((USItype) (v))); \ 800 1.1 christos (wh) = __xx.__i.__h; \ 801 1.1 christos (wl) = __xx.__i.__l; \ 802 1.1 christos } while (0) 803 1.1 christos #define udiv_qrnnd(q, r, n1, n0, d) \ 804 1.1 christos ({union {UDItype __ll; \ 805 1.1 christos struct {USItype __h, __l;} __i; \ 806 1.1 christos } __xx; \ 807 1.1 christos USItype __q; \ 808 1.1 christos __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 809 1.1 christos __asm__ ("divu.d %0,%1,%2" \ 810 1.1 christos : "=r" (__q) \ 811 1.1 christos : "r" (__xx.__ll), \ 812 1.1 christos "r" ((USItype) (d))); \ 813 1.1 christos (r) = (n0) - __q * (d); (q) = __q; }) 814 1.1 christos #define UMUL_TIME 5 815 1.1 christos #define UDIV_TIME 25 816 1.1 christos #else 817 1.1 christos #define UMUL_TIME 17 818 1.1 christos #define UDIV_TIME 150 819 1.1 christos #endif /* __mc88110__ */ 820 1.1 christos #endif /* __m88000__ */ 821 1.1 christos 822 1.1 christos #if defined (__mn10300__) 823 1.1 christos # if defined (__AM33__) 824 1.1 christos # define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 825 1.1 christos # define umul_ppmm(w1, w0, u, v) \ 826 1.1 christos asm("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v)) 827 1.1 christos # define smul_ppmm(w1, w0, u, v) \ 828 1.1 christos asm("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v)) 829 1.1 christos # else 830 1.1 christos # define umul_ppmm(w1, w0, u, v) \ 831 
1.1 christos asm("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v)) 832 1.1 christos # define smul_ppmm(w1, w0, u, v) \ 833 1.1 christos asm("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v)) 834 1.1 christos # endif 835 1.1 christos # define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 836 1.1 christos do { \ 837 1.1 christos DWunion __s, __a, __b; \ 838 1.1 christos __a.s.low = (al); __a.s.high = (ah); \ 839 1.1 christos __b.s.low = (bl); __b.s.high = (bh); \ 840 1.1 christos __s.ll = __a.ll + __b.ll; \ 841 1.1 christos (sl) = __s.s.low; (sh) = __s.s.high; \ 842 1.1 christos } while (0) 843 1.1 christos # define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 844 1.1 christos do { \ 845 1.1 christos DWunion __s, __a, __b; \ 846 1.1 christos __a.s.low = (al); __a.s.high = (ah); \ 847 1.1 christos __b.s.low = (bl); __b.s.high = (bh); \ 848 1.1 christos __s.ll = __a.ll - __b.ll; \ 849 1.1 christos (sl) = __s.s.low; (sh) = __s.s.high; \ 850 1.1 christos } while (0) 851 1.1 christos # define udiv_qrnnd(q, r, nh, nl, d) \ 852 1.1 christos asm("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh)) 853 1.1 christos # define sdiv_qrnnd(q, r, nh, nl, d) \ 854 1.1 christos asm("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh)) 855 1.1 christos # define UMUL_TIME 3 856 1.1 christos # define UDIV_TIME 38 857 1.1 christos #endif 858 1.1 christos 859 1.1 christos #if defined (__mips__) && W_TYPE_SIZE == 32 860 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 861 1.1 christos do { \ 862 1.1 christos UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \ 863 1.1 christos (w1) = (USItype) (__x >> 32); \ 864 1.1 christos (w0) = (USItype) (__x); \ 865 1.1 christos } while (0) 866 1.1 christos #define UMUL_TIME 10 867 1.1 christos #define UDIV_TIME 100 868 1.1 christos 869 1.3 christos #if (__mips == 32 || __mips == 64) && ! 
defined (__mips16) 870 1.1 christos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 871 1.1 christos #define COUNT_LEADING_ZEROS_0 32 872 1.1 christos #endif 873 1.1 christos #endif /* __mips__ */ 874 1.1 christos 875 1.1 christos /* FIXME: We should test _IBMR2 here when we add assembly support for the 876 1.1 christos system vendor compilers. 877 1.1 christos FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good 878 1.1 christos enough, since that hits ARM and m68k too. */ 879 1.1 christos #if (defined (_ARCH_PPC) /* AIX */ \ 880 1.1 christos || defined (__powerpc__) /* gcc */ \ 881 1.1 christos || defined (__POWERPC__) /* BEOS */ \ 882 1.1 christos || defined (__ppc__) /* Darwin */ \ 883 1.1 christos || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ 884 1.1 christos || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ 885 1.1 christos && CPU_FAMILY == PPC) \ 886 1.1 christos ) && W_TYPE_SIZE == 32 887 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 888 1.1 christos do { \ 889 1.1 christos if (__builtin_constant_p (bh) && (bh) == 0) \ 890 1.1 christos __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ 891 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 892 1.1 christos else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ 893 1.1 christos __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ 894 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 895 1.1 christos else \ 896 1.1 christos __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ 897 1.1 christos : "=r" (sh), "=&r" (sl) \ 898 1.1 christos : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ 899 1.1 christos } while (0) 900 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 901 1.1 christos do { \ 902 1.1 christos if (__builtin_constant_p (ah) && (ah) == 0) \ 903 1.1 christos __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ 904 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 905 
1.1 christos else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ 906 1.1 christos __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ 907 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 908 1.1 christos else if (__builtin_constant_p (bh) && (bh) == 0) \ 909 1.1 christos __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ 910 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 911 1.1 christos else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ 912 1.1 christos __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ 913 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 914 1.1 christos else \ 915 1.1 christos __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ 916 1.1 christos : "=r" (sh), "=&r" (sl) \ 917 1.1 christos : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ 918 1.1 christos } while (0) 919 1.1 christos #define count_leading_zeros(count, x) \ 920 1.1 christos __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x)) 921 1.1 christos #define COUNT_LEADING_ZEROS_0 32 922 1.1 christos #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \ 923 1.1 christos || defined (__ppc__) \ 924 1.1 christos || (defined (PPC) && ! 
defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ 925 1.1 christos || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ 926 1.1 christos && CPU_FAMILY == PPC) 927 1.1 christos #define umul_ppmm(ph, pl, m0, m1) /* Unsigned (ph:pl) = m0 * m1: mulhwu supplies the high word, the C multiply the low word. */ \ 928 1.1 christos do { \ 929 1.1 christos USItype __m0 = (m0), __m1 = (m1); /* Evaluate each argument exactly once. */ \ 930 1.1 christos __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* Use the copies, not the raw macro arguments. */ \ 931 1.1 christos (pl) = __m0 * __m1; \ 932 1.1 christos } while (0) 933 1.1 christos #define UMUL_TIME 15 934 1.1 christos #define smul_ppmm(ph, pl, m0, m1) /* Signed counterpart of umul_ppmm: mulhw supplies the high word, the C multiply the low word. */ \ 935 1.1 christos do { \ 936 1.1 christos SItype __m0 = (m0), __m1 = (m1); /* Evaluate each argument exactly once. */ \ 937 1.1 christos __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* Use the copies, not the raw macro arguments. */ \ 938 1.1 christos (pl) = __m0 * __m1; \ 939 1.1 christos } while (0) 940 1.1 christos #define SMUL_TIME 14 941 1.1 christos #define UDIV_TIME 120 942 1.1 christos #endif 943 1.1 christos #endif /* 32-bit POWER architecture variants. */ 944 1.1 christos 945 1.1 christos /* We should test _IBMR2 here when we add assembly support for the system 946 1.1 christos vendor compilers. 
*/ 947 1.1 christos #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64 948 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 949 1.1 christos do { \ 950 1.1 christos if (__builtin_constant_p (bh) && (bh) == 0) \ 951 1.1 christos __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ 952 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 953 1.1 christos else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ 954 1.1 christos __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ 955 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 956 1.1 christos else \ 957 1.1 christos __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ 958 1.1 christos : "=r" (sh), "=&r" (sl) \ 959 1.1 christos : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ 960 1.1 christos } while (0) 961 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 962 1.1 christos do { \ 963 1.1 christos if (__builtin_constant_p (ah) && (ah) == 0) \ 964 1.1 christos __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ 965 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 966 1.1 christos else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ 967 1.1 christos __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ 968 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 969 1.1 christos else if (__builtin_constant_p (bh) && (bh) == 0) \ 970 1.1 christos __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ 971 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 972 1.1 christos else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ 973 1.1 christos __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ 974 1.1 christos : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 975 1.1 christos else \ 976 1.1 christos __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ 977 1.1 christos : "=r" (sh), "=&r" (sl) \ 978 1.1 christos : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ 979 1.1 christos } while (0) 980 1.1 
christos #define count_leading_zeros(count, x) \ 981 1.1 christos __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x)) 982 1.1 christos #define COUNT_LEADING_ZEROS_0 64 983 1.1 christos #define umul_ppmm(ph, pl, m0, m1) /* Unsigned (ph:pl) = m0 * m1: mulhdu supplies the high word, the C multiply the low word. */ \ 984 1.1 christos do { \ 985 1.1 christos UDItype __m0 = (m0), __m1 = (m1); /* Evaluate each argument exactly once. */ \ 986 1.1 christos __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* Use the copies, not the raw macro arguments. */ \ 987 1.1 christos (pl) = __m0 * __m1; \ 988 1.1 christos } while (0) 989 1.1 christos #define UMUL_TIME 15 990 1.1 christos #define smul_ppmm(ph, pl, m0, m1) /* Signed counterpart of umul_ppmm: mulhd supplies the high word, the C multiply the low word. */ \ 991 1.1 christos do { \ 992 1.1 christos DItype __m0 = (m0), __m1 = (m1); /* Evaluate each argument exactly once. */ \ 993 1.1 christos __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* Use the copies, not the raw macro arguments. */ \ 994 1.1 christos (pl) = __m0 * __m1; \ 995 1.1 christos } while (0) 996 1.1 christos #define SMUL_TIME 14 /* ??? */ 997 1.1 christos #define UDIV_TIME 120 /* ??? */ 998 1.1 christos #endif /* 64-bit PowerPC. */ 999 1.1 christos 1000 1.1 christos #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 1001 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1002 1.1 christos __asm__ ("a %1,%5\n\tae %0,%3" \ 1003 1.1 christos : "=r" ((USItype) (sh)), \ 1004 1.1 christos "=&r" ((USItype) (sl)) \ 1005 1.1 christos : "%0" ((USItype) (ah)), \ 1006 1.1 christos "r" ((USItype) (bh)), \ 1007 1.1 christos "%1" ((USItype) (al)), \ 1008 1.1 christos "r" ((USItype) (bl))) 1009 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1010 1.1 christos __asm__ ("s %1,%5\n\tse %0,%3" \ 1011 1.1 christos : "=r" ((USItype) (sh)), \ 1012 1.1 christos "=&r" ((USItype) (sl)) \ 1013 1.1 christos : "0" ((USItype) (ah)), \ 1014 1.1 christos "r" ((USItype) (bh)), \ 1015 1.1 christos "1" ((USItype) (al)), \ 1016 1.1 christos "r" ((USItype) (bl))) 1017 1.1 christos #define umul_ppmm(ph, pl, m0, m1) \ 1018 1.1 christos do { \ 1019 1.1 christos USItype __m0 = (m0), __m1 = (m1); \ 1020 1.1 christos __asm__ ( \ 1021 1.1 christos "s r2,r2\n" \ 1022 1.1 christos " mts r10,%2\n" \ 1023 1.1 christos " m 
r2,%3\n" \ 1024 1.1 christos " m r2,%3\n" \ 1025 1.1 christos " m r2,%3\n" \ 1026 1.1 christos " m r2,%3\n" \ 1027 1.1 christos " m r2,%3\n" \ 1028 1.1 christos " m r2,%3\n" \ 1029 1.1 christos " m r2,%3\n" \ 1030 1.1 christos " m r2,%3\n" \ 1031 1.1 christos " m r2,%3\n" \ 1032 1.1 christos " m r2,%3\n" \ 1033 1.1 christos " m r2,%3\n" \ 1034 1.1 christos " m r2,%3\n" \ 1035 1.1 christos " m r2,%3\n" \ 1036 1.1 christos " m r2,%3\n" \ 1037 1.1 christos " m r2,%3\n" \ 1038 1.1 christos " m r2,%3\n" \ 1039 1.1 christos " cas %0,r2,r0\n" \ 1040 1.1 christos " mfs r10,%1" \ 1041 1.1 christos : "=r" ((USItype) (ph)), \ 1042 1.1 christos "=r" ((USItype) (pl)) \ 1043 1.1 christos : "%r" (__m0), \ 1044 1.1 christos "r" (__m1) \ 1045 1.1 christos : "r2"); \ 1046 1.1 christos (ph) += ((((SItype) __m0 >> 31) & __m1) \ 1047 1.1 christos + (((SItype) __m1 >> 31) & __m0)); \ 1048 1.1 christos } while (0) 1049 1.1 christos #define UMUL_TIME 20 1050 1.1 christos #define UDIV_TIME 200 1051 1.1 christos #define count_leading_zeros(count, x) \ 1052 1.1 christos do { \ 1053 1.1 christos if ((x) >= 0x10000) \ 1054 1.1 christos __asm__ ("clz %0,%1" \ 1055 1.1 christos : "=r" ((USItype) (count)) \ 1056 1.1 christos : "r" ((USItype) (x) >> 16)); \ 1057 1.1 christos else \ 1058 1.1 christos { \ 1059 1.1 christos __asm__ ("clz %0,%1" \ 1060 1.1 christos : "=r" ((USItype) (count)) \ 1061 1.1 christos : "r" ((USItype) (x))); \ 1062 1.1 christos (count) += 16; \ 1063 1.1 christos } \ 1064 1.1 christos } while (0) 1065 1.1 christos #endif 1066 1.1 christos 1067 1.8 christos #if defined(__riscv) 1068 1.8 christos #ifdef __riscv_mul 1069 1.8 christos #define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v)) 1070 1.8 christos #define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b)) 1071 1.8 christos #else 1072 1.8 christos #if __riscv_xlen == 32 1073 1.8 christos #define MULUW3 "call __mulsi3" 1074 1.8 christos #elif __riscv_xlen == 64 1075 1.8 christos #define MULUW3 "call __muldi3" 1076 1.8 
christos #else 1077 1.8 christos #error unsupport xlen 1078 1.8 christos #endif /* __riscv_xlen */ 1079 1.8 christos /* We rely on the fact that MULUW3 doesn't clobber the t-registers. 1080 1.8 christos It can get better register allocation result. */ 1081 1.8 christos #define __muluw3(a, b) \ 1082 1.8 christos ({ \ 1083 1.8 christos register UWtype __op0 asm ("a0") = a; \ 1084 1.8 christos register UWtype __op1 asm ("a1") = b; \ 1085 1.8 christos asm volatile (MULUW3 \ 1086 1.8 christos : "+r" (__op0), "+r" (__op1) \ 1087 1.8 christos : \ 1088 1.8 christos : "ra", "a2", "a3"); \ 1089 1.8 christos __op0; \ 1090 1.8 christos }) 1091 1.8 christos #endif /* __riscv_mul */ 1092 1.8 christos #define umul_ppmm(w1, w0, u, v) \ 1093 1.8 christos do { \ 1094 1.8 christos UWtype __x0, __x1, __x2, __x3; \ 1095 1.8 christos UHWtype __ul, __vl, __uh, __vh; \ 1096 1.8 christos \ 1097 1.8 christos __ul = __ll_lowpart (u); \ 1098 1.8 christos __uh = __ll_highpart (u); \ 1099 1.8 christos __vl = __ll_lowpart (v); \ 1100 1.8 christos __vh = __ll_highpart (v); \ 1101 1.8 christos \ 1102 1.8 christos __x0 = __muluw3 (__ul, __vl); \ 1103 1.8 christos __x1 = __muluw3 (__ul, __vh); \ 1104 1.8 christos __x2 = __muluw3 (__uh, __vl); \ 1105 1.8 christos __x3 = __muluw3 (__uh, __vh); \ 1106 1.8 christos \ 1107 1.8 christos __x1 += __ll_highpart (__x0);/* this can't give carry */ \ 1108 1.8 christos __x1 += __x2; /* but this indeed can */ \ 1109 1.8 christos if (__x1 < __x2) /* did we get it? */ \ 1110 1.8 christos __x3 += __ll_B; /* yes, add it in the proper pos. 
*/ \ 1111 1.8 christos \ 1112 1.8 christos (w1) = __x3 + __ll_highpart (__x1); \ 1113 1.8 christos (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ 1114 1.8 christos } while (0) 1115 1.8 christos #endif /* __riscv */ 1116 1.8 christos 1117 1.8 christos #if defined(__sh__) && W_TYPE_SIZE == 32 1118 1.1 christos #ifndef __sh1__ 1119 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 1120 1.1 christos __asm__ ( \ 1121 1.1 christos "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \ 1122 1.1 christos : "=r<" ((USItype)(w1)), \ 1123 1.1 christos "=r<" ((USItype)(w0)) \ 1124 1.1 christos : "r" ((USItype)(u)), \ 1125 1.1 christos "r" ((USItype)(v)) \ 1126 1.1 christos : "macl", "mach") 1127 1.1 christos #define UMUL_TIME 5 1128 1.1 christos #endif 1129 1.1 christos 1130 1.1 christos /* This is the same algorithm as __udiv_qrnnd_c. */ 1131 1.1 christos #define UDIV_NEEDS_NORMALIZATION 1 1132 1.1 christos 1133 1.6 christos #ifdef __FDPIC__ 1134 1.6 christos /* FDPIC needs a special version of the asm fragment to extract the 1135 1.6 christos code address from the function descriptor. __udiv_qrnnd_16 is 1136 1.6 christos assumed to be local and not to use the GOT, so loading r12 is 1137 1.6 christos not needed. 
*/ 1138 1.6 christos #define udiv_qrnnd(q, r, n1, n0, d) \ 1139 1.6 christos do { \ 1140 1.6 christos extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ 1141 1.6 christos __attribute__ ((visibility ("hidden"))); \ 1142 1.6 christos /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ 1143 1.6 christos __asm__ ( \ 1144 1.6 christos "mov%M4 %4,r5\n" \ 1145 1.6 christos " swap.w %3,r4\n" \ 1146 1.6 christos " swap.w r5,r6\n" \ 1147 1.6 christos " mov.l @%5,r2\n" \ 1148 1.6 christos " jsr @r2\n" \ 1149 1.6 christos " shll16 r6\n" \ 1150 1.6 christos " swap.w r4,r4\n" \ 1151 1.6 christos " mov.l @%5,r2\n" \ 1152 1.6 christos " jsr @r2\n" \ 1153 1.6 christos " swap.w r1,%0\n" \ 1154 1.6 christos " or r1,%0" \ 1155 1.6 christos : "=r" (q), "=&z" (r) \ 1156 1.6 christos : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ 1157 1.6 christos : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ 1158 1.6 christos } while (0) 1159 1.6 christos #else 1160 1.1 christos #define udiv_qrnnd(q, r, n1, n0, d) \ 1161 1.1 christos do { \ 1162 1.1 christos extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ 1163 1.1 christos __attribute__ ((visibility ("hidden"))); \ 1164 1.1 christos /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ 1165 1.1 christos __asm__ ( \ 1166 1.1 christos "mov%M4 %4,r5\n" \ 1167 1.1 christos " swap.w %3,r4\n" \ 1168 1.1 christos " swap.w r5,r6\n" \ 1169 1.1 christos " jsr @%5\n" \ 1170 1.1 christos " shll16 r6\n" \ 1171 1.1 christos " swap.w r4,r4\n" \ 1172 1.1 christos " jsr @%5\n" \ 1173 1.1 christos " swap.w r1,%0\n" \ 1174 1.1 christos " or r1,%0" \ 1175 1.1 christos : "=r" (q), "=&z" (r) \ 1176 1.1 christos : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ 1177 1.1 christos : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ 1178 1.1 christos } while (0) 1179 1.6 christos #endif /* __FDPIC__ */ 1180 1.1 christos 1181 1.1 christos #define UDIV_TIME 80 1182 1.1 christos 1183 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 
1184 1.1 christos __asm__ ("clrt;subc %5,%1; subc %4,%0" \ 1185 1.1 christos : "=r" (sh), "=r" (sl) \ 1186 1.1 christos : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t") 1187 1.1 christos 1188 1.1 christos #endif /* __sh__ */ 1189 1.1 christos 1190 1.1 christos #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \ 1191 1.1 christos && W_TYPE_SIZE == 32 1192 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1193 1.1 christos __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \ 1194 1.1 christos : "=r" ((USItype) (sh)), \ 1195 1.1 christos "=&r" ((USItype) (sl)) \ 1196 1.1 christos : "%rJ" ((USItype) (ah)), \ 1197 1.1 christos "rI" ((USItype) (bh)), \ 1198 1.1 christos "%rJ" ((USItype) (al)), \ 1199 1.1 christos "rI" ((USItype) (bl)) \ 1200 1.1 christos __CLOBBER_CC) 1201 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1202 1.1 christos __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \ 1203 1.1 christos : "=r" ((USItype) (sh)), \ 1204 1.1 christos "=&r" ((USItype) (sl)) \ 1205 1.1 christos : "rJ" ((USItype) (ah)), \ 1206 1.1 christos "rI" ((USItype) (bh)), \ 1207 1.1 christos "rJ" ((USItype) (al)), \ 1208 1.1 christos "rI" ((USItype) (bl)) \ 1209 1.1 christos __CLOBBER_CC) 1210 1.1 christos #if defined (__sparc_v9__) 1211 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 1212 1.1 christos do { \ 1213 1.1 christos register USItype __g1 asm ("g1"); \ 1214 1.1 christos __asm__ ("umul\t%2,%3,%1\n\t" \ 1215 1.1 christos "srlx\t%1, 32, %0" \ 1216 1.1 christos : "=r" ((USItype) (w1)), \ 1217 1.1 christos "=r" (__g1) \ 1218 1.1 christos : "r" ((USItype) (u)), \ 1219 1.1 christos "r" ((USItype) (v))); \ 1220 1.1 christos (w0) = __g1; \ 1221 1.1 christos } while (0) 1222 1.1 christos #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ 1223 1.1 christos __asm__ ("mov\t%2,%%y\n\t" \ 1224 1.1 christos "udiv\t%3,%4,%0\n\t" \ 1225 1.1 christos "umul\t%0,%4,%1\n\t" \ 1226 1.1 christos "sub\t%3,%1,%1" \ 1227 1.1 christos : "=&r" ((USItype) (__q)), \ 1228 1.1 
christos "=&r" ((USItype) (__r)) \ 1229 1.1 christos : "r" ((USItype) (__n1)), \ 1230 1.1 christos "r" ((USItype) (__n0)), \ 1231 1.1 christos "r" ((USItype) (__d))) 1232 1.1 christos #else 1233 1.1 christos #if defined (__sparc_v8__) 1234 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 1235 1.1 christos __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 1236 1.1 christos : "=r" ((USItype) (w1)), \ 1237 1.1 christos "=r" ((USItype) (w0)) \ 1238 1.1 christos : "r" ((USItype) (u)), \ 1239 1.1 christos "r" ((USItype) (v))) 1240 1.1 christos #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ 1241 1.1 christos __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\ 1242 1.1 christos : "=&r" ((USItype) (__q)), \ 1243 1.1 christos "=&r" ((USItype) (__r)) \ 1244 1.1 christos : "r" ((USItype) (__n1)), \ 1245 1.1 christos "r" ((USItype) (__n0)), \ 1246 1.1 christos "r" ((USItype) (__d))) 1247 1.1 christos #else 1248 1.1 christos #if defined (__sparclite__) 1249 1.1 christos /* This has hardware multiply but not divide. It also has two additional 1250 1.1 christos instructions scan (ffs from high bit) and divscc. */ 1251 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 1252 1.1 christos __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 1253 1.1 christos : "=r" ((USItype) (w1)), \ 1254 1.1 christos "=r" ((USItype) (w0)) \ 1255 1.1 christos : "r" ((USItype) (u)), \ 1256 1.1 christos "r" ((USItype) (v))) 1257 1.1 christos #define udiv_qrnnd(q, r, n1, n0, d) \ 1258 1.1 christos __asm__ ("! Inlined udiv_qrnnd\n" \ 1259 1.1 christos " wr %%g0,%2,%%y ! 
Not a delayed write for sparclite\n" \ 1260 1.1 christos " tst %%g0\n" \ 1261 1.1 christos " divscc %3,%4,%%g1\n" \ 1262 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1263 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1264 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1265 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1266 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1267 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1268 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1269 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1270 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1271 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1272 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1273 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1274 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1275 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1276 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1277 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1278 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1279 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1280 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1281 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1282 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1283 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1284 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1285 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1286 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1287 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1288 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1289 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1290 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1291 1.1 christos " divscc %%g1,%4,%%g1\n" \ 1292 1.1 christos " divscc %%g1,%4,%0\n" \ 1293 1.1 christos " rd %%y,%1\n" \ 1294 1.1 christos " bl,a 1f\n" \ 1295 1.1 christos " add %1,%4,%1\n" \ 1296 1.1 christos "1: ! 
End of inline udiv_qrnnd" \ 1297 1.1 christos : "=r" ((USItype) (q)), \ 1298 1.1 christos "=r" ((USItype) (r)) \ 1299 1.1 christos : "r" ((USItype) (n1)), \ 1300 1.1 christos "r" ((USItype) (n0)), \ 1301 1.1 christos "rI" ((USItype) (d)) \ 1302 1.1 christos : "g1" __AND_CLOBBER_CC) 1303 1.1 christos #define UDIV_TIME 37 1304 1.1 christos #define count_leading_zeros(count, x) \ 1305 1.1 christos do { \ 1306 1.1 christos __asm__ ("scan %1,1,%0" \ 1307 1.1 christos : "=r" ((USItype) (count)) \ 1308 1.1 christos : "r" ((USItype) (x))); \ 1309 1.1 christos } while (0) 1310 1.1 christos /* Early sparclites return 63 for an argument of 0, but they warn that future 1311 1.1 christos implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 1312 1.1 christos undefined. */ 1313 1.1 christos #else 1314 1.1 christos /* SPARC without integer multiplication and divide instructions. 1315 1.1 christos (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */ 1316 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 1317 1.1 christos __asm__ ("! Inlined umul_ppmm\n" \ 1318 1.1 christos " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\ 1319 1.1 christos " sra %3,31,%%o5 ! Don't move this insn\n" \ 1320 1.1 christos " and %2,%%o5,%%o5 ! Don't move this insn\n" \ 1321 1.1 christos " andcc %%g0,0,%%g1 ! 
Don't move this insn\n" \ 1322 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1323 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1324 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1325 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1326 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1327 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1328 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1329 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1330 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1331 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1332 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1333 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1334 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1335 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1336 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1337 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1338 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1339 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1340 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1341 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1342 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1343 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1344 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1345 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1346 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1347 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1348 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1349 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1350 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1351 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1352 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1353 1.1 christos " mulscc %%g1,%3,%%g1\n" \ 1354 1.1 christos " mulscc %%g1,0,%%g1\n" \ 1355 1.1 christos " add %%g1,%%o5,%0\n" \ 1356 1.1 christos " rd %%y,%1" \ 1357 1.1 christos : "=r" ((USItype) (w1)), \ 1358 1.1 christos "=r" ((USItype) (w0)) \ 1359 1.1 christos : "%rI" ((USItype) (u)), \ 1360 1.1 christos "r" ((USItype) (v)) \ 1361 1.1 christos : "g1", "o5" __AND_CLOBBER_CC) 1362 1.1 christos #define UMUL_TIME 39 /* 39 instructions */ 1363 1.1 christos /* It's quite necessary to add this much assembler for the sparc. 
1364 1.1 christos The default udiv_qrnnd (in C) is more than 10 times slower! */ 1365 1.1 christos #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ 1366 1.1 christos __asm__ ("! Inlined udiv_qrnnd\n" \ 1367 1.1 christos " mov 32,%%g1\n" \ 1368 1.1 christos " subcc %1,%2,%%g0\n" \ 1369 1.1 christos "1: bcs 5f\n" \ 1370 1.1 christos " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \ 1371 1.1 christos " sub %1,%2,%1 ! this kills msb of n\n" \ 1372 1.1 christos " addx %1,%1,%1 ! so this can't give carry\n" \ 1373 1.1 christos " subcc %%g1,1,%%g1\n" \ 1374 1.1 christos "2: bne 1b\n" \ 1375 1.1 christos " subcc %1,%2,%%g0\n" \ 1376 1.1 christos " bcs 3f\n" \ 1377 1.1 christos " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \ 1378 1.1 christos " b 3f\n" \ 1379 1.1 christos " sub %1,%2,%1 ! this kills msb of n\n" \ 1380 1.1 christos "4: sub %1,%2,%1\n" \ 1381 1.1 christos "5: addxcc %1,%1,%1\n" \ 1382 1.1 christos " bcc 2b\n" \ 1383 1.1 christos " subcc %%g1,1,%%g1\n" \ 1384 1.1 christos "! Got carry from n. Subtract next step to cancel this carry.\n" \ 1385 1.1 christos " bne 4b\n" \ 1386 1.1 christos " addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \ 1387 1.1 christos " sub %1,%2,%1\n" \ 1388 1.1 christos "3: xnor %0,0,%0\n" \ 1389 1.1 christos " ! End of inline udiv_qrnnd" \ 1390 1.1 christos : "=&r" ((USItype) (__q)), \ 1391 1.1 christos "=&r" ((USItype) (__r)) \ 1392 1.1 christos : "r" ((USItype) (__d)), \ 1393 1.1 christos "1" ((USItype) (__n1)), \ 1394 1.1 christos "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC) 1395 1.1 christos #define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. 
*/ 1396 1.1 christos #endif /* __sparclite__ */ 1397 1.1 christos #endif /* __sparc_v8__ */ 1398 1.1 christos #endif /* __sparc_v9__ */ 1399 1.1 christos #endif /* sparc32 */ 1400 1.1 christos 1401 1.1 christos #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \ 1402 1.1 christos && W_TYPE_SIZE == 64 1403 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1404 1.1 christos do { \ 1405 1.1 christos UDItype __carry = 0; \ 1406 1.1 christos __asm__ ("addcc\t%r5,%6,%1\n\t" \ 1407 1.1 christos "add\t%r3,%4,%0\n\t" \ 1408 1.1 christos "movcs\t%%xcc, 1, %2\n\t" \ 1409 1.1 christos "add\t%0, %2, %0" \ 1410 1.1 christos : "=r" ((UDItype)(sh)), \ 1411 1.1 christos "=&r" ((UDItype)(sl)), \ 1412 1.1 christos "+r" (__carry) \ 1413 1.1 christos : "%rJ" ((UDItype)(ah)), \ 1414 1.1 christos "rI" ((UDItype)(bh)), \ 1415 1.1 christos "%rJ" ((UDItype)(al)), \ 1416 1.1 christos "rI" ((UDItype)(bl)) \ 1417 1.1 christos __CLOBBER_CC); \ 1418 1.1 christos } while (0) 1419 1.1 christos 1420 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1421 1.1 christos do { \ 1422 1.1 christos UDItype __carry = 0; \ 1423 1.1 christos __asm__ ("subcc\t%r5,%6,%1\n\t" \ 1424 1.1 christos "sub\t%r3,%4,%0\n\t" \ 1425 1.1 christos "movcs\t%%xcc, 1, %2\n\t" \ 1426 1.1 christos "sub\t%0, %2, %0" \ 1427 1.1 christos : "=r" ((UDItype)(sh)), \ 1428 1.1 christos "=&r" ((UDItype)(sl)), \ 1429 1.1 christos "+r" (__carry) \ 1430 1.1 christos : "%rJ" ((UDItype)(ah)), \ 1431 1.1 christos "rI" ((UDItype)(bh)), \ 1432 1.1 christos "%rJ" ((UDItype)(al)), \ 1433 1.1 christos "rI" ((UDItype)(bl)) \ 1434 1.1 christos __CLOBBER_CC); \ 1435 1.1 christos } while (0) 1436 1.1 christos 1437 1.1 christos #define umul_ppmm(wh, wl, u, v) \ 1438 1.1 christos do { \ 1439 1.1 christos UDItype tmp1, tmp2, tmp3, tmp4; \ 1440 1.1 christos __asm__ __volatile__ ( \ 1441 1.1 christos "srl %7,0,%3\n\t" \ 1442 1.1 christos "mulx %3,%6,%1\n\t" \ 1443 1.1 christos "srlx %6,32,%2\n\t" \ 1444 1.1 christos 
"mulx %2,%3,%4\n\t" \ 1445 1.1 christos "sllx %4,32,%5\n\t" \ 1446 1.1 christos "srl %6,0,%3\n\t" \ 1447 1.1 christos "sub %1,%5,%5\n\t" \ 1448 1.1 christos "srlx %5,32,%5\n\t" \ 1449 1.1 christos "addcc %4,%5,%4\n\t" \ 1450 1.1 christos "srlx %7,32,%5\n\t" \ 1451 1.1 christos "mulx %3,%5,%3\n\t" \ 1452 1.1 christos "mulx %2,%5,%5\n\t" \ 1453 1.1 christos "sethi %%hi(0x80000000),%2\n\t" \ 1454 1.1 christos "addcc %4,%3,%4\n\t" \ 1455 1.1 christos "srlx %4,32,%4\n\t" \ 1456 1.1 christos "add %2,%2,%2\n\t" \ 1457 1.1 christos "movcc %%xcc,%%g0,%2\n\t" \ 1458 1.1 christos "addcc %5,%4,%5\n\t" \ 1459 1.1 christos "sllx %3,32,%3\n\t" \ 1460 1.1 christos "add %1,%3,%1\n\t" \ 1461 1.1 christos "add %5,%2,%0" \ 1462 1.1 christos : "=r" ((UDItype)(wh)), \ 1463 1.1 christos "=&r" ((UDItype)(wl)), \ 1464 1.1 christos "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \ 1465 1.1 christos : "r" ((UDItype)(u)), \ 1466 1.1 christos "r" ((UDItype)(v)) \ 1467 1.1 christos __CLOBBER_CC); \ 1468 1.1 christos } while (0) 1469 1.1 christos #define UMUL_TIME 96 1470 1.1 christos #define UDIV_TIME 230 1471 1.1 christos #endif /* sparc64 */ 1472 1.1 christos 1473 1.1 christos #if defined (__vax__) && W_TYPE_SIZE == 32 1474 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1475 1.1 christos __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ 1476 1.1 christos : "=g" ((USItype) (sh)), \ 1477 1.1 christos "=&g" ((USItype) (sl)) \ 1478 1.1 christos : "%0" ((USItype) (ah)), \ 1479 1.1 christos "g" ((USItype) (bh)), \ 1480 1.1 christos "%1" ((USItype) (al)), \ 1481 1.1 christos "g" ((USItype) (bl))) 1482 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1483 1.1 christos __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \ 1484 1.1 christos : "=g" ((USItype) (sh)), \ 1485 1.1 christos "=&g" ((USItype) (sl)) \ 1486 1.1 christos : "0" ((USItype) (ah)), \ 1487 1.1 christos "g" ((USItype) (bh)), \ 1488 1.1 christos "1" ((USItype) (al)), \ 1489 1.1 christos "g" ((USItype) (bl))) 1490 1.1 christos #define 
umul_ppmm(xh, xl, m0, m1) \ 1491 1.1 christos do { \ 1492 1.1 christos union { \ 1493 1.1 christos UDItype __ll; \ 1494 1.1 christos struct {USItype __l, __h;} __i; \ 1495 1.1 christos } __xx; \ 1496 1.1 christos USItype __m0 = (m0), __m1 = (m1); \ 1497 1.1 christos __asm__ ("emul %1,%2,$0,%0" \ 1498 1.1 christos : "=r" (__xx.__ll) \ 1499 1.1 christos : "g" (__m0), \ 1500 1.1 christos "g" (__m1)); \ 1501 1.1 christos (xh) = __xx.__i.__h; \ 1502 1.1 christos (xl) = __xx.__i.__l; \ 1503 1.1 christos (xh) += ((((SItype) __m0 >> 31) & __m1) \ 1504 1.1 christos + (((SItype) __m1 >> 31) & __m0)); \ 1505 1.1 christos } while (0) 1506 1.1 christos #define sdiv_qrnnd(q, r, n1, n0, d) \ 1507 1.1 christos do { \ 1508 1.1 christos union {DItype __ll; \ 1509 1.1 christos struct {SItype __l, __h;} __i; \ 1510 1.1 christos } __xx; \ 1511 1.1 christos __xx.__i.__h = n1; __xx.__i.__l = n0; \ 1512 1.1 christos __asm__ ("ediv %3,%2,%0,%1" \ 1513 1.1 christos : "=g" (q), "=g" (r) \ 1514 1.1 christos : "g" (__xx.__ll), "g" (d)); \ 1515 1.1 christos } while (0) 1516 1.1 christos #endif /* __vax__ */ 1517 1.1 christos 1518 1.1 christos #ifdef _TMS320C6X 1519 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1520 1.1 christos do \ 1521 1.1 christos { \ 1522 1.1 christos UDItype __ll; \ 1523 1.1 christos __asm__ ("addu .l1 %1, %2, %0" \ 1524 1.1 christos : "=a" (__ll) : "a" (al), "a" (bl)); \ 1525 1.1 christos (sl) = (USItype)__ll; \ 1526 1.1 christos (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \ 1527 1.1 christos } \ 1528 1.1 christos while (0) 1529 1.1 christos 1530 1.1 christos #ifdef _TMS320C6400_PLUS 1531 1.1 christos #define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v) 1532 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 1533 1.1 christos do { \ 1534 1.1 christos UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \ 1535 1.1 christos (w1) = (USItype) (__x >> 32); \ 1536 1.1 christos (w0) = (USItype) (__x); \ 1537 1.1 christos } while (0) 1538 1.1 christos #endif /* 
_TMS320C6400_PLUS */ 1539 1.1 christos 1540 1.1 christos #define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) 1541 1.1 christos #ifdef _TMS320C6400 1542 1.1 christos #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) 1543 1.1 christos #endif 1544 1.1 christos #define UMUL_TIME 4 1545 1.1 christos #define UDIV_TIME 40 1546 1.1 christos #endif /* _TMS320C6X */ 1547 1.1 christos 1548 1.1 christos #if defined (__xtensa__) && W_TYPE_SIZE == 32 1549 1.1 christos /* This code is not Xtensa-configuration-specific, so rely on the compiler 1550 1.1 christos to expand builtin functions depending on what configuration features 1551 1.1 christos are available. This avoids library calls when the operation can be 1552 1.1 christos performed in-line. */ 1553 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 1554 1.1 christos do { \ 1555 1.1 christos DWunion __w; \ 1556 1.1 christos __w.ll = __builtin_umulsidi3 (u, v); \ 1557 1.1 christos w1 = __w.s.high; \ 1558 1.1 christos w0 = __w.s.low; \ 1559 1.1 christos } while (0) 1560 1.1 christos #define __umulsidi3(u, v) __builtin_umulsidi3 (u, v) 1561 1.1 christos #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) 1562 1.1 christos #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) 1563 1.1 christos #endif /* __xtensa__ */ 1564 1.1 christos 1565 1.1 christos #if defined xstormy16 1566 1.1 christos extern UHItype __stormy16_count_leading_zeros (UHItype); 1567 1.1 christos #define count_leading_zeros(count, x) \ 1568 1.1 christos do \ 1569 1.1 christos { \ 1570 1.1 christos UHItype size; \ 1571 1.1 christos \ 1572 1.1 christos /* We assume that W_TYPE_SIZE is a multiple of 16... 
*/ \ 1573 1.1 christos for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \ 1574 1.1 christos { \ 1575 1.1 christos UHItype c; \ 1576 1.1 christos \ 1577 1.1 christos c = __clzhi2 ((x) >> (size - 16)); \ 1578 1.1 christos (count) += c; \ 1579 1.1 christos if (c != 16) \ 1580 1.1 christos break; \ 1581 1.1 christos } \ 1582 1.1 christos } \ 1583 1.1 christos while (0) 1584 1.1 christos #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE 1585 1.1 christos #endif 1586 1.1 christos 1587 1.1 christos #if defined (__z8000__) && W_TYPE_SIZE == 16 1588 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1589 1.1 christos __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ 1590 1.1 christos : "=r" ((unsigned int)(sh)), \ 1591 1.1 christos "=&r" ((unsigned int)(sl)) \ 1592 1.1 christos : "%0" ((unsigned int)(ah)), \ 1593 1.1 christos "r" ((unsigned int)(bh)), \ 1594 1.1 christos "%1" ((unsigned int)(al)), \ 1595 1.1 christos "rQR" ((unsigned int)(bl))) 1596 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1597 1.1 christos __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ 1598 1.1 christos : "=r" ((unsigned int)(sh)), \ 1599 1.1 christos "=&r" ((unsigned int)(sl)) \ 1600 1.1 christos : "0" ((unsigned int)(ah)), \ 1601 1.1 christos "r" ((unsigned int)(bh)), \ 1602 1.1 christos "1" ((unsigned int)(al)), \ 1603 1.1 christos "rQR" ((unsigned int)(bl))) 1604 1.1 christos #define umul_ppmm(xh, xl, m0, m1) \ 1605 1.1 christos do { \ 1606 1.1 christos union {long int __ll; \ 1607 1.1 christos struct {unsigned int __h, __l;} __i; \ 1608 1.1 christos } __xx; \ 1609 1.1 christos unsigned int __m0 = (m0), __m1 = (m1); \ 1610 1.1 christos __asm__ ("mult %S0,%H3" \ 1611 1.1 christos : "=r" (__xx.__i.__h), \ 1612 1.1 christos "=r" (__xx.__i.__l) \ 1613 1.1 christos : "%1" (__m0), \ 1614 1.1 christos "rQR" (__m1)); \ 1615 1.1 christos (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1616 1.1 christos (xh) += ((((signed int) __m0 >> 15) & __m1) \ 1617 1.1 christos + (((signed int) __m1 >> 15) & __m0)); \ 
1618 1.1 christos } while (0) 1619 1.1 christos #endif /* __z8000__ */ 1620 1.1 christos 1621 1.1 christos #endif /* __GNUC__ */ 1622 1.1 christos 1623 1.1 christos /* If this machine has no inline assembler, use C macros. */ 1624 1.1 christos 1625 1.1 christos #if !defined (add_ssaaaa) 1626 1.1 christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1627 1.1 christos do { \ 1628 1.1 christos UWtype __x; \ 1629 1.1 christos __x = (al) + (bl); \ 1630 1.1 christos (sh) = (ah) + (bh) + (__x < (al)); \ 1631 1.1 christos (sl) = __x; \ 1632 1.1 christos } while (0) 1633 1.1 christos #endif 1634 1.1 christos 1635 1.1 christos #if !defined (sub_ddmmss) 1636 1.1 christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1637 1.1 christos do { \ 1638 1.1 christos UWtype __x; \ 1639 1.1 christos __x = (al) - (bl); \ 1640 1.1 christos (sh) = (ah) - (bh) - (__x > (al)); \ 1641 1.1 christos (sl) = __x; \ 1642 1.1 christos } while (0) 1643 1.1 christos #endif 1644 1.1 christos 1645 1.1 christos /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of 1646 1.1 christos smul_ppmm. */ 1647 1.1 christos #if !defined (umul_ppmm) && defined (smul_ppmm) 1648 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 1649 1.1 christos do { \ 1650 1.1 christos UWtype __w1; \ 1651 1.1 christos UWtype __xm0 = (u), __xm1 = (v); \ 1652 1.1 christos smul_ppmm (__w1, w0, __xm0, __xm1); \ 1653 1.1 christos (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \ 1654 1.1 christos + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \ 1655 1.1 christos } while (0) 1656 1.1 christos #endif 1657 1.1 christos 1658 1.1 christos /* If we still don't have umul_ppmm, define it using plain C. 
*/ 1659 1.1 christos #if !defined (umul_ppmm) 1660 1.1 christos #define umul_ppmm(w1, w0, u, v) \ 1661 1.1 christos do { \ 1662 1.1 christos UWtype __x0, __x1, __x2, __x3; \ 1663 1.1 christos UHWtype __ul, __vl, __uh, __vh; \ 1664 1.1 christos \ 1665 1.1 christos __ul = __ll_lowpart (u); \ 1666 1.1 christos __uh = __ll_highpart (u); \ 1667 1.1 christos __vl = __ll_lowpart (v); \ 1668 1.1 christos __vh = __ll_highpart (v); \ 1669 1.1 christos \ 1670 1.1 christos __x0 = (UWtype) __ul * __vl; \ 1671 1.1 christos __x1 = (UWtype) __ul * __vh; \ 1672 1.1 christos __x2 = (UWtype) __uh * __vl; \ 1673 1.1 christos __x3 = (UWtype) __uh * __vh; \ 1674 1.1 christos \ 1675 1.1 christos __x1 += __ll_highpart (__x0);/* this can't give carry */ \ 1676 1.1 christos __x1 += __x2; /* but this indeed can */ \ 1677 1.1 christos if (__x1 < __x2) /* did we get it? */ \ 1678 1.1 christos __x3 += __ll_B; /* yes, add it in the proper pos. */ \ 1679 1.1 christos \ 1680 1.1 christos (w1) = __x3 + __ll_highpart (__x1); \ 1681 1.1 christos (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ 1682 1.1 christos } while (0) 1683 1.1 christos #endif 1684 1.1 christos 1685 1.1 christos #if !defined (__umulsidi3) 1686 1.1 christos #define __umulsidi3(u, v) \ 1687 1.1 christos ({DWunion __w; \ 1688 1.1 christos umul_ppmm (__w.s.high, __w.s.low, u, v); \ 1689 1.1 christos __w.ll; }) 1690 1.1 christos #endif 1691 1.1 christos 1692 1.1 christos /* Define this unconditionally, so it can be used for debugging. 
*/ 1693 1.1 christos #define __udiv_qrnnd_c(q, r, n1, n0, d) \ 1694 1.1 christos do { \ 1695 1.1 christos UWtype __d1, __d0, __q1, __q0; \ 1696 1.1 christos UWtype __r1, __r0, __m; \ 1697 1.1 christos __d1 = __ll_highpart (d); \ 1698 1.1 christos __d0 = __ll_lowpart (d); \ 1699 1.1 christos \ 1700 1.1 christos __r1 = (n1) % __d1; \ 1701 1.1 christos __q1 = (n1) / __d1; \ 1702 1.1 christos __m = (UWtype) __q1 * __d0; \ 1703 1.1 christos __r1 = __r1 * __ll_B | __ll_highpart (n0); \ 1704 1.1 christos if (__r1 < __m) \ 1705 1.1 christos { \ 1706 1.1 christos __q1--, __r1 += (d); \ 1707 1.1 christos if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ 1708 1.1 christos if (__r1 < __m) \ 1709 1.1 christos __q1--, __r1 += (d); \ 1710 1.1 christos } \ 1711 1.1 christos __r1 -= __m; \ 1712 1.1 christos \ 1713 1.1 christos __r0 = __r1 % __d1; \ 1714 1.1 christos __q0 = __r1 / __d1; \ 1715 1.1 christos __m = (UWtype) __q0 * __d0; \ 1716 1.1 christos __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ 1717 1.1 christos if (__r0 < __m) \ 1718 1.1 christos { \ 1719 1.1 christos __q0--, __r0 += (d); \ 1720 1.1 christos if (__r0 >= (d)) \ 1721 1.1 christos if (__r0 < __m) \ 1722 1.1 christos __q0--, __r0 += (d); \ 1723 1.1 christos } \ 1724 1.1 christos __r0 -= __m; \ 1725 1.1 christos \ 1726 1.1 christos (q) = (UWtype) __q1 * __ll_B | __q0; \ 1727 1.1 christos (r) = __r0; \ 1728 1.1 christos } while (0) 1729 1.1 christos 1730 1.1 christos /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through 1731 1.1 christos __udiv_w_sdiv (defined in libgcc or elsewhere). 
*/ 1732 1.1 christos #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) 1733 1.1 christos #define udiv_qrnnd(q, r, nh, nl, d) \ 1734 1.1 christos do { \ 1735 1.3 christos extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype); \ 1736 1.3 christos UWtype __r; \ 1737 1.1 christos (q) = __udiv_w_sdiv (&__r, nh, nl, d); \ 1738 1.1 christos (r) = __r; \ 1739 1.1 christos } while (0) 1740 1.1 christos #endif 1741 1.1 christos 1742 1.1 christos /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ 1743 1.1 christos #if !defined (udiv_qrnnd) 1744 1.1 christos #define UDIV_NEEDS_NORMALIZATION 1 1745 1.1 christos #define udiv_qrnnd __udiv_qrnnd_c 1746 1.1 christos #endif 1747 1.1 christos 1748 1.1 christos #if !defined (count_leading_zeros) 1749 1.1 christos #define count_leading_zeros(count, x) \ 1750 1.1 christos do { \ 1751 1.1 christos UWtype __xr = (x); \ 1752 1.1 christos UWtype __a; \ 1753 1.1 christos \ 1754 1.1 christos if (W_TYPE_SIZE <= 32) \ 1755 1.1 christos { \ 1756 1.1 christos __a = __xr < ((UWtype)1<<2*__BITS4) \ 1757 1.1 christos ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4) \ 1758 1.1 christos : (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \ 1759 1.1 christos } \ 1760 1.1 christos else \ 1761 1.1 christos { \ 1762 1.1 christos for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ 1763 1.1 christos if (((__xr >> __a) & 0xff) != 0) \ 1764 1.1 christos break; \ 1765 1.1 christos } \ 1766 1.1 christos \ 1767 1.1 christos (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ 1768 1.1 christos } while (0) 1769 1.1 christos #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE 1770 1.1 christos #endif 1771 1.1 christos 1772 1.1 christos #if !defined (count_trailing_zeros) 1773 1.1 christos /* Define count_trailing_zeros using count_leading_zeros. The latter might be 1774 1.1 christos defined in asm, but if it is not, the C version above is good enough. 
*/ 1775 1.1 christos #define count_trailing_zeros(count, x) \ 1776 1.1 christos do { \ 1777 1.1 christos UWtype __ctz_x = (x); \ 1778 1.1 christos UWtype __ctz_c; \ 1779 1.1 christos count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ 1780 1.1 christos (count) = W_TYPE_SIZE - 1 - __ctz_c; \ 1781 1.1 christos } while (0) 1782 1.1 christos #endif 1783 1.1 christos 1784 1.1 christos #ifndef UDIV_NEEDS_NORMALIZATION 1785 1.1 christos #define UDIV_NEEDS_NORMALIZATION 0 1786 1.1 christos #endif 1787