Home | History | Annotate | Line # | Download | only in include
      1   1.1  christos /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
      2  1.11  christos    Copyright (C) 1991-2024 Free Software Foundation, Inc.
      3   1.1  christos 
      4   1.1  christos    This file is part of the GNU C Library.
      5   1.1  christos 
      6   1.1  christos    The GNU C Library is free software; you can redistribute it and/or
      7   1.1  christos    modify it under the terms of the GNU Lesser General Public
      8   1.1  christos    License as published by the Free Software Foundation; either
      9   1.1  christos    version 2.1 of the License, or (at your option) any later version.
     10   1.1  christos 
     11   1.1  christos    In addition to the permissions in the GNU Lesser General Public
     12   1.1  christos    License, the Free Software Foundation gives you unlimited
     13   1.1  christos    permission to link the compiled version of this file into
     14   1.1  christos    combinations with other programs, and to distribute those
     15   1.1  christos    combinations without any restriction coming from the use of this
     16   1.1  christos    file.  (The Lesser General Public License restrictions do apply in
     17   1.1  christos    other respects; for example, they cover modification of the file,
     18   1.1  christos    and distribution when not linked into a combined executable.)
     19   1.1  christos 
     20   1.1  christos    The GNU C Library is distributed in the hope that it will be useful,
     21   1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     22   1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23   1.1  christos    Lesser General Public License for more details.
     24   1.1  christos 
     25   1.1  christos    You should have received a copy of the GNU Lesser General Public
     26   1.1  christos    License along with the GNU C Library; if not, see
     27   1.1  christos    <http://www.gnu.org/licenses/>.  */
     28   1.1  christos 
     29   1.1  christos /* You have to define the following before including this file:
     30   1.1  christos 
     31   1.1  christos    UWtype -- An unsigned type, default type for operations (typically a "word")
     32   1.1  christos    UHWtype -- An unsigned type, at least half the size of UWtype.
     33   1.1  christos    UDWtype -- An unsigned type, at least twice as large as UWtype.
     34   1.1  christos    W_TYPE_SIZE -- size in bits of UWtype
     35   1.1  christos 
     36   1.1  christos    UQItype -- Unsigned 8 bit type.
     37   1.1  christos    SItype, USItype -- Signed and unsigned 32 bit types.
     38   1.1  christos    DItype, UDItype -- Signed and unsigned 64 bit types.
     39   1.1  christos 
     40   1.1  christos    On a 32 bit machine UWtype should typically be USItype;
     41   1.1  christos    on a 64 bit machine, UWtype should typically be UDItype.  */
     42   1.1  christos 
/* Helpers for splitting a UWtype word into two half-words.  */

/* A quarter of the word width, in bits.  */
#define __BITS4 (W_TYPE_SIZE / 4)

/* The half-word radix: 2 raised to half the word width.  */
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))

/* Low half of WORD, i.e. the bits below the half-word radix.  */
#define __ll_lowpart(word) ((__ll_B - 1) & (UWtype) (word))

/* High half of WORD, shifted down into the low half-word position.  */
#define __ll_highpart(word) ((UWtype) (word) >> (W_TYPE_SIZE / 2))
     47   1.1  christos 
/* Fall back to a 32-bit word configuration when the includer has not
   chosen a word size.  */
#if !defined (W_TYPE_SIZE)
# define W_TYPE_SIZE 32
# define UWtype  USItype
# define UHWtype USItype
# define UDWtype UDItype
#endif /* !W_TYPE_SIZE */
     54   1.1  christos 
     55   1.1  christos /* Used in glibc only.  */
     56   1.1  christos #ifndef attribute_hidden
     57   1.1  christos #define attribute_hidden
     58   1.1  christos #endif
     59   1.1  christos 
     60   1.1  christos extern const UQItype __clz_tab[256] attribute_hidden;
     61   1.1  christos 
     62   1.1  christos /* Define auxiliary asm macros.
     63   1.1  christos 
     64   1.1  christos    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
     65   1.1  christos    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
     66   1.1  christos    word product in HIGH_PROD and LOW_PROD.
     67   1.1  christos 
     68   1.1  christos    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
     69   1.1  christos    UDWtype product.  This is just a variant of umul_ppmm.
     70   1.1  christos 
     71   1.1  christos    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
     72   1.1  christos    denominator) divides a UDWtype, composed by the UWtype integers
     73   1.1  christos    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
     74   1.1  christos    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
     75   1.1  christos    than DENOMINATOR for correct operation.  If, in addition, the most
     76   1.1  christos    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
     77   1.1  christos    UDIV_NEEDS_NORMALIZATION is defined to 1.
     78   1.1  christos 
     79   1.1  christos    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
     80   1.1  christos    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
     81   1.1  christos    is rounded towards 0.
     82   1.1  christos 
     83   1.1  christos    5) count_leading_zeros(count, x) counts the number of zero-bits from the
     84   1.1  christos    msb to the first nonzero bit in the UWtype X.  This is the number of
     85   1.1  christos    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
     86   1.1  christos    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
     87   1.1  christos 
     88   1.1  christos    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
     89   1.1  christos    from the least significant end.
     90   1.1  christos 
     91   1.1  christos    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
     92   1.1  christos    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
     93   1.1  christos    HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
     94   1.1  christos    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
     95   1.1  christos    (i.e. carry out) is not stored anywhere, and is lost.
     96   1.1  christos 
     97   1.1  christos    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
     98   1.1  christos    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
     99   1.1  christos    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
    100   1.1  christos    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
    101   1.1  christos    and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
    102   1.1  christos    and is lost.
    103   1.1  christos 
    104   1.1  christos    If any of these macros are left undefined for a particular CPU,
    105   1.1  christos    C macros are used.  */
    106   1.1  christos 
    107   1.1  christos /* The CPUs come in alphabetical order below.
    108   1.1  christos 
    109   1.1  christos    Please add support for more CPUs here, or improve the current support
    110   1.1  christos    for the CPUs below!
    111   1.1  christos    (E.g. WE32100, IBM360.)  */
    112   1.1  christos 
    113   1.1  christos #if defined (__GNUC__) && !defined (NO_ASM)
    114   1.1  christos 
/* Some asm statements need a "cc" clobber.  gcc2 and later accept it,
   gcc1 does not, so the clobber is spelled through these two macros
   instead of duplicating each asm for both compilers.  */
#if __GNUC__ >= 2
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#else /* __GNUC__ < 2 */
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#endif /* __GNUC__ >= 2 */
    124   1.1  christos 
#if defined (__aarch64__)

/* AArch64: bit counting is delegated to the compiler builtin that matches
   the configured word width.  */
# if W_TYPE_SIZE == 32
#  define count_leading_zeros(cnt, word)	((cnt) = __builtin_clz (word))
#  define count_trailing_zeros(cnt, word)	((cnt) = __builtin_ctz (word))
#  define COUNT_LEADING_ZEROS_0 32
# elif W_TYPE_SIZE == 64
#  define count_leading_zeros(cnt, word)	((cnt) = __builtin_clzll (word))
#  define count_trailing_zeros(cnt, word)	((cnt) = __builtin_ctzll (word))
#  define COUNT_LEADING_ZEROS_0 64
# endif /* W_TYPE_SIZE */

#endif /* __aarch64__ */
    140   1.3  christos 
    141   1.1  christos #if defined (__alpha) && W_TYPE_SIZE == 64
/* g++ releases before version 5 reject __builtin_alpha_umulh, so this
   definition is offered only to C, or to g++ 5 and later.  */
#if !defined(__cplusplus) || __GNUC__ >= 5
/* (ph,pl) = m0 * m1: high word from the umulh builtin, low word from an
   ordinary multiplication.  Each operand is evaluated once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __mul0 = (m0);						\
    UDItype __mul1 = (m1);						\
    (ph) = __builtin_alpha_umulh (__mul0, __mul1);			\
    (pl) = __mul0 * __mul1;						\
  } while (0)
#define UMUL_TIME 46
#endif /* !c++ */
    153   1.1  christos #ifndef LONGLONG_STANDALONE
    154   1.1  christos #define udiv_qrnnd(q, r, n1, n0, d) \
    155   1.1  christos   do { UDItype __r;							\
    156   1.1  christos     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));				\
    157   1.1  christos     (r) = __r;								\
    158   1.1  christos   } while (0)
    159   1.1  christos extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
    160   1.1  christos #define UDIV_TIME 220
    161   1.1  christos #endif /* LONGLONG_STANDALONE */
/* Bit-counting macros for Alpha.  With __alpha_cix__ both macros use the
   compiler's __builtin_clzl / __builtin_ctzl and COUNT_LEADING_ZEROS_0 is
   defined as 64.  Without it they are open-coded from
   __builtin_alpha_cmpbge and __builtin_alpha_extbl.  */
    162   1.1  christos #ifdef __alpha_cix__
    163   1.1  christos #define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clzl (X))
    164   1.1  christos #define count_trailing_zeros(COUNT,X)	((COUNT) = __builtin_ctzl (X))
    165   1.1  christos #define COUNT_LEADING_ZEROS_0 64
    166   1.1  christos #else
/* Open-coded count_leading_zeros: __a is a byte index derived from the
   cmpbge result via __clz_tab, __t is then the byte extracted at that index,
   and COUNT is 64 minus (__clz_tab[__t] + 8 * __a).
   NOTE(review): cmpbge (0, x) presumably yields a per-byte "is zero" mask,
   so __t ^ 0xff marks the non-zero bytes -- confirm against the Alpha
   builtin documentation.  */
    167   1.1  christos #define count_leading_zeros(COUNT,X) \
    168   1.1  christos   do {									\
    169   1.1  christos     UDItype __xr = (X), __t, __a;					\
    170   1.1  christos     __t = __builtin_alpha_cmpbge (0, __xr);				\
    171   1.1  christos     __a = __clz_tab[__t ^ 0xff] - 1;					\
    172   1.1  christos     __t = __builtin_alpha_extbl (__xr, __a);				\
    173   1.1  christos     (COUNT) = 64 - (__clz_tab[__t] + __a*8);				\
    174   1.1  christos   } while (0)
/* Open-coded count_trailing_zeros.  `~__t & -~__t' keeps only the lowest
   set bit of ~__t; the three tests against 0xCC, 0xF0 and 0xAA convert that
   single bit into its bit index (0..7), which is used as the byte index for
   extbl.  The byte index is scaled by 8 (__a <<= 3), then the same
   isolate-lowest-bit and index steps are applied to the extracted byte and
   added in.  */
    175   1.1  christos #define count_trailing_zeros(COUNT,X) \
    176   1.1  christos   do {									\
    177   1.1  christos     UDItype __xr = (X), __t, __a;					\
    178   1.1  christos     __t = __builtin_alpha_cmpbge (0, __xr);				\
    179   1.1  christos     __t = ~__t & -~__t;							\
    180   1.1  christos     __a = ((__t & 0xCC) != 0) * 2;					\
    181   1.1  christos     __a += ((__t & 0xF0) != 0) * 4;					\
    182   1.1  christos     __a += ((__t & 0xAA) != 0);						\
    183   1.1  christos     __t = __builtin_alpha_extbl (__xr, __a);				\
    184   1.1  christos     __a <<= 3;								\
    185   1.1  christos     __t &= -__t;							\
    186   1.1  christos     __a += ((__t & 0xCC) != 0) * 2;					\
    187   1.1  christos     __a += ((__t & 0xF0) != 0) * 4;					\
    188   1.1  christos     __a += ((__t & 0xAA) != 0);						\
    189   1.1  christos     (COUNT) = __a;							\
    190   1.1  christos   } while (0)
    191   1.1  christos #endif /* __alpha_cix__ */
    192   1.1  christos #endif /* __alpha */
    193   1.1  christos 
    194   1.1  christos #if defined (__arc__) && W_TYPE_SIZE == 32
/* add_ssaaaa for ARC: (sh,sl) = (ah,al) + (bh,bl).
   "add.f" forms the low word %1 (sl) from al (%4) and bl (%5); "adc" then
   forms the high word %0 (sh) from ah (%2) and bh (%3).  sl is an
   earlyclobber output ("=&r") because the template writes it before the
   high-word inputs are read.  "cc" is declared as clobbered.  */
    195   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    196   1.1  christos   __asm__ ("add.f	%1, %4, %5\n\tadc	%0, %2, %3"		\
    197   1.1  christos 	   : "=r" ((USItype) (sh)),					\
    198   1.1  christos 	     "=&r" ((USItype) (sl))					\
    199   1.1  christos 	   : "%r" ((USItype) (ah)),					\
    200   1.8  christos 	     "rICal" ((USItype) (bh)),					\
    201   1.1  christos 	     "%r" ((USItype) (al)),					\
    202   1.9  christos 	     "rICal" ((USItype) (bl))					\
    203   1.9  christos 	   : "cc")
/* sub_ddmmss for ARC: (sh,sl) = (ah,al) - (bh,bl).
   "sub.f" forms the low word %1 (sl) from al (%4) and bl (%5); "sbc" then
   forms the high word %0 (sh) from ah (%2) and bh (%3).  Unlike add_ssaaaa
   the minuend operands use plain "r" without the '%' commutativity marker.
   sl is earlyclobber and "cc" is declared as clobbered.  */
    204   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    205   1.1  christos   __asm__ ("sub.f	%1, %4, %5\n\tsbc	%0, %2, %3"		\
    206   1.1  christos 	   : "=r" ((USItype) (sh)),					\
    207   1.1  christos 	     "=&r" ((USItype) (sl))					\
    208   1.1  christos 	   : "r" ((USItype) (ah)),					\
    209   1.8  christos 	     "rICal" ((USItype) (bh)),					\
    210   1.1  christos 	     "r" ((USItype) (al)),					\
    211   1.9  christos 	     "rICal" ((USItype) (bl))					\
    212   1.9  christos 	   : "cc")
    213   1.1  christos 
/* __umulsidi3 (u, v): convert U to USItype, widen it to UDItype and multiply
   by V converted to USItype, giving the full double-width product.  Both
   arguments are parenthesized so that an expression argument such as
   `a + b' is converted as a whole rather than only its first operand.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* count_leading_zeros for ARC, available only when __ARC_NORM__ is defined.
   The asm runs "norm.f" on X into a signed temporary and then conditionally
   ("mov.mi") replaces that temporary with -1; COUNT is the temporary plus
   one.  COUNT_LEADING_ZEROS_0 is defined as 32.  */
    215   1.1  christos #ifdef __ARC_NORM__
    216   1.1  christos #define count_leading_zeros(count, x) \
    217   1.1  christos   do									\
    218   1.1  christos     {									\
    219   1.1  christos       SItype c_;							\
    220   1.1  christos 									\
    221   1.1  christos       __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
    222   1.1  christos       (count) = c_ + 1;							\
    223   1.1  christos     }									\
    224   1.1  christos   while (0)
    225   1.1  christos #define COUNT_LEADING_ZEROS_0 32
    226   1.8  christos #endif /* __ARC_NORM__ */
    227   1.8  christos #endif /* __arc__ */
    228   1.1  christos 
    229   1.1  christos #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
    230   1.1  christos  && W_TYPE_SIZE == 32
/* add_ssaaaa for ARM: (sh,sl) = (ah,al) + (bh,bl).
   "adds" forms the low word %1 (sl) from al (%4) and bl (%5); "adc" then
   forms the high word %0 (sh) from ah (%2) and bh (%3).  sl is earlyclobber
   since it is written before the high-word inputs are read.  The condition
   codes are clobbered via __CLOBBER_CC.  */
    231   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    232   1.1  christos   __asm__ ("adds	%1, %4, %5\n\tadc	%0, %2, %3"		\
    233   1.1  christos 	   : "=r" ((USItype) (sh)),					\
    234   1.1  christos 	     "=&r" ((USItype) (sl))					\
    235   1.1  christos 	   : "%r" ((USItype) (ah)),					\
    236   1.1  christos 	     "rI" ((USItype) (bh)),					\
    237   1.1  christos 	     "%r" ((USItype) (al)),					\
    238   1.1  christos 	     "rI" ((USItype) (bl)) __CLOBBER_CC)
/* sub_ddmmss for ARM: (sh,sl) = (ah,al) - (bh,bl).
   "subs" forms the low word %1 (sl) from al (%4) and bl (%5); "sbc" then
   forms the high word %0 (sh) from ah (%2) and bh (%3).  sl is earlyclobber
   and the condition codes are clobbered via __CLOBBER_CC.  */
    239   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    240   1.1  christos   __asm__ ("subs	%1, %4, %5\n\tsbc	%0, %2, %3"		\
    241   1.1  christos 	   : "=r" ((USItype) (sh)),					\
    242   1.1  christos 	     "=&r" ((USItype) (sl))					\
    243   1.1  christos 	   : "r" ((USItype) (ah)),					\
    244   1.1  christos 	     "rI" ((USItype) (bh)),					\
    245   1.1  christos 	     "r" ((USItype) (al)),					\
    246   1.1  christos 	     "rI" ((USItype) (bl)) __CLOBBER_CC)
/* umul_ppmm for ARM: (xh,xl) = a * b as a two-word product.
   When __ARM_ARCH_2__, __ARM_ARCH_2A__ or __ARM_ARCH_3__ is defined, the
   product is built in inline asm from 16-bit halves: each operand is split
   with "mov ..., lsr #16" and "bic", four "mul" partial products are formed,
   and the cross terms are folded in with adds/addcs/adc.  Three scratch
   registers (__t0..__t2) are asm outputs, and the condition codes are
   clobbered via __CLOBBER_CC.  Otherwise a plain C widening multiply is used
   and split into its low and high halves.  UMUL_TIME is 20 or 3 depending
   on the branch; UDIV_TIME is 100 in both cases.  */
    247   1.1  christos # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
    248   1.1  christos      || defined(__ARM_ARCH_3__)
    249   1.1  christos #  define umul_ppmm(xh, xl, a, b)					\
    250   1.1  christos   do {									\
    251   1.1  christos     register USItype __t0, __t1, __t2;					\
    252   1.1  christos     __asm__ ("%@ Inlined umul_ppmm\n"					\
    253   1.1  christos 	   "	mov	%2, %5, lsr #16\n"				\
    254   1.1  christos 	   "	mov	%0, %6, lsr #16\n"				\
    255   1.1  christos 	   "	bic	%3, %5, %2, lsl #16\n"				\
    256   1.1  christos 	   "	bic	%4, %6, %0, lsl #16\n"				\
    257   1.1  christos 	   "	mul	%1, %3, %4\n"					\
    258   1.1  christos 	   "	mul	%4, %2, %4\n"					\
    259   1.1  christos 	   "	mul	%3, %0, %3\n"					\
    260   1.1  christos 	   "	mul	%0, %2, %0\n"					\
    261   1.1  christos 	   "	adds	%3, %4, %3\n"					\
    262   1.1  christos 	   "	addcs	%0, %0, #65536\n"				\
    263   1.1  christos 	   "	adds	%1, %1, %3, lsl #16\n"				\
    264   1.1  christos 	   "	adc	%0, %0, %3, lsr #16"				\
    265   1.1  christos 	   : "=&r" ((USItype) (xh)),					\
    266   1.1  christos 	     "=r" ((USItype) (xl)),					\
    267   1.1  christos 	     "=&r" (__t0), "=&r" (__t1), "=r" (__t2)			\
    268   1.1  christos 	   : "r" ((USItype) (a)),					\
    269   1.1  christos 	     "r" ((USItype) (b)) __CLOBBER_CC );			\
    270   1.1  christos   } while (0)
    271   1.1  christos #  define UMUL_TIME 20
    272   1.1  christos # else
    273   1.1  christos #  define umul_ppmm(xh, xl, a, b)					\
    274   1.1  christos   do {									\
    275   1.1  christos     /* Generate umull, under compiler control.  */			\
    276   1.1  christos     register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);	\
    277   1.1  christos     (xl) = (USItype)__t0;						\
    278   1.1  christos     (xh) = (USItype)(__t0 >> 32);					\
    279   1.1  christos   } while (0)
    280   1.1  christos #  define UMUL_TIME 3
    281   1.1  christos # endif
    282   1.1  christos # define UDIV_TIME 100
    283   1.1  christos #endif /* __arm__ */
    284   1.1  christos 
#ifdef __arm__
/* On ARM the choice of instruction sequence is left to gcc: both counts
   simply forward to the compiler builtins.  */
#define count_leading_zeros(cnt, word)	((cnt) = __builtin_clz (word))
#define count_trailing_zeros(cnt, word)	((cnt) = __builtin_ctz (word))
#define COUNT_LEADING_ZEROS_0 32
#endif /* __arm__ */
    291   1.1  christos 
#if defined (__AVR__)

/* AVR: choose the compiler builtin by the configured word width.  The
   16-bit case uses the plain builtins, the 32-bit case the "l" variants
   and the 64-bit case the "ll" variants.  */
# if W_TYPE_SIZE == 16
#  define count_leading_zeros(cnt, word)  ((cnt) = __builtin_clz (word))
#  define count_trailing_zeros(cnt, word) ((cnt) = __builtin_ctz (word))
#  define COUNT_LEADING_ZEROS_0 16
# elif W_TYPE_SIZE == 32
#  define count_leading_zeros(cnt, word)  ((cnt) = __builtin_clzl (word))
#  define count_trailing_zeros(cnt, word) ((cnt) = __builtin_ctzl (word))
#  define COUNT_LEADING_ZEROS_0 32
# elif W_TYPE_SIZE == 64
#  define count_leading_zeros(cnt, word)  ((cnt) = __builtin_clzll (word))
#  define count_trailing_zeros(cnt, word) ((cnt) = __builtin_ctzll (word))
#  define COUNT_LEADING_ZEROS_0 64
# endif /* W_TYPE_SIZE */

#endif /* defined (__AVR__) */
    313   1.1  christos 
    314   1.1  christos #if defined (__CRIS__)
    315   1.1  christos 
/* CRIS: the leading-zero count is offered from architecture version 3 on,
   the trailing-zero count from version 8 on.  */
#if __CRIS_arch_version >= 3
# define count_leading_zeros(cnt, word) ((cnt) = __builtin_clz (word))
# define COUNT_LEADING_ZEROS_0 32
# if __CRIS_arch_version >= 8
#  define count_trailing_zeros(cnt, word) ((cnt) = __builtin_ctz (word))
# endif /* __CRIS_arch_version >= 8 */
#endif /* __CRIS_arch_version >= 3 */
    324   1.1  christos 
    325   1.1  christos #if __CRIS_arch_version >= 10
    326   1.1  christos #define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
    327   1.1  christos #else
    328   1.1  christos #define __umulsidi3 __umulsidi3
    329   1.1  christos extern UDItype __umulsidi3 (USItype, USItype);
    330   1.1  christos #endif /* __CRIS_arch_version >= 10 */
    331   1.1  christos 
/* (w1,w0) = u * v: take the UDItype product from __umulsidi3 and hand out
   its low 32 bits as w0 and its high 32 bits as w1.  */
#define umul_ppmm(w1, w0, u, v)			\
  do {						\
    UDItype __prod = __umulsidi3 (u, v);	\
    (w0) = (USItype) (__prod);			\
    (w1) = (USItype) (__prod >> 32);		\
  } while (0)
    338   1.1  christos 
    339   1.1  christos /* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
    340   1.1  christos    DFmode ("double" intrinsics, avoiding two of the three insns handling
    341   1.1  christos    carry), but defining them as open-code C composing and doing the
    342   1.1  christos    operation in DImode (UDImode) shows that the DImode needs work:
    343   1.1  christos    register pressure from requiring neighboring registers and the
    344   1.1  christos    traffic to and from them come to dominate, in the 4.7 series.  */
    345   1.1  christos 
    346   1.1  christos #endif /* defined (__CRIS__) */
    347   1.1  christos 
    348   1.1  christos #if defined (__hppa) && W_TYPE_SIZE == 32
/* add_ssaaaa for PA-RISC: (sh,sl) = (ah,al) + (bh,bl).
   The destination is the last operand of each instruction: "add" forms the
   low word %1 (sl) from al (%4) and bl (%5), "addc" forms the high word %0
   (sh) from ah (%2) and bh (%3).  sl is earlyclobber; no clobbers are
   declared.  */
    349   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    350   1.1  christos   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"				\
    351   1.1  christos 	   : "=r" ((USItype) (sh)),					\
    352   1.1  christos 	     "=&r" ((USItype) (sl))					\
    353   1.1  christos 	   : "%rM" ((USItype) (ah)),					\
    354   1.1  christos 	     "rM" ((USItype) (bh)),					\
    355   1.1  christos 	     "%rM" ((USItype) (al)),					\
    356   1.1  christos 	     "rM" ((USItype) (bl)))
/* sub_ddmmss for PA-RISC: (sh,sl) = (ah,al) - (bh,bl).
   "sub" forms the low word %1 (sl) from al (%4) and bl (%5), "subb" forms
   the high word %0 (sh) from ah (%2) and bh (%3).  sl is earlyclobber; no
   clobbers are declared.  */
    357   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    358   1.1  christos   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"				\
    359   1.1  christos 	   : "=r" ((USItype) (sh)),					\
    360   1.1  christos 	     "=&r" ((USItype) (sl))					\
    361   1.1  christos 	   : "rM" ((USItype) (ah)),					\
    362   1.1  christos 	     "rM" ((USItype) (bh)),					\
    363   1.1  christos 	     "rM" ((USItype) (al)),					\
    364   1.1  christos 	     "rM" ((USItype) (bl)))
/* umul_ppmm for PA-RISC, defined only when _PA_RISC1_1 is defined.
   "xmpyu" writes the UDItype product into a union; the two USItype members
   __w1 and __w0 (declared in that order) are then copied to w1 and w0.
   All three asm operands use the "x" constraint.  UMUL_TIME is 8 with this
   definition and 30 without it; UDIV_TIME is 40 either way.  */
    365   1.1  christos #if defined (_PA_RISC1_1)
    366   1.1  christos #define umul_ppmm(w1, w0, u, v) \
    367   1.1  christos   do {									\
    368   1.1  christos     union								\
    369   1.1  christos       {									\
    370   1.1  christos 	UDItype __f;							\
    371   1.1  christos 	struct {USItype __w1, __w0;} __w1w0;				\
    372   1.1  christos       } __t;								\
    373   1.1  christos     __asm__ ("xmpyu %1,%2,%0"						\
    374   1.1  christos 	     : "=x" (__t.__f)						\
    375   1.1  christos 	     : "x" ((USItype) (u)),					\
    376   1.1  christos 	       "x" ((USItype) (v)));					\
    377   1.1  christos     (w1) = __t.__w1w0.__w1;						\
    378   1.1  christos     (w0) = __t.__w1w0.__w0;						\
    379   1.1  christos      } while (0)
    380   1.1  christos #define UMUL_TIME 8
    381   1.1  christos #else
    382   1.1  christos #define UMUL_TIME 30
    383   1.1  christos #endif
    384   1.1  christos #define UDIV_TIME 40
/* count_leading_zeros for PA-RISC, as a binary search in inline asm.
   The count (%0) starts at 1.  For field widths 16, 8, 4 and 2 the asm
   tests whether the upper field of the working value (%1) is zero: if so
   the width is added to the count, otherwise the working value is shifted
   down and the add is skipped.  A final single-bit extract is subtracted
   from the count.  X is passed in through the scratch output __tmp
   (matching constraint "1"), so the caller's X is not modified.  */
    385   1.1  christos #define count_leading_zeros(count, x) \
    386   1.1  christos   do {									\
    387   1.1  christos     USItype __tmp;							\
    388   1.1  christos     __asm__ (								\
    389   1.1  christos        "ldi		1,%0\n"						\
    390   1.1  christos "	extru,=		%1,15,16,%%r0		; Bits 31..16 zero?\n"	\
    391   1.1  christos "	extru,tr	%1,15,16,%1		; No.  Shift down, skip add.\n"\
    392   1.1  christos "	ldo		16(%0),%0		; Yes.  Perform add.\n"	\
    393   1.1  christos "	extru,=		%1,23,8,%%r0		; Bits 15..8 zero?\n"	\
    394   1.1  christos "	extru,tr	%1,23,8,%1		; No.  Shift down, skip add.\n"\
    395   1.1  christos "	ldo		8(%0),%0		; Yes.  Perform add.\n"	\
    396   1.1  christos "	extru,=		%1,27,4,%%r0		; Bits 7..4 zero?\n"	\
    397   1.1  christos "	extru,tr	%1,27,4,%1		; No.  Shift down, skip add.\n"\
    398   1.1  christos "	ldo		4(%0),%0		; Yes.  Perform add.\n"	\
    399   1.1  christos "	extru,=		%1,29,2,%%r0		; Bits 3..2 zero?\n"	\
    400   1.1  christos "	extru,tr	%1,29,2,%1		; No.  Shift down, skip add.\n"\
    401   1.1  christos "	ldo		2(%0),%0		; Yes.  Perform add.\n"	\
    402   1.1  christos "	extru		%1,30,1,%1		; Extract bit 1.\n"	\
    403   1.1  christos "	sub		%0,%1,%0		; Subtract it.\n"	\
    404   1.1  christos 	: "=r" (count), "=r" (__tmp) : "1" (x));			\
    405   1.1  christos   } while (0)
    406   1.1  christos #endif
    407   1.1  christos 
    408   1.1  christos #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
    409   1.1  christos #if !defined (__zarch__)
/* smul_ppmm, non-z/Architecture variant: (xh,xl) = m0 * m1.
   The asm copies m0 into the %N0 half of a DItype operand with "lr" and
   multiplies by m1 with "mr"; the result is read back through a union whose
   first USItype member (__h) goes to xh and second (__l) to xl.  The output
   is earlyclobber because it is written before m1 is consumed.  */
    410   1.1  christos #define smul_ppmm(xh, xl, m0, m1) \
    411   1.1  christos   do {									\
    412   1.1  christos     union {DItype __ll;							\
    413   1.1  christos 	   struct {USItype __h, __l;} __i;				\
    414   1.1  christos 	  } __x;							\
    415   1.1  christos     __asm__ ("lr %N0,%1\n\tmr %0,%2"					\
    416   1.1  christos 	     : "=&r" (__x.__ll)						\
    417   1.1  christos 	     : "r" (m0), "r" (m1));					\
    418   1.1  christos     (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
    419   1.1  christos   } while (0)
/* sdiv_qrnnd, non-z/Architecture variant: divide the two-word value
   (n1,n0) by d.  n1 and n0 are stored into the __h and __l halves of a
   union, the DItype view is divided in place by "dr" (input tied to the
   output with constraint "0"), and afterwards __l is the quotient and __h
   the remainder.  */
    420   1.1  christos #define sdiv_qrnnd(q, r, n1, n0, d) \
    421   1.1  christos   do {									\
    422   1.1  christos     union {DItype __ll;							\
    423   1.1  christos 	   struct {USItype __h, __l;} __i;				\
    424   1.1  christos 	  } __x;							\
    425   1.1  christos     __x.__i.__h = n1; __x.__i.__l = n0;					\
    426   1.1  christos     __asm__ ("dr %0,%2"							\
    427   1.1  christos 	     : "=r" (__x.__ll)						\
    428   1.1  christos 	     : "0" (__x.__ll), "r" (d));				\
    429   1.1  christos     (q) = __x.__i.__l; (r) = __x.__i.__h;				\
    430   1.1  christos   } while (0)
    431   1.1  christos #else
/* smul_ppmm, __zarch__ variant: (xh,xl) = m0 * m1.
   Two local register variables are bound to registers "0" and "1"; m0 is
   loaded into the second one, "mr %r0,<m1>" performs the multiplication,
   and afterwards __r0 is returned as xh and __r1 as xl.  */
    432   1.1  christos #define smul_ppmm(xh, xl, m0, m1) \
    433   1.1  christos   do {                                                                  \
    434   1.1  christos     register SItype __r0 __asm__ ("0");					\
    435   1.1  christos     register SItype __r1 __asm__ ("1") = (m0);				\
    436   1.1  christos 									\
    437   1.1  christos     __asm__ ("mr\t%%r0,%3"                                              \
    438   1.1  christos 	     : "=r" (__r0), "=r" (__r1)					\
    439   1.1  christos 	     : "r"  (__r1),  "r" (m1));					\
    440   1.1  christos     (xh) = __r0; (xl) = __r1;						\
    441   1.1  christos   } while (0)
    442   1.1  christos 
/* sdiv_qrnnd, __zarch__ variant: divide the two-word value (n1,n0) by d.
   n1 and n0 are loaded into local register variables bound to registers
   "0" and "1", "dr %r0,<d>" performs the division, and afterwards __r1 is
   the quotient and __r0 the remainder.  */
    443   1.1  christos #define sdiv_qrnnd(q, r, n1, n0, d) \
    444   1.1  christos   do {									\
    445   1.1  christos     register SItype __r0 __asm__ ("0") = (n1);				\
    446   1.1  christos     register SItype __r1 __asm__ ("1") = (n0);				\
    447   1.1  christos 									\
    448   1.1  christos     __asm__ ("dr\t%%r0,%4"                                              \
    449   1.1  christos 	     : "=r" (__r0), "=r" (__r1)					\
    450   1.1  christos 	     : "r" (__r0), "r" (__r1), "r" (d));			\
    451   1.1  christos     (q) = __r1; (r) = __r0;						\
    452   1.1  christos   } while (0)
    453   1.1  christos #endif /* __zarch__ */
    454   1.1  christos #endif
    455   1.1  christos 
    456   1.1  christos #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
/* add_ssaaaa for 32-bit x86: (sh,sl) = (ah,al) + (bh,bl).
   ah and al are tied to the outputs (matching constraints "%0" and "%1"),
   so the template adds bl (%5) into the low word %1 and then uses "adc" to
   add bh (%3) into the high word %0.  The "{...|...}" groups give the
   operand order for the two assembler dialects.  sl is earlyclobber.  */
    457   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    458   1.1  christos   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"		\
    459   1.1  christos 	   : "=r" ((USItype) (sh)),					\
    460   1.1  christos 	     "=&r" ((USItype) (sl))					\
    461   1.1  christos 	   : "%0" ((USItype) (ah)),					\
    462   1.1  christos 	     "g" ((USItype) (bh)),					\
    463   1.1  christos 	     "%1" ((USItype) (al)),					\
    464   1.1  christos 	     "g" ((USItype) (bl)))
/* sub_ddmmss for 32-bit x86: (sh,sl) = (ah,al) - (bh,bl).
   ah and al are tied to the outputs (matching constraints "0" and "1"); the
   template subtracts bl (%5) from the low word %1 and then uses "sbb" to
   subtract bh (%3) from the high word %0.  sl is earlyclobber.  */
    465   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    466   1.1  christos   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"		\
    467   1.1  christos 	   : "=r" ((USItype) (sh)),					\
    468   1.1  christos 	     "=&r" ((USItype) (sl))					\
    469   1.1  christos 	   : "0" ((USItype) (ah)),					\
    470   1.1  christos 	     "g" ((USItype) (bh)),					\
    471   1.1  christos 	     "1" ((USItype) (al)),					\
    472   1.1  christos 	     "g" ((USItype) (bl)))
/* umul_ppmm for 32-bit x86: (w1,w0) = u * v using a single "mul".
   u is tied to output operand 0 (the "a" register), v may be a register or
   memory operand; after the instruction the "a" register is stored to w0
   and the "d" register to w1.  */
    473   1.1  christos #define umul_ppmm(w1, w0, u, v) \
    474   1.1  christos   __asm__ ("mul{l} %3"							\
    475   1.1  christos 	   : "=a" ((USItype) (w0)),					\
    476   1.1  christos 	     "=d" ((USItype) (w1))					\
    477   1.1  christos 	   : "%0" ((USItype) (u)),					\
    478   1.1  christos 	     "rm" ((USItype) (v)))
/* udiv_qrnnd for 32-bit x86: divide the two-word value (n1,n0) by dv using
   a single "div".  n0 is tied to output operand 0 (the "a" register) and n1
   to output operand 1 (the "d" register); after the instruction the "a"
   register is stored to q and the "d" register to r.  As stated in the
   interface description at the top of the file, n1 must be less than dv.  */
    479   1.1  christos #define udiv_qrnnd(q, r, n1, n0, dv) \
    480   1.1  christos   __asm__ ("div{l} %4"							\
    481   1.1  christos 	   : "=a" ((USItype) (q)),					\
    482   1.1  christos 	     "=d" ((USItype) (r))					\
    483   1.1  christos 	   : "0" ((USItype) (n0)),					\
    484   1.1  christos 	     "1" ((USItype) (n1)),					\
    485   1.1  christos 	     "rm" ((USItype) (dv)))
/* Bit counting on 32-bit x86 is left to the compiler builtins.  */
#define count_leading_zeros(cnt, word)	((cnt) = __builtin_clz (word))
#define count_trailing_zeros(cnt, word)	((cnt) = __builtin_ctz (word))
/* Timing constants for this CPU's multiply and divide macros.
   NOTE(review): units and consumers are not shown in this part of the
   file.  */
#define UMUL_TIME 40
#define UDIV_TIME 40
    490   1.1  christos #endif /* 80x86 */
    491   1.1  christos 
    492   1.3  christos #if defined (__x86_64__) && W_TYPE_SIZE == 64
/* add_ssaaaa for x86-64: (sh,sl) = (ah,al) + (bh,bl) on UDItype words.
   ah and al are tied to the outputs (matching constraints "%0" and "%1"),
   so the template adds bl (%5) into the low word %1 and then uses "adc" to
   add bh (%3) into the high word %0.  The "{...|...}" groups give the
   operand order for the two assembler dialects.  sl is earlyclobber.  */
    493   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    494   1.1  christos   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"		\
    495   1.1  christos 	   : "=r" ((UDItype) (sh)),					\
    496   1.1  christos 	     "=&r" ((UDItype) (sl))					\
    497   1.1  christos 	   : "%0" ((UDItype) (ah)),					\
    498   1.1  christos 	     "rme" ((UDItype) (bh)),					\
    499   1.1  christos 	     "%1" ((UDItype) (al)),					\
    500   1.1  christos 	     "rme" ((UDItype) (bl)))
/* sub_ddmmss for x86-64: (sh,sl) = (ah,al) - (bh,bl) on UDItype words.
   ah and al are tied to the outputs (matching constraints "0" and "1"); the
   template subtracts bl (%5) from the low word %1 and then uses "sbb" to
   subtract bh (%3) from the high word %0.  sl is earlyclobber.  */
    501   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    502   1.1  christos   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"		\
    503   1.1  christos 	   : "=r" ((UDItype) (sh)),					\
    504   1.1  christos 	     "=&r" ((UDItype) (sl))					\
    505   1.1  christos 	   : "0" ((UDItype) (ah)),					\
    506   1.1  christos 	     "rme" ((UDItype) (bh)),					\
    507   1.1  christos 	     "1" ((UDItype) (al)),					\
    508   1.1  christos 	     "rme" ((UDItype) (bl)))
/* umul_ppmm for x86-64: (w1,w0) = u * v using a single "mul".
   u is tied to output operand 0 (the "a" register), v may be a register or
   memory operand; after the instruction the "a" register is stored to w0
   and the "d" register to w1.  */
    509   1.1  christos #define umul_ppmm(w1, w0, u, v) \
    510   1.1  christos   __asm__ ("mul{q} %3"							\
    511   1.1  christos 	   : "=a" ((UDItype) (w0)),					\
    512   1.1  christos 	     "=d" ((UDItype) (w1))					\
    513   1.1  christos 	   : "%0" ((UDItype) (u)),					\
    514   1.1  christos 	     "rm" ((UDItype) (v)))
/* udiv_qrnnd for x86-64: divide the two-word value (n1,n0) by dv using a
   single "div".  n0 is tied to output operand 0 (the "a" register) and n1
   to output operand 1 (the "d" register); after the instruction the "a"
   register is stored to q and the "d" register to r.  As stated in the
   interface description at the top of the file, n1 must be less than dv.  */
    515   1.1  christos #define udiv_qrnnd(q, r, n1, n0, dv) \
    516   1.1  christos   __asm__ ("div{q} %4"							\
    517   1.1  christos 	   : "=a" ((UDItype) (q)),					\
    518   1.1  christos 	     "=d" ((UDItype) (r))					\
    519   1.1  christos 	   : "0" ((UDItype) (n0)),					\
    520   1.1  christos 	     "1" ((UDItype) (n1)),					\
    521   1.1  christos 	     "rm" ((UDItype) (dv)))
/* Bit counting on x86-64 is left to the "long long" compiler builtins.  */
#define count_leading_zeros(cnt, word)	((cnt) = __builtin_clzll (word))
#define count_trailing_zeros(cnt, word)	((cnt) = __builtin_ctzll (word))
/* Timing constants for this CPU's multiply and divide macros.
   NOTE(review): units and consumers are not shown in this part of the
   file.  */
#define UMUL_TIME 40
#define UDIV_TIME 40
    526   1.1  christos #endif /* x86_64 */
    527   1.1  christos 
    528   1.1  christos #if defined (__i960__) && W_TYPE_SIZE == 32
/* umul_ppmm for i960: (w1,w0) = u * v.
   Written as a GNU statement expression.  "emul" writes the UDItype product
   into a union; its first USItype member (__l) is copied to w0 and its
   second (__h) to w1.  */
    529   1.1  christos #define umul_ppmm(w1, w0, u, v) \
    530   1.1  christos   ({union {UDItype __ll;						\
    531   1.1  christos 	   struct {USItype __l, __h;} __i;				\
    532   1.1  christos 	  } __xx;							\
    533   1.1  christos   __asm__ ("emul	%2,%1,%0"					\
    534   1.1  christos 	   : "=d" (__xx.__ll)						\
    535   1.1  christos 	   : "%dI" ((USItype) (u)),					\
    536   1.1  christos 	     "dI" ((USItype) (v)));					\
    537   1.1  christos   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
    538   1.1  christos #define __umulsidi3(u, v) \
    539   1.1  christos   ({UDItype __w;							\
    540   1.1  christos     __asm__ ("emul	%2,%1,%0"					\
    541   1.1  christos 	     : "=d" (__w)						\
    542   1.1  christos 	     : "%dI" ((USItype) (u)),					\
    543   1.1  christos 	       "dI" ((USItype) (v)));					\
    544   1.1  christos     __w; })
    545   1.1  christos #endif /* __i960__ */
    546   1.1  christos 
     547   1.1  christos #if defined (__ia64) && W_TYPE_SIZE == 64
     548   1.1  christos /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
     549   1.1  christos    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
     550   1.1  christos    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
     551   1.1  christos    register, which takes an extra cycle.  */
     /* sub_ddmmss (ia64): plain C.  The low difference is computed into a
        temporary first, the high word is chosen by comparing al with bl
        (borrow when al < bl), and only then is sl stored.  Note that al and
        bl are each evaluated twice.  */
     552   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
     553   1.1  christos   do {									\
     554   1.1  christos     UWtype __x;								\
     555   1.1  christos     __x = (al) - (bl);							\
     556   1.1  christos     if ((al) < (bl))							\
     557   1.1  christos       (sh) = (ah) - (bh) - 1;						\
     558   1.1  christos     else								\
     559   1.1  christos       (sh) = (ah) - (bh);						\
     560   1.1  christos     (sl) = __x;								\
     561   1.1  christos   } while (0)
     562   1.1  christos 
     563   1.1  christos /* Do both product parts in assembly, since that gives better code with
     564   1.1  christos    all gcc versions.  Some callers will just use the upper part, and in
     565   1.1  christos    that situation we waste an instruction, but not any cycles.  */
     /* umul_ppmm (ia64): `xma.hu' produces ph and `xma.l' produces pl from the
        same two inputs, all in "f"-class registers.  ph is earlyclobber since
        it is written before the second instruction reads m0 and m1.
        NOTE(review): presumably f0 is the always-zero addend -- confirm.  */
     566   1.1  christos #define umul_ppmm(ph, pl, m0, m1)					\
     567   1.1  christos   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"		\
     568   1.1  christos 	   : "=&f" (ph), "=f" (pl)					\
     569   1.1  christos 	   : "f" (m0), "f" (m1))
     /* count_leading_zeros (ia64): two asm steps (`mux1 @rev', `czx1.l')
        yield a byte position _a; _c = (_a - 1) << 3 turns it into a bit
        offset, and the C tail refines _c within that byte by testing against
        1 << 4 and 1 << 2.  The result is W_TYPE_SIZE - 1 - _c, i.e. _c is
        treated as the index of the highest set bit.  */
     570   1.1  christos #define count_leading_zeros(count, x)					\
     571   1.1  christos   do {									\
     572   1.1  christos     UWtype _x = (x), _y, _a, _c;					\
     573   1.1  christos     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));		\
     574   1.1  christos     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));		\
     575   1.1  christos     _c = (_a - 1) << 3;							\
     576   1.1  christos     _x >>= _c;								\
     577   1.1  christos     if (_x >= 1 << 4)							\
     578   1.1  christos       _x >>= 4, _c += 4;						\
     579   1.1  christos     if (_x >= 1 << 2)							\
     580   1.1  christos       _x >>= 2, _c += 2;						\
     581   1.1  christos     _c += _x >> 1;							\
     582   1.1  christos     (count) =  W_TYPE_SIZE - 1 - _c;					\
     583   1.1  christos   } while (0)
     584   1.1  christos /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
     585   1.1  christos    based, and we don't need a special case for x==0 here */
     /* count_trailing_zeros (ia64): (x - 1) & ~x is a mask of exactly the
        bits below the lowest set bit of x, so its population count is the
        number of trailing zeros.  For x == 0 the mask is all ones.  */
     586   1.1  christos #define count_trailing_zeros(count, x)					\
     587   1.1  christos   do {									\
     588   1.1  christos     UWtype __ctz_x = (x);						\
     589   1.1  christos     __asm__ ("popcnt %0 = %1"						\
     590   1.1  christos 	     : "=r" (count)						\
     591   1.1  christos 	     : "r" ((__ctz_x-1) & ~__ctz_x));				\
     592   1.1  christos   } while (0)
     593   1.1  christos #define UMUL_TIME 14
     594   1.1  christos #endif
    595   1.1  christos 
     596  1.11  christos #ifdef __loongarch__
     /* LoongArch: bit counts come from compiler builtins, picked by word
        size -- the `unsigned int' builtins for W_TYPE_SIZE == 32 and the
        `unsigned long long' ones for W_TYPE_SIZE == 64.
        COUNT_LEADING_ZEROS_0 is set to the word size in both cases.
        NOTE(review): GCC documents __builtin_clz (0) as undefined, so the
        advertised result for 0 presumably relies on how the target expands
        the builtin -- confirm.  */
     597  1.11  christos # if W_TYPE_SIZE == 32
     598  1.11  christos #  define count_leading_zeros(count, x)  ((count) = __builtin_clz (x))
     599  1.11  christos #  define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
     600  1.11  christos #  define COUNT_LEADING_ZEROS_0 32
     601  1.11  christos # elif W_TYPE_SIZE == 64
     602  1.11  christos #  define count_leading_zeros(count, x)  ((count) = __builtin_clzll (x))
     603  1.11  christos #  define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
     604  1.11  christos #  define COUNT_LEADING_ZEROS_0 64
     605  1.11  christos # endif
     606  1.11  christos #endif
    607  1.11  christos 
     608   1.1  christos #if defined (__M32R__) && W_TYPE_SIZE == 32
     /* add_ssaaaa (M32R): two-word addition.  `cmp %0,%0' is issued first to
        clear the condition bit, then `addx' adds bl into the low word and a
        second `addx' adds bh into the high word.  ah/al are tied to the
        output registers ("0"/"1") and the condition bit is declared as
        clobbered ("cbit").  */
     609   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
     610   1.1  christos   /* The cmp clears the condition bit.  */ \
     611   1.1  christos   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"			\
     612   1.1  christos 	   : "=r" ((USItype) (sh)),					\
     613   1.1  christos 	     "=&r" ((USItype) (sl))					\
     614   1.1  christos 	   : "0" ((USItype) (ah)),					\
     615   1.1  christos 	     "r" ((USItype) (bh)),					\
     616   1.1  christos 	     "1" ((USItype) (al)),					\
     617   1.1  christos 	     "r" ((USItype) (bl))					\
     618   1.1  christos 	   : "cbit")
     /* sub_ddmmss (M32R): same shape as add_ssaaaa above, using `subx' for
        both the low and the high word.  */
     619   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
     620   1.1  christos   /* The cmp clears the condition bit.  */ \
     621   1.1  christos   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"			\
     622   1.1  christos 	   : "=r" ((USItype) (sh)),					\
     623   1.1  christos 	     "=&r" ((USItype) (sl))					\
     624   1.1  christos 	   : "0" ((USItype) (ah)),					\
     625   1.1  christos 	     "r" ((USItype) (bh)),					\
     626   1.1  christos 	     "1" ((USItype) (al)),					\
     627   1.1  christos 	     "r" ((USItype) (bl))					\
     628   1.1  christos 	   : "cbit")
     629   1.1  christos #endif /* __M32R__ */
    630   1.1  christos 
     631   1.1  christos #if defined (__mc68000__) && W_TYPE_SIZE == 32
     /* add_ssaaaa (m68k): `add.l' adds bl into the low word, `addx.l' then
        adds bh into the high word.  Both results live in data registers
        ("d"); ah/al are tied to the outputs and marked commutative ("%").  */
     632   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
     633   1.1  christos   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"				\
     634   1.1  christos 	   : "=d" ((USItype) (sh)),					\
     635   1.1  christos 	     "=&d" ((USItype) (sl))					\
     636   1.1  christos 	   : "%0" ((USItype) (ah)),					\
     637   1.1  christos 	     "d" ((USItype) (bh)),					\
     638   1.1  christos 	     "%1" ((USItype) (al)),					\
     639   1.1  christos 	     "g" ((USItype) (bl)))
     /* sub_ddmmss (m68k): `sub.l' on the low words followed by `subx.l' on
        the high words; operands are not commutative here, so no `%'.  */
     640   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
     641   1.1  christos   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"				\
     642   1.1  christos 	   : "=d" ((USItype) (sh)),					\
     643   1.1  christos 	     "=&d" ((USItype) (sl))					\
     644   1.1  christos 	   : "0" ((USItype) (ah)),					\
     645   1.1  christos 	     "d" ((USItype) (bh)),					\
     646   1.1  christos 	     "1" ((USItype) (al)),					\
     647   1.1  christos 	     "g" ((USItype) (bl)))
     648   1.1  christos 
     649   1.1  christos /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
     /* NOTE(review): the comment above lists the '060, but the condition
        below explicitly excludes __mc68060__, so the '060 falls through to
        the ColdFire or generic variant -- confirm which statement is
        intended.  */
     650   1.1  christos #if (defined (__mc68020__) && !defined (__mc68060__))
     /* umul_ppmm: one `mulu.l' with a register-pair destination; w0 receives
        the low word (and initially holds u), w1 the high word.  */
     651   1.1  christos #define umul_ppmm(w1, w0, u, v) \
     652   1.1  christos   __asm__ ("mulu%.l %3,%1:%0"						\
     653   1.1  christos 	   : "=d" ((USItype) (w0)),					\
     654   1.1  christos 	     "=d" ((USItype) (w1))					\
     655   1.1  christos 	   : "%0" ((USItype) (u)),					\
     656   1.1  christos 	     "dmi" ((USItype) (v)))
     657   1.1  christos #define UMUL_TIME 45
     /* udiv_qrnnd: `divu.l' on the register pair r:q, which is preloaded
        with n1:n0 through the "1"/"0" constraints; d is the divisor.  */
     658   1.1  christos #define udiv_qrnnd(q, r, n1, n0, d) \
     659   1.1  christos   __asm__ ("divu%.l %4,%1:%0"						\
     660   1.1  christos 	   : "=d" ((USItype) (q)),					\
     661   1.1  christos 	     "=d" ((USItype) (r))					\
     662   1.1  christos 	   : "0" ((USItype) (n0)),					\
     663   1.1  christos 	     "1" ((USItype) (n1)),					\
     664   1.1  christos 	     "dmi" ((USItype) (d)))
     665   1.1  christos #define UDIV_TIME 90
     /* sdiv_qrnnd: identical operand layout to udiv_qrnnd, using the signed
        `divs.l' instruction.  */
     666   1.1  christos #define sdiv_qrnnd(q, r, n1, n0, d) \
     667   1.1  christos   __asm__ ("divs%.l %4,%1:%0"						\
     668   1.1  christos 	   : "=d" ((USItype) (q)),					\
     669   1.1  christos 	     "=d" ((USItype) (r))					\
     670   1.1  christos 	   : "0" ((USItype) (n0)),					\
     671   1.1  christos 	     "1" ((USItype) (n1)),					\
     672   1.1  christos 	     "dmi" ((USItype) (d)))
     673   1.1  christos 
     674   1.1  christos #elif defined (__mcoldfire__) /* not mc68020 */
     675   1.1  christos 
     /* umul_ppmm (ColdFire): builds the two-word product from four `mulu'
        partial products on the halves of a and b (`swap' exchanges register
        halves).  Works entirely in scratch registers d0-d4, which are
        declared as clobbers; the `jcc 1f' / add #65536 pair propagates the
        carry out of the middle sum into the high word.  */
     676   1.1  christos #define umul_ppmm(xh, xl, a, b) \
     677   1.1  christos   __asm__ ("| Inlined umul_ppmm\n"					\
     678   1.1  christos 	   "	move%.l	%2,%/d0\n"					\
     679   1.1  christos 	   "	move%.l	%3,%/d1\n"					\
     680   1.1  christos 	   "	move%.l	%/d0,%/d2\n"					\
     681   1.1  christos 	   "	swap	%/d0\n"						\
     682   1.1  christos 	   "	move%.l	%/d1,%/d3\n"					\
     683   1.1  christos 	   "	swap	%/d1\n"						\
     684   1.1  christos 	   "	move%.w	%/d2,%/d4\n"					\
     685   1.1  christos 	   "	mulu	%/d3,%/d4\n"					\
     686   1.1  christos 	   "	mulu	%/d1,%/d2\n"					\
     687   1.1  christos 	   "	mulu	%/d0,%/d3\n"					\
     688   1.1  christos 	   "	mulu	%/d0,%/d1\n"					\
     689   1.1  christos 	   "	move%.l	%/d4,%/d0\n"					\
     690   1.1  christos 	   "	clr%.w	%/d0\n"						\
     691   1.1  christos 	   "	swap	%/d0\n"						\
     692   1.1  christos 	   "	add%.l	%/d0,%/d2\n"					\
     693   1.1  christos 	   "	add%.l	%/d3,%/d2\n"					\
     694   1.1  christos 	   "	jcc	1f\n"						\
     695   1.1  christos 	   "	add%.l	%#65536,%/d1\n"					\
     696   1.1  christos 	   "1:	swap	%/d2\n"						\
     697   1.1  christos 	   "	moveq	%#0,%/d0\n"					\
     698   1.1  christos 	   "	move%.w	%/d2,%/d0\n"					\
     699   1.1  christos 	   "	move%.w	%/d4,%/d2\n"					\
     700   1.1  christos 	   "	move%.l	%/d2,%1\n"					\
     701   1.1  christos 	   "	add%.l	%/d1,%/d0\n"					\
     702   1.1  christos 	   "	move%.l	%/d0,%0"					\
     703   1.1  christos 	   : "=g" ((USItype) (xh)),					\
     704   1.1  christos 	     "=g" ((USItype) (xl))					\
     705   1.1  christos 	   : "g" ((USItype) (a)),					\
     706   1.1  christos 	     "g" ((USItype) (b))					\
     707   1.1  christos 	   : "d0", "d1", "d2", "d3", "d4")
     708   1.1  christos #define UMUL_TIME 100
     709   1.1  christos #define UDIV_TIME 400
     710   1.1  christos #else /* not ColdFire */
     711   1.1  christos /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
     /* umul_ppmm (plain 68000): the same sequence as the ColdFire variant
        above; the only difference is that the low half of d0 is cleared
        with `eor.w d0,d0' instead of `clr.w d0'.  */
     712   1.1  christos #define umul_ppmm(xh, xl, a, b) \
     713   1.1  christos   __asm__ ("| Inlined umul_ppmm\n"					\
     714   1.1  christos 	   "	move%.l	%2,%/d0\n"					\
     715   1.1  christos 	   "	move%.l	%3,%/d1\n"					\
     716   1.1  christos 	   "	move%.l	%/d0,%/d2\n"					\
     717   1.1  christos 	   "	swap	%/d0\n"						\
     718   1.1  christos 	   "	move%.l	%/d1,%/d3\n"					\
     719   1.1  christos 	   "	swap	%/d1\n"						\
     720   1.1  christos 	   "	move%.w	%/d2,%/d4\n"					\
     721   1.1  christos 	   "	mulu	%/d3,%/d4\n"					\
     722   1.1  christos 	   "	mulu	%/d1,%/d2\n"					\
     723   1.1  christos 	   "	mulu	%/d0,%/d3\n"					\
     724   1.1  christos 	   "	mulu	%/d0,%/d1\n"					\
     725   1.1  christos 	   "	move%.l	%/d4,%/d0\n"					\
     726   1.1  christos 	   "	eor%.w	%/d0,%/d0\n"					\
     727   1.1  christos 	   "	swap	%/d0\n"						\
     728   1.1  christos 	   "	add%.l	%/d0,%/d2\n"					\
     729   1.1  christos 	   "	add%.l	%/d3,%/d2\n"					\
     730   1.1  christos 	   "	jcc	1f\n"						\
     731   1.1  christos 	   "	add%.l	%#65536,%/d1\n"					\
     732   1.1  christos 	   "1:	swap	%/d2\n"						\
     733   1.1  christos 	   "	moveq	%#0,%/d0\n"					\
     734   1.1  christos 	   "	move%.w	%/d2,%/d0\n"					\
     735   1.1  christos 	   "	move%.w	%/d4,%/d2\n"					\
     736   1.1  christos 	   "	move%.l	%/d2,%1\n"					\
     737   1.1  christos 	   "	add%.l	%/d1,%/d0\n"					\
     738   1.1  christos 	   "	move%.l	%/d0,%0"					\
     739   1.1  christos 	   : "=g" ((USItype) (xh)),					\
     740   1.1  christos 	     "=g" ((USItype) (xl))					\
     741   1.1  christos 	   : "g" ((USItype) (a)),					\
     742   1.1  christos 	     "g" ((USItype) (b))					\
     743   1.1  christos 	   : "d0", "d1", "d2", "d3", "d4")
     744   1.1  christos #define UMUL_TIME 100
     745   1.1  christos #define UDIV_TIME 400
     746   1.1  christos 
     747   1.1  christos #endif /* not mc68020 */
     748   1.1  christos 
     749   1.1  christos /* The '020, '030, '040 and '060 have bitfield insns.
     750   1.1  christos    cpu32 disguises as a 68020, but lacks them.  */
     /* count_leading_zeros: on '020-class parts a single `bfffo' whose
        offset and width operands are both the constant 0 (operand 2).
        NOTE(review): presumably a width of 0 selects the full 32-bit
        field -- confirm against the instruction reference.  No
        COUNT_LEADING_ZEROS_0 is defined for this variant; the ColdFire
        variant uses __builtin_clz and defines it as 32.  */
     751   1.1  christos #if defined (__mc68020__) && !defined (__mcpu32__)
     752   1.1  christos #define count_leading_zeros(count, x) \
     753   1.1  christos   __asm__ ("bfffo %1{%b2:%b2},%0"					\
     754   1.1  christos 	   : "=d" ((USItype) (count))					\
     755   1.1  christos 	   : "od" ((USItype) (x)), "n" (0))
     756   1.1  christos /* Some ColdFire architectures have a ff1 instruction supported via
     757   1.1  christos    __builtin_clz. */
     758   1.1  christos #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
     759   1.1  christos #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
     760   1.1  christos #define COUNT_LEADING_ZEROS_0 32
     761   1.1  christos #endif
     762   1.1  christos #endif /* mc68000 */
    763   1.1  christos 
     764   1.1  christos #if defined (__m88000__) && W_TYPE_SIZE == 32
     /* add_ssaaaa (m88k): `addu.co' adds the low words (al, bl) into sl and
        `addu.ci' adds the high words (ah, bh) into sh.  NOTE(review): the
        .co/.ci suffixes presumably mean carry-out / carry-in -- confirm.
        All inputs accept a register or the "J" constant class.  */
     765   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
     766   1.1  christos   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"			\
     767   1.1  christos 	   : "=r" ((USItype) (sh)),					\
     768   1.1  christos 	     "=&r" ((USItype) (sl))					\
     769   1.1  christos 	   : "%rJ" ((USItype) (ah)),					\
     770   1.1  christos 	     "rJ" ((USItype) (bh)),					\
     771   1.1  christos 	     "%rJ" ((USItype) (al)),					\
     772   1.1  christos 	     "rJ" ((USItype) (bl)))
     /* sub_ddmmss (m88k): the same layout with `subu.co' / `subu.ci'.  */
     773   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
     774   1.1  christos   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"			\
     775   1.1  christos 	   : "=r" ((USItype) (sh)),					\
     776   1.1  christos 	     "=&r" ((USItype) (sl))					\
     777   1.1  christos 	   : "rJ" ((USItype) (ah)),					\
     778   1.1  christos 	     "rJ" ((USItype) (bh)),					\
     779   1.1  christos 	     "rJ" ((USItype) (al)),					\
     780   1.1  christos 	     "rJ" ((USItype) (bl)))
     /* count_leading_zeros (m88k): `ff1' result XOR 31.  The value 63 for
        COUNT_LEADING_ZEROS_0 is marked "sic" in the original;
        NOTE(review): presumably ff1 yields 32 for x == 0 and
        32 ^ 31 == 63 -- confirm.  */
     781   1.1  christos #define count_leading_zeros(count, x) \
     782   1.1  christos   do {									\
     783   1.1  christos     USItype __cbtmp;							\
     784   1.1  christos     __asm__ ("ff1 %0,%1"						\
     785   1.1  christos 	     : "=r" (__cbtmp)						\
     786   1.1  christos 	     : "r" ((USItype) (x)));					\
     787   1.1  christos     (count) = __cbtmp ^ 31;						\
     788   1.1  christos   } while (0)
     789   1.1  christos #define COUNT_LEADING_ZEROS_0 63 /* sic */
     790   1.1  christos #if defined (__mc88110__)
     /* umul_ppmm (mc88110): `mulu.d' writes a UDItype product into the
        union; the halves are read back with __h declared before __l.  */
     791   1.1  christos #define umul_ppmm(wh, wl, u, v) \
     792   1.1  christos   do {									\
     793   1.1  christos     union {UDItype __ll;						\
     794   1.1  christos 	   struct {USItype __h, __l;} __i;				\
     795   1.1  christos 	  } __xx;							\
     796   1.1  christos     __asm__ ("mulu.d	%0,%1,%2"					\
     797   1.1  christos 	     : "=r" (__xx.__ll)						\
     798   1.1  christos 	     : "r" ((USItype) (u)),					\
     799   1.1  christos 	       "r" ((USItype) (v)));					\
     800   1.1  christos     (wh) = __xx.__i.__h;						\
     801   1.1  christos     (wl) = __xx.__i.__l;						\
     802   1.1  christos   } while (0)
     /* udiv_qrnnd (mc88110): packs n1:n0 into the union, `divu.d' produces
        the quotient only, and the remainder is reconstructed in C as
        n0 - q * d.  Note that n0 and d are evaluated twice.  */
     803   1.1  christos #define udiv_qrnnd(q, r, n1, n0, d) \
     804   1.1  christos   ({union {UDItype __ll;						\
     805   1.1  christos 	   struct {USItype __h, __l;} __i;				\
     806   1.1  christos 	  } __xx;							\
     807   1.1  christos   USItype __q;								\
     808   1.1  christos   __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
     809   1.1  christos   __asm__ ("divu.d %0,%1,%2"						\
     810   1.1  christos 	   : "=r" (__q)							\
     811   1.1  christos 	   : "r" (__xx.__ll),						\
     812   1.1  christos 	     "r" ((USItype) (d)));					\
     813   1.1  christos   (r) = (n0) - __q * (d); (q) = __q; })
     814   1.1  christos #define UMUL_TIME 5
     815   1.1  christos #define UDIV_TIME 25
     816   1.1  christos #else
     817   1.1  christos #define UMUL_TIME 17
     818   1.1  christos #define UDIV_TIME 150
     819   1.1  christos #endif /* __mc88110__ */
     820   1.1  christos #endif /* __m88000__ */
    821   1.1  christos 
     822   1.1  christos #if defined (__mn10300__)
     /* MN10300.  With __AM33__ the multiply macros use a four-operand
        `mulu'/`mul' on general registers and count_leading_zeros uses the
        compiler builtin.  Otherwise the two-operand forms are used with
        register classes "d" and "z", preceded by two `nop's
        (NOTE(review): the reason for the nops is not visible here).  */
     823   1.1  christos # if defined (__AM33__)
     824   1.1  christos #  define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
     825   1.1  christos #  define umul_ppmm(w1, w0, u, v)		\
     826   1.1  christos     asm("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
     827   1.1  christos #  define smul_ppmm(w1, w0, u, v)		\
     828   1.1  christos     asm("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
     829   1.1  christos # else
     830   1.1  christos #  define umul_ppmm(w1, w0, u, v)		\
     831   1.1  christos     asm("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
     832   1.1  christos #  define smul_ppmm(w1, w0, u, v)		\
     833   1.1  christos     asm("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
     834   1.1  christos # endif
     /* add_ssaaaa / sub_ddmmss (mn10300): no asm; the word pairs are packed
        into DWunion values, combined with ordinary double-word `+' / `-'
        on the .ll member, and unpacked again.  */
     835   1.1  christos # define add_ssaaaa(sh, sl, ah, al, bh, bl)	\
     836   1.1  christos   do {						\
     837   1.1  christos     DWunion __s, __a, __b;			\
     838   1.1  christos     __a.s.low = (al); __a.s.high = (ah);	\
     839   1.1  christos     __b.s.low = (bl); __b.s.high = (bh);	\
     840   1.1  christos     __s.ll = __a.ll + __b.ll;			\
     841   1.1  christos     (sl) = __s.s.low; (sh) = __s.s.high;	\
     842   1.1  christos   } while (0)
     843   1.1  christos # define sub_ddmmss(sh, sl, ah, al, bh, bl)	\
     844   1.1  christos   do {						\
     845   1.1  christos     DWunion __s, __a, __b;			\
     846   1.1  christos     __a.s.low = (al); __a.s.high = (ah);	\
     847   1.1  christos     __b.s.low = (bl); __b.s.high = (bh);	\
     848   1.1  christos     __s.ll = __a.ll - __b.ll;			\
     849   1.1  christos     (sl) = __s.s.low; (sh) = __s.s.high;	\
     850   1.1  christos   } while (0)
     /* udiv_qrnnd / sdiv_qrnnd (mn10300): `divu' / `div' with nl tied to the
        quotient register ("0") and nh tied to the remainder register
        ("1"); d is the divisor.  */
     851   1.1  christos # define udiv_qrnnd(q, r, nh, nl, d)		\
     852   1.1  christos   asm("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
     853   1.1  christos # define sdiv_qrnnd(q, r, nh, nl, d)		\
     854   1.1  christos   asm("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
     855   1.1  christos # define UMUL_TIME 3
     856   1.1  christos # define UDIV_TIME 38
     857   1.1  christos #endif
    858   1.1  christos 
     859   1.1  christos #if defined (__mips__) && W_TYPE_SIZE == 32
     /* umul_ppmm (32-bit MIPS): no asm.  Both operands are narrowed to
        USItype, multiplied in UDItype, and the product is split: w1 gets
        bits 63..32, w0 gets bits 31..0.  */
     860   1.1  christos #define umul_ppmm(w1, w0, u, v)						\
     861   1.1  christos   do {									\
     862   1.1  christos     UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);		\
     863   1.1  christos     (w1) = (USItype) (__x >> 32);					\
     864   1.1  christos     (w0) = (USItype) (__x);						\
     865   1.1  christos   } while (0)
     866   1.1  christos #define UMUL_TIME 10
     867   1.1  christos #define UDIV_TIME 100
     868   1.1  christos 
     /* count_leading_zeros is only provided when __mips is 32 or 64 and the
        code is not being built as MIPS16; it forwards to __builtin_clz and
        advertises 32 as the result for a zero argument.  */
     869   1.3  christos #if (__mips == 32 || __mips == 64) && ! defined (__mips16)
     870   1.1  christos #define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
     871   1.1  christos #define COUNT_LEADING_ZEROS_0 32
     872   1.1  christos #endif
     873   1.1  christos #endif /* __mips__ */
    874   1.1  christos 
    875   1.1  christos /* FIXME: We should test _IBMR2 here when we add assembly support for the
    876   1.1  christos    system vendor compilers.
    877   1.1  christos    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
    878   1.1  christos    enough, since that hits ARM and m68k too.  */
    879   1.1  christos #if (defined (_ARCH_PPC)	/* AIX */				\
    880   1.1  christos      || defined (__powerpc__)	/* gcc */				\
    881   1.1  christos      || defined (__POWERPC__)	/* BEOS */				\
    882   1.1  christos      || defined (__ppc__)	/* Darwin */				\
    883   1.1  christos      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
    884   1.1  christos      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
    885   1.1  christos 	 && CPU_FAMILY == PPC)                                                \
    886   1.1  christos      ) && W_TYPE_SIZE == 32
    887   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    888   1.1  christos   do {									\
    889   1.1  christos     if (__builtin_constant_p (bh) && (bh) == 0)				\
    890   1.1  christos       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"		\
    891   1.1  christos 	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    892   1.1  christos     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
    893   1.1  christos       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"		\
    894   1.1  christos 	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    895   1.1  christos     else								\
    896   1.1  christos       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"		\
    897   1.1  christos 	     : "=r" (sh), "=&r" (sl)					\
    898   1.1  christos 	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
    899   1.1  christos   } while (0)
    900   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    901   1.1  christos   do {									\
    902   1.1  christos     if (__builtin_constant_p (ah) && (ah) == 0)				\
    903   1.1  christos       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"	\
    904   1.1  christos 	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    905   1.1  christos     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
    906   1.1  christos       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"	\
    907   1.1  christos 	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    908   1.1  christos     else if (__builtin_constant_p (bh) && (bh) == 0)			\
    909   1.1  christos       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"		\
    910   1.1  christos 	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    911   1.1  christos     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
    912   1.1  christos       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"		\
    913   1.1  christos 	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    914   1.1  christos     else								\
    915   1.1  christos       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"	\
    916   1.1  christos 	       : "=r" (sh), "=&r" (sl)					\
    917   1.1  christos 	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
    918   1.1  christos   } while (0)
    919   1.1  christos #define count_leading_zeros(count, x) \
    920   1.1  christos   __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
    921   1.1  christos #define COUNT_LEADING_ZEROS_0 32
    922   1.1  christos #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
    923   1.1  christos   || defined (__ppc__)                                                    \
    924   1.1  christos   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
    925   1.1  christos   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
    926   1.1  christos 	 && CPU_FAMILY == PPC)
    927   1.1  christos #define umul_ppmm(ph, pl, m0, m1) \
    928   1.1  christos   do {									\
    929   1.1  christos     USItype __m0 = (m0), __m1 = (m1);					\
    930   1.1  christos     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
    931   1.1  christos     (pl) = __m0 * __m1;							\
    932   1.1  christos   } while (0)
    933   1.1  christos #define UMUL_TIME 15
    934   1.1  christos #define smul_ppmm(ph, pl, m0, m1) \
    935   1.1  christos   do {									\
    936   1.1  christos     SItype __m0 = (m0), __m1 = (m1);					\
    937   1.1  christos     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
    938   1.1  christos     (pl) = __m0 * __m1;							\
    939   1.1  christos   } while (0)
    940   1.1  christos #define SMUL_TIME 14
    941   1.1  christos #define UDIV_TIME 120
    942   1.1  christos #endif
    943   1.1  christos #endif /* 32-bit POWER architecture variants.  */
    944   1.1  christos 
    945   1.1  christos /* We should test _IBMR2 here when we add assembly support for the system
    946   1.1  christos    vendor compilers.  */
    947   1.1  christos #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
    948   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    949   1.1  christos   do {									\
    950   1.1  christos     if (__builtin_constant_p (bh) && (bh) == 0)				\
    951   1.1  christos       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"		\
    952   1.1  christos 	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    953   1.1  christos     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
    954   1.1  christos       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"		\
    955   1.1  christos 	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    956   1.1  christos     else								\
    957   1.1  christos       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"		\
    958   1.1  christos 	     : "=r" (sh), "=&r" (sl)					\
    959   1.1  christos 	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
    960   1.1  christos   } while (0)
    961   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    962   1.1  christos   do {									\
    963   1.1  christos     if (__builtin_constant_p (ah) && (ah) == 0)				\
    964   1.1  christos       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"	\
    965   1.1  christos 	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    966   1.1  christos     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)		\
    967   1.1  christos       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"	\
    968   1.1  christos 	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    969   1.1  christos     else if (__builtin_constant_p (bh) && (bh) == 0)			\
    970   1.1  christos       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"		\
    971   1.1  christos 	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    972   1.1  christos     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
    973   1.1  christos       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"		\
    974   1.1  christos 	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    975   1.1  christos     else								\
    976   1.1  christos       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"	\
    977   1.1  christos 	       : "=r" (sh), "=&r" (sl)					\
    978   1.1  christos 	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
    979   1.1  christos   } while (0)
    980   1.1  christos #define count_leading_zeros(count, x) \
    981   1.1  christos   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
    982   1.1  christos #define COUNT_LEADING_ZEROS_0 64
    983   1.1  christos #define umul_ppmm(ph, pl, m0, m1) \
    984   1.1  christos   do {									\
    985   1.1  christos     UDItype __m0 = (m0), __m1 = (m1);					\
    986   1.1  christos     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
    987   1.1  christos     (pl) = __m0 * __m1;							\
    988   1.1  christos   } while (0)
    989   1.1  christos #define UMUL_TIME 15
    990   1.1  christos #define smul_ppmm(ph, pl, m0, m1) \
    991   1.1  christos   do {									\
    992   1.1  christos     DItype __m0 = (m0), __m1 = (m1);					\
    993   1.1  christos     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
    994   1.1  christos     (pl) = __m0 * __m1;							\
    995   1.1  christos   } while (0)
    996   1.1  christos #define SMUL_TIME 14  /* ??? */
    997   1.1  christos #define UDIV_TIME 120 /* ??? */
    998   1.1  christos #endif /* 64-bit PowerPC.  */
    999   1.1  christos 
    1000   1.1  christos #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
    /* add_ssaaaa (RT/ROMP): `a' adds bl into the low word, `ae' then adds bh
       into the high word.  ah/al are tied to the output registers.  */
    1001   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    1002   1.1  christos   __asm__ ("a %1,%5\n\tae %0,%3"					\
    1003   1.1  christos 	   : "=r" ((USItype) (sh)),					\
    1004   1.1  christos 	     "=&r" ((USItype) (sl))					\
    1005   1.1  christos 	   : "%0" ((USItype) (ah)),					\
    1006   1.1  christos 	     "r" ((USItype) (bh)),					\
    1007   1.1  christos 	     "%1" ((USItype) (al)),					\
    1008   1.1  christos 	     "r" ((USItype) (bl)))
    /* sub_ddmmss (RT/ROMP): the same layout with `s' / `se'.  */
    1009   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    1010   1.1  christos   __asm__ ("s %1,%5\n\tse %0,%3"					\
    1011   1.1  christos 	   : "=r" ((USItype) (sh)),					\
    1012   1.1  christos 	     "=&r" ((USItype) (sl))					\
    1013   1.1  christos 	   : "0" ((USItype) (ah)),					\
    1014   1.1  christos 	     "r" ((USItype) (bh)),					\
    1015   1.1  christos 	     "1" ((USItype) (al)),					\
    1016   1.1  christos 	     "r" ((USItype) (bl)))
    /* umul_ppmm (RT/ROMP): r2 is zeroed, __m0 is moved to r10 with `mts',
       sixteen `m r2,%3' steps are issued, and the two result words are
       fetched with `cas' (from r2) and `mfs' (from r10).  r2 is declared as
       a clobber.  The C statement afterwards adds __m1 to ph when the sign
       bit of __m0 is set, and __m0 when the sign bit of __m1 is set.
       NOTE(review): presumably this converts the high word of a signed
       product into that of the unsigned product -- confirm.  */
    1017   1.1  christos #define umul_ppmm(ph, pl, m0, m1) \
    1018   1.1  christos   do {									\
    1019   1.1  christos     USItype __m0 = (m0), __m1 = (m1);					\
    1020   1.1  christos     __asm__ (								\
    1021   1.1  christos        "s	r2,r2\n"						\
    1022   1.1  christos "	mts	r10,%2\n"						\
    1023   1.1  christos "	m	r2,%3\n"						\
    1024   1.1  christos "	m	r2,%3\n"						\
    1025   1.1  christos "	m	r2,%3\n"						\
    1026   1.1  christos "	m	r2,%3\n"						\
    1027   1.1  christos "	m	r2,%3\n"						\
    1028   1.1  christos "	m	r2,%3\n"						\
    1029   1.1  christos "	m	r2,%3\n"						\
    1030   1.1  christos "	m	r2,%3\n"						\
    1031   1.1  christos "	m	r2,%3\n"						\
    1032   1.1  christos "	m	r2,%3\n"						\
    1033   1.1  christos "	m	r2,%3\n"						\
    1034   1.1  christos "	m	r2,%3\n"						\
    1035   1.1  christos "	m	r2,%3\n"						\
    1036   1.1  christos "	m	r2,%3\n"						\
    1037   1.1  christos "	m	r2,%3\n"						\
    1038   1.1  christos "	m	r2,%3\n"						\
    1039   1.1  christos "	cas	%0,r2,r0\n"						\
    1040   1.1  christos "	mfs	r10,%1"							\
    1041   1.1  christos 	     : "=r" ((USItype) (ph)),					\
    1042   1.1  christos 	       "=r" ((USItype) (pl))					\
    1043   1.1  christos 	     : "%r" (__m0),						\
    1044   1.1  christos 		"r" (__m1)						\
    1045   1.1  christos 	     : "r2");							\
    1046   1.1  christos     (ph) += ((((SItype) __m0 >> 31) & __m1)				\
    1047   1.1  christos 	     + (((SItype) __m1 >> 31) & __m0));				\
    1048   1.1  christos   } while (0)
    1049   1.1  christos #define UMUL_TIME 20
    1050   1.1  christos #define UDIV_TIME 200
    /* count_leading_zeros (RT/ROMP): `clz' is applied to one 16-bit half.
       When x >= 0x10000 the upper half (x >> 16) is counted; otherwise x
       itself is counted and 16 is added.  Note that x is evaluated more
       than once.  */
    1051   1.1  christos #define count_leading_zeros(count, x) \
    1052   1.1  christos   do {									\
    1053   1.1  christos     if ((x) >= 0x10000)							\
    1054   1.1  christos       __asm__ ("clz	%0,%1"						\
    1055   1.1  christos 	       : "=r" ((USItype) (count))				\
    1056   1.1  christos 	       : "r" ((USItype) (x) >> 16));				\
    1057   1.1  christos     else								\
    1058   1.1  christos       {									\
    1059   1.1  christos 	__asm__ ("clz	%0,%1"						\
    1060   1.1  christos 		 : "=r" ((USItype) (count))				\
    1061   1.1  christos 		 : "r" ((USItype) (x)));					\
    1062   1.1  christos 	(count) += 16;							\
    1063   1.1  christos       }									\
    1064   1.1  christos   } while (0)
    1065   1.1  christos #endif
   1066   1.1  christos 
   1067   1.8  christos #if defined(__riscv)
   1068   1.8  christos #ifdef __riscv_mul
   1069   1.8  christos #define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v))
   1070   1.8  christos #define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b))
   1071   1.8  christos #else
   /* Used when __riscv_mul is not defined: pick the assembler text that
      __muluw3 emits to multiply two words.  It is a call to __mulsi3 when
      __riscv_xlen is 32 and to __muldi3 when it is 64; any other value is
      rejected at compile time.  */
   1072   1.8  christos #if __riscv_xlen == 32
   1073   1.8  christos   #define MULUW3 "call __mulsi3"
   1074   1.8  christos #elif __riscv_xlen == 64
   1075   1.8  christos   #define MULUW3 "call __muldi3"
   1076   1.8  christos #else
   1077   1.8  christos #error unsupport xlen
   1078   1.8  christos #endif /* __riscv_xlen */
   1079   1.8  christos /* We rely on the fact that MULUW3 doesn't clobber the t-registers,
   1080   1.8  christos    which allows a better register allocation result.  */
   /* __muluw3 (a, b): statement expression that places A in register a0 and
      B in register a1, emits the MULUW3 call, and yields the value left in
      a0.  Both registers are read-write operands ("+r"); ra, a2 and a3 are
      declared clobbered.  */
   1081   1.8  christos #define __muluw3(a, b) \
   1082   1.8  christos   ({ \
   1083   1.8  christos     register UWtype __op0 asm ("a0") = a; \
   1084   1.8  christos     register UWtype __op1 asm ("a1") = b; \
   1085   1.8  christos     asm volatile (MULUW3 \
   1086   1.8  christos                   : "+r" (__op0), "+r" (__op1) \
   1087   1.8  christos                   : \
   1088   1.8  christos                   : "ra", "a2", "a3"); \
   1089   1.8  christos     __op0; \
   1090   1.8  christos   })
   1091   1.8  christos #endif /* __riscv_mul */
   /* umul_ppmm (w1, w0, u, v): multiply the words U and V and store the high
      word of the product in W1 and the low word in W0.  U and V are split
      into half-words with __ll_lowpart / __ll_highpart (defined elsewhere in
      this file), the four partial products are formed with __muluw3, and the
      cross terms are summed with an explicit carry into the high product.
      U and V are each expanded twice.  */
   1092   1.8  christos #define umul_ppmm(w1, w0, u, v) \
   1093   1.8  christos   do { \
   1094   1.8  christos     UWtype __x0, __x1, __x2, __x3; \
   1095   1.8  christos     UHWtype __ul, __vl, __uh, __vh; \
   1096   1.8  christos  \
   1097   1.8  christos     __ul = __ll_lowpart (u); \
   1098   1.8  christos     __uh = __ll_highpart (u); \
   1099   1.8  christos     __vl = __ll_lowpart (v); \
   1100   1.8  christos     __vh = __ll_highpart (v); \
   1101   1.8  christos  \
   1102   1.8  christos     __x0 = __muluw3 (__ul, __vl); \
   1103   1.8  christos     __x1 = __muluw3 (__ul, __vh); \
   1104   1.8  christos     __x2 = __muluw3 (__uh, __vl); \
   1105   1.8  christos     __x3 = __muluw3 (__uh, __vh); \
   1106   1.8  christos  \
   1107   1.8  christos     __x1 += __ll_highpart (__x0);/* this can't give carry */ \
   1108   1.8  christos     __x1 += __x2; /* but this indeed can */ \
   1109   1.8  christos     if (__x1 < __x2) /* did we get it? */ \
   1110   1.8  christos       __x3 += __ll_B; /* yes, add it in the proper pos.  */ \
   1111   1.8  christos  \
   1112   1.8  christos     (w1) = __x3 + __ll_highpart (__x1); \
   1113   1.8  christos     (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
   1114   1.8  christos   } while (0)
   1115   1.8  christos #endif /* __riscv */
   1116   1.8  christos 
   1117   1.8  christos #if defined(__sh__) && W_TYPE_SIZE == 32
   1118   1.1  christos #ifndef __sh1__
   /* umul_ppmm (w1, w0, u, v) for SH other than SH1: issues dmulu.l on U and V,
      then copies macl into W0 and mach into W1 with sts.  macl and mach are
      declared clobbered.  */
   1119   1.1  christos #define umul_ppmm(w1, w0, u, v) \
   1120   1.1  christos   __asm__ (								\
   1121   1.1  christos        "dmulu.l	%2,%3\n\tsts%M1	macl,%1\n\tsts%M0	mach,%0"	\
   1122   1.1  christos 	   : "=r<" ((USItype)(w1)),					\
   1123   1.1  christos 	     "=r<" ((USItype)(w0))					\
   1124   1.1  christos 	   : "r" ((USItype)(u)),					\
   1125   1.1  christos 	     "r" ((USItype)(v))						\
   1126   1.1  christos 	   : "macl", "mach")
   1127   1.1  christos #define UMUL_TIME 5
   1128   1.1  christos #endif
   1129   1.1  christos 
   1130   1.1  christos /* This is the same algorithm as __udiv_qrnnd_c.  */
   1131   1.1  christos #define UDIV_NEEDS_NORMALIZATION 1
   1132   1.1  christos 
   1133   1.6  christos #ifdef __FDPIC__
   1134   1.6  christos /* FDPIC needs a special version of the asm fragment to extract the
   1135   1.6  christos    code address from the function descriptor. __udiv_qrnnd_16 is
   1136   1.6  christos    assumed to be local and not to use the GOT, so loading r12 is
   1137   1.6  christos    not needed. */
   /* udiv_qrnnd (q, r, n1, n0, d), FDPIC variant: calls the hidden helper
      __udiv_qrnnd_16 twice.  Operand %5 holds the address taken from
      &__udiv_qrnnd_16; before each jsr the code address is loaded from it
      into r2 (mov.l @%5,r2).  R and N1 share one register (matching
      constraint "1" on an "=&z" output); the register comment inside the
      macro gives the helper's register usage.  Q is assembled from r1 by
      the final swap.w / or pair.  r1, r2, r4, r5, r6, pr and t are declared
      clobbered.  */
   1138   1.6  christos #define udiv_qrnnd(q, r, n1, n0, d) \
   1139   1.6  christos   do {									\
   1140   1.6  christos     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
   1141   1.6  christos 			__attribute__ ((visibility ("hidden")));	\
   1142   1.6  christos     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */	\
   1143   1.6  christos     __asm__ (								\
   1144   1.6  christos 	"mov%M4	%4,r5\n"						\
   1145   1.6  christos "	swap.w	%3,r4\n"						\
   1146   1.6  christos "	swap.w	r5,r6\n"						\
   1147   1.6  christos "	mov.l	@%5,r2\n"						\
   1148   1.6  christos "	jsr	@r2\n"							\
   1149   1.6  christos "	shll16	r6\n"							\
   1150   1.6  christos "	swap.w	r4,r4\n"						\
   1151   1.6  christos "	mov.l	@%5,r2\n"						\
   1152   1.6  christos "	jsr	@r2\n"							\
   1153   1.6  christos "	swap.w	r1,%0\n"						\
   1154   1.6  christos "	or	r1,%0"							\
   1155   1.6  christos 	: "=r" (q), "=&z" (r)						\
   1156   1.6  christos 	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
   1157   1.6  christos 	: "r1", "r2", "r4", "r5", "r6", "pr", "t");			\
   1158   1.6  christos   } while (0)
   1159   1.6  christos #else
   /* udiv_qrnnd (q, r, n1, n0, d), non-FDPIC variant: calls the hidden helper
      __udiv_qrnnd_16 twice, jumping directly through operand %5, which holds
      &__udiv_qrnnd_16.  R and N1 share one register (matching constraint "1"
      on an "=&z" output); the register comment inside the macro gives the
      helper's register usage.  Q is assembled from r1 by the final
      swap.w / or pair.  r1, r2, r4, r5, r6, pr and t are declared
      clobbered.  */
   1160   1.1  christos #define udiv_qrnnd(q, r, n1, n0, d) \
   1161   1.1  christos   do {									\
   1162   1.1  christos     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
   1163   1.1  christos 			__attribute__ ((visibility ("hidden")));	\
   1164   1.1  christos     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */	\
   1165   1.1  christos     __asm__ (								\
   1166   1.1  christos 	"mov%M4 %4,r5\n"						\
   1167   1.1  christos "	swap.w %3,r4\n"							\
   1168   1.1  christos "	swap.w r5,r6\n"							\
   1169   1.1  christos "	jsr @%5\n"							\
   1170   1.1  christos "	shll16 r6\n"							\
   1171   1.1  christos "	swap.w r4,r4\n"							\
   1172   1.1  christos "	jsr @%5\n"							\
   1173   1.1  christos "	swap.w r1,%0\n"							\
   1174   1.1  christos "	or r1,%0"							\
   1175   1.1  christos 	: "=r" (q), "=&z" (r)						\
   1176   1.1  christos 	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
   1177   1.1  christos 	: "r1", "r2", "r4", "r5", "r6", "pr", "t");			\
   1178   1.1  christos   } while (0)
   1179   1.6  christos #endif /* __FDPIC__  */
   1180   1.1  christos 
   1181   1.1  christos #define UDIV_TIME 80
   1182   1.1  christos 
   /* sub_ddmmss (sh, sl, ah, al, bh, bl) for SH: clears T with clrt, then
      applies subc to the low words (BL from SL) and to the high words (BH
      from SH).  SH and SL start out holding AH and AL through the matching
      constraints "0" and "1".  T is declared clobbered.  */
   1183   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
   1184   1.1  christos   __asm__ ("clrt;subc %5,%1; subc %4,%0"				\
   1185   1.1  christos 	   : "=r" (sh), "=r" (sl)					\
   1186   1.1  christos 	   : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
   1187   1.1  christos 
   1188   1.1  christos #endif /* __sh__ */
   1189   1.1  christos 
   1190   1.1  christos #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
   1191   1.1  christos     && W_TYPE_SIZE == 32
   /* add_ssaaaa (sh, sl, ah, al, bh, bl) for 32-bit SPARC: addcc adds the low
      words AL and BL into SL, then addx adds the high words AH and BH into
      SH.  SL is earlyclobber because it is written before AH and BH are
      read.  The "%" modifier marks each addend pair as commutative.  */
   1192   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   1193   1.1  christos   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"				\
   1194   1.1  christos 	   : "=r" ((USItype) (sh)),					\
   1195   1.1  christos 	     "=&r" ((USItype) (sl))					\
   1196   1.1  christos 	   : "%rJ" ((USItype) (ah)),					\
   1197   1.1  christos 	     "rI" ((USItype) (bh)),					\
   1198   1.1  christos 	     "%rJ" ((USItype) (al)),					\
   1199   1.1  christos 	     "rI" ((USItype) (bl))					\
   1200   1.1  christos 	   __CLOBBER_CC)
   /* sub_ddmmss (sh, sl, ah, al, bh, bl) for 32-bit SPARC: subcc subtracts BL
      from AL into SL, then subx subtracts BH from AH into SH.  SL is
      earlyclobber because it is written before AH and BH are read.  Unlike
      add_ssaaaa, the operands carry no "%" modifier, since the operand
      order of a subtraction must not be exchanged.  */
   1201   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   1202   1.1  christos   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"				\
   1203   1.1  christos 	   : "=r" ((USItype) (sh)),					\
   1204   1.1  christos 	     "=&r" ((USItype) (sl))					\
   1205   1.1  christos 	   : "rJ" ((USItype) (ah)),					\
   1206   1.1  christos 	     "rI" ((USItype) (bh)),					\
   1207   1.1  christos 	     "rJ" ((USItype) (al)),					\
   1208   1.1  christos 	     "rI" ((USItype) (bl))					\
   1209   1.1  christos 	   __CLOBBER_CC)
   1210   1.1  christos #if defined (__sparc_v9__)
   /* umul_ppmm (w1, w0, u, v) for __sparc_v9__ in 32-bit mode: umul writes
      its result to the local __g1, which is bound to register g1; srlx then
      shifts that register right by 32 into W1, and W0 is assigned from __g1
      afterwards in C.  */
   1211   1.1  christos #define umul_ppmm(w1, w0, u, v) \
   1212   1.1  christos   do {									\
   1213   1.1  christos     register USItype __g1 asm ("g1");					\
   1214   1.1  christos     __asm__ ("umul\t%2,%3,%1\n\t"					\
   1215   1.1  christos 	     "srlx\t%1, 32, %0"						\
   1216   1.1  christos 	     : "=r" ((USItype) (w1)),					\
   1217   1.1  christos 	       "=r" (__g1)						\
   1218   1.1  christos 	     : "r" ((USItype) (u)),					\
   1219   1.1  christos 	       "r" ((USItype) (v)));					\
   1220   1.1  christos     (w0) = __g1;							\
   1221   1.1  christos   } while (0)
   /* udiv_qrnnd (__q, __r, __n1, __n0, __d) for __sparc_v9__ in 32-bit mode:
      moves __n1 into %y, divides with udiv (__n0 by __d) into __q, then
      recovers the remainder as __n0 - __q * __d using umul and sub.  Both
      outputs are earlyclobber because inputs are still read after they are
      written.  */
   1222   1.1  christos #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
   1223   1.1  christos   __asm__ ("mov\t%2,%%y\n\t"						\
   1224   1.1  christos 	   "udiv\t%3,%4,%0\n\t"						\
   1225   1.1  christos 	   "umul\t%0,%4,%1\n\t"						\
   1226   1.1  christos 	   "sub\t%3,%1,%1"						\
   1227   1.1  christos 	   : "=&r" ((USItype) (__q)),					\
   1228   1.1  christos 	     "=&r" ((USItype) (__r))					\
   1229   1.1  christos 	   : "r" ((USItype) (__n1)),					\
   1230   1.1  christos 	     "r" ((USItype) (__n0)),					\
   1231   1.1  christos 	     "r" ((USItype) (__d)))
   1232   1.1  christos #else
   1233   1.1  christos #if defined (__sparc_v8__)
   /* umul_ppmm (w1, w0, u, v) for __sparc_v8__: umul writes its register
      result to W0, and W1 is then read from the %y register with rd.  */
   1234   1.1  christos #define umul_ppmm(w1, w0, u, v) \
   1235   1.1  christos   __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
   1236   1.1  christos 	   : "=r" ((USItype) (w1)),					\
   1237   1.1  christos 	     "=r" ((USItype) (w0))					\
   1238   1.1  christos 	   : "r" ((USItype) (u)),					\
   1239   1.1  christos 	     "r" ((USItype) (v)))
   /* udiv_qrnnd (__q, __r, __n1, __n0, __d) for __sparc_v8__: moves __n1 into
      %y, waits three nops, divides with udiv (__n0 by __d) into __q, then
      recovers the remainder as __n0 - __q * __d using umul and sub.  Both
      outputs are earlyclobber because inputs are still read after they are
      written.  */
   1240   1.1  christos #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
   1241   1.1  christos   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
   1242   1.1  christos 	   : "=&r" ((USItype) (__q)),					\
   1243   1.1  christos 	     "=&r" ((USItype) (__r))					\
   1244   1.1  christos 	   : "r" ((USItype) (__n1)),					\
   1245   1.1  christos 	     "r" ((USItype) (__n0)),					\
   1246   1.1  christos 	     "r" ((USItype) (__d)))
   1247   1.1  christos #else
   1248   1.1  christos #if defined (__sparclite__)
   1249   1.1  christos /* This has hardware multiply but not divide.  It also has two additional
   1250   1.1  christos    instructions scan (ffs from high bit) and divscc.  */
   /* umul_ppmm (w1, w0, u, v) for __sparclite__: umul writes its register
      result to W0, and W1 is then read from the %y register with rd.  */
   1251   1.1  christos #define umul_ppmm(w1, w0, u, v) \
   1252   1.1  christos   __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
   1253   1.1  christos 	   : "=r" ((USItype) (w1)),					\
   1254   1.1  christos 	     "=r" ((USItype) (w0))					\
   1255   1.1  christos 	   : "r" ((USItype) (u)),					\
   1256   1.1  christos 	     "r" ((USItype) (v)))
   1257   1.1  christos #define udiv_qrnnd(q, r, n1, n0, d) \
   1258   1.1  christos   __asm__ ("! Inlined udiv_qrnnd\n"					\
   1259   1.1  christos "	wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n"	\
   1260   1.1  christos "	tst	%%g0\n"							\
   1261   1.1  christos "	divscc	%3,%4,%%g1\n"						\
   1262   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1263   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1264   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1265   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1266   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1267   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1268   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1269   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1270   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1271   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1272   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1273   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1274   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1275   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1276   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1277   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1278   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1279   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1280   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1281   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1282   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1283   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1284   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1285   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1286   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1287   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1288   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1289   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1290   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1291   1.1  christos "	divscc	%%g1,%4,%%g1\n"						\
   1292   1.1  christos "	divscc	%%g1,%4,%0\n"						\
   1293   1.1  christos "	rd	%%y,%1\n"						\
   1294   1.1  christos "	bl,a 1f\n"							\
   1295   1.1  christos "	add	%1,%4,%1\n"						\
   1296   1.1  christos "1:	! End of inline udiv_qrnnd"					\
   1297   1.1  christos 	   : "=r" ((USItype) (q)),					\
   1298   1.1  christos 	     "=r" ((USItype) (r))					\
   1299   1.1  christos 	   : "r" ((USItype) (n1)),					\
   1300   1.1  christos 	     "r" ((USItype) (n0)),					\
   1301   1.1  christos 	     "rI" ((USItype) (d))					\
   1302   1.1  christos 	   : "g1" __AND_CLOBBER_CC)
   1303   1.1  christos #define UDIV_TIME 37
   /* count_leading_zeros (count, x) for __sparclite__: a single scan
      instruction with X as the first source and the immediate 1 as the
      second, writing COUNT.  */
   1304   1.1  christos #define count_leading_zeros(count, x) \
   1305   1.1  christos   do {                                                                  \
   1306   1.1  christos   __asm__ ("scan %1,1,%0"                                               \
   1307   1.1  christos 	   : "=r" ((USItype) (count))                                   \
   1308   1.1  christos 	   : "r" ((USItype) (x)));					\
   1309   1.1  christos   } while (0)
   1310   1.1  christos /* Early sparclites return 63 for an argument of 0, but they warn that future
   1311   1.1  christos    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
   1312   1.1  christos    undefined.  */
   1313   1.1  christos #else
   1314   1.1  christos /* SPARC without integer multiplication and divide instructions.
   1315   1.1  christos    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
   /* umul_ppmm (w1, w0, u, v) for SPARC without a multiply instruction:
      writes U to %y, then runs 32 mulscc steps against V accumulating in g1,
      followed by one final mulscc step with a zero operand.  o5 holds
      U & (V >> 31) with an arithmetic shift, i.e. U when V has its top bit
      set and 0 otherwise; it is added to g1 to form W1.  W0 is read back
      from %y.  g1, o5 and the condition codes are declared clobbered.  The
      whole sequence is 39 instructions, which is the value given to
      UMUL_TIME.  */
   1316   1.1  christos #define umul_ppmm(w1, w0, u, v) \
   1317   1.1  christos   __asm__ ("! Inlined umul_ppmm\n"					\
   1318   1.1  christos "	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n"\
   1319   1.1  christos "	sra	%3,31,%%o5	! Don't move this insn\n"		\
   1320   1.1  christos "	and	%2,%%o5,%%o5	! Don't move this insn\n"		\
   1321   1.1  christos "	andcc	%%g0,0,%%g1	! Don't move this insn\n"		\
   1322   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1323   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1324   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1325   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1326   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1327   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1328   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1329   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1330   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1331   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1332   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1333   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1334   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1335   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1336   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1337   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1338   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1339   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1340   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1341   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1342   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1343   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1344   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1345   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1346   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1347   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1348   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1349   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1350   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1351   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1352   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1353   1.1  christos "	mulscc	%%g1,%3,%%g1\n"						\
   1354   1.1  christos "	mulscc	%%g1,0,%%g1\n"						\
   1355   1.1  christos "	add	%%g1,%%o5,%0\n"						\
   1356   1.1  christos "	rd	%%y,%1"							\
   1357   1.1  christos 	   : "=r" ((USItype) (w1)),					\
   1358   1.1  christos 	     "=r" ((USItype) (w0))					\
   1359   1.1  christos 	   : "%rI" ((USItype) (u)),					\
   1360   1.1  christos 	     "r" ((USItype) (v))						\
   1361   1.1  christos 	   : "g1", "o5" __AND_CLOBBER_CC)
   1362   1.1  christos #define UMUL_TIME 39		/* 39 instructions */
   1363   1.1  christos /* It's quite necessary to add this much assembler for the sparc.
   1364   1.1  christos    The default udiv_qrnnd (in C) is more than 10 times slower!  */
   /* udiv_qrnnd (__q, __r, __n1, __n0, __d) for SPARC without a divide
      instruction: a shift-and-subtract loop counted down from 32 in g1.
      Operand %0 (__q) starts out holding __n0 and operand %1 (__r) starts
      out holding __n1 through the matching constraints "0" and "1"; operand
      %2 is __d.  Each pass shifts a quotient bit into %0, and the final
      xnor with 0 inverts the accumulated bits.  g1 and the condition codes
      are declared clobbered.  */
   1365   1.1  christos #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
   1366   1.1  christos   __asm__ ("! Inlined udiv_qrnnd\n"					\
   1367   1.1  christos "	mov	32,%%g1\n"						\
   1368   1.1  christos "	subcc	%1,%2,%%g0\n"						\
   1369   1.1  christos "1:	bcs	5f\n"							\
   1370   1.1  christos "	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n"	\
   1371   1.1  christos "	sub	%1,%2,%1	! this kills msb of n\n"		\
   1372   1.1  christos "	addx	%1,%1,%1	! so this can't give carry\n"		\
   1373   1.1  christos "	subcc	%%g1,1,%%g1\n"						\
   1374   1.1  christos "2:	bne	1b\n"							\
   1375   1.1  christos "	 subcc	%1,%2,%%g0\n"						\
   1376   1.1  christos "	bcs	3f\n"							\
   1377   1.1  christos "	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n"	\
   1378   1.1  christos "	b	3f\n"							\
   1379   1.1  christos "	 sub	%1,%2,%1	! this kills msb of n\n"		\
   1380   1.1  christos "4:	sub	%1,%2,%1\n"						\
   1381   1.1  christos "5:	addxcc	%1,%1,%1\n"						\
   1382   1.1  christos "	bcc	2b\n"							\
   1383   1.1  christos "	 subcc	%%g1,1,%%g1\n"						\
   1384   1.1  christos "! Got carry from n.  Subtract next step to cancel this carry.\n"	\
   1385   1.1  christos "	bne	4b\n"							\
   1386   1.1  christos "	 addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n"	\
   1387   1.1  christos "	sub	%1,%2,%1\n"						\
   1388   1.1  christos "3:	xnor	%0,0,%0\n"						\
   1389   1.1  christos "	! End of inline udiv_qrnnd"					\
   1390   1.1  christos 	   : "=&r" ((USItype) (__q)),					\
   1391   1.1  christos 	     "=&r" ((USItype) (__r))					\
   1392   1.1  christos 	   : "r" ((USItype) (__d)),					\
   1393   1.1  christos 	     "1" ((USItype) (__n1)),					\
   1394   1.1  christos 	     "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
   1395   1.1  christos #define UDIV_TIME (3+7*32)	/* 7 instructions/iteration. 32 iterations.  */
   1396   1.1  christos #endif /* __sparclite__ */
   1397   1.1  christos #endif /* __sparc_v8__ */
   1398   1.1  christos #endif /* __sparc_v9__ */
   1399   1.1  christos #endif /* sparc32 */
   1400   1.1  christos 
   1401   1.1  christos #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
   1402   1.1  christos     && W_TYPE_SIZE == 64
   /* add_ssaaaa (sh, sl, ah, al, bh, bl) for 64-bit SPARC: addcc adds the low
      words into SL, a plain add sums the high words into SH, movcs on %xcc
      sets the local __carry (initially 0) to 1 when the low add carried, and
      a final add folds __carry into SH.  */
   1403   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
   1404   1.1  christos   do {									\
   1405   1.1  christos     UDItype __carry = 0;						\
   1406   1.1  christos     __asm__ ("addcc\t%r5,%6,%1\n\t"					\
   1407   1.1  christos 	     "add\t%r3,%4,%0\n\t"					\
   1408   1.1  christos 	     "movcs\t%%xcc, 1, %2\n\t"					\
   1409   1.1  christos 	     "add\t%0, %2, %0"						\
   1410   1.1  christos 	     : "=r" ((UDItype)(sh)),				      	\
   1411   1.1  christos 	       "=&r" ((UDItype)(sl)),				      	\
   1412   1.1  christos 	       "+r" (__carry)				      		\
   1413   1.1  christos 	     : "%rJ" ((UDItype)(ah)),				     	\
   1414   1.1  christos 	       "rI" ((UDItype)(bh)),				      	\
   1415   1.1  christos 	       "%rJ" ((UDItype)(al)),				     	\
   1416   1.1  christos 	       "rI" ((UDItype)(bl))				       	\
   1417   1.1  christos 	     __CLOBBER_CC);						\
   1418   1.1  christos   } while (0)
   1419   1.1  christos 
   1420   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
   1421   1.1  christos   do {									\
   1422   1.1  christos     UDItype __carry = 0;						\
   1423   1.1  christos     __asm__ ("subcc\t%r5,%6,%1\n\t"					\
   1424   1.1  christos 	     "sub\t%r3,%4,%0\n\t"					\
   1425   1.1  christos 	     "movcs\t%%xcc, 1, %2\n\t"					\
   1426   1.1  christos 	     "sub\t%0, %2, %0"						\
   1427   1.1  christos 	     : "=r" ((UDItype)(sh)),				      	\
   1428   1.1  christos 	       "=&r" ((UDItype)(sl)),				      	\
   1429   1.1  christos 	       "+r" (__carry)				      		\
   1430   1.1  christos 	     : "%rJ" ((UDItype)(ah)),				     	\
   1431   1.1  christos 	       "rI" ((UDItype)(bh)),				      	\
   1432   1.1  christos 	       "%rJ" ((UDItype)(al)),				     	\
   1433   1.1  christos 	       "rI" ((UDItype)(bl))				       	\
   1434   1.1  christos 	     __CLOBBER_CC);						\
   1435   1.1  christos   } while (0)
   1436   1.1  christos 
   /* umul_ppmm (wh, wl, u, v) for 64-bit SPARC: builds the two-word product
      of U and V from four mulx instructions on the 32-bit halves of the
      operands (srl ...,0 and srlx ...,32 extract the halves), combining the
      partial products with shifts and adds.  WH receives the result of the
      last add and WL the accumulated low word.  tmp1..tmp4 are scratch
      registers, all earlyclobber; the asm is volatile and clobbers the
      condition codes.  */
   1437   1.1  christos #define umul_ppmm(wh, wl, u, v)						\
   1438   1.1  christos   do {									\
   1439   1.1  christos 	  UDItype tmp1, tmp2, tmp3, tmp4;				\
   1440   1.1  christos 	  __asm__ __volatile__ (					\
   1441   1.1  christos 		   "srl %7,0,%3\n\t"					\
   1442   1.1  christos 		   "mulx %3,%6,%1\n\t"					\
   1443   1.1  christos 		   "srlx %6,32,%2\n\t"					\
   1444   1.1  christos 		   "mulx %2,%3,%4\n\t"					\
   1445   1.1  christos 		   "sllx %4,32,%5\n\t"					\
   1446   1.1  christos 		   "srl %6,0,%3\n\t"					\
   1447   1.1  christos 		   "sub %1,%5,%5\n\t"					\
   1448   1.1  christos 		   "srlx %5,32,%5\n\t"					\
   1449   1.1  christos 		   "addcc %4,%5,%4\n\t"					\
   1450   1.1  christos 		   "srlx %7,32,%5\n\t"					\
   1451   1.1  christos 		   "mulx %3,%5,%3\n\t"					\
   1452   1.1  christos 		   "mulx %2,%5,%5\n\t"					\
   1453   1.1  christos 		   "sethi %%hi(0x80000000),%2\n\t"			\
   1454   1.1  christos 		   "addcc %4,%3,%4\n\t"					\
   1455   1.1  christos 		   "srlx %4,32,%4\n\t"					\
   1456   1.1  christos 		   "add %2,%2,%2\n\t"					\
   1457   1.1  christos 		   "movcc %%xcc,%%g0,%2\n\t"				\
   1458   1.1  christos 		   "addcc %5,%4,%5\n\t"					\
   1459   1.1  christos 		   "sllx %3,32,%3\n\t"					\
   1460   1.1  christos 		   "add %1,%3,%1\n\t"					\
   1461   1.1  christos 		   "add %5,%2,%0"					\
   1462   1.1  christos 	   : "=r" ((UDItype)(wh)),					\
   1463   1.1  christos 	     "=&r" ((UDItype)(wl)),					\
   1464   1.1  christos 	     "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)	\
   1465   1.1  christos 	   : "r" ((UDItype)(u)),					\
   1466   1.1  christos 	     "r" ((UDItype)(v))						\
   1467   1.1  christos 	   __CLOBBER_CC);						\
   1468   1.1  christos   } while (0)
   1469   1.1  christos #define UMUL_TIME 96
   1470   1.1  christos #define UDIV_TIME 230
   1471   1.1  christos #endif /* sparc64 */
   1472   1.1  christos 
   1473   1.1  christos #if defined (__vax__) && W_TYPE_SIZE == 32
   /* add_ssaaaa (sh, sl, ah, al, bh, bl) for VAX: addl2 adds BL into SL, then
      adwc adds BH into SH.  SH and SL start out holding AH and AL through
      the matching constraints "0" and "1".  */
   1474   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   1475   1.1  christos   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"					\
   1476   1.1  christos 	   : "=g" ((USItype) (sh)),					\
   1477   1.1  christos 	     "=&g" ((USItype) (sl))					\
   1478   1.1  christos 	   : "%0" ((USItype) (ah)),					\
   1479   1.1  christos 	     "g" ((USItype) (bh)),					\
   1480   1.1  christos 	     "%1" ((USItype) (al)),					\
   1481   1.1  christos 	     "g" ((USItype) (bl)))
   /* sub_ddmmss (sh, sl, ah, al, bh, bl) for VAX: subl2 subtracts BL from SL,
      then sbwc subtracts BH from SH.  SH and SL start out holding AH and AL
      through the matching constraints "0" and "1".  */
   1482   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   1483   1.1  christos   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"					\
   1484   1.1  christos 	   : "=g" ((USItype) (sh)),					\
   1485   1.1  christos 	     "=&g" ((USItype) (sl))					\
   1486   1.1  christos 	   : "0" ((USItype) (ah)),					\
   1487   1.1  christos 	     "g" ((USItype) (bh)),					\
   1488   1.1  christos 	     "1" ((USItype) (al)),					\
   1489   1.1  christos 	     "g" ((USItype) (bl)))
   /* umul_ppmm (xh, xl, m0, m1) for VAX: emul multiplies M0 by M1 (with a
      zero addend) into a UDItype, whose halves are read back through a
      union: __l into XL and __h into XH.  XH is then adjusted by adding M1
      when M0 has its top bit set and M0 when M1 has its top bit set -- the
      same correction this file applies when deriving umul_ppmm from
      smul_ppmm, which suggests emul is a signed multiply (TODO confirm).
      M0 and M1 are evaluated once, into locals.  */
   1490   1.1  christos #define umul_ppmm(xh, xl, m0, m1) \
   1491   1.1  christos   do {									\
   1492   1.1  christos     union {								\
   1493   1.1  christos 	UDItype __ll;							\
   1494   1.1  christos 	struct {USItype __l, __h;} __i;					\
   1495   1.1  christos       } __xx;								\
   1496   1.1  christos     USItype __m0 = (m0), __m1 = (m1);					\
   1497   1.1  christos     __asm__ ("emul %1,%2,$0,%0"						\
   1498   1.1  christos 	     : "=r" (__xx.__ll)						\
   1499   1.1  christos 	     : "g" (__m0),						\
   1500   1.1  christos 	       "g" (__m1));						\
   1501   1.1  christos     (xh) = __xx.__i.__h;						\
   1502   1.1  christos     (xl) = __xx.__i.__l;						\
   1503   1.1  christos     (xh) += ((((SItype) __m0 >> 31) & __m1)				\
   1504   1.1  christos 	     + (((SItype) __m1 >> 31) & __m0));				\
   1505   1.1  christos   } while (0)
   /* sdiv_qrnnd (q, r, n1, n0, d) for VAX: packs N1 (as __h) and N0 (as __l)
      into a DItype through a union, then issues ediv with D as the divisor
      and that double word as the dividend, writing Q and R.  */
   1506   1.1  christos #define sdiv_qrnnd(q, r, n1, n0, d) \
   1507   1.1  christos   do {									\
   1508   1.1  christos     union {DItype __ll;							\
   1509   1.1  christos 	   struct {SItype __l, __h;} __i;				\
   1510   1.1  christos 	  } __xx;							\
   1511   1.1  christos     __xx.__i.__h = n1; __xx.__i.__l = n0;				\
   1512   1.1  christos     __asm__ ("ediv %3,%2,%0,%1"						\
   1513   1.1  christos 	     : "=g" (q), "=g" (r)					\
   1514   1.1  christos 	     : "g" (__xx.__ll), "g" (d));				\
   1515   1.1  christos   } while (0)
   1516   1.1  christos #endif /* __vax__ */
   1517   1.1  christos 
   1518   1.1  christos #ifdef _TMS320C6X
   /* add_ssaaaa (sh, sl, ah, al, bh, bl) for TMS320C6X: addu adds AL and BL
      into the UDItype __ll.  SL is the low 32 bits of __ll; SH is the upper
      part of __ll (__ll >> 32) plus AH plus BH.  */
   1519   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   1520   1.1  christos   do									\
   1521   1.1  christos     {									\
   1522   1.1  christos       UDItype __ll;							\
   1523   1.1  christos       __asm__ ("addu .l1 %1, %2, %0"					\
   1524   1.1  christos 	       : "=a" (__ll) : "a" (al), "a" (bl));			\
   1525   1.1  christos       (sl) = (USItype)__ll;						\
   1526   1.1  christos       (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);			\
   1527   1.1  christos     }									\
   1528   1.1  christos   while (0)
   1529   1.1  christos 
   1530   1.1  christos #ifdef _TMS320C6400_PLUS
   1531   1.1  christos #define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
   /* umul_ppmm (w1, w0, u, v) for _TMS320C6400_PLUS: multiplies U and V in
      UDItype using plain C, then stores the upper 32 bits of the product in
      W1 and the lower 32 bits in W0.  */
   1532   1.1  christos #define umul_ppmm(w1, w0, u, v)						\
   1533   1.1  christos   do {									\
   1534   1.1  christos     UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);		\
   1535   1.1  christos     (w1) = (USItype) (__x >> 32);					\
   1536   1.1  christos     (w0) = (USItype) (__x);						\
   1537   1.1  christos   } while (0)
   1538   1.1  christos #endif  /* _TMS320C6400_PLUS */
   1539   1.1  christos 
   /* Bit counting for TMS320C6X maps directly onto compiler builtins:
      count_leading_zeros uses __builtin_clz, and count_trailing_zeros
      (provided only when _TMS320C6400 is defined) uses __builtin_ctz.  The
      two *_TIME values are this target's cost figures (unit not defined in
      this part of the file).  */
   1540   1.1  christos #define count_leading_zeros(count, x)	((count) = __builtin_clz (x))
   1541   1.1  christos #ifdef _TMS320C6400
   1542   1.1  christos #define count_trailing_zeros(count, x)	((count) = __builtin_ctz (x))
   1543   1.1  christos #endif
   1544   1.1  christos #define UMUL_TIME 4
   1545   1.1  christos #define UDIV_TIME 40
   1546   1.1  christos #endif /* _TMS320C6X */
   1547   1.1  christos 
   1548   1.1  christos #if defined (__xtensa__) && W_TYPE_SIZE == 32
   1549   1.1  christos /* This code is not Xtensa-configuration-specific, so rely on the compiler
   1550   1.1  christos    to expand builtin functions depending on what configuration features
   1551   1.1  christos    are available.  This avoids library calls when the operation can be
   1552   1.1  christos    performed in-line.  */
   /* Xtensa definitions, all expressed through compiler builtins.
      umul_ppmm (w1, w0, u, v) stores __builtin_umulsidi3 (u, v) in a DWunion
      and copies its high member to W1 and its low member to W0.
      NOTE(review): W1 and W0 are used unparenthesized as assignment targets
      here, unlike most other umul_ppmm definitions in this file.
      __umulsidi3, count_leading_zeros and count_trailing_zeros forward to
      __builtin_umulsidi3, __builtin_clz and __builtin_ctz respectively.  */
   1553   1.1  christos #define umul_ppmm(w1, w0, u, v)						\
   1554   1.1  christos   do {									\
   1555   1.1  christos     DWunion __w;							\
   1556   1.1  christos     __w.ll = __builtin_umulsidi3 (u, v);				\
   1557   1.1  christos     w1 = __w.s.high;							\
   1558   1.1  christos     w0 = __w.s.low;							\
   1559   1.1  christos   } while (0)
   1560   1.1  christos #define __umulsidi3(u, v)		__builtin_umulsidi3 (u, v)
   1561   1.1  christos #define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clz (X))
   1562   1.1  christos #define count_trailing_zeros(COUNT, X)	((COUNT) = __builtin_ctz (X))
   1563   1.1  christos #endif /* __xtensa__ */
   1564   1.1  christos 
   1565   1.1  christos #if defined xstormy16
   /* count_leading_zeros (count, x) for xstormy16: walks X in 16-bit steps
      from the most significant end.  Each step passes X >> (size - 16) to
      __clzhi2 and adds the result to COUNT; the loop stops at the first step
      whose result is not 16, or when all W_TYPE_SIZE bits have been
      examined.  If every step returns 16, COUNT ends up equal to
      W_TYPE_SIZE, which is the value given to COUNT_LEADING_ZEROS_0.
      NOTE(review): the prototype below declares
      __stormy16_count_leading_zeros, but the macro calls __clzhi2, which is
      not declared in this part of the file -- confirm which is intended.
      COUNT and X are expanded several times.  */
   1566   1.1  christos extern UHItype __stormy16_count_leading_zeros (UHItype);
   1567   1.1  christos #define count_leading_zeros(count, x)					\
   1568   1.1  christos   do									\
   1569   1.1  christos     {									\
   1570   1.1  christos       UHItype size;							\
   1571   1.1  christos 									\
   1572   1.1  christos       /* We assume that W_TYPE_SIZE is a multiple of 16...  */		\
   1573   1.1  christos       for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16)		\
   1574   1.1  christos 	{								\
   1575   1.1  christos 	  UHItype c;							\
   1576   1.1  christos 									\
   1577   1.1  christos 	  c = __clzhi2 ((x) >> (size - 16));				\
   1578   1.1  christos 	  (count) += c;							\
   1579   1.1  christos 	  if (c != 16)							\
   1580   1.1  christos 	    break;							\
   1581   1.1  christos 	}								\
   1582   1.1  christos     }									\
   1583   1.1  christos   while (0)
   1584   1.1  christos #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
   1585   1.1  christos #endif
   1586   1.1  christos 
   1587   1.1  christos #if defined (__z8000__) && W_TYPE_SIZE == 16
   /* add_ssaaaa (sh, sl, ah, al, bh, bl) for Z8000 with 16-bit words: add
      adds BL into SL, then adc adds BH into SH.  SH and SL start out holding
      AH and AL through the matching constraints "0" and "1".  */
   1588   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   1589   1.1  christos   __asm__ ("add	%H1,%H5\n\tadc	%H0,%H3"				\
   1590   1.1  christos 	   : "=r" ((unsigned int)(sh)),					\
   1591   1.1  christos 	     "=&r" ((unsigned int)(sl))					\
   1592   1.1  christos 	   : "%0" ((unsigned int)(ah)),					\
   1593   1.1  christos 	     "r" ((unsigned int)(bh)),					\
   1594   1.1  christos 	     "%1" ((unsigned int)(al)),					\
   1595   1.1  christos 	     "rQR" ((unsigned int)(bl)))
   /* sub_ddmmss (sh, sl, ah, al, bh, bl) for Z8000 with 16-bit words: sub
      subtracts BL from SL, then sbc subtracts BH from SH.  SH and SL start
      out holding AH and AL through the matching constraints "0" and "1".  */
   1596   1.1  christos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   1597   1.1  christos   __asm__ ("sub	%H1,%H5\n\tsbc	%H0,%H3"				\
   1598   1.1  christos 	   : "=r" ((unsigned int)(sh)),					\
   1599   1.1  christos 	     "=&r" ((unsigned int)(sl))					\
   1600   1.1  christos 	   : "0" ((unsigned int)(ah)),					\
   1601   1.1  christos 	     "r" ((unsigned int)(bh)),					\
   1602   1.1  christos 	     "1" ((unsigned int)(al)),					\
   1603   1.1  christos 	     "rQR" ((unsigned int)(bl)))
   /* umul_ppmm (xh, xl, m0, m1) for Z8000 with 16-bit words: mult leaves its
      result in the two union halves __h and __l, which are copied to XH and
      XL.  XH is then adjusted by adding M1 when M0 has bit 15 set and M0
      when M1 has bit 15 set -- the same correction this file applies when
      deriving umul_ppmm from smul_ppmm, which suggests mult is a signed
      multiply (TODO confirm).  M0 and M1 are evaluated once, into locals.  */
   1604   1.1  christos #define umul_ppmm(xh, xl, m0, m1) \
   1605   1.1  christos   do {									\
   1606   1.1  christos     union {long int __ll;						\
   1607   1.1  christos 	   struct {unsigned int __h, __l;} __i;				\
   1608   1.1  christos 	  } __xx;							\
   1609   1.1  christos     unsigned int __m0 = (m0), __m1 = (m1);				\
   1610   1.1  christos     __asm__ ("mult	%S0,%H3"					\
   1611   1.1  christos 	     : "=r" (__xx.__i.__h),					\
   1612   1.1  christos 	       "=r" (__xx.__i.__l)					\
   1613   1.1  christos 	     : "%1" (__m0),						\
   1614   1.1  christos 	       "rQR" (__m1));						\
   1615   1.1  christos     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;				\
   1616   1.1  christos     (xh) += ((((signed int) __m0 >> 15) & __m1)				\
   1617   1.1  christos 	     + (((signed int) __m1 >> 15) & __m0));			\
   1618   1.1  christos   } while (0)
   1619   1.1  christos #endif /* __z8000__ */
   1620   1.1  christos 
   1621   1.1  christos #endif /* __GNUC__ */
   1622   1.1  christos 
   1623   1.1  christos /* If this machine has no inline assembler, use C macros.  */
   1624   1.1  christos 
   1625   1.1  christos #if !defined (add_ssaaaa)
   /* add_ssaaaa (sh, sl, ah, al, bh, bl), portable C version: adds the
      two-word value (AH, AL) to (BH, BL) and stores the result in (SH, SL).
      The low words are summed into a UWtype first; the carry is detected as
      the sum being smaller than AL and is added into the high word.  SL is
      assigned last, after AL has been read for the carry test.  AL is
      expanded twice.  */
   1626   1.1  christos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   1627   1.1  christos   do {									\
   1628   1.1  christos     UWtype __x;								\
   1629   1.1  christos     __x = (al) + (bl);							\
   1630   1.1  christos     (sh) = (ah) + (bh) + (__x < (al));					\
   1631   1.1  christos     (sl) = __x;								\
   1632   1.1  christos   } while (0)
   1633   1.1  christos #endif
   1634   1.1  christos 
/* Portable two-word subtraction: (sh,sl) = (ah,al) - (bh,bl), each
   argument being one UWtype word.  The low words are latched into locals
   so that `al' and `bl' are evaluated exactly once, even when the caller
   passes an expression with side effects.  A borrow out of the low word
   shows up as the wrapped difference being larger than the minuend.  */
#if !defined (sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __sub_al = (al);						\
    UWtype __sub_bl = (bl);						\
    UWtype __sub_x = __sub_al - __sub_bl;				\
    (sh) = (ah) - (bh) - (__sub_x > __sub_al);				\
    (sl) = __sub_x;							\
  } while (0)
#endif
   1644   1.1  christos 
/* A target that supplies only the signed smul_ppmm still gets an unsigned
   umul_ppmm: take the signed product and, for each operand whose top bit
   is set, add the other operand into the high word.  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v)						\
  do {									\
    UWtype __umul_hi;							\
    UWtype __umul_a = (u);						\
    UWtype __umul_b = (v);						\
    smul_ppmm (__umul_hi, w0, __umul_a, __umul_b);			\
    (w1) = __umul_hi							\
	   + (-(__umul_a >> (W_TYPE_SIZE - 1)) & __umul_b)		\
	   + (-(__umul_b >> (W_TYPE_SIZE - 1)) & __umul_a);		\
  } while (0)
#endif
   1657   1.1  christos 
/* Last-resort umul_ppmm in plain C: (w1,w0) = u * v.
   Both operands are latched into locals so that `u' and `v' are each
   evaluated exactly once.  They are then split into half words and the
   four partial products are combined schoolbook-style; the only step
   that can overflow a word is the sum of the two cross products, and
   that carry is folded into the high partial product.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v)						\
  do {									\
    UWtype __umul_u = (u), __umul_v = (v);				\
    UWtype __x0, __x1, __x2, __x3;					\
    UHWtype __ul, __vl, __uh, __vh;					\
									\
    __ul = __ll_lowpart (__umul_u);					\
    __uh = __ll_highpart (__umul_u);					\
    __vl = __ll_lowpart (__umul_v);					\
    __vh = __ll_highpart (__umul_v);					\
									\
    __x0 = (UWtype) __ul * __vl;					\
    __x1 = (UWtype) __ul * __vh;					\
    __x2 = (UWtype) __uh * __vl;					\
    __x3 = (UWtype) __uh * __vh;					\
									\
    __x1 += __ll_highpart (__x0);/* this can't give carry */		\
    __x1 += __x2;		/* but this indeed can */		\
    if (__x1 < __x2)		/* did we get it? */			\
      __x3 += __ll_B;		/* yes, add it in the proper pos.  */	\
									\
    (w1) = __x3 + __ll_highpart (__x1);					\
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);		\
  } while (0)
#endif
   1684   1.1  christos 
/* Word x word -> double-word unsigned product, yielded as the `ll' member
   of a DWunion whose high and low halves are filled in by umul_ppmm.  */
#if !defined (__umulsidi3)
#define __umulsidi3(u, v) \
  ({									\
    DWunion __umulsidi3_w;						\
    umul_ppmm (__umulsidi3_w.s.high, __umulsidi3_w.s.low, u, v);	\
    __umulsidi3_w.ll;							\
  })
#endif
   1691   1.1  christos 
/* Portable two-word by one-word division, always available so it can be
   used for debugging: q = (n1,n0) / d and r = (n1,n0) % d.
   The quotient is produced one half word at a time.  Each half is first
   estimated by dividing by the high half of d, then corrected downwards
   at most twice when the estimate turns out too large.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {									\
    UWtype __d1 = __ll_highpart (d);					\
    UWtype __d0 = __ll_lowpart (d);					\
    UWtype __q1, __q0, __r1, __r0, __m;					\
									\
    /* Upper half of the quotient.  */					\
    __r1 = (n1) % __d1;							\
    __q1 = (n1) / __d1;							\
    __m = (UWtype) __q1 * __d0;						\
    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
    if (__r1 < __m)							\
      {									\
	__q1--;								\
	__r1 += (d);							\
	/* Correct again only if that addition did not carry.  */	\
	if (__r1 >= (d) && __r1 < __m)					\
	  {								\
	    __q1--;							\
	    __r1 += (d);						\
	  }								\
      }									\
    __r1 -= __m;							\
									\
    /* Lower half of the quotient, same scheme.  */			\
    __r0 = __r1 % __d1;							\
    __q0 = __r1 / __d1;							\
    __m = (UWtype) __q0 * __d0;						\
    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
    if (__r0 < __m)							\
      {									\
	__q0--;								\
	__r0 += (d);							\
	if (__r0 >= (d) && __r0 < __m)					\
	  {								\
	    __q0--;							\
	    __r0 += (d);						\
	  }								\
      }									\
    __r0 -= __m;							\
									\
    (q) = (UWtype) __q1 * __ll_B | __q0;				\
    (r) = __r0;								\
  } while (0)
   1729   1.1  christos 
/* A processor offering sdiv_qrnnd but no udiv_qrnnd gets its unsigned
   division from the out-of-line helper __udiv_w_sdiv (defined in libgcc
   or elsewhere), which returns the quotient and stores the remainder
   through its first argument.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {									\
    extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype);	\
    UWtype __udiv_rem;							\
    (q) = __udiv_w_sdiv (&__udiv_rem, nh, nl, d);			\
    (r) = __udiv_rem;							\
  } while (0)
#endif
   1741   1.1  christos 
/* Nothing above provided udiv_qrnnd: route it to the portable
   __udiv_qrnnd_c and record that it needs normalization.  */
#ifndef udiv_qrnnd
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
   1747   1.1  christos 
/* Portable count_leading_zeros: locate the highest non-zero chunk of x,
   then let __clz_tab give the bit length of that chunk.  Words of up to
   32 bits are bracketed with three comparisons in __BITS4-sized steps;
   wider words are scanned a byte at a time from the top.  */
#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
  do {									\
    UWtype __clz_x = (x);						\
    UWtype __clz_sh;							\
									\
    if (W_TYPE_SIZE > 32)						\
      {									\
	__clz_sh = W_TYPE_SIZE - 8;					\
	while (__clz_sh > 0 && ((__clz_x >> __clz_sh) & 0xff) == 0)	\
	  __clz_sh -= 8;						\
      }									\
    else if (__clz_x < ((UWtype)1<<__BITS4))				\
      __clz_sh = 0;							\
    else if (__clz_x < ((UWtype)1<<2*__BITS4))				\
      __clz_sh = __BITS4;						\
    else if (__clz_x < ((UWtype)1<<3*__BITS4))				\
      __clz_sh = 2*__BITS4;						\
    else								\
      __clz_sh = 3*__BITS4;						\
									\
    (count) = W_TYPE_SIZE - (__clz_tab[__clz_x >> __clz_sh] + __clz_sh);	\
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
   1771   1.1  christos 
#if !defined (count_trailing_zeros)
/* count_trailing_zeros expressed through count_leading_zeros (whichever
   definition of the latter is in effect, asm or the C one): x & -x keeps
   only the lowest set bit, and its leading-zero count is then mirrored
   to give that bit's index from the bottom.  */
#define count_trailing_zeros(count, x) \
  do {									\
    UWtype __ctz_in = (x);						\
    UWtype __ctz_lz;							\
    count_leading_zeros (__ctz_lz, __ctz_in & -__ctz_in);		\
    (count) = W_TYPE_SIZE - 1 - __ctz_lz;				\
  } while (0)
#endif
   1783   1.1  christos 
/* Default when no udiv_qrnnd definition above set the flag.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif
   1787