config/cris/arit.c

    1.1  mrg /* Signed and unsigned multiplication and division and modulus for CRIS.
    1.1  mrg    Contributed by Axis Communications.
    1.1  mrg    Written by Hans-Peter Nilsson <hp (at) axis.se>, c:a 1992.
    1.1  mrg
1.1.1.7  mrg    Copyright (C) 1998-2020 Free Software Foundation, Inc.
    1.1  mrg
    1.1  mrg This file is part of GCC.
    1.1  mrg
    1.1  mrg GCC is free software; you can redistribute it and/or modify it
    1.1  mrg under the terms of the GNU General Public License as published by the
    1.1  mrg Free Software Foundation; either version 3, or (at your option) any
    1.1  mrg later version.
    1.1  mrg
    1.1  mrg This file is distributed in the hope that it will be useful, but
    1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of
    1.1  mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.1  mrg General Public License for more details.
    1.1  mrg
    1.1  mrg Under Section 7 of GPL version 3, you are granted additional
    1.1  mrg permissions described in the GCC Runtime Library Exception, version
    1.1  mrg 3.1, as published by the Free Software Foundation.
    1.1  mrg
    1.1  mrg You should have received a copy of the GNU General Public License and
    1.1  mrg a copy of the GCC Runtime Library Exception along with this program;
    1.1  mrg see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    1.1  mrg <http://www.gnu.org/licenses/>.  */
    1.1  mrg
    1.1  mrg
    1.1  mrg /* Note that we provide prototypes for all "const" functions, to attach
    1.1  mrg    the const attribute.  This is necessary in GCC 2.7.2 - adding the
    1.1  mrg    attribute to the function *definition* is a syntax error.
    1.1  mrg    This did not work with e.g. GCC 2.1; back then, the return type had
    1.1  mrg    to be "const".  */
    1.1  mrg
    1.1  mrg #include "config.h"
    1.1  mrg
    1.1  mrg #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 3
             /* LZ (v): leading-zero count of V, via __builtin_clz.  It is only
                defined when __CRIS_arch_version is at least 3; the code below tests
                "#ifdef LZ" and falls back to shift loops when it is not defined.  */
    1.1  mrg #define LZ(v) __builtin_clz (v)
    1.1  mrg #endif
    1.1  mrg
1.1.1.2  mrg /* In (at least) the 4.7 series, GCC doesn't automatically choose the
1.1.1.2  mrg    most optimal strategy, possibly related to insufficient modelling of
1.1.1.2  mrg    delay-slot costs.  The form of SIGNMULT (s, a) is therefore picked by
                hand: a multiplication when __CRIS_arch_version is at least 10, a
                test-and-negate otherwise.  The two forms agree only when S is 1 or
                -1, which is what the callers visible in this file pass.  */
1.1.1.2  mrg #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
1.1.1.2  mrg #define SIGNMULT(s, a) ((s) * (a)) /* Cheap multiplication, better than branch.  */
1.1.1.2  mrg #else
1.1.1.2  mrg #define SIGNMULT(s, a) ((s) < 0 ? -(a) : (a)) /* Branches are still better.  */
1.1.1.2  mrg #endif
    1.1  mrg
    1.1  mrg #if defined (L_udivsi3) || defined (L_divsi3) || defined (L_umodsi3) \
    1.1  mrg     || defined (L_modsi3)
    1.1  mrg /* Result type of divmod worker function (do_31div): the quotient and
                the remainder of one division, returned together.  */
    1.1  mrg struct quot_rem
    1.1  mrg  {
    1.1  mrg    long quot;  /* Quotient.  */
    1.1  mrg    long rem;   /* Remainder.  */
    1.1  mrg  };
    1.1  mrg
    1.1  mrg /* This is the worker function for div and mod.  It is inlined into the
    1.1  mrg    respective library function.  Parameter A must have bit 31 == 0.

                Returns the quotient and the remainder of A / B in a struct
                quot_rem.  If B is 0, both fields are set to 0xffffffff.  If A < B,
                the result is quotient 0 and remainder A.  */
    1.1  mrg
    1.1  mrg static __inline__ struct quot_rem
    1.1  mrg do_31div (unsigned long a, unsigned long b)
    1.1  mrg      __attribute__ ((__const__, __always_inline__));
    1.1  mrg
    1.1  mrg static __inline__ struct quot_rem
    1.1  mrg do_31div (unsigned long a, unsigned long b)
    1.1  mrg {
    1.1  mrg   /* Adjust operands and result if a is 31 bits.  */
    1.1  mrg   long extra = 0;
    1.1  mrg   int quot_digits = 0;
    1.1  mrg
               /* Division by zero: hand back all-ones in both fields.  */
    1.1  mrg   if (b == 0)
    1.1  mrg     {
    1.1  mrg       struct quot_rem ret;
    1.1  mrg       ret.quot = 0xffffffff;
    1.1  mrg       ret.rem = 0xffffffff;
    1.1  mrg       return ret;
    1.1  mrg     }
    1.1  mrg
               /* Divisor larger than dividend: nothing to divide.  */
    1.1  mrg   if (a < b)
    1.1  mrg     return (struct quot_rem) { 0, a };
    1.1  mrg
               /* Shift b left until it exceeds a.  quot_digits records how far b
                  was shifted and selects the entry point of the switch below.
                  With LZ the shift count is derived from the leading-zero counts
                  of a and b instead of being found by a loop.  */
    1.1  mrg #ifdef LZ
    1.1  mrg   if (b <= a)
    1.1  mrg     {
    1.1  mrg       quot_digits = LZ (b) - LZ (a);
    1.1  mrg       quot_digits += (a >= (b << quot_digits));
    1.1  mrg       b <<= quot_digits;
    1.1  mrg     }
    1.1  mrg #else
    1.1  mrg   while (b <= a)
    1.1  mrg     {
    1.1  mrg       b <<= 1;
    1.1  mrg       quot_digits++;
    1.1  mrg     }
    1.1  mrg #endif
    1.1  mrg
    1.1  mrg   /* Is a 31 bits?  Note that bit 31 is handled by the caller.  */
    1.1  mrg   if (a & 0x40000000)
    1.1  mrg     {
    1.1  mrg       /* Then make b:s highest bit max 0x40000000, because it must have
    1.1  mrg 	 been 0x80000000 to be 1 bit higher than a.  */
    1.1  mrg       b >>= 1;
    1.1  mrg
    1.1  mrg       /* Adjust a to be maximum 0x3fffffff, i.e. two upper bits zero.  */
    1.1  mrg       if (a >= b)
    1.1  mrg 	{
    1.1  mrg 	  a -= b;
    1.1  mrg 	  extra = 1 << (quot_digits - 1);
    1.1  mrg 	}
    1.1  mrg       else
    1.1  mrg 	{
    1.1  mrg 	  a -= b >> 1;
    1.1  mrg
    1.1  mrg 	  /* Remember that we adjusted a by subtracting b * 2 ** Something.  */
    1.1  mrg 	  extra = 1 << quot_digits;
    1.1  mrg 	}
    1.1  mrg
    1.1  mrg       /* The number of quotient digits will be one less, because
    1.1  mrg 	 we just adjusted b.  */
    1.1  mrg       quot_digits--;
    1.1  mrg     }
    1.1  mrg
    1.1  mrg   /* Now do the division part.  */
    1.1  mrg
    1.1  mrg   /* Subtract b and add ones to the right when a >= b
    1.1  mrg      i.e. "a - (b - 1) == (a - b) + 1".  */
    1.1  mrg   b--;
    1.1  mrg
               /* One divide step: a single "dstep" instruction that updates a in
                  place, with b as its other operand.  The fallthrough attribute
                  marks the drop into the next case label as intentional.  */
1.1.1.7  mrg #define DS __asm__ ("dstep %2,%0" : "=r" (a) : "0" (a), "r" (b)); \
1.1.1.7  mrg  __attribute__ ((__fallthrough__))
    1.1  mrg
               /* Enter at "case quot_digits" and fall through down to case 0, so
                  that exactly quot_digits DS steps are executed.  */
    1.1  mrg   switch (quot_digits)
    1.1  mrg     {
    1.1  mrg     case 32: DS; case 31: DS; case 30: DS; case 29: DS;
    1.1  mrg     case 28: DS; case 27: DS; case 26: DS; case 25: DS;
    1.1  mrg     case 24: DS; case 23: DS; case 22: DS; case 21: DS;
    1.1  mrg     case 20: DS; case 19: DS; case 18: DS; case 17: DS;
    1.1  mrg     case 16: DS; case 15: DS; case 14: DS; case 13: DS;
    1.1  mrg     case 12: DS; case 11: DS; case 10: DS; case 9: DS;
    1.1  mrg     case 8: DS; case 7: DS; case 6: DS; case 5: DS;
    1.1  mrg     case 4: DS; case 3: DS; case 2: DS; case 1: DS;
    1.1  mrg     case 0:;
    1.1  mrg     }
    1.1  mrg
               /* Unpack a: its low quot_digits bits are the quotient digits, to
                  which the part set aside in extra is added; the bits above them
                  are the remainder.  */
    1.1  mrg   {
    1.1  mrg     struct quot_rem ret;
    1.1  mrg     ret.quot = (a & ((1 << quot_digits) - 1)) + extra;
    1.1  mrg     ret.rem = a >> quot_digits;
    1.1  mrg     return ret;
    1.1  mrg   }
    1.1  mrg }
    1.1  mrg
    1.1  mrg #ifdef L_udivsi3
    1.1  mrg unsigned long
    1.1  mrg __Udiv (unsigned long a, unsigned long b) __attribute__ ((__const__));
    1.1  mrg
             /* Unsigned division: returns the quotient A / B.  The 0x80000000 tests
                below treat the operands as 32-bit values.  Operands with bit 31 set
                are dealt with here, because do_31div requires its dividend to have
                bit 31 clear.  For B == 0 the result is 0xffffffff.  */
    1.1  mrg unsigned long
    1.1  mrg __Udiv (unsigned long a, unsigned long b)
    1.1  mrg {
               /* Quotient part already accounted for by the reduction below.  */
    1.1  mrg   long extra = 0;
    1.1  mrg
    1.1  mrg   /* Adjust operands and result, if a and/or b is 32 bits.  */
    1.1  mrg   /* Effectively: b & 0x80000000.  */
               /* With bit 31 of b set, the quotient can only be 0 or 1.  */
    1.1  mrg   if ((long) b < 0)
    1.1  mrg     return a >= b;
    1.1  mrg
    1.1  mrg   /* Effectively: a & 0x80000000.  */
    1.1  mrg   if ((long) a < 0)
    1.1  mrg     {
    1.1  mrg       int tmp = 0;
    1.1  mrg
    1.1  mrg       if (b == 0)
    1.1  mrg 	return 0xffffffff;
                   /* tmp = number of leading zero bits in b.  b is nonzero and has
                      bit 31 clear here, so tmp is at least 1 and "tmp - 1" below
                      is a valid shift count.  */
    1.1  mrg #ifdef LZ
    1.1  mrg       tmp = LZ (b);
    1.1  mrg #else
    1.1  mrg       for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
    1.1  mrg 	;
    1.1  mrg
    1.1  mrg       tmp = 31 - tmp;
    1.1  mrg #endif
    1.1  mrg
                   /* Subtract the largest b * 2**k (k = tmp or tmp - 1) that does
                      not exceed a, and remember 2**k in extra.  This leaves a with
                      bit 31 clear, as do_31div requires.  */
    1.1  mrg       if ((b << tmp) > a)
    1.1  mrg 	{
    1.1  mrg 	  extra = 1 << (tmp-1);
    1.1  mrg 	  a -= b << (tmp - 1);
    1.1  mrg 	}
    1.1  mrg       else
    1.1  mrg 	{
    1.1  mrg 	  extra = 1 << tmp;
    1.1  mrg 	  a -= b << tmp;
    1.1  mrg 	}
    1.1  mrg     }
    1.1  mrg
               /* Divide what is left and add back the part taken out above.  */
    1.1  mrg   return do_31div (a, b).quot+extra;
    1.1  mrg }
    1.1  mrg #endif /* L_udivsi3 */
    1.1  mrg
    1.1  mrg #ifdef L_divsi3
    1.1  mrg long
    1.1  mrg __Div (long a, long b) __attribute__ ((__const__));
    1.1  mrg
             /* Signed division: returns the quotient A / B.  The magnitudes are
                divided by do_31div; the sign is applied to the result afterwards
                with SIGNMULT.  */
    1.1  mrg long
    1.1  mrg __Div (long a, long b)
    1.1  mrg {
               /* extra: quotient correction for the 0x80000000 special case.
                  sign: sign of the quotient; starts as the sign of b and is
                  negated below when a is negative.  */
    1.1  mrg   long extra = 0;
    1.1  mrg   long sign = (b < 0) ? -1 : 1;
1.1.1.2  mrg   long res;
    1.1  mrg
    1.1  mrg   /* We need to handle a == -2147483648 as expected and must while
    1.1  mrg      doing that avoid producing a sequence like "abs (a) < 0" as GCC
    1.1  mrg      may optimize out the test.  That sequence may not be obvious as
    1.1  mrg      we call inline functions.  Testing for a being negative and
    1.1  mrg      handling (presumably much rarer than positive) enables us to get
    1.1  mrg      a bit of optimization for an (accumulated) reduction of the
    1.1  mrg      penalty of the 0x80000000 special-case.  */
    1.1  mrg   if (a < 0)
    1.1  mrg     {
    1.1  mrg       sign = -sign;
    1.1  mrg
    1.1  mrg       if ((a & 0x7fffffff) == 0)
    1.1  mrg 	{
    1.1  mrg 	  /* We're at 0x80000000.  Tread carefully.  */
                       /* sign is now the opposite of b's sign, so SIGNMULT (sign, b)
                          is -|b| and the subtraction adds |b| to a.  For nonzero b
                          that moves a off 0x80000000 before the negation below.
                          extra = sign puts back the one quotient unit this removes,
                          since the result is sign * res + extra.  */
1.1.1.2  mrg 	  a -= SIGNMULT (sign, b);
    1.1  mrg 	  extra = sign;
    1.1  mrg 	}
    1.1  mrg       a = -a;
    1.1  mrg     }
    1.1  mrg
               /* a is non-negative here; divide the magnitudes, then apply the
                  sign and the special-case correction.  */
1.1.1.2  mrg   res = do_31div (a, __builtin_labs (b)).quot;
1.1.1.2  mrg   return SIGNMULT (sign, res) + extra;
    1.1  mrg }
    1.1  mrg #endif /* L_divsi3 */
    1.1  mrg
    1.1  mrg
    1.1  mrg #ifdef L_umodsi3
    1.1  mrg unsigned long
    1.1  mrg __Umod (unsigned long a, unsigned long b) __attribute__ ((__const__));
    1.1  mrg
             /* Unsigned modulus: returns the remainder of A / B.  Operands with bit
                31 set are dealt with here, because do_31div requires its dividend
                to have bit 31 clear.  For B == 0 the result is A when A has bit 31
                set; otherwise it is the 0xffffffff that do_31div stores for a zero
                divisor.  */
    1.1  mrg unsigned long
    1.1  mrg __Umod (unsigned long a, unsigned long b)
    1.1  mrg {
    1.1  mrg   /* Adjust operands and result if a and/or b is 32 bits.  */
               /* With bit 31 of b set, the quotient is 0 or 1, so the remainder is
                  either a itself or a - b.  */
    1.1  mrg   if ((long) b < 0)
    1.1  mrg     return a >= b ? a - b : a;
    1.1  mrg
    1.1  mrg   if ((long) a < 0)
    1.1  mrg     {
    1.1  mrg       int tmp = 0;
    1.1  mrg
    1.1  mrg       if (b == 0)
    1.1  mrg 	return a;
                   /* tmp = number of leading zero bits in b.  b is nonzero and has
                      bit 31 clear here, so tmp is at least 1 and "tmp - 1" below
                      is a valid shift count.  */
    1.1  mrg #ifdef LZ
    1.1  mrg       tmp = LZ (b);
    1.1  mrg #else
    1.1  mrg       for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
    1.1  mrg 	;
    1.1  mrg       tmp = 31 - tmp;
    1.1  mrg #endif
    1.1  mrg
                   /* Subtract the largest b * 2**k (k = tmp or tmp - 1) that does
                      not exceed a.  Removing a multiple of b does not change the
                      remainder, and it leaves a with bit 31 clear.  */
    1.1  mrg       if ((b << tmp) > a)
    1.1  mrg 	{
    1.1  mrg 	  a -= b << (tmp - 1);
    1.1  mrg 	}
    1.1  mrg       else
    1.1  mrg 	{
    1.1  mrg 	  a -= b << tmp;
    1.1  mrg 	}
    1.1  mrg     }
    1.1  mrg
    1.1  mrg   return do_31div (a, b).rem;
    1.1  mrg }
    1.1  mrg #endif /* L_umodsi3 */
    1.1  mrg
    1.1  mrg #ifdef L_modsi3
    1.1  mrg long
    1.1  mrg __Mod (long a, long b) __attribute__ ((__const__));
    1.1  mrg
             /* Signed modulus: returns the remainder of A / B.  The remainder of
                the magnitudes is computed by do_31div and is negated, via SIGNMULT,
                exactly when A is negative; the sign of B does not affect the sign
                of the result.  */
    1.1  mrg long
    1.1  mrg __Mod (long a, long b)
    1.1  mrg {
               /* Sign applied to the remainder: -1 when a is negative, else 1.  */
    1.1  mrg   long sign = 1;
1.1.1.2  mrg   long res;
    1.1  mrg
    1.1  mrg   /* We need to handle a == -2147483648 as expected and must while
    1.1  mrg      doing that avoid producing a sequence like "abs (a) < 0" as GCC
    1.1  mrg      may optimize out the test.  That sequence may not be obvious as
    1.1  mrg      we call inline functions.  Testing for a being negative and
    1.1  mrg      handling (presumably much rarer than positive) enables us to get
    1.1  mrg      a bit of optimization for an (accumulated) reduction of the
    1.1  mrg      penalty of the 0x80000000 special-case.  */
    1.1  mrg   if (a < 0)
    1.1  mrg     {
    1.1  mrg       sign = -1;
    1.1  mrg       if ((a & 0x7fffffff) == 0)
    1.1  mrg 	/* We're at 0x80000000.  Tread carefully.  */
                     /* Adding |b| does not change the remainder modulo |b|, and
                        for nonzero b it moves a off 0x80000000 before the negation
                        below.  */
    1.1  mrg 	a += __builtin_labs (b);
    1.1  mrg       a = -a;
    1.1  mrg     }
    1.1  mrg
1.1.1.2  mrg   res = do_31div (a, __builtin_labs (b)).rem;
1.1.1.2  mrg   return SIGNMULT (sign, res);
    1.1  mrg }
    1.1  mrg #endif /* L_modsi3 */
    1.1  mrg #endif /* L_udivsi3 || L_divsi3 || L_umodsi3 || L_modsi3 */
    1.1  mrg
    1.1  mrg /*
    1.1  mrg  * Local variables:
    1.1  mrg  * eval: (c-set-style "gnu")
    1.1  mrg  * indent-tabs-mode: t
    1.1  mrg  * End:
    1.1  mrg  */