Home | History | Annotate | Line # | Download | only in cris
arit.c revision 1.10
      1 /* Signed and unsigned multiplication and division and modulus for CRIS.
      2    Contributed by Axis Communications.
      3    Written by Hans-Peter Nilsson <hp (at) axis.se>, c:a 1992.
      4 
      5    Copyright (C) 1998-2022 Free Software Foundation, Inc.
      6 
      7 This file is part of GCC.
      8 
      9 GCC is free software; you can redistribute it and/or modify it
     10 under the terms of the GNU General Public License as published by the
     11 Free Software Foundation; either version 3, or (at your option) any
     12 later version.
     13 
     14 This file is distributed in the hope that it will be useful, but
     15 WITHOUT ANY WARRANTY; without even the implied warranty of
     16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     17 General Public License for more details.
     18 
     19 Under Section 7 of GPL version 3, you are granted additional
     20 permissions described in the GCC Runtime Library Exception, version
     21 3.1, as published by the Free Software Foundation.
     22 
     23 You should have received a copy of the GNU General Public License and
     24 a copy of the GCC Runtime Library Exception along with this program;
     25 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     26 <http://www.gnu.org/licenses/>.  */
     27 
     28 
     29 /* Note that we provide prototypes for all "const" functions, to attach
     30    the const attribute.  This is necessary in 2.7.2 - adding the
     31    attribute to the function *definition* is a syntax error.
     32     This did not work with e.g. 2.1; back then, the return type had to
     33    be "const".  */
     34 
     35 #include "config.h"
     36 
/* LZ (v) counts the leading zero bits of V.  It is only defined when
   compiling for CRIS architecture version 3 or later; users must test
   "#ifdef LZ" and provide a fallback otherwise.  */
#ifdef __CRIS_arch_version
# if __CRIS_arch_version >= 3
#  define LZ(v) __builtin_clz (v)
# endif
#endif

/* SIGNMULT (s, a) applies the sign S to the value A.  The two variants
   below agree whenever S is 1 or -1, which is how this file uses the
   macro.

   In (at least) the 4.7 series, GCC doesn't pick the better strategy
   by itself, possibly because delay-slot costs are insufficiently
   modelled, so the choice is made by hand here.  */
#if !defined (__CRIS_arch_version) || __CRIS_arch_version < 10
#define SIGNMULT(s, a) ((s) < 0 ? -(a) : (a)) /* Branches are still better.  */
#else
#define SIGNMULT(s, a) ((s) * (a)) /* Cheap multiplication, better than branch.  */
#endif
     49 
     50 #if defined (L_udivsi3) || defined (L_divsi3) || defined (L_umodsi3) \
     51     || defined (L_modsi3)
/* Quotient/remainder pair produced by the shared divmod worker.  The
   member order matters: the worker also builds this struct with a
   positional initializer.  */
struct quot_rem
{
  long quot;	/* Quotient of the division.  */
  long rem;	/* Remainder of the division.  */
};
     58 
     59 /* This is the worker function for div and mod.  It is inlined into the
     60    respective library function.  Parameter A must have bit 31 == 0.

           Returns both the quotient and the remainder of A divided by B.
           If B is zero, both fields are set to 0xffffffff.  If A < B, the
           quotient is 0 and the remainder is A.  */
     61 
     62 static __inline__ struct quot_rem
     63 do_31div (unsigned long a, unsigned long b)
     64      __attribute__ ((__const__, __always_inline__));
     65 
     66 static __inline__ struct quot_rem
     67 do_31div (unsigned long a, unsigned long b)
     68 {
     69   /* Adjust operands and result if a is 31 bits: EXTRA holds the part
             of the quotient accounted for by pre-reducing such an A below.  */
     70   long extra = 0;
          /* Number of positions B has been shifted left, i.e. the number of
             division steps to run and of quotient bits they produce.  */
     71   int quot_digits = 0;
     72 
          /* Division by zero: report all-ones for quotient and remainder.  */
     73   if (b == 0)
     74     {
     75       struct quot_rem ret;
     76       ret.quot = 0xffffffff;
     77       ret.rem = 0xffffffff;
     78       return ret;
     79     }
     80 
          /* Trivial case: the divisor does not fit even once.  */
     81   if (a < b)
     82     return (struct quot_rem) { 0, a };
     83 
          /* Normalize: shift B left until it is greater than A, counting the
             shifts in QUOT_DIGITS.  With LZ available the count is the
             difference in leading-zero counts, plus one if B shifted by that
             amount is still not greater than A.  */
     84 #ifdef LZ
     85   if (b <= a)
     86     {
     87       quot_digits = LZ (b) - LZ (a);
     88       quot_digits += (a >= (b << quot_digits));
     89       b <<= quot_digits;
     90     }
     91 #else
     92   while (b <= a)
     93     {
     94       b <<= 1;
     95       quot_digits++;
     96     }
     97 #endif
     98 
     99   /* Is a 31 bits?  Note that bit 31 is handled by the caller.  */
    100   if (a & 0x40000000)
    101     {
    102       /* Then make b:s highest bit max 0x40000000, because it must have
    103 	 been 0x80000000 to be 1 bit higher than a.  */
    104       b >>= 1;
    105 
    106       /* Adjust a to be maximum 0x3fffffff, i.e. two upper bits zero.
                 NOTE(review): after the normalization above, the halved B
                 appears to always be <= A here, which would make the else
                 branch unreachable -- confirm before relying on it.  */
    107       if (a >= b)
    108 	{
    109 	  a -= b;
    110 	  extra = 1 << (quot_digits - 1);
    111 	}
    112       else
    113 	{
    114 	  a -= b >> 1;
    115 
    116 	  /* Remember that we adjusted a by subtracting b * 2 ** Something.  */
    117 	  extra = 1 << quot_digits;
    118 	}
    119 
    120       /* The number of quotient digits will be one less, because
    121 	 we just adjusted b.  */
    122       quot_digits--;
    123     }
    124 
    125   /* Now do the division part.  */
    126 
    127   /* Subtract b and add ones to the right when a >= b
    128      i.e. "a - (b - 1) == (a - b) + 1".  */
    129   b--;
    130 
          /* DS emits one "dstep" instruction operating on A in place with B as
             the other operand, and marks the fall-through into the next case
             label as intentional.
             NOTE(review): dstep is presumably the CRIS divide-step instruction
             (shift A left one bit, then subtract B if A >= B) -- confirm
             against the CRIS programmer's manual.  */
    131 #define DS __asm__ ("dstep %2,%0" : "=r" (a) : "0" (a), "r" (b)); \
    132  __attribute__ ((__fallthrough__))
    133 
          /* Enter at the case matching QUOT_DIGITS and fall through to case 0,
             so that exactly QUOT_DIGITS division steps are executed.  */
    134   switch (quot_digits)
    135     {
    136     case 32: DS; case 31: DS; case 30: DS; case 29: DS;
    137     case 28: DS; case 27: DS; case 26: DS; case 25: DS;
    138     case 24: DS; case 23: DS; case 22: DS; case 21: DS;
    139     case 20: DS; case 19: DS; case 18: DS; case 17: DS;
    140     case 16: DS; case 15: DS; case 14: DS; case 13: DS;
    141     case 12: DS; case 11: DS; case 10: DS; case 9: DS;
    142     case 8: DS; case 7: DS; case 6: DS; case 5: DS;
    143     case 4: DS; case 3: DS; case 2: DS; case 1: DS;
    144     case 0:;
    145     }
    146 
          /* Unpack A: its low QUOT_DIGITS bits are taken as the quotient bits
             (to which EXTRA is added) and the bits above them as the
             remainder.  */
    147   {
    148     struct quot_rem ret;
    149     ret.quot = (a & ((1 << quot_digits) - 1)) + extra;
    150     ret.rem = a >> quot_digits;
    151     return ret;
    152   }
    153 }
    154 
    155 #ifdef L_udivsi3
    156 unsigned long
    157 __Udiv (unsigned long a, unsigned long b) __attribute__ ((__const__));
    158 
    159 unsigned long
    160 __Udiv (unsigned long a, unsigned long b)
    161 {
    162   long extra = 0;
    163 
    164   /* Adjust operands and result, if a and/or b is 32 bits.  */
    165   /* Effectively: b & 0x80000000.  */
    166   if ((long) b < 0)
    167     return a >= b;
    168 
    169   /* Effectively: a & 0x80000000.  */
    170   if ((long) a < 0)
    171     {
    172       int tmp = 0;
    173 
    174       if (b == 0)
    175 	return 0xffffffff;
    176 #ifdef LZ
    177       tmp = LZ (b);
    178 #else
    179       for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
    180 	;
    181 
    182       tmp = 31 - tmp;
    183 #endif
    184 
    185       if ((b << tmp) > a)
    186 	{
    187 	  extra = 1 << (tmp-1);
    188 	  a -= b << (tmp - 1);
    189 	}
    190       else
    191 	{
    192 	  extra = 1 << tmp;
    193 	  a -= b << tmp;
    194 	}
    195     }
    196 
    197   return do_31div (a, b).quot+extra;
    198 }
    199 #endif /* L_udivsi3 */
    200 
    201 #ifdef L_divsi3
    202 long
    203 __Div (long a, long b) __attribute__ ((__const__));
    204 
    205 long
    206 __Div (long a, long b)
    207 {
    208   long extra = 0;
    209   long sign = (b < 0) ? -1 : 1;
    210   long res;
    211 
    212   /* We need to handle a == -2147483648 as expected and must while
    213      doing that avoid producing a sequence like "abs (a) < 0" as GCC
    214      may optimize out the test.  That sequence may not be obvious as
    215      we call inline functions.  Testing for a being negative and
    216      handling (presumably much rarer than positive) enables us to get
    217      a bit of optimization for an (accumulated) reduction of the
    218      penalty of the 0x80000000 special-case.  */
    219   if (a < 0)
    220     {
    221       sign = -sign;
    222 
    223       if ((a & 0x7fffffff) == 0)
    224 	{
    225 	  /* We're at 0x80000000.  Tread carefully.  */
    226 	  a -= SIGNMULT (sign, b);
    227 	  extra = sign;
    228 	}
    229       a = -a;
    230     }
    231 
    232   res = do_31div (a, __builtin_labs (b)).quot;
    233   return SIGNMULT (sign, res) + extra;
    234 }
    235 #endif /* L_divsi3 */
    236 
    237 
    238 #ifdef L_umodsi3
    239 unsigned long
    240 __Umod (unsigned long a, unsigned long b) __attribute__ ((__const__));
    241 
    242 unsigned long
    243 __Umod (unsigned long a, unsigned long b)
    244 {
    245   /* Adjust operands and result if a and/or b is 32 bits.  */
    246   if ((long) b < 0)
    247     return a >= b ? a - b : a;
    248 
    249   if ((long) a < 0)
    250     {
    251       int tmp = 0;
    252 
    253       if (b == 0)
    254 	return a;
    255 #ifdef LZ
    256       tmp = LZ (b);
    257 #else
    258       for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
    259 	;
    260       tmp = 31 - tmp;
    261 #endif
    262 
    263       if ((b << tmp) > a)
    264 	{
    265 	  a -= b << (tmp - 1);
    266 	}
    267       else
    268 	{
    269 	  a -= b << tmp;
    270 	}
    271     }
    272 
    273   return do_31div (a, b).rem;
    274 }
    275 #endif /* L_umodsi3 */
    276 
    277 #ifdef L_modsi3
    278 long
    279 __Mod (long a, long b) __attribute__ ((__const__));
    280 
    281 long
    282 __Mod (long a, long b)
    283 {
    284   long sign = 1;
    285   long res;
    286 
    287   /* We need to handle a == -2147483648 as expected and must while
    288      doing that avoid producing a sequence like "abs (a) < 0" as GCC
    289      may optimize out the test.  That sequence may not be obvious as
    290      we call inline functions.  Testing for a being negative and
    291      handling (presumably much rarer than positive) enables us to get
    292      a bit of optimization for an (accumulated) reduction of the
    293      penalty of the 0x80000000 special-case.  */
    294   if (a < 0)
    295     {
    296       sign = -1;
    297       if ((a & 0x7fffffff) == 0)
    298 	/* We're at 0x80000000.  Tread carefully.  */
    299 	a += __builtin_labs (b);
    300       a = -a;
    301     }
    302 
    303   res = do_31div (a, __builtin_labs (b)).rem;
    304   return SIGNMULT (sign, res);
    305 }
    306 #endif /* L_modsi3 */
    307 #endif /* L_udivsi3 || L_divsi3 || L_umodsi3 || L_modsi3 */
    308 
    309 /*
    310  * Local variables:
    311  * eval: (c-set-style "gnu")
    312  * indent-tabs-mode: t
    313  * End:
    314  */
    315