Home | History | Annotate | Line # | Download | only in cris
arit.c revision 1.1.1.8
      1 /* Signed and unsigned multiplication and division and modulus for CRIS.
      2    Contributed by Axis Communications.
      3    Written by Hans-Peter Nilsson <hp (at) axis.se>, c:a 1992.
      4 
      5    Copyright (C) 1998-2019 Free Software Foundation, Inc.
      6 
      7 This file is part of GCC.
      8 
      9 GCC is free software; you can redistribute it and/or modify it
     10 under the terms of the GNU General Public License as published by the
     11 Free Software Foundation; either version 3, or (at your option) any
     12 later version.
     13 
     14 This file is distributed in the hope that it will be useful, but
     15 WITHOUT ANY WARRANTY; without even the implied warranty of
     16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     17 General Public License for more details.
     18 
     19 Under Section 7 of GPL version 3, you are granted additional
     20 permissions described in the GCC Runtime Library Exception, version
     21 3.1, as published by the Free Software Foundation.
     22 
     23 You should have received a copy of the GNU General Public License and
     24 a copy of the GCC Runtime Library Exception along with this program;
     25 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     26 <http://www.gnu.org/licenses/>.  */
     27 
     28 
     29 /* Note that we provide prototypes for all "const" functions, to attach
     30    the const attribute.  This is necessary in 2.7.2 - adding the
     31    attribute to the function *definition* is a syntax error.
     32     This did not work with e.g. 2.1; back then, the return type had to
     33    be "const".  */
     34 
     35 #include "config.h"
     36 
/* LZ (v) counts the leading zero bits of V.  It is only provided for
   CRIS architecture version 3 and later; code in this file checks
   "#ifdef LZ" and uses a loop instead when it is missing.  */
#if defined (__CRIS_arch_version)
# if __CRIS_arch_version >= 3
#  define LZ(v) __builtin_clz (v)
# endif
#endif

/* SIGNMULT (s, a) gives A the sign of S.  Both variants produce the
   same value when S is 1 or -1.

   In (at least) the 4.7 series, GCC doesn't automatically choose the
   most optimal strategy, possibly related to insufficient modelling of
   delay-slot costs, so the variant is selected by hand here.  */
#if !defined (__CRIS_arch_version) || __CRIS_arch_version < 10
#define SIGNMULT(s, a) ((s) < 0 ? -(a) : (a)) /* Branches are still better.  */
#else
#define SIGNMULT(s, a) ((s) * (a)) /* Cheap multiplication, better than branch.  */
#endif
     49 
     50 #if defined (L_udivsi3) || defined (L_divsi3) || defined (L_umodsi3) \
     51     || defined (L_modsi3)
/* Quotient/remainder pair returned by the shared div/mod worker
   function.  */
struct quot_rem
{
  long quot;	/* Quotient.  */
  long rem;	/* Remainder.  */
};
     58 
     59 /* This is the worker function for div and mod.  It is inlined into the
     60    respective library function.  Parameter A must have bit 31 == 0.  */
     61 
     62 static __inline__ struct quot_rem
     63 do_31div (unsigned long a, unsigned long b)
     64      __attribute__ ((__const__, __always_inline__));
     65 
     66 static __inline__ struct quot_rem
     67 do_31div (unsigned long a, unsigned long b)
     68 {
     69   /* Adjust operands and result if a is 31 bits.  */
     70   long extra = 0;
     71   int quot_digits = 0;
     72 
     73   if (b == 0)
     74     {
     75       struct quot_rem ret;
     76       ret.quot = 0xffffffff;
     77       ret.rem = 0xffffffff;
     78       return ret;
     79     }
     80 
     81   if (a < b)
     82     return (struct quot_rem) { 0, a };
     83 
     84 #ifdef LZ
     85   if (b <= a)
     86     {
     87       quot_digits = LZ (b) - LZ (a);
     88       quot_digits += (a >= (b << quot_digits));
     89       b <<= quot_digits;
     90     }
     91 #else
     92   while (b <= a)
     93     {
     94       b <<= 1;
     95       quot_digits++;
     96     }
     97 #endif
     98 
     99   /* Is a 31 bits?  Note that bit 31 is handled by the caller.  */
    100   if (a & 0x40000000)
    101     {
    102       /* Then make b:s highest bit max 0x40000000, because it must have
    103 	 been 0x80000000 to be 1 bit higher than a.  */
    104       b >>= 1;
    105 
    106       /* Adjust a to be maximum 0x3fffffff, i.e. two upper bits zero.  */
    107       if (a >= b)
    108 	{
    109 	  a -= b;
    110 	  extra = 1 << (quot_digits - 1);
    111 	}
    112       else
    113 	{
    114 	  a -= b >> 1;
    115 
    116 	  /* Remember that we adjusted a by subtracting b * 2 ** Something.  */
    117 	  extra = 1 << quot_digits;
    118 	}
    119 
    120       /* The number of quotient digits will be one less, because
    121 	 we just adjusted b.  */
    122       quot_digits--;
    123     }
    124 
    125   /* Now do the division part.  */
    126 
    127   /* Subtract b and add ones to the right when a >= b
    128      i.e. "a - (b - 1) == (a - b) + 1".  */
    129   b--;
    130 
    131 #define DS __asm__ ("dstep %2,%0" : "=r" (a) : "0" (a), "r" (b))
    132 
    133   switch (quot_digits)
    134     {
    135     case 32: DS; case 31: DS; case 30: DS; case 29: DS;
    136     case 28: DS; case 27: DS; case 26: DS; case 25: DS;
    137     case 24: DS; case 23: DS; case 22: DS; case 21: DS;
    138     case 20: DS; case 19: DS; case 18: DS; case 17: DS;
    139     case 16: DS; case 15: DS; case 14: DS; case 13: DS;
    140     case 12: DS; case 11: DS; case 10: DS; case 9: DS;
    141     case 8: DS; case 7: DS; case 6: DS; case 5: DS;
    142     case 4: DS; case 3: DS; case 2: DS; case 1: DS;
    143     case 0:;
    144     }
    145 
    146   {
    147     struct quot_rem ret;
    148     ret.quot = (a & ((1 << quot_digits) - 1)) + extra;
    149     ret.rem = a >> quot_digits;
    150     return ret;
    151   }
    152 }
    153 
    154 #ifdef L_udivsi3
    155 unsigned long
    156 __Udiv (unsigned long a, unsigned long b) __attribute__ ((__const__));
    157 
    158 unsigned long
    159 __Udiv (unsigned long a, unsigned long b)
    160 {
    161   long extra = 0;
    162 
    163   /* Adjust operands and result, if a and/or b is 32 bits.  */
    164   /* Effectively: b & 0x80000000.  */
    165   if ((long) b < 0)
    166     return a >= b;
    167 
    168   /* Effectively: a & 0x80000000.  */
    169   if ((long) a < 0)
    170     {
    171       int tmp = 0;
    172 
    173       if (b == 0)
    174 	return 0xffffffff;
    175 #ifdef LZ
    176       tmp = LZ (b);
    177 #else
    178       for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
    179 	;
    180 
    181       tmp = 31 - tmp;
    182 #endif
    183 
    184       if ((b << tmp) > a)
    185 	{
    186 	  extra = 1 << (tmp-1);
    187 	  a -= b << (tmp - 1);
    188 	}
    189       else
    190 	{
    191 	  extra = 1 << tmp;
    192 	  a -= b << tmp;
    193 	}
    194     }
    195 
    196   return do_31div (a, b).quot+extra;
    197 }
    198 #endif /* L_udivsi3 */
    199 
    200 #ifdef L_divsi3
    201 long
    202 __Div (long a, long b) __attribute__ ((__const__));
    203 
    204 long
    205 __Div (long a, long b)
    206 {
    207   long extra = 0;
    208   long sign = (b < 0) ? -1 : 1;
    209   long res;
    210 
    211   /* We need to handle a == -2147483648 as expected and must while
    212      doing that avoid producing a sequence like "abs (a) < 0" as GCC
    213      may optimize out the test.  That sequence may not be obvious as
    214      we call inline functions.  Testing for a being negative and
    215      handling (presumably much rarer than positive) enables us to get
    216      a bit of optimization for an (accumulated) reduction of the
    217      penalty of the 0x80000000 special-case.  */
    218   if (a < 0)
    219     {
    220       sign = -sign;
    221 
    222       if ((a & 0x7fffffff) == 0)
    223 	{
    224 	  /* We're at 0x80000000.  Tread carefully.  */
    225 	  a -= SIGNMULT (sign, b);
    226 	  extra = sign;
    227 	}
    228       a = -a;
    229     }
    230 
    231   res = do_31div (a, __builtin_labs (b)).quot;
    232   return SIGNMULT (sign, res) + extra;
    233 }
    234 #endif /* L_divsi3 */
    235 
    236 
    237 #ifdef L_umodsi3
    238 unsigned long
    239 __Umod (unsigned long a, unsigned long b) __attribute__ ((__const__));
    240 
    241 unsigned long
    242 __Umod (unsigned long a, unsigned long b)
    243 {
    244   /* Adjust operands and result if a and/or b is 32 bits.  */
    245   if ((long) b < 0)
    246     return a >= b ? a - b : a;
    247 
    248   if ((long) a < 0)
    249     {
    250       int tmp = 0;
    251 
    252       if (b == 0)
    253 	return a;
    254 #ifdef LZ
    255       tmp = LZ (b);
    256 #else
    257       for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
    258 	;
    259       tmp = 31 - tmp;
    260 #endif
    261 
    262       if ((b << tmp) > a)
    263 	{
    264 	  a -= b << (tmp - 1);
    265 	}
    266       else
    267 	{
    268 	  a -= b << tmp;
    269 	}
    270     }
    271 
    272   return do_31div (a, b).rem;
    273 }
    274 #endif /* L_umodsi3 */
    275 
    276 #ifdef L_modsi3
    277 long
    278 __Mod (long a, long b) __attribute__ ((__const__));
    279 
    280 long
    281 __Mod (long a, long b)
    282 {
    283   long sign = 1;
    284   long res;
    285 
    286   /* We need to handle a == -2147483648 as expected and must while
    287      doing that avoid producing a sequence like "abs (a) < 0" as GCC
    288      may optimize out the test.  That sequence may not be obvious as
    289      we call inline functions.  Testing for a being negative and
    290      handling (presumably much rarer than positive) enables us to get
    291      a bit of optimization for an (accumulated) reduction of the
    292      penalty of the 0x80000000 special-case.  */
    293   if (a < 0)
    294     {
    295       sign = -1;
    296       if ((a & 0x7fffffff) == 0)
    297 	/* We're at 0x80000000.  Tread carefully.  */
    298 	a += __builtin_labs (b);
    299       a = -a;
    300     }
    301 
    302   res = do_31div (a, __builtin_labs (b)).rem;
    303   return SIGNMULT (sign, res);
    304 }
    305 #endif /* L_modsi3 */
    306 #endif /* L_udivsi3 || L_divsi3 || L_umodsi3 || L_modsi3 */
    307 
    308 /*
    309  * Local variables:
    310  * eval: (c-set-style "gnu")
    311  * indent-tabs-mode: t
    312  * End:
    313  */
    314