Home | History | Annotate | Line # | Download | only in tune
      1      1.1  mrg /* Alternate implementations of binvert_limb to compare speeds. */
      2      1.1  mrg 
      3      1.1  mrg /*
      4      1.1  mrg Copyright 2000, 2002 Free Software Foundation, Inc.
      5      1.1  mrg 
      6      1.1  mrg This file is part of the GNU MP Library.
      7      1.1  mrg 
      8      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
      9  1.1.1.2  mrg it under the terms of either:
     10  1.1.1.2  mrg 
     11  1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
     12  1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
     13  1.1.1.2  mrg     option) any later version.
     14  1.1.1.2  mrg 
     15  1.1.1.2  mrg or
     16  1.1.1.2  mrg 
     17  1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
     18  1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
     19  1.1.1.2  mrg     later version.
     20  1.1.1.2  mrg 
     21  1.1.1.2  mrg or both in parallel, as here.
     22      1.1  mrg 
     23      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     24      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     25  1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     26  1.1.1.2  mrg for more details.
     27      1.1  mrg 
     28  1.1.1.2  mrg You should have received copies of the GNU General Public License and the
     29  1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     30  1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
     31      1.1  mrg 
     32      1.1  mrg #include <stdio.h>
     33      1.1  mrg #include "gmp-impl.h"
     34      1.1  mrg #include "longlong.h"
     35      1.1  mrg #include "speed.h"
     36      1.1  mrg 
     37      1.1  mrg 
     38      1.1  mrg /* Like the standard version in gmp-impl.h, but with the expressions using a
     39      1.1  mrg    "1-" form.  This has the same number of steps, but "1-" is on the
     40      1.1  mrg    dependent chain, whereas the "2*" in the standard version isn't.
     41      1.1  mrg    Depending on the CPU this should be the same or a touch slower.  */
     42      1.1  mrg 
     43      1.1  mrg #if GMP_LIMB_BITS <= 32
     44      1.1  mrg #define binvert_limb_mul1(inv,n)                                \
     45      1.1  mrg   do {                                                          \
     46      1.1  mrg     mp_limb_t  __n = (n);                                       \
     47      1.1  mrg     mp_limb_t  __inv;                                           \
     48      1.1  mrg     ASSERT ((__n & 1) == 1);                                    \
     49      1.1  mrg     __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
     50      1.1  mrg     __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
     51      1.1  mrg     __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
     52      1.1  mrg     ASSERT (__inv * __n == 1);                                  \
     53      1.1  mrg     (inv) = __inv;                                              \
     54      1.1  mrg   } while (0)
     55      1.1  mrg #endif
     56      1.1  mrg 
     57      1.1  mrg #if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
     58      1.1  mrg #define binvert_limb_mul1(inv,n)                                \
     59      1.1  mrg   do {                                                          \
     60      1.1  mrg     mp_limb_t  __n = (n);                                       \
     61      1.1  mrg     mp_limb_t  __inv;                                           \
     62      1.1  mrg     ASSERT ((__n & 1) == 1);                                    \
     63      1.1  mrg     __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
     64      1.1  mrg     __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
     65      1.1  mrg     __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
     66      1.1  mrg     __inv = (1 - __n * __inv) * __inv + __inv;  /* 64 */        \
     67      1.1  mrg     ASSERT (__inv * __n == 1);                                  \
     68      1.1  mrg     (inv) = __inv;                                              \
     69      1.1  mrg   } while (0)
     70      1.1  mrg #endif
     71      1.1  mrg 
     72      1.1  mrg 
     73      1.1  mrg /* The loop based version used in GMP 3.0 and earlier.  Usually slower than
     74      1.1  mrg    multiplying, due to the number of steps that must be performed.  Much
     75      1.1  mrg    slower when the processor has a good multiply.  */
     76      1.1  mrg 
     77      1.1  mrg #define binvert_limb_loop(inv,n)                \
     78      1.1  mrg   do {                                          \
     79      1.1  mrg     mp_limb_t  __v = (n);                       \
     80      1.1  mrg     mp_limb_t  __v_orig = __v;                  \
     81      1.1  mrg     mp_limb_t  __make_zero = 1;                 \
     82      1.1  mrg     mp_limb_t  __two_i = 1;                     \
     83      1.1  mrg     mp_limb_t  __v_inv = 0;                     \
     84      1.1  mrg                                                 \
     85      1.1  mrg     ASSERT ((__v & 1) == 1);                    \
     86      1.1  mrg                                                 \
     87      1.1  mrg     do                                          \
     88      1.1  mrg       {                                         \
     89      1.1  mrg         while ((__two_i & __make_zero) == 0)    \
     90      1.1  mrg           __two_i <<= 1, __v <<= 1;             \
     91      1.1  mrg         __v_inv += __two_i;                     \
     92      1.1  mrg         __make_zero -= __v;                     \
     93      1.1  mrg       }                                         \
     94      1.1  mrg     while (__make_zero);                        \
     95      1.1  mrg                                                 \
     96      1.1  mrg     ASSERT (__v_orig * __v_inv == 1);           \
     97      1.1  mrg     (inv) = __v_inv;                            \
     98      1.1  mrg   } while (0)
     99      1.1  mrg 
    100      1.1  mrg 
    101      1.1  mrg /* Another loop based version with conditionals, but doing a fixed number of
    102      1.1  mrg    steps. */
    103      1.1  mrg 
    104      1.1  mrg #define binvert_limb_cond(inv,n)                \
    105      1.1  mrg   do {                                          \
    106      1.1  mrg     mp_limb_t  __n = (n);                       \
    107      1.1  mrg     mp_limb_t  __rem = (1 - __n) >> 1;          \
    108      1.1  mrg     mp_limb_t  __inv = GMP_LIMB_HIGHBIT;        \
    109      1.1  mrg     int        __count;                         \
    110      1.1  mrg                                                 \
    111      1.1  mrg     ASSERT ((__n & 1) == 1);                    \
    112      1.1  mrg                                                 \
    113      1.1  mrg     __count = GMP_LIMB_BITS-1;               \
    114      1.1  mrg     do                                          \
    115      1.1  mrg       {                                         \
    116      1.1  mrg         __inv >>= 1;                            \
    117      1.1  mrg         if (__rem & 1)                          \
    118      1.1  mrg           {                                     \
    119      1.1  mrg             __inv |= GMP_LIMB_HIGHBIT;          \
    120      1.1  mrg             __rem -= __n;                       \
    121      1.1  mrg           }                                     \
    122      1.1  mrg         __rem >>= 1;                            \
    123      1.1  mrg       }                                         \
    124      1.1  mrg     while (-- __count);                         \
    125      1.1  mrg                                                 \
    126      1.1  mrg     ASSERT (__inv * __n == 1);                  \
    127      1.1  mrg     (inv) = __inv;                              \
    128      1.1  mrg   } while (0)
    129      1.1  mrg 
    130      1.1  mrg 
    131      1.1  mrg /* Another loop based bitwise version, but purely arithmetic, no
    132      1.1  mrg    conditionals. */
    133      1.1  mrg 
    134      1.1  mrg #define binvert_limb_arith(inv,n)                                       \
    135      1.1  mrg   do {                                                                  \
    136      1.1  mrg     mp_limb_t  __n = (n);                                               \
    137      1.1  mrg     mp_limb_t  __rem = (1 - __n) >> 1;                                  \
    138      1.1  mrg     mp_limb_t  __inv = GMP_LIMB_HIGHBIT;                                \
    139      1.1  mrg     mp_limb_t  __lowbit;                                                \
    140      1.1  mrg     int        __count;                                                 \
    141      1.1  mrg                                                                         \
    142      1.1  mrg     ASSERT ((__n & 1) == 1);                                            \
    143      1.1  mrg                                                                         \
    144      1.1  mrg     __count = GMP_LIMB_BITS-1;                                       \
    145      1.1  mrg     do                                                                  \
    146      1.1  mrg       {                                                                 \
    147      1.1  mrg         __lowbit = __rem & 1;                                           \
    148      1.1  mrg         __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1));      \
    149      1.1  mrg         __rem = (__rem - (__n & -__lowbit)) >> 1;                       \
    150      1.1  mrg       }                                                                 \
    151      1.1  mrg     while (-- __count);                                                 \
    152      1.1  mrg                                                                         \
    153      1.1  mrg     ASSERT (__inv * __n == 1);                                          \
    154      1.1  mrg     (inv) = __inv;                                                      \
    155      1.1  mrg   } while (0)
    156      1.1  mrg 
    157      1.1  mrg 
    158      1.1  mrg double
    159      1.1  mrg speed_binvert_limb_mul1 (struct speed_params *s)
    160      1.1  mrg {
    161      1.1  mrg   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_mul1);
    162      1.1  mrg }
    163      1.1  mrg double
    164      1.1  mrg speed_binvert_limb_loop (struct speed_params *s)
    165      1.1  mrg {
    166      1.1  mrg   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_loop);
    167      1.1  mrg }
    168      1.1  mrg double
    169      1.1  mrg speed_binvert_limb_cond (struct speed_params *s)
    170      1.1  mrg {
    171      1.1  mrg   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_cond);
    172      1.1  mrg }
    173      1.1  mrg double
    174      1.1  mrg speed_binvert_limb_arith (struct speed_params *s)
    175      1.1  mrg {
    176      1.1  mrg   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_arith);
    177      1.1  mrg }
    178