dist/tune/modlinv.c

    1.1  mrg /* Alternate implementations of binvert_limb to compare speeds. */
    1.1  mrg
    1.1  mrg /*
    1.1  mrg Copyright 2000, 2002 Free Software Foundation, Inc.
    1.1  mrg
    1.1  mrg This file is part of the GNU MP Library.
    1.1  mrg
    1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2  mrg it under the terms of either:
1.1.1.2  mrg
1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
1.1.1.2  mrg     option) any later version.
1.1.1.2  mrg
1.1.1.2  mrg or
1.1.1.2  mrg
1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
1.1.1.2  mrg     later version.
1.1.1.2  mrg
1.1.1.2  mrg or both in parallel, as here.
    1.1  mrg
    1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
    1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1.1.1.2  mrg for more details.
    1.1  mrg
1.1.1.2  mrg You should have received copies of the GNU General Public License and the
1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
    1.1  mrg
    1.1  mrg #include <stdio.h>
    1.1  mrg #include "gmp-impl.h"
    1.1  mrg #include "longlong.h"
    1.1  mrg #include "speed.h"
    1.1  mrg
    1.1  mrg
    1.1  mrg /* Like the standard version in gmp-impl.h, but with the expressions using a
    1.1  mrg    "1-" form.  This has the same number of steps, but "1-" is on the
    1.1  mrg    dependent chain, whereas the "2*" in the standard version isn't.
    1.1  mrg    Depending on the CPU this should be the same or a touch slower.  */
    1.1  mrg
    1.1  mrg #if GMP_LIMB_BITS <= 32
    1.1  mrg #define binvert_limb_mul1(inv,n)                                \
    1.1  mrg   do {                                                          \
    1.1  mrg     mp_limb_t  __n = (n);                                       \
    1.1  mrg     mp_limb_t  __inv;                                           \
    1.1  mrg     ASSERT ((__n & 1) == 1);                                    \
    1.1  mrg     __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
    1.1  mrg     __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
    1.1  mrg     __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
    1.1  mrg     ASSERT (__inv * __n == 1);                                  \
    1.1  mrg     (inv) = __inv;                                              \
    1.1  mrg   } while (0)
    1.1  mrg #endif
    1.1  mrg
    1.1  mrg #if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
    1.1  mrg #define binvert_limb_mul1(inv,n)                                \
    1.1  mrg   do {                                                          \
    1.1  mrg     mp_limb_t  __n = (n);                                       \
    1.1  mrg     mp_limb_t  __inv;                                           \
    1.1  mrg     ASSERT ((__n & 1) == 1);                                    \
    1.1  mrg     __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
    1.1  mrg     __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
    1.1  mrg     __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
    1.1  mrg     __inv = (1 - __n * __inv) * __inv + __inv;  /* 64 */        \
    1.1  mrg     ASSERT (__inv * __n == 1);                                  \
    1.1  mrg     (inv) = __inv;                                              \
    1.1  mrg   } while (0)
    1.1  mrg #endif
    1.1  mrg
    1.1  mrg
    1.1  mrg /* The loop based version used in GMP 3.0 and earlier.  Usually slower than
    1.1  mrg    multiplying, due to the number of steps that must be performed.  Much
    1.1  mrg    slower when the processor has a good multiply.  */
    1.1  mrg
    1.1  mrg #define binvert_limb_loop(inv,n)                \
    1.1  mrg   do {                                          \
    1.1  mrg     mp_limb_t  __v = (n);                       \
    1.1  mrg     mp_limb_t  __v_orig = __v;                  \
    1.1  mrg     mp_limb_t  __make_zero = 1;                 \
    1.1  mrg     mp_limb_t  __two_i = 1;                     \
    1.1  mrg     mp_limb_t  __v_inv = 0;                     \
    1.1  mrg                                                 \
    1.1  mrg     ASSERT ((__v & 1) == 1);                    \
    1.1  mrg                                                 \
    1.1  mrg     do                                          \
    1.1  mrg       {                                         \
    1.1  mrg         while ((__two_i & __make_zero) == 0)    \
    1.1  mrg           __two_i <<= 1, __v <<= 1;             \
    1.1  mrg         __v_inv += __two_i;                     \
    1.1  mrg         __make_zero -= __v;                     \
    1.1  mrg       }                                         \
    1.1  mrg     while (__make_zero);                        \
    1.1  mrg                                                 \
    1.1  mrg     ASSERT (__v_orig * __v_inv == 1);           \
    1.1  mrg     (inv) = __v_inv;                            \
    1.1  mrg   } while (0)
    1.1  mrg
    1.1  mrg
    1.1  mrg /* Another loop based version with conditionals, but doing a fixed number of
    1.1  mrg    steps. */
    1.1  mrg
    1.1  mrg #define binvert_limb_cond(inv,n)                \
    1.1  mrg   do {                                          \
    1.1  mrg     mp_limb_t  __n = (n);                       \
    1.1  mrg     mp_limb_t  __rem = (1 - __n) >> 1;          \
    1.1  mrg     mp_limb_t  __inv = GMP_LIMB_HIGHBIT;        \
    1.1  mrg     int        __count;                         \
    1.1  mrg                                                 \
    1.1  mrg     ASSERT ((__n & 1) == 1);                    \
    1.1  mrg                                                 \
    1.1  mrg     __count = GMP_LIMB_BITS-1;               \
    1.1  mrg     do                                          \
    1.1  mrg       {                                         \
    1.1  mrg         __inv >>= 1;                            \
    1.1  mrg         if (__rem & 1)                          \
    1.1  mrg           {                                     \
    1.1  mrg             __inv |= GMP_LIMB_HIGHBIT;          \
    1.1  mrg             __rem -= __n;                       \
    1.1  mrg           }                                     \
    1.1  mrg         __rem >>= 1;                            \
    1.1  mrg       }                                         \
    1.1  mrg     while (-- __count);                         \
    1.1  mrg                                                 \
    1.1  mrg     ASSERT (__inv * __n == 1);                  \
    1.1  mrg     (inv) = __inv;                              \
    1.1  mrg   } while (0)
    1.1  mrg
    1.1  mrg
    1.1  mrg /* Another loop based bitwise version, but purely arithmetic, no
    1.1  mrg    conditionals. */
    1.1  mrg
    1.1  mrg #define binvert_limb_arith(inv,n)                                       \
    1.1  mrg   do {                                                                  \
    1.1  mrg     mp_limb_t  __n = (n);                                               \
    1.1  mrg     mp_limb_t  __rem = (1 - __n) >> 1;                                  \
    1.1  mrg     mp_limb_t  __inv = GMP_LIMB_HIGHBIT;                                \
    1.1  mrg     mp_limb_t  __lowbit;                                                \
    1.1  mrg     int        __count;                                                 \
    1.1  mrg                                                                         \
    1.1  mrg     ASSERT ((__n & 1) == 1);                                            \
    1.1  mrg                                                                         \
    1.1  mrg     __count = GMP_LIMB_BITS-1;                                       \
    1.1  mrg     do                                                                  \
    1.1  mrg       {                                                                 \
    1.1  mrg         __lowbit = __rem & 1;                                           \
    1.1  mrg         __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1));      \
    1.1  mrg         __rem = (__rem - (__n & -__lowbit)) >> 1;                       \
    1.1  mrg       }                                                                 \
    1.1  mrg     while (-- __count);                                                 \
    1.1  mrg                                                                         \
    1.1  mrg     ASSERT (__inv * __n == 1);                                          \
    1.1  mrg     (inv) = __inv;                                                      \
    1.1  mrg   } while (0)
    1.1  mrg
    1.1  mrg
    1.1  mrg double
    1.1  mrg speed_binvert_limb_mul1 (struct speed_params *s)
    1.1  mrg {
    1.1  mrg   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_mul1);
    1.1  mrg }
    1.1  mrg double
    1.1  mrg speed_binvert_limb_loop (struct speed_params *s)
    1.1  mrg {
    1.1  mrg   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_loop);
    1.1  mrg }
    1.1  mrg double
    1.1  mrg speed_binvert_limb_cond (struct speed_params *s)
    1.1  mrg {
    1.1  mrg   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_cond);
    1.1  mrg }
    1.1  mrg double
    1.1  mrg speed_binvert_limb_arith (struct speed_params *s)
    1.1  mrg {
    1.1  mrg   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_arith);
    1.1  mrg }