1 1.1 mrg /* Alternate implementations of binvert_limb to compare speeds. */ 2 1.1 mrg 3 1.1 mrg /* 4 1.1 mrg Copyright 2000, 2002 Free Software Foundation, Inc. 5 1.1 mrg 6 1.1 mrg This file is part of the GNU MP Library. 7 1.1 mrg 8 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 9 1.1.1.2 mrg it under the terms of either: 10 1.1.1.2 mrg 11 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free 12 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your 13 1.1.1.2 mrg option) any later version. 14 1.1.1.2 mrg 15 1.1.1.2 mrg or 16 1.1.1.2 mrg 17 1.1.1.2 mrg * the GNU General Public License as published by the Free Software 18 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any 19 1.1.1.2 mrg later version. 20 1.1.1.2 mrg 21 1.1.1.2 mrg or both in parallel, as here. 22 1.1 mrg 23 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 24 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 25 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 26 1.1.1.2 mrg for more details. 27 1.1 mrg 28 1.1.1.2 mrg You should have received copies of the GNU General Public License and the 29 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 30 1.1.1.2 mrg see https://www.gnu.org/licenses/. */ 31 1.1 mrg 32 1.1 mrg #include <stdio.h> 33 1.1 mrg #include "gmp-impl.h" 34 1.1 mrg #include "longlong.h" 35 1.1 mrg #include "speed.h" 36 1.1 mrg 37 1.1 mrg 38 1.1 mrg /* Like the standard version in gmp-impl.h, but with the expressions using a 39 1.1 mrg "1-" form. This has the same number of steps, but "1-" is on the 40 1.1 mrg dependent chain, whereas the "2*" in the standard version isn't. 41 1.1 mrg Depending on the CPU this should be the same or a touch slower. */ 42 1.1 mrg 43 1.1 mrg #if GMP_LIMB_BITS <= 32 44 1.1 mrg #define binvert_limb_mul1(inv,n) \ 45 1.1 mrg do { \ 46 1.1 mrg mp_limb_t __n = (n); \ 47 1.1 mrg mp_limb_t __inv; \ 48 1.1 mrg ASSERT ((__n & 1) == 1); \ 49 1.1 mrg __inv = binvert_limb_table[(__n&0xFF)/2]; /* 8 */ \ 50 1.1 mrg __inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \ 51 1.1 mrg __inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \ 52 1.1 mrg ASSERT (__inv * __n == 1); \ 53 1.1 mrg (inv) = __inv; \ 54 1.1 mrg } while (0) 55 1.1 mrg #endif 56 1.1 mrg 57 1.1 mrg #if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64 58 1.1 mrg #define binvert_limb_mul1(inv,n) \ 59 1.1 mrg do { \ 60 1.1 mrg mp_limb_t __n = (n); \ 61 1.1 mrg mp_limb_t __inv; \ 62 1.1 mrg ASSERT ((__n & 1) == 1); \ 63 1.1 mrg __inv = binvert_limb_table[(__n&0xFF)/2]; /* 8 */ \ 64 1.1 mrg __inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \ 65 1.1 mrg __inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \ 66 1.1 mrg __inv = (1 - __n * __inv) * __inv + __inv; /* 64 */ \ 67 1.1 mrg ASSERT (__inv * __n == 1); \ 68 1.1 mrg (inv) = __inv; \ 69 1.1 mrg } while (0) 70 1.1 mrg #endif 71 1.1 mrg 72 1.1 mrg 73 1.1 mrg /* The loop based version used in GMP 3.0 and earlier. Usually slower than 74 1.1 mrg multiplying, due to the number of steps that must be performed. Much 75 1.1 mrg slower when the processor has a good multiply. */ 76 1.1 mrg 77 1.1 mrg #define binvert_limb_loop(inv,n) \ 78 1.1 mrg do { \ 79 1.1 mrg mp_limb_t __v = (n); \ 80 1.1 mrg mp_limb_t __v_orig = __v; \ 81 1.1 mrg mp_limb_t __make_zero = 1; \ 82 1.1 mrg mp_limb_t __two_i = 1; \ 83 1.1 mrg mp_limb_t __v_inv = 0; \ 84 1.1 mrg \ 85 1.1 mrg ASSERT ((__v & 1) == 1); \ 86 1.1 mrg \ 87 1.1 mrg do \ 88 1.1 mrg { \ 89 1.1 mrg while ((__two_i & __make_zero) == 0) \ 90 1.1 mrg __two_i <<= 1, __v <<= 1; \ 91 1.1 mrg __v_inv += __two_i; \ 92 1.1 mrg __make_zero -= __v; \ 93 1.1 mrg } \ 94 1.1 mrg while (__make_zero); \ 95 1.1 mrg \ 96 1.1 mrg ASSERT (__v_orig * __v_inv == 1); \ 97 1.1 mrg (inv) = __v_inv; \ 98 1.1 mrg } while (0) 99 1.1 mrg 100 1.1 mrg 101 1.1 mrg /* Another loop based version with conditionals, but doing a fixed number of 102 1.1 mrg steps. */ 103 1.1 mrg 104 1.1 mrg #define binvert_limb_cond(inv,n) \ 105 1.1 mrg do { \ 106 1.1 mrg mp_limb_t __n = (n); \ 107 1.1 mrg mp_limb_t __rem = (1 - __n) >> 1; \ 108 1.1 mrg mp_limb_t __inv = GMP_LIMB_HIGHBIT; \ 109 1.1 mrg int __count; \ 110 1.1 mrg \ 111 1.1 mrg ASSERT ((__n & 1) == 1); \ 112 1.1 mrg \ 113 1.1 mrg __count = GMP_LIMB_BITS-1; \ 114 1.1 mrg do \ 115 1.1 mrg { \ 116 1.1 mrg __inv >>= 1; \ 117 1.1 mrg if (__rem & 1) \ 118 1.1 mrg { \ 119 1.1 mrg __inv |= GMP_LIMB_HIGHBIT; \ 120 1.1 mrg __rem -= __n; \ 121 1.1 mrg } \ 122 1.1 mrg __rem >>= 1; \ 123 1.1 mrg } \ 124 1.1 mrg while (-- __count); \ 125 1.1 mrg \ 126 1.1 mrg ASSERT (__inv * __n == 1); \ 127 1.1 mrg (inv) = __inv; \ 128 1.1 mrg } while (0) 129 1.1 mrg 130 1.1 mrg 131 1.1 mrg /* Another loop based bitwise version, but purely arithmetic, no 132 1.1 mrg conditionals. */ 133 1.1 mrg 134 1.1 mrg #define binvert_limb_arith(inv,n) \ 135 1.1 mrg do { \ 136 1.1 mrg mp_limb_t __n = (n); \ 137 1.1 mrg mp_limb_t __rem = (1 - __n) >> 1; \ 138 1.1 mrg mp_limb_t __inv = GMP_LIMB_HIGHBIT; \ 139 1.1 mrg mp_limb_t __lowbit; \ 140 1.1 mrg int __count; \ 141 1.1 mrg \ 142 1.1 mrg ASSERT ((__n & 1) == 1); \ 143 1.1 mrg \ 144 1.1 mrg __count = GMP_LIMB_BITS-1; \ 145 1.1 mrg do \ 146 1.1 mrg { \ 147 1.1 mrg __lowbit = __rem & 1; \ 148 1.1 mrg __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1)); \ 149 1.1 mrg __rem = (__rem - (__n & -__lowbit)) >> 1; \ 150 1.1 mrg } \ 151 1.1 mrg while (-- __count); \ 152 1.1 mrg \ 153 1.1 mrg ASSERT (__inv * __n == 1); \ 154 1.1 mrg (inv) = __inv; \ 155 1.1 mrg } while (0) 156 1.1 mrg 157 1.1 mrg 158 1.1 mrg double 159 1.1 mrg speed_binvert_limb_mul1 (struct speed_params *s) 160 1.1 mrg { 161 1.1 mrg SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_mul1); 162 1.1 mrg } 163 1.1 mrg double 164 1.1 mrg speed_binvert_limb_loop (struct speed_params *s) 165 1.1 mrg { 166 1.1 mrg SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_loop); 167 1.1 mrg } 168 1.1 mrg double 169 1.1 mrg speed_binvert_limb_cond (struct speed_params *s) 170 1.1 mrg { 171 1.1 mrg SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_cond); 172 1.1 mrg } 173 1.1 mrg double 174 1.1 mrg speed_binvert_limb_arith (struct speed_params *s) 175 1.1 mrg { 176 1.1 mrg SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_arith); 177 1.1 mrg } 178