Home | History | Annotate | Line # | Download | only in dist
      1 /* Include file for internal GNU MP types and definitions.
      2 
      3    THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
      4    BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
      5 
      6 Copyright 1991-2018 Free Software Foundation, Inc.
      7 
      8 This file is part of the GNU MP Library.
      9 
     10 The GNU MP Library is free software; you can redistribute it and/or modify
     11 it under the terms of either:
     12 
     13   * the GNU Lesser General Public License as published by the Free
     14     Software Foundation; either version 3 of the License, or (at your
     15     option) any later version.
     16 
     17 or
     18 
     19   * the GNU General Public License as published by the Free Software
     20     Foundation; either version 2 of the License, or (at your option) any
     21     later version.
     22 
     23 or both in parallel, as here.
     24 
     25 The GNU MP Library is distributed in the hope that it will be useful, but
     26 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     27 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     28 for more details.
     29 
     30 You should have received copies of the GNU General Public License and the
     31 GNU Lesser General Public License along with the GNU MP Library.  If not,
     32 see https://www.gnu.org/licenses/.  */
     33 
     34 
     35 /* __GMP_DECLSPEC must be given on any global data that will be accessed
     36    from outside libgmp, meaning from the test or development programs, or
     37    from libgmpxx.  Failing to do this will result in an incorrect address
     38    being used for the accesses.  On functions __GMP_DECLSPEC makes calls
     39    from outside libgmp more efficient, but they'll still work fine without
     40    it.  */
     41 
     42 
     43 #ifndef __GMP_IMPL_H__
     44 #define __GMP_IMPL_H__
     45 
     46 #if defined _CRAY
     47 #include <intrinsics.h>  /* for _popcnt */
     48 #endif
     49 
     50 /* For INT_MAX, etc. We used to avoid it because of a bug (on solaris,
     51    gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
     52    values (the ABI=64 values)), but it should be safe now.
     53 
     54    On Cray vector systems, however, we need the system limits.h since sizes
     55    of signed and unsigned types can differ there, depending on compiler
     56    options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
     57    reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
     58    short can be 24, 32, 46 or 64 bits, and different for ushort.  */
     59 
     60 #include <limits.h>
     61 
     62 /* For fat.h and other fat binary stuff.
     63    No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
     64    declared this way are only used to set function pointers in __gmpn_cpuvec,
     65    they're not called directly.  */
     66 #define DECL_add_n(name) \
     67   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
     68 #define DECL_addlsh1_n(name) \
     69   DECL_add_n (name)
     70 #define DECL_addlsh2_n(name) \
     71   DECL_add_n (name)
     72 #define DECL_addmul_1(name) \
     73   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
     74 #define DECL_addmul_2(name) \
     75   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
     76 #define DECL_bdiv_dbm1c(name) \
     77   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
     78 #define DECL_cnd_add_n(name) \
     79   __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
     80 #define DECL_cnd_sub_n(name) \
     81   __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
     82 #define DECL_com(name) \
     83   __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
     84 #define DECL_copyd(name) \
     85   __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
     86 #define DECL_copyi(name) \
     87   DECL_copyd (name)
     88 #define DECL_divexact_1(name) \
     89   __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
     90 #define DECL_divexact_by3c(name) \
     91   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
     92 #define DECL_divrem_1(name) \
     93   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
     94 #define DECL_gcd_11(name) \
     95   __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_limb_t)
     96 #define DECL_lshift(name) \
     97   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
     98 #define DECL_lshiftc(name) \
     99   DECL_lshift (name)
    100 #define DECL_mod_1(name) \
    101   __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
    102 #define DECL_mod_1_1p(name) \
    103   __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [])
    104 #define DECL_mod_1_1p_cps(name) \
    105   __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
    106 #define DECL_mod_1s_2p(name) \
    107   DECL_mod_1_1p (name)
    108 #define DECL_mod_1s_2p_cps(name) \
    109   DECL_mod_1_1p_cps (name)
    110 #define DECL_mod_1s_4p(name) \
    111   DECL_mod_1_1p (name)
    112 #define DECL_mod_1s_4p_cps(name) \
    113   DECL_mod_1_1p_cps (name)
    114 #define DECL_mod_34lsub1(name) \
    115   __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
    116 #define DECL_modexact_1c_odd(name) \
    117   __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
    118 #define DECL_mul_1(name) \
    119   DECL_addmul_1 (name)
    120 #define DECL_mul_basecase(name) \
    121   __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
    122 #define DECL_mullo_basecase(name) \
    123   __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
    124 #define DECL_preinv_divrem_1(name) \
    125   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
    126 #define DECL_preinv_mod_1(name) \
    127   __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
    128 #define DECL_redc_1(name) \
    129   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
    130 #define DECL_redc_2(name) \
    131   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
    132 #define DECL_rshift(name) \
    133   DECL_lshift (name)
    134 #define DECL_sqr_basecase(name) \
    135   __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
    136 #define DECL_sub_n(name) \
    137   DECL_add_n (name)
    138 #define DECL_sublsh1_n(name) \
    139   DECL_add_n (name)
    140 #define DECL_submul_1(name) \
    141   DECL_addmul_1 (name)
    142 
    143 #if ! defined (__GMP_WITHIN_CONFIGURE)
    144 #include "config.h"
    145 #include "gmp.h"
    146 #include "gmp-mparam.h"
    147 #include "fib_table.h"
    148 #include "fac_table.h"
    149 #include "mp_bases.h"
    150 #if WANT_FAT_BINARY
    151 #include "fat.h"
    152 #endif
    153 #endif
    154 
    155 #if HAVE_INTTYPES_H      /* for uint_least32_t */
    156 # include <inttypes.h>
    157 #else
    158 # if HAVE_STDINT_H
    159 #  include <stdint.h>
    160 # endif
    161 #endif
    162 
    163 #ifdef __cplusplus
    164 #include <cstring>  /* for strlen */
    165 #include <string>   /* for std::string */
    166 #endif
    167 
    168 
    169 #ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
    170 #define WANT_TMP_DEBUG 0
    171 #endif
    172 
    173 /* The following tries to get a good version of alloca.  The tests are
    174    adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
    175    Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
    176    be setup appropriately.
    177 
    178    ifndef alloca - a cpp define might already exist.
    179        glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
    180        HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
    181 
    182    GCC __builtin_alloca - preferred whenever available.
    183 
    184    _AIX pragma - IBM compilers need a #pragma in "each module that needs to
    185        use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
    186        used in past versions of GMP, retained still in case it matters.
    187 
    188        The autoconf manual says this pragma needs to be at the start of a C
    189        file, apart from comments and preprocessor directives.  Is that true?
    190        xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
    191        from gmp.h.
    192 */
    193 
    194 #ifndef alloca
    195 # ifdef __GNUC__
    196 #  define alloca __builtin_alloca
    197 # else
    198 #  ifdef __DECC
    199 #   define alloca(x) __ALLOCA(x)
    200 #  else
    201 #   ifdef _MSC_VER
    202 #    include <malloc.h>
    203 #    define alloca _alloca
    204 #   else
    205 #    if HAVE_ALLOCA_H
    206 #     include <alloca.h>
    207 #    else
    208 #     if defined (_AIX) || defined (_IBMR2)
    209  #pragma alloca
    210 #     else
    211 #      if !defined (__NetBSD__)
    212         char *alloca ();
    213 #      else
    214 #       include <stdlib.h>
    215 #      endif
    216 #     endif
    217 #    endif
    218 #   endif
    219 #  endif
    220 # endif
    221 #endif
    222 
    223 
    224 /* if not provided by gmp-mparam.h */
    225 #ifndef GMP_LIMB_BYTES
    226 #define GMP_LIMB_BYTES  SIZEOF_MP_LIMB_T
    227 #endif
    228 #ifndef GMP_LIMB_BITS
    229 #define GMP_LIMB_BITS  (8 * SIZEOF_MP_LIMB_T)
    230 #endif
    231 
    232 #define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)
    233 
    234 
    235 /* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
    236 #if HAVE_UINT_LEAST32_T
    237 typedef uint_least32_t      gmp_uint_least32_t;
    238 #else
    239 #if SIZEOF_UNSIGNED_SHORT >= 4
    240 typedef unsigned short      gmp_uint_least32_t;
    241 #else
    242 #if SIZEOF_UNSIGNED >= 4
    243 typedef unsigned            gmp_uint_least32_t;
    244 #else
    245 typedef unsigned long       gmp_uint_least32_t;
    246 #endif
    247 #endif
    248 #endif
    249 
    250 
    251 /* gmp_intptr_t, for pointer to integer casts */
    252 #if HAVE_INTPTR_T
    253 typedef intptr_t            gmp_intptr_t;
    254 #else /* fallback */
    255 typedef size_t              gmp_intptr_t;
    256 #endif
    257 
    258 
    259 /* pre-inverse types for truncating division and modulo */
    260 typedef struct {mp_limb_t inv32;} gmp_pi1_t;
    261 typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
    262 
    263 
    264 /* "const" basically means a function does nothing but examine its arguments
    265    and give a return value, it doesn't read or write any memory (neither
    266    global nor pointed to by arguments), and has no other side-effects.  This
    267    is more restrictive than "pure".  See info node "(gcc)Function
    268    Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
    269    this off when trying to write timing loops.  */
    270 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
    271 #define ATTRIBUTE_CONST  __attribute__ ((const))
    272 #else
    273 #define ATTRIBUTE_CONST
    274 #endif
    275 
    276 #if HAVE_ATTRIBUTE_NORETURN
    277 #define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
    278 #else
    279 #define ATTRIBUTE_NORETURN
    280 #endif
    281 
    282 /* "malloc" means a function behaves like malloc in that the pointer it
    283    returns doesn't alias anything.  */
    284 #if HAVE_ATTRIBUTE_MALLOC
    285 #define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
    286 #else
    287 #define ATTRIBUTE_MALLOC
    288 #endif
    289 
    290 
    291 #if ! HAVE_STRCHR
    292 #define strchr(s,c)  index(s,c)
    293 #endif
    294 
    295 #if ! HAVE_MEMSET
    296 #define memset(p, c, n)						\
    297   do {								\
    298     unsigned char *__memset__p = (unsigned char *) (p);		\
    299     int	 __i;							\
    300     ASSERT ((n) >= 0);						\
    301     for (__i = 0; __i < (n); __i++)				\
    302       __memset__p[__i] = (c);					\
    303   } while (0)
    304 #endif
    305 
    306 /* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
    307    mode.  Falling back to a memcpy will give maximum portability, since it
    308    works no matter whether va_list is a pointer, struct or array.  */
    309 #if ! defined (va_copy) && defined (__va_copy)
    310 #define va_copy(dst,src)  __va_copy(dst,src)
    311 #endif
    312 #if ! defined (va_copy)
    313 #define va_copy(dst,src) \
    314   do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
    315 #endif
    316 
    317 
    318 /* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
    319    (ie. ctlz, ctpop, cttz).  */
    320 #if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
    321   || HAVE_HOST_CPU_alphaev7
    322 #define HAVE_HOST_CPU_alpha_CIX 1
    323 #endif
    324 
    325 
    326 #if defined (__cplusplus)
    327 extern "C" {
    328 #endif
    329 
    330 
    331 /* Usage: TMP_DECL;
    332 	  TMP_MARK;
    333 	  ptr = TMP_ALLOC (bytes);
    334 	  TMP_FREE;
    335 
    336    Small allocations should use TMP_SALLOC, big allocations should use
    337    TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.
    338 
    339    Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
    340    TMP_SFREE.
    341 
    342    TMP_DECL just declares a variable, but might be empty and so must be last
    343    in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
    344    TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
    345    TMP_MARK was made, but then no TMP_ALLOCs.  */
    346 
    347 /* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
    348    __gmp_allocate_func doesn't already determine it.  */
    349 union tmp_align_t {
    350   mp_limb_t  l;
    351   double     d;
    352   char       *p;
    353 };
    354 #define __TMP_ALIGN  sizeof (union tmp_align_t)
    355 
    356 /* Return "a" rounded upwards to a multiple of "m", if it isn't already.
    357    "a" must be an unsigned type.
    358    This is designed for use with a compile-time constant "m".
    359    The POW2 case is expected to be usual, and gcc 3.0 and up recognises
    360    "(-(8*n))%8" or the like is always zero, which means the rounding up in
    361    the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.  */
    362 #define ROUND_UP_MULTIPLE(a,m)          \
    363   (POW2_P(m) ? (a) + (-(a))%(m)         \
    364    : (a)+(m)-1 - (((a)+(m)-1) % (m)))
    365 
    366 #if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
    367 struct tmp_reentrant_t {
    368   struct tmp_reentrant_t  *next;
    369   size_t		  size;	  /* bytes, including header */
    370 };
    371 __GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
    372 __GMP_DECLSPEC void  __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
    373 #endif
    374 
    375 #if WANT_TMP_ALLOCA
    376 #define TMP_SDECL
    377 #define TMP_DECL		struct tmp_reentrant_t *__tmp_marker
    378 #define TMP_SMARK
    379 #define TMP_MARK		__tmp_marker = 0
    380 #define TMP_SALLOC(n)		alloca(n)
    381 #define TMP_BALLOC(n)		__gmp_tmp_reentrant_alloc (&__tmp_marker, n)
    382 /* The peculiar stack allocation limit here is chosen for efficient asm.  */
    383 #define TMP_ALLOC(n)							\
    384   (LIKELY ((n) <= 0x7f00) ? TMP_SALLOC(n) : TMP_BALLOC(n))
    385 #define TMP_SFREE
    386 #define TMP_FREE							\
    387   do {									\
    388     if (UNLIKELY (__tmp_marker != 0))					\
    389       __gmp_tmp_reentrant_free (__tmp_marker);				\
    390   } while (0)
    391 #endif
    392 
    393 #if WANT_TMP_REENTRANT
    394 #define TMP_SDECL		TMP_DECL
    395 #define TMP_DECL		struct tmp_reentrant_t *__tmp_marker
    396 #define TMP_SMARK		TMP_MARK
    397 #define TMP_MARK		__tmp_marker = 0
    398 #define TMP_SALLOC(n)		TMP_ALLOC(n)
    399 #define TMP_BALLOC(n)		TMP_ALLOC(n)
    400 #define TMP_ALLOC(n)		__gmp_tmp_reentrant_alloc (&__tmp_marker, n)
    401 #define TMP_SFREE		TMP_FREE
    402 #define TMP_FREE		__gmp_tmp_reentrant_free (__tmp_marker)
    403 #endif
    404 
    405 #if WANT_TMP_NOTREENTRANT
    406 struct tmp_marker
    407 {
    408   struct tmp_stack *which_chunk;
    409   void *alloc_point;
    410 };
    411 __GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
    412 __GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
    413 __GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
    414 #define TMP_SDECL		TMP_DECL
    415 #define TMP_DECL		struct tmp_marker __tmp_marker
    416 #define TMP_SMARK		TMP_MARK
    417 #define TMP_MARK		__gmp_tmp_mark (&__tmp_marker)
    418 #define TMP_SALLOC(n)		TMP_ALLOC(n)
    419 #define TMP_BALLOC(n)		TMP_ALLOC(n)
    420 #define TMP_ALLOC(n)							\
    421   __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
    422 #define TMP_SFREE		TMP_FREE
    423 #define TMP_FREE		__gmp_tmp_free (&__tmp_marker)
    424 #endif
    425 
    426 #if WANT_TMP_DEBUG
    427 /* See tal-debug.c for some comments. */
    428 struct tmp_debug_t {
    429   struct tmp_debug_entry_t  *list;
    430   const char                *file;
    431   int                       line;
    432 };
    433 struct tmp_debug_entry_t {
    434   struct tmp_debug_entry_t  *next;
    435   void                      *block;
    436   size_t                    size;
    437 };
    438 __GMP_DECLSPEC void  __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
    439 					   struct tmp_debug_t *,
    440 					   const char *, const char *);
    441 __GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
    442 					    struct tmp_debug_t **, const char *,
    443 					    size_t) ATTRIBUTE_MALLOC;
    444 __GMP_DECLSPEC void  __gmp_tmp_debug_free (const char *, int, int,
    445 					   struct tmp_debug_t **,
    446 					   const char *, const char *);
    447 #define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
    448 #define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
    449 #define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
    450 #define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
    451 #define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
    452 #define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
    453 /* The marker variable is designed to provoke an uninitialized variable
    454    warning from the compiler if TMP_FREE is used without a TMP_MARK.
    455    __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
    456    these things up too.  */
    457 #define TMP_DECL_NAME(marker, marker_name)				\
    458   int marker;								\
    459   int __tmp_marker_inscope;						\
    460   const char *__tmp_marker_name = marker_name;				\
    461   struct tmp_debug_t  __tmp_marker_struct;				\
    462   /* don't demand NULL, just cast a zero */				\
    463   struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
    464 #define TMP_MARK_NAME(marker, marker_name)				\
    465   do {									\
    466     marker = 1;								\
    467     __tmp_marker_inscope = 1;						\
    468     __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,			\
    469 			   &__tmp_marker, &__tmp_marker_struct,		\
    470 			   __tmp_marker_name, marker_name);		\
    471   } while (0)
    472 #define TMP_SALLOC(n)		TMP_ALLOC(n)
    473 #define TMP_BALLOC(n)		TMP_ALLOC(n)
    474 #define TMP_ALLOC(size)							\
    475   __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,			\
    476 			 __tmp_marker_inscope,				\
    477 			 &__tmp_marker, __tmp_marker_name, size)
    478 #define TMP_FREE_NAME(marker, marker_name)				\
    479   do {									\
    480     __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,			\
    481 			   marker, &__tmp_marker,			\
    482 			   __tmp_marker_name, marker_name);		\
    483   } while (0)
    484 #endif /* WANT_TMP_DEBUG */
    485 
    486 
    487 /* Allocating various types. */
    488 #define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
    489 #define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
    490 #define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
    491 #define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
    492 #define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
    493 #define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
    494 #define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
    495 #define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
    496 #define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)
    497 
    498 /* It's more efficient to allocate one block than many.  This is certainly
    499    true of the malloc methods, but it can even be true of alloca if that
    500    involves copying a chunk of stack (various RISCs), or a call to a stack
    501    bounds check (mingw).  In any case, when debugging keep separate blocks
    502    so a redzoning malloc debugger can protect each individually.  */
    503 #define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)				\
    504   do {									\
    505     if (WANT_TMP_DEBUG)							\
    506       {									\
    507 	(xp) = TMP_ALLOC_LIMBS (xsize);					\
    508 	(yp) = TMP_ALLOC_LIMBS (ysize);					\
    509       }									\
    510     else								\
    511       {									\
    512 	(xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));			\
    513 	(yp) = (xp) + (xsize);						\
    514       }									\
    515   } while (0)
    516 #define TMP_ALLOC_LIMBS_3(xp,xsize, yp,ysize, zp,zsize)			\
    517   do {									\
    518     if (WANT_TMP_DEBUG)							\
    519       {									\
    520 	(xp) = TMP_ALLOC_LIMBS (xsize);					\
    521 	(yp) = TMP_ALLOC_LIMBS (ysize);					\
    522 	(zp) = TMP_ALLOC_LIMBS (zsize);					\
    523       }									\
    524     else								\
    525       {									\
    526 	(xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize) + (zsize));		\
    527 	(yp) = (xp) + (xsize);						\
    528 	(zp) = (yp) + (ysize);						\
    529       }									\
    530   } while (0)
    531 
    532 /* From gmp.h, nicer names for internal use. */
    533 #define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
    534 #define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
    535 #define LIKELY(cond)                   __GMP_LIKELY(cond)
    536 #define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)
    537 
    538 #define ABS(x) ((x) >= 0 ? (x) : -(x))
    539 #define NEG_CAST(T,x) (- (__GMP_CAST (T, (x) + 1) - 1))
    540 #define ABS_CAST(T,x) ((x) >= 0 ? __GMP_CAST (T, x) : NEG_CAST (T,x))
    541 #undef MIN
    542 #define MIN(l,o) ((l) < (o) ? (l) : (o))
    543 #undef MAX
    544 #define MAX(h,i) ((h) > (i) ? (h) : (i))
    545 #define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
    546 
    547 /* Field access macros.  */
    548 #define SIZ(x) ((x)->_mp_size)
    549 #define ABSIZ(x) ABS (SIZ (x))
    550 #define PTR(x) ((x)->_mp_d)
    551 #define EXP(x) ((x)->_mp_exp)
    552 #define PREC(x) ((x)->_mp_prec)
    553 #define ALLOC(x) ((x)->_mp_alloc)
    554 #define NUM(x) mpq_numref(x)
    555 #define DEN(x) mpq_denref(x)
    556 
    557 /* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
    558    then that lowest one bit must have been the only bit set.  n==0 will
    559    return true though, so avoid that.  */
    560 #define POW2_P(n)  (((n) & ((n) - 1)) == 0)
    561 
    562 /* This is intended for constant THRESHOLDs only, where the compiler
    563    can completely fold the result.  */
    564 #define LOG2C(n) \
    565  (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
    566   ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
    567   ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
    568   ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
    569 
    570 #define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)
    571 
    572 /* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
    573    unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
    574    treats the plain decimal values in <limits.h> as signed.  */
    575 #define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
    576 #define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
    577 #define USHRT_HIGHBIT      (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1))
    578 #define GMP_LIMB_HIGHBIT  (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
    579 
    580 #if __GMP_MP_SIZE_T_INT
    581 #define MP_SIZE_T_MAX      INT_MAX
    582 #define MP_SIZE_T_MIN      INT_MIN
    583 #else
    584 #define MP_SIZE_T_MAX      LONG_MAX
    585 #define MP_SIZE_T_MIN      LONG_MIN
    586 #endif
    587 
    588 /* mp_exp_t is the same as mp_size_t */
    589 #define MP_EXP_T_MAX   MP_SIZE_T_MAX
    590 #define MP_EXP_T_MIN   MP_SIZE_T_MIN
    591 
    592 #define LONG_HIGHBIT       LONG_MIN
    593 #define INT_HIGHBIT        INT_MIN
    594 #define SHRT_HIGHBIT       SHRT_MIN
    595 
    596 
    597 #define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
    598 
    599 #if GMP_NAIL_BITS == 0
    600 #define GMP_NAIL_LOWBIT   CNST_LIMB(0)
    601 #else
    602 #define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
    603 #endif
    604 
    605 #if GMP_NAIL_BITS != 0
    606 /* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
    607    code that has not yet been qualified.  */
    608 
    609 #undef  DC_DIV_QR_THRESHOLD
    610 #define DC_DIV_QR_THRESHOLD              50
    611 
    612 #undef DIVREM_1_NORM_THRESHOLD
    613 #undef DIVREM_1_UNNORM_THRESHOLD
    614 #undef MOD_1_NORM_THRESHOLD
    615 #undef MOD_1_UNNORM_THRESHOLD
    616 #undef USE_PREINV_DIVREM_1
    617 #undef DIVREM_2_THRESHOLD
    618 #undef DIVEXACT_1_THRESHOLD
    619 #define DIVREM_1_NORM_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */
    620 #define DIVREM_1_UNNORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
    621 #define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
    622 #define MOD_1_UNNORM_THRESHOLD            MP_SIZE_T_MAX  /* no preinv */
    623 #define USE_PREINV_DIVREM_1               0  /* no preinv */
    624 #define DIVREM_2_THRESHOLD                MP_SIZE_T_MAX  /* no preinv */
    625 
    626 /* mpn/generic/mul_fft.c is not nails-capable. */
    627 #undef  MUL_FFT_THRESHOLD
    628 #undef  SQR_FFT_THRESHOLD
    629 #define MUL_FFT_THRESHOLD                MP_SIZE_T_MAX
    630 #define SQR_FFT_THRESHOLD                MP_SIZE_T_MAX
    631 #endif
    632 
    633 /* Swap macros. */
    634 
    635 #define MP_LIMB_T_SWAP(x, y)						\
    636   do {									\
    637     mp_limb_t __mp_limb_t_swap__tmp = (x);				\
    638     (x) = (y);								\
    639     (y) = __mp_limb_t_swap__tmp;					\
    640   } while (0)
    641 #define MP_SIZE_T_SWAP(x, y)						\
    642   do {									\
    643     mp_size_t __mp_size_t_swap__tmp = (x);				\
    644     (x) = (y);								\
    645     (y) = __mp_size_t_swap__tmp;					\
    646   } while (0)
    647 
    648 #define MP_PTR_SWAP(x, y)						\
    649   do {									\
    650     mp_ptr __mp_ptr_swap__tmp = (x);					\
    651     (x) = (y);								\
    652     (y) = __mp_ptr_swap__tmp;						\
    653   } while (0)
    654 #define MP_SRCPTR_SWAP(x, y)						\
    655   do {									\
    656     mp_srcptr __mp_srcptr_swap__tmp = (x);				\
    657     (x) = (y);								\
    658     (y) = __mp_srcptr_swap__tmp;					\
    659   } while (0)
    660 
    661 #define MPN_PTR_SWAP(xp,xs, yp,ys)					\
    662   do {									\
    663     MP_PTR_SWAP (xp, yp);						\
    664     MP_SIZE_T_SWAP (xs, ys);						\
    665   } while(0)
    666 #define MPN_SRCPTR_SWAP(xp,xs, yp,ys)					\
    667   do {									\
    668     MP_SRCPTR_SWAP (xp, yp);						\
    669     MP_SIZE_T_SWAP (xs, ys);						\
    670   } while(0)
    671 
    672 #define MPZ_PTR_SWAP(x, y)						\
    673   do {									\
    674     mpz_ptr __mpz_ptr_swap__tmp = (x);					\
    675     (x) = (y);								\
    676     (y) = __mpz_ptr_swap__tmp;						\
    677   } while (0)
    678 #define MPZ_SRCPTR_SWAP(x, y)						\
    679   do {									\
    680     mpz_srcptr __mpz_srcptr_swap__tmp = (x);				\
    681     (x) = (y);								\
    682     (y) = __mpz_srcptr_swap__tmp;					\
    683   } while (0)
    684 
    685 #define MPQ_PTR_SWAP(x, y)						\
    686   do {                                                                  \
    687     mpq_ptr __mpq_ptr_swap__tmp = (x);					\
    688     (x) = (y);                                                          \
    689     (y) = __mpq_ptr_swap__tmp;						\
    690   } while (0)
    691 #define MPQ_SRCPTR_SWAP(x, y)                                           \
    692   do {                                                                  \
    693     mpq_srcptr __mpq_srcptr_swap__tmp = (x);                            \
    694     (x) = (y);                                                          \
    695     (y) = __mpq_srcptr_swap__tmp;                                       \
    696   } while (0)
    697 
    698 
    699 /* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
    700    but current gcc (3.0) doesn't seem to support that.  */
    701 __GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
    702 __GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
    703 __GMP_DECLSPEC extern void   (*__gmp_free_func) (void *, size_t);
    704 
    705 __GMP_DECLSPEC void *__gmp_default_allocate (size_t);
    706 __GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
    707 __GMP_DECLSPEC void __gmp_default_free (void *, size_t);
    708 
    709 #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
    710   ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
    711 #define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
    712 
    713 #define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type)		\
    714   ((type *) (*__gmp_reallocate_func)					\
    715    (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
    716 #define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size)		\
    717   __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
    718 
    719 #define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
    720 #define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
    721 
    722 #define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)		\
    723   do {									\
    724     if ((oldsize) != (newsize))						\
    725       (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize);		\
    726   } while (0)
    727 
    728 #define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)	\
    729   do {									\
    730     if ((oldsize) != (newsize))						\
    731       (ptr) = (type *) (*__gmp_reallocate_func)				\
    732 	(ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));	\
    733   } while (0)
    734 
    735 
    736 /* Dummy for non-gcc, code involving it will go dead. */
    737 #if ! defined (__GNUC__) || __GNUC__ < 2
    738 #define __builtin_constant_p(x)   0
    739 #endif
    740 
    741 
    742 /* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
    743    stack usage is compatible.  __attribute__ ((regparm (N))) helps by
    744    putting leading parameters in registers, avoiding extra stack.
    745 
    746    regparm cannot be used with calls going through the PLT, because the
    747    binding code there may clobber the registers (%eax, %edx, %ecx) used for
    748    the regparm parameters.  Calls to local (ie. static) functions could
    749    still use this, if we cared to differentiate locals and globals.
    750 
    751    On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
    752    -p or -pg profiling, since that version of gcc doesn't realize the
    753    .mcount calls will clobber the parameter registers.  Other systems are
    754    ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
    755    bother to try to detect this.  regparm is only an optimization so we just
    756    disable it when profiling (profiling being a slowdown anyway).  */
    757 
    758 #if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
    759   && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
    760 #define USE_LEADING_REGPARM 1
    761 #else
    762 #define USE_LEADING_REGPARM 0
    763 #endif
    764 
    765 /* Macros for altering parameter order according to regparm usage. */
    766 #if USE_LEADING_REGPARM
    767 #define REGPARM_2_1(a,b,x)    x,a,b
    768 #define REGPARM_3_1(a,b,c,x)  x,a,b,c
    769 #define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
    770 #else
    771 #define REGPARM_2_1(a,b,x)    a,b,x
    772 #define REGPARM_3_1(a,b,c,x)  a,b,c,x
    773 #define REGPARM_ATTR(n)
    774 #endif
    775 
    776 
    777 /* ASM_L gives a local label for a gcc asm block, for use when temporary
    778    local labels like "1:" might not be available, which is the case for
    779    instance on the x86s (the SCO assembler doesn't support them).
    780 
    781    The label generated is made unique by including "%=" which is a unique
    782    number for each insn.  This ensures the same name can be used in multiple
    783    asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
    784    allowed there's no need for a label to be usable outside a single
    785    block.  */
    786 
    787 #define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name
    788 
    789 
    790 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
    791 #if 0
    792 /* FIXME: Check that these actually improve things.
    793    FIXME: Need a cld after each std.
    794    FIXME: Can't have inputs in clobbered registers, must describe them as
    795    dummy outputs, and add volatile. */
    796 #define MPN_COPY_INCR(DST, SRC, N)					\
    797   __asm__ ("cld\n\trep\n\tmovsl" : :					\
    798 	   "D" (DST), "S" (SRC), "c" (N) :				\
    799 	   "cx", "di", "si", "memory")
    800 #define MPN_COPY_DECR(DST, SRC, N)					\
    801   __asm__ ("std\n\trep\n\tmovsl" : :					\
    802 	   "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :	\
    803 	   "cx", "di", "si", "memory")
    804 #endif
    805 #endif
    806 
    807 
    808 __GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
    809 #define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
    810 
    811 #define mpz_n_pow_ui __gmpz_n_pow_ui
    812 __GMP_DECLSPEC void    mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);
    813 
    814 
    815 #define mpn_addmul_1c __MPN(addmul_1c)
    816 __GMP_DECLSPEC mp_limb_t mpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
    817 
    818 #ifndef mpn_addmul_2  /* if not done with cpuvec in a fat binary */
    819 #define mpn_addmul_2 __MPN(addmul_2)
    820 __GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
    821 #endif
    822 
    823 #define mpn_addmul_3 __MPN(addmul_3)
    824 __GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
    825 
    826 #define mpn_addmul_4 __MPN(addmul_4)
    827 __GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
    828 
    829 #define mpn_addmul_5 __MPN(addmul_5)
    830 __GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
    831 
    832 #define mpn_addmul_6 __MPN(addmul_6)
    833 __GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
    834 
    835 #define mpn_addmul_7 __MPN(addmul_7)
    836 __GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
    837 
    838 #define mpn_addmul_8 __MPN(addmul_8)
    839 __GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
    840 
    841 /* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase.  */
    842 #define mpn_addmul_2s __MPN(addmul_2s)
    843 __GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
    844 
    845 /* Override mpn_addlsh1_n, mpn_addlsh2_n, mpn_sublsh1_n, etc with mpn_addlsh_n,
    846    etc when !HAVE_NATIVE the former but HAVE_NATIVE_ the latter.  Similarly,
    847    override foo_ip1 functions with foo.  We then lie and say these macros
    848    represent native functions, but leave a trace by using the value 2 rather
    849    than 1.  */
    850 
    851 #if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh1_n
    852 #define mpn_addlsh1_n(a,b,c,d)          mpn_addlsh_n(a,b,c,d,1)
    853 #define HAVE_NATIVE_mpn_addlsh1_n       2
    854 #endif
    855 
    856 #if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh1_nc
    857 #define mpn_addlsh1_nc(a,b,c,d,x)       mpn_addlsh_nc(a,b,c,d,1,x)
    858 #define HAVE_NATIVE_mpn_addlsh1_nc      2
    859 #endif
    860 
    861 #if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
    862 #define mpn_addlsh1_n_ip1(a,b,n)        mpn_addlsh1_n(a,a,b,n)
    863 #define HAVE_NATIVE_mpn_addlsh1_n_ip1   2
    864 #endif
    865 
    866 #if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
    867 #define mpn_addlsh1_nc_ip1(a,b,n,c)     mpn_addlsh1_nc(a,a,b,n,c)
    868 #define HAVE_NATIVE_mpn_addlsh1_nc_ip1  2
    869 #endif
    870 
    871 #if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh2_n
    872 #define mpn_addlsh2_n(a,b,c,d)          mpn_addlsh_n(a,b,c,d,2)
    873 #define HAVE_NATIVE_mpn_addlsh2_n       2
    874 #endif
    875 
    876 #if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh2_nc
    877 #define mpn_addlsh2_nc(a,b,c,d,x)       mpn_addlsh_nc(a,b,c,d,2,x)
    878 #define HAVE_NATIVE_mpn_addlsh2_nc      2
    879 #endif
    880 
    881 #if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
    882 #define mpn_addlsh2_n_ip1(a,b,n)        mpn_addlsh2_n(a,a,b,n)
    883 #define HAVE_NATIVE_mpn_addlsh2_n_ip1   2
    884 #endif
    885 
    886 #if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
    887 #define mpn_addlsh2_nc_ip1(a,b,n,c)     mpn_addlsh2_nc(a,a,b,n,c)
    888 #define HAVE_NATIVE_mpn_addlsh2_nc_ip1  2
    889 #endif
    890 
    891 #if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh1_n
    892 #define mpn_sublsh1_n(a,b,c,d)          mpn_sublsh_n(a,b,c,d,1)
    893 #define HAVE_NATIVE_mpn_sublsh1_n       2
    894 #endif
    895 
    896 #if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh1_nc
    897 #define mpn_sublsh1_nc(a,b,c,d,x)       mpn_sublsh_nc(a,b,c,d,1,x)
    898 #define HAVE_NATIVE_mpn_sublsh1_nc      2
    899 #endif
    900 
    901 #if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
    902 #define mpn_sublsh1_n_ip1(a,b,n)        mpn_sublsh1_n(a,a,b,n)
    903 #define HAVE_NATIVE_mpn_sublsh1_n_ip1   2
    904 #endif
    905 
    906 #if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
    907 #define mpn_sublsh1_nc_ip1(a,b,n,c)     mpn_sublsh1_nc(a,a,b,n,c)
    908 #define HAVE_NATIVE_mpn_sublsh1_nc_ip1  2
    909 #endif
    910 
    911 #if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh2_n
    912 #define mpn_sublsh2_n(a,b,c,d)          mpn_sublsh_n(a,b,c,d,2)
    913 #define HAVE_NATIVE_mpn_sublsh2_n       2
    914 #endif
    915 
    916 #if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh2_nc
    917 #define mpn_sublsh2_nc(a,b,c,d,x)       mpn_sublsh_nc(a,b,c,d,2,x)
    918 #define HAVE_NATIVE_mpn_sublsh2_nc      2
    919 #endif
    920 
    921 #if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
    922 #define mpn_sublsh2_n_ip1(a,b,n)        mpn_sublsh2_n(a,a,b,n)
    923 #define HAVE_NATIVE_mpn_sublsh2_n_ip1   2
    924 #endif
    925 
    926 #if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
    927 #define mpn_sublsh2_nc_ip1(a,b,n,c)     mpn_sublsh2_nc(a,a,b,n,c)
    928 #define HAVE_NATIVE_mpn_sublsh2_nc_ip1  2
    929 #endif
    930 
    931 #if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh1_n
    932 #define mpn_rsblsh1_n(a,b,c,d)          mpn_rsblsh_n(a,b,c,d,1)
    933 #define HAVE_NATIVE_mpn_rsblsh1_n       2
    934 #endif
    935 
    936 #if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc
    937 #define mpn_rsblsh1_nc(a,b,c,d,x)       mpn_rsblsh_nc(a,b,c,d,1,x)
    938 #define HAVE_NATIVE_mpn_rsblsh1_nc      2
    939 #endif
    940 
    941 #if HAVE_NATIVE_mpn_rsblsh1_n && ! HAVE_NATIVE_mpn_rsblsh1_n_ip1
    942 #define mpn_rsblsh1_n_ip1(a,b,n)        mpn_rsblsh1_n(a,a,b,n)
    943 #define HAVE_NATIVE_mpn_rsblsh1_n_ip1   2
    944 #endif
    945 
    946 #if HAVE_NATIVE_mpn_rsblsh1_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc_ip1
    947 #define mpn_rsblsh1_nc_ip1(a,b,n,c)     mpn_rsblsh1_nc(a,a,b,n,c)
    948 #define HAVE_NATIVE_mpn_rsblsh1_nc_ip1  2
    949 #endif
    950 
    951 #if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh2_n
    952 #define mpn_rsblsh2_n(a,b,c,d)          mpn_rsblsh_n(a,b,c,d,2)
    953 #define HAVE_NATIVE_mpn_rsblsh2_n       2
    954 #endif
    955 
    956 #if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc
    957 #define mpn_rsblsh2_nc(a,b,c,d,x)       mpn_rsblsh_nc(a,b,c,d,2,x)
    958 #define HAVE_NATIVE_mpn_rsblsh2_nc      2
    959 #endif
    960 
    961 #if HAVE_NATIVE_mpn_rsblsh2_n && ! HAVE_NATIVE_mpn_rsblsh2_n_ip1
    962 #define mpn_rsblsh2_n_ip1(a,b,n)        mpn_rsblsh2_n(a,a,b,n)
    963 #define HAVE_NATIVE_mpn_rsblsh2_n_ip1   2
    964 #endif
    965 
    966 #if HAVE_NATIVE_mpn_rsblsh2_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc_ip1
    967 #define mpn_rsblsh2_nc_ip1(a,b,n,c)     mpn_rsblsh2_nc(a,a,b,n,c)
    968 #define HAVE_NATIVE_mpn_rsblsh2_nc_ip1  2
    969 #endif
    970 
    971 
    972 #ifndef mpn_addlsh1_n
    973 #define mpn_addlsh1_n __MPN(addlsh1_n)
    974 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
    975 #endif
    976 #ifndef mpn_addlsh1_nc
    977 #define mpn_addlsh1_nc __MPN(addlsh1_nc)
    978 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
    979 #endif
    980 #ifndef mpn_addlsh1_n_ip1
    981 #define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
    982 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
    983 #endif
    984 #ifndef mpn_addlsh1_nc_ip1
    985 #define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
    986 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
    987 #endif
    988 
    989 #ifndef mpn_addlsh2_n
    990 #define mpn_addlsh2_n __MPN(addlsh2_n)
    991 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
    992 #endif
    993 #ifndef mpn_addlsh2_nc
    994 #define mpn_addlsh2_nc __MPN(addlsh2_nc)
    995 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
    996 #endif
    997 #ifndef mpn_addlsh2_n_ip1
    998 #define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
    999 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
   1000 #endif
   1001 #ifndef mpn_addlsh2_nc_ip1
   1002 #define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
   1003 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
   1004 #endif
   1005 
   1006 #ifndef mpn_addlsh_n
   1007 #define mpn_addlsh_n __MPN(addlsh_n)
   1008 __GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
   1009 #endif
   1010 #ifndef mpn_addlsh_nc
   1011 #define mpn_addlsh_nc __MPN(addlsh_nc)
   1012 __GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
   1013 #endif
   1014 #ifndef mpn_addlsh_n_ip1
   1015 #define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
   1016   __GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
   1017 #endif
   1018 #ifndef mpn_addlsh_nc_ip1
   1019 #define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
   1020 __GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
   1021 #endif
   1022 
   1023 #ifndef mpn_sublsh1_n
   1024 #define mpn_sublsh1_n __MPN(sublsh1_n)
   1025 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1026 #endif
   1027 #ifndef mpn_sublsh1_nc
   1028 #define mpn_sublsh1_nc __MPN(sublsh1_nc)
   1029 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1030 #endif
   1031 #ifndef mpn_sublsh1_n_ip1
   1032 #define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
   1033 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
   1034 #endif
   1035 #ifndef mpn_sublsh1_nc_ip1
   1036 #define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
   1037 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
   1038 #endif
   1039 
   1040 #ifndef mpn_sublsh2_n
   1041 #define mpn_sublsh2_n __MPN(sublsh2_n)
   1042 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1043 #endif
   1044 #ifndef mpn_sublsh2_nc
   1045 #define mpn_sublsh2_nc __MPN(sublsh2_nc)
   1046 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1047 #endif
   1048 #ifndef mpn_sublsh2_n_ip1
   1049 #define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
   1050 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
   1051 #endif
   1052 #ifndef mpn_sublsh2_nc_ip1
   1053 #define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
   1054 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
   1055 #endif
   1056 
   1057 #ifndef mpn_sublsh_n
   1058 #define mpn_sublsh_n __MPN(sublsh_n)
   1059 __GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
   1060 #endif
   1061 #ifndef mpn_sublsh_nc
   1062 #define mpn_sublsh_nc __MPN(sublsh_nc)
   1063 __GMP_DECLSPEC mp_limb_t mpn_sublsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
   1064 #endif
   1065 #ifndef mpn_sublsh_n_ip1
   1066 #define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
   1067   __GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
   1068 #endif
   1069 #ifndef mpn_sublsh_nc_ip1
   1070 #define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
   1071 __GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
   1072 #endif
   1073 
   1074 #define mpn_rsblsh1_n __MPN(rsblsh1_n)
   1075 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1076 #define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
   1077 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1078 
   1079 #define mpn_rsblsh2_n __MPN(rsblsh2_n)
   1080 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1081 #define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
   1082 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1083 
   1084 #define mpn_rsblsh_n __MPN(rsblsh_n)
   1085 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
   1086 #define mpn_rsblsh_nc __MPN(rsblsh_nc)
   1087 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
   1088 
   1089 #define mpn_rsh1add_n __MPN(rsh1add_n)
   1090 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1091 #define mpn_rsh1add_nc __MPN(rsh1add_nc)
   1092 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1093 
   1094 #define mpn_rsh1sub_n __MPN(rsh1sub_n)
   1095 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1096 #define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
   1097 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1098 
   1099 #ifndef mpn_lshiftc  /* if not done with cpuvec in a fat binary */
   1100 #define mpn_lshiftc __MPN(lshiftc)
   1101 __GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
   1102 #endif
   1103 
   1104 #define mpn_add_err1_n  __MPN(add_err1_n)
   1105 __GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
   1106 
   1107 #define mpn_add_err2_n  __MPN(add_err2_n)
   1108 __GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1109 
   1110 #define mpn_add_err3_n  __MPN(add_err3_n)
   1111 __GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1112 
   1113 #define mpn_sub_err1_n  __MPN(sub_err1_n)
   1114 __GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
   1115 
   1116 #define mpn_sub_err2_n  __MPN(sub_err2_n)
   1117 __GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1118 
   1119 #define mpn_sub_err3_n  __MPN(sub_err3_n)
   1120 __GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1121 
   1122 #define mpn_add_n_sub_n __MPN(add_n_sub_n)
   1123 __GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1124 
   1125 #define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
   1126 __GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   1127 
   1128 #define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
   1129 __GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
   1130 
   1131 #define mpn_divrem_1c __MPN(divrem_1c)
   1132 __GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
   1133 
   1134 #define mpn_dump __MPN(dump)
   1135 __GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);
   1136 
   1137 #define mpn_fib2_ui __MPN(fib2_ui)
   1138 __GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);
   1139 
   1140 #define mpn_fib2m __MPN(fib2m)
   1141 __GMP_DECLSPEC int mpn_fib2m (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
   1142 
   1143 #define mpn_strongfibo __MPN(strongfibo)
   1144 __GMP_DECLSPEC int mpn_strongfibo (mp_srcptr, mp_size_t, mp_ptr);
   1145 
   1146 /* Remap names of internal mpn functions.  */
   1147 #define __clz_tab               __MPN(clz_tab)
   1148 #define mpn_udiv_w_sdiv		__MPN(udiv_w_sdiv)
   1149 
   1150 #define mpn_jacobi_base __MPN(jacobi_base)
   1151 __GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;
   1152 
   1153 #define mpn_jacobi_2 __MPN(jacobi_2)
   1154 __GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);
   1155 
   1156 #define mpn_jacobi_n __MPN(jacobi_n)
   1157 __GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);
   1158 
   1159 #define mpn_mod_1c __MPN(mod_1c)
   1160 __GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
   1161 
   1162 #define mpn_mul_1c __MPN(mul_1c)
   1163 __GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
   1164 
   1165 #define mpn_mul_2 __MPN(mul_2)
   1166 __GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
   1167 
   1168 #define mpn_mul_3 __MPN(mul_3)
   1169 __GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
   1170 
   1171 #define mpn_mul_4 __MPN(mul_4)
   1172 __GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
   1173 
   1174 #define mpn_mul_5 __MPN(mul_5)
   1175 __GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
   1176 
   1177 #define mpn_mul_6 __MPN(mul_6)
   1178 __GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
   1179 
   1180 #ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
   1181 #define mpn_mul_basecase __MPN(mul_basecase)
   1182 __GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
   1183 #endif
   1184 
   1185 #define mpn_mullo_n __MPN(mullo_n)
   1186 __GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1187 
   1188 #ifndef mpn_mullo_basecase  /* if not done with cpuvec in a fat binary */
   1189 #define mpn_mullo_basecase __MPN(mullo_basecase)
   1190 __GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1191 #endif
   1192 
   1193 #ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
   1194 #define mpn_sqr_basecase __MPN(sqr_basecase)
   1195 __GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
   1196 #endif
   1197 
   1198 #define mpn_sqrlo __MPN(sqrlo)
   1199 __GMP_DECLSPEC void mpn_sqrlo (mp_ptr, mp_srcptr, mp_size_t);
   1200 
   1201 #define mpn_sqrlo_basecase __MPN(sqrlo_basecase)
   1202 __GMP_DECLSPEC void mpn_sqrlo_basecase (mp_ptr, mp_srcptr, mp_size_t);
   1203 
   1204 #define mpn_mulmid_basecase __MPN(mulmid_basecase)
   1205 __GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
   1206 
   1207 #define mpn_mulmid_n __MPN(mulmid_n)
   1208 __GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1209 
   1210 #define mpn_mulmid __MPN(mulmid)
   1211 __GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
   1212 
   1213 #define mpn_submul_1c __MPN(submul_1c)
   1214 __GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
   1215 
   1216 #ifndef mpn_redc_1  /* if not done with cpuvec in a fat binary */
   1217 #define mpn_redc_1 __MPN(redc_1)
   1218 __GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
   1219 #endif
   1220 
   1221 #ifndef mpn_redc_2  /* if not done with cpuvec in a fat binary */
   1222 #define mpn_redc_2 __MPN(redc_2)
   1223 __GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
   1224 #endif
   1225 
   1226 #define mpn_redc_n __MPN(redc_n)
   1227 __GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
   1228 
   1229 
   1230 #ifndef mpn_mod_1_1p_cps  /* if not done with cpuvec in a fat binary */
   1231 #define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
   1232 __GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
   1233 #endif
   1234 #ifndef mpn_mod_1_1p  /* if not done with cpuvec in a fat binary */
   1235 #define mpn_mod_1_1p __MPN(mod_1_1p)
   1236 __GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
   1237 #endif
   1238 
   1239 #ifndef mpn_mod_1s_2p_cps  /* if not done with cpuvec in a fat binary */
   1240 #define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
   1241 __GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
   1242 #endif
   1243 #ifndef mpn_mod_1s_2p  /* if not done with cpuvec in a fat binary */
   1244 #define mpn_mod_1s_2p __MPN(mod_1s_2p)
   1245 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
   1246 #endif
   1247 
   1248 #ifndef mpn_mod_1s_3p_cps  /* if not done with cpuvec in a fat binary */
   1249 #define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
   1250 __GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
   1251 #endif
   1252 #ifndef mpn_mod_1s_3p  /* if not done with cpuvec in a fat binary */
   1253 #define mpn_mod_1s_3p __MPN(mod_1s_3p)
   1254 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
   1255 #endif
   1256 
   1257 #ifndef mpn_mod_1s_4p_cps  /* if not done with cpuvec in a fat binary */
   1258 #define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
   1259 __GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
   1260 #endif
   1261 #ifndef mpn_mod_1s_4p  /* if not done with cpuvec in a fat binary */
   1262 #define mpn_mod_1s_4p __MPN(mod_1s_4p)
   1263 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
   1264 #endif
   1265 
   1266 #define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
   1267 __GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
   1268 #define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
   1269 __GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1270 #define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
   1271 __GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
   1272 static inline mp_size_t
   1273 mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
   1274   mp_size_t n, itch;
   1275   n = rn >> 1;
   1276   itch = rn + 4 +
   1277     (an > n ? (bn > n ? rn : n) : 0);
   1278   return itch;
   1279 }
   1280 
   1281 #define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
   1282 __GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1283 #define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
   1284 __GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
   1285 static inline mp_size_t
   1286 mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
   1287   mp_size_t n, itch;
   1288   n = rn >> 1;
   1289   itch = rn + 3 +
   1290     (an > n ? an : 0);
   1291   return itch;
   1292 }
   1293 
   1294 typedef __gmp_randstate_struct *gmp_randstate_ptr;
   1295 typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
   1296 
   1297 /* Pseudo-random number generator function pointers structure.  */
   1298 typedef struct {
   1299   void (*randseed_fn) (gmp_randstate_t, mpz_srcptr);
   1300   void (*randget_fn) (gmp_randstate_t, mp_ptr, unsigned long int);
   1301   void (*randclear_fn) (gmp_randstate_t);
   1302   void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
   1303 } gmp_randfnptr_t;
   1304 
   1305 /* Macro to obtain a void pointer to the function pointers structure.  */
   1306 #define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)
   1307 
   1308 /* Macro to obtain a pointer to the generator's state.
   1309    When used as a lvalue the rvalue needs to be cast to mp_ptr.  */
   1310 #define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
   1311 
   1312 /* Write a given number of random bits to rp.  */
   1313 #define _gmp_rand(rp, state, bits)					\
   1314   do {									\
   1315     gmp_randstate_ptr  __rstate = (state);				\
   1316     (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)		\
   1317       (__rstate, rp, bits);						\
   1318   } while (0)
   1319 
   1320 __GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_t);
   1321 
   1322 
   1323 /* __gmp_rands is the global state for the old-style random functions, and
   1324    is also used in the test programs (hence the __GMP_DECLSPEC).
   1325 
   1326    There's no seeding here, so mpz_random etc will generate the same
   1327    sequence every time.  This is not unlike the C library random functions
   1328    if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   1329    from /dev/random or the like would work on many systems, but might
   1330    encourage a false confidence, since it'd be pretty much impossible to do
   1331    something that would work reliably everywhere.  In any case the new style
   1332    functions are recommended to applications which care about randomness, so
   1333    the old functions aren't too important.  */
   1334 
   1335 __GMP_DECLSPEC extern char             __gmp_rands_initialized;
   1336 __GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;
   1337 
   1338 #define RANDS								\
   1339   ((__gmp_rands_initialized ? 0						\
   1340     : (__gmp_rands_initialized = 1,					\
   1341        __gmp_randinit_mt_noseed (__gmp_rands), 0)),			\
   1342    __gmp_rands)
   1343 
   1344 /* this is used by the test programs, to free memory */
   1345 #define RANDS_CLEAR()							\
   1346   do {									\
   1347     if (__gmp_rands_initialized)					\
   1348       {									\
   1349 	__gmp_rands_initialized = 0;					\
   1350 	gmp_randclear (__gmp_rands);					\
   1351       }									\
   1352   } while (0)
   1353 
   1354 
   1355 /* For a threshold between algorithms A and B, size>=thresh is where B
   1356    should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   1357    value 0 means only ever use B.  The tests for these special values will
   1358    be compile-time constants, so the compiler should be able to eliminate
   1359    the code for the unwanted algorithm.  */
   1360 
   1361 #if ! defined (__GNUC__) || __GNUC__ < 2
   1362 #define ABOVE_THRESHOLD(size,thresh)					\
   1363   ((thresh) == 0							\
   1364    || ((thresh) != MP_SIZE_T_MAX					\
   1365        && (size) >= (thresh)))
   1366 #else
   1367 #define ABOVE_THRESHOLD(size,thresh)					\
   1368   ((__builtin_constant_p (thresh) && (thresh) == 0)			\
   1369    || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX)	\
   1370        && (size) >= (thresh)))
   1371 #endif
   1372 #define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
   1373 
   1374 /* The minimal supported value for Toom22 depends also on Toom32 and
   1375    Toom42 implementations. */
   1376 #define MPN_TOOM22_MUL_MINSIZE    6
   1377 #define MPN_TOOM2_SQR_MINSIZE     4
   1378 
   1379 #define MPN_TOOM33_MUL_MINSIZE   17
   1380 #define MPN_TOOM3_SQR_MINSIZE    17
   1381 
   1382 #define MPN_TOOM44_MUL_MINSIZE   30
   1383 #define MPN_TOOM4_SQR_MINSIZE    30
   1384 
   1385 #define MPN_TOOM6H_MUL_MINSIZE   46
   1386 #define MPN_TOOM6_SQR_MINSIZE    46
   1387 
   1388 #define MPN_TOOM8H_MUL_MINSIZE   86
   1389 #define MPN_TOOM8_SQR_MINSIZE    86
   1390 
   1391 #define MPN_TOOM32_MUL_MINSIZE   10
   1392 #define MPN_TOOM42_MUL_MINSIZE   10
   1393 #define MPN_TOOM43_MUL_MINSIZE   25
   1394 #define MPN_TOOM53_MUL_MINSIZE   17
   1395 #define MPN_TOOM54_MUL_MINSIZE   31
   1396 #define MPN_TOOM63_MUL_MINSIZE   49
   1397 
   1398 #define MPN_TOOM42_MULMID_MINSIZE    4
   1399 
   1400 #define   mpn_sqr_diagonal __MPN(sqr_diagonal)
   1401 __GMP_DECLSPEC void      mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);
   1402 
   1403 #define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
   1404 __GMP_DECLSPEC void      mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
   1405 
   1406 #define   mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
   1407 __GMP_DECLSPEC void      mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);
   1408 
   1409 enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
   1410 #define   mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
   1411 __GMP_DECLSPEC void      mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);
   1412 
   1413 enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
   1414 #define   mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
   1415 __GMP_DECLSPEC void      mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
   1416 
   1417 #define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
   1418 __GMP_DECLSPEC void      mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
   1419 
   1420 #define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
   1421 __GMP_DECLSPEC void      mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
   1422 
   1423 #define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
   1424 __GMP_DECLSPEC void      mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
   1425 
   1426 #define   mpn_toom_couple_handling __MPN(toom_couple_handling)
   1427 __GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);
   1428 
   1429 #define   mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
   1430 __GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
   1431 
   1432 #define   mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
   1433 __GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
   1434 
   1435 #define   mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
   1436 __GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
   1437 
   1438 #define   mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
   1439 __GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
   1440 
   1441 #define   mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
   1442 __GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
   1443 
   1444 #define   mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
   1445 __GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
   1446 
   1447 #define   mpn_toom22_mul __MPN(toom22_mul)
   1448 __GMP_DECLSPEC void      mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1449 
   1450 #define   mpn_toom32_mul __MPN(toom32_mul)
   1451 __GMP_DECLSPEC void      mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1452 
   1453 #define   mpn_toom42_mul __MPN(toom42_mul)
   1454 __GMP_DECLSPEC void      mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1455 
   1456 #define   mpn_toom52_mul __MPN(toom52_mul)
   1457 __GMP_DECLSPEC void      mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1458 
   1459 #define   mpn_toom62_mul __MPN(toom62_mul)
   1460 __GMP_DECLSPEC void      mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1461 
   1462 #define   mpn_toom2_sqr __MPN(toom2_sqr)
   1463 __GMP_DECLSPEC void      mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
   1464 
   1465 #define   mpn_toom33_mul __MPN(toom33_mul)
   1466 __GMP_DECLSPEC void      mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1467 
   1468 #define   mpn_toom43_mul __MPN(toom43_mul)
   1469 __GMP_DECLSPEC void      mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1470 
   1471 #define   mpn_toom53_mul __MPN(toom53_mul)
   1472 __GMP_DECLSPEC void      mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1473 
   1474 #define   mpn_toom54_mul __MPN(toom54_mul)
   1475 __GMP_DECLSPEC void      mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1476 
   1477 #define   mpn_toom63_mul __MPN(toom63_mul)
   1478 __GMP_DECLSPEC void      mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1479 
   1480 #define   mpn_toom3_sqr __MPN(toom3_sqr)
   1481 __GMP_DECLSPEC void      mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
   1482 
   1483 #define   mpn_toom44_mul __MPN(toom44_mul)
   1484 __GMP_DECLSPEC void      mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1485 
   1486 #define   mpn_toom4_sqr __MPN(toom4_sqr)
   1487 __GMP_DECLSPEC void      mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
   1488 
   1489 #define   mpn_toom6h_mul __MPN(toom6h_mul)
   1490 __GMP_DECLSPEC void      mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1491 
   1492 #define   mpn_toom6_sqr __MPN(toom6_sqr)
   1493 __GMP_DECLSPEC void      mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
   1494 
   1495 #define   mpn_toom8h_mul __MPN(toom8h_mul)
   1496 __GMP_DECLSPEC void      mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1497 
   1498 #define   mpn_toom8_sqr __MPN(toom8_sqr)
   1499 __GMP_DECLSPEC void      mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
   1500 
   1501 #define   mpn_toom42_mulmid __MPN(toom42_mulmid)
   1502 __GMP_DECLSPEC void      mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
   1503 
   1504 #define   mpn_fft_best_k __MPN(fft_best_k)
   1505 __GMP_DECLSPEC int       mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;
   1506 
   1507 #define   mpn_mul_fft __MPN(mul_fft)
   1508 __GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
   1509 
   1510 #define   mpn_mul_fft_full __MPN(mul_fft_full)
   1511 __GMP_DECLSPEC void      mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
   1512 
   1513 #define   mpn_nussbaumer_mul __MPN(nussbaumer_mul)
   1514 __GMP_DECLSPEC void      mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
   1515 
   1516 #define   mpn_fft_next_size __MPN(fft_next_size)
   1517 __GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;
   1518 
   1519 #define   mpn_div_qr_1n_pi1 __MPN(div_qr_1n_pi1)
   1520   __GMP_DECLSPEC mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
   1521 
   1522 #define   mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
   1523   __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
   1524 
   1525 #define   mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
   1526   __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t);
   1527 
   1528 #define   mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
   1529 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
   1530 
   1531 #define   mpn_sbpi1_div_q __MPN(sbpi1_div_q)
   1532 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
   1533 
   1534 #define   mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
   1535 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
   1536 
   1537 #define   mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
   1538 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
   1539 #define   mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
   1540 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
   1541 
   1542 #define   mpn_dcpi1_div_q __MPN(dcpi1_div_q)
   1543 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
   1544 
   1545 #define   mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
   1546 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
   1547 
   1548 #define   mpn_mu_div_qr __MPN(mu_div_qr)
   1549 __GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1550 #define   mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
   1551 __GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
   1552 
   1553 #define   mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
   1554 __GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1555 #define   mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
   1556 __GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t) ATTRIBUTE_CONST;
   1557 
   1558 #define   mpn_mu_divappr_q __MPN(mu_divappr_q)
   1559 __GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1560 #define   mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
   1561 __GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
   1562 
   1563 #define   mpn_mu_div_q __MPN(mu_div_q)
   1564 __GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1565 #define   mpn_mu_div_q_itch __MPN(mu_div_q_itch)
   1566 __GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
   1567 
   1568 #define  mpn_div_q __MPN(div_q)
   1569 __GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1570 
   1571 #define   mpn_invert __MPN(invert)
   1572 __GMP_DECLSPEC void      mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
   1573 #define mpn_invert_itch(n)  mpn_invertappr_itch(n)
   1574 
   1575 #define   mpn_ni_invertappr __MPN(ni_invertappr)
   1576 __GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
   1577 #define   mpn_invertappr __MPN(invertappr)
   1578 __GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
   1579 #define mpn_invertappr_itch(n)  (2 * (n))
   1580 
   1581 #define   mpn_binvert __MPN(binvert)
   1582 __GMP_DECLSPEC void      mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
   1583 #define   mpn_binvert_itch __MPN(binvert_itch)
   1584 __GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t) ATTRIBUTE_CONST;
   1585 
   1586 #define mpn_bdiv_q_1 __MPN(bdiv_q_1)
   1587 __GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
   1588 
   1589 #define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
   1590 __GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
   1591 
   1592 #define   mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
   1593 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
   1594 
   1595 #define   mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
   1596 __GMP_DECLSPEC void      mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
   1597 
   1598 #define   mpn_sbpi1_bdiv_r __MPN(sbpi1_bdiv_r)
   1599 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
   1600 
   1601 #define   mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
   1602 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
   1603 #define   mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
   1604 __GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t) ATTRIBUTE_CONST;
   1605 
   1606 #define   mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
   1607 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
   1608 #define   mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
   1609 __GMP_DECLSPEC void      mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
   1610 
   1611 #define   mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
   1612 __GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1613 #define   mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
   1614 __GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
   1615 
   1616 #define   mpn_mu_bdiv_q __MPN(mu_bdiv_q)
   1617 __GMP_DECLSPEC void      mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1618 #define   mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
   1619 __GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
   1620 
   1621 #define   mpn_bdiv_qr __MPN(bdiv_qr)
   1622 __GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1623 #define   mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
   1624 __GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
   1625 
   1626 #define   mpn_bdiv_q __MPN(bdiv_q)
   1627 __GMP_DECLSPEC void      mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1628 #define   mpn_bdiv_q_itch __MPN(bdiv_q_itch)
   1629 __GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
   1630 
   1631 #define   mpn_divexact __MPN(divexact)
   1632 __GMP_DECLSPEC void      mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
   1633 #define   mpn_divexact_itch __MPN(divexact_itch)
   1634 __GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
   1635 
   1636 #ifndef mpn_bdiv_dbm1c  /* if not done with cpuvec in a fat binary */
   1637 #define   mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
   1638 __GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
   1639 #endif
   1640 
   1641 #define   mpn_bdiv_dbm1(dst, src, size, divisor) \
   1642   mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
   1643 
   1644 #define   mpn_powm __MPN(powm)
   1645 __GMP_DECLSPEC void      mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
   1646 #define   mpn_powlo __MPN(powlo)
   1647 __GMP_DECLSPEC void      mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
   1648 
   1649 #define mpn_sec_pi1_div_qr __MPN(sec_pi1_div_qr)
   1650 __GMP_DECLSPEC mp_limb_t mpn_sec_pi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
   1651 #define mpn_sec_pi1_div_r __MPN(sec_pi1_div_r)
   1652 __GMP_DECLSPEC void mpn_sec_pi1_div_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
   1653 
   1654 
   1655 #ifndef DIVEXACT_BY3_METHOD
   1656 #if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
   1657 #define DIVEXACT_BY3_METHOD 0	/* default to using mpn_bdiv_dbm1c */
   1658 #else
   1659 #define DIVEXACT_BY3_METHOD 1
   1660 #endif
   1661 #endif
   1662 
   1663 #if DIVEXACT_BY3_METHOD == 0
   1664 #undef mpn_divexact_by3
   1665 #define mpn_divexact_by3(dst,src,size) \
   1666   (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
   1667 /* override mpn_divexact_by3c defined in gmp.h */
   1668 /*
   1669 #undef mpn_divexact_by3c
   1670 #define mpn_divexact_by3c(dst,src,size,cy) \
   1671   (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * cy)))
   1672 */
   1673 #endif
   1674 
   1675 #if GMP_NUMB_BITS % 4 == 0
   1676 #define mpn_divexact_by5(dst,src,size) \
   1677   (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
   1678 #endif
   1679 
   1680 #if GMP_NUMB_BITS % 3 == 0
   1681 #define mpn_divexact_by7(dst,src,size) \
   1682   (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
   1683 #endif
   1684 
   1685 #if GMP_NUMB_BITS % 6 == 0
   1686 #define mpn_divexact_by9(dst,src,size) \
   1687   (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
   1688 #endif
   1689 
   1690 #if GMP_NUMB_BITS % 10 == 0
   1691 #define mpn_divexact_by11(dst,src,size) \
   1692   (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
   1693 #endif
   1694 
   1695 #if GMP_NUMB_BITS % 12 == 0
   1696 #define mpn_divexact_by13(dst,src,size) \
   1697   (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
   1698 #endif
   1699 
   1700 #if GMP_NUMB_BITS % 4 == 0
   1701 #define mpn_divexact_by15(dst,src,size) \
   1702   (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
   1703 #endif
   1704 
   1705 #define mpz_divexact_gcd  __gmpz_divexact_gcd
   1706 __GMP_DECLSPEC void    mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
   1707 
   1708 #define mpz_prodlimbs  __gmpz_prodlimbs
   1709 __GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t);
   1710 
   1711 #define mpz_oddfac_1  __gmpz_oddfac_1
   1712 __GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned);
   1713 
   1714 #define mpz_stronglucas  __gmpz_stronglucas
   1715 __GMP_DECLSPEC int mpz_stronglucas (mpz_srcptr, mpz_ptr, mpz_ptr);
   1716 
   1717 #define mpz_lucas_mod  __gmpz_lucas_mod
   1718 __GMP_DECLSPEC int mpz_lucas_mod (mpz_ptr, mpz_ptr, long, mp_bitcnt_t, mpz_srcptr, mpz_ptr, mpz_ptr);
   1719 
   1720 #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
   1721 #ifdef _GMP_H_HAVE_FILE
   1722 __GMP_DECLSPEC size_t  mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t);
   1723 #endif
   1724 
   1725 #define mpn_divisible_p __MPN(divisible_p)
   1726 __GMP_DECLSPEC int     mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
   1727 
   1728 #define   mpn_rootrem __MPN(rootrem)
   1729 __GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
   1730 
   1731 #define mpn_broot __MPN(broot)
   1732 __GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
   1733 
   1734 #define mpn_broot_invm1 __MPN(broot_invm1)
   1735 __GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
   1736 
   1737 #define mpn_brootinv __MPN(brootinv)
   1738 __GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
   1739 
   1740 #define mpn_bsqrt __MPN(bsqrt)
   1741 __GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
   1742 
   1743 #define mpn_bsqrtinv __MPN(bsqrtinv)
   1744 __GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
   1745 
   1746 #if defined (_CRAY)
   1747 #define MPN_COPY_INCR(dst, src, n)					\
   1748   do {									\
   1749     int __i;		/* Faster on some Crays with plain int */	\
   1750     _Pragma ("_CRI ivdep");						\
   1751     for (__i = 0; __i < (n); __i++)					\
   1752       (dst)[__i] = (src)[__i];						\
   1753   } while (0)
   1754 #endif
   1755 
   1756 /* used by test programs, hence __GMP_DECLSPEC */
   1757 #ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
   1758 #define mpn_copyi __MPN(copyi)
   1759 __GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
   1760 #endif
   1761 
   1762 #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
   1763 #define MPN_COPY_INCR(dst, src, size)					\
   1764   do {									\
   1765     ASSERT ((size) >= 0);						\
   1766     ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));			\
   1767     mpn_copyi (dst, src, size);						\
   1768   } while (0)
   1769 #endif
   1770 
   1771 /* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
   1772 #if ! defined (MPN_COPY_INCR)
   1773 #define MPN_COPY_INCR(dst, src, n)					\
   1774   do {									\
   1775     ASSERT ((n) >= 0);							\
   1776     ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));				\
   1777     if ((n) != 0)							\
   1778       {									\
   1779 	mp_size_t __n = (n) - 1;					\
   1780 	mp_ptr __dst = (dst);						\
   1781 	mp_srcptr __src = (src);					\
   1782 	mp_limb_t __x;							\
   1783 	__x = *__src++;							\
   1784 	if (__n != 0)							\
   1785 	  {								\
   1786 	    do								\
   1787 	      {								\
   1788 		*__dst++ = __x;						\
   1789 		__x = *__src++;						\
   1790 	      }								\
   1791 	    while (--__n);						\
   1792 	  }								\
   1793 	*__dst++ = __x;							\
   1794       }									\
   1795   } while (0)
   1796 #endif
   1797 
   1798 
   1799 #if defined (_CRAY)
   1800 #define MPN_COPY_DECR(dst, src, n)					\
   1801   do {									\
   1802     int __i;		/* Faster on some Crays with plain int */	\
   1803     _Pragma ("_CRI ivdep");						\
   1804     for (__i = (n) - 1; __i >= 0; __i--)				\
   1805       (dst)[__i] = (src)[__i];						\
   1806   } while (0)
   1807 #endif
   1808 
   1809 /* used by test programs, hence __GMP_DECLSPEC */
   1810 #ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
   1811 #define mpn_copyd __MPN(copyd)
   1812 __GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
   1813 #endif
   1814 
   1815 #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
   1816 #define MPN_COPY_DECR(dst, src, size)					\
   1817   do {									\
   1818     ASSERT ((size) >= 0);						\
   1819     ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));			\
   1820     mpn_copyd (dst, src, size);						\
   1821   } while (0)
   1822 #endif
   1823 
   1824 /* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
   1825 #if ! defined (MPN_COPY_DECR)
   1826 #define MPN_COPY_DECR(dst, src, n)					\
   1827   do {									\
   1828     ASSERT ((n) >= 0);							\
   1829     ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));				\
   1830     if ((n) != 0)							\
   1831       {									\
   1832 	mp_size_t __n = (n) - 1;					\
   1833 	mp_ptr __dst = (dst) + __n;					\
   1834 	mp_srcptr __src = (src) + __n;					\
   1835 	mp_limb_t __x;							\
   1836 	__x = *__src--;							\
   1837 	if (__n != 0)							\
   1838 	  {								\
   1839 	    do								\
   1840 	      {								\
   1841 		*__dst-- = __x;						\
   1842 		__x = *__src--;						\
   1843 	      }								\
   1844 	    while (--__n);						\
   1845 	  }								\
   1846 	*__dst-- = __x;							\
   1847       }									\
   1848   } while (0)
   1849 #endif
   1850 
   1851 
   1852 #ifndef MPN_COPY
   1853 #define MPN_COPY(d,s,n)							\
   1854   do {									\
   1855     ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));				\
   1856     MPN_COPY_INCR (d, s, n);						\
   1857   } while (0)
   1858 #endif
   1859 
   1860 
   1861 /* Set {dst,size} to the limbs of {src,size} in reverse order. */
   1862 #define MPN_REVERSE(dst, src, size)					\
   1863   do {									\
   1864     mp_ptr     __dst = (dst);						\
   1865     mp_size_t  __size = (size);						\
   1866     mp_srcptr  __src = (src) + __size - 1;				\
   1867     mp_size_t  __i;							\
   1868     ASSERT ((size) >= 0);						\
   1869     ASSERT (! MPN_OVERLAP_P (dst, size, src, size));			\
   1870     CRAY_Pragma ("_CRI ivdep");						\
   1871     for (__i = 0; __i < __size; __i++)					\
   1872       {									\
   1873 	*__dst = *__src;						\
   1874 	__dst++;							\
   1875 	__src--;							\
   1876       }									\
   1877   } while (0)
   1878 
   1879 
   1880 /* Zero n limbs at dst.
   1881 
   1882    For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
   1883    ppc630 for instance this is optimal since it can sustain only 1 store per
   1884    cycle.
   1885 
   1886    gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
   1887    "for" loop in the generic code below can become stu/bdnz.  The do/while
   1888    here helps it get to that.  The same caveat about plain -mpowerpc64 mode
   1889    applies here as to __GMPN_COPY_INCR in gmp.h.
   1890 
   1891    xlc 3.1 already generates stu/bdnz from the generic C, and does so from
   1892    this loop too.
   1893 
   1894    Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
   1895    at a time.  MPN_ZERO isn't all that important in GMP, so it might be more
   1896    trouble than it's worth to do the same, though perhaps a call to memset
   1897    would be good when on a GNU system.  */
   1898 
   1899 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
   1900 #define MPN_FILL(dst, n, f)						\
   1901   do {									\
   1902     mp_ptr __dst = (dst) - 1;						\
   1903     mp_size_t __n = (n);						\
   1904     ASSERT (__n > 0);							\
   1905     do									\
   1906       *++__dst = (f);							\
   1907     while (--__n);							\
   1908   } while (0)
   1909 #endif
   1910 
   1911 #ifndef MPN_FILL
   1912 #define MPN_FILL(dst, n, f)						\
   1913   do {									\
   1914     mp_ptr __dst = (dst);						\
   1915     mp_size_t __n = (n);						\
   1916     ASSERT (__n > 0);							\
   1917     do									\
   1918       *__dst++ = (f);							\
   1919     while (--__n);							\
   1920   } while (0)
   1921 #endif
   1922 
   1923 #define MPN_ZERO(dst, n)						\
   1924   do {									\
   1925     ASSERT ((n) >= 0);							\
   1926     if ((n) != 0)							\
   1927       MPN_FILL (dst, n, CNST_LIMB (0));					\
   1928   } while (0)
   1929 
   1930 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
   1931    start up and would need to strip a lot of zeros before it'd be faster
   1932    than a simple cmpl loop.  Here are some times in cycles for
   1933    std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
   1934    low zeros).
   1935 
   1936 		std   cld
   1937 	   P5    18    16
   1938 	   P6    46    38
   1939 	   K6    36    13
   1940 	   K7    21    20
   1941 */
   1942 #ifndef MPN_NORMALIZE
   1943 #define MPN_NORMALIZE(DST, NLIMBS) \
   1944   do {									\
   1945     while ((NLIMBS) > 0)						\
   1946       {									\
   1947 	if ((DST)[(NLIMBS) - 1] != 0)					\
   1948 	  break;							\
   1949 	(NLIMBS)--;							\
   1950       }									\
   1951   } while (0)
   1952 #endif
   1953 #ifndef MPN_NORMALIZE_NOT_ZERO
   1954 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)				\
   1955   do {									\
   1956     while (1)								\
   1957       {									\
   1958 	ASSERT ((NLIMBS) >= 1);						\
   1959 	if ((DST)[(NLIMBS) - 1] != 0)					\
   1960 	  break;							\
   1961 	(NLIMBS)--;							\
   1962       }									\
   1963   } while (0)
   1964 #endif
   1965 
   1966 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
   1967    and decrementing size.  low should be ptr[0], and will be the new ptr[0]
   1968    on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
   1969    somewhere a non-zero limb.  */
   1970 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)			\
   1971   do {									\
   1972     ASSERT ((size) >= 1);						\
   1973     ASSERT ((low) == (ptr)[0]);						\
   1974 									\
   1975     while ((low) == 0)							\
   1976       {									\
   1977 	(size)--;							\
   1978 	ASSERT ((size) >= 1);						\
   1979 	(ptr)++;							\
   1980 	(low) = *(ptr);							\
   1981       }									\
   1982   } while (0)
   1983 
   1984 /* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   1985    temporary variable; it will be automatically cleared out at function
   1986    return.  We use __x here to make it possible to accept both mpz_ptr and
   1987    mpz_t arguments.  */
   1988 #define MPZ_TMP_INIT(X, NLIMBS)						\
   1989   do {									\
   1990     mpz_ptr __x = (X);							\
   1991     ASSERT ((NLIMBS) >= 1);						\
   1992     __x->_mp_alloc = (NLIMBS);						\
   1993     __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS);				\
   1994   } while (0)
   1995 
   1996 #if WANT_ASSERT
   1997 static inline void *
   1998 _mpz_newalloc (mpz_ptr z, mp_size_t n)
   1999 {
   2000   void * res = _mpz_realloc(z,n);
   2001   /* If we are checking the code, force a random change to limbs. */
   2002   ((mp_ptr) res)[0] = ~ ((mp_ptr) res)[ALLOC (z) - 1];
   2003   return res;
   2004 }
   2005 #else
   2006 #define _mpz_newalloc _mpz_realloc
   2007 #endif
   2008 /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
   2009 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))			\
   2010 			  ? (mp_ptr) _mpz_realloc(z,n)			\
   2011 			  : PTR(z))
   2012 #define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))			\
   2013 			   ? (mp_ptr) _mpz_newalloc(z,n)		\
   2014 			   : PTR(z))
   2015 
   2016 #define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
   2017 
   2018 
   2019 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
   2020    f1p.
   2021 
   2022    From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
   2023    nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
   2024    number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
   2025 
   2026    The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
   2027    without good floating point.  There's +2 for rounding up, and a further
   2028    +2 since at the last step x limbs are doubled into a 2x+1 limb region
   2029    whereas the actual F[2k] value might be only 2x-1 limbs.
   2030 
   2031    Note that a division is done first, since on a 32-bit system it's at
   2032    least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
   2033    about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
   2034    whatever a multiply of two 190Mbyte numbers takes.)
   2035 
   2036    Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
   2037    worked into the multiplier.  */
   2038 
   2039 #define MPN_FIB2_SIZE(n) \
   2040   ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
   2041 
   2042 
   2043 /* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
   2044    -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
   2045 
   2046    FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
   2047    F[n] + 2*F[n-1] fits in a limb.  */
   2048 
   2049 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
   2050 #define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])
   2051 
   2052 extern const mp_limb_t __gmp_oddfac_table[];
   2053 extern const mp_limb_t __gmp_odd2fac_table[];
   2054 extern const unsigned char __gmp_fac2cnt_table[];
   2055 extern const mp_limb_t __gmp_limbroots_table[];
   2056 
   2057 /* n^log <= GMP_NUMB_MAX, a limb can store log factors less than n */
   2058 static inline unsigned
   2059 log_n_max (mp_limb_t n)
   2060 {
   2061   unsigned log;
   2062   for (log = 8; n > __gmp_limbroots_table[log - 1]; log--);
   2063   return log;
   2064 }
   2065 
   2066 #define SIEVESIZE 512		/* FIXME: Allow gmp_init_primesieve to choose */
   2067 typedef struct
   2068 {
   2069   unsigned long d;		   /* current index in s[] */
   2070   unsigned long s0;		   /* number corresponding to s[0] */
   2071   unsigned long sqrt_s0;	   /* misnomer for sqrt(s[SIEVESIZE-1]) */
   2072   unsigned char s[SIEVESIZE + 1];  /* sieve table */
   2073 } gmp_primesieve_t;
   2074 
   2075 #define gmp_init_primesieve __gmp_init_primesieve
   2076 __GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);
   2077 
   2078 #define gmp_nextprime __gmp_nextprime
   2079 __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
   2080 
   2081 #define gmp_primesieve __gmp_primesieve
   2082 __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
   2083 
   2084 
   2085 #ifndef MUL_TOOM22_THRESHOLD
   2086 #define MUL_TOOM22_THRESHOLD             30
   2087 #endif
   2088 
   2089 #ifndef MUL_TOOM33_THRESHOLD
   2090 #define MUL_TOOM33_THRESHOLD            100
   2091 #endif
   2092 
   2093 #ifndef MUL_TOOM44_THRESHOLD
   2094 #define MUL_TOOM44_THRESHOLD            300
   2095 #endif
   2096 
   2097 #ifndef MUL_TOOM6H_THRESHOLD
   2098 #define MUL_TOOM6H_THRESHOLD            350
   2099 #endif
   2100 
   2101 #ifndef SQR_TOOM6_THRESHOLD
   2102 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
   2103 #endif
   2104 
   2105 #ifndef MUL_TOOM8H_THRESHOLD
   2106 #define MUL_TOOM8H_THRESHOLD            450
   2107 #endif
   2108 
   2109 #ifndef SQR_TOOM8_THRESHOLD
   2110 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
   2111 #endif
   2112 
   2113 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
   2114 #define MUL_TOOM32_TO_TOOM43_THRESHOLD  100
   2115 #endif
   2116 
   2117 #ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
   2118 #define MUL_TOOM32_TO_TOOM53_THRESHOLD  110
   2119 #endif
   2120 
   2121 #ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
   2122 #define MUL_TOOM42_TO_TOOM53_THRESHOLD  100
   2123 #endif
   2124 
   2125 #ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
   2126 #define MUL_TOOM42_TO_TOOM63_THRESHOLD  110
   2127 #endif
   2128 
   2129 #ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD
   2130 #define MUL_TOOM43_TO_TOOM54_THRESHOLD  150
   2131 #endif
   2132 
   2133 /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD.  In a
   2134    normal build MUL_TOOM22_THRESHOLD is a constant and we use that.  In a fat
   2135    binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
   2136    separate hard limit will have been defined.  Similarly for TOOM3.  */
   2137 #ifndef MUL_TOOM22_THRESHOLD_LIMIT
   2138 #define MUL_TOOM22_THRESHOLD_LIMIT  MUL_TOOM22_THRESHOLD
   2139 #endif
   2140 #ifndef MUL_TOOM33_THRESHOLD_LIMIT
   2141 #define MUL_TOOM33_THRESHOLD_LIMIT  MUL_TOOM33_THRESHOLD
   2142 #endif
   2143 #ifndef MULLO_BASECASE_THRESHOLD_LIMIT
   2144 #define MULLO_BASECASE_THRESHOLD_LIMIT  MULLO_BASECASE_THRESHOLD
   2145 #endif
   2146 #ifndef SQRLO_BASECASE_THRESHOLD_LIMIT
   2147 #define SQRLO_BASECASE_THRESHOLD_LIMIT  SQRLO_BASECASE_THRESHOLD
   2148 #endif
   2149 #ifndef SQRLO_DC_THRESHOLD_LIMIT
   2150 #define SQRLO_DC_THRESHOLD_LIMIT  SQRLO_DC_THRESHOLD
   2151 #endif
   2152 
   2153 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
   2154    mpn_mul_basecase.  Default is to use mpn_sqr_basecase from 0.  (Note that we
   2155    certainly always want it if there's a native assembler mpn_sqr_basecase.)
   2156 
   2157    If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
   2158    before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
   2159    threshold and SQR_TOOM2_THRESHOLD is 0.  This oddity arises more or less
   2160    because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
   2161    should be used, and that may be never.  */
   2162 
   2163 #ifndef SQR_BASECASE_THRESHOLD
   2164 #define SQR_BASECASE_THRESHOLD            0  /* never use mpn_mul_basecase */
   2165 #endif
   2166 
   2167 #ifndef SQR_TOOM2_THRESHOLD
   2168 #define SQR_TOOM2_THRESHOLD              50
   2169 #endif
   2170 
   2171 #ifndef SQR_TOOM3_THRESHOLD
   2172 #define SQR_TOOM3_THRESHOLD             120
   2173 #endif
   2174 
   2175 #ifndef SQR_TOOM4_THRESHOLD
   2176 #define SQR_TOOM4_THRESHOLD             400
   2177 #endif
   2178 
   2179 /* See comments above about MUL_TOOM33_THRESHOLD_LIMIT.  */
   2180 #ifndef SQR_TOOM3_THRESHOLD_LIMIT
   2181 #define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD
   2182 #endif
   2183 
   2184 #ifndef MULMID_TOOM42_THRESHOLD
   2185 #define MULMID_TOOM42_THRESHOLD     MUL_TOOM22_THRESHOLD
   2186 #endif
   2187 
   2188 #ifndef MULLO_BASECASE_THRESHOLD
   2189 #define MULLO_BASECASE_THRESHOLD          0  /* never use mpn_mul_basecase */
   2190 #endif
   2191 
   2192 #ifndef MULLO_DC_THRESHOLD
   2193 #define MULLO_DC_THRESHOLD         (2*MUL_TOOM22_THRESHOLD)
   2194 #endif
   2195 
   2196 #ifndef MULLO_MUL_N_THRESHOLD
   2197 #define MULLO_MUL_N_THRESHOLD      (2*MUL_FFT_THRESHOLD)
   2198 #endif
   2199 
   2200 #ifndef SQRLO_BASECASE_THRESHOLD
   2201 #define SQRLO_BASECASE_THRESHOLD          0  /* never use mpn_sqr_basecase */
   2202 #endif
   2203 
   2204 #ifndef SQRLO_DC_THRESHOLD
   2205 #define SQRLO_DC_THRESHOLD         (MULLO_DC_THRESHOLD)
   2206 #endif
   2207 
   2208 #ifndef SQRLO_SQR_THRESHOLD
   2209 #define SQRLO_SQR_THRESHOLD        (MULLO_MUL_N_THRESHOLD)
   2210 #endif
   2211 
   2212 #ifndef DC_DIV_QR_THRESHOLD
   2213 #define DC_DIV_QR_THRESHOLD        (2*MUL_TOOM22_THRESHOLD)
   2214 #endif
   2215 
   2216 #ifndef DC_DIVAPPR_Q_THRESHOLD
   2217 #define DC_DIVAPPR_Q_THRESHOLD          200
   2218 #endif
   2219 
   2220 #ifndef DC_BDIV_QR_THRESHOLD
   2221 #define DC_BDIV_QR_THRESHOLD       (2*MUL_TOOM22_THRESHOLD)
   2222 #endif
   2223 
   2224 #ifndef DC_BDIV_Q_THRESHOLD
   2225 #define DC_BDIV_Q_THRESHOLD             180
   2226 #endif
   2227 
   2228 #ifndef DIVEXACT_JEB_THRESHOLD
   2229 #define DIVEXACT_JEB_THRESHOLD           25
   2230 #endif
   2231 
   2232 #ifndef INV_MULMOD_BNM1_THRESHOLD
   2233 #define INV_MULMOD_BNM1_THRESHOLD  (4*MULMOD_BNM1_THRESHOLD)
   2234 #endif
   2235 
   2236 #ifndef INV_APPR_THRESHOLD
   2237 #define INV_APPR_THRESHOLD         INV_NEWTON_THRESHOLD
   2238 #endif
   2239 
   2240 #ifndef INV_NEWTON_THRESHOLD
   2241 #define INV_NEWTON_THRESHOLD            200
   2242 #endif
   2243 
   2244 #ifndef BINV_NEWTON_THRESHOLD
   2245 #define BINV_NEWTON_THRESHOLD           300
   2246 #endif
   2247 
   2248 #ifndef MU_DIVAPPR_Q_THRESHOLD
   2249 #define MU_DIVAPPR_Q_THRESHOLD         2000
   2250 #endif
   2251 
   2252 #ifndef MU_DIV_QR_THRESHOLD
   2253 #define MU_DIV_QR_THRESHOLD            2000
   2254 #endif
   2255 
   2256 #ifndef MUPI_DIV_QR_THRESHOLD
   2257 #define MUPI_DIV_QR_THRESHOLD           200
   2258 #endif
   2259 
   2260 #ifndef MU_BDIV_Q_THRESHOLD
   2261 #define MU_BDIV_Q_THRESHOLD            2000
   2262 #endif
   2263 
   2264 #ifndef MU_BDIV_QR_THRESHOLD
   2265 #define MU_BDIV_QR_THRESHOLD           2000
   2266 #endif
   2267 
   2268 #ifndef MULMOD_BNM1_THRESHOLD
   2269 #define MULMOD_BNM1_THRESHOLD            16
   2270 #endif
   2271 
   2272 #ifndef SQRMOD_BNM1_THRESHOLD
   2273 #define SQRMOD_BNM1_THRESHOLD            16
   2274 #endif
   2275 
   2276 #ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD
   2277 #define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD  (INV_MULMOD_BNM1_THRESHOLD/2)
   2278 #endif
   2279 
   2280 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
   2281 
   2282 #ifndef REDC_1_TO_REDC_2_THRESHOLD
   2283 #define REDC_1_TO_REDC_2_THRESHOLD       15
   2284 #endif
   2285 #ifndef REDC_2_TO_REDC_N_THRESHOLD
   2286 #define REDC_2_TO_REDC_N_THRESHOLD      100
   2287 #endif
   2288 
   2289 #else
   2290 
   2291 #ifndef REDC_1_TO_REDC_N_THRESHOLD
   2292 #define REDC_1_TO_REDC_N_THRESHOLD      100
   2293 #endif
   2294 
   2295 #endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */
   2296 
   2297 
   2298 /* First k to use for an FFT modF multiply.  A modF FFT is an order
   2299    log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
   2300    whereas k=4 is 1.33 which is faster than toom3 at 1.485.    */
   2301 #define FFT_FIRST_K  4
   2302 
   2303 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
   2304 #ifndef MUL_FFT_MODF_THRESHOLD
   2305 #define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM33_THRESHOLD * 3)
   2306 #endif
   2307 #ifndef SQR_FFT_MODF_THRESHOLD
   2308 #define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
   2309 #endif
   2310 
   2311 /* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   2312    will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   2313    NxN->2N multiply and not recursing into itself is an order
   2314    log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
   2315    is the first better than toom3.  */
   2316 #ifndef MUL_FFT_THRESHOLD
   2317 #define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
   2318 #endif
   2319 #ifndef SQR_FFT_THRESHOLD
   2320 #define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
   2321 #endif
   2322 
   2323 /* Table of thresholds for successive modF FFT "k"s.  The first entry is
   2324    where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   2325    etc.  See mpn_fft_best_k(). */
   2326 #ifndef MUL_FFT_TABLE
   2327 #define MUL_FFT_TABLE							\
   2328   { MUL_TOOM33_THRESHOLD * 4,   /* k=5 */				\
   2329     MUL_TOOM33_THRESHOLD * 8,   /* k=6 */				\
   2330     MUL_TOOM33_THRESHOLD * 16,  /* k=7 */				\
   2331     MUL_TOOM33_THRESHOLD * 32,  /* k=8 */				\
   2332     MUL_TOOM33_THRESHOLD * 96,  /* k=9 */				\
   2333     MUL_TOOM33_THRESHOLD * 288, /* k=10 */				\
   2334     0 }
   2335 #endif
   2336 #ifndef SQR_FFT_TABLE
   2337 #define SQR_FFT_TABLE							\
   2338   { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */				\
   2339     SQR_TOOM3_THRESHOLD * 8,   /* k=6 */				\
   2340     SQR_TOOM3_THRESHOLD * 16,  /* k=7 */				\
   2341     SQR_TOOM3_THRESHOLD * 32,  /* k=8 */				\
   2342     SQR_TOOM3_THRESHOLD * 96,  /* k=9 */				\
   2343     SQR_TOOM3_THRESHOLD * 288, /* k=10 */				\
   2344     0 }
   2345 #endif
   2346 
   2347 struct fft_table_nk
   2348 {
   2349   gmp_uint_least32_t n:27;
   2350   gmp_uint_least32_t k:5;
   2351 };
   2352 
   2353 #ifndef FFT_TABLE_ATTRS
   2354 #define FFT_TABLE_ATTRS   static const
   2355 #endif
   2356 
   2357 #define MPN_FFT_TABLE_SIZE  16
   2358 
   2359 
   2360 #ifndef DC_DIV_QR_THRESHOLD
   2361 #define DC_DIV_QR_THRESHOLD    (3 * MUL_TOOM22_THRESHOLD)
   2362 #endif
   2363 
   2364 #ifndef GET_STR_DC_THRESHOLD
   2365 #define GET_STR_DC_THRESHOLD             18
   2366 #endif
   2367 
   2368 #ifndef GET_STR_PRECOMPUTE_THRESHOLD
   2369 #define GET_STR_PRECOMPUTE_THRESHOLD     35
   2370 #endif
   2371 
   2372 #ifndef SET_STR_DC_THRESHOLD
   2373 #define SET_STR_DC_THRESHOLD            750
   2374 #endif
   2375 
   2376 #ifndef SET_STR_PRECOMPUTE_THRESHOLD
   2377 #define SET_STR_PRECOMPUTE_THRESHOLD   2000
   2378 #endif
   2379 
   2380 #ifndef FAC_ODD_THRESHOLD
   2381 #define FAC_ODD_THRESHOLD    35
   2382 #endif
   2383 
   2384 #ifndef FAC_DSC_THRESHOLD
   2385 #define FAC_DSC_THRESHOLD   400
   2386 #endif
   2387 
   2388 /* Return non-zero if xp,xsize and yp,ysize overlap.
   2389    If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   2390    overlap.  If both these are false, there's an overlap. */
   2391 #define MPN_OVERLAP_P(xp, xsize, yp, ysize)				\
   2392   ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
   2393 #define MEM_OVERLAP_P(xp, xsize, yp, ysize)				\
   2394   (   (char *) (xp) + (xsize) > (char *) (yp)				\
   2395    && (char *) (yp) + (ysize) > (char *) (xp))
   2396 
   2397 /* Return non-zero if xp,xsize and yp,ysize are either identical or not
   2398    overlapping.  Return zero if they're partially overlapping. */
   2399 #define MPN_SAME_OR_SEPARATE_P(xp, yp, size)				\
   2400   MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
   2401 #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)			\
   2402   ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
   2403 
   2404 /* Return non-zero if dst,dsize and src,ssize are either identical or
   2405    overlapping in a way suitable for an incrementing/decrementing algorithm.
   2406    Return zero if they're partially overlapping in an unsuitable fashion. */
   2407 #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)			\
   2408   ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
   2409 #define MPN_SAME_OR_INCR_P(dst, src, size)				\
   2410   MPN_SAME_OR_INCR2_P(dst, size, src, size)
   2411 #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)			\
   2412   ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
   2413 #define MPN_SAME_OR_DECR_P(dst, src, size)				\
   2414   MPN_SAME_OR_DECR2_P(dst, size, src, size)
   2415 
   2416 
   2417 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   2418    ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   2419    does it always.  Generally assertions are meant for development, but
   2420    might help when looking for a problem later too.  */
   2421 
   2422 #ifdef __LINE__
   2423 #define ASSERT_LINE  __LINE__
   2424 #else
   2425 #define ASSERT_LINE  -1
   2426 #endif
   2427 
   2428 #ifdef __FILE__
   2429 #define ASSERT_FILE  __FILE__
   2430 #else
   2431 #define ASSERT_FILE  ""
   2432 #endif
   2433 
   2434 __GMP_DECLSPEC void __gmp_assert_header (const char *, int);
   2435 __GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN;
   2436 
   2437 #define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
   2438 
   2439 #define ASSERT_ALWAYS(expr)						\
   2440   do {									\
   2441     if (UNLIKELY (!(expr)))						\
   2442       ASSERT_FAIL (expr);						\
   2443   } while (0)
   2444 
   2445 #if WANT_ASSERT
   2446 #define ASSERT(expr)   ASSERT_ALWAYS (expr)
   2447 #else
   2448 #define ASSERT(expr)   do {} while (0)
   2449 #endif
   2450 
   2451 
   2452 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
   2453    that it's zero.  In both cases if assertion checking is disabled the
   2454    expression is still evaluated.  These macros are meant for use with
   2455    routines like mpn_add_n() where the return value represents a carry or
   2456    whatever that should or shouldn't occur in some context.  For example,
   2457    ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
   2458 #if WANT_ASSERT
   2459 #define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
   2460 #define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
   2461 #else
   2462 #define ASSERT_CARRY(expr)     (expr)
   2463 #define ASSERT_NOCARRY(expr)   (expr)
   2464 #endif
   2465 
   2466 
   2467 /* ASSERT_CODE includes code when assertion checking is wanted.  This is the
   2468    same as writing "#if WANT_ASSERT", but more compact.  */
   2469 #if WANT_ASSERT
   2470 #define ASSERT_CODE(expr)  expr
   2471 #else
   2472 #define ASSERT_CODE(expr)
   2473 #endif
   2474 
   2475 
   2476 /* Test that an mpq_t is in fully canonical form.  This can be used as
   2477    protection on routines like mpq_equal which give wrong results on
   2478    non-canonical inputs.  */
   2479 #if WANT_ASSERT
   2480 #define ASSERT_MPQ_CANONICAL(q)						\
   2481   do {									\
   2482     ASSERT (q->_mp_den._mp_size > 0);					\
   2483     if (q->_mp_num._mp_size == 0)					\
   2484       {									\
   2485 	/* zero should be 0/1 */					\
   2486 	ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);			\
   2487       }									\
   2488     else								\
   2489       {									\
   2490 	/* no common factors */						\
   2491 	mpz_t  __g;							\
   2492 	mpz_init (__g);							\
   2493 	mpz_gcd (__g, mpq_numref(q), mpq_denref(q));			\
   2494 	ASSERT (mpz_cmp_ui (__g, 1) == 0);				\
   2495 	mpz_clear (__g);						\
   2496       }									\
   2497   } while (0)
   2498 #else
   2499 #define ASSERT_MPQ_CANONICAL(q)	 do {} while (0)
   2500 #endif
   2501 
   2502 /* Check that the nail parts are zero. */
   2503 #define ASSERT_ALWAYS_LIMB(limb)					\
   2504   do {									\
   2505     mp_limb_t  __nail = (limb) & GMP_NAIL_MASK;				\
   2506     ASSERT_ALWAYS (__nail == 0);					\
   2507   } while (0)
   2508 #define ASSERT_ALWAYS_MPN(ptr, size)					\
   2509   do {									\
   2510     /* let whole loop go dead when no nails */				\
   2511     if (GMP_NAIL_BITS != 0)						\
   2512       {									\
   2513 	mp_size_t  __i;							\
   2514 	for (__i = 0; __i < (size); __i++)				\
   2515 	  ASSERT_ALWAYS_LIMB ((ptr)[__i]);				\
   2516       }									\
   2517   } while (0)
   2518 #if WANT_ASSERT
   2519 #define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
   2520 #define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
   2521 #else
   2522 #define ASSERT_LIMB(limb)       do {} while (0)
   2523 #define ASSERT_MPN(ptr, size)   do {} while (0)
   2524 #endif
   2525 
   2526 
   2527 /* Assert that an mpn region {ptr,size} is zero, or non-zero.
   2528    size==0 is allowed, and in that case {ptr,size} considered to be zero.  */
   2529 #if WANT_ASSERT
   2530 #define ASSERT_MPN_ZERO_P(ptr,size)					\
   2531   do {									\
   2532     mp_size_t  __i;							\
   2533     ASSERT ((size) >= 0);						\
   2534     for (__i = 0; __i < (size); __i++)					\
   2535       ASSERT ((ptr)[__i] == 0);						\
   2536   } while (0)
   2537 #define ASSERT_MPN_NONZERO_P(ptr,size)					\
   2538   do {									\
   2539     mp_size_t  __i;							\
   2540     int	       __nonzero = 0;						\
   2541     ASSERT ((size) >= 0);						\
   2542     for (__i = 0; __i < (size); __i++)					\
   2543       if ((ptr)[__i] != 0)						\
   2544 	{								\
   2545 	  __nonzero = 1;						\
   2546 	  break;							\
   2547 	}								\
   2548     ASSERT (__nonzero);							\
   2549   } while (0)
   2550 #else
   2551 #define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
   2552 #define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
   2553 #endif
   2554 
   2555 
   2556 #if ! HAVE_NATIVE_mpn_com
   2557 #undef mpn_com
   2558 #define mpn_com(d,s,n)							\
   2559   do {									\
   2560     mp_ptr     __d = (d);						\
   2561     mp_srcptr  __s = (s);						\
   2562     mp_size_t  __n = (n);						\
   2563     ASSERT (__n >= 1);							\
   2564     ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));			\
   2565     do									\
   2566       *__d++ = (~ *__s++) & GMP_NUMB_MASK;				\
   2567     while (--__n);							\
   2568   } while (0)
   2569 #endif
   2570 
   2571 #define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation)			\
   2572   do {									\
   2573     mp_srcptr	__up = (up);						\
   2574     mp_srcptr	__vp = (vp);						\
   2575     mp_ptr	__rp = (rp);						\
   2576     mp_size_t	__n = (n);						\
   2577     mp_limb_t __a, __b;							\
   2578     ASSERT (__n > 0);							\
   2579     ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n));			\
   2580     ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n));			\
   2581     __up += __n;							\
   2582     __vp += __n;							\
   2583     __rp += __n;							\
   2584     __n = -__n;								\
   2585     do {								\
   2586       __a = __up[__n];							\
   2587       __b = __vp[__n];							\
   2588       __rp[__n] = operation;						\
   2589     } while (++__n);							\
   2590   } while (0)
   2591 
   2592 
   2593 #if ! HAVE_NATIVE_mpn_and_n
   2594 #undef mpn_and_n
   2595 #define mpn_and_n(rp, up, vp, n) \
   2596   MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b)
   2597 #endif
   2598 
   2599 #if ! HAVE_NATIVE_mpn_andn_n
   2600 #undef mpn_andn_n
   2601 #define mpn_andn_n(rp, up, vp, n) \
   2602   MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b)
   2603 #endif
   2604 
   2605 #if ! HAVE_NATIVE_mpn_nand_n
   2606 #undef mpn_nand_n
   2607 #define mpn_nand_n(rp, up, vp, n) \
   2608   MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK)
   2609 #endif
   2610 
   2611 #if ! HAVE_NATIVE_mpn_ior_n
   2612 #undef mpn_ior_n
   2613 #define mpn_ior_n(rp, up, vp, n) \
   2614   MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b)
   2615 #endif
   2616 
   2617 #if ! HAVE_NATIVE_mpn_iorn_n
   2618 #undef mpn_iorn_n
   2619 #define mpn_iorn_n(rp, up, vp, n) \
   2620   MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK)
   2621 #endif
   2622 
   2623 #if ! HAVE_NATIVE_mpn_nior_n
   2624 #undef mpn_nior_n
   2625 #define mpn_nior_n(rp, up, vp, n) \
   2626   MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK)
   2627 #endif
   2628 
   2629 #if ! HAVE_NATIVE_mpn_xor_n
   2630 #undef mpn_xor_n
   2631 #define mpn_xor_n(rp, up, vp, n) \
   2632   MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b)
   2633 #endif
   2634 
   2635 #if ! HAVE_NATIVE_mpn_xnor_n
   2636 #undef mpn_xnor_n
   2637 #define mpn_xnor_n(rp, up, vp, n) \
   2638   MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK)
   2639 #endif
   2640 
   2641 #define mpn_trialdiv __MPN(trialdiv)
   2642 __GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *);
   2643 
   2644 #define mpn_remove __MPN(remove)
   2645 __GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_bitcnt_t);
   2646 
   2647 
   2648 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
   2649 #if GMP_NAIL_BITS == 0
   2650 #define ADDC_LIMB(cout, w, x, y)					\
   2651   do {									\
   2652     mp_limb_t  __x = (x);						\
   2653     mp_limb_t  __y = (y);						\
   2654     mp_limb_t  __w = __x + __y;						\
   2655     (w) = __w;								\
   2656     (cout) = __w < __x;							\
   2657   } while (0)
   2658 #else
   2659 #define ADDC_LIMB(cout, w, x, y)					\
   2660   do {									\
   2661     mp_limb_t  __w;							\
   2662     ASSERT_LIMB (x);							\
   2663     ASSERT_LIMB (y);							\
   2664     __w = (x) + (y);							\
   2665     (w) = __w & GMP_NUMB_MASK;						\
   2666     (cout) = __w >> GMP_NUMB_BITS;					\
   2667   } while (0)
   2668 #endif
   2669 
   2670 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
   2671    subtract.  */
   2672 #if GMP_NAIL_BITS == 0
   2673 #define SUBC_LIMB(cout, w, x, y)					\
   2674   do {									\
   2675     mp_limb_t  __x = (x);						\
   2676     mp_limb_t  __y = (y);						\
   2677     mp_limb_t  __w = __x - __y;						\
   2678     (w) = __w;								\
   2679     (cout) = __w > __x;							\
   2680   } while (0)
   2681 #else
   2682 #define SUBC_LIMB(cout, w, x, y)					\
   2683   do {									\
   2684     mp_limb_t  __w = (x) - (y);						\
   2685     (w) = __w & GMP_NUMB_MASK;						\
   2686     (cout) = __w >> (GMP_LIMB_BITS-1);					\
   2687   } while (0)
   2688 #endif
   2689 
   2690 
   2691 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
   2692    expecting no carry (or borrow) from that.
   2693 
   2694    The size parameter is only for the benefit of assertion checking.  In a
   2695    normal build it's unused and the carry/borrow is just propagated as far
   2696    as it needs to go.
   2697 
   2698    On random data, usually only one or two limbs of {ptr,size} get updated,
   2699    so there's no need for any sophisticated looping, just something compact
   2700    and sensible.
   2701 
   2702    FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
   2703    declaring their operand sizes, then remove the former.  This is purely
   2704    for the benefit of assertion checking.  */
   2705 
   2706 #if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM)	\
   2707   && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \
   2708   && ! WANT_ASSERT
   2709 /* Better flags handling than the generic C gives on i386, saving a few
   2710    bytes of code and maybe a cycle or two.  */
   2711 
   2712 #define MPN_IORD_U(ptr, incr, aors)					\
   2713   do {									\
   2714     mp_ptr  __ptr_dummy;						\
   2715     if (__builtin_constant_p (incr) && (incr) == 0)			\
   2716       {									\
   2717       }									\
   2718     else if (__builtin_constant_p (incr) && (incr) == 1)		\
   2719       {									\
   2720 	__asm__ __volatile__						\
   2721 	  ("\n" ASM_L(top) ":\n"					\
   2722 	   "\t" aors "\t$1, (%0)\n"					\
   2723 	   "\tlea\t%c2(%0), %0\n"					\
   2724 	   "\tjc\t" ASM_L(top)						\
   2725 	   : "=r" (__ptr_dummy)						\
   2726 	   : "0"  (ptr), "n" (sizeof(mp_limb_t))			\
   2727 	   : "memory");							\
   2728       }									\
   2729     else								\
   2730       {									\
   2731 	__asm__ __volatile__						\
   2732 	  (   aors  "\t%2, (%0)\n"					\
   2733 	   "\tjnc\t" ASM_L(done) "\n"					\
   2734 	   ASM_L(top) ":\n"						\
   2735 	   "\t" aors "\t$1, %c3(%0)\n"					\
   2736 	   "\tlea\t%c3(%0), %0\n"					\
   2737 	   "\tjc\t" ASM_L(top) "\n"					\
   2738 	   ASM_L(done) ":\n"						\
   2739 	   : "=r" (__ptr_dummy)						\
   2740 	   : "0"  (ptr),						\
   2741 	     "re" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t))		\
   2742 	   : "memory");							\
   2743       }									\
   2744   } while (0)
   2745 
   2746 #if GMP_LIMB_BITS == 32
   2747 #define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
   2748 #define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
   2749 #endif
   2750 #if GMP_LIMB_BITS == 64
   2751 #define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addq")
   2752 #define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subq")
   2753 #endif
   2754 #define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
   2755 #define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
   2756 #endif
   2757 
   2758 #if GMP_NAIL_BITS == 0
   2759 #ifndef mpn_incr_u
   2760 #define mpn_incr_u(p,incr)						\
   2761   do {									\
   2762     mp_limb_t __x;							\
   2763     mp_ptr __p = (p);							\
   2764     if (__builtin_constant_p (incr) && (incr) == 1)			\
   2765       {									\
   2766 	while (++(*(__p++)) == 0)					\
   2767 	  ;								\
   2768       }									\
   2769     else								\
   2770       {									\
   2771 	__x = *__p + (incr);						\
   2772 	*__p = __x;							\
   2773 	if (__x < (incr))						\
   2774 	  while (++(*(++__p)) == 0)					\
   2775 	    ;								\
   2776       }									\
   2777   } while (0)
   2778 #endif
   2779 #ifndef mpn_decr_u
   2780 #define mpn_decr_u(p,incr)						\
   2781   do {									\
   2782     mp_limb_t __x;							\
   2783     mp_ptr __p = (p);							\
   2784     if (__builtin_constant_p (incr) && (incr) == 1)			\
   2785       {									\
   2786 	while ((*(__p++))-- == 0)					\
   2787 	  ;								\
   2788       }									\
   2789     else								\
   2790       {									\
   2791 	__x = *__p;							\
   2792 	*__p = __x - (incr);						\
   2793 	if (__x < (incr))						\
   2794 	  while ((*(++__p))-- == 0)					\
   2795 	    ;								\
   2796       }									\
   2797   } while (0)
   2798 #endif
   2799 #endif
   2800 
   2801 #if GMP_NAIL_BITS >= 1
   2802 #ifndef mpn_incr_u
   2803 #define mpn_incr_u(p,incr)						\
   2804   do {									\
   2805     mp_limb_t __x;							\
   2806     mp_ptr __p = (p);							\
   2807     if (__builtin_constant_p (incr) && (incr) == 1)			\
   2808       {									\
   2809 	do								\
   2810 	  {								\
   2811 	    __x = (*__p + 1) & GMP_NUMB_MASK;				\
   2812 	    *__p++ = __x;						\
   2813 	  }								\
   2814 	while (__x == 0);						\
   2815       }									\
   2816     else								\
   2817       {									\
   2818 	__x = (*__p + (incr));						\
   2819 	*__p++ = __x & GMP_NUMB_MASK;					\
   2820 	if (__x >> GMP_NUMB_BITS != 0)					\
   2821 	  {								\
   2822 	    do								\
   2823 	      {								\
   2824 		__x = (*__p + 1) & GMP_NUMB_MASK;			\
   2825 		*__p++ = __x;						\
   2826 	      }								\
   2827 	    while (__x == 0);						\
   2828 	  }								\
   2829       }									\
   2830   } while (0)
   2831 #endif
   2832 #ifndef mpn_decr_u
   2833 #define mpn_decr_u(p,incr)						\
   2834   do {									\
   2835     mp_limb_t __x;							\
   2836     mp_ptr __p = (p);							\
   2837     if (__builtin_constant_p (incr) && (incr) == 1)			\
   2838       {									\
   2839 	do								\
   2840 	  {								\
   2841 	    __x = *__p;							\
   2842 	    *__p++ = (__x - 1) & GMP_NUMB_MASK;				\
   2843 	  }								\
   2844 	while (__x == 0);						\
   2845       }									\
   2846     else								\
   2847       {									\
   2848 	__x = *__p - (incr);						\
   2849 	*__p++ = __x & GMP_NUMB_MASK;					\
   2850 	if (__x >> GMP_NUMB_BITS != 0)					\
   2851 	  {								\
   2852 	    do								\
   2853 	      {								\
   2854 		__x = *__p;						\
   2855 		*__p++ = (__x - 1) & GMP_NUMB_MASK;			\
   2856 	      }								\
   2857 	    while (__x == 0);						\
   2858 	  }								\
   2859       }									\
   2860   } while (0)
   2861 #endif
   2862 #endif
   2863 
   2864 #ifndef MPN_INCR_U
   2865 #if WANT_ASSERT
   2866 #define MPN_INCR_U(ptr, size, n)					\
   2867   do {									\
   2868     ASSERT ((size) >= 1);						\
   2869     ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));			\
   2870   } while (0)
   2871 #else
   2872 #define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
   2873 #endif
   2874 #endif
   2875 
   2876 #ifndef MPN_DECR_U
   2877 #if WANT_ASSERT
   2878 #define MPN_DECR_U(ptr, size, n)					\
   2879   do {									\
   2880     ASSERT ((size) >= 1);						\
   2881     ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));			\
   2882   } while (0)
   2883 #else
   2884 #define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
   2885 #endif
   2886 #endif
   2887 
   2888 
   2889 /* Structure for conversion between internal binary format and strings.  */
   2890 struct bases
   2891 {
   2892   /* Number of digits in the conversion base that always fits in an mp_limb_t.
   2893      For example, for base 10 on a machine where an mp_limb_t has 32 bits this
   2894      is 9, since 10**9 is the largest number that fits into an mp_limb_t.  */
   2895   int chars_per_limb;
   2896 
   2897   /* log(2)/log(conversion_base) */
   2898   mp_limb_t logb2;
   2899 
   2900   /* log(conversion_base)/log(2) */
   2901   mp_limb_t log2b;
   2902 
   2903   /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
   2904      factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
   2905      i.e. the number of bits used to represent each digit in the base.  */
   2906   mp_limb_t big_base;
   2907 
   2908   /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a
   2909      fixed-point number.  Instead of dividing by big_base an application can
   2910      choose to multiply by big_base_inverted.  */
   2911   mp_limb_t big_base_inverted;
   2912 };
   2913 
   2914 #define   mp_bases __MPN(bases)
   2915 __GMP_DECLSPEC extern const struct bases mp_bases[257];
   2916 
   2917 
   2918 /* Compute the number of digits in base for nbits bits, making sure the result
   2919    is never too small.  The two variants of the macro implement the same
   2920    function; the GT2 variant below works just for bases > 2.  */
   2921 #define DIGITS_IN_BASE_FROM_BITS(res, nbits, b)				\
   2922   do {									\
   2923     mp_limb_t _ph, _dummy;						\
   2924     size_t _nbits = (nbits);						\
   2925     umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits);			\
   2926     _ph += (_dummy + _nbits < _dummy);					\
   2927     res = _ph + 1;							\
   2928   } while (0)
   2929 #define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b)			\
   2930   do {									\
   2931     mp_limb_t _ph, _dummy;						\
   2932     size_t _nbits = (nbits);						\
   2933     umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits);		\
   2934     res = _ph + 1;							\
   2935   } while (0)
   2936 
   2937 /* For power of 2 bases this is exact.  For other bases the result is either
   2938    exact or one too big.
   2939 
   2940    To be exact always it'd be necessary to examine all the limbs of the
   2941    operand, since numbers like 100..000 and 99...999 generally differ only
   2942    in the lowest limb.  It'd be possible to examine just a couple of high
   2943    limbs to increase the probability of being exact, but that doesn't seem
   2944    worth bothering with.  */
   2945 
   2946 #define MPN_SIZEINBASE(result, ptr, size, base)				\
   2947   do {									\
   2948     int	   __lb_base, __cnt;						\
   2949     size_t __totbits;							\
   2950 									\
   2951     ASSERT ((size) >= 0);						\
   2952     ASSERT ((base) >= 2);						\
   2953     ASSERT ((base) < numberof (mp_bases));				\
   2954 									\
   2955     /* Special case for X == 0.  */					\
   2956     if ((size) == 0)							\
   2957       (result) = 1;							\
   2958     else								\
   2959       {									\
   2960 	/* Calculate the total number of significant bits of X.  */	\
   2961 	count_leading_zeros (__cnt, (ptr)[(size)-1]);			\
   2962 	__totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
   2963 									\
   2964 	if (POW2_P (base))						\
   2965 	  {								\
   2966 	    __lb_base = mp_bases[base].big_base;			\
   2967 	    (result) = (__totbits + __lb_base - 1) / __lb_base;		\
   2968 	  }								\
   2969 	else								\
   2970 	  {								\
   2971 	    DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base);	\
   2972 	  }								\
   2973       }									\
   2974   } while (0)
   2975 
   2976 #define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp)			\
   2977   do {										\
   2978     int          __cnt;								\
   2979     mp_bitcnt_t  __totbits;							\
   2980     ASSERT ((size) > 0);							\
   2981     ASSERT ((ptr)[(size)-1] != 0);						\
   2982     count_leading_zeros (__cnt, (ptr)[(size)-1]);				\
   2983     __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);	\
   2984     (result) = (__totbits + (base2exp)-1) / (base2exp);				\
   2985   } while (0)
   2986 
   2987 
   2988 /* bit count to limb count, rounding up */
   2989 #define BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
   2990 
   2991 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui.  MPZ_FAKE_UI creates fake
   2992    mpz_t from ui.  The zp argument must have room for LIMBS_PER_ULONG limbs
   2993    in both cases (LIMBS_PER_ULONG is also defined here.) */
   2994 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
   2995 
   2996 #define LIMBS_PER_ULONG 1
   2997 #define MPN_SET_UI(zp, zn, u)						\
   2998   (zp)[0] = (u);							\
   2999   (zn) = ((zp)[0] != 0);
   3000 #define MPZ_FAKE_UI(z, zp, u)						\
   3001   (zp)[0] = (u);							\
   3002   PTR (z) = (zp);							\
   3003   SIZ (z) = ((zp)[0] != 0);						\
   3004   ASSERT_CODE (ALLOC (z) = 1);
   3005 
   3006 #else /* need two limbs per ulong */
   3007 
   3008 #define LIMBS_PER_ULONG 2
   3009 #define MPN_SET_UI(zp, zn, u)						\
   3010   (zp)[0] = (u) & GMP_NUMB_MASK;					\
   3011   (zp)[1] = (u) >> GMP_NUMB_BITS;					\
   3012   (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
   3013 #define MPZ_FAKE_UI(z, zp, u)						\
   3014   (zp)[0] = (u) & GMP_NUMB_MASK;					\
   3015   (zp)[1] = (u) >> GMP_NUMB_BITS;					\
   3016   SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);			\
   3017   PTR (z) = (zp);							\
   3018   ASSERT_CODE (ALLOC (z) = 2);
   3019 
   3020 #endif
   3021 
   3022 
   3023 #if HAVE_HOST_CPU_FAMILY_x86
   3024 #define TARGET_REGISTER_STARVED 1
   3025 #else
   3026 #define TARGET_REGISTER_STARVED 0
   3027 #endif
   3028 
   3029 
   3030 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
   3031    or 0 there into a limb 0xFF..FF or 0 respectively.
   3032 
   3033    On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
   3034    but C99 doesn't guarantee signed right shifts are arithmetic, so we have
   3035    a little compile-time test and a fallback to a "? :" form.  The latter is
   3036    necessary for instance on Cray vector systems.
   3037 
   3038    Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
   3039    to an arithmetic right shift anyway, but it's good to get the desired
   3040    shift on past versions too (in particular since an important use of
   3041    LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv).  */
   3042 
   3043 #define LIMB_HIGHBIT_TO_MASK(n)						\
   3044   (((mp_limb_signed_t) -1 >> 1) < 0					\
   3045    ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)			\
   3046    : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
   3047 
   3048 
   3049 /* Use a library function for invert_limb, if available. */
   3050 #define  mpn_invert_limb __MPN(invert_limb)
   3051 __GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST;
   3052 #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
   3053 #define invert_limb(invxl,xl)						\
   3054   do {									\
   3055     (invxl) = mpn_invert_limb (xl);					\
   3056   } while (0)
   3057 #endif
   3058 
   3059 #ifndef invert_limb
   3060 #define invert_limb(invxl,xl)						\
   3061   do {									\
   3062     mp_limb_t _dummy;							\
   3063     ASSERT ((xl) != 0);							\
   3064     udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl);		\
   3065   } while (0)
   3066 #endif
   3067 
   3068 #define invert_pi1(dinv, d1, d0)					\
   3069   do {									\
   3070     mp_limb_t _v, _p, _t1, _t0, _mask;					\
   3071     invert_limb (_v, d1);						\
   3072     _p = (d1) * _v;							\
   3073     _p += (d0);								\
   3074     if (_p < (d0))							\
   3075       {									\
   3076 	_v--;								\
   3077 	_mask = -(mp_limb_t) (_p >= (d1));				\
   3078 	_p -= (d1);							\
   3079 	_v += _mask;							\
   3080 	_p -= _mask & (d1);						\
   3081       }									\
   3082     umul_ppmm (_t1, _t0, d0, _v);					\
   3083     _p += _t1;								\
   3084     if (_p < _t1)							\
   3085       {									\
   3086 	_v--;								\
   3087 	if (UNLIKELY (_p >= (d1)))					\
   3088 	  {								\
   3089 	    if (_p > (d1) || _t0 >= (d0))				\
   3090 	      _v--;							\
   3091 	  }								\
   3092       }									\
   3093     (dinv).inv32 = _v;							\
   3094   } while (0)
   3095 
   3096 
   3097 /* udiv_qrnnd_preinv -- Based on work by Niels Mller and Torbjrn Granlund.
   3098    We write things strangely below, to help gcc.  A more straightforward
   3099    version:
   3100 	_r = (nl) - _qh * (d);
   3101 	_t = _r + (d);
   3102 	if (_r >= _ql)
   3103 	  {
   3104 	    _qh--;
   3105 	    _r = _t;
   3106 	  }
   3107    For one operation shorter critical path, one may want to use this form:
   3108 	_p = _qh * (d)
   3109 	_s = (nl) + (d);
   3110 	_r = (nl) - _p;
   3111 	_t = _s - _p;
   3112 	if (_r >= _ql)
   3113 	  {
   3114 	    _qh--;
   3115 	    _r = _t;
   3116 	  }
   3117 */
   3118 #define udiv_qrnnd_preinv(q, r, nh, nl, d, di)				\
   3119   do {									\
   3120     mp_limb_t _qh, _ql, _r, _mask;					\
   3121     umul_ppmm (_qh, _ql, (nh), (di));					\
   3122     if (__builtin_constant_p (nl) && (nl) == 0)				\
   3123       {									\
   3124 	_qh += (nh) + 1;						\
   3125 	_r = - _qh * (d);						\
   3126 	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
   3127 	_qh += _mask;							\
   3128 	_r += _mask & (d);						\
   3129       }									\
   3130     else								\
   3131       {									\
   3132 	add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
   3133 	_r = (nl) - _qh * (d);						\
   3134 	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
   3135 	_qh += _mask;							\
   3136 	_r += _mask & (d);						\
   3137 	if (UNLIKELY (_r >= (d)))					\
   3138 	  {								\
   3139 	    _r -= (d);							\
   3140 	    _qh++;							\
   3141 	  }								\
   3142       }									\
   3143     (r) = _r;								\
   3144     (q) = _qh;								\
   3145   } while (0)
   3146 
   3147 /* Dividing (NH, NL) by D, returning the remainder only. Unlike
   3148    udiv_qrnnd_preinv, works also for the case NH == D, where the
   3149    quotient doesn't quite fit in a single limb. */
   3150 #define udiv_rnnd_preinv(r, nh, nl, d, di)				\
   3151   do {									\
   3152     mp_limb_t _qh, _ql, _r, _mask;					\
   3153     umul_ppmm (_qh, _ql, (nh), (di));					\
   3154     if (__builtin_constant_p (nl) && (nl) == 0)				\
   3155       {									\
   3156 	_r = ~(_qh + (nh)) * (d);					\
   3157 	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
   3158 	_r += _mask & (d);						\
   3159       }									\
   3160     else								\
   3161       {									\
   3162 	add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
   3163 	_r = (nl) - _qh * (d);						\
   3164 	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
   3165 	_r += _mask & (d);						\
   3166 	if (UNLIKELY (_r >= (d)))					\
   3167 	  _r -= (d);							\
   3168       }									\
   3169     (r) = _r;								\
   3170   } while (0)
   3171 
   3172 /* Compute quotient the quotient and remainder for n / d. Requires d
   3173    >= B^2 / 2 and n < d B. di is the inverse
   3174 
   3175      floor ((B^3 - 1) / (d0 + d1 B)) - B.
   3176 
   3177    NOTE: Output variables are updated multiple times. Only some inputs
   3178    and outputs may overlap.
   3179 */
   3180 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)		\
   3181   do {									\
   3182     mp_limb_t _q0, _t1, _t0, _mask;					\
   3183     umul_ppmm ((q), _q0, (n2), (dinv));					\
   3184     add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));			\
   3185 									\
   3186     /* Compute the two most significant limbs of n - q'd */		\
   3187     (r1) = (n1) - (d1) * (q);						\
   3188     sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0));			\
   3189     umul_ppmm (_t1, _t0, (d0), (q));					\
   3190     sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);			\
   3191     (q)++;								\
   3192 									\
   3193     /* Conditionally adjust q and the remainders */			\
   3194     _mask = - (mp_limb_t) ((r1) >= _q0);				\
   3195     (q) += _mask;							\
   3196     add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0));	\
   3197     if (UNLIKELY ((r1) >= (d1)))					\
   3198       {									\
   3199 	if ((r1) > (d1) || (r0) >= (d0))				\
   3200 	  {								\
   3201 	    (q)++;							\
   3202 	    sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));		\
   3203 	  }								\
   3204       }									\
   3205   } while (0)
   3206 
   3207 #ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */
   3208 #define   mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
   3209 __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
   3210 #endif
   3211 
   3212 
   3213 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
   3214    plain mpn_divrem_1.  The default is yes, since the few CISC chips where
   3215    preinv is not good have defines saying so.  */
   3216 #ifndef USE_PREINV_DIVREM_1
   3217 #define USE_PREINV_DIVREM_1   1
   3218 #endif
   3219 
   3220 #if USE_PREINV_DIVREM_1
   3221 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
   3222   mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
   3223 #else
   3224 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
   3225   mpn_divrem_1 (qp, xsize, ap, size, d)
   3226 #endif
   3227 
   3228 #ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
   3229 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
   3230 #endif
   3231 
   3232 /* This selection may seem backwards.  The reason mpn_mod_1 typically takes
   3233    over for larger sizes is that it uses the mod_1_1 function.  */
   3234 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)		\
   3235   (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD)		\
   3236    ? mpn_preinv_mod_1 (src, size, divisor, inverse)			\
   3237    : mpn_mod_1 (src, size, divisor))
   3238 
   3239 
   3240 #ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */
   3241 #define mpn_mod_34lsub1 __MPN(mod_34lsub1)
   3242 __GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
   3243 #endif
   3244 
   3245 
   3246 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
   3247    plain mpn_divrem_1.  Likewise BMOD_1_TO_MOD_1_THRESHOLD for
   3248    mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and
   3249    modexact are faster at all sizes, so the defaults are 0.  Those CPUs
   3250    where this is not right have a tuned threshold.  */
   3251 #ifndef DIVEXACT_1_THRESHOLD
   3252 #define DIVEXACT_1_THRESHOLD  0
   3253 #endif
   3254 #ifndef BMOD_1_TO_MOD_1_THRESHOLD
   3255 #define BMOD_1_TO_MOD_1_THRESHOLD  10
   3256 #endif
   3257 
   3258 #define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d)				\
   3259   do {									\
   3260     if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD))			\
   3261       ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d));	\
   3262     else								\
   3263       {									\
   3264 	ASSERT (mpn_mod_1 (up, n, d) == 0);				\
   3265 	mpn_divexact_1 (rp, up, n, d);					\
   3266       }									\
   3267   } while (0)
   3268 
   3269 #ifndef mpn_modexact_1c_odd  /* if not done with cpuvec in a fat binary */
   3270 #define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
   3271 __GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
   3272 #endif
   3273 
   3274 #if HAVE_NATIVE_mpn_modexact_1_odd
   3275 #define   mpn_modexact_1_odd  __MPN(modexact_1_odd)
   3276 __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
   3277 #else
   3278 #define mpn_modexact_1_odd(src,size,divisor) \
   3279   mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
   3280 #endif
   3281 
   3282 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)			\
   3283   (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD)			\
   3284    ? mpn_modexact_1_odd (src, size, divisor)				\
   3285    : mpn_mod_1 (src, size, divisor))
   3286 
   3287 /* binvert_limb() sets inv to the multiplicative inverse of n modulo
   3288    2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
   3289    n must be odd (otherwise such an inverse doesn't exist).
   3290 
   3291    This is not to be confused with invert_limb(), which is completely
   3292    different.
   3293 
   3294    The table lookup gives an inverse with the low 8 bits valid, and each
   3295    multiply step doubles the number of bits.  See Jebelean "An algorithm for
   3296    exact division" end of section 4 (reference in gmp.texi).
   3297 
   3298    Possible enhancement: Could use UHWtype until the last step, if half-size
   3299    multiplies are faster (might help under _LONG_LONG_LIMB).
   3300 
   3301    Alternative: As noted in Granlund and Montgomery "Division by Invariant
   3302    Integers using Multiplication" (reference in gmp.texi), n itself gives a
   3303    3-bit inverse immediately, and could be used instead of a table lookup.
   3304    A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
   3305    bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  */
   3306 
   3307 #define binvert_limb_table  __gmp_binvert_limb_table
   3308 __GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
   3309 
   3310 #define binvert_limb(inv,n)						\
   3311   do {									\
   3312     mp_limb_t  __n = (n);						\
   3313     mp_limb_t  __inv;							\
   3314     ASSERT ((__n & 1) == 1);						\
   3315 									\
   3316     __inv = binvert_limb_table[(__n/2) & 0x7F]; /*  8 */		\
   3317     if (GMP_NUMB_BITS > 8)   __inv = 2 * __inv - __inv * __inv * __n;	\
   3318     if (GMP_NUMB_BITS > 16)  __inv = 2 * __inv - __inv * __inv * __n;	\
   3319     if (GMP_NUMB_BITS > 32)  __inv = 2 * __inv - __inv * __inv * __n;	\
   3320 									\
   3321     if (GMP_NUMB_BITS > 64)						\
   3322       {									\
   3323 	int  __invbits = 64;						\
   3324 	do {								\
   3325 	  __inv = 2 * __inv - __inv * __inv * __n;			\
   3326 	  __invbits *= 2;						\
   3327 	} while (__invbits < GMP_NUMB_BITS);				\
   3328       }									\
   3329 									\
   3330     ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);			\
   3331     (inv) = __inv & GMP_NUMB_MASK;					\
   3332   } while (0)
   3333 #define modlimb_invert binvert_limb  /* backward compatibility */
   3334 
   3335 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
   3336    Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
   3337    GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
   3338    we need to start from GMP_NUMB_MAX>>1. */
   3339 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
   3340 
   3341 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
   3342    These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
   3343 #define GMP_NUMB_CEIL_MAX_DIV3   (GMP_NUMB_MAX / 3 + 1)
   3344 #define GMP_NUMB_CEIL_2MAX_DIV3  ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
   3345 
   3346 
   3347 /* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.
   3348 
   3349    It's not clear whether this is the best way to do this calculation.
   3350    Anything congruent to -a would be fine for the one limb congruence
   3351    tests.  */
   3352 
   3353 #define NEG_MOD(r, a, d)						\
   3354   do {									\
   3355     ASSERT ((d) != 0);							\
   3356     ASSERT_LIMB (a);							\
   3357     ASSERT_LIMB (d);							\
   3358 									\
   3359     if ((a) <= (d))							\
   3360       {									\
   3361 	/* small a is reasonably likely */				\
   3362 	(r) = (d) - (a);						\
   3363       }									\
   3364     else								\
   3365       {									\
   3366 	unsigned   __twos;						\
   3367 	mp_limb_t  __dnorm;						\
   3368 	count_leading_zeros (__twos, d);				\
   3369 	__twos -= GMP_NAIL_BITS;					\
   3370 	__dnorm = (d) << __twos;					\
   3371 	(r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);		\
   3372       }									\
   3373 									\
   3374     ASSERT_LIMB (r);							\
   3375   } while (0)
   3376 
   3377 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
   3378 #define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)
   3379 
   3380 
   3381 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
   3382    to 0 if there's an even number.  "n" should be an unsigned long and "p"
   3383    an int.  */
   3384 
   3385 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
   3386 #define ULONG_PARITY(p, n)						\
   3387   do {									\
   3388     int __p;								\
   3389     __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n));			\
   3390     (p) = __p & 1;							\
   3391   } while (0)
   3392 #endif
   3393 
   3394 /* Cray intrinsic _popcnt. */
   3395 #ifdef _CRAY
   3396 #define ULONG_PARITY(p, n)      \
   3397   do {                          \
   3398     (p) = _popcnt (n) & 1;      \
   3399   } while (0)
   3400 #endif
   3401 
   3402 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
   3403     && ! defined (NO_ASM) && defined (__ia64)
   3404 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
   3405    to a 64 bit unsigned long long for popcnt */
   3406 #define ULONG_PARITY(p, n)						\
   3407   do {									\
   3408     unsigned long long  __n = (unsigned long) (n);			\
   3409     int  __p;								\
   3410     __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n));		\
   3411     (p) = __p & 1;							\
   3412   } while (0)
   3413 #endif
   3414 
   3415 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
   3416     && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
   3417 #if __GMP_GNUC_PREREQ (3,1)
   3418 #define __GMP_qm "=Qm"
   3419 #define __GMP_q "=Q"
   3420 #else
   3421 #define __GMP_qm "=qm"
   3422 #define __GMP_q "=q"
   3423 #endif
   3424 #define ULONG_PARITY(p, n)						\
   3425   do {									\
   3426     char	   __p;							\
   3427     unsigned long  __n = (n);						\
   3428     __n ^= (__n >> 16);							\
   3429     __asm__ ("xorb %h1, %b1\n\t"					\
   3430 	     "setpo %0"							\
   3431 	 : __GMP_qm (__p), __GMP_q (__n)				\
   3432 	 : "1" (__n));							\
   3433     (p) = __p;								\
   3434   } while (0)
   3435 #endif
   3436 
   3437 #if ! defined (ULONG_PARITY)
   3438 #define ULONG_PARITY(p, n)						\
   3439   do {									\
   3440     unsigned long  __n = (n);						\
   3441     int  __p = 0;							\
   3442     do									\
   3443       {									\
   3444 	__p ^= 0x96696996L >> (__n & 0x1F);				\
   3445 	__n >>= 5;							\
   3446       }									\
   3447     while (__n != 0);							\
   3448 									\
   3449     (p) = __p & 1;							\
   3450   } while (0)
   3451 #endif
   3452 
   3453 
   3454 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair.  gcc (as of
   3455    version 3.1 at least) doesn't seem to know how to generate rlwimi for
   3456    anything other than bit-fields, so use "asm".  */
   3457 #if defined (__GNUC__) && ! defined (NO_ASM)                    \
   3458   && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
   3459 #define BSWAP_LIMB(dst, src)						\
   3460   do {									\
   3461     mp_limb_t  __bswapl_src = (src);					\
   3462     mp_limb_t  __tmp1 = __bswapl_src >> 24;		/* low byte */	\
   3463     mp_limb_t  __tmp2 = __bswapl_src << 24;		/* high byte */	\
   3464     __asm__ ("rlwimi %0, %2, 24, 16, 23"		/* 2nd low */	\
   3465 	 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src));		\
   3466     __asm__ ("rlwimi %0, %2,  8,  8, 15"		/* 3nd high */	\
   3467 	 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src));		\
   3468     (dst) = __tmp1 | __tmp2;				/* whole */	\
   3469   } while (0)
   3470 #endif
   3471 
   3472 /* bswap is available on i486 and up and is fast.  A combination rorw $8 /
   3473    roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
   3474    kernel with xchgb instead of rorw), but this is not done here, because
   3475    i386 means generic x86 and mixing word and dword operations will cause
   3476    partial register stalls on P6 chips.  */
   3477 #if defined (__GNUC__) && ! defined (NO_ASM)            \
   3478   && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
   3479   && GMP_LIMB_BITS == 32
   3480 #define BSWAP_LIMB(dst, src)						\
   3481   do {									\
   3482     __asm__ ("bswap %0" : "=r" (dst) : "0" (src));			\
   3483   } while (0)
   3484 #endif
   3485 
   3486 #if defined (__GNUC__) && ! defined (NO_ASM)            \
   3487   && defined (__amd64__) && GMP_LIMB_BITS == 64
   3488 #define BSWAP_LIMB(dst, src)						\
   3489   do {									\
   3490     __asm__ ("bswap %q0" : "=r" (dst) : "0" (src));			\
   3491   } while (0)
   3492 #endif
   3493 
   3494 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
   3495     && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
   3496 #define BSWAP_LIMB(dst, src)						\
   3497   do {									\
   3498     __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) :  "r" (src));		\
   3499   } while (0)
   3500 #endif
   3501 
   3502 /* As per glibc. */
   3503 #if defined (__GNUC__) && ! defined (NO_ASM)                    \
   3504   && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
   3505 #define BSWAP_LIMB(dst, src)						\
   3506   do {									\
   3507     mp_limb_t  __bswapl_src = (src);					\
   3508     __asm__ ("ror%.w %#8, %0\n\t"					\
   3509 	     "swap   %0\n\t"						\
   3510 	     "ror%.w %#8, %0"						\
   3511 	     : "=d" (dst)						\
   3512 	     : "0" (__bswapl_src));					\
   3513   } while (0)
   3514 #endif
   3515 
   3516 #if ! defined (BSWAP_LIMB)
   3517 #if GMP_LIMB_BITS == 8
   3518 #define BSWAP_LIMB(dst, src)				\
   3519   do { (dst) = (src); } while (0)
   3520 #endif
   3521 #if GMP_LIMB_BITS == 16
   3522 #define BSWAP_LIMB(dst, src)						\
   3523   do {									\
   3524     (dst) = ((src) << 8) + ((src) >> 8);				\
   3525   } while (0)
   3526 #endif
   3527 #if GMP_LIMB_BITS == 32
   3528 #define BSWAP_LIMB(dst, src)						\
   3529   do {									\
   3530     (dst) =								\
   3531       ((src) << 24)							\
   3532       + (((src) & 0xFF00) << 8)						\
   3533       + (((src) >> 8) & 0xFF00)						\
   3534       + ((src) >> 24);							\
   3535   } while (0)
   3536 #endif
   3537 #if GMP_LIMB_BITS == 64
   3538 #define BSWAP_LIMB(dst, src)						\
   3539   do {									\
   3540     (dst) =								\
   3541       ((src) << 56)							\
   3542       + (((src) & 0xFF00) << 40)					\
   3543       + (((src) & 0xFF0000) << 24)					\
   3544       + (((src) & 0xFF000000) << 8)					\
   3545       + (((src) >> 8) & 0xFF000000)					\
   3546       + (((src) >> 24) & 0xFF0000)					\
   3547       + (((src) >> 40) & 0xFF00)					\
   3548       + ((src) >> 56);							\
   3549   } while (0)
   3550 #endif
   3551 #endif
   3552 
   3553 #if ! defined (BSWAP_LIMB)
   3554 #define BSWAP_LIMB(dst, src)						\
   3555   do {									\
   3556     mp_limb_t  __bswapl_src = (src);					\
   3557     mp_limb_t  __dstl = 0;						\
   3558     int	       __i;							\
   3559     for (__i = 0; __i < GMP_LIMB_BYTES; __i++)			\
   3560       {									\
   3561 	__dstl = (__dstl << 8) | (__bswapl_src & 0xFF);			\
   3562 	__bswapl_src >>= 8;						\
   3563       }									\
   3564     (dst) = __dstl;							\
   3565   } while (0)
   3566 #endif
   3567 
   3568 
   3569 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
   3570    those we know are fast.  */
   3571 #if defined (__GNUC__) && ! defined (NO_ASM)				\
   3572   && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN			\
   3573   && (HAVE_HOST_CPU_powerpc604						\
   3574       || HAVE_HOST_CPU_powerpc604e					\
   3575       || HAVE_HOST_CPU_powerpc750					\
   3576       || HAVE_HOST_CPU_powerpc7400)
   3577 #define BSWAP_LIMB_FETCH(limb, src)					\
   3578   do {									\
   3579     mp_srcptr  __blf_src = (src);					\
   3580     mp_limb_t  __limb;							\
   3581     __asm__ ("lwbrx %0, 0, %1"						\
   3582 	     : "=r" (__limb)						\
   3583 	     : "r" (__blf_src),						\
   3584 	       "m" (*__blf_src));					\
   3585     (limb) = __limb;							\
   3586   } while (0)
   3587 #endif
   3588 
   3589 #if ! defined (BSWAP_LIMB_FETCH)
   3590 #define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
   3591 #endif
   3592 
   3593 
   3594 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
   3595    know are fast.  FIXME: Is this necessary?  */
   3596 #if defined (__GNUC__) && ! defined (NO_ASM)				\
   3597   && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN			\
   3598   && (HAVE_HOST_CPU_powerpc604						\
   3599       || HAVE_HOST_CPU_powerpc604e					\
   3600       || HAVE_HOST_CPU_powerpc750					\
   3601       || HAVE_HOST_CPU_powerpc7400)
   3602 #define BSWAP_LIMB_STORE(dst, limb)					\
   3603   do {									\
   3604     mp_ptr     __dst = (dst);						\
   3605     mp_limb_t  __limb = (limb);						\
   3606     __asm__ ("stwbrx %1, 0, %2"						\
   3607 	     : "=m" (*__dst)						\
   3608 	     : "r" (__limb),						\
   3609 	       "r" (__dst));						\
   3610   } while (0)
   3611 #endif
   3612 
   3613 #if ! defined (BSWAP_LIMB_STORE)
   3614 #define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
   3615 #endif
   3616 
   3617 
   3618 /* Byte swap limbs from {src,size} and store at {dst,size}. */
   3619 #define MPN_BSWAP(dst, src, size)					\
   3620   do {									\
   3621     mp_ptr     __dst = (dst);						\
   3622     mp_srcptr  __src = (src);						\
   3623     mp_size_t  __size = (size);						\
   3624     mp_size_t  __i;							\
   3625     ASSERT ((size) >= 0);						\
   3626     ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));			\
   3627     CRAY_Pragma ("_CRI ivdep");						\
   3628     for (__i = 0; __i < __size; __i++)					\
   3629       {									\
   3630 	BSWAP_LIMB_FETCH (*__dst, __src);				\
   3631 	__dst++;							\
   3632 	__src++;							\
   3633       }									\
   3634   } while (0)
   3635 
   3636 /* Byte swap limbs from {dst,size} and store in reverse order at {src,size}. */
   3637 #define MPN_BSWAP_REVERSE(dst, src, size)				\
   3638   do {									\
   3639     mp_ptr     __dst = (dst);						\
   3640     mp_size_t  __size = (size);						\
   3641     mp_srcptr  __src = (src) + __size - 1;				\
   3642     mp_size_t  __i;							\
   3643     ASSERT ((size) >= 0);						\
   3644     ASSERT (! MPN_OVERLAP_P (dst, size, src, size));			\
   3645     CRAY_Pragma ("_CRI ivdep");						\
   3646     for (__i = 0; __i < __size; __i++)					\
   3647       {									\
   3648 	BSWAP_LIMB_FETCH (*__dst, __src);				\
   3649 	__dst++;							\
   3650 	__src--;							\
   3651       }									\
   3652   } while (0)
   3653 
   3654 
   3655 /* No processor claiming to be SPARC v9 compliant seems to
   3656    implement the POPC instruction.  Disable pattern for now.  */
   3657 #if 0
   3658 #if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
   3659 #define popc_limb(result, input)					\
   3660   do {									\
   3661     DItype __res;							\
   3662     __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input));		\
   3663   } while (0)
   3664 #endif
   3665 #endif
   3666 
   3667 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
   3668 #define popc_limb(result, input)					\
   3669   do {									\
   3670     __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input));		\
   3671   } while (0)
   3672 #endif
   3673 
   3674 /* Cray intrinsic. */
   3675 #ifdef _CRAY
   3676 #define popc_limb(result, input)					\
   3677   do {									\
   3678     (result) = _popcnt (input);						\
   3679   } while (0)
   3680 #endif
   3681 
   3682 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
   3683     && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
   3684 #define popc_limb(result, input)					\
   3685   do {									\
   3686     __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input));		\
   3687   } while (0)
   3688 #endif
   3689 
   3690 /* Cool population count of an mp_limb_t.
   3691    You have to figure out how this works, We won't tell you!
   3692 
   3693    The constants could also be expressed as:
   3694      0x55... = [2^N / 3]     = [(2^N-1)/3]
   3695      0x33... = [2^N / 5]     = [(2^N-1)/5]
   3696      0x0f... = [2^N / 17]    = [(2^N-1)/17]
   3697      (N is GMP_LIMB_BITS, [] denotes truncation.) */
   3698 
   3699 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8
   3700 #define popc_limb(result, input)					\
   3701   do {									\
   3702     mp_limb_t  __x = (input);						\
   3703     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
   3704     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
   3705     __x = ((__x >> 4) + __x);						\
   3706     (result) = __x & 0x0f;						\
   3707   } while (0)
   3708 #endif
   3709 
   3710 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16
   3711 #define popc_limb(result, input)					\
   3712   do {									\
   3713     mp_limb_t  __x = (input);						\
   3714     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
   3715     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
   3716     __x += (__x >> 4);							\
   3717     __x = ((__x >> 8) & MP_LIMB_T_MAX/4369)+(__x & MP_LIMB_T_MAX/4369);	\
   3718     (result) = __x;							\
   3719   } while (0)
   3720 #endif
   3721 
   3722 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32
   3723 #define popc_limb(result, input)					\
   3724   do {									\
   3725     mp_limb_t  __x = (input);						\
   3726     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
   3727     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
   3728     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;			\
   3729     __x = ((__x >> 8) + __x);						\
   3730     __x = ((__x >> 16) + __x);						\
   3731     (result) = __x & 0xff;						\
   3732   } while (0)
   3733 #endif
   3734 
   3735 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64
   3736 #define popc_limb(result, input)					\
   3737   do {									\
   3738     mp_limb_t  __x = (input);						\
   3739     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
   3740     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
   3741     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;			\
   3742     __x = ((__x >> 8) + __x);						\
   3743     __x = ((__x >> 16) + __x);						\
   3744     __x = ((__x >> 32) + __x);						\
   3745     (result) = __x & 0xff;						\
   3746   } while (0)
   3747 #endif
   3748 
   3749 
   3750 /* Define stuff for longlong.h.  */
   3751 #if HAVE_ATTRIBUTE_MODE
   3752 typedef unsigned int UQItype	__attribute__ ((mode (QI)));
   3753 typedef		 int SItype	__attribute__ ((mode (SI)));
   3754 typedef unsigned int USItype	__attribute__ ((mode (SI)));
   3755 typedef		 int DItype	__attribute__ ((mode (DI)));
   3756 typedef unsigned int UDItype	__attribute__ ((mode (DI)));
   3757 #else
   3758 typedef unsigned char UQItype;
   3759 typedef		 long SItype;
   3760 typedef unsigned long USItype;
   3761 #if HAVE_LONG_LONG
   3762 typedef	long long int DItype;
   3763 typedef unsigned long long int UDItype;
   3764 #else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
   3765 typedef long int DItype;
   3766 typedef unsigned long int UDItype;
   3767 #endif
   3768 #endif
   3769 
   3770 typedef mp_limb_t UWtype;
   3771 typedef unsigned int UHWtype;
   3772 #define W_TYPE_SIZE GMP_LIMB_BITS
   3773 
   3774 /* Define ieee_double_extract and _GMP_IEEE_FLOATS.
   3775 
   3776    Bit field packing is "implementation defined" according to C99, which
   3777    leaves us at the compiler's mercy here.  For some systems packing is
   3778    defined in the ABI (eg. x86).  In any case so far it seems universal that
   3779    little endian systems pack from low to high, and big endian from high to
   3780    low within the given type.
   3781 
   3782    Within the fields we rely on the integer endianness being the same as the
   3783    float endianness, this is true everywhere we know of and it'd be a fairly
   3784    strange system that did anything else.  */
   3785 
   3786 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
   3787 #define _GMP_IEEE_FLOATS 1
   3788 union ieee_double_extract
   3789 {
   3790   struct
   3791     {
   3792       gmp_uint_least32_t manh:20;
   3793       gmp_uint_least32_t exp:11;
   3794       gmp_uint_least32_t sig:1;
   3795       gmp_uint_least32_t manl:32;
   3796     } s;
   3797   double d;
   3798 };
   3799 #endif
   3800 
   3801 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
   3802 #define _GMP_IEEE_FLOATS 1
   3803 union ieee_double_extract
   3804 {
   3805   struct
   3806     {
   3807       gmp_uint_least32_t manl:32;
   3808       gmp_uint_least32_t manh:20;
   3809       gmp_uint_least32_t exp:11;
   3810       gmp_uint_least32_t sig:1;
   3811     } s;
   3812   double d;
   3813 };
   3814 #endif
   3815 
   3816 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN
   3817 #define _GMP_IEEE_FLOATS 1
   3818 union ieee_double_extract
   3819 {
   3820   struct
   3821     {
   3822       gmp_uint_least32_t sig:1;
   3823       gmp_uint_least32_t exp:11;
   3824       gmp_uint_least32_t manh:20;
   3825       gmp_uint_least32_t manl:32;
   3826     } s;
   3827   double d;
   3828 };
   3829 #endif
   3830 
   3831 #if HAVE_DOUBLE_VAX_D
   3832 union double_extract
   3833 {
   3834   struct
   3835     {
   3836       gmp_uint_least32_t man3:7;	/* highest 7 bits */
   3837       gmp_uint_least32_t exp:8;		/* excess-128 exponent */
   3838       gmp_uint_least32_t sig:1;
   3839       gmp_uint_least32_t man2:16;
   3840       gmp_uint_least32_t man1:16;
   3841       gmp_uint_least32_t man0:16;	/* lowest 16 bits */
   3842     } s;
   3843   double d;
   3844 };
   3845 #endif
   3846 
   3847 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
   3848    that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
   3849 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
   3850 /* Maximum number of limbs it will take to store any `double'.
   3851    We assume doubles have 53 mantissa bits.  */
   3852 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
   3853 
   3854 __GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double);
   3855 
   3856 #define mpn_get_d __gmpn_get_d
   3857 __GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE;
   3858 
   3859 
   3860 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
   3861    a_inf if x is an infinity.  Both are considered unlikely values, for
   3862    branch prediction.  */
   3863 
   3864 #if _GMP_IEEE_FLOATS
   3865 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)				\
   3866   do {									\
   3867     union ieee_double_extract  u;					\
   3868     u.d = (x);								\
   3869     if (UNLIKELY (u.s.exp == 0x7FF))					\
   3870       {									\
   3871 	if (u.s.manl == 0 && u.s.manh == 0)				\
   3872 	  { a_inf; }							\
   3873 	else								\
   3874 	  { a_nan; }							\
   3875       }									\
   3876   } while (0)
   3877 #endif
   3878 
   3879 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
   3880 /* no nans or infs in these formats */
   3881 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
   3882   do { } while (0)
   3883 #endif
   3884 
   3885 #ifndef DOUBLE_NAN_INF_ACTION
   3886 /* Unknown format, try something generic.
   3887    NaN should be "unordered", so x!=x.
   3888    Inf should be bigger than DBL_MAX.  */
   3889 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)				\
   3890   do {									\
   3891     {									\
   3892       if (UNLIKELY ((x) != (x)))					\
   3893 	{ a_nan; }							\
   3894       else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))		\
   3895 	{ a_inf; }							\
   3896     }									\
   3897   } while (0)
   3898 #endif
   3899 
   3900 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
   3901    in the coprocessor, which means a bigger exponent range than normal, and
   3902    depending on the rounding mode, a bigger mantissa than normal.  (See
   3903    "Disappointments" in the gcc manual.)  FORCE_DOUBLE stores and fetches
   3904    "d" through memory to force any rounding and overflows to occur.
   3905 
   3906    On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
   3907    registers, where there's no such extra precision and no need for the
   3908    FORCE_DOUBLE.  We don't bother to detect this since the present uses for
   3909    FORCE_DOUBLE are only in test programs and default generic C code.
   3910 
   3911    Not quite sure that an "automatic volatile" will use memory, but it does
   3912    in gcc.  An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
   3913    apparently matching operands like "0" are only allowed on a register
   3914    output.  gcc 3.4 warns about this, though in fact it and past versions
   3915    seem to put the operand through memory as hoped.  */
   3916 
   3917 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86      \
   3918      || defined (__amd64__))
   3919 #define FORCE_DOUBLE(d) \
   3920   do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
   3921 #else
   3922 #define FORCE_DOUBLE(d)  do { } while (0)
   3923 #endif
   3924 
   3925 
   3926 __GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[];
   3927 
   3928 __GMP_DECLSPEC extern int __gmp_junk;
   3929 __GMP_DECLSPEC extern const int __gmp_0;
   3930 __GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN;
   3931 __GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN;
   3932 __GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN;
   3933 __GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN;
   3934 #define GMP_ERROR(code)   __gmp_exception (code)
   3935 #define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
   3936 #define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()
   3937 
   3938 #if defined _LONG_LONG_LIMB
   3939 #define CNST_LIMB(C) ((mp_limb_t) C##LL)
   3940 #else /* not _LONG_LONG_LIMB */
   3941 #define CNST_LIMB(C) ((mp_limb_t) C##L)
   3942 #endif /* _LONG_LONG_LIMB */
   3943 
   3944 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
   3945 #if GMP_NUMB_BITS == 2
   3946 #define PP 0x3					/* 3 */
   3947 #define PP_FIRST_OMITTED 5
   3948 #endif
   3949 #if GMP_NUMB_BITS == 4
   3950 #define PP 0xF					/* 3 x 5 */
   3951 #define PP_FIRST_OMITTED 7
   3952 #endif
   3953 #if GMP_NUMB_BITS == 8
   3954 #define PP 0x69					/* 3 x 5 x 7 */
   3955 #define PP_FIRST_OMITTED 11
   3956 #endif
   3957 #if GMP_NUMB_BITS == 16
   3958 #define PP 0x3AA7				/* 3 x 5 x 7 x 11 x 13 */
   3959 #define PP_FIRST_OMITTED 17
   3960 #endif
   3961 #if GMP_NUMB_BITS == 32
   3962 #define PP 0xC0CFD797L				/* 3 x 5 x 7 x 11 x ... x 29 */
   3963 #define PP_INVERTED 0x53E5645CL
   3964 #define PP_FIRST_OMITTED 31
   3965 #endif
   3966 #if GMP_NUMB_BITS == 64
   3967 #define PP CNST_LIMB(0xE221F97C30E94E1D)	/* 3 x 5 x 7 x 11 x ... x 53 */
   3968 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
   3969 #define PP_FIRST_OMITTED 59
   3970 #endif
   3971 #ifndef PP_FIRST_OMITTED
   3972 #define PP_FIRST_OMITTED 3
   3973 #endif
   3974 
   3975 typedef struct
   3976 {
   3977   mp_limb_t d0, d1;
   3978 } mp_double_limb_t;
   3979 
   3980 #define mpn_gcd_22 __MPN (gcd_22)
   3981 __GMP_DECLSPEC mp_double_limb_t mpn_gcd_22 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
   3982 
   3983 /* BIT1 means a result value in bit 1 (second least significant bit), with a
   3984    zero bit representing +1 and a one bit representing -1.  Bits other than
   3985    bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
   3986    used to ensure the expressions are "int"s even if a and/or b might be
   3987    other types.
   3988 
   3989    JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   3990    and their speed is important.  Expressions are used rather than
   3991    conditionals to accumulate sign changes, which effectively means XORs
   3992    instead of conditional JUMPs. */
   3993 
   3994 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
   3995 #define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))
   3996 
   3997 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
   3998 #define JACOBI_U0(a)   ((a) == 1)
   3999 
   4000 /* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and
   4001    come up with a better name. */
   4002 
   4003 /* (a/0), with a given by low and size;
   4004    is 1 if a=+/-1, 0 otherwise */
   4005 #define JACOBI_LS0(alow,asize) \
   4006   (((asize) == 1 || (asize) == -1) && (alow) == 1)
   4007 
   4008 /* (a/0), with a an mpz_t;
   4009    fetch of low limb always valid, even if size is zero */
   4010 #define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))
   4011 
   4012 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
   4013 #define JACOBI_0U(b)   ((b) == 1)
   4014 
   4015 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
   4016 #define JACOBI_0S(b)   ((b) == 1 || (b) == -1)
   4017 
   4018 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
   4019 #define JACOBI_0LS(blow,bsize) \
   4020   (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
   4021 
   4022 /* Convert a bit1 to +1 or -1. */
   4023 #define JACOBI_BIT1_TO_PN(result_bit1) \
   4024   (1 - ((int) (result_bit1) & 2))
   4025 
   4026 /* (2/b), with b unsigned and odd;
   4027    is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
   4028    hence obtained from (b>>1)^b */
   4029 #define JACOBI_TWO_U_BIT1(b) \
   4030   ((int) (((b) >> 1) ^ (b)))
   4031 
   4032 /* (2/b)^twos, with b unsigned and odd */
   4033 #define JACOBI_TWOS_U_BIT1(twos, b) \
   4034   ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
   4035 
   4036 /* (2/b)^twos, with b unsigned and odd */
   4037 #define JACOBI_TWOS_U(twos, b) \
   4038   (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
   4039 
   4040 /* (-1/b), with b odd (signed or unsigned);
   4041    is (-1)^((b-1)/2) */
   4042 #define JACOBI_N1B_BIT1(b) \
   4043   ((int) (b))
   4044 
   4045 /* (a/b) effect due to sign of a: signed/unsigned, b odd;
   4046    is (-1/b) if a<0, or +1 if a>=0 */
   4047 #define JACOBI_ASGN_SU_BIT1(a, b) \
   4048   ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
   4049 
   4050 /* (a/b) effect due to sign of b: signed/signed;
   4051    is -1 if a and b both negative, +1 otherwise */
   4052 #define JACOBI_BSGN_SS_BIT1(a, b) \
   4053   ((((a)<0) & ((b)<0)) << 1)
   4054 
   4055 /* (a/b) effect due to sign of b: signed/mpz;
   4056    is -1 if a and b both negative, +1 otherwise */
   4057 #define JACOBI_BSGN_SZ_BIT1(a, b) \
   4058   JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
   4059 
   4060 /* (a/b) effect due to sign of b: mpz/signed;
   4061    is -1 if a and b both negative, +1 otherwise */
   4062 #define JACOBI_BSGN_ZS_BIT1(a, b) \
   4063   JACOBI_BSGN_SZ_BIT1 (b, a)
   4064 
   4065 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
   4066    is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
   4067    both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   4068    because this is used in a couple of places with only bit 1 of a or b
   4069    valid. */
   4070 #define JACOBI_RECIP_UU_BIT1(a, b) \
   4071   ((int) ((a) & (b)))
   4072 
   4073 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
   4074    decrementing b_size.  b_low should be b_ptr[0] on entry, and will be
   4075    updated for the new b_ptr.  result_bit1 is updated according to the
   4076    factors of 2 stripped, as per (a/2).  */
   4077 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)	\
   4078   do {									\
   4079     ASSERT ((b_size) >= 1);						\
   4080     ASSERT ((b_low) == (b_ptr)[0]);					\
   4081 									\
   4082     while (UNLIKELY ((b_low) == 0))					\
   4083       {									\
   4084 	(b_size)--;							\
   4085 	ASSERT ((b_size) >= 1);						\
   4086 	(b_ptr)++;							\
   4087 	(b_low) = *(b_ptr);						\
   4088 									\
   4089 	ASSERT (((a) & 1) != 0);					\
   4090 	if ((GMP_NUMB_BITS % 2) == 1)					\
   4091 	  (result_bit1) ^= JACOBI_TWO_U_BIT1(a);			\
   4092       }									\
   4093   } while (0)
   4094 
   4095 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
   4096    modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
   4097    unsigned.  modexact_1_odd effectively calculates -a mod b, and
   4098    result_bit1 is adjusted for the factor of -1.
   4099 
   4100    The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
   4101    sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
   4102    factor to introduce into result_bit1, so for that case use mpn_mod_1
   4103    unconditionally.
   4104 
   4105    FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
   4106    for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
   4107    or not skip a divide step, or something. */
   4108 
   4109 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
   4110   do {									   \
   4111     mp_srcptr  __a_ptr	= (a_ptr);					   \
   4112     mp_size_t  __a_size = (a_size);					   \
   4113     mp_limb_t  __b	= (b);						   \
   4114 									   \
   4115     ASSERT (__a_size >= 1);						   \
   4116     ASSERT (__b & 1);							   \
   4117 									   \
   4118     if ((GMP_NUMB_BITS % 2) != 0					   \
   4119 	|| ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD))	   \
   4120       {									   \
   4121 	(a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);			   \
   4122       }									   \
   4123     else								   \
   4124       {									   \
   4125 	(result_bit1) ^= JACOBI_N1B_BIT1 (__b);				   \
   4126 	(a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);		   \
   4127       }									   \
   4128   } while (0)
   4129 
   4130 /* State for the Jacobi computation using Lehmer. */
   4131 #define jacobi_table __gmp_jacobi_table
   4132 __GMP_DECLSPEC extern const unsigned char jacobi_table[208];
   4133 
   4134 /* Bit layout for the initial state. b must be odd.
   4135 
   4136       3  2  1 0
   4137    +--+--+--+--+
   4138    |a1|a0|b1| s|
   4139    +--+--+--+--+
   4140 
   4141  */
   4142 static inline unsigned
   4143 mpn_jacobi_init (unsigned a, unsigned b, unsigned s)
   4144 {
   4145   ASSERT (b & 1);
   4146   ASSERT (s <= 1);
   4147   return ((a & 3) << 2) + (b & 2) + s;
   4148 }
   4149 
   4150 static inline int
   4151 mpn_jacobi_finish (unsigned bits)
   4152 {
   4153   /* (a, b) = (1,0) or (0,1) */
   4154   ASSERT ( (bits & 14) == 0);
   4155 
   4156   return 1-2*(bits & 1);
   4157 }
   4158 
   4159 static inline unsigned
   4160 mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)
   4161 {
   4162   /* FIXME: Could halve table size by not including the e bit in the
   4163    * index, and instead xor when updating. Then the lookup would be
   4164    * like
   4165    *
   4166    *   bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];
   4167    */
   4168 
   4169   ASSERT (bits < 26);
   4170   ASSERT (denominator < 2);
   4171   ASSERT (q < 4);
   4172 
   4173   /* For almost all calls, denominator is constant and quite often q
   4174      is constant too. So use addition rather than or, so the compiler
   4175      can put the constant part can into the offset of an indexed
   4176      addressing instruction.
   4177 
   4178      With constant denominator, the below table lookup is compiled to
   4179 
   4180        C Constant q = 1, constant denominator = 1
   4181        movzbl table+5(%eax,8), %eax
   4182 
   4183      or
   4184 
   4185        C q in %edx, constant denominator = 1
   4186        movzbl table+4(%edx,%eax,8), %eax
   4187 
   4188      One could maintain the state preshifted 3 bits, to save a shift
   4189      here, but at least on x86, that's no real saving.
   4190   */
   4191   return bits = jacobi_table[(bits << 3) + (denominator << 2) + q];
   4192 }
   4193 
   4194 /* Matrix multiplication */
   4195 #define   mpn_matrix22_mul __MPN(matrix22_mul)
   4196 __GMP_DECLSPEC void      mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
   4197 #define   mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
   4198 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
   4199 
   4200 #ifndef MATRIX22_STRASSEN_THRESHOLD
   4201 #define MATRIX22_STRASSEN_THRESHOLD 30
   4202 #endif
   4203 
   4204 /* HGCD definitions */
   4205 
   4206 /* Extract one numb, shifting count bits left
   4207     ________  ________
   4208    |___xh___||___xl___|
   4209 	  |____r____|
   4210    >count <
   4211 
   4212    The count includes any nail bits, so it should work fine if count
   4213    is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
   4214    xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
   4215 
   4216    FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
   4217    those calls where the count high bits of xh may be non-zero.
   4218 */
   4219 
   4220 #define MPN_EXTRACT_NUMB(count, xh, xl)				\
   4221   ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) |	\
   4222    ((xl) >> (GMP_LIMB_BITS - (count))))
   4223 
   4224 
   4225 /* The matrix non-negative M = (u, u'; v,v') keeps track of the
   4226    reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
   4227    than a, b. The determinant must always be one, so that M has an
   4228    inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
   4229    bits. */
   4230 struct hgcd_matrix1
   4231 {
   4232   mp_limb_t u[2][2];
   4233 };
   4234 
   4235 #define mpn_hgcd2 __MPN (hgcd2)
   4236 __GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,	struct hgcd_matrix1 *);
   4237 
   4238 #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
   4239 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
   4240 
   4241 #define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector)
   4242 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
   4243 
   4244 #define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi)
   4245 __GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *);
   4246 
   4247 struct hgcd_matrix
   4248 {
   4249   mp_size_t alloc;		/* for sanity checking only */
   4250   mp_size_t n;
   4251   mp_ptr p[2][2];
   4252 };
   4253 
   4254 #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
   4255 
   4256 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
   4257 __GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr);
   4258 
   4259 #define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q)
   4260 __GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr);
   4261 
   4262 #define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1)
   4263 __GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr);
   4264 
   4265 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
   4266 __GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr);
   4267 
   4268 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
   4269 __GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
   4270 
   4271 #define mpn_hgcd_step __MPN(hgcd_step)
   4272 __GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
   4273 
   4274 #define mpn_hgcd_reduce __MPN(hgcd_reduce)
   4275 __GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
   4276 
   4277 #define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch)
   4278 __GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
   4279 
   4280 #define mpn_hgcd_itch __MPN (hgcd_itch)
   4281 __GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t) ATTRIBUTE_CONST;
   4282 
   4283 #define mpn_hgcd __MPN (hgcd)
   4284 __GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
   4285 
   4286 #define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)
   4287 __GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t) ATTRIBUTE_CONST;
   4288 
   4289 #define mpn_hgcd_appr __MPN (hgcd_appr)
   4290 __GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
   4291 
   4292 #define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
   4293 __GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr);
   4294 
   4295 typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
   4296 
   4297 /* Needs storage for the quotient */
   4298 #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
   4299 
   4300 #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
   4301 __GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr);
   4302 
   4303 struct gcdext_ctx
   4304 {
   4305   /* Result parameters. */
   4306   mp_ptr gp;
   4307   mp_size_t gn;
   4308   mp_ptr up;
   4309   mp_size_t *usize;
   4310 
   4311   /* Cofactors updated in each step. */
   4312   mp_size_t un;
   4313   mp_ptr u0, u1, tp;
   4314 };
   4315 
   4316 #define mpn_gcdext_hook __MPN (gcdext_hook)
   4317 gcd_subdiv_step_hook mpn_gcdext_hook;
   4318 
   4319 #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
   4320 
   4321 #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
   4322 __GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
   4323 
   4324 /* 4*(an + 1) + 4*(bn + 1) + an */
   4325 #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
   4326 
   4327 #ifndef HGCD_THRESHOLD
   4328 #define HGCD_THRESHOLD 400
   4329 #endif
   4330 
   4331 #ifndef HGCD_APPR_THRESHOLD
   4332 #define HGCD_APPR_THRESHOLD 400
   4333 #endif
   4334 
   4335 #ifndef HGCD_REDUCE_THRESHOLD
   4336 #define HGCD_REDUCE_THRESHOLD 1000
   4337 #endif
   4338 
   4339 #ifndef GCD_DC_THRESHOLD
   4340 #define GCD_DC_THRESHOLD 1000
   4341 #endif
   4342 
   4343 #ifndef GCDEXT_DC_THRESHOLD
   4344 #define GCDEXT_DC_THRESHOLD 600
   4345 #endif
   4346 
   4347 /* Definitions for mpn_set_str and mpn_get_str */
   4348 struct powers
   4349 {
   4350   mp_ptr p;			/* actual power value */
   4351   mp_size_t n;			/* # of limbs at p */
   4352   mp_size_t shift;		/* weight of lowest limb, in limb base B */
   4353   size_t digits_in_base;	/* number of corresponding digits */
   4354   int base;
   4355 };
   4356 typedef struct powers powers_t;
   4357 #define mpn_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) /* FIXME: This can perhaps be trimmed */
   4358 #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
   4359 #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
   4360 
   4361 #define mpn_compute_powtab __MPN(compute_powtab)
   4362 __GMP_DECLSPEC size_t mpn_compute_powtab (powers_t *, mp_ptr, mp_size_t, int);
   4363 #define   mpn_dc_set_str __MPN(dc_set_str)
   4364 __GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr);
   4365 #define   mpn_bc_set_str __MPN(bc_set_str)
   4366 __GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int);
   4367 
   4368 
   4369 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
   4370    limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
   4371    hence giving back the user's size in bits rounded up.  Notice that
   4372    converting prec->bits->prec gives an unchanged value.  */
   4373 #define __GMPF_BITS_TO_PREC(n)						\
   4374   ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
   4375 #define __GMPF_PREC_TO_BITS(n) \
   4376   ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
   4377 
   4378 __GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
   4379 
   4380 /* Compute the number of base-b digits corresponding to nlimbs limbs, rounding
   4381    down.  */
   4382 #define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b)				\
   4383   do {									\
   4384     mp_limb_t _ph, _dummy;						\
   4385     umul_ppmm (_ph, _dummy,						\
   4386 	       mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs));\
   4387     res = _ph;								\
   4388   } while (0)
   4389 
   4390 /* Compute the number of limbs corresponding to ndigits base-b digits, rounding
   4391    up.  */
   4392 #define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b)			\
   4393   do {									\
   4394     mp_limb_t _ph, _dummy;						\
   4395     umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits));	\
   4396     res = 8 * _ph / GMP_NUMB_BITS + 2;					\
   4397   } while (0)
   4398 
   4399 
   4400 /* Set n to the number of significant digits an mpf of the given _mp_prec
   4401    field, in the given base.  This is a rounded up value, designed to ensure
   4402    there's enough digits to reproduce all the guaranteed part of the value.
   4403 
   4404    There are prec many limbs, but the high might be only "1" so forget it
   4405    and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
   4406    further +1 is because the limbs usually won't fall on digit boundaries.
   4407 
   4408    FIXME: If base is a power of 2 and the bits per digit divides
   4409    GMP_LIMB_BITS then the +2 is unnecessary.  This happens always for
   4410    base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
   4411 
   4412 #define MPF_SIGNIFICANT_DIGITS(n, base, prec)				\
   4413   do {									\
   4414     size_t rawn;							\
   4415     ASSERT (base >= 2 && base < numberof (mp_bases));			\
   4416     DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base);			\
   4417     n = rawn + 2;							\
   4418   } while (0)
   4419 
   4420 
   4421 /* Decimal point string, from the current C locale.  Needs <langinfo.h> for
   4422    nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
   4423    DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
   4424    their respective #if HAVE_FOO_H.
   4425 
   4426    GLIBC recommends nl_langinfo because getting only one facet can be
   4427    faster, apparently. */
   4428 
   4429 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
   4430 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
   4431 #define GMP_DECIMAL_POINT  (nl_langinfo (DECIMAL_POINT))
   4432 #endif
   4433 /* RADIXCHAR is deprecated, still in unix98 or some such. */
   4434 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
   4435 #define GMP_DECIMAL_POINT  (nl_langinfo (RADIXCHAR))
   4436 #endif
   4437 /* localeconv is slower since it returns all locale stuff */
   4438 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
   4439 #define GMP_DECIMAL_POINT  (localeconv()->decimal_point)
   4440 #endif
   4441 #if ! defined (GMP_DECIMAL_POINT)
   4442 #define GMP_DECIMAL_POINT  (".")
   4443 #endif
   4444 
   4445 
   4446 #define DOPRNT_CONV_FIXED        1
   4447 #define DOPRNT_CONV_SCIENTIFIC   2
   4448 #define DOPRNT_CONV_GENERAL      3
   4449 
   4450 #define DOPRNT_JUSTIFY_NONE      0
   4451 #define DOPRNT_JUSTIFY_LEFT      1
   4452 #define DOPRNT_JUSTIFY_RIGHT     2
   4453 #define DOPRNT_JUSTIFY_INTERNAL  3
   4454 
   4455 #define DOPRNT_SHOWBASE_YES      1
   4456 #define DOPRNT_SHOWBASE_NO       2
   4457 #define DOPRNT_SHOWBASE_NONZERO  3
   4458 
   4459 struct doprnt_params_t {
   4460   int         base;          /* negative for upper case */
   4461   int         conv;          /* choices above */
   4462   const char  *expfmt;       /* exponent format */
   4463   int         exptimes4;     /* exponent multiply by 4 */
   4464   char        fill;          /* character */
   4465   int         justify;       /* choices above */
   4466   int         prec;          /* prec field, or -1 for all digits */
   4467   int         showbase;      /* choices above */
   4468   int         showpoint;     /* if radix point always shown */
   4469   int         showtrailing;  /* if trailing zeros wanted */
   4470   char        sign;          /* '+', ' ', or '\0' */
   4471   int         width;         /* width field */
   4472 };
   4473 
   4474 #if _GMP_H_HAVE_VA_LIST
   4475 
   4476 typedef int (*doprnt_format_t) (void *, const char *, va_list);
   4477 typedef int (*doprnt_memory_t) (void *, const char *, size_t);
   4478 typedef int (*doprnt_reps_t)   (void *, int, int);
   4479 typedef int (*doprnt_final_t)  (void *);
   4480 
   4481 struct doprnt_funs_t {
   4482   doprnt_format_t  format;
   4483   doprnt_memory_t  memory;
   4484   doprnt_reps_t    reps;
   4485   doprnt_final_t   final;   /* NULL if not required */
   4486 };
   4487 
   4488 extern const struct doprnt_funs_t  __gmp_fprintf_funs;
   4489 extern const struct doprnt_funs_t  __gmp_sprintf_funs;
   4490 extern const struct doprnt_funs_t  __gmp_snprintf_funs;
   4491 extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
   4492 extern const struct doprnt_funs_t  __gmp_ostream_funs;
   4493 
   4494 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
   4495    "size" of these have been written.  "alloc > size" is maintained, so
   4496    there's room to store a '\0' at the end.  "result" is where the
   4497    application wants the final block pointer.  */
   4498 struct gmp_asprintf_t {
   4499   char    **result;
   4500   char    *buf;
   4501   size_t  size;
   4502   size_t  alloc;
   4503 };
   4504 
   4505 #define GMP_ASPRINTF_T_INIT(d, output)					\
   4506   do {									\
   4507     (d).result = (output);						\
   4508     (d).alloc = 256;							\
   4509     (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);		\
   4510     (d).size = 0;							\
   4511   } while (0)
   4512 
   4513 /* If a realloc is necessary, use twice the size actually required, so as to
   4514    avoid repeated small reallocs.  */
   4515 #define GMP_ASPRINTF_T_NEED(d, n)					\
   4516   do {									\
   4517     size_t  alloc, newsize, newalloc;					\
   4518     ASSERT ((d)->alloc >= (d)->size + 1);				\
   4519 									\
   4520     alloc = (d)->alloc;							\
   4521     newsize = (d)->size + (n);						\
   4522     if (alloc <= newsize)						\
   4523       {									\
   4524 	newalloc = 2*newsize;						\
   4525 	(d)->alloc = newalloc;						\
   4526 	(d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,		\
   4527 					       alloc, newalloc, char);	\
   4528       }									\
   4529   } while (0)
   4530 
   4531 __GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t);
   4532 __GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int);
   4533 __GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *);
   4534 
   4535 /* buf is where to write the next output, and size is how much space is left
   4536    there.  If the application passed size==0 then that's what we'll have
   4537    here, and nothing at all should be written.  */
   4538 struct gmp_snprintf_t {
   4539   char    *buf;
   4540   size_t  size;
   4541 };
   4542 
   4543 /* Add the bytes printed by the call to the total retval, or bail out on an
   4544    error.  */
   4545 #define DOPRNT_ACCUMULATE(call)						\
   4546   do {									\
   4547     int  __ret;								\
   4548     __ret = call;							\
   4549     if (__ret == -1)							\
   4550       goto error;							\
   4551     retval += __ret;							\
   4552   } while (0)
   4553 #define DOPRNT_ACCUMULATE_FUN(fun, params)				\
   4554   do {									\
   4555     ASSERT ((fun) != NULL);						\
   4556     DOPRNT_ACCUMULATE ((*(fun)) params);				\
   4557   } while (0)
   4558 
   4559 #define DOPRNT_FORMAT(fmt, ap)						\
   4560   DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
   4561 #define DOPRNT_MEMORY(ptr, len)						\
   4562   DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
   4563 #define DOPRNT_REPS(c, n)						\
   4564   DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
   4565 
   4566 #define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))
   4567 
   4568 #define DOPRNT_REPS_MAYBE(c, n)						\
   4569   do {									\
   4570     if ((n) != 0)							\
   4571       DOPRNT_REPS (c, n);						\
   4572   } while (0)
   4573 #define DOPRNT_MEMORY_MAYBE(ptr, len)					\
   4574   do {									\
   4575     if ((len) != 0)							\
   4576       DOPRNT_MEMORY (ptr, len);						\
   4577   } while (0)
   4578 
   4579 __GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list);
   4580 __GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *);
   4581 
   4582 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2
   4583 __GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr);
   4584 
   4585 __GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list);
   4586 #endif /* _GMP_H_HAVE_VA_LIST */
   4587 
   4588 
   4589 typedef int (*gmp_doscan_scan_t)  (void *, const char *, ...);
   4590 typedef void *(*gmp_doscan_step_t) (void *, int);
   4591 typedef int (*gmp_doscan_get_t)   (void *);
   4592 typedef int (*gmp_doscan_unget_t) (int, void *);
   4593 
   4594 struct gmp_doscan_funs_t {
   4595   gmp_doscan_scan_t   scan;
   4596   gmp_doscan_step_t   step;
   4597   gmp_doscan_get_t    get;
   4598   gmp_doscan_unget_t  unget;
   4599 };
   4600 extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
   4601 extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;
   4602 
   4603 #if _GMP_H_HAVE_VA_LIST
   4604 __GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list);
   4605 #endif
   4606 
   4607 
   4608 /* For testing and debugging.  */
   4609 #define MPZ_CHECK_FORMAT(z)						\
   4610   do {									\
   4611     ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);		\
   4612     ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));				\
   4613     ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));				\
   4614   } while (0)
   4615 
   4616 #define MPQ_CHECK_FORMAT(q)						\
   4617   do {									\
   4618     MPZ_CHECK_FORMAT (mpq_numref (q));					\
   4619     MPZ_CHECK_FORMAT (mpq_denref (q));					\
   4620     ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);				\
   4621 									\
   4622     if (SIZ(mpq_numref(q)) == 0)					\
   4623       {									\
   4624 	/* should have zero as 0/1 */					\
   4625 	ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1				\
   4626 		       && PTR(mpq_denref(q))[0] == 1);			\
   4627       }									\
   4628     else								\
   4629       {									\
   4630 	/* should have no common factors */				\
   4631 	mpz_t  g;							\
   4632 	mpz_init (g);							\
   4633 	mpz_gcd (g, mpq_numref(q), mpq_denref(q));			\
   4634 	ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);				\
   4635 	mpz_clear (g);							\
   4636       }									\
   4637   } while (0)
   4638 
   4639 #define MPF_CHECK_FORMAT(f)						\
   4640   do {									\
   4641     ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53));			\
   4642     ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);				\
   4643     if (SIZ(f) == 0)							\
   4644       ASSERT_ALWAYS (EXP(f) == 0);					\
   4645     if (SIZ(f) != 0)							\
   4646       ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);			\
   4647   } while (0)
   4648 
   4649 
   4650 /* Enhancement: The "mod" and "gcd_1" functions below could have
   4651    __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
   4652    function pointers, only actual functions.  It probably doesn't make much
   4653    difference to the gmp code, since hopefully we arrange calls so there's
   4654    no great need for the compiler to move things around.  */
   4655 
   4656 #if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64)
   4657 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
   4658    in mpn/x86/x86-defs.m4 and mpn/x86_64/x86_64-defs.m4.  Be sure to update
   4659    those when changing here.  */
   4660 struct cpuvec_t {
   4661   DECL_add_n           ((*add_n));
   4662   DECL_addlsh1_n       ((*addlsh1_n));
   4663   DECL_addlsh2_n       ((*addlsh2_n));
   4664   DECL_addmul_1        ((*addmul_1));
   4665   DECL_addmul_2        ((*addmul_2));
   4666   DECL_bdiv_dbm1c      ((*bdiv_dbm1c));
   4667   DECL_cnd_add_n       ((*cnd_add_n));
   4668   DECL_cnd_sub_n       ((*cnd_sub_n));
   4669   DECL_com             ((*com));
   4670   DECL_copyd           ((*copyd));
   4671   DECL_copyi           ((*copyi));
   4672   DECL_divexact_1      ((*divexact_1));
   4673   DECL_divrem_1        ((*divrem_1));
   4674   DECL_gcd_11          ((*gcd_11));
   4675   DECL_lshift          ((*lshift));
   4676   DECL_lshiftc         ((*lshiftc));
   4677   DECL_mod_1           ((*mod_1));
   4678   DECL_mod_1_1p        ((*mod_1_1p));
   4679   DECL_mod_1_1p_cps    ((*mod_1_1p_cps));
   4680   DECL_mod_1s_2p       ((*mod_1s_2p));
   4681   DECL_mod_1s_2p_cps   ((*mod_1s_2p_cps));
   4682   DECL_mod_1s_4p       ((*mod_1s_4p));
   4683   DECL_mod_1s_4p_cps   ((*mod_1s_4p_cps));
   4684   DECL_mod_34lsub1     ((*mod_34lsub1));
   4685   DECL_modexact_1c_odd ((*modexact_1c_odd));
   4686   DECL_mul_1           ((*mul_1));
   4687   DECL_mul_basecase    ((*mul_basecase));
   4688   DECL_mullo_basecase  ((*mullo_basecase));
   4689   DECL_preinv_divrem_1 ((*preinv_divrem_1));
   4690   DECL_preinv_mod_1    ((*preinv_mod_1));
   4691   DECL_redc_1          ((*redc_1));
   4692   DECL_redc_2          ((*redc_2));
   4693   DECL_rshift          ((*rshift));
   4694   DECL_sqr_basecase    ((*sqr_basecase));
   4695   DECL_sub_n           ((*sub_n));
   4696   DECL_sublsh1_n       ((*sublsh1_n));
   4697   DECL_submul_1        ((*submul_1));
   4698   mp_size_t            mul_toom22_threshold;
   4699   mp_size_t            mul_toom33_threshold;
   4700   mp_size_t            sqr_toom2_threshold;
   4701   mp_size_t            sqr_toom3_threshold;
   4702   mp_size_t            bmod_1_to_mod_1_threshold;
   4703 };
   4704 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
   4705 __GMP_DECLSPEC extern int __gmpn_cpuvec_initialized;
   4706 #endif /* x86 fat binary */
   4707 
   4708 __GMP_DECLSPEC void __gmpn_cpuvec_init (void);
   4709 
   4710 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
   4711    if that hasn't yet been done (to establish the right values).  */
   4712 #define CPUVEC_THRESHOLD(field)						      \
   4713   ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)),     \
   4714    __gmpn_cpuvec.field)
   4715 
   4716 
   4717 #if HAVE_NATIVE_mpn_add_nc
   4718 #define mpn_add_nc __MPN(add_nc)
   4719 __GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   4720 #else
   4721 static inline
   4722 mp_limb_t
   4723 mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
   4724 {
   4725   mp_limb_t co;
   4726   co = mpn_add_n (rp, up, vp, n);
   4727   co += mpn_add_1 (rp, rp, n, ci);
   4728   return co;
   4729 }
   4730 #endif
   4731 
   4732 #if HAVE_NATIVE_mpn_sub_nc
   4733 #define mpn_sub_nc __MPN(sub_nc)
   4734 __GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
   4735 #else
   4736 static inline mp_limb_t
   4737 mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
   4738 {
   4739   mp_limb_t co;
   4740   co = mpn_sub_n (rp, up, vp, n);
   4741   co += mpn_sub_1 (rp, rp, n, ci);
   4742   return co;
   4743 }
   4744 #endif
   4745 
   4746 #if TUNE_PROGRAM_BUILD
   4747 /* Some extras wanted when recompiling some .c files for use by the tune
   4748    program.  Not part of a normal build.
   4749 
   4750    It's necessary to keep these thresholds as #defines (just to an
   4751    identically named variable), since various defaults are established based
   4752    on #ifdef in the .c files.  For some this is not so (the defaults are
   4753    instead established above), but all are done this way for consistency. */
   4754 
   4755 #undef	MUL_TOOM22_THRESHOLD
   4756 #define MUL_TOOM22_THRESHOLD		mul_toom22_threshold
   4757 extern mp_size_t			mul_toom22_threshold;
   4758 
   4759 #undef	MUL_TOOM33_THRESHOLD
   4760 #define MUL_TOOM33_THRESHOLD		mul_toom33_threshold
   4761 extern mp_size_t			mul_toom33_threshold;
   4762 
   4763 #undef	MUL_TOOM44_THRESHOLD
   4764 #define MUL_TOOM44_THRESHOLD		mul_toom44_threshold
   4765 extern mp_size_t			mul_toom44_threshold;
   4766 
   4767 #undef	MUL_TOOM6H_THRESHOLD
   4768 #define MUL_TOOM6H_THRESHOLD		mul_toom6h_threshold
   4769 extern mp_size_t			mul_toom6h_threshold;
   4770 
   4771 #undef	MUL_TOOM8H_THRESHOLD
   4772 #define MUL_TOOM8H_THRESHOLD		mul_toom8h_threshold
   4773 extern mp_size_t			mul_toom8h_threshold;
   4774 
   4775 #undef	MUL_TOOM32_TO_TOOM43_THRESHOLD
   4776 #define MUL_TOOM32_TO_TOOM43_THRESHOLD	mul_toom32_to_toom43_threshold
   4777 extern mp_size_t			mul_toom32_to_toom43_threshold;
   4778 
   4779 #undef	MUL_TOOM32_TO_TOOM53_THRESHOLD
   4780 #define MUL_TOOM32_TO_TOOM53_THRESHOLD	mul_toom32_to_toom53_threshold
   4781 extern mp_size_t			mul_toom32_to_toom53_threshold;
   4782 
   4783 #undef	MUL_TOOM42_TO_TOOM53_THRESHOLD
   4784 #define MUL_TOOM42_TO_TOOM53_THRESHOLD	mul_toom42_to_toom53_threshold
   4785 extern mp_size_t			mul_toom42_to_toom53_threshold;
   4786 
   4787 #undef	MUL_TOOM42_TO_TOOM63_THRESHOLD
   4788 #define MUL_TOOM42_TO_TOOM63_THRESHOLD	mul_toom42_to_toom63_threshold
   4789 extern mp_size_t			mul_toom42_to_toom63_threshold;
   4790 
   4791 #undef  MUL_TOOM43_TO_TOOM54_THRESHOLD
   4792 #define MUL_TOOM43_TO_TOOM54_THRESHOLD	mul_toom43_to_toom54_threshold;
   4793 extern mp_size_t			mul_toom43_to_toom54_threshold;
   4794 
   4795 #undef	MUL_FFT_THRESHOLD
   4796 #define MUL_FFT_THRESHOLD		mul_fft_threshold
   4797 extern mp_size_t			mul_fft_threshold;
   4798 
   4799 #undef	MUL_FFT_MODF_THRESHOLD
   4800 #define MUL_FFT_MODF_THRESHOLD		mul_fft_modf_threshold
   4801 extern mp_size_t			mul_fft_modf_threshold;
   4802 
   4803 #undef	MUL_FFT_TABLE
   4804 #define MUL_FFT_TABLE			{ 0 }
   4805 
   4806 #undef	MUL_FFT_TABLE3
   4807 #define MUL_FFT_TABLE3			{ {0,0} }
   4808 
   4809 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
   4810    remain as zero (always use it). */
   4811 #if ! HAVE_NATIVE_mpn_sqr_basecase
   4812 #undef	SQR_BASECASE_THRESHOLD
   4813 #define SQR_BASECASE_THRESHOLD		sqr_basecase_threshold
   4814 extern mp_size_t			sqr_basecase_threshold;
   4815 #endif
   4816 
   4817 #if TUNE_PROGRAM_BUILD_SQR
   4818 #undef	SQR_TOOM2_THRESHOLD
   4819 #define SQR_TOOM2_THRESHOLD		SQR_TOOM2_MAX_GENERIC
   4820 #else
   4821 #undef	SQR_TOOM2_THRESHOLD
   4822 #define SQR_TOOM2_THRESHOLD		sqr_toom2_threshold
   4823 extern mp_size_t			sqr_toom2_threshold;
   4824 #endif
   4825 
   4826 #undef	SQR_TOOM3_THRESHOLD
   4827 #define SQR_TOOM3_THRESHOLD		sqr_toom3_threshold
   4828 extern mp_size_t			sqr_toom3_threshold;
   4829 
   4830 #undef	SQR_TOOM4_THRESHOLD
   4831 #define SQR_TOOM4_THRESHOLD		sqr_toom4_threshold
   4832 extern mp_size_t			sqr_toom4_threshold;
   4833 
   4834 #undef	SQR_TOOM6_THRESHOLD
   4835 #define SQR_TOOM6_THRESHOLD		sqr_toom6_threshold
   4836 extern mp_size_t			sqr_toom6_threshold;
   4837 
   4838 #undef	SQR_TOOM8_THRESHOLD
   4839 #define SQR_TOOM8_THRESHOLD		sqr_toom8_threshold
   4840 extern mp_size_t			sqr_toom8_threshold;
   4841 
   4842 #undef  SQR_FFT_THRESHOLD
   4843 #define SQR_FFT_THRESHOLD		sqr_fft_threshold
   4844 extern mp_size_t			sqr_fft_threshold;
   4845 
   4846 #undef  SQR_FFT_MODF_THRESHOLD
   4847 #define SQR_FFT_MODF_THRESHOLD		sqr_fft_modf_threshold
   4848 extern mp_size_t			sqr_fft_modf_threshold;
   4849 
   4850 #undef	SQR_FFT_TABLE
   4851 #define SQR_FFT_TABLE			{ 0 }
   4852 
   4853 #undef	SQR_FFT_TABLE3
   4854 #define SQR_FFT_TABLE3			{ {0,0} }
   4855 
   4856 #undef	MULLO_BASECASE_THRESHOLD
   4857 #define MULLO_BASECASE_THRESHOLD	mullo_basecase_threshold
   4858 extern mp_size_t			mullo_basecase_threshold;
   4859 
   4860 #undef	MULLO_DC_THRESHOLD
   4861 #define MULLO_DC_THRESHOLD		mullo_dc_threshold
   4862 extern mp_size_t			mullo_dc_threshold;
   4863 
   4864 #undef	MULLO_MUL_N_THRESHOLD
   4865 #define MULLO_MUL_N_THRESHOLD		mullo_mul_n_threshold
   4866 extern mp_size_t			mullo_mul_n_threshold;
   4867 
   4868 #undef	SQRLO_BASECASE_THRESHOLD
   4869 #define SQRLO_BASECASE_THRESHOLD	sqrlo_basecase_threshold
   4870 extern mp_size_t			sqrlo_basecase_threshold;
   4871 
   4872 #undef	SQRLO_DC_THRESHOLD
   4873 #define SQRLO_DC_THRESHOLD		sqrlo_dc_threshold
   4874 extern mp_size_t			sqrlo_dc_threshold;
   4875 
   4876 #undef	SQRLO_SQR_THRESHOLD
   4877 #define SQRLO_SQR_THRESHOLD		sqrlo_sqr_threshold
   4878 extern mp_size_t			sqrlo_sqr_threshold;
   4879 
   4880 #undef	MULMID_TOOM42_THRESHOLD
   4881 #define MULMID_TOOM42_THRESHOLD		mulmid_toom42_threshold
   4882 extern mp_size_t			mulmid_toom42_threshold;
   4883 
   4884 #undef	DIV_QR_2_PI2_THRESHOLD
   4885 #define DIV_QR_2_PI2_THRESHOLD		div_qr_2_pi2_threshold
   4886 extern mp_size_t			div_qr_2_pi2_threshold;
   4887 
   4888 #undef	DC_DIV_QR_THRESHOLD
   4889 #define DC_DIV_QR_THRESHOLD		dc_div_qr_threshold
   4890 extern mp_size_t			dc_div_qr_threshold;
   4891 
   4892 #undef	DC_DIVAPPR_Q_THRESHOLD
   4893 #define DC_DIVAPPR_Q_THRESHOLD		dc_divappr_q_threshold
   4894 extern mp_size_t			dc_divappr_q_threshold;
   4895 
   4896 #undef	DC_BDIV_Q_THRESHOLD
   4897 #define DC_BDIV_Q_THRESHOLD		dc_bdiv_q_threshold
   4898 extern mp_size_t			dc_bdiv_q_threshold;
   4899 
   4900 #undef	DC_BDIV_QR_THRESHOLD
   4901 #define DC_BDIV_QR_THRESHOLD		dc_bdiv_qr_threshold
   4902 extern mp_size_t			dc_bdiv_qr_threshold;
   4903 
   4904 #undef	MU_DIV_QR_THRESHOLD
   4905 #define MU_DIV_QR_THRESHOLD		mu_div_qr_threshold
   4906 extern mp_size_t			mu_div_qr_threshold;
   4907 
   4908 #undef	MU_DIVAPPR_Q_THRESHOLD
   4909 #define MU_DIVAPPR_Q_THRESHOLD		mu_divappr_q_threshold
   4910 extern mp_size_t			mu_divappr_q_threshold;
   4911 
   4912 #undef	MUPI_DIV_QR_THRESHOLD
   4913 #define MUPI_DIV_QR_THRESHOLD		mupi_div_qr_threshold
   4914 extern mp_size_t			mupi_div_qr_threshold;
   4915 
   4916 #undef	MU_BDIV_QR_THRESHOLD
   4917 #define MU_BDIV_QR_THRESHOLD		mu_bdiv_qr_threshold
   4918 extern mp_size_t			mu_bdiv_qr_threshold;
   4919 
   4920 #undef	MU_BDIV_Q_THRESHOLD
   4921 #define MU_BDIV_Q_THRESHOLD		mu_bdiv_q_threshold
   4922 extern mp_size_t			mu_bdiv_q_threshold;
   4923 
   4924 #undef	INV_MULMOD_BNM1_THRESHOLD
   4925 #define INV_MULMOD_BNM1_THRESHOLD	inv_mulmod_bnm1_threshold
   4926 extern mp_size_t			inv_mulmod_bnm1_threshold;
   4927 
   4928 #undef	INV_NEWTON_THRESHOLD
   4929 #define INV_NEWTON_THRESHOLD		inv_newton_threshold
   4930 extern mp_size_t			inv_newton_threshold;
   4931 
   4932 #undef	INV_APPR_THRESHOLD
   4933 #define INV_APPR_THRESHOLD		inv_appr_threshold
   4934 extern mp_size_t			inv_appr_threshold;
   4935 
   4936 #undef	BINV_NEWTON_THRESHOLD
   4937 #define BINV_NEWTON_THRESHOLD		binv_newton_threshold
   4938 extern mp_size_t			binv_newton_threshold;
   4939 
   4940 #undef	REDC_1_TO_REDC_2_THRESHOLD
   4941 #define REDC_1_TO_REDC_2_THRESHOLD	redc_1_to_redc_2_threshold
   4942 extern mp_size_t			redc_1_to_redc_2_threshold;
   4943 
   4944 #undef	REDC_2_TO_REDC_N_THRESHOLD
   4945 #define REDC_2_TO_REDC_N_THRESHOLD	redc_2_to_redc_n_threshold
   4946 extern mp_size_t			redc_2_to_redc_n_threshold;
   4947 
   4948 #undef	REDC_1_TO_REDC_N_THRESHOLD
   4949 #define REDC_1_TO_REDC_N_THRESHOLD	redc_1_to_redc_n_threshold
   4950 extern mp_size_t			redc_1_to_redc_n_threshold;
   4951 
   4952 #undef	MATRIX22_STRASSEN_THRESHOLD
   4953 #define MATRIX22_STRASSEN_THRESHOLD	matrix22_strassen_threshold
   4954 extern mp_size_t			matrix22_strassen_threshold;
   4955 
   4956 typedef int hgcd2_func_t (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,
   4957 			  struct hgcd_matrix1 *);
   4958 extern hgcd2_func_t *hgcd2_func;
   4959 
   4960 #undef	HGCD_THRESHOLD
   4961 #define HGCD_THRESHOLD			hgcd_threshold
   4962 extern mp_size_t			hgcd_threshold;
   4963 
   4964 #undef	HGCD_APPR_THRESHOLD
   4965 #define HGCD_APPR_THRESHOLD		hgcd_appr_threshold
   4966 extern mp_size_t			hgcd_appr_threshold;
   4967 
   4968 #undef	HGCD_REDUCE_THRESHOLD
   4969 #define HGCD_REDUCE_THRESHOLD		hgcd_reduce_threshold
   4970 extern mp_size_t			hgcd_reduce_threshold;
   4971 
   4972 #undef	GCD_DC_THRESHOLD
   4973 #define GCD_DC_THRESHOLD		gcd_dc_threshold
   4974 extern mp_size_t			gcd_dc_threshold;
   4975 
   4976 #undef  GCDEXT_DC_THRESHOLD
   4977 #define GCDEXT_DC_THRESHOLD		gcdext_dc_threshold
   4978 extern mp_size_t			gcdext_dc_threshold;
   4979 
   4980 #undef  DIV_QR_1N_PI1_METHOD
   4981 #define DIV_QR_1N_PI1_METHOD		div_qr_1n_pi1_method
   4982 extern int				div_qr_1n_pi1_method;
   4983 
   4984 #undef  DIV_QR_1_NORM_THRESHOLD
   4985 #define DIV_QR_1_NORM_THRESHOLD		div_qr_1_norm_threshold
   4986 extern mp_size_t			div_qr_1_norm_threshold;
   4987 
   4988 #undef  DIV_QR_1_UNNORM_THRESHOLD
   4989 #define DIV_QR_1_UNNORM_THRESHOLD	div_qr_1_unnorm_threshold
   4990 extern mp_size_t			div_qr_1_unnorm_threshold;
   4991 
   4992 #undef  DIVREM_1_NORM_THRESHOLD
   4993 #define DIVREM_1_NORM_THRESHOLD		divrem_1_norm_threshold
   4994 extern mp_size_t			divrem_1_norm_threshold;
   4995 
   4996 #undef  DIVREM_1_UNNORM_THRESHOLD
   4997 #define DIVREM_1_UNNORM_THRESHOLD	divrem_1_unnorm_threshold
   4998 extern mp_size_t			divrem_1_unnorm_threshold;
   4999 
   5000 #undef	MOD_1_NORM_THRESHOLD
   5001 #define MOD_1_NORM_THRESHOLD		mod_1_norm_threshold
   5002 extern mp_size_t			mod_1_norm_threshold;
   5003 
   5004 #undef	MOD_1_UNNORM_THRESHOLD
   5005 #define MOD_1_UNNORM_THRESHOLD		mod_1_unnorm_threshold
   5006 extern mp_size_t			mod_1_unnorm_threshold;
   5007 
   5008 #undef  MOD_1_1P_METHOD
   5009 #define MOD_1_1P_METHOD			mod_1_1p_method
   5010 extern int				mod_1_1p_method;
   5011 
   5012 #undef	MOD_1N_TO_MOD_1_1_THRESHOLD
   5013 #define MOD_1N_TO_MOD_1_1_THRESHOLD	mod_1n_to_mod_1_1_threshold
   5014 extern mp_size_t			mod_1n_to_mod_1_1_threshold;
   5015 
   5016 #undef	MOD_1U_TO_MOD_1_1_THRESHOLD
   5017 #define MOD_1U_TO_MOD_1_1_THRESHOLD	mod_1u_to_mod_1_1_threshold
   5018 extern mp_size_t			mod_1u_to_mod_1_1_threshold;
   5019 
   5020 #undef	MOD_1_1_TO_MOD_1_2_THRESHOLD
   5021 #define MOD_1_1_TO_MOD_1_2_THRESHOLD	mod_1_1_to_mod_1_2_threshold
   5022 extern mp_size_t			mod_1_1_to_mod_1_2_threshold;
   5023 
   5024 #undef	MOD_1_2_TO_MOD_1_4_THRESHOLD
   5025 #define MOD_1_2_TO_MOD_1_4_THRESHOLD	mod_1_2_to_mod_1_4_threshold
   5026 extern mp_size_t			mod_1_2_to_mod_1_4_threshold;
   5027 
   5028 #undef	PREINV_MOD_1_TO_MOD_1_THRESHOLD
   5029 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD	preinv_mod_1_to_mod_1_threshold
   5030 extern mp_size_t			preinv_mod_1_to_mod_1_threshold;
   5031 
   5032 #if ! UDIV_PREINV_ALWAYS
   5033 #undef	DIVREM_2_THRESHOLD
   5034 #define DIVREM_2_THRESHOLD		divrem_2_threshold
   5035 extern mp_size_t			divrem_2_threshold;
   5036 #endif
   5037 
   5038 #undef	MULMOD_BNM1_THRESHOLD
   5039 #define MULMOD_BNM1_THRESHOLD		mulmod_bnm1_threshold
   5040 extern mp_size_t			mulmod_bnm1_threshold;
   5041 
   5042 #undef	SQRMOD_BNM1_THRESHOLD
   5043 #define SQRMOD_BNM1_THRESHOLD		sqrmod_bnm1_threshold
   5044 extern mp_size_t			sqrmod_bnm1_threshold;
   5045 
   5046 #undef	GET_STR_DC_THRESHOLD
   5047 #define GET_STR_DC_THRESHOLD		get_str_dc_threshold
   5048 extern mp_size_t			get_str_dc_threshold;
   5049 
   5050 #undef  GET_STR_PRECOMPUTE_THRESHOLD
   5051 #define GET_STR_PRECOMPUTE_THRESHOLD	get_str_precompute_threshold
   5052 extern mp_size_t			get_str_precompute_threshold;
   5053 
   5054 #undef	SET_STR_DC_THRESHOLD
   5055 #define SET_STR_DC_THRESHOLD		set_str_dc_threshold
   5056 extern mp_size_t			set_str_dc_threshold;
   5057 
   5058 #undef  SET_STR_PRECOMPUTE_THRESHOLD
   5059 #define SET_STR_PRECOMPUTE_THRESHOLD	set_str_precompute_threshold
   5060 extern mp_size_t			set_str_precompute_threshold;
   5061 
   5062 #undef  FAC_ODD_THRESHOLD
   5063 #define FAC_ODD_THRESHOLD		fac_odd_threshold
   5064 extern  mp_size_t			fac_odd_threshold;
   5065 
   5066 #undef  FAC_DSC_THRESHOLD
   5067 #define FAC_DSC_THRESHOLD		fac_dsc_threshold
   5068 extern  mp_size_t			fac_dsc_threshold;
   5069 
   5070 #undef  FFT_TABLE_ATTRS
   5071 #define FFT_TABLE_ATTRS
   5072 extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
   5073 #define FFT_TABLE3_SIZE 2000	/* generous space for tuning */
   5074 extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
   5075 
   5076 /* Sizes the tune program tests up to, used in a couple of recompilations. */
   5077 #undef MUL_TOOM22_THRESHOLD_LIMIT
   5078 #undef MUL_TOOM33_THRESHOLD_LIMIT
   5079 #undef MULLO_BASECASE_THRESHOLD_LIMIT
   5080 #undef SQRLO_BASECASE_THRESHOLD_LIMIT
   5081 #undef SQRLO_DC_THRESHOLD_LIMIT
   5082 #undef SQR_TOOM3_THRESHOLD_LIMIT
   5083 #define SQR_TOOM2_MAX_GENERIC           200
   5084 #define MUL_TOOM22_THRESHOLD_LIMIT      700
   5085 #define MUL_TOOM33_THRESHOLD_LIMIT      700
   5086 #define SQR_TOOM3_THRESHOLD_LIMIT       400
   5087 #define MUL_TOOM44_THRESHOLD_LIMIT     1000
   5088 #define SQR_TOOM4_THRESHOLD_LIMIT      1000
   5089 #define MUL_TOOM6H_THRESHOLD_LIMIT     1100
   5090 #define SQR_TOOM6_THRESHOLD_LIMIT      1100
   5091 #define MUL_TOOM8H_THRESHOLD_LIMIT     1200
   5092 #define SQR_TOOM8_THRESHOLD_LIMIT      1200
   5093 #define MULLO_BASECASE_THRESHOLD_LIMIT  200
   5094 #define SQRLO_BASECASE_THRESHOLD_LIMIT  200
   5095 #define SQRLO_DC_THRESHOLD_LIMIT        400
   5096 #define GET_STR_THRESHOLD_LIMIT         150
   5097 #define FAC_DSC_THRESHOLD_LIMIT        2048
   5098 
   5099 #endif /* TUNE_PROGRAM_BUILD */
   5100 
   5101 #if defined (__cplusplus)
   5102 }
   5103 #endif
   5104 
   5105 /* FIXME: Make these itch functions less conservative.  Also consider making
   5106    them dependent on just 'an', and compute the allocation directly from 'an'
   5107    instead of via n.  */
   5108 
   5109 /* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
   5110    k is ths smallest k such that
   5111      ceil(an/2^k) < MUL_TOOM22_THRESHOLD.
   5112    which implies that
   5113      k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
   5114        = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
   5115 */
   5116 #define mpn_toom22_mul_itch(an, bn) \
   5117   (2 * ((an) + GMP_NUMB_BITS))
   5118 #define mpn_toom2_sqr_itch(an) \
   5119   (2 * ((an) + GMP_NUMB_BITS))
   5120 
   5121 /* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
   5122    We use 3an + C, so that we can use a smaller constant.
   5123  */
   5124 #define mpn_toom33_mul_itch(an, bn) \
   5125   (3 * (an) + GMP_NUMB_BITS)
   5126 #define mpn_toom3_sqr_itch(an) \
   5127   (3 * (an) + GMP_NUMB_BITS)
   5128 
   5129 /* toom33/toom3: Scratch need is 8an/3 + 13k, k is the recursion depth.
   5130    We use 3an + C, so that we can use a smaller constant.
   5131  */
   5132 #define mpn_toom44_mul_itch(an, bn) \
   5133   (3 * (an) + GMP_NUMB_BITS)
   5134 #define mpn_toom4_sqr_itch(an) \
   5135   (3 * (an) + GMP_NUMB_BITS)
   5136 
   5137 #define mpn_toom6_sqr_itch(n)						\
   5138   (((n) - SQR_TOOM6_THRESHOLD)*2 +					\
   5139    MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6,				\
   5140        mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))
   5141 
   5142 #define MUL_TOOM6H_MIN							\
   5143   ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ?			\
   5144     MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
   5145 #define mpn_toom6_mul_n_itch(n)						\
   5146   (((n) - MUL_TOOM6H_MIN)*2 +						\
   5147    MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6,				\
   5148        mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))
   5149 
   5150 static inline mp_size_t
   5151 mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
   5152   mp_size_t estimatedN;
   5153   estimatedN = (an + bn) / (size_t) 10 + 1;
   5154   return mpn_toom6_mul_n_itch (estimatedN * 6);
   5155 }
   5156 
   5157 #define mpn_toom8_sqr_itch(n)						\
   5158   ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) +			\
   5159    MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6,			\
   5160        mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))
   5161 
   5162 #define MUL_TOOM8H_MIN							\
   5163   ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ?				\
   5164     MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
   5165 #define mpn_toom8_mul_n_itch(n)						\
   5166   ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) +				\
   5167    MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6,			\
   5168        mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))
   5169 
   5170 static inline mp_size_t
   5171 mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
   5172   mp_size_t estimatedN;
   5173   estimatedN = (an + bn) / (size_t) 14 + 1;
   5174   return mpn_toom8_mul_n_itch (estimatedN * 8);
   5175 }
   5176 
   5177 static inline mp_size_t
   5178 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
   5179 {
   5180   mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
   5181   mp_size_t itch = 2 * n + 1;
   5182 
   5183   return itch;
   5184 }
   5185 
   5186 static inline mp_size_t
   5187 mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
   5188 {
   5189   mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
   5190   return 6 * n + 3;
   5191 }
   5192 
   5193 static inline mp_size_t
   5194 mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
   5195 {
   5196   mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
   5197 
   5198   return 6*n + 4;
   5199 }
   5200 
   5201 static inline mp_size_t
   5202 mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
   5203 {
   5204   mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
   5205   return 6*n + 4;
   5206 }
   5207 
   5208 static inline mp_size_t
   5209 mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
   5210 {
   5211   mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
   5212   return 10 * n + 10;
   5213 }
   5214 
   5215 static inline mp_size_t
   5216 mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
   5217 {
   5218   mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
   5219   return 10 * n + 10;
   5220 }
   5221 
   5222 static inline mp_size_t
   5223 mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
   5224 {
   5225   mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
   5226   return 9 * n + 3;
   5227 }
   5228 
   5229 static inline mp_size_t
   5230 mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
   5231 {
   5232   mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
   5233   return 9 * n + 3;
   5234 }
   5235 
   5236 /* let S(n) = space required for input size n,
   5237    then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)).   */
   5238 #define mpn_toom42_mulmid_itch(n) \
   5239   (3 * (n) + GMP_NUMB_BITS)
   5240 
   5241 #if 0
   5242 #define mpn_fft_mul mpn_mul_fft_full
   5243 #else
   5244 #define mpn_fft_mul mpn_nussbaumer_mul
   5245 #endif
   5246 
   5247 #ifdef __cplusplus
   5248 
   5249 /* A little helper for a null-terminated __gmp_allocate_func string.
   5250    The destructor ensures it's freed even if an exception is thrown.
   5251    The len field is needed by the destructor, and can be used by anyone else
   5252    to avoid a second strlen pass over the data.
   5253 
   5254    Since our input is a C string, using strlen is correct.  Perhaps it'd be
   5255    more C++-ish style to use std::char_traits<char>::length, but char_traits
   5256    isn't available in gcc 2.95.4.  */
   5257 
   5258 class gmp_allocated_string {
   5259  public:
   5260   char *str;
   5261   size_t len;
   5262   gmp_allocated_string(char *arg)
   5263   {
   5264     str = arg;
   5265     len = std::strlen (str);
   5266   }
   5267   ~gmp_allocated_string()
   5268   {
   5269     (*__gmp_free_func) (str, len+1);
   5270   }
   5271 };
   5272 
   5273 std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
   5274 int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
   5275 void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
   5276 void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
   5277 std::ostream& __gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
   5278 extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;
   5279 
   5280 #endif /* __cplusplus */
   5281 
   5282 #endif /* __GMP_IMPL_H__ */
   5283