Home | History | Annotate | Line # | Download | only in generic
hgcd_reduce.c revision 1.1.1.2
      1      1.1  mrg /* hgcd_reduce.c.
      2      1.1  mrg 
      3      1.1  mrg    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      4      1.1  mrg    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      5      1.1  mrg    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      6      1.1  mrg 
      7      1.1  mrg Copyright 2011, 2012 Free Software Foundation, Inc.
      8      1.1  mrg 
      9      1.1  mrg This file is part of the GNU MP Library.
     10      1.1  mrg 
     11      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     12  1.1.1.2  mrg it under the terms of either:
     13  1.1.1.2  mrg 
     14  1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
     15  1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
     16  1.1.1.2  mrg     option) any later version.
     17  1.1.1.2  mrg 
     18  1.1.1.2  mrg or
     19  1.1.1.2  mrg 
     20  1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
     21  1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
     22  1.1.1.2  mrg     later version.
     23  1.1.1.2  mrg 
     24  1.1.1.2  mrg or both in parallel, as here.
     25      1.1  mrg 
     26      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     27      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     28  1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     29  1.1.1.2  mrg for more details.
     30      1.1  mrg 
     31  1.1.1.2  mrg You should have received copies of the GNU General Public License and the
     32  1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     33  1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
     34      1.1  mrg 
     35      1.1  mrg #include "gmp.h"
     36      1.1  mrg #include "gmp-impl.h"
     37      1.1  mrg #include "longlong.h"
     38      1.1  mrg 
     39      1.1  mrg /* Computes R -= A * B. Result must be non-negative. Normalized down
     40      1.1  mrg    to size an, and resulting size is returned. */
     41      1.1  mrg static mp_size_t
     42      1.1  mrg submul (mp_ptr rp, mp_size_t rn,
     43      1.1  mrg 	mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
     44      1.1  mrg {
     45      1.1  mrg   mp_ptr tp;
     46      1.1  mrg   TMP_DECL;
     47      1.1  mrg 
     48      1.1  mrg   ASSERT (bn > 0);
     49      1.1  mrg   ASSERT (an >= bn);
     50      1.1  mrg   ASSERT (rn >= an);
     51      1.1  mrg   ASSERT (an + bn <= rn + 1);
     52      1.1  mrg 
     53      1.1  mrg   TMP_MARK;
     54      1.1  mrg   tp = TMP_ALLOC_LIMBS (an + bn);
     55      1.1  mrg 
     56      1.1  mrg   mpn_mul (tp, ap, an, bp, bn);
     57  1.1.1.2  mrg   ASSERT ((an + bn <= rn) || (tp[rn] == 0));
     58  1.1.1.2  mrg   ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn - (an + bn > rn)));
     59      1.1  mrg   TMP_FREE;
     60      1.1  mrg 
     61      1.1  mrg   while (rn > an && (rp[rn-1] == 0))
     62      1.1  mrg     rn--;
     63      1.1  mrg 
     64      1.1  mrg   return rn;
     65      1.1  mrg }
     66      1.1  mrg 
     67      1.1  mrg /* Computes (a, b)  <--  M^{-1} (a; b) */
     68      1.1  mrg /* FIXME:
     69      1.1  mrg     x Take scratch parameter, and figure out scratch need.
     70      1.1  mrg 
     71      1.1  mrg     x Use some fallback for small M->n?
     72      1.1  mrg */
     73      1.1  mrg static mp_size_t
     74      1.1  mrg hgcd_matrix_apply (const struct hgcd_matrix *M,
     75      1.1  mrg 		   mp_ptr ap, mp_ptr bp,
     76      1.1  mrg 		   mp_size_t n)
     77      1.1  mrg {
     78      1.1  mrg   mp_size_t an, bn, un, vn, nn;
     79      1.1  mrg   mp_size_t mn[2][2];
     80      1.1  mrg   mp_size_t modn;
     81      1.1  mrg   mp_ptr tp, sp, scratch;
     82      1.1  mrg   mp_limb_t cy;
     83      1.1  mrg   unsigned i, j;
     84      1.1  mrg 
     85      1.1  mrg   TMP_DECL;
     86      1.1  mrg 
     87      1.1  mrg   ASSERT ( (ap[n-1] | bp[n-1]) > 0);
     88      1.1  mrg 
     89      1.1  mrg   an = n;
     90      1.1  mrg   MPN_NORMALIZE (ap, an);
     91      1.1  mrg   bn = n;
     92      1.1  mrg   MPN_NORMALIZE (bp, bn);
     93      1.1  mrg 
     94      1.1  mrg   for (i = 0; i < 2; i++)
     95      1.1  mrg     for (j = 0; j < 2; j++)
     96      1.1  mrg       {
     97      1.1  mrg 	mp_size_t k;
     98      1.1  mrg 	k = M->n;
     99      1.1  mrg 	MPN_NORMALIZE (M->p[i][j], k);
    100      1.1  mrg 	mn[i][j] = k;
    101      1.1  mrg       }
    102      1.1  mrg 
    103      1.1  mrg   ASSERT (mn[0][0] > 0);
    104      1.1  mrg   ASSERT (mn[1][1] > 0);
    105      1.1  mrg   ASSERT ( (mn[0][1] | mn[1][0]) > 0);
    106      1.1  mrg 
    107      1.1  mrg   TMP_MARK;
    108      1.1  mrg 
    109      1.1  mrg   if (mn[0][1] == 0)
    110      1.1  mrg     {
    111      1.1  mrg       /* A unchanged, M = (1, 0; q, 1) */
    112      1.1  mrg       ASSERT (mn[0][0] == 1);
    113      1.1  mrg       ASSERT (M->p[0][0][0] == 1);
    114      1.1  mrg       ASSERT (mn[1][1] == 1);
    115      1.1  mrg       ASSERT (M->p[1][1][0] == 1);
    116      1.1  mrg 
    117      1.1  mrg       /* Put B <-- B - q A */
    118      1.1  mrg       nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
    119      1.1  mrg     }
    120      1.1  mrg   else if (mn[1][0] == 0)
    121      1.1  mrg     {
    122      1.1  mrg       /* B unchanged, M = (1, q; 0, 1) */
    123      1.1  mrg       ASSERT (mn[0][0] == 1);
    124      1.1  mrg       ASSERT (M->p[0][0][0] == 1);
    125      1.1  mrg       ASSERT (mn[1][1] == 1);
    126      1.1  mrg       ASSERT (M->p[1][1][0] == 1);
    127      1.1  mrg 
    128      1.1  mrg       /* Put A  <-- A - q * B */
    129      1.1  mrg       nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
    130      1.1  mrg     }
    131      1.1  mrg   else
    132      1.1  mrg     {
    133      1.1  mrg       /* A = m00 a + m01 b  ==> a <= A / m00, b <= A / m01.
    134      1.1  mrg 	 B = m10 a + m11 b  ==> a <= B / m10, b <= B / m11. */
    135      1.1  mrg       un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
    136      1.1  mrg       vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
    137      1.1  mrg 
    138      1.1  mrg       nn = MAX (un, vn);
    139      1.1  mrg       /* In the range of interest, mulmod_bnm1 should always beat mullo. */
    140      1.1  mrg       modn = mpn_mulmod_bnm1_next_size (nn + 1);
    141      1.1  mrg 
    142  1.1.1.2  mrg       TMP_ALLOC_LIMBS_3 (tp, modn,
    143  1.1.1.2  mrg 			 sp, modn,
    144  1.1.1.2  mrg 			 scratch, mpn_mulmod_bnm1_itch (modn, modn, M->n));
    145      1.1  mrg 
    146      1.1  mrg       ASSERT (n <= 2*modn);
    147      1.1  mrg 
    148      1.1  mrg       if (n > modn)
    149      1.1  mrg 	{
    150      1.1  mrg 	  cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
    151      1.1  mrg 	  MPN_INCR_U (ap, modn, cy);
    152      1.1  mrg 
    153      1.1  mrg 	  cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
    154      1.1  mrg 	  MPN_INCR_U (bp, modn, cy);
    155      1.1  mrg 
    156      1.1  mrg 	  n = modn;
    157      1.1  mrg 	}
    158      1.1  mrg 
    159      1.1  mrg       mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
    160      1.1  mrg       mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
    161      1.1  mrg 
    162      1.1  mrg       /* FIXME: Handle the small n case in some better way. */
    163      1.1  mrg       if (n + mn[1][1] < modn)
    164      1.1  mrg 	MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
    165      1.1  mrg       if (n + mn[0][1] < modn)
    166      1.1  mrg 	MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
    167      1.1  mrg 
    168      1.1  mrg       cy = mpn_sub_n (tp, tp, sp, modn);
    169      1.1  mrg       MPN_DECR_U (tp, modn, cy);
    170      1.1  mrg 
    171      1.1  mrg       ASSERT (mpn_zero_p (tp + nn, modn - nn));
    172      1.1  mrg 
    173      1.1  mrg       mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
    174      1.1  mrg       MPN_COPY (ap, tp, nn);
    175      1.1  mrg       mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
    176      1.1  mrg 
    177      1.1  mrg       if (n + mn[1][0] < modn)
    178      1.1  mrg 	MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
    179      1.1  mrg       if (n + mn[0][0] < modn)
    180      1.1  mrg 	MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
    181      1.1  mrg 
    182      1.1  mrg       cy = mpn_sub_n (tp, tp, sp, modn);
    183      1.1  mrg       MPN_DECR_U (tp, modn, cy);
    184      1.1  mrg 
    185      1.1  mrg       ASSERT (mpn_zero_p (tp + nn, modn - nn));
    186      1.1  mrg       MPN_COPY (bp, tp, nn);
    187      1.1  mrg 
    188      1.1  mrg       while ( (ap[nn-1] | bp[nn-1]) == 0)
    189      1.1  mrg 	{
    190      1.1  mrg 	  nn--;
    191      1.1  mrg 	  ASSERT (nn > 0);
    192      1.1  mrg 	}
    193      1.1  mrg     }
    194      1.1  mrg   TMP_FREE;
    195      1.1  mrg 
    196      1.1  mrg   return nn;
    197      1.1  mrg }
    198      1.1  mrg 
    199      1.1  mrg mp_size_t
    200      1.1  mrg mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
    201      1.1  mrg {
    202      1.1  mrg   mp_size_t itch;
    203      1.1  mrg   if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
    204      1.1  mrg     {
    205      1.1  mrg       itch = mpn_hgcd_itch (n-p);
    206      1.1  mrg 
    207      1.1  mrg       /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 *
    208      1.1  mrg 	 (p + ceil((n-p)/2) - 1 <= n + p - 1 */
    209      1.1  mrg       if (itch < n + p - 1)
    210      1.1  mrg 	itch = n + p - 1;
    211      1.1  mrg     }
    212      1.1  mrg   else
    213      1.1  mrg     {
    214      1.1  mrg       itch = 2*(n-p) + mpn_hgcd_itch (n-p);
    215      1.1  mrg       /* Currently, hgcd_matrix_apply allocates its own storage. */
    216      1.1  mrg     }
    217      1.1  mrg   return itch;
    218      1.1  mrg }
    219      1.1  mrg 
    220      1.1  mrg /* FIXME: Document storage need. */
    221      1.1  mrg mp_size_t
    222      1.1  mrg mpn_hgcd_reduce (struct hgcd_matrix *M,
    223      1.1  mrg 		 mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
    224      1.1  mrg 		 mp_ptr tp)
    225      1.1  mrg {
    226      1.1  mrg   mp_size_t nn;
    227      1.1  mrg   if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
    228      1.1  mrg     {
    229      1.1  mrg       nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
    230      1.1  mrg       if (nn > 0)
    231      1.1  mrg 	/* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
    232      1.1  mrg 	   = 2 (n - 1) */
    233      1.1  mrg 	return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
    234      1.1  mrg     }
    235      1.1  mrg   else
    236      1.1  mrg     {
    237      1.1  mrg       MPN_COPY (tp, ap + p, n - p);
    238      1.1  mrg       MPN_COPY (tp + n - p, bp + p, n - p);
    239      1.1  mrg       if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
    240      1.1  mrg 	return hgcd_matrix_apply (M, ap, bp, n);
    241      1.1  mrg     }
    242      1.1  mrg   return 0;
    243      1.1  mrg }
    244