Home | History | Annotate | Line # | Download | only in generic
hgcd_reduce.c revision 1.1.1.1
      1  1.1  mrg /* hgcd_reduce.c.
      2  1.1  mrg 
      3  1.1  mrg    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
      4  1.1  mrg    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      5  1.1  mrg    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      6  1.1  mrg 
      7  1.1  mrg Copyright 2011, 2012 Free Software Foundation, Inc.
      8  1.1  mrg 
      9  1.1  mrg This file is part of the GNU MP Library.
     10  1.1  mrg 
     11  1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     12  1.1  mrg it under the terms of the GNU Lesser General Public License as published by
     13  1.1  mrg the Free Software Foundation; either version 3 of the License, or (at your
     14  1.1  mrg option) any later version.
     15  1.1  mrg 
     16  1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     17  1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     18  1.1  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     19  1.1  mrg License for more details.
     20  1.1  mrg 
     21  1.1  mrg You should have received a copy of the GNU Lesser General Public License
     22  1.1  mrg along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     23  1.1  mrg 
     24  1.1  mrg #include "gmp.h"
     25  1.1  mrg #include "gmp-impl.h"
     26  1.1  mrg #include "longlong.h"
     27  1.1  mrg 
     28  1.1  mrg /* Computes R -= A * B. Result must be non-negative. Normalized down
     29  1.1  mrg    to size an, and resulting size is returned. */
     30  1.1  mrg static mp_size_t
     31  1.1  mrg submul (mp_ptr rp, mp_size_t rn,
     32  1.1  mrg 	mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
     33  1.1  mrg {
     34  1.1  mrg   mp_ptr tp;
     35  1.1  mrg   TMP_DECL;
     36  1.1  mrg 
     37  1.1  mrg   ASSERT (bn > 0);
     38  1.1  mrg   ASSERT (an >= bn);
     39  1.1  mrg   ASSERT (rn >= an);
     40  1.1  mrg   ASSERT (an + bn <= rn + 1);
     41  1.1  mrg 
     42  1.1  mrg   TMP_MARK;
     43  1.1  mrg   tp = TMP_ALLOC_LIMBS (an + bn);
     44  1.1  mrg 
     45  1.1  mrg   mpn_mul (tp, ap, an, bp, bn);
     46  1.1  mrg   if (an + bn > rn)
     47  1.1  mrg     {
     48  1.1  mrg       ASSERT (tp[rn] == 0);
     49  1.1  mrg       bn--;
     50  1.1  mrg     }
     51  1.1  mrg   ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn));
     52  1.1  mrg   TMP_FREE;
     53  1.1  mrg 
     54  1.1  mrg   while (rn > an && (rp[rn-1] == 0))
     55  1.1  mrg     rn--;
     56  1.1  mrg 
     57  1.1  mrg   return rn;
     58  1.1  mrg }
     59  1.1  mrg 
     60  1.1  mrg /* Computes (a, b)  <--  M^{-1} (a; b) */
     61  1.1  mrg /* FIXME:
     62  1.1  mrg     x Take scratch parameter, and figure out scratch need.
     63  1.1  mrg 
     64  1.1  mrg     x Use some fallback for small M->n?
     65  1.1  mrg */
     66  1.1  mrg static mp_size_t
     67  1.1  mrg hgcd_matrix_apply (const struct hgcd_matrix *M,
     68  1.1  mrg 		   mp_ptr ap, mp_ptr bp,
     69  1.1  mrg 		   mp_size_t n)
     70  1.1  mrg {
     71  1.1  mrg   mp_size_t an, bn, un, vn, nn;
     72  1.1  mrg   mp_size_t mn[2][2];
     73  1.1  mrg   mp_size_t modn;
     74  1.1  mrg   mp_ptr tp, sp, scratch;
     75  1.1  mrg   mp_limb_t cy;
     76  1.1  mrg   unsigned i, j;
     77  1.1  mrg 
     78  1.1  mrg   TMP_DECL;
     79  1.1  mrg 
     80  1.1  mrg   ASSERT ( (ap[n-1] | bp[n-1]) > 0);
     81  1.1  mrg 
     82  1.1  mrg   an = n;
     83  1.1  mrg   MPN_NORMALIZE (ap, an);
     84  1.1  mrg   bn = n;
     85  1.1  mrg   MPN_NORMALIZE (bp, bn);
     86  1.1  mrg 
     87  1.1  mrg   for (i = 0; i < 2; i++)
     88  1.1  mrg     for (j = 0; j < 2; j++)
     89  1.1  mrg       {
     90  1.1  mrg 	mp_size_t k;
     91  1.1  mrg 	k = M->n;
     92  1.1  mrg 	MPN_NORMALIZE (M->p[i][j], k);
     93  1.1  mrg 	mn[i][j] = k;
     94  1.1  mrg       }
     95  1.1  mrg 
     96  1.1  mrg   ASSERT (mn[0][0] > 0);
     97  1.1  mrg   ASSERT (mn[1][1] > 0);
     98  1.1  mrg   ASSERT ( (mn[0][1] | mn[1][0]) > 0);
     99  1.1  mrg 
    100  1.1  mrg   TMP_MARK;
    101  1.1  mrg 
    102  1.1  mrg   if (mn[0][1] == 0)
    103  1.1  mrg     {
    104  1.1  mrg       /* A unchanged, M = (1, 0; q, 1) */
    105  1.1  mrg       ASSERT (mn[0][0] == 1);
    106  1.1  mrg       ASSERT (M->p[0][0][0] == 1);
    107  1.1  mrg       ASSERT (mn[1][1] == 1);
    108  1.1  mrg       ASSERT (M->p[1][1][0] == 1);
    109  1.1  mrg 
    110  1.1  mrg       /* Put B <-- B - q A */
    111  1.1  mrg       nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
    112  1.1  mrg     }
    113  1.1  mrg   else if (mn[1][0] == 0)
    114  1.1  mrg     {
    115  1.1  mrg       /* B unchanged, M = (1, q; 0, 1) */
    116  1.1  mrg       ASSERT (mn[0][0] == 1);
    117  1.1  mrg       ASSERT (M->p[0][0][0] == 1);
    118  1.1  mrg       ASSERT (mn[1][1] == 1);
    119  1.1  mrg       ASSERT (M->p[1][1][0] == 1);
    120  1.1  mrg 
    121  1.1  mrg       /* Put A  <-- A - q * B */
    122  1.1  mrg       nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
    123  1.1  mrg     }
    124  1.1  mrg   else
    125  1.1  mrg     {
    126  1.1  mrg       /* A = m00 a + m01 b  ==> a <= A / m00, b <= A / m01.
    127  1.1  mrg 	 B = m10 a + m11 b  ==> a <= B / m10, b <= B / m11. */
    128  1.1  mrg       un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
    129  1.1  mrg       vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
    130  1.1  mrg 
    131  1.1  mrg       nn = MAX (un, vn);
    132  1.1  mrg       /* In the range of interest, mulmod_bnm1 should always beat mullo. */
    133  1.1  mrg       modn = mpn_mulmod_bnm1_next_size (nn + 1);
    134  1.1  mrg 
    135  1.1  mrg       scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, modn, M->n));
    136  1.1  mrg       tp = TMP_ALLOC_LIMBS (modn);
    137  1.1  mrg       sp = TMP_ALLOC_LIMBS (modn);
    138  1.1  mrg 
    139  1.1  mrg       ASSERT (n <= 2*modn);
    140  1.1  mrg 
    141  1.1  mrg       if (n > modn)
    142  1.1  mrg 	{
    143  1.1  mrg 	  cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
    144  1.1  mrg 	  MPN_INCR_U (ap, modn, cy);
    145  1.1  mrg 
    146  1.1  mrg 	  cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
    147  1.1  mrg 	  MPN_INCR_U (bp, modn, cy);
    148  1.1  mrg 
    149  1.1  mrg 	  n = modn;
    150  1.1  mrg 	}
    151  1.1  mrg 
    152  1.1  mrg       mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
    153  1.1  mrg       mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
    154  1.1  mrg 
    155  1.1  mrg       /* FIXME: Handle the small n case in some better way. */
    156  1.1  mrg       if (n + mn[1][1] < modn)
    157  1.1  mrg 	MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
    158  1.1  mrg       if (n + mn[0][1] < modn)
    159  1.1  mrg 	MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
    160  1.1  mrg 
    161  1.1  mrg       cy = mpn_sub_n (tp, tp, sp, modn);
    162  1.1  mrg       MPN_DECR_U (tp, modn, cy);
    163  1.1  mrg 
    164  1.1  mrg       ASSERT (mpn_zero_p (tp + nn, modn - nn));
    165  1.1  mrg 
    166  1.1  mrg       mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
    167  1.1  mrg       MPN_COPY (ap, tp, nn);
    168  1.1  mrg       mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
    169  1.1  mrg 
    170  1.1  mrg       if (n + mn[1][0] < modn)
    171  1.1  mrg 	MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
    172  1.1  mrg       if (n + mn[0][0] < modn)
    173  1.1  mrg 	MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
    174  1.1  mrg 
    175  1.1  mrg       cy = mpn_sub_n (tp, tp, sp, modn);
    176  1.1  mrg       MPN_DECR_U (tp, modn, cy);
    177  1.1  mrg 
    178  1.1  mrg       ASSERT (mpn_zero_p (tp + nn, modn - nn));
    179  1.1  mrg       MPN_COPY (bp, tp, nn);
    180  1.1  mrg 
    181  1.1  mrg       while ( (ap[nn-1] | bp[nn-1]) == 0)
    182  1.1  mrg 	{
    183  1.1  mrg 	  nn--;
    184  1.1  mrg 	  ASSERT (nn > 0);
    185  1.1  mrg 	}
    186  1.1  mrg     }
    187  1.1  mrg   TMP_FREE;
    188  1.1  mrg 
    189  1.1  mrg   return nn;
    190  1.1  mrg }
    191  1.1  mrg 
    192  1.1  mrg mp_size_t
    193  1.1  mrg mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
    194  1.1  mrg {
    195  1.1  mrg   mp_size_t itch;
    196  1.1  mrg   if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
    197  1.1  mrg     {
    198  1.1  mrg       itch = mpn_hgcd_itch (n-p);
    199  1.1  mrg 
    200  1.1  mrg       /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 *
    201  1.1  mrg 	 (p + ceil((n-p)/2) - 1 <= n + p - 1 */
    202  1.1  mrg       if (itch < n + p - 1)
    203  1.1  mrg 	itch = n + p - 1;
    204  1.1  mrg     }
    205  1.1  mrg   else
    206  1.1  mrg     {
    207  1.1  mrg       itch = 2*(n-p) + mpn_hgcd_itch (n-p);
    208  1.1  mrg       /* Currently, hgcd_matrix_apply allocates its own storage. */
    209  1.1  mrg     }
    210  1.1  mrg   return itch;
    211  1.1  mrg }
    212  1.1  mrg 
    213  1.1  mrg /* FIXME: Document storage need. */
    214  1.1  mrg mp_size_t
    215  1.1  mrg mpn_hgcd_reduce (struct hgcd_matrix *M,
    216  1.1  mrg 		 mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
    217  1.1  mrg 		 mp_ptr tp)
    218  1.1  mrg {
    219  1.1  mrg   mp_size_t nn;
    220  1.1  mrg   if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
    221  1.1  mrg     {
    222  1.1  mrg       nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
    223  1.1  mrg       if (nn > 0)
    224  1.1  mrg 	/* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
    225  1.1  mrg 	   = 2 (n - 1) */
    226  1.1  mrg 	return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
    227  1.1  mrg     }
    228  1.1  mrg   else
    229  1.1  mrg     {
    230  1.1  mrg       MPN_COPY (tp, ap + p, n - p);
    231  1.1  mrg       MPN_COPY (tp + n - p, bp + p, n - p);
    232  1.1  mrg       if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
    233  1.1  mrg 	return hgcd_matrix_apply (M, ap, bp, n);
    234  1.1  mrg     }
    235  1.1  mrg   return 0;
    236  1.1  mrg }
    237