Home | History | Annotate | Line # | Download | only in gcc
      1  1.1  mrg /* A representation of vector permutation indices.
      2  1.1  mrg    Copyright (C) 2017-2022 Free Software Foundation, Inc.
      3  1.1  mrg 
      4  1.1  mrg This file is part of GCC.
      5  1.1  mrg 
      6  1.1  mrg GCC is free software; you can redistribute it and/or modify it under
      7  1.1  mrg the terms of the GNU General Public License as published by the Free
      8  1.1  mrg Software Foundation; either version 3, or (at your option) any later
      9  1.1  mrg version.
     10  1.1  mrg 
     11  1.1  mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     12  1.1  mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
     13  1.1  mrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     14  1.1  mrg for more details.
     15  1.1  mrg 
     16  1.1  mrg You should have received a copy of the GNU General Public License
     17  1.1  mrg along with GCC; see the file COPYING3.  If not see
     18  1.1  mrg <http://www.gnu.org/licenses/>.  */
     19  1.1  mrg 
     20  1.1  mrg #include "config.h"
     21  1.1  mrg #include "system.h"
     22  1.1  mrg #include "coretypes.h"
     23  1.1  mrg #include "vec-perm-indices.h"
     24  1.1  mrg #include "tree.h"
     25  1.1  mrg #include "fold-const.h"
     26  1.1  mrg #include "tree-vector-builder.h"
     27  1.1  mrg #include "backend.h"
     28  1.1  mrg #include "rtl.h"
     29  1.1  mrg #include "memmodel.h"
     30  1.1  mrg #include "emit-rtl.h"
     31  1.1  mrg #include "selftest.h"
     32  1.1  mrg #include "rtx-vector-builder.h"
     33  1.1  mrg 
     34  1.1  mrg /* Switch to a new permutation vector that selects between NINPUTS vector
     35  1.1  mrg    inputs that have NELTS_PER_INPUT elements each.  Take the elements of the
     36  1.1  mrg    new permutation vector from ELEMENTS, clamping each one to be in range.  */
     37  1.1  mrg 
     38  1.1  mrg void
     39  1.1  mrg vec_perm_indices::new_vector (const vec_perm_builder &elements,
     40  1.1  mrg 			      unsigned int ninputs,
     41  1.1  mrg 			      poly_uint64 nelts_per_input)
     42  1.1  mrg {
     43  1.1  mrg   m_ninputs = ninputs;
     44  1.1  mrg   m_nelts_per_input = nelts_per_input;
     45  1.1  mrg   /* If the vector has a constant number of elements, expand the
     46  1.1  mrg      encoding and clamp each element.  E.g. { 0, 2, 4, ... } might
     47  1.1  mrg      wrap halfway if there is only one vector input, and we want
     48  1.1  mrg      the wrapped form to be the canonical one.
     49  1.1  mrg 
     50  1.1  mrg      If the vector has a variable number of elements, just copy
     51  1.1  mrg      the encoding.  In that case the unwrapped form is canonical
     52  1.1  mrg      and there is no way of representing the wrapped form.  */
     53  1.1  mrg   poly_uint64 full_nelts = elements.full_nelts ();
     54  1.1  mrg   unsigned HOST_WIDE_INT copy_nelts;
     55  1.1  mrg   if (full_nelts.is_constant (&copy_nelts))
     56  1.1  mrg     m_encoding.new_vector (full_nelts, copy_nelts, 1);
     57  1.1  mrg   else
     58  1.1  mrg     {
     59  1.1  mrg       copy_nelts = elements.encoded_nelts ();
     60  1.1  mrg       m_encoding.new_vector (full_nelts, elements.npatterns (),
     61  1.1  mrg 			     elements.nelts_per_pattern ());
     62  1.1  mrg     }
     63  1.1  mrg   unsigned int npatterns = m_encoding.npatterns ();
     64  1.1  mrg   for (unsigned int i = 0; i < npatterns; ++i)
     65  1.1  mrg     m_encoding.quick_push (clamp (elements.elt (i)));
     66  1.1  mrg   /* Use the fact that:
     67  1.1  mrg 
     68  1.1  mrg 	(a + b) % c == ((a % c) + (b % c)) % c
     69  1.1  mrg 
     70  1.1  mrg      to simplify the clamping of variable-length vectors.  */
     71  1.1  mrg   for (unsigned int i = npatterns; i < copy_nelts; ++i)
     72  1.1  mrg     {
     73  1.1  mrg       element_type step = clamp (elements.elt (i)
     74  1.1  mrg 				 - elements.elt (i - npatterns));
     75  1.1  mrg       m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
     76  1.1  mrg     }
     77  1.1  mrg   m_encoding.finalize ();
     78  1.1  mrg }
     79  1.1  mrg 
     80  1.1  mrg /* Switch to a new permutation vector that selects the same input elements
     81  1.1  mrg    as ORIG, but with each element split into FACTOR pieces.  For example,
     82  1.1  mrg    if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
     83  1.1  mrg    { 2, 3, 4, 5, 0, 1, 6, 7 }.  */
     84  1.1  mrg 
     85  1.1  mrg void
     86  1.1  mrg vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
     87  1.1  mrg 				       unsigned int factor)
     88  1.1  mrg {
     89  1.1  mrg   m_ninputs = orig.m_ninputs;
     90  1.1  mrg   m_nelts_per_input = orig.m_nelts_per_input * factor;
     91  1.1  mrg   m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
     92  1.1  mrg 			 orig.m_encoding.npatterns () * factor,
     93  1.1  mrg 			 orig.m_encoding.nelts_per_pattern ());
     94  1.1  mrg   unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
     95  1.1  mrg   for (unsigned int i = 0; i < encoded_nelts; ++i)
     96  1.1  mrg     {
     97  1.1  mrg       element_type base = orig.m_encoding[i] * factor;
     98  1.1  mrg       for (unsigned int j = 0; j < factor; ++j)
     99  1.1  mrg 	m_encoding.quick_push (base + j);
    100  1.1  mrg     }
    101  1.1  mrg   m_encoding.finalize ();
    102  1.1  mrg }
    103  1.1  mrg 
    104  1.1  mrg /* Check whether we can switch to a new permutation vector that
    105  1.1  mrg    selects the same input elements as ORIG, but with each element
    106  1.1  mrg    built up from FACTOR pieces.  Return true if yes, otherwise
    107  1.1  mrg    return false.  Every FACTOR permutation indexes should be
    108  1.1  mrg    continuous separately and the first one of each batch should
    109  1.1  mrg    be able to exactly modulo FACTOR.  For example, if ORIG is
    110  1.1  mrg    { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
    111  1.1  mrg    is { 1, 2, 0, 3 }.  */
    112  1.1  mrg 
    113  1.1  mrg bool
    114  1.1  mrg vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
    115  1.1  mrg 				     unsigned int factor)
    116  1.1  mrg {
    117  1.1  mrg   gcc_assert (factor > 0);
    118  1.1  mrg 
    119  1.1  mrg   if (maybe_lt (orig.m_nelts_per_input, factor))
    120  1.1  mrg     return false;
    121  1.1  mrg 
    122  1.1  mrg   poly_uint64 nelts;
    123  1.1  mrg   /* Invalid if vector units number isn't multiple of factor.  */
    124  1.1  mrg   if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
    125  1.1  mrg     return false;
    126  1.1  mrg 
    127  1.1  mrg   /* Only handle the case that npatterns is multiple of factor.
    128  1.1  mrg      FIXME: Try to see whether we can reshape it by factor npatterns.  */
    129  1.1  mrg   if (orig.m_encoding.npatterns () % factor != 0)
    130  1.1  mrg     return false;
    131  1.1  mrg 
    132  1.1  mrg   unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
    133  1.1  mrg   auto_vec<element_type, 32> encoding (encoded_nelts);
    134  1.1  mrg   /* Separate all encoded elements into batches by size factor,
    135  1.1  mrg      then ensure the first element of each batch is multiple of
    136  1.1  mrg      factor and all elements in each batch is consecutive from
    137  1.1  mrg      the first one.  */
    138  1.1  mrg   for (unsigned int i = 0; i < encoded_nelts; i += factor)
    139  1.1  mrg     {
    140  1.1  mrg       element_type first = orig.m_encoding[i];
    141  1.1  mrg       element_type new_index;
    142  1.1  mrg       if (!multiple_p (first, factor, &new_index))
    143  1.1  mrg 	return false;
    144  1.1  mrg       for (unsigned int j = 1; j < factor; ++j)
    145  1.1  mrg 	if (maybe_ne (first + j, orig.m_encoding[i + j]))
    146  1.1  mrg 	  return false;
    147  1.1  mrg       encoding.quick_push (new_index);
    148  1.1  mrg     }
    149  1.1  mrg 
    150  1.1  mrg   m_ninputs = orig.m_ninputs;
    151  1.1  mrg   m_nelts_per_input = nelts;
    152  1.1  mrg   poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
    153  1.1  mrg   unsigned int npatterns = orig.m_encoding.npatterns () / factor;
    154  1.1  mrg 
    155  1.1  mrg   m_encoding.new_vector (full_nelts, npatterns,
    156  1.1  mrg 			 orig.m_encoding.nelts_per_pattern ());
    157  1.1  mrg   m_encoding.splice (encoding);
    158  1.1  mrg   m_encoding.finalize ();
    159  1.1  mrg 
    160  1.1  mrg   return true;
    161  1.1  mrg }
    162  1.1  mrg 
    163  1.1  mrg /* Rotate the inputs of the permutation right by DELTA inputs.  This changes
    164  1.1  mrg    the values of the permutation vector but it doesn't change the way that
    165  1.1  mrg    the elements are encoded.  */
    166  1.1  mrg 
    167  1.1  mrg void
    168  1.1  mrg vec_perm_indices::rotate_inputs (int delta)
    169  1.1  mrg {
    170  1.1  mrg   element_type element_delta = delta * m_nelts_per_input;
    171  1.1  mrg   for (unsigned int i = 0; i < m_encoding.length (); ++i)
    172  1.1  mrg     m_encoding[i] = clamp (m_encoding[i] + element_delta);
    173  1.1  mrg }
    174  1.1  mrg 
    175  1.1  mrg /* Return true if index OUT_BASE + I * OUT_STEP selects input
    176  1.1  mrg    element IN_BASE + I * IN_STEP.  For example, the call to test
    177  1.1  mrg    whether a permute reverses a vector of N elements would be:
    178  1.1  mrg 
    179  1.1  mrg      series_p (0, 1, N - 1, -1)
    180  1.1  mrg 
    181  1.1  mrg    which would return true for { N - 1, N - 2, N - 3, ... }.
    182  1.1  mrg    The calls to test for an interleaving of elements starting
    183  1.1  mrg    at N1 and N2 would be:
    184  1.1  mrg 
    185  1.1  mrg      series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
    186  1.1  mrg 
    187  1.1  mrg    which would return true for { N1, N2, N1 + 1, N2 + 1, ... }.  */
    188  1.1  mrg 
    189  1.1  mrg bool
    190  1.1  mrg vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
    191  1.1  mrg 			    element_type in_base, element_type in_step) const
    192  1.1  mrg {
    193  1.1  mrg   /* Check the base value.  */
    194  1.1  mrg   if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
    195  1.1  mrg     return false;
    196  1.1  mrg 
    197  1.1  mrg   element_type full_nelts = m_encoding.full_nelts ();
    198  1.1  mrg   unsigned int npatterns = m_encoding.npatterns ();
    199  1.1  mrg 
    200  1.1  mrg   /* Calculate which multiple of OUT_STEP elements we need to get
    201  1.1  mrg      back to the same pattern.  */
    202  1.1  mrg   unsigned int cycle_length = least_common_multiple (out_step, npatterns);
    203  1.1  mrg 
    204  1.1  mrg   /* Check the steps.  */
    205  1.1  mrg   in_step = clamp (in_step);
    206  1.1  mrg   out_base += out_step;
    207  1.1  mrg   unsigned int limit = 0;
    208  1.1  mrg   for (;;)
    209  1.1  mrg     {
    210  1.1  mrg       /* Succeed if we've checked all the elements in the vector.  */
    211  1.1  mrg       if (known_ge (out_base, full_nelts))
    212  1.1  mrg 	return true;
    213  1.1  mrg 
    214  1.1  mrg       if (out_base >= npatterns)
    215  1.1  mrg 	{
    216  1.1  mrg 	  /* We've got to the end of the "foreground" values.  Check
    217  1.1  mrg 	     2 elements from each pattern in the "background" values.  */
    218  1.1  mrg 	  if (limit == 0)
    219  1.1  mrg 	    limit = out_base + cycle_length * 2;
    220  1.1  mrg 	  else if (out_base >= limit)
    221  1.1  mrg 	    return true;
    222  1.1  mrg 	}
    223  1.1  mrg 
    224  1.1  mrg       element_type v0 = m_encoding.elt (out_base - out_step);
    225  1.1  mrg       element_type v1 = m_encoding.elt (out_base);
    226  1.1  mrg       if (maybe_ne (clamp (v1 - v0), in_step))
    227  1.1  mrg 	return false;
    228  1.1  mrg 
    229  1.1  mrg       out_base += out_step;
    230  1.1  mrg     }
    231  1.1  mrg }
    232  1.1  mrg 
    233  1.1  mrg /* Return true if all elements of the permutation vector are in the range
    234  1.1  mrg    [START, START + SIZE).  */
    235  1.1  mrg 
    236  1.1  mrg bool
    237  1.1  mrg vec_perm_indices::all_in_range_p (element_type start, element_type size) const
    238  1.1  mrg {
    239  1.1  mrg   /* Check the first two elements of each pattern.  */
    240  1.1  mrg   unsigned int npatterns = m_encoding.npatterns ();
    241  1.1  mrg   unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
    242  1.1  mrg   unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
    243  1.1  mrg   for (unsigned int i = 0; i < base_nelts; ++i)
    244  1.1  mrg     if (!known_in_range_p (m_encoding[i], start, size))
    245  1.1  mrg       return false;
    246  1.1  mrg 
    247  1.1  mrg   /* For stepped encodings, check the full range of the series.  */
    248  1.1  mrg   if (nelts_per_pattern == 3)
    249  1.1  mrg     {
    250  1.1  mrg       element_type limit = input_nelts ();
    251  1.1  mrg 
    252  1.1  mrg       /* The number of elements in each pattern beyond the first two
    253  1.1  mrg 	 that we checked above.  */
    254  1.1  mrg       poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
    255  1.1  mrg 					 npatterns) - 2;
    256  1.1  mrg       for (unsigned int i = 0; i < npatterns; ++i)
    257  1.1  mrg 	{
    258  1.1  mrg 	  /* BASE1 has been checked but BASE2 hasn't.   */
    259  1.1  mrg 	  element_type base1 = m_encoding[i + npatterns];
    260  1.1  mrg 	  element_type base2 = m_encoding[i + base_nelts];
    261  1.1  mrg 
    262  1.1  mrg 	  /* The step to add to get from BASE1 to each subsequent value.  */
    263  1.1  mrg 	  element_type step = clamp (base2 - base1);
    264  1.1  mrg 
    265  1.1  mrg 	  /* STEP has no inherent sign, so a value near LIMIT can
    266  1.1  mrg 	     act as a negative step.  The series is in range if it
    267  1.1  mrg 	     is in range according to one of the two interpretations.
    268  1.1  mrg 
    269  1.1  mrg 	     Since we're dealing with clamped values, ELEMENT_TYPE is
    270  1.1  mrg 	     wide enough for overflow not to be a problem.  */
    271  1.1  mrg 	  element_type headroom_down = base1 - start;
    272  1.1  mrg 	  element_type headroom_up = size - headroom_down - 1;
    273  1.1  mrg 	  HOST_WIDE_INT diff;
    274  1.1  mrg 	  if ((!step.is_constant (&diff)
    275  1.1  mrg 	       || maybe_lt (headroom_up, diff * step_nelts))
    276  1.1  mrg 	      && (!(limit - step).is_constant (&diff)
    277  1.1  mrg 		  || maybe_lt (headroom_down, diff * step_nelts)))
    278  1.1  mrg 	    return false;
    279  1.1  mrg 	}
    280  1.1  mrg     }
    281  1.1  mrg   return true;
    282  1.1  mrg }
    283  1.1  mrg 
    284  1.1  mrg /* Try to read the contents of VECTOR_CST CST as a constant permutation
    285  1.1  mrg    vector.  Return true and add the elements to BUILDER on success,
    286  1.1  mrg    otherwise return false without modifying BUILDER.  */
    287  1.1  mrg 
    288  1.1  mrg bool
    289  1.1  mrg tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
    290  1.1  mrg {
    291  1.1  mrg   unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
    292  1.1  mrg   for (unsigned int i = 0; i < encoded_nelts; ++i)
    293  1.1  mrg     if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
    294  1.1  mrg       return false;
    295  1.1  mrg 
    296  1.1  mrg   builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
    297  1.1  mrg 		       VECTOR_CST_NPATTERNS (cst),
    298  1.1  mrg 		       VECTOR_CST_NELTS_PER_PATTERN (cst));
    299  1.1  mrg   for (unsigned int i = 0; i < encoded_nelts; ++i)
    300  1.1  mrg     builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
    301  1.1  mrg   return true;
    302  1.1  mrg }
    303  1.1  mrg 
    304  1.1  mrg /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES.  */
    305  1.1  mrg 
    306  1.1  mrg tree
    307  1.1  mrg vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
    308  1.1  mrg {
    309  1.1  mrg   gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
    310  1.1  mrg   tree_vector_builder sel (type, indices.encoding ().npatterns (),
    311  1.1  mrg 			   indices.encoding ().nelts_per_pattern ());
    312  1.1  mrg   unsigned int encoded_nelts = sel.encoded_nelts ();
    313  1.1  mrg   for (unsigned int i = 0; i < encoded_nelts; i++)
    314  1.1  mrg     sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
    315  1.1  mrg   return sel.build ();
    316  1.1  mrg }
    317  1.1  mrg 
    318  1.1  mrg /* Return a CONST_VECTOR of mode MODE that contains the elements of
    319  1.1  mrg    INDICES.  */
    320  1.1  mrg 
    321  1.1  mrg rtx
    322  1.1  mrg vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
    323  1.1  mrg {
    324  1.1  mrg   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
    325  1.1  mrg 	      && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
    326  1.1  mrg   rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
    327  1.1  mrg 			  indices.encoding ().nelts_per_pattern ());
    328  1.1  mrg   unsigned int encoded_nelts = sel.encoded_nelts ();
    329  1.1  mrg   for (unsigned int i = 0; i < encoded_nelts; i++)
    330  1.1  mrg     sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
    331  1.1  mrg   return sel.build ();
    332  1.1  mrg }
    333  1.1  mrg 
    334  1.1  mrg #if CHECKING_P
    335  1.1  mrg 
    336  1.1  mrg namespace selftest {
    337  1.1  mrg 
    338  1.1  mrg /* Test a 12-element vector.  */
    339  1.1  mrg 
    340  1.1  mrg static void
    341  1.1  mrg test_vec_perm_12 (void)
    342  1.1  mrg {
    343  1.1  mrg   vec_perm_builder builder (12, 12, 1);
    344  1.1  mrg   for (unsigned int i = 0; i < 4; ++i)
    345  1.1  mrg     {
    346  1.1  mrg       builder.quick_push (i * 5);
    347  1.1  mrg       builder.quick_push (3 + i);
    348  1.1  mrg       builder.quick_push (2 + 3 * i);
    349  1.1  mrg     }
    350  1.1  mrg   vec_perm_indices indices (builder, 1, 12);
    351  1.1  mrg   ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
    352  1.1  mrg   ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
    353  1.1  mrg   ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
    354  1.1  mrg   ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
    355  1.1  mrg   ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
    356  1.1  mrg 
    357  1.1  mrg   ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
    358  1.1  mrg   ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
    359  1.1  mrg 
    360  1.1  mrg   ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
    361  1.1  mrg   ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
    362  1.1  mrg 
    363  1.1  mrg   ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
    364  1.1  mrg   ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
    365  1.1  mrg 
    366  1.1  mrg   ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
    367  1.1  mrg   ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
    368  1.1  mrg   ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
    369  1.1  mrg }
    370  1.1  mrg 
    371  1.1  mrg /* Run selftests for this file.  */
    372  1.1  mrg 
    373  1.1  mrg void
    374  1.1  mrg vec_perm_indices_cc_tests ()
    375  1.1  mrg {
    376  1.1  mrg   test_vec_perm_12 ();
    377  1.1  mrg }
    378  1.1  mrg 
    379  1.1  mrg } // namespace selftest
    380  1.1  mrg 
    381  1.1  mrg #endif
    382