dist/gcc/vec-perm-indices.cc

1.1  mrg /* A representation of vector permutation indices.
1.1  mrg    Copyright (C) 2017-2022 Free Software Foundation, Inc.
1.1  mrg
1.1  mrg This file is part of GCC.
1.1  mrg
1.1  mrg GCC is free software; you can redistribute it and/or modify it under
1.1  mrg the terms of the GNU General Public License as published by the Free
1.1  mrg Software Foundation; either version 3, or (at your option) any later
1.1  mrg version.
1.1  mrg
1.1  mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
1.1  mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
1.1  mrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1.1  mrg for more details.
1.1  mrg
1.1  mrg You should have received a copy of the GNU General Public License
1.1  mrg along with GCC; see the file COPYING3.  If not see
1.1  mrg <http://www.gnu.org/licenses/>.  */
1.1  mrg
1.1  mrg #include "config.h"
1.1  mrg #include "system.h"
1.1  mrg #include "coretypes.h"
1.1  mrg #include "vec-perm-indices.h"
1.1  mrg #include "tree.h"
1.1  mrg #include "fold-const.h"
1.1  mrg #include "tree-vector-builder.h"
1.1  mrg #include "backend.h"
1.1  mrg #include "rtl.h"
1.1  mrg #include "memmodel.h"
1.1  mrg #include "emit-rtl.h"
1.1  mrg #include "selftest.h"
1.1  mrg #include "rtx-vector-builder.h"
1.1  mrg
1.1  mrg /* Switch to a new permutation vector that selects between NINPUTS vector
1.1  mrg    inputs that have NELTS_PER_INPUT elements each.  Take the elements of the
1.1  mrg    new permutation vector from ELEMENTS, clamping each one to be in range.  */
1.1  mrg
1.1  mrg void
1.1  mrg vec_perm_indices::new_vector (const vec_perm_builder &elements,
1.1  mrg 			      unsigned int ninputs,
1.1  mrg 			      poly_uint64 nelts_per_input)
1.1  mrg {
1.1  mrg   m_ninputs = ninputs;
1.1  mrg   m_nelts_per_input = nelts_per_input;
1.1  mrg
1.1  mrg   /* Pick the shape of the new encoding.  A variable-length vector
1.1  mrg      keeps the encoding of ELEMENTS, since its unwrapped form is the
1.1  mrg      canonical one and the wrapped form cannot be represented.  A
1.1  mrg      constant-length vector is instead expanded to one pattern per
1.1  mrg      element, so that a series such as { 0, 2, 4, ... } that wraps
1.1  mrg      partway through (e.g. with a single input) is stored in its
1.1  mrg      wrapped, canonical form.  */
1.1  mrg   poly_uint64 total_nelts = elements.full_nelts ();
1.1  mrg   unsigned HOST_WIDE_INT nelts_to_copy;
1.1  mrg   if (!total_nelts.is_constant (&nelts_to_copy))
1.1  mrg     {
1.1  mrg       nelts_to_copy = elements.encoded_nelts ();
1.1  mrg       m_encoding.new_vector (total_nelts, elements.npatterns (),
1.1  mrg 			     elements.nelts_per_pattern ());
1.1  mrg     }
1.1  mrg   else
1.1  mrg     m_encoding.new_vector (total_nelts, nelts_to_copy, 1);
1.1  mrg
1.1  mrg   /* The first NPATTERNS elements are clamped directly.  */
1.1  mrg   const unsigned int npatterns = m_encoding.npatterns ();
1.1  mrg   unsigned int idx = 0;
1.1  mrg   while (idx < npatterns)
1.1  mrg     {
1.1  mrg       m_encoding.quick_push (clamp (elements.elt (idx)));
1.1  mrg       ++idx;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Each remaining element is rebuilt from the already-clamped element
1.1  mrg      NPATTERNS places earlier plus a clamped step.  This relies on:
1.1  mrg
1.1  mrg 	(a + b) % c == ((a % c) + (b % c)) % c
1.1  mrg
1.1  mrg      and keeps the clamping of variable-length vectors simple.  */
1.1  mrg   while (idx < nelts_to_copy)
1.1  mrg     {
1.1  mrg       element_type step = clamp (elements.elt (idx)
1.1  mrg 				 - elements.elt (idx - npatterns));
1.1  mrg       element_type prev = m_encoding[idx - npatterns];
1.1  mrg       m_encoding.quick_push (clamp (prev + step));
1.1  mrg       ++idx;
1.1  mrg     }
1.1  mrg   m_encoding.finalize ();
1.1  mrg }
1.1  mrg
1.1  mrg /* Switch to a new permutation vector that selects the same input elements
1.1  mrg    as ORIG, but with each element split into FACTOR pieces.  For example,
1.1  mrg    if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
1.1  mrg    { 2, 3, 4, 5, 0, 1, 6, 7 }.  */
1.1  mrg
1.1  mrg void
1.1  mrg vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
1.1  mrg 				       unsigned int factor)
1.1  mrg {
1.1  mrg   /* Same inputs as ORIG, but each holds FACTOR times as many elements.  */
1.1  mrg   m_ninputs = orig.m_ninputs;
1.1  mrg   m_nelts_per_input = orig.m_nelts_per_input * factor;
1.1  mrg
1.1  mrg   /* Scale the total length and the pattern count by FACTOR; the number
1.1  mrg      of elements per pattern is carried over unchanged.  */
1.1  mrg   m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
1.1  mrg 			 orig.m_encoding.npatterns () * factor,
1.1  mrg 			 orig.m_encoding.nelts_per_pattern ());
1.1  mrg
1.1  mrg   /* Replace each encoded element E of ORIG with the FACTOR consecutive
1.1  mrg      indices E * FACTOR, E * FACTOR + 1, ...  */
1.1  mrg   const unsigned int orig_encoded_nelts = orig.m_encoding.encoded_nelts ();
1.1  mrg   for (unsigned int src = 0; src != orig_encoded_nelts; ++src)
1.1  mrg     {
1.1  mrg       element_type first_piece = orig.m_encoding[src] * factor;
1.1  mrg       unsigned int piece = 0;
1.1  mrg       while (piece < factor)
1.1  mrg 	{
1.1  mrg 	  m_encoding.quick_push (first_piece + piece);
1.1  mrg 	  ++piece;
1.1  mrg 	}
1.1  mrg     }
1.1  mrg   m_encoding.finalize ();
1.1  mrg }
1.1  mrg
1.1  mrg /* Check whether we can switch to a new permutation vector that
1.1  mrg    selects the same input elements as ORIG, but with each element
1.1  mrg    built up from FACTOR pieces.  Return true if yes, otherwise
1.1  mrg    return false.  Each batch of FACTOR permutation indices must
1.1  mrg    be consecutive, and the first index of each batch must be an
1.1  mrg    exact multiple of FACTOR.  For example, if ORIG is
1.1  mrg    { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
1.1  mrg    is { 1, 2, 0, 3 }.  */
1.1  mrg
1.1  mrg bool
1.1  mrg vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
1.1  mrg 				     unsigned int factor)
1.1  mrg {
1.1  mrg   gcc_assert (factor > 0);
1.1  mrg
1.1  mrg   /* Give up unless the number of elements per input is at least FACTOR
1.1  mrg      and a multiple of it, and the number of patterns is a multiple of
1.1  mrg      FACTOR too.
1.1  mrg      FIXME: Try to see whether we can reshape it by factor npatterns.  */
1.1  mrg   poly_uint64 shrunk_nelts_per_input;
1.1  mrg   if (maybe_lt (orig.m_nelts_per_input, factor)
1.1  mrg       || !multiple_p (orig.m_nelts_per_input, factor,
1.1  mrg 		      &shrunk_nelts_per_input)
1.1  mrg       || orig.m_encoding.npatterns () % factor != 0)
1.1  mrg     return false;
1.1  mrg
1.1  mrg   /* Walk the encoded elements of ORIG in batches of FACTOR.  The head
1.1  mrg      of each batch has to be a multiple of FACTOR and the rest of the
1.1  mrg      batch has to count up from it one at a time.  The results are
1.1  mrg      collected on the side so that *THIS is untouched on failure.  */
1.1  mrg   const unsigned int orig_encoded_nelts = orig.m_encoding.encoded_nelts ();
1.1  mrg   auto_vec<element_type, 32> shrunk (orig_encoded_nelts);
1.1  mrg   for (unsigned int batch = 0; batch < orig_encoded_nelts; batch += factor)
1.1  mrg     {
1.1  mrg       element_type head = orig.m_encoding[batch];
1.1  mrg       element_type shrunk_index;
1.1  mrg       if (!multiple_p (head, factor, &shrunk_index))
1.1  mrg 	return false;
1.1  mrg
1.1  mrg       unsigned int offset = 1;
1.1  mrg       while (offset < factor)
1.1  mrg 	{
1.1  mrg 	  if (maybe_ne (head + offset, orig.m_encoding[batch + offset]))
1.1  mrg 	    return false;
1.1  mrg 	  ++offset;
1.1  mrg 	}
1.1  mrg       shrunk.quick_push (shrunk_index);
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Every batch collapsed cleanly, so commit the new vector.  */
1.1  mrg   m_ninputs = orig.m_ninputs;
1.1  mrg   m_nelts_per_input = shrunk_nelts_per_input;
1.1  mrg   poly_uint64 shrunk_full_nelts
1.1  mrg     = exact_div (orig.m_encoding.full_nelts (), factor);
1.1  mrg   unsigned int shrunk_npatterns = orig.m_encoding.npatterns () / factor;
1.1  mrg
1.1  mrg   m_encoding.new_vector (shrunk_full_nelts, shrunk_npatterns,
1.1  mrg 			 orig.m_encoding.nelts_per_pattern ());
1.1  mrg   m_encoding.splice (shrunk);
1.1  mrg   m_encoding.finalize ();
1.1  mrg
1.1  mrg   return true;
1.1  mrg }
1.1  mrg
1.1  mrg /* Rotate the inputs of the permutation right by DELTA inputs.  This changes
1.1  mrg    the values of the permutation vector but it doesn't change the way that
1.1  mrg    the elements are encoded.  */
1.1  mrg
1.1  mrg void
1.1  mrg vec_perm_indices::rotate_inputs (int delta)
1.1  mrg {
1.1  mrg   /* Moving by DELTA inputs shifts every index by DELTA whole inputs'
1.1  mrg      worth of elements.  */
1.1  mrg   element_type shift = delta * m_nelts_per_input;
1.1  mrg
1.1  mrg   /* Rewrite each stored element in place, clamping the shifted value.  */
1.1  mrg   const unsigned int count = m_encoding.length ();
1.1  mrg   for (unsigned int pos = 0; pos != count; ++pos)
1.1  mrg     {
1.1  mrg       element_type rotated = clamp (m_encoding[pos] + shift);
1.1  mrg       m_encoding[pos] = rotated;
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Return true if index OUT_BASE + I * OUT_STEP selects input
1.1  mrg    element IN_BASE + I * IN_STEP.  For example, the call to test
1.1  mrg    whether a permute reverses a vector of N elements would be:
1.1  mrg
1.1  mrg      series_p (0, 1, N - 1, -1)
1.1  mrg
1.1  mrg    which would return true for { N - 1, N - 2, N - 3, ... }.
1.1  mrg    The calls to test for an interleaving of elements starting
1.1  mrg    at N1 and N2 would be:
1.1  mrg
1.1  mrg      series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
1.1  mrg
1.1  mrg    which would return true for { N1, N2, N1 + 1, N2 + 1, ... }.
1.1  mrg
1.1  mrg    OUT_BASE and OUT_STEP are positions in the permutation vector.
1.1  mrg    IN_BASE and IN_STEP are passed through clamp () before being
1.1  mrg    compared, so callers may pass unclamped values such as the -1
1.1  mrg    step above.  Both the base and the step comparisons use maybe_ne,
1.1  mrg    so the result is false whenever a mismatch is possible.  */
1.1  mrg
1.1  mrg bool
1.1  mrg vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
1.1  mrg 			    element_type in_base, element_type in_step) const
1.1  mrg {
1.1  mrg   /* Check the base value.  */
1.1  mrg   if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
1.1  mrg     return false;
1.1  mrg
1.1  mrg   element_type full_nelts = m_encoding.full_nelts ();
1.1  mrg   unsigned int npatterns = m_encoding.npatterns ();
1.1  mrg
1.1  mrg   /* Calculate which multiple of OUT_STEP elements we need to get
1.1  mrg      back to the same pattern.  */
1.1  mrg   unsigned int cycle_length = least_common_multiple (out_step, npatterns);
1.1  mrg
1.1  mrg   /* Check the steps.  IN_STEP is clamped once here so that it can be
1.1  mrg      compared directly with the clamped differences below.  OUT_BASE
1.1  mrg      is advanced before the loop so that each iteration compares the
1.1  mrg      element at OUT_BASE with the one OUT_STEP positions before it.
1.1  mrg      LIMIT stays zero until the walk first reaches index NPATTERNS
1.1  mrg      or beyond.  */
1.1  mrg   in_step = clamp (in_step);
1.1  mrg   out_base += out_step;
1.1  mrg   unsigned int limit = 0;
1.1  mrg   for (;;)
1.1  mrg     {
1.1  mrg       /* Succeed if we've checked all the elements in the vector.  */
1.1  mrg       if (known_ge (out_base, full_nelts))
1.1  mrg 	return true;
1.1  mrg
1.1  mrg       if (out_base >= npatterns)
1.1  mrg 	{
1.1  mrg 	  /* We've got to the end of the "foreground" values.  Check
1.1  mrg 	     2 elements from each pattern in the "background" values.
1.1  mrg 	     The first time we get here, record the index that lies
1.1  mrg 	     two full cycles ahead; once OUT_BASE reaches that index,
1.1  mrg 	     every step checked so far has matched and we succeed.  */
1.1  mrg 	  if (limit == 0)
1.1  mrg 	    limit = out_base + cycle_length * 2;
1.1  mrg 	  else if (out_base >= limit)
1.1  mrg 	    return true;
1.1  mrg 	}
1.1  mrg
1.1  mrg       element_type v0 = m_encoding.elt (out_base - out_step);
1.1  mrg       element_type v1 = m_encoding.elt (out_base);
1.1  mrg       if (maybe_ne (clamp (v1 - v0), in_step))
1.1  mrg 	return false;
1.1  mrg
1.1  mrg       out_base += out_step;
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Return true if all elements of the permutation vector are in the range
1.1  mrg    [START, START + SIZE).  The encoded elements are tested with
1.1  mrg    known_in_range_p, so an element that cannot be shown to lie in the
1.1  mrg    range makes the result false.  */
1.1  mrg
1.1  mrg bool
1.1  mrg vec_perm_indices::all_in_range_p (element_type start, element_type size) const
1.1  mrg {
1.1  mrg   /* Check the first two elements of each pattern, or the only element
1.1  mrg      if each pattern is encoded with a single element.  */
1.1  mrg   unsigned int npatterns = m_encoding.npatterns ();
1.1  mrg   unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
1.1  mrg   unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
1.1  mrg   for (unsigned int i = 0; i < base_nelts; ++i)
1.1  mrg     if (!known_in_range_p (m_encoding[i], start, size))
1.1  mrg       return false;
1.1  mrg
1.1  mrg   /* For stepped encodings, check the full range of the series.  Here
1.1  mrg      BASE_NELTS is 2 * NPATTERNS, so index I + BASE_NELTS is the third
1.1  mrg      encoded element of pattern I.  */
1.1  mrg   if (nelts_per_pattern == 3)
1.1  mrg     {
1.1  mrg       element_type limit = input_nelts ();
1.1  mrg
1.1  mrg       /* The number of elements in each pattern beyond the first two
1.1  mrg 	 that we checked above.  */
1.1  mrg       poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
1.1  mrg 					 npatterns) - 2;
1.1  mrg       for (unsigned int i = 0; i < npatterns; ++i)
1.1  mrg 	{
1.1  mrg 	  /* BASE1 has been checked but BASE2 hasn't.  */
1.1  mrg 	  element_type base1 = m_encoding[i + npatterns];
1.1  mrg 	  element_type base2 = m_encoding[i + base_nelts];
1.1  mrg
1.1  mrg 	  /* The step to add to get from BASE1 to each subsequent value.  */
1.1  mrg 	  element_type step = clamp (base2 - base1);
1.1  mrg
1.1  mrg 	  /* STEP has no inherent sign, so a value near LIMIT can
1.1  mrg 	     act as a negative step.  The series is in range if it
1.1  mrg 	     is in range according to one of the two interpretations.
1.1  mrg
1.1  mrg 	     Since we're dealing with clamped values, ELEMENT_TYPE is
1.1  mrg 	     wide enough for overflow not to be a problem.
1.1  mrg
1.1  mrg 	     HEADROOM_DOWN is the distance from START up to BASE1 and
1.1  mrg 	     HEADROOM_UP is the distance from BASE1 up to the last
1.1  mrg 	     in-range value, START + SIZE - 1.  DIFF is reused: it
1.1  mrg 	     first receives STEP (the forward interpretation, limited
1.1  mrg 	     by HEADROOM_UP) and then LIMIT - STEP (the backward
1.1  mrg 	     interpretation, limited by HEADROOM_DOWN).  An
1.1  mrg 	     interpretation whose step is not constant is treated
1.1  mrg 	     as failing.  */
1.1  mrg 	  element_type headroom_down = base1 - start;
1.1  mrg 	  element_type headroom_up = size - headroom_down - 1;
1.1  mrg 	  HOST_WIDE_INT diff;
1.1  mrg 	  if ((!step.is_constant (&diff)
1.1  mrg 	       || maybe_lt (headroom_up, diff * step_nelts))
1.1  mrg 	      && (!(limit - step).is_constant (&diff)
1.1  mrg 		  || maybe_lt (headroom_down, diff * step_nelts)))
1.1  mrg 	    return false;
1.1  mrg 	}
1.1  mrg     }
1.1  mrg   return true;
1.1  mrg }
1.1  mrg
1.1  mrg /* Try to read the contents of VECTOR_CST CST as a constant permutation
1.1  mrg    vector.  Return true and add the elements to BUILDER on success,
1.1  mrg    otherwise return false without modifying BUILDER.  */
1.1  mrg
1.1  mrg bool
1.1  mrg tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
1.1  mrg {
1.1  mrg   const unsigned int count = vector_cst_encoded_nelts (cst);
1.1  mrg
1.1  mrg   /* First pass: make sure every encoded element can be read as a
1.1  mrg      poly_int64.  BUILDER is not touched until this has succeeded.  */
1.1  mrg   for (unsigned int pos = 0; pos < count; ++pos)
1.1  mrg     {
1.1  mrg       tree elt = VECTOR_CST_ENCODED_ELT (cst, pos);
1.1  mrg       if (!tree_fits_poly_int64_p (elt))
1.1  mrg 	return false;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Second pass: give BUILDER the same shape as CST and copy the
1.1  mrg      encoded elements across.  */
1.1  mrg   builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
1.1  mrg 		       VECTOR_CST_NPATTERNS (cst),
1.1  mrg 		       VECTOR_CST_NELTS_PER_PATTERN (cst));
1.1  mrg   for (unsigned int pos = 0; pos < count; ++pos)
1.1  mrg     {
1.1  mrg       tree elt = VECTOR_CST_ENCODED_ELT (cst, pos);
1.1  mrg       builder->quick_push (tree_to_poly_int64 (elt));
1.1  mrg     }
1.1  mrg   return true;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES.  */
1.1  mrg
1.1  mrg tree
1.1  mrg vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
1.1  mrg {
1.1  mrg   /* TYPE must have exactly as many elements as INDICES.  */
1.1  mrg   gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
1.1  mrg
1.1  mrg   /* Build the constant with the same encoding shape as INDICES, so only
1.1  mrg      the encoded elements need to be converted.  */
1.1  mrg   tree_vector_builder sel (type, indices.encoding ().npatterns (),
1.1  mrg 			   indices.encoding ().nelts_per_pattern ());
1.1  mrg   tree elt_type = TREE_TYPE (type);
1.1  mrg   const unsigned int count = sel.encoded_nelts ();
1.1  mrg   for (unsigned int pos = 0; pos != count; ++pos)
1.1  mrg     sel.quick_push (build_int_cst (elt_type, indices[pos]));
1.1  mrg   return sel.build ();
1.1  mrg }
1.1  mrg
1.1  mrg /* Return a CONST_VECTOR of mode MODE that contains the elements of
1.1  mrg    INDICES.  */
1.1  mrg
1.1  mrg rtx
1.1  mrg vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
1.1  mrg {
1.1  mrg   /* MODE must be an integer vector mode with exactly as many units as
1.1  mrg      INDICES has elements.  */
1.1  mrg   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1.1  mrg 	      && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
1.1  mrg
1.1  mrg   /* Build the constant with the same encoding shape as INDICES, so only
1.1  mrg      the encoded elements need to be converted.  */
1.1  mrg   rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
1.1  mrg 			  indices.encoding ().nelts_per_pattern ());
1.1  mrg   const unsigned int count = sel.encoded_nelts ();
1.1  mrg   for (unsigned int pos = 0; pos != count; ++pos)
1.1  mrg     {
1.1  mrg       rtx elt = gen_int_mode (indices[pos], GET_MODE_INNER (mode));
1.1  mrg       sel.quick_push (elt);
1.1  mrg     }
1.1  mrg   return sel.build ();
1.1  mrg }
1.1  mrg
1.1  mrg #if CHECKING_P
1.1  mrg
1.1  mrg namespace selftest {
1.1  mrg
1.1  mrg /* Exercise series_p on a 12-element permutation of a single
1.1  mrg    12-element input.  The values pushed interleave three series:
1.1  mrg
1.1  mrg      positions 0, 3, 6, 9:   0, 5, 10, 15
1.1  mrg      positions 1, 4, 7, 10:  3, 4, 5, 6
1.1  mrg      positions 2, 5, 8, 11:  2, 5, 8, 11.  */
1.1  mrg
1.1  mrg static void
1.1  mrg test_vec_perm_12 ()
1.1  mrg {
1.1  mrg   vec_perm_builder builder (12, 12, 1);
1.1  mrg   for (unsigned int group = 0; group != 4; ++group)
1.1  mrg     {
1.1  mrg       builder.quick_push (5 * group);
1.1  mrg       builder.quick_push (group + 3);
1.1  mrg       builder.quick_push (3 * group + 2);
1.1  mrg     }
1.1  mrg   vec_perm_indices indices (builder, 1, 12);
1.1  mrg
1.1  mrg   /* A stride of 3 picks out each of the three series in turn; a wrong
1.1  mrg      base or a wrong step must be rejected.  */
1.1  mrg   ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
1.1  mrg   ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
1.1  mrg   ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
1.1  mrg   ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
1.1  mrg   ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
1.1  mrg
1.1  mrg   /* A stride of 4 visits positions 0, 4, 8 (pushed as 0, 4, 8) or
1.1  mrg      positions 1, 5, 9 (pushed as 3, 5, 15).  */
1.1  mrg   ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
1.1  mrg   ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
1.1  mrg
1.1  mrg   /* A stride of 6 from position 0 visits the values 0 and 10.  */
1.1  mrg   ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
1.1  mrg   ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
1.1  mrg
1.1  mrg   /* A stride of 10 from position 1 visits the values 3 and 11.  */
1.1  mrg   ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
1.1  mrg   ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
1.1  mrg
1.1  mrg   /* With a stride of 12 only position 0 lies inside the vector, so
1.1  mrg      any input step is accepted.  */
1.1  mrg   ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
1.1  mrg   ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
1.1  mrg   ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
1.1  mrg }
1.1  mrg
1.1  mrg /* Entry point that runs every selftest defined in this file.  */
1.1  mrg
1.1  mrg void
1.1  mrg vec_perm_indices_cc_tests ()
1.1  mrg {
1.1  mrg   test_vec_perm_12 ();
1.1  mrg }
1.1  mrg
1.1  mrg } // namespace selftest
1.1  mrg
1.1  mrg #endif