1 1.1 mrg /* A representation of vector permutation indices. 2 1.1 mrg Copyright (C) 2017-2022 Free Software Foundation, Inc. 3 1.1 mrg 4 1.1 mrg This file is part of GCC. 5 1.1 mrg 6 1.1 mrg GCC is free software; you can redistribute it and/or modify it under 7 1.1 mrg the terms of the GNU General Public License as published by the Free 8 1.1 mrg Software Foundation; either version 3, or (at your option) any later 9 1.1 mrg version. 10 1.1 mrg 11 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 1.1 mrg for more details. 15 1.1 mrg 16 1.1 mrg You should have received a copy of the GNU General Public License 17 1.1 mrg along with GCC; see the file COPYING3. If not see 18 1.1 mrg <http://www.gnu.org/licenses/>. */ 19 1.1 mrg 20 1.1 mrg #include "config.h" 21 1.1 mrg #include "system.h" 22 1.1 mrg #include "coretypes.h" 23 1.1 mrg #include "vec-perm-indices.h" 24 1.1 mrg #include "tree.h" 25 1.1 mrg #include "fold-const.h" 26 1.1 mrg #include "tree-vector-builder.h" 27 1.1 mrg #include "backend.h" 28 1.1 mrg #include "rtl.h" 29 1.1 mrg #include "memmodel.h" 30 1.1 mrg #include "emit-rtl.h" 31 1.1 mrg #include "selftest.h" 32 1.1 mrg #include "rtx-vector-builder.h" 33 1.1 mrg 34 1.1 mrg /* Switch to a new permutation vector that selects between NINPUTS vector 35 1.1 mrg inputs that have NELTS_PER_INPUT elements each. Take the elements of the 36 1.1 mrg new permutation vector from ELEMENTS, clamping each one to be in range. */ 37 1.1 mrg 38 1.1 mrg void 39 1.1 mrg vec_perm_indices::new_vector (const vec_perm_builder &elements, 40 1.1 mrg unsigned int ninputs, 41 1.1 mrg poly_uint64 nelts_per_input) 42 1.1 mrg { 43 1.1 mrg m_ninputs = ninputs; 44 1.1 mrg m_nelts_per_input = nelts_per_input; 45 1.1 mrg /* If the vector has a constant number of elements, expand the 46 1.1 mrg encoding and clamp each element. E.g. { 0, 2, 4, ... } might 47 1.1 mrg wrap halfway if there is only one vector input, and we want 48 1.1 mrg the wrapped form to be the canonical one. 49 1.1 mrg 50 1.1 mrg If the vector has a variable number of elements, just copy 51 1.1 mrg the encoding. In that case the unwrapped form is canonical 52 1.1 mrg and there is no way of representing the wrapped form. */ 53 1.1 mrg poly_uint64 full_nelts = elements.full_nelts (); 54 1.1 mrg unsigned HOST_WIDE_INT copy_nelts; 55 1.1 mrg if (full_nelts.is_constant (©_nelts)) 56 1.1 mrg m_encoding.new_vector (full_nelts, copy_nelts, 1); 57 1.1 mrg else 58 1.1 mrg { 59 1.1 mrg copy_nelts = elements.encoded_nelts (); 60 1.1 mrg m_encoding.new_vector (full_nelts, elements.npatterns (), 61 1.1 mrg elements.nelts_per_pattern ()); 62 1.1 mrg } 63 1.1 mrg unsigned int npatterns = m_encoding.npatterns (); 64 1.1 mrg for (unsigned int i = 0; i < npatterns; ++i) 65 1.1 mrg m_encoding.quick_push (clamp (elements.elt (i))); 66 1.1 mrg /* Use the fact that: 67 1.1 mrg 68 1.1 mrg (a + b) % c == ((a % c) + (b % c)) % c 69 1.1 mrg 70 1.1 mrg to simplify the clamping of variable-length vectors. */ 71 1.1 mrg for (unsigned int i = npatterns; i < copy_nelts; ++i) 72 1.1 mrg { 73 1.1 mrg element_type step = clamp (elements.elt (i) 74 1.1 mrg - elements.elt (i - npatterns)); 75 1.1 mrg m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step)); 76 1.1 mrg } 77 1.1 mrg m_encoding.finalize (); 78 1.1 mrg } 79 1.1 mrg 80 1.1 mrg /* Switch to a new permutation vector that selects the same input elements 81 1.1 mrg as ORIG, but with each element split into FACTOR pieces. For example, 82 1.1 mrg if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is 83 1.1 mrg { 2, 3, 4, 5, 0, 1, 6, 7 }. */ 84 1.1 mrg 85 1.1 mrg void 86 1.1 mrg vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig, 87 1.1 mrg unsigned int factor) 88 1.1 mrg { 89 1.1 mrg m_ninputs = orig.m_ninputs; 90 1.1 mrg m_nelts_per_input = orig.m_nelts_per_input * factor; 91 1.1 mrg m_encoding.new_vector (orig.m_encoding.full_nelts () * factor, 92 1.1 mrg orig.m_encoding.npatterns () * factor, 93 1.1 mrg orig.m_encoding.nelts_per_pattern ()); 94 1.1 mrg unsigned int encoded_nelts = orig.m_encoding.encoded_nelts (); 95 1.1 mrg for (unsigned int i = 0; i < encoded_nelts; ++i) 96 1.1 mrg { 97 1.1 mrg element_type base = orig.m_encoding[i] * factor; 98 1.1 mrg for (unsigned int j = 0; j < factor; ++j) 99 1.1 mrg m_encoding.quick_push (base + j); 100 1.1 mrg } 101 1.1 mrg m_encoding.finalize (); 102 1.1 mrg } 103 1.1 mrg 104 1.1 mrg /* Check whether we can switch to a new permutation vector that 105 1.1 mrg selects the same input elements as ORIG, but with each element 106 1.1 mrg built up from FACTOR pieces. Return true if yes, otherwise 107 1.1 mrg return false. Every FACTOR permutation indexes should be 108 1.1 mrg continuous separately and the first one of each batch should 109 1.1 mrg be able to exactly modulo FACTOR. For example, if ORIG is 110 1.1 mrg { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation 111 1.1 mrg is { 1, 2, 0, 3 }. */ 112 1.1 mrg 113 1.1 mrg bool 114 1.1 mrg vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig, 115 1.1 mrg unsigned int factor) 116 1.1 mrg { 117 1.1 mrg gcc_assert (factor > 0); 118 1.1 mrg 119 1.1 mrg if (maybe_lt (orig.m_nelts_per_input, factor)) 120 1.1 mrg return false; 121 1.1 mrg 122 1.1 mrg poly_uint64 nelts; 123 1.1 mrg /* Invalid if vector units number isn't multiple of factor. */ 124 1.1 mrg if (!multiple_p (orig.m_nelts_per_input, factor, &nelts)) 125 1.1 mrg return false; 126 1.1 mrg 127 1.1 mrg /* Only handle the case that npatterns is multiple of factor. 128 1.1 mrg FIXME: Try to see whether we can reshape it by factor npatterns. */ 129 1.1 mrg if (orig.m_encoding.npatterns () % factor != 0) 130 1.1 mrg return false; 131 1.1 mrg 132 1.1 mrg unsigned int encoded_nelts = orig.m_encoding.encoded_nelts (); 133 1.1 mrg auto_vec<element_type, 32> encoding (encoded_nelts); 134 1.1 mrg /* Separate all encoded elements into batches by size factor, 135 1.1 mrg then ensure the first element of each batch is multiple of 136 1.1 mrg factor and all elements in each batch is consecutive from 137 1.1 mrg the first one. */ 138 1.1 mrg for (unsigned int i = 0; i < encoded_nelts; i += factor) 139 1.1 mrg { 140 1.1 mrg element_type first = orig.m_encoding[i]; 141 1.1 mrg element_type new_index; 142 1.1 mrg if (!multiple_p (first, factor, &new_index)) 143 1.1 mrg return false; 144 1.1 mrg for (unsigned int j = 1; j < factor; ++j) 145 1.1 mrg if (maybe_ne (first + j, orig.m_encoding[i + j])) 146 1.1 mrg return false; 147 1.1 mrg encoding.quick_push (new_index); 148 1.1 mrg } 149 1.1 mrg 150 1.1 mrg m_ninputs = orig.m_ninputs; 151 1.1 mrg m_nelts_per_input = nelts; 152 1.1 mrg poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor); 153 1.1 mrg unsigned int npatterns = orig.m_encoding.npatterns () / factor; 154 1.1 mrg 155 1.1 mrg m_encoding.new_vector (full_nelts, npatterns, 156 1.1 mrg orig.m_encoding.nelts_per_pattern ()); 157 1.1 mrg m_encoding.splice (encoding); 158 1.1 mrg m_encoding.finalize (); 159 1.1 mrg 160 1.1 mrg return true; 161 1.1 mrg } 162 1.1 mrg 163 1.1 mrg /* Rotate the inputs of the permutation right by DELTA inputs. This changes 164 1.1 mrg the values of the permutation vector but it doesn't change the way that 165 1.1 mrg the elements are encoded. */ 166 1.1 mrg 167 1.1 mrg void 168 1.1 mrg vec_perm_indices::rotate_inputs (int delta) 169 1.1 mrg { 170 1.1 mrg element_type element_delta = delta * m_nelts_per_input; 171 1.1 mrg for (unsigned int i = 0; i < m_encoding.length (); ++i) 172 1.1 mrg m_encoding[i] = clamp (m_encoding[i] + element_delta); 173 1.1 mrg } 174 1.1 mrg 175 1.1 mrg /* Return true if index OUT_BASE + I * OUT_STEP selects input 176 1.1 mrg element IN_BASE + I * IN_STEP. For example, the call to test 177 1.1 mrg whether a permute reverses a vector of N elements would be: 178 1.1 mrg 179 1.1 mrg series_p (0, 1, N - 1, -1) 180 1.1 mrg 181 1.1 mrg which would return true for { N - 1, N - 2, N - 3, ... }. 182 1.1 mrg The calls to test for an interleaving of elements starting 183 1.1 mrg at N1 and N2 would be: 184 1.1 mrg 185 1.1 mrg series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1). 186 1.1 mrg 187 1.1 mrg which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */ 188 1.1 mrg 189 1.1 mrg bool 190 1.1 mrg vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step, 191 1.1 mrg element_type in_base, element_type in_step) const 192 1.1 mrg { 193 1.1 mrg /* Check the base value. */ 194 1.1 mrg if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base))) 195 1.1 mrg return false; 196 1.1 mrg 197 1.1 mrg element_type full_nelts = m_encoding.full_nelts (); 198 1.1 mrg unsigned int npatterns = m_encoding.npatterns (); 199 1.1 mrg 200 1.1 mrg /* Calculate which multiple of OUT_STEP elements we need to get 201 1.1 mrg back to the same pattern. */ 202 1.1 mrg unsigned int cycle_length = least_common_multiple (out_step, npatterns); 203 1.1 mrg 204 1.1 mrg /* Check the steps. */ 205 1.1 mrg in_step = clamp (in_step); 206 1.1 mrg out_base += out_step; 207 1.1 mrg unsigned int limit = 0; 208 1.1 mrg for (;;) 209 1.1 mrg { 210 1.1 mrg /* Succeed if we've checked all the elements in the vector. */ 211 1.1 mrg if (known_ge (out_base, full_nelts)) 212 1.1 mrg return true; 213 1.1 mrg 214 1.1 mrg if (out_base >= npatterns) 215 1.1 mrg { 216 1.1 mrg /* We've got to the end of the "foreground" values. Check 217 1.1 mrg 2 elements from each pattern in the "background" values. */ 218 1.1 mrg if (limit == 0) 219 1.1 mrg limit = out_base + cycle_length * 2; 220 1.1 mrg else if (out_base >= limit) 221 1.1 mrg return true; 222 1.1 mrg } 223 1.1 mrg 224 1.1 mrg element_type v0 = m_encoding.elt (out_base - out_step); 225 1.1 mrg element_type v1 = m_encoding.elt (out_base); 226 1.1 mrg if (maybe_ne (clamp (v1 - v0), in_step)) 227 1.1 mrg return false; 228 1.1 mrg 229 1.1 mrg out_base += out_step; 230 1.1 mrg } 231 1.1 mrg } 232 1.1 mrg 233 1.1 mrg /* Return true if all elements of the permutation vector are in the range 234 1.1 mrg [START, START + SIZE). */ 235 1.1 mrg 236 1.1 mrg bool 237 1.1 mrg vec_perm_indices::all_in_range_p (element_type start, element_type size) const 238 1.1 mrg { 239 1.1 mrg /* Check the first two elements of each pattern. */ 240 1.1 mrg unsigned int npatterns = m_encoding.npatterns (); 241 1.1 mrg unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern (); 242 1.1 mrg unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2); 243 1.1 mrg for (unsigned int i = 0; i < base_nelts; ++i) 244 1.1 mrg if (!known_in_range_p (m_encoding[i], start, size)) 245 1.1 mrg return false; 246 1.1 mrg 247 1.1 mrg /* For stepped encodings, check the full range of the series. */ 248 1.1 mrg if (nelts_per_pattern == 3) 249 1.1 mrg { 250 1.1 mrg element_type limit = input_nelts (); 251 1.1 mrg 252 1.1 mrg /* The number of elements in each pattern beyond the first two 253 1.1 mrg that we checked above. */ 254 1.1 mrg poly_int64 step_nelts = exact_div (m_encoding.full_nelts (), 255 1.1 mrg npatterns) - 2; 256 1.1 mrg for (unsigned int i = 0; i < npatterns; ++i) 257 1.1 mrg { 258 1.1 mrg /* BASE1 has been checked but BASE2 hasn't. */ 259 1.1 mrg element_type base1 = m_encoding[i + npatterns]; 260 1.1 mrg element_type base2 = m_encoding[i + base_nelts]; 261 1.1 mrg 262 1.1 mrg /* The step to add to get from BASE1 to each subsequent value. */ 263 1.1 mrg element_type step = clamp (base2 - base1); 264 1.1 mrg 265 1.1 mrg /* STEP has no inherent sign, so a value near LIMIT can 266 1.1 mrg act as a negative step. The series is in range if it 267 1.1 mrg is in range according to one of the two interpretations. 268 1.1 mrg 269 1.1 mrg Since we're dealing with clamped values, ELEMENT_TYPE is 270 1.1 mrg wide enough for overflow not to be a problem. */ 271 1.1 mrg element_type headroom_down = base1 - start; 272 1.1 mrg element_type headroom_up = size - headroom_down - 1; 273 1.1 mrg HOST_WIDE_INT diff; 274 1.1 mrg if ((!step.is_constant (&diff) 275 1.1 mrg || maybe_lt (headroom_up, diff * step_nelts)) 276 1.1 mrg && (!(limit - step).is_constant (&diff) 277 1.1 mrg || maybe_lt (headroom_down, diff * step_nelts))) 278 1.1 mrg return false; 279 1.1 mrg } 280 1.1 mrg } 281 1.1 mrg return true; 282 1.1 mrg } 283 1.1 mrg 284 1.1 mrg /* Try to read the contents of VECTOR_CST CST as a constant permutation 285 1.1 mrg vector. Return true and add the elements to BUILDER on success, 286 1.1 mrg otherwise return false without modifying BUILDER. */ 287 1.1 mrg 288 1.1 mrg bool 289 1.1 mrg tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst) 290 1.1 mrg { 291 1.1 mrg unsigned int encoded_nelts = vector_cst_encoded_nelts (cst); 292 1.1 mrg for (unsigned int i = 0; i < encoded_nelts; ++i) 293 1.1 mrg if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i))) 294 1.1 mrg return false; 295 1.1 mrg 296 1.1 mrg builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)), 297 1.1 mrg VECTOR_CST_NPATTERNS (cst), 298 1.1 mrg VECTOR_CST_NELTS_PER_PATTERN (cst)); 299 1.1 mrg for (unsigned int i = 0; i < encoded_nelts; ++i) 300 1.1 mrg builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i))); 301 1.1 mrg return true; 302 1.1 mrg } 303 1.1 mrg 304 1.1 mrg /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */ 305 1.1 mrg 306 1.1 mrg tree 307 1.1 mrg vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices) 308 1.1 mrg { 309 1.1 mrg gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ())); 310 1.1 mrg tree_vector_builder sel (type, indices.encoding ().npatterns (), 311 1.1 mrg indices.encoding ().nelts_per_pattern ()); 312 1.1 mrg unsigned int encoded_nelts = sel.encoded_nelts (); 313 1.1 mrg for (unsigned int i = 0; i < encoded_nelts; i++) 314 1.1 mrg sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i])); 315 1.1 mrg return sel.build (); 316 1.1 mrg } 317 1.1 mrg 318 1.1 mrg /* Return a CONST_VECTOR of mode MODE that contains the elements of 319 1.1 mrg INDICES. */ 320 1.1 mrg 321 1.1 mrg rtx 322 1.1 mrg vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices) 323 1.1 mrg { 324 1.1 mrg gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT 325 1.1 mrg && known_eq (GET_MODE_NUNITS (mode), indices.length ())); 326 1.1 mrg rtx_vector_builder sel (mode, indices.encoding ().npatterns (), 327 1.1 mrg indices.encoding ().nelts_per_pattern ()); 328 1.1 mrg unsigned int encoded_nelts = sel.encoded_nelts (); 329 1.1 mrg for (unsigned int i = 0; i < encoded_nelts; i++) 330 1.1 mrg sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode))); 331 1.1 mrg return sel.build (); 332 1.1 mrg } 333 1.1 mrg 334 1.1 mrg #if CHECKING_P 335 1.1 mrg 336 1.1 mrg namespace selftest { 337 1.1 mrg 338 1.1 mrg /* Test a 12-element vector. */ 339 1.1 mrg 340 1.1 mrg static void 341 1.1 mrg test_vec_perm_12 (void) 342 1.1 mrg { 343 1.1 mrg vec_perm_builder builder (12, 12, 1); 344 1.1 mrg for (unsigned int i = 0; i < 4; ++i) 345 1.1 mrg { 346 1.1 mrg builder.quick_push (i * 5); 347 1.1 mrg builder.quick_push (3 + i); 348 1.1 mrg builder.quick_push (2 + 3 * i); 349 1.1 mrg } 350 1.1 mrg vec_perm_indices indices (builder, 1, 12); 351 1.1 mrg ASSERT_TRUE (indices.series_p (0, 3, 0, 5)); 352 1.1 mrg ASSERT_FALSE (indices.series_p (0, 3, 3, 5)); 353 1.1 mrg ASSERT_FALSE (indices.series_p (0, 3, 0, 8)); 354 1.1 mrg ASSERT_TRUE (indices.series_p (1, 3, 3, 1)); 355 1.1 mrg ASSERT_TRUE (indices.series_p (2, 3, 2, 3)); 356 1.1 mrg 357 1.1 mrg ASSERT_TRUE (indices.series_p (0, 4, 0, 4)); 358 1.1 mrg ASSERT_FALSE (indices.series_p (1, 4, 3, 4)); 359 1.1 mrg 360 1.1 mrg ASSERT_TRUE (indices.series_p (0, 6, 0, 10)); 361 1.1 mrg ASSERT_FALSE (indices.series_p (0, 6, 0, 100)); 362 1.1 mrg 363 1.1 mrg ASSERT_FALSE (indices.series_p (1, 10, 3, 7)); 364 1.1 mrg ASSERT_TRUE (indices.series_p (1, 10, 3, 8)); 365 1.1 mrg 366 1.1 mrg ASSERT_TRUE (indices.series_p (0, 12, 0, 10)); 367 1.1 mrg ASSERT_TRUE (indices.series_p (0, 12, 0, 11)); 368 1.1 mrg ASSERT_TRUE (indices.series_p (0, 12, 0, 100)); 369 1.1 mrg } 370 1.1 mrg 371 1.1 mrg /* Run selftests for this file. */ 372 1.1 mrg 373 1.1 mrg void 374 1.1 mrg vec_perm_indices_cc_tests () 375 1.1 mrg { 376 1.1 mrg test_vec_perm_12 (); 377 1.1 mrg } 378 1.1 mrg 379 1.1 mrg } // namespace selftest 380 1.1 mrg 381 1.1 mrg #endif 382