/* ACLE support for AArch64 SVE (function_base classes)
   Copyright (C) 2018-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#ifndef GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H
#define GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H

namespace aarch64_sve {

/* Wrap T, which is derived from function_base, and indicate that the
   function never has side effects.  It is only necessary to use this
   wrapper on functions that might have floating-point suffixes, since
   otherwise we assume by default that the function has no side effects.  */
template<typename T>
class quiet : public T
{
public:
  CONSTEXPR quiet () : T () {}

  /* Unfortunately we can't use parameter packs yet.  */
  template<typename T1>
  CONSTEXPR quiet (const T1 &t1) : T (t1) {}

  template<typename T1, typename T2>
  CONSTEXPR quiet (const T1 &t1, const T2 &t2) : T (t1, t2) {}

  template<typename T1, typename T2, typename T3>
  CONSTEXPR quiet (const T1 &t1, const T2 &t2, const T3 &t3)
    : T (t1, t2, t3) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return 0;
  }
};

/* A function_base that sometimes or always operates on tuples of
   vectors.  */
class multi_vector_function : public function_base
{
public:
  CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple)
    : m_vectors_per_tuple (vectors_per_tuple) {}

  unsigned int
  vectors_per_tuple () const OVERRIDE
  {
    return m_vectors_per_tuple;
  }

  /* The number of vectors in a tuple, or 1 if the function only operates
     on single vectors.  */
  unsigned int m_vectors_per_tuple;
};
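
/* For illustration: implementations opt in by inheriting through the
   quiet<> wrapper rather than from function_base directly, as the
   permute class later in this file does:

     class permute : public quiet<function_base> { ... };

   The forwarding constructors above pass up to three constructor
   arguments through to T.  */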

/* A function_base that loads or stores contiguous memory elements
   without extending or truncating them.  */
class full_width_access : public multi_vector_function
{
public:
  CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1)
    : multi_vector_function (vectors_per_tuple) {}

  tree
  memory_scalar_type (const function_instance &fi) const OVERRIDE
  {
    return fi.scalar_type (0);
  }

  machine_mode
  memory_vector_mode (const function_instance &fi) const OVERRIDE
  {
    machine_mode mode = fi.vector_mode (0);
    if (m_vectors_per_tuple != 1)
      mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
    return mode;
  }
};

/* A function_base that loads elements from memory and extends them
   to a wider element.  The memory element type is a fixed part of
   the function base name.  */
class extending_load : public function_base
{
public:
  CONSTEXPR extending_load (type_suffix_index memory_type)
    : m_memory_type (memory_type) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  tree
  memory_scalar_type (const function_instance &) const OVERRIDE
  {
    return scalar_types[type_suffixes[m_memory_type].vector_type];
  }

  machine_mode
  memory_vector_mode (const function_instance &fi) const OVERRIDE
  {
    machine_mode mem_mode = type_suffixes[m_memory_type].vector_mode;
    machine_mode reg_mode = fi.vector_mode (0);
    return aarch64_sve_data_mode (GET_MODE_INNER (mem_mode),
                                  GET_MODE_NUNITS (reg_mode)).require ();
  }

  /* Return the rtx code associated with the kind of extension that
     the load performs.  */
  rtx_code
  extend_rtx_code () const
  {
    return (type_suffixes[m_memory_type].unsigned_p
            ? ZERO_EXTEND : SIGN_EXTEND);
  }

  /* The type of the memory elements.  This is part of the function base
     name rather than a true type suffix.  */
  type_suffix_index m_memory_type;
};
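
/* Sketch of a definition using extending_load (hypothetical name; the
   real svld1sb implementation goes through a derived class in
   aarch64-sve-builtins-base.cc):

     FUNCTION (svld1sb_example, extending_load, (TYPE_SUFFIX_s8))

   The memory elements are signed bytes, so extend_rtx_code () returns
   SIGN_EXTEND and each element is widened to the register element size
   given by type suffix 0.  The FUNCTION macro is defined at the end of
   this file.  */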

/* A function_base that truncates vector elements and stores them to memory.
   The memory element width is a fixed part of the function base name.  */
class truncating_store : public function_base
{
public:
  CONSTEXPR truncating_store (scalar_int_mode to_mode) : m_to_mode (to_mode) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_MEMORY;
  }

  tree
  memory_scalar_type (const function_instance &fi) const OVERRIDE
  {
    /* In truncating stores, the signedness of the memory element is defined
       to be the same as the signedness of the vector element.  The signedness
       doesn't make any difference to the behavior of the function.  */
    type_class_index tclass = fi.type_suffix (0).tclass;
    unsigned int element_bits = GET_MODE_BITSIZE (m_to_mode);
    type_suffix_index suffix = find_type_suffix (tclass, element_bits);
    return scalar_types[type_suffixes[suffix].vector_type];
  }

  machine_mode
  memory_vector_mode (const function_instance &fi) const OVERRIDE
  {
    poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0));
    return aarch64_sve_data_mode (m_to_mode, nunits).require ();
  }

  /* The mode of a single memory element.  */
  scalar_int_mode m_to_mode;
};

/* An incomplete function_base for functions that have an associated rtx code.
   It simply records information about the mapping for derived classes
   to use.  */
class rtx_code_function_base : public function_base
{
public:
  CONSTEXPR rtx_code_function_base (rtx_code code_for_sint,
                                    rtx_code code_for_uint,
                                    int unspec_for_fp = -1)
    : m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint),
      m_unspec_for_fp (unspec_for_fp) {}

  /* The rtx code to use for signed and unsigned integers respectively.
     Can be UNKNOWN for functions that don't have integer forms.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;

  /* The UNSPEC_COND_* to use for floating-point operations.  Can be -1
     for functions that only operate on integers.  */
  int m_unspec_for_fp;
};

/* A function_base for functions that have an associated rtx code.
   It supports all forms of predication except PRED_implicit.  */
class rtx_code_function : public rtx_code_function_base
{
public:
  CONSTEXPR rtx_code_function (rtx_code code_for_sint, rtx_code code_for_uint,
                               int unspec_for_fp = -1)
    : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
                               m_unspec_for_fp);
  }
};
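
/* Sketch of a definition using rtx_code_function (hypothetical name,
   following the svadd pattern; the unspec constant comes from the main
   AArch64 backend):

     FUNCTION (svadd_example, rtx_code_function,
               (PLUS, PLUS, UNSPEC_COND_FADD))

   PLUS covers both the signed and unsigned integer suffixes, while
   floating-point suffixes expand through the predicated
   UNSPEC_COND_FADD patterns.  */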

/* Like rtx_code_function, but for functions that take what is normally
   the final argument first.  One use of this class is to handle binary
   reversed operations; another is to handle MLA-style operations that
   are normally expressed in GCC as MAD-style operations.  */
class rtx_code_function_rotated : public rtx_code_function_base
{
public:
  CONSTEXPR rtx_code_function_rotated (rtx_code code_for_sint,
                                       rtx_code code_for_uint,
                                       int unspec_for_fp = -1)
    : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Rotate the inputs into their normal order, but continue to make _m
       functions merge with what was originally the first vector argument.  */
    unsigned int nargs = e.args.length ();
    e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs);
    return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
                               m_unspec_for_fp, nargs - 1);
  }
};

/* An incomplete function_base for functions that have an associated
   unspec code, with separate codes for signed integers, unsigned
   integers and floating-point values.  The class simply records
   information about the mapping for derived classes to use.  */
class unspec_based_function_base : public function_base
{
public:
  CONSTEXPR unspec_based_function_base (int unspec_for_sint,
                                        int unspec_for_uint,
                                        int unspec_for_fp)
    : m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint),
      m_unspec_for_fp (unspec_for_fp)
  {}

  /* Return the unspec code to use for INSTANCE, based on type suffix 0.  */
  int
  unspec_for (const function_instance &instance) const
  {
    return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp
            : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint
            : m_unspec_for_sint);
  }

  /* The unspec code associated with signed-integer, unsigned-integer
     and floating-point operations respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
  int m_unspec_for_fp;
};

/* A function_base for functions that have an associated unspec code.
   It supports all forms of predication except PRED_implicit.  */
class unspec_based_function : public unspec_based_function_base
{
public:
  CONSTEXPR unspec_based_function (int unspec_for_sint, int unspec_for_uint,
                                   int unspec_for_fp)
    : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
                                  unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
                             m_unspec_for_fp);
  }
};
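
/* Illustration of the rotation above (hypothetical name, following the
   ACLE "reversed subtraction" functions such as svsubr): for

     svsubr_example_m (pg, a, b)   ->   b - a

   the arguments arrive with a first, are rotated into GCC's normal
   (b - a) operand order, and nargs - 1 is passed on so that _m
   predication still merges with the original first vector argument,
   a.  */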

/* Like unspec_based_function, but for functions that take what is normally
   the final argument first.  One use of this class is to handle binary
   reversed operations; another is to handle MLA-style operations that
   are normally expressed in GCC as MAD-style operations.  */
class unspec_based_function_rotated : public unspec_based_function_base
{
public:
  CONSTEXPR unspec_based_function_rotated (int unspec_for_sint,
                                           int unspec_for_uint,
                                           int unspec_for_fp)
    : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
                                  unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Rotate the inputs into their normal order, but continue to make _m
       functions merge with what was originally the first vector argument.  */
    unsigned int nargs = e.args.length ();
    e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs);
    return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
                             m_unspec_for_fp, nargs - 1);
  }
};

/* Like unspec_based_function, but map the function directly to
   CODE (UNSPEC, M) instead of using the generic predication-based
   expansion, where M is the vector mode associated with type suffix 0.
   This is useful if the unspec doesn't describe the full operation or
   if the usual predication rules don't apply for some reason.  */
template<insn_code (*CODE) (int, machine_mode)>
class unspec_based_function_exact_insn : public unspec_based_function_base
{
public:
  CONSTEXPR unspec_based_function_exact_insn (int unspec_for_sint,
                                              int unspec_for_uint,
                                              int unspec_for_fp)
    : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
                                  unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0)));
  }
};

/* A function that performs an unspec and then adds it to another value.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add>
  unspec_based_add_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add_lane>
  unspec_based_add_lane_function;

/* Generic unspec-based _lane function.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_lane>
  unspec_based_lane_function;

/* A function that uses aarch64_pred* patterns regardless of the
   predication type.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_pred>
  unspec_based_pred_function;

/* Like unspec_based_add_function and unspec_based_add_lane_function,
   but using saturating addition.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qadd>
  unspec_based_qadd_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qadd_lane>
  unspec_based_qadd_lane_function;

/* Like unspec_based_sub_function and unspec_based_sub_lane_function,
   but using saturating subtraction.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qsub>
  unspec_based_qsub_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qsub_lane>
  unspec_based_qsub_lane_function;

/* A function that performs an unspec and then subtracts it from
   another value.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub>
  unspec_based_sub_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane>
  unspec_based_sub_lane_function;
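
/* Sketch of how the typedefs above are used (hypothetical name,
   following the SVE2 svqdmlalb pattern):

     FUNCTION (svqdmlalb_example, unspec_based_qadd_function,
               (UNSPEC_SQDMULLB, -1, -1))

   Here the unspec describes only the saturating doubling multiply-long;
   the qadd template argument supplies the final saturating addition,
   and the -1s indicate that there are no unsigned or floating-point
   forms.  */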

/* A function that acts like unspec_based_function_exact_insn<INT_CODE>
   when operating on integers, but that expands to an (fma ...)-style
   aarch64_sve* operation when applied to floats.  */
template<insn_code (*INT_CODE) (int, machine_mode)>
class unspec_based_fused_function : public unspec_based_function_base
{
public:
  CONSTEXPR unspec_based_fused_function (int unspec_for_sint,
                                         int unspec_for_uint,
                                         int unspec_for_fp)
    : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
                                  unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    int unspec = unspec_for (e);
    insn_code icode;
    if (e.type_suffix (0).float_p)
      {
	/* Put the operands in the normal (fma ...) order, with the accumulator
	   last.  This fits naturally since that's also the unprinted operand
	   in the asm output.  */
	e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3);
	icode = code_for_aarch64_sve (unspec, e.vector_mode (0));
      }
    else
      icode = INT_CODE (unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};
typedef unspec_based_fused_function<code_for_aarch64_sve_add>
  unspec_based_mla_function;
typedef unspec_based_fused_function<code_for_aarch64_sve_sub>
  unspec_based_mls_function;

/* Like unspec_based_fused_function, but for _lane functions.  */
template<insn_code (*INT_CODE) (int, machine_mode)>
class unspec_based_fused_lane_function : public unspec_based_function_base
{
public:
  CONSTEXPR unspec_based_fused_lane_function (int unspec_for_sint,
                                              int unspec_for_uint,
                                              int unspec_for_fp)
    : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
                                  unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    int unspec = unspec_for (e);
    insn_code icode;
    if (e.type_suffix (0).float_p)
      {
	/* Put the operands in the normal (fma ...) order, with the accumulator
	   last.  This fits naturally since that's also the unprinted operand
	   in the asm output.  */
	e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4);
	icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
      }
    else
      icode = INT_CODE (unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};
typedef unspec_based_fused_lane_function<code_for_aarch64_sve_add_lane>
  unspec_based_mla_lane_function;
typedef unspec_based_fused_lane_function<code_for_aarch64_sve_sub_lane>
  unspec_based_mls_lane_function;
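
/* Sketch (hypothetical name, following the SVE2 svmlalb pattern):

     FUNCTION (svmlalb_example, unspec_based_mla_function,
               (UNSPEC_SMULLB, UNSPEC_UMULLB, UNSPEC_FMLALB))

   Integer suffixes expand to the widening multiply described by the
   unspec followed by an addition, while the floating-point suffix maps
   directly to the fused UNSPEC_FMLALB pattern.  */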

/* A function_base that uses CODE_FOR_MODE (M) to get the associated
   instruction code, where M is the vector mode associated with type
   suffix N.  */
template<insn_code (*CODE_FOR_MODE) (machine_mode), unsigned int N>
class code_for_mode_function : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (CODE_FOR_MODE (e.vector_mode (N)));
  }
};

/* A function that uses code_for_<PATTERN> (M), where M is the vector
   mode associated with the first type suffix.  */
#define CODE_FOR_MODE0(PATTERN) code_for_mode_function<code_for_##PATTERN, 0>

/* Likewise for the second type suffix.  */
#define CODE_FOR_MODE1(PATTERN) code_for_mode_function<code_for_##PATTERN, 1>

/* Like CODE_FOR_MODE0, but the function doesn't raise exceptions when
   operating on floating-point data.  */
#define QUIET_CODE_FOR_MODE0(PATTERN) \
  quiet< code_for_mode_function<code_for_##PATTERN, 0> >

/* A function_base for functions that always expand to a fixed insn pattern,
   regardless of what the suffixes are.  */
class fixed_insn_function : public function_base
{
public:
  CONSTEXPR fixed_insn_function (insn_code code) : m_code (code) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (m_code);
  }

  /* The instruction to use.  */
  insn_code m_code;
};

/* A function_base for functions that permute their arguments.  */
class permute : public quiet<function_base>
{
public:
  /* Fold a unary or binary permute with the permute vector given by
     BUILDER.  */
  gimple *
  fold_permute (const gimple_folder &f, const vec_perm_builder &builder) const
  {
    /* Punt for now on _b16 and wider; we'd need more complex evpc logic
       to rerecognize the result.  */
    if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
      return NULL;

    unsigned int nargs = gimple_call_num_args (f.call);
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_indices indices (builder, nargs, nelts);
    tree perm_type = build_vector_type (ssizetype, nelts);
    return gimple_build_assign (f.lhs, VEC_PERM_EXPR,
                                gimple_call_arg (f.call, 0),
                                gimple_call_arg (f.call, nargs - 1),
                                vec_perm_indices_to_tree (perm_type, indices));
  }
};

/* A function_base for functions that permute two vectors using a fixed
   choice of indices.  */
class binary_permute : public permute
{
public:
  CONSTEXPR binary_permute (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_sve (m_unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};
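
/* Sketch (hypothetical name, following the svzip1 pattern): a fixed
   interleave of two vectors can be registered as:

     FUNCTION (svzip1_example, binary_permute, (UNSPEC_ZIP1))

   A derived class would typically also override fold () and use
   fold_permute above to fold the call to a VEC_PERM_EXPR at the
   gimple level.  */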

/* A function_base for functions that reduce a vector to a scalar.  */
class reduction : public function_base
{
public:
  CONSTEXPR reduction (int unspec)
    : m_unspec_for_sint (unspec),
      m_unspec_for_uint (unspec),
      m_unspec_for_fp (unspec)
  {}

  CONSTEXPR reduction (int unspec_for_sint, int unspec_for_uint,
                       int unspec_for_fp)
    : m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint),
      m_unspec_for_fp (unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    int unspec = (!e.type_suffix (0).integer_p ? m_unspec_for_fp
                  : e.type_suffix (0).unsigned_p ? m_unspec_for_uint
                  : m_unspec_for_sint);
    /* There's no distinction between SADDV and UADDV for 64-bit elements;
       the signed versions only exist for narrower elements.  */
    if (GET_MODE_UNIT_BITSIZE (mode) == 64 && unspec == UNSPEC_SADDV)
      unspec = UNSPEC_UADDV;
    return e.use_exact_insn (code_for_aarch64_pred_reduc (unspec, mode));
  }

  /* The unspec code associated with signed-integer, unsigned-integer
     and floating-point operations respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
  int m_unspec_for_fp;
};

/* A function_base for functions that shift narrower-than-64-bit values
   by 64-bit amounts.  */
class shift_wide : public function_base
{
public:
  CONSTEXPR shift_wide (rtx_code code, int wide_unspec)
    : m_code (code), m_wide_unspec (wide_unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    machine_mode elem_mode = GET_MODE_INNER (mode);

    /* If the argument is a constant that the normal shifts can handle
       directly, use them instead.  */
    rtx shift = unwrap_const_vec_duplicate (e.args.last ());
    if (aarch64_simd_shift_imm_p (shift, elem_mode, m_code == ASHIFT))
      {
	e.args.last () = shift;
	return e.map_to_rtx_codes (m_code, m_code, -1);
      }

    if (e.pred == PRED_x)
      return e.use_unpred_insn (code_for_aarch64_sve (m_wide_unspec, mode));

    return e.use_cond_insn (code_for_cond (m_wide_unspec, mode));
  }

  /* The rtx code associated with a "normal" shift.  */
  rtx_code m_code;

  /* The unspec code associated with the wide shift.  */
  int m_wide_unspec;
};
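
/* Sketch (hypothetical name, following the svmaxv pattern): a maximum
   reduction selects its unspec by type class and signedness:

     FUNCTION (svmaxv_example, reduction,
               (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV))

   The single-argument constructor serves operations such as the
   bitwise reductions, where the same unspec is used for every type
   suffix.  */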

/* A function_base for unary functions that count bits.  */
class unary_count : public quiet<function_base>
{
public:
  CONSTEXPR unary_count (rtx_code code) : m_code (code) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The md patterns treat the operand as an integer.  */
    machine_mode mode = aarch64_sve_int_mode (e.vector_mode (0));
    e.args.last () = gen_lowpart (mode, e.args.last ());

    if (e.pred == PRED_x)
      return e.use_pred_x_insn (code_for_aarch64_pred (m_code, mode));

    return e.use_cond_insn (code_for_cond (m_code, mode));
  }

  /* The rtx code associated with the operation.  */
  rtx_code m_code;
};

/* A function_base for svwhile* functions.  */
class while_comparison : public function_base
{
public:
  CONSTEXPR while_comparison (int unspec_for_sint, int unspec_for_uint)
    : m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Suffix 0 determines the predicate mode, suffix 1 determines the
       scalar mode and signedness.  */
    int unspec = (e.type_suffix (1).unsigned_p
                  ? m_unspec_for_uint
                  : m_unspec_for_sint);
    machine_mode pred_mode = e.vector_mode (0);
    scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1));
    return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode));
  }

  /* The unspec codes associated with signed and unsigned operations
     respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
};

}

/* Declare the global function base NAME, creating it from an instance
   of class CLASS with constructor arguments ARGS.  */
#define FUNCTION(NAME, CLASS, ARGS) \
  namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \
  namespace functions { const function_base *const NAME = &NAME##_obj; }

#endif