Home | History | Annotate | Line # | Download | only in aarch64
      1      1.1  mrg /* ACLE support for AArch64 SVE (function_base classes)
      2  1.1.1.2  mrg    Copyright (C) 2018-2022 Free Software Foundation, Inc.
      3      1.1  mrg 
      4      1.1  mrg    This file is part of GCC.
      5      1.1  mrg 
      6      1.1  mrg    GCC is free software; you can redistribute it and/or modify it
      7      1.1  mrg    under the terms of the GNU General Public License as published by
      8      1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
      9      1.1  mrg    any later version.
     10      1.1  mrg 
     11      1.1  mrg    GCC is distributed in the hope that it will be useful, but
     12      1.1  mrg    WITHOUT ANY WARRANTY; without even the implied warranty of
     13      1.1  mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14      1.1  mrg    General Public License for more details.
     15      1.1  mrg 
     16      1.1  mrg    You should have received a copy of the GNU General Public License
     17      1.1  mrg    along with GCC; see the file COPYING3.  If not see
     18      1.1  mrg    <http://www.gnu.org/licenses/>.  */
     19      1.1  mrg 
     20      1.1  mrg #ifndef GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H
     21      1.1  mrg #define GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H
     22      1.1  mrg 
     23      1.1  mrg namespace aarch64_sve {
     24      1.1  mrg 
     25      1.1  mrg /* Wrap T, which is derived from function_base, and indicate that the
     26      1.1  mrg    function never has side effects.  It is only necessary to use this
     27      1.1  mrg    wrapper on functions that might have floating-point suffixes, since
     28      1.1  mrg    otherwise we assume by default that the function has no side effects.  */
     29      1.1  mrg template<typename T>
     30      1.1  mrg class quiet : public T
     31      1.1  mrg {
     32      1.1  mrg public:
     33      1.1  mrg   CONSTEXPR quiet () : T () {}
     34      1.1  mrg 
     35      1.1  mrg   /* Unfortunately we can't use parameter packs yet.  */
     36      1.1  mrg   template<typename T1>
     37      1.1  mrg   CONSTEXPR quiet (const T1 &t1) : T (t1) {}
     38      1.1  mrg 
     39      1.1  mrg   template<typename T1, typename T2>
     40      1.1  mrg   CONSTEXPR quiet (const T1 &t1, const T2 &t2) : T (t1, t2) {}
     41      1.1  mrg 
     42      1.1  mrg   template<typename T1, typename T2, typename T3>
     43      1.1  mrg   CONSTEXPR quiet (const T1 &t1, const T2 &t2, const T3 &t3)
     44      1.1  mrg     : T (t1, t2, t3) {}
     45      1.1  mrg 
     46      1.1  mrg   unsigned int
     47      1.1  mrg   call_properties (const function_instance &) const OVERRIDE
     48      1.1  mrg   {
     49      1.1  mrg     return 0;
     50      1.1  mrg   }
     51      1.1  mrg };
     52      1.1  mrg 
     53      1.1  mrg /* A function_base that sometimes or always operates on tuples of
     54      1.1  mrg    vectors.  */
     55      1.1  mrg class multi_vector_function : public function_base
     56      1.1  mrg {
     57      1.1  mrg public:
     58      1.1  mrg   CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple)
     59      1.1  mrg     : m_vectors_per_tuple (vectors_per_tuple) {}
     60      1.1  mrg 
     61      1.1  mrg   unsigned int
     62      1.1  mrg   vectors_per_tuple () const OVERRIDE
     63      1.1  mrg   {
     64      1.1  mrg     return m_vectors_per_tuple;
     65      1.1  mrg   }
     66      1.1  mrg 
     67      1.1  mrg   /* The number of vectors in a tuple, or 1 if the function only operates
     68      1.1  mrg      on single vectors.  */
     69      1.1  mrg   unsigned int m_vectors_per_tuple;
     70      1.1  mrg };
     71      1.1  mrg 
     72      1.1  mrg /* A function_base that loads or stores contiguous memory elements
     73      1.1  mrg    without extending or truncating them.  */
     74      1.1  mrg class full_width_access : public multi_vector_function
     75      1.1  mrg {
     76      1.1  mrg public:
     77      1.1  mrg   CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1)
     78      1.1  mrg     : multi_vector_function (vectors_per_tuple) {}
     79      1.1  mrg 
     80      1.1  mrg   tree
     81      1.1  mrg   memory_scalar_type (const function_instance &fi) const OVERRIDE
     82      1.1  mrg   {
     83      1.1  mrg     return fi.scalar_type (0);
     84      1.1  mrg   }
     85      1.1  mrg 
     86      1.1  mrg   machine_mode
     87      1.1  mrg   memory_vector_mode (const function_instance &fi) const OVERRIDE
     88      1.1  mrg   {
     89      1.1  mrg     machine_mode mode = fi.vector_mode (0);
     90      1.1  mrg     if (m_vectors_per_tuple != 1)
     91      1.1  mrg       mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
     92      1.1  mrg     return mode;
     93      1.1  mrg   }
     94      1.1  mrg };
     95      1.1  mrg 
     96      1.1  mrg /* A function_base that loads elements from memory and extends them
     97      1.1  mrg    to a wider element.  The memory element type is a fixed part of
     98      1.1  mrg    the function base name.  */
     99      1.1  mrg class extending_load : public function_base
    100      1.1  mrg {
    101      1.1  mrg public:
    102      1.1  mrg   CONSTEXPR extending_load (type_suffix_index memory_type)
    103      1.1  mrg     : m_memory_type (memory_type) {}
    104      1.1  mrg 
    105      1.1  mrg   unsigned int
    106      1.1  mrg   call_properties (const function_instance &) const OVERRIDE
    107      1.1  mrg   {
    108      1.1  mrg     return CP_READ_MEMORY;
    109      1.1  mrg   }
    110      1.1  mrg 
    111      1.1  mrg   tree
    112      1.1  mrg   memory_scalar_type (const function_instance &) const OVERRIDE
    113      1.1  mrg   {
    114      1.1  mrg     return scalar_types[type_suffixes[m_memory_type].vector_type];
    115      1.1  mrg   }
    116      1.1  mrg 
    117      1.1  mrg   machine_mode
    118      1.1  mrg   memory_vector_mode (const function_instance &fi) const OVERRIDE
    119      1.1  mrg   {
    120      1.1  mrg     machine_mode mem_mode = type_suffixes[m_memory_type].vector_mode;
    121      1.1  mrg     machine_mode reg_mode = fi.vector_mode (0);
    122      1.1  mrg     return aarch64_sve_data_mode (GET_MODE_INNER (mem_mode),
    123      1.1  mrg 				  GET_MODE_NUNITS (reg_mode)).require ();
    124      1.1  mrg   }
    125      1.1  mrg 
    126      1.1  mrg   /* Return the rtx code associated with the kind of extension that
    127      1.1  mrg      the load performs.  */
    128      1.1  mrg   rtx_code
    129      1.1  mrg   extend_rtx_code () const
    130      1.1  mrg   {
    131      1.1  mrg     return (type_suffixes[m_memory_type].unsigned_p
    132      1.1  mrg 	    ? ZERO_EXTEND : SIGN_EXTEND);
    133      1.1  mrg   }
    134      1.1  mrg 
    135      1.1  mrg   /* The type of the memory elements.  This is part of the function base
    136      1.1  mrg      name rather than a true type suffix.  */
    137      1.1  mrg   type_suffix_index m_memory_type;
    138      1.1  mrg };
    139      1.1  mrg 
    140      1.1  mrg /* A function_base that truncates vector elements and stores them to memory.
    141      1.1  mrg    The memory element width is a fixed part of the function base name.  */
    142      1.1  mrg class truncating_store : public function_base
    143      1.1  mrg {
    144      1.1  mrg public:
    145      1.1  mrg   CONSTEXPR truncating_store (scalar_int_mode to_mode) : m_to_mode (to_mode) {}
    146      1.1  mrg 
    147      1.1  mrg   unsigned int
    148      1.1  mrg   call_properties (const function_instance &) const OVERRIDE
    149      1.1  mrg   {
    150      1.1  mrg     return CP_WRITE_MEMORY;
    151      1.1  mrg   }
    152      1.1  mrg 
    153      1.1  mrg   tree
    154      1.1  mrg   memory_scalar_type (const function_instance &fi) const OVERRIDE
    155      1.1  mrg   {
    156      1.1  mrg     /* In truncating stores, the signedness of the memory element is defined
    157      1.1  mrg        to be the same as the signedness of the vector element.  The signedness
    158      1.1  mrg        doesn't make any difference to the behavior of the function.  */
    159      1.1  mrg     type_class_index tclass = fi.type_suffix (0).tclass;
    160      1.1  mrg     unsigned int element_bits = GET_MODE_BITSIZE (m_to_mode);
    161      1.1  mrg     type_suffix_index suffix = find_type_suffix (tclass, element_bits);
    162      1.1  mrg     return scalar_types[type_suffixes[suffix].vector_type];
    163      1.1  mrg   }
    164      1.1  mrg 
    165      1.1  mrg   machine_mode
    166      1.1  mrg   memory_vector_mode (const function_instance &fi) const OVERRIDE
    167      1.1  mrg   {
    168      1.1  mrg     poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0));
    169      1.1  mrg     return aarch64_sve_data_mode (m_to_mode, nunits).require ();
    170      1.1  mrg   }
    171      1.1  mrg 
    172      1.1  mrg   /* The mode of a single memory element.  */
    173      1.1  mrg   scalar_int_mode m_to_mode;
    174      1.1  mrg };
    175      1.1  mrg 
    176      1.1  mrg /* An incomplete function_base for functions that have an associated rtx code.
    177      1.1  mrg    It simply records information about the mapping for derived classes
    178      1.1  mrg    to use.  */
    179      1.1  mrg class rtx_code_function_base : public function_base
    180      1.1  mrg {
    181      1.1  mrg public:
    182      1.1  mrg   CONSTEXPR rtx_code_function_base (rtx_code code_for_sint,
    183      1.1  mrg 				    rtx_code code_for_uint,
    184      1.1  mrg 				    int unspec_for_fp = -1)
    185      1.1  mrg     : m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint),
    186      1.1  mrg       m_unspec_for_fp (unspec_for_fp) {}
    187      1.1  mrg 
    188      1.1  mrg   /* The rtx code to use for signed and unsigned integers respectively.
    189      1.1  mrg      Can be UNKNOWN for functions that don't have integer forms.  */
    190      1.1  mrg   rtx_code m_code_for_sint;
    191      1.1  mrg   rtx_code m_code_for_uint;
    192      1.1  mrg 
    193      1.1  mrg   /* The UNSPEC_COND_* to use for floating-point operations.  Can be -1
    194      1.1  mrg      for functions that only operate on integers.  */
    195      1.1  mrg   int m_unspec_for_fp;
    196      1.1  mrg };
    197      1.1  mrg 
    198      1.1  mrg /* A function_base for functions that have an associated rtx code.
    199      1.1  mrg    It supports all forms of predication except PRED_implicit.  */
    200      1.1  mrg class rtx_code_function : public rtx_code_function_base
    201      1.1  mrg {
    202      1.1  mrg public:
    203      1.1  mrg   CONSTEXPR rtx_code_function (rtx_code code_for_sint, rtx_code code_for_uint,
    204      1.1  mrg 			       int unspec_for_fp = -1)
    205      1.1  mrg     : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {}
    206      1.1  mrg 
    207      1.1  mrg   rtx
    208      1.1  mrg   expand (function_expander &e) const OVERRIDE
    209      1.1  mrg   {
    210      1.1  mrg     return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
    211      1.1  mrg 			       m_unspec_for_fp);
    212      1.1  mrg   }
    213      1.1  mrg };
    214      1.1  mrg 
    215      1.1  mrg /* Like rtx_code_function, but for functions that take what is normally
    216      1.1  mrg    the final argument first.  One use of this class is to handle binary
    217      1.1  mrg    reversed operations; another is to handle MLA-style operations that
    218      1.1  mrg    are normally expressed in GCC as MAD-style operations.  */
    219      1.1  mrg class rtx_code_function_rotated : public rtx_code_function_base
    220      1.1  mrg {
    221      1.1  mrg public:
    222      1.1  mrg   CONSTEXPR rtx_code_function_rotated (rtx_code code_for_sint,
    223      1.1  mrg 				       rtx_code code_for_uint,
    224      1.1  mrg 				       int unspec_for_fp = -1)
    225      1.1  mrg     : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {}
    226      1.1  mrg 
    227      1.1  mrg   rtx
    228      1.1  mrg   expand (function_expander &e) const OVERRIDE
    229      1.1  mrg   {
    230      1.1  mrg     /* Rotate the inputs into their normal order, but continue to make _m
    231      1.1  mrg        functions merge with what was originally the first vector argument.  */
    232      1.1  mrg     unsigned int nargs = e.args.length ();
    233      1.1  mrg     e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs);
    234      1.1  mrg     return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
    235      1.1  mrg 			       m_unspec_for_fp, nargs - 1);
    236      1.1  mrg   }
    237      1.1  mrg };
    238      1.1  mrg 
    239      1.1  mrg /* An incomplete function_base for functions that have an associated
    240      1.1  mrg    unspec code, with separate codes for signed integers, unsigned
    241      1.1  mrg    integers and floating-point values.  The class simply records
    242      1.1  mrg    information about the mapping for derived classes to use.  */
    243      1.1  mrg class unspec_based_function_base : public function_base
    244      1.1  mrg {
    245      1.1  mrg public:
    246      1.1  mrg   CONSTEXPR unspec_based_function_base (int unspec_for_sint,
    247      1.1  mrg 					int unspec_for_uint,
    248      1.1  mrg 					int unspec_for_fp)
    249      1.1  mrg     : m_unspec_for_sint (unspec_for_sint),
    250      1.1  mrg       m_unspec_for_uint (unspec_for_uint),
    251      1.1  mrg       m_unspec_for_fp (unspec_for_fp)
    252      1.1  mrg   {}
    253      1.1  mrg 
    254      1.1  mrg   /* Return the unspec code to use for INSTANCE, based on type suffix 0.  */
    255      1.1  mrg   int
    256      1.1  mrg   unspec_for (const function_instance &instance) const
    257      1.1  mrg   {
    258      1.1  mrg     return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp
    259      1.1  mrg 	    : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint
    260      1.1  mrg 	    : m_unspec_for_sint);
    261      1.1  mrg   }
    262      1.1  mrg 
    263      1.1  mrg   /* The unspec code associated with signed-integer, unsigned-integer
    264      1.1  mrg      and floating-point operations respectively.  */
    265      1.1  mrg   int m_unspec_for_sint;
    266      1.1  mrg   int m_unspec_for_uint;
    267      1.1  mrg   int m_unspec_for_fp;
    268      1.1  mrg };
    269      1.1  mrg 
    270      1.1  mrg /* A function_base for functions that have an associated unspec code.
    271      1.1  mrg    It supports all forms of predication except PRED_implicit.  */
    272      1.1  mrg class unspec_based_function : public unspec_based_function_base
    273      1.1  mrg {
    274      1.1  mrg public:
    275      1.1  mrg   CONSTEXPR unspec_based_function (int unspec_for_sint, int unspec_for_uint,
    276      1.1  mrg 				   int unspec_for_fp)
    277      1.1  mrg     : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
    278      1.1  mrg 				  unspec_for_fp)
    279      1.1  mrg   {}
    280      1.1  mrg 
    281      1.1  mrg   rtx
    282      1.1  mrg   expand (function_expander &e) const OVERRIDE
    283      1.1  mrg   {
    284      1.1  mrg     return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
    285      1.1  mrg 			     m_unspec_for_fp);
    286      1.1  mrg   }
    287      1.1  mrg };
    288      1.1  mrg 
    289      1.1  mrg /* Like unspec_based_function, but for functions that take what is normally
    290      1.1  mrg    the final argument first.  One use of this class is to handle binary
    291      1.1  mrg    reversed operations; another is to handle MLA-style operations that
    292      1.1  mrg    are normally expressed in GCC as MAD-style operations.  */
    293      1.1  mrg class unspec_based_function_rotated : public unspec_based_function_base
    294      1.1  mrg {
    295      1.1  mrg public:
    296      1.1  mrg   CONSTEXPR unspec_based_function_rotated (int unspec_for_sint,
    297      1.1  mrg 					   int unspec_for_uint,
    298      1.1  mrg 					   int unspec_for_fp)
    299      1.1  mrg     : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
    300      1.1  mrg 				  unspec_for_fp)
    301      1.1  mrg   {}
    302      1.1  mrg 
    303      1.1  mrg   rtx
    304      1.1  mrg   expand (function_expander &e) const OVERRIDE
    305      1.1  mrg   {
    306      1.1  mrg     /* Rotate the inputs into their normal order, but continue to make _m
    307      1.1  mrg        functions merge with what was originally the first vector argument.  */
    308      1.1  mrg     unsigned int nargs = e.args.length ();
    309      1.1  mrg     e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs);
    310      1.1  mrg     return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
    311      1.1  mrg 			     m_unspec_for_fp, nargs - 1);
    312      1.1  mrg   }
    313      1.1  mrg };
    314      1.1  mrg 
    315      1.1  mrg /* Like unspec_based_function, but map the function directly to
    316      1.1  mrg    CODE (UNSPEC, M) instead of using the generic predication-based
    317      1.1  mrg    expansion. where M is the vector mode associated with type suffix 0.
    318      1.1  mrg    This is useful if the unspec doesn't describe the full operation or
    319      1.1  mrg    if the usual predication rules don't apply for some reason.  */
    320      1.1  mrg template<insn_code (*CODE) (int, machine_mode)>
    321      1.1  mrg class unspec_based_function_exact_insn : public unspec_based_function_base
    322      1.1  mrg {
    323      1.1  mrg public:
    324      1.1  mrg   CONSTEXPR unspec_based_function_exact_insn (int unspec_for_sint,
    325      1.1  mrg 					      int unspec_for_uint,
    326      1.1  mrg 					      int unspec_for_fp)
    327      1.1  mrg     : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
    328      1.1  mrg 				  unspec_for_fp)
    329      1.1  mrg   {}
    330      1.1  mrg 
    331      1.1  mrg   rtx
    332      1.1  mrg   expand (function_expander &e) const OVERRIDE
    333      1.1  mrg   {
    334      1.1  mrg     return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0)));
    335      1.1  mrg   }
    336      1.1  mrg };
    337      1.1  mrg 
/* A function that performs an unspec and then adds it to another value.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add>
  unspec_based_add_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add_lane>
  unspec_based_add_lane_function;

/* Generic unspec-based _lane function.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_lane>
  unspec_based_lane_function;

/* A function that uses aarch64_pred* patterns regardless of the
   predication type.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_pred>
  unspec_based_pred_function;

/* Like unspec_based_add_function and unspec_based_add_lane_function,
   but using saturating addition.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qadd>
  unspec_based_qadd_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qadd_lane>
  unspec_based_qadd_lane_function;

/* Like unspec_based_sub_function and unspec_based_sub_lane_function,
   but using saturating subtraction.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qsub>
  unspec_based_qsub_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qsub_lane>
  unspec_based_qsub_lane_function;

/* A function that performs an unspec and then subtracts it from
   another value.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub>
  unspec_based_sub_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane>
  unspec_based_sub_lane_function;
    373      1.1  mrg 
    374      1.1  mrg /* A function that acts like unspec_based_function_exact_insn<INT_CODE>
    375      1.1  mrg    when operating on integers, but that expands to an (fma ...)-style
    376      1.1  mrg    aarch64_sve* operation when applied to floats.  */
    377      1.1  mrg template<insn_code (*INT_CODE) (int, machine_mode)>
    378      1.1  mrg class unspec_based_fused_function : public unspec_based_function_base
    379      1.1  mrg {
    380      1.1  mrg public:
    381      1.1  mrg   CONSTEXPR unspec_based_fused_function (int unspec_for_sint,
    382      1.1  mrg 					 int unspec_for_uint,
    383      1.1  mrg 					 int unspec_for_fp)
    384      1.1  mrg     : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
    385      1.1  mrg 				  unspec_for_fp)
    386      1.1  mrg   {}
    387      1.1  mrg 
    388      1.1  mrg   rtx
    389      1.1  mrg   expand (function_expander &e) const OVERRIDE
    390      1.1  mrg   {
    391      1.1  mrg     int unspec = unspec_for (e);
    392      1.1  mrg     insn_code icode;
    393      1.1  mrg     if (e.type_suffix (0).float_p)
    394      1.1  mrg       {
    395      1.1  mrg 	/* Put the operands in the normal (fma ...) order, with the accumulator
    396      1.1  mrg 	   last.  This fits naturally since that's also the unprinted operand
    397      1.1  mrg 	   in the asm output.  */
    398      1.1  mrg 	e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3);
    399      1.1  mrg 	icode = code_for_aarch64_sve (unspec, e.vector_mode (0));
    400      1.1  mrg       }
    401      1.1  mrg     else
    402      1.1  mrg       icode = INT_CODE (unspec, e.vector_mode (0));
    403      1.1  mrg     return e.use_exact_insn (icode);
    404      1.1  mrg   }
    405      1.1  mrg };
/* Fused multiply-add and multiply-subtract variants of the above.  */
typedef unspec_based_fused_function<code_for_aarch64_sve_add>
  unspec_based_mla_function;
typedef unspec_based_fused_function<code_for_aarch64_sve_sub>
  unspec_based_mls_function;
    410      1.1  mrg 
    411      1.1  mrg /* Like unspec_based_fused_function, but for _lane functions.  */
    412      1.1  mrg template<insn_code (*INT_CODE) (int, machine_mode)>
    413      1.1  mrg class unspec_based_fused_lane_function : public unspec_based_function_base
    414      1.1  mrg {
    415      1.1  mrg public:
    416      1.1  mrg   CONSTEXPR unspec_based_fused_lane_function (int unspec_for_sint,
    417      1.1  mrg 					      int unspec_for_uint,
    418      1.1  mrg 					      int unspec_for_fp)
    419      1.1  mrg     : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
    420      1.1  mrg 				  unspec_for_fp)
    421      1.1  mrg   {}
    422      1.1  mrg 
    423      1.1  mrg   rtx
    424      1.1  mrg   expand (function_expander &e) const OVERRIDE
    425      1.1  mrg   {
    426      1.1  mrg     int unspec = unspec_for (e);
    427      1.1  mrg     insn_code icode;
    428      1.1  mrg     if (e.type_suffix (0).float_p)
    429      1.1  mrg       {
    430      1.1  mrg 	/* Put the operands in the normal (fma ...) order, with the accumulator
    431      1.1  mrg 	   last.  This fits naturally since that's also the unprinted operand
    432      1.1  mrg 	   in the asm output.  */
    433      1.1  mrg 	e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4);
    434      1.1  mrg 	icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
    435      1.1  mrg       }
    436      1.1  mrg     else
    437      1.1  mrg       icode = INT_CODE (unspec, e.vector_mode (0));
    438      1.1  mrg     return e.use_exact_insn (icode);
    439      1.1  mrg   }
    440      1.1  mrg };
/* Fused multiply-add and multiply-subtract _lane variants of the above.  */
typedef unspec_based_fused_lane_function<code_for_aarch64_sve_add_lane>
  unspec_based_mla_lane_function;
typedef unspec_based_fused_lane_function<code_for_aarch64_sve_sub_lane>
  unspec_based_mls_lane_function;
    445      1.1  mrg 
    446      1.1  mrg /* A function_base that uses CODE_FOR_MODE (M) to get the associated
    447      1.1  mrg    instruction code, where M is the vector mode associated with type
    448      1.1  mrg    suffix N.  */
    449      1.1  mrg template<insn_code (*CODE_FOR_MODE) (machine_mode), unsigned int N>
    450      1.1  mrg class code_for_mode_function : public function_base
    451      1.1  mrg {
    452      1.1  mrg public:
    453      1.1  mrg   rtx
    454      1.1  mrg   expand (function_expander &e) const OVERRIDE
    455      1.1  mrg   {
    456      1.1  mrg     return e.use_exact_insn (CODE_FOR_MODE (e.vector_mode (N)));
    457      1.1  mrg   }
    458      1.1  mrg };
    459      1.1  mrg 
/* A function that uses code_for_<PATTERN> (M), where M is the vector
   mode associated with the first type suffix.  */
#define CODE_FOR_MODE0(PATTERN) code_for_mode_function<code_for_##PATTERN, 0>

/* Likewise for the second type suffix.  */
#define CODE_FOR_MODE1(PATTERN) code_for_mode_function<code_for_##PATTERN, 1>

/* Like CODE_FOR_MODE0, but the function doesn't raise exceptions when
   operating on floating-point data.  */
#define QUIET_CODE_FOR_MODE0(PATTERN) \
  quiet< code_for_mode_function<code_for_##PATTERN, 0> >
    471      1.1  mrg 
    472      1.1  mrg /* A function_base for functions that always expand to a fixed insn pattern,
    473      1.1  mrg    regardless of what the suffixes are.  */
    474      1.1  mrg class fixed_insn_function : public function_base
    475      1.1  mrg {
    476      1.1  mrg public:
    477      1.1  mrg   CONSTEXPR fixed_insn_function (insn_code code) : m_code (code) {}
    478      1.1  mrg 
    479      1.1  mrg   rtx
    480      1.1  mrg   expand (function_expander &e) const OVERRIDE
    481      1.1  mrg   {
    482      1.1  mrg     return e.use_exact_insn (m_code);
    483      1.1  mrg   }
    484      1.1  mrg 
    485      1.1  mrg   /* The instruction to use.  */
    486      1.1  mrg   insn_code m_code;
    487      1.1  mrg };
    488      1.1  mrg 
/* A function_base for functions that permute their arguments.  */
class permute : public quiet<function_base>
{
public:
  /* Fold a unary or binary permute with the permute vector given by
     BUILDER.  Return the replacement gimple assignment, or null if
     the call should be left as-is.  */
  gimple *
  fold_permute (const gimple_folder &f, const vec_perm_builder &builder) const
  {
    /* Punt for now on _b16 and wider; we'd need more complex evpc logic
       to rerecognize the result.  */
    if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
      return NULL;

    unsigned int nargs = gimple_call_num_args (f.call);
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_indices indices (builder, nargs, nelts);
    /* The selector elements of a VEC_PERM_EXPR are ssizetype.  */
    tree perm_type = build_vector_type (ssizetype, nelts);
    /* For a unary permute (nargs == 1), the first and last call
       arguments are the same, giving a two-operand VEC_PERM_EXPR on
       a single input.  */
    return gimple_build_assign (f.lhs, VEC_PERM_EXPR,
				gimple_call_arg (f.call, 0),
				gimple_call_arg (f.call, nargs - 1),
				vec_perm_indices_to_tree (perm_type, indices));
  }
};
    513      1.1  mrg 
    514      1.1  mrg /* A function_base for functions that permute two vectors using a fixed
    515      1.1  mrg    choice of indices.  */
    516      1.1  mrg class binary_permute : public permute
    517      1.1  mrg {
    518      1.1  mrg public:
    519      1.1  mrg   CONSTEXPR binary_permute (int unspec) : m_unspec (unspec) {}
    520      1.1  mrg 
    521      1.1  mrg   rtx
    522      1.1  mrg   expand (function_expander &e) const OVERRIDE
    523      1.1  mrg   {
    524      1.1  mrg     insn_code icode = code_for_aarch64_sve (m_unspec, e.vector_mode (0));
    525      1.1  mrg     return e.use_exact_insn (icode);
    526      1.1  mrg   }
    527      1.1  mrg 
    528      1.1  mrg   /* The unspec code associated with the operation.  */
    529      1.1  mrg   int m_unspec;
    530      1.1  mrg };
    531      1.1  mrg 
    532      1.1  mrg /* A function_base for functions that reduce a vector to a scalar.  */
    533      1.1  mrg class reduction : public function_base
    534      1.1  mrg {
    535      1.1  mrg public:
    536      1.1  mrg   CONSTEXPR reduction (int unspec)
    537      1.1  mrg     : m_unspec_for_sint (unspec),
    538      1.1  mrg       m_unspec_for_uint (unspec),
    539      1.1  mrg       m_unspec_for_fp (unspec)
    540      1.1  mrg   {}
    541      1.1  mrg 
    542      1.1  mrg   CONSTEXPR reduction (int unspec_for_sint, int unspec_for_uint,
    543      1.1  mrg 		       int unspec_for_fp)
    544      1.1  mrg     : m_unspec_for_sint (unspec_for_sint),
    545      1.1  mrg       m_unspec_for_uint (unspec_for_uint),
    546      1.1  mrg       m_unspec_for_fp (unspec_for_fp)
    547      1.1  mrg   {}
    548      1.1  mrg 
    549      1.1  mrg   rtx
    550      1.1  mrg   expand (function_expander &e) const OVERRIDE
    551      1.1  mrg   {
    552      1.1  mrg     machine_mode mode = e.vector_mode (0);
    553      1.1  mrg     int unspec = (!e.type_suffix (0).integer_p ? m_unspec_for_fp
    554      1.1  mrg 		  : e.type_suffix (0).unsigned_p ? m_unspec_for_uint
    555      1.1  mrg 		  : m_unspec_for_sint);
    556      1.1  mrg     /* There's no distinction between SADDV and UADDV for 64-bit elements;
    557      1.1  mrg        the signed versions only exist for narrower elements.  */
    558      1.1  mrg     if (GET_MODE_UNIT_BITSIZE (mode) == 64 && unspec == UNSPEC_SADDV)
    559      1.1  mrg       unspec = UNSPEC_UADDV;
    560      1.1  mrg     return e.use_exact_insn (code_for_aarch64_pred_reduc (unspec, mode));
    561      1.1  mrg   }
    562      1.1  mrg 
    563      1.1  mrg   /* The unspec code associated with signed-integer, unsigned-integer
    564      1.1  mrg      and floating-point operations respectively.  */
    565      1.1  mrg   int m_unspec_for_sint;
    566      1.1  mrg   int m_unspec_for_uint;
    567      1.1  mrg   int m_unspec_for_fp;
    568      1.1  mrg };
    569      1.1  mrg 
    570      1.1  mrg /* A function_base for functions that shift narrower-than-64-bit values
    571      1.1  mrg    by 64-bit amounts.  */
    572      1.1  mrg class shift_wide : public function_base
    573      1.1  mrg {
    574      1.1  mrg public:
    575      1.1  mrg   CONSTEXPR shift_wide (rtx_code code, int wide_unspec)
    576      1.1  mrg     : m_code (code), m_wide_unspec (wide_unspec) {}
    577      1.1  mrg 
    578      1.1  mrg   rtx
    579      1.1  mrg   expand (function_expander &e) const OVERRIDE
    580      1.1  mrg   {
    581      1.1  mrg     machine_mode mode = e.vector_mode (0);
    582      1.1  mrg     machine_mode elem_mode = GET_MODE_INNER (mode);
    583      1.1  mrg 
    584      1.1  mrg     /* If the argument is a constant that the normal shifts can handle
    585      1.1  mrg        directly, use them instead.  */
    586      1.1  mrg     rtx shift = unwrap_const_vec_duplicate (e.args.last ());
    587      1.1  mrg     if (aarch64_simd_shift_imm_p (shift, elem_mode, m_code == ASHIFT))
    588      1.1  mrg       {
    589      1.1  mrg 	e.args.last () = shift;
    590      1.1  mrg 	return e.map_to_rtx_codes (m_code, m_code, -1);
    591      1.1  mrg       }
    592      1.1  mrg 
    593      1.1  mrg     if (e.pred == PRED_x)
    594      1.1  mrg       return e.use_unpred_insn (code_for_aarch64_sve (m_wide_unspec, mode));
    595      1.1  mrg 
    596      1.1  mrg     return e.use_cond_insn (code_for_cond (m_wide_unspec, mode));
    597      1.1  mrg   }
    598      1.1  mrg 
    599      1.1  mrg   /* The rtx code associated with a "normal" shift.  */
    600      1.1  mrg   rtx_code m_code;
    601      1.1  mrg 
    602      1.1  mrg   /* The unspec code associated with the wide shift.  */
    603      1.1  mrg   int m_wide_unspec;
    604      1.1  mrg };
    605      1.1  mrg 
    606      1.1  mrg /* A function_base for unary functions that count bits.  */
    607      1.1  mrg class unary_count : public quiet<function_base>
    608      1.1  mrg {
    609      1.1  mrg public:
    610      1.1  mrg   CONSTEXPR unary_count (rtx_code code) : m_code (code) {}
    611      1.1  mrg 
    612      1.1  mrg   rtx
    613      1.1  mrg   expand (function_expander &e) const OVERRIDE
    614      1.1  mrg   {
    615      1.1  mrg     /* The md patterns treat the operand as an integer.  */
    616      1.1  mrg     machine_mode mode = aarch64_sve_int_mode (e.vector_mode (0));
    617      1.1  mrg     e.args.last () = gen_lowpart (mode, e.args.last ());
    618      1.1  mrg 
    619      1.1  mrg     if (e.pred == PRED_x)
    620      1.1  mrg       return e.use_pred_x_insn (code_for_aarch64_pred (m_code, mode));
    621      1.1  mrg 
    622      1.1  mrg     return e.use_cond_insn (code_for_cond (m_code, mode));
    623      1.1  mrg   }
    624      1.1  mrg 
    625      1.1  mrg   /* The rtx code associated with the operation.  */
    626      1.1  mrg   rtx_code m_code;
    627      1.1  mrg };
    628      1.1  mrg 
    629      1.1  mrg /* A function_base for svwhile* functions.  */
    630      1.1  mrg class while_comparison : public function_base
    631      1.1  mrg {
    632      1.1  mrg public:
    633      1.1  mrg   CONSTEXPR while_comparison (int unspec_for_sint, int unspec_for_uint)
    634      1.1  mrg     : m_unspec_for_sint (unspec_for_sint),
    635      1.1  mrg       m_unspec_for_uint (unspec_for_uint)
    636      1.1  mrg   {}
    637      1.1  mrg 
    638      1.1  mrg   rtx
    639      1.1  mrg   expand (function_expander &e) const OVERRIDE
    640      1.1  mrg   {
    641      1.1  mrg     /* Suffix 0 determines the predicate mode, suffix 1 determines the
    642      1.1  mrg        scalar mode and signedness.  */
    643      1.1  mrg     int unspec = (e.type_suffix (1).unsigned_p
    644      1.1  mrg 		  ? m_unspec_for_uint
    645      1.1  mrg 		  : m_unspec_for_sint);
    646      1.1  mrg     machine_mode pred_mode = e.vector_mode (0);
    647      1.1  mrg     scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1));
    648      1.1  mrg     return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode));
    649      1.1  mrg   }
    650      1.1  mrg 
    651      1.1  mrg   /* The unspec codes associated with signed and unsigned operations
    652      1.1  mrg      respectively.  */
    653      1.1  mrg   int m_unspec_for_sint;
    654      1.1  mrg   int m_unspec_for_uint;
    655      1.1  mrg };
    656      1.1  mrg 
    657      1.1  mrg }
    658      1.1  mrg 
/* Declare the global function base NAME, creating it from an instance
   of class CLASS with constructor arguments ARGS.  The NAME##_obj
   instance itself sits in an anonymous namespace; NAME is the pointer
   to it that is published in namespace "functions".  */
#define FUNCTION(NAME, CLASS, ARGS) \
  namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \
  namespace functions { const function_base *const NAME = &NAME##_obj; }
    664      1.1  mrg 
    665      1.1  mrg #endif
    666