Home | History | Annotate | Line # | Download | only in gcn
      1  1.1  mrg /* Copyright (C) 2016-2022 Free Software Foundation, Inc.
      2  1.1  mrg 
      3  1.1  mrg    This file is free software; you can redistribute it and/or modify it under
      4  1.1  mrg    the terms of the GNU General Public License as published by the Free
      5  1.1  mrg    Software Foundation; either version 3 of the License, or (at your option)
      6  1.1  mrg    any later version.
      7  1.1  mrg 
      8  1.1  mrg    This file is distributed in the hope that it will be useful, but WITHOUT
      9  1.1  mrg    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     10  1.1  mrg    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     11  1.1  mrg    for more details.
     12  1.1  mrg 
     13  1.1  mrg    You should have received a copy of the GNU General Public License
     14  1.1  mrg    along with GCC; see the file COPYING3.  If not see
     15  1.1  mrg    <http://www.gnu.org/licenses/>.  */
     16  1.1  mrg 
     17  1.1  mrg /* {{{ Includes.  */
     18  1.1  mrg 
     19  1.1  mrg /* We want GET_MODE_SIZE et al to return integers, please.  */
     20  1.1  mrg #define IN_TARGET_CODE 1
     21  1.1  mrg 
     22  1.1  mrg #include "config.h"
     23  1.1  mrg #include "system.h"
     24  1.1  mrg #include "coretypes.h"
     25  1.1  mrg #include "backend.h"
     26  1.1  mrg #include "target.h"
     27  1.1  mrg #include "memmodel.h"
     28  1.1  mrg #include "rtl.h"
     29  1.1  mrg #include "tree.h"
     30  1.1  mrg #include "df.h"
     31  1.1  mrg #include "tm_p.h"
     32  1.1  mrg #include "stringpool.h"
     33  1.1  mrg #include "optabs.h"
     34  1.1  mrg #include "regs.h"
     35  1.1  mrg #include "emit-rtl.h"
     36  1.1  mrg #include "recog.h"
     37  1.1  mrg #include "diagnostic-core.h"
     38  1.1  mrg #include "insn-attr.h"
     39  1.1  mrg #include "fold-const.h"
     40  1.1  mrg #include "calls.h"
     41  1.1  mrg #include "explow.h"
     42  1.1  mrg #include "expr.h"
     43  1.1  mrg #include "output.h"
     44  1.1  mrg #include "cfgrtl.h"
     45  1.1  mrg #include "langhooks.h"
     46  1.1  mrg #include "builtins.h"
     47  1.1  mrg #include "omp-general.h"
     48  1.1  mrg #include "print-rtl.h"
     49  1.1  mrg #include "attribs.h"
     50  1.1  mrg #include "varasm.h"
     51  1.1  mrg #include "intl.h"
     52  1.1  mrg #include "rtl-iter.h"
     53  1.1  mrg #include "dwarf2.h"
     54  1.1  mrg #include "gimple.h"
     55  1.1  mrg 
     56  1.1  mrg /* This file should be included last.  */
     57  1.1  mrg #include "target-def.h"
     58  1.1  mrg 
     59  1.1  mrg /* }}}  */
     60  1.1  mrg /* {{{ Global variables.  */
     61  1.1  mrg 
     62  1.1  mrg /* Constants used by FP instructions.  */
     63  1.1  mrg 
     64  1.1  mrg static REAL_VALUE_TYPE dconst4, dconst1over2pi;
     65  1.1  mrg static bool ext_gcn_constants_init = 0;
     66  1.1  mrg 
     67  1.1  mrg /* Holds the ISA variant, derived from the command line parameters.  */
     68  1.1  mrg 
     69  1.1  mrg int gcn_isa = 3;		/* Default to GCN3.  */
     70  1.1  mrg 
     71  1.1  mrg /* Reserve this much space for LDS (for propagating variables from
     72  1.1  mrg    worker-single mode to worker-partitioned mode), per workgroup.  Global
     73  1.1  mrg    analysis could calculate an exact bound, but we don't do that yet.
     74  1.1  mrg 
     75  1.1  mrg    We want to permit full occupancy, so size accordingly.  */
     76  1.1  mrg 
     77  1.1  mrg /* Use this as a default, but allow it to grow if the user requests a large
     78  1.1  mrg    amount of gang-private shared-memory space.  */
     79  1.1  mrg static int acc_lds_size = 0x600;
     80  1.1  mrg 
     81  1.1  mrg #define OMP_LDS_SIZE 0x600    /* 0x600 is 1/40 total, rounded down.  */
     82  1.1  mrg #define ACC_LDS_SIZE acc_lds_size
     83  1.1  mrg #define OTHER_LDS_SIZE 65536  /* If in doubt, reserve all of it.  */
     84  1.1  mrg 
     85  1.1  mrg #define LDS_SIZE (flag_openacc ? ACC_LDS_SIZE \
     86  1.1  mrg 		  : flag_openmp ? OMP_LDS_SIZE \
     87  1.1  mrg 		  : OTHER_LDS_SIZE)
     88  1.1  mrg 
     89  1.1  mrg static int gang_private_hwm = 32;
     90  1.1  mrg static hash_map<tree, int> lds_allocs;
     91  1.1  mrg 
     92  1.1  mrg /* The number of registers usable by normal non-kernel functions.
     93  1.1  mrg    The SGPR count includes any special extra registers such as VCC.  */
     94  1.1  mrg 
     95  1.1  mrg #define MAX_NORMAL_SGPR_COUNT	62  // i.e. 64 with VCC
     96  1.1  mrg #define MAX_NORMAL_VGPR_COUNT	24
     97  1.1  mrg 
     98  1.1  mrg /* }}}  */
     99  1.1  mrg /* {{{ Initialization and options.  */
    100  1.1  mrg 
    101  1.1  mrg /* Initialize machine_function.  */
    102  1.1  mrg 
    103  1.1  mrg static struct machine_function *
    104  1.1  mrg gcn_init_machine_status (void)
    105  1.1  mrg {
    106  1.1  mrg   struct machine_function *f;
    107  1.1  mrg 
    108  1.1  mrg   f = ggc_cleared_alloc<machine_function> ();
    109  1.1  mrg 
    110  1.1  mrg   if (TARGET_GCN3)
    111  1.1  mrg     f->use_flat_addressing = true;
    112  1.1  mrg 
    113  1.1  mrg   return f;
    114  1.1  mrg }
    115  1.1  mrg 
/* Implement TARGET_OPTION_OVERRIDE.

   Override option settings where defaults are variable, or we have specific
   needs to consider.  */

static void
gcn_option_override (void)
{
  init_machine_status = gcn_init_machine_status;

  /* The HSA runtime does not respect ELF load addresses, so force PIE.  */
  if (!flag_pie)
    flag_pie = 2;
  if (!flag_pic)
    flag_pic = flag_pie;

  /* Fiji is the only GCN3 device handled here; every other processor is
     treated as GCN5.  */
  gcn_isa = gcn_arch == PROCESSOR_FIJI ? 3 : 5;

  /* The default stack size needs to be small for offload kernels because
     there may be many, many threads.  Also, a smaller stack gives a
     measureable performance boost.  But, a small stack is insufficient
     for running the testsuite, so we use a larger default for the stand
     alone case.  */
  if (stack_size_opt == -1)
    {
      if (flag_openacc || flag_openmp)
	/* 512 bytes per work item = 32kB total.  */
	stack_size_opt = 512 * 64;
      else
	/* 1MB total.  */
	stack_size_opt = 1048576;
    }

  /* Default to 512 bytes (somewhat arbitrarily) of gang-private LDS space,
     but never less than the recorded high-water mark, and always leaving
     1kB of LDS headroom for reduction results and worker broadcasts.  */
  if (gang_private_size_opt == -1)
    gang_private_size_opt = 512;
  else if (gang_private_size_opt < gang_private_hwm)
    gang_private_size_opt = gang_private_hwm;
  else if (gang_private_size_opt >= acc_lds_size - 1024)
    {
      /* We need some space for reductions and worker broadcasting.  If the
	 user requests a large amount of gang-private LDS space, we might not
	 have enough left for the former.  Increase the LDS allocation in that
	 case, although this may reduce the maximum occupancy on the
	 hardware.  */
      acc_lds_size = gang_private_size_opt + 1024;
      if (acc_lds_size > 32768)
	acc_lds_size = 32768;
    }

  /* The xnack option is a placeholder, for now.  Any use is rejected.  */
  if (flag_xnack)
    sorry ("XNACK support");
}
    171  1.1  mrg 
    172  1.1  mrg /* }}}  */
    173  1.1  mrg /* {{{ Attributes.  */
    174  1.1  mrg 
/* This table defines the arguments that are permitted in
   __attribute__ ((amdgpu_hsa_kernel (...))).

   The names and values correspond to the HSA metadata that is encoded
   into the assembler file and binary.

   The #define constants interleaved below are indices into this table;
   they double as bit positions in the "requested" bitmask built by
   gcn_parse_amdgpu_hsa_kernel_attribute.  */

static const struct gcn_kernel_arg_type
{
  /* Argument name as written in the attribute.  */
  const char *name;
  /* The ".amdhsa_*" assembler pseudo requesting this argument, or NULL for
     entries that instead use a fixed hardware register (see below).  */
  const char *header_pseudo;
  /* Mode of the value; determines how many registers it occupies.  */
  machine_mode mode;

  /* This should be set to -1 or -2 for a dynamically allocated register
     number.  Use -1 if this argument contributes to the user_sgpr_count,
     -2 otherwise.  */
  int fixed_regno;
} gcn_kernel_arg_types[] = {
  {"exec", NULL, DImode, EXEC_REG},
#define PRIVATE_SEGMENT_BUFFER_ARG 1
  {"private_segment_buffer",
    ".amdhsa_user_sgpr_private_segment_buffer", TImode, -1},
#define DISPATCH_PTR_ARG 2
  {"dispatch_ptr", ".amdhsa_user_sgpr_dispatch_ptr", DImode, -1},
#define QUEUE_PTR_ARG 3
  {"queue_ptr", ".amdhsa_user_sgpr_queue_ptr", DImode, -1},
#define KERNARG_SEGMENT_PTR_ARG 4
  {"kernarg_segment_ptr", ".amdhsa_user_sgpr_kernarg_segment_ptr", DImode, -1},
  {"dispatch_id", ".amdhsa_user_sgpr_dispatch_id", DImode, -1},
#define FLAT_SCRATCH_INIT_ARG 6
  {"flat_scratch_init", ".amdhsa_user_sgpr_flat_scratch_init", DImode, -1},
#define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7
  {"private_segment_size", ".amdhsa_user_sgpr_private_segment_size", SImode, -1},
#define WORKGROUP_ID_X_ARG 8
  {"workgroup_id_X", ".amdhsa_system_sgpr_workgroup_id_x", SImode, -2},
  {"workgroup_id_Y", ".amdhsa_system_sgpr_workgroup_id_y", SImode, -2},
  {"workgroup_id_Z", ".amdhsa_system_sgpr_workgroup_id_z", SImode, -2},
  {"workgroup_info", ".amdhsa_system_sgpr_workgroup_info", SImode, -1},
#define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 12
  {"private_segment_wave_offset",
    ".amdhsa_system_sgpr_private_segment_wavefront_offset", SImode, -2},
#define WORK_ITEM_ID_X_ARG 13
  {"work_item_id_X", NULL, V64SImode, FIRST_VGPR_REG},
#define WORK_ITEM_ID_Y_ARG 14
  {"work_item_id_Y", NULL, V64SImode, FIRST_VGPR_REG + 1},
#define WORK_ITEM_ID_Z_ARG 15
  {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
};
    222  1.1  mrg 
/* Bitmask (bit N = index N of gcn_kernel_arg_types) of the arguments every
   kernel requests implicitly; arguments named explicitly in the attribute
   are OR'ed on top by gcn_parse_amdgpu_hsa_kernel_attribute.  */

static const long default_requested_args
	= (1 << PRIVATE_SEGMENT_BUFFER_ARG)
	  | (1 << DISPATCH_PTR_ARG)
	  | (1 << QUEUE_PTR_ARG)
	  | (1 << KERNARG_SEGMENT_PTR_ARG)
	  | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)
	  | (1 << WORKGROUP_ID_X_ARG)
	  | (1 << WORK_ITEM_ID_X_ARG)
	  | (1 << WORK_ITEM_ID_Y_ARG)
	  | (1 << WORK_ITEM_ID_Z_ARG);
    233  1.1  mrg 
/* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
   This function also sets the default values for some arguments.

   Return true on error, false on success (note: callers treat a true
   return as failure and drop the attribute), with ARGS populated.  */

static bool
gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
				       tree list)
{
  bool err = false;
  args->requested = default_requested_args;
  args->nargs = 0;

  /* No register is assigned to any argument yet.  */
  for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
    args->reg[a] = -1;

  /* Validate each attribute argument: it must be a string constant naming
     an entry of gcn_kernel_arg_types, and no name may appear twice.  */
  for (; list; list = TREE_CHAIN (list))
    {
      const char *str;
      if (TREE_CODE (TREE_VALUE (list)) != STRING_CST)
	{
	  error ("%<amdgpu_hsa_kernel%> attribute requires string constant "
		 "arguments");
	  break;
	}
      str = TREE_STRING_POINTER (TREE_VALUE (list));
      int a;
      for (a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
	{
	  if (!strcmp (str, gcn_kernel_arg_types[a].name))
	    break;
	}
      if (a == GCN_KERNEL_ARG_TYPES)
	{
	  error ("unknown specifier %qs in %<amdgpu_hsa_kernel%> attribute",
		 str);
	  err = true;
	  break;
	}
      if (args->requested & (1 << a))
	{
	  error ("duplicated parameter specifier %qs in %<amdgpu_hsa_kernel%> "
		 "attribute", str);
	  err = true;
	  break;
	}
      args->requested |= (1 << a);
      args->order[args->nargs++] = a;
    }

  /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
     WORK_ITEM_ID_Y_ARG.  Similarly, requesting WORK_ITEM_ID_Y_ARG implies
     requesting WORK_ITEM_ID_X_ARG.  */
  if (args->requested & (1 << WORK_ITEM_ID_Z_ARG))
    args->requested |= (1 << WORK_ITEM_ID_Y_ARG);
  if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
    args->requested |= (1 << WORK_ITEM_ID_X_ARG);

  /* Assign consecutive SGPRs, in table order, to the requested arguments
     that have no fixed register; only fixed_regno == -1 entries count
     towards the user SGPR total.  */
  int sgpr_regno = FIRST_SGPR_REG;
  args->nsgprs = 0;
  for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
    {
      if (!(args->requested & (1 << a)))
	continue;

      if (gcn_kernel_arg_types[a].fixed_regno >= 0)
	args->reg[a] = gcn_kernel_arg_types[a].fixed_regno;
      else
	{
	  int reg_count;

	  /* Each SGPR is 32 bits wide.  */
	  switch (gcn_kernel_arg_types[a].mode)
	    {
	    case E_SImode:
	      reg_count = 1;
	      break;
	    case E_DImode:
	      reg_count = 2;
	      break;
	    case E_TImode:
	      reg_count = 4;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  args->reg[a] = sgpr_regno;
	  sgpr_regno += reg_count;
	  if (gcn_kernel_arg_types[a].fixed_regno == -1)
	    args->nsgprs += reg_count;
	}
    }
  if (sgpr_regno > FIRST_SGPR_REG + 16)
    {
      /* NOTE(review): this diagnostic does not set ERR, so the attribute is
	 still accepted in this case — confirm whether that is intended.  */
      error ("too many arguments passed in sgpr registers");
    }
  return err;
}
    331  1.1  mrg 
/* Referenced by TARGET_ATTRIBUTE_TABLE.

   Validates target specific attributes.  */

static tree
gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name,
					tree args, int, bool *no_add_attrs)
{
  /* The attribute is only meaningful on function types.  */
  if (!FUNC_OR_METHOD_TYPE_P (*node))
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* NOTE(review): the attribute table registers this handler under the name
     "amdgpu_hsa_kernel", but the test below looks for "gcnhsa_kernel", so
     this branch appears unreachable and the argument validation below is
     never performed — confirm whether the string is stale.  */
  if (is_attribute_p ("gcnhsa_kernel", name))
    {
      struct gcn_kernel_args kernelarg;

      /* The parser returns true on error; drop the attribute then.  */
      if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg, args))
	*no_add_attrs = true;

      return NULL_TREE;
    }

  return NULL_TREE;
}
    361  1.1  mrg 
/* Implement TARGET_ATTRIBUTE_TABLE.

   Create target-specific __attribute__ types.  */

static const struct attribute_spec gcn_attribute_table[] = {
  /* Each entry has nine initializers:
     { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude }  */
  {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true,
   true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL},
  /* End element.  */
  {NULL, 0, 0, false, false, false, false, NULL, NULL}
};
    374  1.1  mrg 
    375  1.1  mrg /* }}}  */
    376  1.1  mrg /* {{{ Registers and modes.  */
    377  1.1  mrg 
    378  1.1  mrg /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.  */
    379  1.1  mrg 
    380  1.1  mrg bool
    381  1.1  mrg gcn_scalar_mode_supported_p (scalar_mode mode)
    382  1.1  mrg {
    383  1.1  mrg   return (mode == BImode
    384  1.1  mrg 	  || mode == QImode
    385  1.1  mrg 	  || mode == HImode /* || mode == HFmode  */
    386  1.1  mrg 	  || mode == SImode || mode == SFmode
    387  1.1  mrg 	  || mode == DImode || mode == DFmode
    388  1.1  mrg 	  || mode == TImode);
    389  1.1  mrg }
    390  1.1  mrg 
    391  1.1  mrg /* Implement TARGET_CLASS_MAX_NREGS.
    392  1.1  mrg 
    393  1.1  mrg    Return the number of hard registers needed to hold a value of MODE in
    394  1.1  mrg    a register of class RCLASS.  */
    395  1.1  mrg 
    396  1.1  mrg static unsigned char
    397  1.1  mrg gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
    398  1.1  mrg {
    399  1.1  mrg   /* Scalar registers are 32bit, vector registers are in fact tuples of
    400  1.1  mrg      64 lanes.  */
    401  1.1  mrg   if (rclass == VGPR_REGS)
    402  1.1  mrg     {
    403  1.1  mrg       if (vgpr_1reg_mode_p (mode))
    404  1.1  mrg 	return 1;
    405  1.1  mrg       if (vgpr_2reg_mode_p (mode))
    406  1.1  mrg 	return 2;
    407  1.1  mrg       /* TImode is used by DImode compare_and_swap.  */
    408  1.1  mrg       if (mode == TImode)
    409  1.1  mrg 	return 4;
    410  1.1  mrg     }
    411  1.1  mrg   else if (rclass == VCC_CONDITIONAL_REG && mode == BImode)
    412  1.1  mrg     return 2;
    413  1.1  mrg   return CEIL (GET_MODE_SIZE (mode), 4);
    414  1.1  mrg }
    415  1.1  mrg 
/* Implement TARGET_HARD_REGNO_NREGS.

   Return the number of hard registers needed to hold a value of MODE in
   REGNO.  Simply delegates to gcn_class_max_nregs using the register's
   smallest containing class.  */

unsigned int
gcn_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  return gcn_class_max_nregs (REGNO_REG_CLASS (regno), mode);
}
    426  1.1  mrg 
/* Implement TARGET_HARD_REGNO_MODE_OK.

   Return true if REGNO can hold value in MODE.  */

bool
gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Treat a complex mode as if it were a scalar mode of the same overall
     size for the purposes of allocating hard registers.  */
  if (COMPLEX_MODE_P (mode))
    switch (mode)
      {
      case E_CQImode:
      case E_CHImode:
	mode = SImode;
	break;
      case E_CSImode:
	mode = DImode;
	break;
      case E_CDImode:
	mode = TImode;
	break;
      case E_HCmode:
	mode = SFmode;
	break;
      case E_SCmode:
	mode = DFmode;
	break;
      default:
	/* Not supported.  */
	return false;
      }

  /* Special registers: the "_LO" half of each 64-bit pair may also hold
     the whole DImode pair, while the "_HI" half holds SImode only; VCC and
     EXEC low halves additionally accept BImode.  */
  switch (regno)
    {
    case FLAT_SCRATCH_LO_REG:
    case XNACK_MASK_LO_REG:
    case TBA_LO_REG:
    case TMA_LO_REG:
      return (mode == SImode || mode == DImode);
    case VCC_LO_REG:
    case EXEC_LO_REG:
      return (mode == BImode || mode == SImode || mode == DImode);
    case M0_REG:
    case FLAT_SCRATCH_HI_REG:
    case XNACK_MASK_HI_REG:
    case TBA_HI_REG:
    case TMA_HI_REG:
      return mode == SImode;
    case VCC_HI_REG:
      return false;
    case EXEC_HI_REG:
      return mode == SImode /*|| mode == V32BImode */ ;
    case SCC_REG:
    case VCCZ_REG:
    case EXECZ_REG:
      return mode == BImode;
    }
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;
  if (SGPR_REGNO_P (regno))
    /* We restrict double register values to aligned registers.  */
    return (sgpr_1reg_mode_p (mode)
	    || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
	    || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
  if (VGPR_REGNO_P (regno))
    /* Vector instructions do not care about the alignment of register
       pairs, but where there is no 64-bit instruction, many of the
       define_split do not work if the input and output registers partially
       overlap.  We tried to fix this with early clobber and match
       constraints, but it was bug prone, added complexity, and conflicts
       with the 'U0' constraints on vec_merge.
       Therefore, we restrict ourselved to aligned registers.  */
    return (vgpr_1reg_mode_p (mode)
	    || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
	    /* TImode is used by DImode compare_and_swap.  */
	    || (mode == TImode
		&& !((regno - FIRST_VGPR_REG) & 3)));
  return false;
}
    507  1.1  mrg 
/* Implement REGNO_REG_CLASS via gcn.h.

   Return smallest class containing REGNO.  */

enum reg_class
gcn_regno_reg_class (int regno)
{
  /* Singleton condition/mask register classes first.  */
  switch (regno)
    {
    case SCC_REG:
      return SCC_CONDITIONAL_REG;
    case VCC_LO_REG:
    case VCC_HI_REG:
      return VCC_CONDITIONAL_REG;
    case VCCZ_REG:
      return VCCZ_CONDITIONAL_REG;
    case EXECZ_REG:
      return EXECZ_CONDITIONAL_REG;
    case EXEC_LO_REG:
    case EXEC_HI_REG:
      return EXEC_MASK_REG;
    }
  /* The order of the remaining checks matters: the VGPR/SGPR range tests
     come before the catch-all for other low-numbered registers.  */
  if (VGPR_REGNO_P (regno))
    return VGPR_REGS;
  if (SGPR_REGNO_P (regno))
    return SGPR_REGS;
  if (regno < FIRST_VGPR_REG)
    return GENERAL_REGS;
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return AFP_REGS;
  return ALL_REGS;
}
    540  1.1  mrg 
    541  1.1  mrg /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
    542  1.1  mrg 
    543  1.1  mrg    GCC assumes that lowpart contains first part of value as stored in memory.
    544  1.1  mrg    This is not the case for vector registers.  */
    545  1.1  mrg 
    546  1.1  mrg bool
    547  1.1  mrg gcn_can_change_mode_class (machine_mode from, machine_mode to,
    548  1.1  mrg 			   reg_class_t regclass)
    549  1.1  mrg {
    550  1.1  mrg   if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to))
    551  1.1  mrg     return true;
    552  1.1  mrg   return (gcn_class_max_nregs (regclass, from)
    553  1.1  mrg 	  == gcn_class_max_nregs (regclass, to));
    554  1.1  mrg }
    555  1.1  mrg 
    556  1.1  mrg /* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P.
    557  1.1  mrg 
    558  1.1  mrg    When this hook returns true for MODE, the compiler allows
    559  1.1  mrg    registers explicitly used in the rtl to be used as spill registers
    560  1.1  mrg    but prevents the compiler from extending the lifetime of these
    561  1.1  mrg    registers.  */
    562  1.1  mrg 
    563  1.1  mrg bool
    564  1.1  mrg gcn_small_register_classes_for_mode_p (machine_mode mode)
    565  1.1  mrg {
    566  1.1  mrg   /* We allocate into exec and vcc regs.  Those make small register class.  */
    567  1.1  mrg   return mode == DImode || mode == SImode;
    568  1.1  mrg }
    569  1.1  mrg 
    570  1.1  mrg /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
    571  1.1  mrg 
    572  1.1  mrg    Returns true if pseudos that have been assigned to registers of class RCLASS
    573  1.1  mrg    would likely be spilled because registers of RCLASS are needed for spill
    574  1.1  mrg    registers.  */
    575  1.1  mrg 
    576  1.1  mrg static bool
    577  1.1  mrg gcn_class_likely_spilled_p (reg_class_t rclass)
    578  1.1  mrg {
    579  1.1  mrg   return (rclass == EXEC_MASK_REG
    580  1.1  mrg 	  || reg_classes_intersect_p (ALL_CONDITIONAL_REGS, rclass));
    581  1.1  mrg }
    582  1.1  mrg 
    583  1.1  mrg /* Implement TARGET_MODES_TIEABLE_P.
    584  1.1  mrg 
    585  1.1  mrg    Returns true if a value of MODE1 is accessible in MODE2 without
    586  1.1  mrg    copying.  */
    587  1.1  mrg 
    588  1.1  mrg bool
    589  1.1  mrg gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2)
    590  1.1  mrg {
    591  1.1  mrg   return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
    592  1.1  mrg 	  && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
    593  1.1  mrg }
    594  1.1  mrg 
/* Implement TARGET_TRULY_NOOP_TRUNCATION.

   Returns true if it is safe to convert a value of INPREC bits to one of
   OUTPREC bits (where OUTPREC is smaller than INPREC) by merely operating on
   it as if it had only OUTPREC bits.  Truncations within one 32-bit
   register are no-ops.  */

bool
gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
{
  return ((inprec <= 32) && (outprec <= inprec));
}
    606  1.1  mrg 
/* Return N-th part of value occupying multiple registers.  */

rtx
gcn_operand_part (machine_mode mode, rtx op, int n)
{
  /* A mode of 256 bytes or more is a multi-register vector value; each
     part is one whole V64SImode register.  */
  if (GET_MODE_SIZE (mode) >= 256)
    {
      /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0);  */

      if (REG_P (op))
	{
	  gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
	  return gen_rtx_REG (V64SImode, REGNO (op) + n);
	}
      if (GET_CODE (op) == CONST_VECTOR)
	{
	  /* Build a new constant vector out of the N-th part of each
	     element.  */
	  int units = GET_MODE_NUNITS (mode);
	  rtvec v = rtvec_alloc (units);

	  for (int i = 0; i < units; ++i)
	    RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode),
						 CONST_VECTOR_ELT (op, i), n);

	  return gen_rtx_CONST_VECTOR (V64SImode, v);
	}
      /* Every part of an undefined value is undefined.  */
      if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
	return gcn_gen_undef (V64SImode);
      gcc_unreachable ();
    }
  else if (GET_MODE_SIZE (mode) == 8 && REG_P (op))
    {
      /* A 64-bit value in registers: part N is the N-th 32-bit half.  */
      gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
      return gen_rtx_REG (SImode, REGNO (op) + n);
    }
  else
    {
      if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
	return gcn_gen_undef (SImode);

      /* If it's a constant then let's assume it is of the largest mode
	 available, otherwise simplify_gen_subreg will fail.  */
      if (mode == VOIDmode && CONST_INT_P (op))
	mode = DImode;
      return simplify_gen_subreg (SImode, op, mode, n * 4);
    }
}
    653  1.1  mrg 
/* Return N-th doubleword (DImode) part of a value occupying multiple
   registers, as a subreg at byte offset N * 8.  */

rtx
gcn_operand_doublepart (machine_mode mode, rtx op, int n)
{
  return simplify_gen_subreg (DImode, op, mode, n * 8);
}
    661  1.1  mrg 
    662  1.1  mrg /* Return true if OP can be split into subregs or high/low parts.
    663  1.1  mrg    This is always true for scalars, but not normally true for vectors.
    664  1.1  mrg    However, for vectors in hardregs we can use the low and high registers.  */
    665  1.1  mrg 
    666  1.1  mrg bool
    667  1.1  mrg gcn_can_split_p (machine_mode, rtx op)
    668  1.1  mrg {
    669  1.1  mrg   if (vgpr_vector_mode_p (GET_MODE (op)))
    670  1.1  mrg     {
    671  1.1  mrg       if (GET_CODE (op) == SUBREG)
    672  1.1  mrg 	op = SUBREG_REG (op);
    673  1.1  mrg       if (!REG_P (op))
    674  1.1  mrg 	return true;
    675  1.1  mrg       return REGNO (op) <= FIRST_PSEUDO_REGISTER;
    676  1.1  mrg     }
    677  1.1  mrg   return true;
    678  1.1  mrg }
    679  1.1  mrg 
    680  1.1  mrg /* Implement TARGET_SPILL_CLASS.
    681  1.1  mrg 
    682  1.1  mrg    Return class of registers which could be used for pseudo of MODE
    683  1.1  mrg    and of class RCLASS for spilling instead of memory.  Return NO_REGS
    684  1.1  mrg    if it is not possible or non-profitable.  */
    685  1.1  mrg 
    686  1.1  mrg static reg_class_t
    687  1.1  mrg gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
    688  1.1  mrg {
    689  1.1  mrg   if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c)
    690  1.1  mrg       || c == VCC_CONDITIONAL_REG)
    691  1.1  mrg     return SGPR_REGS;
    692  1.1  mrg   else
    693  1.1  mrg     return NO_REGS;
    694  1.1  mrg }
    695  1.1  mrg 
    696  1.1  mrg /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
    697  1.1  mrg 
    698  1.1  mrg    Change allocno class for given pseudo from allocno and best class
    699  1.1  mrg    calculated by IRA.  */
    700  1.1  mrg 
    701  1.1  mrg static reg_class_t
    702  1.1  mrg gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl,
    703  1.1  mrg 				     reg_class_t best_cl)
    704  1.1  mrg {
    705  1.1  mrg   /* Avoid returning classes that contain both vgpr and sgpr registers.  */
    706  1.1  mrg   if (cl != ALL_REGS && cl != SRCDST_REGS && cl != ALL_GPR_REGS)
    707  1.1  mrg     return cl;
    708  1.1  mrg   if (best_cl != ALL_REGS && best_cl != SRCDST_REGS
    709  1.1  mrg       && best_cl != ALL_GPR_REGS)
    710  1.1  mrg     return best_cl;
    711  1.1  mrg 
    712  1.1  mrg   machine_mode mode = PSEUDO_REGNO_MODE (regno);
    713  1.1  mrg   if (vgpr_vector_mode_p (mode))
    714  1.1  mrg     return VGPR_REGS;
    715  1.1  mrg 
    716  1.1  mrg   return GENERAL_REGS;
    717  1.1  mrg }
    718  1.1  mrg 
    719  1.1  mrg /* Create a new DImode pseudo reg and emit an instruction to initialize
    720  1.1  mrg    it to VAL.  */
    721  1.1  mrg 
    722  1.1  mrg static rtx
    723  1.1  mrg get_exec (int64_t val)
    724  1.1  mrg {
    725  1.1  mrg   rtx reg = gen_reg_rtx (DImode);
    726  1.1  mrg   emit_insn (gen_rtx_SET (reg, gen_int_mode (val, DImode)));
    727  1.1  mrg   return reg;
    728  1.1  mrg }
    729  1.1  mrg 
/* Return value of scalar exec register, i.e. a single-lane mask (only
   lane 0 active).  */

rtx
gcn_scalar_exec ()
{
  return const1_rtx;
}
    737  1.1  mrg 
/* Return pseudo holding scalar exec register (a DImode pseudo
   initialized to 1, i.e. only lane 0 active).  */

rtx
gcn_scalar_exec_reg ()
{
  return get_exec (1);
}
    745  1.1  mrg 
/* Return value of full exec register, i.e. an all-lanes-active mask
   (all 64 bits set).  */

rtx
gcn_full_exec ()
{
  return constm1_rtx;
}
    753  1.1  mrg 
/* Return pseudo holding full exec register (a DImode pseudo initialized
   to -1, i.e. all 64 lanes active).  */

rtx
gcn_full_exec_reg ()
{
  return get_exec (-1);
}
    761  1.1  mrg 
    762  1.1  mrg /* }}}  */
    763  1.1  mrg /* {{{ Immediate constants.  */
    764  1.1  mrg 
/* Initialize shared numeric constants (lazily, on first use): dconst4
   and dconst1over2pi, used for the extended inline-immediate encodings.  */

static void
init_ext_gcn_constants (void)
{
  /* dconst4 = 4.0 (encodings 246/247 in gcn_inline_fp_constant_p).  */
  real_from_integer (&dconst4, DFmode, 4, SIGNED);

  /* FIXME: this constant probably does not match what hardware really loads.
     Reality check it eventually.  */
  real_from_string (&dconst1over2pi,
		    "0.1591549430918953357663423455968866839");
  real_convert (&dconst1over2pi, SFmode, &dconst1over2pi);

  /* Mark lazy initialization as complete.  */
  ext_gcn_constants_init = 1;
}
    780  1.1  mrg 
    781  1.1  mrg /* Return non-zero if X is a constant that can appear as an inline operand.
    782  1.1  mrg    This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi)
    783  1.1  mrg    Or a vector of those.
    784  1.1  mrg    The value returned should be the encoding of this constant.  */
    785  1.1  mrg 
    786  1.1  mrg int
    787  1.1  mrg gcn_inline_fp_constant_p (rtx x, bool allow_vector)
    788  1.1  mrg {
    789  1.1  mrg   machine_mode mode = GET_MODE (x);
    790  1.1  mrg 
    791  1.1  mrg   if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
    792  1.1  mrg       && allow_vector)
    793  1.1  mrg     {
    794  1.1  mrg       int n;
    795  1.1  mrg       if (GET_CODE (x) != CONST_VECTOR)
    796  1.1  mrg 	return 0;
    797  1.1  mrg       n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
    798  1.1  mrg       if (!n)
    799  1.1  mrg 	return 0;
    800  1.1  mrg       for (int i = 1; i < 64; i++)
    801  1.1  mrg 	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
    802  1.1  mrg 	  return 0;
    803  1.1  mrg       return 1;
    804  1.1  mrg     }
    805  1.1  mrg 
    806  1.1  mrg   if (mode != HFmode && mode != SFmode && mode != DFmode)
    807  1.1  mrg     return 0;
    808  1.1  mrg 
    809  1.1  mrg   const REAL_VALUE_TYPE *r;
    810  1.1  mrg 
    811  1.1  mrg   if (x == CONST0_RTX (mode))
    812  1.1  mrg     return 128;
    813  1.1  mrg   if (x == CONST1_RTX (mode))
    814  1.1  mrg     return 242;
    815  1.1  mrg 
    816  1.1  mrg   r = CONST_DOUBLE_REAL_VALUE (x);
    817  1.1  mrg 
    818  1.1  mrg   if (real_identical (r, &dconstm1))
    819  1.1  mrg     return 243;
    820  1.1  mrg 
    821  1.1  mrg   if (real_identical (r, &dconsthalf))
    822  1.1  mrg     return 240;
    823  1.1  mrg   if (real_identical (r, &dconstm1))
    824  1.1  mrg     return 243;
    825  1.1  mrg   if (real_identical (r, &dconst2))
    826  1.1  mrg     return 244;
    827  1.1  mrg   if (real_identical (r, &dconst4))
    828  1.1  mrg     return 246;
    829  1.1  mrg   if (real_identical (r, &dconst1over2pi))
    830  1.1  mrg     return 248;
    831  1.1  mrg   if (!ext_gcn_constants_init)
    832  1.1  mrg     init_ext_gcn_constants ();
    833  1.1  mrg   real_value_negate (r);
    834  1.1  mrg   if (real_identical (r, &dconsthalf))
    835  1.1  mrg     return 241;
    836  1.1  mrg   if (real_identical (r, &dconst2))
    837  1.1  mrg     return 245;
    838  1.1  mrg   if (real_identical (r, &dconst4))
    839  1.1  mrg     return 247;
    840  1.1  mrg 
    841  1.1  mrg   /* FIXME: add 4, -4 and 1/(2*PI).  */
    842  1.1  mrg 
    843  1.1  mrg   return 0;
    844  1.1  mrg }
    845  1.1  mrg 
    846  1.1  mrg /* Return non-zero if X is a constant that can appear as an immediate operand.
    847  1.1  mrg    This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi)
    848  1.1  mrg    Or a vector of those.
    849  1.1  mrg    The value returned should be the encoding of this constant.  */
    850  1.1  mrg 
    851  1.1  mrg bool
    852  1.1  mrg gcn_fp_constant_p (rtx x, bool allow_vector)
    853  1.1  mrg {
    854  1.1  mrg   machine_mode mode = GET_MODE (x);
    855  1.1  mrg 
    856  1.1  mrg   if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
    857  1.1  mrg       && allow_vector)
    858  1.1  mrg     {
    859  1.1  mrg       int n;
    860  1.1  mrg       if (GET_CODE (x) != CONST_VECTOR)
    861  1.1  mrg 	return false;
    862  1.1  mrg       n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
    863  1.1  mrg       if (!n)
    864  1.1  mrg 	return false;
    865  1.1  mrg       for (int i = 1; i < 64; i++)
    866  1.1  mrg 	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
    867  1.1  mrg 	  return false;
    868  1.1  mrg       return true;
    869  1.1  mrg     }
    870  1.1  mrg   if (mode != HFmode && mode != SFmode && mode != DFmode)
    871  1.1  mrg     return false;
    872  1.1  mrg 
    873  1.1  mrg   if (gcn_inline_fp_constant_p (x, false))
    874  1.1  mrg     return true;
    875  1.1  mrg   /* FIXME: It is not clear how 32bit immediates are interpreted here.  */
    876  1.1  mrg   return (mode != DFmode);
    877  1.1  mrg }
    878  1.1  mrg 
    879  1.1  mrg /* Return true if X is a constant representable as an inline immediate
    880  1.1  mrg    constant in a 32-bit instruction encoding.  */
    881  1.1  mrg 
    882  1.1  mrg bool
    883  1.1  mrg gcn_inline_constant_p (rtx x)
    884  1.1  mrg {
    885  1.1  mrg   if (GET_CODE (x) == CONST_INT)
    886  1.1  mrg     return INTVAL (x) >= -16 && INTVAL (x) <= 64;
    887  1.1  mrg   if (GET_CODE (x) == CONST_DOUBLE)
    888  1.1  mrg     return gcn_inline_fp_constant_p (x, false);
    889  1.1  mrg   if (GET_CODE (x) == CONST_VECTOR)
    890  1.1  mrg     {
    891  1.1  mrg       int n;
    892  1.1  mrg       if (!vgpr_vector_mode_p (GET_MODE (x)))
    893  1.1  mrg 	return false;
    894  1.1  mrg       n = gcn_inline_constant_p (CONST_VECTOR_ELT (x, 0));
    895  1.1  mrg       if (!n)
    896  1.1  mrg 	return false;
    897  1.1  mrg       for (int i = 1; i < 64; i++)
    898  1.1  mrg 	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
    899  1.1  mrg 	  return false;
    900  1.1  mrg       return 1;
    901  1.1  mrg     }
    902  1.1  mrg   return false;
    903  1.1  mrg }
    904  1.1  mrg 
    905  1.1  mrg /* Return true if X is a constant representable as an immediate constant
    906  1.1  mrg    in a 32 or 64-bit instruction encoding.  */
    907  1.1  mrg 
    908  1.1  mrg bool
    909  1.1  mrg gcn_constant_p (rtx x)
    910  1.1  mrg {
    911  1.1  mrg   switch (GET_CODE (x))
    912  1.1  mrg     {
    913  1.1  mrg     case CONST_INT:
    914  1.1  mrg       return true;
    915  1.1  mrg 
    916  1.1  mrg     case CONST_DOUBLE:
    917  1.1  mrg       return gcn_fp_constant_p (x, false);
    918  1.1  mrg 
    919  1.1  mrg     case CONST_VECTOR:
    920  1.1  mrg       {
    921  1.1  mrg 	int n;
    922  1.1  mrg 	if (!vgpr_vector_mode_p (GET_MODE (x)))
    923  1.1  mrg 	  return false;
    924  1.1  mrg 	n = gcn_constant_p (CONST_VECTOR_ELT (x, 0));
    925  1.1  mrg 	if (!n)
    926  1.1  mrg 	  return false;
    927  1.1  mrg 	for (int i = 1; i < 64; i++)
    928  1.1  mrg 	  if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
    929  1.1  mrg 	    return false;
    930  1.1  mrg 	return true;
    931  1.1  mrg       }
    932  1.1  mrg 
    933  1.1  mrg     case SYMBOL_REF:
    934  1.1  mrg     case LABEL_REF:
    935  1.1  mrg       return true;
    936  1.1  mrg 
    937  1.1  mrg     default:
    938  1.1  mrg       ;
    939  1.1  mrg     }
    940  1.1  mrg 
    941  1.1  mrg   return false;
    942  1.1  mrg }
    943  1.1  mrg 
    944  1.1  mrg /* Return true if X is a constant representable as two inline immediate
    945  1.1  mrg    constants in a 64-bit instruction that is split into two 32-bit
    946  1.1  mrg    instructions.
    947  1.1  mrg    When MIXED is set, the low-part is permitted to use the full 32-bits.  */
    948  1.1  mrg 
    949  1.1  mrg bool
    950  1.1  mrg gcn_inline_constant64_p (rtx x, bool mixed)
    951  1.1  mrg {
    952  1.1  mrg   if (GET_CODE (x) == CONST_VECTOR)
    953  1.1  mrg     {
    954  1.1  mrg       if (!vgpr_vector_mode_p (GET_MODE (x)))
    955  1.1  mrg 	return false;
    956  1.1  mrg       if (!gcn_inline_constant64_p (CONST_VECTOR_ELT (x, 0), mixed))
    957  1.1  mrg 	return false;
    958  1.1  mrg       for (int i = 1; i < 64; i++)
    959  1.1  mrg 	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
    960  1.1  mrg 	  return false;
    961  1.1  mrg 
    962  1.1  mrg       return true;
    963  1.1  mrg     }
    964  1.1  mrg 
    965  1.1  mrg   if (GET_CODE (x) != CONST_INT)
    966  1.1  mrg     return false;
    967  1.1  mrg 
    968  1.1  mrg   rtx val_lo = gcn_operand_part (DImode, x, 0);
    969  1.1  mrg   rtx val_hi = gcn_operand_part (DImode, x, 1);
    970  1.1  mrg   return ((mixed || gcn_inline_constant_p (val_lo))
    971  1.1  mrg 	  && gcn_inline_constant_p (val_hi));
    972  1.1  mrg }
    973  1.1  mrg 
/* Return true if X is a constant representable as an immediate constant
   in a 32 or 64-bit instruction encoding where the hardware will
   extend the immediate to 64-bits.  */

bool
gcn_constant64_p (rtx x)
{
  if (!gcn_constant_p (x))
    return false;

  /* Non-integer constants accepted by gcn_constant_p are fine as-is.  */
  if (GET_CODE (x) != CONST_INT)
    return true;

  /* Negative numbers are only allowed if they can be encoded within src0,
     because the 32-bit immediates do not get sign-extended.
     Unsigned numbers must not be encodable as 32-bit -1..-16, because the
     assembler will use a src0 inline immediate and that will get
     sign-extended.  */
  HOST_WIDE_INT val = INTVAL (x);
  return (((val & 0xffffffff) == val	/* Positive 32-bit.  */
	   && (val & 0xfffffff0) != 0xfffffff0)	/* Not -1..-16.  */
	  || gcn_inline_constant_p (x));	/* Src0.  */
}
    997  1.1  mrg 
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   Returns true if X is a legitimate constant for a MODE immediate operand;
   the mode argument is ignored and the decision delegated entirely to
   gcn_constant_p.  */

bool
gcn_legitimate_constant_p (machine_mode, rtx x)
{
  return gcn_constant_p (x);
}
   1007  1.1  mrg 
   1008  1.1  mrg /* Return true if X is a CONST_VECTOR of single constant.  */
   1009  1.1  mrg 
   1010  1.1  mrg static bool
   1011  1.1  mrg single_cst_vector_p (rtx x)
   1012  1.1  mrg {
   1013  1.1  mrg   if (GET_CODE (x) != CONST_VECTOR)
   1014  1.1  mrg     return false;
   1015  1.1  mrg   for (int i = 1; i < 64; i++)
   1016  1.1  mrg     if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
   1017  1.1  mrg       return false;
   1018  1.1  mrg   return true;
   1019  1.1  mrg }
   1020  1.1  mrg 
   1021  1.1  mrg /* Create a CONST_VECTOR of duplicated value A.  */
   1022  1.1  mrg 
   1023  1.1  mrg rtx
   1024  1.1  mrg gcn_vec_constant (machine_mode mode, int a)
   1025  1.1  mrg {
   1026  1.1  mrg   /*if (!a)
   1027  1.1  mrg     return CONST0_RTX (mode);
   1028  1.1  mrg   if (a == -1)
   1029  1.1  mrg     return CONSTM1_RTX (mode);
   1030  1.1  mrg   if (a == 1)
   1031  1.1  mrg     return CONST1_RTX (mode);
   1032  1.1  mrg   if (a == 2)
   1033  1.1  mrg     return CONST2_RTX (mode);*/
   1034  1.1  mrg 
   1035  1.1  mrg   int units = GET_MODE_NUNITS (mode);
   1036  1.1  mrg   machine_mode innermode = GET_MODE_INNER (mode);
   1037  1.1  mrg 
   1038  1.1  mrg   rtx tem;
   1039  1.1  mrg   if (FLOAT_MODE_P (innermode))
   1040  1.1  mrg     {
   1041  1.1  mrg       REAL_VALUE_TYPE rv;
   1042  1.1  mrg       real_from_integer (&rv, NULL, a, SIGNED);
   1043  1.1  mrg       tem = const_double_from_real_value (rv, innermode);
   1044  1.1  mrg     }
   1045  1.1  mrg   else
   1046  1.1  mrg     tem = gen_int_mode (a, innermode);
   1047  1.1  mrg 
   1048  1.1  mrg   rtvec v = rtvec_alloc (units);
   1049  1.1  mrg   for (int i = 0; i < units; ++i)
   1050  1.1  mrg     RTVEC_ELT (v, i) = tem;
   1051  1.1  mrg 
   1052  1.1  mrg   return gen_rtx_CONST_VECTOR (mode, v);
   1053  1.1  mrg }
   1054  1.1  mrg 
   1055  1.1  mrg /* Create a CONST_VECTOR of duplicated value A.  */
   1056  1.1  mrg 
   1057  1.1  mrg rtx
   1058  1.1  mrg gcn_vec_constant (machine_mode mode, rtx a)
   1059  1.1  mrg {
   1060  1.1  mrg   int units = GET_MODE_NUNITS (mode);
   1061  1.1  mrg   rtvec v = rtvec_alloc (units);
   1062  1.1  mrg 
   1063  1.1  mrg   for (int i = 0; i < units; ++i)
   1064  1.1  mrg     RTVEC_ELT (v, i) = a;
   1065  1.1  mrg 
   1066  1.1  mrg   return gen_rtx_CONST_VECTOR (mode, v);
   1067  1.1  mrg }
   1068  1.1  mrg 
/* Create an undefined vector value, used where an insn operand is
   optional.  Represented as an UNSPEC_VECTOR wrapping const0_rtx in
   mode MODE.  */

rtx
gcn_gen_undef (machine_mode mode)
{
  return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR);
}
   1077  1.1  mrg 
   1078  1.1  mrg /* }}}  */
   1079  1.1  mrg /* {{{ Addresses, pointers and moves.  */
   1080  1.1  mrg 
   1081  1.1  mrg /* Return true is REG is a valid place to store a pointer,
   1082  1.1  mrg    for instructions that require an SGPR.
   1083  1.1  mrg    FIXME rename. */
   1084  1.1  mrg 
   1085  1.1  mrg static bool
   1086  1.1  mrg gcn_address_register_p (rtx reg, machine_mode mode, bool strict)
   1087  1.1  mrg {
   1088  1.1  mrg   if (GET_CODE (reg) == SUBREG)
   1089  1.1  mrg     reg = SUBREG_REG (reg);
   1090  1.1  mrg 
   1091  1.1  mrg   if (!REG_P (reg))
   1092  1.1  mrg     return false;
   1093  1.1  mrg 
   1094  1.1  mrg   if (GET_MODE (reg) != mode)
   1095  1.1  mrg     return false;
   1096  1.1  mrg 
   1097  1.1  mrg   int regno = REGNO (reg);
   1098  1.1  mrg 
   1099  1.1  mrg   if (regno >= FIRST_PSEUDO_REGISTER)
   1100  1.1  mrg     {
   1101  1.1  mrg       if (!strict)
   1102  1.1  mrg 	return true;
   1103  1.1  mrg 
   1104  1.1  mrg       if (!reg_renumber)
   1105  1.1  mrg 	return false;
   1106  1.1  mrg 
   1107  1.1  mrg       regno = reg_renumber[regno];
   1108  1.1  mrg     }
   1109  1.1  mrg 
   1110  1.1  mrg   return (SGPR_REGNO_P (regno) || regno == M0_REG
   1111  1.1  mrg 	  || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
   1112  1.1  mrg }
   1113  1.1  mrg 
   1114  1.1  mrg /* Return true is REG is a valid place to store a pointer,
   1115  1.1  mrg    for instructions that require a VGPR.  */
   1116  1.1  mrg 
   1117  1.1  mrg static bool
   1118  1.1  mrg gcn_vec_address_register_p (rtx reg, machine_mode mode, bool strict)
   1119  1.1  mrg {
   1120  1.1  mrg   if (GET_CODE (reg) == SUBREG)
   1121  1.1  mrg     reg = SUBREG_REG (reg);
   1122  1.1  mrg 
   1123  1.1  mrg   if (!REG_P (reg))
   1124  1.1  mrg     return false;
   1125  1.1  mrg 
   1126  1.1  mrg   if (GET_MODE (reg) != mode)
   1127  1.1  mrg     return false;
   1128  1.1  mrg 
   1129  1.1  mrg   int regno = REGNO (reg);
   1130  1.1  mrg 
   1131  1.1  mrg   if (regno >= FIRST_PSEUDO_REGISTER)
   1132  1.1  mrg     {
   1133  1.1  mrg       if (!strict)
   1134  1.1  mrg 	return true;
   1135  1.1  mrg 
   1136  1.1  mrg       if (!reg_renumber)
   1137  1.1  mrg 	return false;
   1138  1.1  mrg 
   1139  1.1  mrg       regno = reg_renumber[regno];
   1140  1.1  mrg     }
   1141  1.1  mrg 
   1142  1.1  mrg   return VGPR_REGNO_P (regno);
   1143  1.1  mrg }
   1144  1.1  mrg 
/* Return true if X would be valid inside a MEM using the Flat address
   space.  */

bool
gcn_flat_address_p (rtx x, machine_mode mode)
{
  bool vec_mode = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
		   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  /* Vector-mode accesses take a scalar (SGPR) base; scalar-mode accesses
     take a vector (VGPR) base.  This mirrors the REG handling for
     AS_FLAT in gcn_addr_space_legitimate_address_p.  */
  if (vec_mode && gcn_address_register_p (x, DImode, false))
    return true;

  if (!vec_mode && gcn_vec_address_register_p (x, DImode, false))
    return true;

  /* GCN5 and later also accept a VGPR base plus a constant offset.  */
  if (TARGET_GCN5_PLUS
      && GET_CODE (x) == PLUS
      && gcn_vec_address_register_p (XEXP (x, 0), DImode, false)
      && CONST_INT_P (XEXP (x, 1)))
    return true;

  return false;
}
   1168  1.1  mrg 
   1169  1.1  mrg /* Return true if X would be valid inside a MEM using the Scalar Flat
   1170  1.1  mrg    address space.  */
   1171  1.1  mrg 
   1172  1.1  mrg bool
   1173  1.1  mrg gcn_scalar_flat_address_p (rtx x)
   1174  1.1  mrg {
   1175  1.1  mrg   if (gcn_address_register_p (x, DImode, false))
   1176  1.1  mrg     return true;
   1177  1.1  mrg 
   1178  1.1  mrg   if (GET_CODE (x) == PLUS
   1179  1.1  mrg       && gcn_address_register_p (XEXP (x, 0), DImode, false)
   1180  1.1  mrg       && CONST_INT_P (XEXP (x, 1)))
   1181  1.1  mrg     return true;
   1182  1.1  mrg 
   1183  1.1  mrg   return false;
   1184  1.1  mrg }
   1185  1.1  mrg 
   1186  1.1  mrg /* Return true if MEM X would be valid for the Scalar Flat address space.  */
   1187  1.1  mrg 
   1188  1.1  mrg bool
   1189  1.1  mrg gcn_scalar_flat_mem_p (rtx x)
   1190  1.1  mrg {
   1191  1.1  mrg   if (!MEM_P (x))
   1192  1.1  mrg     return false;
   1193  1.1  mrg 
   1194  1.1  mrg   if (GET_MODE_SIZE (GET_MODE (x)) < 4)
   1195  1.1  mrg     return false;
   1196  1.1  mrg 
   1197  1.1  mrg   return gcn_scalar_flat_address_p (XEXP (x, 0));
   1198  1.1  mrg }
   1199  1.1  mrg 
   1200  1.1  mrg /* Return true if X would be valid inside a MEM using the LDS or GDS
   1201  1.1  mrg    address spaces.  */
   1202  1.1  mrg 
   1203  1.1  mrg bool
   1204  1.1  mrg gcn_ds_address_p (rtx x)
   1205  1.1  mrg {
   1206  1.1  mrg   if (gcn_vec_address_register_p (x, SImode, false))
   1207  1.1  mrg     return true;
   1208  1.1  mrg 
   1209  1.1  mrg   if (GET_CODE (x) == PLUS
   1210  1.1  mrg       && gcn_vec_address_register_p (XEXP (x, 0), SImode, false)
   1211  1.1  mrg       && CONST_INT_P (XEXP (x, 1)))
   1212  1.1  mrg     return true;
   1213  1.1  mrg 
   1214  1.1  mrg   return false;
   1215  1.1  mrg }
   1216  1.1  mrg 
/* Return true if ADDR would be valid inside a MEM using the Global
   address space.  */

bool
gcn_global_address_p (rtx addr)
{
  /* A bare SGPR-pair or VGPR base register is valid.  */
  if (gcn_address_register_p (addr, DImode, false)
      || gcn_vec_address_register_p (addr, DImode, false))
    return true;

  if (GET_CODE (addr) == PLUS)
    {
      rtx base = XEXP (addr, 0);
      rtx offset = XEXP (addr, 1);
      /* Signed 13-bit immediate offset range.  */
      bool immediate_p = (CONST_INT_P (offset)
			  && INTVAL (offset) >= -(1 << 12)
			  && INTVAL (offset) < (1 << 12));

      if ((gcn_address_register_p (base, DImode, false)
	   || gcn_vec_address_register_p (base, DImode, false))
	  && immediate_p)
	/* SGPR + CONST or VGPR + CONST  */
	return true;

      if (gcn_address_register_p (base, DImode, false)
	  && gcn_vgpr_register_operand (offset, SImode))
	/* SGPR + VGPR  */
	return true;

      if (GET_CODE (base) == PLUS
	  && gcn_address_register_p (XEXP (base, 0), DImode, false)
	  && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
	  && immediate_p)
	/* (SGPR + VGPR) + CONST  */
	return true;
    }

  return false;
}
   1256  1.1  mrg 
/* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P.

   Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
				     addr_space_t as)
{
  /* All vector instructions need to work on addresses in registers.  */
  if (!TARGET_GCN5_PLUS && (vgpr_vector_mode_p (mode) && !REG_P (x)))
    return false;

  if (AS_SCALAR_FLAT_P (as))
    {
      /* Scalar loads are at least 32 bits wide; sub-word modes are
	 rejected outright.  */
      if (mode == QImode || mode == HImode)
	return 0;

      switch (GET_CODE (x))
	{
	case REG:
	  return gcn_address_register_p (x, DImode, strict);
	/* Addresses are in the form BASE+OFFSET
	   OFFSET is either 20bit unsigned immediate, SGPR or M0.
	   Writes and atomics do not accept SGPR.  */
	case PLUS:
	  {
	    rtx x0 = XEXP (x, 0);
	    rtx x1 = XEXP (x, 1);
	    if (!gcn_address_register_p (x0, DImode, strict))
	      return false;
	    /* FIXME: This is disabled because of the mode mismatch between
	       SImode (for the address or m0 register) and the DImode PLUS.
	       We'll need a zero_extend or similar.

	    if (gcn_m0_register_p (x1, SImode, strict)
		|| gcn_address_register_p (x1, SImode, strict))
	      return true;
	    else*/
	    if (GET_CODE (x1) == CONST_INT)
	      {
		/* 20-bit unsigned, 4-byte-aligned immediate offset.  */
		if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20)
		    /* The low bits of the offset are ignored, even when
		       they're meant to realign the pointer.  */
		    && !(INTVAL (x1) & 0x3))
		  return true;
	      }
	    return false;
	  }

	default:
	  break;
	}
    }
  else if (AS_SCRATCH_P (as))
    return gcn_address_register_p (x, SImode, strict);
  else if (AS_FLAT_P (as) || AS_FLAT_SCRATCH_P (as))
    {
      /* GCN3 only supports bare register addresses; note the base
	 register kind is swapped by mode: vector-mode MEMs take a
	 scalar base, scalar-mode MEMs a vector base (cf.
	 gcn_flat_address_p).  */
      if (TARGET_GCN3 || GET_CODE (x) == REG)
       return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
		|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
	       ? gcn_address_register_p (x, DImode, strict)
	       : gcn_vec_address_register_p (x, DImode, strict));
      else
	{
	  gcc_assert (TARGET_GCN5_PLUS);

	  if (GET_CODE (x) == PLUS)
	    {
	      rtx x1 = XEXP (x, 1);

	      /* NOTE(review): the base check is applied to X (the whole
		 PLUS) rather than XEXP (x, 0) -- looks like it was meant
		 to test the base operand; confirm.  */
	      if (VECTOR_MODE_P (mode)
		  ? !gcn_address_register_p (x, DImode, strict)
		  : !gcn_vec_address_register_p (x, DImode, strict))
		return false;

	      if (GET_CODE (x1) == CONST_INT)
		{
		  /* 12-bit unsigned, 4-byte-aligned immediate offset.  */
		  if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 12)
		      /* The low bits of the offset are ignored, even when
		         they're meant to realign the pointer.  */
		      && !(INTVAL (x1) & 0x3))
		    return true;
		}
	    }
	  return false;
	}
    }
  else if (AS_GLOBAL_P (as))
    {
      gcc_assert (TARGET_GCN5_PLUS);

      if (GET_CODE (x) == REG)
       return (gcn_address_register_p (x, DImode, strict)
	       || (!VECTOR_MODE_P (mode)
		   && gcn_vec_address_register_p (x, DImode, strict)));
      else if (GET_CODE (x) == PLUS)
	{
	  rtx base = XEXP (x, 0);
	  rtx offset = XEXP (x, 1);

	  bool immediate_p = (GET_CODE (offset) == CONST_INT
			      /* Signed 13-bit immediate.  */
			      && INTVAL (offset) >= -(1 << 12)
			      && INTVAL (offset) < (1 << 12)
			      /* The low bits of the offset are ignored, even
			         when they're meant to realign the pointer.  */
			      && !(INTVAL (offset) & 0x3));

	  if (!VECTOR_MODE_P (mode))
	    {
	      /* Scalar-mode MEMs accept the richer base forms.  */
	      if ((gcn_address_register_p (base, DImode, strict)
		   || gcn_vec_address_register_p (base, DImode, strict))
		  && immediate_p)
		/* SGPR + CONST or VGPR + CONST  */
		return true;

	      if (gcn_address_register_p (base, DImode, strict)
		  && gcn_vgpr_register_operand (offset, SImode))
		/* SGPR + VGPR  */
		return true;

	      if (GET_CODE (base) == PLUS
		  && gcn_address_register_p (XEXP (base, 0), DImode, strict)
		  && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
		  && immediate_p)
		/* (SGPR + VGPR) + CONST  */
		return true;
	    }
	  else
	    {
	      if (gcn_address_register_p (base, DImode, strict)
		  && immediate_p)
		/* SGPR + CONST  */
		return true;
	    }
	}
      else
	return false;
    }
  else if (AS_ANY_DS_P (as))
    switch (GET_CODE (x))
      {
      case REG:
	return (VECTOR_MODE_P (mode)
		? gcn_address_register_p (x, SImode, strict)
		: gcn_vec_address_register_p (x, SImode, strict));
      /* Addresses are in the form BASE+OFFSET
	 OFFSET is either 20bit unsigned immediate, SGPR or M0.
	 Writes and atomics do not accept SGPR.  */
      case PLUS:
	{
	  rtx x0 = XEXP (x, 0);
	  rtx x1 = XEXP (x, 1);
	  if (!gcn_vec_address_register_p (x0, DImode, strict))
	    return false;
	  if (GET_CODE (x1) == REG)
	    {
	      /* NOTE(review): the inner GET_CODE (x1) != REG test is dead
		 (we are inside the == REG branch), and even a REG offset
		 that passes this check falls through to the final
		 'return false' below -- so REG offsets are never
		 accepted.  Confirm whether a 'return true' is missing.
		 Also, '<=' FIRST_PSEUDO_REGISTER includes the first
		 pseudo; presumably '<' was intended.  */
	      if (GET_CODE (x1) != REG
		  || (REGNO (x1) <= FIRST_PSEUDO_REGISTER
		      && !gcn_ssrc_register_operand (x1, DImode)))
		return false;
	    }
	  else if (GET_CODE (x1) == CONST_VECTOR
		   && GET_CODE (CONST_VECTOR_ELT (x1, 0)) == CONST_INT
		   && single_cst_vector_p (x1))
	    {
	      /* A uniform constant vector offset acts like a 20-bit
		 unsigned immediate.  */
	      x1 = CONST_VECTOR_ELT (x1, 0);
	      if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20))
		return true;
	    }
	  return false;
	}

      default:
	break;
      }
  else
    gcc_unreachable ();
  return false;
}
   1443  1.1  mrg 
   1444  1.1  mrg /* Implement TARGET_ADDR_SPACE_POINTER_MODE.
   1445  1.1  mrg 
   1446  1.1  mrg    Return the appropriate mode for a named address pointer.  */
   1447  1.1  mrg 
   1448  1.1  mrg static scalar_int_mode
   1449  1.1  mrg gcn_addr_space_pointer_mode (addr_space_t addrspace)
   1450  1.1  mrg {
   1451  1.1  mrg   switch (addrspace)
   1452  1.1  mrg     {
   1453  1.1  mrg     case ADDR_SPACE_SCRATCH:
   1454  1.1  mrg     case ADDR_SPACE_LDS:
   1455  1.1  mrg     case ADDR_SPACE_GDS:
   1456  1.1  mrg       return SImode;
   1457  1.1  mrg     case ADDR_SPACE_DEFAULT:
   1458  1.1  mrg     case ADDR_SPACE_FLAT:
   1459  1.1  mrg     case ADDR_SPACE_FLAT_SCRATCH:
   1460  1.1  mrg     case ADDR_SPACE_SCALAR_FLAT:
   1461  1.1  mrg       return DImode;
   1462  1.1  mrg     default:
   1463  1.1  mrg       gcc_unreachable ();
   1464  1.1  mrg     }
   1465  1.1  mrg }
   1466  1.1  mrg 
   1467  1.1  mrg /* Implement TARGET_ADDR_SPACE_ADDRESS_MODE.
   1468  1.1  mrg 
   1469  1.1  mrg    Return the appropriate mode for a named address space address.  */
   1470  1.1  mrg 
   1471  1.1  mrg static scalar_int_mode
   1472  1.1  mrg gcn_addr_space_address_mode (addr_space_t addrspace)
   1473  1.1  mrg {
   1474  1.1  mrg   return gcn_addr_space_pointer_mode (addrspace);
   1475  1.1  mrg }
   1476  1.1  mrg 
   1477  1.1  mrg /* Implement TARGET_ADDR_SPACE_SUBSET_P.
   1478  1.1  mrg 
   1479  1.1  mrg    Determine if one named address space is a subset of another.  */
   1480  1.1  mrg 
   1481  1.1  mrg static bool
   1482  1.1  mrg gcn_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
   1483  1.1  mrg {
   1484  1.1  mrg   if (subset == superset)
   1485  1.1  mrg     return true;
   1486  1.1  mrg   /* FIXME is this true?  */
   1487  1.1  mrg   if (AS_FLAT_P (superset) || AS_SCALAR_FLAT_P (superset))
   1488  1.1  mrg     return true;
   1489  1.1  mrg   return false;
   1490  1.1  mrg }
   1491  1.1  mrg 
   1492  1.1  mrg /* Convert from one address space to another.  */
   1493  1.1  mrg 
   1494  1.1  mrg static rtx
   1495  1.1  mrg gcn_addr_space_convert (rtx op, tree from_type, tree to_type)
   1496  1.1  mrg {
   1497  1.1  mrg   gcc_assert (POINTER_TYPE_P (from_type));
   1498  1.1  mrg   gcc_assert (POINTER_TYPE_P (to_type));
   1499  1.1  mrg 
   1500  1.1  mrg   addr_space_t as_from = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
   1501  1.1  mrg   addr_space_t as_to = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
   1502  1.1  mrg 
   1503  1.1  mrg   if (AS_LDS_P (as_from) && AS_FLAT_P (as_to))
   1504  1.1  mrg     {
   1505  1.1  mrg       rtx queue = gen_rtx_REG (DImode,
   1506  1.1  mrg 			       cfun->machine->args.reg[QUEUE_PTR_ARG]);
   1507  1.1  mrg       rtx group_seg_aperture_hi = gen_rtx_MEM (SImode,
   1508  1.1  mrg 				     gen_rtx_PLUS (DImode, queue,
   1509  1.1  mrg 						   gen_int_mode (64, SImode)));
   1510  1.1  mrg       rtx tmp = gen_reg_rtx (DImode);
   1511  1.1  mrg 
   1512  1.1  mrg       emit_move_insn (gen_lowpart (SImode, tmp), op);
   1513  1.1  mrg       emit_move_insn (gen_highpart_mode (SImode, DImode, tmp),
   1514  1.1  mrg 		      group_seg_aperture_hi);
   1515  1.1  mrg 
   1516  1.1  mrg       return tmp;
   1517  1.1  mrg     }
   1518  1.1  mrg   else if (as_from == as_to)
   1519  1.1  mrg     return op;
   1520  1.1  mrg   else
   1521  1.1  mrg     gcc_unreachable ();
   1522  1.1  mrg }
   1523  1.1  mrg 
   1524  1.1  mrg /* Implement TARGET_ADDR_SPACE_DEBUG.
   1525  1.1  mrg 
   1526  1.1  mrg    Return the dwarf address space class for each hardware address space.  */
   1527  1.1  mrg 
   1528  1.1  mrg static int
   1529  1.1  mrg gcn_addr_space_debug (addr_space_t as)
   1530  1.1  mrg {
   1531  1.1  mrg   switch (as)
   1532  1.1  mrg     {
   1533  1.1  mrg       case ADDR_SPACE_DEFAULT:
   1534  1.1  mrg       case ADDR_SPACE_FLAT:
   1535  1.1  mrg       case ADDR_SPACE_SCALAR_FLAT:
   1536  1.1  mrg       case ADDR_SPACE_FLAT_SCRATCH:
   1537  1.1  mrg 	return DW_ADDR_none;
   1538  1.1  mrg       case ADDR_SPACE_GLOBAL:
   1539  1.1  mrg 	return 1;      // DW_ADDR_LLVM_global
   1540  1.1  mrg       case ADDR_SPACE_LDS:
   1541  1.1  mrg 	return 3;      // DW_ADDR_LLVM_group
   1542  1.1  mrg       case ADDR_SPACE_SCRATCH:
   1543  1.1  mrg 	return 4;      // DW_ADDR_LLVM_private
   1544  1.1  mrg       case ADDR_SPACE_GDS:
   1545  1.1  mrg 	return 0x8000; // DW_ADDR_AMDGPU_region
   1546  1.1  mrg     }
   1547  1.1  mrg   gcc_unreachable ();
   1548  1.1  mrg }
   1549  1.1  mrg 
   1550  1.1  mrg 
   1551  1.1  mrg /* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h
   1552  1.1  mrg 
   1553  1.1  mrg    Retun true if REGNO is OK for memory adressing.  */
   1554  1.1  mrg 
   1555  1.1  mrg bool
   1556  1.1  mrg gcn_regno_mode_code_ok_for_base_p (int regno,
   1557  1.1  mrg 				   machine_mode, addr_space_t as, int, int)
   1558  1.1  mrg {
   1559  1.1  mrg   if (regno >= FIRST_PSEUDO_REGISTER)
   1560  1.1  mrg     {
   1561  1.1  mrg       if (reg_renumber)
   1562  1.1  mrg 	regno = reg_renumber[regno];
   1563  1.1  mrg       else
   1564  1.1  mrg 	return true;
   1565  1.1  mrg     }
   1566  1.1  mrg   if (AS_FLAT_P (as))
   1567  1.1  mrg     return (VGPR_REGNO_P (regno)
   1568  1.1  mrg 	    || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
   1569  1.1  mrg   else if (AS_SCALAR_FLAT_P (as))
   1570  1.1  mrg     return (SGPR_REGNO_P (regno)
   1571  1.1  mrg 	    || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
   1572  1.1  mrg   else if (AS_GLOBAL_P (as))
   1573  1.1  mrg     {
   1574  1.1  mrg       return (SGPR_REGNO_P (regno)
   1575  1.1  mrg 	      || VGPR_REGNO_P (regno)
   1576  1.1  mrg 	      || regno == ARG_POINTER_REGNUM
   1577  1.1  mrg 	      || regno == FRAME_POINTER_REGNUM);
   1578  1.1  mrg     }
   1579  1.1  mrg   else
   1580  1.1  mrg     /* For now.  */
   1581  1.1  mrg     return false;
   1582  1.1  mrg }
   1583  1.1  mrg 
   1584  1.1  mrg /* Implement MODE_CODE_BASE_REG_CLASS via gcn.h.
   1585  1.1  mrg 
   1586  1.1  mrg    Return a suitable register class for memory addressing.  */
   1587  1.1  mrg 
   1588  1.1  mrg reg_class
   1589  1.1  mrg gcn_mode_code_base_reg_class (machine_mode mode, addr_space_t as, int oc,
   1590  1.1  mrg 			      int ic)
   1591  1.1  mrg {
   1592  1.1  mrg   switch (as)
   1593  1.1  mrg     {
   1594  1.1  mrg     case ADDR_SPACE_DEFAULT:
   1595  1.1  mrg       return gcn_mode_code_base_reg_class (mode, DEFAULT_ADDR_SPACE, oc, ic);
   1596  1.1  mrg     case ADDR_SPACE_SCALAR_FLAT:
   1597  1.1  mrg     case ADDR_SPACE_SCRATCH:
   1598  1.1  mrg       return SGPR_REGS;
   1599  1.1  mrg       break;
   1600  1.1  mrg     case ADDR_SPACE_FLAT:
   1601  1.1  mrg     case ADDR_SPACE_FLAT_SCRATCH:
   1602  1.1  mrg     case ADDR_SPACE_LDS:
   1603  1.1  mrg     case ADDR_SPACE_GDS:
   1604  1.1  mrg       return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   1605  1.1  mrg 	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
   1606  1.1  mrg 	      ? SGPR_REGS : VGPR_REGS);
   1607  1.1  mrg     case ADDR_SPACE_GLOBAL:
   1608  1.1  mrg       return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   1609  1.1  mrg 	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
   1610  1.1  mrg 	      ? SGPR_REGS : ALL_GPR_REGS);
   1611  1.1  mrg     }
   1612  1.1  mrg   gcc_unreachable ();
   1613  1.1  mrg }
   1614  1.1  mrg 
   1615  1.1  mrg /* Implement REGNO_OK_FOR_INDEX_P via gcn.h.
   1616  1.1  mrg 
   1617  1.1  mrg    Return true if REGNO is OK for index of memory addressing.  */
   1618  1.1  mrg 
   1619  1.1  mrg bool
   1620  1.1  mrg regno_ok_for_index_p (int regno)
   1621  1.1  mrg {
   1622  1.1  mrg   if (regno >= FIRST_PSEUDO_REGISTER)
   1623  1.1  mrg     {
   1624  1.1  mrg       if (reg_renumber)
   1625  1.1  mrg 	regno = reg_renumber[regno];
   1626  1.1  mrg       else
   1627  1.1  mrg 	return true;
   1628  1.1  mrg     }
   1629  1.1  mrg   return regno == M0_REG || VGPR_REGNO_P (regno);
   1630  1.1  mrg }
   1631  1.1  mrg 
   1632  1.1  mrg /* Generate move which uses the exec flags.  If EXEC is NULL, then it is
   1633  1.1  mrg    assumed that all lanes normally relevant to the mode of the move are
   1634  1.1  mrg    affected.  If PREV is NULL, then a sensible default is supplied for
   1635  1.1  mrg    the inactive lanes.  */
   1636  1.1  mrg 
   1637  1.1  mrg static rtx
   1638  1.1  mrg gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL)
   1639  1.1  mrg {
   1640  1.1  mrg   machine_mode mode = GET_MODE (op0);
   1641  1.1  mrg 
   1642  1.1  mrg   if (vgpr_vector_mode_p (mode))
   1643  1.1  mrg     {
   1644  1.1  mrg       if (exec && exec != CONSTM1_RTX (DImode))
   1645  1.1  mrg 	{
   1646  1.1  mrg 	  if (!prev)
   1647  1.1  mrg 	    prev = op0;
   1648  1.1  mrg 	}
   1649  1.1  mrg       else
   1650  1.1  mrg 	{
   1651  1.1  mrg 	  if (!prev)
   1652  1.1  mrg 	    prev = gcn_gen_undef (mode);
   1653  1.1  mrg 	  exec = gcn_full_exec_reg ();
   1654  1.1  mrg 	}
   1655  1.1  mrg 
   1656  1.1  mrg       rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec));
   1657  1.1  mrg 
   1658  1.1  mrg       return gen_rtx_PARALLEL (VOIDmode,
   1659  1.1  mrg 	       gen_rtvec (2, set,
   1660  1.1  mrg 			 gen_rtx_CLOBBER (VOIDmode,
   1661  1.1  mrg 					  gen_rtx_SCRATCH (V64DImode))));
   1662  1.1  mrg     }
   1663  1.1  mrg 
   1664  1.1  mrg   return (gen_rtx_PARALLEL
   1665  1.1  mrg 	  (VOIDmode,
   1666  1.1  mrg 	   gen_rtvec (2, gen_rtx_SET (op0, op1),
   1667  1.1  mrg 		      gen_rtx_USE (VOIDmode,
   1668  1.1  mrg 				   exec ? exec : gcn_scalar_exec ()))));
   1669  1.1  mrg }
   1670  1.1  mrg 
   1671  1.1  mrg /* Generate masked move.  */
   1672  1.1  mrg 
   1673  1.1  mrg static rtx
   1674  1.1  mrg gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL)
   1675  1.1  mrg {
   1676  1.1  mrg   if (exec)
   1677  1.1  mrg     return (gen_rtx_SET (op0,
   1678  1.1  mrg 			 gen_rtx_VEC_MERGE (GET_MODE (op0),
   1679  1.1  mrg 					    gen_rtx_VEC_DUPLICATE (GET_MODE
   1680  1.1  mrg 								   (op0), op1),
   1681  1.1  mrg 					    op2, exec)));
   1682  1.1  mrg   else
   1683  1.1  mrg     return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1)));
   1684  1.1  mrg }
   1685  1.1  mrg 
   1686  1.1  mrg /* Expand vector init of OP0 by VEC.
   1687  1.1  mrg    Implements vec_init instruction pattern.  */
   1688  1.1  mrg 
   1689  1.1  mrg void
   1690  1.1  mrg gcn_expand_vector_init (rtx op0, rtx vec)
   1691  1.1  mrg {
   1692  1.1  mrg   int64_t initialized_mask = 0;
   1693  1.1  mrg   int64_t curr_mask = 1;
   1694  1.1  mrg   machine_mode mode = GET_MODE (op0);
   1695  1.1  mrg 
   1696  1.1  mrg   rtx val = XVECEXP (vec, 0, 0);
   1697  1.1  mrg 
   1698  1.1  mrg   for (int i = 1; i < 64; i++)
   1699  1.1  mrg     if (rtx_equal_p (val, XVECEXP (vec, 0, i)))
   1700  1.1  mrg       curr_mask |= (int64_t) 1 << i;
   1701  1.1  mrg 
   1702  1.1  mrg   if (gcn_constant_p (val))
   1703  1.1  mrg     emit_move_insn (op0, gcn_vec_constant (mode, val));
   1704  1.1  mrg   else
   1705  1.1  mrg     {
   1706  1.1  mrg       val = force_reg (GET_MODE_INNER (mode), val);
   1707  1.1  mrg       emit_insn (gen_duplicate_load (op0, val));
   1708  1.1  mrg     }
   1709  1.1  mrg   initialized_mask |= curr_mask;
   1710  1.1  mrg   for (int i = 1; i < 64; i++)
   1711  1.1  mrg     if (!(initialized_mask & ((int64_t) 1 << i)))
   1712  1.1  mrg       {
   1713  1.1  mrg 	curr_mask = (int64_t) 1 << i;
   1714  1.1  mrg 	rtx val = XVECEXP (vec, 0, i);
   1715  1.1  mrg 
   1716  1.1  mrg 	for (int j = i + 1; j < 64; j++)
   1717  1.1  mrg 	  if (rtx_equal_p (val, XVECEXP (vec, 0, j)))
   1718  1.1  mrg 	    curr_mask |= (int64_t) 1 << j;
   1719  1.1  mrg 	if (gcn_constant_p (val))
   1720  1.1  mrg 	  emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val),
   1721  1.1  mrg 					get_exec (curr_mask)));
   1722  1.1  mrg 	else
   1723  1.1  mrg 	  {
   1724  1.1  mrg 	    val = force_reg (GET_MODE_INNER (mode), val);
   1725  1.1  mrg 	    emit_insn (gen_duplicate_load (op0, val, op0,
   1726  1.1  mrg 					   get_exec (curr_mask)));
   1727  1.1  mrg 	  }
   1728  1.1  mrg 	initialized_mask |= curr_mask;
   1729  1.1  mrg       }
   1730  1.1  mrg }
   1731  1.1  mrg 
   1732  1.1  mrg /* Load vector constant where n-th lane contains BASE+n*VAL.  */
   1733  1.1  mrg 
   1734  1.1  mrg static rtx
   1735  1.1  mrg strided_constant (machine_mode mode, int base, int val)
   1736  1.1  mrg {
   1737  1.1  mrg   rtx x = gen_reg_rtx (mode);
   1738  1.1  mrg   emit_move_insn (x, gcn_vec_constant (mode, base));
   1739  1.1  mrg   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32),
   1740  1.1  mrg 				 x, get_exec (0xffffffff00000000)));
   1741  1.1  mrg   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16),
   1742  1.1  mrg 				 x, get_exec (0xffff0000ffff0000)));
   1743  1.1  mrg   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8),
   1744  1.1  mrg 				 x, get_exec (0xff00ff00ff00ff00)));
   1745  1.1  mrg   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4),
   1746  1.1  mrg 				 x, get_exec (0xf0f0f0f0f0f0f0f0)));
   1747  1.1  mrg   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2),
   1748  1.1  mrg 				 x, get_exec (0xcccccccccccccccc)));
   1749  1.1  mrg   emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1),
   1750  1.1  mrg 				 x, get_exec (0xaaaaaaaaaaaaaaaa)));
   1751  1.1  mrg   return x;
   1752  1.1  mrg }
   1753  1.1  mrg 
   1754  1.1  mrg /* Implement TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS.  */
   1755  1.1  mrg 
   1756  1.1  mrg static rtx
   1757  1.1  mrg gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
   1758  1.1  mrg 				   addr_space_t as)
   1759  1.1  mrg {
   1760  1.1  mrg   switch (as)
   1761  1.1  mrg     {
   1762  1.1  mrg     case ADDR_SPACE_DEFAULT:
   1763  1.1  mrg       return gcn_addr_space_legitimize_address (x, old, mode,
   1764  1.1  mrg 						DEFAULT_ADDR_SPACE);
   1765  1.1  mrg     case ADDR_SPACE_SCALAR_FLAT:
   1766  1.1  mrg     case ADDR_SPACE_SCRATCH:
   1767  1.1  mrg       /* Instructions working on vectors need the address to be in
   1768  1.1  mrg          a register.  */
   1769  1.1  mrg       if (vgpr_vector_mode_p (mode))
   1770  1.1  mrg 	return force_reg (GET_MODE (x), x);
   1771  1.1  mrg 
   1772  1.1  mrg       return x;
   1773  1.1  mrg     case ADDR_SPACE_FLAT:
   1774  1.1  mrg     case ADDR_SPACE_FLAT_SCRATCH:
   1775  1.1  mrg     case ADDR_SPACE_GLOBAL:
   1776  1.1  mrg       return TARGET_GCN3 ? force_reg (DImode, x) : x;
   1777  1.1  mrg     case ADDR_SPACE_LDS:
   1778  1.1  mrg     case ADDR_SPACE_GDS:
   1779  1.1  mrg       /* FIXME: LDS support offsets, handle them!.  */
   1780  1.1  mrg       if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode)
   1781  1.1  mrg 	{
   1782  1.1  mrg 	  rtx addrs = gen_reg_rtx (V64SImode);
   1783  1.1  mrg 	  rtx base = force_reg (SImode, x);
   1784  1.1  mrg 	  rtx offsets = strided_constant (V64SImode, 0,
   1785  1.1  mrg 					  GET_MODE_UNIT_SIZE (mode));
   1786  1.1  mrg 
   1787  1.1  mrg 	  emit_insn (gen_vec_duplicatev64si (addrs, base));
   1788  1.1  mrg 	  emit_insn (gen_addv64si3 (addrs, offsets, addrs));
   1789  1.1  mrg 	  return addrs;
   1790  1.1  mrg 	}
   1791  1.1  mrg       return x;
   1792  1.1  mrg     }
   1793  1.1  mrg   gcc_unreachable ();
   1794  1.1  mrg }
   1795  1.1  mrg 
   1796  1.1  mrg /* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:V64DI)) with the
   1797  1.1  mrg    proper vector of stepped addresses.
   1798  1.1  mrg 
   1799  1.1  mrg    MEM will be a DImode address of a vector in an SGPR.
   1800  1.1  mrg    TMP will be a V64DImode VGPR pair or (scratch:V64DI).  */
   1801  1.1  mrg 
   1802  1.1  mrg rtx
   1803  1.1  mrg gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
   1804  1.1  mrg 				     rtx tmp)
   1805  1.1  mrg {
   1806  1.1  mrg   gcc_assert (MEM_P (mem));
   1807  1.1  mrg   rtx mem_base = XEXP (mem, 0);
   1808  1.1  mrg   rtx mem_index = NULL_RTX;
   1809  1.1  mrg 
   1810  1.1  mrg   if (!TARGET_GCN5_PLUS)
   1811  1.1  mrg     {
   1812  1.1  mrg       /* gcn_addr_space_legitimize_address should have put the address in a
   1813  1.1  mrg          register.  If not, it is too late to do anything about it.  */
   1814  1.1  mrg       gcc_assert (REG_P (mem_base));
   1815  1.1  mrg     }
   1816  1.1  mrg 
   1817  1.1  mrg   if (GET_CODE (mem_base) == PLUS)
   1818  1.1  mrg     {
   1819  1.1  mrg       mem_index = XEXP (mem_base, 1);
   1820  1.1  mrg       mem_base = XEXP (mem_base, 0);
   1821  1.1  mrg     }
   1822  1.1  mrg 
   1823  1.1  mrg   /* RF and RM base registers for vector modes should be always an SGPR.  */
   1824  1.1  mrg   gcc_assert (SGPR_REGNO_P (REGNO (mem_base))
   1825  1.1  mrg 	      || REGNO (mem_base) >= FIRST_PSEUDO_REGISTER);
   1826  1.1  mrg 
   1827  1.1  mrg   machine_mode inner = GET_MODE_INNER (mode);
   1828  1.1  mrg   int shift = exact_log2 (GET_MODE_SIZE (inner));
   1829  1.1  mrg   rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
   1830  1.1  mrg   rtx undef_v64si = gcn_gen_undef (V64SImode);
   1831  1.1  mrg   rtx new_base = NULL_RTX;
   1832  1.1  mrg   addr_space_t as = MEM_ADDR_SPACE (mem);
   1833  1.1  mrg 
   1834  1.1  mrg   rtx tmplo = (REG_P (tmp)
   1835  1.1  mrg 	       ? gcn_operand_part (V64DImode, tmp, 0)
   1836  1.1  mrg 	       : gen_reg_rtx (V64SImode));
   1837  1.1  mrg 
   1838  1.1  mrg   /* tmplo[:] = ramp[:] << shift  */
   1839  1.1  mrg   if (exec)
   1840  1.1  mrg     emit_insn (gen_ashlv64si3_exec (tmplo, ramp,
   1841  1.1  mrg 				    gen_int_mode (shift, SImode),
   1842  1.1  mrg 				    undef_v64si, exec));
   1843  1.1  mrg   else
   1844  1.1  mrg     emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode)));
   1845  1.1  mrg 
   1846  1.1  mrg   if (AS_FLAT_P (as))
   1847  1.1  mrg     {
   1848  1.1  mrg       rtx vcc = gen_rtx_REG (DImode, CC_SAVE_REG);
   1849  1.1  mrg 
   1850  1.1  mrg       if (REG_P (tmp))
   1851  1.1  mrg 	{
   1852  1.1  mrg 	  rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
   1853  1.1  mrg 	  rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
   1854  1.1  mrg 	  rtx tmphi = gcn_operand_part (V64DImode, tmp, 1);
   1855  1.1  mrg 
   1856  1.1  mrg 	  /* tmphi[:] = mem_base_hi  */
   1857  1.1  mrg 	  if (exec)
   1858  1.1  mrg 	    emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi,
   1859  1.1  mrg 						    undef_v64si, exec));
   1860  1.1  mrg 	  else
   1861  1.1  mrg 	    emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi));
   1862  1.1  mrg 
   1863  1.1  mrg 	  /* tmp[:] += zext (mem_base)  */
   1864  1.1  mrg 	  if (exec)
   1865  1.1  mrg 	    {
   1866  1.1  mrg 	      emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo,
   1867  1.1  mrg 						     vcc, undef_v64si, exec));
   1868  1.1  mrg 	      emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx,
   1869  1.1  mrg 					      vcc, vcc, undef_v64si, exec));
   1870  1.1  mrg 	    }
   1871  1.1  mrg 	  else
   1872  1.1  mrg 	    emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
   1873  1.1  mrg 	}
   1874  1.1  mrg       else
   1875  1.1  mrg 	{
   1876  1.1  mrg 	  tmp = gen_reg_rtx (V64DImode);
   1877  1.1  mrg 	  if (exec)
   1878  1.1  mrg 	    emit_insn (gen_addv64di3_vcc_zext_dup2_exec
   1879  1.1  mrg 		       (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode),
   1880  1.1  mrg 			exec));
   1881  1.1  mrg 	  else
   1882  1.1  mrg 	    emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc));
   1883  1.1  mrg 	}
   1884  1.1  mrg 
   1885  1.1  mrg       new_base = tmp;
   1886  1.1  mrg     }
   1887  1.1  mrg   else if (AS_ANY_DS_P (as))
   1888  1.1  mrg     {
   1889  1.1  mrg       if (!exec)
   1890  1.1  mrg 	emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base));
   1891  1.1  mrg       else
   1892  1.1  mrg         emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base,
   1893  1.1  mrg 					   gcn_gen_undef (V64SImode), exec));
   1894  1.1  mrg       new_base = tmplo;
   1895  1.1  mrg     }
   1896  1.1  mrg   else
   1897  1.1  mrg     {
   1898  1.1  mrg       mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base);
   1899  1.1  mrg       new_base = gen_rtx_PLUS (V64DImode, mem_base,
   1900  1.1  mrg 			       gen_rtx_SIGN_EXTEND (V64DImode, tmplo));
   1901  1.1  mrg     }
   1902  1.1  mrg 
   1903  1.1  mrg   return gen_rtx_PLUS (GET_MODE (new_base), new_base,
   1904  1.1  mrg 		       gen_rtx_VEC_DUPLICATE (GET_MODE (new_base),
   1905  1.1  mrg 					      (mem_index ? mem_index
   1906  1.1  mrg 					       : const0_rtx)));
   1907  1.1  mrg }
   1908  1.1  mrg 
   1909  1.1  mrg /* Convert a BASE address, a vector of OFFSETS, and a SCALE, to addresses
   1910  1.1  mrg    suitable for the given address space.  This is indented for use in
   1911  1.1  mrg    gather/scatter patterns.
   1912  1.1  mrg 
   1913  1.1  mrg    The offsets may be signed or unsigned, according to UNSIGNED_P.
   1914  1.1  mrg    If EXEC is set then _exec patterns will be used, otherwise plain.
   1915  1.1  mrg 
   1916  1.1  mrg    Return values.
   1917  1.1  mrg      ADDR_SPACE_FLAT   - return V64DImode vector of absolute addresses.
   1918  1.1  mrg      ADDR_SPACE_GLOBAL - return V64SImode vector of offsets.  */
   1919  1.1  mrg 
   1920  1.1  mrg rtx
   1921  1.1  mrg gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
   1922  1.1  mrg 			   bool unsigned_p, rtx exec)
   1923  1.1  mrg {
   1924  1.1  mrg   rtx tmpsi = gen_reg_rtx (V64SImode);
   1925  1.1  mrg   rtx tmpdi = gen_reg_rtx (V64DImode);
   1926  1.1  mrg   rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL;
   1927  1.1  mrg   rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL;
   1928  1.1  mrg 
   1929  1.1  mrg   if (CONST_INT_P (scale)
   1930  1.1  mrg       && INTVAL (scale) > 0
   1931  1.1  mrg       && exact_log2 (INTVAL (scale)) >= 0)
   1932  1.1  mrg     emit_insn (gen_ashlv64si3 (tmpsi, offsets,
   1933  1.1  mrg 			       GEN_INT (exact_log2 (INTVAL (scale)))));
   1934  1.1  mrg   else
   1935  1.1  mrg     (exec
   1936  1.1  mrg      ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi,
   1937  1.1  mrg 					  exec))
   1938  1.1  mrg      : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale)));
   1939  1.1  mrg 
   1940  1.1  mrg   /* "Global" instructions do not support negative register offsets.  */
   1941  1.1  mrg   if (as == ADDR_SPACE_FLAT || !unsigned_p)
   1942  1.1  mrg     {
   1943  1.1  mrg       if (unsigned_p)
   1944  1.1  mrg 	(exec
   1945  1.1  mrg 	 ?  emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base,
   1946  1.1  mrg 						    undefdi, exec))
   1947  1.1  mrg 	 :  emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base)));
   1948  1.1  mrg       else
   1949  1.1  mrg 	(exec
   1950  1.1  mrg 	 ?  emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base,
   1951  1.1  mrg 						     undefdi, exec))
   1952  1.1  mrg 	 :  emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base)));
   1953  1.1  mrg       return tmpdi;
   1954  1.1  mrg     }
   1955  1.1  mrg   else if (as == ADDR_SPACE_GLOBAL)
   1956  1.1  mrg     return tmpsi;
   1957  1.1  mrg 
   1958  1.1  mrg   gcc_unreachable ();
   1959  1.1  mrg }
   1960  1.1  mrg 
   1961  1.1  mrg /* Return true if move from OP0 to OP1 is known to be executed in vector
   1962  1.1  mrg    unit.  */
   1963  1.1  mrg 
   1964  1.1  mrg bool
   1965  1.1  mrg gcn_vgpr_move_p (rtx op0, rtx op1)
   1966  1.1  mrg {
   1967  1.1  mrg   if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
   1968  1.1  mrg     return true;
   1969  1.1  mrg   if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
   1970  1.1  mrg     return true;
   1971  1.1  mrg   return ((REG_P (op0) && VGPR_REGNO_P (REGNO (op0)))
   1972  1.1  mrg 	  || (REG_P (op1) && VGPR_REGNO_P (REGNO (op1)))
   1973  1.1  mrg 	  || vgpr_vector_mode_p (GET_MODE (op0)));
   1974  1.1  mrg }
   1975  1.1  mrg 
   1976  1.1  mrg /* Return true if move from OP0 to OP1 is known to be executed in scalar
   1977  1.1  mrg    unit.  Used in the machine description.  */
   1978  1.1  mrg 
   1979  1.1  mrg bool
   1980  1.1  mrg gcn_sgpr_move_p (rtx op0, rtx op1)
   1981  1.1  mrg {
   1982  1.1  mrg   if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
   1983  1.1  mrg     return true;
   1984  1.1  mrg   if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
   1985  1.1  mrg     return true;
   1986  1.1  mrg   if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER
   1987  1.1  mrg       || VGPR_REGNO_P (REGNO (op0)))
   1988  1.1  mrg     return false;
   1989  1.1  mrg   if (REG_P (op1)
   1990  1.1  mrg       && REGNO (op1) < FIRST_PSEUDO_REGISTER
   1991  1.1  mrg       && !VGPR_REGNO_P (REGNO (op1)))
   1992  1.1  mrg     return true;
   1993  1.1  mrg   return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
   1994  1.1  mrg }
   1995  1.1  mrg 
   1996  1.1  mrg /* Implement TARGET_SECONDARY_RELOAD.
   1997  1.1  mrg 
   1998  1.1  mrg    The address space determines which registers can be used for loads and
   1999  1.1  mrg    stores.  */
   2000  1.1  mrg 
   2001  1.1  mrg static reg_class_t
   2002  1.1  mrg gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
   2003  1.1  mrg 		      machine_mode reload_mode, secondary_reload_info *sri)
   2004  1.1  mrg {
   2005  1.1  mrg   reg_class_t result = NO_REGS;
   2006  1.1  mrg   bool spilled_pseudo =
   2007  1.1  mrg     (REG_P (x) || GET_CODE (x) == SUBREG) && true_regnum (x) == -1;
   2008  1.1  mrg 
   2009  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   2010  1.1  mrg     {
   2011  1.1  mrg       fprintf (dump_file, "gcn_secondary_reload: ");
   2012  1.1  mrg       dump_value_slim (dump_file, x, 1);
   2013  1.1  mrg       fprintf (dump_file, " %s %s:%s", (in_p ? "->" : "<-"),
   2014  1.1  mrg 	       reg_class_names[rclass], GET_MODE_NAME (reload_mode));
   2015  1.1  mrg       if (REG_P (x) || GET_CODE (x) == SUBREG)
   2016  1.1  mrg 	fprintf (dump_file, " (true regnum: %d \"%s\")", true_regnum (x),
   2017  1.1  mrg 		 (true_regnum (x) >= 0
   2018  1.1  mrg 		  && true_regnum (x) < FIRST_PSEUDO_REGISTER
   2019  1.1  mrg 		  ? reg_names[true_regnum (x)]
   2020  1.1  mrg 		  : (spilled_pseudo ? "stack spill" : "??")));
   2021  1.1  mrg       fprintf (dump_file, "\n");
   2022  1.1  mrg     }
   2023  1.1  mrg 
   2024  1.1  mrg   /* Some callers don't use or initialize icode.  */
   2025  1.1  mrg   sri->icode = CODE_FOR_nothing;
   2026  1.1  mrg 
   2027  1.1  mrg   if (MEM_P (x) || spilled_pseudo)
   2028  1.1  mrg     {
   2029  1.1  mrg       addr_space_t as = DEFAULT_ADDR_SPACE;
   2030  1.1  mrg 
   2031  1.1  mrg       /* If we have a spilled pseudo, we can't find the address space
   2032  1.1  mrg 	 directly, but we know it's in ADDR_SPACE_FLAT space for GCN3 or
   2033  1.1  mrg 	 ADDR_SPACE_GLOBAL for GCN5.  */
   2034  1.1  mrg       if (MEM_P (x))
   2035  1.1  mrg 	as = MEM_ADDR_SPACE (x);
   2036  1.1  mrg 
   2037  1.1  mrg       if (as == ADDR_SPACE_DEFAULT)
   2038  1.1  mrg 	as = DEFAULT_ADDR_SPACE;
   2039  1.1  mrg 
   2040  1.1  mrg       switch (as)
   2041  1.1  mrg 	{
   2042  1.1  mrg 	case ADDR_SPACE_SCALAR_FLAT:
   2043  1.1  mrg 	  result =
   2044  1.1  mrg 	    ((!MEM_P (x) || rclass == SGPR_REGS) ? NO_REGS : SGPR_REGS);
   2045  1.1  mrg 	  break;
   2046  1.1  mrg 	case ADDR_SPACE_FLAT:
   2047  1.1  mrg 	case ADDR_SPACE_FLAT_SCRATCH:
   2048  1.1  mrg 	case ADDR_SPACE_GLOBAL:
   2049  1.1  mrg 	  if (GET_MODE_CLASS (reload_mode) == MODE_VECTOR_INT
   2050  1.1  mrg 	      || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT)
   2051  1.1  mrg 	    {
   2052  1.1  mrg 	      if (in_p)
   2053  1.1  mrg 		switch (reload_mode)
   2054  1.1  mrg 		  {
   2055  1.1  mrg 		  case E_V64SImode:
   2056  1.1  mrg 		    sri->icode = CODE_FOR_reload_inv64si;
   2057  1.1  mrg 		    break;
   2058  1.1  mrg 		  case E_V64SFmode:
   2059  1.1  mrg 		    sri->icode = CODE_FOR_reload_inv64sf;
   2060  1.1  mrg 		    break;
   2061  1.1  mrg 		  case E_V64HImode:
   2062  1.1  mrg 		    sri->icode = CODE_FOR_reload_inv64hi;
   2063  1.1  mrg 		    break;
   2064  1.1  mrg 		  case E_V64HFmode:
   2065  1.1  mrg 		    sri->icode = CODE_FOR_reload_inv64hf;
   2066  1.1  mrg 		    break;
   2067  1.1  mrg 		  case E_V64QImode:
   2068  1.1  mrg 		    sri->icode = CODE_FOR_reload_inv64qi;
   2069  1.1  mrg 		    break;
   2070  1.1  mrg 		  case E_V64DImode:
   2071  1.1  mrg 		    sri->icode = CODE_FOR_reload_inv64di;
   2072  1.1  mrg 		    break;
   2073  1.1  mrg 		  case E_V64DFmode:
   2074  1.1  mrg 		    sri->icode = CODE_FOR_reload_inv64df;
   2075  1.1  mrg 		    break;
   2076  1.1  mrg 		  default:
   2077  1.1  mrg 		    gcc_unreachable ();
   2078  1.1  mrg 		  }
   2079  1.1  mrg 	      else
   2080  1.1  mrg 		switch (reload_mode)
   2081  1.1  mrg 		  {
   2082  1.1  mrg 		  case E_V64SImode:
   2083  1.1  mrg 		    sri->icode = CODE_FOR_reload_outv64si;
   2084  1.1  mrg 		    break;
   2085  1.1  mrg 		  case E_V64SFmode:
   2086  1.1  mrg 		    sri->icode = CODE_FOR_reload_outv64sf;
   2087  1.1  mrg 		    break;
   2088  1.1  mrg 		  case E_V64HImode:
   2089  1.1  mrg 		    sri->icode = CODE_FOR_reload_outv64hi;
   2090  1.1  mrg 		    break;
   2091  1.1  mrg 		  case E_V64HFmode:
   2092  1.1  mrg 		    sri->icode = CODE_FOR_reload_outv64hf;
   2093  1.1  mrg 		    break;
   2094  1.1  mrg 		  case E_V64QImode:
   2095  1.1  mrg 		    sri->icode = CODE_FOR_reload_outv64qi;
   2096  1.1  mrg 		    break;
   2097  1.1  mrg 		  case E_V64DImode:
   2098  1.1  mrg 		    sri->icode = CODE_FOR_reload_outv64di;
   2099  1.1  mrg 		    break;
   2100  1.1  mrg 		  case E_V64DFmode:
   2101  1.1  mrg 		    sri->icode = CODE_FOR_reload_outv64df;
   2102  1.1  mrg 		    break;
   2103  1.1  mrg 		  default:
   2104  1.1  mrg 		    gcc_unreachable ();
   2105  1.1  mrg 		  }
   2106  1.1  mrg 	      break;
   2107  1.1  mrg 	    }
   2108  1.1  mrg 	  /* Fallthrough.  */
   2109  1.1  mrg 	case ADDR_SPACE_LDS:
   2110  1.1  mrg 	case ADDR_SPACE_GDS:
   2111  1.1  mrg 	case ADDR_SPACE_SCRATCH:
   2112  1.1  mrg 	  result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
   2113  1.1  mrg 	  break;
   2114  1.1  mrg 	}
   2115  1.1  mrg     }
   2116  1.1  mrg 
   2117  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   2118  1.1  mrg     fprintf (dump_file, "   <= %s (icode: %s)\n", reg_class_names[result],
   2119  1.1  mrg 	     get_insn_name (sri->icode));
   2120  1.1  mrg 
   2121  1.1  mrg   return result;
   2122  1.1  mrg }
   2123  1.1  mrg 
   2124  1.1  mrg /* Update register usage after having seen the compiler flags and kernel
   2125  1.1  mrg    attributes.  We typically want to fix registers that contain values
   2126  1.1  mrg    set by the HSA runtime.  */
   2127  1.1  mrg 
   2128  1.1  mrg static void
   2129  1.1  mrg gcn_conditional_register_usage (void)
   2130  1.1  mrg {
   2131  1.1  mrg   if (!cfun || !cfun->machine)
   2132  1.1  mrg     return;
   2133  1.1  mrg 
   2134  1.1  mrg   if (cfun->machine->normal_function)
   2135  1.1  mrg     {
   2136  1.1  mrg       /* Restrict the set of SGPRs and VGPRs used by non-kernel functions.  */
   2137  1.1  mrg       for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT);
   2138  1.1  mrg 	   i <= LAST_SGPR_REG; i++)
   2139  1.1  mrg 	fixed_regs[i] = 1, call_used_regs[i] = 1;
   2140  1.1  mrg 
   2141  1.1  mrg       for (int i = VGPR_REGNO (MAX_NORMAL_VGPR_COUNT);
   2142  1.1  mrg 	   i <= LAST_VGPR_REG; i++)
   2143  1.1  mrg 	fixed_regs[i] = 1, call_used_regs[i] = 1;
   2144  1.1  mrg 
   2145  1.1  mrg       return;
   2146  1.1  mrg     }
   2147  1.1  mrg 
   2148  1.1  mrg   /* If the set of requested args is the default set, nothing more needs to
   2149  1.1  mrg      be done.  */
   2150  1.1  mrg   if (cfun->machine->args.requested == default_requested_args)
   2151  1.1  mrg     return;
   2152  1.1  mrg 
   2153  1.1  mrg   /* Requesting a set of args different from the default violates the ABI.  */
   2154  1.1  mrg   if (!leaf_function_p ())
   2155  1.1  mrg     warning (0, "A non-default set of initial values has been requested, "
   2156  1.1  mrg 		"which violates the ABI");
   2157  1.1  mrg 
   2158  1.1  mrg   for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
   2159  1.1  mrg     fixed_regs[i] = 0;
   2160  1.1  mrg 
   2161  1.1  mrg   /* Fix the runtime argument register containing values that may be
   2162  1.1  mrg      needed later.  DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
   2163  1.1  mrg      needed after the prologue so there's no need to fix them.  */
   2164  1.1  mrg   if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0)
   2165  1.1  mrg     fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
   2166  1.1  mrg   if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
   2167  1.1  mrg     {
   2168  1.1  mrg       /* The upper 32-bits of the 64-bit descriptor are not used, so allow
   2169  1.1  mrg 	the containing registers to be used for other purposes.  */
   2170  1.1  mrg       fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
   2171  1.1  mrg       fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
   2172  1.1  mrg     }
   2173  1.1  mrg   if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
   2174  1.1  mrg     {
   2175  1.1  mrg       fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG]] = 1;
   2176  1.1  mrg       fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] + 1] = 1;
   2177  1.1  mrg     }
   2178  1.1  mrg   if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0)
   2179  1.1  mrg     {
   2180  1.1  mrg       fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG]] = 1;
   2181  1.1  mrg       fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG] + 1] = 1;
   2182  1.1  mrg     }
   2183  1.1  mrg   if (cfun->machine->args.reg[WORKGROUP_ID_X_ARG] >= 0)
   2184  1.1  mrg     fixed_regs[cfun->machine->args.reg[WORKGROUP_ID_X_ARG]] = 1;
   2185  1.1  mrg   if (cfun->machine->args.reg[WORK_ITEM_ID_X_ARG] >= 0)
   2186  1.1  mrg     fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_X_ARG]] = 1;
   2187  1.1  mrg   if (cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG] >= 0)
   2188  1.1  mrg     fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG]] = 1;
   2189  1.1  mrg   if (cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG] >= 0)
   2190  1.1  mrg     fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
   2191  1.1  mrg }
   2192  1.1  mrg 
   2193  1.1  mrg /* Determine if a load or store is valid, according to the register classes
   2194  1.1  mrg    and address space.  Used primarily by the machine description to decide
   2195  1.1  mrg    when to split a move into two steps.  */
   2196  1.1  mrg 
   2197  1.1  mrg bool
   2198  1.1  mrg gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
   2199  1.1  mrg {
   2200  1.1  mrg   if (!MEM_P (dest) && !MEM_P (src))
   2201  1.1  mrg     return true;
   2202  1.1  mrg 
   2203  1.1  mrg   if (MEM_P (dest)
   2204  1.1  mrg       && AS_FLAT_P (MEM_ADDR_SPACE (dest))
   2205  1.1  mrg       && (gcn_flat_address_p (XEXP (dest, 0), mode)
   2206  1.1  mrg 	  || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
   2207  1.1  mrg 	  || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
   2208  1.1  mrg       && gcn_vgpr_register_operand (src, mode))
   2209  1.1  mrg     return true;
   2210  1.1  mrg   else if (MEM_P (src)
   2211  1.1  mrg 	   && AS_FLAT_P (MEM_ADDR_SPACE (src))
   2212  1.1  mrg 	   && (gcn_flat_address_p (XEXP (src, 0), mode)
   2213  1.1  mrg 	       || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
   2214  1.1  mrg 	       || GET_CODE (XEXP (src, 0)) == LABEL_REF)
   2215  1.1  mrg 	   && gcn_vgpr_register_operand (dest, mode))
   2216  1.1  mrg     return true;
   2217  1.1  mrg 
   2218  1.1  mrg   if (MEM_P (dest)
   2219  1.1  mrg       && AS_GLOBAL_P (MEM_ADDR_SPACE (dest))
   2220  1.1  mrg       && (gcn_global_address_p (XEXP (dest, 0))
   2221  1.1  mrg 	  || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
   2222  1.1  mrg 	  || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
   2223  1.1  mrg       && gcn_vgpr_register_operand (src, mode))
   2224  1.1  mrg     return true;
   2225  1.1  mrg   else if (MEM_P (src)
   2226  1.1  mrg 	   && AS_GLOBAL_P (MEM_ADDR_SPACE (src))
   2227  1.1  mrg 	   && (gcn_global_address_p (XEXP (src, 0))
   2228  1.1  mrg 	       || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
   2229  1.1  mrg 	       || GET_CODE (XEXP (src, 0)) == LABEL_REF)
   2230  1.1  mrg 	   && gcn_vgpr_register_operand (dest, mode))
   2231  1.1  mrg     return true;
   2232  1.1  mrg 
   2233  1.1  mrg   if (MEM_P (dest)
   2234  1.1  mrg       && MEM_ADDR_SPACE (dest) == ADDR_SPACE_SCALAR_FLAT
   2235  1.1  mrg       && (gcn_scalar_flat_address_p (XEXP (dest, 0))
   2236  1.1  mrg 	  || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
   2237  1.1  mrg 	  || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
   2238  1.1  mrg       && gcn_ssrc_register_operand (src, mode))
   2239  1.1  mrg     return true;
   2240  1.1  mrg   else if (MEM_P (src)
   2241  1.1  mrg 	   && MEM_ADDR_SPACE (src) == ADDR_SPACE_SCALAR_FLAT
   2242  1.1  mrg 	   && (gcn_scalar_flat_address_p (XEXP (src, 0))
   2243  1.1  mrg 	       || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
   2244  1.1  mrg 	       || GET_CODE (XEXP (src, 0)) == LABEL_REF)
   2245  1.1  mrg 	   && gcn_sdst_register_operand (dest, mode))
   2246  1.1  mrg     return true;
   2247  1.1  mrg 
   2248  1.1  mrg   if (MEM_P (dest)
   2249  1.1  mrg       && AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
   2250  1.1  mrg       && gcn_ds_address_p (XEXP (dest, 0))
   2251  1.1  mrg       && gcn_vgpr_register_operand (src, mode))
   2252  1.1  mrg     return true;
   2253  1.1  mrg   else if (MEM_P (src)
   2254  1.1  mrg 	   && AS_ANY_DS_P (MEM_ADDR_SPACE (src))
   2255  1.1  mrg 	   && gcn_ds_address_p (XEXP (src, 0))
   2256  1.1  mrg 	   && gcn_vgpr_register_operand (dest, mode))
   2257  1.1  mrg     return true;
   2258  1.1  mrg 
   2259  1.1  mrg   return false;
   2260  1.1  mrg }
   2261  1.1  mrg 
   2262  1.1  mrg /* }}}  */
   2263  1.1  mrg /* {{{ Functions and ABI.  */
   2264  1.1  mrg 
   2265  1.1  mrg /* Implement TARGET_FUNCTION_VALUE.
   2266  1.1  mrg 
   2267  1.1  mrg    Define how to find the value returned by a function.
   2268  1.1  mrg    The register location is always the same, but the mode depends on
   2269  1.1  mrg    VALTYPE.  */
   2270  1.1  mrg 
   2271  1.1  mrg static rtx
   2272  1.1  mrg gcn_function_value (const_tree valtype, const_tree, bool)
   2273  1.1  mrg {
   2274  1.1  mrg   machine_mode mode = TYPE_MODE (valtype);
   2275  1.1  mrg 
   2276  1.1  mrg   if (INTEGRAL_TYPE_P (valtype)
   2277  1.1  mrg       && GET_MODE_CLASS (mode) == MODE_INT
   2278  1.1  mrg       && GET_MODE_SIZE (mode) < 4)
   2279  1.1  mrg     mode = SImode;
   2280  1.1  mrg 
   2281  1.1  mrg   return gen_rtx_REG (mode, SGPR_REGNO (RETURN_VALUE_REG));
   2282  1.1  mrg }
   2283  1.1  mrg 
   2284  1.1  mrg /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
   2285  1.1  mrg 
   2286  1.1  mrg    Return true if N is a possible register number for the function return
   2287  1.1  mrg    value.  */
   2288  1.1  mrg 
   2289  1.1  mrg static bool
   2290  1.1  mrg gcn_function_value_regno_p (const unsigned int n)
   2291  1.1  mrg {
   2292  1.1  mrg   return n == RETURN_VALUE_REG;
   2293  1.1  mrg }
   2294  1.1  mrg 
   2295  1.1  mrg /* Calculate the number of registers required to hold function argument
   2296  1.1  mrg    ARG.  */
   2297  1.1  mrg 
   2298  1.1  mrg static int
   2299  1.1  mrg num_arg_regs (const function_arg_info &arg)
   2300  1.1  mrg {
   2301  1.1  mrg   if (targetm.calls.must_pass_in_stack (arg))
   2302  1.1  mrg     return 0;
   2303  1.1  mrg 
   2304  1.1  mrg   int size = arg.promoted_size_in_bytes ();
   2305  1.1  mrg   return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
   2306  1.1  mrg }
   2307  1.1  mrg 
   2308  1.1  mrg /* Implement TARGET_STRICT_ARGUMENT_NAMING.
   2309  1.1  mrg 
   2310  1.1  mrg    Return true if the location where a function argument is passed
   2311  1.1  mrg    depends on whether or not it is a named argument
   2312  1.1  mrg 
   2313  1.1  mrg    For gcn, we know how to handle functions declared as stdarg: by
   2314  1.1  mrg    passing an extra pointer to the unnamed arguments.  However, the
   2315  1.1  mrg    Fortran frontend can produce a different situation, where a
   2316  1.1  mrg    function pointer is declared with no arguments, but the actual
   2317  1.1  mrg    function and calls to it take more arguments.  In that case, we
   2318  1.1  mrg    want to ensure the call matches the definition of the function.  */
   2319  1.1  mrg 
   2320  1.1  mrg static bool
   2321  1.1  mrg gcn_strict_argument_naming (cumulative_args_t cum_v)
   2322  1.1  mrg {
   2323  1.1  mrg   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   2324  1.1  mrg 
   2325  1.1  mrg   return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
   2326  1.1  mrg }
   2327  1.1  mrg 
   2328  1.1  mrg /* Implement TARGET_PRETEND_OUTGOING_VARARGS_NAMED.
   2329  1.1  mrg 
   2330  1.1  mrg    See comment on gcn_strict_argument_naming.  */
   2331  1.1  mrg 
   2332  1.1  mrg static bool
   2333  1.1  mrg gcn_pretend_outgoing_varargs_named (cumulative_args_t cum_v)
   2334  1.1  mrg {
   2335  1.1  mrg   return !gcn_strict_argument_naming (cum_v);
   2336  1.1  mrg }
   2337  1.1  mrg 
   2338  1.1  mrg /* Implement TARGET_FUNCTION_ARG.
   2339  1.1  mrg 
   2340  1.1  mrg    Return an RTX indicating whether a function argument is passed in a register
   2341  1.1  mrg    and if so, which register.  */
   2342  1.1  mrg 
   2343  1.1  mrg static rtx
   2344  1.1  mrg gcn_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   2345  1.1  mrg {
   2346  1.1  mrg   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   2347  1.1  mrg   if (cum->normal_function)
   2348  1.1  mrg     {
   2349  1.1  mrg       if (!arg.named || arg.end_marker_p ())
   2350  1.1  mrg 	return 0;
   2351  1.1  mrg 
   2352  1.1  mrg       if (targetm.calls.must_pass_in_stack (arg))
   2353  1.1  mrg 	return 0;
   2354  1.1  mrg 
   2355  1.1  mrg       /* Vector parameters are not supported yet.  */
   2356  1.1  mrg       if (VECTOR_MODE_P (arg.mode))
   2357  1.1  mrg 	return 0;
   2358  1.1  mrg 
   2359  1.1  mrg       int reg_num = FIRST_PARM_REG + cum->num;
   2360  1.1  mrg       int num_regs = num_arg_regs (arg);
   2361  1.1  mrg       if (num_regs > 0)
   2362  1.1  mrg 	while (reg_num % num_regs != 0)
   2363  1.1  mrg 	  reg_num++;
   2364  1.1  mrg       if (reg_num + num_regs <= FIRST_PARM_REG + NUM_PARM_REGS)
   2365  1.1  mrg 	return gen_rtx_REG (arg.mode, reg_num);
   2366  1.1  mrg     }
   2367  1.1  mrg   else
   2368  1.1  mrg     {
   2369  1.1  mrg       if (cum->num >= cum->args.nargs)
   2370  1.1  mrg 	{
   2371  1.1  mrg 	  cum->offset = (cum->offset + TYPE_ALIGN (arg.type) / 8 - 1)
   2372  1.1  mrg 	    & -(TYPE_ALIGN (arg.type) / 8);
   2373  1.1  mrg 	  cfun->machine->kernarg_segment_alignment
   2374  1.1  mrg 	    = MAX ((unsigned) cfun->machine->kernarg_segment_alignment,
   2375  1.1  mrg 		   TYPE_ALIGN (arg.type) / 8);
   2376  1.1  mrg 	  rtx addr = gen_rtx_REG (DImode,
   2377  1.1  mrg 				  cum->args.reg[KERNARG_SEGMENT_PTR_ARG]);
   2378  1.1  mrg 	  if (cum->offset)
   2379  1.1  mrg 	    addr = gen_rtx_PLUS (DImode, addr,
   2380  1.1  mrg 				 gen_int_mode (cum->offset, DImode));
   2381  1.1  mrg 	  rtx mem = gen_rtx_MEM (arg.mode, addr);
   2382  1.1  mrg 	  set_mem_attributes (mem, arg.type, 1);
   2383  1.1  mrg 	  set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT);
   2384  1.1  mrg 	  MEM_READONLY_P (mem) = 1;
   2385  1.1  mrg 	  return mem;
   2386  1.1  mrg 	}
   2387  1.1  mrg 
   2388  1.1  mrg       int a = cum->args.order[cum->num];
   2389  1.1  mrg       if (arg.mode != gcn_kernel_arg_types[a].mode)
   2390  1.1  mrg 	{
   2391  1.1  mrg 	  error ("wrong type of argument %s", gcn_kernel_arg_types[a].name);
   2392  1.1  mrg 	  return 0;
   2393  1.1  mrg 	}
   2394  1.1  mrg       return gen_rtx_REG ((machine_mode) gcn_kernel_arg_types[a].mode,
   2395  1.1  mrg 			  cum->args.reg[a]);
   2396  1.1  mrg     }
   2397  1.1  mrg   return 0;
   2398  1.1  mrg }
   2399  1.1  mrg 
   2400  1.1  mrg /* Implement TARGET_FUNCTION_ARG_ADVANCE.
   2401  1.1  mrg 
   2402  1.1  mrg    Updates the summarizer variable pointed to by CUM_V to advance past an
   2403  1.1  mrg    argument in the argument list.  */
   2404  1.1  mrg 
   2405  1.1  mrg static void
   2406  1.1  mrg gcn_function_arg_advance (cumulative_args_t cum_v,
   2407  1.1  mrg 			  const function_arg_info &arg)
   2408  1.1  mrg {
   2409  1.1  mrg   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   2410  1.1  mrg 
   2411  1.1  mrg   if (cum->normal_function)
   2412  1.1  mrg     {
   2413  1.1  mrg       if (!arg.named)
   2414  1.1  mrg 	return;
   2415  1.1  mrg 
   2416  1.1  mrg       int num_regs = num_arg_regs (arg);
   2417  1.1  mrg       if (num_regs > 0)
   2418  1.1  mrg 	while ((FIRST_PARM_REG + cum->num) % num_regs != 0)
   2419  1.1  mrg 	  cum->num++;
   2420  1.1  mrg       cum->num += num_regs;
   2421  1.1  mrg     }
   2422  1.1  mrg   else
   2423  1.1  mrg     {
   2424  1.1  mrg       if (cum->num < cum->args.nargs)
   2425  1.1  mrg 	cum->num++;
   2426  1.1  mrg       else
   2427  1.1  mrg 	{
   2428  1.1  mrg 	  cum->offset += tree_to_uhwi (TYPE_SIZE_UNIT (arg.type));
   2429  1.1  mrg 	  cfun->machine->kernarg_segment_byte_size = cum->offset;
   2430  1.1  mrg 	}
   2431  1.1  mrg     }
   2432  1.1  mrg }
   2433  1.1  mrg 
   2434  1.1  mrg /* Implement TARGET_ARG_PARTIAL_BYTES.
   2435  1.1  mrg 
   2436  1.1  mrg    Returns the number of bytes at the beginning of an argument that must be put
   2437  1.1  mrg    in registers.  The value must be zero for arguments that are passed entirely
   2438  1.1  mrg    in registers or that are entirely pushed on the stack.  */
   2439  1.1  mrg 
   2440  1.1  mrg static int
   2441  1.1  mrg gcn_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
   2442  1.1  mrg {
   2443  1.1  mrg   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   2444  1.1  mrg 
   2445  1.1  mrg   if (!arg.named)
   2446  1.1  mrg     return 0;
   2447  1.1  mrg 
   2448  1.1  mrg   if (targetm.calls.must_pass_in_stack (arg))
   2449  1.1  mrg     return 0;
   2450  1.1  mrg 
   2451  1.1  mrg   if (cum->num >= NUM_PARM_REGS)
   2452  1.1  mrg     return 0;
   2453  1.1  mrg 
   2454  1.1  mrg   /* If the argument fits entirely in registers, return 0.  */
   2455  1.1  mrg   if (cum->num + num_arg_regs (arg) <= NUM_PARM_REGS)
   2456  1.1  mrg     return 0;
   2457  1.1  mrg 
   2458  1.1  mrg   return (NUM_PARM_REGS - cum->num) * UNITS_PER_WORD;
   2459  1.1  mrg }
   2460  1.1  mrg 
   2461  1.1  mrg /* A normal function which takes a pointer argument may be passed a pointer to
   2462  1.1  mrg    LDS space (via a high-bits-set aperture), and that only works with FLAT
   2463  1.1  mrg    addressing, not GLOBAL.  Force FLAT addressing if the function has an
   2464  1.1  mrg    incoming pointer parameter.  NOTE: This is a heuristic that works in the
   2465  1.1  mrg    offloading case, but in general, a function might read global pointer
   2466  1.1  mrg    variables, etc. that may refer to LDS space or other special memory areas
   2467  1.1  mrg    not supported by GLOBAL instructions, and then this argument check would not
   2468  1.1  mrg    suffice.  */
   2469  1.1  mrg 
   2470  1.1  mrg static void
   2471  1.1  mrg gcn_detect_incoming_pointer_arg (tree fndecl)
   2472  1.1  mrg {
   2473  1.1  mrg   gcc_assert (cfun && cfun->machine);
   2474  1.1  mrg 
   2475  1.1  mrg   for (tree arg = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
   2476  1.1  mrg        arg;
   2477  1.1  mrg        arg = TREE_CHAIN (arg))
   2478  1.1  mrg     if (POINTER_TYPE_P (TREE_VALUE (arg)))
   2479  1.1  mrg       cfun->machine->use_flat_addressing = true;
   2480  1.1  mrg }
   2481  1.1  mrg 
   2482  1.1  mrg /* Implement INIT_CUMULATIVE_ARGS, via gcn.h.
   2483  1.1  mrg 
   2484  1.1  mrg    Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
   2485  1.1  mrg    whose data type is FNTYPE.  For a library call, FNTYPE is 0.  */
   2486  1.1  mrg 
   2487  1.1  mrg void
   2488  1.1  mrg gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ ,
   2489  1.1  mrg 			  tree fntype /* tree ptr for function decl */ ,
   2490  1.1  mrg 			  rtx libname /* SYMBOL_REF of library name or 0 */ ,
   2491  1.1  mrg 			  tree fndecl, int caller)
   2492  1.1  mrg {
   2493  1.1  mrg   memset (cum, 0, sizeof (*cum));
   2494  1.1  mrg   cum->fntype = fntype;
   2495  1.1  mrg   if (libname)
   2496  1.1  mrg     {
   2497  1.1  mrg       gcc_assert (cfun && cfun->machine);
   2498  1.1  mrg       cum->normal_function = true;
   2499  1.1  mrg       if (!caller)
   2500  1.1  mrg 	{
   2501  1.1  mrg 	  cfun->machine->normal_function = true;
   2502  1.1  mrg 	  gcn_detect_incoming_pointer_arg (fndecl);
   2503  1.1  mrg 	}
   2504  1.1  mrg       return;
   2505  1.1  mrg     }
   2506  1.1  mrg   tree attr = NULL;
   2507  1.1  mrg   if (fndecl)
   2508  1.1  mrg     attr = lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl));
   2509  1.1  mrg   if (fndecl && !attr)
   2510  1.1  mrg     attr = lookup_attribute ("amdgpu_hsa_kernel",
   2511  1.1  mrg 			     TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
   2512  1.1  mrg   if (!attr && fntype)
   2513  1.1  mrg     attr = lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype));
   2514  1.1  mrg   /* Handle main () as kernel, so we can run testsuite.
   2515  1.1  mrg      Handle OpenACC kernels similarly to main.  */
   2516  1.1  mrg   if (!attr && !caller && fndecl
   2517  1.1  mrg       && (MAIN_NAME_P (DECL_NAME (fndecl))
   2518  1.1  mrg 	  || lookup_attribute ("omp target entrypoint",
   2519  1.1  mrg 			       DECL_ATTRIBUTES (fndecl)) != NULL_TREE))
   2520  1.1  mrg     gcn_parse_amdgpu_hsa_kernel_attribute (&cum->args, NULL_TREE);
   2521  1.1  mrg   else
   2522  1.1  mrg     {
   2523  1.1  mrg       if (!attr || caller)
   2524  1.1  mrg 	{
   2525  1.1  mrg 	  gcc_assert (cfun && cfun->machine);
   2526  1.1  mrg 	  cum->normal_function = true;
   2527  1.1  mrg 	  if (!caller)
   2528  1.1  mrg 	    cfun->machine->normal_function = true;
   2529  1.1  mrg 	}
   2530  1.1  mrg       gcn_parse_amdgpu_hsa_kernel_attribute
   2531  1.1  mrg 	(&cum->args, attr ? TREE_VALUE (attr) : NULL_TREE);
   2532  1.1  mrg     }
   2533  1.1  mrg   cfun->machine->args = cum->args;
   2534  1.1  mrg   if (!caller && cfun->machine->normal_function)
   2535  1.1  mrg     gcn_detect_incoming_pointer_arg (fndecl);
   2536  1.1  mrg 
   2537  1.1  mrg   reinit_regs ();
   2538  1.1  mrg }
   2539  1.1  mrg 
   2540  1.1  mrg static bool
   2541  1.1  mrg gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
   2542  1.1  mrg {
   2543  1.1  mrg   machine_mode mode = TYPE_MODE (type);
   2544  1.1  mrg   HOST_WIDE_INT size = int_size_in_bytes (type);
   2545  1.1  mrg 
   2546  1.1  mrg   if (AGGREGATE_TYPE_P (type))
   2547  1.1  mrg     return true;
   2548  1.1  mrg 
   2549  1.1  mrg   /* Vector return values are not supported yet.  */
   2550  1.1  mrg   if (VECTOR_TYPE_P (type))
   2551  1.1  mrg     return true;
   2552  1.1  mrg 
   2553  1.1  mrg   if (mode == BLKmode)
   2554  1.1  mrg     return true;
   2555  1.1  mrg 
   2556  1.1  mrg   if (size > 2 * UNITS_PER_WORD)
   2557  1.1  mrg     return true;
   2558  1.1  mrg 
   2559  1.1  mrg   return false;
   2560  1.1  mrg }
   2561  1.1  mrg 
   2562  1.1  mrg /* Implement TARGET_PROMOTE_FUNCTION_MODE.
   2563  1.1  mrg 
   2564  1.1  mrg    Return the mode to use for outgoing function arguments.  */
   2565  1.1  mrg 
   2566  1.1  mrg machine_mode
   2567  1.1  mrg gcn_promote_function_mode (const_tree ARG_UNUSED (type), machine_mode mode,
   2568  1.1  mrg 			   int *ARG_UNUSED (punsignedp),
   2569  1.1  mrg 			   const_tree ARG_UNUSED (funtype),
   2570  1.1  mrg 			   int ARG_UNUSED (for_return))
   2571  1.1  mrg {
   2572  1.1  mrg   if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4)
   2573  1.1  mrg     return SImode;
   2574  1.1  mrg 
   2575  1.1  mrg   return mode;
   2576  1.1  mrg }
   2577  1.1  mrg 
   2578  1.1  mrg /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
   2579  1.1  mrg 
   2580  1.1  mrg    Derived from hppa_gimplify_va_arg_expr.  The generic routine doesn't handle
   2581  1.1  mrg    ARGS_GROW_DOWNWARDS.  */
   2582  1.1  mrg 
   2583  1.1  mrg static tree
   2584  1.1  mrg gcn_gimplify_va_arg_expr (tree valist, tree type,
   2585  1.1  mrg 			  gimple_seq *ARG_UNUSED (pre_p),
   2586  1.1  mrg 			  gimple_seq *ARG_UNUSED (post_p))
   2587  1.1  mrg {
   2588  1.1  mrg   tree ptr = build_pointer_type (type);
   2589  1.1  mrg   tree valist_type;
   2590  1.1  mrg   tree t, u;
   2591  1.1  mrg   bool indirect;
   2592  1.1  mrg 
   2593  1.1  mrg   indirect = pass_va_arg_by_reference (type);
   2594  1.1  mrg   if (indirect)
   2595  1.1  mrg     {
   2596  1.1  mrg       type = ptr;
   2597  1.1  mrg       ptr = build_pointer_type (type);
   2598  1.1  mrg     }
   2599  1.1  mrg   valist_type = TREE_TYPE (valist);
   2600  1.1  mrg 
   2601  1.1  mrg   /* Args grow down.  Not handled by generic routines.  */
   2602  1.1  mrg 
   2603  1.1  mrg   u = fold_convert (sizetype, size_in_bytes (type));
   2604  1.1  mrg   u = fold_build1 (NEGATE_EXPR, sizetype, u);
   2605  1.1  mrg   t = fold_build_pointer_plus (valist, u);
   2606  1.1  mrg 
   2607  1.1  mrg   /* Align to 8 byte boundary.  */
   2608  1.1  mrg 
   2609  1.1  mrg   u = build_int_cst (TREE_TYPE (t), -8);
   2610  1.1  mrg   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
   2611  1.1  mrg   t = fold_convert (valist_type, t);
   2612  1.1  mrg 
   2613  1.1  mrg   t = build2 (MODIFY_EXPR, valist_type, valist, t);
   2614  1.1  mrg 
   2615  1.1  mrg   t = fold_convert (ptr, t);
   2616  1.1  mrg   t = build_va_arg_indirect_ref (t);
   2617  1.1  mrg 
   2618  1.1  mrg   if (indirect)
   2619  1.1  mrg     t = build_va_arg_indirect_ref (t);
   2620  1.1  mrg 
   2621  1.1  mrg   return t;
   2622  1.1  mrg }
   2623  1.1  mrg 
   2624  1.1  mrg /* Return 1 if TRAIT NAME is present in the OpenMP context's
   2625  1.1  mrg    device trait set, return 0 if not present in any OpenMP context in the
   2626  1.1  mrg    whole translation unit, or -1 if not present in the current OpenMP context
   2627  1.1  mrg    but might be present in another OpenMP context in the same TU.  */
   2628  1.1  mrg 
   2629  1.1  mrg int
   2630  1.1  mrg gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
   2631  1.1  mrg 			      const char *name)
   2632  1.1  mrg {
   2633  1.1  mrg   switch (trait)
   2634  1.1  mrg     {
   2635  1.1  mrg     case omp_device_kind:
   2636  1.1  mrg       return strcmp (name, "gpu") == 0;
   2637  1.1  mrg     case omp_device_arch:
   2638  1.1  mrg       return strcmp (name, "gcn") == 0;
   2639  1.1  mrg     case omp_device_isa:
   2640  1.1  mrg       if (strcmp (name, "fiji") == 0)
   2641  1.1  mrg 	return gcn_arch == PROCESSOR_FIJI;
   2642  1.1  mrg       if (strcmp (name, "gfx900") == 0)
   2643  1.1  mrg 	return gcn_arch == PROCESSOR_VEGA10;
   2644  1.1  mrg       if (strcmp (name, "gfx906") == 0)
   2645  1.1  mrg 	return gcn_arch == PROCESSOR_VEGA20;
   2646  1.1  mrg       if (strcmp (name, "gfx908") == 0)
   2647  1.1  mrg 	return gcn_arch == PROCESSOR_GFX908;
   2648  1.1  mrg       return 0;
   2649  1.1  mrg     default:
   2650  1.1  mrg       gcc_unreachable ();
   2651  1.1  mrg     }
   2652  1.1  mrg }
   2653  1.1  mrg 
   2654  1.1  mrg /* Calculate stack offsets needed to create prologues and epilogues.  */
   2655  1.1  mrg 
   2656  1.1  mrg static struct machine_function *
   2657  1.1  mrg gcn_compute_frame_offsets (void)
   2658  1.1  mrg {
   2659  1.1  mrg   machine_function *offsets = cfun->machine;
   2660  1.1  mrg 
   2661  1.1  mrg   if (reload_completed)
   2662  1.1  mrg     return offsets;
   2663  1.1  mrg 
   2664  1.1  mrg   offsets->need_frame_pointer = frame_pointer_needed;
   2665  1.1  mrg 
   2666  1.1  mrg   offsets->outgoing_args_size = crtl->outgoing_args_size;
   2667  1.1  mrg   offsets->pretend_size = crtl->args.pretend_args_size;
   2668  1.1  mrg 
   2669  1.1  mrg   offsets->local_vars = get_frame_size ();
   2670  1.1  mrg 
   2671  1.1  mrg   offsets->lr_needs_saving = (!leaf_function_p ()
   2672  1.1  mrg 			      || df_regs_ever_live_p (LR_REGNUM)
   2673  1.1  mrg 			      || df_regs_ever_live_p (LR_REGNUM + 1));
   2674  1.1  mrg 
   2675  1.1  mrg   offsets->callee_saves = offsets->lr_needs_saving ? 8 : 0;
   2676  1.1  mrg 
   2677  1.1  mrg   for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   2678  1.1  mrg     if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
   2679  1.1  mrg 	|| ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
   2680  1.1  mrg 	    && frame_pointer_needed))
   2681  1.1  mrg       offsets->callee_saves += (VGPR_REGNO_P (regno) ? 256 : 4);
   2682  1.1  mrg 
   2683  1.1  mrg   /* Round up to 64-bit boundary to maintain stack alignment.  */
   2684  1.1  mrg   offsets->callee_saves = (offsets->callee_saves + 7) & ~7;
   2685  1.1  mrg 
   2686  1.1  mrg   return offsets;
   2687  1.1  mrg }
   2688  1.1  mrg 
   2689  1.1  mrg /* Insert code into the prologue or epilogue to store or load any
   2690  1.1  mrg    callee-save register to/from the stack.
   2691  1.1  mrg 
   2692  1.1  mrg    Helper function for gcn_expand_prologue and gcn_expand_epilogue.  */
   2693  1.1  mrg 
   2694  1.1  mrg static void
   2695  1.1  mrg move_callee_saved_registers (rtx sp, machine_function *offsets,
   2696  1.1  mrg 			     bool prologue)
   2697  1.1  mrg {
   2698  1.1  mrg   int regno, offset, saved_scalars;
   2699  1.1  mrg   rtx exec = gen_rtx_REG (DImode, EXEC_REG);
   2700  1.1  mrg   rtx vcc = gen_rtx_REG (DImode, VCC_LO_REG);
   2701  1.1  mrg   rtx offreg = gen_rtx_REG (SImode, SGPR_REGNO (22));
   2702  1.1  mrg   rtx as = gen_rtx_CONST_INT (VOIDmode, STACK_ADDR_SPACE);
   2703  1.1  mrg   HOST_WIDE_INT exec_set = 0;
   2704  1.1  mrg   int offreg_set = 0;
   2705  1.1  mrg   auto_vec<int> saved_sgprs;
   2706  1.1  mrg 
   2707  1.1  mrg   start_sequence ();
   2708  1.1  mrg 
   2709  1.1  mrg   /* Move scalars into two vector registers.  */
   2710  1.1  mrg   for (regno = 0, saved_scalars = 0; regno < FIRST_VGPR_REG; regno++)
   2711  1.1  mrg     if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
   2712  1.1  mrg 	|| ((regno & ~1) == LINK_REGNUM && offsets->lr_needs_saving)
   2713  1.1  mrg 	|| ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
   2714  1.1  mrg 	    && offsets->need_frame_pointer))
   2715  1.1  mrg       {
   2716  1.1  mrg 	rtx reg = gen_rtx_REG (SImode, regno);
   2717  1.1  mrg 	rtx vreg = gen_rtx_REG (V64SImode,
   2718  1.1  mrg 				VGPR_REGNO (6 + (saved_scalars / 64)));
   2719  1.1  mrg 	int lane = saved_scalars % 64;
   2720  1.1  mrg 
   2721  1.1  mrg 	if (prologue)
   2722  1.1  mrg 	  {
   2723  1.1  mrg 	    emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
   2724  1.1  mrg 	    saved_sgprs.safe_push (regno);
   2725  1.1  mrg 	  }
   2726  1.1  mrg 	else
   2727  1.1  mrg 	  emit_insn (gen_vec_extractv64sisi (reg, vreg, GEN_INT (lane)));
   2728  1.1  mrg 
   2729  1.1  mrg 	saved_scalars++;
   2730  1.1  mrg       }
   2731  1.1  mrg 
   2732  1.1  mrg   rtx move_scalars = get_insns ();
   2733  1.1  mrg   end_sequence ();
   2734  1.1  mrg   start_sequence ();
   2735  1.1  mrg 
   2736  1.1  mrg   /* Ensure that all vector lanes are moved.  */
   2737  1.1  mrg   exec_set = -1;
   2738  1.1  mrg   emit_move_insn (exec, GEN_INT (exec_set));
   2739  1.1  mrg 
   2740  1.1  mrg   /* Set up a vector stack pointer.  */
   2741  1.1  mrg   rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
   2742  1.1  mrg   rtx _0_4_8_12 = gen_rtx_REG (V64SImode, VGPR_REGNO (3));
   2743  1.1  mrg   emit_insn (gen_ashlv64si3_exec (_0_4_8_12, _0_1_2_3, GEN_INT (2),
   2744  1.1  mrg 				  gcn_gen_undef (V64SImode), exec));
   2745  1.1  mrg   rtx vsp = gen_rtx_REG (V64DImode, VGPR_REGNO (4));
   2746  1.1  mrg   emit_insn (gen_vec_duplicatev64di_exec (vsp, sp, gcn_gen_undef (V64DImode),
   2747  1.1  mrg 					  exec));
   2748  1.1  mrg   emit_insn (gen_addv64si3_vcc_exec (gcn_operand_part (V64SImode, vsp, 0),
   2749  1.1  mrg 				     gcn_operand_part (V64SImode, vsp, 0),
   2750  1.1  mrg 				     _0_4_8_12, vcc, gcn_gen_undef (V64SImode),
   2751  1.1  mrg 				     exec));
   2752  1.1  mrg   emit_insn (gen_addcv64si3_exec (gcn_operand_part (V64SImode, vsp, 1),
   2753  1.1  mrg 				  gcn_operand_part (V64SImode, vsp, 1),
   2754  1.1  mrg 				  const0_rtx, vcc, vcc,
   2755  1.1  mrg 				  gcn_gen_undef (V64SImode), exec));
   2756  1.1  mrg 
   2757  1.1  mrg   /* Move vectors.  */
   2758  1.1  mrg   for (regno = FIRST_VGPR_REG, offset = 0;
   2759  1.1  mrg        regno < FIRST_PSEUDO_REGISTER; regno++)
   2760  1.1  mrg     if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
   2761  1.1  mrg 	|| (regno == VGPR_REGNO (6) && saved_scalars > 0)
   2762  1.1  mrg 	|| (regno == VGPR_REGNO (7) && saved_scalars > 63))
   2763  1.1  mrg       {
   2764  1.1  mrg 	rtx reg = gen_rtx_REG (V64SImode, regno);
   2765  1.1  mrg 	int size = 256;
   2766  1.1  mrg 
   2767  1.1  mrg 	if (regno == VGPR_REGNO (6) && saved_scalars < 64)
   2768  1.1  mrg 	  size = saved_scalars * 4;
   2769  1.1  mrg 	else if (regno == VGPR_REGNO (7) && saved_scalars < 128)
   2770  1.1  mrg 	  size = (saved_scalars - 64) * 4;
   2771  1.1  mrg 
   2772  1.1  mrg 	if (size != 256 || exec_set != -1)
   2773  1.1  mrg 	  {
   2774  1.1  mrg 	    exec_set = ((unsigned HOST_WIDE_INT) 1 << (size / 4)) - 1;
   2775  1.1  mrg 	    emit_move_insn (exec, gen_int_mode (exec_set, DImode));
   2776  1.1  mrg 	  }
   2777  1.1  mrg 
   2778  1.1  mrg 	if (prologue)
   2779  1.1  mrg 	  {
   2780  1.1  mrg 	    rtx insn = emit_insn (gen_scatterv64si_insn_1offset_exec
   2781  1.1  mrg 				  (vsp, const0_rtx, reg, as, const0_rtx,
   2782  1.1  mrg 				   exec));
   2783  1.1  mrg 
   2784  1.1  mrg 	    /* Add CFI metadata.  */
   2785  1.1  mrg 	    rtx note;
   2786  1.1  mrg 	    if (regno == VGPR_REGNO (6) || regno == VGPR_REGNO (7))
   2787  1.1  mrg 	      {
   2788  1.1  mrg 		int start = (regno == VGPR_REGNO (7) ? 64 : 0);
   2789  1.1  mrg 		int count = MIN (saved_scalars - start, 64);
   2790  1.1  mrg 		int add_lr = (regno == VGPR_REGNO (6)
   2791  1.1  mrg 			      && offsets->lr_needs_saving);
   2792  1.1  mrg 		int lrdest = -1;
   2793  1.1  mrg 		rtvec seq = rtvec_alloc (count + add_lr);
   2794  1.1  mrg 
   2795  1.1  mrg 		/* Add an REG_FRAME_RELATED_EXPR entry for each scalar
   2796  1.1  mrg 		   register that was saved in this batch.  */
   2797  1.1  mrg 		for (int idx = 0; idx < count; idx++)
   2798  1.1  mrg 		  {
   2799  1.1  mrg 		    int stackaddr = offset + idx * 4;
   2800  1.1  mrg 		    rtx dest = gen_rtx_MEM (SImode,
   2801  1.1  mrg 					    gen_rtx_PLUS
   2802  1.1  mrg 					    (DImode, sp,
   2803  1.1  mrg 					     GEN_INT (stackaddr)));
   2804  1.1  mrg 		    rtx src = gen_rtx_REG (SImode, saved_sgprs[start + idx]);
   2805  1.1  mrg 		    rtx set = gen_rtx_SET (dest, src);
   2806  1.1  mrg 		    RTX_FRAME_RELATED_P (set) = 1;
   2807  1.1  mrg 		    RTVEC_ELT (seq, idx) = set;
   2808  1.1  mrg 
   2809  1.1  mrg 		    if (saved_sgprs[start + idx] == LINK_REGNUM)
   2810  1.1  mrg 		      lrdest = stackaddr;
   2811  1.1  mrg 		  }
   2812  1.1  mrg 
   2813  1.1  mrg 		/* Add an additional expression for DWARF_LINK_REGISTER if
   2814  1.1  mrg 		   LINK_REGNUM was saved.  */
   2815  1.1  mrg 		if (lrdest != -1)
   2816  1.1  mrg 		  {
   2817  1.1  mrg 		    rtx dest = gen_rtx_MEM (DImode,
   2818  1.1  mrg 					    gen_rtx_PLUS
   2819  1.1  mrg 					    (DImode, sp,
   2820  1.1  mrg 					     GEN_INT (lrdest)));
   2821  1.1  mrg 		    rtx src = gen_rtx_REG (DImode, DWARF_LINK_REGISTER);
   2822  1.1  mrg 		    rtx set = gen_rtx_SET (dest, src);
   2823  1.1  mrg 		    RTX_FRAME_RELATED_P (set) = 1;
   2824  1.1  mrg 		    RTVEC_ELT (seq, count) = set;
   2825  1.1  mrg 		  }
   2826  1.1  mrg 
   2827  1.1  mrg 		note = gen_rtx_SEQUENCE (VOIDmode, seq);
   2828  1.1  mrg 	      }
   2829  1.1  mrg 	    else
   2830  1.1  mrg 	      {
   2831  1.1  mrg 		rtx dest = gen_rtx_MEM (V64SImode,
   2832  1.1  mrg 					gen_rtx_PLUS (DImode, sp,
   2833  1.1  mrg 						      GEN_INT (offset)));
   2834  1.1  mrg 		rtx src = gen_rtx_REG (V64SImode, regno);
   2835  1.1  mrg 		note = gen_rtx_SET (dest, src);
   2836  1.1  mrg 	      }
   2837  1.1  mrg 	    RTX_FRAME_RELATED_P (insn) = 1;
   2838  1.1  mrg 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
   2839  1.1  mrg 	  }
   2840  1.1  mrg 	else
   2841  1.1  mrg 	  emit_insn (gen_gatherv64si_insn_1offset_exec
   2842  1.1  mrg 		     (reg, vsp, const0_rtx, as, const0_rtx,
   2843  1.1  mrg 		      gcn_gen_undef (V64SImode), exec));
   2844  1.1  mrg 
   2845  1.1  mrg 	/* Move our VSP to the next stack entry.  */
   2846  1.1  mrg 	if (offreg_set != size)
   2847  1.1  mrg 	  {
   2848  1.1  mrg 	    offreg_set = size;
   2849  1.1  mrg 	    emit_move_insn (offreg, GEN_INT (size));
   2850  1.1  mrg 	  }
   2851  1.1  mrg 	if (exec_set != -1)
   2852  1.1  mrg 	  {
   2853  1.1  mrg 	    exec_set = -1;
   2854  1.1  mrg 	    emit_move_insn (exec, GEN_INT (exec_set));
   2855  1.1  mrg 	  }
   2856  1.1  mrg 	emit_insn (gen_addv64si3_vcc_dup_exec
   2857  1.1  mrg 		   (gcn_operand_part (V64SImode, vsp, 0),
   2858  1.1  mrg 		    offreg, gcn_operand_part (V64SImode, vsp, 0),
   2859  1.1  mrg 		    vcc, gcn_gen_undef (V64SImode), exec));
   2860  1.1  mrg 	emit_insn (gen_addcv64si3_exec
   2861  1.1  mrg 		   (gcn_operand_part (V64SImode, vsp, 1),
   2862  1.1  mrg 		    gcn_operand_part (V64SImode, vsp, 1),
   2863  1.1  mrg 		    const0_rtx, vcc, vcc, gcn_gen_undef (V64SImode), exec));
   2864  1.1  mrg 
   2865  1.1  mrg 	offset += size;
   2866  1.1  mrg       }
   2867  1.1  mrg 
   2868  1.1  mrg   rtx move_vectors = get_insns ();
   2869  1.1  mrg   end_sequence ();
   2870  1.1  mrg 
   2871  1.1  mrg   if (prologue)
   2872  1.1  mrg     {
   2873  1.1  mrg       emit_insn (move_scalars);
   2874  1.1  mrg       emit_insn (move_vectors);
   2875  1.1  mrg     }
   2876  1.1  mrg   else
   2877  1.1  mrg     {
   2878  1.1  mrg       emit_insn (move_vectors);
   2879  1.1  mrg       emit_insn (move_scalars);
   2880  1.1  mrg     }
   2881  1.1  mrg }
   2882  1.1  mrg 
   2883  1.1  mrg /* Generate prologue.  Called from gen_prologue during pro_and_epilogue pass.
   2884  1.1  mrg 
   2885  1.1  mrg    For a non-kernel function, the stack layout looks like this (interim),
   2886  1.1  mrg    growing *upwards*:
   2887  1.1  mrg 
   2888  1.1  mrg  hi | + ...
   2889  1.1  mrg     |__________________| <-- current SP
   2890  1.1  mrg     | outgoing args    |
   2891  1.1  mrg     |__________________|
   2892  1.1  mrg     | (alloca space)   |
   2893  1.1  mrg     |__________________|
   2894  1.1  mrg     | local vars       |
   2895  1.1  mrg     |__________________| <-- FP/hard FP
   2896  1.1  mrg     | callee-save regs |
   2897  1.1  mrg     |__________________| <-- soft arg pointer
   2898  1.1  mrg     | pretend args     |
   2899  1.1  mrg     |__________________| <-- incoming SP
   2900  1.1  mrg     | incoming args    |
   2901  1.1  mrg  lo |..................|
   2902  1.1  mrg 
   2903  1.1  mrg    This implies arguments (beyond the first N in registers) must grow
   2904  1.1  mrg    downwards (as, apparently, PA has them do).
   2905  1.1  mrg 
   2906  1.1  mrg    For a kernel function we have the simpler:
   2907  1.1  mrg 
   2908  1.1  mrg  hi | + ...
   2909  1.1  mrg     |__________________| <-- current SP
   2910  1.1  mrg     | outgoing args    |
   2911  1.1  mrg     |__________________|
   2912  1.1  mrg     | (alloca space)   |
   2913  1.1  mrg     |__________________|
   2914  1.1  mrg     | local vars       |
   2915  1.1  mrg  lo |__________________| <-- FP/hard FP
   2916  1.1  mrg 
   2917  1.1  mrg */
   2918  1.1  mrg 
   2919  1.1  mrg void
   2920  1.1  mrg gcn_expand_prologue ()
   2921  1.1  mrg {
   2922  1.1  mrg   machine_function *offsets = gcn_compute_frame_offsets ();
   2923  1.1  mrg 
   2924  1.1  mrg   if (!cfun || !cfun->machine || cfun->machine->normal_function)
   2925  1.1  mrg     {
   2926  1.1  mrg       rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
   2927  1.1  mrg       rtx sp_hi = gcn_operand_part (Pmode, sp, 1);
   2928  1.1  mrg       rtx sp_lo = gcn_operand_part (Pmode, sp, 0);
   2929  1.1  mrg       rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
   2930  1.1  mrg       rtx fp_hi = gcn_operand_part (Pmode, fp, 1);
   2931  1.1  mrg       rtx fp_lo = gcn_operand_part (Pmode, fp, 0);
   2932  1.1  mrg 
   2933  1.1  mrg       start_sequence ();
   2934  1.1  mrg 
   2935  1.1  mrg       if (offsets->pretend_size > 0)
   2936  1.1  mrg 	{
   2937  1.1  mrg 	  /* FIXME: Do the actual saving of register pretend args to the stack.
   2938  1.1  mrg 	     Register order needs consideration.  */
   2939  1.1  mrg 	}
   2940  1.1  mrg 
   2941  1.1  mrg       /* Save callee-save regs.  */
   2942  1.1  mrg       move_callee_saved_registers (sp, offsets, true);
   2943  1.1  mrg 
   2944  1.1  mrg       HOST_WIDE_INT sp_adjust = offsets->pretend_size
   2945  1.1  mrg 	+ offsets->callee_saves
   2946  1.1  mrg 	+ offsets->local_vars + offsets->outgoing_args_size;
   2947  1.1  mrg       if (sp_adjust > 0)
   2948  1.1  mrg 	{
   2949  1.1  mrg 	  /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so
   2950  1.1  mrg 	     we use split add explictly, and specify the DImode add in
   2951  1.1  mrg 	     the note.  */
   2952  1.1  mrg 	  rtx scc = gen_rtx_REG (BImode, SCC_REG);
   2953  1.1  mrg 	  rtx adjustment = gen_int_mode (sp_adjust, SImode);
   2954  1.1  mrg 	  rtx insn = emit_insn (gen_addsi3_scalar_carry (sp_lo, sp_lo,
   2955  1.1  mrg 							 adjustment, scc));
   2956  1.1  mrg 	  if (!offsets->need_frame_pointer)
   2957  1.1  mrg 	    {
   2958  1.1  mrg 	      RTX_FRAME_RELATED_P (insn) = 1;
   2959  1.1  mrg 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
   2960  1.1  mrg 			    gen_rtx_SET (sp,
   2961  1.1  mrg 					 gen_rtx_PLUS (DImode, sp,
   2962  1.1  mrg 						       adjustment)));
   2963  1.1  mrg 	    }
   2964  1.1  mrg 	  emit_insn (gen_addcsi3_scalar_zero (sp_hi, sp_hi, scc));
   2965  1.1  mrg 	}
   2966  1.1  mrg 
   2967  1.1  mrg       if (offsets->need_frame_pointer)
   2968  1.1  mrg 	{
   2969  1.1  mrg 	  /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so
   2970  1.1  mrg 	     we use split add explictly, and specify the DImode add in
   2971  1.1  mrg 	     the note.  */
   2972  1.1  mrg 	  rtx scc = gen_rtx_REG (BImode, SCC_REG);
   2973  1.1  mrg 	  int fp_adjust = -(offsets->local_vars + offsets->outgoing_args_size);
   2974  1.1  mrg 	  rtx adjustment = gen_int_mode (fp_adjust, SImode);
   2975  1.1  mrg 	  rtx insn = emit_insn (gen_addsi3_scalar_carry(fp_lo, sp_lo,
   2976  1.1  mrg 							adjustment, scc));
   2977  1.1  mrg 	  emit_insn (gen_addcsi3_scalar (fp_hi, sp_hi,
   2978  1.1  mrg 					 (fp_adjust < 0 ? GEN_INT (-1)
   2979  1.1  mrg 					  : const0_rtx),
   2980  1.1  mrg 					 scc, scc));
   2981  1.1  mrg 
   2982  1.1  mrg 	  /* Set the CFA to the entry stack address, as an offset from the
   2983  1.1  mrg 	     frame pointer.  This is preferred because the frame pointer is
   2984  1.1  mrg 	     saved in each frame, whereas the stack pointer is not.  */
   2985  1.1  mrg 	  RTX_FRAME_RELATED_P (insn) = 1;
   2986  1.1  mrg 	  add_reg_note (insn, REG_CFA_DEF_CFA,
   2987  1.1  mrg 			gen_rtx_PLUS (DImode, fp,
   2988  1.1  mrg 				      GEN_INT (-(offsets->pretend_size
   2989  1.1  mrg 						 + offsets->callee_saves))));
   2990  1.1  mrg 	}
   2991  1.1  mrg 
   2992  1.1  mrg       rtx_insn *seq = get_insns ();
   2993  1.1  mrg       end_sequence ();
   2994  1.1  mrg 
   2995  1.1  mrg       emit_insn (seq);
   2996  1.1  mrg     }
   2997  1.1  mrg   else
   2998  1.1  mrg     {
   2999  1.1  mrg       rtx wave_offset = gen_rtx_REG (SImode,
   3000  1.1  mrg 				     cfun->machine->args.
   3001  1.1  mrg 				     reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]);
   3002  1.1  mrg 
   3003  1.1  mrg       if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG))
   3004  1.1  mrg 	{
   3005  1.1  mrg 	  rtx fs_init_lo =
   3006  1.1  mrg 	    gen_rtx_REG (SImode,
   3007  1.1  mrg 			 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG]);
   3008  1.1  mrg 	  rtx fs_init_hi =
   3009  1.1  mrg 	    gen_rtx_REG (SImode,
   3010  1.1  mrg 			 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG] + 1);
   3011  1.1  mrg 	  rtx fs_reg_lo = gen_rtx_REG (SImode, FLAT_SCRATCH_REG);
   3012  1.1  mrg 	  rtx fs_reg_hi = gen_rtx_REG (SImode, FLAT_SCRATCH_REG + 1);
   3013  1.1  mrg 
   3014  1.1  mrg 	  /*rtx queue = gen_rtx_REG(DImode,
   3015  1.1  mrg 				  cfun->machine->args.reg[QUEUE_PTR_ARG]);
   3016  1.1  mrg 	  rtx aperture = gen_rtx_MEM (SImode,
   3017  1.1  mrg 				      gen_rtx_PLUS (DImode, queue,
   3018  1.1  mrg 						    gen_int_mode (68, SImode)));
   3019  1.1  mrg 	  set_mem_addr_space (aperture, ADDR_SPACE_SCALAR_FLAT);*/
   3020  1.1  mrg 
   3021  1.1  mrg 	  /* Set up flat_scratch.  */
   3022  1.1  mrg 	  emit_insn (gen_addsi3_scc (fs_reg_hi, fs_init_lo, wave_offset));
   3023  1.1  mrg 	  emit_insn (gen_lshrsi3_scc (fs_reg_hi, fs_reg_hi,
   3024  1.1  mrg 				      gen_int_mode (8, SImode)));
   3025  1.1  mrg 	  emit_move_insn (fs_reg_lo, fs_init_hi);
   3026  1.1  mrg 	}
   3027  1.1  mrg 
   3028  1.1  mrg       /* Set up frame pointer and stack pointer.  */
   3029  1.1  mrg       rtx sp = gen_rtx_REG (DImode, STACK_POINTER_REGNUM);
   3030  1.1  mrg       rtx sp_hi = simplify_gen_subreg (SImode, sp, DImode, 4);
   3031  1.1  mrg       rtx sp_lo = simplify_gen_subreg (SImode, sp, DImode, 0);
   3032  1.1  mrg       rtx fp = gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM);
   3033  1.1  mrg       rtx fp_hi = simplify_gen_subreg (SImode, fp, DImode, 4);
   3034  1.1  mrg       rtx fp_lo = simplify_gen_subreg (SImode, fp, DImode, 0);
   3035  1.1  mrg 
   3036  1.1  mrg       HOST_WIDE_INT sp_adjust = (offsets->local_vars
   3037  1.1  mrg 				 + offsets->outgoing_args_size);
   3038  1.1  mrg 
   3039  1.1  mrg       /* Initialise FP and SP from the buffer descriptor in s[0:3].  */
   3040  1.1  mrg       emit_move_insn (fp_lo, gen_rtx_REG (SImode, 0));
   3041  1.1  mrg       emit_insn (gen_andsi3_scc (fp_hi, gen_rtx_REG (SImode, 1),
   3042  1.1  mrg 				 gen_int_mode (0xffff, SImode)));
   3043  1.1  mrg       rtx scc = gen_rtx_REG (BImode, SCC_REG);
   3044  1.1  mrg       emit_insn (gen_addsi3_scalar_carry (fp_lo, fp_lo, wave_offset, scc));
   3045  1.1  mrg       emit_insn (gen_addcsi3_scalar_zero (fp_hi, fp_hi, scc));
   3046  1.1  mrg 
   3047  1.1  mrg       /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so we use
   3048  1.1  mrg 	 split add explictly, and specify the DImode add in the note.
   3049  1.1  mrg          The DWARF info expects that the callee-save data is in the frame,
   3050  1.1  mrg          even though it isn't (because this is the entry point), so we
   3051  1.1  mrg          make a notional adjustment to the DWARF frame offset here.  */
   3052  1.1  mrg       rtx dbg_adjustment = gen_int_mode (sp_adjust + offsets->callee_saves,
   3053  1.1  mrg 					 DImode);
   3054  1.1  mrg       rtx insn;
   3055  1.1  mrg       if (sp_adjust > 0)
   3056  1.1  mrg 	{
   3057  1.1  mrg 	  rtx scc = gen_rtx_REG (BImode, SCC_REG);
   3058  1.1  mrg 	  rtx adjustment = gen_int_mode (sp_adjust, DImode);
   3059  1.1  mrg 	  insn = emit_insn (gen_addsi3_scalar_carry(sp_lo, fp_lo, adjustment,
   3060  1.1  mrg 						    scc));
   3061  1.1  mrg 	  emit_insn (gen_addcsi3_scalar_zero (sp_hi, fp_hi, scc));
   3062  1.1  mrg 	}
   3063  1.1  mrg       else
   3064  1.1  mrg 	insn = emit_move_insn (sp, fp);
   3065  1.1  mrg       RTX_FRAME_RELATED_P (insn) = 1;
   3066  1.1  mrg       add_reg_note (insn, REG_FRAME_RELATED_EXPR,
   3067  1.1  mrg 		    gen_rtx_SET (sp, gen_rtx_PLUS (DImode, sp,
   3068  1.1  mrg 						   dbg_adjustment)));
   3069  1.1  mrg 
   3070  1.1  mrg       if (offsets->need_frame_pointer)
   3071  1.1  mrg 	{
   3072  1.1  mrg 	  /* Set the CFA to the entry stack address, as an offset from the
   3073  1.1  mrg 	     frame pointer.  This is necessary when alloca is used, and
   3074  1.1  mrg 	     harmless otherwise.  */
   3075  1.1  mrg 	  rtx neg_adjust = gen_int_mode (-offsets->callee_saves, DImode);
   3076  1.1  mrg 	  add_reg_note (insn, REG_CFA_DEF_CFA,
   3077  1.1  mrg 			gen_rtx_PLUS (DImode, fp, neg_adjust));
   3078  1.1  mrg 	}
   3079  1.1  mrg 
   3080  1.1  mrg       /* Make sure the flat scratch reg doesn't get optimised away.  */
   3081  1.1  mrg       emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG)));
   3082  1.1  mrg     }
   3083  1.1  mrg 
   3084  1.1  mrg   /* Ensure that the scheduler doesn't do anything unexpected.  */
   3085  1.1  mrg   emit_insn (gen_blockage ());
   3086  1.1  mrg 
   3087  1.1  mrg   /* m0 is initialized for the usual LDS DS and FLAT memory case.
   3088  1.1  mrg      The low-part is the address of the topmost addressable byte, which is
   3089  1.1  mrg      size-1.  The high-part is an offset and should be zero.  */
   3090  1.1  mrg   emit_move_insn (gen_rtx_REG (SImode, M0_REG),
   3091  1.1  mrg 		  gen_int_mode (LDS_SIZE, SImode));
   3092  1.1  mrg 
   3093  1.1  mrg   emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
   3094  1.1  mrg 
   3095  1.1  mrg   if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
   3096  1.1  mrg     {
   3097  1.1  mrg       /* OpenMP kernels have an implicit call to gomp_gcn_enter_kernel.  */
   3098  1.1  mrg       rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
   3099  1.1  mrg       emit_move_insn (fn_reg, gen_rtx_SYMBOL_REF (Pmode,
   3100  1.1  mrg 						  "gomp_gcn_enter_kernel"));
   3101  1.1  mrg       emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
   3102  1.1  mrg     }
   3103  1.1  mrg }
   3104  1.1  mrg 
   3105  1.1  mrg /* Generate epilogue.  Called from gen_epilogue during pro_and_epilogue pass.
   3106  1.1  mrg 
   3107  1.1  mrg    See gcn_expand_prologue for stack details.  */
   3108  1.1  mrg 
   3109  1.1  mrg void
   3110  1.1  mrg gcn_expand_epilogue (void)
   3111  1.1  mrg {
   3112  1.1  mrg   /* Ensure that the scheduler doesn't do anything unexpected.  */
   3113  1.1  mrg   emit_insn (gen_blockage ());
   3114  1.1  mrg 
   3115  1.1  mrg   if (!cfun || !cfun->machine || cfun->machine->normal_function)
   3116  1.1  mrg     {
   3117  1.1  mrg       machine_function *offsets = gcn_compute_frame_offsets ();
   3118  1.1  mrg       rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
   3119  1.1  mrg       rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
   3120  1.1  mrg 
   3121  1.1  mrg       HOST_WIDE_INT sp_adjust = offsets->callee_saves + offsets->pretend_size;
   3122  1.1  mrg 
   3123  1.1  mrg       if (offsets->need_frame_pointer)
   3124  1.1  mrg 	{
   3125  1.1  mrg 	  /* Restore old SP from the frame pointer.  */
   3126  1.1  mrg 	  if (sp_adjust > 0)
   3127  1.1  mrg 	    emit_insn (gen_subdi3 (sp, fp, gen_int_mode (sp_adjust, DImode)));
   3128  1.1  mrg 	  else
   3129  1.1  mrg 	    emit_move_insn (sp, fp);
   3130  1.1  mrg 	}
   3131  1.1  mrg       else
   3132  1.1  mrg 	{
   3133  1.1  mrg 	  /* Restore old SP from current SP.  */
   3134  1.1  mrg 	  sp_adjust += offsets->outgoing_args_size + offsets->local_vars;
   3135  1.1  mrg 
   3136  1.1  mrg 	  if (sp_adjust > 0)
   3137  1.1  mrg 	    emit_insn (gen_subdi3 (sp, sp, gen_int_mode (sp_adjust, DImode)));
   3138  1.1  mrg 	}
   3139  1.1  mrg 
   3140  1.1  mrg       move_callee_saved_registers (sp, offsets, false);
   3141  1.1  mrg 
   3142  1.1  mrg       /* There's no explicit use of the link register on the return insn.  Emit
   3143  1.1  mrg          one here instead.  */
   3144  1.1  mrg       if (offsets->lr_needs_saving)
   3145  1.1  mrg 	emit_use (gen_rtx_REG (DImode, LINK_REGNUM));
   3146  1.1  mrg 
   3147  1.1  mrg       /* Similar for frame pointer.  */
   3148  1.1  mrg       if (offsets->need_frame_pointer)
   3149  1.1  mrg 	emit_use (gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM));
   3150  1.1  mrg     }
   3151  1.1  mrg   else if (flag_openmp)
   3152  1.1  mrg     {
   3153  1.1  mrg       /* OpenMP kernels have an implicit call to gomp_gcn_exit_kernel.  */
   3154  1.1  mrg       rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
   3155  1.1  mrg       emit_move_insn (fn_reg,
   3156  1.1  mrg 		      gen_rtx_SYMBOL_REF (Pmode, "gomp_gcn_exit_kernel"));
   3157  1.1  mrg       emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
   3158  1.1  mrg     }
   3159  1.1  mrg   else if (TREE_CODE (TREE_TYPE (DECL_RESULT (cfun->decl))) != VOID_TYPE)
   3160  1.1  mrg     {
   3161  1.1  mrg       /* Assume that an exit value compatible with gcn-run is expected.
   3162  1.1  mrg          That is, the third input parameter is an int*.
   3163  1.1  mrg 
   3164  1.1  mrg          We can't allocate any new registers, but the kernarg_reg is
   3165  1.1  mrg          dead after this, so we'll use that.  */
   3166  1.1  mrg       rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
   3167  1.1  mrg 				     [KERNARG_SEGMENT_PTR_ARG]);
   3168  1.1  mrg       rtx retptr_mem = gen_rtx_MEM (DImode,
   3169  1.1  mrg 				    gen_rtx_PLUS (DImode, kernarg_reg,
   3170  1.1  mrg 						  GEN_INT (16)));
   3171  1.1  mrg       set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
   3172  1.1  mrg       emit_move_insn (kernarg_reg, retptr_mem);
   3173  1.1  mrg 
   3174  1.1  mrg       rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
   3175  1.1  mrg       set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
   3176  1.1  mrg       emit_move_insn (retval_mem,
   3177  1.1  mrg 		      gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
   3178  1.1  mrg     }
   3179  1.1  mrg 
   3180  1.1  mrg   emit_jump_insn (gen_gcn_return ());
   3181  1.1  mrg }
   3182  1.1  mrg 
   3183  1.1  mrg /* Implement TARGET_FRAME_POINTER_REQUIRED.
   3184  1.1  mrg 
   3185  1.1  mrg    Return true if the frame pointer should not be eliminated.  */
   3186  1.1  mrg 
   3187  1.1  mrg bool
   3188  1.1  mrg gcn_frame_pointer_rqd (void)
   3189  1.1  mrg {
   3190  1.1  mrg   /* GDB needs the frame pointer in order to unwind properly,
   3191  1.1  mrg      but that's not important for the entry point, unless alloca is used.
   3192  1.1  mrg      It's not important for code execution, so we should repect the
   3193  1.1  mrg      -fomit-frame-pointer flag.  */
   3194  1.1  mrg   return (!flag_omit_frame_pointer
   3195  1.1  mrg 	  && cfun
   3196  1.1  mrg 	  && (cfun->calls_alloca
   3197  1.1  mrg 	      || (cfun->machine && cfun->machine->normal_function)));
   3198  1.1  mrg }
   3199  1.1  mrg 
   3200  1.1  mrg /* Implement TARGET_CAN_ELIMINATE.
   3201  1.1  mrg 
   3202  1.1  mrg    Return true if the compiler is allowed to try to replace register number
   3203  1.1  mrg    FROM_REG with register number TO_REG.
   3204  1.1  mrg 
   3205  1.1  mrg    FIXME: is the default "true" not enough? Should this be a negative set?  */
   3206  1.1  mrg 
   3207  1.1  mrg bool
   3208  1.1  mrg gcn_can_eliminate_p (int /*from_reg */ , int to_reg)
   3209  1.1  mrg {
   3210  1.1  mrg   return (to_reg == HARD_FRAME_POINTER_REGNUM
   3211  1.1  mrg 	  || to_reg == STACK_POINTER_REGNUM);
   3212  1.1  mrg }
   3213  1.1  mrg 
   3214  1.1  mrg /* Implement INITIAL_ELIMINATION_OFFSET.
   3215  1.1  mrg 
   3216  1.1  mrg    Returns the initial difference between the specified pair of registers, in
   3217  1.1  mrg    terms of stack position.  */
   3218  1.1  mrg 
   3219  1.1  mrg HOST_WIDE_INT
   3220  1.1  mrg gcn_initial_elimination_offset (int from, int to)
   3221  1.1  mrg {
   3222  1.1  mrg   machine_function *offsets = gcn_compute_frame_offsets ();
   3223  1.1  mrg 
   3224  1.1  mrg   switch (from)
   3225  1.1  mrg     {
   3226  1.1  mrg     case ARG_POINTER_REGNUM:
   3227  1.1  mrg       if (to == STACK_POINTER_REGNUM)
   3228  1.1  mrg 	return -(offsets->callee_saves + offsets->local_vars
   3229  1.1  mrg 		 + offsets->outgoing_args_size);
   3230  1.1  mrg       else if (to == FRAME_POINTER_REGNUM || to == HARD_FRAME_POINTER_REGNUM)
   3231  1.1  mrg 	return -offsets->callee_saves;
   3232  1.1  mrg       else
   3233  1.1  mrg 	gcc_unreachable ();
   3234  1.1  mrg       break;
   3235  1.1  mrg 
   3236  1.1  mrg     case FRAME_POINTER_REGNUM:
   3237  1.1  mrg       if (to == STACK_POINTER_REGNUM)
   3238  1.1  mrg 	return -(offsets->local_vars + offsets->outgoing_args_size);
   3239  1.1  mrg       else if (to == HARD_FRAME_POINTER_REGNUM)
   3240  1.1  mrg 	return 0;
   3241  1.1  mrg       else
   3242  1.1  mrg 	gcc_unreachable ();
   3243  1.1  mrg       break;
   3244  1.1  mrg 
   3245  1.1  mrg     default:
   3246  1.1  mrg       gcc_unreachable ();
   3247  1.1  mrg     }
   3248  1.1  mrg }
   3249  1.1  mrg 
   3250  1.1  mrg /* Implement HARD_REGNO_RENAME_OK.
   3251  1.1  mrg 
   3252  1.1  mrg    Return true if it is permissible to rename a hard register from
   3253  1.1  mrg    FROM_REG to TO_REG.  */
   3254  1.1  mrg 
   3255  1.1  mrg bool
   3256  1.1  mrg gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned int to_reg)
   3257  1.1  mrg {
   3258  1.1  mrg   if (from_reg == SCC_REG
   3259  1.1  mrg       || from_reg == VCC_LO_REG || from_reg == VCC_HI_REG
   3260  1.1  mrg       || from_reg == EXEC_LO_REG || from_reg == EXEC_HI_REG
   3261  1.1  mrg       || to_reg == SCC_REG
   3262  1.1  mrg       || to_reg == VCC_LO_REG || to_reg == VCC_HI_REG
   3263  1.1  mrg       || to_reg == EXEC_LO_REG || to_reg == EXEC_HI_REG)
   3264  1.1  mrg     return false;
   3265  1.1  mrg 
   3266  1.1  mrg   /* Allow the link register to be used if it was saved.  */
   3267  1.1  mrg   if ((to_reg & ~1) == LINK_REGNUM)
   3268  1.1  mrg     return !cfun || cfun->machine->lr_needs_saving;
   3269  1.1  mrg 
   3270  1.1  mrg   /* Allow the registers used for the static chain to be used if the chain is
   3271  1.1  mrg      not in active use.  */
   3272  1.1  mrg   if ((to_reg & ~1) == STATIC_CHAIN_REGNUM)
   3273  1.1  mrg     return !cfun
   3274  1.1  mrg 	|| !(cfun->static_chain_decl
   3275  1.1  mrg 	     && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
   3276  1.1  mrg 	     && df_regs_ever_live_p (STATIC_CHAIN_REGNUM + 1));
   3277  1.1  mrg 
   3278  1.1  mrg   return true;
   3279  1.1  mrg }
   3280  1.1  mrg 
   3281  1.1  mrg /* Implement HARD_REGNO_CALLER_SAVE_MODE.
   3282  1.1  mrg 
   3283  1.1  mrg    Which mode is required for saving NREGS of a pseudo-register in
   3284  1.1  mrg    call-clobbered hard register REGNO.  */
   3285  1.1  mrg 
   3286  1.1  mrg machine_mode
   3287  1.1  mrg gcn_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
   3288  1.1  mrg 				 machine_mode regmode)
   3289  1.1  mrg {
   3290  1.1  mrg   machine_mode result = choose_hard_reg_mode (regno, nregs, NULL);
   3291  1.1  mrg 
   3292  1.1  mrg   if (VECTOR_MODE_P (result) && !VECTOR_MODE_P (regmode))
   3293  1.1  mrg     result = (nregs == 1 ? SImode : DImode);
   3294  1.1  mrg 
   3295  1.1  mrg   return result;
   3296  1.1  mrg }
   3297  1.1  mrg 
   3298  1.1  mrg /* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.
   3299  1.1  mrg 
   3300  1.1  mrg    Output assembler code for a block containing the constant parts
   3301  1.1  mrg    of a trampoline, leaving space for the variable parts.  */
   3302  1.1  mrg 
   3303  1.1  mrg static void
   3304  1.1  mrg gcn_asm_trampoline_template (FILE *f)
   3305  1.1  mrg {
   3306  1.1  mrg   /* The source operand of the move instructions must be a 32-bit
   3307  1.1  mrg      constant following the opcode.  */
   3308  1.1  mrg   asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM);
   3309  1.1  mrg   asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM + 1);
   3310  1.1  mrg   asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG);
   3311  1.1  mrg   asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG + 1);
   3312  1.1  mrg   asm_fprintf (f, "\ts_setpc_b64\ts[%i:%i]\n", CC_SAVE_REG, CC_SAVE_REG + 1);
   3313  1.1  mrg }
   3314  1.1  mrg 
   3315  1.1  mrg /* Implement TARGET_TRAMPOLINE_INIT.
   3316  1.1  mrg 
   3317  1.1  mrg    Emit RTL insns to initialize the variable parts of a trampoline.
   3318  1.1  mrg    FNDECL is the decl of the target address, M_TRAMP is a MEM for
   3319  1.1  mrg    the trampoline, and CHAIN_VALUE is an RTX for the static chain
   3320  1.1  mrg    to be passed to the target function.  */
   3321  1.1  mrg 
   3322  1.1  mrg static void
   3323  1.1  mrg gcn_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
   3324  1.1  mrg {
   3325  1.1  mrg   if (TARGET_GCN5_PLUS)
   3326  1.1  mrg     sorry ("nested function trampolines not supported on GCN5 due to"
   3327  1.1  mrg            " non-executable stacks");
   3328  1.1  mrg 
   3329  1.1  mrg   emit_block_move (m_tramp, assemble_trampoline_template (),
   3330  1.1  mrg 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
   3331  1.1  mrg 
   3332  1.1  mrg   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
   3333  1.1  mrg   rtx chain_value_reg = copy_to_reg (chain_value);
   3334  1.1  mrg   rtx fnaddr_reg = copy_to_reg (fnaddr);
   3335  1.1  mrg 
   3336  1.1  mrg   for (int i = 0; i < 4; i++)
   3337  1.1  mrg     {
   3338  1.1  mrg       rtx mem = adjust_address (m_tramp, SImode, i * 8 + 4);
   3339  1.1  mrg       rtx reg = i < 2 ? chain_value_reg : fnaddr_reg;
   3340  1.1  mrg       emit_move_insn (mem, gen_rtx_SUBREG (SImode, reg, (i % 2) * 4));
   3341  1.1  mrg     }
   3342  1.1  mrg 
   3343  1.1  mrg   rtx tramp_addr = XEXP (m_tramp, 0);
   3344  1.1  mrg   emit_insn (gen_clear_icache (tramp_addr,
   3345  1.1  mrg 			       plus_constant (ptr_mode, tramp_addr,
   3346  1.1  mrg 					      TRAMPOLINE_SIZE)));
   3347  1.1  mrg }
   3348  1.1  mrg 
   3349  1.1  mrg /* }}}  */
   3350  1.1  mrg /* {{{ Miscellaneous.  */
   3351  1.1  mrg 
   3352  1.1  mrg /* Implement TARGET_CANNOT_COPY_INSN_P.
   3353  1.1  mrg 
   3354  1.1  mrg    Return true if INSN must not be duplicated.  */
   3355  1.1  mrg 
   3356  1.1  mrg static bool
   3357  1.1  mrg gcn_cannot_copy_insn_p (rtx_insn *insn)
   3358  1.1  mrg {
   3359  1.1  mrg   if (recog_memoized (insn) == CODE_FOR_gcn_wavefront_barrier)
   3360  1.1  mrg     return true;
   3361  1.1  mrg 
   3362  1.1  mrg   return false;
   3363  1.1  mrg }
   3364  1.1  mrg 
   3365  1.1  mrg /* Implement TARGET_DEBUG_UNWIND_INFO.
   3366  1.1  mrg 
   3367  1.1  mrg    Defines the mechanism that will be used for describing frame unwind
   3368  1.1  mrg    information to the debugger.  */
   3369  1.1  mrg 
   3370  1.1  mrg static enum unwind_info_type
   3371  1.1  mrg gcn_debug_unwind_info ()
   3372  1.1  mrg {
   3373  1.1  mrg   return UI_DWARF2;
   3374  1.1  mrg }
   3375  1.1  mrg 
   3376  1.1  mrg /* Determine if there is a suitable hardware conversion instruction.
   3377  1.1  mrg    Used primarily by the machine description.  */
   3378  1.1  mrg 
   3379  1.1  mrg bool
   3380  1.1  mrg gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op)
   3381  1.1  mrg {
   3382  1.1  mrg   if (VECTOR_MODE_P (from) != VECTOR_MODE_P (to))
   3383  1.1  mrg     return false;
   3384  1.1  mrg 
   3385  1.1  mrg   if (VECTOR_MODE_P (from))
   3386  1.1  mrg     {
   3387  1.1  mrg       from = GET_MODE_INNER (from);
   3388  1.1  mrg       to = GET_MODE_INNER (to);
   3389  1.1  mrg     }
   3390  1.1  mrg 
   3391  1.1  mrg   switch (op)
   3392  1.1  mrg     {
   3393  1.1  mrg     case fix_trunc_cvt:
   3394  1.1  mrg     case fixuns_trunc_cvt:
   3395  1.1  mrg       if (GET_MODE_CLASS (from) != MODE_FLOAT
   3396  1.1  mrg 	  || GET_MODE_CLASS (to) != MODE_INT)
   3397  1.1  mrg 	return false;
   3398  1.1  mrg       break;
   3399  1.1  mrg     case float_cvt:
   3400  1.1  mrg     case floatuns_cvt:
   3401  1.1  mrg       if (GET_MODE_CLASS (from) != MODE_INT
   3402  1.1  mrg 	  || GET_MODE_CLASS (to) != MODE_FLOAT)
   3403  1.1  mrg 	return false;
   3404  1.1  mrg       break;
   3405  1.1  mrg     case extend_cvt:
   3406  1.1  mrg       if (GET_MODE_CLASS (from) != MODE_FLOAT
   3407  1.1  mrg 	  || GET_MODE_CLASS (to) != MODE_FLOAT
   3408  1.1  mrg 	  || GET_MODE_SIZE (from) >= GET_MODE_SIZE (to))
   3409  1.1  mrg 	return false;
   3410  1.1  mrg       break;
   3411  1.1  mrg     case trunc_cvt:
   3412  1.1  mrg       if (GET_MODE_CLASS (from) != MODE_FLOAT
   3413  1.1  mrg 	  || GET_MODE_CLASS (to) != MODE_FLOAT
   3414  1.1  mrg 	  || GET_MODE_SIZE (from) <= GET_MODE_SIZE (to))
   3415  1.1  mrg 	return false;
   3416  1.1  mrg       break;
   3417  1.1  mrg     }
   3418  1.1  mrg 
   3419  1.1  mrg   return ((to == HImode && from == HFmode)
   3420  1.1  mrg 	  || (to == SImode && (from == SFmode || from == DFmode))
   3421  1.1  mrg 	  || (to == HFmode && (from == HImode || from == SFmode))
   3422  1.1  mrg 	  || (to == SFmode && (from == SImode || from == HFmode
   3423  1.1  mrg 			       || from == DFmode))
   3424  1.1  mrg 	  || (to == DFmode && (from == SImode || from == SFmode)));
   3425  1.1  mrg }
   3426  1.1  mrg 
   3427  1.1  mrg /* Implement TARGET_EMUTLS_VAR_INIT.
   3428  1.1  mrg 
   3429  1.1  mrg    Disable emutls (gthr-gcn.h does not support it, yet).  */
   3430  1.1  mrg 
   3431  1.1  mrg tree
   3432  1.1  mrg gcn_emutls_var_init (tree, tree decl, tree)
   3433  1.1  mrg {
   3434  1.1  mrg   sorry_at (DECL_SOURCE_LOCATION (decl), "TLS is not implemented for GCN.");
   3435  1.1  mrg   return NULL_TREE;
   3436  1.1  mrg }
   3437  1.1  mrg 
   3438  1.1  mrg /* }}}  */
   3439  1.1  mrg /* {{{ Costs.  */
   3440  1.1  mrg 
   3441  1.1  mrg /* Implement TARGET_RTX_COSTS.
   3442  1.1  mrg 
   3443  1.1  mrg    Compute a (partial) cost for rtx X.  Return true if the complete
   3444  1.1  mrg    cost has been computed, and false if subexpressions should be
   3445  1.1  mrg    scanned.  In either case, *TOTAL contains the cost result.  */
   3446  1.1  mrg 
   3447  1.1  mrg static bool
   3448  1.1  mrg gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool)
   3449  1.1  mrg {
   3450  1.1  mrg   enum rtx_code code = GET_CODE (x);
   3451  1.1  mrg   switch (code)
   3452  1.1  mrg     {
   3453  1.1  mrg     case CONST:
   3454  1.1  mrg     case CONST_DOUBLE:
   3455  1.1  mrg     case CONST_VECTOR:
   3456  1.1  mrg     case CONST_INT:
   3457  1.1  mrg       if (gcn_inline_constant_p (x))
   3458  1.1  mrg 	*total = 0;
   3459  1.1  mrg       else if (code == CONST_INT
   3460  1.1  mrg 	  && ((unsigned HOST_WIDE_INT) INTVAL (x) + 0x8000) < 0x10000)
   3461  1.1  mrg 	*total = 1;
   3462  1.1  mrg       else if (gcn_constant_p (x))
   3463  1.1  mrg 	*total = 2;
   3464  1.1  mrg       else
   3465  1.1  mrg 	*total = vgpr_vector_mode_p (GET_MODE (x)) ? 64 : 4;
   3466  1.1  mrg       return true;
   3467  1.1  mrg 
   3468  1.1  mrg     case DIV:
   3469  1.1  mrg       *total = 100;
   3470  1.1  mrg       return false;
   3471  1.1  mrg 
   3472  1.1  mrg     default:
   3473  1.1  mrg       *total = 3;
   3474  1.1  mrg       return false;
   3475  1.1  mrg     }
   3476  1.1  mrg }
   3477  1.1  mrg 
   3478  1.1  mrg /* Implement TARGET_MEMORY_MOVE_COST.
   3479  1.1  mrg 
   3480  1.1  mrg    Return the cost of moving data of mode M between a
   3481  1.1  mrg    register and memory.  A value of 2 is the default; this cost is
   3482  1.1  mrg    relative to those in `REGISTER_MOVE_COST'.
   3483  1.1  mrg 
   3484  1.1  mrg    This function is used extensively by register_move_cost that is used to
   3485  1.1  mrg    build tables at startup.  Make it inline in this case.
   3486  1.1  mrg    When IN is 2, return maximum of in and out move cost.
   3487  1.1  mrg 
   3488  1.1  mrg    If moving between registers and memory is more expensive than
   3489  1.1  mrg    between two registers, you should define this macro to express the
   3490  1.1  mrg    relative cost.
   3491  1.1  mrg 
   3492  1.1  mrg    Model also increased moving costs of QImode registers in non
   3493  1.1  mrg    Q_REGS classes.  */
   3494  1.1  mrg 
   3495  1.1  mrg #define LOAD_COST  32
   3496  1.1  mrg #define STORE_COST 32
   3497  1.1  mrg static int
   3498  1.1  mrg gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
   3499  1.1  mrg {
   3500  1.1  mrg   int nregs = CEIL (GET_MODE_SIZE (mode), 4);
   3501  1.1  mrg   switch (regclass)
   3502  1.1  mrg     {
   3503  1.1  mrg     case SCC_CONDITIONAL_REG:
   3504  1.1  mrg     case VCCZ_CONDITIONAL_REG:
   3505  1.1  mrg     case VCC_CONDITIONAL_REG:
   3506  1.1  mrg     case EXECZ_CONDITIONAL_REG:
   3507  1.1  mrg     case ALL_CONDITIONAL_REGS:
   3508  1.1  mrg     case SGPR_REGS:
   3509  1.1  mrg     case SGPR_EXEC_REGS:
   3510  1.1  mrg     case EXEC_MASK_REG:
   3511  1.1  mrg     case SGPR_VOP_SRC_REGS:
   3512  1.1  mrg     case SGPR_MEM_SRC_REGS:
   3513  1.1  mrg     case SGPR_SRC_REGS:
   3514  1.1  mrg     case SGPR_DST_REGS:
   3515  1.1  mrg     case GENERAL_REGS:
   3516  1.1  mrg     case AFP_REGS:
   3517  1.1  mrg       if (!in)
   3518  1.1  mrg 	return (STORE_COST + 2) * nregs;
   3519  1.1  mrg       return LOAD_COST * nregs;
   3520  1.1  mrg     case VGPR_REGS:
   3521  1.1  mrg       if (in)
   3522  1.1  mrg 	return (LOAD_COST + 2) * nregs;
   3523  1.1  mrg       return STORE_COST * nregs;
   3524  1.1  mrg     case ALL_REGS:
   3525  1.1  mrg     case ALL_GPR_REGS:
   3526  1.1  mrg     case SRCDST_REGS:
   3527  1.1  mrg       if (in)
   3528  1.1  mrg 	return (LOAD_COST + 2) * nregs;
   3529  1.1  mrg       return (STORE_COST + 2) * nregs;
   3530  1.1  mrg     default:
   3531  1.1  mrg       gcc_unreachable ();
   3532  1.1  mrg     }
   3533  1.1  mrg }
   3534  1.1  mrg 
   3535  1.1  mrg /* Implement TARGET_REGISTER_MOVE_COST.
   3536  1.1  mrg 
   3537  1.1  mrg    Return the cost of moving data from a register in class CLASS1 to
   3538  1.1  mrg    one in class CLASS2.  Base value is 2.  */
   3539  1.1  mrg 
   3540  1.1  mrg static int
   3541  1.1  mrg gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src)
   3542  1.1  mrg {
   3543  1.1  mrg   /* Increase cost of moving from and to vector registers.  While this is
   3544  1.1  mrg      fast in hardware (I think), it has hidden cost of setting up the exec
   3545  1.1  mrg      flags.  */
   3546  1.1  mrg   if ((src < VGPR_REGS) != (dst < VGPR_REGS))
   3547  1.1  mrg     return 4;
   3548  1.1  mrg   return 2;
   3549  1.1  mrg }
   3550  1.1  mrg 
   3551  1.1  mrg /* }}}  */
   3552  1.1  mrg /* {{{ Builtins.  */
   3553  1.1  mrg 
/* Type codes used by GCN built-in definitions.  */

enum gcn_builtin_type_index
{
  /* Terminator for the parameter list in gcn_builtin_description::parm.  */
  GCN_BTI_END_OF_PARAMS,

  /* Scalar types.  */
  GCN_BTI_VOID,
  GCN_BTI_BOOL,
  GCN_BTI_INT,
  GCN_BTI_UINT,
  GCN_BTI_SIZE_T,
  GCN_BTI_LLINT,
  GCN_BTI_LLUINT,
  GCN_BTI_EXEC,		/* 64-bit execution mask (unsigned DImode).  */

  GCN_BTI_SF,		/* 32-bit float.  */
  GCN_BTI_V64SI,	/* Vector of 64 32-bit ints.  */
  GCN_BTI_V64SF,	/* Vector of 64 floats.  */
  GCN_BTI_V64PTR,	/* Vector of 64 pointer-sized (DImode) ints.  */
  GCN_BTI_SIPTR,	/* Flat address-space pointer to int.  */
  GCN_BTI_SFPTR,	/* Flat address-space pointer to float.  */
  GCN_BTI_VOIDPTR,	/* Flat address-space pointer to void.  */

  GCN_BTI_LDS_VOIDPTR,	/* LDS address-space pointer to void.  */

  GCN_BTI_MAX		/* Number of type codes; sizes the table below.  */
};
   3581  1.1  mrg 
/* One tree per gcn_builtin_type_index entry, filled in by
   gcn_init_builtin_types.  GTY-marked so the trees survive GC.  */
static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX];

/* Shorthand accessors for the commonly used table entries.  */
#define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC])
#define sf_type_node (gcn_builtin_types[GCN_BTI_SF])
#define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
#define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
#define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
#define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
#define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
#define voidptr_type_node (gcn_builtin_types[GCN_BTI_VOIDPTR])
#define size_t_type_node (gcn_builtin_types[GCN_BTI_SIZE_T])
   3593  1.1  mrg 
   3594  1.1  mrg static rtx gcn_expand_builtin_1 (tree, rtx, rtx, machine_mode, int,
   3595  1.1  mrg 				 struct gcn_builtin_description *);
   3596  1.1  mrg static rtx gcn_expand_builtin_binop (tree, rtx, rtx, machine_mode, int,
   3597  1.1  mrg 				     struct gcn_builtin_description *);
   3598  1.1  mrg 
   3599  1.1  mrg struct gcn_builtin_description;
   3600  1.1  mrg typedef rtx (*gcn_builtin_expander) (tree, rtx, rtx, machine_mode, int,
   3601  1.1  mrg 				     struct gcn_builtin_description *);
   3602  1.1  mrg 
/* How a gcn_builtin_description entry is to be handled at expansion time.  */

enum gcn_builtin_type
{
  B_UNIMPLEMENTED,		/* Sorry out */
  B_INSN,			/* Emit a pattern */
  B_OVERLOAD			/* Placeholder for an overloaded function */
};
   3609  1.1  mrg 
/* Description of a single GCN built-in function, as produced by the
   DEF_BUILTIN macros from gcn-builtins.def.  */

struct gcn_builtin_description
{
  int fcode;			/* GCN_BUILTIN_* function code.  */
  int icode;			/* Insn code (CODE_FOR_*) to emit, if any.  */
  const char *name;		/* Name without the __builtin_gcn_ prefix.  */
  enum gcn_builtin_type type;
  /* The first element of parm is always the return type.  The rest
     are a zero terminated list of parameters.  */
  int parm[6];
  gcn_builtin_expander expander;	/* Callback that expands the call.  */
};
   3621  1.1  mrg 
/* Read in the GCN builtins from gcn-builtins.def.  */

extern GTY(()) struct gcn_builtin_description gcn_builtins[GCN_BUILTIN_MAX];

/* Each DEF_BUILTIN in the .def file becomes one initializer here.  */
struct gcn_builtin_description gcn_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params, expander)	\
  {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander},

/* DEF_BUILTIN_BINOP_INT_FP creates two builtins per operation: one whose
   last parameter supplies the previous value to merge into inactive lanes,
   and an _unspec variant without it (inactive lanes left undefined — see
   gcn_expand_builtin_binop, which checks call_expr_nargs).  */
#define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name)			\
  {GCN_BUILTIN_ ## fcode ## _V64SI,					\
   CODE_FOR_ ## ic ##v64si3_exec, name "_v64int", B_INSN,		\
   {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI,		\
    GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},	\
  {GCN_BUILTIN_ ## fcode ## _V64SI_unspec,				\
   CODE_FOR_ ## ic ##v64si3_exec, name "_v64int_unspec", B_INSN, 	\
   {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI,		\
    GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},

#include "gcn-builtins.def"
#undef DEF_BUILTIN_BINOP_INT_FP
#undef DEF_BUILTIN
};
   3644  1.1  mrg 
   3645  1.1  mrg static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX];
   3646  1.1  mrg 
   3647  1.1  mrg /* Implement TARGET_BUILTIN_DECL.
   3648  1.1  mrg 
   3649  1.1  mrg    Return the GCN builtin for CODE.  */
   3650  1.1  mrg 
   3651  1.1  mrg tree
   3652  1.1  mrg gcn_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p))
   3653  1.1  mrg {
   3654  1.1  mrg   if (code >= GCN_BUILTIN_MAX)
   3655  1.1  mrg     return error_mark_node;
   3656  1.1  mrg 
   3657  1.1  mrg   return gcn_builtin_decls[code];
   3658  1.1  mrg }
   3659  1.1  mrg 
   3660  1.1  mrg /* Helper function for gcn_init_builtins.  */
   3661  1.1  mrg 
   3662  1.1  mrg static void
   3663  1.1  mrg gcn_init_builtin_types (void)
   3664  1.1  mrg {
   3665  1.1  mrg   gcn_builtin_types[GCN_BTI_VOID] = void_type_node;
   3666  1.1  mrg   gcn_builtin_types[GCN_BTI_BOOL] = boolean_type_node;
   3667  1.1  mrg   gcn_builtin_types[GCN_BTI_INT] = intSI_type_node;
   3668  1.1  mrg   gcn_builtin_types[GCN_BTI_UINT] = unsigned_type_for (intSI_type_node);
   3669  1.1  mrg   gcn_builtin_types[GCN_BTI_SIZE_T] = size_type_node;
   3670  1.1  mrg   gcn_builtin_types[GCN_BTI_LLINT] = intDI_type_node;
   3671  1.1  mrg   gcn_builtin_types[GCN_BTI_LLUINT] = unsigned_type_for (intDI_type_node);
   3672  1.1  mrg 
   3673  1.1  mrg   exec_type_node = unsigned_intDI_type_node;
   3674  1.1  mrg   sf_type_node = float32_type_node;
   3675  1.1  mrg   v64si_type_node = build_vector_type (intSI_type_node, 64);
   3676  1.1  mrg   v64sf_type_node = build_vector_type (float_type_node, 64);
   3677  1.1  mrg   v64ptr_type_node = build_vector_type (unsigned_intDI_type_node
   3678  1.1  mrg 					/*build_pointer_type
   3679  1.1  mrg 					  (integer_type_node) */
   3680  1.1  mrg 					, 64);
   3681  1.1  mrg   tree tmp = build_distinct_type_copy (intSI_type_node);
   3682  1.1  mrg   TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
   3683  1.1  mrg   siptr_type_node = build_pointer_type (tmp);
   3684  1.1  mrg 
   3685  1.1  mrg   tmp = build_distinct_type_copy (float_type_node);
   3686  1.1  mrg   TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
   3687  1.1  mrg   sfptr_type_node = build_pointer_type (tmp);
   3688  1.1  mrg 
   3689  1.1  mrg   tmp = build_distinct_type_copy (void_type_node);
   3690  1.1  mrg   TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
   3691  1.1  mrg   voidptr_type_node = build_pointer_type (tmp);
   3692  1.1  mrg 
   3693  1.1  mrg   tmp = build_distinct_type_copy (void_type_node);
   3694  1.1  mrg   TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_LDS;
   3695  1.1  mrg   gcn_builtin_types[GCN_BTI_LDS_VOIDPTR] = build_pointer_type (tmp);
   3696  1.1  mrg }
   3697  1.1  mrg 
   3698  1.1  mrg /* Implement TARGET_INIT_BUILTINS.
   3699  1.1  mrg 
   3700  1.1  mrg    Set up all builtin functions for this target.  */
   3701  1.1  mrg 
   3702  1.1  mrg static void
   3703  1.1  mrg gcn_init_builtins (void)
   3704  1.1  mrg {
   3705  1.1  mrg   gcn_init_builtin_types ();
   3706  1.1  mrg 
   3707  1.1  mrg   struct gcn_builtin_description *d;
   3708  1.1  mrg   unsigned int i;
   3709  1.1  mrg   for (i = 0, d = gcn_builtins; i < GCN_BUILTIN_MAX; i++, d++)
   3710  1.1  mrg     {
   3711  1.1  mrg       tree p;
   3712  1.1  mrg       char name[64];		/* build_function will make a copy.  */
   3713  1.1  mrg       int parm;
   3714  1.1  mrg 
   3715  1.1  mrg       /* FIXME: Is this necessary/useful? */
   3716  1.1  mrg       if (d->name == 0)
   3717  1.1  mrg 	continue;
   3718  1.1  mrg 
   3719  1.1  mrg       /* Find last parm.  */
   3720  1.1  mrg       for (parm = 1; d->parm[parm] != GCN_BTI_END_OF_PARAMS; parm++)
   3721  1.1  mrg 	;
   3722  1.1  mrg 
   3723  1.1  mrg       p = void_list_node;
   3724  1.1  mrg       while (parm > 1)
   3725  1.1  mrg 	p = tree_cons (NULL_TREE, gcn_builtin_types[d->parm[--parm]], p);
   3726  1.1  mrg 
   3727  1.1  mrg       p = build_function_type (gcn_builtin_types[d->parm[0]], p);
   3728  1.1  mrg 
   3729  1.1  mrg       sprintf (name, "__builtin_gcn_%s", d->name);
   3730  1.1  mrg       gcn_builtin_decls[i]
   3731  1.1  mrg 	= add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
   3732  1.1  mrg 
   3733  1.1  mrg       /* These builtins don't throw.  */
   3734  1.1  mrg       TREE_NOTHROW (gcn_builtin_decls[i]) = 1;
   3735  1.1  mrg     }
   3736  1.1  mrg 
   3737  1.1  mrg   /* These builtins need to take/return an LDS pointer: override the generic
   3738  1.1  mrg      versions here.  */
   3739  1.1  mrg 
   3740  1.1  mrg   set_builtin_decl (BUILT_IN_GOACC_SINGLE_START,
   3741  1.1  mrg 		    gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_START], false);
   3742  1.1  mrg 
   3743  1.1  mrg   set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_START,
   3744  1.1  mrg 		    gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_START],
   3745  1.1  mrg 		    false);
   3746  1.1  mrg 
   3747  1.1  mrg   set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_END,
   3748  1.1  mrg 		    gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_END],
   3749  1.1  mrg 		    false);
   3750  1.1  mrg 
   3751  1.1  mrg   set_builtin_decl (BUILT_IN_GOACC_BARRIER,
   3752  1.1  mrg 		    gcn_builtin_decls[GCN_BUILTIN_ACC_BARRIER], false);
   3753  1.1  mrg }
   3754  1.1  mrg 
   3755  1.1  mrg /* Implement TARGET_INIT_LIBFUNCS.  */
   3756  1.1  mrg 
   3757  1.1  mrg static void
   3758  1.1  mrg gcn_init_libfuncs (void)
   3759  1.1  mrg {
   3760  1.1  mrg   /* BITS_PER_UNIT * 2 is 64 bits, which causes
   3761  1.1  mrg      optabs-libfuncs.cc:gen_int_libfunc to omit TImode (i.e 128 bits)
   3762  1.1  mrg      libcalls that we need to support operations for that type.  Initialise
   3763  1.1  mrg      them here instead.  */
   3764  1.1  mrg   set_optab_libfunc (udiv_optab, TImode, "__udivti3");
   3765  1.1  mrg   set_optab_libfunc (umod_optab, TImode, "__umodti3");
   3766  1.1  mrg   set_optab_libfunc (sdiv_optab, TImode, "__divti3");
   3767  1.1  mrg   set_optab_libfunc (smod_optab, TImode, "__modti3");
   3768  1.1  mrg   set_optab_libfunc (smul_optab, TImode, "__multi3");
   3769  1.1  mrg   set_optab_libfunc (addv_optab, TImode, "__addvti3");
   3770  1.1  mrg   set_optab_libfunc (subv_optab, TImode, "__subvti3");
   3771  1.1  mrg   set_optab_libfunc (negv_optab, TImode, "__negvti2");
   3772  1.1  mrg   set_optab_libfunc (absv_optab, TImode, "__absvti2");
   3773  1.1  mrg   set_optab_libfunc (smulv_optab, TImode, "__mulvti3");
   3774  1.1  mrg   set_optab_libfunc (ffs_optab, TImode, "__ffsti2");
   3775  1.1  mrg   set_optab_libfunc (clz_optab, TImode, "__clzti2");
   3776  1.1  mrg   set_optab_libfunc (ctz_optab, TImode, "__ctzti2");
   3777  1.1  mrg   set_optab_libfunc (clrsb_optab, TImode, "__clrsbti2");
   3778  1.1  mrg   set_optab_libfunc (popcount_optab, TImode, "__popcountti2");
   3779  1.1  mrg   set_optab_libfunc (parity_optab, TImode, "__parityti2");
   3780  1.1  mrg   set_optab_libfunc (bswap_optab, TImode, "__bswapti2");
   3781  1.1  mrg }
   3782  1.1  mrg 
/* Expand the CMP_SWAP GCN builtins.  We have our own versions that do
   not require taking the address of any object, other than the memory
   cell being operated on.

   Arg 0 is the pointer, arg 1 the compare value, arg 2 the swap value;
   the result (the old memory contents) goes in TARGET.

   Helper function for gcn_expand_builtin_1.  */

static rtx
gcn_expand_cmp_swap (tree exp, rtx target)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
  /* The pointer argument's pointee type carries the address space,
     which determines the mode of the address itself.  */
  addr_space_t as
    = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (CALL_EXPR_ARG (exp, 0))));
  machine_mode as_mode = gcn_addr_space_address_mode (as);

  if (!target)
    target = gen_reg_rtx (mode);

  rtx addr = expand_expr (CALL_EXPR_ARG (exp, 0),
			  NULL_RTX, as_mode, EXPAND_NORMAL);
  rtx cmp = expand_expr (CALL_EXPR_ARG (exp, 1),
			 NULL_RTX, mode, EXPAND_NORMAL);
  rtx src = expand_expr (CALL_EXPR_ARG (exp, 2),
			 NULL_RTX, mode, EXPAND_NORMAL);
  rtx pat;

  /* Build the memory reference, preserving the caller's address space.  */
  rtx mem = gen_rtx_MEM (mode, force_reg (as_mode, addr));
  set_mem_addr_space (mem, as);

  /* The compare-and-swap patterns want register operands.  */
  if (!REG_P (cmp))
    cmp = copy_to_mode_reg (mode, cmp);
  if (!REG_P (src))
    src = copy_to_mode_reg (mode, src);

  /* Pick the pattern matching the operation width.  */
  if (mode == SImode)
    pat = gen_sync_compare_and_swapsi (target, mem, cmp, src);
  else
    pat = gen_sync_compare_and_swapdi (target, mem, cmp, src);

  emit_insn (pat);

  return target;
}
   3825  1.1  mrg 
/* Expand many different builtins.

   Intended for use in gcn-builtins.def.

   EXP is the CALL_EXPR; TARGET receives the result where applicable;
   IGNORE is nonzero when the call's value is unused, in which case most
   cases skip code generation entirely.  */

static rtx
gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
		      machine_mode /*mode */ , int ignore,
		      struct gcn_builtin_description *)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  switch (DECL_MD_FUNCTION_CODE (fndecl))
    {
    case GCN_BUILTIN_FLAT_LOAD_INT32:
      /* NOTE(review): the actual load is commented out below; the
	 arguments are still expanded (for any side effects), but TARGET
	 is returned unmodified.  */
      {
	if (ignore)
	  return target;
	/*rtx exec = */
	force_reg (DImode,
		   expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
				EXPAND_NORMAL));
	/*rtx ptr = */
	force_reg (V64DImode,
		   expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
				EXPAND_NORMAL));
	/*emit_insn (gen_vector_flat_loadv64si
		     (target, gcn_gen_undef (V64SImode), ptr, exec)); */
	return target;
      }
    case GCN_BUILTIN_FLAT_LOAD_PTR_INT32:
    case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT:
      /* Gather: load one element per lane from ptr + (offset[lane] << 2),
	 under the execution mask given by arg 0.  */
      {
	if (ignore)
	  return target;
	rtx exec = force_reg (DImode,
			      expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
					   DImode,
					   EXPAND_NORMAL));
	rtx ptr = force_reg (DImode,
			     expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
					  V64DImode,
					  EXPAND_NORMAL));
	rtx offsets = force_reg (V64SImode,
				 expand_expr (CALL_EXPR_ARG (exp, 2),
					      NULL_RTX, V64DImode,
					      EXPAND_NORMAL));
	/* addrs = zero-extend (offsets << 2) + broadcast (ptr).  */
	rtx addrs = gen_reg_rtx (V64DImode);
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_ashlv64si3_exec (tmp, offsets,
					  GEN_INT (2),
					  gcn_gen_undef (V64SImode), exec));
	emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
						 gcn_gen_undef (V64DImode),
						 exec));
	rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
	/*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
	/* FIXME: set attributes.  */
	emit_insn (gen_mov_with_exec (target, mem, exec));
	return target;
      }
    case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
    case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT:
      /* Scatter: store arg 3 to ptr + (offset[lane] << 2) per lane, under
	 the execution mask.  Address computation mirrors the load case.  */
      {
	rtx exec = force_reg (DImode,
			      expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
					   DImode,
					   EXPAND_NORMAL));
	rtx ptr = force_reg (DImode,
			     expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
					  V64DImode,
					  EXPAND_NORMAL));
	rtx offsets = force_reg (V64SImode,
				 expand_expr (CALL_EXPR_ARG (exp, 2),
					      NULL_RTX, V64DImode,
					      EXPAND_NORMAL));
	/* The stored value's mode distinguishes the INT32/FLOAT variants.  */
	machine_mode vmode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
								       3)));
	rtx val = force_reg (vmode,
			     expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
					  vmode,
					  EXPAND_NORMAL));
	rtx addrs = gen_reg_rtx (V64DImode);
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_ashlv64si3_exec (tmp, offsets,
					  GEN_INT (2),
					  gcn_gen_undef (V64SImode), exec));
	emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
						 gcn_gen_undef (V64DImode),
						 exec));
	rtx mem = gen_rtx_MEM (vmode, addrs);
	/*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
	/* FIXME: set attributes.  */
	emit_insn (gen_mov_with_exec (mem, val, exec));
	return target;
      }
    case GCN_BUILTIN_SQRTVF:
      /* Vector square root over all 64 lanes (full exec mask).  */
      {
	if (ignore)
	  return target;
	rtx exec = gcn_full_exec_reg ();
	rtx arg = force_reg (V64SFmode,
			     expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
					  V64SFmode,
					  EXPAND_NORMAL));
	emit_insn (gen_sqrtv64sf2_exec
		   (target, arg, gcn_gen_undef (V64SFmode), exec));
	return target;
      }
    case GCN_BUILTIN_SQRTF:
      /* Scalar square root.  */
      {
	if (ignore)
	  return target;
	rtx arg = force_reg (SFmode,
			     expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
					  SFmode,
					  EXPAND_NORMAL));
	emit_insn (gen_sqrtsf2 (target, arg));
	return target;
      }
    case GCN_BUILTIN_OMP_DIM_SIZE:
      {
	if (ignore)
	  return target;
	emit_insn (gen_oacc_dim_size (target,
				      expand_expr (CALL_EXPR_ARG (exp, 0),
						   NULL_RTX, SImode,
						   EXPAND_NORMAL)));
	return target;
      }
    case GCN_BUILTIN_OMP_DIM_POS:
      {
	if (ignore)
	  return target;
	emit_insn (gen_oacc_dim_pos (target,
				     expand_expr (CALL_EXPR_ARG (exp, 0),
						  NULL_RTX, SImode,
						  EXPAND_NORMAL)));
	return target;
      }
    case GCN_BUILTIN_CMP_SWAP:
    case GCN_BUILTIN_CMP_SWAPLL:
      /* Delegated to the dedicated helper above.  */
      return gcn_expand_cmp_swap (exp, target);

    case GCN_BUILTIN_ACC_SINGLE_START:
      /* Result is true only for position 0 in dimension 1.  */
      {
	if (ignore)
	  return target;

	rtx wavefront = gcn_oacc_dim_pos (1);
	rtx cond = gen_rtx_EQ (VOIDmode, wavefront, const0_rtx);
	rtx cc = (target && REG_P (target)) ? target : gen_reg_rtx (BImode);
	emit_insn (gen_cstoresi4 (cc, cond, wavefront, const0_rtx));
	return cc;
      }

    case GCN_BUILTIN_ACC_SINGLE_COPY_START:
      /* Returns arg 0, except at position 0 in dimension 1, where the
	 result is forced to zero via a conditional branch.  */
      {
	rtx blk = force_reg (SImode,
			     expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
					  SImode, EXPAND_NORMAL));
	rtx wavefront = gcn_oacc_dim_pos (1);
	rtx cond = gen_rtx_NE (VOIDmode, wavefront, const0_rtx);
	rtx not_zero = gen_label_rtx ();
	emit_insn (gen_cbranchsi4 (cond, wavefront, const0_rtx, not_zero));
	emit_move_insn (blk, const0_rtx);
	emit_label (not_zero);
	return blk;
      }

    case GCN_BUILTIN_ACC_SINGLE_COPY_END:
      /* No code needed; the builtin only exists as a marker.  */
      return target;

    case GCN_BUILTIN_ACC_BARRIER:
      emit_insn (gen_gcn_wavefront_barrier ());
      return target;

    default:
      gcc_unreachable ();
    }
}
   4005  1.1  mrg 
/* Expansion of simple arithmetic and bit binary operation builtins.

   Intended for use with gcn_builtins table.

   Arg 0 is the execution mask, args 1 and 2 the operands; an optional
   arg 3 supplies the value merged into inactive lanes (the _unspec
   variants omit it and leave those lanes undefined).  */

static rtx
gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget */ ,
			  machine_mode /*mode */ , int ignore,
			  struct gcn_builtin_description *d)
{
  int icode = d->icode;
  if (ignore)
    return target;

  rtx exec = force_reg (DImode,
			expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
				     EXPAND_NORMAL));

  /* Expand each operand in the mode the insn pattern wants, forcing it
     into a register only when the pattern's predicate rejects it.  */
  machine_mode m1 = insn_data[icode].operand[1].mode;
  rtx arg1 = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, m1,
			  EXPAND_NORMAL);
  if (!insn_data[icode].operand[1].predicate (arg1, m1))
    arg1 = force_reg (m1, arg1);

  machine_mode m2 = insn_data[icode].operand[2].mode;
  rtx arg2 = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, m2,
			  EXPAND_NORMAL);
  if (!insn_data[icode].operand[2].predicate (arg2, m2))
    arg2 = force_reg (m2, arg2);

  /* Optional 4th argument: the previous value for inactive lanes.  */
  rtx arg_prev;
  if (call_expr_nargs (exp) == 4)
    {
      machine_mode m_prev = insn_data[icode].operand[4].mode;
      arg_prev = force_reg (m_prev,
			    expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
					 m_prev, EXPAND_NORMAL));
    }
  else
    arg_prev = gcn_gen_undef (GET_MODE (target));

  rtx pat = GEN_FCN (icode) (target, arg1, arg2, exec, arg_prev);
  emit_insn (pat);
  return target;
}
   4050  1.1  mrg 
   4051  1.1  mrg /* Implement TARGET_EXPAND_BUILTIN.
   4052  1.1  mrg 
   4053  1.1  mrg    Expand an expression EXP that calls a built-in function, with result going
   4054  1.1  mrg    to TARGET if that's convenient (and in mode MODE if that's convenient).
   4055  1.1  mrg    SUBTARGET may be used as the target for computing one of EXP's operands.
   4056  1.1  mrg    IGNORE is nonzero if the value is to be ignored.  */
   4057  1.1  mrg 
   4058  1.1  mrg rtx
   4059  1.1  mrg gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
   4060  1.1  mrg 		    int ignore)
   4061  1.1  mrg {
   4062  1.1  mrg   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
   4063  1.1  mrg   unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
   4064  1.1  mrg   struct gcn_builtin_description *d;
   4065  1.1  mrg 
   4066  1.1  mrg   gcc_assert (fcode < GCN_BUILTIN_MAX);
   4067  1.1  mrg   d = &gcn_builtins[fcode];
   4068  1.1  mrg 
   4069  1.1  mrg   if (d->type == B_UNIMPLEMENTED)
   4070  1.1  mrg     sorry ("Builtin not implemented");
   4071  1.1  mrg 
   4072  1.1  mrg   return d->expander (exp, target, subtarget, mode, ignore, d);
   4073  1.1  mrg }
   4074  1.1  mrg 
   4075  1.1  mrg /* }}}  */
   4076  1.1  mrg /* {{{ Vectorization.  */
   4077  1.1  mrg 
/* Implement TARGET_VECTORIZE_GET_MASK_MODE.

   A vector mask is a value that holds one boolean result for every element in
   a vector.  */

opt_machine_mode
gcn_vectorize_get_mask_mode (machine_mode)
{
  /* GCN uses a DImode bit-mask: one bit per lane, regardless of the
     vector mode (hence the unused argument).  */
  return DImode;
}
   4089  1.1  mrg 
   4090  1.1  mrg /* Return an RTX that references a vector with the i-th lane containing
   4091  1.1  mrg    PERM[i]*4.
   4092  1.1  mrg 
   4093  1.1  mrg    Helper function for gcn_vectorize_vec_perm_const.  */
   4094  1.1  mrg 
   4095  1.1  mrg static rtx
   4096  1.1  mrg gcn_make_vec_perm_address (unsigned int *perm)
   4097  1.1  mrg {
   4098  1.1  mrg   rtx x = gen_reg_rtx (V64SImode);
   4099  1.1  mrg   emit_move_insn (x, gcn_vec_constant (V64SImode, 0));
   4100  1.1  mrg 
   4101  1.1  mrg   /* Permutation addresses use byte addressing.  With each vector lane being
   4102  1.1  mrg      4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant,
   4103  1.1  mrg      so only set those.
   4104  1.1  mrg 
   4105  1.1  mrg      The permutation given to the vec_perm* patterns range from 0 to 2N-1 to
   4106  1.1  mrg      select between lanes in two vectors, but as the DS_BPERMUTE* instructions
   4107  1.1  mrg      only take one source vector, the most-significant bit can be ignored
   4108  1.1  mrg      here.  Instead, we can use EXEC masking to select the relevant part of
   4109  1.1  mrg      each source vector after they are permuted separately.  */
   4110  1.1  mrg   uint64_t bit_mask = 1 << 2;
   4111  1.1  mrg   for (int i = 2; i < 8; i++, bit_mask <<= 1)
   4112  1.1  mrg     {
   4113  1.1  mrg       uint64_t exec_mask = 0;
   4114  1.1  mrg       uint64_t lane_mask = 1;
   4115  1.1  mrg       for (int j = 0; j < 64; j++, lane_mask <<= 1)
   4116  1.1  mrg 	if ((perm[j] * 4) & bit_mask)
   4117  1.1  mrg 	  exec_mask |= lane_mask;
   4118  1.1  mrg 
   4119  1.1  mrg       if (exec_mask)
   4120  1.1  mrg 	emit_insn (gen_addv64si3_exec (x, x,
   4121  1.1  mrg 				       gcn_vec_constant (V64SImode,
   4122  1.1  mrg 							 bit_mask),
   4123  1.1  mrg 				       x, get_exec (exec_mask)));
   4124  1.1  mrg     }
   4125  1.1  mrg 
   4126  1.1  mrg   return x;
   4127  1.1  mrg }
   4128  1.1  mrg 
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.

   Return true if permutation with SEL is possible.

   If DST/SRC0/SRC1 are non-null, emit the instructions to perform the
   permutations.

   The generic implementation permutes each source vector separately with
   DS_BPERMUTE (all lanes enabled) and then merges the two results into
   DST under an EXEC mask selecting the SRC1-sourced lanes.  */

static bool
gcn_vectorize_vec_perm_const (machine_mode vmode, rtx dst,
			      rtx src0, rtx src1,
			      const vec_perm_indices & sel)
{
  unsigned int nelt = GET_MODE_NUNITS (vmode);

  gcc_assert (VECTOR_MODE_P (vmode));
  gcc_assert (nelt <= 64);
  gcc_assert (sel.length () == nelt);

  if (!dst)
    {
      /* All vector permutations are possible on this architecture,
         with varying degrees of efficiency depending on the permutation. */
      return true;
    }

  /* Reduce each selector index into the 0..2*nelt-1 range; lanes past
     NELT are unused and zeroed.  NOTE(review): the mask arithmetic
     assumes NELT is a power of two -- this holds for every GCN vector
     mode handled by the switch below, but is not asserted here.  */
  unsigned int perm[64];
  for (unsigned int i = 0; i < nelt; ++i)
    perm[i] = sel[i] & (2 * nelt - 1);
  for (unsigned int i = nelt; i < 64; ++i)
    perm[i] = 0;

  src0 = force_reg (vmode, src0);
  src1 = force_reg (vmode, src1);

  /* Make life a bit easier by swapping operands if necessary so that
     the first element always comes from src0.  */
  if (perm[0] >= nelt)
    {
      std::swap (src0, src1);

      for (unsigned int i = 0; i < nelt; ++i)
	if (perm[i] < nelt)
	  perm[i] += nelt;
	else
	  perm[i] -= nelt;
    }

  /* TODO: There are more efficient ways to implement certain permutations
     using ds_swizzle_b32 and/or DPP.  Test for and expand them here, before
     this more inefficient generic approach is used.  */

  /* Compute the EXEC mask of lanes whose result comes from SRC1; all
     remaining lanes take their result from SRC0.  */
  int64_t src1_lanes = 0;
  int64_t lane_bit = 1;

  for (unsigned int i = 0; i < nelt; ++i, lane_bit <<= 1)
    {
      /* Set the bits for lanes from src1.  */
      if (perm[i] >= nelt)
	src1_lanes |= lane_bit;
    }

  rtx addr = gcn_make_vec_perm_address (perm);
  rtx (*ds_bpermute) (rtx, rtx, rtx, rtx);

  /* Pick the DS_BPERMUTE expander matching the vector mode.  */
  switch (vmode)
    {
    case E_V64QImode:
      ds_bpermute = gen_ds_bpermutev64qi;
      break;
    case E_V64HImode:
      ds_bpermute = gen_ds_bpermutev64hi;
      break;
    case E_V64SImode:
      ds_bpermute = gen_ds_bpermutev64si;
      break;
    case E_V64HFmode:
      ds_bpermute = gen_ds_bpermutev64hf;
      break;
    case E_V64SFmode:
      ds_bpermute = gen_ds_bpermutev64sf;
      break;
    case E_V64DImode:
      ds_bpermute = gen_ds_bpermutev64di;
      break;
    case E_V64DFmode:
      ds_bpermute = gen_ds_bpermutev64df;
      break;
    default:
      gcc_assert (false);
    }

  /* Load elements from src0 to dst.  The assert checks that not *all*
     lanes come from SRC1, which cannot happen after the swap above.
     NOTE(review): if DST aliases SRC1 this first permute clobbers SRC1
     before the second permute reads it -- confirm callers never pass
     aliasing operands.  */
  gcc_assert (~src1_lanes);
  emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ()));

  /* Load elements from src1 to dst.  */
  if (src1_lanes)
    {
      /* Masking a lane masks both the destination and source lanes for
         DS_BPERMUTE, so we need to have all lanes enabled for the permute,
         then add an extra masked move to merge the results of permuting
         the two source vectors together.
       */
      rtx tmp = gen_reg_rtx (vmode);
      emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ()));
      emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes)));
    }

  return true;
}
   4239  1.1  mrg 
   4240  1.1  mrg /* Implements TARGET_VECTOR_MODE_SUPPORTED_P.
   4241  1.1  mrg 
   4242  1.1  mrg    Return nonzero if vector MODE is supported with at least move
   4243  1.1  mrg    instructions.  */
   4244  1.1  mrg 
   4245  1.1  mrg static bool
   4246  1.1  mrg gcn_vector_mode_supported_p (machine_mode mode)
   4247  1.1  mrg {
   4248  1.1  mrg   return (mode == V64QImode || mode == V64HImode
   4249  1.1  mrg 	  || mode == V64SImode || mode == V64DImode
   4250  1.1  mrg 	  || mode == V64SFmode || mode == V64DFmode);
   4251  1.1  mrg }
   4252  1.1  mrg 
   4253  1.1  mrg /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
   4254  1.1  mrg 
   4255  1.1  mrg    Enables autovectorization for all supported modes.  */
   4256  1.1  mrg 
   4257  1.1  mrg static machine_mode
   4258  1.1  mrg gcn_vectorize_preferred_simd_mode (scalar_mode mode)
   4259  1.1  mrg {
   4260  1.1  mrg   switch (mode)
   4261  1.1  mrg     {
   4262  1.1  mrg     case E_QImode:
   4263  1.1  mrg       return V64QImode;
   4264  1.1  mrg     case E_HImode:
   4265  1.1  mrg       return V64HImode;
   4266  1.1  mrg     case E_SImode:
   4267  1.1  mrg       return V64SImode;
   4268  1.1  mrg     case E_DImode:
   4269  1.1  mrg       return V64DImode;
   4270  1.1  mrg     case E_SFmode:
   4271  1.1  mrg       return V64SFmode;
   4272  1.1  mrg     case E_DFmode:
   4273  1.1  mrg       return V64DFmode;
   4274  1.1  mrg     default:
   4275  1.1  mrg       return word_mode;
   4276  1.1  mrg     }
   4277  1.1  mrg }
   4278  1.1  mrg 
   4279  1.1  mrg /* Implement TARGET_VECTORIZE_RELATED_MODE.
   4280  1.1  mrg 
   4281  1.1  mrg    All GCN vectors are 64-lane, so this is simpler than other architectures.
   4282  1.1  mrg    In particular, we do *not* want to match vector bit-size.  */
   4283  1.1  mrg 
   4284  1.1  mrg static opt_machine_mode
   4285  1.1  mrg gcn_related_vector_mode (machine_mode ARG_UNUSED (vector_mode),
   4286  1.1  mrg 			 scalar_mode element_mode, poly_uint64 nunits)
   4287  1.1  mrg {
   4288  1.1  mrg   if (known_ne (nunits, 0U) && known_ne (nunits, 64U))
   4289  1.1  mrg     return VOIDmode;
   4290  1.1  mrg 
   4291  1.1  mrg   machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode);
   4292  1.1  mrg   if (!VECTOR_MODE_P (pref_mode))
   4293  1.1  mrg     return VOIDmode;
   4294  1.1  mrg 
   4295  1.1  mrg   return pref_mode;
   4296  1.1  mrg }
   4297  1.1  mrg 
/* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.

   Returns the preferred alignment in bits for accesses to vectors of type type
   in vectorized code. This might be less than or greater than the ABI-defined
   value returned by TARGET_VECTOR_ALIGNMENT. It can be equal to the alignment
   of a single element, in which case the vectorizer will not try to optimize
   for alignment.  */

static poly_uint64
gcn_preferred_vector_alignment (const_tree type)
{
  /* Request only the element type's alignment, so the vectorizer does
     not spend effort (e.g. loop peeling) on whole-vector alignment.  */
  return TYPE_ALIGN (TREE_TYPE (type));
}
   4311  1.1  mrg 
   4312  1.1  mrg /* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.
   4313  1.1  mrg 
   4314  1.1  mrg    Return true if the target supports misaligned vector store/load of a
   4315  1.1  mrg    specific factor denoted in the misalignment parameter.  */
   4316  1.1  mrg 
   4317  1.1  mrg static bool
   4318  1.1  mrg gcn_vectorize_support_vector_misalignment (machine_mode ARG_UNUSED (mode),
   4319  1.1  mrg 					   const_tree type, int misalignment,
   4320  1.1  mrg 					   bool is_packed)
   4321  1.1  mrg {
   4322  1.1  mrg   if (is_packed)
   4323  1.1  mrg     return false;
   4324  1.1  mrg 
   4325  1.1  mrg   /* If the misalignment is unknown, we should be able to handle the access
   4326  1.1  mrg      so long as it is not to a member of a packed data structure.  */
   4327  1.1  mrg   if (misalignment == -1)
   4328  1.1  mrg     return true;
   4329  1.1  mrg 
   4330  1.1  mrg   /* Return true if the misalignment is a multiple of the natural alignment
   4331  1.1  mrg      of the vector's element type.  This is probably always going to be
   4332  1.1  mrg      true in practice, since we've already established that this isn't a
   4333  1.1  mrg      packed access.  */
   4334  1.1  mrg   return misalignment % TYPE_ALIGN_UNIT (type) == 0;
   4335  1.1  mrg }
   4336  1.1  mrg 
   4337  1.1  mrg /* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.
   4338  1.1  mrg 
   4339  1.1  mrg    Return true if vector alignment is reachable (by peeling N iterations) for
   4340  1.1  mrg    the given scalar type TYPE.  */
   4341  1.1  mrg 
   4342  1.1  mrg static bool
   4343  1.1  mrg gcn_vector_alignment_reachable (const_tree ARG_UNUSED (type), bool is_packed)
   4344  1.1  mrg {
   4345  1.1  mrg   /* Vectors which aren't in packed structures will not be less aligned than
   4346  1.1  mrg      the natural alignment of their element type, so this is safe.  */
   4347  1.1  mrg   return !is_packed;
   4348  1.1  mrg }
   4349  1.1  mrg 
   4350  1.1  mrg /* Generate DPP instructions used for vector reductions.
   4351  1.1  mrg 
   4352  1.1  mrg    The opcode is given by INSN.
   4353  1.1  mrg    The first operand of the operation is shifted right by SHIFT vector lanes.
   4354  1.1  mrg    SHIFT must be a power of 2.  If SHIFT is 16, the 15th lane of each row is
   4355  1.1  mrg    broadcast the next row (thereby acting like a shift of 16 for the end of
   4356  1.1  mrg    each row).  If SHIFT is 32, lane 31 is broadcast to all the
   4357  1.1  mrg    following lanes (thereby acting like a shift of 32 for lane 63).  */
   4358  1.1  mrg 
   4359  1.1  mrg char *
   4360  1.1  mrg gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
   4361  1.1  mrg 			 int unspec, int shift)
   4362  1.1  mrg {
   4363  1.1  mrg   static char buf[128];
   4364  1.1  mrg   const char *dpp;
   4365  1.1  mrg   const char *vcc_in = "";
   4366  1.1  mrg   const char *vcc_out = "";
   4367  1.1  mrg 
   4368  1.1  mrg   /* Add the vcc operand if needed.  */
   4369  1.1  mrg   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   4370  1.1  mrg     {
   4371  1.1  mrg       if (unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
   4372  1.1  mrg 	vcc_in = ", vcc";
   4373  1.1  mrg 
   4374  1.1  mrg       if (unspec == UNSPEC_PLUS_CARRY_DPP_SHR
   4375  1.1  mrg 	  || unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
   4376  1.1  mrg 	vcc_out = ", vcc";
   4377  1.1  mrg     }
   4378  1.1  mrg 
   4379  1.1  mrg   /* Add the DPP modifiers.  */
   4380  1.1  mrg   switch (shift)
   4381  1.1  mrg     {
   4382  1.1  mrg     case 1:
   4383  1.1  mrg       dpp = "row_shr:1 bound_ctrl:0";
   4384  1.1  mrg       break;
   4385  1.1  mrg     case 2:
   4386  1.1  mrg       dpp = "row_shr:2 bound_ctrl:0";
   4387  1.1  mrg       break;
   4388  1.1  mrg     case 4:
   4389  1.1  mrg       dpp = "row_shr:4 bank_mask:0xe";
   4390  1.1  mrg       break;
   4391  1.1  mrg     case 8:
   4392  1.1  mrg       dpp = "row_shr:8 bank_mask:0xc";
   4393  1.1  mrg       break;
   4394  1.1  mrg     case 16:
   4395  1.1  mrg       dpp = "row_bcast:15 row_mask:0xa";
   4396  1.1  mrg       break;
   4397  1.1  mrg     case 32:
   4398  1.1  mrg       dpp = "row_bcast:31 row_mask:0xc";
   4399  1.1  mrg       break;
   4400  1.1  mrg     default:
   4401  1.1  mrg       gcc_unreachable ();
   4402  1.1  mrg     }
   4403  1.1  mrg 
   4404  1.1  mrg   if (unspec == UNSPEC_MOV_DPP_SHR && vgpr_2reg_mode_p (mode))
   4405  1.1  mrg     sprintf (buf, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
   4406  1.1  mrg 	     insn, dpp, insn, dpp);
   4407  1.1  mrg   else if (unspec == UNSPEC_MOV_DPP_SHR)
   4408  1.1  mrg     sprintf (buf, "%s\t%%0, %%1 %s", insn, dpp);
   4409  1.1  mrg   else
   4410  1.1  mrg     sprintf (buf, "%s\t%%0%s, %%1, %%2%s %s", insn, vcc_out, vcc_in, dpp);
   4411  1.1  mrg 
   4412  1.1  mrg   return buf;
   4413  1.1  mrg }
   4414  1.1  mrg 
/* Generate vector reductions in terms of DPP instructions.

   The vector register SRC of mode MODE is reduced using the operation given
   by UNSPEC, and the scalar result is returned in lane 63 of a vector
   register.  */

rtx
gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
{
  machine_mode orig_mode = mode;
  /* 64-bit min/max (and 64-bit FP add) have no direct DPP form; they
     are expanded as a DPP move followed by an ordinary vector op.  */
  bool use_moves = (((unspec == UNSPEC_SMIN_DPP_SHR
		      || unspec == UNSPEC_SMAX_DPP_SHR
		      || unspec == UNSPEC_UMIN_DPP_SHR
		      || unspec == UNSPEC_UMAX_DPP_SHR)
		     && (mode == V64DImode
			 || mode == V64DFmode))
		    || (unspec == UNSPEC_PLUS_DPP_SHR
			&& mode == V64DFmode));
  /* RTL code corresponding to UNSPEC, used only on the USE_MOVES path.  */
  rtx_code code = (unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
		   : unspec == UNSPEC_SMAX_DPP_SHR ? SMAX
		   : unspec == UNSPEC_UMIN_DPP_SHR ? UMIN
		   : unspec == UNSPEC_UMAX_DPP_SHR ? UMAX
		   : unspec == UNSPEC_PLUS_DPP_SHR ? PLUS
		   : UNKNOWN);
  /* Sub-word min/max are computed in SImode and narrowed afterwards.  */
  bool use_extends = ((unspec == UNSPEC_SMIN_DPP_SHR
		       || unspec == UNSPEC_SMAX_DPP_SHR
		       || unspec == UNSPEC_UMIN_DPP_SHR
		       || unspec == UNSPEC_UMAX_DPP_SHR)
		      && (mode == V64QImode
			  || mode == V64HImode));
  bool unsignedp = (unspec == UNSPEC_UMIN_DPP_SHR
		    || unspec == UNSPEC_UMAX_DPP_SHR);
  /* Integer adds use the carry-out variant (always on GCN3, and for
     64-bit integers everywhere), which clobbers VCC.  */
  bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
			&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT
			&& (TARGET_GCN3 || mode == V64DImode);

  if (use_plus_carry)
    unspec = UNSPEC_PLUS_CARRY_DPP_SHR;

  if (use_extends)
    {
      /* Widen QI/HI elements to SImode (signed or unsigned extension,
	 matching the reduction's signedness).  */
      rtx tmp = gen_reg_rtx (V64SImode);
      convert_move (tmp, src, unsignedp);
      src = tmp;
      mode = V64SImode;
    }

  /* Perform reduction by first performing the reduction operation on every
     pair of lanes, then on every pair of results from the previous
     iteration (thereby effectively reducing every 4 lanes) and so on until
     all lanes are reduced.  */
  rtx in, out = force_reg (mode, src);
  for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
    {
      rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
      in = out;
      out = gen_reg_rtx (mode);

      if (use_moves)
	{
	  /* No DPP form of the operation: shift with a DPP move, then
	     combine with a plain vector operation.  */
	  rtx tmp = gen_reg_rtx (mode);
	  emit_insn (gen_dpp_move (mode, tmp, in, shift_val));
	  emit_insn (gen_rtx_SET (out, gen_rtx_fmt_ee (code, mode, tmp, in)));
	}
      else
	{
	  /* Emit the combined shift-and-operate as a single UNSPEC.  */
	  rtx insn = gen_rtx_SET (out,
				  gen_rtx_UNSPEC (mode,
						  gen_rtvec (3, in, in,
							     shift_val),
						  unspec));

	  /* Add clobber for instructions that set the carry flags.  */
	  if (use_plus_carry)
	    {
	      rtx clobber = gen_rtx_CLOBBER (VOIDmode,
					     gen_rtx_REG (DImode, VCC_REG));
	      insn = gen_rtx_PARALLEL (VOIDmode,
				       gen_rtvec (2, insn, clobber));
	    }

	  emit_insn (insn);
	}
    }

  if (use_extends)
    {
      /* Narrow the widened SImode result back to the original mode.  */
      rtx tmp = gen_reg_rtx (orig_mode);
      convert_move (tmp, out, unsignedp);
      out = tmp;
    }

  return out;
}
   4509  1.1  mrg 
   4510  1.1  mrg /* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST.  */
   4511  1.1  mrg 
   4512  1.1  mrg int
   4513  1.1  mrg gcn_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
   4514  1.1  mrg 			tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
   4515  1.1  mrg {
   4516  1.1  mrg   /* Always vectorize.  */
   4517  1.1  mrg   return 1;
   4518  1.1  mrg }
   4519  1.1  mrg 
   4520  1.1  mrg /* }}}  */
   4521  1.1  mrg /* {{{ md_reorg pass.  */
   4522  1.1  mrg 
/* Identify VMEM instructions from their "type" attribute.

   Returns true for the buffer and flat memory-access types; false for
   all scalar, DS and VALU types.  */

static bool
gcn_vmem_insn_p (attr_type type)
{
  /* The switch deliberately enumerates every attr_type value and has no
     default case, so that -Wswitch flags this function whenever a new
     type is added to the machine description.  */
  switch (type)
    {
    case TYPE_MUBUF:
    case TYPE_MTBUF:
    case TYPE_FLAT:
      return true;
    case TYPE_UNKNOWN:
    case TYPE_SOP1:
    case TYPE_SOP2:
    case TYPE_SOPK:
    case TYPE_SOPC:
    case TYPE_SOPP:
    case TYPE_SMEM:
    case TYPE_DS:
    case TYPE_VOP2:
    case TYPE_VOP1:
    case TYPE_VOPC:
    case TYPE_VOP3A:
    case TYPE_VOP3B:
    case TYPE_VOP_SDWA:
    case TYPE_VOP_DPP:
    case TYPE_MULT:
    case TYPE_VMULT:
      return false;
    }
  /* Not reached for any valid attr_type value.  */
  gcc_unreachable ();
  return false;
}
   4556  1.1  mrg 
   4557  1.1  mrg /* If INSN sets the EXEC register to a constant value, return the value,
   4558  1.1  mrg    otherwise return zero.  */
   4559  1.1  mrg 
   4560  1.1  mrg static int64_t
   4561  1.1  mrg gcn_insn_exec_value (rtx_insn *insn)
   4562  1.1  mrg {
   4563  1.1  mrg   if (!NONDEBUG_INSN_P (insn))
   4564  1.1  mrg     return 0;
   4565  1.1  mrg 
   4566  1.1  mrg   rtx pattern = PATTERN (insn);
   4567  1.1  mrg 
   4568  1.1  mrg   if (GET_CODE (pattern) == SET)
   4569  1.1  mrg     {
   4570  1.1  mrg       rtx dest = XEXP (pattern, 0);
   4571  1.1  mrg       rtx src = XEXP (pattern, 1);
   4572  1.1  mrg 
   4573  1.1  mrg       if (GET_MODE (dest) == DImode
   4574  1.1  mrg 	  && REG_P (dest) && REGNO (dest) == EXEC_REG
   4575  1.1  mrg 	  && CONST_INT_P (src))
   4576  1.1  mrg 	return INTVAL (src);
   4577  1.1  mrg     }
   4578  1.1  mrg 
   4579  1.1  mrg   return 0;
   4580  1.1  mrg }
   4581  1.1  mrg 
   4582  1.1  mrg /* Sets the EXEC register before INSN to the value that it had after
   4583  1.1  mrg    LAST_EXEC_DEF.  The constant value of the EXEC register is returned if
   4584  1.1  mrg    known, otherwise it returns zero.  */
   4585  1.1  mrg 
   4586  1.1  mrg static int64_t
   4587  1.1  mrg gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
   4588  1.1  mrg 		  bool curr_exec_known, bool &last_exec_def_saved)
   4589  1.1  mrg {
   4590  1.1  mrg   rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
   4591  1.1  mrg   rtx exec;
   4592  1.1  mrg 
   4593  1.1  mrg   int64_t exec_value = gcn_insn_exec_value (last_exec_def);
   4594  1.1  mrg 
   4595  1.1  mrg   if (exec_value)
   4596  1.1  mrg     {
   4597  1.1  mrg       /* If the EXEC value is a constant and it happens to be the same as the
   4598  1.1  mrg          current EXEC value, the restore can be skipped.  */
   4599  1.1  mrg       if (curr_exec_known && exec_value == curr_exec)
   4600  1.1  mrg 	return exec_value;
   4601  1.1  mrg 
   4602  1.1  mrg       exec = GEN_INT (exec_value);
   4603  1.1  mrg     }
   4604  1.1  mrg   else
   4605  1.1  mrg     {
   4606  1.1  mrg       /* If the EXEC value is not a constant, save it in a register after the
   4607  1.1  mrg 	 point of definition.  */
   4608  1.1  mrg       rtx exec_save_reg = gen_rtx_REG (DImode, EXEC_SAVE_REG);
   4609  1.1  mrg 
   4610  1.1  mrg       if (!last_exec_def_saved)
   4611  1.1  mrg 	{
   4612  1.1  mrg 	  start_sequence ();
   4613  1.1  mrg 	  emit_move_insn (exec_save_reg, exec_reg);
   4614  1.1  mrg 	  rtx_insn *seq = get_insns ();
   4615  1.1  mrg 	  end_sequence ();
   4616  1.1  mrg 
   4617  1.1  mrg 	  emit_insn_after (seq, last_exec_def);
   4618  1.1  mrg 	  if (dump_file && (dump_flags & TDF_DETAILS))
   4619  1.1  mrg 	    fprintf (dump_file, "Saving EXEC after insn %d.\n",
   4620  1.1  mrg 		     INSN_UID (last_exec_def));
   4621  1.1  mrg 
   4622  1.1  mrg 	  last_exec_def_saved = true;
   4623  1.1  mrg 	}
   4624  1.1  mrg 
   4625  1.1  mrg       exec = exec_save_reg;
   4626  1.1  mrg     }
   4627  1.1  mrg 
   4628  1.1  mrg   /* Restore EXEC register before the usage.  */
   4629  1.1  mrg   start_sequence ();
   4630  1.1  mrg   emit_move_insn (exec_reg, exec);
   4631  1.1  mrg   rtx_insn *seq = get_insns ();
   4632  1.1  mrg   end_sequence ();
   4633  1.1  mrg   emit_insn_before (seq, insn);
   4634  1.1  mrg 
   4635  1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
   4636  1.1  mrg     {
   4637  1.1  mrg       if (exec_value)
   4638  1.1  mrg 	fprintf (dump_file, "Restoring EXEC to %ld before insn %d.\n",
   4639  1.1  mrg 		 exec_value, INSN_UID (insn));
   4640  1.1  mrg       else
   4641  1.1  mrg 	fprintf (dump_file,
   4642  1.1  mrg 		 "Restoring EXEC from saved value before insn %d.\n",
   4643  1.1  mrg 		 INSN_UID (insn));
   4644  1.1  mrg     }
   4645  1.1  mrg 
   4646  1.1  mrg   return exec_value;
   4647  1.1  mrg }
   4648  1.1  mrg 
   4649  1.1  mrg /* Implement TARGET_MACHINE_DEPENDENT_REORG.
   4650  1.1  mrg 
   4651  1.1  mrg    Ensure that pipeline dependencies and lane masking are set correctly.  */
   4652  1.1  mrg 
   4653  1.1  mrg static void
   4654  1.1  mrg gcn_md_reorg (void)
   4655  1.1  mrg {
   4656  1.1  mrg   basic_block bb;
   4657  1.1  mrg   rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
   4658  1.1  mrg   regset_head live;
   4659  1.1  mrg 
   4660  1.1  mrg   INIT_REG_SET (&live);
   4661  1.1  mrg 
   4662  1.1  mrg   compute_bb_for_insn ();
   4663  1.1  mrg 
   4664  1.1  mrg   if (!optimize)
   4665  1.1  mrg     {
   4666  1.1  mrg       split_all_insns ();
   4667  1.1  mrg       if (dump_file && (dump_flags & TDF_DETAILS))
   4668  1.1  mrg 	{
   4669  1.1  mrg 	  fprintf (dump_file, "After split:\n");
   4670  1.1  mrg 	  print_rtl_with_bb (dump_file, get_insns (), dump_flags);
   4671  1.1  mrg 	}
   4672  1.1  mrg 
   4673  1.1  mrg       /* Update data-flow information for split instructions.  */
   4674  1.1  mrg       df_insn_rescan_all ();
   4675  1.1  mrg     }
   4676  1.1  mrg 
   4677  1.1  mrg   df_live_add_problem ();
   4678  1.1  mrg   df_live_set_all_dirty ();
   4679  1.1  mrg   df_analyze ();
   4680  1.1  mrg 
   4681  1.1  mrg   /* This pass ensures that the EXEC register is set correctly, according
   4682  1.1  mrg      to the "exec" attribute.  However, care must be taken so that the
   4683  1.1  mrg      value that reaches explicit uses of the EXEC register remains the
   4684  1.1  mrg      same as before.
   4685  1.1  mrg    */
   4686  1.1  mrg 
   4687  1.1  mrg   FOR_EACH_BB_FN (bb, cfun)
   4688  1.1  mrg     {
   4689  1.1  mrg       if (dump_file && (dump_flags & TDF_DETAILS))
   4690  1.1  mrg 	fprintf (dump_file, "BB %d:\n", bb->index);
   4691  1.1  mrg 
   4692  1.1  mrg       rtx_insn *insn, *curr;
   4693  1.1  mrg       rtx_insn *last_exec_def = BB_HEAD (bb);
   4694  1.1  mrg       bool last_exec_def_saved = false;
   4695  1.1  mrg       bool curr_exec_explicit = true;
   4696  1.1  mrg       bool curr_exec_known = true;
   4697  1.1  mrg       int64_t curr_exec = 0;	/* 0 here means 'the value is that of EXEC
   4698  1.1  mrg 				   after last_exec_def is executed'.  */
   4699  1.1  mrg 
   4700  1.1  mrg       bitmap live_in = DF_LR_IN (bb);
   4701  1.1  mrg       bool exec_live_on_entry = false;
   4702  1.1  mrg       if (bitmap_bit_p (live_in, EXEC_LO_REG)
   4703  1.1  mrg 	  || bitmap_bit_p (live_in, EXEC_HI_REG))
   4704  1.1  mrg 	{
   4705  1.1  mrg 	  if (dump_file)
   4706  1.1  mrg 	    fprintf (dump_file, "EXEC reg is live on entry to block %d\n",
   4707  1.1  mrg 		     (int) bb->index);
   4708  1.1  mrg 	  exec_live_on_entry = true;
   4709  1.1  mrg 	}
   4710  1.1  mrg 
   4711  1.1  mrg       FOR_BB_INSNS_SAFE (bb, insn, curr)
   4712  1.1  mrg 	{
   4713  1.1  mrg 	  if (!NONDEBUG_INSN_P (insn))
   4714  1.1  mrg 	    continue;
   4715  1.1  mrg 
   4716  1.1  mrg 	  if (GET_CODE (PATTERN (insn)) == USE
   4717  1.1  mrg 	      || GET_CODE (PATTERN (insn)) == CLOBBER)
   4718  1.1  mrg 	    continue;
   4719  1.1  mrg 
   4720  1.1  mrg 	  HARD_REG_SET defs, uses;
   4721  1.1  mrg 	  CLEAR_HARD_REG_SET (defs);
   4722  1.1  mrg 	  CLEAR_HARD_REG_SET (uses);
   4723  1.1  mrg 	  note_stores (insn, record_hard_reg_sets, &defs);
   4724  1.1  mrg 	  note_uses (&PATTERN (insn), record_hard_reg_uses, &uses);
   4725  1.1  mrg 
   4726  1.1  mrg 	  bool exec_lo_def_p = TEST_HARD_REG_BIT (defs, EXEC_LO_REG);
   4727  1.1  mrg 	  bool exec_hi_def_p = TEST_HARD_REG_BIT (defs, EXEC_HI_REG);
   4728  1.1  mrg 	  bool exec_used = (hard_reg_set_intersect_p
   4729  1.1  mrg 			    (uses, reg_class_contents[(int) EXEC_MASK_REG])
   4730  1.1  mrg 			    || TEST_HARD_REG_BIT (uses, EXECZ_REG));
   4731  1.1  mrg 
   4732  1.1  mrg 	  /* Check the instruction for implicit setting of EXEC via an
   4733  1.1  mrg 	     attribute.  */
   4734  1.1  mrg 	  attr_exec exec_attr = get_attr_exec (insn);
   4735  1.1  mrg 	  int64_t new_exec;
   4736  1.1  mrg 
   4737  1.1  mrg 	  switch (exec_attr)
   4738  1.1  mrg 	    {
   4739  1.1  mrg 	    case EXEC_NONE:
   4740  1.1  mrg 	      new_exec = 0;
   4741  1.1  mrg 	      break;
   4742  1.1  mrg 
   4743  1.1  mrg 	    case EXEC_SINGLE:
   4744  1.1  mrg 	      /* Instructions that do not involve memory accesses only require
   4745  1.1  mrg 		 bit 0 of EXEC to be set.  */
   4746  1.1  mrg 	      if (gcn_vmem_insn_p (get_attr_type (insn))
   4747  1.1  mrg 		  || get_attr_type (insn) == TYPE_DS)
   4748  1.1  mrg 		new_exec = 1;
   4749  1.1  mrg 	      else
   4750  1.1  mrg 		new_exec = curr_exec | 1;
   4751  1.1  mrg 	      break;
   4752  1.1  mrg 
   4753  1.1  mrg 	    case EXEC_FULL:
   4754  1.1  mrg 	      new_exec = -1;
   4755  1.1  mrg 	      break;
   4756  1.1  mrg 
   4757  1.1  mrg 	    default:  /* Auto-detect what setting is appropriate.  */
   4758  1.1  mrg 	      {
   4759  1.1  mrg 	        new_exec = 0;
   4760  1.1  mrg 
   4761  1.1  mrg 		/* If EXEC is referenced explicitly then we don't need to do
   4762  1.1  mrg 		   anything to set it, so we're done.  */
   4763  1.1  mrg 		if (exec_used)
   4764  1.1  mrg 		  break;
   4765  1.1  mrg 
   4766  1.1  mrg 		/* Scan the insn for VGPRs defs or uses.  The mode determines
   4767  1.1  mrg 		   what kind of exec is needed.  */
   4768  1.1  mrg 		subrtx_iterator::array_type array;
   4769  1.1  mrg 		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
   4770  1.1  mrg 		  {
   4771  1.1  mrg 		    const_rtx x = *iter;
   4772  1.1  mrg 		    if (REG_P (x) && VGPR_REGNO_P (REGNO (x)))
   4773  1.1  mrg 		      {
   4774  1.1  mrg 			if (VECTOR_MODE_P (GET_MODE (x)))
   4775  1.1  mrg 			  {
   4776  1.1  mrg 			    new_exec = -1;
   4777  1.1  mrg 			    break;
   4778  1.1  mrg 			  }
   4779  1.1  mrg 			else
   4780  1.1  mrg 			  new_exec = 1;
   4781  1.1  mrg 		      }
   4782  1.1  mrg 		  }
   4783  1.1  mrg 	        }
   4784  1.1  mrg 	      break;
   4785  1.1  mrg 	    }
   4786  1.1  mrg 
   4787  1.1  mrg 	  if (new_exec && (!curr_exec_known || new_exec != curr_exec))
   4788  1.1  mrg 	    {
   4789  1.1  mrg 	      start_sequence ();
   4790  1.1  mrg 	      emit_move_insn (exec_reg, GEN_INT (new_exec));
   4791  1.1  mrg 	      rtx_insn *seq = get_insns ();
   4792  1.1  mrg 	      end_sequence ();
   4793  1.1  mrg 	      emit_insn_before (seq, insn);
   4794  1.1  mrg 
   4795  1.1  mrg 	      if (dump_file && (dump_flags & TDF_DETAILS))
   4796  1.1  mrg 		fprintf (dump_file, "Setting EXEC to %ld before insn %d.\n",
   4797  1.1  mrg 			 new_exec, INSN_UID (insn));
   4798  1.1  mrg 
   4799  1.1  mrg 	      curr_exec = new_exec;
   4800  1.1  mrg 	      curr_exec_explicit = false;
   4801  1.1  mrg 	      curr_exec_known = true;
   4802  1.1  mrg 	    }
   4803  1.1  mrg 	  else if (new_exec && dump_file && (dump_flags & TDF_DETAILS))
   4804  1.1  mrg 	    {
   4805  1.1  mrg 	      fprintf (dump_file, "Exec already is %ld before insn %d.\n",
   4806  1.1  mrg 		       new_exec, INSN_UID (insn));
   4807  1.1  mrg 	    }
   4808  1.1  mrg 
   4809  1.1  mrg 	  /* The state of the EXEC register is unknown after a
   4810  1.1  mrg 	     function call.  */
   4811  1.1  mrg 	  if (CALL_P (insn))
   4812  1.1  mrg 	    curr_exec_known = false;
   4813  1.1  mrg 
   4814  1.1  mrg 	  /* Handle explicit uses of EXEC.  If the instruction is a partial
   4815  1.1  mrg 	     explicit definition of EXEC, then treat it as an explicit use of
   4816  1.1  mrg 	     EXEC as well.  */
   4817  1.1  mrg 	  if (exec_used || exec_lo_def_p != exec_hi_def_p)
   4818  1.1  mrg 	    {
   4819  1.1  mrg 	      /* An instruction that explicitly uses EXEC should not also
   4820  1.1  mrg 		 implicitly define it.  */
   4821  1.1  mrg 	      gcc_assert (!exec_used || !new_exec);
   4822  1.1  mrg 
   4823  1.1  mrg 	      if (!curr_exec_known || !curr_exec_explicit)
   4824  1.1  mrg 		{
   4825  1.1  mrg 		  /* Restore the previous explicitly defined value.  */
   4826  1.1  mrg 		  curr_exec = gcn_restore_exec (insn, last_exec_def,
   4827  1.1  mrg 						curr_exec, curr_exec_known,
   4828  1.1  mrg 						last_exec_def_saved);
   4829  1.1  mrg 		  curr_exec_explicit = true;
   4830  1.1  mrg 		  curr_exec_known = true;
   4831  1.1  mrg 		}
   4832  1.1  mrg 	    }
   4833  1.1  mrg 
   4834  1.1  mrg 	  /* Handle explicit definitions of EXEC.  */
   4835  1.1  mrg 	  if (exec_lo_def_p || exec_hi_def_p)
   4836  1.1  mrg 	    {
   4837  1.1  mrg 	      last_exec_def = insn;
   4838  1.1  mrg 	      last_exec_def_saved = false;
   4839  1.1  mrg 	      curr_exec = gcn_insn_exec_value (insn);
   4840  1.1  mrg 	      curr_exec_explicit = true;
   4841  1.1  mrg 	      curr_exec_known = true;
   4842  1.1  mrg 
   4843  1.1  mrg 	      if (dump_file && (dump_flags & TDF_DETAILS))
   4844  1.1  mrg 		fprintf (dump_file,
   4845  1.1  mrg 			 "Found %s definition of EXEC at insn %d.\n",
   4846  1.1  mrg 			 exec_lo_def_p == exec_hi_def_p ? "full" : "partial",
   4847  1.1  mrg 			 INSN_UID (insn));
   4848  1.1  mrg 	    }
   4849  1.1  mrg 
   4850  1.1  mrg 	  exec_live_on_entry = false;
   4851  1.1  mrg 	}
   4852  1.1  mrg 
   4853  1.1  mrg       COPY_REG_SET (&live, DF_LR_OUT (bb));
   4854  1.1  mrg       df_simulate_initialize_backwards (bb, &live);
   4855  1.1  mrg 
   4856  1.1  mrg       /* If EXEC is live after the basic block, restore the value of EXEC
   4857  1.1  mrg 	 at the end of the block.  */
   4858  1.1  mrg       if ((REGNO_REG_SET_P (&live, EXEC_LO_REG)
   4859  1.1  mrg 	   || REGNO_REG_SET_P (&live, EXEC_HI_REG))
   4860  1.1  mrg 	  && (!curr_exec_known || !curr_exec_explicit || exec_live_on_entry))
   4861  1.1  mrg 	{
   4862  1.1  mrg 	  rtx_insn *end_insn = BB_END (bb);
   4863  1.1  mrg 
   4864  1.1  mrg 	  /* If the instruction is not a jump instruction, do the restore
   4865  1.1  mrg 	     after the last instruction in the basic block.  */
   4866  1.1  mrg 	  if (NONJUMP_INSN_P (end_insn))
   4867  1.1  mrg 	    end_insn = NEXT_INSN (end_insn);
   4868  1.1  mrg 
   4869  1.1  mrg 	  gcn_restore_exec (end_insn, last_exec_def, curr_exec,
   4870  1.1  mrg 			    curr_exec_known, last_exec_def_saved);
   4871  1.1  mrg 	}
   4872  1.1  mrg     }
   4873  1.1  mrg 
   4874  1.1  mrg   CLEAR_REG_SET (&live);
   4875  1.1  mrg 
   4876  1.1  mrg   /* "Manually Inserted Wait States (NOPs)."
   4877  1.1  mrg 
   4878  1.1  mrg      GCN hardware detects most kinds of register dependencies, but there
   4879  1.1  mrg      are some exceptions documented in the ISA manual.  This pass
   4880  1.1  mrg      detects the missed cases, and inserts the documented number of NOPs
   4881  1.1  mrg      required for correct execution.  */
   4882  1.1  mrg 
   4883  1.1  mrg   const int max_waits = 5;
   4884  1.1  mrg   struct ilist
   4885  1.1  mrg   {
   4886  1.1  mrg     rtx_insn *insn;
   4887  1.1  mrg     attr_unit unit;
   4888  1.1  mrg     attr_delayeduse delayeduse;
   4889  1.1  mrg     HARD_REG_SET writes;
   4890  1.1  mrg     HARD_REG_SET reads;
   4891  1.1  mrg     int age;
   4892  1.1  mrg   } back[max_waits];
   4893  1.1  mrg   int oldest = 0;
   4894  1.1  mrg   for (int i = 0; i < max_waits; i++)
   4895  1.1  mrg     back[i].insn = NULL;
   4896  1.1  mrg 
   4897  1.1  mrg   rtx_insn *insn, *last_insn = NULL;
   4898  1.1  mrg   for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
   4899  1.1  mrg     {
   4900  1.1  mrg       if (!NONDEBUG_INSN_P (insn))
   4901  1.1  mrg 	continue;
   4902  1.1  mrg 
   4903  1.1  mrg       if (GET_CODE (PATTERN (insn)) == USE
   4904  1.1  mrg 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
   4905  1.1  mrg 	continue;
   4906  1.1  mrg 
   4907  1.1  mrg       attr_type itype = get_attr_type (insn);
   4908  1.1  mrg       attr_unit iunit = get_attr_unit (insn);
   4909  1.1  mrg       attr_delayeduse idelayeduse = get_attr_delayeduse (insn);
   4910  1.1  mrg       HARD_REG_SET ireads, iwrites;
   4911  1.1  mrg       CLEAR_HARD_REG_SET (ireads);
   4912  1.1  mrg       CLEAR_HARD_REG_SET (iwrites);
   4913  1.1  mrg       note_stores (insn, record_hard_reg_sets, &iwrites);
   4914  1.1  mrg       note_uses (&PATTERN (insn), record_hard_reg_uses, &ireads);
   4915  1.1  mrg 
   4916  1.1  mrg       /* Scan recent previous instructions for dependencies not handled in
   4917  1.1  mrg          hardware.  */
   4918  1.1  mrg       int nops_rqd = 0;
   4919  1.1  mrg       for (int i = oldest; i < oldest + max_waits; i++)
   4920  1.1  mrg 	{
   4921  1.1  mrg 	  struct ilist *prev_insn = &back[i % max_waits];
   4922  1.1  mrg 
   4923  1.1  mrg 	  if (!prev_insn->insn)
   4924  1.1  mrg 	    continue;
   4925  1.1  mrg 
   4926  1.1  mrg 	  /* VALU writes SGPR followed by VMEM reading the same SGPR
   4927  1.1  mrg 	     requires 5 wait states.  */
   4928  1.1  mrg 	  if ((prev_insn->age + nops_rqd) < 5
   4929  1.1  mrg 	      && prev_insn->unit == UNIT_VECTOR
   4930  1.1  mrg 	      && gcn_vmem_insn_p (itype))
   4931  1.1  mrg 	    {
   4932  1.1  mrg 	      HARD_REG_SET regs = prev_insn->writes & ireads;
   4933  1.1  mrg 	      if (hard_reg_set_intersect_p
   4934  1.1  mrg 		  (regs, reg_class_contents[(int) SGPR_REGS]))
   4935  1.1  mrg 		nops_rqd = 5 - prev_insn->age;
   4936  1.1  mrg 	    }
   4937  1.1  mrg 
   4938  1.1  mrg 	  /* VALU sets VCC/EXEC followed by VALU uses VCCZ/EXECZ
   4939  1.1  mrg 	     requires 5 wait states.  */
   4940  1.1  mrg 	  if ((prev_insn->age + nops_rqd) < 5
   4941  1.1  mrg 	      && prev_insn->unit == UNIT_VECTOR
   4942  1.1  mrg 	      && iunit == UNIT_VECTOR
   4943  1.1  mrg 	      && ((hard_reg_set_intersect_p
   4944  1.1  mrg 		   (prev_insn->writes,
   4945  1.1  mrg 		    reg_class_contents[(int) EXEC_MASK_REG])
   4946  1.1  mrg 		   && TEST_HARD_REG_BIT (ireads, EXECZ_REG))
   4947  1.1  mrg 		  ||
   4948  1.1  mrg 		  (hard_reg_set_intersect_p
   4949  1.1  mrg 		   (prev_insn->writes,
   4950  1.1  mrg 		    reg_class_contents[(int) VCC_CONDITIONAL_REG])
   4951  1.1  mrg 		   && TEST_HARD_REG_BIT (ireads, VCCZ_REG))))
   4952  1.1  mrg 	    nops_rqd = 5 - prev_insn->age;
   4953  1.1  mrg 
   4954  1.1  mrg 	  /* VALU writes SGPR/VCC followed by v_{read,write}lane using
   4955  1.1  mrg 	     SGPR/VCC as lane select requires 4 wait states.  */
   4956  1.1  mrg 	  if ((prev_insn->age + nops_rqd) < 4
   4957  1.1  mrg 	      && prev_insn->unit == UNIT_VECTOR
   4958  1.1  mrg 	      && get_attr_laneselect (insn) == LANESELECT_YES)
   4959  1.1  mrg 	    {
   4960  1.1  mrg 	      HARD_REG_SET regs = prev_insn->writes & ireads;
   4961  1.1  mrg 	      if (hard_reg_set_intersect_p
   4962  1.1  mrg 		  (regs, reg_class_contents[(int) SGPR_REGS])
   4963  1.1  mrg 		  || hard_reg_set_intersect_p
   4964  1.1  mrg 		     (regs, reg_class_contents[(int) VCC_CONDITIONAL_REG]))
   4965  1.1  mrg 		nops_rqd = 4 - prev_insn->age;
   4966  1.1  mrg 	    }
   4967  1.1  mrg 
   4968  1.1  mrg 	  /* VALU writes VGPR followed by VALU_DPP reading that VGPR
   4969  1.1  mrg 	     requires 2 wait states.  */
   4970  1.1  mrg 	  if ((prev_insn->age + nops_rqd) < 2
   4971  1.1  mrg 	      && prev_insn->unit == UNIT_VECTOR
   4972  1.1  mrg 	      && itype == TYPE_VOP_DPP)
   4973  1.1  mrg 	    {
   4974  1.1  mrg 	      HARD_REG_SET regs = prev_insn->writes & ireads;
   4975  1.1  mrg 	      if (hard_reg_set_intersect_p
   4976  1.1  mrg 		  (regs, reg_class_contents[(int) VGPR_REGS]))
   4977  1.1  mrg 		nops_rqd = 2 - prev_insn->age;
   4978  1.1  mrg 	    }
   4979  1.1  mrg 
   4980  1.1  mrg 	  /* Store that requires input registers are not overwritten by
   4981  1.1  mrg 	     following instruction.  */
   4982  1.1  mrg 	  if ((prev_insn->age + nops_rqd) < 1
   4983  1.1  mrg 	      && prev_insn->delayeduse == DELAYEDUSE_YES
   4984  1.1  mrg 	      && ((hard_reg_set_intersect_p
   4985  1.1  mrg 		   (prev_insn->reads, iwrites))))
   4986  1.1  mrg 	    nops_rqd = 1 - prev_insn->age;
   4987  1.1  mrg 	}
   4988  1.1  mrg 
   4989  1.1  mrg       /* Insert the required number of NOPs.  */
   4990  1.1  mrg       for (int i = nops_rqd; i > 0; i--)
   4991  1.1  mrg 	emit_insn_after (gen_nop (), last_insn);
   4992  1.1  mrg 
   4993  1.1  mrg       /* Age the previous instructions.  We can also ignore writes to
   4994  1.1  mrg          registers subsequently overwritten.  */
   4995  1.1  mrg       HARD_REG_SET written;
   4996  1.1  mrg       CLEAR_HARD_REG_SET (written);
   4997  1.1  mrg       for (int i = oldest + max_waits - 1; i > oldest; i--)
   4998  1.1  mrg 	{
   4999  1.1  mrg 	  struct ilist *prev_insn = &back[i % max_waits];
   5000  1.1  mrg 
   5001  1.1  mrg 	  /* Assume all instructions are equivalent to one "wait", the same
   5002  1.1  mrg 	     as s_nop.  This is probably true for SALU, but not VALU (which
   5003  1.1  mrg 	     may take longer), so this is not optimal.  However, AMD do
   5004  1.1  mrg 	     not publish the cycle times for instructions.  */
   5005  1.1  mrg 	  prev_insn->age += 1 + nops_rqd;
   5006  1.1  mrg 
   5007  1.1  mrg 	  written |= iwrites;
   5008  1.1  mrg 	  prev_insn->writes &= ~written;
   5009  1.1  mrg 	}
   5010  1.1  mrg 
   5011  1.1  mrg       /* Track the current instruction as a previous instruction.  */
   5012  1.1  mrg       back[oldest].insn = insn;
   5013  1.1  mrg       back[oldest].unit = iunit;
   5014  1.1  mrg       back[oldest].delayeduse = idelayeduse;
   5015  1.1  mrg       back[oldest].writes = iwrites;
   5016  1.1  mrg       back[oldest].reads = ireads;
   5017  1.1  mrg       back[oldest].age = 0;
   5018  1.1  mrg       oldest = (oldest + 1) % max_waits;
   5019  1.1  mrg 
   5020  1.1  mrg       last_insn = insn;
   5021  1.1  mrg     }
   5022  1.1  mrg }
   5023  1.1  mrg 
   5024  1.1  mrg /* }}}  */
   5025  1.1  mrg /* {{{ OpenACC / OpenMP.  */
   5026  1.1  mrg 
   5027  1.1  mrg #define GCN_DEFAULT_GANGS 0	/* Choose at runtime.  */
   5028  1.1  mrg #define GCN_DEFAULT_WORKERS 0	/* Choose at runtime.  */
   5029  1.1  mrg #define GCN_DEFAULT_VECTORS 1	/* Use autovectorization only, for now.  */
   5030  1.1  mrg 
   5031  1.1  mrg /* Implement TARGET_GOACC_VALIDATE_DIMS.
   5032  1.1  mrg 
   5033  1.1  mrg    Check the launch dimensions provided for an OpenACC compute
   5034  1.1  mrg    region, or routine.  */
   5035  1.1  mrg 
   5036  1.1  mrg static bool
   5037  1.1  mrg gcn_goacc_validate_dims (tree decl, int dims[], int fn_level,
   5038  1.1  mrg 			 unsigned /*used*/)
   5039  1.1  mrg {
   5040  1.1  mrg   bool changed = false;
   5041  1.1  mrg   const int max_workers = 16;
   5042  1.1  mrg 
   5043  1.1  mrg   /* The vector size must appear to be 64, to the user, unless this is a
   5044  1.1  mrg      SEQ routine.  The real, internal value is always 1, which means use
   5045  1.1  mrg      autovectorization, but the user should not see that.  */
   5046  1.1  mrg   if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1
   5047  1.1  mrg       && dims[GOMP_DIM_VECTOR] >= 0)
   5048  1.1  mrg     {
   5049  1.1  mrg       if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0
   5050  1.1  mrg 	  && dims[GOMP_DIM_VECTOR] != 64)
   5051  1.1  mrg 	warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
   5052  1.1  mrg 		    OPT_Wopenacc_dims,
   5053  1.1  mrg 		    (dims[GOMP_DIM_VECTOR]
   5054  1.1  mrg 		     ? G_("using %<vector_length (64)%>, ignoring %d")
   5055  1.1  mrg 		     : G_("using %<vector_length (64)%>, "
   5056  1.1  mrg 			  "ignoring runtime setting")),
   5057  1.1  mrg 		    dims[GOMP_DIM_VECTOR]);
   5058  1.1  mrg       dims[GOMP_DIM_VECTOR] = 1;
   5059  1.1  mrg       changed = true;
   5060  1.1  mrg     }
   5061  1.1  mrg 
   5062  1.1  mrg   /* Check the num workers is not too large.  */
   5063  1.1  mrg   if (dims[GOMP_DIM_WORKER] > max_workers)
   5064  1.1  mrg     {
   5065  1.1  mrg       warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
   5066  1.1  mrg 		  OPT_Wopenacc_dims,
   5067  1.1  mrg 		  "using %<num_workers (%d)%>, ignoring %d",
   5068  1.1  mrg 		  max_workers, dims[GOMP_DIM_WORKER]);
   5069  1.1  mrg       dims[GOMP_DIM_WORKER] = max_workers;
   5070  1.1  mrg       changed = true;
   5071  1.1  mrg     }
   5072  1.1  mrg 
   5073  1.1  mrg   /* Set global defaults.  */
   5074  1.1  mrg   if (!decl)
   5075  1.1  mrg     {
   5076  1.1  mrg       dims[GOMP_DIM_VECTOR] = GCN_DEFAULT_VECTORS;
   5077  1.1  mrg       if (dims[GOMP_DIM_WORKER] < 0)
   5078  1.1  mrg 	dims[GOMP_DIM_WORKER] = GCN_DEFAULT_WORKERS;
   5079  1.1  mrg       if (dims[GOMP_DIM_GANG] < 0)
   5080  1.1  mrg 	dims[GOMP_DIM_GANG] = GCN_DEFAULT_GANGS;
   5081  1.1  mrg       changed = true;
   5082  1.1  mrg     }
   5083  1.1  mrg 
   5084  1.1  mrg   return changed;
   5085  1.1  mrg }
   5086  1.1  mrg 
   5087  1.1  mrg /* Helper function for oacc_dim_size instruction.
   5088  1.1  mrg    Also used for OpenMP, via builtin_gcn_dim_size, and the omp_gcn pass.  */
   5089  1.1  mrg 
   5090  1.1  mrg rtx
   5091  1.1  mrg gcn_oacc_dim_size (int dim)
   5092  1.1  mrg {
   5093  1.1  mrg   if (dim < 0 || dim > 2)
   5094  1.1  mrg     error ("offload dimension out of range (%d)", dim);
   5095  1.1  mrg 
   5096  1.1  mrg   /* Vectors are a special case.  */
   5097  1.1  mrg   if (dim == 2)
   5098  1.1  mrg     return const1_rtx;		/* Think of this as 1 times 64.  */
   5099  1.1  mrg 
   5100  1.1  mrg   static int offset[] = {
   5101  1.1  mrg     /* Offsets into dispatch packet.  */
   5102  1.1  mrg     12,				/* X dim = Gang / Team / Work-group.  */
   5103  1.1  mrg     20,				/* Z dim = Worker / Thread / Wavefront.  */
   5104  1.1  mrg     16				/* Y dim = Vector / SIMD / Work-item.  */
   5105  1.1  mrg   };
   5106  1.1  mrg   rtx addr = gen_rtx_PLUS (DImode,
   5107  1.1  mrg 			   gen_rtx_REG (DImode,
   5108  1.1  mrg 					cfun->machine->args.
   5109  1.1  mrg 					reg[DISPATCH_PTR_ARG]),
   5110  1.1  mrg 			   GEN_INT (offset[dim]));
   5111  1.1  mrg   return gen_rtx_MEM (SImode, addr);
   5112  1.1  mrg }
   5113  1.1  mrg 
   5114  1.1  mrg /* Helper function for oacc_dim_pos instruction.
   5115  1.1  mrg    Also used for OpenMP, via builtin_gcn_dim_pos, and the omp_gcn pass.  */
   5116  1.1  mrg 
   5117  1.1  mrg rtx
   5118  1.1  mrg gcn_oacc_dim_pos (int dim)
   5119  1.1  mrg {
   5120  1.1  mrg   if (dim < 0 || dim > 2)
   5121  1.1  mrg     error ("offload dimension out of range (%d)", dim);
   5122  1.1  mrg 
   5123  1.1  mrg   static const int reg[] = {
   5124  1.1  mrg     WORKGROUP_ID_X_ARG,		/* Gang / Team / Work-group.  */
   5125  1.1  mrg     WORK_ITEM_ID_Z_ARG,		/* Worker / Thread / Wavefront.  */
   5126  1.1  mrg     WORK_ITEM_ID_Y_ARG		/* Vector / SIMD / Work-item.  */
   5127  1.1  mrg   };
   5128  1.1  mrg 
   5129  1.1  mrg   int reg_num = cfun->machine->args.reg[reg[dim]];
   5130  1.1  mrg 
   5131  1.1  mrg   /* The information must have been requested by the kernel.  */
   5132  1.1  mrg   gcc_assert (reg_num >= 0);
   5133  1.1  mrg 
   5134  1.1  mrg   return gen_rtx_REG (SImode, reg_num);
   5135  1.1  mrg }
   5136  1.1  mrg 
   5137  1.1  mrg /* Implement TARGET_GOACC_FORK_JOIN.  */
   5138  1.1  mrg 
   5139  1.1  mrg static bool
   5140  1.1  mrg gcn_fork_join (gcall *call, const int dims[], bool is_fork)
   5141  1.1  mrg {
   5142  1.1  mrg   tree arg = gimple_call_arg (call, 2);
   5143  1.1  mrg   unsigned axis = TREE_INT_CST_LOW (arg);
   5144  1.1  mrg 
   5145  1.1  mrg   if (!is_fork && axis == GOMP_DIM_WORKER && dims[axis] != 1)
   5146  1.1  mrg     return true;
   5147  1.1  mrg 
   5148  1.1  mrg   return false;
   5149  1.1  mrg }
   5150  1.1  mrg 
   5151  1.1  mrg /* Implement ???????
   5152  1.1  mrg    FIXME make this a real hook.
   5153  1.1  mrg 
   5154  1.1  mrg    Adjust FNDECL such that options inherited from the host compiler
   5155  1.1  mrg    are made appropriate for the accelerator compiler.  */
   5156  1.1  mrg 
   5157  1.1  mrg void
   5158  1.1  mrg gcn_fixup_accel_lto_options (tree fndecl)
   5159  1.1  mrg {
   5160  1.1  mrg   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
   5161  1.1  mrg   if (!func_optimize)
   5162  1.1  mrg     return;
   5163  1.1  mrg 
   5164  1.1  mrg   tree old_optimize
   5165  1.1  mrg     = build_optimization_node (&global_options, &global_options_set);
   5166  1.1  mrg   tree new_optimize;
   5167  1.1  mrg 
   5168  1.1  mrg   /* If the function changed the optimization levels as well as
   5169  1.1  mrg      setting target options, start with the optimizations
   5170  1.1  mrg      specified.  */
   5171  1.1  mrg   if (func_optimize != old_optimize)
   5172  1.1  mrg     cl_optimization_restore (&global_options, &global_options_set,
   5173  1.1  mrg 			     TREE_OPTIMIZATION (func_optimize));
   5174  1.1  mrg 
   5175  1.1  mrg   gcn_option_override ();
   5176  1.1  mrg 
   5177  1.1  mrg   /* The target attributes may also change some optimization flags,
   5178  1.1  mrg      so update the optimization options if necessary.  */
   5179  1.1  mrg   new_optimize = build_optimization_node (&global_options,
   5180  1.1  mrg 					  &global_options_set);
   5181  1.1  mrg 
   5182  1.1  mrg   if (old_optimize != new_optimize)
   5183  1.1  mrg     {
   5184  1.1  mrg       DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
   5185  1.1  mrg       cl_optimization_restore (&global_options, &global_options_set,
   5186  1.1  mrg 			       TREE_OPTIMIZATION (old_optimize));
   5187  1.1  mrg     }
   5188  1.1  mrg }
   5189  1.1  mrg 
   5190  1.1  mrg /* Implement TARGET_GOACC_SHARED_MEM_LAYOUT hook.  */
   5191  1.1  mrg 
   5192  1.1  mrg static void
   5193  1.1  mrg gcn_shared_mem_layout (unsigned HOST_WIDE_INT *lo,
   5194  1.1  mrg 		       unsigned HOST_WIDE_INT *hi,
   5195  1.1  mrg 		       int ARG_UNUSED (dims[GOMP_DIM_MAX]),
   5196  1.1  mrg 		       unsigned HOST_WIDE_INT
   5197  1.1  mrg 			 ARG_UNUSED (private_size[GOMP_DIM_MAX]),
   5198  1.1  mrg 		       unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX])
   5199  1.1  mrg {
   5200  1.1  mrg   *lo = gang_private_size_opt + reduction_size[GOMP_DIM_WORKER];
   5201  1.1  mrg   /* !!! We can maybe use dims[] to estimate the maximum number of work
   5202  1.1  mrg      groups/wavefronts/etc. we will launch, and therefore tune the maximum
   5203  1.1  mrg      amount of LDS we should use.  For now, use a minimal amount to try to
   5204  1.1  mrg      maximise occupancy.  */
   5205  1.1  mrg   *hi = acc_lds_size;
   5206  1.1  mrg   machine_function *machfun = cfun->machine;
   5207  1.1  mrg   machfun->reduction_base = gang_private_size_opt;
   5208  1.1  mrg   machfun->reduction_limit
   5209  1.1  mrg     = gang_private_size_opt + reduction_size[GOMP_DIM_WORKER];
   5210  1.1  mrg }
   5211  1.1  mrg 
   5212  1.1  mrg /* }}}  */
   5213  1.1  mrg /* {{{ ASM Output.  */
   5214  1.1  mrg 
   5215  1.1  mrg /*  Implement TARGET_ASM_FILE_START.
   5216  1.1  mrg 
   5217  1.1  mrg     Print assembler file header text.  */
   5218  1.1  mrg 
   5219  1.1  mrg static void
   5220  1.1  mrg output_file_start (void)
   5221  1.1  mrg {
   5222  1.1  mrg   const char *cpu;
   5223  1.1  mrg   bool use_xnack_attr = true;
   5224  1.1  mrg   bool use_sram_attr = true;
   5225  1.1  mrg   switch (gcn_arch)
   5226  1.1  mrg     {
   5227  1.1  mrg     case PROCESSOR_FIJI:
   5228  1.1  mrg       cpu = "gfx803";
   5229  1.1  mrg #ifndef HAVE_GCN_XNACK_FIJI
   5230  1.1  mrg       use_xnack_attr = false;
   5231  1.1  mrg #endif
   5232  1.1  mrg       use_sram_attr = false;
   5233  1.1  mrg       break;
   5234  1.1  mrg     case PROCESSOR_VEGA10:
   5235  1.1  mrg       cpu = "gfx900";
   5236  1.1  mrg #ifndef HAVE_GCN_XNACK_GFX900
   5237  1.1  mrg       use_xnack_attr = false;
   5238  1.1  mrg #endif
   5239  1.1  mrg       use_sram_attr = false;
   5240  1.1  mrg       break;
   5241  1.1  mrg     case PROCESSOR_VEGA20:
   5242  1.1  mrg       cpu = "gfx906";
   5243  1.1  mrg #ifndef HAVE_GCN_XNACK_GFX906
   5244  1.1  mrg       use_xnack_attr = false;
   5245  1.1  mrg #endif
   5246  1.1  mrg       use_sram_attr = false;
   5247  1.1  mrg       break;
   5248  1.1  mrg     case PROCESSOR_GFX908:
   5249  1.1  mrg       cpu = "gfx908";
   5250  1.1  mrg #ifndef HAVE_GCN_XNACK_GFX908
   5251  1.1  mrg       use_xnack_attr = false;
   5252  1.1  mrg #endif
   5253  1.1  mrg #ifndef HAVE_GCN_SRAM_ECC_GFX908
   5254  1.1  mrg       use_sram_attr = false;
   5255  1.1  mrg #endif
   5256  1.1  mrg       break;
   5257  1.1  mrg     default: gcc_unreachable ();
   5258  1.1  mrg     }
   5259  1.1  mrg 
   5260  1.1  mrg #if HAVE_GCN_ASM_V3_SYNTAX
   5261  1.1  mrg   const char *xnack = (flag_xnack ? "+xnack" : "");
   5262  1.1  mrg   const char *sram_ecc = (flag_sram_ecc ? "+sram-ecc" : "");
   5263  1.1  mrg #endif
   5264  1.1  mrg #if HAVE_GCN_ASM_V4_SYNTAX
   5265  1.1  mrg   /* In HSACOv4 no attribute setting means the binary supports "any" hardware
   5266  1.1  mrg      configuration.  In GCC binaries, this is true for SRAM ECC, but not
   5267  1.1  mrg      XNACK.  */
   5268  1.1  mrg   const char *xnack = (flag_xnack ? ":xnack+" : ":xnack-");
   5269  1.1  mrg   const char *sram_ecc = (flag_sram_ecc == SRAM_ECC_ON ? ":sramecc+"
   5270  1.1  mrg 			  : flag_sram_ecc == SRAM_ECC_OFF ? ":sramecc-"
   5271  1.1  mrg 			  : "");
   5272  1.1  mrg #endif
   5273  1.1  mrg   if (!use_xnack_attr)
   5274  1.1  mrg     xnack = "";
   5275  1.1  mrg   if (!use_sram_attr)
   5276  1.1  mrg     sram_ecc = "";
   5277  1.1  mrg 
   5278  1.1  mrg   fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n",
   5279  1.1  mrg 	  cpu,
   5280  1.1  mrg #if HAVE_GCN_ASM_V3_SYNTAX
   5281  1.1  mrg 	  xnack, sram_ecc
   5282  1.1  mrg #endif
   5283  1.1  mrg #ifdef HAVE_GCN_ASM_V4_SYNTAX
   5284  1.1  mrg 	  sram_ecc, xnack
   5285  1.1  mrg #endif
   5286  1.1  mrg 	  );
   5287  1.1  mrg }
   5288  1.1  mrg 
   5289  1.1  mrg /* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.
   5290  1.1  mrg 
   5291  1.1  mrg    Print the initial definition of a function name.
   5292  1.1  mrg 
   5293  1.1  mrg    For GCN kernel entry points this includes all the HSA meta-data, special
   5294  1.1  mrg    alignment constraints that don't apply to regular functions, and magic
   5295  1.1  mrg    comments that pass information to mkoffload.  */
   5296  1.1  mrg 
   5297  1.1  mrg void
   5298  1.1  mrg gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
   5299  1.1  mrg {
   5300  1.1  mrg   int sgpr, vgpr;
   5301  1.1  mrg   bool xnack_enabled = false;
   5302  1.1  mrg 
   5303  1.1  mrg   fputs ("\n\n", file);
   5304  1.1  mrg 
   5305  1.1  mrg   if (cfun && cfun->machine && cfun->machine->normal_function)
   5306  1.1  mrg     {
   5307  1.1  mrg       fputs ("\t.type\t", file);
   5308  1.1  mrg       assemble_name (file, name);
   5309  1.1  mrg       fputs (",@function\n", file);
   5310  1.1  mrg       assemble_name (file, name);
   5311  1.1  mrg       fputs (":\n", file);
   5312  1.1  mrg       return;
   5313  1.1  mrg     }
   5314  1.1  mrg 
   5315  1.1  mrg   /* Determine count of sgpr/vgpr registers by looking for last
   5316  1.1  mrg      one used.  */
   5317  1.1  mrg   for (sgpr = 101; sgpr >= 0; sgpr--)
   5318  1.1  mrg     if (df_regs_ever_live_p (FIRST_SGPR_REG + sgpr))
   5319  1.1  mrg       break;
   5320  1.1  mrg   sgpr++;
   5321  1.1  mrg   for (vgpr = 255; vgpr >= 0; vgpr--)
   5322  1.1  mrg     if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
   5323  1.1  mrg       break;
   5324  1.1  mrg   vgpr++;
   5325  1.1  mrg 
   5326  1.1  mrg   if (!leaf_function_p ())
   5327  1.1  mrg     {
   5328  1.1  mrg       /* We can't know how many registers function calls might use.  */
   5329  1.1  mrg       if (vgpr < MAX_NORMAL_VGPR_COUNT)
   5330  1.1  mrg 	vgpr = MAX_NORMAL_VGPR_COUNT;
   5331  1.1  mrg       if (sgpr < MAX_NORMAL_SGPR_COUNT)
   5332  1.1  mrg 	sgpr = MAX_NORMAL_SGPR_COUNT;
   5333  1.1  mrg     }
   5334  1.1  mrg 
   5335  1.1  mrg   fputs ("\t.rodata\n"
   5336  1.1  mrg 	 "\t.p2align\t6\n"
   5337  1.1  mrg 	 "\t.amdhsa_kernel\t", file);
   5338  1.1  mrg   assemble_name (file, name);
   5339  1.1  mrg   fputs ("\n", file);
   5340  1.1  mrg   int reg = FIRST_SGPR_REG;
   5341  1.1  mrg   for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
   5342  1.1  mrg     {
   5343  1.1  mrg       int reg_first = -1;
   5344  1.1  mrg       int reg_last;
   5345  1.1  mrg       if ((cfun->machine->args.requested & (1 << a))
   5346  1.1  mrg 	  && (gcn_kernel_arg_types[a].fixed_regno < 0))
   5347  1.1  mrg 	{
   5348  1.1  mrg 	  reg_first = reg;
   5349  1.1  mrg 	  reg_last = (reg_first
   5350  1.1  mrg 		      + (GET_MODE_SIZE (gcn_kernel_arg_types[a].mode)
   5351  1.1  mrg 			 / UNITS_PER_WORD) - 1);
   5352  1.1  mrg 	  reg = reg_last + 1;
   5353  1.1  mrg 	}
   5354  1.1  mrg 
   5355  1.1  mrg       if (gcn_kernel_arg_types[a].header_pseudo)
   5356  1.1  mrg 	{
   5357  1.1  mrg 	  fprintf (file, "\t  %s%s\t%i",
   5358  1.1  mrg 		   (cfun->machine->args.requested & (1 << a)) != 0 ? "" : ";",
   5359  1.1  mrg 		   gcn_kernel_arg_types[a].header_pseudo,
   5360  1.1  mrg 		   (cfun->machine->args.requested & (1 << a)) != 0);
   5361  1.1  mrg 	  if (reg_first != -1)
   5362  1.1  mrg 	    {
   5363  1.1  mrg 	      fprintf (file, " ; (");
   5364  1.1  mrg 	      for (int i = reg_first; i <= reg_last; ++i)
   5365  1.1  mrg 		{
   5366  1.1  mrg 		  if (i != reg_first)
   5367  1.1  mrg 		    fprintf (file, ", ");
   5368  1.1  mrg 		  fprintf (file, "%s", reg_names[i]);
   5369  1.1  mrg 		}
   5370  1.1  mrg 	      fprintf (file, ")");
   5371  1.1  mrg 	    }
   5372  1.1  mrg 	  fprintf (file, "\n");
   5373  1.1  mrg 	}
   5374  1.1  mrg       else if (gcn_kernel_arg_types[a].fixed_regno >= 0
   5375  1.1  mrg 	       && cfun->machine->args.requested & (1 << a))
   5376  1.1  mrg 	fprintf (file, "\t  ; %s\t%i (%s)\n",
   5377  1.1  mrg 		 gcn_kernel_arg_types[a].name,
   5378  1.1  mrg 		 (cfun->machine->args.requested & (1 << a)) != 0,
   5379  1.1  mrg 		 reg_names[gcn_kernel_arg_types[a].fixed_regno]);
   5380  1.1  mrg     }
   5381  1.1  mrg   fprintf (file, "\t  .amdhsa_system_vgpr_workitem_id\t%i\n",
   5382  1.1  mrg 	   (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG))
   5383  1.1  mrg 	   ? 2
   5384  1.1  mrg 	   : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
   5385  1.1  mrg 	   ? 1 : 0);
   5386  1.1  mrg   fprintf (file,
   5387  1.1  mrg 	   "\t  .amdhsa_next_free_vgpr\t%i\n"
   5388  1.1  mrg 	   "\t  .amdhsa_next_free_sgpr\t%i\n"
   5389  1.1  mrg 	   "\t  .amdhsa_reserve_vcc\t1\n"
   5390  1.1  mrg 	   "\t  .amdhsa_reserve_flat_scratch\t0\n"
   5391  1.1  mrg 	   "\t  .amdhsa_reserve_xnack_mask\t%i\n"
   5392  1.1  mrg 	   "\t  .amdhsa_private_segment_fixed_size\t%i\n"
   5393  1.1  mrg 	   "\t  .amdhsa_group_segment_fixed_size\t%u\n"
   5394  1.1  mrg 	   "\t  .amdhsa_float_denorm_mode_32\t3\n"
   5395  1.1  mrg 	   "\t  .amdhsa_float_denorm_mode_16_64\t3\n",
   5396  1.1  mrg 	   vgpr,
   5397  1.1  mrg 	   sgpr,
   5398  1.1  mrg 	   xnack_enabled,
   5399  1.1  mrg 	   /* workitem_private_segment_bytes_size needs to be
   5400  1.1  mrg 	      one 64th the wave-front stack size.  */
   5401  1.1  mrg 	   stack_size_opt / 64,
   5402  1.1  mrg 	   LDS_SIZE);
   5403  1.1  mrg   fputs ("\t.end_amdhsa_kernel\n", file);
   5404  1.1  mrg 
   5405  1.1  mrg #if 1
   5406  1.1  mrg   /* The following is YAML embedded in assembler; tabs are not allowed.  */
   5407  1.1  mrg   fputs ("        .amdgpu_metadata\n"
   5408  1.1  mrg 	 "        amdhsa.version:\n"
   5409  1.1  mrg 	 "          - 1\n"
   5410  1.1  mrg 	 "          - 0\n"
   5411  1.1  mrg 	 "        amdhsa.kernels:\n"
   5412  1.1  mrg 	 "          - .name: ", file);
   5413  1.1  mrg   assemble_name (file, name);
   5414  1.1  mrg   fputs ("\n            .symbol: ", file);
   5415  1.1  mrg   assemble_name (file, name);
   5416  1.1  mrg   fprintf (file,
   5417  1.1  mrg 	   ".kd\n"
   5418  1.1  mrg 	   "            .kernarg_segment_size: %i\n"
   5419  1.1  mrg 	   "            .kernarg_segment_align: %i\n"
   5420  1.1  mrg 	   "            .group_segment_fixed_size: %u\n"
   5421  1.1  mrg 	   "            .private_segment_fixed_size: %i\n"
   5422  1.1  mrg 	   "            .wavefront_size: 64\n"
   5423  1.1  mrg 	   "            .sgpr_count: %i\n"
   5424  1.1  mrg 	   "            .vgpr_count: %i\n"
   5425  1.1  mrg 	   "            .max_flat_workgroup_size: 1024\n",
   5426  1.1  mrg 	   cfun->machine->kernarg_segment_byte_size,
   5427  1.1  mrg 	   cfun->machine->kernarg_segment_alignment,
   5428  1.1  mrg 	   LDS_SIZE,
   5429  1.1  mrg 	   stack_size_opt / 64,
   5430  1.1  mrg 	   sgpr, vgpr);
   5431  1.1  mrg   fputs ("        .end_amdgpu_metadata\n", file);
   5432  1.1  mrg #endif
   5433  1.1  mrg 
   5434  1.1  mrg   fputs ("\t.text\n", file);
   5435  1.1  mrg   fputs ("\t.align\t256\n", file);
   5436  1.1  mrg   fputs ("\t.type\t", file);
   5437  1.1  mrg   assemble_name (file, name);
   5438  1.1  mrg   fputs (",@function\n", file);
   5439  1.1  mrg   assemble_name (file, name);
   5440  1.1  mrg   fputs (":\n", file);
   5441  1.1  mrg 
   5442  1.1  mrg   /* This comment is read by mkoffload.  */
   5443  1.1  mrg   if (flag_openacc)
   5444  1.1  mrg     fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
   5445  1.1  mrg 	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG),
   5446  1.1  mrg 	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER),
   5447  1.1  mrg 	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name);
   5448  1.1  mrg }
   5449  1.1  mrg 
   5450  1.1  mrg /* Implement TARGET_ASM_SELECT_SECTION.
   5451  1.1  mrg 
   5452  1.1  mrg    Return the section into which EXP should be placed.  */
   5453  1.1  mrg 
   5454  1.1  mrg static section *
   5455  1.1  mrg gcn_asm_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
   5456  1.1  mrg {
   5457  1.1  mrg   if (TREE_TYPE (exp) != error_mark_node
   5458  1.1  mrg       && TYPE_ADDR_SPACE (TREE_TYPE (exp)) == ADDR_SPACE_LDS)
   5459  1.1  mrg     {
   5460  1.1  mrg       if (!DECL_P (exp))
   5461  1.1  mrg 	return get_section (".lds_bss",
   5462  1.1  mrg 			    SECTION_WRITE | SECTION_BSS | SECTION_DEBUG,
   5463  1.1  mrg 			    NULL);
   5464  1.1  mrg 
   5465  1.1  mrg       return get_named_section (exp, ".lds_bss", reloc);
   5466  1.1  mrg     }
   5467  1.1  mrg 
   5468  1.1  mrg   return default_elf_select_section (exp, reloc, align);
   5469  1.1  mrg }
   5470  1.1  mrg 
   5471  1.1  mrg /* Implement TARGET_ASM_FUNCTION_PROLOGUE.
   5472  1.1  mrg 
   5473  1.1  mrg    Emits custom text into the assembler file at the head of each function.  */
   5474  1.1  mrg 
   5475  1.1  mrg static void
   5476  1.1  mrg gcn_target_asm_function_prologue (FILE *file)
   5477  1.1  mrg {
   5478  1.1  mrg   machine_function *offsets = gcn_compute_frame_offsets ();
   5479  1.1  mrg 
   5480  1.1  mrg   asm_fprintf (file, "\t; using %s addressing in function\n",
   5481  1.1  mrg 	       offsets->use_flat_addressing ? "flat" : "global");
   5482  1.1  mrg 
   5483  1.1  mrg   if (offsets->normal_function)
   5484  1.1  mrg     {
   5485  1.1  mrg       asm_fprintf (file, "\t; frame pointer needed: %s\n",
   5486  1.1  mrg 		   offsets->need_frame_pointer ? "true" : "false");
   5487  1.1  mrg       asm_fprintf (file, "\t; lr needs saving: %s\n",
   5488  1.1  mrg 		   offsets->lr_needs_saving ? "true" : "false");
   5489  1.1  mrg       asm_fprintf (file, "\t; outgoing args size: %wd\n",
   5490  1.1  mrg 		   offsets->outgoing_args_size);
   5491  1.1  mrg       asm_fprintf (file, "\t; pretend size: %wd\n", offsets->pretend_size);
   5492  1.1  mrg       asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
   5493  1.1  mrg       asm_fprintf (file, "\t; callee save size: %wd\n",
   5494  1.1  mrg 		   offsets->callee_saves);
   5495  1.1  mrg     }
   5496  1.1  mrg   else
   5497  1.1  mrg     {
   5498  1.1  mrg       asm_fprintf (file, "\t; HSA kernel entry point\n");
   5499  1.1  mrg       asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
   5500  1.1  mrg       asm_fprintf (file, "\t; outgoing args size: %wd\n",
   5501  1.1  mrg 		   offsets->outgoing_args_size);
   5502  1.1  mrg     }
   5503  1.1  mrg }
   5504  1.1  mrg 
   5505  1.1  mrg /* Helper function for print_operand and print_operand_address.
   5506  1.1  mrg 
   5507  1.1  mrg    Print a register as the assembler requires, according to mode and name.  */
   5508  1.1  mrg 
   5509  1.1  mrg static void
   5510  1.1  mrg print_reg (FILE *file, rtx x)
   5511  1.1  mrg {
   5512  1.1  mrg   machine_mode mode = GET_MODE (x);
   5513  1.1  mrg   if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
   5514  1.1  mrg       || mode == HFmode || mode == SFmode
   5515  1.1  mrg       || mode == V64SFmode || mode == V64SImode
   5516  1.1  mrg       || mode == V64QImode || mode == V64HImode)
   5517  1.1  mrg     fprintf (file, "%s", reg_names[REGNO (x)]);
   5518  1.1  mrg   else if (mode == DImode || mode == V64DImode
   5519  1.1  mrg 	   || mode == DFmode || mode == V64DFmode)
   5520  1.1  mrg     {
   5521  1.1  mrg       if (SGPR_REGNO_P (REGNO (x)))
   5522  1.1  mrg 	fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
   5523  1.1  mrg 		 REGNO (x) - FIRST_SGPR_REG + 1);
   5524  1.1  mrg       else if (VGPR_REGNO_P (REGNO (x)))
   5525  1.1  mrg 	fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
   5526  1.1  mrg 		 REGNO (x) - FIRST_VGPR_REG + 1);
   5527  1.1  mrg       else if (REGNO (x) == FLAT_SCRATCH_REG)
   5528  1.1  mrg 	fprintf (file, "flat_scratch");
   5529  1.1  mrg       else if (REGNO (x) == EXEC_REG)
   5530  1.1  mrg 	fprintf (file, "exec");
   5531  1.1  mrg       else if (REGNO (x) == VCC_LO_REG)
   5532  1.1  mrg 	fprintf (file, "vcc");
   5533  1.1  mrg       else
   5534  1.1  mrg 	fprintf (file, "[%s:%s]",
   5535  1.1  mrg 		 reg_names[REGNO (x)], reg_names[REGNO (x) + 1]);
   5536  1.1  mrg     }
   5537  1.1  mrg   else if (mode == TImode)
   5538  1.1  mrg     {
   5539  1.1  mrg       if (SGPR_REGNO_P (REGNO (x)))
   5540  1.1  mrg 	fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
   5541  1.1  mrg 		 REGNO (x) - FIRST_SGPR_REG + 3);
   5542  1.1  mrg       else if (VGPR_REGNO_P (REGNO (x)))
   5543  1.1  mrg 	fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
   5544  1.1  mrg 		 REGNO (x) - FIRST_VGPR_REG + 3);
   5545  1.1  mrg       else
   5546  1.1  mrg 	gcc_unreachable ();
   5547  1.1  mrg     }
   5548  1.1  mrg   else
   5549  1.1  mrg     gcc_unreachable ();
   5550  1.1  mrg }
   5551  1.1  mrg 
   5552  1.1  mrg /* Implement TARGET_SECTION_TYPE_FLAGS.
   5553  1.1  mrg 
   5554  1.1  mrg    Return a set of section attributes for use by TARGET_ASM_NAMED_SECTION.  */
   5555  1.1  mrg 
   5556  1.1  mrg static unsigned int
   5557  1.1  mrg gcn_section_type_flags (tree decl, const char *name, int reloc)
   5558  1.1  mrg {
   5559  1.1  mrg   if (strcmp (name, ".lds_bss") == 0)
   5560  1.1  mrg     return SECTION_WRITE | SECTION_BSS | SECTION_DEBUG;
   5561  1.1  mrg 
   5562  1.1  mrg   return default_section_type_flags (decl, name, reloc);
   5563  1.1  mrg }
   5564  1.1  mrg 
   5565  1.1  mrg /* Helper function for gcn_asm_output_symbol_ref.
   5566  1.1  mrg 
   5567  1.1  mrg    FIXME: This function is used to lay out gang-private variables in LDS
   5568  1.1  mrg    on a per-CU basis.
   5569  1.1  mrg    There may be cases in which gang-private variables in different compilation
   5570  1.1  mrg    units could clobber each other.  In that case we should be relying on the
   5571  1.1  mrg    linker to lay out gang-private LDS space, but that doesn't appear to be
   5572  1.1  mrg    possible at present.  */
   5573  1.1  mrg 
/* Write VAR's LDS byte offset to stream F, assigning a fresh offset from
   the gang-private high-water mark the first time VAR is seen.  */

static void
gcn_print_lds_decl (FILE *f, tree var)
{
  int *offset;
  /* Reuse the offset previously recorded for this variable, if any.  */
  if ((offset = lds_allocs.get (var)))
    fprintf (f, "%u", (unsigned) *offset);
  else
    {
      unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (var);
      tree type = TREE_TYPE (var);
      unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
      /* Objects larger than 4 bytes are bumped to 8-byte alignment;
	 presumably to allow 64-bit LDS accesses -- TODO confirm.  */
      if (size > align && size > 4 && align < 8)
	align = 8;

      /* Round the high-water mark up to ALIGN (assumed a power of two,
	 as DECL_ALIGN_UNIT values are).  */
      gang_private_hwm = ((gang_private_hwm + align - 1) & ~(align - 1));

      /* Record and emit the offset, then reserve SIZE bytes for VAR.  */
      lds_allocs.put (var, gang_private_hwm);
      fprintf (f, "%u", gang_private_hwm);
      gang_private_hwm += size;
      if (gang_private_hwm > gang_private_size_opt)
	error ("%d bytes of gang-private data-share memory exhausted"
	       " (increase with %<-mgang-private-size=%d%>, for example)",
	       gang_private_size_opt, gang_private_hwm);
    }
}
   5599  1.1  mrg 
   5600  1.1  mrg /* Implement ASM_OUTPUT_SYMBOL_REF via gcn-hsa.h.  */
   5601  1.1  mrg 
   5602  1.1  mrg void
   5603  1.1  mrg gcn_asm_output_symbol_ref (FILE *file, rtx x)
   5604  1.1  mrg {
   5605  1.1  mrg   tree decl;
   5606  1.1  mrg   if (cfun
   5607  1.1  mrg       && (decl = SYMBOL_REF_DECL (x)) != 0
   5608  1.1  mrg       && TREE_CODE (decl) == VAR_DECL
   5609  1.1  mrg       && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
   5610  1.1  mrg     {
   5611  1.1  mrg       /* LDS symbols (emitted using this hook) are only used at present
   5612  1.1  mrg          to propagate worker values from an active thread to neutered
   5613  1.1  mrg          threads.  Use the same offset for each such block, but don't
   5614  1.1  mrg          use zero because null pointers are used to identify the active
   5615  1.1  mrg          thread in GOACC_single_copy_start calls.  */
   5616  1.1  mrg       gcn_print_lds_decl (file, decl);
   5617  1.1  mrg     }
   5618  1.1  mrg   else
   5619  1.1  mrg     {
   5620  1.1  mrg       assemble_name (file, XSTR (x, 0));
   5621  1.1  mrg       /* FIXME: See above -- this condition is unreachable.  */
   5622  1.1  mrg       if (cfun
   5623  1.1  mrg 	  && (decl = SYMBOL_REF_DECL (x)) != 0
   5624  1.1  mrg 	  && TREE_CODE (decl) == VAR_DECL
   5625  1.1  mrg 	  && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
   5626  1.1  mrg 	fputs ("@abs32", file);
   5627  1.1  mrg     }
   5628  1.1  mrg }
   5629  1.1  mrg 
   5630  1.1  mrg /* Implement TARGET_CONSTANT_ALIGNMENT.
   5631  1.1  mrg 
   5632  1.1  mrg    Returns the alignment in bits of a constant that is being placed in memory.
   5633  1.1  mrg    CONSTANT is the constant and BASIC_ALIGN is the alignment that the object
   5634  1.1  mrg    would ordinarily have.  */
   5635  1.1  mrg 
   5636  1.1  mrg static HOST_WIDE_INT
   5637  1.1  mrg gcn_constant_alignment (const_tree ARG_UNUSED (constant),
   5638  1.1  mrg 			HOST_WIDE_INT basic_align)
   5639  1.1  mrg {
   5640  1.1  mrg   return basic_align > 128 ? basic_align : 128;
   5641  1.1  mrg }
   5642  1.1  mrg 
   5643  1.1  mrg /* Implement PRINT_OPERAND_ADDRESS via gcn.h.  */
   5644  1.1  mrg 
   5645  1.1  mrg void
   5646  1.1  mrg print_operand_address (FILE *file, rtx mem)
   5647  1.1  mrg {
   5648  1.1  mrg   gcc_assert (MEM_P (mem));
   5649  1.1  mrg 
   5650  1.1  mrg   rtx reg;
   5651  1.1  mrg   rtx offset;
   5652  1.1  mrg   addr_space_t as = MEM_ADDR_SPACE (mem);
   5653  1.1  mrg   rtx addr = XEXP (mem, 0);
   5654  1.1  mrg   gcc_assert (REG_P (addr) || GET_CODE (addr) == PLUS);
   5655  1.1  mrg 
   5656  1.1  mrg   if (AS_SCRATCH_P (as))
   5657  1.1  mrg     switch (GET_CODE (addr))
   5658  1.1  mrg       {
   5659  1.1  mrg       case REG:
   5660  1.1  mrg 	print_reg (file, addr);
   5661  1.1  mrg 	break;
   5662  1.1  mrg 
   5663  1.1  mrg       case PLUS:
   5664  1.1  mrg 	reg = XEXP (addr, 0);
   5665  1.1  mrg 	offset = XEXP (addr, 1);
   5666  1.1  mrg 	print_reg (file, reg);
   5667  1.1  mrg 	if (GET_CODE (offset) == CONST_INT)
   5668  1.1  mrg 	  fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
   5669  1.1  mrg 	else
   5670  1.1  mrg 	  abort ();
   5671  1.1  mrg 	break;
   5672  1.1  mrg 
   5673  1.1  mrg       default:
   5674  1.1  mrg 	debug_rtx (addr);
   5675  1.1  mrg 	abort ();
   5676  1.1  mrg       }
   5677  1.1  mrg   else if (AS_ANY_FLAT_P (as))
   5678  1.1  mrg     {
   5679  1.1  mrg       if (GET_CODE (addr) == REG)
   5680  1.1  mrg 	print_reg (file, addr);
   5681  1.1  mrg       else
   5682  1.1  mrg 	{
   5683  1.1  mrg 	  gcc_assert (TARGET_GCN5_PLUS);
   5684  1.1  mrg 	  print_reg (file, XEXP (addr, 0));
   5685  1.1  mrg 	}
   5686  1.1  mrg     }
   5687  1.1  mrg   else if (AS_GLOBAL_P (as))
   5688  1.1  mrg     {
   5689  1.1  mrg       gcc_assert (TARGET_GCN5_PLUS);
   5690  1.1  mrg 
   5691  1.1  mrg       rtx base = addr;
   5692  1.1  mrg       rtx vgpr_offset = NULL_RTX;
   5693  1.1  mrg 
   5694  1.1  mrg       if (GET_CODE (addr) == PLUS)
   5695  1.1  mrg 	{
   5696  1.1  mrg 	  base = XEXP (addr, 0);
   5697  1.1  mrg 
   5698  1.1  mrg 	  if (GET_CODE (base) == PLUS)
   5699  1.1  mrg 	    {
   5700  1.1  mrg 	      /* (SGPR + VGPR) + CONST  */
   5701  1.1  mrg 	      vgpr_offset = XEXP (base, 1);
   5702  1.1  mrg 	      base = XEXP (base, 0);
   5703  1.1  mrg 	    }
   5704  1.1  mrg 	  else
   5705  1.1  mrg 	    {
   5706  1.1  mrg 	      rtx offset = XEXP (addr, 1);
   5707  1.1  mrg 
   5708  1.1  mrg 	      if (REG_P (offset))
   5709  1.1  mrg 		/* SGPR + VGPR  */
   5710  1.1  mrg 		vgpr_offset = offset;
   5711  1.1  mrg 	      else if (CONST_INT_P (offset))
   5712  1.1  mrg 		/* VGPR + CONST or SGPR + CONST  */
   5713  1.1  mrg 		;
   5714  1.1  mrg 	      else
   5715  1.1  mrg 		output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
   5716  1.1  mrg 	    }
   5717  1.1  mrg 	}
   5718  1.1  mrg 
   5719  1.1  mrg       if (REG_P (base))
   5720  1.1  mrg 	{
   5721  1.1  mrg 	  if (VGPR_REGNO_P (REGNO (base)))
   5722  1.1  mrg 	    print_reg (file, base);
   5723  1.1  mrg 	  else if (SGPR_REGNO_P (REGNO (base)))
   5724  1.1  mrg 	    {
   5725  1.1  mrg 	      /* The assembler requires a 64-bit VGPR pair here, even though
   5726  1.1  mrg 	         the offset should be only 32-bit.  */
   5727  1.1  mrg 	      if (vgpr_offset == NULL_RTX)
   5728  1.1  mrg 		/* In this case, the vector offset is zero, so we use the first
   5729  1.1  mrg 		   lane of v1, which is initialized to zero.  */
   5730  1.1  mrg 		{
   5731  1.1  mrg 		  if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED)
   5732  1.1  mrg 		    fprintf (file, "v1");
   5733  1.1  mrg 		  else
   5734  1.1  mrg 		    fprintf (file, "v[1:2]");
   5735  1.1  mrg 		}
   5736  1.1  mrg 	      else if (REG_P (vgpr_offset)
   5737  1.1  mrg 		       && VGPR_REGNO_P (REGNO (vgpr_offset)))
   5738  1.1  mrg 		{
   5739  1.1  mrg 		  if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED)
   5740  1.1  mrg 		    fprintf (file, "v%d",
   5741  1.1  mrg 			     REGNO (vgpr_offset) - FIRST_VGPR_REG);
   5742  1.1  mrg 		  else
   5743  1.1  mrg 		    fprintf (file, "v[%d:%d]",
   5744  1.1  mrg 			     REGNO (vgpr_offset) - FIRST_VGPR_REG,
   5745  1.1  mrg 			     REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
   5746  1.1  mrg 		}
   5747  1.1  mrg 	      else
   5748  1.1  mrg 		output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
   5749  1.1  mrg 	    }
   5750  1.1  mrg 	}
   5751  1.1  mrg       else
   5752  1.1  mrg 	output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
   5753  1.1  mrg     }
   5754  1.1  mrg   else if (AS_ANY_DS_P (as))
   5755  1.1  mrg     switch (GET_CODE (addr))
   5756  1.1  mrg       {
   5757  1.1  mrg       case REG:
   5758  1.1  mrg 	print_reg (file, addr);
   5759  1.1  mrg 	break;
   5760  1.1  mrg 
   5761  1.1  mrg       case PLUS:
   5762  1.1  mrg 	reg = XEXP (addr, 0);
   5763  1.1  mrg 	print_reg (file, reg);
   5764  1.1  mrg 	break;
   5765  1.1  mrg 
   5766  1.1  mrg       default:
   5767  1.1  mrg 	debug_rtx (addr);
   5768  1.1  mrg 	abort ();
   5769  1.1  mrg       }
   5770  1.1  mrg   else
   5771  1.1  mrg     switch (GET_CODE (addr))
   5772  1.1  mrg       {
   5773  1.1  mrg       case REG:
   5774  1.1  mrg 	print_reg (file, addr);
   5775  1.1  mrg 	fprintf (file, ", 0");
   5776  1.1  mrg 	break;
   5777  1.1  mrg 
   5778  1.1  mrg       case PLUS:
   5779  1.1  mrg 	reg = XEXP (addr, 0);
   5780  1.1  mrg 	offset = XEXP (addr, 1);
   5781  1.1  mrg 	print_reg (file, reg);
   5782  1.1  mrg 	fprintf (file, ", ");
   5783  1.1  mrg 	if (GET_CODE (offset) == REG)
   5784  1.1  mrg 	  print_reg (file, reg);
   5785  1.1  mrg 	else if (GET_CODE (offset) == CONST_INT)
   5786  1.1  mrg 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
   5787  1.1  mrg 	else
   5788  1.1  mrg 	  abort ();
   5789  1.1  mrg 	break;
   5790  1.1  mrg 
   5791  1.1  mrg       default:
   5792  1.1  mrg 	debug_rtx (addr);
   5793  1.1  mrg 	abort ();
   5794  1.1  mrg       }
   5795  1.1  mrg }
   5796  1.1  mrg 
   5797  1.1  mrg /* Implement PRINT_OPERAND via gcn.h.
   5798  1.1  mrg 
   5799  1.1  mrg    b - print operand size as untyped operand (b8/b16/b32/b64)
   5800  1.1  mrg    B - print operand size as SI/DI untyped operand (b32/b32/b32/b64)
   5801  1.1  mrg    i - print operand size as untyped operand (i16/b32/i64)
   5802  1.1  mrg    I - print operand size as SI/DI untyped operand(i32/b32/i64)
   5803  1.1  mrg    u - print operand size as untyped operand (u16/u32/u64)
   5804  1.1  mrg    U - print operand size as SI/DI untyped operand(u32/u64)
   5805  1.1  mrg    o - print operand size as memory access size for loads
   5806  1.1  mrg        (ubyte/ushort/dword/dwordx2/wordx3/dwordx4)
   5807  1.1  mrg    s - print operand size as memory access size for stores
   5808  1.1  mrg        (byte/short/dword/dwordx2/wordx3/dwordx4)
   5809  1.1  mrg    C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...)
   5810  1.1  mrg    c - print inverse conditional code for s_cbranch
   5811  1.1  mrg    D - print conditional code for s_cmp (eq_u64/lg_u64...)
   5812  1.1  mrg    E - print conditional code for v_cmp (eq_u64/ne_u64...)
   5813  1.1  mrg    A - print address in formatting suitable for given address space.
   5814  1.1  mrg    O - print offset:n for data share operations.
   5815  1.1  mrg    ^ - print "_co" suffix for GCN5 mnemonics
   5816  1.1  mrg    g - print "glc", if appropriate for given MEM
   5817  1.1  mrg  */
   5818  1.1  mrg 
   5819  1.1  mrg void
   5820  1.1  mrg print_operand (FILE *file, rtx x, int code)
   5821  1.1  mrg {
   5822  1.1  mrg   int xcode = x ? GET_CODE (x) : 0;
   5823  1.1  mrg   bool invert = false;
   5824  1.1  mrg   switch (code)
   5825  1.1  mrg     {
   5826  1.1  mrg       /* Instructions have the following suffixes.
   5827  1.1  mrg          If there are two suffixes, the first is the destination type,
   5828  1.1  mrg 	 and the second is the source type.
   5829  1.1  mrg 
   5830  1.1  mrg          B32 Bitfield (untyped data) 32-bit
   5831  1.1  mrg          B64 Bitfield (untyped data) 64-bit
   5832  1.1  mrg          F16 floating-point 16-bit
   5833  1.1  mrg          F32 floating-point 32-bit (IEEE 754 single-precision float)
   5834  1.1  mrg          F64 floating-point 64-bit (IEEE 754 double-precision float)
    5835  1.1  mrg          I16 signed 16-bit integer
   5836  1.1  mrg          I32 signed 32-bit integer
   5837  1.1  mrg          I64 signed 64-bit integer
    5838  1.1  mrg          U16 unsigned 16-bit integer
   5839  1.1  mrg          U32 unsigned 32-bit integer
   5840  1.1  mrg          U64 unsigned 64-bit integer  */
   5841  1.1  mrg 
   5842  1.1  mrg       /* Print operand size as untyped suffix.  */
   5843  1.1  mrg     case 'b':
   5844  1.1  mrg       {
   5845  1.1  mrg 	const char *s = "";
   5846  1.1  mrg 	machine_mode mode = GET_MODE (x);
   5847  1.1  mrg 	if (VECTOR_MODE_P (mode))
   5848  1.1  mrg 	  mode = GET_MODE_INNER (mode);
   5849  1.1  mrg 	switch (GET_MODE_SIZE (mode))
   5850  1.1  mrg 	  {
   5851  1.1  mrg 	  case 1:
   5852  1.1  mrg 	    s = "_b8";
   5853  1.1  mrg 	    break;
   5854  1.1  mrg 	  case 2:
   5855  1.1  mrg 	    s = "_b16";
   5856  1.1  mrg 	    break;
   5857  1.1  mrg 	  case 4:
   5858  1.1  mrg 	    s = "_b32";
   5859  1.1  mrg 	    break;
   5860  1.1  mrg 	  case 8:
   5861  1.1  mrg 	    s = "_b64";
   5862  1.1  mrg 	    break;
   5863  1.1  mrg 	  default:
   5864  1.1  mrg 	    output_operand_lossage ("invalid operand %%xn code");
   5865  1.1  mrg 	    return;
   5866  1.1  mrg 	  }
   5867  1.1  mrg 	fputs (s, file);
   5868  1.1  mrg       }
   5869  1.1  mrg       return;
   5870  1.1  mrg     case 'B':
   5871  1.1  mrg       {
   5872  1.1  mrg 	const char *s = "";
   5873  1.1  mrg 	machine_mode mode = GET_MODE (x);
   5874  1.1  mrg 	if (VECTOR_MODE_P (mode))
   5875  1.1  mrg 	  mode = GET_MODE_INNER (mode);
   5876  1.1  mrg 	switch (GET_MODE_SIZE (mode))
   5877  1.1  mrg 	  {
   5878  1.1  mrg 	  case 1:
   5879  1.1  mrg 	  case 2:
   5880  1.1  mrg 	  case 4:
   5881  1.1  mrg 	    s = "_b32";
   5882  1.1  mrg 	    break;
   5883  1.1  mrg 	  case 8:
   5884  1.1  mrg 	    s = "_b64";
   5885  1.1  mrg 	    break;
   5886  1.1  mrg 	  default:
   5887  1.1  mrg 	    output_operand_lossage ("invalid operand %%xn code");
   5888  1.1  mrg 	    return;
   5889  1.1  mrg 	  }
   5890  1.1  mrg 	fputs (s, file);
   5891  1.1  mrg       }
   5892  1.1  mrg       return;
   5893  1.1  mrg     case 'e':
   5894  1.1  mrg       fputs ("sext(", file);
   5895  1.1  mrg       print_operand (file, x, 0);
   5896  1.1  mrg       fputs (")", file);
   5897  1.1  mrg       return;
   5898  1.1  mrg     case 'i':
   5899  1.1  mrg     case 'I':
   5900  1.1  mrg     case 'u':
   5901  1.1  mrg     case 'U':
   5902  1.1  mrg       {
   5903  1.1  mrg 	bool signed_p = code == 'i';
   5904  1.1  mrg 	bool min32_p = code == 'I' || code == 'U';
   5905  1.1  mrg 	const char *s = "";
   5906  1.1  mrg 	machine_mode mode = GET_MODE (x);
   5907  1.1  mrg 	if (VECTOR_MODE_P (mode))
   5908  1.1  mrg 	  mode = GET_MODE_INNER (mode);
   5909  1.1  mrg 	if (mode == VOIDmode)
   5910  1.1  mrg 	  switch (GET_CODE (x))
   5911  1.1  mrg 	    {
   5912  1.1  mrg 	    case CONST_INT:
   5913  1.1  mrg 	      s = signed_p ? "_i32" : "_u32";
   5914  1.1  mrg 	      break;
   5915  1.1  mrg 	    case CONST_DOUBLE:
   5916  1.1  mrg 	      s = "_f64";
   5917  1.1  mrg 	      break;
   5918  1.1  mrg 	    default:
   5919  1.1  mrg 	      output_operand_lossage ("invalid operand %%xn code");
   5920  1.1  mrg 	      return;
   5921  1.1  mrg 	    }
   5922  1.1  mrg 	else if (FLOAT_MODE_P (mode))
   5923  1.1  mrg 	  switch (GET_MODE_SIZE (mode))
   5924  1.1  mrg 	    {
   5925  1.1  mrg 	    case 2:
   5926  1.1  mrg 	      s = "_f16";
   5927  1.1  mrg 	      break;
   5928  1.1  mrg 	    case 4:
   5929  1.1  mrg 	      s = "_f32";
   5930  1.1  mrg 	      break;
   5931  1.1  mrg 	    case 8:
   5932  1.1  mrg 	      s = "_f64";
   5933  1.1  mrg 	      break;
   5934  1.1  mrg 	    default:
   5935  1.1  mrg 	      output_operand_lossage ("invalid operand %%xn code");
   5936  1.1  mrg 	      return;
   5937  1.1  mrg 	    }
   5938  1.1  mrg 	else if (min32_p)
   5939  1.1  mrg 	  switch (GET_MODE_SIZE (mode))
   5940  1.1  mrg 	    {
   5941  1.1  mrg 	    case 1:
   5942  1.1  mrg 	    case 2:
   5943  1.1  mrg 	    case 4:
   5944  1.1  mrg 	      s = signed_p ? "_i32" : "_u32";
   5945  1.1  mrg 	      break;
   5946  1.1  mrg 	    case 8:
   5947  1.1  mrg 	      s = signed_p ? "_i64" : "_u64";
   5948  1.1  mrg 	      break;
   5949  1.1  mrg 	    default:
   5950  1.1  mrg 	      output_operand_lossage ("invalid operand %%xn code");
   5951  1.1  mrg 	      return;
   5952  1.1  mrg 	    }
   5953  1.1  mrg 	else
   5954  1.1  mrg 	  switch (GET_MODE_SIZE (mode))
   5955  1.1  mrg 	    {
   5956  1.1  mrg 	    case 1:
   5957  1.1  mrg 	      s = signed_p ? "_i8" : "_u8";
   5958  1.1  mrg 	      break;
   5959  1.1  mrg 	    case 2:
   5960  1.1  mrg 	      s = signed_p ? "_i16" : "_u16";
   5961  1.1  mrg 	      break;
   5962  1.1  mrg 	    case 4:
   5963  1.1  mrg 	      s = signed_p ? "_i32" : "_u32";
   5964  1.1  mrg 	      break;
   5965  1.1  mrg 	    case 8:
   5966  1.1  mrg 	      s = signed_p ? "_i64" : "_u64";
   5967  1.1  mrg 	      break;
   5968  1.1  mrg 	    default:
   5969  1.1  mrg 	      output_operand_lossage ("invalid operand %%xn code");
   5970  1.1  mrg 	      return;
   5971  1.1  mrg 	    }
   5972  1.1  mrg 	fputs (s, file);
   5973  1.1  mrg       }
   5974  1.1  mrg       return;
   5975  1.1  mrg       /* Print operand size as untyped suffix.  */
   5976  1.1  mrg     case 'o':
   5977  1.1  mrg       {
   5978  1.1  mrg 	const char *s = 0;
   5979  1.1  mrg 	switch (GET_MODE_SIZE (GET_MODE (x)))
   5980  1.1  mrg 	  {
   5981  1.1  mrg 	  case 1:
   5982  1.1  mrg 	    s = "_ubyte";
   5983  1.1  mrg 	    break;
   5984  1.1  mrg 	  case 2:
   5985  1.1  mrg 	    s = "_ushort";
   5986  1.1  mrg 	    break;
   5987  1.1  mrg 	  /* The following are full-vector variants.  */
   5988  1.1  mrg 	  case 64:
   5989  1.1  mrg 	    s = "_ubyte";
   5990  1.1  mrg 	    break;
   5991  1.1  mrg 	  case 128:
   5992  1.1  mrg 	    s = "_ushort";
   5993  1.1  mrg 	    break;
   5994  1.1  mrg 	  }
   5995  1.1  mrg 
   5996  1.1  mrg 	if (s)
   5997  1.1  mrg 	  {
   5998  1.1  mrg 	    fputs (s, file);
   5999  1.1  mrg 	    return;
   6000  1.1  mrg 	  }
   6001  1.1  mrg 
   6002  1.1  mrg 	/* Fall-through - the other cases for 'o' are the same as for 's'.  */
   6003  1.1  mrg 	gcc_fallthrough();
   6004  1.1  mrg       }
   6005  1.1  mrg     case 's':
   6006  1.1  mrg       {
   6007  1.1  mrg 	const char *s = "";
   6008  1.1  mrg 	switch (GET_MODE_SIZE (GET_MODE (x)))
   6009  1.1  mrg 	  {
   6010  1.1  mrg 	  case 1:
   6011  1.1  mrg 	    s = "_byte";
   6012  1.1  mrg 	    break;
   6013  1.1  mrg 	  case 2:
   6014  1.1  mrg 	    s = "_short";
   6015  1.1  mrg 	    break;
   6016  1.1  mrg 	  case 4:
   6017  1.1  mrg 	    s = "_dword";
   6018  1.1  mrg 	    break;
   6019  1.1  mrg 	  case 8:
   6020  1.1  mrg 	    s = "_dwordx2";
   6021  1.1  mrg 	    break;
   6022  1.1  mrg 	  case 12:
   6023  1.1  mrg 	    s = "_dwordx3";
   6024  1.1  mrg 	    break;
   6025  1.1  mrg 	  case 16:
   6026  1.1  mrg 	    s = "_dwordx4";
   6027  1.1  mrg 	    break;
   6028  1.1  mrg 	  case 32:
   6029  1.1  mrg 	    s = "_dwordx8";
   6030  1.1  mrg 	    break;
   6031  1.1  mrg 	  case 64:
   6032  1.1  mrg 	    s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16";
   6033  1.1  mrg 	    break;
   6034  1.1  mrg 	  /* The following are full-vector variants.  */
   6035  1.1  mrg 	  case 128:
   6036  1.1  mrg 	    s = "_short";
   6037  1.1  mrg 	    break;
   6038  1.1  mrg 	  case 256:
   6039  1.1  mrg 	    s = "_dword";
   6040  1.1  mrg 	    break;
   6041  1.1  mrg 	  case 512:
   6042  1.1  mrg 	    s = "_dwordx2";
   6043  1.1  mrg 	    break;
   6044  1.1  mrg 	  default:
   6045  1.1  mrg 	    output_operand_lossage ("invalid operand %%xn code");
   6046  1.1  mrg 	    return;
   6047  1.1  mrg 	  }
   6048  1.1  mrg 	fputs (s, file);
   6049  1.1  mrg       }
   6050  1.1  mrg       return;
   6051  1.1  mrg     case 'A':
   6052  1.1  mrg       if (xcode != MEM)
   6053  1.1  mrg 	{
   6054  1.1  mrg 	  output_operand_lossage ("invalid %%xn code");
   6055  1.1  mrg 	  return;
   6056  1.1  mrg 	}
   6057  1.1  mrg       print_operand_address (file, x);
   6058  1.1  mrg       return;
   6059  1.1  mrg     case 'O':
   6060  1.1  mrg       {
   6061  1.1  mrg 	if (xcode != MEM)
   6062  1.1  mrg 	  {
   6063  1.1  mrg 	    output_operand_lossage ("invalid %%xn code");
   6064  1.1  mrg 	    return;
   6065  1.1  mrg 	  }
   6066  1.1  mrg 	if (AS_GDS_P (MEM_ADDR_SPACE (x)))
   6067  1.1  mrg 	  fprintf (file, " gds");
   6068  1.1  mrg 
   6069  1.1  mrg 	rtx x0 = XEXP (x, 0);
   6070  1.1  mrg 	if (AS_GLOBAL_P (MEM_ADDR_SPACE (x)))
   6071  1.1  mrg 	  {
   6072  1.1  mrg 	    gcc_assert (TARGET_GCN5_PLUS);
   6073  1.1  mrg 
   6074  1.1  mrg 	    fprintf (file, ", ");
   6075  1.1  mrg 
   6076  1.1  mrg 	    rtx base = x0;
   6077  1.1  mrg 	    rtx const_offset = NULL_RTX;
   6078  1.1  mrg 
   6079  1.1  mrg 	    if (GET_CODE (base) == PLUS)
   6080  1.1  mrg 	      {
   6081  1.1  mrg 		rtx offset = XEXP (x0, 1);
   6082  1.1  mrg 		base = XEXP (x0, 0);
   6083  1.1  mrg 
   6084  1.1  mrg 		if (GET_CODE (base) == PLUS)
   6085  1.1  mrg 		  /* (SGPR + VGPR) + CONST  */
   6086  1.1  mrg 		  /* Ignore the VGPR offset for this operand.  */
   6087  1.1  mrg 		  base = XEXP (base, 0);
   6088  1.1  mrg 
   6089  1.1  mrg 		if (CONST_INT_P (offset))
   6090  1.1  mrg 		  const_offset = XEXP (x0, 1);
   6091  1.1  mrg 		else if (REG_P (offset))
   6092  1.1  mrg 		  /* SGPR + VGPR  */
   6093  1.1  mrg 		  /* Ignore the VGPR offset for this operand.  */
   6094  1.1  mrg 		  ;
   6095  1.1  mrg 		else
   6096  1.1  mrg 		  output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
   6097  1.1  mrg 	      }
   6098  1.1  mrg 
   6099  1.1  mrg 	    if (REG_P (base))
   6100  1.1  mrg 	      {
   6101  1.1  mrg 		if (VGPR_REGNO_P (REGNO (base)))
   6102  1.1  mrg 		  /* The VGPR address is specified in the %A operand.  */
   6103  1.1  mrg 		  fprintf (file, "off");
   6104  1.1  mrg 		else if (SGPR_REGNO_P (REGNO (base)))
   6105  1.1  mrg 		  print_reg (file, base);
   6106  1.1  mrg 		else
   6107  1.1  mrg 		  output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
   6108  1.1  mrg 	      }
   6109  1.1  mrg 	    else
   6110  1.1  mrg 	      output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
   6111  1.1  mrg 
   6112  1.1  mrg 	    if (const_offset != NULL_RTX)
   6113  1.1  mrg 	      fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC,
   6114  1.1  mrg 		       INTVAL (const_offset));
   6115  1.1  mrg 
   6116  1.1  mrg 	    return;
   6117  1.1  mrg 	  }
   6118  1.1  mrg 
   6119  1.1  mrg 	if (GET_CODE (x0) == REG)
   6120  1.1  mrg 	  return;
   6121  1.1  mrg 	if (GET_CODE (x0) != PLUS)
   6122  1.1  mrg 	  {
   6123  1.1  mrg 	    output_operand_lossage ("invalid %%xn code");
   6124  1.1  mrg 	    return;
   6125  1.1  mrg 	  }
   6126  1.1  mrg 	rtx val = XEXP (x0, 1);
   6127  1.1  mrg 	if (GET_CODE (val) == CONST_VECTOR)
   6128  1.1  mrg 	  val = CONST_VECTOR_ELT (val, 0);
   6129  1.1  mrg 	if (GET_CODE (val) != CONST_INT)
   6130  1.1  mrg 	  {
   6131  1.1  mrg 	    output_operand_lossage ("invalid %%xn code");
   6132  1.1  mrg 	    return;
   6133  1.1  mrg 	  }
   6134  1.1  mrg 	fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (val));
   6135  1.1  mrg 
   6136  1.1  mrg       }
   6137  1.1  mrg       return;
   6138  1.1  mrg     case 'c':
   6139  1.1  mrg       invert = true;
   6140  1.1  mrg       /* Fall through.  */
   6141  1.1  mrg     case 'C':
   6142  1.1  mrg       {
   6143  1.1  mrg 	const char *s;
   6144  1.1  mrg 	bool num = false;
   6145  1.1  mrg 	if ((xcode != EQ && xcode != NE) || !REG_P (XEXP (x, 0)))
   6146  1.1  mrg 	  {
   6147  1.1  mrg 	    output_operand_lossage ("invalid %%xn code");
   6148  1.1  mrg 	    return;
   6149  1.1  mrg 	  }
   6150  1.1  mrg 	switch (REGNO (XEXP (x, 0)))
   6151  1.1  mrg 	  {
   6152  1.1  mrg 	  case VCC_REG:
   6153  1.1  mrg 	  case VCCZ_REG:
   6154  1.1  mrg 	    s = "_vcc";
   6155  1.1  mrg 	    break;
   6156  1.1  mrg 	  case SCC_REG:
   6157  1.1  mrg 	    /* For some reason llvm-mc insists on scc0 instead of sccz.  */
   6158  1.1  mrg 	    num = true;
   6159  1.1  mrg 	    s = "_scc";
   6160  1.1  mrg 	    break;
   6161  1.1  mrg 	  case EXECZ_REG:
   6162  1.1  mrg 	    s = "_exec";
   6163  1.1  mrg 	    break;
   6164  1.1  mrg 	  default:
   6165  1.1  mrg 	    output_operand_lossage ("invalid %%xn code");
   6166  1.1  mrg 	    return;
   6167  1.1  mrg 	  }
   6168  1.1  mrg 	fputs (s, file);
   6169  1.1  mrg 	if (xcode == (invert ? NE : EQ))
   6170  1.1  mrg 	  fputc (num ? '0' : 'z', file);
   6171  1.1  mrg 	else
   6172  1.1  mrg 	  fputs (num ? "1" : "nz", file);
   6173  1.1  mrg 	return;
   6174  1.1  mrg       }
   6175  1.1  mrg     case 'D':
   6176  1.1  mrg       {
   6177  1.1  mrg 	const char *s;
   6178  1.1  mrg 	bool cmp_signed = false;
   6179  1.1  mrg 	switch (xcode)
   6180  1.1  mrg 	  {
   6181  1.1  mrg 	  case EQ:
   6182  1.1  mrg 	    s = "_eq_";
   6183  1.1  mrg 	    break;
   6184  1.1  mrg 	  case NE:
   6185  1.1  mrg 	    s = "_lg_";
   6186  1.1  mrg 	    break;
   6187  1.1  mrg 	  case LT:
   6188  1.1  mrg 	    s = "_lt_";
   6189  1.1  mrg 	    cmp_signed = true;
   6190  1.1  mrg 	    break;
   6191  1.1  mrg 	  case LE:
   6192  1.1  mrg 	    s = "_le_";
   6193  1.1  mrg 	    cmp_signed = true;
   6194  1.1  mrg 	    break;
   6195  1.1  mrg 	  case GT:
   6196  1.1  mrg 	    s = "_gt_";
   6197  1.1  mrg 	    cmp_signed = true;
   6198  1.1  mrg 	    break;
   6199  1.1  mrg 	  case GE:
   6200  1.1  mrg 	    s = "_ge_";
   6201  1.1  mrg 	    cmp_signed = true;
   6202  1.1  mrg 	    break;
   6203  1.1  mrg 	  case LTU:
   6204  1.1  mrg 	    s = "_lt_";
   6205  1.1  mrg 	    break;
   6206  1.1  mrg 	  case LEU:
   6207  1.1  mrg 	    s = "_le_";
   6208  1.1  mrg 	    break;
   6209  1.1  mrg 	  case GTU:
   6210  1.1  mrg 	    s = "_gt_";
   6211  1.1  mrg 	    break;
   6212  1.1  mrg 	  case GEU:
   6213  1.1  mrg 	    s = "_ge_";
   6214  1.1  mrg 	    break;
   6215  1.1  mrg 	  default:
   6216  1.1  mrg 	    output_operand_lossage ("invalid %%xn code");
   6217  1.1  mrg 	    return;
   6218  1.1  mrg 	  }
   6219  1.1  mrg 	fputs (s, file);
   6220  1.1  mrg 	fputc (cmp_signed ? 'i' : 'u', file);
   6221  1.1  mrg 
   6222  1.1  mrg 	machine_mode mode = GET_MODE (XEXP (x, 0));
   6223  1.1  mrg 
   6224  1.1  mrg 	if (mode == VOIDmode)
   6225  1.1  mrg 	  mode = GET_MODE (XEXP (x, 1));
   6226  1.1  mrg 
   6227  1.1  mrg 	/* If both sides are constants, then assume the instruction is in
   6228  1.1  mrg 	   SImode since s_cmp can only do integer compares.  */
   6229  1.1  mrg 	if (mode == VOIDmode)
   6230  1.1  mrg 	  mode = SImode;
   6231  1.1  mrg 
   6232  1.1  mrg 	switch (GET_MODE_SIZE (mode))
   6233  1.1  mrg 	  {
   6234  1.1  mrg 	  case 4:
   6235  1.1  mrg 	    s = "32";
   6236  1.1  mrg 	    break;
   6237  1.1  mrg 	  case 8:
   6238  1.1  mrg 	    s = "64";
   6239  1.1  mrg 	    break;
   6240  1.1  mrg 	  default:
   6241  1.1  mrg 	    output_operand_lossage ("invalid operand %%xn code");
   6242  1.1  mrg 	    return;
   6243  1.1  mrg 	  }
   6244  1.1  mrg 	fputs (s, file);
   6245  1.1  mrg 	return;
   6246  1.1  mrg       }
   6247  1.1  mrg     case 'E':
   6248  1.1  mrg       {
   6249  1.1  mrg 	const char *s;
   6250  1.1  mrg 	bool cmp_signed = false;
   6251  1.1  mrg 	machine_mode mode = GET_MODE (XEXP (x, 0));
   6252  1.1  mrg 
   6253  1.1  mrg 	if (mode == VOIDmode)
   6254  1.1  mrg 	  mode = GET_MODE (XEXP (x, 1));
   6255  1.1  mrg 
   6256  1.1  mrg 	/* If both sides are constants, assume the instruction is in SFmode
   6257  1.1  mrg 	   if either operand is floating point, otherwise assume SImode.  */
   6258  1.1  mrg 	if (mode == VOIDmode)
   6259  1.1  mrg 	  {
   6260  1.1  mrg 	    if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
   6261  1.1  mrg 		|| GET_CODE (XEXP (x, 1)) == CONST_DOUBLE)
   6262  1.1  mrg 	      mode = SFmode;
   6263  1.1  mrg 	    else
   6264  1.1  mrg 	      mode = SImode;
   6265  1.1  mrg 	  }
   6266  1.1  mrg 
   6267  1.1  mrg 	/* Use the same format code for vector comparisons.  */
   6268  1.1  mrg 	if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
   6269  1.1  mrg 	    || GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   6270  1.1  mrg 	  mode = GET_MODE_INNER (mode);
   6271  1.1  mrg 
   6272  1.1  mrg 	bool float_p = GET_MODE_CLASS (mode) == MODE_FLOAT;
   6273  1.1  mrg 
   6274  1.1  mrg 	switch (xcode)
   6275  1.1  mrg 	  {
   6276  1.1  mrg 	  case EQ:
   6277  1.1  mrg 	    s = "_eq_";
   6278  1.1  mrg 	    break;
   6279  1.1  mrg 	  case NE:
   6280  1.1  mrg 	    s = float_p ? "_neq_" : "_ne_";
   6281  1.1  mrg 	    break;
   6282  1.1  mrg 	  case LT:
   6283  1.1  mrg 	    s = "_lt_";
   6284  1.1  mrg 	    cmp_signed = true;
   6285  1.1  mrg 	    break;
   6286  1.1  mrg 	  case LE:
   6287  1.1  mrg 	    s = "_le_";
   6288  1.1  mrg 	    cmp_signed = true;
   6289  1.1  mrg 	    break;
   6290  1.1  mrg 	  case GT:
   6291  1.1  mrg 	    s = "_gt_";
   6292  1.1  mrg 	    cmp_signed = true;
   6293  1.1  mrg 	    break;
   6294  1.1  mrg 	  case GE:
   6295  1.1  mrg 	    s = "_ge_";
   6296  1.1  mrg 	    cmp_signed = true;
   6297  1.1  mrg 	    break;
   6298  1.1  mrg 	  case LTU:
   6299  1.1  mrg 	    s = "_lt_";
   6300  1.1  mrg 	    break;
   6301  1.1  mrg 	  case LEU:
   6302  1.1  mrg 	    s = "_le_";
   6303  1.1  mrg 	    break;
   6304  1.1  mrg 	  case GTU:
   6305  1.1  mrg 	    s = "_gt_";
   6306  1.1  mrg 	    break;
   6307  1.1  mrg 	  case GEU:
   6308  1.1  mrg 	    s = "_ge_";
   6309  1.1  mrg 	    break;
   6310  1.1  mrg 	  case ORDERED:
   6311  1.1  mrg 	    s = "_o_";
   6312  1.1  mrg 	    break;
   6313  1.1  mrg 	  case UNORDERED:
   6314  1.1  mrg 	    s = "_u_";
   6315  1.1  mrg 	    break;
   6316  1.1  mrg 	  case UNEQ:
   6317  1.1  mrg 	    s = "_nlg_";
   6318  1.1  mrg 	    break;
   6319  1.1  mrg 	  case UNGE:
   6320  1.1  mrg 	    s = "_nlt_";
   6321  1.1  mrg 	    break;
   6322  1.1  mrg 	  case UNGT:
   6323  1.1  mrg 	    s = "_nle_";
   6324  1.1  mrg 	    break;
   6325  1.1  mrg 	  case UNLE:
   6326  1.1  mrg 	    s = "_ngt_";
   6327  1.1  mrg 	    break;
   6328  1.1  mrg 	  case UNLT:
   6329  1.1  mrg 	    s = "_nge_";
   6330  1.1  mrg 	    break;
   6331  1.1  mrg 	  case LTGT:
   6332  1.1  mrg 	    s = "_lg_";
   6333  1.1  mrg 	    break;
   6334  1.1  mrg 	  default:
   6335  1.1  mrg 	    output_operand_lossage ("invalid %%xn code");
   6336  1.1  mrg 	    return;
   6337  1.1  mrg 	  }
   6338  1.1  mrg 	fputs (s, file);
   6339  1.1  mrg 	fputc (float_p ? 'f' : cmp_signed ? 'i' : 'u', file);
   6340  1.1  mrg 
   6341  1.1  mrg 	switch (GET_MODE_SIZE (mode))
   6342  1.1  mrg 	  {
   6343  1.1  mrg 	  case 1:
   6344  1.1  mrg 	    output_operand_lossage ("operand %%xn code invalid for QImode");
   6345  1.1  mrg 	    return;
   6346  1.1  mrg 	  case 2:
   6347  1.1  mrg 	    s = "16";
   6348  1.1  mrg 	    break;
   6349  1.1  mrg 	  case 4:
   6350  1.1  mrg 	    s = "32";
   6351  1.1  mrg 	    break;
   6352  1.1  mrg 	  case 8:
   6353  1.1  mrg 	    s = "64";
   6354  1.1  mrg 	    break;
   6355  1.1  mrg 	  default:
   6356  1.1  mrg 	    output_operand_lossage ("invalid operand %%xn code");
   6357  1.1  mrg 	    return;
   6358  1.1  mrg 	  }
   6359  1.1  mrg 	fputs (s, file);
   6360  1.1  mrg 	return;
   6361  1.1  mrg       }
   6362  1.1  mrg     case 'L':
   6363  1.1  mrg       print_operand (file, gcn_operand_part (GET_MODE (x), x, 0), 0);
   6364  1.1  mrg       return;
   6365  1.1  mrg     case 'H':
   6366  1.1  mrg       print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
   6367  1.1  mrg       return;
   6368  1.1  mrg     case 'R':
   6369  1.1  mrg       /* Print a scalar register number as an integer.  Temporary hack.  */
   6370  1.1  mrg       gcc_assert (REG_P (x));
   6371  1.1  mrg       fprintf (file, "%u", (int) REGNO (x));
   6372  1.1  mrg       return;
   6373  1.1  mrg     case 'V':
   6374  1.1  mrg       /* Print a vector register number as an integer.  Temporary hack.  */
   6375  1.1  mrg       gcc_assert (REG_P (x));
   6376  1.1  mrg       fprintf (file, "%u", (int) REGNO (x) - FIRST_VGPR_REG);
   6377  1.1  mrg       return;
   6378  1.1  mrg     case 0:
   6379  1.1  mrg       if (xcode == REG)
   6380  1.1  mrg 	print_reg (file, x);
   6381  1.1  mrg       else if (xcode == MEM)
   6382  1.1  mrg 	output_address (GET_MODE (x), x);
   6383  1.1  mrg       else if (xcode == CONST_INT)
   6384  1.1  mrg 	fprintf (file, "%i", (int) INTVAL (x));
   6385  1.1  mrg       else if (xcode == CONST_VECTOR)
   6386  1.1  mrg 	print_operand (file, CONST_VECTOR_ELT (x, 0), code);
   6387  1.1  mrg       else if (xcode == CONST_DOUBLE)
   6388  1.1  mrg 	{
   6389  1.1  mrg 	  const char *str;
   6390  1.1  mrg 	  switch (gcn_inline_fp_constant_p (x, false))
   6391  1.1  mrg 	    {
   6392  1.1  mrg 	    case 240:
   6393  1.1  mrg 	      str = "0.5";
   6394  1.1  mrg 	      break;
   6395  1.1  mrg 	    case 241:
   6396  1.1  mrg 	      str = "-0.5";
   6397  1.1  mrg 	      break;
   6398  1.1  mrg 	    case 242:
   6399  1.1  mrg 	      str = "1.0";
   6400  1.1  mrg 	      break;
   6401  1.1  mrg 	    case 243:
   6402  1.1  mrg 	      str = "-1.0";
   6403  1.1  mrg 	      break;
   6404  1.1  mrg 	    case 244:
   6405  1.1  mrg 	      str = "2.0";
   6406  1.1  mrg 	      break;
   6407  1.1  mrg 	    case 245:
   6408  1.1  mrg 	      str = "-2.0";
   6409  1.1  mrg 	      break;
   6410  1.1  mrg 	    case 246:
   6411  1.1  mrg 	      str = "4.0";
   6412  1.1  mrg 	      break;
   6413  1.1  mrg 	    case 247:
   6414  1.1  mrg 	      str = "-4.0";
   6415  1.1  mrg 	      break;
   6416  1.1  mrg 	    case 248:
   6417  1.1  mrg 	      str = "1/pi";
   6418  1.1  mrg 	      break;
   6419  1.1  mrg 	    default:
   6420  1.1  mrg 	      rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode
   6421  1.1  mrg 					    ? DImode : SImode,
   6422  1.1  mrg 					    x, GET_MODE (x), 0);
   6423  1.1  mrg 	      if (x)
   6424  1.1  mrg 		print_operand (file, ix, code);
   6425  1.1  mrg 	      else
   6426  1.1  mrg 		output_operand_lossage ("invalid fp constant");
   6427  1.1  mrg 	      return;
   6428  1.1  mrg 	      break;
   6429  1.1  mrg 	    }
   6430  1.1  mrg 	  fprintf (file, str);
   6431  1.1  mrg 	  return;
   6432  1.1  mrg 	}
   6433  1.1  mrg       else
   6434  1.1  mrg 	output_addr_const (file, x);
   6435  1.1  mrg       return;
   6436  1.1  mrg     case '^':
   6437  1.1  mrg       if (TARGET_GCN5_PLUS)
   6438  1.1  mrg 	fputs ("_co", file);
   6439  1.1  mrg       return;
   6440  1.1  mrg     case 'g':
   6441  1.1  mrg       gcc_assert (xcode == MEM);
   6442  1.1  mrg       if (MEM_VOLATILE_P (x))
   6443  1.1  mrg 	fputs (" glc", file);
   6444  1.1  mrg       return;
   6445  1.1  mrg     default:
   6446  1.1  mrg       output_operand_lossage ("invalid %%xn code");
   6447  1.1  mrg     }
   6448  1.1  mrg   gcc_unreachable ();
   6449  1.1  mrg }
   6450  1.1  mrg 
   6451  1.1  mrg /* Implement DBX_REGISTER_NUMBER macro.
   6452  1.1  mrg 
   6453  1.1  mrg    Return the DWARF register number that corresponds to the GCC internal
   6454  1.1  mrg    REGNO.  */
   6455  1.1  mrg 
   6456  1.1  mrg unsigned int
   6457  1.1  mrg gcn_dwarf_register_number (unsigned int regno)
   6458  1.1  mrg {
   6459  1.1  mrg   /* Registers defined in DWARF.  */
   6460  1.1  mrg   if (regno == EXEC_LO_REG)
   6461  1.1  mrg     return 17;
   6462  1.1  mrg   /* We need to use a more complex DWARF expression for this
   6463  1.1  mrg   else if (regno == EXEC_HI_REG)
   6464  1.1  mrg     return 17; */
   6465  1.1  mrg   else if (regno == VCC_LO_REG)
   6466  1.1  mrg     return 768;
   6467  1.1  mrg   /* We need to use a more complex DWARF expression for this
   6468  1.1  mrg   else if (regno == VCC_HI_REG)
   6469  1.1  mrg     return 768;  */
   6470  1.1  mrg   else if (regno == SCC_REG)
   6471  1.1  mrg     return 128;
   6472  1.1  mrg   else if (regno == DWARF_LINK_REGISTER)
   6473  1.1  mrg     return 16;
   6474  1.1  mrg   else if (SGPR_REGNO_P (regno))
   6475  1.1  mrg     {
   6476  1.1  mrg       if (regno - FIRST_SGPR_REG < 64)
   6477  1.1  mrg 	return (regno - FIRST_SGPR_REG + 32);
   6478  1.1  mrg       else
   6479  1.1  mrg 	return (regno - FIRST_SGPR_REG + 1024);
   6480  1.1  mrg     }
   6481  1.1  mrg   else if (VGPR_REGNO_P (regno))
   6482  1.1  mrg     return (regno - FIRST_VGPR_REG + 2560);
   6483  1.1  mrg 
   6484  1.1  mrg   /* Otherwise, there's nothing sensible to do.  */
   6485  1.1  mrg   return regno + 100000;
   6486  1.1  mrg }
   6487  1.1  mrg 
   6488  1.1  mrg /* Implement TARGET_DWARF_REGISTER_SPAN.
   6489  1.1  mrg 
   6490  1.1  mrg    DImode and Vector DImode require additional registers.  */
   6491  1.1  mrg 
   6492  1.1  mrg static rtx
   6493  1.1  mrg gcn_dwarf_register_span (rtx rtl)
   6494  1.1  mrg {
   6495  1.1  mrg   machine_mode mode = GET_MODE (rtl);
   6496  1.1  mrg 
   6497  1.1  mrg   if (VECTOR_MODE_P (mode))
   6498  1.1  mrg     mode = GET_MODE_INNER (mode);
   6499  1.1  mrg 
   6500  1.1  mrg   if (GET_MODE_SIZE (mode) != 8)
   6501  1.1  mrg     return NULL_RTX;
   6502  1.1  mrg 
   6503  1.1  mrg   unsigned regno = REGNO (rtl);
   6504  1.1  mrg 
   6505  1.1  mrg   if (regno == DWARF_LINK_REGISTER)
   6506  1.1  mrg     return NULL_RTX;
   6507  1.1  mrg 
   6508  1.1  mrg   rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
   6509  1.1  mrg   XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
   6510  1.1  mrg   XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
   6511  1.1  mrg 
   6512  1.1  mrg   return p;
   6513  1.1  mrg }
   6514  1.1  mrg 
   6515  1.1  mrg /* }}}  */
   6516  1.1  mrg /* {{{ TARGET hook overrides.  */
   6517  1.1  mrg 
   6518  1.1  mrg #undef  TARGET_ADDR_SPACE_ADDRESS_MODE
   6519  1.1  mrg #define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
   6520  1.1  mrg #undef  TARGET_ADDR_SPACE_DEBUG
   6521  1.1  mrg #define TARGET_ADDR_SPACE_DEBUG gcn_addr_space_debug
   6522  1.1  mrg #undef  TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
   6523  1.1  mrg #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
   6524  1.1  mrg   gcn_addr_space_legitimate_address_p
   6525  1.1  mrg #undef  TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
   6526  1.1  mrg #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address
   6527  1.1  mrg #undef  TARGET_ADDR_SPACE_POINTER_MODE
   6528  1.1  mrg #define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode
   6529  1.1  mrg #undef  TARGET_ADDR_SPACE_SUBSET_P
   6530  1.1  mrg #define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p
   6531  1.1  mrg #undef  TARGET_ADDR_SPACE_CONVERT
   6532  1.1  mrg #define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert
   6533  1.1  mrg #undef  TARGET_ARG_PARTIAL_BYTES
   6534  1.1  mrg #define TARGET_ARG_PARTIAL_BYTES gcn_arg_partial_bytes
   6535  1.1  mrg #undef  TARGET_ASM_ALIGNED_DI_OP
   6536  1.1  mrg #define TARGET_ASM_ALIGNED_DI_OP "\t.8byte\t"
   6537  1.1  mrg #undef  TARGET_ASM_FILE_START
   6538  1.1  mrg #define TARGET_ASM_FILE_START output_file_start
   6539  1.1  mrg #undef  TARGET_ASM_FUNCTION_PROLOGUE
   6540  1.1  mrg #define TARGET_ASM_FUNCTION_PROLOGUE gcn_target_asm_function_prologue
   6541  1.1  mrg #undef  TARGET_ASM_SELECT_SECTION
   6542  1.1  mrg #define TARGET_ASM_SELECT_SECTION gcn_asm_select_section
   6543  1.1  mrg #undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
   6544  1.1  mrg #define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template
   6545  1.1  mrg #undef  TARGET_ATTRIBUTE_TABLE
   6546  1.1  mrg #define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
   6547  1.1  mrg #undef  TARGET_BUILTIN_DECL
   6548  1.1  mrg #define TARGET_BUILTIN_DECL gcn_builtin_decl
   6549  1.1  mrg #undef  TARGET_CAN_CHANGE_MODE_CLASS
   6550  1.1  mrg #define TARGET_CAN_CHANGE_MODE_CLASS gcn_can_change_mode_class
   6551  1.1  mrg #undef  TARGET_CAN_ELIMINATE
   6552  1.1  mrg #define TARGET_CAN_ELIMINATE gcn_can_eliminate_p
   6553  1.1  mrg #undef  TARGET_CANNOT_COPY_INSN_P
   6554  1.1  mrg #define TARGET_CANNOT_COPY_INSN_P gcn_cannot_copy_insn_p
   6555  1.1  mrg #undef  TARGET_CLASS_LIKELY_SPILLED_P
   6556  1.1  mrg #define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p
   6557  1.1  mrg #undef  TARGET_CLASS_MAX_NREGS
   6558  1.1  mrg #define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs
   6559  1.1  mrg #undef  TARGET_CONDITIONAL_REGISTER_USAGE
   6560  1.1  mrg #define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage
   6561  1.1  mrg #undef  TARGET_CONSTANT_ALIGNMENT
   6562  1.1  mrg #define TARGET_CONSTANT_ALIGNMENT gcn_constant_alignment
   6563  1.1  mrg #undef  TARGET_DEBUG_UNWIND_INFO
   6564  1.1  mrg #define TARGET_DEBUG_UNWIND_INFO gcn_debug_unwind_info
   6565  1.1  mrg #undef  TARGET_DWARF_REGISTER_SPAN
   6566  1.1  mrg #define TARGET_DWARF_REGISTER_SPAN gcn_dwarf_register_span
   6567  1.1  mrg #undef  TARGET_EMUTLS_VAR_INIT
   6568  1.1  mrg #define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init
   6569  1.1  mrg #undef  TARGET_EXPAND_BUILTIN
   6570  1.1  mrg #define TARGET_EXPAND_BUILTIN gcn_expand_builtin
   6571  1.1  mrg #undef  TARGET_FRAME_POINTER_REQUIRED
   6572  1.1  mrg #define TARGET_FRAME_POINTER_REQUIRED gcn_frame_pointer_rqd
   6573  1.1  mrg #undef  TARGET_FUNCTION_ARG
   6574  1.1  mrg #undef  TARGET_FUNCTION_ARG_ADVANCE
   6575  1.1  mrg #define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
   6576  1.1  mrg #define TARGET_FUNCTION_ARG gcn_function_arg
   6577  1.1  mrg #undef  TARGET_FUNCTION_VALUE
   6578  1.1  mrg #define TARGET_FUNCTION_VALUE gcn_function_value
   6579  1.1  mrg #undef  TARGET_FUNCTION_VALUE_REGNO_P
   6580  1.1  mrg #define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p
   6581  1.1  mrg #undef  TARGET_GIMPLIFY_VA_ARG_EXPR
   6582  1.1  mrg #define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr
   6583  1.1  mrg #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
   6584  1.1  mrg #define TARGET_OMP_DEVICE_KIND_ARCH_ISA gcn_omp_device_kind_arch_isa
   6585  1.1  mrg #undef  TARGET_GOACC_ADJUST_PRIVATE_DECL
   6586  1.1  mrg #define TARGET_GOACC_ADJUST_PRIVATE_DECL gcn_goacc_adjust_private_decl
   6587  1.1  mrg #undef  TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD
   6588  1.1  mrg #define TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD \
   6589  1.1  mrg   gcn_goacc_create_worker_broadcast_record
   6590  1.1  mrg #undef  TARGET_GOACC_FORK_JOIN
   6591  1.1  mrg #define TARGET_GOACC_FORK_JOIN gcn_fork_join
   6592  1.1  mrg #undef  TARGET_GOACC_REDUCTION
   6593  1.1  mrg #define TARGET_GOACC_REDUCTION gcn_goacc_reduction
   6594  1.1  mrg #undef  TARGET_GOACC_VALIDATE_DIMS
   6595  1.1  mrg #define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims
   6596  1.1  mrg #undef  TARGET_GOACC_SHARED_MEM_LAYOUT
   6597  1.1  mrg #define TARGET_GOACC_SHARED_MEM_LAYOUT gcn_shared_mem_layout
   6598  1.1  mrg #undef  TARGET_HARD_REGNO_MODE_OK
   6599  1.1  mrg #define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok
   6600  1.1  mrg #undef  TARGET_HARD_REGNO_NREGS
   6601  1.1  mrg #define TARGET_HARD_REGNO_NREGS gcn_hard_regno_nregs
   6602  1.1  mrg #undef  TARGET_HAVE_SPECULATION_SAFE_VALUE
   6603  1.1  mrg #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
   6604  1.1  mrg #undef  TARGET_INIT_BUILTINS
   6605  1.1  mrg #define TARGET_INIT_BUILTINS gcn_init_builtins
   6606  1.1  mrg #undef  TARGET_INIT_LIBFUNCS
   6607  1.1  mrg #define TARGET_INIT_LIBFUNCS gcn_init_libfuncs
   6608  1.1  mrg #undef  TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
   6609  1.1  mrg #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
   6610  1.1  mrg   gcn_ira_change_pseudo_allocno_class
   6611  1.1  mrg #undef  TARGET_LEGITIMATE_CONSTANT_P
   6612  1.1  mrg #define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
   6613  1.1  mrg #undef  TARGET_LRA_P
   6614  1.1  mrg #define TARGET_LRA_P hook_bool_void_true
   6615  1.1  mrg #undef  TARGET_MACHINE_DEPENDENT_REORG
   6616  1.1  mrg #define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg
   6617  1.1  mrg #undef  TARGET_MEMORY_MOVE_COST
   6618  1.1  mrg #define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost
   6619  1.1  mrg #undef  TARGET_MODES_TIEABLE_P
   6620  1.1  mrg #define TARGET_MODES_TIEABLE_P gcn_modes_tieable_p
   6621  1.1  mrg #undef  TARGET_OPTION_OVERRIDE
   6622  1.1  mrg #define TARGET_OPTION_OVERRIDE gcn_option_override
   6623  1.1  mrg #undef  TARGET_PRETEND_OUTGOING_VARARGS_NAMED
   6624  1.1  mrg #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED \
   6625  1.1  mrg   gcn_pretend_outgoing_varargs_named
   6626  1.1  mrg #undef  TARGET_PROMOTE_FUNCTION_MODE
   6627  1.1  mrg #define TARGET_PROMOTE_FUNCTION_MODE gcn_promote_function_mode
   6628  1.1  mrg #undef  TARGET_REGISTER_MOVE_COST
   6629  1.1  mrg #define TARGET_REGISTER_MOVE_COST gcn_register_move_cost
   6630  1.1  mrg #undef  TARGET_RETURN_IN_MEMORY
   6631  1.1  mrg #define TARGET_RETURN_IN_MEMORY gcn_return_in_memory
   6632  1.1  mrg #undef  TARGET_RTX_COSTS
   6633  1.1  mrg #define TARGET_RTX_COSTS gcn_rtx_costs
   6634  1.1  mrg #undef  TARGET_SECONDARY_RELOAD
   6635  1.1  mrg #define TARGET_SECONDARY_RELOAD gcn_secondary_reload
   6636  1.1  mrg #undef  TARGET_SECTION_TYPE_FLAGS
   6637  1.1  mrg #define TARGET_SECTION_TYPE_FLAGS gcn_section_type_flags
   6638  1.1  mrg #undef  TARGET_SCALAR_MODE_SUPPORTED_P
   6639  1.1  mrg #define TARGET_SCALAR_MODE_SUPPORTED_P gcn_scalar_mode_supported_p
   6640  1.1  mrg #undef  TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
   6641  1.1  mrg #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
   6642  1.1  mrg   gcn_small_register_classes_for_mode_p
   6643  1.1  mrg #undef  TARGET_SPILL_CLASS
   6644  1.1  mrg #define TARGET_SPILL_CLASS gcn_spill_class
   6645  1.1  mrg #undef  TARGET_STRICT_ARGUMENT_NAMING
   6646  1.1  mrg #define TARGET_STRICT_ARGUMENT_NAMING gcn_strict_argument_naming
   6647  1.1  mrg #undef  TARGET_TRAMPOLINE_INIT
   6648  1.1  mrg #define TARGET_TRAMPOLINE_INIT gcn_trampoline_init
   6649  1.1  mrg #undef  TARGET_TRULY_NOOP_TRUNCATION
   6650  1.1  mrg #define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation
   6651  1.1  mrg #undef  TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
   6652  1.1  mrg #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost
   6653  1.1  mrg #undef  TARGET_VECTORIZE_GET_MASK_MODE
   6654  1.1  mrg #define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
   6655  1.1  mrg #undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
   6656  1.1  mrg #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
   6657  1.1  mrg #undef  TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
   6658  1.1  mrg #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
   6659  1.1  mrg   gcn_preferred_vector_alignment
   6660  1.1  mrg #undef  TARGET_VECTORIZE_RELATED_MODE
   6661  1.1  mrg #define TARGET_VECTORIZE_RELATED_MODE gcn_related_vector_mode
   6662  1.1  mrg #undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
   6663  1.1  mrg #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
   6664  1.1  mrg   gcn_vectorize_support_vector_misalignment
   6665  1.1  mrg #undef  TARGET_VECTORIZE_VEC_PERM_CONST
   6666  1.1  mrg #define TARGET_VECTORIZE_VEC_PERM_CONST gcn_vectorize_vec_perm_const
   6667  1.1  mrg #undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
   6668  1.1  mrg #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
   6669  1.1  mrg   gcn_vector_alignment_reachable
   6670  1.1  mrg #undef  TARGET_VECTOR_MODE_SUPPORTED_P
   6671  1.1  mrg #define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p
   6672  1.1  mrg 
   6673  1.1  mrg struct gcc_target targetm = TARGET_INITIALIZER;
   6674  1.1  mrg 
   6675  1.1  mrg #include "gt-gcn.h"
   6676  1.1  mrg /* }}}  */
   6677