1 1.1 mrg /* Copyright (C) 2016-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg This file is free software; you can redistribute it and/or modify it under 4 1.1 mrg the terms of the GNU General Public License as published by the Free 5 1.1 mrg Software Foundation; either version 3 of the License, or (at your option) 6 1.1 mrg any later version. 7 1.1 mrg 8 1.1 mrg This file is distributed in the hope that it will be useful, but WITHOUT 9 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 11 1.1 mrg for more details. 12 1.1 mrg 13 1.1 mrg You should have received a copy of the GNU General Public License 14 1.1 mrg along with GCC; see the file COPYING3. If not see 15 1.1 mrg <http://www.gnu.org/licenses/>. */ 16 1.1 mrg 17 1.1 mrg /* {{{ Includes. */ 18 1.1 mrg 19 1.1 mrg /* We want GET_MODE_SIZE et al to return integers, please. */ 20 1.1 mrg #define IN_TARGET_CODE 1 21 1.1 mrg 22 1.1 mrg #include "config.h" 23 1.1 mrg #include "system.h" 24 1.1 mrg #include "coretypes.h" 25 1.1 mrg #include "backend.h" 26 1.1 mrg #include "target.h" 27 1.1 mrg #include "memmodel.h" 28 1.1 mrg #include "rtl.h" 29 1.1 mrg #include "tree.h" 30 1.1 mrg #include "df.h" 31 1.1 mrg #include "tm_p.h" 32 1.1 mrg #include "stringpool.h" 33 1.1 mrg #include "optabs.h" 34 1.1 mrg #include "regs.h" 35 1.1 mrg #include "emit-rtl.h" 36 1.1 mrg #include "recog.h" 37 1.1 mrg #include "diagnostic-core.h" 38 1.1 mrg #include "insn-attr.h" 39 1.1 mrg #include "fold-const.h" 40 1.1 mrg #include "calls.h" 41 1.1 mrg #include "explow.h" 42 1.1 mrg #include "expr.h" 43 1.1 mrg #include "output.h" 44 1.1 mrg #include "cfgrtl.h" 45 1.1 mrg #include "langhooks.h" 46 1.1 mrg #include "builtins.h" 47 1.1 mrg #include "omp-general.h" 48 1.1 mrg #include "print-rtl.h" 49 1.1 mrg #include "attribs.h" 50 1.1 mrg #include "varasm.h" 51 1.1 mrg #include "intl.h" 52 1.1 mrg #include "rtl-iter.h" 53 1.1 
mrg #include "dwarf2.h" 54 1.1 mrg #include "gimple.h" 55 1.1 mrg 56 1.1 mrg /* This file should be included last. */ 57 1.1 mrg #include "target-def.h" 58 1.1 mrg 59 1.1 mrg /* }}} */ 60 1.1 mrg /* {{{ Global variables. */ 61 1.1 mrg 62 1.1 mrg /* Constants used by FP instructions. */ 63 1.1 mrg 64 1.1 mrg static REAL_VALUE_TYPE dconst4, dconst1over2pi; 65 1.1 mrg static bool ext_gcn_constants_init = 0; 66 1.1 mrg 67 1.1 mrg /* Holds the ISA variant, derived from the command line parameters. */ 68 1.1 mrg 69 1.1 mrg int gcn_isa = 3; /* Default to GCN3. */ 70 1.1 mrg 71 1.1 mrg /* Reserve this much space for LDS (for propagating variables from 72 1.1 mrg worker-single mode to worker-partitioned mode), per workgroup. Global 73 1.1 mrg analysis could calculate an exact bound, but we don't do that yet. 74 1.1 mrg 75 1.1 mrg We want to permit full occupancy, so size accordingly. */ 76 1.1 mrg 77 1.1 mrg /* Use this as a default, but allow it to grow if the user requests a large 78 1.1 mrg amount of gang-private shared-memory space. */ 79 1.1 mrg static int acc_lds_size = 0x600; 80 1.1 mrg 81 1.1 mrg #define OMP_LDS_SIZE 0x600 /* 0x600 is 1/40 total, rounded down. */ 82 1.1 mrg #define ACC_LDS_SIZE acc_lds_size 83 1.1 mrg #define OTHER_LDS_SIZE 65536 /* If in doubt, reserve all of it. */ 84 1.1 mrg 85 1.1 mrg #define LDS_SIZE (flag_openacc ? ACC_LDS_SIZE \ 86 1.1 mrg : flag_openmp ? OMP_LDS_SIZE \ 87 1.1 mrg : OTHER_LDS_SIZE) 88 1.1 mrg 89 1.1 mrg static int gang_private_hwm = 32; 90 1.1 mrg static hash_map<tree, int> lds_allocs; 91 1.1 mrg 92 1.1 mrg /* The number of registers usable by normal non-kernel functions. 93 1.1 mrg The SGPR count includes any special extra registers such as VCC. */ 94 1.1 mrg 95 1.1 mrg #define MAX_NORMAL_SGPR_COUNT 62 // i.e. 64 with VCC 96 1.1 mrg #define MAX_NORMAL_VGPR_COUNT 24 97 1.1 mrg 98 1.1 mrg /* }}} */ 99 1.1 mrg /* {{{ Initialization and options. */ 100 1.1 mrg 101 1.1 mrg /* Initialize machine_function. 
*/ 102 1.1 mrg 103 1.1 mrg static struct machine_function * 104 1.1 mrg gcn_init_machine_status (void) 105 1.1 mrg { 106 1.1 mrg struct machine_function *f; 107 1.1 mrg 108 1.1 mrg f = ggc_cleared_alloc<machine_function> (); 109 1.1 mrg 110 1.1 mrg if (TARGET_GCN3) 111 1.1 mrg f->use_flat_addressing = true; 112 1.1 mrg 113 1.1 mrg return f; 114 1.1 mrg } 115 1.1 mrg 116 1.1 mrg /* Implement TARGET_OPTION_OVERRIDE. 117 1.1 mrg 118 1.1 mrg Override option settings where defaults are variable, or we have specific 119 1.1 mrg needs to consider. */ 120 1.1 mrg 121 1.1 mrg static void 122 1.1 mrg gcn_option_override (void) 123 1.1 mrg { 124 1.1 mrg init_machine_status = gcn_init_machine_status; 125 1.1 mrg 126 1.1 mrg /* The HSA runtime does not respect ELF load addresses, so force PIE. */ 127 1.1 mrg if (!flag_pie) 128 1.1 mrg flag_pie = 2; 129 1.1 mrg if (!flag_pic) 130 1.1 mrg flag_pic = flag_pie; 131 1.1 mrg 132 1.1 mrg gcn_isa = gcn_arch == PROCESSOR_FIJI ? 3 : 5; 133 1.1 mrg 134 1.1 mrg /* The default stack size needs to be small for offload kernels because 135 1.1 mrg there may be many, many threads. Also, a smaller stack gives a 136 1.1 mrg measureable performance boost. But, a small stack is insufficient 137 1.1 mrg for running the testsuite, so we use a larger default for the stand 138 1.1 mrg alone case. */ 139 1.1 mrg if (stack_size_opt == -1) 140 1.1 mrg { 141 1.1 mrg if (flag_openacc || flag_openmp) 142 1.1 mrg /* 512 bytes per work item = 32kB total. */ 143 1.1 mrg stack_size_opt = 512 * 64; 144 1.1 mrg else 145 1.1 mrg /* 1MB total. */ 146 1.1 mrg stack_size_opt = 1048576; 147 1.1 mrg } 148 1.1 mrg 149 1.1 mrg /* Reserve 1Kb (somewhat arbitrarily) of LDS space for reduction results and 150 1.1 mrg worker broadcasts. 
*/ 151 1.1 mrg if (gang_private_size_opt == -1) 152 1.1 mrg gang_private_size_opt = 512; 153 1.1 mrg else if (gang_private_size_opt < gang_private_hwm) 154 1.1 mrg gang_private_size_opt = gang_private_hwm; 155 1.1 mrg else if (gang_private_size_opt >= acc_lds_size - 1024) 156 1.1 mrg { 157 1.1 mrg /* We need some space for reductions and worker broadcasting. If the 158 1.1 mrg user requests a large amount of gang-private LDS space, we might not 159 1.1 mrg have enough left for the former. Increase the LDS allocation in that 160 1.1 mrg case, although this may reduce the maximum occupancy on the 161 1.1 mrg hardware. */ 162 1.1 mrg acc_lds_size = gang_private_size_opt + 1024; 163 1.1 mrg if (acc_lds_size > 32768) 164 1.1 mrg acc_lds_size = 32768; 165 1.1 mrg } 166 1.1 mrg 167 1.1 mrg /* The xnack option is a placeholder, for now. */ 168 1.1 mrg if (flag_xnack) 169 1.1 mrg sorry ("XNACK support"); 170 1.1 mrg } 171 1.1 mrg 172 1.1 mrg /* }}} */ 173 1.1 mrg /* {{{ Attributes. */ 174 1.1 mrg 175 1.1 mrg /* This table defines the arguments that are permitted in 176 1.1 mrg __attribute__ ((amdgpu_hsa_kernel (...))). 177 1.1 mrg 178 1.1 mrg The names and values correspond to the HSA metadata that is encoded 179 1.1 mrg into the assembler file and binary. */ 180 1.1 mrg 181 1.1 mrg static const struct gcn_kernel_arg_type 182 1.1 mrg { 183 1.1 mrg const char *name; 184 1.1 mrg const char *header_pseudo; 185 1.1 mrg machine_mode mode; 186 1.1 mrg 187 1.1 mrg /* This should be set to -1 or -2 for a dynamically allocated register 188 1.1 mrg number. Use -1 if this argument contributes to the user_sgpr_count, 189 1.1 mrg -2 otherwise. 
*/ 190 1.1 mrg int fixed_regno; 191 1.1 mrg } gcn_kernel_arg_types[] = { 192 1.1 mrg {"exec", NULL, DImode, EXEC_REG}, 193 1.1 mrg #define PRIVATE_SEGMENT_BUFFER_ARG 1 194 1.1 mrg {"private_segment_buffer", 195 1.1 mrg ".amdhsa_user_sgpr_private_segment_buffer", TImode, -1}, 196 1.1 mrg #define DISPATCH_PTR_ARG 2 197 1.1 mrg {"dispatch_ptr", ".amdhsa_user_sgpr_dispatch_ptr", DImode, -1}, 198 1.1 mrg #define QUEUE_PTR_ARG 3 199 1.1 mrg {"queue_ptr", ".amdhsa_user_sgpr_queue_ptr", DImode, -1}, 200 1.1 mrg #define KERNARG_SEGMENT_PTR_ARG 4 201 1.1 mrg {"kernarg_segment_ptr", ".amdhsa_user_sgpr_kernarg_segment_ptr", DImode, -1}, 202 1.1 mrg {"dispatch_id", ".amdhsa_user_sgpr_dispatch_id", DImode, -1}, 203 1.1 mrg #define FLAT_SCRATCH_INIT_ARG 6 204 1.1 mrg {"flat_scratch_init", ".amdhsa_user_sgpr_flat_scratch_init", DImode, -1}, 205 1.1 mrg #define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7 206 1.1 mrg {"private_segment_size", ".amdhsa_user_sgpr_private_segment_size", SImode, -1}, 207 1.1 mrg #define WORKGROUP_ID_X_ARG 8 208 1.1 mrg {"workgroup_id_X", ".amdhsa_system_sgpr_workgroup_id_x", SImode, -2}, 209 1.1 mrg {"workgroup_id_Y", ".amdhsa_system_sgpr_workgroup_id_y", SImode, -2}, 210 1.1 mrg {"workgroup_id_Z", ".amdhsa_system_sgpr_workgroup_id_z", SImode, -2}, 211 1.1 mrg {"workgroup_info", ".amdhsa_system_sgpr_workgroup_info", SImode, -1}, 212 1.1 mrg #define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 12 213 1.1 mrg {"private_segment_wave_offset", 214 1.1 mrg ".amdhsa_system_sgpr_private_segment_wavefront_offset", SImode, -2}, 215 1.1 mrg #define WORK_ITEM_ID_X_ARG 13 216 1.1 mrg {"work_item_id_X", NULL, V64SImode, FIRST_VGPR_REG}, 217 1.1 mrg #define WORK_ITEM_ID_Y_ARG 14 218 1.1 mrg {"work_item_id_Y", NULL, V64SImode, FIRST_VGPR_REG + 1}, 219 1.1 mrg #define WORK_ITEM_ID_Z_ARG 15 220 1.1 mrg {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2} 221 1.1 mrg }; 222 1.1 mrg 223 1.1 mrg static const long default_requested_args 224 1.1 mrg = (1 << PRIVATE_SEGMENT_BUFFER_ARG) 225 1.1 mrg | 
(1 << DISPATCH_PTR_ARG) 226 1.1 mrg | (1 << QUEUE_PTR_ARG) 227 1.1 mrg | (1 << KERNARG_SEGMENT_PTR_ARG) 228 1.1 mrg | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG) 229 1.1 mrg | (1 << WORKGROUP_ID_X_ARG) 230 1.1 mrg | (1 << WORK_ITEM_ID_X_ARG) 231 1.1 mrg | (1 << WORK_ITEM_ID_Y_ARG) 232 1.1 mrg | (1 << WORK_ITEM_ID_Z_ARG); 233 1.1 mrg 234 1.1 mrg /* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())). 235 1.1 mrg This function also sets the default values for some arguments. 236 1.1 mrg 237 1.1 mrg Return true on success, with ARGS populated. */ 238 1.1 mrg 239 1.1 mrg static bool 240 1.1 mrg gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args, 241 1.1 mrg tree list) 242 1.1 mrg { 243 1.1 mrg bool err = false; 244 1.1 mrg args->requested = default_requested_args; 245 1.1 mrg args->nargs = 0; 246 1.1 mrg 247 1.1 mrg for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++) 248 1.1 mrg args->reg[a] = -1; 249 1.1 mrg 250 1.1 mrg for (; list; list = TREE_CHAIN (list)) 251 1.1 mrg { 252 1.1 mrg const char *str; 253 1.1 mrg if (TREE_CODE (TREE_VALUE (list)) != STRING_CST) 254 1.1 mrg { 255 1.1 mrg error ("%<amdgpu_hsa_kernel%> attribute requires string constant " 256 1.1 mrg "arguments"); 257 1.1 mrg break; 258 1.1 mrg } 259 1.1 mrg str = TREE_STRING_POINTER (TREE_VALUE (list)); 260 1.1 mrg int a; 261 1.1 mrg for (a = 0; a < GCN_KERNEL_ARG_TYPES; a++) 262 1.1 mrg { 263 1.1 mrg if (!strcmp (str, gcn_kernel_arg_types[a].name)) 264 1.1 mrg break; 265 1.1 mrg } 266 1.1 mrg if (a == GCN_KERNEL_ARG_TYPES) 267 1.1 mrg { 268 1.1 mrg error ("unknown specifier %qs in %<amdgpu_hsa_kernel%> attribute", 269 1.1 mrg str); 270 1.1 mrg err = true; 271 1.1 mrg break; 272 1.1 mrg } 273 1.1 mrg if (args->requested & (1 << a)) 274 1.1 mrg { 275 1.1 mrg error ("duplicated parameter specifier %qs in %<amdgpu_hsa_kernel%> " 276 1.1 mrg "attribute", str); 277 1.1 mrg err = true; 278 1.1 mrg break; 279 1.1 mrg } 280 1.1 mrg args->requested |= (1 << a); 281 1.1 mrg 
args->order[args->nargs++] = a; 282 1.1 mrg } 283 1.1 mrg 284 1.1 mrg /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and 285 1.1 mrg WORK_ITEM_ID_Y_ARG. Similarly, requesting WORK_ITEM_ID_Y_ARG implies 286 1.1 mrg requesting WORK_ITEM_ID_X_ARG. */ 287 1.1 mrg if (args->requested & (1 << WORK_ITEM_ID_Z_ARG)) 288 1.1 mrg args->requested |= (1 << WORK_ITEM_ID_Y_ARG); 289 1.1 mrg if (args->requested & (1 << WORK_ITEM_ID_Y_ARG)) 290 1.1 mrg args->requested |= (1 << WORK_ITEM_ID_X_ARG); 291 1.1 mrg 292 1.1 mrg int sgpr_regno = FIRST_SGPR_REG; 293 1.1 mrg args->nsgprs = 0; 294 1.1 mrg for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++) 295 1.1 mrg { 296 1.1 mrg if (!(args->requested & (1 << a))) 297 1.1 mrg continue; 298 1.1 mrg 299 1.1 mrg if (gcn_kernel_arg_types[a].fixed_regno >= 0) 300 1.1 mrg args->reg[a] = gcn_kernel_arg_types[a].fixed_regno; 301 1.1 mrg else 302 1.1 mrg { 303 1.1 mrg int reg_count; 304 1.1 mrg 305 1.1 mrg switch (gcn_kernel_arg_types[a].mode) 306 1.1 mrg { 307 1.1 mrg case E_SImode: 308 1.1 mrg reg_count = 1; 309 1.1 mrg break; 310 1.1 mrg case E_DImode: 311 1.1 mrg reg_count = 2; 312 1.1 mrg break; 313 1.1 mrg case E_TImode: 314 1.1 mrg reg_count = 4; 315 1.1 mrg break; 316 1.1 mrg default: 317 1.1 mrg gcc_unreachable (); 318 1.1 mrg } 319 1.1 mrg args->reg[a] = sgpr_regno; 320 1.1 mrg sgpr_regno += reg_count; 321 1.1 mrg if (gcn_kernel_arg_types[a].fixed_regno == -1) 322 1.1 mrg args->nsgprs += reg_count; 323 1.1 mrg } 324 1.1 mrg } 325 1.1 mrg if (sgpr_regno > FIRST_SGPR_REG + 16) 326 1.1 mrg { 327 1.1 mrg error ("too many arguments passed in sgpr registers"); 328 1.1 mrg } 329 1.1 mrg return err; 330 1.1 mrg } 331 1.1 mrg 332 1.1 mrg /* Referenced by TARGET_ATTRIBUTE_TABLE. 333 1.1 mrg 334 1.1 mrg Validates target specific attributes. 
*/ 335 1.1 mrg 336 1.1 mrg static tree 337 1.1 mrg gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name, 338 1.1 mrg tree args, int, bool *no_add_attrs) 339 1.1 mrg { 340 1.1 mrg if (!FUNC_OR_METHOD_TYPE_P (*node)) 341 1.1 mrg { 342 1.1 mrg warning (OPT_Wattributes, "%qE attribute only applies to functions", 343 1.1 mrg name); 344 1.1 mrg *no_add_attrs = true; 345 1.1 mrg return NULL_TREE; 346 1.1 mrg } 347 1.1 mrg 348 1.1 mrg /* Can combine regparm with all attributes but fastcall, and thiscall. */ 349 1.1 mrg if (is_attribute_p ("gcnhsa_kernel", name)) 350 1.1 mrg { 351 1.1 mrg struct gcn_kernel_args kernelarg; 352 1.1 mrg 353 1.1 mrg if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg, args)) 354 1.1 mrg *no_add_attrs = true; 355 1.1 mrg 356 1.1 mrg return NULL_TREE; 357 1.1 mrg } 358 1.1 mrg 359 1.1 mrg return NULL_TREE; 360 1.1 mrg } 361 1.1 mrg 362 1.1 mrg /* Implement TARGET_ATTRIBUTE_TABLE. 363 1.1 mrg 364 1.1 mrg Create target-specific __attribute__ types. */ 365 1.1 mrg 366 1.1 mrg static const struct attribute_spec gcn_attribute_table[] = { 367 1.1 mrg /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, 368 1.1 mrg affects_type_identity } */ 369 1.1 mrg {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true, 370 1.1 mrg true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL}, 371 1.1 mrg /* End element. */ 372 1.1 mrg {NULL, 0, 0, false, false, false, false, NULL, NULL} 373 1.1 mrg }; 374 1.1 mrg 375 1.1 mrg /* }}} */ 376 1.1 mrg /* {{{ Registers and modes. */ 377 1.1 mrg 378 1.1 mrg /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. 
*/ 379 1.1 mrg 380 1.1 mrg bool 381 1.1 mrg gcn_scalar_mode_supported_p (scalar_mode mode) 382 1.1 mrg { 383 1.1 mrg return (mode == BImode 384 1.1 mrg || mode == QImode 385 1.1 mrg || mode == HImode /* || mode == HFmode */ 386 1.1 mrg || mode == SImode || mode == SFmode 387 1.1 mrg || mode == DImode || mode == DFmode 388 1.1 mrg || mode == TImode); 389 1.1 mrg } 390 1.1 mrg 391 1.1 mrg /* Implement TARGET_CLASS_MAX_NREGS. 392 1.1 mrg 393 1.1 mrg Return the number of hard registers needed to hold a value of MODE in 394 1.1 mrg a register of class RCLASS. */ 395 1.1 mrg 396 1.1 mrg static unsigned char 397 1.1 mrg gcn_class_max_nregs (reg_class_t rclass, machine_mode mode) 398 1.1 mrg { 399 1.1 mrg /* Scalar registers are 32bit, vector registers are in fact tuples of 400 1.1 mrg 64 lanes. */ 401 1.1 mrg if (rclass == VGPR_REGS) 402 1.1 mrg { 403 1.1 mrg if (vgpr_1reg_mode_p (mode)) 404 1.1 mrg return 1; 405 1.1 mrg if (vgpr_2reg_mode_p (mode)) 406 1.1 mrg return 2; 407 1.1 mrg /* TImode is used by DImode compare_and_swap. */ 408 1.1 mrg if (mode == TImode) 409 1.1 mrg return 4; 410 1.1 mrg } 411 1.1 mrg else if (rclass == VCC_CONDITIONAL_REG && mode == BImode) 412 1.1 mrg return 2; 413 1.1 mrg return CEIL (GET_MODE_SIZE (mode), 4); 414 1.1 mrg } 415 1.1 mrg 416 1.1 mrg /* Implement TARGET_HARD_REGNO_NREGS. 417 1.1 mrg 418 1.1 mrg Return the number of hard registers needed to hold a value of MODE in 419 1.1 mrg REGNO. */ 420 1.1 mrg 421 1.1 mrg unsigned int 422 1.1 mrg gcn_hard_regno_nregs (unsigned int regno, machine_mode mode) 423 1.1 mrg { 424 1.1 mrg return gcn_class_max_nregs (REGNO_REG_CLASS (regno), mode); 425 1.1 mrg } 426 1.1 mrg 427 1.1 mrg /* Implement TARGET_HARD_REGNO_MODE_OK. 428 1.1 mrg 429 1.1 mrg Return true if REGNO can hold value in MODE. 
*/ 430 1.1 mrg 431 1.1 mrg bool 432 1.1 mrg gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode) 433 1.1 mrg { 434 1.1 mrg /* Treat a complex mode as if it were a scalar mode of the same overall 435 1.1 mrg size for the purposes of allocating hard registers. */ 436 1.1 mrg if (COMPLEX_MODE_P (mode)) 437 1.1 mrg switch (mode) 438 1.1 mrg { 439 1.1 mrg case E_CQImode: 440 1.1 mrg case E_CHImode: 441 1.1 mrg mode = SImode; 442 1.1 mrg break; 443 1.1 mrg case E_CSImode: 444 1.1 mrg mode = DImode; 445 1.1 mrg break; 446 1.1 mrg case E_CDImode: 447 1.1 mrg mode = TImode; 448 1.1 mrg break; 449 1.1 mrg case E_HCmode: 450 1.1 mrg mode = SFmode; 451 1.1 mrg break; 452 1.1 mrg case E_SCmode: 453 1.1 mrg mode = DFmode; 454 1.1 mrg break; 455 1.1 mrg default: 456 1.1 mrg /* Not supported. */ 457 1.1 mrg return false; 458 1.1 mrg } 459 1.1 mrg 460 1.1 mrg switch (regno) 461 1.1 mrg { 462 1.1 mrg case FLAT_SCRATCH_LO_REG: 463 1.1 mrg case XNACK_MASK_LO_REG: 464 1.1 mrg case TBA_LO_REG: 465 1.1 mrg case TMA_LO_REG: 466 1.1 mrg return (mode == SImode || mode == DImode); 467 1.1 mrg case VCC_LO_REG: 468 1.1 mrg case EXEC_LO_REG: 469 1.1 mrg return (mode == BImode || mode == SImode || mode == DImode); 470 1.1 mrg case M0_REG: 471 1.1 mrg case FLAT_SCRATCH_HI_REG: 472 1.1 mrg case XNACK_MASK_HI_REG: 473 1.1 mrg case TBA_HI_REG: 474 1.1 mrg case TMA_HI_REG: 475 1.1 mrg return mode == SImode; 476 1.1 mrg case VCC_HI_REG: 477 1.1 mrg return false; 478 1.1 mrg case EXEC_HI_REG: 479 1.1 mrg return mode == SImode /*|| mode == V32BImode */ ; 480 1.1 mrg case SCC_REG: 481 1.1 mrg case VCCZ_REG: 482 1.1 mrg case EXECZ_REG: 483 1.1 mrg return mode == BImode; 484 1.1 mrg } 485 1.1 mrg if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM) 486 1.1 mrg return true; 487 1.1 mrg if (SGPR_REGNO_P (regno)) 488 1.1 mrg /* We restrict double register values to aligned registers. 
*/ 489 1.1 mrg return (sgpr_1reg_mode_p (mode) 490 1.1 mrg || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode)) 491 1.1 mrg || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode)); 492 1.1 mrg if (VGPR_REGNO_P (regno)) 493 1.1 mrg /* Vector instructions do not care about the alignment of register 494 1.1 mrg pairs, but where there is no 64-bit instruction, many of the 495 1.1 mrg define_split do not work if the input and output registers partially 496 1.1 mrg overlap. We tried to fix this with early clobber and match 497 1.1 mrg constraints, but it was bug prone, added complexity, and conflicts 498 1.1 mrg with the 'U0' constraints on vec_merge. 499 1.1 mrg Therefore, we restrict ourselved to aligned registers. */ 500 1.1 mrg return (vgpr_1reg_mode_p (mode) 501 1.1 mrg || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode)) 502 1.1 mrg /* TImode is used by DImode compare_and_swap. */ 503 1.1 mrg || (mode == TImode 504 1.1 mrg && !((regno - FIRST_VGPR_REG) & 3))); 505 1.1 mrg return false; 506 1.1 mrg } 507 1.1 mrg 508 1.1 mrg /* Implement REGNO_REG_CLASS via gcn.h. 509 1.1 mrg 510 1.1 mrg Return smallest class containing REGNO. 
*/ 511 1.1 mrg 512 1.1 mrg enum reg_class 513 1.1 mrg gcn_regno_reg_class (int regno) 514 1.1 mrg { 515 1.1 mrg switch (regno) 516 1.1 mrg { 517 1.1 mrg case SCC_REG: 518 1.1 mrg return SCC_CONDITIONAL_REG; 519 1.1 mrg case VCC_LO_REG: 520 1.1 mrg case VCC_HI_REG: 521 1.1 mrg return VCC_CONDITIONAL_REG; 522 1.1 mrg case VCCZ_REG: 523 1.1 mrg return VCCZ_CONDITIONAL_REG; 524 1.1 mrg case EXECZ_REG: 525 1.1 mrg return EXECZ_CONDITIONAL_REG; 526 1.1 mrg case EXEC_LO_REG: 527 1.1 mrg case EXEC_HI_REG: 528 1.1 mrg return EXEC_MASK_REG; 529 1.1 mrg } 530 1.1 mrg if (VGPR_REGNO_P (regno)) 531 1.1 mrg return VGPR_REGS; 532 1.1 mrg if (SGPR_REGNO_P (regno)) 533 1.1 mrg return SGPR_REGS; 534 1.1 mrg if (regno < FIRST_VGPR_REG) 535 1.1 mrg return GENERAL_REGS; 536 1.1 mrg if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM) 537 1.1 mrg return AFP_REGS; 538 1.1 mrg return ALL_REGS; 539 1.1 mrg } 540 1.1 mrg 541 1.1 mrg /* Implement TARGET_CAN_CHANGE_MODE_CLASS. 542 1.1 mrg 543 1.1 mrg GCC assumes that lowpart contains first part of value as stored in memory. 544 1.1 mrg This is not the case for vector registers. */ 545 1.1 mrg 546 1.1 mrg bool 547 1.1 mrg gcn_can_change_mode_class (machine_mode from, machine_mode to, 548 1.1 mrg reg_class_t regclass) 549 1.1 mrg { 550 1.1 mrg if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to)) 551 1.1 mrg return true; 552 1.1 mrg return (gcn_class_max_nregs (regclass, from) 553 1.1 mrg == gcn_class_max_nregs (regclass, to)); 554 1.1 mrg } 555 1.1 mrg 556 1.1 mrg /* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P. 557 1.1 mrg 558 1.1 mrg When this hook returns true for MODE, the compiler allows 559 1.1 mrg registers explicitly used in the rtl to be used as spill registers 560 1.1 mrg but prevents the compiler from extending the lifetime of these 561 1.1 mrg registers. 
*/ 562 1.1 mrg 563 1.1 mrg bool 564 1.1 mrg gcn_small_register_classes_for_mode_p (machine_mode mode) 565 1.1 mrg { 566 1.1 mrg /* We allocate into exec and vcc regs. Those make small register class. */ 567 1.1 mrg return mode == DImode || mode == SImode; 568 1.1 mrg } 569 1.1 mrg 570 1.1 mrg /* Implement TARGET_CLASS_LIKELY_SPILLED_P. 571 1.1 mrg 572 1.1 mrg Returns true if pseudos that have been assigned to registers of class RCLASS 573 1.1 mrg would likely be spilled because registers of RCLASS are needed for spill 574 1.1 mrg registers. */ 575 1.1 mrg 576 1.1 mrg static bool 577 1.1 mrg gcn_class_likely_spilled_p (reg_class_t rclass) 578 1.1 mrg { 579 1.1 mrg return (rclass == EXEC_MASK_REG 580 1.1 mrg || reg_classes_intersect_p (ALL_CONDITIONAL_REGS, rclass)); 581 1.1 mrg } 582 1.1 mrg 583 1.1 mrg /* Implement TARGET_MODES_TIEABLE_P. 584 1.1 mrg 585 1.1 mrg Returns true if a value of MODE1 is accessible in MODE2 without 586 1.1 mrg copying. */ 587 1.1 mrg 588 1.1 mrg bool 589 1.1 mrg gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2) 590 1.1 mrg { 591 1.1 mrg return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE 592 1.1 mrg && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE); 593 1.1 mrg } 594 1.1 mrg 595 1.1 mrg /* Implement TARGET_TRULY_NOOP_TRUNCATION. 596 1.1 mrg 597 1.1 mrg Returns true if it is safe to convert a value of INPREC bits to one of 598 1.1 mrg OUTPREC bits (where OUTPREC is smaller than INPREC) by merely operating on 599 1.1 mrg it as if it had only OUTPREC bits. */ 600 1.1 mrg 601 1.1 mrg bool 602 1.1 mrg gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) 603 1.1 mrg { 604 1.1 mrg return ((inprec <= 32) && (outprec <= inprec)); 605 1.1 mrg } 606 1.1 mrg 607 1.1 mrg /* Return N-th part of value occupying multiple registers. 
*/ 608 1.1 mrg 609 1.1 mrg rtx 610 1.1 mrg gcn_operand_part (machine_mode mode, rtx op, int n) 611 1.1 mrg { 612 1.1 mrg if (GET_MODE_SIZE (mode) >= 256) 613 1.1 mrg { 614 1.1 mrg /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0); */ 615 1.1 mrg 616 1.1 mrg if (REG_P (op)) 617 1.1 mrg { 618 1.1 mrg gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER); 619 1.1 mrg return gen_rtx_REG (V64SImode, REGNO (op) + n); 620 1.1 mrg } 621 1.1 mrg if (GET_CODE (op) == CONST_VECTOR) 622 1.1 mrg { 623 1.1 mrg int units = GET_MODE_NUNITS (mode); 624 1.1 mrg rtvec v = rtvec_alloc (units); 625 1.1 mrg 626 1.1 mrg for (int i = 0; i < units; ++i) 627 1.1 mrg RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode), 628 1.1 mrg CONST_VECTOR_ELT (op, i), n); 629 1.1 mrg 630 1.1 mrg return gen_rtx_CONST_VECTOR (V64SImode, v); 631 1.1 mrg } 632 1.1 mrg if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR) 633 1.1 mrg return gcn_gen_undef (V64SImode); 634 1.1 mrg gcc_unreachable (); 635 1.1 mrg } 636 1.1 mrg else if (GET_MODE_SIZE (mode) == 8 && REG_P (op)) 637 1.1 mrg { 638 1.1 mrg gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER); 639 1.1 mrg return gen_rtx_REG (SImode, REGNO (op) + n); 640 1.1 mrg } 641 1.1 mrg else 642 1.1 mrg { 643 1.1 mrg if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR) 644 1.1 mrg return gcn_gen_undef (SImode); 645 1.1 mrg 646 1.1 mrg /* If it's a constant then let's assume it is of the largest mode 647 1.1 mrg available, otherwise simplify_gen_subreg will fail. */ 648 1.1 mrg if (mode == VOIDmode && CONST_INT_P (op)) 649 1.1 mrg mode = DImode; 650 1.1 mrg return simplify_gen_subreg (SImode, op, mode, n * 4); 651 1.1 mrg } 652 1.1 mrg } 653 1.1 mrg 654 1.1 mrg /* Return N-th part of value occupying multiple registers. 
*/ 655 1.1 mrg 656 1.1 mrg rtx 657 1.1 mrg gcn_operand_doublepart (machine_mode mode, rtx op, int n) 658 1.1 mrg { 659 1.1 mrg return simplify_gen_subreg (DImode, op, mode, n * 8); 660 1.1 mrg } 661 1.1 mrg 662 1.1 mrg /* Return true if OP can be split into subregs or high/low parts. 663 1.1 mrg This is always true for scalars, but not normally true for vectors. 664 1.1 mrg However, for vectors in hardregs we can use the low and high registers. */ 665 1.1 mrg 666 1.1 mrg bool 667 1.1 mrg gcn_can_split_p (machine_mode, rtx op) 668 1.1 mrg { 669 1.1 mrg if (vgpr_vector_mode_p (GET_MODE (op))) 670 1.1 mrg { 671 1.1 mrg if (GET_CODE (op) == SUBREG) 672 1.1 mrg op = SUBREG_REG (op); 673 1.1 mrg if (!REG_P (op)) 674 1.1 mrg return true; 675 1.1 mrg return REGNO (op) <= FIRST_PSEUDO_REGISTER; 676 1.1 mrg } 677 1.1 mrg return true; 678 1.1 mrg } 679 1.1 mrg 680 1.1 mrg /* Implement TARGET_SPILL_CLASS. 681 1.1 mrg 682 1.1 mrg Return class of registers which could be used for pseudo of MODE 683 1.1 mrg and of class RCLASS for spilling instead of memory. Return NO_REGS 684 1.1 mrg if it is not possible or non-profitable. */ 685 1.1 mrg 686 1.1 mrg static reg_class_t 687 1.1 mrg gcn_spill_class (reg_class_t c, machine_mode /*mode */ ) 688 1.1 mrg { 689 1.1 mrg if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c) 690 1.1 mrg || c == VCC_CONDITIONAL_REG) 691 1.1 mrg return SGPR_REGS; 692 1.1 mrg else 693 1.1 mrg return NO_REGS; 694 1.1 mrg } 695 1.1 mrg 696 1.1 mrg /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. 697 1.1 mrg 698 1.1 mrg Change allocno class for given pseudo from allocno and best class 699 1.1 mrg calculated by IRA. */ 700 1.1 mrg 701 1.1 mrg static reg_class_t 702 1.1 mrg gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl, 703 1.1 mrg reg_class_t best_cl) 704 1.1 mrg { 705 1.1 mrg /* Avoid returning classes that contain both vgpr and sgpr registers. 
*/ 706 1.1 mrg if (cl != ALL_REGS && cl != SRCDST_REGS && cl != ALL_GPR_REGS) 707 1.1 mrg return cl; 708 1.1 mrg if (best_cl != ALL_REGS && best_cl != SRCDST_REGS 709 1.1 mrg && best_cl != ALL_GPR_REGS) 710 1.1 mrg return best_cl; 711 1.1 mrg 712 1.1 mrg machine_mode mode = PSEUDO_REGNO_MODE (regno); 713 1.1 mrg if (vgpr_vector_mode_p (mode)) 714 1.1 mrg return VGPR_REGS; 715 1.1 mrg 716 1.1 mrg return GENERAL_REGS; 717 1.1 mrg } 718 1.1 mrg 719 1.1 mrg /* Create a new DImode pseudo reg and emit an instruction to initialize 720 1.1 mrg it to VAL. */ 721 1.1 mrg 722 1.1 mrg static rtx 723 1.1 mrg get_exec (int64_t val) 724 1.1 mrg { 725 1.1 mrg rtx reg = gen_reg_rtx (DImode); 726 1.1 mrg emit_insn (gen_rtx_SET (reg, gen_int_mode (val, DImode))); 727 1.1 mrg return reg; 728 1.1 mrg } 729 1.1 mrg 730 1.1 mrg /* Return value of scalar exec register. */ 731 1.1 mrg 732 1.1 mrg rtx 733 1.1 mrg gcn_scalar_exec () 734 1.1 mrg { 735 1.1 mrg return const1_rtx; 736 1.1 mrg } 737 1.1 mrg 738 1.1 mrg /* Return pseudo holding scalar exec register. */ 739 1.1 mrg 740 1.1 mrg rtx 741 1.1 mrg gcn_scalar_exec_reg () 742 1.1 mrg { 743 1.1 mrg return get_exec (1); 744 1.1 mrg } 745 1.1 mrg 746 1.1 mrg /* Return value of full exec register. */ 747 1.1 mrg 748 1.1 mrg rtx 749 1.1 mrg gcn_full_exec () 750 1.1 mrg { 751 1.1 mrg return constm1_rtx; 752 1.1 mrg } 753 1.1 mrg 754 1.1 mrg /* Return pseudo holding full exec register. */ 755 1.1 mrg 756 1.1 mrg rtx 757 1.1 mrg gcn_full_exec_reg () 758 1.1 mrg { 759 1.1 mrg return get_exec (-1); 760 1.1 mrg } 761 1.1 mrg 762 1.1 mrg /* }}} */ 763 1.1 mrg /* {{{ Immediate constants. */ 764 1.1 mrg 765 1.1 mrg /* Initialize shared numeric constants. */ 766 1.1 mrg 767 1.1 mrg static void 768 1.1 mrg init_ext_gcn_constants (void) 769 1.1 mrg { 770 1.1 mrg real_from_integer (&dconst4, DFmode, 4, SIGNED); 771 1.1 mrg 772 1.1 mrg /* FIXME: this constant probably does not match what hardware really loads. 773 1.1 mrg Reality check it eventually. 
*/ 774 1.1 mrg real_from_string (&dconst1over2pi, 775 1.1 mrg "0.1591549430918953357663423455968866839"); 776 1.1 mrg real_convert (&dconst1over2pi, SFmode, &dconst1over2pi); 777 1.1 mrg 778 1.1 mrg ext_gcn_constants_init = 1; 779 1.1 mrg } 780 1.1 mrg 781 1.1 mrg /* Return non-zero if X is a constant that can appear as an inline operand. 782 1.1 mrg This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi) 783 1.1 mrg Or a vector of those. 784 1.1 mrg The value returned should be the encoding of this constant. */ 785 1.1 mrg 786 1.1 mrg int 787 1.1 mrg gcn_inline_fp_constant_p (rtx x, bool allow_vector) 788 1.1 mrg { 789 1.1 mrg machine_mode mode = GET_MODE (x); 790 1.1 mrg 791 1.1 mrg if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode) 792 1.1 mrg && allow_vector) 793 1.1 mrg { 794 1.1 mrg int n; 795 1.1 mrg if (GET_CODE (x) != CONST_VECTOR) 796 1.1 mrg return 0; 797 1.1 mrg n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false); 798 1.1 mrg if (!n) 799 1.1 mrg return 0; 800 1.1 mrg for (int i = 1; i < 64; i++) 801 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) 802 1.1 mrg return 0; 803 1.1 mrg return 1; 804 1.1 mrg } 805 1.1 mrg 806 1.1 mrg if (mode != HFmode && mode != SFmode && mode != DFmode) 807 1.1 mrg return 0; 808 1.1 mrg 809 1.1 mrg const REAL_VALUE_TYPE *r; 810 1.1 mrg 811 1.1 mrg if (x == CONST0_RTX (mode)) 812 1.1 mrg return 128; 813 1.1 mrg if (x == CONST1_RTX (mode)) 814 1.1 mrg return 242; 815 1.1 mrg 816 1.1 mrg r = CONST_DOUBLE_REAL_VALUE (x); 817 1.1 mrg 818 1.1 mrg if (real_identical (r, &dconstm1)) 819 1.1 mrg return 243; 820 1.1 mrg 821 1.1 mrg if (real_identical (r, &dconsthalf)) 822 1.1 mrg return 240; 823 1.1 mrg if (real_identical (r, &dconstm1)) 824 1.1 mrg return 243; 825 1.1 mrg if (real_identical (r, &dconst2)) 826 1.1 mrg return 244; 827 1.1 mrg if (real_identical (r, &dconst4)) 828 1.1 mrg return 246; 829 1.1 mrg if (real_identical (r, &dconst1over2pi)) 830 1.1 mrg return 248; 831 1.1 mrg if 
(!ext_gcn_constants_init) 832 1.1 mrg init_ext_gcn_constants (); 833 1.1 mrg real_value_negate (r); 834 1.1 mrg if (real_identical (r, &dconsthalf)) 835 1.1 mrg return 241; 836 1.1 mrg if (real_identical (r, &dconst2)) 837 1.1 mrg return 245; 838 1.1 mrg if (real_identical (r, &dconst4)) 839 1.1 mrg return 247; 840 1.1 mrg 841 1.1 mrg /* FIXME: add 4, -4 and 1/(2*PI). */ 842 1.1 mrg 843 1.1 mrg return 0; 844 1.1 mrg } 845 1.1 mrg 846 1.1 mrg /* Return non-zero if X is a constant that can appear as an immediate operand. 847 1.1 mrg This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi) 848 1.1 mrg Or a vector of those. 849 1.1 mrg The value returned should be the encoding of this constant. */ 850 1.1 mrg 851 1.1 mrg bool 852 1.1 mrg gcn_fp_constant_p (rtx x, bool allow_vector) 853 1.1 mrg { 854 1.1 mrg machine_mode mode = GET_MODE (x); 855 1.1 mrg 856 1.1 mrg if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode) 857 1.1 mrg && allow_vector) 858 1.1 mrg { 859 1.1 mrg int n; 860 1.1 mrg if (GET_CODE (x) != CONST_VECTOR) 861 1.1 mrg return false; 862 1.1 mrg n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false); 863 1.1 mrg if (!n) 864 1.1 mrg return false; 865 1.1 mrg for (int i = 1; i < 64; i++) 866 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) 867 1.1 mrg return false; 868 1.1 mrg return true; 869 1.1 mrg } 870 1.1 mrg if (mode != HFmode && mode != SFmode && mode != DFmode) 871 1.1 mrg return false; 872 1.1 mrg 873 1.1 mrg if (gcn_inline_fp_constant_p (x, false)) 874 1.1 mrg return true; 875 1.1 mrg /* FIXME: It is not clear how 32bit immediates are interpreted here. */ 876 1.1 mrg return (mode != DFmode); 877 1.1 mrg } 878 1.1 mrg 879 1.1 mrg /* Return true if X is a constant representable as an inline immediate 880 1.1 mrg constant in a 32-bit instruction encoding. 
*/ 881 1.1 mrg 882 1.1 mrg bool 883 1.1 mrg gcn_inline_constant_p (rtx x) 884 1.1 mrg { 885 1.1 mrg if (GET_CODE (x) == CONST_INT) 886 1.1 mrg return INTVAL (x) >= -16 && INTVAL (x) <= 64; 887 1.1 mrg if (GET_CODE (x) == CONST_DOUBLE) 888 1.1 mrg return gcn_inline_fp_constant_p (x, false); 889 1.1 mrg if (GET_CODE (x) == CONST_VECTOR) 890 1.1 mrg { 891 1.1 mrg int n; 892 1.1 mrg if (!vgpr_vector_mode_p (GET_MODE (x))) 893 1.1 mrg return false; 894 1.1 mrg n = gcn_inline_constant_p (CONST_VECTOR_ELT (x, 0)); 895 1.1 mrg if (!n) 896 1.1 mrg return false; 897 1.1 mrg for (int i = 1; i < 64; i++) 898 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) 899 1.1 mrg return false; 900 1.1 mrg return 1; 901 1.1 mrg } 902 1.1 mrg return false; 903 1.1 mrg } 904 1.1 mrg 905 1.1 mrg /* Return true if X is a constant representable as an immediate constant 906 1.1 mrg in a 32 or 64-bit instruction encoding. */ 907 1.1 mrg 908 1.1 mrg bool 909 1.1 mrg gcn_constant_p (rtx x) 910 1.1 mrg { 911 1.1 mrg switch (GET_CODE (x)) 912 1.1 mrg { 913 1.1 mrg case CONST_INT: 914 1.1 mrg return true; 915 1.1 mrg 916 1.1 mrg case CONST_DOUBLE: 917 1.1 mrg return gcn_fp_constant_p (x, false); 918 1.1 mrg 919 1.1 mrg case CONST_VECTOR: 920 1.1 mrg { 921 1.1 mrg int n; 922 1.1 mrg if (!vgpr_vector_mode_p (GET_MODE (x))) 923 1.1 mrg return false; 924 1.1 mrg n = gcn_constant_p (CONST_VECTOR_ELT (x, 0)); 925 1.1 mrg if (!n) 926 1.1 mrg return false; 927 1.1 mrg for (int i = 1; i < 64; i++) 928 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) 929 1.1 mrg return false; 930 1.1 mrg return true; 931 1.1 mrg } 932 1.1 mrg 933 1.1 mrg case SYMBOL_REF: 934 1.1 mrg case LABEL_REF: 935 1.1 mrg return true; 936 1.1 mrg 937 1.1 mrg default: 938 1.1 mrg ; 939 1.1 mrg } 940 1.1 mrg 941 1.1 mrg return false; 942 1.1 mrg } 943 1.1 mrg 944 1.1 mrg /* Return true if X is a constant representable as two inline immediate 945 1.1 mrg constants in a 64-bit instruction that is split into two 
32-bit 946 1.1 mrg instructions. 947 1.1 mrg When MIXED is set, the low-part is permitted to use the full 32-bits. */ 948 1.1 mrg 949 1.1 mrg bool 950 1.1 mrg gcn_inline_constant64_p (rtx x, bool mixed) 951 1.1 mrg { 952 1.1 mrg if (GET_CODE (x) == CONST_VECTOR) 953 1.1 mrg { 954 1.1 mrg if (!vgpr_vector_mode_p (GET_MODE (x))) 955 1.1 mrg return false; 956 1.1 mrg if (!gcn_inline_constant64_p (CONST_VECTOR_ELT (x, 0), mixed)) 957 1.1 mrg return false; 958 1.1 mrg for (int i = 1; i < 64; i++) 959 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) 960 1.1 mrg return false; 961 1.1 mrg 962 1.1 mrg return true; 963 1.1 mrg } 964 1.1 mrg 965 1.1 mrg if (GET_CODE (x) != CONST_INT) 966 1.1 mrg return false; 967 1.1 mrg 968 1.1 mrg rtx val_lo = gcn_operand_part (DImode, x, 0); 969 1.1 mrg rtx val_hi = gcn_operand_part (DImode, x, 1); 970 1.1 mrg return ((mixed || gcn_inline_constant_p (val_lo)) 971 1.1 mrg && gcn_inline_constant_p (val_hi)); 972 1.1 mrg } 973 1.1 mrg 974 1.1 mrg /* Return true if X is a constant representable as an immediate constant 975 1.1 mrg in a 32 or 64-bit instruction encoding where the hardware will 976 1.1 mrg extend the immediate to 64-bits. */ 977 1.1 mrg 978 1.1 mrg bool 979 1.1 mrg gcn_constant64_p (rtx x) 980 1.1 mrg { 981 1.1 mrg if (!gcn_constant_p (x)) 982 1.1 mrg return false; 983 1.1 mrg 984 1.1 mrg if (GET_CODE (x) != CONST_INT) 985 1.1 mrg return true; 986 1.1 mrg 987 1.1 mrg /* Negative numbers are only allowed if they can be encoded within src0, 988 1.1 mrg because the 32-bit immediates do not get sign-extended. 989 1.1 mrg Unsigned numbers must not be encodable as 32-bit -1..-16, because the 990 1.1 mrg assembler will use a src0 inline immediate and that will get 991 1.1 mrg sign-extended. */ 992 1.1 mrg HOST_WIDE_INT val = INTVAL (x); 993 1.1 mrg return (((val & 0xffffffff) == val /* Positive 32-bit. */ 994 1.1 mrg && (val & 0xfffffff0) != 0xfffffff0) /* Not -1..-16. 
*/ 995 1.1 mrg || gcn_inline_constant_p (x)); /* Src0. */ 996 1.1 mrg } 997 1.1 mrg 998 1.1 mrg /* Implement TARGET_LEGITIMATE_CONSTANT_P. 999 1.1 mrg 1000 1.1 mrg Returns true if X is a legitimate constant for a MODE immediate operand. */ 1001 1.1 mrg 1002 1.1 mrg bool 1003 1.1 mrg gcn_legitimate_constant_p (machine_mode, rtx x) 1004 1.1 mrg { 1005 1.1 mrg return gcn_constant_p (x); 1006 1.1 mrg } 1007 1.1 mrg 1008 1.1 mrg /* Return true if X is a CONST_VECTOR of single constant. */ 1009 1.1 mrg 1010 1.1 mrg static bool 1011 1.1 mrg single_cst_vector_p (rtx x) 1012 1.1 mrg { 1013 1.1 mrg if (GET_CODE (x) != CONST_VECTOR) 1014 1.1 mrg return false; 1015 1.1 mrg for (int i = 1; i < 64; i++) 1016 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) 1017 1.1 mrg return false; 1018 1.1 mrg return true; 1019 1.1 mrg } 1020 1.1 mrg 1021 1.1 mrg /* Create a CONST_VECTOR of duplicated value A. */ 1022 1.1 mrg 1023 1.1 mrg rtx 1024 1.1 mrg gcn_vec_constant (machine_mode mode, int a) 1025 1.1 mrg { 1026 1.1 mrg /*if (!a) 1027 1.1 mrg return CONST0_RTX (mode); 1028 1.1 mrg if (a == -1) 1029 1.1 mrg return CONSTM1_RTX (mode); 1030 1.1 mrg if (a == 1) 1031 1.1 mrg return CONST1_RTX (mode); 1032 1.1 mrg if (a == 2) 1033 1.1 mrg return CONST2_RTX (mode);*/ 1034 1.1 mrg 1035 1.1 mrg int units = GET_MODE_NUNITS (mode); 1036 1.1 mrg machine_mode innermode = GET_MODE_INNER (mode); 1037 1.1 mrg 1038 1.1 mrg rtx tem; 1039 1.1 mrg if (FLOAT_MODE_P (innermode)) 1040 1.1 mrg { 1041 1.1 mrg REAL_VALUE_TYPE rv; 1042 1.1 mrg real_from_integer (&rv, NULL, a, SIGNED); 1043 1.1 mrg tem = const_double_from_real_value (rv, innermode); 1044 1.1 mrg } 1045 1.1 mrg else 1046 1.1 mrg tem = gen_int_mode (a, innermode); 1047 1.1 mrg 1048 1.1 mrg rtvec v = rtvec_alloc (units); 1049 1.1 mrg for (int i = 0; i < units; ++i) 1050 1.1 mrg RTVEC_ELT (v, i) = tem; 1051 1.1 mrg 1052 1.1 mrg return gen_rtx_CONST_VECTOR (mode, v); 1053 1.1 mrg } 1054 1.1 mrg 1055 1.1 mrg /* Create a CONST_VECTOR of 
duplicated value A. */ 1056 1.1 mrg 1057 1.1 mrg rtx 1058 1.1 mrg gcn_vec_constant (machine_mode mode, rtx a) 1059 1.1 mrg { 1060 1.1 mrg int units = GET_MODE_NUNITS (mode); 1061 1.1 mrg rtvec v = rtvec_alloc (units); 1062 1.1 mrg 1063 1.1 mrg for (int i = 0; i < units; ++i) 1064 1.1 mrg RTVEC_ELT (v, i) = a; 1065 1.1 mrg 1066 1.1 mrg return gen_rtx_CONST_VECTOR (mode, v); 1067 1.1 mrg } 1068 1.1 mrg 1069 1.1 mrg /* Create an undefined vector value, used where an insn operand is 1070 1.1 mrg optional. */ 1071 1.1 mrg 1072 1.1 mrg rtx 1073 1.1 mrg gcn_gen_undef (machine_mode mode) 1074 1.1 mrg { 1075 1.1 mrg return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR); 1076 1.1 mrg } 1077 1.1 mrg 1078 1.1 mrg /* }}} */ 1079 1.1 mrg /* {{{ Addresses, pointers and moves. */ 1080 1.1 mrg 1081 1.1 mrg /* Return true is REG is a valid place to store a pointer, 1082 1.1 mrg for instructions that require an SGPR. 1083 1.1 mrg FIXME rename. */ 1084 1.1 mrg 1085 1.1 mrg static bool 1086 1.1 mrg gcn_address_register_p (rtx reg, machine_mode mode, bool strict) 1087 1.1 mrg { 1088 1.1 mrg if (GET_CODE (reg) == SUBREG) 1089 1.1 mrg reg = SUBREG_REG (reg); 1090 1.1 mrg 1091 1.1 mrg if (!REG_P (reg)) 1092 1.1 mrg return false; 1093 1.1 mrg 1094 1.1 mrg if (GET_MODE (reg) != mode) 1095 1.1 mrg return false; 1096 1.1 mrg 1097 1.1 mrg int regno = REGNO (reg); 1098 1.1 mrg 1099 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER) 1100 1.1 mrg { 1101 1.1 mrg if (!strict) 1102 1.1 mrg return true; 1103 1.1 mrg 1104 1.1 mrg if (!reg_renumber) 1105 1.1 mrg return false; 1106 1.1 mrg 1107 1.1 mrg regno = reg_renumber[regno]; 1108 1.1 mrg } 1109 1.1 mrg 1110 1.1 mrg return (SGPR_REGNO_P (regno) || regno == M0_REG 1111 1.1 mrg || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM); 1112 1.1 mrg } 1113 1.1 mrg 1114 1.1 mrg /* Return true is REG is a valid place to store a pointer, 1115 1.1 mrg for instructions that require a VGPR. 
*/ 1116 1.1 mrg 1117 1.1 mrg static bool 1118 1.1 mrg gcn_vec_address_register_p (rtx reg, machine_mode mode, bool strict) 1119 1.1 mrg { 1120 1.1 mrg if (GET_CODE (reg) == SUBREG) 1121 1.1 mrg reg = SUBREG_REG (reg); 1122 1.1 mrg 1123 1.1 mrg if (!REG_P (reg)) 1124 1.1 mrg return false; 1125 1.1 mrg 1126 1.1 mrg if (GET_MODE (reg) != mode) 1127 1.1 mrg return false; 1128 1.1 mrg 1129 1.1 mrg int regno = REGNO (reg); 1130 1.1 mrg 1131 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER) 1132 1.1 mrg { 1133 1.1 mrg if (!strict) 1134 1.1 mrg return true; 1135 1.1 mrg 1136 1.1 mrg if (!reg_renumber) 1137 1.1 mrg return false; 1138 1.1 mrg 1139 1.1 mrg regno = reg_renumber[regno]; 1140 1.1 mrg } 1141 1.1 mrg 1142 1.1 mrg return VGPR_REGNO_P (regno); 1143 1.1 mrg } 1144 1.1 mrg 1145 1.1 mrg /* Return true if X would be valid inside a MEM using the Flat address 1146 1.1 mrg space. */ 1147 1.1 mrg 1148 1.1 mrg bool 1149 1.1 mrg gcn_flat_address_p (rtx x, machine_mode mode) 1150 1.1 mrg { 1151 1.1 mrg bool vec_mode = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT 1152 1.1 mrg || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT); 1153 1.1 mrg 1154 1.1 mrg if (vec_mode && gcn_address_register_p (x, DImode, false)) 1155 1.1 mrg return true; 1156 1.1 mrg 1157 1.1 mrg if (!vec_mode && gcn_vec_address_register_p (x, DImode, false)) 1158 1.1 mrg return true; 1159 1.1 mrg 1160 1.1 mrg if (TARGET_GCN5_PLUS 1161 1.1 mrg && GET_CODE (x) == PLUS 1162 1.1 mrg && gcn_vec_address_register_p (XEXP (x, 0), DImode, false) 1163 1.1 mrg && CONST_INT_P (XEXP (x, 1))) 1164 1.1 mrg return true; 1165 1.1 mrg 1166 1.1 mrg return false; 1167 1.1 mrg } 1168 1.1 mrg 1169 1.1 mrg /* Return true if X would be valid inside a MEM using the Scalar Flat 1170 1.1 mrg address space. 
*/ 1171 1.1 mrg 1172 1.1 mrg bool 1173 1.1 mrg gcn_scalar_flat_address_p (rtx x) 1174 1.1 mrg { 1175 1.1 mrg if (gcn_address_register_p (x, DImode, false)) 1176 1.1 mrg return true; 1177 1.1 mrg 1178 1.1 mrg if (GET_CODE (x) == PLUS 1179 1.1 mrg && gcn_address_register_p (XEXP (x, 0), DImode, false) 1180 1.1 mrg && CONST_INT_P (XEXP (x, 1))) 1181 1.1 mrg return true; 1182 1.1 mrg 1183 1.1 mrg return false; 1184 1.1 mrg } 1185 1.1 mrg 1186 1.1 mrg /* Return true if MEM X would be valid for the Scalar Flat address space. */ 1187 1.1 mrg 1188 1.1 mrg bool 1189 1.1 mrg gcn_scalar_flat_mem_p (rtx x) 1190 1.1 mrg { 1191 1.1 mrg if (!MEM_P (x)) 1192 1.1 mrg return false; 1193 1.1 mrg 1194 1.1 mrg if (GET_MODE_SIZE (GET_MODE (x)) < 4) 1195 1.1 mrg return false; 1196 1.1 mrg 1197 1.1 mrg return gcn_scalar_flat_address_p (XEXP (x, 0)); 1198 1.1 mrg } 1199 1.1 mrg 1200 1.1 mrg /* Return true if X would be valid inside a MEM using the LDS or GDS 1201 1.1 mrg address spaces. */ 1202 1.1 mrg 1203 1.1 mrg bool 1204 1.1 mrg gcn_ds_address_p (rtx x) 1205 1.1 mrg { 1206 1.1 mrg if (gcn_vec_address_register_p (x, SImode, false)) 1207 1.1 mrg return true; 1208 1.1 mrg 1209 1.1 mrg if (GET_CODE (x) == PLUS 1210 1.1 mrg && gcn_vec_address_register_p (XEXP (x, 0), SImode, false) 1211 1.1 mrg && CONST_INT_P (XEXP (x, 1))) 1212 1.1 mrg return true; 1213 1.1 mrg 1214 1.1 mrg return false; 1215 1.1 mrg } 1216 1.1 mrg 1217 1.1 mrg /* Return true if ADDR would be valid inside a MEM using the Global 1218 1.1 mrg address space. 
*/ 1219 1.1 mrg 1220 1.1 mrg bool 1221 1.1 mrg gcn_global_address_p (rtx addr) 1222 1.1 mrg { 1223 1.1 mrg if (gcn_address_register_p (addr, DImode, false) 1224 1.1 mrg || gcn_vec_address_register_p (addr, DImode, false)) 1225 1.1 mrg return true; 1226 1.1 mrg 1227 1.1 mrg if (GET_CODE (addr) == PLUS) 1228 1.1 mrg { 1229 1.1 mrg rtx base = XEXP (addr, 0); 1230 1.1 mrg rtx offset = XEXP (addr, 1); 1231 1.1 mrg bool immediate_p = (CONST_INT_P (offset) 1232 1.1 mrg && INTVAL (offset) >= -(1 << 12) 1233 1.1 mrg && INTVAL (offset) < (1 << 12)); 1234 1.1 mrg 1235 1.1 mrg if ((gcn_address_register_p (base, DImode, false) 1236 1.1 mrg || gcn_vec_address_register_p (base, DImode, false)) 1237 1.1 mrg && immediate_p) 1238 1.1 mrg /* SGPR + CONST or VGPR + CONST */ 1239 1.1 mrg return true; 1240 1.1 mrg 1241 1.1 mrg if (gcn_address_register_p (base, DImode, false) 1242 1.1 mrg && gcn_vgpr_register_operand (offset, SImode)) 1243 1.1 mrg /* SPGR + VGPR */ 1244 1.1 mrg return true; 1245 1.1 mrg 1246 1.1 mrg if (GET_CODE (base) == PLUS 1247 1.1 mrg && gcn_address_register_p (XEXP (base, 0), DImode, false) 1248 1.1 mrg && gcn_vgpr_register_operand (XEXP (base, 1), SImode) 1249 1.1 mrg && immediate_p) 1250 1.1 mrg /* (SGPR + VGPR) + CONST */ 1251 1.1 mrg return true; 1252 1.1 mrg } 1253 1.1 mrg 1254 1.1 mrg return false; 1255 1.1 mrg } 1256 1.1 mrg 1257 1.1 mrg /* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P. 1258 1.1 mrg 1259 1.1 mrg Recognizes RTL expressions that are valid memory addresses for an 1260 1.1 mrg instruction. The MODE argument is the machine mode for the MEM 1261 1.1 mrg expression that wants to use this address. 1262 1.1 mrg 1263 1.1 mrg It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should 1264 1.1 mrg convert common non-canonical forms to canonical form so that they will 1265 1.1 mrg be recognized. 
*/

static bool
gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
				     addr_space_t as)
{
  /* All vector instructions need to work on addresses in registers.  */
  if (!TARGET_GCN5_PLUS && (vgpr_vector_mode_p (mode) && !REG_P (x)))
    return false;

  if (AS_SCALAR_FLAT_P (as))
    {
      if (mode == QImode || mode == HImode)
	return 0;

      switch (GET_CODE (x))
	{
	case REG:
	  return gcn_address_register_p (x, DImode, strict);
	/* Addresses are in the form BASE+OFFSET
	   OFFSET is either 20bit unsigned immediate, SGPR or M0.
	   Writes and atomics do not accept SGPR.  */
	case PLUS:
	  {
	    rtx x0 = XEXP (x, 0);
	    rtx x1 = XEXP (x, 1);
	    if (!gcn_address_register_p (x0, DImode, strict))
	      return false;
	    /* FIXME: This is disabled because of the mode mismatch between
	       SImode (for the address or m0 register) and the DImode PLUS.
	       We'll need a zero_extend or similar.

	    if (gcn_m0_register_p (x1, SImode, strict)
		|| gcn_address_register_p (x1, SImode, strict))
	      return true;
	    else*/
	    if (GET_CODE (x1) == CONST_INT)
	      {
		if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20)
		    /* The low bits of the offset are ignored, even when
		       they're meant to realign the pointer.  */
		    && !(INTVAL (x1) & 0x3))
		  return true;
	      }
	    return false;
	  }

	default:
	  break;
	}
    }
  else if (AS_SCRATCH_P (as))
    return gcn_address_register_p (x, SImode, strict);
  else if (AS_FLAT_P (as) || AS_FLAT_SCRATCH_P (as))
    {
      if (TARGET_GCN3 || GET_CODE (x) == REG)
	return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
		 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
		? gcn_address_register_p (x, DImode, strict)
		: gcn_vec_address_register_p (x, DImode, strict));
      else
	{
	  gcc_assert (TARGET_GCN5_PLUS);

	  if (GET_CODE (x) == PLUS)
	    {
	      rtx x0 = XEXP (x, 0);
	      rtx x1 = XEXP (x, 1);

	      /* Validate the base register of the PLUS; previously the
		 whole PLUS expression X was tested, which can never be a
		 REG, so every base+offset flat address was rejected.  */
	      if (VECTOR_MODE_P (mode)
		  ? !gcn_address_register_p (x0, DImode, strict)
		  : !gcn_vec_address_register_p (x0, DImode, strict))
		return false;

	      if (GET_CODE (x1) == CONST_INT)
		{
		  if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 12)
		      /* The low bits of the offset are ignored, even when
			 they're meant to realign the pointer.  */
		      && !(INTVAL (x1) & 0x3))
		    return true;
		}
	    }
	  return false;
	}
    }
  else if (AS_GLOBAL_P (as))
    {
      gcc_assert (TARGET_GCN5_PLUS);

      if (GET_CODE (x) == REG)
	return (gcn_address_register_p (x, DImode, strict)
		|| (!VECTOR_MODE_P (mode)
		    && gcn_vec_address_register_p (x, DImode, strict)));
      else if (GET_CODE (x) == PLUS)
	{
	  rtx base = XEXP (x, 0);
	  rtx offset = XEXP (x, 1);

	  bool immediate_p = (GET_CODE (offset) == CONST_INT
			      /* Signed 13-bit immediate.  */
			      && INTVAL (offset) >= -(1 << 12)
			      && INTVAL (offset) < (1 << 12)
			      /* The low bits of the offset are ignored, even
				 when they're meant to realign the
				 pointer.  */
			      && !(INTVAL (offset) & 0x3));

	  if (!VECTOR_MODE_P (mode))
	    {
	      if ((gcn_address_register_p (base, DImode, strict)
		   || gcn_vec_address_register_p (base, DImode, strict))
		  && immediate_p)
		/* SGPR + CONST or VGPR + CONST  */
		return true;

	      if (gcn_address_register_p (base, DImode, strict)
		  && gcn_vgpr_register_operand (offset, SImode))
		/* SGPR + VGPR  */
		return true;

	      if (GET_CODE (base) == PLUS
		  && gcn_address_register_p (XEXP (base, 0), DImode, strict)
		  && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
		  && immediate_p)
		/* (SGPR + VGPR) + CONST  */
		return true;
	    }
	  else
	    {
	      if (gcn_address_register_p (base, DImode, strict)
		  && immediate_p)
		/* SGPR + CONST  */
		return true;
	    }
	}
      else
	return false;
    }
  else if (AS_ANY_DS_P (as))
    switch (GET_CODE (x))
      {
      case REG:
	return (VECTOR_MODE_P (mode)
		? gcn_address_register_p (x, SImode, strict)
		: gcn_vec_address_register_p (x, SImode, strict));
      /* Addresses are in the form BASE+OFFSET
	 OFFSET is either 20bit unsigned immediate, SGPR or M0.
	 Writes and atomics do not accept SGPR.  */
      case PLUS:
	{
	  rtx x0 = XEXP (x, 0);
	  rtx x1 = XEXP (x, 1);
	  if (!gcn_vec_address_register_p (x0, DImode, strict))
	    return false;
	  if (GET_CODE (x1) == REG)
	    {
	      /* NOTE(review): "<=" looks like it should be "<", since
		 FIRST_PSEUDO_REGISTER is the first non-hard register
		 number; kept as-is pending confirmation.  */
	      if (GET_CODE (x1) != REG
		  || (REGNO (x1) <= FIRST_PSEUDO_REGISTER
		      && !gcn_ssrc_register_operand (x1, DImode)))
		return false;
	    }
	  else if (GET_CODE (x1) == CONST_VECTOR
		   && GET_CODE (CONST_VECTOR_ELT (x1, 0)) == CONST_INT
		   && single_cst_vector_p (x1))
	    {
	      x1 = CONST_VECTOR_ELT (x1, 0);
	      if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20))
		return true;
	    }
	  return false;
	}

      default:
	break;
      }
  else
    gcc_unreachable ();
  return false;
}

/* Implement TARGET_ADDR_SPACE_POINTER_MODE.

   Return the appropriate mode for a named address pointer.
*/ 1447 1.1 mrg 1448 1.1 mrg static scalar_int_mode 1449 1.1 mrg gcn_addr_space_pointer_mode (addr_space_t addrspace) 1450 1.1 mrg { 1451 1.1 mrg switch (addrspace) 1452 1.1 mrg { 1453 1.1 mrg case ADDR_SPACE_SCRATCH: 1454 1.1 mrg case ADDR_SPACE_LDS: 1455 1.1 mrg case ADDR_SPACE_GDS: 1456 1.1 mrg return SImode; 1457 1.1 mrg case ADDR_SPACE_DEFAULT: 1458 1.1 mrg case ADDR_SPACE_FLAT: 1459 1.1 mrg case ADDR_SPACE_FLAT_SCRATCH: 1460 1.1 mrg case ADDR_SPACE_SCALAR_FLAT: 1461 1.1 mrg return DImode; 1462 1.1 mrg default: 1463 1.1 mrg gcc_unreachable (); 1464 1.1 mrg } 1465 1.1 mrg } 1466 1.1 mrg 1467 1.1 mrg /* Implement TARGET_ADDR_SPACE_ADDRESS_MODE. 1468 1.1 mrg 1469 1.1 mrg Return the appropriate mode for a named address space address. */ 1470 1.1 mrg 1471 1.1 mrg static scalar_int_mode 1472 1.1 mrg gcn_addr_space_address_mode (addr_space_t addrspace) 1473 1.1 mrg { 1474 1.1 mrg return gcn_addr_space_pointer_mode (addrspace); 1475 1.1 mrg } 1476 1.1 mrg 1477 1.1 mrg /* Implement TARGET_ADDR_SPACE_SUBSET_P. 1478 1.1 mrg 1479 1.1 mrg Determine if one named address space is a subset of another. */ 1480 1.1 mrg 1481 1.1 mrg static bool 1482 1.1 mrg gcn_addr_space_subset_p (addr_space_t subset, addr_space_t superset) 1483 1.1 mrg { 1484 1.1 mrg if (subset == superset) 1485 1.1 mrg return true; 1486 1.1 mrg /* FIXME is this true? */ 1487 1.1 mrg if (AS_FLAT_P (superset) || AS_SCALAR_FLAT_P (superset)) 1488 1.1 mrg return true; 1489 1.1 mrg return false; 1490 1.1 mrg } 1491 1.1 mrg 1492 1.1 mrg /* Convert from one address space to another. 
*/ 1493 1.1 mrg 1494 1.1 mrg static rtx 1495 1.1 mrg gcn_addr_space_convert (rtx op, tree from_type, tree to_type) 1496 1.1 mrg { 1497 1.1 mrg gcc_assert (POINTER_TYPE_P (from_type)); 1498 1.1 mrg gcc_assert (POINTER_TYPE_P (to_type)); 1499 1.1 mrg 1500 1.1 mrg addr_space_t as_from = TYPE_ADDR_SPACE (TREE_TYPE (from_type)); 1501 1.1 mrg addr_space_t as_to = TYPE_ADDR_SPACE (TREE_TYPE (to_type)); 1502 1.1 mrg 1503 1.1 mrg if (AS_LDS_P (as_from) && AS_FLAT_P (as_to)) 1504 1.1 mrg { 1505 1.1 mrg rtx queue = gen_rtx_REG (DImode, 1506 1.1 mrg cfun->machine->args.reg[QUEUE_PTR_ARG]); 1507 1.1 mrg rtx group_seg_aperture_hi = gen_rtx_MEM (SImode, 1508 1.1 mrg gen_rtx_PLUS (DImode, queue, 1509 1.1 mrg gen_int_mode (64, SImode))); 1510 1.1 mrg rtx tmp = gen_reg_rtx (DImode); 1511 1.1 mrg 1512 1.1 mrg emit_move_insn (gen_lowpart (SImode, tmp), op); 1513 1.1 mrg emit_move_insn (gen_highpart_mode (SImode, DImode, tmp), 1514 1.1 mrg group_seg_aperture_hi); 1515 1.1 mrg 1516 1.1 mrg return tmp; 1517 1.1 mrg } 1518 1.1 mrg else if (as_from == as_to) 1519 1.1 mrg return op; 1520 1.1 mrg else 1521 1.1 mrg gcc_unreachable (); 1522 1.1 mrg } 1523 1.1 mrg 1524 1.1 mrg /* Implement TARGET_ADDR_SPACE_DEBUG. 1525 1.1 mrg 1526 1.1 mrg Return the dwarf address space class for each hardware address space. 
*/ 1527 1.1 mrg 1528 1.1 mrg static int 1529 1.1 mrg gcn_addr_space_debug (addr_space_t as) 1530 1.1 mrg { 1531 1.1 mrg switch (as) 1532 1.1 mrg { 1533 1.1 mrg case ADDR_SPACE_DEFAULT: 1534 1.1 mrg case ADDR_SPACE_FLAT: 1535 1.1 mrg case ADDR_SPACE_SCALAR_FLAT: 1536 1.1 mrg case ADDR_SPACE_FLAT_SCRATCH: 1537 1.1 mrg return DW_ADDR_none; 1538 1.1 mrg case ADDR_SPACE_GLOBAL: 1539 1.1 mrg return 1; // DW_ADDR_LLVM_global 1540 1.1 mrg case ADDR_SPACE_LDS: 1541 1.1 mrg return 3; // DW_ADDR_LLVM_group 1542 1.1 mrg case ADDR_SPACE_SCRATCH: 1543 1.1 mrg return 4; // DW_ADDR_LLVM_private 1544 1.1 mrg case ADDR_SPACE_GDS: 1545 1.1 mrg return 0x8000; // DW_ADDR_AMDGPU_region 1546 1.1 mrg } 1547 1.1 mrg gcc_unreachable (); 1548 1.1 mrg } 1549 1.1 mrg 1550 1.1 mrg 1551 1.1 mrg /* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h 1552 1.1 mrg 1553 1.1 mrg Retun true if REGNO is OK for memory adressing. */ 1554 1.1 mrg 1555 1.1 mrg bool 1556 1.1 mrg gcn_regno_mode_code_ok_for_base_p (int regno, 1557 1.1 mrg machine_mode, addr_space_t as, int, int) 1558 1.1 mrg { 1559 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER) 1560 1.1 mrg { 1561 1.1 mrg if (reg_renumber) 1562 1.1 mrg regno = reg_renumber[regno]; 1563 1.1 mrg else 1564 1.1 mrg return true; 1565 1.1 mrg } 1566 1.1 mrg if (AS_FLAT_P (as)) 1567 1.1 mrg return (VGPR_REGNO_P (regno) 1568 1.1 mrg || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM); 1569 1.1 mrg else if (AS_SCALAR_FLAT_P (as)) 1570 1.1 mrg return (SGPR_REGNO_P (regno) 1571 1.1 mrg || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM); 1572 1.1 mrg else if (AS_GLOBAL_P (as)) 1573 1.1 mrg { 1574 1.1 mrg return (SGPR_REGNO_P (regno) 1575 1.1 mrg || VGPR_REGNO_P (regno) 1576 1.1 mrg || regno == ARG_POINTER_REGNUM 1577 1.1 mrg || regno == FRAME_POINTER_REGNUM); 1578 1.1 mrg } 1579 1.1 mrg else 1580 1.1 mrg /* For now. */ 1581 1.1 mrg return false; 1582 1.1 mrg } 1583 1.1 mrg 1584 1.1 mrg /* Implement MODE_CODE_BASE_REG_CLASS via gcn.h. 
1585 1.1 mrg 1586 1.1 mrg Return a suitable register class for memory addressing. */ 1587 1.1 mrg 1588 1.1 mrg reg_class 1589 1.1 mrg gcn_mode_code_base_reg_class (machine_mode mode, addr_space_t as, int oc, 1590 1.1 mrg int ic) 1591 1.1 mrg { 1592 1.1 mrg switch (as) 1593 1.1 mrg { 1594 1.1 mrg case ADDR_SPACE_DEFAULT: 1595 1.1 mrg return gcn_mode_code_base_reg_class (mode, DEFAULT_ADDR_SPACE, oc, ic); 1596 1.1 mrg case ADDR_SPACE_SCALAR_FLAT: 1597 1.1 mrg case ADDR_SPACE_SCRATCH: 1598 1.1 mrg return SGPR_REGS; 1599 1.1 mrg break; 1600 1.1 mrg case ADDR_SPACE_FLAT: 1601 1.1 mrg case ADDR_SPACE_FLAT_SCRATCH: 1602 1.1 mrg case ADDR_SPACE_LDS: 1603 1.1 mrg case ADDR_SPACE_GDS: 1604 1.1 mrg return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT 1605 1.1 mrg || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) 1606 1.1 mrg ? SGPR_REGS : VGPR_REGS); 1607 1.1 mrg case ADDR_SPACE_GLOBAL: 1608 1.1 mrg return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT 1609 1.1 mrg || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) 1610 1.1 mrg ? SGPR_REGS : ALL_GPR_REGS); 1611 1.1 mrg } 1612 1.1 mrg gcc_unreachable (); 1613 1.1 mrg } 1614 1.1 mrg 1615 1.1 mrg /* Implement REGNO_OK_FOR_INDEX_P via gcn.h. 1616 1.1 mrg 1617 1.1 mrg Return true if REGNO is OK for index of memory addressing. */ 1618 1.1 mrg 1619 1.1 mrg bool 1620 1.1 mrg regno_ok_for_index_p (int regno) 1621 1.1 mrg { 1622 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER) 1623 1.1 mrg { 1624 1.1 mrg if (reg_renumber) 1625 1.1 mrg regno = reg_renumber[regno]; 1626 1.1 mrg else 1627 1.1 mrg return true; 1628 1.1 mrg } 1629 1.1 mrg return regno == M0_REG || VGPR_REGNO_P (regno); 1630 1.1 mrg } 1631 1.1 mrg 1632 1.1 mrg /* Generate move which uses the exec flags. If EXEC is NULL, then it is 1633 1.1 mrg assumed that all lanes normally relevant to the mode of the move are 1634 1.1 mrg affected. If PREV is NULL, then a sensible default is supplied for 1635 1.1 mrg the inactive lanes. 
*/ 1636 1.1 mrg 1637 1.1 mrg static rtx 1638 1.1 mrg gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL) 1639 1.1 mrg { 1640 1.1 mrg machine_mode mode = GET_MODE (op0); 1641 1.1 mrg 1642 1.1 mrg if (vgpr_vector_mode_p (mode)) 1643 1.1 mrg { 1644 1.1 mrg if (exec && exec != CONSTM1_RTX (DImode)) 1645 1.1 mrg { 1646 1.1 mrg if (!prev) 1647 1.1 mrg prev = op0; 1648 1.1 mrg } 1649 1.1 mrg else 1650 1.1 mrg { 1651 1.1 mrg if (!prev) 1652 1.1 mrg prev = gcn_gen_undef (mode); 1653 1.1 mrg exec = gcn_full_exec_reg (); 1654 1.1 mrg } 1655 1.1 mrg 1656 1.1 mrg rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec)); 1657 1.1 mrg 1658 1.1 mrg return gen_rtx_PARALLEL (VOIDmode, 1659 1.1 mrg gen_rtvec (2, set, 1660 1.1 mrg gen_rtx_CLOBBER (VOIDmode, 1661 1.1 mrg gen_rtx_SCRATCH (V64DImode)))); 1662 1.1 mrg } 1663 1.1 mrg 1664 1.1 mrg return (gen_rtx_PARALLEL 1665 1.1 mrg (VOIDmode, 1666 1.1 mrg gen_rtvec (2, gen_rtx_SET (op0, op1), 1667 1.1 mrg gen_rtx_USE (VOIDmode, 1668 1.1 mrg exec ? exec : gcn_scalar_exec ())))); 1669 1.1 mrg } 1670 1.1 mrg 1671 1.1 mrg /* Generate masked move. */ 1672 1.1 mrg 1673 1.1 mrg static rtx 1674 1.1 mrg gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL) 1675 1.1 mrg { 1676 1.1 mrg if (exec) 1677 1.1 mrg return (gen_rtx_SET (op0, 1678 1.1 mrg gen_rtx_VEC_MERGE (GET_MODE (op0), 1679 1.1 mrg gen_rtx_VEC_DUPLICATE (GET_MODE 1680 1.1 mrg (op0), op1), 1681 1.1 mrg op2, exec))); 1682 1.1 mrg else 1683 1.1 mrg return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1))); 1684 1.1 mrg } 1685 1.1 mrg 1686 1.1 mrg /* Expand vector init of OP0 by VEC. 1687 1.1 mrg Implements vec_init instruction pattern. 
*/ 1688 1.1 mrg 1689 1.1 mrg void 1690 1.1 mrg gcn_expand_vector_init (rtx op0, rtx vec) 1691 1.1 mrg { 1692 1.1 mrg int64_t initialized_mask = 0; 1693 1.1 mrg int64_t curr_mask = 1; 1694 1.1 mrg machine_mode mode = GET_MODE (op0); 1695 1.1 mrg 1696 1.1 mrg rtx val = XVECEXP (vec, 0, 0); 1697 1.1 mrg 1698 1.1 mrg for (int i = 1; i < 64; i++) 1699 1.1 mrg if (rtx_equal_p (val, XVECEXP (vec, 0, i))) 1700 1.1 mrg curr_mask |= (int64_t) 1 << i; 1701 1.1 mrg 1702 1.1 mrg if (gcn_constant_p (val)) 1703 1.1 mrg emit_move_insn (op0, gcn_vec_constant (mode, val)); 1704 1.1 mrg else 1705 1.1 mrg { 1706 1.1 mrg val = force_reg (GET_MODE_INNER (mode), val); 1707 1.1 mrg emit_insn (gen_duplicate_load (op0, val)); 1708 1.1 mrg } 1709 1.1 mrg initialized_mask |= curr_mask; 1710 1.1 mrg for (int i = 1; i < 64; i++) 1711 1.1 mrg if (!(initialized_mask & ((int64_t) 1 << i))) 1712 1.1 mrg { 1713 1.1 mrg curr_mask = (int64_t) 1 << i; 1714 1.1 mrg rtx val = XVECEXP (vec, 0, i); 1715 1.1 mrg 1716 1.1 mrg for (int j = i + 1; j < 64; j++) 1717 1.1 mrg if (rtx_equal_p (val, XVECEXP (vec, 0, j))) 1718 1.1 mrg curr_mask |= (int64_t) 1 << j; 1719 1.1 mrg if (gcn_constant_p (val)) 1720 1.1 mrg emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val), 1721 1.1 mrg get_exec (curr_mask))); 1722 1.1 mrg else 1723 1.1 mrg { 1724 1.1 mrg val = force_reg (GET_MODE_INNER (mode), val); 1725 1.1 mrg emit_insn (gen_duplicate_load (op0, val, op0, 1726 1.1 mrg get_exec (curr_mask))); 1727 1.1 mrg } 1728 1.1 mrg initialized_mask |= curr_mask; 1729 1.1 mrg } 1730 1.1 mrg } 1731 1.1 mrg 1732 1.1 mrg /* Load vector constant where n-th lane contains BASE+n*VAL. 
*/

/* Build a V64SImode vector constant whose lane L holds BASE + L * VAL,
   using log2(64) = 6 masked vector additions: step K adds VAL * 2^K to
   exactly those lanes whose lane-number bit K is set.  */

static rtx
strided_constant (machine_mode mode, int base, int val)
{
  rtx x = gen_reg_rtx (mode);
  emit_move_insn (x, gcn_vec_constant (mode, base));
  emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32),
				 x, get_exec (0xffffffff00000000)));
  emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16),
				 x, get_exec (0xffff0000ffff0000)));
  emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8),
				 x, get_exec (0xff00ff00ff00ff00)));
  emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4),
				 x, get_exec (0xf0f0f0f0f0f0f0f0)));
  emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2),
				 x, get_exec (0xcccccccccccccccc)));
  emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1),
				 x, get_exec (0xaaaaaaaaaaaaaaaa)));
  return x;
}

/* Implement TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS.  */

static rtx
gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
				   addr_space_t as)
{
  switch (as)
    {
    case ADDR_SPACE_DEFAULT:
      /* Resolve the default space and retry.  */
      return gcn_addr_space_legitimize_address (x, old, mode,
						DEFAULT_ADDR_SPACE);
    case ADDR_SPACE_SCALAR_FLAT:
    case ADDR_SPACE_SCRATCH:
      /* Instructions working on vectors need the address to be in
	 a register.  */
      if (vgpr_vector_mode_p (mode))
	return force_reg (GET_MODE (x), x);

      return x;
    case ADDR_SPACE_FLAT:
    case ADDR_SPACE_FLAT_SCRATCH:
    case ADDR_SPACE_GLOBAL:
      /* GCN3 flat addressing requires the full 64-bit address in a
	 register; later ISAs accept more address forms as-is.  */
      return TARGET_GCN3 ? force_reg (DImode, x) : x;
    case ADDR_SPACE_LDS:
    case ADDR_SPACE_GDS:
      /* FIXME: LDS supports offsets; handle them!  */
      if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode)
	{
	  /* Expand the scalar 32-bit base into a per-lane address vector:
	     addrs[L] = base + L * element_size.  */
	  rtx addrs = gen_reg_rtx (V64SImode);
	  rtx base = force_reg (SImode, x);
	  rtx offsets = strided_constant (V64SImode, 0,
					  GET_MODE_UNIT_SIZE (mode));

	  emit_insn (gen_vec_duplicatev64si (addrs, base));
	  emit_insn (gen_addv64si3 (addrs, offsets, addrs));
	  return addrs;
	}
      return x;
    }
  gcc_unreachable ();
}

/* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:V64DI)) with the
   proper vector of stepped addresses.

   MEM will be a DImode address of a vector in an SGPR.
   TMP will be a V64DImode VGPR pair or (scratch:V64DI).  */

rtx
gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
				     rtx tmp)
{
  gcc_assert (MEM_P (mem));
  rtx mem_base = XEXP (mem, 0);
  rtx mem_index = NULL_RTX;

  if (!TARGET_GCN5_PLUS)
    {
      /* gcn_addr_space_legitimize_address should have put the address in a
	 register.  If not, it is too late to do anything about it.  */
      gcc_assert (REG_P (mem_base));
    }

  /* Split a reg+offset address into its base and index parts.  */
  if (GET_CODE (mem_base) == PLUS)
    {
      mem_index = XEXP (mem_base, 1);
      mem_base = XEXP (mem_base, 0);
    }

  /* RF and RM base registers for vector modes should be always an SGPR.  */
  gcc_assert (SGPR_REGNO_P (REGNO (mem_base))
	      || REGNO (mem_base) >= FIRST_PSEUDO_REGISTER);

  machine_mode inner = GET_MODE_INNER (mode);
  int shift = exact_log2 (GET_MODE_SIZE (inner));
  /* NOTE(review): VGPR_REGNO (1) presumably holds the 0..63 lane-id ramp
     set up elsewhere — confirm against the prologue code.  */
  rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
  rtx undef_v64si = gcn_gen_undef (V64SImode);
  rtx new_base = NULL_RTX;
  addr_space_t as = MEM_ADDR_SPACE (mem);

  rtx tmplo = (REG_P (tmp)
	       ? gcn_operand_part (V64DImode, tmp, 0)
	       : gen_reg_rtx (V64SImode));

  /* tmplo[:] = ramp[:] << shift  */
  if (exec)
    emit_insn (gen_ashlv64si3_exec (tmplo, ramp,
				    gen_int_mode (shift, SImode),
				    undef_v64si, exec));
  else
    emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode)));

  if (AS_FLAT_P (as))
    {
      /* FLAT needs full 64-bit per-lane addresses: zero-extend the lane
	 offsets and add the 64-bit scalar base with carry.  */
      rtx vcc = gen_rtx_REG (DImode, CC_SAVE_REG);

      if (REG_P (tmp))
	{
	  rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
	  rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
	  rtx tmphi = gcn_operand_part (V64DImode, tmp, 1);

	  /* tmphi[:] = mem_base_hi  */
	  if (exec)
	    emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi,
						    undef_v64si, exec));
	  else
	    emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi));

	  /* tmp[:] += zext (mem_base)  */
	  if (exec)
	    {
	      /* Add low halves, then propagate the carry into the highs.  */
	      emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo,
						     vcc, undef_v64si, exec));
	      emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx,
					      vcc, vcc, undef_v64si, exec));
	    }
	  else
	    emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
	}
      else
	{
	  /* TMP was (scratch:V64DI): allocate a fresh pseudo instead.  */
	  tmp = gen_reg_rtx (V64DImode);
	  if (exec)
	    emit_insn (gen_addv64di3_vcc_zext_dup2_exec
		       (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode),
			exec));
	  else
	    emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc));
	}

      new_base = tmp;
    }
  else if (AS_ANY_DS_P (as))
    {
      /* LDS/GDS use 32-bit addresses: add the scalar base directly.  */
      if (!exec)
	emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base));
      else
	emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base,
					   gcn_gen_undef (V64SImode), exec));
      new_base = tmplo;
    }
  else
    {
      /* Other spaces: describe base + sign-extended offsets symbolically,
	 without emitting instructions.  */
      mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base);
      new_base = gen_rtx_PLUS (V64DImode, mem_base,
			       gen_rtx_SIGN_EXTEND (V64DImode, tmplo));
    }

  return gen_rtx_PLUS (GET_MODE (new_base), new_base,
		       gen_rtx_VEC_DUPLICATE (GET_MODE (new_base),
					      (mem_index ? mem_index
					       : const0_rtx)));
}

/* Convert a BASE address, a vector of OFFSETS, and a SCALE, to addresses
   suitable for the given address space.  This is intended for use in
   gather/scatter patterns.

   The offsets may be signed or unsigned, according to UNSIGNED_P.
   If EXEC is set then _exec patterns will be used, otherwise plain.

   Return values.
   ADDR_SPACE_FLAT - return V64DImode vector of absolute addresses.
   ADDR_SPACE_GLOBAL - return V64SImode vector of offsets.
*/

rtx
gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
			   bool unsigned_p, rtx exec)
{
  rtx tmpsi = gen_reg_rtx (V64SImode);
  rtx tmpdi = gen_reg_rtx (V64DImode);
  rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL;
  rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL;

  /* Scale the offsets: a shift when SCALE is a positive power of two,
     otherwise a broadcast multiply.  */
  bool power_of_two = (CONST_INT_P (scale)
		       && INTVAL (scale) > 0
		       && exact_log2 (INTVAL (scale)) >= 0);
  if (power_of_two)
    emit_insn (gen_ashlv64si3 (tmpsi, offsets,
			       GEN_INT (exact_log2 (INTVAL (scale)))));
  else if (exec)
    emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi, exec));
  else
    emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale));

  /* "Global" instructions do not support negative register offsets.  */
  if (as == ADDR_SPACE_FLAT || !unsigned_p)
    {
      /* Widen to 64-bit absolute addresses, extending as appropriate.  */
      if (unsigned_p)
	{
	  if (exec)
	    emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base,
						     undefdi, exec));
	  else
	    emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base));
	}
      else
	{
	  if (exec)
	    emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base,
						     undefdi, exec));
	  else
	    emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base));
	}
      return tmpdi;
    }
  else if (as == ADDR_SPACE_GLOBAL)
    return tmpsi;

  gcc_unreachable ();
}

/* Return true if move from OP0 to OP1 is known to be executed in vector
   unit.
*/ 1963 1.1 mrg 1964 1.1 mrg bool 1965 1.1 mrg gcn_vgpr_move_p (rtx op0, rtx op1) 1966 1.1 mrg { 1967 1.1 mrg if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0))) 1968 1.1 mrg return true; 1969 1.1 mrg if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1))) 1970 1.1 mrg return true; 1971 1.1 mrg return ((REG_P (op0) && VGPR_REGNO_P (REGNO (op0))) 1972 1.1 mrg || (REG_P (op1) && VGPR_REGNO_P (REGNO (op1))) 1973 1.1 mrg || vgpr_vector_mode_p (GET_MODE (op0))); 1974 1.1 mrg } 1975 1.1 mrg 1976 1.1 mrg /* Return true if move from OP0 to OP1 is known to be executed in scalar 1977 1.1 mrg unit. Used in the machine description. */ 1978 1.1 mrg 1979 1.1 mrg bool 1980 1.1 mrg gcn_sgpr_move_p (rtx op0, rtx op1) 1981 1.1 mrg { 1982 1.1 mrg if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0))) 1983 1.1 mrg return true; 1984 1.1 mrg if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1))) 1985 1.1 mrg return true; 1986 1.1 mrg if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER 1987 1.1 mrg || VGPR_REGNO_P (REGNO (op0))) 1988 1.1 mrg return false; 1989 1.1 mrg if (REG_P (op1) 1990 1.1 mrg && REGNO (op1) < FIRST_PSEUDO_REGISTER 1991 1.1 mrg && !VGPR_REGNO_P (REGNO (op1))) 1992 1.1 mrg return true; 1993 1.1 mrg return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode); 1994 1.1 mrg } 1995 1.1 mrg 1996 1.1 mrg /* Implement TARGET_SECONDARY_RELOAD. 1997 1.1 mrg 1998 1.1 mrg The address space determines which registers can be used for loads and 1999 1.1 mrg stores. 
*/

static reg_class_t
gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		      machine_mode reload_mode, secondary_reload_info *sri)
{
  reg_class_t result = NO_REGS;
  /* A pseudo that did not get a hard register has been spilled to the
     stack; true_regnum reports -1 for those.  */
  bool spilled_pseudo =
    (REG_P (x) || GET_CODE (x) == SUBREG) && true_regnum (x) == -1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "gcn_secondary_reload: ");
      dump_value_slim (dump_file, x, 1);
      fprintf (dump_file, " %s %s:%s", (in_p ? "->" : "<-"),
	       reg_class_names[rclass], GET_MODE_NAME (reload_mode));
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	fprintf (dump_file, " (true regnum: %d \"%s\")", true_regnum (x),
		 (true_regnum (x) >= 0
		  && true_regnum (x) < FIRST_PSEUDO_REGISTER
		  ? reg_names[true_regnum (x)]
		  : (spilled_pseudo ? "stack spill" : "??")));
      fprintf (dump_file, "\n");
    }

  /* Some callers don't use or initialize icode.  */
  sri->icode = CODE_FOR_nothing;

  if (MEM_P (x) || spilled_pseudo)
    {
      addr_space_t as = DEFAULT_ADDR_SPACE;

      /* If we have a spilled pseudo, we can't find the address space
	 directly, but we know it's in ADDR_SPACE_FLAT space for GCN3 or
	 ADDR_SPACE_GLOBAL for GCN5.  */
      if (MEM_P (x))
	as = MEM_ADDR_SPACE (x);

      if (as == ADDR_SPACE_DEFAULT)
	as = DEFAULT_ADDR_SPACE;

      switch (as)
	{
	case ADDR_SPACE_SCALAR_FLAT:
	  /* Scalar memory goes through SGPRs; no reload needed if the
	     value is already headed for (or is not) a memory access.  */
	  result =
	    ((!MEM_P (x) || rclass == SGPR_REGS) ? NO_REGS : SGPR_REGS);
	  break;
	case ADDR_SPACE_FLAT:
	case ADDR_SPACE_FLAT_SCRATCH:
	case ADDR_SPACE_GLOBAL:
	  if (GET_MODE_CLASS (reload_mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT)
	    {
	      /* Vector reloads need a custom expander to build the
		 per-lane addresses; select it by mode and direction.  */
	      if (in_p)
		switch (reload_mode)
		  {
		  case E_V64SImode:
		    sri->icode = CODE_FOR_reload_inv64si;
		    break;
		  case E_V64SFmode:
		    sri->icode = CODE_FOR_reload_inv64sf;
		    break;
		  case E_V64HImode:
		    sri->icode = CODE_FOR_reload_inv64hi;
		    break;
		  case E_V64HFmode:
		    sri->icode = CODE_FOR_reload_inv64hf;
		    break;
		  case E_V64QImode:
		    sri->icode = CODE_FOR_reload_inv64qi;
		    break;
		  case E_V64DImode:
		    sri->icode = CODE_FOR_reload_inv64di;
		    break;
		  case E_V64DFmode:
		    sri->icode = CODE_FOR_reload_inv64df;
		    break;
		  default:
		    gcc_unreachable ();
		  }
	      else
		switch (reload_mode)
		  {
		  case E_V64SImode:
		    sri->icode = CODE_FOR_reload_outv64si;
		    break;
		  case E_V64SFmode:
		    sri->icode = CODE_FOR_reload_outv64sf;
		    break;
		  case E_V64HImode:
		    sri->icode = CODE_FOR_reload_outv64hi;
		    break;
		  case E_V64HFmode:
		    sri->icode = CODE_FOR_reload_outv64hf;
		    break;
		  case E_V64QImode:
		    sri->icode = CODE_FOR_reload_outv64qi;
		    break;
		  case E_V64DImode:
		    sri->icode = CODE_FOR_reload_outv64di;
		    break;
		  case E_V64DFmode:
		    sri->icode = CODE_FOR_reload_outv64df;
		    break;
		  default:
		    gcc_unreachable ();
		  }
	      break;
	    }
	  /* Fallthrough.  */
	case ADDR_SPACE_LDS:
	case ADDR_SPACE_GDS:
	case ADDR_SPACE_SCRATCH:
	  /* Scalar access to these spaces goes through VGPRs.  */
	  result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
	  break;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, " <= %s (icode: %s)\n", reg_class_names[result],
	     get_insn_name (sri->icode));

  return result;
}

/* Update register usage after having seen the compiler flags and kernel
   attributes.  We typically want to fix registers that contain values
   set by the HSA runtime.  */

static void
gcn_conditional_register_usage (void)
{
  if (!cfun || !cfun->machine)
    return;

  if (cfun->machine->normal_function)
    {
      /* Restrict the set of SGPRs and VGPRs used by non-kernel functions.  */
      for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT);
	   i <= LAST_SGPR_REG; i++)
	fixed_regs[i] = 1, call_used_regs[i] = 1;

      for (int i = VGPR_REGNO (MAX_NORMAL_VGPR_COUNT);
	   i <= LAST_VGPR_REG; i++)
	fixed_regs[i] = 1, call_used_regs[i] = 1;

      return;
    }

  /* If the set of requested args is the default set, nothing more needs to
     be done.  */
  if (cfun->machine->args.requested == default_requested_args)
    return;

  /* Requesting a set of args different from the default violates the ABI.  */
  if (!leaf_function_p ())
    warning (0, "A non-default set of initial values has been requested, "
	     "which violates the ABI");

  for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
    fixed_regs[i] = 0;

  /* Fix the runtime argument register containing values that may be
     needed later.  DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
     needed after the prologue so there's no need to fix them.  */
  if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0)
    fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
  if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
    {
      /* The upper 32-bits of the 64-bit descriptor are not used, so allow
	 the containing registers to be used for other purposes.  */
      fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
      fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
    }
  if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
    {
      fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG]] = 1;
      fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] + 1] = 1;
    }
  if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0)
    {
      fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG]] = 1;
      fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG] + 1] = 1;
    }
  if (cfun->machine->args.reg[WORKGROUP_ID_X_ARG] >= 0)
    fixed_regs[cfun->machine->args.reg[WORKGROUP_ID_X_ARG]] = 1;
  if (cfun->machine->args.reg[WORK_ITEM_ID_X_ARG] >= 0)
    fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_X_ARG]] = 1;
  if (cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG] >= 0)
    fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG]] = 1;
  if (cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG] >= 0)
    fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
}

/* Determine if a load or store is valid, according to the register classes
   and address space.  Used primarily by the machine description to decide
   when to split a move into two steps.  */

bool
gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
{
  /* Register-to-register moves are always valid.  */
  if (!MEM_P (dest) && !MEM_P (src))
    return true;

  /* FLAT space: either direction, VGPR on the register side.  */
  if (MEM_P (dest)
      && AS_FLAT_P (MEM_ADDR_SPACE (dest))
      && (gcn_flat_address_p (XEXP (dest, 0), mode)
	  || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
	  || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
      && gcn_vgpr_register_operand (src, mode))
    return true;
  else if (MEM_P (src)
	   && AS_FLAT_P (MEM_ADDR_SPACE (src))
	   && (gcn_flat_address_p (XEXP (src, 0), mode)
	       || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (src, 0)) == LABEL_REF)
	   && gcn_vgpr_register_operand (dest, mode))
    return true;

  /* GLOBAL space: as FLAT, but with global-style addresses.  */
  if (MEM_P (dest)
      && AS_GLOBAL_P (MEM_ADDR_SPACE (dest))
      && (gcn_global_address_p (XEXP (dest, 0))
	  || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
	  || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
      && gcn_vgpr_register_operand (src, mode))
    return true;
  else if (MEM_P (src)
	   && AS_GLOBAL_P (MEM_ADDR_SPACE (src))
	   && (gcn_global_address_p (XEXP (src, 0))
	       || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (src, 0)) == LABEL_REF)
	   && gcn_vgpr_register_operand (dest, mode))
    return true;

  /* SCALAR_FLAT space: scalar source/destination registers only.  */
  if (MEM_P (dest)
      && MEM_ADDR_SPACE (dest) == ADDR_SPACE_SCALAR_FLAT
      && (gcn_scalar_flat_address_p (XEXP (dest, 0))
	  || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
	  || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
      && gcn_ssrc_register_operand (src, mode))
    return true;
  else if (MEM_P (src)
	   && MEM_ADDR_SPACE (src) == ADDR_SPACE_SCALAR_FLAT
	   && (gcn_scalar_flat_address_p (XEXP (src, 0))
	       || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (src, 0)) == LABEL_REF)
	   && gcn_sdst_register_operand (dest, mode))
    return true;

  /* LDS/GDS space: DS-style addresses with VGPR data.  */
  if (MEM_P (dest)
      && AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
      && gcn_ds_address_p (XEXP (dest, 0))
      && gcn_vgpr_register_operand (src, mode))
    return true;
  else if (MEM_P (src)
	   && AS_ANY_DS_P (MEM_ADDR_SPACE (src))
	   && gcn_ds_address_p (XEXP (src, 0))
	   && gcn_vgpr_register_operand (dest, mode))
    return true;

  return false;
}

/* }}} */
/* {{{ Functions and ABI. */

/* Implement TARGET_FUNCTION_VALUE.

   Define how to find the value returned by a function.
   The register location is always the same, but the mode depends on
   VALTYPE.
*/ 2270 1.1 mrg 2271 1.1 mrg static rtx 2272 1.1 mrg gcn_function_value (const_tree valtype, const_tree, bool) 2273 1.1 mrg { 2274 1.1 mrg machine_mode mode = TYPE_MODE (valtype); 2275 1.1 mrg 2276 1.1 mrg if (INTEGRAL_TYPE_P (valtype) 2277 1.1 mrg && GET_MODE_CLASS (mode) == MODE_INT 2278 1.1 mrg && GET_MODE_SIZE (mode) < 4) 2279 1.1 mrg mode = SImode; 2280 1.1 mrg 2281 1.1 mrg return gen_rtx_REG (mode, SGPR_REGNO (RETURN_VALUE_REG)); 2282 1.1 mrg } 2283 1.1 mrg 2284 1.1 mrg /* Implement TARGET_FUNCTION_VALUE_REGNO_P. 2285 1.1 mrg 2286 1.1 mrg Return true if N is a possible register number for the function return 2287 1.1 mrg value. */ 2288 1.1 mrg 2289 1.1 mrg static bool 2290 1.1 mrg gcn_function_value_regno_p (const unsigned int n) 2291 1.1 mrg { 2292 1.1 mrg return n == RETURN_VALUE_REG; 2293 1.1 mrg } 2294 1.1 mrg 2295 1.1 mrg /* Calculate the number of registers required to hold function argument 2296 1.1 mrg ARG. */ 2297 1.1 mrg 2298 1.1 mrg static int 2299 1.1 mrg num_arg_regs (const function_arg_info &arg) 2300 1.1 mrg { 2301 1.1 mrg if (targetm.calls.must_pass_in_stack (arg)) 2302 1.1 mrg return 0; 2303 1.1 mrg 2304 1.1 mrg int size = arg.promoted_size_in_bytes (); 2305 1.1 mrg return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 2306 1.1 mrg } 2307 1.1 mrg 2308 1.1 mrg /* Implement TARGET_STRICT_ARGUMENT_NAMING. 2309 1.1 mrg 2310 1.1 mrg Return true if the location where a function argument is passed 2311 1.1 mrg depends on whether or not it is a named argument 2312 1.1 mrg 2313 1.1 mrg For gcn, we know how to handle functions declared as stdarg: by 2314 1.1 mrg passing an extra pointer to the unnamed arguments. However, the 2315 1.1 mrg Fortran frontend can produce a different situation, where a 2316 1.1 mrg function pointer is declared with no arguments, but the actual 2317 1.1 mrg function and calls to it take more arguments. In that case, we 2318 1.1 mrg want to ensure the call matches the definition of the function. 
*/

static bool
gcn_strict_argument_naming (cumulative_args_t cum_v)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* Strict naming for library calls (no fntype) and stdarg functions.  */
  return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
}

/* Implement TARGET_PRETEND_OUTGOING_VARARGS_NAMED.

   See comment on gcn_strict_argument_naming.  */

static bool
gcn_pretend_outgoing_varargs_named (cumulative_args_t cum_v)
{
  return !gcn_strict_argument_naming (cum_v);
}

/* Implement TARGET_FUNCTION_ARG.

   Return an RTX indicating whether a function argument is passed in a register
   and if so, which register.  */

static rtx
gcn_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  if (cum->normal_function)
    {
      if (!arg.named || arg.end_marker_p ())
	return 0;

      if (targetm.calls.must_pass_in_stack (arg))
	return 0;

      /* Vector parameters are not supported yet.  */
      if (VECTOR_MODE_P (arg.mode))
	return 0;

      /* Align the argument to a multiple of its own register count,
	 then pass it in registers if it fits entirely.  */
      int reg_num = FIRST_PARM_REG + cum->num;
      int num_regs = num_arg_regs (arg);
      if (num_regs > 0)
	while (reg_num % num_regs != 0)
	  reg_num++;
      if (reg_num + num_regs <= FIRST_PARM_REG + NUM_PARM_REGS)
	return gen_rtx_REG (arg.mode, reg_num);
    }
  else
    {
      /* Kernel entry point: HSA-provided arguments come first in fixed
	 registers; user arguments are read from the kernarg segment.  */
      if (cum->num >= cum->args.nargs)
	{
	  /* Align the kernarg offset for this argument and track the
	     largest alignment seen for the whole segment.  */
	  cum->offset = (cum->offset + TYPE_ALIGN (arg.type) / 8 - 1)
	    & -(TYPE_ALIGN (arg.type) / 8);
	  cfun->machine->kernarg_segment_alignment
	    = MAX ((unsigned) cfun->machine->kernarg_segment_alignment,
		   TYPE_ALIGN (arg.type) / 8);
	  rtx addr = gen_rtx_REG (DImode,
				  cum->args.reg[KERNARG_SEGMENT_PTR_ARG]);
	  if (cum->offset)
	    addr = gen_rtx_PLUS (DImode, addr,
				 gen_int_mode (cum->offset, DImode));
	  rtx mem = gen_rtx_MEM (arg.mode, addr);
	  set_mem_attributes (mem, arg.type, 1);
	  set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT);
	  /* The kernarg segment is read-only to the kernel.  */
	  MEM_READONLY_P (mem) = 1;
	  return mem;
	}

      int a = cum->args.order[cum->num];
      if (arg.mode != gcn_kernel_arg_types[a].mode)
	{
	  error ("wrong type of argument %s", gcn_kernel_arg_types[a].name);
	  return 0;
	}
      return gen_rtx_REG ((machine_mode) gcn_kernel_arg_types[a].mode,
			  cum->args.reg[a]);
    }
  return 0;
}

/* Implement TARGET_FUNCTION_ARG_ADVANCE.

   Updates the summarizer variable pointed to by CUM_V to advance past an
   argument in the argument list.
*/

static void
gcn_function_arg_advance (cumulative_args_t cum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (cum->normal_function)
    {
      if (!arg.named)
	return;

      /* Skip registers until the argument's natural alignment, mirroring
	 the placement logic in gcn_function_arg.  */
      int num_regs = num_arg_regs (arg);
      if (num_regs > 0)
	while ((FIRST_PARM_REG + cum->num) % num_regs != 0)
	  cum->num++;
      cum->num += num_regs;
    }
  else
    {
      if (cum->num < cum->args.nargs)
	cum->num++;
      else
	{
	  /* User kernel argument consumed from the kernarg segment; track
	     the segment's running size.  */
	  cum->offset += tree_to_uhwi (TYPE_SIZE_UNIT (arg.type));
	  cfun->machine->kernarg_segment_byte_size = cum->offset;
	}
    }
}

/* Implement TARGET_ARG_PARTIAL_BYTES.

   Returns the number of bytes at the beginning of an argument that must be put
   in registers.  The value must be zero for arguments that are passed entirely
   in registers or that are entirely pushed on the stack.  */

static int
gcn_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (!arg.named)
    return 0;

  if (targetm.calls.must_pass_in_stack (arg))
    return 0;

  /* All parameter registers already used: argument is fully on stack.  */
  if (cum->num >= NUM_PARM_REGS)
    return 0;

  /* If the argument fits entirely in registers, return 0.  */
  if (cum->num + num_arg_regs (arg) <= NUM_PARM_REGS)
    return 0;

  /* Otherwise the remaining registers hold the leading bytes.  */
  return (NUM_PARM_REGS - cum->num) * UNITS_PER_WORD;
}

/* A normal function which takes a pointer argument may be passed a pointer to
   LDS space (via a high-bits-set aperture), and that only works with FLAT
   addressing, not GLOBAL.  Force FLAT addressing if the function has an
   incoming pointer parameter.  NOTE: This is a heuristic that works in the
   offloading case, but in general, a function might read global pointer
   variables, etc. that may refer to LDS space or other special memory areas
   not supported by GLOBAL instructions, and then this argument check would not
   suffice.  */

static void
gcn_detect_incoming_pointer_arg (tree fndecl)
{
  gcc_assert (cfun && cfun->machine);

  for (tree arg = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
       arg;
       arg = TREE_CHAIN (arg))
    if (POINTER_TYPE_P (TREE_VALUE (arg)))
      cfun->machine->use_flat_addressing = true;
}

/* Implement INIT_CUMULATIVE_ARGS, via gcn.h.

   Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
   whose data type is FNTYPE.  For a library call, FNTYPE is 0.
*/

void
gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ ,
			  tree fntype /* tree ptr for function decl */ ,
			  rtx libname /* SYMBOL_REF of library name or 0 */ ,
			  tree fndecl, int caller)
{
  memset (cum, 0, sizeof (*cum));
  cum->fntype = fntype;
  /* Library calls are always treated as normal (non-kernel) functions.  */
  if (libname)
    {
      gcc_assert (cfun && cfun->machine);
      cum->normal_function = true;
      if (!caller)
	{
	  cfun->machine->normal_function = true;
	  gcn_detect_incoming_pointer_arg (fndecl);
	}
      return;
    }
  /* Look for the amdgpu_hsa_kernel attribute: on the decl, then on the
     decl's type, then on the given fntype.  */
  tree attr = NULL;
  if (fndecl)
    attr = lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl));
  if (fndecl && !attr)
    attr = lookup_attribute ("amdgpu_hsa_kernel",
			     TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
  if (!attr && fntype)
    attr = lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype));
  /* Handle main () as kernel, so we can run testsuite.
     Handle OpenACC kernels similarly to main.  */
  if (!attr && !caller && fndecl
      && (MAIN_NAME_P (DECL_NAME (fndecl))
	  || lookup_attribute ("omp target entrypoint",
			       DECL_ATTRIBUTES (fndecl)) != NULL_TREE))
    gcn_parse_amdgpu_hsa_kernel_attribute (&cum->args, NULL_TREE);
  else
    {
      if (!attr || caller)
	{
	  gcc_assert (cfun && cfun->machine);
	  cum->normal_function = true;
	  if (!caller)
	    cfun->machine->normal_function = true;
	}
      gcn_parse_amdgpu_hsa_kernel_attribute
	(&cum->args, attr ? TREE_VALUE (attr) : NULL_TREE);
    }
  cfun->machine->args = cum->args;
  if (!caller && cfun->machine->normal_function)
    gcn_detect_incoming_pointer_arg (fndecl);

  /* Register availability may have changed; recompute.  */
  reinit_regs ();
}

/* Return true if a value of TYPE must be returned in memory: aggregates,
   vectors (not supported yet), BLKmode values, and anything wider than
   two words.  */

static bool
gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
{
  machine_mode mode = TYPE_MODE (type);
  HOST_WIDE_INT size = int_size_in_bytes (type);

  if (AGGREGATE_TYPE_P (type))
    return true;

  /* Vector return values are not supported yet.  */
  if (VECTOR_TYPE_P (type))
    return true;

  if (mode == BLKmode)
    return true;

  if (size > 2 * UNITS_PER_WORD)
    return true;

  return false;
}

/* Implement TARGET_PROMOTE_FUNCTION_MODE.

   Return the mode to use for outgoing function arguments.  */

machine_mode
gcn_promote_function_mode (const_tree ARG_UNUSED (type), machine_mode mode,
			   int *ARG_UNUSED (punsignedp),
			   const_tree ARG_UNUSED (funtype),
			   int ARG_UNUSED (for_return))
{
  /* Promote sub-word integers to a full 32-bit register.  */
  if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.

   Derived from hppa_gimplify_va_arg_expr.  The generic routine doesn't handle
   ARGS_GROW_DOWNWARDS.
*/

static tree
gcn_gimplify_va_arg_expr (tree valist, tree type,
			  gimple_seq *ARG_UNUSED (pre_p),
			  gimple_seq *ARG_UNUSED (post_p))
{
  tree ptr = build_pointer_type (type);
  tree valist_type;
  tree t, u;
  bool indirect;

  indirect = pass_va_arg_by_reference (type);
  if (indirect)
    {
      /* By-reference argument: what is stored is a pointer to the value,
	 so fetch the pointer and dereference it again below.  */
      type = ptr;
      ptr = build_pointer_type (type);
    }
  valist_type = TREE_TYPE (valist);

  /* Args grow down.  Not handled by generic routines.  */

  u = fold_convert (sizetype, size_in_bytes (type));
  u = fold_build1 (NEGATE_EXPR, sizetype, u);
  t = fold_build_pointer_plus (valist, u);

  /* Align to 8 byte boundary.  */

  u = build_int_cst (TREE_TYPE (t), -8);
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
  t = fold_convert (valist_type, t);

  /* Update valist to the new (lower) position.  */
  t = build2 (MODIFY_EXPR, valist_type, valist, t);

  /* Load the argument from the computed address.  */
  t = fold_convert (ptr, t);
  t = build_va_arg_indirect_ref (t);

  if (indirect)
    t = build_va_arg_indirect_ref (t);

  return t;
}

/* Return 1 if TRAIT NAME is present in the OpenMP context's
   device trait set, return 0 if not present in any OpenMP context in the
   whole translation unit, or -1 if not present in the current OpenMP context
   but might be present in another OpenMP context in the same TU.
*/ 2628 1.1 mrg 2629 1.1 mrg int 2630 1.1 mrg gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait, 2631 1.1 mrg const char *name) 2632 1.1 mrg { 2633 1.1 mrg switch (trait) 2634 1.1 mrg { 2635 1.1 mrg case omp_device_kind: 2636 1.1 mrg return strcmp (name, "gpu") == 0; 2637 1.1 mrg case omp_device_arch: 2638 1.1 mrg return strcmp (name, "gcn") == 0; 2639 1.1 mrg case omp_device_isa: 2640 1.1 mrg if (strcmp (name, "fiji") == 0) 2641 1.1 mrg return gcn_arch == PROCESSOR_FIJI; 2642 1.1 mrg if (strcmp (name, "gfx900") == 0) 2643 1.1 mrg return gcn_arch == PROCESSOR_VEGA10; 2644 1.1 mrg if (strcmp (name, "gfx906") == 0) 2645 1.1 mrg return gcn_arch == PROCESSOR_VEGA20; 2646 1.1 mrg if (strcmp (name, "gfx908") == 0) 2647 1.1 mrg return gcn_arch == PROCESSOR_GFX908; 2648 1.1 mrg return 0; 2649 1.1 mrg default: 2650 1.1 mrg gcc_unreachable (); 2651 1.1 mrg } 2652 1.1 mrg } 2653 1.1 mrg 2654 1.1 mrg /* Calculate stack offsets needed to create prologues and epilogues. */ 2655 1.1 mrg 2656 1.1 mrg static struct machine_function * 2657 1.1 mrg gcn_compute_frame_offsets (void) 2658 1.1 mrg { 2659 1.1 mrg machine_function *offsets = cfun->machine; 2660 1.1 mrg 2661 1.1 mrg if (reload_completed) 2662 1.1 mrg return offsets; 2663 1.1 mrg 2664 1.1 mrg offsets->need_frame_pointer = frame_pointer_needed; 2665 1.1 mrg 2666 1.1 mrg offsets->outgoing_args_size = crtl->outgoing_args_size; 2667 1.1 mrg offsets->pretend_size = crtl->args.pretend_args_size; 2668 1.1 mrg 2669 1.1 mrg offsets->local_vars = get_frame_size (); 2670 1.1 mrg 2671 1.1 mrg offsets->lr_needs_saving = (!leaf_function_p () 2672 1.1 mrg || df_regs_ever_live_p (LR_REGNUM) 2673 1.1 mrg || df_regs_ever_live_p (LR_REGNUM + 1)); 2674 1.1 mrg 2675 1.1 mrg offsets->callee_saves = offsets->lr_needs_saving ? 
8 : 0; 2676 1.1 mrg 2677 1.1 mrg for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 2678 1.1 mrg if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno)) 2679 1.1 mrg || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM 2680 1.1 mrg && frame_pointer_needed)) 2681 1.1 mrg offsets->callee_saves += (VGPR_REGNO_P (regno) ? 256 : 4); 2682 1.1 mrg 2683 1.1 mrg /* Round up to 64-bit boundary to maintain stack alignment. */ 2684 1.1 mrg offsets->callee_saves = (offsets->callee_saves + 7) & ~7; 2685 1.1 mrg 2686 1.1 mrg return offsets; 2687 1.1 mrg } 2688 1.1 mrg 2689 1.1 mrg /* Insert code into the prologue or epilogue to store or load any 2690 1.1 mrg callee-save register to/from the stack. 2691 1.1 mrg 2692 1.1 mrg Helper function for gcn_expand_prologue and gcn_expand_epilogue. */ 2693 1.1 mrg 2694 1.1 mrg static void 2695 1.1 mrg move_callee_saved_registers (rtx sp, machine_function *offsets, 2696 1.1 mrg bool prologue) 2697 1.1 mrg { 2698 1.1 mrg int regno, offset, saved_scalars; 2699 1.1 mrg rtx exec = gen_rtx_REG (DImode, EXEC_REG); 2700 1.1 mrg rtx vcc = gen_rtx_REG (DImode, VCC_LO_REG); 2701 1.1 mrg rtx offreg = gen_rtx_REG (SImode, SGPR_REGNO (22)); 2702 1.1 mrg rtx as = gen_rtx_CONST_INT (VOIDmode, STACK_ADDR_SPACE); 2703 1.1 mrg HOST_WIDE_INT exec_set = 0; 2704 1.1 mrg int offreg_set = 0; 2705 1.1 mrg auto_vec<int> saved_sgprs; 2706 1.1 mrg 2707 1.1 mrg start_sequence (); 2708 1.1 mrg 2709 1.1 mrg /* Move scalars into two vector registers. 
*/ 2710 1.1 mrg for (regno = 0, saved_scalars = 0; regno < FIRST_VGPR_REG; regno++) 2711 1.1 mrg if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno)) 2712 1.1 mrg || ((regno & ~1) == LINK_REGNUM && offsets->lr_needs_saving) 2713 1.1 mrg || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM 2714 1.1 mrg && offsets->need_frame_pointer)) 2715 1.1 mrg { 2716 1.1 mrg rtx reg = gen_rtx_REG (SImode, regno); 2717 1.1 mrg rtx vreg = gen_rtx_REG (V64SImode, 2718 1.1 mrg VGPR_REGNO (6 + (saved_scalars / 64))); 2719 1.1 mrg int lane = saved_scalars % 64; 2720 1.1 mrg 2721 1.1 mrg if (prologue) 2722 1.1 mrg { 2723 1.1 mrg emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane))); 2724 1.1 mrg saved_sgprs.safe_push (regno); 2725 1.1 mrg } 2726 1.1 mrg else 2727 1.1 mrg emit_insn (gen_vec_extractv64sisi (reg, vreg, GEN_INT (lane))); 2728 1.1 mrg 2729 1.1 mrg saved_scalars++; 2730 1.1 mrg } 2731 1.1 mrg 2732 1.1 mrg rtx move_scalars = get_insns (); 2733 1.1 mrg end_sequence (); 2734 1.1 mrg start_sequence (); 2735 1.1 mrg 2736 1.1 mrg /* Ensure that all vector lanes are moved. */ 2737 1.1 mrg exec_set = -1; 2738 1.1 mrg emit_move_insn (exec, GEN_INT (exec_set)); 2739 1.1 mrg 2740 1.1 mrg /* Set up a vector stack pointer. 
*/ 2741 1.1 mrg rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); 2742 1.1 mrg rtx _0_4_8_12 = gen_rtx_REG (V64SImode, VGPR_REGNO (3)); 2743 1.1 mrg emit_insn (gen_ashlv64si3_exec (_0_4_8_12, _0_1_2_3, GEN_INT (2), 2744 1.1 mrg gcn_gen_undef (V64SImode), exec)); 2745 1.1 mrg rtx vsp = gen_rtx_REG (V64DImode, VGPR_REGNO (4)); 2746 1.1 mrg emit_insn (gen_vec_duplicatev64di_exec (vsp, sp, gcn_gen_undef (V64DImode), 2747 1.1 mrg exec)); 2748 1.1 mrg emit_insn (gen_addv64si3_vcc_exec (gcn_operand_part (V64SImode, vsp, 0), 2749 1.1 mrg gcn_operand_part (V64SImode, vsp, 0), 2750 1.1 mrg _0_4_8_12, vcc, gcn_gen_undef (V64SImode), 2751 1.1 mrg exec)); 2752 1.1 mrg emit_insn (gen_addcv64si3_exec (gcn_operand_part (V64SImode, vsp, 1), 2753 1.1 mrg gcn_operand_part (V64SImode, vsp, 1), 2754 1.1 mrg const0_rtx, vcc, vcc, 2755 1.1 mrg gcn_gen_undef (V64SImode), exec)); 2756 1.1 mrg 2757 1.1 mrg /* Move vectors. */ 2758 1.1 mrg for (regno = FIRST_VGPR_REG, offset = 0; 2759 1.1 mrg regno < FIRST_PSEUDO_REGISTER; regno++) 2760 1.1 mrg if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno)) 2761 1.1 mrg || (regno == VGPR_REGNO (6) && saved_scalars > 0) 2762 1.1 mrg || (regno == VGPR_REGNO (7) && saved_scalars > 63)) 2763 1.1 mrg { 2764 1.1 mrg rtx reg = gen_rtx_REG (V64SImode, regno); 2765 1.1 mrg int size = 256; 2766 1.1 mrg 2767 1.1 mrg if (regno == VGPR_REGNO (6) && saved_scalars < 64) 2768 1.1 mrg size = saved_scalars * 4; 2769 1.1 mrg else if (regno == VGPR_REGNO (7) && saved_scalars < 128) 2770 1.1 mrg size = (saved_scalars - 64) * 4; 2771 1.1 mrg 2772 1.1 mrg if (size != 256 || exec_set != -1) 2773 1.1 mrg { 2774 1.1 mrg exec_set = ((unsigned HOST_WIDE_INT) 1 << (size / 4)) - 1; 2775 1.1 mrg emit_move_insn (exec, gen_int_mode (exec_set, DImode)); 2776 1.1 mrg } 2777 1.1 mrg 2778 1.1 mrg if (prologue) 2779 1.1 mrg { 2780 1.1 mrg rtx insn = emit_insn (gen_scatterv64si_insn_1offset_exec 2781 1.1 mrg (vsp, const0_rtx, reg, as, const0_rtx, 2782 1.1 mrg exec)); 
2783 1.1 mrg 2784 1.1 mrg /* Add CFI metadata. */ 2785 1.1 mrg rtx note; 2786 1.1 mrg if (regno == VGPR_REGNO (6) || regno == VGPR_REGNO (7)) 2787 1.1 mrg { 2788 1.1 mrg int start = (regno == VGPR_REGNO (7) ? 64 : 0); 2789 1.1 mrg int count = MIN (saved_scalars - start, 64); 2790 1.1 mrg int add_lr = (regno == VGPR_REGNO (6) 2791 1.1 mrg && offsets->lr_needs_saving); 2792 1.1 mrg int lrdest = -1; 2793 1.1 mrg rtvec seq = rtvec_alloc (count + add_lr); 2794 1.1 mrg 2795 1.1 mrg /* Add an REG_FRAME_RELATED_EXPR entry for each scalar 2796 1.1 mrg register that was saved in this batch. */ 2797 1.1 mrg for (int idx = 0; idx < count; idx++) 2798 1.1 mrg { 2799 1.1 mrg int stackaddr = offset + idx * 4; 2800 1.1 mrg rtx dest = gen_rtx_MEM (SImode, 2801 1.1 mrg gen_rtx_PLUS 2802 1.1 mrg (DImode, sp, 2803 1.1 mrg GEN_INT (stackaddr))); 2804 1.1 mrg rtx src = gen_rtx_REG (SImode, saved_sgprs[start + idx]); 2805 1.1 mrg rtx set = gen_rtx_SET (dest, src); 2806 1.1 mrg RTX_FRAME_RELATED_P (set) = 1; 2807 1.1 mrg RTVEC_ELT (seq, idx) = set; 2808 1.1 mrg 2809 1.1 mrg if (saved_sgprs[start + idx] == LINK_REGNUM) 2810 1.1 mrg lrdest = stackaddr; 2811 1.1 mrg } 2812 1.1 mrg 2813 1.1 mrg /* Add an additional expression for DWARF_LINK_REGISTER if 2814 1.1 mrg LINK_REGNUM was saved. 
*/ 2815 1.1 mrg if (lrdest != -1) 2816 1.1 mrg { 2817 1.1 mrg rtx dest = gen_rtx_MEM (DImode, 2818 1.1 mrg gen_rtx_PLUS 2819 1.1 mrg (DImode, sp, 2820 1.1 mrg GEN_INT (lrdest))); 2821 1.1 mrg rtx src = gen_rtx_REG (DImode, DWARF_LINK_REGISTER); 2822 1.1 mrg rtx set = gen_rtx_SET (dest, src); 2823 1.1 mrg RTX_FRAME_RELATED_P (set) = 1; 2824 1.1 mrg RTVEC_ELT (seq, count) = set; 2825 1.1 mrg } 2826 1.1 mrg 2827 1.1 mrg note = gen_rtx_SEQUENCE (VOIDmode, seq); 2828 1.1 mrg } 2829 1.1 mrg else 2830 1.1 mrg { 2831 1.1 mrg rtx dest = gen_rtx_MEM (V64SImode, 2832 1.1 mrg gen_rtx_PLUS (DImode, sp, 2833 1.1 mrg GEN_INT (offset))); 2834 1.1 mrg rtx src = gen_rtx_REG (V64SImode, regno); 2835 1.1 mrg note = gen_rtx_SET (dest, src); 2836 1.1 mrg } 2837 1.1 mrg RTX_FRAME_RELATED_P (insn) = 1; 2838 1.1 mrg add_reg_note (insn, REG_FRAME_RELATED_EXPR, note); 2839 1.1 mrg } 2840 1.1 mrg else 2841 1.1 mrg emit_insn (gen_gatherv64si_insn_1offset_exec 2842 1.1 mrg (reg, vsp, const0_rtx, as, const0_rtx, 2843 1.1 mrg gcn_gen_undef (V64SImode), exec)); 2844 1.1 mrg 2845 1.1 mrg /* Move our VSP to the next stack entry. 
*/ 2846 1.1 mrg if (offreg_set != size) 2847 1.1 mrg { 2848 1.1 mrg offreg_set = size; 2849 1.1 mrg emit_move_insn (offreg, GEN_INT (size)); 2850 1.1 mrg } 2851 1.1 mrg if (exec_set != -1) 2852 1.1 mrg { 2853 1.1 mrg exec_set = -1; 2854 1.1 mrg emit_move_insn (exec, GEN_INT (exec_set)); 2855 1.1 mrg } 2856 1.1 mrg emit_insn (gen_addv64si3_vcc_dup_exec 2857 1.1 mrg (gcn_operand_part (V64SImode, vsp, 0), 2858 1.1 mrg offreg, gcn_operand_part (V64SImode, vsp, 0), 2859 1.1 mrg vcc, gcn_gen_undef (V64SImode), exec)); 2860 1.1 mrg emit_insn (gen_addcv64si3_exec 2861 1.1 mrg (gcn_operand_part (V64SImode, vsp, 1), 2862 1.1 mrg gcn_operand_part (V64SImode, vsp, 1), 2863 1.1 mrg const0_rtx, vcc, vcc, gcn_gen_undef (V64SImode), exec)); 2864 1.1 mrg 2865 1.1 mrg offset += size; 2866 1.1 mrg } 2867 1.1 mrg 2868 1.1 mrg rtx move_vectors = get_insns (); 2869 1.1 mrg end_sequence (); 2870 1.1 mrg 2871 1.1 mrg if (prologue) 2872 1.1 mrg { 2873 1.1 mrg emit_insn (move_scalars); 2874 1.1 mrg emit_insn (move_vectors); 2875 1.1 mrg } 2876 1.1 mrg else 2877 1.1 mrg { 2878 1.1 mrg emit_insn (move_vectors); 2879 1.1 mrg emit_insn (move_scalars); 2880 1.1 mrg } 2881 1.1 mrg } 2882 1.1 mrg 2883 1.1 mrg /* Generate prologue. Called from gen_prologue during pro_and_epilogue pass. 2884 1.1 mrg 2885 1.1 mrg For a non-kernel function, the stack layout looks like this (interim), 2886 1.1 mrg growing *upwards*: 2887 1.1 mrg 2888 1.1 mrg hi | + ... 
2889 1.1 mrg |__________________| <-- current SP 2890 1.1 mrg | outgoing args | 2891 1.1 mrg |__________________| 2892 1.1 mrg | (alloca space) | 2893 1.1 mrg |__________________| 2894 1.1 mrg | local vars | 2895 1.1 mrg |__________________| <-- FP/hard FP 2896 1.1 mrg | callee-save regs | 2897 1.1 mrg |__________________| <-- soft arg pointer 2898 1.1 mrg | pretend args | 2899 1.1 mrg |__________________| <-- incoming SP 2900 1.1 mrg | incoming args | 2901 1.1 mrg lo |..................| 2902 1.1 mrg 2903 1.1 mrg This implies arguments (beyond the first N in registers) must grow 2904 1.1 mrg downwards (as, apparently, PA has them do). 2905 1.1 mrg 2906 1.1 mrg For a kernel function we have the simpler: 2907 1.1 mrg 2908 1.1 mrg hi | + ... 2909 1.1 mrg |__________________| <-- current SP 2910 1.1 mrg | outgoing args | 2911 1.1 mrg |__________________| 2912 1.1 mrg | (alloca space) | 2913 1.1 mrg |__________________| 2914 1.1 mrg | local vars | 2915 1.1 mrg lo |__________________| <-- FP/hard FP 2916 1.1 mrg 2917 1.1 mrg */ 2918 1.1 mrg 2919 1.1 mrg void 2920 1.1 mrg gcn_expand_prologue () 2921 1.1 mrg { 2922 1.1 mrg machine_function *offsets = gcn_compute_frame_offsets (); 2923 1.1 mrg 2924 1.1 mrg if (!cfun || !cfun->machine || cfun->machine->normal_function) 2925 1.1 mrg { 2926 1.1 mrg rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); 2927 1.1 mrg rtx sp_hi = gcn_operand_part (Pmode, sp, 1); 2928 1.1 mrg rtx sp_lo = gcn_operand_part (Pmode, sp, 0); 2929 1.1 mrg rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM); 2930 1.1 mrg rtx fp_hi = gcn_operand_part (Pmode, fp, 1); 2931 1.1 mrg rtx fp_lo = gcn_operand_part (Pmode, fp, 0); 2932 1.1 mrg 2933 1.1 mrg start_sequence (); 2934 1.1 mrg 2935 1.1 mrg if (offsets->pretend_size > 0) 2936 1.1 mrg { 2937 1.1 mrg /* FIXME: Do the actual saving of register pretend args to the stack. 2938 1.1 mrg Register order needs consideration. */ 2939 1.1 mrg } 2940 1.1 mrg 2941 1.1 mrg /* Save callee-save regs. 
*/ 2942 1.1 mrg move_callee_saved_registers (sp, offsets, true); 2943 1.1 mrg 2944 1.1 mrg HOST_WIDE_INT sp_adjust = offsets->pretend_size 2945 1.1 mrg + offsets->callee_saves 2946 1.1 mrg + offsets->local_vars + offsets->outgoing_args_size; 2947 1.1 mrg if (sp_adjust > 0) 2948 1.1 mrg { 2949 1.1 mrg /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so 2950 1.1 mrg we use split add explictly, and specify the DImode add in 2951 1.1 mrg the note. */ 2952 1.1 mrg rtx scc = gen_rtx_REG (BImode, SCC_REG); 2953 1.1 mrg rtx adjustment = gen_int_mode (sp_adjust, SImode); 2954 1.1 mrg rtx insn = emit_insn (gen_addsi3_scalar_carry (sp_lo, sp_lo, 2955 1.1 mrg adjustment, scc)); 2956 1.1 mrg if (!offsets->need_frame_pointer) 2957 1.1 mrg { 2958 1.1 mrg RTX_FRAME_RELATED_P (insn) = 1; 2959 1.1 mrg add_reg_note (insn, REG_FRAME_RELATED_EXPR, 2960 1.1 mrg gen_rtx_SET (sp, 2961 1.1 mrg gen_rtx_PLUS (DImode, sp, 2962 1.1 mrg adjustment))); 2963 1.1 mrg } 2964 1.1 mrg emit_insn (gen_addcsi3_scalar_zero (sp_hi, sp_hi, scc)); 2965 1.1 mrg } 2966 1.1 mrg 2967 1.1 mrg if (offsets->need_frame_pointer) 2968 1.1 mrg { 2969 1.1 mrg /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so 2970 1.1 mrg we use split add explictly, and specify the DImode add in 2971 1.1 mrg the note. */ 2972 1.1 mrg rtx scc = gen_rtx_REG (BImode, SCC_REG); 2973 1.1 mrg int fp_adjust = -(offsets->local_vars + offsets->outgoing_args_size); 2974 1.1 mrg rtx adjustment = gen_int_mode (fp_adjust, SImode); 2975 1.1 mrg rtx insn = emit_insn (gen_addsi3_scalar_carry(fp_lo, sp_lo, 2976 1.1 mrg adjustment, scc)); 2977 1.1 mrg emit_insn (gen_addcsi3_scalar (fp_hi, sp_hi, 2978 1.1 mrg (fp_adjust < 0 ? GEN_INT (-1) 2979 1.1 mrg : const0_rtx), 2980 1.1 mrg scc, scc)); 2981 1.1 mrg 2982 1.1 mrg /* Set the CFA to the entry stack address, as an offset from the 2983 1.1 mrg frame pointer. This is preferred because the frame pointer is 2984 1.1 mrg saved in each frame, whereas the stack pointer is not. 
*/ 2985 1.1 mrg RTX_FRAME_RELATED_P (insn) = 1; 2986 1.1 mrg add_reg_note (insn, REG_CFA_DEF_CFA, 2987 1.1 mrg gen_rtx_PLUS (DImode, fp, 2988 1.1 mrg GEN_INT (-(offsets->pretend_size 2989 1.1 mrg + offsets->callee_saves)))); 2990 1.1 mrg } 2991 1.1 mrg 2992 1.1 mrg rtx_insn *seq = get_insns (); 2993 1.1 mrg end_sequence (); 2994 1.1 mrg 2995 1.1 mrg emit_insn (seq); 2996 1.1 mrg } 2997 1.1 mrg else 2998 1.1 mrg { 2999 1.1 mrg rtx wave_offset = gen_rtx_REG (SImode, 3000 1.1 mrg cfun->machine->args. 3001 1.1 mrg reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]); 3002 1.1 mrg 3003 1.1 mrg if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG)) 3004 1.1 mrg { 3005 1.1 mrg rtx fs_init_lo = 3006 1.1 mrg gen_rtx_REG (SImode, 3007 1.1 mrg cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG]); 3008 1.1 mrg rtx fs_init_hi = 3009 1.1 mrg gen_rtx_REG (SImode, 3010 1.1 mrg cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG] + 1); 3011 1.1 mrg rtx fs_reg_lo = gen_rtx_REG (SImode, FLAT_SCRATCH_REG); 3012 1.1 mrg rtx fs_reg_hi = gen_rtx_REG (SImode, FLAT_SCRATCH_REG + 1); 3013 1.1 mrg 3014 1.1 mrg /*rtx queue = gen_rtx_REG(DImode, 3015 1.1 mrg cfun->machine->args.reg[QUEUE_PTR_ARG]); 3016 1.1 mrg rtx aperture = gen_rtx_MEM (SImode, 3017 1.1 mrg gen_rtx_PLUS (DImode, queue, 3018 1.1 mrg gen_int_mode (68, SImode))); 3019 1.1 mrg set_mem_addr_space (aperture, ADDR_SPACE_SCALAR_FLAT);*/ 3020 1.1 mrg 3021 1.1 mrg /* Set up flat_scratch. */ 3022 1.1 mrg emit_insn (gen_addsi3_scc (fs_reg_hi, fs_init_lo, wave_offset)); 3023 1.1 mrg emit_insn (gen_lshrsi3_scc (fs_reg_hi, fs_reg_hi, 3024 1.1 mrg gen_int_mode (8, SImode))); 3025 1.1 mrg emit_move_insn (fs_reg_lo, fs_init_hi); 3026 1.1 mrg } 3027 1.1 mrg 3028 1.1 mrg /* Set up frame pointer and stack pointer. 
*/ 3029 1.1 mrg rtx sp = gen_rtx_REG (DImode, STACK_POINTER_REGNUM); 3030 1.1 mrg rtx sp_hi = simplify_gen_subreg (SImode, sp, DImode, 4); 3031 1.1 mrg rtx sp_lo = simplify_gen_subreg (SImode, sp, DImode, 0); 3032 1.1 mrg rtx fp = gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM); 3033 1.1 mrg rtx fp_hi = simplify_gen_subreg (SImode, fp, DImode, 4); 3034 1.1 mrg rtx fp_lo = simplify_gen_subreg (SImode, fp, DImode, 0); 3035 1.1 mrg 3036 1.1 mrg HOST_WIDE_INT sp_adjust = (offsets->local_vars 3037 1.1 mrg + offsets->outgoing_args_size); 3038 1.1 mrg 3039 1.1 mrg /* Initialise FP and SP from the buffer descriptor in s[0:3]. */ 3040 1.1 mrg emit_move_insn (fp_lo, gen_rtx_REG (SImode, 0)); 3041 1.1 mrg emit_insn (gen_andsi3_scc (fp_hi, gen_rtx_REG (SImode, 1), 3042 1.1 mrg gen_int_mode (0xffff, SImode))); 3043 1.1 mrg rtx scc = gen_rtx_REG (BImode, SCC_REG); 3044 1.1 mrg emit_insn (gen_addsi3_scalar_carry (fp_lo, fp_lo, wave_offset, scc)); 3045 1.1 mrg emit_insn (gen_addcsi3_scalar_zero (fp_hi, fp_hi, scc)); 3046 1.1 mrg 3047 1.1 mrg /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so we use 3048 1.1 mrg split add explictly, and specify the DImode add in the note. 3049 1.1 mrg The DWARF info expects that the callee-save data is in the frame, 3050 1.1 mrg even though it isn't (because this is the entry point), so we 3051 1.1 mrg make a notional adjustment to the DWARF frame offset here. 
*/ 3052 1.1 mrg rtx dbg_adjustment = gen_int_mode (sp_adjust + offsets->callee_saves, 3053 1.1 mrg DImode); 3054 1.1 mrg rtx insn; 3055 1.1 mrg if (sp_adjust > 0) 3056 1.1 mrg { 3057 1.1 mrg rtx scc = gen_rtx_REG (BImode, SCC_REG); 3058 1.1 mrg rtx adjustment = gen_int_mode (sp_adjust, DImode); 3059 1.1 mrg insn = emit_insn (gen_addsi3_scalar_carry(sp_lo, fp_lo, adjustment, 3060 1.1 mrg scc)); 3061 1.1 mrg emit_insn (gen_addcsi3_scalar_zero (sp_hi, fp_hi, scc)); 3062 1.1 mrg } 3063 1.1 mrg else 3064 1.1 mrg insn = emit_move_insn (sp, fp); 3065 1.1 mrg RTX_FRAME_RELATED_P (insn) = 1; 3066 1.1 mrg add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3067 1.1 mrg gen_rtx_SET (sp, gen_rtx_PLUS (DImode, sp, 3068 1.1 mrg dbg_adjustment))); 3069 1.1 mrg 3070 1.1 mrg if (offsets->need_frame_pointer) 3071 1.1 mrg { 3072 1.1 mrg /* Set the CFA to the entry stack address, as an offset from the 3073 1.1 mrg frame pointer. This is necessary when alloca is used, and 3074 1.1 mrg harmless otherwise. */ 3075 1.1 mrg rtx neg_adjust = gen_int_mode (-offsets->callee_saves, DImode); 3076 1.1 mrg add_reg_note (insn, REG_CFA_DEF_CFA, 3077 1.1 mrg gen_rtx_PLUS (DImode, fp, neg_adjust)); 3078 1.1 mrg } 3079 1.1 mrg 3080 1.1 mrg /* Make sure the flat scratch reg doesn't get optimised away. */ 3081 1.1 mrg emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG))); 3082 1.1 mrg } 3083 1.1 mrg 3084 1.1 mrg /* Ensure that the scheduler doesn't do anything unexpected. */ 3085 1.1 mrg emit_insn (gen_blockage ()); 3086 1.1 mrg 3087 1.1 mrg /* m0 is initialized for the usual LDS DS and FLAT memory case. 3088 1.1 mrg The low-part is the address of the topmost addressable byte, which is 3089 1.1 mrg size-1. The high-part is an offset and should be zero. 
*/ 3090 1.1 mrg emit_move_insn (gen_rtx_REG (SImode, M0_REG), 3091 1.1 mrg gen_int_mode (LDS_SIZE, SImode)); 3092 1.1 mrg 3093 1.1 mrg emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG))); 3094 1.1 mrg 3095 1.1 mrg if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp) 3096 1.1 mrg { 3097 1.1 mrg /* OpenMP kernels have an implicit call to gomp_gcn_enter_kernel. */ 3098 1.1 mrg rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG); 3099 1.1 mrg emit_move_insn (fn_reg, gen_rtx_SYMBOL_REF (Pmode, 3100 1.1 mrg "gomp_gcn_enter_kernel")); 3101 1.1 mrg emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx)); 3102 1.1 mrg } 3103 1.1 mrg } 3104 1.1 mrg 3105 1.1 mrg /* Generate epilogue. Called from gen_epilogue during pro_and_epilogue pass. 3106 1.1 mrg 3107 1.1 mrg See gcn_expand_prologue for stack details. */ 3108 1.1 mrg 3109 1.1 mrg void 3110 1.1 mrg gcn_expand_epilogue (void) 3111 1.1 mrg { 3112 1.1 mrg /* Ensure that the scheduler doesn't do anything unexpected. */ 3113 1.1 mrg emit_insn (gen_blockage ()); 3114 1.1 mrg 3115 1.1 mrg if (!cfun || !cfun->machine || cfun->machine->normal_function) 3116 1.1 mrg { 3117 1.1 mrg machine_function *offsets = gcn_compute_frame_offsets (); 3118 1.1 mrg rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); 3119 1.1 mrg rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM); 3120 1.1 mrg 3121 1.1 mrg HOST_WIDE_INT sp_adjust = offsets->callee_saves + offsets->pretend_size; 3122 1.1 mrg 3123 1.1 mrg if (offsets->need_frame_pointer) 3124 1.1 mrg { 3125 1.1 mrg /* Restore old SP from the frame pointer. */ 3126 1.1 mrg if (sp_adjust > 0) 3127 1.1 mrg emit_insn (gen_subdi3 (sp, fp, gen_int_mode (sp_adjust, DImode))); 3128 1.1 mrg else 3129 1.1 mrg emit_move_insn (sp, fp); 3130 1.1 mrg } 3131 1.1 mrg else 3132 1.1 mrg { 3133 1.1 mrg /* Restore old SP from current SP. 
*/ 3134 1.1 mrg sp_adjust += offsets->outgoing_args_size + offsets->local_vars; 3135 1.1 mrg 3136 1.1 mrg if (sp_adjust > 0) 3137 1.1 mrg emit_insn (gen_subdi3 (sp, sp, gen_int_mode (sp_adjust, DImode))); 3138 1.1 mrg } 3139 1.1 mrg 3140 1.1 mrg move_callee_saved_registers (sp, offsets, false); 3141 1.1 mrg 3142 1.1 mrg /* There's no explicit use of the link register on the return insn. Emit 3143 1.1 mrg one here instead. */ 3144 1.1 mrg if (offsets->lr_needs_saving) 3145 1.1 mrg emit_use (gen_rtx_REG (DImode, LINK_REGNUM)); 3146 1.1 mrg 3147 1.1 mrg /* Similar for frame pointer. */ 3148 1.1 mrg if (offsets->need_frame_pointer) 3149 1.1 mrg emit_use (gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM)); 3150 1.1 mrg } 3151 1.1 mrg else if (flag_openmp) 3152 1.1 mrg { 3153 1.1 mrg /* OpenMP kernels have an implicit call to gomp_gcn_exit_kernel. */ 3154 1.1 mrg rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG); 3155 1.1 mrg emit_move_insn (fn_reg, 3156 1.1 mrg gen_rtx_SYMBOL_REF (Pmode, "gomp_gcn_exit_kernel")); 3157 1.1 mrg emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx)); 3158 1.1 mrg } 3159 1.1 mrg else if (TREE_CODE (TREE_TYPE (DECL_RESULT (cfun->decl))) != VOID_TYPE) 3160 1.1 mrg { 3161 1.1 mrg /* Assume that an exit value compatible with gcn-run is expected. 3162 1.1 mrg That is, the third input parameter is an int*. 3163 1.1 mrg 3164 1.1 mrg We can't allocate any new registers, but the kernarg_reg is 3165 1.1 mrg dead after this, so we'll use that. 
*/ 3166 1.1 mrg rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg 3167 1.1 mrg [KERNARG_SEGMENT_PTR_ARG]); 3168 1.1 mrg rtx retptr_mem = gen_rtx_MEM (DImode, 3169 1.1 mrg gen_rtx_PLUS (DImode, kernarg_reg, 3170 1.1 mrg GEN_INT (16))); 3171 1.1 mrg set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT); 3172 1.1 mrg emit_move_insn (kernarg_reg, retptr_mem); 3173 1.1 mrg 3174 1.1 mrg rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg); 3175 1.1 mrg set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT); 3176 1.1 mrg emit_move_insn (retval_mem, 3177 1.1 mrg gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG))); 3178 1.1 mrg } 3179 1.1 mrg 3180 1.1 mrg emit_jump_insn (gen_gcn_return ()); 3181 1.1 mrg } 3182 1.1 mrg 3183 1.1 mrg /* Implement TARGET_FRAME_POINTER_REQUIRED. 3184 1.1 mrg 3185 1.1 mrg Return true if the frame pointer should not be eliminated. */ 3186 1.1 mrg 3187 1.1 mrg bool 3188 1.1 mrg gcn_frame_pointer_rqd (void) 3189 1.1 mrg { 3190 1.1 mrg /* GDB needs the frame pointer in order to unwind properly, 3191 1.1 mrg but that's not important for the entry point, unless alloca is used. 3192 1.1 mrg It's not important for code execution, so we should repect the 3193 1.1 mrg -fomit-frame-pointer flag. */ 3194 1.1 mrg return (!flag_omit_frame_pointer 3195 1.1 mrg && cfun 3196 1.1 mrg && (cfun->calls_alloca 3197 1.1 mrg || (cfun->machine && cfun->machine->normal_function))); 3198 1.1 mrg } 3199 1.1 mrg 3200 1.1 mrg /* Implement TARGET_CAN_ELIMINATE. 3201 1.1 mrg 3202 1.1 mrg Return true if the compiler is allowed to try to replace register number 3203 1.1 mrg FROM_REG with register number TO_REG. 3204 1.1 mrg 3205 1.1 mrg FIXME: is the default "true" not enough? Should this be a negative set? 
*/ 3206 1.1 mrg 3207 1.1 mrg bool 3208 1.1 mrg gcn_can_eliminate_p (int /*from_reg */ , int to_reg) 3209 1.1 mrg { 3210 1.1 mrg return (to_reg == HARD_FRAME_POINTER_REGNUM 3211 1.1 mrg || to_reg == STACK_POINTER_REGNUM); 3212 1.1 mrg } 3213 1.1 mrg 3214 1.1 mrg /* Implement INITIAL_ELIMINATION_OFFSET. 3215 1.1 mrg 3216 1.1 mrg Returns the initial difference between the specified pair of registers, in 3217 1.1 mrg terms of stack position. */ 3218 1.1 mrg 3219 1.1 mrg HOST_WIDE_INT 3220 1.1 mrg gcn_initial_elimination_offset (int from, int to) 3221 1.1 mrg { 3222 1.1 mrg machine_function *offsets = gcn_compute_frame_offsets (); 3223 1.1 mrg 3224 1.1 mrg switch (from) 3225 1.1 mrg { 3226 1.1 mrg case ARG_POINTER_REGNUM: 3227 1.1 mrg if (to == STACK_POINTER_REGNUM) 3228 1.1 mrg return -(offsets->callee_saves + offsets->local_vars 3229 1.1 mrg + offsets->outgoing_args_size); 3230 1.1 mrg else if (to == FRAME_POINTER_REGNUM || to == HARD_FRAME_POINTER_REGNUM) 3231 1.1 mrg return -offsets->callee_saves; 3232 1.1 mrg else 3233 1.1 mrg gcc_unreachable (); 3234 1.1 mrg break; 3235 1.1 mrg 3236 1.1 mrg case FRAME_POINTER_REGNUM: 3237 1.1 mrg if (to == STACK_POINTER_REGNUM) 3238 1.1 mrg return -(offsets->local_vars + offsets->outgoing_args_size); 3239 1.1 mrg else if (to == HARD_FRAME_POINTER_REGNUM) 3240 1.1 mrg return 0; 3241 1.1 mrg else 3242 1.1 mrg gcc_unreachable (); 3243 1.1 mrg break; 3244 1.1 mrg 3245 1.1 mrg default: 3246 1.1 mrg gcc_unreachable (); 3247 1.1 mrg } 3248 1.1 mrg } 3249 1.1 mrg 3250 1.1 mrg /* Implement HARD_REGNO_RENAME_OK. 3251 1.1 mrg 3252 1.1 mrg Return true if it is permissible to rename a hard register from 3253 1.1 mrg FROM_REG to TO_REG. 
*/ 3254 1.1 mrg 3255 1.1 mrg bool 3256 1.1 mrg gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned int to_reg) 3257 1.1 mrg { 3258 1.1 mrg if (from_reg == SCC_REG 3259 1.1 mrg || from_reg == VCC_LO_REG || from_reg == VCC_HI_REG 3260 1.1 mrg || from_reg == EXEC_LO_REG || from_reg == EXEC_HI_REG 3261 1.1 mrg || to_reg == SCC_REG 3262 1.1 mrg || to_reg == VCC_LO_REG || to_reg == VCC_HI_REG 3263 1.1 mrg || to_reg == EXEC_LO_REG || to_reg == EXEC_HI_REG) 3264 1.1 mrg return false; 3265 1.1 mrg 3266 1.1 mrg /* Allow the link register to be used if it was saved. */ 3267 1.1 mrg if ((to_reg & ~1) == LINK_REGNUM) 3268 1.1 mrg return !cfun || cfun->machine->lr_needs_saving; 3269 1.1 mrg 3270 1.1 mrg /* Allow the registers used for the static chain to be used if the chain is 3271 1.1 mrg not in active use. */ 3272 1.1 mrg if ((to_reg & ~1) == STATIC_CHAIN_REGNUM) 3273 1.1 mrg return !cfun 3274 1.1 mrg || !(cfun->static_chain_decl 3275 1.1 mrg && df_regs_ever_live_p (STATIC_CHAIN_REGNUM) 3276 1.1 mrg && df_regs_ever_live_p (STATIC_CHAIN_REGNUM + 1)); 3277 1.1 mrg 3278 1.1 mrg return true; 3279 1.1 mrg } 3280 1.1 mrg 3281 1.1 mrg /* Implement HARD_REGNO_CALLER_SAVE_MODE. 3282 1.1 mrg 3283 1.1 mrg Which mode is required for saving NREGS of a pseudo-register in 3284 1.1 mrg call-clobbered hard register REGNO. */ 3285 1.1 mrg 3286 1.1 mrg machine_mode 3287 1.1 mrg gcn_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs, 3288 1.1 mrg machine_mode regmode) 3289 1.1 mrg { 3290 1.1 mrg machine_mode result = choose_hard_reg_mode (regno, nregs, NULL); 3291 1.1 mrg 3292 1.1 mrg if (VECTOR_MODE_P (result) && !VECTOR_MODE_P (regmode)) 3293 1.1 mrg result = (nregs == 1 ? SImode : DImode); 3294 1.1 mrg 3295 1.1 mrg return result; 3296 1.1 mrg } 3297 1.1 mrg 3298 1.1 mrg /* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. 3299 1.1 mrg 3300 1.1 mrg Output assembler code for a block containing the constant parts 3301 1.1 mrg of a trampoline, leaving space for the variable parts. 
*/ 3302 1.1 mrg 3303 1.1 mrg static void 3304 1.1 mrg gcn_asm_trampoline_template (FILE *f) 3305 1.1 mrg { 3306 1.1 mrg /* The source operand of the move instructions must be a 32-bit 3307 1.1 mrg constant following the opcode. */ 3308 1.1 mrg asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM); 3309 1.1 mrg asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM + 1); 3310 1.1 mrg asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG); 3311 1.1 mrg asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG + 1); 3312 1.1 mrg asm_fprintf (f, "\ts_setpc_b64\ts[%i:%i]\n", CC_SAVE_REG, CC_SAVE_REG + 1); 3313 1.1 mrg } 3314 1.1 mrg 3315 1.1 mrg /* Implement TARGET_TRAMPOLINE_INIT. 3316 1.1 mrg 3317 1.1 mrg Emit RTL insns to initialize the variable parts of a trampoline. 3318 1.1 mrg FNDECL is the decl of the target address, M_TRAMP is a MEM for 3319 1.1 mrg the trampoline, and CHAIN_VALUE is an RTX for the static chain 3320 1.1 mrg to be passed to the target function. */ 3321 1.1 mrg 3322 1.1 mrg static void 3323 1.1 mrg gcn_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) 3324 1.1 mrg { 3325 1.1 mrg if (TARGET_GCN5_PLUS) 3326 1.1 mrg sorry ("nested function trampolines not supported on GCN5 due to" 3327 1.1 mrg " non-executable stacks"); 3328 1.1 mrg 3329 1.1 mrg emit_block_move (m_tramp, assemble_trampoline_template (), 3330 1.1 mrg GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); 3331 1.1 mrg 3332 1.1 mrg rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 3333 1.1 mrg rtx chain_value_reg = copy_to_reg (chain_value); 3334 1.1 mrg rtx fnaddr_reg = copy_to_reg (fnaddr); 3335 1.1 mrg 3336 1.1 mrg for (int i = 0; i < 4; i++) 3337 1.1 mrg { 3338 1.1 mrg rtx mem = adjust_address (m_tramp, SImode, i * 8 + 4); 3339 1.1 mrg rtx reg = i < 2 ? 
chain_value_reg : fnaddr_reg; 3340 1.1 mrg emit_move_insn (mem, gen_rtx_SUBREG (SImode, reg, (i % 2) * 4)); 3341 1.1 mrg } 3342 1.1 mrg 3343 1.1 mrg rtx tramp_addr = XEXP (m_tramp, 0); 3344 1.1 mrg emit_insn (gen_clear_icache (tramp_addr, 3345 1.1 mrg plus_constant (ptr_mode, tramp_addr, 3346 1.1 mrg TRAMPOLINE_SIZE))); 3347 1.1 mrg } 3348 1.1 mrg 3349 1.1 mrg /* }}} */ 3350 1.1 mrg /* {{{ Miscellaneous. */ 3351 1.1 mrg 3352 1.1 mrg /* Implement TARGET_CANNOT_COPY_INSN_P. 3353 1.1 mrg 3354 1.1 mrg Return true if INSN must not be duplicated. */ 3355 1.1 mrg 3356 1.1 mrg static bool 3357 1.1 mrg gcn_cannot_copy_insn_p (rtx_insn *insn) 3358 1.1 mrg { 3359 1.1 mrg if (recog_memoized (insn) == CODE_FOR_gcn_wavefront_barrier) 3360 1.1 mrg return true; 3361 1.1 mrg 3362 1.1 mrg return false; 3363 1.1 mrg } 3364 1.1 mrg 3365 1.1 mrg /* Implement TARGET_DEBUG_UNWIND_INFO. 3366 1.1 mrg 3367 1.1 mrg Defines the mechanism that will be used for describing frame unwind 3368 1.1 mrg information to the debugger. */ 3369 1.1 mrg 3370 1.1 mrg static enum unwind_info_type 3371 1.1 mrg gcn_debug_unwind_info () 3372 1.1 mrg { 3373 1.1 mrg return UI_DWARF2; 3374 1.1 mrg } 3375 1.1 mrg 3376 1.1 mrg /* Determine if there is a suitable hardware conversion instruction. 3377 1.1 mrg Used primarily by the machine description. 
*/

bool
gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op)
{
  /* Conversions are scalar-to-scalar or vector-to-vector only, and
     vector conversions are decided by their element modes.  */
  if (VECTOR_MODE_P (from) != VECTOR_MODE_P (to))
    return false;
  if (VECTOR_MODE_P (from))
    {
      from = GET_MODE_INNER (from);
      to = GET_MODE_INNER (to);
    }

  /* First check that the operation matches the mode classes involved.  */
  bool from_float = (GET_MODE_CLASS (from) == MODE_FLOAT);
  bool to_float = (GET_MODE_CLASS (to) == MODE_FLOAT);
  bool from_int = (GET_MODE_CLASS (from) == MODE_INT);
  bool to_int = (GET_MODE_CLASS (to) == MODE_INT);

  switch (op)
    {
    case fix_trunc_cvt:
    case fixuns_trunc_cvt:
      if (!from_float || !to_int)
	return false;
      break;
    case float_cvt:
    case floatuns_cvt:
      if (!from_int || !to_float)
	return false;
      break;
    case extend_cvt:
      if (!from_float || !to_float
	  || GET_MODE_SIZE (from) >= GET_MODE_SIZE (to))
	return false;
      break;
    case trunc_cvt:
      if (!from_float || !to_float
	  || GET_MODE_SIZE (from) <= GET_MODE_SIZE (to))
	return false;
      break;
    }

  /* Then check that it is one of the conversions the ISA provides.  */
  return ((to == HImode && from == HFmode)
	  || (to == SImode && (from == SFmode || from == DFmode))
	  || (to == HFmode && (from == HImode || from == SFmode))
	  || (to == SFmode && (from == SImode || from == HFmode
			       || from == DFmode))
	  || (to == DFmode && (from == SImode || from == SFmode)));
}

/* Implement TARGET_EMUTLS_VAR_INIT.

   Disable emutls (gthr-gcn.h does not support it, yet).
*/ 3430 1.1 mrg 3431 1.1 mrg tree 3432 1.1 mrg gcn_emutls_var_init (tree, tree decl, tree) 3433 1.1 mrg { 3434 1.1 mrg sorry_at (DECL_SOURCE_LOCATION (decl), "TLS is not implemented for GCN."); 3435 1.1 mrg return NULL_TREE; 3436 1.1 mrg } 3437 1.1 mrg 3438 1.1 mrg /* }}} */ 3439 1.1 mrg /* {{{ Costs. */ 3440 1.1 mrg 3441 1.1 mrg /* Implement TARGET_RTX_COSTS. 3442 1.1 mrg 3443 1.1 mrg Compute a (partial) cost for rtx X. Return true if the complete 3444 1.1 mrg cost has been computed, and false if subexpressions should be 3445 1.1 mrg scanned. In either case, *TOTAL contains the cost result. */ 3446 1.1 mrg 3447 1.1 mrg static bool 3448 1.1 mrg gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool) 3449 1.1 mrg { 3450 1.1 mrg enum rtx_code code = GET_CODE (x); 3451 1.1 mrg switch (code) 3452 1.1 mrg { 3453 1.1 mrg case CONST: 3454 1.1 mrg case CONST_DOUBLE: 3455 1.1 mrg case CONST_VECTOR: 3456 1.1 mrg case CONST_INT: 3457 1.1 mrg if (gcn_inline_constant_p (x)) 3458 1.1 mrg *total = 0; 3459 1.1 mrg else if (code == CONST_INT 3460 1.1 mrg && ((unsigned HOST_WIDE_INT) INTVAL (x) + 0x8000) < 0x10000) 3461 1.1 mrg *total = 1; 3462 1.1 mrg else if (gcn_constant_p (x)) 3463 1.1 mrg *total = 2; 3464 1.1 mrg else 3465 1.1 mrg *total = vgpr_vector_mode_p (GET_MODE (x)) ? 64 : 4; 3466 1.1 mrg return true; 3467 1.1 mrg 3468 1.1 mrg case DIV: 3469 1.1 mrg *total = 100; 3470 1.1 mrg return false; 3471 1.1 mrg 3472 1.1 mrg default: 3473 1.1 mrg *total = 3; 3474 1.1 mrg return false; 3475 1.1 mrg } 3476 1.1 mrg } 3477 1.1 mrg 3478 1.1 mrg /* Implement TARGET_MEMORY_MOVE_COST. 3479 1.1 mrg 3480 1.1 mrg Return the cost of moving data of mode M between a 3481 1.1 mrg register and memory. A value of 2 is the default; this cost is 3482 1.1 mrg relative to those in `REGISTER_MOVE_COST'. 3483 1.1 mrg 3484 1.1 mrg This function is used extensively by register_move_cost that is used to 3485 1.1 mrg build tables at startup. Make it inline in this case. 
3486 1.1 mrg When IN is 2, return maximum of in and out move cost. 3487 1.1 mrg 3488 1.1 mrg If moving between registers and memory is more expensive than 3489 1.1 mrg between two registers, you should define this macro to express the 3490 1.1 mrg relative cost. 3491 1.1 mrg 3492 1.1 mrg Model also increased moving costs of QImode registers in non 3493 1.1 mrg Q_REGS classes. */ 3494 1.1 mrg 3495 1.1 mrg #define LOAD_COST 32 3496 1.1 mrg #define STORE_COST 32 3497 1.1 mrg static int 3498 1.1 mrg gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) 3499 1.1 mrg { 3500 1.1 mrg int nregs = CEIL (GET_MODE_SIZE (mode), 4); 3501 1.1 mrg switch (regclass) 3502 1.1 mrg { 3503 1.1 mrg case SCC_CONDITIONAL_REG: 3504 1.1 mrg case VCCZ_CONDITIONAL_REG: 3505 1.1 mrg case VCC_CONDITIONAL_REG: 3506 1.1 mrg case EXECZ_CONDITIONAL_REG: 3507 1.1 mrg case ALL_CONDITIONAL_REGS: 3508 1.1 mrg case SGPR_REGS: 3509 1.1 mrg case SGPR_EXEC_REGS: 3510 1.1 mrg case EXEC_MASK_REG: 3511 1.1 mrg case SGPR_VOP_SRC_REGS: 3512 1.1 mrg case SGPR_MEM_SRC_REGS: 3513 1.1 mrg case SGPR_SRC_REGS: 3514 1.1 mrg case SGPR_DST_REGS: 3515 1.1 mrg case GENERAL_REGS: 3516 1.1 mrg case AFP_REGS: 3517 1.1 mrg if (!in) 3518 1.1 mrg return (STORE_COST + 2) * nregs; 3519 1.1 mrg return LOAD_COST * nregs; 3520 1.1 mrg case VGPR_REGS: 3521 1.1 mrg if (in) 3522 1.1 mrg return (LOAD_COST + 2) * nregs; 3523 1.1 mrg return STORE_COST * nregs; 3524 1.1 mrg case ALL_REGS: 3525 1.1 mrg case ALL_GPR_REGS: 3526 1.1 mrg case SRCDST_REGS: 3527 1.1 mrg if (in) 3528 1.1 mrg return (LOAD_COST + 2) * nregs; 3529 1.1 mrg return (STORE_COST + 2) * nregs; 3530 1.1 mrg default: 3531 1.1 mrg gcc_unreachable (); 3532 1.1 mrg } 3533 1.1 mrg } 3534 1.1 mrg 3535 1.1 mrg /* Implement TARGET_REGISTER_MOVE_COST. 3536 1.1 mrg 3537 1.1 mrg Return the cost of moving data from a register in class CLASS1 to 3538 1.1 mrg one in class CLASS2. Base value is 2. 
*/ 3539 1.1 mrg 3540 1.1 mrg static int 3541 1.1 mrg gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src) 3542 1.1 mrg { 3543 1.1 mrg /* Increase cost of moving from and to vector registers. While this is 3544 1.1 mrg fast in hardware (I think), it has hidden cost of setting up the exec 3545 1.1 mrg flags. */ 3546 1.1 mrg if ((src < VGPR_REGS) != (dst < VGPR_REGS)) 3547 1.1 mrg return 4; 3548 1.1 mrg return 2; 3549 1.1 mrg } 3550 1.1 mrg 3551 1.1 mrg /* }}} */ 3552 1.1 mrg /* {{{ Builtins. */ 3553 1.1 mrg 3554 1.1 mrg /* Type codes used by GCN built-in definitions. */ 3555 1.1 mrg 3556 1.1 mrg enum gcn_builtin_type_index 3557 1.1 mrg { 3558 1.1 mrg GCN_BTI_END_OF_PARAMS, 3559 1.1 mrg 3560 1.1 mrg GCN_BTI_VOID, 3561 1.1 mrg GCN_BTI_BOOL, 3562 1.1 mrg GCN_BTI_INT, 3563 1.1 mrg GCN_BTI_UINT, 3564 1.1 mrg GCN_BTI_SIZE_T, 3565 1.1 mrg GCN_BTI_LLINT, 3566 1.1 mrg GCN_BTI_LLUINT, 3567 1.1 mrg GCN_BTI_EXEC, 3568 1.1 mrg 3569 1.1 mrg GCN_BTI_SF, 3570 1.1 mrg GCN_BTI_V64SI, 3571 1.1 mrg GCN_BTI_V64SF, 3572 1.1 mrg GCN_BTI_V64PTR, 3573 1.1 mrg GCN_BTI_SIPTR, 3574 1.1 mrg GCN_BTI_SFPTR, 3575 1.1 mrg GCN_BTI_VOIDPTR, 3576 1.1 mrg 3577 1.1 mrg GCN_BTI_LDS_VOIDPTR, 3578 1.1 mrg 3579 1.1 mrg GCN_BTI_MAX 3580 1.1 mrg }; 3581 1.1 mrg 3582 1.1 mrg static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX]; 3583 1.1 mrg 3584 1.1 mrg #define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC]) 3585 1.1 mrg #define sf_type_node (gcn_builtin_types[GCN_BTI_SF]) 3586 1.1 mrg #define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI]) 3587 1.1 mrg #define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF]) 3588 1.1 mrg #define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR]) 3589 1.1 mrg #define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR]) 3590 1.1 mrg #define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR]) 3591 1.1 mrg #define voidptr_type_node (gcn_builtin_types[GCN_BTI_VOIDPTR]) 3592 1.1 mrg #define size_t_type_node (gcn_builtin_types[GCN_BTI_SIZE_T]) 3593 1.1 mrg 3594 
1.1 mrg static rtx gcn_expand_builtin_1 (tree, rtx, rtx, machine_mode, int, 3595 1.1 mrg struct gcn_builtin_description *); 3596 1.1 mrg static rtx gcn_expand_builtin_binop (tree, rtx, rtx, machine_mode, int, 3597 1.1 mrg struct gcn_builtin_description *); 3598 1.1 mrg 3599 1.1 mrg struct gcn_builtin_description; 3600 1.1 mrg typedef rtx (*gcn_builtin_expander) (tree, rtx, rtx, machine_mode, int, 3601 1.1 mrg struct gcn_builtin_description *); 3602 1.1 mrg 3603 1.1 mrg enum gcn_builtin_type 3604 1.1 mrg { 3605 1.1 mrg B_UNIMPLEMENTED, /* Sorry out */ 3606 1.1 mrg B_INSN, /* Emit a pattern */ 3607 1.1 mrg B_OVERLOAD /* Placeholder for an overloaded function */ 3608 1.1 mrg }; 3609 1.1 mrg 3610 1.1 mrg struct gcn_builtin_description 3611 1.1 mrg { 3612 1.1 mrg int fcode; 3613 1.1 mrg int icode; 3614 1.1 mrg const char *name; 3615 1.1 mrg enum gcn_builtin_type type; 3616 1.1 mrg /* The first element of parm is always the return type. The rest 3617 1.1 mrg are a zero terminated list of parameters. */ 3618 1.1 mrg int parm[6]; 3619 1.1 mrg gcn_builtin_expander expander; 3620 1.1 mrg }; 3621 1.1 mrg 3622 1.1 mrg /* Read in the GCN builtins from gcn-builtins.def. 
*/ 3623 1.1 mrg 3624 1.1 mrg extern GTY(()) struct gcn_builtin_description gcn_builtins[GCN_BUILTIN_MAX]; 3625 1.1 mrg 3626 1.1 mrg struct gcn_builtin_description gcn_builtins[] = { 3627 1.1 mrg #define DEF_BUILTIN(fcode, icode, name, type, params, expander) \ 3628 1.1 mrg {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander}, 3629 1.1 mrg 3630 1.1 mrg #define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \ 3631 1.1 mrg {GCN_BUILTIN_ ## fcode ## _V64SI, \ 3632 1.1 mrg CODE_FOR_ ## ic ##v64si3_exec, name "_v64int", B_INSN, \ 3633 1.1 mrg {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \ 3634 1.1 mrg GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, \ 3635 1.1 mrg {GCN_BUILTIN_ ## fcode ## _V64SI_unspec, \ 3636 1.1 mrg CODE_FOR_ ## ic ##v64si3_exec, name "_v64int_unspec", B_INSN, \ 3637 1.1 mrg {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \ 3638 1.1 mrg GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, 3639 1.1 mrg 3640 1.1 mrg #include "gcn-builtins.def" 3641 1.1 mrg #undef DEF_BUILTIN_BINOP_INT_FP 3642 1.1 mrg #undef DEF_BUILTIN 3643 1.1 mrg }; 3644 1.1 mrg 3645 1.1 mrg static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX]; 3646 1.1 mrg 3647 1.1 mrg /* Implement TARGET_BUILTIN_DECL. 3648 1.1 mrg 3649 1.1 mrg Return the GCN builtin for CODE. */ 3650 1.1 mrg 3651 1.1 mrg tree 3652 1.1 mrg gcn_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p)) 3653 1.1 mrg { 3654 1.1 mrg if (code >= GCN_BUILTIN_MAX) 3655 1.1 mrg return error_mark_node; 3656 1.1 mrg 3657 1.1 mrg return gcn_builtin_decls[code]; 3658 1.1 mrg } 3659 1.1 mrg 3660 1.1 mrg /* Helper function for gcn_init_builtins. 
*/

static void
gcn_init_builtin_types (void)
{
  gcn_builtin_types[GCN_BTI_VOID] = void_type_node;
  gcn_builtin_types[GCN_BTI_BOOL] = boolean_type_node;
  gcn_builtin_types[GCN_BTI_INT] = intSI_type_node;
  gcn_builtin_types[GCN_BTI_UINT] = unsigned_type_for (intSI_type_node);
  gcn_builtin_types[GCN_BTI_SIZE_T] = size_type_node;
  gcn_builtin_types[GCN_BTI_LLINT] = intDI_type_node;
  gcn_builtin_types[GCN_BTI_LLUINT] = unsigned_type_for (intDI_type_node);

  exec_type_node = unsigned_intDI_type_node;
  sf_type_node = float32_type_node;
  v64si_type_node = build_vector_type (intSI_type_node, 64);
  v64sf_type_node = build_vector_type (float_type_node, 64);
  v64ptr_type_node = build_vector_type (unsigned_intDI_type_node
					/*build_pointer_type
					   (integer_type_node) */
					, 64);

  /* The pointer types all target the FLAT address space, so build each from
     a distinct copy of the pointed-to type with the address space set.  */
  tree tmp = build_distinct_type_copy (intSI_type_node);
  TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
  siptr_type_node = build_pointer_type (tmp);

  tmp = build_distinct_type_copy (float_type_node);
  TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
  sfptr_type_node = build_pointer_type (tmp);

  tmp = build_distinct_type_copy (void_type_node);
  TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
  voidptr_type_node = build_pointer_type (tmp);

  tmp = build_distinct_type_copy (void_type_node);
  TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_LDS;
  gcn_builtin_types[GCN_BTI_LDS_VOIDPTR] = build_pointer_type (tmp);
}

/* Implement TARGET_INIT_BUILTINS.

   Set up all builtin functions for this target.
*/ 3701 1.1 mrg 3702 1.1 mrg static void 3703 1.1 mrg gcn_init_builtins (void) 3704 1.1 mrg { 3705 1.1 mrg gcn_init_builtin_types (); 3706 1.1 mrg 3707 1.1 mrg struct gcn_builtin_description *d; 3708 1.1 mrg unsigned int i; 3709 1.1 mrg for (i = 0, d = gcn_builtins; i < GCN_BUILTIN_MAX; i++, d++) 3710 1.1 mrg { 3711 1.1 mrg tree p; 3712 1.1 mrg char name[64]; /* build_function will make a copy. */ 3713 1.1 mrg int parm; 3714 1.1 mrg 3715 1.1 mrg /* FIXME: Is this necessary/useful? */ 3716 1.1 mrg if (d->name == 0) 3717 1.1 mrg continue; 3718 1.1 mrg 3719 1.1 mrg /* Find last parm. */ 3720 1.1 mrg for (parm = 1; d->parm[parm] != GCN_BTI_END_OF_PARAMS; parm++) 3721 1.1 mrg ; 3722 1.1 mrg 3723 1.1 mrg p = void_list_node; 3724 1.1 mrg while (parm > 1) 3725 1.1 mrg p = tree_cons (NULL_TREE, gcn_builtin_types[d->parm[--parm]], p); 3726 1.1 mrg 3727 1.1 mrg p = build_function_type (gcn_builtin_types[d->parm[0]], p); 3728 1.1 mrg 3729 1.1 mrg sprintf (name, "__builtin_gcn_%s", d->name); 3730 1.1 mrg gcn_builtin_decls[i] 3731 1.1 mrg = add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE); 3732 1.1 mrg 3733 1.1 mrg /* These builtins don't throw. */ 3734 1.1 mrg TREE_NOTHROW (gcn_builtin_decls[i]) = 1; 3735 1.1 mrg } 3736 1.1 mrg 3737 1.1 mrg /* These builtins need to take/return an LDS pointer: override the generic 3738 1.1 mrg versions here. 
*/ 3739 1.1 mrg 3740 1.1 mrg set_builtin_decl (BUILT_IN_GOACC_SINGLE_START, 3741 1.1 mrg gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_START], false); 3742 1.1 mrg 3743 1.1 mrg set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_START, 3744 1.1 mrg gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_START], 3745 1.1 mrg false); 3746 1.1 mrg 3747 1.1 mrg set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_END, 3748 1.1 mrg gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_END], 3749 1.1 mrg false); 3750 1.1 mrg 3751 1.1 mrg set_builtin_decl (BUILT_IN_GOACC_BARRIER, 3752 1.1 mrg gcn_builtin_decls[GCN_BUILTIN_ACC_BARRIER], false); 3753 1.1 mrg } 3754 1.1 mrg 3755 1.1 mrg /* Implement TARGET_INIT_LIBFUNCS. */ 3756 1.1 mrg 3757 1.1 mrg static void 3758 1.1 mrg gcn_init_libfuncs (void) 3759 1.1 mrg { 3760 1.1 mrg /* BITS_PER_UNIT * 2 is 64 bits, which causes 3761 1.1 mrg optabs-libfuncs.cc:gen_int_libfunc to omit TImode (i.e 128 bits) 3762 1.1 mrg libcalls that we need to support operations for that type. Initialise 3763 1.1 mrg them here instead. 
*/ 3764 1.1 mrg set_optab_libfunc (udiv_optab, TImode, "__udivti3"); 3765 1.1 mrg set_optab_libfunc (umod_optab, TImode, "__umodti3"); 3766 1.1 mrg set_optab_libfunc (sdiv_optab, TImode, "__divti3"); 3767 1.1 mrg set_optab_libfunc (smod_optab, TImode, "__modti3"); 3768 1.1 mrg set_optab_libfunc (smul_optab, TImode, "__multi3"); 3769 1.1 mrg set_optab_libfunc (addv_optab, TImode, "__addvti3"); 3770 1.1 mrg set_optab_libfunc (subv_optab, TImode, "__subvti3"); 3771 1.1 mrg set_optab_libfunc (negv_optab, TImode, "__negvti2"); 3772 1.1 mrg set_optab_libfunc (absv_optab, TImode, "__absvti2"); 3773 1.1 mrg set_optab_libfunc (smulv_optab, TImode, "__mulvti3"); 3774 1.1 mrg set_optab_libfunc (ffs_optab, TImode, "__ffsti2"); 3775 1.1 mrg set_optab_libfunc (clz_optab, TImode, "__clzti2"); 3776 1.1 mrg set_optab_libfunc (ctz_optab, TImode, "__ctzti2"); 3777 1.1 mrg set_optab_libfunc (clrsb_optab, TImode, "__clrsbti2"); 3778 1.1 mrg set_optab_libfunc (popcount_optab, TImode, "__popcountti2"); 3779 1.1 mrg set_optab_libfunc (parity_optab, TImode, "__parityti2"); 3780 1.1 mrg set_optab_libfunc (bswap_optab, TImode, "__bswapti2"); 3781 1.1 mrg } 3782 1.1 mrg 3783 1.1 mrg /* Expand the CMP_SWAP GCN builtins. We have our own versions that do 3784 1.1 mrg not require taking the address of any object, other than the memory 3785 1.1 mrg cell being operated on. 3786 1.1 mrg 3787 1.1 mrg Helper function for gcn_expand_builtin_1. 
*/

static rtx
gcn_expand_cmp_swap (tree exp, rtx target)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
  /* The address space of the memory cell comes from the pointee type of
     the first argument.  */
  addr_space_t as
    = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (CALL_EXPR_ARG (exp, 0))));
  machine_mode as_mode = gcn_addr_space_address_mode (as);

  if (!target)
    target = gen_reg_rtx (mode);

  rtx addr = expand_expr (CALL_EXPR_ARG (exp, 0),
			  NULL_RTX, as_mode, EXPAND_NORMAL);
  rtx cmp = expand_expr (CALL_EXPR_ARG (exp, 1),
			 NULL_RTX, mode, EXPAND_NORMAL);
  rtx src = expand_expr (CALL_EXPR_ARG (exp, 2),
			 NULL_RTX, mode, EXPAND_NORMAL);
  rtx pat;

  rtx mem = gen_rtx_MEM (mode, force_reg (as_mode, addr));
  set_mem_addr_space (mem, as);

  if (!REG_P (cmp))
    cmp = copy_to_mode_reg (mode, cmp);
  if (!REG_P (src))
    src = copy_to_mode_reg (mode, src);

  if (mode == SImode)
    pat = gen_sync_compare_and_swapsi (target, mem, cmp, src);
  else
    pat = gen_sync_compare_and_swapdi (target, mem, cmp, src);

  emit_insn (pat);

  return target;
}

/* Expand many different builtins.

   Intended for use in gcn-builtins.def.
*/

static rtx
gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
		      machine_mode /*mode */ , int ignore,
		      struct gcn_builtin_description *)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  switch (DECL_MD_FUNCTION_CODE (fndecl))
    {
    case GCN_BUILTIN_FLAT_LOAD_INT32:
      {
	if (ignore)
	  return target;
	/*rtx exec = */
	force_reg (DImode,
		   expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
				EXPAND_NORMAL));
	/*rtx ptr = */
	force_reg (V64DImode,
		   expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
				EXPAND_NORMAL));
	/*emit_insn (gen_vector_flat_loadv64si
	   (target, gcn_gen_undef (V64SImode), ptr, exec)); */
	return target;
      }
    case GCN_BUILTIN_FLAT_LOAD_PTR_INT32:
    case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT:
      {
	if (ignore)
	  return target;
	rtx exec = force_reg (DImode,
			      expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
					   DImode,
					   EXPAND_NORMAL));
	rtx ptr = force_reg (DImode,
			     expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
					  V64DImode,
					  EXPAND_NORMAL));
	rtx offsets = force_reg (V64SImode,
				 expand_expr (CALL_EXPR_ARG (exp, 2),
					      NULL_RTX, V64DImode,
					      EXPAND_NORMAL));
	/* Compute per-lane addresses: ptr + (offset << 2).  */
	rtx addrs = gen_reg_rtx (V64DImode);
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_ashlv64si3_exec (tmp, offsets,
					GEN_INT (2),
					gcn_gen_undef (V64SImode), exec));
	emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
						 gcn_gen_undef (V64DImode),
						 exec));
	rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
	/*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
	/* FIXME: set attributes.  */
	emit_insn (gen_mov_with_exec (target, mem, exec));
	return target;
      }
    case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
    case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT:
      {
	rtx exec = force_reg (DImode,
			      expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
					   DImode,
					   EXPAND_NORMAL));
	rtx ptr = force_reg (DImode,
			     expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
					  V64DImode,
					  EXPAND_NORMAL));
	rtx offsets = force_reg (V64SImode,
				 expand_expr (CALL_EXPR_ARG (exp, 2),
					      NULL_RTX, V64DImode,
					      EXPAND_NORMAL));
	machine_mode vmode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
								  3)));
	rtx val = force_reg (vmode,
			     expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
					  vmode,
					  EXPAND_NORMAL));
	/* Compute per-lane addresses: ptr + (offset << 2).  */
	rtx addrs = gen_reg_rtx (V64DImode);
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_ashlv64si3_exec (tmp, offsets,
					GEN_INT (2),
					gcn_gen_undef (V64SImode), exec));
	emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
						 gcn_gen_undef (V64DImode),
						 exec));
	rtx mem = gen_rtx_MEM (vmode, addrs);
	/*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
	/* FIXME: set attributes.  */
	emit_insn (gen_mov_with_exec (mem, val, exec));
	return target;
      }
    case GCN_BUILTIN_SQRTVF:
      {
	if (ignore)
	  return target;
	rtx exec = gcn_full_exec_reg ();
	rtx arg = force_reg (V64SFmode,
			     expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
					  V64SFmode,
					  EXPAND_NORMAL));
	emit_insn (gen_sqrtv64sf2_exec
		   (target, arg, gcn_gen_undef (V64SFmode), exec));
	return target;
      }
    case GCN_BUILTIN_SQRTF:
      {
	if (ignore)
	  return target;
	rtx arg = force_reg (SFmode,
			     expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
					  SFmode,
					  EXPAND_NORMAL));
	emit_insn (gen_sqrtsf2 (target, arg));
	return target;
      }
    case GCN_BUILTIN_OMP_DIM_SIZE:
      {
	if (ignore)
	  return target;
	emit_insn (gen_oacc_dim_size (target,
				      expand_expr (CALL_EXPR_ARG (exp, 0),
						   NULL_RTX, SImode,
						   EXPAND_NORMAL)));
	return target;
      }
    case GCN_BUILTIN_OMP_DIM_POS:
      {
	if (ignore)
	  return target;
	emit_insn (gen_oacc_dim_pos (target,
				     expand_expr (CALL_EXPR_ARG (exp, 0),
						  NULL_RTX, SImode,
						  EXPAND_NORMAL)));
	return target;
      }
    case GCN_BUILTIN_CMP_SWAP:
    case GCN_BUILTIN_CMP_SWAPLL:
      return gcn_expand_cmp_swap (exp, target);

    case GCN_BUILTIN_ACC_SINGLE_START:
      {
	if (ignore)
	  return target;

	/* Result is true only in wavefront 0 of the gang.  */
	rtx wavefront = gcn_oacc_dim_pos (1);
	rtx cond = gen_rtx_EQ (VOIDmode, wavefront, const0_rtx);
	rtx cc = (target && REG_P (target)) ? target : gen_reg_rtx (BImode);
	emit_insn (gen_cstoresi4 (cc, cond, wavefront, const0_rtx));
	return cc;
      }

    case GCN_BUILTIN_ACC_SINGLE_COPY_START:
      {
	rtx blk = force_reg (SImode,
			     expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
					  SImode, EXPAND_NORMAL));
	/* Wavefront 0 returns a null pointer; all others return BLK.  */
	rtx wavefront = gcn_oacc_dim_pos (1);
	rtx cond = gen_rtx_NE (VOIDmode, wavefront, const0_rtx);
	rtx not_zero = gen_label_rtx ();
	emit_insn (gen_cbranchsi4 (cond, wavefront, const0_rtx, not_zero));
	emit_move_insn (blk, const0_rtx);
	emit_label (not_zero);
	return blk;
      }

    case GCN_BUILTIN_ACC_SINGLE_COPY_END:
      return target;

    case GCN_BUILTIN_ACC_BARRIER:
      emit_insn (gen_gcn_wavefront_barrier ());
      return target;

    default:
      gcc_unreachable ();
    }
}

/* Expansion of simple arithmetic and bit binary operation builtins.

   Intended for use with gcn_builtins table.
*/

static rtx
gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget */ ,
			  machine_mode /*mode */ , int ignore,
			  struct gcn_builtin_description *d)
{
  int icode = d->icode;
  if (ignore)
    return target;

  /* Operand 0 of the builtin is always the EXEC mask.  */
  rtx exec = force_reg (DImode,
			expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
				     EXPAND_NORMAL));

  machine_mode m1 = insn_data[icode].operand[1].mode;
  rtx arg1 = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, m1,
			  EXPAND_NORMAL);
  if (!insn_data[icode].operand[1].predicate (arg1, m1))
    arg1 = force_reg (m1, arg1);

  machine_mode m2 = insn_data[icode].operand[2].mode;
  rtx arg2 = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, m2,
			  EXPAND_NORMAL);
  if (!insn_data[icode].operand[2].predicate (arg2, m2))
    arg2 = force_reg (m2, arg2);

  /* The optional fourth argument supplies the previous value of the
     destination, for the lanes that are masked off by EXEC.  */
  rtx arg_prev;
  if (call_expr_nargs (exp) == 4)
    {
      machine_mode m_prev = insn_data[icode].operand[4].mode;
      arg_prev = force_reg (m_prev,
			    expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
					 m_prev, EXPAND_NORMAL));
    }
  else
    arg_prev = gcn_gen_undef (GET_MODE (target));

  rtx pat = GEN_FCN (icode) (target, arg1, arg2, exec, arg_prev);
  emit_insn (pat);
  return target;
}

/* Implement TARGET_EXPAND_BUILTIN.

   Expand an expression EXP that calls a built-in function, with result going
   to TARGET if that's convenient (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
4056 1.1 mrg IGNORE is nonzero if the value is to be ignored. */ 4057 1.1 mrg 4058 1.1 mrg rtx 4059 1.1 mrg gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, 4060 1.1 mrg int ignore) 4061 1.1 mrg { 4062 1.1 mrg tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 4063 1.1 mrg unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); 4064 1.1 mrg struct gcn_builtin_description *d; 4065 1.1 mrg 4066 1.1 mrg gcc_assert (fcode < GCN_BUILTIN_MAX); 4067 1.1 mrg d = &gcn_builtins[fcode]; 4068 1.1 mrg 4069 1.1 mrg if (d->type == B_UNIMPLEMENTED) 4070 1.1 mrg sorry ("Builtin not implemented"); 4071 1.1 mrg 4072 1.1 mrg return d->expander (exp, target, subtarget, mode, ignore, d); 4073 1.1 mrg } 4074 1.1 mrg 4075 1.1 mrg /* }}} */ 4076 1.1 mrg /* {{{ Vectorization. */ 4077 1.1 mrg 4078 1.1 mrg /* Implement TARGET_VECTORIZE_GET_MASK_MODE. 4079 1.1 mrg 4080 1.1 mrg A vector mask is a value that holds one boolean result for every element in 4081 1.1 mrg a vector. */ 4082 1.1 mrg 4083 1.1 mrg opt_machine_mode 4084 1.1 mrg gcn_vectorize_get_mask_mode (machine_mode) 4085 1.1 mrg { 4086 1.1 mrg /* GCN uses a DImode bit-mask. */ 4087 1.1 mrg return DImode; 4088 1.1 mrg } 4089 1.1 mrg 4090 1.1 mrg /* Return an RTX that references a vector with the i-th lane containing 4091 1.1 mrg PERM[i]*4. 4092 1.1 mrg 4093 1.1 mrg Helper function for gcn_vectorize_vec_perm_const. */ 4094 1.1 mrg 4095 1.1 mrg static rtx 4096 1.1 mrg gcn_make_vec_perm_address (unsigned int *perm) 4097 1.1 mrg { 4098 1.1 mrg rtx x = gen_reg_rtx (V64SImode); 4099 1.1 mrg emit_move_insn (x, gcn_vec_constant (V64SImode, 0)); 4100 1.1 mrg 4101 1.1 mrg /* Permutation addresses use byte addressing. With each vector lane being 4102 1.1 mrg 4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant, 4103 1.1 mrg so only set those. 
4104 1.1 mrg 4105 1.1 mrg The permutation given to the vec_perm* patterns range from 0 to 2N-1 to 4106 1.1 mrg select between lanes in two vectors, but as the DS_BPERMUTE* instructions 4107 1.1 mrg only take one source vector, the most-significant bit can be ignored 4108 1.1 mrg here. Instead, we can use EXEC masking to select the relevant part of 4109 1.1 mrg each source vector after they are permuted separately. */ 4110 1.1 mrg uint64_t bit_mask = 1 << 2; 4111 1.1 mrg for (int i = 2; i < 8; i++, bit_mask <<= 1) 4112 1.1 mrg { 4113 1.1 mrg uint64_t exec_mask = 0; 4114 1.1 mrg uint64_t lane_mask = 1; 4115 1.1 mrg for (int j = 0; j < 64; j++, lane_mask <<= 1) 4116 1.1 mrg if ((perm[j] * 4) & bit_mask) 4117 1.1 mrg exec_mask |= lane_mask; 4118 1.1 mrg 4119 1.1 mrg if (exec_mask) 4120 1.1 mrg emit_insn (gen_addv64si3_exec (x, x, 4121 1.1 mrg gcn_vec_constant (V64SImode, 4122 1.1 mrg bit_mask), 4123 1.1 mrg x, get_exec (exec_mask))); 4124 1.1 mrg } 4125 1.1 mrg 4126 1.1 mrg return x; 4127 1.1 mrg } 4128 1.1 mrg 4129 1.1 mrg /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. 4130 1.1 mrg 4131 1.1 mrg Return true if permutation with SEL is possible. 4132 1.1 mrg 4133 1.1 mrg If DST/SRC0/SRC1 are non-null, emit the instructions to perform the 4134 1.1 mrg permutations. */ 4135 1.1 mrg 4136 1.1 mrg static bool 4137 1.1 mrg gcn_vectorize_vec_perm_const (machine_mode vmode, rtx dst, 4138 1.1 mrg rtx src0, rtx src1, 4139 1.1 mrg const vec_perm_indices & sel) 4140 1.1 mrg { 4141 1.1 mrg unsigned int nelt = GET_MODE_NUNITS (vmode); 4142 1.1 mrg 4143 1.1 mrg gcc_assert (VECTOR_MODE_P (vmode)); 4144 1.1 mrg gcc_assert (nelt <= 64); 4145 1.1 mrg gcc_assert (sel.length () == nelt); 4146 1.1 mrg 4147 1.1 mrg if (!dst) 4148 1.1 mrg { 4149 1.1 mrg /* All vector permutations are possible on this architecture, 4150 1.1 mrg with varying degrees of efficiency depending on the permutation. 
*/ 4151 1.1 mrg return true; 4152 1.1 mrg } 4153 1.1 mrg 4154 1.1 mrg unsigned int perm[64]; 4155 1.1 mrg for (unsigned int i = 0; i < nelt; ++i) 4156 1.1 mrg perm[i] = sel[i] & (2 * nelt - 1); 4157 1.1 mrg for (unsigned int i = nelt; i < 64; ++i) 4158 1.1 mrg perm[i] = 0; 4159 1.1 mrg 4160 1.1 mrg src0 = force_reg (vmode, src0); 4161 1.1 mrg src1 = force_reg (vmode, src1); 4162 1.1 mrg 4163 1.1 mrg /* Make life a bit easier by swapping operands if necessary so that 4164 1.1 mrg the first element always comes from src0. */ 4165 1.1 mrg if (perm[0] >= nelt) 4166 1.1 mrg { 4167 1.1 mrg std::swap (src0, src1); 4168 1.1 mrg 4169 1.1 mrg for (unsigned int i = 0; i < nelt; ++i) 4170 1.1 mrg if (perm[i] < nelt) 4171 1.1 mrg perm[i] += nelt; 4172 1.1 mrg else 4173 1.1 mrg perm[i] -= nelt; 4174 1.1 mrg } 4175 1.1 mrg 4176 1.1 mrg /* TODO: There are more efficient ways to implement certain permutations 4177 1.1 mrg using ds_swizzle_b32 and/or DPP. Test for and expand them here, before 4178 1.1 mrg this more inefficient generic approach is used. */ 4179 1.1 mrg 4180 1.1 mrg int64_t src1_lanes = 0; 4181 1.1 mrg int64_t lane_bit = 1; 4182 1.1 mrg 4183 1.1 mrg for (unsigned int i = 0; i < nelt; ++i, lane_bit <<= 1) 4184 1.1 mrg { 4185 1.1 mrg /* Set the bits for lanes from src1. 
*/ 4186 1.1 mrg if (perm[i] >= nelt) 4187 1.1 mrg src1_lanes |= lane_bit; 4188 1.1 mrg } 4189 1.1 mrg 4190 1.1 mrg rtx addr = gcn_make_vec_perm_address (perm); 4191 1.1 mrg rtx (*ds_bpermute) (rtx, rtx, rtx, rtx); 4192 1.1 mrg 4193 1.1 mrg switch (vmode) 4194 1.1 mrg { 4195 1.1 mrg case E_V64QImode: 4196 1.1 mrg ds_bpermute = gen_ds_bpermutev64qi; 4197 1.1 mrg break; 4198 1.1 mrg case E_V64HImode: 4199 1.1 mrg ds_bpermute = gen_ds_bpermutev64hi; 4200 1.1 mrg break; 4201 1.1 mrg case E_V64SImode: 4202 1.1 mrg ds_bpermute = gen_ds_bpermutev64si; 4203 1.1 mrg break; 4204 1.1 mrg case E_V64HFmode: 4205 1.1 mrg ds_bpermute = gen_ds_bpermutev64hf; 4206 1.1 mrg break; 4207 1.1 mrg case E_V64SFmode: 4208 1.1 mrg ds_bpermute = gen_ds_bpermutev64sf; 4209 1.1 mrg break; 4210 1.1 mrg case E_V64DImode: 4211 1.1 mrg ds_bpermute = gen_ds_bpermutev64di; 4212 1.1 mrg break; 4213 1.1 mrg case E_V64DFmode: 4214 1.1 mrg ds_bpermute = gen_ds_bpermutev64df; 4215 1.1 mrg break; 4216 1.1 mrg default: 4217 1.1 mrg gcc_assert (false); 4218 1.1 mrg } 4219 1.1 mrg 4220 1.1 mrg /* Load elements from src0 to dst. */ 4221 1.1 mrg gcc_assert (~src1_lanes); 4222 1.1 mrg emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ())); 4223 1.1 mrg 4224 1.1 mrg /* Load elements from src1 to dst. */ 4225 1.1 mrg if (src1_lanes) 4226 1.1 mrg { 4227 1.1 mrg /* Masking a lane masks both the destination and source lanes for 4228 1.1 mrg DS_BPERMUTE, so we need to have all lanes enabled for the permute, 4229 1.1 mrg then add an extra masked move to merge the results of permuting 4230 1.1 mrg the two source vectors together. 4231 1.1 mrg */ 4232 1.1 mrg rtx tmp = gen_reg_rtx (vmode); 4233 1.1 mrg emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ())); 4234 1.1 mrg emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes))); 4235 1.1 mrg } 4236 1.1 mrg 4237 1.1 mrg return true; 4238 1.1 mrg } 4239 1.1 mrg 4240 1.1 mrg /* Implements TARGET_VECTOR_MODE_SUPPORTED_P. 
4241 1.1 mrg 4242 1.1 mrg Return nonzero if vector MODE is supported with at least move 4243 1.1 mrg instructions. */ 4244 1.1 mrg 4245 1.1 mrg static bool 4246 1.1 mrg gcn_vector_mode_supported_p (machine_mode mode) 4247 1.1 mrg { 4248 1.1 mrg return (mode == V64QImode || mode == V64HImode 4249 1.1 mrg || mode == V64SImode || mode == V64DImode 4250 1.1 mrg || mode == V64SFmode || mode == V64DFmode); 4251 1.1 mrg } 4252 1.1 mrg 4253 1.1 mrg /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. 4254 1.1 mrg 4255 1.1 mrg Enables autovectorization for all supported modes. */ 4256 1.1 mrg 4257 1.1 mrg static machine_mode 4258 1.1 mrg gcn_vectorize_preferred_simd_mode (scalar_mode mode) 4259 1.1 mrg { 4260 1.1 mrg switch (mode) 4261 1.1 mrg { 4262 1.1 mrg case E_QImode: 4263 1.1 mrg return V64QImode; 4264 1.1 mrg case E_HImode: 4265 1.1 mrg return V64HImode; 4266 1.1 mrg case E_SImode: 4267 1.1 mrg return V64SImode; 4268 1.1 mrg case E_DImode: 4269 1.1 mrg return V64DImode; 4270 1.1 mrg case E_SFmode: 4271 1.1 mrg return V64SFmode; 4272 1.1 mrg case E_DFmode: 4273 1.1 mrg return V64DFmode; 4274 1.1 mrg default: 4275 1.1 mrg return word_mode; 4276 1.1 mrg } 4277 1.1 mrg } 4278 1.1 mrg 4279 1.1 mrg /* Implement TARGET_VECTORIZE_RELATED_MODE. 4280 1.1 mrg 4281 1.1 mrg All GCN vectors are 64-lane, so this is simpler than other architectures. 4282 1.1 mrg In particular, we do *not* want to match vector bit-size. 
*/ 4283 1.1 mrg 4284 1.1 mrg static opt_machine_mode 4285 1.1 mrg gcn_related_vector_mode (machine_mode ARG_UNUSED (vector_mode), 4286 1.1 mrg scalar_mode element_mode, poly_uint64 nunits) 4287 1.1 mrg { 4288 1.1 mrg if (known_ne (nunits, 0U) && known_ne (nunits, 64U)) 4289 1.1 mrg return VOIDmode; 4290 1.1 mrg 4291 1.1 mrg machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode); 4292 1.1 mrg if (!VECTOR_MODE_P (pref_mode)) 4293 1.1 mrg return VOIDmode; 4294 1.1 mrg 4295 1.1 mrg return pref_mode; 4296 1.1 mrg } 4297 1.1 mrg 4298 1.1 mrg /* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. 4299 1.1 mrg 4300 1.1 mrg Returns the preferred alignment in bits for accesses to vectors of type type 4301 1.1 mrg in vectorized code. This might be less than or greater than the ABI-defined 4302 1.1 mrg value returned by TARGET_VECTOR_ALIGNMENT. It can be equal to the alignment 4303 1.1 mrg of a single element, in which case the vectorizer will not try to optimize 4304 1.1 mrg for alignment. */ 4305 1.1 mrg 4306 1.1 mrg static poly_uint64 4307 1.1 mrg gcn_preferred_vector_alignment (const_tree type) 4308 1.1 mrg { 4309 1.1 mrg return TYPE_ALIGN (TREE_TYPE (type)); 4310 1.1 mrg } 4311 1.1 mrg 4312 1.1 mrg /* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT. 4313 1.1 mrg 4314 1.1 mrg Return true if the target supports misaligned vector store/load of a 4315 1.1 mrg specific factor denoted in the misalignment parameter. */ 4316 1.1 mrg 4317 1.1 mrg static bool 4318 1.1 mrg gcn_vectorize_support_vector_misalignment (machine_mode ARG_UNUSED (mode), 4319 1.1 mrg const_tree type, int misalignment, 4320 1.1 mrg bool is_packed) 4321 1.1 mrg { 4322 1.1 mrg if (is_packed) 4323 1.1 mrg return false; 4324 1.1 mrg 4325 1.1 mrg /* If the misalignment is unknown, we should be able to handle the access 4326 1.1 mrg so long as it is not to a member of a packed data structure. 
*/ 4327 1.1 mrg if (misalignment == -1) 4328 1.1 mrg return true; 4329 1.1 mrg 4330 1.1 mrg /* Return true if the misalignment is a multiple of the natural alignment 4331 1.1 mrg of the vector's element type. This is probably always going to be 4332 1.1 mrg true in practice, since we've already established that this isn't a 4333 1.1 mrg packed access. */ 4334 1.1 mrg return misalignment % TYPE_ALIGN_UNIT (type) == 0; 4335 1.1 mrg } 4336 1.1 mrg 4337 1.1 mrg /* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. 4338 1.1 mrg 4339 1.1 mrg Return true if vector alignment is reachable (by peeling N iterations) for 4340 1.1 mrg the given scalar type TYPE. */ 4341 1.1 mrg 4342 1.1 mrg static bool 4343 1.1 mrg gcn_vector_alignment_reachable (const_tree ARG_UNUSED (type), bool is_packed) 4344 1.1 mrg { 4345 1.1 mrg /* Vectors which aren't in packed structures will not be less aligned than 4346 1.1 mrg the natural alignment of their element type, so this is safe. */ 4347 1.1 mrg return !is_packed; 4348 1.1 mrg } 4349 1.1 mrg 4350 1.1 mrg /* Generate DPP instructions used for vector reductions. 4351 1.1 mrg 4352 1.1 mrg The opcode is given by INSN. 4353 1.1 mrg The first operand of the operation is shifted right by SHIFT vector lanes. 4354 1.1 mrg SHIFT must be a power of 2. If SHIFT is 16, the 15th lane of each row is 4355 1.1 mrg broadcast the next row (thereby acting like a shift of 16 for the end of 4356 1.1 mrg each row). If SHIFT is 32, lane 31 is broadcast to all the 4357 1.1 mrg following lanes (thereby acting like a shift of 32 for lane 63). */ 4358 1.1 mrg 4359 1.1 mrg char * 4360 1.1 mrg gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn, 4361 1.1 mrg int unspec, int shift) 4362 1.1 mrg { 4363 1.1 mrg static char buf[128]; 4364 1.1 mrg const char *dpp; 4365 1.1 mrg const char *vcc_in = ""; 4366 1.1 mrg const char *vcc_out = ""; 4367 1.1 mrg 4368 1.1 mrg /* Add the vcc operand if needed. 
*/ 4369 1.1 mrg if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 4370 1.1 mrg { 4371 1.1 mrg if (unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR) 4372 1.1 mrg vcc_in = ", vcc"; 4373 1.1 mrg 4374 1.1 mrg if (unspec == UNSPEC_PLUS_CARRY_DPP_SHR 4375 1.1 mrg || unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR) 4376 1.1 mrg vcc_out = ", vcc"; 4377 1.1 mrg } 4378 1.1 mrg 4379 1.1 mrg /* Add the DPP modifiers. */ 4380 1.1 mrg switch (shift) 4381 1.1 mrg { 4382 1.1 mrg case 1: 4383 1.1 mrg dpp = "row_shr:1 bound_ctrl:0"; 4384 1.1 mrg break; 4385 1.1 mrg case 2: 4386 1.1 mrg dpp = "row_shr:2 bound_ctrl:0"; 4387 1.1 mrg break; 4388 1.1 mrg case 4: 4389 1.1 mrg dpp = "row_shr:4 bank_mask:0xe"; 4390 1.1 mrg break; 4391 1.1 mrg case 8: 4392 1.1 mrg dpp = "row_shr:8 bank_mask:0xc"; 4393 1.1 mrg break; 4394 1.1 mrg case 16: 4395 1.1 mrg dpp = "row_bcast:15 row_mask:0xa"; 4396 1.1 mrg break; 4397 1.1 mrg case 32: 4398 1.1 mrg dpp = "row_bcast:31 row_mask:0xc"; 4399 1.1 mrg break; 4400 1.1 mrg default: 4401 1.1 mrg gcc_unreachable (); 4402 1.1 mrg } 4403 1.1 mrg 4404 1.1 mrg if (unspec == UNSPEC_MOV_DPP_SHR && vgpr_2reg_mode_p (mode)) 4405 1.1 mrg sprintf (buf, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s", 4406 1.1 mrg insn, dpp, insn, dpp); 4407 1.1 mrg else if (unspec == UNSPEC_MOV_DPP_SHR) 4408 1.1 mrg sprintf (buf, "%s\t%%0, %%1 %s", insn, dpp); 4409 1.1 mrg else 4410 1.1 mrg sprintf (buf, "%s\t%%0%s, %%1, %%2%s %s", insn, vcc_out, vcc_in, dpp); 4411 1.1 mrg 4412 1.1 mrg return buf; 4413 1.1 mrg } 4414 1.1 mrg 4415 1.1 mrg /* Generate vector reductions in terms of DPP instructions. 4416 1.1 mrg 4417 1.1 mrg The vector register SRC of mode MODE is reduced using the operation given 4418 1.1 mrg by UNSPEC, and the scalar result is returned in lane 63 of a vector 4419 1.1 mrg register. 
 */

rtx
gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
{
  machine_mode orig_mode = mode;
  /* Min/max on 64-bit elements, and FP-add on V64DF, have no single
     DPP-capable ALU instruction, so they are emitted as a DPP move
     followed by a plain vector operation.  */
  bool use_moves = (((unspec == UNSPEC_SMIN_DPP_SHR
		      || unspec == UNSPEC_SMAX_DPP_SHR
		      || unspec == UNSPEC_UMIN_DPP_SHR
		      || unspec == UNSPEC_UMAX_DPP_SHR)
		     && (mode == V64DImode
			 || mode == V64DFmode))
		    || (unspec == UNSPEC_PLUS_DPP_SHR
			&& mode == V64DFmode));
  /* RTL code used for the separate vector op in the use_moves case.  */
  rtx_code code = (unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
		   : unspec == UNSPEC_SMAX_DPP_SHR ? SMAX
		   : unspec == UNSPEC_UMIN_DPP_SHR ? UMIN
		   : unspec == UNSPEC_UMAX_DPP_SHR ? UMAX
		   : unspec == UNSPEC_PLUS_DPP_SHR ? PLUS
		   : UNKNOWN);
  /* Sub-word min/max are widened to SImode first, then narrowed back.  */
  bool use_extends = ((unspec == UNSPEC_SMIN_DPP_SHR
		       || unspec == UNSPEC_SMAX_DPP_SHR
		       || unspec == UNSPEC_UMIN_DPP_SHR
		       || unspec == UNSPEC_UMAX_DPP_SHR)
		      && (mode == V64QImode
			  || mode == V64HImode));
  bool unsignedp = (unspec == UNSPEC_UMIN_DPP_SHR
		    || unspec == UNSPEC_UMAX_DPP_SHR);
  /* Integer addition uses the carry-out form where required by the ISA
     (always on GCN3; for 64-bit elements elsewhere).  */
  bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
			&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT
			&& (TARGET_GCN3 || mode == V64DImode);

  if (use_plus_carry)
    unspec = UNSPEC_PLUS_CARRY_DPP_SHR;

  if (use_extends)
    {
      /* Widen QI/HI vectors to SI for the reduction proper.  */
      rtx tmp = gen_reg_rtx (V64SImode);
      convert_move (tmp, src, unsignedp);
      src = tmp;
      mode = V64SImode;
    }

  /* Perform reduction by first performing the reduction operation on every
     pair of lanes, then on every pair of results from the previous
     iteration (thereby effectively reducing every 4 lanes) and so on until
     all lanes are reduced.  Six doubling steps cover all 64 lanes.  */
  rtx in, out = force_reg (mode, src);
  for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
    {
      rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
      in = out;
      out = gen_reg_rtx (mode);

      if (use_moves)
	{
	  /* DPP move of the shifted operand, then a plain vector op.  */
	  rtx tmp = gen_reg_rtx (mode);
	  emit_insn (gen_dpp_move (mode, tmp, in, shift_val));
	  emit_insn (gen_rtx_SET (out, gen_rtx_fmt_ee (code, mode, tmp, in)));
	}
      else
	{
	  /* Single combined DPP-shift-and-operate instruction; the UNSPEC
	     shape here must match the corresponding .md pattern.  */
	  rtx insn = gen_rtx_SET (out,
				  gen_rtx_UNSPEC (mode,
						  gen_rtvec (3, in, in,
							     shift_val),
						  unspec));

	  /* Add clobber for instructions that set the carry flags.  */
	  if (use_plus_carry)
	    {
	      rtx clobber = gen_rtx_CLOBBER (VOIDmode,
					     gen_rtx_REG (DImode, VCC_REG));
	      insn = gen_rtx_PARALLEL (VOIDmode,
				       gen_rtvec (2, insn, clobber));
	    }

	  emit_insn (insn);
	}
    }

  if (use_extends)
    {
      /* Narrow the widened result back to the caller's element mode.  */
      rtx tmp = gen_reg_rtx (orig_mode);
      convert_move (tmp, out, unsignedp);
      out = tmp;
    }

  return out;
}

/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST.  */

int
gcn_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
			tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
{
  /* Always vectorize.  */
  return 1;
}

/* }}}  */
/* {{{ md_reorg pass.  */

/* Identify VMEM instructions from their "type" attribute.
*/ 4524 1.1 mrg 4525 1.1 mrg static bool 4526 1.1 mrg gcn_vmem_insn_p (attr_type type) 4527 1.1 mrg { 4528 1.1 mrg switch (type) 4529 1.1 mrg { 4530 1.1 mrg case TYPE_MUBUF: 4531 1.1 mrg case TYPE_MTBUF: 4532 1.1 mrg case TYPE_FLAT: 4533 1.1 mrg return true; 4534 1.1 mrg case TYPE_UNKNOWN: 4535 1.1 mrg case TYPE_SOP1: 4536 1.1 mrg case TYPE_SOP2: 4537 1.1 mrg case TYPE_SOPK: 4538 1.1 mrg case TYPE_SOPC: 4539 1.1 mrg case TYPE_SOPP: 4540 1.1 mrg case TYPE_SMEM: 4541 1.1 mrg case TYPE_DS: 4542 1.1 mrg case TYPE_VOP2: 4543 1.1 mrg case TYPE_VOP1: 4544 1.1 mrg case TYPE_VOPC: 4545 1.1 mrg case TYPE_VOP3A: 4546 1.1 mrg case TYPE_VOP3B: 4547 1.1 mrg case TYPE_VOP_SDWA: 4548 1.1 mrg case TYPE_VOP_DPP: 4549 1.1 mrg case TYPE_MULT: 4550 1.1 mrg case TYPE_VMULT: 4551 1.1 mrg return false; 4552 1.1 mrg } 4553 1.1 mrg gcc_unreachable (); 4554 1.1 mrg return false; 4555 1.1 mrg } 4556 1.1 mrg 4557 1.1 mrg /* If INSN sets the EXEC register to a constant value, return the value, 4558 1.1 mrg otherwise return zero. */ 4559 1.1 mrg 4560 1.1 mrg static int64_t 4561 1.1 mrg gcn_insn_exec_value (rtx_insn *insn) 4562 1.1 mrg { 4563 1.1 mrg if (!NONDEBUG_INSN_P (insn)) 4564 1.1 mrg return 0; 4565 1.1 mrg 4566 1.1 mrg rtx pattern = PATTERN (insn); 4567 1.1 mrg 4568 1.1 mrg if (GET_CODE (pattern) == SET) 4569 1.1 mrg { 4570 1.1 mrg rtx dest = XEXP (pattern, 0); 4571 1.1 mrg rtx src = XEXP (pattern, 1); 4572 1.1 mrg 4573 1.1 mrg if (GET_MODE (dest) == DImode 4574 1.1 mrg && REG_P (dest) && REGNO (dest) == EXEC_REG 4575 1.1 mrg && CONST_INT_P (src)) 4576 1.1 mrg return INTVAL (src); 4577 1.1 mrg } 4578 1.1 mrg 4579 1.1 mrg return 0; 4580 1.1 mrg } 4581 1.1 mrg 4582 1.1 mrg /* Sets the EXEC register before INSN to the value that it had after 4583 1.1 mrg LAST_EXEC_DEF. The constant value of the EXEC register is returned if 4584 1.1 mrg known, otherwise it returns zero. 
*/ 4585 1.1 mrg 4586 1.1 mrg static int64_t 4587 1.1 mrg gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec, 4588 1.1 mrg bool curr_exec_known, bool &last_exec_def_saved) 4589 1.1 mrg { 4590 1.1 mrg rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG); 4591 1.1 mrg rtx exec; 4592 1.1 mrg 4593 1.1 mrg int64_t exec_value = gcn_insn_exec_value (last_exec_def); 4594 1.1 mrg 4595 1.1 mrg if (exec_value) 4596 1.1 mrg { 4597 1.1 mrg /* If the EXEC value is a constant and it happens to be the same as the 4598 1.1 mrg current EXEC value, the restore can be skipped. */ 4599 1.1 mrg if (curr_exec_known && exec_value == curr_exec) 4600 1.1 mrg return exec_value; 4601 1.1 mrg 4602 1.1 mrg exec = GEN_INT (exec_value); 4603 1.1 mrg } 4604 1.1 mrg else 4605 1.1 mrg { 4606 1.1 mrg /* If the EXEC value is not a constant, save it in a register after the 4607 1.1 mrg point of definition. */ 4608 1.1 mrg rtx exec_save_reg = gen_rtx_REG (DImode, EXEC_SAVE_REG); 4609 1.1 mrg 4610 1.1 mrg if (!last_exec_def_saved) 4611 1.1 mrg { 4612 1.1 mrg start_sequence (); 4613 1.1 mrg emit_move_insn (exec_save_reg, exec_reg); 4614 1.1 mrg rtx_insn *seq = get_insns (); 4615 1.1 mrg end_sequence (); 4616 1.1 mrg 4617 1.1 mrg emit_insn_after (seq, last_exec_def); 4618 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4619 1.1 mrg fprintf (dump_file, "Saving EXEC after insn %d.\n", 4620 1.1 mrg INSN_UID (last_exec_def)); 4621 1.1 mrg 4622 1.1 mrg last_exec_def_saved = true; 4623 1.1 mrg } 4624 1.1 mrg 4625 1.1 mrg exec = exec_save_reg; 4626 1.1 mrg } 4627 1.1 mrg 4628 1.1 mrg /* Restore EXEC register before the usage. 
*/ 4629 1.1 mrg start_sequence (); 4630 1.1 mrg emit_move_insn (exec_reg, exec); 4631 1.1 mrg rtx_insn *seq = get_insns (); 4632 1.1 mrg end_sequence (); 4633 1.1 mrg emit_insn_before (seq, insn); 4634 1.1 mrg 4635 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4636 1.1 mrg { 4637 1.1 mrg if (exec_value) 4638 1.1 mrg fprintf (dump_file, "Restoring EXEC to %ld before insn %d.\n", 4639 1.1 mrg exec_value, INSN_UID (insn)); 4640 1.1 mrg else 4641 1.1 mrg fprintf (dump_file, 4642 1.1 mrg "Restoring EXEC from saved value before insn %d.\n", 4643 1.1 mrg INSN_UID (insn)); 4644 1.1 mrg } 4645 1.1 mrg 4646 1.1 mrg return exec_value; 4647 1.1 mrg } 4648 1.1 mrg 4649 1.1 mrg /* Implement TARGET_MACHINE_DEPENDENT_REORG. 4650 1.1 mrg 4651 1.1 mrg Ensure that pipeline dependencies and lane masking are set correctly. */ 4652 1.1 mrg 4653 1.1 mrg static void 4654 1.1 mrg gcn_md_reorg (void) 4655 1.1 mrg { 4656 1.1 mrg basic_block bb; 4657 1.1 mrg rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG); 4658 1.1 mrg regset_head live; 4659 1.1 mrg 4660 1.1 mrg INIT_REG_SET (&live); 4661 1.1 mrg 4662 1.1 mrg compute_bb_for_insn (); 4663 1.1 mrg 4664 1.1 mrg if (!optimize) 4665 1.1 mrg { 4666 1.1 mrg split_all_insns (); 4667 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4668 1.1 mrg { 4669 1.1 mrg fprintf (dump_file, "After split:\n"); 4670 1.1 mrg print_rtl_with_bb (dump_file, get_insns (), dump_flags); 4671 1.1 mrg } 4672 1.1 mrg 4673 1.1 mrg /* Update data-flow information for split instructions. */ 4674 1.1 mrg df_insn_rescan_all (); 4675 1.1 mrg } 4676 1.1 mrg 4677 1.1 mrg df_live_add_problem (); 4678 1.1 mrg df_live_set_all_dirty (); 4679 1.1 mrg df_analyze (); 4680 1.1 mrg 4681 1.1 mrg /* This pass ensures that the EXEC register is set correctly, according 4682 1.1 mrg to the "exec" attribute. However, care must be taken so that the 4683 1.1 mrg value that reaches explicit uses of the EXEC register remains the 4684 1.1 mrg same as before. 
4685 1.1 mrg */ 4686 1.1 mrg 4687 1.1 mrg FOR_EACH_BB_FN (bb, cfun) 4688 1.1 mrg { 4689 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4690 1.1 mrg fprintf (dump_file, "BB %d:\n", bb->index); 4691 1.1 mrg 4692 1.1 mrg rtx_insn *insn, *curr; 4693 1.1 mrg rtx_insn *last_exec_def = BB_HEAD (bb); 4694 1.1 mrg bool last_exec_def_saved = false; 4695 1.1 mrg bool curr_exec_explicit = true; 4696 1.1 mrg bool curr_exec_known = true; 4697 1.1 mrg int64_t curr_exec = 0; /* 0 here means 'the value is that of EXEC 4698 1.1 mrg after last_exec_def is executed'. */ 4699 1.1 mrg 4700 1.1 mrg bitmap live_in = DF_LR_IN (bb); 4701 1.1 mrg bool exec_live_on_entry = false; 4702 1.1 mrg if (bitmap_bit_p (live_in, EXEC_LO_REG) 4703 1.1 mrg || bitmap_bit_p (live_in, EXEC_HI_REG)) 4704 1.1 mrg { 4705 1.1 mrg if (dump_file) 4706 1.1 mrg fprintf (dump_file, "EXEC reg is live on entry to block %d\n", 4707 1.1 mrg (int) bb->index); 4708 1.1 mrg exec_live_on_entry = true; 4709 1.1 mrg } 4710 1.1 mrg 4711 1.1 mrg FOR_BB_INSNS_SAFE (bb, insn, curr) 4712 1.1 mrg { 4713 1.1 mrg if (!NONDEBUG_INSN_P (insn)) 4714 1.1 mrg continue; 4715 1.1 mrg 4716 1.1 mrg if (GET_CODE (PATTERN (insn)) == USE 4717 1.1 mrg || GET_CODE (PATTERN (insn)) == CLOBBER) 4718 1.1 mrg continue; 4719 1.1 mrg 4720 1.1 mrg HARD_REG_SET defs, uses; 4721 1.1 mrg CLEAR_HARD_REG_SET (defs); 4722 1.1 mrg CLEAR_HARD_REG_SET (uses); 4723 1.1 mrg note_stores (insn, record_hard_reg_sets, &defs); 4724 1.1 mrg note_uses (&PATTERN (insn), record_hard_reg_uses, &uses); 4725 1.1 mrg 4726 1.1 mrg bool exec_lo_def_p = TEST_HARD_REG_BIT (defs, EXEC_LO_REG); 4727 1.1 mrg bool exec_hi_def_p = TEST_HARD_REG_BIT (defs, EXEC_HI_REG); 4728 1.1 mrg bool exec_used = (hard_reg_set_intersect_p 4729 1.1 mrg (uses, reg_class_contents[(int) EXEC_MASK_REG]) 4730 1.1 mrg || TEST_HARD_REG_BIT (uses, EXECZ_REG)); 4731 1.1 mrg 4732 1.1 mrg /* Check the instruction for implicit setting of EXEC via an 4733 1.1 mrg attribute. 
*/ 4734 1.1 mrg attr_exec exec_attr = get_attr_exec (insn); 4735 1.1 mrg int64_t new_exec; 4736 1.1 mrg 4737 1.1 mrg switch (exec_attr) 4738 1.1 mrg { 4739 1.1 mrg case EXEC_NONE: 4740 1.1 mrg new_exec = 0; 4741 1.1 mrg break; 4742 1.1 mrg 4743 1.1 mrg case EXEC_SINGLE: 4744 1.1 mrg /* Instructions that do not involve memory accesses only require 4745 1.1 mrg bit 0 of EXEC to be set. */ 4746 1.1 mrg if (gcn_vmem_insn_p (get_attr_type (insn)) 4747 1.1 mrg || get_attr_type (insn) == TYPE_DS) 4748 1.1 mrg new_exec = 1; 4749 1.1 mrg else 4750 1.1 mrg new_exec = curr_exec | 1; 4751 1.1 mrg break; 4752 1.1 mrg 4753 1.1 mrg case EXEC_FULL: 4754 1.1 mrg new_exec = -1; 4755 1.1 mrg break; 4756 1.1 mrg 4757 1.1 mrg default: /* Auto-detect what setting is appropriate. */ 4758 1.1 mrg { 4759 1.1 mrg new_exec = 0; 4760 1.1 mrg 4761 1.1 mrg /* If EXEC is referenced explicitly then we don't need to do 4762 1.1 mrg anything to set it, so we're done. */ 4763 1.1 mrg if (exec_used) 4764 1.1 mrg break; 4765 1.1 mrg 4766 1.1 mrg /* Scan the insn for VGPRs defs or uses. The mode determines 4767 1.1 mrg what kind of exec is needed. 
*/ 4768 1.1 mrg subrtx_iterator::array_type array; 4769 1.1 mrg FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) 4770 1.1 mrg { 4771 1.1 mrg const_rtx x = *iter; 4772 1.1 mrg if (REG_P (x) && VGPR_REGNO_P (REGNO (x))) 4773 1.1 mrg { 4774 1.1 mrg if (VECTOR_MODE_P (GET_MODE (x))) 4775 1.1 mrg { 4776 1.1 mrg new_exec = -1; 4777 1.1 mrg break; 4778 1.1 mrg } 4779 1.1 mrg else 4780 1.1 mrg new_exec = 1; 4781 1.1 mrg } 4782 1.1 mrg } 4783 1.1 mrg } 4784 1.1 mrg break; 4785 1.1 mrg } 4786 1.1 mrg 4787 1.1 mrg if (new_exec && (!curr_exec_known || new_exec != curr_exec)) 4788 1.1 mrg { 4789 1.1 mrg start_sequence (); 4790 1.1 mrg emit_move_insn (exec_reg, GEN_INT (new_exec)); 4791 1.1 mrg rtx_insn *seq = get_insns (); 4792 1.1 mrg end_sequence (); 4793 1.1 mrg emit_insn_before (seq, insn); 4794 1.1 mrg 4795 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4796 1.1 mrg fprintf (dump_file, "Setting EXEC to %ld before insn %d.\n", 4797 1.1 mrg new_exec, INSN_UID (insn)); 4798 1.1 mrg 4799 1.1 mrg curr_exec = new_exec; 4800 1.1 mrg curr_exec_explicit = false; 4801 1.1 mrg curr_exec_known = true; 4802 1.1 mrg } 4803 1.1 mrg else if (new_exec && dump_file && (dump_flags & TDF_DETAILS)) 4804 1.1 mrg { 4805 1.1 mrg fprintf (dump_file, "Exec already is %ld before insn %d.\n", 4806 1.1 mrg new_exec, INSN_UID (insn)); 4807 1.1 mrg } 4808 1.1 mrg 4809 1.1 mrg /* The state of the EXEC register is unknown after a 4810 1.1 mrg function call. */ 4811 1.1 mrg if (CALL_P (insn)) 4812 1.1 mrg curr_exec_known = false; 4813 1.1 mrg 4814 1.1 mrg /* Handle explicit uses of EXEC. If the instruction is a partial 4815 1.1 mrg explicit definition of EXEC, then treat it as an explicit use of 4816 1.1 mrg EXEC as well. */ 4817 1.1 mrg if (exec_used || exec_lo_def_p != exec_hi_def_p) 4818 1.1 mrg { 4819 1.1 mrg /* An instruction that explicitly uses EXEC should not also 4820 1.1 mrg implicitly define it. 
*/ 4821 1.1 mrg gcc_assert (!exec_used || !new_exec); 4822 1.1 mrg 4823 1.1 mrg if (!curr_exec_known || !curr_exec_explicit) 4824 1.1 mrg { 4825 1.1 mrg /* Restore the previous explicitly defined value. */ 4826 1.1 mrg curr_exec = gcn_restore_exec (insn, last_exec_def, 4827 1.1 mrg curr_exec, curr_exec_known, 4828 1.1 mrg last_exec_def_saved); 4829 1.1 mrg curr_exec_explicit = true; 4830 1.1 mrg curr_exec_known = true; 4831 1.1 mrg } 4832 1.1 mrg } 4833 1.1 mrg 4834 1.1 mrg /* Handle explicit definitions of EXEC. */ 4835 1.1 mrg if (exec_lo_def_p || exec_hi_def_p) 4836 1.1 mrg { 4837 1.1 mrg last_exec_def = insn; 4838 1.1 mrg last_exec_def_saved = false; 4839 1.1 mrg curr_exec = gcn_insn_exec_value (insn); 4840 1.1 mrg curr_exec_explicit = true; 4841 1.1 mrg curr_exec_known = true; 4842 1.1 mrg 4843 1.1 mrg if (dump_file && (dump_flags & TDF_DETAILS)) 4844 1.1 mrg fprintf (dump_file, 4845 1.1 mrg "Found %s definition of EXEC at insn %d.\n", 4846 1.1 mrg exec_lo_def_p == exec_hi_def_p ? "full" : "partial", 4847 1.1 mrg INSN_UID (insn)); 4848 1.1 mrg } 4849 1.1 mrg 4850 1.1 mrg exec_live_on_entry = false; 4851 1.1 mrg } 4852 1.1 mrg 4853 1.1 mrg COPY_REG_SET (&live, DF_LR_OUT (bb)); 4854 1.1 mrg df_simulate_initialize_backwards (bb, &live); 4855 1.1 mrg 4856 1.1 mrg /* If EXEC is live after the basic block, restore the value of EXEC 4857 1.1 mrg at the end of the block. */ 4858 1.1 mrg if ((REGNO_REG_SET_P (&live, EXEC_LO_REG) 4859 1.1 mrg || REGNO_REG_SET_P (&live, EXEC_HI_REG)) 4860 1.1 mrg && (!curr_exec_known || !curr_exec_explicit || exec_live_on_entry)) 4861 1.1 mrg { 4862 1.1 mrg rtx_insn *end_insn = BB_END (bb); 4863 1.1 mrg 4864 1.1 mrg /* If the instruction is not a jump instruction, do the restore 4865 1.1 mrg after the last instruction in the basic block. 
*/ 4866 1.1 mrg if (NONJUMP_INSN_P (end_insn)) 4867 1.1 mrg end_insn = NEXT_INSN (end_insn); 4868 1.1 mrg 4869 1.1 mrg gcn_restore_exec (end_insn, last_exec_def, curr_exec, 4870 1.1 mrg curr_exec_known, last_exec_def_saved); 4871 1.1 mrg } 4872 1.1 mrg } 4873 1.1 mrg 4874 1.1 mrg CLEAR_REG_SET (&live); 4875 1.1 mrg 4876 1.1 mrg /* "Manually Inserted Wait States (NOPs)." 4877 1.1 mrg 4878 1.1 mrg GCN hardware detects most kinds of register dependencies, but there 4879 1.1 mrg are some exceptions documented in the ISA manual. This pass 4880 1.1 mrg detects the missed cases, and inserts the documented number of NOPs 4881 1.1 mrg required for correct execution. */ 4882 1.1 mrg 4883 1.1 mrg const int max_waits = 5; 4884 1.1 mrg struct ilist 4885 1.1 mrg { 4886 1.1 mrg rtx_insn *insn; 4887 1.1 mrg attr_unit unit; 4888 1.1 mrg attr_delayeduse delayeduse; 4889 1.1 mrg HARD_REG_SET writes; 4890 1.1 mrg HARD_REG_SET reads; 4891 1.1 mrg int age; 4892 1.1 mrg } back[max_waits]; 4893 1.1 mrg int oldest = 0; 4894 1.1 mrg for (int i = 0; i < max_waits; i++) 4895 1.1 mrg back[i].insn = NULL; 4896 1.1 mrg 4897 1.1 mrg rtx_insn *insn, *last_insn = NULL; 4898 1.1 mrg for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn)) 4899 1.1 mrg { 4900 1.1 mrg if (!NONDEBUG_INSN_P (insn)) 4901 1.1 mrg continue; 4902 1.1 mrg 4903 1.1 mrg if (GET_CODE (PATTERN (insn)) == USE 4904 1.1 mrg || GET_CODE (PATTERN (insn)) == CLOBBER) 4905 1.1 mrg continue; 4906 1.1 mrg 4907 1.1 mrg attr_type itype = get_attr_type (insn); 4908 1.1 mrg attr_unit iunit = get_attr_unit (insn); 4909 1.1 mrg attr_delayeduse idelayeduse = get_attr_delayeduse (insn); 4910 1.1 mrg HARD_REG_SET ireads, iwrites; 4911 1.1 mrg CLEAR_HARD_REG_SET (ireads); 4912 1.1 mrg CLEAR_HARD_REG_SET (iwrites); 4913 1.1 mrg note_stores (insn, record_hard_reg_sets, &iwrites); 4914 1.1 mrg note_uses (&PATTERN (insn), record_hard_reg_uses, &ireads); 4915 1.1 mrg 4916 1.1 mrg /* Scan recent previous instructions for dependencies not handled in 
4917 1.1 mrg hardware. */ 4918 1.1 mrg int nops_rqd = 0; 4919 1.1 mrg for (int i = oldest; i < oldest + max_waits; i++) 4920 1.1 mrg { 4921 1.1 mrg struct ilist *prev_insn = &back[i % max_waits]; 4922 1.1 mrg 4923 1.1 mrg if (!prev_insn->insn) 4924 1.1 mrg continue; 4925 1.1 mrg 4926 1.1 mrg /* VALU writes SGPR followed by VMEM reading the same SGPR 4927 1.1 mrg requires 5 wait states. */ 4928 1.1 mrg if ((prev_insn->age + nops_rqd) < 5 4929 1.1 mrg && prev_insn->unit == UNIT_VECTOR 4930 1.1 mrg && gcn_vmem_insn_p (itype)) 4931 1.1 mrg { 4932 1.1 mrg HARD_REG_SET regs = prev_insn->writes & ireads; 4933 1.1 mrg if (hard_reg_set_intersect_p 4934 1.1 mrg (regs, reg_class_contents[(int) SGPR_REGS])) 4935 1.1 mrg nops_rqd = 5 - prev_insn->age; 4936 1.1 mrg } 4937 1.1 mrg 4938 1.1 mrg /* VALU sets VCC/EXEC followed by VALU uses VCCZ/EXECZ 4939 1.1 mrg requires 5 wait states. */ 4940 1.1 mrg if ((prev_insn->age + nops_rqd) < 5 4941 1.1 mrg && prev_insn->unit == UNIT_VECTOR 4942 1.1 mrg && iunit == UNIT_VECTOR 4943 1.1 mrg && ((hard_reg_set_intersect_p 4944 1.1 mrg (prev_insn->writes, 4945 1.1 mrg reg_class_contents[(int) EXEC_MASK_REG]) 4946 1.1 mrg && TEST_HARD_REG_BIT (ireads, EXECZ_REG)) 4947 1.1 mrg || 4948 1.1 mrg (hard_reg_set_intersect_p 4949 1.1 mrg (prev_insn->writes, 4950 1.1 mrg reg_class_contents[(int) VCC_CONDITIONAL_REG]) 4951 1.1 mrg && TEST_HARD_REG_BIT (ireads, VCCZ_REG)))) 4952 1.1 mrg nops_rqd = 5 - prev_insn->age; 4953 1.1 mrg 4954 1.1 mrg /* VALU writes SGPR/VCC followed by v_{read,write}lane using 4955 1.1 mrg SGPR/VCC as lane select requires 4 wait states. 
*/ 4956 1.1 mrg if ((prev_insn->age + nops_rqd) < 4 4957 1.1 mrg && prev_insn->unit == UNIT_VECTOR 4958 1.1 mrg && get_attr_laneselect (insn) == LANESELECT_YES) 4959 1.1 mrg { 4960 1.1 mrg HARD_REG_SET regs = prev_insn->writes & ireads; 4961 1.1 mrg if (hard_reg_set_intersect_p 4962 1.1 mrg (regs, reg_class_contents[(int) SGPR_REGS]) 4963 1.1 mrg || hard_reg_set_intersect_p 4964 1.1 mrg (regs, reg_class_contents[(int) VCC_CONDITIONAL_REG])) 4965 1.1 mrg nops_rqd = 4 - prev_insn->age; 4966 1.1 mrg } 4967 1.1 mrg 4968 1.1 mrg /* VALU writes VGPR followed by VALU_DPP reading that VGPR 4969 1.1 mrg requires 2 wait states. */ 4970 1.1 mrg if ((prev_insn->age + nops_rqd) < 2 4971 1.1 mrg && prev_insn->unit == UNIT_VECTOR 4972 1.1 mrg && itype == TYPE_VOP_DPP) 4973 1.1 mrg { 4974 1.1 mrg HARD_REG_SET regs = prev_insn->writes & ireads; 4975 1.1 mrg if (hard_reg_set_intersect_p 4976 1.1 mrg (regs, reg_class_contents[(int) VGPR_REGS])) 4977 1.1 mrg nops_rqd = 2 - prev_insn->age; 4978 1.1 mrg } 4979 1.1 mrg 4980 1.1 mrg /* Store that requires input registers are not overwritten by 4981 1.1 mrg following instruction. */ 4982 1.1 mrg if ((prev_insn->age + nops_rqd) < 1 4983 1.1 mrg && prev_insn->delayeduse == DELAYEDUSE_YES 4984 1.1 mrg && ((hard_reg_set_intersect_p 4985 1.1 mrg (prev_insn->reads, iwrites)))) 4986 1.1 mrg nops_rqd = 1 - prev_insn->age; 4987 1.1 mrg } 4988 1.1 mrg 4989 1.1 mrg /* Insert the required number of NOPs. */ 4990 1.1 mrg for (int i = nops_rqd; i > 0; i--) 4991 1.1 mrg emit_insn_after (gen_nop (), last_insn); 4992 1.1 mrg 4993 1.1 mrg /* Age the previous instructions. We can also ignore writes to 4994 1.1 mrg registers subsequently overwritten. 
*/ 4995 1.1 mrg HARD_REG_SET written; 4996 1.1 mrg CLEAR_HARD_REG_SET (written); 4997 1.1 mrg for (int i = oldest + max_waits - 1; i > oldest; i--) 4998 1.1 mrg { 4999 1.1 mrg struct ilist *prev_insn = &back[i % max_waits]; 5000 1.1 mrg 5001 1.1 mrg /* Assume all instructions are equivalent to one "wait", the same 5002 1.1 mrg as s_nop. This is probably true for SALU, but not VALU (which 5003 1.1 mrg may take longer), so this is not optimal. However, AMD do 5004 1.1 mrg not publish the cycle times for instructions. */ 5005 1.1 mrg prev_insn->age += 1 + nops_rqd; 5006 1.1 mrg 5007 1.1 mrg written |= iwrites; 5008 1.1 mrg prev_insn->writes &= ~written; 5009 1.1 mrg } 5010 1.1 mrg 5011 1.1 mrg /* Track the current instruction as a previous instruction. */ 5012 1.1 mrg back[oldest].insn = insn; 5013 1.1 mrg back[oldest].unit = iunit; 5014 1.1 mrg back[oldest].delayeduse = idelayeduse; 5015 1.1 mrg back[oldest].writes = iwrites; 5016 1.1 mrg back[oldest].reads = ireads; 5017 1.1 mrg back[oldest].age = 0; 5018 1.1 mrg oldest = (oldest + 1) % max_waits; 5019 1.1 mrg 5020 1.1 mrg last_insn = insn; 5021 1.1 mrg } 5022 1.1 mrg } 5023 1.1 mrg 5024 1.1 mrg /* }}} */ 5025 1.1 mrg /* {{{ OpenACC / OpenMP. */ 5026 1.1 mrg 5027 1.1 mrg #define GCN_DEFAULT_GANGS 0 /* Choose at runtime. */ 5028 1.1 mrg #define GCN_DEFAULT_WORKERS 0 /* Choose at runtime. */ 5029 1.1 mrg #define GCN_DEFAULT_VECTORS 1 /* Use autovectorization only, for now. */ 5030 1.1 mrg 5031 1.1 mrg /* Implement TARGET_GOACC_VALIDATE_DIMS. 5032 1.1 mrg 5033 1.1 mrg Check the launch dimensions provided for an OpenACC compute 5034 1.1 mrg region, or routine. */ 5035 1.1 mrg 5036 1.1 mrg static bool 5037 1.1 mrg gcn_goacc_validate_dims (tree decl, int dims[], int fn_level, 5038 1.1 mrg unsigned /*used*/) 5039 1.1 mrg { 5040 1.1 mrg bool changed = false; 5041 1.1 mrg const int max_workers = 16; 5042 1.1 mrg 5043 1.1 mrg /* The vector size must appear to be 64, to the user, unless this is a 5044 1.1 mrg SEQ routine. 
The real, internal value is always 1, which means use 5045 1.1 mrg autovectorization, but the user should not see that. */ 5046 1.1 mrg if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1 5047 1.1 mrg && dims[GOMP_DIM_VECTOR] >= 0) 5048 1.1 mrg { 5049 1.1 mrg if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0 5050 1.1 mrg && dims[GOMP_DIM_VECTOR] != 64) 5051 1.1 mrg warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION, 5052 1.1 mrg OPT_Wopenacc_dims, 5053 1.1 mrg (dims[GOMP_DIM_VECTOR] 5054 1.1 mrg ? G_("using %<vector_length (64)%>, ignoring %d") 5055 1.1 mrg : G_("using %<vector_length (64)%>, " 5056 1.1 mrg "ignoring runtime setting")), 5057 1.1 mrg dims[GOMP_DIM_VECTOR]); 5058 1.1 mrg dims[GOMP_DIM_VECTOR] = 1; 5059 1.1 mrg changed = true; 5060 1.1 mrg } 5061 1.1 mrg 5062 1.1 mrg /* Check the num workers is not too large. */ 5063 1.1 mrg if (dims[GOMP_DIM_WORKER] > max_workers) 5064 1.1 mrg { 5065 1.1 mrg warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION, 5066 1.1 mrg OPT_Wopenacc_dims, 5067 1.1 mrg "using %<num_workers (%d)%>, ignoring %d", 5068 1.1 mrg max_workers, dims[GOMP_DIM_WORKER]); 5069 1.1 mrg dims[GOMP_DIM_WORKER] = max_workers; 5070 1.1 mrg changed = true; 5071 1.1 mrg } 5072 1.1 mrg 5073 1.1 mrg /* Set global defaults. */ 5074 1.1 mrg if (!decl) 5075 1.1 mrg { 5076 1.1 mrg dims[GOMP_DIM_VECTOR] = GCN_DEFAULT_VECTORS; 5077 1.1 mrg if (dims[GOMP_DIM_WORKER] < 0) 5078 1.1 mrg dims[GOMP_DIM_WORKER] = GCN_DEFAULT_WORKERS; 5079 1.1 mrg if (dims[GOMP_DIM_GANG] < 0) 5080 1.1 mrg dims[GOMP_DIM_GANG] = GCN_DEFAULT_GANGS; 5081 1.1 mrg changed = true; 5082 1.1 mrg } 5083 1.1 mrg 5084 1.1 mrg return changed; 5085 1.1 mrg } 5086 1.1 mrg 5087 1.1 mrg /* Helper function for oacc_dim_size instruction. 5088 1.1 mrg Also used for OpenMP, via builtin_gcn_dim_size, and the omp_gcn pass. 
*/ 5089 1.1 mrg 5090 1.1 mrg rtx 5091 1.1 mrg gcn_oacc_dim_size (int dim) 5092 1.1 mrg { 5093 1.1 mrg if (dim < 0 || dim > 2) 5094 1.1 mrg error ("offload dimension out of range (%d)", dim); 5095 1.1 mrg 5096 1.1 mrg /* Vectors are a special case. */ 5097 1.1 mrg if (dim == 2) 5098 1.1 mrg return const1_rtx; /* Think of this as 1 times 64. */ 5099 1.1 mrg 5100 1.1 mrg static int offset[] = { 5101 1.1 mrg /* Offsets into dispatch packet. */ 5102 1.1 mrg 12, /* X dim = Gang / Team / Work-group. */ 5103 1.1 mrg 20, /* Z dim = Worker / Thread / Wavefront. */ 5104 1.1 mrg 16 /* Y dim = Vector / SIMD / Work-item. */ 5105 1.1 mrg }; 5106 1.1 mrg rtx addr = gen_rtx_PLUS (DImode, 5107 1.1 mrg gen_rtx_REG (DImode, 5108 1.1 mrg cfun->machine->args. 5109 1.1 mrg reg[DISPATCH_PTR_ARG]), 5110 1.1 mrg GEN_INT (offset[dim])); 5111 1.1 mrg return gen_rtx_MEM (SImode, addr); 5112 1.1 mrg } 5113 1.1 mrg 5114 1.1 mrg /* Helper function for oacc_dim_pos instruction. 5115 1.1 mrg Also used for OpenMP, via builtin_gcn_dim_pos, and the omp_gcn pass. */ 5116 1.1 mrg 5117 1.1 mrg rtx 5118 1.1 mrg gcn_oacc_dim_pos (int dim) 5119 1.1 mrg { 5120 1.1 mrg if (dim < 0 || dim > 2) 5121 1.1 mrg error ("offload dimension out of range (%d)", dim); 5122 1.1 mrg 5123 1.1 mrg static const int reg[] = { 5124 1.1 mrg WORKGROUP_ID_X_ARG, /* Gang / Team / Work-group. */ 5125 1.1 mrg WORK_ITEM_ID_Z_ARG, /* Worker / Thread / Wavefront. */ 5126 1.1 mrg WORK_ITEM_ID_Y_ARG /* Vector / SIMD / Work-item. */ 5127 1.1 mrg }; 5128 1.1 mrg 5129 1.1 mrg int reg_num = cfun->machine->args.reg[reg[dim]]; 5130 1.1 mrg 5131 1.1 mrg /* The information must have been requested by the kernel. */ 5132 1.1 mrg gcc_assert (reg_num >= 0); 5133 1.1 mrg 5134 1.1 mrg return gen_rtx_REG (SImode, reg_num); 5135 1.1 mrg } 5136 1.1 mrg 5137 1.1 mrg /* Implement TARGET_GOACC_FORK_JOIN. 
   Return true for the fork/join calls that must survive to RTL expansion;
   on this target only worker-level joins (with more than one worker) need
   any further processing.  */

static bool
gcn_fork_join (gcall *call, const int dims[], bool is_fork)
{
  /* Argument 2 of the IFN_UNIQUE fork/join call identifies the axis.  */
  tree arg = gimple_call_arg (call, 2);
  unsigned axis = TREE_INT_CST_LOW (arg);

  if (!is_fork && axis == GOMP_DIM_WORKER && dims[axis] != 1)
    return true;

  return false;
}

/* Implement ???????
   FIXME make this a real hook.

   Adjust FNDECL such that options inherited from the host compiler
   are made appropriate for the accelerator compiler.  */

void
gcn_fixup_accel_lto_options (tree fndecl)
{
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
  if (!func_optimize)
    return;

  tree old_optimize
    = build_optimization_node (&global_options, &global_options_set);
  tree new_optimize;

  /* If the function changed the optimization levels as well as
     setting target options, start with the optimizations
     specified.  */
  if (func_optimize != old_optimize)
    cl_optimization_restore (&global_options, &global_options_set,
			     TREE_OPTIMIZATION (func_optimize));

  /* Re-derive the target-dependent option state for the accelerator.  */
  gcn_option_override ();

  /* The target attributes may also change some optimization flags,
     so update the optimization options if necessary.  */
  new_optimize = build_optimization_node (&global_options,
					  &global_options_set);

  if (old_optimize != new_optimize)
    {
      DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
      /* Restore the caller's view of the global options.  */
      cl_optimization_restore (&global_options, &global_options_set,
			       TREE_OPTIMIZATION (old_optimize));
    }
}

/* Implement TARGET_GOACC_SHARED_MEM_LAYOUT hook.

   Report the region of LDS usable for OpenACC worker reductions:
   *LO is the first usable byte (after the gang-private area) and *HI
   the limit.  Also record the reduction region in cfun's
   machine_function for later use.  */

static void
gcn_shared_mem_layout (unsigned HOST_WIDE_INT *lo,
		       unsigned HOST_WIDE_INT *hi,
		       int ARG_UNUSED (dims[GOMP_DIM_MAX]),
		       unsigned HOST_WIDE_INT
		       ARG_UNUSED (private_size[GOMP_DIM_MAX]),
		       unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX])
{
  *lo = gang_private_size_opt + reduction_size[GOMP_DIM_WORKER];
  /* !!! We can maybe use dims[] to estimate the maximum number of work
     groups/wavefronts/etc. we will launch, and therefore tune the maximum
     amount of LDS we should use.  For now, use a minimal amount to try to
     maximise occupancy.  */
  *hi = acc_lds_size;
  machine_function *machfun = cfun->machine;
  machfun->reduction_base = gang_private_size_opt;
  machfun->reduction_limit
    = gang_private_size_opt + reduction_size[GOMP_DIM_WORKER];
}

/* }}} */
/* {{{ ASM Output.  */

/* Implement TARGET_ASM_FILE_START.

   Print assembler file header text.
   Emits the .amdgcn_target directive naming the CPU and, where the
   configured assembler supports them, the XNACK / SRAM-ECC target
   features.  */

static void
output_file_start (void)
{
  const char *cpu;
  bool use_xnack_attr = true;
  bool use_sram_attr = true;
  switch (gcn_arch)
    {
    case PROCESSOR_FIJI:
      cpu = "gfx803";
#ifndef HAVE_GCN_XNACK_FIJI
      use_xnack_attr = false;
#endif
      use_sram_attr = false;
      break;
    case PROCESSOR_VEGA10:
      cpu = "gfx900";
#ifndef HAVE_GCN_XNACK_GFX900
      use_xnack_attr = false;
#endif
      use_sram_attr = false;
      break;
    case PROCESSOR_VEGA20:
      cpu = "gfx906";
#ifndef HAVE_GCN_XNACK_GFX906
      use_xnack_attr = false;
#endif
      use_sram_attr = false;
      break;
    case PROCESSOR_GFX908:
      cpu = "gfx908";
#ifndef HAVE_GCN_XNACK_GFX908
      use_xnack_attr = false;
#endif
#ifndef HAVE_GCN_SRAM_ECC_GFX908
      use_sram_attr = false;
#endif
      break;
    default: gcc_unreachable ();
    }

#if HAVE_GCN_ASM_V3_SYNTAX
  const char *xnack = (flag_xnack ? "+xnack" : "");
  const char *sram_ecc = (flag_sram_ecc ? "+sram-ecc" : "");
#endif
#if HAVE_GCN_ASM_V4_SYNTAX
  /* In HSACOv4 no attribute setting means the binary supports "any" hardware
     configuration.  In GCC binaries, this is true for SRAM ECC, but not
     XNACK.  */
  const char *xnack = (flag_xnack ? ":xnack+" : ":xnack-");
  const char *sram_ecc = (flag_sram_ecc == SRAM_ECC_ON ? ":sramecc+"
			  : flag_sram_ecc == SRAM_ECC_OFF ? ":sramecc-"
			  : "");
#endif
  if (!use_xnack_attr)
    xnack = "";
  if (!use_sram_attr)
    sram_ecc = "";

  /* NOTE(review): the first conditional below uses #if but the second
     uses #ifdef; this only works because exactly one of the two syntax
     macros is expected to be configured — confirm against configure.  */
  fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n",
	  cpu,
#if HAVE_GCN_ASM_V3_SYNTAX
	  xnack, sram_ecc
#endif
#ifdef HAVE_GCN_ASM_V4_SYNTAX
	  sram_ecc, xnack
#endif
	  );
}

/* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.

   Print the initial definition of a function name.

   For GCN kernel entry points this includes all the HSA meta-data, special
   alignment constraints that don't apply to regular functions, and magic
   comments that pass information to mkoffload.  */

void
gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
{
  int sgpr, vgpr;
  bool xnack_enabled = false;

  fputs ("\n\n", file);

  /* Regular (non-kernel) functions need only the usual type/label.  */
  if (cfun && cfun->machine && cfun->machine->normal_function)
    {
      fputs ("\t.type\t", file);
      assemble_name (file, name);
      fputs (",@function\n", file);
      assemble_name (file, name);
      fputs (":\n", file);
      return;
    }

  /* Determine count of sgpr/vgpr registers by looking for last
     one used.  */
  for (sgpr = 101; sgpr >= 0; sgpr--)
    if (df_regs_ever_live_p (FIRST_SGPR_REG + sgpr))
      break;
  sgpr++;
  for (vgpr = 255; vgpr >= 0; vgpr--)
    if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
      break;
  vgpr++;

  if (!leaf_function_p ())
    {
      /* We can't know how many registers function calls might use.  */
      if (vgpr < MAX_NORMAL_VGPR_COUNT)
	vgpr = MAX_NORMAL_VGPR_COUNT;
      if (sgpr < MAX_NORMAL_SGPR_COUNT)
	sgpr = MAX_NORMAL_SGPR_COUNT;
    }

  /* Emit the .amdhsa_kernel descriptor block.  */
  fputs ("\t.rodata\n"
	 "\t.p2align\t6\n"
	 "\t.amdhsa_kernel\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
  int reg = FIRST_SGPR_REG;
  for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
    {
      int reg_first = -1;
      int reg_last;
      if ((cfun->machine->args.requested & (1 << a))
	  && (gcn_kernel_arg_types[a].fixed_regno < 0))
	{
	  reg_first = reg;
	  reg_last = (reg_first
		      + (GET_MODE_SIZE (gcn_kernel_arg_types[a].mode)
			 / UNITS_PER_WORD) - 1);
	  reg = reg_last + 1;
	}

      if (gcn_kernel_arg_types[a].header_pseudo)
	{
	  /* Unrequested arguments are emitted commented-out (";").  */
	  fprintf (file, "\t %s%s\t%i",
		   (cfun->machine->args.requested & (1 << a)) != 0
		   ? "" : ";",
		   gcn_kernel_arg_types[a].header_pseudo,
		   (cfun->machine->args.requested & (1 << a)) != 0);
	  if (reg_first != -1)
	    {
	      fprintf (file, " ; (");
	      for (int i = reg_first; i <= reg_last; ++i)
		{
		  if (i != reg_first)
		    fprintf (file, ", ");
		  fprintf (file, "%s", reg_names[i]);
		}
	      fprintf (file, ")");
	    }
	  fprintf (file, "\n");
	}
      else if (gcn_kernel_arg_types[a].fixed_regno >= 0
	       && cfun->machine->args.requested & (1 << a))
	fprintf (file, "\t ; %s\t%i (%s)\n",
		 gcn_kernel_arg_types[a].name,
		 (cfun->machine->args.requested & (1 << a)) != 0,
		 reg_names[gcn_kernel_arg_types[a].fixed_regno]);
    }
  fprintf (file, "\t .amdhsa_system_vgpr_workitem_id\t%i\n",
	   (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG))
	   ? 2
	   : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
	   ? 1 : 0);
  fprintf (file,
	   "\t .amdhsa_next_free_vgpr\t%i\n"
	   "\t .amdhsa_next_free_sgpr\t%i\n"
	   "\t .amdhsa_reserve_vcc\t1\n"
	   "\t .amdhsa_reserve_flat_scratch\t0\n"
	   "\t .amdhsa_reserve_xnack_mask\t%i\n"
	   "\t .amdhsa_private_segment_fixed_size\t%i\n"
	   "\t .amdhsa_group_segment_fixed_size\t%u\n"
	   "\t .amdhsa_float_denorm_mode_32\t3\n"
	   "\t .amdhsa_float_denorm_mode_16_64\t3\n",
	   vgpr,
	   sgpr,
	   xnack_enabled,
	   /* workitem_private_segment_bytes_size needs to be
	      one 64th the wave-front stack size.  */
	   stack_size_opt / 64,
	   LDS_SIZE);
  fputs ("\t.end_amdhsa_kernel\n", file);

#if 1
  /* The following is YAML embedded in assembler; tabs are not allowed.  */
  fputs ("        .amdgpu_metadata\n"
	 "        amdhsa.version:\n"
	 "          - 1\n"
	 "          - 0\n"
	 "        amdhsa.kernels:\n"
	 "          - .name: ", file);
  assemble_name (file, name);
  fputs ("\n            .symbol: ", file);
  assemble_name (file, name);
  fprintf (file,
	   ".kd\n"
	   "            .kernarg_segment_size: %i\n"
	   "            .kernarg_segment_align: %i\n"
	   "            .group_segment_fixed_size: %u\n"
	   "            .private_segment_fixed_size: %i\n"
	   "            .wavefront_size: 64\n"
	   "            .sgpr_count: %i\n"
	   "            .vgpr_count: %i\n"
	   "            .max_flat_workgroup_size: 1024\n",
	   cfun->machine->kernarg_segment_byte_size,
	   cfun->machine->kernarg_segment_alignment,
	   LDS_SIZE,
	   stack_size_opt / 64,
	   sgpr, vgpr);
  fputs ("        .end_amdgpu_metadata\n", file);
#endif

  fputs ("\t.text\n", file);
  fputs ("\t.align\t256\n", file);
  fputs ("\t.type\t", file);
  assemble_name (file, name);
  fputs (",@function\n", file);
  assemble_name (file, name);
  fputs (":\n", file);

  /* This comment is read by mkoffload.  */
  if (flag_openacc)
    fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG),
	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER),
	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name);
}

/* Implement TARGET_ASM_SELECT_SECTION.

   Return the section into which EXP should be placed.  */

static section *
gcn_asm_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
{
  /* LDS-resident objects go into the dedicated .lds_bss section.  */
  if (TREE_TYPE (exp) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (exp)) == ADDR_SPACE_LDS)
    {
      if (!DECL_P (exp))
	return get_section (".lds_bss",
			    SECTION_WRITE | SECTION_BSS | SECTION_DEBUG,
			    NULL);

      return get_named_section (exp, ".lds_bss", reloc);
    }

  return default_elf_select_section (exp, reloc, align);
}

/* Implement TARGET_ASM_FUNCTION_PROLOGUE.

   Emits custom text into the assembler file at the head of each function.
   Everything written here is an assembler comment (";") describing the
   frame layout, for human readers and debugging only.  */

static void
gcn_target_asm_function_prologue (FILE *file)
{
  machine_function *offsets = gcn_compute_frame_offsets ();

  asm_fprintf (file, "\t; using %s addressing in function\n",
	       offsets->use_flat_addressing ? "flat" : "global");

  if (offsets->normal_function)
    {
      asm_fprintf (file, "\t; frame pointer needed: %s\n",
		   offsets->need_frame_pointer ? "true" : "false");
      asm_fprintf (file, "\t; lr needs saving: %s\n",
		   offsets->lr_needs_saving ? "true" : "false");
      asm_fprintf (file, "\t; outgoing args size: %wd\n",
		   offsets->outgoing_args_size);
      asm_fprintf (file, "\t; pretend size: %wd\n", offsets->pretend_size);
      asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
      asm_fprintf (file, "\t; callee save size: %wd\n",
		   offsets->callee_saves);
    }
  else
    {
      asm_fprintf (file, "\t; HSA kernel entry point\n");
      asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
      asm_fprintf (file, "\t; outgoing args size: %wd\n",
		   offsets->outgoing_args_size);
    }
}

/* Helper function for print_operand and print_operand_address.

   Print a register as the assembler requires, according to mode and name.
   Multi-word modes are printed as register ranges, e.g. "s[0:1]" or
   "v[4:7]"; special registers get their symbolic names.  */

static void
print_reg (FILE *file, rtx x)
{
  machine_mode mode = GET_MODE (x);
  if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
      || mode == HFmode || mode == SFmode
      || mode == V64SFmode || mode == V64SImode
      || mode == V64QImode || mode == V64HImode)
    fprintf (file, "%s", reg_names[REGNO (x)]);
  else if (mode == DImode || mode == V64DImode
	   || mode == DFmode || mode == V64DFmode)
    {
      /* 64-bit values occupy a pair of consecutive 32-bit registers.  */
      if (SGPR_REGNO_P (REGNO (x)))
	fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
		 REGNO (x) - FIRST_SGPR_REG + 1);
      else if (VGPR_REGNO_P (REGNO (x)))
	fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
		 REGNO (x) - FIRST_VGPR_REG + 1);
      else if (REGNO (x) == FLAT_SCRATCH_REG)
	fprintf (file, "flat_scratch");
      else if (REGNO (x) == EXEC_REG)
	fprintf (file, "exec");
      else if (REGNO (x) == VCC_LO_REG)
	fprintf (file, "vcc");
      else
	fprintf (file, "[%s:%s]",
		 reg_names[REGNO (x)], reg_names[REGNO (x) + 1]);
    }
  else if (mode == TImode)
    {
      /* 128-bit values occupy four consecutive 32-bit registers.  */
      if (SGPR_REGNO_P (REGNO (x)))
	fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
		 REGNO (x) - FIRST_SGPR_REG + 3);
      else if (VGPR_REGNO_P (REGNO (x)))
	fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
		 REGNO (x) - FIRST_VGPR_REG + 3);
      else
	gcc_unreachable ();
    }
  else
    gcc_unreachable ();
}

/* Implement TARGET_SECTION_TYPE_FLAGS.

   Return a set of section attributes for use by TARGET_ASM_NAMED_SECTION.  */

static unsigned int
gcn_section_type_flags (tree decl, const char *name, int reloc)
{
  if (strcmp (name, ".lds_bss") == 0)
    return SECTION_WRITE | SECTION_BSS | SECTION_DEBUG;

  return default_section_type_flags (decl, name, reloc);
}

/* Helper function for gcn_asm_output_symbol_ref.

   FIXME: This function is used to lay out gang-private variables in LDS
   on a per-CU basis.
   There may be cases in which gang-private variables in different compilation
   units could clobber each other.  In that case we should be relying on the
   linker to lay out gang-private LDS space, but that doesn't appear to be
   possible at present.
   Print the LDS byte offset assigned to VAR, allocating one on first use.
   Offsets are bump-allocated from the gang-private high-water mark.  */

static void
gcn_print_lds_decl (FILE *f, tree var)
{
  int *offset;
  if ((offset = lds_allocs.get (var)))
    /* Already allocated: reuse the recorded offset.  */
    fprintf (f, "%u", (unsigned) *offset);
  else
    {
      unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (var);
      tree type = TREE_TYPE (var);
      unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
      /* Bump small alignments up to 8 for larger objects.  */
      if (size > align && size > 4 && align < 8)
	align = 8;

      /* Round the high-water mark up to the required alignment
	 (align is assumed to be a power of two here).  */
      gang_private_hwm = ((gang_private_hwm + align - 1) & ~(align - 1));

      lds_allocs.put (var, gang_private_hwm);
      fprintf (f, "%u", gang_private_hwm);
      gang_private_hwm += size;
      if (gang_private_hwm > gang_private_size_opt)
	error ("%d bytes of gang-private data-share memory exhausted"
	       " (increase with %<-mgang-private-size=%d%>, for example)",
	       gang_private_size_opt, gang_private_hwm);
    }
}

/* Implement ASM_OUTPUT_SYMBOL_REF via gcn-hsa.h.  */

void
gcn_asm_output_symbol_ref (FILE *file, rtx x)
{
  tree decl;
  if (cfun
      && (decl = SYMBOL_REF_DECL (x)) != 0
      && TREE_CODE (decl) == VAR_DECL
      && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
    {
      /* LDS symbols (emitted using this hook) are only used at present
	 to propagate worker values from an active thread to neutered
	 threads.  Use the same offset for each such block, but don't
	 use zero because null pointers are used to identify the active
	 thread in GOACC_single_copy_start calls.  */
      gcn_print_lds_decl (file, decl);
    }
  else
    {
      assemble_name (file, XSTR (x, 0));
      /* FIXME: See above -- this condition is unreachable.  */
      if (cfun
	  && (decl = SYMBOL_REF_DECL (x)) != 0
	  && TREE_CODE (decl) == VAR_DECL
	  && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
	fputs ("@abs32", file);
    }
}

/* Implement TARGET_CONSTANT_ALIGNMENT.

   Returns the alignment in bits of a constant that is being placed in memory.
   CONSTANT is the constant and BASIC_ALIGN is the alignment that the object
   would ordinarily have.  Constants are aligned to at least 128 bits.  */

static HOST_WIDE_INT
gcn_constant_alignment (const_tree ARG_UNUSED (constant),
			HOST_WIDE_INT basic_align)
{
  return basic_align > 128 ? basic_align : 128;
}

/* Implement PRINT_OPERAND_ADDRESS via gcn.h.
*/ 5644 1.1 mrg 5645 1.1 mrg void 5646 1.1 mrg print_operand_address (FILE *file, rtx mem) 5647 1.1 mrg { 5648 1.1 mrg gcc_assert (MEM_P (mem)); 5649 1.1 mrg 5650 1.1 mrg rtx reg; 5651 1.1 mrg rtx offset; 5652 1.1 mrg addr_space_t as = MEM_ADDR_SPACE (mem); 5653 1.1 mrg rtx addr = XEXP (mem, 0); 5654 1.1 mrg gcc_assert (REG_P (addr) || GET_CODE (addr) == PLUS); 5655 1.1 mrg 5656 1.1 mrg if (AS_SCRATCH_P (as)) 5657 1.1 mrg switch (GET_CODE (addr)) 5658 1.1 mrg { 5659 1.1 mrg case REG: 5660 1.1 mrg print_reg (file, addr); 5661 1.1 mrg break; 5662 1.1 mrg 5663 1.1 mrg case PLUS: 5664 1.1 mrg reg = XEXP (addr, 0); 5665 1.1 mrg offset = XEXP (addr, 1); 5666 1.1 mrg print_reg (file, reg); 5667 1.1 mrg if (GET_CODE (offset) == CONST_INT) 5668 1.1 mrg fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 5669 1.1 mrg else 5670 1.1 mrg abort (); 5671 1.1 mrg break; 5672 1.1 mrg 5673 1.1 mrg default: 5674 1.1 mrg debug_rtx (addr); 5675 1.1 mrg abort (); 5676 1.1 mrg } 5677 1.1 mrg else if (AS_ANY_FLAT_P (as)) 5678 1.1 mrg { 5679 1.1 mrg if (GET_CODE (addr) == REG) 5680 1.1 mrg print_reg (file, addr); 5681 1.1 mrg else 5682 1.1 mrg { 5683 1.1 mrg gcc_assert (TARGET_GCN5_PLUS); 5684 1.1 mrg print_reg (file, XEXP (addr, 0)); 5685 1.1 mrg } 5686 1.1 mrg } 5687 1.1 mrg else if (AS_GLOBAL_P (as)) 5688 1.1 mrg { 5689 1.1 mrg gcc_assert (TARGET_GCN5_PLUS); 5690 1.1 mrg 5691 1.1 mrg rtx base = addr; 5692 1.1 mrg rtx vgpr_offset = NULL_RTX; 5693 1.1 mrg 5694 1.1 mrg if (GET_CODE (addr) == PLUS) 5695 1.1 mrg { 5696 1.1 mrg base = XEXP (addr, 0); 5697 1.1 mrg 5698 1.1 mrg if (GET_CODE (base) == PLUS) 5699 1.1 mrg { 5700 1.1 mrg /* (SGPR + VGPR) + CONST */ 5701 1.1 mrg vgpr_offset = XEXP (base, 1); 5702 1.1 mrg base = XEXP (base, 0); 5703 1.1 mrg } 5704 1.1 mrg else 5705 1.1 mrg { 5706 1.1 mrg rtx offset = XEXP (addr, 1); 5707 1.1 mrg 5708 1.1 mrg if (REG_P (offset)) 5709 1.1 mrg /* SGPR + VGPR */ 5710 1.1 mrg vgpr_offset = offset; 5711 1.1 mrg else if (CONST_INT_P 
(offset)) 5712 1.1 mrg /* VGPR + CONST or SGPR + CONST */ 5713 1.1 mrg ; 5714 1.1 mrg else 5715 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address"); 5716 1.1 mrg } 5717 1.1 mrg } 5718 1.1 mrg 5719 1.1 mrg if (REG_P (base)) 5720 1.1 mrg { 5721 1.1 mrg if (VGPR_REGNO_P (REGNO (base))) 5722 1.1 mrg print_reg (file, base); 5723 1.1 mrg else if (SGPR_REGNO_P (REGNO (base))) 5724 1.1 mrg { 5725 1.1 mrg /* The assembler requires a 64-bit VGPR pair here, even though 5726 1.1 mrg the offset should be only 32-bit. */ 5727 1.1 mrg if (vgpr_offset == NULL_RTX) 5728 1.1 mrg /* In this case, the vector offset is zero, so we use the first 5729 1.1 mrg lane of v1, which is initialized to zero. */ 5730 1.1 mrg { 5731 1.1 mrg if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED) 5732 1.1 mrg fprintf (file, "v1"); 5733 1.1 mrg else 5734 1.1 mrg fprintf (file, "v[1:2]"); 5735 1.1 mrg } 5736 1.1 mrg else if (REG_P (vgpr_offset) 5737 1.1 mrg && VGPR_REGNO_P (REGNO (vgpr_offset))) 5738 1.1 mrg { 5739 1.1 mrg if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED) 5740 1.1 mrg fprintf (file, "v%d", 5741 1.1 mrg REGNO (vgpr_offset) - FIRST_VGPR_REG); 5742 1.1 mrg else 5743 1.1 mrg fprintf (file, "v[%d:%d]", 5744 1.1 mrg REGNO (vgpr_offset) - FIRST_VGPR_REG, 5745 1.1 mrg REGNO (vgpr_offset) - FIRST_VGPR_REG + 1); 5746 1.1 mrg } 5747 1.1 mrg else 5748 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address"); 5749 1.1 mrg } 5750 1.1 mrg } 5751 1.1 mrg else 5752 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address"); 5753 1.1 mrg } 5754 1.1 mrg else if (AS_ANY_DS_P (as)) 5755 1.1 mrg switch (GET_CODE (addr)) 5756 1.1 mrg { 5757 1.1 mrg case REG: 5758 1.1 mrg print_reg (file, addr); 5759 1.1 mrg break; 5760 1.1 mrg 5761 1.1 mrg case PLUS: 5762 1.1 mrg reg = XEXP (addr, 0); 5763 1.1 mrg print_reg (file, reg); 5764 1.1 mrg break; 5765 1.1 mrg 5766 1.1 mrg default: 5767 1.1 mrg debug_rtx (addr); 5768 1.1 mrg abort (); 5769 1.1 mrg } 5770 1.1 mrg else 5771 1.1 mrg switch (GET_CODE (addr)) 5772 1.1 mrg { 
5773 1.1 mrg case REG: 5774 1.1 mrg print_reg (file, addr); 5775 1.1 mrg fprintf (file, ", 0"); 5776 1.1 mrg break; 5777 1.1 mrg 5778 1.1 mrg case PLUS: 5779 1.1 mrg reg = XEXP (addr, 0); 5780 1.1 mrg offset = XEXP (addr, 1); 5781 1.1 mrg print_reg (file, reg); 5782 1.1 mrg fprintf (file, ", "); 5783 1.1 mrg if (GET_CODE (offset) == REG) 5784 1.1 mrg print_reg (file, reg); 5785 1.1 mrg else if (GET_CODE (offset) == CONST_INT) 5786 1.1 mrg fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 5787 1.1 mrg else 5788 1.1 mrg abort (); 5789 1.1 mrg break; 5790 1.1 mrg 5791 1.1 mrg default: 5792 1.1 mrg debug_rtx (addr); 5793 1.1 mrg abort (); 5794 1.1 mrg } 5795 1.1 mrg } 5796 1.1 mrg 5797 1.1 mrg /* Implement PRINT_OPERAND via gcn.h. 5798 1.1 mrg 5799 1.1 mrg b - print operand size as untyped operand (b8/b16/b32/b64) 5800 1.1 mrg B - print operand size as SI/DI untyped operand (b32/b32/b32/b64) 5801 1.1 mrg i - print operand size as untyped operand (i16/b32/i64) 5802 1.1 mrg I - print operand size as SI/DI untyped operand(i32/b32/i64) 5803 1.1 mrg u - print operand size as untyped operand (u16/u32/u64) 5804 1.1 mrg U - print operand size as SI/DI untyped operand(u32/u64) 5805 1.1 mrg o - print operand size as memory access size for loads 5806 1.1 mrg (ubyte/ushort/dword/dwordx2/wordx3/dwordx4) 5807 1.1 mrg s - print operand size as memory access size for stores 5808 1.1 mrg (byte/short/dword/dwordx2/wordx3/dwordx4) 5809 1.1 mrg C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...) 5810 1.1 mrg c - print inverse conditional code for s_cbranch 5811 1.1 mrg D - print conditional code for s_cmp (eq_u64/lg_u64...) 5812 1.1 mrg E - print conditional code for v_cmp (eq_u64/ne_u64...) 5813 1.1 mrg A - print address in formatting suitable for given address space. 5814 1.1 mrg O - print offset:n for data share operations. 
5815 1.1 mrg ^ - print "_co" suffix for GCN5 mnemonics 5816 1.1 mrg g - print "glc", if appropriate for given MEM 5817 1.1 mrg */ 5818 1.1 mrg 5819 1.1 mrg void 5820 1.1 mrg print_operand (FILE *file, rtx x, int code) 5821 1.1 mrg { 5822 1.1 mrg int xcode = x ? GET_CODE (x) : 0; 5823 1.1 mrg bool invert = false; 5824 1.1 mrg switch (code) 5825 1.1 mrg { 5826 1.1 mrg /* Instructions have the following suffixes. 5827 1.1 mrg If there are two suffixes, the first is the destination type, 5828 1.1 mrg and the second is the source type. 5829 1.1 mrg 5830 1.1 mrg B32 Bitfield (untyped data) 32-bit 5831 1.1 mrg B64 Bitfield (untyped data) 64-bit 5832 1.1 mrg F16 floating-point 16-bit 5833 1.1 mrg F32 floating-point 32-bit (IEEE 754 single-precision float) 5834 1.1 mrg F64 floating-point 64-bit (IEEE 754 double-precision float) 5835 1.1 mrg I16 signed 32-bit integer 5836 1.1 mrg I32 signed 32-bit integer 5837 1.1 mrg I64 signed 64-bit integer 5838 1.1 mrg U16 unsigned 32-bit integer 5839 1.1 mrg U32 unsigned 32-bit integer 5840 1.1 mrg U64 unsigned 64-bit integer */ 5841 1.1 mrg 5842 1.1 mrg /* Print operand size as untyped suffix. 
*/ 5843 1.1 mrg case 'b': 5844 1.1 mrg { 5845 1.1 mrg const char *s = ""; 5846 1.1 mrg machine_mode mode = GET_MODE (x); 5847 1.1 mrg if (VECTOR_MODE_P (mode)) 5848 1.1 mrg mode = GET_MODE_INNER (mode); 5849 1.1 mrg switch (GET_MODE_SIZE (mode)) 5850 1.1 mrg { 5851 1.1 mrg case 1: 5852 1.1 mrg s = "_b8"; 5853 1.1 mrg break; 5854 1.1 mrg case 2: 5855 1.1 mrg s = "_b16"; 5856 1.1 mrg break; 5857 1.1 mrg case 4: 5858 1.1 mrg s = "_b32"; 5859 1.1 mrg break; 5860 1.1 mrg case 8: 5861 1.1 mrg s = "_b64"; 5862 1.1 mrg break; 5863 1.1 mrg default: 5864 1.1 mrg output_operand_lossage ("invalid operand %%xn code"); 5865 1.1 mrg return; 5866 1.1 mrg } 5867 1.1 mrg fputs (s, file); 5868 1.1 mrg } 5869 1.1 mrg return; 5870 1.1 mrg case 'B': 5871 1.1 mrg { 5872 1.1 mrg const char *s = ""; 5873 1.1 mrg machine_mode mode = GET_MODE (x); 5874 1.1 mrg if (VECTOR_MODE_P (mode)) 5875 1.1 mrg mode = GET_MODE_INNER (mode); 5876 1.1 mrg switch (GET_MODE_SIZE (mode)) 5877 1.1 mrg { 5878 1.1 mrg case 1: 5879 1.1 mrg case 2: 5880 1.1 mrg case 4: 5881 1.1 mrg s = "_b32"; 5882 1.1 mrg break; 5883 1.1 mrg case 8: 5884 1.1 mrg s = "_b64"; 5885 1.1 mrg break; 5886 1.1 mrg default: 5887 1.1 mrg output_operand_lossage ("invalid operand %%xn code"); 5888 1.1 mrg return; 5889 1.1 mrg } 5890 1.1 mrg fputs (s, file); 5891 1.1 mrg } 5892 1.1 mrg return; 5893 1.1 mrg case 'e': 5894 1.1 mrg fputs ("sext(", file); 5895 1.1 mrg print_operand (file, x, 0); 5896 1.1 mrg fputs (")", file); 5897 1.1 mrg return; 5898 1.1 mrg case 'i': 5899 1.1 mrg case 'I': 5900 1.1 mrg case 'u': 5901 1.1 mrg case 'U': 5902 1.1 mrg { 5903 1.1 mrg bool signed_p = code == 'i'; 5904 1.1 mrg bool min32_p = code == 'I' || code == 'U'; 5905 1.1 mrg const char *s = ""; 5906 1.1 mrg machine_mode mode = GET_MODE (x); 5907 1.1 mrg if (VECTOR_MODE_P (mode)) 5908 1.1 mrg mode = GET_MODE_INNER (mode); 5909 1.1 mrg if (mode == VOIDmode) 5910 1.1 mrg switch (GET_CODE (x)) 5911 1.1 mrg { 5912 1.1 mrg case CONST_INT: 5913 1.1 mrg s = signed_p ? 
"_i32" : "_u32"; 5914 1.1 mrg break; 5915 1.1 mrg case CONST_DOUBLE: 5916 1.1 mrg s = "_f64"; 5917 1.1 mrg break; 5918 1.1 mrg default: 5919 1.1 mrg output_operand_lossage ("invalid operand %%xn code"); 5920 1.1 mrg return; 5921 1.1 mrg } 5922 1.1 mrg else if (FLOAT_MODE_P (mode)) 5923 1.1 mrg switch (GET_MODE_SIZE (mode)) 5924 1.1 mrg { 5925 1.1 mrg case 2: 5926 1.1 mrg s = "_f16"; 5927 1.1 mrg break; 5928 1.1 mrg case 4: 5929 1.1 mrg s = "_f32"; 5930 1.1 mrg break; 5931 1.1 mrg case 8: 5932 1.1 mrg s = "_f64"; 5933 1.1 mrg break; 5934 1.1 mrg default: 5935 1.1 mrg output_operand_lossage ("invalid operand %%xn code"); 5936 1.1 mrg return; 5937 1.1 mrg } 5938 1.1 mrg else if (min32_p) 5939 1.1 mrg switch (GET_MODE_SIZE (mode)) 5940 1.1 mrg { 5941 1.1 mrg case 1: 5942 1.1 mrg case 2: 5943 1.1 mrg case 4: 5944 1.1 mrg s = signed_p ? "_i32" : "_u32"; 5945 1.1 mrg break; 5946 1.1 mrg case 8: 5947 1.1 mrg s = signed_p ? "_i64" : "_u64"; 5948 1.1 mrg break; 5949 1.1 mrg default: 5950 1.1 mrg output_operand_lossage ("invalid operand %%xn code"); 5951 1.1 mrg return; 5952 1.1 mrg } 5953 1.1 mrg else 5954 1.1 mrg switch (GET_MODE_SIZE (mode)) 5955 1.1 mrg { 5956 1.1 mrg case 1: 5957 1.1 mrg s = signed_p ? "_i8" : "_u8"; 5958 1.1 mrg break; 5959 1.1 mrg case 2: 5960 1.1 mrg s = signed_p ? "_i16" : "_u16"; 5961 1.1 mrg break; 5962 1.1 mrg case 4: 5963 1.1 mrg s = signed_p ? "_i32" : "_u32"; 5964 1.1 mrg break; 5965 1.1 mrg case 8: 5966 1.1 mrg s = signed_p ? "_i64" : "_u64"; 5967 1.1 mrg break; 5968 1.1 mrg default: 5969 1.1 mrg output_operand_lossage ("invalid operand %%xn code"); 5970 1.1 mrg return; 5971 1.1 mrg } 5972 1.1 mrg fputs (s, file); 5973 1.1 mrg } 5974 1.1 mrg return; 5975 1.1 mrg /* Print operand size as untyped suffix. 
*/ 5976 1.1 mrg case 'o': 5977 1.1 mrg { 5978 1.1 mrg const char *s = 0; 5979 1.1 mrg switch (GET_MODE_SIZE (GET_MODE (x))) 5980 1.1 mrg { 5981 1.1 mrg case 1: 5982 1.1 mrg s = "_ubyte"; 5983 1.1 mrg break; 5984 1.1 mrg case 2: 5985 1.1 mrg s = "_ushort"; 5986 1.1 mrg break; 5987 1.1 mrg /* The following are full-vector variants. */ 5988 1.1 mrg case 64: 5989 1.1 mrg s = "_ubyte"; 5990 1.1 mrg break; 5991 1.1 mrg case 128: 5992 1.1 mrg s = "_ushort"; 5993 1.1 mrg break; 5994 1.1 mrg } 5995 1.1 mrg 5996 1.1 mrg if (s) 5997 1.1 mrg { 5998 1.1 mrg fputs (s, file); 5999 1.1 mrg return; 6000 1.1 mrg } 6001 1.1 mrg 6002 1.1 mrg /* Fall-through - the other cases for 'o' are the same as for 's'. */ 6003 1.1 mrg gcc_fallthrough(); 6004 1.1 mrg } 6005 1.1 mrg case 's': 6006 1.1 mrg { 6007 1.1 mrg const char *s = ""; 6008 1.1 mrg switch (GET_MODE_SIZE (GET_MODE (x))) 6009 1.1 mrg { 6010 1.1 mrg case 1: 6011 1.1 mrg s = "_byte"; 6012 1.1 mrg break; 6013 1.1 mrg case 2: 6014 1.1 mrg s = "_short"; 6015 1.1 mrg break; 6016 1.1 mrg case 4: 6017 1.1 mrg s = "_dword"; 6018 1.1 mrg break; 6019 1.1 mrg case 8: 6020 1.1 mrg s = "_dwordx2"; 6021 1.1 mrg break; 6022 1.1 mrg case 12: 6023 1.1 mrg s = "_dwordx3"; 6024 1.1 mrg break; 6025 1.1 mrg case 16: 6026 1.1 mrg s = "_dwordx4"; 6027 1.1 mrg break; 6028 1.1 mrg case 32: 6029 1.1 mrg s = "_dwordx8"; 6030 1.1 mrg break; 6031 1.1 mrg case 64: 6032 1.1 mrg s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16"; 6033 1.1 mrg break; 6034 1.1 mrg /* The following are full-vector variants. 
*/ 6035 1.1 mrg case 128: 6036 1.1 mrg s = "_short"; 6037 1.1 mrg break; 6038 1.1 mrg case 256: 6039 1.1 mrg s = "_dword"; 6040 1.1 mrg break; 6041 1.1 mrg case 512: 6042 1.1 mrg s = "_dwordx2"; 6043 1.1 mrg break; 6044 1.1 mrg default: 6045 1.1 mrg output_operand_lossage ("invalid operand %%xn code"); 6046 1.1 mrg return; 6047 1.1 mrg } 6048 1.1 mrg fputs (s, file); 6049 1.1 mrg } 6050 1.1 mrg return; 6051 1.1 mrg case 'A': 6052 1.1 mrg if (xcode != MEM) 6053 1.1 mrg { 6054 1.1 mrg output_operand_lossage ("invalid %%xn code"); 6055 1.1 mrg return; 6056 1.1 mrg } 6057 1.1 mrg print_operand_address (file, x); 6058 1.1 mrg return; 6059 1.1 mrg case 'O': 6060 1.1 mrg { 6061 1.1 mrg if (xcode != MEM) 6062 1.1 mrg { 6063 1.1 mrg output_operand_lossage ("invalid %%xn code"); 6064 1.1 mrg return; 6065 1.1 mrg } 6066 1.1 mrg if (AS_GDS_P (MEM_ADDR_SPACE (x))) 6067 1.1 mrg fprintf (file, " gds"); 6068 1.1 mrg 6069 1.1 mrg rtx x0 = XEXP (x, 0); 6070 1.1 mrg if (AS_GLOBAL_P (MEM_ADDR_SPACE (x))) 6071 1.1 mrg { 6072 1.1 mrg gcc_assert (TARGET_GCN5_PLUS); 6073 1.1 mrg 6074 1.1 mrg fprintf (file, ", "); 6075 1.1 mrg 6076 1.1 mrg rtx base = x0; 6077 1.1 mrg rtx const_offset = NULL_RTX; 6078 1.1 mrg 6079 1.1 mrg if (GET_CODE (base) == PLUS) 6080 1.1 mrg { 6081 1.1 mrg rtx offset = XEXP (x0, 1); 6082 1.1 mrg base = XEXP (x0, 0); 6083 1.1 mrg 6084 1.1 mrg if (GET_CODE (base) == PLUS) 6085 1.1 mrg /* (SGPR + VGPR) + CONST */ 6086 1.1 mrg /* Ignore the VGPR offset for this operand. */ 6087 1.1 mrg base = XEXP (base, 0); 6088 1.1 mrg 6089 1.1 mrg if (CONST_INT_P (offset)) 6090 1.1 mrg const_offset = XEXP (x0, 1); 6091 1.1 mrg else if (REG_P (offset)) 6092 1.1 mrg /* SGPR + VGPR */ 6093 1.1 mrg /* Ignore the VGPR offset for this operand. 
*/ 6094 1.1 mrg ; 6095 1.1 mrg else 6096 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address"); 6097 1.1 mrg } 6098 1.1 mrg 6099 1.1 mrg if (REG_P (base)) 6100 1.1 mrg { 6101 1.1 mrg if (VGPR_REGNO_P (REGNO (base))) 6102 1.1 mrg /* The VGPR address is specified in the %A operand. */ 6103 1.1 mrg fprintf (file, "off"); 6104 1.1 mrg else if (SGPR_REGNO_P (REGNO (base))) 6105 1.1 mrg print_reg (file, base); 6106 1.1 mrg else 6107 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address"); 6108 1.1 mrg } 6109 1.1 mrg else 6110 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address"); 6111 1.1 mrg 6112 1.1 mrg if (const_offset != NULL_RTX) 6113 1.1 mrg fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, 6114 1.1 mrg INTVAL (const_offset)); 6115 1.1 mrg 6116 1.1 mrg return; 6117 1.1 mrg } 6118 1.1 mrg 6119 1.1 mrg if (GET_CODE (x0) == REG) 6120 1.1 mrg return; 6121 1.1 mrg if (GET_CODE (x0) != PLUS) 6122 1.1 mrg { 6123 1.1 mrg output_operand_lossage ("invalid %%xn code"); 6124 1.1 mrg return; 6125 1.1 mrg } 6126 1.1 mrg rtx val = XEXP (x0, 1); 6127 1.1 mrg if (GET_CODE (val) == CONST_VECTOR) 6128 1.1 mrg val = CONST_VECTOR_ELT (val, 0); 6129 1.1 mrg if (GET_CODE (val) != CONST_INT) 6130 1.1 mrg { 6131 1.1 mrg output_operand_lossage ("invalid %%xn code"); 6132 1.1 mrg return; 6133 1.1 mrg } 6134 1.1 mrg fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (val)); 6135 1.1 mrg 6136 1.1 mrg } 6137 1.1 mrg return; 6138 1.1 mrg case 'c': 6139 1.1 mrg invert = true; 6140 1.1 mrg /* Fall through. 
*/ 6141 1.1 mrg case 'C': 6142 1.1 mrg { 6143 1.1 mrg const char *s; 6144 1.1 mrg bool num = false; 6145 1.1 mrg if ((xcode != EQ && xcode != NE) || !REG_P (XEXP (x, 0))) 6146 1.1 mrg { 6147 1.1 mrg output_operand_lossage ("invalid %%xn code"); 6148 1.1 mrg return; 6149 1.1 mrg } 6150 1.1 mrg switch (REGNO (XEXP (x, 0))) 6151 1.1 mrg { 6152 1.1 mrg case VCC_REG: 6153 1.1 mrg case VCCZ_REG: 6154 1.1 mrg s = "_vcc"; 6155 1.1 mrg break; 6156 1.1 mrg case SCC_REG: 6157 1.1 mrg /* For some reason llvm-mc insists on scc0 instead of sccz. */ 6158 1.1 mrg num = true; 6159 1.1 mrg s = "_scc"; 6160 1.1 mrg break; 6161 1.1 mrg case EXECZ_REG: 6162 1.1 mrg s = "_exec"; 6163 1.1 mrg break; 6164 1.1 mrg default: 6165 1.1 mrg output_operand_lossage ("invalid %%xn code"); 6166 1.1 mrg return; 6167 1.1 mrg } 6168 1.1 mrg fputs (s, file); 6169 1.1 mrg if (xcode == (invert ? NE : EQ)) 6170 1.1 mrg fputc (num ? '0' : 'z', file); 6171 1.1 mrg else 6172 1.1 mrg fputs (num ? "1" : "nz", file); 6173 1.1 mrg return; 6174 1.1 mrg } 6175 1.1 mrg case 'D': 6176 1.1 mrg { 6177 1.1 mrg const char *s; 6178 1.1 mrg bool cmp_signed = false; 6179 1.1 mrg switch (xcode) 6180 1.1 mrg { 6181 1.1 mrg case EQ: 6182 1.1 mrg s = "_eq_"; 6183 1.1 mrg break; 6184 1.1 mrg case NE: 6185 1.1 mrg s = "_lg_"; 6186 1.1 mrg break; 6187 1.1 mrg case LT: 6188 1.1 mrg s = "_lt_"; 6189 1.1 mrg cmp_signed = true; 6190 1.1 mrg break; 6191 1.1 mrg case LE: 6192 1.1 mrg s = "_le_"; 6193 1.1 mrg cmp_signed = true; 6194 1.1 mrg break; 6195 1.1 mrg case GT: 6196 1.1 mrg s = "_gt_"; 6197 1.1 mrg cmp_signed = true; 6198 1.1 mrg break; 6199 1.1 mrg case GE: 6200 1.1 mrg s = "_ge_"; 6201 1.1 mrg cmp_signed = true; 6202 1.1 mrg break; 6203 1.1 mrg case LTU: 6204 1.1 mrg s = "_lt_"; 6205 1.1 mrg break; 6206 1.1 mrg case LEU: 6207 1.1 mrg s = "_le_"; 6208 1.1 mrg break; 6209 1.1 mrg case GTU: 6210 1.1 mrg s = "_gt_"; 6211 1.1 mrg break; 6212 1.1 mrg case GEU: 6213 1.1 mrg s = "_ge_"; 6214 1.1 mrg break; 6215 1.1 mrg default: 6216 
1.1 mrg output_operand_lossage ("invalid %%xn code"); 6217 1.1 mrg return; 6218 1.1 mrg } 6219 1.1 mrg fputs (s, file); 6220 1.1 mrg fputc (cmp_signed ? 'i' : 'u', file); 6221 1.1 mrg 6222 1.1 mrg machine_mode mode = GET_MODE (XEXP (x, 0)); 6223 1.1 mrg 6224 1.1 mrg if (mode == VOIDmode) 6225 1.1 mrg mode = GET_MODE (XEXP (x, 1)); 6226 1.1 mrg 6227 1.1 mrg /* If both sides are constants, then assume the instruction is in 6228 1.1 mrg SImode since s_cmp can only do integer compares. */ 6229 1.1 mrg if (mode == VOIDmode) 6230 1.1 mrg mode = SImode; 6231 1.1 mrg 6232 1.1 mrg switch (GET_MODE_SIZE (mode)) 6233 1.1 mrg { 6234 1.1 mrg case 4: 6235 1.1 mrg s = "32"; 6236 1.1 mrg break; 6237 1.1 mrg case 8: 6238 1.1 mrg s = "64"; 6239 1.1 mrg break; 6240 1.1 mrg default: 6241 1.1 mrg output_operand_lossage ("invalid operand %%xn code"); 6242 1.1 mrg return; 6243 1.1 mrg } 6244 1.1 mrg fputs (s, file); 6245 1.1 mrg return; 6246 1.1 mrg } 6247 1.1 mrg case 'E': 6248 1.1 mrg { 6249 1.1 mrg const char *s; 6250 1.1 mrg bool cmp_signed = false; 6251 1.1 mrg machine_mode mode = GET_MODE (XEXP (x, 0)); 6252 1.1 mrg 6253 1.1 mrg if (mode == VOIDmode) 6254 1.1 mrg mode = GET_MODE (XEXP (x, 1)); 6255 1.1 mrg 6256 1.1 mrg /* If both sides are constants, assume the instruction is in SFmode 6257 1.1 mrg if either operand is floating point, otherwise assume SImode. */ 6258 1.1 mrg if (mode == VOIDmode) 6259 1.1 mrg { 6260 1.1 mrg if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE 6261 1.1 mrg || GET_CODE (XEXP (x, 1)) == CONST_DOUBLE) 6262 1.1 mrg mode = SFmode; 6263 1.1 mrg else 6264 1.1 mrg mode = SImode; 6265 1.1 mrg } 6266 1.1 mrg 6267 1.1 mrg /* Use the same format code for vector comparisons. 
*/ 6268 1.1 mrg if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT 6269 1.1 mrg || GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 6270 1.1 mrg mode = GET_MODE_INNER (mode); 6271 1.1 mrg 6272 1.1 mrg bool float_p = GET_MODE_CLASS (mode) == MODE_FLOAT; 6273 1.1 mrg 6274 1.1 mrg switch (xcode) 6275 1.1 mrg { 6276 1.1 mrg case EQ: 6277 1.1 mrg s = "_eq_"; 6278 1.1 mrg break; 6279 1.1 mrg case NE: 6280 1.1 mrg s = float_p ? "_neq_" : "_ne_"; 6281 1.1 mrg break; 6282 1.1 mrg case LT: 6283 1.1 mrg s = "_lt_"; 6284 1.1 mrg cmp_signed = true; 6285 1.1 mrg break; 6286 1.1 mrg case LE: 6287 1.1 mrg s = "_le_"; 6288 1.1 mrg cmp_signed = true; 6289 1.1 mrg break; 6290 1.1 mrg case GT: 6291 1.1 mrg s = "_gt_"; 6292 1.1 mrg cmp_signed = true; 6293 1.1 mrg break; 6294 1.1 mrg case GE: 6295 1.1 mrg s = "_ge_"; 6296 1.1 mrg cmp_signed = true; 6297 1.1 mrg break; 6298 1.1 mrg case LTU: 6299 1.1 mrg s = "_lt_"; 6300 1.1 mrg break; 6301 1.1 mrg case LEU: 6302 1.1 mrg s = "_le_"; 6303 1.1 mrg break; 6304 1.1 mrg case GTU: 6305 1.1 mrg s = "_gt_"; 6306 1.1 mrg break; 6307 1.1 mrg case GEU: 6308 1.1 mrg s = "_ge_"; 6309 1.1 mrg break; 6310 1.1 mrg case ORDERED: 6311 1.1 mrg s = "_o_"; 6312 1.1 mrg break; 6313 1.1 mrg case UNORDERED: 6314 1.1 mrg s = "_u_"; 6315 1.1 mrg break; 6316 1.1 mrg case UNEQ: 6317 1.1 mrg s = "_nlg_"; 6318 1.1 mrg break; 6319 1.1 mrg case UNGE: 6320 1.1 mrg s = "_nlt_"; 6321 1.1 mrg break; 6322 1.1 mrg case UNGT: 6323 1.1 mrg s = "_nle_"; 6324 1.1 mrg break; 6325 1.1 mrg case UNLE: 6326 1.1 mrg s = "_ngt_"; 6327 1.1 mrg break; 6328 1.1 mrg case UNLT: 6329 1.1 mrg s = "_nge_"; 6330 1.1 mrg break; 6331 1.1 mrg case LTGT: 6332 1.1 mrg s = "_lg_"; 6333 1.1 mrg break; 6334 1.1 mrg default: 6335 1.1 mrg output_operand_lossage ("invalid %%xn code"); 6336 1.1 mrg return; 6337 1.1 mrg } 6338 1.1 mrg fputs (s, file); 6339 1.1 mrg fputc (float_p ? 'f' : cmp_signed ? 
'i' : 'u', file); 6340 1.1 mrg 6341 1.1 mrg switch (GET_MODE_SIZE (mode)) 6342 1.1 mrg { 6343 1.1 mrg case 1: 6344 1.1 mrg output_operand_lossage ("operand %%xn code invalid for QImode"); 6345 1.1 mrg return; 6346 1.1 mrg case 2: 6347 1.1 mrg s = "16"; 6348 1.1 mrg break; 6349 1.1 mrg case 4: 6350 1.1 mrg s = "32"; 6351 1.1 mrg break; 6352 1.1 mrg case 8: 6353 1.1 mrg s = "64"; 6354 1.1 mrg break; 6355 1.1 mrg default: 6356 1.1 mrg output_operand_lossage ("invalid operand %%xn code"); 6357 1.1 mrg return; 6358 1.1 mrg } 6359 1.1 mrg fputs (s, file); 6360 1.1 mrg return; 6361 1.1 mrg } 6362 1.1 mrg case 'L': 6363 1.1 mrg print_operand (file, gcn_operand_part (GET_MODE (x), x, 0), 0); 6364 1.1 mrg return; 6365 1.1 mrg case 'H': 6366 1.1 mrg print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0); 6367 1.1 mrg return; 6368 1.1 mrg case 'R': 6369 1.1 mrg /* Print a scalar register number as an integer. Temporary hack. */ 6370 1.1 mrg gcc_assert (REG_P (x)); 6371 1.1 mrg fprintf (file, "%u", (int) REGNO (x)); 6372 1.1 mrg return; 6373 1.1 mrg case 'V': 6374 1.1 mrg /* Print a vector register number as an integer. Temporary hack. 
*/ 6375 1.1 mrg gcc_assert (REG_P (x)); 6376 1.1 mrg fprintf (file, "%u", (int) REGNO (x) - FIRST_VGPR_REG); 6377 1.1 mrg return; 6378 1.1 mrg case 0: 6379 1.1 mrg if (xcode == REG) 6380 1.1 mrg print_reg (file, x); 6381 1.1 mrg else if (xcode == MEM) 6382 1.1 mrg output_address (GET_MODE (x), x); 6383 1.1 mrg else if (xcode == CONST_INT) 6384 1.1 mrg fprintf (file, "%i", (int) INTVAL (x)); 6385 1.1 mrg else if (xcode == CONST_VECTOR) 6386 1.1 mrg print_operand (file, CONST_VECTOR_ELT (x, 0), code); 6387 1.1 mrg else if (xcode == CONST_DOUBLE) 6388 1.1 mrg { 6389 1.1 mrg const char *str; 6390 1.1 mrg switch (gcn_inline_fp_constant_p (x, false)) 6391 1.1 mrg { 6392 1.1 mrg case 240: 6393 1.1 mrg str = "0.5"; 6394 1.1 mrg break; 6395 1.1 mrg case 241: 6396 1.1 mrg str = "-0.5"; 6397 1.1 mrg break; 6398 1.1 mrg case 242: 6399 1.1 mrg str = "1.0"; 6400 1.1 mrg break; 6401 1.1 mrg case 243: 6402 1.1 mrg str = "-1.0"; 6403 1.1 mrg break; 6404 1.1 mrg case 244: 6405 1.1 mrg str = "2.0"; 6406 1.1 mrg break; 6407 1.1 mrg case 245: 6408 1.1 mrg str = "-2.0"; 6409 1.1 mrg break; 6410 1.1 mrg case 246: 6411 1.1 mrg str = "4.0"; 6412 1.1 mrg break; 6413 1.1 mrg case 247: 6414 1.1 mrg str = "-4.0"; 6415 1.1 mrg break; 6416 1.1 mrg case 248: 6417 1.1 mrg str = "1/pi"; 6418 1.1 mrg break; 6419 1.1 mrg default: 6420 1.1 mrg rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode 6421 1.1 mrg ? 
		      DImode : SImode,
					    x, GET_MODE (x), 0);
	      /* NOTE(review): this tests X, but the value consumed below
		 is IX.  simplify_gen_subreg can return NULL_RTX, in which
		 case print_operand would be handed a null rtx, while the
		 lossage branch is unreachable (X is already known non-null
		 at this point).  Looks like this should test IX --
		 confirm and fix separately.  */
	      if (x)
		print_operand (file, ix, code);
	      else
		output_operand_lossage ("invalid fp constant");
	      return;
	      break;
	    }
	  /* NOTE(review): non-literal format string; STR is always one of
	     the fixed table entries above so this is safe, but fputs would
	     express that intent better.  */
	  fprintf (file, str);
	  return;
	}
      else
	output_addr_const (file, x);
      return;
    case '^':
      /* Append the "_co" suffix when targeting GCN5 or later.  */
      if (TARGET_GCN5_PLUS)
	fputs ("_co", file);
      return;
    case 'g':
      /* Append " glc" for volatile memory operands.  */
      gcc_assert (xcode == MEM);
      if (MEM_VOLATILE_P (x))
	fputs (" glc", file);
      return;
    default:
      output_operand_lossage ("invalid %%xn code");
    }
  gcc_unreachable ();
}

/* Implement DBX_REGISTER_NUMBER macro.

   Return the DWARF register number that corresponds to the GCC internal
   REGNO.  */

unsigned int
gcn_dwarf_register_number (unsigned int regno)
{
  /* Registers defined in DWARF.  The special registers are tested before
     the generic SGPR/VGPR range checks below; keep that order.  */
  if (regno == EXEC_LO_REG)
    return 17;
  /* We need to use a more complex DWARF expression for this
     else if (regno == EXEC_HI_REG)
     return 17; */
  else if (regno == VCC_LO_REG)
    return 768;
  /* We need to use a more complex DWARF expression for this
     else if (regno == VCC_HI_REG)
     return 768; */
  else if (regno == SCC_REG)
    return 128;
  else if (regno == DWARF_LINK_REGISTER)
    return 16;
  else if (SGPR_REGNO_P (regno))
    {
      /* The first 64 SGPRs get one DWARF range (base 32), the remainder
	 another (base 1024) -- presumably per the AMDGPU DWARF register
	 mapping; confirm against that specification.  */
      if (regno - FIRST_SGPR_REG < 64)
	return (regno - FIRST_SGPR_REG + 32);
      else
	return (regno - FIRST_SGPR_REG + 1024);
    }
  else if (VGPR_REGNO_P (regno))
    return (regno - FIRST_VGPR_REG + 2560);

  /* Otherwise, there's nothing sensible to do.  */
  return regno + 100000;
}

/* Implement TARGET_DWARF_REGISTER_SPAN.

   DImode and Vector DImode require additional registers.  */

static rtx
gcn_dwarf_register_span (rtx rtl)
{
  machine_mode mode = GET_MODE (rtl);

  /* For vector registers the span is determined by the element mode.  */
  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Only 8-byte (DImode-sized) values span two 32-bit registers.  */
  if (GET_MODE_SIZE (mode) != 8)
    return NULL_RTX;

  unsigned regno = REGNO (rtl);

  /* No span information for the link register.  */
  if (regno == DWARF_LINK_REGISTER)
    return NULL_RTX;

  /* Describe the value as a pair of consecutive 32-bit registers.  */
  rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
  XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
  XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);

  return p;
}

/* }}} */
/* {{{ TARGET hook overrides.
*/ 6517 1.1 mrg 6518 1.1 mrg #undef TARGET_ADDR_SPACE_ADDRESS_MODE 6519 1.1 mrg #define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode 6520 1.1 mrg #undef TARGET_ADDR_SPACE_DEBUG 6521 1.1 mrg #define TARGET_ADDR_SPACE_DEBUG gcn_addr_space_debug 6522 1.1 mrg #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P 6523 1.1 mrg #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \ 6524 1.1 mrg gcn_addr_space_legitimate_address_p 6525 1.1 mrg #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS 6526 1.1 mrg #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address 6527 1.1 mrg #undef TARGET_ADDR_SPACE_POINTER_MODE 6528 1.1 mrg #define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode 6529 1.1 mrg #undef TARGET_ADDR_SPACE_SUBSET_P 6530 1.1 mrg #define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p 6531 1.1 mrg #undef TARGET_ADDR_SPACE_CONVERT 6532 1.1 mrg #define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert 6533 1.1 mrg #undef TARGET_ARG_PARTIAL_BYTES 6534 1.1 mrg #define TARGET_ARG_PARTIAL_BYTES gcn_arg_partial_bytes 6535 1.1 mrg #undef TARGET_ASM_ALIGNED_DI_OP 6536 1.1 mrg #define TARGET_ASM_ALIGNED_DI_OP "\t.8byte\t" 6537 1.1 mrg #undef TARGET_ASM_FILE_START 6538 1.1 mrg #define TARGET_ASM_FILE_START output_file_start 6539 1.1 mrg #undef TARGET_ASM_FUNCTION_PROLOGUE 6540 1.1 mrg #define TARGET_ASM_FUNCTION_PROLOGUE gcn_target_asm_function_prologue 6541 1.1 mrg #undef TARGET_ASM_SELECT_SECTION 6542 1.1 mrg #define TARGET_ASM_SELECT_SECTION gcn_asm_select_section 6543 1.1 mrg #undef TARGET_ASM_TRAMPOLINE_TEMPLATE 6544 1.1 mrg #define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template 6545 1.1 mrg #undef TARGET_ATTRIBUTE_TABLE 6546 1.1 mrg #define TARGET_ATTRIBUTE_TABLE gcn_attribute_table 6547 1.1 mrg #undef TARGET_BUILTIN_DECL 6548 1.1 mrg #define TARGET_BUILTIN_DECL gcn_builtin_decl 6549 1.1 mrg #undef TARGET_CAN_CHANGE_MODE_CLASS 6550 1.1 mrg #define TARGET_CAN_CHANGE_MODE_CLASS gcn_can_change_mode_class 6551 1.1 mrg #undef 
TARGET_CAN_ELIMINATE 6552 1.1 mrg #define TARGET_CAN_ELIMINATE gcn_can_eliminate_p 6553 1.1 mrg #undef TARGET_CANNOT_COPY_INSN_P 6554 1.1 mrg #define TARGET_CANNOT_COPY_INSN_P gcn_cannot_copy_insn_p 6555 1.1 mrg #undef TARGET_CLASS_LIKELY_SPILLED_P 6556 1.1 mrg #define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p 6557 1.1 mrg #undef TARGET_CLASS_MAX_NREGS 6558 1.1 mrg #define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs 6559 1.1 mrg #undef TARGET_CONDITIONAL_REGISTER_USAGE 6560 1.1 mrg #define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage 6561 1.1 mrg #undef TARGET_CONSTANT_ALIGNMENT 6562 1.1 mrg #define TARGET_CONSTANT_ALIGNMENT gcn_constant_alignment 6563 1.1 mrg #undef TARGET_DEBUG_UNWIND_INFO 6564 1.1 mrg #define TARGET_DEBUG_UNWIND_INFO gcn_debug_unwind_info 6565 1.1 mrg #undef TARGET_DWARF_REGISTER_SPAN 6566 1.1 mrg #define TARGET_DWARF_REGISTER_SPAN gcn_dwarf_register_span 6567 1.1 mrg #undef TARGET_EMUTLS_VAR_INIT 6568 1.1 mrg #define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init 6569 1.1 mrg #undef TARGET_EXPAND_BUILTIN 6570 1.1 mrg #define TARGET_EXPAND_BUILTIN gcn_expand_builtin 6571 1.1 mrg #undef TARGET_FRAME_POINTER_REQUIRED 6572 1.1 mrg #define TARGET_FRAME_POINTER_REQUIRED gcn_frame_pointer_rqd 6573 1.1 mrg #undef TARGET_FUNCTION_ARG 6574 1.1 mrg #undef TARGET_FUNCTION_ARG_ADVANCE 6575 1.1 mrg #define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance 6576 1.1 mrg #define TARGET_FUNCTION_ARG gcn_function_arg 6577 1.1 mrg #undef TARGET_FUNCTION_VALUE 6578 1.1 mrg #define TARGET_FUNCTION_VALUE gcn_function_value 6579 1.1 mrg #undef TARGET_FUNCTION_VALUE_REGNO_P 6580 1.1 mrg #define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p 6581 1.1 mrg #undef TARGET_GIMPLIFY_VA_ARG_EXPR 6582 1.1 mrg #define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr 6583 1.1 mrg #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA 6584 1.1 mrg #define TARGET_OMP_DEVICE_KIND_ARCH_ISA gcn_omp_device_kind_arch_isa 6585 1.1 mrg #undef 
TARGET_GOACC_ADJUST_PRIVATE_DECL 6586 1.1 mrg #define TARGET_GOACC_ADJUST_PRIVATE_DECL gcn_goacc_adjust_private_decl 6587 1.1 mrg #undef TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD 6588 1.1 mrg #define TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD \ 6589 1.1 mrg gcn_goacc_create_worker_broadcast_record 6590 1.1 mrg #undef TARGET_GOACC_FORK_JOIN 6591 1.1 mrg #define TARGET_GOACC_FORK_JOIN gcn_fork_join 6592 1.1 mrg #undef TARGET_GOACC_REDUCTION 6593 1.1 mrg #define TARGET_GOACC_REDUCTION gcn_goacc_reduction 6594 1.1 mrg #undef TARGET_GOACC_VALIDATE_DIMS 6595 1.1 mrg #define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims 6596 1.1 mrg #undef TARGET_GOACC_SHARED_MEM_LAYOUT 6597 1.1 mrg #define TARGET_GOACC_SHARED_MEM_LAYOUT gcn_shared_mem_layout 6598 1.1 mrg #undef TARGET_HARD_REGNO_MODE_OK 6599 1.1 mrg #define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok 6600 1.1 mrg #undef TARGET_HARD_REGNO_NREGS 6601 1.1 mrg #define TARGET_HARD_REGNO_NREGS gcn_hard_regno_nregs 6602 1.1 mrg #undef TARGET_HAVE_SPECULATION_SAFE_VALUE 6603 1.1 mrg #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed 6604 1.1 mrg #undef TARGET_INIT_BUILTINS 6605 1.1 mrg #define TARGET_INIT_BUILTINS gcn_init_builtins 6606 1.1 mrg #undef TARGET_INIT_LIBFUNCS 6607 1.1 mrg #define TARGET_INIT_LIBFUNCS gcn_init_libfuncs 6608 1.1 mrg #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS 6609 1.1 mrg #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \ 6610 1.1 mrg gcn_ira_change_pseudo_allocno_class 6611 1.1 mrg #undef TARGET_LEGITIMATE_CONSTANT_P 6612 1.1 mrg #define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p 6613 1.1 mrg #undef TARGET_LRA_P 6614 1.1 mrg #define TARGET_LRA_P hook_bool_void_true 6615 1.1 mrg #undef TARGET_MACHINE_DEPENDENT_REORG 6616 1.1 mrg #define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg 6617 1.1 mrg #undef TARGET_MEMORY_MOVE_COST 6618 1.1 mrg #define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost 6619 1.1 mrg #undef TARGET_MODES_TIEABLE_P 6620 1.1 mrg #define 
TARGET_MODES_TIEABLE_P gcn_modes_tieable_p 6621 1.1 mrg #undef TARGET_OPTION_OVERRIDE 6622 1.1 mrg #define TARGET_OPTION_OVERRIDE gcn_option_override 6623 1.1 mrg #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED 6624 1.1 mrg #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED \ 6625 1.1 mrg gcn_pretend_outgoing_varargs_named 6626 1.1 mrg #undef TARGET_PROMOTE_FUNCTION_MODE 6627 1.1 mrg #define TARGET_PROMOTE_FUNCTION_MODE gcn_promote_function_mode 6628 1.1 mrg #undef TARGET_REGISTER_MOVE_COST 6629 1.1 mrg #define TARGET_REGISTER_MOVE_COST gcn_register_move_cost 6630 1.1 mrg #undef TARGET_RETURN_IN_MEMORY 6631 1.1 mrg #define TARGET_RETURN_IN_MEMORY gcn_return_in_memory 6632 1.1 mrg #undef TARGET_RTX_COSTS 6633 1.1 mrg #define TARGET_RTX_COSTS gcn_rtx_costs 6634 1.1 mrg #undef TARGET_SECONDARY_RELOAD 6635 1.1 mrg #define TARGET_SECONDARY_RELOAD gcn_secondary_reload 6636 1.1 mrg #undef TARGET_SECTION_TYPE_FLAGS 6637 1.1 mrg #define TARGET_SECTION_TYPE_FLAGS gcn_section_type_flags 6638 1.1 mrg #undef TARGET_SCALAR_MODE_SUPPORTED_P 6639 1.1 mrg #define TARGET_SCALAR_MODE_SUPPORTED_P gcn_scalar_mode_supported_p 6640 1.1 mrg #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P 6641 1.1 mrg #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \ 6642 1.1 mrg gcn_small_register_classes_for_mode_p 6643 1.1 mrg #undef TARGET_SPILL_CLASS 6644 1.1 mrg #define TARGET_SPILL_CLASS gcn_spill_class 6645 1.1 mrg #undef TARGET_STRICT_ARGUMENT_NAMING 6646 1.1 mrg #define TARGET_STRICT_ARGUMENT_NAMING gcn_strict_argument_naming 6647 1.1 mrg #undef TARGET_TRAMPOLINE_INIT 6648 1.1 mrg #define TARGET_TRAMPOLINE_INIT gcn_trampoline_init 6649 1.1 mrg #undef TARGET_TRULY_NOOP_TRUNCATION 6650 1.1 mrg #define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation 6651 1.1 mrg #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 6652 1.1 mrg #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost 6653 1.1 mrg #undef TARGET_VECTORIZE_GET_MASK_MODE 6654 1.1 mrg #define TARGET_VECTORIZE_GET_MASK_MODE 
gcn_vectorize_get_mask_mode 6655 1.1 mrg #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE 6656 1.1 mrg #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode 6657 1.1 mrg #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT 6658 1.1 mrg #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \ 6659 1.1 mrg gcn_preferred_vector_alignment 6660 1.1 mrg #undef TARGET_VECTORIZE_RELATED_MODE 6661 1.1 mrg #define TARGET_VECTORIZE_RELATED_MODE gcn_related_vector_mode 6662 1.1 mrg #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT 6663 1.1 mrg #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ 6664 1.1 mrg gcn_vectorize_support_vector_misalignment 6665 1.1 mrg #undef TARGET_VECTORIZE_VEC_PERM_CONST 6666 1.1 mrg #define TARGET_VECTORIZE_VEC_PERM_CONST gcn_vectorize_vec_perm_const 6667 1.1 mrg #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE 6668 1.1 mrg #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ 6669 1.1 mrg gcn_vector_alignment_reachable 6670 1.1 mrg #undef TARGET_VECTOR_MODE_SUPPORTED_P 6671 1.1 mrg #define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p 6672 1.1 mrg 6673 1.1 mrg struct gcc_target targetm = TARGET_INITIALIZER; 6674 1.1 mrg 6675 1.1 mrg #include "gt-gcn.h" 6676 1.1 mrg /* }}} */ 6677