gcn.cc revision 1.1 1 1.1 mrg /* Copyright (C) 2016-2022 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is free software; you can redistribute it and/or modify it under
4 1.1 mrg the terms of the GNU General Public License as published by the Free
5 1.1 mrg Software Foundation; either version 3 of the License, or (at your option)
6 1.1 mrg any later version.
7 1.1 mrg
8 1.1 mrg This file is distributed in the hope that it will be useful, but WITHOUT
9 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 1.1 mrg for more details.
12 1.1 mrg
13 1.1 mrg You should have received a copy of the GNU General Public License
14 1.1 mrg along with GCC; see the file COPYING3. If not see
15 1.1 mrg <http://www.gnu.org/licenses/>. */
16 1.1 mrg
17 1.1 mrg /* {{{ Includes. */
18 1.1 mrg
19 1.1 mrg /* We want GET_MODE_SIZE et al to return integers, please. */
20 1.1 mrg #define IN_TARGET_CODE 1
21 1.1 mrg
22 1.1 mrg #include "config.h"
23 1.1 mrg #include "system.h"
24 1.1 mrg #include "coretypes.h"
25 1.1 mrg #include "backend.h"
26 1.1 mrg #include "target.h"
27 1.1 mrg #include "memmodel.h"
28 1.1 mrg #include "rtl.h"
29 1.1 mrg #include "tree.h"
30 1.1 mrg #include "df.h"
31 1.1 mrg #include "tm_p.h"
32 1.1 mrg #include "stringpool.h"
33 1.1 mrg #include "optabs.h"
34 1.1 mrg #include "regs.h"
35 1.1 mrg #include "emit-rtl.h"
36 1.1 mrg #include "recog.h"
37 1.1 mrg #include "diagnostic-core.h"
38 1.1 mrg #include "insn-attr.h"
39 1.1 mrg #include "fold-const.h"
40 1.1 mrg #include "calls.h"
41 1.1 mrg #include "explow.h"
42 1.1 mrg #include "expr.h"
43 1.1 mrg #include "output.h"
44 1.1 mrg #include "cfgrtl.h"
45 1.1 mrg #include "langhooks.h"
46 1.1 mrg #include "builtins.h"
47 1.1 mrg #include "omp-general.h"
48 1.1 mrg #include "print-rtl.h"
49 1.1 mrg #include "attribs.h"
50 1.1 mrg #include "varasm.h"
51 1.1 mrg #include "intl.h"
52 1.1 mrg #include "rtl-iter.h"
53 1.1 mrg #include "dwarf2.h"
54 1.1 mrg #include "gimple.h"
55 1.1 mrg
56 1.1 mrg /* This file should be included last. */
57 1.1 mrg #include "target-def.h"
58 1.1 mrg
59 1.1 mrg /* }}} */
60 1.1 mrg /* {{{ Global variables. */
61 1.1 mrg
/* Constants used by FP instructions.  Lazily initialized by
   init_ext_gcn_constants; see ext_gcn_constants_init.  */

static REAL_VALUE_TYPE dconst4, dconst1over2pi;
static bool ext_gcn_constants_init = 0;

/* Holds the ISA variant, derived from the command line parameters.  */

int gcn_isa = 3;		/* Default to GCN3.  */

/* Reserve this much space for LDS (for propagating variables from
   worker-single mode to worker-partitioned mode), per workgroup.  Global
   analysis could calculate an exact bound, but we don't do that yet.

   We want to permit full occupancy, so size accordingly.  */

/* Use this as a default, but allow it to grow if the user requests a large
   amount of gang-private shared-memory space.  */
static int acc_lds_size = 0x600;

#define OMP_LDS_SIZE 0x600    /* 0x600 is 1/40 total, rounded down.  */
#define ACC_LDS_SIZE acc_lds_size
#define OTHER_LDS_SIZE 65536  /* If in doubt, reserve all of it.  */

/* Selects the per-workgroup LDS reservation according to the offload model
   in effect (OpenACC, OpenMP, or neither).  */
#define LDS_SIZE (flag_openacc ? ACC_LDS_SIZE \
		  : flag_openmp ? OMP_LDS_SIZE \
		  : OTHER_LDS_SIZE)

/* Lower bound for gang_private_size_opt; see gcn_option_override.  */
static int gang_private_hwm = 32;
static hash_map<tree, int> lds_allocs;

/* The number of registers usable by normal non-kernel functions.
   The SGPR count includes any special extra registers such as VCC.  */

#define MAX_NORMAL_SGPR_COUNT	62  // i.e. 64 with VCC
#define MAX_NORMAL_VGPR_COUNT	24
97 1.1 mrg
98 1.1 mrg /* }}} */
99 1.1 mrg /* {{{ Initialization and options. */
100 1.1 mrg
101 1.1 mrg /* Initialize machine_function. */
102 1.1 mrg
103 1.1 mrg static struct machine_function *
104 1.1 mrg gcn_init_machine_status (void)
105 1.1 mrg {
106 1.1 mrg struct machine_function *f;
107 1.1 mrg
108 1.1 mrg f = ggc_cleared_alloc<machine_function> ();
109 1.1 mrg
110 1.1 mrg if (TARGET_GCN3)
111 1.1 mrg f->use_flat_addressing = true;
112 1.1 mrg
113 1.1 mrg return f;
114 1.1 mrg }
115 1.1 mrg
/* Implement TARGET_OPTION_OVERRIDE.

   Override option settings where defaults are variable, or we have specific
   needs to consider.  */

static void
gcn_option_override (void)
{
  init_machine_status = gcn_init_machine_status;

  /* The HSA runtime does not respect ELF load addresses, so force PIE.  */
  if (!flag_pie)
    flag_pie = 2;
  if (!flag_pic)
    flag_pic = flag_pie;

  /* Fiji is the only GCN3 device supported; everything else is GCN5.  */
  gcn_isa = gcn_arch == PROCESSOR_FIJI ? 3 : 5;

  /* The default stack size needs to be small for offload kernels because
     there may be many, many threads.  Also, a smaller stack gives a
     measureable performance boost.  But, a small stack is insufficient
     for running the testsuite, so we use a larger default for the stand
     alone case.  */
  if (stack_size_opt == -1)
    {
      if (flag_openacc || flag_openmp)
	/* 512 bytes per work item = 32kB total.  */
	stack_size_opt = 512 * 64;
      else
	/* 1MB total.  */
	stack_size_opt = 1048576;
    }

  /* Reserve 512 bytes (somewhat arbitrarily) of LDS space for reduction
     results and worker broadcasts, but never let the user request less
     than gang_private_hwm (32) bytes.  */
  if (gang_private_size_opt == -1)
    gang_private_size_opt = 512;
  else if (gang_private_size_opt < gang_private_hwm)
    gang_private_size_opt = gang_private_hwm;
  else if (gang_private_size_opt >= acc_lds_size - 1024)
    {
      /* We need some space for reductions and worker broadcasting.  If the
	 user requests a large amount of gang-private LDS space, we might not
	 have enough left for the former.  Increase the LDS allocation in that
	 case, although this may reduce the maximum occupancy on the
	 hardware.  */
      acc_lds_size = gang_private_size_opt + 1024;
      if (acc_lds_size > 32768)
	acc_lds_size = 32768;
    }

  /* The xnack option is a placeholder, for now.  */
  if (flag_xnack)
    sorry ("XNACK support");
}
171 1.1 mrg
172 1.1 mrg /* }}} */
173 1.1 mrg /* {{{ Attributes. */
174 1.1 mrg
/* This table defines the arguments that are permitted in
   __attribute__ ((amdgpu_hsa_kernel (...))).

   The names and values correspond to the HSA metadata that is encoded
   into the assembler file and binary.

   The #define'd indices are used by default_requested_args and by
   gcn_parse_amdgpu_hsa_kernel_attribute; they must stay in sync with the
   table order.  */

static const struct gcn_kernel_arg_type
{
  /* Attribute-argument string naming this kernel argument.  */
  const char *name;
  /* ".amdhsa_*" directive emitted for this argument, or NULL if none.  */
  const char *header_pseudo;
  machine_mode mode;

  /* This should be set to -1 or -2 for a dynamically allocated register
     number.  Use -1 if this argument contributes to the user_sgpr_count,
     -2 otherwise.  */
  int fixed_regno;
} gcn_kernel_arg_types[] = {
  {"exec", NULL, DImode, EXEC_REG},
#define PRIVATE_SEGMENT_BUFFER_ARG 1
  {"private_segment_buffer",
   ".amdhsa_user_sgpr_private_segment_buffer", TImode, -1},
#define DISPATCH_PTR_ARG 2
  {"dispatch_ptr", ".amdhsa_user_sgpr_dispatch_ptr", DImode, -1},
#define QUEUE_PTR_ARG 3
  {"queue_ptr", ".amdhsa_user_sgpr_queue_ptr", DImode, -1},
#define KERNARG_SEGMENT_PTR_ARG 4
  {"kernarg_segment_ptr", ".amdhsa_user_sgpr_kernarg_segment_ptr", DImode, -1},
  {"dispatch_id", ".amdhsa_user_sgpr_dispatch_id", DImode, -1},
#define FLAT_SCRATCH_INIT_ARG 6
  {"flat_scratch_init", ".amdhsa_user_sgpr_flat_scratch_init", DImode, -1},
#define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7
  {"private_segment_size", ".amdhsa_user_sgpr_private_segment_size", SImode, -1},
#define WORKGROUP_ID_X_ARG 8
  {"workgroup_id_X", ".amdhsa_system_sgpr_workgroup_id_x", SImode, -2},
  {"workgroup_id_Y", ".amdhsa_system_sgpr_workgroup_id_y", SImode, -2},
  {"workgroup_id_Z", ".amdhsa_system_sgpr_workgroup_id_z", SImode, -2},
  {"workgroup_info", ".amdhsa_system_sgpr_workgroup_info", SImode, -1},
#define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 12
  {"private_segment_wave_offset",
   ".amdhsa_system_sgpr_private_segment_wavefront_offset", SImode, -2},
#define WORK_ITEM_ID_X_ARG 13
  {"work_item_id_X", NULL, V64SImode, FIRST_VGPR_REG},
#define WORK_ITEM_ID_Y_ARG 14
  {"work_item_id_Y", NULL, V64SImode, FIRST_VGPR_REG + 1},
#define WORK_ITEM_ID_Z_ARG 15
  {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
};
222 1.1 mrg
/* Bitmask (indexed by gcn_kernel_arg_types position) of the arguments
   every kernel requests even without an explicit attribute list.  */
static const long default_requested_args
	= (1 << PRIVATE_SEGMENT_BUFFER_ARG)
	  | (1 << DISPATCH_PTR_ARG)
	  | (1 << QUEUE_PTR_ARG)
	  | (1 << KERNARG_SEGMENT_PTR_ARG)
	  | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)
	  | (1 << WORKGROUP_ID_X_ARG)
	  | (1 << WORK_ITEM_ID_X_ARG)
	  | (1 << WORK_ITEM_ID_Y_ARG)
	  | (1 << WORK_ITEM_ID_Z_ARG);
233 1.1 mrg
234 1.1 mrg /* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
235 1.1 mrg This function also sets the default values for some arguments.
236 1.1 mrg
237 1.1 mrg Return true on success, with ARGS populated. */
238 1.1 mrg
239 1.1 mrg static bool
240 1.1 mrg gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
241 1.1 mrg tree list)
242 1.1 mrg {
243 1.1 mrg bool err = false;
244 1.1 mrg args->requested = default_requested_args;
245 1.1 mrg args->nargs = 0;
246 1.1 mrg
247 1.1 mrg for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
248 1.1 mrg args->reg[a] = -1;
249 1.1 mrg
250 1.1 mrg for (; list; list = TREE_CHAIN (list))
251 1.1 mrg {
252 1.1 mrg const char *str;
253 1.1 mrg if (TREE_CODE (TREE_VALUE (list)) != STRING_CST)
254 1.1 mrg {
255 1.1 mrg error ("%<amdgpu_hsa_kernel%> attribute requires string constant "
256 1.1 mrg "arguments");
257 1.1 mrg break;
258 1.1 mrg }
259 1.1 mrg str = TREE_STRING_POINTER (TREE_VALUE (list));
260 1.1 mrg int a;
261 1.1 mrg for (a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
262 1.1 mrg {
263 1.1 mrg if (!strcmp (str, gcn_kernel_arg_types[a].name))
264 1.1 mrg break;
265 1.1 mrg }
266 1.1 mrg if (a == GCN_KERNEL_ARG_TYPES)
267 1.1 mrg {
268 1.1 mrg error ("unknown specifier %qs in %<amdgpu_hsa_kernel%> attribute",
269 1.1 mrg str);
270 1.1 mrg err = true;
271 1.1 mrg break;
272 1.1 mrg }
273 1.1 mrg if (args->requested & (1 << a))
274 1.1 mrg {
275 1.1 mrg error ("duplicated parameter specifier %qs in %<amdgpu_hsa_kernel%> "
276 1.1 mrg "attribute", str);
277 1.1 mrg err = true;
278 1.1 mrg break;
279 1.1 mrg }
280 1.1 mrg args->requested |= (1 << a);
281 1.1 mrg args->order[args->nargs++] = a;
282 1.1 mrg }
283 1.1 mrg
284 1.1 mrg /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
285 1.1 mrg WORK_ITEM_ID_Y_ARG. Similarly, requesting WORK_ITEM_ID_Y_ARG implies
286 1.1 mrg requesting WORK_ITEM_ID_X_ARG. */
287 1.1 mrg if (args->requested & (1 << WORK_ITEM_ID_Z_ARG))
288 1.1 mrg args->requested |= (1 << WORK_ITEM_ID_Y_ARG);
289 1.1 mrg if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
290 1.1 mrg args->requested |= (1 << WORK_ITEM_ID_X_ARG);
291 1.1 mrg
292 1.1 mrg int sgpr_regno = FIRST_SGPR_REG;
293 1.1 mrg args->nsgprs = 0;
294 1.1 mrg for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
295 1.1 mrg {
296 1.1 mrg if (!(args->requested & (1 << a)))
297 1.1 mrg continue;
298 1.1 mrg
299 1.1 mrg if (gcn_kernel_arg_types[a].fixed_regno >= 0)
300 1.1 mrg args->reg[a] = gcn_kernel_arg_types[a].fixed_regno;
301 1.1 mrg else
302 1.1 mrg {
303 1.1 mrg int reg_count;
304 1.1 mrg
305 1.1 mrg switch (gcn_kernel_arg_types[a].mode)
306 1.1 mrg {
307 1.1 mrg case E_SImode:
308 1.1 mrg reg_count = 1;
309 1.1 mrg break;
310 1.1 mrg case E_DImode:
311 1.1 mrg reg_count = 2;
312 1.1 mrg break;
313 1.1 mrg case E_TImode:
314 1.1 mrg reg_count = 4;
315 1.1 mrg break;
316 1.1 mrg default:
317 1.1 mrg gcc_unreachable ();
318 1.1 mrg }
319 1.1 mrg args->reg[a] = sgpr_regno;
320 1.1 mrg sgpr_regno += reg_count;
321 1.1 mrg if (gcn_kernel_arg_types[a].fixed_regno == -1)
322 1.1 mrg args->nsgprs += reg_count;
323 1.1 mrg }
324 1.1 mrg }
325 1.1 mrg if (sgpr_regno > FIRST_SGPR_REG + 16)
326 1.1 mrg {
327 1.1 mrg error ("too many arguments passed in sgpr registers");
328 1.1 mrg }
329 1.1 mrg return err;
330 1.1 mrg }
331 1.1 mrg
/* Referenced by TARGET_ATTRIBUTE_TABLE.

   Validates target specific attributes.  Rejects non-function uses and
   parses the argument list of the HSA kernel attribute.  */

static tree
gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name,
					tree args, int, bool *no_add_attrs)
{
  if (!FUNC_OR_METHOD_TYPE_P (*node))
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* NOTE(review): the table registers this handler for "amdgpu_hsa_kernel",
     but the check below tests for "gcnhsa_kernel", so the parse/validate
     branch appears unreachable — confirm which spelling is intended.  */
  if (is_attribute_p ("gcnhsa_kernel", name))
    {
      struct gcn_kernel_args kernelarg;

      /* Drop the attribute if its arguments fail to parse.  */
      if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg, args))
	*no_add_attrs = true;

      return NULL_TREE;
    }

  return NULL_TREE;
}
361 1.1 mrg
/* Implement TARGET_ATTRIBUTE_TABLE.

   Create target-specific __attribute__ types.  */

static const struct attribute_spec gcn_attribute_table[] = {
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true,
   true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL},
  /* End element.  */
  {NULL, 0, 0, false, false, false, false, NULL, NULL}
};
374 1.1 mrg
375 1.1 mrg /* }}} */
376 1.1 mrg /* {{{ Registers and modes. */
377 1.1 mrg
378 1.1 mrg /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
379 1.1 mrg
380 1.1 mrg bool
381 1.1 mrg gcn_scalar_mode_supported_p (scalar_mode mode)
382 1.1 mrg {
383 1.1 mrg return (mode == BImode
384 1.1 mrg || mode == QImode
385 1.1 mrg || mode == HImode /* || mode == HFmode */
386 1.1 mrg || mode == SImode || mode == SFmode
387 1.1 mrg || mode == DImode || mode == DFmode
388 1.1 mrg || mode == TImode);
389 1.1 mrg }
390 1.1 mrg
391 1.1 mrg /* Implement TARGET_CLASS_MAX_NREGS.
392 1.1 mrg
393 1.1 mrg Return the number of hard registers needed to hold a value of MODE in
394 1.1 mrg a register of class RCLASS. */
395 1.1 mrg
396 1.1 mrg static unsigned char
397 1.1 mrg gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
398 1.1 mrg {
399 1.1 mrg /* Scalar registers are 32bit, vector registers are in fact tuples of
400 1.1 mrg 64 lanes. */
401 1.1 mrg if (rclass == VGPR_REGS)
402 1.1 mrg {
403 1.1 mrg if (vgpr_1reg_mode_p (mode))
404 1.1 mrg return 1;
405 1.1 mrg if (vgpr_2reg_mode_p (mode))
406 1.1 mrg return 2;
407 1.1 mrg /* TImode is used by DImode compare_and_swap. */
408 1.1 mrg if (mode == TImode)
409 1.1 mrg return 4;
410 1.1 mrg }
411 1.1 mrg else if (rclass == VCC_CONDITIONAL_REG && mode == BImode)
412 1.1 mrg return 2;
413 1.1 mrg return CEIL (GET_MODE_SIZE (mode), 4);
414 1.1 mrg }
415 1.1 mrg
/* Implement TARGET_HARD_REGNO_NREGS.

   Return the number of hard registers needed to hold a value of MODE in
   REGNO.  Delegates to gcn_class_max_nregs via REGNO's smallest class.  */

unsigned int
gcn_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  return gcn_class_max_nregs (REGNO_REG_CLASS (regno), mode);
}
426 1.1 mrg
/* Implement TARGET_HARD_REGNO_MODE_OK.

   Return true if REGNO can hold value in MODE.  */

bool
gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Treat a complex mode as if it were a scalar mode of the same overall
     size for the purposes of allocating hard registers.  */
  if (COMPLEX_MODE_P (mode))
    switch (mode)
      {
      case E_CQImode:
      case E_CHImode:
	mode = SImode;
	break;
      case E_CSImode:
	mode = DImode;
	break;
      case E_CDImode:
	mode = TImode;
	break;
      case E_HCmode:
	mode = SFmode;
	break;
      case E_SCmode:
	mode = DFmode;
	break;
      default:
	/* Not supported.  */
	return false;
      }

  /* Special registers: each lo/hi half is individually addressable, but
     only the lo half can anchor a multi-register (DImode) value.  */
  switch (regno)
    {
    case FLAT_SCRATCH_LO_REG:
    case XNACK_MASK_LO_REG:
    case TBA_LO_REG:
    case TMA_LO_REG:
      return (mode == SImode || mode == DImode);
    case VCC_LO_REG:
    case EXEC_LO_REG:
      return (mode == BImode || mode == SImode || mode == DImode);
    case M0_REG:
    case FLAT_SCRATCH_HI_REG:
    case XNACK_MASK_HI_REG:
    case TBA_HI_REG:
    case TMA_HI_REG:
      return mode == SImode;
    case VCC_HI_REG:
      return false;
    case EXEC_HI_REG:
      return mode == SImode /*|| mode == V32BImode */ ;
    case SCC_REG:
    case VCCZ_REG:
    case EXECZ_REG:
      return mode == BImode;
    }
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;
  if (SGPR_REGNO_P (regno))
    /* We restrict double register values to aligned registers.  */
    return (sgpr_1reg_mode_p (mode)
	    || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
	    || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
  if (VGPR_REGNO_P (regno))
    /* Vector instructions do not care about the alignment of register
       pairs, but where there is no 64-bit instruction, many of the
       define_split do not work if the input and output registers partially
       overlap.  We tried to fix this with early clobber and match
       constraints, but it was bug prone, added complexity, and conflicts
       with the 'U0' constraints on vec_merge.
       Therefore, we restrict ourselved to aligned registers.  */
    return (vgpr_1reg_mode_p (mode)
	    || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
	    /* TImode is used by DImode compare_and_swap.  */
	    || (mode == TImode
		&& !((regno - FIRST_VGPR_REG) & 3)));
  return false;
}
507 1.1 mrg
/* Implement REGNO_REG_CLASS via gcn.h.

   Return smallest class containing REGNO.  Note the order of the range
   checks below is significant: VGPR/SGPR must be tested before the
   catch-all "below FIRST_VGPR_REG" special-register case.  */

enum reg_class
gcn_regno_reg_class (int regno)
{
  switch (regno)
    {
    case SCC_REG:
      return SCC_CONDITIONAL_REG;
    case VCC_LO_REG:
    case VCC_HI_REG:
      return VCC_CONDITIONAL_REG;
    case VCCZ_REG:
      return VCCZ_CONDITIONAL_REG;
    case EXECZ_REG:
      return EXECZ_CONDITIONAL_REG;
    case EXEC_LO_REG:
    case EXEC_HI_REG:
      return EXEC_MASK_REG;
    }
  if (VGPR_REGNO_P (regno))
    return VGPR_REGS;
  if (SGPR_REGNO_P (regno))
    return SGPR_REGS;
  if (regno < FIRST_VGPR_REG)
    return GENERAL_REGS;
  /* Soft arg/frame pointers live above the hard registers.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return AFP_REGS;
  return ALL_REGS;
}
540 1.1 mrg
541 1.1 mrg /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
542 1.1 mrg
543 1.1 mrg GCC assumes that lowpart contains first part of value as stored in memory.
544 1.1 mrg This is not the case for vector registers. */
545 1.1 mrg
546 1.1 mrg bool
547 1.1 mrg gcn_can_change_mode_class (machine_mode from, machine_mode to,
548 1.1 mrg reg_class_t regclass)
549 1.1 mrg {
550 1.1 mrg if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to))
551 1.1 mrg return true;
552 1.1 mrg return (gcn_class_max_nregs (regclass, from)
553 1.1 mrg == gcn_class_max_nregs (regclass, to));
554 1.1 mrg }
555 1.1 mrg
556 1.1 mrg /* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P.
557 1.1 mrg
558 1.1 mrg When this hook returns true for MODE, the compiler allows
559 1.1 mrg registers explicitly used in the rtl to be used as spill registers
560 1.1 mrg but prevents the compiler from extending the lifetime of these
561 1.1 mrg registers. */
562 1.1 mrg
563 1.1 mrg bool
564 1.1 mrg gcn_small_register_classes_for_mode_p (machine_mode mode)
565 1.1 mrg {
566 1.1 mrg /* We allocate into exec and vcc regs. Those make small register class. */
567 1.1 mrg return mode == DImode || mode == SImode;
568 1.1 mrg }
569 1.1 mrg
570 1.1 mrg /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
571 1.1 mrg
572 1.1 mrg Returns true if pseudos that have been assigned to registers of class RCLASS
573 1.1 mrg would likely be spilled because registers of RCLASS are needed for spill
574 1.1 mrg registers. */
575 1.1 mrg
576 1.1 mrg static bool
577 1.1 mrg gcn_class_likely_spilled_p (reg_class_t rclass)
578 1.1 mrg {
579 1.1 mrg return (rclass == EXEC_MASK_REG
580 1.1 mrg || reg_classes_intersect_p (ALL_CONDITIONAL_REGS, rclass));
581 1.1 mrg }
582 1.1 mrg
583 1.1 mrg /* Implement TARGET_MODES_TIEABLE_P.
584 1.1 mrg
585 1.1 mrg Returns true if a value of MODE1 is accessible in MODE2 without
586 1.1 mrg copying. */
587 1.1 mrg
588 1.1 mrg bool
589 1.1 mrg gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2)
590 1.1 mrg {
591 1.1 mrg return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
592 1.1 mrg && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
593 1.1 mrg }
594 1.1 mrg
595 1.1 mrg /* Implement TARGET_TRULY_NOOP_TRUNCATION.
596 1.1 mrg
597 1.1 mrg Returns true if it is safe to convert a value of INPREC bits to one of
598 1.1 mrg OUTPREC bits (where OUTPREC is smaller than INPREC) by merely operating on
599 1.1 mrg it as if it had only OUTPREC bits. */
600 1.1 mrg
601 1.1 mrg bool
602 1.1 mrg gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
603 1.1 mrg {
604 1.1 mrg return ((inprec <= 32) && (outprec <= inprec));
605 1.1 mrg }
606 1.1 mrg
/* Return N-th part of value occupying multiple registers.

   For vector modes (size >= 256 bytes) the "part" is one V64SImode slice;
   for 8-byte scalars it is one SImode half; otherwise fall back to
   simplify_gen_subreg.  OP may be a register, a CONST_VECTOR, an
   UNSPEC_VECTOR undef marker, or a constant.  */

rtx
gcn_operand_part (machine_mode mode, rtx op, int n)
{
  if (GET_MODE_SIZE (mode) >= 256)
    {
      /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0); */

      if (REG_P (op))
	{
	  gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
	  return gen_rtx_REG (V64SImode, REGNO (op) + n);
	}
      if (GET_CODE (op) == CONST_VECTOR)
	{
	  /* Build a new vector from the N-th part of every lane.  */
	  int units = GET_MODE_NUNITS (mode);
	  rtvec v = rtvec_alloc (units);

	  for (int i = 0; i < units; ++i)
	    RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode),
						 CONST_VECTOR_ELT (op, i), n);

	  return gen_rtx_CONST_VECTOR (V64SImode, v);
	}
      /* An undefined vector splits into undefined parts.  */
      if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
	return gcn_gen_undef (V64SImode);
      gcc_unreachable ();
    }
  else if (GET_MODE_SIZE (mode) == 8 && REG_P (op))
    {
      gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
      return gen_rtx_REG (SImode, REGNO (op) + n);
    }
  else
    {
      if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
	return gcn_gen_undef (SImode);

      /* If it's a constant then let's assume it is of the largest mode
	 available, otherwise simplify_gen_subreg will fail.  */
      if (mode == VOIDmode && CONST_INT_P (op))
	mode = DImode;
      return simplify_gen_subreg (SImode, op, mode, n * 4);
    }
}
653 1.1 mrg
/* Return N-th DImode (8-byte) part of a value occupying multiple
   registers.  Companion to gcn_operand_part, which works in SImode
   (4-byte) units.  */

rtx
gcn_operand_doublepart (machine_mode mode, rtx op, int n)
{
  return simplify_gen_subreg (DImode, op, mode, n * 8);
}
661 1.1 mrg
/* Return true if OP can be split into subregs or high/low parts.
   This is always true for scalars, but not normally true for vectors.
   However, for vectors in hardregs we can use the low and high registers.  */

bool
gcn_can_split_p (machine_mode, rtx op)
{
  if (vgpr_vector_mode_p (GET_MODE (op)))
    {
      if (GET_CODE (op) == SUBREG)
	op = SUBREG_REG (op);
      /* Non-register vector operands (constants etc.) are splittable.  */
      if (!REG_P (op))
	return true;
      /* NOTE(review): `<=` also accepts the first pseudo register number;
	 `<` (hard registers only) looks like the intent — confirm.  */
      return REGNO (op) <= FIRST_PSEUDO_REGISTER;
    }
  return true;
}
679 1.1 mrg
680 1.1 mrg /* Implement TARGET_SPILL_CLASS.
681 1.1 mrg
682 1.1 mrg Return class of registers which could be used for pseudo of MODE
683 1.1 mrg and of class RCLASS for spilling instead of memory. Return NO_REGS
684 1.1 mrg if it is not possible or non-profitable. */
685 1.1 mrg
686 1.1 mrg static reg_class_t
687 1.1 mrg gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
688 1.1 mrg {
689 1.1 mrg if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c)
690 1.1 mrg || c == VCC_CONDITIONAL_REG)
691 1.1 mrg return SGPR_REGS;
692 1.1 mrg else
693 1.1 mrg return NO_REGS;
694 1.1 mrg }
695 1.1 mrg
696 1.1 mrg /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
697 1.1 mrg
698 1.1 mrg Change allocno class for given pseudo from allocno and best class
699 1.1 mrg calculated by IRA. */
700 1.1 mrg
701 1.1 mrg static reg_class_t
702 1.1 mrg gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl,
703 1.1 mrg reg_class_t best_cl)
704 1.1 mrg {
705 1.1 mrg /* Avoid returning classes that contain both vgpr and sgpr registers. */
706 1.1 mrg if (cl != ALL_REGS && cl != SRCDST_REGS && cl != ALL_GPR_REGS)
707 1.1 mrg return cl;
708 1.1 mrg if (best_cl != ALL_REGS && best_cl != SRCDST_REGS
709 1.1 mrg && best_cl != ALL_GPR_REGS)
710 1.1 mrg return best_cl;
711 1.1 mrg
712 1.1 mrg machine_mode mode = PSEUDO_REGNO_MODE (regno);
713 1.1 mrg if (vgpr_vector_mode_p (mode))
714 1.1 mrg return VGPR_REGS;
715 1.1 mrg
716 1.1 mrg return GENERAL_REGS;
717 1.1 mrg }
718 1.1 mrg
719 1.1 mrg /* Create a new DImode pseudo reg and emit an instruction to initialize
720 1.1 mrg it to VAL. */
721 1.1 mrg
722 1.1 mrg static rtx
723 1.1 mrg get_exec (int64_t val)
724 1.1 mrg {
725 1.1 mrg rtx reg = gen_reg_rtx (DImode);
726 1.1 mrg emit_insn (gen_rtx_SET (reg, gen_int_mode (val, DImode)));
727 1.1 mrg return reg;
728 1.1 mrg }
729 1.1 mrg
/* Return value of scalar exec register (a single active lane).  */

rtx
gcn_scalar_exec ()
{
  return const1_rtx;
}
737 1.1 mrg
/* Return pseudo holding scalar exec register (exec mask = 1,
   i.e. only lane 0 active).  Emits an initializing insn.  */

rtx
gcn_scalar_exec_reg ()
{
  return get_exec (1);
}
745 1.1 mrg
/* Return value of full exec register (all 64 lanes active).  */

rtx
gcn_full_exec ()
{
  return constm1_rtx;
}
753 1.1 mrg
/* Return pseudo holding full exec register (exec mask = -1,
   i.e. all 64 lanes active).  Emits an initializing insn.  */

rtx
gcn_full_exec_reg ()
{
  return get_exec (-1);
}
761 1.1 mrg
762 1.1 mrg /* }}} */
763 1.1 mrg /* {{{ Immediate constants. */
764 1.1 mrg
/* Initialize shared numeric constants (dconst4, dconst1over2pi) used by
   gcn_inline_fp_constant_p.  Called lazily; sets ext_gcn_constants_init.  */

static void
init_ext_gcn_constants (void)
{
  real_from_integer (&dconst4, DFmode, 4, SIGNED);

  /* FIXME: this constant probably does not match what hardware really loads.
     Reality check it eventually.  */
  real_from_string (&dconst1over2pi,
		    "0.1591549430918953357663423455968866839");
  /* Round to SFmode precision so comparisons against SF constants work.  */
  real_convert (&dconst1over2pi, SFmode, &dconst1over2pi);

  ext_gcn_constants_init = 1;
}
780 1.1 mrg
781 1.1 mrg /* Return non-zero if X is a constant that can appear as an inline operand.
782 1.1 mrg This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi)
783 1.1 mrg Or a vector of those.
784 1.1 mrg The value returned should be the encoding of this constant. */
785 1.1 mrg
786 1.1 mrg int
787 1.1 mrg gcn_inline_fp_constant_p (rtx x, bool allow_vector)
788 1.1 mrg {
789 1.1 mrg machine_mode mode = GET_MODE (x);
790 1.1 mrg
791 1.1 mrg if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
792 1.1 mrg && allow_vector)
793 1.1 mrg {
794 1.1 mrg int n;
795 1.1 mrg if (GET_CODE (x) != CONST_VECTOR)
796 1.1 mrg return 0;
797 1.1 mrg n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
798 1.1 mrg if (!n)
799 1.1 mrg return 0;
800 1.1 mrg for (int i = 1; i < 64; i++)
801 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
802 1.1 mrg return 0;
803 1.1 mrg return 1;
804 1.1 mrg }
805 1.1 mrg
806 1.1 mrg if (mode != HFmode && mode != SFmode && mode != DFmode)
807 1.1 mrg return 0;
808 1.1 mrg
809 1.1 mrg const REAL_VALUE_TYPE *r;
810 1.1 mrg
811 1.1 mrg if (x == CONST0_RTX (mode))
812 1.1 mrg return 128;
813 1.1 mrg if (x == CONST1_RTX (mode))
814 1.1 mrg return 242;
815 1.1 mrg
816 1.1 mrg r = CONST_DOUBLE_REAL_VALUE (x);
817 1.1 mrg
818 1.1 mrg if (real_identical (r, &dconstm1))
819 1.1 mrg return 243;
820 1.1 mrg
821 1.1 mrg if (real_identical (r, &dconsthalf))
822 1.1 mrg return 240;
823 1.1 mrg if (real_identical (r, &dconstm1))
824 1.1 mrg return 243;
825 1.1 mrg if (real_identical (r, &dconst2))
826 1.1 mrg return 244;
827 1.1 mrg if (real_identical (r, &dconst4))
828 1.1 mrg return 246;
829 1.1 mrg if (real_identical (r, &dconst1over2pi))
830 1.1 mrg return 248;
831 1.1 mrg if (!ext_gcn_constants_init)
832 1.1 mrg init_ext_gcn_constants ();
833 1.1 mrg real_value_negate (r);
834 1.1 mrg if (real_identical (r, &dconsthalf))
835 1.1 mrg return 241;
836 1.1 mrg if (real_identical (r, &dconst2))
837 1.1 mrg return 245;
838 1.1 mrg if (real_identical (r, &dconst4))
839 1.1 mrg return 247;
840 1.1 mrg
841 1.1 mrg /* FIXME: add 4, -4 and 1/(2*PI). */
842 1.1 mrg
843 1.1 mrg return 0;
844 1.1 mrg }
845 1.1 mrg
846 1.1 mrg /* Return non-zero if X is a constant that can appear as an immediate operand.
847 1.1 mrg This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi)
848 1.1 mrg Or a vector of those.
849 1.1 mrg The value returned should be the encoding of this constant. */
850 1.1 mrg
851 1.1 mrg bool
852 1.1 mrg gcn_fp_constant_p (rtx x, bool allow_vector)
853 1.1 mrg {
854 1.1 mrg machine_mode mode = GET_MODE (x);
855 1.1 mrg
856 1.1 mrg if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
857 1.1 mrg && allow_vector)
858 1.1 mrg {
859 1.1 mrg int n;
860 1.1 mrg if (GET_CODE (x) != CONST_VECTOR)
861 1.1 mrg return false;
862 1.1 mrg n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
863 1.1 mrg if (!n)
864 1.1 mrg return false;
865 1.1 mrg for (int i = 1; i < 64; i++)
866 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
867 1.1 mrg return false;
868 1.1 mrg return true;
869 1.1 mrg }
870 1.1 mrg if (mode != HFmode && mode != SFmode && mode != DFmode)
871 1.1 mrg return false;
872 1.1 mrg
873 1.1 mrg if (gcn_inline_fp_constant_p (x, false))
874 1.1 mrg return true;
875 1.1 mrg /* FIXME: It is not clear how 32bit immediates are interpreted here. */
876 1.1 mrg return (mode != DFmode);
877 1.1 mrg }
878 1.1 mrg
879 1.1 mrg /* Return true if X is a constant representable as an inline immediate
880 1.1 mrg constant in a 32-bit instruction encoding. */
881 1.1 mrg
882 1.1 mrg bool
883 1.1 mrg gcn_inline_constant_p (rtx x)
884 1.1 mrg {
885 1.1 mrg if (GET_CODE (x) == CONST_INT)
886 1.1 mrg return INTVAL (x) >= -16 && INTVAL (x) <= 64;
887 1.1 mrg if (GET_CODE (x) == CONST_DOUBLE)
888 1.1 mrg return gcn_inline_fp_constant_p (x, false);
889 1.1 mrg if (GET_CODE (x) == CONST_VECTOR)
890 1.1 mrg {
891 1.1 mrg int n;
892 1.1 mrg if (!vgpr_vector_mode_p (GET_MODE (x)))
893 1.1 mrg return false;
894 1.1 mrg n = gcn_inline_constant_p (CONST_VECTOR_ELT (x, 0));
895 1.1 mrg if (!n)
896 1.1 mrg return false;
897 1.1 mrg for (int i = 1; i < 64; i++)
898 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
899 1.1 mrg return false;
900 1.1 mrg return 1;
901 1.1 mrg }
902 1.1 mrg return false;
903 1.1 mrg }
904 1.1 mrg
905 1.1 mrg /* Return true if X is a constant representable as an immediate constant
906 1.1 mrg in a 32 or 64-bit instruction encoding. */
907 1.1 mrg
908 1.1 mrg bool
909 1.1 mrg gcn_constant_p (rtx x)
910 1.1 mrg {
911 1.1 mrg switch (GET_CODE (x))
912 1.1 mrg {
913 1.1 mrg case CONST_INT:
914 1.1 mrg return true;
915 1.1 mrg
916 1.1 mrg case CONST_DOUBLE:
917 1.1 mrg return gcn_fp_constant_p (x, false);
918 1.1 mrg
919 1.1 mrg case CONST_VECTOR:
920 1.1 mrg {
921 1.1 mrg int n;
922 1.1 mrg if (!vgpr_vector_mode_p (GET_MODE (x)))
923 1.1 mrg return false;
924 1.1 mrg n = gcn_constant_p (CONST_VECTOR_ELT (x, 0));
925 1.1 mrg if (!n)
926 1.1 mrg return false;
927 1.1 mrg for (int i = 1; i < 64; i++)
928 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
929 1.1 mrg return false;
930 1.1 mrg return true;
931 1.1 mrg }
932 1.1 mrg
933 1.1 mrg case SYMBOL_REF:
934 1.1 mrg case LABEL_REF:
935 1.1 mrg return true;
936 1.1 mrg
937 1.1 mrg default:
938 1.1 mrg ;
939 1.1 mrg }
940 1.1 mrg
941 1.1 mrg return false;
942 1.1 mrg }
943 1.1 mrg
944 1.1 mrg /* Return true if X is a constant representable as two inline immediate
945 1.1 mrg constants in a 64-bit instruction that is split into two 32-bit
946 1.1 mrg instructions.
947 1.1 mrg When MIXED is set, the low-part is permitted to use the full 32-bits. */
948 1.1 mrg
949 1.1 mrg bool
950 1.1 mrg gcn_inline_constant64_p (rtx x, bool mixed)
951 1.1 mrg {
952 1.1 mrg if (GET_CODE (x) == CONST_VECTOR)
953 1.1 mrg {
954 1.1 mrg if (!vgpr_vector_mode_p (GET_MODE (x)))
955 1.1 mrg return false;
956 1.1 mrg if (!gcn_inline_constant64_p (CONST_VECTOR_ELT (x, 0), mixed))
957 1.1 mrg return false;
958 1.1 mrg for (int i = 1; i < 64; i++)
959 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
960 1.1 mrg return false;
961 1.1 mrg
962 1.1 mrg return true;
963 1.1 mrg }
964 1.1 mrg
965 1.1 mrg if (GET_CODE (x) != CONST_INT)
966 1.1 mrg return false;
967 1.1 mrg
968 1.1 mrg rtx val_lo = gcn_operand_part (DImode, x, 0);
969 1.1 mrg rtx val_hi = gcn_operand_part (DImode, x, 1);
970 1.1 mrg return ((mixed || gcn_inline_constant_p (val_lo))
971 1.1 mrg && gcn_inline_constant_p (val_hi));
972 1.1 mrg }
973 1.1 mrg
974 1.1 mrg /* Return true if X is a constant representable as an immediate constant
975 1.1 mrg in a 32 or 64-bit instruction encoding where the hardware will
976 1.1 mrg extend the immediate to 64-bits. */
977 1.1 mrg
978 1.1 mrg bool
979 1.1 mrg gcn_constant64_p (rtx x)
980 1.1 mrg {
981 1.1 mrg if (!gcn_constant_p (x))
982 1.1 mrg return false;
983 1.1 mrg
984 1.1 mrg if (GET_CODE (x) != CONST_INT)
985 1.1 mrg return true;
986 1.1 mrg
987 1.1 mrg /* Negative numbers are only allowed if they can be encoded within src0,
988 1.1 mrg because the 32-bit immediates do not get sign-extended.
989 1.1 mrg Unsigned numbers must not be encodable as 32-bit -1..-16, because the
990 1.1 mrg assembler will use a src0 inline immediate and that will get
991 1.1 mrg sign-extended. */
992 1.1 mrg HOST_WIDE_INT val = INTVAL (x);
993 1.1 mrg return (((val & 0xffffffff) == val /* Positive 32-bit. */
994 1.1 mrg && (val & 0xfffffff0) != 0xfffffff0) /* Not -1..-16. */
995 1.1 mrg || gcn_inline_constant_p (x)); /* Src0. */
996 1.1 mrg }
997 1.1 mrg
998 1.1 mrg /* Implement TARGET_LEGITIMATE_CONSTANT_P.
999 1.1 mrg
1000 1.1 mrg Returns true if X is a legitimate constant for a MODE immediate operand. */
1001 1.1 mrg
1002 1.1 mrg bool
1003 1.1 mrg gcn_legitimate_constant_p (machine_mode, rtx x)
1004 1.1 mrg {
1005 1.1 mrg return gcn_constant_p (x);
1006 1.1 mrg }
1007 1.1 mrg
1008 1.1 mrg /* Return true if X is a CONST_VECTOR of single constant. */
1009 1.1 mrg
1010 1.1 mrg static bool
1011 1.1 mrg single_cst_vector_p (rtx x)
1012 1.1 mrg {
1013 1.1 mrg if (GET_CODE (x) != CONST_VECTOR)
1014 1.1 mrg return false;
1015 1.1 mrg for (int i = 1; i < 64; i++)
1016 1.1 mrg if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
1017 1.1 mrg return false;
1018 1.1 mrg return true;
1019 1.1 mrg }
1020 1.1 mrg
1021 1.1 mrg /* Create a CONST_VECTOR of duplicated value A. */
1022 1.1 mrg
1023 1.1 mrg rtx
1024 1.1 mrg gcn_vec_constant (machine_mode mode, int a)
1025 1.1 mrg {
1026 1.1 mrg /*if (!a)
1027 1.1 mrg return CONST0_RTX (mode);
1028 1.1 mrg if (a == -1)
1029 1.1 mrg return CONSTM1_RTX (mode);
1030 1.1 mrg if (a == 1)
1031 1.1 mrg return CONST1_RTX (mode);
1032 1.1 mrg if (a == 2)
1033 1.1 mrg return CONST2_RTX (mode);*/
1034 1.1 mrg
1035 1.1 mrg int units = GET_MODE_NUNITS (mode);
1036 1.1 mrg machine_mode innermode = GET_MODE_INNER (mode);
1037 1.1 mrg
1038 1.1 mrg rtx tem;
1039 1.1 mrg if (FLOAT_MODE_P (innermode))
1040 1.1 mrg {
1041 1.1 mrg REAL_VALUE_TYPE rv;
1042 1.1 mrg real_from_integer (&rv, NULL, a, SIGNED);
1043 1.1 mrg tem = const_double_from_real_value (rv, innermode);
1044 1.1 mrg }
1045 1.1 mrg else
1046 1.1 mrg tem = gen_int_mode (a, innermode);
1047 1.1 mrg
1048 1.1 mrg rtvec v = rtvec_alloc (units);
1049 1.1 mrg for (int i = 0; i < units; ++i)
1050 1.1 mrg RTVEC_ELT (v, i) = tem;
1051 1.1 mrg
1052 1.1 mrg return gen_rtx_CONST_VECTOR (mode, v);
1053 1.1 mrg }
1054 1.1 mrg
1055 1.1 mrg /* Create a CONST_VECTOR of duplicated value A. */
1056 1.1 mrg
1057 1.1 mrg rtx
1058 1.1 mrg gcn_vec_constant (machine_mode mode, rtx a)
1059 1.1 mrg {
1060 1.1 mrg int units = GET_MODE_NUNITS (mode);
1061 1.1 mrg rtvec v = rtvec_alloc (units);
1062 1.1 mrg
1063 1.1 mrg for (int i = 0; i < units; ++i)
1064 1.1 mrg RTVEC_ELT (v, i) = a;
1065 1.1 mrg
1066 1.1 mrg return gen_rtx_CONST_VECTOR (mode, v);
1067 1.1 mrg }
1068 1.1 mrg
1069 1.1 mrg /* Create an undefined vector value, used where an insn operand is
1070 1.1 mrg optional. */
1071 1.1 mrg
1072 1.1 mrg rtx
1073 1.1 mrg gcn_gen_undef (machine_mode mode)
1074 1.1 mrg {
1075 1.1 mrg return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR);
1076 1.1 mrg }
1077 1.1 mrg
1078 1.1 mrg /* }}} */
1079 1.1 mrg /* {{{ Addresses, pointers and moves. */
1080 1.1 mrg
1081 1.1 mrg /* Return true is REG is a valid place to store a pointer,
1082 1.1 mrg for instructions that require an SGPR.
1083 1.1 mrg FIXME rename. */
1084 1.1 mrg
1085 1.1 mrg static bool
1086 1.1 mrg gcn_address_register_p (rtx reg, machine_mode mode, bool strict)
1087 1.1 mrg {
1088 1.1 mrg if (GET_CODE (reg) == SUBREG)
1089 1.1 mrg reg = SUBREG_REG (reg);
1090 1.1 mrg
1091 1.1 mrg if (!REG_P (reg))
1092 1.1 mrg return false;
1093 1.1 mrg
1094 1.1 mrg if (GET_MODE (reg) != mode)
1095 1.1 mrg return false;
1096 1.1 mrg
1097 1.1 mrg int regno = REGNO (reg);
1098 1.1 mrg
1099 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER)
1100 1.1 mrg {
1101 1.1 mrg if (!strict)
1102 1.1 mrg return true;
1103 1.1 mrg
1104 1.1 mrg if (!reg_renumber)
1105 1.1 mrg return false;
1106 1.1 mrg
1107 1.1 mrg regno = reg_renumber[regno];
1108 1.1 mrg }
1109 1.1 mrg
1110 1.1 mrg return (SGPR_REGNO_P (regno) || regno == M0_REG
1111 1.1 mrg || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1112 1.1 mrg }
1113 1.1 mrg
1114 1.1 mrg /* Return true is REG is a valid place to store a pointer,
1115 1.1 mrg for instructions that require a VGPR. */
1116 1.1 mrg
1117 1.1 mrg static bool
1118 1.1 mrg gcn_vec_address_register_p (rtx reg, machine_mode mode, bool strict)
1119 1.1 mrg {
1120 1.1 mrg if (GET_CODE (reg) == SUBREG)
1121 1.1 mrg reg = SUBREG_REG (reg);
1122 1.1 mrg
1123 1.1 mrg if (!REG_P (reg))
1124 1.1 mrg return false;
1125 1.1 mrg
1126 1.1 mrg if (GET_MODE (reg) != mode)
1127 1.1 mrg return false;
1128 1.1 mrg
1129 1.1 mrg int regno = REGNO (reg);
1130 1.1 mrg
1131 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER)
1132 1.1 mrg {
1133 1.1 mrg if (!strict)
1134 1.1 mrg return true;
1135 1.1 mrg
1136 1.1 mrg if (!reg_renumber)
1137 1.1 mrg return false;
1138 1.1 mrg
1139 1.1 mrg regno = reg_renumber[regno];
1140 1.1 mrg }
1141 1.1 mrg
1142 1.1 mrg return VGPR_REGNO_P (regno);
1143 1.1 mrg }
1144 1.1 mrg
1145 1.1 mrg /* Return true if X would be valid inside a MEM using the Flat address
1146 1.1 mrg space. */
1147 1.1 mrg
1148 1.1 mrg bool
1149 1.1 mrg gcn_flat_address_p (rtx x, machine_mode mode)
1150 1.1 mrg {
1151 1.1 mrg bool vec_mode = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1152 1.1 mrg || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1153 1.1 mrg
1154 1.1 mrg if (vec_mode && gcn_address_register_p (x, DImode, false))
1155 1.1 mrg return true;
1156 1.1 mrg
1157 1.1 mrg if (!vec_mode && gcn_vec_address_register_p (x, DImode, false))
1158 1.1 mrg return true;
1159 1.1 mrg
1160 1.1 mrg if (TARGET_GCN5_PLUS
1161 1.1 mrg && GET_CODE (x) == PLUS
1162 1.1 mrg && gcn_vec_address_register_p (XEXP (x, 0), DImode, false)
1163 1.1 mrg && CONST_INT_P (XEXP (x, 1)))
1164 1.1 mrg return true;
1165 1.1 mrg
1166 1.1 mrg return false;
1167 1.1 mrg }
1168 1.1 mrg
1169 1.1 mrg /* Return true if X would be valid inside a MEM using the Scalar Flat
1170 1.1 mrg address space. */
1171 1.1 mrg
1172 1.1 mrg bool
1173 1.1 mrg gcn_scalar_flat_address_p (rtx x)
1174 1.1 mrg {
1175 1.1 mrg if (gcn_address_register_p (x, DImode, false))
1176 1.1 mrg return true;
1177 1.1 mrg
1178 1.1 mrg if (GET_CODE (x) == PLUS
1179 1.1 mrg && gcn_address_register_p (XEXP (x, 0), DImode, false)
1180 1.1 mrg && CONST_INT_P (XEXP (x, 1)))
1181 1.1 mrg return true;
1182 1.1 mrg
1183 1.1 mrg return false;
1184 1.1 mrg }
1185 1.1 mrg
1186 1.1 mrg /* Return true if MEM X would be valid for the Scalar Flat address space. */
1187 1.1 mrg
1188 1.1 mrg bool
1189 1.1 mrg gcn_scalar_flat_mem_p (rtx x)
1190 1.1 mrg {
1191 1.1 mrg if (!MEM_P (x))
1192 1.1 mrg return false;
1193 1.1 mrg
1194 1.1 mrg if (GET_MODE_SIZE (GET_MODE (x)) < 4)
1195 1.1 mrg return false;
1196 1.1 mrg
1197 1.1 mrg return gcn_scalar_flat_address_p (XEXP (x, 0));
1198 1.1 mrg }
1199 1.1 mrg
1200 1.1 mrg /* Return true if X would be valid inside a MEM using the LDS or GDS
1201 1.1 mrg address spaces. */
1202 1.1 mrg
1203 1.1 mrg bool
1204 1.1 mrg gcn_ds_address_p (rtx x)
1205 1.1 mrg {
1206 1.1 mrg if (gcn_vec_address_register_p (x, SImode, false))
1207 1.1 mrg return true;
1208 1.1 mrg
1209 1.1 mrg if (GET_CODE (x) == PLUS
1210 1.1 mrg && gcn_vec_address_register_p (XEXP (x, 0), SImode, false)
1211 1.1 mrg && CONST_INT_P (XEXP (x, 1)))
1212 1.1 mrg return true;
1213 1.1 mrg
1214 1.1 mrg return false;
1215 1.1 mrg }
1216 1.1 mrg
1217 1.1 mrg /* Return true if ADDR would be valid inside a MEM using the Global
1218 1.1 mrg address space. */
1219 1.1 mrg
1220 1.1 mrg bool
1221 1.1 mrg gcn_global_address_p (rtx addr)
1222 1.1 mrg {
1223 1.1 mrg if (gcn_address_register_p (addr, DImode, false)
1224 1.1 mrg || gcn_vec_address_register_p (addr, DImode, false))
1225 1.1 mrg return true;
1226 1.1 mrg
1227 1.1 mrg if (GET_CODE (addr) == PLUS)
1228 1.1 mrg {
1229 1.1 mrg rtx base = XEXP (addr, 0);
1230 1.1 mrg rtx offset = XEXP (addr, 1);
1231 1.1 mrg bool immediate_p = (CONST_INT_P (offset)
1232 1.1 mrg && INTVAL (offset) >= -(1 << 12)
1233 1.1 mrg && INTVAL (offset) < (1 << 12));
1234 1.1 mrg
1235 1.1 mrg if ((gcn_address_register_p (base, DImode, false)
1236 1.1 mrg || gcn_vec_address_register_p (base, DImode, false))
1237 1.1 mrg && immediate_p)
1238 1.1 mrg /* SGPR + CONST or VGPR + CONST */
1239 1.1 mrg return true;
1240 1.1 mrg
1241 1.1 mrg if (gcn_address_register_p (base, DImode, false)
1242 1.1 mrg && gcn_vgpr_register_operand (offset, SImode))
1243 1.1 mrg /* SPGR + VGPR */
1244 1.1 mrg return true;
1245 1.1 mrg
1246 1.1 mrg if (GET_CODE (base) == PLUS
1247 1.1 mrg && gcn_address_register_p (XEXP (base, 0), DImode, false)
1248 1.1 mrg && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
1249 1.1 mrg && immediate_p)
1250 1.1 mrg /* (SGPR + VGPR) + CONST */
1251 1.1 mrg return true;
1252 1.1 mrg }
1253 1.1 mrg
1254 1.1 mrg return false;
1255 1.1 mrg }
1256 1.1 mrg
1257 1.1 mrg /* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P.
1258 1.1 mrg
1259 1.1 mrg Recognizes RTL expressions that are valid memory addresses for an
1260 1.1 mrg instruction. The MODE argument is the machine mode for the MEM
1261 1.1 mrg expression that wants to use this address.
1262 1.1 mrg
1263 1.1 mrg It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
1264 1.1 mrg convert common non-canonical forms to canonical form so that they will
1265 1.1 mrg be recognized. */
1266 1.1 mrg
1267 1.1 mrg static bool
1268 1.1 mrg gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
1269 1.1 mrg addr_space_t as)
1270 1.1 mrg {
1271 1.1 mrg /* All vector instructions need to work on addresses in registers. */
1272 1.1 mrg if (!TARGET_GCN5_PLUS && (vgpr_vector_mode_p (mode) && !REG_P (x)))
1273 1.1 mrg return false;
1274 1.1 mrg
1275 1.1 mrg if (AS_SCALAR_FLAT_P (as))
1276 1.1 mrg {
1277 1.1 mrg if (mode == QImode || mode == HImode)
1278 1.1 mrg return 0;
1279 1.1 mrg
1280 1.1 mrg switch (GET_CODE (x))
1281 1.1 mrg {
1282 1.1 mrg case REG:
1283 1.1 mrg return gcn_address_register_p (x, DImode, strict);
1284 1.1 mrg /* Addresses are in the form BASE+OFFSET
1285 1.1 mrg OFFSET is either 20bit unsigned immediate, SGPR or M0.
1286 1.1 mrg Writes and atomics do not accept SGPR. */
1287 1.1 mrg case PLUS:
1288 1.1 mrg {
1289 1.1 mrg rtx x0 = XEXP (x, 0);
1290 1.1 mrg rtx x1 = XEXP (x, 1);
1291 1.1 mrg if (!gcn_address_register_p (x0, DImode, strict))
1292 1.1 mrg return false;
1293 1.1 mrg /* FIXME: This is disabled because of the mode mismatch between
1294 1.1 mrg SImode (for the address or m0 register) and the DImode PLUS.
1295 1.1 mrg We'll need a zero_extend or similar.
1296 1.1 mrg
1297 1.1 mrg if (gcn_m0_register_p (x1, SImode, strict)
1298 1.1 mrg || gcn_address_register_p (x1, SImode, strict))
1299 1.1 mrg return true;
1300 1.1 mrg else*/
1301 1.1 mrg if (GET_CODE (x1) == CONST_INT)
1302 1.1 mrg {
1303 1.1 mrg if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20)
1304 1.1 mrg /* The low bits of the offset are ignored, even when
1305 1.1 mrg they're meant to realign the pointer. */
1306 1.1 mrg && !(INTVAL (x1) & 0x3))
1307 1.1 mrg return true;
1308 1.1 mrg }
1309 1.1 mrg return false;
1310 1.1 mrg }
1311 1.1 mrg
1312 1.1 mrg default:
1313 1.1 mrg break;
1314 1.1 mrg }
1315 1.1 mrg }
1316 1.1 mrg else if (AS_SCRATCH_P (as))
1317 1.1 mrg return gcn_address_register_p (x, SImode, strict);
1318 1.1 mrg else if (AS_FLAT_P (as) || AS_FLAT_SCRATCH_P (as))
1319 1.1 mrg {
1320 1.1 mrg if (TARGET_GCN3 || GET_CODE (x) == REG)
1321 1.1 mrg return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1322 1.1 mrg || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1323 1.1 mrg ? gcn_address_register_p (x, DImode, strict)
1324 1.1 mrg : gcn_vec_address_register_p (x, DImode, strict));
1325 1.1 mrg else
1326 1.1 mrg {
1327 1.1 mrg gcc_assert (TARGET_GCN5_PLUS);
1328 1.1 mrg
1329 1.1 mrg if (GET_CODE (x) == PLUS)
1330 1.1 mrg {
1331 1.1 mrg rtx x1 = XEXP (x, 1);
1332 1.1 mrg
1333 1.1 mrg if (VECTOR_MODE_P (mode)
1334 1.1 mrg ? !gcn_address_register_p (x, DImode, strict)
1335 1.1 mrg : !gcn_vec_address_register_p (x, DImode, strict))
1336 1.1 mrg return false;
1337 1.1 mrg
1338 1.1 mrg if (GET_CODE (x1) == CONST_INT)
1339 1.1 mrg {
1340 1.1 mrg if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 12)
1341 1.1 mrg /* The low bits of the offset are ignored, even when
1342 1.1 mrg they're meant to realign the pointer. */
1343 1.1 mrg && !(INTVAL (x1) & 0x3))
1344 1.1 mrg return true;
1345 1.1 mrg }
1346 1.1 mrg }
1347 1.1 mrg return false;
1348 1.1 mrg }
1349 1.1 mrg }
1350 1.1 mrg else if (AS_GLOBAL_P (as))
1351 1.1 mrg {
1352 1.1 mrg gcc_assert (TARGET_GCN5_PLUS);
1353 1.1 mrg
1354 1.1 mrg if (GET_CODE (x) == REG)
1355 1.1 mrg return (gcn_address_register_p (x, DImode, strict)
1356 1.1 mrg || (!VECTOR_MODE_P (mode)
1357 1.1 mrg && gcn_vec_address_register_p (x, DImode, strict)));
1358 1.1 mrg else if (GET_CODE (x) == PLUS)
1359 1.1 mrg {
1360 1.1 mrg rtx base = XEXP (x, 0);
1361 1.1 mrg rtx offset = XEXP (x, 1);
1362 1.1 mrg
1363 1.1 mrg bool immediate_p = (GET_CODE (offset) == CONST_INT
1364 1.1 mrg /* Signed 13-bit immediate. */
1365 1.1 mrg && INTVAL (offset) >= -(1 << 12)
1366 1.1 mrg && INTVAL (offset) < (1 << 12)
1367 1.1 mrg /* The low bits of the offset are ignored, even
1368 1.1 mrg when they're meant to realign the pointer. */
1369 1.1 mrg && !(INTVAL (offset) & 0x3));
1370 1.1 mrg
1371 1.1 mrg if (!VECTOR_MODE_P (mode))
1372 1.1 mrg {
1373 1.1 mrg if ((gcn_address_register_p (base, DImode, strict)
1374 1.1 mrg || gcn_vec_address_register_p (base, DImode, strict))
1375 1.1 mrg && immediate_p)
1376 1.1 mrg /* SGPR + CONST or VGPR + CONST */
1377 1.1 mrg return true;
1378 1.1 mrg
1379 1.1 mrg if (gcn_address_register_p (base, DImode, strict)
1380 1.1 mrg && gcn_vgpr_register_operand (offset, SImode))
1381 1.1 mrg /* SGPR + VGPR */
1382 1.1 mrg return true;
1383 1.1 mrg
1384 1.1 mrg if (GET_CODE (base) == PLUS
1385 1.1 mrg && gcn_address_register_p (XEXP (base, 0), DImode, strict)
1386 1.1 mrg && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
1387 1.1 mrg && immediate_p)
1388 1.1 mrg /* (SGPR + VGPR) + CONST */
1389 1.1 mrg return true;
1390 1.1 mrg }
1391 1.1 mrg else
1392 1.1 mrg {
1393 1.1 mrg if (gcn_address_register_p (base, DImode, strict)
1394 1.1 mrg && immediate_p)
1395 1.1 mrg /* SGPR + CONST */
1396 1.1 mrg return true;
1397 1.1 mrg }
1398 1.1 mrg }
1399 1.1 mrg else
1400 1.1 mrg return false;
1401 1.1 mrg }
1402 1.1 mrg else if (AS_ANY_DS_P (as))
1403 1.1 mrg switch (GET_CODE (x))
1404 1.1 mrg {
1405 1.1 mrg case REG:
1406 1.1 mrg return (VECTOR_MODE_P (mode)
1407 1.1 mrg ? gcn_address_register_p (x, SImode, strict)
1408 1.1 mrg : gcn_vec_address_register_p (x, SImode, strict));
1409 1.1 mrg /* Addresses are in the form BASE+OFFSET
1410 1.1 mrg OFFSET is either 20bit unsigned immediate, SGPR or M0.
1411 1.1 mrg Writes and atomics do not accept SGPR. */
1412 1.1 mrg case PLUS:
1413 1.1 mrg {
1414 1.1 mrg rtx x0 = XEXP (x, 0);
1415 1.1 mrg rtx x1 = XEXP (x, 1);
1416 1.1 mrg if (!gcn_vec_address_register_p (x0, DImode, strict))
1417 1.1 mrg return false;
1418 1.1 mrg if (GET_CODE (x1) == REG)
1419 1.1 mrg {
1420 1.1 mrg if (GET_CODE (x1) != REG
1421 1.1 mrg || (REGNO (x1) <= FIRST_PSEUDO_REGISTER
1422 1.1 mrg && !gcn_ssrc_register_operand (x1, DImode)))
1423 1.1 mrg return false;
1424 1.1 mrg }
1425 1.1 mrg else if (GET_CODE (x1) == CONST_VECTOR
1426 1.1 mrg && GET_CODE (CONST_VECTOR_ELT (x1, 0)) == CONST_INT
1427 1.1 mrg && single_cst_vector_p (x1))
1428 1.1 mrg {
1429 1.1 mrg x1 = CONST_VECTOR_ELT (x1, 0);
1430 1.1 mrg if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20))
1431 1.1 mrg return true;
1432 1.1 mrg }
1433 1.1 mrg return false;
1434 1.1 mrg }
1435 1.1 mrg
1436 1.1 mrg default:
1437 1.1 mrg break;
1438 1.1 mrg }
1439 1.1 mrg else
1440 1.1 mrg gcc_unreachable ();
1441 1.1 mrg return false;
1442 1.1 mrg }
1443 1.1 mrg
1444 1.1 mrg /* Implement TARGET_ADDR_SPACE_POINTER_MODE.
1445 1.1 mrg
1446 1.1 mrg Return the appropriate mode for a named address pointer. */
1447 1.1 mrg
1448 1.1 mrg static scalar_int_mode
1449 1.1 mrg gcn_addr_space_pointer_mode (addr_space_t addrspace)
1450 1.1 mrg {
1451 1.1 mrg switch (addrspace)
1452 1.1 mrg {
1453 1.1 mrg case ADDR_SPACE_SCRATCH:
1454 1.1 mrg case ADDR_SPACE_LDS:
1455 1.1 mrg case ADDR_SPACE_GDS:
1456 1.1 mrg return SImode;
1457 1.1 mrg case ADDR_SPACE_DEFAULT:
1458 1.1 mrg case ADDR_SPACE_FLAT:
1459 1.1 mrg case ADDR_SPACE_FLAT_SCRATCH:
1460 1.1 mrg case ADDR_SPACE_SCALAR_FLAT:
1461 1.1 mrg return DImode;
1462 1.1 mrg default:
1463 1.1 mrg gcc_unreachable ();
1464 1.1 mrg }
1465 1.1 mrg }
1466 1.1 mrg
1467 1.1 mrg /* Implement TARGET_ADDR_SPACE_ADDRESS_MODE.
1468 1.1 mrg
1469 1.1 mrg Return the appropriate mode for a named address space address. */
1470 1.1 mrg
1471 1.1 mrg static scalar_int_mode
1472 1.1 mrg gcn_addr_space_address_mode (addr_space_t addrspace)
1473 1.1 mrg {
1474 1.1 mrg return gcn_addr_space_pointer_mode (addrspace);
1475 1.1 mrg }
1476 1.1 mrg
1477 1.1 mrg /* Implement TARGET_ADDR_SPACE_SUBSET_P.
1478 1.1 mrg
1479 1.1 mrg Determine if one named address space is a subset of another. */
1480 1.1 mrg
1481 1.1 mrg static bool
1482 1.1 mrg gcn_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
1483 1.1 mrg {
1484 1.1 mrg if (subset == superset)
1485 1.1 mrg return true;
1486 1.1 mrg /* FIXME is this true? */
1487 1.1 mrg if (AS_FLAT_P (superset) || AS_SCALAR_FLAT_P (superset))
1488 1.1 mrg return true;
1489 1.1 mrg return false;
1490 1.1 mrg }
1491 1.1 mrg
1492 1.1 mrg /* Convert from one address space to another. */
1493 1.1 mrg
1494 1.1 mrg static rtx
1495 1.1 mrg gcn_addr_space_convert (rtx op, tree from_type, tree to_type)
1496 1.1 mrg {
1497 1.1 mrg gcc_assert (POINTER_TYPE_P (from_type));
1498 1.1 mrg gcc_assert (POINTER_TYPE_P (to_type));
1499 1.1 mrg
1500 1.1 mrg addr_space_t as_from = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
1501 1.1 mrg addr_space_t as_to = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
1502 1.1 mrg
1503 1.1 mrg if (AS_LDS_P (as_from) && AS_FLAT_P (as_to))
1504 1.1 mrg {
1505 1.1 mrg rtx queue = gen_rtx_REG (DImode,
1506 1.1 mrg cfun->machine->args.reg[QUEUE_PTR_ARG]);
1507 1.1 mrg rtx group_seg_aperture_hi = gen_rtx_MEM (SImode,
1508 1.1 mrg gen_rtx_PLUS (DImode, queue,
1509 1.1 mrg gen_int_mode (64, SImode)));
1510 1.1 mrg rtx tmp = gen_reg_rtx (DImode);
1511 1.1 mrg
1512 1.1 mrg emit_move_insn (gen_lowpart (SImode, tmp), op);
1513 1.1 mrg emit_move_insn (gen_highpart_mode (SImode, DImode, tmp),
1514 1.1 mrg group_seg_aperture_hi);
1515 1.1 mrg
1516 1.1 mrg return tmp;
1517 1.1 mrg }
1518 1.1 mrg else if (as_from == as_to)
1519 1.1 mrg return op;
1520 1.1 mrg else
1521 1.1 mrg gcc_unreachable ();
1522 1.1 mrg }
1523 1.1 mrg
1524 1.1 mrg /* Implement TARGET_ADDR_SPACE_DEBUG.
1525 1.1 mrg
1526 1.1 mrg Return the dwarf address space class for each hardware address space. */
1527 1.1 mrg
1528 1.1 mrg static int
1529 1.1 mrg gcn_addr_space_debug (addr_space_t as)
1530 1.1 mrg {
1531 1.1 mrg switch (as)
1532 1.1 mrg {
1533 1.1 mrg case ADDR_SPACE_DEFAULT:
1534 1.1 mrg case ADDR_SPACE_FLAT:
1535 1.1 mrg case ADDR_SPACE_SCALAR_FLAT:
1536 1.1 mrg case ADDR_SPACE_FLAT_SCRATCH:
1537 1.1 mrg return DW_ADDR_none;
1538 1.1 mrg case ADDR_SPACE_GLOBAL:
1539 1.1 mrg return 1; // DW_ADDR_LLVM_global
1540 1.1 mrg case ADDR_SPACE_LDS:
1541 1.1 mrg return 3; // DW_ADDR_LLVM_group
1542 1.1 mrg case ADDR_SPACE_SCRATCH:
1543 1.1 mrg return 4; // DW_ADDR_LLVM_private
1544 1.1 mrg case ADDR_SPACE_GDS:
1545 1.1 mrg return 0x8000; // DW_ADDR_AMDGPU_region
1546 1.1 mrg }
1547 1.1 mrg gcc_unreachable ();
1548 1.1 mrg }
1549 1.1 mrg
1550 1.1 mrg
1551 1.1 mrg /* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h
1552 1.1 mrg
1553 1.1 mrg Retun true if REGNO is OK for memory adressing. */
1554 1.1 mrg
1555 1.1 mrg bool
1556 1.1 mrg gcn_regno_mode_code_ok_for_base_p (int regno,
1557 1.1 mrg machine_mode, addr_space_t as, int, int)
1558 1.1 mrg {
1559 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER)
1560 1.1 mrg {
1561 1.1 mrg if (reg_renumber)
1562 1.1 mrg regno = reg_renumber[regno];
1563 1.1 mrg else
1564 1.1 mrg return true;
1565 1.1 mrg }
1566 1.1 mrg if (AS_FLAT_P (as))
1567 1.1 mrg return (VGPR_REGNO_P (regno)
1568 1.1 mrg || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1569 1.1 mrg else if (AS_SCALAR_FLAT_P (as))
1570 1.1 mrg return (SGPR_REGNO_P (regno)
1571 1.1 mrg || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1572 1.1 mrg else if (AS_GLOBAL_P (as))
1573 1.1 mrg {
1574 1.1 mrg return (SGPR_REGNO_P (regno)
1575 1.1 mrg || VGPR_REGNO_P (regno)
1576 1.1 mrg || regno == ARG_POINTER_REGNUM
1577 1.1 mrg || regno == FRAME_POINTER_REGNUM);
1578 1.1 mrg }
1579 1.1 mrg else
1580 1.1 mrg /* For now. */
1581 1.1 mrg return false;
1582 1.1 mrg }
1583 1.1 mrg
1584 1.1 mrg /* Implement MODE_CODE_BASE_REG_CLASS via gcn.h.
1585 1.1 mrg
1586 1.1 mrg Return a suitable register class for memory addressing. */
1587 1.1 mrg
1588 1.1 mrg reg_class
1589 1.1 mrg gcn_mode_code_base_reg_class (machine_mode mode, addr_space_t as, int oc,
1590 1.1 mrg int ic)
1591 1.1 mrg {
1592 1.1 mrg switch (as)
1593 1.1 mrg {
1594 1.1 mrg case ADDR_SPACE_DEFAULT:
1595 1.1 mrg return gcn_mode_code_base_reg_class (mode, DEFAULT_ADDR_SPACE, oc, ic);
1596 1.1 mrg case ADDR_SPACE_SCALAR_FLAT:
1597 1.1 mrg case ADDR_SPACE_SCRATCH:
1598 1.1 mrg return SGPR_REGS;
1599 1.1 mrg break;
1600 1.1 mrg case ADDR_SPACE_FLAT:
1601 1.1 mrg case ADDR_SPACE_FLAT_SCRATCH:
1602 1.1 mrg case ADDR_SPACE_LDS:
1603 1.1 mrg case ADDR_SPACE_GDS:
1604 1.1 mrg return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1605 1.1 mrg || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1606 1.1 mrg ? SGPR_REGS : VGPR_REGS);
1607 1.1 mrg case ADDR_SPACE_GLOBAL:
1608 1.1 mrg return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1609 1.1 mrg || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1610 1.1 mrg ? SGPR_REGS : ALL_GPR_REGS);
1611 1.1 mrg }
1612 1.1 mrg gcc_unreachable ();
1613 1.1 mrg }
1614 1.1 mrg
1615 1.1 mrg /* Implement REGNO_OK_FOR_INDEX_P via gcn.h.
1616 1.1 mrg
1617 1.1 mrg Return true if REGNO is OK for index of memory addressing. */
1618 1.1 mrg
1619 1.1 mrg bool
1620 1.1 mrg regno_ok_for_index_p (int regno)
1621 1.1 mrg {
1622 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER)
1623 1.1 mrg {
1624 1.1 mrg if (reg_renumber)
1625 1.1 mrg regno = reg_renumber[regno];
1626 1.1 mrg else
1627 1.1 mrg return true;
1628 1.1 mrg }
1629 1.1 mrg return regno == M0_REG || VGPR_REGNO_P (regno);
1630 1.1 mrg }
1631 1.1 mrg
1632 1.1 mrg /* Generate move which uses the exec flags. If EXEC is NULL, then it is
1633 1.1 mrg assumed that all lanes normally relevant to the mode of the move are
1634 1.1 mrg affected. If PREV is NULL, then a sensible default is supplied for
1635 1.1 mrg the inactive lanes. */
1636 1.1 mrg
1637 1.1 mrg static rtx
1638 1.1 mrg gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL)
1639 1.1 mrg {
1640 1.1 mrg machine_mode mode = GET_MODE (op0);
1641 1.1 mrg
1642 1.1 mrg if (vgpr_vector_mode_p (mode))
1643 1.1 mrg {
1644 1.1 mrg if (exec && exec != CONSTM1_RTX (DImode))
1645 1.1 mrg {
1646 1.1 mrg if (!prev)
1647 1.1 mrg prev = op0;
1648 1.1 mrg }
1649 1.1 mrg else
1650 1.1 mrg {
1651 1.1 mrg if (!prev)
1652 1.1 mrg prev = gcn_gen_undef (mode);
1653 1.1 mrg exec = gcn_full_exec_reg ();
1654 1.1 mrg }
1655 1.1 mrg
1656 1.1 mrg rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec));
1657 1.1 mrg
1658 1.1 mrg return gen_rtx_PARALLEL (VOIDmode,
1659 1.1 mrg gen_rtvec (2, set,
1660 1.1 mrg gen_rtx_CLOBBER (VOIDmode,
1661 1.1 mrg gen_rtx_SCRATCH (V64DImode))));
1662 1.1 mrg }
1663 1.1 mrg
1664 1.1 mrg return (gen_rtx_PARALLEL
1665 1.1 mrg (VOIDmode,
1666 1.1 mrg gen_rtvec (2, gen_rtx_SET (op0, op1),
1667 1.1 mrg gen_rtx_USE (VOIDmode,
1668 1.1 mrg exec ? exec : gcn_scalar_exec ()))));
1669 1.1 mrg }
1670 1.1 mrg
1671 1.1 mrg /* Generate masked move. */
1672 1.1 mrg
1673 1.1 mrg static rtx
1674 1.1 mrg gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL)
1675 1.1 mrg {
1676 1.1 mrg if (exec)
1677 1.1 mrg return (gen_rtx_SET (op0,
1678 1.1 mrg gen_rtx_VEC_MERGE (GET_MODE (op0),
1679 1.1 mrg gen_rtx_VEC_DUPLICATE (GET_MODE
1680 1.1 mrg (op0), op1),
1681 1.1 mrg op2, exec)));
1682 1.1 mrg else
1683 1.1 mrg return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1)));
1684 1.1 mrg }
1685 1.1 mrg
1686 1.1 mrg /* Expand vector init of OP0 by VEC.
1687 1.1 mrg Implements vec_init instruction pattern. */
1688 1.1 mrg
1689 1.1 mrg void
1690 1.1 mrg gcn_expand_vector_init (rtx op0, rtx vec)
1691 1.1 mrg {
1692 1.1 mrg int64_t initialized_mask = 0;
1693 1.1 mrg int64_t curr_mask = 1;
1694 1.1 mrg machine_mode mode = GET_MODE (op0);
1695 1.1 mrg
1696 1.1 mrg rtx val = XVECEXP (vec, 0, 0);
1697 1.1 mrg
1698 1.1 mrg for (int i = 1; i < 64; i++)
1699 1.1 mrg if (rtx_equal_p (val, XVECEXP (vec, 0, i)))
1700 1.1 mrg curr_mask |= (int64_t) 1 << i;
1701 1.1 mrg
1702 1.1 mrg if (gcn_constant_p (val))
1703 1.1 mrg emit_move_insn (op0, gcn_vec_constant (mode, val));
1704 1.1 mrg else
1705 1.1 mrg {
1706 1.1 mrg val = force_reg (GET_MODE_INNER (mode), val);
1707 1.1 mrg emit_insn (gen_duplicate_load (op0, val));
1708 1.1 mrg }
1709 1.1 mrg initialized_mask |= curr_mask;
1710 1.1 mrg for (int i = 1; i < 64; i++)
1711 1.1 mrg if (!(initialized_mask & ((int64_t) 1 << i)))
1712 1.1 mrg {
1713 1.1 mrg curr_mask = (int64_t) 1 << i;
1714 1.1 mrg rtx val = XVECEXP (vec, 0, i);
1715 1.1 mrg
1716 1.1 mrg for (int j = i + 1; j < 64; j++)
1717 1.1 mrg if (rtx_equal_p (val, XVECEXP (vec, 0, j)))
1718 1.1 mrg curr_mask |= (int64_t) 1 << j;
1719 1.1 mrg if (gcn_constant_p (val))
1720 1.1 mrg emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val),
1721 1.1 mrg get_exec (curr_mask)));
1722 1.1 mrg else
1723 1.1 mrg {
1724 1.1 mrg val = force_reg (GET_MODE_INNER (mode), val);
1725 1.1 mrg emit_insn (gen_duplicate_load (op0, val, op0,
1726 1.1 mrg get_exec (curr_mask)));
1727 1.1 mrg }
1728 1.1 mrg initialized_mask |= curr_mask;
1729 1.1 mrg }
1730 1.1 mrg }
1731 1.1 mrg
1732 1.1 mrg /* Load vector constant where n-th lane contains BASE+n*VAL. */
1733 1.1 mrg
1734 1.1 mrg static rtx
1735 1.1 mrg strided_constant (machine_mode mode, int base, int val)
1736 1.1 mrg {
1737 1.1 mrg rtx x = gen_reg_rtx (mode);
1738 1.1 mrg emit_move_insn (x, gcn_vec_constant (mode, base));
1739 1.1 mrg emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32),
1740 1.1 mrg x, get_exec (0xffffffff00000000)));
1741 1.1 mrg emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16),
1742 1.1 mrg x, get_exec (0xffff0000ffff0000)));
1743 1.1 mrg emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8),
1744 1.1 mrg x, get_exec (0xff00ff00ff00ff00)));
1745 1.1 mrg emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4),
1746 1.1 mrg x, get_exec (0xf0f0f0f0f0f0f0f0)));
1747 1.1 mrg emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2),
1748 1.1 mrg x, get_exec (0xcccccccccccccccc)));
1749 1.1 mrg emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1),
1750 1.1 mrg x, get_exec (0xaaaaaaaaaaaaaaaa)));
1751 1.1 mrg return x;
1752 1.1 mrg }
1753 1.1 mrg
1754 1.1 mrg /* Implement TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS. */
1755 1.1 mrg
1756 1.1 mrg static rtx
1757 1.1 mrg gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
1758 1.1 mrg addr_space_t as)
1759 1.1 mrg {
1760 1.1 mrg switch (as)
1761 1.1 mrg {
1762 1.1 mrg case ADDR_SPACE_DEFAULT:
1763 1.1 mrg return gcn_addr_space_legitimize_address (x, old, mode,
1764 1.1 mrg DEFAULT_ADDR_SPACE);
1765 1.1 mrg case ADDR_SPACE_SCALAR_FLAT:
1766 1.1 mrg case ADDR_SPACE_SCRATCH:
1767 1.1 mrg /* Instructions working on vectors need the address to be in
1768 1.1 mrg a register. */
1769 1.1 mrg if (vgpr_vector_mode_p (mode))
1770 1.1 mrg return force_reg (GET_MODE (x), x);
1771 1.1 mrg
1772 1.1 mrg return x;
1773 1.1 mrg case ADDR_SPACE_FLAT:
1774 1.1 mrg case ADDR_SPACE_FLAT_SCRATCH:
1775 1.1 mrg case ADDR_SPACE_GLOBAL:
1776 1.1 mrg return TARGET_GCN3 ? force_reg (DImode, x) : x;
1777 1.1 mrg case ADDR_SPACE_LDS:
1778 1.1 mrg case ADDR_SPACE_GDS:
1779 1.1 mrg /* FIXME: LDS support offsets, handle them!. */
1780 1.1 mrg if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode)
1781 1.1 mrg {
1782 1.1 mrg rtx addrs = gen_reg_rtx (V64SImode);
1783 1.1 mrg rtx base = force_reg (SImode, x);
1784 1.1 mrg rtx offsets = strided_constant (V64SImode, 0,
1785 1.1 mrg GET_MODE_UNIT_SIZE (mode));
1786 1.1 mrg
1787 1.1 mrg emit_insn (gen_vec_duplicatev64si (addrs, base));
1788 1.1 mrg emit_insn (gen_addv64si3 (addrs, offsets, addrs));
1789 1.1 mrg return addrs;
1790 1.1 mrg }
1791 1.1 mrg return x;
1792 1.1 mrg }
1793 1.1 mrg gcc_unreachable ();
1794 1.1 mrg }
1795 1.1 mrg
1796 1.1 mrg /* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:V64DI)) with the
1797 1.1 mrg proper vector of stepped addresses.
1798 1.1 mrg
1799 1.1 mrg MEM will be a DImode address of a vector in an SGPR.
1800 1.1 mrg TMP will be a V64DImode VGPR pair or (scratch:V64DI). */
1801 1.1 mrg
rtx
gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
				     rtx tmp)
{
  gcc_assert (MEM_P (mem));
  rtx mem_base = XEXP (mem, 0);
  rtx mem_index = NULL_RTX;

  if (!TARGET_GCN5_PLUS)
    {
      /* gcn_addr_space_legitimize_address should have put the address in a
         register.  If not, it is too late to do anything about it.  */
      gcc_assert (REG_P (mem_base));
    }

  /* Split a (plus base index) address into its components.  */
  if (GET_CODE (mem_base) == PLUS)
    {
      mem_index = XEXP (mem_base, 1);
      mem_base = XEXP (mem_base, 0);
    }

  /* RF and RM base registers for vector modes should be always an SGPR.  */
  gcc_assert (SGPR_REGNO_P (REGNO (mem_base))
	      || REGNO (mem_base) >= FIRST_PSEUDO_REGISTER);

  machine_mode inner = GET_MODE_INNER (mode);
  /* Per-lane byte stride is the element size (assumed a power of two).  */
  int shift = exact_log2 (GET_MODE_SIZE (inner));
  /* NOTE(review): VGPR 1 is used as the 0..63 lane-ID "ramp"; this relies
     on it being initialized elsewhere (prologue) -- confirm.  */
  rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
  rtx undef_v64si = gcn_gen_undef (V64SImode);
  rtx new_base = NULL_RTX;
  addr_space_t as = MEM_ADDR_SPACE (mem);

  /* Use the low half of TMP when it is a real register pair, otherwise a
     fresh pseudo.  */
  rtx tmplo = (REG_P (tmp)
	       ? gcn_operand_part (V64DImode, tmp, 0)
	       : gen_reg_rtx (V64SImode));

  /* tmplo[:] = ramp[:] << shift  */
  if (exec)
    emit_insn (gen_ashlv64si3_exec (tmplo, ramp,
				    gen_int_mode (shift, SImode),
				    undef_v64si, exec));
  else
    emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode)));

  if (AS_FLAT_P (as))
    {
      /* FLAT accesses need the full 64-bit address in each lane.  */
      rtx vcc = gen_rtx_REG (DImode, CC_SAVE_REG);

      if (REG_P (tmp))
	{
	  rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
	  rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
	  rtx tmphi = gcn_operand_part (V64DImode, tmp, 1);

	  /* tmphi[:] = mem_base_hi  */
	  if (exec)
	    emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi,
						    undef_v64si, exec));
	  else
	    emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi));

	  /* tmp[:] += zext (mem_base)  */
	  if (exec)
	    {
	      /* 64-bit add expressed as a low-part add producing a carry
		 in VCC, followed by a carry-in add on the high part.  */
	      emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo,
						     vcc, undef_v64si, exec));
	      emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx,
					      vcc, vcc, undef_v64si, exec));
	    }
	  else
	    emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
	}
      else
	{
	  /* TMP was (scratch:V64DI); allocate a fresh register pair.  */
	  tmp = gen_reg_rtx (V64DImode);
	  if (exec)
	    emit_insn (gen_addv64di3_vcc_zext_dup2_exec
		       (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode),
			exec));
	  else
	    emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc));
	}

      new_base = tmp;
    }
  else if (AS_ANY_DS_P (as))
    {
      /* LDS/GDS addresses are 32-bit; add the scalar base directly.  */
      if (!exec)
	emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base));
      else
        emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base,
					   gcn_gen_undef (V64SImode), exec));
      new_base = tmplo;
    }
  else
    {
      /* Other spaces: express the base+offset combination symbolically in
	 the address RTL rather than emitting adds.  */
      mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base);
      new_base = gen_rtx_PLUS (V64DImode, mem_base,
			       gen_rtx_SIGN_EXTEND (V64DImode, tmplo));
    }

  /* Reattach any index split off above (zero when there was none).  */
  return gen_rtx_PLUS (GET_MODE (new_base), new_base,
		       gen_rtx_VEC_DUPLICATE (GET_MODE (new_base),
					      (mem_index ? mem_index
					       : const0_rtx)));
}
1908 1.1 mrg
1909 1.1 mrg /* Convert a BASE address, a vector of OFFSETS, and a SCALE, to addresses
   suitable for the given address space.  This is intended for use in
1911 1.1 mrg gather/scatter patterns.
1912 1.1 mrg
1913 1.1 mrg The offsets may be signed or unsigned, according to UNSIGNED_P.
1914 1.1 mrg If EXEC is set then _exec patterns will be used, otherwise plain.
1915 1.1 mrg
1916 1.1 mrg Return values.
1917 1.1 mrg ADDR_SPACE_FLAT - return V64DImode vector of absolute addresses.
1918 1.1 mrg ADDR_SPACE_GLOBAL - return V64SImode vector of offsets. */
1919 1.1 mrg
1920 1.1 mrg rtx
1921 1.1 mrg gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
1922 1.1 mrg bool unsigned_p, rtx exec)
1923 1.1 mrg {
1924 1.1 mrg rtx tmpsi = gen_reg_rtx (V64SImode);
1925 1.1 mrg rtx tmpdi = gen_reg_rtx (V64DImode);
1926 1.1 mrg rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL;
1927 1.1 mrg rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL;
1928 1.1 mrg
1929 1.1 mrg if (CONST_INT_P (scale)
1930 1.1 mrg && INTVAL (scale) > 0
1931 1.1 mrg && exact_log2 (INTVAL (scale)) >= 0)
1932 1.1 mrg emit_insn (gen_ashlv64si3 (tmpsi, offsets,
1933 1.1 mrg GEN_INT (exact_log2 (INTVAL (scale)))));
1934 1.1 mrg else
1935 1.1 mrg (exec
1936 1.1 mrg ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi,
1937 1.1 mrg exec))
1938 1.1 mrg : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale)));
1939 1.1 mrg
1940 1.1 mrg /* "Global" instructions do not support negative register offsets. */
1941 1.1 mrg if (as == ADDR_SPACE_FLAT || !unsigned_p)
1942 1.1 mrg {
1943 1.1 mrg if (unsigned_p)
1944 1.1 mrg (exec
1945 1.1 mrg ? emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base,
1946 1.1 mrg undefdi, exec))
1947 1.1 mrg : emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base)));
1948 1.1 mrg else
1949 1.1 mrg (exec
1950 1.1 mrg ? emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base,
1951 1.1 mrg undefdi, exec))
1952 1.1 mrg : emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base)));
1953 1.1 mrg return tmpdi;
1954 1.1 mrg }
1955 1.1 mrg else if (as == ADDR_SPACE_GLOBAL)
1956 1.1 mrg return tmpsi;
1957 1.1 mrg
1958 1.1 mrg gcc_unreachable ();
1959 1.1 mrg }
1960 1.1 mrg
1961 1.1 mrg /* Return true if move from OP0 to OP1 is known to be executed in vector
1962 1.1 mrg unit. */
1963 1.1 mrg
1964 1.1 mrg bool
1965 1.1 mrg gcn_vgpr_move_p (rtx op0, rtx op1)
1966 1.1 mrg {
1967 1.1 mrg if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
1968 1.1 mrg return true;
1969 1.1 mrg if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
1970 1.1 mrg return true;
1971 1.1 mrg return ((REG_P (op0) && VGPR_REGNO_P (REGNO (op0)))
1972 1.1 mrg || (REG_P (op1) && VGPR_REGNO_P (REGNO (op1)))
1973 1.1 mrg || vgpr_vector_mode_p (GET_MODE (op0)));
1974 1.1 mrg }
1975 1.1 mrg
1976 1.1 mrg /* Return true if move from OP0 to OP1 is known to be executed in scalar
1977 1.1 mrg unit. Used in the machine description. */
1978 1.1 mrg
1979 1.1 mrg bool
1980 1.1 mrg gcn_sgpr_move_p (rtx op0, rtx op1)
1981 1.1 mrg {
1982 1.1 mrg if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
1983 1.1 mrg return true;
1984 1.1 mrg if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
1985 1.1 mrg return true;
1986 1.1 mrg if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER
1987 1.1 mrg || VGPR_REGNO_P (REGNO (op0)))
1988 1.1 mrg return false;
1989 1.1 mrg if (REG_P (op1)
1990 1.1 mrg && REGNO (op1) < FIRST_PSEUDO_REGISTER
1991 1.1 mrg && !VGPR_REGNO_P (REGNO (op1)))
1992 1.1 mrg return true;
1993 1.1 mrg return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
1994 1.1 mrg }
1995 1.1 mrg
1996 1.1 mrg /* Implement TARGET_SECONDARY_RELOAD.
1997 1.1 mrg
1998 1.1 mrg The address space determines which registers can be used for loads and
1999 1.1 mrg stores. */
2000 1.1 mrg
static reg_class_t
gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		      machine_mode reload_mode, secondary_reload_info *sri)
{
  reg_class_t result = NO_REGS;
  /* A pseudo with no hard register assigned must be reloaded via its
     stack slot.  */
  bool spilled_pseudo =
    (REG_P (x) || GET_CODE (x) == SUBREG) && true_regnum (x) == -1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "gcn_secondary_reload: ");
      dump_value_slim (dump_file, x, 1);
      fprintf (dump_file, " %s %s:%s", (in_p ? "->" : "<-"),
	       reg_class_names[rclass], GET_MODE_NAME (reload_mode));
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	fprintf (dump_file, " (true regnum: %d \"%s\")", true_regnum (x),
		 (true_regnum (x) >= 0
		  && true_regnum (x) < FIRST_PSEUDO_REGISTER
		  ? reg_names[true_regnum (x)]
		  : (spilled_pseudo ? "stack spill" : "??")));
      fprintf (dump_file, "\n");
    }

  /* Some callers don't use or initialize icode.  */
  sri->icode = CODE_FOR_nothing;

  if (MEM_P (x) || spilled_pseudo)
    {
      addr_space_t as = DEFAULT_ADDR_SPACE;

      /* If we have a spilled pseudo, we can't find the address space
	 directly, but we know it's in ADDR_SPACE_FLAT space for GCN3 or
	 ADDR_SPACE_GLOBAL for GCN5.  */
      if (MEM_P (x))
	as = MEM_ADDR_SPACE (x);

      if (as == ADDR_SPACE_DEFAULT)
	as = DEFAULT_ADDR_SPACE;

      switch (as)
	{
	case ADDR_SPACE_SCALAR_FLAT:
	  /* Scalar memory is only reachable through SGPRs.  */
	  result =
	    ((!MEM_P (x) || rclass == SGPR_REGS) ? NO_REGS : SGPR_REGS);
	  break;
	case ADDR_SPACE_FLAT:
	case ADDR_SPACE_FLAT_SCRATCH:
	case ADDR_SPACE_GLOBAL:
	  /* Vector-mode reloads need a dedicated reload pattern, chosen
	     by mode and direction; scalar modes fall through to the
	     plain VGPR requirement below.  */
	  if (GET_MODE_CLASS (reload_mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT)
	    {
	      if (in_p)
		switch (reload_mode)
		  {
		  case E_V64SImode:
		    sri->icode = CODE_FOR_reload_inv64si;
		    break;
		  case E_V64SFmode:
		    sri->icode = CODE_FOR_reload_inv64sf;
		    break;
		  case E_V64HImode:
		    sri->icode = CODE_FOR_reload_inv64hi;
		    break;
		  case E_V64HFmode:
		    sri->icode = CODE_FOR_reload_inv64hf;
		    break;
		  case E_V64QImode:
		    sri->icode = CODE_FOR_reload_inv64qi;
		    break;
		  case E_V64DImode:
		    sri->icode = CODE_FOR_reload_inv64di;
		    break;
		  case E_V64DFmode:
		    sri->icode = CODE_FOR_reload_inv64df;
		    break;
		  default:
		    gcc_unreachable ();
		  }
	      else
		switch (reload_mode)
		  {
		  case E_V64SImode:
		    sri->icode = CODE_FOR_reload_outv64si;
		    break;
		  case E_V64SFmode:
		    sri->icode = CODE_FOR_reload_outv64sf;
		    break;
		  case E_V64HImode:
		    sri->icode = CODE_FOR_reload_outv64hi;
		    break;
		  case E_V64HFmode:
		    sri->icode = CODE_FOR_reload_outv64hf;
		    break;
		  case E_V64QImode:
		    sri->icode = CODE_FOR_reload_outv64qi;
		    break;
		  case E_V64DImode:
		    sri->icode = CODE_FOR_reload_outv64di;
		    break;
		  case E_V64DFmode:
		    sri->icode = CODE_FOR_reload_outv64df;
		    break;
		  default:
		    gcc_unreachable ();
		  }
	      break;
	    }
	  /* Fallthrough.  */
	case ADDR_SPACE_LDS:
	case ADDR_SPACE_GDS:
	case ADDR_SPACE_SCRATCH:
	  /* These spaces are only accessible through VGPRs.  */
	  result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
	  break;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, " <= %s (icode: %s)\n", reg_class_names[result],
	     get_insn_name (sri->icode));

  return result;
}
2123 1.1 mrg
2124 1.1 mrg /* Update register usage after having seen the compiler flags and kernel
2125 1.1 mrg attributes. We typically want to fix registers that contain values
2126 1.1 mrg set by the HSA runtime. */
2127 1.1 mrg
static void
gcn_conditional_register_usage (void)
{
  /* Nothing to do until a function context exists.  */
  if (!cfun || !cfun->machine)
    return;

  if (cfun->machine->normal_function)
    {
      /* Restrict the set of SGPRs and VGPRs used by non-kernel functions.  */
      for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT);
	   i <= LAST_SGPR_REG; i++)
	fixed_regs[i] = 1, call_used_regs[i] = 1;

      for (int i = VGPR_REGNO (MAX_NORMAL_VGPR_COUNT);
	   i <= LAST_VGPR_REG; i++)
	fixed_regs[i] = 1, call_used_regs[i] = 1;

      return;
    }

  /* If the set of requested args is the default set, nothing more needs to
     be done.  */
  if (cfun->machine->args.requested == default_requested_args)
    return;

  /* Requesting a set of args different from the default violates the ABI.  */
  if (!leaf_function_p ())
    warning (0, "A non-default set of initial values has been requested, "
	     "which violates the ABI");

  /* Start with the low SGPRs (which hold the runtime-set-up values)
     available for general use; individual ones are re-fixed below.  */
  for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
    fixed_regs[i] = 0;

  /* Fix the runtime argument registers containing values that may be
     needed later.  FLAT_SCRATCH_* should not be needed after the
     prologue so there's no need to fix those.  (Each 64-bit value
     occupies a consecutive register pair, hence the "+ 1" entries.)  */
  if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0)
    fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
  if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
    {
      /* The upper 32-bits of the 64-bit descriptor are not used, so allow
	 the containing registers to be used for other purposes.  */
      fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
      fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
    }
  if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
    {
      fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG]] = 1;
      fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] + 1] = 1;
    }
  if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0)
    {
      fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG]] = 1;
      fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG] + 1] = 1;
    }
  if (cfun->machine->args.reg[WORKGROUP_ID_X_ARG] >= 0)
    fixed_regs[cfun->machine->args.reg[WORKGROUP_ID_X_ARG]] = 1;
  if (cfun->machine->args.reg[WORK_ITEM_ID_X_ARG] >= 0)
    fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_X_ARG]] = 1;
  if (cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG] >= 0)
    fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG]] = 1;
  if (cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG] >= 0)
    fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
}
2192 1.1 mrg
2193 1.1 mrg /* Determine if a load or store is valid, according to the register classes
2194 1.1 mrg and address space. Used primarily by the machine description to decide
2195 1.1 mrg when to split a move into two steps. */
2196 1.1 mrg
2197 1.1 mrg bool
2198 1.1 mrg gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
2199 1.1 mrg {
2200 1.1 mrg if (!MEM_P (dest) && !MEM_P (src))
2201 1.1 mrg return true;
2202 1.1 mrg
2203 1.1 mrg if (MEM_P (dest)
2204 1.1 mrg && AS_FLAT_P (MEM_ADDR_SPACE (dest))
2205 1.1 mrg && (gcn_flat_address_p (XEXP (dest, 0), mode)
2206 1.1 mrg || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2207 1.1 mrg || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2208 1.1 mrg && gcn_vgpr_register_operand (src, mode))
2209 1.1 mrg return true;
2210 1.1 mrg else if (MEM_P (src)
2211 1.1 mrg && AS_FLAT_P (MEM_ADDR_SPACE (src))
2212 1.1 mrg && (gcn_flat_address_p (XEXP (src, 0), mode)
2213 1.1 mrg || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2214 1.1 mrg || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2215 1.1 mrg && gcn_vgpr_register_operand (dest, mode))
2216 1.1 mrg return true;
2217 1.1 mrg
2218 1.1 mrg if (MEM_P (dest)
2219 1.1 mrg && AS_GLOBAL_P (MEM_ADDR_SPACE (dest))
2220 1.1 mrg && (gcn_global_address_p (XEXP (dest, 0))
2221 1.1 mrg || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2222 1.1 mrg || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2223 1.1 mrg && gcn_vgpr_register_operand (src, mode))
2224 1.1 mrg return true;
2225 1.1 mrg else if (MEM_P (src)
2226 1.1 mrg && AS_GLOBAL_P (MEM_ADDR_SPACE (src))
2227 1.1 mrg && (gcn_global_address_p (XEXP (src, 0))
2228 1.1 mrg || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2229 1.1 mrg || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2230 1.1 mrg && gcn_vgpr_register_operand (dest, mode))
2231 1.1 mrg return true;
2232 1.1 mrg
2233 1.1 mrg if (MEM_P (dest)
2234 1.1 mrg && MEM_ADDR_SPACE (dest) == ADDR_SPACE_SCALAR_FLAT
2235 1.1 mrg && (gcn_scalar_flat_address_p (XEXP (dest, 0))
2236 1.1 mrg || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2237 1.1 mrg || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2238 1.1 mrg && gcn_ssrc_register_operand (src, mode))
2239 1.1 mrg return true;
2240 1.1 mrg else if (MEM_P (src)
2241 1.1 mrg && MEM_ADDR_SPACE (src) == ADDR_SPACE_SCALAR_FLAT
2242 1.1 mrg && (gcn_scalar_flat_address_p (XEXP (src, 0))
2243 1.1 mrg || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2244 1.1 mrg || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2245 1.1 mrg && gcn_sdst_register_operand (dest, mode))
2246 1.1 mrg return true;
2247 1.1 mrg
2248 1.1 mrg if (MEM_P (dest)
2249 1.1 mrg && AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
2250 1.1 mrg && gcn_ds_address_p (XEXP (dest, 0))
2251 1.1 mrg && gcn_vgpr_register_operand (src, mode))
2252 1.1 mrg return true;
2253 1.1 mrg else if (MEM_P (src)
2254 1.1 mrg && AS_ANY_DS_P (MEM_ADDR_SPACE (src))
2255 1.1 mrg && gcn_ds_address_p (XEXP (src, 0))
2256 1.1 mrg && gcn_vgpr_register_operand (dest, mode))
2257 1.1 mrg return true;
2258 1.1 mrg
2259 1.1 mrg return false;
2260 1.1 mrg }
2261 1.1 mrg
2262 1.1 mrg /* }}} */
2263 1.1 mrg /* {{{ Functions and ABI. */
2264 1.1 mrg
2265 1.1 mrg /* Implement TARGET_FUNCTION_VALUE.
2266 1.1 mrg
2267 1.1 mrg Define how to find the value returned by a function.
2268 1.1 mrg The register location is always the same, but the mode depends on
2269 1.1 mrg VALTYPE. */
2270 1.1 mrg
2271 1.1 mrg static rtx
2272 1.1 mrg gcn_function_value (const_tree valtype, const_tree, bool)
2273 1.1 mrg {
2274 1.1 mrg machine_mode mode = TYPE_MODE (valtype);
2275 1.1 mrg
2276 1.1 mrg if (INTEGRAL_TYPE_P (valtype)
2277 1.1 mrg && GET_MODE_CLASS (mode) == MODE_INT
2278 1.1 mrg && GET_MODE_SIZE (mode) < 4)
2279 1.1 mrg mode = SImode;
2280 1.1 mrg
2281 1.1 mrg return gen_rtx_REG (mode, SGPR_REGNO (RETURN_VALUE_REG));
2282 1.1 mrg }
2283 1.1 mrg
2284 1.1 mrg /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
2285 1.1 mrg
2286 1.1 mrg Return true if N is a possible register number for the function return
2287 1.1 mrg value. */
2288 1.1 mrg
2289 1.1 mrg static bool
2290 1.1 mrg gcn_function_value_regno_p (const unsigned int n)
2291 1.1 mrg {
2292 1.1 mrg return n == RETURN_VALUE_REG;
2293 1.1 mrg }
2294 1.1 mrg
2295 1.1 mrg /* Calculate the number of registers required to hold function argument
2296 1.1 mrg ARG. */
2297 1.1 mrg
2298 1.1 mrg static int
2299 1.1 mrg num_arg_regs (const function_arg_info &arg)
2300 1.1 mrg {
2301 1.1 mrg if (targetm.calls.must_pass_in_stack (arg))
2302 1.1 mrg return 0;
2303 1.1 mrg
2304 1.1 mrg int size = arg.promoted_size_in_bytes ();
2305 1.1 mrg return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2306 1.1 mrg }
2307 1.1 mrg
2308 1.1 mrg /* Implement TARGET_STRICT_ARGUMENT_NAMING.
2309 1.1 mrg
2310 1.1 mrg Return true if the location where a function argument is passed
2311 1.1 mrg depends on whether or not it is a named argument
2312 1.1 mrg
2313 1.1 mrg For gcn, we know how to handle functions declared as stdarg: by
2314 1.1 mrg passing an extra pointer to the unnamed arguments. However, the
2315 1.1 mrg Fortran frontend can produce a different situation, where a
2316 1.1 mrg function pointer is declared with no arguments, but the actual
2317 1.1 mrg function and calls to it take more arguments. In that case, we
2318 1.1 mrg want to ensure the call matches the definition of the function. */
2319 1.1 mrg
2320 1.1 mrg static bool
2321 1.1 mrg gcn_strict_argument_naming (cumulative_args_t cum_v)
2322 1.1 mrg {
2323 1.1 mrg CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2324 1.1 mrg
2325 1.1 mrg return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
2326 1.1 mrg }
2327 1.1 mrg
2328 1.1 mrg /* Implement TARGET_PRETEND_OUTGOING_VARARGS_NAMED.
2329 1.1 mrg
2330 1.1 mrg See comment on gcn_strict_argument_naming. */
2331 1.1 mrg
2332 1.1 mrg static bool
2333 1.1 mrg gcn_pretend_outgoing_varargs_named (cumulative_args_t cum_v)
2334 1.1 mrg {
2335 1.1 mrg return !gcn_strict_argument_naming (cum_v);
2336 1.1 mrg }
2337 1.1 mrg
2338 1.1 mrg /* Implement TARGET_FUNCTION_ARG.
2339 1.1 mrg
2340 1.1 mrg Return an RTX indicating whether a function argument is passed in a register
2341 1.1 mrg and if so, which register. */
2342 1.1 mrg
static rtx
gcn_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  if (cum->normal_function)
    {
      /* Unnamed (variadic) arguments and the end marker never go in
	 registers.  */
      if (!arg.named || arg.end_marker_p ())
	return 0;

      if (targetm.calls.must_pass_in_stack (arg))
	return 0;

      /* Vector parameters are not supported yet.  */
      if (VECTOR_MODE_P (arg.mode))
	return 0;

      /* Align multi-register arguments on a multiple of their own
	 register count (mirrored in gcn_function_arg_advance).  */
      int reg_num = FIRST_PARM_REG + cum->num;
      int num_regs = num_arg_regs (arg);
      if (num_regs > 0)
	while (reg_num % num_regs != 0)
	  reg_num++;
      /* Fall through to "return 0" (pass on stack) when the argument
	 does not fit in the remaining parameter registers.  */
      if (reg_num + num_regs <= FIRST_PARM_REG + NUM_PARM_REGS)
	return gen_rtx_REG (arg.mode, reg_num);
    }
  else
    {
      /* Kernel entry point: the first cum->args.nargs arguments arrive in
	 fixed registers (parsed from the amdgpu_hsa_kernel attribute);
	 any further arguments are read from the kernarg segment.  */
      if (cum->num >= cum->args.nargs)
	{
	  /* Round the running offset up to this argument's alignment and
	     keep the overall segment alignment up to date.  */
	  cum->offset = (cum->offset + TYPE_ALIGN (arg.type) / 8 - 1)
	    & -(TYPE_ALIGN (arg.type) / 8);
	  cfun->machine->kernarg_segment_alignment
	    = MAX ((unsigned) cfun->machine->kernarg_segment_alignment,
		   TYPE_ALIGN (arg.type) / 8);
	  rtx addr = gen_rtx_REG (DImode,
				  cum->args.reg[KERNARG_SEGMENT_PTR_ARG]);
	  if (cum->offset)
	    addr = gen_rtx_PLUS (DImode, addr,
				 gen_int_mode (cum->offset, DImode));
	  /* Kernel arguments are loaded read-only via the scalar unit.  */
	  rtx mem = gen_rtx_MEM (arg.mode, addr);
	  set_mem_attributes (mem, arg.type, 1);
	  set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT);
	  MEM_READONLY_P (mem) = 1;
	  return mem;
	}

      /* Fixed kernel argument: its mode must match the declared one.  */
      int a = cum->args.order[cum->num];
      if (arg.mode != gcn_kernel_arg_types[a].mode)
	{
	  error ("wrong type of argument %s", gcn_kernel_arg_types[a].name);
	  return 0;
	}
      return gen_rtx_REG ((machine_mode) gcn_kernel_arg_types[a].mode,
			  cum->args.reg[a]);
    }
  return 0;
}
2399 1.1 mrg
2400 1.1 mrg /* Implement TARGET_FUNCTION_ARG_ADVANCE.
2401 1.1 mrg
2402 1.1 mrg Updates the summarizer variable pointed to by CUM_V to advance past an
2403 1.1 mrg argument in the argument list. */
2404 1.1 mrg
static void
gcn_function_arg_advance (cumulative_args_t cum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (cum->normal_function)
    {
      /* Only named arguments consume parameter registers.  */
      if (!arg.named)
	return;

      /* Skip registers to apply the same alignment that
	 gcn_function_arg used when assigning the argument.  */
      int num_regs = num_arg_regs (arg);
      if (num_regs > 0)
	while ((FIRST_PARM_REG + cum->num) % num_regs != 0)
	  cum->num++;
      cum->num += num_regs;
    }
  else
    {
      /* Kernel entry point: step through the fixed-register args first.  */
      if (cum->num < cum->args.nargs)
	cum->num++;
      else
	{
	  /* Past the fixed args: advance the kernarg-segment offset and
	     record the running total segment size.  */
	  cum->offset += tree_to_uhwi (TYPE_SIZE_UNIT (arg.type));
	  cfun->machine->kernarg_segment_byte_size = cum->offset;
	}
    }
}
2433 1.1 mrg
2434 1.1 mrg /* Implement TARGET_ARG_PARTIAL_BYTES.
2435 1.1 mrg
2436 1.1 mrg Returns the number of bytes at the beginning of an argument that must be put
2437 1.1 mrg in registers. The value must be zero for arguments that are passed entirely
2438 1.1 mrg in registers or that are entirely pushed on the stack. */
2439 1.1 mrg
2440 1.1 mrg static int
2441 1.1 mrg gcn_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
2442 1.1 mrg {
2443 1.1 mrg CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2444 1.1 mrg
2445 1.1 mrg if (!arg.named)
2446 1.1 mrg return 0;
2447 1.1 mrg
2448 1.1 mrg if (targetm.calls.must_pass_in_stack (arg))
2449 1.1 mrg return 0;
2450 1.1 mrg
2451 1.1 mrg if (cum->num >= NUM_PARM_REGS)
2452 1.1 mrg return 0;
2453 1.1 mrg
2454 1.1 mrg /* If the argument fits entirely in registers, return 0. */
2455 1.1 mrg if (cum->num + num_arg_regs (arg) <= NUM_PARM_REGS)
2456 1.1 mrg return 0;
2457 1.1 mrg
2458 1.1 mrg return (NUM_PARM_REGS - cum->num) * UNITS_PER_WORD;
2459 1.1 mrg }
2460 1.1 mrg
2461 1.1 mrg /* A normal function which takes a pointer argument may be passed a pointer to
2462 1.1 mrg LDS space (via a high-bits-set aperture), and that only works with FLAT
2463 1.1 mrg addressing, not GLOBAL. Force FLAT addressing if the function has an
2464 1.1 mrg incoming pointer parameter. NOTE: This is a heuristic that works in the
2465 1.1 mrg offloading case, but in general, a function might read global pointer
2466 1.1 mrg variables, etc. that may refer to LDS space or other special memory areas
2467 1.1 mrg not supported by GLOBAL instructions, and then this argument check would not
2468 1.1 mrg suffice. */
2469 1.1 mrg
2470 1.1 mrg static void
2471 1.1 mrg gcn_detect_incoming_pointer_arg (tree fndecl)
2472 1.1 mrg {
2473 1.1 mrg gcc_assert (cfun && cfun->machine);
2474 1.1 mrg
2475 1.1 mrg for (tree arg = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
2476 1.1 mrg arg;
2477 1.1 mrg arg = TREE_CHAIN (arg))
2478 1.1 mrg if (POINTER_TYPE_P (TREE_VALUE (arg)))
2479 1.1 mrg cfun->machine->use_flat_addressing = true;
2480 1.1 mrg }
2481 1.1 mrg
2482 1.1 mrg /* Implement INIT_CUMULATIVE_ARGS, via gcn.h.
2483 1.1 mrg
2484 1.1 mrg Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
2485 1.1 mrg whose data type is FNTYPE. For a library call, FNTYPE is 0. */
2486 1.1 mrg
void
gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ ,
			  tree fntype /* tree ptr for function decl */ ,
			  rtx libname /* SYMBOL_REF of library name or 0 */ ,
			  tree fndecl, int caller)
{
  memset (cum, 0, sizeof (*cum));
  cum->fntype = fntype;
  /* Library calls are always treated as normal (non-kernel) functions.  */
  if (libname)
    {
      gcc_assert (cfun && cfun->machine);
      cum->normal_function = true;
      if (!caller)
	{
	  cfun->machine->normal_function = true;
	  gcn_detect_incoming_pointer_arg (fndecl);
	}
      return;
    }
  /* Look for the amdgpu_hsa_kernel attribute: on the decl first, then on
     the decl's type, then on FNTYPE.  */
  tree attr = NULL;
  if (fndecl)
    attr = lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl));
  if (fndecl && !attr)
    attr = lookup_attribute ("amdgpu_hsa_kernel",
			     TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
  if (!attr && fntype)
    attr = lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype));
  /* Handle main () as kernel, so we can run testsuite.
     Handle OpenACC kernels similarly to main.  */
  if (!attr && !caller && fndecl
      && (MAIN_NAME_P (DECL_NAME (fndecl))
	  || lookup_attribute ("omp target entrypoint",
			       DECL_ATTRIBUTES (fndecl)) != NULL_TREE))
    gcn_parse_amdgpu_hsa_kernel_attribute (&cum->args, NULL_TREE);
  else
    {
      /* Without the attribute (or when compiling a call site), this is a
	 normal function using the standard calling convention.  */
      if (!attr || caller)
	{
	  gcc_assert (cfun && cfun->machine);
	  cum->normal_function = true;
	  if (!caller)
	    cfun->machine->normal_function = true;
	}
      /* Parse the attribute's argument list (or the defaults when ATTR is
	 absent) into CUM->args.  */
      gcn_parse_amdgpu_hsa_kernel_attribute
	(&cum->args, attr ? TREE_VALUE (attr) : NULL_TREE);
    }
  cfun->machine->args = cum->args;
  if (!caller && cfun->machine->normal_function)
    gcn_detect_incoming_pointer_arg (fndecl);

  /* Fixed/usable registers depend on the argument set chosen above.  */
  reinit_regs ();
}
2539 1.1 mrg
2540 1.1 mrg static bool
2541 1.1 mrg gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
2542 1.1 mrg {
2543 1.1 mrg machine_mode mode = TYPE_MODE (type);
2544 1.1 mrg HOST_WIDE_INT size = int_size_in_bytes (type);
2545 1.1 mrg
2546 1.1 mrg if (AGGREGATE_TYPE_P (type))
2547 1.1 mrg return true;
2548 1.1 mrg
2549 1.1 mrg /* Vector return values are not supported yet. */
2550 1.1 mrg if (VECTOR_TYPE_P (type))
2551 1.1 mrg return true;
2552 1.1 mrg
2553 1.1 mrg if (mode == BLKmode)
2554 1.1 mrg return true;
2555 1.1 mrg
2556 1.1 mrg if (size > 2 * UNITS_PER_WORD)
2557 1.1 mrg return true;
2558 1.1 mrg
2559 1.1 mrg return false;
2560 1.1 mrg }
2561 1.1 mrg
2562 1.1 mrg /* Implement TARGET_PROMOTE_FUNCTION_MODE.
2563 1.1 mrg
2564 1.1 mrg Return the mode to use for outgoing function arguments. */
2565 1.1 mrg
2566 1.1 mrg machine_mode
2567 1.1 mrg gcn_promote_function_mode (const_tree ARG_UNUSED (type), machine_mode mode,
2568 1.1 mrg int *ARG_UNUSED (punsignedp),
2569 1.1 mrg const_tree ARG_UNUSED (funtype),
2570 1.1 mrg int ARG_UNUSED (for_return))
2571 1.1 mrg {
2572 1.1 mrg if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4)
2573 1.1 mrg return SImode;
2574 1.1 mrg
2575 1.1 mrg return mode;
2576 1.1 mrg }
2577 1.1 mrg
2578 1.1 mrg /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
2579 1.1 mrg
2580 1.1 mrg Derived from hppa_gimplify_va_arg_expr. The generic routine doesn't handle
2581 1.1 mrg ARGS_GROW_DOWNWARDS. */
2582 1.1 mrg
2583 1.1 mrg static tree
2584 1.1 mrg gcn_gimplify_va_arg_expr (tree valist, tree type,
2585 1.1 mrg gimple_seq *ARG_UNUSED (pre_p),
2586 1.1 mrg gimple_seq *ARG_UNUSED (post_p))
2587 1.1 mrg {
2588 1.1 mrg tree ptr = build_pointer_type (type);
2589 1.1 mrg tree valist_type;
2590 1.1 mrg tree t, u;
2591 1.1 mrg bool indirect;
2592 1.1 mrg
2593 1.1 mrg indirect = pass_va_arg_by_reference (type);
2594 1.1 mrg if (indirect)
2595 1.1 mrg {
2596 1.1 mrg type = ptr;
2597 1.1 mrg ptr = build_pointer_type (type);
2598 1.1 mrg }
2599 1.1 mrg valist_type = TREE_TYPE (valist);
2600 1.1 mrg
2601 1.1 mrg /* Args grow down. Not handled by generic routines. */
2602 1.1 mrg
2603 1.1 mrg u = fold_convert (sizetype, size_in_bytes (type));
2604 1.1 mrg u = fold_build1 (NEGATE_EXPR, sizetype, u);
2605 1.1 mrg t = fold_build_pointer_plus (valist, u);
2606 1.1 mrg
2607 1.1 mrg /* Align to 8 byte boundary. */
2608 1.1 mrg
2609 1.1 mrg u = build_int_cst (TREE_TYPE (t), -8);
2610 1.1 mrg t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
2611 1.1 mrg t = fold_convert (valist_type, t);
2612 1.1 mrg
2613 1.1 mrg t = build2 (MODIFY_EXPR, valist_type, valist, t);
2614 1.1 mrg
2615 1.1 mrg t = fold_convert (ptr, t);
2616 1.1 mrg t = build_va_arg_indirect_ref (t);
2617 1.1 mrg
2618 1.1 mrg if (indirect)
2619 1.1 mrg t = build_va_arg_indirect_ref (t);
2620 1.1 mrg
2621 1.1 mrg return t;
2622 1.1 mrg }
2623 1.1 mrg
2624 1.1 mrg /* Return 1 if TRAIT NAME is present in the OpenMP context's
2625 1.1 mrg device trait set, return 0 if not present in any OpenMP context in the
2626 1.1 mrg whole translation unit, or -1 if not present in the current OpenMP context
2627 1.1 mrg but might be present in another OpenMP context in the same TU. */
2628 1.1 mrg
2629 1.1 mrg int
2630 1.1 mrg gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
2631 1.1 mrg const char *name)
2632 1.1 mrg {
2633 1.1 mrg switch (trait)
2634 1.1 mrg {
2635 1.1 mrg case omp_device_kind:
2636 1.1 mrg return strcmp (name, "gpu") == 0;
2637 1.1 mrg case omp_device_arch:
2638 1.1 mrg return strcmp (name, "gcn") == 0;
2639 1.1 mrg case omp_device_isa:
2640 1.1 mrg if (strcmp (name, "fiji") == 0)
2641 1.1 mrg return gcn_arch == PROCESSOR_FIJI;
2642 1.1 mrg if (strcmp (name, "gfx900") == 0)
2643 1.1 mrg return gcn_arch == PROCESSOR_VEGA10;
2644 1.1 mrg if (strcmp (name, "gfx906") == 0)
2645 1.1 mrg return gcn_arch == PROCESSOR_VEGA20;
2646 1.1 mrg if (strcmp (name, "gfx908") == 0)
2647 1.1 mrg return gcn_arch == PROCESSOR_GFX908;
2648 1.1 mrg return 0;
2649 1.1 mrg default:
2650 1.1 mrg gcc_unreachable ();
2651 1.1 mrg }
2652 1.1 mrg }
2653 1.1 mrg
2654 1.1 mrg /* Calculate stack offsets needed to create prologues and epilogues. */
2655 1.1 mrg
2656 1.1 mrg static struct machine_function *
2657 1.1 mrg gcn_compute_frame_offsets (void)
2658 1.1 mrg {
2659 1.1 mrg machine_function *offsets = cfun->machine;
2660 1.1 mrg
2661 1.1 mrg if (reload_completed)
2662 1.1 mrg return offsets;
2663 1.1 mrg
2664 1.1 mrg offsets->need_frame_pointer = frame_pointer_needed;
2665 1.1 mrg
2666 1.1 mrg offsets->outgoing_args_size = crtl->outgoing_args_size;
2667 1.1 mrg offsets->pretend_size = crtl->args.pretend_args_size;
2668 1.1 mrg
2669 1.1 mrg offsets->local_vars = get_frame_size ();
2670 1.1 mrg
2671 1.1 mrg offsets->lr_needs_saving = (!leaf_function_p ()
2672 1.1 mrg || df_regs_ever_live_p (LR_REGNUM)
2673 1.1 mrg || df_regs_ever_live_p (LR_REGNUM + 1));
2674 1.1 mrg
2675 1.1 mrg offsets->callee_saves = offsets->lr_needs_saving ? 8 : 0;
2676 1.1 mrg
2677 1.1 mrg for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2678 1.1 mrg if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
2679 1.1 mrg || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
2680 1.1 mrg && frame_pointer_needed))
2681 1.1 mrg offsets->callee_saves += (VGPR_REGNO_P (regno) ? 256 : 4);
2682 1.1 mrg
2683 1.1 mrg /* Round up to 64-bit boundary to maintain stack alignment. */
2684 1.1 mrg offsets->callee_saves = (offsets->callee_saves + 7) & ~7;
2685 1.1 mrg
2686 1.1 mrg return offsets;
2687 1.1 mrg }
2688 1.1 mrg
2689 1.1 mrg /* Insert code into the prologue or epilogue to store or load any
2690 1.1 mrg callee-save register to/from the stack.
2691 1.1 mrg
2692 1.1 mrg Helper function for gcn_expand_prologue and gcn_expand_epilogue. */
2693 1.1 mrg
2694 1.1 mrg static void
2695 1.1 mrg move_callee_saved_registers (rtx sp, machine_function *offsets,
2696 1.1 mrg bool prologue)
2697 1.1 mrg {
2698 1.1 mrg int regno, offset, saved_scalars;
2699 1.1 mrg rtx exec = gen_rtx_REG (DImode, EXEC_REG);
2700 1.1 mrg rtx vcc = gen_rtx_REG (DImode, VCC_LO_REG);
2701 1.1 mrg rtx offreg = gen_rtx_REG (SImode, SGPR_REGNO (22));
2702 1.1 mrg rtx as = gen_rtx_CONST_INT (VOIDmode, STACK_ADDR_SPACE);
2703 1.1 mrg HOST_WIDE_INT exec_set = 0;
2704 1.1 mrg int offreg_set = 0;
2705 1.1 mrg auto_vec<int> saved_sgprs;
2706 1.1 mrg
2707 1.1 mrg start_sequence ();
2708 1.1 mrg
2709 1.1 mrg /* Move scalars into two vector registers. */
2710 1.1 mrg for (regno = 0, saved_scalars = 0; regno < FIRST_VGPR_REG; regno++)
2711 1.1 mrg if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
2712 1.1 mrg || ((regno & ~1) == LINK_REGNUM && offsets->lr_needs_saving)
2713 1.1 mrg || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
2714 1.1 mrg && offsets->need_frame_pointer))
2715 1.1 mrg {
2716 1.1 mrg rtx reg = gen_rtx_REG (SImode, regno);
2717 1.1 mrg rtx vreg = gen_rtx_REG (V64SImode,
2718 1.1 mrg VGPR_REGNO (6 + (saved_scalars / 64)));
2719 1.1 mrg int lane = saved_scalars % 64;
2720 1.1 mrg
2721 1.1 mrg if (prologue)
2722 1.1 mrg {
2723 1.1 mrg emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
2724 1.1 mrg saved_sgprs.safe_push (regno);
2725 1.1 mrg }
2726 1.1 mrg else
2727 1.1 mrg emit_insn (gen_vec_extractv64sisi (reg, vreg, GEN_INT (lane)));
2728 1.1 mrg
2729 1.1 mrg saved_scalars++;
2730 1.1 mrg }
2731 1.1 mrg
2732 1.1 mrg rtx move_scalars = get_insns ();
2733 1.1 mrg end_sequence ();
2734 1.1 mrg start_sequence ();
2735 1.1 mrg
2736 1.1 mrg /* Ensure that all vector lanes are moved. */
2737 1.1 mrg exec_set = -1;
2738 1.1 mrg emit_move_insn (exec, GEN_INT (exec_set));
2739 1.1 mrg
2740 1.1 mrg /* Set up a vector stack pointer. */
2741 1.1 mrg rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2742 1.1 mrg rtx _0_4_8_12 = gen_rtx_REG (V64SImode, VGPR_REGNO (3));
2743 1.1 mrg emit_insn (gen_ashlv64si3_exec (_0_4_8_12, _0_1_2_3, GEN_INT (2),
2744 1.1 mrg gcn_gen_undef (V64SImode), exec));
2745 1.1 mrg rtx vsp = gen_rtx_REG (V64DImode, VGPR_REGNO (4));
2746 1.1 mrg emit_insn (gen_vec_duplicatev64di_exec (vsp, sp, gcn_gen_undef (V64DImode),
2747 1.1 mrg exec));
2748 1.1 mrg emit_insn (gen_addv64si3_vcc_exec (gcn_operand_part (V64SImode, vsp, 0),
2749 1.1 mrg gcn_operand_part (V64SImode, vsp, 0),
2750 1.1 mrg _0_4_8_12, vcc, gcn_gen_undef (V64SImode),
2751 1.1 mrg exec));
2752 1.1 mrg emit_insn (gen_addcv64si3_exec (gcn_operand_part (V64SImode, vsp, 1),
2753 1.1 mrg gcn_operand_part (V64SImode, vsp, 1),
2754 1.1 mrg const0_rtx, vcc, vcc,
2755 1.1 mrg gcn_gen_undef (V64SImode), exec));
2756 1.1 mrg
2757 1.1 mrg /* Move vectors. */
2758 1.1 mrg for (regno = FIRST_VGPR_REG, offset = 0;
2759 1.1 mrg regno < FIRST_PSEUDO_REGISTER; regno++)
2760 1.1 mrg if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
2761 1.1 mrg || (regno == VGPR_REGNO (6) && saved_scalars > 0)
2762 1.1 mrg || (regno == VGPR_REGNO (7) && saved_scalars > 63))
2763 1.1 mrg {
2764 1.1 mrg rtx reg = gen_rtx_REG (V64SImode, regno);
2765 1.1 mrg int size = 256;
2766 1.1 mrg
2767 1.1 mrg if (regno == VGPR_REGNO (6) && saved_scalars < 64)
2768 1.1 mrg size = saved_scalars * 4;
2769 1.1 mrg else if (regno == VGPR_REGNO (7) && saved_scalars < 128)
2770 1.1 mrg size = (saved_scalars - 64) * 4;
2771 1.1 mrg
2772 1.1 mrg if (size != 256 || exec_set != -1)
2773 1.1 mrg {
2774 1.1 mrg exec_set = ((unsigned HOST_WIDE_INT) 1 << (size / 4)) - 1;
2775 1.1 mrg emit_move_insn (exec, gen_int_mode (exec_set, DImode));
2776 1.1 mrg }
2777 1.1 mrg
2778 1.1 mrg if (prologue)
2779 1.1 mrg {
2780 1.1 mrg rtx insn = emit_insn (gen_scatterv64si_insn_1offset_exec
2781 1.1 mrg (vsp, const0_rtx, reg, as, const0_rtx,
2782 1.1 mrg exec));
2783 1.1 mrg
2784 1.1 mrg /* Add CFI metadata. */
2785 1.1 mrg rtx note;
2786 1.1 mrg if (regno == VGPR_REGNO (6) || regno == VGPR_REGNO (7))
2787 1.1 mrg {
2788 1.1 mrg int start = (regno == VGPR_REGNO (7) ? 64 : 0);
2789 1.1 mrg int count = MIN (saved_scalars - start, 64);
2790 1.1 mrg int add_lr = (regno == VGPR_REGNO (6)
2791 1.1 mrg && offsets->lr_needs_saving);
2792 1.1 mrg int lrdest = -1;
2793 1.1 mrg rtvec seq = rtvec_alloc (count + add_lr);
2794 1.1 mrg
2795 1.1 mrg /* Add an REG_FRAME_RELATED_EXPR entry for each scalar
2796 1.1 mrg register that was saved in this batch. */
2797 1.1 mrg for (int idx = 0; idx < count; idx++)
2798 1.1 mrg {
2799 1.1 mrg int stackaddr = offset + idx * 4;
2800 1.1 mrg rtx dest = gen_rtx_MEM (SImode,
2801 1.1 mrg gen_rtx_PLUS
2802 1.1 mrg (DImode, sp,
2803 1.1 mrg GEN_INT (stackaddr)));
2804 1.1 mrg rtx src = gen_rtx_REG (SImode, saved_sgprs[start + idx]);
2805 1.1 mrg rtx set = gen_rtx_SET (dest, src);
2806 1.1 mrg RTX_FRAME_RELATED_P (set) = 1;
2807 1.1 mrg RTVEC_ELT (seq, idx) = set;
2808 1.1 mrg
2809 1.1 mrg if (saved_sgprs[start + idx] == LINK_REGNUM)
2810 1.1 mrg lrdest = stackaddr;
2811 1.1 mrg }
2812 1.1 mrg
2813 1.1 mrg /* Add an additional expression for DWARF_LINK_REGISTER if
2814 1.1 mrg LINK_REGNUM was saved. */
2815 1.1 mrg if (lrdest != -1)
2816 1.1 mrg {
2817 1.1 mrg rtx dest = gen_rtx_MEM (DImode,
2818 1.1 mrg gen_rtx_PLUS
2819 1.1 mrg (DImode, sp,
2820 1.1 mrg GEN_INT (lrdest)));
2821 1.1 mrg rtx src = gen_rtx_REG (DImode, DWARF_LINK_REGISTER);
2822 1.1 mrg rtx set = gen_rtx_SET (dest, src);
2823 1.1 mrg RTX_FRAME_RELATED_P (set) = 1;
2824 1.1 mrg RTVEC_ELT (seq, count) = set;
2825 1.1 mrg }
2826 1.1 mrg
2827 1.1 mrg note = gen_rtx_SEQUENCE (VOIDmode, seq);
2828 1.1 mrg }
2829 1.1 mrg else
2830 1.1 mrg {
2831 1.1 mrg rtx dest = gen_rtx_MEM (V64SImode,
2832 1.1 mrg gen_rtx_PLUS (DImode, sp,
2833 1.1 mrg GEN_INT (offset)));
2834 1.1 mrg rtx src = gen_rtx_REG (V64SImode, regno);
2835 1.1 mrg note = gen_rtx_SET (dest, src);
2836 1.1 mrg }
2837 1.1 mrg RTX_FRAME_RELATED_P (insn) = 1;
2838 1.1 mrg add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
2839 1.1 mrg }
2840 1.1 mrg else
2841 1.1 mrg emit_insn (gen_gatherv64si_insn_1offset_exec
2842 1.1 mrg (reg, vsp, const0_rtx, as, const0_rtx,
2843 1.1 mrg gcn_gen_undef (V64SImode), exec));
2844 1.1 mrg
2845 1.1 mrg /* Move our VSP to the next stack entry. */
2846 1.1 mrg if (offreg_set != size)
2847 1.1 mrg {
2848 1.1 mrg offreg_set = size;
2849 1.1 mrg emit_move_insn (offreg, GEN_INT (size));
2850 1.1 mrg }
2851 1.1 mrg if (exec_set != -1)
2852 1.1 mrg {
2853 1.1 mrg exec_set = -1;
2854 1.1 mrg emit_move_insn (exec, GEN_INT (exec_set));
2855 1.1 mrg }
2856 1.1 mrg emit_insn (gen_addv64si3_vcc_dup_exec
2857 1.1 mrg (gcn_operand_part (V64SImode, vsp, 0),
2858 1.1 mrg offreg, gcn_operand_part (V64SImode, vsp, 0),
2859 1.1 mrg vcc, gcn_gen_undef (V64SImode), exec));
2860 1.1 mrg emit_insn (gen_addcv64si3_exec
2861 1.1 mrg (gcn_operand_part (V64SImode, vsp, 1),
2862 1.1 mrg gcn_operand_part (V64SImode, vsp, 1),
2863 1.1 mrg const0_rtx, vcc, vcc, gcn_gen_undef (V64SImode), exec));
2864 1.1 mrg
2865 1.1 mrg offset += size;
2866 1.1 mrg }
2867 1.1 mrg
2868 1.1 mrg rtx move_vectors = get_insns ();
2869 1.1 mrg end_sequence ();
2870 1.1 mrg
2871 1.1 mrg if (prologue)
2872 1.1 mrg {
2873 1.1 mrg emit_insn (move_scalars);
2874 1.1 mrg emit_insn (move_vectors);
2875 1.1 mrg }
2876 1.1 mrg else
2877 1.1 mrg {
2878 1.1 mrg emit_insn (move_vectors);
2879 1.1 mrg emit_insn (move_scalars);
2880 1.1 mrg }
2881 1.1 mrg }
2882 1.1 mrg
2883 1.1 mrg /* Generate prologue. Called from gen_prologue during pro_and_epilogue pass.
2884 1.1 mrg
2885 1.1 mrg For a non-kernel function, the stack layout looks like this (interim),
2886 1.1 mrg growing *upwards*:
2887 1.1 mrg
2888 1.1 mrg hi | + ...
2889 1.1 mrg |__________________| <-- current SP
2890 1.1 mrg | outgoing args |
2891 1.1 mrg |__________________|
2892 1.1 mrg | (alloca space) |
2893 1.1 mrg |__________________|
2894 1.1 mrg | local vars |
2895 1.1 mrg |__________________| <-- FP/hard FP
2896 1.1 mrg | callee-save regs |
2897 1.1 mrg |__________________| <-- soft arg pointer
2898 1.1 mrg | pretend args |
2899 1.1 mrg |__________________| <-- incoming SP
2900 1.1 mrg | incoming args |
2901 1.1 mrg lo |..................|
2902 1.1 mrg
2903 1.1 mrg This implies arguments (beyond the first N in registers) must grow
2904 1.1 mrg downwards (as, apparently, PA has them do).
2905 1.1 mrg
2906 1.1 mrg For a kernel function we have the simpler:
2907 1.1 mrg
2908 1.1 mrg hi | + ...
2909 1.1 mrg |__________________| <-- current SP
2910 1.1 mrg | outgoing args |
2911 1.1 mrg |__________________|
2912 1.1 mrg | (alloca space) |
2913 1.1 mrg |__________________|
2914 1.1 mrg | local vars |
2915 1.1 mrg lo |__________________| <-- FP/hard FP
2916 1.1 mrg
2917 1.1 mrg */
2918 1.1 mrg
2919 1.1 mrg void
2920 1.1 mrg gcn_expand_prologue ()
2921 1.1 mrg {
2922 1.1 mrg machine_function *offsets = gcn_compute_frame_offsets ();
2923 1.1 mrg
2924 1.1 mrg if (!cfun || !cfun->machine || cfun->machine->normal_function)
2925 1.1 mrg {
2926 1.1 mrg rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2927 1.1 mrg rtx sp_hi = gcn_operand_part (Pmode, sp, 1);
2928 1.1 mrg rtx sp_lo = gcn_operand_part (Pmode, sp, 0);
2929 1.1 mrg rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2930 1.1 mrg rtx fp_hi = gcn_operand_part (Pmode, fp, 1);
2931 1.1 mrg rtx fp_lo = gcn_operand_part (Pmode, fp, 0);
2932 1.1 mrg
2933 1.1 mrg start_sequence ();
2934 1.1 mrg
2935 1.1 mrg if (offsets->pretend_size > 0)
2936 1.1 mrg {
2937 1.1 mrg /* FIXME: Do the actual saving of register pretend args to the stack.
2938 1.1 mrg Register order needs consideration. */
2939 1.1 mrg }
2940 1.1 mrg
2941 1.1 mrg /* Save callee-save regs. */
2942 1.1 mrg move_callee_saved_registers (sp, offsets, true);
2943 1.1 mrg
2944 1.1 mrg HOST_WIDE_INT sp_adjust = offsets->pretend_size
2945 1.1 mrg + offsets->callee_saves
2946 1.1 mrg + offsets->local_vars + offsets->outgoing_args_size;
2947 1.1 mrg if (sp_adjust > 0)
2948 1.1 mrg {
2949 1.1 mrg /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so
2950 1.1 mrg we use split add explictly, and specify the DImode add in
2951 1.1 mrg the note. */
2952 1.1 mrg rtx scc = gen_rtx_REG (BImode, SCC_REG);
2953 1.1 mrg rtx adjustment = gen_int_mode (sp_adjust, SImode);
2954 1.1 mrg rtx insn = emit_insn (gen_addsi3_scalar_carry (sp_lo, sp_lo,
2955 1.1 mrg adjustment, scc));
2956 1.1 mrg if (!offsets->need_frame_pointer)
2957 1.1 mrg {
2958 1.1 mrg RTX_FRAME_RELATED_P (insn) = 1;
2959 1.1 mrg add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2960 1.1 mrg gen_rtx_SET (sp,
2961 1.1 mrg gen_rtx_PLUS (DImode, sp,
2962 1.1 mrg adjustment)));
2963 1.1 mrg }
2964 1.1 mrg emit_insn (gen_addcsi3_scalar_zero (sp_hi, sp_hi, scc));
2965 1.1 mrg }
2966 1.1 mrg
2967 1.1 mrg if (offsets->need_frame_pointer)
2968 1.1 mrg {
2969 1.1 mrg /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so
2970 1.1 mrg we use split add explictly, and specify the DImode add in
2971 1.1 mrg the note. */
2972 1.1 mrg rtx scc = gen_rtx_REG (BImode, SCC_REG);
2973 1.1 mrg int fp_adjust = -(offsets->local_vars + offsets->outgoing_args_size);
2974 1.1 mrg rtx adjustment = gen_int_mode (fp_adjust, SImode);
2975 1.1 mrg rtx insn = emit_insn (gen_addsi3_scalar_carry(fp_lo, sp_lo,
2976 1.1 mrg adjustment, scc));
2977 1.1 mrg emit_insn (gen_addcsi3_scalar (fp_hi, sp_hi,
2978 1.1 mrg (fp_adjust < 0 ? GEN_INT (-1)
2979 1.1 mrg : const0_rtx),
2980 1.1 mrg scc, scc));
2981 1.1 mrg
2982 1.1 mrg /* Set the CFA to the entry stack address, as an offset from the
2983 1.1 mrg frame pointer. This is preferred because the frame pointer is
2984 1.1 mrg saved in each frame, whereas the stack pointer is not. */
2985 1.1 mrg RTX_FRAME_RELATED_P (insn) = 1;
2986 1.1 mrg add_reg_note (insn, REG_CFA_DEF_CFA,
2987 1.1 mrg gen_rtx_PLUS (DImode, fp,
2988 1.1 mrg GEN_INT (-(offsets->pretend_size
2989 1.1 mrg + offsets->callee_saves))));
2990 1.1 mrg }
2991 1.1 mrg
2992 1.1 mrg rtx_insn *seq = get_insns ();
2993 1.1 mrg end_sequence ();
2994 1.1 mrg
2995 1.1 mrg emit_insn (seq);
2996 1.1 mrg }
2997 1.1 mrg else
2998 1.1 mrg {
2999 1.1 mrg rtx wave_offset = gen_rtx_REG (SImode,
3000 1.1 mrg cfun->machine->args.
3001 1.1 mrg reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]);
3002 1.1 mrg
3003 1.1 mrg if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG))
3004 1.1 mrg {
3005 1.1 mrg rtx fs_init_lo =
3006 1.1 mrg gen_rtx_REG (SImode,
3007 1.1 mrg cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG]);
3008 1.1 mrg rtx fs_init_hi =
3009 1.1 mrg gen_rtx_REG (SImode,
3010 1.1 mrg cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG] + 1);
3011 1.1 mrg rtx fs_reg_lo = gen_rtx_REG (SImode, FLAT_SCRATCH_REG);
3012 1.1 mrg rtx fs_reg_hi = gen_rtx_REG (SImode, FLAT_SCRATCH_REG + 1);
3013 1.1 mrg
3014 1.1 mrg /*rtx queue = gen_rtx_REG(DImode,
3015 1.1 mrg cfun->machine->args.reg[QUEUE_PTR_ARG]);
3016 1.1 mrg rtx aperture = gen_rtx_MEM (SImode,
3017 1.1 mrg gen_rtx_PLUS (DImode, queue,
3018 1.1 mrg gen_int_mode (68, SImode)));
3019 1.1 mrg set_mem_addr_space (aperture, ADDR_SPACE_SCALAR_FLAT);*/
3020 1.1 mrg
3021 1.1 mrg /* Set up flat_scratch. */
3022 1.1 mrg emit_insn (gen_addsi3_scc (fs_reg_hi, fs_init_lo, wave_offset));
3023 1.1 mrg emit_insn (gen_lshrsi3_scc (fs_reg_hi, fs_reg_hi,
3024 1.1 mrg gen_int_mode (8, SImode)));
3025 1.1 mrg emit_move_insn (fs_reg_lo, fs_init_hi);
3026 1.1 mrg }
3027 1.1 mrg
3028 1.1 mrg /* Set up frame pointer and stack pointer. */
3029 1.1 mrg rtx sp = gen_rtx_REG (DImode, STACK_POINTER_REGNUM);
3030 1.1 mrg rtx sp_hi = simplify_gen_subreg (SImode, sp, DImode, 4);
3031 1.1 mrg rtx sp_lo = simplify_gen_subreg (SImode, sp, DImode, 0);
3032 1.1 mrg rtx fp = gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM);
3033 1.1 mrg rtx fp_hi = simplify_gen_subreg (SImode, fp, DImode, 4);
3034 1.1 mrg rtx fp_lo = simplify_gen_subreg (SImode, fp, DImode, 0);
3035 1.1 mrg
3036 1.1 mrg HOST_WIDE_INT sp_adjust = (offsets->local_vars
3037 1.1 mrg + offsets->outgoing_args_size);
3038 1.1 mrg
3039 1.1 mrg /* Initialise FP and SP from the buffer descriptor in s[0:3]. */
3040 1.1 mrg emit_move_insn (fp_lo, gen_rtx_REG (SImode, 0));
3041 1.1 mrg emit_insn (gen_andsi3_scc (fp_hi, gen_rtx_REG (SImode, 1),
3042 1.1 mrg gen_int_mode (0xffff, SImode)));
3043 1.1 mrg rtx scc = gen_rtx_REG (BImode, SCC_REG);
3044 1.1 mrg emit_insn (gen_addsi3_scalar_carry (fp_lo, fp_lo, wave_offset, scc));
3045 1.1 mrg emit_insn (gen_addcsi3_scalar_zero (fp_hi, fp_hi, scc));
3046 1.1 mrg
3047 1.1 mrg /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so we use
3048 1.1 mrg split add explictly, and specify the DImode add in the note.
3049 1.1 mrg The DWARF info expects that the callee-save data is in the frame,
3050 1.1 mrg even though it isn't (because this is the entry point), so we
3051 1.1 mrg make a notional adjustment to the DWARF frame offset here. */
3052 1.1 mrg rtx dbg_adjustment = gen_int_mode (sp_adjust + offsets->callee_saves,
3053 1.1 mrg DImode);
3054 1.1 mrg rtx insn;
3055 1.1 mrg if (sp_adjust > 0)
3056 1.1 mrg {
3057 1.1 mrg rtx scc = gen_rtx_REG (BImode, SCC_REG);
3058 1.1 mrg rtx adjustment = gen_int_mode (sp_adjust, DImode);
3059 1.1 mrg insn = emit_insn (gen_addsi3_scalar_carry(sp_lo, fp_lo, adjustment,
3060 1.1 mrg scc));
3061 1.1 mrg emit_insn (gen_addcsi3_scalar_zero (sp_hi, fp_hi, scc));
3062 1.1 mrg }
3063 1.1 mrg else
3064 1.1 mrg insn = emit_move_insn (sp, fp);
3065 1.1 mrg RTX_FRAME_RELATED_P (insn) = 1;
3066 1.1 mrg add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3067 1.1 mrg gen_rtx_SET (sp, gen_rtx_PLUS (DImode, sp,
3068 1.1 mrg dbg_adjustment)));
3069 1.1 mrg
3070 1.1 mrg if (offsets->need_frame_pointer)
3071 1.1 mrg {
3072 1.1 mrg /* Set the CFA to the entry stack address, as an offset from the
3073 1.1 mrg frame pointer. This is necessary when alloca is used, and
3074 1.1 mrg harmless otherwise. */
3075 1.1 mrg rtx neg_adjust = gen_int_mode (-offsets->callee_saves, DImode);
3076 1.1 mrg add_reg_note (insn, REG_CFA_DEF_CFA,
3077 1.1 mrg gen_rtx_PLUS (DImode, fp, neg_adjust));
3078 1.1 mrg }
3079 1.1 mrg
3080 1.1 mrg /* Make sure the flat scratch reg doesn't get optimised away. */
3081 1.1 mrg emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG)));
3082 1.1 mrg }
3083 1.1 mrg
3084 1.1 mrg /* Ensure that the scheduler doesn't do anything unexpected. */
3085 1.1 mrg emit_insn (gen_blockage ());
3086 1.1 mrg
3087 1.1 mrg /* m0 is initialized for the usual LDS DS and FLAT memory case.
3088 1.1 mrg The low-part is the address of the topmost addressable byte, which is
3089 1.1 mrg size-1. The high-part is an offset and should be zero. */
3090 1.1 mrg emit_move_insn (gen_rtx_REG (SImode, M0_REG),
3091 1.1 mrg gen_int_mode (LDS_SIZE, SImode));
3092 1.1 mrg
3093 1.1 mrg emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
3094 1.1 mrg
3095 1.1 mrg if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
3096 1.1 mrg {
3097 1.1 mrg /* OpenMP kernels have an implicit call to gomp_gcn_enter_kernel. */
3098 1.1 mrg rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
3099 1.1 mrg emit_move_insn (fn_reg, gen_rtx_SYMBOL_REF (Pmode,
3100 1.1 mrg "gomp_gcn_enter_kernel"));
3101 1.1 mrg emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
3102 1.1 mrg }
3103 1.1 mrg }
3104 1.1 mrg
3105 1.1 mrg /* Generate epilogue. Called from gen_epilogue during pro_and_epilogue pass.
3106 1.1 mrg
3107 1.1 mrg See gcn_expand_prologue for stack details. */
3108 1.1 mrg
3109 1.1 mrg void
3110 1.1 mrg gcn_expand_epilogue (void)
3111 1.1 mrg {
3112 1.1 mrg /* Ensure that the scheduler doesn't do anything unexpected. */
3113 1.1 mrg emit_insn (gen_blockage ());
3114 1.1 mrg
3115 1.1 mrg if (!cfun || !cfun->machine || cfun->machine->normal_function)
3116 1.1 mrg {
3117 1.1 mrg machine_function *offsets = gcn_compute_frame_offsets ();
3118 1.1 mrg rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
3119 1.1 mrg rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
3120 1.1 mrg
3121 1.1 mrg HOST_WIDE_INT sp_adjust = offsets->callee_saves + offsets->pretend_size;
3122 1.1 mrg
3123 1.1 mrg if (offsets->need_frame_pointer)
3124 1.1 mrg {
3125 1.1 mrg /* Restore old SP from the frame pointer. */
3126 1.1 mrg if (sp_adjust > 0)
3127 1.1 mrg emit_insn (gen_subdi3 (sp, fp, gen_int_mode (sp_adjust, DImode)));
3128 1.1 mrg else
3129 1.1 mrg emit_move_insn (sp, fp);
3130 1.1 mrg }
3131 1.1 mrg else
3132 1.1 mrg {
3133 1.1 mrg /* Restore old SP from current SP. */
3134 1.1 mrg sp_adjust += offsets->outgoing_args_size + offsets->local_vars;
3135 1.1 mrg
3136 1.1 mrg if (sp_adjust > 0)
3137 1.1 mrg emit_insn (gen_subdi3 (sp, sp, gen_int_mode (sp_adjust, DImode)));
3138 1.1 mrg }
3139 1.1 mrg
3140 1.1 mrg move_callee_saved_registers (sp, offsets, false);
3141 1.1 mrg
3142 1.1 mrg /* There's no explicit use of the link register on the return insn. Emit
3143 1.1 mrg one here instead. */
3144 1.1 mrg if (offsets->lr_needs_saving)
3145 1.1 mrg emit_use (gen_rtx_REG (DImode, LINK_REGNUM));
3146 1.1 mrg
3147 1.1 mrg /* Similar for frame pointer. */
3148 1.1 mrg if (offsets->need_frame_pointer)
3149 1.1 mrg emit_use (gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM));
3150 1.1 mrg }
3151 1.1 mrg else if (flag_openmp)
3152 1.1 mrg {
3153 1.1 mrg /* OpenMP kernels have an implicit call to gomp_gcn_exit_kernel. */
3154 1.1 mrg rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
3155 1.1 mrg emit_move_insn (fn_reg,
3156 1.1 mrg gen_rtx_SYMBOL_REF (Pmode, "gomp_gcn_exit_kernel"));
3157 1.1 mrg emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
3158 1.1 mrg }
3159 1.1 mrg else if (TREE_CODE (TREE_TYPE (DECL_RESULT (cfun->decl))) != VOID_TYPE)
3160 1.1 mrg {
3161 1.1 mrg /* Assume that an exit value compatible with gcn-run is expected.
3162 1.1 mrg That is, the third input parameter is an int*.
3163 1.1 mrg
3164 1.1 mrg We can't allocate any new registers, but the kernarg_reg is
3165 1.1 mrg dead after this, so we'll use that. */
3166 1.1 mrg rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
3167 1.1 mrg [KERNARG_SEGMENT_PTR_ARG]);
3168 1.1 mrg rtx retptr_mem = gen_rtx_MEM (DImode,
3169 1.1 mrg gen_rtx_PLUS (DImode, kernarg_reg,
3170 1.1 mrg GEN_INT (16)));
3171 1.1 mrg set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
3172 1.1 mrg emit_move_insn (kernarg_reg, retptr_mem);
3173 1.1 mrg
3174 1.1 mrg rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
3175 1.1 mrg set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
3176 1.1 mrg emit_move_insn (retval_mem,
3177 1.1 mrg gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
3178 1.1 mrg }
3179 1.1 mrg
3180 1.1 mrg emit_jump_insn (gen_gcn_return ());
3181 1.1 mrg }
3182 1.1 mrg
3183 1.1 mrg /* Implement TARGET_FRAME_POINTER_REQUIRED.
3184 1.1 mrg
3185 1.1 mrg Return true if the frame pointer should not be eliminated. */
3186 1.1 mrg
3187 1.1 mrg bool
3188 1.1 mrg gcn_frame_pointer_rqd (void)
3189 1.1 mrg {
3190 1.1 mrg /* GDB needs the frame pointer in order to unwind properly,
3191 1.1 mrg but that's not important for the entry point, unless alloca is used.
3192 1.1 mrg It's not important for code execution, so we should repect the
3193 1.1 mrg -fomit-frame-pointer flag. */
3194 1.1 mrg return (!flag_omit_frame_pointer
3195 1.1 mrg && cfun
3196 1.1 mrg && (cfun->calls_alloca
3197 1.1 mrg || (cfun->machine && cfun->machine->normal_function)));
3198 1.1 mrg }
3199 1.1 mrg
3200 1.1 mrg /* Implement TARGET_CAN_ELIMINATE.
3201 1.1 mrg
3202 1.1 mrg Return true if the compiler is allowed to try to replace register number
3203 1.1 mrg FROM_REG with register number TO_REG.
3204 1.1 mrg
3205 1.1 mrg FIXME: is the default "true" not enough? Should this be a negative set? */
3206 1.1 mrg
3207 1.1 mrg bool
3208 1.1 mrg gcn_can_eliminate_p (int /*from_reg */ , int to_reg)
3209 1.1 mrg {
3210 1.1 mrg return (to_reg == HARD_FRAME_POINTER_REGNUM
3211 1.1 mrg || to_reg == STACK_POINTER_REGNUM);
3212 1.1 mrg }
3213 1.1 mrg
3214 1.1 mrg /* Implement INITIAL_ELIMINATION_OFFSET.
3215 1.1 mrg
3216 1.1 mrg Returns the initial difference between the specified pair of registers, in
3217 1.1 mrg terms of stack position. */
3218 1.1 mrg
3219 1.1 mrg HOST_WIDE_INT
3220 1.1 mrg gcn_initial_elimination_offset (int from, int to)
3221 1.1 mrg {
3222 1.1 mrg machine_function *offsets = gcn_compute_frame_offsets ();
3223 1.1 mrg
3224 1.1 mrg switch (from)
3225 1.1 mrg {
3226 1.1 mrg case ARG_POINTER_REGNUM:
3227 1.1 mrg if (to == STACK_POINTER_REGNUM)
3228 1.1 mrg return -(offsets->callee_saves + offsets->local_vars
3229 1.1 mrg + offsets->outgoing_args_size);
3230 1.1 mrg else if (to == FRAME_POINTER_REGNUM || to == HARD_FRAME_POINTER_REGNUM)
3231 1.1 mrg return -offsets->callee_saves;
3232 1.1 mrg else
3233 1.1 mrg gcc_unreachable ();
3234 1.1 mrg break;
3235 1.1 mrg
3236 1.1 mrg case FRAME_POINTER_REGNUM:
3237 1.1 mrg if (to == STACK_POINTER_REGNUM)
3238 1.1 mrg return -(offsets->local_vars + offsets->outgoing_args_size);
3239 1.1 mrg else if (to == HARD_FRAME_POINTER_REGNUM)
3240 1.1 mrg return 0;
3241 1.1 mrg else
3242 1.1 mrg gcc_unreachable ();
3243 1.1 mrg break;
3244 1.1 mrg
3245 1.1 mrg default:
3246 1.1 mrg gcc_unreachable ();
3247 1.1 mrg }
3248 1.1 mrg }
3249 1.1 mrg
3250 1.1 mrg /* Implement HARD_REGNO_RENAME_OK.
3251 1.1 mrg
3252 1.1 mrg Return true if it is permissible to rename a hard register from
3253 1.1 mrg FROM_REG to TO_REG. */
3254 1.1 mrg
3255 1.1 mrg bool
3256 1.1 mrg gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned int to_reg)
3257 1.1 mrg {
3258 1.1 mrg if (from_reg == SCC_REG
3259 1.1 mrg || from_reg == VCC_LO_REG || from_reg == VCC_HI_REG
3260 1.1 mrg || from_reg == EXEC_LO_REG || from_reg == EXEC_HI_REG
3261 1.1 mrg || to_reg == SCC_REG
3262 1.1 mrg || to_reg == VCC_LO_REG || to_reg == VCC_HI_REG
3263 1.1 mrg || to_reg == EXEC_LO_REG || to_reg == EXEC_HI_REG)
3264 1.1 mrg return false;
3265 1.1 mrg
3266 1.1 mrg /* Allow the link register to be used if it was saved. */
3267 1.1 mrg if ((to_reg & ~1) == LINK_REGNUM)
3268 1.1 mrg return !cfun || cfun->machine->lr_needs_saving;
3269 1.1 mrg
3270 1.1 mrg /* Allow the registers used for the static chain to be used if the chain is
3271 1.1 mrg not in active use. */
3272 1.1 mrg if ((to_reg & ~1) == STATIC_CHAIN_REGNUM)
3273 1.1 mrg return !cfun
3274 1.1 mrg || !(cfun->static_chain_decl
3275 1.1 mrg && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
3276 1.1 mrg && df_regs_ever_live_p (STATIC_CHAIN_REGNUM + 1));
3277 1.1 mrg
3278 1.1 mrg return true;
3279 1.1 mrg }
3280 1.1 mrg
3281 1.1 mrg /* Implement HARD_REGNO_CALLER_SAVE_MODE.
3282 1.1 mrg
3283 1.1 mrg Which mode is required for saving NREGS of a pseudo-register in
3284 1.1 mrg call-clobbered hard register REGNO. */
3285 1.1 mrg
3286 1.1 mrg machine_mode
3287 1.1 mrg gcn_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
3288 1.1 mrg machine_mode regmode)
3289 1.1 mrg {
3290 1.1 mrg machine_mode result = choose_hard_reg_mode (regno, nregs, NULL);
3291 1.1 mrg
3292 1.1 mrg if (VECTOR_MODE_P (result) && !VECTOR_MODE_P (regmode))
3293 1.1 mrg result = (nregs == 1 ? SImode : DImode);
3294 1.1 mrg
3295 1.1 mrg return result;
3296 1.1 mrg }
3297 1.1 mrg
3298 1.1 mrg /* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.
3299 1.1 mrg
3300 1.1 mrg Output assembler code for a block containing the constant parts
3301 1.1 mrg of a trampoline, leaving space for the variable parts. */
3302 1.1 mrg
3303 1.1 mrg static void
3304 1.1 mrg gcn_asm_trampoline_template (FILE *f)
3305 1.1 mrg {
3306 1.1 mrg /* The source operand of the move instructions must be a 32-bit
3307 1.1 mrg constant following the opcode. */
3308 1.1 mrg asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM);
3309 1.1 mrg asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM + 1);
3310 1.1 mrg asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG);
3311 1.1 mrg asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG + 1);
3312 1.1 mrg asm_fprintf (f, "\ts_setpc_b64\ts[%i:%i]\n", CC_SAVE_REG, CC_SAVE_REG + 1);
3313 1.1 mrg }
3314 1.1 mrg
3315 1.1 mrg /* Implement TARGET_TRAMPOLINE_INIT.
3316 1.1 mrg
3317 1.1 mrg Emit RTL insns to initialize the variable parts of a trampoline.
3318 1.1 mrg FNDECL is the decl of the target address, M_TRAMP is a MEM for
3319 1.1 mrg the trampoline, and CHAIN_VALUE is an RTX for the static chain
3320 1.1 mrg to be passed to the target function. */
3321 1.1 mrg
3322 1.1 mrg static void
3323 1.1 mrg gcn_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3324 1.1 mrg {
3325 1.1 mrg if (TARGET_GCN5_PLUS)
3326 1.1 mrg sorry ("nested function trampolines not supported on GCN5 due to"
3327 1.1 mrg " non-executable stacks");
3328 1.1 mrg
3329 1.1 mrg emit_block_move (m_tramp, assemble_trampoline_template (),
3330 1.1 mrg GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3331 1.1 mrg
3332 1.1 mrg rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3333 1.1 mrg rtx chain_value_reg = copy_to_reg (chain_value);
3334 1.1 mrg rtx fnaddr_reg = copy_to_reg (fnaddr);
3335 1.1 mrg
3336 1.1 mrg for (int i = 0; i < 4; i++)
3337 1.1 mrg {
3338 1.1 mrg rtx mem = adjust_address (m_tramp, SImode, i * 8 + 4);
3339 1.1 mrg rtx reg = i < 2 ? chain_value_reg : fnaddr_reg;
3340 1.1 mrg emit_move_insn (mem, gen_rtx_SUBREG (SImode, reg, (i % 2) * 4));
3341 1.1 mrg }
3342 1.1 mrg
3343 1.1 mrg rtx tramp_addr = XEXP (m_tramp, 0);
3344 1.1 mrg emit_insn (gen_clear_icache (tramp_addr,
3345 1.1 mrg plus_constant (ptr_mode, tramp_addr,
3346 1.1 mrg TRAMPOLINE_SIZE)));
3347 1.1 mrg }
3348 1.1 mrg
3349 1.1 mrg /* }}} */
3350 1.1 mrg /* {{{ Miscellaneous. */
3351 1.1 mrg
3352 1.1 mrg /* Implement TARGET_CANNOT_COPY_INSN_P.
3353 1.1 mrg
3354 1.1 mrg Return true if INSN must not be duplicated. */
3355 1.1 mrg
3356 1.1 mrg static bool
3357 1.1 mrg gcn_cannot_copy_insn_p (rtx_insn *insn)
3358 1.1 mrg {
3359 1.1 mrg if (recog_memoized (insn) == CODE_FOR_gcn_wavefront_barrier)
3360 1.1 mrg return true;
3361 1.1 mrg
3362 1.1 mrg return false;
3363 1.1 mrg }
3364 1.1 mrg
3365 1.1 mrg /* Implement TARGET_DEBUG_UNWIND_INFO.
3366 1.1 mrg
3367 1.1 mrg Defines the mechanism that will be used for describing frame unwind
3368 1.1 mrg information to the debugger. */
3369 1.1 mrg
3370 1.1 mrg static enum unwind_info_type
3371 1.1 mrg gcn_debug_unwind_info ()
3372 1.1 mrg {
3373 1.1 mrg return UI_DWARF2;
3374 1.1 mrg }
3375 1.1 mrg
3376 1.1 mrg /* Determine if there is a suitable hardware conversion instruction.
3377 1.1 mrg Used primarily by the machine description. */
3378 1.1 mrg
3379 1.1 mrg bool
3380 1.1 mrg gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op)
3381 1.1 mrg {
3382 1.1 mrg if (VECTOR_MODE_P (from) != VECTOR_MODE_P (to))
3383 1.1 mrg return false;
3384 1.1 mrg
3385 1.1 mrg if (VECTOR_MODE_P (from))
3386 1.1 mrg {
3387 1.1 mrg from = GET_MODE_INNER (from);
3388 1.1 mrg to = GET_MODE_INNER (to);
3389 1.1 mrg }
3390 1.1 mrg
3391 1.1 mrg switch (op)
3392 1.1 mrg {
3393 1.1 mrg case fix_trunc_cvt:
3394 1.1 mrg case fixuns_trunc_cvt:
3395 1.1 mrg if (GET_MODE_CLASS (from) != MODE_FLOAT
3396 1.1 mrg || GET_MODE_CLASS (to) != MODE_INT)
3397 1.1 mrg return false;
3398 1.1 mrg break;
3399 1.1 mrg case float_cvt:
3400 1.1 mrg case floatuns_cvt:
3401 1.1 mrg if (GET_MODE_CLASS (from) != MODE_INT
3402 1.1 mrg || GET_MODE_CLASS (to) != MODE_FLOAT)
3403 1.1 mrg return false;
3404 1.1 mrg break;
3405 1.1 mrg case extend_cvt:
3406 1.1 mrg if (GET_MODE_CLASS (from) != MODE_FLOAT
3407 1.1 mrg || GET_MODE_CLASS (to) != MODE_FLOAT
3408 1.1 mrg || GET_MODE_SIZE (from) >= GET_MODE_SIZE (to))
3409 1.1 mrg return false;
3410 1.1 mrg break;
3411 1.1 mrg case trunc_cvt:
3412 1.1 mrg if (GET_MODE_CLASS (from) != MODE_FLOAT
3413 1.1 mrg || GET_MODE_CLASS (to) != MODE_FLOAT
3414 1.1 mrg || GET_MODE_SIZE (from) <= GET_MODE_SIZE (to))
3415 1.1 mrg return false;
3416 1.1 mrg break;
3417 1.1 mrg }
3418 1.1 mrg
3419 1.1 mrg return ((to == HImode && from == HFmode)
3420 1.1 mrg || (to == SImode && (from == SFmode || from == DFmode))
3421 1.1 mrg || (to == HFmode && (from == HImode || from == SFmode))
3422 1.1 mrg || (to == SFmode && (from == SImode || from == HFmode
3423 1.1 mrg || from == DFmode))
3424 1.1 mrg || (to == DFmode && (from == SImode || from == SFmode)));
3425 1.1 mrg }
3426 1.1 mrg
3427 1.1 mrg /* Implement TARGET_EMUTLS_VAR_INIT.
3428 1.1 mrg
3429 1.1 mrg Disable emutls (gthr-gcn.h does not support it, yet). */
3430 1.1 mrg
3431 1.1 mrg tree
3432 1.1 mrg gcn_emutls_var_init (tree, tree decl, tree)
3433 1.1 mrg {
3434 1.1 mrg sorry_at (DECL_SOURCE_LOCATION (decl), "TLS is not implemented for GCN.");
3435 1.1 mrg return NULL_TREE;
3436 1.1 mrg }
3437 1.1 mrg
3438 1.1 mrg /* }}} */
3439 1.1 mrg /* {{{ Costs. */
3440 1.1 mrg
3441 1.1 mrg /* Implement TARGET_RTX_COSTS.
3442 1.1 mrg
3443 1.1 mrg Compute a (partial) cost for rtx X. Return true if the complete
3444 1.1 mrg cost has been computed, and false if subexpressions should be
3445 1.1 mrg scanned. In either case, *TOTAL contains the cost result. */
3446 1.1 mrg
3447 1.1 mrg static bool
3448 1.1 mrg gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool)
3449 1.1 mrg {
3450 1.1 mrg enum rtx_code code = GET_CODE (x);
3451 1.1 mrg switch (code)
3452 1.1 mrg {
3453 1.1 mrg case CONST:
3454 1.1 mrg case CONST_DOUBLE:
3455 1.1 mrg case CONST_VECTOR:
3456 1.1 mrg case CONST_INT:
3457 1.1 mrg if (gcn_inline_constant_p (x))
3458 1.1 mrg *total = 0;
3459 1.1 mrg else if (code == CONST_INT
3460 1.1 mrg && ((unsigned HOST_WIDE_INT) INTVAL (x) + 0x8000) < 0x10000)
3461 1.1 mrg *total = 1;
3462 1.1 mrg else if (gcn_constant_p (x))
3463 1.1 mrg *total = 2;
3464 1.1 mrg else
3465 1.1 mrg *total = vgpr_vector_mode_p (GET_MODE (x)) ? 64 : 4;
3466 1.1 mrg return true;
3467 1.1 mrg
3468 1.1 mrg case DIV:
3469 1.1 mrg *total = 100;
3470 1.1 mrg return false;
3471 1.1 mrg
3472 1.1 mrg default:
3473 1.1 mrg *total = 3;
3474 1.1 mrg return false;
3475 1.1 mrg }
3476 1.1 mrg }
3477 1.1 mrg
3478 1.1 mrg /* Implement TARGET_MEMORY_MOVE_COST.
3479 1.1 mrg
3480 1.1 mrg Return the cost of moving data of mode M between a
3481 1.1 mrg register and memory. A value of 2 is the default; this cost is
3482 1.1 mrg relative to those in `REGISTER_MOVE_COST'.
3483 1.1 mrg
3484 1.1 mrg This function is used extensively by register_move_cost that is used to
3485 1.1 mrg build tables at startup. Make it inline in this case.
3486 1.1 mrg When IN is 2, return maximum of in and out move cost.
3487 1.1 mrg
3488 1.1 mrg If moving between registers and memory is more expensive than
3489 1.1 mrg between two registers, you should define this macro to express the
3490 1.1 mrg relative cost.
3491 1.1 mrg
3492 1.1 mrg Model also increased moving costs of QImode registers in non
3493 1.1 mrg Q_REGS classes. */
3494 1.1 mrg
3495 1.1 mrg #define LOAD_COST 32
3496 1.1 mrg #define STORE_COST 32
3497 1.1 mrg static int
3498 1.1 mrg gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
3499 1.1 mrg {
3500 1.1 mrg int nregs = CEIL (GET_MODE_SIZE (mode), 4);
3501 1.1 mrg switch (regclass)
3502 1.1 mrg {
3503 1.1 mrg case SCC_CONDITIONAL_REG:
3504 1.1 mrg case VCCZ_CONDITIONAL_REG:
3505 1.1 mrg case VCC_CONDITIONAL_REG:
3506 1.1 mrg case EXECZ_CONDITIONAL_REG:
3507 1.1 mrg case ALL_CONDITIONAL_REGS:
3508 1.1 mrg case SGPR_REGS:
3509 1.1 mrg case SGPR_EXEC_REGS:
3510 1.1 mrg case EXEC_MASK_REG:
3511 1.1 mrg case SGPR_VOP_SRC_REGS:
3512 1.1 mrg case SGPR_MEM_SRC_REGS:
3513 1.1 mrg case SGPR_SRC_REGS:
3514 1.1 mrg case SGPR_DST_REGS:
3515 1.1 mrg case GENERAL_REGS:
3516 1.1 mrg case AFP_REGS:
3517 1.1 mrg if (!in)
3518 1.1 mrg return (STORE_COST + 2) * nregs;
3519 1.1 mrg return LOAD_COST * nregs;
3520 1.1 mrg case VGPR_REGS:
3521 1.1 mrg if (in)
3522 1.1 mrg return (LOAD_COST + 2) * nregs;
3523 1.1 mrg return STORE_COST * nregs;
3524 1.1 mrg case ALL_REGS:
3525 1.1 mrg case ALL_GPR_REGS:
3526 1.1 mrg case SRCDST_REGS:
3527 1.1 mrg if (in)
3528 1.1 mrg return (LOAD_COST + 2) * nregs;
3529 1.1 mrg return (STORE_COST + 2) * nregs;
3530 1.1 mrg default:
3531 1.1 mrg gcc_unreachable ();
3532 1.1 mrg }
3533 1.1 mrg }
3534 1.1 mrg
3535 1.1 mrg /* Implement TARGET_REGISTER_MOVE_COST.
3536 1.1 mrg
3537 1.1 mrg Return the cost of moving data from a register in class CLASS1 to
3538 1.1 mrg one in class CLASS2. Base value is 2. */
3539 1.1 mrg
3540 1.1 mrg static int
3541 1.1 mrg gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src)
3542 1.1 mrg {
3543 1.1 mrg /* Increase cost of moving from and to vector registers. While this is
3544 1.1 mrg fast in hardware (I think), it has hidden cost of setting up the exec
3545 1.1 mrg flags. */
3546 1.1 mrg if ((src < VGPR_REGS) != (dst < VGPR_REGS))
3547 1.1 mrg return 4;
3548 1.1 mrg return 2;
3549 1.1 mrg }
3550 1.1 mrg
3551 1.1 mrg /* }}} */
3552 1.1 mrg /* {{{ Builtins. */
3553 1.1 mrg
3554 1.1 mrg /* Type codes used by GCN built-in definitions. */
3555 1.1 mrg
3556 1.1 mrg enum gcn_builtin_type_index
3557 1.1 mrg {
3558 1.1 mrg GCN_BTI_END_OF_PARAMS,
3559 1.1 mrg
3560 1.1 mrg GCN_BTI_VOID,
3561 1.1 mrg GCN_BTI_BOOL,
3562 1.1 mrg GCN_BTI_INT,
3563 1.1 mrg GCN_BTI_UINT,
3564 1.1 mrg GCN_BTI_SIZE_T,
3565 1.1 mrg GCN_BTI_LLINT,
3566 1.1 mrg GCN_BTI_LLUINT,
3567 1.1 mrg GCN_BTI_EXEC,
3568 1.1 mrg
3569 1.1 mrg GCN_BTI_SF,
3570 1.1 mrg GCN_BTI_V64SI,
3571 1.1 mrg GCN_BTI_V64SF,
3572 1.1 mrg GCN_BTI_V64PTR,
3573 1.1 mrg GCN_BTI_SIPTR,
3574 1.1 mrg GCN_BTI_SFPTR,
3575 1.1 mrg GCN_BTI_VOIDPTR,
3576 1.1 mrg
3577 1.1 mrg GCN_BTI_LDS_VOIDPTR,
3578 1.1 mrg
3579 1.1 mrg GCN_BTI_MAX
3580 1.1 mrg };
3581 1.1 mrg
3582 1.1 mrg static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX];
3583 1.1 mrg
3584 1.1 mrg #define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC])
3585 1.1 mrg #define sf_type_node (gcn_builtin_types[GCN_BTI_SF])
3586 1.1 mrg #define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
3587 1.1 mrg #define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
3588 1.1 mrg #define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
3589 1.1 mrg #define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
3590 1.1 mrg #define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
3591 1.1 mrg #define voidptr_type_node (gcn_builtin_types[GCN_BTI_VOIDPTR])
3592 1.1 mrg #define size_t_type_node (gcn_builtin_types[GCN_BTI_SIZE_T])
3593 1.1 mrg
3594 1.1 mrg static rtx gcn_expand_builtin_1 (tree, rtx, rtx, machine_mode, int,
3595 1.1 mrg struct gcn_builtin_description *);
3596 1.1 mrg static rtx gcn_expand_builtin_binop (tree, rtx, rtx, machine_mode, int,
3597 1.1 mrg struct gcn_builtin_description *);
3598 1.1 mrg
3599 1.1 mrg struct gcn_builtin_description;
3600 1.1 mrg typedef rtx (*gcn_builtin_expander) (tree, rtx, rtx, machine_mode, int,
3601 1.1 mrg struct gcn_builtin_description *);
3602 1.1 mrg
3603 1.1 mrg enum gcn_builtin_type
3604 1.1 mrg {
3605 1.1 mrg B_UNIMPLEMENTED, /* Sorry out */
3606 1.1 mrg B_INSN, /* Emit a pattern */
3607 1.1 mrg B_OVERLOAD /* Placeholder for an overloaded function */
3608 1.1 mrg };
3609 1.1 mrg
3610 1.1 mrg struct gcn_builtin_description
3611 1.1 mrg {
3612 1.1 mrg int fcode;
3613 1.1 mrg int icode;
3614 1.1 mrg const char *name;
3615 1.1 mrg enum gcn_builtin_type type;
3616 1.1 mrg /* The first element of parm is always the return type. The rest
3617 1.1 mrg are a zero terminated list of parameters. */
3618 1.1 mrg int parm[6];
3619 1.1 mrg gcn_builtin_expander expander;
3620 1.1 mrg };
3621 1.1 mrg
3622 1.1 mrg /* Read in the GCN builtins from gcn-builtins.def. */
3623 1.1 mrg
3624 1.1 mrg extern GTY(()) struct gcn_builtin_description gcn_builtins[GCN_BUILTIN_MAX];
3625 1.1 mrg
3626 1.1 mrg struct gcn_builtin_description gcn_builtins[] = {
3627 1.1 mrg #define DEF_BUILTIN(fcode, icode, name, type, params, expander) \
3628 1.1 mrg {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander},
3629 1.1 mrg
3630 1.1 mrg #define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \
3631 1.1 mrg {GCN_BUILTIN_ ## fcode ## _V64SI, \
3632 1.1 mrg CODE_FOR_ ## ic ##v64si3_exec, name "_v64int", B_INSN, \
3633 1.1 mrg {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
3634 1.1 mrg GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, \
3635 1.1 mrg {GCN_BUILTIN_ ## fcode ## _V64SI_unspec, \
3636 1.1 mrg CODE_FOR_ ## ic ##v64si3_exec, name "_v64int_unspec", B_INSN, \
3637 1.1 mrg {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
3638 1.1 mrg GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},
3639 1.1 mrg
3640 1.1 mrg #include "gcn-builtins.def"
3641 1.1 mrg #undef DEF_BUILTIN_BINOP_INT_FP
3642 1.1 mrg #undef DEF_BUILTIN
3643 1.1 mrg };
3644 1.1 mrg
3645 1.1 mrg static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX];
3646 1.1 mrg
3647 1.1 mrg /* Implement TARGET_BUILTIN_DECL.
3648 1.1 mrg
3649 1.1 mrg Return the GCN builtin for CODE. */
3650 1.1 mrg
3651 1.1 mrg tree
3652 1.1 mrg gcn_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p))
3653 1.1 mrg {
3654 1.1 mrg if (code >= GCN_BUILTIN_MAX)
3655 1.1 mrg return error_mark_node;
3656 1.1 mrg
3657 1.1 mrg return gcn_builtin_decls[code];
3658 1.1 mrg }
3659 1.1 mrg
3660 1.1 mrg /* Helper function for gcn_init_builtins. */
3661 1.1 mrg
3662 1.1 mrg static void
3663 1.1 mrg gcn_init_builtin_types (void)
3664 1.1 mrg {
3665 1.1 mrg gcn_builtin_types[GCN_BTI_VOID] = void_type_node;
3666 1.1 mrg gcn_builtin_types[GCN_BTI_BOOL] = boolean_type_node;
3667 1.1 mrg gcn_builtin_types[GCN_BTI_INT] = intSI_type_node;
3668 1.1 mrg gcn_builtin_types[GCN_BTI_UINT] = unsigned_type_for (intSI_type_node);
3669 1.1 mrg gcn_builtin_types[GCN_BTI_SIZE_T] = size_type_node;
3670 1.1 mrg gcn_builtin_types[GCN_BTI_LLINT] = intDI_type_node;
3671 1.1 mrg gcn_builtin_types[GCN_BTI_LLUINT] = unsigned_type_for (intDI_type_node);
3672 1.1 mrg
3673 1.1 mrg exec_type_node = unsigned_intDI_type_node;
3674 1.1 mrg sf_type_node = float32_type_node;
3675 1.1 mrg v64si_type_node = build_vector_type (intSI_type_node, 64);
3676 1.1 mrg v64sf_type_node = build_vector_type (float_type_node, 64);
3677 1.1 mrg v64ptr_type_node = build_vector_type (unsigned_intDI_type_node
3678 1.1 mrg /*build_pointer_type
3679 1.1 mrg (integer_type_node) */
3680 1.1 mrg , 64);
3681 1.1 mrg tree tmp = build_distinct_type_copy (intSI_type_node);
3682 1.1 mrg TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3683 1.1 mrg siptr_type_node = build_pointer_type (tmp);
3684 1.1 mrg
3685 1.1 mrg tmp = build_distinct_type_copy (float_type_node);
3686 1.1 mrg TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3687 1.1 mrg sfptr_type_node = build_pointer_type (tmp);
3688 1.1 mrg
3689 1.1 mrg tmp = build_distinct_type_copy (void_type_node);
3690 1.1 mrg TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3691 1.1 mrg voidptr_type_node = build_pointer_type (tmp);
3692 1.1 mrg
3693 1.1 mrg tmp = build_distinct_type_copy (void_type_node);
3694 1.1 mrg TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_LDS;
3695 1.1 mrg gcn_builtin_types[GCN_BTI_LDS_VOIDPTR] = build_pointer_type (tmp);
3696 1.1 mrg }
3697 1.1 mrg
3698 1.1 mrg /* Implement TARGET_INIT_BUILTINS.
3699 1.1 mrg
3700 1.1 mrg Set up all builtin functions for this target. */
3701 1.1 mrg
3702 1.1 mrg static void
3703 1.1 mrg gcn_init_builtins (void)
3704 1.1 mrg {
3705 1.1 mrg gcn_init_builtin_types ();
3706 1.1 mrg
3707 1.1 mrg struct gcn_builtin_description *d;
3708 1.1 mrg unsigned int i;
3709 1.1 mrg for (i = 0, d = gcn_builtins; i < GCN_BUILTIN_MAX; i++, d++)
3710 1.1 mrg {
3711 1.1 mrg tree p;
3712 1.1 mrg char name[64]; /* build_function will make a copy. */
3713 1.1 mrg int parm;
3714 1.1 mrg
3715 1.1 mrg /* FIXME: Is this necessary/useful? */
3716 1.1 mrg if (d->name == 0)
3717 1.1 mrg continue;
3718 1.1 mrg
3719 1.1 mrg /* Find last parm. */
3720 1.1 mrg for (parm = 1; d->parm[parm] != GCN_BTI_END_OF_PARAMS; parm++)
3721 1.1 mrg ;
3722 1.1 mrg
3723 1.1 mrg p = void_list_node;
3724 1.1 mrg while (parm > 1)
3725 1.1 mrg p = tree_cons (NULL_TREE, gcn_builtin_types[d->parm[--parm]], p);
3726 1.1 mrg
3727 1.1 mrg p = build_function_type (gcn_builtin_types[d->parm[0]], p);
3728 1.1 mrg
3729 1.1 mrg sprintf (name, "__builtin_gcn_%s", d->name);
3730 1.1 mrg gcn_builtin_decls[i]
3731 1.1 mrg = add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
3732 1.1 mrg
3733 1.1 mrg /* These builtins don't throw. */
3734 1.1 mrg TREE_NOTHROW (gcn_builtin_decls[i]) = 1;
3735 1.1 mrg }
3736 1.1 mrg
3737 1.1 mrg /* These builtins need to take/return an LDS pointer: override the generic
3738 1.1 mrg versions here. */
3739 1.1 mrg
3740 1.1 mrg set_builtin_decl (BUILT_IN_GOACC_SINGLE_START,
3741 1.1 mrg gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_START], false);
3742 1.1 mrg
3743 1.1 mrg set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_START,
3744 1.1 mrg gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_START],
3745 1.1 mrg false);
3746 1.1 mrg
3747 1.1 mrg set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_END,
3748 1.1 mrg gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_END],
3749 1.1 mrg false);
3750 1.1 mrg
3751 1.1 mrg set_builtin_decl (BUILT_IN_GOACC_BARRIER,
3752 1.1 mrg gcn_builtin_decls[GCN_BUILTIN_ACC_BARRIER], false);
3753 1.1 mrg }
3754 1.1 mrg
3755 1.1 mrg /* Implement TARGET_INIT_LIBFUNCS. */
3756 1.1 mrg
3757 1.1 mrg static void
3758 1.1 mrg gcn_init_libfuncs (void)
3759 1.1 mrg {
3760 1.1 mrg /* BITS_PER_UNIT * 2 is 64 bits, which causes
3761 1.1 mrg optabs-libfuncs.cc:gen_int_libfunc to omit TImode (i.e 128 bits)
3762 1.1 mrg libcalls that we need to support operations for that type. Initialise
3763 1.1 mrg them here instead. */
3764 1.1 mrg set_optab_libfunc (udiv_optab, TImode, "__udivti3");
3765 1.1 mrg set_optab_libfunc (umod_optab, TImode, "__umodti3");
3766 1.1 mrg set_optab_libfunc (sdiv_optab, TImode, "__divti3");
3767 1.1 mrg set_optab_libfunc (smod_optab, TImode, "__modti3");
3768 1.1 mrg set_optab_libfunc (smul_optab, TImode, "__multi3");
3769 1.1 mrg set_optab_libfunc (addv_optab, TImode, "__addvti3");
3770 1.1 mrg set_optab_libfunc (subv_optab, TImode, "__subvti3");
3771 1.1 mrg set_optab_libfunc (negv_optab, TImode, "__negvti2");
3772 1.1 mrg set_optab_libfunc (absv_optab, TImode, "__absvti2");
3773 1.1 mrg set_optab_libfunc (smulv_optab, TImode, "__mulvti3");
3774 1.1 mrg set_optab_libfunc (ffs_optab, TImode, "__ffsti2");
3775 1.1 mrg set_optab_libfunc (clz_optab, TImode, "__clzti2");
3776 1.1 mrg set_optab_libfunc (ctz_optab, TImode, "__ctzti2");
3777 1.1 mrg set_optab_libfunc (clrsb_optab, TImode, "__clrsbti2");
3778 1.1 mrg set_optab_libfunc (popcount_optab, TImode, "__popcountti2");
3779 1.1 mrg set_optab_libfunc (parity_optab, TImode, "__parityti2");
3780 1.1 mrg set_optab_libfunc (bswap_optab, TImode, "__bswapti2");
3781 1.1 mrg }
3782 1.1 mrg
3783 1.1 mrg /* Expand the CMP_SWAP GCN builtins. We have our own versions that do
3784 1.1 mrg not require taking the address of any object, other than the memory
3785 1.1 mrg cell being operated on.
3786 1.1 mrg
3787 1.1 mrg Helper function for gcn_expand_builtin_1. */
3788 1.1 mrg
3789 1.1 mrg static rtx
3790 1.1 mrg gcn_expand_cmp_swap (tree exp, rtx target)
3791 1.1 mrg {
3792 1.1 mrg machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
3793 1.1 mrg addr_space_t as
3794 1.1 mrg = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (CALL_EXPR_ARG (exp, 0))));
3795 1.1 mrg machine_mode as_mode = gcn_addr_space_address_mode (as);
3796 1.1 mrg
3797 1.1 mrg if (!target)
3798 1.1 mrg target = gen_reg_rtx (mode);
3799 1.1 mrg
3800 1.1 mrg rtx addr = expand_expr (CALL_EXPR_ARG (exp, 0),
3801 1.1 mrg NULL_RTX, as_mode, EXPAND_NORMAL);
3802 1.1 mrg rtx cmp = expand_expr (CALL_EXPR_ARG (exp, 1),
3803 1.1 mrg NULL_RTX, mode, EXPAND_NORMAL);
3804 1.1 mrg rtx src = expand_expr (CALL_EXPR_ARG (exp, 2),
3805 1.1 mrg NULL_RTX, mode, EXPAND_NORMAL);
3806 1.1 mrg rtx pat;
3807 1.1 mrg
3808 1.1 mrg rtx mem = gen_rtx_MEM (mode, force_reg (as_mode, addr));
3809 1.1 mrg set_mem_addr_space (mem, as);
3810 1.1 mrg
3811 1.1 mrg if (!REG_P (cmp))
3812 1.1 mrg cmp = copy_to_mode_reg (mode, cmp);
3813 1.1 mrg if (!REG_P (src))
3814 1.1 mrg src = copy_to_mode_reg (mode, src);
3815 1.1 mrg
3816 1.1 mrg if (mode == SImode)
3817 1.1 mrg pat = gen_sync_compare_and_swapsi (target, mem, cmp, src);
3818 1.1 mrg else
3819 1.1 mrg pat = gen_sync_compare_and_swapdi (target, mem, cmp, src);
3820 1.1 mrg
3821 1.1 mrg emit_insn (pat);
3822 1.1 mrg
3823 1.1 mrg return target;
3824 1.1 mrg }
3825 1.1 mrg
3826 1.1 mrg /* Expand many different builtins.
3827 1.1 mrg
3828 1.1 mrg Intended for use in gcn-builtins.def. */
3829 1.1 mrg
3830 1.1 mrg static rtx
3831 1.1 mrg gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
3832 1.1 mrg machine_mode /*mode */ , int ignore,
3833 1.1 mrg struct gcn_builtin_description *)
3834 1.1 mrg {
3835 1.1 mrg tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3836 1.1 mrg switch (DECL_MD_FUNCTION_CODE (fndecl))
3837 1.1 mrg {
3838 1.1 mrg case GCN_BUILTIN_FLAT_LOAD_INT32:
3839 1.1 mrg {
3840 1.1 mrg if (ignore)
3841 1.1 mrg return target;
3842 1.1 mrg /*rtx exec = */
3843 1.1 mrg force_reg (DImode,
3844 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
3845 1.1 mrg EXPAND_NORMAL));
3846 1.1 mrg /*rtx ptr = */
3847 1.1 mrg force_reg (V64DImode,
3848 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
3849 1.1 mrg EXPAND_NORMAL));
3850 1.1 mrg /*emit_insn (gen_vector_flat_loadv64si
3851 1.1 mrg (target, gcn_gen_undef (V64SImode), ptr, exec)); */
3852 1.1 mrg return target;
3853 1.1 mrg }
3854 1.1 mrg case GCN_BUILTIN_FLAT_LOAD_PTR_INT32:
3855 1.1 mrg case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT:
3856 1.1 mrg {
3857 1.1 mrg if (ignore)
3858 1.1 mrg return target;
3859 1.1 mrg rtx exec = force_reg (DImode,
3860 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3861 1.1 mrg DImode,
3862 1.1 mrg EXPAND_NORMAL));
3863 1.1 mrg rtx ptr = force_reg (DImode,
3864 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
3865 1.1 mrg V64DImode,
3866 1.1 mrg EXPAND_NORMAL));
3867 1.1 mrg rtx offsets = force_reg (V64SImode,
3868 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 2),
3869 1.1 mrg NULL_RTX, V64DImode,
3870 1.1 mrg EXPAND_NORMAL));
3871 1.1 mrg rtx addrs = gen_reg_rtx (V64DImode);
3872 1.1 mrg rtx tmp = gen_reg_rtx (V64SImode);
3873 1.1 mrg emit_insn (gen_ashlv64si3_exec (tmp, offsets,
3874 1.1 mrg GEN_INT (2),
3875 1.1 mrg gcn_gen_undef (V64SImode), exec));
3876 1.1 mrg emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
3877 1.1 mrg gcn_gen_undef (V64DImode),
3878 1.1 mrg exec));
3879 1.1 mrg rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
3880 1.1 mrg /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
3881 1.1 mrg /* FIXME: set attributes. */
3882 1.1 mrg emit_insn (gen_mov_with_exec (target, mem, exec));
3883 1.1 mrg return target;
3884 1.1 mrg }
3885 1.1 mrg case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
3886 1.1 mrg case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT:
3887 1.1 mrg {
3888 1.1 mrg rtx exec = force_reg (DImode,
3889 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3890 1.1 mrg DImode,
3891 1.1 mrg EXPAND_NORMAL));
3892 1.1 mrg rtx ptr = force_reg (DImode,
3893 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
3894 1.1 mrg V64DImode,
3895 1.1 mrg EXPAND_NORMAL));
3896 1.1 mrg rtx offsets = force_reg (V64SImode,
3897 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 2),
3898 1.1 mrg NULL_RTX, V64DImode,
3899 1.1 mrg EXPAND_NORMAL));
3900 1.1 mrg machine_mode vmode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
3901 1.1 mrg 3)));
3902 1.1 mrg rtx val = force_reg (vmode,
3903 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
3904 1.1 mrg vmode,
3905 1.1 mrg EXPAND_NORMAL));
3906 1.1 mrg rtx addrs = gen_reg_rtx (V64DImode);
3907 1.1 mrg rtx tmp = gen_reg_rtx (V64SImode);
3908 1.1 mrg emit_insn (gen_ashlv64si3_exec (tmp, offsets,
3909 1.1 mrg GEN_INT (2),
3910 1.1 mrg gcn_gen_undef (V64SImode), exec));
3911 1.1 mrg emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
3912 1.1 mrg gcn_gen_undef (V64DImode),
3913 1.1 mrg exec));
3914 1.1 mrg rtx mem = gen_rtx_MEM (vmode, addrs);
3915 1.1 mrg /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
3916 1.1 mrg /* FIXME: set attributes. */
3917 1.1 mrg emit_insn (gen_mov_with_exec (mem, val, exec));
3918 1.1 mrg return target;
3919 1.1 mrg }
3920 1.1 mrg case GCN_BUILTIN_SQRTVF:
3921 1.1 mrg {
3922 1.1 mrg if (ignore)
3923 1.1 mrg return target;
3924 1.1 mrg rtx exec = gcn_full_exec_reg ();
3925 1.1 mrg rtx arg = force_reg (V64SFmode,
3926 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3927 1.1 mrg V64SFmode,
3928 1.1 mrg EXPAND_NORMAL));
3929 1.1 mrg emit_insn (gen_sqrtv64sf2_exec
3930 1.1 mrg (target, arg, gcn_gen_undef (V64SFmode), exec));
3931 1.1 mrg return target;
3932 1.1 mrg }
3933 1.1 mrg case GCN_BUILTIN_SQRTF:
3934 1.1 mrg {
3935 1.1 mrg if (ignore)
3936 1.1 mrg return target;
3937 1.1 mrg rtx arg = force_reg (SFmode,
3938 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3939 1.1 mrg SFmode,
3940 1.1 mrg EXPAND_NORMAL));
3941 1.1 mrg emit_insn (gen_sqrtsf2 (target, arg));
3942 1.1 mrg return target;
3943 1.1 mrg }
3944 1.1 mrg case GCN_BUILTIN_OMP_DIM_SIZE:
3945 1.1 mrg {
3946 1.1 mrg if (ignore)
3947 1.1 mrg return target;
3948 1.1 mrg emit_insn (gen_oacc_dim_size (target,
3949 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 0),
3950 1.1 mrg NULL_RTX, SImode,
3951 1.1 mrg EXPAND_NORMAL)));
3952 1.1 mrg return target;
3953 1.1 mrg }
3954 1.1 mrg case GCN_BUILTIN_OMP_DIM_POS:
3955 1.1 mrg {
3956 1.1 mrg if (ignore)
3957 1.1 mrg return target;
3958 1.1 mrg emit_insn (gen_oacc_dim_pos (target,
3959 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 0),
3960 1.1 mrg NULL_RTX, SImode,
3961 1.1 mrg EXPAND_NORMAL)));
3962 1.1 mrg return target;
3963 1.1 mrg }
3964 1.1 mrg case GCN_BUILTIN_CMP_SWAP:
3965 1.1 mrg case GCN_BUILTIN_CMP_SWAPLL:
3966 1.1 mrg return gcn_expand_cmp_swap (exp, target);
3967 1.1 mrg
3968 1.1 mrg case GCN_BUILTIN_ACC_SINGLE_START:
3969 1.1 mrg {
3970 1.1 mrg if (ignore)
3971 1.1 mrg return target;
3972 1.1 mrg
3973 1.1 mrg rtx wavefront = gcn_oacc_dim_pos (1);
3974 1.1 mrg rtx cond = gen_rtx_EQ (VOIDmode, wavefront, const0_rtx);
3975 1.1 mrg rtx cc = (target && REG_P (target)) ? target : gen_reg_rtx (BImode);
3976 1.1 mrg emit_insn (gen_cstoresi4 (cc, cond, wavefront, const0_rtx));
3977 1.1 mrg return cc;
3978 1.1 mrg }
3979 1.1 mrg
3980 1.1 mrg case GCN_BUILTIN_ACC_SINGLE_COPY_START:
3981 1.1 mrg {
3982 1.1 mrg rtx blk = force_reg (SImode,
3983 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3984 1.1 mrg SImode, EXPAND_NORMAL));
3985 1.1 mrg rtx wavefront = gcn_oacc_dim_pos (1);
3986 1.1 mrg rtx cond = gen_rtx_NE (VOIDmode, wavefront, const0_rtx);
3987 1.1 mrg rtx not_zero = gen_label_rtx ();
3988 1.1 mrg emit_insn (gen_cbranchsi4 (cond, wavefront, const0_rtx, not_zero));
3989 1.1 mrg emit_move_insn (blk, const0_rtx);
3990 1.1 mrg emit_label (not_zero);
3991 1.1 mrg return blk;
3992 1.1 mrg }
3993 1.1 mrg
3994 1.1 mrg case GCN_BUILTIN_ACC_SINGLE_COPY_END:
3995 1.1 mrg return target;
3996 1.1 mrg
3997 1.1 mrg case GCN_BUILTIN_ACC_BARRIER:
3998 1.1 mrg emit_insn (gen_gcn_wavefront_barrier ());
3999 1.1 mrg return target;
4000 1.1 mrg
4001 1.1 mrg default:
4002 1.1 mrg gcc_unreachable ();
4003 1.1 mrg }
4004 1.1 mrg }
4005 1.1 mrg
4006 1.1 mrg /* Expansion of simple arithmetic and bit binary operation builtins.
4007 1.1 mrg
4008 1.1 mrg Intended for use with gcn_builtins table. */
4009 1.1 mrg
4010 1.1 mrg static rtx
4011 1.1 mrg gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget */ ,
4012 1.1 mrg machine_mode /*mode */ , int ignore,
4013 1.1 mrg struct gcn_builtin_description *d)
4014 1.1 mrg {
4015 1.1 mrg int icode = d->icode;
4016 1.1 mrg if (ignore)
4017 1.1 mrg return target;
4018 1.1 mrg
4019 1.1 mrg rtx exec = force_reg (DImode,
4020 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
4021 1.1 mrg EXPAND_NORMAL));
4022 1.1 mrg
4023 1.1 mrg machine_mode m1 = insn_data[icode].operand[1].mode;
4024 1.1 mrg rtx arg1 = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, m1,
4025 1.1 mrg EXPAND_NORMAL);
4026 1.1 mrg if (!insn_data[icode].operand[1].predicate (arg1, m1))
4027 1.1 mrg arg1 = force_reg (m1, arg1);
4028 1.1 mrg
4029 1.1 mrg machine_mode m2 = insn_data[icode].operand[2].mode;
4030 1.1 mrg rtx arg2 = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, m2,
4031 1.1 mrg EXPAND_NORMAL);
4032 1.1 mrg if (!insn_data[icode].operand[2].predicate (arg2, m2))
4033 1.1 mrg arg2 = force_reg (m2, arg2);
4034 1.1 mrg
4035 1.1 mrg rtx arg_prev;
4036 1.1 mrg if (call_expr_nargs (exp) == 4)
4037 1.1 mrg {
4038 1.1 mrg machine_mode m_prev = insn_data[icode].operand[4].mode;
4039 1.1 mrg arg_prev = force_reg (m_prev,
4040 1.1 mrg expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
4041 1.1 mrg m_prev, EXPAND_NORMAL));
4042 1.1 mrg }
4043 1.1 mrg else
4044 1.1 mrg arg_prev = gcn_gen_undef (GET_MODE (target));
4045 1.1 mrg
4046 1.1 mrg rtx pat = GEN_FCN (icode) (target, arg1, arg2, exec, arg_prev);
4047 1.1 mrg emit_insn (pat);
4048 1.1 mrg return target;
4049 1.1 mrg }
4050 1.1 mrg
4051 1.1 mrg /* Implement TARGET_EXPAND_BUILTIN.
4052 1.1 mrg
4053 1.1 mrg Expand an expression EXP that calls a built-in function, with result going
4054 1.1 mrg to TARGET if that's convenient (and in mode MODE if that's convenient).
4055 1.1 mrg SUBTARGET may be used as the target for computing one of EXP's operands.
4056 1.1 mrg IGNORE is nonzero if the value is to be ignored. */
4057 1.1 mrg
4058 1.1 mrg rtx
4059 1.1 mrg gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
4060 1.1 mrg int ignore)
4061 1.1 mrg {
4062 1.1 mrg tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
4063 1.1 mrg unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
4064 1.1 mrg struct gcn_builtin_description *d;
4065 1.1 mrg
4066 1.1 mrg gcc_assert (fcode < GCN_BUILTIN_MAX);
4067 1.1 mrg d = &gcn_builtins[fcode];
4068 1.1 mrg
4069 1.1 mrg if (d->type == B_UNIMPLEMENTED)
4070 1.1 mrg sorry ("Builtin not implemented");
4071 1.1 mrg
4072 1.1 mrg return d->expander (exp, target, subtarget, mode, ignore, d);
4073 1.1 mrg }
4074 1.1 mrg
4075 1.1 mrg /* }}} */
4076 1.1 mrg /* {{{ Vectorization. */
4077 1.1 mrg
4078 1.1 mrg /* Implement TARGET_VECTORIZE_GET_MASK_MODE.
4079 1.1 mrg
4080 1.1 mrg A vector mask is a value that holds one boolean result for every element in
4081 1.1 mrg a vector. */
4082 1.1 mrg
4083 1.1 mrg opt_machine_mode
4084 1.1 mrg gcn_vectorize_get_mask_mode (machine_mode)
4085 1.1 mrg {
4086 1.1 mrg /* GCN uses a DImode bit-mask. */
4087 1.1 mrg return DImode;
4088 1.1 mrg }
4089 1.1 mrg
4090 1.1 mrg /* Return an RTX that references a vector with the i-th lane containing
4091 1.1 mrg PERM[i]*4.
4092 1.1 mrg
4093 1.1 mrg Helper function for gcn_vectorize_vec_perm_const. */
4094 1.1 mrg
4095 1.1 mrg static rtx
4096 1.1 mrg gcn_make_vec_perm_address (unsigned int *perm)
4097 1.1 mrg {
4098 1.1 mrg rtx x = gen_reg_rtx (V64SImode);
4099 1.1 mrg emit_move_insn (x, gcn_vec_constant (V64SImode, 0));
4100 1.1 mrg
4101 1.1 mrg /* Permutation addresses use byte addressing. With each vector lane being
4102 1.1 mrg 4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant,
4103 1.1 mrg so only set those.
4104 1.1 mrg
4105 1.1 mrg The permutation given to the vec_perm* patterns range from 0 to 2N-1 to
4106 1.1 mrg select between lanes in two vectors, but as the DS_BPERMUTE* instructions
4107 1.1 mrg only take one source vector, the most-significant bit can be ignored
4108 1.1 mrg here. Instead, we can use EXEC masking to select the relevant part of
4109 1.1 mrg each source vector after they are permuted separately. */
4110 1.1 mrg uint64_t bit_mask = 1 << 2;
4111 1.1 mrg for (int i = 2; i < 8; i++, bit_mask <<= 1)
4112 1.1 mrg {
4113 1.1 mrg uint64_t exec_mask = 0;
4114 1.1 mrg uint64_t lane_mask = 1;
4115 1.1 mrg for (int j = 0; j < 64; j++, lane_mask <<= 1)
4116 1.1 mrg if ((perm[j] * 4) & bit_mask)
4117 1.1 mrg exec_mask |= lane_mask;
4118 1.1 mrg
4119 1.1 mrg if (exec_mask)
4120 1.1 mrg emit_insn (gen_addv64si3_exec (x, x,
4121 1.1 mrg gcn_vec_constant (V64SImode,
4122 1.1 mrg bit_mask),
4123 1.1 mrg x, get_exec (exec_mask)));
4124 1.1 mrg }
4125 1.1 mrg
4126 1.1 mrg return x;
4127 1.1 mrg }
4128 1.1 mrg
4129 1.1 mrg /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.
4130 1.1 mrg
4131 1.1 mrg Return true if permutation with SEL is possible.
4132 1.1 mrg
4133 1.1 mrg If DST/SRC0/SRC1 are non-null, emit the instructions to perform the
4134 1.1 mrg permutations. */
4135 1.1 mrg
4136 1.1 mrg static bool
4137 1.1 mrg gcn_vectorize_vec_perm_const (machine_mode vmode, rtx dst,
4138 1.1 mrg rtx src0, rtx src1,
4139 1.1 mrg const vec_perm_indices & sel)
4140 1.1 mrg {
4141 1.1 mrg unsigned int nelt = GET_MODE_NUNITS (vmode);
4142 1.1 mrg
4143 1.1 mrg gcc_assert (VECTOR_MODE_P (vmode));
4144 1.1 mrg gcc_assert (nelt <= 64);
4145 1.1 mrg gcc_assert (sel.length () == nelt);
4146 1.1 mrg
4147 1.1 mrg if (!dst)
4148 1.1 mrg {
4149 1.1 mrg /* All vector permutations are possible on this architecture,
4150 1.1 mrg with varying degrees of efficiency depending on the permutation. */
4151 1.1 mrg return true;
4152 1.1 mrg }
4153 1.1 mrg
4154 1.1 mrg unsigned int perm[64];
4155 1.1 mrg for (unsigned int i = 0; i < nelt; ++i)
4156 1.1 mrg perm[i] = sel[i] & (2 * nelt - 1);
4157 1.1 mrg for (unsigned int i = nelt; i < 64; ++i)
4158 1.1 mrg perm[i] = 0;
4159 1.1 mrg
4160 1.1 mrg src0 = force_reg (vmode, src0);
4161 1.1 mrg src1 = force_reg (vmode, src1);
4162 1.1 mrg
4163 1.1 mrg /* Make life a bit easier by swapping operands if necessary so that
4164 1.1 mrg the first element always comes from src0. */
4165 1.1 mrg if (perm[0] >= nelt)
4166 1.1 mrg {
4167 1.1 mrg std::swap (src0, src1);
4168 1.1 mrg
4169 1.1 mrg for (unsigned int i = 0; i < nelt; ++i)
4170 1.1 mrg if (perm[i] < nelt)
4171 1.1 mrg perm[i] += nelt;
4172 1.1 mrg else
4173 1.1 mrg perm[i] -= nelt;
4174 1.1 mrg }
4175 1.1 mrg
4176 1.1 mrg /* TODO: There are more efficient ways to implement certain permutations
4177 1.1 mrg using ds_swizzle_b32 and/or DPP. Test for and expand them here, before
4178 1.1 mrg this more inefficient generic approach is used. */
4179 1.1 mrg
4180 1.1 mrg int64_t src1_lanes = 0;
4181 1.1 mrg int64_t lane_bit = 1;
4182 1.1 mrg
4183 1.1 mrg for (unsigned int i = 0; i < nelt; ++i, lane_bit <<= 1)
4184 1.1 mrg {
4185 1.1 mrg /* Set the bits for lanes from src1. */
4186 1.1 mrg if (perm[i] >= nelt)
4187 1.1 mrg src1_lanes |= lane_bit;
4188 1.1 mrg }
4189 1.1 mrg
4190 1.1 mrg rtx addr = gcn_make_vec_perm_address (perm);
4191 1.1 mrg rtx (*ds_bpermute) (rtx, rtx, rtx, rtx);
4192 1.1 mrg
4193 1.1 mrg switch (vmode)
4194 1.1 mrg {
4195 1.1 mrg case E_V64QImode:
4196 1.1 mrg ds_bpermute = gen_ds_bpermutev64qi;
4197 1.1 mrg break;
4198 1.1 mrg case E_V64HImode:
4199 1.1 mrg ds_bpermute = gen_ds_bpermutev64hi;
4200 1.1 mrg break;
4201 1.1 mrg case E_V64SImode:
4202 1.1 mrg ds_bpermute = gen_ds_bpermutev64si;
4203 1.1 mrg break;
4204 1.1 mrg case E_V64HFmode:
4205 1.1 mrg ds_bpermute = gen_ds_bpermutev64hf;
4206 1.1 mrg break;
4207 1.1 mrg case E_V64SFmode:
4208 1.1 mrg ds_bpermute = gen_ds_bpermutev64sf;
4209 1.1 mrg break;
4210 1.1 mrg case E_V64DImode:
4211 1.1 mrg ds_bpermute = gen_ds_bpermutev64di;
4212 1.1 mrg break;
4213 1.1 mrg case E_V64DFmode:
4214 1.1 mrg ds_bpermute = gen_ds_bpermutev64df;
4215 1.1 mrg break;
4216 1.1 mrg default:
4217 1.1 mrg gcc_assert (false);
4218 1.1 mrg }
4219 1.1 mrg
4220 1.1 mrg /* Load elements from src0 to dst. */
4221 1.1 mrg gcc_assert (~src1_lanes);
4222 1.1 mrg emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ()));
4223 1.1 mrg
4224 1.1 mrg /* Load elements from src1 to dst. */
4225 1.1 mrg if (src1_lanes)
4226 1.1 mrg {
4227 1.1 mrg /* Masking a lane masks both the destination and source lanes for
4228 1.1 mrg DS_BPERMUTE, so we need to have all lanes enabled for the permute,
4229 1.1 mrg then add an extra masked move to merge the results of permuting
4230 1.1 mrg the two source vectors together.
4231 1.1 mrg */
4232 1.1 mrg rtx tmp = gen_reg_rtx (vmode);
4233 1.1 mrg emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ()));
4234 1.1 mrg emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes)));
4235 1.1 mrg }
4236 1.1 mrg
4237 1.1 mrg return true;
4238 1.1 mrg }
4239 1.1 mrg
4240 1.1 mrg /* Implements TARGET_VECTOR_MODE_SUPPORTED_P.
4241 1.1 mrg
4242 1.1 mrg Return nonzero if vector MODE is supported with at least move
4243 1.1 mrg instructions. */
4244 1.1 mrg
4245 1.1 mrg static bool
4246 1.1 mrg gcn_vector_mode_supported_p (machine_mode mode)
4247 1.1 mrg {
4248 1.1 mrg return (mode == V64QImode || mode == V64HImode
4249 1.1 mrg || mode == V64SImode || mode == V64DImode
4250 1.1 mrg || mode == V64SFmode || mode == V64DFmode);
4251 1.1 mrg }
4252 1.1 mrg
4253 1.1 mrg /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
4254 1.1 mrg
4255 1.1 mrg Enables autovectorization for all supported modes. */
4256 1.1 mrg
4257 1.1 mrg static machine_mode
4258 1.1 mrg gcn_vectorize_preferred_simd_mode (scalar_mode mode)
4259 1.1 mrg {
4260 1.1 mrg switch (mode)
4261 1.1 mrg {
4262 1.1 mrg case E_QImode:
4263 1.1 mrg return V64QImode;
4264 1.1 mrg case E_HImode:
4265 1.1 mrg return V64HImode;
4266 1.1 mrg case E_SImode:
4267 1.1 mrg return V64SImode;
4268 1.1 mrg case E_DImode:
4269 1.1 mrg return V64DImode;
4270 1.1 mrg case E_SFmode:
4271 1.1 mrg return V64SFmode;
4272 1.1 mrg case E_DFmode:
4273 1.1 mrg return V64DFmode;
4274 1.1 mrg default:
4275 1.1 mrg return word_mode;
4276 1.1 mrg }
4277 1.1 mrg }
4278 1.1 mrg
4279 1.1 mrg /* Implement TARGET_VECTORIZE_RELATED_MODE.
4280 1.1 mrg
4281 1.1 mrg All GCN vectors are 64-lane, so this is simpler than other architectures.
4282 1.1 mrg In particular, we do *not* want to match vector bit-size. */
4283 1.1 mrg
4284 1.1 mrg static opt_machine_mode
4285 1.1 mrg gcn_related_vector_mode (machine_mode ARG_UNUSED (vector_mode),
4286 1.1 mrg scalar_mode element_mode, poly_uint64 nunits)
4287 1.1 mrg {
4288 1.1 mrg if (known_ne (nunits, 0U) && known_ne (nunits, 64U))
4289 1.1 mrg return VOIDmode;
4290 1.1 mrg
4291 1.1 mrg machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode);
4292 1.1 mrg if (!VECTOR_MODE_P (pref_mode))
4293 1.1 mrg return VOIDmode;
4294 1.1 mrg
4295 1.1 mrg return pref_mode;
4296 1.1 mrg }
4297 1.1 mrg
4298 1.1 mrg /* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.
4299 1.1 mrg
4300 1.1 mrg Returns the preferred alignment in bits for accesses to vectors of type type
4301 1.1 mrg in vectorized code. This might be less than or greater than the ABI-defined
4302 1.1 mrg value returned by TARGET_VECTOR_ALIGNMENT. It can be equal to the alignment
4303 1.1 mrg of a single element, in which case the vectorizer will not try to optimize
4304 1.1 mrg for alignment. */
4305 1.1 mrg
4306 1.1 mrg static poly_uint64
4307 1.1 mrg gcn_preferred_vector_alignment (const_tree type)
4308 1.1 mrg {
4309 1.1 mrg return TYPE_ALIGN (TREE_TYPE (type));
4310 1.1 mrg }
4311 1.1 mrg
4312 1.1 mrg /* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.
4313 1.1 mrg
4314 1.1 mrg Return true if the target supports misaligned vector store/load of a
4315 1.1 mrg specific factor denoted in the misalignment parameter. */
4316 1.1 mrg
4317 1.1 mrg static bool
4318 1.1 mrg gcn_vectorize_support_vector_misalignment (machine_mode ARG_UNUSED (mode),
4319 1.1 mrg const_tree type, int misalignment,
4320 1.1 mrg bool is_packed)
4321 1.1 mrg {
4322 1.1 mrg if (is_packed)
4323 1.1 mrg return false;
4324 1.1 mrg
4325 1.1 mrg /* If the misalignment is unknown, we should be able to handle the access
4326 1.1 mrg so long as it is not to a member of a packed data structure. */
4327 1.1 mrg if (misalignment == -1)
4328 1.1 mrg return true;
4329 1.1 mrg
4330 1.1 mrg /* Return true if the misalignment is a multiple of the natural alignment
4331 1.1 mrg of the vector's element type. This is probably always going to be
4332 1.1 mrg true in practice, since we've already established that this isn't a
4333 1.1 mrg packed access. */
4334 1.1 mrg return misalignment % TYPE_ALIGN_UNIT (type) == 0;
4335 1.1 mrg }
4336 1.1 mrg
4337 1.1 mrg /* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.
4338 1.1 mrg
4339 1.1 mrg Return true if vector alignment is reachable (by peeling N iterations) for
4340 1.1 mrg the given scalar type TYPE. */
4341 1.1 mrg
4342 1.1 mrg static bool
4343 1.1 mrg gcn_vector_alignment_reachable (const_tree ARG_UNUSED (type), bool is_packed)
4344 1.1 mrg {
4345 1.1 mrg /* Vectors which aren't in packed structures will not be less aligned than
4346 1.1 mrg the natural alignment of their element type, so this is safe. */
4347 1.1 mrg return !is_packed;
4348 1.1 mrg }
4349 1.1 mrg
4350 1.1 mrg /* Generate DPP instructions used for vector reductions.
4351 1.1 mrg
4352 1.1 mrg The opcode is given by INSN.
4353 1.1 mrg The first operand of the operation is shifted right by SHIFT vector lanes.
4354 1.1 mrg SHIFT must be a power of 2. If SHIFT is 16, the 15th lane of each row is
4355 1.1 mrg broadcast the next row (thereby acting like a shift of 16 for the end of
4356 1.1 mrg each row). If SHIFT is 32, lane 31 is broadcast to all the
4357 1.1 mrg following lanes (thereby acting like a shift of 32 for lane 63). */
4358 1.1 mrg
4359 1.1 mrg char *
4360 1.1 mrg gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
4361 1.1 mrg int unspec, int shift)
4362 1.1 mrg {
4363 1.1 mrg static char buf[128];
4364 1.1 mrg const char *dpp;
4365 1.1 mrg const char *vcc_in = "";
4366 1.1 mrg const char *vcc_out = "";
4367 1.1 mrg
4368 1.1 mrg /* Add the vcc operand if needed. */
4369 1.1 mrg if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
4370 1.1 mrg {
4371 1.1 mrg if (unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
4372 1.1 mrg vcc_in = ", vcc";
4373 1.1 mrg
4374 1.1 mrg if (unspec == UNSPEC_PLUS_CARRY_DPP_SHR
4375 1.1 mrg || unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
4376 1.1 mrg vcc_out = ", vcc";
4377 1.1 mrg }
4378 1.1 mrg
4379 1.1 mrg /* Add the DPP modifiers. */
4380 1.1 mrg switch (shift)
4381 1.1 mrg {
4382 1.1 mrg case 1:
4383 1.1 mrg dpp = "row_shr:1 bound_ctrl:0";
4384 1.1 mrg break;
4385 1.1 mrg case 2:
4386 1.1 mrg dpp = "row_shr:2 bound_ctrl:0";
4387 1.1 mrg break;
4388 1.1 mrg case 4:
4389 1.1 mrg dpp = "row_shr:4 bank_mask:0xe";
4390 1.1 mrg break;
4391 1.1 mrg case 8:
4392 1.1 mrg dpp = "row_shr:8 bank_mask:0xc";
4393 1.1 mrg break;
4394 1.1 mrg case 16:
4395 1.1 mrg dpp = "row_bcast:15 row_mask:0xa";
4396 1.1 mrg break;
4397 1.1 mrg case 32:
4398 1.1 mrg dpp = "row_bcast:31 row_mask:0xc";
4399 1.1 mrg break;
4400 1.1 mrg default:
4401 1.1 mrg gcc_unreachable ();
4402 1.1 mrg }
4403 1.1 mrg
4404 1.1 mrg if (unspec == UNSPEC_MOV_DPP_SHR && vgpr_2reg_mode_p (mode))
4405 1.1 mrg sprintf (buf, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
4406 1.1 mrg insn, dpp, insn, dpp);
4407 1.1 mrg else if (unspec == UNSPEC_MOV_DPP_SHR)
4408 1.1 mrg sprintf (buf, "%s\t%%0, %%1 %s", insn, dpp);
4409 1.1 mrg else
4410 1.1 mrg sprintf (buf, "%s\t%%0%s, %%1, %%2%s %s", insn, vcc_out, vcc_in, dpp);
4411 1.1 mrg
4412 1.1 mrg return buf;
4413 1.1 mrg }
4414 1.1 mrg
/* Generate vector reductions in terms of DPP instructions.

   The vector register SRC of mode MODE is reduced using the operation given
   by UNSPEC, and the scalar result is returned in lane 63 of a vector
   register.  */

rtx
gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
{
  /* Remember the caller's mode; MODE may be widened below.  */
  machine_mode orig_mode = mode;
  /* 64-bit min/max, and 64-bit FP add, have no single-instruction DPP
     form; they are synthesized from a DPP move plus a separate vector
     operation (see the USE_MOVES branch in the loop below).  */
  bool use_moves = (((unspec == UNSPEC_SMIN_DPP_SHR
		      || unspec == UNSPEC_SMAX_DPP_SHR
		      || unspec == UNSPEC_UMIN_DPP_SHR
		      || unspec == UNSPEC_UMAX_DPP_SHR)
		     && (mode == V64DImode
			 || mode == V64DFmode))
		    || (unspec == UNSPEC_PLUS_DPP_SHR
			&& mode == V64DFmode));
  /* RTL code for the USE_MOVES fallback; UNKNOWN when not applicable.  */
  rtx_code code = (unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
		   : unspec == UNSPEC_SMAX_DPP_SHR ? SMAX
		   : unspec == UNSPEC_UMIN_DPP_SHR ? UMIN
		   : unspec == UNSPEC_UMAX_DPP_SHR ? UMAX
		   : unspec == UNSPEC_PLUS_DPP_SHR ? PLUS
		   : UNKNOWN);
  /* QI/HI min/max reductions are widened to SImode for the reduction and
     narrowed back afterwards.  */
  bool use_extends = ((unspec == UNSPEC_SMIN_DPP_SHR
		       || unspec == UNSPEC_SMAX_DPP_SHR
		       || unspec == UNSPEC_UMIN_DPP_SHR
		       || unspec == UNSPEC_UMAX_DPP_SHR)
		      && (mode == V64QImode
			  || mode == V64HImode));
  /* Whether the widening/narrowing conversions are unsigned.  */
  bool unsignedp = (unspec == UNSPEC_UMIN_DPP_SHR
		    || unspec == UNSPEC_UMAX_DPP_SHR);
  /* Integer adds use the carry-propagating variant on GCN3, and always
     for 64-bit elements; those insns clobber VCC (see below).  */
  bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
			&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT
			&& (TARGET_GCN3 || mode == V64DImode);

  if (use_plus_carry)
    unspec = UNSPEC_PLUS_CARRY_DPP_SHR;

  if (use_extends)
    {
      /* Widen the narrow input to SImode before reducing.  */
      rtx tmp = gen_reg_rtx (V64SImode);
      convert_move (tmp, src, unsignedp);
      src = tmp;
      mode = V64SImode;
    }

  /* Perform reduction by first performing the reduction operation on every
     pair of lanes, then on every pair of results from the previous
     iteration (thereby effectively reducing every 4 lanes) and so on until
     all lanes are reduced.  */
  rtx in, out = force_reg (mode, src);
  for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
    {
      rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
      in = out;
      out = gen_reg_rtx (mode);

      if (use_moves)
	{
	  /* No direct DPP insn: emit a DPP move of the shifted lanes,
	     then combine with the unshifted input using CODE.  */
	  rtx tmp = gen_reg_rtx (mode);
	  emit_insn (gen_dpp_move (mode, tmp, in, shift_val));
	  emit_insn (gen_rtx_SET (out, gen_rtx_fmt_ee (code, mode, tmp, in)));
	}
      else
	{
	  /* Single DPP instruction combining IN with its shifted self.  */
	  rtx insn = gen_rtx_SET (out,
				  gen_rtx_UNSPEC (mode,
						  gen_rtvec (3, in, in,
							     shift_val),
						  unspec));

	  /* Add clobber for instructions that set the carry flags.  */
	  if (use_plus_carry)
	    {
	      rtx clobber = gen_rtx_CLOBBER (VOIDmode,
					     gen_rtx_REG (DImode, VCC_REG));
	      insn = gen_rtx_PARALLEL (VOIDmode,
				       gen_rtvec (2, insn, clobber));
	    }

	  emit_insn (insn);
	}
    }

  if (use_extends)
    {
      /* Narrow the result back to the caller's original element mode.  */
      rtx tmp = gen_reg_rtx (orig_mode);
      convert_move (tmp, out, unsignedp);
      out = tmp;
    }

  return out;
}
4509 1.1 mrg
4510 1.1 mrg /* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST. */
4511 1.1 mrg
4512 1.1 mrg int
4513 1.1 mrg gcn_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
4514 1.1 mrg tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
4515 1.1 mrg {
4516 1.1 mrg /* Always vectorize. */
4517 1.1 mrg return 1;
4518 1.1 mrg }
4519 1.1 mrg
4520 1.1 mrg /* }}} */
4521 1.1 mrg /* {{{ md_reorg pass. */
4522 1.1 mrg
/* Identify VMEM instructions from their "type" attribute.

   Returns true for the vector-memory instruction types (buffer and flat
   accesses); false for every other type.  */

static bool
gcn_vmem_insn_p (attr_type type)
{
  /* Every attr_type enumerator is listed explicitly, with no "default:",
     so the compiler can warn if a new type value is added without
     updating this function.  */
  switch (type)
    {
    case TYPE_MUBUF:
    case TYPE_MTBUF:
    case TYPE_FLAT:
      return true;
    case TYPE_UNKNOWN:
    case TYPE_SOP1:
    case TYPE_SOP2:
    case TYPE_SOPK:
    case TYPE_SOPC:
    case TYPE_SOPP:
    case TYPE_SMEM:
    case TYPE_DS:
    case TYPE_VOP2:
    case TYPE_VOP1:
    case TYPE_VOPC:
    case TYPE_VOP3A:
    case TYPE_VOP3B:
    case TYPE_VOP_SDWA:
    case TYPE_VOP_DPP:
    case TYPE_MULT:
    case TYPE_VMULT:
      return false;
    }
  gcc_unreachable ();
  return false;
}
4556 1.1 mrg
4557 1.1 mrg /* If INSN sets the EXEC register to a constant value, return the value,
4558 1.1 mrg otherwise return zero. */
4559 1.1 mrg
4560 1.1 mrg static int64_t
4561 1.1 mrg gcn_insn_exec_value (rtx_insn *insn)
4562 1.1 mrg {
4563 1.1 mrg if (!NONDEBUG_INSN_P (insn))
4564 1.1 mrg return 0;
4565 1.1 mrg
4566 1.1 mrg rtx pattern = PATTERN (insn);
4567 1.1 mrg
4568 1.1 mrg if (GET_CODE (pattern) == SET)
4569 1.1 mrg {
4570 1.1 mrg rtx dest = XEXP (pattern, 0);
4571 1.1 mrg rtx src = XEXP (pattern, 1);
4572 1.1 mrg
4573 1.1 mrg if (GET_MODE (dest) == DImode
4574 1.1 mrg && REG_P (dest) && REGNO (dest) == EXEC_REG
4575 1.1 mrg && CONST_INT_P (src))
4576 1.1 mrg return INTVAL (src);
4577 1.1 mrg }
4578 1.1 mrg
4579 1.1 mrg return 0;
4580 1.1 mrg }
4581 1.1 mrg
/* Sets the EXEC register before INSN to the value that it had after
   LAST_EXEC_DEF.  The constant value of the EXEC register is returned if
   known, otherwise it returns zero.

   LAST_EXEC_DEF_SAVED (by reference, persisting across calls) records
   whether a non-constant EXEC value has already been spilled to
   EXEC_SAVE_REG after LAST_EXEC_DEF, so the save is emitted at most once
   per definition.  */

static int64_t
gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
		  bool curr_exec_known, bool &last_exec_def_saved)
{
  rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
  rtx exec;

  /* Non-zero only if LAST_EXEC_DEF set EXEC to a known constant.  */
  int64_t exec_value = gcn_insn_exec_value (last_exec_def);

  if (exec_value)
    {
      /* If the EXEC value is a constant and it happens to be the same as the
         current EXEC value, the restore can be skipped.  */
      if (curr_exec_known && exec_value == curr_exec)
	return exec_value;

      exec = GEN_INT (exec_value);
    }
  else
    {
      /* If the EXEC value is not a constant, save it in a register after the
	 point of definition.  */
      rtx exec_save_reg = gen_rtx_REG (DImode, EXEC_SAVE_REG);

      if (!last_exec_def_saved)
	{
	  start_sequence ();
	  emit_move_insn (exec_save_reg, exec_reg);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();

	  emit_insn_after (seq, last_exec_def);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Saving EXEC after insn %d.\n",
		     INSN_UID (last_exec_def));

	  last_exec_def_saved = true;
	}

      exec = exec_save_reg;
    }

  /* Restore EXEC register before the usage.  */
  start_sequence ();
  emit_move_insn (exec_reg, exec);
  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (exec_value)
	/* NOTE(review): "%ld" assumes int64_t is 'long' on the host;
	   PRId64 would be portable -- confirm host assumptions.  */
	fprintf (dump_file, "Restoring EXEC to %ld before insn %d.\n",
		 exec_value, INSN_UID (insn));
      else
	fprintf (dump_file,
		 "Restoring EXEC from saved value before insn %d.\n",
		 INSN_UID (insn));
    }

  return exec_value;
}
4648 1.1 mrg
/* Implement TARGET_MACHINE_DEPENDENT_REORG.

   Ensure that pipeline dependencies and lane masking are set correctly.
   Two sub-passes run over the whole function: first EXEC-mask placement
   per basic block, then NOP insertion for hardware hazards.  */

static void
gcn_md_reorg (void)
{
  basic_block bb;
  rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
  regset_head live;

  INIT_REG_SET (&live);

  compute_bb_for_insn ();

  if (!optimize)
    {
      /* At -O0 insns have not been split yet; do it now so the hazard
	 scan below sees the real machine instructions.  */
      split_all_insns ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "After split:\n");
	  print_rtl_with_bb (dump_file, get_insns (), dump_flags);
	}

      /* Update data-flow information for split instructions.  */
      df_insn_rescan_all ();
    }

  df_live_add_problem ();
  df_live_set_all_dirty ();
  df_analyze ();

  /* This pass ensures that the EXEC register is set correctly, according
     to the "exec" attribute.  However, care must be taken so that the
     value that reaches explicit uses of the EXEC register remains the
     same as before.
   */

  FOR_EACH_BB_FN (bb, cfun)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "BB %d:\n", bb->index);

      rtx_insn *insn, *curr;
      rtx_insn *last_exec_def = BB_HEAD (bb);
      bool last_exec_def_saved = false;
      bool curr_exec_explicit = true;
      bool curr_exec_known = true;
      int64_t curr_exec = 0;	/* 0 here means 'the value is that of EXEC
				   after last_exec_def is executed'.  */

      bitmap live_in = DF_LR_IN (bb);
      bool exec_live_on_entry = false;
      if (bitmap_bit_p (live_in, EXEC_LO_REG)
	  || bitmap_bit_p (live_in, EXEC_HI_REG))
	{
	  if (dump_file)
	    fprintf (dump_file, "EXEC reg is live on entry to block %d\n",
		     (int) bb->index);
	  exec_live_on_entry = true;
	}

      FOR_BB_INSNS_SAFE (bb, insn, curr)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (GET_CODE (PATTERN (insn)) == USE
	      || GET_CODE (PATTERN (insn)) == CLOBBER)
	    continue;

	  /* Collect the hard registers this insn writes and reads so we
	     can detect explicit EXEC definitions and uses.  */
	  HARD_REG_SET defs, uses;
	  CLEAR_HARD_REG_SET (defs);
	  CLEAR_HARD_REG_SET (uses);
	  note_stores (insn, record_hard_reg_sets, &defs);
	  note_uses (&PATTERN (insn), record_hard_reg_uses, &uses);

	  bool exec_lo_def_p = TEST_HARD_REG_BIT (defs, EXEC_LO_REG);
	  bool exec_hi_def_p = TEST_HARD_REG_BIT (defs, EXEC_HI_REG);
	  bool exec_used = (hard_reg_set_intersect_p
			    (uses, reg_class_contents[(int) EXEC_MASK_REG])
			    || TEST_HARD_REG_BIT (uses, EXECZ_REG));

	  /* Check the instruction for implicit setting of EXEC via an
	     attribute.  */
	  attr_exec exec_attr = get_attr_exec (insn);
	  int64_t new_exec;

	  switch (exec_attr)
	    {
	    case EXEC_NONE:
	      new_exec = 0;
	      break;

	    case EXEC_SINGLE:
	      /* Instructions that do not involve memory accesses only require
		 bit 0 of EXEC to be set.  */
	      if (gcn_vmem_insn_p (get_attr_type (insn))
		  || get_attr_type (insn) == TYPE_DS)
		new_exec = 1;
	      else
		new_exec = curr_exec | 1;
	      break;

	    case EXEC_FULL:
	      new_exec = -1;
	      break;

	    default:  /* Auto-detect what setting is appropriate.  */
	      {
		new_exec = 0;

		/* If EXEC is referenced explicitly then we don't need to do
		   anything to set it, so we're done.  */
		if (exec_used)
		  break;

		/* Scan the insn for VGPRs defs or uses.  The mode determines
		   what kind of exec is needed.  */
		subrtx_iterator::array_type array;
		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
		  {
		    const_rtx x = *iter;
		    if (REG_P (x) && VGPR_REGNO_P (REGNO (x)))
		      {
			if (VECTOR_MODE_P (GET_MODE (x)))
			  {
			    new_exec = -1;
			    break;
			  }
			else
			  new_exec = 1;
		      }
		  }
	      }
	      break;
	    }

	  if (new_exec && (!curr_exec_known || new_exec != curr_exec))
	    {
	      start_sequence ();
	      emit_move_insn (exec_reg, GEN_INT (new_exec));
	      rtx_insn *seq = get_insns ();
	      end_sequence ();
	      emit_insn_before (seq, insn);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		/* NOTE(review): "%ld" assumes int64_t is 'long' on the
		   host; PRId64 would be portable -- confirm.  */
		fprintf (dump_file, "Setting EXEC to %ld before insn %d.\n",
			 new_exec, INSN_UID (insn));

	      curr_exec = new_exec;
	      curr_exec_explicit = false;
	      curr_exec_known = true;
	    }
	  else if (new_exec && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Exec already is %ld before insn %d.\n",
		       new_exec, INSN_UID (insn));
	    }

	  /* The state of the EXEC register is unknown after a
	     function call.  */
	  if (CALL_P (insn))
	    curr_exec_known = false;

	  /* Handle explicit uses of EXEC.  If the instruction is a partial
	     explicit definition of EXEC, then treat it as an explicit use of
	     EXEC as well.  */
	  if (exec_used || exec_lo_def_p != exec_hi_def_p)
	    {
	      /* An instruction that explicitly uses EXEC should not also
		 implicitly define it.  */
	      gcc_assert (!exec_used || !new_exec);

	      if (!curr_exec_known || !curr_exec_explicit)
		{
		  /* Restore the previous explicitly defined value.  */
		  curr_exec = gcn_restore_exec (insn, last_exec_def,
						curr_exec, curr_exec_known,
						last_exec_def_saved);
		  curr_exec_explicit = true;
		  curr_exec_known = true;
		}
	    }

	  /* Handle explicit definitions of EXEC.  */
	  if (exec_lo_def_p || exec_hi_def_p)
	    {
	      last_exec_def = insn;
	      last_exec_def_saved = false;
	      curr_exec = gcn_insn_exec_value (insn);
	      curr_exec_explicit = true;
	      curr_exec_known = true;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "Found %s definition of EXEC at insn %d.\n",
			 exec_lo_def_p == exec_hi_def_p ? "full" : "partial",
			 INSN_UID (insn));
	    }

	  exec_live_on_entry = false;
	}

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);

      /* If EXEC is live after the basic block, restore the value of EXEC
	 at the end of the block.  */
      if ((REGNO_REG_SET_P (&live, EXEC_LO_REG)
	   || REGNO_REG_SET_P (&live, EXEC_HI_REG))
	  && (!curr_exec_known || !curr_exec_explicit || exec_live_on_entry))
	{
	  rtx_insn *end_insn = BB_END (bb);

	  /* If the instruction is not a jump instruction, do the restore
	     after the last instruction in the basic block.  */
	  if (NONJUMP_INSN_P (end_insn))
	    end_insn = NEXT_INSN (end_insn);

	  gcn_restore_exec (end_insn, last_exec_def, curr_exec,
			    curr_exec_known, last_exec_def_saved);
	}
    }

  CLEAR_REG_SET (&live);

  /* "Manually Inserted Wait States (NOPs)."

     GCN hardware detects most kinds of register dependencies, but there
     are some exceptions documented in the ISA manual.  This pass
     detects the missed cases, and inserts the documented number of NOPs
     required for correct execution.  */

  /* Ring buffer of the last MAX_WAITS relevant instructions, with the
     register sets they touched and how many "waits" ago they issued.  */
  const int max_waits = 5;
  struct ilist
  {
    rtx_insn *insn;
    attr_unit unit;
    attr_delayeduse delayeduse;
    HARD_REG_SET writes;
    HARD_REG_SET reads;
    int age;
  } back[max_waits];
  int oldest = 0;
  for (int i = 0; i < max_waits; i++)
    back[i].insn = NULL;

  rtx_insn *insn, *last_insn = NULL;
  for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	continue;

      attr_type itype = get_attr_type (insn);
      attr_unit iunit = get_attr_unit (insn);
      attr_delayeduse idelayeduse = get_attr_delayeduse (insn);
      HARD_REG_SET ireads, iwrites;
      CLEAR_HARD_REG_SET (ireads);
      CLEAR_HARD_REG_SET (iwrites);
      note_stores (insn, record_hard_reg_sets, &iwrites);
      note_uses (&PATTERN (insn), record_hard_reg_uses, &ireads);

      /* Scan recent previous instructions for dependencies not handled in
	 hardware.  */
      int nops_rqd = 0;
      for (int i = oldest; i < oldest + max_waits; i++)
	{
	  struct ilist *prev_insn = &back[i % max_waits];

	  if (!prev_insn->insn)
	    continue;

	  /* VALU writes SGPR followed by VMEM reading the same SGPR
	     requires 5 wait states.  */
	  if ((prev_insn->age + nops_rqd) < 5
	      && prev_insn->unit == UNIT_VECTOR
	      && gcn_vmem_insn_p (itype))
	    {
	      HARD_REG_SET regs = prev_insn->writes & ireads;
	      if (hard_reg_set_intersect_p
		  (regs, reg_class_contents[(int) SGPR_REGS]))
		nops_rqd = 5 - prev_insn->age;
	    }

	  /* VALU sets VCC/EXEC followed by VALU uses VCCZ/EXECZ
	     requires 5 wait states.  */
	  if ((prev_insn->age + nops_rqd) < 5
	      && prev_insn->unit == UNIT_VECTOR
	      && iunit == UNIT_VECTOR
	      && ((hard_reg_set_intersect_p
		   (prev_insn->writes,
		    reg_class_contents[(int) EXEC_MASK_REG])
		   && TEST_HARD_REG_BIT (ireads, EXECZ_REG))
		  ||
		  (hard_reg_set_intersect_p
		   (prev_insn->writes,
		    reg_class_contents[(int) VCC_CONDITIONAL_REG])
		   && TEST_HARD_REG_BIT (ireads, VCCZ_REG))))
	    nops_rqd = 5 - prev_insn->age;

	  /* VALU writes SGPR/VCC followed by v_{read,write}lane using
	     SGPR/VCC as lane select requires 4 wait states.  */
	  if ((prev_insn->age + nops_rqd) < 4
	      && prev_insn->unit == UNIT_VECTOR
	      && get_attr_laneselect (insn) == LANESELECT_YES)
	    {
	      HARD_REG_SET regs = prev_insn->writes & ireads;
	      if (hard_reg_set_intersect_p
		  (regs, reg_class_contents[(int) SGPR_REGS])
		  || hard_reg_set_intersect_p
		  (regs, reg_class_contents[(int) VCC_CONDITIONAL_REG]))
		nops_rqd = 4 - prev_insn->age;
	    }

	  /* VALU writes VGPR followed by VALU_DPP reading that VGPR
	     requires 2 wait states.  */
	  if ((prev_insn->age + nops_rqd) < 2
	      && prev_insn->unit == UNIT_VECTOR
	      && itype == TYPE_VOP_DPP)
	    {
	      HARD_REG_SET regs = prev_insn->writes & ireads;
	      if (hard_reg_set_intersect_p
		  (regs, reg_class_contents[(int) VGPR_REGS]))
		nops_rqd = 2 - prev_insn->age;
	    }

	  /* Store that requires input registers are not overwritten by
	     following instruction.  */
	  if ((prev_insn->age + nops_rqd) < 1
	      && prev_insn->delayeduse == DELAYEDUSE_YES
	      && ((hard_reg_set_intersect_p
		   (prev_insn->reads, iwrites))))
	    nops_rqd = 1 - prev_insn->age;
	}

      /* Insert the required number of NOPs.  */
      for (int i = nops_rqd; i > 0; i--)
	emit_insn_after (gen_nop (), last_insn);

      /* Age the previous instructions.  We can also ignore writes to
	 registers subsequently overwritten.  */
      HARD_REG_SET written;
      CLEAR_HARD_REG_SET (written);
      for (int i = oldest + max_waits - 1; i > oldest; i--)
	{
	  struct ilist *prev_insn = &back[i % max_waits];

	  /* Assume all instructions are equivalent to one "wait", the same
	     as s_nop.  This is probably true for SALU, but not VALU (which
	     may take longer), so this is not optimal.  However, AMD do
	     not publish the cycle times for instructions.  */
	  prev_insn->age += 1 + nops_rqd;

	  /* NOTE(review): IWRITES is invariant in this loop, so WRITTEN
	     only ever holds the current insn's writes; confirm whether
	     accumulating PREV_INSN->writes was also intended.  */
	  written |= iwrites;
	  prev_insn->writes &= ~written;
	}

      /* Track the current instruction as a previous instruction.  */
      back[oldest].insn = insn;
      back[oldest].unit = iunit;
      back[oldest].delayeduse = idelayeduse;
      back[oldest].writes = iwrites;
      back[oldest].reads = ireads;
      back[oldest].age = 0;
      oldest = (oldest + 1) % max_waits;

      last_insn = insn;
    }
}
5023 1.1 mrg
5024 1.1 mrg /* }}} */
5025 1.1 mrg /* {{{ OpenACC / OpenMP. */
5026 1.1 mrg
5027 1.1 mrg #define GCN_DEFAULT_GANGS 0 /* Choose at runtime. */
5028 1.1 mrg #define GCN_DEFAULT_WORKERS 0 /* Choose at runtime. */
5029 1.1 mrg #define GCN_DEFAULT_VECTORS 1 /* Use autovectorization only, for now. */
5030 1.1 mrg
5031 1.1 mrg /* Implement TARGET_GOACC_VALIDATE_DIMS.
5032 1.1 mrg
5033 1.1 mrg Check the launch dimensions provided for an OpenACC compute
5034 1.1 mrg region, or routine. */
5035 1.1 mrg
5036 1.1 mrg static bool
5037 1.1 mrg gcn_goacc_validate_dims (tree decl, int dims[], int fn_level,
5038 1.1 mrg unsigned /*used*/)
5039 1.1 mrg {
5040 1.1 mrg bool changed = false;
5041 1.1 mrg const int max_workers = 16;
5042 1.1 mrg
5043 1.1 mrg /* The vector size must appear to be 64, to the user, unless this is a
5044 1.1 mrg SEQ routine. The real, internal value is always 1, which means use
5045 1.1 mrg autovectorization, but the user should not see that. */
5046 1.1 mrg if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1
5047 1.1 mrg && dims[GOMP_DIM_VECTOR] >= 0)
5048 1.1 mrg {
5049 1.1 mrg if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0
5050 1.1 mrg && dims[GOMP_DIM_VECTOR] != 64)
5051 1.1 mrg warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
5052 1.1 mrg OPT_Wopenacc_dims,
5053 1.1 mrg (dims[GOMP_DIM_VECTOR]
5054 1.1 mrg ? G_("using %<vector_length (64)%>, ignoring %d")
5055 1.1 mrg : G_("using %<vector_length (64)%>, "
5056 1.1 mrg "ignoring runtime setting")),
5057 1.1 mrg dims[GOMP_DIM_VECTOR]);
5058 1.1 mrg dims[GOMP_DIM_VECTOR] = 1;
5059 1.1 mrg changed = true;
5060 1.1 mrg }
5061 1.1 mrg
5062 1.1 mrg /* Check the num workers is not too large. */
5063 1.1 mrg if (dims[GOMP_DIM_WORKER] > max_workers)
5064 1.1 mrg {
5065 1.1 mrg warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
5066 1.1 mrg OPT_Wopenacc_dims,
5067 1.1 mrg "using %<num_workers (%d)%>, ignoring %d",
5068 1.1 mrg max_workers, dims[GOMP_DIM_WORKER]);
5069 1.1 mrg dims[GOMP_DIM_WORKER] = max_workers;
5070 1.1 mrg changed = true;
5071 1.1 mrg }
5072 1.1 mrg
5073 1.1 mrg /* Set global defaults. */
5074 1.1 mrg if (!decl)
5075 1.1 mrg {
5076 1.1 mrg dims[GOMP_DIM_VECTOR] = GCN_DEFAULT_VECTORS;
5077 1.1 mrg if (dims[GOMP_DIM_WORKER] < 0)
5078 1.1 mrg dims[GOMP_DIM_WORKER] = GCN_DEFAULT_WORKERS;
5079 1.1 mrg if (dims[GOMP_DIM_GANG] < 0)
5080 1.1 mrg dims[GOMP_DIM_GANG] = GCN_DEFAULT_GANGS;
5081 1.1 mrg changed = true;
5082 1.1 mrg }
5083 1.1 mrg
5084 1.1 mrg return changed;
5085 1.1 mrg }
5086 1.1 mrg
/* Helper function for oacc_dim_size instruction.
   Also used for OpenMP, via builtin_gcn_dim_size, and the omp_gcn pass.

   DIM selects the launch dimension: 0 = gang, 1 = worker, 2 = vector.
   Returns an RTX whose runtime value is the size of that dimension,
   read from the HSA dispatch packet via the dispatch pointer.  */

rtx
gcn_oacc_dim_size (int dim)
{
  if (dim < 0 || dim > 2)
    error ("offload dimension out of range (%d)", dim);

  /* Vectors are a special case.  */
  if (dim == 2)
    return const1_rtx;		/* Think of this as 1 times 64.  */

  /* Indexed by DIM; the vector (Y) entry is unreachable because
     dim == 2 already returned above.  */
  static int offset[] = {
    /* Offsets into dispatch packet.  */
    12,				/* X dim = Gang / Team / Work-group.  */
    20,				/* Z dim = Worker / Thread / Wavefront.  */
    16				/* Y dim = Vector / SIMD / Work-item.  */
  };
  /* Load a 32-bit size field at DISPATCH_PTR + offset[dim].  */
  rtx addr = gen_rtx_PLUS (DImode,
			   gen_rtx_REG (DImode,
					cfun->machine->args.
					reg[DISPATCH_PTR_ARG]),
			   GEN_INT (offset[dim]));
  return gen_rtx_MEM (SImode, addr);
}
5113 1.1 mrg
5114 1.1 mrg /* Helper function for oacc_dim_pos instruction.
5115 1.1 mrg Also used for OpenMP, via builtin_gcn_dim_pos, and the omp_gcn pass. */
5116 1.1 mrg
5117 1.1 mrg rtx
5118 1.1 mrg gcn_oacc_dim_pos (int dim)
5119 1.1 mrg {
5120 1.1 mrg if (dim < 0 || dim > 2)
5121 1.1 mrg error ("offload dimension out of range (%d)", dim);
5122 1.1 mrg
5123 1.1 mrg static const int reg[] = {
5124 1.1 mrg WORKGROUP_ID_X_ARG, /* Gang / Team / Work-group. */
5125 1.1 mrg WORK_ITEM_ID_Z_ARG, /* Worker / Thread / Wavefront. */
5126 1.1 mrg WORK_ITEM_ID_Y_ARG /* Vector / SIMD / Work-item. */
5127 1.1 mrg };
5128 1.1 mrg
5129 1.1 mrg int reg_num = cfun->machine->args.reg[reg[dim]];
5130 1.1 mrg
5131 1.1 mrg /* The information must have been requested by the kernel. */
5132 1.1 mrg gcc_assert (reg_num >= 0);
5133 1.1 mrg
5134 1.1 mrg return gen_rtx_REG (SImode, reg_num);
5135 1.1 mrg }
5136 1.1 mrg
5137 1.1 mrg /* Implement TARGET_GOACC_FORK_JOIN. */
5138 1.1 mrg
5139 1.1 mrg static bool
5140 1.1 mrg gcn_fork_join (gcall *call, const int dims[], bool is_fork)
5141 1.1 mrg {
5142 1.1 mrg tree arg = gimple_call_arg (call, 2);
5143 1.1 mrg unsigned axis = TREE_INT_CST_LOW (arg);
5144 1.1 mrg
5145 1.1 mrg if (!is_fork && axis == GOMP_DIM_WORKER && dims[axis] != 1)
5146 1.1 mrg return true;
5147 1.1 mrg
5148 1.1 mrg return false;
5149 1.1 mrg }
5150 1.1 mrg
/* Implement ???????
   FIXME make this a real hook.

   Adjust FNDECL such that options inherited from the host compiler
   are made appropriate for the accelerator compiler.  */

void
gcn_fixup_accel_lto_options (tree fndecl)
{
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
  if (!func_optimize)
    return;

  /* Snapshot the current global optimization state so it can be
     compared against, and restored, below.  */
  tree old_optimize
    = build_optimization_node (&global_options, &global_options_set);
  tree new_optimize;

  /* If the function changed the optimization levels as well as
     setting target options, start with the optimizations
     specified.  */
  if (func_optimize != old_optimize)
    cl_optimization_restore (&global_options, &global_options_set,
			     TREE_OPTIMIZATION (func_optimize));

  /* Re-apply the GCN-specific overrides on top of the (possibly
     host-derived) options now in effect.  */
  gcn_option_override ();

  /* The target attributes may also change some optimization flags,
     so update the optimization options if necessary.  */
  new_optimize = build_optimization_node (&global_options,
					  &global_options_set);

  if (old_optimize != new_optimize)
    {
      /* Record the adjusted options on FNDECL, then restore the
	 previous global state so later functions start clean.  */
      DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
      cl_optimization_restore (&global_options, &global_options_set,
			       TREE_OPTIMIZATION (old_optimize));
    }
}
5189 1.1 mrg
5190 1.1 mrg /* Implement TARGET_GOACC_SHARED_MEM_LAYOUT hook. */
5191 1.1 mrg
5192 1.1 mrg static void
5193 1.1 mrg gcn_shared_mem_layout (unsigned HOST_WIDE_INT *lo,
5194 1.1 mrg unsigned HOST_WIDE_INT *hi,
5195 1.1 mrg int ARG_UNUSED (dims[GOMP_DIM_MAX]),
5196 1.1 mrg unsigned HOST_WIDE_INT
5197 1.1 mrg ARG_UNUSED (private_size[GOMP_DIM_MAX]),
5198 1.1 mrg unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX])
5199 1.1 mrg {
5200 1.1 mrg *lo = gang_private_size_opt + reduction_size[GOMP_DIM_WORKER];
5201 1.1 mrg /* !!! We can maybe use dims[] to estimate the maximum number of work
5202 1.1 mrg groups/wavefronts/etc. we will launch, and therefore tune the maximum
5203 1.1 mrg amount of LDS we should use. For now, use a minimal amount to try to
5204 1.1 mrg maximise occupancy. */
5205 1.1 mrg *hi = acc_lds_size;
5206 1.1 mrg machine_function *machfun = cfun->machine;
5207 1.1 mrg machfun->reduction_base = gang_private_size_opt;
5208 1.1 mrg machfun->reduction_limit
5209 1.1 mrg = gang_private_size_opt + reduction_size[GOMP_DIM_WORKER];
5210 1.1 mrg }
5211 1.1 mrg
5212 1.1 mrg /* }}} */
5213 1.1 mrg /* {{{ ASM Output. */
5214 1.1 mrg
5215 1.1 mrg /* Implement TARGET_ASM_FILE_START.
5216 1.1 mrg
5217 1.1 mrg Print assembler file header text. */
5218 1.1 mrg
5219 1.1 mrg static void
5220 1.1 mrg output_file_start (void)
5221 1.1 mrg {
5222 1.1 mrg const char *cpu;
5223 1.1 mrg bool use_xnack_attr = true;
5224 1.1 mrg bool use_sram_attr = true;
5225 1.1 mrg switch (gcn_arch)
5226 1.1 mrg {
5227 1.1 mrg case PROCESSOR_FIJI:
5228 1.1 mrg cpu = "gfx803";
5229 1.1 mrg #ifndef HAVE_GCN_XNACK_FIJI
5230 1.1 mrg use_xnack_attr = false;
5231 1.1 mrg #endif
5232 1.1 mrg use_sram_attr = false;
5233 1.1 mrg break;
5234 1.1 mrg case PROCESSOR_VEGA10:
5235 1.1 mrg cpu = "gfx900";
5236 1.1 mrg #ifndef HAVE_GCN_XNACK_GFX900
5237 1.1 mrg use_xnack_attr = false;
5238 1.1 mrg #endif
5239 1.1 mrg use_sram_attr = false;
5240 1.1 mrg break;
5241 1.1 mrg case PROCESSOR_VEGA20:
5242 1.1 mrg cpu = "gfx906";
5243 1.1 mrg #ifndef HAVE_GCN_XNACK_GFX906
5244 1.1 mrg use_xnack_attr = false;
5245 1.1 mrg #endif
5246 1.1 mrg use_sram_attr = false;
5247 1.1 mrg break;
5248 1.1 mrg case PROCESSOR_GFX908:
5249 1.1 mrg cpu = "gfx908";
5250 1.1 mrg #ifndef HAVE_GCN_XNACK_GFX908
5251 1.1 mrg use_xnack_attr = false;
5252 1.1 mrg #endif
5253 1.1 mrg #ifndef HAVE_GCN_SRAM_ECC_GFX908
5254 1.1 mrg use_sram_attr = false;
5255 1.1 mrg #endif
5256 1.1 mrg break;
5257 1.1 mrg default: gcc_unreachable ();
5258 1.1 mrg }
5259 1.1 mrg
5260 1.1 mrg #if HAVE_GCN_ASM_V3_SYNTAX
5261 1.1 mrg const char *xnack = (flag_xnack ? "+xnack" : "");
5262 1.1 mrg const char *sram_ecc = (flag_sram_ecc ? "+sram-ecc" : "");
5263 1.1 mrg #endif
5264 1.1 mrg #if HAVE_GCN_ASM_V4_SYNTAX
5265 1.1 mrg /* In HSACOv4 no attribute setting means the binary supports "any" hardware
5266 1.1 mrg configuration. In GCC binaries, this is true for SRAM ECC, but not
5267 1.1 mrg XNACK. */
5268 1.1 mrg const char *xnack = (flag_xnack ? ":xnack+" : ":xnack-");
5269 1.1 mrg const char *sram_ecc = (flag_sram_ecc == SRAM_ECC_ON ? ":sramecc+"
5270 1.1 mrg : flag_sram_ecc == SRAM_ECC_OFF ? ":sramecc-"
5271 1.1 mrg : "");
5272 1.1 mrg #endif
5273 1.1 mrg if (!use_xnack_attr)
5274 1.1 mrg xnack = "";
5275 1.1 mrg if (!use_sram_attr)
5276 1.1 mrg sram_ecc = "";
5277 1.1 mrg
5278 1.1 mrg fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n",
5279 1.1 mrg cpu,
5280 1.1 mrg #if HAVE_GCN_ASM_V3_SYNTAX
5281 1.1 mrg xnack, sram_ecc
5282 1.1 mrg #endif
5283 1.1 mrg #ifdef HAVE_GCN_ASM_V4_SYNTAX
5284 1.1 mrg sram_ecc, xnack
5285 1.1 mrg #endif
5286 1.1 mrg );
5287 1.1 mrg }
5288 1.1 mrg
/* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.

   Print the initial definition of a function name.

   For GCN kernel entry points this includes all the HSA meta-data, special
   alignment constraints that don't apply to regular functions, and magic
   comments that pass information to mkoffload.  */

void
gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
{
  int sgpr, vgpr;
  /* NOTE(review): always false here, so .amdhsa_reserve_xnack_mask is
     always emitted as 0 -- confirm whether XNACK kernels need 1.  */
  bool xnack_enabled = false;

  fputs ("\n\n", file);

  /* Ordinary (non-kernel) functions need only the type and label.  */
  if (cfun && cfun->machine && cfun->machine->normal_function)
    {
      fputs ("\t.type\t", file);
      assemble_name (file, name);
      fputs (",@function\n", file);
      assemble_name (file, name);
      fputs (":\n", file);
      return;
    }

  /* Determine count of sgpr/vgpr registers by looking for last
     one used.  */
  for (sgpr = 101; sgpr >= 0; sgpr--)
    if (df_regs_ever_live_p (FIRST_SGPR_REG + sgpr))
      break;
  sgpr++;
  for (vgpr = 255; vgpr >= 0; vgpr--)
    if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
      break;
  vgpr++;

  if (!leaf_function_p ())
    {
      /* We can't know how many registers function calls might use.  */
      if (vgpr < MAX_NORMAL_VGPR_COUNT)
	vgpr = MAX_NORMAL_VGPR_COUNT;
      if (sgpr < MAX_NORMAL_SGPR_COUNT)
	sgpr = MAX_NORMAL_SGPR_COUNT;
    }

  /* Open the .amdhsa_kernel descriptor block in .rodata.  */
  fputs ("\t.rodata\n"
	 "\t.p2align\t6\n"
	 "\t.amdhsa_kernel\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
  int reg = FIRST_SGPR_REG;
  for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
    {
      int reg_first = -1;
      int reg_last;
      /* Arguments without a fixed register are allocated SGPRs in
	 order; track which range each one occupies.  */
      if ((cfun->machine->args.requested & (1 << a))
	  && (gcn_kernel_arg_types[a].fixed_regno < 0))
	{
	  reg_first = reg;
	  reg_last = (reg_first
		      + (GET_MODE_SIZE (gcn_kernel_arg_types[a].mode)
			 / UNITS_PER_WORD) - 1);
	  reg = reg_last + 1;
	}

      if (gcn_kernel_arg_types[a].header_pseudo)
	{
	  /* Emit the directive, commented out (";") if the argument
	     was not requested; the %i prints the 0/1 enable flag.  */
	  fprintf (file, "\t %s%s\t%i",
		   (cfun->machine->args.requested & (1 << a)) != 0 ? "" : ";",
		   gcn_kernel_arg_types[a].header_pseudo,
		   (cfun->machine->args.requested & (1 << a)) != 0);
	  if (reg_first != -1)
	    {
	      /* Append the allocated register range as a comment.  */
	      fprintf (file, " ; (");
	      for (int i = reg_first; i <= reg_last; ++i)
		{
		  if (i != reg_first)
		    fprintf (file, ", ");
		  fprintf (file, "%s", reg_names[i]);
		}
	      fprintf (file, ")");
	    }
	  fprintf (file, "\n");
	}
      else if (gcn_kernel_arg_types[a].fixed_regno >= 0
	       && cfun->machine->args.requested & (1 << a))
	fprintf (file, "\t ; %s\t%i (%s)\n",
		 gcn_kernel_arg_types[a].name,
		 (cfun->machine->args.requested & (1 << a)) != 0,
		 reg_names[gcn_kernel_arg_types[a].fixed_regno]);
    }
  /* 2 enables X+Y+Z work-item IDs, 1 enables X+Y, 0 enables X only.  */
  fprintf (file, "\t .amdhsa_system_vgpr_workitem_id\t%i\n",
	   (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG))
	   ? 2
	   : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
	   ? 1 : 0);
  fprintf (file,
	   "\t .amdhsa_next_free_vgpr\t%i\n"
	   "\t .amdhsa_next_free_sgpr\t%i\n"
	   "\t .amdhsa_reserve_vcc\t1\n"
	   "\t .amdhsa_reserve_flat_scratch\t0\n"
	   "\t .amdhsa_reserve_xnack_mask\t%i\n"
	   "\t .amdhsa_private_segment_fixed_size\t%i\n"
	   "\t .amdhsa_group_segment_fixed_size\t%u\n"
	   "\t .amdhsa_float_denorm_mode_32\t3\n"
	   "\t .amdhsa_float_denorm_mode_16_64\t3\n",
	   vgpr,
	   sgpr,
	   xnack_enabled,
	   /* workitem_private_segment_bytes_size needs to be
	      one 64th the wave-front stack size.  */
	   stack_size_opt / 64,
	   LDS_SIZE);
  fputs ("\t.end_amdhsa_kernel\n", file);

#if 1
  /* The following is YAML embedded in assembler; tabs are not allowed.  */
  fputs ("        .amdgpu_metadata\n"
	 "        amdhsa.version:\n"
	 "          - 1\n"
	 "          - 0\n"
	 "        amdhsa.kernels:\n"
	 "          - .name: ", file);
  assemble_name (file, name);
  fputs ("\n            .symbol: ", file);
  assemble_name (file, name);
  fprintf (file,
	   ".kd\n"
	   "            .kernarg_segment_size: %i\n"
	   "            .kernarg_segment_align: %i\n"
	   "            .group_segment_fixed_size: %u\n"
	   "            .private_segment_fixed_size: %i\n"
	   "            .wavefront_size: 64\n"
	   "            .sgpr_count: %i\n"
	   "            .vgpr_count: %i\n"
	   "            .max_flat_workgroup_size: 1024\n",
	   cfun->machine->kernarg_segment_byte_size,
	   cfun->machine->kernarg_segment_alignment,
	   LDS_SIZE,
	   stack_size_opt / 64,
	   sgpr, vgpr);
  fputs ("        .end_amdgpu_metadata\n", file);
#endif

  /* Switch back to .text and emit the (256-byte aligned) kernel label.  */
  fputs ("\t.text\n", file);
  fputs ("\t.align\t256\n", file);
  fputs ("\t.type\t", file);
  assemble_name (file, name);
  fputs (",@function\n", file);
  assemble_name (file, name);
  fputs (":\n", file);

  /* This comment is read by mkoffload.  */
  if (flag_openacc)
    fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG),
	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER),
	     oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name);
}
5449 1.1 mrg
5450 1.1 mrg /* Implement TARGET_ASM_SELECT_SECTION.
5451 1.1 mrg
5452 1.1 mrg Return the section into which EXP should be placed. */
5453 1.1 mrg
5454 1.1 mrg static section *
5455 1.1 mrg gcn_asm_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
5456 1.1 mrg {
5457 1.1 mrg if (TREE_TYPE (exp) != error_mark_node
5458 1.1 mrg && TYPE_ADDR_SPACE (TREE_TYPE (exp)) == ADDR_SPACE_LDS)
5459 1.1 mrg {
5460 1.1 mrg if (!DECL_P (exp))
5461 1.1 mrg return get_section (".lds_bss",
5462 1.1 mrg SECTION_WRITE | SECTION_BSS | SECTION_DEBUG,
5463 1.1 mrg NULL);
5464 1.1 mrg
5465 1.1 mrg return get_named_section (exp, ".lds_bss", reloc);
5466 1.1 mrg }
5467 1.1 mrg
5468 1.1 mrg return default_elf_select_section (exp, reloc, align);
5469 1.1 mrg }
5470 1.1 mrg
5471 1.1 mrg /* Implement TARGET_ASM_FUNCTION_PROLOGUE.
5472 1.1 mrg
5473 1.1 mrg Emits custom text into the assembler file at the head of each function. */
5474 1.1 mrg
5475 1.1 mrg static void
5476 1.1 mrg gcn_target_asm_function_prologue (FILE *file)
5477 1.1 mrg {
5478 1.1 mrg machine_function *offsets = gcn_compute_frame_offsets ();
5479 1.1 mrg
5480 1.1 mrg asm_fprintf (file, "\t; using %s addressing in function\n",
5481 1.1 mrg offsets->use_flat_addressing ? "flat" : "global");
5482 1.1 mrg
5483 1.1 mrg if (offsets->normal_function)
5484 1.1 mrg {
5485 1.1 mrg asm_fprintf (file, "\t; frame pointer needed: %s\n",
5486 1.1 mrg offsets->need_frame_pointer ? "true" : "false");
5487 1.1 mrg asm_fprintf (file, "\t; lr needs saving: %s\n",
5488 1.1 mrg offsets->lr_needs_saving ? "true" : "false");
5489 1.1 mrg asm_fprintf (file, "\t; outgoing args size: %wd\n",
5490 1.1 mrg offsets->outgoing_args_size);
5491 1.1 mrg asm_fprintf (file, "\t; pretend size: %wd\n", offsets->pretend_size);
5492 1.1 mrg asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
5493 1.1 mrg asm_fprintf (file, "\t; callee save size: %wd\n",
5494 1.1 mrg offsets->callee_saves);
5495 1.1 mrg }
5496 1.1 mrg else
5497 1.1 mrg {
5498 1.1 mrg asm_fprintf (file, "\t; HSA kernel entry point\n");
5499 1.1 mrg asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
5500 1.1 mrg asm_fprintf (file, "\t; outgoing args size: %wd\n",
5501 1.1 mrg offsets->outgoing_args_size);
5502 1.1 mrg }
5503 1.1 mrg }
5504 1.1 mrg
/* Helper function for print_operand and print_operand_address.

   Print a register as the assembler requires, according to mode and name.
   32-bit (and narrower) modes use the plain name; 64-bit modes use a
   two-register pair or a special pair name; TImode uses a four-register
   range.  */

static void
print_reg (FILE *file, rtx x)
{
  machine_mode mode = GET_MODE (x);
  if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
      || mode == HFmode || mode == SFmode
      || mode == V64SFmode || mode == V64SImode
      || mode == V64QImode || mode == V64HImode)
    fprintf (file, "%s", reg_names[REGNO (x)]);
  else if (mode == DImode || mode == V64DImode
	   || mode == DFmode || mode == V64DFmode)
    {
      /* 64-bit values occupy two consecutive registers.  */
      if (SGPR_REGNO_P (REGNO (x)))
	fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
		 REGNO (x) - FIRST_SGPR_REG + 1);
      else if (VGPR_REGNO_P (REGNO (x)))
	fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
		 REGNO (x) - FIRST_VGPR_REG + 1);
      /* Known special-register pairs have dedicated assembler names.  */
      else if (REGNO (x) == FLAT_SCRATCH_REG)
	fprintf (file, "flat_scratch");
      else if (REGNO (x) == EXEC_REG)
	fprintf (file, "exec");
      else if (REGNO (x) == VCC_LO_REG)
	fprintf (file, "vcc");
      else
	fprintf (file, "[%s:%s]",
		 reg_names[REGNO (x)], reg_names[REGNO (x) + 1]);
    }
  else if (mode == TImode)
    {
      /* 128-bit values occupy four consecutive registers.  */
      if (SGPR_REGNO_P (REGNO (x)))
	fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
		 REGNO (x) - FIRST_SGPR_REG + 3);
      else if (VGPR_REGNO_P (REGNO (x)))
	fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
		 REGNO (x) - FIRST_VGPR_REG + 3);
      else
	gcc_unreachable ();
    }
  else
    gcc_unreachable ();
}
5551 1.1 mrg
5552 1.1 mrg /* Implement TARGET_SECTION_TYPE_FLAGS.
5553 1.1 mrg
5554 1.1 mrg Return a set of section attributes for use by TARGET_ASM_NAMED_SECTION. */
5555 1.1 mrg
5556 1.1 mrg static unsigned int
5557 1.1 mrg gcn_section_type_flags (tree decl, const char *name, int reloc)
5558 1.1 mrg {
5559 1.1 mrg if (strcmp (name, ".lds_bss") == 0)
5560 1.1 mrg return SECTION_WRITE | SECTION_BSS | SECTION_DEBUG;
5561 1.1 mrg
5562 1.1 mrg return default_section_type_flags (decl, name, reloc);
5563 1.1 mrg }
5564 1.1 mrg
/* Helper function for gcn_asm_output_symbol_ref.

   Print the LDS byte offset assigned to VAR, assigning a fresh offset
   past the current high-water mark on first use.

   FIXME: This function is used to lay out gang-private variables in LDS
   on a per-CU basis.
   There may be cases in which gang-private variables in different compilation
   units could clobber each other.  In that case we should be relying on the
   linker to lay out gang-private LDS space, but that doesn't appear to be
   possible at present.  */

static void
gcn_print_lds_decl (FILE *f, tree var)
{
  int *offset;
  /* Reuse the offset previously assigned to VAR, if any.  */
  if ((offset = lds_allocs.get (var)))
    fprintf (f, "%u", (unsigned) *offset);
  else
    {
      unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (var);
      tree type = TREE_TYPE (var);
      unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
      /* Bump under-aligned larger objects up to 8-byte alignment
	 (presumably to suit 64-bit LDS accesses -- TODO confirm).  */
      if (size > align && size > 4 && align < 8)
	align = 8;

      /* Round the high-water mark up to ALIGN (a power of two).  */
      gang_private_hwm = ((gang_private_hwm + align - 1) & ~(align - 1));

      /* Record and print the new offset, then claim the space.  */
      lds_allocs.put (var, gang_private_hwm);
      fprintf (f, "%u", gang_private_hwm);
      gang_private_hwm += size;
      if (gang_private_hwm > gang_private_size_opt)
	error ("%d bytes of gang-private data-share memory exhausted"
	       " (increase with %<-mgang-private-size=%d%>, for example)",
	       gang_private_size_opt, gang_private_hwm);
    }
}
5599 1.1 mrg
/* Implement ASM_OUTPUT_SYMBOL_REF via gcn-hsa.h.

   Output a reference to symbol X: LDS variables are printed as their
   numeric offset within the gang-private LDS block; everything else is
   printed as an ordinary symbol name.  */

void
gcn_asm_output_symbol_ref (FILE *file, rtx x)
{
  tree decl;
  if (cfun
      && (decl = SYMBOL_REF_DECL (x)) != 0
      && TREE_CODE (decl) == VAR_DECL
      && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
    {
      /* LDS symbols (emitted using this hook) are only used at present
	 to propagate worker values from an active thread to neutered
	 threads.  Use the same offset for each such block, but don't
	 use zero because null pointers are used to identify the active
	 thread in GOACC_single_copy_start calls.  */
      gcn_print_lds_decl (file, decl);
    }
  else
    {
      assemble_name (file, XSTR (x, 0));
      /* FIXME: See above -- this condition is unreachable.  */
      if (cfun
	  && (decl = SYMBOL_REF_DECL (x)) != 0
	  && TREE_CODE (decl) == VAR_DECL
	  && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
	fputs ("@abs32", file);
    }
}
5629 1.1 mrg
5630 1.1 mrg /* Implement TARGET_CONSTANT_ALIGNMENT.
5631 1.1 mrg
5632 1.1 mrg Returns the alignment in bits of a constant that is being placed in memory.
5633 1.1 mrg CONSTANT is the constant and BASIC_ALIGN is the alignment that the object
5634 1.1 mrg would ordinarily have. */
5635 1.1 mrg
5636 1.1 mrg static HOST_WIDE_INT
5637 1.1 mrg gcn_constant_alignment (const_tree ARG_UNUSED (constant),
5638 1.1 mrg HOST_WIDE_INT basic_align)
5639 1.1 mrg {
5640 1.1 mrg return basic_align > 128 ? basic_align : 128;
5641 1.1 mrg }
5642 1.1 mrg
5643 1.1 mrg /* Implement PRINT_OPERAND_ADDRESS via gcn.h. */
5644 1.1 mrg
5645 1.1 mrg void
5646 1.1 mrg print_operand_address (FILE *file, rtx mem)
5647 1.1 mrg {
5648 1.1 mrg gcc_assert (MEM_P (mem));
5649 1.1 mrg
5650 1.1 mrg rtx reg;
5651 1.1 mrg rtx offset;
5652 1.1 mrg addr_space_t as = MEM_ADDR_SPACE (mem);
5653 1.1 mrg rtx addr = XEXP (mem, 0);
5654 1.1 mrg gcc_assert (REG_P (addr) || GET_CODE (addr) == PLUS);
5655 1.1 mrg
5656 1.1 mrg if (AS_SCRATCH_P (as))
5657 1.1 mrg switch (GET_CODE (addr))
5658 1.1 mrg {
5659 1.1 mrg case REG:
5660 1.1 mrg print_reg (file, addr);
5661 1.1 mrg break;
5662 1.1 mrg
5663 1.1 mrg case PLUS:
5664 1.1 mrg reg = XEXP (addr, 0);
5665 1.1 mrg offset = XEXP (addr, 1);
5666 1.1 mrg print_reg (file, reg);
5667 1.1 mrg if (GET_CODE (offset) == CONST_INT)
5668 1.1 mrg fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5669 1.1 mrg else
5670 1.1 mrg abort ();
5671 1.1 mrg break;
5672 1.1 mrg
5673 1.1 mrg default:
5674 1.1 mrg debug_rtx (addr);
5675 1.1 mrg abort ();
5676 1.1 mrg }
5677 1.1 mrg else if (AS_ANY_FLAT_P (as))
5678 1.1 mrg {
5679 1.1 mrg if (GET_CODE (addr) == REG)
5680 1.1 mrg print_reg (file, addr);
5681 1.1 mrg else
5682 1.1 mrg {
5683 1.1 mrg gcc_assert (TARGET_GCN5_PLUS);
5684 1.1 mrg print_reg (file, XEXP (addr, 0));
5685 1.1 mrg }
5686 1.1 mrg }
5687 1.1 mrg else if (AS_GLOBAL_P (as))
5688 1.1 mrg {
5689 1.1 mrg gcc_assert (TARGET_GCN5_PLUS);
5690 1.1 mrg
5691 1.1 mrg rtx base = addr;
5692 1.1 mrg rtx vgpr_offset = NULL_RTX;
5693 1.1 mrg
5694 1.1 mrg if (GET_CODE (addr) == PLUS)
5695 1.1 mrg {
5696 1.1 mrg base = XEXP (addr, 0);
5697 1.1 mrg
5698 1.1 mrg if (GET_CODE (base) == PLUS)
5699 1.1 mrg {
5700 1.1 mrg /* (SGPR + VGPR) + CONST */
5701 1.1 mrg vgpr_offset = XEXP (base, 1);
5702 1.1 mrg base = XEXP (base, 0);
5703 1.1 mrg }
5704 1.1 mrg else
5705 1.1 mrg {
5706 1.1 mrg rtx offset = XEXP (addr, 1);
5707 1.1 mrg
5708 1.1 mrg if (REG_P (offset))
5709 1.1 mrg /* SGPR + VGPR */
5710 1.1 mrg vgpr_offset = offset;
5711 1.1 mrg else if (CONST_INT_P (offset))
5712 1.1 mrg /* VGPR + CONST or SGPR + CONST */
5713 1.1 mrg ;
5714 1.1 mrg else
5715 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5716 1.1 mrg }
5717 1.1 mrg }
5718 1.1 mrg
5719 1.1 mrg if (REG_P (base))
5720 1.1 mrg {
5721 1.1 mrg if (VGPR_REGNO_P (REGNO (base)))
5722 1.1 mrg print_reg (file, base);
5723 1.1 mrg else if (SGPR_REGNO_P (REGNO (base)))
5724 1.1 mrg {
5725 1.1 mrg /* The assembler requires a 64-bit VGPR pair here, even though
5726 1.1 mrg the offset should be only 32-bit. */
5727 1.1 mrg if (vgpr_offset == NULL_RTX)
5728 1.1 mrg /* In this case, the vector offset is zero, so we use the first
5729 1.1 mrg lane of v1, which is initialized to zero. */
5730 1.1 mrg {
5731 1.1 mrg if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED)
5732 1.1 mrg fprintf (file, "v1");
5733 1.1 mrg else
5734 1.1 mrg fprintf (file, "v[1:2]");
5735 1.1 mrg }
5736 1.1 mrg else if (REG_P (vgpr_offset)
5737 1.1 mrg && VGPR_REGNO_P (REGNO (vgpr_offset)))
5738 1.1 mrg {
5739 1.1 mrg if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED)
5740 1.1 mrg fprintf (file, "v%d",
5741 1.1 mrg REGNO (vgpr_offset) - FIRST_VGPR_REG);
5742 1.1 mrg else
5743 1.1 mrg fprintf (file, "v[%d:%d]",
5744 1.1 mrg REGNO (vgpr_offset) - FIRST_VGPR_REG,
5745 1.1 mrg REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
5746 1.1 mrg }
5747 1.1 mrg else
5748 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5749 1.1 mrg }
5750 1.1 mrg }
5751 1.1 mrg else
5752 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5753 1.1 mrg }
5754 1.1 mrg else if (AS_ANY_DS_P (as))
5755 1.1 mrg switch (GET_CODE (addr))
5756 1.1 mrg {
5757 1.1 mrg case REG:
5758 1.1 mrg print_reg (file, addr);
5759 1.1 mrg break;
5760 1.1 mrg
5761 1.1 mrg case PLUS:
5762 1.1 mrg reg = XEXP (addr, 0);
5763 1.1 mrg print_reg (file, reg);
5764 1.1 mrg break;
5765 1.1 mrg
5766 1.1 mrg default:
5767 1.1 mrg debug_rtx (addr);
5768 1.1 mrg abort ();
5769 1.1 mrg }
5770 1.1 mrg else
5771 1.1 mrg switch (GET_CODE (addr))
5772 1.1 mrg {
5773 1.1 mrg case REG:
5774 1.1 mrg print_reg (file, addr);
5775 1.1 mrg fprintf (file, ", 0");
5776 1.1 mrg break;
5777 1.1 mrg
5778 1.1 mrg case PLUS:
5779 1.1 mrg reg = XEXP (addr, 0);
5780 1.1 mrg offset = XEXP (addr, 1);
5781 1.1 mrg print_reg (file, reg);
5782 1.1 mrg fprintf (file, ", ");
5783 1.1 mrg if (GET_CODE (offset) == REG)
5784 1.1 mrg print_reg (file, reg);
5785 1.1 mrg else if (GET_CODE (offset) == CONST_INT)
5786 1.1 mrg fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5787 1.1 mrg else
5788 1.1 mrg abort ();
5789 1.1 mrg break;
5790 1.1 mrg
5791 1.1 mrg default:
5792 1.1 mrg debug_rtx (addr);
5793 1.1 mrg abort ();
5794 1.1 mrg }
5795 1.1 mrg }
5796 1.1 mrg
5797 1.1 mrg /* Implement PRINT_OPERAND via gcn.h.
5798 1.1 mrg
5799 1.1 mrg b - print operand size as untyped operand (b8/b16/b32/b64)
5800 1.1 mrg B - print operand size as SI/DI untyped operand (b32/b32/b32/b64)
5801 1.1 mrg i - print operand size as untyped operand (i16/b32/i64)
5802 1.1 mrg I - print operand size as SI/DI untyped operand(i32/b32/i64)
5803 1.1 mrg u - print operand size as untyped operand (u16/u32/u64)
5804 1.1 mrg U - print operand size as SI/DI untyped operand(u32/u64)
5805 1.1 mrg o - print operand size as memory access size for loads
5806 1.1 mrg (ubyte/ushort/dword/dwordx2/wordx3/dwordx4)
5807 1.1 mrg s - print operand size as memory access size for stores
5808 1.1 mrg (byte/short/dword/dwordx2/wordx3/dwordx4)
5809 1.1 mrg C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...)
5810 1.1 mrg c - print inverse conditional code for s_cbranch
5811 1.1 mrg D - print conditional code for s_cmp (eq_u64/lg_u64...)
5812 1.1 mrg E - print conditional code for v_cmp (eq_u64/ne_u64...)
5813 1.1 mrg A - print address in formatting suitable for given address space.
5814 1.1 mrg O - print offset:n for data share operations.
5815 1.1 mrg ^ - print "_co" suffix for GCN5 mnemonics
5816 1.1 mrg g - print "glc", if appropriate for given MEM
5817 1.1 mrg */
5818 1.1 mrg
5819 1.1 mrg void
5820 1.1 mrg print_operand (FILE *file, rtx x, int code)
5821 1.1 mrg {
5822 1.1 mrg int xcode = x ? GET_CODE (x) : 0;
5823 1.1 mrg bool invert = false;
5824 1.1 mrg switch (code)
5825 1.1 mrg {
5826 1.1 mrg /* Instructions have the following suffixes.
5827 1.1 mrg If there are two suffixes, the first is the destination type,
5828 1.1 mrg and the second is the source type.
5829 1.1 mrg
5830 1.1 mrg B32 Bitfield (untyped data) 32-bit
5831 1.1 mrg B64 Bitfield (untyped data) 64-bit
5832 1.1 mrg F16 floating-point 16-bit
5833 1.1 mrg F32 floating-point 32-bit (IEEE 754 single-precision float)
5834 1.1 mrg F64 floating-point 64-bit (IEEE 754 double-precision float)
     I16 signed 16-bit integer
5836 1.1 mrg I32 signed 32-bit integer
5837 1.1 mrg I64 signed 64-bit integer
5838 1.1 mrg U16 unsigned 32-bit integer
5839 1.1 mrg U32 unsigned 32-bit integer
5840 1.1 mrg U64 unsigned 64-bit integer */
5841 1.1 mrg
5842 1.1 mrg /* Print operand size as untyped suffix. */
5843 1.1 mrg case 'b':
5844 1.1 mrg {
5845 1.1 mrg const char *s = "";
5846 1.1 mrg machine_mode mode = GET_MODE (x);
5847 1.1 mrg if (VECTOR_MODE_P (mode))
5848 1.1 mrg mode = GET_MODE_INNER (mode);
5849 1.1 mrg switch (GET_MODE_SIZE (mode))
5850 1.1 mrg {
5851 1.1 mrg case 1:
5852 1.1 mrg s = "_b8";
5853 1.1 mrg break;
5854 1.1 mrg case 2:
5855 1.1 mrg s = "_b16";
5856 1.1 mrg break;
5857 1.1 mrg case 4:
5858 1.1 mrg s = "_b32";
5859 1.1 mrg break;
5860 1.1 mrg case 8:
5861 1.1 mrg s = "_b64";
5862 1.1 mrg break;
5863 1.1 mrg default:
5864 1.1 mrg output_operand_lossage ("invalid operand %%xn code");
5865 1.1 mrg return;
5866 1.1 mrg }
5867 1.1 mrg fputs (s, file);
5868 1.1 mrg }
5869 1.1 mrg return;
5870 1.1 mrg case 'B':
5871 1.1 mrg {
5872 1.1 mrg const char *s = "";
5873 1.1 mrg machine_mode mode = GET_MODE (x);
5874 1.1 mrg if (VECTOR_MODE_P (mode))
5875 1.1 mrg mode = GET_MODE_INNER (mode);
5876 1.1 mrg switch (GET_MODE_SIZE (mode))
5877 1.1 mrg {
5878 1.1 mrg case 1:
5879 1.1 mrg case 2:
5880 1.1 mrg case 4:
5881 1.1 mrg s = "_b32";
5882 1.1 mrg break;
5883 1.1 mrg case 8:
5884 1.1 mrg s = "_b64";
5885 1.1 mrg break;
5886 1.1 mrg default:
5887 1.1 mrg output_operand_lossage ("invalid operand %%xn code");
5888 1.1 mrg return;
5889 1.1 mrg }
5890 1.1 mrg fputs (s, file);
5891 1.1 mrg }
5892 1.1 mrg return;
5893 1.1 mrg case 'e':
5894 1.1 mrg fputs ("sext(", file);
5895 1.1 mrg print_operand (file, x, 0);
5896 1.1 mrg fputs (")", file);
5897 1.1 mrg return;
5898 1.1 mrg case 'i':
5899 1.1 mrg case 'I':
5900 1.1 mrg case 'u':
5901 1.1 mrg case 'U':
5902 1.1 mrg {
5903 1.1 mrg bool signed_p = code == 'i';
5904 1.1 mrg bool min32_p = code == 'I' || code == 'U';
5905 1.1 mrg const char *s = "";
5906 1.1 mrg machine_mode mode = GET_MODE (x);
5907 1.1 mrg if (VECTOR_MODE_P (mode))
5908 1.1 mrg mode = GET_MODE_INNER (mode);
5909 1.1 mrg if (mode == VOIDmode)
5910 1.1 mrg switch (GET_CODE (x))
5911 1.1 mrg {
5912 1.1 mrg case CONST_INT:
5913 1.1 mrg s = signed_p ? "_i32" : "_u32";
5914 1.1 mrg break;
5915 1.1 mrg case CONST_DOUBLE:
5916 1.1 mrg s = "_f64";
5917 1.1 mrg break;
5918 1.1 mrg default:
5919 1.1 mrg output_operand_lossage ("invalid operand %%xn code");
5920 1.1 mrg return;
5921 1.1 mrg }
5922 1.1 mrg else if (FLOAT_MODE_P (mode))
5923 1.1 mrg switch (GET_MODE_SIZE (mode))
5924 1.1 mrg {
5925 1.1 mrg case 2:
5926 1.1 mrg s = "_f16";
5927 1.1 mrg break;
5928 1.1 mrg case 4:
5929 1.1 mrg s = "_f32";
5930 1.1 mrg break;
5931 1.1 mrg case 8:
5932 1.1 mrg s = "_f64";
5933 1.1 mrg break;
5934 1.1 mrg default:
5935 1.1 mrg output_operand_lossage ("invalid operand %%xn code");
5936 1.1 mrg return;
5937 1.1 mrg }
5938 1.1 mrg else if (min32_p)
5939 1.1 mrg switch (GET_MODE_SIZE (mode))
5940 1.1 mrg {
5941 1.1 mrg case 1:
5942 1.1 mrg case 2:
5943 1.1 mrg case 4:
5944 1.1 mrg s = signed_p ? "_i32" : "_u32";
5945 1.1 mrg break;
5946 1.1 mrg case 8:
5947 1.1 mrg s = signed_p ? "_i64" : "_u64";
5948 1.1 mrg break;
5949 1.1 mrg default:
5950 1.1 mrg output_operand_lossage ("invalid operand %%xn code");
5951 1.1 mrg return;
5952 1.1 mrg }
5953 1.1 mrg else
5954 1.1 mrg switch (GET_MODE_SIZE (mode))
5955 1.1 mrg {
5956 1.1 mrg case 1:
5957 1.1 mrg s = signed_p ? "_i8" : "_u8";
5958 1.1 mrg break;
5959 1.1 mrg case 2:
5960 1.1 mrg s = signed_p ? "_i16" : "_u16";
5961 1.1 mrg break;
5962 1.1 mrg case 4:
5963 1.1 mrg s = signed_p ? "_i32" : "_u32";
5964 1.1 mrg break;
5965 1.1 mrg case 8:
5966 1.1 mrg s = signed_p ? "_i64" : "_u64";
5967 1.1 mrg break;
5968 1.1 mrg default:
5969 1.1 mrg output_operand_lossage ("invalid operand %%xn code");
5970 1.1 mrg return;
5971 1.1 mrg }
5972 1.1 mrg fputs (s, file);
5973 1.1 mrg }
5974 1.1 mrg return;
5975 1.1 mrg /* Print operand size as untyped suffix. */
5976 1.1 mrg case 'o':
5977 1.1 mrg {
5978 1.1 mrg const char *s = 0;
5979 1.1 mrg switch (GET_MODE_SIZE (GET_MODE (x)))
5980 1.1 mrg {
5981 1.1 mrg case 1:
5982 1.1 mrg s = "_ubyte";
5983 1.1 mrg break;
5984 1.1 mrg case 2:
5985 1.1 mrg s = "_ushort";
5986 1.1 mrg break;
5987 1.1 mrg /* The following are full-vector variants. */
5988 1.1 mrg case 64:
5989 1.1 mrg s = "_ubyte";
5990 1.1 mrg break;
5991 1.1 mrg case 128:
5992 1.1 mrg s = "_ushort";
5993 1.1 mrg break;
5994 1.1 mrg }
5995 1.1 mrg
5996 1.1 mrg if (s)
5997 1.1 mrg {
5998 1.1 mrg fputs (s, file);
5999 1.1 mrg return;
6000 1.1 mrg }
6001 1.1 mrg
6002 1.1 mrg /* Fall-through - the other cases for 'o' are the same as for 's'. */
6003 1.1 mrg gcc_fallthrough();
6004 1.1 mrg }
6005 1.1 mrg case 's':
6006 1.1 mrg {
6007 1.1 mrg const char *s = "";
6008 1.1 mrg switch (GET_MODE_SIZE (GET_MODE (x)))
6009 1.1 mrg {
6010 1.1 mrg case 1:
6011 1.1 mrg s = "_byte";
6012 1.1 mrg break;
6013 1.1 mrg case 2:
6014 1.1 mrg s = "_short";
6015 1.1 mrg break;
6016 1.1 mrg case 4:
6017 1.1 mrg s = "_dword";
6018 1.1 mrg break;
6019 1.1 mrg case 8:
6020 1.1 mrg s = "_dwordx2";
6021 1.1 mrg break;
6022 1.1 mrg case 12:
6023 1.1 mrg s = "_dwordx3";
6024 1.1 mrg break;
6025 1.1 mrg case 16:
6026 1.1 mrg s = "_dwordx4";
6027 1.1 mrg break;
6028 1.1 mrg case 32:
6029 1.1 mrg s = "_dwordx8";
6030 1.1 mrg break;
6031 1.1 mrg case 64:
6032 1.1 mrg s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16";
6033 1.1 mrg break;
6034 1.1 mrg /* The following are full-vector variants. */
6035 1.1 mrg case 128:
6036 1.1 mrg s = "_short";
6037 1.1 mrg break;
6038 1.1 mrg case 256:
6039 1.1 mrg s = "_dword";
6040 1.1 mrg break;
6041 1.1 mrg case 512:
6042 1.1 mrg s = "_dwordx2";
6043 1.1 mrg break;
6044 1.1 mrg default:
6045 1.1 mrg output_operand_lossage ("invalid operand %%xn code");
6046 1.1 mrg return;
6047 1.1 mrg }
6048 1.1 mrg fputs (s, file);
6049 1.1 mrg }
6050 1.1 mrg return;
6051 1.1 mrg case 'A':
6052 1.1 mrg if (xcode != MEM)
6053 1.1 mrg {
6054 1.1 mrg output_operand_lossage ("invalid %%xn code");
6055 1.1 mrg return;
6056 1.1 mrg }
6057 1.1 mrg print_operand_address (file, x);
6058 1.1 mrg return;
6059 1.1 mrg case 'O':
6060 1.1 mrg {
6061 1.1 mrg if (xcode != MEM)
6062 1.1 mrg {
6063 1.1 mrg output_operand_lossage ("invalid %%xn code");
6064 1.1 mrg return;
6065 1.1 mrg }
6066 1.1 mrg if (AS_GDS_P (MEM_ADDR_SPACE (x)))
6067 1.1 mrg fprintf (file, " gds");
6068 1.1 mrg
6069 1.1 mrg rtx x0 = XEXP (x, 0);
6070 1.1 mrg if (AS_GLOBAL_P (MEM_ADDR_SPACE (x)))
6071 1.1 mrg {
6072 1.1 mrg gcc_assert (TARGET_GCN5_PLUS);
6073 1.1 mrg
6074 1.1 mrg fprintf (file, ", ");
6075 1.1 mrg
6076 1.1 mrg rtx base = x0;
6077 1.1 mrg rtx const_offset = NULL_RTX;
6078 1.1 mrg
6079 1.1 mrg if (GET_CODE (base) == PLUS)
6080 1.1 mrg {
6081 1.1 mrg rtx offset = XEXP (x0, 1);
6082 1.1 mrg base = XEXP (x0, 0);
6083 1.1 mrg
6084 1.1 mrg if (GET_CODE (base) == PLUS)
6085 1.1 mrg /* (SGPR + VGPR) + CONST */
6086 1.1 mrg /* Ignore the VGPR offset for this operand. */
6087 1.1 mrg base = XEXP (base, 0);
6088 1.1 mrg
6089 1.1 mrg if (CONST_INT_P (offset))
6090 1.1 mrg const_offset = XEXP (x0, 1);
6091 1.1 mrg else if (REG_P (offset))
6092 1.1 mrg /* SGPR + VGPR */
6093 1.1 mrg /* Ignore the VGPR offset for this operand. */
6094 1.1 mrg ;
6095 1.1 mrg else
6096 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
6097 1.1 mrg }
6098 1.1 mrg
6099 1.1 mrg if (REG_P (base))
6100 1.1 mrg {
6101 1.1 mrg if (VGPR_REGNO_P (REGNO (base)))
6102 1.1 mrg /* The VGPR address is specified in the %A operand. */
6103 1.1 mrg fprintf (file, "off");
6104 1.1 mrg else if (SGPR_REGNO_P (REGNO (base)))
6105 1.1 mrg print_reg (file, base);
6106 1.1 mrg else
6107 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
6108 1.1 mrg }
6109 1.1 mrg else
6110 1.1 mrg output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
6111 1.1 mrg
6112 1.1 mrg if (const_offset != NULL_RTX)
6113 1.1 mrg fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC,
6114 1.1 mrg INTVAL (const_offset));
6115 1.1 mrg
6116 1.1 mrg return;
6117 1.1 mrg }
6118 1.1 mrg
6119 1.1 mrg if (GET_CODE (x0) == REG)
6120 1.1 mrg return;
6121 1.1 mrg if (GET_CODE (x0) != PLUS)
6122 1.1 mrg {
6123 1.1 mrg output_operand_lossage ("invalid %%xn code");
6124 1.1 mrg return;
6125 1.1 mrg }
6126 1.1 mrg rtx val = XEXP (x0, 1);
6127 1.1 mrg if (GET_CODE (val) == CONST_VECTOR)
6128 1.1 mrg val = CONST_VECTOR_ELT (val, 0);
6129 1.1 mrg if (GET_CODE (val) != CONST_INT)
6130 1.1 mrg {
6131 1.1 mrg output_operand_lossage ("invalid %%xn code");
6132 1.1 mrg return;
6133 1.1 mrg }
6134 1.1 mrg fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (val));
6135 1.1 mrg
6136 1.1 mrg }
6137 1.1 mrg return;
6138 1.1 mrg case 'c':
6139 1.1 mrg invert = true;
6140 1.1 mrg /* Fall through. */
6141 1.1 mrg case 'C':
6142 1.1 mrg {
6143 1.1 mrg const char *s;
6144 1.1 mrg bool num = false;
6145 1.1 mrg if ((xcode != EQ && xcode != NE) || !REG_P (XEXP (x, 0)))
6146 1.1 mrg {
6147 1.1 mrg output_operand_lossage ("invalid %%xn code");
6148 1.1 mrg return;
6149 1.1 mrg }
6150 1.1 mrg switch (REGNO (XEXP (x, 0)))
6151 1.1 mrg {
6152 1.1 mrg case VCC_REG:
6153 1.1 mrg case VCCZ_REG:
6154 1.1 mrg s = "_vcc";
6155 1.1 mrg break;
6156 1.1 mrg case SCC_REG:
6157 1.1 mrg /* For some reason llvm-mc insists on scc0 instead of sccz. */
6158 1.1 mrg num = true;
6159 1.1 mrg s = "_scc";
6160 1.1 mrg break;
6161 1.1 mrg case EXECZ_REG:
6162 1.1 mrg s = "_exec";
6163 1.1 mrg break;
6164 1.1 mrg default:
6165 1.1 mrg output_operand_lossage ("invalid %%xn code");
6166 1.1 mrg return;
6167 1.1 mrg }
6168 1.1 mrg fputs (s, file);
6169 1.1 mrg if (xcode == (invert ? NE : EQ))
6170 1.1 mrg fputc (num ? '0' : 'z', file);
6171 1.1 mrg else
6172 1.1 mrg fputs (num ? "1" : "nz", file);
6173 1.1 mrg return;
6174 1.1 mrg }
6175 1.1 mrg case 'D':
6176 1.1 mrg {
6177 1.1 mrg const char *s;
6178 1.1 mrg bool cmp_signed = false;
6179 1.1 mrg switch (xcode)
6180 1.1 mrg {
6181 1.1 mrg case EQ:
6182 1.1 mrg s = "_eq_";
6183 1.1 mrg break;
6184 1.1 mrg case NE:
6185 1.1 mrg s = "_lg_";
6186 1.1 mrg break;
6187 1.1 mrg case LT:
6188 1.1 mrg s = "_lt_";
6189 1.1 mrg cmp_signed = true;
6190 1.1 mrg break;
6191 1.1 mrg case LE:
6192 1.1 mrg s = "_le_";
6193 1.1 mrg cmp_signed = true;
6194 1.1 mrg break;
6195 1.1 mrg case GT:
6196 1.1 mrg s = "_gt_";
6197 1.1 mrg cmp_signed = true;
6198 1.1 mrg break;
6199 1.1 mrg case GE:
6200 1.1 mrg s = "_ge_";
6201 1.1 mrg cmp_signed = true;
6202 1.1 mrg break;
6203 1.1 mrg case LTU:
6204 1.1 mrg s = "_lt_";
6205 1.1 mrg break;
6206 1.1 mrg case LEU:
6207 1.1 mrg s = "_le_";
6208 1.1 mrg break;
6209 1.1 mrg case GTU:
6210 1.1 mrg s = "_gt_";
6211 1.1 mrg break;
6212 1.1 mrg case GEU:
6213 1.1 mrg s = "_ge_";
6214 1.1 mrg break;
6215 1.1 mrg default:
6216 1.1 mrg output_operand_lossage ("invalid %%xn code");
6217 1.1 mrg return;
6218 1.1 mrg }
6219 1.1 mrg fputs (s, file);
6220 1.1 mrg fputc (cmp_signed ? 'i' : 'u', file);
6221 1.1 mrg
6222 1.1 mrg machine_mode mode = GET_MODE (XEXP (x, 0));
6223 1.1 mrg
6224 1.1 mrg if (mode == VOIDmode)
6225 1.1 mrg mode = GET_MODE (XEXP (x, 1));
6226 1.1 mrg
6227 1.1 mrg /* If both sides are constants, then assume the instruction is in
6228 1.1 mrg SImode since s_cmp can only do integer compares. */
6229 1.1 mrg if (mode == VOIDmode)
6230 1.1 mrg mode = SImode;
6231 1.1 mrg
6232 1.1 mrg switch (GET_MODE_SIZE (mode))
6233 1.1 mrg {
6234 1.1 mrg case 4:
6235 1.1 mrg s = "32";
6236 1.1 mrg break;
6237 1.1 mrg case 8:
6238 1.1 mrg s = "64";
6239 1.1 mrg break;
6240 1.1 mrg default:
6241 1.1 mrg output_operand_lossage ("invalid operand %%xn code");
6242 1.1 mrg return;
6243 1.1 mrg }
6244 1.1 mrg fputs (s, file);
6245 1.1 mrg return;
6246 1.1 mrg }
6247 1.1 mrg case 'E':
6248 1.1 mrg {
6249 1.1 mrg const char *s;
6250 1.1 mrg bool cmp_signed = false;
6251 1.1 mrg machine_mode mode = GET_MODE (XEXP (x, 0));
6252 1.1 mrg
6253 1.1 mrg if (mode == VOIDmode)
6254 1.1 mrg mode = GET_MODE (XEXP (x, 1));
6255 1.1 mrg
6256 1.1 mrg /* If both sides are constants, assume the instruction is in SFmode
6257 1.1 mrg if either operand is floating point, otherwise assume SImode. */
6258 1.1 mrg if (mode == VOIDmode)
6259 1.1 mrg {
6260 1.1 mrg if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6261 1.1 mrg || GET_CODE (XEXP (x, 1)) == CONST_DOUBLE)
6262 1.1 mrg mode = SFmode;
6263 1.1 mrg else
6264 1.1 mrg mode = SImode;
6265 1.1 mrg }
6266 1.1 mrg
6267 1.1 mrg /* Use the same format code for vector comparisons. */
6268 1.1 mrg if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
6269 1.1 mrg || GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
6270 1.1 mrg mode = GET_MODE_INNER (mode);
6271 1.1 mrg
6272 1.1 mrg bool float_p = GET_MODE_CLASS (mode) == MODE_FLOAT;
6273 1.1 mrg
6274 1.1 mrg switch (xcode)
6275 1.1 mrg {
6276 1.1 mrg case EQ:
6277 1.1 mrg s = "_eq_";
6278 1.1 mrg break;
6279 1.1 mrg case NE:
6280 1.1 mrg s = float_p ? "_neq_" : "_ne_";
6281 1.1 mrg break;
6282 1.1 mrg case LT:
6283 1.1 mrg s = "_lt_";
6284 1.1 mrg cmp_signed = true;
6285 1.1 mrg break;
6286 1.1 mrg case LE:
6287 1.1 mrg s = "_le_";
6288 1.1 mrg cmp_signed = true;
6289 1.1 mrg break;
6290 1.1 mrg case GT:
6291 1.1 mrg s = "_gt_";
6292 1.1 mrg cmp_signed = true;
6293 1.1 mrg break;
6294 1.1 mrg case GE:
6295 1.1 mrg s = "_ge_";
6296 1.1 mrg cmp_signed = true;
6297 1.1 mrg break;
6298 1.1 mrg case LTU:
6299 1.1 mrg s = "_lt_";
6300 1.1 mrg break;
6301 1.1 mrg case LEU:
6302 1.1 mrg s = "_le_";
6303 1.1 mrg break;
6304 1.1 mrg case GTU:
6305 1.1 mrg s = "_gt_";
6306 1.1 mrg break;
6307 1.1 mrg case GEU:
6308 1.1 mrg s = "_ge_";
6309 1.1 mrg break;
6310 1.1 mrg case ORDERED:
6311 1.1 mrg s = "_o_";
6312 1.1 mrg break;
6313 1.1 mrg case UNORDERED:
6314 1.1 mrg s = "_u_";
6315 1.1 mrg break;
6316 1.1 mrg case UNEQ:
6317 1.1 mrg s = "_nlg_";
6318 1.1 mrg break;
6319 1.1 mrg case UNGE:
6320 1.1 mrg s = "_nlt_";
6321 1.1 mrg break;
6322 1.1 mrg case UNGT:
6323 1.1 mrg s = "_nle_";
6324 1.1 mrg break;
6325 1.1 mrg case UNLE:
6326 1.1 mrg s = "_ngt_";
6327 1.1 mrg break;
6328 1.1 mrg case UNLT:
6329 1.1 mrg s = "_nge_";
6330 1.1 mrg break;
6331 1.1 mrg case LTGT:
6332 1.1 mrg s = "_lg_";
6333 1.1 mrg break;
6334 1.1 mrg default:
6335 1.1 mrg output_operand_lossage ("invalid %%xn code");
6336 1.1 mrg return;
6337 1.1 mrg }
6338 1.1 mrg fputs (s, file);
6339 1.1 mrg fputc (float_p ? 'f' : cmp_signed ? 'i' : 'u', file);
6340 1.1 mrg
6341 1.1 mrg switch (GET_MODE_SIZE (mode))
6342 1.1 mrg {
6343 1.1 mrg case 1:
6344 1.1 mrg output_operand_lossage ("operand %%xn code invalid for QImode");
6345 1.1 mrg return;
6346 1.1 mrg case 2:
6347 1.1 mrg s = "16";
6348 1.1 mrg break;
6349 1.1 mrg case 4:
6350 1.1 mrg s = "32";
6351 1.1 mrg break;
6352 1.1 mrg case 8:
6353 1.1 mrg s = "64";
6354 1.1 mrg break;
6355 1.1 mrg default:
6356 1.1 mrg output_operand_lossage ("invalid operand %%xn code");
6357 1.1 mrg return;
6358 1.1 mrg }
6359 1.1 mrg fputs (s, file);
6360 1.1 mrg return;
6361 1.1 mrg }
6362 1.1 mrg case 'L':
6363 1.1 mrg print_operand (file, gcn_operand_part (GET_MODE (x), x, 0), 0);
6364 1.1 mrg return;
6365 1.1 mrg case 'H':
6366 1.1 mrg print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
6367 1.1 mrg return;
6368 1.1 mrg case 'R':
6369 1.1 mrg /* Print a scalar register number as an integer. Temporary hack. */
6370 1.1 mrg gcc_assert (REG_P (x));
6371 1.1 mrg fprintf (file, "%u", (int) REGNO (x));
6372 1.1 mrg return;
6373 1.1 mrg case 'V':
6374 1.1 mrg /* Print a vector register number as an integer. Temporary hack. */
6375 1.1 mrg gcc_assert (REG_P (x));
6376 1.1 mrg fprintf (file, "%u", (int) REGNO (x) - FIRST_VGPR_REG);
6377 1.1 mrg return;
6378 1.1 mrg case 0:
6379 1.1 mrg if (xcode == REG)
6380 1.1 mrg print_reg (file, x);
6381 1.1 mrg else if (xcode == MEM)
6382 1.1 mrg output_address (GET_MODE (x), x);
6383 1.1 mrg else if (xcode == CONST_INT)
6384 1.1 mrg fprintf (file, "%i", (int) INTVAL (x));
6385 1.1 mrg else if (xcode == CONST_VECTOR)
6386 1.1 mrg print_operand (file, CONST_VECTOR_ELT (x, 0), code);
6387 1.1 mrg else if (xcode == CONST_DOUBLE)
6388 1.1 mrg {
6389 1.1 mrg const char *str;
6390 1.1 mrg switch (gcn_inline_fp_constant_p (x, false))
6391 1.1 mrg {
6392 1.1 mrg case 240:
6393 1.1 mrg str = "0.5";
6394 1.1 mrg break;
6395 1.1 mrg case 241:
6396 1.1 mrg str = "-0.5";
6397 1.1 mrg break;
6398 1.1 mrg case 242:
6399 1.1 mrg str = "1.0";
6400 1.1 mrg break;
6401 1.1 mrg case 243:
6402 1.1 mrg str = "-1.0";
6403 1.1 mrg break;
6404 1.1 mrg case 244:
6405 1.1 mrg str = "2.0";
6406 1.1 mrg break;
6407 1.1 mrg case 245:
6408 1.1 mrg str = "-2.0";
6409 1.1 mrg break;
6410 1.1 mrg case 246:
6411 1.1 mrg str = "4.0";
6412 1.1 mrg break;
6413 1.1 mrg case 247:
6414 1.1 mrg str = "-4.0";
6415 1.1 mrg break;
6416 1.1 mrg case 248:
6417 1.1 mrg str = "1/pi";
6418 1.1 mrg break;
6419 1.1 mrg default:
6420 1.1 mrg rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode
6421 1.1 mrg ? DImode : SImode,
6422 1.1 mrg x, GET_MODE (x), 0);
6423 1.1 mrg if (x)
6424 1.1 mrg print_operand (file, ix, code);
6425 1.1 mrg else
6426 1.1 mrg output_operand_lossage ("invalid fp constant");
6427 1.1 mrg return;
6428 1.1 mrg break;
6429 1.1 mrg }
6430 1.1 mrg fprintf (file, str);
6431 1.1 mrg return;
6432 1.1 mrg }
6433 1.1 mrg else
6434 1.1 mrg output_addr_const (file, x);
6435 1.1 mrg return;
6436 1.1 mrg case '^':
6437 1.1 mrg if (TARGET_GCN5_PLUS)
6438 1.1 mrg fputs ("_co", file);
6439 1.1 mrg return;
6440 1.1 mrg case 'g':
6441 1.1 mrg gcc_assert (xcode == MEM);
6442 1.1 mrg if (MEM_VOLATILE_P (x))
6443 1.1 mrg fputs (" glc", file);
6444 1.1 mrg return;
6445 1.1 mrg default:
6446 1.1 mrg output_operand_lossage ("invalid %%xn code");
6447 1.1 mrg }
6448 1.1 mrg gcc_unreachable ();
6449 1.1 mrg }
6450 1.1 mrg
6451 1.1 mrg /* Implement DBX_REGISTER_NUMBER macro.
6452 1.1 mrg
6453 1.1 mrg Return the DWARF register number that corresponds to the GCC internal
6454 1.1 mrg REGNO. */
6455 1.1 mrg
6456 1.1 mrg unsigned int
6457 1.1 mrg gcn_dwarf_register_number (unsigned int regno)
6458 1.1 mrg {
6459 1.1 mrg /* Registers defined in DWARF. */
6460 1.1 mrg if (regno == EXEC_LO_REG)
6461 1.1 mrg return 17;
6462 1.1 mrg /* We need to use a more complex DWARF expression for this
6463 1.1 mrg else if (regno == EXEC_HI_REG)
6464 1.1 mrg return 17; */
6465 1.1 mrg else if (regno == VCC_LO_REG)
6466 1.1 mrg return 768;
6467 1.1 mrg /* We need to use a more complex DWARF expression for this
6468 1.1 mrg else if (regno == VCC_HI_REG)
6469 1.1 mrg return 768; */
6470 1.1 mrg else if (regno == SCC_REG)
6471 1.1 mrg return 128;
6472 1.1 mrg else if (regno == DWARF_LINK_REGISTER)
6473 1.1 mrg return 16;
6474 1.1 mrg else if (SGPR_REGNO_P (regno))
6475 1.1 mrg {
6476 1.1 mrg if (regno - FIRST_SGPR_REG < 64)
6477 1.1 mrg return (regno - FIRST_SGPR_REG + 32);
6478 1.1 mrg else
6479 1.1 mrg return (regno - FIRST_SGPR_REG + 1024);
6480 1.1 mrg }
6481 1.1 mrg else if (VGPR_REGNO_P (regno))
6482 1.1 mrg return (regno - FIRST_VGPR_REG + 2560);
6483 1.1 mrg
6484 1.1 mrg /* Otherwise, there's nothing sensible to do. */
6485 1.1 mrg return regno + 100000;
6486 1.1 mrg }
6487 1.1 mrg
6488 1.1 mrg /* Implement TARGET_DWARF_REGISTER_SPAN.
6489 1.1 mrg
6490 1.1 mrg DImode and Vector DImode require additional registers. */
6491 1.1 mrg
6492 1.1 mrg static rtx
6493 1.1 mrg gcn_dwarf_register_span (rtx rtl)
6494 1.1 mrg {
6495 1.1 mrg machine_mode mode = GET_MODE (rtl);
6496 1.1 mrg
6497 1.1 mrg if (VECTOR_MODE_P (mode))
6498 1.1 mrg mode = GET_MODE_INNER (mode);
6499 1.1 mrg
6500 1.1 mrg if (GET_MODE_SIZE (mode) != 8)
6501 1.1 mrg return NULL_RTX;
6502 1.1 mrg
6503 1.1 mrg unsigned regno = REGNO (rtl);
6504 1.1 mrg
6505 1.1 mrg if (regno == DWARF_LINK_REGISTER)
6506 1.1 mrg return NULL_RTX;
6507 1.1 mrg
6508 1.1 mrg rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
6509 1.1 mrg XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
6510 1.1 mrg XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
6511 1.1 mrg
6512 1.1 mrg return p;
6513 1.1 mrg }
6514 1.1 mrg
6515 1.1 mrg /* }}} */
6516 1.1 mrg /* {{{ TARGET hook overrides. */
6517 1.1 mrg
6518 1.1 mrg #undef TARGET_ADDR_SPACE_ADDRESS_MODE
6519 1.1 mrg #define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
6520 1.1 mrg #undef TARGET_ADDR_SPACE_DEBUG
6521 1.1 mrg #define TARGET_ADDR_SPACE_DEBUG gcn_addr_space_debug
6522 1.1 mrg #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
6523 1.1 mrg #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
6524 1.1 mrg gcn_addr_space_legitimate_address_p
6525 1.1 mrg #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
6526 1.1 mrg #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address
6527 1.1 mrg #undef TARGET_ADDR_SPACE_POINTER_MODE
6528 1.1 mrg #define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode
6529 1.1 mrg #undef TARGET_ADDR_SPACE_SUBSET_P
6530 1.1 mrg #define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p
6531 1.1 mrg #undef TARGET_ADDR_SPACE_CONVERT
6532 1.1 mrg #define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert
6533 1.1 mrg #undef TARGET_ARG_PARTIAL_BYTES
6534 1.1 mrg #define TARGET_ARG_PARTIAL_BYTES gcn_arg_partial_bytes
6535 1.1 mrg #undef TARGET_ASM_ALIGNED_DI_OP
6536 1.1 mrg #define TARGET_ASM_ALIGNED_DI_OP "\t.8byte\t"
6537 1.1 mrg #undef TARGET_ASM_FILE_START
6538 1.1 mrg #define TARGET_ASM_FILE_START output_file_start
6539 1.1 mrg #undef TARGET_ASM_FUNCTION_PROLOGUE
6540 1.1 mrg #define TARGET_ASM_FUNCTION_PROLOGUE gcn_target_asm_function_prologue
6541 1.1 mrg #undef TARGET_ASM_SELECT_SECTION
6542 1.1 mrg #define TARGET_ASM_SELECT_SECTION gcn_asm_select_section
6543 1.1 mrg #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6544 1.1 mrg #define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template
6545 1.1 mrg #undef TARGET_ATTRIBUTE_TABLE
6546 1.1 mrg #define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
6547 1.1 mrg #undef TARGET_BUILTIN_DECL
6548 1.1 mrg #define TARGET_BUILTIN_DECL gcn_builtin_decl
6549 1.1 mrg #undef TARGET_CAN_CHANGE_MODE_CLASS
6550 1.1 mrg #define TARGET_CAN_CHANGE_MODE_CLASS gcn_can_change_mode_class
6551 1.1 mrg #undef TARGET_CAN_ELIMINATE
6552 1.1 mrg #define TARGET_CAN_ELIMINATE gcn_can_eliminate_p
6553 1.1 mrg #undef TARGET_CANNOT_COPY_INSN_P
6554 1.1 mrg #define TARGET_CANNOT_COPY_INSN_P gcn_cannot_copy_insn_p
6555 1.1 mrg #undef TARGET_CLASS_LIKELY_SPILLED_P
6556 1.1 mrg #define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p
6557 1.1 mrg #undef TARGET_CLASS_MAX_NREGS
6558 1.1 mrg #define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs
6559 1.1 mrg #undef TARGET_CONDITIONAL_REGISTER_USAGE
6560 1.1 mrg #define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage
6561 1.1 mrg #undef TARGET_CONSTANT_ALIGNMENT
6562 1.1 mrg #define TARGET_CONSTANT_ALIGNMENT gcn_constant_alignment
6563 1.1 mrg #undef TARGET_DEBUG_UNWIND_INFO
6564 1.1 mrg #define TARGET_DEBUG_UNWIND_INFO gcn_debug_unwind_info
6565 1.1 mrg #undef TARGET_DWARF_REGISTER_SPAN
6566 1.1 mrg #define TARGET_DWARF_REGISTER_SPAN gcn_dwarf_register_span
6567 1.1 mrg #undef TARGET_EMUTLS_VAR_INIT
6568 1.1 mrg #define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init
6569 1.1 mrg #undef TARGET_EXPAND_BUILTIN
6570 1.1 mrg #define TARGET_EXPAND_BUILTIN gcn_expand_builtin
6571 1.1 mrg #undef TARGET_FRAME_POINTER_REQUIRED
6572 1.1 mrg #define TARGET_FRAME_POINTER_REQUIRED gcn_frame_pointer_rqd
6573 1.1 mrg #undef TARGET_FUNCTION_ARG
6574 1.1 mrg #undef TARGET_FUNCTION_ARG_ADVANCE
6575 1.1 mrg #define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
6576 1.1 mrg #define TARGET_FUNCTION_ARG gcn_function_arg
6577 1.1 mrg #undef TARGET_FUNCTION_VALUE
6578 1.1 mrg #define TARGET_FUNCTION_VALUE gcn_function_value
6579 1.1 mrg #undef TARGET_FUNCTION_VALUE_REGNO_P
6580 1.1 mrg #define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p
6581 1.1 mrg #undef TARGET_GIMPLIFY_VA_ARG_EXPR
6582 1.1 mrg #define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr
6583 1.1 mrg #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
6584 1.1 mrg #define TARGET_OMP_DEVICE_KIND_ARCH_ISA gcn_omp_device_kind_arch_isa
6585 1.1 mrg #undef TARGET_GOACC_ADJUST_PRIVATE_DECL
6586 1.1 mrg #define TARGET_GOACC_ADJUST_PRIVATE_DECL gcn_goacc_adjust_private_decl
6587 1.1 mrg #undef TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD
6588 1.1 mrg #define TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD \
6589 1.1 mrg gcn_goacc_create_worker_broadcast_record
6590 1.1 mrg #undef TARGET_GOACC_FORK_JOIN
6591 1.1 mrg #define TARGET_GOACC_FORK_JOIN gcn_fork_join
6592 1.1 mrg #undef TARGET_GOACC_REDUCTION
6593 1.1 mrg #define TARGET_GOACC_REDUCTION gcn_goacc_reduction
6594 1.1 mrg #undef TARGET_GOACC_VALIDATE_DIMS
6595 1.1 mrg #define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims
6596 1.1 mrg #undef TARGET_GOACC_SHARED_MEM_LAYOUT
6597 1.1 mrg #define TARGET_GOACC_SHARED_MEM_LAYOUT gcn_shared_mem_layout
6598 1.1 mrg #undef TARGET_HARD_REGNO_MODE_OK
6599 1.1 mrg #define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok
6600 1.1 mrg #undef TARGET_HARD_REGNO_NREGS
6601 1.1 mrg #define TARGET_HARD_REGNO_NREGS gcn_hard_regno_nregs
6602 1.1 mrg #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
6603 1.1 mrg #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
6604 1.1 mrg #undef TARGET_INIT_BUILTINS
6605 1.1 mrg #define TARGET_INIT_BUILTINS gcn_init_builtins
6606 1.1 mrg #undef TARGET_INIT_LIBFUNCS
6607 1.1 mrg #define TARGET_INIT_LIBFUNCS gcn_init_libfuncs
6608 1.1 mrg #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
6609 1.1 mrg #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
6610 1.1 mrg gcn_ira_change_pseudo_allocno_class
6611 1.1 mrg #undef TARGET_LEGITIMATE_CONSTANT_P
6612 1.1 mrg #define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
6613 1.1 mrg #undef TARGET_LRA_P
6614 1.1 mrg #define TARGET_LRA_P hook_bool_void_true
6615 1.1 mrg #undef TARGET_MACHINE_DEPENDENT_REORG
6616 1.1 mrg #define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg
6617 1.1 mrg #undef TARGET_MEMORY_MOVE_COST
6618 1.1 mrg #define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost
6619 1.1 mrg #undef TARGET_MODES_TIEABLE_P
6620 1.1 mrg #define TARGET_MODES_TIEABLE_P gcn_modes_tieable_p
6621 1.1 mrg #undef TARGET_OPTION_OVERRIDE
6622 1.1 mrg #define TARGET_OPTION_OVERRIDE gcn_option_override
6623 1.1 mrg #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
6624 1.1 mrg #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED \
6625 1.1 mrg gcn_pretend_outgoing_varargs_named
6626 1.1 mrg #undef TARGET_PROMOTE_FUNCTION_MODE
6627 1.1 mrg #define TARGET_PROMOTE_FUNCTION_MODE gcn_promote_function_mode
6628 1.1 mrg #undef TARGET_REGISTER_MOVE_COST
6629 1.1 mrg #define TARGET_REGISTER_MOVE_COST gcn_register_move_cost
6630 1.1 mrg #undef TARGET_RETURN_IN_MEMORY
6631 1.1 mrg #define TARGET_RETURN_IN_MEMORY gcn_return_in_memory
6632 1.1 mrg #undef TARGET_RTX_COSTS
6633 1.1 mrg #define TARGET_RTX_COSTS gcn_rtx_costs
6634 1.1 mrg #undef TARGET_SECONDARY_RELOAD
6635 1.1 mrg #define TARGET_SECONDARY_RELOAD gcn_secondary_reload
6636 1.1 mrg #undef TARGET_SECTION_TYPE_FLAGS
6637 1.1 mrg #define TARGET_SECTION_TYPE_FLAGS gcn_section_type_flags
6638 1.1 mrg #undef TARGET_SCALAR_MODE_SUPPORTED_P
6639 1.1 mrg #define TARGET_SCALAR_MODE_SUPPORTED_P gcn_scalar_mode_supported_p
6640 1.1 mrg #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
6641 1.1 mrg #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
6642 1.1 mrg gcn_small_register_classes_for_mode_p
6643 1.1 mrg #undef TARGET_SPILL_CLASS
6644 1.1 mrg #define TARGET_SPILL_CLASS gcn_spill_class
6645 1.1 mrg #undef TARGET_STRICT_ARGUMENT_NAMING
6646 1.1 mrg #define TARGET_STRICT_ARGUMENT_NAMING gcn_strict_argument_naming
6647 1.1 mrg #undef TARGET_TRAMPOLINE_INIT
6648 1.1 mrg #define TARGET_TRAMPOLINE_INIT gcn_trampoline_init
6649 1.1 mrg #undef TARGET_TRULY_NOOP_TRUNCATION
6650 1.1 mrg #define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation
6651 1.1 mrg #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
6652 1.1 mrg #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost
6653 1.1 mrg #undef TARGET_VECTORIZE_GET_MASK_MODE
6654 1.1 mrg #define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
6655 1.1 mrg #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6656 1.1 mrg #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
6657 1.1 mrg #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
6658 1.1 mrg #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
6659 1.1 mrg gcn_preferred_vector_alignment
6660 1.1 mrg #undef TARGET_VECTORIZE_RELATED_MODE
6661 1.1 mrg #define TARGET_VECTORIZE_RELATED_MODE gcn_related_vector_mode
6662 1.1 mrg #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
6663 1.1 mrg #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
6664 1.1 mrg gcn_vectorize_support_vector_misalignment
6665 1.1 mrg #undef TARGET_VECTORIZE_VEC_PERM_CONST
6666 1.1 mrg #define TARGET_VECTORIZE_VEC_PERM_CONST gcn_vectorize_vec_perm_const
6667 1.1 mrg #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
6668 1.1 mrg #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
6669 1.1 mrg gcn_vector_alignment_reachable
6670 1.1 mrg #undef TARGET_VECTOR_MODE_SUPPORTED_P
6671 1.1 mrg #define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p
6672 1.1 mrg
6673 1.1 mrg struct gcc_target targetm = TARGET_INITIALIZER;
6674 1.1 mrg
6675 1.1 mrg #include "gt-gcn.h"
6676 1.1 mrg /* }}} */
6677