/* OpenACC worker partitioning via middle end neutering/broadcasting scheme

   Copyright (C) 2015-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
20 1.1 mrg
21 1.1 mrg #include "config.h"
22 1.1 mrg #include "system.h"
23 1.1 mrg #include "coretypes.h"
24 1.1 mrg #include "backend.h"
25 1.1 mrg #include "rtl.h"
26 1.1 mrg #include "tree.h"
27 1.1 mrg #include "gimple.h"
28 1.1 mrg #include "tree-pass.h"
29 1.1 mrg #include "ssa.h"
30 1.1 mrg #include "cgraph.h"
31 1.1 mrg #include "pretty-print.h"
32 1.1 mrg #include "fold-const.h"
33 1.1 mrg #include "gimplify.h"
34 1.1 mrg #include "gimple-iterator.h"
35 1.1 mrg #include "gimple-walk.h"
36 1.1 mrg #include "tree-inline.h"
37 1.1 mrg #include "langhooks.h"
38 1.1 mrg #include "omp-general.h"
39 1.1 mrg #include "omp-low.h"
40 1.1 mrg #include "gimple-pretty-print.h"
41 1.1 mrg #include "cfghooks.h"
42 1.1 mrg #include "insn-config.h"
43 1.1 mrg #include "recog.h"
44 1.1 mrg #include "internal-fn.h"
45 1.1 mrg #include "bitmap.h"
46 1.1 mrg #include "tree-nested.h"
47 1.1 mrg #include "stor-layout.h"
48 1.1 mrg #include "tree-ssa-threadupdate.h"
49 1.1 mrg #include "tree-into-ssa.h"
50 1.1 mrg #include "splay-tree.h"
51 1.1 mrg #include "target.h"
52 1.1 mrg #include "cfgloop.h"
53 1.1 mrg #include "tree-cfg.h"
54 1.1 mrg #include "omp-offload.h"
55 1.1 mrg #include "attribs.h"
56 1.1 mrg #include "targhooks.h"
57 1.1 mrg #include "diagnostic-core.h"
58 1.1 mrg
/* Loop structure of the function.  The entire function is described as
   a NULL loop.  */
/* Adapted from 'gcc/config/nvptx/nvptx.cc:struct parallel'.  */

struct parallel_g
{
  /* Parent parallel.  */
  parallel_g *parent;

  /* Next sibling parallel.  */
  parallel_g *next;

  /* First child parallel.  */
  parallel_g *inner;

  /* Partitioning mask of the parallel.  */
  unsigned mask;

  /* Partitioning used within inner parallels.  */
  unsigned inner_mask;

  /* Location of parallel forked and join.  The forked is the first
     block in the parallel and the join is the first block after of
     the partition.  */
  basic_block forked_block;
  basic_block join_block;

  /* Statements marking the start/end of the partition: set by
     omp_sese_find_par to the head statement of FORKED_BLOCK and
     JOIN_BLOCK respectively (for real fork/join regions these are the
     IFN_UNIQUE markers or their NOP placeholder).  */
  gimple *forked_stmt;
  gimple *join_stmt;

  /* The corresponding "fork" marker (the NOP placeholder inserted by
     omp_sese_split_blocks) and "joining" marker.  NOTE(review):
     JOINING_STMT is never assigned in the code visible here — confirm
     against the rest of the file before relying on it.  */
  gimple *fork_stmt;
  gimple *joining_stmt;

  /* Basic blocks in this parallel, but not in child parallels.  The
     FORKED and JOINING blocks are in the partition.  The FORK and JOIN
     blocks are not.  */
  auto_vec<basic_block> blocks;

  /* Broadcast record type and the sender/receiver decls used to
     propagate values from the active worker to the idle ones.  */
  tree record_type;
  tree sender_decl;
  tree receiver_decl;

public:
  parallel_g (parallel_g *parent, unsigned mode);
  ~parallel_g ();
};
105 1.1 mrg
106 1.1 mrg /* Constructor links the new parallel into it's parent's chain of
107 1.1 mrg children. */
108 1.1 mrg
109 1.1 mrg parallel_g::parallel_g (parallel_g *parent_, unsigned mask_)
110 1.1 mrg :parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0)
111 1.1 mrg {
112 1.1 mrg forked_block = join_block = 0;
113 1.1 mrg forked_stmt = join_stmt = NULL;
114 1.1 mrg fork_stmt = joining_stmt = NULL;
115 1.1 mrg
116 1.1 mrg record_type = NULL_TREE;
117 1.1 mrg sender_decl = NULL_TREE;
118 1.1 mrg receiver_decl = NULL_TREE;
119 1.1 mrg
120 1.1 mrg if (parent)
121 1.1 mrg {
122 1.1 mrg next = parent->inner;
123 1.1 mrg parent->inner = this;
124 1.1 mrg }
125 1.1 mrg }
126 1.1 mrg
/* Destructor recursively tears down the whole tree hanging off this
   node: deleting INNER frees the child chain, deleting NEXT frees the
   remaining siblings.  */

parallel_g::~parallel_g ()
{
  delete inner;
  delete next;
}
132 1.1 mrg
133 1.1 mrg static bool
134 1.1 mrg local_var_based_p (tree decl)
135 1.1 mrg {
136 1.1 mrg switch (TREE_CODE (decl))
137 1.1 mrg {
138 1.1 mrg case VAR_DECL:
139 1.1 mrg return !is_global_var (decl);
140 1.1 mrg
141 1.1 mrg case COMPONENT_REF:
142 1.1 mrg case BIT_FIELD_REF:
143 1.1 mrg case ARRAY_REF:
144 1.1 mrg return local_var_based_p (TREE_OPERAND (decl, 0));
145 1.1 mrg
146 1.1 mrg default:
147 1.1 mrg return false;
148 1.1 mrg }
149 1.1 mrg }
150 1.1 mrg
/* Map of basic blocks to gimple stmts.  Used to remember the "head"
   statement recorded for each block created by omp_sese_split_blocks.  */
typedef hash_map<basic_block, gimple *> bb_stmt_map_t;
153 1.1 mrg
154 1.1 mrg /* Calls to OpenACC routines are made by all workers/wavefronts/warps, since
155 1.1 mrg the routine likely contains partitioned loops (else will do its own
156 1.1 mrg neutering and variable propagation). Return TRUE if a function call CALL
157 1.1 mrg should be made in (worker) single mode instead, rather than redundant
158 1.1 mrg mode. */
159 1.1 mrg
160 1.1 mrg static bool
161 1.1 mrg omp_sese_active_worker_call (gcall *call)
162 1.1 mrg {
163 1.1 mrg #define GOMP_DIM_SEQ GOMP_DIM_MAX
164 1.1 mrg tree fndecl = gimple_call_fndecl (call);
165 1.1 mrg
166 1.1 mrg if (!fndecl)
167 1.1 mrg return true;
168 1.1 mrg
169 1.1 mrg tree attrs = oacc_get_fn_attrib (fndecl);
170 1.1 mrg
171 1.1 mrg if (!attrs)
172 1.1 mrg return true;
173 1.1 mrg
174 1.1 mrg int level = oacc_fn_attrib_level (attrs);
175 1.1 mrg
176 1.1 mrg /* Neither regular functions nor "seq" routines should be run by all threads
177 1.1 mrg in worker-single mode. */
178 1.1 mrg return level == -1 || level == GOMP_DIM_SEQ;
179 1.1 mrg #undef GOMP_DIM_SEQ
180 1.1 mrg }
181 1.1 mrg
/* Split basic blocks such that each forked and join unspecs are at
   the start of their basic blocks.  Thus afterwards each block will
   have a single partitioning mode.  We also do the same for return
   insns, as they are executed by every thread.  Return the
   partitioning mode of the function as a whole.  Populate MAP with
   head and tail blocks.  We also clear the BB visited flag, which is
   used when finding partitions.  */
/* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_split_blocks'.  */

static void
omp_sese_split_blocks (bb_stmt_map_t *map)
{
  auto_vec<gimple *> worklist;
  basic_block block;

  /* Locate all the reorg instructions of interest.  */
  FOR_ALL_BB_FN (block, cfun)
    {
      /* Clear visited flag, for use by parallel locator  */
      block->flags &= ~BB_VISITED;

      for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	   !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	    {
	      enum ifn_unique_kind k = ((enum ifn_unique_kind)
		TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

	      if (k == IFN_UNIQUE_OACC_JOIN)
		worklist.safe_push (stmt);
	      else if (k == IFN_UNIQUE_OACC_FORK)
		{
		  /* The fork marker must be the last statement of its
		     block; the "forked" side starts in the single
		     successor block.  */
		  gcc_assert (gsi_one_before_end_p (gsi));
		  basic_block forked_block = single_succ (block);
		  gimple_stmt_iterator gsi2 = gsi_start_bb (forked_block);

		  /* We push a NOP as a placeholder for the "forked" stmt.
		     This is then recognized in omp_sese_find_par.  */
		  gimple *nop = gimple_build_nop ();
		  gsi_insert_before (&gsi2, nop, GSI_SAME_STMT);

		  worklist.safe_push (nop);
		}
	    }
	  else if (gimple_code (stmt) == GIMPLE_RETURN
		   || gimple_code (stmt) == GIMPLE_COND
		   || gimple_code (stmt) == GIMPLE_SWITCH
		   || (gimple_code (stmt) == GIMPLE_CALL
		       && !gimple_call_internal_p (stmt)
		       && !omp_sese_active_worker_call (as_a <gcall *> (stmt))))
	    worklist.safe_push (stmt);
	  else if (is_gimple_assign (stmt))
	    {
	      tree lhs = gimple_assign_lhs (stmt);

	      /* Force assignments to components/fields/elements of local
		 aggregates into fully-partitioned (redundant) mode.  This
		 avoids having to broadcast the whole aggregate.  The RHS of
		 the assignment will be propagated using the normal
		 mechanism.  */

	      switch (TREE_CODE (lhs))
		{
		case COMPONENT_REF:
		case BIT_FIELD_REF:
		case ARRAY_REF:
		  {
		    tree aggr = TREE_OPERAND (lhs, 0);

		    if (local_var_based_p (aggr))
		      worklist.safe_push (stmt);
		  }
		  break;

		default:
		  ;
		}
	    }
	}
    }

  /* Split blocks on the worklist.  */
  unsigned ix;
  gimple *stmt;

  for (ix = 0; worklist.iterate (ix, &stmt); ix++)
    {
      basic_block block = gimple_bb (stmt);

      if (gimple_code (stmt) == GIMPLE_COND)
	{
	  /* Rewrite "if (LHS op RHS)" as "pred = LHS op RHS; if (pred)"
	     so the comparison can be split into its own block (and its
	     result broadcast) while the branch stays behind.  */
	  gcond *orig_cond = as_a <gcond *> (stmt);
	  tree_code code = gimple_expr_code (orig_cond);
	  tree pred = make_ssa_name (boolean_type_node);
	  gimple *asgn = gimple_build_assign (pred, code,
			   gimple_cond_lhs (orig_cond),
			   gimple_cond_rhs (orig_cond));
	  gcond *new_cond
	    = gimple_build_cond (NE_EXPR, pred, boolean_false_node,
				 gimple_cond_true_label (orig_cond),
				 gimple_cond_false_label (orig_cond));

	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  gsi_insert_before (&gsi, asgn, GSI_SAME_STMT);
	  gsi_replace (&gsi, new_cond, true);

	  edge e = split_block (block, asgn);
	  block = e->dest;
	  map->get_or_insert (block) = new_cond;
	}
      else if ((gimple_code (stmt) == GIMPLE_CALL
		&& !gimple_call_internal_p (stmt))
	       || is_gimple_assign (stmt))
	{
	  /* Isolate the call/assignment into a block of its own: split
	     before it, then split again right after it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  gsi_prev (&gsi);

	  edge call = split_block (block, gsi_stmt (gsi));

	  gimple *call_stmt = gsi_stmt (gsi_start_bb (call->dest));

	  edge call_to_ret = split_block (call->dest, call_stmt);

	  map->get_or_insert (call_to_ret->src) = call_stmt;
	}
      else
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  gsi_prev (&gsi);

	  if (gsi_end_p (gsi))
	    /* Stmt is already at the start of its block.  */
	    map->get_or_insert (block) = stmt;
	  else
	    {
	      /* Split block before insn.  The insn is in the new block.  */
	      edge e = split_block (block, gsi_stmt (gsi));

	      block = e->dest;
	      map->get_or_insert (block) = stmt;
	    }
	}
    }
}
329 1.1 mrg
/* Return a human-readable description of partitioning MASK (a bitmask of
   gang/worker/vector bits) for dump output.  */

static const char *
mask_name (unsigned mask)
{
  static const char *const names[] =
    {
      "gang redundant",
      "gang partitioned",
      "worker partitioned",
      "gang+worker partitioned",
      "vector partitioned",
      "gang+vector partitioned",
      "worker+vector partitioned",
      "fully partitioned"
    };

  return mask < 8 ? names[mask] : "<illegal>";
}
346 1.1 mrg
347 1.1 mrg /* Dump this parallel and all its inner parallels. */
348 1.1 mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_dump_pars'. */
349 1.1 mrg
350 1.1 mrg static void
351 1.1 mrg omp_sese_dump_pars (parallel_g *par, unsigned depth)
352 1.1 mrg {
353 1.1 mrg fprintf (dump_file, "%u: mask %d (%s) head=%d, tail=%d\n",
354 1.1 mrg depth, par->mask, mask_name (par->mask),
355 1.1 mrg par->forked_block ? par->forked_block->index : -1,
356 1.1 mrg par->join_block ? par->join_block->index : -1);
357 1.1 mrg
358 1.1 mrg fprintf (dump_file, " blocks:");
359 1.1 mrg
360 1.1 mrg basic_block block;
361 1.1 mrg for (unsigned ix = 0; par->blocks.iterate (ix, &block); ix++)
362 1.1 mrg fprintf (dump_file, " %d", block->index);
363 1.1 mrg fprintf (dump_file, "\n");
364 1.1 mrg if (par->inner)
365 1.1 mrg omp_sese_dump_pars (par->inner, depth + 1);
366 1.1 mrg
367 1.1 mrg if (par->next)
368 1.1 mrg omp_sese_dump_pars (par->next, depth);
369 1.1 mrg }
370 1.1 mrg
/* If BLOCK contains a fork/join marker, process it to create or
   terminate a loop structure.  Add this block to the current loop,
   and then walk successor blocks.  Returns the parallel that is
   current once BLOCK (and, transitively, its successors) have been
   processed.  */
/* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_find_par'.  */

static parallel_g *
omp_sese_find_par (bb_stmt_map_t *map, parallel_g *par, basic_block block)
{
  if (block->flags & BB_VISITED)
    return par;
  block->flags |= BB_VISITED;

  if (gimple **stmtp = map->get (block))
    {
      gimple *stmt = *stmtp;

      if (gimple_code (stmt) == GIMPLE_COND
	  || gimple_code (stmt) == GIMPLE_SWITCH
	  || gimple_code (stmt) == GIMPLE_RETURN
	  || (gimple_code (stmt) == GIMPLE_CALL
	      && !gimple_call_internal_p (stmt))
	  || is_gimple_assign (stmt))
	{
	  /* A single block that is forced to be at the maximum partition
	     level.  Make a singleton par for it.  */
	  par = new parallel_g (par, GOMP_DIM_MASK (GOMP_DIM_GANG)
				     | GOMP_DIM_MASK (GOMP_DIM_WORKER)
				     | GOMP_DIM_MASK (GOMP_DIM_VECTOR));
	  par->forked_block = block;
	  par->forked_stmt = stmt;
	  par->blocks.safe_push (block);
	  /* Pop straight back to the enclosing parallel; the singleton
	     is complete.  */
	  par = par->parent;
	  goto walk_successors;
	}
      else if (gimple_nop_p (stmt))
	{
	  /* This is the NOP placeholder inserted by omp_sese_split_blocks
	     just after a fork marker; the real IFN_UNIQUE fork call is the
	     last statement of the single predecessor block.  */
	  basic_block pred = single_pred (block);
	  gcc_assert (pred);
	  gimple_stmt_iterator gsi = gsi_last_bb (pred);
	  gimple *final_stmt = gsi_stmt (gsi);

	  if (gimple_call_internal_p (final_stmt, IFN_UNIQUE))
	    {
	      gcall *call = as_a <gcall *> (final_stmt);
	      enum ifn_unique_kind k = ((enum ifn_unique_kind)
		TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

	      if (k == IFN_UNIQUE_OACC_FORK)
		{
		  /* Open a new (child) parallel for the partitioned
		     dimension named by argument 2.  */
		  HOST_WIDE_INT dim
		    = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
		  unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;

		  par = new parallel_g (par, mask);
		  par->forked_block = block;
		  par->forked_stmt = final_stmt;
		  par->fork_stmt = stmt;
		}
	      else
		gcc_unreachable ();
	    }
	  else
	    gcc_unreachable ();
	}
      else if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  gcall *call = as_a <gcall *> (stmt);
	  enum ifn_unique_kind k = ((enum ifn_unique_kind)
	    TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
	  if (k == IFN_UNIQUE_OACC_JOIN)
	    {
	      /* Close the current parallel; its mask must match the
		 dimension recorded on the join marker.  */
	      HOST_WIDE_INT dim = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
	      unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;

	      gcc_assert (par->mask == mask);
	      par->join_block = block;
	      par->join_stmt = stmt;
	      par = par->parent;
	    }
	  else
	    gcc_unreachable ();
	}
      else
	gcc_unreachable ();
    }

  if (par)
    /* Add this block onto the current loop's list of blocks.  */
    par->blocks.safe_push (block);
  else
    /* This must be the entry block.  Create a NULL parallel.  */
    par = new parallel_g (0, 0);

 walk_successors:
  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, block->succs)
    omp_sese_find_par (map, par, e->dest);

  return par;
}
474 1.1 mrg
475 1.1 mrg /* DFS walk the CFG looking for fork & join markers. Construct
476 1.1 mrg loop structures as we go. MAP is a mapping of basic blocks
477 1.1 mrg to head & tail markers, discovered when splitting blocks. This
478 1.1 mrg speeds up the discovery. We rely on the BB visited flag having
479 1.1 mrg been cleared when splitting blocks. */
480 1.1 mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_discover_pars'. */
481 1.1 mrg
482 1.1 mrg static parallel_g *
483 1.1 mrg omp_sese_discover_pars (bb_stmt_map_t *map)
484 1.1 mrg {
485 1.1 mrg basic_block block;
486 1.1 mrg
487 1.1 mrg /* Mark exit blocks as visited. */
488 1.1 mrg block = EXIT_BLOCK_PTR_FOR_FN (cfun);
489 1.1 mrg block->flags |= BB_VISITED;
490 1.1 mrg
491 1.1 mrg /* And entry block as not. */
492 1.1 mrg block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
493 1.1 mrg block->flags &= ~BB_VISITED;
494 1.1 mrg
495 1.1 mrg parallel_g *par = omp_sese_find_par (map, 0, block);
496 1.1 mrg
497 1.1 mrg if (dump_file)
498 1.1 mrg {
499 1.1 mrg fprintf (dump_file, "\nLoops\n");
500 1.1 mrg omp_sese_dump_pars (par, 0);
501 1.1 mrg fprintf (dump_file, "\n");
502 1.1 mrg }
503 1.1 mrg
504 1.1 mrg return par;
505 1.1 mrg }
506 1.1 mrg
507 1.1 mrg static void
508 1.1 mrg populate_single_mode_bitmaps (parallel_g *par, bitmap worker_single,
509 1.1 mrg bitmap vector_single, unsigned outer_mask,
510 1.1 mrg int depth)
511 1.1 mrg {
512 1.1 mrg unsigned mask = outer_mask | par->mask;
513 1.1 mrg
514 1.1 mrg basic_block block;
515 1.1 mrg
516 1.1 mrg for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
517 1.1 mrg {
518 1.1 mrg if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
519 1.1 mrg bitmap_set_bit (worker_single, block->index);
520 1.1 mrg
521 1.1 mrg if ((mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)) == 0)
522 1.1 mrg bitmap_set_bit (vector_single, block->index);
523 1.1 mrg }
524 1.1 mrg
525 1.1 mrg if (par->inner)
526 1.1 mrg populate_single_mode_bitmaps (par->inner, worker_single, vector_single,
527 1.1 mrg mask, depth + 1);
528 1.1 mrg if (par->next)
529 1.1 mrg populate_single_mode_bitmaps (par->next, worker_single, vector_single,
530 1.1 mrg outer_mask, depth);
531 1.1 mrg }
532 1.1 mrg
/* A map from SSA names or var decls to record fields.  */

typedef hash_map<tree, tree> field_map_t;

/* For each propagation record type, this is a map from SSA names or var decls
   to propagate, to the field in the record type that should be used for
   transmission and reception.  */

typedef hash_map<tree, field_map_t> record_field_map_t;
542 1.1 mrg
/* Build a FIELD_DECL for VAR (an SSA_NAME or a VAR_DECL), insert it into
   RECORD_TYPE, and record the VAR -> field mapping in FIELDS.  Each VAR
   may be installed at most once (asserted below).  */

static void
install_var_field (tree var, tree record_type, field_map_t *fields)
{
  tree name;
  char tmp[20];

  if (TREE_CODE (var) == SSA_NAME)
    {
      /* Prefer the user-visible identifier; anonymous SSA names get a
	 synthesized "_<version>" name.  */
      name = SSA_NAME_IDENTIFIER (var);
      if (!name)
	{
	  sprintf (tmp, "_%u", (unsigned) SSA_NAME_VERSION (var));
	  name = get_identifier (tmp);
	}
    }
  else if (TREE_CODE (var) == VAR_DECL)
    {
      /* Anonymous decls get a synthesized "D_<uid>" name.  */
      name = DECL_NAME (var);
      if (!name)
	{
	  sprintf (tmp, "D_%u", (unsigned) DECL_UID (var));
	  name = get_identifier (tmp);
	}
    }
  else
    gcc_unreachable ();

  gcc_assert (!fields->get (var));

  tree type = TREE_TYPE (var);

  /* Strip "restrict" from pointer types — presumably because the
     qualifier does not carry over to the broadcast copy of the value;
     TODO confirm against the senders/receivers built elsewhere.  */
  if (POINTER_TYPE_P (type)
      && TYPE_RESTRICT (type))
    type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT);

  tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, name, type);

  if (TREE_CODE (var) == VAR_DECL && type == TREE_TYPE (var))
    {
      /* The type was not rewritten above, so the original decl's
	 alignment and volatility can be carried over verbatim.  */
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
    }
  else
    SET_DECL_ALIGN (field, TYPE_ALIGN (type));

  fields->put (var, field);

  insert_field_into_struct (record_type, field);
}
593 1.1 mrg
/* Sets of SSA_NAMES or VAR_DECLs to propagate.  One set is allocated
   per basic block that defines values needing broadcast.  */
typedef hash_set<tree> propagation_set;
596 1.1 mrg
597 1.1 mrg static void
598 1.1 mrg find_ssa_names_to_propagate (parallel_g *par, unsigned outer_mask,
599 1.1 mrg bitmap worker_single, bitmap vector_single,
600 1.1 mrg vec<propagation_set *> *prop_set)
601 1.1 mrg {
602 1.1 mrg unsigned mask = outer_mask | par->mask;
603 1.1 mrg
604 1.1 mrg if (par->inner)
605 1.1 mrg find_ssa_names_to_propagate (par->inner, mask, worker_single,
606 1.1 mrg vector_single, prop_set);
607 1.1 mrg if (par->next)
608 1.1 mrg find_ssa_names_to_propagate (par->next, outer_mask, worker_single,
609 1.1 mrg vector_single, prop_set);
610 1.1 mrg
611 1.1 mrg if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
612 1.1 mrg {
613 1.1 mrg basic_block block;
614 1.1 mrg int ix;
615 1.1 mrg
616 1.1 mrg for (ix = 0; par->blocks.iterate (ix, &block); ix++)
617 1.1 mrg {
618 1.1 mrg for (gphi_iterator psi = gsi_start_phis (block);
619 1.1 mrg !gsi_end_p (psi); gsi_next (&psi))
620 1.1 mrg {
621 1.1 mrg gphi *phi = psi.phi ();
622 1.1 mrg use_operand_p use;
623 1.1 mrg ssa_op_iter iter;
624 1.1 mrg
625 1.1 mrg FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE)
626 1.1 mrg {
627 1.1 mrg tree var = USE_FROM_PTR (use);
628 1.1 mrg
629 1.1 mrg if (TREE_CODE (var) != SSA_NAME)
630 1.1 mrg continue;
631 1.1 mrg
632 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (var);
633 1.1 mrg
634 1.1 mrg if (gimple_nop_p (def_stmt))
635 1.1 mrg continue;
636 1.1 mrg
637 1.1 mrg basic_block def_bb = gimple_bb (def_stmt);
638 1.1 mrg
639 1.1 mrg if (bitmap_bit_p (worker_single, def_bb->index))
640 1.1 mrg {
641 1.1 mrg if (!(*prop_set)[def_bb->index])
642 1.1 mrg (*prop_set)[def_bb->index] = new propagation_set;
643 1.1 mrg
644 1.1 mrg propagation_set *ws_prop = (*prop_set)[def_bb->index];
645 1.1 mrg
646 1.1 mrg ws_prop->add (var);
647 1.1 mrg }
648 1.1 mrg }
649 1.1 mrg }
650 1.1 mrg
651 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block);
652 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi))
653 1.1 mrg {
654 1.1 mrg use_operand_p use;
655 1.1 mrg ssa_op_iter iter;
656 1.1 mrg gimple *stmt = gsi_stmt (gsi);
657 1.1 mrg
658 1.1 mrg FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
659 1.1 mrg {
660 1.1 mrg tree var = USE_FROM_PTR (use);
661 1.1 mrg
662 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (var);
663 1.1 mrg
664 1.1 mrg if (gimple_nop_p (def_stmt))
665 1.1 mrg continue;
666 1.1 mrg
667 1.1 mrg basic_block def_bb = gimple_bb (def_stmt);
668 1.1 mrg
669 1.1 mrg if (bitmap_bit_p (worker_single, def_bb->index))
670 1.1 mrg {
671 1.1 mrg if (!(*prop_set)[def_bb->index])
672 1.1 mrg (*prop_set)[def_bb->index] = new propagation_set;
673 1.1 mrg
674 1.1 mrg propagation_set *ws_prop = (*prop_set)[def_bb->index];
675 1.1 mrg
676 1.1 mrg ws_prop->add (var);
677 1.1 mrg }
678 1.1 mrg }
679 1.1 mrg }
680 1.1 mrg }
681 1.1 mrg }
682 1.1 mrg }
683 1.1 mrg
684 1.1 mrg /* Callback for walk_gimple_stmt to find RHS VAR_DECLs (uses) in a
685 1.1 mrg statement. */
686 1.1 mrg
687 1.1 mrg static tree
688 1.1 mrg find_partitioned_var_uses_1 (tree *node, int *, void *data)
689 1.1 mrg {
690 1.1 mrg walk_stmt_info *wi = (walk_stmt_info *) data;
691 1.1 mrg hash_set<tree> *partitioned_var_uses = (hash_set<tree> *) wi->info;
692 1.1 mrg
693 1.1 mrg if (!wi->is_lhs && VAR_P (*node))
694 1.1 mrg partitioned_var_uses->add (*node);
695 1.1 mrg
696 1.1 mrg return NULL_TREE;
697 1.1 mrg }
698 1.1 mrg
699 1.1 mrg static void
700 1.1 mrg find_partitioned_var_uses (parallel_g *par, unsigned outer_mask,
701 1.1 mrg hash_set<tree> *partitioned_var_uses)
702 1.1 mrg {
703 1.1 mrg unsigned mask = outer_mask | par->mask;
704 1.1 mrg
705 1.1 mrg if (par->inner)
706 1.1 mrg find_partitioned_var_uses (par->inner, mask, partitioned_var_uses);
707 1.1 mrg if (par->next)
708 1.1 mrg find_partitioned_var_uses (par->next, outer_mask, partitioned_var_uses);
709 1.1 mrg
710 1.1 mrg if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
711 1.1 mrg {
712 1.1 mrg basic_block block;
713 1.1 mrg int ix;
714 1.1 mrg
715 1.1 mrg for (ix = 0; par->blocks.iterate (ix, &block); ix++)
716 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block);
717 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi))
718 1.1 mrg {
719 1.1 mrg walk_stmt_info wi;
720 1.1 mrg memset (&wi, 0, sizeof (wi));
721 1.1 mrg wi.info = (void *) partitioned_var_uses;
722 1.1 mrg walk_gimple_stmt (&gsi, NULL, find_partitioned_var_uses_1, &wi);
723 1.1 mrg }
724 1.1 mrg }
725 1.1 mrg }
726 1.1 mrg
727 1.1 mrg /* Gang-private variables (typically placed in a GPU's shared memory) do not
728 1.1 mrg need to be processed by the worker-propagation mechanism. Populate the
729 1.1 mrg GANG_PRIVATE_VARS set with any such variables found in the current
730 1.1 mrg function. */
731 1.1 mrg
732 1.1 mrg static void
733 1.1 mrg find_gang_private_vars (hash_set<tree> *gang_private_vars)
734 1.1 mrg {
735 1.1 mrg basic_block block;
736 1.1 mrg
737 1.1 mrg FOR_EACH_BB_FN (block, cfun)
738 1.1 mrg {
739 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block);
740 1.1 mrg !gsi_end_p (gsi);
741 1.1 mrg gsi_next (&gsi))
742 1.1 mrg {
743 1.1 mrg gimple *stmt = gsi_stmt (gsi);
744 1.1 mrg
745 1.1 mrg if (gimple_call_internal_p (stmt, IFN_UNIQUE))
746 1.1 mrg {
747 1.1 mrg enum ifn_unique_kind k = ((enum ifn_unique_kind)
748 1.1 mrg TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
749 1.1 mrg if (k == IFN_UNIQUE_OACC_PRIVATE)
750 1.1 mrg {
751 1.1 mrg HOST_WIDE_INT level
752 1.1 mrg = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
753 1.1 mrg if (level != GOMP_DIM_GANG)
754 1.1 mrg continue;
755 1.1 mrg for (unsigned i = 3; i < gimple_call_num_args (stmt); i++)
756 1.1 mrg {
757 1.1 mrg tree arg = gimple_call_arg (stmt, i);
758 1.1 mrg gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
759 1.1 mrg tree decl = TREE_OPERAND (arg, 0);
760 1.1 mrg gang_private_vars->add (decl);
761 1.1 mrg }
762 1.1 mrg }
763 1.1 mrg }
764 1.1 mrg }
765 1.1 mrg }
766 1.1 mrg }
767 1.1 mrg
768 1.1 mrg static void
769 1.1 mrg find_local_vars_to_propagate (parallel_g *par, unsigned outer_mask,
770 1.1 mrg hash_set<tree> *partitioned_var_uses,
771 1.1 mrg hash_set<tree> *gang_private_vars,
772 1.1 mrg bitmap writes_gang_private,
773 1.1 mrg vec<propagation_set *> *prop_set)
774 1.1 mrg {
775 1.1 mrg unsigned mask = outer_mask | par->mask;
776 1.1 mrg
777 1.1 mrg if (par->inner)
778 1.1 mrg find_local_vars_to_propagate (par->inner, mask, partitioned_var_uses,
779 1.1 mrg gang_private_vars, writes_gang_private,
780 1.1 mrg prop_set);
781 1.1 mrg if (par->next)
782 1.1 mrg find_local_vars_to_propagate (par->next, outer_mask, partitioned_var_uses,
783 1.1 mrg gang_private_vars, writes_gang_private,
784 1.1 mrg prop_set);
785 1.1 mrg
786 1.1 mrg if (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)))
787 1.1 mrg {
788 1.1 mrg basic_block block;
789 1.1 mrg int ix;
790 1.1 mrg
791 1.1 mrg for (ix = 0; par->blocks.iterate (ix, &block); ix++)
792 1.1 mrg {
793 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block);
794 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi))
795 1.1 mrg {
796 1.1 mrg gimple *stmt = gsi_stmt (gsi);
797 1.1 mrg tree var;
798 1.1 mrg unsigned i;
799 1.1 mrg
800 1.1 mrg FOR_EACH_LOCAL_DECL (cfun, i, var)
801 1.1 mrg {
802 1.1 mrg if (!VAR_P (var)
803 1.1 mrg || is_global_var (var)
804 1.1 mrg || AGGREGATE_TYPE_P (TREE_TYPE (var))
805 1.1 mrg || !partitioned_var_uses->contains (var))
806 1.1 mrg continue;
807 1.1 mrg
808 1.1 mrg if (stmt_may_clobber_ref_p (stmt, var))
809 1.1 mrg {
810 1.1 mrg if (dump_file)
811 1.1 mrg {
812 1.1 mrg fprintf (dump_file, "bb %u: local variable may be "
813 1.1 mrg "clobbered in %s mode: ", block->index,
814 1.1 mrg mask_name (mask));
815 1.1 mrg print_generic_expr (dump_file, var, TDF_SLIM);
816 1.1 mrg fprintf (dump_file, "\n");
817 1.1 mrg }
818 1.1 mrg
819 1.1 mrg if (gang_private_vars->contains (var))
820 1.1 mrg {
821 1.1 mrg /* If we write a gang-private variable, we want a
822 1.1 mrg barrier at the end of the block. */
823 1.1 mrg bitmap_set_bit (writes_gang_private, block->index);
824 1.1 mrg continue;
825 1.1 mrg }
826 1.1 mrg
827 1.1 mrg if (!(*prop_set)[block->index])
828 1.1 mrg (*prop_set)[block->index] = new propagation_set;
829 1.1 mrg
830 1.1 mrg propagation_set *ws_prop
831 1.1 mrg = (*prop_set)[block->index];
832 1.1 mrg
833 1.1 mrg ws_prop->add (var);
834 1.1 mrg }
835 1.1 mrg }
836 1.1 mrg }
837 1.1 mrg }
838 1.1 mrg }
839 1.1 mrg }
840 1.1 mrg
/* Transform basic blocks FROM, TO (which may be the same block) into:
   if (GOACC_single_start ())
     BLOCK;
   GOACC_barrier ();
			      \  |  /
			      +----+
			      |    |        (new) predicate block
			      +----+--
   \  |  /   \  |  /	        |t    \
   +----+    +----+	      +----+  |
   |    |    |    |  ===>     |    |  | f   (old) from block
   +----+    +----+	      +----+  |
     |  t/  \f  |	        |    /
			      +----+/
  (split     (split before	|    |      skip block
  at end)    condition)	      +----+
				t/  \f
*/

static void
worker_single_simple (basic_block from, basic_block to,
		      hash_set<tree> *def_escapes_block)
{
  gimple *call, *cond;
  tree lhs, decl;
  basic_block skip_block;

  /* Split off everything after TO's controlling statement (if any) into
     the "skip" block, where neutered and active paths re-join.  */
  gimple_stmt_iterator gsi = gsi_last_bb (to);
  if (EDGE_COUNT (to->succs) > 1)
    {
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND);
      gsi_prev (&gsi);
    }
  edge e = split_block (to, gsi_stmt (gsi));
  skip_block = e->dest;

  gimple_stmt_iterator start = gsi_after_labels (from);

  /* lhs = GOACC_single_start ();  -- true only for the active worker.  */
  decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_START);
  lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
  call = gimple_build_call (decl, 0);
  gimple_call_set_lhs (call, lhs);
  gsi_insert_before (&start, call, GSI_NEW_STMT);
  update_stmt (call);

  /* if (lhs != false) -- guard the whole FROM..TO region.  */
  cond = gimple_build_cond (EQ_EXPR, lhs,
			    fold_convert_loc (UNKNOWN_LOCATION,
					      TREE_TYPE (lhs),
					      boolean_true_node),
			    NULL_TREE, NULL_TREE);
  gsi_insert_after (&start, cond, GSI_NEW_STMT);
  update_stmt (cond);

  edge et = split_block (from, cond);
  et->flags &= ~EDGE_FALLTHRU;
  et->flags |= EDGE_TRUE_VALUE;
  /* Make the active worker the more probable path so we prefer fallthrough
     (letting the idle workers jump around more).  */
  et->probability = profile_probability::likely ();

  edge ef = make_edge (from, skip_block, EDGE_FALSE_VALUE);
  ef->probability = et->probability.invert ();

  /* The false (idle-worker) edge gets its own block so that dummy
     definitions for escaping SSA names can be placed on it.  */
  basic_block neutered = split_edge (ef);
  gimple_stmt_iterator neut_gsi = gsi_last_bb (neutered);

  /* Every SSA name defined in the guarded region that is live past it
     needs a PHI in SKIP_BLOCK merging the real value (from the active
     path) with a placeholder (from the neutered path).  */
  for (gsi = gsi_start_bb (et->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      ssa_op_iter iter;
      tree var;

      FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF)
	{
	  if (def_escapes_block->contains (var))
	    {
	      gphi *join_phi = create_phi_node (NULL_TREE, skip_block);
	      create_new_def_for (var, join_phi,
				  gimple_phi_result_ptr (join_phi));
	      add_phi_arg (join_phi, var, e, UNKNOWN_LOCATION);

	      tree neutered_def = copy_ssa_name (var, NULL);
	      /* We really want "don't care" or some value representing
		 undefined here, but optimizers will probably get rid of the
		 zero-assignments anyway.  */
	      gassign *zero = gimple_build_assign (neutered_def,
				build_zero_cst (TREE_TYPE (neutered_def)));

	      gsi_insert_after (&neut_gsi, zero, GSI_CONTINUE_LINKING);
	      update_stmt (zero);

	      add_phi_arg (join_phi, neutered_def, single_succ_edge (neutered),
			   UNKNOWN_LOCATION);
	      update_stmt (join_phi);
	    }
	}
    }
}
939 1.1 mrg
940 1.1 mrg static tree
941 1.1 mrg build_receiver_ref (tree var, tree receiver_decl, field_map_t *fields)
942 1.1 mrg {
943 1.1 mrg tree x = build_simple_mem_ref (receiver_decl);
944 1.1 mrg tree field = *fields->get (var);
945 1.1 mrg TREE_THIS_NOTRAP (x) = 1;
946 1.1 mrg x = omp_build_component_ref (x, field);
947 1.1 mrg return x;
948 1.1 mrg }
949 1.1 mrg
950 1.1 mrg static tree
951 1.1 mrg build_sender_ref (tree var, tree sender_decl, field_map_t *fields)
952 1.1 mrg {
953 1.1 mrg if (POINTER_TYPE_P (TREE_TYPE (sender_decl)))
954 1.1 mrg sender_decl = build_simple_mem_ref (sender_decl);
955 1.1 mrg tree field = *fields->get (var);
956 1.1 mrg return omp_build_component_ref (sender_decl, field);
957 1.1 mrg }
958 1.1 mrg
959 1.1 mrg static int
960 1.1 mrg sort_by_ssa_version_or_uid (const void *p1, const void *p2)
961 1.1 mrg {
962 1.1 mrg const tree t1 = *(const tree *)p1;
963 1.1 mrg const tree t2 = *(const tree *)p2;
964 1.1 mrg
965 1.1 mrg if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) == SSA_NAME)
966 1.1 mrg return SSA_NAME_VERSION (t1) - SSA_NAME_VERSION (t2);
967 1.1 mrg else if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) != SSA_NAME)
968 1.1 mrg return -1;
969 1.1 mrg else if (TREE_CODE (t1) != SSA_NAME && TREE_CODE (t2) == SSA_NAME)
970 1.1 mrg return 1;
971 1.1 mrg else
972 1.1 mrg return DECL_UID (t1) - DECL_UID (t2);
973 1.1 mrg }
974 1.1 mrg
975 1.1 mrg static int
976 1.1 mrg sort_by_size_then_ssa_version_or_uid (const void *p1, const void *p2)
977 1.1 mrg {
978 1.1 mrg const tree t1 = *(const tree *)p1;
979 1.1 mrg const tree t2 = *(const tree *)p2;
980 1.1 mrg unsigned HOST_WIDE_INT s1 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t1)));
981 1.1 mrg unsigned HOST_WIDE_INT s2 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t2)));
982 1.1 mrg if (s1 != s2)
983 1.1 mrg return s2 - s1;
984 1.1 mrg else
985 1.1 mrg return sort_by_ssa_version_or_uid (p1, p2);
986 1.1 mrg }
987 1.1 mrg
/* Transform the worker-single region FROM..TO in the case where values
   defined inside it must be broadcast to the other workers: one worker
   executes the region and stores the escaping definitions into a
   shared-memory record, all workers synchronize at a barrier, and the
   neutered workers then read the values back out.

   DEF_ESCAPES_BLOCK: definitions (SSA names or VAR_DECLs) used outside the
   region.  WORKER_PARTITIONED_USES: those also used in worker-partitioned
   code, hence needing broadcast.  RECORD_TYPE (may be NULL): layout of the
   broadcast buffer; field mappings are in RECORD_FIELD_MAP.  PLACEMENT:
   offset of this block's buffer in the shared-memory area.
   ISOLATE_BROADCASTS: emit extra barriers around the broadcast because the
   buffer space is reused by other blocks.  HAS_GANG_PRIVATE_WRITE: the
   region writes gang-private storage, so a barrier is required even with
   no broadcast record.  */

static void
worker_single_copy (basic_block from, basic_block to,
		    hash_set<tree> *def_escapes_block,
		    hash_set<tree> *worker_partitioned_uses,
		    tree record_type, record_field_map_t *record_field_map,
		    unsigned HOST_WIDE_INT placement,
		    bool isolate_broadcasts, bool has_gang_private_write)
{
  /* If we only have virtual defs, we'll have no record type, but we still want
     to emit single_copy_start and (particularly) single_copy_end to act as
     a vdef source on the neutered edge representing memory writes on the
     non-neutered edge.  */
  if (!record_type)
    record_type = char_type_node;

  tree sender_decl
    = targetm.goacc.create_worker_broadcast_record (record_type, true,
						    ".oacc_worker_o",
						    placement);
  tree receiver_decl
    = targetm.goacc.create_worker_broadcast_record (record_type, false,
						    ".oacc_worker_i",
						    placement);

  /* Split after the region's last non-control statement: the tail becomes
     the barrier/join block.  */
  gimple_stmt_iterator gsi = gsi_last_bb (to);
  if (EDGE_COUNT (to->succs) > 1)
    gsi_prev (&gsi);
  edge e = split_block (to, gsi_stmt (gsi));
  basic_block barrier_block = e->dest;

  gimple_stmt_iterator start = gsi_after_labels (from);

  /* lhs = GOACC_single_copy_start (&sender_record): judging by the
     conditional built below, a NULL result selects the worker that
     executes the region, and a non-NULL result is the address the other
     workers read the broadcast data from.  */
  tree decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_START);

  tree lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));

  gimple *call
    = gimple_build_call (decl, 1,
			 POINTER_TYPE_P (TREE_TYPE (sender_decl))
			 ? sender_decl : build_fold_addr_expr (sender_decl));
  gimple_call_set_lhs (call, lhs);
  gsi_insert_before (&start, call, GSI_NEW_STMT);
  update_stmt (call);

  /* The shared-memory range for this block overflowed.  Add a barrier before
     the GOACC_single_copy_start call.  */
  if (isolate_broadcasts)
    {
      decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
      gimple *acc_bar = gimple_build_call (decl, 0);
      gsi_insert_before (&start, acc_bar, GSI_SAME_STMT);
    }

  /* receiver_decl = (type of receiver_decl) lhs;  */
  tree conv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));

  gimple *conv = gimple_build_assign (conv_tmp,
				      fold_convert (TREE_TYPE (receiver_decl),
						    lhs));
  update_stmt (conv);
  gsi_insert_after (&start, conv, GSI_NEW_STMT);
  gimple *asgn = gimple_build_assign (receiver_decl, conv_tmp);
  gsi_insert_after (&start, asgn, GSI_NEW_STMT);
  update_stmt (asgn);

  tree zero_ptr = build_int_cst (TREE_TYPE (receiver_decl), 0);

  /* Reload the receiver pointer into an SSA name for use in both
     conditionals below.  */
  tree recv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));
  asgn = gimple_build_assign (recv_tmp, receiver_decl);
  gsi_insert_after (&start, asgn, GSI_NEW_STMT);
  update_stmt (asgn);

  /* if (recv_tmp == NULL) execute the region body; else jump straight to
     the barrier block.  */
  gimple *cond = gimple_build_cond (EQ_EXPR, recv_tmp, zero_ptr, NULL_TREE,
				    NULL_TREE);
  update_stmt (cond);

  gsi_insert_after (&start, cond, GSI_NEW_STMT);

  edge et = split_block (from, cond);
  et->flags &= ~EDGE_FALLTHRU;
  et->flags |= EDGE_TRUE_VALUE;
  /* Make the active worker the more probable path so we prefer fallthrough
     (letting the idle workers jump around more). */
  et->probability = profile_probability::likely ();

  basic_block body = et->dest;

  edge ef = make_edge (from, barrier_block, EDGE_FALSE_VALUE);
  ef->probability = et->probability.invert ();

  /* After the barrier: if (recv_tmp != NULL) the neutered workers copy the
     broadcast values out of the buffer; the active worker skips ahead.  */
  gimple_stmt_iterator bar_gsi = gsi_start_bb (barrier_block);
  cond = gimple_build_cond (NE_EXPR, recv_tmp, zero_ptr, NULL_TREE, NULL_TREE);

  /* A barrier is needed when there is real data to broadcast, or when the
     region wrote gang-private storage that other workers may read.  */
  if (record_type != char_type_node || has_gang_private_write)
    {
      decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
      gimple *acc_bar = gimple_build_call (decl, 0);

      gsi_insert_before (&bar_gsi, acc_bar, GSI_NEW_STMT);
      gsi_insert_after (&bar_gsi, cond, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&bar_gsi, cond, GSI_NEW_STMT);

  edge et2 = split_block (barrier_block, cond);
  et2->flags &= ~EDGE_FALLTHRU;
  et2->flags |= EDGE_TRUE_VALUE;
  et2->probability = profile_probability::unlikely ();

  basic_block exit_block = et2->dest;

  /* The copyout block sits on the true (neutered workers) path.  */
  basic_block copyout_block = split_edge (et2);
  edge ef2 = make_edge (barrier_block, exit_block, EDGE_FALSE_VALUE);
  ef2->probability = et2->probability.invert ();

  gimple_stmt_iterator copyout_gsi = gsi_start_bb (copyout_block);

  edge copyout_to_exit = single_succ_edge (copyout_block);

  /* Statements storing escaping defs into the send buffer; they are
     appended to the active worker's body at the end of this function.  */
  gimple_seq sender_seq = NULL;

  /* Make sure we iterate over definitions in a stable order.  */
  auto_vec<tree> escape_vec (def_escapes_block->elements ());
  for (hash_set<tree>::iterator it = def_escapes_block->begin ();
       it != def_escapes_block->end (); ++it)
    escape_vec.quick_push (*it);
  escape_vec.qsort (sort_by_ssa_version_or_uid);

  for (unsigned i = 0; i < escape_vec.length (); i++)
    {
      tree var = escape_vec[i];

      /* Virtual operands are not broadcast individually; the
	 single_copy_start/end calls stand in for them (see the comment at
	 the top of this function).  */
      if (TREE_CODE (var) == SSA_NAME && SSA_NAME_IS_VIRTUAL_OPERAND (var))
	continue;

      tree barrier_def = 0;

      if (TREE_CODE (var) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (var);

	  if (gimple_nop_p (def_stmt))
	    continue;

	  /* The barrier phi takes one result from the actual work of the
	     block we're neutering, and the other result is constant zero of
	     the same type.  */

	  gphi *barrier_phi = create_phi_node (NULL_TREE, barrier_block);
	  barrier_def = create_new_def_for (var, barrier_phi,
			  gimple_phi_result_ptr (barrier_phi));

	  add_phi_arg (barrier_phi, var, e, UNKNOWN_LOCATION);
	  add_phi_arg (barrier_phi, build_zero_cst (TREE_TYPE (var)), ef,
		       UNKNOWN_LOCATION);

	  update_stmt (barrier_phi);
	}
      else
	gcc_assert (TREE_CODE (var) == VAR_DECL);

      /* If we had no record type, we will have no fields map.  */
      field_map_t *fields = record_field_map->get (record_type);

      if (worker_partitioned_uses->contains (var)
	  && fields
	  && fields->get (var))
	{
	  tree neutered_def = make_ssa_name (TREE_TYPE (var));

	  /* Receive definition from shared memory block.  */

	  tree receiver_ref = build_receiver_ref (var, receiver_decl, fields);
	  gassign *recv = gimple_build_assign (neutered_def,
					       receiver_ref);
	  gsi_insert_after (&copyout_gsi, recv, GSI_CONTINUE_LINKING);
	  update_stmt (recv);

	  if (TREE_CODE (var) == VAR_DECL)
	    {
	      /* If it's a VAR_DECL, we only copied to an SSA temporary.  Copy
		 to the final location now.  */
	      gassign *asgn = gimple_build_assign (var, neutered_def);
	      gsi_insert_after (&copyout_gsi, asgn, GSI_CONTINUE_LINKING);
	      update_stmt (asgn);
	    }
	  else
	    {
	      /* If it's an SSA name, create a new phi at the join node to
		 represent either the output from the active worker (the
		 barrier) or the inactive workers (the copyout block).  */
	      gphi *join_phi = create_phi_node (NULL_TREE, exit_block);
	      create_new_def_for (barrier_def, join_phi,
				  gimple_phi_result_ptr (join_phi));
	      add_phi_arg (join_phi, barrier_def, ef2, UNKNOWN_LOCATION);
	      add_phi_arg (join_phi, neutered_def, copyout_to_exit,
			   UNKNOWN_LOCATION);
	      update_stmt (join_phi);
	    }

	  /* Send definition to shared memory block.  */

	  tree sender_ref = build_sender_ref (var, sender_decl, fields);

	  if (TREE_CODE (var) == SSA_NAME)
	    {
	      gassign *send = gimple_build_assign (sender_ref, var);
	      gimple_seq_add_stmt (&sender_seq, send);
	      update_stmt (send);
	    }
	  else if (TREE_CODE (var) == VAR_DECL)
	    {
	      /* Copy through an SSA temporary so the store into the record
		 has a register-like RHS.  */
	      tree tmp = make_ssa_name (TREE_TYPE (var));
	      gassign *send = gimple_build_assign (tmp, var);
	      gimple_seq_add_stmt (&sender_seq, send);
	      update_stmt (send);
	      send = gimple_build_assign (sender_ref, tmp);
	      gimple_seq_add_stmt (&sender_seq, send);
	      update_stmt (send);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  /* The shared-memory range for this block overflowed.  Add a barrier at the
     end.  */
  if (isolate_broadcasts)
    {
      gsi = gsi_start_bb (exit_block);
      decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
      gimple *acc_bar = gimple_build_call (decl, 0);
      gsi_insert_before (&gsi, acc_bar, GSI_SAME_STMT);
    }

  /* It's possible for the ET->DEST block (the work done by the active thread)
     to finish with a control-flow insn, e.g. a UNIQUE function call.  Split
     the block and add SENDER_SEQ in the latter part to avoid having control
     flow in the middle of a BB.  */

  decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_END);
  call = gimple_build_call (decl, 1,
			    POINTER_TYPE_P (TREE_TYPE (sender_decl))
			    ? sender_decl
			    : build_fold_addr_expr (sender_decl));
  gimple_seq_add_stmt (&sender_seq, call);

  gsi = gsi_last_bb (body);
  gimple *last = gsi_stmt (gsi);
  basic_block sender_block = split_block (body, last)->dest;
  gsi = gsi_last_bb (sender_block);
  gsi_insert_seq_after (&gsi, sender_seq, GSI_CONTINUE_LINKING);
}
1240 1.1 mrg
/* Map from basic block to its assigned offset in the worker broadcast
   shared-memory area, paired with a flag saying whether a range was
   actually allocated for it (false means the block's broadcasts must be
   isolated with barriers -- see neuter_worker_single).  */
typedef hash_map<basic_block, std::pair<unsigned HOST_WIDE_INT, bool> >
  blk_offset_map_t;
1243 1.1 mrg
/* Walk the parallel-region tree PAR (OUTER_MASK is the partitioning mask
   accumulated from enclosing regions) and neuter every basic block that
   runs in worker-single mode: blocks with definitions that need
   broadcasting go through worker_single_copy, the rest through
   worker_single_simple.  WORKER_SINGLE/VECTOR_SINGLE are bitmaps of block
   indices executing in worker-/vector-single mode.  PROP_SET holds, per
   block index, the set of local variables to propagate; the sets are
   consumed and freed here.  PARTITIONED_VAR_USES is the set of decls used
   in partitioned code.  RECORD_FIELD_MAP maps each broadcast record type
   to its var -> FIELD_DECL map.  BLK_OFFSET_MAP gives each block's
   shared-memory placement.  WRITES_GANG_PRIVATE marks blocks that write
   gang-private storage.  */

static void
neuter_worker_single (parallel_g *par, unsigned outer_mask,
		      bitmap worker_single, bitmap vector_single,
		      vec<propagation_set *> *prop_set,
		      hash_set<tree> *partitioned_var_uses,
		      record_field_map_t *record_field_map,
		      blk_offset_map_t *blk_offset_map,
		      bitmap writes_gang_private)
{
  unsigned mask = outer_mask | par->mask;

  /* Only regions that are not themselves worker-partitioned need their
     blocks neutered.  */
  if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
    {
      basic_block block;

      for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
	{
	  bool has_defs = false;
	  hash_set<tree> def_escapes_block;
	  hash_set<tree> worker_partitioned_uses;
	  unsigned j;
	  tree var;

	  /* Collect SSA names defined in BLOCK that are used elsewhere
	     (escaping) or used in worker-partitioned blocks (needing
	     broadcast).  */
	  FOR_EACH_SSA_NAME (j, var, cfun)
	    {
	      if (SSA_NAME_IS_VIRTUAL_OPERAND (var))
		{
		  /* A virtual operand forces the copy variant so that the
		     single_copy_start/end calls can represent the memory
		     effects (see worker_single_copy).  */
		  has_defs = true;
		  continue;
		}

	      gimple *def_stmt = SSA_NAME_DEF_STMT (var);

	      if (gimple_nop_p (def_stmt))
		continue;

	      /* Only consider names defined in this block.  */
	      if (gimple_bb (def_stmt)->index != block->index)
		continue;

	      gimple *use_stmt;
	      imm_use_iterator use_iter;
	      bool uses_outside_block = false;
	      bool worker_partitioned_use = false;

	      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, var)
		{
		  int blocknum = gimple_bb (use_stmt)->index;

		  /* Don't propagate SSA names that are only used in the
		     current block, unless the usage is in a phi node: that
		     means the name left the block, then came back in at the
		     top.  */
		  if (blocknum != block->index
		      || gimple_code (use_stmt) == GIMPLE_PHI)
		    uses_outside_block = true;
		  if (!bitmap_bit_p (worker_single, blocknum))
		    worker_partitioned_use = true;
		}

	      if (uses_outside_block)
		def_escapes_block.add (var);

	      if (worker_partitioned_use)
		{
		  worker_partitioned_uses.add (var);
		  has_defs = true;
		}
	    }

	  /* Fold in the precomputed set of local variables to propagate
	     from this block, freeing each set once consumed.  */
	  propagation_set *ws_prop = (*prop_set)[block->index];

	  if (ws_prop)
	    {
	      for (propagation_set::iterator it = ws_prop->begin ();
		   it != ws_prop->end ();
		   ++it)
		{
		  tree var = *it;
		  if (TREE_CODE (var) == VAR_DECL)
		    {
		      def_escapes_block.add (var);
		      if (partitioned_var_uses->contains (var))
			{
			  worker_partitioned_uses.add (var);
			  has_defs = true;
			}
		    }
		}

	      delete ws_prop;
	      (*prop_set)[block->index] = 0;
	    }

	  /* Scan the block to see whether it contains anything beyond
	     marker internal-function calls.  */
	  bool only_marker_fns = true;
	  bool join_block = false;

	  for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) == GIMPLE_CALL
		  && gimple_call_internal_p (stmt, IFN_UNIQUE))
		{
		  enum ifn_unique_kind k = ((enum ifn_unique_kind)
		    TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
		  if (k != IFN_UNIQUE_OACC_PRIVATE
		      && k != IFN_UNIQUE_OACC_JOIN
		      && k != IFN_UNIQUE_OACC_FORK
		      && k != IFN_UNIQUE_OACC_HEAD_MARK
		      && k != IFN_UNIQUE_OACC_TAIL_MARK)
		    only_marker_fns = false;
		  else if (k == IFN_UNIQUE_OACC_JOIN)
		    /* The JOIN marker is special in that it *cannot* be
		       predicated for worker zero, because it may be lowered
		       to a barrier instruction and all workers must typically
		       execute that barrier.  We shouldn't be doing any
		       broadcasts from the join block anyway.  */
		    join_block = true;
		}
	      else if (gimple_code (stmt) == GIMPLE_CALL
		       && gimple_call_internal_p (stmt, IFN_GOACC_LOOP))
		/* Empty.  */;
	      else if (gimple_nop_p (stmt))
		/* Empty.  */;
	      else
		only_marker_fns = false;
	    }

	  /* We can skip predicating this block for worker zero if the only
	     thing it contains is marker functions that will be removed in the
	     oaccdevlow pass anyway.
	     Don't do this if the block has (any) phi nodes, because those
	     might define SSA names that need broadcasting.
	     TODO: We might be able to skip transforming blocks that only
	     contain some other trivial statements too.  */
	  if (only_marker_fns && !phi_nodes (block))
	    continue;

	  /* A join block must have been skipped above; broadcasting from
	     it would be wrong (see comment on IFN_UNIQUE_OACC_JOIN).  */
	  gcc_assert (!join_block);

	  if (has_defs)
	    {
	      /* The block's broadcast record type was stashed in ->aux by
		 oacc_do_neutering.  */
	      tree record_type = (tree) block->aux;
	      std::pair<unsigned HOST_WIDE_INT, bool> *off_rngalloc
		= blk_offset_map->get (block);
	      gcc_assert (!record_type || off_rngalloc);
	      unsigned HOST_WIDE_INT offset
		= off_rngalloc ? off_rngalloc->first : 0;
	      bool range_allocated
		= off_rngalloc ? off_rngalloc->second : true;
	      bool has_gang_private_write
		= bitmap_bit_p (writes_gang_private, block->index);
	      worker_single_copy (block, block, &def_escapes_block,
				  &worker_partitioned_uses, record_type,
				  record_field_map,
				  offset, !range_allocated,
				  has_gang_private_write);
	    }
	  else
	    worker_single_simple (block, block, &def_escapes_block);
	}
    }

  if ((outer_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
    {
      basic_block block;

      for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
	for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	     !gsi_end_p (gsi);
	     gsi_next (&gsi))
	  {
	    gimple *stmt = gsi_stmt (gsi);

	    if (gimple_code (stmt) == GIMPLE_CALL
		&& !gimple_call_internal_p (stmt)
		&& !omp_sese_active_worker_call (as_a <gcall *> (stmt)))
	      {
		/* If we have an OpenACC routine call in worker-single mode,
		   place barriers before and afterwards to prevent
		   clobbering re-used shared memory regions (as are used
		   for AMDGCN at present, for example).  */
		tree decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
		gsi_insert_before (&gsi, gimple_build_call (decl, 0),
				   GSI_SAME_STMT);
		gsi_insert_after (&gsi, gimple_build_call (decl, 0),
				  GSI_NEW_STMT);
	      }
	  }
    }

  /* Recurse into nested regions, then continue along the sibling chain.  */
  if (par->inner)
    neuter_worker_single (par->inner, mask, worker_single, vector_single,
			  prop_set, partitioned_var_uses, record_field_map,
			  blk_offset_map, writes_gang_private);
  if (par->next)
    neuter_worker_single (par->next, outer_mask, worker_single, vector_single,
			  prop_set, partitioned_var_uses, record_field_map,
			  blk_offset_map, writes_gang_private);
}
1445 1.1 mrg
1446 1.1 mrg static void
1447 1.1 mrg dfs_broadcast_reachable_1 (basic_block bb, sbitmap reachable)
1448 1.1 mrg {
1449 1.1 mrg if (bb->flags & BB_VISITED)
1450 1.1 mrg return;
1451 1.1 mrg
1452 1.1 mrg bb->flags |= BB_VISITED;
1453 1.1 mrg
1454 1.1 mrg if (bb->succs)
1455 1.1 mrg {
1456 1.1 mrg edge e;
1457 1.1 mrg edge_iterator ei;
1458 1.1 mrg FOR_EACH_EDGE (e, ei, bb->succs)
1459 1.1 mrg {
1460 1.1 mrg basic_block dest = e->dest;
1461 1.1 mrg if (dest->aux)
1462 1.1 mrg bitmap_set_bit (reachable, dest->index);
1463 1.1 mrg else
1464 1.1 mrg dfs_broadcast_reachable_1 (dest, reachable);
1465 1.1 mrg }
1466 1.1 mrg }
1467 1.1 mrg }
1468 1.1 mrg
/* A (basic block index, broadcast record type) pair, used to order blocks
   by record size when allocating shared memory.  */
typedef std::pair<int, tree> idx_decl_pair_t;

/* Per-block splay trees of address ranges already in use in the
   shared-memory broadcast area.  */
typedef auto_vec<splay_tree> used_range_vec_t;
1472 1.1 mrg
1473 1.1 mrg static int
1474 1.1 mrg sort_size_descending (const void *a, const void *b)
1475 1.1 mrg {
1476 1.1 mrg const idx_decl_pair_t *pa = (const idx_decl_pair_t *) a;
1477 1.1 mrg const idx_decl_pair_t *pb = (const idx_decl_pair_t *) b;
1478 1.1 mrg unsigned HOST_WIDE_INT asize = tree_to_uhwi (TYPE_SIZE_UNIT (pa->second));
1479 1.1 mrg unsigned HOST_WIDE_INT bsize = tree_to_uhwi (TYPE_SIZE_UNIT (pb->second));
1480 1.1 mrg return bsize - asize;
1481 1.1 mrg }
1482 1.1 mrg
1483 1.1 mrg class addr_range
1484 1.1 mrg {
1485 1.1 mrg public:
1486 1.1 mrg addr_range (unsigned HOST_WIDE_INT addr_lo, unsigned HOST_WIDE_INT addr_hi)
1487 1.1 mrg : lo (addr_lo), hi (addr_hi)
1488 1.1 mrg { }
1489 1.1 mrg addr_range (const addr_range &ar) : lo (ar.lo), hi (ar.hi)
1490 1.1 mrg { }
1491 1.1 mrg addr_range () : lo (0), hi (0)
1492 1.1 mrg { }
1493 1.1 mrg
1494 1.1 mrg bool invalid () { return lo == 0 && hi == 0; }
1495 1.1 mrg
1496 1.1 mrg unsigned HOST_WIDE_INT lo;
1497 1.1 mrg unsigned HOST_WIDE_INT hi;
1498 1.1 mrg };
1499 1.1 mrg
1500 1.1 mrg static int
1501 1.1 mrg splay_tree_compare_addr_range (splay_tree_key a, splay_tree_key b)
1502 1.1 mrg {
1503 1.1 mrg addr_range *ar = (addr_range *) a;
1504 1.1 mrg addr_range *br = (addr_range *) b;
1505 1.1 mrg if (ar->lo == br->lo && ar->hi == br->hi)
1506 1.1 mrg return 0;
1507 1.1 mrg if (ar->hi <= br->lo)
1508 1.1 mrg return -1;
1509 1.1 mrg else if (ar->lo >= br->hi)
1510 1.1 mrg return 1;
1511 1.1 mrg return 0;
1512 1.1 mrg }
1513 1.1 mrg
1514 1.1 mrg static void
1515 1.1 mrg splay_tree_free_key (splay_tree_key k)
1516 1.1 mrg {
1517 1.1 mrg addr_range *ar = (addr_range *) k;
1518 1.1 mrg delete ar;
1519 1.1 mrg }
1520 1.1 mrg
1521 1.1 mrg static addr_range
1522 1.1 mrg first_fit_range (splay_tree s, unsigned HOST_WIDE_INT size,
1523 1.1 mrg unsigned HOST_WIDE_INT align, addr_range *bounds)
1524 1.1 mrg {
1525 1.1 mrg splay_tree_node min = splay_tree_min (s);
1526 1.1 mrg if (min)
1527 1.1 mrg {
1528 1.1 mrg splay_tree_node next;
1529 1.1 mrg while ((next = splay_tree_successor (s, min->key)))
1530 1.1 mrg {
1531 1.1 mrg unsigned HOST_WIDE_INT lo = ((addr_range *) min->key)->hi;
1532 1.1 mrg unsigned HOST_WIDE_INT hi = ((addr_range *) next->key)->lo;
1533 1.1 mrg unsigned HOST_WIDE_INT base = (lo + align - 1) & ~(align - 1);
1534 1.1 mrg if (base + size <= hi)
1535 1.1 mrg return addr_range (base, base + size);
1536 1.1 mrg min = next;
1537 1.1 mrg }
1538 1.1 mrg
1539 1.1 mrg unsigned HOST_WIDE_INT base = ((addr_range *)min->key)->hi;
1540 1.1 mrg base = (base + align - 1) & ~(align - 1);
1541 1.1 mrg if (base + size <= bounds->hi)
1542 1.1 mrg return addr_range (base, base + size);
1543 1.1 mrg else
1544 1.1 mrg return addr_range ();
1545 1.1 mrg }
1546 1.1 mrg else
1547 1.1 mrg {
1548 1.1 mrg unsigned HOST_WIDE_INT lo = bounds->lo;
1549 1.1 mrg lo = (lo + align - 1) & ~(align - 1);
1550 1.1 mrg if (lo + size <= bounds->hi)
1551 1.1 mrg return addr_range (lo, lo + size);
1552 1.1 mrg else
1553 1.1 mrg return addr_range ();
1554 1.1 mrg }
1555 1.1 mrg }
1556 1.1 mrg
1557 1.1 mrg static int
1558 1.1 mrg merge_ranges_1 (splay_tree_node n, void *ptr)
1559 1.1 mrg {
1560 1.1 mrg splay_tree accum = (splay_tree) ptr;
1561 1.1 mrg addr_range ar = *(addr_range *) n->key;
1562 1.1 mrg
1563 1.1 mrg splay_tree_node old = splay_tree_lookup (accum, n->key);
1564 1.1 mrg
1565 1.1 mrg /* We might have an overlap. Create a new range covering the
1566 1.1 mrg overlapping parts. */
1567 1.1 mrg if (old)
1568 1.1 mrg {
1569 1.1 mrg addr_range *old_ar = (addr_range *) old->key;
1570 1.1 mrg ar.lo = MIN (old_ar->lo, ar.lo);
1571 1.1 mrg ar.hi = MAX (old_ar->hi, ar.hi);
1572 1.1 mrg splay_tree_remove (accum, old->key);
1573 1.1 mrg }
1574 1.1 mrg
1575 1.1 mrg addr_range *new_ar = new addr_range (ar);
1576 1.1 mrg
1577 1.1 mrg splay_tree_insert (accum, (splay_tree_key) new_ar, n->value);
1578 1.1 mrg
1579 1.1 mrg return 0;
1580 1.1 mrg }
1581 1.1 mrg
1582 1.1 mrg static void
1583 1.1 mrg merge_ranges (splay_tree accum, splay_tree sp)
1584 1.1 mrg {
1585 1.1 mrg splay_tree_foreach (sp, merge_ranges_1, (void *) accum);
1586 1.1 mrg }
1587 1.1 mrg
1588 1.1 mrg static void
1589 1.1 mrg oacc_do_neutering (unsigned HOST_WIDE_INT bounds_lo,
1590 1.1 mrg unsigned HOST_WIDE_INT bounds_hi)
1591 1.1 mrg {
1592 1.1 mrg bb_stmt_map_t bb_stmt_map;
1593 1.1 mrg auto_bitmap worker_single, vector_single;
1594 1.1 mrg
1595 1.1 mrg omp_sese_split_blocks (&bb_stmt_map);
1596 1.1 mrg
1597 1.1 mrg if (dump_file)
1598 1.1 mrg {
1599 1.1 mrg fprintf (dump_file, "\n\nAfter splitting:\n\n");
1600 1.1 mrg dump_function_to_file (current_function_decl, dump_file, dump_flags);
1601 1.1 mrg }
1602 1.1 mrg
1603 1.1 mrg unsigned mask = 0;
1604 1.1 mrg
1605 1.1 mrg /* If this is a routine, calculate MASK as if the outer levels are already
1606 1.1 mrg partitioned. */
1607 1.1 mrg {
1608 1.1 mrg tree attr = oacc_get_fn_attrib (current_function_decl);
1609 1.1 mrg tree dims = TREE_VALUE (attr);
1610 1.1 mrg unsigned ix;
1611 1.1 mrg for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
1612 1.1 mrg {
1613 1.1 mrg tree allowed = TREE_PURPOSE (dims);
1614 1.1 mrg if (allowed && integer_zerop (allowed))
1615 1.1 mrg mask |= GOMP_DIM_MASK (ix);
1616 1.1 mrg }
1617 1.1 mrg }
1618 1.1 mrg
1619 1.1 mrg parallel_g *par = omp_sese_discover_pars (&bb_stmt_map);
1620 1.1 mrg populate_single_mode_bitmaps (par, worker_single, vector_single, mask, 0);
1621 1.1 mrg
1622 1.1 mrg basic_block bb;
1623 1.1 mrg FOR_ALL_BB_FN (bb, cfun)
1624 1.1 mrg bb->aux = NULL;
1625 1.1 mrg
1626 1.1 mrg vec<propagation_set *> prop_set (vNULL);
1627 1.1 mrg prop_set.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
1628 1.1 mrg
1629 1.1 mrg find_ssa_names_to_propagate (par, mask, worker_single, vector_single,
1630 1.1 mrg &prop_set);
1631 1.1 mrg
1632 1.1 mrg hash_set<tree> partitioned_var_uses;
1633 1.1 mrg hash_set<tree> gang_private_vars;
1634 1.1 mrg auto_bitmap writes_gang_private;
1635 1.1 mrg
1636 1.1 mrg find_gang_private_vars (&gang_private_vars);
1637 1.1 mrg find_partitioned_var_uses (par, mask, &partitioned_var_uses);
1638 1.1 mrg find_local_vars_to_propagate (par, mask, &partitioned_var_uses,
1639 1.1 mrg &gang_private_vars, writes_gang_private,
1640 1.1 mrg &prop_set);
1641 1.1 mrg
1642 1.1 mrg record_field_map_t record_field_map;
1643 1.1 mrg
1644 1.1 mrg FOR_ALL_BB_FN (bb, cfun)
1645 1.1 mrg {
1646 1.1 mrg propagation_set *ws_prop = prop_set[bb->index];
1647 1.1 mrg if (ws_prop)
1648 1.1 mrg {
1649 1.1 mrg tree record_type = lang_hooks.types.make_type (RECORD_TYPE);
1650 1.1 mrg tree name = create_tmp_var_name (".oacc_ws_data_s");
1651 1.1 mrg name = build_decl (UNKNOWN_LOCATION, TYPE_DECL, name, record_type);
1652 1.1 mrg DECL_ARTIFICIAL (name) = 1;
1653 1.1 mrg DECL_NAMELESS (name) = 1;
1654 1.1 mrg TYPE_NAME (record_type) = name;
1655 1.1 mrg TYPE_ARTIFICIAL (record_type) = 1;
1656 1.1 mrg
1657 1.1 mrg auto_vec<tree> field_vec (ws_prop->elements ());
1658 1.1 mrg for (hash_set<tree>::iterator it = ws_prop->begin ();
1659 1.1 mrg it != ws_prop->end (); ++it)
1660 1.1 mrg field_vec.quick_push (*it);
1661 1.1 mrg
1662 1.1 mrg field_vec.qsort (sort_by_size_then_ssa_version_or_uid);
1663 1.1 mrg
1664 1.1 mrg bool existed;
1665 1.1 mrg field_map_t *fields
1666 1.1 mrg = &record_field_map.get_or_insert (record_type, &existed);
1667 1.1 mrg gcc_checking_assert (!existed);
1668 1.1 mrg
1669 1.1 mrg /* Insert var fields in reverse order, so the last inserted element
1670 1.1 mrg is the first in the structure. */
1671 1.1 mrg for (int i = field_vec.length () - 1; i >= 0; i--)
1672 1.1 mrg install_var_field (field_vec[i], record_type, fields);
1673 1.1 mrg
1674 1.1 mrg layout_type (record_type);
1675 1.1 mrg
1676 1.1 mrg bb->aux = (tree) record_type;
1677 1.1 mrg }
1678 1.1 mrg }
1679 1.1 mrg
1680 1.1 mrg sbitmap *reachable
1681 1.1 mrg = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
1682 1.1 mrg last_basic_block_for_fn (cfun));
1683 1.1 mrg
1684 1.1 mrg bitmap_vector_clear (reachable, last_basic_block_for_fn (cfun));
1685 1.1 mrg
1686 1.1 mrg auto_vec<std::pair<int, tree> > priority;
1687 1.1 mrg
1688 1.1 mrg FOR_ALL_BB_FN (bb, cfun)
1689 1.1 mrg {
1690 1.1 mrg if (bb->aux)
1691 1.1 mrg {
1692 1.1 mrg tree record_type = (tree) bb->aux;
1693 1.1 mrg
1694 1.1 mrg basic_block bb2;
1695 1.1 mrg FOR_ALL_BB_FN (bb2, cfun)
1696 1.1 mrg bb2->flags &= ~BB_VISITED;
1697 1.1 mrg
1698 1.1 mrg priority.safe_push (std::make_pair (bb->index, record_type));
1699 1.1 mrg dfs_broadcast_reachable_1 (bb, reachable[bb->index]);
1700 1.1 mrg }
1701 1.1 mrg }
1702 1.1 mrg
1703 1.1 mrg sbitmap *inverted
1704 1.1 mrg = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
1705 1.1 mrg last_basic_block_for_fn (cfun));
1706 1.1 mrg
1707 1.1 mrg bitmap_vector_clear (inverted, last_basic_block_for_fn (cfun));
1708 1.1 mrg
1709 1.1 mrg for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1710 1.1 mrg {
1711 1.1 mrg sbitmap_iterator bi;
1712 1.1 mrg unsigned int j;
1713 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (reachable[i], 0, j, bi)
1714 1.1 mrg bitmap_set_bit (inverted[j], i);
1715 1.1 mrg }
1716 1.1 mrg
1717 1.1 mrg for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1718 1.1 mrg bitmap_ior (reachable[i], reachable[i], inverted[i]);
1719 1.1 mrg
1720 1.1 mrg sbitmap_vector_free (inverted);
1721 1.1 mrg
1722 1.1 mrg used_range_vec_t used_ranges;
1723 1.1 mrg
1724 1.1 mrg used_ranges.safe_grow_cleared (last_basic_block_for_fn (cfun));
1725 1.1 mrg
1726 1.1 mrg blk_offset_map_t blk_offset_map;
1727 1.1 mrg
1728 1.1 mrg addr_range worker_shm_bounds (bounds_lo, bounds_hi);
1729 1.1 mrg
1730 1.1 mrg priority.qsort (sort_size_descending);
1731 1.1 mrg for (unsigned int i = 0; i < priority.length (); i++)
1732 1.1 mrg {
1733 1.1 mrg idx_decl_pair_t p = priority[i];
1734 1.1 mrg int blkno = p.first;
1735 1.1 mrg tree record_type = p.second;
1736 1.1 mrg HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (record_type));
1737 1.1 mrg HOST_WIDE_INT align = TYPE_ALIGN_UNIT (record_type);
1738 1.1 mrg
1739 1.1 mrg splay_tree conflicts = splay_tree_new (splay_tree_compare_addr_range,
1740 1.1 mrg splay_tree_free_key, NULL);
1741 1.1 mrg
1742 1.1 mrg if (!used_ranges[blkno])
1743 1.1 mrg used_ranges[blkno] = splay_tree_new (splay_tree_compare_addr_range,
1744 1.1 mrg splay_tree_free_key, NULL);
1745 1.1 mrg else
1746 1.1 mrg merge_ranges (conflicts, used_ranges[blkno]);
1747 1.1 mrg
1748 1.1 mrg sbitmap_iterator bi;
1749 1.1 mrg unsigned int j;
1750 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (reachable[blkno], 0, j, bi)
1751 1.1 mrg if (used_ranges[j])
1752 1.1 mrg merge_ranges (conflicts, used_ranges[j]);
1753 1.1 mrg
1754 1.1 mrg addr_range ar
1755 1.1 mrg = first_fit_range (conflicts, size, align, &worker_shm_bounds);
1756 1.1 mrg
1757 1.1 mrg splay_tree_delete (conflicts);
1758 1.1 mrg
1759 1.1 mrg if (ar.invalid ())
1760 1.1 mrg {
1761 1.1 mrg unsigned HOST_WIDE_INT base
1762 1.1 mrg = (bounds_lo + align - 1) & ~(align - 1);
1763 1.1 mrg if (base + size > bounds_hi)
1764 1.1 mrg error_at (UNKNOWN_LOCATION, "shared-memory region overflow");
1765 1.1 mrg std::pair<unsigned HOST_WIDE_INT, bool> base_inrng
1766 1.1 mrg = std::make_pair (base, false);
1767 1.1 mrg blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng);
1768 1.1 mrg }
1769 1.1 mrg else
1770 1.1 mrg {
1771 1.1 mrg splay_tree_node old = splay_tree_lookup (used_ranges[blkno],
1772 1.1 mrg (splay_tree_key) &ar);
1773 1.1 mrg if (old)
1774 1.1 mrg {
1775 1.1 mrg fprintf (stderr, "trying to map [%d..%d] but [%d..%d] is "
1776 1.1 mrg "already mapped in block %d\n", (int) ar.lo,
1777 1.1 mrg (int) ar.hi, (int) ((addr_range *) old->key)->lo,
1778 1.1 mrg (int) ((addr_range *) old->key)->hi, blkno);
1779 1.1 mrg abort ();
1780 1.1 mrg }
1781 1.1 mrg
1782 1.1 mrg addr_range *arp = new addr_range (ar);
1783 1.1 mrg splay_tree_insert (used_ranges[blkno], (splay_tree_key) arp,
1784 1.1 mrg (splay_tree_value) blkno);
1785 1.1 mrg std::pair<unsigned HOST_WIDE_INT, bool> base_inrng
1786 1.1 mrg = std::make_pair (ar.lo, true);
1787 1.1 mrg blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng);
1788 1.1 mrg }
1789 1.1 mrg }
1790 1.1 mrg
1791 1.1 mrg sbitmap_vector_free (reachable);
1792 1.1 mrg
1793 1.1 mrg neuter_worker_single (par, mask, worker_single, vector_single, &prop_set,
1794 1.1 mrg &partitioned_var_uses, &record_field_map,
1795 1.1 mrg &blk_offset_map, writes_gang_private);
1796 1.1 mrg
1797 1.1 mrg record_field_map.empty ();
1798 1.1 mrg
1799 1.1 mrg /* These are supposed to have been 'delete'd by 'neuter_worker_single'. */
1800 1.1 mrg for (auto it : prop_set)
1801 1.1 mrg gcc_checking_assert (!it);
1802 1.1 mrg prop_set.release ();
1803 1.1 mrg
1804 1.1 mrg delete par;
1805 1.1 mrg
1806 1.1 mrg /* This doesn't seem to make a difference. */
1807 1.1 mrg loops_state_clear (LOOP_CLOSED_SSA);
1808 1.1 mrg
1809 1.1 mrg /* Neutering worker-single neutered blocks will invalidate dominance info.
1810 1.1 mrg It may be possible to incrementally update just the affected blocks, but
1811 1.1 mrg obliterate everything for now. */
1812 1.1 mrg free_dominance_info (CDI_DOMINATORS);
1813 1.1 mrg free_dominance_info (CDI_POST_DOMINATORS);
1814 1.1 mrg
1815 1.1 mrg if (dump_file)
1816 1.1 mrg {
1817 1.1 mrg fprintf (dump_file, "\n\nAfter neutering:\n\n");
1818 1.1 mrg dump_function_to_file (current_function_decl, dump_file, dump_flags);
1819 1.1 mrg }
1820 1.1 mrg }
1821 1.1 mrg
/* Main entry point of the pass: compute the per-partitioning-level shared
   memory requirements of the current offloaded function (OpenACC reduction
   temporaries and gang-private variables), ask the target how much shared
   memory is available, and then perform worker neutering/broadcasting via
   'oacc_do_neutering' unless the function is known to run with a single
   worker.  Returns 0 (no extra TODO flags).  */

static int
execute_omp_oacc_neuter_broadcast ()
{
  /* Bytes of shared memory needed at each partitioning level
     (gang/worker/vector, indexed by GOMP_DIM_*).  */
  unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX];
  unsigned HOST_WIDE_INT private_size[GOMP_DIM_MAX];

  for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
    {
      reduction_size[i] = 0;
      private_size[i] = 0;
    }

  /* Calculate shared memory size required for reduction variables and
     gang-private memory for this offloaded function.  Only internal
     function calls are of interest; everything else is skipped.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
	   !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (!is_gimple_call (stmt))
	    continue;
	  gcall *call = as_a <gcall *> (stmt);
	  if (!gimple_call_internal_p (call))
	    continue;
	  enum internal_fn ifn_code = gimple_call_internal_fn (call);
	  switch (ifn_code)
	    {
	    default: break;
	    case IFN_GOACC_REDUCTION:
	      /* Argument 3 is the partitioning level; presumably -1 marks a
		 reduction not assigned to any level — TODO confirm against
		 the IFN_GOACC_REDUCTION expansion.  Note this 'continue'
		 resumes the statement loop (the for-increment 'gsi_next'
		 still runs).  */
	      if (integer_minus_onep (gimple_call_arg (call, 3)))
		continue;
	      else
		{
		  /* Argument 0 encodes which reduction operation this call
		     represents (setup/init/fini/teardown).  */
		  unsigned code = TREE_INT_CST_LOW (gimple_call_arg (call, 0));
		  /* Only count reduction variables once: the choice to pick
		     the setup call is fairly arbitrary.  */
		  if (code == IFN_GOACC_REDUCTION_SETUP)
		    {
		      int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
		      tree var = gimple_call_arg (call, 2);
		      tree offset = gimple_call_arg (call, 5);
		      tree var_type = TREE_TYPE (var);
		      /* The reduction buffer for this level must extend at
			 least to this variable's offset plus its size.  */
		      unsigned HOST_WIDE_INT limit
			= (tree_to_uhwi (offset)
			   + tree_to_uhwi (TYPE_SIZE_UNIT (var_type)));
		      reduction_size[level]
			= MAX (reduction_size[level], limit);
		    }
		}
	      break;
	    case IFN_UNIQUE:
	      {
		enum ifn_unique_kind kind
		  = ((enum ifn_unique_kind)
		     TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

		/* IFN_UNIQUE_OACC_PRIVATE markers list (as ADDR_EXPRs, from
		   argument 3 onwards) the variables privatized at the level
		   given by argument 2.  */
		if (kind == IFN_UNIQUE_OACC_PRIVATE)
		  {
		    HOST_WIDE_INT level
		      = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
		    /* Level -1: no shared-memory accounting needed —
		       presumably "no particular level"; TODO confirm.  */
		    if (level == -1)
		      break;
		    for (unsigned i = 3;
			 i < gimple_call_num_args (call);
			 i++)
		      {
			tree arg = gimple_call_arg (call, i);
			gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
			tree decl = TREE_OPERAND (arg, 0);
			/* Round the running size up to this decl's alignment
			   before appending the decl itself.  */
			unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (decl);
			private_size[level] = ((private_size[level] + align - 1)
					       & ~(align - 1));
			unsigned HOST_WIDE_INT decl_size
			  = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (decl)));
			private_size[level] += decl_size;
		      }
		  }
	      }
	      break;
	    }
	}
    }

  /* Launch geometry of this offloaded function, per dimension.  */
  int dims[GOMP_DIM_MAX];
  for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
    dims[i] = oacc_get_fn_dim_size (current_function_decl, i);

  /* Find bounds of shared-memory buffer space we can use.  The target hook
     is optional; without it the region stays empty ([0, 0)).  */
  unsigned HOST_WIDE_INT bounds_lo = 0, bounds_hi = 0;
  if (targetm.goacc.shared_mem_layout)
    targetm.goacc.shared_mem_layout (&bounds_lo, &bounds_hi, dims,
				     private_size, reduction_size);

  /* Perform worker partitioning unless we know 'num_workers(1)'.  */
  if (dims[GOMP_DIM_WORKER] != 1)
    oacc_do_neutering (bounds_lo, bounds_hi);

  return 0;
}
1924 1.1 mrg
1925 1.1 mrg namespace {
1926 1.1 mrg
/* Pass-manager metadata for the worker neutering/broadcasting pass.  The
   finish flags request an SSA update and CFG cleanup, since neutering
   rewrites control flow and invalidates dominance info (see
   'oacc_do_neutering').  */

const pass_data pass_data_omp_oacc_neuter_broadcast =
{
  GIMPLE_PASS, /* type */
  "omp_oacc_neuter_broadcast", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};
1939 1.1 mrg
1940 1.1 mrg class pass_omp_oacc_neuter_broadcast : public gimple_opt_pass
1941 1.1 mrg {
1942 1.1 mrg public:
1943 1.1 mrg pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
1944 1.1 mrg : gimple_opt_pass (pass_data_omp_oacc_neuter_broadcast, ctxt)
1945 1.1 mrg {}
1946 1.1 mrg
1947 1.1 mrg /* opt_pass methods: */
1948 1.1 mrg virtual bool gate (function *fun)
1949 1.1 mrg {
1950 1.1 mrg if (!flag_openacc)
1951 1.1 mrg return false;
1952 1.1 mrg
1953 1.1 mrg if (!targetm.goacc.create_worker_broadcast_record)
1954 1.1 mrg return false;
1955 1.1 mrg
1956 1.1 mrg /* Only relevant for OpenACC offloaded functions. */
1957 1.1 mrg tree attr = oacc_get_fn_attrib (fun->decl);
1958 1.1 mrg if (!attr)
1959 1.1 mrg return false;
1960 1.1 mrg
1961 1.1 mrg return true;
1962 1.1 mrg }
1963 1.1 mrg
1964 1.1 mrg virtual unsigned int execute (function *)
1965 1.1 mrg {
1966 1.1 mrg return execute_omp_oacc_neuter_broadcast ();
1967 1.1 mrg }
1968 1.1 mrg
1969 1.1 mrg }; // class pass_omp_oacc_neuter_broadcast
1970 1.1 mrg
1971 1.1 mrg } // anon namespace
1972 1.1 mrg
1973 1.1 mrg gimple_opt_pass *
1974 1.1 mrg make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
1975 1.1 mrg {
1976 1.1 mrg return new pass_omp_oacc_neuter_broadcast (ctxt);
1977 1.1 mrg }
1978