1 1.1 mrg /* OpenACC worker partitioning via middle end neutering/broadcasting scheme 2 1.1 mrg 3 1.1 mrg Copyright (C) 2015-2022 Free Software Foundation, Inc. 4 1.1 mrg 5 1.1 mrg This file is part of GCC. 6 1.1 mrg 7 1.1 mrg GCC is free software; you can redistribute it and/or modify it 8 1.1 mrg under the terms of the GNU General Public License as published 9 1.1 mrg by the Free Software Foundation; either version 3, or (at your 10 1.1 mrg option) any later version. 11 1.1 mrg 12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT 13 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 1.1 mrg License for more details. 16 1.1 mrg 17 1.1 mrg You should have received a copy of the GNU General Public License 18 1.1 mrg along with GCC; see the file COPYING3. If not see 19 1.1 mrg <http://www.gnu.org/licenses/>. */ 20 1.1 mrg 21 1.1 mrg #include "config.h" 22 1.1 mrg #include "system.h" 23 1.1 mrg #include "coretypes.h" 24 1.1 mrg #include "backend.h" 25 1.1 mrg #include "rtl.h" 26 1.1 mrg #include "tree.h" 27 1.1 mrg #include "gimple.h" 28 1.1 mrg #include "tree-pass.h" 29 1.1 mrg #include "ssa.h" 30 1.1 mrg #include "cgraph.h" 31 1.1 mrg #include "pretty-print.h" 32 1.1 mrg #include "fold-const.h" 33 1.1 mrg #include "gimplify.h" 34 1.1 mrg #include "gimple-iterator.h" 35 1.1 mrg #include "gimple-walk.h" 36 1.1 mrg #include "tree-inline.h" 37 1.1 mrg #include "langhooks.h" 38 1.1 mrg #include "omp-general.h" 39 1.1 mrg #include "omp-low.h" 40 1.1 mrg #include "gimple-pretty-print.h" 41 1.1 mrg #include "cfghooks.h" 42 1.1 mrg #include "insn-config.h" 43 1.1 mrg #include "recog.h" 44 1.1 mrg #include "internal-fn.h" 45 1.1 mrg #include "bitmap.h" 46 1.1 mrg #include "tree-nested.h" 47 1.1 mrg #include "stor-layout.h" 48 1.1 mrg #include "tree-ssa-threadupdate.h" 49 1.1 mrg #include "tree-into-ssa.h" 50 1.1 mrg #include "splay-tree.h" 51 1.1 mrg 
#include "target.h"
#include "cfgloop.h"
#include "tree-cfg.h"
#include "omp-offload.h"
#include "attribs.h"
#include "targhooks.h"
#include "diagnostic-core.h"

/* Loop structure of the function.  The entire function is described as
   a NULL loop.  */
/* Adapted from 'gcc/config/nvptx/nvptx.cc:struct parallel'.  */

struct parallel_g
{
  /* Parent parallel.  */
  parallel_g *parent;

  /* Next sibling parallel.  */
  parallel_g *next;

  /* First child parallel.  */
  parallel_g *inner;

  /* Partitioning mask of the parallel.  */
  unsigned mask;

  /* Partitioning used within inner parallels.  */
  unsigned inner_mask;

  /* Location of parallel forked and join.  The forked is the first
     block in the parallel and the join is the first block after of
     the partition.  */
  basic_block forked_block;
  basic_block join_block;

  /* The statements marking the fork/join boundaries: FORKED/JOIN are
     inside the partition, FORK/JOINING are the surrounding markers.  */
  gimple *forked_stmt;
  gimple *join_stmt;

  gimple *fork_stmt;
  gimple *joining_stmt;

  /* Basic blocks in this parallel, but not in child parallels.  The
     FORKED and JOINING blocks are in the partition.  The FORK and JOIN
     blocks are not.  */
  auto_vec<basic_block> blocks;

  /* Record type (and sender/receiver decls of that type) used to
     broadcast values from the active worker to the idle ones.  */
  tree record_type;
  tree sender_decl;
  tree receiver_decl;

public:
  parallel_g (parallel_g *parent, unsigned mode);
  ~parallel_g ();
};

/* Constructor links the new parallel into its parent's chain of
   children.  */
*/ 108 1.1 mrg 109 1.1 mrg parallel_g::parallel_g (parallel_g *parent_, unsigned mask_) 110 1.1 mrg :parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0) 111 1.1 mrg { 112 1.1 mrg forked_block = join_block = 0; 113 1.1 mrg forked_stmt = join_stmt = NULL; 114 1.1 mrg fork_stmt = joining_stmt = NULL; 115 1.1 mrg 116 1.1 mrg record_type = NULL_TREE; 117 1.1 mrg sender_decl = NULL_TREE; 118 1.1 mrg receiver_decl = NULL_TREE; 119 1.1 mrg 120 1.1 mrg if (parent) 121 1.1 mrg { 122 1.1 mrg next = parent->inner; 123 1.1 mrg parent->inner = this; 124 1.1 mrg } 125 1.1 mrg } 126 1.1 mrg 127 1.1 mrg parallel_g::~parallel_g () 128 1.1 mrg { 129 1.1 mrg delete inner; 130 1.1 mrg delete next; 131 1.1 mrg } 132 1.1 mrg 133 1.1 mrg static bool 134 1.1 mrg local_var_based_p (tree decl) 135 1.1 mrg { 136 1.1 mrg switch (TREE_CODE (decl)) 137 1.1 mrg { 138 1.1 mrg case VAR_DECL: 139 1.1 mrg return !is_global_var (decl); 140 1.1 mrg 141 1.1 mrg case COMPONENT_REF: 142 1.1 mrg case BIT_FIELD_REF: 143 1.1 mrg case ARRAY_REF: 144 1.1 mrg return local_var_based_p (TREE_OPERAND (decl, 0)); 145 1.1 mrg 146 1.1 mrg default: 147 1.1 mrg return false; 148 1.1 mrg } 149 1.1 mrg } 150 1.1 mrg 151 1.1 mrg /* Map of basic blocks to gimple stmts. */ 152 1.1 mrg typedef hash_map<basic_block, gimple *> bb_stmt_map_t; 153 1.1 mrg 154 1.1 mrg /* Calls to OpenACC routines are made by all workers/wavefronts/warps, since 155 1.1 mrg the routine likely contains partitioned loops (else will do its own 156 1.1 mrg neutering and variable propagation). Return TRUE if a function call CALL 157 1.1 mrg should be made in (worker) single mode instead, rather than redundant 158 1.1 mrg mode. 
*/ 159 1.1 mrg 160 1.1 mrg static bool 161 1.1 mrg omp_sese_active_worker_call (gcall *call) 162 1.1 mrg { 163 1.1 mrg #define GOMP_DIM_SEQ GOMP_DIM_MAX 164 1.1 mrg tree fndecl = gimple_call_fndecl (call); 165 1.1 mrg 166 1.1 mrg if (!fndecl) 167 1.1 mrg return true; 168 1.1 mrg 169 1.1 mrg tree attrs = oacc_get_fn_attrib (fndecl); 170 1.1 mrg 171 1.1 mrg if (!attrs) 172 1.1 mrg return true; 173 1.1 mrg 174 1.1 mrg int level = oacc_fn_attrib_level (attrs); 175 1.1 mrg 176 1.1 mrg /* Neither regular functions nor "seq" routines should be run by all threads 177 1.1 mrg in worker-single mode. */ 178 1.1 mrg return level == -1 || level == GOMP_DIM_SEQ; 179 1.1 mrg #undef GOMP_DIM_SEQ 180 1.1 mrg } 181 1.1 mrg 182 1.1 mrg /* Split basic blocks such that each forked and join unspecs are at 183 1.1 mrg the start of their basic blocks. Thus afterwards each block will 184 1.1 mrg have a single partitioning mode. We also do the same for return 185 1.1 mrg insns, as they are executed by every thread. Return the 186 1.1 mrg partitioning mode of the function as a whole. Populate MAP with 187 1.1 mrg head and tail blocks. We also clear the BB visited flag, which is 188 1.1 mrg used when finding partitions. */ 189 1.1 mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_split_blocks'. */ 190 1.1 mrg 191 1.1 mrg static void 192 1.1 mrg omp_sese_split_blocks (bb_stmt_map_t *map) 193 1.1 mrg { 194 1.1 mrg auto_vec<gimple *> worklist; 195 1.1 mrg basic_block block; 196 1.1 mrg 197 1.1 mrg /* Locate all the reorg instructions of interest. 
*/ 198 1.1 mrg FOR_ALL_BB_FN (block, cfun) 199 1.1 mrg { 200 1.1 mrg /* Clear visited flag, for use by parallel locator */ 201 1.1 mrg block->flags &= ~BB_VISITED; 202 1.1 mrg 203 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block); 204 1.1 mrg !gsi_end_p (gsi); 205 1.1 mrg gsi_next (&gsi)) 206 1.1 mrg { 207 1.1 mrg gimple *stmt = gsi_stmt (gsi); 208 1.1 mrg 209 1.1 mrg if (gimple_call_internal_p (stmt, IFN_UNIQUE)) 210 1.1 mrg { 211 1.1 mrg enum ifn_unique_kind k = ((enum ifn_unique_kind) 212 1.1 mrg TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); 213 1.1 mrg 214 1.1 mrg if (k == IFN_UNIQUE_OACC_JOIN) 215 1.1 mrg worklist.safe_push (stmt); 216 1.1 mrg else if (k == IFN_UNIQUE_OACC_FORK) 217 1.1 mrg { 218 1.1 mrg gcc_assert (gsi_one_before_end_p (gsi)); 219 1.1 mrg basic_block forked_block = single_succ (block); 220 1.1 mrg gimple_stmt_iterator gsi2 = gsi_start_bb (forked_block); 221 1.1 mrg 222 1.1 mrg /* We push a NOP as a placeholder for the "forked" stmt. 223 1.1 mrg This is then recognized in omp_sese_find_par. */ 224 1.1 mrg gimple *nop = gimple_build_nop (); 225 1.1 mrg gsi_insert_before (&gsi2, nop, GSI_SAME_STMT); 226 1.1 mrg 227 1.1 mrg worklist.safe_push (nop); 228 1.1 mrg } 229 1.1 mrg } 230 1.1 mrg else if (gimple_code (stmt) == GIMPLE_RETURN 231 1.1 mrg || gimple_code (stmt) == GIMPLE_COND 232 1.1 mrg || gimple_code (stmt) == GIMPLE_SWITCH 233 1.1 mrg || (gimple_code (stmt) == GIMPLE_CALL 234 1.1 mrg && !gimple_call_internal_p (stmt) 235 1.1 mrg && !omp_sese_active_worker_call (as_a <gcall *> (stmt)))) 236 1.1 mrg worklist.safe_push (stmt); 237 1.1 mrg else if (is_gimple_assign (stmt)) 238 1.1 mrg { 239 1.1 mrg tree lhs = gimple_assign_lhs (stmt); 240 1.1 mrg 241 1.1 mrg /* Force assignments to components/fields/elements of local 242 1.1 mrg aggregates into fully-partitioned (redundant) mode. This 243 1.1 mrg avoids having to broadcast the whole aggregate. 
The RHS of 244 1.1 mrg the assignment will be propagated using the normal 245 1.1 mrg mechanism. */ 246 1.1 mrg 247 1.1 mrg switch (TREE_CODE (lhs)) 248 1.1 mrg { 249 1.1 mrg case COMPONENT_REF: 250 1.1 mrg case BIT_FIELD_REF: 251 1.1 mrg case ARRAY_REF: 252 1.1 mrg { 253 1.1 mrg tree aggr = TREE_OPERAND (lhs, 0); 254 1.1 mrg 255 1.1 mrg if (local_var_based_p (aggr)) 256 1.1 mrg worklist.safe_push (stmt); 257 1.1 mrg } 258 1.1 mrg break; 259 1.1 mrg 260 1.1 mrg default: 261 1.1 mrg ; 262 1.1 mrg } 263 1.1 mrg } 264 1.1 mrg } 265 1.1 mrg } 266 1.1 mrg 267 1.1 mrg /* Split blocks on the worklist. */ 268 1.1 mrg unsigned ix; 269 1.1 mrg gimple *stmt; 270 1.1 mrg 271 1.1 mrg for (ix = 0; worklist.iterate (ix, &stmt); ix++) 272 1.1 mrg { 273 1.1 mrg basic_block block = gimple_bb (stmt); 274 1.1 mrg 275 1.1 mrg if (gimple_code (stmt) == GIMPLE_COND) 276 1.1 mrg { 277 1.1 mrg gcond *orig_cond = as_a <gcond *> (stmt); 278 1.1 mrg tree_code code = gimple_expr_code (orig_cond); 279 1.1 mrg tree pred = make_ssa_name (boolean_type_node); 280 1.1 mrg gimple *asgn = gimple_build_assign (pred, code, 281 1.1 mrg gimple_cond_lhs (orig_cond), 282 1.1 mrg gimple_cond_rhs (orig_cond)); 283 1.1 mrg gcond *new_cond 284 1.1 mrg = gimple_build_cond (NE_EXPR, pred, boolean_false_node, 285 1.1 mrg gimple_cond_true_label (orig_cond), 286 1.1 mrg gimple_cond_false_label (orig_cond)); 287 1.1 mrg 288 1.1 mrg gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 289 1.1 mrg gsi_insert_before (&gsi, asgn, GSI_SAME_STMT); 290 1.1 mrg gsi_replace (&gsi, new_cond, true); 291 1.1 mrg 292 1.1 mrg edge e = split_block (block, asgn); 293 1.1 mrg block = e->dest; 294 1.1 mrg map->get_or_insert (block) = new_cond; 295 1.1 mrg } 296 1.1 mrg else if ((gimple_code (stmt) == GIMPLE_CALL 297 1.1 mrg && !gimple_call_internal_p (stmt)) 298 1.1 mrg || is_gimple_assign (stmt)) 299 1.1 mrg { 300 1.1 mrg gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 301 1.1 mrg gsi_prev (&gsi); 302 1.1 mrg 303 1.1 mrg edge call = 
split_block (block, gsi_stmt (gsi)); 304 1.1 mrg 305 1.1 mrg gimple *call_stmt = gsi_stmt (gsi_start_bb (call->dest)); 306 1.1 mrg 307 1.1 mrg edge call_to_ret = split_block (call->dest, call_stmt); 308 1.1 mrg 309 1.1 mrg map->get_or_insert (call_to_ret->src) = call_stmt; 310 1.1 mrg } 311 1.1 mrg else 312 1.1 mrg { 313 1.1 mrg gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 314 1.1 mrg gsi_prev (&gsi); 315 1.1 mrg 316 1.1 mrg if (gsi_end_p (gsi)) 317 1.1 mrg map->get_or_insert (block) = stmt; 318 1.1 mrg else 319 1.1 mrg { 320 1.1 mrg /* Split block before insn. The insn is in the new block. */ 321 1.1 mrg edge e = split_block (block, gsi_stmt (gsi)); 322 1.1 mrg 323 1.1 mrg block = e->dest; 324 1.1 mrg map->get_or_insert (block) = stmt; 325 1.1 mrg } 326 1.1 mrg } 327 1.1 mrg } 328 1.1 mrg } 329 1.1 mrg 330 1.1 mrg static const char * 331 1.1 mrg mask_name (unsigned mask) 332 1.1 mrg { 333 1.1 mrg switch (mask) 334 1.1 mrg { 335 1.1 mrg case 0: return "gang redundant"; 336 1.1 mrg case 1: return "gang partitioned"; 337 1.1 mrg case 2: return "worker partitioned"; 338 1.1 mrg case 3: return "gang+worker partitioned"; 339 1.1 mrg case 4: return "vector partitioned"; 340 1.1 mrg case 5: return "gang+vector partitioned"; 341 1.1 mrg case 6: return "worker+vector partitioned"; 342 1.1 mrg case 7: return "fully partitioned"; 343 1.1 mrg default: return "<illegal>"; 344 1.1 mrg } 345 1.1 mrg } 346 1.1 mrg 347 1.1 mrg /* Dump this parallel and all its inner parallels. */ 348 1.1 mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_dump_pars'. */ 349 1.1 mrg 350 1.1 mrg static void 351 1.1 mrg omp_sese_dump_pars (parallel_g *par, unsigned depth) 352 1.1 mrg { 353 1.1 mrg fprintf (dump_file, "%u: mask %d (%s) head=%d, tail=%d\n", 354 1.1 mrg depth, par->mask, mask_name (par->mask), 355 1.1 mrg par->forked_block ? par->forked_block->index : -1, 356 1.1 mrg par->join_block ? 
par->join_block->index : -1); 357 1.1 mrg 358 1.1 mrg fprintf (dump_file, " blocks:"); 359 1.1 mrg 360 1.1 mrg basic_block block; 361 1.1 mrg for (unsigned ix = 0; par->blocks.iterate (ix, &block); ix++) 362 1.1 mrg fprintf (dump_file, " %d", block->index); 363 1.1 mrg fprintf (dump_file, "\n"); 364 1.1 mrg if (par->inner) 365 1.1 mrg omp_sese_dump_pars (par->inner, depth + 1); 366 1.1 mrg 367 1.1 mrg if (par->next) 368 1.1 mrg omp_sese_dump_pars (par->next, depth); 369 1.1 mrg } 370 1.1 mrg 371 1.1 mrg /* If BLOCK contains a fork/join marker, process it to create or 372 1.1 mrg terminate a loop structure. Add this block to the current loop, 373 1.1 mrg and then walk successor blocks. */ 374 1.1 mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_find_par'. */ 375 1.1 mrg 376 1.1 mrg static parallel_g * 377 1.1 mrg omp_sese_find_par (bb_stmt_map_t *map, parallel_g *par, basic_block block) 378 1.1 mrg { 379 1.1 mrg if (block->flags & BB_VISITED) 380 1.1 mrg return par; 381 1.1 mrg block->flags |= BB_VISITED; 382 1.1 mrg 383 1.1 mrg if (gimple **stmtp = map->get (block)) 384 1.1 mrg { 385 1.1 mrg gimple *stmt = *stmtp; 386 1.1 mrg 387 1.1 mrg if (gimple_code (stmt) == GIMPLE_COND 388 1.1 mrg || gimple_code (stmt) == GIMPLE_SWITCH 389 1.1 mrg || gimple_code (stmt) == GIMPLE_RETURN 390 1.1 mrg || (gimple_code (stmt) == GIMPLE_CALL 391 1.1 mrg && !gimple_call_internal_p (stmt)) 392 1.1 mrg || is_gimple_assign (stmt)) 393 1.1 mrg { 394 1.1 mrg /* A single block that is forced to be at the maximum partition 395 1.1 mrg level. Make a singleton par for it. 
*/ 396 1.1 mrg par = new parallel_g (par, GOMP_DIM_MASK (GOMP_DIM_GANG) 397 1.1 mrg | GOMP_DIM_MASK (GOMP_DIM_WORKER) 398 1.1 mrg | GOMP_DIM_MASK (GOMP_DIM_VECTOR)); 399 1.1 mrg par->forked_block = block; 400 1.1 mrg par->forked_stmt = stmt; 401 1.1 mrg par->blocks.safe_push (block); 402 1.1 mrg par = par->parent; 403 1.1 mrg goto walk_successors; 404 1.1 mrg } 405 1.1 mrg else if (gimple_nop_p (stmt)) 406 1.1 mrg { 407 1.1 mrg basic_block pred = single_pred (block); 408 1.1 mrg gcc_assert (pred); 409 1.1 mrg gimple_stmt_iterator gsi = gsi_last_bb (pred); 410 1.1 mrg gimple *final_stmt = gsi_stmt (gsi); 411 1.1 mrg 412 1.1 mrg if (gimple_call_internal_p (final_stmt, IFN_UNIQUE)) 413 1.1 mrg { 414 1.1 mrg gcall *call = as_a <gcall *> (final_stmt); 415 1.1 mrg enum ifn_unique_kind k = ((enum ifn_unique_kind) 416 1.1 mrg TREE_INT_CST_LOW (gimple_call_arg (call, 0))); 417 1.1 mrg 418 1.1 mrg if (k == IFN_UNIQUE_OACC_FORK) 419 1.1 mrg { 420 1.1 mrg HOST_WIDE_INT dim 421 1.1 mrg = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); 422 1.1 mrg unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0; 423 1.1 mrg 424 1.1 mrg par = new parallel_g (par, mask); 425 1.1 mrg par->forked_block = block; 426 1.1 mrg par->forked_stmt = final_stmt; 427 1.1 mrg par->fork_stmt = stmt; 428 1.1 mrg } 429 1.1 mrg else 430 1.1 mrg gcc_unreachable (); 431 1.1 mrg } 432 1.1 mrg else 433 1.1 mrg gcc_unreachable (); 434 1.1 mrg } 435 1.1 mrg else if (gimple_call_internal_p (stmt, IFN_UNIQUE)) 436 1.1 mrg { 437 1.1 mrg gcall *call = as_a <gcall *> (stmt); 438 1.1 mrg enum ifn_unique_kind k = ((enum ifn_unique_kind) 439 1.1 mrg TREE_INT_CST_LOW (gimple_call_arg (call, 0))); 440 1.1 mrg if (k == IFN_UNIQUE_OACC_JOIN) 441 1.1 mrg { 442 1.1 mrg HOST_WIDE_INT dim = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2)); 443 1.1 mrg unsigned mask = (dim >= 0) ? 
GOMP_DIM_MASK (dim) : 0; 444 1.1 mrg 445 1.1 mrg gcc_assert (par->mask == mask); 446 1.1 mrg par->join_block = block; 447 1.1 mrg par->join_stmt = stmt; 448 1.1 mrg par = par->parent; 449 1.1 mrg } 450 1.1 mrg else 451 1.1 mrg gcc_unreachable (); 452 1.1 mrg } 453 1.1 mrg else 454 1.1 mrg gcc_unreachable (); 455 1.1 mrg } 456 1.1 mrg 457 1.1 mrg if (par) 458 1.1 mrg /* Add this block onto the current loop's list of blocks. */ 459 1.1 mrg par->blocks.safe_push (block); 460 1.1 mrg else 461 1.1 mrg /* This must be the entry block. Create a NULL parallel. */ 462 1.1 mrg par = new parallel_g (0, 0); 463 1.1 mrg 464 1.1 mrg walk_successors: 465 1.1 mrg /* Walk successor blocks. */ 466 1.1 mrg edge e; 467 1.1 mrg edge_iterator ei; 468 1.1 mrg 469 1.1 mrg FOR_EACH_EDGE (e, ei, block->succs) 470 1.1 mrg omp_sese_find_par (map, par, e->dest); 471 1.1 mrg 472 1.1 mrg return par; 473 1.1 mrg } 474 1.1 mrg 475 1.1 mrg /* DFS walk the CFG looking for fork & join markers. Construct 476 1.1 mrg loop structures as we go. MAP is a mapping of basic blocks 477 1.1 mrg to head & tail markers, discovered when splitting blocks. This 478 1.1 mrg speeds up the discovery. We rely on the BB visited flag having 479 1.1 mrg been cleared when splitting blocks. */ 480 1.1 mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_discover_pars'. */ 481 1.1 mrg 482 1.1 mrg static parallel_g * 483 1.1 mrg omp_sese_discover_pars (bb_stmt_map_t *map) 484 1.1 mrg { 485 1.1 mrg basic_block block; 486 1.1 mrg 487 1.1 mrg /* Mark exit blocks as visited. */ 488 1.1 mrg block = EXIT_BLOCK_PTR_FOR_FN (cfun); 489 1.1 mrg block->flags |= BB_VISITED; 490 1.1 mrg 491 1.1 mrg /* And entry block as not. 
*/ 492 1.1 mrg block = ENTRY_BLOCK_PTR_FOR_FN (cfun); 493 1.1 mrg block->flags &= ~BB_VISITED; 494 1.1 mrg 495 1.1 mrg parallel_g *par = omp_sese_find_par (map, 0, block); 496 1.1 mrg 497 1.1 mrg if (dump_file) 498 1.1 mrg { 499 1.1 mrg fprintf (dump_file, "\nLoops\n"); 500 1.1 mrg omp_sese_dump_pars (par, 0); 501 1.1 mrg fprintf (dump_file, "\n"); 502 1.1 mrg } 503 1.1 mrg 504 1.1 mrg return par; 505 1.1 mrg } 506 1.1 mrg 507 1.1 mrg static void 508 1.1 mrg populate_single_mode_bitmaps (parallel_g *par, bitmap worker_single, 509 1.1 mrg bitmap vector_single, unsigned outer_mask, 510 1.1 mrg int depth) 511 1.1 mrg { 512 1.1 mrg unsigned mask = outer_mask | par->mask; 513 1.1 mrg 514 1.1 mrg basic_block block; 515 1.1 mrg 516 1.1 mrg for (unsigned i = 0; par->blocks.iterate (i, &block); i++) 517 1.1 mrg { 518 1.1 mrg if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0) 519 1.1 mrg bitmap_set_bit (worker_single, block->index); 520 1.1 mrg 521 1.1 mrg if ((mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)) == 0) 522 1.1 mrg bitmap_set_bit (vector_single, block->index); 523 1.1 mrg } 524 1.1 mrg 525 1.1 mrg if (par->inner) 526 1.1 mrg populate_single_mode_bitmaps (par->inner, worker_single, vector_single, 527 1.1 mrg mask, depth + 1); 528 1.1 mrg if (par->next) 529 1.1 mrg populate_single_mode_bitmaps (par->next, worker_single, vector_single, 530 1.1 mrg outer_mask, depth); 531 1.1 mrg } 532 1.1 mrg 533 1.1 mrg /* A map from SSA names or var decls to record fields. */ 534 1.1 mrg 535 1.1 mrg typedef hash_map<tree, tree> field_map_t; 536 1.1 mrg 537 1.1 mrg /* For each propagation record type, this is a map from SSA names or var decls 538 1.1 mrg to propagate, to the field in the record type that should be used for 539 1.1 mrg transmission and reception. 
*/ 540 1.1 mrg 541 1.1 mrg typedef hash_map<tree, field_map_t> record_field_map_t; 542 1.1 mrg 543 1.1 mrg static void 544 1.1 mrg install_var_field (tree var, tree record_type, field_map_t *fields) 545 1.1 mrg { 546 1.1 mrg tree name; 547 1.1 mrg char tmp[20]; 548 1.1 mrg 549 1.1 mrg if (TREE_CODE (var) == SSA_NAME) 550 1.1 mrg { 551 1.1 mrg name = SSA_NAME_IDENTIFIER (var); 552 1.1 mrg if (!name) 553 1.1 mrg { 554 1.1 mrg sprintf (tmp, "_%u", (unsigned) SSA_NAME_VERSION (var)); 555 1.1 mrg name = get_identifier (tmp); 556 1.1 mrg } 557 1.1 mrg } 558 1.1 mrg else if (TREE_CODE (var) == VAR_DECL) 559 1.1 mrg { 560 1.1 mrg name = DECL_NAME (var); 561 1.1 mrg if (!name) 562 1.1 mrg { 563 1.1 mrg sprintf (tmp, "D_%u", (unsigned) DECL_UID (var)); 564 1.1 mrg name = get_identifier (tmp); 565 1.1 mrg } 566 1.1 mrg } 567 1.1 mrg else 568 1.1 mrg gcc_unreachable (); 569 1.1 mrg 570 1.1 mrg gcc_assert (!fields->get (var)); 571 1.1 mrg 572 1.1 mrg tree type = TREE_TYPE (var); 573 1.1 mrg 574 1.1 mrg if (POINTER_TYPE_P (type) 575 1.1 mrg && TYPE_RESTRICT (type)) 576 1.1 mrg type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT); 577 1.1 mrg 578 1.1 mrg tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, name, type); 579 1.1 mrg 580 1.1 mrg if (TREE_CODE (var) == VAR_DECL && type == TREE_TYPE (var)) 581 1.1 mrg { 582 1.1 mrg SET_DECL_ALIGN (field, DECL_ALIGN (var)); 583 1.1 mrg DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var); 584 1.1 mrg TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var); 585 1.1 mrg } 586 1.1 mrg else 587 1.1 mrg SET_DECL_ALIGN (field, TYPE_ALIGN (type)); 588 1.1 mrg 589 1.1 mrg fields->put (var, field); 590 1.1 mrg 591 1.1 mrg insert_field_into_struct (record_type, field); 592 1.1 mrg } 593 1.1 mrg 594 1.1 mrg /* Sets of SSA_NAMES or VAR_DECLs to propagate. 
*/ 595 1.1 mrg typedef hash_set<tree> propagation_set; 596 1.1 mrg 597 1.1 mrg static void 598 1.1 mrg find_ssa_names_to_propagate (parallel_g *par, unsigned outer_mask, 599 1.1 mrg bitmap worker_single, bitmap vector_single, 600 1.1 mrg vec<propagation_set *> *prop_set) 601 1.1 mrg { 602 1.1 mrg unsigned mask = outer_mask | par->mask; 603 1.1 mrg 604 1.1 mrg if (par->inner) 605 1.1 mrg find_ssa_names_to_propagate (par->inner, mask, worker_single, 606 1.1 mrg vector_single, prop_set); 607 1.1 mrg if (par->next) 608 1.1 mrg find_ssa_names_to_propagate (par->next, outer_mask, worker_single, 609 1.1 mrg vector_single, prop_set); 610 1.1 mrg 611 1.1 mrg if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) 612 1.1 mrg { 613 1.1 mrg basic_block block; 614 1.1 mrg int ix; 615 1.1 mrg 616 1.1 mrg for (ix = 0; par->blocks.iterate (ix, &block); ix++) 617 1.1 mrg { 618 1.1 mrg for (gphi_iterator psi = gsi_start_phis (block); 619 1.1 mrg !gsi_end_p (psi); gsi_next (&psi)) 620 1.1 mrg { 621 1.1 mrg gphi *phi = psi.phi (); 622 1.1 mrg use_operand_p use; 623 1.1 mrg ssa_op_iter iter; 624 1.1 mrg 625 1.1 mrg FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE) 626 1.1 mrg { 627 1.1 mrg tree var = USE_FROM_PTR (use); 628 1.1 mrg 629 1.1 mrg if (TREE_CODE (var) != SSA_NAME) 630 1.1 mrg continue; 631 1.1 mrg 632 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (var); 633 1.1 mrg 634 1.1 mrg if (gimple_nop_p (def_stmt)) 635 1.1 mrg continue; 636 1.1 mrg 637 1.1 mrg basic_block def_bb = gimple_bb (def_stmt); 638 1.1 mrg 639 1.1 mrg if (bitmap_bit_p (worker_single, def_bb->index)) 640 1.1 mrg { 641 1.1 mrg if (!(*prop_set)[def_bb->index]) 642 1.1 mrg (*prop_set)[def_bb->index] = new propagation_set; 643 1.1 mrg 644 1.1 mrg propagation_set *ws_prop = (*prop_set)[def_bb->index]; 645 1.1 mrg 646 1.1 mrg ws_prop->add (var); 647 1.1 mrg } 648 1.1 mrg } 649 1.1 mrg } 650 1.1 mrg 651 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block); 652 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi)) 653 1.1 mrg { 654 1.1 
mrg use_operand_p use; 655 1.1 mrg ssa_op_iter iter; 656 1.1 mrg gimple *stmt = gsi_stmt (gsi); 657 1.1 mrg 658 1.1 mrg FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE) 659 1.1 mrg { 660 1.1 mrg tree var = USE_FROM_PTR (use); 661 1.1 mrg 662 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (var); 663 1.1 mrg 664 1.1 mrg if (gimple_nop_p (def_stmt)) 665 1.1 mrg continue; 666 1.1 mrg 667 1.1 mrg basic_block def_bb = gimple_bb (def_stmt); 668 1.1 mrg 669 1.1 mrg if (bitmap_bit_p (worker_single, def_bb->index)) 670 1.1 mrg { 671 1.1 mrg if (!(*prop_set)[def_bb->index]) 672 1.1 mrg (*prop_set)[def_bb->index] = new propagation_set; 673 1.1 mrg 674 1.1 mrg propagation_set *ws_prop = (*prop_set)[def_bb->index]; 675 1.1 mrg 676 1.1 mrg ws_prop->add (var); 677 1.1 mrg } 678 1.1 mrg } 679 1.1 mrg } 680 1.1 mrg } 681 1.1 mrg } 682 1.1 mrg } 683 1.1 mrg 684 1.1 mrg /* Callback for walk_gimple_stmt to find RHS VAR_DECLs (uses) in a 685 1.1 mrg statement. */ 686 1.1 mrg 687 1.1 mrg static tree 688 1.1 mrg find_partitioned_var_uses_1 (tree *node, int *, void *data) 689 1.1 mrg { 690 1.1 mrg walk_stmt_info *wi = (walk_stmt_info *) data; 691 1.1 mrg hash_set<tree> *partitioned_var_uses = (hash_set<tree> *) wi->info; 692 1.1 mrg 693 1.1 mrg if (!wi->is_lhs && VAR_P (*node)) 694 1.1 mrg partitioned_var_uses->add (*node); 695 1.1 mrg 696 1.1 mrg return NULL_TREE; 697 1.1 mrg } 698 1.1 mrg 699 1.1 mrg static void 700 1.1 mrg find_partitioned_var_uses (parallel_g *par, unsigned outer_mask, 701 1.1 mrg hash_set<tree> *partitioned_var_uses) 702 1.1 mrg { 703 1.1 mrg unsigned mask = outer_mask | par->mask; 704 1.1 mrg 705 1.1 mrg if (par->inner) 706 1.1 mrg find_partitioned_var_uses (par->inner, mask, partitioned_var_uses); 707 1.1 mrg if (par->next) 708 1.1 mrg find_partitioned_var_uses (par->next, outer_mask, partitioned_var_uses); 709 1.1 mrg 710 1.1 mrg if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) 711 1.1 mrg { 712 1.1 mrg basic_block block; 713 1.1 mrg int ix; 714 1.1 mrg 715 1.1 mrg 
for (ix = 0; par->blocks.iterate (ix, &block); ix++) 716 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block); 717 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi)) 718 1.1 mrg { 719 1.1 mrg walk_stmt_info wi; 720 1.1 mrg memset (&wi, 0, sizeof (wi)); 721 1.1 mrg wi.info = (void *) partitioned_var_uses; 722 1.1 mrg walk_gimple_stmt (&gsi, NULL, find_partitioned_var_uses_1, &wi); 723 1.1 mrg } 724 1.1 mrg } 725 1.1 mrg } 726 1.1 mrg 727 1.1 mrg /* Gang-private variables (typically placed in a GPU's shared memory) do not 728 1.1 mrg need to be processed by the worker-propagation mechanism. Populate the 729 1.1 mrg GANG_PRIVATE_VARS set with any such variables found in the current 730 1.1 mrg function. */ 731 1.1 mrg 732 1.1 mrg static void 733 1.1 mrg find_gang_private_vars (hash_set<tree> *gang_private_vars) 734 1.1 mrg { 735 1.1 mrg basic_block block; 736 1.1 mrg 737 1.1 mrg FOR_EACH_BB_FN (block, cfun) 738 1.1 mrg { 739 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block); 740 1.1 mrg !gsi_end_p (gsi); 741 1.1 mrg gsi_next (&gsi)) 742 1.1 mrg { 743 1.1 mrg gimple *stmt = gsi_stmt (gsi); 744 1.1 mrg 745 1.1 mrg if (gimple_call_internal_p (stmt, IFN_UNIQUE)) 746 1.1 mrg { 747 1.1 mrg enum ifn_unique_kind k = ((enum ifn_unique_kind) 748 1.1 mrg TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); 749 1.1 mrg if (k == IFN_UNIQUE_OACC_PRIVATE) 750 1.1 mrg { 751 1.1 mrg HOST_WIDE_INT level 752 1.1 mrg = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2)); 753 1.1 mrg if (level != GOMP_DIM_GANG) 754 1.1 mrg continue; 755 1.1 mrg for (unsigned i = 3; i < gimple_call_num_args (stmt); i++) 756 1.1 mrg { 757 1.1 mrg tree arg = gimple_call_arg (stmt, i); 758 1.1 mrg gcc_assert (TREE_CODE (arg) == ADDR_EXPR); 759 1.1 mrg tree decl = TREE_OPERAND (arg, 0); 760 1.1 mrg gang_private_vars->add (decl); 761 1.1 mrg } 762 1.1 mrg } 763 1.1 mrg } 764 1.1 mrg } 765 1.1 mrg } 766 1.1 mrg } 767 1.1 mrg 768 1.1 mrg static void 769 1.1 mrg find_local_vars_to_propagate (parallel_g *par, 
unsigned outer_mask, 770 1.1 mrg hash_set<tree> *partitioned_var_uses, 771 1.1 mrg hash_set<tree> *gang_private_vars, 772 1.1 mrg bitmap writes_gang_private, 773 1.1 mrg vec<propagation_set *> *prop_set) 774 1.1 mrg { 775 1.1 mrg unsigned mask = outer_mask | par->mask; 776 1.1 mrg 777 1.1 mrg if (par->inner) 778 1.1 mrg find_local_vars_to_propagate (par->inner, mask, partitioned_var_uses, 779 1.1 mrg gang_private_vars, writes_gang_private, 780 1.1 mrg prop_set); 781 1.1 mrg if (par->next) 782 1.1 mrg find_local_vars_to_propagate (par->next, outer_mask, partitioned_var_uses, 783 1.1 mrg gang_private_vars, writes_gang_private, 784 1.1 mrg prop_set); 785 1.1 mrg 786 1.1 mrg if (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))) 787 1.1 mrg { 788 1.1 mrg basic_block block; 789 1.1 mrg int ix; 790 1.1 mrg 791 1.1 mrg for (ix = 0; par->blocks.iterate (ix, &block); ix++) 792 1.1 mrg { 793 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block); 794 1.1 mrg !gsi_end_p (gsi); gsi_next (&gsi)) 795 1.1 mrg { 796 1.1 mrg gimple *stmt = gsi_stmt (gsi); 797 1.1 mrg tree var; 798 1.1 mrg unsigned i; 799 1.1 mrg 800 1.1 mrg FOR_EACH_LOCAL_DECL (cfun, i, var) 801 1.1 mrg { 802 1.1 mrg if (!VAR_P (var) 803 1.1 mrg || is_global_var (var) 804 1.1 mrg || AGGREGATE_TYPE_P (TREE_TYPE (var)) 805 1.1 mrg || !partitioned_var_uses->contains (var)) 806 1.1 mrg continue; 807 1.1 mrg 808 1.1 mrg if (stmt_may_clobber_ref_p (stmt, var)) 809 1.1 mrg { 810 1.1 mrg if (dump_file) 811 1.1 mrg { 812 1.1 mrg fprintf (dump_file, "bb %u: local variable may be " 813 1.1 mrg "clobbered in %s mode: ", block->index, 814 1.1 mrg mask_name (mask)); 815 1.1 mrg print_generic_expr (dump_file, var, TDF_SLIM); 816 1.1 mrg fprintf (dump_file, "\n"); 817 1.1 mrg } 818 1.1 mrg 819 1.1 mrg if (gang_private_vars->contains (var)) 820 1.1 mrg { 821 1.1 mrg /* If we write a gang-private variable, we want a 822 1.1 mrg barrier at the end of the block. 
*/ 823 1.1 mrg bitmap_set_bit (writes_gang_private, block->index); 824 1.1 mrg continue; 825 1.1 mrg } 826 1.1 mrg 827 1.1 mrg if (!(*prop_set)[block->index]) 828 1.1 mrg (*prop_set)[block->index] = new propagation_set; 829 1.1 mrg 830 1.1 mrg propagation_set *ws_prop 831 1.1 mrg = (*prop_set)[block->index]; 832 1.1 mrg 833 1.1 mrg ws_prop->add (var); 834 1.1 mrg } 835 1.1 mrg } 836 1.1 mrg } 837 1.1 mrg } 838 1.1 mrg } 839 1.1 mrg } 840 1.1 mrg 841 1.1 mrg /* Transform basic blocks FROM, TO (which may be the same block) into: 842 1.1 mrg if (GOACC_single_start ()) 843 1.1 mrg BLOCK; 844 1.1 mrg GOACC_barrier (); 845 1.1 mrg \ | / 846 1.1 mrg +----+ 847 1.1 mrg | | (new) predicate block 848 1.1 mrg +----+-- 849 1.1 mrg \ | / \ | / |t \ 850 1.1 mrg +----+ +----+ +----+ | 851 1.1 mrg | | | | ===> | | | f (old) from block 852 1.1 mrg +----+ +----+ +----+ | 853 1.1 mrg | t/ \f | / 854 1.1 mrg +----+/ 855 1.1 mrg (split (split before | | skip block 856 1.1 mrg at end) condition) +----+ 857 1.1 mrg t/ \f 858 1.1 mrg */ 859 1.1 mrg 860 1.1 mrg static void 861 1.1 mrg worker_single_simple (basic_block from, basic_block to, 862 1.1 mrg hash_set<tree> *def_escapes_block) 863 1.1 mrg { 864 1.1 mrg gimple *call, *cond; 865 1.1 mrg tree lhs, decl; 866 1.1 mrg basic_block skip_block; 867 1.1 mrg 868 1.1 mrg gimple_stmt_iterator gsi = gsi_last_bb (to); 869 1.1 mrg if (EDGE_COUNT (to->succs) > 1) 870 1.1 mrg { 871 1.1 mrg gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND); 872 1.1 mrg gsi_prev (&gsi); 873 1.1 mrg } 874 1.1 mrg edge e = split_block (to, gsi_stmt (gsi)); 875 1.1 mrg skip_block = e->dest; 876 1.1 mrg 877 1.1 mrg gimple_stmt_iterator start = gsi_after_labels (from); 878 1.1 mrg 879 1.1 mrg decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_START); 880 1.1 mrg lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl))); 881 1.1 mrg call = gimple_build_call (decl, 0); 882 1.1 mrg gimple_call_set_lhs (call, lhs); 883 1.1 mrg gsi_insert_before (&start, call, GSI_NEW_STMT); 
884 1.1 mrg update_stmt (call); 885 1.1 mrg 886 1.1 mrg cond = gimple_build_cond (EQ_EXPR, lhs, 887 1.1 mrg fold_convert_loc (UNKNOWN_LOCATION, 888 1.1 mrg TREE_TYPE (lhs), 889 1.1 mrg boolean_true_node), 890 1.1 mrg NULL_TREE, NULL_TREE); 891 1.1 mrg gsi_insert_after (&start, cond, GSI_NEW_STMT); 892 1.1 mrg update_stmt (cond); 893 1.1 mrg 894 1.1 mrg edge et = split_block (from, cond); 895 1.1 mrg et->flags &= ~EDGE_FALLTHRU; 896 1.1 mrg et->flags |= EDGE_TRUE_VALUE; 897 1.1 mrg /* Make the active worker the more probable path so we prefer fallthrough 898 1.1 mrg (letting the idle workers jump around more). */ 899 1.1 mrg et->probability = profile_probability::likely (); 900 1.1 mrg 901 1.1 mrg edge ef = make_edge (from, skip_block, EDGE_FALSE_VALUE); 902 1.1 mrg ef->probability = et->probability.invert (); 903 1.1 mrg 904 1.1 mrg basic_block neutered = split_edge (ef); 905 1.1 mrg gimple_stmt_iterator neut_gsi = gsi_last_bb (neutered); 906 1.1 mrg 907 1.1 mrg for (gsi = gsi_start_bb (et->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 908 1.1 mrg { 909 1.1 mrg gimple *stmt = gsi_stmt (gsi); 910 1.1 mrg ssa_op_iter iter; 911 1.1 mrg tree var; 912 1.1 mrg 913 1.1 mrg FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF) 914 1.1 mrg { 915 1.1 mrg if (def_escapes_block->contains (var)) 916 1.1 mrg { 917 1.1 mrg gphi *join_phi = create_phi_node (NULL_TREE, skip_block); 918 1.1 mrg create_new_def_for (var, join_phi, 919 1.1 mrg gimple_phi_result_ptr (join_phi)); 920 1.1 mrg add_phi_arg (join_phi, var, e, UNKNOWN_LOCATION); 921 1.1 mrg 922 1.1 mrg tree neutered_def = copy_ssa_name (var, NULL); 923 1.1 mrg /* We really want "don't care" or some value representing 924 1.1 mrg undefined here, but optimizers will probably get rid of the 925 1.1 mrg zero-assignments anyway. 
*/ 926 1.1 mrg gassign *zero = gimple_build_assign (neutered_def, 927 1.1 mrg build_zero_cst (TREE_TYPE (neutered_def))); 928 1.1 mrg 929 1.1 mrg gsi_insert_after (&neut_gsi, zero, GSI_CONTINUE_LINKING); 930 1.1 mrg update_stmt (zero); 931 1.1 mrg 932 1.1 mrg add_phi_arg (join_phi, neutered_def, single_succ_edge (neutered), 933 1.1 mrg UNKNOWN_LOCATION); 934 1.1 mrg update_stmt (join_phi); 935 1.1 mrg } 936 1.1 mrg } 937 1.1 mrg } 938 1.1 mrg } 939 1.1 mrg 940 1.1 mrg static tree 941 1.1 mrg build_receiver_ref (tree var, tree receiver_decl, field_map_t *fields) 942 1.1 mrg { 943 1.1 mrg tree x = build_simple_mem_ref (receiver_decl); 944 1.1 mrg tree field = *fields->get (var); 945 1.1 mrg TREE_THIS_NOTRAP (x) = 1; 946 1.1 mrg x = omp_build_component_ref (x, field); 947 1.1 mrg return x; 948 1.1 mrg } 949 1.1 mrg 950 1.1 mrg static tree 951 1.1 mrg build_sender_ref (tree var, tree sender_decl, field_map_t *fields) 952 1.1 mrg { 953 1.1 mrg if (POINTER_TYPE_P (TREE_TYPE (sender_decl))) 954 1.1 mrg sender_decl = build_simple_mem_ref (sender_decl); 955 1.1 mrg tree field = *fields->get (var); 956 1.1 mrg return omp_build_component_ref (sender_decl, field); 957 1.1 mrg } 958 1.1 mrg 959 1.1 mrg static int 960 1.1 mrg sort_by_ssa_version_or_uid (const void *p1, const void *p2) 961 1.1 mrg { 962 1.1 mrg const tree t1 = *(const tree *)p1; 963 1.1 mrg const tree t2 = *(const tree *)p2; 964 1.1 mrg 965 1.1 mrg if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) == SSA_NAME) 966 1.1 mrg return SSA_NAME_VERSION (t1) - SSA_NAME_VERSION (t2); 967 1.1 mrg else if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) != SSA_NAME) 968 1.1 mrg return -1; 969 1.1 mrg else if (TREE_CODE (t1) != SSA_NAME && TREE_CODE (t2) == SSA_NAME) 970 1.1 mrg return 1; 971 1.1 mrg else 972 1.1 mrg return DECL_UID (t1) - DECL_UID (t2); 973 1.1 mrg } 974 1.1 mrg 975 1.1 mrg static int 976 1.1 mrg sort_by_size_then_ssa_version_or_uid (const void *p1, const void *p2) 977 1.1 mrg { 978 1.1 mrg const tree t1 = 
*(const tree *)p1; 979 1.1 mrg const tree t2 = *(const tree *)p2; 980 1.1 mrg unsigned HOST_WIDE_INT s1 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t1))); 981 1.1 mrg unsigned HOST_WIDE_INT s2 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t2))); 982 1.1 mrg if (s1 != s2) 983 1.1 mrg return s2 - s1; 984 1.1 mrg else 985 1.1 mrg return sort_by_ssa_version_or_uid (p1, p2); 986 1.1 mrg } 987 1.1 mrg 988 1.1 mrg static void 989 1.1 mrg worker_single_copy (basic_block from, basic_block to, 990 1.1 mrg hash_set<tree> *def_escapes_block, 991 1.1 mrg hash_set<tree> *worker_partitioned_uses, 992 1.1 mrg tree record_type, record_field_map_t *record_field_map, 993 1.1 mrg unsigned HOST_WIDE_INT placement, 994 1.1 mrg bool isolate_broadcasts, bool has_gang_private_write) 995 1.1 mrg { 996 1.1 mrg /* If we only have virtual defs, we'll have no record type, but we still want 997 1.1 mrg to emit single_copy_start and (particularly) single_copy_end to act as 998 1.1 mrg a vdef source on the neutered edge representing memory writes on the 999 1.1 mrg non-neutered edge. 
*/ 1000 1.1 mrg if (!record_type) 1001 1.1 mrg record_type = char_type_node; 1002 1.1 mrg 1003 1.1 mrg tree sender_decl 1004 1.1 mrg = targetm.goacc.create_worker_broadcast_record (record_type, true, 1005 1.1 mrg ".oacc_worker_o", 1006 1.1 mrg placement); 1007 1.1 mrg tree receiver_decl 1008 1.1 mrg = targetm.goacc.create_worker_broadcast_record (record_type, false, 1009 1.1 mrg ".oacc_worker_i", 1010 1.1 mrg placement); 1011 1.1 mrg 1012 1.1 mrg gimple_stmt_iterator gsi = gsi_last_bb (to); 1013 1.1 mrg if (EDGE_COUNT (to->succs) > 1) 1014 1.1 mrg gsi_prev (&gsi); 1015 1.1 mrg edge e = split_block (to, gsi_stmt (gsi)); 1016 1.1 mrg basic_block barrier_block = e->dest; 1017 1.1 mrg 1018 1.1 mrg gimple_stmt_iterator start = gsi_after_labels (from); 1019 1.1 mrg 1020 1.1 mrg tree decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_START); 1021 1.1 mrg 1022 1.1 mrg tree lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl))); 1023 1.1 mrg 1024 1.1 mrg gimple *call 1025 1.1 mrg = gimple_build_call (decl, 1, 1026 1.1 mrg POINTER_TYPE_P (TREE_TYPE (sender_decl)) 1027 1.1 mrg ? sender_decl : build_fold_addr_expr (sender_decl)); 1028 1.1 mrg gimple_call_set_lhs (call, lhs); 1029 1.1 mrg gsi_insert_before (&start, call, GSI_NEW_STMT); 1030 1.1 mrg update_stmt (call); 1031 1.1 mrg 1032 1.1 mrg /* The shared-memory range for this block overflowed. Add a barrier before 1033 1.1 mrg the GOACC_single_copy_start call. 
*/ 1034 1.1 mrg if (isolate_broadcasts) 1035 1.1 mrg { 1036 1.1 mrg decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); 1037 1.1 mrg gimple *acc_bar = gimple_build_call (decl, 0); 1038 1.1 mrg gsi_insert_before (&start, acc_bar, GSI_SAME_STMT); 1039 1.1 mrg } 1040 1.1 mrg 1041 1.1 mrg tree conv_tmp = make_ssa_name (TREE_TYPE (receiver_decl)); 1042 1.1 mrg 1043 1.1 mrg gimple *conv = gimple_build_assign (conv_tmp, 1044 1.1 mrg fold_convert (TREE_TYPE (receiver_decl), 1045 1.1 mrg lhs)); 1046 1.1 mrg update_stmt (conv); 1047 1.1 mrg gsi_insert_after (&start, conv, GSI_NEW_STMT); 1048 1.1 mrg gimple *asgn = gimple_build_assign (receiver_decl, conv_tmp); 1049 1.1 mrg gsi_insert_after (&start, asgn, GSI_NEW_STMT); 1050 1.1 mrg update_stmt (asgn); 1051 1.1 mrg 1052 1.1 mrg tree zero_ptr = build_int_cst (TREE_TYPE (receiver_decl), 0); 1053 1.1 mrg 1054 1.1 mrg tree recv_tmp = make_ssa_name (TREE_TYPE (receiver_decl)); 1055 1.1 mrg asgn = gimple_build_assign (recv_tmp, receiver_decl); 1056 1.1 mrg gsi_insert_after (&start, asgn, GSI_NEW_STMT); 1057 1.1 mrg update_stmt (asgn); 1058 1.1 mrg 1059 1.1 mrg gimple *cond = gimple_build_cond (EQ_EXPR, recv_tmp, zero_ptr, NULL_TREE, 1060 1.1 mrg NULL_TREE); 1061 1.1 mrg update_stmt (cond); 1062 1.1 mrg 1063 1.1 mrg gsi_insert_after (&start, cond, GSI_NEW_STMT); 1064 1.1 mrg 1065 1.1 mrg edge et = split_block (from, cond); 1066 1.1 mrg et->flags &= ~EDGE_FALLTHRU; 1067 1.1 mrg et->flags |= EDGE_TRUE_VALUE; 1068 1.1 mrg /* Make the active worker the more probable path so we prefer fallthrough 1069 1.1 mrg (letting the idle workers jump around more). 
*/ 1070 1.1 mrg et->probability = profile_probability::likely (); 1071 1.1 mrg 1072 1.1 mrg basic_block body = et->dest; 1073 1.1 mrg 1074 1.1 mrg edge ef = make_edge (from, barrier_block, EDGE_FALSE_VALUE); 1075 1.1 mrg ef->probability = et->probability.invert (); 1076 1.1 mrg 1077 1.1 mrg gimple_stmt_iterator bar_gsi = gsi_start_bb (barrier_block); 1078 1.1 mrg cond = gimple_build_cond (NE_EXPR, recv_tmp, zero_ptr, NULL_TREE, NULL_TREE); 1079 1.1 mrg 1080 1.1 mrg if (record_type != char_type_node || has_gang_private_write) 1081 1.1 mrg { 1082 1.1 mrg decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); 1083 1.1 mrg gimple *acc_bar = gimple_build_call (decl, 0); 1084 1.1 mrg 1085 1.1 mrg gsi_insert_before (&bar_gsi, acc_bar, GSI_NEW_STMT); 1086 1.1 mrg gsi_insert_after (&bar_gsi, cond, GSI_NEW_STMT); 1087 1.1 mrg } 1088 1.1 mrg else 1089 1.1 mrg gsi_insert_before (&bar_gsi, cond, GSI_NEW_STMT); 1090 1.1 mrg 1091 1.1 mrg edge et2 = split_block (barrier_block, cond); 1092 1.1 mrg et2->flags &= ~EDGE_FALLTHRU; 1093 1.1 mrg et2->flags |= EDGE_TRUE_VALUE; 1094 1.1 mrg et2->probability = profile_probability::unlikely (); 1095 1.1 mrg 1096 1.1 mrg basic_block exit_block = et2->dest; 1097 1.1 mrg 1098 1.1 mrg basic_block copyout_block = split_edge (et2); 1099 1.1 mrg edge ef2 = make_edge (barrier_block, exit_block, EDGE_FALSE_VALUE); 1100 1.1 mrg ef2->probability = et2->probability.invert (); 1101 1.1 mrg 1102 1.1 mrg gimple_stmt_iterator copyout_gsi = gsi_start_bb (copyout_block); 1103 1.1 mrg 1104 1.1 mrg edge copyout_to_exit = single_succ_edge (copyout_block); 1105 1.1 mrg 1106 1.1 mrg gimple_seq sender_seq = NULL; 1107 1.1 mrg 1108 1.1 mrg /* Make sure we iterate over definitions in a stable order. 
*/ 1109 1.1 mrg auto_vec<tree> escape_vec (def_escapes_block->elements ()); 1110 1.1 mrg for (hash_set<tree>::iterator it = def_escapes_block->begin (); 1111 1.1 mrg it != def_escapes_block->end (); ++it) 1112 1.1 mrg escape_vec.quick_push (*it); 1113 1.1 mrg escape_vec.qsort (sort_by_ssa_version_or_uid); 1114 1.1 mrg 1115 1.1 mrg for (unsigned i = 0; i < escape_vec.length (); i++) 1116 1.1 mrg { 1117 1.1 mrg tree var = escape_vec[i]; 1118 1.1 mrg 1119 1.1 mrg if (TREE_CODE (var) == SSA_NAME && SSA_NAME_IS_VIRTUAL_OPERAND (var)) 1120 1.1 mrg continue; 1121 1.1 mrg 1122 1.1 mrg tree barrier_def = 0; 1123 1.1 mrg 1124 1.1 mrg if (TREE_CODE (var) == SSA_NAME) 1125 1.1 mrg { 1126 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (var); 1127 1.1 mrg 1128 1.1 mrg if (gimple_nop_p (def_stmt)) 1129 1.1 mrg continue; 1130 1.1 mrg 1131 1.1 mrg /* The barrier phi takes one result from the actual work of the 1132 1.1 mrg block we're neutering, and the other result is constant zero of 1133 1.1 mrg the same type. */ 1134 1.1 mrg 1135 1.1 mrg gphi *barrier_phi = create_phi_node (NULL_TREE, barrier_block); 1136 1.1 mrg barrier_def = create_new_def_for (var, barrier_phi, 1137 1.1 mrg gimple_phi_result_ptr (barrier_phi)); 1138 1.1 mrg 1139 1.1 mrg add_phi_arg (barrier_phi, var, e, UNKNOWN_LOCATION); 1140 1.1 mrg add_phi_arg (barrier_phi, build_zero_cst (TREE_TYPE (var)), ef, 1141 1.1 mrg UNKNOWN_LOCATION); 1142 1.1 mrg 1143 1.1 mrg update_stmt (barrier_phi); 1144 1.1 mrg } 1145 1.1 mrg else 1146 1.1 mrg gcc_assert (TREE_CODE (var) == VAR_DECL); 1147 1.1 mrg 1148 1.1 mrg /* If we had no record type, we will have no fields map. */ 1149 1.1 mrg field_map_t *fields = record_field_map->get (record_type); 1150 1.1 mrg 1151 1.1 mrg if (worker_partitioned_uses->contains (var) 1152 1.1 mrg && fields 1153 1.1 mrg && fields->get (var)) 1154 1.1 mrg { 1155 1.1 mrg tree neutered_def = make_ssa_name (TREE_TYPE (var)); 1156 1.1 mrg 1157 1.1 mrg /* Receive definition from shared memory block. 
*/ 1158 1.1 mrg 1159 1.1 mrg tree receiver_ref = build_receiver_ref (var, receiver_decl, fields); 1160 1.1 mrg gassign *recv = gimple_build_assign (neutered_def, 1161 1.1 mrg receiver_ref); 1162 1.1 mrg gsi_insert_after (©out_gsi, recv, GSI_CONTINUE_LINKING); 1163 1.1 mrg update_stmt (recv); 1164 1.1 mrg 1165 1.1 mrg if (TREE_CODE (var) == VAR_DECL) 1166 1.1 mrg { 1167 1.1 mrg /* If it's a VAR_DECL, we only copied to an SSA temporary. Copy 1168 1.1 mrg to the final location now. */ 1169 1.1 mrg gassign *asgn = gimple_build_assign (var, neutered_def); 1170 1.1 mrg gsi_insert_after (©out_gsi, asgn, GSI_CONTINUE_LINKING); 1171 1.1 mrg update_stmt (asgn); 1172 1.1 mrg } 1173 1.1 mrg else 1174 1.1 mrg { 1175 1.1 mrg /* If it's an SSA name, create a new phi at the join node to 1176 1.1 mrg represent either the output from the active worker (the 1177 1.1 mrg barrier) or the inactive workers (the copyout block). */ 1178 1.1 mrg gphi *join_phi = create_phi_node (NULL_TREE, exit_block); 1179 1.1 mrg create_new_def_for (barrier_def, join_phi, 1180 1.1 mrg gimple_phi_result_ptr (join_phi)); 1181 1.1 mrg add_phi_arg (join_phi, barrier_def, ef2, UNKNOWN_LOCATION); 1182 1.1 mrg add_phi_arg (join_phi, neutered_def, copyout_to_exit, 1183 1.1 mrg UNKNOWN_LOCATION); 1184 1.1 mrg update_stmt (join_phi); 1185 1.1 mrg } 1186 1.1 mrg 1187 1.1 mrg /* Send definition to shared memory block. 
*/ 1188 1.1 mrg 1189 1.1 mrg tree sender_ref = build_sender_ref (var, sender_decl, fields); 1190 1.1 mrg 1191 1.1 mrg if (TREE_CODE (var) == SSA_NAME) 1192 1.1 mrg { 1193 1.1 mrg gassign *send = gimple_build_assign (sender_ref, var); 1194 1.1 mrg gimple_seq_add_stmt (&sender_seq, send); 1195 1.1 mrg update_stmt (send); 1196 1.1 mrg } 1197 1.1 mrg else if (TREE_CODE (var) == VAR_DECL) 1198 1.1 mrg { 1199 1.1 mrg tree tmp = make_ssa_name (TREE_TYPE (var)); 1200 1.1 mrg gassign *send = gimple_build_assign (tmp, var); 1201 1.1 mrg gimple_seq_add_stmt (&sender_seq, send); 1202 1.1 mrg update_stmt (send); 1203 1.1 mrg send = gimple_build_assign (sender_ref, tmp); 1204 1.1 mrg gimple_seq_add_stmt (&sender_seq, send); 1205 1.1 mrg update_stmt (send); 1206 1.1 mrg } 1207 1.1 mrg else 1208 1.1 mrg gcc_unreachable (); 1209 1.1 mrg } 1210 1.1 mrg } 1211 1.1 mrg 1212 1.1 mrg /* The shared-memory range for this block overflowed. Add a barrier at the 1213 1.1 mrg end. */ 1214 1.1 mrg if (isolate_broadcasts) 1215 1.1 mrg { 1216 1.1 mrg gsi = gsi_start_bb (exit_block); 1217 1.1 mrg decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); 1218 1.1 mrg gimple *acc_bar = gimple_build_call (decl, 0); 1219 1.1 mrg gsi_insert_before (&gsi, acc_bar, GSI_SAME_STMT); 1220 1.1 mrg } 1221 1.1 mrg 1222 1.1 mrg /* It's possible for the ET->DEST block (the work done by the active thread) 1223 1.1 mrg to finish with a control-flow insn, e.g. a UNIQUE function call. Split 1224 1.1 mrg the block and add SENDER_SEQ in the latter part to avoid having control 1225 1.1 mrg flow in the middle of a BB. */ 1226 1.1 mrg 1227 1.1 mrg decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_END); 1228 1.1 mrg call = gimple_build_call (decl, 1, 1229 1.1 mrg POINTER_TYPE_P (TREE_TYPE (sender_decl)) 1230 1.1 mrg ? 
sender_decl 1231 1.1 mrg : build_fold_addr_expr (sender_decl)); 1232 1.1 mrg gimple_seq_add_stmt (&sender_seq, call); 1233 1.1 mrg 1234 1.1 mrg gsi = gsi_last_bb (body); 1235 1.1 mrg gimple *last = gsi_stmt (gsi); 1236 1.1 mrg basic_block sender_block = split_block (body, last)->dest; 1237 1.1 mrg gsi = gsi_last_bb (sender_block); 1238 1.1 mrg gsi_insert_seq_after (&gsi, sender_seq, GSI_CONTINUE_LINKING); 1239 1.1 mrg } 1240 1.1 mrg 1241 1.1 mrg typedef hash_map<basic_block, std::pair<unsigned HOST_WIDE_INT, bool> > 1242 1.1 mrg blk_offset_map_t; 1243 1.1 mrg 1244 1.1 mrg static void 1245 1.1 mrg neuter_worker_single (parallel_g *par, unsigned outer_mask, 1246 1.1 mrg bitmap worker_single, bitmap vector_single, 1247 1.1 mrg vec<propagation_set *> *prop_set, 1248 1.1 mrg hash_set<tree> *partitioned_var_uses, 1249 1.1 mrg record_field_map_t *record_field_map, 1250 1.1 mrg blk_offset_map_t *blk_offset_map, 1251 1.1 mrg bitmap writes_gang_private) 1252 1.1 mrg { 1253 1.1 mrg unsigned mask = outer_mask | par->mask; 1254 1.1 mrg 1255 1.1 mrg if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0) 1256 1.1 mrg { 1257 1.1 mrg basic_block block; 1258 1.1 mrg 1259 1.1 mrg for (unsigned i = 0; par->blocks.iterate (i, &block); i++) 1260 1.1 mrg { 1261 1.1 mrg bool has_defs = false; 1262 1.1 mrg hash_set<tree> def_escapes_block; 1263 1.1 mrg hash_set<tree> worker_partitioned_uses; 1264 1.1 mrg unsigned j; 1265 1.1 mrg tree var; 1266 1.1 mrg 1267 1.1 mrg FOR_EACH_SSA_NAME (j, var, cfun) 1268 1.1 mrg { 1269 1.1 mrg if (SSA_NAME_IS_VIRTUAL_OPERAND (var)) 1270 1.1 mrg { 1271 1.1 mrg has_defs = true; 1272 1.1 mrg continue; 1273 1.1 mrg } 1274 1.1 mrg 1275 1.1 mrg gimple *def_stmt = SSA_NAME_DEF_STMT (var); 1276 1.1 mrg 1277 1.1 mrg if (gimple_nop_p (def_stmt)) 1278 1.1 mrg continue; 1279 1.1 mrg 1280 1.1 mrg if (gimple_bb (def_stmt)->index != block->index) 1281 1.1 mrg continue; 1282 1.1 mrg 1283 1.1 mrg gimple *use_stmt; 1284 1.1 mrg imm_use_iterator use_iter; 1285 1.1 mrg bool 
uses_outside_block = false; 1286 1.1 mrg bool worker_partitioned_use = false; 1287 1.1 mrg 1288 1.1 mrg FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, var) 1289 1.1 mrg { 1290 1.1 mrg int blocknum = gimple_bb (use_stmt)->index; 1291 1.1 mrg 1292 1.1 mrg /* Don't propagate SSA names that are only used in the 1293 1.1 mrg current block, unless the usage is in a phi node: that 1294 1.1 mrg means the name left the block, then came back in at the 1295 1.1 mrg top. */ 1296 1.1 mrg if (blocknum != block->index 1297 1.1 mrg || gimple_code (use_stmt) == GIMPLE_PHI) 1298 1.1 mrg uses_outside_block = true; 1299 1.1 mrg if (!bitmap_bit_p (worker_single, blocknum)) 1300 1.1 mrg worker_partitioned_use = true; 1301 1.1 mrg } 1302 1.1 mrg 1303 1.1 mrg if (uses_outside_block) 1304 1.1 mrg def_escapes_block.add (var); 1305 1.1 mrg 1306 1.1 mrg if (worker_partitioned_use) 1307 1.1 mrg { 1308 1.1 mrg worker_partitioned_uses.add (var); 1309 1.1 mrg has_defs = true; 1310 1.1 mrg } 1311 1.1 mrg } 1312 1.1 mrg 1313 1.1 mrg propagation_set *ws_prop = (*prop_set)[block->index]; 1314 1.1 mrg 1315 1.1 mrg if (ws_prop) 1316 1.1 mrg { 1317 1.1 mrg for (propagation_set::iterator it = ws_prop->begin (); 1318 1.1 mrg it != ws_prop->end (); 1319 1.1 mrg ++it) 1320 1.1 mrg { 1321 1.1 mrg tree var = *it; 1322 1.1 mrg if (TREE_CODE (var) == VAR_DECL) 1323 1.1 mrg { 1324 1.1 mrg def_escapes_block.add (var); 1325 1.1 mrg if (partitioned_var_uses->contains (var)) 1326 1.1 mrg { 1327 1.1 mrg worker_partitioned_uses.add (var); 1328 1.1 mrg has_defs = true; 1329 1.1 mrg } 1330 1.1 mrg } 1331 1.1 mrg } 1332 1.1 mrg 1333 1.1 mrg delete ws_prop; 1334 1.1 mrg (*prop_set)[block->index] = 0; 1335 1.1 mrg } 1336 1.1 mrg 1337 1.1 mrg bool only_marker_fns = true; 1338 1.1 mrg bool join_block = false; 1339 1.1 mrg 1340 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block); 1341 1.1 mrg !gsi_end_p (gsi); 1342 1.1 mrg gsi_next (&gsi)) 1343 1.1 mrg { 1344 1.1 mrg gimple *stmt = gsi_stmt (gsi); 1345 1.1 mrg if 
(gimple_code (stmt) == GIMPLE_CALL 1346 1.1 mrg && gimple_call_internal_p (stmt, IFN_UNIQUE)) 1347 1.1 mrg { 1348 1.1 mrg enum ifn_unique_kind k = ((enum ifn_unique_kind) 1349 1.1 mrg TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); 1350 1.1 mrg if (k != IFN_UNIQUE_OACC_PRIVATE 1351 1.1 mrg && k != IFN_UNIQUE_OACC_JOIN 1352 1.1 mrg && k != IFN_UNIQUE_OACC_FORK 1353 1.1 mrg && k != IFN_UNIQUE_OACC_HEAD_MARK 1354 1.1 mrg && k != IFN_UNIQUE_OACC_TAIL_MARK) 1355 1.1 mrg only_marker_fns = false; 1356 1.1 mrg else if (k == IFN_UNIQUE_OACC_JOIN) 1357 1.1 mrg /* The JOIN marker is special in that it *cannot* be 1358 1.1 mrg predicated for worker zero, because it may be lowered 1359 1.1 mrg to a barrier instruction and all workers must typically 1360 1.1 mrg execute that barrier. We shouldn't be doing any 1361 1.1 mrg broadcasts from the join block anyway. */ 1362 1.1 mrg join_block = true; 1363 1.1 mrg } 1364 1.1 mrg else if (gimple_code (stmt) == GIMPLE_CALL 1365 1.1 mrg && gimple_call_internal_p (stmt, IFN_GOACC_LOOP)) 1366 1.1 mrg /* Empty. */; 1367 1.1 mrg else if (gimple_nop_p (stmt)) 1368 1.1 mrg /* Empty. */; 1369 1.1 mrg else 1370 1.1 mrg only_marker_fns = false; 1371 1.1 mrg } 1372 1.1 mrg 1373 1.1 mrg /* We can skip predicating this block for worker zero if the only 1374 1.1 mrg thing it contains is marker functions that will be removed in the 1375 1.1 mrg oaccdevlow pass anyway. 1376 1.1 mrg Don't do this if the block has (any) phi nodes, because those 1377 1.1 mrg might define SSA names that need broadcasting. 1378 1.1 mrg TODO: We might be able to skip transforming blocks that only 1379 1.1 mrg contain some other trivial statements too. 
*/ 1380 1.1 mrg if (only_marker_fns && !phi_nodes (block)) 1381 1.1 mrg continue; 1382 1.1 mrg 1383 1.1 mrg gcc_assert (!join_block); 1384 1.1 mrg 1385 1.1 mrg if (has_defs) 1386 1.1 mrg { 1387 1.1 mrg tree record_type = (tree) block->aux; 1388 1.1 mrg std::pair<unsigned HOST_WIDE_INT, bool> *off_rngalloc 1389 1.1 mrg = blk_offset_map->get (block); 1390 1.1 mrg gcc_assert (!record_type || off_rngalloc); 1391 1.1 mrg unsigned HOST_WIDE_INT offset 1392 1.1 mrg = off_rngalloc ? off_rngalloc->first : 0; 1393 1.1 mrg bool range_allocated 1394 1.1 mrg = off_rngalloc ? off_rngalloc->second : true; 1395 1.1 mrg bool has_gang_private_write 1396 1.1 mrg = bitmap_bit_p (writes_gang_private, block->index); 1397 1.1 mrg worker_single_copy (block, block, &def_escapes_block, 1398 1.1 mrg &worker_partitioned_uses, record_type, 1399 1.1 mrg record_field_map, 1400 1.1 mrg offset, !range_allocated, 1401 1.1 mrg has_gang_private_write); 1402 1.1 mrg } 1403 1.1 mrg else 1404 1.1 mrg worker_single_simple (block, block, &def_escapes_block); 1405 1.1 mrg } 1406 1.1 mrg } 1407 1.1 mrg 1408 1.1 mrg if ((outer_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0) 1409 1.1 mrg { 1410 1.1 mrg basic_block block; 1411 1.1 mrg 1412 1.1 mrg for (unsigned i = 0; par->blocks.iterate (i, &block); i++) 1413 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (block); 1414 1.1 mrg !gsi_end_p (gsi); 1415 1.1 mrg gsi_next (&gsi)) 1416 1.1 mrg { 1417 1.1 mrg gimple *stmt = gsi_stmt (gsi); 1418 1.1 mrg 1419 1.1 mrg if (gimple_code (stmt) == GIMPLE_CALL 1420 1.1 mrg && !gimple_call_internal_p (stmt) 1421 1.1 mrg && !omp_sese_active_worker_call (as_a <gcall *> (stmt))) 1422 1.1 mrg { 1423 1.1 mrg /* If we have an OpenACC routine call in worker-single mode, 1424 1.1 mrg place barriers before and afterwards to prevent 1425 1.1 mrg clobbering re-used shared memory regions (as are used 1426 1.1 mrg for AMDGCN at present, for example). 
*/ 1427 1.1 mrg tree decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); 1428 1.1 mrg gsi_insert_before (&gsi, gimple_build_call (decl, 0), 1429 1.1 mrg GSI_SAME_STMT); 1430 1.1 mrg gsi_insert_after (&gsi, gimple_build_call (decl, 0), 1431 1.1 mrg GSI_NEW_STMT); 1432 1.1 mrg } 1433 1.1 mrg } 1434 1.1 mrg } 1435 1.1 mrg 1436 1.1 mrg if (par->inner) 1437 1.1 mrg neuter_worker_single (par->inner, mask, worker_single, vector_single, 1438 1.1 mrg prop_set, partitioned_var_uses, record_field_map, 1439 1.1 mrg blk_offset_map, writes_gang_private); 1440 1.1 mrg if (par->next) 1441 1.1 mrg neuter_worker_single (par->next, outer_mask, worker_single, vector_single, 1442 1.1 mrg prop_set, partitioned_var_uses, record_field_map, 1443 1.1 mrg blk_offset_map, writes_gang_private); 1444 1.1 mrg } 1445 1.1 mrg 1446 1.1 mrg static void 1447 1.1 mrg dfs_broadcast_reachable_1 (basic_block bb, sbitmap reachable) 1448 1.1 mrg { 1449 1.1 mrg if (bb->flags & BB_VISITED) 1450 1.1 mrg return; 1451 1.1 mrg 1452 1.1 mrg bb->flags |= BB_VISITED; 1453 1.1 mrg 1454 1.1 mrg if (bb->succs) 1455 1.1 mrg { 1456 1.1 mrg edge e; 1457 1.1 mrg edge_iterator ei; 1458 1.1 mrg FOR_EACH_EDGE (e, ei, bb->succs) 1459 1.1 mrg { 1460 1.1 mrg basic_block dest = e->dest; 1461 1.1 mrg if (dest->aux) 1462 1.1 mrg bitmap_set_bit (reachable, dest->index); 1463 1.1 mrg else 1464 1.1 mrg dfs_broadcast_reachable_1 (dest, reachable); 1465 1.1 mrg } 1466 1.1 mrg } 1467 1.1 mrg } 1468 1.1 mrg 1469 1.1 mrg typedef std::pair<int, tree> idx_decl_pair_t; 1470 1.1 mrg 1471 1.1 mrg typedef auto_vec<splay_tree> used_range_vec_t; 1472 1.1 mrg 1473 1.1 mrg static int 1474 1.1 mrg sort_size_descending (const void *a, const void *b) 1475 1.1 mrg { 1476 1.1 mrg const idx_decl_pair_t *pa = (const idx_decl_pair_t *) a; 1477 1.1 mrg const idx_decl_pair_t *pb = (const idx_decl_pair_t *) b; 1478 1.1 mrg unsigned HOST_WIDE_INT asize = tree_to_uhwi (TYPE_SIZE_UNIT (pa->second)); 1479 1.1 mrg unsigned HOST_WIDE_INT bsize = tree_to_uhwi 
(TYPE_SIZE_UNIT (pb->second)); 1480 1.1 mrg return bsize - asize; 1481 1.1 mrg } 1482 1.1 mrg 1483 1.1 mrg class addr_range 1484 1.1 mrg { 1485 1.1 mrg public: 1486 1.1 mrg addr_range (unsigned HOST_WIDE_INT addr_lo, unsigned HOST_WIDE_INT addr_hi) 1487 1.1 mrg : lo (addr_lo), hi (addr_hi) 1488 1.1 mrg { } 1489 1.1 mrg addr_range (const addr_range &ar) : lo (ar.lo), hi (ar.hi) 1490 1.1 mrg { } 1491 1.1 mrg addr_range () : lo (0), hi (0) 1492 1.1 mrg { } 1493 1.1 mrg 1494 1.1 mrg bool invalid () { return lo == 0 && hi == 0; } 1495 1.1 mrg 1496 1.1 mrg unsigned HOST_WIDE_INT lo; 1497 1.1 mrg unsigned HOST_WIDE_INT hi; 1498 1.1 mrg }; 1499 1.1 mrg 1500 1.1 mrg static int 1501 1.1 mrg splay_tree_compare_addr_range (splay_tree_key a, splay_tree_key b) 1502 1.1 mrg { 1503 1.1 mrg addr_range *ar = (addr_range *) a; 1504 1.1 mrg addr_range *br = (addr_range *) b; 1505 1.1 mrg if (ar->lo == br->lo && ar->hi == br->hi) 1506 1.1 mrg return 0; 1507 1.1 mrg if (ar->hi <= br->lo) 1508 1.1 mrg return -1; 1509 1.1 mrg else if (ar->lo >= br->hi) 1510 1.1 mrg return 1; 1511 1.1 mrg return 0; 1512 1.1 mrg } 1513 1.1 mrg 1514 1.1 mrg static void 1515 1.1 mrg splay_tree_free_key (splay_tree_key k) 1516 1.1 mrg { 1517 1.1 mrg addr_range *ar = (addr_range *) k; 1518 1.1 mrg delete ar; 1519 1.1 mrg } 1520 1.1 mrg 1521 1.1 mrg static addr_range 1522 1.1 mrg first_fit_range (splay_tree s, unsigned HOST_WIDE_INT size, 1523 1.1 mrg unsigned HOST_WIDE_INT align, addr_range *bounds) 1524 1.1 mrg { 1525 1.1 mrg splay_tree_node min = splay_tree_min (s); 1526 1.1 mrg if (min) 1527 1.1 mrg { 1528 1.1 mrg splay_tree_node next; 1529 1.1 mrg while ((next = splay_tree_successor (s, min->key))) 1530 1.1 mrg { 1531 1.1 mrg unsigned HOST_WIDE_INT lo = ((addr_range *) min->key)->hi; 1532 1.1 mrg unsigned HOST_WIDE_INT hi = ((addr_range *) next->key)->lo; 1533 1.1 mrg unsigned HOST_WIDE_INT base = (lo + align - 1) & ~(align - 1); 1534 1.1 mrg if (base + size <= hi) 1535 1.1 mrg return addr_range (base, base 
+ size); 1536 1.1 mrg min = next; 1537 1.1 mrg } 1538 1.1 mrg 1539 1.1 mrg unsigned HOST_WIDE_INT base = ((addr_range *)min->key)->hi; 1540 1.1 mrg base = (base + align - 1) & ~(align - 1); 1541 1.1 mrg if (base + size <= bounds->hi) 1542 1.1 mrg return addr_range (base, base + size); 1543 1.1 mrg else 1544 1.1 mrg return addr_range (); 1545 1.1 mrg } 1546 1.1 mrg else 1547 1.1 mrg { 1548 1.1 mrg unsigned HOST_WIDE_INT lo = bounds->lo; 1549 1.1 mrg lo = (lo + align - 1) & ~(align - 1); 1550 1.1 mrg if (lo + size <= bounds->hi) 1551 1.1 mrg return addr_range (lo, lo + size); 1552 1.1 mrg else 1553 1.1 mrg return addr_range (); 1554 1.1 mrg } 1555 1.1 mrg } 1556 1.1 mrg 1557 1.1 mrg static int 1558 1.1 mrg merge_ranges_1 (splay_tree_node n, void *ptr) 1559 1.1 mrg { 1560 1.1 mrg splay_tree accum = (splay_tree) ptr; 1561 1.1 mrg addr_range ar = *(addr_range *) n->key; 1562 1.1 mrg 1563 1.1 mrg splay_tree_node old = splay_tree_lookup (accum, n->key); 1564 1.1 mrg 1565 1.1 mrg /* We might have an overlap. Create a new range covering the 1566 1.1 mrg overlapping parts. 
*/ 1567 1.1 mrg if (old) 1568 1.1 mrg { 1569 1.1 mrg addr_range *old_ar = (addr_range *) old->key; 1570 1.1 mrg ar.lo = MIN (old_ar->lo, ar.lo); 1571 1.1 mrg ar.hi = MAX (old_ar->hi, ar.hi); 1572 1.1 mrg splay_tree_remove (accum, old->key); 1573 1.1 mrg } 1574 1.1 mrg 1575 1.1 mrg addr_range *new_ar = new addr_range (ar); 1576 1.1 mrg 1577 1.1 mrg splay_tree_insert (accum, (splay_tree_key) new_ar, n->value); 1578 1.1 mrg 1579 1.1 mrg return 0; 1580 1.1 mrg } 1581 1.1 mrg 1582 1.1 mrg static void 1583 1.1 mrg merge_ranges (splay_tree accum, splay_tree sp) 1584 1.1 mrg { 1585 1.1 mrg splay_tree_foreach (sp, merge_ranges_1, (void *) accum); 1586 1.1 mrg } 1587 1.1 mrg 1588 1.1 mrg static void 1589 1.1 mrg oacc_do_neutering (unsigned HOST_WIDE_INT bounds_lo, 1590 1.1 mrg unsigned HOST_WIDE_INT bounds_hi) 1591 1.1 mrg { 1592 1.1 mrg bb_stmt_map_t bb_stmt_map; 1593 1.1 mrg auto_bitmap worker_single, vector_single; 1594 1.1 mrg 1595 1.1 mrg omp_sese_split_blocks (&bb_stmt_map); 1596 1.1 mrg 1597 1.1 mrg if (dump_file) 1598 1.1 mrg { 1599 1.1 mrg fprintf (dump_file, "\n\nAfter splitting:\n\n"); 1600 1.1 mrg dump_function_to_file (current_function_decl, dump_file, dump_flags); 1601 1.1 mrg } 1602 1.1 mrg 1603 1.1 mrg unsigned mask = 0; 1604 1.1 mrg 1605 1.1 mrg /* If this is a routine, calculate MASK as if the outer levels are already 1606 1.1 mrg partitioned. 
*/ 1607 1.1 mrg { 1608 1.1 mrg tree attr = oacc_get_fn_attrib (current_function_decl); 1609 1.1 mrg tree dims = TREE_VALUE (attr); 1610 1.1 mrg unsigned ix; 1611 1.1 mrg for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims)) 1612 1.1 mrg { 1613 1.1 mrg tree allowed = TREE_PURPOSE (dims); 1614 1.1 mrg if (allowed && integer_zerop (allowed)) 1615 1.1 mrg mask |= GOMP_DIM_MASK (ix); 1616 1.1 mrg } 1617 1.1 mrg } 1618 1.1 mrg 1619 1.1 mrg parallel_g *par = omp_sese_discover_pars (&bb_stmt_map); 1620 1.1 mrg populate_single_mode_bitmaps (par, worker_single, vector_single, mask, 0); 1621 1.1 mrg 1622 1.1 mrg basic_block bb; 1623 1.1 mrg FOR_ALL_BB_FN (bb, cfun) 1624 1.1 mrg bb->aux = NULL; 1625 1.1 mrg 1626 1.1 mrg vec<propagation_set *> prop_set (vNULL); 1627 1.1 mrg prop_set.safe_grow_cleared (last_basic_block_for_fn (cfun), true); 1628 1.1 mrg 1629 1.1 mrg find_ssa_names_to_propagate (par, mask, worker_single, vector_single, 1630 1.1 mrg &prop_set); 1631 1.1 mrg 1632 1.1 mrg hash_set<tree> partitioned_var_uses; 1633 1.1 mrg hash_set<tree> gang_private_vars; 1634 1.1 mrg auto_bitmap writes_gang_private; 1635 1.1 mrg 1636 1.1 mrg find_gang_private_vars (&gang_private_vars); 1637 1.1 mrg find_partitioned_var_uses (par, mask, &partitioned_var_uses); 1638 1.1 mrg find_local_vars_to_propagate (par, mask, &partitioned_var_uses, 1639 1.1 mrg &gang_private_vars, writes_gang_private, 1640 1.1 mrg &prop_set); 1641 1.1 mrg 1642 1.1 mrg record_field_map_t record_field_map; 1643 1.1 mrg 1644 1.1 mrg FOR_ALL_BB_FN (bb, cfun) 1645 1.1 mrg { 1646 1.1 mrg propagation_set *ws_prop = prop_set[bb->index]; 1647 1.1 mrg if (ws_prop) 1648 1.1 mrg { 1649 1.1 mrg tree record_type = lang_hooks.types.make_type (RECORD_TYPE); 1650 1.1 mrg tree name = create_tmp_var_name (".oacc_ws_data_s"); 1651 1.1 mrg name = build_decl (UNKNOWN_LOCATION, TYPE_DECL, name, record_type); 1652 1.1 mrg DECL_ARTIFICIAL (name) = 1; 1653 1.1 mrg DECL_NAMELESS (name) = 1; 1654 1.1 mrg TYPE_NAME (record_type) = 
name; 1655 1.1 mrg TYPE_ARTIFICIAL (record_type) = 1; 1656 1.1 mrg 1657 1.1 mrg auto_vec<tree> field_vec (ws_prop->elements ()); 1658 1.1 mrg for (hash_set<tree>::iterator it = ws_prop->begin (); 1659 1.1 mrg it != ws_prop->end (); ++it) 1660 1.1 mrg field_vec.quick_push (*it); 1661 1.1 mrg 1662 1.1 mrg field_vec.qsort (sort_by_size_then_ssa_version_or_uid); 1663 1.1 mrg 1664 1.1 mrg bool existed; 1665 1.1 mrg field_map_t *fields 1666 1.1 mrg = &record_field_map.get_or_insert (record_type, &existed); 1667 1.1 mrg gcc_checking_assert (!existed); 1668 1.1 mrg 1669 1.1 mrg /* Insert var fields in reverse order, so the last inserted element 1670 1.1 mrg is the first in the structure. */ 1671 1.1 mrg for (int i = field_vec.length () - 1; i >= 0; i--) 1672 1.1 mrg install_var_field (field_vec[i], record_type, fields); 1673 1.1 mrg 1674 1.1 mrg layout_type (record_type); 1675 1.1 mrg 1676 1.1 mrg bb->aux = (tree) record_type; 1677 1.1 mrg } 1678 1.1 mrg } 1679 1.1 mrg 1680 1.1 mrg sbitmap *reachable 1681 1.1 mrg = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), 1682 1.1 mrg last_basic_block_for_fn (cfun)); 1683 1.1 mrg 1684 1.1 mrg bitmap_vector_clear (reachable, last_basic_block_for_fn (cfun)); 1685 1.1 mrg 1686 1.1 mrg auto_vec<std::pair<int, tree> > priority; 1687 1.1 mrg 1688 1.1 mrg FOR_ALL_BB_FN (bb, cfun) 1689 1.1 mrg { 1690 1.1 mrg if (bb->aux) 1691 1.1 mrg { 1692 1.1 mrg tree record_type = (tree) bb->aux; 1693 1.1 mrg 1694 1.1 mrg basic_block bb2; 1695 1.1 mrg FOR_ALL_BB_FN (bb2, cfun) 1696 1.1 mrg bb2->flags &= ~BB_VISITED; 1697 1.1 mrg 1698 1.1 mrg priority.safe_push (std::make_pair (bb->index, record_type)); 1699 1.1 mrg dfs_broadcast_reachable_1 (bb, reachable[bb->index]); 1700 1.1 mrg } 1701 1.1 mrg } 1702 1.1 mrg 1703 1.1 mrg sbitmap *inverted 1704 1.1 mrg = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), 1705 1.1 mrg last_basic_block_for_fn (cfun)); 1706 1.1 mrg 1707 1.1 mrg bitmap_vector_clear (inverted, last_basic_block_for_fn (cfun)); 1708 
1.1 mrg 1709 1.1 mrg for (int i = 0; i < last_basic_block_for_fn (cfun); i++) 1710 1.1 mrg { 1711 1.1 mrg sbitmap_iterator bi; 1712 1.1 mrg unsigned int j; 1713 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (reachable[i], 0, j, bi) 1714 1.1 mrg bitmap_set_bit (inverted[j], i); 1715 1.1 mrg } 1716 1.1 mrg 1717 1.1 mrg for (int i = 0; i < last_basic_block_for_fn (cfun); i++) 1718 1.1 mrg bitmap_ior (reachable[i], reachable[i], inverted[i]); 1719 1.1 mrg 1720 1.1 mrg sbitmap_vector_free (inverted); 1721 1.1 mrg 1722 1.1 mrg used_range_vec_t used_ranges; 1723 1.1 mrg 1724 1.1 mrg used_ranges.safe_grow_cleared (last_basic_block_for_fn (cfun)); 1725 1.1 mrg 1726 1.1 mrg blk_offset_map_t blk_offset_map; 1727 1.1 mrg 1728 1.1 mrg addr_range worker_shm_bounds (bounds_lo, bounds_hi); 1729 1.1 mrg 1730 1.1 mrg priority.qsort (sort_size_descending); 1731 1.1 mrg for (unsigned int i = 0; i < priority.length (); i++) 1732 1.1 mrg { 1733 1.1 mrg idx_decl_pair_t p = priority[i]; 1734 1.1 mrg int blkno = p.first; 1735 1.1 mrg tree record_type = p.second; 1736 1.1 mrg HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (record_type)); 1737 1.1 mrg HOST_WIDE_INT align = TYPE_ALIGN_UNIT (record_type); 1738 1.1 mrg 1739 1.1 mrg splay_tree conflicts = splay_tree_new (splay_tree_compare_addr_range, 1740 1.1 mrg splay_tree_free_key, NULL); 1741 1.1 mrg 1742 1.1 mrg if (!used_ranges[blkno]) 1743 1.1 mrg used_ranges[blkno] = splay_tree_new (splay_tree_compare_addr_range, 1744 1.1 mrg splay_tree_free_key, NULL); 1745 1.1 mrg else 1746 1.1 mrg merge_ranges (conflicts, used_ranges[blkno]); 1747 1.1 mrg 1748 1.1 mrg sbitmap_iterator bi; 1749 1.1 mrg unsigned int j; 1750 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (reachable[blkno], 0, j, bi) 1751 1.1 mrg if (used_ranges[j]) 1752 1.1 mrg merge_ranges (conflicts, used_ranges[j]); 1753 1.1 mrg 1754 1.1 mrg addr_range ar 1755 1.1 mrg = first_fit_range (conflicts, size, align, &worker_shm_bounds); 1756 1.1 mrg 1757 1.1 mrg splay_tree_delete (conflicts); 1758 1.1 mrg 1759 1.1 
mrg if (ar.invalid ()) 1760 1.1 mrg { 1761 1.1 mrg unsigned HOST_WIDE_INT base 1762 1.1 mrg = (bounds_lo + align - 1) & ~(align - 1); 1763 1.1 mrg if (base + size > bounds_hi) 1764 1.1 mrg error_at (UNKNOWN_LOCATION, "shared-memory region overflow"); 1765 1.1 mrg std::pair<unsigned HOST_WIDE_INT, bool> base_inrng 1766 1.1 mrg = std::make_pair (base, false); 1767 1.1 mrg blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng); 1768 1.1 mrg } 1769 1.1 mrg else 1770 1.1 mrg { 1771 1.1 mrg splay_tree_node old = splay_tree_lookup (used_ranges[blkno], 1772 1.1 mrg (splay_tree_key) &ar); 1773 1.1 mrg if (old) 1774 1.1 mrg { 1775 1.1 mrg fprintf (stderr, "trying to map [%d..%d] but [%d..%d] is " 1776 1.1 mrg "already mapped in block %d\n", (int) ar.lo, 1777 1.1 mrg (int) ar.hi, (int) ((addr_range *) old->key)->lo, 1778 1.1 mrg (int) ((addr_range *) old->key)->hi, blkno); 1779 1.1 mrg abort (); 1780 1.1 mrg } 1781 1.1 mrg 1782 1.1 mrg addr_range *arp = new addr_range (ar); 1783 1.1 mrg splay_tree_insert (used_ranges[blkno], (splay_tree_key) arp, 1784 1.1 mrg (splay_tree_value) blkno); 1785 1.1 mrg std::pair<unsigned HOST_WIDE_INT, bool> base_inrng 1786 1.1 mrg = std::make_pair (ar.lo, true); 1787 1.1 mrg blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng); 1788 1.1 mrg } 1789 1.1 mrg } 1790 1.1 mrg 1791 1.1 mrg sbitmap_vector_free (reachable); 1792 1.1 mrg 1793 1.1 mrg neuter_worker_single (par, mask, worker_single, vector_single, &prop_set, 1794 1.1 mrg &partitioned_var_uses, &record_field_map, 1795 1.1 mrg &blk_offset_map, writes_gang_private); 1796 1.1 mrg 1797 1.1 mrg record_field_map.empty (); 1798 1.1 mrg 1799 1.1 mrg /* These are supposed to have been 'delete'd by 'neuter_worker_single'. */ 1800 1.1 mrg for (auto it : prop_set) 1801 1.1 mrg gcc_checking_assert (!it); 1802 1.1 mrg prop_set.release (); 1803 1.1 mrg 1804 1.1 mrg delete par; 1805 1.1 mrg 1806 1.1 mrg /* This doesn't seem to make a difference. 
*/ 1807 1.1 mrg loops_state_clear (LOOP_CLOSED_SSA); 1808 1.1 mrg 1809 1.1 mrg /* Neutering worker-single neutered blocks will invalidate dominance info. 1810 1.1 mrg It may be possible to incrementally update just the affected blocks, but 1811 1.1 mrg obliterate everything for now. */ 1812 1.1 mrg free_dominance_info (CDI_DOMINATORS); 1813 1.1 mrg free_dominance_info (CDI_POST_DOMINATORS); 1814 1.1 mrg 1815 1.1 mrg if (dump_file) 1816 1.1 mrg { 1817 1.1 mrg fprintf (dump_file, "\n\nAfter neutering:\n\n"); 1818 1.1 mrg dump_function_to_file (current_function_decl, dump_file, dump_flags); 1819 1.1 mrg } 1820 1.1 mrg } 1821 1.1 mrg 1822 1.1 mrg static int 1823 1.1 mrg execute_omp_oacc_neuter_broadcast () 1824 1.1 mrg { 1825 1.1 mrg unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX]; 1826 1.1 mrg unsigned HOST_WIDE_INT private_size[GOMP_DIM_MAX]; 1827 1.1 mrg 1828 1.1 mrg for (unsigned i = 0; i < GOMP_DIM_MAX; i++) 1829 1.1 mrg { 1830 1.1 mrg reduction_size[i] = 0; 1831 1.1 mrg private_size[i] = 0; 1832 1.1 mrg } 1833 1.1 mrg 1834 1.1 mrg /* Calculate shared memory size required for reduction variables and 1835 1.1 mrg gang-private memory for this offloaded function. 
*/ 1836 1.1 mrg basic_block bb; 1837 1.1 mrg FOR_ALL_BB_FN (bb, cfun) 1838 1.1 mrg { 1839 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start_bb (bb); 1840 1.1 mrg !gsi_end_p (gsi); 1841 1.1 mrg gsi_next (&gsi)) 1842 1.1 mrg { 1843 1.1 mrg gimple *stmt = gsi_stmt (gsi); 1844 1.1 mrg if (!is_gimple_call (stmt)) 1845 1.1 mrg continue; 1846 1.1 mrg gcall *call = as_a <gcall *> (stmt); 1847 1.1 mrg if (!gimple_call_internal_p (call)) 1848 1.1 mrg continue; 1849 1.1 mrg enum internal_fn ifn_code = gimple_call_internal_fn (call); 1850 1.1 mrg switch (ifn_code) 1851 1.1 mrg { 1852 1.1 mrg default: break; 1853 1.1 mrg case IFN_GOACC_REDUCTION: 1854 1.1 mrg if (integer_minus_onep (gimple_call_arg (call, 3))) 1855 1.1 mrg continue; 1856 1.1 mrg else 1857 1.1 mrg { 1858 1.1 mrg unsigned code = TREE_INT_CST_LOW (gimple_call_arg (call, 0)); 1859 1.1 mrg /* Only count reduction variables once: the choice to pick 1860 1.1 mrg the setup call is fairly arbitrary. */ 1861 1.1 mrg if (code == IFN_GOACC_REDUCTION_SETUP) 1862 1.1 mrg { 1863 1.1 mrg int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3)); 1864 1.1 mrg tree var = gimple_call_arg (call, 2); 1865 1.1 mrg tree offset = gimple_call_arg (call, 5); 1866 1.1 mrg tree var_type = TREE_TYPE (var); 1867 1.1 mrg unsigned HOST_WIDE_INT limit 1868 1.1 mrg = (tree_to_uhwi (offset) 1869 1.1 mrg + tree_to_uhwi (TYPE_SIZE_UNIT (var_type))); 1870 1.1 mrg reduction_size[level] 1871 1.1 mrg = MAX (reduction_size[level], limit); 1872 1.1 mrg } 1873 1.1 mrg } 1874 1.1 mrg break; 1875 1.1 mrg case IFN_UNIQUE: 1876 1.1 mrg { 1877 1.1 mrg enum ifn_unique_kind kind 1878 1.1 mrg = ((enum ifn_unique_kind) 1879 1.1 mrg TREE_INT_CST_LOW (gimple_call_arg (call, 0))); 1880 1.1 mrg 1881 1.1 mrg if (kind == IFN_UNIQUE_OACC_PRIVATE) 1882 1.1 mrg { 1883 1.1 mrg HOST_WIDE_INT level 1884 1.1 mrg = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); 1885 1.1 mrg if (level == -1) 1886 1.1 mrg break; 1887 1.1 mrg for (unsigned i = 3; 1888 1.1 mrg i < 
gimple_call_num_args (call); 1889 1.1 mrg i++) 1890 1.1 mrg { 1891 1.1 mrg tree arg = gimple_call_arg (call, i); 1892 1.1 mrg gcc_assert (TREE_CODE (arg) == ADDR_EXPR); 1893 1.1 mrg tree decl = TREE_OPERAND (arg, 0); 1894 1.1 mrg unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (decl); 1895 1.1 mrg private_size[level] = ((private_size[level] + align - 1) 1896 1.1 mrg & ~(align - 1)); 1897 1.1 mrg unsigned HOST_WIDE_INT decl_size 1898 1.1 mrg = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (decl))); 1899 1.1 mrg private_size[level] += decl_size; 1900 1.1 mrg } 1901 1.1 mrg } 1902 1.1 mrg } 1903 1.1 mrg break; 1904 1.1 mrg } 1905 1.1 mrg } 1906 1.1 mrg } 1907 1.1 mrg 1908 1.1 mrg int dims[GOMP_DIM_MAX]; 1909 1.1 mrg for (unsigned i = 0; i < GOMP_DIM_MAX; i++) 1910 1.1 mrg dims[i] = oacc_get_fn_dim_size (current_function_decl, i); 1911 1.1 mrg 1912 1.1 mrg /* Find bounds of shared-memory buffer space we can use. */ 1913 1.1 mrg unsigned HOST_WIDE_INT bounds_lo = 0, bounds_hi = 0; 1914 1.1 mrg if (targetm.goacc.shared_mem_layout) 1915 1.1 mrg targetm.goacc.shared_mem_layout (&bounds_lo, &bounds_hi, dims, 1916 1.1 mrg private_size, reduction_size); 1917 1.1 mrg 1918 1.1 mrg /* Perform worker partitioning unless we know 'num_workers(1)'. 
*/ 1919 1.1 mrg if (dims[GOMP_DIM_WORKER] != 1) 1920 1.1 mrg oacc_do_neutering (bounds_lo, bounds_hi); 1921 1.1 mrg 1922 1.1 mrg return 0; 1923 1.1 mrg } 1924 1.1 mrg 1925 1.1 mrg namespace { 1926 1.1 mrg 1927 1.1 mrg const pass_data pass_data_omp_oacc_neuter_broadcast = 1928 1.1 mrg { 1929 1.1 mrg GIMPLE_PASS, /* type */ 1930 1.1 mrg "omp_oacc_neuter_broadcast", /* name */ 1931 1.1 mrg OPTGROUP_OMP, /* optinfo_flags */ 1932 1.1 mrg TV_NONE, /* tv_id */ 1933 1.1 mrg PROP_cfg, /* properties_required */ 1934 1.1 mrg 0, /* properties_provided */ 1935 1.1 mrg 0, /* properties_destroyed */ 1936 1.1 mrg 0, /* todo_flags_start */ 1937 1.1 mrg TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */ 1938 1.1 mrg }; 1939 1.1 mrg 1940 1.1 mrg class pass_omp_oacc_neuter_broadcast : public gimple_opt_pass 1941 1.1 mrg { 1942 1.1 mrg public: 1943 1.1 mrg pass_omp_oacc_neuter_broadcast (gcc::context *ctxt) 1944 1.1 mrg : gimple_opt_pass (pass_data_omp_oacc_neuter_broadcast, ctxt) 1945 1.1 mrg {} 1946 1.1 mrg 1947 1.1 mrg /* opt_pass methods: */ 1948 1.1 mrg virtual bool gate (function *fun) 1949 1.1 mrg { 1950 1.1 mrg if (!flag_openacc) 1951 1.1 mrg return false; 1952 1.1 mrg 1953 1.1 mrg if (!targetm.goacc.create_worker_broadcast_record) 1954 1.1 mrg return false; 1955 1.1 mrg 1956 1.1 mrg /* Only relevant for OpenACC offloaded functions. */ 1957 1.1 mrg tree attr = oacc_get_fn_attrib (fun->decl); 1958 1.1 mrg if (!attr) 1959 1.1 mrg return false; 1960 1.1 mrg 1961 1.1 mrg return true; 1962 1.1 mrg } 1963 1.1 mrg 1964 1.1 mrg virtual unsigned int execute (function *) 1965 1.1 mrg { 1966 1.1 mrg return execute_omp_oacc_neuter_broadcast (); 1967 1.1 mrg } 1968 1.1 mrg 1969 1.1 mrg }; // class pass_omp_oacc_neuter_broadcast 1970 1.1 mrg 1971 1.1 mrg } // anon namespace 1972 1.1 mrg 1973 1.1 mrg gimple_opt_pass * 1974 1.1 mrg make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt) 1975 1.1 mrg { 1976 1.1 mrg return new pass_omp_oacc_neuter_broadcast (ctxt); 1977 1.1 mrg } 1978