Home | History | Annotate | Line # | Download | only in gcc
omp-oacc-neuter-broadcast.cc revision 1.1.1.1
      1  1.1  mrg /* OpenACC worker partitioning via middle end neutering/broadcasting scheme
      2  1.1  mrg 
      3  1.1  mrg    Copyright (C) 2015-2022 Free Software Foundation, Inc.
      4  1.1  mrg 
      5  1.1  mrg    This file is part of GCC.
      6  1.1  mrg 
      7  1.1  mrg    GCC is free software; you can redistribute it and/or modify it
      8  1.1  mrg    under the terms of the GNU General Public License as published
      9  1.1  mrg    by the Free Software Foundation; either version 3, or (at your
     10  1.1  mrg    option) any later version.
     11  1.1  mrg 
     12  1.1  mrg    GCC is distributed in the hope that it will be useful, but WITHOUT
     13  1.1  mrg    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     14  1.1  mrg    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     15  1.1  mrg    License for more details.
     16  1.1  mrg 
     17  1.1  mrg    You should have received a copy of the GNU General Public License
     18  1.1  mrg    along with GCC; see the file COPYING3.  If not see
     19  1.1  mrg    <http://www.gnu.org/licenses/>.  */
     20  1.1  mrg 
     21  1.1  mrg #include "config.h"
     22  1.1  mrg #include "system.h"
     23  1.1  mrg #include "coretypes.h"
     24  1.1  mrg #include "backend.h"
     25  1.1  mrg #include "rtl.h"
     26  1.1  mrg #include "tree.h"
     27  1.1  mrg #include "gimple.h"
     28  1.1  mrg #include "tree-pass.h"
     29  1.1  mrg #include "ssa.h"
     30  1.1  mrg #include "cgraph.h"
     31  1.1  mrg #include "pretty-print.h"
     32  1.1  mrg #include "fold-const.h"
     33  1.1  mrg #include "gimplify.h"
     34  1.1  mrg #include "gimple-iterator.h"
     35  1.1  mrg #include "gimple-walk.h"
     36  1.1  mrg #include "tree-inline.h"
     37  1.1  mrg #include "langhooks.h"
     38  1.1  mrg #include "omp-general.h"
     39  1.1  mrg #include "omp-low.h"
     40  1.1  mrg #include "gimple-pretty-print.h"
     41  1.1  mrg #include "cfghooks.h"
     42  1.1  mrg #include "insn-config.h"
     43  1.1  mrg #include "recog.h"
     44  1.1  mrg #include "internal-fn.h"
     45  1.1  mrg #include "bitmap.h"
     46  1.1  mrg #include "tree-nested.h"
     47  1.1  mrg #include "stor-layout.h"
     48  1.1  mrg #include "tree-ssa-threadupdate.h"
     49  1.1  mrg #include "tree-into-ssa.h"
     50  1.1  mrg #include "splay-tree.h"
     51  1.1  mrg #include "target.h"
     52  1.1  mrg #include "cfgloop.h"
     53  1.1  mrg #include "tree-cfg.h"
     54  1.1  mrg #include "omp-offload.h"
     55  1.1  mrg #include "attribs.h"
     56  1.1  mrg #include "targhooks.h"
     57  1.1  mrg #include "diagnostic-core.h"
     58  1.1  mrg 
     59  1.1  mrg /* Loop structure of the function.  The entire function is described as
     60  1.1  mrg    a NULL loop.  */
     61  1.1  mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:struct parallel'.  */
     62  1.1  mrg 
     63  1.1  mrg struct parallel_g
     64  1.1  mrg {
     65  1.1  mrg   /* Parent parallel.  */
     66  1.1  mrg   parallel_g *parent;
     67  1.1  mrg 
     68  1.1  mrg   /* Next sibling parallel.  */
     69  1.1  mrg   parallel_g *next;
     70  1.1  mrg 
     71  1.1  mrg   /* First child parallel.  */
     72  1.1  mrg   parallel_g *inner;
     73  1.1  mrg 
     74  1.1  mrg   /* Partitioning mask of the parallel.  */
     75  1.1  mrg   unsigned mask;
     76  1.1  mrg 
     77  1.1  mrg   /* Partitioning used within inner parallels. */
     78  1.1  mrg   unsigned inner_mask;
     79  1.1  mrg 
     80  1.1  mrg   /* Location of parallel forked and join.  The forked is the first
     81  1.1  mrg      block in the parallel and the join is the first block after of
     82  1.1  mrg      the partition.  */
     83  1.1  mrg   basic_block forked_block;
     84  1.1  mrg   basic_block join_block;
     85  1.1  mrg 
  /* FORKED_STMT and JOIN_STMT are the statements delimiting the partitioned
     region: the IFN_UNIQUE fork/join calls, or the forced-single statement
     for a singleton parallel.  Both are set in omp_sese_find_par.  */
     86  1.1  mrg   gimple *forked_stmt;
     87  1.1  mrg   gimple *join_stmt;
     88  1.1  mrg 
  /* FORK_STMT holds the placeholder NOP that omp_sese_split_blocks inserts
     at the head of the forked block; JOINING_STMT is presumably its
     join-side counterpart (it is not assigned within this chunk --
     confirm against the rest of the file).  */
     89  1.1  mrg   gimple *fork_stmt;
     90  1.1  mrg   gimple *joining_stmt;
     91  1.1  mrg 
     92  1.1  mrg   /* Basic blocks in this parallel, but not in child parallels.  The
     93  1.1  mrg      FORKED and JOINING blocks are in the partition.  The FORK and JOIN
     94  1.1  mrg      blocks are not.  */
     95  1.1  mrg   auto_vec<basic_block> blocks;
     96  1.1  mrg 
  /* Record type and sender/receiver decls used to transmit values between
     workers; initialized to NULL_TREE here and presumably filled in by the
     broadcast machinery later in the file (not set in this chunk).  */
     97  1.1  mrg   tree record_type;
     98  1.1  mrg   tree sender_decl;
     99  1.1  mrg   tree receiver_decl;
    100  1.1  mrg 
    101  1.1  mrg public:
    102  1.1  mrg   parallel_g (parallel_g *parent, unsigned mode);
    103  1.1  mrg   ~parallel_g ();
    104  1.1  mrg };
    105  1.1  mrg 
    106  1.1  mrg /* Constructor links the new parallel into it's parent's chain of
    107  1.1  mrg    children.  */
    108  1.1  mrg 
    109  1.1  mrg parallel_g::parallel_g (parallel_g *parent_, unsigned mask_)
    110  1.1  mrg   :parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0)
    111  1.1  mrg {
    112  1.1  mrg   forked_block = join_block = 0;
    113  1.1  mrg   forked_stmt = join_stmt = NULL;
    114  1.1  mrg   fork_stmt = joining_stmt = NULL;
    115  1.1  mrg 
    116  1.1  mrg   record_type = NULL_TREE;
    117  1.1  mrg   sender_decl = NULL_TREE;
    118  1.1  mrg   receiver_decl = NULL_TREE;
    119  1.1  mrg 
    120  1.1  mrg   if (parent)
    121  1.1  mrg     {
    122  1.1  mrg       next = parent->inner;
    123  1.1  mrg       parent->inner = this;
    124  1.1  mrg     }
    125  1.1  mrg }
    126  1.1  mrg 
    127  1.1  mrg parallel_g::~parallel_g ()
    128  1.1  mrg {
    129  1.1  mrg   delete inner;
    130  1.1  mrg   delete next;
    131  1.1  mrg }
    132  1.1  mrg 
    133  1.1  mrg static bool
    134  1.1  mrg local_var_based_p (tree decl)
    135  1.1  mrg {
    136  1.1  mrg   switch (TREE_CODE (decl))
    137  1.1  mrg     {
    138  1.1  mrg     case VAR_DECL:
    139  1.1  mrg       return !is_global_var (decl);
    140  1.1  mrg 
    141  1.1  mrg     case COMPONENT_REF:
    142  1.1  mrg     case BIT_FIELD_REF:
    143  1.1  mrg     case ARRAY_REF:
    144  1.1  mrg       return local_var_based_p (TREE_OPERAND (decl, 0));
    145  1.1  mrg 
    146  1.1  mrg     default:
    147  1.1  mrg       return false;
    148  1.1  mrg     }
    149  1.1  mrg }
    150  1.1  mrg 
    151  1.1  mrg /* Map of basic blocks to gimple stmts.  */
    152  1.1  mrg typedef hash_map<basic_block, gimple *> bb_stmt_map_t;
    153  1.1  mrg 
    154  1.1  mrg /* Calls to OpenACC routines are made by all workers/wavefronts/warps, since
    155  1.1  mrg    the routine likely contains partitioned loops (else will do its own
    156  1.1  mrg    neutering and variable propagation). Return TRUE if a function call CALL
    157  1.1  mrg    should be made in (worker) single mode instead, rather than redundant
    158  1.1  mrg    mode.  */
    159  1.1  mrg 
    160  1.1  mrg static bool
    161  1.1  mrg omp_sese_active_worker_call (gcall *call)
    162  1.1  mrg {
    163  1.1  mrg #define GOMP_DIM_SEQ GOMP_DIM_MAX
    164  1.1  mrg   tree fndecl = gimple_call_fndecl (call);
    165  1.1  mrg 
    166  1.1  mrg   if (!fndecl)
    167  1.1  mrg     return true;
    168  1.1  mrg 
    169  1.1  mrg   tree attrs = oacc_get_fn_attrib (fndecl);
    170  1.1  mrg 
    171  1.1  mrg   if (!attrs)
    172  1.1  mrg     return true;
    173  1.1  mrg 
    174  1.1  mrg   int level = oacc_fn_attrib_level (attrs);
    175  1.1  mrg 
    176  1.1  mrg   /* Neither regular functions nor "seq" routines should be run by all threads
    177  1.1  mrg      in worker-single mode.  */
    178  1.1  mrg   return level == -1 || level == GOMP_DIM_SEQ;
    179  1.1  mrg #undef GOMP_DIM_SEQ
    180  1.1  mrg }
    181  1.1  mrg 
    182  1.1  mrg /* Split basic blocks such that each forked and join unspecs are at
    183  1.1  mrg    the start of their basic blocks.  Thus afterwards each block will
    184  1.1  mrg    have a single partitioning mode.  We also do the same for return
    185  1.1  mrg    insns, as they are executed by every thread.  Return the
    186  1.1  mrg    partitioning mode of the function as a whole.  Populate MAP with
    187  1.1  mrg    head and tail blocks.  We also clear the BB visited flag, which is
    188  1.1  mrg    used when finding partitions.  */
    189  1.1  mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_split_blocks'.  */
    190  1.1  mrg 
    191  1.1  mrg static void
    192  1.1  mrg omp_sese_split_blocks (bb_stmt_map_t *map)
    193  1.1  mrg {
    194  1.1  mrg   auto_vec<gimple *> worklist;
    195  1.1  mrg   basic_block block;
    196  1.1  mrg 
    197  1.1  mrg   /* Locate all the reorg instructions of interest.  */
    198  1.1  mrg   FOR_ALL_BB_FN (block, cfun)
    199  1.1  mrg     {
    200  1.1  mrg       /* Clear visited flag, for use by parallel locator  */
    201  1.1  mrg       block->flags &= ~BB_VISITED;
    202  1.1  mrg 
    203  1.1  mrg       for (gimple_stmt_iterator gsi = gsi_start_bb (block);
    204  1.1  mrg 	   !gsi_end_p (gsi);
    205  1.1  mrg 	   gsi_next (&gsi))
    206  1.1  mrg 	{
    207  1.1  mrg 	  gimple *stmt = gsi_stmt (gsi);
    208  1.1  mrg 
    209  1.1  mrg 	  if (gimple_call_internal_p (stmt, IFN_UNIQUE))
    210  1.1  mrg 	    {
    211  1.1  mrg 	      enum ifn_unique_kind k = ((enum ifn_unique_kind)
    212  1.1  mrg 		TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
    213  1.1  mrg 
    214  1.1  mrg 	      if (k == IFN_UNIQUE_OACC_JOIN)
    215  1.1  mrg 		worklist.safe_push (stmt);
    216  1.1  mrg 	      else if (k == IFN_UNIQUE_OACC_FORK)
    217  1.1  mrg 		{
    218  1.1  mrg 		  gcc_assert (gsi_one_before_end_p (gsi));
    219  1.1  mrg 		  basic_block forked_block = single_succ (block);
    220  1.1  mrg 		  gimple_stmt_iterator gsi2 = gsi_start_bb (forked_block);
    221  1.1  mrg 
    222  1.1  mrg 		  /* We push a NOP as a placeholder for the "forked" stmt.
    223  1.1  mrg 		     This is then recognized in omp_sese_find_par.  */
    224  1.1  mrg 		  gimple *nop = gimple_build_nop ();
    225  1.1  mrg 		  gsi_insert_before (&gsi2, nop, GSI_SAME_STMT);
    226  1.1  mrg 
    227  1.1  mrg 		  worklist.safe_push (nop);
    228  1.1  mrg 		}
    229  1.1  mrg 	    }
    230  1.1  mrg 	  else if (gimple_code (stmt) == GIMPLE_RETURN
    231  1.1  mrg 		   || gimple_code (stmt) == GIMPLE_COND
    232  1.1  mrg 		   || gimple_code (stmt) == GIMPLE_SWITCH
    233  1.1  mrg 		   || (gimple_code (stmt) == GIMPLE_CALL
    234  1.1  mrg 		       && !gimple_call_internal_p (stmt)
    235  1.1  mrg 		       && !omp_sese_active_worker_call (as_a <gcall *> (stmt))))
    236  1.1  mrg 	    worklist.safe_push (stmt);
    237  1.1  mrg 	  else if (is_gimple_assign (stmt))
    238  1.1  mrg 	    {
    239  1.1  mrg 	      tree lhs = gimple_assign_lhs (stmt);
    240  1.1  mrg 
    241  1.1  mrg 	      /* Force assignments to components/fields/elements of local
    242  1.1  mrg 		 aggregates into fully-partitioned (redundant) mode.  This
    243  1.1  mrg 		 avoids having to broadcast the whole aggregate.  The RHS of
    244  1.1  mrg 		 the assignment will be propagated using the normal
    245  1.1  mrg 		 mechanism.  */
    246  1.1  mrg 
    247  1.1  mrg 	      switch (TREE_CODE (lhs))
    248  1.1  mrg 		{
    249  1.1  mrg 		case COMPONENT_REF:
    250  1.1  mrg 		case BIT_FIELD_REF:
    251  1.1  mrg 		case ARRAY_REF:
    252  1.1  mrg 		  {
    253  1.1  mrg 		    tree aggr = TREE_OPERAND (lhs, 0);
    254  1.1  mrg 
    255  1.1  mrg 		    if (local_var_based_p (aggr))
    256  1.1  mrg 		      worklist.safe_push (stmt);
    257  1.1  mrg 		  }
    258  1.1  mrg 		  break;
    259  1.1  mrg 
    260  1.1  mrg 		default:
    261  1.1  mrg 		  ;
    262  1.1  mrg 		}
    263  1.1  mrg 	    }
    264  1.1  mrg 	}
    265  1.1  mrg     }
    266  1.1  mrg 
    267  1.1  mrg   /* Split blocks on the worklist.  */
    268  1.1  mrg   unsigned ix;
    269  1.1  mrg   gimple *stmt;
    270  1.1  mrg 
    271  1.1  mrg   for (ix = 0; worklist.iterate (ix, &stmt); ix++)
    272  1.1  mrg     {
    273  1.1  mrg       basic_block block = gimple_bb (stmt);
    274  1.1  mrg 
    275  1.1  mrg       if (gimple_code (stmt) == GIMPLE_COND)
    276  1.1  mrg 	{
	  /* Re-express the conditional jump as "pred = LHS <op> RHS;
	     if (pred != false)" and split after the new assignment, so
	     the comparison ends its block and the SSA predicate can be
	     handled by the propagation machinery before the branch is
	     re-evaluated.  */
    277  1.1  mrg 	  gcond *orig_cond = as_a <gcond *> (stmt);
    278  1.1  mrg 	  tree_code code = gimple_expr_code (orig_cond);
    279  1.1  mrg 	  tree pred = make_ssa_name (boolean_type_node);
    280  1.1  mrg 	  gimple *asgn = gimple_build_assign (pred, code,
    281  1.1  mrg 			   gimple_cond_lhs (orig_cond),
    282  1.1  mrg 			   gimple_cond_rhs (orig_cond));
    283  1.1  mrg 	  gcond *new_cond
    284  1.1  mrg 	    = gimple_build_cond (NE_EXPR, pred, boolean_false_node,
    285  1.1  mrg 				 gimple_cond_true_label (orig_cond),
    286  1.1  mrg 				 gimple_cond_false_label (orig_cond));
    287  1.1  mrg 
    288  1.1  mrg 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
    289  1.1  mrg 	  gsi_insert_before (&gsi, asgn, GSI_SAME_STMT);
    290  1.1  mrg 	  gsi_replace (&gsi, new_cond, true);
    291  1.1  mrg 
    292  1.1  mrg 	  edge e = split_block (block, asgn);
    293  1.1  mrg 	  block = e->dest;
    294  1.1  mrg 	  map->get_or_insert (block) = new_cond;
    295  1.1  mrg 	}
    296  1.1  mrg       else if ((gimple_code (stmt) == GIMPLE_CALL
    297  1.1  mrg 		&& !gimple_call_internal_p (stmt))
    298  1.1  mrg 	       || is_gimple_assign (stmt))
    299  1.1  mrg 	{
	  /* Isolate the call/assignment into a basic block of its own:
	     split before it, then again after it, and map the block now
	     holding just this statement.  */
    300  1.1  mrg 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
    301  1.1  mrg 	  gsi_prev (&gsi);
    302  1.1  mrg 
    303  1.1  mrg 	  edge call = split_block (block, gsi_stmt (gsi));
    304  1.1  mrg 
    305  1.1  mrg 	  gimple *call_stmt = gsi_stmt (gsi_start_bb (call->dest));
    306  1.1  mrg 
    307  1.1  mrg 	  edge call_to_ret = split_block (call->dest, call_stmt);
    308  1.1  mrg 
    309  1.1  mrg 	  map->get_or_insert (call_to_ret->src) = call_stmt;
    310  1.1  mrg 	}
    311  1.1  mrg       else
    312  1.1  mrg 	{
	  /* Returns, joins and fork placeholders: make sure STMT starts
	     its basic block (splitting before it if it does not
	     already).  */
    313  1.1  mrg 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
    314  1.1  mrg 	  gsi_prev (&gsi);
    315  1.1  mrg 
    316  1.1  mrg 	  if (gsi_end_p (gsi))
    317  1.1  mrg 	    map->get_or_insert (block) = stmt;
    318  1.1  mrg 	  else
    319  1.1  mrg 	    {
    320  1.1  mrg 	      /* Split block before insn. The insn is in the new block.  */
    321  1.1  mrg 	      edge e = split_block (block, gsi_stmt (gsi));
    322  1.1  mrg 
    323  1.1  mrg 	      block = e->dest;
    324  1.1  mrg 	      map->get_or_insert (block) = stmt;
    325  1.1  mrg 	    }
    326  1.1  mrg 	}
    327  1.1  mrg     }
    328  1.1  mrg }
    329  1.1  mrg 
/* Return a human-readable description of partitioning mask MASK, a
   bitmask over the gang (bit 0), worker (bit 1) and vector (bit 2)
   axes.  */

static const char *
mask_name (unsigned mask)
{
  static const char *const names[] =
    {
      "gang redundant",
      "gang partitioned",
      "worker partitioned",
      "gang+worker partitioned",
      "vector partitioned",
      "gang+vector partitioned",
      "worker+vector partitioned",
      "fully partitioned"
    };

  return mask < 8 ? names[mask] : "<illegal>";
}
    346  1.1  mrg 
    347  1.1  mrg /* Dump this parallel and all its inner parallels.  */
    348  1.1  mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_dump_pars'.  */
    349  1.1  mrg 
    350  1.1  mrg static void
    351  1.1  mrg omp_sese_dump_pars (parallel_g *par, unsigned depth)
    352  1.1  mrg {
    353  1.1  mrg   fprintf (dump_file, "%u: mask %d (%s) head=%d, tail=%d\n",
    354  1.1  mrg 	   depth, par->mask, mask_name (par->mask),
    355  1.1  mrg 	   par->forked_block ? par->forked_block->index : -1,
    356  1.1  mrg 	   par->join_block ? par->join_block->index : -1);
    357  1.1  mrg 
    358  1.1  mrg   fprintf (dump_file, "    blocks:");
    359  1.1  mrg 
    360  1.1  mrg   basic_block block;
    361  1.1  mrg   for (unsigned ix = 0; par->blocks.iterate (ix, &block); ix++)
    362  1.1  mrg     fprintf (dump_file, " %d", block->index);
    363  1.1  mrg   fprintf (dump_file, "\n");
    364  1.1  mrg   if (par->inner)
    365  1.1  mrg     omp_sese_dump_pars (par->inner, depth + 1);
    366  1.1  mrg 
    367  1.1  mrg   if (par->next)
    368  1.1  mrg     omp_sese_dump_pars (par->next, depth);
    369  1.1  mrg }
    370  1.1  mrg 
    371  1.1  mrg /* If BLOCK contains a fork/join marker, process it to create or
    372  1.1  mrg    terminate a loop structure.  Add this block to the current loop,
    373  1.1  mrg    and then walk successor blocks.   */
    374  1.1  mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_find_par'.  */
    375  1.1  mrg 
    376  1.1  mrg static parallel_g *
    377  1.1  mrg omp_sese_find_par (bb_stmt_map_t *map, parallel_g *par, basic_block block)
    378  1.1  mrg {
  /* Each block is processed exactly once; the BB_VISITED flag was
     cleared beforehand in omp_sese_split_blocks.  */
    379  1.1  mrg   if (block->flags & BB_VISITED)
    380  1.1  mrg     return par;
    381  1.1  mrg   block->flags |= BB_VISITED;
    382  1.1  mrg 
    383  1.1  mrg   if (gimple **stmtp = map->get (block))
    384  1.1  mrg     {
    385  1.1  mrg       gimple *stmt = *stmtp;
    386  1.1  mrg 
    387  1.1  mrg       if (gimple_code (stmt) == GIMPLE_COND
    388  1.1  mrg 	  || gimple_code (stmt) == GIMPLE_SWITCH
    389  1.1  mrg 	  || gimple_code (stmt) == GIMPLE_RETURN
    390  1.1  mrg 	  || (gimple_code (stmt) == GIMPLE_CALL
    391  1.1  mrg 	      && !gimple_call_internal_p (stmt))
    392  1.1  mrg 	  || is_gimple_assign (stmt))
    393  1.1  mrg 	{
    394  1.1  mrg 	  /* A single block that is forced to be at the maximum partition
    395  1.1  mrg 	     level.  Make a singleton par for it.  */
    396  1.1  mrg 	  par = new parallel_g (par, GOMP_DIM_MASK (GOMP_DIM_GANG)
    397  1.1  mrg 				   | GOMP_DIM_MASK (GOMP_DIM_WORKER)
    398  1.1  mrg 				   | GOMP_DIM_MASK (GOMP_DIM_VECTOR));
    399  1.1  mrg 	  par->forked_block = block;
    400  1.1  mrg 	  par->forked_stmt = stmt;
    401  1.1  mrg 	  par->blocks.safe_push (block);
    402  1.1  mrg 	  par = par->parent;
    403  1.1  mrg 	  goto walk_successors;
    404  1.1  mrg 	}
    405  1.1  mrg       else if (gimple_nop_p (stmt))
    406  1.1  mrg 	{
	  /* The NOP placeholder pushed by omp_sese_split_blocks: the
	     actual IFN_UNIQUE fork call ends this block's single
	     predecessor.  Open a new parallel for the partitioned
	     region.  */
    407  1.1  mrg 	  basic_block pred = single_pred (block);
    408  1.1  mrg 	  gcc_assert (pred);
    409  1.1  mrg 	  gimple_stmt_iterator gsi = gsi_last_bb (pred);
    410  1.1  mrg 	  gimple *final_stmt = gsi_stmt (gsi);
    411  1.1  mrg 
    412  1.1  mrg 	  if (gimple_call_internal_p (final_stmt, IFN_UNIQUE))
    413  1.1  mrg 	    {
    414  1.1  mrg 	      gcall *call = as_a <gcall *> (final_stmt);
    415  1.1  mrg 	      enum ifn_unique_kind k = ((enum ifn_unique_kind)
    416  1.1  mrg 		TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
    417  1.1  mrg 
    418  1.1  mrg 	      if (k == IFN_UNIQUE_OACC_FORK)
    419  1.1  mrg 		{
    420  1.1  mrg 		  HOST_WIDE_INT dim
    421  1.1  mrg 		    = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
    422  1.1  mrg 		  unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;
    423  1.1  mrg 
    424  1.1  mrg 		  par = new parallel_g (par, mask);
    425  1.1  mrg 		  par->forked_block = block;
    426  1.1  mrg 		  par->forked_stmt = final_stmt;
    427  1.1  mrg 		  par->fork_stmt = stmt;
    428  1.1  mrg 		}
    429  1.1  mrg 	      else
    430  1.1  mrg 		gcc_unreachable ();
    431  1.1  mrg 	    }
    432  1.1  mrg 	  else
    433  1.1  mrg 	    gcc_unreachable ();
    434  1.1  mrg 	}
    435  1.1  mrg       else if (gimple_call_internal_p (stmt, IFN_UNIQUE))
    436  1.1  mrg 	{
	  /* A join marker closes the innermost open parallel; its mask
	     must match the fork that opened it.  */
    437  1.1  mrg 	  gcall *call = as_a <gcall *> (stmt);
    438  1.1  mrg 	  enum ifn_unique_kind k = ((enum ifn_unique_kind)
    439  1.1  mrg 	    TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
    440  1.1  mrg 	  if (k == IFN_UNIQUE_OACC_JOIN)
    441  1.1  mrg 	    {
    442  1.1  mrg 	      HOST_WIDE_INT dim = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
    443  1.1  mrg 	      unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;
    444  1.1  mrg 
    445  1.1  mrg 	      gcc_assert (par->mask == mask);
    446  1.1  mrg 	      par->join_block = block;
    447  1.1  mrg 	      par->join_stmt = stmt;
    448  1.1  mrg 	      par = par->parent;
    449  1.1  mrg 	    }
    450  1.1  mrg 	  else
    451  1.1  mrg 	    gcc_unreachable ();
    452  1.1  mrg 	}
    453  1.1  mrg       else
    454  1.1  mrg 	gcc_unreachable ();
    455  1.1  mrg     }
    456  1.1  mrg 
    457  1.1  mrg   if (par)
    458  1.1  mrg     /* Add this block onto the current loop's list of blocks.  */
    459  1.1  mrg     par->blocks.safe_push (block);
    460  1.1  mrg   else
    461  1.1  mrg     /* This must be the entry block.  Create a NULL parallel.  */
    462  1.1  mrg     par = new parallel_g (0, 0);
    463  1.1  mrg 
    464  1.1  mrg walk_successors:
    465  1.1  mrg   /* Walk successor blocks.  */
    466  1.1  mrg   edge e;
    467  1.1  mrg   edge_iterator ei;
    468  1.1  mrg 
    469  1.1  mrg   FOR_EACH_EDGE (e, ei, block->succs)
    470  1.1  mrg     omp_sese_find_par (map, par, e->dest);
    471  1.1  mrg 
    472  1.1  mrg   return par;
    473  1.1  mrg }
    474  1.1  mrg 
    475  1.1  mrg /* DFS walk the CFG looking for fork & join markers.  Construct
    476  1.1  mrg    loop structures as we go.  MAP is a mapping of basic blocks
    477  1.1  mrg    to head & tail markers, discovered when splitting blocks.  This
    478  1.1  mrg    speeds up the discovery.  We rely on the BB visited flag having
    479  1.1  mrg    been cleared when splitting blocks.  */
    480  1.1  mrg /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_discover_pars'.  */
    481  1.1  mrg 
    482  1.1  mrg static parallel_g *
    483  1.1  mrg omp_sese_discover_pars (bb_stmt_map_t *map)
    484  1.1  mrg {
    485  1.1  mrg   basic_block block;
    486  1.1  mrg 
    487  1.1  mrg   /* Mark exit blocks as visited.  */
    488  1.1  mrg   block = EXIT_BLOCK_PTR_FOR_FN (cfun);
    489  1.1  mrg   block->flags |= BB_VISITED;
    490  1.1  mrg 
    491  1.1  mrg   /* And entry block as not.  */
    492  1.1  mrg   block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
    493  1.1  mrg   block->flags &= ~BB_VISITED;
    494  1.1  mrg 
    495  1.1  mrg   parallel_g *par = omp_sese_find_par (map, 0, block);
    496  1.1  mrg 
    497  1.1  mrg   if (dump_file)
    498  1.1  mrg     {
    499  1.1  mrg       fprintf (dump_file, "\nLoops\n");
    500  1.1  mrg       omp_sese_dump_pars (par, 0);
    501  1.1  mrg       fprintf (dump_file, "\n");
    502  1.1  mrg     }
    503  1.1  mrg 
    504  1.1  mrg   return par;
    505  1.1  mrg }
    506  1.1  mrg 
    507  1.1  mrg static void
    508  1.1  mrg populate_single_mode_bitmaps (parallel_g *par, bitmap worker_single,
    509  1.1  mrg 			      bitmap vector_single, unsigned outer_mask,
    510  1.1  mrg 			      int depth)
    511  1.1  mrg {
    512  1.1  mrg   unsigned mask = outer_mask | par->mask;
    513  1.1  mrg 
    514  1.1  mrg   basic_block block;
    515  1.1  mrg 
    516  1.1  mrg   for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
    517  1.1  mrg     {
    518  1.1  mrg       if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
    519  1.1  mrg 	bitmap_set_bit (worker_single, block->index);
    520  1.1  mrg 
    521  1.1  mrg       if ((mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)) == 0)
    522  1.1  mrg 	bitmap_set_bit (vector_single, block->index);
    523  1.1  mrg     }
    524  1.1  mrg 
    525  1.1  mrg   if (par->inner)
    526  1.1  mrg     populate_single_mode_bitmaps (par->inner, worker_single, vector_single,
    527  1.1  mrg 				  mask, depth + 1);
    528  1.1  mrg   if (par->next)
    529  1.1  mrg     populate_single_mode_bitmaps (par->next, worker_single, vector_single,
    530  1.1  mrg 				  outer_mask, depth);
    531  1.1  mrg }
    532  1.1  mrg 
    533  1.1  mrg /* A map from SSA names or var decls to record fields.  */
    534  1.1  mrg 
    535  1.1  mrg typedef hash_map<tree, tree> field_map_t;
    536  1.1  mrg 
    537  1.1  mrg /* For each propagation record type, this is a map from SSA names or var decls
    538  1.1  mrg    to propagate, to the field in the record type that should be used for
    539  1.1  mrg    transmission and reception.  */
    540  1.1  mrg 
    541  1.1  mrg typedef hash_map<tree, field_map_t> record_field_map_t;
    542  1.1  mrg 
    543  1.1  mrg static void
    544  1.1  mrg install_var_field (tree var, tree record_type, field_map_t *fields)
    545  1.1  mrg {
    546  1.1  mrg   tree name;
    547  1.1  mrg   char tmp[20];
    548  1.1  mrg 
    549  1.1  mrg   if (TREE_CODE (var) == SSA_NAME)
    550  1.1  mrg     {
    551  1.1  mrg       name = SSA_NAME_IDENTIFIER (var);
    552  1.1  mrg       if (!name)
    553  1.1  mrg 	{
    554  1.1  mrg 	  sprintf (tmp, "_%u", (unsigned) SSA_NAME_VERSION (var));
    555  1.1  mrg 	  name = get_identifier (tmp);
    556  1.1  mrg 	}
    557  1.1  mrg     }
    558  1.1  mrg   else if (TREE_CODE (var) == VAR_DECL)
    559  1.1  mrg     {
    560  1.1  mrg       name = DECL_NAME (var);
    561  1.1  mrg       if (!name)
    562  1.1  mrg 	{
    563  1.1  mrg 	  sprintf (tmp, "D_%u", (unsigned) DECL_UID (var));
    564  1.1  mrg 	  name = get_identifier (tmp);
    565  1.1  mrg 	}
    566  1.1  mrg     }
    567  1.1  mrg   else
    568  1.1  mrg     gcc_unreachable ();
    569  1.1  mrg 
    570  1.1  mrg   gcc_assert (!fields->get (var));
    571  1.1  mrg 
    572  1.1  mrg   tree type = TREE_TYPE (var);
    573  1.1  mrg 
    574  1.1  mrg   if (POINTER_TYPE_P (type)
    575  1.1  mrg       && TYPE_RESTRICT (type))
    576  1.1  mrg     type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT);
    577  1.1  mrg 
    578  1.1  mrg   tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, name, type);
    579  1.1  mrg 
    580  1.1  mrg   if (TREE_CODE (var) == VAR_DECL && type == TREE_TYPE (var))
    581  1.1  mrg     {
    582  1.1  mrg       SET_DECL_ALIGN (field, DECL_ALIGN (var));
    583  1.1  mrg       DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
    584  1.1  mrg       TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
    585  1.1  mrg     }
    586  1.1  mrg   else
    587  1.1  mrg     SET_DECL_ALIGN (field, TYPE_ALIGN (type));
    588  1.1  mrg 
    589  1.1  mrg   fields->put (var, field);
    590  1.1  mrg 
    591  1.1  mrg   insert_field_into_struct (record_type, field);
    592  1.1  mrg }
    593  1.1  mrg 
    594  1.1  mrg /* Sets of SSA_NAMES or VAR_DECLs to propagate.  */
    595  1.1  mrg typedef hash_set<tree> propagation_set;
    596  1.1  mrg 
    597  1.1  mrg static void
    598  1.1  mrg find_ssa_names_to_propagate (parallel_g *par, unsigned outer_mask,
    599  1.1  mrg 			     bitmap worker_single, bitmap vector_single,
    600  1.1  mrg 			     vec<propagation_set *> *prop_set)
    601  1.1  mrg {
    602  1.1  mrg   unsigned mask = outer_mask | par->mask;
    603  1.1  mrg 
    604  1.1  mrg   if (par->inner)
    605  1.1  mrg     find_ssa_names_to_propagate (par->inner, mask, worker_single,
    606  1.1  mrg 				 vector_single, prop_set);
    607  1.1  mrg   if (par->next)
    608  1.1  mrg     find_ssa_names_to_propagate (par->next, outer_mask, worker_single,
    609  1.1  mrg 				 vector_single, prop_set);
    610  1.1  mrg 
    611  1.1  mrg   if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
    612  1.1  mrg     {
    613  1.1  mrg       basic_block block;
    614  1.1  mrg       int ix;
    615  1.1  mrg 
    616  1.1  mrg       for (ix = 0; par->blocks.iterate (ix, &block); ix++)
    617  1.1  mrg 	{
    618  1.1  mrg 	  for (gphi_iterator psi = gsi_start_phis (block);
    619  1.1  mrg 	       !gsi_end_p (psi); gsi_next (&psi))
    620  1.1  mrg 	    {
    621  1.1  mrg 	      gphi *phi = psi.phi ();
    622  1.1  mrg 	      use_operand_p use;
    623  1.1  mrg 	      ssa_op_iter iter;
    624  1.1  mrg 
    625  1.1  mrg 	      FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE)
    626  1.1  mrg 		{
    627  1.1  mrg 		  tree var = USE_FROM_PTR (use);
    628  1.1  mrg 
    629  1.1  mrg 		  if (TREE_CODE (var) != SSA_NAME)
    630  1.1  mrg 		    continue;
    631  1.1  mrg 
    632  1.1  mrg 		  gimple *def_stmt = SSA_NAME_DEF_STMT (var);
    633  1.1  mrg 
    634  1.1  mrg 		  if (gimple_nop_p (def_stmt))
    635  1.1  mrg 		    continue;
    636  1.1  mrg 
    637  1.1  mrg 		  basic_block def_bb = gimple_bb (def_stmt);
    638  1.1  mrg 
    639  1.1  mrg 		  if (bitmap_bit_p (worker_single, def_bb->index))
    640  1.1  mrg 		    {
    641  1.1  mrg 		      if (!(*prop_set)[def_bb->index])
    642  1.1  mrg 			(*prop_set)[def_bb->index] = new propagation_set;
    643  1.1  mrg 
    644  1.1  mrg 		      propagation_set *ws_prop = (*prop_set)[def_bb->index];
    645  1.1  mrg 
    646  1.1  mrg 		      ws_prop->add (var);
    647  1.1  mrg 		    }
    648  1.1  mrg 		}
    649  1.1  mrg 	    }
    650  1.1  mrg 
    651  1.1  mrg 	  for (gimple_stmt_iterator gsi = gsi_start_bb (block);
    652  1.1  mrg 	       !gsi_end_p (gsi); gsi_next (&gsi))
    653  1.1  mrg 	    {
    654  1.1  mrg 	      use_operand_p use;
    655  1.1  mrg 	      ssa_op_iter iter;
    656  1.1  mrg 	      gimple *stmt = gsi_stmt (gsi);
    657  1.1  mrg 
    658  1.1  mrg 	      FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
    659  1.1  mrg 		{
    660  1.1  mrg 		  tree var = USE_FROM_PTR (use);
    661  1.1  mrg 
    662  1.1  mrg 		  gimple *def_stmt = SSA_NAME_DEF_STMT (var);
    663  1.1  mrg 
    664  1.1  mrg 		  if (gimple_nop_p (def_stmt))
    665  1.1  mrg 		    continue;
    666  1.1  mrg 
    667  1.1  mrg 		  basic_block def_bb = gimple_bb (def_stmt);
    668  1.1  mrg 
    669  1.1  mrg 		  if (bitmap_bit_p (worker_single, def_bb->index))
    670  1.1  mrg 		    {
    671  1.1  mrg 		      if (!(*prop_set)[def_bb->index])
    672  1.1  mrg 			(*prop_set)[def_bb->index] = new propagation_set;
    673  1.1  mrg 
    674  1.1  mrg 		      propagation_set *ws_prop = (*prop_set)[def_bb->index];
    675  1.1  mrg 
    676  1.1  mrg 		      ws_prop->add (var);
    677  1.1  mrg 		    }
    678  1.1  mrg 		}
    679  1.1  mrg 	    }
    680  1.1  mrg 	}
    681  1.1  mrg     }
    682  1.1  mrg }
    683  1.1  mrg 
    684  1.1  mrg /* Callback for walk_gimple_stmt to find RHS VAR_DECLs (uses) in a
    685  1.1  mrg    statement.  */
    686  1.1  mrg 
    687  1.1  mrg static tree
    688  1.1  mrg find_partitioned_var_uses_1 (tree *node, int *, void *data)
    689  1.1  mrg {
    690  1.1  mrg   walk_stmt_info *wi = (walk_stmt_info *) data;
    691  1.1  mrg   hash_set<tree> *partitioned_var_uses = (hash_set<tree> *) wi->info;
    692  1.1  mrg 
    693  1.1  mrg   if (!wi->is_lhs && VAR_P (*node))
    694  1.1  mrg     partitioned_var_uses->add (*node);
    695  1.1  mrg 
    696  1.1  mrg   return NULL_TREE;
    697  1.1  mrg }
    698  1.1  mrg 
    699  1.1  mrg static void
    700  1.1  mrg find_partitioned_var_uses (parallel_g *par, unsigned outer_mask,
    701  1.1  mrg 			   hash_set<tree> *partitioned_var_uses)
    702  1.1  mrg {
    703  1.1  mrg   unsigned mask = outer_mask | par->mask;
    704  1.1  mrg 
    705  1.1  mrg   if (par->inner)
    706  1.1  mrg     find_partitioned_var_uses (par->inner, mask, partitioned_var_uses);
    707  1.1  mrg   if (par->next)
    708  1.1  mrg     find_partitioned_var_uses (par->next, outer_mask, partitioned_var_uses);
    709  1.1  mrg 
    710  1.1  mrg   if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
    711  1.1  mrg     {
    712  1.1  mrg       basic_block block;
    713  1.1  mrg       int ix;
    714  1.1  mrg 
    715  1.1  mrg       for (ix = 0; par->blocks.iterate (ix, &block); ix++)
    716  1.1  mrg 	for (gimple_stmt_iterator gsi = gsi_start_bb (block);
    717  1.1  mrg 	     !gsi_end_p (gsi); gsi_next (&gsi))
    718  1.1  mrg 	  {
    719  1.1  mrg 	    walk_stmt_info wi;
    720  1.1  mrg 	    memset (&wi, 0, sizeof (wi));
    721  1.1  mrg 	    wi.info = (void *) partitioned_var_uses;
    722  1.1  mrg 	    walk_gimple_stmt (&gsi, NULL, find_partitioned_var_uses_1, &wi);
    723  1.1  mrg 	  }
    724  1.1  mrg     }
    725  1.1  mrg }
    726  1.1  mrg 
    727  1.1  mrg /* Gang-private variables (typically placed in a GPU's shared memory) do not
    728  1.1  mrg    need to be processed by the worker-propagation mechanism.  Populate the
    729  1.1  mrg    GANG_PRIVATE_VARS set with any such variables found in the current
    730  1.1  mrg    function.  */
    731  1.1  mrg 
    732  1.1  mrg static void
    733  1.1  mrg find_gang_private_vars (hash_set<tree> *gang_private_vars)
    734  1.1  mrg {
    735  1.1  mrg   basic_block block;
    736  1.1  mrg 
    737  1.1  mrg   FOR_EACH_BB_FN (block, cfun)
    738  1.1  mrg     {
    739  1.1  mrg       for (gimple_stmt_iterator gsi = gsi_start_bb (block);
    740  1.1  mrg 	   !gsi_end_p (gsi);
    741  1.1  mrg 	   gsi_next (&gsi))
    742  1.1  mrg 	{
    743  1.1  mrg 	  gimple *stmt = gsi_stmt (gsi);
    744  1.1  mrg 
    745  1.1  mrg 	  if (gimple_call_internal_p (stmt, IFN_UNIQUE))
    746  1.1  mrg 	    {
    747  1.1  mrg 	      enum ifn_unique_kind k = ((enum ifn_unique_kind)
    748  1.1  mrg 		TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
    749  1.1  mrg 	      if (k == IFN_UNIQUE_OACC_PRIVATE)
    750  1.1  mrg 		{
    751  1.1  mrg 		  HOST_WIDE_INT level
    752  1.1  mrg 		    = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
    753  1.1  mrg 		  if (level != GOMP_DIM_GANG)
    754  1.1  mrg 		    continue;
    755  1.1  mrg 		  for (unsigned i = 3; i < gimple_call_num_args (stmt); i++)
    756  1.1  mrg 		    {
    757  1.1  mrg 		      tree arg = gimple_call_arg (stmt, i);
    758  1.1  mrg 		      gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
    759  1.1  mrg 		      tree decl = TREE_OPERAND (arg, 0);
    760  1.1  mrg 		      gang_private_vars->add (decl);
    761  1.1  mrg 		    }
    762  1.1  mrg 		}
    763  1.1  mrg 	    }
    764  1.1  mrg 	}
    765  1.1  mrg     }
    766  1.1  mrg }
    767  1.1  mrg 
    768  1.1  mrg static void
    769  1.1  mrg find_local_vars_to_propagate (parallel_g *par, unsigned outer_mask,
    770  1.1  mrg 			      hash_set<tree> *partitioned_var_uses,
    771  1.1  mrg 			      hash_set<tree> *gang_private_vars,
    772  1.1  mrg 			      bitmap writes_gang_private,
    773  1.1  mrg 			      vec<propagation_set *> *prop_set)
    774  1.1  mrg {
    775  1.1  mrg   unsigned mask = outer_mask | par->mask;
    776  1.1  mrg 
    777  1.1  mrg   if (par->inner)
    778  1.1  mrg     find_local_vars_to_propagate (par->inner, mask, partitioned_var_uses,
    779  1.1  mrg 				  gang_private_vars, writes_gang_private,
    780  1.1  mrg 				  prop_set);
    781  1.1  mrg   if (par->next)
    782  1.1  mrg     find_local_vars_to_propagate (par->next, outer_mask, partitioned_var_uses,
    783  1.1  mrg 				  gang_private_vars, writes_gang_private,
    784  1.1  mrg 				  prop_set);
    785  1.1  mrg 
    786  1.1  mrg   if (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)))
    787  1.1  mrg     {
    788  1.1  mrg       basic_block block;
    789  1.1  mrg       int ix;
    790  1.1  mrg 
    791  1.1  mrg       for (ix = 0; par->blocks.iterate (ix, &block); ix++)
    792  1.1  mrg 	{
    793  1.1  mrg 	  for (gimple_stmt_iterator gsi = gsi_start_bb (block);
    794  1.1  mrg 	       !gsi_end_p (gsi); gsi_next (&gsi))
    795  1.1  mrg 	    {
    796  1.1  mrg 	      gimple *stmt = gsi_stmt (gsi);
    797  1.1  mrg 	      tree var;
    798  1.1  mrg 	      unsigned i;
    799  1.1  mrg 
    800  1.1  mrg 	      FOR_EACH_LOCAL_DECL (cfun, i, var)
    801  1.1  mrg 		{
    802  1.1  mrg 		  if (!VAR_P (var)
    803  1.1  mrg 		      || is_global_var (var)
    804  1.1  mrg 		      || AGGREGATE_TYPE_P (TREE_TYPE (var))
    805  1.1  mrg 		      || !partitioned_var_uses->contains (var))
    806  1.1  mrg 		    continue;
    807  1.1  mrg 
    808  1.1  mrg 		  if (stmt_may_clobber_ref_p (stmt, var))
    809  1.1  mrg 		    {
    810  1.1  mrg 		      if (dump_file)
    811  1.1  mrg 			{
    812  1.1  mrg 			  fprintf (dump_file, "bb %u: local variable may be "
    813  1.1  mrg 				   "clobbered in %s mode: ", block->index,
    814  1.1  mrg 				   mask_name (mask));
    815  1.1  mrg 			  print_generic_expr (dump_file, var, TDF_SLIM);
    816  1.1  mrg 			  fprintf (dump_file, "\n");
    817  1.1  mrg 			}
    818  1.1  mrg 
    819  1.1  mrg 		      if (gang_private_vars->contains (var))
    820  1.1  mrg 			{
    821  1.1  mrg 			  /* If we write a gang-private variable, we want a
    822  1.1  mrg 			     barrier at the end of the block.  */
    823  1.1  mrg 			  bitmap_set_bit (writes_gang_private, block->index);
    824  1.1  mrg 			  continue;
    825  1.1  mrg 			}
    826  1.1  mrg 
    827  1.1  mrg 		      if (!(*prop_set)[block->index])
    828  1.1  mrg 			(*prop_set)[block->index] = new propagation_set;
    829  1.1  mrg 
    830  1.1  mrg 		      propagation_set *ws_prop
    831  1.1  mrg 			= (*prop_set)[block->index];
    832  1.1  mrg 
    833  1.1  mrg 		      ws_prop->add (var);
    834  1.1  mrg 		    }
    835  1.1  mrg 		}
    836  1.1  mrg 	    }
    837  1.1  mrg 	}
    838  1.1  mrg     }
    839  1.1  mrg }
    840  1.1  mrg 
    841  1.1  mrg /* Transform basic blocks FROM, TO (which may be the same block) into:
    842  1.1  mrg    if (GOACC_single_start ())
    843  1.1  mrg      BLOCK;
    844  1.1  mrg    GOACC_barrier ();
    845  1.1  mrg 			      \  |  /
    846  1.1  mrg 			      +----+
    847  1.1  mrg 			      |    |        (new) predicate block
    848  1.1  mrg 			      +----+--
    849  1.1  mrg    \  |  /   \  |  /	        |t    \
    850  1.1  mrg    +----+    +----+	      +----+  |
    851  1.1  mrg    |	|    |    |	===>  |    |  | f   (old) from block
    852  1.1  mrg    +----+    +----+	      +----+  |
    853  1.1  mrg      |       t/  \f	        |    /
    854  1.1  mrg 			      +----+/
    855  1.1  mrg   (split  (split before       |    |        skip block
    856  1.1  mrg   at end)   condition)	      +----+
    857  1.1  mrg 			      t/  \f
    858  1.1  mrg */
    859  1.1  mrg 
    860  1.1  mrg static void
    861  1.1  mrg worker_single_simple (basic_block from, basic_block to,
    862  1.1  mrg 		      hash_set<tree> *def_escapes_block)
    863  1.1  mrg {
    864  1.1  mrg   gimple *call, *cond;
    865  1.1  mrg   tree lhs, decl;
    866  1.1  mrg   basic_block skip_block;
    867  1.1  mrg 
    868  1.1  mrg   gimple_stmt_iterator gsi = gsi_last_bb (to);
    869  1.1  mrg   if (EDGE_COUNT (to->succs) > 1)
    870  1.1  mrg     {
    871  1.1  mrg       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND);
    872  1.1  mrg       gsi_prev (&gsi);
    873  1.1  mrg     }
    874  1.1  mrg   edge e = split_block (to, gsi_stmt (gsi));
    875  1.1  mrg   skip_block = e->dest;
    876  1.1  mrg 
    877  1.1  mrg   gimple_stmt_iterator start = gsi_after_labels (from);
    878  1.1  mrg 
    879  1.1  mrg   decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_START);
    880  1.1  mrg   lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
    881  1.1  mrg   call = gimple_build_call (decl, 0);
    882  1.1  mrg   gimple_call_set_lhs (call, lhs);
    883  1.1  mrg   gsi_insert_before (&start, call, GSI_NEW_STMT);
    884  1.1  mrg   update_stmt (call);
    885  1.1  mrg 
    886  1.1  mrg   cond = gimple_build_cond (EQ_EXPR, lhs,
    887  1.1  mrg 			    fold_convert_loc (UNKNOWN_LOCATION,
    888  1.1  mrg 					      TREE_TYPE (lhs),
    889  1.1  mrg 					      boolean_true_node),
    890  1.1  mrg 			    NULL_TREE, NULL_TREE);
    891  1.1  mrg   gsi_insert_after (&start, cond, GSI_NEW_STMT);
    892  1.1  mrg   update_stmt (cond);
    893  1.1  mrg 
    894  1.1  mrg   edge et = split_block (from, cond);
    895  1.1  mrg   et->flags &= ~EDGE_FALLTHRU;
    896  1.1  mrg   et->flags |= EDGE_TRUE_VALUE;
    897  1.1  mrg   /* Make the active worker the more probable path so we prefer fallthrough
    898  1.1  mrg      (letting the idle workers jump around more).  */
    899  1.1  mrg   et->probability = profile_probability::likely ();
    900  1.1  mrg 
    901  1.1  mrg   edge ef = make_edge (from, skip_block, EDGE_FALSE_VALUE);
    902  1.1  mrg   ef->probability = et->probability.invert ();
    903  1.1  mrg 
    904  1.1  mrg   basic_block neutered = split_edge (ef);
    905  1.1  mrg   gimple_stmt_iterator neut_gsi = gsi_last_bb (neutered);
    906  1.1  mrg 
    907  1.1  mrg   for (gsi = gsi_start_bb (et->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    908  1.1  mrg     {
    909  1.1  mrg       gimple *stmt = gsi_stmt (gsi);
    910  1.1  mrg       ssa_op_iter iter;
    911  1.1  mrg       tree var;
    912  1.1  mrg 
    913  1.1  mrg       FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF)
    914  1.1  mrg 	{
    915  1.1  mrg 	  if (def_escapes_block->contains (var))
    916  1.1  mrg 	    {
    917  1.1  mrg 	      gphi *join_phi = create_phi_node (NULL_TREE, skip_block);
    918  1.1  mrg 	      create_new_def_for (var, join_phi,
    919  1.1  mrg 				  gimple_phi_result_ptr (join_phi));
    920  1.1  mrg 	      add_phi_arg (join_phi, var, e, UNKNOWN_LOCATION);
    921  1.1  mrg 
    922  1.1  mrg 	      tree neutered_def = copy_ssa_name (var, NULL);
    923  1.1  mrg 	      /* We really want "don't care" or some value representing
    924  1.1  mrg 		 undefined here, but optimizers will probably get rid of the
    925  1.1  mrg 		 zero-assignments anyway.  */
    926  1.1  mrg 	      gassign *zero = gimple_build_assign (neutered_def,
    927  1.1  mrg 				build_zero_cst (TREE_TYPE (neutered_def)));
    928  1.1  mrg 
    929  1.1  mrg 	      gsi_insert_after (&neut_gsi, zero, GSI_CONTINUE_LINKING);
    930  1.1  mrg 	      update_stmt (zero);
    931  1.1  mrg 
    932  1.1  mrg 	      add_phi_arg (join_phi, neutered_def, single_succ_edge (neutered),
    933  1.1  mrg 			   UNKNOWN_LOCATION);
    934  1.1  mrg 	      update_stmt (join_phi);
    935  1.1  mrg 	    }
    936  1.1  mrg 	}
    937  1.1  mrg     }
    938  1.1  mrg }
    939  1.1  mrg 
    940  1.1  mrg static tree
    941  1.1  mrg build_receiver_ref (tree var, tree receiver_decl, field_map_t *fields)
    942  1.1  mrg {
    943  1.1  mrg   tree x = build_simple_mem_ref (receiver_decl);
    944  1.1  mrg   tree field = *fields->get (var);
    945  1.1  mrg   TREE_THIS_NOTRAP (x) = 1;
    946  1.1  mrg   x = omp_build_component_ref (x, field);
    947  1.1  mrg   return x;
    948  1.1  mrg }
    949  1.1  mrg 
    950  1.1  mrg static tree
    951  1.1  mrg build_sender_ref (tree var, tree sender_decl, field_map_t *fields)
    952  1.1  mrg {
    953  1.1  mrg   if (POINTER_TYPE_P (TREE_TYPE (sender_decl)))
    954  1.1  mrg     sender_decl = build_simple_mem_ref (sender_decl);
    955  1.1  mrg   tree field = *fields->get (var);
    956  1.1  mrg   return omp_build_component_ref (sender_decl, field);
    957  1.1  mrg }
    958  1.1  mrg 
    959  1.1  mrg static int
    960  1.1  mrg sort_by_ssa_version_or_uid (const void *p1, const void *p2)
    961  1.1  mrg {
    962  1.1  mrg   const tree t1 = *(const tree *)p1;
    963  1.1  mrg   const tree t2 = *(const tree *)p2;
    964  1.1  mrg 
    965  1.1  mrg   if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) == SSA_NAME)
    966  1.1  mrg     return SSA_NAME_VERSION (t1) - SSA_NAME_VERSION (t2);
    967  1.1  mrg   else if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) != SSA_NAME)
    968  1.1  mrg     return -1;
    969  1.1  mrg   else if (TREE_CODE (t1) != SSA_NAME && TREE_CODE (t2) == SSA_NAME)
    970  1.1  mrg     return 1;
    971  1.1  mrg   else
    972  1.1  mrg     return DECL_UID (t1) - DECL_UID (t2);
    973  1.1  mrg }
    974  1.1  mrg 
    975  1.1  mrg static int
    976  1.1  mrg sort_by_size_then_ssa_version_or_uid (const void *p1, const void *p2)
    977  1.1  mrg {
    978  1.1  mrg   const tree t1 = *(const tree *)p1;
    979  1.1  mrg   const tree t2 = *(const tree *)p2;
    980  1.1  mrg   unsigned HOST_WIDE_INT s1 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t1)));
    981  1.1  mrg   unsigned HOST_WIDE_INT s2 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t2)));
    982  1.1  mrg   if (s1 != s2)
    983  1.1  mrg     return s2 - s1;
    984  1.1  mrg   else
    985  1.1  mrg     return sort_by_ssa_version_or_uid (p1, p2);
    986  1.1  mrg }
    987  1.1  mrg 
    988  1.1  mrg static void
    989  1.1  mrg worker_single_copy (basic_block from, basic_block to,
    990  1.1  mrg 		    hash_set<tree> *def_escapes_block,
    991  1.1  mrg 		    hash_set<tree> *worker_partitioned_uses,
    992  1.1  mrg 		    tree record_type, record_field_map_t *record_field_map,
    993  1.1  mrg 		    unsigned HOST_WIDE_INT placement,
    994  1.1  mrg 		    bool isolate_broadcasts, bool has_gang_private_write)
    995  1.1  mrg {
    996  1.1  mrg   /* If we only have virtual defs, we'll have no record type, but we still want
    997  1.1  mrg      to emit single_copy_start and (particularly) single_copy_end to act as
    998  1.1  mrg      a vdef source on the neutered edge representing memory writes on the
    999  1.1  mrg      non-neutered edge.  */
   1000  1.1  mrg   if (!record_type)
   1001  1.1  mrg     record_type = char_type_node;
   1002  1.1  mrg 
   1003  1.1  mrg   tree sender_decl
   1004  1.1  mrg     = targetm.goacc.create_worker_broadcast_record (record_type, true,
   1005  1.1  mrg 						    ".oacc_worker_o",
   1006  1.1  mrg 						    placement);
   1007  1.1  mrg   tree receiver_decl
   1008  1.1  mrg     = targetm.goacc.create_worker_broadcast_record (record_type, false,
   1009  1.1  mrg 						    ".oacc_worker_i",
   1010  1.1  mrg 						    placement);
   1011  1.1  mrg 
   1012  1.1  mrg   gimple_stmt_iterator gsi = gsi_last_bb (to);
   1013  1.1  mrg   if (EDGE_COUNT (to->succs) > 1)
   1014  1.1  mrg     gsi_prev (&gsi);
   1015  1.1  mrg   edge e = split_block (to, gsi_stmt (gsi));
   1016  1.1  mrg   basic_block barrier_block = e->dest;
   1017  1.1  mrg 
   1018  1.1  mrg   gimple_stmt_iterator start = gsi_after_labels (from);
   1019  1.1  mrg 
   1020  1.1  mrg   tree decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_START);
   1021  1.1  mrg 
   1022  1.1  mrg   tree lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
   1023  1.1  mrg 
   1024  1.1  mrg   gimple *call
   1025  1.1  mrg     = gimple_build_call (decl, 1,
   1026  1.1  mrg 			 POINTER_TYPE_P (TREE_TYPE (sender_decl))
   1027  1.1  mrg 			 ? sender_decl : build_fold_addr_expr (sender_decl));
   1028  1.1  mrg   gimple_call_set_lhs (call, lhs);
   1029  1.1  mrg   gsi_insert_before (&start, call, GSI_NEW_STMT);
   1030  1.1  mrg   update_stmt (call);
   1031  1.1  mrg 
   1032  1.1  mrg   /* The shared-memory range for this block overflowed.  Add a barrier before
   1033  1.1  mrg      the GOACC_single_copy_start call.  */
   1034  1.1  mrg   if (isolate_broadcasts)
   1035  1.1  mrg     {
   1036  1.1  mrg       decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
   1037  1.1  mrg       gimple *acc_bar = gimple_build_call (decl, 0);
   1038  1.1  mrg       gsi_insert_before (&start, acc_bar, GSI_SAME_STMT);
   1039  1.1  mrg     }
   1040  1.1  mrg 
   1041  1.1  mrg   tree conv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));
   1042  1.1  mrg 
   1043  1.1  mrg   gimple *conv = gimple_build_assign (conv_tmp,
   1044  1.1  mrg 				      fold_convert (TREE_TYPE (receiver_decl),
   1045  1.1  mrg 						    lhs));
   1046  1.1  mrg   update_stmt (conv);
   1047  1.1  mrg   gsi_insert_after (&start, conv, GSI_NEW_STMT);
   1048  1.1  mrg   gimple *asgn = gimple_build_assign (receiver_decl, conv_tmp);
   1049  1.1  mrg   gsi_insert_after (&start, asgn, GSI_NEW_STMT);
   1050  1.1  mrg   update_stmt (asgn);
   1051  1.1  mrg 
   1052  1.1  mrg   tree zero_ptr = build_int_cst (TREE_TYPE (receiver_decl), 0);
   1053  1.1  mrg 
   1054  1.1  mrg   tree recv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));
   1055  1.1  mrg   asgn = gimple_build_assign (recv_tmp, receiver_decl);
   1056  1.1  mrg   gsi_insert_after (&start, asgn, GSI_NEW_STMT);
   1057  1.1  mrg   update_stmt (asgn);
   1058  1.1  mrg 
   1059  1.1  mrg   gimple *cond = gimple_build_cond (EQ_EXPR, recv_tmp, zero_ptr, NULL_TREE,
   1060  1.1  mrg 				    NULL_TREE);
   1061  1.1  mrg   update_stmt (cond);
   1062  1.1  mrg 
   1063  1.1  mrg   gsi_insert_after (&start, cond, GSI_NEW_STMT);
   1064  1.1  mrg 
   1065  1.1  mrg   edge et = split_block (from, cond);
   1066  1.1  mrg   et->flags &= ~EDGE_FALLTHRU;
   1067  1.1  mrg   et->flags |= EDGE_TRUE_VALUE;
   1068  1.1  mrg   /* Make the active worker the more probable path so we prefer fallthrough
   1069  1.1  mrg      (letting the idle workers jump around more).  */
   1070  1.1  mrg   et->probability = profile_probability::likely ();
   1071  1.1  mrg 
   1072  1.1  mrg   basic_block body = et->dest;
   1073  1.1  mrg 
   1074  1.1  mrg   edge ef = make_edge (from, barrier_block, EDGE_FALSE_VALUE);
   1075  1.1  mrg   ef->probability = et->probability.invert ();
   1076  1.1  mrg 
   1077  1.1  mrg   gimple_stmt_iterator bar_gsi = gsi_start_bb (barrier_block);
   1078  1.1  mrg   cond = gimple_build_cond (NE_EXPR, recv_tmp, zero_ptr, NULL_TREE, NULL_TREE);
   1079  1.1  mrg 
   1080  1.1  mrg   if (record_type != char_type_node || has_gang_private_write)
   1081  1.1  mrg     {
   1082  1.1  mrg       decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
   1083  1.1  mrg       gimple *acc_bar = gimple_build_call (decl, 0);
   1084  1.1  mrg 
   1085  1.1  mrg       gsi_insert_before (&bar_gsi, acc_bar, GSI_NEW_STMT);
   1086  1.1  mrg       gsi_insert_after (&bar_gsi, cond, GSI_NEW_STMT);
   1087  1.1  mrg     }
   1088  1.1  mrg   else
   1089  1.1  mrg     gsi_insert_before (&bar_gsi, cond, GSI_NEW_STMT);
   1090  1.1  mrg 
   1091  1.1  mrg   edge et2 = split_block (barrier_block, cond);
   1092  1.1  mrg   et2->flags &= ~EDGE_FALLTHRU;
   1093  1.1  mrg   et2->flags |= EDGE_TRUE_VALUE;
   1094  1.1  mrg   et2->probability = profile_probability::unlikely ();
   1095  1.1  mrg 
   1096  1.1  mrg   basic_block exit_block = et2->dest;
   1097  1.1  mrg 
   1098  1.1  mrg   basic_block copyout_block = split_edge (et2);
   1099  1.1  mrg   edge ef2 = make_edge (barrier_block, exit_block, EDGE_FALSE_VALUE);
   1100  1.1  mrg   ef2->probability = et2->probability.invert ();
   1101  1.1  mrg 
   1102  1.1  mrg   gimple_stmt_iterator copyout_gsi = gsi_start_bb (copyout_block);
   1103  1.1  mrg 
   1104  1.1  mrg   edge copyout_to_exit = single_succ_edge (copyout_block);
   1105  1.1  mrg 
   1106  1.1  mrg   gimple_seq sender_seq = NULL;
   1107  1.1  mrg 
   1108  1.1  mrg   /* Make sure we iterate over definitions in a stable order.  */
   1109  1.1  mrg   auto_vec<tree> escape_vec (def_escapes_block->elements ());
   1110  1.1  mrg   for (hash_set<tree>::iterator it = def_escapes_block->begin ();
   1111  1.1  mrg        it != def_escapes_block->end (); ++it)
   1112  1.1  mrg     escape_vec.quick_push (*it);
   1113  1.1  mrg   escape_vec.qsort (sort_by_ssa_version_or_uid);
   1114  1.1  mrg 
   1115  1.1  mrg   for (unsigned i = 0; i < escape_vec.length (); i++)
   1116  1.1  mrg     {
   1117  1.1  mrg       tree var = escape_vec[i];
   1118  1.1  mrg 
   1119  1.1  mrg       if (TREE_CODE (var) == SSA_NAME && SSA_NAME_IS_VIRTUAL_OPERAND (var))
   1120  1.1  mrg 	continue;
   1121  1.1  mrg 
   1122  1.1  mrg       tree barrier_def = 0;
   1123  1.1  mrg 
   1124  1.1  mrg       if (TREE_CODE (var) == SSA_NAME)
   1125  1.1  mrg 	{
   1126  1.1  mrg 	  gimple *def_stmt = SSA_NAME_DEF_STMT (var);
   1127  1.1  mrg 
   1128  1.1  mrg 	  if (gimple_nop_p (def_stmt))
   1129  1.1  mrg 	    continue;
   1130  1.1  mrg 
   1131  1.1  mrg 	  /* The barrier phi takes one result from the actual work of the
   1132  1.1  mrg 	     block we're neutering, and the other result is constant zero of
   1133  1.1  mrg 	     the same type.  */
   1134  1.1  mrg 
   1135  1.1  mrg 	  gphi *barrier_phi = create_phi_node (NULL_TREE, barrier_block);
   1136  1.1  mrg 	  barrier_def = create_new_def_for (var, barrier_phi,
   1137  1.1  mrg 			  gimple_phi_result_ptr (barrier_phi));
   1138  1.1  mrg 
   1139  1.1  mrg 	  add_phi_arg (barrier_phi, var, e, UNKNOWN_LOCATION);
   1140  1.1  mrg 	  add_phi_arg (barrier_phi, build_zero_cst (TREE_TYPE (var)), ef,
   1141  1.1  mrg 		       UNKNOWN_LOCATION);
   1142  1.1  mrg 
   1143  1.1  mrg 	  update_stmt (barrier_phi);
   1144  1.1  mrg 	}
   1145  1.1  mrg       else
   1146  1.1  mrg 	gcc_assert (TREE_CODE (var) == VAR_DECL);
   1147  1.1  mrg 
   1148  1.1  mrg       /* If we had no record type, we will have no fields map.  */
   1149  1.1  mrg       field_map_t *fields = record_field_map->get (record_type);
   1150  1.1  mrg 
   1151  1.1  mrg       if (worker_partitioned_uses->contains (var)
   1152  1.1  mrg 	  && fields
   1153  1.1  mrg 	  && fields->get (var))
   1154  1.1  mrg 	{
   1155  1.1  mrg 	  tree neutered_def = make_ssa_name (TREE_TYPE (var));
   1156  1.1  mrg 
   1157  1.1  mrg 	  /* Receive definition from shared memory block.  */
   1158  1.1  mrg 
   1159  1.1  mrg 	  tree receiver_ref = build_receiver_ref (var, receiver_decl, fields);
   1160  1.1  mrg 	  gassign *recv = gimple_build_assign (neutered_def,
   1161  1.1  mrg 					       receiver_ref);
   1162  1.1  mrg 	  gsi_insert_after (&copyout_gsi, recv, GSI_CONTINUE_LINKING);
   1163  1.1  mrg 	  update_stmt (recv);
   1164  1.1  mrg 
   1165  1.1  mrg 	  if (TREE_CODE (var) == VAR_DECL)
   1166  1.1  mrg 	    {
   1167  1.1  mrg 	      /* If it's a VAR_DECL, we only copied to an SSA temporary.  Copy
   1168  1.1  mrg 		 to the final location now.  */
   1169  1.1  mrg 	      gassign *asgn = gimple_build_assign (var, neutered_def);
   1170  1.1  mrg 	      gsi_insert_after (&copyout_gsi, asgn, GSI_CONTINUE_LINKING);
   1171  1.1  mrg 	      update_stmt (asgn);
   1172  1.1  mrg 	    }
   1173  1.1  mrg 	  else
   1174  1.1  mrg 	    {
   1175  1.1  mrg 	      /* If it's an SSA name, create a new phi at the join node to
   1176  1.1  mrg 		 represent either the output from the active worker (the
   1177  1.1  mrg 		 barrier) or the inactive workers (the copyout block).  */
   1178  1.1  mrg 	      gphi *join_phi = create_phi_node (NULL_TREE, exit_block);
   1179  1.1  mrg 	      create_new_def_for (barrier_def, join_phi,
   1180  1.1  mrg 				  gimple_phi_result_ptr (join_phi));
   1181  1.1  mrg 	      add_phi_arg (join_phi, barrier_def, ef2, UNKNOWN_LOCATION);
   1182  1.1  mrg 	      add_phi_arg (join_phi, neutered_def, copyout_to_exit,
   1183  1.1  mrg 			   UNKNOWN_LOCATION);
   1184  1.1  mrg 	      update_stmt (join_phi);
   1185  1.1  mrg 	    }
   1186  1.1  mrg 
   1187  1.1  mrg 	  /* Send definition to shared memory block.  */
   1188  1.1  mrg 
   1189  1.1  mrg 	  tree sender_ref = build_sender_ref (var, sender_decl, fields);
   1190  1.1  mrg 
   1191  1.1  mrg 	  if (TREE_CODE (var) == SSA_NAME)
   1192  1.1  mrg 	    {
   1193  1.1  mrg 	      gassign *send = gimple_build_assign (sender_ref, var);
   1194  1.1  mrg 	      gimple_seq_add_stmt (&sender_seq, send);
   1195  1.1  mrg 	      update_stmt (send);
   1196  1.1  mrg 	    }
   1197  1.1  mrg 	  else if (TREE_CODE (var) == VAR_DECL)
   1198  1.1  mrg 	    {
   1199  1.1  mrg 	      tree tmp = make_ssa_name (TREE_TYPE (var));
   1200  1.1  mrg 	      gassign *send = gimple_build_assign (tmp, var);
   1201  1.1  mrg 	      gimple_seq_add_stmt (&sender_seq, send);
   1202  1.1  mrg 	      update_stmt (send);
   1203  1.1  mrg 	      send = gimple_build_assign (sender_ref, tmp);
   1204  1.1  mrg 	      gimple_seq_add_stmt (&sender_seq, send);
   1205  1.1  mrg 	      update_stmt (send);
   1206  1.1  mrg 	    }
   1207  1.1  mrg 	  else
   1208  1.1  mrg 	    gcc_unreachable ();
   1209  1.1  mrg 	}
   1210  1.1  mrg     }
   1211  1.1  mrg 
   1212  1.1  mrg   /* The shared-memory range for this block overflowed.  Add a barrier at the
   1213  1.1  mrg      end.  */
   1214  1.1  mrg   if (isolate_broadcasts)
   1215  1.1  mrg     {
   1216  1.1  mrg       gsi = gsi_start_bb (exit_block);
   1217  1.1  mrg       decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
   1218  1.1  mrg       gimple *acc_bar = gimple_build_call (decl, 0);
   1219  1.1  mrg       gsi_insert_before (&gsi, acc_bar, GSI_SAME_STMT);
   1220  1.1  mrg     }
   1221  1.1  mrg 
   1222  1.1  mrg   /* It's possible for the ET->DEST block (the work done by the active thread)
   1223  1.1  mrg      to finish with a control-flow insn, e.g. a UNIQUE function call.  Split
   1224  1.1  mrg      the block and add SENDER_SEQ in the latter part to avoid having control
   1225  1.1  mrg      flow in the middle of a BB.  */
   1226  1.1  mrg 
   1227  1.1  mrg   decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_END);
   1228  1.1  mrg   call = gimple_build_call (decl, 1,
   1229  1.1  mrg 			    POINTER_TYPE_P (TREE_TYPE (sender_decl))
   1230  1.1  mrg 			    ? sender_decl
   1231  1.1  mrg 			    : build_fold_addr_expr (sender_decl));
   1232  1.1  mrg   gimple_seq_add_stmt (&sender_seq, call);
   1233  1.1  mrg 
   1234  1.1  mrg   gsi = gsi_last_bb (body);
   1235  1.1  mrg   gimple *last = gsi_stmt (gsi);
   1236  1.1  mrg   basic_block sender_block = split_block (body, last)->dest;
   1237  1.1  mrg   gsi = gsi_last_bb (sender_block);
   1238  1.1  mrg   gsi_insert_seq_after (&gsi, sender_seq, GSI_CONTINUE_LINKING);
   1239  1.1  mrg }
   1240  1.1  mrg 
   1241  1.1  mrg typedef hash_map<basic_block, std::pair<unsigned HOST_WIDE_INT, bool> >
   1242  1.1  mrg   blk_offset_map_t;
   1243  1.1  mrg 
   1244  1.1  mrg static void
   1245  1.1  mrg neuter_worker_single (parallel_g *par, unsigned outer_mask,
   1246  1.1  mrg 		      bitmap worker_single, bitmap vector_single,
   1247  1.1  mrg 		      vec<propagation_set *> *prop_set,
   1248  1.1  mrg 		      hash_set<tree> *partitioned_var_uses,
   1249  1.1  mrg 		      record_field_map_t *record_field_map,
   1250  1.1  mrg 		      blk_offset_map_t *blk_offset_map,
   1251  1.1  mrg 		      bitmap writes_gang_private)
   1252  1.1  mrg {
   1253  1.1  mrg   unsigned mask = outer_mask | par->mask;
   1254  1.1  mrg 
   1255  1.1  mrg   if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
   1256  1.1  mrg     {
   1257  1.1  mrg       basic_block block;
   1258  1.1  mrg 
   1259  1.1  mrg       for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
   1260  1.1  mrg 	{
   1261  1.1  mrg 	  bool has_defs = false;
   1262  1.1  mrg 	  hash_set<tree> def_escapes_block;
   1263  1.1  mrg 	  hash_set<tree> worker_partitioned_uses;
   1264  1.1  mrg 	  unsigned j;
   1265  1.1  mrg 	  tree var;
   1266  1.1  mrg 
   1267  1.1  mrg 	  FOR_EACH_SSA_NAME (j, var, cfun)
   1268  1.1  mrg 	    {
   1269  1.1  mrg 	      if (SSA_NAME_IS_VIRTUAL_OPERAND (var))
   1270  1.1  mrg 		{
   1271  1.1  mrg 		  has_defs = true;
   1272  1.1  mrg 		  continue;
   1273  1.1  mrg 		}
   1274  1.1  mrg 
   1275  1.1  mrg 	      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
   1276  1.1  mrg 
   1277  1.1  mrg 	      if (gimple_nop_p (def_stmt))
   1278  1.1  mrg 		continue;
   1279  1.1  mrg 
   1280  1.1  mrg 	      if (gimple_bb (def_stmt)->index != block->index)
   1281  1.1  mrg 		continue;
   1282  1.1  mrg 
   1283  1.1  mrg 	      gimple *use_stmt;
   1284  1.1  mrg 	      imm_use_iterator use_iter;
   1285  1.1  mrg 	      bool uses_outside_block = false;
   1286  1.1  mrg 	      bool worker_partitioned_use = false;
   1287  1.1  mrg 
   1288  1.1  mrg 	      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, var)
   1289  1.1  mrg 		{
   1290  1.1  mrg 		  int blocknum = gimple_bb (use_stmt)->index;
   1291  1.1  mrg 
   1292  1.1  mrg 		  /* Don't propagate SSA names that are only used in the
   1293  1.1  mrg 		     current block, unless the usage is in a phi node: that
   1294  1.1  mrg 		     means the name left the block, then came back in at the
   1295  1.1  mrg 		     top.  */
   1296  1.1  mrg 		  if (blocknum != block->index
   1297  1.1  mrg 		      || gimple_code (use_stmt) == GIMPLE_PHI)
   1298  1.1  mrg 		    uses_outside_block = true;
   1299  1.1  mrg 		  if (!bitmap_bit_p (worker_single, blocknum))
   1300  1.1  mrg 		    worker_partitioned_use = true;
   1301  1.1  mrg 		}
   1302  1.1  mrg 
   1303  1.1  mrg 	      if (uses_outside_block)
   1304  1.1  mrg 		def_escapes_block.add (var);
   1305  1.1  mrg 
   1306  1.1  mrg 	      if (worker_partitioned_use)
   1307  1.1  mrg 		{
   1308  1.1  mrg 		  worker_partitioned_uses.add (var);
   1309  1.1  mrg 		  has_defs = true;
   1310  1.1  mrg 		}
   1311  1.1  mrg 	    }
   1312  1.1  mrg 
   1313  1.1  mrg 	  propagation_set *ws_prop = (*prop_set)[block->index];
   1314  1.1  mrg 
   1315  1.1  mrg 	  if (ws_prop)
   1316  1.1  mrg 	    {
   1317  1.1  mrg 	      for (propagation_set::iterator it = ws_prop->begin ();
   1318  1.1  mrg 		   it != ws_prop->end ();
   1319  1.1  mrg 		   ++it)
   1320  1.1  mrg 		{
   1321  1.1  mrg 		  tree var = *it;
   1322  1.1  mrg 		  if (TREE_CODE (var) == VAR_DECL)
   1323  1.1  mrg 		    {
   1324  1.1  mrg 		      def_escapes_block.add (var);
   1325  1.1  mrg 		      if (partitioned_var_uses->contains (var))
   1326  1.1  mrg 			{
   1327  1.1  mrg 			  worker_partitioned_uses.add (var);
   1328  1.1  mrg 			  has_defs = true;
   1329  1.1  mrg 			}
   1330  1.1  mrg 		    }
   1331  1.1  mrg 		}
   1332  1.1  mrg 
   1333  1.1  mrg 	      delete ws_prop;
   1334  1.1  mrg 	      (*prop_set)[block->index] = 0;
   1335  1.1  mrg 	    }
   1336  1.1  mrg 
   1337  1.1  mrg 	  bool only_marker_fns = true;
   1338  1.1  mrg 	  bool join_block = false;
   1339  1.1  mrg 
   1340  1.1  mrg 	  for (gimple_stmt_iterator gsi = gsi_start_bb (block);
   1341  1.1  mrg 	       !gsi_end_p (gsi);
   1342  1.1  mrg 	       gsi_next (&gsi))
   1343  1.1  mrg 	    {
   1344  1.1  mrg 	      gimple *stmt = gsi_stmt (gsi);
   1345  1.1  mrg 	      if (gimple_code (stmt) == GIMPLE_CALL
   1346  1.1  mrg 		  && gimple_call_internal_p (stmt, IFN_UNIQUE))
   1347  1.1  mrg 		{
   1348  1.1  mrg 		  enum ifn_unique_kind k = ((enum ifn_unique_kind)
   1349  1.1  mrg 		    TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
   1350  1.1  mrg 		  if (k != IFN_UNIQUE_OACC_PRIVATE
   1351  1.1  mrg 		      && k != IFN_UNIQUE_OACC_JOIN
   1352  1.1  mrg 		      && k != IFN_UNIQUE_OACC_FORK
   1353  1.1  mrg 		      && k != IFN_UNIQUE_OACC_HEAD_MARK
   1354  1.1  mrg 		      && k != IFN_UNIQUE_OACC_TAIL_MARK)
   1355  1.1  mrg 		    only_marker_fns = false;
   1356  1.1  mrg 		  else if (k == IFN_UNIQUE_OACC_JOIN)
   1357  1.1  mrg 		    /* The JOIN marker is special in that it *cannot* be
   1358  1.1  mrg 		       predicated for worker zero, because it may be lowered
   1359  1.1  mrg 		       to a barrier instruction and all workers must typically
   1360  1.1  mrg 		       execute that barrier.  We shouldn't be doing any
   1361  1.1  mrg 		       broadcasts from the join block anyway.  */
   1362  1.1  mrg 		    join_block = true;
   1363  1.1  mrg 		}
   1364  1.1  mrg 	      else if (gimple_code (stmt) == GIMPLE_CALL
   1365  1.1  mrg 		       && gimple_call_internal_p (stmt, IFN_GOACC_LOOP))
   1366  1.1  mrg 		/* Empty.  */;
   1367  1.1  mrg 	      else if (gimple_nop_p (stmt))
   1368  1.1  mrg 		/* Empty.  */;
   1369  1.1  mrg 	      else
   1370  1.1  mrg 		only_marker_fns = false;
   1371  1.1  mrg 	    }
   1372  1.1  mrg 
   1373  1.1  mrg 	  /* We can skip predicating this block for worker zero if the only
   1374  1.1  mrg 	     thing it contains is marker functions that will be removed in the
   1375  1.1  mrg 	     oaccdevlow pass anyway.
   1376  1.1  mrg 	     Don't do this if the block has (any) phi nodes, because those
   1377  1.1  mrg 	     might define SSA names that need broadcasting.
   1378  1.1  mrg 	     TODO: We might be able to skip transforming blocks that only
   1379  1.1  mrg 	     contain some other trivial statements too.  */
   1380  1.1  mrg 	  if (only_marker_fns && !phi_nodes (block))
   1381  1.1  mrg 	    continue;
   1382  1.1  mrg 
   1383  1.1  mrg 	  gcc_assert (!join_block);
   1384  1.1  mrg 
   1385  1.1  mrg 	  if (has_defs)
   1386  1.1  mrg 	    {
   1387  1.1  mrg 	      tree record_type = (tree) block->aux;
   1388  1.1  mrg 	      std::pair<unsigned HOST_WIDE_INT, bool> *off_rngalloc
   1389  1.1  mrg 		= blk_offset_map->get (block);
   1390  1.1  mrg 	      gcc_assert (!record_type || off_rngalloc);
   1391  1.1  mrg 	      unsigned HOST_WIDE_INT offset
   1392  1.1  mrg 		= off_rngalloc ? off_rngalloc->first : 0;
   1393  1.1  mrg 	      bool range_allocated
   1394  1.1  mrg 		= off_rngalloc ? off_rngalloc->second : true;
   1395  1.1  mrg 	      bool has_gang_private_write
   1396  1.1  mrg 		= bitmap_bit_p (writes_gang_private, block->index);
   1397  1.1  mrg 	      worker_single_copy (block, block, &def_escapes_block,
   1398  1.1  mrg 				  &worker_partitioned_uses, record_type,
   1399  1.1  mrg 				  record_field_map,
   1400  1.1  mrg 				  offset, !range_allocated,
   1401  1.1  mrg 				  has_gang_private_write);
   1402  1.1  mrg 	    }
   1403  1.1  mrg 	  else
   1404  1.1  mrg 	    worker_single_simple (block, block, &def_escapes_block);
   1405  1.1  mrg 	}
   1406  1.1  mrg     }
   1407  1.1  mrg 
   1408  1.1  mrg   if ((outer_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
   1409  1.1  mrg     {
   1410  1.1  mrg       basic_block block;
   1411  1.1  mrg 
   1412  1.1  mrg       for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
   1413  1.1  mrg 	for (gimple_stmt_iterator gsi = gsi_start_bb (block);
   1414  1.1  mrg 	     !gsi_end_p (gsi);
   1415  1.1  mrg 	     gsi_next (&gsi))
   1416  1.1  mrg 	  {
   1417  1.1  mrg 	    gimple *stmt = gsi_stmt (gsi);
   1418  1.1  mrg 
   1419  1.1  mrg 	    if (gimple_code (stmt) == GIMPLE_CALL
   1420  1.1  mrg 		&& !gimple_call_internal_p (stmt)
   1421  1.1  mrg 		&& !omp_sese_active_worker_call (as_a <gcall *> (stmt)))
   1422  1.1  mrg 	      {
   1423  1.1  mrg 		/* If we have an OpenACC routine call in worker-single mode,
   1424  1.1  mrg 		   place barriers before and afterwards to prevent
   1425  1.1  mrg 		   clobbering re-used shared memory regions (as are used
   1426  1.1  mrg 		   for AMDGCN at present, for example).  */
   1427  1.1  mrg 		tree decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
   1428  1.1  mrg 		gsi_insert_before (&gsi, gimple_build_call (decl, 0),
   1429  1.1  mrg 				   GSI_SAME_STMT);
   1430  1.1  mrg 		gsi_insert_after (&gsi, gimple_build_call (decl, 0),
   1431  1.1  mrg 				  GSI_NEW_STMT);
   1432  1.1  mrg 	      }
   1433  1.1  mrg 	  }
   1434  1.1  mrg     }
   1435  1.1  mrg 
   1436  1.1  mrg   if (par->inner)
   1437  1.1  mrg     neuter_worker_single (par->inner, mask, worker_single, vector_single,
   1438  1.1  mrg 			  prop_set, partitioned_var_uses, record_field_map,
   1439  1.1  mrg 			  blk_offset_map, writes_gang_private);
   1440  1.1  mrg   if (par->next)
   1441  1.1  mrg     neuter_worker_single (par->next, outer_mask, worker_single, vector_single,
   1442  1.1  mrg 			  prop_set, partitioned_var_uses, record_field_map,
   1443  1.1  mrg 			  blk_offset_map, writes_gang_private);
   1444  1.1  mrg }
   1445  1.1  mrg 
   1446  1.1  mrg static void
   1447  1.1  mrg dfs_broadcast_reachable_1 (basic_block bb, sbitmap reachable)
   1448  1.1  mrg {
   1449  1.1  mrg   if (bb->flags & BB_VISITED)
   1450  1.1  mrg     return;
   1451  1.1  mrg 
   1452  1.1  mrg   bb->flags |= BB_VISITED;
   1453  1.1  mrg 
   1454  1.1  mrg   if (bb->succs)
   1455  1.1  mrg     {
   1456  1.1  mrg       edge e;
   1457  1.1  mrg       edge_iterator ei;
   1458  1.1  mrg       FOR_EACH_EDGE (e, ei, bb->succs)
   1459  1.1  mrg 	{
   1460  1.1  mrg 	  basic_block dest = e->dest;
   1461  1.1  mrg 	  if (dest->aux)
   1462  1.1  mrg 	    bitmap_set_bit (reachable, dest->index);
   1463  1.1  mrg 	  else
   1464  1.1  mrg 	    dfs_broadcast_reachable_1 (dest, reachable);
   1465  1.1  mrg 	}
   1466  1.1  mrg     }
   1467  1.1  mrg }
   1468  1.1  mrg 
/* Pair of a basic-block index and the broadcast record type that was laid
   out for that block.  */
typedef std::pair<int, tree> idx_decl_pair_t;

/* Per-basic-block-index vector of splay trees tracking the address ranges
   already allocated in the shared-memory buffer.  */
typedef auto_vec<splay_tree> used_range_vec_t;
   1472  1.1  mrg 
   1473  1.1  mrg static int
   1474  1.1  mrg sort_size_descending (const void *a, const void *b)
   1475  1.1  mrg {
   1476  1.1  mrg   const idx_decl_pair_t *pa = (const idx_decl_pair_t *) a;
   1477  1.1  mrg   const idx_decl_pair_t *pb = (const idx_decl_pair_t *) b;
   1478  1.1  mrg   unsigned HOST_WIDE_INT asize = tree_to_uhwi (TYPE_SIZE_UNIT (pa->second));
   1479  1.1  mrg   unsigned HOST_WIDE_INT bsize = tree_to_uhwi (TYPE_SIZE_UNIT (pb->second));
   1480  1.1  mrg   return bsize - asize;
   1481  1.1  mrg }
   1482  1.1  mrg 
   1483  1.1  mrg class addr_range
   1484  1.1  mrg {
   1485  1.1  mrg public:
   1486  1.1  mrg   addr_range (unsigned HOST_WIDE_INT addr_lo, unsigned HOST_WIDE_INT addr_hi)
   1487  1.1  mrg     : lo (addr_lo), hi (addr_hi)
   1488  1.1  mrg     { }
   1489  1.1  mrg   addr_range (const addr_range &ar) : lo (ar.lo), hi (ar.hi)
   1490  1.1  mrg     { }
   1491  1.1  mrg   addr_range () : lo (0), hi (0)
   1492  1.1  mrg     { }
   1493  1.1  mrg 
   1494  1.1  mrg   bool invalid () { return lo == 0 && hi == 0; }
   1495  1.1  mrg 
   1496  1.1  mrg   unsigned HOST_WIDE_INT lo;
   1497  1.1  mrg   unsigned HOST_WIDE_INT hi;
   1498  1.1  mrg };
   1499  1.1  mrg 
   1500  1.1  mrg static int
   1501  1.1  mrg splay_tree_compare_addr_range (splay_tree_key a, splay_tree_key b)
   1502  1.1  mrg {
   1503  1.1  mrg   addr_range *ar = (addr_range *) a;
   1504  1.1  mrg   addr_range *br = (addr_range *) b;
   1505  1.1  mrg   if (ar->lo == br->lo && ar->hi == br->hi)
   1506  1.1  mrg     return 0;
   1507  1.1  mrg   if (ar->hi <= br->lo)
   1508  1.1  mrg     return -1;
   1509  1.1  mrg   else if (ar->lo >= br->hi)
   1510  1.1  mrg     return 1;
   1511  1.1  mrg   return 0;
   1512  1.1  mrg }
   1513  1.1  mrg 
   1514  1.1  mrg static void
   1515  1.1  mrg splay_tree_free_key (splay_tree_key k)
   1516  1.1  mrg {
   1517  1.1  mrg   addr_range *ar = (addr_range *) k;
   1518  1.1  mrg   delete ar;
   1519  1.1  mrg }
   1520  1.1  mrg 
   1521  1.1  mrg static addr_range
   1522  1.1  mrg first_fit_range (splay_tree s, unsigned HOST_WIDE_INT size,
   1523  1.1  mrg 		 unsigned HOST_WIDE_INT align, addr_range *bounds)
   1524  1.1  mrg {
   1525  1.1  mrg   splay_tree_node min = splay_tree_min (s);
   1526  1.1  mrg   if (min)
   1527  1.1  mrg     {
   1528  1.1  mrg       splay_tree_node next;
   1529  1.1  mrg       while ((next = splay_tree_successor (s, min->key)))
   1530  1.1  mrg 	{
   1531  1.1  mrg 	  unsigned HOST_WIDE_INT lo = ((addr_range *) min->key)->hi;
   1532  1.1  mrg 	  unsigned HOST_WIDE_INT hi = ((addr_range *) next->key)->lo;
   1533  1.1  mrg 	  unsigned HOST_WIDE_INT base = (lo + align - 1) & ~(align - 1);
   1534  1.1  mrg 	  if (base + size <= hi)
   1535  1.1  mrg 	    return addr_range (base, base + size);
   1536  1.1  mrg 	  min = next;
   1537  1.1  mrg 	}
   1538  1.1  mrg 
   1539  1.1  mrg       unsigned HOST_WIDE_INT base = ((addr_range *)min->key)->hi;
   1540  1.1  mrg       base = (base + align - 1) & ~(align - 1);
   1541  1.1  mrg       if (base + size <= bounds->hi)
   1542  1.1  mrg 	return addr_range (base, base + size);
   1543  1.1  mrg       else
   1544  1.1  mrg 	return addr_range ();
   1545  1.1  mrg     }
   1546  1.1  mrg   else
   1547  1.1  mrg     {
   1548  1.1  mrg       unsigned HOST_WIDE_INT lo = bounds->lo;
   1549  1.1  mrg       lo = (lo + align - 1) & ~(align - 1);
   1550  1.1  mrg       if (lo + size <= bounds->hi)
   1551  1.1  mrg 	return addr_range (lo, lo + size);
   1552  1.1  mrg       else
   1553  1.1  mrg 	return addr_range ();
   1554  1.1  mrg     }
   1555  1.1  mrg }
   1556  1.1  mrg 
   1557  1.1  mrg static int
   1558  1.1  mrg merge_ranges_1 (splay_tree_node n, void *ptr)
   1559  1.1  mrg {
   1560  1.1  mrg   splay_tree accum = (splay_tree) ptr;
   1561  1.1  mrg   addr_range ar = *(addr_range *) n->key;
   1562  1.1  mrg 
   1563  1.1  mrg   splay_tree_node old = splay_tree_lookup (accum, n->key);
   1564  1.1  mrg 
   1565  1.1  mrg   /* We might have an overlap.  Create a new range covering the
   1566  1.1  mrg      overlapping parts.  */
   1567  1.1  mrg   if (old)
   1568  1.1  mrg     {
   1569  1.1  mrg       addr_range *old_ar = (addr_range *) old->key;
   1570  1.1  mrg       ar.lo = MIN (old_ar->lo, ar.lo);
   1571  1.1  mrg       ar.hi = MAX (old_ar->hi, ar.hi);
   1572  1.1  mrg       splay_tree_remove (accum, old->key);
   1573  1.1  mrg     }
   1574  1.1  mrg 
   1575  1.1  mrg   addr_range *new_ar = new addr_range (ar);
   1576  1.1  mrg 
   1577  1.1  mrg   splay_tree_insert (accum, (splay_tree_key) new_ar, n->value);
   1578  1.1  mrg 
   1579  1.1  mrg   return 0;
   1580  1.1  mrg }
   1581  1.1  mrg 
/* Merge every address range recorded in SP into ACCUM, coalescing ranges
   that overlap (see merge_ranges_1).  SP itself is left unmodified.  */

static void
merge_ranges (splay_tree accum, splay_tree sp)
{
  splay_tree_foreach (sp, merge_ranges_1, (void *) accum);
}
   1587  1.1  mrg 
/* Main driver for the worker neutering/broadcasting transformation of the
   current function.  Splits basic blocks into single-partitioning-mode
   pieces, determines which SSA names and local variables must be broadcast
   from worker 0 to the other workers, lays out per-block broadcast records
   in the shared-memory window [BOUNDS_LO, BOUNDS_HI), and finally rewrites
   the worker-single regions.  */

static void
oacc_do_neutering (unsigned HOST_WIDE_INT bounds_lo,
		   unsigned HOST_WIDE_INT bounds_hi)
{
  bb_stmt_map_t bb_stmt_map;
  auto_bitmap worker_single, vector_single;

  /* Split blocks so that each one executes in a single mode.  */
  omp_sese_split_blocks (&bb_stmt_map);

  if (dump_file)
    {
      fprintf (dump_file, "\n\nAfter splitting:\n\n");
      dump_function_to_file (current_function_decl, dump_file, dump_flags);
    }

  unsigned mask = 0;

  /* If this is a routine, calculate MASK as if the outer levels are already
     partitioned.  */
  {
    tree attr = oacc_get_fn_attrib (current_function_decl);
    tree dims = TREE_VALUE (attr);
    unsigned ix;
    for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
      {
	tree allowed = TREE_PURPOSE (dims);
	/* A zero "allowed" entry means this level is partitioned outside
	   the routine itself.  */
	if (allowed && integer_zerop (allowed))
	  mask |= GOMP_DIM_MASK (ix);
      }
  }

  /* Discover the nesting structure of parallel regions, and classify each
     block as worker-single and/or vector-single.  */
  parallel_g *par = omp_sese_discover_pars (&bb_stmt_map);
  populate_single_mode_bitmaps (par, worker_single, vector_single, mask, 0);

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bb->aux = NULL;

  /* PROP_SET is indexed by block index; each element is the set of values
     that need to be broadcast out of that block.  */
  vec<propagation_set *> prop_set (vNULL);
  prop_set.safe_grow_cleared (last_basic_block_for_fn (cfun), true);

  find_ssa_names_to_propagate (par, mask, worker_single, vector_single,
			       &prop_set);

  hash_set<tree> partitioned_var_uses;
  hash_set<tree> gang_private_vars;
  auto_bitmap writes_gang_private;

  find_gang_private_vars (&gang_private_vars);
  find_partitioned_var_uses (par, mask, &partitioned_var_uses);
  find_local_vars_to_propagate (par, mask, &partitioned_var_uses,
				&gang_private_vars, writes_gang_private,
				&prop_set);

  record_field_map_t record_field_map;

  /* For every block with values to propagate, build an artificial
     RECORD_TYPE whose fields hold those values, and hang it off BB->aux
     as a marker for the allocation pass below.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      propagation_set *ws_prop = prop_set[bb->index];
      if (ws_prop)
	{
	  tree record_type = lang_hooks.types.make_type (RECORD_TYPE);
	  tree name = create_tmp_var_name (".oacc_ws_data_s");
	  name = build_decl (UNKNOWN_LOCATION, TYPE_DECL, name, record_type);
	  DECL_ARTIFICIAL (name) = 1;
	  DECL_NAMELESS (name) = 1;
	  TYPE_NAME (record_type) = name;
	  TYPE_ARTIFICIAL (record_type) = 1;

	  auto_vec<tree> field_vec (ws_prop->elements ());
	  for (hash_set<tree>::iterator it = ws_prop->begin ();
	       it != ws_prop->end (); ++it)
	    field_vec.quick_push (*it);

	  /* Sort canonically so the field layout does not depend on
	     hash-set iteration order.  */
	  field_vec.qsort (sort_by_size_then_ssa_version_or_uid);

	  bool existed;
	  field_map_t *fields
	    = &record_field_map.get_or_insert (record_type, &existed);
	  gcc_checking_assert (!existed);

	  /* Insert var fields in reverse order, so the last inserted element
	     is the first in the structure.  */
	  for (int i = field_vec.length () - 1; i >= 0; i--)
	    install_var_field (field_vec[i], record_type, fields);

	  layout_type (record_type);

	  bb->aux = (tree) record_type;
	}
    }

  /* REACHABLE[i] collects the indices of record-bearing blocks whose
     buffer lifetimes may conflict with block i's record.  */
  sbitmap *reachable
    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
			    last_basic_block_for_fn (cfun));

  bitmap_vector_clear (reachable, last_basic_block_for_fn (cfun));

  auto_vec<std::pair<int, tree> > priority;

  /* For each record-bearing block, find the record-bearing blocks reachable
     from it (stopping the walk at such blocks), and remember the block for
     size-priority allocation.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      if (bb->aux)
	{
	  tree record_type = (tree) bb->aux;

	  basic_block bb2;
	  FOR_ALL_BB_FN (bb2, cfun)
	    bb2->flags &= ~BB_VISITED;

	  priority.safe_push (std::make_pair (bb->index, record_type));
	  dfs_broadcast_reachable_1 (bb, reachable[bb->index]);
	}
    }

  /* Make the reachability relation symmetric: if j is reachable from i,
     treat i as conflicting with j as well.  */
  sbitmap *inverted
    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
			    last_basic_block_for_fn (cfun));

  bitmap_vector_clear (inverted, last_basic_block_for_fn (cfun));

  for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    {
      sbitmap_iterator bi;
      unsigned int j;
      EXECUTE_IF_SET_IN_BITMAP (reachable[i], 0, j, bi)
	bitmap_set_bit (inverted[j], i);
    }

  for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    bitmap_ior (reachable[i], reachable[i], inverted[i]);

  sbitmap_vector_free (inverted);

  used_range_vec_t used_ranges;

  used_ranges.safe_grow_cleared (last_basic_block_for_fn (cfun));

  blk_offset_map_t blk_offset_map;

  addr_range worker_shm_bounds (bounds_lo, bounds_hi);

  /* Allocate buffer space for each record, biggest first, avoiding ranges
     already claimed by conflicting blocks.  */
  priority.qsort (sort_size_descending);
  for (unsigned int i = 0; i < priority.length (); i++)
    {
      idx_decl_pair_t p = priority[i];
      int blkno = p.first;
      tree record_type = p.second;
      HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (record_type));
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (record_type);

      /* CONFLICTS accumulates (merged) ranges used by this block and by
	 all blocks that may be live concurrently with it.  */
      splay_tree conflicts = splay_tree_new (splay_tree_compare_addr_range,
					     splay_tree_free_key, NULL);

      if (!used_ranges[blkno])
	used_ranges[blkno] = splay_tree_new (splay_tree_compare_addr_range,
					     splay_tree_free_key, NULL);
      else
	merge_ranges (conflicts, used_ranges[blkno]);

      sbitmap_iterator bi;
      unsigned int j;
      EXECUTE_IF_SET_IN_BITMAP (reachable[blkno], 0, j, bi)
	if (used_ranges[j])
	  merge_ranges (conflicts, used_ranges[j]);

      addr_range ar
	= first_fit_range (conflicts, size, align, &worker_shm_bounds);

      splay_tree_delete (conflicts);

      if (ar.invalid ())
	{
	  /* No free range: fall back to the start of the buffer and note
	     (via the 'false' flag) that a runtime barrier-based protocol
	     must be used instead of a dedicated range.  */
	  unsigned HOST_WIDE_INT base
	    = (bounds_lo + align - 1) & ~(align - 1);
	  if (base + size > bounds_hi)
	    error_at (UNKNOWN_LOCATION, "shared-memory region overflow");
	  std::pair<unsigned HOST_WIDE_INT, bool> base_inrng
	    = std::make_pair (base, false);
	  blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng);
	}
      else
	{
	  /* Sanity check: the chosen range must not already be mapped for
	     this block.  */
	  splay_tree_node old = splay_tree_lookup (used_ranges[blkno],
						   (splay_tree_key) &ar);
	  if (old)
	    {
	      fprintf (stderr, "trying to map [%d..%d] but [%d..%d] is "
		       "already mapped in block %d\n", (int) ar.lo,
		       (int) ar.hi, (int) ((addr_range *) old->key)->lo,
		       (int) ((addr_range *) old->key)->hi, blkno);
	      abort ();
	    }

	  addr_range *arp = new addr_range (ar);
	  splay_tree_insert (used_ranges[blkno], (splay_tree_key) arp,
			     (splay_tree_value) blkno);
	  std::pair<unsigned HOST_WIDE_INT, bool> base_inrng
	    = std::make_pair (ar.lo, true);
	  blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng);
	}
    }

  /* NOTE(review): the splay trees held in USED_RANGES do not appear to be
     explicitly deleted here — presumably harmless for a per-function pass,
     but worth confirming.  */

  sbitmap_vector_free (reachable);

  /* Perform the actual neutering/broadcast rewrite using the computed
     propagation sets, record layouts and buffer offsets.  */
  neuter_worker_single (par, mask, worker_single, vector_single, &prop_set,
			&partitioned_var_uses, &record_field_map,
			&blk_offset_map, writes_gang_private);

  record_field_map.empty ();

  /* These are supposed to have been 'delete'd by 'neuter_worker_single'.  */
  for (auto it : prop_set)
    gcc_checking_assert (!it);
  prop_set.release ();

  delete par;

  /* This doesn't seem to make a difference.  */
  loops_state_clear (LOOP_CLOSED_SSA);

  /* Neutering worker-single neutered blocks will invalidate dominance info.
     It may be possible to incrementally update just the affected blocks, but
     obliterate everything for now.  */
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);

  if (dump_file)
    {
      fprintf (dump_file, "\n\nAfter neutering:\n\n");
      dump_function_to_file (current_function_decl, dump_file, dump_flags);
    }
}
   1821  1.1  mrg 
/* Pass entry point.  Scan the offloaded function to size the shared-memory
   requirements of reductions and gang-private variables per partitioning
   level, ask the target for the usable shared-memory window, and run the
   neutering transformation unless the function is known to execute with a
   single worker.  Always returns 0 (no extra TODO flags).  */

static int
execute_omp_oacc_neuter_broadcast ()
{
  unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX];
  unsigned HOST_WIDE_INT private_size[GOMP_DIM_MAX];

  for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
    {
      reduction_size[i] = 0;
      private_size[i] = 0;
    }

  /* Calculate shared memory size required for reduction variables and
     gang-private memory for this offloaded function.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
	   !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (!is_gimple_call (stmt))
	    continue;
	  gcall *call = as_a <gcall *> (stmt);
	  if (!gimple_call_internal_p (call))
	    continue;
	  enum internal_fn ifn_code = gimple_call_internal_fn (call);
	  switch (ifn_code)
	    {
	    default: break;
	    case IFN_GOACC_REDUCTION:
	      /* Argument 3 is the partitioning level; -1 means no
		 shared-memory accounting is needed for this call.  */
	      if (integer_minus_onep (gimple_call_arg (call, 3)))
		continue;
	      else
		{
		  unsigned code = TREE_INT_CST_LOW (gimple_call_arg (call, 0));
		  /* Only count reduction variables once: the choice to pick
		     the setup call is fairly arbitrary.  */
		  if (code == IFN_GOACC_REDUCTION_SETUP)
		    {
		      int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
		      tree var = gimple_call_arg (call, 2);
		      tree offset = gimple_call_arg (call, 5);
		      tree var_type = TREE_TYPE (var);
		      /* The high-water mark is the variable's offset plus
			 its size; keep the maximum per level.  */
		      unsigned HOST_WIDE_INT limit
			= (tree_to_uhwi (offset)
			   + tree_to_uhwi (TYPE_SIZE_UNIT (var_type)));
		      reduction_size[level]
			= MAX (reduction_size[level], limit);
		    }
		}
	      break;
	    case IFN_UNIQUE:
	      {
		enum ifn_unique_kind kind
		  = ((enum ifn_unique_kind)
		     TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

		if (kind == IFN_UNIQUE_OACC_PRIVATE)
		  {
		    HOST_WIDE_INT level
		      = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
		    /* -1: no level needing shared-memory accounting.  */
		    if (level == -1)
		      break;
		    /* Arguments 3.. are the addresses of the privatized
		       decls; accumulate each decl's aligned size.  */
		    for (unsigned i = 3;
			 i < gimple_call_num_args (call);
			 i++)
		      {
			tree arg = gimple_call_arg (call, i);
			gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
			tree decl = TREE_OPERAND (arg, 0);
			unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (decl);
			/* Round the running size up to DECL's alignment.  */
			private_size[level] = ((private_size[level] + align - 1)
					       & ~(align - 1));
			unsigned HOST_WIDE_INT decl_size
			  = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (decl)));
			private_size[level] += decl_size;
		      }
		  }
	      }
	      break;
	    }
	}
    }

  int dims[GOMP_DIM_MAX];
  for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
    dims[i] = oacc_get_fn_dim_size (current_function_decl, i);

  /* Find bounds of shared-memory buffer space we can use.  */
  unsigned HOST_WIDE_INT bounds_lo = 0, bounds_hi = 0;
  if (targetm.goacc.shared_mem_layout)
    targetm.goacc.shared_mem_layout (&bounds_lo, &bounds_hi, dims,
				     private_size, reduction_size);

  /* Perform worker partitioning unless we know 'num_workers(1)'.  */
  if (dims[GOMP_DIM_WORKER] != 1)
    oacc_do_neutering (bounds_lo, bounds_hi);

  return 0;
}
   1924  1.1  mrg 
namespace {

/* Pass descriptor for the omp_oacc_neuter_broadcast GIMPLE pass.  */

const pass_data pass_data_omp_oacc_neuter_broadcast =
{
  GIMPLE_PASS, /* type */
  "omp_oacc_neuter_broadcast", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

/* The pass object itself; the real work happens in
   execute_omp_oacc_neuter_broadcast.  */

class pass_omp_oacc_neuter_broadcast : public gimple_opt_pass
{
public:
  pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_oacc_neuter_broadcast, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
  {
    if (!flag_openacc)
      return false;

    /* The target must provide the worker-broadcast record hook.  */
    if (!targetm.goacc.create_worker_broadcast_record)
      return false;

    /* Only relevant for OpenACC offloaded functions.  */
    tree attr = oacc_get_fn_attrib (fun->decl);
    if (!attr)
      return false;

    return true;
  }

  virtual unsigned int execute (function *)
    {
      return execute_omp_oacc_neuter_broadcast ();
    }

}; // class pass_omp_oacc_neuter_broadcast

} // anon namespace
   1972  1.1  mrg 
/* Factory function: create a new instance of the pass for pass-manager
   registration.  The caller takes ownership of the returned object.  */

gimple_opt_pass *
make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
{
  return new pass_omp_oacc_neuter_broadcast (ctxt);
}
   1978