Home | History | Annotate | Line # | Download | only in gcc
      1  1.1  mrg /* Decompose OpenACC 'kernels' constructs into parts, a sequence of compute
      2  1.1  mrg    constructs
      3  1.1  mrg 
      4  1.1  mrg    Copyright (C) 2020-2022 Free Software Foundation, Inc.
      5  1.1  mrg 
      6  1.1  mrg This file is part of GCC.
      7  1.1  mrg 
      8  1.1  mrg GCC is free software; you can redistribute it and/or modify it under
      9  1.1  mrg the terms of the GNU General Public License as published by the Free
     10  1.1  mrg Software Foundation; either version 3, or (at your option) any later
     11  1.1  mrg version.
     12  1.1  mrg 
     13  1.1  mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     14  1.1  mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
     15  1.1  mrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     16  1.1  mrg for more details.
     17  1.1  mrg 
     18  1.1  mrg You should have received a copy of the GNU General Public License
     19  1.1  mrg along with GCC; see the file COPYING3.  If not see
     20  1.1  mrg <http://www.gnu.org/licenses/>.  */
     21  1.1  mrg 
     22  1.1  mrg #include "config.h"
     23  1.1  mrg #include "system.h"
     24  1.1  mrg #include "coretypes.h"
     25  1.1  mrg #include "backend.h"
     26  1.1  mrg #include "target.h"
     27  1.1  mrg #include "tree.h"
     28  1.1  mrg #include "langhooks.h"
     29  1.1  mrg #include "gimple.h"
     30  1.1  mrg #include "tree-pass.h"
     31  1.1  mrg #include "cgraph.h"
     32  1.1  mrg #include "fold-const.h"
     33  1.1  mrg #include "gimplify.h"
     34  1.1  mrg #include "gimple-iterator.h"
     35  1.1  mrg #include "gimple-walk.h"
     36  1.1  mrg #include "gomp-constants.h"
     37  1.1  mrg #include "omp-general.h"
     38  1.1  mrg #include "diagnostic-core.h"
     39  1.1  mrg 
     40  1.1  mrg 
     41  1.1  mrg /* This preprocessing pass is run immediately before lower_omp.  It decomposes
     42  1.1  mrg    OpenACC 'kernels' constructs into parts, a sequence of compute constructs.
     43  1.1  mrg 
     44  1.1  mrg    The translation is as follows:
     45  1.1  mrg      - The entire 'kernels' region is turned into a 'data' region with clauses
     46  1.1  mrg        taken from the 'kernels' region.  New 'create' clauses are added for all
     47  1.1  mrg        variables declared at the top level in the kernels region.
     48  1.1  mrg      - Any loop nests annotated with an OpenACC 'loop' directive are wrapped in
     49  1.1  mrg        a new compute construct.
     50  1.1  mrg 	 - 'loop' directives without an explicit 'independent' or 'seq' clause
     51  1.1  mrg 	   get an 'auto' clause added; other clauses are preserved on the loop
     52  1.1  mrg 	   or moved to the new surrounding compute construct, as applicable.
     53  1.1  mrg      - Any sequences of other code (non-loops, non-OpenACC 'loop's) are wrapped
     54  1.1  mrg        in new "gang-single" compute construct: 'worker'/'vector' parallelism is
     55  1.1  mrg        preserved, but 'num_gangs (1)' is enforced.
     56  1.1  mrg      - Both points above only apply at the topmost level in the region, that
     57  1.1  mrg        is, the transformation does not introduce new compute constructs inside
     58  1.1  mrg        nested statement bodies.  In particular, this means that a
     59  1.1  mrg        gang-parallelizable loop inside an 'if' statement is made "gang-single".
     60  1.1  mrg      - In order to make the host wait only once for the whole region instead
     61  1.1  mrg        of once per device kernel launch, the new compute constructs are
     62  1.1  mrg        annotated 'async'.  Unless the original 'kernels' construct already was
     63  1.1  mrg        marked 'async', the entire region ends with a 'wait' directive.  If the
     64  1.1  mrg        original 'kernels' construct was marked 'async', the synthesized 'async'
     65  1.1  mrg        clauses use the original 'kernels' construct's 'async' argument
     66  1.1  mrg        (possibly implicit).
     67  1.1  mrg */
     68  1.1  mrg 
     69  1.1  mrg 
     70  1.1  mrg /*TODO Things are conceptually wrong here: 'loop' clauses may be hidden behind
     71  1.1  mrg   'device_type', so we have to defer a lot of processing until we're in the
     72  1.1  mrg   offloading compilation.  "Fortunately", GCC doesn't support the OpenACC
     73  1.1  mrg   'device_type' clause yet, so we get away that.  */
     74  1.1  mrg 
     75  1.1  mrg 
     76  1.1  mrg /* Helper function for decompose_kernels_region_body.  If STMT contains a
     77  1.1  mrg    "top-level" OMP_FOR statement, returns a pointer to that statement;
     78  1.1  mrg    returns NULL otherwise.
     79  1.1  mrg 
     80  1.1  mrg    A "top-level" OMP_FOR statement is one that is possibly accompanied by
     81  1.1  mrg    small snippets of setup code.  Specifically, this function accepts an
     82  1.1  mrg    OMP_FOR possibly wrapped in a singleton bind and a singleton try
     83  1.1  mrg    statement to allow for a local loop variable, but not an OMP_FOR
     84  1.1  mrg    statement nested in any other constructs.  Alternatively, it accepts a
     85  1.1  mrg    non-singleton bind containing only assignments and then an OMP_FOR
     86  1.1  mrg    statement at the very end.  The former style can be generated by the C
     87  1.1  mrg    frontend, the latter by the Fortran frontend.  */
     88  1.1  mrg 
     89  1.1  mrg static gimple *
     90  1.1  mrg top_level_omp_for_in_stmt (gimple *stmt)
     91  1.1  mrg {
     92  1.1  mrg   if (gimple_code (stmt) == GIMPLE_OMP_FOR)
     93  1.1  mrg     return stmt;
     94  1.1  mrg 
     95  1.1  mrg   if (gimple_code (stmt) == GIMPLE_BIND)
     96  1.1  mrg     {
     97  1.1  mrg       gimple_seq body = gimple_bind_body (as_a <gbind *> (stmt));
     98  1.1  mrg       if (gimple_seq_singleton_p (body))
     99  1.1  mrg 	{
    100  1.1  mrg 	  /* Accept an OMP_FOR statement, or a try statement containing only
    101  1.1  mrg 	     a single OMP_FOR.  */
    102  1.1  mrg 	  gimple *maybe_for_or_try = gimple_seq_first_stmt (body);
    103  1.1  mrg 	  if (gimple_code (maybe_for_or_try) == GIMPLE_OMP_FOR)
    104  1.1  mrg 	    return maybe_for_or_try;
    105  1.1  mrg 	  else if (gimple_code (maybe_for_or_try) == GIMPLE_TRY)
    106  1.1  mrg 	    {
    107  1.1  mrg 	      gimple_seq try_body = gimple_try_eval (maybe_for_or_try);
    108  1.1  mrg 	      if (!gimple_seq_singleton_p (try_body))
    109  1.1  mrg 		return NULL;
    110  1.1  mrg 	      gimple *maybe_omp_for_stmt = gimple_seq_first_stmt (try_body);
    111  1.1  mrg 	      if (gimple_code (maybe_omp_for_stmt) == GIMPLE_OMP_FOR)
    112  1.1  mrg 		return maybe_omp_for_stmt;
    113  1.1  mrg 	    }
    114  1.1  mrg 	}
    115  1.1  mrg       else
    116  1.1  mrg 	{
    117  1.1  mrg 	  gimple_stmt_iterator gsi;
    118  1.1  mrg 	  /* Accept only a block of optional assignments followed by an
    119  1.1  mrg 	     OMP_FOR at the end.  No other kinds of statements allowed.  */
    120  1.1  mrg 	  for (gsi = gsi_start (body); !gsi_end_p (gsi); gsi_next (&gsi))
    121  1.1  mrg 	    {
    122  1.1  mrg 	      gimple *body_stmt = gsi_stmt (gsi);
    123  1.1  mrg 	      if (gimple_code (body_stmt) == GIMPLE_ASSIGN)
    124  1.1  mrg 		continue;
    125  1.1  mrg 	      else if (gimple_code (body_stmt) == GIMPLE_OMP_FOR
    126  1.1  mrg 		       && gsi_one_before_end_p (gsi))
    127  1.1  mrg 		return body_stmt;
    128  1.1  mrg 	      else
    129  1.1  mrg 		return NULL;
    130  1.1  mrg 	    }
    131  1.1  mrg 	}
    132  1.1  mrg     }
    133  1.1  mrg 
    134  1.1  mrg   return NULL;
    135  1.1  mrg }
    136  1.1  mrg 
    137  1.1  mrg /* Helper for adjust_region_code: evaluate the statement at GSI_P.  */
    138  1.1  mrg 
    139  1.1  mrg static tree
    140  1.1  mrg adjust_region_code_walk_stmt_fn (gimple_stmt_iterator *gsi_p,
    141  1.1  mrg 				 bool *handled_ops_p,
    142  1.1  mrg 				 struct walk_stmt_info *wi)
    143  1.1  mrg {
    144  1.1  mrg   int *region_code = (int *) wi->info;
    145  1.1  mrg 
    146  1.1  mrg   gimple *stmt = gsi_stmt (*gsi_p);
    147  1.1  mrg   switch (gimple_code (stmt))
    148  1.1  mrg     {
    149  1.1  mrg     case GIMPLE_OMP_FOR:
    150  1.1  mrg       {
    151  1.1  mrg 	tree clauses = gimple_omp_for_clauses (stmt);
    152  1.1  mrg 	if (omp_find_clause (clauses, OMP_CLAUSE_INDEPENDENT))
    153  1.1  mrg 	  {
    154  1.1  mrg 	    /* Explicit 'independent' clause.  */
    155  1.1  mrg 	    /* Keep going; recurse into loop body.  */
    156  1.1  mrg 	    break;
    157  1.1  mrg 	  }
    158  1.1  mrg 	else if (omp_find_clause (clauses, OMP_CLAUSE_SEQ))
    159  1.1  mrg 	  {
    160  1.1  mrg 	    /* Explicit 'seq' clause.  */
    161  1.1  mrg 	    /* We'll "parallelize" if at some level a loop construct has been
    162  1.1  mrg 	       marked up by the user as unparallelizable ('seq' clause; we'll
    163  1.1  mrg 	       respect that in the later processing).  Given that the user has
    164  1.1  mrg 	       explicitly marked it up, this loop construct cannot be
    165  1.1  mrg 	       performance-critical, and in this case it's also fine to
    166  1.1  mrg 	       "parallelize" instead of "gang-single", because any outer or
    167  1.1  mrg 	       inner loops may still exploit the available parallelism.  */
    168  1.1  mrg 	    /* Keep going; recurse into loop body.  */
    169  1.1  mrg 	    break;
    170  1.1  mrg 	  }
    171  1.1  mrg 	else
    172  1.1  mrg 	  {
    173  1.1  mrg 	    /* Explicit or implicit 'auto' clause.  */
    174  1.1  mrg 	    /* The user would like this loop analyzed ('auto' clause) and
    175  1.1  mrg 	       typically parallelized, but we don't have available yet the
    176  1.1  mrg 	       compiler logic to analyze this, so can't parallelize it here, so
    177  1.1  mrg 	       we'd very likely be running into a performance problem if we
    178  1.1  mrg 	       were to execute this unparallelized, thus forward the whole loop
    179  1.1  mrg 	       nest to 'parloops'.  */
    180  1.1  mrg 	    *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
    181  1.1  mrg 	    /* Terminate: final decision for this region.  */
    182  1.1  mrg 	    *handled_ops_p = true;
    183  1.1  mrg 	    return integer_zero_node;
    184  1.1  mrg 	  }
    185  1.1  mrg 	gcc_unreachable ();
    186  1.1  mrg       }
    187  1.1  mrg 
    188  1.1  mrg     case GIMPLE_COND:
    189  1.1  mrg     case GIMPLE_GOTO:
    190  1.1  mrg     case GIMPLE_SWITCH:
    191  1.1  mrg     case GIMPLE_ASM:
    192  1.1  mrg     case GIMPLE_TRANSACTION:
    193  1.1  mrg     case GIMPLE_RETURN:
    194  1.1  mrg       /* Statement that might constitute some looping/control flow pattern.  */
    195  1.1  mrg       /* The user would like this code analyzed (implicit inside a 'kernels'
    196  1.1  mrg 	 region) and typically parallelized, but we don't have available yet
    197  1.1  mrg 	 the compiler logic to analyze this, so can't parallelize it here, so
    198  1.1  mrg 	 we'd very likely be running into a performance problem if we were to
    199  1.1  mrg 	 execute this unparallelized, thus forward the whole thing to
    200  1.1  mrg 	 'parloops'.  */
    201  1.1  mrg       *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
    202  1.1  mrg       /* Terminate: final decision for this region.  */
    203  1.1  mrg       *handled_ops_p = true;
    204  1.1  mrg       return integer_zero_node;
    205  1.1  mrg 
    206  1.1  mrg     default:
    207  1.1  mrg       /* Keep going.  */
    208  1.1  mrg       break;
    209  1.1  mrg     }
    210  1.1  mrg 
    211  1.1  mrg   return NULL;
    212  1.1  mrg }
    213  1.1  mrg 
    214  1.1  mrg /* Adjust the REGION_CODE for the region in GS.  */
    215  1.1  mrg 
    216  1.1  mrg static void
    217  1.1  mrg adjust_region_code (gimple_seq gs, int *region_code)
    218  1.1  mrg {
    219  1.1  mrg   struct walk_stmt_info wi;
    220  1.1  mrg   memset (&wi, 0, sizeof (wi));
    221  1.1  mrg   wi.info = region_code;
    222  1.1  mrg   walk_gimple_seq (gs, adjust_region_code_walk_stmt_fn, NULL, &wi);
    223  1.1  mrg }
    224  1.1  mrg 
    225  1.1  mrg /* Helper function for make_loops_gang_single for walking the tree.  If the
    226  1.1  mrg    statement indicated by GSI_P is an OpenACC for loop with a gang clause,
    227  1.1  mrg    issue a warning and remove the clause.  */
    228  1.1  mrg 
    229  1.1  mrg static tree
    230  1.1  mrg visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p,
    231  1.1  mrg 				   bool *handled_ops_p,
    232  1.1  mrg 				   struct walk_stmt_info *)
    233  1.1  mrg {
    234  1.1  mrg   *handled_ops_p = false;
    235  1.1  mrg 
    236  1.1  mrg   gimple *stmt = gsi_stmt (*gsi_p);
    237  1.1  mrg   switch (gimple_code (stmt))
    238  1.1  mrg     {
    239  1.1  mrg     case GIMPLE_OMP_FOR:
    240  1.1  mrg       /*TODO Given the current 'adjust_region_code' algorithm, this is
    241  1.1  mrg 	actually...  */
    242  1.1  mrg #if 0
    243  1.1  mrg       gcc_unreachable ();
    244  1.1  mrg #else
    245  1.1  mrg       /* ..., but due to bugs (PR100400), we may actually come here.
    246  1.1  mrg 	 Reliably catch this, regardless of checking level.  */
    247  1.1  mrg       internal_error ("PR100400");
    248  1.1  mrg #endif
    249  1.1  mrg 
    250  1.1  mrg       {
    251  1.1  mrg 	tree clauses = gimple_omp_for_clauses (stmt);
    252  1.1  mrg 	tree prev_clause = NULL;
    253  1.1  mrg 	for (tree clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause))
    254  1.1  mrg 	  {
    255  1.1  mrg 	    if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_GANG)
    256  1.1  mrg 	      {
    257  1.1  mrg 		/* It makes no sense to have a 'gang' clause in a "gang-single"
    258  1.1  mrg 		   region, so warn and remove it.  */
    259  1.1  mrg 		warning_at (gimple_location (stmt), 0,
    260  1.1  mrg 			    "conditionally executed loop in %<kernels%> region"
    261  1.1  mrg 			    " will be executed by a single gang;"
    262  1.1  mrg 			    " ignoring %<gang%> clause");
    263  1.1  mrg 		if (prev_clause != NULL)
    264  1.1  mrg 		  OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (clause);
    265  1.1  mrg 		else
    266  1.1  mrg 		  clauses = OMP_CLAUSE_CHAIN (clause);
    267  1.1  mrg 
    268  1.1  mrg 		break;
    269  1.1  mrg 	      }
    270  1.1  mrg 	    prev_clause = clause;
    271  1.1  mrg 	  }
    272  1.1  mrg 	gimple_omp_for_set_clauses (stmt, clauses);
    273  1.1  mrg       }
    274  1.1  mrg       /* No need to recurse into nested statements; no loop nested inside
    275  1.1  mrg 	 this loop can be gang-partitioned.  */
    276  1.1  mrg       sorry ("%<gang%> loop in %<gang-single%> region");
    277  1.1  mrg       *handled_ops_p = true;
    278  1.1  mrg       break;
    279  1.1  mrg 
    280  1.1  mrg     default:
    281  1.1  mrg       break;
    282  1.1  mrg     }
    283  1.1  mrg 
    284  1.1  mrg   return NULL;
    285  1.1  mrg }
    286  1.1  mrg 
    287  1.1  mrg /* Visit all nested OpenACC loops in the sequence indicated by GS.  This
    288  1.1  mrg    statement is expected to be inside a gang-single region.  Issue a warning
    289  1.1  mrg    for any loops inside it that have gang clauses and remove the clauses.  */
    290  1.1  mrg 
    291  1.1  mrg static void
    292  1.1  mrg make_loops_gang_single (gimple_seq gs)
    293  1.1  mrg {
    294  1.1  mrg   struct walk_stmt_info wi;
    295  1.1  mrg   memset (&wi, 0, sizeof (wi));
    296  1.1  mrg   walk_gimple_seq (gs, visit_loops_in_gang_single_region, NULL, &wi);
    297  1.1  mrg }
    298  1.1  mrg 
    299  1.1  mrg /* Construct a "gang-single" compute construct at LOC containing the STMTS.
    300  1.1  mrg    Annotate with CLAUSES, which must not contain a 'num_gangs' clause, and an
    301  1.1  mrg    additional 'num_gangs (1)' clause to force "gang-single" execution.  */
    302  1.1  mrg 
    303  1.1  mrg static gimple *
    304  1.1  mrg make_region_seq (location_t loc, gimple_seq stmts,
    305  1.1  mrg 		 tree num_gangs_clause,
    306  1.1  mrg 		 tree num_workers_clause,
    307  1.1  mrg 		 tree vector_length_clause,
    308  1.1  mrg 		 tree clauses)
    309  1.1  mrg {
    310  1.1  mrg   /* This correctly unshares the entire clause chain rooted here.  */
    311  1.1  mrg   clauses = unshare_expr (clauses);
    312  1.1  mrg 
    313  1.1  mrg   dump_user_location_t loc_stmts_first = gimple_seq_first (stmts);
    314  1.1  mrg 
    315  1.1  mrg   /* Figure out the region code for this region.  */
    316  1.1  mrg   /* Optimistic default: assume "setup code", no looping; thus not
    317  1.1  mrg      performance-critical.  */
    318  1.1  mrg   int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE;
    319  1.1  mrg   adjust_region_code (stmts, &region_code);
    320  1.1  mrg 
    321  1.1  mrg   if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE)
    322  1.1  mrg     {
    323  1.1  mrg       if (dump_enabled_p ())
    324  1.1  mrg 	/*TODO MSG_MISSED_OPTIMIZATION? */
    325  1.1  mrg 	dump_printf_loc (MSG_NOTE, loc_stmts_first,
    326  1.1  mrg 			 "beginning %<gang-single%> part"
    327  1.1  mrg 			 " in OpenACC %<kernels%> region\n");
    328  1.1  mrg 
    329  1.1  mrg       /* Synthesize a 'num_gangs (1)' clause.  */
    330  1.1  mrg       tree gang_single_clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
    331  1.1  mrg       OMP_CLAUSE_OPERAND (gang_single_clause, 0) = integer_one_node;
    332  1.1  mrg       OMP_CLAUSE_CHAIN (gang_single_clause) = clauses;
    333  1.1  mrg       clauses = gang_single_clause;
    334  1.1  mrg 
    335  1.1  mrg       /* Remove and issue warnings about gang clauses on any OpenACC
    336  1.1  mrg 	 loops nested inside this sequentially executed statement.  */
    337  1.1  mrg       make_loops_gang_single (stmts);
    338  1.1  mrg     }
    339  1.1  mrg   else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS)
    340  1.1  mrg     {
    341  1.1  mrg       if (dump_enabled_p ())
    342  1.1  mrg 	dump_printf_loc (MSG_NOTE, loc_stmts_first,
    343  1.1  mrg 			 "beginning %<parloops%> part"
    344  1.1  mrg 			 " in OpenACC %<kernels%> region\n");
    345  1.1  mrg 
    346  1.1  mrg       /* As we're transforming a 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another
    347  1.1  mrg 	 'GF_OMP_TARGET_KIND_OACC_KERNELS', this isn't doing any of the clauses
    348  1.1  mrg 	 mangling that 'make_region_loop_nest' is doing.  */
    349  1.1  mrg       /* Re-assemble the clauses stripped off earlier.  */
    350  1.1  mrg       if (num_gangs_clause != NULL)
    351  1.1  mrg 	{
    352  1.1  mrg 	  tree c = unshare_expr (num_gangs_clause);
    353  1.1  mrg 	  OMP_CLAUSE_CHAIN (c) = clauses;
    354  1.1  mrg 	  clauses = c;
    355  1.1  mrg 	}
    356  1.1  mrg       if (num_workers_clause != NULL)
    357  1.1  mrg 	{
    358  1.1  mrg 	  tree c = unshare_expr (num_workers_clause);
    359  1.1  mrg 	  OMP_CLAUSE_CHAIN (c) = clauses;
    360  1.1  mrg 	  clauses = c;
    361  1.1  mrg 	}
    362  1.1  mrg       if (vector_length_clause != NULL)
    363  1.1  mrg 	{
    364  1.1  mrg 	  tree c = unshare_expr (vector_length_clause);
    365  1.1  mrg 	  OMP_CLAUSE_CHAIN (c) = clauses;
    366  1.1  mrg 	  clauses = c;
    367  1.1  mrg 	}
    368  1.1  mrg     }
    369  1.1  mrg   else
    370  1.1  mrg     gcc_unreachable ();
    371  1.1  mrg 
    372  1.1  mrg   /* Build the gang-single region.  */
    373  1.1  mrg   gimple *single_region = gimple_build_omp_target (NULL, region_code, clauses);
    374  1.1  mrg   gimple_set_location (single_region, loc);
    375  1.1  mrg   gbind *single_body = gimple_build_bind (NULL, stmts, make_node (BLOCK));
    376  1.1  mrg   gimple_omp_set_body (single_region, single_body);
    377  1.1  mrg 
    378  1.1  mrg   return single_region;
    379  1.1  mrg }
    380  1.1  mrg 
    381  1.1  mrg /* Helper function for make_region_loop_nest.  Adds a 'num_gangs'
    382  1.1  mrg    ('num_workers', 'vector_length') clause to the given CLAUSES, either the one
    383  1.1  mrg    from the parent compute construct (PARENT_CLAUSE) or a new one based on the
    384  1.1  mrg    loop's own LOOP_CLAUSE ('gang (num: N)' or similar for 'worker' or 'vector'
    385  1.1  mrg    clauses) with the given CLAUSE_CODE.  Does nothing if neither PARENT_CLAUSE
    386  1.1  mrg    nor LOOP_CLAUSE exist.  Returns the new clauses.  */
    387  1.1  mrg 
    388  1.1  mrg static tree
    389  1.1  mrg add_parent_or_loop_num_clause (tree parent_clause, tree loop_clause,
    390  1.1  mrg 			       omp_clause_code clause_code, tree clauses)
    391  1.1  mrg {
    392  1.1  mrg   if (parent_clause != NULL)
    393  1.1  mrg     {
    394  1.1  mrg       tree num_clause = unshare_expr (parent_clause);
    395  1.1  mrg       OMP_CLAUSE_CHAIN (num_clause) = clauses;
    396  1.1  mrg       clauses = num_clause;
    397  1.1  mrg     }
    398  1.1  mrg   else if (loop_clause != NULL)
    399  1.1  mrg     {
    400  1.1  mrg       /* The kernels region does not have a 'num_gangs' clause, but the loop
    401  1.1  mrg 	 itself had a 'gang (num: N)' clause.  Honor it by adding a
    402  1.1  mrg 	 'num_gangs (N)' clause on the compute construct.  */
    403  1.1  mrg       tree num = OMP_CLAUSE_OPERAND (loop_clause, 0);
    404  1.1  mrg       tree new_num_clause
    405  1.1  mrg 	= build_omp_clause (OMP_CLAUSE_LOCATION (loop_clause), clause_code);
    406  1.1  mrg       OMP_CLAUSE_OPERAND (new_num_clause, 0) = num;
    407  1.1  mrg       OMP_CLAUSE_CHAIN (new_num_clause) = clauses;
    408  1.1  mrg       clauses = new_num_clause;
    409  1.1  mrg     }
    410  1.1  mrg   return clauses;
    411  1.1  mrg }
    412  1.1  mrg 
    413  1.1  mrg /* Helper for make_region_loop_nest, looking for 'worker (num: N)' or 'vector
    414  1.1  mrg    (length: N)' clauses in nested loops.  Removes the argument, transferring it
    415  1.1  mrg    to the enclosing compute construct (via WI->INFO).  If arguments within the
    416  1.1  mrg    same loop nest conflict, emits a warning.
    417  1.1  mrg 
    418  1.1  mrg    This function also decides whether to add an 'auto' clause on each of these
    419  1.1  mrg    nested loops.  */
    420  1.1  mrg 
    421  1.1  mrg struct adjust_nested_loop_clauses_wi_info
    422  1.1  mrg {
    423  1.1  mrg   tree *loop_gang_clause_ptr;
    424  1.1  mrg   tree *loop_worker_clause_ptr;
    425  1.1  mrg   tree *loop_vector_clause_ptr;
    426  1.1  mrg };
    427  1.1  mrg 
    428  1.1  mrg static tree
    429  1.1  mrg adjust_nested_loop_clauses (gimple_stmt_iterator *gsi_p, bool *,
    430  1.1  mrg 			    struct walk_stmt_info *wi)
    431  1.1  mrg {
    432  1.1  mrg   struct adjust_nested_loop_clauses_wi_info *wi_info
    433  1.1  mrg     = (struct adjust_nested_loop_clauses_wi_info *) wi->info;
    434  1.1  mrg   gimple *stmt = gsi_stmt (*gsi_p);
    435  1.1  mrg 
    436  1.1  mrg   if (gimple_code (stmt) == GIMPLE_OMP_FOR)
    437  1.1  mrg     {
    438  1.1  mrg       bool add_auto_clause = true;
    439  1.1  mrg       tree loop_clauses = gimple_omp_for_clauses (stmt);
    440  1.1  mrg       tree loop_clause = loop_clauses;
    441  1.1  mrg       for (; loop_clause; loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
    442  1.1  mrg 	{
    443  1.1  mrg 	  tree *outer_clause_ptr = NULL;
    444  1.1  mrg 	  switch (OMP_CLAUSE_CODE (loop_clause))
    445  1.1  mrg 	    {
    446  1.1  mrg 	    case OMP_CLAUSE_GANG:
    447  1.1  mrg 	      outer_clause_ptr = wi_info->loop_gang_clause_ptr;
    448  1.1  mrg 	      break;
    449  1.1  mrg 	    case OMP_CLAUSE_WORKER:
    450  1.1  mrg 	      outer_clause_ptr = wi_info->loop_worker_clause_ptr;
    451  1.1  mrg 	      break;
    452  1.1  mrg 	    case OMP_CLAUSE_VECTOR:
    453  1.1  mrg 	      outer_clause_ptr = wi_info->loop_vector_clause_ptr;
    454  1.1  mrg 	      break;
    455  1.1  mrg 	    case OMP_CLAUSE_SEQ:
    456  1.1  mrg 	    case OMP_CLAUSE_INDEPENDENT:
    457  1.1  mrg 	    case OMP_CLAUSE_AUTO:
    458  1.1  mrg 	      add_auto_clause = false;
    459  1.1  mrg 	    default:
    460  1.1  mrg 	      break;
    461  1.1  mrg 	    }
    462  1.1  mrg 	  if (outer_clause_ptr != NULL)
    463  1.1  mrg 	    {
    464  1.1  mrg 	      if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL
    465  1.1  mrg 		  && *outer_clause_ptr == NULL)
    466  1.1  mrg 		{
    467  1.1  mrg 		  /* Transfer the clause to the enclosing compute construct and
    468  1.1  mrg 		     remove the numerical argument from the 'loop'.  */
    469  1.1  mrg 		  *outer_clause_ptr = unshare_expr (loop_clause);
    470  1.1  mrg 		  OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
    471  1.1  mrg 		}
    472  1.1  mrg 	      else if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL &&
    473  1.1  mrg 		       OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0) != NULL)
    474  1.1  mrg 		{
    475  1.1  mrg 		  /* See if both of these are the same constant.  If they
    476  1.1  mrg 		     aren't, emit a warning.  */
    477  1.1  mrg 		  tree old_op = OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0);
    478  1.1  mrg 		  tree new_op = OMP_CLAUSE_OPERAND (loop_clause, 0);
    479  1.1  mrg 		  if (!(cst_and_fits_in_hwi (old_op) &&
    480  1.1  mrg 			cst_and_fits_in_hwi (new_op) &&
    481  1.1  mrg 			int_cst_value (old_op) == int_cst_value (new_op)))
    482  1.1  mrg 		    {
    483  1.1  mrg 		      const char *clause_name
    484  1.1  mrg 			= omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
    485  1.1  mrg 		      error_at (gimple_location (stmt),
    486  1.1  mrg 				"cannot honor conflicting %qs clause",
    487  1.1  mrg 				clause_name);
    488  1.1  mrg 		      inform (OMP_CLAUSE_LOCATION (*outer_clause_ptr),
    489  1.1  mrg 			      "location of the previous clause"
    490  1.1  mrg 			      " in the same loop nest");
    491  1.1  mrg 		    }
    492  1.1  mrg 		  OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
    493  1.1  mrg 		}
    494  1.1  mrg 	    }
    495  1.1  mrg 	}
    496  1.1  mrg       if (add_auto_clause)
    497  1.1  mrg 	{
    498  1.1  mrg 	  tree auto_clause
    499  1.1  mrg 	    = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_AUTO);
    500  1.1  mrg 	  OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
    501  1.1  mrg 	  gimple_omp_for_set_clauses (stmt, auto_clause);
    502  1.1  mrg 	}
    503  1.1  mrg     }
    504  1.1  mrg 
    505  1.1  mrg   return NULL;
    506  1.1  mrg }
    507  1.1  mrg 
    508  1.1  mrg /* Helper for make_region_loop_nest.  Transform OpenACC 'kernels'/'loop'
    509  1.1  mrg    construct clauses into OpenACC 'parallel'/'loop' construct ones.  */
    510  1.1  mrg 
    511  1.1  mrg static tree
    512  1.1  mrg transform_kernels_loop_clauses (gimple *omp_for,
    513  1.1  mrg 				tree num_gangs_clause,
    514  1.1  mrg 				tree num_workers_clause,
    515  1.1  mrg 				tree vector_length_clause,
    516  1.1  mrg 				tree clauses)
    517  1.1  mrg {
    518  1.1  mrg   /* If this loop in a kernels region does not have an explicit 'seq',
    519  1.1  mrg      'independent', or 'auto' clause, we must give it an explicit 'auto'
    520  1.1  mrg      clause.
    521  1.1  mrg      We also check for 'gang (num: N)' clauses.  These must not appear in
    522  1.1  mrg      kernels regions that have their own 'num_gangs' clause.  Otherwise, they
    523  1.1  mrg      must be converted and put on the region; similarly for 'worker' and
    524  1.1  mrg      'vector' clauses.  */
    525  1.1  mrg   bool add_auto_clause = true;
    526  1.1  mrg   tree loop_gang_clause = NULL, loop_worker_clause = NULL,
    527  1.1  mrg        loop_vector_clause = NULL;
    528  1.1  mrg   tree loop_clauses = gimple_omp_for_clauses (omp_for);
    529  1.1  mrg   for (tree loop_clause = loop_clauses;
    530  1.1  mrg        loop_clause;
    531  1.1  mrg        loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
    532  1.1  mrg     {
    533  1.1  mrg       bool found_num_clause = false;
    534  1.1  mrg       tree *clause_ptr, clause_to_check;
    535  1.1  mrg       switch (OMP_CLAUSE_CODE (loop_clause))
    536  1.1  mrg 	{
    537  1.1  mrg 	case OMP_CLAUSE_GANG:
    538  1.1  mrg 	  found_num_clause = true;
    539  1.1  mrg 	  clause_ptr = &loop_gang_clause;
    540  1.1  mrg 	  clause_to_check = num_gangs_clause;
    541  1.1  mrg 	  break;
    542  1.1  mrg 	case OMP_CLAUSE_WORKER:
    543  1.1  mrg 	  found_num_clause = true;
    544  1.1  mrg 	  clause_ptr = &loop_worker_clause;
    545  1.1  mrg 	  clause_to_check = num_workers_clause;
    546  1.1  mrg 	  break;
    547  1.1  mrg 	case OMP_CLAUSE_VECTOR:
    548  1.1  mrg 	  found_num_clause = true;
    549  1.1  mrg 	  clause_ptr = &loop_vector_clause;
    550  1.1  mrg 	  clause_to_check = vector_length_clause;
    551  1.1  mrg 	  break;
    552  1.1  mrg 	case OMP_CLAUSE_INDEPENDENT:
    553  1.1  mrg 	case OMP_CLAUSE_SEQ:
    554  1.1  mrg 	case OMP_CLAUSE_AUTO:
    555  1.1  mrg 	  add_auto_clause = false;
    556  1.1  mrg 	default:
    557  1.1  mrg 	  break;
    558  1.1  mrg 	}
    559  1.1  mrg       if (found_num_clause && OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL)
    560  1.1  mrg 	{
    561  1.1  mrg 	  if (clause_to_check)
    562  1.1  mrg 	    {
    563  1.1  mrg 	      const char *clause_name
    564  1.1  mrg 		= omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
    565  1.1  mrg 	      const char *parent_clause_name
    566  1.1  mrg 		= omp_clause_code_name[OMP_CLAUSE_CODE (clause_to_check)];
    567  1.1  mrg 	      error_at (OMP_CLAUSE_LOCATION (loop_clause),
    568  1.1  mrg 			"argument not permitted on %qs clause"
    569  1.1  mrg 			" in OpenACC %<kernels%> region with a %qs clause",
    570  1.1  mrg 			clause_name, parent_clause_name);
    571  1.1  mrg 	      inform (OMP_CLAUSE_LOCATION (clause_to_check),
    572  1.1  mrg 		      "location of OpenACC %<kernels%>");
    573  1.1  mrg 	    }
    574  1.1  mrg 	  /* Copy the 'gang (N)'/'worker (N)'/'vector (N)' clause to the
    575  1.1  mrg 	     enclosing compute construct.  */
    576  1.1  mrg 	  *clause_ptr = unshare_expr (loop_clause);
    577  1.1  mrg 	  OMP_CLAUSE_CHAIN (*clause_ptr) = NULL;
    578  1.1  mrg 	  /* Leave a 'gang'/'worker'/'vector' clause on the 'loop', but without
    579  1.1  mrg 	     argument.  */
    580  1.1  mrg 	  OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
    581  1.1  mrg 	}
    582  1.1  mrg     }
    583  1.1  mrg   if (add_auto_clause)
    584  1.1  mrg     {
    585  1.1  mrg       tree auto_clause = build_omp_clause (gimple_location (omp_for),
    586  1.1  mrg 					   OMP_CLAUSE_AUTO);
    587  1.1  mrg       OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
    588  1.1  mrg       loop_clauses = auto_clause;
    589  1.1  mrg     }
    590  1.1  mrg   gimple_omp_for_set_clauses (omp_for, loop_clauses);
    591  1.1  mrg   /* We must also recurse into the loop; it might contain nested loops having
    592  1.1  mrg      their own 'worker (num: W)' or 'vector (length: V)' clauses.  Turn these
    593  1.1  mrg      into 'worker'/'vector' clauses on the compute construct.  */
    594  1.1  mrg   struct walk_stmt_info wi;
    595  1.1  mrg   memset (&wi, 0, sizeof (wi));
    596  1.1  mrg   struct adjust_nested_loop_clauses_wi_info wi_info;
    597  1.1  mrg   wi_info.loop_gang_clause_ptr = &loop_gang_clause;
    598  1.1  mrg   wi_info.loop_worker_clause_ptr = &loop_worker_clause;
    599  1.1  mrg   wi_info.loop_vector_clause_ptr = &loop_vector_clause;
    600  1.1  mrg   wi.info = &wi_info;
    601  1.1  mrg   gimple *body = gimple_omp_body (omp_for);
    602  1.1  mrg   walk_gimple_seq (body, adjust_nested_loop_clauses, NULL, &wi);
    603  1.1  mrg   /* Check if there were conflicting numbers of workers or vector length.  */
    604  1.1  mrg   if (loop_gang_clause != NULL &&
    605  1.1  mrg       OMP_CLAUSE_OPERAND (loop_gang_clause, 0) == NULL)
    606  1.1  mrg     loop_gang_clause = NULL;
    607  1.1  mrg   if (loop_worker_clause != NULL &&
    608  1.1  mrg       OMP_CLAUSE_OPERAND (loop_worker_clause, 0) == NULL)
    609  1.1  mrg     loop_worker_clause = NULL;
    610  1.1  mrg   if (loop_vector_clause != NULL &&
    611  1.1  mrg       OMP_CLAUSE_OPERAND (loop_vector_clause, 0) == NULL)
    612  1.1  mrg     vector_length_clause = NULL;
    613  1.1  mrg 
    614  1.1  mrg   /* If the kernels region had 'num_gangs', 'num_worker', 'vector_length'
    615  1.1  mrg      clauses, add these to this new compute construct.  */
    616  1.1  mrg   clauses
    617  1.1  mrg     = add_parent_or_loop_num_clause (num_gangs_clause, loop_gang_clause,
    618  1.1  mrg 				     OMP_CLAUSE_NUM_GANGS, clauses);
    619  1.1  mrg   clauses
    620  1.1  mrg     = add_parent_or_loop_num_clause (num_workers_clause, loop_worker_clause,
    621  1.1  mrg 				     OMP_CLAUSE_NUM_WORKERS, clauses);
    622  1.1  mrg   clauses
    623  1.1  mrg     = add_parent_or_loop_num_clause (vector_length_clause, loop_vector_clause,
    624  1.1  mrg 				     OMP_CLAUSE_VECTOR_LENGTH, clauses);
    625  1.1  mrg 
    626  1.1  mrg   return clauses;
    627  1.1  mrg }
    628  1.1  mrg 
    629  1.1  mrg /* Construct a possibly gang-parallel compute construct containing the STMT,
    630  1.1  mrg    which must be identical to, or a bind containing, the loop OMP_FOR.
    631  1.1  mrg 
    632  1.1  mrg    The NUM_GANGS_CLAUSE, NUM_WORKERS_CLAUSE, and VECTOR_LENGTH_CLAUSE are
    633  1.1  mrg    optional clauses from the original kernels region and must not be contained
    634  1.1  mrg    in the other CLAUSES. The newly created compute construct is annotated with
    635  1.1  mrg    the optional NUM_GANGS_CLAUSE as well as the other CLAUSES.  If there is no
    636  1.1  mrg    NUM_GANGS_CLAUSE but the loop has a 'gang (num: N)' clause, that is
    637  1.1  mrg    converted to a 'num_gangs (N)' clause on the new compute construct, and
    638  1.1  mrg    similarly for 'worker' and 'vector' clauses.
    639  1.1  mrg 
    640  1.1  mrg    The outermost loop gets an 'auto' clause unless there already is an
    641  1.1  mrg    'seq'/'independent'/'auto' clause.  Nested loops inside OMP_FOR are treated
    642  1.1  mrg    similarly by the adjust_nested_loop_clauses function.  */
    643  1.1  mrg 
    644  1.1  mrg static gimple *
    645  1.1  mrg make_region_loop_nest (gimple *omp_for, gimple_seq stmts,
    646  1.1  mrg 		       tree num_gangs_clause,
    647  1.1  mrg 		       tree num_workers_clause,
    648  1.1  mrg 		       tree vector_length_clause,
    649  1.1  mrg 		       tree clauses)
    650  1.1  mrg {
    651  1.1  mrg   /* This correctly unshares the entire clause chain rooted here.  */
    652  1.1  mrg   clauses = unshare_expr (clauses);
    653  1.1  mrg 
    654  1.1  mrg   /* Figure out the region code for this region.  */
    655  1.1  mrg   /* Optimistic default: assume that the loop nest is parallelizable
    656  1.1  mrg      (essentially, no GIMPLE_OMP_FOR with (explicit or implicit) 'auto' clause,
    657  1.1  mrg      and no un-annotated loops).  */
    658  1.1  mrg   int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED;
    659  1.1  mrg   adjust_region_code (stmts, &region_code);
    660  1.1  mrg 
    661  1.1  mrg   if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
    662  1.1  mrg     {
    663  1.1  mrg       if (dump_enabled_p ())
    664  1.1  mrg 	/* This is not MSG_OPTIMIZED_LOCATIONS, as we're just doing what the
    665  1.1  mrg 	   user asked us to.  */
    666  1.1  mrg 	dump_printf_loc (MSG_NOTE, omp_for,
    667  1.1  mrg 			 "parallelized loop nest"
    668  1.1  mrg 			 " in OpenACC %<kernels%> region\n");
    669  1.1  mrg 
    670  1.1  mrg       clauses = transform_kernels_loop_clauses (omp_for,
    671  1.1  mrg 						num_gangs_clause,
    672  1.1  mrg 						num_workers_clause,
    673  1.1  mrg 						vector_length_clause,
    674  1.1  mrg 						clauses);
    675  1.1  mrg     }
    676  1.1  mrg   else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS)
    677  1.1  mrg     {
    678  1.1  mrg       if (dump_enabled_p ())
    679  1.1  mrg 	dump_printf_loc (MSG_NOTE, omp_for,
    680  1.1  mrg 			 "forwarded loop nest"
    681  1.1  mrg 			 " in OpenACC %<kernels%> region"
    682  1.1  mrg 			 " to %<parloops%> for analysis\n");
    683  1.1  mrg 
    684  1.1  mrg       /* We're transforming one 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another
    685  1.1  mrg 	 'GF_OMP_TARGET_KIND_OACC_KERNELS', so don't have to
    686  1.1  mrg 	 'transform_kernels_loop_clauses'.  */
    687  1.1  mrg       /* Re-assemble the clauses stripped off earlier.  */
    688  1.1  mrg       clauses
    689  1.1  mrg 	= add_parent_or_loop_num_clause (num_gangs_clause, NULL,
    690  1.1  mrg 					 OMP_CLAUSE_NUM_GANGS, clauses);
    691  1.1  mrg       clauses
    692  1.1  mrg 	= add_parent_or_loop_num_clause (num_workers_clause, NULL,
    693  1.1  mrg 					 OMP_CLAUSE_NUM_WORKERS, clauses);
    694  1.1  mrg       clauses
    695  1.1  mrg 	= add_parent_or_loop_num_clause (vector_length_clause, NULL,
    696  1.1  mrg 					 OMP_CLAUSE_VECTOR_LENGTH, clauses);
    697  1.1  mrg     }
    698  1.1  mrg   else
    699  1.1  mrg     gcc_unreachable ();
    700  1.1  mrg 
    701  1.1  mrg   gimple *parallel_body_bind
    702  1.1  mrg     = gimple_build_bind (NULL, stmts, make_node (BLOCK));
    703  1.1  mrg   gimple *parallel_region
    704  1.1  mrg     = gimple_build_omp_target (parallel_body_bind, region_code, clauses);
    705  1.1  mrg   gimple_set_location (parallel_region, gimple_location (omp_for));
    706  1.1  mrg 
    707  1.1  mrg   return parallel_region;
    708  1.1  mrg }
    709  1.1  mrg 
    710  1.1  mrg /* Eliminate any binds directly inside BIND by adding their statements to
    711  1.1  mrg    BIND (i.e., modifying it in place), excluding binds that hold only an
    712  1.1  mrg    OMP_FOR loop and associated setup/cleanup code.  Recurse into binds but
    713  1.1  mrg    not other statements.  Return a chain of the local variables of eliminated
    714  1.1  mrg    binds, i.e., the local variables found in nested binds.  If
    715  1.1  mrg    INCLUDE_TOPLEVEL_VARS is true, this also includes the variables belonging
    716  1.1  mrg    to BIND itself. */
    717  1.1  mrg 
    718  1.1  mrg static tree
    719  1.1  mrg flatten_binds (gbind *bind, bool include_toplevel_vars = false)
    720  1.1  mrg {
    721  1.1  mrg   tree vars = NULL, last_var = NULL;
    722  1.1  mrg 
    723  1.1  mrg   if (include_toplevel_vars)
    724  1.1  mrg     {
    725  1.1  mrg       vars = gimple_bind_vars (bind);
    726  1.1  mrg       last_var = vars;
    727  1.1  mrg     }
    728  1.1  mrg 
    729  1.1  mrg   gimple_seq new_body = NULL;
    730  1.1  mrg   gimple_seq body_sequence = gimple_bind_body (bind);
    731  1.1  mrg   gimple_stmt_iterator gsi, gsi_n;
    732  1.1  mrg   for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n)
    733  1.1  mrg     {
    734  1.1  mrg       /* Advance the iterator here because otherwise it would be invalidated
    735  1.1  mrg 	 by moving statements below.  */
    736  1.1  mrg       gsi_n = gsi;
    737  1.1  mrg       gsi_next (&gsi_n);
    738  1.1  mrg 
    739  1.1  mrg       gimple *stmt = gsi_stmt (gsi);
    740  1.1  mrg       /* Flatten bind statements, except the ones that contain only an
    741  1.1  mrg 	 OpenACC for loop.  */
    742  1.1  mrg       if (gimple_code (stmt) == GIMPLE_BIND
    743  1.1  mrg 	  && !top_level_omp_for_in_stmt (stmt))
    744  1.1  mrg 	{
    745  1.1  mrg 	  gbind *inner_bind = as_a <gbind *> (stmt);
    746  1.1  mrg 	  /* Flatten recursively, and collect all variables.  */
    747  1.1  mrg 	  tree inner_vars = flatten_binds (inner_bind, true);
    748  1.1  mrg 	  gimple_seq inner_sequence = gimple_bind_body (inner_bind);
    749  1.1  mrg 	  if (flag_checking)
    750  1.1  mrg 	    {
    751  1.1  mrg 	      for (gimple_stmt_iterator inner_gsi = gsi_start (inner_sequence);
    752  1.1  mrg 		   !gsi_end_p (inner_gsi);
    753  1.1  mrg 		   gsi_next (&inner_gsi))
    754  1.1  mrg 		{
    755  1.1  mrg 		  gimple *inner_stmt = gsi_stmt (inner_gsi);
    756  1.1  mrg 		  gcc_assert (gimple_code (inner_stmt) != GIMPLE_BIND
    757  1.1  mrg 			      || top_level_omp_for_in_stmt (inner_stmt));
    758  1.1  mrg 		}
    759  1.1  mrg 	    }
    760  1.1  mrg 	  gimple_seq_add_seq (&new_body, inner_sequence);
    761  1.1  mrg 	  /* Find the last variable; we will append others to it.  */
    762  1.1  mrg 	  while (last_var != NULL && TREE_CHAIN (last_var) != NULL)
    763  1.1  mrg 	    last_var = TREE_CHAIN (last_var);
    764  1.1  mrg 	  if (last_var != NULL)
    765  1.1  mrg 	    {
    766  1.1  mrg 	      TREE_CHAIN (last_var) = inner_vars;
    767  1.1  mrg 	      last_var = inner_vars;
    768  1.1  mrg 	    }
    769  1.1  mrg 	  else
    770  1.1  mrg 	    {
    771  1.1  mrg 	      vars = inner_vars;
    772  1.1  mrg 	      last_var = vars;
    773  1.1  mrg 	    }
    774  1.1  mrg 	}
    775  1.1  mrg       else
    776  1.1  mrg 	gimple_seq_add_stmt (&new_body, stmt);
    777  1.1  mrg     }
    778  1.1  mrg 
    779  1.1  mrg   /* Put the possibly transformed body back into the bind.  */
    780  1.1  mrg   gimple_bind_set_body (bind, new_body);
    781  1.1  mrg   return vars;
    782  1.1  mrg }
    783  1.1  mrg 
    784  1.1  mrg /* Helper function for places where we construct data regions.  Wraps the BODY
    785  1.1  mrg    inside a try-finally construct at LOC that calls __builtin_GOACC_data_end
    786  1.1  mrg    in its cleanup block.  Returns this try statement.  */
    787  1.1  mrg 
    788  1.1  mrg static gimple *
    789  1.1  mrg make_data_region_try_statement (location_t loc, gimple *body)
    790  1.1  mrg {
    791  1.1  mrg   tree data_end_fn = builtin_decl_explicit (BUILT_IN_GOACC_DATA_END);
    792  1.1  mrg   gimple *call = gimple_build_call (data_end_fn, 0);
    793  1.1  mrg   gimple_seq cleanup = NULL;
    794  1.1  mrg   gimple_seq_add_stmt (&cleanup, call);
    795  1.1  mrg   gimple *try_stmt = gimple_build_try (body, cleanup, GIMPLE_TRY_FINALLY);
    796  1.1  mrg   gimple_set_location (body, loc);
    797  1.1  mrg   return try_stmt;
    798  1.1  mrg }
    799  1.1  mrg 
    800  1.1  mrg /* If INNER_BIND_VARS holds variables, build an OpenACC data region with
    801  1.1  mrg    location LOC containing BODY and having 'create (var)' clauses for each
    802  1.1  mrg    variable (as a side effect, such variables also get TREE_ADDRESSABLE set).
    803  1.1  mrg    If INNER_CLEANUP is present, add a try-finally statement with
    804  1.1  mrg    this cleanup code in the finally block.  Return the new data region, or
    805  1.1  mrg    the original BODY if no data region was needed.  */
    806  1.1  mrg 
    807  1.1  mrg static gimple *
    808  1.1  mrg maybe_build_inner_data_region (location_t loc, gimple *body,
    809  1.1  mrg 			       tree inner_bind_vars, gimple *inner_cleanup)
    810  1.1  mrg {
    811  1.1  mrg   /* Is this an instantiation of a template?  (In this case, we don't care what
    812  1.1  mrg      the generic decl is - just whether the function decl has one.)  */
    813  1.1  mrg   bool generic_inst_p
    814  1.1  mrg     = (lang_hooks.decls.get_generic_function_decl (current_function_decl)
    815  1.1  mrg        != NULL);
    816  1.1  mrg 
    817  1.1  mrg   /* Build data 'create (var)' clauses for these local variables.
    818  1.1  mrg      Below we will add these to a data region enclosing the entire body
    819  1.1  mrg      of the decomposed kernels region.  */
    820  1.1  mrg   tree prev_mapped_var = NULL, next = NULL, artificial_vars = NULL,
    821  1.1  mrg        inner_data_clauses = NULL;
    822  1.1  mrg   for (tree v = inner_bind_vars; v; v = next)
    823  1.1  mrg     {
    824  1.1  mrg       next = TREE_CHAIN (v);
    825  1.1  mrg       if (DECL_ARTIFICIAL (v)
    826  1.1  mrg 	  || TREE_CODE (v) == CONST_DECL
    827  1.1  mrg 	  || generic_inst_p)
    828  1.1  mrg 	{
    829  1.1  mrg 	  /* If this is an artificial temporary, it need not be mapped.  We
    830  1.1  mrg 	     move its declaration into the bind inside the data region.
    831  1.1  mrg 	     Also avoid mapping variables if we are inside a template
    832  1.1  mrg 	     instantiation; the code does not contain all the copies to
    833  1.1  mrg 	     temporaries that would make this legal.  */
    834  1.1  mrg 	  TREE_CHAIN (v) = artificial_vars;
    835  1.1  mrg 	  artificial_vars = v;
    836  1.1  mrg 	  if (prev_mapped_var != NULL)
    837  1.1  mrg 	    TREE_CHAIN (prev_mapped_var) = next;
    838  1.1  mrg 	  else
    839  1.1  mrg 	    inner_bind_vars = next;
    840  1.1  mrg 	}
    841  1.1  mrg       else
    842  1.1  mrg 	{
    843  1.1  mrg 	  /* Otherwise, build the map clause.  */
    844  1.1  mrg 	  tree new_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
    845  1.1  mrg 	  OMP_CLAUSE_SET_MAP_KIND (new_clause, GOMP_MAP_ALLOC);
    846  1.1  mrg 	  OMP_CLAUSE_DECL (new_clause) = v;
    847  1.1  mrg 	  OMP_CLAUSE_SIZE (new_clause) = DECL_SIZE_UNIT (v);
    848  1.1  mrg 	  OMP_CLAUSE_CHAIN (new_clause) = inner_data_clauses;
    849  1.1  mrg 	  inner_data_clauses = new_clause;
    850  1.1  mrg 
    851  1.1  mrg 	  prev_mapped_var = v;
    852  1.1  mrg 
    853  1.1  mrg 	  /* See <https://gcc.gnu.org/PR100280>.  */
    854  1.1  mrg 	  if (!TREE_ADDRESSABLE (v))
    855  1.1  mrg 	    {
    856  1.1  mrg 	      /* Request that OMP lowering make 'v' addressable.  */
    857  1.1  mrg 	      OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1;
    858  1.1  mrg 
    859  1.1  mrg 	      if (dump_enabled_p ())
    860  1.1  mrg 		{
    861  1.1  mrg 		  const dump_user_location_t d_u_loc
    862  1.1  mrg 		    = dump_user_location_t::from_location_t (loc);
    863  1.1  mrg 		  /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
    864  1.1  mrg #if __GNUC__ >= 10
    865  1.1  mrg # pragma GCC diagnostic push
    866  1.1  mrg # pragma GCC diagnostic ignored "-Wformat"
    867  1.1  mrg #endif
    868  1.1  mrg 		  dump_printf_loc (MSG_NOTE, d_u_loc,
    869  1.1  mrg 				   "OpenACC %<kernels%> decomposition:"
    870  1.1  mrg 				   " variable %<%T%> declared in block"
    871  1.1  mrg 				   " requested to be made addressable\n",
    872  1.1  mrg 				   v);
    873  1.1  mrg #if __GNUC__ >= 10
    874  1.1  mrg # pragma GCC diagnostic pop
    875  1.1  mrg #endif
    876  1.1  mrg 		}
    877  1.1  mrg 	    }
    878  1.1  mrg 	}
    879  1.1  mrg     }
    880  1.1  mrg 
    881  1.1  mrg   if (artificial_vars)
    882  1.1  mrg     body = gimple_build_bind (artificial_vars, body, make_node (BLOCK));
    883  1.1  mrg 
    884  1.1  mrg   /* If we determined above that there are variables that need to be created
    885  1.1  mrg      on the device, construct a data region for them and wrap the body
    886  1.1  mrg      inside that.  */
    887  1.1  mrg   if (inner_data_clauses != NULL)
    888  1.1  mrg     {
    889  1.1  mrg       gcc_assert (inner_bind_vars != NULL);
    890  1.1  mrg       gimple *inner_data_region
    891  1.1  mrg 	= gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
    892  1.1  mrg 				   inner_data_clauses);
    893  1.1  mrg       gimple_set_location (inner_data_region, loc);
    894  1.1  mrg       /* Make sure __builtin_GOACC_data_end is called at the end.  */
    895  1.1  mrg       gimple *try_stmt = make_data_region_try_statement (loc, body);
    896  1.1  mrg       gimple_omp_set_body (inner_data_region, try_stmt);
    897  1.1  mrg       gimple *bind_body;
    898  1.1  mrg       if (inner_cleanup != NULL)
    899  1.1  mrg 	/* Clobber all the inner variables that need to be clobbered.  */
    900  1.1  mrg 	bind_body = gimple_build_try (inner_data_region, inner_cleanup,
    901  1.1  mrg 				      GIMPLE_TRY_FINALLY);
    902  1.1  mrg       else
    903  1.1  mrg 	bind_body = inner_data_region;
    904  1.1  mrg       body = gimple_build_bind (inner_bind_vars, bind_body, make_node (BLOCK));
    905  1.1  mrg     }
    906  1.1  mrg 
    907  1.1  mrg   return body;
    908  1.1  mrg }
    909  1.1  mrg 
    910  1.1  mrg static void
    911  1.1  mrg add_wait (location_t loc, gimple_seq *region_body)
    912  1.1  mrg {
    913  1.1  mrg   /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0).  */
    914  1.1  mrg   tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
    915  1.1  mrg   tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
    916  1.1  mrg   gimple *wait_call = gimple_build_call (wait_fn, 2,
    917  1.1  mrg 					 sync_arg, integer_zero_node);
    918  1.1  mrg   gimple_set_location (wait_call, loc);
    919  1.1  mrg   gimple_seq_add_stmt (region_body, wait_call);
    920  1.1  mrg }
    921  1.1  mrg 
    922  1.1  mrg /* Helper function of decompose_kernels_region_body.  The statements in
    923  1.1  mrg    REGION_BODY are expected to be decomposed parts; add an 'async' clause to
    924  1.1  mrg    each.  Also add a 'wait' directive at the end of the sequence.  */
    925  1.1  mrg 
    926  1.1  mrg static void
    927  1.1  mrg add_async_clauses_and_wait (location_t loc, gimple_seq *region_body)
    928  1.1  mrg {
    929  1.1  mrg   tree default_async_queue
    930  1.1  mrg     = build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL);
    931  1.1  mrg   for (gimple_stmt_iterator gsi = gsi_start (*region_body);
    932  1.1  mrg        !gsi_end_p (gsi);
    933  1.1  mrg        gsi_next (&gsi))
    934  1.1  mrg     {
    935  1.1  mrg       gimple *stmt = gsi_stmt (gsi);
    936  1.1  mrg       tree target_clauses = gimple_omp_target_clauses (stmt);
    937  1.1  mrg       tree new_async_clause = build_omp_clause (loc, OMP_CLAUSE_ASYNC);
    938  1.1  mrg       OMP_CLAUSE_OPERAND (new_async_clause, 0) = default_async_queue;
    939  1.1  mrg       OMP_CLAUSE_CHAIN (new_async_clause) = target_clauses;
    940  1.1  mrg       target_clauses = new_async_clause;
    941  1.1  mrg       gimple_omp_target_set_clauses (as_a <gomp_target *> (stmt),
    942  1.1  mrg 				     target_clauses);
    943  1.1  mrg     }
    944  1.1  mrg   add_wait (loc, region_body);
    945  1.1  mrg }
    946  1.1  mrg 
    947  1.1  mrg /* Auxiliary analysis of the body of a kernels region, to determine for each
    948  1.1  mrg    OpenACC loop whether it is control-dependent (i.e., not necessarily
    949  1.1  mrg    executed every time the kernels region is entered) or not.
    950  1.1  mrg    We say that a loop is control-dependent if there is some cond, switch, or
    951  1.1  mrg    goto statement that jumps over it, forwards or backwards.  For example,
    952  1.1  mrg    if the loop is controlled by an if statement, then a jump to the true
    953  1.1  mrg    block, the false block, or from one of those blocks to the control flow
    954  1.1  mrg    join point will necessarily jump over the loop.
    955  1.1  mrg    This analysis implements an ad-hoc union-find data structure classifying
    956  1.1  mrg    statements into "control-flow regions" as follows: Most statements are in
    957  1.1  mrg    the same region as their predecessor, except that each OpenACC loop is in
    958  1.1  mrg    a region of its own, and each OpenACC loop's successor starts a new
    959  1.1  mrg    region.  We then unite the regions of any statements linked by jumps,
    960  1.1  mrg    placing any cond, switch, or goto statement in the same region as its
    961  1.1  mrg    target label(s).
    962  1.1  mrg    In the end, control dependence of OpenACC loops can be determined by
    963  1.1  mrg    comparing their immediate predecessor and successor statements' regions.
    964  1.1  mrg    A jump crosses the loop if and only if the predecessor and successor are
    965  1.1  mrg    in the same region.  (If there is no predecessor or successor, the loop
    966  1.1  mrg    is executed unconditionally.)
    967  1.1  mrg    The methods in this class identify statements by their index in the
    968  1.1  mrg    kernels region's body.  */
    969  1.1  mrg 
    970  1.1  mrg class control_flow_regions
    971  1.1  mrg {
    972  1.1  mrg   public:
    973  1.1  mrg     /* Initialize an instance and pre-compute the control-flow region
    974  1.1  mrg        information for the statement sequence SEQ.  */
    975  1.1  mrg     control_flow_regions (gimple_seq seq);
    976  1.1  mrg 
    977  1.1  mrg     /* Return true if the statement with the given index IDX in the analyzed
    978  1.1  mrg        statement sequence is an unconditionally executed OpenACC loop.  */
    979  1.1  mrg     bool is_unconditional_oacc_for_loop (size_t idx);
    980  1.1  mrg 
    981  1.1  mrg   private:
    982  1.1  mrg     /* Find the region representative for the statement identified by index
    983  1.1  mrg        STMT_IDX.  */
    984  1.1  mrg     size_t find_rep (size_t stmt_idx);
    985  1.1  mrg 
    986  1.1  mrg     /* Union the regions containing the statements represented by
    987  1.1  mrg        representatives A and B.  */
    988  1.1  mrg     void union_reps (size_t a, size_t b);
    989  1.1  mrg 
    990  1.1  mrg     /* Helper for the constructor.  Performs the actual computation of the
    991  1.1  mrg        control-flow regions in the statement sequence SEQ.  */
    992  1.1  mrg     void compute_regions (gimple_seq seq);
    993  1.1  mrg 
    994  1.1  mrg     /* The mapping from statement indices to region representatives.  */
    995  1.1  mrg     vec <size_t> representatives;
    996  1.1  mrg 
    997  1.1  mrg     /* A cache mapping statement indices to a flag indicating whether the
    998  1.1  mrg        statement is a top level OpenACC for loop.  */
    999  1.1  mrg     vec <bool> omp_for_loops;
   1000  1.1  mrg };
   1001  1.1  mrg 
   1002  1.1  mrg control_flow_regions::control_flow_regions (gimple_seq seq)
   1003  1.1  mrg {
   1004  1.1  mrg   representatives.create (1);
   1005  1.1  mrg   omp_for_loops.create (1);
   1006  1.1  mrg   compute_regions (seq);
   1007  1.1  mrg }
   1008  1.1  mrg 
   1009  1.1  mrg bool
   1010  1.1  mrg control_flow_regions::is_unconditional_oacc_for_loop (size_t idx)
   1011  1.1  mrg {
   1012  1.1  mrg   if (idx == 0 || idx == representatives.length () - 1)
   1013  1.1  mrg     /* The first or last statement in the kernels region.  This means that
   1014  1.1  mrg        there is no room before or after it for a jump or a label.  Thus
   1015  1.1  mrg        there cannot be a jump across it, so it is unconditional.  */
   1016  1.1  mrg     return true;
   1017  1.1  mrg   /* Otherwise, the loop is unconditional if the statements before and after
   1018  1.1  mrg      it are in different control flow regions.  Scan forward and backward,
   1019  1.1  mrg      skipping over neighboring OpenACC for loops, to find these preceding
   1020  1.1  mrg      statements.  */
   1021  1.1  mrg   size_t prev_index = idx - 1;
   1022  1.1  mrg   while (prev_index > 0 && omp_for_loops [prev_index] == true)
   1023  1.1  mrg     prev_index--;
   1024  1.1  mrg   /* If all preceding statements are also OpenACC loops, all of these are
   1025  1.1  mrg      unconditional.  */
   1026  1.1  mrg   if (prev_index == 0)
   1027  1.1  mrg     return true;
   1028  1.1  mrg   size_t succ_index = idx + 1;
   1029  1.1  mrg   while (succ_index < omp_for_loops.length ()
   1030  1.1  mrg 	 && omp_for_loops [succ_index] == true)
   1031  1.1  mrg     succ_index++;
   1032  1.1  mrg   /* If all following statements are also OpenACC loops, all of these are
   1033  1.1  mrg      unconditional.  */
   1034  1.1  mrg   if (succ_index == omp_for_loops.length ())
   1035  1.1  mrg     return true;
   1036  1.1  mrg   return (find_rep (prev_index) != find_rep (succ_index));
   1037  1.1  mrg }
   1038  1.1  mrg 
   1039  1.1  mrg size_t
   1040  1.1  mrg control_flow_regions::find_rep (size_t stmt_idx)
   1041  1.1  mrg {
   1042  1.1  mrg   size_t rep = stmt_idx, aux = stmt_idx;
   1043  1.1  mrg   /* Find the root representative of this statement.  */
   1044  1.1  mrg   while (representatives[rep] != rep)
   1045  1.1  mrg     rep = representatives[rep];
   1046  1.1  mrg   /* Compress the path from the original statement to the representative.  */
   1047  1.1  mrg   while (representatives[aux] != rep)
   1048  1.1  mrg     {
   1049  1.1  mrg       size_t tmp = representatives[aux];
   1050  1.1  mrg       representatives[aux] = rep;
   1051  1.1  mrg       aux = tmp;
   1052  1.1  mrg     }
   1053  1.1  mrg   return rep;
   1054  1.1  mrg }
   1055  1.1  mrg 
   1056  1.1  mrg void
   1057  1.1  mrg control_flow_regions::union_reps (size_t a, size_t b)
   1058  1.1  mrg {
   1059  1.1  mrg   a = find_rep (a);
   1060  1.1  mrg   b = find_rep (b);
   1061  1.1  mrg   representatives[b] = a;
   1062  1.1  mrg }
   1063  1.1  mrg 
   1064  1.1  mrg void
   1065  1.1  mrg control_flow_regions::compute_regions (gimple_seq seq)
   1066  1.1  mrg {
   1067  1.1  mrg   hash_map <gimple *, size_t> control_flow_reps;
   1068  1.1  mrg   hash_map <tree, size_t> label_reps;
   1069  1.1  mrg   size_t current_region = 0, idx = 0;
   1070  1.1  mrg 
   1071  1.1  mrg   /* In a first pass, assign an initial region to each statement.  Except in
   1072  1.1  mrg      the case of OpenACC loops, each statement simply gets the same region
   1073  1.1  mrg      representative as its predecessor.  */
   1074  1.1  mrg   for (gimple_stmt_iterator gsi = gsi_start (seq);
   1075  1.1  mrg        !gsi_end_p (gsi);
   1076  1.1  mrg        gsi_next (&gsi))
   1077  1.1  mrg     {
   1078  1.1  mrg       gimple *stmt = gsi_stmt (gsi);
   1079  1.1  mrg       gimple *omp_for = top_level_omp_for_in_stmt (stmt);
   1080  1.1  mrg       omp_for_loops.safe_push (omp_for != NULL);
   1081  1.1  mrg       if (omp_for != NULL)
   1082  1.1  mrg 	{
   1083  1.1  mrg 	  /* Assign a new region to this loop and to its successor.  */
   1084  1.1  mrg 	  current_region = idx;
   1085  1.1  mrg 	  representatives.safe_push (current_region);
   1086  1.1  mrg 	  current_region++;
   1087  1.1  mrg 	}
   1088  1.1  mrg       else
   1089  1.1  mrg 	{
   1090  1.1  mrg 	  representatives.safe_push (current_region);
   1091  1.1  mrg 	  /* Remember any jumps and labels for the second pass below.  */
   1092  1.1  mrg 	  if (gimple_code (stmt) == GIMPLE_COND
   1093  1.1  mrg 	      || gimple_code (stmt) == GIMPLE_SWITCH
   1094  1.1  mrg 	      || gimple_code (stmt) == GIMPLE_GOTO)
   1095  1.1  mrg 	    control_flow_reps.put (stmt, current_region);
   1096  1.1  mrg 	  else if (gimple_code (stmt) == GIMPLE_LABEL)
   1097  1.1  mrg 	    label_reps.put (gimple_label_label (as_a <glabel *> (stmt)),
   1098  1.1  mrg 			    current_region);
   1099  1.1  mrg 	}
   1100  1.1  mrg       idx++;
   1101  1.1  mrg     }
   1102  1.1  mrg   gcc_assert (representatives.length () == omp_for_loops.length ());
   1103  1.1  mrg 
   1104  1.1  mrg   /* Revisit all the control flow statements and union the region of each
   1105  1.1  mrg      cond, switch, or goto statement with the target labels' regions.  */
   1106  1.1  mrg   for (hash_map <gimple *, size_t>::iterator it = control_flow_reps.begin ();
   1107  1.1  mrg        it != control_flow_reps.end ();
   1108  1.1  mrg        ++it)
   1109  1.1  mrg     {
   1110  1.1  mrg       gimple *stmt = (*it).first;
   1111  1.1  mrg       size_t stmt_rep = (*it).second;
   1112  1.1  mrg       switch (gimple_code (stmt))
   1113  1.1  mrg 	{
   1114  1.1  mrg 	  tree label;
   1115  1.1  mrg 	  unsigned int n;
   1116  1.1  mrg 
   1117  1.1  mrg 	case GIMPLE_COND:
   1118  1.1  mrg 	  label = gimple_cond_true_label (as_a <gcond *> (stmt));
   1119  1.1  mrg 	  union_reps (stmt_rep, *label_reps.get (label));
   1120  1.1  mrg 	  label = gimple_cond_false_label (as_a <gcond *> (stmt));
   1121  1.1  mrg 	  union_reps (stmt_rep, *label_reps.get (label));
   1122  1.1  mrg 	  break;
   1123  1.1  mrg 
   1124  1.1  mrg 	case GIMPLE_SWITCH:
   1125  1.1  mrg 	  n = gimple_switch_num_labels (as_a <gswitch *> (stmt));
   1126  1.1  mrg 	  for (unsigned int i = 0; i < n; i++)
   1127  1.1  mrg 	    {
   1128  1.1  mrg 	      tree switch_case
   1129  1.1  mrg 		= gimple_switch_label (as_a <gswitch *> (stmt), i);
   1130  1.1  mrg 	      label = CASE_LABEL (switch_case);
   1131  1.1  mrg 	      union_reps (stmt_rep, *label_reps.get (label));
   1132  1.1  mrg 	    }
   1133  1.1  mrg 	  break;
   1134  1.1  mrg 
   1135  1.1  mrg 	case GIMPLE_GOTO:
   1136  1.1  mrg 	  label = gimple_goto_dest (stmt);
   1137  1.1  mrg 	  union_reps (stmt_rep, *label_reps.get (label));
   1138  1.1  mrg 	  break;
   1139  1.1  mrg 
   1140  1.1  mrg 	default:
   1141  1.1  mrg 	  gcc_unreachable ();
   1142  1.1  mrg 	}
   1143  1.1  mrg     }
   1144  1.1  mrg }
   1145  1.1  mrg 
   1146  1.1  mrg /* Decompose the body of the KERNELS_REGION, which was originally annotated
   1147  1.1  mrg    with the KERNELS_CLAUSES, into a series of compute constructs.  */
   1148  1.1  mrg 
   1149  1.1  mrg static gimple *
   1150  1.1  mrg decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
   1151  1.1  mrg {
   1152  1.1  mrg   location_t loc = gimple_location (kernels_region);
   1153  1.1  mrg 
   1154  1.1  mrg   /* The kernels clauses will be propagated to the child clauses unmodified,
   1155  1.1  mrg      except that the 'num_gangs', 'num_workers', and 'vector_length' clauses
   1156  1.1  mrg      will only be added to loop regions.  The other regions are "gang-single"
   1157  1.1  mrg      and get an explicit 'num_gangs (1)' clause.  So separate out the
   1158  1.1  mrg      'num_gangs', 'num_workers', and 'vector_length' clauses here.
   1159  1.1  mrg      Also check for the presence of an 'async' clause but do not remove it from
   1160  1.1  mrg      the 'kernels' clauses.  */
   1161  1.1  mrg   tree num_gangs_clause = NULL, num_workers_clause = NULL,
   1162  1.1  mrg        vector_length_clause = NULL;
   1163  1.1  mrg   tree async_clause = NULL;
   1164  1.1  mrg   tree prev_clause = NULL, next_clause = NULL;
   1165  1.1  mrg   tree parallel_clauses = kernels_clauses;
   1166  1.1  mrg   for (tree c = parallel_clauses; c; c = next_clause)
   1167  1.1  mrg     {
   1168  1.1  mrg       /* Preserve this here, as we might NULL it later.  */
   1169  1.1  mrg       next_clause = OMP_CLAUSE_CHAIN (c);
   1170  1.1  mrg 
   1171  1.1  mrg       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS
   1172  1.1  mrg 	  || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS
   1173  1.1  mrg 	  || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH)
   1174  1.1  mrg 	{
   1175  1.1  mrg 	  /* Cut this clause out of the chain.  */
   1176  1.1  mrg 	  if (prev_clause != NULL)
   1177  1.1  mrg 	    OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (c);
   1178  1.1  mrg 	  else
   1179  1.1  mrg 	    kernels_clauses = OMP_CLAUSE_CHAIN (c);
   1180  1.1  mrg 	  OMP_CLAUSE_CHAIN (c) = NULL;
   1181  1.1  mrg 	  switch (OMP_CLAUSE_CODE (c))
   1182  1.1  mrg 	    {
   1183  1.1  mrg 	    case OMP_CLAUSE_NUM_GANGS:
   1184  1.1  mrg 	      num_gangs_clause = c;
   1185  1.1  mrg 	      break;
   1186  1.1  mrg 	    case OMP_CLAUSE_NUM_WORKERS:
   1187  1.1  mrg 	      num_workers_clause = c;
   1188  1.1  mrg 	      break;
   1189  1.1  mrg 	    case OMP_CLAUSE_VECTOR_LENGTH:
   1190  1.1  mrg 	      vector_length_clause = c;
   1191  1.1  mrg 	      break;
   1192  1.1  mrg 	    default:
   1193  1.1  mrg 	      gcc_unreachable ();
   1194  1.1  mrg 	    }
   1195  1.1  mrg 	}
   1196  1.1  mrg       else
   1197  1.1  mrg 	prev_clause = c;
   1198  1.1  mrg       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC)
   1199  1.1  mrg 	async_clause = c;
   1200  1.1  mrg     }
   1201  1.1  mrg 
   1202  1.1  mrg   gimple *kernels_body = gimple_omp_body (kernels_region);
   1203  1.1  mrg   gbind *kernels_bind = as_a <gbind *> (kernels_body);
   1204  1.1  mrg 
   1205  1.1  mrg   /* The body of the region may contain other nested binds declaring inner
   1206  1.1  mrg      local variables.  Collapse all these binds into one to ensure that we
   1207  1.1  mrg      have a single sequence of statements to iterate over; also, collect all
   1208  1.1  mrg      inner variables.  */
   1209  1.1  mrg   tree inner_bind_vars = flatten_binds (kernels_bind);
   1210  1.1  mrg   gimple_seq body_sequence = gimple_bind_body (kernels_bind);
   1211  1.1  mrg 
   1212  1.1  mrg   /* All these inner variables will get allocated on the device (below, by
   1213  1.1  mrg      calling maybe_build_inner_data_region).  Here we create 'present'
   1214  1.1  mrg      clauses for them and add these clauses to the list of clauses to be
   1215  1.1  mrg      attached to each inner compute construct.  */
   1216  1.1  mrg   tree present_clauses = kernels_clauses;
   1217  1.1  mrg   for (tree var = inner_bind_vars; var; var = TREE_CHAIN (var))
   1218  1.1  mrg     {
   1219  1.1  mrg       if (!DECL_ARTIFICIAL (var) && TREE_CODE (var) != CONST_DECL)
   1220  1.1  mrg 	{
   1221  1.1  mrg 	  tree present_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
   1222  1.1  mrg 	  OMP_CLAUSE_SET_MAP_KIND (present_clause, GOMP_MAP_FORCE_PRESENT);
   1223  1.1  mrg 	  OMP_CLAUSE_DECL (present_clause) = var;
   1224  1.1  mrg 	  OMP_CLAUSE_SIZE (present_clause) = DECL_SIZE_UNIT (var);
   1225  1.1  mrg 	  OMP_CLAUSE_CHAIN (present_clause) = present_clauses;
   1226  1.1  mrg 	  present_clauses = present_clause;
   1227  1.1  mrg 	}
   1228  1.1  mrg     }
   1229  1.1  mrg   kernels_clauses = present_clauses;
   1230  1.1  mrg 
   1231  1.1  mrg   /* In addition to nested binds, the "real" body of the region may be
   1232  1.1  mrg      nested inside a try-finally block.  Find its cleanup block, which
   1233  1.1  mrg      contains code to clobber the local variables that must be clobbered.  */
   1234  1.1  mrg   gimple *inner_cleanup = NULL;
   1235  1.1  mrg   if (body_sequence != NULL && gimple_code (body_sequence) == GIMPLE_TRY)
   1236  1.1  mrg     {
   1237  1.1  mrg       if (gimple_seq_singleton_p (body_sequence))
   1238  1.1  mrg 	{
   1239  1.1  mrg 	  /* The try statement is the only thing inside the bind.  */
   1240  1.1  mrg 	  inner_cleanup = gimple_try_cleanup (body_sequence);
   1241  1.1  mrg 	  body_sequence = gimple_try_eval (body_sequence);
   1242  1.1  mrg 	}
   1243  1.1  mrg       else
   1244  1.1  mrg 	{
   1245  1.1  mrg 	  /* The bind's body starts with a try statement, but it is followed
   1246  1.1  mrg 	     by other things.  */
   1247  1.1  mrg 	  gimple_stmt_iterator gsi = gsi_start (body_sequence);
   1248  1.1  mrg 	  gimple *try_stmt = gsi_stmt (gsi);
   1249  1.1  mrg 	  inner_cleanup = gimple_try_cleanup (try_stmt);
   1250  1.1  mrg 	  gimple *try_body = gimple_try_eval (try_stmt);
   1251  1.1  mrg 
   1252  1.1  mrg 	  gsi_remove (&gsi, false);
   1253  1.1  mrg 	  /* Now gsi indicates the sequence of statements after the try
   1254  1.1  mrg 	     statement in the bind.  Append the statement in the try body and
   1255  1.1  mrg 	     the trailing statements from gsi.  */
   1256  1.1  mrg 	  gsi_insert_seq_before (&gsi, try_body, GSI_CONTINUE_LINKING);
   1257  1.1  mrg 	  body_sequence = gsi_stmt (gsi);
   1258  1.1  mrg 	}
   1259  1.1  mrg     }
   1260  1.1  mrg 
   1261  1.1  mrg   /* This sequence will collect all the top-level statements in the body of
   1262  1.1  mrg      the data region we are about to construct.  */
   1263  1.1  mrg   gimple_seq region_body = NULL;
   1264  1.1  mrg   /* This sequence will collect consecutive statements to be put into a
   1265  1.1  mrg      gang-single region.  */
   1266  1.1  mrg   gimple_seq gang_single_seq = NULL;
   1267  1.1  mrg   /* Flag recording whether the gang_single_seq only contains copies to
   1268  1.1  mrg      local variables.  These may be loop setup code that should not be
   1269  1.1  mrg      separated from the loop.  */
   1270  1.1  mrg   bool only_simple_assignments = true;
   1271  1.1  mrg 
   1272  1.1  mrg   /* Precompute the control flow region information to determine whether an
   1273  1.1  mrg      OpenACC loop is executed conditionally or unconditionally.  */
   1274  1.1  mrg   control_flow_regions cf_regions (body_sequence);
   1275  1.1  mrg 
   1276  1.1  mrg   /* Iterate over the statements in the kernels region's body.  */
   1277  1.1  mrg   size_t idx = 0;
   1278  1.1  mrg   gimple_stmt_iterator gsi, gsi_n;
   1279  1.1  mrg   for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n, idx++)
   1280  1.1  mrg     {
   1281  1.1  mrg       /* Advance the iterator here because otherwise it would be invalidated
   1282  1.1  mrg 	 by moving statements below.  */
   1283  1.1  mrg       gsi_n = gsi;
   1284  1.1  mrg       gsi_next (&gsi_n);
   1285  1.1  mrg 
   1286  1.1  mrg       gimple *stmt = gsi_stmt (gsi);
   1287  1.1  mrg       if (gimple_code (stmt) == GIMPLE_DEBUG)
   1288  1.1  mrg 	{
   1289  1.1  mrg 	  if (flag_compare_debug_opt || flag_compare_debug)
   1290  1.1  mrg 	    /* Let the usual '-fcompare-debug' analysis bail out, as
   1291  1.1  mrg 	       necessary.  */
   1292  1.1  mrg 	    ;
   1293  1.1  mrg 	  else
   1294  1.1  mrg 	    sorry_at (loc, "%qs not yet supported",
   1295  1.1  mrg 		      gimple_code_name[gimple_code (stmt)]);
   1296  1.1  mrg 	}
   1297  1.1  mrg       gimple *omp_for = top_level_omp_for_in_stmt (stmt);
   1298  1.1  mrg       bool is_unconditional_oacc_for_loop = false;
   1299  1.1  mrg       if (omp_for != NULL)
   1300  1.1  mrg 	is_unconditional_oacc_for_loop
   1301  1.1  mrg 	  = cf_regions.is_unconditional_oacc_for_loop (idx);
   1302  1.1  mrg       if (omp_for != NULL
   1303  1.1  mrg 	  && is_unconditional_oacc_for_loop)
   1304  1.1  mrg 	{
   1305  1.1  mrg 	  /* This is an OMP for statement, put it into a separate region.
   1306  1.1  mrg 	     But first, construct a gang-single region containing any
   1307  1.1  mrg 	     complex sequential statements we may have seen.  */
   1308  1.1  mrg 	  if (gang_single_seq != NULL && !only_simple_assignments)
   1309  1.1  mrg 	    {
   1310  1.1  mrg 	      gimple *single_region
   1311  1.1  mrg 		= make_region_seq (loc, gang_single_seq,
   1312  1.1  mrg 				   num_gangs_clause,
   1313  1.1  mrg 				   num_workers_clause,
   1314  1.1  mrg 				   vector_length_clause,
   1315  1.1  mrg 				   kernels_clauses);
   1316  1.1  mrg 	      gimple_seq_add_stmt (&region_body, single_region);
   1317  1.1  mrg 	    }
   1318  1.1  mrg 	  else if (gang_single_seq != NULL && only_simple_assignments)
   1319  1.1  mrg 	    {
   1320  1.1  mrg 	      /* There is a sequence of sequential statements preceding this
   1321  1.1  mrg 		 loop, but they are all simple assignments.  This is
   1322  1.1  mrg 		 probably setup code for the loop; in particular, Fortran DO
   1323  1.1  mrg 		 loops are preceded by code to copy the loop limit variable
   1324  1.1  mrg 		 to a temporary.  Group this code together with the loop
   1325  1.1  mrg 		 itself.  */
   1326  1.1  mrg 	      gimple_seq_add_stmt (&gang_single_seq, stmt);
   1327  1.1  mrg 	      stmt = gimple_build_bind (NULL, gang_single_seq,
   1328  1.1  mrg 					make_node (BLOCK));
   1329  1.1  mrg 	    }
   1330  1.1  mrg 	  gang_single_seq = NULL;
   1331  1.1  mrg 	  only_simple_assignments = true;
   1332  1.1  mrg 
   1333  1.1  mrg 	  gimple_seq parallel_seq = NULL;
   1334  1.1  mrg 	  gimple_seq_add_stmt (&parallel_seq, stmt);
   1335  1.1  mrg 	  gimple *parallel_region
   1336  1.1  mrg 	    = make_region_loop_nest (omp_for, parallel_seq,
   1337  1.1  mrg 				     num_gangs_clause,
   1338  1.1  mrg 				     num_workers_clause,
   1339  1.1  mrg 				     vector_length_clause,
   1340  1.1  mrg 				     kernels_clauses);
   1341  1.1  mrg 	  gimple_seq_add_stmt (&region_body, parallel_region);
   1342  1.1  mrg 	}
   1343  1.1  mrg       else
   1344  1.1  mrg 	{
   1345  1.1  mrg 	  if (omp_for != NULL)
   1346  1.1  mrg 	    {
   1347  1.1  mrg 	      gcc_checking_assert (!is_unconditional_oacc_for_loop);
   1348  1.1  mrg 	      if (dump_enabled_p ())
   1349  1.1  mrg 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, omp_for,
   1350  1.1  mrg 				 "unparallelized loop nest"
   1351  1.1  mrg 				 " in OpenACC %<kernels%> region:"
   1352  1.1  mrg 				 " it's executed conditionally\n");
   1353  1.1  mrg 	    }
   1354  1.1  mrg 
   1355  1.1  mrg 	  /* This is not an unconditional OMP for statement, so it will be
   1356  1.1  mrg 	     put into a gang-single region.  */
   1357  1.1  mrg 	  gimple_seq_add_stmt (&gang_single_seq, stmt);
   1358  1.1  mrg 	  /* Is this a simple assignment? We call it simple if it is an
   1359  1.1  mrg 	     assignment to an artificial local variable.  This captures
   1360  1.1  mrg 	     Fortran loop setup code computing loop bounds and offsets.  */
   1361  1.1  mrg 	  bool is_simple_assignment
   1362  1.1  mrg 	    = (gimple_code (stmt) == GIMPLE_ASSIGN
   1363  1.1  mrg 	       && TREE_CODE (gimple_assign_lhs (stmt)) == VAR_DECL
   1364  1.1  mrg 	       && DECL_ARTIFICIAL (gimple_assign_lhs (stmt)));
   1365  1.1  mrg 	  if (!is_simple_assignment)
   1366  1.1  mrg 	    only_simple_assignments = false;
   1367  1.1  mrg 	}
   1368  1.1  mrg     }
   1369  1.1  mrg 
   1370  1.1  mrg   /* If we did not emit a new region, and are not going to emit one now
   1371  1.1  mrg      (that is, the original region was empty), prepare to emit a dummy so as
   1372  1.1  mrg      to preserve the original construct, which other processing (at least
   1373  1.1  mrg      test cases) depend on.  */
   1374  1.1  mrg   if (region_body == NULL && gang_single_seq == NULL)
   1375  1.1  mrg     {
   1376  1.1  mrg       gimple *stmt = gimple_build_nop ();
   1377  1.1  mrg       gimple_set_location (stmt, loc);
   1378  1.1  mrg       gimple_seq_add_stmt (&gang_single_seq, stmt);
   1379  1.1  mrg     }
   1380  1.1  mrg 
   1381  1.1  mrg   /* Gather up any remaining gang-single statements.  */
   1382  1.1  mrg   if (gang_single_seq != NULL)
   1383  1.1  mrg     {
   1384  1.1  mrg       gimple *single_region
   1385  1.1  mrg 	= make_region_seq (loc, gang_single_seq,
   1386  1.1  mrg 			   num_gangs_clause,
   1387  1.1  mrg 			   num_workers_clause,
   1388  1.1  mrg 			   vector_length_clause,
   1389  1.1  mrg 			   kernels_clauses);
   1390  1.1  mrg       gimple_seq_add_stmt (&region_body, single_region);
   1391  1.1  mrg     }
   1392  1.1  mrg 
   1393  1.1  mrg   /* We want to launch these kernels asynchronously.  If the original
   1394  1.1  mrg      kernels region had an async clause, this is done automatically because
   1395  1.1  mrg      that async clause was copied to the individual regions we created.
   1396  1.1  mrg      Otherwise, add an async clause to each newly created region, as well as
   1397  1.1  mrg      a wait directive at the end.  */
   1398  1.1  mrg   if (async_clause == NULL)
   1399  1.1  mrg     add_async_clauses_and_wait (loc, &region_body);
   1400  1.1  mrg   else
   1401  1.1  mrg     /* !!! If we have asynchronous parallel blocks inside a (synchronous) data
   1402  1.1  mrg        region, then target memory will get unmapped at the point the data
   1403  1.1  mrg        region ends, even if the inner asynchronous parallels have not yet
   1404  1.1  mrg        completed.  For kernels marked "async", we might want to use "enter data
   1405  1.1  mrg        async(...)" and "exit data async(...)" instead, or asynchronous data
   1406  1.1  mrg        regions (see also <https://gcc.gnu.org/PR97390>
   1407  1.1  mrg        "[OpenACC] 'async' clause on 'data' construct",
   1408  1.1  mrg        which is to share the same implementation).
   1409  1.1  mrg        For now, insert a (synchronous) wait at the end of the block.  */
   1410  1.1  mrg     add_wait (loc, &region_body);
   1411  1.1  mrg 
   1412  1.1  mrg   tree kernels_locals = gimple_bind_vars (as_a <gbind *> (kernels_body));
   1413  1.1  mrg   gimple *body = gimple_build_bind (kernels_locals, region_body,
   1414  1.1  mrg 				    make_node (BLOCK));
   1415  1.1  mrg 
   1416  1.1  mrg   /* If we found variables declared in nested scopes, build a data region to
   1417  1.1  mrg      map them to the device.  */
   1418  1.1  mrg   body = maybe_build_inner_data_region (loc, body, inner_bind_vars,
   1419  1.1  mrg 					inner_cleanup);
   1420  1.1  mrg 
   1421  1.1  mrg   return body;
   1422  1.1  mrg }
   1423  1.1  mrg 
   1424  1.1  mrg /* Decompose one OpenACC 'kernels' construct into an OpenACC 'data' construct
   1425  1.1  mrg    containing the original OpenACC 'kernels' construct's region cut up into a
   1426  1.1  mrg    sequence of compute constructs.  */
   1427  1.1  mrg 
   1428  1.1  mrg static gimple *
   1429  1.1  mrg omp_oacc_kernels_decompose_1 (gimple *kernels_stmt)
   1430  1.1  mrg {
   1431  1.1  mrg   gcc_checking_assert (gimple_omp_target_kind (kernels_stmt)
   1432  1.1  mrg 		       == GF_OMP_TARGET_KIND_OACC_KERNELS);
   1433  1.1  mrg   location_t loc = gimple_location (kernels_stmt);
   1434  1.1  mrg 
   1435  1.1  mrg   /* Collect the data clauses of the OpenACC 'kernels' directive and create a
   1436  1.1  mrg      new OpenACC 'data' construct with those clauses.  */
   1437  1.1  mrg   tree kernels_clauses = gimple_omp_target_clauses (kernels_stmt);
   1438  1.1  mrg   tree data_clauses = NULL;
   1439  1.1  mrg   for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c))
   1440  1.1  mrg     {
   1441  1.1  mrg       /* Certain clauses are copied to the enclosing OpenACC 'data'.  Other
   1442  1.1  mrg 	 clauses remain on the OpenACC 'kernels'.  */
   1443  1.1  mrg       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP)
   1444  1.1  mrg 	{
   1445  1.1  mrg 	  tree decl = OMP_CLAUSE_DECL (c);
   1446  1.1  mrg 	  HOST_WIDE_INT map_kind = OMP_CLAUSE_MAP_KIND (c);
   1447  1.1  mrg 	  switch (map_kind)
   1448  1.1  mrg 	    {
   1449  1.1  mrg 	    default:
   1450  1.1  mrg 	      if (map_kind == GOMP_MAP_ALLOC
   1451  1.1  mrg 		  && integer_zerop (OMP_CLAUSE_SIZE (c)))
   1452  1.1  mrg 		/* ??? This is an alloc clause for mapping a pointer whose
   1453  1.1  mrg 		   target is already mapped.  We leave these on the inner
   1454  1.1  mrg 		   compute constructs because moving them to the outer data
   1455  1.1  mrg 		   region causes runtime errors.  */
   1456  1.1  mrg 		break;
   1457  1.1  mrg 
   1458  1.1  mrg 	      /* For non-artificial variables, and for non-declaration
   1459  1.1  mrg 		 expressions like A[0:n], copy the clause to the data
   1460  1.1  mrg 		 region.  */
   1461  1.1  mrg 	      if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl))
   1462  1.1  mrg 		  || !DECL_P (decl))
   1463  1.1  mrg 		{
   1464  1.1  mrg 		  tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c),
   1465  1.1  mrg 						      OMP_CLAUSE_MAP);
   1466  1.1  mrg 		  OMP_CLAUSE_SET_MAP_KIND (new_clause, map_kind);
   1467  1.1  mrg 		  /* This must be unshared here to avoid "incorrect sharing
   1468  1.1  mrg 		     of tree nodes" errors from verify_gimple.  */
   1469  1.1  mrg 		  OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl);
   1470  1.1  mrg 		  OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c);
   1471  1.1  mrg 		  OMP_CLAUSE_CHAIN (new_clause) = data_clauses;
   1472  1.1  mrg 		  data_clauses = new_clause;
   1473  1.1  mrg 
   1474  1.1  mrg 		  /* Now that this data is mapped, turn the data clause on the
   1475  1.1  mrg 		     inner OpenACC 'kernels' into a 'present' clause.  */
   1476  1.1  mrg 		  OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT);
   1477  1.1  mrg 
   1478  1.1  mrg 		  /* See <https://gcc.gnu.org/PR100280>,
   1479  1.1  mrg 		     <https://gcc.gnu.org/PR104086>.  */
   1480  1.1  mrg 		  if (DECL_P (decl)
   1481  1.1  mrg 		      && !TREE_ADDRESSABLE (decl))
   1482  1.1  mrg 		    {
   1483  1.1  mrg 		      /* Request that OMP lowering make 'decl' addressable.  */
   1484  1.1  mrg 		      OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1;
   1485  1.1  mrg 
   1486  1.1  mrg 		      if (dump_enabled_p ())
   1487  1.1  mrg 			{
   1488  1.1  mrg 			  location_t loc = OMP_CLAUSE_LOCATION (new_clause);
   1489  1.1  mrg 			  const dump_user_location_t d_u_loc
   1490  1.1  mrg 			    = dump_user_location_t::from_location_t (loc);
   1491  1.1  mrg 			  /* PR100695 "Format decoder, quoting in 'dump_printf'
   1492  1.1  mrg 			     etc." */
   1493  1.1  mrg #if __GNUC__ >= 10
   1494  1.1  mrg # pragma GCC diagnostic push
   1495  1.1  mrg # pragma GCC diagnostic ignored "-Wformat"
   1496  1.1  mrg #endif
   1497  1.1  mrg 			  dump_printf_loc
   1498  1.1  mrg 			    (MSG_NOTE, d_u_loc,
   1499  1.1  mrg 			     "OpenACC %<kernels%> decomposition:"
   1500  1.1  mrg 			     " variable %<%T%> in %qs clause"
   1501  1.1  mrg 			     " requested to be made addressable\n",
   1502  1.1  mrg 			     decl,
   1503  1.1  mrg 			     user_omp_clause_code_name (new_clause, true));
   1504  1.1  mrg #if __GNUC__ >= 10
   1505  1.1  mrg # pragma GCC diagnostic pop
   1506  1.1  mrg #endif
   1507  1.1  mrg 			}
   1508  1.1  mrg 		    }
   1509  1.1  mrg 		}
   1510  1.1  mrg 	      break;
   1511  1.1  mrg 
   1512  1.1  mrg 	    case GOMP_MAP_POINTER:
   1513  1.1  mrg 	    case GOMP_MAP_TO_PSET:
   1514  1.1  mrg 	    case GOMP_MAP_FIRSTPRIVATE_POINTER:
   1515  1.1  mrg 	    case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
   1516  1.1  mrg 	      /* ??? Copying these map kinds leads to internal compiler
   1517  1.1  mrg 		 errors in later passes.  */
   1518  1.1  mrg 	      break;
   1519  1.1  mrg 	    }
   1520  1.1  mrg 	}
   1521  1.1  mrg       else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF)
   1522  1.1  mrg 	{
   1523  1.1  mrg 	  /* If there is an 'if' clause, it must be duplicated to the
   1524  1.1  mrg 	     enclosing data region.  Temporarily remove the if clause's
   1525  1.1  mrg 	     chain to avoid copying it.  */
   1526  1.1  mrg 	  tree saved_chain = OMP_CLAUSE_CHAIN (c);
   1527  1.1  mrg 	  OMP_CLAUSE_CHAIN (c) = NULL;
   1528  1.1  mrg 	  tree new_if_clause = unshare_expr (c);
   1529  1.1  mrg 	  OMP_CLAUSE_CHAIN (c) = saved_chain;
   1530  1.1  mrg 	  OMP_CLAUSE_CHAIN (new_if_clause) = data_clauses;
   1531  1.1  mrg 	  data_clauses = new_if_clause;
   1532  1.1  mrg 	}
   1533  1.1  mrg     }
   1534  1.1  mrg   /* Restore the original order of the clauses.  */
   1535  1.1  mrg   data_clauses = nreverse (data_clauses);
   1536  1.1  mrg 
   1537  1.1  mrg   gimple *data_region
   1538  1.1  mrg     = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
   1539  1.1  mrg 			       data_clauses);
   1540  1.1  mrg   gimple_set_location (data_region, loc);
   1541  1.1  mrg 
   1542  1.1  mrg   /* Transform the body of the kernels region into a sequence of compute
   1543  1.1  mrg      constructs.  */
   1544  1.1  mrg   gimple *body = decompose_kernels_region_body (kernels_stmt,
   1545  1.1  mrg 						kernels_clauses);
   1546  1.1  mrg 
   1547  1.1  mrg   /* Put the transformed pieces together.  The entire body of the region is
   1548  1.1  mrg      wrapped in a try-finally statement that calls __builtin_GOACC_data_end
   1549  1.1  mrg      for cleanup.  */
   1550  1.1  mrg   gimple *try_stmt = make_data_region_try_statement (loc, body);
   1551  1.1  mrg   gimple_omp_set_body (data_region, try_stmt);
   1552  1.1  mrg 
   1553  1.1  mrg   return data_region;
   1554  1.1  mrg }
   1555  1.1  mrg 
   1556  1.1  mrg 
   1557  1.1  mrg /* Decompose OpenACC 'kernels' constructs in the current function.  */
   1558  1.1  mrg 
   1559  1.1  mrg static tree
   1560  1.1  mrg omp_oacc_kernels_decompose_callback_stmt (gimple_stmt_iterator *gsi_p,
   1561  1.1  mrg 					  bool *handled_ops_p,
   1562  1.1  mrg 					  struct walk_stmt_info *)
   1563  1.1  mrg {
   1564  1.1  mrg   gimple *stmt = gsi_stmt (*gsi_p);
   1565  1.1  mrg 
   1566  1.1  mrg   if ((gimple_code (stmt) == GIMPLE_OMP_TARGET)
   1567  1.1  mrg       && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
   1568  1.1  mrg     {
   1569  1.1  mrg       gimple *stmt_new = omp_oacc_kernels_decompose_1 (stmt);
   1570  1.1  mrg       gsi_replace (gsi_p, stmt_new, false);
   1571  1.1  mrg       *handled_ops_p = true;
   1572  1.1  mrg     }
   1573  1.1  mrg   else
   1574  1.1  mrg     *handled_ops_p = false;
   1575  1.1  mrg 
   1576  1.1  mrg   return NULL;
   1577  1.1  mrg }
   1578  1.1  mrg 
   1579  1.1  mrg static unsigned int
   1580  1.1  mrg omp_oacc_kernels_decompose (void)
   1581  1.1  mrg {
   1582  1.1  mrg   gimple_seq body = gimple_body (current_function_decl);
   1583  1.1  mrg 
   1584  1.1  mrg   struct walk_stmt_info wi;
   1585  1.1  mrg   memset (&wi, 0, sizeof (wi));
   1586  1.1  mrg   walk_gimple_seq_mod (&body, omp_oacc_kernels_decompose_callback_stmt, NULL,
   1587  1.1  mrg 		       &wi);
   1588  1.1  mrg 
   1589  1.1  mrg   gimple_set_body (current_function_decl, body);
   1590  1.1  mrg 
   1591  1.1  mrg   return 0;
   1592  1.1  mrg }
   1593  1.1  mrg 
   1594  1.1  mrg 
   1595  1.1  mrg namespace {
   1596  1.1  mrg 
   1597  1.1  mrg const pass_data pass_data_omp_oacc_kernels_decompose =
   1598  1.1  mrg {
   1599  1.1  mrg   GIMPLE_PASS, /* type */
   1600  1.1  mrg   "omp_oacc_kernels_decompose", /* name */
   1601  1.1  mrg   OPTGROUP_OMP, /* optinfo_flags */
   1602  1.1  mrg   TV_NONE, /* tv_id */
   1603  1.1  mrg   PROP_gimple_any, /* properties_required */
   1604  1.1  mrg   0, /* properties_provided */
   1605  1.1  mrg   0, /* properties_destroyed */
   1606  1.1  mrg   0, /* todo_flags_start */
   1607  1.1  mrg   0, /* todo_flags_finish */
   1608  1.1  mrg };
   1609  1.1  mrg 
   1610  1.1  mrg class pass_omp_oacc_kernels_decompose : public gimple_opt_pass
   1611  1.1  mrg {
   1612  1.1  mrg public:
   1613  1.1  mrg   pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
   1614  1.1  mrg     : gimple_opt_pass (pass_data_omp_oacc_kernels_decompose, ctxt)
   1615  1.1  mrg   {}
   1616  1.1  mrg 
   1617  1.1  mrg   /* opt_pass methods: */
   1618  1.1  mrg   virtual bool gate (function *)
   1619  1.1  mrg   {
   1620  1.1  mrg     return (flag_openacc
   1621  1.1  mrg 	    && param_openacc_kernels == OPENACC_KERNELS_DECOMPOSE);
   1622  1.1  mrg   }
   1623  1.1  mrg   virtual unsigned int execute (function *)
   1624  1.1  mrg   {
   1625  1.1  mrg     return omp_oacc_kernels_decompose ();
   1626  1.1  mrg   }
   1627  1.1  mrg 
   1628  1.1  mrg }; // class pass_omp_oacc_kernels_decompose
   1629  1.1  mrg 
   1630  1.1  mrg } // anon namespace
   1631  1.1  mrg 
   1632  1.1  mrg gimple_opt_pass *
   1633  1.1  mrg make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
   1634  1.1  mrg {
   1635  1.1  mrg   return new pass_omp_oacc_kernels_decompose (ctxt);
   1636  1.1  mrg }
   1637