Home | History | Annotate | Line # | Download | only in gcc
omp-oacc-kernels-decompose.cc revision 1.1.1.1
      1 /* Decompose OpenACC 'kernels' constructs into parts, a sequence of compute
      2    constructs
      3 
      4    Copyright (C) 2020-2022 Free Software Foundation, Inc.
      5 
      6 This file is part of GCC.
      7 
      8 GCC is free software; you can redistribute it and/or modify it under
      9 the terms of the GNU General Public License as published by the Free
     10 Software Foundation; either version 3, or (at your option) any later
     11 version.
     12 
     13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
     15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     16 for more details.
     17 
     18 You should have received a copy of the GNU General Public License
     19 along with GCC; see the file COPYING3.  If not see
     20 <http://www.gnu.org/licenses/>.  */
     21 
     22 #include "config.h"
     23 #include "system.h"
     24 #include "coretypes.h"
     25 #include "backend.h"
     26 #include "target.h"
     27 #include "tree.h"
     28 #include "langhooks.h"
     29 #include "gimple.h"
     30 #include "tree-pass.h"
     31 #include "cgraph.h"
     32 #include "fold-const.h"
     33 #include "gimplify.h"
     34 #include "gimple-iterator.h"
     35 #include "gimple-walk.h"
     36 #include "gomp-constants.h"
     37 #include "omp-general.h"
     38 #include "diagnostic-core.h"
     39 
     40 
     41 /* This preprocessing pass is run immediately before lower_omp.  It decomposes
     42    OpenACC 'kernels' constructs into parts, a sequence of compute constructs.
     43 
     44    The translation is as follows:
     45      - The entire 'kernels' region is turned into a 'data' region with clauses
     46        taken from the 'kernels' region.  New 'create' clauses are added for all
     47        variables declared at the top level in the kernels region.
     48      - Any loop nests annotated with an OpenACC 'loop' directive are wrapped in
     49        a new compute construct.
     50 	 - 'loop' directives without an explicit 'independent' or 'seq' clause
     51 	   get an 'auto' clause added; other clauses are preserved on the loop
     52 	   or moved to the new surrounding compute construct, as applicable.
     53      - Any sequences of other code (non-loops, non-OpenACC 'loop's) are wrapped
     54        in new "gang-single" compute construct: 'worker'/'vector' parallelism is
     55        preserved, but 'num_gangs (1)' is enforced.
     56      - Both points above only apply at the topmost level in the region, that
     57        is, the transformation does not introduce new compute constructs inside
     58        nested statement bodies.  In particular, this means that a
     59        gang-parallelizable loop inside an 'if' statement is made "gang-single".
     60      - In order to make the host wait only once for the whole region instead
     61        of once per device kernel launch, the new compute constructs are
     62        annotated 'async'.  Unless the original 'kernels' construct already was
     63        marked 'async', the entire region ends with a 'wait' directive.  If the
     64        original 'kernels' construct was marked 'async', the synthesized 'async'
     65        clauses use the original 'kernels' construct's 'async' argument
     66        (possibly implicit).
     67 */
     68 
     69 
     70 /*TODO Things are conceptually wrong here: 'loop' clauses may be hidden behind
     71   'device_type', so we have to defer a lot of processing until we're in the
     72   offloading compilation.  "Fortunately", GCC doesn't support the OpenACC
     73   'device_type' clause yet, so we get away that.  */
     74 
     75 
     76 /* Helper function for decompose_kernels_region_body.  If STMT contains a
     77    "top-level" OMP_FOR statement, returns a pointer to that statement;
     78    returns NULL otherwise.
     79 
     80    A "top-level" OMP_FOR statement is one that is possibly accompanied by
     81    small snippets of setup code.  Specifically, this function accepts an
     82    OMP_FOR possibly wrapped in a singleton bind and a singleton try
     83    statement to allow for a local loop variable, but not an OMP_FOR
     84    statement nested in any other constructs.  Alternatively, it accepts a
     85    non-singleton bind containing only assignments and then an OMP_FOR
     86    statement at the very end.  The former style can be generated by the C
     87    frontend, the latter by the Fortran frontend.  */
     88 
     89 static gimple *
     90 top_level_omp_for_in_stmt (gimple *stmt)
     91 {
     92   if (gimple_code (stmt) == GIMPLE_OMP_FOR)
     93     return stmt;
     94 
     95   if (gimple_code (stmt) == GIMPLE_BIND)
     96     {
     97       gimple_seq body = gimple_bind_body (as_a <gbind *> (stmt));
     98       if (gimple_seq_singleton_p (body))
     99 	{
    100 	  /* Accept an OMP_FOR statement, or a try statement containing only
    101 	     a single OMP_FOR.  */
    102 	  gimple *maybe_for_or_try = gimple_seq_first_stmt (body);
    103 	  if (gimple_code (maybe_for_or_try) == GIMPLE_OMP_FOR)
    104 	    return maybe_for_or_try;
    105 	  else if (gimple_code (maybe_for_or_try) == GIMPLE_TRY)
    106 	    {
    107 	      gimple_seq try_body = gimple_try_eval (maybe_for_or_try);
    108 	      if (!gimple_seq_singleton_p (try_body))
    109 		return NULL;
    110 	      gimple *maybe_omp_for_stmt = gimple_seq_first_stmt (try_body);
    111 	      if (gimple_code (maybe_omp_for_stmt) == GIMPLE_OMP_FOR)
    112 		return maybe_omp_for_stmt;
    113 	    }
    114 	}
    115       else
    116 	{
    117 	  gimple_stmt_iterator gsi;
    118 	  /* Accept only a block of optional assignments followed by an
    119 	     OMP_FOR at the end.  No other kinds of statements allowed.  */
    120 	  for (gsi = gsi_start (body); !gsi_end_p (gsi); gsi_next (&gsi))
    121 	    {
    122 	      gimple *body_stmt = gsi_stmt (gsi);
    123 	      if (gimple_code (body_stmt) == GIMPLE_ASSIGN)
    124 		continue;
    125 	      else if (gimple_code (body_stmt) == GIMPLE_OMP_FOR
    126 		       && gsi_one_before_end_p (gsi))
    127 		return body_stmt;
    128 	      else
    129 		return NULL;
    130 	    }
    131 	}
    132     }
    133 
    134   return NULL;
    135 }
    136 
    137 /* Helper for adjust_region_code: evaluate the statement at GSI_P.  */
    138 
    139 static tree
    140 adjust_region_code_walk_stmt_fn (gimple_stmt_iterator *gsi_p,
    141 				 bool *handled_ops_p,
    142 				 struct walk_stmt_info *wi)
    143 {
    144   int *region_code = (int *) wi->info;
    145 
    146   gimple *stmt = gsi_stmt (*gsi_p);
    147   switch (gimple_code (stmt))
    148     {
    149     case GIMPLE_OMP_FOR:
    150       {
    151 	tree clauses = gimple_omp_for_clauses (stmt);
    152 	if (omp_find_clause (clauses, OMP_CLAUSE_INDEPENDENT))
    153 	  {
    154 	    /* Explicit 'independent' clause.  */
    155 	    /* Keep going; recurse into loop body.  */
    156 	    break;
    157 	  }
    158 	else if (omp_find_clause (clauses, OMP_CLAUSE_SEQ))
    159 	  {
    160 	    /* Explicit 'seq' clause.  */
    161 	    /* We'll "parallelize" if at some level a loop construct has been
    162 	       marked up by the user as unparallelizable ('seq' clause; we'll
    163 	       respect that in the later processing).  Given that the user has
    164 	       explicitly marked it up, this loop construct cannot be
    165 	       performance-critical, and in this case it's also fine to
    166 	       "parallelize" instead of "gang-single", because any outer or
    167 	       inner loops may still exploit the available parallelism.  */
    168 	    /* Keep going; recurse into loop body.  */
    169 	    break;
    170 	  }
    171 	else
    172 	  {
    173 	    /* Explicit or implicit 'auto' clause.  */
    174 	    /* The user would like this loop analyzed ('auto' clause) and
    175 	       typically parallelized, but we don't have available yet the
    176 	       compiler logic to analyze this, so can't parallelize it here, so
    177 	       we'd very likely be running into a performance problem if we
    178 	       were to execute this unparallelized, thus forward the whole loop
    179 	       nest to 'parloops'.  */
    180 	    *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
    181 	    /* Terminate: final decision for this region.  */
    182 	    *handled_ops_p = true;
    183 	    return integer_zero_node;
    184 	  }
    185 	gcc_unreachable ();
    186       }
    187 
    188     case GIMPLE_COND:
    189     case GIMPLE_GOTO:
    190     case GIMPLE_SWITCH:
    191     case GIMPLE_ASM:
    192     case GIMPLE_TRANSACTION:
    193     case GIMPLE_RETURN:
    194       /* Statement that might constitute some looping/control flow pattern.  */
    195       /* The user would like this code analyzed (implicit inside a 'kernels'
    196 	 region) and typically parallelized, but we don't have available yet
    197 	 the compiler logic to analyze this, so can't parallelize it here, so
    198 	 we'd very likely be running into a performance problem if we were to
    199 	 execute this unparallelized, thus forward the whole thing to
    200 	 'parloops'.  */
    201       *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
    202       /* Terminate: final decision for this region.  */
    203       *handled_ops_p = true;
    204       return integer_zero_node;
    205 
    206     default:
    207       /* Keep going.  */
    208       break;
    209     }
    210 
    211   return NULL;
    212 }
    213 
    214 /* Adjust the REGION_CODE for the region in GS.  */
    215 
    216 static void
    217 adjust_region_code (gimple_seq gs, int *region_code)
    218 {
    219   struct walk_stmt_info wi;
    220   memset (&wi, 0, sizeof (wi));
    221   wi.info = region_code;
    222   walk_gimple_seq (gs, adjust_region_code_walk_stmt_fn, NULL, &wi);
    223 }
    224 
    225 /* Helper function for make_loops_gang_single for walking the tree.  If the
    226    statement indicated by GSI_P is an OpenACC for loop with a gang clause,
    227    issue a warning and remove the clause.  */
    228 
    229 static tree
    230 visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p,
    231 				   bool *handled_ops_p,
    232 				   struct walk_stmt_info *)
    233 {
    234   *handled_ops_p = false;
    235 
    236   gimple *stmt = gsi_stmt (*gsi_p);
    237   switch (gimple_code (stmt))
    238     {
    239     case GIMPLE_OMP_FOR:
    240       /*TODO Given the current 'adjust_region_code' algorithm, this is
    241 	actually...  */
    242 #if 0
    243       gcc_unreachable ();
    244 #else
    245       /* ..., but due to bugs (PR100400), we may actually come here.
    246 	 Reliably catch this, regardless of checking level.  */
    247       internal_error ("PR100400");
    248 #endif
    249 
    250       {
    251 	tree clauses = gimple_omp_for_clauses (stmt);
    252 	tree prev_clause = NULL;
    253 	for (tree clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause))
    254 	  {
    255 	    if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_GANG)
    256 	      {
    257 		/* It makes no sense to have a 'gang' clause in a "gang-single"
    258 		   region, so warn and remove it.  */
    259 		warning_at (gimple_location (stmt), 0,
    260 			    "conditionally executed loop in %<kernels%> region"
    261 			    " will be executed by a single gang;"
    262 			    " ignoring %<gang%> clause");
    263 		if (prev_clause != NULL)
    264 		  OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (clause);
    265 		else
    266 		  clauses = OMP_CLAUSE_CHAIN (clause);
    267 
    268 		break;
    269 	      }
    270 	    prev_clause = clause;
    271 	  }
    272 	gimple_omp_for_set_clauses (stmt, clauses);
    273       }
    274       /* No need to recurse into nested statements; no loop nested inside
    275 	 this loop can be gang-partitioned.  */
    276       sorry ("%<gang%> loop in %<gang-single%> region");
    277       *handled_ops_p = true;
    278       break;
    279 
    280     default:
    281       break;
    282     }
    283 
    284   return NULL;
    285 }
    286 
    287 /* Visit all nested OpenACC loops in the sequence indicated by GS.  This
    288    statement is expected to be inside a gang-single region.  Issue a warning
    289    for any loops inside it that have gang clauses and remove the clauses.  */
    290 
    291 static void
    292 make_loops_gang_single (gimple_seq gs)
    293 {
    294   struct walk_stmt_info wi;
    295   memset (&wi, 0, sizeof (wi));
    296   walk_gimple_seq (gs, visit_loops_in_gang_single_region, NULL, &wi);
    297 }
    298 
    299 /* Construct a "gang-single" compute construct at LOC containing the STMTS.
    300    Annotate with CLAUSES, which must not contain a 'num_gangs' clause, and an
    301    additional 'num_gangs (1)' clause to force "gang-single" execution.  */
    302 
    303 static gimple *
    304 make_region_seq (location_t loc, gimple_seq stmts,
    305 		 tree num_gangs_clause,
    306 		 tree num_workers_clause,
    307 		 tree vector_length_clause,
    308 		 tree clauses)
    309 {
    310   /* This correctly unshares the entire clause chain rooted here.  */
    311   clauses = unshare_expr (clauses);
    312 
    313   dump_user_location_t loc_stmts_first = gimple_seq_first (stmts);
    314 
    315   /* Figure out the region code for this region.  */
    316   /* Optimistic default: assume "setup code", no looping; thus not
    317      performance-critical.  */
    318   int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE;
    319   adjust_region_code (stmts, &region_code);
    320 
    321   if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE)
    322     {
    323       if (dump_enabled_p ())
    324 	/*TODO MSG_MISSED_OPTIMIZATION? */
    325 	dump_printf_loc (MSG_NOTE, loc_stmts_first,
    326 			 "beginning %<gang-single%> part"
    327 			 " in OpenACC %<kernels%> region\n");
    328 
    329       /* Synthesize a 'num_gangs (1)' clause.  */
    330       tree gang_single_clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
    331       OMP_CLAUSE_OPERAND (gang_single_clause, 0) = integer_one_node;
    332       OMP_CLAUSE_CHAIN (gang_single_clause) = clauses;
    333       clauses = gang_single_clause;
    334 
    335       /* Remove and issue warnings about gang clauses on any OpenACC
    336 	 loops nested inside this sequentially executed statement.  */
    337       make_loops_gang_single (stmts);
    338     }
    339   else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS)
    340     {
    341       if (dump_enabled_p ())
    342 	dump_printf_loc (MSG_NOTE, loc_stmts_first,
    343 			 "beginning %<parloops%> part"
    344 			 " in OpenACC %<kernels%> region\n");
    345 
    346       /* As we're transforming a 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another
    347 	 'GF_OMP_TARGET_KIND_OACC_KERNELS', this isn't doing any of the clauses
    348 	 mangling that 'make_region_loop_nest' is doing.  */
    349       /* Re-assemble the clauses stripped off earlier.  */
    350       if (num_gangs_clause != NULL)
    351 	{
    352 	  tree c = unshare_expr (num_gangs_clause);
    353 	  OMP_CLAUSE_CHAIN (c) = clauses;
    354 	  clauses = c;
    355 	}
    356       if (num_workers_clause != NULL)
    357 	{
    358 	  tree c = unshare_expr (num_workers_clause);
    359 	  OMP_CLAUSE_CHAIN (c) = clauses;
    360 	  clauses = c;
    361 	}
    362       if (vector_length_clause != NULL)
    363 	{
    364 	  tree c = unshare_expr (vector_length_clause);
    365 	  OMP_CLAUSE_CHAIN (c) = clauses;
    366 	  clauses = c;
    367 	}
    368     }
    369   else
    370     gcc_unreachable ();
    371 
    372   /* Build the gang-single region.  */
    373   gimple *single_region = gimple_build_omp_target (NULL, region_code, clauses);
    374   gimple_set_location (single_region, loc);
    375   gbind *single_body = gimple_build_bind (NULL, stmts, make_node (BLOCK));
    376   gimple_omp_set_body (single_region, single_body);
    377 
    378   return single_region;
    379 }
    380 
    381 /* Helper function for make_region_loop_nest.  Adds a 'num_gangs'
    382    ('num_workers', 'vector_length') clause to the given CLAUSES, either the one
    383    from the parent compute construct (PARENT_CLAUSE) or a new one based on the
    384    loop's own LOOP_CLAUSE ('gang (num: N)' or similar for 'worker' or 'vector'
    385    clauses) with the given CLAUSE_CODE.  Does nothing if neither PARENT_CLAUSE
    386    nor LOOP_CLAUSE exist.  Returns the new clauses.  */
    387 
    388 static tree
    389 add_parent_or_loop_num_clause (tree parent_clause, tree loop_clause,
    390 			       omp_clause_code clause_code, tree clauses)
    391 {
    392   if (parent_clause != NULL)
    393     {
    394       tree num_clause = unshare_expr (parent_clause);
    395       OMP_CLAUSE_CHAIN (num_clause) = clauses;
    396       clauses = num_clause;
    397     }
    398   else if (loop_clause != NULL)
    399     {
    400       /* The kernels region does not have a 'num_gangs' clause, but the loop
    401 	 itself had a 'gang (num: N)' clause.  Honor it by adding a
    402 	 'num_gangs (N)' clause on the compute construct.  */
    403       tree num = OMP_CLAUSE_OPERAND (loop_clause, 0);
    404       tree new_num_clause
    405 	= build_omp_clause (OMP_CLAUSE_LOCATION (loop_clause), clause_code);
    406       OMP_CLAUSE_OPERAND (new_num_clause, 0) = num;
    407       OMP_CLAUSE_CHAIN (new_num_clause) = clauses;
    408       clauses = new_num_clause;
    409     }
    410   return clauses;
    411 }
    412 
    413 /* Helper for make_region_loop_nest, looking for 'worker (num: N)' or 'vector
    414    (length: N)' clauses in nested loops.  Removes the argument, transferring it
    415    to the enclosing compute construct (via WI->INFO).  If arguments within the
    416    same loop nest conflict, emits a warning.
    417 
    418    This function also decides whether to add an 'auto' clause on each of these
    419    nested loops.  */
    420 
    421 struct adjust_nested_loop_clauses_wi_info
    422 {
    423   tree *loop_gang_clause_ptr;
    424   tree *loop_worker_clause_ptr;
    425   tree *loop_vector_clause_ptr;
    426 };
    427 
    428 static tree
    429 adjust_nested_loop_clauses (gimple_stmt_iterator *gsi_p, bool *,
    430 			    struct walk_stmt_info *wi)
    431 {
    432   struct adjust_nested_loop_clauses_wi_info *wi_info
    433     = (struct adjust_nested_loop_clauses_wi_info *) wi->info;
    434   gimple *stmt = gsi_stmt (*gsi_p);
    435 
    436   if (gimple_code (stmt) == GIMPLE_OMP_FOR)
    437     {
    438       bool add_auto_clause = true;
    439       tree loop_clauses = gimple_omp_for_clauses (stmt);
    440       tree loop_clause = loop_clauses;
    441       for (; loop_clause; loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
    442 	{
    443 	  tree *outer_clause_ptr = NULL;
    444 	  switch (OMP_CLAUSE_CODE (loop_clause))
    445 	    {
    446 	    case OMP_CLAUSE_GANG:
    447 	      outer_clause_ptr = wi_info->loop_gang_clause_ptr;
    448 	      break;
    449 	    case OMP_CLAUSE_WORKER:
    450 	      outer_clause_ptr = wi_info->loop_worker_clause_ptr;
    451 	      break;
    452 	    case OMP_CLAUSE_VECTOR:
    453 	      outer_clause_ptr = wi_info->loop_vector_clause_ptr;
    454 	      break;
    455 	    case OMP_CLAUSE_SEQ:
    456 	    case OMP_CLAUSE_INDEPENDENT:
    457 	    case OMP_CLAUSE_AUTO:
    458 	      add_auto_clause = false;
    459 	    default:
    460 	      break;
    461 	    }
    462 	  if (outer_clause_ptr != NULL)
    463 	    {
    464 	      if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL
    465 		  && *outer_clause_ptr == NULL)
    466 		{
    467 		  /* Transfer the clause to the enclosing compute construct and
    468 		     remove the numerical argument from the 'loop'.  */
    469 		  *outer_clause_ptr = unshare_expr (loop_clause);
    470 		  OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
    471 		}
    472 	      else if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL &&
    473 		       OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0) != NULL)
    474 		{
    475 		  /* See if both of these are the same constant.  If they
    476 		     aren't, emit a warning.  */
    477 		  tree old_op = OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0);
    478 		  tree new_op = OMP_CLAUSE_OPERAND (loop_clause, 0);
    479 		  if (!(cst_and_fits_in_hwi (old_op) &&
    480 			cst_and_fits_in_hwi (new_op) &&
    481 			int_cst_value (old_op) == int_cst_value (new_op)))
    482 		    {
    483 		      const char *clause_name
    484 			= omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
    485 		      error_at (gimple_location (stmt),
    486 				"cannot honor conflicting %qs clause",
    487 				clause_name);
    488 		      inform (OMP_CLAUSE_LOCATION (*outer_clause_ptr),
    489 			      "location of the previous clause"
    490 			      " in the same loop nest");
    491 		    }
    492 		  OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
    493 		}
    494 	    }
    495 	}
    496       if (add_auto_clause)
    497 	{
    498 	  tree auto_clause
    499 	    = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_AUTO);
    500 	  OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
    501 	  gimple_omp_for_set_clauses (stmt, auto_clause);
    502 	}
    503     }
    504 
    505   return NULL;
    506 }
    507 
    508 /* Helper for make_region_loop_nest.  Transform OpenACC 'kernels'/'loop'
    509    construct clauses into OpenACC 'parallel'/'loop' construct ones.  */
    510 
    511 static tree
    512 transform_kernels_loop_clauses (gimple *omp_for,
    513 				tree num_gangs_clause,
    514 				tree num_workers_clause,
    515 				tree vector_length_clause,
    516 				tree clauses)
    517 {
    518   /* If this loop in a kernels region does not have an explicit 'seq',
    519      'independent', or 'auto' clause, we must give it an explicit 'auto'
    520      clause.
    521      We also check for 'gang (num: N)' clauses.  These must not appear in
    522      kernels regions that have their own 'num_gangs' clause.  Otherwise, they
    523      must be converted and put on the region; similarly for 'worker' and
    524      'vector' clauses.  */
    525   bool add_auto_clause = true;
    526   tree loop_gang_clause = NULL, loop_worker_clause = NULL,
    527        loop_vector_clause = NULL;
    528   tree loop_clauses = gimple_omp_for_clauses (omp_for);
    529   for (tree loop_clause = loop_clauses;
    530        loop_clause;
    531        loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
    532     {
    533       bool found_num_clause = false;
    534       tree *clause_ptr, clause_to_check;
    535       switch (OMP_CLAUSE_CODE (loop_clause))
    536 	{
    537 	case OMP_CLAUSE_GANG:
    538 	  found_num_clause = true;
    539 	  clause_ptr = &loop_gang_clause;
    540 	  clause_to_check = num_gangs_clause;
    541 	  break;
    542 	case OMP_CLAUSE_WORKER:
    543 	  found_num_clause = true;
    544 	  clause_ptr = &loop_worker_clause;
    545 	  clause_to_check = num_workers_clause;
    546 	  break;
    547 	case OMP_CLAUSE_VECTOR:
    548 	  found_num_clause = true;
    549 	  clause_ptr = &loop_vector_clause;
    550 	  clause_to_check = vector_length_clause;
    551 	  break;
    552 	case OMP_CLAUSE_INDEPENDENT:
    553 	case OMP_CLAUSE_SEQ:
    554 	case OMP_CLAUSE_AUTO:
    555 	  add_auto_clause = false;
    556 	default:
    557 	  break;
    558 	}
    559       if (found_num_clause && OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL)
    560 	{
    561 	  if (clause_to_check)
    562 	    {
    563 	      const char *clause_name
    564 		= omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
    565 	      const char *parent_clause_name
    566 		= omp_clause_code_name[OMP_CLAUSE_CODE (clause_to_check)];
    567 	      error_at (OMP_CLAUSE_LOCATION (loop_clause),
    568 			"argument not permitted on %qs clause"
    569 			" in OpenACC %<kernels%> region with a %qs clause",
    570 			clause_name, parent_clause_name);
    571 	      inform (OMP_CLAUSE_LOCATION (clause_to_check),
    572 		      "location of OpenACC %<kernels%>");
    573 	    }
    574 	  /* Copy the 'gang (N)'/'worker (N)'/'vector (N)' clause to the
    575 	     enclosing compute construct.  */
    576 	  *clause_ptr = unshare_expr (loop_clause);
    577 	  OMP_CLAUSE_CHAIN (*clause_ptr) = NULL;
    578 	  /* Leave a 'gang'/'worker'/'vector' clause on the 'loop', but without
    579 	     argument.  */
    580 	  OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
    581 	}
    582     }
    583   if (add_auto_clause)
    584     {
    585       tree auto_clause = build_omp_clause (gimple_location (omp_for),
    586 					   OMP_CLAUSE_AUTO);
    587       OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
    588       loop_clauses = auto_clause;
    589     }
    590   gimple_omp_for_set_clauses (omp_for, loop_clauses);
    591   /* We must also recurse into the loop; it might contain nested loops having
    592      their own 'worker (num: W)' or 'vector (length: V)' clauses.  Turn these
    593      into 'worker'/'vector' clauses on the compute construct.  */
    594   struct walk_stmt_info wi;
    595   memset (&wi, 0, sizeof (wi));
    596   struct adjust_nested_loop_clauses_wi_info wi_info;
    597   wi_info.loop_gang_clause_ptr = &loop_gang_clause;
    598   wi_info.loop_worker_clause_ptr = &loop_worker_clause;
    599   wi_info.loop_vector_clause_ptr = &loop_vector_clause;
    600   wi.info = &wi_info;
    601   gimple *body = gimple_omp_body (omp_for);
    602   walk_gimple_seq (body, adjust_nested_loop_clauses, NULL, &wi);
    603   /* Check if there were conflicting numbers of workers or vector length.  */
    604   if (loop_gang_clause != NULL &&
    605       OMP_CLAUSE_OPERAND (loop_gang_clause, 0) == NULL)
    606     loop_gang_clause = NULL;
    607   if (loop_worker_clause != NULL &&
    608       OMP_CLAUSE_OPERAND (loop_worker_clause, 0) == NULL)
    609     loop_worker_clause = NULL;
    610   if (loop_vector_clause != NULL &&
    611       OMP_CLAUSE_OPERAND (loop_vector_clause, 0) == NULL)
    612     vector_length_clause = NULL;
    613 
    614   /* If the kernels region had 'num_gangs', 'num_worker', 'vector_length'
    615      clauses, add these to this new compute construct.  */
    616   clauses
    617     = add_parent_or_loop_num_clause (num_gangs_clause, loop_gang_clause,
    618 				     OMP_CLAUSE_NUM_GANGS, clauses);
    619   clauses
    620     = add_parent_or_loop_num_clause (num_workers_clause, loop_worker_clause,
    621 				     OMP_CLAUSE_NUM_WORKERS, clauses);
    622   clauses
    623     = add_parent_or_loop_num_clause (vector_length_clause, loop_vector_clause,
    624 				     OMP_CLAUSE_VECTOR_LENGTH, clauses);
    625 
    626   return clauses;
    627 }
    628 
    629 /* Construct a possibly gang-parallel compute construct containing the STMT,
    630    which must be identical to, or a bind containing, the loop OMP_FOR.
    631 
    632    The NUM_GANGS_CLAUSE, NUM_WORKERS_CLAUSE, and VECTOR_LENGTH_CLAUSE are
    633    optional clauses from the original kernels region and must not be contained
    634    in the other CLAUSES. The newly created compute construct is annotated with
    635    the optional NUM_GANGS_CLAUSE as well as the other CLAUSES.  If there is no
    636    NUM_GANGS_CLAUSE but the loop has a 'gang (num: N)' clause, that is
    637    converted to a 'num_gangs (N)' clause on the new compute construct, and
    638    similarly for 'worker' and 'vector' clauses.
    639 
    640    The outermost loop gets an 'auto' clause unless there already is an
    641    'seq'/'independent'/'auto' clause.  Nested loops inside OMP_FOR are treated
    642    similarly by the adjust_nested_loop_clauses function.  */
    643 
    644 static gimple *
    645 make_region_loop_nest (gimple *omp_for, gimple_seq stmts,
    646 		       tree num_gangs_clause,
    647 		       tree num_workers_clause,
    648 		       tree vector_length_clause,
    649 		       tree clauses)
    650 {
    651   /* This correctly unshares the entire clause chain rooted here.  */
    652   clauses = unshare_expr (clauses);
    653 
    654   /* Figure out the region code for this region.  */
    655   /* Optimistic default: assume that the loop nest is parallelizable
    656      (essentially, no GIMPLE_OMP_FOR with (explicit or implicit) 'auto' clause,
    657      and no un-annotated loops).  */
    658   int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED;
    659   adjust_region_code (stmts, &region_code);
    660 
    661   if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
    662     {
    663       if (dump_enabled_p ())
    664 	/* This is not MSG_OPTIMIZED_LOCATIONS, as we're just doing what the
    665 	   user asked us to.  */
    666 	dump_printf_loc (MSG_NOTE, omp_for,
    667 			 "parallelized loop nest"
    668 			 " in OpenACC %<kernels%> region\n");
    669 
    670       clauses = transform_kernels_loop_clauses (omp_for,
    671 						num_gangs_clause,
    672 						num_workers_clause,
    673 						vector_length_clause,
    674 						clauses);
    675     }
    676   else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS)
    677     {
    678       if (dump_enabled_p ())
    679 	dump_printf_loc (MSG_NOTE, omp_for,
    680 			 "forwarded loop nest"
    681 			 " in OpenACC %<kernels%> region"
    682 			 " to %<parloops%> for analysis\n");
    683 
    684       /* We're transforming one 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another
    685 	 'GF_OMP_TARGET_KIND_OACC_KERNELS', so don't have to
    686 	 'transform_kernels_loop_clauses'.  */
    687       /* Re-assemble the clauses stripped off earlier.  */
    688       clauses
    689 	= add_parent_or_loop_num_clause (num_gangs_clause, NULL,
    690 					 OMP_CLAUSE_NUM_GANGS, clauses);
    691       clauses
    692 	= add_parent_or_loop_num_clause (num_workers_clause, NULL,
    693 					 OMP_CLAUSE_NUM_WORKERS, clauses);
    694       clauses
    695 	= add_parent_or_loop_num_clause (vector_length_clause, NULL,
    696 					 OMP_CLAUSE_VECTOR_LENGTH, clauses);
    697     }
    698   else
    699     gcc_unreachable ();
    700 
    701   gimple *parallel_body_bind
    702     = gimple_build_bind (NULL, stmts, make_node (BLOCK));
    703   gimple *parallel_region
    704     = gimple_build_omp_target (parallel_body_bind, region_code, clauses);
    705   gimple_set_location (parallel_region, gimple_location (omp_for));
    706 
    707   return parallel_region;
    708 }
    709 
    710 /* Eliminate any binds directly inside BIND by adding their statements to
    711    BIND (i.e., modifying it in place), excluding binds that hold only an
    712    OMP_FOR loop and associated setup/cleanup code.  Recurse into binds but
    713    not other statements.  Return a chain of the local variables of eliminated
    714    binds, i.e., the local variables found in nested binds.  If
    715    INCLUDE_TOPLEVEL_VARS is true, this also includes the variables belonging
    716    to BIND itself. */
    717 
    718 static tree
    719 flatten_binds (gbind *bind, bool include_toplevel_vars = false)
    720 {
    721   tree vars = NULL, last_var = NULL;
    722 
    723   if (include_toplevel_vars)
    724     {
    725       vars = gimple_bind_vars (bind);
    726       last_var = vars;
    727     }
    728 
    729   gimple_seq new_body = NULL;
    730   gimple_seq body_sequence = gimple_bind_body (bind);
    731   gimple_stmt_iterator gsi, gsi_n;
    732   for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n)
    733     {
    734       /* Advance the iterator here because otherwise it would be invalidated
    735 	 by moving statements below.  */
    736       gsi_n = gsi;
    737       gsi_next (&gsi_n);
    738 
    739       gimple *stmt = gsi_stmt (gsi);
    740       /* Flatten bind statements, except the ones that contain only an
    741 	 OpenACC for loop.  */
    742       if (gimple_code (stmt) == GIMPLE_BIND
    743 	  && !top_level_omp_for_in_stmt (stmt))
    744 	{
    745 	  gbind *inner_bind = as_a <gbind *> (stmt);
    746 	  /* Flatten recursively, and collect all variables.  */
    747 	  tree inner_vars = flatten_binds (inner_bind, true);
    748 	  gimple_seq inner_sequence = gimple_bind_body (inner_bind);
    749 	  if (flag_checking)
    750 	    {
    751 	      for (gimple_stmt_iterator inner_gsi = gsi_start (inner_sequence);
    752 		   !gsi_end_p (inner_gsi);
    753 		   gsi_next (&inner_gsi))
    754 		{
    755 		  gimple *inner_stmt = gsi_stmt (inner_gsi);
    756 		  gcc_assert (gimple_code (inner_stmt) != GIMPLE_BIND
    757 			      || top_level_omp_for_in_stmt (inner_stmt));
    758 		}
    759 	    }
    760 	  gimple_seq_add_seq (&new_body, inner_sequence);
    761 	  /* Find the last variable; we will append others to it.  */
    762 	  while (last_var != NULL && TREE_CHAIN (last_var) != NULL)
    763 	    last_var = TREE_CHAIN (last_var);
    764 	  if (last_var != NULL)
    765 	    {
    766 	      TREE_CHAIN (last_var) = inner_vars;
    767 	      last_var = inner_vars;
    768 	    }
    769 	  else
    770 	    {
    771 	      vars = inner_vars;
    772 	      last_var = vars;
    773 	    }
    774 	}
    775       else
    776 	gimple_seq_add_stmt (&new_body, stmt);
    777     }
    778 
    779   /* Put the possibly transformed body back into the bind.  */
    780   gimple_bind_set_body (bind, new_body);
    781   return vars;
    782 }
    783 
    784 /* Helper function for places where we construct data regions.  Wraps the BODY
    785    inside a try-finally construct at LOC that calls __builtin_GOACC_data_end
    786    in its cleanup block.  Returns this try statement.  */
    787 
    788 static gimple *
    789 make_data_region_try_statement (location_t loc, gimple *body)
    790 {
    791   tree data_end_fn = builtin_decl_explicit (BUILT_IN_GOACC_DATA_END);
    792   gimple *call = gimple_build_call (data_end_fn, 0);
    793   gimple_seq cleanup = NULL;
    794   gimple_seq_add_stmt (&cleanup, call);
    795   gimple *try_stmt = gimple_build_try (body, cleanup, GIMPLE_TRY_FINALLY);
    796   gimple_set_location (body, loc);
    797   return try_stmt;
    798 }
    799 
    800 /* If INNER_BIND_VARS holds variables, build an OpenACC data region with
    801    location LOC containing BODY and having 'create (var)' clauses for each
    802    variable (as a side effect, such variables also get TREE_ADDRESSABLE set).
    803    If INNER_CLEANUP is present, add a try-finally statement with
    804    this cleanup code in the finally block.  Return the new data region, or
    805    the original BODY if no data region was needed.  */
    806 
    807 static gimple *
    808 maybe_build_inner_data_region (location_t loc, gimple *body,
    809 			       tree inner_bind_vars, gimple *inner_cleanup)
    810 {
    811   /* Is this an instantiation of a template?  (In this case, we don't care what
    812      the generic decl is - just whether the function decl has one.)  */
    813   bool generic_inst_p
    814     = (lang_hooks.decls.get_generic_function_decl (current_function_decl)
    815        != NULL);
    816 
    817   /* Build data 'create (var)' clauses for these local variables.
    818      Below we will add these to a data region enclosing the entire body
    819      of the decomposed kernels region.  */
    820   tree prev_mapped_var = NULL, next = NULL, artificial_vars = NULL,
    821        inner_data_clauses = NULL;
    822   for (tree v = inner_bind_vars; v; v = next)
    823     {
    824       next = TREE_CHAIN (v);
    825       if (DECL_ARTIFICIAL (v)
    826 	  || TREE_CODE (v) == CONST_DECL
    827 	  || generic_inst_p)
    828 	{
    829 	  /* If this is an artificial temporary, it need not be mapped.  We
    830 	     move its declaration into the bind inside the data region.
    831 	     Also avoid mapping variables if we are inside a template
    832 	     instantiation; the code does not contain all the copies to
    833 	     temporaries that would make this legal.  */
    834 	  TREE_CHAIN (v) = artificial_vars;
    835 	  artificial_vars = v;
    836 	  if (prev_mapped_var != NULL)
    837 	    TREE_CHAIN (prev_mapped_var) = next;
    838 	  else
    839 	    inner_bind_vars = next;
    840 	}
    841       else
    842 	{
    843 	  /* Otherwise, build the map clause.  */
    844 	  tree new_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
    845 	  OMP_CLAUSE_SET_MAP_KIND (new_clause, GOMP_MAP_ALLOC);
    846 	  OMP_CLAUSE_DECL (new_clause) = v;
    847 	  OMP_CLAUSE_SIZE (new_clause) = DECL_SIZE_UNIT (v);
    848 	  OMP_CLAUSE_CHAIN (new_clause) = inner_data_clauses;
    849 	  inner_data_clauses = new_clause;
    850 
    851 	  prev_mapped_var = v;
    852 
    853 	  /* See <https://gcc.gnu.org/PR100280>.  */
    854 	  if (!TREE_ADDRESSABLE (v))
    855 	    {
    856 	      /* Request that OMP lowering make 'v' addressable.  */
    857 	      OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1;
    858 
    859 	      if (dump_enabled_p ())
    860 		{
    861 		  const dump_user_location_t d_u_loc
    862 		    = dump_user_location_t::from_location_t (loc);
    863 		  /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
    864 #if __GNUC__ >= 10
    865 # pragma GCC diagnostic push
    866 # pragma GCC diagnostic ignored "-Wformat"
    867 #endif
    868 		  dump_printf_loc (MSG_NOTE, d_u_loc,
    869 				   "OpenACC %<kernels%> decomposition:"
    870 				   " variable %<%T%> declared in block"
    871 				   " requested to be made addressable\n",
    872 				   v);
    873 #if __GNUC__ >= 10
    874 # pragma GCC diagnostic pop
    875 #endif
    876 		}
    877 	    }
    878 	}
    879     }
    880 
    881   if (artificial_vars)
    882     body = gimple_build_bind (artificial_vars, body, make_node (BLOCK));
    883 
    884   /* If we determined above that there are variables that need to be created
    885      on the device, construct a data region for them and wrap the body
    886      inside that.  */
    887   if (inner_data_clauses != NULL)
    888     {
    889       gcc_assert (inner_bind_vars != NULL);
    890       gimple *inner_data_region
    891 	= gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
    892 				   inner_data_clauses);
    893       gimple_set_location (inner_data_region, loc);
    894       /* Make sure __builtin_GOACC_data_end is called at the end.  */
    895       gimple *try_stmt = make_data_region_try_statement (loc, body);
    896       gimple_omp_set_body (inner_data_region, try_stmt);
    897       gimple *bind_body;
    898       if (inner_cleanup != NULL)
    899 	/* Clobber all the inner variables that need to be clobbered.  */
    900 	bind_body = gimple_build_try (inner_data_region, inner_cleanup,
    901 				      GIMPLE_TRY_FINALLY);
    902       else
    903 	bind_body = inner_data_region;
    904       body = gimple_build_bind (inner_bind_vars, bind_body, make_node (BLOCK));
    905     }
    906 
    907   return body;
    908 }
    909 
    910 static void
    911 add_wait (location_t loc, gimple_seq *region_body)
    912 {
    913   /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0).  */
    914   tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
    915   tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
    916   gimple *wait_call = gimple_build_call (wait_fn, 2,
    917 					 sync_arg, integer_zero_node);
    918   gimple_set_location (wait_call, loc);
    919   gimple_seq_add_stmt (region_body, wait_call);
    920 }
    921 
    922 /* Helper function of decompose_kernels_region_body.  The statements in
    923    REGION_BODY are expected to be decomposed parts; add an 'async' clause to
    924    each.  Also add a 'wait' directive at the end of the sequence.  */
    925 
    926 static void
    927 add_async_clauses_and_wait (location_t loc, gimple_seq *region_body)
    928 {
    929   tree default_async_queue
    930     = build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL);
    931   for (gimple_stmt_iterator gsi = gsi_start (*region_body);
    932        !gsi_end_p (gsi);
    933        gsi_next (&gsi))
    934     {
    935       gimple *stmt = gsi_stmt (gsi);
    936       tree target_clauses = gimple_omp_target_clauses (stmt);
    937       tree new_async_clause = build_omp_clause (loc, OMP_CLAUSE_ASYNC);
    938       OMP_CLAUSE_OPERAND (new_async_clause, 0) = default_async_queue;
    939       OMP_CLAUSE_CHAIN (new_async_clause) = target_clauses;
    940       target_clauses = new_async_clause;
    941       gimple_omp_target_set_clauses (as_a <gomp_target *> (stmt),
    942 				     target_clauses);
    943     }
    944   add_wait (loc, region_body);
    945 }
    946 
    947 /* Auxiliary analysis of the body of a kernels region, to determine for each
    948    OpenACC loop whether it is control-dependent (i.e., not necessarily
    949    executed every time the kernels region is entered) or not.
    950    We say that a loop is control-dependent if there is some cond, switch, or
    951    goto statement that jumps over it, forwards or backwards.  For example,
    952    if the loop is controlled by an if statement, then a jump to the true
    953    block, the false block, or from one of those blocks to the control flow
    954    join point will necessarily jump over the loop.
    955    This analysis implements an ad-hoc union-find data structure classifying
    956    statements into "control-flow regions" as follows: Most statements are in
    957    the same region as their predecessor, except that each OpenACC loop is in
    958    a region of its own, and each OpenACC loop's successor starts a new
    959    region.  We then unite the regions of any statements linked by jumps,
    960    placing any cond, switch, or goto statement in the same region as its
    961    target label(s).
    962    In the end, control dependence of OpenACC loops can be determined by
    963    comparing their immediate predecessor and successor statements' regions.
    964    A jump crosses the loop if and only if the predecessor and successor are
    965    in the same region.  (If there is no predecessor or successor, the loop
    966    is executed unconditionally.)
    967    The methods in this class identify statements by their index in the
    968    kernels region's body.  */
    969 
    970 class control_flow_regions
    971 {
    972   public:
    973     /* Initialize an instance and pre-compute the control-flow region
    974        information for the statement sequence SEQ.  */
    975     control_flow_regions (gimple_seq seq);
    976 
    977     /* Return true if the statement with the given index IDX in the analyzed
    978        statement sequence is an unconditionally executed OpenACC loop.  */
    979     bool is_unconditional_oacc_for_loop (size_t idx);
    980 
    981   private:
    982     /* Find the region representative for the statement identified by index
    983        STMT_IDX.  */
    984     size_t find_rep (size_t stmt_idx);
    985 
    986     /* Union the regions containing the statements represented by
    987        representatives A and B.  */
    988     void union_reps (size_t a, size_t b);
    989 
    990     /* Helper for the constructor.  Performs the actual computation of the
    991        control-flow regions in the statement sequence SEQ.  */
    992     void compute_regions (gimple_seq seq);
    993 
    994     /* The mapping from statement indices to region representatives.  */
    995     vec <size_t> representatives;
    996 
    997     /* A cache mapping statement indices to a flag indicating whether the
    998        statement is a top level OpenACC for loop.  */
    999     vec <bool> omp_for_loops;
   1000 };
   1001 
   1002 control_flow_regions::control_flow_regions (gimple_seq seq)
   1003 {
   1004   representatives.create (1);
   1005   omp_for_loops.create (1);
   1006   compute_regions (seq);
   1007 }
   1008 
   1009 bool
   1010 control_flow_regions::is_unconditional_oacc_for_loop (size_t idx)
   1011 {
   1012   if (idx == 0 || idx == representatives.length () - 1)
   1013     /* The first or last statement in the kernels region.  This means that
   1014        there is no room before or after it for a jump or a label.  Thus
   1015        there cannot be a jump across it, so it is unconditional.  */
   1016     return true;
   1017   /* Otherwise, the loop is unconditional if the statements before and after
   1018      it are in different control flow regions.  Scan forward and backward,
   1019      skipping over neighboring OpenACC for loops, to find these preceding
   1020      statements.  */
   1021   size_t prev_index = idx - 1;
   1022   while (prev_index > 0 && omp_for_loops [prev_index] == true)
   1023     prev_index--;
   1024   /* If all preceding statements are also OpenACC loops, all of these are
   1025      unconditional.  */
   1026   if (prev_index == 0)
   1027     return true;
   1028   size_t succ_index = idx + 1;
   1029   while (succ_index < omp_for_loops.length ()
   1030 	 && omp_for_loops [succ_index] == true)
   1031     succ_index++;
   1032   /* If all following statements are also OpenACC loops, all of these are
   1033      unconditional.  */
   1034   if (succ_index == omp_for_loops.length ())
   1035     return true;
   1036   return (find_rep (prev_index) != find_rep (succ_index));
   1037 }
   1038 
   1039 size_t
   1040 control_flow_regions::find_rep (size_t stmt_idx)
   1041 {
   1042   size_t rep = stmt_idx, aux = stmt_idx;
   1043   /* Find the root representative of this statement.  */
   1044   while (representatives[rep] != rep)
   1045     rep = representatives[rep];
   1046   /* Compress the path from the original statement to the representative.  */
   1047   while (representatives[aux] != rep)
   1048     {
   1049       size_t tmp = representatives[aux];
   1050       representatives[aux] = rep;
   1051       aux = tmp;
   1052     }
   1053   return rep;
   1054 }
   1055 
   1056 void
   1057 control_flow_regions::union_reps (size_t a, size_t b)
   1058 {
   1059   a = find_rep (a);
   1060   b = find_rep (b);
   1061   representatives[b] = a;
   1062 }
   1063 
   1064 void
   1065 control_flow_regions::compute_regions (gimple_seq seq)
   1066 {
   1067   hash_map <gimple *, size_t> control_flow_reps;
   1068   hash_map <tree, size_t> label_reps;
   1069   size_t current_region = 0, idx = 0;
   1070 
   1071   /* In a first pass, assign an initial region to each statement.  Except in
   1072      the case of OpenACC loops, each statement simply gets the same region
   1073      representative as its predecessor.  */
   1074   for (gimple_stmt_iterator gsi = gsi_start (seq);
   1075        !gsi_end_p (gsi);
   1076        gsi_next (&gsi))
   1077     {
   1078       gimple *stmt = gsi_stmt (gsi);
   1079       gimple *omp_for = top_level_omp_for_in_stmt (stmt);
   1080       omp_for_loops.safe_push (omp_for != NULL);
   1081       if (omp_for != NULL)
   1082 	{
   1083 	  /* Assign a new region to this loop and to its successor.  */
   1084 	  current_region = idx;
   1085 	  representatives.safe_push (current_region);
   1086 	  current_region++;
   1087 	}
   1088       else
   1089 	{
   1090 	  representatives.safe_push (current_region);
   1091 	  /* Remember any jumps and labels for the second pass below.  */
   1092 	  if (gimple_code (stmt) == GIMPLE_COND
   1093 	      || gimple_code (stmt) == GIMPLE_SWITCH
   1094 	      || gimple_code (stmt) == GIMPLE_GOTO)
   1095 	    control_flow_reps.put (stmt, current_region);
   1096 	  else if (gimple_code (stmt) == GIMPLE_LABEL)
   1097 	    label_reps.put (gimple_label_label (as_a <glabel *> (stmt)),
   1098 			    current_region);
   1099 	}
   1100       idx++;
   1101     }
   1102   gcc_assert (representatives.length () == omp_for_loops.length ());
   1103 
   1104   /* Revisit all the control flow statements and union the region of each
   1105      cond, switch, or goto statement with the target labels' regions.  */
   1106   for (hash_map <gimple *, size_t>::iterator it = control_flow_reps.begin ();
   1107        it != control_flow_reps.end ();
   1108        ++it)
   1109     {
   1110       gimple *stmt = (*it).first;
   1111       size_t stmt_rep = (*it).second;
   1112       switch (gimple_code (stmt))
   1113 	{
   1114 	  tree label;
   1115 	  unsigned int n;
   1116 
   1117 	case GIMPLE_COND:
   1118 	  label = gimple_cond_true_label (as_a <gcond *> (stmt));
   1119 	  union_reps (stmt_rep, *label_reps.get (label));
   1120 	  label = gimple_cond_false_label (as_a <gcond *> (stmt));
   1121 	  union_reps (stmt_rep, *label_reps.get (label));
   1122 	  break;
   1123 
   1124 	case GIMPLE_SWITCH:
   1125 	  n = gimple_switch_num_labels (as_a <gswitch *> (stmt));
   1126 	  for (unsigned int i = 0; i < n; i++)
   1127 	    {
   1128 	      tree switch_case
   1129 		= gimple_switch_label (as_a <gswitch *> (stmt), i);
   1130 	      label = CASE_LABEL (switch_case);
   1131 	      union_reps (stmt_rep, *label_reps.get (label));
   1132 	    }
   1133 	  break;
   1134 
   1135 	case GIMPLE_GOTO:
   1136 	  label = gimple_goto_dest (stmt);
   1137 	  union_reps (stmt_rep, *label_reps.get (label));
   1138 	  break;
   1139 
   1140 	default:
   1141 	  gcc_unreachable ();
   1142 	}
   1143     }
   1144 }
   1145 
   1146 /* Decompose the body of the KERNELS_REGION, which was originally annotated
   1147    with the KERNELS_CLAUSES, into a series of compute constructs.  */
   1148 
   1149 static gimple *
   1150 decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
   1151 {
   1152   location_t loc = gimple_location (kernels_region);
   1153 
   1154   /* The kernels clauses will be propagated to the child clauses unmodified,
   1155      except that the 'num_gangs', 'num_workers', and 'vector_length' clauses
   1156      will only be added to loop regions.  The other regions are "gang-single"
   1157      and get an explicit 'num_gangs (1)' clause.  So separate out the
   1158      'num_gangs', 'num_workers', and 'vector_length' clauses here.
   1159      Also check for the presence of an 'async' clause but do not remove it from
   1160      the 'kernels' clauses.  */
   1161   tree num_gangs_clause = NULL, num_workers_clause = NULL,
   1162        vector_length_clause = NULL;
   1163   tree async_clause = NULL;
   1164   tree prev_clause = NULL, next_clause = NULL;
   1165   tree parallel_clauses = kernels_clauses;
   1166   for (tree c = parallel_clauses; c; c = next_clause)
   1167     {
   1168       /* Preserve this here, as we might NULL it later.  */
   1169       next_clause = OMP_CLAUSE_CHAIN (c);
   1170 
   1171       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS
   1172 	  || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS
   1173 	  || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH)
   1174 	{
   1175 	  /* Cut this clause out of the chain.  */
   1176 	  if (prev_clause != NULL)
   1177 	    OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (c);
   1178 	  else
   1179 	    kernels_clauses = OMP_CLAUSE_CHAIN (c);
   1180 	  OMP_CLAUSE_CHAIN (c) = NULL;
   1181 	  switch (OMP_CLAUSE_CODE (c))
   1182 	    {
   1183 	    case OMP_CLAUSE_NUM_GANGS:
   1184 	      num_gangs_clause = c;
   1185 	      break;
   1186 	    case OMP_CLAUSE_NUM_WORKERS:
   1187 	      num_workers_clause = c;
   1188 	      break;
   1189 	    case OMP_CLAUSE_VECTOR_LENGTH:
   1190 	      vector_length_clause = c;
   1191 	      break;
   1192 	    default:
   1193 	      gcc_unreachable ();
   1194 	    }
   1195 	}
   1196       else
   1197 	prev_clause = c;
   1198       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC)
   1199 	async_clause = c;
   1200     }
   1201 
   1202   gimple *kernels_body = gimple_omp_body (kernels_region);
   1203   gbind *kernels_bind = as_a <gbind *> (kernels_body);
   1204 
   1205   /* The body of the region may contain other nested binds declaring inner
   1206      local variables.  Collapse all these binds into one to ensure that we
   1207      have a single sequence of statements to iterate over; also, collect all
   1208      inner variables.  */
   1209   tree inner_bind_vars = flatten_binds (kernels_bind);
   1210   gimple_seq body_sequence = gimple_bind_body (kernels_bind);
   1211 
   1212   /* All these inner variables will get allocated on the device (below, by
   1213      calling maybe_build_inner_data_region).  Here we create 'present'
   1214      clauses for them and add these clauses to the list of clauses to be
   1215      attached to each inner compute construct.  */
   1216   tree present_clauses = kernels_clauses;
   1217   for (tree var = inner_bind_vars; var; var = TREE_CHAIN (var))
   1218     {
   1219       if (!DECL_ARTIFICIAL (var) && TREE_CODE (var) != CONST_DECL)
   1220 	{
   1221 	  tree present_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
   1222 	  OMP_CLAUSE_SET_MAP_KIND (present_clause, GOMP_MAP_FORCE_PRESENT);
   1223 	  OMP_CLAUSE_DECL (present_clause) = var;
   1224 	  OMP_CLAUSE_SIZE (present_clause) = DECL_SIZE_UNIT (var);
   1225 	  OMP_CLAUSE_CHAIN (present_clause) = present_clauses;
   1226 	  present_clauses = present_clause;
   1227 	}
   1228     }
   1229   kernels_clauses = present_clauses;
   1230 
   1231   /* In addition to nested binds, the "real" body of the region may be
   1232      nested inside a try-finally block.  Find its cleanup block, which
   1233      contains code to clobber the local variables that must be clobbered.  */
   1234   gimple *inner_cleanup = NULL;
   1235   if (body_sequence != NULL && gimple_code (body_sequence) == GIMPLE_TRY)
   1236     {
   1237       if (gimple_seq_singleton_p (body_sequence))
   1238 	{
   1239 	  /* The try statement is the only thing inside the bind.  */
   1240 	  inner_cleanup = gimple_try_cleanup (body_sequence);
   1241 	  body_sequence = gimple_try_eval (body_sequence);
   1242 	}
   1243       else
   1244 	{
   1245 	  /* The bind's body starts with a try statement, but it is followed
   1246 	     by other things.  */
   1247 	  gimple_stmt_iterator gsi = gsi_start (body_sequence);
   1248 	  gimple *try_stmt = gsi_stmt (gsi);
   1249 	  inner_cleanup = gimple_try_cleanup (try_stmt);
   1250 	  gimple *try_body = gimple_try_eval (try_stmt);
   1251 
   1252 	  gsi_remove (&gsi, false);
   1253 	  /* Now gsi indicates the sequence of statements after the try
   1254 	     statement in the bind.  Append the statement in the try body and
   1255 	     the trailing statements from gsi.  */
   1256 	  gsi_insert_seq_before (&gsi, try_body, GSI_CONTINUE_LINKING);
   1257 	  body_sequence = gsi_stmt (gsi);
   1258 	}
   1259     }
   1260 
   1261   /* This sequence will collect all the top-level statements in the body of
   1262      the data region we are about to construct.  */
   1263   gimple_seq region_body = NULL;
   1264   /* This sequence will collect consecutive statements to be put into a
   1265      gang-single region.  */
   1266   gimple_seq gang_single_seq = NULL;
   1267   /* Flag recording whether the gang_single_seq only contains copies to
   1268      local variables.  These may be loop setup code that should not be
   1269      separated from the loop.  */
   1270   bool only_simple_assignments = true;
   1271 
   1272   /* Precompute the control flow region information to determine whether an
   1273      OpenACC loop is executed conditionally or unconditionally.  */
   1274   control_flow_regions cf_regions (body_sequence);
   1275 
   1276   /* Iterate over the statements in the kernels region's body.  */
   1277   size_t idx = 0;
   1278   gimple_stmt_iterator gsi, gsi_n;
   1279   for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n, idx++)
   1280     {
   1281       /* Advance the iterator here because otherwise it would be invalidated
   1282 	 by moving statements below.  */
   1283       gsi_n = gsi;
   1284       gsi_next (&gsi_n);
   1285 
   1286       gimple *stmt = gsi_stmt (gsi);
   1287       if (gimple_code (stmt) == GIMPLE_DEBUG)
   1288 	{
   1289 	  if (flag_compare_debug_opt || flag_compare_debug)
   1290 	    /* Let the usual '-fcompare-debug' analysis bail out, as
   1291 	       necessary.  */
   1292 	    ;
   1293 	  else
   1294 	    sorry_at (loc, "%qs not yet supported",
   1295 		      gimple_code_name[gimple_code (stmt)]);
   1296 	}
   1297       gimple *omp_for = top_level_omp_for_in_stmt (stmt);
   1298       bool is_unconditional_oacc_for_loop = false;
   1299       if (omp_for != NULL)
   1300 	is_unconditional_oacc_for_loop
   1301 	  = cf_regions.is_unconditional_oacc_for_loop (idx);
   1302       if (omp_for != NULL
   1303 	  && is_unconditional_oacc_for_loop)
   1304 	{
   1305 	  /* This is an OMP for statement, put it into a separate region.
   1306 	     But first, construct a gang-single region containing any
   1307 	     complex sequential statements we may have seen.  */
   1308 	  if (gang_single_seq != NULL && !only_simple_assignments)
   1309 	    {
   1310 	      gimple *single_region
   1311 		= make_region_seq (loc, gang_single_seq,
   1312 				   num_gangs_clause,
   1313 				   num_workers_clause,
   1314 				   vector_length_clause,
   1315 				   kernels_clauses);
   1316 	      gimple_seq_add_stmt (&region_body, single_region);
   1317 	    }
   1318 	  else if (gang_single_seq != NULL && only_simple_assignments)
   1319 	    {
   1320 	      /* There is a sequence of sequential statements preceding this
   1321 		 loop, but they are all simple assignments.  This is
   1322 		 probably setup code for the loop; in particular, Fortran DO
   1323 		 loops are preceded by code to copy the loop limit variable
   1324 		 to a temporary.  Group this code together with the loop
   1325 		 itself.  */
   1326 	      gimple_seq_add_stmt (&gang_single_seq, stmt);
   1327 	      stmt = gimple_build_bind (NULL, gang_single_seq,
   1328 					make_node (BLOCK));
   1329 	    }
   1330 	  gang_single_seq = NULL;
   1331 	  only_simple_assignments = true;
   1332 
   1333 	  gimple_seq parallel_seq = NULL;
   1334 	  gimple_seq_add_stmt (&parallel_seq, stmt);
   1335 	  gimple *parallel_region
   1336 	    = make_region_loop_nest (omp_for, parallel_seq,
   1337 				     num_gangs_clause,
   1338 				     num_workers_clause,
   1339 				     vector_length_clause,
   1340 				     kernels_clauses);
   1341 	  gimple_seq_add_stmt (&region_body, parallel_region);
   1342 	}
   1343       else
   1344 	{
   1345 	  if (omp_for != NULL)
   1346 	    {
   1347 	      gcc_checking_assert (!is_unconditional_oacc_for_loop);
   1348 	      if (dump_enabled_p ())
   1349 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, omp_for,
   1350 				 "unparallelized loop nest"
   1351 				 " in OpenACC %<kernels%> region:"
   1352 				 " it's executed conditionally\n");
   1353 	    }
   1354 
   1355 	  /* This is not an unconditional OMP for statement, so it will be
   1356 	     put into a gang-single region.  */
   1357 	  gimple_seq_add_stmt (&gang_single_seq, stmt);
   1358 	  /* Is this a simple assignment? We call it simple if it is an
   1359 	     assignment to an artificial local variable.  This captures
   1360 	     Fortran loop setup code computing loop bounds and offsets.  */
   1361 	  bool is_simple_assignment
   1362 	    = (gimple_code (stmt) == GIMPLE_ASSIGN
   1363 	       && TREE_CODE (gimple_assign_lhs (stmt)) == VAR_DECL
   1364 	       && DECL_ARTIFICIAL (gimple_assign_lhs (stmt)));
   1365 	  if (!is_simple_assignment)
   1366 	    only_simple_assignments = false;
   1367 	}
   1368     }
   1369 
   1370   /* If we did not emit a new region, and are not going to emit one now
   1371      (that is, the original region was empty), prepare to emit a dummy so as
   1372      to preserve the original construct, which other processing (at least
   1373      test cases) depend on.  */
   1374   if (region_body == NULL && gang_single_seq == NULL)
   1375     {
   1376       gimple *stmt = gimple_build_nop ();
   1377       gimple_set_location (stmt, loc);
   1378       gimple_seq_add_stmt (&gang_single_seq, stmt);
   1379     }
   1380 
   1381   /* Gather up any remaining gang-single statements.  */
   1382   if (gang_single_seq != NULL)
   1383     {
   1384       gimple *single_region
   1385 	= make_region_seq (loc, gang_single_seq,
   1386 			   num_gangs_clause,
   1387 			   num_workers_clause,
   1388 			   vector_length_clause,
   1389 			   kernels_clauses);
   1390       gimple_seq_add_stmt (&region_body, single_region);
   1391     }
   1392 
   1393   /* We want to launch these kernels asynchronously.  If the original
   1394      kernels region had an async clause, this is done automatically because
   1395      that async clause was copied to the individual regions we created.
   1396      Otherwise, add an async clause to each newly created region, as well as
   1397      a wait directive at the end.  */
   1398   if (async_clause == NULL)
   1399     add_async_clauses_and_wait (loc, &region_body);
   1400   else
   1401     /* !!! If we have asynchronous parallel blocks inside a (synchronous) data
   1402        region, then target memory will get unmapped at the point the data
   1403        region ends, even if the inner asynchronous parallels have not yet
   1404        completed.  For kernels marked "async", we might want to use "enter data
   1405        async(...)" and "exit data async(...)" instead, or asynchronous data
   1406        regions (see also <https://gcc.gnu.org/PR97390>
   1407        "[OpenACC] 'async' clause on 'data' construct",
   1408        which is to share the same implementation).
   1409        For now, insert a (synchronous) wait at the end of the block.  */
   1410     add_wait (loc, &region_body);
   1411 
   1412   tree kernels_locals = gimple_bind_vars (as_a <gbind *> (kernels_body));
   1413   gimple *body = gimple_build_bind (kernels_locals, region_body,
   1414 				    make_node (BLOCK));
   1415 
   1416   /* If we found variables declared in nested scopes, build a data region to
   1417      map them to the device.  */
   1418   body = maybe_build_inner_data_region (loc, body, inner_bind_vars,
   1419 					inner_cleanup);
   1420 
   1421   return body;
   1422 }
   1423 
   1424 /* Decompose one OpenACC 'kernels' construct into an OpenACC 'data' construct
   1425    containing the original OpenACC 'kernels' construct's region cut up into a
   1426    sequence of compute constructs.  */
   1427 
   1428 static gimple *
   1429 omp_oacc_kernels_decompose_1 (gimple *kernels_stmt)
   1430 {
   1431   gcc_checking_assert (gimple_omp_target_kind (kernels_stmt)
   1432 		       == GF_OMP_TARGET_KIND_OACC_KERNELS);
   1433   location_t loc = gimple_location (kernels_stmt);
   1434 
   1435   /* Collect the data clauses of the OpenACC 'kernels' directive and create a
   1436      new OpenACC 'data' construct with those clauses.  */
   1437   tree kernels_clauses = gimple_omp_target_clauses (kernels_stmt);
   1438   tree data_clauses = NULL;
   1439   for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c))
   1440     {
   1441       /* Certain clauses are copied to the enclosing OpenACC 'data'.  Other
   1442 	 clauses remain on the OpenACC 'kernels'.  */
   1443       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP)
   1444 	{
   1445 	  tree decl = OMP_CLAUSE_DECL (c);
   1446 	  HOST_WIDE_INT map_kind = OMP_CLAUSE_MAP_KIND (c);
   1447 	  switch (map_kind)
   1448 	    {
   1449 	    default:
   1450 	      if (map_kind == GOMP_MAP_ALLOC
   1451 		  && integer_zerop (OMP_CLAUSE_SIZE (c)))
   1452 		/* ??? This is an alloc clause for mapping a pointer whose
   1453 		   target is already mapped.  We leave these on the inner
   1454 		   compute constructs because moving them to the outer data
   1455 		   region causes runtime errors.  */
   1456 		break;
   1457 
   1458 	      /* For non-artificial variables, and for non-declaration
   1459 		 expressions like A[0:n], copy the clause to the data
   1460 		 region.  */
   1461 	      if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl))
   1462 		  || !DECL_P (decl))
   1463 		{
   1464 		  tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c),
   1465 						      OMP_CLAUSE_MAP);
   1466 		  OMP_CLAUSE_SET_MAP_KIND (new_clause, map_kind);
   1467 		  /* This must be unshared here to avoid "incorrect sharing
   1468 		     of tree nodes" errors from verify_gimple.  */
   1469 		  OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl);
   1470 		  OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c);
   1471 		  OMP_CLAUSE_CHAIN (new_clause) = data_clauses;
   1472 		  data_clauses = new_clause;
   1473 
   1474 		  /* Now that this data is mapped, turn the data clause on the
   1475 		     inner OpenACC 'kernels' into a 'present' clause.  */
   1476 		  OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT);
   1477 
   1478 		  /* See <https://gcc.gnu.org/PR100280>,
   1479 		     <https://gcc.gnu.org/PR104086>.  */
   1480 		  if (DECL_P (decl)
   1481 		      && !TREE_ADDRESSABLE (decl))
   1482 		    {
   1483 		      /* Request that OMP lowering make 'decl' addressable.  */
   1484 		      OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1;
   1485 
   1486 		      if (dump_enabled_p ())
   1487 			{
   1488 			  location_t loc = OMP_CLAUSE_LOCATION (new_clause);
   1489 			  const dump_user_location_t d_u_loc
   1490 			    = dump_user_location_t::from_location_t (loc);
   1491 			  /* PR100695 "Format decoder, quoting in 'dump_printf'
   1492 			     etc." */
   1493 #if __GNUC__ >= 10
   1494 # pragma GCC diagnostic push
   1495 # pragma GCC diagnostic ignored "-Wformat"
   1496 #endif
   1497 			  dump_printf_loc
   1498 			    (MSG_NOTE, d_u_loc,
   1499 			     "OpenACC %<kernels%> decomposition:"
   1500 			     " variable %<%T%> in %qs clause"
   1501 			     " requested to be made addressable\n",
   1502 			     decl,
   1503 			     user_omp_clause_code_name (new_clause, true));
   1504 #if __GNUC__ >= 10
   1505 # pragma GCC diagnostic pop
   1506 #endif
   1507 			}
   1508 		    }
   1509 		}
   1510 	      break;
   1511 
   1512 	    case GOMP_MAP_POINTER:
   1513 	    case GOMP_MAP_TO_PSET:
   1514 	    case GOMP_MAP_FIRSTPRIVATE_POINTER:
   1515 	    case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
   1516 	      /* ??? Copying these map kinds leads to internal compiler
   1517 		 errors in later passes.  */
   1518 	      break;
   1519 	    }
   1520 	}
   1521       else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF)
   1522 	{
   1523 	  /* If there is an 'if' clause, it must be duplicated to the
   1524 	     enclosing data region.  Temporarily remove the if clause's
   1525 	     chain to avoid copying it.  */
   1526 	  tree saved_chain = OMP_CLAUSE_CHAIN (c);
   1527 	  OMP_CLAUSE_CHAIN (c) = NULL;
   1528 	  tree new_if_clause = unshare_expr (c);
   1529 	  OMP_CLAUSE_CHAIN (c) = saved_chain;
   1530 	  OMP_CLAUSE_CHAIN (new_if_clause) = data_clauses;
   1531 	  data_clauses = new_if_clause;
   1532 	}
   1533     }
   1534   /* Restore the original order of the clauses.  */
   1535   data_clauses = nreverse (data_clauses);
   1536 
   1537   gimple *data_region
   1538     = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
   1539 			       data_clauses);
   1540   gimple_set_location (data_region, loc);
   1541 
   1542   /* Transform the body of the kernels region into a sequence of compute
   1543      constructs.  */
   1544   gimple *body = decompose_kernels_region_body (kernels_stmt,
   1545 						kernels_clauses);
   1546 
   1547   /* Put the transformed pieces together.  The entire body of the region is
   1548      wrapped in a try-finally statement that calls __builtin_GOACC_data_end
   1549      for cleanup.  */
   1550   gimple *try_stmt = make_data_region_try_statement (loc, body);
   1551   gimple_omp_set_body (data_region, try_stmt);
   1552 
   1553   return data_region;
   1554 }
   1555 
   1556 
   1557 /* Decompose OpenACC 'kernels' constructs in the current function.  */
   1558 
   1559 static tree
   1560 omp_oacc_kernels_decompose_callback_stmt (gimple_stmt_iterator *gsi_p,
   1561 					  bool *handled_ops_p,
   1562 					  struct walk_stmt_info *)
   1563 {
   1564   gimple *stmt = gsi_stmt (*gsi_p);
   1565 
   1566   if ((gimple_code (stmt) == GIMPLE_OMP_TARGET)
   1567       && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
   1568     {
   1569       gimple *stmt_new = omp_oacc_kernels_decompose_1 (stmt);
   1570       gsi_replace (gsi_p, stmt_new, false);
   1571       *handled_ops_p = true;
   1572     }
   1573   else
   1574     *handled_ops_p = false;
   1575 
   1576   return NULL;
   1577 }
   1578 
   1579 static unsigned int
   1580 omp_oacc_kernels_decompose (void)
   1581 {
   1582   gimple_seq body = gimple_body (current_function_decl);
   1583 
   1584   struct walk_stmt_info wi;
   1585   memset (&wi, 0, sizeof (wi));
   1586   walk_gimple_seq_mod (&body, omp_oacc_kernels_decompose_callback_stmt, NULL,
   1587 		       &wi);
   1588 
   1589   gimple_set_body (current_function_decl, body);
   1590 
   1591   return 0;
   1592 }
   1593 
   1594 
   1595 namespace {
   1596 
   1597 const pass_data pass_data_omp_oacc_kernels_decompose =
   1598 {
   1599   GIMPLE_PASS, /* type */
   1600   "omp_oacc_kernels_decompose", /* name */
   1601   OPTGROUP_OMP, /* optinfo_flags */
   1602   TV_NONE, /* tv_id */
   1603   PROP_gimple_any, /* properties_required */
   1604   0, /* properties_provided */
   1605   0, /* properties_destroyed */
   1606   0, /* todo_flags_start */
   1607   0, /* todo_flags_finish */
   1608 };
   1609 
   1610 class pass_omp_oacc_kernels_decompose : public gimple_opt_pass
   1611 {
   1612 public:
   1613   pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
   1614     : gimple_opt_pass (pass_data_omp_oacc_kernels_decompose, ctxt)
   1615   {}
   1616 
   1617   /* opt_pass methods: */
   1618   virtual bool gate (function *)
   1619   {
   1620     return (flag_openacc
   1621 	    && param_openacc_kernels == OPENACC_KERNELS_DECOMPOSE);
   1622   }
   1623   virtual unsigned int execute (function *)
   1624   {
   1625     return omp_oacc_kernels_decompose ();
   1626   }
   1627 
   1628 }; // class pass_omp_oacc_kernels_decompose
   1629 
   1630 } // anon namespace
   1631 
   1632 gimple_opt_pass *
   1633 make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
   1634 {
   1635   return new pass_omp_oacc_kernels_decompose (ctxt);
   1636 }
   1637