1 1.1 mrg /* Decompose OpenACC 'kernels' constructs into parts, a sequence of compute 2 1.1 mrg constructs 3 1.1 mrg 4 1.1 mrg Copyright (C) 2020-2022 Free Software Foundation, Inc. 5 1.1 mrg 6 1.1 mrg This file is part of GCC. 7 1.1 mrg 8 1.1 mrg GCC is free software; you can redistribute it and/or modify it under 9 1.1 mrg the terms of the GNU General Public License as published by the Free 10 1.1 mrg Software Foundation; either version 3, or (at your option) any later 11 1.1 mrg version. 12 1.1 mrg 13 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 1.1 mrg for more details. 17 1.1 mrg 18 1.1 mrg You should have received a copy of the GNU General Public License 19 1.1 mrg along with GCC; see the file COPYING3. If not see 20 1.1 mrg <http://www.gnu.org/licenses/>. */ 21 1.1 mrg 22 1.1 mrg #include "config.h" 23 1.1 mrg #include "system.h" 24 1.1 mrg #include "coretypes.h" 25 1.1 mrg #include "backend.h" 26 1.1 mrg #include "target.h" 27 1.1 mrg #include "tree.h" 28 1.1 mrg #include "langhooks.h" 29 1.1 mrg #include "gimple.h" 30 1.1 mrg #include "tree-pass.h" 31 1.1 mrg #include "cgraph.h" 32 1.1 mrg #include "fold-const.h" 33 1.1 mrg #include "gimplify.h" 34 1.1 mrg #include "gimple-iterator.h" 35 1.1 mrg #include "gimple-walk.h" 36 1.1 mrg #include "gomp-constants.h" 37 1.1 mrg #include "omp-general.h" 38 1.1 mrg #include "diagnostic-core.h" 39 1.1 mrg 40 1.1 mrg 41 1.1 mrg /* This preprocessing pass is run immediately before lower_omp. It decomposes 42 1.1 mrg OpenACC 'kernels' constructs into parts, a sequence of compute constructs. 43 1.1 mrg 44 1.1 mrg The translation is as follows: 45 1.1 mrg - The entire 'kernels' region is turned into a 'data' region with clauses 46 1.1 mrg taken from the 'kernels' region. New 'create' clauses are added for all 47 1.1 mrg variables declared at the top level in the kernels region. 48 1.1 mrg - Any loop nests annotated with an OpenACC 'loop' directive are wrapped in 49 1.1 mrg a new compute construct. 50 1.1 mrg - 'loop' directives without an explicit 'independent' or 'seq' clause 51 1.1 mrg get an 'auto' clause added; other clauses are preserved on the loop 52 1.1 mrg or moved to the new surrounding compute construct, as applicable. 53 1.1 mrg - Any sequences of other code (non-loops, non-OpenACC 'loop's) are wrapped 54 1.1 mrg in new "gang-single" compute construct: 'worker'/'vector' parallelism is 55 1.1 mrg preserved, but 'num_gangs (1)' is enforced. 56 1.1 mrg - Both points above only apply at the topmost level in the region, that 57 1.1 mrg is, the transformation does not introduce new compute constructs inside 58 1.1 mrg nested statement bodies. In particular, this means that a 59 1.1 mrg gang-parallelizable loop inside an 'if' statement is made "gang-single". 60 1.1 mrg - In order to make the host wait only once for the whole region instead 61 1.1 mrg of once per device kernel launch, the new compute constructs are 62 1.1 mrg annotated 'async'. Unless the original 'kernels' construct already was 63 1.1 mrg marked 'async', the entire region ends with a 'wait' directive. If the 64 1.1 mrg original 'kernels' construct was marked 'async', the synthesized 'async' 65 1.1 mrg clauses use the original 'kernels' construct's 'async' argument 66 1.1 mrg (possibly implicit). 67 1.1 mrg */ 68 1.1 mrg 69 1.1 mrg 70 1.1 mrg /*TODO Things are conceptually wrong here: 'loop' clauses may be hidden behind 71 1.1 mrg 'device_type', so we have to defer a lot of processing until we're in the 72 1.1 mrg offloading compilation. "Fortunately", GCC doesn't support the OpenACC 73 1.1 mrg 'device_type' clause yet, so we get away that. */ 74 1.1 mrg 75 1.1 mrg 76 1.1 mrg /* Helper function for decompose_kernels_region_body. If STMT contains a 77 1.1 mrg "top-level" OMP_FOR statement, returns a pointer to that statement; 78 1.1 mrg returns NULL otherwise. 79 1.1 mrg 80 1.1 mrg A "top-level" OMP_FOR statement is one that is possibly accompanied by 81 1.1 mrg small snippets of setup code. Specifically, this function accepts an 82 1.1 mrg OMP_FOR possibly wrapped in a singleton bind and a singleton try 83 1.1 mrg statement to allow for a local loop variable, but not an OMP_FOR 84 1.1 mrg statement nested in any other constructs. Alternatively, it accepts a 85 1.1 mrg non-singleton bind containing only assignments and then an OMP_FOR 86 1.1 mrg statement at the very end. The former style can be generated by the C 87 1.1 mrg frontend, the latter by the Fortran frontend. */ 88 1.1 mrg 89 1.1 mrg static gimple * 90 1.1 mrg top_level_omp_for_in_stmt (gimple *stmt) 91 1.1 mrg { 92 1.1 mrg if (gimple_code (stmt) == GIMPLE_OMP_FOR) 93 1.1 mrg return stmt; 94 1.1 mrg 95 1.1 mrg if (gimple_code (stmt) == GIMPLE_BIND) 96 1.1 mrg { 97 1.1 mrg gimple_seq body = gimple_bind_body (as_a <gbind *> (stmt)); 98 1.1 mrg if (gimple_seq_singleton_p (body)) 99 1.1 mrg { 100 1.1 mrg /* Accept an OMP_FOR statement, or a try statement containing only 101 1.1 mrg a single OMP_FOR. */ 102 1.1 mrg gimple *maybe_for_or_try = gimple_seq_first_stmt (body); 103 1.1 mrg if (gimple_code (maybe_for_or_try) == GIMPLE_OMP_FOR) 104 1.1 mrg return maybe_for_or_try; 105 1.1 mrg else if (gimple_code (maybe_for_or_try) == GIMPLE_TRY) 106 1.1 mrg { 107 1.1 mrg gimple_seq try_body = gimple_try_eval (maybe_for_or_try); 108 1.1 mrg if (!gimple_seq_singleton_p (try_body)) 109 1.1 mrg return NULL; 110 1.1 mrg gimple *maybe_omp_for_stmt = gimple_seq_first_stmt (try_body); 111 1.1 mrg if (gimple_code (maybe_omp_for_stmt) == GIMPLE_OMP_FOR) 112 1.1 mrg return maybe_omp_for_stmt; 113 1.1 mrg } 114 1.1 mrg } 115 1.1 mrg else 116 1.1 mrg { 117 1.1 mrg gimple_stmt_iterator gsi; 118 1.1 mrg /* Accept only a block of optional assignments followed by an 119 1.1 mrg OMP_FOR at the end. No other kinds of statements allowed. */ 120 1.1 mrg for (gsi = gsi_start (body); !gsi_end_p (gsi); gsi_next (&gsi)) 121 1.1 mrg { 122 1.1 mrg gimple *body_stmt = gsi_stmt (gsi); 123 1.1 mrg if (gimple_code (body_stmt) == GIMPLE_ASSIGN) 124 1.1 mrg continue; 125 1.1 mrg else if (gimple_code (body_stmt) == GIMPLE_OMP_FOR 126 1.1 mrg && gsi_one_before_end_p (gsi)) 127 1.1 mrg return body_stmt; 128 1.1 mrg else 129 1.1 mrg return NULL; 130 1.1 mrg } 131 1.1 mrg } 132 1.1 mrg } 133 1.1 mrg 134 1.1 mrg return NULL; 135 1.1 mrg } 136 1.1 mrg 137 1.1 mrg /* Helper for adjust_region_code: evaluate the statement at GSI_P. */ 138 1.1 mrg 139 1.1 mrg static tree 140 1.1 mrg adjust_region_code_walk_stmt_fn (gimple_stmt_iterator *gsi_p, 141 1.1 mrg bool *handled_ops_p, 142 1.1 mrg struct walk_stmt_info *wi) 143 1.1 mrg { 144 1.1 mrg int *region_code = (int *) wi->info; 145 1.1 mrg 146 1.1 mrg gimple *stmt = gsi_stmt (*gsi_p); 147 1.1 mrg switch (gimple_code (stmt)) 148 1.1 mrg { 149 1.1 mrg case GIMPLE_OMP_FOR: 150 1.1 mrg { 151 1.1 mrg tree clauses = gimple_omp_for_clauses (stmt); 152 1.1 mrg if (omp_find_clause (clauses, OMP_CLAUSE_INDEPENDENT)) 153 1.1 mrg { 154 1.1 mrg /* Explicit 'independent' clause. */ 155 1.1 mrg /* Keep going; recurse into loop body. */ 156 1.1 mrg break; 157 1.1 mrg } 158 1.1 mrg else if (omp_find_clause (clauses, OMP_CLAUSE_SEQ)) 159 1.1 mrg { 160 1.1 mrg /* Explicit 'seq' clause. */ 161 1.1 mrg /* We'll "parallelize" if at some level a loop construct has been 162 1.1 mrg marked up by the user as unparallelizable ('seq' clause; we'll 163 1.1 mrg respect that in the later processing). Given that the user has 164 1.1 mrg explicitly marked it up, this loop construct cannot be 165 1.1 mrg performance-critical, and in this case it's also fine to 166 1.1 mrg "parallelize" instead of "gang-single", because any outer or 167 1.1 mrg inner loops may still exploit the available parallelism. */ 168 1.1 mrg /* Keep going; recurse into loop body. */ 169 1.1 mrg break; 170 1.1 mrg } 171 1.1 mrg else 172 1.1 mrg { 173 1.1 mrg /* Explicit or implicit 'auto' clause. */ 174 1.1 mrg /* The user would like this loop analyzed ('auto' clause) and 175 1.1 mrg typically parallelized, but we don't have available yet the 176 1.1 mrg compiler logic to analyze this, so can't parallelize it here, so 177 1.1 mrg we'd very likely be running into a performance problem if we 178 1.1 mrg were to execute this unparallelized, thus forward the whole loop 179 1.1 mrg nest to 'parloops'. */ 180 1.1 mrg *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS; 181 1.1 mrg /* Terminate: final decision for this region. */ 182 1.1 mrg *handled_ops_p = true; 183 1.1 mrg return integer_zero_node; 184 1.1 mrg } 185 1.1 mrg gcc_unreachable (); 186 1.1 mrg } 187 1.1 mrg 188 1.1 mrg case GIMPLE_COND: 189 1.1 mrg case GIMPLE_GOTO: 190 1.1 mrg case GIMPLE_SWITCH: 191 1.1 mrg case GIMPLE_ASM: 192 1.1 mrg case GIMPLE_TRANSACTION: 193 1.1 mrg case GIMPLE_RETURN: 194 1.1 mrg /* Statement that might constitute some looping/control flow pattern. */ 195 1.1 mrg /* The user would like this code analyzed (implicit inside a 'kernels' 196 1.1 mrg region) and typically parallelized, but we don't have available yet 197 1.1 mrg the compiler logic to analyze this, so can't parallelize it here, so 198 1.1 mrg we'd very likely be running into a performance problem if we were to 199 1.1 mrg execute this unparallelized, thus forward the whole thing to 200 1.1 mrg 'parloops'. */ 201 1.1 mrg *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS; 202 1.1 mrg /* Terminate: final decision for this region. */ 203 1.1 mrg *handled_ops_p = true; 204 1.1 mrg return integer_zero_node; 205 1.1 mrg 206 1.1 mrg default: 207 1.1 mrg /* Keep going. */ 208 1.1 mrg break; 209 1.1 mrg } 210 1.1 mrg 211 1.1 mrg return NULL; 212 1.1 mrg } 213 1.1 mrg 214 1.1 mrg /* Adjust the REGION_CODE for the region in GS. */ 215 1.1 mrg 216 1.1 mrg static void 217 1.1 mrg adjust_region_code (gimple_seq gs, int *region_code) 218 1.1 mrg { 219 1.1 mrg struct walk_stmt_info wi; 220 1.1 mrg memset (&wi, 0, sizeof (wi)); 221 1.1 mrg wi.info = region_code; 222 1.1 mrg walk_gimple_seq (gs, adjust_region_code_walk_stmt_fn, NULL, &wi); 223 1.1 mrg } 224 1.1 mrg 225 1.1 mrg /* Helper function for make_loops_gang_single for walking the tree. If the 226 1.1 mrg statement indicated by GSI_P is an OpenACC for loop with a gang clause, 227 1.1 mrg issue a warning and remove the clause. */ 228 1.1 mrg 229 1.1 mrg static tree 230 1.1 mrg visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p, 231 1.1 mrg bool *handled_ops_p, 232 1.1 mrg struct walk_stmt_info *) 233 1.1 mrg { 234 1.1 mrg *handled_ops_p = false; 235 1.1 mrg 236 1.1 mrg gimple *stmt = gsi_stmt (*gsi_p); 237 1.1 mrg switch (gimple_code (stmt)) 238 1.1 mrg { 239 1.1 mrg case GIMPLE_OMP_FOR: 240 1.1 mrg /*TODO Given the current 'adjust_region_code' algorithm, this is 241 1.1 mrg actually... */ 242 1.1 mrg #if 0 243 1.1 mrg gcc_unreachable (); 244 1.1 mrg #else 245 1.1 mrg /* ..., but due to bugs (PR100400), we may actually come here. 246 1.1 mrg Reliably catch this, regardless of checking level. */ 247 1.1 mrg internal_error ("PR100400"); 248 1.1 mrg #endif 249 1.1 mrg 250 1.1 mrg { 251 1.1 mrg tree clauses = gimple_omp_for_clauses (stmt); 252 1.1 mrg tree prev_clause = NULL; 253 1.1 mrg for (tree clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause)) 254 1.1 mrg { 255 1.1 mrg if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_GANG) 256 1.1 mrg { 257 1.1 mrg /* It makes no sense to have a 'gang' clause in a "gang-single" 258 1.1 mrg region, so warn and remove it. */ 259 1.1 mrg warning_at (gimple_location (stmt), 0, 260 1.1 mrg "conditionally executed loop in %<kernels%> region" 261 1.1 mrg " will be executed by a single gang;" 262 1.1 mrg " ignoring %<gang%> clause"); 263 1.1 mrg if (prev_clause != NULL) 264 1.1 mrg OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (clause); 265 1.1 mrg else 266 1.1 mrg clauses = OMP_CLAUSE_CHAIN (clause); 267 1.1 mrg 268 1.1 mrg break; 269 1.1 mrg } 270 1.1 mrg prev_clause = clause; 271 1.1 mrg } 272 1.1 mrg gimple_omp_for_set_clauses (stmt, clauses); 273 1.1 mrg } 274 1.1 mrg /* No need to recurse into nested statements; no loop nested inside 275 1.1 mrg this loop can be gang-partitioned. */ 276 1.1 mrg sorry ("%<gang%> loop in %<gang-single%> region"); 277 1.1 mrg *handled_ops_p = true; 278 1.1 mrg break; 279 1.1 mrg 280 1.1 mrg default: 281 1.1 mrg break; 282 1.1 mrg } 283 1.1 mrg 284 1.1 mrg return NULL; 285 1.1 mrg } 286 1.1 mrg 287 1.1 mrg /* Visit all nested OpenACC loops in the sequence indicated by GS. This 288 1.1 mrg statement is expected to be inside a gang-single region. Issue a warning 289 1.1 mrg for any loops inside it that have gang clauses and remove the clauses. */ 290 1.1 mrg 291 1.1 mrg static void 292 1.1 mrg make_loops_gang_single (gimple_seq gs) 293 1.1 mrg { 294 1.1 mrg struct walk_stmt_info wi; 295 1.1 mrg memset (&wi, 0, sizeof (wi)); 296 1.1 mrg walk_gimple_seq (gs, visit_loops_in_gang_single_region, NULL, &wi); 297 1.1 mrg } 298 1.1 mrg 299 1.1 mrg /* Construct a "gang-single" compute construct at LOC containing the STMTS. 300 1.1 mrg Annotate with CLAUSES, which must not contain a 'num_gangs' clause, and an 301 1.1 mrg additional 'num_gangs (1)' clause to force "gang-single" execution. */ 302 1.1 mrg 303 1.1 mrg static gimple * 304 1.1 mrg make_region_seq (location_t loc, gimple_seq stmts, 305 1.1 mrg tree num_gangs_clause, 306 1.1 mrg tree num_workers_clause, 307 1.1 mrg tree vector_length_clause, 308 1.1 mrg tree clauses) 309 1.1 mrg { 310 1.1 mrg /* This correctly unshares the entire clause chain rooted here. */ 311 1.1 mrg clauses = unshare_expr (clauses); 312 1.1 mrg 313 1.1 mrg dump_user_location_t loc_stmts_first = gimple_seq_first (stmts); 314 1.1 mrg 315 1.1 mrg /* Figure out the region code for this region. */ 316 1.1 mrg /* Optimistic default: assume "setup code", no looping; thus not 317 1.1 mrg performance-critical. */ 318 1.1 mrg int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE; 319 1.1 mrg adjust_region_code (stmts, ®ion_code); 320 1.1 mrg 321 1.1 mrg if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE) 322 1.1 mrg { 323 1.1 mrg if (dump_enabled_p ()) 324 1.1 mrg /*TODO MSG_MISSED_OPTIMIZATION? */ 325 1.1 mrg dump_printf_loc (MSG_NOTE, loc_stmts_first, 326 1.1 mrg "beginning %<gang-single%> part" 327 1.1 mrg " in OpenACC %<kernels%> region\n"); 328 1.1 mrg 329 1.1 mrg /* Synthesize a 'num_gangs (1)' clause. */ 330 1.1 mrg tree gang_single_clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS); 331 1.1 mrg OMP_CLAUSE_OPERAND (gang_single_clause, 0) = integer_one_node; 332 1.1 mrg OMP_CLAUSE_CHAIN (gang_single_clause) = clauses; 333 1.1 mrg clauses = gang_single_clause; 334 1.1 mrg 335 1.1 mrg /* Remove and issue warnings about gang clauses on any OpenACC 336 1.1 mrg loops nested inside this sequentially executed statement. */ 337 1.1 mrg make_loops_gang_single (stmts); 338 1.1 mrg } 339 1.1 mrg else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS) 340 1.1 mrg { 341 1.1 mrg if (dump_enabled_p ()) 342 1.1 mrg dump_printf_loc (MSG_NOTE, loc_stmts_first, 343 1.1 mrg "beginning %<parloops%> part" 344 1.1 mrg " in OpenACC %<kernels%> region\n"); 345 1.1 mrg 346 1.1 mrg /* As we're transforming a 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another 347 1.1 mrg 'GF_OMP_TARGET_KIND_OACC_KERNELS', this isn't doing any of the clauses 348 1.1 mrg mangling that 'make_region_loop_nest' is doing. */ 349 1.1 mrg /* Re-assemble the clauses stripped off earlier. */ 350 1.1 mrg if (num_gangs_clause != NULL) 351 1.1 mrg { 352 1.1 mrg tree c = unshare_expr (num_gangs_clause); 353 1.1 mrg OMP_CLAUSE_CHAIN (c) = clauses; 354 1.1 mrg clauses = c; 355 1.1 mrg } 356 1.1 mrg if (num_workers_clause != NULL) 357 1.1 mrg { 358 1.1 mrg tree c = unshare_expr (num_workers_clause); 359 1.1 mrg OMP_CLAUSE_CHAIN (c) = clauses; 360 1.1 mrg clauses = c; 361 1.1 mrg } 362 1.1 mrg if (vector_length_clause != NULL) 363 1.1 mrg { 364 1.1 mrg tree c = unshare_expr (vector_length_clause); 365 1.1 mrg OMP_CLAUSE_CHAIN (c) = clauses; 366 1.1 mrg clauses = c; 367 1.1 mrg } 368 1.1 mrg } 369 1.1 mrg else 370 1.1 mrg gcc_unreachable (); 371 1.1 mrg 372 1.1 mrg /* Build the gang-single region. */ 373 1.1 mrg gimple *single_region = gimple_build_omp_target (NULL, region_code, clauses); 374 1.1 mrg gimple_set_location (single_region, loc); 375 1.1 mrg gbind *single_body = gimple_build_bind (NULL, stmts, make_node (BLOCK)); 376 1.1 mrg gimple_omp_set_body (single_region, single_body); 377 1.1 mrg 378 1.1 mrg return single_region; 379 1.1 mrg } 380 1.1 mrg 381 1.1 mrg /* Helper function for make_region_loop_nest. Adds a 'num_gangs' 382 1.1 mrg ('num_workers', 'vector_length') clause to the given CLAUSES, either the one 383 1.1 mrg from the parent compute construct (PARENT_CLAUSE) or a new one based on the 384 1.1 mrg loop's own LOOP_CLAUSE ('gang (num: N)' or similar for 'worker' or 'vector' 385 1.1 mrg clauses) with the given CLAUSE_CODE. Does nothing if neither PARENT_CLAUSE 386 1.1 mrg nor LOOP_CLAUSE exist. Returns the new clauses. */ 387 1.1 mrg 388 1.1 mrg static tree 389 1.1 mrg add_parent_or_loop_num_clause (tree parent_clause, tree loop_clause, 390 1.1 mrg omp_clause_code clause_code, tree clauses) 391 1.1 mrg { 392 1.1 mrg if (parent_clause != NULL) 393 1.1 mrg { 394 1.1 mrg tree num_clause = unshare_expr (parent_clause); 395 1.1 mrg OMP_CLAUSE_CHAIN (num_clause) = clauses; 396 1.1 mrg clauses = num_clause; 397 1.1 mrg } 398 1.1 mrg else if (loop_clause != NULL) 399 1.1 mrg { 400 1.1 mrg /* The kernels region does not have a 'num_gangs' clause, but the loop 401 1.1 mrg itself had a 'gang (num: N)' clause. Honor it by adding a 402 1.1 mrg 'num_gangs (N)' clause on the compute construct. */ 403 1.1 mrg tree num = OMP_CLAUSE_OPERAND (loop_clause, 0); 404 1.1 mrg tree new_num_clause 405 1.1 mrg = build_omp_clause (OMP_CLAUSE_LOCATION (loop_clause), clause_code); 406 1.1 mrg OMP_CLAUSE_OPERAND (new_num_clause, 0) = num; 407 1.1 mrg OMP_CLAUSE_CHAIN (new_num_clause) = clauses; 408 1.1 mrg clauses = new_num_clause; 409 1.1 mrg } 410 1.1 mrg return clauses; 411 1.1 mrg } 412 1.1 mrg 413 1.1 mrg /* Helper for make_region_loop_nest, looking for 'worker (num: N)' or 'vector 414 1.1 mrg (length: N)' clauses in nested loops. Removes the argument, transferring it 415 1.1 mrg to the enclosing compute construct (via WI->INFO). If arguments within the 416 1.1 mrg same loop nest conflict, emits a warning. 417 1.1 mrg 418 1.1 mrg This function also decides whether to add an 'auto' clause on each of these 419 1.1 mrg nested loops. */ 420 1.1 mrg 421 1.1 mrg struct adjust_nested_loop_clauses_wi_info 422 1.1 mrg { 423 1.1 mrg tree *loop_gang_clause_ptr; 424 1.1 mrg tree *loop_worker_clause_ptr; 425 1.1 mrg tree *loop_vector_clause_ptr; 426 1.1 mrg }; 427 1.1 mrg 428 1.1 mrg static tree 429 1.1 mrg adjust_nested_loop_clauses (gimple_stmt_iterator *gsi_p, bool *, 430 1.1 mrg struct walk_stmt_info *wi) 431 1.1 mrg { 432 1.1 mrg struct adjust_nested_loop_clauses_wi_info *wi_info 433 1.1 mrg = (struct adjust_nested_loop_clauses_wi_info *) wi->info; 434 1.1 mrg gimple *stmt = gsi_stmt (*gsi_p); 435 1.1 mrg 436 1.1 mrg if (gimple_code (stmt) == GIMPLE_OMP_FOR) 437 1.1 mrg { 438 1.1 mrg bool add_auto_clause = true; 439 1.1 mrg tree loop_clauses = gimple_omp_for_clauses (stmt); 440 1.1 mrg tree loop_clause = loop_clauses; 441 1.1 mrg for (; loop_clause; loop_clause = OMP_CLAUSE_CHAIN (loop_clause)) 442 1.1 mrg { 443 1.1 mrg tree *outer_clause_ptr = NULL; 444 1.1 mrg switch (OMP_CLAUSE_CODE (loop_clause)) 445 1.1 mrg { 446 1.1 mrg case OMP_CLAUSE_GANG: 447 1.1 mrg outer_clause_ptr = wi_info->loop_gang_clause_ptr; 448 1.1 mrg break; 449 1.1 mrg case OMP_CLAUSE_WORKER: 450 1.1 mrg outer_clause_ptr = wi_info->loop_worker_clause_ptr; 451 1.1 mrg break; 452 1.1 mrg case OMP_CLAUSE_VECTOR: 453 1.1 mrg outer_clause_ptr = wi_info->loop_vector_clause_ptr; 454 1.1 mrg break; 455 1.1 mrg case OMP_CLAUSE_SEQ: 456 1.1 mrg case OMP_CLAUSE_INDEPENDENT: 457 1.1 mrg case OMP_CLAUSE_AUTO: 458 1.1 mrg add_auto_clause = false; 459 1.1 mrg default: 460 1.1 mrg break; 461 1.1 mrg } 462 1.1 mrg if (outer_clause_ptr != NULL) 463 1.1 mrg { 464 1.1 mrg if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL 465 1.1 mrg && *outer_clause_ptr == NULL) 466 1.1 mrg { 467 1.1 mrg /* Transfer the clause to the enclosing compute construct and 468 1.1 mrg remove the numerical argument from the 'loop'. */ 469 1.1 mrg *outer_clause_ptr = unshare_expr (loop_clause); 470 1.1 mrg OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL; 471 1.1 mrg } 472 1.1 mrg else if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL && 473 1.1 mrg OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0) != NULL) 474 1.1 mrg { 475 1.1 mrg /* See if both of these are the same constant. If they 476 1.1 mrg aren't, emit a warning. */ 477 1.1 mrg tree old_op = OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0); 478 1.1 mrg tree new_op = OMP_CLAUSE_OPERAND (loop_clause, 0); 479 1.1 mrg if (!(cst_and_fits_in_hwi (old_op) && 480 1.1 mrg cst_and_fits_in_hwi (new_op) && 481 1.1 mrg int_cst_value (old_op) == int_cst_value (new_op))) 482 1.1 mrg { 483 1.1 mrg const char *clause_name 484 1.1 mrg = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)]; 485 1.1 mrg error_at (gimple_location (stmt), 486 1.1 mrg "cannot honor conflicting %qs clause", 487 1.1 mrg clause_name); 488 1.1 mrg inform (OMP_CLAUSE_LOCATION (*outer_clause_ptr), 489 1.1 mrg "location of the previous clause" 490 1.1 mrg " in the same loop nest"); 491 1.1 mrg } 492 1.1 mrg OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL; 493 1.1 mrg } 494 1.1 mrg } 495 1.1 mrg } 496 1.1 mrg if (add_auto_clause) 497 1.1 mrg { 498 1.1 mrg tree auto_clause 499 1.1 mrg = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_AUTO); 500 1.1 mrg OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses; 501 1.1 mrg gimple_omp_for_set_clauses (stmt, auto_clause); 502 1.1 mrg } 503 1.1 mrg } 504 1.1 mrg 505 1.1 mrg return NULL; 506 1.1 mrg } 507 1.1 mrg 508 1.1 mrg /* Helper for make_region_loop_nest. Transform OpenACC 'kernels'/'loop' 509 1.1 mrg construct clauses into OpenACC 'parallel'/'loop' construct ones. */ 510 1.1 mrg 511 1.1 mrg static tree 512 1.1 mrg transform_kernels_loop_clauses (gimple *omp_for, 513 1.1 mrg tree num_gangs_clause, 514 1.1 mrg tree num_workers_clause, 515 1.1 mrg tree vector_length_clause, 516 1.1 mrg tree clauses) 517 1.1 mrg { 518 1.1 mrg /* If this loop in a kernels region does not have an explicit 'seq', 519 1.1 mrg 'independent', or 'auto' clause, we must give it an explicit 'auto' 520 1.1 mrg clause. 521 1.1 mrg We also check for 'gang (num: N)' clauses. These must not appear in 522 1.1 mrg kernels regions that have their own 'num_gangs' clause. Otherwise, they 523 1.1 mrg must be converted and put on the region; similarly for 'worker' and 524 1.1 mrg 'vector' clauses. */ 525 1.1 mrg bool add_auto_clause = true; 526 1.1 mrg tree loop_gang_clause = NULL, loop_worker_clause = NULL, 527 1.1 mrg loop_vector_clause = NULL; 528 1.1 mrg tree loop_clauses = gimple_omp_for_clauses (omp_for); 529 1.1 mrg for (tree loop_clause = loop_clauses; 530 1.1 mrg loop_clause; 531 1.1 mrg loop_clause = OMP_CLAUSE_CHAIN (loop_clause)) 532 1.1 mrg { 533 1.1 mrg bool found_num_clause = false; 534 1.1 mrg tree *clause_ptr, clause_to_check; 535 1.1 mrg switch (OMP_CLAUSE_CODE (loop_clause)) 536 1.1 mrg { 537 1.1 mrg case OMP_CLAUSE_GANG: 538 1.1 mrg found_num_clause = true; 539 1.1 mrg clause_ptr = &loop_gang_clause; 540 1.1 mrg clause_to_check = num_gangs_clause; 541 1.1 mrg break; 542 1.1 mrg case OMP_CLAUSE_WORKER: 543 1.1 mrg found_num_clause = true; 544 1.1 mrg clause_ptr = &loop_worker_clause; 545 1.1 mrg clause_to_check = num_workers_clause; 546 1.1 mrg break; 547 1.1 mrg case OMP_CLAUSE_VECTOR: 548 1.1 mrg found_num_clause = true; 549 1.1 mrg clause_ptr = &loop_vector_clause; 550 1.1 mrg clause_to_check = vector_length_clause; 551 1.1 mrg break; 552 1.1 mrg case OMP_CLAUSE_INDEPENDENT: 553 1.1 mrg case OMP_CLAUSE_SEQ: 554 1.1 mrg case OMP_CLAUSE_AUTO: 555 1.1 mrg add_auto_clause = false; 556 1.1 mrg default: 557 1.1 mrg break; 558 1.1 mrg } 559 1.1 mrg if (found_num_clause && OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL) 560 1.1 mrg { 561 1.1 mrg if (clause_to_check) 562 1.1 mrg { 563 1.1 mrg const char *clause_name 564 1.1 mrg = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)]; 565 1.1 mrg const char *parent_clause_name 566 1.1 mrg = omp_clause_code_name[OMP_CLAUSE_CODE (clause_to_check)]; 567 1.1 mrg error_at (OMP_CLAUSE_LOCATION (loop_clause), 568 1.1 mrg "argument not permitted on %qs clause" 569 1.1 mrg " in OpenACC %<kernels%> region with a %qs clause", 570 1.1 mrg clause_name, parent_clause_name); 571 1.1 mrg inform (OMP_CLAUSE_LOCATION (clause_to_check), 572 1.1 mrg "location of OpenACC %<kernels%>"); 573 1.1 mrg } 574 1.1 mrg /* Copy the 'gang (N)'/'worker (N)'/'vector (N)' clause to the 575 1.1 mrg enclosing compute construct. */ 576 1.1 mrg *clause_ptr = unshare_expr (loop_clause); 577 1.1 mrg OMP_CLAUSE_CHAIN (*clause_ptr) = NULL; 578 1.1 mrg /* Leave a 'gang'/'worker'/'vector' clause on the 'loop', but without 579 1.1 mrg argument. */ 580 1.1 mrg OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL; 581 1.1 mrg } 582 1.1 mrg } 583 1.1 mrg if (add_auto_clause) 584 1.1 mrg { 585 1.1 mrg tree auto_clause = build_omp_clause (gimple_location (omp_for), 586 1.1 mrg OMP_CLAUSE_AUTO); 587 1.1 mrg OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses; 588 1.1 mrg loop_clauses = auto_clause; 589 1.1 mrg } 590 1.1 mrg gimple_omp_for_set_clauses (omp_for, loop_clauses); 591 1.1 mrg /* We must also recurse into the loop; it might contain nested loops having 592 1.1 mrg their own 'worker (num: W)' or 'vector (length: V)' clauses. Turn these 593 1.1 mrg into 'worker'/'vector' clauses on the compute construct. */ 594 1.1 mrg struct walk_stmt_info wi; 595 1.1 mrg memset (&wi, 0, sizeof (wi)); 596 1.1 mrg struct adjust_nested_loop_clauses_wi_info wi_info; 597 1.1 mrg wi_info.loop_gang_clause_ptr = &loop_gang_clause; 598 1.1 mrg wi_info.loop_worker_clause_ptr = &loop_worker_clause; 599 1.1 mrg wi_info.loop_vector_clause_ptr = &loop_vector_clause; 600 1.1 mrg wi.info = &wi_info; 601 1.1 mrg gimple *body = gimple_omp_body (omp_for); 602 1.1 mrg walk_gimple_seq (body, adjust_nested_loop_clauses, NULL, &wi); 603 1.1 mrg /* Check if there were conflicting numbers of workers or vector length. */ 604 1.1 mrg if (loop_gang_clause != NULL && 605 1.1 mrg OMP_CLAUSE_OPERAND (loop_gang_clause, 0) == NULL) 606 1.1 mrg loop_gang_clause = NULL; 607 1.1 mrg if (loop_worker_clause != NULL && 608 1.1 mrg OMP_CLAUSE_OPERAND (loop_worker_clause, 0) == NULL) 609 1.1 mrg loop_worker_clause = NULL; 610 1.1 mrg if (loop_vector_clause != NULL && 611 1.1 mrg OMP_CLAUSE_OPERAND (loop_vector_clause, 0) == NULL) 612 1.1 mrg vector_length_clause = NULL; 613 1.1 mrg 614 1.1 mrg /* If the kernels region had 'num_gangs', 'num_worker', 'vector_length' 615 1.1 mrg clauses, add these to this new compute construct. */ 616 1.1 mrg clauses 617 1.1 mrg = add_parent_or_loop_num_clause (num_gangs_clause, loop_gang_clause, 618 1.1 mrg OMP_CLAUSE_NUM_GANGS, clauses); 619 1.1 mrg clauses 620 1.1 mrg = add_parent_or_loop_num_clause (num_workers_clause, loop_worker_clause, 621 1.1 mrg OMP_CLAUSE_NUM_WORKERS, clauses); 622 1.1 mrg clauses 623 1.1 mrg = add_parent_or_loop_num_clause (vector_length_clause, loop_vector_clause, 624 1.1 mrg OMP_CLAUSE_VECTOR_LENGTH, clauses); 625 1.1 mrg 626 1.1 mrg return clauses; 627 1.1 mrg } 628 1.1 mrg 629 1.1 mrg /* Construct a possibly gang-parallel compute construct containing the STMT, 630 1.1 mrg which must be identical to, or a bind containing, the loop OMP_FOR. 631 1.1 mrg 632 1.1 mrg The NUM_GANGS_CLAUSE, NUM_WORKERS_CLAUSE, and VECTOR_LENGTH_CLAUSE are 633 1.1 mrg optional clauses from the original kernels region and must not be contained 634 1.1 mrg in the other CLAUSES. The newly created compute construct is annotated with 635 1.1 mrg the optional NUM_GANGS_CLAUSE as well as the other CLAUSES. If there is no 636 1.1 mrg NUM_GANGS_CLAUSE but the loop has a 'gang (num: N)' clause, that is 637 1.1 mrg converted to a 'num_gangs (N)' clause on the new compute construct, and 638 1.1 mrg similarly for 'worker' and 'vector' clauses. 639 1.1 mrg 640 1.1 mrg The outermost loop gets an 'auto' clause unless there already is an 641 1.1 mrg 'seq'/'independent'/'auto' clause. Nested loops inside OMP_FOR are treated 642 1.1 mrg similarly by the adjust_nested_loop_clauses function. */ 643 1.1 mrg 644 1.1 mrg static gimple * 645 1.1 mrg make_region_loop_nest (gimple *omp_for, gimple_seq stmts, 646 1.1 mrg tree num_gangs_clause, 647 1.1 mrg tree num_workers_clause, 648 1.1 mrg tree vector_length_clause, 649 1.1 mrg tree clauses) 650 1.1 mrg { 651 1.1 mrg /* This correctly unshares the entire clause chain rooted here. */ 652 1.1 mrg clauses = unshare_expr (clauses); 653 1.1 mrg 654 1.1 mrg /* Figure out the region code for this region. */ 655 1.1 mrg /* Optimistic default: assume that the loop nest is parallelizable 656 1.1 mrg (essentially, no GIMPLE_OMP_FOR with (explicit or implicit) 'auto' clause, 657 1.1 mrg and no un-annotated loops). */ 658 1.1 mrg int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED; 659 1.1 mrg adjust_region_code (stmts, ®ion_code); 660 1.1 mrg 661 1.1 mrg if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED) 662 1.1 mrg { 663 1.1 mrg if (dump_enabled_p ()) 664 1.1 mrg /* This is not MSG_OPTIMIZED_LOCATIONS, as we're just doing what the 665 1.1 mrg user asked us to. */ 666 1.1 mrg dump_printf_loc (MSG_NOTE, omp_for, 667 1.1 mrg "parallelized loop nest" 668 1.1 mrg " in OpenACC %<kernels%> region\n"); 669 1.1 mrg 670 1.1 mrg clauses = transform_kernels_loop_clauses (omp_for, 671 1.1 mrg num_gangs_clause, 672 1.1 mrg num_workers_clause, 673 1.1 mrg vector_length_clause, 674 1.1 mrg clauses); 675 1.1 mrg } 676 1.1 mrg else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS) 677 1.1 mrg { 678 1.1 mrg if (dump_enabled_p ()) 679 1.1 mrg dump_printf_loc (MSG_NOTE, omp_for, 680 1.1 mrg "forwarded loop nest" 681 1.1 mrg " in OpenACC %<kernels%> region" 682 1.1 mrg " to %<parloops%> for analysis\n"); 683 1.1 mrg 684 1.1 mrg /* We're transforming one 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another 685 1.1 mrg 'GF_OMP_TARGET_KIND_OACC_KERNELS', so don't have to 686 1.1 mrg 'transform_kernels_loop_clauses'. */ 687 1.1 mrg /* Re-assemble the clauses stripped off earlier. */ 688 1.1 mrg clauses 689 1.1 mrg = add_parent_or_loop_num_clause (num_gangs_clause, NULL, 690 1.1 mrg OMP_CLAUSE_NUM_GANGS, clauses); 691 1.1 mrg clauses 692 1.1 mrg = add_parent_or_loop_num_clause (num_workers_clause, NULL, 693 1.1 mrg OMP_CLAUSE_NUM_WORKERS, clauses); 694 1.1 mrg clauses 695 1.1 mrg = add_parent_or_loop_num_clause (vector_length_clause, NULL, 696 1.1 mrg OMP_CLAUSE_VECTOR_LENGTH, clauses); 697 1.1 mrg } 698 1.1 mrg else 699 1.1 mrg gcc_unreachable (); 700 1.1 mrg 701 1.1 mrg gimple *parallel_body_bind 702 1.1 mrg = gimple_build_bind (NULL, stmts, make_node (BLOCK)); 703 1.1 mrg gimple *parallel_region 704 1.1 mrg = gimple_build_omp_target (parallel_body_bind, region_code, clauses); 705 1.1 mrg gimple_set_location (parallel_region, gimple_location (omp_for)); 706 1.1 mrg 707 1.1 mrg return parallel_region; 708 1.1 mrg } 709 1.1 mrg 710 1.1 mrg /* Eliminate any binds directly inside BIND by adding their statements to 711 1.1 mrg BIND (i.e., modifying it in place), excluding binds that hold only an 712 1.1 mrg OMP_FOR loop and associated setup/cleanup code. Recurse into binds but 713 1.1 mrg not other statements. Return a chain of the local variables of eliminated 714 1.1 mrg binds, i.e., the local variables found in nested binds. If 715 1.1 mrg INCLUDE_TOPLEVEL_VARS is true, this also includes the variables belonging 716 1.1 mrg to BIND itself. */ 717 1.1 mrg 718 1.1 mrg static tree 719 1.1 mrg flatten_binds (gbind *bind, bool include_toplevel_vars = false) 720 1.1 mrg { 721 1.1 mrg tree vars = NULL, last_var = NULL; 722 1.1 mrg 723 1.1 mrg if (include_toplevel_vars) 724 1.1 mrg { 725 1.1 mrg vars = gimple_bind_vars (bind); 726 1.1 mrg last_var = vars; 727 1.1 mrg } 728 1.1 mrg 729 1.1 mrg gimple_seq new_body = NULL; 730 1.1 mrg gimple_seq body_sequence = gimple_bind_body (bind); 731 1.1 mrg gimple_stmt_iterator gsi, gsi_n; 732 1.1 mrg for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n) 733 1.1 mrg { 734 1.1 mrg /* Advance the iterator here because otherwise it would be invalidated 735 1.1 mrg by moving statements below. */ 736 1.1 mrg gsi_n = gsi; 737 1.1 mrg gsi_next (&gsi_n); 738 1.1 mrg 739 1.1 mrg gimple *stmt = gsi_stmt (gsi); 740 1.1 mrg /* Flatten bind statements, except the ones that contain only an 741 1.1 mrg OpenACC for loop. */ 742 1.1 mrg if (gimple_code (stmt) == GIMPLE_BIND 743 1.1 mrg && !top_level_omp_for_in_stmt (stmt)) 744 1.1 mrg { 745 1.1 mrg gbind *inner_bind = as_a <gbind *> (stmt); 746 1.1 mrg /* Flatten recursively, and collect all variables. */ 747 1.1 mrg tree inner_vars = flatten_binds (inner_bind, true); 748 1.1 mrg gimple_seq inner_sequence = gimple_bind_body (inner_bind); 749 1.1 mrg if (flag_checking) 750 1.1 mrg { 751 1.1 mrg for (gimple_stmt_iterator inner_gsi = gsi_start (inner_sequence); 752 1.1 mrg !gsi_end_p (inner_gsi); 753 1.1 mrg gsi_next (&inner_gsi)) 754 1.1 mrg { 755 1.1 mrg gimple *inner_stmt = gsi_stmt (inner_gsi); 756 1.1 mrg gcc_assert (gimple_code (inner_stmt) != GIMPLE_BIND 757 1.1 mrg || top_level_omp_for_in_stmt (inner_stmt)); 758 1.1 mrg } 759 1.1 mrg } 760 1.1 mrg gimple_seq_add_seq (&new_body, inner_sequence); 761 1.1 mrg /* Find the last variable; we will append others to it. */ 762 1.1 mrg while (last_var != NULL && TREE_CHAIN (last_var) != NULL) 763 1.1 mrg last_var = TREE_CHAIN (last_var); 764 1.1 mrg if (last_var != NULL) 765 1.1 mrg { 766 1.1 mrg TREE_CHAIN (last_var) = inner_vars; 767 1.1 mrg last_var = inner_vars; 768 1.1 mrg } 769 1.1 mrg else 770 1.1 mrg { 771 1.1 mrg vars = inner_vars; 772 1.1 mrg last_var = vars; 773 1.1 mrg } 774 1.1 mrg } 775 1.1 mrg else 776 1.1 mrg gimple_seq_add_stmt (&new_body, stmt); 777 1.1 mrg } 778 1.1 mrg 779 1.1 mrg /* Put the possibly transformed body back into the bind. */ 780 1.1 mrg gimple_bind_set_body (bind, new_body); 781 1.1 mrg return vars; 782 1.1 mrg } 783 1.1 mrg 784 1.1 mrg /* Helper function for places where we construct data regions. Wraps the BODY 785 1.1 mrg inside a try-finally construct at LOC that calls __builtin_GOACC_data_end 786 1.1 mrg in its cleanup block. Returns this try statement. */ 787 1.1 mrg 788 1.1 mrg static gimple * 789 1.1 mrg make_data_region_try_statement (location_t loc, gimple *body) 790 1.1 mrg { 791 1.1 mrg tree data_end_fn = builtin_decl_explicit (BUILT_IN_GOACC_DATA_END); 792 1.1 mrg gimple *call = gimple_build_call (data_end_fn, 0); 793 1.1 mrg gimple_seq cleanup = NULL; 794 1.1 mrg gimple_seq_add_stmt (&cleanup, call); 795 1.1 mrg gimple *try_stmt = gimple_build_try (body, cleanup, GIMPLE_TRY_FINALLY); 796 1.1 mrg gimple_set_location (body, loc); 797 1.1 mrg return try_stmt; 798 1.1 mrg } 799 1.1 mrg 800 1.1 mrg /* If INNER_BIND_VARS holds variables, build an OpenACC data region with 801 1.1 mrg location LOC containing BODY and having 'create (var)' clauses for each 802 1.1 mrg variable (as a side effect, such variables also get TREE_ADDRESSABLE set). 803 1.1 mrg If INNER_CLEANUP is present, add a try-finally statement with 804 1.1 mrg this cleanup code in the finally block. Return the new data region, or 805 1.1 mrg the original BODY if no data region was needed. */ 806 1.1 mrg 807 1.1 mrg static gimple * 808 1.1 mrg maybe_build_inner_data_region (location_t loc, gimple *body, 809 1.1 mrg tree inner_bind_vars, gimple *inner_cleanup) 810 1.1 mrg { 811 1.1 mrg /* Is this an instantiation of a template? (In this case, we don't care what 812 1.1 mrg the generic decl is - just whether the function decl has one.) */ 813 1.1 mrg bool generic_inst_p 814 1.1 mrg = (lang_hooks.decls.get_generic_function_decl (current_function_decl) 815 1.1 mrg != NULL); 816 1.1 mrg 817 1.1 mrg /* Build data 'create (var)' clauses for these local variables. 818 1.1 mrg Below we will add these to a data region enclosing the entire body 819 1.1 mrg of the decomposed kernels region. */ 820 1.1 mrg tree prev_mapped_var = NULL, next = NULL, artificial_vars = NULL, 821 1.1 mrg inner_data_clauses = NULL; 822 1.1 mrg for (tree v = inner_bind_vars; v; v = next) 823 1.1 mrg { 824 1.1 mrg next = TREE_CHAIN (v); 825 1.1 mrg if (DECL_ARTIFICIAL (v) 826 1.1 mrg || TREE_CODE (v) == CONST_DECL 827 1.1 mrg || generic_inst_p) 828 1.1 mrg { 829 1.1 mrg /* If this is an artificial temporary, it need not be mapped. We 830 1.1 mrg move its declaration into the bind inside the data region. 831 1.1 mrg Also avoid mapping variables if we are inside a template 832 1.1 mrg instantiation; the code does not contain all the copies to 833 1.1 mrg temporaries that would make this legal. */ 834 1.1 mrg TREE_CHAIN (v) = artificial_vars; 835 1.1 mrg artificial_vars = v; 836 1.1 mrg if (prev_mapped_var != NULL) 837 1.1 mrg TREE_CHAIN (prev_mapped_var) = next; 838 1.1 mrg else 839 1.1 mrg inner_bind_vars = next; 840 1.1 mrg } 841 1.1 mrg else 842 1.1 mrg { 843 1.1 mrg /* Otherwise, build the map clause. */ 844 1.1 mrg tree new_clause = build_omp_clause (loc, OMP_CLAUSE_MAP); 845 1.1 mrg OMP_CLAUSE_SET_MAP_KIND (new_clause, GOMP_MAP_ALLOC); 846 1.1 mrg OMP_CLAUSE_DECL (new_clause) = v; 847 1.1 mrg OMP_CLAUSE_SIZE (new_clause) = DECL_SIZE_UNIT (v); 848 1.1 mrg OMP_CLAUSE_CHAIN (new_clause) = inner_data_clauses; 849 1.1 mrg inner_data_clauses = new_clause; 850 1.1 mrg 851 1.1 mrg prev_mapped_var = v; 852 1.1 mrg 853 1.1 mrg /* See <https://gcc.gnu.org/PR100280>. */ 854 1.1 mrg if (!TREE_ADDRESSABLE (v)) 855 1.1 mrg { 856 1.1 mrg /* Request that OMP lowering make 'v' addressable. */ 857 1.1 mrg OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1; 858 1.1 mrg 859 1.1 mrg if (dump_enabled_p ()) 860 1.1 mrg { 861 1.1 mrg const dump_user_location_t d_u_loc 862 1.1 mrg = dump_user_location_t::from_location_t (loc); 863 1.1 mrg /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */ 864 1.1 mrg #if __GNUC__ >= 10 865 1.1 mrg # pragma GCC diagnostic push 866 1.1 mrg # pragma GCC diagnostic ignored "-Wformat" 867 1.1 mrg #endif 868 1.1 mrg dump_printf_loc (MSG_NOTE, d_u_loc, 869 1.1 mrg "OpenACC %<kernels%> decomposition:" 870 1.1 mrg " variable %<%T%> declared in block" 871 1.1 mrg " requested to be made addressable\n", 872 1.1 mrg v); 873 1.1 mrg #if __GNUC__ >= 10 874 1.1 mrg # pragma GCC diagnostic pop 875 1.1 mrg #endif 876 1.1 mrg } 877 1.1 mrg } 878 1.1 mrg } 879 1.1 mrg } 880 1.1 mrg 881 1.1 mrg if (artificial_vars) 882 1.1 mrg body = gimple_build_bind (artificial_vars, body, make_node (BLOCK)); 883 1.1 mrg 884 1.1 mrg /* If we determined above that there are variables that need to be created 885 1.1 mrg on the device, construct a data region for them and wrap the body 886 1.1 mrg inside that. */ 887 1.1 mrg if (inner_data_clauses != NULL) 888 1.1 mrg { 889 1.1 mrg gcc_assert (inner_bind_vars != NULL); 890 1.1 mrg gimple *inner_data_region 891 1.1 mrg = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS, 892 1.1 mrg inner_data_clauses); 893 1.1 mrg gimple_set_location (inner_data_region, loc); 894 1.1 mrg /* Make sure __builtin_GOACC_data_end is called at the end. */ 895 1.1 mrg gimple *try_stmt = make_data_region_try_statement (loc, body); 896 1.1 mrg gimple_omp_set_body (inner_data_region, try_stmt); 897 1.1 mrg gimple *bind_body; 898 1.1 mrg if (inner_cleanup != NULL) 899 1.1 mrg /* Clobber all the inner variables that need to be clobbered. */ 900 1.1 mrg bind_body = gimple_build_try (inner_data_region, inner_cleanup, 901 1.1 mrg GIMPLE_TRY_FINALLY); 902 1.1 mrg else 903 1.1 mrg bind_body = inner_data_region; 904 1.1 mrg body = gimple_build_bind (inner_bind_vars, bind_body, make_node (BLOCK)); 905 1.1 mrg } 906 1.1 mrg 907 1.1 mrg return body; 908 1.1 mrg } 909 1.1 mrg 910 1.1 mrg static void 911 1.1 mrg add_wait (location_t loc, gimple_seq *region_body) 912 1.1 mrg { 913 1.1 mrg /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0). */ 914 1.1 mrg tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT); 915 1.1 mrg tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC); 916 1.1 mrg gimple *wait_call = gimple_build_call (wait_fn, 2, 917 1.1 mrg sync_arg, integer_zero_node); 918 1.1 mrg gimple_set_location (wait_call, loc); 919 1.1 mrg gimple_seq_add_stmt (region_body, wait_call); 920 1.1 mrg } 921 1.1 mrg 922 1.1 mrg /* Helper function of decompose_kernels_region_body. The statements in 923 1.1 mrg REGION_BODY are expected to be decomposed parts; add an 'async' clause to 924 1.1 mrg each. Also add a 'wait' directive at the end of the sequence. */ 925 1.1 mrg 926 1.1 mrg static void 927 1.1 mrg add_async_clauses_and_wait (location_t loc, gimple_seq *region_body) 928 1.1 mrg { 929 1.1 mrg tree default_async_queue 930 1.1 mrg = build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL); 931 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start (*region_body); 932 1.1 mrg !gsi_end_p (gsi); 933 1.1 mrg gsi_next (&gsi)) 934 1.1 mrg { 935 1.1 mrg gimple *stmt = gsi_stmt (gsi); 936 1.1 mrg tree target_clauses = gimple_omp_target_clauses (stmt); 937 1.1 mrg tree new_async_clause = build_omp_clause (loc, OMP_CLAUSE_ASYNC); 938 1.1 mrg OMP_CLAUSE_OPERAND (new_async_clause, 0) = default_async_queue; 939 1.1 mrg OMP_CLAUSE_CHAIN (new_async_clause) = target_clauses; 940 1.1 mrg target_clauses = new_async_clause; 941 1.1 mrg gimple_omp_target_set_clauses (as_a <gomp_target *> (stmt), 942 1.1 mrg target_clauses); 943 1.1 mrg } 944 1.1 mrg add_wait (loc, region_body); 945 1.1 mrg } 946 1.1 mrg 947 1.1 mrg /* Auxiliary analysis of the body of a kernels region, to determine for each 948 1.1 mrg OpenACC loop whether it is control-dependent (i.e., not necessarily 949 1.1 mrg executed every time the kernels region is entered) or not. 950 1.1 mrg We say that a loop is control-dependent if there is some cond, switch, or 951 1.1 mrg goto statement that jumps over it, forwards or backwards. For example, 952 1.1 mrg if the loop is controlled by an if statement, then a jump to the true 953 1.1 mrg block, the false block, or from one of those blocks to the control flow 954 1.1 mrg join point will necessarily jump over the loop. 955 1.1 mrg This analysis implements an ad-hoc union-find data structure classifying 956 1.1 mrg statements into "control-flow regions" as follows: Most statements are in 957 1.1 mrg the same region as their predecessor, except that each OpenACC loop is in 958 1.1 mrg a region of its own, and each OpenACC loop's successor starts a new 959 1.1 mrg region. We then unite the regions of any statements linked by jumps, 960 1.1 mrg placing any cond, switch, or goto statement in the same region as its 961 1.1 mrg target label(s). 962 1.1 mrg In the end, control dependence of OpenACC loops can be determined by 963 1.1 mrg comparing their immediate predecessor and successor statements' regions. 964 1.1 mrg A jump crosses the loop if and only if the predecessor and successor are 965 1.1 mrg in the same region. (If there is no predecessor or successor, the loop 966 1.1 mrg is executed unconditionally.) 967 1.1 mrg The methods in this class identify statements by their index in the 968 1.1 mrg kernels region's body. */ 969 1.1 mrg 970 1.1 mrg class control_flow_regions 971 1.1 mrg { 972 1.1 mrg public: 973 1.1 mrg /* Initialize an instance and pre-compute the control-flow region 974 1.1 mrg information for the statement sequence SEQ. */ 975 1.1 mrg control_flow_regions (gimple_seq seq); 976 1.1 mrg 977 1.1 mrg /* Return true if the statement with the given index IDX in the analyzed 978 1.1 mrg statement sequence is an unconditionally executed OpenACC loop. */ 979 1.1 mrg bool is_unconditional_oacc_for_loop (size_t idx); 980 1.1 mrg 981 1.1 mrg private: 982 1.1 mrg /* Find the region representative for the statement identified by index 983 1.1 mrg STMT_IDX. */ 984 1.1 mrg size_t find_rep (size_t stmt_idx); 985 1.1 mrg 986 1.1 mrg /* Union the regions containing the statements represented by 987 1.1 mrg representatives A and B. */ 988 1.1 mrg void union_reps (size_t a, size_t b); 989 1.1 mrg 990 1.1 mrg /* Helper for the constructor. Performs the actual computation of the 991 1.1 mrg control-flow regions in the statement sequence SEQ. */ 992 1.1 mrg void compute_regions (gimple_seq seq); 993 1.1 mrg 994 1.1 mrg /* The mapping from statement indices to region representatives. */ 995 1.1 mrg vec <size_t> representatives; 996 1.1 mrg 997 1.1 mrg /* A cache mapping statement indices to a flag indicating whether the 998 1.1 mrg statement is a top level OpenACC for loop. */ 999 1.1 mrg vec <bool> omp_for_loops; 1000 1.1 mrg }; 1001 1.1 mrg 1002 1.1 mrg control_flow_regions::control_flow_regions (gimple_seq seq) 1003 1.1 mrg { 1004 1.1 mrg representatives.create (1); 1005 1.1 mrg omp_for_loops.create (1); 1006 1.1 mrg compute_regions (seq); 1007 1.1 mrg } 1008 1.1 mrg 1009 1.1 mrg bool 1010 1.1 mrg control_flow_regions::is_unconditional_oacc_for_loop (size_t idx) 1011 1.1 mrg { 1012 1.1 mrg if (idx == 0 || idx == representatives.length () - 1) 1013 1.1 mrg /* The first or last statement in the kernels region. This means that 1014 1.1 mrg there is no room before or after it for a jump or a label. Thus 1015 1.1 mrg there cannot be a jump across it, so it is unconditional. */ 1016 1.1 mrg return true; 1017 1.1 mrg /* Otherwise, the loop is unconditional if the statements before and after 1018 1.1 mrg it are in different control flow regions. Scan forward and backward, 1019 1.1 mrg skipping over neighboring OpenACC for loops, to find these preceding 1020 1.1 mrg statements. */ 1021 1.1 mrg size_t prev_index = idx - 1; 1022 1.1 mrg while (prev_index > 0 && omp_for_loops [prev_index] == true) 1023 1.1 mrg prev_index--; 1024 1.1 mrg /* If all preceding statements are also OpenACC loops, all of these are 1025 1.1 mrg unconditional. */ 1026 1.1 mrg if (prev_index == 0) 1027 1.1 mrg return true; 1028 1.1 mrg size_t succ_index = idx + 1; 1029 1.1 mrg while (succ_index < omp_for_loops.length () 1030 1.1 mrg && omp_for_loops [succ_index] == true) 1031 1.1 mrg succ_index++; 1032 1.1 mrg /* If all following statements are also OpenACC loops, all of these are 1033 1.1 mrg unconditional. */ 1034 1.1 mrg if (succ_index == omp_for_loops.length ()) 1035 1.1 mrg return true; 1036 1.1 mrg return (find_rep (prev_index) != find_rep (succ_index)); 1037 1.1 mrg } 1038 1.1 mrg 1039 1.1 mrg size_t 1040 1.1 mrg control_flow_regions::find_rep (size_t stmt_idx) 1041 1.1 mrg { 1042 1.1 mrg size_t rep = stmt_idx, aux = stmt_idx; 1043 1.1 mrg /* Find the root representative of this statement. */ 1044 1.1 mrg while (representatives[rep] != rep) 1045 1.1 mrg rep = representatives[rep]; 1046 1.1 mrg /* Compress the path from the original statement to the representative. */ 1047 1.1 mrg while (representatives[aux] != rep) 1048 1.1 mrg { 1049 1.1 mrg size_t tmp = representatives[aux]; 1050 1.1 mrg representatives[aux] = rep; 1051 1.1 mrg aux = tmp; 1052 1.1 mrg } 1053 1.1 mrg return rep; 1054 1.1 mrg } 1055 1.1 mrg 1056 1.1 mrg void 1057 1.1 mrg control_flow_regions::union_reps (size_t a, size_t b) 1058 1.1 mrg { 1059 1.1 mrg a = find_rep (a); 1060 1.1 mrg b = find_rep (b); 1061 1.1 mrg representatives[b] = a; 1062 1.1 mrg } 1063 1.1 mrg 1064 1.1 mrg void 1065 1.1 mrg control_flow_regions::compute_regions (gimple_seq seq) 1066 1.1 mrg { 1067 1.1 mrg hash_map <gimple *, size_t> control_flow_reps; 1068 1.1 mrg hash_map <tree, size_t> label_reps; 1069 1.1 mrg size_t current_region = 0, idx = 0; 1070 1.1 mrg 1071 1.1 mrg /* In a first pass, assign an initial region to each statement. Except in 1072 1.1 mrg the case of OpenACC loops, each statement simply gets the same region 1073 1.1 mrg representative as its predecessor. */ 1074 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start (seq); 1075 1.1 mrg !gsi_end_p (gsi); 1076 1.1 mrg gsi_next (&gsi)) 1077 1.1 mrg { 1078 1.1 mrg gimple *stmt = gsi_stmt (gsi); 1079 1.1 mrg gimple *omp_for = top_level_omp_for_in_stmt (stmt); 1080 1.1 mrg omp_for_loops.safe_push (omp_for != NULL); 1081 1.1 mrg if (omp_for != NULL) 1082 1.1 mrg { 1083 1.1 mrg /* Assign a new region to this loop and to its successor. */ 1084 1.1 mrg current_region = idx; 1085 1.1 mrg representatives.safe_push (current_region); 1086 1.1 mrg current_region++; 1087 1.1 mrg } 1088 1.1 mrg else 1089 1.1 mrg { 1090 1.1 mrg representatives.safe_push (current_region); 1091 1.1 mrg /* Remember any jumps and labels for the second pass below. */ 1092 1.1 mrg if (gimple_code (stmt) == GIMPLE_COND 1093 1.1 mrg || gimple_code (stmt) == GIMPLE_SWITCH 1094 1.1 mrg || gimple_code (stmt) == GIMPLE_GOTO) 1095 1.1 mrg control_flow_reps.put (stmt, current_region); 1096 1.1 mrg else if (gimple_code (stmt) == GIMPLE_LABEL) 1097 1.1 mrg label_reps.put (gimple_label_label (as_a <glabel *> (stmt)), 1098 1.1 mrg current_region); 1099 1.1 mrg } 1100 1.1 mrg idx++; 1101 1.1 mrg } 1102 1.1 mrg gcc_assert (representatives.length () == omp_for_loops.length ()); 1103 1.1 mrg 1104 1.1 mrg /* Revisit all the control flow statements and union the region of each 1105 1.1 mrg cond, switch, or goto statement with the target labels' regions. */ 1106 1.1 mrg for (hash_map <gimple *, size_t>::iterator it = control_flow_reps.begin (); 1107 1.1 mrg it != control_flow_reps.end (); 1108 1.1 mrg ++it) 1109 1.1 mrg { 1110 1.1 mrg gimple *stmt = (*it).first; 1111 1.1 mrg size_t stmt_rep = (*it).second; 1112 1.1 mrg switch (gimple_code (stmt)) 1113 1.1 mrg { 1114 1.1 mrg tree label; 1115 1.1 mrg unsigned int n; 1116 1.1 mrg 1117 1.1 mrg case GIMPLE_COND: 1118 1.1 mrg label = gimple_cond_true_label (as_a <gcond *> (stmt)); 1119 1.1 mrg union_reps (stmt_rep, *label_reps.get (label)); 1120 1.1 mrg label = gimple_cond_false_label (as_a <gcond *> (stmt)); 1121 1.1 mrg union_reps (stmt_rep, *label_reps.get (label)); 1122 1.1 mrg break; 1123 1.1 mrg 1124 1.1 mrg case GIMPLE_SWITCH: 1125 1.1 mrg n = gimple_switch_num_labels (as_a <gswitch *> (stmt)); 1126 1.1 mrg for (unsigned int i = 0; i < n; i++) 1127 1.1 mrg { 1128 1.1 mrg tree switch_case 1129 1.1 mrg = gimple_switch_label (as_a <gswitch *> (stmt), i); 1130 1.1 mrg label = CASE_LABEL (switch_case); 1131 1.1 mrg union_reps (stmt_rep, *label_reps.get (label)); 1132 1.1 mrg } 1133 1.1 mrg break; 1134 1.1 mrg 1135 1.1 mrg case GIMPLE_GOTO: 1136 1.1 mrg label = gimple_goto_dest (stmt); 1137 1.1 mrg union_reps (stmt_rep, *label_reps.get (label)); 1138 1.1 mrg break; 1139 1.1 mrg 1140 1.1 mrg default: 1141 1.1 mrg gcc_unreachable (); 1142 1.1 mrg } 1143 1.1 mrg } 1144 1.1 mrg } 1145 1.1 mrg 1146 1.1 mrg /* Decompose the body of the KERNELS_REGION, which was originally annotated 1147 1.1 mrg with the KERNELS_CLAUSES, into a series of compute constructs. */ 1148 1.1 mrg 1149 1.1 mrg static gimple * 1150 1.1 mrg decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses) 1151 1.1 mrg { 1152 1.1 mrg location_t loc = gimple_location (kernels_region); 1153 1.1 mrg 1154 1.1 mrg /* The kernels clauses will be propagated to the child clauses unmodified, 1155 1.1 mrg except that the 'num_gangs', 'num_workers', and 'vector_length' clauses 1156 1.1 mrg will only be added to loop regions. The other regions are "gang-single" 1157 1.1 mrg and get an explicit 'num_gangs (1)' clause. So separate out the 1158 1.1 mrg 'num_gangs', 'num_workers', and 'vector_length' clauses here. 1159 1.1 mrg Also check for the presence of an 'async' clause but do not remove it from 1160 1.1 mrg the 'kernels' clauses. */ 1161 1.1 mrg tree num_gangs_clause = NULL, num_workers_clause = NULL, 1162 1.1 mrg vector_length_clause = NULL; 1163 1.1 mrg tree async_clause = NULL; 1164 1.1 mrg tree prev_clause = NULL, next_clause = NULL; 1165 1.1 mrg tree parallel_clauses = kernels_clauses; 1166 1.1 mrg for (tree c = parallel_clauses; c; c = next_clause) 1167 1.1 mrg { 1168 1.1 mrg /* Preserve this here, as we might NULL it later. */ 1169 1.1 mrg next_clause = OMP_CLAUSE_CHAIN (c); 1170 1.1 mrg 1171 1.1 mrg if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS 1172 1.1 mrg || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS 1173 1.1 mrg || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH) 1174 1.1 mrg { 1175 1.1 mrg /* Cut this clause out of the chain. */ 1176 1.1 mrg if (prev_clause != NULL) 1177 1.1 mrg OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (c); 1178 1.1 mrg else 1179 1.1 mrg kernels_clauses = OMP_CLAUSE_CHAIN (c); 1180 1.1 mrg OMP_CLAUSE_CHAIN (c) = NULL; 1181 1.1 mrg switch (OMP_CLAUSE_CODE (c)) 1182 1.1 mrg { 1183 1.1 mrg case OMP_CLAUSE_NUM_GANGS: 1184 1.1 mrg num_gangs_clause = c; 1185 1.1 mrg break; 1186 1.1 mrg case OMP_CLAUSE_NUM_WORKERS: 1187 1.1 mrg num_workers_clause = c; 1188 1.1 mrg break; 1189 1.1 mrg case OMP_CLAUSE_VECTOR_LENGTH: 1190 1.1 mrg vector_length_clause = c; 1191 1.1 mrg break; 1192 1.1 mrg default: 1193 1.1 mrg gcc_unreachable (); 1194 1.1 mrg } 1195 1.1 mrg } 1196 1.1 mrg else 1197 1.1 mrg prev_clause = c; 1198 1.1 mrg if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC) 1199 1.1 mrg async_clause = c; 1200 1.1 mrg } 1201 1.1 mrg 1202 1.1 mrg gimple *kernels_body = gimple_omp_body (kernels_region); 1203 1.1 mrg gbind *kernels_bind = as_a <gbind *> (kernels_body); 1204 1.1 mrg 1205 1.1 mrg /* The body of the region may contain other nested binds declaring inner 1206 1.1 mrg local variables. Collapse all these binds into one to ensure that we 1207 1.1 mrg have a single sequence of statements to iterate over; also, collect all 1208 1.1 mrg inner variables. */ 1209 1.1 mrg tree inner_bind_vars = flatten_binds (kernels_bind); 1210 1.1 mrg gimple_seq body_sequence = gimple_bind_body (kernels_bind); 1211 1.1 mrg 1212 1.1 mrg /* All these inner variables will get allocated on the device (below, by 1213 1.1 mrg calling maybe_build_inner_data_region). Here we create 'present' 1214 1.1 mrg clauses for them and add these clauses to the list of clauses to be 1215 1.1 mrg attached to each inner compute construct. */ 1216 1.1 mrg tree present_clauses = kernels_clauses; 1217 1.1 mrg for (tree var = inner_bind_vars; var; var = TREE_CHAIN (var)) 1218 1.1 mrg { 1219 1.1 mrg if (!DECL_ARTIFICIAL (var) && TREE_CODE (var) != CONST_DECL) 1220 1.1 mrg { 1221 1.1 mrg tree present_clause = build_omp_clause (loc, OMP_CLAUSE_MAP); 1222 1.1 mrg OMP_CLAUSE_SET_MAP_KIND (present_clause, GOMP_MAP_FORCE_PRESENT); 1223 1.1 mrg OMP_CLAUSE_DECL (present_clause) = var; 1224 1.1 mrg OMP_CLAUSE_SIZE (present_clause) = DECL_SIZE_UNIT (var); 1225 1.1 mrg OMP_CLAUSE_CHAIN (present_clause) = present_clauses; 1226 1.1 mrg present_clauses = present_clause; 1227 1.1 mrg } 1228 1.1 mrg } 1229 1.1 mrg kernels_clauses = present_clauses; 1230 1.1 mrg 1231 1.1 mrg /* In addition to nested binds, the "real" body of the region may be 1232 1.1 mrg nested inside a try-finally block. Find its cleanup block, which 1233 1.1 mrg contains code to clobber the local variables that must be clobbered. */ 1234 1.1 mrg gimple *inner_cleanup = NULL; 1235 1.1 mrg if (body_sequence != NULL && gimple_code (body_sequence) == GIMPLE_TRY) 1236 1.1 mrg { 1237 1.1 mrg if (gimple_seq_singleton_p (body_sequence)) 1238 1.1 mrg { 1239 1.1 mrg /* The try statement is the only thing inside the bind. */ 1240 1.1 mrg inner_cleanup = gimple_try_cleanup (body_sequence); 1241 1.1 mrg body_sequence = gimple_try_eval (body_sequence); 1242 1.1 mrg } 1243 1.1 mrg else 1244 1.1 mrg { 1245 1.1 mrg /* The bind's body starts with a try statement, but it is followed 1246 1.1 mrg by other things. */ 1247 1.1 mrg gimple_stmt_iterator gsi = gsi_start (body_sequence); 1248 1.1 mrg gimple *try_stmt = gsi_stmt (gsi); 1249 1.1 mrg inner_cleanup = gimple_try_cleanup (try_stmt); 1250 1.1 mrg gimple *try_body = gimple_try_eval (try_stmt); 1251 1.1 mrg 1252 1.1 mrg gsi_remove (&gsi, false); 1253 1.1 mrg /* Now gsi indicates the sequence of statements after the try 1254 1.1 mrg statement in the bind. Append the statement in the try body and 1255 1.1 mrg the trailing statements from gsi. */ 1256 1.1 mrg gsi_insert_seq_before (&gsi, try_body, GSI_CONTINUE_LINKING); 1257 1.1 mrg body_sequence = gsi_stmt (gsi); 1258 1.1 mrg } 1259 1.1 mrg } 1260 1.1 mrg 1261 1.1 mrg /* This sequence will collect all the top-level statements in the body of 1262 1.1 mrg the data region we are about to construct. */ 1263 1.1 mrg gimple_seq region_body = NULL; 1264 1.1 mrg /* This sequence will collect consecutive statements to be put into a 1265 1.1 mrg gang-single region. */ 1266 1.1 mrg gimple_seq gang_single_seq = NULL; 1267 1.1 mrg /* Flag recording whether the gang_single_seq only contains copies to 1268 1.1 mrg local variables. These may be loop setup code that should not be 1269 1.1 mrg separated from the loop. */ 1270 1.1 mrg bool only_simple_assignments = true; 1271 1.1 mrg 1272 1.1 mrg /* Precompute the control flow region information to determine whether an 1273 1.1 mrg OpenACC loop is executed conditionally or unconditionally. */ 1274 1.1 mrg control_flow_regions cf_regions (body_sequence); 1275 1.1 mrg 1276 1.1 mrg /* Iterate over the statements in the kernels region's body. */ 1277 1.1 mrg size_t idx = 0; 1278 1.1 mrg gimple_stmt_iterator gsi, gsi_n; 1279 1.1 mrg for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n, idx++) 1280 1.1 mrg { 1281 1.1 mrg /* Advance the iterator here because otherwise it would be invalidated 1282 1.1 mrg by moving statements below. */ 1283 1.1 mrg gsi_n = gsi; 1284 1.1 mrg gsi_next (&gsi_n); 1285 1.1 mrg 1286 1.1 mrg gimple *stmt = gsi_stmt (gsi); 1287 1.1 mrg if (gimple_code (stmt) == GIMPLE_DEBUG) 1288 1.1 mrg { 1289 1.1 mrg if (flag_compare_debug_opt || flag_compare_debug) 1290 1.1 mrg /* Let the usual '-fcompare-debug' analysis bail out, as 1291 1.1 mrg necessary. */ 1292 1.1 mrg ; 1293 1.1 mrg else 1294 1.1 mrg sorry_at (loc, "%qs not yet supported", 1295 1.1 mrg gimple_code_name[gimple_code (stmt)]); 1296 1.1 mrg } 1297 1.1 mrg gimple *omp_for = top_level_omp_for_in_stmt (stmt); 1298 1.1 mrg bool is_unconditional_oacc_for_loop = false; 1299 1.1 mrg if (omp_for != NULL) 1300 1.1 mrg is_unconditional_oacc_for_loop 1301 1.1 mrg = cf_regions.is_unconditional_oacc_for_loop (idx); 1302 1.1 mrg if (omp_for != NULL 1303 1.1 mrg && is_unconditional_oacc_for_loop) 1304 1.1 mrg { 1305 1.1 mrg /* This is an OMP for statement, put it into a separate region. 1306 1.1 mrg But first, construct a gang-single region containing any 1307 1.1 mrg complex sequential statements we may have seen. */ 1308 1.1 mrg if (gang_single_seq != NULL && !only_simple_assignments) 1309 1.1 mrg { 1310 1.1 mrg gimple *single_region 1311 1.1 mrg = make_region_seq (loc, gang_single_seq, 1312 1.1 mrg num_gangs_clause, 1313 1.1 mrg num_workers_clause, 1314 1.1 mrg vector_length_clause, 1315 1.1 mrg kernels_clauses); 1316 1.1 mrg gimple_seq_add_stmt (®ion_body, single_region); 1317 1.1 mrg } 1318 1.1 mrg else if (gang_single_seq != NULL && only_simple_assignments) 1319 1.1 mrg { 1320 1.1 mrg /* There is a sequence of sequential statements preceding this 1321 1.1 mrg loop, but they are all simple assignments. This is 1322 1.1 mrg probably setup code for the loop; in particular, Fortran DO 1323 1.1 mrg loops are preceded by code to copy the loop limit variable 1324 1.1 mrg to a temporary. Group this code together with the loop 1325 1.1 mrg itself. */ 1326 1.1 mrg gimple_seq_add_stmt (&gang_single_seq, stmt); 1327 1.1 mrg stmt = gimple_build_bind (NULL, gang_single_seq, 1328 1.1 mrg make_node (BLOCK)); 1329 1.1 mrg } 1330 1.1 mrg gang_single_seq = NULL; 1331 1.1 mrg only_simple_assignments = true; 1332 1.1 mrg 1333 1.1 mrg gimple_seq parallel_seq = NULL; 1334 1.1 mrg gimple_seq_add_stmt (¶llel_seq, stmt); 1335 1.1 mrg gimple *parallel_region 1336 1.1 mrg = make_region_loop_nest (omp_for, parallel_seq, 1337 1.1 mrg num_gangs_clause, 1338 1.1 mrg num_workers_clause, 1339 1.1 mrg vector_length_clause, 1340 1.1 mrg kernels_clauses); 1341 1.1 mrg gimple_seq_add_stmt (®ion_body, parallel_region); 1342 1.1 mrg } 1343 1.1 mrg else 1344 1.1 mrg { 1345 1.1 mrg if (omp_for != NULL) 1346 1.1 mrg { 1347 1.1 mrg gcc_checking_assert (!is_unconditional_oacc_for_loop); 1348 1.1 mrg if (dump_enabled_p ()) 1349 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, omp_for, 1350 1.1 mrg "unparallelized loop nest" 1351 1.1 mrg " in OpenACC %<kernels%> region:" 1352 1.1 mrg " it's executed conditionally\n"); 1353 1.1 mrg } 1354 1.1 mrg 1355 1.1 mrg /* This is not an unconditional OMP for statement, so it will be 1356 1.1 mrg put into a gang-single region. */ 1357 1.1 mrg gimple_seq_add_stmt (&gang_single_seq, stmt); 1358 1.1 mrg /* Is this a simple assignment? We call it simple if it is an 1359 1.1 mrg assignment to an artificial local variable. This captures 1360 1.1 mrg Fortran loop setup code computing loop bounds and offsets. */ 1361 1.1 mrg bool is_simple_assignment 1362 1.1 mrg = (gimple_code (stmt) == GIMPLE_ASSIGN 1363 1.1 mrg && TREE_CODE (gimple_assign_lhs (stmt)) == VAR_DECL 1364 1.1 mrg && DECL_ARTIFICIAL (gimple_assign_lhs (stmt))); 1365 1.1 mrg if (!is_simple_assignment) 1366 1.1 mrg only_simple_assignments = false; 1367 1.1 mrg } 1368 1.1 mrg } 1369 1.1 mrg 1370 1.1 mrg /* If we did not emit a new region, and are not going to emit one now 1371 1.1 mrg (that is, the original region was empty), prepare to emit a dummy so as 1372 1.1 mrg to preserve the original construct, which other processing (at least 1373 1.1 mrg test cases) depend on. */ 1374 1.1 mrg if (region_body == NULL && gang_single_seq == NULL) 1375 1.1 mrg { 1376 1.1 mrg gimple *stmt = gimple_build_nop (); 1377 1.1 mrg gimple_set_location (stmt, loc); 1378 1.1 mrg gimple_seq_add_stmt (&gang_single_seq, stmt); 1379 1.1 mrg } 1380 1.1 mrg 1381 1.1 mrg /* Gather up any remaining gang-single statements. */ 1382 1.1 mrg if (gang_single_seq != NULL) 1383 1.1 mrg { 1384 1.1 mrg gimple *single_region 1385 1.1 mrg = make_region_seq (loc, gang_single_seq, 1386 1.1 mrg num_gangs_clause, 1387 1.1 mrg num_workers_clause, 1388 1.1 mrg vector_length_clause, 1389 1.1 mrg kernels_clauses); 1390 1.1 mrg gimple_seq_add_stmt (®ion_body, single_region); 1391 1.1 mrg } 1392 1.1 mrg 1393 1.1 mrg /* We want to launch these kernels asynchronously. If the original 1394 1.1 mrg kernels region had an async clause, this is done automatically because 1395 1.1 mrg that async clause was copied to the individual regions we created. 1396 1.1 mrg Otherwise, add an async clause to each newly created region, as well as 1397 1.1 mrg a wait directive at the end. */ 1398 1.1 mrg if (async_clause == NULL) 1399 1.1 mrg add_async_clauses_and_wait (loc, ®ion_body); 1400 1.1 mrg else 1401 1.1 mrg /* !!! If we have asynchronous parallel blocks inside a (synchronous) data 1402 1.1 mrg region, then target memory will get unmapped at the point the data 1403 1.1 mrg region ends, even if the inner asynchronous parallels have not yet 1404 1.1 mrg completed. For kernels marked "async", we might want to use "enter data 1405 1.1 mrg async(...)" and "exit data async(...)" instead, or asynchronous data 1406 1.1 mrg regions (see also <https://gcc.gnu.org/PR97390> 1407 1.1 mrg "[OpenACC] 'async' clause on 'data' construct", 1408 1.1 mrg which is to share the same implementation). 1409 1.1 mrg For now, insert a (synchronous) wait at the end of the block. */ 1410 1.1 mrg add_wait (loc, ®ion_body); 1411 1.1 mrg 1412 1.1 mrg tree kernels_locals = gimple_bind_vars (as_a <gbind *> (kernels_body)); 1413 1.1 mrg gimple *body = gimple_build_bind (kernels_locals, region_body, 1414 1.1 mrg make_node (BLOCK)); 1415 1.1 mrg 1416 1.1 mrg /* If we found variables declared in nested scopes, build a data region to 1417 1.1 mrg map them to the device. */ 1418 1.1 mrg body = maybe_build_inner_data_region (loc, body, inner_bind_vars, 1419 1.1 mrg inner_cleanup); 1420 1.1 mrg 1421 1.1 mrg return body; 1422 1.1 mrg } 1423 1.1 mrg 1424 1.1 mrg /* Decompose one OpenACC 'kernels' construct into an OpenACC 'data' construct 1425 1.1 mrg containing the original OpenACC 'kernels' construct's region cut up into a 1426 1.1 mrg sequence of compute constructs. */ 1427 1.1 mrg 1428 1.1 mrg static gimple * 1429 1.1 mrg omp_oacc_kernels_decompose_1 (gimple *kernels_stmt) 1430 1.1 mrg { 1431 1.1 mrg gcc_checking_assert (gimple_omp_target_kind (kernels_stmt) 1432 1.1 mrg == GF_OMP_TARGET_KIND_OACC_KERNELS); 1433 1.1 mrg location_t loc = gimple_location (kernels_stmt); 1434 1.1 mrg 1435 1.1 mrg /* Collect the data clauses of the OpenACC 'kernels' directive and create a 1436 1.1 mrg new OpenACC 'data' construct with those clauses. */ 1437 1.1 mrg tree kernels_clauses = gimple_omp_target_clauses (kernels_stmt); 1438 1.1 mrg tree data_clauses = NULL; 1439 1.1 mrg for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c)) 1440 1.1 mrg { 1441 1.1 mrg /* Certain clauses are copied to the enclosing OpenACC 'data'. Other 1442 1.1 mrg clauses remain on the OpenACC 'kernels'. */ 1443 1.1 mrg if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP) 1444 1.1 mrg { 1445 1.1 mrg tree decl = OMP_CLAUSE_DECL (c); 1446 1.1 mrg HOST_WIDE_INT map_kind = OMP_CLAUSE_MAP_KIND (c); 1447 1.1 mrg switch (map_kind) 1448 1.1 mrg { 1449 1.1 mrg default: 1450 1.1 mrg if (map_kind == GOMP_MAP_ALLOC 1451 1.1 mrg && integer_zerop (OMP_CLAUSE_SIZE (c))) 1452 1.1 mrg /* ??? This is an alloc clause for mapping a pointer whose 1453 1.1 mrg target is already mapped. We leave these on the inner 1454 1.1 mrg compute constructs because moving them to the outer data 1455 1.1 mrg region causes runtime errors. */ 1456 1.1 mrg break; 1457 1.1 mrg 1458 1.1 mrg /* For non-artificial variables, and for non-declaration 1459 1.1 mrg expressions like A[0:n], copy the clause to the data 1460 1.1 mrg region. */ 1461 1.1 mrg if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl)) 1462 1.1 mrg || !DECL_P (decl)) 1463 1.1 mrg { 1464 1.1 mrg tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c), 1465 1.1 mrg OMP_CLAUSE_MAP); 1466 1.1 mrg OMP_CLAUSE_SET_MAP_KIND (new_clause, map_kind); 1467 1.1 mrg /* This must be unshared here to avoid "incorrect sharing 1468 1.1 mrg of tree nodes" errors from verify_gimple. */ 1469 1.1 mrg OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl); 1470 1.1 mrg OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c); 1471 1.1 mrg OMP_CLAUSE_CHAIN (new_clause) = data_clauses; 1472 1.1 mrg data_clauses = new_clause; 1473 1.1 mrg 1474 1.1 mrg /* Now that this data is mapped, turn the data clause on the 1475 1.1 mrg inner OpenACC 'kernels' into a 'present' clause. */ 1476 1.1 mrg OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT); 1477 1.1 mrg 1478 1.1 mrg /* See <https://gcc.gnu.org/PR100280>, 1479 1.1 mrg <https://gcc.gnu.org/PR104086>. */ 1480 1.1 mrg if (DECL_P (decl) 1481 1.1 mrg && !TREE_ADDRESSABLE (decl)) 1482 1.1 mrg { 1483 1.1 mrg /* Request that OMP lowering make 'decl' addressable. */ 1484 1.1 mrg OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1; 1485 1.1 mrg 1486 1.1 mrg if (dump_enabled_p ()) 1487 1.1 mrg { 1488 1.1 mrg location_t loc = OMP_CLAUSE_LOCATION (new_clause); 1489 1.1 mrg const dump_user_location_t d_u_loc 1490 1.1 mrg = dump_user_location_t::from_location_t (loc); 1491 1.1 mrg /* PR100695 "Format decoder, quoting in 'dump_printf' 1492 1.1 mrg etc." */ 1493 1.1 mrg #if __GNUC__ >= 10 1494 1.1 mrg # pragma GCC diagnostic push 1495 1.1 mrg # pragma GCC diagnostic ignored "-Wformat" 1496 1.1 mrg #endif 1497 1.1 mrg dump_printf_loc 1498 1.1 mrg (MSG_NOTE, d_u_loc, 1499 1.1 mrg "OpenACC %<kernels%> decomposition:" 1500 1.1 mrg " variable %<%T%> in %qs clause" 1501 1.1 mrg " requested to be made addressable\n", 1502 1.1 mrg decl, 1503 1.1 mrg user_omp_clause_code_name (new_clause, true)); 1504 1.1 mrg #if __GNUC__ >= 10 1505 1.1 mrg # pragma GCC diagnostic pop 1506 1.1 mrg #endif 1507 1.1 mrg } 1508 1.1 mrg } 1509 1.1 mrg } 1510 1.1 mrg break; 1511 1.1 mrg 1512 1.1 mrg case GOMP_MAP_POINTER: 1513 1.1 mrg case GOMP_MAP_TO_PSET: 1514 1.1 mrg case GOMP_MAP_FIRSTPRIVATE_POINTER: 1515 1.1 mrg case GOMP_MAP_FIRSTPRIVATE_REFERENCE: 1516 1.1 mrg /* ??? Copying these map kinds leads to internal compiler 1517 1.1 mrg errors in later passes. */ 1518 1.1 mrg break; 1519 1.1 mrg } 1520 1.1 mrg } 1521 1.1 mrg else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF) 1522 1.1 mrg { 1523 1.1 mrg /* If there is an 'if' clause, it must be duplicated to the 1524 1.1 mrg enclosing data region. Temporarily remove the if clause's 1525 1.1 mrg chain to avoid copying it. */ 1526 1.1 mrg tree saved_chain = OMP_CLAUSE_CHAIN (c); 1527 1.1 mrg OMP_CLAUSE_CHAIN (c) = NULL; 1528 1.1 mrg tree new_if_clause = unshare_expr (c); 1529 1.1 mrg OMP_CLAUSE_CHAIN (c) = saved_chain; 1530 1.1 mrg OMP_CLAUSE_CHAIN (new_if_clause) = data_clauses; 1531 1.1 mrg data_clauses = new_if_clause; 1532 1.1 mrg } 1533 1.1 mrg } 1534 1.1 mrg /* Restore the original order of the clauses. */ 1535 1.1 mrg data_clauses = nreverse (data_clauses); 1536 1.1 mrg 1537 1.1 mrg gimple *data_region 1538 1.1 mrg = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS, 1539 1.1 mrg data_clauses); 1540 1.1 mrg gimple_set_location (data_region, loc); 1541 1.1 mrg 1542 1.1 mrg /* Transform the body of the kernels region into a sequence of compute 1543 1.1 mrg constructs. */ 1544 1.1 mrg gimple *body = decompose_kernels_region_body (kernels_stmt, 1545 1.1 mrg kernels_clauses); 1546 1.1 mrg 1547 1.1 mrg /* Put the transformed pieces together. The entire body of the region is 1548 1.1 mrg wrapped in a try-finally statement that calls __builtin_GOACC_data_end 1549 1.1 mrg for cleanup. */ 1550 1.1 mrg gimple *try_stmt = make_data_region_try_statement (loc, body); 1551 1.1 mrg gimple_omp_set_body (data_region, try_stmt); 1552 1.1 mrg 1553 1.1 mrg return data_region; 1554 1.1 mrg } 1555 1.1 mrg 1556 1.1 mrg 1557 1.1 mrg /* Decompose OpenACC 'kernels' constructs in the current function. */ 1558 1.1 mrg 1559 1.1 mrg static tree 1560 1.1 mrg omp_oacc_kernels_decompose_callback_stmt (gimple_stmt_iterator *gsi_p, 1561 1.1 mrg bool *handled_ops_p, 1562 1.1 mrg struct walk_stmt_info *) 1563 1.1 mrg { 1564 1.1 mrg gimple *stmt = gsi_stmt (*gsi_p); 1565 1.1 mrg 1566 1.1 mrg if ((gimple_code (stmt) == GIMPLE_OMP_TARGET) 1567 1.1 mrg && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS) 1568 1.1 mrg { 1569 1.1 mrg gimple *stmt_new = omp_oacc_kernels_decompose_1 (stmt); 1570 1.1 mrg gsi_replace (gsi_p, stmt_new, false); 1571 1.1 mrg *handled_ops_p = true; 1572 1.1 mrg } 1573 1.1 mrg else 1574 1.1 mrg *handled_ops_p = false; 1575 1.1 mrg 1576 1.1 mrg return NULL; 1577 1.1 mrg } 1578 1.1 mrg 1579 1.1 mrg static unsigned int 1580 1.1 mrg omp_oacc_kernels_decompose (void) 1581 1.1 mrg { 1582 1.1 mrg gimple_seq body = gimple_body (current_function_decl); 1583 1.1 mrg 1584 1.1 mrg struct walk_stmt_info wi; 1585 1.1 mrg memset (&wi, 0, sizeof (wi)); 1586 1.1 mrg walk_gimple_seq_mod (&body, omp_oacc_kernels_decompose_callback_stmt, NULL, 1587 1.1 mrg &wi); 1588 1.1 mrg 1589 1.1 mrg gimple_set_body (current_function_decl, body); 1590 1.1 mrg 1591 1.1 mrg return 0; 1592 1.1 mrg } 1593 1.1 mrg 1594 1.1 mrg 1595 1.1 mrg namespace { 1596 1.1 mrg 1597 1.1 mrg const pass_data pass_data_omp_oacc_kernels_decompose = 1598 1.1 mrg { 1599 1.1 mrg GIMPLE_PASS, /* type */ 1600 1.1 mrg "omp_oacc_kernels_decompose", /* name */ 1601 1.1 mrg OPTGROUP_OMP, /* optinfo_flags */ 1602 1.1 mrg TV_NONE, /* tv_id */ 1603 1.1 mrg PROP_gimple_any, /* properties_required */ 1604 1.1 mrg 0, /* properties_provided */ 1605 1.1 mrg 0, /* properties_destroyed */ 1606 1.1 mrg 0, /* todo_flags_start */ 1607 1.1 mrg 0, /* todo_flags_finish */ 1608 1.1 mrg }; 1609 1.1 mrg 1610 1.1 mrg class pass_omp_oacc_kernels_decompose : public gimple_opt_pass 1611 1.1 mrg { 1612 1.1 mrg public: 1613 1.1 mrg pass_omp_oacc_kernels_decompose (gcc::context *ctxt) 1614 1.1 mrg : gimple_opt_pass (pass_data_omp_oacc_kernels_decompose, ctxt) 1615 1.1 mrg {} 1616 1.1 mrg 1617 1.1 mrg /* opt_pass methods: */ 1618 1.1 mrg virtual bool gate (function *) 1619 1.1 mrg { 1620 1.1 mrg return (flag_openacc 1621 1.1 mrg && param_openacc_kernels == OPENACC_KERNELS_DECOMPOSE); 1622 1.1 mrg } 1623 1.1 mrg virtual unsigned int execute (function *) 1624 1.1 mrg { 1625 1.1 mrg return omp_oacc_kernels_decompose (); 1626 1.1 mrg } 1627 1.1 mrg 1628 1.1 mrg }; // class pass_omp_oacc_kernels_decompose 1629 1.1 mrg 1630 1.1 mrg } // anon namespace 1631 1.1 mrg 1632 1.1 mrg gimple_opt_pass * 1633 1.1 mrg make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt) 1634 1.1 mrg { 1635 1.1 mrg return new pass_omp_oacc_kernels_decompose (ctxt); 1636 1.1 mrg } 1637