omp-oacc-kernels-decompose.cc revision 1.1 1 1.1 mrg /* Decompose OpenACC 'kernels' constructs into parts, a sequence of compute
2 1.1 mrg constructs
3 1.1 mrg
4 1.1 mrg Copyright (C) 2020-2022 Free Software Foundation, Inc.
5 1.1 mrg
6 1.1 mrg This file is part of GCC.
7 1.1 mrg
8 1.1 mrg GCC is free software; you can redistribute it and/or modify it under
9 1.1 mrg the terms of the GNU General Public License as published by the Free
10 1.1 mrg Software Foundation; either version 3, or (at your option) any later
11 1.1 mrg version.
12 1.1 mrg
13 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 1.1 mrg for more details.
17 1.1 mrg
18 1.1 mrg You should have received a copy of the GNU General Public License
19 1.1 mrg along with GCC; see the file COPYING3. If not see
20 1.1 mrg <http://www.gnu.org/licenses/>. */
21 1.1 mrg
22 1.1 mrg #include "config.h"
23 1.1 mrg #include "system.h"
24 1.1 mrg #include "coretypes.h"
25 1.1 mrg #include "backend.h"
26 1.1 mrg #include "target.h"
27 1.1 mrg #include "tree.h"
28 1.1 mrg #include "langhooks.h"
29 1.1 mrg #include "gimple.h"
30 1.1 mrg #include "tree-pass.h"
31 1.1 mrg #include "cgraph.h"
32 1.1 mrg #include "fold-const.h"
33 1.1 mrg #include "gimplify.h"
34 1.1 mrg #include "gimple-iterator.h"
35 1.1 mrg #include "gimple-walk.h"
36 1.1 mrg #include "gomp-constants.h"
37 1.1 mrg #include "omp-general.h"
38 1.1 mrg #include "diagnostic-core.h"
39 1.1 mrg
40 1.1 mrg
41 1.1 mrg /* This preprocessing pass is run immediately before lower_omp. It decomposes
42 1.1 mrg OpenACC 'kernels' constructs into parts, a sequence of compute constructs.
43 1.1 mrg
44 1.1 mrg The translation is as follows:
45 1.1 mrg - The entire 'kernels' region is turned into a 'data' region with clauses
46 1.1 mrg taken from the 'kernels' region. New 'create' clauses are added for all
47 1.1 mrg variables declared at the top level in the kernels region.
48 1.1 mrg - Any loop nests annotated with an OpenACC 'loop' directive are wrapped in
49 1.1 mrg a new compute construct.
50 1.1 mrg - 'loop' directives without an explicit 'independent' or 'seq' clause
51 1.1 mrg get an 'auto' clause added; other clauses are preserved on the loop
52 1.1 mrg or moved to the new surrounding compute construct, as applicable.
53 1.1 mrg - Any sequences of other code (non-loops, non-OpenACC 'loop's) are wrapped
54 1.1 mrg in new "gang-single" compute construct: 'worker'/'vector' parallelism is
55 1.1 mrg preserved, but 'num_gangs (1)' is enforced.
56 1.1 mrg - Both points above only apply at the topmost level in the region, that
57 1.1 mrg is, the transformation does not introduce new compute constructs inside
58 1.1 mrg nested statement bodies. In particular, this means that a
59 1.1 mrg gang-parallelizable loop inside an 'if' statement is made "gang-single".
60 1.1 mrg - In order to make the host wait only once for the whole region instead
61 1.1 mrg of once per device kernel launch, the new compute constructs are
62 1.1 mrg annotated 'async'. Unless the original 'kernels' construct already was
63 1.1 mrg marked 'async', the entire region ends with a 'wait' directive. If the
64 1.1 mrg original 'kernels' construct was marked 'async', the synthesized 'async'
65 1.1 mrg clauses use the original 'kernels' construct's 'async' argument
66 1.1 mrg (possibly implicit).
67 1.1 mrg */
68 1.1 mrg
69 1.1 mrg
70 1.1 mrg /*TODO Things are conceptually wrong here: 'loop' clauses may be hidden behind
71 1.1 mrg 'device_type', so we have to defer a lot of processing until we're in the
72 1.1 mrg offloading compilation. "Fortunately", GCC doesn't support the OpenACC
73 1.1 mrg 'device_type' clause yet, so we get away that. */
74 1.1 mrg
75 1.1 mrg
76 1.1 mrg /* Helper function for decompose_kernels_region_body. If STMT contains a
77 1.1 mrg "top-level" OMP_FOR statement, returns a pointer to that statement;
78 1.1 mrg returns NULL otherwise.
79 1.1 mrg
80 1.1 mrg A "top-level" OMP_FOR statement is one that is possibly accompanied by
81 1.1 mrg small snippets of setup code. Specifically, this function accepts an
82 1.1 mrg OMP_FOR possibly wrapped in a singleton bind and a singleton try
83 1.1 mrg statement to allow for a local loop variable, but not an OMP_FOR
84 1.1 mrg statement nested in any other constructs. Alternatively, it accepts a
85 1.1 mrg non-singleton bind containing only assignments and then an OMP_FOR
86 1.1 mrg statement at the very end. The former style can be generated by the C
87 1.1 mrg frontend, the latter by the Fortran frontend. */
88 1.1 mrg
89 1.1 mrg static gimple *
90 1.1 mrg top_level_omp_for_in_stmt (gimple *stmt)
91 1.1 mrg {
92 1.1 mrg if (gimple_code (stmt) == GIMPLE_OMP_FOR)
93 1.1 mrg return stmt;
94 1.1 mrg
95 1.1 mrg if (gimple_code (stmt) == GIMPLE_BIND)
96 1.1 mrg {
97 1.1 mrg gimple_seq body = gimple_bind_body (as_a <gbind *> (stmt));
98 1.1 mrg if (gimple_seq_singleton_p (body))
99 1.1 mrg {
100 1.1 mrg /* Accept an OMP_FOR statement, or a try statement containing only
101 1.1 mrg a single OMP_FOR. */
102 1.1 mrg gimple *maybe_for_or_try = gimple_seq_first_stmt (body);
103 1.1 mrg if (gimple_code (maybe_for_or_try) == GIMPLE_OMP_FOR)
104 1.1 mrg return maybe_for_or_try;
105 1.1 mrg else if (gimple_code (maybe_for_or_try) == GIMPLE_TRY)
106 1.1 mrg {
107 1.1 mrg gimple_seq try_body = gimple_try_eval (maybe_for_or_try);
108 1.1 mrg if (!gimple_seq_singleton_p (try_body))
109 1.1 mrg return NULL;
110 1.1 mrg gimple *maybe_omp_for_stmt = gimple_seq_first_stmt (try_body);
111 1.1 mrg if (gimple_code (maybe_omp_for_stmt) == GIMPLE_OMP_FOR)
112 1.1 mrg return maybe_omp_for_stmt;
113 1.1 mrg }
114 1.1 mrg }
115 1.1 mrg else
116 1.1 mrg {
117 1.1 mrg gimple_stmt_iterator gsi;
118 1.1 mrg /* Accept only a block of optional assignments followed by an
119 1.1 mrg OMP_FOR at the end. No other kinds of statements allowed. */
120 1.1 mrg for (gsi = gsi_start (body); !gsi_end_p (gsi); gsi_next (&gsi))
121 1.1 mrg {
122 1.1 mrg gimple *body_stmt = gsi_stmt (gsi);
123 1.1 mrg if (gimple_code (body_stmt) == GIMPLE_ASSIGN)
124 1.1 mrg continue;
125 1.1 mrg else if (gimple_code (body_stmt) == GIMPLE_OMP_FOR
126 1.1 mrg && gsi_one_before_end_p (gsi))
127 1.1 mrg return body_stmt;
128 1.1 mrg else
129 1.1 mrg return NULL;
130 1.1 mrg }
131 1.1 mrg }
132 1.1 mrg }
133 1.1 mrg
134 1.1 mrg return NULL;
135 1.1 mrg }
136 1.1 mrg
137 1.1 mrg /* Helper for adjust_region_code: evaluate the statement at GSI_P. */
138 1.1 mrg
139 1.1 mrg static tree
140 1.1 mrg adjust_region_code_walk_stmt_fn (gimple_stmt_iterator *gsi_p,
141 1.1 mrg bool *handled_ops_p,
142 1.1 mrg struct walk_stmt_info *wi)
143 1.1 mrg {
144 1.1 mrg int *region_code = (int *) wi->info;
145 1.1 mrg
146 1.1 mrg gimple *stmt = gsi_stmt (*gsi_p);
147 1.1 mrg switch (gimple_code (stmt))
148 1.1 mrg {
149 1.1 mrg case GIMPLE_OMP_FOR:
150 1.1 mrg {
151 1.1 mrg tree clauses = gimple_omp_for_clauses (stmt);
152 1.1 mrg if (omp_find_clause (clauses, OMP_CLAUSE_INDEPENDENT))
153 1.1 mrg {
154 1.1 mrg /* Explicit 'independent' clause. */
155 1.1 mrg /* Keep going; recurse into loop body. */
156 1.1 mrg break;
157 1.1 mrg }
158 1.1 mrg else if (omp_find_clause (clauses, OMP_CLAUSE_SEQ))
159 1.1 mrg {
160 1.1 mrg /* Explicit 'seq' clause. */
161 1.1 mrg /* We'll "parallelize" if at some level a loop construct has been
162 1.1 mrg marked up by the user as unparallelizable ('seq' clause; we'll
163 1.1 mrg respect that in the later processing). Given that the user has
164 1.1 mrg explicitly marked it up, this loop construct cannot be
165 1.1 mrg performance-critical, and in this case it's also fine to
166 1.1 mrg "parallelize" instead of "gang-single", because any outer or
167 1.1 mrg inner loops may still exploit the available parallelism. */
168 1.1 mrg /* Keep going; recurse into loop body. */
169 1.1 mrg break;
170 1.1 mrg }
171 1.1 mrg else
172 1.1 mrg {
173 1.1 mrg /* Explicit or implicit 'auto' clause. */
174 1.1 mrg /* The user would like this loop analyzed ('auto' clause) and
175 1.1 mrg typically parallelized, but we don't have available yet the
176 1.1 mrg compiler logic to analyze this, so can't parallelize it here, so
177 1.1 mrg we'd very likely be running into a performance problem if we
178 1.1 mrg were to execute this unparallelized, thus forward the whole loop
179 1.1 mrg nest to 'parloops'. */
180 1.1 mrg *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
181 1.1 mrg /* Terminate: final decision for this region. */
182 1.1 mrg *handled_ops_p = true;
183 1.1 mrg return integer_zero_node;
184 1.1 mrg }
185 1.1 mrg gcc_unreachable ();
186 1.1 mrg }
187 1.1 mrg
188 1.1 mrg case GIMPLE_COND:
189 1.1 mrg case GIMPLE_GOTO:
190 1.1 mrg case GIMPLE_SWITCH:
191 1.1 mrg case GIMPLE_ASM:
192 1.1 mrg case GIMPLE_TRANSACTION:
193 1.1 mrg case GIMPLE_RETURN:
194 1.1 mrg /* Statement that might constitute some looping/control flow pattern. */
195 1.1 mrg /* The user would like this code analyzed (implicit inside a 'kernels'
196 1.1 mrg region) and typically parallelized, but we don't have available yet
197 1.1 mrg the compiler logic to analyze this, so can't parallelize it here, so
198 1.1 mrg we'd very likely be running into a performance problem if we were to
199 1.1 mrg execute this unparallelized, thus forward the whole thing to
200 1.1 mrg 'parloops'. */
201 1.1 mrg *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
202 1.1 mrg /* Terminate: final decision for this region. */
203 1.1 mrg *handled_ops_p = true;
204 1.1 mrg return integer_zero_node;
205 1.1 mrg
206 1.1 mrg default:
207 1.1 mrg /* Keep going. */
208 1.1 mrg break;
209 1.1 mrg }
210 1.1 mrg
211 1.1 mrg return NULL;
212 1.1 mrg }
213 1.1 mrg
214 1.1 mrg /* Adjust the REGION_CODE for the region in GS. */
215 1.1 mrg
216 1.1 mrg static void
217 1.1 mrg adjust_region_code (gimple_seq gs, int *region_code)
218 1.1 mrg {
219 1.1 mrg struct walk_stmt_info wi;
220 1.1 mrg memset (&wi, 0, sizeof (wi));
221 1.1 mrg wi.info = region_code;
222 1.1 mrg walk_gimple_seq (gs, adjust_region_code_walk_stmt_fn, NULL, &wi);
223 1.1 mrg }
224 1.1 mrg
225 1.1 mrg /* Helper function for make_loops_gang_single for walking the tree. If the
226 1.1 mrg statement indicated by GSI_P is an OpenACC for loop with a gang clause,
227 1.1 mrg issue a warning and remove the clause. */
228 1.1 mrg
229 1.1 mrg static tree
230 1.1 mrg visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p,
231 1.1 mrg bool *handled_ops_p,
232 1.1 mrg struct walk_stmt_info *)
233 1.1 mrg {
234 1.1 mrg *handled_ops_p = false;
235 1.1 mrg
236 1.1 mrg gimple *stmt = gsi_stmt (*gsi_p);
237 1.1 mrg switch (gimple_code (stmt))
238 1.1 mrg {
239 1.1 mrg case GIMPLE_OMP_FOR:
240 1.1 mrg /*TODO Given the current 'adjust_region_code' algorithm, this is
241 1.1 mrg actually... */
242 1.1 mrg #if 0
243 1.1 mrg gcc_unreachable ();
244 1.1 mrg #else
245 1.1 mrg /* ..., but due to bugs (PR100400), we may actually come here.
246 1.1 mrg Reliably catch this, regardless of checking level. */
247 1.1 mrg internal_error ("PR100400");
248 1.1 mrg #endif
249 1.1 mrg
250 1.1 mrg {
251 1.1 mrg tree clauses = gimple_omp_for_clauses (stmt);
252 1.1 mrg tree prev_clause = NULL;
253 1.1 mrg for (tree clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause))
254 1.1 mrg {
255 1.1 mrg if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_GANG)
256 1.1 mrg {
257 1.1 mrg /* It makes no sense to have a 'gang' clause in a "gang-single"
258 1.1 mrg region, so warn and remove it. */
259 1.1 mrg warning_at (gimple_location (stmt), 0,
260 1.1 mrg "conditionally executed loop in %<kernels%> region"
261 1.1 mrg " will be executed by a single gang;"
262 1.1 mrg " ignoring %<gang%> clause");
263 1.1 mrg if (prev_clause != NULL)
264 1.1 mrg OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (clause);
265 1.1 mrg else
266 1.1 mrg clauses = OMP_CLAUSE_CHAIN (clause);
267 1.1 mrg
268 1.1 mrg break;
269 1.1 mrg }
270 1.1 mrg prev_clause = clause;
271 1.1 mrg }
272 1.1 mrg gimple_omp_for_set_clauses (stmt, clauses);
273 1.1 mrg }
274 1.1 mrg /* No need to recurse into nested statements; no loop nested inside
275 1.1 mrg this loop can be gang-partitioned. */
276 1.1 mrg sorry ("%<gang%> loop in %<gang-single%> region");
277 1.1 mrg *handled_ops_p = true;
278 1.1 mrg break;
279 1.1 mrg
280 1.1 mrg default:
281 1.1 mrg break;
282 1.1 mrg }
283 1.1 mrg
284 1.1 mrg return NULL;
285 1.1 mrg }
286 1.1 mrg
287 1.1 mrg /* Visit all nested OpenACC loops in the sequence indicated by GS. This
288 1.1 mrg statement is expected to be inside a gang-single region. Issue a warning
289 1.1 mrg for any loops inside it that have gang clauses and remove the clauses. */
290 1.1 mrg
291 1.1 mrg static void
292 1.1 mrg make_loops_gang_single (gimple_seq gs)
293 1.1 mrg {
294 1.1 mrg struct walk_stmt_info wi;
295 1.1 mrg memset (&wi, 0, sizeof (wi));
296 1.1 mrg walk_gimple_seq (gs, visit_loops_in_gang_single_region, NULL, &wi);
297 1.1 mrg }
298 1.1 mrg
299 1.1 mrg /* Construct a "gang-single" compute construct at LOC containing the STMTS.
300 1.1 mrg Annotate with CLAUSES, which must not contain a 'num_gangs' clause, and an
301 1.1 mrg additional 'num_gangs (1)' clause to force "gang-single" execution. */
302 1.1 mrg
303 1.1 mrg static gimple *
304 1.1 mrg make_region_seq (location_t loc, gimple_seq stmts,
305 1.1 mrg tree num_gangs_clause,
306 1.1 mrg tree num_workers_clause,
307 1.1 mrg tree vector_length_clause,
308 1.1 mrg tree clauses)
309 1.1 mrg {
310 1.1 mrg /* This correctly unshares the entire clause chain rooted here. */
311 1.1 mrg clauses = unshare_expr (clauses);
312 1.1 mrg
313 1.1 mrg dump_user_location_t loc_stmts_first = gimple_seq_first (stmts);
314 1.1 mrg
315 1.1 mrg /* Figure out the region code for this region. */
316 1.1 mrg /* Optimistic default: assume "setup code", no looping; thus not
317 1.1 mrg performance-critical. */
318 1.1 mrg int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE;
319 1.1 mrg adjust_region_code (stmts, ®ion_code);
320 1.1 mrg
321 1.1 mrg if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE)
322 1.1 mrg {
323 1.1 mrg if (dump_enabled_p ())
324 1.1 mrg /*TODO MSG_MISSED_OPTIMIZATION? */
325 1.1 mrg dump_printf_loc (MSG_NOTE, loc_stmts_first,
326 1.1 mrg "beginning %<gang-single%> part"
327 1.1 mrg " in OpenACC %<kernels%> region\n");
328 1.1 mrg
329 1.1 mrg /* Synthesize a 'num_gangs (1)' clause. */
330 1.1 mrg tree gang_single_clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
331 1.1 mrg OMP_CLAUSE_OPERAND (gang_single_clause, 0) = integer_one_node;
332 1.1 mrg OMP_CLAUSE_CHAIN (gang_single_clause) = clauses;
333 1.1 mrg clauses = gang_single_clause;
334 1.1 mrg
335 1.1 mrg /* Remove and issue warnings about gang clauses on any OpenACC
336 1.1 mrg loops nested inside this sequentially executed statement. */
337 1.1 mrg make_loops_gang_single (stmts);
338 1.1 mrg }
339 1.1 mrg else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS)
340 1.1 mrg {
341 1.1 mrg if (dump_enabled_p ())
342 1.1 mrg dump_printf_loc (MSG_NOTE, loc_stmts_first,
343 1.1 mrg "beginning %<parloops%> part"
344 1.1 mrg " in OpenACC %<kernels%> region\n");
345 1.1 mrg
346 1.1 mrg /* As we're transforming a 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another
347 1.1 mrg 'GF_OMP_TARGET_KIND_OACC_KERNELS', this isn't doing any of the clauses
348 1.1 mrg mangling that 'make_region_loop_nest' is doing. */
349 1.1 mrg /* Re-assemble the clauses stripped off earlier. */
350 1.1 mrg if (num_gangs_clause != NULL)
351 1.1 mrg {
352 1.1 mrg tree c = unshare_expr (num_gangs_clause);
353 1.1 mrg OMP_CLAUSE_CHAIN (c) = clauses;
354 1.1 mrg clauses = c;
355 1.1 mrg }
356 1.1 mrg if (num_workers_clause != NULL)
357 1.1 mrg {
358 1.1 mrg tree c = unshare_expr (num_workers_clause);
359 1.1 mrg OMP_CLAUSE_CHAIN (c) = clauses;
360 1.1 mrg clauses = c;
361 1.1 mrg }
362 1.1 mrg if (vector_length_clause != NULL)
363 1.1 mrg {
364 1.1 mrg tree c = unshare_expr (vector_length_clause);
365 1.1 mrg OMP_CLAUSE_CHAIN (c) = clauses;
366 1.1 mrg clauses = c;
367 1.1 mrg }
368 1.1 mrg }
369 1.1 mrg else
370 1.1 mrg gcc_unreachable ();
371 1.1 mrg
372 1.1 mrg /* Build the gang-single region. */
373 1.1 mrg gimple *single_region = gimple_build_omp_target (NULL, region_code, clauses);
374 1.1 mrg gimple_set_location (single_region, loc);
375 1.1 mrg gbind *single_body = gimple_build_bind (NULL, stmts, make_node (BLOCK));
376 1.1 mrg gimple_omp_set_body (single_region, single_body);
377 1.1 mrg
378 1.1 mrg return single_region;
379 1.1 mrg }
380 1.1 mrg
381 1.1 mrg /* Helper function for make_region_loop_nest. Adds a 'num_gangs'
382 1.1 mrg ('num_workers', 'vector_length') clause to the given CLAUSES, either the one
383 1.1 mrg from the parent compute construct (PARENT_CLAUSE) or a new one based on the
384 1.1 mrg loop's own LOOP_CLAUSE ('gang (num: N)' or similar for 'worker' or 'vector'
385 1.1 mrg clauses) with the given CLAUSE_CODE. Does nothing if neither PARENT_CLAUSE
386 1.1 mrg nor LOOP_CLAUSE exist. Returns the new clauses. */
387 1.1 mrg
388 1.1 mrg static tree
389 1.1 mrg add_parent_or_loop_num_clause (tree parent_clause, tree loop_clause,
390 1.1 mrg omp_clause_code clause_code, tree clauses)
391 1.1 mrg {
392 1.1 mrg if (parent_clause != NULL)
393 1.1 mrg {
394 1.1 mrg tree num_clause = unshare_expr (parent_clause);
395 1.1 mrg OMP_CLAUSE_CHAIN (num_clause) = clauses;
396 1.1 mrg clauses = num_clause;
397 1.1 mrg }
398 1.1 mrg else if (loop_clause != NULL)
399 1.1 mrg {
400 1.1 mrg /* The kernels region does not have a 'num_gangs' clause, but the loop
401 1.1 mrg itself had a 'gang (num: N)' clause. Honor it by adding a
402 1.1 mrg 'num_gangs (N)' clause on the compute construct. */
403 1.1 mrg tree num = OMP_CLAUSE_OPERAND (loop_clause, 0);
404 1.1 mrg tree new_num_clause
405 1.1 mrg = build_omp_clause (OMP_CLAUSE_LOCATION (loop_clause), clause_code);
406 1.1 mrg OMP_CLAUSE_OPERAND (new_num_clause, 0) = num;
407 1.1 mrg OMP_CLAUSE_CHAIN (new_num_clause) = clauses;
408 1.1 mrg clauses = new_num_clause;
409 1.1 mrg }
410 1.1 mrg return clauses;
411 1.1 mrg }
412 1.1 mrg
413 1.1 mrg /* Helper for make_region_loop_nest, looking for 'worker (num: N)' or 'vector
414 1.1 mrg (length: N)' clauses in nested loops. Removes the argument, transferring it
415 1.1 mrg to the enclosing compute construct (via WI->INFO). If arguments within the
416 1.1 mrg same loop nest conflict, emits a warning.
417 1.1 mrg
418 1.1 mrg This function also decides whether to add an 'auto' clause on each of these
419 1.1 mrg nested loops. */
420 1.1 mrg
421 1.1 mrg struct adjust_nested_loop_clauses_wi_info
422 1.1 mrg {
423 1.1 mrg tree *loop_gang_clause_ptr;
424 1.1 mrg tree *loop_worker_clause_ptr;
425 1.1 mrg tree *loop_vector_clause_ptr;
426 1.1 mrg };
427 1.1 mrg
428 1.1 mrg static tree
429 1.1 mrg adjust_nested_loop_clauses (gimple_stmt_iterator *gsi_p, bool *,
430 1.1 mrg struct walk_stmt_info *wi)
431 1.1 mrg {
432 1.1 mrg struct adjust_nested_loop_clauses_wi_info *wi_info
433 1.1 mrg = (struct adjust_nested_loop_clauses_wi_info *) wi->info;
434 1.1 mrg gimple *stmt = gsi_stmt (*gsi_p);
435 1.1 mrg
436 1.1 mrg if (gimple_code (stmt) == GIMPLE_OMP_FOR)
437 1.1 mrg {
438 1.1 mrg bool add_auto_clause = true;
439 1.1 mrg tree loop_clauses = gimple_omp_for_clauses (stmt);
440 1.1 mrg tree loop_clause = loop_clauses;
441 1.1 mrg for (; loop_clause; loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
442 1.1 mrg {
443 1.1 mrg tree *outer_clause_ptr = NULL;
444 1.1 mrg switch (OMP_CLAUSE_CODE (loop_clause))
445 1.1 mrg {
446 1.1 mrg case OMP_CLAUSE_GANG:
447 1.1 mrg outer_clause_ptr = wi_info->loop_gang_clause_ptr;
448 1.1 mrg break;
449 1.1 mrg case OMP_CLAUSE_WORKER:
450 1.1 mrg outer_clause_ptr = wi_info->loop_worker_clause_ptr;
451 1.1 mrg break;
452 1.1 mrg case OMP_CLAUSE_VECTOR:
453 1.1 mrg outer_clause_ptr = wi_info->loop_vector_clause_ptr;
454 1.1 mrg break;
455 1.1 mrg case OMP_CLAUSE_SEQ:
456 1.1 mrg case OMP_CLAUSE_INDEPENDENT:
457 1.1 mrg case OMP_CLAUSE_AUTO:
458 1.1 mrg add_auto_clause = false;
459 1.1 mrg default:
460 1.1 mrg break;
461 1.1 mrg }
462 1.1 mrg if (outer_clause_ptr != NULL)
463 1.1 mrg {
464 1.1 mrg if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL
465 1.1 mrg && *outer_clause_ptr == NULL)
466 1.1 mrg {
467 1.1 mrg /* Transfer the clause to the enclosing compute construct and
468 1.1 mrg remove the numerical argument from the 'loop'. */
469 1.1 mrg *outer_clause_ptr = unshare_expr (loop_clause);
470 1.1 mrg OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
471 1.1 mrg }
472 1.1 mrg else if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL &&
473 1.1 mrg OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0) != NULL)
474 1.1 mrg {
475 1.1 mrg /* See if both of these are the same constant. If they
476 1.1 mrg aren't, emit a warning. */
477 1.1 mrg tree old_op = OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0);
478 1.1 mrg tree new_op = OMP_CLAUSE_OPERAND (loop_clause, 0);
479 1.1 mrg if (!(cst_and_fits_in_hwi (old_op) &&
480 1.1 mrg cst_and_fits_in_hwi (new_op) &&
481 1.1 mrg int_cst_value (old_op) == int_cst_value (new_op)))
482 1.1 mrg {
483 1.1 mrg const char *clause_name
484 1.1 mrg = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
485 1.1 mrg error_at (gimple_location (stmt),
486 1.1 mrg "cannot honor conflicting %qs clause",
487 1.1 mrg clause_name);
488 1.1 mrg inform (OMP_CLAUSE_LOCATION (*outer_clause_ptr),
489 1.1 mrg "location of the previous clause"
490 1.1 mrg " in the same loop nest");
491 1.1 mrg }
492 1.1 mrg OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
493 1.1 mrg }
494 1.1 mrg }
495 1.1 mrg }
496 1.1 mrg if (add_auto_clause)
497 1.1 mrg {
498 1.1 mrg tree auto_clause
499 1.1 mrg = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_AUTO);
500 1.1 mrg OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
501 1.1 mrg gimple_omp_for_set_clauses (stmt, auto_clause);
502 1.1 mrg }
503 1.1 mrg }
504 1.1 mrg
505 1.1 mrg return NULL;
506 1.1 mrg }
507 1.1 mrg
508 1.1 mrg /* Helper for make_region_loop_nest. Transform OpenACC 'kernels'/'loop'
509 1.1 mrg construct clauses into OpenACC 'parallel'/'loop' construct ones. */
510 1.1 mrg
511 1.1 mrg static tree
512 1.1 mrg transform_kernels_loop_clauses (gimple *omp_for,
513 1.1 mrg tree num_gangs_clause,
514 1.1 mrg tree num_workers_clause,
515 1.1 mrg tree vector_length_clause,
516 1.1 mrg tree clauses)
517 1.1 mrg {
518 1.1 mrg /* If this loop in a kernels region does not have an explicit 'seq',
519 1.1 mrg 'independent', or 'auto' clause, we must give it an explicit 'auto'
520 1.1 mrg clause.
521 1.1 mrg We also check for 'gang (num: N)' clauses. These must not appear in
522 1.1 mrg kernels regions that have their own 'num_gangs' clause. Otherwise, they
523 1.1 mrg must be converted and put on the region; similarly for 'worker' and
524 1.1 mrg 'vector' clauses. */
525 1.1 mrg bool add_auto_clause = true;
526 1.1 mrg tree loop_gang_clause = NULL, loop_worker_clause = NULL,
527 1.1 mrg loop_vector_clause = NULL;
528 1.1 mrg tree loop_clauses = gimple_omp_for_clauses (omp_for);
529 1.1 mrg for (tree loop_clause = loop_clauses;
530 1.1 mrg loop_clause;
531 1.1 mrg loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
532 1.1 mrg {
533 1.1 mrg bool found_num_clause = false;
534 1.1 mrg tree *clause_ptr, clause_to_check;
535 1.1 mrg switch (OMP_CLAUSE_CODE (loop_clause))
536 1.1 mrg {
537 1.1 mrg case OMP_CLAUSE_GANG:
538 1.1 mrg found_num_clause = true;
539 1.1 mrg clause_ptr = &loop_gang_clause;
540 1.1 mrg clause_to_check = num_gangs_clause;
541 1.1 mrg break;
542 1.1 mrg case OMP_CLAUSE_WORKER:
543 1.1 mrg found_num_clause = true;
544 1.1 mrg clause_ptr = &loop_worker_clause;
545 1.1 mrg clause_to_check = num_workers_clause;
546 1.1 mrg break;
547 1.1 mrg case OMP_CLAUSE_VECTOR:
548 1.1 mrg found_num_clause = true;
549 1.1 mrg clause_ptr = &loop_vector_clause;
550 1.1 mrg clause_to_check = vector_length_clause;
551 1.1 mrg break;
552 1.1 mrg case OMP_CLAUSE_INDEPENDENT:
553 1.1 mrg case OMP_CLAUSE_SEQ:
554 1.1 mrg case OMP_CLAUSE_AUTO:
555 1.1 mrg add_auto_clause = false;
556 1.1 mrg default:
557 1.1 mrg break;
558 1.1 mrg }
559 1.1 mrg if (found_num_clause && OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL)
560 1.1 mrg {
561 1.1 mrg if (clause_to_check)
562 1.1 mrg {
563 1.1 mrg const char *clause_name
564 1.1 mrg = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
565 1.1 mrg const char *parent_clause_name
566 1.1 mrg = omp_clause_code_name[OMP_CLAUSE_CODE (clause_to_check)];
567 1.1 mrg error_at (OMP_CLAUSE_LOCATION (loop_clause),
568 1.1 mrg "argument not permitted on %qs clause"
569 1.1 mrg " in OpenACC %<kernels%> region with a %qs clause",
570 1.1 mrg clause_name, parent_clause_name);
571 1.1 mrg inform (OMP_CLAUSE_LOCATION (clause_to_check),
572 1.1 mrg "location of OpenACC %<kernels%>");
573 1.1 mrg }
574 1.1 mrg /* Copy the 'gang (N)'/'worker (N)'/'vector (N)' clause to the
575 1.1 mrg enclosing compute construct. */
576 1.1 mrg *clause_ptr = unshare_expr (loop_clause);
577 1.1 mrg OMP_CLAUSE_CHAIN (*clause_ptr) = NULL;
578 1.1 mrg /* Leave a 'gang'/'worker'/'vector' clause on the 'loop', but without
579 1.1 mrg argument. */
580 1.1 mrg OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
581 1.1 mrg }
582 1.1 mrg }
583 1.1 mrg if (add_auto_clause)
584 1.1 mrg {
585 1.1 mrg tree auto_clause = build_omp_clause (gimple_location (omp_for),
586 1.1 mrg OMP_CLAUSE_AUTO);
587 1.1 mrg OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
588 1.1 mrg loop_clauses = auto_clause;
589 1.1 mrg }
590 1.1 mrg gimple_omp_for_set_clauses (omp_for, loop_clauses);
591 1.1 mrg /* We must also recurse into the loop; it might contain nested loops having
592 1.1 mrg their own 'worker (num: W)' or 'vector (length: V)' clauses. Turn these
593 1.1 mrg into 'worker'/'vector' clauses on the compute construct. */
594 1.1 mrg struct walk_stmt_info wi;
595 1.1 mrg memset (&wi, 0, sizeof (wi));
596 1.1 mrg struct adjust_nested_loop_clauses_wi_info wi_info;
597 1.1 mrg wi_info.loop_gang_clause_ptr = &loop_gang_clause;
598 1.1 mrg wi_info.loop_worker_clause_ptr = &loop_worker_clause;
599 1.1 mrg wi_info.loop_vector_clause_ptr = &loop_vector_clause;
600 1.1 mrg wi.info = &wi_info;
601 1.1 mrg gimple *body = gimple_omp_body (omp_for);
602 1.1 mrg walk_gimple_seq (body, adjust_nested_loop_clauses, NULL, &wi);
603 1.1 mrg /* Check if there were conflicting numbers of workers or vector length. */
604 1.1 mrg if (loop_gang_clause != NULL &&
605 1.1 mrg OMP_CLAUSE_OPERAND (loop_gang_clause, 0) == NULL)
606 1.1 mrg loop_gang_clause = NULL;
607 1.1 mrg if (loop_worker_clause != NULL &&
608 1.1 mrg OMP_CLAUSE_OPERAND (loop_worker_clause, 0) == NULL)
609 1.1 mrg loop_worker_clause = NULL;
610 1.1 mrg if (loop_vector_clause != NULL &&
611 1.1 mrg OMP_CLAUSE_OPERAND (loop_vector_clause, 0) == NULL)
612 1.1 mrg vector_length_clause = NULL;
613 1.1 mrg
614 1.1 mrg /* If the kernels region had 'num_gangs', 'num_worker', 'vector_length'
615 1.1 mrg clauses, add these to this new compute construct. */
616 1.1 mrg clauses
617 1.1 mrg = add_parent_or_loop_num_clause (num_gangs_clause, loop_gang_clause,
618 1.1 mrg OMP_CLAUSE_NUM_GANGS, clauses);
619 1.1 mrg clauses
620 1.1 mrg = add_parent_or_loop_num_clause (num_workers_clause, loop_worker_clause,
621 1.1 mrg OMP_CLAUSE_NUM_WORKERS, clauses);
622 1.1 mrg clauses
623 1.1 mrg = add_parent_or_loop_num_clause (vector_length_clause, loop_vector_clause,
624 1.1 mrg OMP_CLAUSE_VECTOR_LENGTH, clauses);
625 1.1 mrg
626 1.1 mrg return clauses;
627 1.1 mrg }
628 1.1 mrg
629 1.1 mrg /* Construct a possibly gang-parallel compute construct containing the STMT,
630 1.1 mrg which must be identical to, or a bind containing, the loop OMP_FOR.
631 1.1 mrg
632 1.1 mrg The NUM_GANGS_CLAUSE, NUM_WORKERS_CLAUSE, and VECTOR_LENGTH_CLAUSE are
633 1.1 mrg optional clauses from the original kernels region and must not be contained
634 1.1 mrg in the other CLAUSES. The newly created compute construct is annotated with
635 1.1 mrg the optional NUM_GANGS_CLAUSE as well as the other CLAUSES. If there is no
636 1.1 mrg NUM_GANGS_CLAUSE but the loop has a 'gang (num: N)' clause, that is
637 1.1 mrg converted to a 'num_gangs (N)' clause on the new compute construct, and
638 1.1 mrg similarly for 'worker' and 'vector' clauses.
639 1.1 mrg
640 1.1 mrg The outermost loop gets an 'auto' clause unless there already is an
641 1.1 mrg 'seq'/'independent'/'auto' clause. Nested loops inside OMP_FOR are treated
642 1.1 mrg similarly by the adjust_nested_loop_clauses function. */
643 1.1 mrg
644 1.1 mrg static gimple *
645 1.1 mrg make_region_loop_nest (gimple *omp_for, gimple_seq stmts,
646 1.1 mrg tree num_gangs_clause,
647 1.1 mrg tree num_workers_clause,
648 1.1 mrg tree vector_length_clause,
649 1.1 mrg tree clauses)
650 1.1 mrg {
651 1.1 mrg /* This correctly unshares the entire clause chain rooted here. */
652 1.1 mrg clauses = unshare_expr (clauses);
653 1.1 mrg
654 1.1 mrg /* Figure out the region code for this region. */
655 1.1 mrg /* Optimistic default: assume that the loop nest is parallelizable
656 1.1 mrg (essentially, no GIMPLE_OMP_FOR with (explicit or implicit) 'auto' clause,
657 1.1 mrg and no un-annotated loops). */
658 1.1 mrg int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED;
659 1.1 mrg adjust_region_code (stmts, ®ion_code);
660 1.1 mrg
661 1.1 mrg if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
662 1.1 mrg {
663 1.1 mrg if (dump_enabled_p ())
664 1.1 mrg /* This is not MSG_OPTIMIZED_LOCATIONS, as we're just doing what the
665 1.1 mrg user asked us to. */
666 1.1 mrg dump_printf_loc (MSG_NOTE, omp_for,
667 1.1 mrg "parallelized loop nest"
668 1.1 mrg " in OpenACC %<kernels%> region\n");
669 1.1 mrg
670 1.1 mrg clauses = transform_kernels_loop_clauses (omp_for,
671 1.1 mrg num_gangs_clause,
672 1.1 mrg num_workers_clause,
673 1.1 mrg vector_length_clause,
674 1.1 mrg clauses);
675 1.1 mrg }
676 1.1 mrg else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS)
677 1.1 mrg {
678 1.1 mrg if (dump_enabled_p ())
679 1.1 mrg dump_printf_loc (MSG_NOTE, omp_for,
680 1.1 mrg "forwarded loop nest"
681 1.1 mrg " in OpenACC %<kernels%> region"
682 1.1 mrg " to %<parloops%> for analysis\n");
683 1.1 mrg
684 1.1 mrg /* We're transforming one 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another
685 1.1 mrg 'GF_OMP_TARGET_KIND_OACC_KERNELS', so don't have to
686 1.1 mrg 'transform_kernels_loop_clauses'. */
687 1.1 mrg /* Re-assemble the clauses stripped off earlier. */
688 1.1 mrg clauses
689 1.1 mrg = add_parent_or_loop_num_clause (num_gangs_clause, NULL,
690 1.1 mrg OMP_CLAUSE_NUM_GANGS, clauses);
691 1.1 mrg clauses
692 1.1 mrg = add_parent_or_loop_num_clause (num_workers_clause, NULL,
693 1.1 mrg OMP_CLAUSE_NUM_WORKERS, clauses);
694 1.1 mrg clauses
695 1.1 mrg = add_parent_or_loop_num_clause (vector_length_clause, NULL,
696 1.1 mrg OMP_CLAUSE_VECTOR_LENGTH, clauses);
697 1.1 mrg }
698 1.1 mrg else
699 1.1 mrg gcc_unreachable ();
700 1.1 mrg
701 1.1 mrg gimple *parallel_body_bind
702 1.1 mrg = gimple_build_bind (NULL, stmts, make_node (BLOCK));
703 1.1 mrg gimple *parallel_region
704 1.1 mrg = gimple_build_omp_target (parallel_body_bind, region_code, clauses);
705 1.1 mrg gimple_set_location (parallel_region, gimple_location (omp_for));
706 1.1 mrg
707 1.1 mrg return parallel_region;
708 1.1 mrg }
709 1.1 mrg
710 1.1 mrg /* Eliminate any binds directly inside BIND by adding their statements to
711 1.1 mrg BIND (i.e., modifying it in place), excluding binds that hold only an
712 1.1 mrg OMP_FOR loop and associated setup/cleanup code. Recurse into binds but
713 1.1 mrg not other statements. Return a chain of the local variables of eliminated
714 1.1 mrg binds, i.e., the local variables found in nested binds. If
715 1.1 mrg INCLUDE_TOPLEVEL_VARS is true, this also includes the variables belonging
716 1.1 mrg to BIND itself. */
717 1.1 mrg
718 1.1 mrg static tree
719 1.1 mrg flatten_binds (gbind *bind, bool include_toplevel_vars = false)
720 1.1 mrg {
721 1.1 mrg tree vars = NULL, last_var = NULL;
722 1.1 mrg
723 1.1 mrg if (include_toplevel_vars)
724 1.1 mrg {
725 1.1 mrg vars = gimple_bind_vars (bind);
726 1.1 mrg last_var = vars;
727 1.1 mrg }
728 1.1 mrg
729 1.1 mrg gimple_seq new_body = NULL;
730 1.1 mrg gimple_seq body_sequence = gimple_bind_body (bind);
731 1.1 mrg gimple_stmt_iterator gsi, gsi_n;
732 1.1 mrg for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n)
733 1.1 mrg {
734 1.1 mrg /* Advance the iterator here because otherwise it would be invalidated
735 1.1 mrg by moving statements below. */
736 1.1 mrg gsi_n = gsi;
737 1.1 mrg gsi_next (&gsi_n);
738 1.1 mrg
739 1.1 mrg gimple *stmt = gsi_stmt (gsi);
740 1.1 mrg /* Flatten bind statements, except the ones that contain only an
741 1.1 mrg OpenACC for loop. */
742 1.1 mrg if (gimple_code (stmt) == GIMPLE_BIND
743 1.1 mrg && !top_level_omp_for_in_stmt (stmt))
744 1.1 mrg {
745 1.1 mrg gbind *inner_bind = as_a <gbind *> (stmt);
746 1.1 mrg /* Flatten recursively, and collect all variables. */
747 1.1 mrg tree inner_vars = flatten_binds (inner_bind, true);
748 1.1 mrg gimple_seq inner_sequence = gimple_bind_body (inner_bind);
749 1.1 mrg if (flag_checking)
750 1.1 mrg {
751 1.1 mrg for (gimple_stmt_iterator inner_gsi = gsi_start (inner_sequence);
752 1.1 mrg !gsi_end_p (inner_gsi);
753 1.1 mrg gsi_next (&inner_gsi))
754 1.1 mrg {
755 1.1 mrg gimple *inner_stmt = gsi_stmt (inner_gsi);
756 1.1 mrg gcc_assert (gimple_code (inner_stmt) != GIMPLE_BIND
757 1.1 mrg || top_level_omp_for_in_stmt (inner_stmt));
758 1.1 mrg }
759 1.1 mrg }
760 1.1 mrg gimple_seq_add_seq (&new_body, inner_sequence);
761 1.1 mrg /* Find the last variable; we will append others to it. */
762 1.1 mrg while (last_var != NULL && TREE_CHAIN (last_var) != NULL)
763 1.1 mrg last_var = TREE_CHAIN (last_var);
764 1.1 mrg if (last_var != NULL)
765 1.1 mrg {
766 1.1 mrg TREE_CHAIN (last_var) = inner_vars;
767 1.1 mrg last_var = inner_vars;
768 1.1 mrg }
769 1.1 mrg else
770 1.1 mrg {
771 1.1 mrg vars = inner_vars;
772 1.1 mrg last_var = vars;
773 1.1 mrg }
774 1.1 mrg }
775 1.1 mrg else
776 1.1 mrg gimple_seq_add_stmt (&new_body, stmt);
777 1.1 mrg }
778 1.1 mrg
779 1.1 mrg /* Put the possibly transformed body back into the bind. */
780 1.1 mrg gimple_bind_set_body (bind, new_body);
781 1.1 mrg return vars;
782 1.1 mrg }
783 1.1 mrg
784 1.1 mrg /* Helper function for places where we construct data regions. Wraps the BODY
785 1.1 mrg inside a try-finally construct at LOC that calls __builtin_GOACC_data_end
786 1.1 mrg in its cleanup block. Returns this try statement. */
787 1.1 mrg
788 1.1 mrg static gimple *
789 1.1 mrg make_data_region_try_statement (location_t loc, gimple *body)
790 1.1 mrg {
791 1.1 mrg tree data_end_fn = builtin_decl_explicit (BUILT_IN_GOACC_DATA_END);
792 1.1 mrg gimple *call = gimple_build_call (data_end_fn, 0);
793 1.1 mrg gimple_seq cleanup = NULL;
794 1.1 mrg gimple_seq_add_stmt (&cleanup, call);
795 1.1 mrg gimple *try_stmt = gimple_build_try (body, cleanup, GIMPLE_TRY_FINALLY);
796 1.1 mrg gimple_set_location (body, loc);
797 1.1 mrg return try_stmt;
798 1.1 mrg }
799 1.1 mrg
800 1.1 mrg /* If INNER_BIND_VARS holds variables, build an OpenACC data region with
801 1.1 mrg location LOC containing BODY and having 'create (var)' clauses for each
802 1.1 mrg variable (as a side effect, such variables also get TREE_ADDRESSABLE set).
803 1.1 mrg If INNER_CLEANUP is present, add a try-finally statement with
804 1.1 mrg this cleanup code in the finally block. Return the new data region, or
805 1.1 mrg the original BODY if no data region was needed. */
806 1.1 mrg
807 1.1 mrg static gimple *
808 1.1 mrg maybe_build_inner_data_region (location_t loc, gimple *body,
809 1.1 mrg tree inner_bind_vars, gimple *inner_cleanup)
810 1.1 mrg {
811 1.1 mrg /* Is this an instantiation of a template? (In this case, we don't care what
812 1.1 mrg the generic decl is - just whether the function decl has one.) */
813 1.1 mrg bool generic_inst_p
814 1.1 mrg = (lang_hooks.decls.get_generic_function_decl (current_function_decl)
815 1.1 mrg != NULL);
816 1.1 mrg
817 1.1 mrg /* Build data 'create (var)' clauses for these local variables.
818 1.1 mrg Below we will add these to a data region enclosing the entire body
819 1.1 mrg of the decomposed kernels region. */
820 1.1 mrg tree prev_mapped_var = NULL, next = NULL, artificial_vars = NULL,
821 1.1 mrg inner_data_clauses = NULL;
822 1.1 mrg for (tree v = inner_bind_vars; v; v = next)
823 1.1 mrg {
824 1.1 mrg next = TREE_CHAIN (v);
825 1.1 mrg if (DECL_ARTIFICIAL (v)
826 1.1 mrg || TREE_CODE (v) == CONST_DECL
827 1.1 mrg || generic_inst_p)
828 1.1 mrg {
829 1.1 mrg /* If this is an artificial temporary, it need not be mapped. We
830 1.1 mrg move its declaration into the bind inside the data region.
831 1.1 mrg Also avoid mapping variables if we are inside a template
832 1.1 mrg instantiation; the code does not contain all the copies to
833 1.1 mrg temporaries that would make this legal. */
834 1.1 mrg TREE_CHAIN (v) = artificial_vars;
835 1.1 mrg artificial_vars = v;
836 1.1 mrg if (prev_mapped_var != NULL)
837 1.1 mrg TREE_CHAIN (prev_mapped_var) = next;
838 1.1 mrg else
839 1.1 mrg inner_bind_vars = next;
840 1.1 mrg }
841 1.1 mrg else
842 1.1 mrg {
843 1.1 mrg /* Otherwise, build the map clause. */
844 1.1 mrg tree new_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
845 1.1 mrg OMP_CLAUSE_SET_MAP_KIND (new_clause, GOMP_MAP_ALLOC);
846 1.1 mrg OMP_CLAUSE_DECL (new_clause) = v;
847 1.1 mrg OMP_CLAUSE_SIZE (new_clause) = DECL_SIZE_UNIT (v);
848 1.1 mrg OMP_CLAUSE_CHAIN (new_clause) = inner_data_clauses;
849 1.1 mrg inner_data_clauses = new_clause;
850 1.1 mrg
851 1.1 mrg prev_mapped_var = v;
852 1.1 mrg
853 1.1 mrg /* See <https://gcc.gnu.org/PR100280>. */
854 1.1 mrg if (!TREE_ADDRESSABLE (v))
855 1.1 mrg {
856 1.1 mrg /* Request that OMP lowering make 'v' addressable. */
857 1.1 mrg OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1;
858 1.1 mrg
859 1.1 mrg if (dump_enabled_p ())
860 1.1 mrg {
861 1.1 mrg const dump_user_location_t d_u_loc
862 1.1 mrg = dump_user_location_t::from_location_t (loc);
863 1.1 mrg /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
864 1.1 mrg #if __GNUC__ >= 10
865 1.1 mrg # pragma GCC diagnostic push
866 1.1 mrg # pragma GCC diagnostic ignored "-Wformat"
867 1.1 mrg #endif
868 1.1 mrg dump_printf_loc (MSG_NOTE, d_u_loc,
869 1.1 mrg "OpenACC %<kernels%> decomposition:"
870 1.1 mrg " variable %<%T%> declared in block"
871 1.1 mrg " requested to be made addressable\n",
872 1.1 mrg v);
873 1.1 mrg #if __GNUC__ >= 10
874 1.1 mrg # pragma GCC diagnostic pop
875 1.1 mrg #endif
876 1.1 mrg }
877 1.1 mrg }
878 1.1 mrg }
879 1.1 mrg }
880 1.1 mrg
881 1.1 mrg if (artificial_vars)
882 1.1 mrg body = gimple_build_bind (artificial_vars, body, make_node (BLOCK));
883 1.1 mrg
884 1.1 mrg /* If we determined above that there are variables that need to be created
885 1.1 mrg on the device, construct a data region for them and wrap the body
886 1.1 mrg inside that. */
887 1.1 mrg if (inner_data_clauses != NULL)
888 1.1 mrg {
889 1.1 mrg gcc_assert (inner_bind_vars != NULL);
890 1.1 mrg gimple *inner_data_region
891 1.1 mrg = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
892 1.1 mrg inner_data_clauses);
893 1.1 mrg gimple_set_location (inner_data_region, loc);
894 1.1 mrg /* Make sure __builtin_GOACC_data_end is called at the end. */
895 1.1 mrg gimple *try_stmt = make_data_region_try_statement (loc, body);
896 1.1 mrg gimple_omp_set_body (inner_data_region, try_stmt);
897 1.1 mrg gimple *bind_body;
898 1.1 mrg if (inner_cleanup != NULL)
899 1.1 mrg /* Clobber all the inner variables that need to be clobbered. */
900 1.1 mrg bind_body = gimple_build_try (inner_data_region, inner_cleanup,
901 1.1 mrg GIMPLE_TRY_FINALLY);
902 1.1 mrg else
903 1.1 mrg bind_body = inner_data_region;
904 1.1 mrg body = gimple_build_bind (inner_bind_vars, bind_body, make_node (BLOCK));
905 1.1 mrg }
906 1.1 mrg
907 1.1 mrg return body;
908 1.1 mrg }
909 1.1 mrg
910 1.1 mrg static void
911 1.1 mrg add_wait (location_t loc, gimple_seq *region_body)
912 1.1 mrg {
913 1.1 mrg /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0). */
914 1.1 mrg tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
915 1.1 mrg tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
916 1.1 mrg gimple *wait_call = gimple_build_call (wait_fn, 2,
917 1.1 mrg sync_arg, integer_zero_node);
918 1.1 mrg gimple_set_location (wait_call, loc);
919 1.1 mrg gimple_seq_add_stmt (region_body, wait_call);
920 1.1 mrg }
921 1.1 mrg
922 1.1 mrg /* Helper function of decompose_kernels_region_body. The statements in
923 1.1 mrg REGION_BODY are expected to be decomposed parts; add an 'async' clause to
924 1.1 mrg each. Also add a 'wait' directive at the end of the sequence. */
925 1.1 mrg
926 1.1 mrg static void
927 1.1 mrg add_async_clauses_and_wait (location_t loc, gimple_seq *region_body)
928 1.1 mrg {
929 1.1 mrg tree default_async_queue
930 1.1 mrg = build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL);
931 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start (*region_body);
932 1.1 mrg !gsi_end_p (gsi);
933 1.1 mrg gsi_next (&gsi))
934 1.1 mrg {
935 1.1 mrg gimple *stmt = gsi_stmt (gsi);
936 1.1 mrg tree target_clauses = gimple_omp_target_clauses (stmt);
937 1.1 mrg tree new_async_clause = build_omp_clause (loc, OMP_CLAUSE_ASYNC);
938 1.1 mrg OMP_CLAUSE_OPERAND (new_async_clause, 0) = default_async_queue;
939 1.1 mrg OMP_CLAUSE_CHAIN (new_async_clause) = target_clauses;
940 1.1 mrg target_clauses = new_async_clause;
941 1.1 mrg gimple_omp_target_set_clauses (as_a <gomp_target *> (stmt),
942 1.1 mrg target_clauses);
943 1.1 mrg }
944 1.1 mrg add_wait (loc, region_body);
945 1.1 mrg }
946 1.1 mrg
947 1.1 mrg /* Auxiliary analysis of the body of a kernels region, to determine for each
948 1.1 mrg OpenACC loop whether it is control-dependent (i.e., not necessarily
949 1.1 mrg executed every time the kernels region is entered) or not.
950 1.1 mrg We say that a loop is control-dependent if there is some cond, switch, or
951 1.1 mrg goto statement that jumps over it, forwards or backwards. For example,
952 1.1 mrg if the loop is controlled by an if statement, then a jump to the true
953 1.1 mrg block, the false block, or from one of those blocks to the control flow
954 1.1 mrg join point will necessarily jump over the loop.
955 1.1 mrg This analysis implements an ad-hoc union-find data structure classifying
956 1.1 mrg statements into "control-flow regions" as follows: Most statements are in
957 1.1 mrg the same region as their predecessor, except that each OpenACC loop is in
958 1.1 mrg a region of its own, and each OpenACC loop's successor starts a new
959 1.1 mrg region. We then unite the regions of any statements linked by jumps,
960 1.1 mrg placing any cond, switch, or goto statement in the same region as its
961 1.1 mrg target label(s).
962 1.1 mrg In the end, control dependence of OpenACC loops can be determined by
963 1.1 mrg comparing their immediate predecessor and successor statements' regions.
964 1.1 mrg A jump crosses the loop if and only if the predecessor and successor are
965 1.1 mrg in the same region. (If there is no predecessor or successor, the loop
966 1.1 mrg is executed unconditionally.)
967 1.1 mrg The methods in this class identify statements by their index in the
968 1.1 mrg kernels region's body. */
969 1.1 mrg
970 1.1 mrg class control_flow_regions
971 1.1 mrg {
972 1.1 mrg public:
973 1.1 mrg /* Initialize an instance and pre-compute the control-flow region
974 1.1 mrg information for the statement sequence SEQ. */
975 1.1 mrg control_flow_regions (gimple_seq seq);
976 1.1 mrg
977 1.1 mrg /* Return true if the statement with the given index IDX in the analyzed
978 1.1 mrg statement sequence is an unconditionally executed OpenACC loop. */
979 1.1 mrg bool is_unconditional_oacc_for_loop (size_t idx);
980 1.1 mrg
981 1.1 mrg private:
982 1.1 mrg /* Find the region representative for the statement identified by index
983 1.1 mrg STMT_IDX. */
984 1.1 mrg size_t find_rep (size_t stmt_idx);
985 1.1 mrg
986 1.1 mrg /* Union the regions containing the statements represented by
987 1.1 mrg representatives A and B. */
988 1.1 mrg void union_reps (size_t a, size_t b);
989 1.1 mrg
990 1.1 mrg /* Helper for the constructor. Performs the actual computation of the
991 1.1 mrg control-flow regions in the statement sequence SEQ. */
992 1.1 mrg void compute_regions (gimple_seq seq);
993 1.1 mrg
994 1.1 mrg /* The mapping from statement indices to region representatives. */
995 1.1 mrg vec <size_t> representatives;
996 1.1 mrg
997 1.1 mrg /* A cache mapping statement indices to a flag indicating whether the
998 1.1 mrg statement is a top level OpenACC for loop. */
999 1.1 mrg vec <bool> omp_for_loops;
1000 1.1 mrg };
1001 1.1 mrg
1002 1.1 mrg control_flow_regions::control_flow_regions (gimple_seq seq)
1003 1.1 mrg {
1004 1.1 mrg representatives.create (1);
1005 1.1 mrg omp_for_loops.create (1);
1006 1.1 mrg compute_regions (seq);
1007 1.1 mrg }
1008 1.1 mrg
1009 1.1 mrg bool
1010 1.1 mrg control_flow_regions::is_unconditional_oacc_for_loop (size_t idx)
1011 1.1 mrg {
1012 1.1 mrg if (idx == 0 || idx == representatives.length () - 1)
1013 1.1 mrg /* The first or last statement in the kernels region. This means that
1014 1.1 mrg there is no room before or after it for a jump or a label. Thus
1015 1.1 mrg there cannot be a jump across it, so it is unconditional. */
1016 1.1 mrg return true;
1017 1.1 mrg /* Otherwise, the loop is unconditional if the statements before and after
1018 1.1 mrg it are in different control flow regions. Scan forward and backward,
1019 1.1 mrg skipping over neighboring OpenACC for loops, to find these preceding
1020 1.1 mrg statements. */
1021 1.1 mrg size_t prev_index = idx - 1;
1022 1.1 mrg while (prev_index > 0 && omp_for_loops [prev_index] == true)
1023 1.1 mrg prev_index--;
1024 1.1 mrg /* If all preceding statements are also OpenACC loops, all of these are
1025 1.1 mrg unconditional. */
1026 1.1 mrg if (prev_index == 0)
1027 1.1 mrg return true;
1028 1.1 mrg size_t succ_index = idx + 1;
1029 1.1 mrg while (succ_index < omp_for_loops.length ()
1030 1.1 mrg && omp_for_loops [succ_index] == true)
1031 1.1 mrg succ_index++;
1032 1.1 mrg /* If all following statements are also OpenACC loops, all of these are
1033 1.1 mrg unconditional. */
1034 1.1 mrg if (succ_index == omp_for_loops.length ())
1035 1.1 mrg return true;
1036 1.1 mrg return (find_rep (prev_index) != find_rep (succ_index));
1037 1.1 mrg }
1038 1.1 mrg
1039 1.1 mrg size_t
1040 1.1 mrg control_flow_regions::find_rep (size_t stmt_idx)
1041 1.1 mrg {
1042 1.1 mrg size_t rep = stmt_idx, aux = stmt_idx;
1043 1.1 mrg /* Find the root representative of this statement. */
1044 1.1 mrg while (representatives[rep] != rep)
1045 1.1 mrg rep = representatives[rep];
1046 1.1 mrg /* Compress the path from the original statement to the representative. */
1047 1.1 mrg while (representatives[aux] != rep)
1048 1.1 mrg {
1049 1.1 mrg size_t tmp = representatives[aux];
1050 1.1 mrg representatives[aux] = rep;
1051 1.1 mrg aux = tmp;
1052 1.1 mrg }
1053 1.1 mrg return rep;
1054 1.1 mrg }
1055 1.1 mrg
1056 1.1 mrg void
1057 1.1 mrg control_flow_regions::union_reps (size_t a, size_t b)
1058 1.1 mrg {
1059 1.1 mrg a = find_rep (a);
1060 1.1 mrg b = find_rep (b);
1061 1.1 mrg representatives[b] = a;
1062 1.1 mrg }
1063 1.1 mrg
1064 1.1 mrg void
1065 1.1 mrg control_flow_regions::compute_regions (gimple_seq seq)
1066 1.1 mrg {
1067 1.1 mrg hash_map <gimple *, size_t> control_flow_reps;
1068 1.1 mrg hash_map <tree, size_t> label_reps;
1069 1.1 mrg size_t current_region = 0, idx = 0;
1070 1.1 mrg
1071 1.1 mrg /* In a first pass, assign an initial region to each statement. Except in
1072 1.1 mrg the case of OpenACC loops, each statement simply gets the same region
1073 1.1 mrg representative as its predecessor. */
1074 1.1 mrg for (gimple_stmt_iterator gsi = gsi_start (seq);
1075 1.1 mrg !gsi_end_p (gsi);
1076 1.1 mrg gsi_next (&gsi))
1077 1.1 mrg {
1078 1.1 mrg gimple *stmt = gsi_stmt (gsi);
1079 1.1 mrg gimple *omp_for = top_level_omp_for_in_stmt (stmt);
1080 1.1 mrg omp_for_loops.safe_push (omp_for != NULL);
1081 1.1 mrg if (omp_for != NULL)
1082 1.1 mrg {
1083 1.1 mrg /* Assign a new region to this loop and to its successor. */
1084 1.1 mrg current_region = idx;
1085 1.1 mrg representatives.safe_push (current_region);
1086 1.1 mrg current_region++;
1087 1.1 mrg }
1088 1.1 mrg else
1089 1.1 mrg {
1090 1.1 mrg representatives.safe_push (current_region);
1091 1.1 mrg /* Remember any jumps and labels for the second pass below. */
1092 1.1 mrg if (gimple_code (stmt) == GIMPLE_COND
1093 1.1 mrg || gimple_code (stmt) == GIMPLE_SWITCH
1094 1.1 mrg || gimple_code (stmt) == GIMPLE_GOTO)
1095 1.1 mrg control_flow_reps.put (stmt, current_region);
1096 1.1 mrg else if (gimple_code (stmt) == GIMPLE_LABEL)
1097 1.1 mrg label_reps.put (gimple_label_label (as_a <glabel *> (stmt)),
1098 1.1 mrg current_region);
1099 1.1 mrg }
1100 1.1 mrg idx++;
1101 1.1 mrg }
1102 1.1 mrg gcc_assert (representatives.length () == omp_for_loops.length ());
1103 1.1 mrg
1104 1.1 mrg /* Revisit all the control flow statements and union the region of each
1105 1.1 mrg cond, switch, or goto statement with the target labels' regions. */
1106 1.1 mrg for (hash_map <gimple *, size_t>::iterator it = control_flow_reps.begin ();
1107 1.1 mrg it != control_flow_reps.end ();
1108 1.1 mrg ++it)
1109 1.1 mrg {
1110 1.1 mrg gimple *stmt = (*it).first;
1111 1.1 mrg size_t stmt_rep = (*it).second;
1112 1.1 mrg switch (gimple_code (stmt))
1113 1.1 mrg {
1114 1.1 mrg tree label;
1115 1.1 mrg unsigned int n;
1116 1.1 mrg
1117 1.1 mrg case GIMPLE_COND:
1118 1.1 mrg label = gimple_cond_true_label (as_a <gcond *> (stmt));
1119 1.1 mrg union_reps (stmt_rep, *label_reps.get (label));
1120 1.1 mrg label = gimple_cond_false_label (as_a <gcond *> (stmt));
1121 1.1 mrg union_reps (stmt_rep, *label_reps.get (label));
1122 1.1 mrg break;
1123 1.1 mrg
1124 1.1 mrg case GIMPLE_SWITCH:
1125 1.1 mrg n = gimple_switch_num_labels (as_a <gswitch *> (stmt));
1126 1.1 mrg for (unsigned int i = 0; i < n; i++)
1127 1.1 mrg {
1128 1.1 mrg tree switch_case
1129 1.1 mrg = gimple_switch_label (as_a <gswitch *> (stmt), i);
1130 1.1 mrg label = CASE_LABEL (switch_case);
1131 1.1 mrg union_reps (stmt_rep, *label_reps.get (label));
1132 1.1 mrg }
1133 1.1 mrg break;
1134 1.1 mrg
1135 1.1 mrg case GIMPLE_GOTO:
1136 1.1 mrg label = gimple_goto_dest (stmt);
1137 1.1 mrg union_reps (stmt_rep, *label_reps.get (label));
1138 1.1 mrg break;
1139 1.1 mrg
1140 1.1 mrg default:
1141 1.1 mrg gcc_unreachable ();
1142 1.1 mrg }
1143 1.1 mrg }
1144 1.1 mrg }
1145 1.1 mrg
1146 1.1 mrg /* Decompose the body of the KERNELS_REGION, which was originally annotated
1147 1.1 mrg with the KERNELS_CLAUSES, into a series of compute constructs. */
1148 1.1 mrg
1149 1.1 mrg static gimple *
1150 1.1 mrg decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
1151 1.1 mrg {
1152 1.1 mrg location_t loc = gimple_location (kernels_region);
1153 1.1 mrg
1154 1.1 mrg /* The kernels clauses will be propagated to the child clauses unmodified,
1155 1.1 mrg except that the 'num_gangs', 'num_workers', and 'vector_length' clauses
1156 1.1 mrg will only be added to loop regions. The other regions are "gang-single"
1157 1.1 mrg and get an explicit 'num_gangs (1)' clause. So separate out the
1158 1.1 mrg 'num_gangs', 'num_workers', and 'vector_length' clauses here.
1159 1.1 mrg Also check for the presence of an 'async' clause but do not remove it from
1160 1.1 mrg the 'kernels' clauses. */
1161 1.1 mrg tree num_gangs_clause = NULL, num_workers_clause = NULL,
1162 1.1 mrg vector_length_clause = NULL;
1163 1.1 mrg tree async_clause = NULL;
1164 1.1 mrg tree prev_clause = NULL, next_clause = NULL;
1165 1.1 mrg tree parallel_clauses = kernels_clauses;
1166 1.1 mrg for (tree c = parallel_clauses; c; c = next_clause)
1167 1.1 mrg {
1168 1.1 mrg /* Preserve this here, as we might NULL it later. */
1169 1.1 mrg next_clause = OMP_CLAUSE_CHAIN (c);
1170 1.1 mrg
1171 1.1 mrg if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS
1172 1.1 mrg || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS
1173 1.1 mrg || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH)
1174 1.1 mrg {
1175 1.1 mrg /* Cut this clause out of the chain. */
1176 1.1 mrg if (prev_clause != NULL)
1177 1.1 mrg OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (c);
1178 1.1 mrg else
1179 1.1 mrg kernels_clauses = OMP_CLAUSE_CHAIN (c);
1180 1.1 mrg OMP_CLAUSE_CHAIN (c) = NULL;
1181 1.1 mrg switch (OMP_CLAUSE_CODE (c))
1182 1.1 mrg {
1183 1.1 mrg case OMP_CLAUSE_NUM_GANGS:
1184 1.1 mrg num_gangs_clause = c;
1185 1.1 mrg break;
1186 1.1 mrg case OMP_CLAUSE_NUM_WORKERS:
1187 1.1 mrg num_workers_clause = c;
1188 1.1 mrg break;
1189 1.1 mrg case OMP_CLAUSE_VECTOR_LENGTH:
1190 1.1 mrg vector_length_clause = c;
1191 1.1 mrg break;
1192 1.1 mrg default:
1193 1.1 mrg gcc_unreachable ();
1194 1.1 mrg }
1195 1.1 mrg }
1196 1.1 mrg else
1197 1.1 mrg prev_clause = c;
1198 1.1 mrg if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC)
1199 1.1 mrg async_clause = c;
1200 1.1 mrg }
1201 1.1 mrg
1202 1.1 mrg gimple *kernels_body = gimple_omp_body (kernels_region);
1203 1.1 mrg gbind *kernels_bind = as_a <gbind *> (kernels_body);
1204 1.1 mrg
1205 1.1 mrg /* The body of the region may contain other nested binds declaring inner
1206 1.1 mrg local variables. Collapse all these binds into one to ensure that we
1207 1.1 mrg have a single sequence of statements to iterate over; also, collect all
1208 1.1 mrg inner variables. */
1209 1.1 mrg tree inner_bind_vars = flatten_binds (kernels_bind);
1210 1.1 mrg gimple_seq body_sequence = gimple_bind_body (kernels_bind);
1211 1.1 mrg
1212 1.1 mrg /* All these inner variables will get allocated on the device (below, by
1213 1.1 mrg calling maybe_build_inner_data_region). Here we create 'present'
1214 1.1 mrg clauses for them and add these clauses to the list of clauses to be
1215 1.1 mrg attached to each inner compute construct. */
1216 1.1 mrg tree present_clauses = kernels_clauses;
1217 1.1 mrg for (tree var = inner_bind_vars; var; var = TREE_CHAIN (var))
1218 1.1 mrg {
1219 1.1 mrg if (!DECL_ARTIFICIAL (var) && TREE_CODE (var) != CONST_DECL)
1220 1.1 mrg {
1221 1.1 mrg tree present_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
1222 1.1 mrg OMP_CLAUSE_SET_MAP_KIND (present_clause, GOMP_MAP_FORCE_PRESENT);
1223 1.1 mrg OMP_CLAUSE_DECL (present_clause) = var;
1224 1.1 mrg OMP_CLAUSE_SIZE (present_clause) = DECL_SIZE_UNIT (var);
1225 1.1 mrg OMP_CLAUSE_CHAIN (present_clause) = present_clauses;
1226 1.1 mrg present_clauses = present_clause;
1227 1.1 mrg }
1228 1.1 mrg }
1229 1.1 mrg kernels_clauses = present_clauses;
1230 1.1 mrg
1231 1.1 mrg /* In addition to nested binds, the "real" body of the region may be
1232 1.1 mrg nested inside a try-finally block. Find its cleanup block, which
1233 1.1 mrg contains code to clobber the local variables that must be clobbered. */
1234 1.1 mrg gimple *inner_cleanup = NULL;
1235 1.1 mrg if (body_sequence != NULL && gimple_code (body_sequence) == GIMPLE_TRY)
1236 1.1 mrg {
1237 1.1 mrg if (gimple_seq_singleton_p (body_sequence))
1238 1.1 mrg {
1239 1.1 mrg /* The try statement is the only thing inside the bind. */
1240 1.1 mrg inner_cleanup = gimple_try_cleanup (body_sequence);
1241 1.1 mrg body_sequence = gimple_try_eval (body_sequence);
1242 1.1 mrg }
1243 1.1 mrg else
1244 1.1 mrg {
1245 1.1 mrg /* The bind's body starts with a try statement, but it is followed
1246 1.1 mrg by other things. */
1247 1.1 mrg gimple_stmt_iterator gsi = gsi_start (body_sequence);
1248 1.1 mrg gimple *try_stmt = gsi_stmt (gsi);
1249 1.1 mrg inner_cleanup = gimple_try_cleanup (try_stmt);
1250 1.1 mrg gimple *try_body = gimple_try_eval (try_stmt);
1251 1.1 mrg
1252 1.1 mrg gsi_remove (&gsi, false);
1253 1.1 mrg /* Now gsi indicates the sequence of statements after the try
1254 1.1 mrg statement in the bind. Append the statement in the try body and
1255 1.1 mrg the trailing statements from gsi. */
1256 1.1 mrg gsi_insert_seq_before (&gsi, try_body, GSI_CONTINUE_LINKING);
1257 1.1 mrg body_sequence = gsi_stmt (gsi);
1258 1.1 mrg }
1259 1.1 mrg }
1260 1.1 mrg
1261 1.1 mrg /* This sequence will collect all the top-level statements in the body of
1262 1.1 mrg the data region we are about to construct. */
1263 1.1 mrg gimple_seq region_body = NULL;
1264 1.1 mrg /* This sequence will collect consecutive statements to be put into a
1265 1.1 mrg gang-single region. */
1266 1.1 mrg gimple_seq gang_single_seq = NULL;
1267 1.1 mrg /* Flag recording whether the gang_single_seq only contains copies to
1268 1.1 mrg local variables. These may be loop setup code that should not be
1269 1.1 mrg separated from the loop. */
1270 1.1 mrg bool only_simple_assignments = true;
1271 1.1 mrg
1272 1.1 mrg /* Precompute the control flow region information to determine whether an
1273 1.1 mrg OpenACC loop is executed conditionally or unconditionally. */
1274 1.1 mrg control_flow_regions cf_regions (body_sequence);
1275 1.1 mrg
1276 1.1 mrg /* Iterate over the statements in the kernels region's body. */
1277 1.1 mrg size_t idx = 0;
1278 1.1 mrg gimple_stmt_iterator gsi, gsi_n;
1279 1.1 mrg for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n, idx++)
1280 1.1 mrg {
1281 1.1 mrg /* Advance the iterator here because otherwise it would be invalidated
1282 1.1 mrg by moving statements below. */
1283 1.1 mrg gsi_n = gsi;
1284 1.1 mrg gsi_next (&gsi_n);
1285 1.1 mrg
1286 1.1 mrg gimple *stmt = gsi_stmt (gsi);
1287 1.1 mrg if (gimple_code (stmt) == GIMPLE_DEBUG)
1288 1.1 mrg {
1289 1.1 mrg if (flag_compare_debug_opt || flag_compare_debug)
1290 1.1 mrg /* Let the usual '-fcompare-debug' analysis bail out, as
1291 1.1 mrg necessary. */
1292 1.1 mrg ;
1293 1.1 mrg else
1294 1.1 mrg sorry_at (loc, "%qs not yet supported",
1295 1.1 mrg gimple_code_name[gimple_code (stmt)]);
1296 1.1 mrg }
1297 1.1 mrg gimple *omp_for = top_level_omp_for_in_stmt (stmt);
1298 1.1 mrg bool is_unconditional_oacc_for_loop = false;
1299 1.1 mrg if (omp_for != NULL)
1300 1.1 mrg is_unconditional_oacc_for_loop
1301 1.1 mrg = cf_regions.is_unconditional_oacc_for_loop (idx);
1302 1.1 mrg if (omp_for != NULL
1303 1.1 mrg && is_unconditional_oacc_for_loop)
1304 1.1 mrg {
1305 1.1 mrg /* This is an OMP for statement, put it into a separate region.
1306 1.1 mrg But first, construct a gang-single region containing any
1307 1.1 mrg complex sequential statements we may have seen. */
1308 1.1 mrg if (gang_single_seq != NULL && !only_simple_assignments)
1309 1.1 mrg {
1310 1.1 mrg gimple *single_region
1311 1.1 mrg = make_region_seq (loc, gang_single_seq,
1312 1.1 mrg num_gangs_clause,
1313 1.1 mrg num_workers_clause,
1314 1.1 mrg vector_length_clause,
1315 1.1 mrg kernels_clauses);
1316 1.1 mrg gimple_seq_add_stmt (®ion_body, single_region);
1317 1.1 mrg }
1318 1.1 mrg else if (gang_single_seq != NULL && only_simple_assignments)
1319 1.1 mrg {
1320 1.1 mrg /* There is a sequence of sequential statements preceding this
1321 1.1 mrg loop, but they are all simple assignments. This is
1322 1.1 mrg probably setup code for the loop; in particular, Fortran DO
1323 1.1 mrg loops are preceded by code to copy the loop limit variable
1324 1.1 mrg to a temporary. Group this code together with the loop
1325 1.1 mrg itself. */
1326 1.1 mrg gimple_seq_add_stmt (&gang_single_seq, stmt);
1327 1.1 mrg stmt = gimple_build_bind (NULL, gang_single_seq,
1328 1.1 mrg make_node (BLOCK));
1329 1.1 mrg }
1330 1.1 mrg gang_single_seq = NULL;
1331 1.1 mrg only_simple_assignments = true;
1332 1.1 mrg
1333 1.1 mrg gimple_seq parallel_seq = NULL;
1334 1.1 mrg gimple_seq_add_stmt (¶llel_seq, stmt);
1335 1.1 mrg gimple *parallel_region
1336 1.1 mrg = make_region_loop_nest (omp_for, parallel_seq,
1337 1.1 mrg num_gangs_clause,
1338 1.1 mrg num_workers_clause,
1339 1.1 mrg vector_length_clause,
1340 1.1 mrg kernels_clauses);
1341 1.1 mrg gimple_seq_add_stmt (®ion_body, parallel_region);
1342 1.1 mrg }
1343 1.1 mrg else
1344 1.1 mrg {
1345 1.1 mrg if (omp_for != NULL)
1346 1.1 mrg {
1347 1.1 mrg gcc_checking_assert (!is_unconditional_oacc_for_loop);
1348 1.1 mrg if (dump_enabled_p ())
1349 1.1 mrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, omp_for,
1350 1.1 mrg "unparallelized loop nest"
1351 1.1 mrg " in OpenACC %<kernels%> region:"
1352 1.1 mrg " it's executed conditionally\n");
1353 1.1 mrg }
1354 1.1 mrg
1355 1.1 mrg /* This is not an unconditional OMP for statement, so it will be
1356 1.1 mrg put into a gang-single region. */
1357 1.1 mrg gimple_seq_add_stmt (&gang_single_seq, stmt);
1358 1.1 mrg /* Is this a simple assignment? We call it simple if it is an
1359 1.1 mrg assignment to an artificial local variable. This captures
1360 1.1 mrg Fortran loop setup code computing loop bounds and offsets. */
1361 1.1 mrg bool is_simple_assignment
1362 1.1 mrg = (gimple_code (stmt) == GIMPLE_ASSIGN
1363 1.1 mrg && TREE_CODE (gimple_assign_lhs (stmt)) == VAR_DECL
1364 1.1 mrg && DECL_ARTIFICIAL (gimple_assign_lhs (stmt)));
1365 1.1 mrg if (!is_simple_assignment)
1366 1.1 mrg only_simple_assignments = false;
1367 1.1 mrg }
1368 1.1 mrg }
1369 1.1 mrg
1370 1.1 mrg /* If we did not emit a new region, and are not going to emit one now
1371 1.1 mrg (that is, the original region was empty), prepare to emit a dummy so as
1372 1.1 mrg to preserve the original construct, which other processing (at least
1373 1.1 mrg test cases) depend on. */
1374 1.1 mrg if (region_body == NULL && gang_single_seq == NULL)
1375 1.1 mrg {
1376 1.1 mrg gimple *stmt = gimple_build_nop ();
1377 1.1 mrg gimple_set_location (stmt, loc);
1378 1.1 mrg gimple_seq_add_stmt (&gang_single_seq, stmt);
1379 1.1 mrg }
1380 1.1 mrg
1381 1.1 mrg /* Gather up any remaining gang-single statements. */
1382 1.1 mrg if (gang_single_seq != NULL)
1383 1.1 mrg {
1384 1.1 mrg gimple *single_region
1385 1.1 mrg = make_region_seq (loc, gang_single_seq,
1386 1.1 mrg num_gangs_clause,
1387 1.1 mrg num_workers_clause,
1388 1.1 mrg vector_length_clause,
1389 1.1 mrg kernels_clauses);
1390 1.1 mrg gimple_seq_add_stmt (®ion_body, single_region);
1391 1.1 mrg }
1392 1.1 mrg
1393 1.1 mrg /* We want to launch these kernels asynchronously. If the original
1394 1.1 mrg kernels region had an async clause, this is done automatically because
1395 1.1 mrg that async clause was copied to the individual regions we created.
1396 1.1 mrg Otherwise, add an async clause to each newly created region, as well as
1397 1.1 mrg a wait directive at the end. */
1398 1.1 mrg if (async_clause == NULL)
1399 1.1 mrg add_async_clauses_and_wait (loc, ®ion_body);
1400 1.1 mrg else
1401 1.1 mrg /* !!! If we have asynchronous parallel blocks inside a (synchronous) data
1402 1.1 mrg region, then target memory will get unmapped at the point the data
1403 1.1 mrg region ends, even if the inner asynchronous parallels have not yet
1404 1.1 mrg completed. For kernels marked "async", we might want to use "enter data
1405 1.1 mrg async(...)" and "exit data async(...)" instead, or asynchronous data
1406 1.1 mrg regions (see also <https://gcc.gnu.org/PR97390>
1407 1.1 mrg "[OpenACC] 'async' clause on 'data' construct",
1408 1.1 mrg which is to share the same implementation).
1409 1.1 mrg For now, insert a (synchronous) wait at the end of the block. */
1410 1.1 mrg add_wait (loc, ®ion_body);
1411 1.1 mrg
1412 1.1 mrg tree kernels_locals = gimple_bind_vars (as_a <gbind *> (kernels_body));
1413 1.1 mrg gimple *body = gimple_build_bind (kernels_locals, region_body,
1414 1.1 mrg make_node (BLOCK));
1415 1.1 mrg
1416 1.1 mrg /* If we found variables declared in nested scopes, build a data region to
1417 1.1 mrg map them to the device. */
1418 1.1 mrg body = maybe_build_inner_data_region (loc, body, inner_bind_vars,
1419 1.1 mrg inner_cleanup);
1420 1.1 mrg
1421 1.1 mrg return body;
1422 1.1 mrg }
1423 1.1 mrg
1424 1.1 mrg /* Decompose one OpenACC 'kernels' construct into an OpenACC 'data' construct
1425 1.1 mrg containing the original OpenACC 'kernels' construct's region cut up into a
1426 1.1 mrg sequence of compute constructs. */
1427 1.1 mrg
1428 1.1 mrg static gimple *
1429 1.1 mrg omp_oacc_kernels_decompose_1 (gimple *kernels_stmt)
1430 1.1 mrg {
1431 1.1 mrg gcc_checking_assert (gimple_omp_target_kind (kernels_stmt)
1432 1.1 mrg == GF_OMP_TARGET_KIND_OACC_KERNELS);
1433 1.1 mrg location_t loc = gimple_location (kernels_stmt);
1434 1.1 mrg
1435 1.1 mrg /* Collect the data clauses of the OpenACC 'kernels' directive and create a
1436 1.1 mrg new OpenACC 'data' construct with those clauses. */
1437 1.1 mrg tree kernels_clauses = gimple_omp_target_clauses (kernels_stmt);
1438 1.1 mrg tree data_clauses = NULL;
1439 1.1 mrg for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c))
1440 1.1 mrg {
1441 1.1 mrg /* Certain clauses are copied to the enclosing OpenACC 'data'. Other
1442 1.1 mrg clauses remain on the OpenACC 'kernels'. */
1443 1.1 mrg if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP)
1444 1.1 mrg {
1445 1.1 mrg tree decl = OMP_CLAUSE_DECL (c);
1446 1.1 mrg HOST_WIDE_INT map_kind = OMP_CLAUSE_MAP_KIND (c);
1447 1.1 mrg switch (map_kind)
1448 1.1 mrg {
1449 1.1 mrg default:
1450 1.1 mrg if (map_kind == GOMP_MAP_ALLOC
1451 1.1 mrg && integer_zerop (OMP_CLAUSE_SIZE (c)))
1452 1.1 mrg /* ??? This is an alloc clause for mapping a pointer whose
1453 1.1 mrg target is already mapped. We leave these on the inner
1454 1.1 mrg compute constructs because moving them to the outer data
1455 1.1 mrg region causes runtime errors. */
1456 1.1 mrg break;
1457 1.1 mrg
1458 1.1 mrg /* For non-artificial variables, and for non-declaration
1459 1.1 mrg expressions like A[0:n], copy the clause to the data
1460 1.1 mrg region. */
1461 1.1 mrg if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl))
1462 1.1 mrg || !DECL_P (decl))
1463 1.1 mrg {
1464 1.1 mrg tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c),
1465 1.1 mrg OMP_CLAUSE_MAP);
1466 1.1 mrg OMP_CLAUSE_SET_MAP_KIND (new_clause, map_kind);
1467 1.1 mrg /* This must be unshared here to avoid "incorrect sharing
1468 1.1 mrg of tree nodes" errors from verify_gimple. */
1469 1.1 mrg OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl);
1470 1.1 mrg OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c);
1471 1.1 mrg OMP_CLAUSE_CHAIN (new_clause) = data_clauses;
1472 1.1 mrg data_clauses = new_clause;
1473 1.1 mrg
1474 1.1 mrg /* Now that this data is mapped, turn the data clause on the
1475 1.1 mrg inner OpenACC 'kernels' into a 'present' clause. */
1476 1.1 mrg OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT);
1477 1.1 mrg
1478 1.1 mrg /* See <https://gcc.gnu.org/PR100280>,
1479 1.1 mrg <https://gcc.gnu.org/PR104086>. */
1480 1.1 mrg if (DECL_P (decl)
1481 1.1 mrg && !TREE_ADDRESSABLE (decl))
1482 1.1 mrg {
1483 1.1 mrg /* Request that OMP lowering make 'decl' addressable. */
1484 1.1 mrg OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1;
1485 1.1 mrg
1486 1.1 mrg if (dump_enabled_p ())
1487 1.1 mrg {
1488 1.1 mrg location_t loc = OMP_CLAUSE_LOCATION (new_clause);
1489 1.1 mrg const dump_user_location_t d_u_loc
1490 1.1 mrg = dump_user_location_t::from_location_t (loc);
1491 1.1 mrg /* PR100695 "Format decoder, quoting in 'dump_printf'
1492 1.1 mrg etc." */
1493 1.1 mrg #if __GNUC__ >= 10
1494 1.1 mrg # pragma GCC diagnostic push
1495 1.1 mrg # pragma GCC diagnostic ignored "-Wformat"
1496 1.1 mrg #endif
1497 1.1 mrg dump_printf_loc
1498 1.1 mrg (MSG_NOTE, d_u_loc,
1499 1.1 mrg "OpenACC %<kernels%> decomposition:"
1500 1.1 mrg " variable %<%T%> in %qs clause"
1501 1.1 mrg " requested to be made addressable\n",
1502 1.1 mrg decl,
1503 1.1 mrg user_omp_clause_code_name (new_clause, true));
1504 1.1 mrg #if __GNUC__ >= 10
1505 1.1 mrg # pragma GCC diagnostic pop
1506 1.1 mrg #endif
1507 1.1 mrg }
1508 1.1 mrg }
1509 1.1 mrg }
1510 1.1 mrg break;
1511 1.1 mrg
1512 1.1 mrg case GOMP_MAP_POINTER:
1513 1.1 mrg case GOMP_MAP_TO_PSET:
1514 1.1 mrg case GOMP_MAP_FIRSTPRIVATE_POINTER:
1515 1.1 mrg case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
1516 1.1 mrg /* ??? Copying these map kinds leads to internal compiler
1517 1.1 mrg errors in later passes. */
1518 1.1 mrg break;
1519 1.1 mrg }
1520 1.1 mrg }
1521 1.1 mrg else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF)
1522 1.1 mrg {
1523 1.1 mrg /* If there is an 'if' clause, it must be duplicated to the
1524 1.1 mrg enclosing data region. Temporarily remove the if clause's
1525 1.1 mrg chain to avoid copying it. */
1526 1.1 mrg tree saved_chain = OMP_CLAUSE_CHAIN (c);
1527 1.1 mrg OMP_CLAUSE_CHAIN (c) = NULL;
1528 1.1 mrg tree new_if_clause = unshare_expr (c);
1529 1.1 mrg OMP_CLAUSE_CHAIN (c) = saved_chain;
1530 1.1 mrg OMP_CLAUSE_CHAIN (new_if_clause) = data_clauses;
1531 1.1 mrg data_clauses = new_if_clause;
1532 1.1 mrg }
1533 1.1 mrg }
1534 1.1 mrg /* Restore the original order of the clauses. */
1535 1.1 mrg data_clauses = nreverse (data_clauses);
1536 1.1 mrg
1537 1.1 mrg gimple *data_region
1538 1.1 mrg = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
1539 1.1 mrg data_clauses);
1540 1.1 mrg gimple_set_location (data_region, loc);
1541 1.1 mrg
1542 1.1 mrg /* Transform the body of the kernels region into a sequence of compute
1543 1.1 mrg constructs. */
1544 1.1 mrg gimple *body = decompose_kernels_region_body (kernels_stmt,
1545 1.1 mrg kernels_clauses);
1546 1.1 mrg
1547 1.1 mrg /* Put the transformed pieces together. The entire body of the region is
1548 1.1 mrg wrapped in a try-finally statement that calls __builtin_GOACC_data_end
1549 1.1 mrg for cleanup. */
1550 1.1 mrg gimple *try_stmt = make_data_region_try_statement (loc, body);
1551 1.1 mrg gimple_omp_set_body (data_region, try_stmt);
1552 1.1 mrg
1553 1.1 mrg return data_region;
1554 1.1 mrg }
1555 1.1 mrg
1556 1.1 mrg
1557 1.1 mrg /* Decompose OpenACC 'kernels' constructs in the current function. */
1558 1.1 mrg
1559 1.1 mrg static tree
1560 1.1 mrg omp_oacc_kernels_decompose_callback_stmt (gimple_stmt_iterator *gsi_p,
1561 1.1 mrg bool *handled_ops_p,
1562 1.1 mrg struct walk_stmt_info *)
1563 1.1 mrg {
1564 1.1 mrg gimple *stmt = gsi_stmt (*gsi_p);
1565 1.1 mrg
1566 1.1 mrg if ((gimple_code (stmt) == GIMPLE_OMP_TARGET)
1567 1.1 mrg && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
1568 1.1 mrg {
1569 1.1 mrg gimple *stmt_new = omp_oacc_kernels_decompose_1 (stmt);
1570 1.1 mrg gsi_replace (gsi_p, stmt_new, false);
1571 1.1 mrg *handled_ops_p = true;
1572 1.1 mrg }
1573 1.1 mrg else
1574 1.1 mrg *handled_ops_p = false;
1575 1.1 mrg
1576 1.1 mrg return NULL;
1577 1.1 mrg }
1578 1.1 mrg
1579 1.1 mrg static unsigned int
1580 1.1 mrg omp_oacc_kernels_decompose (void)
1581 1.1 mrg {
1582 1.1 mrg gimple_seq body = gimple_body (current_function_decl);
1583 1.1 mrg
1584 1.1 mrg struct walk_stmt_info wi;
1585 1.1 mrg memset (&wi, 0, sizeof (wi));
1586 1.1 mrg walk_gimple_seq_mod (&body, omp_oacc_kernels_decompose_callback_stmt, NULL,
1587 1.1 mrg &wi);
1588 1.1 mrg
1589 1.1 mrg gimple_set_body (current_function_decl, body);
1590 1.1 mrg
1591 1.1 mrg return 0;
1592 1.1 mrg }
1593 1.1 mrg
1594 1.1 mrg
1595 1.1 mrg namespace {
1596 1.1 mrg
1597 1.1 mrg const pass_data pass_data_omp_oacc_kernels_decompose =
1598 1.1 mrg {
1599 1.1 mrg GIMPLE_PASS, /* type */
1600 1.1 mrg "omp_oacc_kernels_decompose", /* name */
1601 1.1 mrg OPTGROUP_OMP, /* optinfo_flags */
1602 1.1 mrg TV_NONE, /* tv_id */
1603 1.1 mrg PROP_gimple_any, /* properties_required */
1604 1.1 mrg 0, /* properties_provided */
1605 1.1 mrg 0, /* properties_destroyed */
1606 1.1 mrg 0, /* todo_flags_start */
1607 1.1 mrg 0, /* todo_flags_finish */
1608 1.1 mrg };
1609 1.1 mrg
1610 1.1 mrg class pass_omp_oacc_kernels_decompose : public gimple_opt_pass
1611 1.1 mrg {
1612 1.1 mrg public:
1613 1.1 mrg pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
1614 1.1 mrg : gimple_opt_pass (pass_data_omp_oacc_kernels_decompose, ctxt)
1615 1.1 mrg {}
1616 1.1 mrg
1617 1.1 mrg /* opt_pass methods: */
1618 1.1 mrg virtual bool gate (function *)
1619 1.1 mrg {
1620 1.1 mrg return (flag_openacc
1621 1.1 mrg && param_openacc_kernels == OPENACC_KERNELS_DECOMPOSE);
1622 1.1 mrg }
1623 1.1 mrg virtual unsigned int execute (function *)
1624 1.1 mrg {
1625 1.1 mrg return omp_oacc_kernels_decompose ();
1626 1.1 mrg }
1627 1.1 mrg
1628 1.1 mrg }; // class pass_omp_oacc_kernels_decompose
1629 1.1 mrg
1630 1.1 mrg } // anon namespace
1631 1.1 mrg
1632 1.1 mrg gimple_opt_pass *
1633 1.1 mrg make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
1634 1.1 mrg {
1635 1.1 mrg return new pass_omp_oacc_kernels_decompose (ctxt);
1636 1.1 mrg }
1637