oacc-parallel.c revision 1.1.1.9 1 1.1.1.9 mrg /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg Contributed by Mentor Embedded.
4 1.1 mrg
5 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library
6 1.1 mrg (libgomp).
7 1.1 mrg
8 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it
9 1.1 mrg under the terms of the GNU General Public License as published by
10 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
11 1.1 mrg any later version.
12 1.1 mrg
13 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 1.1 mrg more details.
17 1.1 mrg
18 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
19 1.1 mrg permissions described in the GCC Runtime Library Exception, version
20 1.1 mrg 3.1, as published by the Free Software Foundation.
21 1.1 mrg
22 1.1 mrg You should have received a copy of the GNU General Public License and
23 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
24 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 1.1 mrg <http://www.gnu.org/licenses/>. */
26 1.1 mrg
27 1.1 mrg /* This file handles OpenACC constructs. */
28 1.1 mrg
29 1.1 mrg #include "openacc.h"
30 1.1 mrg #include "libgomp.h"
31 1.1 mrg #include "gomp-constants.h"
32 1.1 mrg #include "oacc-int.h"
33 1.1 mrg #ifdef HAVE_INTTYPES_H
34 1.1 mrg # include <inttypes.h> /* For PRIu64. */
35 1.1 mrg #endif
36 1.1 mrg #include <string.h>
37 1.1 mrg #include <stdarg.h>
38 1.1 mrg #include <assert.h>
39 1.1 mrg
40 1.1.1.7 mrg
41 1.1.1.7 mrg /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
42 1.1.1.7 mrg continue to support the following two legacy values. */
43 1.1.1.7 mrg _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
44 1.1.1.7 mrg "legacy GOMP_DEVICE_ICV broken");
45 1.1.1.7 mrg _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
46 1.1.1.7 mrg == GOACC_FLAG_HOST_FALLBACK,
47 1.1.1.7 mrg "legacy GOMP_DEVICE_HOST_FALLBACK broken");
48 1.1.1.7 mrg
49 1.1.1.7 mrg
50 1.1.1.7 mrg /* Handle the mapping pair that are presented when a
51 1.1.1.7 mrg deviceptr clause is used with Fortran. */
52 1.1.1.7 mrg
53 1.1.1.7 mrg static void
54 1.1.1.7 mrg handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
55 1.1.1.7 mrg unsigned short *kinds)
56 1.1.1.7 mrg {
57 1.1.1.7 mrg int i;
58 1.1.1.7 mrg
59 1.1.1.7 mrg for (i = 0; i < mapnum; i++)
60 1.1.1.7 mrg {
61 1.1.1.7 mrg unsigned short kind1 = kinds[i] & 0xff;
62 1.1.1.7 mrg
63 1.1.1.7 mrg /* Handle Fortran deviceptr clause. */
64 1.1.1.7 mrg if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
65 1.1.1.7 mrg {
66 1.1.1.7 mrg unsigned short kind2;
67 1.1.1.7 mrg
68 1.1.1.7 mrg if (i < (signed)mapnum - 1)
69 1.1.1.7 mrg kind2 = kinds[i + 1] & 0xff;
70 1.1.1.7 mrg else
71 1.1.1.7 mrg kind2 = 0xffff;
72 1.1.1.7 mrg
73 1.1.1.7 mrg if (sizes[i] == sizeof (void *))
74 1.1.1.7 mrg continue;
75 1.1.1.7 mrg
76 1.1.1.7 mrg /* At this point, we're dealing with a Fortran deviceptr.
77 1.1.1.7 mrg If the next element is not what we're expecting, then
78 1.1.1.7 mrg this is an instance of where the deviceptr variable was
79 1.1.1.7 mrg not used within the region and the pointer was removed
80 1.1.1.7 mrg by the gimplifier. */
81 1.1.1.7 mrg if (kind2 == GOMP_MAP_POINTER
82 1.1.1.7 mrg && sizes[i + 1] == 0
83 1.1.1.7 mrg && hostaddrs[i] == *(void **)hostaddrs[i + 1])
84 1.1.1.7 mrg {
85 1.1.1.7 mrg kinds[i+1] = kinds[i];
86 1.1.1.7 mrg sizes[i+1] = sizeof (void *);
87 1.1.1.7 mrg }
88 1.1.1.7 mrg
89 1.1.1.7 mrg /* Invalidate the entry. */
90 1.1.1.7 mrg hostaddrs[i] = NULL;
91 1.1.1.7 mrg }
92 1.1.1.7 mrg }
93 1.1 mrg }
94 1.1 mrg
95 1.1.1.2 mrg
96 1.1.1.7 mrg /* Launch a possibly offloaded function with FLAGS. FN is the host fn
97 1.1.1.2 mrg address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
98 1.1.1.2 mrg blocks to be copied to/from the device. Varadic arguments are
99 1.1.1.2 mrg keyed optional parameters terminated with a zero. */
100 1.1 mrg
101 1.1 mrg void
102 1.1.1.7 mrg GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
103 1.1.1.2 mrg size_t mapnum, void **hostaddrs, size_t *sizes,
104 1.1.1.2 mrg unsigned short *kinds, ...)
105 1.1 mrg {
106 1.1.1.7 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
107 1.1.1.7 mrg
108 1.1 mrg va_list ap;
109 1.1 mrg struct goacc_thread *thr;
110 1.1 mrg struct gomp_device_descr *acc_dev;
111 1.1 mrg struct target_mem_desc *tgt;
112 1.1 mrg void **devaddrs;
113 1.1 mrg unsigned int i;
114 1.1 mrg struct splay_tree_key_s k;
115 1.1 mrg splay_tree_key tgt_fn_key;
116 1.1 mrg void (*tgt_fn);
117 1.1.1.2 mrg int async = GOMP_ASYNC_SYNC;
118 1.1.1.2 mrg unsigned dims[GOMP_DIM_MAX];
119 1.1.1.2 mrg unsigned tag;
120 1.1 mrg
121 1.1 mrg #ifdef HAVE_INTTYPES_H
122 1.1.1.2 mrg gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
123 1.1.1.2 mrg __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
124 1.1 mrg #else
125 1.1.1.2 mrg gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
126 1.1.1.2 mrg __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
127 1.1 mrg #endif
128 1.1 mrg goacc_lazy_initialize ();
129 1.1 mrg
130 1.1 mrg thr = goacc_thread ();
131 1.1 mrg acc_dev = thr->dev;
132 1.1 mrg
133 1.1.1.8 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
134 1.1.1.8 mrg
135 1.1.1.8 mrg acc_prof_info prof_info;
136 1.1.1.8 mrg if (profiling_p)
137 1.1.1.8 mrg {
138 1.1.1.8 mrg thr->prof_info = &prof_info;
139 1.1.1.8 mrg
140 1.1.1.8 mrg prof_info.event_type = acc_ev_compute_construct_start;
141 1.1.1.8 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
142 1.1.1.8 mrg prof_info.version = _ACC_PROF_INFO_VERSION;
143 1.1.1.8 mrg prof_info.device_type = acc_device_type (acc_dev->type);
144 1.1.1.8 mrg prof_info.device_number = acc_dev->target_id;
145 1.1.1.8 mrg prof_info.thread_id = -1;
146 1.1.1.8 mrg prof_info.async = async;
147 1.1.1.8 mrg prof_info.async_queue = prof_info.async;
148 1.1.1.8 mrg prof_info.src_file = NULL;
149 1.1.1.8 mrg prof_info.func_name = NULL;
150 1.1.1.8 mrg prof_info.line_no = -1;
151 1.1.1.8 mrg prof_info.end_line_no = -1;
152 1.1.1.8 mrg prof_info.func_line_no = -1;
153 1.1.1.8 mrg prof_info.func_end_line_no = -1;
154 1.1.1.8 mrg }
155 1.1.1.8 mrg acc_event_info compute_construct_event_info;
156 1.1.1.8 mrg if (profiling_p)
157 1.1.1.8 mrg {
158 1.1.1.8 mrg compute_construct_event_info.other_event.event_type
159 1.1.1.8 mrg = prof_info.event_type;
160 1.1.1.8 mrg compute_construct_event_info.other_event.valid_bytes
161 1.1.1.8 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
162 1.1.1.8 mrg compute_construct_event_info.other_event.parent_construct
163 1.1.1.8 mrg = acc_construct_parallel;
164 1.1.1.8 mrg compute_construct_event_info.other_event.implicit = 0;
165 1.1.1.8 mrg compute_construct_event_info.other_event.tool_info = NULL;
166 1.1.1.8 mrg }
167 1.1.1.8 mrg acc_api_info api_info;
168 1.1.1.8 mrg if (profiling_p)
169 1.1.1.8 mrg {
170 1.1.1.8 mrg thr->api_info = &api_info;
171 1.1.1.8 mrg
172 1.1.1.8 mrg api_info.device_api = acc_device_api_none;
173 1.1.1.8 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
174 1.1.1.8 mrg api_info.device_type = prof_info.device_type;
175 1.1.1.8 mrg api_info.vendor = -1;
176 1.1.1.8 mrg api_info.device_handle = NULL;
177 1.1.1.8 mrg api_info.context_handle = NULL;
178 1.1.1.8 mrg api_info.async_handle = NULL;
179 1.1.1.8 mrg }
180 1.1.1.8 mrg
181 1.1.1.8 mrg if (profiling_p)
182 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
183 1.1.1.8 mrg &api_info);
184 1.1.1.8 mrg
185 1.1.1.7 mrg handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
186 1.1.1.7 mrg
187 1.1 mrg /* Host fallback if "if" clause is false or if the current device is set to
188 1.1 mrg the host. */
189 1.1.1.7 mrg if (flags & GOACC_FLAG_HOST_FALLBACK)
190 1.1 mrg {
191 1.1.1.8 mrg prof_info.device_type = acc_device_host;
192 1.1.1.8 mrg api_info.device_type = prof_info.device_type;
193 1.1 mrg goacc_save_and_set_bind (acc_device_host);
194 1.1 mrg fn (hostaddrs);
195 1.1 mrg goacc_restore_bind ();
196 1.1.1.8 mrg goto out_prof;
197 1.1 mrg }
198 1.1 mrg else if (acc_device_type (acc_dev->type) == acc_device_host)
199 1.1 mrg {
200 1.1 mrg fn (hostaddrs);
201 1.1.1.8 mrg goto out_prof;
202 1.1 mrg }
203 1.1 mrg
204 1.1.1.2 mrg /* Default: let the runtime choose. */
205 1.1.1.2 mrg for (i = 0; i != GOMP_DIM_MAX; i++)
206 1.1.1.2 mrg dims[i] = 0;
207 1.1.1.2 mrg
208 1.1.1.2 mrg va_start (ap, kinds);
209 1.1.1.2 mrg /* TODO: This will need amending when device_type is implemented. */
210 1.1.1.2 mrg while ((tag = va_arg (ap, unsigned)) != 0)
211 1.1.1.2 mrg {
212 1.1.1.2 mrg if (GOMP_LAUNCH_DEVICE (tag))
213 1.1.1.2 mrg gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
214 1.1.1.2 mrg GOMP_LAUNCH_DEVICE (tag));
215 1.1 mrg
216 1.1.1.2 mrg switch (GOMP_LAUNCH_CODE (tag))
217 1.1.1.2 mrg {
218 1.1.1.2 mrg case GOMP_LAUNCH_DIM:
219 1.1.1.2 mrg {
220 1.1.1.2 mrg unsigned mask = GOMP_LAUNCH_OP (tag);
221 1.1.1.2 mrg
222 1.1.1.2 mrg for (i = 0; i != GOMP_DIM_MAX; i++)
223 1.1.1.2 mrg if (mask & GOMP_DIM_MASK (i))
224 1.1.1.2 mrg dims[i] = va_arg (ap, unsigned);
225 1.1.1.2 mrg }
226 1.1.1.2 mrg break;
227 1.1.1.2 mrg
228 1.1.1.2 mrg case GOMP_LAUNCH_ASYNC:
229 1.1.1.2 mrg {
230 1.1.1.2 mrg /* Small constant values are encoded in the operand. */
231 1.1.1.2 mrg async = GOMP_LAUNCH_OP (tag);
232 1.1.1.2 mrg
233 1.1.1.2 mrg if (async == GOMP_LAUNCH_OP_MAX)
234 1.1.1.2 mrg async = va_arg (ap, unsigned);
235 1.1.1.8 mrg
236 1.1.1.8 mrg if (profiling_p)
237 1.1.1.8 mrg {
238 1.1.1.8 mrg prof_info.async = async;
239 1.1.1.8 mrg prof_info.async_queue = prof_info.async;
240 1.1.1.8 mrg }
241 1.1.1.8 mrg
242 1.1.1.2 mrg break;
243 1.1.1.2 mrg }
244 1.1 mrg
245 1.1.1.2 mrg case GOMP_LAUNCH_WAIT:
246 1.1.1.2 mrg {
247 1.1.1.2 mrg unsigned num_waits = GOMP_LAUNCH_OP (tag);
248 1.1.1.7 mrg goacc_wait (async, num_waits, &ap);
249 1.1.1.2 mrg break;
250 1.1.1.2 mrg }
251 1.1.1.2 mrg
252 1.1.1.2 mrg default:
253 1.1.1.2 mrg gomp_fatal ("unrecognized offload code '%d',"
254 1.1.1.2 mrg " libgomp is too old", GOMP_LAUNCH_CODE (tag));
255 1.1.1.2 mrg }
256 1.1.1.2 mrg }
257 1.1.1.2 mrg va_end (ap);
258 1.1.1.2 mrg
259 1.1 mrg if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
260 1.1 mrg {
261 1.1 mrg k.host_start = (uintptr_t) fn;
262 1.1 mrg k.host_end = k.host_start + 1;
263 1.1 mrg gomp_mutex_lock (&acc_dev->lock);
264 1.1 mrg tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
265 1.1 mrg gomp_mutex_unlock (&acc_dev->lock);
266 1.1 mrg
267 1.1 mrg if (tgt_fn_key == NULL)
268 1.1 mrg gomp_fatal ("target function wasn't mapped");
269 1.1 mrg
270 1.1 mrg tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
271 1.1 mrg }
272 1.1 mrg else
273 1.1 mrg tgt_fn = (void (*)) fn;
274 1.1 mrg
275 1.1.1.8 mrg acc_event_info enter_exit_data_event_info;
276 1.1.1.8 mrg if (profiling_p)
277 1.1.1.8 mrg {
278 1.1.1.8 mrg prof_info.event_type = acc_ev_enter_data_start;
279 1.1.1.8 mrg enter_exit_data_event_info.other_event.event_type
280 1.1.1.8 mrg = prof_info.event_type;
281 1.1.1.8 mrg enter_exit_data_event_info.other_event.valid_bytes
282 1.1.1.8 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
283 1.1.1.8 mrg enter_exit_data_event_info.other_event.parent_construct
284 1.1.1.8 mrg = compute_construct_event_info.other_event.parent_construct;
285 1.1.1.8 mrg enter_exit_data_event_info.other_event.implicit = 1;
286 1.1.1.8 mrg enter_exit_data_event_info.other_event.tool_info = NULL;
287 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
288 1.1.1.8 mrg &api_info);
289 1.1.1.8 mrg }
290 1.1.1.8 mrg
291 1.1.1.8 mrg goacc_aq aq = get_goacc_asyncqueue (async);
292 1.1.1.8 mrg
293 1.1.1.9 mrg tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
294 1.1.1.9 mrg true, 0);
295 1.1.1.8 mrg if (profiling_p)
296 1.1.1.8 mrg {
297 1.1.1.8 mrg prof_info.event_type = acc_ev_enter_data_end;
298 1.1.1.8 mrg enter_exit_data_event_info.other_event.event_type
299 1.1.1.8 mrg = prof_info.event_type;
300 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
301 1.1.1.8 mrg &api_info);
302 1.1.1.8 mrg }
303 1.1.1.9 mrg
304 1.1 mrg devaddrs = gomp_alloca (sizeof (void *) * mapnum);
305 1.1 mrg for (i = 0; i < mapnum; i++)
306 1.1.1.8 mrg devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
307 1.1.1.8 mrg
308 1.1.1.8 mrg if (aq == NULL)
309 1.1.1.8 mrg acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
310 1.1.1.8 mrg tgt);
311 1.1.1.8 mrg else
312 1.1.1.8 mrg acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
313 1.1.1.8 mrg dims, tgt, aq);
314 1.1 mrg
315 1.1.1.8 mrg if (profiling_p)
316 1.1.1.8 mrg {
317 1.1.1.8 mrg prof_info.event_type = acc_ev_exit_data_start;
318 1.1.1.8 mrg enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
319 1.1.1.8 mrg enter_exit_data_event_info.other_event.tool_info = NULL;
320 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
321 1.1.1.8 mrg &api_info);
322 1.1.1.8 mrg }
323 1.1 mrg
324 1.1.1.9 mrg /* If running synchronously (aq == NULL), this will unmap immediately. */
325 1.1.1.9 mrg goacc_unmap_vars (tgt, true, aq);
326 1.1.1.8 mrg
327 1.1.1.8 mrg if (profiling_p)
328 1.1.1.7 mrg {
329 1.1.1.8 mrg prof_info.event_type = acc_ev_exit_data_end;
330 1.1.1.8 mrg enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
331 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
332 1.1.1.8 mrg &api_info);
333 1.1.1.7 mrg }
334 1.1 mrg
335 1.1.1.8 mrg out_prof:
336 1.1.1.8 mrg if (profiling_p)
337 1.1.1.8 mrg {
338 1.1.1.8 mrg prof_info.event_type = acc_ev_compute_construct_end;
339 1.1.1.8 mrg compute_construct_event_info.other_event.event_type
340 1.1.1.8 mrg = prof_info.event_type;
341 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
342 1.1.1.8 mrg &api_info);
343 1.1.1.8 mrg
344 1.1.1.8 mrg thr->prof_info = NULL;
345 1.1.1.8 mrg thr->api_info = NULL;
346 1.1.1.8 mrg }
347 1.1 mrg }
348 1.1 mrg
349 1.1.1.8 mrg /* Legacy entry point (GCC 5). Only provide host fallback execution. */
350 1.1.1.2 mrg
351 1.1.1.2 mrg void
352 1.1.1.7 mrg GOACC_parallel (int flags_m, void (*fn) (void *),
353 1.1.1.2 mrg size_t mapnum, void **hostaddrs, size_t *sizes,
354 1.1.1.2 mrg unsigned short *kinds,
355 1.1.1.2 mrg int num_gangs, int num_workers, int vector_length,
356 1.1.1.2 mrg int async, int num_waits, ...)
357 1.1.1.2 mrg {
358 1.1.1.2 mrg goacc_save_and_set_bind (acc_device_host);
359 1.1.1.2 mrg fn (hostaddrs);
360 1.1.1.2 mrg goacc_restore_bind ();
361 1.1.1.2 mrg }
362 1.1.1.2 mrg
363 1.1 mrg void
364 1.1.1.7 mrg GOACC_data_start (int flags_m, size_t mapnum,
365 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds)
366 1.1 mrg {
367 1.1.1.7 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
368 1.1.1.7 mrg
369 1.1 mrg struct target_mem_desc *tgt;
370 1.1 mrg
371 1.1 mrg #ifdef HAVE_INTTYPES_H
372 1.1 mrg gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
373 1.1 mrg __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
374 1.1 mrg #else
375 1.1 mrg gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
376 1.1 mrg __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
377 1.1 mrg #endif
378 1.1 mrg
379 1.1 mrg goacc_lazy_initialize ();
380 1.1 mrg
381 1.1 mrg struct goacc_thread *thr = goacc_thread ();
382 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev;
383 1.1 mrg
384 1.1.1.8 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
385 1.1.1.8 mrg
386 1.1.1.8 mrg acc_prof_info prof_info;
387 1.1.1.8 mrg if (profiling_p)
388 1.1.1.8 mrg {
389 1.1.1.8 mrg thr->prof_info = &prof_info;
390 1.1.1.8 mrg
391 1.1.1.8 mrg prof_info.event_type = acc_ev_enter_data_start;
392 1.1.1.8 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
393 1.1.1.8 mrg prof_info.version = _ACC_PROF_INFO_VERSION;
394 1.1.1.8 mrg prof_info.device_type = acc_device_type (acc_dev->type);
395 1.1.1.8 mrg prof_info.device_number = acc_dev->target_id;
396 1.1.1.8 mrg prof_info.thread_id = -1;
397 1.1.1.8 mrg prof_info.async = acc_async_sync; /* Always synchronous. */
398 1.1.1.8 mrg prof_info.async_queue = prof_info.async;
399 1.1.1.8 mrg prof_info.src_file = NULL;
400 1.1.1.8 mrg prof_info.func_name = NULL;
401 1.1.1.8 mrg prof_info.line_no = -1;
402 1.1.1.8 mrg prof_info.end_line_no = -1;
403 1.1.1.8 mrg prof_info.func_line_no = -1;
404 1.1.1.8 mrg prof_info.func_end_line_no = -1;
405 1.1.1.8 mrg }
406 1.1.1.8 mrg acc_event_info enter_data_event_info;
407 1.1.1.8 mrg if (profiling_p)
408 1.1.1.8 mrg {
409 1.1.1.8 mrg enter_data_event_info.other_event.event_type
410 1.1.1.8 mrg = prof_info.event_type;
411 1.1.1.8 mrg enter_data_event_info.other_event.valid_bytes
412 1.1.1.8 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
413 1.1.1.8 mrg enter_data_event_info.other_event.parent_construct = acc_construct_data;
414 1.1.1.8 mrg for (int i = 0; i < mapnum; ++i)
415 1.1.1.8 mrg if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR
416 1.1.1.8 mrg || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
417 1.1.1.8 mrg {
418 1.1.1.8 mrg /* If there is one such data mapping kind, then this is actually an
419 1.1.1.8 mrg OpenACC 'host_data' construct. (GCC maps the OpenACC
420 1.1.1.8 mrg 'host_data' construct to the OpenACC 'data' construct.) Apart
421 1.1.1.8 mrg from artificial test cases (such as an OpenACC 'host_data'
422 1.1.1.8 mrg construct's (implicit) device initialization when there hasn't
423 1.1.1.8 mrg been any device data be set up before...), there can't really
424 1.1.1.8 mrg any meaningful events be generated from OpenACC 'host_data'
425 1.1.1.8 mrg constructs, though. */
426 1.1.1.8 mrg enter_data_event_info.other_event.parent_construct
427 1.1.1.8 mrg = acc_construct_host_data;
428 1.1.1.8 mrg break;
429 1.1.1.8 mrg }
430 1.1.1.8 mrg enter_data_event_info.other_event.implicit = 0;
431 1.1.1.8 mrg enter_data_event_info.other_event.tool_info = NULL;
432 1.1.1.8 mrg }
433 1.1.1.8 mrg acc_api_info api_info;
434 1.1.1.8 mrg if (profiling_p)
435 1.1.1.8 mrg {
436 1.1.1.8 mrg thr->api_info = &api_info;
437 1.1.1.8 mrg
438 1.1.1.8 mrg api_info.device_api = acc_device_api_none;
439 1.1.1.8 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
440 1.1.1.8 mrg api_info.device_type = prof_info.device_type;
441 1.1.1.8 mrg api_info.vendor = -1;
442 1.1.1.8 mrg api_info.device_handle = NULL;
443 1.1.1.8 mrg api_info.context_handle = NULL;
444 1.1.1.8 mrg api_info.async_handle = NULL;
445 1.1.1.8 mrg }
446 1.1.1.8 mrg
447 1.1.1.8 mrg if (profiling_p)
448 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
449 1.1.1.8 mrg
450 1.1 mrg /* Host fallback or 'do nothing'. */
451 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
452 1.1.1.7 mrg || (flags & GOACC_FLAG_HOST_FALLBACK))
453 1.1 mrg {
454 1.1.1.8 mrg prof_info.device_type = acc_device_host;
455 1.1.1.8 mrg api_info.device_type = prof_info.device_type;
456 1.1.1.9 mrg tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0);
457 1.1 mrg tgt->prev = thr->mapped_data;
458 1.1 mrg thr->mapped_data = tgt;
459 1.1 mrg
460 1.1.1.8 mrg goto out_prof;
461 1.1 mrg }
462 1.1 mrg
463 1.1 mrg gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
464 1.1.1.9 mrg tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds,
465 1.1.1.9 mrg true, 0);
466 1.1 mrg gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
467 1.1 mrg tgt->prev = thr->mapped_data;
468 1.1 mrg thr->mapped_data = tgt;
469 1.1.1.8 mrg
470 1.1.1.8 mrg out_prof:
471 1.1.1.8 mrg if (profiling_p)
472 1.1.1.8 mrg {
473 1.1.1.8 mrg prof_info.event_type = acc_ev_enter_data_end;
474 1.1.1.8 mrg enter_data_event_info.other_event.event_type = prof_info.event_type;
475 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
476 1.1.1.8 mrg
477 1.1.1.8 mrg thr->prof_info = NULL;
478 1.1.1.8 mrg thr->api_info = NULL;
479 1.1.1.8 mrg }
480 1.1 mrg }
481 1.1 mrg
482 1.1 mrg void
483 1.1 mrg GOACC_data_end (void)
484 1.1 mrg {
485 1.1 mrg struct goacc_thread *thr = goacc_thread ();
486 1.1.1.8 mrg struct gomp_device_descr *acc_dev = thr->dev;
487 1.1 mrg struct target_mem_desc *tgt = thr->mapped_data;
488 1.1 mrg
489 1.1.1.8 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
490 1.1 mrg
491 1.1.1.8 mrg acc_prof_info prof_info;
492 1.1.1.8 mrg if (profiling_p)
493 1.1 mrg {
494 1.1.1.8 mrg thr->prof_info = &prof_info;
495 1.1 mrg
496 1.1.1.8 mrg prof_info.event_type = acc_ev_exit_data_start;
497 1.1.1.8 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
498 1.1.1.8 mrg prof_info.version = _ACC_PROF_INFO_VERSION;
499 1.1.1.8 mrg prof_info.device_type = acc_device_type (acc_dev->type);
500 1.1.1.8 mrg prof_info.device_number = acc_dev->target_id;
501 1.1.1.8 mrg prof_info.thread_id = -1;
502 1.1.1.8 mrg prof_info.async = acc_async_sync; /* Always synchronous. */
503 1.1.1.8 mrg prof_info.async_queue = prof_info.async;
504 1.1.1.8 mrg prof_info.src_file = NULL;
505 1.1.1.8 mrg prof_info.func_name = NULL;
506 1.1.1.8 mrg prof_info.line_no = -1;
507 1.1.1.8 mrg prof_info.end_line_no = -1;
508 1.1.1.8 mrg prof_info.func_line_no = -1;
509 1.1.1.8 mrg prof_info.func_end_line_no = -1;
510 1.1.1.8 mrg }
511 1.1.1.8 mrg acc_event_info exit_data_event_info;
512 1.1.1.8 mrg if (profiling_p)
513 1.1.1.8 mrg {
514 1.1.1.8 mrg exit_data_event_info.other_event.event_type
515 1.1.1.8 mrg = prof_info.event_type;
516 1.1.1.8 mrg exit_data_event_info.other_event.valid_bytes
517 1.1.1.8 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
518 1.1.1.8 mrg exit_data_event_info.other_event.parent_construct = acc_construct_data;
519 1.1.1.8 mrg exit_data_event_info.other_event.implicit = 0;
520 1.1.1.8 mrg exit_data_event_info.other_event.tool_info = NULL;
521 1.1.1.8 mrg }
522 1.1.1.8 mrg acc_api_info api_info;
523 1.1.1.8 mrg if (profiling_p)
524 1.1.1.8 mrg {
525 1.1.1.8 mrg thr->api_info = &api_info;
526 1.1.1.8 mrg
527 1.1.1.8 mrg api_info.device_api = acc_device_api_none;
528 1.1.1.8 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
529 1.1.1.8 mrg api_info.device_type = prof_info.device_type;
530 1.1.1.8 mrg api_info.vendor = -1;
531 1.1.1.8 mrg api_info.device_handle = NULL;
532 1.1.1.8 mrg api_info.context_handle = NULL;
533 1.1.1.8 mrg api_info.async_handle = NULL;
534 1.1 mrg }
535 1.1 mrg
536 1.1.1.8 mrg if (profiling_p)
537 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
538 1.1.1.7 mrg
539 1.1.1.8 mrg gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
540 1.1.1.8 mrg thr->mapped_data = tgt->prev;
541 1.1.1.9 mrg goacc_unmap_vars (tgt, true, NULL);
542 1.1.1.8 mrg gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
543 1.1 mrg
544 1.1.1.8 mrg if (profiling_p)
545 1.1 mrg {
546 1.1.1.8 mrg prof_info.event_type = acc_ev_exit_data_end;
547 1.1.1.8 mrg exit_data_event_info.other_event.event_type = prof_info.event_type;
548 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
549 1.1 mrg
550 1.1.1.8 mrg thr->prof_info = NULL;
551 1.1.1.8 mrg thr->api_info = NULL;
552 1.1 mrg }
553 1.1 mrg }
554 1.1 mrg
555 1.1 mrg void
556 1.1.1.7 mrg GOACC_update (int flags_m, size_t mapnum,
557 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds,
558 1.1 mrg int async, int num_waits, ...)
559 1.1 mrg {
560 1.1.1.7 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
561 1.1.1.7 mrg
562 1.1 mrg size_t i;
563 1.1 mrg
564 1.1 mrg goacc_lazy_initialize ();
565 1.1 mrg
566 1.1 mrg struct goacc_thread *thr = goacc_thread ();
567 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev;
568 1.1 mrg
569 1.1.1.8 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
570 1.1.1.8 mrg
571 1.1.1.8 mrg acc_prof_info prof_info;
572 1.1.1.8 mrg if (profiling_p)
573 1.1.1.8 mrg {
574 1.1.1.8 mrg thr->prof_info = &prof_info;
575 1.1.1.8 mrg
576 1.1.1.8 mrg prof_info.event_type = acc_ev_update_start;
577 1.1.1.8 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
578 1.1.1.8 mrg prof_info.version = _ACC_PROF_INFO_VERSION;
579 1.1.1.8 mrg prof_info.device_type = acc_device_type (acc_dev->type);
580 1.1.1.8 mrg prof_info.device_number = acc_dev->target_id;
581 1.1.1.8 mrg prof_info.thread_id = -1;
582 1.1.1.8 mrg prof_info.async = async;
583 1.1.1.8 mrg prof_info.async_queue = prof_info.async;
584 1.1.1.8 mrg prof_info.src_file = NULL;
585 1.1.1.8 mrg prof_info.func_name = NULL;
586 1.1.1.8 mrg prof_info.line_no = -1;
587 1.1.1.8 mrg prof_info.end_line_no = -1;
588 1.1.1.8 mrg prof_info.func_line_no = -1;
589 1.1.1.8 mrg prof_info.func_end_line_no = -1;
590 1.1.1.8 mrg }
591 1.1.1.8 mrg acc_event_info update_event_info;
592 1.1.1.8 mrg if (profiling_p)
593 1.1.1.8 mrg {
594 1.1.1.8 mrg update_event_info.other_event.event_type
595 1.1.1.8 mrg = prof_info.event_type;
596 1.1.1.8 mrg update_event_info.other_event.valid_bytes
597 1.1.1.8 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
598 1.1.1.8 mrg update_event_info.other_event.parent_construct = acc_construct_update;
599 1.1.1.8 mrg update_event_info.other_event.implicit = 0;
600 1.1.1.8 mrg update_event_info.other_event.tool_info = NULL;
601 1.1.1.8 mrg }
602 1.1.1.8 mrg acc_api_info api_info;
603 1.1.1.8 mrg if (profiling_p)
604 1.1.1.8 mrg {
605 1.1.1.8 mrg thr->api_info = &api_info;
606 1.1.1.8 mrg
607 1.1.1.8 mrg api_info.device_api = acc_device_api_none;
608 1.1.1.8 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
609 1.1.1.8 mrg api_info.device_type = prof_info.device_type;
610 1.1.1.8 mrg api_info.vendor = -1;
611 1.1.1.8 mrg api_info.device_handle = NULL;
612 1.1.1.8 mrg api_info.context_handle = NULL;
613 1.1.1.8 mrg api_info.async_handle = NULL;
614 1.1.1.8 mrg }
615 1.1.1.8 mrg
616 1.1.1.8 mrg if (profiling_p)
617 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
618 1.1.1.8 mrg
619 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
620 1.1.1.7 mrg || (flags & GOACC_FLAG_HOST_FALLBACK))
621 1.1.1.8 mrg {
622 1.1.1.8 mrg prof_info.device_type = acc_device_host;
623 1.1.1.8 mrg api_info.device_type = prof_info.device_type;
624 1.1.1.8 mrg
625 1.1.1.8 mrg goto out_prof;
626 1.1.1.8 mrg }
627 1.1 mrg
628 1.1.1.2 mrg if (num_waits)
629 1.1 mrg {
630 1.1 mrg va_list ap;
631 1.1 mrg
632 1.1 mrg va_start (ap, num_waits);
633 1.1.1.2 mrg goacc_wait (async, num_waits, &ap);
634 1.1 mrg va_end (ap);
635 1.1 mrg }
636 1.1 mrg
637 1.1.1.7 mrg bool update_device = false;
638 1.1 mrg for (i = 0; i < mapnum; ++i)
639 1.1 mrg {
640 1.1 mrg unsigned char kind = kinds[i] & 0xff;
641 1.1 mrg
642 1.1 mrg switch (kind)
643 1.1 mrg {
644 1.1 mrg case GOMP_MAP_POINTER:
645 1.1 mrg case GOMP_MAP_TO_PSET:
646 1.1 mrg break;
647 1.1 mrg
648 1.1.1.7 mrg case GOMP_MAP_ALWAYS_POINTER:
649 1.1.1.7 mrg if (update_device)
650 1.1.1.7 mrg {
651 1.1.1.7 mrg /* Save the contents of the host pointer. */
652 1.1.1.7 mrg void *dptr = acc_deviceptr (hostaddrs[i-1]);
653 1.1.1.7 mrg uintptr_t t = *(uintptr_t *) hostaddrs[i];
654 1.1.1.7 mrg
655 1.1.1.7 mrg /* Update the contents of the host pointer to reflect
656 1.1.1.7 mrg the value of the allocated device memory in the
657 1.1.1.7 mrg previous pointer. */
658 1.1.1.7 mrg *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
659 1.1.1.8 mrg /* TODO: verify that we really cannot use acc_update_device_async
660 1.1.1.8 mrg here. */
661 1.1.1.7 mrg acc_update_device (hostaddrs[i], sizeof (uintptr_t));
662 1.1.1.7 mrg
663 1.1.1.7 mrg /* Restore the host pointer. */
664 1.1.1.7 mrg *(uintptr_t *) hostaddrs[i] = t;
665 1.1.1.7 mrg update_device = false;
666 1.1.1.7 mrg }
667 1.1.1.7 mrg break;
668 1.1.1.7 mrg
669 1.1.1.7 mrg case GOMP_MAP_TO:
670 1.1.1.7 mrg if (!acc_is_present (hostaddrs[i], sizes[i]))
671 1.1.1.7 mrg {
672 1.1.1.7 mrg update_device = false;
673 1.1.1.7 mrg break;
674 1.1.1.7 mrg }
675 1.1.1.7 mrg /* Fallthru */
676 1.1 mrg case GOMP_MAP_FORCE_TO:
677 1.1.1.7 mrg update_device = true;
678 1.1.1.8 mrg acc_update_device_async (hostaddrs[i], sizes[i], async);
679 1.1 mrg break;
680 1.1 mrg
681 1.1.1.7 mrg case GOMP_MAP_FROM:
682 1.1.1.7 mrg if (!acc_is_present (hostaddrs[i], sizes[i]))
683 1.1.1.7 mrg {
684 1.1.1.7 mrg update_device = false;
685 1.1.1.7 mrg break;
686 1.1.1.7 mrg }
687 1.1.1.7 mrg /* Fallthru */
688 1.1 mrg case GOMP_MAP_FORCE_FROM:
689 1.1.1.7 mrg update_device = false;
690 1.1.1.8 mrg acc_update_self_async (hostaddrs[i], sizes[i], async);
691 1.1 mrg break;
692 1.1 mrg
693 1.1 mrg default:
694 1.1 mrg gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
695 1.1 mrg break;
696 1.1 mrg }
697 1.1 mrg }
698 1.1 mrg
699 1.1.1.8 mrg out_prof:
700 1.1.1.8 mrg if (profiling_p)
701 1.1.1.2 mrg {
702 1.1.1.8 mrg prof_info.event_type = acc_ev_update_end;
703 1.1.1.8 mrg update_event_info.other_event.event_type = prof_info.event_type;
704 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
705 1.1 mrg
706 1.1.1.8 mrg thr->prof_info = NULL;
707 1.1.1.8 mrg thr->api_info = NULL;
708 1.1.1.2 mrg }
709 1.1 mrg }
710 1.1 mrg
711 1.1.1.8 mrg
/* Legacy entry point (GCC 5).  Always reports a thread count of one.  */

int
GOACC_get_num_threads (void)
{
  const int num_threads = 1;

  return num_threads;
}
719 1.1 mrg
/* Legacy entry point (GCC 5).  Always reports thread number zero.  */

int
GOACC_get_thread_num (void)
{
  const int thread_num = 0;

  return thread_num;
}
727