oacc-parallel.c revision 1.6 1 1.6 mrg /* Copyright (C) 2013-2020 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg Contributed by Mentor Embedded.
4 1.1 mrg
5 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library
6 1.1 mrg (libgomp).
7 1.1 mrg
8 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it
9 1.1 mrg under the terms of the GNU General Public License as published by
10 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
11 1.1 mrg any later version.
12 1.1 mrg
13 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 1.1 mrg more details.
17 1.1 mrg
18 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
19 1.1 mrg permissions described in the GCC Runtime Library Exception, version
20 1.1 mrg 3.1, as published by the Free Software Foundation.
21 1.1 mrg
22 1.1 mrg You should have received a copy of the GNU General Public License and
23 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
24 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 1.1 mrg <http://www.gnu.org/licenses/>. */
26 1.1 mrg
27 1.1 mrg /* This file handles OpenACC constructs. */
28 1.1 mrg
29 1.1 mrg #include "openacc.h"
30 1.1 mrg #include "libgomp.h"
31 1.1 mrg #include "gomp-constants.h"
32 1.1 mrg #include "oacc-int.h"
33 1.1 mrg #ifdef HAVE_INTTYPES_H
34 1.1 mrg # include <inttypes.h> /* For PRIu64. */
35 1.1 mrg #endif
36 1.1 mrg #include <string.h>
37 1.1 mrg #include <stdarg.h>
38 1.1 mrg #include <assert.h>
39 1.1 mrg
40 1.5 mrg
41 1.5 mrg /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
42 1.5 mrg continue to support the following two legacy values. */
43 1.5 mrg _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
44 1.5 mrg "legacy GOMP_DEVICE_ICV broken");
45 1.5 mrg _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
46 1.5 mrg == GOACC_FLAG_HOST_FALLBACK,
47 1.5 mrg "legacy GOMP_DEVICE_HOST_FALLBACK broken");
48 1.5 mrg
49 1.5 mrg
50 1.5 mrg /* Handle the mapping pair that are presented when a
51 1.5 mrg deviceptr clause is used with Fortran. */
52 1.5 mrg
53 1.5 mrg static void
54 1.5 mrg handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
55 1.5 mrg unsigned short *kinds)
56 1.5 mrg {
57 1.5 mrg int i;
58 1.5 mrg
59 1.5 mrg for (i = 0; i < mapnum; i++)
60 1.5 mrg {
61 1.5 mrg unsigned short kind1 = kinds[i] & 0xff;
62 1.5 mrg
63 1.5 mrg /* Handle Fortran deviceptr clause. */
64 1.5 mrg if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
65 1.5 mrg {
66 1.5 mrg unsigned short kind2;
67 1.5 mrg
68 1.5 mrg if (i < (signed)mapnum - 1)
69 1.5 mrg kind2 = kinds[i + 1] & 0xff;
70 1.5 mrg else
71 1.5 mrg kind2 = 0xffff;
72 1.5 mrg
73 1.5 mrg if (sizes[i] == sizeof (void *))
74 1.5 mrg continue;
75 1.5 mrg
76 1.5 mrg /* At this point, we're dealing with a Fortran deviceptr.
77 1.5 mrg If the next element is not what we're expecting, then
78 1.5 mrg this is an instance of where the deviceptr variable was
79 1.5 mrg not used within the region and the pointer was removed
80 1.5 mrg by the gimplifier. */
81 1.5 mrg if (kind2 == GOMP_MAP_POINTER
82 1.5 mrg && sizes[i + 1] == 0
83 1.5 mrg && hostaddrs[i] == *(void **)hostaddrs[i + 1])
84 1.5 mrg {
85 1.5 mrg kinds[i+1] = kinds[i];
86 1.5 mrg sizes[i+1] = sizeof (void *);
87 1.5 mrg }
88 1.5 mrg
89 1.5 mrg /* Invalidate the entry. */
90 1.5 mrg hostaddrs[i] = NULL;
91 1.5 mrg }
92 1.5 mrg }
93 1.1 mrg }
94 1.1 mrg
95 1.3 mrg
96 1.5 mrg /* Launch a possibly offloaded function with FLAGS. FN is the host fn
97 1.3 mrg address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
98 1.3 mrg blocks to be copied to/from the device. Varadic arguments are
99 1.3 mrg keyed optional parameters terminated with a zero. */
100 1.1 mrg
101 1.1 mrg void
102 1.5 mrg GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
103 1.3 mrg size_t mapnum, void **hostaddrs, size_t *sizes,
104 1.3 mrg unsigned short *kinds, ...)
105 1.1 mrg {
106 1.5 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
107 1.5 mrg
108 1.1 mrg va_list ap;
109 1.1 mrg struct goacc_thread *thr;
110 1.1 mrg struct gomp_device_descr *acc_dev;
111 1.1 mrg struct target_mem_desc *tgt;
112 1.1 mrg void **devaddrs;
113 1.1 mrg unsigned int i;
114 1.1 mrg struct splay_tree_key_s k;
115 1.1 mrg splay_tree_key tgt_fn_key;
116 1.1 mrg void (*tgt_fn);
117 1.3 mrg int async = GOMP_ASYNC_SYNC;
118 1.3 mrg unsigned dims[GOMP_DIM_MAX];
119 1.3 mrg unsigned tag;
120 1.1 mrg
121 1.1 mrg #ifdef HAVE_INTTYPES_H
122 1.3 mrg gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
123 1.3 mrg __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
124 1.1 mrg #else
125 1.3 mrg gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
126 1.3 mrg __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
127 1.1 mrg #endif
128 1.1 mrg goacc_lazy_initialize ();
129 1.1 mrg
130 1.1 mrg thr = goacc_thread ();
131 1.1 mrg acc_dev = thr->dev;
132 1.1 mrg
133 1.6 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
134 1.6 mrg
135 1.6 mrg acc_prof_info prof_info;
136 1.6 mrg if (profiling_p)
137 1.6 mrg {
138 1.6 mrg thr->prof_info = &prof_info;
139 1.6 mrg
140 1.6 mrg prof_info.event_type = acc_ev_compute_construct_start;
141 1.6 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
142 1.6 mrg prof_info.version = _ACC_PROF_INFO_VERSION;
143 1.6 mrg prof_info.device_type = acc_device_type (acc_dev->type);
144 1.6 mrg prof_info.device_number = acc_dev->target_id;
145 1.6 mrg prof_info.thread_id = -1;
146 1.6 mrg prof_info.async = async;
147 1.6 mrg prof_info.async_queue = prof_info.async;
148 1.6 mrg prof_info.src_file = NULL;
149 1.6 mrg prof_info.func_name = NULL;
150 1.6 mrg prof_info.line_no = -1;
151 1.6 mrg prof_info.end_line_no = -1;
152 1.6 mrg prof_info.func_line_no = -1;
153 1.6 mrg prof_info.func_end_line_no = -1;
154 1.6 mrg }
155 1.6 mrg acc_event_info compute_construct_event_info;
156 1.6 mrg if (profiling_p)
157 1.6 mrg {
158 1.6 mrg compute_construct_event_info.other_event.event_type
159 1.6 mrg = prof_info.event_type;
160 1.6 mrg compute_construct_event_info.other_event.valid_bytes
161 1.6 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
162 1.6 mrg compute_construct_event_info.other_event.parent_construct
163 1.6 mrg = acc_construct_parallel;
164 1.6 mrg compute_construct_event_info.other_event.implicit = 0;
165 1.6 mrg compute_construct_event_info.other_event.tool_info = NULL;
166 1.6 mrg }
167 1.6 mrg acc_api_info api_info;
168 1.6 mrg if (profiling_p)
169 1.6 mrg {
170 1.6 mrg thr->api_info = &api_info;
171 1.6 mrg
172 1.6 mrg api_info.device_api = acc_device_api_none;
173 1.6 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
174 1.6 mrg api_info.device_type = prof_info.device_type;
175 1.6 mrg api_info.vendor = -1;
176 1.6 mrg api_info.device_handle = NULL;
177 1.6 mrg api_info.context_handle = NULL;
178 1.6 mrg api_info.async_handle = NULL;
179 1.6 mrg }
180 1.6 mrg
181 1.6 mrg if (profiling_p)
182 1.6 mrg goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
183 1.6 mrg &api_info);
184 1.6 mrg
185 1.5 mrg handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
186 1.5 mrg
187 1.1 mrg /* Host fallback if "if" clause is false or if the current device is set to
188 1.1 mrg the host. */
189 1.5 mrg if (flags & GOACC_FLAG_HOST_FALLBACK)
190 1.1 mrg {
191 1.6 mrg prof_info.device_type = acc_device_host;
192 1.6 mrg api_info.device_type = prof_info.device_type;
193 1.1 mrg goacc_save_and_set_bind (acc_device_host);
194 1.1 mrg fn (hostaddrs);
195 1.1 mrg goacc_restore_bind ();
196 1.6 mrg goto out_prof;
197 1.1 mrg }
198 1.1 mrg else if (acc_device_type (acc_dev->type) == acc_device_host)
199 1.1 mrg {
200 1.1 mrg fn (hostaddrs);
201 1.6 mrg goto out_prof;
202 1.1 mrg }
203 1.1 mrg
204 1.3 mrg /* Default: let the runtime choose. */
205 1.3 mrg for (i = 0; i != GOMP_DIM_MAX; i++)
206 1.3 mrg dims[i] = 0;
207 1.3 mrg
208 1.3 mrg va_start (ap, kinds);
209 1.3 mrg /* TODO: This will need amending when device_type is implemented. */
210 1.3 mrg while ((tag = va_arg (ap, unsigned)) != 0)
211 1.3 mrg {
212 1.3 mrg if (GOMP_LAUNCH_DEVICE (tag))
213 1.3 mrg gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
214 1.3 mrg GOMP_LAUNCH_DEVICE (tag));
215 1.3 mrg
216 1.3 mrg switch (GOMP_LAUNCH_CODE (tag))
217 1.3 mrg {
218 1.3 mrg case GOMP_LAUNCH_DIM:
219 1.3 mrg {
220 1.3 mrg unsigned mask = GOMP_LAUNCH_OP (tag);
221 1.3 mrg
222 1.3 mrg for (i = 0; i != GOMP_DIM_MAX; i++)
223 1.3 mrg if (mask & GOMP_DIM_MASK (i))
224 1.3 mrg dims[i] = va_arg (ap, unsigned);
225 1.3 mrg }
226 1.3 mrg break;
227 1.3 mrg
228 1.3 mrg case GOMP_LAUNCH_ASYNC:
229 1.3 mrg {
230 1.3 mrg /* Small constant values are encoded in the operand. */
231 1.3 mrg async = GOMP_LAUNCH_OP (tag);
232 1.3 mrg
233 1.3 mrg if (async == GOMP_LAUNCH_OP_MAX)
234 1.3 mrg async = va_arg (ap, unsigned);
235 1.6 mrg
236 1.6 mrg if (profiling_p)
237 1.6 mrg {
238 1.6 mrg prof_info.async = async;
239 1.6 mrg prof_info.async_queue = prof_info.async;
240 1.6 mrg }
241 1.6 mrg
242 1.3 mrg break;
243 1.3 mrg }
244 1.3 mrg
245 1.3 mrg case GOMP_LAUNCH_WAIT:
246 1.3 mrg {
247 1.3 mrg unsigned num_waits = GOMP_LAUNCH_OP (tag);
248 1.5 mrg goacc_wait (async, num_waits, &ap);
249 1.3 mrg break;
250 1.3 mrg }
251 1.1 mrg
252 1.3 mrg default:
253 1.3 mrg gomp_fatal ("unrecognized offload code '%d',"
254 1.3 mrg " libgomp is too old", GOMP_LAUNCH_CODE (tag));
255 1.3 mrg }
256 1.3 mrg }
257 1.1 mrg va_end (ap);
258 1.3 mrg
259 1.1 mrg if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
260 1.1 mrg {
261 1.1 mrg k.host_start = (uintptr_t) fn;
262 1.1 mrg k.host_end = k.host_start + 1;
263 1.1 mrg gomp_mutex_lock (&acc_dev->lock);
264 1.1 mrg tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
265 1.1 mrg gomp_mutex_unlock (&acc_dev->lock);
266 1.1 mrg
267 1.1 mrg if (tgt_fn_key == NULL)
268 1.1 mrg gomp_fatal ("target function wasn't mapped");
269 1.1 mrg
270 1.1 mrg tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
271 1.1 mrg }
272 1.1 mrg else
273 1.1 mrg tgt_fn = (void (*)) fn;
274 1.1 mrg
275 1.6 mrg acc_event_info enter_exit_data_event_info;
276 1.6 mrg if (profiling_p)
277 1.6 mrg {
278 1.6 mrg prof_info.event_type = acc_ev_enter_data_start;
279 1.6 mrg enter_exit_data_event_info.other_event.event_type
280 1.6 mrg = prof_info.event_type;
281 1.6 mrg enter_exit_data_event_info.other_event.valid_bytes
282 1.6 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
283 1.6 mrg enter_exit_data_event_info.other_event.parent_construct
284 1.6 mrg = compute_construct_event_info.other_event.parent_construct;
285 1.6 mrg enter_exit_data_event_info.other_event.implicit = 1;
286 1.6 mrg enter_exit_data_event_info.other_event.tool_info = NULL;
287 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
288 1.6 mrg &api_info);
289 1.6 mrg }
290 1.6 mrg
291 1.6 mrg goacc_aq aq = get_goacc_asyncqueue (async);
292 1.6 mrg
293 1.6 mrg tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
294 1.6 mrg true, GOMP_MAP_VARS_OPENACC);
295 1.6 mrg if (profiling_p)
296 1.6 mrg {
297 1.6 mrg prof_info.event_type = acc_ev_enter_data_end;
298 1.6 mrg enter_exit_data_event_info.other_event.event_type
299 1.6 mrg = prof_info.event_type;
300 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
301 1.6 mrg &api_info);
302 1.6 mrg }
303 1.6 mrg
304 1.1 mrg devaddrs = gomp_alloca (sizeof (void *) * mapnum);
305 1.1 mrg for (i = 0; i < mapnum; i++)
306 1.6 mrg devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
307 1.6 mrg
308 1.6 mrg if (aq == NULL)
309 1.6 mrg acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
310 1.6 mrg tgt);
311 1.6 mrg else
312 1.6 mrg acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
313 1.6 mrg dims, tgt, aq);
314 1.1 mrg
315 1.6 mrg if (profiling_p)
316 1.6 mrg {
317 1.6 mrg prof_info.event_type = acc_ev_exit_data_start;
318 1.6 mrg enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
319 1.6 mrg enter_exit_data_event_info.other_event.tool_info = NULL;
320 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
321 1.6 mrg &api_info);
322 1.6 mrg }
323 1.1 mrg
324 1.1 mrg /* If running synchronously, unmap immediately. */
325 1.6 mrg if (aq == NULL)
326 1.1 mrg gomp_unmap_vars (tgt, true);
327 1.1 mrg else
328 1.6 mrg gomp_unmap_vars_async (tgt, true, aq);
329 1.6 mrg
330 1.6 mrg if (profiling_p)
331 1.5 mrg {
332 1.6 mrg prof_info.event_type = acc_ev_exit_data_end;
333 1.6 mrg enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
334 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
335 1.6 mrg &api_info);
336 1.5 mrg }
337 1.1 mrg
338 1.6 mrg out_prof:
339 1.6 mrg if (profiling_p)
340 1.6 mrg {
341 1.6 mrg prof_info.event_type = acc_ev_compute_construct_end;
342 1.6 mrg compute_construct_event_info.other_event.event_type
343 1.6 mrg = prof_info.event_type;
344 1.6 mrg goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
345 1.6 mrg &api_info);
346 1.6 mrg
347 1.6 mrg thr->prof_info = NULL;
348 1.6 mrg thr->api_info = NULL;
349 1.6 mrg }
350 1.1 mrg }
351 1.1 mrg
352 1.6 mrg /* Legacy entry point (GCC 5). Only provide host fallback execution. */
353 1.3 mrg
354 1.3 mrg void
355 1.5 mrg GOACC_parallel (int flags_m, void (*fn) (void *),
356 1.3 mrg size_t mapnum, void **hostaddrs, size_t *sizes,
357 1.3 mrg unsigned short *kinds,
358 1.3 mrg int num_gangs, int num_workers, int vector_length,
359 1.3 mrg int async, int num_waits, ...)
360 1.3 mrg {
361 1.3 mrg goacc_save_and_set_bind (acc_device_host);
362 1.3 mrg fn (hostaddrs);
363 1.3 mrg goacc_restore_bind ();
364 1.3 mrg }
365 1.3 mrg
366 1.1 mrg void
367 1.5 mrg GOACC_data_start (int flags_m, size_t mapnum,
368 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds)
369 1.1 mrg {
370 1.5 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
371 1.5 mrg
372 1.1 mrg struct target_mem_desc *tgt;
373 1.1 mrg
374 1.1 mrg #ifdef HAVE_INTTYPES_H
375 1.1 mrg gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
376 1.1 mrg __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
377 1.1 mrg #else
378 1.1 mrg gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
379 1.1 mrg __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
380 1.1 mrg #endif
381 1.1 mrg
382 1.1 mrg goacc_lazy_initialize ();
383 1.1 mrg
384 1.1 mrg struct goacc_thread *thr = goacc_thread ();
385 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev;
386 1.1 mrg
387 1.6 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
388 1.6 mrg
389 1.6 mrg acc_prof_info prof_info;
390 1.6 mrg if (profiling_p)
391 1.6 mrg {
392 1.6 mrg thr->prof_info = &prof_info;
393 1.6 mrg
394 1.6 mrg prof_info.event_type = acc_ev_enter_data_start;
395 1.6 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
396 1.6 mrg prof_info.version = _ACC_PROF_INFO_VERSION;
397 1.6 mrg prof_info.device_type = acc_device_type (acc_dev->type);
398 1.6 mrg prof_info.device_number = acc_dev->target_id;
399 1.6 mrg prof_info.thread_id = -1;
400 1.6 mrg prof_info.async = acc_async_sync; /* Always synchronous. */
401 1.6 mrg prof_info.async_queue = prof_info.async;
402 1.6 mrg prof_info.src_file = NULL;
403 1.6 mrg prof_info.func_name = NULL;
404 1.6 mrg prof_info.line_no = -1;
405 1.6 mrg prof_info.end_line_no = -1;
406 1.6 mrg prof_info.func_line_no = -1;
407 1.6 mrg prof_info.func_end_line_no = -1;
408 1.6 mrg }
409 1.6 mrg acc_event_info enter_data_event_info;
410 1.6 mrg if (profiling_p)
411 1.6 mrg {
412 1.6 mrg enter_data_event_info.other_event.event_type
413 1.6 mrg = prof_info.event_type;
414 1.6 mrg enter_data_event_info.other_event.valid_bytes
415 1.6 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
416 1.6 mrg enter_data_event_info.other_event.parent_construct = acc_construct_data;
417 1.6 mrg for (int i = 0; i < mapnum; ++i)
418 1.6 mrg if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR
419 1.6 mrg || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
420 1.6 mrg {
421 1.6 mrg /* If there is one such data mapping kind, then this is actually an
422 1.6 mrg OpenACC 'host_data' construct. (GCC maps the OpenACC
423 1.6 mrg 'host_data' construct to the OpenACC 'data' construct.) Apart
424 1.6 mrg from artificial test cases (such as an OpenACC 'host_data'
425 1.6 mrg construct's (implicit) device initialization when there hasn't
426 1.6 mrg been any device data be set up before...), there can't really
427 1.6 mrg any meaningful events be generated from OpenACC 'host_data'
428 1.6 mrg constructs, though. */
429 1.6 mrg enter_data_event_info.other_event.parent_construct
430 1.6 mrg = acc_construct_host_data;
431 1.6 mrg break;
432 1.6 mrg }
433 1.6 mrg enter_data_event_info.other_event.implicit = 0;
434 1.6 mrg enter_data_event_info.other_event.tool_info = NULL;
435 1.6 mrg }
436 1.6 mrg acc_api_info api_info;
437 1.6 mrg if (profiling_p)
438 1.6 mrg {
439 1.6 mrg thr->api_info = &api_info;
440 1.6 mrg
441 1.6 mrg api_info.device_api = acc_device_api_none;
442 1.6 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
443 1.6 mrg api_info.device_type = prof_info.device_type;
444 1.6 mrg api_info.vendor = -1;
445 1.6 mrg api_info.device_handle = NULL;
446 1.6 mrg api_info.context_handle = NULL;
447 1.6 mrg api_info.async_handle = NULL;
448 1.6 mrg }
449 1.6 mrg
450 1.6 mrg if (profiling_p)
451 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
452 1.6 mrg
453 1.1 mrg /* Host fallback or 'do nothing'. */
454 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
455 1.5 mrg || (flags & GOACC_FLAG_HOST_FALLBACK))
456 1.1 mrg {
457 1.6 mrg prof_info.device_type = acc_device_host;
458 1.6 mrg api_info.device_type = prof_info.device_type;
459 1.3 mrg tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
460 1.3 mrg GOMP_MAP_VARS_OPENACC);
461 1.1 mrg tgt->prev = thr->mapped_data;
462 1.1 mrg thr->mapped_data = tgt;
463 1.1 mrg
464 1.6 mrg goto out_prof;
465 1.1 mrg }
466 1.1 mrg
467 1.1 mrg gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
468 1.1 mrg tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
469 1.3 mrg GOMP_MAP_VARS_OPENACC);
470 1.1 mrg gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
471 1.1 mrg tgt->prev = thr->mapped_data;
472 1.1 mrg thr->mapped_data = tgt;
473 1.6 mrg
474 1.6 mrg out_prof:
475 1.6 mrg if (profiling_p)
476 1.6 mrg {
477 1.6 mrg prof_info.event_type = acc_ev_enter_data_end;
478 1.6 mrg enter_data_event_info.other_event.event_type = prof_info.event_type;
479 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
480 1.6 mrg
481 1.6 mrg thr->prof_info = NULL;
482 1.6 mrg thr->api_info = NULL;
483 1.6 mrg }
484 1.1 mrg }
485 1.1 mrg
486 1.1 mrg void
487 1.1 mrg GOACC_data_end (void)
488 1.1 mrg {
489 1.1 mrg struct goacc_thread *thr = goacc_thread ();
490 1.6 mrg struct gomp_device_descr *acc_dev = thr->dev;
491 1.1 mrg struct target_mem_desc *tgt = thr->mapped_data;
492 1.1 mrg
493 1.6 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
494 1.1 mrg
495 1.6 mrg acc_prof_info prof_info;
496 1.6 mrg if (profiling_p)
497 1.1 mrg {
498 1.6 mrg thr->prof_info = &prof_info;
499 1.1 mrg
500 1.6 mrg prof_info.event_type = acc_ev_exit_data_start;
501 1.6 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
502 1.6 mrg prof_info.version = _ACC_PROF_INFO_VERSION;
503 1.6 mrg prof_info.device_type = acc_device_type (acc_dev->type);
504 1.6 mrg prof_info.device_number = acc_dev->target_id;
505 1.6 mrg prof_info.thread_id = -1;
506 1.6 mrg prof_info.async = acc_async_sync; /* Always synchronous. */
507 1.6 mrg prof_info.async_queue = prof_info.async;
508 1.6 mrg prof_info.src_file = NULL;
509 1.6 mrg prof_info.func_name = NULL;
510 1.6 mrg prof_info.line_no = -1;
511 1.6 mrg prof_info.end_line_no = -1;
512 1.6 mrg prof_info.func_line_no = -1;
513 1.6 mrg prof_info.func_end_line_no = -1;
514 1.6 mrg }
515 1.6 mrg acc_event_info exit_data_event_info;
516 1.6 mrg if (profiling_p)
517 1.6 mrg {
518 1.6 mrg exit_data_event_info.other_event.event_type
519 1.6 mrg = prof_info.event_type;
520 1.6 mrg exit_data_event_info.other_event.valid_bytes
521 1.6 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
522 1.6 mrg exit_data_event_info.other_event.parent_construct = acc_construct_data;
523 1.6 mrg exit_data_event_info.other_event.implicit = 0;
524 1.6 mrg exit_data_event_info.other_event.tool_info = NULL;
525 1.6 mrg }
526 1.6 mrg acc_api_info api_info;
527 1.6 mrg if (profiling_p)
528 1.6 mrg {
529 1.6 mrg thr->api_info = &api_info;
530 1.6 mrg
531 1.6 mrg api_info.device_api = acc_device_api_none;
532 1.6 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
533 1.6 mrg api_info.device_type = prof_info.device_type;
534 1.6 mrg api_info.vendor = -1;
535 1.6 mrg api_info.device_handle = NULL;
536 1.6 mrg api_info.context_handle = NULL;
537 1.6 mrg api_info.async_handle = NULL;
538 1.1 mrg }
539 1.1 mrg
540 1.6 mrg if (profiling_p)
541 1.6 mrg goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
542 1.5 mrg
543 1.6 mrg gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
544 1.6 mrg thr->mapped_data = tgt->prev;
545 1.6 mrg gomp_unmap_vars (tgt, true);
546 1.6 mrg gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
547 1.1 mrg
548 1.6 mrg if (profiling_p)
549 1.1 mrg {
550 1.6 mrg prof_info.event_type = acc_ev_exit_data_end;
551 1.6 mrg exit_data_event_info.other_event.event_type = prof_info.event_type;
552 1.6 mrg goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
553 1.1 mrg
554 1.6 mrg thr->prof_info = NULL;
555 1.6 mrg thr->api_info = NULL;
556 1.1 mrg }
557 1.1 mrg }
558 1.1 mrg
559 1.1 mrg void
560 1.5 mrg GOACC_update (int flags_m, size_t mapnum,
561 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds,
562 1.1 mrg int async, int num_waits, ...)
563 1.1 mrg {
564 1.5 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
565 1.5 mrg
566 1.1 mrg size_t i;
567 1.1 mrg
568 1.1 mrg goacc_lazy_initialize ();
569 1.1 mrg
570 1.1 mrg struct goacc_thread *thr = goacc_thread ();
571 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev;
572 1.1 mrg
573 1.6 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
574 1.6 mrg
575 1.6 mrg acc_prof_info prof_info;
576 1.6 mrg if (profiling_p)
577 1.6 mrg {
578 1.6 mrg thr->prof_info = &prof_info;
579 1.6 mrg
580 1.6 mrg prof_info.event_type = acc_ev_update_start;
581 1.6 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
582 1.6 mrg prof_info.version = _ACC_PROF_INFO_VERSION;
583 1.6 mrg prof_info.device_type = acc_device_type (acc_dev->type);
584 1.6 mrg prof_info.device_number = acc_dev->target_id;
585 1.6 mrg prof_info.thread_id = -1;
586 1.6 mrg prof_info.async = async;
587 1.6 mrg prof_info.async_queue = prof_info.async;
588 1.6 mrg prof_info.src_file = NULL;
589 1.6 mrg prof_info.func_name = NULL;
590 1.6 mrg prof_info.line_no = -1;
591 1.6 mrg prof_info.end_line_no = -1;
592 1.6 mrg prof_info.func_line_no = -1;
593 1.6 mrg prof_info.func_end_line_no = -1;
594 1.6 mrg }
595 1.6 mrg acc_event_info update_event_info;
596 1.6 mrg if (profiling_p)
597 1.6 mrg {
598 1.6 mrg update_event_info.other_event.event_type
599 1.6 mrg = prof_info.event_type;
600 1.6 mrg update_event_info.other_event.valid_bytes
601 1.6 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
602 1.6 mrg update_event_info.other_event.parent_construct = acc_construct_update;
603 1.6 mrg update_event_info.other_event.implicit = 0;
604 1.6 mrg update_event_info.other_event.tool_info = NULL;
605 1.6 mrg }
606 1.6 mrg acc_api_info api_info;
607 1.6 mrg if (profiling_p)
608 1.6 mrg {
609 1.6 mrg thr->api_info = &api_info;
610 1.6 mrg
611 1.6 mrg api_info.device_api = acc_device_api_none;
612 1.6 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
613 1.6 mrg api_info.device_type = prof_info.device_type;
614 1.6 mrg api_info.vendor = -1;
615 1.6 mrg api_info.device_handle = NULL;
616 1.6 mrg api_info.context_handle = NULL;
617 1.6 mrg api_info.async_handle = NULL;
618 1.6 mrg }
619 1.6 mrg
620 1.6 mrg if (profiling_p)
621 1.6 mrg goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
622 1.6 mrg
623 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
624 1.5 mrg || (flags & GOACC_FLAG_HOST_FALLBACK))
625 1.6 mrg {
626 1.6 mrg prof_info.device_type = acc_device_host;
627 1.6 mrg api_info.device_type = prof_info.device_type;
628 1.6 mrg
629 1.6 mrg goto out_prof;
630 1.6 mrg }
631 1.1 mrg
632 1.3 mrg if (num_waits)
633 1.1 mrg {
634 1.1 mrg va_list ap;
635 1.1 mrg
636 1.1 mrg va_start (ap, num_waits);
637 1.3 mrg goacc_wait (async, num_waits, &ap);
638 1.1 mrg va_end (ap);
639 1.1 mrg }
640 1.1 mrg
641 1.5 mrg bool update_device = false;
642 1.1 mrg for (i = 0; i < mapnum; ++i)
643 1.1 mrg {
644 1.1 mrg unsigned char kind = kinds[i] & 0xff;
645 1.1 mrg
646 1.1 mrg switch (kind)
647 1.1 mrg {
648 1.1 mrg case GOMP_MAP_POINTER:
649 1.1 mrg case GOMP_MAP_TO_PSET:
650 1.1 mrg break;
651 1.1 mrg
652 1.5 mrg case GOMP_MAP_ALWAYS_POINTER:
653 1.5 mrg if (update_device)
654 1.5 mrg {
655 1.5 mrg /* Save the contents of the host pointer. */
656 1.5 mrg void *dptr = acc_deviceptr (hostaddrs[i-1]);
657 1.5 mrg uintptr_t t = *(uintptr_t *) hostaddrs[i];
658 1.5 mrg
659 1.5 mrg /* Update the contents of the host pointer to reflect
660 1.5 mrg the value of the allocated device memory in the
661 1.5 mrg previous pointer. */
662 1.5 mrg *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
663 1.6 mrg /* TODO: verify that we really cannot use acc_update_device_async
664 1.6 mrg here. */
665 1.5 mrg acc_update_device (hostaddrs[i], sizeof (uintptr_t));
666 1.5 mrg
667 1.5 mrg /* Restore the host pointer. */
668 1.5 mrg *(uintptr_t *) hostaddrs[i] = t;
669 1.5 mrg update_device = false;
670 1.5 mrg }
671 1.5 mrg break;
672 1.5 mrg
673 1.5 mrg case GOMP_MAP_TO:
674 1.5 mrg if (!acc_is_present (hostaddrs[i], sizes[i]))
675 1.5 mrg {
676 1.5 mrg update_device = false;
677 1.5 mrg break;
678 1.5 mrg }
679 1.5 mrg /* Fallthru */
680 1.1 mrg case GOMP_MAP_FORCE_TO:
681 1.5 mrg update_device = true;
682 1.6 mrg acc_update_device_async (hostaddrs[i], sizes[i], async);
683 1.1 mrg break;
684 1.1 mrg
685 1.5 mrg case GOMP_MAP_FROM:
686 1.5 mrg if (!acc_is_present (hostaddrs[i], sizes[i]))
687 1.5 mrg {
688 1.5 mrg update_device = false;
689 1.5 mrg break;
690 1.5 mrg }
691 1.5 mrg /* Fallthru */
692 1.1 mrg case GOMP_MAP_FORCE_FROM:
693 1.5 mrg update_device = false;
694 1.6 mrg acc_update_self_async (hostaddrs[i], sizes[i], async);
695 1.1 mrg break;
696 1.1 mrg
697 1.1 mrg default:
698 1.1 mrg gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
699 1.1 mrg break;
700 1.1 mrg }
701 1.1 mrg }
702 1.1 mrg
703 1.6 mrg out_prof:
704 1.6 mrg if (profiling_p)
705 1.3 mrg {
706 1.6 mrg prof_info.event_type = acc_ev_update_end;
707 1.6 mrg update_event_info.other_event.event_type = prof_info.event_type;
708 1.6 mrg goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
709 1.1 mrg
710 1.6 mrg thr->prof_info = NULL;
711 1.6 mrg thr->api_info = NULL;
712 1.3 mrg }
713 1.1 mrg }
714 1.1 mrg
715 1.6 mrg
/* Legacy entry point (GCC 5).  Always reports a single thread.  */

int
GOACC_get_num_threads (void)
{
  enum { legacy_num_threads = 1 };

  return legacy_num_threads;
}
723 1.1 mrg
/* Legacy entry point (GCC 5).  Always reports thread number zero.  */

int
GOACC_get_thread_num (void)
{
  enum { legacy_thread_num = 0 };

  return legacy_thread_num;
}
731 1.3 mrg
732 1.3 mrg void
733 1.5 mrg GOACC_declare (int flags_m, size_t mapnum,
734 1.3 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds)
735 1.3 mrg {
736 1.3 mrg int i;
737 1.3 mrg
738 1.3 mrg for (i = 0; i < mapnum; i++)
739 1.3 mrg {
740 1.3 mrg unsigned char kind = kinds[i] & 0xff;
741 1.3 mrg
742 1.3 mrg if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
743 1.3 mrg continue;
744 1.3 mrg
745 1.3 mrg switch (kind)
746 1.3 mrg {
747 1.3 mrg case GOMP_MAP_FORCE_ALLOC:
748 1.3 mrg case GOMP_MAP_FORCE_FROM:
749 1.3 mrg case GOMP_MAP_FORCE_TO:
750 1.3 mrg case GOMP_MAP_POINTER:
751 1.5 mrg case GOMP_MAP_RELEASE:
752 1.3 mrg case GOMP_MAP_DELETE:
753 1.5 mrg GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
754 1.5 mrg &kinds[i], GOMP_ASYNC_SYNC, 0);
755 1.3 mrg break;
756 1.3 mrg
757 1.3 mrg case GOMP_MAP_FORCE_DEVICEPTR:
758 1.3 mrg break;
759 1.3 mrg
760 1.3 mrg case GOMP_MAP_ALLOC:
761 1.3 mrg if (!acc_is_present (hostaddrs[i], sizes[i]))
762 1.5 mrg GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
763 1.5 mrg &kinds[i], GOMP_ASYNC_SYNC, 0);
764 1.3 mrg break;
765 1.3 mrg
766 1.3 mrg case GOMP_MAP_TO:
767 1.5 mrg GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
768 1.5 mrg &kinds[i], GOMP_ASYNC_SYNC, 0);
769 1.3 mrg
770 1.3 mrg break;
771 1.3 mrg
772 1.3 mrg case GOMP_MAP_FROM:
773 1.5 mrg GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
774 1.5 mrg &kinds[i], GOMP_ASYNC_SYNC, 0);
775 1.3 mrg break;
776 1.3 mrg
777 1.3 mrg case GOMP_MAP_FORCE_PRESENT:
778 1.3 mrg if (!acc_is_present (hostaddrs[i], sizes[i]))
779 1.3 mrg gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
780 1.3 mrg (unsigned long) sizes[i]);
781 1.3 mrg break;
782 1.3 mrg
783 1.3 mrg default:
784 1.3 mrg assert (0);
785 1.3 mrg break;
786 1.3 mrg }
787 1.3 mrg }
788 1.3 mrg }
789