oacc-parallel.c revision 1.1 1 1.1 mrg /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg Contributed by Mentor Embedded.
4 1.1 mrg
5 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library
6 1.1 mrg (libgomp).
7 1.1 mrg
8 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it
9 1.1 mrg under the terms of the GNU General Public License as published by
10 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
11 1.1 mrg any later version.
12 1.1 mrg
13 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 1.1 mrg more details.
17 1.1 mrg
18 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
19 1.1 mrg permissions described in the GCC Runtime Library Exception, version
20 1.1 mrg 3.1, as published by the Free Software Foundation.
21 1.1 mrg
22 1.1 mrg You should have received a copy of the GNU General Public License and
23 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
24 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 1.1 mrg <http://www.gnu.org/licenses/>. */
26 1.1 mrg
27 1.1 mrg /* This file handles OpenACC constructs. */
28 1.1 mrg
29 1.1 mrg #include "openacc.h"
30 1.1 mrg #include "libgomp.h"
31 1.1 mrg #include "libgomp_g.h"
32 1.1 mrg #include "gomp-constants.h"
33 1.1 mrg #include "oacc-int.h"
34 1.1 mrg #ifdef HAVE_INTTYPES_H
35 1.1 mrg # include <inttypes.h> /* For PRIu64. */
36 1.1 mrg #endif
37 1.1 mrg #include <string.h>
38 1.1 mrg #include <stdarg.h>
39 1.1 mrg #include <assert.h>
40 1.1 mrg
41 1.1 mrg static int
42 1.1 mrg find_pset (int pos, size_t mapnum, unsigned short *kinds)
43 1.1 mrg {
44 1.1 mrg if (pos + 1 >= mapnum)
45 1.1 mrg return 0;
46 1.1 mrg
47 1.1 mrg unsigned char kind = kinds[pos+1] & 0xff;
48 1.1 mrg
49 1.1 mrg return kind == GOMP_MAP_TO_PSET;
50 1.1 mrg }
51 1.1 mrg
52 1.1 mrg static void goacc_wait (int async, int num_waits, va_list ap);
53 1.1 mrg
54 1.1 mrg void
55 1.1 mrg GOACC_parallel (int device, void (*fn) (void *),
56 1.1 mrg size_t mapnum, void **hostaddrs, size_t *sizes,
57 1.1 mrg unsigned short *kinds,
58 1.1 mrg int num_gangs, int num_workers, int vector_length,
59 1.1 mrg int async, int num_waits, ...)
60 1.1 mrg {
61 1.1 mrg bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
62 1.1 mrg va_list ap;
63 1.1 mrg struct goacc_thread *thr;
64 1.1 mrg struct gomp_device_descr *acc_dev;
65 1.1 mrg struct target_mem_desc *tgt;
66 1.1 mrg void **devaddrs;
67 1.1 mrg unsigned int i;
68 1.1 mrg struct splay_tree_key_s k;
69 1.1 mrg splay_tree_key tgt_fn_key;
70 1.1 mrg void (*tgt_fn);
71 1.1 mrg
72 1.1 mrg if (num_gangs != 1)
73 1.1 mrg gomp_fatal ("num_gangs (%d) different from one is not yet supported",
74 1.1 mrg num_gangs);
75 1.1 mrg if (num_workers != 1)
76 1.1 mrg gomp_fatal ("num_workers (%d) different from one is not yet supported",
77 1.1 mrg num_workers);
78 1.1 mrg
79 1.1 mrg #ifdef HAVE_INTTYPES_H
80 1.1 mrg gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, "
81 1.1 mrg "async = %d\n",
82 1.1 mrg __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async);
83 1.1 mrg #else
84 1.1 mrg gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
85 1.1 mrg __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
86 1.1 mrg async);
87 1.1 mrg #endif
88 1.1 mrg goacc_lazy_initialize ();
89 1.1 mrg
90 1.1 mrg thr = goacc_thread ();
91 1.1 mrg acc_dev = thr->dev;
92 1.1 mrg
93 1.1 mrg /* Host fallback if "if" clause is false or if the current device is set to
94 1.1 mrg the host. */
95 1.1 mrg if (host_fallback)
96 1.1 mrg {
97 1.1 mrg goacc_save_and_set_bind (acc_device_host);
98 1.1 mrg fn (hostaddrs);
99 1.1 mrg goacc_restore_bind ();
100 1.1 mrg return;
101 1.1 mrg }
102 1.1 mrg else if (acc_device_type (acc_dev->type) == acc_device_host)
103 1.1 mrg {
104 1.1 mrg fn (hostaddrs);
105 1.1 mrg return;
106 1.1 mrg }
107 1.1 mrg
108 1.1 mrg va_start (ap, num_waits);
109 1.1 mrg
110 1.1 mrg if (num_waits > 0)
111 1.1 mrg goacc_wait (async, num_waits, ap);
112 1.1 mrg
113 1.1 mrg va_end (ap);
114 1.1 mrg
115 1.1 mrg acc_dev->openacc.async_set_async_func (async);
116 1.1 mrg
117 1.1 mrg if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
118 1.1 mrg {
119 1.1 mrg k.host_start = (uintptr_t) fn;
120 1.1 mrg k.host_end = k.host_start + 1;
121 1.1 mrg gomp_mutex_lock (&acc_dev->lock);
122 1.1 mrg tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
123 1.1 mrg gomp_mutex_unlock (&acc_dev->lock);
124 1.1 mrg
125 1.1 mrg if (tgt_fn_key == NULL)
126 1.1 mrg gomp_fatal ("target function wasn't mapped");
127 1.1 mrg
128 1.1 mrg tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
129 1.1 mrg }
130 1.1 mrg else
131 1.1 mrg tgt_fn = (void (*)) fn;
132 1.1 mrg
133 1.1 mrg tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
134 1.1 mrg false);
135 1.1 mrg
136 1.1 mrg devaddrs = gomp_alloca (sizeof (void *) * mapnum);
137 1.1 mrg for (i = 0; i < mapnum; i++)
138 1.1 mrg devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
139 1.1 mrg + tgt->list[i]->tgt_offset);
140 1.1 mrg
141 1.1 mrg acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
142 1.1 mrg num_gangs, num_workers, vector_length, async,
143 1.1 mrg tgt);
144 1.1 mrg
145 1.1 mrg /* If running synchronously, unmap immediately. */
146 1.1 mrg if (async < acc_async_noval)
147 1.1 mrg gomp_unmap_vars (tgt, true);
148 1.1 mrg else
149 1.1 mrg {
150 1.1 mrg gomp_copy_from_async (tgt);
151 1.1 mrg acc_dev->openacc.register_async_cleanup_func (tgt);
152 1.1 mrg }
153 1.1 mrg
154 1.1 mrg acc_dev->openacc.async_set_async_func (acc_async_sync);
155 1.1 mrg }
156 1.1 mrg
157 1.1 mrg void
158 1.1 mrg GOACC_data_start (int device, size_t mapnum,
159 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds)
160 1.1 mrg {
161 1.1 mrg bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
162 1.1 mrg struct target_mem_desc *tgt;
163 1.1 mrg
164 1.1 mrg #ifdef HAVE_INTTYPES_H
165 1.1 mrg gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
166 1.1 mrg __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
167 1.1 mrg #else
168 1.1 mrg gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
169 1.1 mrg __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
170 1.1 mrg #endif
171 1.1 mrg
172 1.1 mrg goacc_lazy_initialize ();
173 1.1 mrg
174 1.1 mrg struct goacc_thread *thr = goacc_thread ();
175 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev;
176 1.1 mrg
177 1.1 mrg /* Host fallback or 'do nothing'. */
178 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
179 1.1 mrg || host_fallback)
180 1.1 mrg {
181 1.1 mrg tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
182 1.1 mrg tgt->prev = thr->mapped_data;
183 1.1 mrg thr->mapped_data = tgt;
184 1.1 mrg
185 1.1 mrg return;
186 1.1 mrg }
187 1.1 mrg
188 1.1 mrg gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
189 1.1 mrg tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
190 1.1 mrg false);
191 1.1 mrg gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
192 1.1 mrg tgt->prev = thr->mapped_data;
193 1.1 mrg thr->mapped_data = tgt;
194 1.1 mrg }
195 1.1 mrg
196 1.1 mrg void
197 1.1 mrg GOACC_data_end (void)
198 1.1 mrg {
199 1.1 mrg struct goacc_thread *thr = goacc_thread ();
200 1.1 mrg struct target_mem_desc *tgt = thr->mapped_data;
201 1.1 mrg
202 1.1 mrg gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
203 1.1 mrg thr->mapped_data = tgt->prev;
204 1.1 mrg gomp_unmap_vars (tgt, true);
205 1.1 mrg gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
206 1.1 mrg }
207 1.1 mrg
208 1.1 mrg void
209 1.1 mrg GOACC_enter_exit_data (int device, size_t mapnum,
210 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds,
211 1.1 mrg int async, int num_waits, ...)
212 1.1 mrg {
213 1.1 mrg struct goacc_thread *thr;
214 1.1 mrg struct gomp_device_descr *acc_dev;
215 1.1 mrg bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
216 1.1 mrg bool data_enter = false;
217 1.1 mrg size_t i;
218 1.1 mrg
219 1.1 mrg goacc_lazy_initialize ();
220 1.1 mrg
221 1.1 mrg thr = goacc_thread ();
222 1.1 mrg acc_dev = thr->dev;
223 1.1 mrg
224 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
225 1.1 mrg || host_fallback)
226 1.1 mrg return;
227 1.1 mrg
228 1.1 mrg if (num_waits > 0)
229 1.1 mrg {
230 1.1 mrg va_list ap;
231 1.1 mrg
232 1.1 mrg va_start (ap, num_waits);
233 1.1 mrg
234 1.1 mrg goacc_wait (async, num_waits, ap);
235 1.1 mrg
236 1.1 mrg va_end (ap);
237 1.1 mrg }
238 1.1 mrg
239 1.1 mrg acc_dev->openacc.async_set_async_func (async);
240 1.1 mrg
241 1.1 mrg /* Determine if this is an "acc enter data". */
242 1.1 mrg for (i = 0; i < mapnum; ++i)
243 1.1 mrg {
244 1.1 mrg unsigned char kind = kinds[i] & 0xff;
245 1.1 mrg
246 1.1 mrg if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
247 1.1 mrg continue;
248 1.1 mrg
249 1.1 mrg if (kind == GOMP_MAP_FORCE_ALLOC
250 1.1 mrg || kind == GOMP_MAP_FORCE_PRESENT
251 1.1 mrg || kind == GOMP_MAP_FORCE_TO)
252 1.1 mrg {
253 1.1 mrg data_enter = true;
254 1.1 mrg break;
255 1.1 mrg }
256 1.1 mrg
257 1.1 mrg if (kind == GOMP_MAP_FORCE_DEALLOC
258 1.1 mrg || kind == GOMP_MAP_FORCE_FROM)
259 1.1 mrg break;
260 1.1 mrg
261 1.1 mrg gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
262 1.1 mrg kind);
263 1.1 mrg }
264 1.1 mrg
265 1.1 mrg if (data_enter)
266 1.1 mrg {
267 1.1 mrg for (i = 0; i < mapnum; i++)
268 1.1 mrg {
269 1.1 mrg unsigned char kind = kinds[i] & 0xff;
270 1.1 mrg
271 1.1 mrg /* Scan for PSETs. */
272 1.1 mrg int psets = find_pset (i, mapnum, kinds);
273 1.1 mrg
274 1.1 mrg if (!psets)
275 1.1 mrg {
276 1.1 mrg switch (kind)
277 1.1 mrg {
278 1.1 mrg case GOMP_MAP_POINTER:
279 1.1 mrg gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
280 1.1 mrg &kinds[i]);
281 1.1 mrg break;
282 1.1 mrg case GOMP_MAP_FORCE_ALLOC:
283 1.1 mrg acc_create (hostaddrs[i], sizes[i]);
284 1.1 mrg break;
285 1.1 mrg case GOMP_MAP_FORCE_PRESENT:
286 1.1 mrg acc_present_or_copyin (hostaddrs[i], sizes[i]);
287 1.1 mrg break;
288 1.1 mrg case GOMP_MAP_FORCE_TO:
289 1.1 mrg acc_present_or_copyin (hostaddrs[i], sizes[i]);
290 1.1 mrg break;
291 1.1 mrg default:
292 1.1 mrg gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
293 1.1 mrg kind);
294 1.1 mrg break;
295 1.1 mrg }
296 1.1 mrg }
297 1.1 mrg else
298 1.1 mrg {
299 1.1 mrg gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
300 1.1 mrg /* Increment 'i' by two because OpenACC requires fortran
301 1.1 mrg arrays to be contiguous, so each PSET is associated with
302 1.1 mrg one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
303 1.1 mrg one MAP_POINTER. */
304 1.1 mrg i += 2;
305 1.1 mrg }
306 1.1 mrg }
307 1.1 mrg }
308 1.1 mrg else
309 1.1 mrg for (i = 0; i < mapnum; ++i)
310 1.1 mrg {
311 1.1 mrg unsigned char kind = kinds[i] & 0xff;
312 1.1 mrg
313 1.1 mrg int psets = find_pset (i, mapnum, kinds);
314 1.1 mrg
315 1.1 mrg if (!psets)
316 1.1 mrg {
317 1.1 mrg switch (kind)
318 1.1 mrg {
319 1.1 mrg case GOMP_MAP_POINTER:
320 1.1 mrg gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
321 1.1 mrg == GOMP_MAP_FORCE_FROM,
322 1.1 mrg async, 1);
323 1.1 mrg break;
324 1.1 mrg case GOMP_MAP_FORCE_DEALLOC:
325 1.1 mrg acc_delete (hostaddrs[i], sizes[i]);
326 1.1 mrg break;
327 1.1 mrg case GOMP_MAP_FORCE_FROM:
328 1.1 mrg acc_copyout (hostaddrs[i], sizes[i]);
329 1.1 mrg break;
330 1.1 mrg default:
331 1.1 mrg gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
332 1.1 mrg kind);
333 1.1 mrg break;
334 1.1 mrg }
335 1.1 mrg }
336 1.1 mrg else
337 1.1 mrg {
338 1.1 mrg gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
339 1.1 mrg == GOMP_MAP_FORCE_FROM, async, 3);
340 1.1 mrg /* See the above comment. */
341 1.1 mrg i += 2;
342 1.1 mrg }
343 1.1 mrg }
344 1.1 mrg
345 1.1 mrg acc_dev->openacc.async_set_async_func (acc_async_sync);
346 1.1 mrg }
347 1.1 mrg
348 1.1 mrg static void
349 1.1 mrg goacc_wait (int async, int num_waits, va_list ap)
350 1.1 mrg {
351 1.1 mrg struct goacc_thread *thr = goacc_thread ();
352 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev;
353 1.1 mrg int i;
354 1.1 mrg
355 1.1 mrg assert (num_waits >= 0);
356 1.1 mrg
357 1.1 mrg if (async == acc_async_sync && num_waits == 0)
358 1.1 mrg {
359 1.1 mrg acc_wait_all ();
360 1.1 mrg return;
361 1.1 mrg }
362 1.1 mrg
363 1.1 mrg if (async == acc_async_sync && num_waits)
364 1.1 mrg {
365 1.1 mrg for (i = 0; i < num_waits; i++)
366 1.1 mrg {
367 1.1 mrg int qid = va_arg (ap, int);
368 1.1 mrg
369 1.1 mrg if (acc_async_test (qid))
370 1.1 mrg continue;
371 1.1 mrg
372 1.1 mrg acc_wait (qid);
373 1.1 mrg }
374 1.1 mrg return;
375 1.1 mrg }
376 1.1 mrg
377 1.1 mrg if (async == acc_async_noval && num_waits == 0)
378 1.1 mrg {
379 1.1 mrg acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
380 1.1 mrg return;
381 1.1 mrg }
382 1.1 mrg
383 1.1 mrg for (i = 0; i < num_waits; i++)
384 1.1 mrg {
385 1.1 mrg int qid = va_arg (ap, int);
386 1.1 mrg
387 1.1 mrg if (acc_async_test (qid))
388 1.1 mrg continue;
389 1.1 mrg
390 1.1 mrg /* If we're waiting on the same asynchronous queue as we're launching on,
391 1.1 mrg the queue itself will order work as required, so there's no need to
392 1.1 mrg wait explicitly. */
393 1.1 mrg if (qid != async)
394 1.1 mrg acc_dev->openacc.async_wait_async_func (qid, async);
395 1.1 mrg }
396 1.1 mrg }
397 1.1 mrg
398 1.1 mrg void
399 1.1 mrg GOACC_update (int device, size_t mapnum,
400 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds,
401 1.1 mrg int async, int num_waits, ...)
402 1.1 mrg {
403 1.1 mrg bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
404 1.1 mrg size_t i;
405 1.1 mrg
406 1.1 mrg goacc_lazy_initialize ();
407 1.1 mrg
408 1.1 mrg struct goacc_thread *thr = goacc_thread ();
409 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev;
410 1.1 mrg
411 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
412 1.1 mrg || host_fallback)
413 1.1 mrg return;
414 1.1 mrg
415 1.1 mrg if (num_waits > 0)
416 1.1 mrg {
417 1.1 mrg va_list ap;
418 1.1 mrg
419 1.1 mrg va_start (ap, num_waits);
420 1.1 mrg
421 1.1 mrg goacc_wait (async, num_waits, ap);
422 1.1 mrg
423 1.1 mrg va_end (ap);
424 1.1 mrg }
425 1.1 mrg
426 1.1 mrg acc_dev->openacc.async_set_async_func (async);
427 1.1 mrg
428 1.1 mrg for (i = 0; i < mapnum; ++i)
429 1.1 mrg {
430 1.1 mrg unsigned char kind = kinds[i] & 0xff;
431 1.1 mrg
432 1.1 mrg switch (kind)
433 1.1 mrg {
434 1.1 mrg case GOMP_MAP_POINTER:
435 1.1 mrg case GOMP_MAP_TO_PSET:
436 1.1 mrg break;
437 1.1 mrg
438 1.1 mrg case GOMP_MAP_FORCE_TO:
439 1.1 mrg acc_update_device (hostaddrs[i], sizes[i]);
440 1.1 mrg break;
441 1.1 mrg
442 1.1 mrg case GOMP_MAP_FORCE_FROM:
443 1.1 mrg acc_update_self (hostaddrs[i], sizes[i]);
444 1.1 mrg break;
445 1.1 mrg
446 1.1 mrg default:
447 1.1 mrg gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
448 1.1 mrg break;
449 1.1 mrg }
450 1.1 mrg }
451 1.1 mrg
452 1.1 mrg acc_dev->openacc.async_set_async_func (acc_async_sync);
453 1.1 mrg }
454 1.1 mrg
/* Entry point for the OpenACC "wait" directive.  ASYNC is the async
   argument and NUM_WAITS is the number of int queue identifiers that follow
   as variadic arguments; both are forwarded to goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list queue_ids;

  va_start (queue_ids, num_waits);
  goacc_wait (async, num_waits, queue_ids);
  va_end (queue_ids);
}
466 1.1 mrg
/* Return the number of threads; this implementation always reports 1.  */

int
GOACC_get_num_threads (void)
{
  const int num_threads = 1;

  return num_threads;
}
472 1.1 mrg
/* Return the calling thread's number; this implementation always
   reports 0.  */

int
GOACC_get_thread_num (void)
{
  const int thread_num = 0;

  return thread_num;
}
478