oacc-async.c revision 1.7 1 1.1 mrg /* OpenACC Runtime Library Definitions.
2 1.1 mrg
3 1.7 mrg Copyright (C) 2013-2022 Free Software Foundation, Inc.
4 1.1 mrg
5 1.1 mrg Contributed by Mentor Embedded.
6 1.1 mrg
7 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library
8 1.1 mrg (libgomp).
9 1.1 mrg
10 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it
11 1.1 mrg under the terms of the GNU General Public License as published by
12 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
13 1.1 mrg any later version.
14 1.1 mrg
15 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 1.1 mrg more details.
19 1.1 mrg
20 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
21 1.1 mrg permissions described in the GCC Runtime Library Exception, version
22 1.1 mrg 3.1, as published by the Free Software Foundation.
23 1.1 mrg
24 1.1 mrg You should have received a copy of the GNU General Public License and
25 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
26 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 1.1 mrg <http://www.gnu.org/licenses/>. */
28 1.1 mrg
29 1.1 mrg #include <assert.h>
30 1.6 mrg #include <string.h>
31 1.1 mrg #include "openacc.h"
32 1.1 mrg #include "libgomp.h"
33 1.1 mrg #include "oacc-int.h"
34 1.1 mrg
35 1.6 mrg static struct goacc_thread *
36 1.6 mrg get_goacc_thread (void)
37 1.6 mrg {
38 1.6 mrg struct goacc_thread *thr = goacc_thread ();
39 1.6 mrg
40 1.6 mrg if (!thr || !thr->dev)
41 1.6 mrg gomp_fatal ("no device active");
42 1.6 mrg
43 1.6 mrg return thr;
44 1.6 mrg }
45 1.6 mrg
46 1.6 mrg static int
47 1.6 mrg validate_async_val (int async)
48 1.6 mrg {
49 1.6 mrg if (!async_valid_p (async))
50 1.6 mrg gomp_fatal ("invalid async-argument: %d", async);
51 1.6 mrg
52 1.6 mrg if (async == acc_async_sync)
53 1.6 mrg return -1;
54 1.6 mrg
55 1.6 mrg if (async == acc_async_noval)
56 1.6 mrg return 0;
57 1.6 mrg
58 1.6 mrg if (async >= 0)
59 1.6 mrg /* TODO: we reserve 0 for acc_async_noval before we can clarify the
60 1.6 mrg semantics of "default_async". */
61 1.6 mrg return 1 + async;
62 1.6 mrg else
63 1.6 mrg __builtin_unreachable ();
64 1.6 mrg }
65 1.6 mrg
66 1.6 mrg /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
67 1.6 mrg might return NULL if no asyncqueue is to be used. Otherwise, if CREATE,
68 1.6 mrg create the asyncqueue if it doesn't exist yet.
69 1.6 mrg
70 1.6 mrg Unless CREATE, this will not generate any OpenACC Profiling Interface
71 1.6 mrg events. */
72 1.6 mrg
73 1.6 mrg attribute_hidden struct goacc_asyncqueue *
74 1.6 mrg lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
75 1.6 mrg {
76 1.6 mrg async = validate_async_val (async);
77 1.6 mrg if (async < 0)
78 1.6 mrg return NULL;
79 1.6 mrg
80 1.6 mrg struct goacc_asyncqueue *ret_aq = NULL;
81 1.6 mrg struct gomp_device_descr *dev = thr->dev;
82 1.6 mrg
83 1.6 mrg gomp_mutex_lock (&dev->openacc.async.lock);
84 1.6 mrg
85 1.6 mrg if (!create
86 1.6 mrg && (async >= dev->openacc.async.nasyncqueue
87 1.6 mrg || !dev->openacc.async.asyncqueue[async]))
88 1.6 mrg goto end;
89 1.6 mrg
90 1.6 mrg if (async >= dev->openacc.async.nasyncqueue)
91 1.6 mrg {
92 1.6 mrg int diff = async + 1 - dev->openacc.async.nasyncqueue;
93 1.6 mrg dev->openacc.async.asyncqueue
94 1.6 mrg = gomp_realloc (dev->openacc.async.asyncqueue,
95 1.6 mrg sizeof (goacc_aq) * (async + 1));
96 1.6 mrg memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
97 1.6 mrg 0, sizeof (goacc_aq) * diff);
98 1.6 mrg dev->openacc.async.nasyncqueue = async + 1;
99 1.6 mrg }
100 1.6 mrg
101 1.6 mrg if (!dev->openacc.async.asyncqueue[async])
102 1.6 mrg {
103 1.6 mrg dev->openacc.async.asyncqueue[async]
104 1.6 mrg = dev->openacc.async.construct_func (dev->target_id);
105 1.6 mrg
106 1.6 mrg if (!dev->openacc.async.asyncqueue[async])
107 1.6 mrg {
108 1.6 mrg gomp_mutex_unlock (&dev->openacc.async.lock);
109 1.6 mrg gomp_fatal ("async %d creation failed", async);
110 1.6 mrg }
111 1.6 mrg
112 1.6 mrg /* Link new async queue into active list. */
113 1.6 mrg goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
114 1.6 mrg n->aq = dev->openacc.async.asyncqueue[async];
115 1.6 mrg n->next = dev->openacc.async.active;
116 1.6 mrg dev->openacc.async.active = n;
117 1.6 mrg }
118 1.6 mrg
119 1.6 mrg ret_aq = dev->openacc.async.asyncqueue[async];
120 1.6 mrg
121 1.6 mrg end:
122 1.6 mrg gomp_mutex_unlock (&dev->openacc.async.lock);
123 1.6 mrg return ret_aq;
124 1.6 mrg }
125 1.6 mrg
126 1.6 mrg /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
127 1.6 mrg might return NULL if no asyncqueue is to be used. Otherwise, create the
128 1.6 mrg asyncqueue if it doesn't exist yet. */
129 1.6 mrg
130 1.6 mrg attribute_hidden struct goacc_asyncqueue *
131 1.6 mrg get_goacc_asyncqueue (int async)
132 1.6 mrg {
133 1.6 mrg struct goacc_thread *thr = get_goacc_thread ();
134 1.6 mrg return lookup_goacc_asyncqueue (thr, true, async);
135 1.6 mrg }
136 1.6 mrg
137 1.1 mrg int
138 1.1 mrg acc_async_test (int async)
139 1.1 mrg {
140 1.1 mrg struct goacc_thread *thr = goacc_thread ();
141 1.1 mrg
142 1.1 mrg if (!thr || !thr->dev)
143 1.1 mrg gomp_fatal ("no device active");
144 1.1 mrg
145 1.6 mrg goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
146 1.6 mrg if (!aq)
147 1.6 mrg return 1;
148 1.6 mrg
149 1.6 mrg acc_prof_info prof_info;
150 1.6 mrg acc_api_info api_info;
151 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
152 1.6 mrg if (profiling_p)
153 1.6 mrg {
154 1.6 mrg prof_info.async = async;
155 1.6 mrg prof_info.async_queue = prof_info.async;
156 1.6 mrg }
157 1.6 mrg
158 1.6 mrg int res = thr->dev->openacc.async.test_func (aq);
159 1.6 mrg
160 1.6 mrg if (profiling_p)
161 1.6 mrg {
162 1.6 mrg thr->prof_info = NULL;
163 1.6 mrg thr->api_info = NULL;
164 1.6 mrg }
165 1.6 mrg
166 1.6 mrg return res;
167 1.1 mrg }
168 1.1 mrg
169 1.1 mrg int
170 1.1 mrg acc_async_test_all (void)
171 1.1 mrg {
172 1.6 mrg struct goacc_thread *thr = get_goacc_thread ();
173 1.1 mrg
174 1.6 mrg acc_prof_info prof_info;
175 1.6 mrg acc_api_info api_info;
176 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
177 1.6 mrg
178 1.6 mrg int ret = 1;
179 1.6 mrg gomp_mutex_lock (&thr->dev->openacc.async.lock);
180 1.6 mrg for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
181 1.6 mrg if (!thr->dev->openacc.async.test_func (l->aq))
182 1.6 mrg {
183 1.6 mrg ret = 0;
184 1.6 mrg break;
185 1.6 mrg }
186 1.6 mrg gomp_mutex_unlock (&thr->dev->openacc.async.lock);
187 1.6 mrg
188 1.6 mrg if (profiling_p)
189 1.6 mrg {
190 1.6 mrg thr->prof_info = NULL;
191 1.6 mrg thr->api_info = NULL;
192 1.6 mrg }
193 1.1 mrg
194 1.6 mrg return ret;
195 1.1 mrg }
196 1.1 mrg
197 1.1 mrg void
198 1.1 mrg acc_wait (int async)
199 1.1 mrg {
200 1.6 mrg struct goacc_thread *thr = get_goacc_thread ();
201 1.1 mrg
202 1.6 mrg goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
203 1.6 mrg if (!aq)
204 1.6 mrg return;
205 1.6 mrg
206 1.6 mrg acc_prof_info prof_info;
207 1.6 mrg acc_api_info api_info;
208 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
209 1.6 mrg if (profiling_p)
210 1.6 mrg {
211 1.6 mrg prof_info.async = async;
212 1.6 mrg prof_info.async_queue = prof_info.async;
213 1.6 mrg }
214 1.6 mrg
215 1.6 mrg if (!thr->dev->openacc.async.synchronize_func (aq))
216 1.6 mrg gomp_fatal ("wait on %d failed", async);
217 1.6 mrg
218 1.6 mrg if (profiling_p)
219 1.6 mrg {
220 1.6 mrg thr->prof_info = NULL;
221 1.6 mrg thr->api_info = NULL;
222 1.6 mrg }
223 1.1 mrg }
224 1.1 mrg
/* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait.  Without
   HAVE_ATTRIBUTE_ALIAS it is a plain forwarding function; with it, the name
   is provided through strong_alias.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait (int async)
{
  acc_wait (async);
}
#else
strong_alias (acc_wait, acc_async_wait)
#endif
235 1.4 mrg
236 1.1 mrg void
237 1.1 mrg acc_wait_async (int async1, int async2)
238 1.1 mrg {
239 1.6 mrg struct goacc_thread *thr = get_goacc_thread ();
240 1.1 mrg
241 1.6 mrg goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
242 1.6 mrg /* TODO: Is this also correct for acc_async_sync, assuming that in this case,
243 1.6 mrg we'll always be synchronous anyways? */
244 1.6 mrg if (!aq1)
245 1.6 mrg return;
246 1.6 mrg
247 1.6 mrg acc_prof_info prof_info;
248 1.6 mrg acc_api_info api_info;
249 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
250 1.6 mrg if (profiling_p)
251 1.6 mrg {
252 1.6 mrg prof_info.async = async2;
253 1.6 mrg prof_info.async_queue = prof_info.async;
254 1.6 mrg }
255 1.6 mrg
256 1.6 mrg goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
257 1.6 mrg /* An async queue is always synchronized with itself. */
258 1.6 mrg if (aq1 == aq2)
259 1.6 mrg goto out_prof;
260 1.6 mrg
261 1.6 mrg if (aq2)
262 1.6 mrg {
263 1.6 mrg if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
264 1.6 mrg gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
265 1.6 mrg }
266 1.6 mrg else
267 1.6 mrg {
268 1.6 mrg /* TODO: Local thread synchronization.
269 1.6 mrg Necessary for the "async2 == acc_async_sync" case, or can just skip? */
270 1.6 mrg if (!thr->dev->openacc.async.synchronize_func (aq1))
271 1.6 mrg gomp_fatal ("wait on %d failed", async1);
272 1.6 mrg }
273 1.6 mrg
274 1.6 mrg out_prof:
275 1.6 mrg if (profiling_p)
276 1.6 mrg {
277 1.6 mrg thr->prof_info = NULL;
278 1.6 mrg thr->api_info = NULL;
279 1.6 mrg }
280 1.1 mrg }
281 1.1 mrg
282 1.1 mrg void
283 1.1 mrg acc_wait_all (void)
284 1.1 mrg {
285 1.1 mrg struct goacc_thread *thr = goacc_thread ();
286 1.1 mrg
287 1.6 mrg acc_prof_info prof_info;
288 1.6 mrg acc_api_info api_info;
289 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
290 1.6 mrg
291 1.6 mrg bool ret = true;
292 1.6 mrg gomp_mutex_lock (&thr->dev->openacc.async.lock);
293 1.6 mrg for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
294 1.6 mrg ret &= thr->dev->openacc.async.synchronize_func (l->aq);
295 1.6 mrg gomp_mutex_unlock (&thr->dev->openacc.async.lock);
296 1.6 mrg
297 1.6 mrg if (profiling_p)
298 1.6 mrg {
299 1.6 mrg thr->prof_info = NULL;
300 1.6 mrg thr->api_info = NULL;
301 1.6 mrg }
302 1.1 mrg
303 1.6 mrg if (!ret)
304 1.6 mrg gomp_fatal ("wait all failed");
305 1.1 mrg }
306 1.1 mrg
/* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all.
   Without HAVE_ATTRIBUTE_ALIAS it is a plain forwarding function; with it,
   the name is provided through strong_alias.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait_all (void)
{
  acc_wait_all ();
}
#else
strong_alias (acc_wait_all, acc_async_wait_all)
#endif
317 1.4 mrg
318 1.1 mrg void
319 1.1 mrg acc_wait_all_async (int async)
320 1.1 mrg {
321 1.6 mrg struct goacc_thread *thr = get_goacc_thread ();
322 1.6 mrg
323 1.6 mrg acc_prof_info prof_info;
324 1.6 mrg acc_api_info api_info;
325 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
326 1.6 mrg if (profiling_p)
327 1.6 mrg {
328 1.6 mrg prof_info.async = async;
329 1.6 mrg prof_info.async_queue = prof_info.async;
330 1.6 mrg }
331 1.6 mrg
332 1.6 mrg goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
333 1.6 mrg
334 1.6 mrg bool ret = true;
335 1.6 mrg gomp_mutex_lock (&thr->dev->openacc.async.lock);
336 1.6 mrg for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
337 1.6 mrg {
338 1.6 mrg if (waiting_queue)
339 1.6 mrg ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
340 1.6 mrg else
341 1.6 mrg /* TODO: Local thread synchronization.
342 1.6 mrg Necessary for the "async2 == acc_async_sync" case, or can just skip? */
343 1.6 mrg ret &= thr->dev->openacc.async.synchronize_func (l->aq);
344 1.6 mrg }
345 1.6 mrg gomp_mutex_unlock (&thr->dev->openacc.async.lock);
346 1.6 mrg
347 1.6 mrg if (profiling_p)
348 1.6 mrg {
349 1.6 mrg thr->prof_info = NULL;
350 1.6 mrg thr->api_info = NULL;
351 1.6 mrg }
352 1.6 mrg
353 1.6 mrg if (!ret)
354 1.6 mrg gomp_fatal ("wait all async(%d) failed", async);
355 1.6 mrg }
356 1.6 mrg
357 1.6 mrg void
358 1.6 mrg GOACC_wait (int async, int num_waits, ...)
359 1.6 mrg {
360 1.6 mrg goacc_lazy_initialize ();
361 1.1 mrg
362 1.1 mrg struct goacc_thread *thr = goacc_thread ();
363 1.1 mrg
364 1.6 mrg /* No nesting. */
365 1.6 mrg assert (thr->prof_info == NULL);
366 1.6 mrg assert (thr->api_info == NULL);
367 1.6 mrg acc_prof_info prof_info;
368 1.6 mrg acc_api_info api_info;
369 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
370 1.6 mrg if (profiling_p)
371 1.6 mrg {
372 1.6 mrg prof_info.async = async;
373 1.6 mrg prof_info.async_queue = prof_info.async;
374 1.6 mrg }
375 1.6 mrg
376 1.6 mrg if (num_waits)
377 1.6 mrg {
378 1.6 mrg va_list ap;
379 1.6 mrg
380 1.6 mrg va_start (ap, num_waits);
381 1.6 mrg goacc_wait (async, num_waits, &ap);
382 1.6 mrg va_end (ap);
383 1.6 mrg }
384 1.6 mrg else if (async == acc_async_sync)
385 1.6 mrg acc_wait_all ();
386 1.6 mrg else
387 1.6 mrg acc_wait_all_async (async);
388 1.6 mrg
389 1.6 mrg if (profiling_p)
390 1.6 mrg {
391 1.6 mrg thr->prof_info = NULL;
392 1.6 mrg thr->api_info = NULL;
393 1.6 mrg }
394 1.6 mrg }
395 1.6 mrg
396 1.6 mrg attribute_hidden void
397 1.6 mrg goacc_wait (int async, int num_waits, va_list *ap)
398 1.6 mrg {
399 1.6 mrg while (num_waits--)
400 1.6 mrg {
401 1.6 mrg int qid = va_arg (*ap, int);
402 1.6 mrg
403 1.6 mrg /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
404 1.6 mrg if (qid == acc_async_noval)
405 1.6 mrg {
406 1.6 mrg if (async == acc_async_sync)
407 1.6 mrg acc_wait_all ();
408 1.6 mrg else
409 1.6 mrg acc_wait_all_async (async);
410 1.6 mrg break;
411 1.6 mrg }
412 1.6 mrg
413 1.6 mrg if (acc_async_test (qid))
414 1.6 mrg continue;
415 1.6 mrg
416 1.6 mrg if (async == acc_async_sync)
417 1.6 mrg acc_wait (qid);
418 1.6 mrg else if (qid == async)
419 1.6 mrg /* If we're waiting on the same asynchronous queue as we're
420 1.6 mrg launching on, the queue itself will order work as
421 1.6 mrg required, so there's no need to wait explicitly. */
422 1.6 mrg ;
423 1.6 mrg else
424 1.6 mrg acc_wait_async (qid, async);
425 1.6 mrg }
426 1.6 mrg }
427 1.6 mrg
428 1.6 mrg attribute_hidden void
429 1.6 mrg goacc_async_free (struct gomp_device_descr *devicep,
430 1.6 mrg struct goacc_asyncqueue *aq, void *ptr)
431 1.6 mrg {
432 1.6 mrg if (!aq)
433 1.6 mrg free (ptr);
434 1.6 mrg else
435 1.6 mrg devicep->openacc.async.queue_callback_func (aq, free, ptr);
436 1.6 mrg }
437 1.6 mrg
438 1.6 mrg /* This function initializes the asyncqueues for the device specified by
439 1.6 mrg DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on
440 1.6 mrg return. */
441 1.6 mrg
442 1.6 mrg attribute_hidden void
443 1.6 mrg goacc_init_asyncqueues (struct gomp_device_descr *devicep)
444 1.6 mrg {
445 1.6 mrg devicep->openacc.async.nasyncqueue = 0;
446 1.6 mrg devicep->openacc.async.asyncqueue = NULL;
447 1.6 mrg devicep->openacc.async.active = NULL;
448 1.6 mrg gomp_mutex_init (&devicep->openacc.async.lock);
449 1.6 mrg }
450 1.6 mrg
451 1.6 mrg /* This function finalizes the asyncqueues for the device specified by DEVICEP.
452 1.6 mrg TODO DEVICEP must be locked on entry, and remains locked on return. */
453 1.1 mrg
454 1.6 mrg attribute_hidden bool
455 1.6 mrg goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
456 1.6 mrg {
457 1.6 mrg bool ret = true;
458 1.6 mrg gomp_mutex_lock (&devicep->openacc.async.lock);
459 1.6 mrg if (devicep->openacc.async.nasyncqueue > 0)
460 1.6 mrg {
461 1.6 mrg goacc_aq_list next;
462 1.6 mrg for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
463 1.6 mrg {
464 1.6 mrg ret &= devicep->openacc.async.destruct_func (l->aq);
465 1.6 mrg next = l->next;
466 1.6 mrg free (l);
467 1.6 mrg }
468 1.6 mrg free (devicep->openacc.async.asyncqueue);
469 1.6 mrg devicep->openacc.async.nasyncqueue = 0;
470 1.6 mrg devicep->openacc.async.asyncqueue = NULL;
471 1.6 mrg devicep->openacc.async.active = NULL;
472 1.6 mrg }
473 1.6 mrg gomp_mutex_unlock (&devicep->openacc.async.lock);
474 1.6 mrg gomp_mutex_destroy (&devicep->openacc.async.lock);
475 1.6 mrg return ret;
476 1.1 mrg }
477