1 1.1 mrg /* OpenACC Runtime Library Definitions. 2 1.1 mrg 3 1.7 mrg Copyright (C) 2013-2022 Free Software Foundation, Inc. 4 1.1 mrg 5 1.1 mrg Contributed by Mentor Embedded. 6 1.1 mrg 7 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library 8 1.1 mrg (libgomp). 9 1.1 mrg 10 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it 11 1.1 mrg under the terms of the GNU General Public License as published by 12 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 13 1.1 mrg any later version. 14 1.1 mrg 15 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 16 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for 18 1.1 mrg more details. 19 1.1 mrg 20 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 21 1.1 mrg permissions described in the GCC Runtime Library Exception, version 22 1.1 mrg 3.1, as published by the Free Software Foundation. 23 1.1 mrg 24 1.1 mrg You should have received a copy of the GNU General Public License and 25 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 26 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 27 1.1 mrg <http://www.gnu.org/licenses/>. */ 28 1.1 mrg 29 1.1 mrg #include <assert.h> 30 1.6 mrg #include <string.h> 31 1.1 mrg #include "openacc.h" 32 1.1 mrg #include "libgomp.h" 33 1.1 mrg #include "oacc-int.h" 34 1.1 mrg 35 1.6 mrg static struct goacc_thread * 36 1.6 mrg get_goacc_thread (void) 37 1.6 mrg { 38 1.6 mrg struct goacc_thread *thr = goacc_thread (); 39 1.6 mrg 40 1.6 mrg if (!thr || !thr->dev) 41 1.6 mrg gomp_fatal ("no device active"); 42 1.6 mrg 43 1.6 mrg return thr; 44 1.6 mrg } 45 1.6 mrg 46 1.6 mrg static int 47 1.6 mrg validate_async_val (int async) 48 1.6 mrg { 49 1.6 mrg if (!async_valid_p (async)) 50 1.6 mrg gomp_fatal ("invalid async-argument: %d", async); 51 1.6 mrg 52 1.6 mrg if (async == acc_async_sync) 53 1.6 mrg return -1; 54 1.6 mrg 55 1.6 mrg if (async == acc_async_noval) 56 1.6 mrg return 0; 57 1.6 mrg 58 1.6 mrg if (async >= 0) 59 1.6 mrg /* TODO: we reserve 0 for acc_async_noval before we can clarify the 60 1.6 mrg semantics of "default_async". */ 61 1.6 mrg return 1 + async; 62 1.6 mrg else 63 1.6 mrg __builtin_unreachable (); 64 1.6 mrg } 65 1.6 mrg 66 1.6 mrg /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This 67 1.6 mrg might return NULL if no asyncqueue is to be used. Otherwise, if CREATE, 68 1.6 mrg create the asyncqueue if it doesn't exist yet. 69 1.6 mrg 70 1.6 mrg Unless CREATE, this will not generate any OpenACC Profiling Interface 71 1.6 mrg events. */ 72 1.6 mrg 73 1.6 mrg attribute_hidden struct goacc_asyncqueue * 74 1.6 mrg lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async) 75 1.6 mrg { 76 1.6 mrg async = validate_async_val (async); 77 1.6 mrg if (async < 0) 78 1.6 mrg return NULL; 79 1.6 mrg 80 1.6 mrg struct goacc_asyncqueue *ret_aq = NULL; 81 1.6 mrg struct gomp_device_descr *dev = thr->dev; 82 1.6 mrg 83 1.6 mrg gomp_mutex_lock (&dev->openacc.async.lock); 84 1.6 mrg 85 1.6 mrg if (!create 86 1.6 mrg && (async >= dev->openacc.async.nasyncqueue 87 1.6 mrg || !dev->openacc.async.asyncqueue[async])) 88 1.6 mrg goto end; 89 1.6 mrg 90 1.6 mrg if (async >= dev->openacc.async.nasyncqueue) 91 1.6 mrg { 92 1.6 mrg int diff = async + 1 - dev->openacc.async.nasyncqueue; 93 1.6 mrg dev->openacc.async.asyncqueue 94 1.6 mrg = gomp_realloc (dev->openacc.async.asyncqueue, 95 1.6 mrg sizeof (goacc_aq) * (async + 1)); 96 1.6 mrg memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue, 97 1.6 mrg 0, sizeof (goacc_aq) * diff); 98 1.6 mrg dev->openacc.async.nasyncqueue = async + 1; 99 1.6 mrg } 100 1.6 mrg 101 1.6 mrg if (!dev->openacc.async.asyncqueue[async]) 102 1.6 mrg { 103 1.6 mrg dev->openacc.async.asyncqueue[async] 104 1.6 mrg = dev->openacc.async.construct_func (dev->target_id); 105 1.6 mrg 106 1.6 mrg if (!dev->openacc.async.asyncqueue[async]) 107 1.6 mrg { 108 1.6 mrg gomp_mutex_unlock (&dev->openacc.async.lock); 109 1.6 mrg gomp_fatal ("async %d creation failed", async); 110 1.6 mrg } 111 1.6 mrg 112 1.6 mrg /* Link new async queue into active list. */ 113 1.6 mrg goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list)); 114 1.6 mrg n->aq = dev->openacc.async.asyncqueue[async]; 115 1.6 mrg n->next = dev->openacc.async.active; 116 1.6 mrg dev->openacc.async.active = n; 117 1.6 mrg } 118 1.6 mrg 119 1.6 mrg ret_aq = dev->openacc.async.asyncqueue[async]; 120 1.6 mrg 121 1.6 mrg end: 122 1.6 mrg gomp_mutex_unlock (&dev->openacc.async.lock); 123 1.6 mrg return ret_aq; 124 1.6 mrg } 125 1.6 mrg 126 1.6 mrg /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This 127 1.6 mrg might return NULL if no asyncqueue is to be used. Otherwise, create the 128 1.6 mrg asyncqueue if it doesn't exist yet. */ 129 1.6 mrg 130 1.6 mrg attribute_hidden struct goacc_asyncqueue * 131 1.6 mrg get_goacc_asyncqueue (int async) 132 1.6 mrg { 133 1.6 mrg struct goacc_thread *thr = get_goacc_thread (); 134 1.6 mrg return lookup_goacc_asyncqueue (thr, true, async); 135 1.6 mrg } 136 1.6 mrg 137 1.1 mrg int 138 1.1 mrg acc_async_test (int async) 139 1.1 mrg { 140 1.1 mrg struct goacc_thread *thr = goacc_thread (); 141 1.1 mrg 142 1.1 mrg if (!thr || !thr->dev) 143 1.1 mrg gomp_fatal ("no device active"); 144 1.1 mrg 145 1.6 mrg goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); 146 1.6 mrg if (!aq) 147 1.6 mrg return 1; 148 1.6 mrg 149 1.6 mrg acc_prof_info prof_info; 150 1.6 mrg acc_api_info api_info; 151 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); 152 1.6 mrg if (profiling_p) 153 1.6 mrg { 154 1.6 mrg prof_info.async = async; 155 1.6 mrg prof_info.async_queue = prof_info.async; 156 1.6 mrg } 157 1.6 mrg 158 1.6 mrg int res = thr->dev->openacc.async.test_func (aq); 159 1.6 mrg 160 1.6 mrg if (profiling_p) 161 1.6 mrg { 162 1.6 mrg thr->prof_info = NULL; 163 1.6 mrg thr->api_info = NULL; 164 1.6 mrg } 165 1.6 mrg 166 1.6 mrg return res; 167 1.1 mrg } 168 1.1 mrg 169 1.1 mrg int 170 1.1 mrg acc_async_test_all (void) 171 1.1 mrg { 172 1.6 mrg struct goacc_thread *thr = get_goacc_thread (); 173 1.1 mrg 174 1.6 mrg acc_prof_info prof_info; 175 1.6 mrg acc_api_info api_info; 176 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); 177 1.6 mrg 178 1.6 mrg int ret = 1; 179 1.6 mrg gomp_mutex_lock (&thr->dev->openacc.async.lock); 180 1.6 mrg for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) 181 1.6 mrg if (!thr->dev->openacc.async.test_func (l->aq)) 182 1.6 mrg { 183 1.6 mrg ret = 0; 184 1.6 mrg break; 185 1.6 mrg } 186 1.6 mrg gomp_mutex_unlock (&thr->dev->openacc.async.lock); 187 1.6 mrg 188 1.6 mrg if (profiling_p) 189 1.6 mrg { 190 1.6 mrg thr->prof_info = NULL; 191 1.6 mrg thr->api_info = NULL; 192 1.6 mrg } 193 1.1 mrg 194 1.6 mrg return ret; 195 1.1 mrg } 196 1.1 mrg 197 1.1 mrg void 198 1.1 mrg acc_wait (int async) 199 1.1 mrg { 200 1.6 mrg struct goacc_thread *thr = get_goacc_thread (); 201 1.1 mrg 202 1.6 mrg goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); 203 1.6 mrg if (!aq) 204 1.6 mrg return; 205 1.6 mrg 206 1.6 mrg acc_prof_info prof_info; 207 1.6 mrg acc_api_info api_info; 208 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); 209 1.6 mrg if (profiling_p) 210 1.6 mrg { 211 1.6 mrg prof_info.async = async; 212 1.6 mrg prof_info.async_queue = prof_info.async; 213 1.6 mrg } 214 1.6 mrg 215 1.6 mrg if (!thr->dev->openacc.async.synchronize_func (aq)) 216 1.6 mrg gomp_fatal ("wait on %d failed", async); 217 1.6 mrg 218 1.6 mrg if (profiling_p) 219 1.6 mrg { 220 1.6 mrg thr->prof_info = NULL; 221 1.6 mrg thr->api_info = NULL; 222 1.6 mrg } 223 1.1 mrg } 224 1.1 mrg 225 1.4 mrg /* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait. */ 226 1.4 mrg #ifdef HAVE_ATTRIBUTE_ALIAS 227 1.4 mrg strong_alias (acc_wait, acc_async_wait) 228 1.4 mrg #else 229 1.4 mrg void 230 1.4 mrg acc_async_wait (int async) 231 1.4 mrg { 232 1.4 mrg acc_wait (async); 233 1.4 mrg } 234 1.4 mrg #endif 235 1.4 mrg 236 1.1 mrg void 237 1.1 mrg acc_wait_async (int async1, int async2) 238 1.1 mrg { 239 1.6 mrg struct goacc_thread *thr = get_goacc_thread (); 240 1.1 mrg 241 1.6 mrg goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1); 242 1.6 mrg /* TODO: Is this also correct for acc_async_sync, assuming that in this case, 243 1.6 mrg we'll always be synchronous anyways? */ 244 1.6 mrg if (!aq1) 245 1.6 mrg return; 246 1.6 mrg 247 1.6 mrg acc_prof_info prof_info; 248 1.6 mrg acc_api_info api_info; 249 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); 250 1.6 mrg if (profiling_p) 251 1.6 mrg { 252 1.6 mrg prof_info.async = async2; 253 1.6 mrg prof_info.async_queue = prof_info.async; 254 1.6 mrg } 255 1.6 mrg 256 1.6 mrg goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2); 257 1.6 mrg /* An async queue is always synchronized with itself. */ 258 1.6 mrg if (aq1 == aq2) 259 1.6 mrg goto out_prof; 260 1.6 mrg 261 1.6 mrg if (aq2) 262 1.6 mrg { 263 1.6 mrg if (!thr->dev->openacc.async.serialize_func (aq1, aq2)) 264 1.6 mrg gomp_fatal ("ordering of async ids %d and %d failed", async1, async2); 265 1.6 mrg } 266 1.6 mrg else 267 1.6 mrg { 268 1.6 mrg /* TODO: Local thread synchronization. 269 1.6 mrg Necessary for the "async2 == acc_async_sync" case, or can just skip? */ 270 1.6 mrg if (!thr->dev->openacc.async.synchronize_func (aq1)) 271 1.6 mrg gomp_fatal ("wait on %d failed", async1); 272 1.6 mrg } 273 1.6 mrg 274 1.6 mrg out_prof: 275 1.6 mrg if (profiling_p) 276 1.6 mrg { 277 1.6 mrg thr->prof_info = NULL; 278 1.6 mrg thr->api_info = NULL; 279 1.6 mrg } 280 1.1 mrg } 281 1.1 mrg 282 1.1 mrg void 283 1.1 mrg acc_wait_all (void) 284 1.1 mrg { 285 1.1 mrg struct goacc_thread *thr = goacc_thread (); 286 1.1 mrg 287 1.6 mrg acc_prof_info prof_info; 288 1.6 mrg acc_api_info api_info; 289 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); 290 1.6 mrg 291 1.6 mrg bool ret = true; 292 1.6 mrg gomp_mutex_lock (&thr->dev->openacc.async.lock); 293 1.6 mrg for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) 294 1.6 mrg ret &= thr->dev->openacc.async.synchronize_func (l->aq); 295 1.6 mrg gomp_mutex_unlock (&thr->dev->openacc.async.lock); 296 1.6 mrg 297 1.6 mrg if (profiling_p) 298 1.6 mrg { 299 1.6 mrg thr->prof_info = NULL; 300 1.6 mrg thr->api_info = NULL; 301 1.6 mrg } 302 1.1 mrg 303 1.6 mrg if (!ret) 304 1.6 mrg gomp_fatal ("wait all failed"); 305 1.1 mrg } 306 1.1 mrg 307 1.4 mrg /* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all. */ 308 1.4 mrg #ifdef HAVE_ATTRIBUTE_ALIAS 309 1.4 mrg strong_alias (acc_wait_all, acc_async_wait_all) 310 1.4 mrg #else 311 1.4 mrg void 312 1.4 mrg acc_async_wait_all (void) 313 1.4 mrg { 314 1.4 mrg acc_wait_all (); 315 1.4 mrg } 316 1.4 mrg #endif 317 1.4 mrg 318 1.1 mrg void 319 1.1 mrg acc_wait_all_async (int async) 320 1.1 mrg { 321 1.6 mrg struct goacc_thread *thr = get_goacc_thread (); 322 1.6 mrg 323 1.6 mrg acc_prof_info prof_info; 324 1.6 mrg acc_api_info api_info; 325 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); 326 1.6 mrg if (profiling_p) 327 1.6 mrg { 328 1.6 mrg prof_info.async = async; 329 1.6 mrg prof_info.async_queue = prof_info.async; 330 1.6 mrg } 331 1.6 mrg 332 1.6 mrg goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async); 333 1.6 mrg 334 1.6 mrg bool ret = true; 335 1.6 mrg gomp_mutex_lock (&thr->dev->openacc.async.lock); 336 1.6 mrg for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) 337 1.6 mrg { 338 1.6 mrg if (waiting_queue) 339 1.6 mrg ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue); 340 1.6 mrg else 341 1.6 mrg /* TODO: Local thread synchronization. 342 1.6 mrg Necessary for the "async2 == acc_async_sync" case, or can just skip? */ 343 1.6 mrg ret &= thr->dev->openacc.async.synchronize_func (l->aq); 344 1.6 mrg } 345 1.6 mrg gomp_mutex_unlock (&thr->dev->openacc.async.lock); 346 1.6 mrg 347 1.6 mrg if (profiling_p) 348 1.6 mrg { 349 1.6 mrg thr->prof_info = NULL; 350 1.6 mrg thr->api_info = NULL; 351 1.6 mrg } 352 1.6 mrg 353 1.6 mrg if (!ret) 354 1.6 mrg gomp_fatal ("wait all async(%d) failed", async); 355 1.6 mrg } 356 1.6 mrg 357 1.6 mrg void 358 1.6 mrg GOACC_wait (int async, int num_waits, ...) 359 1.6 mrg { 360 1.6 mrg goacc_lazy_initialize (); 361 1.1 mrg 362 1.1 mrg struct goacc_thread *thr = goacc_thread (); 363 1.1 mrg 364 1.6 mrg /* No nesting. */ 365 1.6 mrg assert (thr->prof_info == NULL); 366 1.6 mrg assert (thr->api_info == NULL); 367 1.6 mrg acc_prof_info prof_info; 368 1.6 mrg acc_api_info api_info; 369 1.6 mrg bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); 370 1.6 mrg if (profiling_p) 371 1.6 mrg { 372 1.6 mrg prof_info.async = async; 373 1.6 mrg prof_info.async_queue = prof_info.async; 374 1.6 mrg } 375 1.6 mrg 376 1.6 mrg if (num_waits) 377 1.6 mrg { 378 1.6 mrg va_list ap; 379 1.6 mrg 380 1.6 mrg va_start (ap, num_waits); 381 1.6 mrg goacc_wait (async, num_waits, &ap); 382 1.6 mrg va_end (ap); 383 1.6 mrg } 384 1.6 mrg else if (async == acc_async_sync) 385 1.6 mrg acc_wait_all (); 386 1.6 mrg else 387 1.6 mrg acc_wait_all_async (async); 388 1.6 mrg 389 1.6 mrg if (profiling_p) 390 1.6 mrg { 391 1.6 mrg thr->prof_info = NULL; 392 1.6 mrg thr->api_info = NULL; 393 1.6 mrg } 394 1.6 mrg } 395 1.6 mrg 396 1.6 mrg attribute_hidden void 397 1.6 mrg goacc_wait (int async, int num_waits, va_list *ap) 398 1.6 mrg { 399 1.6 mrg while (num_waits--) 400 1.6 mrg { 401 1.6 mrg int qid = va_arg (*ap, int); 402 1.6 mrg 403 1.6 mrg /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */ 404 1.6 mrg if (qid == acc_async_noval) 405 1.6 mrg { 406 1.6 mrg if (async == acc_async_sync) 407 1.6 mrg acc_wait_all (); 408 1.6 mrg else 409 1.6 mrg acc_wait_all_async (async); 410 1.6 mrg break; 411 1.6 mrg } 412 1.6 mrg 413 1.6 mrg if (acc_async_test (qid)) 414 1.6 mrg continue; 415 1.6 mrg 416 1.6 mrg if (async == acc_async_sync) 417 1.6 mrg acc_wait (qid); 418 1.6 mrg else if (qid == async) 419 1.6 mrg /* If we're waiting on the same asynchronous queue as we're 420 1.6 mrg launching on, the queue itself will order work as 421 1.6 mrg required, so there's no need to wait explicitly. */ 422 1.6 mrg ; 423 1.6 mrg else 424 1.6 mrg acc_wait_async (qid, async); 425 1.6 mrg } 426 1.6 mrg } 427 1.6 mrg 428 1.6 mrg attribute_hidden void 429 1.6 mrg goacc_async_free (struct gomp_device_descr *devicep, 430 1.6 mrg struct goacc_asyncqueue *aq, void *ptr) 431 1.6 mrg { 432 1.6 mrg if (!aq) 433 1.6 mrg free (ptr); 434 1.6 mrg else 435 1.6 mrg devicep->openacc.async.queue_callback_func (aq, free, ptr); 436 1.6 mrg } 437 1.6 mrg 438 1.6 mrg /* This function initializes the asyncqueues for the device specified by 439 1.6 mrg DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on 440 1.6 mrg return. */ 441 1.6 mrg 442 1.6 mrg attribute_hidden void 443 1.6 mrg goacc_init_asyncqueues (struct gomp_device_descr *devicep) 444 1.6 mrg { 445 1.6 mrg devicep->openacc.async.nasyncqueue = 0; 446 1.6 mrg devicep->openacc.async.asyncqueue = NULL; 447 1.6 mrg devicep->openacc.async.active = NULL; 448 1.6 mrg gomp_mutex_init (&devicep->openacc.async.lock); 449 1.6 mrg } 450 1.6 mrg 451 1.6 mrg /* This function finalizes the asyncqueues for the device specified by DEVICEP. 452 1.6 mrg TODO DEVICEP must be locked on entry, and remains locked on return. */ 453 1.1 mrg 454 1.6 mrg attribute_hidden bool 455 1.6 mrg goacc_fini_asyncqueues (struct gomp_device_descr *devicep) 456 1.6 mrg { 457 1.6 mrg bool ret = true; 458 1.6 mrg gomp_mutex_lock (&devicep->openacc.async.lock); 459 1.6 mrg if (devicep->openacc.async.nasyncqueue > 0) 460 1.6 mrg { 461 1.6 mrg goacc_aq_list next; 462 1.6 mrg for (goacc_aq_list l = devicep->openacc.async.active; l; l = next) 463 1.6 mrg { 464 1.6 mrg ret &= devicep->openacc.async.destruct_func (l->aq); 465 1.6 mrg next = l->next; 466 1.6 mrg free (l); 467 1.6 mrg } 468 1.6 mrg free (devicep->openacc.async.asyncqueue); 469 1.6 mrg devicep->openacc.async.nasyncqueue = 0; 470 1.6 mrg devicep->openacc.async.asyncqueue = NULL; 471 1.6 mrg devicep->openacc.async.active = NULL; 472 1.6 mrg } 473 1.6 mrg gomp_mutex_unlock (&devicep->openacc.async.lock); 474 1.6 mrg gomp_mutex_destroy (&devicep->openacc.async.lock); 475 1.6 mrg return ret; 476 1.1 mrg } 477