Home | History | Annotate | Line # | Download | only in libgomp
oacc-async.c revision 1.7
      1 /* OpenACC Runtime Library Definitions.
      2 
      3    Copyright (C) 2013-2022 Free Software Foundation, Inc.
      4 
      5    Contributed by Mentor Embedded.
      6 
      7    This file is part of the GNU Offloading and Multi Processing Library
      8    (libgomp).
      9 
     10    Libgomp is free software; you can redistribute it and/or modify it
     11    under the terms of the GNU General Public License as published by
     12    the Free Software Foundation; either version 3, or (at your option)
     13    any later version.
     14 
     15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     18    more details.
     19 
     20    Under Section 7 of GPL version 3, you are granted additional
     21    permissions described in the GCC Runtime Library Exception, version
     22    3.1, as published by the Free Software Foundation.
     23 
     24    You should have received a copy of the GNU General Public License and
     25    a copy of the GCC Runtime Library Exception along with this program;
     26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     27    <http://www.gnu.org/licenses/>.  */
     28 
     29 #include <assert.h>
     30 #include <string.h>
     31 #include "openacc.h"
     32 #include "libgomp.h"
     33 #include "oacc-int.h"
     34 
     35 static struct goacc_thread *
     36 get_goacc_thread (void)
     37 {
     38   struct goacc_thread *thr = goacc_thread ();
     39 
     40   if (!thr || !thr->dev)
     41     gomp_fatal ("no device active");
     42 
     43   return thr;
     44 }
     45 
     46 static int
     47 validate_async_val (int async)
     48 {
     49   if (!async_valid_p (async))
     50     gomp_fatal ("invalid async-argument: %d", async);
     51 
     52   if (async == acc_async_sync)
     53     return -1;
     54 
     55   if (async == acc_async_noval)
     56     return 0;
     57 
     58   if (async >= 0)
     59     /* TODO: we reserve 0 for acc_async_noval before we can clarify the
     60        semantics of "default_async".  */
     61     return 1 + async;
     62   else
     63     __builtin_unreachable ();
     64 }
     65 
     66 /* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
     67    might return NULL if no asyncqueue is to be used.  Otherwise, if CREATE,
     68    create the asyncqueue if it doesn't exist yet.
     69 
     70    Unless CREATE, this will not generate any OpenACC Profiling Interface
     71    events.  */
     72 
     73 attribute_hidden struct goacc_asyncqueue *
     74 lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
     75 {
     76   async = validate_async_val (async);
     77   if (async < 0)
     78     return NULL;
     79 
     80   struct goacc_asyncqueue *ret_aq = NULL;
     81   struct gomp_device_descr *dev = thr->dev;
     82 
     83   gomp_mutex_lock (&dev->openacc.async.lock);
     84 
     85   if (!create
     86       && (async >= dev->openacc.async.nasyncqueue
     87 	  || !dev->openacc.async.asyncqueue[async]))
     88     goto end;
     89 
     90   if (async >= dev->openacc.async.nasyncqueue)
     91     {
     92       int diff = async + 1 - dev->openacc.async.nasyncqueue;
     93       dev->openacc.async.asyncqueue
     94 	= gomp_realloc (dev->openacc.async.asyncqueue,
     95 			sizeof (goacc_aq) * (async + 1));
     96       memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
     97 	      0, sizeof (goacc_aq) * diff);
     98       dev->openacc.async.nasyncqueue = async + 1;
     99     }
    100 
    101   if (!dev->openacc.async.asyncqueue[async])
    102     {
    103       dev->openacc.async.asyncqueue[async]
    104 	= dev->openacc.async.construct_func (dev->target_id);
    105 
    106       if (!dev->openacc.async.asyncqueue[async])
    107 	{
    108 	  gomp_mutex_unlock (&dev->openacc.async.lock);
    109 	  gomp_fatal ("async %d creation failed", async);
    110 	}
    111 
    112       /* Link new async queue into active list.  */
    113       goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
    114       n->aq = dev->openacc.async.asyncqueue[async];
    115       n->next = dev->openacc.async.active;
    116       dev->openacc.async.active = n;
    117     }
    118 
    119   ret_aq = dev->openacc.async.asyncqueue[async];
    120 
    121  end:
    122   gomp_mutex_unlock (&dev->openacc.async.lock);
    123   return ret_aq;
    124 }
    125 
    126 /* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
    127    might return NULL if no asyncqueue is to be used.  Otherwise, create the
    128    asyncqueue if it doesn't exist yet.  */
    129 
    130 attribute_hidden struct goacc_asyncqueue *
    131 get_goacc_asyncqueue (int async)
    132 {
    133   struct goacc_thread *thr = get_goacc_thread ();
    134   return lookup_goacc_asyncqueue (thr, true, async);
    135 }
    136 
    137 int
    138 acc_async_test (int async)
    139 {
    140   struct goacc_thread *thr = goacc_thread ();
    141 
    142   if (!thr || !thr->dev)
    143     gomp_fatal ("no device active");
    144 
    145   goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
    146   if (!aq)
    147     return 1;
    148 
    149   acc_prof_info prof_info;
    150   acc_api_info api_info;
    151   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
    152   if (profiling_p)
    153     {
    154       prof_info.async = async;
    155       prof_info.async_queue = prof_info.async;
    156     }
    157 
    158   int res = thr->dev->openacc.async.test_func (aq);
    159 
    160   if (profiling_p)
    161     {
    162       thr->prof_info = NULL;
    163       thr->api_info = NULL;
    164     }
    165 
    166   return res;
    167 }
    168 
    169 int
    170 acc_async_test_all (void)
    171 {
    172   struct goacc_thread *thr = get_goacc_thread ();
    173 
    174   acc_prof_info prof_info;
    175   acc_api_info api_info;
    176   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
    177 
    178   int ret = 1;
    179   gomp_mutex_lock (&thr->dev->openacc.async.lock);
    180   for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
    181     if (!thr->dev->openacc.async.test_func (l->aq))
    182       {
    183 	ret = 0;
    184 	break;
    185       }
    186   gomp_mutex_unlock (&thr->dev->openacc.async.lock);
    187 
    188   if (profiling_p)
    189     {
    190       thr->prof_info = NULL;
    191       thr->api_info = NULL;
    192     }
    193 
    194   return ret;
    195 }
    196 
    197 void
    198 acc_wait (int async)
    199 {
    200   struct goacc_thread *thr = get_goacc_thread ();
    201 
    202   goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
    203   if (!aq)
    204     return;
    205 
    206   acc_prof_info prof_info;
    207   acc_api_info api_info;
    208   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
    209   if (profiling_p)
    210     {
    211       prof_info.async = async;
    212       prof_info.async_queue = prof_info.async;
    213     }
    214 
    215   if (!thr->dev->openacc.async.synchronize_func (aq))
    216     gomp_fatal ("wait on %d failed", async);
    217 
    218   if (profiling_p)
    219     {
    220       thr->prof_info = NULL;
    221       thr->api_info = NULL;
    222     }
    223 }
    224 
/* OpenACC 1.0 compatibility: acc_async_wait is the old name of acc_wait.
   Without alias support, fall back to a forwarding function.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait (int async)
{
  acc_wait (async);
}
#else
strong_alias (acc_wait, acc_async_wait)
#endif
    235 
    236 void
    237 acc_wait_async (int async1, int async2)
    238 {
    239   struct goacc_thread *thr = get_goacc_thread ();
    240 
    241   goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
    242   /* TODO: Is this also correct for acc_async_sync, assuming that in this case,
    243      we'll always be synchronous anyways?  */
    244   if (!aq1)
    245     return;
    246 
    247   acc_prof_info prof_info;
    248   acc_api_info api_info;
    249   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
    250   if (profiling_p)
    251     {
    252       prof_info.async = async2;
    253       prof_info.async_queue = prof_info.async;
    254     }
    255 
    256   goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
    257   /* An async queue is always synchronized with itself.  */
    258   if (aq1 == aq2)
    259     goto out_prof;
    260 
    261   if (aq2)
    262     {
    263       if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
    264 	gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
    265     }
    266   else
    267     {
    268       /* TODO: Local thread synchronization.
    269 	 Necessary for the "async2 == acc_async_sync" case, or can just skip?  */
    270       if (!thr->dev->openacc.async.synchronize_func (aq1))
    271 	gomp_fatal ("wait on %d failed", async1);
    272     }
    273 
    274  out_prof:
    275   if (profiling_p)
    276     {
    277       thr->prof_info = NULL;
    278       thr->api_info = NULL;
    279     }
    280 }
    281 
    282 void
    283 acc_wait_all (void)
    284 {
    285   struct goacc_thread *thr = goacc_thread ();
    286 
    287   acc_prof_info prof_info;
    288   acc_api_info api_info;
    289   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
    290 
    291   bool ret = true;
    292   gomp_mutex_lock (&thr->dev->openacc.async.lock);
    293   for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
    294     ret &= thr->dev->openacc.async.synchronize_func (l->aq);
    295   gomp_mutex_unlock (&thr->dev->openacc.async.lock);
    296 
    297   if (profiling_p)
    298     {
    299       thr->prof_info = NULL;
    300       thr->api_info = NULL;
    301     }
    302 
    303   if (!ret)
    304     gomp_fatal ("wait all failed");
    305 }
    306 
/* OpenACC 1.0 compatibility: acc_async_wait_all is the old name of
   acc_wait_all.  Without alias support, fall back to a forwarding
   function.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait_all (void)
{
  acc_wait_all ();
}
#else
strong_alias (acc_wait_all, acc_async_wait_all)
#endif
    317 
    318 void
    319 acc_wait_all_async (int async)
    320 {
    321   struct goacc_thread *thr = get_goacc_thread ();
    322 
    323   acc_prof_info prof_info;
    324   acc_api_info api_info;
    325   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
    326   if (profiling_p)
    327     {
    328       prof_info.async = async;
    329       prof_info.async_queue = prof_info.async;
    330     }
    331 
    332   goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
    333 
    334   bool ret = true;
    335   gomp_mutex_lock (&thr->dev->openacc.async.lock);
    336   for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
    337     {
    338       if (waiting_queue)
    339 	ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
    340       else
    341 	/* TODO: Local thread synchronization.
    342 	   Necessary for the "async2 == acc_async_sync" case, or can just skip?  */
    343 	ret &= thr->dev->openacc.async.synchronize_func (l->aq);
    344     }
    345   gomp_mutex_unlock (&thr->dev->openacc.async.lock);
    346 
    347   if (profiling_p)
    348     {
    349       thr->prof_info = NULL;
    350       thr->api_info = NULL;
    351     }
    352 
    353   if (!ret)
    354     gomp_fatal ("wait all async(%d) failed", async);
    355 }
    356 
    357 void
    358 GOACC_wait (int async, int num_waits, ...)
    359 {
    360   goacc_lazy_initialize ();
    361 
    362   struct goacc_thread *thr = goacc_thread ();
    363 
    364   /* No nesting.  */
    365   assert (thr->prof_info == NULL);
    366   assert (thr->api_info == NULL);
    367   acc_prof_info prof_info;
    368   acc_api_info api_info;
    369   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
    370   if (profiling_p)
    371     {
    372       prof_info.async = async;
    373       prof_info.async_queue = prof_info.async;
    374     }
    375 
    376   if (num_waits)
    377     {
    378       va_list ap;
    379 
    380       va_start (ap, num_waits);
    381       goacc_wait (async, num_waits, &ap);
    382       va_end (ap);
    383     }
    384   else if (async == acc_async_sync)
    385     acc_wait_all ();
    386   else
    387     acc_wait_all_async (async);
    388 
    389   if (profiling_p)
    390     {
    391       thr->prof_info = NULL;
    392       thr->api_info = NULL;
    393     }
    394 }
    395 
    396 attribute_hidden void
    397 goacc_wait (int async, int num_waits, va_list *ap)
    398 {
    399   while (num_waits--)
    400     {
    401       int qid = va_arg (*ap, int);
    402 
    403       /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'.  */
    404       if (qid == acc_async_noval)
    405 	{
    406 	  if (async == acc_async_sync)
    407 	    acc_wait_all ();
    408 	  else
    409 	    acc_wait_all_async (async);
    410 	  break;
    411 	}
    412 
    413       if (acc_async_test (qid))
    414 	continue;
    415 
    416       if (async == acc_async_sync)
    417 	acc_wait (qid);
    418       else if (qid == async)
    419 	/* If we're waiting on the same asynchronous queue as we're
    420 	   launching on, the queue itself will order work as
    421 	   required, so there's no need to wait explicitly.  */
    422 	;
    423       else
    424 	acc_wait_async (qid, async);
    425     }
    426 }
    427 
    428 attribute_hidden void
    429 goacc_async_free (struct gomp_device_descr *devicep,
    430 		  struct goacc_asyncqueue *aq, void *ptr)
    431 {
    432   if (!aq)
    433     free (ptr);
    434   else
    435     devicep->openacc.async.queue_callback_func (aq, free, ptr);
    436 }
    437 
    438 /* This function initializes the asyncqueues for the device specified by
    439    DEVICEP.  TODO DEVICEP must be locked on entry, and remains locked on
    440    return.  */
    441 
    442 attribute_hidden void
    443 goacc_init_asyncqueues (struct gomp_device_descr *devicep)
    444 {
    445   devicep->openacc.async.nasyncqueue = 0;
    446   devicep->openacc.async.asyncqueue = NULL;
    447   devicep->openacc.async.active = NULL;
    448   gomp_mutex_init (&devicep->openacc.async.lock);
    449 }
    450 
    451 /* This function finalizes the asyncqueues for the device specified by DEVICEP.
    452    TODO DEVICEP must be locked on entry, and remains locked on return.  */
    453 
    454 attribute_hidden bool
    455 goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
    456 {
    457   bool ret = true;
    458   gomp_mutex_lock (&devicep->openacc.async.lock);
    459   if (devicep->openacc.async.nasyncqueue > 0)
    460     {
    461       goacc_aq_list next;
    462       for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
    463 	{
    464 	  ret &= devicep->openacc.async.destruct_func (l->aq);
    465 	  next = l->next;
    466 	  free (l);
    467 	}
    468       free (devicep->openacc.async.asyncqueue);
    469       devicep->openacc.async.nasyncqueue = 0;
    470       devicep->openacc.async.asyncqueue = NULL;
    471       devicep->openacc.async.active = NULL;
    472     }
    473   gomp_mutex_unlock (&devicep->openacc.async.lock);
    474   gomp_mutex_destroy (&devicep->openacc.async.lock);
    475   return ret;
    476 }
    477