Home | History | Annotate | Line # | Download | only in libgomp
oacc-init.c revision 1.1.1.1
      1 /* OpenACC Runtime initialization routines
      2 
      3    Copyright (C) 2013-2015 Free Software Foundation, Inc.
      4 
      5    Contributed by Mentor Embedded.
      6 
      7    This file is part of the GNU Offloading and Multi Processing Library
      8    (libgomp).
      9 
     10    Libgomp is free software; you can redistribute it and/or modify it
     11    under the terms of the GNU General Public License as published by
     12    the Free Software Foundation; either version 3, or (at your option)
     13    any later version.
     14 
     15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     18    more details.
     19 
     20    Under Section 7 of GPL version 3, you are granted additional
     21    permissions described in the GCC Runtime Library Exception, version
     22    3.1, as published by the Free Software Foundation.
     23 
     24    You should have received a copy of the GNU General Public License and
     25    a copy of the GCC Runtime Library Exception along with this program;
     26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     27    <http://www.gnu.org/licenses/>.  */
     28 
     29 #include "libgomp.h"
     30 #include "oacc-int.h"
     31 #include "openacc.h"
     32 #include "plugin/plugin-host.h"
     33 #include <assert.h>
     34 #include <stdlib.h>
     35 #include <strings.h>
     36 #include <stdbool.h>
     37 #include <string.h>
     38 
/* Lock taken by this file around updates to the dispatchers table and
   around device initialization and shutdown.  */
static gomp_mutex_t acc_device_lock;

/* A cached version of the dispatcher for the global "current" accelerator type,
   e.g. used as the default when creating new host threads.  This is the
   device-type equivalent of goacc_device_num (which specifies which device to
   use out of potentially several of the same type).  If there are several
   devices of a given type, this points at the first one.  */

static struct gomp_device_descr *cached_base_dev = NULL;

/* Per-host-thread OpenACC state: either a thread-local pointer or, when
   neither HAVE_TLS nor USE_EMUTLS is defined, a pthread key.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct goacc_thread *goacc_tls_data;
#else
pthread_key_t goacc_tls_key;
#endif
/* Key whose destructor (goacc_destroy_thread) releases a thread's state.  */
static pthread_key_t goacc_cleanup_key;

/* Head of the list of all thread blocks, linked through their "next"
   fields, and the lock taken while walking or modifying that list.  */
static struct goacc_thread *goacc_threads;
static gomp_mutex_t goacc_thread_lock;

/* An array of dispatchers for device types, indexed by the type.  This array
   only references "base" devices, and other instances of the same type are
   found by simply indexing from each such device (which are stored linearly,
   grouped by device in target.c:devices).  */
static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
     64 
     65 attribute_hidden void
     66 goacc_register (struct gomp_device_descr *disp)
     67 {
     68   /* Only register the 0th device here.  */
     69   if (disp->target_id != 0)
     70     return;
     71 
     72   gomp_mutex_lock (&acc_device_lock);
     73 
     74   assert (acc_device_type (disp->type) != acc_device_none
     75 	  && acc_device_type (disp->type) != acc_device_default
     76 	  && acc_device_type (disp->type) != acc_device_not_host);
     77   assert (!dispatchers[disp->type]);
     78   dispatchers[disp->type] = disp;
     79 
     80   gomp_mutex_unlock (&acc_device_lock);
     81 }
     82 
/* Translate a plugin name into the name OpenACC uses for it: "nvptx" is
   spelled "nvidia"; every other NAME is returned unchanged (the very same
   pointer).  */

static const char *
get_openacc_name (const char *name)
{
  return strcmp (name, "nvptx") != 0 ? name : "nvidia";
}
     93 
     94 static const char *
     95 name_of_acc_device_t (enum acc_device_t type)
     96 {
     97   switch (type)
     98     {
     99     case acc_device_none: return "none";
    100     case acc_device_default: return "default";
    101     case acc_device_host: return "host";
    102     case acc_device_host_nonshm: return "host_nonshm";
    103     case acc_device_not_host: return "not_host";
    104     case acc_device_nvidia: return "nvidia";
    105     default: gomp_fatal ("unknown device type %u", (unsigned) type);
    106     }
    107 }
    108 
    109 static struct gomp_device_descr *
    110 resolve_device (acc_device_t d)
    111 {
    112   acc_device_t d_arg = d;
    113 
    114   switch (d)
    115     {
    116     case acc_device_default:
    117       {
    118 	if (goacc_device_type)
    119 	  {
    120 	    /* Lookup the named device.  */
    121 	    while (++d != _ACC_device_hwm)
    122 	      if (dispatchers[d]
    123 		  && !strcasecmp (goacc_device_type,
    124 				  get_openacc_name (dispatchers[d]->name))
    125 		  && dispatchers[d]->get_num_devices_func () > 0)
    126 		goto found;
    127 
    128 	    gomp_fatal ("device type %s not supported", goacc_device_type);
    129 	  }
    130 
    131 	/* No default device specified, so start scanning for any non-host
    132 	   device that is available.  */
    133 	d = acc_device_not_host;
    134       }
    135       /* FALLTHROUGH */
    136 
    137     case acc_device_not_host:
    138       /* Find the first available device after acc_device_not_host.  */
    139       while (++d != _ACC_device_hwm)
    140 	if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
    141 	  goto found;
    142       if (d_arg == acc_device_default)
    143 	{
    144 	  d = acc_device_host;
    145 	  goto found;
    146 	}
    147       gomp_fatal ("no device found");
    148       break;
    149 
    150     case acc_device_host:
    151       break;
    152 
    153     default:
    154       if (d > _ACC_device_hwm)
    155 	gomp_fatal ("device %u out of range", (unsigned)d);
    156       break;
    157     }
    158  found:
    159 
    160   assert (d != acc_device_none
    161 	  && d != acc_device_default
    162 	  && d != acc_device_not_host);
    163 
    164   return dispatchers[d];
    165 }
    166 
    167 /* This is called when plugins have been initialized, and serves to call
    168    (indirectly) the target's device_init hook.  Calling multiple times without
    169    an intervening acc_shutdown_1 call is an error.  */
    170 
    171 static struct gomp_device_descr *
    172 acc_init_1 (acc_device_t d)
    173 {
    174   struct gomp_device_descr *base_dev, *acc_dev;
    175   int ndevs;
    176 
    177   base_dev = resolve_device (d);
    178 
    179   ndevs = base_dev->get_num_devices_func ();
    180 
    181   if (!base_dev || ndevs <= 0 || goacc_device_num >= ndevs)
    182     gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
    183 
    184   acc_dev = &base_dev[goacc_device_num];
    185 
    186   if (acc_dev->is_initialized)
    187     gomp_fatal ("device already active");
    188 
    189   gomp_init_device (acc_dev);
    190 
    191   return base_dev;
    192 }
    193 
    194 static void
    195 acc_shutdown_1 (acc_device_t d)
    196 {
    197   struct gomp_device_descr *base_dev;
    198   struct goacc_thread *walk;
    199   int ndevs, i;
    200   bool devices_active = false;
    201 
    202   /* Get the base device for this device type.  */
    203   base_dev = resolve_device (d);
    204 
    205   if (!base_dev)
    206     gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
    207 
    208   gomp_mutex_lock (&goacc_thread_lock);
    209 
    210   /* Free target-specific TLS data and close all devices.  */
    211   for (walk = goacc_threads; walk != NULL; walk = walk->next)
    212     {
    213       if (walk->target_tls)
    214 	base_dev->openacc.destroy_thread_data_func (walk->target_tls);
    215 
    216       walk->target_tls = NULL;
    217 
    218       /* This would mean the user is shutting down OpenACC in the middle of an
    219          "acc data" pragma.  Likely not intentional.  */
    220       if (walk->mapped_data)
    221 	gomp_fatal ("shutdown in 'acc data' region");
    222 
    223       /* Similarly, if this happens then user code has done something weird.  */
    224       if (walk->saved_bound_dev)
    225         gomp_fatal ("shutdown during host fallback");
    226 
    227       if (walk->dev)
    228 	{
    229 	  gomp_mutex_lock (&walk->dev->lock);
    230 	  gomp_free_memmap (&walk->dev->mem_map);
    231 	  gomp_mutex_unlock (&walk->dev->lock);
    232 
    233 	  walk->dev = NULL;
    234 	  walk->base_dev = NULL;
    235 	}
    236     }
    237 
    238   gomp_mutex_unlock (&goacc_thread_lock);
    239 
    240   ndevs = base_dev->get_num_devices_func ();
    241 
    242   /* Close all the devices of this type that have been opened.  */
    243   for (i = 0; i < ndevs; i++)
    244     {
    245       struct gomp_device_descr *acc_dev = &base_dev[i];
    246       if (acc_dev->is_initialized)
    247         {
    248 	  devices_active = true;
    249 	  gomp_fini_device (acc_dev);
    250 	}
    251     }
    252 
    253   if (!devices_active)
    254     gomp_fatal ("no device initialized");
    255 }
    256 
    257 static struct goacc_thread *
    258 goacc_new_thread (void)
    259 {
    260   struct goacc_thread *thr = gomp_malloc (sizeof (struct gomp_thread));
    261 
    262 #if defined HAVE_TLS || defined USE_EMUTLS
    263   goacc_tls_data = thr;
    264 #else
    265   pthread_setspecific (goacc_tls_key, thr);
    266 #endif
    267 
    268   pthread_setspecific (goacc_cleanup_key, thr);
    269 
    270   gomp_mutex_lock (&goacc_thread_lock);
    271   thr->next = goacc_threads;
    272   goacc_threads = thr;
    273   gomp_mutex_unlock (&goacc_thread_lock);
    274 
    275   return thr;
    276 }
    277 
    278 static void
    279 goacc_destroy_thread (void *data)
    280 {
    281   struct goacc_thread *thr = data, *walk, *prev;
    282 
    283   gomp_mutex_lock (&goacc_thread_lock);
    284 
    285   if (thr)
    286     {
    287       struct gomp_device_descr *acc_dev = thr->dev;
    288 
    289       if (acc_dev && thr->target_tls)
    290 	{
    291 	  acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
    292 	  thr->target_tls = NULL;
    293 	}
    294 
    295       assert (!thr->mapped_data);
    296 
    297       /* Remove from thread list.  */
    298       for (prev = NULL, walk = goacc_threads; walk;
    299 	   prev = walk, walk = walk->next)
    300 	if (walk == thr)
    301 	  {
    302 	    if (prev == NULL)
    303 	      goacc_threads = walk->next;
    304 	    else
    305 	      prev->next = walk->next;
    306 
    307 	    free (thr);
    308 
    309 	    break;
    310 	  }
    311 
    312       assert (walk);
    313     }
    314 
    315   gomp_mutex_unlock (&goacc_thread_lock);
    316 }
    317 
    318 /* Use the ORD'th device instance for the current host thread (or -1 for the
    319    current global default).  The device (and the runtime) must be initialised
    320    before calling this function.  */
    321 
    322 void
    323 goacc_attach_host_thread_to_device (int ord)
    324 {
    325   struct goacc_thread *thr = goacc_thread ();
    326   struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
    327   int num_devices;
    328 
    329   if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
    330     return;
    331 
    332   if (ord < 0)
    333     ord = goacc_device_num;
    334 
    335   /* Decide which type of device to use.  If the current thread has a device
    336      type already (e.g. set by acc_set_device_type), use that, else use the
    337      global default.  */
    338   if (thr && thr->base_dev)
    339     base_dev = thr->base_dev;
    340   else
    341     {
    342       assert (cached_base_dev);
    343       base_dev = cached_base_dev;
    344     }
    345 
    346   num_devices = base_dev->get_num_devices_func ();
    347   if (num_devices <= 0 || ord >= num_devices)
    348     gomp_fatal ("device %u out of range", ord);
    349 
    350   if (!thr)
    351     thr = goacc_new_thread ();
    352 
    353   thr->base_dev = base_dev;
    354   thr->dev = acc_dev = &base_dev[ord];
    355   thr->saved_bound_dev = NULL;
    356   thr->mapped_data = NULL;
    357 
    358   thr->target_tls
    359     = acc_dev->openacc.create_thread_data_func (ord);
    360 
    361   acc_dev->openacc.async_set_async_func (acc_async_sync);
    362 }
    363 
    364 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
    365    init/shutdown is per-process or per-thread.  We choose per-process.  */
    366 
    367 void
    368 acc_init (acc_device_t d)
    369 {
    370   if (!cached_base_dev)
    371     gomp_init_targets_once ();
    372 
    373   gomp_mutex_lock (&acc_device_lock);
    374 
    375   cached_base_dev = acc_init_1 (d);
    376 
    377   gomp_mutex_unlock (&acc_device_lock);
    378 
    379   goacc_attach_host_thread_to_device (-1);
    380 }
    381 
    382 ialias (acc_init)
    383 
    384 void
    385 acc_shutdown (acc_device_t d)
    386 {
    387   gomp_mutex_lock (&acc_device_lock);
    388 
    389   acc_shutdown_1 (d);
    390 
    391   gomp_mutex_unlock (&acc_device_lock);
    392 }
    393 
    394 ialias (acc_shutdown)
    395 
    396 int
    397 acc_get_num_devices (acc_device_t d)
    398 {
    399   int n = 0;
    400   struct gomp_device_descr *acc_dev;
    401 
    402   if (d == acc_device_none)
    403     return 0;
    404 
    405   gomp_init_targets_once ();
    406 
    407   acc_dev = resolve_device (d);
    408   if (!acc_dev)
    409     return 0;
    410 
    411   n = acc_dev->get_num_devices_func ();
    412   if (n < 0)
    413     n = 0;
    414 
    415   return n;
    416 }
    417 
    418 ialias (acc_get_num_devices)
    419 
    420 /* Set the device type for the current thread only (using the current global
    421    default device number), initialising that device if necessary.  Also set the
    422    default device type for new threads to D.  */
    423 
    424 void
    425 acc_set_device_type (acc_device_t d)
    426 {
    427   struct gomp_device_descr *base_dev, *acc_dev;
    428   struct goacc_thread *thr = goacc_thread ();
    429 
    430   gomp_mutex_lock (&acc_device_lock);
    431 
    432   if (!cached_base_dev)
    433     gomp_init_targets_once ();
    434 
    435   cached_base_dev = base_dev = resolve_device (d);
    436   acc_dev = &base_dev[goacc_device_num];
    437 
    438   if (!acc_dev->is_initialized)
    439     gomp_init_device (acc_dev);
    440 
    441   gomp_mutex_unlock (&acc_device_lock);
    442 
    443   /* We're changing device type: invalidate the current thread's dev and
    444      base_dev pointers.  */
    445   if (thr && thr->base_dev != base_dev)
    446     {
    447       thr->base_dev = thr->dev = NULL;
    448       if (thr->mapped_data)
    449         gomp_fatal ("acc_set_device_type in 'acc data' region");
    450     }
    451 
    452   goacc_attach_host_thread_to_device (-1);
    453 }
    454 
    455 ialias (acc_set_device_type)
    456 
    457 acc_device_t
    458 acc_get_device_type (void)
    459 {
    460   acc_device_t res = acc_device_none;
    461   struct gomp_device_descr *dev;
    462   struct goacc_thread *thr = goacc_thread ();
    463 
    464   if (thr && thr->base_dev)
    465     res = acc_device_type (thr->base_dev->type);
    466   else
    467     {
    468       gomp_init_targets_once ();
    469 
    470       dev = resolve_device (acc_device_default);
    471       res = acc_device_type (dev->type);
    472     }
    473 
    474   assert (res != acc_device_default
    475 	  && res != acc_device_not_host);
    476 
    477   return res;
    478 }
    479 
    480 ialias (acc_get_device_type)
    481 
    482 int
    483 acc_get_device_num (acc_device_t d)
    484 {
    485   const struct gomp_device_descr *dev;
    486   struct goacc_thread *thr = goacc_thread ();
    487 
    488   if (d >= _ACC_device_hwm)
    489     gomp_fatal ("device %u out of range", (unsigned)d);
    490 
    491   if (!cached_base_dev)
    492     gomp_init_targets_once ();
    493 
    494   dev = resolve_device (d);
    495   if (!dev)
    496     gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
    497 
    498   if (thr && thr->base_dev == dev && thr->dev)
    499     return thr->dev->target_id;
    500 
    501   return goacc_device_num;
    502 }
    503 
    504 ialias (acc_get_device_num)
    505 
    506 void
    507 acc_set_device_num (int ord, acc_device_t d)
    508 {
    509   struct gomp_device_descr *base_dev, *acc_dev;
    510   int num_devices;
    511 
    512   if (!cached_base_dev)
    513     gomp_init_targets_once ();
    514 
    515   if (ord < 0)
    516     ord = goacc_device_num;
    517 
    518   if ((int) d == 0)
    519     /* Set whatever device is being used by the current host thread to use
    520        device instance ORD.  It's unclear if this is supposed to affect other
    521        host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num).  */
    522     goacc_attach_host_thread_to_device (ord);
    523   else
    524     {
    525       gomp_mutex_lock (&acc_device_lock);
    526 
    527       cached_base_dev = base_dev = resolve_device (d);
    528 
    529       num_devices = base_dev->get_num_devices_func ();
    530 
    531       if (ord >= num_devices)
    532         gomp_fatal ("device %u out of range", ord);
    533 
    534       acc_dev = &base_dev[ord];
    535 
    536       if (!acc_dev->is_initialized)
    537         gomp_init_device (acc_dev);
    538 
    539       gomp_mutex_unlock (&acc_device_lock);
    540 
    541       goacc_attach_host_thread_to_device (ord);
    542     }
    543 
    544   goacc_device_num = ord;
    545 }
    546 
    547 ialias (acc_set_device_num)
    548 
    549 int
    550 acc_on_device (acc_device_t dev)
    551 {
    552   struct goacc_thread *thr = goacc_thread ();
    553 
    554   /* We only want to appear to be the "host_nonshm" plugin from "offloaded"
    555      code -- i.e. within a parallel region.  Test a flag set by the
    556      openacc_parallel hook of the host_nonshm plugin to determine that.  */
    557   if (acc_get_device_type () == acc_device_host_nonshm
    558       && thr && thr->target_tls
    559       && ((struct nonshm_thread *)thr->target_tls)->nonshm_exec)
    560     return dev == acc_device_host_nonshm || dev == acc_device_not_host;
    561 
    562   /* For OpenACC, libgomp is only built for the host, so this is sufficient.  */
    563   return dev == acc_device_host || dev == acc_device_none;
    564 }
    565 
    566 ialias (acc_on_device)
    567 
    568 attribute_hidden void
    569 goacc_runtime_initialize (void)
    570 {
    571   gomp_mutex_init (&acc_device_lock);
    572 
    573 #if !(defined HAVE_TLS || defined USE_EMUTLS)
    574   pthread_key_create (&goacc_tls_key, NULL);
    575 #endif
    576 
    577   pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
    578 
    579   cached_base_dev = NULL;
    580 
    581   goacc_threads = NULL;
    582   gomp_mutex_init (&goacc_thread_lock);
    583 
    584   /* Initialize and register the 'host' device type.  */
    585   goacc_host_init ();
    586 }
    587 
    588 /* Compiler helper functions */
    589 
    590 attribute_hidden void
    591 goacc_save_and_set_bind (acc_device_t d)
    592 {
    593   struct goacc_thread *thr = goacc_thread ();
    594 
    595   assert (!thr->saved_bound_dev);
    596 
    597   thr->saved_bound_dev = thr->dev;
    598   thr->dev = dispatchers[d];
    599 }
    600 
    601 attribute_hidden void
    602 goacc_restore_bind (void)
    603 {
    604   struct goacc_thread *thr = goacc_thread ();
    605 
    606   thr->dev = thr->saved_bound_dev;
    607   thr->saved_bound_dev = NULL;
    608 }
    609 
    610 /* This is called from any OpenACC support function that may need to implicitly
    611    initialize the libgomp runtime, either globally or from a new host thread.
    612    On exit "goacc_thread" will return a valid & populated thread block.  */
    613 
    614 attribute_hidden void
    615 goacc_lazy_initialize (void)
    616 {
    617   struct goacc_thread *thr = goacc_thread ();
    618 
    619   if (thr && thr->dev)
    620     return;
    621 
    622   if (!cached_base_dev)
    623     acc_init (acc_device_default);
    624   else
    625     goacc_attach_host_thread_to_device (-1);
    626 }
    627