Home | History | Annotate | Line # | Download | only in libgomp
target.c revision 1.1.1.2.2.1
      1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
      2    Contributed by Jakub Jelinek <jakub (at) redhat.com>.
      3 
      4    This file is part of the GNU Offloading and Multi Processing Library
      5    (libgomp).
      6 
      7    Libgomp is free software; you can redistribute it and/or modify it
      8    under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3, or (at your option)
     10    any later version.
     11 
     12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15    more details.
     16 
     17    Under Section 7 of GPL version 3, you are granted additional
     18    permissions described in the GCC Runtime Library Exception, version
     19    3.1, as published by the Free Software Foundation.
     20 
     21    You should have received a copy of the GNU General Public License and
     22    a copy of the GCC Runtime Library Exception along with this program;
     23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24    <http://www.gnu.org/licenses/>.  */
     25 
     26 /* This file contains the support of offloading.  */
     27 
     28 #include "config.h"
     29 #include "libgomp.h"
     30 #include "oacc-plugin.h"
     31 #include "oacc-int.h"
     32 #include "gomp-constants.h"
     33 #include <limits.h>
     34 #include <stdbool.h>
     35 #include <stdlib.h>
     36 #ifdef HAVE_INTTYPES_H
     37 # include <inttypes.h>  /* For PRIu64.  */
     38 #endif
     39 #include <string.h>
     40 #include <assert.h>
     41 #include <errno.h>
     42 
     43 #ifdef PLUGIN_SUPPORT
     44 #include <dlfcn.h>
     45 #include "plugin-suffix.h"
     46 #endif
     47 
     48 static void gomp_target_init (void);
     49 
/* The whole initialization code for offloading plugins is only run once.  */
     51 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
     52 
     53 /* Mutex for offload image registration.  */
     54 static gomp_mutex_t register_lock;
     55 
     56 /* This structure describes an offload image.
     57    It contains type of the target device, pointer to host table descriptor, and
     58    pointer to target data.  */
struct offload_image_descr {
  /* Plugin interface version this image was registered with — presumably
     a GOMP_VERSION value; confirm against the registration entry point.  */
  unsigned version;
  /* Which kind of offload device the image was compiled for.  */
  enum offload_target_type type;
  /* Host-side table descriptor for the image's functions/variables.  */
  const void *host_table;
  /* Opaque target-specific image data, interpreted by the plugin.  */
  const void *target_data;
};
     65 
     66 /* Array of descriptors of offload images.  */
     67 static struct offload_image_descr *offload_images;
     68 
     69 /* Total number of offload images.  */
     70 static int num_offload_images;
     71 
     72 /* Array of descriptors for all available devices.  */
     73 static struct gomp_device_descr *devices;
     74 
     75 /* Total number of available devices.  */
     76 static int num_devices;
     77 
     78 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
     79 static int num_devices_openmp;
     80 
     81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
     82 
     83 static void *
     84 gomp_realloc_unlock (void *old, size_t size)
     85 {
     86   void *ret = realloc (old, size);
     87   if (ret == NULL)
     88     {
     89       gomp_mutex_unlock (&register_lock);
     90       gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
     91     }
     92   return ret;
     93 }
     94 
     95 attribute_hidden void
     96 gomp_init_targets_once (void)
     97 {
     98   (void) pthread_once (&gomp_is_initialized, gomp_target_init);
     99 }
    100 
    101 attribute_hidden int
    102 gomp_get_num_devices (void)
    103 {
    104   gomp_init_targets_once ();
    105   return num_devices_openmp;
    106 }
    107 
    108 static struct gomp_device_descr *
    109 resolve_device (int device_id)
    110 {
    111   if (device_id == GOMP_DEVICE_ICV)
    112     {
    113       struct gomp_task_icv *icv = gomp_icv (false);
    114       device_id = icv->default_device_var;
    115     }
    116 
    117   if (device_id < 0 || device_id >= gomp_get_num_devices ())
    118     return NULL;
    119 
    120   gomp_mutex_lock (&devices[device_id].lock);
    121   if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    122     gomp_init_device (&devices[device_id]);
    123   else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    124     {
    125       gomp_mutex_unlock (&devices[device_id].lock);
    126       return NULL;
    127     }
    128   gomp_mutex_unlock (&devices[device_id].lock);
    129 
    130   return &devices[device_id];
    131 }
    132 
    133 
    134 static inline splay_tree_key
    135 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
    136 {
    137   if (key->host_start != key->host_end)
    138     return splay_tree_lookup (mem_map, key);
    139 
    140   key->host_end++;
    141   splay_tree_key n = splay_tree_lookup (mem_map, key);
    142   key->host_end--;
    143   if (n)
    144     return n;
    145   key->host_start--;
    146   n = splay_tree_lookup (mem_map, key);
    147   key->host_start++;
    148   if (n)
    149     return n;
    150   return splay_tree_lookup (mem_map, key);
    151 }
    152 
    153 static inline splay_tree_key
    154 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
    155 {
    156   if (key->host_start != key->host_end)
    157     return splay_tree_lookup (mem_map, key);
    158 
    159   key->host_end++;
    160   splay_tree_key n = splay_tree_lookup (mem_map, key);
    161   key->host_end--;
    162   return n;
    163 }
    164 
    165 static inline void
    166 gomp_device_copy (struct gomp_device_descr *devicep,
    167 		  bool (*copy_func) (int, void *, const void *, size_t),
    168 		  const char *dst, void *dstaddr,
    169 		  const char *src, const void *srcaddr,
    170 		  size_t size)
    171 {
    172   if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    173     {
    174       gomp_mutex_unlock (&devicep->lock);
    175       gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
    176 		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    177     }
    178 }
    179 
    180 static void
    181 gomp_copy_host2dev (struct gomp_device_descr *devicep,
    182 		    void *d, const void *h, size_t sz)
    183 {
    184   gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
    185 }
    186 
    187 static void
    188 gomp_copy_dev2host (struct gomp_device_descr *devicep,
    189 		    void *h, const void *d, size_t sz)
    190 {
    191   gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
    192 }
    193 
    194 static void
    195 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
    196 {
    197   if (!devicep->free_func (devicep->target_id, devptr))
    198     {
    199       gomp_mutex_unlock (&devicep->lock);
    200       gomp_fatal ("error in freeing device memory block at %p", devptr);
    201     }
    202 }
    203 
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found OLDN (an existing mapping) for NEWN.
   Helper function of gomp_map_vars.  Fills in TGT_VAR to describe the
   reuse, copies host data to the device for "always to" kinds, and bumps
   OLDN's reference count.  KIND is the map type already masked down to
   the type bits.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind)
{
  /* Record that this list entry reuses the existing mapping, at the
     offset of NEWN within OLDN's host range.  */
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  /* A force mapping must create a fresh mapping, and a non-force mapping
     must be fully contained in the existing one; anything else is a user
     error.  The device lock is held on entry, so drop it before dying.  */
  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  /* "always to" refreshes the device copy of NEWN's sub-range even though
     the mapping already existed.  */
  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start);

  /* Infinity refcounts (e.g. "declare target" objects) are never counted.  */
  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
    240 
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  /* KINDS is an array of either unsigned shorts (new-style maps, which
     carry alignment in the upper bits) or unsigned chars (old-style);
     SHORT_MAPKIND selects the element width.  Return element IDX.  */
  if (short_mapkind)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
    247 
/* Initialize the device-side pointer stored at TGT->tgt_start + TARGET_OFFSET
   so that it points to the device copy of whatever host pointer HOST_PTR
   points at.  BIAS is the array-section bias: it is added to the host pointer
   before the lookup and subtracted again from the device address, so the
   device pointer gets the same bias relative to the mapped object.
   The pointer target must already be mapped; the caller holds the device
   lock.  */

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* A NULL host pointer maps to a NULL device pointer.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      /* FIXME: see comment about coalescing host/dev transfers below.  */
      gomp_copy_host2dev (devicep,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *));
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  /* Zero-length lookup: find the mapping that contains (or abuts) the
     biased pointer value.  */
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  /* Translate the host offset within N into the device address.  */
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  /* FIXME: see comment about coalescing host/dev transfers below.  */
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *));
}
    287 
/* Map structure element I (from a GOMP_MAP_STRUCT group whose first element
   is FIRST) when the enclosing structure N is already mapped.  The element
   must land inside the same target_mem_desc as N, at the matching offset;
   otherwise the structure was only partially mapped earlier and we die.
   Zero-sized elements get two extra probes ([start-1, end) and [start,
   end+1)) since a zero-length range cannot intersect any mapping directly.
   Only called with short (16-bit) map kinds.  The caller holds the device
   lock.  */

static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  /* The element is acceptable only if it lives in the same device block
     as N, at the same host/device offset relationship.  */
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask);
      return;
    }
  if (sizes[i] == 0)
    {
      /* Zero-sized element: retry with [start-1, end) — but only if that
	 does not step before the structure's first element.  */
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask);
	      return;
	    }
	}
      /* And with [start, end + 1).  */
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
    347 
    348 static inline uintptr_t
    349 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
    350 {
    351   if (tgt->list[i].key != NULL)
    352     return tgt->list[i].key->tgt->tgt_start
    353 	   + tgt->list[i].key->tgt_offset
    354 	   + tgt->list[i].offset;
    355   if (tgt->list[i].offset == ~(uintptr_t) 0)
    356     return (uintptr_t) hostaddrs[i];
    357   if (tgt->list[i].offset == ~(uintptr_t) 1)
    358     return 0;
    359   if (tgt->list[i].offset == ~(uintptr_t) 2)
    360     return tgt->list[i + 1].key->tgt->tgt_start
    361 	   + tgt->list[i + 1].key->tgt_offset
    362 	   + tgt->list[i + 1].offset
    363 	   + (uintptr_t) hostaddrs[i]
    364 	   - (uintptr_t) hostaddrs[i + 1];
    365   return tgt->tgt_start + tgt->list[i].offset;
    366 }
    367 
/* Map MAPNUM host objects described by HOSTADDRS[], SIZES[] and KINDS[]
   into DEVICEP's address space and return a freshly allocated
   target_mem_desc describing the mapping, or NULL if the device was
   finalized or (for "enter data") nothing new was mapped.
   SHORT_MAPKIND selects 16-bit map kinds (type in low 8 bits, alignment
   power in the upper bits) over old 8-bit ones.  DEVADDRS, if non-NULL,
   supplies an already-allocated device block (then MAPNUM must be 1).
   PRAGMA_KIND distinguishes "target" (which additionally materializes an
   array of device pointers for the kernel), "enter data" and plain data
   mappings.  Works in two passes: the first sizes the needed device block
   and resolves already-present mappings; the second allocates splay-tree
   nodes and performs the host-to-device copies.  */

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  /* For "enter data", the descriptor survives only if some new mapping
     bumps this refcount below; otherwise it is freed before returning.  */
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* A target region needs room at the start of the block for the
	 per-mapping array of device addresses passed to the kernel.  */
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  /* First pass: resolve mappings that already exist and accumulate the
     size/alignment of the device block needed for the rest.  Note that
     several branches advance I past the entries they consume.  */
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  /* Offset ~0: gomp_map_val passes the host value through.  */
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  /* Rewrite the host pointer in place with the corresponding
	     device address; the object must already be mapped.  */
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start;
	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
	    }
	  cur_node.host_start -= n->host_start;
	  hostaddrs[i]
	    = (void *) (n->tgt->tgt_start + n->tgt_offset
			+ cur_node.host_start);
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  /* SIZES[i] is the count of following entries that are fields of
	     the same structure; the whole [first field, end of last field)
	     region is mapped as one unit.  */
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  /* Offset ~2: resolve relative to the next entry's mapping.  */
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 2;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      /* Account for the whole struct region but align it so the
		 first field starts at the right offset: back the running
		 size up to the struct base first.  */
	      tgt_size -= (uintptr_t) hostaddrs[first]
			  - (uintptr_t) hostaddrs[i];
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		tgt->list[i].key = NULL;
	      i--;
	      continue;
	    }
	  /* The struct is already mapped; each field must resolve inside
	     the same existing mapping.  */
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
				      sizes, kinds);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  /* Offset ~1 for now; the second pass patches the device-side
	     pointer value.  */
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 1;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  /* Firstprivate data lives in the fresh device block; only size
	     it here, the copy happens in the second pass.  */
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      /* Unmapped zero-length section: device NULL.  */
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = ~(uintptr_t) 1;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
				kind & typemask);
      else
	{
	  /* Not mapped yet (or only a "declare target link" placeholder);
	     reserve space in the new block.  */
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      /* Consume the trailing POINTER entries that point into this
	         pointer-set; they are handled together in pass two.  */
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      /* The caller supplied the device block directly; it must describe
	 exactly one mapping.  */
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      /* Over-allocated by tgt_align - 1 above, so align by hand here.  */
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  /* Restart the running offset for pass two, again skipping the device
     address array for a target region.  */
  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      /* Second pass: create the new mappings (one pre-allocated splay tree
	 node each) and perform the host-to-device copies.  Only entries
	 left keyless by pass one are looked at.  */
      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		/* Copy the private value into its reserved slot.  */
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_USE_DEVICE_PTR:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		/* Fully handled in pass one.  */
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    /* New structure: remember its base/offset so the field
		       entries that follow land at matching offsets
		       (field_tgt_clear marks the last field index).  */
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
					    sizes, kinds);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		/* Patch the device copy of this pointer to hold the device
		   address of the preceding entry's object (minus the
		   section bias in SIZES[i]).  */
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *));
		/* Leave the device address in tgt_offset for a possible
		   following ALWAYS_POINTER entry.  */
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      default:
		break;
	      }
	    /* Ordinary new mapping: fill in the next pre-allocated splay
	       tree node and insert it.  */
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      /* Another entry in this very list already mapped it.  */
	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
				      kind & typemask);
	    else
	      {
		k->link_key = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != ~(size_t) 0)
		  {
		    /* Structure field: its offset is fixed by the struct
		       base recorded above, not by the running tgt_size.  */
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = ~(size_t) 0;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		/* Each new mapping keeps TGT alive; this is what makes an
		   "enter data" descriptor survive (see end of function).  */
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    /* Allocation only; no host-to-device copy.  */
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    /* FIXME: Perhaps add some smarts, like if copying
		       several adjacent fields from host to target, use some
		       host buffer to avoid sending each var individually.  */
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i]);
		    break;
		  case GOMP_MAP_TO_PSET:
		    /* FIXME: see above FIXME comment.  */
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start);

		    /* Attach the trailing POINTER entries that pass one
		       associated with this pointer-set.  */
		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j]);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *));
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    /* NOTE(review): this calls host2dev_func directly, so a
		       failed copy is not diagnosed the way gomp_copy_host2dev
		       would diagnose it — confirm this is intentional.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    devicep->host2dev_func (devicep->target_id,
					    (void *) n->tgt_offset,
					    &tgt_addr, sizeof (void *));
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Fill in the kernel's array of device addresses, one per mapping,
	 at the start of the block.  */
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  /* FIXME: see above FIXME comment.  */
	  gomp_copy_host2dev (devicep,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *));
	}
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
    844 
    845 static void
    846 gomp_unmap_tgt (struct target_mem_desc *tgt)
    847 {
    848   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
    849   if (tgt->tgt_end)
    850     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
    851 
    852   free (tgt->array);
    853   free (tgt);
    854 }
    855 
    856 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
    857    variables back from device to host: if it is false, it is assumed that this
    858    has been done already.  */
    859 
attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* A descriptor with no list entries owns no mappings; just free it.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  /* If the device was already finalized, its memory is gone; only the
     host-side bookkeeping needs to be released.  */
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* Drop one reference on the mapping; it is actually removed only
	 when the last non-infinite reference goes away.  */
      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      /* Copy back to the host: either because the mapping is going away
	 and the caller requested copy-back for a "from"-style entry, or
	 unconditionally for always_copy_from entries.  */
      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	{
	  splay_tree_remove (&devicep->mem_map, k);
	  /* For "declare target link" variables, reinstate the original
	     link key this mapping had displaced.  */
	  if (k->link_key)
	    splay_tree_insert (&devicep->mem_map,
			       (splay_tree_node) k->link_key);
	  /* The key's owning tgt loses a reference too.  */
	  if (k->tgt->refcount > 1)
	    k->tgt->refcount--;
	  else
	    gomp_unmap_tgt (k->tgt);
	}
    }

  /* Finally drop the reference on TGT itself.  */
  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
    923 
/* Implement "target update": for each [hostaddrs[i], hostaddrs[i]+sizes[i])
   range that is currently mapped on DEVICEP, copy data to and/or from the
   device according to the map kind.  Ranges that are not mapped at all are
   silently ignored; ranges that only partially overlap a mapping are a
   fatal error.  */

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  /* Short map kinds carry the kind in the low 8 bits; the legacy encoding
     uses only 3 bits.  */
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    /* The requested range must lie entirely within the mapping.  */
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }


	    /* Translate the host address to its device counterpart.  */
	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
    980 
    981 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
    982    And insert to splay tree the mapping between addresses from HOST_TABLE and
    983    from loaded target image.  We rely in the host and device compiler
    984    emitting variable and functions in the same order.  */
    985 
    986 static void
    987 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
    988 			   const void *host_table, const void *target_data,
    989 			   bool is_register_lock)
    990 {
    991   void **host_func_table = ((void ***) host_table)[0];
    992   void **host_funcs_end  = ((void ***) host_table)[1];
    993   void **host_var_table  = ((void ***) host_table)[2];
    994   void **host_vars_end   = ((void ***) host_table)[3];
    995 
    996   /* The func table contains only addresses, the var table contains addresses
    997      and corresponding sizes.  */
    998   int num_funcs = host_funcs_end - host_func_table;
    999   int num_vars  = (host_vars_end - host_var_table) / 2;
   1000 
   1001   /* Load image to device and get target addresses for the image.  */
   1002   struct addr_pair *target_table = NULL;
   1003   int i, num_target_entries;
   1004 
   1005   num_target_entries
   1006     = devicep->load_image_func (devicep->target_id, version,
   1007 				target_data, &target_table);
   1008 
   1009   if (num_target_entries != num_funcs + num_vars)
   1010     {
   1011       gomp_mutex_unlock (&devicep->lock);
   1012       if (is_register_lock)
   1013 	gomp_mutex_unlock (&register_lock);
   1014       gomp_fatal ("Cannot map target functions or variables"
   1015 		  " (expected %u, have %u)", num_funcs + num_vars,
   1016 		  num_target_entries);
   1017     }
   1018 
   1019   /* Insert host-target address mapping into splay tree.  */
   1020   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
   1021   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
   1022   tgt->refcount = REFCOUNT_INFINITY;
   1023   tgt->tgt_start = 0;
   1024   tgt->tgt_end = 0;
   1025   tgt->to_free = NULL;
   1026   tgt->prev = NULL;
   1027   tgt->list_count = 0;
   1028   tgt->device_descr = devicep;
   1029   splay_tree_node array = tgt->array;
   1030 
   1031   for (i = 0; i < num_funcs; i++)
   1032     {
   1033       splay_tree_key k = &array->key;
   1034       k->host_start = (uintptr_t) host_func_table[i];
   1035       k->host_end = k->host_start + 1;
   1036       k->tgt = tgt;
   1037       k->tgt_offset = target_table[i].start;
   1038       k->refcount = REFCOUNT_INFINITY;
   1039       k->link_key = NULL;
   1040       array->left = NULL;
   1041       array->right = NULL;
   1042       splay_tree_insert (&devicep->mem_map, array);
   1043       array++;
   1044     }
   1045 
   1046   /* Most significant bit of the size in host and target tables marks
   1047      "omp declare target link" variables.  */
   1048   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
   1049   const uintptr_t size_mask = ~link_bit;
   1050 
   1051   for (i = 0; i < num_vars; i++)
   1052     {
   1053       struct addr_pair *target_var = &target_table[num_funcs + i];
   1054       uintptr_t target_size = target_var->end - target_var->start;
   1055 
   1056       if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
   1057 	{
   1058 	  gomp_mutex_unlock (&devicep->lock);
   1059 	  if (is_register_lock)
   1060 	    gomp_mutex_unlock (&register_lock);
   1061 	  gomp_fatal ("Cannot map target variables (size mismatch)");
   1062 	}
   1063 
   1064       splay_tree_key k = &array->key;
   1065       k->host_start = (uintptr_t) host_var_table[i * 2];
   1066       k->host_end
   1067 	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
   1068       k->tgt = tgt;
   1069       k->tgt_offset = target_var->start;
   1070       k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
   1071       k->link_key = NULL;
   1072       array->left = NULL;
   1073       array->right = NULL;
   1074       splay_tree_insert (&devicep->mem_map, array);
   1075       array++;
   1076     }
   1077 
   1078   free (target_table);
   1079 }
   1080 
   1081 /* Unload the mappings described by target_data from device DEVICE_P.
   1082    The device must be locked.   */
   1083 
static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  /* HOST_TABLE layout matches gomp_load_image_to_device:
     [funcs_start, funcs_end, vars_start, vars_end].  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      /* Functions were inserted with a dummy 1-byte range.  */
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  /* "declare target link" variable: remove whatever key currently
	     represents it.  NOTE(review): the lookup result is dereferenced
	     without a NULL check — presumably a link variable is always
	     present in the map at this point; verify against callers.  */
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  splay_tree_remove (&devicep->mem_map, n);
	  if (n->link_key)
	    {
	      if (n->tgt->refcount > 1)
		n->tgt->refcount--;
	      else
		{
		  is_tgt_unmapped = true;
		  gomp_unmap_tgt (n->tgt);
		}
	    }
	}
    }

  /* NODE was the first node of the array created by
     gomp_load_image_to_device; freeing node->tgt and NODE releases that
     bookkeeping (NOTE(review): free (node) appears to rely on NODE aliasing
     the start of tgt->array — confirm the splay-tree node layout).  Skip
     if the tgt was already unmapped above.  */
  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
   1163 
   1164 /* This function should be called from every offload image while loading.
   1165    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   1166    the target, and TARGET_DATA needed by target plugin.  */
   1167 
   1168 void
   1169 GOMP_offload_register_ver (unsigned version, const void *host_table,
   1170 			   int target_type, const void *target_data)
   1171 {
   1172   int i;
   1173 
   1174   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
   1175     gomp_fatal ("Library too old for offload (version %u < %u)",
   1176 		GOMP_VERSION, GOMP_VERSION_LIB (version));
   1177 
   1178   gomp_mutex_lock (&register_lock);
   1179 
   1180   /* Load image to all initialized devices.  */
   1181   for (i = 0; i < num_devices; i++)
   1182     {
   1183       struct gomp_device_descr *devicep = &devices[i];
   1184       gomp_mutex_lock (&devicep->lock);
   1185       if (devicep->type == target_type
   1186 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
   1187 	gomp_load_image_to_device (devicep, version,
   1188 				   host_table, target_data, true);
   1189       gomp_mutex_unlock (&devicep->lock);
   1190     }
   1191 
   1192   /* Insert image to array of pending images.  */
   1193   offload_images
   1194     = gomp_realloc_unlock (offload_images,
   1195 			   (num_offload_images + 1)
   1196 			   * sizeof (struct offload_image_descr));
   1197   offload_images[num_offload_images].version = version;
   1198   offload_images[num_offload_images].type = target_type;
   1199   offload_images[num_offload_images].host_table = host_table;
   1200   offload_images[num_offload_images].target_data = target_data;
   1201 
   1202   num_offload_images++;
   1203   gomp_mutex_unlock (&register_lock);
   1204 }
   1205 
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  /* Legacy entry point: register an image using version 0 of the
     offload image interface.  */
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
   1212 
   1213 /* This function should be called from every offload image while unloading.
   1214    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   1215    the target, and TARGET_DATA needed by target plugin.  */
   1216 
   1217 void
   1218 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
   1219 			     int target_type, const void *target_data)
   1220 {
   1221   int i;
   1222 
   1223   gomp_mutex_lock (&register_lock);
   1224 
   1225   /* Unload image from all initialized devices.  */
   1226   for (i = 0; i < num_devices; i++)
   1227     {
   1228       struct gomp_device_descr *devicep = &devices[i];
   1229       gomp_mutex_lock (&devicep->lock);
   1230       if (devicep->type == target_type
   1231 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
   1232 	gomp_unload_image_from_device (devicep, version,
   1233 				       host_table, target_data);
   1234       gomp_mutex_unlock (&devicep->lock);
   1235     }
   1236 
   1237   /* Remove image from array of pending images.  */
   1238   for (i = 0; i < num_offload_images; i++)
   1239     if (offload_images[i].target_data == target_data)
   1240       {
   1241 	offload_images[i] = offload_images[--num_offload_images];
   1242 	break;
   1243       }
   1244 
   1245   gomp_mutex_unlock (&register_lock);
   1246 }
   1247 
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  /* Legacy entry point: unregister an image using version 0 of the
     offload image interface.  */
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
   1254 
   1255 /* This function initializes the target device, specified by DEVICEP.  DEVICEP
   1256    must be locked on entry, and remains locked on return.  */
   1257 
   1258 attribute_hidden void
   1259 gomp_init_device (struct gomp_device_descr *devicep)
   1260 {
   1261   int i;
   1262   if (!devicep->init_device_func (devicep->target_id))
   1263     {
   1264       gomp_mutex_unlock (&devicep->lock);
   1265       gomp_fatal ("device initialization failed");
   1266     }
   1267 
   1268   /* Load to device all images registered by the moment.  */
   1269   for (i = 0; i < num_offload_images; i++)
   1270     {
   1271       struct offload_image_descr *image = &offload_images[i];
   1272       if (image->type == devicep->type)
   1273 	gomp_load_image_to_device (devicep, image->version,
   1274 				   image->host_table, image->target_data,
   1275 				   false);
   1276     }
   1277 
   1278   devicep->state = GOMP_DEVICE_INITIALIZED;
   1279 }
   1280 
   1281 attribute_hidden void
   1282 gomp_unload_device (struct gomp_device_descr *devicep)
   1283 {
   1284   if (devicep->state == GOMP_DEVICE_INITIALIZED)
   1285     {
   1286       unsigned i;
   1287 
   1288       /* Unload from device all images registered at the moment.  */
   1289       for (i = 0; i < num_offload_images; i++)
   1290 	{
   1291 	  struct offload_image_descr *image = &offload_images[i];
   1292 	  if (image->type == devicep->type)
   1293 	    gomp_unload_image_from_device (devicep, image->version,
   1294 					   image->host_table,
   1295 					   image->target_data);
   1296 	}
   1297     }
   1298 }
   1299 
   1300 /* Free address mapping tables.  MM must be locked on entry, and remains locked
   1301    on return.  */
   1302 
attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  /* Repeatedly take the root key and free the target_mem_desc that owns
     it.  NOTE(review): other keys belonging to the same tgt live inside
     tgt->array, which is freed here while those keys may still be linked
     into the tree; this presumably relies on teardown-time invariants of
     the map — verify before restructuring.  */
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}
   1315 
   1316 /* Host fallback for GOMP_target{,_ext} routines.  */
   1317 
   1318 static void
   1319 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
   1320 {
   1321   struct gomp_thread old_thr, *thr = gomp_thread ();
   1322   old_thr = *thr;
   1323   memset (thr, '\0', sizeof (*thr));
   1324   if (gomp_places_list)
   1325     {
   1326       thr->place = old_thr.place;
   1327       thr->ts.place_partition_len = gomp_places_list_len;
   1328     }
   1329   fn (hostaddrs);
   1330   gomp_free_thread (thr);
   1331   *thr = old_thr;
   1332 }
   1333 
   1334 /* Calculate alignment and size requirements of a private copy of data shared
   1335    as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
   1336 
   1337 static inline void
   1338 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
   1339 				     unsigned short *kinds, size_t *tgt_align,
   1340 				     size_t *tgt_size)
   1341 {
   1342   size_t i;
   1343   for (i = 0; i < mapnum; i++)
   1344     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
   1345       {
   1346 	size_t align = (size_t) 1 << (kinds[i] >> 8);
   1347 	if (*tgt_align < align)
   1348 	  *tgt_align = align;
   1349 	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
   1350 	*tgt_size += sizes[i];
   1351       }
   1352 }
   1353 
   1354 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
   1355 
   1356 static inline void
   1357 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
   1358 			size_t *sizes, unsigned short *kinds, size_t tgt_align,
   1359 			size_t tgt_size)
   1360 {
   1361   uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
   1362   if (al)
   1363     tgt += tgt_align - al;
   1364   tgt_size = 0;
   1365   size_t i;
   1366   for (i = 0; i < mapnum; i++)
   1367     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
   1368       {
   1369 	size_t align = (size_t) 1 << (kinds[i] >> 8);
   1370 	tgt_size = (tgt_size + align - 1) & ~(align - 1);
   1371 	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
   1372 	hostaddrs[i] = tgt + tgt_size;
   1373 	tgt_size = tgt_size + sizes[i];
   1374       }
   1375 }
   1376 
   1377 /* Helper function of GOMP_target{,_ext} routines.  */
   1378 
   1379 static void *
   1380 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
   1381 			 void (*host_fn) (void *))
   1382 {
   1383   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
   1384     return (void *) host_fn;
   1385   else
   1386     {
   1387       gomp_mutex_lock (&devicep->lock);
   1388       if (devicep->state == GOMP_DEVICE_FINALIZED)
   1389 	{
   1390 	  gomp_mutex_unlock (&devicep->lock);
   1391 	  return NULL;
   1392 	}
   1393 
   1394       struct splay_tree_key_s k;
   1395       k.host_start = (uintptr_t) host_fn;
   1396       k.host_end = k.host_start + 1;
   1397       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
   1398       gomp_mutex_unlock (&devicep->lock);
   1399       if (tgt_fn == NULL)
   1400 	return NULL;
   1401 
   1402       return (void *) tgt_fn->tgt_offset;
   1403     }
   1404 }
   1405 
   1406 /* Called when encountering a target directive.  If DEVICE
   1407    is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   1408    GOMP_DEVICE_HOST_FALLBACK (or any value
   1409    larger than last available hw device), use host fallback.
   1410    FN is address of host code, UNUSED is part of the current ABI, but
   1411    we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   1412    with MAPNUM entries, with addresses of the host objects,
   1413    sizes of the host objects (resp. for pointer kind pointer bias
   1414    and assumed sizeof (void *) size) and kinds.  */
   1415 
   1416 void
   1417 GOMP_target (int device, void (*fn) (void *), const void *unused,
   1418 	     size_t mapnum, void **hostaddrs, size_t *sizes,
   1419 	     unsigned char *kinds)
   1420 {
   1421   struct gomp_device_descr *devicep = resolve_device (device);
   1422 
   1423   void *fn_addr;
   1424   if (devicep == NULL
   1425       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   1426       /* All shared memory devices should use the GOMP_target_ext function.  */
   1427       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
   1428       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
   1429     return gomp_target_fallback (fn, hostaddrs);
   1430 
   1431   struct target_mem_desc *tgt_vars
   1432     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
   1433 		     GOMP_MAP_VARS_TARGET);
   1434   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
   1435 		     NULL);
   1436   gomp_unmap_vars (tgt_vars, true);
   1437 }
   1438 
   1439 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   1440    and several arguments have been added:
   1441    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   1442    DEPEND is array of dependencies, see GOMP_task for details.
   1443 
   1444    ARGS is a pointer to an array consisting of a variable number of both
   1445    device-independent and device-specific arguments, which can take one two
   1446    elements where the first specifies for which device it is intended, the type
   1447    and optionally also the value.  If the value is not present in the first
   1448    one, the whole second element the actual value.  The last element of the
   1449    array is a single NULL.  Among the device independent can be for example
   1450    NUM_TEAMS and THREAD_LIMIT.
   1451 
   1452    NUM_TEAMS is positive if GOMP_teams will be called in the body with
   1453    that value, or 1 if teams construct is not present, or 0, if
   1454    teams construct does not have num_teams clause and so the choice is
   1455    implementation defined, and -1 if it can't be determined on the host
   1456    what value will GOMP_teams have on the device.
   1457    THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   1458    body with that value, or 0, if teams construct does not have thread_limit
   1459    clause or the teams construct is not present, or -1 if it can't be
   1460    determined on the host what value will GOMP_teams have on the device.  */
   1461 
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  /* Set once firstprivate data has been copied into an alloca'd block,
     so the later paths do not repeat the copy.  */
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      /* Transfer the old task's state into the new implicit task,
		 then discard the old task.  */
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  /* Defer the whole target region as an asynchronous task.  */
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  /* Fall back to host execution if the device is unusable, FN has no
     device counterpart, or the plugin vetoes running it.  */
  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  /* Shared-memory devices need only private copies of firstprivate data;
     other devices get a full mapping via gomp_map_vars.  */
  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
   1582 
   1583 /* Host fallback for GOMP_target_data{,_ext} routines.  */
   1584 
   1585 static void
   1586 gomp_target_data_fallback (void)
   1587 {
   1588   struct gomp_task_icv *icv = gomp_icv (false);
   1589   if (icv->target_data)
   1590     {
   1591       /* Even when doing a host fallback, if there are any active
   1592          #pragma omp target data constructs, need to remember the
   1593          new #pragma omp target data, otherwise GOMP_target_end_data
   1594          would get out of sync.  */
   1595       struct target_mem_desc *tgt
   1596 	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
   1597 			 GOMP_MAP_VARS_DATA);
   1598       tgt->prev = icv->target_data;
   1599       icv->target_data = tgt;
   1600     }
   1601 }
   1602 
   1603 void
   1604 GOMP_target_data (int device, const void *unused, size_t mapnum,
   1605 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
   1606 {
   1607   struct gomp_device_descr *devicep = resolve_device (device);
   1608 
   1609   if (devicep == NULL
   1610       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   1611       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
   1612     return gomp_target_data_fallback ();
   1613 
   1614   struct target_mem_desc *tgt
   1615     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
   1616 		     GOMP_MAP_VARS_DATA);
   1617   struct gomp_task_icv *icv = gomp_icv (true);
   1618   tgt->prev = icv->target_data;
   1619   icv->target_data = tgt;
   1620 }
   1621 
   1622 void
   1623 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
   1624 		      size_t *sizes, unsigned short *kinds)
   1625 {
   1626   struct gomp_device_descr *devicep = resolve_device (device);
   1627 
   1628   if (devicep == NULL
   1629       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   1630       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   1631     return gomp_target_data_fallback ();
   1632 
   1633   struct target_mem_desc *tgt
   1634     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
   1635 		     GOMP_MAP_VARS_DATA);
   1636   struct gomp_task_icv *icv = gomp_icv (true);
   1637   tgt->prev = icv->target_data;
   1638   icv->target_data = tgt;
   1639 }
   1640 
   1641 void
   1642 GOMP_target_end_data (void)
   1643 {
   1644   struct gomp_task_icv *icv = gomp_icv (false);
   1645   if (icv->target_data)
   1646     {
   1647       struct target_mem_desc *tgt = icv->target_data;
   1648       icv->target_data = tgt->prev;
   1649       gomp_unmap_vars (tgt, true);
   1650     }
   1651 }
   1652 
   1653 void
   1654 GOMP_target_update (int device, const void *unused, size_t mapnum,
   1655 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
   1656 {
   1657   struct gomp_device_descr *devicep = resolve_device (device);
   1658 
   1659   if (devicep == NULL
   1660       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   1661       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   1662     return;
   1663 
   1664   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
   1665 }
   1666 
/* Entry point for the OpenMP 4.5 "#pragma omp target update" construct,
   with depend/nowait support.  DEVICE selects the target device; the
   MAPNUM mappings in HOSTADDRS/SIZES/KINDS are refreshed between host
   and device.  FLAGS carries GOMP_TARGET_FLAG_* bits; DEPEND is the
   depend clause array, or NULL when no depend clauses are present.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* nowait + depend: defer the update into a target task.
		 If the task was queued (returns true), the update will
		 run asynchronously; otherwise fall through and do it
		 synchronously below.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (team
		  && (gomp_team_barrier_cancelled (&team->barrier)
		      || (thr->task->taskgroup
			  && thr->task->taskgroup->cancelled)))
		return;

	      /* No nowait: block here until the dependencies resolve,
		 then perform the update inline.  */
	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to do when offloading is impossible or pointless (shared
     memory device means host and device data are already coherent).  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  /* short_mapkind == true: KINDS entries are unsigned short.  */
  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
   1726 
/* Implement the unmapping side of "#pragma omp target exit data" on
   DEVICEP: for each of the MAPNUM entries in HOSTADDRS/SIZES/KINDS,
   decrement (or for delete kinds, zero) the mapping's reference count,
   copy data back to the host for "from" kinds, and remove the mapping
   entirely once its refcount drops to zero.  Called with KINDS in the
   unsigned short (OpenMP 4.5) encoding; only the low type byte is
   examined.  Holds DEVICEP->lock for the whole walk.  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  /* Mask extracting the map-kind type byte from a KINDS entry.  */
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already shut down; nothing left to unmap.  */
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  /* Zero-length array sections need the special lookup that also
	     matches zero-length entries; others use an ordinary lookup.  */
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    /* Not mapped on this device; exit data silently ignores it.  */
	    continue;

	  /* REFCOUNT_INFINITY marks mappings that must never be removed
	     (e.g. from omp_target_associate_ptr); leave those alone.  */
	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  /* "delete" kinds force the refcount to zero regardless of how
	     many references remain.  */
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  /* Copy device data back to the host: "from" only on the last
	     reference, "always from" unconditionally.  */
	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    {
	      splay_tree_remove (&devicep->mem_map, k);
	      /* A "link" mapping shadowed the original key; restore it
		 now that the shadowing entry is gone.  */
	      if (k->link_key)
		splay_tree_insert (&devicep->mem_map,
				   (splay_tree_node) k->link_key);
	      /* Drop this key's share of the target descriptor; free the
		 descriptor (and device memory) on the last share.  */
	      if (k->tgt->refcount > 1)
		k->tgt->refcount--;
	      else
		gomp_unmap_tgt (k->tgt);
	    }

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
   1797 
/* Entry point for the OpenMP 4.5 "#pragma omp target enter data" and
   "#pragma omp target exit data" constructs (distinguished by
   GOMP_TARGET_FLAG_EXIT_DATA in FLAGS).  Handles depend/nowait the same
   way as GOMP_target_update_ext, then either maps the listed variables
   (enter data) or hands them to gomp_exit_data (exit data).  */

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* nowait + depend: defer into a target task; done here if
		 the task was successfully queued.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (team
		  && (gomp_team_barrier_cancelled (&team->barrier)
		      || (thr->task->taskgroup
			  && thr->task->taskgroup->cancelled)))
		return;

	      /* No nowait: wait for the dependencies, then proceed
		 synchronously.  */
	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to map/unmap when offloading is impossible or the device
     shares memory with the host.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    /* enter data: map each entry.  A GOMP_MAP_STRUCT entry covers
       itself plus sizes[i] following component entries, mapped as one
       group; the loop index is advanced past them.  */
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
   1870 
/* Task function for deferred target constructs (target, target update,
   target enter/exit data with nowait).  DATA is the gomp_target_task.
   Returns true when an asynchronous device run was started (the task
   must then wait for completion via the plugin callback), false when
   all work for this invocation is finished.  */

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      /* A deferred "#pragma omp target" region.  */
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  /* Cannot offload: run the host version synchronously.  */
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  /* Second invocation, after the async device run completed:
	     just tear down the mappings.  */
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      /* First invocation: map the variables (unless the device shares
	 host memory) and kick off an asynchronous run.  */
      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      /* The plugin will invoke GOMP_PLUGIN_target_task_completion with
	 TTASK when the device run finishes.  */
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    /* Deferred data construct with nothing to do on this device.  */
    return false;

  /* Deferred target update or enter/exit data; mirrors the synchronous
     paths in GOMP_target_update_ext / GOMP_target_enter_exit_data.  */
  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  /* A struct entry plus its sizes[i] component entries are
	     mapped as one group.  */
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}
   1942 
   1943 void
   1944 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
   1945 {
   1946   if (thread_limit)
   1947     {
   1948       struct gomp_task_icv *icv = gomp_icv (true);
   1949       icv->thread_limit_var
   1950 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
   1951     }
   1952   (void) num_teams;
   1953 }
   1954 
   1955 void *
   1956 omp_target_alloc (size_t size, int device_num)
   1957 {
   1958   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
   1959     return malloc (size);
   1960 
   1961   if (device_num < 0)
   1962     return NULL;
   1963 
   1964   struct gomp_device_descr *devicep = resolve_device (device_num);
   1965   if (devicep == NULL)
   1966     return NULL;
   1967 
   1968   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   1969       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   1970     return malloc (size);
   1971 
   1972   gomp_mutex_lock (&devicep->lock);
   1973   void *ret = devicep->alloc_func (devicep->target_id, size);
   1974   gomp_mutex_unlock (&devicep->lock);
   1975   return ret;
   1976 }
   1977 
   1978 void
   1979 omp_target_free (void *device_ptr, int device_num)
   1980 {
   1981   if (device_ptr == NULL)
   1982     return;
   1983 
   1984   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
   1985     {
   1986       free (device_ptr);
   1987       return;
   1988     }
   1989 
   1990   if (device_num < 0)
   1991     return;
   1992 
   1993   struct gomp_device_descr *devicep = resolve_device (device_num);
   1994   if (devicep == NULL)
   1995     return;
   1996 
   1997   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   1998       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   1999     {
   2000       free (device_ptr);
   2001       return;
   2002     }
   2003 
   2004   gomp_mutex_lock (&devicep->lock);
   2005   gomp_free_device_memory (devicep, device_ptr);
   2006   gomp_mutex_unlock (&devicep->lock);
   2007 }
   2008 
   2009 int
   2010 omp_target_is_present (void *ptr, int device_num)
   2011 {
   2012   if (ptr == NULL)
   2013     return 1;
   2014 
   2015   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
   2016     return 1;
   2017 
   2018   if (device_num < 0)
   2019     return 0;
   2020 
   2021   struct gomp_device_descr *devicep = resolve_device (device_num);
   2022   if (devicep == NULL)
   2023     return 0;
   2024 
   2025   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   2026       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   2027     return 1;
   2028 
   2029   gomp_mutex_lock (&devicep->lock);
   2030   struct splay_tree_s *mem_map = &devicep->mem_map;
   2031   struct splay_tree_key_s cur_node;
   2032 
   2033   cur_node.host_start = (uintptr_t) ptr;
   2034   cur_node.host_end = cur_node.host_start;
   2035   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
   2036   int ret = n != NULL;
   2037   gomp_mutex_unlock (&devicep->lock);
   2038   return ret;
   2039 }
   2040 
/* omp_target_memcpy: copy LENGTH bytes from SRC+SRC_OFFSET on device
   SRC_DEVICE_NUM to DST+DST_OFFSET on device DST_DEVICE_NUM.  A device
   number of GOMP_DEVICE_HOST_FALLBACK (or a shared-memory /
   non-offloadable device) means host memory.  Returns 0 on success,
   EINVAL on invalid devices, plugin failure, or a copy between two
   distinct offload devices (not supported here).  */

int
omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
		   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      /* A shared-memory or non-offloadable device behaves like host
	 memory: clear the descriptor so the host path is taken.  */
      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  /* Four cases: host->host, host->device, device->host, and copies
     within a single device.  Each device path runs under that device's
     lock and reports plugin failure as EINVAL.  */
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* Copies between two different offload devices are not supported.  */
  return EINVAL;
}
   2108 
/* Recursive worker for omp_target_memcpy_rect: copy a NUM_DIMS-
   dimensional rectangular section of ELEMENT_SIZE-byte elements from
   SRC to DST.  VOLUME gives the extent to copy in each dimension,
   DST_OFFSETS/SRC_OFFSETS the starting element offsets, and
   DST_DIMENSIONS/SRC_DIMENSIONS the full array extents.  A NULL
   device descriptor means host memory on that side.  Returns 0 on
   success, EINVAL on overflow or plugin failure.  Callers hold the
   relevant device lock.  */

static int
omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      /* Base case: one contiguous run of VOLUME[0] elements.  All byte
	 computations are overflow-checked.  */
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (char *) src + src_off, length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (char *) src + src_off, length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (char *) src + src_off, length);
      else
	/* Two distinct offload devices: unsupported.  */
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* Size in bytes of one slice along the outermost dimension, i.e. the
     product of the inner extents times the element size.  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  /* Recurse over each slice in the outermost dimension, advancing both
     byte offsets by one full slice per iteration.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
   2180 
/* omp_target_memcpy_rect: copy a NUM_DIMS-dimensional rectangular
   section between DST on DST_DEVICE_NUM and SRC on SRC_DEVICE_NUM.
   With both DST and SRC NULL, returns the number of dimensions
   supported (INT_MAX here, since the recursion handles any depth).
   Returns 0 on success, EINVAL on invalid devices, overflow, plugin
   failure, or a copy between two distinct offload devices.  */

int
omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  /* Query form of the API: report the supported dimension count.  */
  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory / non-offloadable devices are treated as host
	 memory (NULL descriptor).  */
      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  /* Copies between two different offload devices are unsupported.  */
  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  /* At most one device is involved here; lock it for the whole copy so
     the worker can call the plugin without further locking.  */
  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
   2239 
/* omp_target_associate_ptr: record that [HOST_PTR, HOST_PTR+SIZE) on
   the host corresponds to DEVICE_PTR+DEVICE_OFFSET on device
   DEVICE_NUM, so subsequent map clauses reuse that device storage
   instead of allocating.  Succeeds (returns 0) either by creating a
   new permanent mapping or if an existing mapping already covers the
   range with the same device address; otherwise returns EINVAL.  Not
   valid for the host fallback device or shared-memory devices.  */

int
omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
			  size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An overlapping mapping exists; only succeed if it maps the
	 whole requested host range to exactly the requested device
	 address.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Build a minimal target descriptor by hand.  tgt_start/tgt_end
	 of 0 and to_free == NULL mark it as user-provided device
	 memory: nothing to free on unmap (see
	 omp_target_disassociate_ptr, which checks these fields).  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      /* With tgt_start == 0 the device address is carried entirely in
	 tgt_offset.  */
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      /* REFCOUNT_INFINITY: never removed by ordinary unmapping, only
	 by omp_target_disassociate_ptr.  */
      k->refcount = REFCOUNT_INFINITY;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
   2301 
/* omp_target_disassociate_ptr: undo an omp_target_associate_ptr for
   PTR on device DEVICE_NUM.  Succeeds (returns 0) only when PTR is
   the start of a mapping that was created by omp_target_associate_ptr
   (recognized by REFCOUNT_INFINITY, tgt_start == 0, to_free == NULL)
   and is not currently in use by a target region (tgt->refcount == 1,
   list_count == 0); otherwise returns EINVAL.  */

int
omp_target_disassociate_ptr (void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  /* Zero-length lookup starting at PTR.  */
  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      /* Frees the descriptor; with to_free == NULL no device memory is
	 released — the user owns it.  */
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
   2343 
   2344 #ifdef PLUGIN_SUPPORT
   2345 
   2346 /* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   2347    in PLUGIN_NAME.
   2348    The handles of the found functions are stored in the corresponding fields
   2349    of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
   2350 
static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* Refuse plugins built against a different libgomp plugin ABI.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  /* Functions every plugin must provide.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      /* OpenMP offloading additionally requires execution entry points;
	 can_run is optional.  */
      DLSYM (run);
      DLSYM (async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.register_async_cleanup,
			 openacc_register_async_cleanup)
	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
	  || !DLSYM_OPT (openacc.async_wait_all_async,
			 openacc_async_wait_all_async)
	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      /* The CUDA helper functions are all-or-nothing: count how many
	 of the four resolved.  */
      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  /* Report the failure, release the handle if we got one, and let the
     caller skip this plugin.  */
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
   2454 
   2455 /* This function finalizes all initialized devices.  */
   2456 
   2457 static void
   2458 gomp_target_fini (void)
   2459 {
   2460   int i;
   2461   for (i = 0; i < num_devices; i++)
   2462     {
   2463       bool ret = true;
   2464       struct gomp_device_descr *devicep = &devices[i];
   2465       gomp_mutex_lock (&devicep->lock);
   2466       if (devicep->state == GOMP_DEVICE_INITIALIZED)
   2467 	{
   2468 	  ret = devicep->fini_device_func (devicep->target_id);
   2469 	  devicep->state = GOMP_DEVICE_FINALIZED;
   2470 	}
   2471       gomp_mutex_unlock (&devicep->lock);
   2472       if (!ret)
   2473 	gomp_fatal ("device finalization failed");
   2474     }
   2475 }
   2476 
   2477 /* This function initializes the runtime needed for offloading.
   2478    It parses the list of offload targets and tries to load the plugins for
   2479    these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   2480    will be set, and the array DEVICES initialized, containing descriptors for
   2481    corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
   2482    by the others.  */
   2483 
   2484 static void
   2485 gomp_target_init (void)
   2486 {
   2487   const char *prefix ="libgomp-plugin-";
   2488   const char *suffix = SONAME_SUFFIX (1);
   2489   const char *cur, *next;
   2490   char *plugin_name;
   2491   int i, new_num_devices;
   2492 
   2493   num_devices = 0;
   2494   devices = NULL;
   2495 
   2496   cur = OFFLOAD_TARGETS;
   2497   if (*cur)
   2498     do
   2499       {
   2500 	struct gomp_device_descr current_device;
   2501 
   2502 	next = strchr (cur, ',');
   2503 
   2504 	plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
   2505 				       + strlen (prefix) + strlen (suffix));
   2506 	if (!plugin_name)
   2507 	  {
   2508 	    num_devices = 0;
   2509 	    break;
   2510 	  }
   2511 
   2512 	strcpy (plugin_name, prefix);
   2513 	strncat (plugin_name, cur, next ? next - cur : strlen (cur));
   2514 	strcat (plugin_name, suffix);
   2515 
   2516 	if (gomp_load_plugin_for_device (&current_device, plugin_name))
   2517 	  {
   2518 	    new_num_devices = current_device.get_num_devices_func ();
   2519 	    if (new_num_devices >= 1)
   2520 	      {
   2521 		/* Augment DEVICES and NUM_DEVICES.  */
   2522 
   2523 		devices = realloc (devices, (num_devices + new_num_devices)
   2524 				   * sizeof (struct gomp_device_descr));
   2525 		if (!devices)
   2526 		  {
   2527 		    num_devices = 0;
   2528 		    free (plugin_name);
   2529 		    break;
   2530 		  }
   2531 
   2532 		current_device.name = current_device.get_name_func ();
   2533 		/* current_device.capabilities has already been set.  */
   2534 		current_device.type = current_device.get_type_func ();
   2535 		current_device.mem_map.root = NULL;
   2536 		current_device.state = GOMP_DEVICE_UNINITIALIZED;
   2537 		current_device.openacc.data_environ = NULL;
   2538 		for (i = 0; i < new_num_devices; i++)
   2539 		  {
   2540 		    current_device.target_id = i;
   2541 		    devices[num_devices] = current_device;
   2542 		    gomp_mutex_init (&devices[num_devices].lock);
   2543 		    num_devices++;
   2544 		  }
   2545 	      }
   2546 	  }
   2547 
   2548 	free (plugin_name);
   2549 	cur = next + 1;
   2550       }
   2551     while (next);
   2552 
   2553   /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
   2554      NUM_DEVICES_OPENMP.  */
   2555   struct gomp_device_descr *devices_s
   2556     = malloc (num_devices * sizeof (struct gomp_device_descr));
   2557   if (!devices_s)
   2558     {
   2559       num_devices = 0;
   2560       free (devices);
   2561       devices = NULL;
   2562     }
   2563   num_devices_openmp = 0;
   2564   for (i = 0; i < num_devices; i++)
   2565     if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   2566       devices_s[num_devices_openmp++] = devices[i];
   2567   int num_devices_after_openmp = num_devices_openmp;
   2568   for (i = 0; i < num_devices; i++)
   2569     if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
   2570       devices_s[num_devices_after_openmp++] = devices[i];
   2571   free (devices);
   2572   devices = devices_s;
   2573 
   2574   for (i = 0; i < num_devices; i++)
   2575     {
   2576       /* The 'devices' array can be moved (by the realloc call) until we have
   2577 	 found all the plugins, so registering with the OpenACC runtime (which
   2578 	 takes a copy of the pointer argument) must be delayed until now.  */
   2579       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
   2580 	goacc_register (&devices[i]);
   2581     }
   2582 
   2583   if (atexit (gomp_target_fini) != 0)
   2584     gomp_fatal ("atexit failed");
   2585 }
   2586 
   2587 #else /* PLUGIN_SUPPORT */
   2588 /* If dlfcn.h is unavailable we always fallback to host execution.
   2589    GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
  /* Intentionally empty: without PLUGIN_SUPPORT (no dlfcn.h) there are no
     offload devices to discover, and execution always falls back to the
     host.  */
}
   2594 #endif /* PLUGIN_SUPPORT */
   2595