Home | History | Annotate | Line # | Download | only in libgomp
target.c revision 1.1.1.7
      1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
      2    Contributed by Jakub Jelinek <jakub (at) redhat.com>.
      3 
      4    This file is part of the GNU Offloading and Multi Processing Library
      5    (libgomp).
      6 
      7    Libgomp is free software; you can redistribute it and/or modify it
      8    under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3, or (at your option)
     10    any later version.
     11 
     12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15    more details.
     16 
     17    Under Section 7 of GPL version 3, you are granted additional
     18    permissions described in the GCC Runtime Library Exception, version
     19    3.1, as published by the Free Software Foundation.
     20 
     21    You should have received a copy of the GNU General Public License and
     22    a copy of the GCC Runtime Library Exception along with this program;
     23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24    <http://www.gnu.org/licenses/>.  */
     25 
     26 /* This file contains the support of offloading.  */
     27 
     28 #include "config.h"
     29 #include "libgomp.h"
     30 #include "oacc-plugin.h"
     31 #include "oacc-int.h"
     32 #include "gomp-constants.h"
     33 #include <limits.h>
     34 #include <stdbool.h>
     35 #include <stdlib.h>
     36 #ifdef HAVE_INTTYPES_H
     37 # include <inttypes.h>  /* For PRIu64.  */
     38 #endif
     39 #include <string.h>
     40 #include <assert.h>
     41 #include <errno.h>
     42 
     43 #ifdef PLUGIN_SUPPORT
     44 #include <dlfcn.h>
     45 #include "plugin-suffix.h"
     46 #endif
     47 
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  /* Plugin interface version the image was built against.  */
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
     80 
     81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
     82 
     83 static void *
     84 gomp_realloc_unlock (void *old, size_t size)
     85 {
     86   void *ret = realloc (old, size);
     87   if (ret == NULL)
     88     {
     89       gomp_mutex_unlock (&register_lock);
     90       gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
     91     }
     92   return ret;
     93 }
     94 
/* Run the offloading initialization (gomp_target_init) exactly once, no
   matter how many threads race to call this; pthread_once provides the
   once-only guarantee via gomp_is_initialized.  */
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
    100 
/* Return the number of GOMP_OFFLOAD_CAP_OPENMP_400 devices, ensuring the
   offloading plugins have been initialized first.  */
attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
    107 
    108 static struct gomp_device_descr *
    109 resolve_device (int device_id)
    110 {
    111   if (device_id == GOMP_DEVICE_ICV)
    112     {
    113       struct gomp_task_icv *icv = gomp_icv (false);
    114       device_id = icv->default_device_var;
    115     }
    116 
    117   if (device_id < 0 || device_id >= gomp_get_num_devices ())
    118     return NULL;
    119 
    120   gomp_mutex_lock (&devices[device_id].lock);
    121   if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    122     gomp_init_device (&devices[device_id]);
    123   else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    124     {
    125       gomp_mutex_unlock (&devices[device_id].lock);
    126       return NULL;
    127     }
    128   gomp_mutex_unlock (&devices[device_id].lock);
    129 
    130   return &devices[device_id];
    131 }
    132 
    133 
/* Look KEY up in MEM_MAP.  For a zero-length key (host_start == host_end)
   also probe adjacent mappings: first with the end extended by one byte,
   then with the start moved back by one byte, and finally the exact
   zero-length range itself.  This lets zero-length array sections match a
   mapping they abut on either side.  The temporary adjustments to *KEY
   are always undone before returning.  */
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  /* Probe [host_start, host_end + 1) first.  */
  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  /* Then [host_start - 1, host_end).  */
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  /* Finally the exact zero-length range.  */
  return splay_tree_lookup (mem_map, key);
}
    152 
/* Like gomp_map_lookup, but for a zero-length KEY only probe with the end
   extended by one byte (i.e. match a mapping KEY abuts from the left);
   the start is not decremented.  *KEY is restored before returning.  */
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
    164 
    165 static inline void
    166 gomp_device_copy (struct gomp_device_descr *devicep,
    167 		  bool (*copy_func) (int, void *, const void *, size_t),
    168 		  const char *dst, void *dstaddr,
    169 		  const char *src, const void *srcaddr,
    170 		  size_t size)
    171 {
    172   if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    173     {
    174       gomp_mutex_unlock (&devicep->lock);
    175       gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
    176 		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    177     }
    178 }
    179 
/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
   host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  /* Descriptor of the device memory block the staged copies target.  */
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
    219 
    220 /* Add region with device tgt_start relative offset and length to CBUF.  */
    221 
    222 static inline void
    223 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
    224 {
    225   if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    226     return;
    227   if (cbuf->chunk_cnt)
    228     {
    229       if (cbuf->chunk_cnt < 0)
    230 	return;
    231       if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
    232 	{
    233 	  cbuf->chunk_cnt = -1;
    234 	  return;
    235 	}
    236       if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
    237 	{
    238 	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
    239 	  cbuf->use_cnt++;
    240 	  return;
    241 	}
    242       /* If the last chunk is only used by one mapping, discard it,
    243 	 as it will be one host to device copy anyway and
    244 	 memcpying it around will only waste cycles.  */
    245       if (cbuf->use_cnt == 1)
    246 	cbuf->chunk_cnt--;
    247     }
    248   cbuf->chunks[cbuf->chunk_cnt].start = start;
    249   cbuf->chunks[cbuf->chunk_cnt].end = start + len;
    250   cbuf->chunk_cnt++;
    251   cbuf->use_cnt = 1;
    252 }
    253 
    254 /* Return true for mapping kinds which need to copy data from the
    255    host to device for regions that weren't previously mapped.  */
    256 
    257 static inline bool
    258 gomp_to_device_kind_p (int kind)
    259 {
    260   switch (kind)
    261     {
    262     case GOMP_MAP_ALLOC:
    263     case GOMP_MAP_FROM:
    264     case GOMP_MAP_FORCE_ALLOC:
    265     case GOMP_MAP_ALWAYS_FROM:
    266       return false;
    267     default:
    268       return true;
    269     }
    270 }
    271 
/* Copy SZ bytes from host address H to device address D on DEVICEP.
   If CBUF is non-NULL and D falls within one of its coalesced chunks,
   stage the bytes into CBUF->buf instead (to be flushed to the device
   in one larger transfer later); otherwise copy directly.  */
static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      /* Offset of D within the newly allocated device block; chunk
	 offsets are relative to the same tgt_start base.  */
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search for the chunk containing DOFF; chunks are
	     kept sorted and non-overlapping by gomp_coalesce_buf_add.  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  /* A region must lie entirely within its chunk.  */
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  /* buf mirrors the chunk area beginning at
		     chunks[0].start.  */
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
    304 
/* Copy SZ bytes from device address D on DEVICEP back to host address H.
   Thin wrapper selecting the device's dev2host hook.  */
static void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    void *h, const void *d, size_t sz)
{
  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
    311 
    312 static void
    313 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
    314 {
    315   if (!devicep->free_func (devicep->target_id, devptr))
    316     {
    317       gomp_mutex_unlock (&devicep->lock);
    318       gomp_fatal ("error in freeing device memory block at %p", devptr);
    319     }
    320 }
    321 
    322 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
    323    gomp_map_0len_lookup found oldn for newn.
    324    Helper function of gomp_map_vars.  */
    325 
    326 static inline void
    327 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
    328 			splay_tree_key newn, struct target_var_desc *tgt_var,
    329 			unsigned char kind, struct gomp_coalesce_buf *cbuf)
    330 {
    331   tgt_var->key = oldn;
    332   tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
    333   tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
    334   tgt_var->offset = newn->host_start - oldn->host_start;
    335   tgt_var->length = newn->host_end - newn->host_start;
    336 
    337   if ((kind & GOMP_MAP_FLAG_FORCE)
    338       || oldn->host_start > newn->host_start
    339       || oldn->host_end < newn->host_end)
    340     {
    341       gomp_mutex_unlock (&devicep->lock);
    342       gomp_fatal ("Trying to map into device [%p..%p) object when "
    343 		  "[%p..%p) is already mapped",
    344 		  (void *) newn->host_start, (void *) newn->host_end,
    345 		  (void *) oldn->host_start, (void *) oldn->host_end);
    346     }
    347 
    348   if (GOMP_MAP_ALWAYS_TO_P (kind))
    349     gomp_copy_host2dev (devicep,
    350 			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
    351 				  + newn->host_start - oldn->host_start),
    352 			(void *) newn->host_start,
    353 			newn->host_end - newn->host_start, cbuf);
    354 
    355   if (oldn->refcount != REFCOUNT_INFINITY)
    356     oldn->refcount++;
    357 }
    358 
/* Fetch the IDX-th map kind from KINDS, which is an array of 16-bit
   values when SHORT_MAPKIND is true and of 8-bit values otherwise.  */
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (short_mapkind)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
    365 
/* Initialize the pointer stored at device address TGT->tgt_start +
   TARGET_OFFSET so that on the device it holds the translated value of
   the host pointer HOST_PTR (adjusted by BIAS).  A NULL host pointer
   translates to a NULL device pointer.  The pointer's target must
   already be mapped; otherwise the device lock is released and the
   process aborts.  CBUF, if non-NULL, may coalesce the write.  */
static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* NULL on the host maps to NULL on the device.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  /* Zero-length lookup; gomp_map_lookup also probes abutting mappings.  */
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  /* Translate the biased host address to the device address at the same
     offset within the found mapping.  */
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
    404 
/* Map the I-th element of a GOMP_MAP_STRUCT clause whose enclosing
   structure mapping N already exists.  FIRST indexes the first element
   of the struct in HOSTADDRS/SIZES/KINDS.  The element must be covered
   by the same target_mem_desc as N with a consistent host/device offset;
   zero-length elements may also match a mapping they abut on either
   side.  If no such mapping exists, the device lock is released and the
   process aborts.  */
static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  /* Struct element maps always use short (16-bit) map kinds here.  */
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      /* A zero-length element may match a mapping ending right before
	 it...  */
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask, cbuf);
	      return;
	    }
	}
      /* ... or one starting exactly at it.  */
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
    465 
    466 static inline uintptr_t
    467 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
    468 {
    469   if (tgt->list[i].key != NULL)
    470     return tgt->list[i].key->tgt->tgt_start
    471 	   + tgt->list[i].key->tgt_offset
    472 	   + tgt->list[i].offset;
    473   if (tgt->list[i].offset == ~(uintptr_t) 0)
    474     return (uintptr_t) hostaddrs[i];
    475   if (tgt->list[i].offset == ~(uintptr_t) 1)
    476     return 0;
    477   if (tgt->list[i].offset == ~(uintptr_t) 2)
    478     return tgt->list[i + 1].key->tgt->tgt_start
    479 	   + tgt->list[i + 1].key->tgt_offset
    480 	   + tgt->list[i + 1].offset
    481 	   + (uintptr_t) hostaddrs[i]
    482 	   - (uintptr_t) hostaddrs[i + 1];
    483   return tgt->tgt_start + tgt->list[i].offset;
    484 }
    485 
    486 attribute_hidden struct target_mem_desc *
    487 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
    488 	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
    489 	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
    490 {
    491   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
    492   bool has_firstprivate = false;
    493   const int rshift = short_mapkind ? 8 : 3;
    494   const int typemask = short_mapkind ? 0xff : 0x7;
    495   struct splay_tree_s *mem_map = &devicep->mem_map;
    496   struct splay_tree_key_s cur_node;
    497   struct target_mem_desc *tgt
    498     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
    499   tgt->list_count = mapnum;
    500   tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
    501   tgt->device_descr = devicep;
    502   struct gomp_coalesce_buf cbuf, *cbufp = NULL;
    503 
    504   if (mapnum == 0)
    505     {
    506       tgt->tgt_start = 0;
    507       tgt->tgt_end = 0;
    508       return tgt;
    509     }
    510 
    511   tgt_align = sizeof (void *);
    512   tgt_size = 0;
    513   cbuf.chunks = NULL;
    514   cbuf.chunk_cnt = -1;
    515   cbuf.use_cnt = 0;
    516   cbuf.buf = NULL;
    517   if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    518     {
    519       size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
    520       cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
    521       cbuf.chunk_cnt = 0;
    522     }
    523   if (pragma_kind == GOMP_MAP_VARS_TARGET)
    524     {
    525       size_t align = 4 * sizeof (void *);
    526       tgt_align = align;
    527       tgt_size = mapnum * sizeof (void *);
    528       cbuf.chunk_cnt = 1;
    529       cbuf.use_cnt = 1 + (mapnum > 1);
    530       cbuf.chunks[0].start = 0;
    531       cbuf.chunks[0].end = tgt_size;
    532     }
    533 
    534   gomp_mutex_lock (&devicep->lock);
    535   if (devicep->state == GOMP_DEVICE_FINALIZED)
    536     {
    537       gomp_mutex_unlock (&devicep->lock);
    538       free (tgt);
    539       return NULL;
    540     }
    541 
    542   for (i = 0; i < mapnum; i++)
    543     {
    544       int kind = get_kind (short_mapkind, kinds, i);
    545       if (hostaddrs[i] == NULL
    546 	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
    547 	{
    548 	  tgt->list[i].key = NULL;
    549 	  tgt->list[i].offset = ~(uintptr_t) 0;
    550 	  continue;
    551 	}
    552       else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
    553 	{
    554 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
    555 	  cur_node.host_end = cur_node.host_start;
    556 	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
    557 	  if (n == NULL)
    558 	    {
    559 	      gomp_mutex_unlock (&devicep->lock);
    560 	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
    561 	    }
    562 	  cur_node.host_start -= n->host_start;
    563 	  hostaddrs[i]
    564 	    = (void *) (n->tgt->tgt_start + n->tgt_offset
    565 			+ cur_node.host_start);
    566 	  tgt->list[i].key = NULL;
    567 	  tgt->list[i].offset = ~(uintptr_t) 0;
    568 	  continue;
    569 	}
    570       else if ((kind & typemask) == GOMP_MAP_STRUCT)
    571 	{
    572 	  size_t first = i + 1;
    573 	  size_t last = i + sizes[i];
    574 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
    575 	  cur_node.host_end = (uintptr_t) hostaddrs[last]
    576 			      + sizes[last];
    577 	  tgt->list[i].key = NULL;
    578 	  tgt->list[i].offset = ~(uintptr_t) 2;
    579 	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
    580 	  if (n == NULL)
    581 	    {
    582 	      size_t align = (size_t) 1 << (kind >> rshift);
    583 	      if (tgt_align < align)
    584 		tgt_align = align;
    585 	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
    586 	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
    587 	      tgt_size += cur_node.host_end - cur_node.host_start;
    588 	      not_found_cnt += last - i;
    589 	      for (i = first; i <= last; i++)
    590 		{
    591 		  tgt->list[i].key = NULL;
    592 		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
    593 					     & typemask))
    594 		    gomp_coalesce_buf_add (&cbuf,
    595 					   tgt_size - cur_node.host_end
    596 					   + (uintptr_t) hostaddrs[i],
    597 					   sizes[i]);
    598 		}
    599 	      i--;
    600 	      continue;
    601 	    }
    602 	  for (i = first; i <= last; i++)
    603 	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
    604 				      sizes, kinds, NULL);
    605 	  i--;
    606 	  continue;
    607 	}
    608       else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
    609 	{
    610 	  tgt->list[i].key = NULL;
    611 	  tgt->list[i].offset = ~(uintptr_t) 1;
    612 	  has_firstprivate = true;
    613 	  continue;
    614 	}
    615       cur_node.host_start = (uintptr_t) hostaddrs[i];
    616       if (!GOMP_MAP_POINTER_P (kind & typemask))
    617 	cur_node.host_end = cur_node.host_start + sizes[i];
    618       else
    619 	cur_node.host_end = cur_node.host_start + sizeof (void *);
    620       if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
    621 	{
    622 	  tgt->list[i].key = NULL;
    623 
    624 	  size_t align = (size_t) 1 << (kind >> rshift);
    625 	  if (tgt_align < align)
    626 	    tgt_align = align;
    627 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
    628 	  gomp_coalesce_buf_add (&cbuf, tgt_size,
    629 				 cur_node.host_end - cur_node.host_start);
    630 	  tgt_size += cur_node.host_end - cur_node.host_start;
    631 	  has_firstprivate = true;
    632 	  continue;
    633 	}
    634       splay_tree_key n;
    635       if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
    636 	{
    637 	  n = gomp_map_0len_lookup (mem_map, &cur_node);
    638 	  if (!n)
    639 	    {
    640 	      tgt->list[i].key = NULL;
    641 	      tgt->list[i].offset = ~(uintptr_t) 1;
    642 	      continue;
    643 	    }
    644 	}
    645       else
    646 	n = splay_tree_lookup (mem_map, &cur_node);
    647       if (n && n->refcount != REFCOUNT_LINK)
    648 	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
    649 				kind & typemask, NULL);
    650       else
    651 	{
    652 	  tgt->list[i].key = NULL;
    653 
    654 	  size_t align = (size_t) 1 << (kind >> rshift);
    655 	  not_found_cnt++;
    656 	  if (tgt_align < align)
    657 	    tgt_align = align;
    658 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
    659 	  if (gomp_to_device_kind_p (kind & typemask))
    660 	    gomp_coalesce_buf_add (&cbuf, tgt_size,
    661 				   cur_node.host_end - cur_node.host_start);
    662 	  tgt_size += cur_node.host_end - cur_node.host_start;
    663 	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
    664 	    {
    665 	      size_t j;
    666 	      for (j = i + 1; j < mapnum; j++)
    667 		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
    668 					 & typemask))
    669 		  break;
    670 		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
    671 			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
    672 			     > cur_node.host_end))
    673 		  break;
    674 		else
    675 		  {
    676 		    tgt->list[j].key = NULL;
    677 		    i++;
    678 		  }
    679 	    }
    680 	}
    681     }
    682 
    683   if (devaddrs)
    684     {
    685       if (mapnum != 1)
    686 	{
    687 	  gomp_mutex_unlock (&devicep->lock);
    688 	  gomp_fatal ("unexpected aggregation");
    689 	}
    690       tgt->to_free = devaddrs[0];
    691       tgt->tgt_start = (uintptr_t) tgt->to_free;
    692       tgt->tgt_end = tgt->tgt_start + sizes[0];
    693     }
    694   else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    695     {
    696       /* Allocate tgt_align aligned tgt_size block of memory.  */
    697       /* FIXME: Perhaps change interface to allocate properly aligned
    698 	 memory.  */
    699       tgt->to_free = devicep->alloc_func (devicep->target_id,
    700 					  tgt_size + tgt_align - 1);
    701       if (!tgt->to_free)
    702 	{
    703 	  gomp_mutex_unlock (&devicep->lock);
    704 	  gomp_fatal ("device memory allocation fail");
    705 	}
    706 
    707       tgt->tgt_start = (uintptr_t) tgt->to_free;
    708       tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
    709       tgt->tgt_end = tgt->tgt_start + tgt_size;
    710 
    711       if (cbuf.use_cnt == 1)
    712 	cbuf.chunk_cnt--;
    713       if (cbuf.chunk_cnt > 0)
    714 	{
    715 	  cbuf.buf
    716 	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
    717 	  if (cbuf.buf)
    718 	    {
    719 	      cbuf.tgt = tgt;
    720 	      cbufp = &cbuf;
    721 	    }
    722 	}
    723     }
    724   else
    725     {
    726       tgt->to_free = NULL;
    727       tgt->tgt_start = 0;
    728       tgt->tgt_end = 0;
    729     }
    730 
    731   tgt_size = 0;
    732   if (pragma_kind == GOMP_MAP_VARS_TARGET)
    733     tgt_size = mapnum * sizeof (void *);
    734 
    735   tgt->array = NULL;
    736   if (not_found_cnt || has_firstprivate)
    737     {
    738       if (not_found_cnt)
    739 	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
    740       splay_tree_node array = tgt->array;
    741       size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
    742       uintptr_t field_tgt_base = 0;
    743 
    744       for (i = 0; i < mapnum; i++)
    745 	if (tgt->list[i].key == NULL)
    746 	  {
    747 	    int kind = get_kind (short_mapkind, kinds, i);
    748 	    if (hostaddrs[i] == NULL)
    749 	      continue;
    750 	    switch (kind & typemask)
    751 	      {
    752 		size_t align, len, first, last;
    753 		splay_tree_key n;
    754 	      case GOMP_MAP_FIRSTPRIVATE:
    755 		align = (size_t) 1 << (kind >> rshift);
    756 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
    757 		tgt->list[i].offset = tgt_size;
    758 		len = sizes[i];
    759 		gomp_copy_host2dev (devicep,
    760 				    (void *) (tgt->tgt_start + tgt_size),
    761 				    (void *) hostaddrs[i], len, cbufp);
    762 		tgt_size += len;
    763 		continue;
    764 	      case GOMP_MAP_FIRSTPRIVATE_INT:
    765 	      case GOMP_MAP_USE_DEVICE_PTR:
    766 	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
    767 		continue;
    768 	      case GOMP_MAP_STRUCT:
    769 		first = i + 1;
    770 		last = i + sizes[i];
    771 		cur_node.host_start = (uintptr_t) hostaddrs[i];
    772 		cur_node.host_end = (uintptr_t) hostaddrs[last]
    773 				    + sizes[last];
    774 		if (tgt->list[first].key != NULL)
    775 		  continue;
    776 		n = splay_tree_lookup (mem_map, &cur_node);
    777 		if (n == NULL)
    778 		  {
    779 		    size_t align = (size_t) 1 << (kind >> rshift);
    780 		    tgt_size -= (uintptr_t) hostaddrs[first]
    781 				- (uintptr_t) hostaddrs[i];
    782 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
    783 		    tgt_size += (uintptr_t) hostaddrs[first]
    784 				- (uintptr_t) hostaddrs[i];
    785 		    field_tgt_base = (uintptr_t) hostaddrs[first];
    786 		    field_tgt_offset = tgt_size;
    787 		    field_tgt_clear = last;
    788 		    tgt_size += cur_node.host_end
    789 				- (uintptr_t) hostaddrs[first];
    790 		    continue;
    791 		  }
    792 		for (i = first; i <= last; i++)
    793 		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
    794 					    sizes, kinds, cbufp);
    795 		i--;
    796 		continue;
    797 	      case GOMP_MAP_ALWAYS_POINTER:
    798 		cur_node.host_start = (uintptr_t) hostaddrs[i];
    799 		cur_node.host_end = cur_node.host_start + sizeof (void *);
    800 		n = splay_tree_lookup (mem_map, &cur_node);
    801 		if (n == NULL
    802 		    || n->host_start > cur_node.host_start
    803 		    || n->host_end < cur_node.host_end)
    804 		  {
    805 		    gomp_mutex_unlock (&devicep->lock);
    806 		    gomp_fatal ("always pointer not mapped");
    807 		  }
    808 		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
    809 		    != GOMP_MAP_ALWAYS_POINTER)
    810 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
    811 		if (cur_node.tgt_offset)
    812 		  cur_node.tgt_offset -= sizes[i];
    813 		gomp_copy_host2dev (devicep,
    814 				    (void *) (n->tgt->tgt_start
    815 					      + n->tgt_offset
    816 					      + cur_node.host_start
    817 					      - n->host_start),
    818 				    (void *) &cur_node.tgt_offset,
    819 				    sizeof (void *), cbufp);
    820 		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
    821 				      + cur_node.host_start - n->host_start;
    822 		continue;
    823 	      default:
    824 		break;
    825 	      }
    826 	    splay_tree_key k = &array->key;
    827 	    k->host_start = (uintptr_t) hostaddrs[i];
    828 	    if (!GOMP_MAP_POINTER_P (kind & typemask))
    829 	      k->host_end = k->host_start + sizes[i];
    830 	    else
    831 	      k->host_end = k->host_start + sizeof (void *);
    832 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
    833 	    if (n && n->refcount != REFCOUNT_LINK)
    834 	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
    835 				      kind & typemask, cbufp);
    836 	    else
    837 	      {
    838 		k->link_key = NULL;
    839 		if (n && n->refcount == REFCOUNT_LINK)
    840 		  {
    841 		    /* Replace target address of the pointer with target address
    842 		       of mapped object in the splay tree.  */
    843 		    splay_tree_remove (mem_map, n);
    844 		    k->link_key = n;
    845 		  }
    846 		size_t align = (size_t) 1 << (kind >> rshift);
    847 		tgt->list[i].key = k;
    848 		k->tgt = tgt;
    849 		if (field_tgt_clear != ~(size_t) 0)
    850 		  {
    851 		    k->tgt_offset = k->host_start - field_tgt_base
    852 				    + field_tgt_offset;
    853 		    if (i == field_tgt_clear)
    854 		      field_tgt_clear = ~(size_t) 0;
    855 		  }
    856 		else
    857 		  {
    858 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
    859 		    k->tgt_offset = tgt_size;
    860 		    tgt_size += k->host_end - k->host_start;
    861 		  }
    862 		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
    863 		tgt->list[i].always_copy_from
    864 		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
    865 		tgt->list[i].offset = 0;
    866 		tgt->list[i].length = k->host_end - k->host_start;
    867 		k->refcount = 1;
    868 		k->dynamic_refcount = 0;
    869 		tgt->refcount++;
    870 		array->left = NULL;
    871 		array->right = NULL;
    872 		splay_tree_insert (mem_map, array);
    873 		switch (kind & typemask)
    874 		  {
    875 		  case GOMP_MAP_ALLOC:
    876 		  case GOMP_MAP_FROM:
    877 		  case GOMP_MAP_FORCE_ALLOC:
    878 		  case GOMP_MAP_FORCE_FROM:
    879 		  case GOMP_MAP_ALWAYS_FROM:
    880 		    break;
    881 		  case GOMP_MAP_TO:
    882 		  case GOMP_MAP_TOFROM:
    883 		  case GOMP_MAP_FORCE_TO:
    884 		  case GOMP_MAP_FORCE_TOFROM:
    885 		  case GOMP_MAP_ALWAYS_TO:
    886 		  case GOMP_MAP_ALWAYS_TOFROM:
    887 		    gomp_copy_host2dev (devicep,
    888 					(void *) (tgt->tgt_start
    889 						  + k->tgt_offset),
    890 					(void *) k->host_start,
    891 					k->host_end - k->host_start, cbufp);
    892 		    break;
    893 		  case GOMP_MAP_POINTER:
    894 		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
    895 				      k->tgt_offset, sizes[i], cbufp);
    896 		    break;
    897 		  case GOMP_MAP_TO_PSET:
    898 		    gomp_copy_host2dev (devicep,
    899 					(void *) (tgt->tgt_start
    900 						  + k->tgt_offset),
    901 					(void *) k->host_start,
    902 					k->host_end - k->host_start, cbufp);
    903 
    904 		    for (j = i + 1; j < mapnum; j++)
    905 		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
    906 							 j)
    907 					       & typemask))
    908 			break;
    909 		      else if ((uintptr_t) hostaddrs[j] < k->host_start
    910 			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
    911 				   > k->host_end))
    912 			break;
    913 		      else
    914 			{
    915 			  tgt->list[j].key = k;
    916 			  tgt->list[j].copy_from = false;
    917 			  tgt->list[j].always_copy_from = false;
    918 			  if (k->refcount != REFCOUNT_INFINITY)
    919 			    k->refcount++;
    920 			  gomp_map_pointer (tgt,
    921 					    (uintptr_t) *(void **) hostaddrs[j],
    922 					    k->tgt_offset
    923 					    + ((uintptr_t) hostaddrs[j]
    924 					       - k->host_start),
    925 					    sizes[j], cbufp);
    926 			  i++;
    927 			}
    928 		    break;
    929 		  case GOMP_MAP_FORCE_PRESENT:
    930 		    {
    931 		      /* We already looked up the memory region above and it
    932 			 was missing.  */
    933 		      size_t size = k->host_end - k->host_start;
    934 		      gomp_mutex_unlock (&devicep->lock);
    935 #ifdef HAVE_INTTYPES_H
    936 		      gomp_fatal ("present clause: !acc_is_present (%p, "
    937 				  "%"PRIu64" (0x%"PRIx64"))",
    938 				  (void *) k->host_start,
    939 				  (uint64_t) size, (uint64_t) size);
    940 #else
    941 		      gomp_fatal ("present clause: !acc_is_present (%p, "
    942 				  "%lu (0x%lx))", (void *) k->host_start,
    943 				  (unsigned long) size, (unsigned long) size);
    944 #endif
    945 		    }
    946 		    break;
    947 		  case GOMP_MAP_FORCE_DEVICEPTR:
    948 		    assert (k->host_end - k->host_start == sizeof (void *));
    949 		    gomp_copy_host2dev (devicep,
    950 					(void *) (tgt->tgt_start
    951 						  + k->tgt_offset),
    952 					(void *) k->host_start,
    953 					sizeof (void *), cbufp);
    954 		    break;
    955 		  default:
    956 		    gomp_mutex_unlock (&devicep->lock);
    957 		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
    958 				kind);
    959 		  }
    960 
    961 		if (k->link_key)
    962 		  {
    963 		    /* Set link pointer on target to the device address of the
    964 		       mapped object.  */
    965 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
    966 		    /* We intentionally do not use coalescing here, as it's not
    967 		       data allocated by the current call to this function.  */
    968 		    gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
    969 					&tgt_addr, sizeof (void *), NULL);
    970 		  }
    971 		array++;
    972 	      }
    973 	  }
    974     }
    975 
    976   if (pragma_kind == GOMP_MAP_VARS_TARGET)
    977     {
    978       for (i = 0; i < mapnum; i++)
    979 	{
    980 	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
    981 	  gomp_copy_host2dev (devicep,
    982 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
    983 			      (void *) &cur_node.tgt_offset, sizeof (void *),
    984 			      cbufp);
    985 	}
    986     }
    987 
    988   if (cbufp)
    989     {
    990       long c = 0;
    991       for (c = 0; c < cbuf.chunk_cnt; ++c)
    992 	gomp_copy_host2dev (devicep,
    993 			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
    994 			    (char *) cbuf.buf + (cbuf.chunks[c].start
    995 						 - cbuf.chunks[0].start),
    996 			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
    997       free (cbuf.buf);
    998       cbuf.buf = NULL;
    999       cbufp = NULL;
   1000     }
   1001 
   1002   /* If the variable from "omp target enter data" map-list was already mapped,
   1003      tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
   1004      gomp_exit_data.  */
   1005   if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
   1006     {
   1007       free (tgt);
   1008       tgt = NULL;
   1009     }
   1010 
   1011   gomp_mutex_unlock (&devicep->lock);
   1012   return tgt;
   1013 }
   1014 
   1015 static void
   1016 gomp_unmap_tgt (struct target_mem_desc *tgt)
   1017 {
   1018   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
   1019   if (tgt->tgt_end)
   1020     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
   1021 
   1022   free (tgt->array);
   1023   free (tgt);
   1024 }
   1025 
   1026 attribute_hidden bool
   1027 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
   1028 {
   1029   bool is_tgt_unmapped = false;
   1030   splay_tree_remove (&devicep->mem_map, k);
   1031   if (k->link_key)
   1032     splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
   1033   if (k->tgt->refcount > 1)
   1034     k->tgt->refcount--;
   1035   else
   1036     {
   1037       is_tgt_unmapped = true;
   1038       gomp_unmap_tgt (k->tgt);
   1039     }
   1040   return is_tgt_unmapped;
   1041 }
   1042 
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* Nothing was tracked in the splay tree; just release the descriptor.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already shut down: no copy-back or device frees are
	 possible, only release host-side memory.  */
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* Drop one reference; remember to remove the mapping once the last
	 reference goes away.  REFCOUNT_INFINITY entries (e.g. offload
	 image mappings) are never decremented.  */
      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      /* Copy device data back to the host when this unmap releases a
	 copy-from mapping, or unconditionally for "always from" ones.  */
      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	gomp_remove_var (devicep, k);
    }

  /* Finally drop TGT's own reference; the last one frees the device
     region and the descriptor (gomp_unmap_tgt).  */
  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
   1101 
   1102 static void
   1103 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
   1104 	     size_t *sizes, void *kinds, bool short_mapkind)
   1105 {
   1106   size_t i;
   1107   struct splay_tree_key_s cur_node;
   1108   const int typemask = short_mapkind ? 0xff : 0x7;
   1109 
   1110   if (!devicep)
   1111     return;
   1112 
   1113   if (mapnum == 0)
   1114     return;
   1115 
   1116   gomp_mutex_lock (&devicep->lock);
   1117   if (devicep->state == GOMP_DEVICE_FINALIZED)
   1118     {
   1119       gomp_mutex_unlock (&devicep->lock);
   1120       return;
   1121     }
   1122 
   1123   for (i = 0; i < mapnum; i++)
   1124     if (sizes[i])
   1125       {
   1126 	cur_node.host_start = (uintptr_t) hostaddrs[i];
   1127 	cur_node.host_end = cur_node.host_start + sizes[i];
   1128 	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
   1129 	if (n)
   1130 	  {
   1131 	    int kind = get_kind (short_mapkind, kinds, i);
   1132 	    if (n->host_start > cur_node.host_start
   1133 		|| n->host_end < cur_node.host_end)
   1134 	      {
   1135 		gomp_mutex_unlock (&devicep->lock);
   1136 		gomp_fatal ("Trying to update [%p..%p) object when "
   1137 			    "only [%p..%p) is mapped",
   1138 			    (void *) cur_node.host_start,
   1139 			    (void *) cur_node.host_end,
   1140 			    (void *) n->host_start,
   1141 			    (void *) n->host_end);
   1142 	      }
   1143 
   1144 
   1145 	    void *hostaddr = (void *) cur_node.host_start;
   1146 	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
   1147 				      + cur_node.host_start - n->host_start);
   1148 	    size_t size = cur_node.host_end - cur_node.host_start;
   1149 
   1150 	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
   1151 	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
   1152 	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
   1153 	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
   1154 	  }
   1155       }
   1156   gomp_mutex_unlock (&devicep->lock);
   1157 }
   1158 
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely in the host and device compiler
   emitting variable and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  /* HOST_TABLE points at four pointers delimiting the host-side function
     table and variable table.  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  /* The plugin must yield exactly one device entry per host entry, in the
     same order (see the comment above this function).  */
  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  /* Image mappings are never removed by plain reference counting.  */
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      /* Functions are keyed by a 1-byte host address range.  */
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      /* Host and device object sizes must agree bit-for-bit, including
	 the link bit.  */
      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      /* Link variables get REFCOUNT_LINK so gomp_map_vars can later replace
	 them with the address of the actually mapped object.  */
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* The address table was allocated by the plugin; everything needed has
     been copied into the splay-tree nodes above.  */
  free (target_table);
}
   1260 
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.   */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  /* HOST_TABLE points at four pointers delimiting the host-side function
     table and variable table (same layout as in
     gomp_load_image_to_device).  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      /* Functions were inserted with a 1-byte host range; mirror that.  */
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  /* Link variables may have been remapped to a user object; go
	     through gomp_remove_var so the saved link key is restored.
	     NOTE(review): IS_TGT_UNMAPPED is overwritten on every
	     iteration, so after the loop it only reflects the last link
	     variable processed — verify this cannot miss an earlier
	     unmap and lead to a double free of NODE's tgt below.  */
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  /* NODE, if found, is the first entry of the image's node array; free the
     array and its descriptor unless gomp_remove_var already did.  */
  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
   1333 
   1334 /* This function should be called from every offload image while loading.
   1335    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   1336    the target, and TARGET_DATA needed by target plugin.  */
   1337 
   1338 void
   1339 GOMP_offload_register_ver (unsigned version, const void *host_table,
   1340 			   int target_type, const void *target_data)
   1341 {
   1342   int i;
   1343 
   1344   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
   1345     gomp_fatal ("Library too old for offload (version %u < %u)",
   1346 		GOMP_VERSION, GOMP_VERSION_LIB (version));
   1347 
   1348   gomp_mutex_lock (&register_lock);
   1349 
   1350   /* Load image to all initialized devices.  */
   1351   for (i = 0; i < num_devices; i++)
   1352     {
   1353       struct gomp_device_descr *devicep = &devices[i];
   1354       gomp_mutex_lock (&devicep->lock);
   1355       if (devicep->type == target_type
   1356 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
   1357 	gomp_load_image_to_device (devicep, version,
   1358 				   host_table, target_data, true);
   1359       gomp_mutex_unlock (&devicep->lock);
   1360     }
   1361 
   1362   /* Insert image to array of pending images.  */
   1363   offload_images
   1364     = gomp_realloc_unlock (offload_images,
   1365 			   (num_offload_images + 1)
   1366 			   * sizeof (struct offload_image_descr));
   1367   offload_images[num_offload_images].version = version;
   1368   offload_images[num_offload_images].type = target_type;
   1369   offload_images[num_offload_images].host_table = host_table;
   1370   offload_images[num_offload_images].target_data = target_data;
   1371 
   1372   num_offload_images++;
   1373   gomp_mutex_unlock (&register_lock);
   1374 }
   1375 
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  /* Legacy entry point: identical to GOMP_offload_register_ver with an
     all-zero version word.  */
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
   1382 
   1383 /* This function should be called from every offload image while unloading.
   1384    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   1385    the target, and TARGET_DATA needed by target plugin.  */
   1386 
   1387 void
   1388 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
   1389 			     int target_type, const void *target_data)
   1390 {
   1391   int i;
   1392 
   1393   gomp_mutex_lock (&register_lock);
   1394 
   1395   /* Unload image from all initialized devices.  */
   1396   for (i = 0; i < num_devices; i++)
   1397     {
   1398       struct gomp_device_descr *devicep = &devices[i];
   1399       gomp_mutex_lock (&devicep->lock);
   1400       if (devicep->type == target_type
   1401 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
   1402 	gomp_unload_image_from_device (devicep, version,
   1403 				       host_table, target_data);
   1404       gomp_mutex_unlock (&devicep->lock);
   1405     }
   1406 
   1407   /* Remove image from array of pending images.  */
   1408   for (i = 0; i < num_offload_images; i++)
   1409     if (offload_images[i].target_data == target_data)
   1410       {
   1411 	offload_images[i] = offload_images[--num_offload_images];
   1412 	break;
   1413       }
   1414 
   1415   gomp_mutex_unlock (&register_lock);
   1416 }
   1417 
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  /* Legacy entry point: identical to GOMP_offload_unregister_ver with an
     all-zero version word.  */
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
   1424 
   1425 /* This function initializes the target device, specified by DEVICEP.  DEVICEP
   1426    must be locked on entry, and remains locked on return.  */
   1427 
   1428 attribute_hidden void
   1429 gomp_init_device (struct gomp_device_descr *devicep)
   1430 {
   1431   int i;
   1432   if (!devicep->init_device_func (devicep->target_id))
   1433     {
   1434       gomp_mutex_unlock (&devicep->lock);
   1435       gomp_fatal ("device initialization failed");
   1436     }
   1437 
   1438   /* Load to device all images registered by the moment.  */
   1439   for (i = 0; i < num_offload_images; i++)
   1440     {
   1441       struct offload_image_descr *image = &offload_images[i];
   1442       if (image->type == devicep->type)
   1443 	gomp_load_image_to_device (devicep, image->version,
   1444 				   image->host_table, image->target_data,
   1445 				   false);
   1446     }
   1447 
   1448   devicep->state = GOMP_DEVICE_INITIALIZED;
   1449 }
   1450 
   1451 attribute_hidden void
   1452 gomp_unload_device (struct gomp_device_descr *devicep)
   1453 {
   1454   if (devicep->state == GOMP_DEVICE_INITIALIZED)
   1455     {
   1456       unsigned i;
   1457 
   1458       /* Unload from device all images registered at the moment.  */
   1459       for (i = 0; i < num_offload_images; i++)
   1460 	{
   1461 	  struct offload_image_descr *image = &offload_images[i];
   1462 	  if (image->type == devicep->type)
   1463 	    gomp_unload_image_from_device (devicep, image->version,
   1464 					   image->host_table,
   1465 					   image->target_data);
   1466 	}
   1467     }
   1468 }
   1469 
   1470 /* Free address mapping tables.  MM must be locked on entry, and remains locked
   1471    on return.  */
   1472 
   1473 attribute_hidden void
   1474 gomp_free_memmap (struct splay_tree_s *mem_map)
   1475 {
   1476   while (mem_map->root)
   1477     {
   1478       struct target_mem_desc *tgt = mem_map->root->key.tgt;
   1479 
   1480       splay_tree_remove (mem_map, &mem_map->root->key);
   1481       free (tgt->array);
   1482       free (tgt);
   1483     }
   1484 }
   1485 
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  /* Run FN on the host as if in a fresh initial thread: save the current
     thread state, clear it for the duration of the region, and restore it
     afterwards.  */
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      /* Preserve the encountering thread's place binding.  */
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  /* Release any team/task state FN created before restoring.  */
  gomp_free_thread (thr);
  *thr = old_thr;
}
   1503 
   1504 /* Calculate alignment and size requirements of a private copy of data shared
   1505    as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
   1506 
   1507 static inline void
   1508 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
   1509 				     unsigned short *kinds, size_t *tgt_align,
   1510 				     size_t *tgt_size)
   1511 {
   1512   size_t i;
   1513   for (i = 0; i < mapnum; i++)
   1514     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
   1515       {
   1516 	size_t align = (size_t) 1 << (kinds[i] >> 8);
   1517 	if (*tgt_align < align)
   1518 	  *tgt_align = align;
   1519 	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
   1520 	*tgt_size += sizes[i];
   1521       }
   1522 }
   1523 
   1524 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
   1525 
   1526 static inline void
   1527 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
   1528 			size_t *sizes, unsigned short *kinds, size_t tgt_align,
   1529 			size_t tgt_size)
   1530 {
   1531   uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
   1532   if (al)
   1533     tgt += tgt_align - al;
   1534   tgt_size = 0;
   1535   size_t i;
   1536   for (i = 0; i < mapnum; i++)
   1537     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
   1538       {
   1539 	size_t align = (size_t) 1 << (kinds[i] >> 8);
   1540 	tgt_size = (tgt_size + align - 1) & ~(align - 1);
   1541 	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
   1542 	hostaddrs[i] = tgt + tgt_size;
   1543 	tgt_size = tgt_size + sizes[i];
   1544       }
   1545 }
   1546 
   1547 /* Helper function of GOMP_target{,_ext} routines.  */
   1548 
   1549 static void *
   1550 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
   1551 			 void (*host_fn) (void *))
   1552 {
   1553   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
   1554     return (void *) host_fn;
   1555   else
   1556     {
   1557       gomp_mutex_lock (&devicep->lock);
   1558       if (devicep->state == GOMP_DEVICE_FINALIZED)
   1559 	{
   1560 	  gomp_mutex_unlock (&devicep->lock);
   1561 	  return NULL;
   1562 	}
   1563 
   1564       struct splay_tree_key_s k;
   1565       k.host_start = (uintptr_t) host_fn;
   1566       k.host_end = k.host_start + 1;
   1567       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
   1568       gomp_mutex_unlock (&devicep->lock);
   1569       if (tgt_fn == NULL)
   1570 	return NULL;
   1571 
   1572       return (void *) tgt_fn->tgt_offset;
   1573     }
   1574 }
   1575 
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* Fall back to running FN on the host whenever offloading is not
     possible for this device or this function.  */
  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  /* Map all variables to the device, run the region there (the kernel
     receives the block of device pointers at TGT_START), then copy back
     and unmap.  */
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
   1608 
   1609 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   1610    and several arguments have been added:
   1611    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   1612    DEPEND is array of dependencies, see GOMP_task for details.
   1613 
   1614    ARGS is a pointer to an array consisting of a variable number of both
   1615    device-independent and device-specific arguments, which can take one two
   1616    elements where the first specifies for which device it is intended, the type
   1617    and optionally also the value.  If the value is not present in the first
   1618    one, the whole second element the actual value.  The last element of the
   1619    array is a single NULL.  Among the device independent can be for example
   1620    NUM_TEAMS and THREAD_LIMIT.
   1621 
   1622    NUM_TEAMS is positive if GOMP_teams will be called in the body with
   1623    that value, or 1 if teams construct is not present, or 0, if
   1624    teams construct does not have num_teams clause and so the choice is
   1625    implementation defined, and -1 if it can't be determined on the host
   1626    what value will GOMP_teams have on the device.
   1627    THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   1628    body with that value, or 0, if teams construct does not have thread_limit
   1629    clause or the teams construct is not present, or -1 if it can't be
   1630    determined on the host what value will GOMP_teams have on the device.  */
   1631 
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  /* Set once firstprivate data has been copied, so it is not done twice
     on the paths below.  */
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      /* Replace the previous task with the new team's implicit
		 task (its ICVs were copied above), and free the old one.  */
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      /* In a final task, nowait is ignored; otherwise defer the whole
	 region as an asynchronous target task and return.  */
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  /* Fall back to host execution when there is no usable device, the
     device has no version of FN, or the device declines to run it.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared-memory device: no mapping needed, but firstprivate data
	 still has to be copied if that was not already done above.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
   1752 
   1753 /* Host fallback for GOMP_target_data{,_ext} routines.  */
   1754 
   1755 static void
   1756 gomp_target_data_fallback (void)
   1757 {
   1758   struct gomp_task_icv *icv = gomp_icv (false);
   1759   if (icv->target_data)
   1760     {
   1761       /* Even when doing a host fallback, if there are any active
   1762          #pragma omp target data constructs, need to remember the
   1763          new #pragma omp target data, otherwise GOMP_target_end_data
   1764          would get out of sync.  */
   1765       struct target_mem_desc *tgt
   1766 	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
   1767 			 GOMP_MAP_VARS_DATA);
   1768       tgt->prev = icv->target_data;
   1769       icv->target_data = tgt;
   1770     }
   1771 }
   1772 
   1773 void
   1774 GOMP_target_data (int device, const void *unused, size_t mapnum,
   1775 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
   1776 {
   1777   struct gomp_device_descr *devicep = resolve_device (device);
   1778 
   1779   if (devicep == NULL
   1780       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   1781       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
   1782     return gomp_target_data_fallback ();
   1783 
   1784   struct target_mem_desc *tgt
   1785     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
   1786 		     GOMP_MAP_VARS_DATA);
   1787   struct gomp_task_icv *icv = gomp_icv (true);
   1788   tgt->prev = icv->target_data;
   1789   icv->target_data = tgt;
   1790 }
   1791 
   1792 void
   1793 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
   1794 		      size_t *sizes, unsigned short *kinds)
   1795 {
   1796   struct gomp_device_descr *devicep = resolve_device (device);
   1797 
   1798   if (devicep == NULL
   1799       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   1800       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   1801     return gomp_target_data_fallback ();
   1802 
   1803   struct target_mem_desc *tgt
   1804     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
   1805 		     GOMP_MAP_VARS_DATA);
   1806   struct gomp_task_icv *icv = gomp_icv (true);
   1807   tgt->prev = icv->target_data;
   1808   icv->target_data = tgt;
   1809 }
   1810 
   1811 void
   1812 GOMP_target_end_data (void)
   1813 {
   1814   struct gomp_task_icv *icv = gomp_icv (false);
   1815   if (icv->target_data)
   1816     {
   1817       struct target_mem_desc *tgt = icv->target_data;
   1818       icv->target_data = tgt->prev;
   1819       gomp_unmap_vars (tgt, true);
   1820     }
   1821 }
   1822 
   1823 void
   1824 GOMP_target_update (int device, const void *unused, size_t mapnum,
   1825 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
   1826 {
   1827   struct gomp_device_descr *devicep = resolve_device (device);
   1828 
   1829   if (devicep == NULL
   1830       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   1831       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   1832     return;
   1833 
   1834   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
   1835 }
   1836 
/* Extended variant of GOMP_target_update: 16-bit map kinds, plus FLAGS
   (may contain GOMP_TARGET_FLAG_NOWAIT) and DEPEND clause information.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Defer the update as a target task.  If task creation
		 fails, fall through to the synchronous path below.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Host fallback and shared-memory devices need no data movement.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
   1915 
/* Implement "omp target exit data" semantics: for each of the MAPNUM
   mappings, decrement reference counts, copy FROM-mapped data back to the
   host when required, and remove mappings whose refcount drops to zero.  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  /* Low byte of each KINDS entry selects the map kind.  */
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device has already been shut down; nothing left to unmap.  */
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  /* Zero-length sections need the lookup variant that also matches
	     a mapping starting exactly at HOST_START.  */
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  /* "delete" forces removal regardless of the current refcount.  */
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  /* Copy device data back for "from" on last reference, and
	     unconditionally for "always from".  */
	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    {
	      splay_tree_remove (&devicep->mem_map, k);
	      /* Re-expose the "link" mapping this entry was shadowing.  */
	      if (k->link_key)
		splay_tree_insert (&devicep->mem_map,
				   (splay_tree_node) k->link_key);
	      if (k->tgt->refcount > 1)
		k->tgt->refcount--;
	      else
		gomp_unmap_tgt (k->tgt);
	    }

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
   1986 
/* Entry point for both "omp target enter data" and "omp target exit data";
   GOMP_TARGET_FLAG_EXIT_DATA in FLAGS selects which.  Handles depend
   clauses, nowait deferral and cancellation checks before mapping or
   unmapping.  */

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Defer the construct as a target task.  If task creation
		 fails, fall through to the synchronous path below.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Host fallback and shared-memory devices need no data movement.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  /* Enter data: map everything.  A GOMP_MAP_STRUCT entry is followed by
     SIZES[i] component entries, which are mapped together with it and
     skipped by the loop.  Exit data: unmap via gomp_exit_data.  */
  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
   2078 
/* Body of a deferred target task, invoked by the task scheduler.  DATA is
   the struct gomp_target_task.  Returns true if the work was handed to the
   device asynchronously (completion signalled later via the plugin), false
   if the task is finished when this function returns.  */

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      /* A target region (as opposed to target update/enter/exit data).  */
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  /* No usable device: run the host fallback synchronously.  */
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  /* Second invocation, after the device signalled completion:
	     just tear down the mappings.  */
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      /* Launch asynchronously; the plugin will mark the task finished.  */
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  /* FN == NULL: this is a target update or enter/exit data task.  */
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    /* Enter data: a GOMP_MAP_STRUCT entry is mapped together with its
       SIZES[i] component entries, which the loop then skips.  */
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}
   2150 
   2151 void
   2152 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
   2153 {
   2154   if (thread_limit)
   2155     {
   2156       struct gomp_task_icv *icv = gomp_icv (true);
   2157       icv->thread_limit_var
   2158 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
   2159     }
   2160   (void) num_teams;
   2161 }
   2162 
   2163 void *
   2164 omp_target_alloc (size_t size, int device_num)
   2165 {
   2166   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
   2167     return malloc (size);
   2168 
   2169   if (device_num < 0)
   2170     return NULL;
   2171 
   2172   struct gomp_device_descr *devicep = resolve_device (device_num);
   2173   if (devicep == NULL)
   2174     return NULL;
   2175 
   2176   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   2177       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   2178     return malloc (size);
   2179 
   2180   gomp_mutex_lock (&devicep->lock);
   2181   void *ret = devicep->alloc_func (devicep->target_id, size);
   2182   gomp_mutex_unlock (&devicep->lock);
   2183   return ret;
   2184 }
   2185 
   2186 void
   2187 omp_target_free (void *device_ptr, int device_num)
   2188 {
   2189   if (device_ptr == NULL)
   2190     return;
   2191 
   2192   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
   2193     {
   2194       free (device_ptr);
   2195       return;
   2196     }
   2197 
   2198   if (device_num < 0)
   2199     return;
   2200 
   2201   struct gomp_device_descr *devicep = resolve_device (device_num);
   2202   if (devicep == NULL)
   2203     return;
   2204 
   2205   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   2206       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   2207     {
   2208       free (device_ptr);
   2209       return;
   2210     }
   2211 
   2212   gomp_mutex_lock (&devicep->lock);
   2213   gomp_free_device_memory (devicep, device_ptr);
   2214   gomp_mutex_unlock (&devicep->lock);
   2215 }
   2216 
   2217 int
   2218 omp_target_is_present (const void *ptr, int device_num)
   2219 {
   2220   if (ptr == NULL)
   2221     return 1;
   2222 
   2223   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
   2224     return 1;
   2225 
   2226   if (device_num < 0)
   2227     return 0;
   2228 
   2229   struct gomp_device_descr *devicep = resolve_device (device_num);
   2230   if (devicep == NULL)
   2231     return 0;
   2232 
   2233   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   2234       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   2235     return 1;
   2236 
   2237   gomp_mutex_lock (&devicep->lock);
   2238   struct splay_tree_s *mem_map = &devicep->mem_map;
   2239   struct splay_tree_key_s cur_node;
   2240 
   2241   cur_node.host_start = (uintptr_t) ptr;
   2242   cur_node.host_end = cur_node.host_start;
   2243   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
   2244   int ret = n != NULL;
   2245   gomp_mutex_unlock (&devicep->lock);
   2246   return ret;
   2247 }
   2248 
   2249 int
   2250 omp_target_memcpy (void *dst, const void *src, size_t length,
   2251 		   size_t dst_offset, size_t src_offset, int dst_device_num,
   2252 		   int src_device_num)
   2253 {
   2254   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
   2255   bool ret;
   2256 
   2257   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
   2258     {
   2259       if (dst_device_num < 0)
   2260 	return EINVAL;
   2261 
   2262       dst_devicep = resolve_device (dst_device_num);
   2263       if (dst_devicep == NULL)
   2264 	return EINVAL;
   2265 
   2266       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   2267 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   2268 	dst_devicep = NULL;
   2269     }
   2270   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
   2271     {
   2272       if (src_device_num < 0)
   2273 	return EINVAL;
   2274 
   2275       src_devicep = resolve_device (src_device_num);
   2276       if (src_devicep == NULL)
   2277 	return EINVAL;
   2278 
   2279       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   2280 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   2281 	src_devicep = NULL;
   2282     }
   2283   if (src_devicep == NULL && dst_devicep == NULL)
   2284     {
   2285       memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
   2286       return 0;
   2287     }
   2288   if (src_devicep == NULL)
   2289     {
   2290       gomp_mutex_lock (&dst_devicep->lock);
   2291       ret = dst_devicep->host2dev_func (dst_devicep->target_id,
   2292 					(char *) dst + dst_offset,
   2293 					(char *) src + src_offset, length);
   2294       gomp_mutex_unlock (&dst_devicep->lock);
   2295       return (ret ? 0 : EINVAL);
   2296     }
   2297   if (dst_devicep == NULL)
   2298     {
   2299       gomp_mutex_lock (&src_devicep->lock);
   2300       ret = src_devicep->dev2host_func (src_devicep->target_id,
   2301 					(char *) dst + dst_offset,
   2302 					(char *) src + src_offset, length);
   2303       gomp_mutex_unlock (&src_devicep->lock);
   2304       return (ret ? 0 : EINVAL);
   2305     }
   2306   if (src_devicep == dst_devicep)
   2307     {
   2308       gomp_mutex_lock (&src_devicep->lock);
   2309       ret = src_devicep->dev2dev_func (src_devicep->target_id,
   2310 				       (char *) dst + dst_offset,
   2311 				       (char *) src + src_offset, length);
   2312       gomp_mutex_unlock (&src_devicep->lock);
   2313       return (ret ? 0 : EINVAL);
   2314     }
   2315   return EINVAL;
   2316 }
   2317 
/* Recursive worker for omp_target_memcpy_rect: copy a NUM_DIMS-dimensional
   rectangular subvolume (VOLUME elements per dimension, at DST_OFFSETS /
   SRC_OFFSETS within arrays of DST_DIMENSIONS / SRC_DIMENSIONS) between
   host and/or device memory.  A NULL device descriptor denotes host
   memory.  Returns 0 on success, EINVAL on failure (including size
   overflow).  The caller holds the relevant device lock.  */

static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  /* Base case: a single contiguous row.  */
  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* Compute the byte size of one slice along the outermost dimension,
     checking each multiplication for overflow.  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  /* Recurse over the remaining dimensions, one slice at a time.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
   2393 
   2394 int
   2395 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
   2396 			int num_dims, const size_t *volume,
   2397 			const size_t *dst_offsets,
   2398 			const size_t *src_offsets,
   2399 			const size_t *dst_dimensions,
   2400 			const size_t *src_dimensions,
   2401 			int dst_device_num, int src_device_num)
   2402 {
   2403   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
   2404 
   2405   if (!dst && !src)
   2406     return INT_MAX;
   2407 
   2408   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
   2409     {
   2410       if (dst_device_num < 0)
   2411 	return EINVAL;
   2412 
   2413       dst_devicep = resolve_device (dst_device_num);
   2414       if (dst_devicep == NULL)
   2415 	return EINVAL;
   2416 
   2417       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   2418 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   2419 	dst_devicep = NULL;
   2420     }
   2421   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
   2422     {
   2423       if (src_device_num < 0)
   2424 	return EINVAL;
   2425 
   2426       src_devicep = resolve_device (src_device_num);
   2427       if (src_devicep == NULL)
   2428 	return EINVAL;
   2429 
   2430       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   2431 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   2432 	src_devicep = NULL;
   2433     }
   2434 
   2435   if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
   2436     return EINVAL;
   2437 
   2438   if (src_devicep)
   2439     gomp_mutex_lock (&src_devicep->lock);
   2440   else if (dst_devicep)
   2441     gomp_mutex_lock (&dst_devicep->lock);
   2442   int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
   2443 					   volume, dst_offsets, src_offsets,
   2444 					   dst_dimensions, src_dimensions,
   2445 					   dst_devicep, src_devicep);
   2446   if (src_devicep)
   2447     gomp_mutex_unlock (&src_devicep->lock);
   2448   else if (dst_devicep)
   2449     gomp_mutex_unlock (&dst_devicep->lock);
   2450   return ret;
   2451 }
   2452 
/* Associate the device range starting at DEVICE_PTR + DEVICE_OFFSET on
   device DEVICE_NUM with the host range [HOST_PTR, HOST_PTR + SIZE).
   Returns 0 on success, EINVAL otherwise (including when an existing
   mapping conflicts).  */

int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  /* Association makes no sense for shared-memory devices.  */
  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An existing mapping is only acceptable if it already covers the
	 requested range with exactly the requested device address.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Build a fresh mapping by hand.  TGT_START == 0 and TO_FREE == NULL
	 mark it as a user association (see omp_target_disassociate_ptr);
	 REFCOUNT_INFINITY keeps ordinary unmapping from removing it.  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
   2515 
/* Undo a previous omp_target_associate_ptr of PTR on device DEVICE_NUM.
   Returns 0 on success, EINVAL if PTR was not associated via
   omp_target_associate_ptr or the mapping is still in use.  */

int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  /* Only remove mappings that carry the exact fingerprint left by
     omp_target_associate_ptr: infinite refcount, TGT_START == 0,
     TO_FREE == NULL, sole owner of the descriptor, no list entries.  */
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
   2557 
   2558 int
   2559 omp_pause_resource (omp_pause_resource_t kind, int device_num)
   2560 {
   2561   (void) kind;
   2562   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
   2563     return gomp_pause_host ();
   2564   if (device_num < 0 || device_num >= gomp_get_num_devices ())
   2565     return -1;
   2566   /* Do nothing for target devices for now.  */
   2567   return 0;
   2568 }
   2569 
   2570 int
   2571 omp_pause_resource_all (omp_pause_resource_t kind)
   2572 {
   2573   (void) kind;
   2574   if (gomp_pause_host ())
   2575     return -1;
   2576   /* Do nothing for target devices for now.  */
   2577   return 0;
   2578 }
   2579 
   2580 ialias (omp_pause_resource)
   2581 ialias (omp_pause_resource_all)
   2582 
   2583 #ifdef PLUGIN_SUPPORT
   2584 
   2585 /* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   2586    in PLUGIN_NAME.
   2587    The handles of the found functions are stored in the corresponding fields
   2588    of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
   2589 
static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  /* RTLD_LAZY: every entry point we need is looked up explicitly via
     dlsym below, so eager binding buys nothing here.  */
  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* Reject plugins built against a different libgomp plugin ABI before
     resolving anything else.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  /* Entry points every plugin must provide.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM (async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.register_async_cleanup,
			 openacc_register_async_cleanup)
	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
	  || !DLSYM_OPT (openacc.async_wait_all_async,
			 openacc_async_wait_all_async)
	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      /* The CUDA-specific OpenACC hooks are all-or-nothing: count how
	 many resolved and insist on either none or all four.  */
      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  /* PLUGIN_HANDLE is NULL when dlopen itself failed; the guard below
     keeps dlclose from being called in that case.  */
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
   2693 
   2694 /* This function finalizes all initialized devices.  */
   2695 
   2696 static void
   2697 gomp_target_fini (void)
   2698 {
   2699   int i;
   2700   for (i = 0; i < num_devices; i++)
   2701     {
   2702       bool ret = true;
   2703       struct gomp_device_descr *devicep = &devices[i];
   2704       gomp_mutex_lock (&devicep->lock);
   2705       if (devicep->state == GOMP_DEVICE_INITIALIZED)
   2706 	{
   2707 	  ret = devicep->fini_device_func (devicep->target_id);
   2708 	  devicep->state = GOMP_DEVICE_FINALIZED;
   2709 	}
   2710       gomp_mutex_unlock (&devicep->lock);
   2711       if (!ret)
   2712 	gomp_fatal ("device finalization failed");
   2713     }
   2714 }
   2715 
   2716 /* This function initializes the runtime for offloading.
   2717    It parses the list of offload plugins, and tries to load these.
   2718    On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   2719    will be set, and the array DEVICES initialized, containing descriptors for
   2720    corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
   2721    by the others.  */
   2722 
static void
gomp_target_init (void)
{
  const char *prefix ="libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  /* OFFLOAD_PLUGINS is a configure-time, comma-separated list of plugin
     base names; each becomes "libgomp-plugin-<name><suffix>" below.  */
  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devices = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	/* suffix_len + 1 copies the terminating NUL as well.  */
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    new_num_devices = current_device.get_num_devices_func ();
	    if (new_num_devices >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devices = realloc (devices, (num_devices + new_num_devices)
				   * sizeof (struct gomp_device_descr));
		if (!devices)
		  {
		    num_devices = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		current_device.openacc.data_environ = NULL;
		/* One descriptor per physical device of this plugin; the
		   copies differ only in target_id.  */
		for (i = 0; i < new_num_devices; i++)
		  {
		    current_device.target_id = i;
		    devices[num_devices] = current_device;
		    gomp_mutex_init (&devices[num_devices].lock);
		    num_devices++;
		  }
	      }
	  }

	free (plugin_name);
	/* NOTE(review): when NEXT is NULL this computes NULL + 1 before the
	   loop condition ends the iteration; the value is never used, but
	   the pointer arithmetic itself is technically undefined.  */
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      /* On allocation failure drop all devices: with NUM_DEVICES reset to
	 zero the partitioning loops below run zero iterations and DEVICES
	 ends up NULL via the assignment from DEVICES_S.  */
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
   2829 
   2830 #else /* PLUGIN_SUPPORT */
   2831 /* If dlfcn.h is unavailable we always fallback to host execution.
   2832    GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
  /* Without dlfcn.h there are no plugins to load; all GOMP_target*
     routines fall back to host execution, so there is nothing to do.  */
}
   2837 #endif /* PLUGIN_SUPPORT */
   2838