Home | History | Annotate | Line # | Download | only in libgomp
      1  1.1.1.11  mrg /* Copyright (C) 2013-2024 Free Software Foundation, Inc.
      2       1.1  mrg    Contributed by Jakub Jelinek <jakub (at) redhat.com>.
      3       1.1  mrg 
      4       1.1  mrg    This file is part of the GNU Offloading and Multi Processing Library
      5       1.1  mrg    (libgomp).
      6       1.1  mrg 
      7       1.1  mrg    Libgomp is free software; you can redistribute it and/or modify it
      8       1.1  mrg    under the terms of the GNU General Public License as published by
      9       1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
     10       1.1  mrg    any later version.
     11       1.1  mrg 
     12       1.1  mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13       1.1  mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14       1.1  mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15       1.1  mrg    more details.
     16       1.1  mrg 
     17       1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     18       1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     19       1.1  mrg    3.1, as published by the Free Software Foundation.
     20       1.1  mrg 
     21       1.1  mrg    You should have received a copy of the GNU General Public License and
     22       1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     23       1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24       1.1  mrg    <http://www.gnu.org/licenses/>.  */
     25       1.1  mrg 
     26       1.1  mrg /* This file contains the support of offloading.  */
     27       1.1  mrg 
     28       1.1  mrg #include "libgomp.h"
     29       1.1  mrg #include "oacc-plugin.h"
     30       1.1  mrg #include "oacc-int.h"
     31       1.1  mrg #include "gomp-constants.h"
     32       1.1  mrg #include <limits.h>
     33       1.1  mrg #include <stdbool.h>
     34       1.1  mrg #include <stdlib.h>
     35       1.1  mrg #ifdef HAVE_INTTYPES_H
     36       1.1  mrg # include <inttypes.h>  /* For PRIu64.  */
     37       1.1  mrg #endif
     38       1.1  mrg #include <string.h>
     39  1.1.1.11  mrg #include <stdio.h>  /* For snprintf. */
     40       1.1  mrg #include <assert.h>
     41   1.1.1.2  mrg #include <errno.h>
     42       1.1  mrg 
     43       1.1  mrg #ifdef PLUGIN_SUPPORT
     44       1.1  mrg #include <dlfcn.h>
     45       1.1  mrg #include "plugin-suffix.h"
     46       1.1  mrg #endif
     47       1.1  mrg 
     48  1.1.1.11  mrg /* Define another splay tree instantiation - for reverse offload.  */
     49  1.1.1.11  mrg #define splay_tree_prefix reverse
     50  1.1.1.11  mrg #define splay_tree_static
     51  1.1.1.11  mrg #define splay_tree_c
     52  1.1.1.11  mrg #include "splay-tree.h"
     53  1.1.1.11  mrg 
     54  1.1.1.11  mrg 
     55  1.1.1.10  mrg typedef uintptr_t *hash_entry_type;
/* Allocation hook for the hashtab.h implementation: obtain SIZE bytes,
   aborting the process (via gomp_malloc) instead of returning NULL.  */
static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}
/* Deallocation hook for the hashtab.h implementation; PTR may be NULL.  */
static inline void
htab_free (void *ptr)
{
  free (ptr);
}
     58  1.1.1.10  mrg #include "hashtab.h"
     59  1.1.1.10  mrg 
     60  1.1.1.11  mrg ialias_redirect (GOMP_task)
     61  1.1.1.11  mrg 
     62  1.1.1.10  mrg static inline hashval_t
     63  1.1.1.10  mrg htab_hash (hash_entry_type element)
     64  1.1.1.10  mrg {
     65  1.1.1.10  mrg   return hash_pointer ((void *) element);
     66  1.1.1.10  mrg }
     67  1.1.1.10  mrg 
     68  1.1.1.10  mrg static inline bool
     69  1.1.1.10  mrg htab_eq (hash_entry_type x, hash_entry_type y)
     70  1.1.1.10  mrg {
     71  1.1.1.10  mrg   return x == y;
     72  1.1.1.10  mrg }
     73  1.1.1.10  mrg 
     74   1.1.1.8  mrg #define FIELD_TGT_EMPTY (~(size_t) 0)
     75   1.1.1.8  mrg 
     76       1.1  mrg static void gomp_target_init (void);
     77       1.1  mrg 
/* The whole initialization code for offloading plugins is only run once.  */
     79       1.1  mrg static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
     80       1.1  mrg 
     81       1.1  mrg /* Mutex for offload image registration.  */
     82       1.1  mrg static gomp_mutex_t register_lock;
     83       1.1  mrg 
     84       1.1  mrg /* This structure describes an offload image.
     85       1.1  mrg    It contains type of the target device, pointer to host table descriptor, and
     86       1.1  mrg    pointer to target data.  */
     87       1.1  mrg struct offload_image_descr {
     88   1.1.1.2  mrg   unsigned version;
     89       1.1  mrg   enum offload_target_type type;
     90   1.1.1.2  mrg   const void *host_table;
     91   1.1.1.2  mrg   const void *target_data;
     92       1.1  mrg };
     93       1.1  mrg 
     94       1.1  mrg /* Array of descriptors of offload images.  */
     95       1.1  mrg static struct offload_image_descr *offload_images;
     96       1.1  mrg 
     97       1.1  mrg /* Total number of offload images.  */
     98       1.1  mrg static int num_offload_images;
     99       1.1  mrg 
    100       1.1  mrg /* Array of descriptors for all available devices.  */
    101       1.1  mrg static struct gomp_device_descr *devices;
    102       1.1  mrg 
    103       1.1  mrg /* Total number of available devices.  */
    104       1.1  mrg static int num_devices;
    105       1.1  mrg 
    106       1.1  mrg /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
    107       1.1  mrg static int num_devices_openmp;
    108       1.1  mrg 
    109  1.1.1.11  mrg /* OpenMP requires mask.  */
    110  1.1.1.11  mrg static int omp_requires_mask;
    111  1.1.1.11  mrg 
    112       1.1  mrg /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
    113       1.1  mrg 
    114       1.1  mrg static void *
    115       1.1  mrg gomp_realloc_unlock (void *old, size_t size)
    116       1.1  mrg {
    117       1.1  mrg   void *ret = realloc (old, size);
    118       1.1  mrg   if (ret == NULL)
    119       1.1  mrg     {
    120       1.1  mrg       gomp_mutex_unlock (&register_lock);
    121       1.1  mrg       gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    122       1.1  mrg     }
    123       1.1  mrg   return ret;
    124       1.1  mrg }
    125       1.1  mrg 
/* Run gomp_target_init exactly once, even if multiple threads race into
   this function; subsequent calls are no-ops.  Callers rely on this
   before touching any device state.  */
attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
    131       1.1  mrg 
/* Return the number of devices with OpenMP offloading support
   (GOMP_OFFLOAD_CAP_OPENMP_400), initializing the offloading plugins
   first if that has not happened yet.  */
attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
    138       1.1  mrg 
/* Map DEVICE_ID to a device descriptor, initializing the device on first
   use.  REMAPPED selects the GOMP_DEVICE_* encoding of DEVICE_ID (where
   GOMP_DEVICE_ICV and GOMP_DEVICE_HOST_FALLBACK are meaningful) instead
   of plain OpenMP device numbers.  Returns NULL to request host fallback;
   with OMP_TARGET_OFFLOAD=mandatory, calls gomp_fatal instead of falling
   back where the spec requires it.  */

static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      /* The default-device ICV holds a plain device number.  */
      remapped = false;
    }

  if (device_id < 0)
    {
      /* Host fallback was requested explicitly.  */
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
				 : omp_initial_device))
	return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && num_devices == 0)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but only the host device is available");
      else if (device_id == omp_invalid_device)
	gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      /* device_id == num_devices denotes the host (initial) device and is
	 allowed even under mandatory offload.  */
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != num_devices)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  /* Lazily initialize the device under its lock; a finalized device can
     never be used again.  */
  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
    197       1.1  mrg 
    198       1.1  mrg 
/* Look KEY up in MEM_MAP.  For a zero-length region (host_start ==
   host_end) an exact lookup would not intersect any mapping, so the key
   is temporarily widened: first one byte forward, then one byte backward,
   to match a mapping the empty range abuts on either side; finally fall
   back to the exact lookup.  KEY is restored before every return.  */
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  /* Probe one byte past the empty range.  */
  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  /* Probe one byte before the empty range.  */
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
    217   1.1.1.2  mrg 
    218  1.1.1.11  mrg static inline reverse_splay_tree_key
    219  1.1.1.11  mrg gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
    220  1.1.1.11  mrg {
    221  1.1.1.11  mrg   return reverse_splay_tree_lookup (mem_map_rev, key);
    222  1.1.1.11  mrg }
    223  1.1.1.11  mrg 
/* Like gomp_map_lookup, but for a zero-length KEY only probe one byte
   forward (matching a mapping that starts at or covers host_start); no
   backward probe.  KEY is restored before returning.  */
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
    235   1.1.1.2  mrg 
/* Copy SIZE bytes from SRCADDR to DSTADDR using COPY_FUNC, one of
   DEVICEP's host<->device transfer hooks.  DST and SRC are only
   human-readable labels (e.g. "host"/"dev") for the error message.
   On failure DEVICEP's lock is released before aborting via gomp_fatal,
   so the caller must hold that lock.  */
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
    250   1.1.1.3  mrg 
/* Asynchronous variant of gomp_device_copy: enqueue the SIZE-byte
   transfer on the OpenACC async queue AQ via COPY_FUNC.  SRCADDR_ORIG,
   when non-NULL and distinct from SRCADDR, is the original host address
   for which SRCADDR is a temporary bounce buffer; it is used only to
   produce a more precise error message.  On failure DEVICEP's lock is
   released before aborting.  */
static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}
    277   1.1.1.8  mrg 
/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
   host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory, stored
     as offsets relative to the owning target_mem_desc's tgt_start device
     address (see struct gomp_coalesce_buf below).  */
  size_t start, end;
};
    286   1.1.1.7  mrg 
/* Working state for batching several small host-to-device copies into a
   single transfer.  */
struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  /* Target memory descriptor whose tgt_start all chunk offsets are
     relative to.  */
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};
    305   1.1.1.6  mrg 
    306   1.1.1.6  mrg /* Maximum size of memory region considered for coalescing.  Larger copies
    307   1.1.1.6  mrg    are performed directly.  */
    308   1.1.1.6  mrg #define MAX_COALESCE_BUF_SIZE	(32 * 1024)
    309   1.1.1.6  mrg 
    310   1.1.1.6  mrg /* Maximum size of a gap in between regions to consider them being copied
    311   1.1.1.6  mrg    within the same chunk.  All the device offsets considered are within
    312   1.1.1.6  mrg    newly allocated device memory, so it isn't fatal if we copy some padding
    313   1.1.1.6  mrg    in between from host to device.  The gaps come either from alignment
    314   1.1.1.6  mrg    padding or from memory regions which are not supposed to be copied from
    315   1.1.1.6  mrg    host to device (e.g. map(alloc:), map(from:) etc.).  */
    316   1.1.1.6  mrg #define MAX_COALESCE_BUF_GAP	(4 * 1024)
    317   1.1.1.6  mrg 
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, that we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  /* Oversized and empty regions are never coalesced.  */
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      /* chunk_cnt < 0 means coalescing has been disabled for this buffer.  */
      if (cbuf->chunk_cnt < 0)
	return;
      /* Regions are expected in increasing offset order; an overlap with
	 the previous chunk disables coalescing entirely.  */
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      /* Close enough to the previous chunk: extend it instead of opening
	 a new one.  */
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  /* Start a fresh chunk covering just this region.  */
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
    356   1.1.1.6  mrg 
    357   1.1.1.6  mrg /* Return true for mapping kinds which need to copy data from the
    358   1.1.1.6  mrg    host to device for regions that weren't previously mapped.  */
    359   1.1.1.6  mrg 
    360   1.1.1.6  mrg static inline bool
    361   1.1.1.6  mrg gomp_to_device_kind_p (int kind)
    362   1.1.1.6  mrg {
    363   1.1.1.6  mrg   switch (kind)
    364   1.1.1.6  mrg     {
    365   1.1.1.6  mrg     case GOMP_MAP_ALLOC:
    366   1.1.1.6  mrg     case GOMP_MAP_FROM:
    367   1.1.1.6  mrg     case GOMP_MAP_FORCE_ALLOC:
    368   1.1.1.8  mrg     case GOMP_MAP_FORCE_FROM:
    369   1.1.1.6  mrg     case GOMP_MAP_ALWAYS_FROM:
    370  1.1.1.11  mrg     case GOMP_MAP_ALWAYS_PRESENT_FROM:
    371  1.1.1.11  mrg     case GOMP_MAP_FORCE_PRESENT:
    372   1.1.1.6  mrg       return false;
    373   1.1.1.6  mrg     default:
    374   1.1.1.6  mrg       return true;
    375   1.1.1.6  mrg     }
    376   1.1.1.6  mrg }
    377   1.1.1.6  mrg 
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.

   If CBUF is non-NULL and the destination falls inside one of its chunks,
   the data is staged into the coalescing buffer instead of being sent
   immediately; otherwise it is transferred directly (synchronously, or
   queued on AQ).  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      /* Offset of the destination within the freshly mapped block.  */
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search over the sorted, non-overlapping chunk array for
	     the chunk containing DOFF.  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  /* The region must lie entirely inside one chunk.  */
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }

		  /* In an asynchronous context, verify that CBUF isn't used
		     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
		  if (__builtin_expect (aq != NULL, 0))
		    assert (ephemeral);

		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  As we've
	     not been able to use CBUF, make a copy of the data into a
	     temporary buffer.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
		      "dev", d, "host", h, sz);
}
    447   1.1.1.3  mrg 
    448   1.1.1.8  mrg attribute_hidden void
    449   1.1.1.3  mrg gomp_copy_dev2host (struct gomp_device_descr *devicep,
    450   1.1.1.8  mrg 		    struct goacc_asyncqueue *aq,
    451   1.1.1.3  mrg 		    void *h, const void *d, size_t sz)
    452   1.1.1.3  mrg {
    453   1.1.1.8  mrg   if (__builtin_expect (aq != NULL, 0))
    454   1.1.1.8  mrg     goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
    455  1.1.1.10  mrg 			     "host", h, "dev", d, NULL, sz, aq);
    456   1.1.1.8  mrg   else
    457   1.1.1.8  mrg     gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
    458   1.1.1.3  mrg }
    459   1.1.1.3  mrg 
    460   1.1.1.3  mrg static void
    461   1.1.1.3  mrg gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
    462   1.1.1.3  mrg {
    463   1.1.1.3  mrg   if (!devicep->free_func (devicep->target_id, devptr))
    464   1.1.1.3  mrg     {
    465   1.1.1.3  mrg       gomp_mutex_unlock (&devicep->lock);
    466   1.1.1.3  mrg       gomp_fatal ("error in freeing device memory block at %p", devptr);
    467   1.1.1.3  mrg     }
    468   1.1.1.3  mrg }
    469   1.1.1.3  mrg 
/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  /* Infinite and acc-map-data refcounts are sticky: never adjusted.  */
  if (k == NULL
      || k->refcount == REFCOUNT_INFINITY
      || k->refcount == REFCOUNT_ACC_MAP_DATA)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  /* Structure-element maps share a counter: the first sibling holds it,
     the others point at it.  */
  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      /* Already adjusted once for this construct — skip.  */
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}
    502  1.1.1.10  mrg 
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier. This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero. This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  /* Infinite and acc-map-data refcounts are sticky: never adjusted, never
     copied back or removed here.  */
  if (k == NULL
      || k->refcount == REFCOUNT_INFINITY
      || k->refcount == REFCOUNT_ACC_MAP_DATA)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  /* Structure-element maps share a counter: the first sibling holds it,
     the others point at it.  */
  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  /* Seen before within this construct: don't decrement again, but
	     still compute the out-values below.  */
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      /* Distinguish "we just dropped it to zero" from "it was already
	 zero when we got here".  */
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
    580  1.1.1.10  mrg 
    581   1.1.1.2  mrg /* Handle the case where gomp_map_lookup, splay_tree_lookup or
    582   1.1.1.2  mrg    gomp_map_0len_lookup found oldn for newn.
    583       1.1  mrg    Helper function of gomp_map_vars.  */
    584       1.1  mrg 
    585       1.1  mrg static inline void
    586   1.1.1.8  mrg gomp_map_vars_existing (struct gomp_device_descr *devicep,
    587   1.1.1.8  mrg 			struct goacc_asyncqueue *aq, splay_tree_key oldn,
    588   1.1.1.2  mrg 			splay_tree_key newn, struct target_var_desc *tgt_var,
    589  1.1.1.10  mrg 			unsigned char kind, bool always_to_flag, bool implicit,
    590  1.1.1.10  mrg 			struct gomp_coalesce_buf *cbuf,
    591  1.1.1.10  mrg 			htab_t *refcount_set)
    592       1.1  mrg {
    593  1.1.1.10  mrg   assert (kind != GOMP_MAP_ATTACH
    594  1.1.1.10  mrg 	  || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
    595   1.1.1.8  mrg 
    596   1.1.1.2  mrg   tgt_var->key = oldn;
    597   1.1.1.2  mrg   tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
    598   1.1.1.2  mrg   tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
    599   1.1.1.8  mrg   tgt_var->is_attach = false;
    600   1.1.1.2  mrg   tgt_var->offset = newn->host_start - oldn->host_start;
    601  1.1.1.10  mrg 
    602  1.1.1.10  mrg   /* For implicit maps, old contained in new is valid.  */
    603  1.1.1.10  mrg   bool implicit_subset = (implicit
    604  1.1.1.10  mrg 			  && newn->host_start <= oldn->host_start
    605  1.1.1.10  mrg 			  && oldn->host_end <= newn->host_end);
    606  1.1.1.10  mrg   if (implicit_subset)
    607  1.1.1.10  mrg     tgt_var->length = oldn->host_end - oldn->host_start;
    608  1.1.1.10  mrg   else
    609  1.1.1.10  mrg     tgt_var->length = newn->host_end - newn->host_start;
    610   1.1.1.2  mrg 
    611  1.1.1.11  mrg   if (GOMP_MAP_FORCE_P (kind)
    612  1.1.1.10  mrg       /* For implicit maps, old contained in new is valid.  */
    613  1.1.1.10  mrg       || !(implicit_subset
    614  1.1.1.10  mrg 	   /* Otherwise, new contained inside old is considered valid.  */
    615  1.1.1.10  mrg 	   || (oldn->host_start <= newn->host_start
    616  1.1.1.10  mrg 	       && newn->host_end <= oldn->host_end)))
    617       1.1  mrg     {
    618       1.1  mrg       gomp_mutex_unlock (&devicep->lock);
    619       1.1  mrg       gomp_fatal ("Trying to map into device [%p..%p) object when "
    620       1.1  mrg 		  "[%p..%p) is already mapped",
    621       1.1  mrg 		  (void *) newn->host_start, (void *) newn->host_end,
    622       1.1  mrg 		  (void *) oldn->host_start, (void *) oldn->host_end);
    623       1.1  mrg     }
    624   1.1.1.2  mrg 
    625  1.1.1.10  mrg   if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    626  1.1.1.10  mrg     {
    627  1.1.1.10  mrg       /* Implicit + always should not happen. If this does occur, below
    628  1.1.1.10  mrg 	 address/length adjustment is a TODO.  */
    629  1.1.1.10  mrg       assert (!implicit_subset);
    630  1.1.1.10  mrg 
    631  1.1.1.10  mrg       if (oldn->aux && oldn->aux->attach_count)
    632  1.1.1.10  mrg 	{
    633  1.1.1.10  mrg 	  /* We have to be careful not to overwrite still attached pointers
    634  1.1.1.10  mrg 	     during the copyback to host.  */
    635  1.1.1.10  mrg 	  uintptr_t addr = newn->host_start;
    636  1.1.1.10  mrg 	  while (addr < newn->host_end)
    637  1.1.1.10  mrg 	    {
    638  1.1.1.10  mrg 	      size_t i = (addr - oldn->host_start) / sizeof (void *);
    639  1.1.1.10  mrg 	      if (oldn->aux->attach_count[i] == 0)
    640  1.1.1.10  mrg 		gomp_copy_host2dev (devicep, aq,
    641  1.1.1.10  mrg 				    (void *) (oldn->tgt->tgt_start
    642  1.1.1.10  mrg 					      + oldn->tgt_offset
    643  1.1.1.10  mrg 					      + addr - oldn->host_start),
    644  1.1.1.10  mrg 				    (void *) addr,
    645  1.1.1.10  mrg 				    sizeof (void *), false, cbuf);
    646  1.1.1.10  mrg 	      addr += sizeof (void *);
    647  1.1.1.10  mrg 	    }
    648  1.1.1.10  mrg 	}
    649  1.1.1.10  mrg       else
    650  1.1.1.10  mrg 	gomp_copy_host2dev (devicep, aq,
    651  1.1.1.10  mrg 			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
    652  1.1.1.10  mrg 				      + newn->host_start - oldn->host_start),
    653  1.1.1.10  mrg 			    (void *) newn->host_start,
    654  1.1.1.10  mrg 			    newn->host_end - newn->host_start, false, cbuf);
    655  1.1.1.10  mrg     }
    656   1.1.1.3  mrg 
    657  1.1.1.10  mrg   gomp_increment_refcount (oldn, refcount_set);
    658       1.1  mrg }
    659       1.1  mrg 
    660       1.1  mrg static int
    661   1.1.1.2  mrg get_kind (bool short_mapkind, void *kinds, int idx)
    662   1.1.1.2  mrg {
    663  1.1.1.10  mrg   if (!short_mapkind)
    664  1.1.1.10  mrg     return ((unsigned char *) kinds)[idx];
    665  1.1.1.10  mrg 
    666  1.1.1.10  mrg   int val = ((unsigned short *) kinds)[idx];
    667  1.1.1.10  mrg   if (GOMP_MAP_IMPLICIT_P (val))
    668  1.1.1.10  mrg     val &= ~GOMP_MAP_IMPLICIT;
    669  1.1.1.10  mrg   return val;
    670  1.1.1.10  mrg }
    671  1.1.1.10  mrg 
    672  1.1.1.10  mrg 
/* Return true iff entry IDX of KINDS carries the GOMP_MAP_IMPLICIT flag.
   Only the short (16-bit) kind encoding can represent the flag, so the
   answer is always false otherwise.  */

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  /* Short-circuit keeps us from reading KINDS as unsigned short when the
     array actually holds 8-bit kinds.  */
  return short_mapkind
	 && GOMP_MAP_IMPLICIT_P (((unsigned short *) kinds)[idx]);
}
    682   1.1.1.2  mrg 
    683   1.1.1.2  mrg static void
    684   1.1.1.8  mrg gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
    685   1.1.1.8  mrg 		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
    686  1.1.1.10  mrg 		  struct gomp_coalesce_buf *cbuf,
    687  1.1.1.10  mrg 		  bool allow_zero_length_array_sections)
    688   1.1.1.2  mrg {
    689   1.1.1.2  mrg   struct gomp_device_descr *devicep = tgt->device_descr;
    690   1.1.1.2  mrg   struct splay_tree_s *mem_map = &devicep->mem_map;
    691   1.1.1.2  mrg   struct splay_tree_key_s cur_node;
    692   1.1.1.2  mrg 
    693   1.1.1.2  mrg   cur_node.host_start = host_ptr;
    694   1.1.1.2  mrg   if (cur_node.host_start == (uintptr_t) NULL)
    695   1.1.1.2  mrg     {
    696   1.1.1.2  mrg       cur_node.tgt_offset = (uintptr_t) NULL;
    697   1.1.1.8  mrg       gomp_copy_host2dev (devicep, aq,
    698   1.1.1.3  mrg 			  (void *) (tgt->tgt_start + target_offset),
    699  1.1.1.10  mrg 			  (void *) &cur_node.tgt_offset, sizeof (void *),
    700  1.1.1.10  mrg 			  true, cbuf);
    701   1.1.1.2  mrg       return;
    702   1.1.1.2  mrg     }
    703   1.1.1.2  mrg   /* Add bias to the pointer value.  */
    704   1.1.1.2  mrg   cur_node.host_start += bias;
    705   1.1.1.2  mrg   cur_node.host_end = cur_node.host_start;
    706   1.1.1.2  mrg   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
    707   1.1.1.2  mrg   if (n == NULL)
    708   1.1.1.2  mrg     {
    709  1.1.1.10  mrg       if (allow_zero_length_array_sections)
    710  1.1.1.11  mrg 	cur_node.tgt_offset = cur_node.host_start;
    711  1.1.1.10  mrg       else
    712  1.1.1.10  mrg 	{
    713  1.1.1.10  mrg 	  gomp_mutex_unlock (&devicep->lock);
    714  1.1.1.10  mrg 	  gomp_fatal ("Pointer target of array section wasn't mapped");
    715  1.1.1.10  mrg 	}
    716  1.1.1.10  mrg     }
    717  1.1.1.10  mrg   else
    718  1.1.1.10  mrg     {
    719  1.1.1.10  mrg       cur_node.host_start -= n->host_start;
    720  1.1.1.10  mrg       cur_node.tgt_offset
    721  1.1.1.10  mrg 	= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
    722  1.1.1.10  mrg       /* At this point tgt_offset is target address of the
    723  1.1.1.10  mrg 	 array section.  Now subtract bias to get what we want
    724  1.1.1.10  mrg 	 to initialize the pointer with.  */
    725  1.1.1.10  mrg       cur_node.tgt_offset -= bias;
    726   1.1.1.2  mrg     }
    727   1.1.1.8  mrg   gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
    728  1.1.1.10  mrg 		      (void *) &cur_node.tgt_offset, sizeof (void *),
    729  1.1.1.10  mrg 		      true, cbuf);
    730   1.1.1.2  mrg }
    731   1.1.1.2  mrg 
/* Map structure element I (with FIRST the index of the struct's first
   element) against the existing mapping N of the enclosing structure.
   Tries, in order: an exact/0-len lookup of the element, then for
   zero-sized elements a lookup one byte before the start and one byte
   past the end.  Any hit must lie in the same target_mem_desc as N at a
   consistent offset; otherwise this is a fatal mapping error.
   Helper function of gomp_map_vars.  */

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  /* GOMP_MAP_STRUCT entries always use the short (16-bit) kind encoding.  */
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = gomp_map_0len_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  /* A hit only counts if it belongs to the same device mapping as the
     enclosing struct and sits at the matching device offset.  */
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, implicit, cbuf,
			      refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      /* Zero-sized element: probe just before its address (only valid if
	 that stays within the struct, i.e. past the first element)...  */
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  /* Temporarily shift the lookup key one byte down, then restore
	     it so cur_node still describes the element itself when passed
	     to gomp_map_vars_existing below.  */
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				      kind & typemask, false, implicit, cbuf,
				      refcount_set);
	      return;
	    }
	}
      /* ... and then probe one byte past its end, again restoring the key
	 before reuse.  */
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, implicit, cbuf,
				  refcount_set);
	  return;
	}
    }
  /* No compatible mapping of this element exists: structure elements must
     be mapped together with their siblings.  */
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
    798   1.1.1.2  mrg 
/* Attach the pointer at host address ATTACH_TO, which lies inside the
   mapped block N, to its pointed-to data on device DEVICEP: bump the
   per-pointer attach count and, on the first attachment, overwrite the
   device copy of the pointer with the device address of its target
   (found via MEM_MAP after adding BIAS).  CBUFP, if non-NULL, coalesces
   the host-to-device transfers; AQ selects an async queue.
   Called with DEVICEP->lock held; on fatal errors the lock is released
   before gomp_fatal.  */

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp,
		     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  /* Number of pointer-sized slots covering the mapped block.  */
  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  /* Lazily allocate the auxiliary data and the attach-count array (one
     counter per pointer-sized slot, zero-initialized).  */
  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  /* Only the first attachment actually rewrites the device pointer; later
     ones merely keep the count.  */
  if (n->aux->attach_count[idx] == 1)
    {
      /* Device address of the pointer slot being rewritten.  */
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      /* Host-side value of the pointer, i.e. the attachment target.  */
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  /* As a special case, allow attaching NULL host pointers.  This
	     allows e.g. unassociated Fortran pointers to be mapped
	     properly.  */
	  data = 0;

	  gomp_debug (1,
		      "%s: attaching NULL host pointer, target %p "
		      "(struct base %p)\n", __FUNCTION__, (void *) devptr,
		      (void *) (n->tgt->tgt_start + n->tgt_offset),
		      (void *) (n->tgt->tgt_start + n->tgt_offset));

	  gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			      sizeof (void *), true, cbufp);

	  return;
	}

      /* Look up the mapping of the pointed-to data; BIAS shifts the lookup
	 address (e.g. for array sections not starting at the base).  */
      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  if (allow_zero_length_array_sections)
	    /* When allowing attachment to zero-length array sections, we
	       copy the host pointer when the target region is not mapped.  */
	    data = target;
	  else
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("pointer target not mapped for attach");
	    }
	}
      else
	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
    893   1.1.1.8  mrg 
    894   1.1.1.8  mrg attribute_hidden void
    895   1.1.1.8  mrg gomp_detach_pointer (struct gomp_device_descr *devicep,
    896   1.1.1.8  mrg 		     struct goacc_asyncqueue *aq, splay_tree_key n,
    897   1.1.1.8  mrg 		     uintptr_t detach_from, bool finalize,
    898   1.1.1.8  mrg 		     struct gomp_coalesce_buf *cbufp)
    899   1.1.1.8  mrg {
    900   1.1.1.8  mrg   size_t idx;
    901   1.1.1.8  mrg 
    902   1.1.1.8  mrg   if (n == NULL)
    903   1.1.1.8  mrg     {
    904   1.1.1.8  mrg       gomp_mutex_unlock (&devicep->lock);
    905   1.1.1.8  mrg       gomp_fatal ("enclosing struct not mapped for detach");
    906   1.1.1.8  mrg     }
    907   1.1.1.8  mrg 
    908   1.1.1.8  mrg   idx = (detach_from - n->host_start) / sizeof (void *);
    909   1.1.1.8  mrg 
    910   1.1.1.8  mrg   if (!n->aux || !n->aux->attach_count)
    911   1.1.1.8  mrg     {
    912   1.1.1.8  mrg       gomp_mutex_unlock (&devicep->lock);
    913   1.1.1.8  mrg       gomp_fatal ("no attachment counters for struct");
    914   1.1.1.8  mrg     }
    915   1.1.1.8  mrg 
    916   1.1.1.8  mrg   if (finalize)
    917   1.1.1.8  mrg     n->aux->attach_count[idx] = 1;
    918   1.1.1.8  mrg 
    919   1.1.1.8  mrg   if (n->aux->attach_count[idx] == 0)
    920   1.1.1.8  mrg     {
    921   1.1.1.8  mrg       gomp_mutex_unlock (&devicep->lock);
    922   1.1.1.8  mrg       gomp_fatal ("attach count underflow");
    923   1.1.1.8  mrg     }
    924   1.1.1.8  mrg   else
    925   1.1.1.8  mrg     n->aux->attach_count[idx]--;
    926   1.1.1.8  mrg 
    927   1.1.1.8  mrg   if (n->aux->attach_count[idx] == 0)
    928   1.1.1.8  mrg     {
    929   1.1.1.8  mrg       uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
    930   1.1.1.8  mrg 			 - n->host_start;
    931   1.1.1.8  mrg       uintptr_t target = (uintptr_t) *(void **) detach_from;
    932   1.1.1.8  mrg 
    933   1.1.1.8  mrg       gomp_debug (1,
    934   1.1.1.8  mrg 		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
    935   1.1.1.8  mrg 		  __FUNCTION__, (void *) detach_from, (void *) devptr,
    936   1.1.1.8  mrg 		  (void *) (n->tgt->tgt_start + n->tgt_offset),
    937   1.1.1.8  mrg 		  (void *) target);
    938   1.1.1.8  mrg 
    939   1.1.1.8  mrg       gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
    940  1.1.1.10  mrg 			  sizeof (void *), true, cbufp);
    941   1.1.1.8  mrg     }
    942   1.1.1.8  mrg   else
    943   1.1.1.8  mrg     gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
    944   1.1.1.8  mrg 		(void *) detach_from, (int) n->aux->attach_count[idx]);
    945   1.1.1.8  mrg }
    946   1.1.1.8  mrg 
    947   1.1.1.8  mrg attribute_hidden uintptr_t
    948   1.1.1.2  mrg gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
    949   1.1.1.2  mrg {
    950   1.1.1.2  mrg   if (tgt->list[i].key != NULL)
    951   1.1.1.2  mrg     return tgt->list[i].key->tgt->tgt_start
    952   1.1.1.2  mrg 	   + tgt->list[i].key->tgt_offset
    953   1.1.1.2  mrg 	   + tgt->list[i].offset;
    954   1.1.1.8  mrg 
    955   1.1.1.8  mrg   switch (tgt->list[i].offset)
    956   1.1.1.8  mrg     {
    957   1.1.1.8  mrg     case OFFSET_INLINED:
    958   1.1.1.8  mrg       return (uintptr_t) hostaddrs[i];
    959   1.1.1.8  mrg 
    960   1.1.1.8  mrg     case OFFSET_POINTER:
    961   1.1.1.8  mrg       return 0;
    962   1.1.1.8  mrg 
    963   1.1.1.8  mrg     case OFFSET_STRUCT:
    964   1.1.1.8  mrg       return tgt->list[i + 1].key->tgt->tgt_start
    965   1.1.1.8  mrg 	     + tgt->list[i + 1].key->tgt_offset
    966   1.1.1.8  mrg 	     + tgt->list[i + 1].offset
    967   1.1.1.8  mrg 	     + (uintptr_t) hostaddrs[i]
    968   1.1.1.8  mrg 	     - (uintptr_t) hostaddrs[i + 1];
    969   1.1.1.8  mrg 
    970   1.1.1.8  mrg     default:
    971   1.1.1.8  mrg       return tgt->tgt_start + tgt->list[i].offset;
    972   1.1.1.8  mrg     }
    973       1.1  mrg }
    974       1.1  mrg 
    975   1.1.1.8  mrg static inline __attribute__((always_inline)) struct target_mem_desc *
    976   1.1.1.8  mrg gomp_map_vars_internal (struct gomp_device_descr *devicep,
    977   1.1.1.8  mrg 			struct goacc_asyncqueue *aq, size_t mapnum,
    978   1.1.1.8  mrg 			void **hostaddrs, void **devaddrs, size_t *sizes,
    979   1.1.1.8  mrg 			void *kinds, bool short_mapkind,
    980  1.1.1.10  mrg 			htab_t *refcount_set,
    981   1.1.1.8  mrg 			enum gomp_map_vars_kind pragma_kind)
    982       1.1  mrg {
    983       1.1  mrg   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
    984   1.1.1.2  mrg   bool has_firstprivate = false;
    985  1.1.1.10  mrg   bool has_always_ptrset = false;
    986  1.1.1.10  mrg   bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
    987   1.1.1.2  mrg   const int rshift = short_mapkind ? 8 : 3;
    988   1.1.1.2  mrg   const int typemask = short_mapkind ? 0xff : 0x7;
    989       1.1  mrg   struct splay_tree_s *mem_map = &devicep->mem_map;
    990       1.1  mrg   struct splay_tree_key_s cur_node;
    991       1.1  mrg   struct target_mem_desc *tgt
    992       1.1  mrg     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
    993       1.1  mrg   tgt->list_count = mapnum;
    994  1.1.1.10  mrg   tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
    995       1.1  mrg   tgt->device_descr = devicep;
    996   1.1.1.8  mrg   tgt->prev = NULL;
    997   1.1.1.6  mrg   struct gomp_coalesce_buf cbuf, *cbufp = NULL;
    998       1.1  mrg 
    999       1.1  mrg   if (mapnum == 0)
   1000   1.1.1.2  mrg     {
   1001   1.1.1.2  mrg       tgt->tgt_start = 0;
   1002   1.1.1.2  mrg       tgt->tgt_end = 0;
   1003   1.1.1.2  mrg       return tgt;
   1004   1.1.1.2  mrg     }
   1005       1.1  mrg 
   1006       1.1  mrg   tgt_align = sizeof (void *);
   1007       1.1  mrg   tgt_size = 0;
   1008   1.1.1.6  mrg   cbuf.chunks = NULL;
   1009   1.1.1.6  mrg   cbuf.chunk_cnt = -1;
   1010   1.1.1.6  mrg   cbuf.use_cnt = 0;
   1011   1.1.1.6  mrg   cbuf.buf = NULL;
   1012  1.1.1.11  mrg   if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
   1013   1.1.1.6  mrg     {
   1014   1.1.1.7  mrg       size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
   1015   1.1.1.7  mrg       cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
   1016   1.1.1.6  mrg       cbuf.chunk_cnt = 0;
   1017   1.1.1.6  mrg     }
   1018  1.1.1.11  mrg   if (pragma_kind & GOMP_MAP_VARS_TARGET)
   1019       1.1  mrg     {
   1020       1.1  mrg       size_t align = 4 * sizeof (void *);
   1021       1.1  mrg       tgt_align = align;
   1022       1.1  mrg       tgt_size = mapnum * sizeof (void *);
   1023   1.1.1.6  mrg       cbuf.chunk_cnt = 1;
   1024   1.1.1.6  mrg       cbuf.use_cnt = 1 + (mapnum > 1);
   1025   1.1.1.7  mrg       cbuf.chunks[0].start = 0;
   1026   1.1.1.7  mrg       cbuf.chunks[0].end = tgt_size;
   1027       1.1  mrg     }
   1028       1.1  mrg 
   1029       1.1  mrg   gomp_mutex_lock (&devicep->lock);
   1030   1.1.1.2  mrg   if (devicep->state == GOMP_DEVICE_FINALIZED)
   1031   1.1.1.2  mrg     {
   1032   1.1.1.2  mrg       gomp_mutex_unlock (&devicep->lock);
   1033   1.1.1.2  mrg       free (tgt);
   1034   1.1.1.2  mrg       return NULL;
   1035   1.1.1.2  mrg     }
   1036       1.1  mrg 
   1037       1.1  mrg   for (i = 0; i < mapnum; i++)
   1038       1.1  mrg     {
   1039   1.1.1.2  mrg       int kind = get_kind (short_mapkind, kinds, i);
   1040  1.1.1.10  mrg       bool implicit = get_implicit (short_mapkind, kinds, i);
   1041   1.1.1.2  mrg       if (hostaddrs[i] == NULL
   1042   1.1.1.2  mrg 	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
   1043   1.1.1.2  mrg 	{
   1044   1.1.1.2  mrg 	  tgt->list[i].key = NULL;
   1045   1.1.1.8  mrg 	  tgt->list[i].offset = OFFSET_INLINED;
   1046   1.1.1.2  mrg 	  continue;
   1047   1.1.1.2  mrg 	}
   1048   1.1.1.8  mrg       else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
   1049   1.1.1.8  mrg 	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
   1050   1.1.1.2  mrg 	{
   1051   1.1.1.8  mrg 	  tgt->list[i].key = NULL;
   1052   1.1.1.8  mrg 	  if (!not_found_cnt)
   1053   1.1.1.2  mrg 	    {
   1054   1.1.1.8  mrg 	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
   1055   1.1.1.8  mrg 		 on a separate construct prior to using use_device_{addr,ptr}.
   1056   1.1.1.8  mrg 		 In OpenMP 5.0, map directives need to be ordered by the
   1057   1.1.1.8  mrg 		 middle-end before the use_device_* clauses.  If
   1058   1.1.1.8  mrg 		 !not_found_cnt, all mappings requested (if any) are already
   1059   1.1.1.8  mrg 		 mapped, so use_device_{addr,ptr} can be resolved right away.
   1060   1.1.1.8  mrg 		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
   1061   1.1.1.8  mrg 		 now but would succeed after performing the mappings in the
   1062   1.1.1.8  mrg 		 following loop.  We can't defer this always to the second
   1063   1.1.1.8  mrg 		 loop, because it is not even invoked when !not_found_cnt
   1064   1.1.1.8  mrg 		 after the first loop.  */
   1065   1.1.1.8  mrg 	      cur_node.host_start = (uintptr_t) hostaddrs[i];
   1066   1.1.1.8  mrg 	      cur_node.host_end = cur_node.host_start;
   1067   1.1.1.8  mrg 	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
   1068   1.1.1.8  mrg 	      if (n != NULL)
   1069   1.1.1.8  mrg 		{
   1070   1.1.1.8  mrg 		  cur_node.host_start -= n->host_start;
   1071   1.1.1.8  mrg 		  hostaddrs[i]
   1072   1.1.1.8  mrg 		    = (void *) (n->tgt->tgt_start + n->tgt_offset
   1073   1.1.1.8  mrg 				+ cur_node.host_start);
   1074   1.1.1.8  mrg 		}
   1075   1.1.1.8  mrg 	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
   1076   1.1.1.8  mrg 		{
   1077   1.1.1.8  mrg 		  gomp_mutex_unlock (&devicep->lock);
   1078   1.1.1.8  mrg 		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
   1079   1.1.1.8  mrg 		}
   1080   1.1.1.8  mrg 	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
   1081   1.1.1.8  mrg 		/* If not present, continue using the host address.  */
   1082   1.1.1.8  mrg 		;
   1083   1.1.1.8  mrg 	      else
   1084   1.1.1.8  mrg 		__builtin_unreachable ();
   1085   1.1.1.8  mrg 	      tgt->list[i].offset = OFFSET_INLINED;
   1086   1.1.1.2  mrg 	    }
   1087   1.1.1.8  mrg 	  else
   1088   1.1.1.8  mrg 	    tgt->list[i].offset = 0;
   1089   1.1.1.2  mrg 	  continue;
   1090   1.1.1.2  mrg 	}
   1091  1.1.1.11  mrg       else if ((kind & typemask) == GOMP_MAP_STRUCT
   1092  1.1.1.11  mrg 	       || (kind & typemask) == GOMP_MAP_STRUCT_UNORD)
   1093   1.1.1.2  mrg 	{
   1094   1.1.1.2  mrg 	  size_t first = i + 1;
   1095   1.1.1.2  mrg 	  size_t last = i + sizes[i];
   1096   1.1.1.2  mrg 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
   1097   1.1.1.2  mrg 	  cur_node.host_end = (uintptr_t) hostaddrs[last]
   1098   1.1.1.2  mrg 			      + sizes[last];
   1099   1.1.1.2  mrg 	  tgt->list[i].key = NULL;
   1100   1.1.1.8  mrg 	  tgt->list[i].offset = OFFSET_STRUCT;
   1101   1.1.1.2  mrg 	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
   1102   1.1.1.2  mrg 	  if (n == NULL)
   1103   1.1.1.2  mrg 	    {
   1104   1.1.1.2  mrg 	      size_t align = (size_t) 1 << (kind >> rshift);
   1105   1.1.1.2  mrg 	      if (tgt_align < align)
   1106   1.1.1.2  mrg 		tgt_align = align;
   1107   1.1.1.6  mrg 	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
   1108   1.1.1.2  mrg 	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
   1109   1.1.1.6  mrg 	      tgt_size += cur_node.host_end - cur_node.host_start;
   1110   1.1.1.2  mrg 	      not_found_cnt += last - i;
   1111   1.1.1.2  mrg 	      for (i = first; i <= last; i++)
   1112   1.1.1.6  mrg 		{
   1113   1.1.1.6  mrg 		  tgt->list[i].key = NULL;
   1114  1.1.1.10  mrg 		  if (!aq
   1115  1.1.1.10  mrg 		      && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
   1116  1.1.1.11  mrg 						& typemask)
   1117  1.1.1.11  mrg 		      && sizes[i] != 0)
   1118   1.1.1.6  mrg 		    gomp_coalesce_buf_add (&cbuf,
   1119   1.1.1.6  mrg 					   tgt_size - cur_node.host_end
   1120   1.1.1.6  mrg 					   + (uintptr_t) hostaddrs[i],
   1121   1.1.1.6  mrg 					   sizes[i]);
   1122   1.1.1.6  mrg 		}
   1123   1.1.1.2  mrg 	      i--;
   1124   1.1.1.2  mrg 	      continue;
   1125   1.1.1.2  mrg 	    }
   1126   1.1.1.2  mrg 	  for (i = first; i <= last; i++)
   1127   1.1.1.8  mrg 	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
   1128  1.1.1.10  mrg 				      sizes, kinds, NULL, refcount_set);
   1129   1.1.1.2  mrg 	  i--;
   1130   1.1.1.2  mrg 	  continue;
   1131   1.1.1.2  mrg 	}
   1132   1.1.1.2  mrg       else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
   1133       1.1  mrg 	{
   1134   1.1.1.2  mrg 	  tgt->list[i].key = NULL;
   1135   1.1.1.8  mrg 	  tgt->list[i].offset = OFFSET_POINTER;
   1136   1.1.1.8  mrg 	  has_firstprivate = true;
   1137   1.1.1.8  mrg 	  continue;
   1138   1.1.1.8  mrg 	}
   1139  1.1.1.10  mrg       else if ((kind & typemask) == GOMP_MAP_ATTACH
   1140  1.1.1.10  mrg 	       || ((kind & typemask)
   1141  1.1.1.10  mrg 		   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
   1142   1.1.1.8  mrg 	{
   1143   1.1.1.8  mrg 	  tgt->list[i].key = NULL;
   1144   1.1.1.2  mrg 	  has_firstprivate = true;
   1145       1.1  mrg 	  continue;
   1146       1.1  mrg 	}
   1147       1.1  mrg       cur_node.host_start = (uintptr_t) hostaddrs[i];
   1148       1.1  mrg       if (!GOMP_MAP_POINTER_P (kind & typemask))
   1149       1.1  mrg 	cur_node.host_end = cur_node.host_start + sizes[i];
   1150       1.1  mrg       else
   1151       1.1  mrg 	cur_node.host_end = cur_node.host_start + sizeof (void *);
   1152   1.1.1.2  mrg       if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
   1153   1.1.1.2  mrg 	{
   1154   1.1.1.2  mrg 	  tgt->list[i].key = NULL;
   1155   1.1.1.2  mrg 
   1156   1.1.1.2  mrg 	  size_t align = (size_t) 1 << (kind >> rshift);
   1157   1.1.1.2  mrg 	  if (tgt_align < align)
   1158   1.1.1.2  mrg 	    tgt_align = align;
   1159   1.1.1.2  mrg 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
   1160  1.1.1.10  mrg 	  if (!aq)
   1161  1.1.1.10  mrg 	    gomp_coalesce_buf_add (&cbuf, tgt_size,
   1162  1.1.1.10  mrg 				   cur_node.host_end - cur_node.host_start);
   1163   1.1.1.2  mrg 	  tgt_size += cur_node.host_end - cur_node.host_start;
   1164   1.1.1.2  mrg 	  has_firstprivate = true;
   1165   1.1.1.2  mrg 	  continue;
   1166   1.1.1.2  mrg 	}
   1167   1.1.1.2  mrg       splay_tree_key n;
   1168   1.1.1.2  mrg       if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
   1169       1.1  mrg 	{
   1170   1.1.1.2  mrg 	  n = gomp_map_0len_lookup (mem_map, &cur_node);
   1171   1.1.1.2  mrg 	  if (!n)
   1172   1.1.1.2  mrg 	    {
   1173   1.1.1.2  mrg 	      tgt->list[i].key = NULL;
   1174  1.1.1.11  mrg 	      tgt->list[i].offset = OFFSET_INLINED;
   1175   1.1.1.2  mrg 	      continue;
   1176   1.1.1.2  mrg 	    }
   1177       1.1  mrg 	}
   1178       1.1  mrg       else
   1179   1.1.1.2  mrg 	n = splay_tree_lookup (mem_map, &cur_node);
   1180   1.1.1.2  mrg       if (n && n->refcount != REFCOUNT_LINK)
   1181  1.1.1.10  mrg 	{
   1182  1.1.1.10  mrg 	  int always_to_cnt = 0;
   1183  1.1.1.10  mrg 	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
   1184  1.1.1.10  mrg 	    {
   1185  1.1.1.10  mrg 	      bool has_nullptr = false;
   1186  1.1.1.10  mrg 	      size_t j;
   1187  1.1.1.10  mrg 	      for (j = 0; j < n->tgt->list_count; j++)
   1188  1.1.1.10  mrg 		if (n->tgt->list[j].key == n)
   1189  1.1.1.10  mrg 		  {
   1190  1.1.1.10  mrg 		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
   1191  1.1.1.10  mrg 		    break;
   1192  1.1.1.10  mrg 		  }
   1193  1.1.1.10  mrg 	      if (n->tgt->list_count == 0)
   1194  1.1.1.10  mrg 		{
    1195  1.1.1.10  mrg 		  /* 'declare target'; assume has_nullptr; it could also be
    1196  1.1.1.10  mrg 		     a statically assigned pointer, but then it should point to
    1197  1.1.1.10  mrg 		     the equivalent variable on the host.  */
   1198  1.1.1.10  mrg 		  assert (n->refcount == REFCOUNT_INFINITY);
   1199  1.1.1.10  mrg 		  has_nullptr = true;
   1200  1.1.1.10  mrg 		}
   1201  1.1.1.10  mrg 	      else
   1202  1.1.1.10  mrg 		assert (j < n->tgt->list_count);
    1203  1.1.1.10  mrg 	      /* Re-map the data if there is an 'always' modifier or if a
    1204  1.1.1.10  mrg 		 null pointer was there and now a nonnull has been found; that
    1205  1.1.1.10  mrg 		 permits transparent re-mapping for Fortran array descriptors
    1206  1.1.1.10  mrg 		 which were previously mapped unallocated.  */
   1207  1.1.1.10  mrg 	      for (j = i + 1; j < mapnum; j++)
   1208  1.1.1.10  mrg 		{
   1209  1.1.1.10  mrg 		  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
   1210  1.1.1.10  mrg 		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
   1211  1.1.1.10  mrg 		      && (!has_nullptr
   1212  1.1.1.10  mrg 			  || !GOMP_MAP_POINTER_P (ptr_kind)
   1213  1.1.1.10  mrg 			  || *(void **) hostaddrs[j] == NULL))
   1214  1.1.1.10  mrg 		    break;
   1215  1.1.1.10  mrg 		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
   1216  1.1.1.10  mrg 			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
   1217  1.1.1.10  mrg 			       > cur_node.host_end))
   1218  1.1.1.10  mrg 		    break;
   1219  1.1.1.10  mrg 		  else
   1220  1.1.1.10  mrg 		    {
   1221  1.1.1.10  mrg 		      has_always_ptrset = true;
   1222  1.1.1.10  mrg 		      ++always_to_cnt;
   1223  1.1.1.10  mrg 		    }
   1224  1.1.1.10  mrg 		}
   1225  1.1.1.10  mrg 	    }
   1226  1.1.1.10  mrg 	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
   1227  1.1.1.10  mrg 				  kind & typemask, always_to_cnt > 0, implicit,
   1228  1.1.1.10  mrg 				  NULL, refcount_set);
   1229  1.1.1.10  mrg 	  i += always_to_cnt;
   1230  1.1.1.10  mrg 	}
   1231   1.1.1.2  mrg       else
   1232       1.1  mrg 	{
   1233   1.1.1.2  mrg 	  tgt->list[i].key = NULL;
   1234       1.1  mrg 
   1235   1.1.1.8  mrg 	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
   1236   1.1.1.8  mrg 	    {
   1237   1.1.1.8  mrg 	      /* Not present, hence, skip entry - including its MAP_POINTER,
   1238   1.1.1.8  mrg 		 when existing.  */
   1239  1.1.1.11  mrg 	      tgt->list[i].offset = OFFSET_INLINED;
   1240   1.1.1.8  mrg 	      if (i + 1 < mapnum
   1241   1.1.1.8  mrg 		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
   1242   1.1.1.8  mrg 		      == GOMP_MAP_POINTER))
   1243   1.1.1.8  mrg 		{
   1244   1.1.1.8  mrg 		  ++i;
   1245   1.1.1.8  mrg 		  tgt->list[i].key = NULL;
   1246   1.1.1.8  mrg 		  tgt->list[i].offset = 0;
   1247   1.1.1.8  mrg 		}
   1248   1.1.1.8  mrg 	      continue;
   1249   1.1.1.8  mrg 	    }
   1250       1.1  mrg 	  size_t align = (size_t) 1 << (kind >> rshift);
   1251       1.1  mrg 	  not_found_cnt++;
   1252       1.1  mrg 	  if (tgt_align < align)
   1253       1.1  mrg 	    tgt_align = align;
   1254       1.1  mrg 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
   1255  1.1.1.10  mrg 	  if (!aq
   1256  1.1.1.10  mrg 	      && gomp_to_device_kind_p (kind & typemask))
   1257   1.1.1.6  mrg 	    gomp_coalesce_buf_add (&cbuf, tgt_size,
   1258   1.1.1.6  mrg 				   cur_node.host_end - cur_node.host_start);
   1259       1.1  mrg 	  tgt_size += cur_node.host_end - cur_node.host_start;
   1260       1.1  mrg 	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
   1261       1.1  mrg 	    {
   1262       1.1  mrg 	      size_t j;
   1263  1.1.1.10  mrg 	      int kind;
   1264       1.1  mrg 	      for (j = i + 1; j < mapnum; j++)
   1265  1.1.1.10  mrg 		if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
   1266  1.1.1.10  mrg 						  kinds, j)) & typemask))
   1267  1.1.1.10  mrg 		    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
   1268       1.1  mrg 		  break;
   1269       1.1  mrg 		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
   1270       1.1  mrg 			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
   1271       1.1  mrg 			     > cur_node.host_end))
   1272       1.1  mrg 		  break;
   1273       1.1  mrg 		else
   1274       1.1  mrg 		  {
   1275   1.1.1.2  mrg 		    tgt->list[j].key = NULL;
   1276       1.1  mrg 		    i++;
   1277       1.1  mrg 		  }
   1278       1.1  mrg 	    }
   1279       1.1  mrg 	}
   1280       1.1  mrg     }
   1281       1.1  mrg 
   1282       1.1  mrg   if (devaddrs)
   1283       1.1  mrg     {
   1284       1.1  mrg       if (mapnum != 1)
   1285       1.1  mrg 	{
   1286       1.1  mrg 	  gomp_mutex_unlock (&devicep->lock);
   1287       1.1  mrg 	  gomp_fatal ("unexpected aggregation");
   1288       1.1  mrg 	}
   1289       1.1  mrg       tgt->to_free = devaddrs[0];
   1290       1.1  mrg       tgt->tgt_start = (uintptr_t) tgt->to_free;
   1291       1.1  mrg       tgt->tgt_end = tgt->tgt_start + sizes[0];
   1292       1.1  mrg     }
   1293  1.1.1.11  mrg   else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
   1294       1.1  mrg     {
   1295       1.1  mrg       /* Allocate tgt_align aligned tgt_size block of memory.  */
   1296       1.1  mrg       /* FIXME: Perhaps change interface to allocate properly aligned
   1297       1.1  mrg 	 memory.  */
   1298       1.1  mrg       tgt->to_free = devicep->alloc_func (devicep->target_id,
   1299       1.1  mrg 					  tgt_size + tgt_align - 1);
   1300   1.1.1.3  mrg       if (!tgt->to_free)
   1301   1.1.1.3  mrg 	{
   1302   1.1.1.3  mrg 	  gomp_mutex_unlock (&devicep->lock);
   1303   1.1.1.3  mrg 	  gomp_fatal ("device memory allocation fail");
   1304   1.1.1.3  mrg 	}
   1305   1.1.1.3  mrg 
   1306       1.1  mrg       tgt->tgt_start = (uintptr_t) tgt->to_free;
   1307       1.1  mrg       tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
   1308       1.1  mrg       tgt->tgt_end = tgt->tgt_start + tgt_size;
   1309   1.1.1.6  mrg 
   1310   1.1.1.6  mrg       if (cbuf.use_cnt == 1)
   1311   1.1.1.6  mrg 	cbuf.chunk_cnt--;
   1312   1.1.1.6  mrg       if (cbuf.chunk_cnt > 0)
   1313   1.1.1.6  mrg 	{
   1314   1.1.1.6  mrg 	  cbuf.buf
   1315   1.1.1.7  mrg 	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
   1316   1.1.1.6  mrg 	  if (cbuf.buf)
   1317   1.1.1.6  mrg 	    {
   1318   1.1.1.6  mrg 	      cbuf.tgt = tgt;
   1319   1.1.1.6  mrg 	      cbufp = &cbuf;
   1320   1.1.1.6  mrg 	    }
   1321   1.1.1.6  mrg 	}
   1322       1.1  mrg     }
   1323       1.1  mrg   else
   1324       1.1  mrg     {
   1325       1.1  mrg       tgt->to_free = NULL;
   1326       1.1  mrg       tgt->tgt_start = 0;
   1327       1.1  mrg       tgt->tgt_end = 0;
   1328       1.1  mrg     }
   1329       1.1  mrg 
   1330       1.1  mrg   tgt_size = 0;
   1331  1.1.1.11  mrg   if (pragma_kind & GOMP_MAP_VARS_TARGET)
   1332       1.1  mrg     tgt_size = mapnum * sizeof (void *);
   1333       1.1  mrg 
   1334       1.1  mrg   tgt->array = NULL;
   1335  1.1.1.10  mrg   if (not_found_cnt || has_firstprivate || has_always_ptrset)
   1336       1.1  mrg     {
   1337   1.1.1.2  mrg       if (not_found_cnt)
   1338   1.1.1.2  mrg 	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
   1339       1.1  mrg       splay_tree_node array = tgt->array;
   1340  1.1.1.10  mrg       size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
   1341   1.1.1.2  mrg       uintptr_t field_tgt_base = 0;
   1342  1.1.1.10  mrg       splay_tree_key field_tgt_structelem_first = NULL;
   1343       1.1  mrg 
   1344       1.1  mrg       for (i = 0; i < mapnum; i++)
   1345  1.1.1.10  mrg 	if (has_always_ptrset
   1346  1.1.1.10  mrg 	    && tgt->list[i].key
   1347  1.1.1.10  mrg 	    && (get_kind (short_mapkind, kinds, i) & typemask)
   1348  1.1.1.10  mrg 	       == GOMP_MAP_TO_PSET)
   1349  1.1.1.10  mrg 	  {
   1350  1.1.1.10  mrg 	    splay_tree_key k = tgt->list[i].key;
   1351  1.1.1.10  mrg 	    bool has_nullptr = false;
   1352  1.1.1.10  mrg 	    size_t j;
   1353  1.1.1.10  mrg 	    for (j = 0; j < k->tgt->list_count; j++)
   1354  1.1.1.10  mrg 	      if (k->tgt->list[j].key == k)
   1355  1.1.1.10  mrg 		{
   1356  1.1.1.10  mrg 		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
   1357  1.1.1.10  mrg 		  break;
   1358  1.1.1.10  mrg 		}
   1359  1.1.1.10  mrg 	    if (k->tgt->list_count == 0)
   1360  1.1.1.10  mrg 	      has_nullptr = true;
   1361  1.1.1.10  mrg 	    else
   1362  1.1.1.10  mrg 	      assert (j < k->tgt->list_count);
   1363  1.1.1.10  mrg 
   1364  1.1.1.10  mrg 	    tgt->list[i].has_null_ptr_assoc = false;
   1365  1.1.1.10  mrg 	    for (j = i + 1; j < mapnum; j++)
   1366  1.1.1.10  mrg 	      {
   1367  1.1.1.10  mrg 		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
   1368  1.1.1.10  mrg 		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
   1369  1.1.1.10  mrg 		    && (!has_nullptr
   1370  1.1.1.10  mrg 			|| !GOMP_MAP_POINTER_P (ptr_kind)
   1371  1.1.1.10  mrg 			|| *(void **) hostaddrs[j] == NULL))
   1372  1.1.1.10  mrg 		  break;
   1373  1.1.1.10  mrg 		else if ((uintptr_t) hostaddrs[j] < k->host_start
   1374  1.1.1.10  mrg 			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
   1375  1.1.1.10  mrg 			     > k->host_end))
   1376  1.1.1.10  mrg 		  break;
   1377  1.1.1.10  mrg 		else
   1378  1.1.1.10  mrg 		  {
   1379  1.1.1.10  mrg 		    if (*(void **) hostaddrs[j] == NULL)
   1380  1.1.1.10  mrg 		      tgt->list[i].has_null_ptr_assoc = true;
   1381  1.1.1.10  mrg 		    tgt->list[j].key = k;
   1382  1.1.1.10  mrg 		    tgt->list[j].copy_from = false;
   1383  1.1.1.10  mrg 		    tgt->list[j].always_copy_from = false;
   1384  1.1.1.10  mrg 		    tgt->list[j].is_attach = false;
   1385  1.1.1.10  mrg 		    gomp_increment_refcount (k, refcount_set);
   1386  1.1.1.10  mrg 		    gomp_map_pointer (k->tgt, aq,
   1387  1.1.1.10  mrg 				      (uintptr_t) *(void **) hostaddrs[j],
   1388  1.1.1.10  mrg 				      k->tgt_offset + ((uintptr_t) hostaddrs[j]
   1389  1.1.1.10  mrg 						       - k->host_start),
   1390  1.1.1.10  mrg 				      sizes[j], cbufp, false);
   1391  1.1.1.10  mrg 		  }
   1392  1.1.1.10  mrg 	      }
   1393  1.1.1.10  mrg 	    i = j - 1;
   1394  1.1.1.10  mrg 	  }
   1395  1.1.1.10  mrg 	else if (tgt->list[i].key == NULL)
   1396       1.1  mrg 	  {
   1397   1.1.1.2  mrg 	    int kind = get_kind (short_mapkind, kinds, i);
   1398  1.1.1.10  mrg 	    bool implicit = get_implicit (short_mapkind, kinds, i);
   1399       1.1  mrg 	    if (hostaddrs[i] == NULL)
   1400       1.1  mrg 	      continue;
   1401   1.1.1.2  mrg 	    switch (kind & typemask)
   1402   1.1.1.2  mrg 	      {
   1403   1.1.1.2  mrg 		size_t align, len, first, last;
   1404   1.1.1.2  mrg 		splay_tree_key n;
   1405   1.1.1.2  mrg 	      case GOMP_MAP_FIRSTPRIVATE:
   1406   1.1.1.2  mrg 		align = (size_t) 1 << (kind >> rshift);
   1407   1.1.1.2  mrg 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
   1408   1.1.1.2  mrg 		tgt->list[i].offset = tgt_size;
   1409   1.1.1.2  mrg 		len = sizes[i];
   1410   1.1.1.8  mrg 		gomp_copy_host2dev (devicep, aq,
   1411   1.1.1.3  mrg 				    (void *) (tgt->tgt_start + tgt_size),
   1412  1.1.1.10  mrg 				    (void *) hostaddrs[i], len, false, cbufp);
    1413  1.1.1.11  mrg 		/* Save device address in hostaddr to permit later availability
    1414  1.1.1.11  mrg 		   when doing a deep-firstprivate with pointer attach.  */
   1415  1.1.1.11  mrg 		hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
   1416   1.1.1.2  mrg 		tgt_size += len;
   1417  1.1.1.11  mrg 
   1418  1.1.1.11  mrg 		/* If followed by GOMP_MAP_ATTACH, pointer assign this
   1419  1.1.1.11  mrg 		   firstprivate to hostaddrs[i+1], which is assumed to contain a
   1420  1.1.1.11  mrg 		   device address.  */
   1421  1.1.1.11  mrg 		if (i + 1 < mapnum
   1422  1.1.1.11  mrg 		    && (GOMP_MAP_ATTACH
   1423  1.1.1.11  mrg 			== (typemask & get_kind (short_mapkind, kinds, i+1))))
   1424  1.1.1.11  mrg 		  {
   1425  1.1.1.11  mrg 		    uintptr_t target = (uintptr_t) hostaddrs[i];
   1426  1.1.1.11  mrg 		    void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
   1427  1.1.1.11  mrg 		    /* Per
   1428  1.1.1.11  mrg 		       <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
   1429  1.1.1.11  mrg 		       "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
   1430  1.1.1.11  mrg 		       this probably needs revision for 'aq' usage.  */
   1431  1.1.1.11  mrg 		    assert (!aq);
   1432  1.1.1.11  mrg 		    gomp_copy_host2dev (devicep, aq, devptr, &target,
   1433  1.1.1.11  mrg 					sizeof (void *), false, cbufp);
   1434  1.1.1.11  mrg 		    ++i;
   1435  1.1.1.11  mrg 		  }
   1436   1.1.1.2  mrg 		continue;
   1437   1.1.1.2  mrg 	      case GOMP_MAP_FIRSTPRIVATE_INT:
   1438   1.1.1.2  mrg 	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
   1439   1.1.1.2  mrg 		continue;
   1440   1.1.1.8  mrg 	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
   1441   1.1.1.8  mrg 		/* The OpenACC 'host_data' construct only allows 'use_device'
   1442   1.1.1.8  mrg 		   "mapping" clauses, so in the first loop, 'not_found_cnt'
   1443   1.1.1.8  mrg 		   must always have been zero, so all OpenACC 'use_device'
   1444   1.1.1.8  mrg 		   clauses have already been handled.  (We can only easily test
   1445   1.1.1.8  mrg 		   'use_device' with 'if_present' clause here.)  */
   1446   1.1.1.8  mrg 		assert (tgt->list[i].offset == OFFSET_INLINED);
   1447   1.1.1.8  mrg 		/* Nevertheless, FALLTHRU to the normal handling, to keep the
   1448   1.1.1.8  mrg 		   code conceptually simple, similar to the first loop.  */
   1449   1.1.1.8  mrg 	      case GOMP_MAP_USE_DEVICE_PTR:
   1450   1.1.1.8  mrg 		if (tgt->list[i].offset == 0)
   1451   1.1.1.8  mrg 		  {
   1452   1.1.1.8  mrg 		    cur_node.host_start = (uintptr_t) hostaddrs[i];
   1453   1.1.1.8  mrg 		    cur_node.host_end = cur_node.host_start;
   1454   1.1.1.8  mrg 		    n = gomp_map_lookup (mem_map, &cur_node);
   1455   1.1.1.8  mrg 		    if (n != NULL)
   1456   1.1.1.8  mrg 		      {
   1457   1.1.1.8  mrg 			cur_node.host_start -= n->host_start;
   1458   1.1.1.8  mrg 			hostaddrs[i]
   1459   1.1.1.8  mrg 			  = (void *) (n->tgt->tgt_start + n->tgt_offset
   1460   1.1.1.8  mrg 				      + cur_node.host_start);
   1461   1.1.1.8  mrg 		      }
   1462   1.1.1.8  mrg 		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
   1463   1.1.1.8  mrg 		      {
   1464   1.1.1.8  mrg 			gomp_mutex_unlock (&devicep->lock);
   1465   1.1.1.8  mrg 			gomp_fatal ("use_device_ptr pointer wasn't mapped");
   1466   1.1.1.8  mrg 		      }
   1467   1.1.1.8  mrg 		    else if ((kind & typemask)
   1468   1.1.1.8  mrg 			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
   1469   1.1.1.8  mrg 		      /* If not present, continue using the host address.  */
   1470   1.1.1.8  mrg 		      ;
   1471   1.1.1.8  mrg 		    else
   1472   1.1.1.8  mrg 		      __builtin_unreachable ();
   1473   1.1.1.8  mrg 		    tgt->list[i].offset = OFFSET_INLINED;
   1474   1.1.1.8  mrg 		  }
   1475   1.1.1.8  mrg 		continue;
   1476  1.1.1.11  mrg 	      case GOMP_MAP_STRUCT_UNORD:
   1477  1.1.1.11  mrg 		if (sizes[i] > 1)
   1478  1.1.1.11  mrg 		  {
   1479  1.1.1.11  mrg 		    void *first = hostaddrs[i + 1];
   1480  1.1.1.11  mrg 		    for (size_t j = i + 1; j < i + sizes[i]; j++)
   1481  1.1.1.11  mrg 		      if (hostaddrs[j + 1] != first)
   1482  1.1.1.11  mrg 			{
   1483  1.1.1.11  mrg 			  gomp_mutex_unlock (&devicep->lock);
   1484  1.1.1.11  mrg 			  gomp_fatal ("Mapped array elements must be the "
   1485  1.1.1.11  mrg 				      "same (%p vs %p)", first,
   1486  1.1.1.11  mrg 				      hostaddrs[j + 1]);
   1487  1.1.1.11  mrg 			}
   1488  1.1.1.11  mrg 		  }
   1489  1.1.1.11  mrg 		/* Fallthrough.  */
   1490   1.1.1.2  mrg 	      case GOMP_MAP_STRUCT:
   1491   1.1.1.2  mrg 		first = i + 1;
   1492   1.1.1.2  mrg 		last = i + sizes[i];
   1493   1.1.1.2  mrg 		cur_node.host_start = (uintptr_t) hostaddrs[i];
   1494   1.1.1.2  mrg 		cur_node.host_end = (uintptr_t) hostaddrs[last]
   1495   1.1.1.2  mrg 				    + sizes[last];
   1496   1.1.1.2  mrg 		if (tgt->list[first].key != NULL)
   1497   1.1.1.2  mrg 		  continue;
   1498  1.1.1.11  mrg 		if (sizes[last] == 0)
   1499  1.1.1.11  mrg 		  cur_node.host_end++;
   1500   1.1.1.2  mrg 		n = splay_tree_lookup (mem_map, &cur_node);
   1501  1.1.1.11  mrg 		if (sizes[last] == 0)
   1502  1.1.1.11  mrg 		  cur_node.host_end--;
   1503  1.1.1.11  mrg 		if (n == NULL && cur_node.host_start == cur_node.host_end)
   1504  1.1.1.11  mrg 		  {
   1505  1.1.1.11  mrg 		    gomp_mutex_unlock (&devicep->lock);
   1506  1.1.1.11  mrg 		    gomp_fatal ("Struct pointer member not mapped (%p)",
   1507  1.1.1.11  mrg 				(void*) hostaddrs[first]);
   1508  1.1.1.11  mrg 		  }
   1509   1.1.1.2  mrg 		if (n == NULL)
   1510   1.1.1.2  mrg 		  {
   1511   1.1.1.2  mrg 		    size_t align = (size_t) 1 << (kind >> rshift);
   1512   1.1.1.2  mrg 		    tgt_size -= (uintptr_t) hostaddrs[first]
   1513   1.1.1.2  mrg 				- (uintptr_t) hostaddrs[i];
   1514   1.1.1.2  mrg 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
   1515   1.1.1.2  mrg 		    tgt_size += (uintptr_t) hostaddrs[first]
   1516   1.1.1.2  mrg 				- (uintptr_t) hostaddrs[i];
   1517   1.1.1.2  mrg 		    field_tgt_base = (uintptr_t) hostaddrs[first];
   1518   1.1.1.2  mrg 		    field_tgt_offset = tgt_size;
   1519   1.1.1.2  mrg 		    field_tgt_clear = last;
   1520  1.1.1.10  mrg 		    field_tgt_structelem_first = NULL;
   1521   1.1.1.2  mrg 		    tgt_size += cur_node.host_end
   1522   1.1.1.2  mrg 				- (uintptr_t) hostaddrs[first];
   1523   1.1.1.2  mrg 		    continue;
   1524   1.1.1.2  mrg 		  }
   1525   1.1.1.2  mrg 		for (i = first; i <= last; i++)
   1526   1.1.1.8  mrg 		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
   1527  1.1.1.10  mrg 					    sizes, kinds, cbufp, refcount_set);
   1528   1.1.1.2  mrg 		i--;
   1529   1.1.1.2  mrg 		continue;
   1530   1.1.1.2  mrg 	      case GOMP_MAP_ALWAYS_POINTER:
   1531   1.1.1.2  mrg 		cur_node.host_start = (uintptr_t) hostaddrs[i];
   1532   1.1.1.2  mrg 		cur_node.host_end = cur_node.host_start + sizeof (void *);
   1533   1.1.1.2  mrg 		n = splay_tree_lookup (mem_map, &cur_node);
   1534   1.1.1.2  mrg 		if (n == NULL
   1535   1.1.1.2  mrg 		    || n->host_start > cur_node.host_start
   1536   1.1.1.2  mrg 		    || n->host_end < cur_node.host_end)
   1537   1.1.1.2  mrg 		  {
   1538   1.1.1.2  mrg 		    gomp_mutex_unlock (&devicep->lock);
   1539   1.1.1.2  mrg 		    gomp_fatal ("always pointer not mapped");
   1540   1.1.1.2  mrg 		  }
   1541  1.1.1.11  mrg 		if (i > 0
   1542  1.1.1.11  mrg 		    && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
   1543  1.1.1.11  mrg 			!= GOMP_MAP_ALWAYS_POINTER))
   1544   1.1.1.2  mrg 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
   1545   1.1.1.2  mrg 		if (cur_node.tgt_offset)
   1546   1.1.1.2  mrg 		  cur_node.tgt_offset -= sizes[i];
   1547   1.1.1.8  mrg 		gomp_copy_host2dev (devicep, aq,
   1548   1.1.1.3  mrg 				    (void *) (n->tgt->tgt_start
   1549   1.1.1.3  mrg 					      + n->tgt_offset
   1550   1.1.1.3  mrg 					      + cur_node.host_start
   1551   1.1.1.3  mrg 					      - n->host_start),
   1552   1.1.1.3  mrg 				    (void *) &cur_node.tgt_offset,
   1553  1.1.1.10  mrg 				    sizeof (void *), true, cbufp);
   1554   1.1.1.2  mrg 		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
   1555   1.1.1.2  mrg 				      + cur_node.host_start - n->host_start;
   1556   1.1.1.2  mrg 		continue;
   1557   1.1.1.8  mrg 	      case GOMP_MAP_IF_PRESENT:
   1558   1.1.1.8  mrg 		/* Not present - otherwise handled above. Skip over its
   1559   1.1.1.8  mrg 		   MAP_POINTER as well.  */
   1560   1.1.1.8  mrg 		if (i + 1 < mapnum
   1561   1.1.1.8  mrg 		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
   1562   1.1.1.8  mrg 			== GOMP_MAP_POINTER))
   1563   1.1.1.8  mrg 		  ++i;
   1564   1.1.1.8  mrg 		continue;
   1565   1.1.1.8  mrg 	      case GOMP_MAP_ATTACH:
   1566  1.1.1.10  mrg 	      case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
   1567   1.1.1.8  mrg 		{
   1568   1.1.1.8  mrg 		  cur_node.host_start = (uintptr_t) hostaddrs[i];
   1569   1.1.1.8  mrg 		  cur_node.host_end = cur_node.host_start + sizeof (void *);
   1570   1.1.1.8  mrg 		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
   1571   1.1.1.8  mrg 		  if (n != NULL)
   1572   1.1.1.8  mrg 		    {
   1573   1.1.1.8  mrg 		      tgt->list[i].key = n;
   1574   1.1.1.8  mrg 		      tgt->list[i].offset = cur_node.host_start - n->host_start;
   1575   1.1.1.8  mrg 		      tgt->list[i].length = n->host_end - n->host_start;
   1576   1.1.1.8  mrg 		      tgt->list[i].copy_from = false;
   1577   1.1.1.8  mrg 		      tgt->list[i].always_copy_from = false;
   1578   1.1.1.8  mrg 		      tgt->list[i].is_attach = true;
   1579   1.1.1.8  mrg 		      /* OpenACC 'attach'/'detach' doesn't affect
   1580   1.1.1.8  mrg 			 structured/dynamic reference counts ('n->refcount',
   1581   1.1.1.8  mrg 			 'n->dynamic_refcount').  */
   1582  1.1.1.10  mrg 
   1583  1.1.1.10  mrg 		      bool zlas
   1584  1.1.1.10  mrg 			= ((kind & typemask)
   1585  1.1.1.10  mrg 			   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
   1586  1.1.1.10  mrg 		      gomp_attach_pointer (devicep, aq, mem_map, n,
   1587  1.1.1.10  mrg 					   (uintptr_t) hostaddrs[i], sizes[i],
   1588  1.1.1.10  mrg 					   cbufp, zlas);
   1589   1.1.1.8  mrg 		    }
   1590  1.1.1.10  mrg 		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
   1591   1.1.1.8  mrg 		    {
   1592   1.1.1.8  mrg 		      gomp_mutex_unlock (&devicep->lock);
   1593   1.1.1.8  mrg 		      gomp_fatal ("outer struct not mapped for attach");
   1594   1.1.1.8  mrg 		    }
   1595   1.1.1.8  mrg 		  continue;
   1596   1.1.1.8  mrg 		}
   1597   1.1.1.2  mrg 	      default:
   1598   1.1.1.2  mrg 		break;
   1599   1.1.1.2  mrg 	      }
   1600       1.1  mrg 	    splay_tree_key k = &array->key;
   1601       1.1  mrg 	    k->host_start = (uintptr_t) hostaddrs[i];
   1602       1.1  mrg 	    if (!GOMP_MAP_POINTER_P (kind & typemask))
   1603       1.1  mrg 	      k->host_end = k->host_start + sizes[i];
   1604       1.1  mrg 	    else
   1605       1.1  mrg 	      k->host_end = k->host_start + sizeof (void *);
   1606       1.1  mrg 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
   1607   1.1.1.2  mrg 	    if (n && n->refcount != REFCOUNT_LINK)
   1608  1.1.1.11  mrg 	      {
   1609  1.1.1.11  mrg 		if (field_tgt_clear != FIELD_TGT_EMPTY)
   1610  1.1.1.11  mrg 		  {
   1611  1.1.1.11  mrg 		    /* For this condition to be true, there must be a
   1612  1.1.1.11  mrg 		       duplicate struct element mapping.  This can happen with
   1613  1.1.1.11  mrg 		       GOMP_MAP_STRUCT_UNORD mappings, for example.  */
   1614  1.1.1.11  mrg 		    tgt->list[i].key = n;
   1615  1.1.1.11  mrg 		    if (openmp_p)
   1616  1.1.1.11  mrg 		      {
   1617  1.1.1.11  mrg 			assert ((n->refcount & REFCOUNT_STRUCTELEM) != 0);
   1618  1.1.1.11  mrg 			assert (field_tgt_structelem_first != NULL);
   1619  1.1.1.11  mrg 
   1620  1.1.1.11  mrg 			if (i == field_tgt_clear)
   1621  1.1.1.11  mrg 			  {
   1622  1.1.1.11  mrg 			    n->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
   1623  1.1.1.11  mrg 			    field_tgt_structelem_first = NULL;
   1624  1.1.1.11  mrg 			  }
   1625  1.1.1.11  mrg 		      }
   1626  1.1.1.11  mrg 		    if (i == field_tgt_clear)
   1627  1.1.1.11  mrg 		      field_tgt_clear = FIELD_TGT_EMPTY;
   1628  1.1.1.11  mrg 		    gomp_increment_refcount (n, refcount_set);
   1629  1.1.1.11  mrg 		    tgt->list[i].copy_from
   1630  1.1.1.11  mrg 		      = GOMP_MAP_COPY_FROM_P (kind & typemask);
   1631  1.1.1.11  mrg 		    tgt->list[i].always_copy_from
   1632  1.1.1.11  mrg 		      = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
   1633  1.1.1.11  mrg 		    tgt->list[i].is_attach = false;
   1634  1.1.1.11  mrg 		    tgt->list[i].offset = 0;
   1635  1.1.1.11  mrg 		    tgt->list[i].length = k->host_end - k->host_start;
   1636  1.1.1.11  mrg 		  }
   1637  1.1.1.11  mrg 		else
   1638  1.1.1.11  mrg 		  gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
   1639  1.1.1.11  mrg 					  kind & typemask, false, implicit,
   1640  1.1.1.11  mrg 					  cbufp, refcount_set);
   1641  1.1.1.11  mrg 	      }
   1642       1.1  mrg 	    else
   1643       1.1  mrg 	      {
   1644   1.1.1.8  mrg 		k->aux = NULL;
   1645   1.1.1.2  mrg 		if (n && n->refcount == REFCOUNT_LINK)
   1646   1.1.1.2  mrg 		  {
   1647   1.1.1.2  mrg 		    /* Replace target address of the pointer with target address
   1648   1.1.1.2  mrg 		       of mapped object in the splay tree.  */
   1649   1.1.1.2  mrg 		    splay_tree_remove (mem_map, n);
   1650   1.1.1.8  mrg 		    k->aux
   1651   1.1.1.8  mrg 		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
   1652   1.1.1.8  mrg 		    k->aux->link_key = n;
   1653   1.1.1.2  mrg 		  }
   1654       1.1  mrg 		size_t align = (size_t) 1 << (kind >> rshift);
   1655   1.1.1.2  mrg 		tgt->list[i].key = k;
   1656       1.1  mrg 		k->tgt = tgt;
   1657  1.1.1.10  mrg 		k->refcount = 0;
   1658  1.1.1.10  mrg 		k->dynamic_refcount = 0;
   1659   1.1.1.8  mrg 		if (field_tgt_clear != FIELD_TGT_EMPTY)
   1660   1.1.1.2  mrg 		  {
   1661   1.1.1.2  mrg 		    k->tgt_offset = k->host_start - field_tgt_base
   1662   1.1.1.2  mrg 				    + field_tgt_offset;
   1663  1.1.1.10  mrg 		    if (openmp_p)
   1664  1.1.1.10  mrg 		      {
   1665  1.1.1.10  mrg 			k->refcount = REFCOUNT_STRUCTELEM;
   1666  1.1.1.10  mrg 			if (field_tgt_structelem_first == NULL)
   1667  1.1.1.10  mrg 			  {
   1668  1.1.1.10  mrg 			    /* Set to first structure element of sequence.  */
   1669  1.1.1.10  mrg 			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
   1670  1.1.1.10  mrg 			    field_tgt_structelem_first = k;
   1671  1.1.1.10  mrg 			  }
   1672  1.1.1.10  mrg 			else
   1673  1.1.1.10  mrg 			  /* Point to refcount of leading element, but do not
   1674  1.1.1.10  mrg 			     increment again.  */
   1675  1.1.1.10  mrg 			  k->structelem_refcount_ptr
   1676  1.1.1.10  mrg 			    = &field_tgt_structelem_first->structelem_refcount;
   1677  1.1.1.10  mrg 
   1678  1.1.1.10  mrg 			if (i == field_tgt_clear)
   1679  1.1.1.10  mrg 			  {
   1680  1.1.1.10  mrg 			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
   1681  1.1.1.10  mrg 			    field_tgt_structelem_first = NULL;
   1682  1.1.1.10  mrg 			  }
   1683  1.1.1.10  mrg 		      }
   1684   1.1.1.2  mrg 		    if (i == field_tgt_clear)
   1685   1.1.1.8  mrg 		      field_tgt_clear = FIELD_TGT_EMPTY;
   1686   1.1.1.2  mrg 		  }
   1687   1.1.1.2  mrg 		else
   1688   1.1.1.2  mrg 		  {
   1689   1.1.1.2  mrg 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
   1690   1.1.1.2  mrg 		    k->tgt_offset = tgt_size;
   1691   1.1.1.2  mrg 		    tgt_size += k->host_end - k->host_start;
   1692   1.1.1.2  mrg 		  }
   1693  1.1.1.10  mrg 		/* First increment, from 0 to 1. gomp_increment_refcount
   1694  1.1.1.10  mrg 		   encapsulates the different increment cases, so use this
   1695  1.1.1.10  mrg 		   instead of directly setting 1 during initialization.  */
   1696  1.1.1.10  mrg 		gomp_increment_refcount (k, refcount_set);
   1697  1.1.1.10  mrg 
   1698   1.1.1.2  mrg 		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
   1699   1.1.1.2  mrg 		tgt->list[i].always_copy_from
   1700   1.1.1.2  mrg 		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
   1701   1.1.1.8  mrg 		tgt->list[i].is_attach = false;
   1702   1.1.1.2  mrg 		tgt->list[i].offset = 0;
   1703   1.1.1.2  mrg 		tgt->list[i].length = k->host_end - k->host_start;
   1704       1.1  mrg 		tgt->refcount++;
   1705       1.1  mrg 		array->left = NULL;
   1706       1.1  mrg 		array->right = NULL;
   1707       1.1  mrg 		splay_tree_insert (mem_map, array);
   1708       1.1  mrg 		switch (kind & typemask)
   1709       1.1  mrg 		  {
   1710       1.1  mrg 		  case GOMP_MAP_ALLOC:
   1711       1.1  mrg 		  case GOMP_MAP_FROM:
   1712       1.1  mrg 		  case GOMP_MAP_FORCE_ALLOC:
   1713       1.1  mrg 		  case GOMP_MAP_FORCE_FROM:
   1714   1.1.1.2  mrg 		  case GOMP_MAP_ALWAYS_FROM:
   1715       1.1  mrg 		    break;
   1716       1.1  mrg 		  case GOMP_MAP_TO:
   1717       1.1  mrg 		  case GOMP_MAP_TOFROM:
   1718       1.1  mrg 		  case GOMP_MAP_FORCE_TO:
   1719       1.1  mrg 		  case GOMP_MAP_FORCE_TOFROM:
   1720   1.1.1.2  mrg 		  case GOMP_MAP_ALWAYS_TO:
   1721   1.1.1.2  mrg 		  case GOMP_MAP_ALWAYS_TOFROM:
   1722   1.1.1.8  mrg 		    gomp_copy_host2dev (devicep, aq,
   1723   1.1.1.3  mrg 					(void *) (tgt->tgt_start
   1724   1.1.1.3  mrg 						  + k->tgt_offset),
   1725   1.1.1.3  mrg 					(void *) k->host_start,
   1726  1.1.1.10  mrg 					k->host_end - k->host_start,
   1727  1.1.1.10  mrg 					false, cbufp);
   1728       1.1  mrg 		    break;
   1729       1.1  mrg 		  case GOMP_MAP_POINTER:
   1730  1.1.1.10  mrg 		  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
   1731  1.1.1.10  mrg 		    gomp_map_pointer
   1732  1.1.1.10  mrg 		      (tgt, aq, (uintptr_t) *(void **) k->host_start,
   1733  1.1.1.10  mrg 		       k->tgt_offset, sizes[i], cbufp,
   1734  1.1.1.10  mrg 		       ((kind & typemask)
   1735  1.1.1.10  mrg 			== GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
   1736       1.1  mrg 		    break;
   1737       1.1  mrg 		  case GOMP_MAP_TO_PSET:
   1738   1.1.1.8  mrg 		    gomp_copy_host2dev (devicep, aq,
   1739   1.1.1.3  mrg 					(void *) (tgt->tgt_start
   1740   1.1.1.3  mrg 						  + k->tgt_offset),
   1741   1.1.1.3  mrg 					(void *) k->host_start,
   1742  1.1.1.10  mrg 					k->host_end - k->host_start,
   1743  1.1.1.10  mrg 					false, cbufp);
   1744  1.1.1.10  mrg 		    tgt->list[i].has_null_ptr_assoc = false;
   1745       1.1  mrg 
   1746       1.1  mrg 		    for (j = i + 1; j < mapnum; j++)
   1747  1.1.1.10  mrg 		      {
   1748  1.1.1.10  mrg 			int ptr_kind = (get_kind (short_mapkind, kinds, j)
   1749  1.1.1.10  mrg 					& typemask);
   1750  1.1.1.10  mrg 			if (!GOMP_MAP_POINTER_P (ptr_kind)
   1751  1.1.1.10  mrg 			    && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
   1752  1.1.1.10  mrg 			  break;
   1753  1.1.1.10  mrg 			else if ((uintptr_t) hostaddrs[j] < k->host_start
   1754  1.1.1.10  mrg 				 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
   1755  1.1.1.10  mrg 				     > k->host_end))
   1756  1.1.1.10  mrg 			  break;
   1757  1.1.1.10  mrg 			else
   1758  1.1.1.10  mrg 			  {
   1759  1.1.1.10  mrg 			    tgt->list[j].key = k;
   1760  1.1.1.10  mrg 			    tgt->list[j].copy_from = false;
   1761  1.1.1.10  mrg 			    tgt->list[j].always_copy_from = false;
   1762  1.1.1.10  mrg 			    tgt->list[j].is_attach = false;
   1763  1.1.1.10  mrg 			    tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
   1764  1.1.1.10  mrg 			    /* For OpenMP, the use of refcount_sets causes
   1765  1.1.1.10  mrg 			       errors if we set k->refcount = 1 above but also
   1766  1.1.1.10  mrg 			       increment it again here, for decrementing will
   1767  1.1.1.10  mrg 			       not properly match, since we decrement only once
   1768  1.1.1.10  mrg 			       for each key's refcount. Therefore avoid this
   1769  1.1.1.10  mrg 			       increment for OpenMP constructs.  */
   1770  1.1.1.10  mrg 			    if (!openmp_p)
   1771  1.1.1.10  mrg 			      gomp_increment_refcount (k, refcount_set);
   1772  1.1.1.10  mrg 			    gomp_map_pointer (tgt, aq,
   1773  1.1.1.10  mrg 					      (uintptr_t) *(void **) hostaddrs[j],
   1774  1.1.1.10  mrg 					      k->tgt_offset
   1775  1.1.1.10  mrg 					      + ((uintptr_t) hostaddrs[j]
   1776  1.1.1.10  mrg 						 - k->host_start),
   1777  1.1.1.10  mrg 					      sizes[j], cbufp, false);
   1778  1.1.1.10  mrg 			  }
   1779  1.1.1.11  mrg 		      }
   1780  1.1.1.10  mrg 		    i = j - 1;
   1781       1.1  mrg 		    break;
   1782       1.1  mrg 		  case GOMP_MAP_FORCE_PRESENT:
   1783  1.1.1.11  mrg 		  case GOMP_MAP_ALWAYS_PRESENT_TO:
   1784  1.1.1.11  mrg 		  case GOMP_MAP_ALWAYS_PRESENT_FROM:
   1785  1.1.1.11  mrg 		  case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
   1786       1.1  mrg 		    {
   1787       1.1  mrg 		      /* We already looked up the memory region above and it
   1788       1.1  mrg 			 was missing.  */
   1789       1.1  mrg 		      size_t size = k->host_end - k->host_start;
   1790       1.1  mrg 		      gomp_mutex_unlock (&devicep->lock);
   1791       1.1  mrg #ifdef HAVE_INTTYPES_H
   1792  1.1.1.11  mrg 		      gomp_fatal ("present clause: not present on the device "
   1793  1.1.1.11  mrg 				  "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
   1794  1.1.1.11  mrg 				  "dev: %d)", (void *) k->host_start,
   1795  1.1.1.11  mrg 				  (uint64_t) size, (uint64_t) size,
   1796  1.1.1.11  mrg 				  devicep->target_id);
   1797       1.1  mrg #else
   1798  1.1.1.11  mrg 		      gomp_fatal ("present clause: not present on the device "
   1799  1.1.1.11  mrg 				  "(addr: %p, size: %lu (0x%lx), dev: %d)",
   1800  1.1.1.11  mrg 				  (void *) k->host_start,
   1801  1.1.1.11  mrg 				  (unsigned long) size, (unsigned long) size,
   1802  1.1.1.11  mrg 				  devicep->target_id);
   1803       1.1  mrg #endif
   1804       1.1  mrg 		    }
   1805       1.1  mrg 		    break;
   1806       1.1  mrg 		  case GOMP_MAP_FORCE_DEVICEPTR:
   1807       1.1  mrg 		    assert (k->host_end - k->host_start == sizeof (void *));
   1808   1.1.1.8  mrg 		    gomp_copy_host2dev (devicep, aq,
   1809   1.1.1.3  mrg 					(void *) (tgt->tgt_start
   1810   1.1.1.3  mrg 						  + k->tgt_offset),
   1811   1.1.1.3  mrg 					(void *) k->host_start,
   1812  1.1.1.10  mrg 					sizeof (void *), false, cbufp);
   1813       1.1  mrg 		    break;
   1814       1.1  mrg 		  default:
   1815       1.1  mrg 		    gomp_mutex_unlock (&devicep->lock);
   1816       1.1  mrg 		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
   1817       1.1  mrg 				kind);
   1818       1.1  mrg 		  }
   1819   1.1.1.2  mrg 
   1820   1.1.1.8  mrg 		if (k->aux && k->aux->link_key)
   1821   1.1.1.2  mrg 		  {
   1822   1.1.1.2  mrg 		    /* Set link pointer on target to the device address of the
   1823   1.1.1.2  mrg 		       mapped object.  */
   1824   1.1.1.2  mrg 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
   1825   1.1.1.7  mrg 		    /* We intentionally do not use coalescing here, as it's not
   1826   1.1.1.7  mrg 		       data allocated by the current call to this function.  */
   1827   1.1.1.8  mrg 		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
   1828  1.1.1.10  mrg 					&tgt_addr, sizeof (void *), true, NULL);
   1829   1.1.1.2  mrg 		  }
   1830       1.1  mrg 		array++;
   1831       1.1  mrg 	      }
   1832       1.1  mrg 	  }
   1833       1.1  mrg     }
   1834       1.1  mrg 
   1835  1.1.1.11  mrg   if (pragma_kind & GOMP_MAP_VARS_TARGET)
   1836       1.1  mrg     {
   1837       1.1  mrg       for (i = 0; i < mapnum; i++)
   1838       1.1  mrg 	{
   1839   1.1.1.2  mrg 	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
   1840   1.1.1.8  mrg 	  gomp_copy_host2dev (devicep, aq,
   1841   1.1.1.3  mrg 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
   1842   1.1.1.6  mrg 			      (void *) &cur_node.tgt_offset, sizeof (void *),
   1843  1.1.1.10  mrg 			      true, cbufp);
   1844       1.1  mrg 	}
   1845       1.1  mrg     }
   1846       1.1  mrg 
   1847   1.1.1.6  mrg   if (cbufp)
   1848   1.1.1.6  mrg     {
   1849   1.1.1.6  mrg       long c = 0;
   1850   1.1.1.6  mrg       for (c = 0; c < cbuf.chunk_cnt; ++c)
   1851   1.1.1.8  mrg 	gomp_copy_host2dev (devicep, aq,
   1852   1.1.1.7  mrg 			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
   1853   1.1.1.7  mrg 			    (char *) cbuf.buf + (cbuf.chunks[c].start
   1854   1.1.1.7  mrg 						 - cbuf.chunks[0].start),
   1855  1.1.1.10  mrg 			    cbuf.chunks[c].end - cbuf.chunks[c].start,
   1856  1.1.1.11  mrg 			    false, NULL);
   1857  1.1.1.11  mrg       if (aq)
   1858  1.1.1.11  mrg 	/* Free once the transfer has completed.  */
   1859  1.1.1.11  mrg 	devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
   1860  1.1.1.11  mrg       else
   1861  1.1.1.11  mrg 	free (cbuf.buf);
   1862   1.1.1.7  mrg       cbuf.buf = NULL;
   1863   1.1.1.7  mrg       cbufp = NULL;
   1864   1.1.1.6  mrg     }
   1865   1.1.1.6  mrg 
   1866   1.1.1.2  mrg   /* If the variable from "omp target enter data" map-list was already mapped,
   1867   1.1.1.2  mrg      tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
   1868   1.1.1.2  mrg      gomp_exit_data.  */
   1869  1.1.1.10  mrg   if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
   1870   1.1.1.2  mrg     {
   1871   1.1.1.2  mrg       free (tgt);
   1872   1.1.1.2  mrg       tgt = NULL;
   1873   1.1.1.2  mrg     }
   1874   1.1.1.2  mrg 
   1875       1.1  mrg   gomp_mutex_unlock (&devicep->lock);
   1876       1.1  mrg   return tgt;
   1877       1.1  mrg }
   1878       1.1  mrg 
   1879  1.1.1.10  mrg static struct target_mem_desc *
   1880   1.1.1.8  mrg gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
   1881   1.1.1.8  mrg 	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
   1882  1.1.1.10  mrg 	       bool short_mapkind, htab_t *refcount_set,
   1883  1.1.1.10  mrg 	       enum gomp_map_vars_kind pragma_kind)
   1884   1.1.1.8  mrg {
   1885  1.1.1.10  mrg   /* This management of a local refcount_set is for convenience of callers
   1886  1.1.1.10  mrg      who do not share a refcount_set over multiple map/unmap uses.  */
   1887  1.1.1.10  mrg   htab_t local_refcount_set = NULL;
   1888  1.1.1.10  mrg   if (refcount_set == NULL)
   1889  1.1.1.10  mrg     {
   1890  1.1.1.10  mrg       local_refcount_set = htab_create (mapnum);
   1891  1.1.1.10  mrg       refcount_set = &local_refcount_set;
   1892  1.1.1.10  mrg     }
   1893  1.1.1.10  mrg 
   1894  1.1.1.10  mrg   struct target_mem_desc *tgt;
   1895  1.1.1.10  mrg   tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
   1896  1.1.1.10  mrg 				sizes, kinds, short_mapkind, refcount_set,
   1897  1.1.1.10  mrg 				pragma_kind);
   1898  1.1.1.10  mrg   if (local_refcount_set)
   1899  1.1.1.10  mrg     htab_free (local_refcount_set);
   1900  1.1.1.10  mrg 
   1901  1.1.1.10  mrg   return tgt;
   1902   1.1.1.8  mrg }
   1903   1.1.1.8  mrg 
   1904   1.1.1.8  mrg attribute_hidden struct target_mem_desc *
   1905  1.1.1.10  mrg goacc_map_vars (struct gomp_device_descr *devicep,
   1906  1.1.1.10  mrg 		struct goacc_asyncqueue *aq, size_t mapnum,
   1907  1.1.1.10  mrg 		void **hostaddrs, void **devaddrs, size_t *sizes,
   1908  1.1.1.10  mrg 		void *kinds, bool short_mapkind,
   1909  1.1.1.10  mrg 		enum gomp_map_vars_kind pragma_kind)
   1910   1.1.1.8  mrg {
   1911   1.1.1.8  mrg   return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
   1912  1.1.1.10  mrg 				 sizes, kinds, short_mapkind, NULL,
   1913  1.1.1.10  mrg 				 GOMP_MAP_VARS_OPENACC | pragma_kind);
   1914   1.1.1.8  mrg }
   1915   1.1.1.8  mrg 
   1916       1.1  mrg static void
   1917       1.1  mrg gomp_unmap_tgt (struct target_mem_desc *tgt)
   1918       1.1  mrg {
   1919       1.1  mrg   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
   1920       1.1  mrg   if (tgt->tgt_end)
   1921   1.1.1.3  mrg     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
   1922       1.1  mrg 
   1923       1.1  mrg   free (tgt->array);
   1924       1.1  mrg   free (tgt);
   1925       1.1  mrg }
   1926       1.1  mrg 
   1927   1.1.1.8  mrg static bool
   1928   1.1.1.8  mrg gomp_unref_tgt (void *ptr)
   1929   1.1.1.7  mrg {
   1930   1.1.1.7  mrg   bool is_tgt_unmapped = false;
   1931   1.1.1.8  mrg 
   1932   1.1.1.8  mrg   struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
   1933   1.1.1.8  mrg 
   1934   1.1.1.8  mrg   if (tgt->refcount > 1)
   1935   1.1.1.8  mrg     tgt->refcount--;
   1936   1.1.1.7  mrg   else
   1937   1.1.1.7  mrg     {
   1938   1.1.1.8  mrg       gomp_unmap_tgt (tgt);
   1939   1.1.1.7  mrg       is_tgt_unmapped = true;
   1940   1.1.1.7  mrg     }
   1941   1.1.1.8  mrg 
   1942   1.1.1.8  mrg   return is_tgt_unmapped;
   1943   1.1.1.8  mrg }
   1944   1.1.1.8  mrg 
static void
gomp_unref_tgt_void (void *ptr)
{
  /* Wrapper around gomp_unref_tgt discarding the result; async queue
     callbacks must have a void return type.  */
  gomp_unref_tgt (ptr);
}
   1950   1.1.1.8  mrg 
   1951  1.1.1.10  mrg static void
   1952  1.1.1.10  mrg gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
   1953   1.1.1.8  mrg {
   1954  1.1.1.10  mrg   splay_tree_remove (sp, k);
   1955   1.1.1.8  mrg   if (k->aux)
   1956   1.1.1.8  mrg     {
   1957   1.1.1.8  mrg       if (k->aux->link_key)
   1958  1.1.1.10  mrg 	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
   1959   1.1.1.8  mrg       if (k->aux->attach_count)
   1960   1.1.1.8  mrg 	free (k->aux->attach_count);
   1961   1.1.1.8  mrg       free (k->aux);
   1962   1.1.1.8  mrg       k->aux = NULL;
   1963   1.1.1.8  mrg     }
   1964  1.1.1.10  mrg }
   1965  1.1.1.10  mrg 
/* Remove the mapping K (and, for structure-element mappings, all of its
   sibling element keys) from DEVICEP's memory map, then drop one reference
   on the owning target_mem_desc -- deferred via AQ if that is non-NULL.
   Returns true iff the target_mem_desc was unmapped synchronously.  */

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside.  */
      splay_tree_node node =
	(splay_tree_node) ((char *) k
			   - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys.  */
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    /* Sibling element keys are adjacent in the node array; step to
	       the next node's embedded key.  */
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  /* With an async queue, releasing the target_mem_desc is deferred until
     queued operations complete, as they may still reference target memory;
     in that case we cannot report whether it was unmapped.  */
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}
   2008   1.1.1.7  mrg 
   2009   1.1.1.8  mrg attribute_hidden bool
   2010   1.1.1.8  mrg gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
   2011   1.1.1.8  mrg {
   2012   1.1.1.8  mrg   return gomp_remove_var_internal (devicep, k, NULL);
   2013   1.1.1.8  mrg }
   2014   1.1.1.8  mrg 
   2015   1.1.1.8  mrg /* Remove a variable asynchronously.  This actually removes the variable
   2016   1.1.1.8  mrg    mapping immediately, but retains the linked target_mem_desc until the
   2017   1.1.1.8  mrg    asynchronous operation has completed (as it may still refer to target
   2018   1.1.1.8  mrg    memory).  The device lock must be held before entry, and remains locked on
   2019   1.1.1.8  mrg    exit.  */
   2020   1.1.1.8  mrg 
   2021   1.1.1.8  mrg attribute_hidden void
   2022   1.1.1.8  mrg gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
   2023   1.1.1.8  mrg 		       struct goacc_asyncqueue *aq)
   2024   1.1.1.8  mrg {
   2025   1.1.1.8  mrg   (void) gomp_remove_var_internal (devicep, k, aq);
   2026   1.1.1.8  mrg }
   2027   1.1.1.8  mrg 
   2028       1.1  mrg /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   2029       1.1  mrg    variables back from device to host: if it is false, it is assumed that this
   2030   1.1.1.3  mrg    has been done already.  */
   2031       1.1  mrg 
static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* An empty descriptor carries no device mappings; just free it.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already finalized: nothing to copy back or free on the
	 device side, only the host-side bookkeeping remains.  */
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
					     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      /* Copy back when the refcount machinery says a copy-out is due and
	 the caller requested it for this entry, or unconditionally for
	 'always' copy-from entries.  */
      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  /* Drop the reference held on TGT itself; with an async queue this is
     deferred until queued operations (which may still use TGT's device
     memory) have completed.  */
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
   2107       1.1  mrg 
   2108  1.1.1.10  mrg static void
   2109  1.1.1.10  mrg gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
   2110  1.1.1.10  mrg 		 htab_t *refcount_set)
   2111   1.1.1.8  mrg {
   2112  1.1.1.10  mrg   /* This management of a local refcount_set is for convenience of callers
   2113  1.1.1.10  mrg      who do not share a refcount_set over multiple map/unmap uses.  */
   2114  1.1.1.10  mrg   htab_t local_refcount_set = NULL;
   2115  1.1.1.10  mrg   if (refcount_set == NULL)
   2116  1.1.1.10  mrg     {
   2117  1.1.1.10  mrg       local_refcount_set = htab_create (tgt->list_count);
   2118  1.1.1.10  mrg       refcount_set = &local_refcount_set;
   2119  1.1.1.10  mrg     }
   2120  1.1.1.10  mrg 
   2121  1.1.1.10  mrg   gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
   2122  1.1.1.10  mrg 
   2123  1.1.1.10  mrg   if (local_refcount_set)
   2124  1.1.1.10  mrg     htab_free (local_refcount_set);
   2125   1.1.1.8  mrg }
   2126   1.1.1.8  mrg 
   2127   1.1.1.8  mrg attribute_hidden void
   2128  1.1.1.10  mrg goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
   2129  1.1.1.10  mrg 		  struct goacc_asyncqueue *aq)
   2130   1.1.1.8  mrg {
   2131  1.1.1.10  mrg   gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
   2132   1.1.1.8  mrg }
   2133   1.1.1.8  mrg 
   2134       1.1  mrg static void
   2135       1.1  mrg gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
   2136   1.1.1.2  mrg 	     size_t *sizes, void *kinds, bool short_mapkind)
   2137       1.1  mrg {
   2138       1.1  mrg   size_t i;
   2139       1.1  mrg   struct splay_tree_key_s cur_node;
   2140   1.1.1.2  mrg   const int typemask = short_mapkind ? 0xff : 0x7;
   2141       1.1  mrg 
   2142       1.1  mrg   if (!devicep)
   2143       1.1  mrg     return;
   2144       1.1  mrg 
   2145       1.1  mrg   if (mapnum == 0)
   2146       1.1  mrg     return;
   2147       1.1  mrg 
   2148       1.1  mrg   gomp_mutex_lock (&devicep->lock);
   2149   1.1.1.2  mrg   if (devicep->state == GOMP_DEVICE_FINALIZED)
   2150   1.1.1.2  mrg     {
   2151   1.1.1.2  mrg       gomp_mutex_unlock (&devicep->lock);
   2152   1.1.1.2  mrg       return;
   2153   1.1.1.2  mrg     }
   2154   1.1.1.2  mrg 
   2155       1.1  mrg   for (i = 0; i < mapnum; i++)
   2156       1.1  mrg     if (sizes[i])
   2157       1.1  mrg       {
   2158       1.1  mrg 	cur_node.host_start = (uintptr_t) hostaddrs[i];
   2159       1.1  mrg 	cur_node.host_end = cur_node.host_start + sizes[i];
   2160       1.1  mrg 	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
   2161       1.1  mrg 	if (n)
   2162       1.1  mrg 	  {
   2163   1.1.1.2  mrg 	    int kind = get_kind (short_mapkind, kinds, i);
   2164       1.1  mrg 	    if (n->host_start > cur_node.host_start
   2165       1.1  mrg 		|| n->host_end < cur_node.host_end)
   2166       1.1  mrg 	      {
   2167       1.1  mrg 		gomp_mutex_unlock (&devicep->lock);
   2168       1.1  mrg 		gomp_fatal ("Trying to update [%p..%p) object when "
   2169       1.1  mrg 			    "only [%p..%p) is mapped",
   2170       1.1  mrg 			    (void *) cur_node.host_start,
   2171       1.1  mrg 			    (void *) cur_node.host_end,
   2172       1.1  mrg 			    (void *) n->host_start,
   2173       1.1  mrg 			    (void *) n->host_end);
   2174       1.1  mrg 	      }
   2175   1.1.1.3  mrg 
   2176  1.1.1.10  mrg 	    if (n->aux && n->aux->attach_count)
   2177  1.1.1.10  mrg 	      {
   2178  1.1.1.10  mrg 		uintptr_t addr = cur_node.host_start;
   2179  1.1.1.10  mrg 		while (addr < cur_node.host_end)
   2180  1.1.1.10  mrg 		  {
   2181  1.1.1.10  mrg 		    /* We have to be careful not to overwrite still attached
   2182  1.1.1.10  mrg 		       pointers during host<->device updates.  */
   2183  1.1.1.10  mrg 		    size_t i = (addr - cur_node.host_start) / sizeof (void *);
   2184  1.1.1.10  mrg 		    if (n->aux->attach_count[i] == 0)
   2185  1.1.1.10  mrg 		      {
   2186  1.1.1.10  mrg 			void *devaddr = (void *) (n->tgt->tgt_start
   2187  1.1.1.10  mrg 						  + n->tgt_offset
   2188  1.1.1.10  mrg 						  + addr - n->host_start);
   2189  1.1.1.10  mrg 			if (GOMP_MAP_COPY_TO_P (kind & typemask))
   2190  1.1.1.10  mrg 			  gomp_copy_host2dev (devicep, NULL,
   2191  1.1.1.10  mrg 					      devaddr, (void *) addr,
   2192  1.1.1.10  mrg 					      sizeof (void *), false, NULL);
   2193  1.1.1.10  mrg 			if (GOMP_MAP_COPY_FROM_P (kind & typemask))
   2194  1.1.1.10  mrg 			  gomp_copy_dev2host (devicep, NULL,
   2195  1.1.1.10  mrg 					      (void *) addr, devaddr,
   2196  1.1.1.10  mrg 					      sizeof (void *));
   2197  1.1.1.10  mrg 		      }
   2198  1.1.1.10  mrg 		    addr += sizeof (void *);
   2199  1.1.1.10  mrg 		  }
   2200  1.1.1.10  mrg 	      }
   2201  1.1.1.10  mrg 	    else
   2202  1.1.1.10  mrg 	      {
   2203  1.1.1.10  mrg 		void *hostaddr = (void *) cur_node.host_start;
   2204  1.1.1.10  mrg 		void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
   2205  1.1.1.10  mrg 					  + cur_node.host_start
   2206  1.1.1.10  mrg 					  - n->host_start);
   2207  1.1.1.10  mrg 		size_t size = cur_node.host_end - cur_node.host_start;
   2208   1.1.1.3  mrg 
   2209  1.1.1.10  mrg 		if (GOMP_MAP_COPY_TO_P (kind & typemask))
   2210  1.1.1.10  mrg 		  gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
   2211  1.1.1.10  mrg 				      false, NULL);
   2212  1.1.1.10  mrg 		if (GOMP_MAP_COPY_FROM_P (kind & typemask))
   2213  1.1.1.10  mrg 		  gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
   2214  1.1.1.10  mrg 	      }
   2215       1.1  mrg 	  }
   2216  1.1.1.11  mrg 	else
   2217  1.1.1.11  mrg 	  {
   2218  1.1.1.11  mrg 	    int kind = get_kind (short_mapkind, kinds, i);
   2219  1.1.1.11  mrg 
   2220  1.1.1.11  mrg 	    if (GOMP_MAP_PRESENT_P (kind))
   2221  1.1.1.11  mrg 	      {
   2222  1.1.1.11  mrg 		/* We already looked up the memory region above and it
   2223  1.1.1.11  mrg 		   was missing.  */
   2224  1.1.1.11  mrg 		gomp_mutex_unlock (&devicep->lock);
   2225  1.1.1.11  mrg #ifdef HAVE_INTTYPES_H
   2226  1.1.1.11  mrg 		gomp_fatal ("present clause: not present on the device "
   2227  1.1.1.11  mrg 			    "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
   2228  1.1.1.11  mrg 			    "dev: %d)", (void *) hostaddrs[i],
   2229  1.1.1.11  mrg 			    (uint64_t) sizes[i], (uint64_t) sizes[i],
   2230  1.1.1.11  mrg 			    devicep->target_id);
   2231  1.1.1.11  mrg #else
   2232  1.1.1.11  mrg 		gomp_fatal ("present clause: not present on the device "
   2233  1.1.1.11  mrg 			    "(addr: %p, size: %lu (0x%lx), dev: %d)",
   2234  1.1.1.11  mrg 			    (void *) hostaddrs[i], (unsigned long) sizes[i],
   2235  1.1.1.11  mrg 			    (unsigned long) sizes[i], devicep->target_id);
   2236  1.1.1.11  mrg #endif
   2237  1.1.1.11  mrg 	      }
   2238  1.1.1.11  mrg 	  }
   2239       1.1  mrg       }
   2240       1.1  mrg   gomp_mutex_unlock (&devicep->lock);
   2241       1.1  mrg }
   2242       1.1  mrg 
   2243  1.1.1.11  mrg static struct gomp_offload_icv_list *
   2244  1.1.1.11  mrg gomp_get_offload_icv_item (int dev_num)
   2245  1.1.1.11  mrg {
   2246  1.1.1.11  mrg   struct gomp_offload_icv_list *l = gomp_offload_icv_list;
   2247  1.1.1.11  mrg   while (l != NULL && l->device_num != dev_num)
   2248  1.1.1.11  mrg     l = l->next;
   2249  1.1.1.11  mrg 
   2250  1.1.1.11  mrg   return l;
   2251  1.1.1.11  mrg }
   2252  1.1.1.11  mrg 
   2253  1.1.1.11  mrg /* Helper function for 'gomp_load_image_to_device'.  Returns the ICV values
   2254  1.1.1.11  mrg    depending on the device num and the variable hierarchy
   2255  1.1.1.11  mrg    (_DEV_42, _DEV, _ALL).  If no ICV was initially configured for the given
   2256  1.1.1.11  mrg    device and thus no item with that device number is contained in
   2257  1.1.1.11  mrg    gomp_offload_icv_list, then a new item is created and added to the list.  */
   2258  1.1.1.11  mrg 
   2259  1.1.1.11  mrg static struct gomp_offload_icvs *
   2260  1.1.1.11  mrg get_gomp_offload_icvs (int dev_num)
   2261  1.1.1.11  mrg {
   2262  1.1.1.11  mrg   struct gomp_icv_list *dev
   2263  1.1.1.11  mrg     = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
   2264  1.1.1.11  mrg   struct gomp_icv_list *all
   2265  1.1.1.11  mrg     = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
   2266  1.1.1.11  mrg   struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
   2267  1.1.1.11  mrg   struct gomp_offload_icv_list *offload_icvs
   2268  1.1.1.11  mrg     = gomp_get_offload_icv_item (dev_num);
   2269  1.1.1.11  mrg 
   2270  1.1.1.11  mrg   if (offload_icvs != NULL)
   2271  1.1.1.11  mrg     return &offload_icvs->icvs;
   2272  1.1.1.11  mrg 
   2273  1.1.1.11  mrg   struct gomp_offload_icv_list *new
   2274  1.1.1.11  mrg     = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
   2275  1.1.1.11  mrg 
   2276  1.1.1.11  mrg   new->device_num = dev_num;
   2277  1.1.1.11  mrg   new->icvs.device_num = dev_num;
   2278  1.1.1.11  mrg   new->next = gomp_offload_icv_list;
   2279  1.1.1.11  mrg 
   2280  1.1.1.11  mrg   if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
   2281  1.1.1.11  mrg     new->icvs.nteams = dev_x->icvs.nteams_var;
   2282  1.1.1.11  mrg   else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
   2283  1.1.1.11  mrg     new->icvs.nteams = dev->icvs.nteams_var;
   2284  1.1.1.11  mrg   else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
   2285  1.1.1.11  mrg     new->icvs.nteams = all->icvs.nteams_var;
   2286  1.1.1.11  mrg   else
   2287  1.1.1.11  mrg     new->icvs.nteams = gomp_default_icv_values.nteams_var;
   2288  1.1.1.11  mrg 
   2289  1.1.1.11  mrg   if (dev_x != NULL
   2290  1.1.1.11  mrg       && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
   2291  1.1.1.11  mrg     new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
   2292  1.1.1.11  mrg   else if (dev != NULL
   2293  1.1.1.11  mrg 	   && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
   2294  1.1.1.11  mrg     new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
   2295  1.1.1.11  mrg   else if (all != NULL
   2296  1.1.1.11  mrg 	   && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
   2297  1.1.1.11  mrg     new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
   2298  1.1.1.11  mrg   else
   2299  1.1.1.11  mrg     new->icvs.teams_thread_limit
   2300  1.1.1.11  mrg       = gomp_default_icv_values.teams_thread_limit_var;
   2301  1.1.1.11  mrg 
   2302  1.1.1.11  mrg   if (dev_x != NULL
   2303  1.1.1.11  mrg       && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
   2304  1.1.1.11  mrg     new->icvs.default_device = dev_x->icvs.default_device_var;
   2305  1.1.1.11  mrg   else if (dev != NULL
   2306  1.1.1.11  mrg 	   && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
   2307  1.1.1.11  mrg     new->icvs.default_device = dev->icvs.default_device_var;
   2308  1.1.1.11  mrg   else if (all != NULL
   2309  1.1.1.11  mrg 	   && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
   2310  1.1.1.11  mrg     new->icvs.default_device = all->icvs.default_device_var;
   2311  1.1.1.11  mrg   else
   2312  1.1.1.11  mrg     new->icvs.default_device = gomp_default_icv_values.default_device_var;
   2313  1.1.1.11  mrg 
   2314  1.1.1.11  mrg   gomp_offload_icv_list = new;
   2315  1.1.1.11  mrg   return &new->icvs;
   2316  1.1.1.11  mrg }
   2317  1.1.1.11  mrg 
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses from
   HOST_TABLE and from the loaded target image.  We rely on the host and
   device compilers emitting variables and functions in the same order.  */
   2322       1.1  mrg 
   2323       1.1  mrg static void
   2324   1.1.1.2  mrg gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
   2325   1.1.1.2  mrg 			   const void *host_table, const void *target_data,
   2326   1.1.1.2  mrg 			   bool is_register_lock)
   2327       1.1  mrg {
   2328       1.1  mrg   void **host_func_table = ((void ***) host_table)[0];
   2329       1.1  mrg   void **host_funcs_end  = ((void ***) host_table)[1];
   2330       1.1  mrg   void **host_var_table  = ((void ***) host_table)[2];
   2331       1.1  mrg   void **host_vars_end   = ((void ***) host_table)[3];
   2332  1.1.1.11  mrg   void **host_ind_func_table = NULL;
   2333  1.1.1.11  mrg   void **host_ind_funcs_end  = NULL;
   2334       1.1  mrg 
   2335  1.1.1.11  mrg   if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
   2336  1.1.1.11  mrg     {
   2337  1.1.1.11  mrg       host_ind_func_table = ((void ***) host_table)[4];
   2338  1.1.1.11  mrg       host_ind_funcs_end  = ((void ***) host_table)[5];
   2339  1.1.1.11  mrg     }
   2340  1.1.1.11  mrg 
   2341  1.1.1.11  mrg   /* The func and ind_func tables contain only addresses, the var table
   2342  1.1.1.11  mrg      contains addresses and corresponding sizes.  */
   2343       1.1  mrg   int num_funcs = host_funcs_end - host_func_table;
   2344       1.1  mrg   int num_vars  = (host_vars_end - host_var_table) / 2;
   2345  1.1.1.11  mrg   int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
   2346  1.1.1.10  mrg 
   2347       1.1  mrg   /* Load image to device and get target addresses for the image.  */
   2348       1.1  mrg   struct addr_pair *target_table = NULL;
   2349  1.1.1.11  mrg   uint64_t *rev_target_fn_table = NULL;
   2350   1.1.1.2  mrg   int i, num_target_entries;
   2351   1.1.1.2  mrg 
   2352  1.1.1.11  mrg   /* With reverse offload, insert also target-host addresses. */
   2353  1.1.1.11  mrg   bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
   2354  1.1.1.11  mrg 
   2355   1.1.1.2  mrg   num_target_entries
   2356   1.1.1.2  mrg     = devicep->load_image_func (devicep->target_id, version,
   2357  1.1.1.11  mrg 				target_data, &target_table,
   2358  1.1.1.11  mrg 				rev_lookup ? &rev_target_fn_table : NULL,
   2359  1.1.1.11  mrg 				num_ind_funcs
   2360  1.1.1.11  mrg 				  ? (uint64_t *) host_ind_func_table : NULL);
   2361       1.1  mrg 
   2362  1.1.1.10  mrg   if (num_target_entries != num_funcs + num_vars
   2363  1.1.1.11  mrg       /* "+1" due to the additional ICV struct.  */
   2364  1.1.1.11  mrg       && num_target_entries != num_funcs + num_vars + 1)
   2365       1.1  mrg     {
   2366       1.1  mrg       gomp_mutex_unlock (&devicep->lock);
   2367       1.1  mrg       if (is_register_lock)
   2368       1.1  mrg 	gomp_mutex_unlock (&register_lock);
   2369   1.1.1.2  mrg       gomp_fatal ("Cannot map target functions or variables"
   2370   1.1.1.2  mrg 		  " (expected %u, have %u)", num_funcs + num_vars,
   2371   1.1.1.2  mrg 		  num_target_entries);
   2372       1.1  mrg     }
   2373       1.1  mrg 
   2374       1.1  mrg   /* Insert host-target address mapping into splay tree.  */
   2375       1.1  mrg   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
   2376  1.1.1.11  mrg   /* "+1" due to the additional ICV struct.  */
   2377  1.1.1.11  mrg   tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
   2378  1.1.1.11  mrg 			    * sizeof (*tgt->array));
   2379  1.1.1.11  mrg   if (rev_target_fn_table)
   2380  1.1.1.11  mrg     tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
   2381  1.1.1.11  mrg   else
   2382  1.1.1.11  mrg     tgt->rev_array = NULL;
   2383   1.1.1.2  mrg   tgt->refcount = REFCOUNT_INFINITY;
   2384       1.1  mrg   tgt->tgt_start = 0;
   2385       1.1  mrg   tgt->tgt_end = 0;
   2386       1.1  mrg   tgt->to_free = NULL;
   2387       1.1  mrg   tgt->prev = NULL;
   2388       1.1  mrg   tgt->list_count = 0;
   2389       1.1  mrg   tgt->device_descr = devicep;
   2390       1.1  mrg   splay_tree_node array = tgt->array;
   2391  1.1.1.11  mrg   reverse_splay_tree_node rev_array = tgt->rev_array;
   2392       1.1  mrg 
   2393       1.1  mrg   for (i = 0; i < num_funcs; i++)
   2394       1.1  mrg     {
   2395       1.1  mrg       splay_tree_key k = &array->key;
   2396       1.1  mrg       k->host_start = (uintptr_t) host_func_table[i];
   2397       1.1  mrg       k->host_end = k->host_start + 1;
   2398       1.1  mrg       k->tgt = tgt;
   2399       1.1  mrg       k->tgt_offset = target_table[i].start;
   2400   1.1.1.2  mrg       k->refcount = REFCOUNT_INFINITY;
   2401   1.1.1.7  mrg       k->dynamic_refcount = 0;
   2402   1.1.1.8  mrg       k->aux = NULL;
   2403       1.1  mrg       array->left = NULL;
   2404       1.1  mrg       array->right = NULL;
   2405       1.1  mrg       splay_tree_insert (&devicep->mem_map, array);
   2406  1.1.1.11  mrg       if (rev_target_fn_table)
   2407  1.1.1.11  mrg 	{
   2408  1.1.1.11  mrg 	  reverse_splay_tree_key k2 = &rev_array->key;
   2409  1.1.1.11  mrg 	  k2->dev = rev_target_fn_table[i];
   2410  1.1.1.11  mrg 	  k2->k = k;
   2411  1.1.1.11  mrg 	  rev_array->left = NULL;
   2412  1.1.1.11  mrg 	  rev_array->right = NULL;
   2413  1.1.1.11  mrg 	  if (k2->dev != 0)
   2414  1.1.1.11  mrg 	    reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
   2415  1.1.1.11  mrg 	  rev_array++;
   2416  1.1.1.11  mrg 	}
   2417       1.1  mrg       array++;
   2418       1.1  mrg     }
   2419       1.1  mrg 
   2420   1.1.1.2  mrg   /* Most significant bit of the size in host and target tables marks
   2421   1.1.1.2  mrg      "omp declare target link" variables.  */
   2422   1.1.1.2  mrg   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
   2423   1.1.1.2  mrg   const uintptr_t size_mask = ~link_bit;
   2424   1.1.1.2  mrg 
   2425       1.1  mrg   for (i = 0; i < num_vars; i++)
   2426       1.1  mrg     {
   2427       1.1  mrg       struct addr_pair *target_var = &target_table[num_funcs + i];
   2428   1.1.1.2  mrg       uintptr_t target_size = target_var->end - target_var->start;
   2429   1.1.1.8  mrg       bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
   2430   1.1.1.2  mrg 
   2431   1.1.1.8  mrg       if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
   2432       1.1  mrg 	{
   2433       1.1  mrg 	  gomp_mutex_unlock (&devicep->lock);
   2434       1.1  mrg 	  if (is_register_lock)
   2435       1.1  mrg 	    gomp_mutex_unlock (&register_lock);
   2436   1.1.1.2  mrg 	  gomp_fatal ("Cannot map target variables (size mismatch)");
   2437       1.1  mrg 	}
   2438       1.1  mrg 
   2439       1.1  mrg       splay_tree_key k = &array->key;
   2440       1.1  mrg       k->host_start = (uintptr_t) host_var_table[i * 2];
   2441   1.1.1.2  mrg       k->host_end
   2442   1.1.1.2  mrg 	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
   2443       1.1  mrg       k->tgt = tgt;
   2444       1.1  mrg       k->tgt_offset = target_var->start;
   2445   1.1.1.8  mrg       k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
   2446   1.1.1.7  mrg       k->dynamic_refcount = 0;
   2447   1.1.1.8  mrg       k->aux = NULL;
   2448       1.1  mrg       array->left = NULL;
   2449       1.1  mrg       array->right = NULL;
   2450       1.1  mrg       splay_tree_insert (&devicep->mem_map, array);
   2451       1.1  mrg       array++;
   2452       1.1  mrg     }
   2453       1.1  mrg 
   2454  1.1.1.11  mrg   /* Last entry is for a ICVs variable.
   2455  1.1.1.11  mrg      Tolerate case where plugin does not return those entries.  */
   2456  1.1.1.10  mrg   if (num_funcs + num_vars < num_target_entries)
   2457  1.1.1.10  mrg     {
   2458  1.1.1.11  mrg       struct addr_pair *var = &target_table[num_funcs + num_vars];
   2459  1.1.1.11  mrg 
   2460  1.1.1.11  mrg       /* Start address will be non-zero for the ICVs variable if
   2461  1.1.1.11  mrg 	 the variable was found in this image.  */
   2462  1.1.1.11  mrg       if (var->start != 0)
   2463  1.1.1.10  mrg 	{
   2464  1.1.1.10  mrg 	  /* The index of the devicep within devices[] is regarded as its
   2465  1.1.1.10  mrg 	     'device number', which is different from the per-device type
   2466  1.1.1.10  mrg 	     devicep->target_id.  */
   2467  1.1.1.11  mrg 	  int dev_num = (int) (devicep - &devices[0]);
   2468  1.1.1.11  mrg 	  struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
   2469  1.1.1.11  mrg 	  size_t var_size = var->end - var->start;
   2470  1.1.1.11  mrg 	  if (var_size != sizeof (struct gomp_offload_icvs))
   2471  1.1.1.10  mrg 	    {
   2472  1.1.1.10  mrg 	      gomp_mutex_unlock (&devicep->lock);
   2473  1.1.1.10  mrg 	      if (is_register_lock)
   2474  1.1.1.10  mrg 		gomp_mutex_unlock (&register_lock);
   2475  1.1.1.11  mrg 	      gomp_fatal ("offload plugin managed 'icv struct' not of expected "
   2476  1.1.1.10  mrg 			  "format");
   2477  1.1.1.10  mrg 	    }
   2478  1.1.1.11  mrg 	  /* Copy the ICVs variable to place on device memory, hereby
   2479  1.1.1.11  mrg 	     actually designating its device number into effect.  */
   2480  1.1.1.11  mrg 	  gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
   2481  1.1.1.11  mrg 			      var_size, false, NULL);
   2482  1.1.1.11  mrg 	  splay_tree_key k = &array->key;
   2483  1.1.1.11  mrg 	  k->host_start = (uintptr_t) icvs;
   2484  1.1.1.11  mrg 	  k->host_end =
   2485  1.1.1.11  mrg 	    k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
   2486  1.1.1.11  mrg 	  k->tgt = tgt;
   2487  1.1.1.11  mrg 	  k->tgt_offset = var->start;
   2488  1.1.1.11  mrg 	  k->refcount = REFCOUNT_INFINITY;
   2489  1.1.1.11  mrg 	  k->dynamic_refcount = 0;
   2490  1.1.1.11  mrg 	  k->aux = NULL;
   2491  1.1.1.11  mrg 	  array->left = NULL;
   2492  1.1.1.11  mrg 	  array->right = NULL;
   2493  1.1.1.11  mrg 	  splay_tree_insert (&devicep->mem_map, array);
   2494  1.1.1.11  mrg 	  array++;
   2495  1.1.1.10  mrg 	}
   2496  1.1.1.10  mrg     }
   2497  1.1.1.10  mrg 
   2498       1.1  mrg   free (target_table);
   2499       1.1  mrg }
   2500       1.1  mrg 
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.   */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  /* HOST_TABLE layout: [0] = first function address, [1] = one past the
     last function, [2] = first (address, size) var pair, [3] = end of
     the var pairs.  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  All of this image's entries share
     one target_mem_desc, so the first entry's node gives access to the
     'tgt' that must be freed at the end.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  /* Ask the plugin to drop the image; on failure release the device lock
     before dying so gomp_fatal does not leave it held.  */
  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }
  if (devicep->mem_map_rev.root)
    {
      /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
	 real allocation.  */
      assert (node && node->tgt && node->tgt->rev_array);
      assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
      free (node->tgt->rev_array);
      devicep->mem_map_rev.root = NULL;
    }

  /* Remove mappings from splay tree.  Functions are registered as one-byte
     [fn, fn + 1) ranges, mirroring how they were inserted at load time.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      /* Plain variables are simply removed; link variables go through
	 gomp_remove_var, which may also unmap (and free) the whole 'tgt'.  */
      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  /* If nothing above already released the shared target_mem_desc, free it
     and the first node here.  */
  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
   2582   1.1.1.2  mrg 
   2583  1.1.1.11  mrg static void
   2584  1.1.1.11  mrg gomp_requires_to_name (char *buf, size_t size, int requires_mask)
   2585  1.1.1.11  mrg {
   2586  1.1.1.11  mrg   char *end = buf + size, *p = buf;
   2587  1.1.1.11  mrg   if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
   2588  1.1.1.11  mrg     p += snprintf (p, end - p, "unified_address");
   2589  1.1.1.11  mrg   if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
   2590  1.1.1.11  mrg     p += snprintf (p, end - p, "%sunified_shared_memory",
   2591  1.1.1.11  mrg 		   (p == buf ? "" : ", "));
   2592  1.1.1.11  mrg   if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
   2593  1.1.1.11  mrg     p += snprintf (p, end - p, "%sreverse_offload",
   2594  1.1.1.11  mrg 		   (p == buf ? "" : ", "));
   2595  1.1.1.11  mrg }
   2596  1.1.1.11  mrg 
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and DATA.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *data)
{
  int i;

  /* Refuse images produced by a toolchain newer than this runtime.  */
  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  /* For descriptor versions > 1, DATA begins with the image's
     'omp requires' mask; the target data proper follows it.  */
  int omp_req;
  const void *target_data;
  if (GOMP_VERSION_LIB (version) > 1)
    {
      omp_req = (int) (size_t) ((void **) data)[0];
      target_data = &((void **) data)[1];
    }
  else
    {
      omp_req = 0;
      target_data = data;
    }

  gomp_mutex_lock (&register_lock);

  /* All compilation units must agree on their 'omp requires' clauses;
     diagnose any mismatch against the mask recorded from earlier images.  */
  if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
    {
      /* Buffers sized for the longest possible clause list.  */
      char buf1[sizeof ("unified_address, unified_shared_memory, "
			"reverse_offload")];
      char buf2[sizeof ("unified_address, unified_shared_memory, "
			"reverse_offload")];
      gomp_requires_to_name (buf2, sizeof (buf2),
			     omp_req != GOMP_REQUIRES_TARGET_USED
			     ? omp_req : omp_requires_mask);
      if (omp_req != GOMP_REQUIRES_TARGET_USED
	  && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
	{
	  gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
	  gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
		      "in multiple compilation units: '%s' vs. '%s'",
		      buf1, buf2);
	}
      else
	gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
		    "some compilation units", buf2);
    }
  omp_requires_mask = omp_req;

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images, so devices initialized later
     can pick it up in gomp_init_device.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
   2674       1.1  mrg 
/* Legacy entry point: pre-versioned images register through here.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  /* Forward to the versioned entry point; version 0 selects the original
     descriptor layout, where TARGET_DATA is used as-is.  */
  const unsigned legacy_version = 0;
  GOMP_offload_register_ver (legacy_version, host_table, target_type,
			     target_data);
}
   2683   1.1.1.2  mrg 
   2684       1.1  mrg /* This function should be called from every offload image while unloading.
   2685       1.1  mrg    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   2686  1.1.1.11  mrg    the target, and DATA.  */
   2687       1.1  mrg 
   2688       1.1  mrg void
   2689   1.1.1.2  mrg GOMP_offload_unregister_ver (unsigned version, const void *host_table,
   2690  1.1.1.11  mrg 			     int target_type, const void *data)
   2691       1.1  mrg {
   2692       1.1  mrg   int i;
   2693       1.1  mrg 
   2694  1.1.1.11  mrg   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
   2695  1.1.1.11  mrg     gomp_fatal ("Library too old for offload (version %u < %u)",
   2696  1.1.1.11  mrg 		GOMP_VERSION, GOMP_VERSION_LIB (version));
   2697  1.1.1.11  mrg 
   2698  1.1.1.11  mrg   const void *target_data;
   2699  1.1.1.11  mrg   if (GOMP_VERSION_LIB (version) > 1)
   2700  1.1.1.11  mrg     target_data = &((void **) data)[1];
   2701  1.1.1.11  mrg   else
   2702  1.1.1.11  mrg     target_data = data;
   2703  1.1.1.11  mrg 
   2704       1.1  mrg   gomp_mutex_lock (&register_lock);
   2705       1.1  mrg 
   2706       1.1  mrg   /* Unload image from all initialized devices.  */
   2707       1.1  mrg   for (i = 0; i < num_devices; i++)
   2708       1.1  mrg     {
   2709       1.1  mrg       struct gomp_device_descr *devicep = &devices[i];
   2710       1.1  mrg       gomp_mutex_lock (&devicep->lock);
   2711   1.1.1.2  mrg       if (devicep->type == target_type
   2712   1.1.1.2  mrg 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
   2713   1.1.1.2  mrg 	gomp_unload_image_from_device (devicep, version,
   2714   1.1.1.2  mrg 				       host_table, target_data);
   2715       1.1  mrg       gomp_mutex_unlock (&devicep->lock);
   2716       1.1  mrg     }
   2717       1.1  mrg 
   2718       1.1  mrg   /* Remove image from array of pending images.  */
   2719       1.1  mrg   for (i = 0; i < num_offload_images; i++)
   2720       1.1  mrg     if (offload_images[i].target_data == target_data)
   2721       1.1  mrg       {
   2722       1.1  mrg 	offload_images[i] = offload_images[--num_offload_images];
   2723       1.1  mrg 	break;
   2724       1.1  mrg       }
   2725       1.1  mrg 
   2726       1.1  mrg   gomp_mutex_unlock (&register_lock);
   2727       1.1  mrg }
   2728       1.1  mrg 
/* Legacy entry point: pre-versioned images unregister through here.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  /* Forward to the versioned entry point; version 0 selects the original
     descriptor layout, where TARGET_DATA is used as-is.  */
  const unsigned legacy_version = 0;
  GOMP_offload_unregister_ver (legacy_version, host_table, target_type,
			       target_data);
}
   2737   1.1.1.2  mrg 
   2738       1.1  mrg /* This function initializes the target device, specified by DEVICEP.  DEVICEP
   2739       1.1  mrg    must be locked on entry, and remains locked on return.  */
   2740       1.1  mrg 
   2741       1.1  mrg attribute_hidden void
   2742       1.1  mrg gomp_init_device (struct gomp_device_descr *devicep)
   2743       1.1  mrg {
   2744       1.1  mrg   int i;
   2745   1.1.1.3  mrg   if (!devicep->init_device_func (devicep->target_id))
   2746   1.1.1.3  mrg     {
   2747   1.1.1.3  mrg       gomp_mutex_unlock (&devicep->lock);
   2748   1.1.1.3  mrg       gomp_fatal ("device initialization failed");
   2749   1.1.1.3  mrg     }
   2750       1.1  mrg 
   2751       1.1  mrg   /* Load to device all images registered by the moment.  */
   2752       1.1  mrg   for (i = 0; i < num_offload_images; i++)
   2753       1.1  mrg     {
   2754       1.1  mrg       struct offload_image_descr *image = &offload_images[i];
   2755       1.1  mrg       if (image->type == devicep->type)
   2756   1.1.1.2  mrg 	gomp_load_image_to_device (devicep, image->version,
   2757   1.1.1.2  mrg 				   image->host_table, image->target_data,
   2758   1.1.1.2  mrg 				   false);
   2759       1.1  mrg     }
   2760       1.1  mrg 
   2761   1.1.1.8  mrg   /* Initialize OpenACC asynchronous queues.  */
   2762   1.1.1.8  mrg   goacc_init_asyncqueues (devicep);
   2763   1.1.1.8  mrg 
   2764   1.1.1.2  mrg   devicep->state = GOMP_DEVICE_INITIALIZED;
   2765   1.1.1.2  mrg }
   2766   1.1.1.2  mrg 
   2767   1.1.1.8  mrg /* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   2768   1.1.1.8  mrg    must be locked on entry, and remains locked on return.  */
   2769   1.1.1.8  mrg 
   2770   1.1.1.8  mrg attribute_hidden bool
   2771   1.1.1.8  mrg gomp_fini_device (struct gomp_device_descr *devicep)
   2772   1.1.1.8  mrg {
   2773   1.1.1.8  mrg   bool ret = goacc_fini_asyncqueues (devicep);
   2774   1.1.1.8  mrg   ret &= devicep->fini_device_func (devicep->target_id);
   2775   1.1.1.8  mrg   devicep->state = GOMP_DEVICE_FINALIZED;
   2776   1.1.1.8  mrg   return ret;
   2777   1.1.1.8  mrg }
   2778   1.1.1.8  mrg 
   2779   1.1.1.2  mrg attribute_hidden void
   2780   1.1.1.2  mrg gomp_unload_device (struct gomp_device_descr *devicep)
   2781   1.1.1.2  mrg {
   2782   1.1.1.2  mrg   if (devicep->state == GOMP_DEVICE_INITIALIZED)
   2783   1.1.1.2  mrg     {
   2784   1.1.1.2  mrg       unsigned i;
   2785  1.1.1.11  mrg 
   2786   1.1.1.2  mrg       /* Unload from device all images registered at the moment.  */
   2787   1.1.1.2  mrg       for (i = 0; i < num_offload_images; i++)
   2788   1.1.1.2  mrg 	{
   2789   1.1.1.2  mrg 	  struct offload_image_descr *image = &offload_images[i];
   2790   1.1.1.2  mrg 	  if (image->type == devicep->type)
   2791   1.1.1.2  mrg 	    gomp_unload_image_from_device (devicep, image->version,
   2792   1.1.1.2  mrg 					   image->host_table,
   2793   1.1.1.2  mrg 					   image->target_data);
   2794   1.1.1.2  mrg 	}
   2795   1.1.1.2  mrg     }
   2796       1.1  mrg }
   2797       1.1  mrg 
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
		      struct gomp_device_descr *devicep, void **args)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  /* Falling back is an error when the user demanded offloading; DEVICEP
     is non-NULL only when a real device was requested but unusable.  */
  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  /* Run FN in a pristine thread state, preserving only the place
     partition; the previous state is restored afterwards.  */
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  /* Scan the NULL-terminated ARGS list for a device-independent
     thread_limit clause and apply it to the host ICVs.  Entries are
     either (id|value) packed in one pointer or an id followed by a
     separate value pointer (GOMP_TARGET_ARG_SUBSEQUENT_PARAM).  */
  if (args)
    while (*args)
      {
	intptr_t id = (intptr_t) *args++, val;
	if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	  val = (intptr_t) *args++;
	else
	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
	if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
	  continue;
	id &= GOMP_TARGET_ARG_ID_MASK;
	if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
	  continue;
	val = val > INT_MAX ? INT_MAX : val;
	if (val)
	  gomp_icv (true)->thread_limit_var = val;
	break;
      }

  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
   2841       1.1  mrg 
   2842   1.1.1.2  mrg /* Calculate alignment and size requirements of a private copy of data shared
   2843   1.1.1.2  mrg    as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
   2844   1.1.1.2  mrg 
   2845   1.1.1.2  mrg static inline void
   2846   1.1.1.2  mrg calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
   2847   1.1.1.2  mrg 				     unsigned short *kinds, size_t *tgt_align,
   2848   1.1.1.2  mrg 				     size_t *tgt_size)
   2849   1.1.1.2  mrg {
   2850   1.1.1.2  mrg   size_t i;
   2851   1.1.1.2  mrg   for (i = 0; i < mapnum; i++)
   2852   1.1.1.2  mrg     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
   2853   1.1.1.2  mrg       {
   2854   1.1.1.2  mrg 	size_t align = (size_t) 1 << (kinds[i] >> 8);
   2855   1.1.1.2  mrg 	if (*tgt_align < align)
   2856   1.1.1.2  mrg 	  *tgt_align = align;
   2857   1.1.1.2  mrg 	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
   2858   1.1.1.2  mrg 	*tgt_size += sizes[i];
   2859   1.1.1.2  mrg       }
   2860   1.1.1.2  mrg }
   2861   1.1.1.2  mrg 
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  /* Bump TGT up to the largest required alignment first; the per-entry
     offsets below were computed relative to such an aligned base by
     calculate_firstprivate_requirements.  */
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
      {
	/* The upper byte of the kind encodes log2 of this entry's
	   alignment; round the running offset up to it before copying.  */
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	/* Redirect the host address to the private copy so the target
	   region sees the copied data.  */
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
	/* A following GOMP_MAP_ATTACH entry patches a pointer (at the
	   element index given by sizes[i+1] within hostaddrs[i+1]) to
	   point at the private copy just made; consume it here.  */
	if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
	  {
	    *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
	    ++i;
	  }
      }
}
   2889   1.1.1.2  mrg 
   2890   1.1.1.2  mrg /* Helper function of GOMP_target{,_ext} routines.  */
   2891   1.1.1.2  mrg 
   2892   1.1.1.2  mrg static void *
   2893   1.1.1.2  mrg gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
   2894   1.1.1.2  mrg 			 void (*host_fn) (void *))
   2895   1.1.1.2  mrg {
   2896   1.1.1.2  mrg   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
   2897   1.1.1.2  mrg     return (void *) host_fn;
   2898   1.1.1.2  mrg   else
   2899   1.1.1.2  mrg     {
   2900   1.1.1.2  mrg       gomp_mutex_lock (&devicep->lock);
   2901   1.1.1.2  mrg       if (devicep->state == GOMP_DEVICE_FINALIZED)
   2902   1.1.1.2  mrg 	{
   2903   1.1.1.2  mrg 	  gomp_mutex_unlock (&devicep->lock);
   2904   1.1.1.2  mrg 	  return NULL;
   2905   1.1.1.2  mrg 	}
   2906   1.1.1.2  mrg 
   2907   1.1.1.2  mrg       struct splay_tree_key_s k;
   2908   1.1.1.2  mrg       k.host_start = (uintptr_t) host_fn;
   2909   1.1.1.2  mrg       k.host_end = k.host_start + 1;
   2910   1.1.1.2  mrg       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
   2911   1.1.1.2  mrg       gomp_mutex_unlock (&devicep->lock);
   2912   1.1.1.2  mrg       if (tgt_fn == NULL)
   2913   1.1.1.2  mrg 	return NULL;
   2914   1.1.1.2  mrg 
   2915   1.1.1.2  mrg       return (void *) tgt_fn->tgt_offset;
   2916   1.1.1.2  mrg     }
   2917   1.1.1.2  mrg }
   2918   1.1.1.2  mrg 
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* Run on the host if no usable device was resolved, the device cannot
     handle OpenMP 4.0 offloading, or FN has no device counterpart.  */
  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs, devicep, NULL);

  /* Map the data environment, launch, then unmap.  The refcount set
     tracks per-construct reference counts across the map/unmap pair.  */
  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     &refcount_set, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt_vars, true, &refcount_set);
  htab_free (refcount_set);
}
   2953   1.1.1.2  mrg }
   2954   1.1.1.2  mrg 
   2955   1.1.1.8  mrg static inline unsigned int
   2956   1.1.1.8  mrg clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
   2957   1.1.1.8  mrg {
   2958   1.1.1.8  mrg   /* If we cannot run asynchronously, simply ignore nowait.  */
   2959   1.1.1.8  mrg   if (devicep != NULL && devicep->async_run_func == NULL)
   2960   1.1.1.8  mrg     flags &= ~GOMP_TARGET_FLAG_NOWAIT;
   2961   1.1.1.8  mrg 
   2962   1.1.1.8  mrg   return flags;
   2963   1.1.1.8  mrg }
   2964   1.1.1.8  mrg 
   2965  1.1.1.11  mrg static void
   2966  1.1.1.11  mrg gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
   2967  1.1.1.11  mrg {
   2968  1.1.1.11  mrg   struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
   2969  1.1.1.11  mrg   if (item == NULL)
   2970  1.1.1.11  mrg     return;
   2971  1.1.1.11  mrg 
   2972  1.1.1.11  mrg   void *host_ptr = &item->icvs;
   2973  1.1.1.11  mrg   void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
   2974  1.1.1.11  mrg   if (dev_ptr != NULL)
   2975  1.1.1.11  mrg     gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
   2976  1.1.1.11  mrg 			sizeof (struct gomp_offload_icvs));
   2977  1.1.1.11  mrg }
   2978  1.1.1.11  mrg 
   2979   1.1.1.2  mrg /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   2980   1.1.1.2  mrg    and several arguments have been added:
   2981   1.1.1.2  mrg    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   2982   1.1.1.2  mrg    DEPEND is array of dependencies, see GOMP_task for details.
   2983   1.1.1.2  mrg 
   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element contains the actual value.  The
   last element of the array is a single NULL.  Among the device-independent
   ones can be for example NUM_TEAMS and THREAD_LIMIT.
   2991   1.1.1.2  mrg 
   2992   1.1.1.2  mrg    NUM_TEAMS is positive if GOMP_teams will be called in the body with
   2993   1.1.1.2  mrg    that value, or 1 if teams construct is not present, or 0, if
   2994   1.1.1.2  mrg    teams construct does not have num_teams clause and so the choice is
   2995   1.1.1.2  mrg    implementation defined, and -1 if it can't be determined on the host
   2996   1.1.1.2  mrg    what value will GOMP_teams have on the device.
   2997   1.1.1.2  mrg    THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   2998   1.1.1.2  mrg    body with that value, or 0, if teams construct does not have thread_limit
   2999   1.1.1.2  mrg    clause or the teams construct is not present, or -1 if it can't be
   3000   1.1.1.2  mrg    determined on the host what value will GOMP_teams have on the device.  */
   3001   1.1.1.2  mrg 
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  /* Obtain the original TEAMS and THREADS values from ARGS.  One pass over
     the NULL-terminated ARGS array; each entry is either a single encoded
     id+value element or an id element followed by a separate value element
     (GOMP_TARGET_ARG_SUBSEQUENT_PARAM).  Only device-independent
     (DEVICE_ALL) entries are considered.  */
  intptr_t orig_teams = 1, orig_threads = 0;
  size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
  void **tmpargs = args;
  while (*tmpargs)
    {
      intptr_t id = (intptr_t) *tmpargs++, val;
      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	{
	  val = (intptr_t) *tmpargs++;
	  len = 2;
	}
      else
	{
	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
	  len = 1;
	}
      num_args += len;
      if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
	continue;
      /* Clamp to int range; the device plugin interface takes ints.  */
      val = val > INT_MAX ? INT_MAX : val;
      if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
	{
	  orig_teams = val;
	  teams_len = len;
	}
      else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
	{
	  orig_threads = val;
	  threads_len = len;
	}
    }

  intptr_t new_teams = orig_teams, new_threads = orig_threads;
  /* ORIG_TEAMS == -2: No explicit teams construct specified.  Set to 1.
     ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
		       value could not be determined.  No change.
     ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
		      Set device-specific value.
     ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
		     No change.  */
  if (orig_teams == -2)
    new_teams = 1;
  else if (orig_teams == 0)
    {
      struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
      if (item != NULL)
	new_teams = item->icvs.nteams;
    }
  /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
     region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
     e.g. a THREAD_LIMIT clause.  */
  if (orig_teams > -2 && orig_threads == 0)
    {
      struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
      if (item != NULL)
	new_threads = item->icvs.teams_thread_limit;
    }

  /* Copy and change the arguments list only if TEAMS or THREADS need to be
     updated.  */
  void **new_args = args;
  if (orig_teams != new_teams || orig_threads != new_threads)
    {
      /* A value needs two elements if it does not fit into the 16-bit
	 encoded-value field of a single element.  */
      size_t tms_len = (orig_teams == new_teams
			? teams_len
			: (new_teams > -(1 << 15) && new_teams < (1 << 15)
			   ? 1 : 2));
      size_t ths_len = (orig_threads == new_threads
			? threads_len
			: (new_threads > -(1 << 15) && new_threads < (1 << 15)
			   ? 1 : 2));
      /* One additional item after the last arg must be NULL.  */
      size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
			    + ths_len + 1;
      new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));

      tmpargs = args;
      void **tmp_new_args = new_args;
      /* Copy all args except TEAMS and THREADS.  TEAMS and THREADS are copied
	 too if they have not been changed and skipped otherwise.  */
      while (*tmpargs)
	{
	  intptr_t id = (intptr_t) *tmpargs;
	  if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
	       && orig_teams != new_teams)
	      || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
		  && orig_threads != new_threads))
	    {
	      tmpargs++;
	      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
		tmpargs++;
	    }
	  else
	    {
	      *tmp_new_args++ = *tmpargs++;
	      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
		*tmp_new_args++ = *tmpargs++;
	    }
	}

      /* Add the new TEAMS arg to the new args list if it has been changed.  */
      if (orig_teams != new_teams)
	{
	  intptr_t new_val = new_teams;
	  if (tms_len == 1)
	    {
	      new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
			 | GOMP_TARGET_ARG_NUM_TEAMS;
	      *tmp_new_args++ = (void *) new_val;
	    }
	  else
	    {
	      *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
					  | GOMP_TARGET_ARG_NUM_TEAMS);
	      *tmp_new_args++ = (void *) new_val;
	    }
	}

      /* Add the new THREADS arg to the new args list if it has been changed. */
      if (orig_threads != new_threads)
	{
	  intptr_t new_val = new_threads;
	  if (ths_len == 1)
	    {
	      new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
			 | GOMP_TARGET_ARG_THREAD_LIMIT;
	      *tmp_new_args++ = (void *) new_val;
	    }
	  else
	    {
	      *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
					  | GOMP_TARGET_ARG_THREAD_LIMIT);
	      *tmp_new_args++ = (void *) new_val;
	    }
	}

      *tmp_new_args = NULL;
    }

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task **implicit_task = &task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  /* Walk up the task chain to the nearest implicit task, which is
	     replaced by the new team's implicit task below.  */
	  while (*implicit_task
		 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
	    implicit_task = &(*implicit_task)->parent;
	  if (*implicit_task)
	    {
	      thr->task = *implicit_task;
	      gomp_end_task ();
	      free (*implicit_task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	  if (implicit_task != &task)
	    {
	      *implicit_task = thr->task;
	      thr->task = task;
	    }
	}
      /* Final tasks may not defer their children, so only create an
	 asynchronous target task outside of a final task.  */
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, new_args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  /* Fall back to host execution when there is no usable device, the device
     has no image of FN, or the device declines to run this kernel.  */
  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs, devicep, new_args);
      return;
    }

  struct target_mem_desc *tgt_vars;
  htab_t refcount_set = NULL;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared memory device: no mapping needed, only firstprivate copies.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    {
      refcount_set = htab_create (mapnum);
      tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
				true, &refcount_set, GOMP_MAP_VARS_TARGET);
    }
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     new_args);
  if (tgt_vars)
    {
      htab_clear (refcount_set);
      gomp_unmap_vars (tgt_vars, true, &refcount_set);
    }
  if (refcount_set)
    htab_free (refcount_set);

  /* Copy back ICVs from device to host.
     HOST_PTR is expected to exist since it was added in
     gomp_load_image_to_device if not already available.  */
  gomp_copy_back_icvs (devicep, device);

}
   3289       1.1  mrg 
   3290   1.1.1.2  mrg 
/* Reverse lookup (device addr -> host addr) for reverse offload.  We avoid
   keeping track of all variable handling - assuming that reverse offload
   occurs only very rarely.  Downside is that the reverse search is slow.  */
   3294  1.1.1.11  mrg 
/* Callback state for gomp_splay_tree_rev_lookup: the device address range
   searched for, and the matching key once found.  */
struct gomp_splay_tree_rev_lookup_data {
  uintptr_t tgt_start;	/* Start of the device address range searched.  */
  uintptr_t tgt_end;	/* End (exclusive) of that range.  */
  splay_tree_key key;	/* Out: matching key, or NULL if none found yet.  */
};
   3300  1.1.1.11  mrg 
   3301  1.1.1.11  mrg static int
   3302  1.1.1.11  mrg gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
   3303   1.1.1.2  mrg {
   3304  1.1.1.11  mrg   struct gomp_splay_tree_rev_lookup_data *data;
   3305  1.1.1.11  mrg   data = (struct gomp_splay_tree_rev_lookup_data *)d;
   3306  1.1.1.11  mrg   uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
   3307  1.1.1.10  mrg 
   3308  1.1.1.11  mrg   if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
   3309  1.1.1.11  mrg     return 0;
   3310  1.1.1.10  mrg 
   3311  1.1.1.11  mrg   size_t j;
   3312  1.1.1.11  mrg   for (j = 0; j < key->tgt->list_count; j++)
   3313  1.1.1.11  mrg     if (key->tgt->list[j].key == key)
   3314  1.1.1.11  mrg       break;
   3315  1.1.1.11  mrg   assert (j < key->tgt->list_count);
   3316  1.1.1.11  mrg   uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
   3317  1.1.1.11  mrg 
   3318  1.1.1.11  mrg   if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
   3319  1.1.1.11  mrg       || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
   3320       1.1  mrg     {
   3321  1.1.1.11  mrg       data->key = key;
   3322  1.1.1.11  mrg       return 1;
   3323       1.1  mrg     }
   3324  1.1.1.11  mrg   return 0;
   3325       1.1  mrg }
   3326       1.1  mrg 
   3327  1.1.1.11  mrg static inline splay_tree_key
   3328  1.1.1.11  mrg gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
   3329  1.1.1.11  mrg 		     bool zero_len)
   3330       1.1  mrg {
   3331  1.1.1.11  mrg   struct gomp_splay_tree_rev_lookup_data data;
   3332  1.1.1.11  mrg   data.key = NULL;
   3333  1.1.1.11  mrg   data.tgt_start = tgt_start;
   3334  1.1.1.11  mrg   data.tgt_end = tgt_end;
   3335       1.1  mrg 
   3336  1.1.1.11  mrg   if (tgt_start != tgt_end)
   3337  1.1.1.11  mrg     {
   3338  1.1.1.11  mrg       splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
   3339  1.1.1.11  mrg       return data.key;
   3340  1.1.1.11  mrg     }
   3341       1.1  mrg 
   3342  1.1.1.11  mrg   data.tgt_end++;
   3343  1.1.1.11  mrg   splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
   3344  1.1.1.11  mrg   if (data.key != NULL || zero_len)
   3345  1.1.1.11  mrg     return data.key;
   3346  1.1.1.11  mrg   data.tgt_end--;
   3347  1.1.1.11  mrg 
   3348  1.1.1.11  mrg   data.tgt_start--;
   3349  1.1.1.11  mrg   splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
   3350  1.1.1.11  mrg   return data.key;
   3351   1.1.1.2  mrg }
   3352   1.1.1.2  mrg 
/* Per-map-entry bookkeeping while materializing reverse-offload data.  */
struct cpy_data
{
  uint64_t devaddr;		/* Device address of the entry's data.  */
  bool present, aligned;	/* Whether already present / suitably aligned.  */
};
   3358   1.1.1.2  mrg 
   3359   1.1.1.2  mrg 
   3360  1.1.1.11  mrg /* Search just mapped reverse-offload data; returns index if found,
   3361  1.1.1.11  mrg    otherwise >= n.  */
   3362  1.1.1.11  mrg 
   3363  1.1.1.11  mrg static inline int
   3364  1.1.1.11  mrg gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
   3365  1.1.1.11  mrg 			   unsigned short *kinds, uint64_t *sizes, size_t n,
   3366  1.1.1.11  mrg 			   uint64_t tgt_start, uint64_t tgt_end)
   3367  1.1.1.11  mrg {
   3368  1.1.1.11  mrg   const bool short_mapkind = true;
   3369  1.1.1.11  mrg   const int typemask = short_mapkind ? 0xff : 0x7;
   3370  1.1.1.11  mrg   size_t i;
   3371  1.1.1.11  mrg   for (i = 0; i < n; i++)
   3372  1.1.1.11  mrg     {
   3373  1.1.1.11  mrg       bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
   3374  1.1.1.11  mrg 			== GOMP_MAP_STRUCT);
   3375  1.1.1.11  mrg       uint64_t dev_end;
   3376  1.1.1.11  mrg       if (!is_struct)
   3377  1.1.1.11  mrg 	dev_end = d[i].devaddr + sizes[i];
   3378  1.1.1.11  mrg       else
   3379  1.1.1.11  mrg 	{
   3380  1.1.1.11  mrg 	  if (i + sizes[i] < n)
   3381  1.1.1.11  mrg 	    dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
   3382  1.1.1.11  mrg 	  else
   3383  1.1.1.11  mrg 	    dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
   3384  1.1.1.11  mrg 	}
   3385  1.1.1.11  mrg       if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
   3386  1.1.1.11  mrg 	  || (dev_end > tgt_start && d[i].devaddr < tgt_end))
   3387  1.1.1.11  mrg 	break;
   3388  1.1.1.11  mrg       if (is_struct)
   3389  1.1.1.11  mrg 	i += sizes[i];
   3390  1.1.1.11  mrg     }
   3391  1.1.1.11  mrg   return i;
   3392  1.1.1.11  mrg }
   3393  1.1.1.11  mrg 
/* Wrapper around gomp_map_cdata_lookup_int handling empty ranges
   (TGT_START == TGT_END) the same way gomp_map_rev_lookup does: probe one
   byte past the address and, unless ZERO_LEN, one byte before it.
   Returns the index of the matching entry, or a value >= N if none.  */

static inline int
gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
		       unsigned short *kinds, uint64_t *sizes,
		       size_t n, uint64_t tgt_start, uint64_t tgt_end,
		       bool zero_len)
{
  if (tgt_start != tgt_end)
    return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				      tgt_start, tgt_end);

  /* Empty range: first try one byte past the address.  */
  size_t idx = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
					  tgt_start, tgt_end + 1);
  if (idx < n || zero_len)
    return idx;

  /* Otherwise retry one byte before the address.  */
  return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				    tgt_start - 1, tgt_end);
}
   3415  1.1.1.11  mrg 
/* Handle reverse offload.  This is called by the device plugins for a
   reverse offload; it is not called if the outer target runs on the host.
   The mapping is simplified: device-affecting constructs (except for target
   with device(ancestor:1)) must not be encountered; in particular not
   target (enter/exit) data.  */
   3421  1.1.1.11  mrg 
   3422  1.1.1.11  mrg void
   3423  1.1.1.11  mrg gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
   3424  1.1.1.11  mrg 		 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
   3425  1.1.1.11  mrg 		 struct goacc_asyncqueue *aq)
   3426  1.1.1.11  mrg {
   3427  1.1.1.11  mrg   /* Return early if there is no offload code.  */
   3428  1.1.1.11  mrg   if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
   3429  1.1.1.11  mrg     return;
   3430  1.1.1.11  mrg   /* Currently, this fails because of calculate_firstprivate_requirements
   3431  1.1.1.11  mrg      below; it could be fixed but additional code needs to be updated to
   3432  1.1.1.11  mrg      handle 32bit hosts - thus, it is not worthwhile.  */
   3433  1.1.1.11  mrg   if (sizeof (void *) != sizeof (uint64_t))
   3434  1.1.1.11  mrg     gomp_fatal ("Reverse offload of 32bit hosts not supported.");
   3435  1.1.1.11  mrg 
   3436  1.1.1.11  mrg   struct cpy_data *cdata = NULL;
   3437  1.1.1.11  mrg   uint64_t *devaddrs;
   3438  1.1.1.11  mrg   uint64_t *sizes;
   3439  1.1.1.11  mrg   unsigned short *kinds;
   3440  1.1.1.11  mrg   const bool short_mapkind = true;
   3441  1.1.1.11  mrg   const int typemask = short_mapkind ? 0xff : 0x7;
   3442  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (dev_num, false);
   3443  1.1.1.11  mrg 
   3444  1.1.1.11  mrg   reverse_splay_tree_key n;
   3445  1.1.1.11  mrg   struct reverse_splay_tree_key_s k;
   3446  1.1.1.11  mrg   k.dev = fn_ptr;
   3447  1.1.1.11  mrg 
   3448  1.1.1.11  mrg   gomp_mutex_lock (&devicep->lock);
   3449  1.1.1.11  mrg   n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
   3450  1.1.1.11  mrg   gomp_mutex_unlock (&devicep->lock);
   3451  1.1.1.11  mrg 
   3452  1.1.1.11  mrg   if (n == NULL)
   3453  1.1.1.11  mrg     gomp_fatal ("Cannot find reverse-offload function");
   3454  1.1.1.11  mrg   void (*host_fn) (void *) = (void (*) (void *)) n->k->host_start;
   3455  1.1.1.11  mrg 
   3456  1.1.1.11  mrg   if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
   3457  1.1.1.11  mrg     {
   3458  1.1.1.11  mrg       devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
   3459  1.1.1.11  mrg       sizes = (uint64_t *) (uintptr_t) sizes_ptr;
   3460  1.1.1.11  mrg       kinds = (unsigned short *) (uintptr_t) kinds_ptr;
   3461  1.1.1.11  mrg     }
   3462  1.1.1.11  mrg   else
   3463  1.1.1.11  mrg     {
   3464  1.1.1.11  mrg       devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
   3465  1.1.1.11  mrg       sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
   3466  1.1.1.11  mrg       kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
   3467  1.1.1.11  mrg       gomp_copy_dev2host (devicep, aq, devaddrs,
   3468  1.1.1.11  mrg 			  (const void *) (uintptr_t) devaddrs_ptr,
   3469  1.1.1.11  mrg 			  mapnum * sizeof (uint64_t));
   3470  1.1.1.11  mrg       gomp_copy_dev2host (devicep, aq, sizes,
   3471  1.1.1.11  mrg 			  (const void *) (uintptr_t) sizes_ptr,
   3472  1.1.1.11  mrg 			  mapnum * sizeof (uint64_t));
   3473  1.1.1.11  mrg       gomp_copy_dev2host (devicep, aq, kinds,
   3474  1.1.1.11  mrg 			  (const void *) (uintptr_t) kinds_ptr,
   3475  1.1.1.11  mrg 			  mapnum * sizeof (unsigned short));
   3476  1.1.1.11  mrg       if (aq && !devicep->openacc.async.synchronize_func (aq))
   3477  1.1.1.11  mrg 	exit (EXIT_FAILURE);
   3478  1.1.1.11  mrg     }
   3479  1.1.1.11  mrg 
   3480  1.1.1.11  mrg   size_t tgt_align = 0, tgt_size = 0;
   3481  1.1.1.11  mrg 
   3482  1.1.1.11  mrg   /* If actually executed on 32bit systems, the casts lead to wrong code;
   3483  1.1.1.11  mrg      but 32bit with offloading is not supported; see top of this function.  */
   3484  1.1.1.11  mrg   calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
   3485  1.1.1.11  mrg 				       (void *) (uintptr_t) kinds,
   3486  1.1.1.11  mrg 				       &tgt_align, &tgt_size);
   3487  1.1.1.11  mrg 
   3488  1.1.1.11  mrg   if (tgt_align)
   3489  1.1.1.11  mrg     {
   3490  1.1.1.11  mrg       char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
   3491  1.1.1.11  mrg       uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
   3492  1.1.1.11  mrg       if (al)
   3493  1.1.1.11  mrg 	tgt += tgt_align - al;
   3494  1.1.1.11  mrg       tgt_size = 0;
   3495  1.1.1.11  mrg       for (uint64_t i = 0; i < mapnum; i++)
   3496  1.1.1.11  mrg 	if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
   3497  1.1.1.11  mrg 	    && devaddrs[i] != 0)
   3498  1.1.1.11  mrg 	  {
   3499  1.1.1.11  mrg 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
   3500  1.1.1.11  mrg 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
   3501  1.1.1.11  mrg 	    if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   3502  1.1.1.11  mrg 	      memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
   3503  1.1.1.11  mrg 		      (size_t) sizes[i]);
   3504  1.1.1.11  mrg 	    else
   3505  1.1.1.11  mrg 	      {
   3506  1.1.1.11  mrg 		gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
   3507  1.1.1.11  mrg 				    (void *) (uintptr_t) devaddrs[i],
   3508  1.1.1.11  mrg 				    (size_t) sizes[i]);
   3509  1.1.1.11  mrg 		if (aq && !devicep->openacc.async.synchronize_func (aq))
   3510  1.1.1.11  mrg 		  exit (EXIT_FAILURE);
   3511  1.1.1.11  mrg 	      }
   3512  1.1.1.11  mrg 	    devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
   3513  1.1.1.11  mrg 	    tgt_size = tgt_size + sizes[i];
   3514  1.1.1.11  mrg 	    if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   3515  1.1.1.11  mrg 		&& i + 1 < mapnum
   3516  1.1.1.11  mrg 		&& ((get_kind (short_mapkind, kinds, i) & typemask)
   3517  1.1.1.11  mrg 		    == GOMP_MAP_ATTACH))
   3518  1.1.1.11  mrg 	      {
   3519  1.1.1.11  mrg 		*(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
   3520  1.1.1.11  mrg 		  = (uint64_t) devaddrs[i];
   3521  1.1.1.11  mrg 		++i;
   3522  1.1.1.11  mrg 	      }
   3523  1.1.1.11  mrg 	  }
   3524  1.1.1.11  mrg     }
   3525  1.1.1.11  mrg 
   3526  1.1.1.11  mrg   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
   3527  1.1.1.11  mrg     {
   3528  1.1.1.11  mrg       size_t j, struct_cpy = 0;
   3529  1.1.1.11  mrg       splay_tree_key n2;
   3530  1.1.1.11  mrg       cdata = gomp_alloca (sizeof (*cdata) * mapnum);
   3531  1.1.1.11  mrg       memset (cdata, '\0', sizeof (*cdata) * mapnum);
   3532  1.1.1.11  mrg       gomp_mutex_lock (&devicep->lock);
   3533  1.1.1.11  mrg       for (uint64_t i = 0; i < mapnum; i++)
   3534  1.1.1.11  mrg 	{
   3535  1.1.1.11  mrg 	  if (devaddrs[i] == 0)
   3536  1.1.1.11  mrg 	    continue;
   3537  1.1.1.11  mrg 	  n = NULL;
   3538  1.1.1.11  mrg 	  int kind = get_kind (short_mapkind, kinds, i) & typemask;
   3539  1.1.1.11  mrg 	  switch (kind)
   3540  1.1.1.11  mrg 	    {
   3541  1.1.1.11  mrg 	      case GOMP_MAP_FIRSTPRIVATE:
   3542  1.1.1.11  mrg 	      case GOMP_MAP_FIRSTPRIVATE_INT:
   3543  1.1.1.11  mrg 		continue;
   3544  1.1.1.11  mrg 
   3545  1.1.1.11  mrg 	      case GOMP_MAP_DELETE:
   3546  1.1.1.11  mrg 	      case GOMP_MAP_RELEASE:
   3547  1.1.1.11  mrg 	      case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
   3548  1.1.1.11  mrg 		/* Assume it is present; look it up - but ignore unless the
   3549  1.1.1.11  mrg 		   present clause is there. */
   3550  1.1.1.11  mrg 	      case GOMP_MAP_ALLOC:
   3551  1.1.1.11  mrg 	      case GOMP_MAP_FROM:
   3552  1.1.1.11  mrg 	      case GOMP_MAP_FORCE_ALLOC:
   3553  1.1.1.11  mrg 	      case GOMP_MAP_FORCE_FROM:
   3554  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_FROM:
   3555  1.1.1.11  mrg 	      case GOMP_MAP_TO:
   3556  1.1.1.11  mrg 	      case GOMP_MAP_TOFROM:
   3557  1.1.1.11  mrg 	      case GOMP_MAP_FORCE_TO:
   3558  1.1.1.11  mrg 	      case GOMP_MAP_FORCE_TOFROM:
   3559  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_TO:
   3560  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_TOFROM:
   3561  1.1.1.11  mrg 	      case GOMP_MAP_FORCE_PRESENT:
   3562  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_PRESENT_FROM:
   3563  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_PRESENT_TO:
   3564  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
   3565  1.1.1.11  mrg 	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
   3566  1.1.1.11  mrg 		cdata[i].devaddr = devaddrs[i];
   3567  1.1.1.11  mrg 		bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
   3568  1.1.1.11  mrg 				 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
   3569  1.1.1.11  mrg 		j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
   3570  1.1.1.11  mrg 					   devaddrs[i],
   3571  1.1.1.11  mrg 					   devaddrs[i] + sizes[i], zero_len);
   3572  1.1.1.11  mrg 		if (j < i)
   3573  1.1.1.11  mrg 		  {
   3574  1.1.1.11  mrg 		    n2 = NULL;
   3575  1.1.1.11  mrg 		    cdata[i].present = true;
   3576  1.1.1.11  mrg 		    devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
   3577  1.1.1.11  mrg 		  }
   3578  1.1.1.11  mrg 		else
   3579  1.1.1.11  mrg 		  {
   3580  1.1.1.11  mrg 		    n2 = gomp_map_rev_lookup (&devicep->mem_map,
   3581  1.1.1.11  mrg 					      devaddrs[i],
   3582  1.1.1.11  mrg 					      devaddrs[i] + sizes[i], zero_len);
   3583  1.1.1.11  mrg 		    cdata[i].present = n2 != NULL;
   3584  1.1.1.11  mrg 		  }
   3585  1.1.1.11  mrg 		if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
   3586  1.1.1.11  mrg 		  {
   3587  1.1.1.11  mrg 		    gomp_mutex_unlock (&devicep->lock);
   3588  1.1.1.11  mrg #ifdef HAVE_INTTYPES_H
   3589  1.1.1.11  mrg 		    gomp_fatal ("present clause: no corresponding data on "
   3590  1.1.1.11  mrg 				"parent device at %p with size %"PRIu64,
   3591  1.1.1.11  mrg 				(void *) (uintptr_t) devaddrs[i],
   3592  1.1.1.11  mrg 				(uint64_t) sizes[i]);
   3593  1.1.1.11  mrg #else
   3594  1.1.1.11  mrg 		    gomp_fatal ("present clause: no corresponding data on "
   3595  1.1.1.11  mrg 				"parent device at %p with size %lu",
   3596  1.1.1.11  mrg 				(void *) (uintptr_t) devaddrs[i],
   3597  1.1.1.11  mrg 				(unsigned long) sizes[i]);
   3598  1.1.1.11  mrg #endif
   3599  1.1.1.11  mrg 		    break;
   3600  1.1.1.11  mrg 		  }
   3601  1.1.1.11  mrg 		else if (!cdata[i].present
   3602  1.1.1.11  mrg 		    && kind != GOMP_MAP_DELETE
   3603  1.1.1.11  mrg 		    && kind != GOMP_MAP_RELEASE
   3604  1.1.1.11  mrg 		    && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
   3605  1.1.1.11  mrg 		  {
   3606  1.1.1.11  mrg 		    cdata[i].aligned = true;
   3607  1.1.1.11  mrg 		    size_t align = (size_t) 1 << (kinds[i] >> 8);
   3608  1.1.1.11  mrg 		    devaddrs[i]
   3609  1.1.1.11  mrg 		      = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
   3610  1.1.1.11  mrg 								   sizes[i]);
   3611  1.1.1.11  mrg 		  }
   3612  1.1.1.11  mrg 		else if (n2 != NULL)
   3613  1.1.1.11  mrg 		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
   3614  1.1.1.11  mrg 				 - (n2->tgt->tgt_start + n2->tgt_offset));
   3615  1.1.1.11  mrg 		if (((!cdata[i].present || struct_cpy)
   3616  1.1.1.11  mrg 		     && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
   3617  1.1.1.11  mrg 		    || kind == GOMP_MAP_FORCE_TO
   3618  1.1.1.11  mrg 		    || kind == GOMP_MAP_FORCE_TOFROM
   3619  1.1.1.11  mrg 		    || GOMP_MAP_ALWAYS_TO_P (kind))
   3620  1.1.1.11  mrg 		  {
   3621  1.1.1.11  mrg 		    gomp_copy_dev2host (devicep, aq,
   3622  1.1.1.11  mrg 					(void *) (uintptr_t) devaddrs[i],
   3623  1.1.1.11  mrg 					(void *) (uintptr_t) cdata[i].devaddr,
   3624  1.1.1.11  mrg 					sizes[i]);
   3625  1.1.1.11  mrg 		    if (aq && !devicep->openacc.async.synchronize_func (aq))
   3626  1.1.1.11  mrg 		      {
   3627  1.1.1.11  mrg 			gomp_mutex_unlock (&devicep->lock);
   3628  1.1.1.11  mrg 			exit (EXIT_FAILURE);
   3629  1.1.1.11  mrg 		      }
   3630  1.1.1.11  mrg 		  }
   3631  1.1.1.11  mrg 		if (struct_cpy)
   3632  1.1.1.11  mrg 		  struct_cpy--;
   3633  1.1.1.11  mrg 		break;
   3634  1.1.1.11  mrg 	      case GOMP_MAP_ATTACH:
   3635  1.1.1.11  mrg 	      case GOMP_MAP_POINTER:
   3636  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_POINTER:
   3637  1.1.1.11  mrg 		n2 = gomp_map_rev_lookup (&devicep->mem_map,
   3638  1.1.1.11  mrg 					  devaddrs[i] + sizes[i],
   3639  1.1.1.11  mrg 					  devaddrs[i] + sizes[i]
   3640  1.1.1.11  mrg 					  + sizeof (void*), false);
   3641  1.1.1.11  mrg 		cdata[i].present = n2 != NULL;
   3642  1.1.1.11  mrg 		cdata[i].devaddr = devaddrs[i];
   3643  1.1.1.11  mrg 		if (n2)
   3644  1.1.1.11  mrg 		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
   3645  1.1.1.11  mrg 				 - (n2->tgt->tgt_start + n2->tgt_offset));
   3646  1.1.1.11  mrg 		else
   3647  1.1.1.11  mrg 		  {
   3648  1.1.1.11  mrg 		    j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
   3649  1.1.1.11  mrg 					       devaddrs[i] + sizes[i],
   3650  1.1.1.11  mrg 					       devaddrs[i] + sizes[i]
   3651  1.1.1.11  mrg 					       + sizeof (void*), false);
   3652  1.1.1.11  mrg 		    if (j < i)
   3653  1.1.1.11  mrg 		      {
   3654  1.1.1.11  mrg 			cdata[i].present = true;
   3655  1.1.1.11  mrg 			devaddrs[i] = (devaddrs[j] + devaddrs[i]
   3656  1.1.1.11  mrg 				       - cdata[j].devaddr);
   3657  1.1.1.11  mrg 		      }
   3658  1.1.1.11  mrg 		  }
   3659  1.1.1.11  mrg 		if (!cdata[i].present)
   3660  1.1.1.11  mrg 		  devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
   3661  1.1.1.11  mrg 		/* Assume that when present, the pointer is already correct.  */
   3662  1.1.1.11  mrg 		if (!n2)
   3663  1.1.1.11  mrg 		  *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
   3664  1.1.1.11  mrg 		    = devaddrs[i-1];
   3665  1.1.1.11  mrg 		break;
   3666  1.1.1.11  mrg 	      case GOMP_MAP_TO_PSET:
   3667  1.1.1.11  mrg 		/* Assume that when present, the pointers are fine and no 'to:'
   3668  1.1.1.11  mrg 		   is required.  */
   3669  1.1.1.11  mrg 		n2 = gomp_map_rev_lookup (&devicep->mem_map,
   3670  1.1.1.11  mrg 					  devaddrs[i], devaddrs[i] + sizes[i],
   3671  1.1.1.11  mrg 					  false);
   3672  1.1.1.11  mrg 		cdata[i].present = n2 != NULL;
   3673  1.1.1.11  mrg 		cdata[i].devaddr = devaddrs[i];
   3674  1.1.1.11  mrg 		if (n2)
   3675  1.1.1.11  mrg 		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
   3676  1.1.1.11  mrg 				 - (n2->tgt->tgt_start + n2->tgt_offset));
   3677  1.1.1.11  mrg 		else
   3678  1.1.1.11  mrg 		  {
   3679  1.1.1.11  mrg 		    j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
   3680  1.1.1.11  mrg 					       devaddrs[i],
   3681  1.1.1.11  mrg 					       devaddrs[i] + sizes[i], false);
   3682  1.1.1.11  mrg 		    if (j < i)
   3683  1.1.1.11  mrg 		      {
   3684  1.1.1.11  mrg 			cdata[i].present = true;
   3685  1.1.1.11  mrg 			devaddrs[i] = (devaddrs[j] + devaddrs[i]
   3686  1.1.1.11  mrg 				       - cdata[j].devaddr);
   3687  1.1.1.11  mrg 		      }
   3688  1.1.1.11  mrg 		  }
   3689  1.1.1.11  mrg 		if (!cdata[i].present)
   3690  1.1.1.11  mrg 		  {
   3691  1.1.1.11  mrg 		    cdata[i].aligned = true;
   3692  1.1.1.11  mrg 		    size_t align = (size_t) 1 << (kinds[i] >> 8);
   3693  1.1.1.11  mrg 		    devaddrs[i]
   3694  1.1.1.11  mrg 		      = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
   3695  1.1.1.11  mrg 								   sizes[i]);
   3696  1.1.1.11  mrg 		    gomp_copy_dev2host (devicep, aq,
   3697  1.1.1.11  mrg 					(void *) (uintptr_t) devaddrs[i],
   3698  1.1.1.11  mrg 					(void *) (uintptr_t) cdata[i].devaddr,
   3699  1.1.1.11  mrg 					sizes[i]);
   3700  1.1.1.11  mrg 		    if (aq && !devicep->openacc.async.synchronize_func (aq))
   3701  1.1.1.11  mrg 		      {
   3702  1.1.1.11  mrg 			gomp_mutex_unlock (&devicep->lock);
   3703  1.1.1.11  mrg 			exit (EXIT_FAILURE);
   3704  1.1.1.11  mrg 		      }
   3705  1.1.1.11  mrg 		  }
   3706  1.1.1.11  mrg 		for (j = i + 1; j < mapnum; j++)
   3707  1.1.1.11  mrg 		  {
   3708  1.1.1.11  mrg 		    kind = get_kind (short_mapkind, kinds, j) & typemask;
   3709  1.1.1.11  mrg 		    if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
   3710  1.1.1.11  mrg 			&& !GOMP_MAP_POINTER_P (kind))
   3711  1.1.1.11  mrg 		      break;
   3712  1.1.1.11  mrg 		    if (devaddrs[j] < devaddrs[i])
   3713  1.1.1.11  mrg 		      break;
   3714  1.1.1.11  mrg 		    if (cdata[i].present)
   3715  1.1.1.11  mrg 		      continue;
   3716  1.1.1.11  mrg 		    if (devaddrs[j] == 0)
   3717  1.1.1.11  mrg 		      {
   3718  1.1.1.11  mrg 			*(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
   3719  1.1.1.11  mrg 			continue;
   3720  1.1.1.11  mrg 		      }
   3721  1.1.1.11  mrg 		    int k;
   3722  1.1.1.11  mrg 		    n2 = NULL;
   3723  1.1.1.11  mrg 		    /* Dereference devaddrs[j] to get the device addr.  */
   3724  1.1.1.11  mrg 		    assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
   3725  1.1.1.11  mrg 		    devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
   3726  1.1.1.11  mrg 							     + sizes[j]);
   3727  1.1.1.11  mrg 		    cdata[j].present = true;
   3728  1.1.1.11  mrg 		    cdata[j].devaddr = devaddrs[j];
   3729  1.1.1.11  mrg 		    if (devaddrs[j] == 0)
   3730  1.1.1.11  mrg 		      continue;
   3731  1.1.1.11  mrg 		    k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
   3732  1.1.1.11  mrg 					       devaddrs[j],
   3733  1.1.1.11  mrg 					       devaddrs[j] + sizeof (void*),
   3734  1.1.1.11  mrg 					       false);
   3735  1.1.1.11  mrg 		    if (k < j)
   3736  1.1.1.11  mrg 		      devaddrs[j] = (devaddrs[k] + devaddrs[j]
   3737  1.1.1.11  mrg 				     - cdata[k].devaddr);
   3738  1.1.1.11  mrg 		    else
   3739  1.1.1.11  mrg 		      {
   3740  1.1.1.11  mrg 			n2 = gomp_map_rev_lookup (&devicep->mem_map,
   3741  1.1.1.11  mrg 						  devaddrs[j],
   3742  1.1.1.11  mrg 						  devaddrs[j] + sizeof (void*),
   3743  1.1.1.11  mrg 						  false);
   3744  1.1.1.11  mrg 			if (n2 == NULL)
   3745  1.1.1.11  mrg 			  {
   3746  1.1.1.11  mrg 			    gomp_mutex_unlock (&devicep->lock);
   3747  1.1.1.11  mrg 			    gomp_fatal ("Pointer target wasn't mapped");
   3748  1.1.1.11  mrg 			  }
   3749  1.1.1.11  mrg 			devaddrs[j] = (n2->host_start + cdata[j].devaddr
   3750  1.1.1.11  mrg 				       - (n2->tgt->tgt_start + n2->tgt_offset));
   3751  1.1.1.11  mrg 		      }
   3752  1.1.1.11  mrg 		    *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
   3753  1.1.1.11  mrg 		      = (void *) (uintptr_t) devaddrs[j];
   3754  1.1.1.11  mrg 		  }
   3755  1.1.1.11  mrg 		i = j -1;
   3756  1.1.1.11  mrg 		break;
   3757  1.1.1.11  mrg 	      case GOMP_MAP_STRUCT:
   3758  1.1.1.11  mrg 		n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
   3759  1.1.1.11  mrg 					  devaddrs[i + sizes[i]]
   3760  1.1.1.11  mrg 					  + sizes[i + sizes[i]], false);
   3761  1.1.1.11  mrg 		cdata[i].present = n2 != NULL;
   3762  1.1.1.11  mrg 		cdata[i].devaddr = devaddrs[i];
   3763  1.1.1.11  mrg 		struct_cpy = cdata[i].present ? 0 : sizes[i];
   3764  1.1.1.11  mrg 		if (!n2)
   3765  1.1.1.11  mrg 		  {
   3766  1.1.1.11  mrg 		    size_t sz = (size_t) (devaddrs[i + sizes[i]]
   3767  1.1.1.11  mrg 					  - devaddrs[i+1]
   3768  1.1.1.11  mrg 					  + sizes[i + sizes[i]]);
   3769  1.1.1.11  mrg 		    size_t align = (size_t) 1 << (kinds[i] >> 8);
   3770  1.1.1.11  mrg 		    cdata[i].aligned = true;
   3771  1.1.1.11  mrg 		    devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
   3772  1.1.1.11  mrg 		    devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
   3773  1.1.1.11  mrg 		  }
   3774  1.1.1.11  mrg 		else
   3775  1.1.1.11  mrg 		  devaddrs[i] = (n2->host_start + cdata[i].devaddr
   3776  1.1.1.11  mrg 				 - (n2->tgt->tgt_start + n2->tgt_offset));
   3777  1.1.1.11  mrg 		break;
   3778  1.1.1.11  mrg 	      default:
   3779  1.1.1.11  mrg 		gomp_mutex_unlock (&devicep->lock);
   3780  1.1.1.11  mrg 		gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
   3781  1.1.1.11  mrg 	    }
   3782  1.1.1.11  mrg 	}
   3783  1.1.1.11  mrg       gomp_mutex_unlock (&devicep->lock);
   3784  1.1.1.11  mrg     }
   3785  1.1.1.11  mrg 
   3786  1.1.1.11  mrg   host_fn (devaddrs);
   3787  1.1.1.11  mrg 
   3788  1.1.1.11  mrg   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
   3789  1.1.1.11  mrg     {
   3790  1.1.1.11  mrg       uint64_t struct_cpy = 0;
   3791  1.1.1.11  mrg       bool clean_struct = false;
   3792  1.1.1.11  mrg       for (uint64_t i = 0; i < mapnum; i++)
   3793  1.1.1.11  mrg 	{
   3794  1.1.1.11  mrg 	  if (cdata[i].devaddr == 0)
   3795  1.1.1.11  mrg 	    continue;
   3796  1.1.1.11  mrg 	  int kind = get_kind (short_mapkind, kinds, i) & typemask;
   3797  1.1.1.11  mrg 	  bool copy = !cdata[i].present || struct_cpy;
   3798  1.1.1.11  mrg 	  switch (kind)
   3799  1.1.1.11  mrg 	    {
   3800  1.1.1.11  mrg 	      case GOMP_MAP_FORCE_FROM:
   3801  1.1.1.11  mrg 	      case GOMP_MAP_FORCE_TOFROM:
   3802  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_FROM:
   3803  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_TOFROM:
   3804  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_PRESENT_FROM:
   3805  1.1.1.11  mrg 	      case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
   3806  1.1.1.11  mrg 		copy = true;
   3807  1.1.1.11  mrg 		/* FALLTHRU */
   3808  1.1.1.11  mrg 	      case GOMP_MAP_FROM:
   3809  1.1.1.11  mrg 	      case GOMP_MAP_TOFROM:
   3810  1.1.1.11  mrg 		if (copy)
   3811  1.1.1.11  mrg 		  {
   3812  1.1.1.11  mrg 		    gomp_copy_host2dev (devicep, aq,
   3813  1.1.1.11  mrg 					(void *) (uintptr_t) cdata[i].devaddr,
   3814  1.1.1.11  mrg 					(void *) (uintptr_t) devaddrs[i],
   3815  1.1.1.11  mrg 					sizes[i], false, NULL);
   3816  1.1.1.11  mrg 		    if (aq && !devicep->openacc.async.synchronize_func (aq))
   3817  1.1.1.11  mrg 		      exit (EXIT_FAILURE);
   3818  1.1.1.11  mrg 		  }
   3819  1.1.1.11  mrg 	      default:
   3820  1.1.1.11  mrg 		break;
   3821  1.1.1.11  mrg 	    }
   3822  1.1.1.11  mrg 	  if (struct_cpy)
   3823  1.1.1.11  mrg 	    {
   3824  1.1.1.11  mrg 	      struct_cpy--;
   3825  1.1.1.11  mrg 	      continue;
   3826  1.1.1.11  mrg 	    }
   3827  1.1.1.11  mrg 	  if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
   3828  1.1.1.11  mrg 	    {
   3829  1.1.1.11  mrg 	      clean_struct = true;
   3830  1.1.1.11  mrg 	      struct_cpy = sizes[i];
   3831  1.1.1.11  mrg 	    }
   3832  1.1.1.11  mrg 	  else if (!cdata[i].present && cdata[i].aligned)
   3833  1.1.1.11  mrg 	    gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
   3834  1.1.1.11  mrg 	  else if (!cdata[i].present)
   3835  1.1.1.11  mrg 	    free ((void *) (uintptr_t) devaddrs[i]);
   3836  1.1.1.11  mrg 	}
   3837  1.1.1.11  mrg       if (clean_struct)
   3838  1.1.1.11  mrg 	for (uint64_t i = 0; i < mapnum; i++)
   3839  1.1.1.11  mrg 	  if (!cdata[i].present
   3840  1.1.1.11  mrg 	      && ((get_kind (short_mapkind, kinds, i) & typemask)
   3841  1.1.1.11  mrg 		  == GOMP_MAP_STRUCT))
   3842  1.1.1.11  mrg 	    {
   3843  1.1.1.11  mrg 	      devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
   3844  1.1.1.11  mrg 	      gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
   3845  1.1.1.11  mrg 	    }
   3846  1.1.1.11  mrg 
   3847  1.1.1.11  mrg       free (devaddrs);
   3848  1.1.1.11  mrg       free (sizes);
   3849  1.1.1.11  mrg       free (kinds);
   3850  1.1.1.11  mrg     }
   3851  1.1.1.11  mrg }
   3852  1.1.1.11  mrg 
   3853  1.1.1.11  mrg /* Host fallback for GOMP_target_data{,_ext} routines.  */
   3854  1.1.1.11  mrg 
   3855  1.1.1.11  mrg static void
   3856  1.1.1.11  mrg gomp_target_data_fallback (struct gomp_device_descr *devicep)
   3857  1.1.1.11  mrg {
   3858  1.1.1.11  mrg   struct gomp_task_icv *icv = gomp_icv (false);
   3859  1.1.1.11  mrg 
   3860  1.1.1.11  mrg   if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
   3861  1.1.1.11  mrg       && devicep != NULL)
   3862  1.1.1.11  mrg     gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
   3863  1.1.1.11  mrg 		"be used for offloading");
   3864  1.1.1.11  mrg 
   3865  1.1.1.11  mrg   if (icv->target_data)
   3866  1.1.1.11  mrg     {
   3867  1.1.1.11  mrg       /* Even when doing a host fallback, if there are any active
   3868  1.1.1.11  mrg          #pragma omp target data constructs, need to remember the
   3869  1.1.1.11  mrg          new #pragma omp target data, otherwise GOMP_target_end_data
   3870  1.1.1.11  mrg          would get out of sync.  */
   3871  1.1.1.11  mrg       struct target_mem_desc *tgt
   3872  1.1.1.11  mrg 	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
   3873  1.1.1.11  mrg 			 NULL, GOMP_MAP_VARS_DATA);
   3874  1.1.1.11  mrg       tgt->prev = icv->target_data;
   3875  1.1.1.11  mrg       icv->target_data = tgt;
   3876  1.1.1.11  mrg     }
   3877  1.1.1.11  mrg }
   3878  1.1.1.11  mrg 
   3879  1.1.1.11  mrg void
   3880  1.1.1.11  mrg GOMP_target_data (int device, const void *unused, size_t mapnum,
   3881  1.1.1.11  mrg 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
   3882  1.1.1.11  mrg {
   3883  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device, true);
   3884  1.1.1.11  mrg 
   3885  1.1.1.11  mrg   if (devicep == NULL
   3886  1.1.1.11  mrg       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   3887  1.1.1.11  mrg       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
   3888  1.1.1.11  mrg     return gomp_target_data_fallback (devicep);
   3889  1.1.1.11  mrg 
   3890  1.1.1.11  mrg   struct target_mem_desc *tgt
   3891  1.1.1.11  mrg     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
   3892  1.1.1.11  mrg 		     NULL, GOMP_MAP_VARS_DATA);
   3893  1.1.1.11  mrg   struct gomp_task_icv *icv = gomp_icv (true);
   3894  1.1.1.11  mrg   tgt->prev = icv->target_data;
   3895  1.1.1.11  mrg   icv->target_data = tgt;
   3896  1.1.1.11  mrg }
   3897  1.1.1.11  mrg 
   3898  1.1.1.11  mrg void
   3899  1.1.1.11  mrg GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
   3900  1.1.1.11  mrg 		      size_t *sizes, unsigned short *kinds)
   3901  1.1.1.11  mrg {
   3902  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device, true);
   3903  1.1.1.11  mrg 
   3904  1.1.1.11  mrg   if (devicep == NULL
   3905  1.1.1.11  mrg       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   3906  1.1.1.11  mrg       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   3907  1.1.1.11  mrg     return gomp_target_data_fallback (devicep);
   3908  1.1.1.11  mrg 
   3909  1.1.1.11  mrg   struct target_mem_desc *tgt
   3910  1.1.1.11  mrg     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
   3911  1.1.1.11  mrg 		     NULL, GOMP_MAP_VARS_DATA);
   3912  1.1.1.11  mrg   struct gomp_task_icv *icv = gomp_icv (true);
   3913  1.1.1.11  mrg   tgt->prev = icv->target_data;
   3914       1.1  mrg   icv->target_data = tgt;
   3915       1.1  mrg }
   3916       1.1  mrg 
   3917       1.1  mrg void
   3918       1.1  mrg GOMP_target_end_data (void)
   3919       1.1  mrg {
   3920       1.1  mrg   struct gomp_task_icv *icv = gomp_icv (false);
   3921       1.1  mrg   if (icv->target_data)
   3922       1.1  mrg     {
   3923       1.1  mrg       struct target_mem_desc *tgt = icv->target_data;
   3924       1.1  mrg       icv->target_data = tgt->prev;
   3925  1.1.1.10  mrg       gomp_unmap_vars (tgt, true, NULL);
   3926       1.1  mrg     }
   3927       1.1  mrg }
   3928       1.1  mrg 
   3929       1.1  mrg void
   3930       1.1  mrg GOMP_target_update (int device, const void *unused, size_t mapnum,
   3931       1.1  mrg 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
   3932       1.1  mrg {
   3933  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device, true);
   3934       1.1  mrg 
   3935       1.1  mrg   if (devicep == NULL
   3936   1.1.1.2  mrg       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   3937   1.1.1.2  mrg       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   3938   1.1.1.2  mrg     return;
   3939   1.1.1.2  mrg 
   3940   1.1.1.2  mrg   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
   3941   1.1.1.2  mrg }
   3942   1.1.1.2  mrg 
/* Entry point for #pragma omp target update (GCC 6+ ABI): handles
   depend/nowait clauses before performing (or deferring) the update.
   KINDS uses the short (16-bit) encoding, hence short_mapkind == true
   in the final gomp_update call.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Dependencies only matter if the current task tracks them.  */
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* nowait: try to defer the whole update as a target task;
		 on success the task does the work and we are done.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      /* No nowait (or deferral not possible): block until all
		 depend clauses are satisfied, then proceed inline.  */
	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to update without a usable device, or when host and device
     share memory.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.
     Re-checked here because the dependency wait above may have blocked
     across a cancellation.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
   4021   1.1.1.2  mrg 
/* Handle the "exit data" half of #pragma omp target (enter/exit) data:
   for each map clause, optionally copy device data back to the host,
   decrement the mapping's reference count, and queue fully released
   mappings for removal.  Runs entirely under DEVICEP->lock.
   REFCOUNT_SET tracks per-construct refcount decrements (see
   gomp_decrement_refcount).  KINDS uses the short (16-bit) encoding;
   only the low byte (TYPEMASK) selects the map kind.  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds,
		htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already shut down; nothing left to unmap.  */
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  /* First pass: process all DETACH clauses before anything is copied
     back or unmapped, so pointer fields are restored while their
     enclosing mappings still exist.  */
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
	struct splay_tree_key_s cur_node;
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizeof (void *);
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

	if (n)
	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
			       false, NULL);
      }

  /* Removals are queued here (VLA bounded by MAPNUM) and performed only
     after all copy-backs below — see the long comment further down.  */
  int nrmvars = 0;
  splay_tree_key remove_vars[mapnum];

  /* Second pass: copy back and decrement refcounts per map kind.  */
  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  /* Zero-length sections need the special lookup that also
	     matches mappings the address merely points into.  */
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  /* DELETE forces the refcount to zero; RELEASE/FROM just
	     decrement.  DO_COPY/DO_REMOVE report what this decrement
	     implies.  */
	  bool delete_p = (kind == GOMP_MAP_DELETE
			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
	  bool do_copy, do_remove;
	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
				   &do_remove);

	  /* ALWAYS_FROM copies unconditionally; plain FROM only when
	     the refcount decrement says this was the last reference.  */
	  if ((kind == GOMP_MAP_FROM && do_copy)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    {
	      if (k->aux && k->aux->attach_count)
		{
		  /* We have to be careful not to overwrite still attached
		     pointers during the copyback to host.  Copy word by
		     word, skipping words with a nonzero attach count.  */
		  uintptr_t addr = k->host_start;
		  while (addr < k->host_end)
		    {
		      size_t i = (addr - k->host_start) / sizeof (void *);
		      if (k->aux->attach_count[i] == 0)
			gomp_copy_dev2host (devicep, NULL, (void *) addr,
					    (void *) (k->tgt->tgt_start
						      + k->tgt_offset
						      + addr - k->host_start),
					    sizeof (void *));
		      addr += sizeof (void *);
		    }
		}
	      else
		/* No attachments: copy back just the requested range.  */
		gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				    (void *) (k->tgt->tgt_start + k->tgt_offset
					      + cur_node.host_start
					      - k->host_start),
				    cur_node.host_end - cur_node.host_start);
	    }

	  /* Structure elements lists are removed altogether at once, which
	     may cause immediate deallocation of the target_mem_desc, causing
	     errors if we still have following element siblings to copy back.
	     While we're at it, it also seems more disciplined to simply
	     queue all removals together for processing below.

	     Structured block unmapping (i.e. gomp_unmap_vars_internal) should
	     not have this problem, since they maintain an additional
	     tgt->refcount = 1 reference to the target_mem_desc to start with.
	  */
	  if (do_remove)
	    remove_vars[nrmvars++] = k;
	  break;

	case GOMP_MAP_DETACH:
	  /* Already handled in the first pass above.  */
	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  /* All copy-backs done; now it is safe to drop the queued mappings.  */
  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}
   4135       1.1  mrg 
   4136   1.1.1.2  mrg void
   4137   1.1.1.2  mrg GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
   4138   1.1.1.2  mrg 			     size_t *sizes, unsigned short *kinds,
   4139   1.1.1.2  mrg 			     unsigned int flags, void **depend)
   4140   1.1.1.2  mrg {
   4141  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device, true);
   4142   1.1.1.2  mrg 
   4143   1.1.1.2  mrg   /* If there are depend clauses, but nowait is not present,
   4144   1.1.1.2  mrg      block the parent task until the dependencies are resolved
   4145   1.1.1.2  mrg      and then just continue with the rest of the function as if it
   4146   1.1.1.2  mrg      is a merged task.  Until we are able to schedule task during
   4147   1.1.1.2  mrg      variable mapping or unmapping, ignore nowait if depend clauses
   4148   1.1.1.2  mrg      are not present.  */
   4149   1.1.1.2  mrg   if (depend != NULL)
   4150   1.1.1.2  mrg     {
   4151   1.1.1.2  mrg       struct gomp_thread *thr = gomp_thread ();
   4152   1.1.1.2  mrg       if (thr->task && thr->task->depend_hash)
   4153   1.1.1.2  mrg 	{
   4154   1.1.1.2  mrg 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
   4155   1.1.1.2  mrg 	      && thr->ts.team
   4156   1.1.1.2  mrg 	      && !thr->task->final_task)
   4157   1.1.1.2  mrg 	    {
   4158   1.1.1.2  mrg 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
   4159   1.1.1.2  mrg 					   mapnum, hostaddrs, sizes, kinds,
   4160   1.1.1.2  mrg 					   flags, depend, NULL,
   4161   1.1.1.2  mrg 					   GOMP_TARGET_TASK_DATA))
   4162   1.1.1.2  mrg 		return;
   4163   1.1.1.2  mrg 	    }
   4164   1.1.1.2  mrg 	  else
   4165   1.1.1.2  mrg 	    {
   4166   1.1.1.2  mrg 	      struct gomp_team *team = thr->ts.team;
   4167   1.1.1.2  mrg 	      /* If parallel or taskgroup has been cancelled, don't start new
   4168   1.1.1.2  mrg 		 tasks.  */
   4169   1.1.1.7  mrg 	      if (__builtin_expect (gomp_cancel_var, 0) && team)
   4170   1.1.1.7  mrg 		{
   4171   1.1.1.7  mrg 		  if (gomp_team_barrier_cancelled (&team->barrier))
   4172   1.1.1.7  mrg 		    return;
   4173   1.1.1.7  mrg 		  if (thr->task->taskgroup)
   4174   1.1.1.7  mrg 		    {
   4175   1.1.1.7  mrg 		      if (thr->task->taskgroup->cancelled)
   4176   1.1.1.7  mrg 			return;
   4177   1.1.1.7  mrg 		      if (thr->task->taskgroup->workshare
   4178   1.1.1.7  mrg 			  && thr->task->taskgroup->prev
   4179   1.1.1.7  mrg 			  && thr->task->taskgroup->prev->cancelled)
   4180   1.1.1.7  mrg 			return;
   4181   1.1.1.7  mrg 		    }
   4182   1.1.1.7  mrg 		}
   4183   1.1.1.2  mrg 
   4184   1.1.1.2  mrg 	      gomp_task_maybe_wait_for_dependencies (depend);
   4185   1.1.1.2  mrg 	    }
   4186   1.1.1.2  mrg 	}
   4187   1.1.1.2  mrg     }
   4188   1.1.1.2  mrg 
   4189   1.1.1.2  mrg   if (devicep == NULL
   4190   1.1.1.2  mrg       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   4191   1.1.1.2  mrg       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   4192   1.1.1.2  mrg     return;
   4193   1.1.1.2  mrg 
   4194   1.1.1.2  mrg   struct gomp_thread *thr = gomp_thread ();
   4195   1.1.1.2  mrg   struct gomp_team *team = thr->ts.team;
   4196   1.1.1.2  mrg   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
   4197   1.1.1.7  mrg   if (__builtin_expect (gomp_cancel_var, 0) && team)
   4198   1.1.1.7  mrg     {
   4199   1.1.1.7  mrg       if (gomp_team_barrier_cancelled (&team->barrier))
   4200   1.1.1.7  mrg 	return;
   4201   1.1.1.7  mrg       if (thr->task->taskgroup)
   4202   1.1.1.7  mrg 	{
   4203   1.1.1.7  mrg 	  if (thr->task->taskgroup->cancelled)
   4204   1.1.1.7  mrg 	    return;
   4205   1.1.1.7  mrg 	  if (thr->task->taskgroup->workshare
   4206   1.1.1.7  mrg 	      && thr->task->taskgroup->prev
   4207   1.1.1.7  mrg 	      && thr->task->taskgroup->prev->cancelled)
   4208   1.1.1.7  mrg 	    return;
   4209   1.1.1.7  mrg 	}
   4210   1.1.1.7  mrg     }
   4211   1.1.1.2  mrg 
   4212  1.1.1.10  mrg   htab_t refcount_set = htab_create (mapnum);
   4213  1.1.1.10  mrg 
   4214   1.1.1.8  mrg   /* The variables are mapped separately such that they can be released
   4215   1.1.1.8  mrg      independently.  */
   4216   1.1.1.8  mrg   size_t i, j;
   4217   1.1.1.2  mrg   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
   4218   1.1.1.2  mrg     for (i = 0; i < mapnum; i++)
   4219  1.1.1.11  mrg       if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT
   4220  1.1.1.11  mrg 	  || (kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
   4221   1.1.1.2  mrg 	{
   4222   1.1.1.2  mrg 	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
   4223  1.1.1.10  mrg 			 &kinds[i], true, &refcount_set,
   4224  1.1.1.10  mrg 			 GOMP_MAP_VARS_ENTER_DATA);
   4225   1.1.1.2  mrg 	  i += sizes[i];
   4226   1.1.1.2  mrg 	}
   4227   1.1.1.8  mrg       else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
   4228   1.1.1.8  mrg 	{
   4229   1.1.1.8  mrg 	  for (j = i + 1; j < mapnum; j++)
   4230  1.1.1.10  mrg 	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
   4231  1.1.1.10  mrg 		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
   4232   1.1.1.8  mrg 	      break;
   4233   1.1.1.8  mrg 	  gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
   4234  1.1.1.10  mrg 			 &kinds[i], true, &refcount_set,
   4235  1.1.1.10  mrg 			 GOMP_MAP_VARS_ENTER_DATA);
   4236   1.1.1.8  mrg 	  i += j - i - 1;
   4237   1.1.1.8  mrg 	}
   4238  1.1.1.11  mrg       else if (i + 1 < mapnum
   4239  1.1.1.11  mrg 	       && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
   4240  1.1.1.11  mrg 		   || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
   4241  1.1.1.11  mrg 		       && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
   4242  1.1.1.10  mrg 	{
   4243  1.1.1.10  mrg 	  /* An attach operation must be processed together with the mapped
   4244  1.1.1.10  mrg 	     base-pointer list item.  */
   4245  1.1.1.10  mrg 	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
   4246  1.1.1.10  mrg 			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
   4247  1.1.1.10  mrg 	  i += 1;
   4248  1.1.1.10  mrg 	}
   4249   1.1.1.2  mrg       else
   4250   1.1.1.2  mrg 	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
   4251  1.1.1.10  mrg 		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
   4252   1.1.1.2  mrg   else
   4253  1.1.1.10  mrg     gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
   4254  1.1.1.10  mrg   htab_free (refcount_set);
   4255   1.1.1.2  mrg }
   4256   1.1.1.2  mrg 
   4257   1.1.1.2  mrg bool
   4258   1.1.1.2  mrg gomp_target_task_fn (void *data)
   4259   1.1.1.2  mrg {
   4260   1.1.1.2  mrg   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
   4261   1.1.1.2  mrg   struct gomp_device_descr *devicep = ttask->devicep;
   4262   1.1.1.2  mrg 
   4263   1.1.1.2  mrg   if (ttask->fn != NULL)
   4264   1.1.1.2  mrg     {
   4265   1.1.1.2  mrg       void *fn_addr;
   4266   1.1.1.2  mrg       if (devicep == NULL
   4267   1.1.1.2  mrg 	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   4268   1.1.1.2  mrg 	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
   4269   1.1.1.2  mrg 	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
   4270   1.1.1.2  mrg 	{
   4271   1.1.1.2  mrg 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
   4272  1.1.1.10  mrg 	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
   4273  1.1.1.10  mrg 				ttask->args);
   4274   1.1.1.2  mrg 	  return false;
   4275   1.1.1.2  mrg 	}
   4276   1.1.1.2  mrg 
   4277   1.1.1.2  mrg       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
   4278   1.1.1.2  mrg 	{
   4279   1.1.1.2  mrg 	  if (ttask->tgt)
   4280  1.1.1.10  mrg 	    gomp_unmap_vars (ttask->tgt, true, NULL);
   4281   1.1.1.2  mrg 	  return false;
   4282   1.1.1.2  mrg 	}
   4283   1.1.1.2  mrg 
   4284   1.1.1.2  mrg       void *actual_arguments;
   4285   1.1.1.2  mrg       if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   4286   1.1.1.2  mrg 	{
   4287   1.1.1.2  mrg 	  ttask->tgt = NULL;
   4288   1.1.1.2  mrg 	  actual_arguments = ttask->hostaddrs;
   4289   1.1.1.2  mrg 	}
   4290   1.1.1.2  mrg       else
   4291   1.1.1.2  mrg 	{
   4292   1.1.1.2  mrg 	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
   4293   1.1.1.2  mrg 				      NULL, ttask->sizes, ttask->kinds, true,
   4294  1.1.1.10  mrg 				      NULL, GOMP_MAP_VARS_TARGET);
   4295   1.1.1.2  mrg 	  actual_arguments = (void *) ttask->tgt->tgt_start;
   4296   1.1.1.2  mrg 	}
   4297   1.1.1.2  mrg       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
   4298   1.1.1.2  mrg 
   4299   1.1.1.8  mrg       assert (devicep->async_run_func);
   4300   1.1.1.2  mrg       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
   4301   1.1.1.2  mrg 			       ttask->args, (void *) ttask);
   4302   1.1.1.2  mrg       return true;
   4303   1.1.1.2  mrg     }
   4304   1.1.1.2  mrg   else if (devicep == NULL
   4305   1.1.1.2  mrg 	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   4306   1.1.1.2  mrg 	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   4307   1.1.1.2  mrg     return false;
   4308   1.1.1.2  mrg 
   4309   1.1.1.2  mrg   size_t i;
   4310   1.1.1.2  mrg   if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
   4311   1.1.1.2  mrg     gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
   4312   1.1.1.2  mrg 		 ttask->kinds, true);
   4313   1.1.1.2  mrg   else
   4314  1.1.1.10  mrg     {
   4315  1.1.1.10  mrg       htab_t refcount_set = htab_create (ttask->mapnum);
   4316  1.1.1.10  mrg       if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
   4317  1.1.1.10  mrg 	for (i = 0; i < ttask->mapnum; i++)
   4318  1.1.1.11  mrg 	  if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT
   4319  1.1.1.11  mrg 	      || (ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
   4320  1.1.1.10  mrg 	    {
   4321  1.1.1.10  mrg 	      gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
   4322  1.1.1.10  mrg 			     NULL, &ttask->sizes[i], &ttask->kinds[i], true,
   4323  1.1.1.10  mrg 			     &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
   4324  1.1.1.10  mrg 	      i += ttask->sizes[i];
   4325  1.1.1.10  mrg 	    }
   4326  1.1.1.10  mrg 	  else
   4327  1.1.1.10  mrg 	    gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
   4328  1.1.1.10  mrg 			   &ttask->kinds[i], true, &refcount_set,
   4329  1.1.1.10  mrg 			   GOMP_MAP_VARS_ENTER_DATA);
   4330  1.1.1.10  mrg       else
   4331  1.1.1.10  mrg 	gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
   4332  1.1.1.10  mrg 			ttask->kinds, &refcount_set);
   4333  1.1.1.10  mrg       htab_free (refcount_set);
   4334  1.1.1.10  mrg     }
   4335   1.1.1.2  mrg   return false;
   4336       1.1  mrg }
   4337       1.1  mrg 
   4338       1.1  mrg void
   4339       1.1  mrg GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
   4340       1.1  mrg {
   4341       1.1  mrg   if (thread_limit)
   4342       1.1  mrg     {
   4343       1.1  mrg       struct gomp_task_icv *icv = gomp_icv (true);
   4344       1.1  mrg       icv->thread_limit_var
   4345       1.1  mrg 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
   4346       1.1  mrg     }
   4347       1.1  mrg   (void) num_teams;
   4348       1.1  mrg }
   4349       1.1  mrg 
   4350  1.1.1.10  mrg bool
   4351  1.1.1.10  mrg GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
   4352  1.1.1.10  mrg 	     unsigned int thread_limit, bool first)
   4353  1.1.1.10  mrg {
   4354  1.1.1.10  mrg   struct gomp_thread *thr = gomp_thread ();
   4355  1.1.1.10  mrg   if (first)
   4356  1.1.1.10  mrg     {
   4357  1.1.1.10  mrg       if (thread_limit)
   4358  1.1.1.10  mrg 	{
   4359  1.1.1.10  mrg 	  struct gomp_task_icv *icv = gomp_icv (true);
   4360  1.1.1.10  mrg 	  icv->thread_limit_var
   4361  1.1.1.10  mrg 	    = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
   4362  1.1.1.10  mrg 	}
   4363  1.1.1.10  mrg       (void) num_teams_high;
   4364  1.1.1.10  mrg       if (num_teams_low == 0)
   4365  1.1.1.10  mrg 	num_teams_low = 1;
   4366  1.1.1.10  mrg       thr->num_teams = num_teams_low - 1;
   4367  1.1.1.10  mrg       thr->team_num = 0;
   4368  1.1.1.10  mrg     }
   4369  1.1.1.10  mrg   else if (thr->team_num == thr->num_teams)
   4370  1.1.1.10  mrg     return false;
   4371  1.1.1.10  mrg   else
   4372  1.1.1.10  mrg     ++thr->team_num;
   4373  1.1.1.10  mrg   return true;
   4374  1.1.1.10  mrg }
   4375  1.1.1.10  mrg 
   4376   1.1.1.2  mrg void *
   4377   1.1.1.2  mrg omp_target_alloc (size_t size, int device_num)
   4378   1.1.1.2  mrg {
   4379  1.1.1.11  mrg   if (device_num == omp_initial_device
   4380  1.1.1.11  mrg       || device_num == gomp_get_num_devices ())
   4381   1.1.1.2  mrg     return malloc (size);
   4382   1.1.1.2  mrg 
   4383  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device_num, false);
   4384   1.1.1.2  mrg   if (devicep == NULL)
   4385   1.1.1.2  mrg     return NULL;
   4386   1.1.1.2  mrg 
   4387   1.1.1.2  mrg   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   4388   1.1.1.2  mrg       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   4389   1.1.1.2  mrg     return malloc (size);
   4390   1.1.1.2  mrg 
   4391   1.1.1.2  mrg   gomp_mutex_lock (&devicep->lock);
   4392   1.1.1.2  mrg   void *ret = devicep->alloc_func (devicep->target_id, size);
   4393   1.1.1.2  mrg   gomp_mutex_unlock (&devicep->lock);
   4394   1.1.1.2  mrg   return ret;
   4395   1.1.1.2  mrg }
   4396   1.1.1.2  mrg 
   4397   1.1.1.2  mrg void
   4398   1.1.1.2  mrg omp_target_free (void *device_ptr, int device_num)
   4399   1.1.1.2  mrg {
   4400  1.1.1.11  mrg   if (device_num == omp_initial_device
   4401  1.1.1.11  mrg       || device_num == gomp_get_num_devices ())
   4402   1.1.1.2  mrg     {
   4403   1.1.1.2  mrg       free (device_ptr);
   4404   1.1.1.2  mrg       return;
   4405   1.1.1.2  mrg     }
   4406   1.1.1.2  mrg 
   4407  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device_num, false);
   4408  1.1.1.11  mrg   if (devicep == NULL || device_ptr == NULL)
   4409   1.1.1.2  mrg     return;
   4410   1.1.1.2  mrg 
   4411   1.1.1.2  mrg   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   4412   1.1.1.2  mrg       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   4413   1.1.1.2  mrg     {
   4414   1.1.1.2  mrg       free (device_ptr);
   4415   1.1.1.2  mrg       return;
   4416   1.1.1.2  mrg     }
   4417   1.1.1.2  mrg 
   4418   1.1.1.2  mrg   gomp_mutex_lock (&devicep->lock);
   4419   1.1.1.3  mrg   gomp_free_device_memory (devicep, device_ptr);
   4420   1.1.1.2  mrg   gomp_mutex_unlock (&devicep->lock);
   4421   1.1.1.2  mrg }
   4422   1.1.1.2  mrg 
   4423   1.1.1.2  mrg int
   4424   1.1.1.7  mrg omp_target_is_present (const void *ptr, int device_num)
   4425   1.1.1.2  mrg {
   4426  1.1.1.11  mrg   if (device_num == omp_initial_device
   4427  1.1.1.11  mrg       || device_num == gomp_get_num_devices ())
   4428   1.1.1.2  mrg     return 1;
   4429   1.1.1.2  mrg 
   4430  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device_num, false);
   4431   1.1.1.2  mrg   if (devicep == NULL)
   4432   1.1.1.2  mrg     return 0;
   4433   1.1.1.2  mrg 
   4434  1.1.1.11  mrg   if (ptr == NULL)
   4435  1.1.1.11  mrg     return 1;
   4436  1.1.1.11  mrg 
   4437   1.1.1.2  mrg   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   4438   1.1.1.2  mrg       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   4439   1.1.1.2  mrg     return 1;
   4440   1.1.1.2  mrg 
   4441   1.1.1.2  mrg   gomp_mutex_lock (&devicep->lock);
   4442   1.1.1.2  mrg   struct splay_tree_s *mem_map = &devicep->mem_map;
   4443   1.1.1.2  mrg   struct splay_tree_key_s cur_node;
   4444   1.1.1.2  mrg 
   4445   1.1.1.2  mrg   cur_node.host_start = (uintptr_t) ptr;
   4446   1.1.1.2  mrg   cur_node.host_end = cur_node.host_start;
   4447   1.1.1.2  mrg   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
   4448   1.1.1.2  mrg   int ret = n != NULL;
   4449   1.1.1.2  mrg   gomp_mutex_unlock (&devicep->lock);
   4450   1.1.1.2  mrg   return ret;
   4451   1.1.1.2  mrg }
   4452   1.1.1.2  mrg 
   4453  1.1.1.11  mrg static int
   4454  1.1.1.11  mrg omp_target_memcpy_check (int dst_device_num, int src_device_num,
   4455  1.1.1.11  mrg 			 struct gomp_device_descr **dst_devicep,
   4456  1.1.1.11  mrg 			 struct gomp_device_descr **src_devicep)
   4457  1.1.1.11  mrg {
   4458  1.1.1.11  mrg   if (dst_device_num != gomp_get_num_devices ()
   4459  1.1.1.11  mrg       /* Above gomp_get_num_devices has to be called unconditionally.  */
   4460  1.1.1.11  mrg       && dst_device_num != omp_initial_device)
   4461   1.1.1.2  mrg     {
   4462  1.1.1.11  mrg       *dst_devicep = resolve_device (dst_device_num, false);
   4463  1.1.1.11  mrg       if (*dst_devicep == NULL)
   4464   1.1.1.2  mrg 	return EINVAL;
   4465   1.1.1.2  mrg 
   4466  1.1.1.11  mrg       if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   4467  1.1.1.11  mrg 	  || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   4468  1.1.1.11  mrg 	*dst_devicep = NULL;
   4469   1.1.1.2  mrg     }
   4470   1.1.1.2  mrg 
   4471  1.1.1.11  mrg   if (src_device_num != num_devices_openmp
   4472  1.1.1.11  mrg       && src_device_num != omp_initial_device)
   4473  1.1.1.11  mrg     {
   4474  1.1.1.11  mrg       *src_devicep = resolve_device (src_device_num, false);
   4475  1.1.1.11  mrg       if (*src_devicep == NULL)
   4476   1.1.1.2  mrg 	return EINVAL;
   4477   1.1.1.2  mrg 
   4478  1.1.1.11  mrg       if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   4479  1.1.1.11  mrg 	  || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   4480  1.1.1.11  mrg 	*src_devicep = NULL;
   4481   1.1.1.2  mrg     }
   4482  1.1.1.11  mrg 
   4483  1.1.1.11  mrg   return 0;
   4484  1.1.1.11  mrg }
   4485  1.1.1.11  mrg 
   4486  1.1.1.11  mrg static int
   4487  1.1.1.11  mrg omp_target_memcpy_copy (void *dst, const void *src, size_t length,
   4488  1.1.1.11  mrg 			size_t dst_offset, size_t src_offset,
   4489  1.1.1.11  mrg 			struct gomp_device_descr *dst_devicep,
   4490  1.1.1.11  mrg 			struct gomp_device_descr *src_devicep)
   4491  1.1.1.11  mrg {
   4492  1.1.1.11  mrg   bool ret;
   4493   1.1.1.2  mrg   if (src_devicep == NULL && dst_devicep == NULL)
   4494   1.1.1.2  mrg     {
   4495   1.1.1.2  mrg       memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
   4496   1.1.1.2  mrg       return 0;
   4497   1.1.1.2  mrg     }
   4498   1.1.1.2  mrg   if (src_devicep == NULL)
   4499   1.1.1.2  mrg     {
   4500   1.1.1.2  mrg       gomp_mutex_lock (&dst_devicep->lock);
   4501   1.1.1.3  mrg       ret = dst_devicep->host2dev_func (dst_devicep->target_id,
   4502   1.1.1.3  mrg 					(char *) dst + dst_offset,
   4503   1.1.1.3  mrg 					(char *) src + src_offset, length);
   4504   1.1.1.2  mrg       gomp_mutex_unlock (&dst_devicep->lock);
   4505   1.1.1.3  mrg       return (ret ? 0 : EINVAL);
   4506   1.1.1.2  mrg     }
   4507   1.1.1.2  mrg   if (dst_devicep == NULL)
   4508   1.1.1.2  mrg     {
   4509   1.1.1.2  mrg       gomp_mutex_lock (&src_devicep->lock);
   4510   1.1.1.3  mrg       ret = src_devicep->dev2host_func (src_devicep->target_id,
   4511   1.1.1.3  mrg 					(char *) dst + dst_offset,
   4512   1.1.1.3  mrg 					(char *) src + src_offset, length);
   4513   1.1.1.2  mrg       gomp_mutex_unlock (&src_devicep->lock);
   4514   1.1.1.3  mrg       return (ret ? 0 : EINVAL);
   4515   1.1.1.2  mrg     }
   4516   1.1.1.2  mrg   if (src_devicep == dst_devicep)
   4517   1.1.1.2  mrg     {
   4518   1.1.1.2  mrg       gomp_mutex_lock (&src_devicep->lock);
   4519   1.1.1.3  mrg       ret = src_devicep->dev2dev_func (src_devicep->target_id,
   4520   1.1.1.3  mrg 				       (char *) dst + dst_offset,
   4521   1.1.1.3  mrg 				       (char *) src + src_offset, length);
   4522   1.1.1.2  mrg       gomp_mutex_unlock (&src_devicep->lock);
   4523   1.1.1.3  mrg       return (ret ? 0 : EINVAL);
   4524   1.1.1.2  mrg     }
   4525   1.1.1.2  mrg   return EINVAL;
   4526   1.1.1.2  mrg }
   4527   1.1.1.2  mrg 
   4528  1.1.1.11  mrg int
   4529  1.1.1.11  mrg omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
   4530  1.1.1.11  mrg 		   size_t src_offset, int dst_device_num, int src_device_num)
   4531  1.1.1.11  mrg {
   4532  1.1.1.11  mrg   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
   4533  1.1.1.11  mrg   int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
   4534  1.1.1.11  mrg 				     &dst_devicep, &src_devicep);
   4535  1.1.1.11  mrg 
   4536  1.1.1.11  mrg   if (ret)
   4537  1.1.1.11  mrg     return ret;
   4538  1.1.1.11  mrg 
   4539  1.1.1.11  mrg   ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
   4540  1.1.1.11  mrg 				dst_devicep, src_devicep);
   4541  1.1.1.11  mrg 
   4542  1.1.1.11  mrg   return ret;
   4543  1.1.1.11  mrg }
   4544  1.1.1.11  mrg 
   4545  1.1.1.11  mrg typedef struct
   4546  1.1.1.11  mrg {
   4547  1.1.1.11  mrg   void *dst;
   4548  1.1.1.11  mrg   const void *src;
   4549  1.1.1.11  mrg   size_t length;
   4550  1.1.1.11  mrg   size_t dst_offset;
   4551  1.1.1.11  mrg   size_t src_offset;
   4552  1.1.1.11  mrg   struct gomp_device_descr *dst_devicep;
   4553  1.1.1.11  mrg   struct gomp_device_descr *src_devicep;
   4554  1.1.1.11  mrg } omp_target_memcpy_data;
   4555  1.1.1.11  mrg 
   4556  1.1.1.11  mrg static void
   4557  1.1.1.11  mrg omp_target_memcpy_async_helper (void *args)
   4558  1.1.1.11  mrg {
   4559  1.1.1.11  mrg   omp_target_memcpy_data *a = args;
   4560  1.1.1.11  mrg   if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
   4561  1.1.1.11  mrg 			      a->src_offset, a->dst_devicep, a->src_devicep))
   4562  1.1.1.11  mrg     gomp_fatal ("omp_target_memcpy failed");
   4563  1.1.1.11  mrg }
   4564  1.1.1.11  mrg 
   4565  1.1.1.11  mrg int
   4566  1.1.1.11  mrg omp_target_memcpy_async (void *dst, const void *src, size_t length,
   4567  1.1.1.11  mrg 			 size_t dst_offset, size_t src_offset,
   4568  1.1.1.11  mrg 			 int dst_device_num, int src_device_num,
   4569  1.1.1.11  mrg 			 int depobj_count, omp_depend_t *depobj_list)
   4570  1.1.1.11  mrg {
   4571  1.1.1.11  mrg   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
   4572  1.1.1.11  mrg   unsigned int flags = 0;
   4573  1.1.1.11  mrg   void *depend[depobj_count + 5];
   4574  1.1.1.11  mrg   int i;
   4575  1.1.1.11  mrg   int check = omp_target_memcpy_check (dst_device_num, src_device_num,
   4576  1.1.1.11  mrg 				       &dst_devicep, &src_devicep);
   4577  1.1.1.11  mrg 
   4578  1.1.1.11  mrg   omp_target_memcpy_data s = {
   4579  1.1.1.11  mrg     .dst = dst,
   4580  1.1.1.11  mrg     .src = src,
   4581  1.1.1.11  mrg     .length = length,
   4582  1.1.1.11  mrg     .dst_offset = dst_offset,
   4583  1.1.1.11  mrg     .src_offset = src_offset,
   4584  1.1.1.11  mrg     .dst_devicep = dst_devicep,
   4585  1.1.1.11  mrg     .src_devicep = src_devicep
   4586  1.1.1.11  mrg   };
   4587  1.1.1.11  mrg 
   4588  1.1.1.11  mrg   if (check)
   4589  1.1.1.11  mrg     return check;
   4590  1.1.1.11  mrg 
   4591  1.1.1.11  mrg   if (depobj_count > 0 && depobj_list != NULL)
   4592  1.1.1.11  mrg     {
   4593  1.1.1.11  mrg       flags |= GOMP_TASK_FLAG_DEPEND;
   4594  1.1.1.11  mrg       depend[0] = 0;
   4595  1.1.1.11  mrg       depend[1] = (void *) (uintptr_t) depobj_count;
   4596  1.1.1.11  mrg       depend[2] = depend[3] = depend[4] = 0;
   4597  1.1.1.11  mrg       for (i = 0; i < depobj_count; ++i)
   4598  1.1.1.11  mrg 	depend[i + 5] = &depobj_list[i];
   4599  1.1.1.11  mrg     }
   4600  1.1.1.11  mrg 
   4601  1.1.1.11  mrg   GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
   4602  1.1.1.11  mrg 	     __alignof__ (s), true, flags, depend, 0, NULL);
   4603  1.1.1.11  mrg 
   4604  1.1.1.11  mrg   return 0;
   4605  1.1.1.11  mrg }
   4606  1.1.1.11  mrg 
   4607   1.1.1.2  mrg static int
   4608   1.1.1.7  mrg omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
   4609   1.1.1.2  mrg 			       int num_dims, const size_t *volume,
   4610   1.1.1.2  mrg 			       const size_t *dst_offsets,
   4611   1.1.1.2  mrg 			       const size_t *src_offsets,
   4612   1.1.1.2  mrg 			       const size_t *dst_dimensions,
   4613   1.1.1.2  mrg 			       const size_t *src_dimensions,
   4614   1.1.1.2  mrg 			       struct gomp_device_descr *dst_devicep,
   4615  1.1.1.11  mrg 			       struct gomp_device_descr *src_devicep,
   4616  1.1.1.11  mrg 			       size_t *tmp_size, void **tmp)
   4617   1.1.1.2  mrg {
   4618   1.1.1.2  mrg   size_t dst_slice = element_size;
   4619   1.1.1.2  mrg   size_t src_slice = element_size;
   4620   1.1.1.2  mrg   size_t j, dst_off, src_off, length;
   4621   1.1.1.2  mrg   int i, ret;
   4622   1.1.1.2  mrg 
   4623   1.1.1.2  mrg   if (num_dims == 1)
   4624   1.1.1.2  mrg     {
   4625   1.1.1.2  mrg       if (__builtin_mul_overflow (element_size, volume[0], &length)
   4626   1.1.1.2  mrg 	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
   4627   1.1.1.2  mrg 	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
   4628   1.1.1.2  mrg 	return EINVAL;
   4629   1.1.1.2  mrg       if (dst_devicep == NULL && src_devicep == NULL)
   4630   1.1.1.3  mrg 	{
   4631   1.1.1.7  mrg 	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
   4632   1.1.1.7  mrg 		  length);
   4633   1.1.1.3  mrg 	  ret = 1;
   4634   1.1.1.3  mrg 	}
   4635   1.1.1.2  mrg       else if (src_devicep == NULL)
   4636   1.1.1.3  mrg 	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
   4637   1.1.1.3  mrg 					  (char *) dst + dst_off,
   4638   1.1.1.7  mrg 					  (const char *) src + src_off,
   4639   1.1.1.7  mrg 					  length);
   4640   1.1.1.2  mrg       else if (dst_devicep == NULL)
   4641   1.1.1.3  mrg 	ret = src_devicep->dev2host_func (src_devicep->target_id,
   4642   1.1.1.3  mrg 					  (char *) dst + dst_off,
   4643   1.1.1.7  mrg 					  (const char *) src + src_off,
   4644   1.1.1.7  mrg 					  length);
   4645   1.1.1.2  mrg       else if (src_devicep == dst_devicep)
   4646   1.1.1.3  mrg 	ret = src_devicep->dev2dev_func (src_devicep->target_id,
   4647   1.1.1.3  mrg 					 (char *) dst + dst_off,
   4648   1.1.1.7  mrg 					 (const char *) src + src_off,
   4649   1.1.1.7  mrg 					 length);
   4650   1.1.1.2  mrg       else
   4651  1.1.1.11  mrg 	{
   4652  1.1.1.11  mrg 	  if (*tmp_size == 0)
   4653  1.1.1.11  mrg 	    {
   4654  1.1.1.11  mrg 	      *tmp_size = length;
   4655  1.1.1.11  mrg 	      *tmp = malloc (length);
   4656  1.1.1.11  mrg 	      if (*tmp == NULL)
   4657  1.1.1.11  mrg 		return ENOMEM;
   4658  1.1.1.11  mrg 	    }
   4659  1.1.1.11  mrg 	  else if (*tmp_size < length)
   4660  1.1.1.11  mrg 	    {
   4661  1.1.1.11  mrg 	      *tmp_size = length;
   4662  1.1.1.11  mrg 	      free (*tmp);
   4663  1.1.1.11  mrg 	      *tmp = malloc (length);
   4664  1.1.1.11  mrg 	      if (*tmp == NULL)
   4665  1.1.1.11  mrg 		return ENOMEM;
   4666  1.1.1.11  mrg 	    }
   4667  1.1.1.11  mrg 	  ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
   4668  1.1.1.11  mrg 					    (const char *) src + src_off,
   4669  1.1.1.11  mrg 					    length);
   4670  1.1.1.11  mrg 	  if (ret == 1)
   4671  1.1.1.11  mrg 	    ret = dst_devicep->host2dev_func (dst_devicep->target_id,
   4672  1.1.1.11  mrg 					      (char *) dst + dst_off, *tmp,
   4673  1.1.1.11  mrg 					      length);
   4674  1.1.1.11  mrg 	}
   4675   1.1.1.3  mrg       return ret ? 0 : EINVAL;
   4676   1.1.1.2  mrg     }
   4677   1.1.1.2  mrg 
   4678  1.1.1.11  mrg   /* host->device, device->host and intra device.  */
   4679  1.1.1.11  mrg   if (num_dims == 2
   4680  1.1.1.11  mrg       && ((src_devicep
   4681  1.1.1.11  mrg 	   && src_devicep == dst_devicep
   4682  1.1.1.11  mrg 	   && src_devicep->memcpy2d_func)
   4683  1.1.1.11  mrg 	  || (!src_devicep != !dst_devicep
   4684  1.1.1.11  mrg 	      && ((src_devicep && src_devicep->memcpy2d_func)
   4685  1.1.1.11  mrg 		  || (dst_devicep && dst_devicep->memcpy2d_func)))))
   4686  1.1.1.11  mrg     {
   4687  1.1.1.11  mrg       size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
   4688  1.1.1.11  mrg       int dst_id = dst_devicep ? dst_devicep->target_id : -1;
   4689  1.1.1.11  mrg       int src_id = src_devicep ? src_devicep->target_id : -1;
   4690  1.1.1.11  mrg       struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
   4691  1.1.1.11  mrg 
   4692  1.1.1.11  mrg       if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
   4693  1.1.1.11  mrg 	  || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
   4694  1.1.1.11  mrg 	  || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
   4695  1.1.1.11  mrg 	  || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1)
   4696  1.1.1.11  mrg 	  || __builtin_mul_overflow (src_offsets[1], element_size,
   4697  1.1.1.11  mrg 				     &src_off_sz1))
   4698  1.1.1.11  mrg 	return EINVAL;
   4699  1.1.1.11  mrg       ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
   4700  1.1.1.11  mrg 				 dst, dst_off_sz1, dst_offsets[0], dst_sz1,
   4701  1.1.1.11  mrg 				 src, src_off_sz1, src_offsets[0], src_sz1);
   4702  1.1.1.11  mrg       if (ret != -1)
   4703  1.1.1.11  mrg 	return ret ? 0 : EINVAL;
   4704  1.1.1.11  mrg     }
   4705  1.1.1.11  mrg   else if (num_dims == 3
   4706  1.1.1.11  mrg 	   && ((src_devicep
   4707  1.1.1.11  mrg 		&& src_devicep == dst_devicep
   4708  1.1.1.11  mrg 		&& src_devicep->memcpy3d_func)
   4709  1.1.1.11  mrg 	       || (!src_devicep != !dst_devicep
   4710  1.1.1.11  mrg 		   && ((src_devicep && src_devicep->memcpy3d_func)
   4711  1.1.1.11  mrg 		       || (dst_devicep && dst_devicep->memcpy3d_func)))))
   4712  1.1.1.11  mrg     {
   4713  1.1.1.11  mrg       size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
   4714  1.1.1.11  mrg       int dst_id = dst_devicep ? dst_devicep->target_id : -1;
   4715  1.1.1.11  mrg       int src_id = src_devicep ? src_devicep->target_id : -1;
   4716  1.1.1.11  mrg       struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
   4717  1.1.1.11  mrg 
   4718  1.1.1.11  mrg       if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
   4719  1.1.1.11  mrg 	  || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
   4720  1.1.1.11  mrg 	  || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
   4721  1.1.1.11  mrg 	  || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2)
   4722  1.1.1.11  mrg 	  || __builtin_mul_overflow (src_offsets[2], element_size,
   4723  1.1.1.11  mrg 				     &src_off_sz2))
   4724  1.1.1.11  mrg 	return EINVAL;
   4725  1.1.1.11  mrg       ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
   4726  1.1.1.11  mrg 				 dst, dst_off_sz2, dst_offsets[1],
   4727  1.1.1.11  mrg 				 dst_offsets[0], dst_sz2, dst_dimensions[1],
   4728  1.1.1.11  mrg 				 src, src_off_sz2, src_offsets[1],
   4729  1.1.1.11  mrg 				 src_offsets[0], src_sz2, src_dimensions[1]);
   4730  1.1.1.11  mrg       if (ret != -1)
   4731  1.1.1.11  mrg 	return ret ? 0 : EINVAL;
   4732  1.1.1.11  mrg     }
   4733   1.1.1.2  mrg 
   4734   1.1.1.2  mrg   for (i = 1; i < num_dims; i++)
   4735   1.1.1.2  mrg     if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
   4736   1.1.1.2  mrg 	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
   4737   1.1.1.2  mrg       return EINVAL;
   4738   1.1.1.2  mrg   if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
   4739   1.1.1.2  mrg       || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
   4740   1.1.1.2  mrg     return EINVAL;
   4741   1.1.1.2  mrg   for (j = 0; j < volume[0]; j++)
   4742   1.1.1.2  mrg     {
   4743   1.1.1.2  mrg       ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
   4744   1.1.1.7  mrg 					   (const char *) src + src_off,
   4745   1.1.1.2  mrg 					   element_size, num_dims - 1,
   4746   1.1.1.2  mrg 					   volume + 1, dst_offsets + 1,
   4747   1.1.1.2  mrg 					   src_offsets + 1, dst_dimensions + 1,
   4748   1.1.1.2  mrg 					   src_dimensions + 1, dst_devicep,
   4749  1.1.1.11  mrg 					   src_devicep, tmp_size, tmp);
   4750   1.1.1.2  mrg       if (ret)
   4751   1.1.1.2  mrg 	return ret;
   4752   1.1.1.2  mrg       dst_off += dst_slice;
   4753   1.1.1.2  mrg       src_off += src_slice;
   4754   1.1.1.2  mrg     }
   4755   1.1.1.2  mrg   return 0;
   4756   1.1.1.2  mrg }
   4757   1.1.1.2  mrg 
   4758  1.1.1.11  mrg static int
   4759  1.1.1.11  mrg omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
   4760  1.1.1.11  mrg 			      int src_device_num,
   4761  1.1.1.11  mrg 			      struct gomp_device_descr **dst_devicep,
   4762  1.1.1.11  mrg 			      struct gomp_device_descr **src_devicep)
   4763  1.1.1.11  mrg {
   4764  1.1.1.11  mrg   if (!dst && !src)
   4765  1.1.1.11  mrg     return INT_MAX;
   4766  1.1.1.11  mrg 
   4767  1.1.1.11  mrg   int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
   4768  1.1.1.11  mrg 				     dst_devicep, src_devicep);
   4769  1.1.1.11  mrg   if (ret)
   4770  1.1.1.11  mrg     return ret;
   4771  1.1.1.11  mrg 
   4772  1.1.1.11  mrg   return 0;
   4773  1.1.1.11  mrg }
   4774  1.1.1.11  mrg 
   4775  1.1.1.11  mrg static int
   4776  1.1.1.11  mrg omp_target_memcpy_rect_copy (void *dst, const void *src,
   4777  1.1.1.11  mrg 			     size_t element_size, int num_dims,
   4778  1.1.1.11  mrg 			     const size_t *volume, const size_t *dst_offsets,
   4779  1.1.1.11  mrg 			     const size_t *src_offsets,
   4780  1.1.1.11  mrg 			     const size_t *dst_dimensions,
   4781  1.1.1.11  mrg 			     const size_t *src_dimensions,
   4782  1.1.1.11  mrg 			     struct gomp_device_descr *dst_devicep,
   4783  1.1.1.11  mrg 			     struct gomp_device_descr *src_devicep)
   4784  1.1.1.11  mrg {
   4785  1.1.1.11  mrg   size_t tmp_size = 0;
   4786  1.1.1.11  mrg   void *tmp = NULL;
   4787  1.1.1.11  mrg   bool lock_src;
   4788  1.1.1.11  mrg   bool lock_dst;
   4789  1.1.1.11  mrg 
   4790  1.1.1.11  mrg   lock_src = src_devicep != NULL;
   4791  1.1.1.11  mrg   lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
   4792  1.1.1.11  mrg   if (lock_src)
   4793  1.1.1.11  mrg     gomp_mutex_lock (&src_devicep->lock);
   4794  1.1.1.11  mrg   if (lock_dst)
   4795  1.1.1.11  mrg     gomp_mutex_lock (&dst_devicep->lock);
   4796  1.1.1.11  mrg   int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
   4797  1.1.1.11  mrg 					   volume, dst_offsets, src_offsets,
   4798  1.1.1.11  mrg 					   dst_dimensions, src_dimensions,
   4799  1.1.1.11  mrg 					   dst_devicep, src_devicep,
   4800  1.1.1.11  mrg 					   &tmp_size, &tmp);
   4801  1.1.1.11  mrg   if (lock_src)
   4802  1.1.1.11  mrg     gomp_mutex_unlock (&src_devicep->lock);
   4803  1.1.1.11  mrg   if (lock_dst)
   4804  1.1.1.11  mrg     gomp_mutex_unlock (&dst_devicep->lock);
   4805  1.1.1.11  mrg   if (tmp)
   4806  1.1.1.11  mrg     free (tmp);
   4807  1.1.1.11  mrg 
   4808  1.1.1.11  mrg   return ret;
   4809  1.1.1.11  mrg }
   4810  1.1.1.11  mrg 
   4811   1.1.1.2  mrg int
   4812   1.1.1.7  mrg omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
   4813   1.1.1.2  mrg 			int num_dims, const size_t *volume,
   4814   1.1.1.2  mrg 			const size_t *dst_offsets,
   4815   1.1.1.2  mrg 			const size_t *src_offsets,
   4816   1.1.1.2  mrg 			const size_t *dst_dimensions,
   4817   1.1.1.2  mrg 			const size_t *src_dimensions,
   4818   1.1.1.2  mrg 			int dst_device_num, int src_device_num)
   4819   1.1.1.2  mrg {
   4820   1.1.1.2  mrg   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
   4821   1.1.1.2  mrg 
   4822  1.1.1.11  mrg   int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
   4823  1.1.1.11  mrg 					    src_device_num, &dst_devicep,
   4824  1.1.1.11  mrg 					    &src_devicep);
   4825  1.1.1.11  mrg 
   4826  1.1.1.11  mrg   if (check)
   4827  1.1.1.11  mrg     return check;
   4828  1.1.1.11  mrg 
   4829  1.1.1.11  mrg   int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
   4830  1.1.1.11  mrg 					 volume, dst_offsets, src_offsets,
   4831  1.1.1.11  mrg 					 dst_dimensions, src_dimensions,
   4832  1.1.1.11  mrg 					 dst_devicep, src_devicep);
   4833   1.1.1.2  mrg 
   4834  1.1.1.11  mrg   return ret;
   4835  1.1.1.11  mrg }
   4836   1.1.1.2  mrg 
/* Argument bundle handed to omp_target_memcpy_rect_async_helper through
   GOMP_task; the fields mirror the parameters of omp_target_memcpy_rect.
   The pointed-to arrays are borrowed from the caller, not copied.  */
typedef struct
{
  void *dst;			/* Destination base pointer.  */
  const void *src;		/* Source base pointer.  */
  size_t element_size;		/* Size of one element in bytes.  */
  const size_t *volume;		/* Extent of the copied volume per dimension.  */
  const size_t *dst_offsets;	/* Per-dimension offsets into DST.  */
  const size_t *src_offsets;	/* Per-dimension offsets into SRC.  */
  const size_t *dst_dimensions;	/* Full extents of the DST array.  */
  const size_t *src_dimensions;	/* Full extents of the SRC array.  */
  struct gomp_device_descr *dst_devicep;  /* Destination device or NULL (host).  */
  struct gomp_device_descr *src_devicep;  /* Source device or NULL (host).  */
  int num_dims;			/* Number of dimensions.  */
} omp_target_memcpy_rect_data;
   4851   1.1.1.2  mrg 
   4852  1.1.1.11  mrg static void
   4853  1.1.1.11  mrg omp_target_memcpy_rect_async_helper (void *args)
   4854  1.1.1.11  mrg {
   4855  1.1.1.11  mrg   omp_target_memcpy_rect_data *a = args;
   4856  1.1.1.11  mrg   int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
   4857  1.1.1.11  mrg 					 a->num_dims, a->volume, a->dst_offsets,
   4858  1.1.1.11  mrg 					 a->src_offsets, a->dst_dimensions,
   4859  1.1.1.11  mrg 					 a->src_dimensions, a->dst_devicep,
   4860  1.1.1.11  mrg 					 a->src_devicep);
   4861  1.1.1.11  mrg   if (ret)
   4862  1.1.1.11  mrg     gomp_fatal ("omp_target_memcpy_rect failed");
   4863  1.1.1.11  mrg }
   4864   1.1.1.2  mrg 
   4865  1.1.1.11  mrg int
   4866  1.1.1.11  mrg omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
   4867  1.1.1.11  mrg 			      int num_dims, const size_t *volume,
   4868  1.1.1.11  mrg 			      const size_t *dst_offsets,
   4869  1.1.1.11  mrg 			      const size_t *src_offsets,
   4870  1.1.1.11  mrg 			      const size_t *dst_dimensions,
   4871  1.1.1.11  mrg 			      const size_t *src_dimensions,
   4872  1.1.1.11  mrg 			      int dst_device_num, int src_device_num,
   4873  1.1.1.11  mrg 			      int depobj_count, omp_depend_t *depobj_list)
   4874  1.1.1.11  mrg {
   4875  1.1.1.11  mrg   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
   4876  1.1.1.11  mrg   unsigned flags = 0;
   4877  1.1.1.11  mrg   int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
   4878  1.1.1.11  mrg 					    src_device_num, &dst_devicep,
   4879  1.1.1.11  mrg 					    &src_devicep);
   4880  1.1.1.11  mrg   void *depend[depobj_count + 5];
   4881  1.1.1.11  mrg   int i;
   4882   1.1.1.2  mrg 
   4883  1.1.1.11  mrg   omp_target_memcpy_rect_data s = {
   4884  1.1.1.11  mrg     .dst = dst,
   4885  1.1.1.11  mrg     .src = src,
   4886  1.1.1.11  mrg     .element_size = element_size,
   4887  1.1.1.11  mrg     .num_dims = num_dims,
   4888  1.1.1.11  mrg     .volume = volume,
   4889  1.1.1.11  mrg     .dst_offsets = dst_offsets,
   4890  1.1.1.11  mrg     .src_offsets = src_offsets,
   4891  1.1.1.11  mrg     .dst_dimensions = dst_dimensions,
   4892  1.1.1.11  mrg     .src_dimensions = src_dimensions,
   4893  1.1.1.11  mrg     .dst_devicep = dst_devicep,
   4894  1.1.1.11  mrg     .src_devicep = src_devicep
   4895  1.1.1.11  mrg   };
   4896  1.1.1.11  mrg 
   4897  1.1.1.11  mrg   if (check)
   4898  1.1.1.11  mrg     return check;
   4899  1.1.1.11  mrg 
   4900  1.1.1.11  mrg   if (depobj_count > 0 && depobj_list != NULL)
   4901  1.1.1.11  mrg     {
   4902  1.1.1.11  mrg       flags |= GOMP_TASK_FLAG_DEPEND;
   4903  1.1.1.11  mrg       depend[0] = 0;
   4904  1.1.1.11  mrg       depend[1] = (void *) (uintptr_t) depobj_count;
   4905  1.1.1.11  mrg       depend[2] = depend[3] = depend[4] = 0;
   4906  1.1.1.11  mrg       for (i = 0; i < depobj_count; ++i)
   4907  1.1.1.11  mrg 	depend[i + 5] = &depobj_list[i];
   4908   1.1.1.2  mrg     }
   4909   1.1.1.2  mrg 
   4910  1.1.1.11  mrg   GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
   4911  1.1.1.11  mrg 	     __alignof__ (s), true, flags, depend, 0, NULL);
   4912   1.1.1.2  mrg 
   4913  1.1.1.11  mrg   return 0;
   4914   1.1.1.2  mrg }
   4915   1.1.1.2  mrg 
/* OpenMP API: associate the host range [HOST_PTR, HOST_PTR + SIZE) with
   the device address DEVICE_PTR + DEVICE_OFFSET on device DEVICE_NUM, by
   inserting a REFCOUNT_INFINITY entry into that device's memory map so
   normal mapping constructs will reuse rather than re-map it.  Returns 0
   on success, EINVAL on failure.  */
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  /* Association with the host itself is rejected.  */
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  /* Only meaningful for OpenMP-capable devices with separate memory;
     shared-memory devices need no association.  */
  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An overlapping mapping already exists; succeed only if it covers
	 the requested range and translates to the very same device
	 address.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Create a synthetic target_mem_desc that owns no device memory
	 (tgt_start == 0, to_free == NULL): with tgt_start zero the
	 tgt_offset field holds the absolute device address.  These same
	 properties are what omp_target_disassociate_ptr checks for.  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      /* REFCOUNT_INFINITY: the entry is never unmapped by regular
	 unmapping, only by omp_target_disassociate_ptr.  */
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
   4977   1.1.1.2  mrg 
   4978   1.1.1.2  mrg int
   4979   1.1.1.7  mrg omp_target_disassociate_ptr (const void *ptr, int device_num)
   4980   1.1.1.2  mrg {
   4981  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device_num, false);
   4982   1.1.1.2  mrg   if (devicep == NULL)
   4983   1.1.1.2  mrg     return EINVAL;
   4984   1.1.1.2  mrg 
   4985   1.1.1.2  mrg   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
   4986   1.1.1.2  mrg     return EINVAL;
   4987   1.1.1.2  mrg 
   4988   1.1.1.2  mrg   gomp_mutex_lock (&devicep->lock);
   4989   1.1.1.2  mrg 
   4990   1.1.1.2  mrg   struct splay_tree_s *mem_map = &devicep->mem_map;
   4991   1.1.1.2  mrg   struct splay_tree_key_s cur_node;
   4992   1.1.1.2  mrg   int ret = EINVAL;
   4993   1.1.1.2  mrg 
   4994   1.1.1.2  mrg   cur_node.host_start = (uintptr_t) ptr;
   4995   1.1.1.2  mrg   cur_node.host_end = cur_node.host_start;
   4996   1.1.1.2  mrg   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
   4997   1.1.1.2  mrg   if (n
   4998   1.1.1.2  mrg       && n->host_start == cur_node.host_start
   4999   1.1.1.2  mrg       && n->refcount == REFCOUNT_INFINITY
   5000   1.1.1.2  mrg       && n->tgt->tgt_start == 0
   5001   1.1.1.2  mrg       && n->tgt->to_free == NULL
   5002   1.1.1.2  mrg       && n->tgt->refcount == 1
   5003   1.1.1.2  mrg       && n->tgt->list_count == 0)
   5004   1.1.1.2  mrg     {
   5005   1.1.1.2  mrg       splay_tree_remove (&devicep->mem_map, n);
   5006   1.1.1.2  mrg       gomp_unmap_tgt (n->tgt);
   5007   1.1.1.2  mrg       ret = 0;
   5008   1.1.1.2  mrg     }
   5009   1.1.1.2  mrg 
   5010   1.1.1.2  mrg   gomp_mutex_unlock (&devicep->lock);
   5011   1.1.1.2  mrg   return ret;
   5012   1.1.1.2  mrg }
   5013   1.1.1.2  mrg 
   5014  1.1.1.11  mrg void *
   5015  1.1.1.11  mrg omp_get_mapped_ptr (const void *ptr, int device_num)
   5016  1.1.1.11  mrg {
   5017  1.1.1.11  mrg   if (device_num == omp_initial_device
   5018  1.1.1.11  mrg       || device_num == omp_get_initial_device ())
   5019  1.1.1.11  mrg     return (void *) ptr;
   5020  1.1.1.11  mrg 
   5021  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device_num, false);
   5022  1.1.1.11  mrg   if (devicep == NULL)
   5023  1.1.1.11  mrg     return NULL;
   5024  1.1.1.11  mrg 
   5025  1.1.1.11  mrg   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
   5026  1.1.1.11  mrg       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
   5027  1.1.1.11  mrg     return (void *) ptr;
   5028  1.1.1.11  mrg 
   5029  1.1.1.11  mrg   gomp_mutex_lock (&devicep->lock);
   5030  1.1.1.11  mrg 
   5031  1.1.1.11  mrg   struct splay_tree_s *mem_map = &devicep->mem_map;
   5032  1.1.1.11  mrg   struct splay_tree_key_s cur_node;
   5033  1.1.1.11  mrg   void *ret = NULL;
   5034  1.1.1.11  mrg 
   5035  1.1.1.11  mrg   cur_node.host_start = (uintptr_t) ptr;
   5036  1.1.1.11  mrg   cur_node.host_end = cur_node.host_start;
   5037  1.1.1.11  mrg   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
   5038  1.1.1.11  mrg 
   5039  1.1.1.11  mrg   if (n)
   5040  1.1.1.11  mrg     {
   5041  1.1.1.11  mrg       uintptr_t offset = cur_node.host_start - n->host_start;
   5042  1.1.1.11  mrg       ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
   5043  1.1.1.11  mrg     }
   5044  1.1.1.11  mrg 
   5045  1.1.1.11  mrg   gomp_mutex_unlock (&devicep->lock);
   5046  1.1.1.11  mrg 
   5047  1.1.1.11  mrg   return ret;
   5048  1.1.1.11  mrg }
   5049  1.1.1.11  mrg 
   5050  1.1.1.11  mrg int
   5051  1.1.1.11  mrg omp_target_is_accessible (const void *ptr, size_t size, int device_num)
   5052  1.1.1.11  mrg {
   5053  1.1.1.11  mrg   if (device_num == omp_initial_device
   5054  1.1.1.11  mrg       || device_num == gomp_get_num_devices ())
   5055  1.1.1.11  mrg     return true;
   5056  1.1.1.11  mrg 
   5057  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device_num, false);
   5058  1.1.1.11  mrg   if (devicep == NULL)
   5059  1.1.1.11  mrg     return false;
   5060  1.1.1.11  mrg 
   5061  1.1.1.11  mrg   /* TODO: Unified shared memory must be handled when available.  */
   5062  1.1.1.11  mrg 
   5063  1.1.1.11  mrg   return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
   5064  1.1.1.11  mrg }
   5065  1.1.1.11  mrg 
   5066   1.1.1.7  mrg int
   5067   1.1.1.7  mrg omp_pause_resource (omp_pause_resource_t kind, int device_num)
   5068   1.1.1.7  mrg {
   5069   1.1.1.7  mrg   (void) kind;
   5070  1.1.1.11  mrg   if (device_num == omp_initial_device
   5071  1.1.1.11  mrg       || device_num == gomp_get_num_devices ())
   5072   1.1.1.7  mrg     return gomp_pause_host ();
   5073  1.1.1.11  mrg 
   5074  1.1.1.11  mrg   struct gomp_device_descr *devicep = resolve_device (device_num, false);
   5075  1.1.1.11  mrg   if (devicep == NULL)
   5076   1.1.1.7  mrg     return -1;
   5077  1.1.1.11  mrg 
   5078   1.1.1.7  mrg   /* Do nothing for target devices for now.  */
   5079   1.1.1.7  mrg   return 0;
   5080   1.1.1.7  mrg }
   5081   1.1.1.7  mrg 
   5082   1.1.1.7  mrg int
   5083   1.1.1.7  mrg omp_pause_resource_all (omp_pause_resource_t kind)
   5084   1.1.1.7  mrg {
   5085   1.1.1.7  mrg   (void) kind;
   5086   1.1.1.7  mrg   if (gomp_pause_host ())
   5087   1.1.1.7  mrg     return -1;
   5088   1.1.1.7  mrg   /* Do nothing for target devices for now.  */
   5089   1.1.1.7  mrg   return 0;
   5090   1.1.1.7  mrg }
   5091   1.1.1.7  mrg 
/* Internal aliases of the pause entry points for calls from within
   libgomp (the ialias macro presumably comes from libgomp.h, included
   above — confirm there).  */
ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
   5094   1.1.1.7  mrg 
   5095       1.1  mrg #ifdef PLUGIN_SUPPORT
   5096       1.1  mrg 
/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.
   On failure an error is reported via gomp_error (unless OFFLOAD_DEFAULTED
   and dlopen itself failed) and the plugin handle, if any, is closed.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    /* With offloading only defaulted (not explicitly requested), a plugin
       that fails to load is silently skipped rather than reported.  */
#if OFFLOAD_DEFAULTED
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* Reject plugins built against a different libgomp plugin ABI.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  /* Functions every plugin must provide.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  /* Rectangular-copy entry points are optional plugin extensions.  */
  DLSYM_OPT (memcpy2d, memcpy2d);
  DLSYM_OPT (memcpy3d, memcpy3d);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      /* The CUDA hooks are all-or-nothing: count how many resolve.  */
      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
   5212   1.1.1.2  mrg 
   5213   1.1.1.2  mrg /* This function finalizes all initialized devices.  */
   5214   1.1.1.2  mrg 
   5215   1.1.1.2  mrg static void
   5216   1.1.1.2  mrg gomp_target_fini (void)
   5217   1.1.1.2  mrg {
   5218   1.1.1.2  mrg   int i;
   5219   1.1.1.2  mrg   for (i = 0; i < num_devices; i++)
   5220       1.1  mrg     {
   5221   1.1.1.3  mrg       bool ret = true;
   5222   1.1.1.2  mrg       struct gomp_device_descr *devicep = &devices[i];
   5223   1.1.1.2  mrg       gomp_mutex_lock (&devicep->lock);
   5224   1.1.1.2  mrg       if (devicep->state == GOMP_DEVICE_INITIALIZED)
   5225   1.1.1.8  mrg 	ret = gomp_fini_device (devicep);
   5226   1.1.1.2  mrg       gomp_mutex_unlock (&devicep->lock);
   5227   1.1.1.3  mrg       if (!ret)
   5228   1.1.1.3  mrg 	gomp_fatal ("device finalization failed");
   5229       1.1  mrg     }
   5230       1.1  mrg }
   5231       1.1  mrg 
/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load these.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
   by the others.  On allocation failure the device list degrades to
   empty rather than aborting.  */

static void
gomp_target_init (void)
{
  const char *prefix ="libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devs;
  int num_devs = 0, num_devs_openmp;
  struct gomp_device_descr *devs = NULL;

  /* OMP_TARGET_OFFLOAD=disabled: leave num_devices at zero.  */
  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
    return;

  /* OFFLOAD_PLUGINS is a comma-separated list of plugin base names;
     each becomes "libgomp-plugin-<name><soname-suffix>".  */
  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devs = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    /* Ask the plugin how many devices satisfy the program's
	       'omp requires' mask (minus the target-used marker bit).  */
	    int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
	    new_num_devs = current_device.get_num_devices_func (omp_req);
	    if (gomp_debug_var > 0 && new_num_devs < 0)
	      {
		/* Negative count: devices exist but cannot fulfill the
		   requirements.  With debugging on, explain why, but only
		   if an offload image for this device type is present.  */
		bool found = false;
		int type = current_device.get_type_func ();
		for (int img = 0; img < num_offload_images; img++)
		  if (type == offload_images[img].type)
		    found = true;
		if (found)
		  {
		    /* Buffer sized for the longest possible
		       requirements string.  */
		    char buf[sizeof ("unified_address, unified_shared_memory, "
				     "reverse_offload")];
		    gomp_requires_to_name (buf, sizeof (buf), omp_req);
		    char *name = (char *) malloc (cur_len + 1);
		    memcpy (name, cur, cur_len);
		    name[cur_len] = '\0';
		    gomp_debug (1,
				"%s devices present but 'omp requires %s' "
				"cannot be fulfilled\n", name, buf);
		    free (name);
		  }
	      }
	    else if (new_num_devs >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devs = realloc (devs, (num_devs + new_num_devs)
				      * sizeof (struct gomp_device_descr));
		if (!devs)
		  {
		    /* NOTE(review): on realloc failure the old array is
		       leaked, but num_devs = 0 keeps the runtime
		       consistent (no devices).  */
		    num_devs = 0;
		    free (plugin_name);
		    break;
		  }

		/* One descriptor per device instance, distinguished only
		   by target_id; each gets its own lock.  */
		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.mem_map_rev.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		for (i = 0; i < new_num_devs; i++)
		  {
		    current_device.target_id = i;
		    devs[num_devs] = current_device;
		    gomp_mutex_init (&devs[num_devs].lock);
		    num_devs++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devs_s
    = malloc (num_devs * sizeof (struct gomp_device_descr));
  if (!devs_s)
    {
      /* Allocation failure: drop all devices (the copy loops below then
	 do nothing and devs ends up NULL).  */
      num_devs = 0;
      free (devs);
      devs = NULL;
    }
  num_devs_openmp = 0;
  for (i = 0; i < num_devs; i++)
    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devs_s[num_devs_openmp++] = devs[i];
  int num_devs_after_openmp = num_devs_openmp;
  for (i = 0; i < num_devs; i++)
    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devs_s[num_devs_after_openmp++] = devs[i];
  free (devs);
  devs = devs_s;

  for (i = 0; i < num_devs; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devs[i]);
    }
  if (gomp_global_icv.default_device_var == INT_MIN)
    {
       /* This implies OMP_TARGET_OFFLOAD=mandatory.  */
       struct gomp_icv_list *none;
       none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
       gomp_global_icv.default_device_var = (num_devs_openmp
					     ? 0 : omp_invalid_device);
       none->icvs.default_device_var = gomp_global_icv.default_device_var;
    }

  /* Publish the final device table and arrange for finalization at
     process exit.  */
  num_devices = num_devs;
  num_devices_openmp = num_devs_openmp;
  devices = devs;
  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
   5381       1.1  mrg 
   5382       1.1  mrg #else /* PLUGIN_SUPPORT */
   5383       1.1  mrg /* If dlfcn.h is unavailable we always fallback to host execution.
   5384       1.1  mrg    GOMP_target* routines are just stubs for this case.  */
    5385       1.1  mrg static void
    5386       1.1  mrg gomp_target_init (void)
    5387       1.1  mrg {
    5388       1.1  mrg   /* Without dlfcn.h there are no offload plugins to load, so there is
    5389       1.1  mrg      nothing to initialize; all GOMP_target* entry points fall back to
    5390       1.1  mrg      host execution.  */
    5391       1.1  mrg }
   5389       1.1  mrg #endif /* PLUGIN_SUPPORT */
   5390