Home | History | Annotate | Line # | Download | only in libgomp
oacc-parallel.c revision 1.5
      1  1.5  mrg /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
      2  1.1  mrg 
      3  1.1  mrg    Contributed by Mentor Embedded.
      4  1.1  mrg 
      5  1.1  mrg    This file is part of the GNU Offloading and Multi Processing Library
      6  1.1  mrg    (libgomp).
      7  1.1  mrg 
      8  1.1  mrg    Libgomp is free software; you can redistribute it and/or modify it
      9  1.1  mrg    under the terms of the GNU General Public License as published by
     10  1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
     11  1.1  mrg    any later version.
     12  1.1  mrg 
     13  1.1  mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     14  1.1  mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     15  1.1  mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     16  1.1  mrg    more details.
     17  1.1  mrg 
     18  1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     19  1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     20  1.1  mrg    3.1, as published by the Free Software Foundation.
     21  1.1  mrg 
     22  1.1  mrg    You should have received a copy of the GNU General Public License and
     23  1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     24  1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     25  1.1  mrg    <http://www.gnu.org/licenses/>.  */
     26  1.1  mrg 
     27  1.1  mrg /* This file handles OpenACC constructs.  */
     28  1.1  mrg 
     29  1.1  mrg #include "openacc.h"
     30  1.1  mrg #include "libgomp.h"
     31  1.1  mrg #include "libgomp_g.h"
     32  1.1  mrg #include "gomp-constants.h"
     33  1.1  mrg #include "oacc-int.h"
     34  1.1  mrg #ifdef HAVE_INTTYPES_H
     35  1.1  mrg # include <inttypes.h>  /* For PRIu64.  */
     36  1.1  mrg #endif
     37  1.1  mrg #include <string.h>
     38  1.1  mrg #include <stdarg.h>
     39  1.1  mrg #include <assert.h>
     40  1.1  mrg 
     41  1.5  mrg 
/* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
   continue to support the following two legacy values.  Both properties are
   checked at compile time: the legacy GOMP_DEVICE_ICV value must unmarshal
   to "no flags set", and the legacy GOMP_DEVICE_HOST_FALLBACK value must
   unmarshal to exactly GOACC_FLAG_HOST_FALLBACK.  */
_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
		"legacy GOMP_DEVICE_ICV broken");
_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
		== GOACC_FLAG_HOST_FALLBACK,
		"legacy GOMP_DEVICE_HOST_FALLBACK broken");
     49  1.5  mrg 
     50  1.5  mrg 
     51  1.5  mrg /* Returns the number of mappings associated with the pointer or pset. PSET
     52  1.5  mrg    have three mappings, whereas pointer have two.  */
     53  1.5  mrg 
     54  1.1  mrg static int
     55  1.5  mrg find_pointer (int pos, size_t mapnum, unsigned short *kinds)
     56  1.1  mrg {
     57  1.1  mrg   if (pos + 1 >= mapnum)
     58  1.1  mrg     return 0;
     59  1.1  mrg 
     60  1.1  mrg   unsigned char kind = kinds[pos+1] & 0xff;
     61  1.1  mrg 
     62  1.5  mrg   if (kind == GOMP_MAP_TO_PSET)
     63  1.5  mrg     return 3;
     64  1.5  mrg   else if (kind == GOMP_MAP_POINTER)
     65  1.5  mrg     return 2;
     66  1.5  mrg 
     67  1.5  mrg   return 0;
     68  1.5  mrg }
     69  1.5  mrg 
     70  1.5  mrg /* Handle the mapping pair that are presented when a
     71  1.5  mrg    deviceptr clause is used with Fortran.  */
     72  1.5  mrg 
     73  1.5  mrg static void
     74  1.5  mrg handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
     75  1.5  mrg 		     unsigned short *kinds)
     76  1.5  mrg {
     77  1.5  mrg   int i;
     78  1.5  mrg 
     79  1.5  mrg   for (i = 0; i < mapnum; i++)
     80  1.5  mrg     {
     81  1.5  mrg       unsigned short kind1 = kinds[i] & 0xff;
     82  1.5  mrg 
     83  1.5  mrg       /* Handle Fortran deviceptr clause.  */
     84  1.5  mrg       if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
     85  1.5  mrg 	{
     86  1.5  mrg 	  unsigned short kind2;
     87  1.5  mrg 
     88  1.5  mrg 	  if (i < (signed)mapnum - 1)
     89  1.5  mrg 	    kind2 = kinds[i + 1] & 0xff;
     90  1.5  mrg 	  else
     91  1.5  mrg 	    kind2 = 0xffff;
     92  1.5  mrg 
     93  1.5  mrg 	  if (sizes[i] == sizeof (void *))
     94  1.5  mrg 	    continue;
     95  1.5  mrg 
     96  1.5  mrg 	  /* At this point, we're dealing with a Fortran deviceptr.
     97  1.5  mrg 	     If the next element is not what we're expecting, then
     98  1.5  mrg 	     this is an instance of where the deviceptr variable was
     99  1.5  mrg 	     not used within the region and the pointer was removed
    100  1.5  mrg 	     by the gimplifier.  */
    101  1.5  mrg 	  if (kind2 == GOMP_MAP_POINTER
    102  1.5  mrg 	      && sizes[i + 1] == 0
    103  1.5  mrg 	      && hostaddrs[i] == *(void **)hostaddrs[i + 1])
    104  1.5  mrg 	    {
    105  1.5  mrg 	      kinds[i+1] = kinds[i];
    106  1.5  mrg 	      sizes[i+1] = sizeof (void *);
    107  1.5  mrg 	    }
    108  1.5  mrg 
    109  1.5  mrg 	  /* Invalidate the entry.  */
    110  1.5  mrg 	  hostaddrs[i] = NULL;
    111  1.5  mrg 	}
    112  1.5  mrg     }
    113  1.1  mrg }
    114  1.1  mrg 
/* Forward declaration: goacc_wait is defined further down in this file but
   is already needed by GOACC_parallel_keyed and GOACC_enter_exit_data.  */
static void goacc_wait (int async, int num_waits, va_list *ap);
    116  1.3  mrg 
    117  1.3  mrg 
    118  1.5  mrg /* Launch a possibly offloaded function with FLAGS.  FN is the host fn
    119  1.3  mrg    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
    120  1.3  mrg    blocks to be copied to/from the device.  Varadic arguments are
    121  1.3  mrg    keyed optional parameters terminated with a zero.  */
    122  1.1  mrg 
    123  1.1  mrg void
    124  1.5  mrg GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
    125  1.3  mrg 		      size_t mapnum, void **hostaddrs, size_t *sizes,
    126  1.3  mrg 		      unsigned short *kinds, ...)
    127  1.1  mrg {
    128  1.5  mrg   int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
    129  1.5  mrg 
    130  1.1  mrg   va_list ap;
    131  1.1  mrg   struct goacc_thread *thr;
    132  1.1  mrg   struct gomp_device_descr *acc_dev;
    133  1.1  mrg   struct target_mem_desc *tgt;
    134  1.1  mrg   void **devaddrs;
    135  1.1  mrg   unsigned int i;
    136  1.1  mrg   struct splay_tree_key_s k;
    137  1.1  mrg   splay_tree_key tgt_fn_key;
    138  1.1  mrg   void (*tgt_fn);
    139  1.3  mrg   int async = GOMP_ASYNC_SYNC;
    140  1.3  mrg   unsigned dims[GOMP_DIM_MAX];
    141  1.3  mrg   unsigned tag;
    142  1.1  mrg 
    143  1.1  mrg #ifdef HAVE_INTTYPES_H
    144  1.3  mrg   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
    145  1.3  mrg 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
    146  1.1  mrg #else
    147  1.3  mrg   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
    148  1.3  mrg 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
    149  1.1  mrg #endif
    150  1.1  mrg   goacc_lazy_initialize ();
    151  1.1  mrg 
    152  1.1  mrg   thr = goacc_thread ();
    153  1.1  mrg   acc_dev = thr->dev;
    154  1.1  mrg 
    155  1.5  mrg   handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
    156  1.5  mrg 
    157  1.1  mrg   /* Host fallback if "if" clause is false or if the current device is set to
    158  1.1  mrg      the host.  */
    159  1.5  mrg   if (flags & GOACC_FLAG_HOST_FALLBACK)
    160  1.1  mrg     {
    161  1.1  mrg       goacc_save_and_set_bind (acc_device_host);
    162  1.1  mrg       fn (hostaddrs);
    163  1.1  mrg       goacc_restore_bind ();
    164  1.1  mrg       return;
    165  1.1  mrg     }
    166  1.1  mrg   else if (acc_device_type (acc_dev->type) == acc_device_host)
    167  1.1  mrg     {
    168  1.1  mrg       fn (hostaddrs);
    169  1.1  mrg       return;
    170  1.1  mrg     }
    171  1.1  mrg 
    172  1.3  mrg   /* Default: let the runtime choose.  */
    173  1.3  mrg   for (i = 0; i != GOMP_DIM_MAX; i++)
    174  1.3  mrg     dims[i] = 0;
    175  1.3  mrg 
    176  1.3  mrg   va_start (ap, kinds);
    177  1.3  mrg   /* TODO: This will need amending when device_type is implemented.  */
    178  1.3  mrg   while ((tag = va_arg (ap, unsigned)) != 0)
    179  1.3  mrg     {
    180  1.3  mrg       if (GOMP_LAUNCH_DEVICE (tag))
    181  1.3  mrg 	gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
    182  1.3  mrg 		    GOMP_LAUNCH_DEVICE (tag));
    183  1.3  mrg 
    184  1.3  mrg       switch (GOMP_LAUNCH_CODE (tag))
    185  1.3  mrg 	{
    186  1.3  mrg 	case GOMP_LAUNCH_DIM:
    187  1.3  mrg 	  {
    188  1.3  mrg 	    unsigned mask = GOMP_LAUNCH_OP (tag);
    189  1.3  mrg 
    190  1.3  mrg 	    for (i = 0; i != GOMP_DIM_MAX; i++)
    191  1.3  mrg 	      if (mask & GOMP_DIM_MASK (i))
    192  1.3  mrg 		dims[i] = va_arg (ap, unsigned);
    193  1.3  mrg 	  }
    194  1.3  mrg 	  break;
    195  1.3  mrg 
    196  1.3  mrg 	case GOMP_LAUNCH_ASYNC:
    197  1.3  mrg 	  {
    198  1.3  mrg 	    /* Small constant values are encoded in the operand.  */
    199  1.3  mrg 	    async = GOMP_LAUNCH_OP (tag);
    200  1.3  mrg 
    201  1.3  mrg 	    if (async == GOMP_LAUNCH_OP_MAX)
    202  1.3  mrg 	      async = va_arg (ap, unsigned);
    203  1.3  mrg 	    break;
    204  1.3  mrg 	  }
    205  1.3  mrg 
    206  1.3  mrg 	case GOMP_LAUNCH_WAIT:
    207  1.3  mrg 	  {
    208  1.3  mrg 	    unsigned num_waits = GOMP_LAUNCH_OP (tag);
    209  1.5  mrg 	    goacc_wait (async, num_waits, &ap);
    210  1.3  mrg 	    break;
    211  1.3  mrg 	  }
    212  1.1  mrg 
    213  1.3  mrg 	default:
    214  1.3  mrg 	  gomp_fatal ("unrecognized offload code '%d',"
    215  1.3  mrg 		      " libgomp is too old", GOMP_LAUNCH_CODE (tag));
    216  1.3  mrg 	}
    217  1.3  mrg     }
    218  1.1  mrg   va_end (ap);
    219  1.3  mrg 
    220  1.1  mrg   acc_dev->openacc.async_set_async_func (async);
    221  1.1  mrg 
    222  1.1  mrg   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
    223  1.1  mrg     {
    224  1.1  mrg       k.host_start = (uintptr_t) fn;
    225  1.1  mrg       k.host_end = k.host_start + 1;
    226  1.1  mrg       gomp_mutex_lock (&acc_dev->lock);
    227  1.1  mrg       tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
    228  1.1  mrg       gomp_mutex_unlock (&acc_dev->lock);
    229  1.1  mrg 
    230  1.1  mrg       if (tgt_fn_key == NULL)
    231  1.1  mrg 	gomp_fatal ("target function wasn't mapped");
    232  1.1  mrg 
    233  1.1  mrg       tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
    234  1.1  mrg     }
    235  1.1  mrg   else
    236  1.1  mrg     tgt_fn = (void (*)) fn;
    237  1.1  mrg 
    238  1.1  mrg   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
    239  1.3  mrg 		       GOMP_MAP_VARS_OPENACC);
    240  1.1  mrg 
    241  1.1  mrg   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
    242  1.1  mrg   for (i = 0; i < mapnum; i++)
    243  1.3  mrg     devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
    244  1.5  mrg 			    + tgt->list[i].key->tgt_offset
    245  1.5  mrg 			    + tgt->list[i].offset);
    246  1.1  mrg 
    247  1.3  mrg   acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
    248  1.3  mrg 			      async, dims, tgt);
    249  1.1  mrg 
    250  1.1  mrg   /* If running synchronously, unmap immediately.  */
    251  1.5  mrg   bool copyfrom = true;
    252  1.5  mrg   if (async_synchronous_p (async))
    253  1.1  mrg     gomp_unmap_vars (tgt, true);
    254  1.1  mrg   else
    255  1.5  mrg     {
    256  1.5  mrg       bool async_unmap = false;
    257  1.5  mrg       for (size_t i = 0; i < tgt->list_count; i++)
    258  1.5  mrg 	{
    259  1.5  mrg 	  splay_tree_key k = tgt->list[i].key;
    260  1.5  mrg 	  if (k && k->refcount == 1)
    261  1.5  mrg 	    {
    262  1.5  mrg 	      async_unmap = true;
    263  1.5  mrg 	      break;
    264  1.5  mrg 	    }
    265  1.5  mrg 	}
    266  1.5  mrg       if (async_unmap)
    267  1.5  mrg 	tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
    268  1.5  mrg       else
    269  1.5  mrg 	{
    270  1.5  mrg 	  copyfrom = false;
    271  1.5  mrg 	  gomp_unmap_vars (tgt, copyfrom);
    272  1.5  mrg 	}
    273  1.5  mrg     }
    274  1.1  mrg 
    275  1.1  mrg   acc_dev->openacc.async_set_async_func (acc_async_sync);
    276  1.1  mrg }
    277  1.1  mrg 
    278  1.3  mrg /* Legacy entry point, only provide host execution.  */
    279  1.3  mrg 
    280  1.3  mrg void
    281  1.5  mrg GOACC_parallel (int flags_m, void (*fn) (void *),
    282  1.3  mrg 		size_t mapnum, void **hostaddrs, size_t *sizes,
    283  1.3  mrg 		unsigned short *kinds,
    284  1.3  mrg 		int num_gangs, int num_workers, int vector_length,
    285  1.3  mrg 		int async, int num_waits, ...)
    286  1.3  mrg {
    287  1.3  mrg   goacc_save_and_set_bind (acc_device_host);
    288  1.3  mrg   fn (hostaddrs);
    289  1.3  mrg   goacc_restore_bind ();
    290  1.3  mrg }
    291  1.3  mrg 
    292  1.1  mrg void
    293  1.5  mrg GOACC_data_start (int flags_m, size_t mapnum,
    294  1.1  mrg 		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
    295  1.1  mrg {
    296  1.5  mrg   int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
    297  1.5  mrg 
    298  1.1  mrg   struct target_mem_desc *tgt;
    299  1.1  mrg 
    300  1.1  mrg #ifdef HAVE_INTTYPES_H
    301  1.1  mrg   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
    302  1.1  mrg 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
    303  1.1  mrg #else
    304  1.1  mrg   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
    305  1.1  mrg 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
    306  1.1  mrg #endif
    307  1.1  mrg 
    308  1.1  mrg   goacc_lazy_initialize ();
    309  1.1  mrg 
    310  1.1  mrg   struct goacc_thread *thr = goacc_thread ();
    311  1.1  mrg   struct gomp_device_descr *acc_dev = thr->dev;
    312  1.1  mrg 
    313  1.1  mrg   /* Host fallback or 'do nothing'.  */
    314  1.1  mrg   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    315  1.5  mrg       || (flags & GOACC_FLAG_HOST_FALLBACK))
    316  1.1  mrg     {
    317  1.3  mrg       tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
    318  1.3  mrg 			   GOMP_MAP_VARS_OPENACC);
    319  1.1  mrg       tgt->prev = thr->mapped_data;
    320  1.1  mrg       thr->mapped_data = tgt;
    321  1.1  mrg 
    322  1.1  mrg       return;
    323  1.1  mrg     }
    324  1.1  mrg 
    325  1.1  mrg   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
    326  1.1  mrg   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
    327  1.3  mrg 		       GOMP_MAP_VARS_OPENACC);
    328  1.1  mrg   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
    329  1.1  mrg   tgt->prev = thr->mapped_data;
    330  1.1  mrg   thr->mapped_data = tgt;
    331  1.1  mrg }
    332  1.1  mrg 
    333  1.1  mrg void
    334  1.1  mrg GOACC_data_end (void)
    335  1.1  mrg {
    336  1.1  mrg   struct goacc_thread *thr = goacc_thread ();
    337  1.1  mrg   struct target_mem_desc *tgt = thr->mapped_data;
    338  1.1  mrg 
    339  1.1  mrg   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
    340  1.1  mrg   thr->mapped_data = tgt->prev;
    341  1.1  mrg   gomp_unmap_vars (tgt, true);
    342  1.1  mrg   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
    343  1.1  mrg }
    344  1.1  mrg 
    345  1.1  mrg void
    346  1.5  mrg GOACC_enter_exit_data (int flags_m, size_t mapnum,
    347  1.1  mrg 		       void **hostaddrs, size_t *sizes, unsigned short *kinds,
    348  1.1  mrg 		       int async, int num_waits, ...)
    349  1.1  mrg {
    350  1.5  mrg   int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
    351  1.5  mrg 
    352  1.1  mrg   struct goacc_thread *thr;
    353  1.1  mrg   struct gomp_device_descr *acc_dev;
    354  1.1  mrg   bool data_enter = false;
    355  1.1  mrg   size_t i;
    356  1.1  mrg 
    357  1.1  mrg   goacc_lazy_initialize ();
    358  1.1  mrg 
    359  1.1  mrg   thr = goacc_thread ();
    360  1.1  mrg   acc_dev = thr->dev;
    361  1.1  mrg 
    362  1.1  mrg   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    363  1.5  mrg       || (flags & GOACC_FLAG_HOST_FALLBACK))
    364  1.1  mrg     return;
    365  1.1  mrg 
    366  1.3  mrg   if (num_waits)
    367  1.1  mrg     {
    368  1.1  mrg       va_list ap;
    369  1.1  mrg 
    370  1.1  mrg       va_start (ap, num_waits);
    371  1.3  mrg       goacc_wait (async, num_waits, &ap);
    372  1.1  mrg       va_end (ap);
    373  1.1  mrg     }
    374  1.1  mrg 
    375  1.5  mrg   /* Determine whether "finalize" semantics apply to all mappings of this
    376  1.5  mrg      OpenACC directive.  */
    377  1.5  mrg   bool finalize = false;
    378  1.5  mrg   if (mapnum > 0)
    379  1.5  mrg     {
    380  1.5  mrg       unsigned char kind = kinds[0] & 0xff;
    381  1.5  mrg       if (kind == GOMP_MAP_DELETE
    382  1.5  mrg 	  || kind == GOMP_MAP_FORCE_FROM)
    383  1.5  mrg 	finalize = true;
    384  1.5  mrg     }
    385  1.5  mrg 
    386  1.1  mrg   acc_dev->openacc.async_set_async_func (async);
    387  1.1  mrg 
    388  1.1  mrg   /* Determine if this is an "acc enter data".  */
    389  1.1  mrg   for (i = 0; i < mapnum; ++i)
    390  1.1  mrg     {
    391  1.1  mrg       unsigned char kind = kinds[i] & 0xff;
    392  1.1  mrg 
    393  1.1  mrg       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
    394  1.1  mrg 	continue;
    395  1.1  mrg 
    396  1.1  mrg       if (kind == GOMP_MAP_FORCE_ALLOC
    397  1.1  mrg 	  || kind == GOMP_MAP_FORCE_PRESENT
    398  1.5  mrg 	  || kind == GOMP_MAP_FORCE_TO
    399  1.5  mrg 	  || kind == GOMP_MAP_TO
    400  1.5  mrg 	  || kind == GOMP_MAP_ALLOC)
    401  1.1  mrg 	{
    402  1.1  mrg 	  data_enter = true;
    403  1.1  mrg 	  break;
    404  1.1  mrg 	}
    405  1.1  mrg 
    406  1.5  mrg       if (kind == GOMP_MAP_RELEASE
    407  1.5  mrg 	  || kind == GOMP_MAP_DELETE
    408  1.5  mrg 	  || kind == GOMP_MAP_FROM
    409  1.1  mrg 	  || kind == GOMP_MAP_FORCE_FROM)
    410  1.1  mrg 	break;
    411  1.1  mrg 
    412  1.1  mrg       gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
    413  1.1  mrg 		      kind);
    414  1.1  mrg     }
    415  1.1  mrg 
    416  1.5  mrg   /* In c, non-pointers and arrays are represented by a single data clause.
    417  1.5  mrg      Dynamically allocated arrays and subarrays are represented by a data
    418  1.5  mrg      clause followed by an internal GOMP_MAP_POINTER.
    419  1.5  mrg 
    420  1.5  mrg      In fortran, scalars and not allocated arrays are represented by a
    421  1.5  mrg      single data clause. Allocated arrays and subarrays have three mappings:
    422  1.5  mrg      1) the original data clause, 2) a PSET 3) a pointer to the array data.
    423  1.5  mrg   */
    424  1.5  mrg 
    425  1.1  mrg   if (data_enter)
    426  1.1  mrg     {
    427  1.1  mrg       for (i = 0; i < mapnum; i++)
    428  1.1  mrg 	{
    429  1.1  mrg 	  unsigned char kind = kinds[i] & 0xff;
    430  1.1  mrg 
    431  1.5  mrg 	  /* Scan for pointers and PSETs.  */
    432  1.5  mrg 	  int pointer = find_pointer (i, mapnum, kinds);
    433  1.1  mrg 
    434  1.5  mrg 	  if (!pointer)
    435  1.1  mrg 	    {
    436  1.1  mrg 	      switch (kind)
    437  1.1  mrg 		{
    438  1.5  mrg 		case GOMP_MAP_ALLOC:
    439  1.1  mrg 		case GOMP_MAP_FORCE_ALLOC:
    440  1.1  mrg 		  acc_create (hostaddrs[i], sizes[i]);
    441  1.1  mrg 		  break;
    442  1.5  mrg 		case GOMP_MAP_TO:
    443  1.1  mrg 		case GOMP_MAP_FORCE_TO:
    444  1.5  mrg 		  acc_copyin (hostaddrs[i], sizes[i]);
    445  1.1  mrg 		  break;
    446  1.1  mrg 		default:
    447  1.1  mrg 		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
    448  1.1  mrg 			      kind);
    449  1.1  mrg 		  break;
    450  1.1  mrg 		}
    451  1.1  mrg 	    }
    452  1.1  mrg 	  else
    453  1.1  mrg 	    {
    454  1.5  mrg 	      gomp_acc_insert_pointer (pointer, &hostaddrs[i],
    455  1.5  mrg 				       &sizes[i], &kinds[i]);
    456  1.1  mrg 	      /* Increment 'i' by two because OpenACC requires fortran
    457  1.1  mrg 		 arrays to be contiguous, so each PSET is associated with
    458  1.1  mrg 		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
    459  1.1  mrg 		 one MAP_POINTER.  */
    460  1.5  mrg 	      i += pointer - 1;
    461  1.1  mrg 	    }
    462  1.1  mrg 	}
    463  1.1  mrg     }
    464  1.1  mrg   else
    465  1.1  mrg     for (i = 0; i < mapnum; ++i)
    466  1.1  mrg       {
    467  1.1  mrg 	unsigned char kind = kinds[i] & 0xff;
    468  1.1  mrg 
    469  1.5  mrg 	int pointer = find_pointer (i, mapnum, kinds);
    470  1.1  mrg 
    471  1.5  mrg 	if (!pointer)
    472  1.1  mrg 	  {
    473  1.1  mrg 	    switch (kind)
    474  1.1  mrg 	      {
    475  1.5  mrg 	      case GOMP_MAP_RELEASE:
    476  1.3  mrg 	      case GOMP_MAP_DELETE:
    477  1.5  mrg 		if (acc_is_present (hostaddrs[i], sizes[i]))
    478  1.5  mrg 		  {
    479  1.5  mrg 		    if (finalize)
    480  1.5  mrg 		      acc_delete_finalize (hostaddrs[i], sizes[i]);
    481  1.5  mrg 		    else
    482  1.5  mrg 		      acc_delete (hostaddrs[i], sizes[i]);
    483  1.5  mrg 		  }
    484  1.1  mrg 		break;
    485  1.5  mrg 	      case GOMP_MAP_FROM:
    486  1.1  mrg 	      case GOMP_MAP_FORCE_FROM:
    487  1.5  mrg 		if (finalize)
    488  1.5  mrg 		  acc_copyout_finalize (hostaddrs[i], sizes[i]);
    489  1.5  mrg 		else
    490  1.5  mrg 		  acc_copyout (hostaddrs[i], sizes[i]);
    491  1.1  mrg 		break;
    492  1.1  mrg 	      default:
    493  1.1  mrg 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
    494  1.1  mrg 			    kind);
    495  1.1  mrg 		break;
    496  1.1  mrg 	      }
    497  1.1  mrg 	  }
    498  1.1  mrg 	else
    499  1.1  mrg 	  {
    500  1.5  mrg 	    bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
    501  1.5  mrg 			     || kind == GOMP_MAP_FROM);
    502  1.5  mrg 	    gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
    503  1.5  mrg 				     finalize, pointer);
    504  1.1  mrg 	    /* See the above comment.  */
    505  1.5  mrg 	    i += pointer - 1;
    506  1.1  mrg 	  }
    507  1.1  mrg       }
    508  1.1  mrg 
    509  1.1  mrg   acc_dev->openacc.async_set_async_func (acc_async_sync);
    510  1.1  mrg }
    511  1.1  mrg 
    512  1.1  mrg static void
    513  1.3  mrg goacc_wait (int async, int num_waits, va_list *ap)
    514  1.1  mrg {
    515  1.3  mrg   while (num_waits--)
    516  1.1  mrg     {
    517  1.3  mrg       int qid = va_arg (*ap, int);
    518  1.5  mrg 
    519  1.5  mrg       /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'.  */
    520  1.5  mrg       if (qid == acc_async_noval)
    521  1.5  mrg 	{
    522  1.5  mrg 	  if (async == acc_async_sync)
    523  1.5  mrg 	    acc_wait_all ();
    524  1.5  mrg 	  else
    525  1.5  mrg 	    acc_wait_all_async (async);
    526  1.5  mrg 	  break;
    527  1.5  mrg 	}
    528  1.5  mrg 
    529  1.1  mrg       if (acc_async_test (qid))
    530  1.1  mrg 	continue;
    531  1.1  mrg 
    532  1.3  mrg       if (async == acc_async_sync)
    533  1.3  mrg 	acc_wait (qid);
    534  1.3  mrg       else if (qid == async)
    535  1.3  mrg 	;/* If we're waiting on the same asynchronous queue as we're
    536  1.3  mrg 	    launching on, the queue itself will order work as
    537  1.3  mrg 	    required, so there's no need to wait explicitly.  */
    538  1.3  mrg       else
    539  1.5  mrg 	acc_wait_async (qid, async);
    540  1.1  mrg     }
    541  1.1  mrg }
    542  1.1  mrg 
    543  1.1  mrg void
    544  1.5  mrg GOACC_update (int flags_m, size_t mapnum,
    545  1.1  mrg 	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
    546  1.1  mrg 	      int async, int num_waits, ...)
    547  1.1  mrg {
    548  1.5  mrg   int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
    549  1.5  mrg 
    550  1.1  mrg   size_t i;
    551  1.1  mrg 
    552  1.1  mrg   goacc_lazy_initialize ();
    553  1.1  mrg 
    554  1.1  mrg   struct goacc_thread *thr = goacc_thread ();
    555  1.1  mrg   struct gomp_device_descr *acc_dev = thr->dev;
    556  1.1  mrg 
    557  1.1  mrg   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    558  1.5  mrg       || (flags & GOACC_FLAG_HOST_FALLBACK))
    559  1.1  mrg     return;
    560  1.1  mrg 
    561  1.3  mrg   if (num_waits)
    562  1.1  mrg     {
    563  1.1  mrg       va_list ap;
    564  1.1  mrg 
    565  1.1  mrg       va_start (ap, num_waits);
    566  1.3  mrg       goacc_wait (async, num_waits, &ap);
    567  1.1  mrg       va_end (ap);
    568  1.1  mrg     }
    569  1.1  mrg 
    570  1.1  mrg   acc_dev->openacc.async_set_async_func (async);
    571  1.1  mrg 
    572  1.5  mrg   bool update_device = false;
    573  1.1  mrg   for (i = 0; i < mapnum; ++i)
    574  1.1  mrg     {
    575  1.1  mrg       unsigned char kind = kinds[i] & 0xff;
    576  1.1  mrg 
    577  1.1  mrg       switch (kind)
    578  1.1  mrg 	{
    579  1.1  mrg 	case GOMP_MAP_POINTER:
    580  1.1  mrg 	case GOMP_MAP_TO_PSET:
    581  1.1  mrg 	  break;
    582  1.1  mrg 
    583  1.5  mrg 	case GOMP_MAP_ALWAYS_POINTER:
    584  1.5  mrg 	  if (update_device)
    585  1.5  mrg 	    {
    586  1.5  mrg 	      /* Save the contents of the host pointer.  */
    587  1.5  mrg 	      void *dptr = acc_deviceptr (hostaddrs[i-1]);
    588  1.5  mrg 	      uintptr_t t = *(uintptr_t *) hostaddrs[i];
    589  1.5  mrg 
    590  1.5  mrg 	      /* Update the contents of the host pointer to reflect
    591  1.5  mrg 		 the value of the allocated device memory in the
    592  1.5  mrg 		 previous pointer.  */
    593  1.5  mrg 	      *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
    594  1.5  mrg 	      acc_update_device (hostaddrs[i], sizeof (uintptr_t));
    595  1.5  mrg 
    596  1.5  mrg 	      /* Restore the host pointer.  */
    597  1.5  mrg 	      *(uintptr_t *) hostaddrs[i] = t;
    598  1.5  mrg 	      update_device = false;
    599  1.5  mrg 	    }
    600  1.5  mrg 	  break;
    601  1.5  mrg 
    602  1.5  mrg 	case GOMP_MAP_TO:
    603  1.5  mrg 	  if (!acc_is_present (hostaddrs[i], sizes[i]))
    604  1.5  mrg 	    {
    605  1.5  mrg 	      update_device = false;
    606  1.5  mrg 	      break;
    607  1.5  mrg 	    }
    608  1.5  mrg 	  /* Fallthru  */
    609  1.1  mrg 	case GOMP_MAP_FORCE_TO:
    610  1.5  mrg 	  update_device = true;
    611  1.1  mrg 	  acc_update_device (hostaddrs[i], sizes[i]);
    612  1.1  mrg 	  break;
    613  1.1  mrg 
    614  1.5  mrg 	case GOMP_MAP_FROM:
    615  1.5  mrg 	  if (!acc_is_present (hostaddrs[i], sizes[i]))
    616  1.5  mrg 	    {
    617  1.5  mrg 	      update_device = false;
    618  1.5  mrg 	      break;
    619  1.5  mrg 	    }
    620  1.5  mrg 	  /* Fallthru  */
    621  1.1  mrg 	case GOMP_MAP_FORCE_FROM:
    622  1.5  mrg 	  update_device = false;
    623  1.1  mrg 	  acc_update_self (hostaddrs[i], sizes[i]);
    624  1.1  mrg 	  break;
    625  1.1  mrg 
    626  1.1  mrg 	default:
    627  1.1  mrg 	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
    628  1.1  mrg 	  break;
    629  1.1  mrg 	}
    630  1.1  mrg     }
    631  1.1  mrg 
    632  1.1  mrg   acc_dev->openacc.async_set_async_func (acc_async_sync);
    633  1.1  mrg }
    634  1.1  mrg 
    635  1.1  mrg void
    636  1.1  mrg GOACC_wait (int async, int num_waits, ...)
    637  1.1  mrg {
    638  1.3  mrg   if (num_waits)
    639  1.3  mrg     {
    640  1.3  mrg       va_list ap;
    641  1.1  mrg 
    642  1.3  mrg       va_start (ap, num_waits);
    643  1.3  mrg       goacc_wait (async, num_waits, &ap);
    644  1.3  mrg       va_end (ap);
    645  1.3  mrg     }
    646  1.3  mrg   else if (async == acc_async_sync)
    647  1.3  mrg     acc_wait_all ();
    648  1.5  mrg   else
    649  1.5  mrg     acc_wait_all_async (async);
    650  1.1  mrg }
    651  1.1  mrg 
/* Report the number of threads; this implementation always answers 1.  */
int
GOACC_get_num_threads (void)
{
  enum { reported_thread_count = 1 };

  return reported_thread_count;
}
    657  1.1  mrg 
/* Report the calling thread's number; this implementation always
   answers 0.  */
int
GOACC_get_thread_num (void)
{
  enum { reported_thread_index = 0 };

  return reported_thread_index;
}
    663  1.3  mrg 
    664  1.3  mrg void
    665  1.5  mrg GOACC_declare (int flags_m, size_t mapnum,
    666  1.3  mrg 	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
    667  1.3  mrg {
    668  1.3  mrg   int i;
    669  1.3  mrg 
    670  1.3  mrg   for (i = 0; i < mapnum; i++)
    671  1.3  mrg     {
    672  1.3  mrg       unsigned char kind = kinds[i] & 0xff;
    673  1.3  mrg 
    674  1.3  mrg       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
    675  1.3  mrg 	continue;
    676  1.3  mrg 
    677  1.3  mrg       switch (kind)
    678  1.3  mrg 	{
    679  1.3  mrg 	  case GOMP_MAP_FORCE_ALLOC:
    680  1.3  mrg 	  case GOMP_MAP_FORCE_FROM:
    681  1.3  mrg 	  case GOMP_MAP_FORCE_TO:
    682  1.3  mrg 	  case GOMP_MAP_POINTER:
    683  1.5  mrg 	  case GOMP_MAP_RELEASE:
    684  1.3  mrg 	  case GOMP_MAP_DELETE:
    685  1.5  mrg 	    GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
    686  1.5  mrg 				   &kinds[i], GOMP_ASYNC_SYNC, 0);
    687  1.3  mrg 	    break;
    688  1.3  mrg 
    689  1.3  mrg 	  case GOMP_MAP_FORCE_DEVICEPTR:
    690  1.3  mrg 	    break;
    691  1.3  mrg 
    692  1.3  mrg 	  case GOMP_MAP_ALLOC:
    693  1.3  mrg 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
    694  1.5  mrg 	      GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
    695  1.5  mrg 				     &kinds[i], GOMP_ASYNC_SYNC, 0);
    696  1.3  mrg 	    break;
    697  1.3  mrg 
    698  1.3  mrg 	  case GOMP_MAP_TO:
    699  1.5  mrg 	    GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
    700  1.5  mrg 				   &kinds[i], GOMP_ASYNC_SYNC, 0);
    701  1.3  mrg 
    702  1.3  mrg 	    break;
    703  1.3  mrg 
    704  1.3  mrg 	  case GOMP_MAP_FROM:
    705  1.5  mrg 	    GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
    706  1.5  mrg 				   &kinds[i], GOMP_ASYNC_SYNC, 0);
    707  1.3  mrg 	    break;
    708  1.3  mrg 
    709  1.3  mrg 	  case GOMP_MAP_FORCE_PRESENT:
    710  1.3  mrg 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
    711  1.3  mrg 	      gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
    712  1.3  mrg 			  (unsigned long) sizes[i]);
    713  1.3  mrg 	    break;
    714  1.3  mrg 
    715  1.3  mrg 	  default:
    716  1.3  mrg 	    assert (0);
    717  1.3  mrg 	    break;
    718  1.3  mrg 	}
    719  1.3  mrg     }
    720  1.3  mrg }
    721