/* Copyright (C) 2013-2024 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles OpenACC constructs.  */
     28       1.1  mrg 
     29       1.1  mrg #include "openacc.h"
     30       1.1  mrg #include "libgomp.h"
     31       1.1  mrg #include "gomp-constants.h"
     32       1.1  mrg #include "oacc-int.h"
     33       1.1  mrg #ifdef HAVE_INTTYPES_H
     34       1.1  mrg # include <inttypes.h>  /* For PRIu64.  */
     35       1.1  mrg #endif
     36       1.1  mrg #include <string.h>
     37       1.1  mrg #include <stdarg.h>
     38       1.1  mrg #include <assert.h>
     39       1.1  mrg 
     40   1.1.1.7  mrg 
     41   1.1.1.7  mrg /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
     42   1.1.1.7  mrg    continue to support the following two legacy values.  */
     43   1.1.1.7  mrg _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
     44   1.1.1.7  mrg 		"legacy GOMP_DEVICE_ICV broken");
     45   1.1.1.7  mrg _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
     46   1.1.1.7  mrg 		== GOACC_FLAG_HOST_FALLBACK,
     47   1.1.1.7  mrg 		"legacy GOMP_DEVICE_HOST_FALLBACK broken");
     48   1.1.1.7  mrg 
     49   1.1.1.7  mrg 
     50   1.1.1.7  mrg /* Handle the mapping pair that are presented when a
     51   1.1.1.7  mrg    deviceptr clause is used with Fortran.  */
     52   1.1.1.7  mrg 
     53   1.1.1.7  mrg static void
     54   1.1.1.7  mrg handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
     55   1.1.1.7  mrg 		     unsigned short *kinds)
     56   1.1.1.7  mrg {
     57   1.1.1.7  mrg   int i;
     58   1.1.1.7  mrg 
     59   1.1.1.7  mrg   for (i = 0; i < mapnum; i++)
     60   1.1.1.7  mrg     {
     61   1.1.1.7  mrg       unsigned short kind1 = kinds[i] & 0xff;
     62   1.1.1.7  mrg 
     63   1.1.1.7  mrg       /* Handle Fortran deviceptr clause.  */
     64   1.1.1.7  mrg       if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
     65   1.1.1.7  mrg 	{
     66   1.1.1.7  mrg 	  unsigned short kind2;
     67   1.1.1.7  mrg 
     68   1.1.1.7  mrg 	  if (i < (signed)mapnum - 1)
     69   1.1.1.7  mrg 	    kind2 = kinds[i + 1] & 0xff;
     70   1.1.1.7  mrg 	  else
     71   1.1.1.7  mrg 	    kind2 = 0xffff;
     72   1.1.1.7  mrg 
     73   1.1.1.7  mrg 	  if (sizes[i] == sizeof (void *))
     74   1.1.1.7  mrg 	    continue;
     75   1.1.1.7  mrg 
     76   1.1.1.7  mrg 	  /* At this point, we're dealing with a Fortran deviceptr.
     77   1.1.1.7  mrg 	     If the next element is not what we're expecting, then
     78   1.1.1.7  mrg 	     this is an instance of where the deviceptr variable was
     79   1.1.1.7  mrg 	     not used within the region and the pointer was removed
     80   1.1.1.7  mrg 	     by the gimplifier.  */
     81   1.1.1.7  mrg 	  if (kind2 == GOMP_MAP_POINTER
     82   1.1.1.7  mrg 	      && sizes[i + 1] == 0
     83   1.1.1.7  mrg 	      && hostaddrs[i] == *(void **)hostaddrs[i + 1])
     84   1.1.1.7  mrg 	    {
     85   1.1.1.7  mrg 	      kinds[i+1] = kinds[i];
     86   1.1.1.7  mrg 	      sizes[i+1] = sizeof (void *);
     87   1.1.1.7  mrg 	    }
     88   1.1.1.7  mrg 
     89   1.1.1.7  mrg 	  /* Invalidate the entry.  */
     90   1.1.1.7  mrg 	  hostaddrs[i] = NULL;
     91   1.1.1.7  mrg 	}
     92   1.1.1.7  mrg     }
     93       1.1  mrg }
     94       1.1  mrg 
     95   1.1.1.2  mrg 
     96   1.1.1.7  mrg /* Launch a possibly offloaded function with FLAGS.  FN is the host fn
     97   1.1.1.2  mrg    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
     98   1.1.1.2  mrg    blocks to be copied to/from the device.  Varadic arguments are
     99   1.1.1.2  mrg    keyed optional parameters terminated with a zero.  */
    100       1.1  mrg 
    101       1.1  mrg void
    102   1.1.1.7  mrg GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
    103   1.1.1.2  mrg 		      size_t mapnum, void **hostaddrs, size_t *sizes,
    104   1.1.1.2  mrg 		      unsigned short *kinds, ...)
    105       1.1  mrg {
    106   1.1.1.7  mrg   int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
    107   1.1.1.7  mrg 
    108       1.1  mrg   va_list ap;
    109       1.1  mrg   struct goacc_thread *thr;
    110       1.1  mrg   struct gomp_device_descr *acc_dev;
    111       1.1  mrg   unsigned int i;
    112       1.1  mrg   struct splay_tree_key_s k;
    113       1.1  mrg   splay_tree_key tgt_fn_key;
    114       1.1  mrg   void (*tgt_fn);
    115   1.1.1.2  mrg   int async = GOMP_ASYNC_SYNC;
    116   1.1.1.2  mrg   unsigned dims[GOMP_DIM_MAX];
    117   1.1.1.2  mrg   unsigned tag;
    118       1.1  mrg 
    119       1.1  mrg #ifdef HAVE_INTTYPES_H
    120   1.1.1.2  mrg   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
    121   1.1.1.2  mrg 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
    122       1.1  mrg #else
    123   1.1.1.2  mrg   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
    124   1.1.1.2  mrg 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
    125       1.1  mrg #endif
    126       1.1  mrg   goacc_lazy_initialize ();
    127       1.1  mrg 
    128       1.1  mrg   thr = goacc_thread ();
    129       1.1  mrg   acc_dev = thr->dev;
    130       1.1  mrg 
    131   1.1.1.8  mrg   bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
    132   1.1.1.8  mrg 
    133   1.1.1.8  mrg   acc_prof_info prof_info;
    134   1.1.1.8  mrg   if (profiling_p)
    135   1.1.1.8  mrg     {
    136   1.1.1.8  mrg       thr->prof_info = &prof_info;
    137   1.1.1.8  mrg 
    138   1.1.1.8  mrg       prof_info.event_type = acc_ev_compute_construct_start;
    139   1.1.1.8  mrg       prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
    140   1.1.1.8  mrg       prof_info.version = _ACC_PROF_INFO_VERSION;
    141   1.1.1.8  mrg       prof_info.device_type = acc_device_type (acc_dev->type);
    142   1.1.1.8  mrg       prof_info.device_number = acc_dev->target_id;
    143   1.1.1.8  mrg       prof_info.thread_id = -1;
    144   1.1.1.8  mrg       prof_info.async = async;
    145   1.1.1.8  mrg       prof_info.async_queue = prof_info.async;
    146   1.1.1.8  mrg       prof_info.src_file = NULL;
    147   1.1.1.8  mrg       prof_info.func_name = NULL;
    148   1.1.1.8  mrg       prof_info.line_no = -1;
    149   1.1.1.8  mrg       prof_info.end_line_no = -1;
    150   1.1.1.8  mrg       prof_info.func_line_no = -1;
    151   1.1.1.8  mrg       prof_info.func_end_line_no = -1;
    152   1.1.1.8  mrg     }
    153   1.1.1.8  mrg   acc_event_info compute_construct_event_info;
    154   1.1.1.8  mrg   if (profiling_p)
    155   1.1.1.8  mrg     {
    156   1.1.1.8  mrg       compute_construct_event_info.other_event.event_type
    157   1.1.1.8  mrg 	= prof_info.event_type;
    158   1.1.1.8  mrg       compute_construct_event_info.other_event.valid_bytes
    159   1.1.1.8  mrg 	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
    160   1.1.1.8  mrg       compute_construct_event_info.other_event.parent_construct
    161   1.1.1.8  mrg 	= acc_construct_parallel;
    162   1.1.1.8  mrg       compute_construct_event_info.other_event.implicit = 0;
    163   1.1.1.8  mrg       compute_construct_event_info.other_event.tool_info = NULL;
    164   1.1.1.8  mrg     }
    165   1.1.1.8  mrg   acc_api_info api_info;
    166   1.1.1.8  mrg   if (profiling_p)
    167   1.1.1.8  mrg     {
    168   1.1.1.8  mrg       thr->api_info = &api_info;
    169   1.1.1.8  mrg 
    170   1.1.1.8  mrg       api_info.device_api = acc_device_api_none;
    171   1.1.1.8  mrg       api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
    172   1.1.1.8  mrg       api_info.device_type = prof_info.device_type;
    173   1.1.1.8  mrg       api_info.vendor = -1;
    174   1.1.1.8  mrg       api_info.device_handle = NULL;
    175   1.1.1.8  mrg       api_info.context_handle = NULL;
    176   1.1.1.8  mrg       api_info.async_handle = NULL;
    177   1.1.1.8  mrg     }
    178   1.1.1.8  mrg 
    179   1.1.1.8  mrg   if (profiling_p)
    180   1.1.1.8  mrg     goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
    181   1.1.1.8  mrg 			      &api_info);
    182   1.1.1.8  mrg 
    183   1.1.1.7  mrg   handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
    184   1.1.1.7  mrg 
    185       1.1  mrg   /* Host fallback if "if" clause is false or if the current device is set to
    186       1.1  mrg      the host.  */
    187  1.1.1.10  mrg   if ((flags & GOACC_FLAG_HOST_FALLBACK)
    188  1.1.1.10  mrg       /* TODO: a proper pthreads based "multi-core CPU" local device
    189  1.1.1.10  mrg 	 implementation. Currently, this is still the same as host-fallback.  */
    190  1.1.1.10  mrg       || (flags & GOACC_FLAG_LOCAL_DEVICE))
    191       1.1  mrg     {
    192   1.1.1.8  mrg       prof_info.device_type = acc_device_host;
    193   1.1.1.8  mrg       api_info.device_type = prof_info.device_type;
    194       1.1  mrg       goacc_save_and_set_bind (acc_device_host);
    195       1.1  mrg       fn (hostaddrs);
    196       1.1  mrg       goacc_restore_bind ();
    197   1.1.1.8  mrg       goto out_prof;
    198       1.1  mrg     }
    199       1.1  mrg   else if (acc_device_type (acc_dev->type) == acc_device_host)
    200       1.1  mrg     {
    201       1.1  mrg       fn (hostaddrs);
    202   1.1.1.8  mrg       goto out_prof;
    203       1.1  mrg     }
    204       1.1  mrg 
    205   1.1.1.2  mrg   /* Default: let the runtime choose.  */
    206   1.1.1.2  mrg   for (i = 0; i != GOMP_DIM_MAX; i++)
    207   1.1.1.2  mrg     dims[i] = 0;
    208   1.1.1.2  mrg 
    209   1.1.1.2  mrg   va_start (ap, kinds);
    210   1.1.1.2  mrg   /* TODO: This will need amending when device_type is implemented.  */
    211   1.1.1.2  mrg   while ((tag = va_arg (ap, unsigned)) != 0)
    212   1.1.1.2  mrg     {
    213   1.1.1.2  mrg       if (GOMP_LAUNCH_DEVICE (tag))
    214   1.1.1.2  mrg 	gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
    215   1.1.1.2  mrg 		    GOMP_LAUNCH_DEVICE (tag));
    216       1.1  mrg 
    217   1.1.1.2  mrg       switch (GOMP_LAUNCH_CODE (tag))
    218   1.1.1.2  mrg 	{
    219   1.1.1.2  mrg 	case GOMP_LAUNCH_DIM:
    220   1.1.1.2  mrg 	  {
    221   1.1.1.2  mrg 	    unsigned mask = GOMP_LAUNCH_OP (tag);
    222   1.1.1.2  mrg 
    223   1.1.1.2  mrg 	    for (i = 0; i != GOMP_DIM_MAX; i++)
    224   1.1.1.2  mrg 	      if (mask & GOMP_DIM_MASK (i))
    225   1.1.1.2  mrg 		dims[i] = va_arg (ap, unsigned);
    226   1.1.1.2  mrg 	  }
    227   1.1.1.2  mrg 	  break;
    228   1.1.1.2  mrg 
    229   1.1.1.2  mrg 	case GOMP_LAUNCH_ASYNC:
    230   1.1.1.2  mrg 	  {
    231   1.1.1.2  mrg 	    /* Small constant values are encoded in the operand.  */
    232   1.1.1.2  mrg 	    async = GOMP_LAUNCH_OP (tag);
    233   1.1.1.2  mrg 
    234   1.1.1.2  mrg 	    if (async == GOMP_LAUNCH_OP_MAX)
    235   1.1.1.2  mrg 	      async = va_arg (ap, unsigned);
    236   1.1.1.8  mrg 
    237   1.1.1.8  mrg 	    if (profiling_p)
    238   1.1.1.8  mrg 	      {
    239   1.1.1.8  mrg 		prof_info.async = async;
    240   1.1.1.8  mrg 		prof_info.async_queue = prof_info.async;
    241   1.1.1.8  mrg 	      }
    242   1.1.1.8  mrg 
    243   1.1.1.2  mrg 	    break;
    244   1.1.1.2  mrg 	  }
    245       1.1  mrg 
    246   1.1.1.2  mrg 	case GOMP_LAUNCH_WAIT:
    247   1.1.1.2  mrg 	  {
    248   1.1.1.2  mrg 	    unsigned num_waits = GOMP_LAUNCH_OP (tag);
    249   1.1.1.7  mrg 	    goacc_wait (async, num_waits, &ap);
    250   1.1.1.2  mrg 	    break;
    251   1.1.1.2  mrg 	  }
    252   1.1.1.2  mrg 
    253   1.1.1.2  mrg 	default:
    254   1.1.1.2  mrg 	  gomp_fatal ("unrecognized offload code '%d',"
    255   1.1.1.2  mrg 		      " libgomp is too old", GOMP_LAUNCH_CODE (tag));
    256   1.1.1.2  mrg 	}
    257   1.1.1.2  mrg     }
    258   1.1.1.2  mrg   va_end (ap);
    259   1.1.1.2  mrg 
    260       1.1  mrg   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
    261       1.1  mrg     {
    262       1.1  mrg       k.host_start = (uintptr_t) fn;
    263       1.1  mrg       k.host_end = k.host_start + 1;
    264       1.1  mrg       gomp_mutex_lock (&acc_dev->lock);
    265       1.1  mrg       tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
    266       1.1  mrg       gomp_mutex_unlock (&acc_dev->lock);
    267       1.1  mrg 
    268       1.1  mrg       if (tgt_fn_key == NULL)
    269       1.1  mrg 	gomp_fatal ("target function wasn't mapped");
    270       1.1  mrg 
    271       1.1  mrg       tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
    272       1.1  mrg     }
    273       1.1  mrg   else
    274       1.1  mrg     tgt_fn = (void (*)) fn;
    275       1.1  mrg 
    276   1.1.1.8  mrg   acc_event_info enter_exit_data_event_info;
    277   1.1.1.8  mrg   if (profiling_p)
    278   1.1.1.8  mrg     {
    279   1.1.1.8  mrg       prof_info.event_type = acc_ev_enter_data_start;
    280   1.1.1.8  mrg       enter_exit_data_event_info.other_event.event_type
    281   1.1.1.8  mrg 	= prof_info.event_type;
    282   1.1.1.8  mrg       enter_exit_data_event_info.other_event.valid_bytes
    283   1.1.1.8  mrg 	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
    284   1.1.1.8  mrg       enter_exit_data_event_info.other_event.parent_construct
    285   1.1.1.8  mrg 	= compute_construct_event_info.other_event.parent_construct;
    286   1.1.1.8  mrg       enter_exit_data_event_info.other_event.implicit = 1;
    287   1.1.1.8  mrg       enter_exit_data_event_info.other_event.tool_info = NULL;
    288   1.1.1.8  mrg       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
    289   1.1.1.8  mrg 				&api_info);
    290   1.1.1.8  mrg     }
    291   1.1.1.8  mrg 
    292   1.1.1.8  mrg   goacc_aq aq = get_goacc_asyncqueue (async);
    293   1.1.1.8  mrg 
    294  1.1.1.10  mrg   struct target_mem_desc *tgt
    295  1.1.1.10  mrg     = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, true,
    296  1.1.1.10  mrg 		      GOMP_MAP_VARS_TARGET);
    297  1.1.1.10  mrg 
    298   1.1.1.8  mrg   if (profiling_p)
    299   1.1.1.8  mrg     {
    300   1.1.1.8  mrg       prof_info.event_type = acc_ev_enter_data_end;
    301   1.1.1.8  mrg       enter_exit_data_event_info.other_event.event_type
    302   1.1.1.8  mrg 	= prof_info.event_type;
    303   1.1.1.8  mrg       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
    304   1.1.1.8  mrg 				&api_info);
    305   1.1.1.8  mrg     }
    306   1.1.1.9  mrg 
    307  1.1.1.10  mrg   void **devaddrs = (void **) tgt->tgt_start;
    308   1.1.1.8  mrg   if (aq == NULL)
    309   1.1.1.8  mrg     acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
    310   1.1.1.8  mrg 				tgt);
    311   1.1.1.8  mrg   else
    312   1.1.1.8  mrg     acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
    313   1.1.1.8  mrg 				      dims, tgt, aq);
    314       1.1  mrg 
    315   1.1.1.8  mrg   if (profiling_p)
    316   1.1.1.8  mrg     {
    317   1.1.1.8  mrg       prof_info.event_type = acc_ev_exit_data_start;
    318   1.1.1.8  mrg       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
    319   1.1.1.8  mrg       enter_exit_data_event_info.other_event.tool_info = NULL;
    320   1.1.1.8  mrg       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
    321   1.1.1.8  mrg 				&api_info);
    322   1.1.1.8  mrg     }
    323       1.1  mrg 
    324   1.1.1.9  mrg   /* If running synchronously (aq == NULL), this will unmap immediately.  */
    325   1.1.1.9  mrg   goacc_unmap_vars (tgt, true, aq);
    326   1.1.1.8  mrg 
    327   1.1.1.8  mrg   if (profiling_p)
    328   1.1.1.7  mrg     {
    329   1.1.1.8  mrg       prof_info.event_type = acc_ev_exit_data_end;
    330   1.1.1.8  mrg       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
    331   1.1.1.8  mrg       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
    332   1.1.1.8  mrg 				&api_info);
    333   1.1.1.7  mrg     }
    334       1.1  mrg 
    335   1.1.1.8  mrg  out_prof:
    336   1.1.1.8  mrg   if (profiling_p)
    337   1.1.1.8  mrg     {
    338   1.1.1.8  mrg       prof_info.event_type = acc_ev_compute_construct_end;
    339   1.1.1.8  mrg       compute_construct_event_info.other_event.event_type
    340   1.1.1.8  mrg 	= prof_info.event_type;
    341   1.1.1.8  mrg       goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
    342   1.1.1.8  mrg 				&api_info);
    343   1.1.1.8  mrg 
    344   1.1.1.8  mrg       thr->prof_info = NULL;
    345   1.1.1.8  mrg       thr->api_info = NULL;
    346   1.1.1.8  mrg     }
    347       1.1  mrg }
    348       1.1  mrg 
    349   1.1.1.8  mrg /* Legacy entry point (GCC 5).  Only provide host fallback execution.  */
    350   1.1.1.2  mrg 
    351   1.1.1.2  mrg void
    352   1.1.1.7  mrg GOACC_parallel (int flags_m, void (*fn) (void *),
    353   1.1.1.2  mrg 		size_t mapnum, void **hostaddrs, size_t *sizes,
    354   1.1.1.2  mrg 		unsigned short *kinds,
    355   1.1.1.2  mrg 		int num_gangs, int num_workers, int vector_length,
    356   1.1.1.2  mrg 		int async, int num_waits, ...)
    357   1.1.1.2  mrg {
    358   1.1.1.2  mrg   goacc_save_and_set_bind (acc_device_host);
    359   1.1.1.2  mrg   fn (hostaddrs);
    360   1.1.1.2  mrg   goacc_restore_bind ();
    361   1.1.1.2  mrg }
    362   1.1.1.2  mrg 
    363       1.1  mrg void
    364   1.1.1.7  mrg GOACC_data_start (int flags_m, size_t mapnum,
    365       1.1  mrg 		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
    366       1.1  mrg {
    367   1.1.1.7  mrg   int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
    368   1.1.1.7  mrg 
    369       1.1  mrg   struct target_mem_desc *tgt;
    370       1.1  mrg 
    371       1.1  mrg #ifdef HAVE_INTTYPES_H
    372       1.1  mrg   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
    373       1.1  mrg 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
    374       1.1  mrg #else
    375       1.1  mrg   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
    376       1.1  mrg 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
    377       1.1  mrg #endif
    378       1.1  mrg 
    379       1.1  mrg   goacc_lazy_initialize ();
    380       1.1  mrg 
    381       1.1  mrg   struct goacc_thread *thr = goacc_thread ();
    382       1.1  mrg   struct gomp_device_descr *acc_dev = thr->dev;
    383       1.1  mrg 
    384   1.1.1.8  mrg   bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
    385   1.1.1.8  mrg 
    386   1.1.1.8  mrg   acc_prof_info prof_info;
    387   1.1.1.8  mrg   if (profiling_p)
    388   1.1.1.8  mrg     {
    389   1.1.1.8  mrg       thr->prof_info = &prof_info;
    390   1.1.1.8  mrg 
    391   1.1.1.8  mrg       prof_info.event_type = acc_ev_enter_data_start;
    392   1.1.1.8  mrg       prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
    393   1.1.1.8  mrg       prof_info.version = _ACC_PROF_INFO_VERSION;
    394   1.1.1.8  mrg       prof_info.device_type = acc_device_type (acc_dev->type);
    395   1.1.1.8  mrg       prof_info.device_number = acc_dev->target_id;
    396   1.1.1.8  mrg       prof_info.thread_id = -1;
    397   1.1.1.8  mrg       prof_info.async = acc_async_sync; /* Always synchronous.  */
    398   1.1.1.8  mrg       prof_info.async_queue = prof_info.async;
    399   1.1.1.8  mrg       prof_info.src_file = NULL;
    400   1.1.1.8  mrg       prof_info.func_name = NULL;
    401   1.1.1.8  mrg       prof_info.line_no = -1;
    402   1.1.1.8  mrg       prof_info.end_line_no = -1;
    403   1.1.1.8  mrg       prof_info.func_line_no = -1;
    404   1.1.1.8  mrg       prof_info.func_end_line_no = -1;
    405   1.1.1.8  mrg     }
    406   1.1.1.8  mrg   acc_event_info enter_data_event_info;
    407   1.1.1.8  mrg   if (profiling_p)
    408   1.1.1.8  mrg     {
    409   1.1.1.8  mrg       enter_data_event_info.other_event.event_type
    410   1.1.1.8  mrg 	= prof_info.event_type;
    411   1.1.1.8  mrg       enter_data_event_info.other_event.valid_bytes
    412   1.1.1.8  mrg 	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
    413   1.1.1.8  mrg       enter_data_event_info.other_event.parent_construct = acc_construct_data;
    414   1.1.1.8  mrg       for (int i = 0; i < mapnum; ++i)
    415   1.1.1.8  mrg 	if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR
    416   1.1.1.8  mrg 	    || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
    417   1.1.1.8  mrg 	  {
    418   1.1.1.8  mrg 	    /* If there is one such data mapping kind, then this is actually an
    419   1.1.1.8  mrg 	       OpenACC 'host_data' construct.  (GCC maps the OpenACC
    420   1.1.1.8  mrg 	       'host_data' construct to the OpenACC 'data' construct.)  Apart
    421   1.1.1.8  mrg 	       from artificial test cases (such as an OpenACC 'host_data'
    422   1.1.1.8  mrg 	       construct's (implicit) device initialization when there hasn't
    423   1.1.1.8  mrg 	       been any device data be set up before...), there can't really
    424   1.1.1.8  mrg 	       any meaningful events be generated from OpenACC 'host_data'
    425   1.1.1.8  mrg 	       constructs, though.  */
    426   1.1.1.8  mrg 	    enter_data_event_info.other_event.parent_construct
    427   1.1.1.8  mrg 	      = acc_construct_host_data;
    428   1.1.1.8  mrg 	    break;
    429   1.1.1.8  mrg 	  }
    430   1.1.1.8  mrg       enter_data_event_info.other_event.implicit = 0;
    431   1.1.1.8  mrg       enter_data_event_info.other_event.tool_info = NULL;
    432   1.1.1.8  mrg     }
    433   1.1.1.8  mrg   acc_api_info api_info;
    434   1.1.1.8  mrg   if (profiling_p)
    435   1.1.1.8  mrg     {
    436   1.1.1.8  mrg       thr->api_info = &api_info;
    437   1.1.1.8  mrg 
    438   1.1.1.8  mrg       api_info.device_api = acc_device_api_none;
    439   1.1.1.8  mrg       api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
    440   1.1.1.8  mrg       api_info.device_type = prof_info.device_type;
    441   1.1.1.8  mrg       api_info.vendor = -1;
    442   1.1.1.8  mrg       api_info.device_handle = NULL;
    443   1.1.1.8  mrg       api_info.context_handle = NULL;
    444   1.1.1.8  mrg       api_info.async_handle = NULL;
    445   1.1.1.8  mrg     }
    446   1.1.1.8  mrg 
    447   1.1.1.8  mrg   if (profiling_p)
    448   1.1.1.8  mrg     goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
    449   1.1.1.8  mrg 
    450       1.1  mrg   /* Host fallback or 'do nothing'.  */
    451       1.1  mrg   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    452  1.1.1.10  mrg       || (flags & GOACC_FLAG_HOST_FALLBACK)
    453  1.1.1.10  mrg       || (flags & GOACC_FLAG_LOCAL_DEVICE))
    454       1.1  mrg     {
    455   1.1.1.8  mrg       prof_info.device_type = acc_device_host;
    456   1.1.1.8  mrg       api_info.device_type = prof_info.device_type;
    457   1.1.1.9  mrg       tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0);
    458       1.1  mrg       tgt->prev = thr->mapped_data;
    459       1.1  mrg       thr->mapped_data = tgt;
    460       1.1  mrg 
    461   1.1.1.8  mrg       goto out_prof;
    462       1.1  mrg     }
    463       1.1  mrg 
    464       1.1  mrg   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
    465   1.1.1.9  mrg   tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds,
    466   1.1.1.9  mrg 			true, 0);
    467       1.1  mrg   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
    468       1.1  mrg   tgt->prev = thr->mapped_data;
    469       1.1  mrg   thr->mapped_data = tgt;
    470   1.1.1.8  mrg 
    471   1.1.1.8  mrg  out_prof:
    472   1.1.1.8  mrg   if (profiling_p)
    473   1.1.1.8  mrg     {
    474   1.1.1.8  mrg       prof_info.event_type = acc_ev_enter_data_end;
    475   1.1.1.8  mrg       enter_data_event_info.other_event.event_type = prof_info.event_type;
    476   1.1.1.8  mrg       goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
    477   1.1.1.8  mrg 
    478   1.1.1.8  mrg       thr->prof_info = NULL;
    479   1.1.1.8  mrg       thr->api_info = NULL;
    480   1.1.1.8  mrg     }
    481       1.1  mrg }
    482       1.1  mrg 
    483       1.1  mrg void
    484       1.1  mrg GOACC_data_end (void)
    485       1.1  mrg {
    486       1.1  mrg   struct goacc_thread *thr = goacc_thread ();
    487   1.1.1.8  mrg   struct gomp_device_descr *acc_dev = thr->dev;
    488       1.1  mrg   struct target_mem_desc *tgt = thr->mapped_data;
    489       1.1  mrg 
    490   1.1.1.8  mrg   bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
    491       1.1  mrg 
    492   1.1.1.8  mrg   acc_prof_info prof_info;
    493   1.1.1.8  mrg   if (profiling_p)
    494       1.1  mrg     {
    495   1.1.1.8  mrg       thr->prof_info = &prof_info;
    496       1.1  mrg 
    497   1.1.1.8  mrg       prof_info.event_type = acc_ev_exit_data_start;
    498   1.1.1.8  mrg       prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
    499   1.1.1.8  mrg       prof_info.version = _ACC_PROF_INFO_VERSION;
    500   1.1.1.8  mrg       prof_info.device_type = acc_device_type (acc_dev->type);
    501   1.1.1.8  mrg       prof_info.device_number = acc_dev->target_id;
    502   1.1.1.8  mrg       prof_info.thread_id = -1;
    503   1.1.1.8  mrg       prof_info.async = acc_async_sync; /* Always synchronous.  */
    504   1.1.1.8  mrg       prof_info.async_queue = prof_info.async;
    505   1.1.1.8  mrg       prof_info.src_file = NULL;
    506   1.1.1.8  mrg       prof_info.func_name = NULL;
    507   1.1.1.8  mrg       prof_info.line_no = -1;
    508   1.1.1.8  mrg       prof_info.end_line_no = -1;
    509   1.1.1.8  mrg       prof_info.func_line_no = -1;
    510   1.1.1.8  mrg       prof_info.func_end_line_no = -1;
    511   1.1.1.8  mrg     }
    512   1.1.1.8  mrg   acc_event_info exit_data_event_info;
    513   1.1.1.8  mrg   if (profiling_p)
    514   1.1.1.8  mrg     {
    515   1.1.1.8  mrg       exit_data_event_info.other_event.event_type
    516   1.1.1.8  mrg 	= prof_info.event_type;
    517   1.1.1.8  mrg       exit_data_event_info.other_event.valid_bytes
    518   1.1.1.8  mrg 	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
    519   1.1.1.8  mrg       exit_data_event_info.other_event.parent_construct = acc_construct_data;
    520   1.1.1.8  mrg       exit_data_event_info.other_event.implicit = 0;
    521   1.1.1.8  mrg       exit_data_event_info.other_event.tool_info = NULL;
    522   1.1.1.8  mrg     }
    523   1.1.1.8  mrg   acc_api_info api_info;
    524   1.1.1.8  mrg   if (profiling_p)
    525   1.1.1.8  mrg     {
    526   1.1.1.8  mrg       thr->api_info = &api_info;
    527   1.1.1.8  mrg 
    528   1.1.1.8  mrg       api_info.device_api = acc_device_api_none;
    529   1.1.1.8  mrg       api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
    530   1.1.1.8  mrg       api_info.device_type = prof_info.device_type;
    531   1.1.1.8  mrg       api_info.vendor = -1;
    532   1.1.1.8  mrg       api_info.device_handle = NULL;
    533   1.1.1.8  mrg       api_info.context_handle = NULL;
    534   1.1.1.8  mrg       api_info.async_handle = NULL;
    535       1.1  mrg     }
    536       1.1  mrg 
    537   1.1.1.8  mrg   if (profiling_p)
    538   1.1.1.8  mrg     goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
    539   1.1.1.7  mrg 
    540   1.1.1.8  mrg   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
    541   1.1.1.8  mrg   thr->mapped_data = tgt->prev;
    542   1.1.1.9  mrg   goacc_unmap_vars (tgt, true, NULL);
    543   1.1.1.8  mrg   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
    544       1.1  mrg 
    545   1.1.1.8  mrg   if (profiling_p)
    546       1.1  mrg     {
    547   1.1.1.8  mrg       prof_info.event_type = acc_ev_exit_data_end;
    548   1.1.1.8  mrg       exit_data_event_info.other_event.event_type = prof_info.event_type;
    549   1.1.1.8  mrg       goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
    550       1.1  mrg 
    551   1.1.1.8  mrg       thr->prof_info = NULL;
    552   1.1.1.8  mrg       thr->api_info = NULL;
    553       1.1  mrg     }
    554       1.1  mrg }
    555       1.1  mrg 
    556       1.1  mrg void
    557   1.1.1.7  mrg GOACC_update (int flags_m, size_t mapnum,
    558       1.1  mrg 	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
    559       1.1  mrg 	      int async, int num_waits, ...)
    560       1.1  mrg {
    561   1.1.1.7  mrg   int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
    562   1.1.1.7  mrg 
    563       1.1  mrg   size_t i;
    564       1.1  mrg 
    565       1.1  mrg   goacc_lazy_initialize ();
    566       1.1  mrg 
    567       1.1  mrg   struct goacc_thread *thr = goacc_thread ();
    568       1.1  mrg   struct gomp_device_descr *acc_dev = thr->dev;
    569       1.1  mrg 
    570   1.1.1.8  mrg   bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
    571   1.1.1.8  mrg 
    572   1.1.1.8  mrg   acc_prof_info prof_info;
    573   1.1.1.8  mrg   if (profiling_p)
    574   1.1.1.8  mrg     {
    575   1.1.1.8  mrg       thr->prof_info = &prof_info;
    576   1.1.1.8  mrg 
    577   1.1.1.8  mrg       prof_info.event_type = acc_ev_update_start;
    578   1.1.1.8  mrg       prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
    579   1.1.1.8  mrg       prof_info.version = _ACC_PROF_INFO_VERSION;
    580   1.1.1.8  mrg       prof_info.device_type = acc_device_type (acc_dev->type);
    581   1.1.1.8  mrg       prof_info.device_number = acc_dev->target_id;
    582   1.1.1.8  mrg       prof_info.thread_id = -1;
    583   1.1.1.8  mrg       prof_info.async = async;
    584   1.1.1.8  mrg       prof_info.async_queue = prof_info.async;
    585   1.1.1.8  mrg       prof_info.src_file = NULL;
    586   1.1.1.8  mrg       prof_info.func_name = NULL;
    587   1.1.1.8  mrg       prof_info.line_no = -1;
    588   1.1.1.8  mrg       prof_info.end_line_no = -1;
    589   1.1.1.8  mrg       prof_info.func_line_no = -1;
    590   1.1.1.8  mrg       prof_info.func_end_line_no = -1;
    591   1.1.1.8  mrg     }
    592   1.1.1.8  mrg   acc_event_info update_event_info;
    593   1.1.1.8  mrg   if (profiling_p)
    594   1.1.1.8  mrg     {
    595   1.1.1.8  mrg       update_event_info.other_event.event_type
    596   1.1.1.8  mrg 	= prof_info.event_type;
    597   1.1.1.8  mrg       update_event_info.other_event.valid_bytes
    598   1.1.1.8  mrg 	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
    599   1.1.1.8  mrg       update_event_info.other_event.parent_construct = acc_construct_update;
    600   1.1.1.8  mrg       update_event_info.other_event.implicit = 0;
    601   1.1.1.8  mrg       update_event_info.other_event.tool_info = NULL;
    602   1.1.1.8  mrg     }
    603   1.1.1.8  mrg   acc_api_info api_info;
    604   1.1.1.8  mrg   if (profiling_p)
    605   1.1.1.8  mrg     {
    606   1.1.1.8  mrg       thr->api_info = &api_info;
    607   1.1.1.8  mrg 
    608   1.1.1.8  mrg       api_info.device_api = acc_device_api_none;
    609   1.1.1.8  mrg       api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
    610   1.1.1.8  mrg       api_info.device_type = prof_info.device_type;
    611   1.1.1.8  mrg       api_info.vendor = -1;
    612   1.1.1.8  mrg       api_info.device_handle = NULL;
    613   1.1.1.8  mrg       api_info.context_handle = NULL;
    614   1.1.1.8  mrg       api_info.async_handle = NULL;
    615   1.1.1.8  mrg     }
    616   1.1.1.8  mrg 
    617   1.1.1.8  mrg   if (profiling_p)
    618   1.1.1.8  mrg     goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
    619   1.1.1.8  mrg 
    620       1.1  mrg   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    621   1.1.1.7  mrg       || (flags & GOACC_FLAG_HOST_FALLBACK))
    622   1.1.1.8  mrg     {
    623   1.1.1.8  mrg       prof_info.device_type = acc_device_host;
    624   1.1.1.8  mrg       api_info.device_type = prof_info.device_type;
    625   1.1.1.8  mrg 
    626   1.1.1.8  mrg       goto out_prof;
    627   1.1.1.8  mrg     }
    628       1.1  mrg 
    629   1.1.1.2  mrg   if (num_waits)
    630       1.1  mrg     {
    631       1.1  mrg       va_list ap;
    632       1.1  mrg 
    633       1.1  mrg       va_start (ap, num_waits);
    634   1.1.1.2  mrg       goacc_wait (async, num_waits, &ap);
    635       1.1  mrg       va_end (ap);
    636       1.1  mrg     }
    637       1.1  mrg 
    638   1.1.1.7  mrg   bool update_device = false;
    639       1.1  mrg   for (i = 0; i < mapnum; ++i)
    640       1.1  mrg     {
    641       1.1  mrg       unsigned char kind = kinds[i] & 0xff;
    642       1.1  mrg 
    643       1.1  mrg       switch (kind)
    644       1.1  mrg 	{
    645       1.1  mrg 	case GOMP_MAP_POINTER:
    646       1.1  mrg 	case GOMP_MAP_TO_PSET:
    647       1.1  mrg 	  break;
    648       1.1  mrg 
    649   1.1.1.7  mrg 	case GOMP_MAP_ALWAYS_POINTER:
    650   1.1.1.7  mrg 	  if (update_device)
    651   1.1.1.7  mrg 	    {
    652   1.1.1.7  mrg 	      /* Save the contents of the host pointer.  */
    653   1.1.1.7  mrg 	      void *dptr = acc_deviceptr (hostaddrs[i-1]);
    654   1.1.1.7  mrg 	      uintptr_t t = *(uintptr_t *) hostaddrs[i];
    655   1.1.1.7  mrg 
    656   1.1.1.7  mrg 	      /* Update the contents of the host pointer to reflect
    657   1.1.1.7  mrg 		 the value of the allocated device memory in the
    658   1.1.1.7  mrg 		 previous pointer.  */
    659   1.1.1.7  mrg 	      *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
    660   1.1.1.8  mrg 	      /* TODO: verify that we really cannot use acc_update_device_async
    661   1.1.1.8  mrg 		 here.  */
    662   1.1.1.7  mrg 	      acc_update_device (hostaddrs[i], sizeof (uintptr_t));
    663   1.1.1.7  mrg 
    664   1.1.1.7  mrg 	      /* Restore the host pointer.  */
    665   1.1.1.7  mrg 	      *(uintptr_t *) hostaddrs[i] = t;
    666   1.1.1.7  mrg 	      update_device = false;
    667   1.1.1.7  mrg 	    }
    668   1.1.1.7  mrg 	  break;
    669   1.1.1.7  mrg 
    670   1.1.1.7  mrg 	case GOMP_MAP_TO:
    671   1.1.1.7  mrg 	  if (!acc_is_present (hostaddrs[i], sizes[i]))
    672   1.1.1.7  mrg 	    {
    673   1.1.1.7  mrg 	      update_device = false;
    674   1.1.1.7  mrg 	      break;
    675   1.1.1.7  mrg 	    }
    676   1.1.1.7  mrg 	  /* Fallthru  */
    677       1.1  mrg 	case GOMP_MAP_FORCE_TO:
    678   1.1.1.7  mrg 	  update_device = true;
    679   1.1.1.8  mrg 	  acc_update_device_async (hostaddrs[i], sizes[i], async);
    680       1.1  mrg 	  break;
    681       1.1  mrg 
    682   1.1.1.7  mrg 	case GOMP_MAP_FROM:
    683   1.1.1.7  mrg 	  if (!acc_is_present (hostaddrs[i], sizes[i]))
    684   1.1.1.7  mrg 	    {
    685   1.1.1.7  mrg 	      update_device = false;
    686   1.1.1.7  mrg 	      break;
    687   1.1.1.7  mrg 	    }
    688   1.1.1.7  mrg 	  /* Fallthru  */
    689       1.1  mrg 	case GOMP_MAP_FORCE_FROM:
    690   1.1.1.7  mrg 	  update_device = false;
    691   1.1.1.8  mrg 	  acc_update_self_async (hostaddrs[i], sizes[i], async);
    692       1.1  mrg 	  break;
    693       1.1  mrg 
    694       1.1  mrg 	default:
    695       1.1  mrg 	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
    696       1.1  mrg 	  break;
    697       1.1  mrg 	}
    698       1.1  mrg     }
    699       1.1  mrg 
    700   1.1.1.8  mrg  out_prof:
    701   1.1.1.8  mrg   if (profiling_p)
    702   1.1.1.2  mrg     {
    703   1.1.1.8  mrg       prof_info.event_type = acc_ev_update_end;
    704   1.1.1.8  mrg       update_event_info.other_event.event_type = prof_info.event_type;
    705   1.1.1.8  mrg       goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
    706       1.1  mrg 
    707   1.1.1.8  mrg       thr->prof_info = NULL;
    708   1.1.1.8  mrg       thr->api_info = NULL;
    709   1.1.1.2  mrg     }
    710       1.1  mrg }
    711       1.1  mrg 
    712   1.1.1.8  mrg 
/* Legacy entry point (GCC 5).  Takes no arguments and always returns 1.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
    720       1.1  mrg 
/* Legacy entry point (GCC 5).  Takes no arguments and always returns 0.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}
    728