Home | History | Annotate | Line # | Download | only in libgomp
oacc-parallel.c revision 1.1
      1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
      2 
      3    Contributed by Mentor Embedded.
      4 
      5    This file is part of the GNU Offloading and Multi Processing Library
      6    (libgomp).
      7 
      8    Libgomp is free software; you can redistribute it and/or modify it
      9    under the terms of the GNU General Public License as published by
     10    the Free Software Foundation; either version 3, or (at your option)
     11    any later version.
     12 
     13    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     14    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     15    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     16    more details.
     17 
     18    Under Section 7 of GPL version 3, you are granted additional
     19    permissions described in the GCC Runtime Library Exception, version
     20    3.1, as published by the Free Software Foundation.
     21 
     22    You should have received a copy of the GNU General Public License and
     23    a copy of the GCC Runtime Library Exception along with this program;
     24    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     25    <http://www.gnu.org/licenses/>.  */
     26 
     27 /* This file handles OpenACC constructs.  */
     28 
     29 #include "openacc.h"
     30 #include "libgomp.h"
     31 #include "libgomp_g.h"
     32 #include "gomp-constants.h"
     33 #include "oacc-int.h"
     34 #ifdef HAVE_INTTYPES_H
     35 # include <inttypes.h>  /* For PRIu64.  */
     36 #endif
     37 #include <string.h>
     38 #include <stdarg.h>
     39 #include <assert.h>
     40 
     41 static int
     42 find_pset (int pos, size_t mapnum, unsigned short *kinds)
     43 {
     44   if (pos + 1 >= mapnum)
     45     return 0;
     46 
     47   unsigned char kind = kinds[pos+1] & 0xff;
     48 
     49   return kind == GOMP_MAP_TO_PSET;
     50 }
     51 
     52 static void goacc_wait (int async, int num_waits, va_list ap);
     53 
     54 void
     55 GOACC_parallel (int device, void (*fn) (void *),
     56 		size_t mapnum, void **hostaddrs, size_t *sizes,
     57 		unsigned short *kinds,
     58 		int num_gangs, int num_workers, int vector_length,
     59 		int async, int num_waits, ...)
     60 {
     61   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
     62   va_list ap;
     63   struct goacc_thread *thr;
     64   struct gomp_device_descr *acc_dev;
     65   struct target_mem_desc *tgt;
     66   void **devaddrs;
     67   unsigned int i;
     68   struct splay_tree_key_s k;
     69   splay_tree_key tgt_fn_key;
     70   void (*tgt_fn);
     71 
     72   if (num_gangs != 1)
     73     gomp_fatal ("num_gangs (%d) different from one is not yet supported",
     74 		num_gangs);
     75   if (num_workers != 1)
     76     gomp_fatal ("num_workers (%d) different from one is not yet supported",
     77 		num_workers);
     78 
     79 #ifdef HAVE_INTTYPES_H
     80   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, "
     81 		 "async = %d\n",
     82 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async);
     83 #else
     84   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
     85 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
     86 	      async);
     87 #endif
     88   goacc_lazy_initialize ();
     89 
     90   thr = goacc_thread ();
     91   acc_dev = thr->dev;
     92 
     93   /* Host fallback if "if" clause is false or if the current device is set to
     94      the host.  */
     95   if (host_fallback)
     96     {
     97       goacc_save_and_set_bind (acc_device_host);
     98       fn (hostaddrs);
     99       goacc_restore_bind ();
    100       return;
    101     }
    102   else if (acc_device_type (acc_dev->type) == acc_device_host)
    103     {
    104       fn (hostaddrs);
    105       return;
    106     }
    107 
    108   va_start (ap, num_waits);
    109 
    110   if (num_waits > 0)
    111     goacc_wait (async, num_waits, ap);
    112 
    113   va_end (ap);
    114 
    115   acc_dev->openacc.async_set_async_func (async);
    116 
    117   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
    118     {
    119       k.host_start = (uintptr_t) fn;
    120       k.host_end = k.host_start + 1;
    121       gomp_mutex_lock (&acc_dev->lock);
    122       tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
    123       gomp_mutex_unlock (&acc_dev->lock);
    124 
    125       if (tgt_fn_key == NULL)
    126 	gomp_fatal ("target function wasn't mapped");
    127 
    128       tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
    129     }
    130   else
    131     tgt_fn = (void (*)) fn;
    132 
    133   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
    134 		       false);
    135 
    136   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
    137   for (i = 0; i < mapnum; i++)
    138     devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
    139 			    + tgt->list[i]->tgt_offset);
    140 
    141   acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
    142 			      num_gangs, num_workers, vector_length, async,
    143 			      tgt);
    144 
    145   /* If running synchronously, unmap immediately.  */
    146   if (async < acc_async_noval)
    147     gomp_unmap_vars (tgt, true);
    148   else
    149     {
    150       gomp_copy_from_async (tgt);
    151       acc_dev->openacc.register_async_cleanup_func (tgt);
    152     }
    153 
    154   acc_dev->openacc.async_set_async_func (acc_async_sync);
    155 }
    156 
    157 void
    158 GOACC_data_start (int device, size_t mapnum,
    159 		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
    160 {
    161   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
    162   struct target_mem_desc *tgt;
    163 
    164 #ifdef HAVE_INTTYPES_H
    165   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
    166 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
    167 #else
    168   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
    169 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
    170 #endif
    171 
    172   goacc_lazy_initialize ();
    173 
    174   struct goacc_thread *thr = goacc_thread ();
    175   struct gomp_device_descr *acc_dev = thr->dev;
    176 
    177   /* Host fallback or 'do nothing'.  */
    178   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    179       || host_fallback)
    180     {
    181       tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
    182       tgt->prev = thr->mapped_data;
    183       thr->mapped_data = tgt;
    184 
    185       return;
    186     }
    187 
    188   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
    189   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
    190 		       false);
    191   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
    192   tgt->prev = thr->mapped_data;
    193   thr->mapped_data = tgt;
    194 }
    195 
    196 void
    197 GOACC_data_end (void)
    198 {
    199   struct goacc_thread *thr = goacc_thread ();
    200   struct target_mem_desc *tgt = thr->mapped_data;
    201 
    202   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
    203   thr->mapped_data = tgt->prev;
    204   gomp_unmap_vars (tgt, true);
    205   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
    206 }
    207 
    208 void
    209 GOACC_enter_exit_data (int device, size_t mapnum,
    210 		       void **hostaddrs, size_t *sizes, unsigned short *kinds,
    211 		       int async, int num_waits, ...)
    212 {
    213   struct goacc_thread *thr;
    214   struct gomp_device_descr *acc_dev;
    215   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
    216   bool data_enter = false;
    217   size_t i;
    218 
    219   goacc_lazy_initialize ();
    220 
    221   thr = goacc_thread ();
    222   acc_dev = thr->dev;
    223 
    224   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    225       || host_fallback)
    226     return;
    227 
    228   if (num_waits > 0)
    229     {
    230       va_list ap;
    231 
    232       va_start (ap, num_waits);
    233 
    234       goacc_wait (async, num_waits, ap);
    235 
    236       va_end (ap);
    237     }
    238 
    239   acc_dev->openacc.async_set_async_func (async);
    240 
    241   /* Determine if this is an "acc enter data".  */
    242   for (i = 0; i < mapnum; ++i)
    243     {
    244       unsigned char kind = kinds[i] & 0xff;
    245 
    246       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
    247 	continue;
    248 
    249       if (kind == GOMP_MAP_FORCE_ALLOC
    250 	  || kind == GOMP_MAP_FORCE_PRESENT
    251 	  || kind == GOMP_MAP_FORCE_TO)
    252 	{
    253 	  data_enter = true;
    254 	  break;
    255 	}
    256 
    257       if (kind == GOMP_MAP_FORCE_DEALLOC
    258 	  || kind == GOMP_MAP_FORCE_FROM)
    259 	break;
    260 
    261       gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
    262 		      kind);
    263     }
    264 
    265   if (data_enter)
    266     {
    267       for (i = 0; i < mapnum; i++)
    268 	{
    269 	  unsigned char kind = kinds[i] & 0xff;
    270 
    271 	  /* Scan for PSETs.  */
    272 	  int psets = find_pset (i, mapnum, kinds);
    273 
    274 	  if (!psets)
    275 	    {
    276 	      switch (kind)
    277 		{
    278 		case GOMP_MAP_POINTER:
    279 		  gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
    280 					&kinds[i]);
    281 		  break;
    282 		case GOMP_MAP_FORCE_ALLOC:
    283 		  acc_create (hostaddrs[i], sizes[i]);
    284 		  break;
    285 		case GOMP_MAP_FORCE_PRESENT:
    286 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
    287 		  break;
    288 		case GOMP_MAP_FORCE_TO:
    289 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
    290 		  break;
    291 		default:
    292 		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
    293 			      kind);
    294 		  break;
    295 		}
    296 	    }
    297 	  else
    298 	    {
    299 	      gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
    300 	      /* Increment 'i' by two because OpenACC requires fortran
    301 		 arrays to be contiguous, so each PSET is associated with
    302 		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
    303 		 one MAP_POINTER.  */
    304 	      i += 2;
    305 	    }
    306 	}
    307     }
    308   else
    309     for (i = 0; i < mapnum; ++i)
    310       {
    311 	unsigned char kind = kinds[i] & 0xff;
    312 
    313 	int psets = find_pset (i, mapnum, kinds);
    314 
    315 	if (!psets)
    316 	  {
    317 	    switch (kind)
    318 	      {
    319 	      case GOMP_MAP_POINTER:
    320 		gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
    321 					 == GOMP_MAP_FORCE_FROM,
    322 					 async, 1);
    323 		break;
    324 	      case GOMP_MAP_FORCE_DEALLOC:
    325 		acc_delete (hostaddrs[i], sizes[i]);
    326 		break;
    327 	      case GOMP_MAP_FORCE_FROM:
    328 		acc_copyout (hostaddrs[i], sizes[i]);
    329 		break;
    330 	      default:
    331 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
    332 			    kind);
    333 		break;
    334 	      }
    335 	  }
    336 	else
    337 	  {
    338 	    gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
    339 				     == GOMP_MAP_FORCE_FROM, async, 3);
    340 	    /* See the above comment.  */
    341 	    i += 2;
    342 	  }
    343       }
    344 
    345   acc_dev->openacc.async_set_async_func (acc_async_sync);
    346 }
    347 
    348 static void
    349 goacc_wait (int async, int num_waits, va_list ap)
    350 {
    351   struct goacc_thread *thr = goacc_thread ();
    352   struct gomp_device_descr *acc_dev = thr->dev;
    353   int i;
    354 
    355   assert (num_waits >= 0);
    356 
    357   if (async == acc_async_sync && num_waits == 0)
    358     {
    359       acc_wait_all ();
    360       return;
    361     }
    362 
    363   if (async == acc_async_sync && num_waits)
    364     {
    365       for (i = 0; i < num_waits; i++)
    366         {
    367           int qid = va_arg (ap, int);
    368 
    369           if (acc_async_test (qid))
    370             continue;
    371 
    372           acc_wait (qid);
    373         }
    374       return;
    375     }
    376 
    377   if (async == acc_async_noval && num_waits == 0)
    378     {
    379       acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
    380       return;
    381     }
    382 
    383   for (i = 0; i < num_waits; i++)
    384     {
    385       int qid = va_arg (ap, int);
    386 
    387       if (acc_async_test (qid))
    388 	continue;
    389 
    390       /* If we're waiting on the same asynchronous queue as we're launching on,
    391          the queue itself will order work as required, so there's no need to
    392 	 wait explicitly.  */
    393       if (qid != async)
    394 	acc_dev->openacc.async_wait_async_func (qid, async);
    395     }
    396 }
    397 
    398 void
    399 GOACC_update (int device, size_t mapnum,
    400 	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
    401 	      int async, int num_waits, ...)
    402 {
    403   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
    404   size_t i;
    405 
    406   goacc_lazy_initialize ();
    407 
    408   struct goacc_thread *thr = goacc_thread ();
    409   struct gomp_device_descr *acc_dev = thr->dev;
    410 
    411   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    412       || host_fallback)
    413     return;
    414 
    415   if (num_waits > 0)
    416     {
    417       va_list ap;
    418 
    419       va_start (ap, num_waits);
    420 
    421       goacc_wait (async, num_waits, ap);
    422 
    423       va_end (ap);
    424     }
    425 
    426   acc_dev->openacc.async_set_async_func (async);
    427 
    428   for (i = 0; i < mapnum; ++i)
    429     {
    430       unsigned char kind = kinds[i] & 0xff;
    431 
    432       switch (kind)
    433 	{
    434 	case GOMP_MAP_POINTER:
    435 	case GOMP_MAP_TO_PSET:
    436 	  break;
    437 
    438 	case GOMP_MAP_FORCE_TO:
    439 	  acc_update_device (hostaddrs[i], sizes[i]);
    440 	  break;
    441 
    442 	case GOMP_MAP_FORCE_FROM:
    443 	  acc_update_self (hostaddrs[i], sizes[i]);
    444 	  break;
    445 
    446 	default:
    447 	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
    448 	  break;
    449 	}
    450     }
    451 
    452   acc_dev->openacc.async_set_async_func (acc_async_sync);
    453 }
    454 
/* Entry point for a standalone OpenACC wait.

   ASYNC and the NUM_WAITS trailing int queue ids are forwarded unchanged
   to goacc_wait, which implements the actual waiting policy.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list ap;

  /* goacc_wait reads goacc_thread ()->dev unconditionally.  The other
     entry points in this file that use the current device
     (GOACC_parallel, GOACC_data_start, GOACC_enter_exit_data,
     GOACC_update) call goacc_lazy_initialize before goacc_thread; do the
     same here so a wait issued as the very first construct does not look
     at an unattached thread.
     NOTE(review): assumes goacc_lazy_initialize is a no-op once the
     runtime is already initialized -- confirm against oacc-init.c.  */
  goacc_lazy_initialize ();

  va_start (ap, num_waits);

  goacc_wait (async, num_waits, ap);

  va_end (ap);
}
    466 
/* Report the number of threads; this implementation always answers
   one.  */

int
GOACC_get_num_threads (void)
{
  const int thread_count = 1;

  return thread_count;
}
    472 
/* Report the calling thread's number; this implementation always answers
   zero.  */

int
GOACC_get_thread_num (void)
{
  const int thread_index = 0;

  return thread_index;
}
    478