Home | History | Annotate | Line # | Download | only in libgomp
oacc-parallel.c revision 1.1.1.2.4.2
      1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
      2 
      3    Contributed by Mentor Embedded.
      4 
      5    This file is part of the GNU Offloading and Multi Processing Library
      6    (libgomp).
      7 
      8    Libgomp is free software; you can redistribute it and/or modify it
      9    under the terms of the GNU General Public License as published by
     10    the Free Software Foundation; either version 3, or (at your option)
     11    any later version.
     12 
     13    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     14    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     15    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     16    more details.
     17 
     18    Under Section 7 of GPL version 3, you are granted additional
     19    permissions described in the GCC Runtime Library Exception, version
     20    3.1, as published by the Free Software Foundation.
     21 
     22    You should have received a copy of the GNU General Public License and
     23    a copy of the GCC Runtime Library Exception along with this program;
     24    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     25    <http://www.gnu.org/licenses/>.  */
     26 
     27 /* This file handles OpenACC constructs.  */
     28 
     29 #include "openacc.h"
     30 #include "libgomp.h"
     31 #include "libgomp_g.h"
     32 #include "gomp-constants.h"
     33 #include "oacc-int.h"
     34 #ifdef HAVE_INTTYPES_H
     35 # include <inttypes.h>  /* For PRIu64.  */
     36 #endif
     37 #include <string.h>
     38 #include <stdarg.h>
     39 #include <assert.h>
     40 
     41 static int
     42 find_pset (int pos, size_t mapnum, unsigned short *kinds)
     43 {
     44   if (pos + 1 >= mapnum)
     45     return 0;
     46 
     47   unsigned char kind = kinds[pos+1] & 0xff;
     48 
     49   return kind == GOMP_MAP_TO_PSET;
     50 }
     51 
     52 static void goacc_wait (int async, int num_waits, va_list *ap);
     53 
     54 
     55 /* Launch a possibly offloaded function on DEVICE.  FN is the host fn
     56    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
     57    blocks to be copied to/from the device.  Varadic arguments are
     58    keyed optional parameters terminated with a zero.  */
     59 
     60 void
     61 GOACC_parallel_keyed (int device, void (*fn) (void *),
     62 		      size_t mapnum, void **hostaddrs, size_t *sizes,
     63 		      unsigned short *kinds, ...)
     64 {
     65   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
     66   va_list ap;
     67   struct goacc_thread *thr;
     68   struct gomp_device_descr *acc_dev;
     69   struct target_mem_desc *tgt;
     70   void **devaddrs;
     71   unsigned int i;
     72   struct splay_tree_key_s k;
     73   splay_tree_key tgt_fn_key;
     74   void (*tgt_fn);
     75   int async = GOMP_ASYNC_SYNC;
     76   unsigned dims[GOMP_DIM_MAX];
     77   unsigned tag;
     78 
     79 #ifdef HAVE_INTTYPES_H
     80   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
     81 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
     82 #else
     83   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
     84 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
     85 #endif
     86   goacc_lazy_initialize ();
     87 
     88   thr = goacc_thread ();
     89   acc_dev = thr->dev;
     90 
     91   /* Host fallback if "if" clause is false or if the current device is set to
     92      the host.  */
     93   if (host_fallback)
     94     {
     95       goacc_save_and_set_bind (acc_device_host);
     96       fn (hostaddrs);
     97       goacc_restore_bind ();
     98       return;
     99     }
    100   else if (acc_device_type (acc_dev->type) == acc_device_host)
    101     {
    102       fn (hostaddrs);
    103       return;
    104     }
    105 
    106   /* Default: let the runtime choose.  */
    107   for (i = 0; i != GOMP_DIM_MAX; i++)
    108     dims[i] = 0;
    109 
    110   va_start (ap, kinds);
    111   /* TODO: This will need amending when device_type is implemented.  */
    112   while ((tag = va_arg (ap, unsigned)) != 0)
    113     {
    114       if (GOMP_LAUNCH_DEVICE (tag))
    115 	gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
    116 		    GOMP_LAUNCH_DEVICE (tag));
    117 
    118       switch (GOMP_LAUNCH_CODE (tag))
    119 	{
    120 	case GOMP_LAUNCH_DIM:
    121 	  {
    122 	    unsigned mask = GOMP_LAUNCH_OP (tag);
    123 
    124 	    for (i = 0; i != GOMP_DIM_MAX; i++)
    125 	      if (mask & GOMP_DIM_MASK (i))
    126 		dims[i] = va_arg (ap, unsigned);
    127 	  }
    128 	  break;
    129 
    130 	case GOMP_LAUNCH_ASYNC:
    131 	  {
    132 	    /* Small constant values are encoded in the operand.  */
    133 	    async = GOMP_LAUNCH_OP (tag);
    134 
    135 	    if (async == GOMP_LAUNCH_OP_MAX)
    136 	      async = va_arg (ap, unsigned);
    137 	    break;
    138 	  }
    139 
    140 	case GOMP_LAUNCH_WAIT:
    141 	  {
    142 	    unsigned num_waits = GOMP_LAUNCH_OP (tag);
    143 
    144 	    if (num_waits)
    145 	      goacc_wait (async, num_waits, &ap);
    146 	    break;
    147 	  }
    148 
    149 	default:
    150 	  gomp_fatal ("unrecognized offload code '%d',"
    151 		      " libgomp is too old", GOMP_LAUNCH_CODE (tag));
    152 	}
    153     }
    154   va_end (ap);
    155 
    156   acc_dev->openacc.async_set_async_func (async);
    157 
    158   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
    159     {
    160       k.host_start = (uintptr_t) fn;
    161       k.host_end = k.host_start + 1;
    162       gomp_mutex_lock (&acc_dev->lock);
    163       tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
    164       gomp_mutex_unlock (&acc_dev->lock);
    165 
    166       if (tgt_fn_key == NULL)
    167 	gomp_fatal ("target function wasn't mapped");
    168 
    169       tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
    170     }
    171   else
    172     tgt_fn = (void (*)) fn;
    173 
    174   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
    175 		       GOMP_MAP_VARS_OPENACC);
    176 
    177   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
    178   for (i = 0; i < mapnum; i++)
    179     devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
    180 			    + tgt->list[i].key->tgt_offset);
    181 
    182   acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
    183 			      async, dims, tgt);
    184 
    185   /* If running synchronously, unmap immediately.  */
    186   if (async < acc_async_noval)
    187     gomp_unmap_vars (tgt, true);
    188   else
    189     tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
    190 
    191   acc_dev->openacc.async_set_async_func (acc_async_sync);
    192 }
    193 
    194 /* Legacy entry point, only provide host execution.  */
    195 
    196 void
    197 GOACC_parallel (int device, void (*fn) (void *),
    198 		size_t mapnum, void **hostaddrs, size_t *sizes,
    199 		unsigned short *kinds,
    200 		int num_gangs, int num_workers, int vector_length,
    201 		int async, int num_waits, ...)
    202 {
    203   goacc_save_and_set_bind (acc_device_host);
    204   fn (hostaddrs);
    205   goacc_restore_bind ();
    206 }
    207 
    208 void
    209 GOACC_data_start (int device, size_t mapnum,
    210 		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
    211 {
    212   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
    213   struct target_mem_desc *tgt;
    214 
    215 #ifdef HAVE_INTTYPES_H
    216   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
    217 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
    218 #else
    219   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
    220 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
    221 #endif
    222 
    223   goacc_lazy_initialize ();
    224 
    225   struct goacc_thread *thr = goacc_thread ();
    226   struct gomp_device_descr *acc_dev = thr->dev;
    227 
    228   /* Host fallback or 'do nothing'.  */
    229   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    230       || host_fallback)
    231     {
    232       tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
    233 			   GOMP_MAP_VARS_OPENACC);
    234       tgt->prev = thr->mapped_data;
    235       thr->mapped_data = tgt;
    236 
    237       return;
    238     }
    239 
    240   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
    241   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
    242 		       GOMP_MAP_VARS_OPENACC);
    243   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
    244   tgt->prev = thr->mapped_data;
    245   thr->mapped_data = tgt;
    246 }
    247 
    248 void
    249 GOACC_data_end (void)
    250 {
    251   struct goacc_thread *thr = goacc_thread ();
    252   struct target_mem_desc *tgt = thr->mapped_data;
    253 
    254   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
    255   thr->mapped_data = tgt->prev;
    256   gomp_unmap_vars (tgt, true);
    257   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
    258 }
    259 
    260 void
    261 GOACC_enter_exit_data (int device, size_t mapnum,
    262 		       void **hostaddrs, size_t *sizes, unsigned short *kinds,
    263 		       int async, int num_waits, ...)
    264 {
    265   struct goacc_thread *thr;
    266   struct gomp_device_descr *acc_dev;
    267   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
    268   bool data_enter = false;
    269   size_t i;
    270 
    271   goacc_lazy_initialize ();
    272 
    273   thr = goacc_thread ();
    274   acc_dev = thr->dev;
    275 
    276   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    277       || host_fallback)
    278     return;
    279 
    280   if (num_waits)
    281     {
    282       va_list ap;
    283 
    284       va_start (ap, num_waits);
    285       goacc_wait (async, num_waits, &ap);
    286       va_end (ap);
    287     }
    288 
    289   acc_dev->openacc.async_set_async_func (async);
    290 
    291   /* Determine if this is an "acc enter data".  */
    292   for (i = 0; i < mapnum; ++i)
    293     {
    294       unsigned char kind = kinds[i] & 0xff;
    295 
    296       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
    297 	continue;
    298 
    299       if (kind == GOMP_MAP_FORCE_ALLOC
    300 	  || kind == GOMP_MAP_FORCE_PRESENT
    301 	  || kind == GOMP_MAP_FORCE_TO)
    302 	{
    303 	  data_enter = true;
    304 	  break;
    305 	}
    306 
    307       if (kind == GOMP_MAP_DELETE
    308 	  || kind == GOMP_MAP_FORCE_FROM)
    309 	break;
    310 
    311       gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
    312 		      kind);
    313     }
    314 
    315   if (data_enter)
    316     {
    317       for (i = 0; i < mapnum; i++)
    318 	{
    319 	  unsigned char kind = kinds[i] & 0xff;
    320 
    321 	  /* Scan for PSETs.  */
    322 	  int psets = find_pset (i, mapnum, kinds);
    323 
    324 	  if (!psets)
    325 	    {
    326 	      switch (kind)
    327 		{
    328 		case GOMP_MAP_POINTER:
    329 		  gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
    330 					&kinds[i]);
    331 		  break;
    332 		case GOMP_MAP_FORCE_ALLOC:
    333 		  acc_create (hostaddrs[i], sizes[i]);
    334 		  break;
    335 		case GOMP_MAP_FORCE_PRESENT:
    336 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
    337 		  break;
    338 		case GOMP_MAP_FORCE_TO:
    339 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
    340 		  break;
    341 		default:
    342 		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
    343 			      kind);
    344 		  break;
    345 		}
    346 	    }
    347 	  else
    348 	    {
    349 	      gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
    350 	      /* Increment 'i' by two because OpenACC requires fortran
    351 		 arrays to be contiguous, so each PSET is associated with
    352 		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
    353 		 one MAP_POINTER.  */
    354 	      i += 2;
    355 	    }
    356 	}
    357     }
    358   else
    359     for (i = 0; i < mapnum; ++i)
    360       {
    361 	unsigned char kind = kinds[i] & 0xff;
    362 
    363 	int psets = find_pset (i, mapnum, kinds);
    364 
    365 	if (!psets)
    366 	  {
    367 	    switch (kind)
    368 	      {
    369 	      case GOMP_MAP_POINTER:
    370 		gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
    371 					 == GOMP_MAP_FORCE_FROM,
    372 					 async, 1);
    373 		break;
    374 	      case GOMP_MAP_DELETE:
    375 		acc_delete (hostaddrs[i], sizes[i]);
    376 		break;
    377 	      case GOMP_MAP_FORCE_FROM:
    378 		acc_copyout (hostaddrs[i], sizes[i]);
    379 		break;
    380 	      default:
    381 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
    382 			    kind);
    383 		break;
    384 	      }
    385 	  }
    386 	else
    387 	  {
    388 	    gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
    389 				     == GOMP_MAP_FORCE_FROM, async, 3);
    390 	    /* See the above comment.  */
    391 	    i += 2;
    392 	  }
    393       }
    394 
    395   acc_dev->openacc.async_set_async_func (acc_async_sync);
    396 }
    397 
    398 static void
    399 goacc_wait (int async, int num_waits, va_list *ap)
    400 {
    401   struct goacc_thread *thr = goacc_thread ();
    402   struct gomp_device_descr *acc_dev = thr->dev;
    403 
    404   while (num_waits--)
    405     {
    406       int qid = va_arg (*ap, int);
    407 
    408       if (acc_async_test (qid))
    409 	continue;
    410 
    411       if (async == acc_async_sync)
    412 	acc_wait (qid);
    413       else if (qid == async)
    414 	;/* If we're waiting on the same asynchronous queue as we're
    415 	    launching on, the queue itself will order work as
    416 	    required, so there's no need to wait explicitly.  */
    417       else
    418 	acc_dev->openacc.async_wait_async_func (qid, async);
    419     }
    420 }
    421 
    422 void
    423 GOACC_update (int device, size_t mapnum,
    424 	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
    425 	      int async, int num_waits, ...)
    426 {
    427   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
    428   size_t i;
    429 
    430   goacc_lazy_initialize ();
    431 
    432   struct goacc_thread *thr = goacc_thread ();
    433   struct gomp_device_descr *acc_dev = thr->dev;
    434 
    435   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    436       || host_fallback)
    437     return;
    438 
    439   if (num_waits)
    440     {
    441       va_list ap;
    442 
    443       va_start (ap, num_waits);
    444       goacc_wait (async, num_waits, &ap);
    445       va_end (ap);
    446     }
    447 
    448   acc_dev->openacc.async_set_async_func (async);
    449 
    450   for (i = 0; i < mapnum; ++i)
    451     {
    452       unsigned char kind = kinds[i] & 0xff;
    453 
    454       switch (kind)
    455 	{
    456 	case GOMP_MAP_POINTER:
    457 	case GOMP_MAP_TO_PSET:
    458 	  break;
    459 
    460 	case GOMP_MAP_FORCE_TO:
    461 	  acc_update_device (hostaddrs[i], sizes[i]);
    462 	  break;
    463 
    464 	case GOMP_MAP_FORCE_FROM:
    465 	  acc_update_self (hostaddrs[i], sizes[i]);
    466 	  break;
    467 
    468 	default:
    469 	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
    470 	  break;
    471 	}
    472     }
    473 
    474   acc_dev->openacc.async_set_async_func (acc_async_sync);
    475 }
    476 
    477 void
    478 GOACC_wait (int async, int num_waits, ...)
    479 {
    480   if (num_waits)
    481     {
    482       va_list ap;
    483 
    484       va_start (ap, num_waits);
    485       goacc_wait (async, num_waits, &ap);
    486       va_end (ap);
    487     }
    488   else if (async == acc_async_sync)
    489     acc_wait_all ();
    490   else if (async == acc_async_noval)
    491     goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
    492 }
    493 
/* Return the number of threads; this implementation always reports 1.  */

int
GOACC_get_num_threads (void)
{
  const int thread_count = 1;

  return thread_count;
}
    499 
/* Return the calling thread's number; this implementation always
   reports 0.  */

int
GOACC_get_thread_num (void)
{
  const int thread_index = 0;

  return thread_index;
}
    505 
    506 void
    507 GOACC_declare (int device, size_t mapnum,
    508 	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
    509 {
    510   int i;
    511 
    512   for (i = 0; i < mapnum; i++)
    513     {
    514       unsigned char kind = kinds[i] & 0xff;
    515 
    516       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
    517 	continue;
    518 
    519       switch (kind)
    520 	{
    521 	  case GOMP_MAP_FORCE_ALLOC:
    522 	  case GOMP_MAP_FORCE_FROM:
    523 	  case GOMP_MAP_FORCE_TO:
    524 	  case GOMP_MAP_POINTER:
    525 	  case GOMP_MAP_DELETE:
    526 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
    527 				   &kinds[i], 0, 0);
    528 	    break;
    529 
    530 	  case GOMP_MAP_FORCE_DEVICEPTR:
    531 	    break;
    532 
    533 	  case GOMP_MAP_ALLOC:
    534 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
    535 	      GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
    536 				     &kinds[i], 0, 0);
    537 	    break;
    538 
    539 	  case GOMP_MAP_TO:
    540 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
    541 				   &kinds[i], 0, 0);
    542 
    543 	    break;
    544 
    545 	  case GOMP_MAP_FROM:
    546 	    kinds[i] = GOMP_MAP_FORCE_FROM;
    547 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
    548 				   &kinds[i], 0, 0);
    549 	    break;
    550 
    551 	  case GOMP_MAP_FORCE_PRESENT:
    552 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
    553 	      gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
    554 			  (unsigned long) sizes[i]);
    555 	    break;
    556 
    557 	  default:
    558 	    assert (0);
    559 	    break;
    560 	}
    561     }
    562 }
    563